{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 10719, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000279876854184159, "grad_norm": 1.4728044242366627, "learning_rate": 0.0, "loss": 1.8205, "step": 1 }, { "epoch": 0.000559753708368318, "grad_norm": 1.505239345302424, "learning_rate": 1.8656716417910447e-07, "loss": 1.8335, "step": 2 }, { "epoch": 0.0008396305625524769, "grad_norm": 1.3433563758023555, "learning_rate": 3.7313432835820895e-07, "loss": 1.7795, "step": 3 }, { "epoch": 0.001119507416736636, "grad_norm": 1.4232474811860303, "learning_rate": 5.597014925373135e-07, "loss": 1.7965, "step": 4 }, { "epoch": 0.001399384270920795, "grad_norm": 1.4247553473189891, "learning_rate": 7.462686567164179e-07, "loss": 1.7917, "step": 5 }, { "epoch": 0.0016792611251049538, "grad_norm": 1.3711252767403777, "learning_rate": 9.328358208955223e-07, "loss": 1.8019, "step": 6 }, { "epoch": 0.0019591379792891126, "grad_norm": 1.4549056602853179, "learning_rate": 1.119402985074627e-06, "loss": 1.8298, "step": 7 }, { "epoch": 0.002239014833473272, "grad_norm": 1.7659390136520288, "learning_rate": 1.3059701492537314e-06, "loss": 1.81, "step": 8 }, { "epoch": 0.0025188916876574307, "grad_norm": 1.4049173237098618, "learning_rate": 1.4925373134328358e-06, "loss": 1.7558, "step": 9 }, { "epoch": 0.00279876854184159, "grad_norm": 1.8665887897015843, "learning_rate": 1.6791044776119404e-06, "loss": 1.809, "step": 10 }, { "epoch": 0.0030786453960257487, "grad_norm": 1.4414438014667166, "learning_rate": 1.8656716417910446e-06, "loss": 1.7982, "step": 11 }, { "epoch": 0.0033585222502099076, "grad_norm": 1.5505834107968952, "learning_rate": 2.0522388059701493e-06, "loss": 1.8397, "step": 12 }, { "epoch": 0.003638399104394067, "grad_norm": 1.5140005067509932, "learning_rate": 2.238805970149254e-06, "loss": 1.7652, "step": 13 }, { "epoch": 0.003918275958578225, "grad_norm": 3.009122030724365, "learning_rate": 2.4253731343283585e-06, "loss": 1.7922, "step": 14 }, { "epoch": 0.0041981528127623844, "grad_norm": 1.4566857198502647, "learning_rate": 2.6119402985074627e-06, "loss": 1.7847, "step": 15 }, { "epoch": 0.004478029666946544, "grad_norm": 1.641801445984581, "learning_rate": 2.7985074626865674e-06, "loss": 1.7609, "step": 16 }, { "epoch": 0.004757906521130702, "grad_norm": 1.4761399904976473, "learning_rate": 2.9850746268656716e-06, "loss": 1.7963, "step": 17 }, { "epoch": 0.005037783375314861, "grad_norm": 1.5555108403101896, "learning_rate": 3.171641791044776e-06, "loss": 1.7737, "step": 18 }, { "epoch": 0.005317660229499021, "grad_norm": 1.493846433622173, "learning_rate": 3.358208955223881e-06, "loss": 1.7524, "step": 19 }, { "epoch": 0.00559753708368318, "grad_norm": 1.4220754152692414, "learning_rate": 3.544776119402985e-06, "loss": 1.7791, "step": 20 }, { "epoch": 0.005877413937867338, "grad_norm": 1.8947233265935908, "learning_rate": 3.7313432835820893e-06, "loss": 1.7816, "step": 21 }, { "epoch": 0.0061572907920514975, "grad_norm": 1.407763579837366, "learning_rate": 3.917910447761194e-06, "loss": 1.7954, "step": 22 }, { "epoch": 0.006437167646235657, "grad_norm": 1.5237240703857127, "learning_rate": 4.1044776119402985e-06, "loss": 1.7398, "step": 23 }, { "epoch": 0.006717044500419815, "grad_norm": 1.4696094431787539, "learning_rate": 4.291044776119403e-06, "loss": 1.7687, "step": 24 }, { "epoch": 0.006996921354603974, "grad_norm": 1.4428552674089288, "learning_rate": 4.477611940298508e-06, "loss": 1.7289, "step": 25 }, { "epoch": 0.007276798208788134, "grad_norm": 2.662733330011018, "learning_rate": 4.664179104477613e-06, "loss": 1.6835, "step": 26 }, { "epoch": 0.007556675062972292, "grad_norm": 1.3563238971244345, "learning_rate": 4.850746268656717e-06, "loss": 1.6712, "step": 27 }, { "epoch": 0.00783655191715645, "grad_norm": 1.2826269537602049, "learning_rate": 5.037313432835821e-06, "loss": 1.6464, "step": 28 }, { "epoch": 0.00811642877134061, "grad_norm": 1.2434477139354885, "learning_rate": 5.2238805970149255e-06, "loss": 1.6427, "step": 29 }, { "epoch": 0.008396305625524769, "grad_norm": 1.2654431934709676, "learning_rate": 5.41044776119403e-06, "loss": 1.5934, "step": 30 }, { "epoch": 0.008676182479708928, "grad_norm": 2.014623704486581, "learning_rate": 5.597014925373135e-06, "loss": 1.5545, "step": 31 }, { "epoch": 0.008956059333893087, "grad_norm": 1.2003860288390893, "learning_rate": 5.783582089552239e-06, "loss": 1.6107, "step": 32 }, { "epoch": 0.009235936188077247, "grad_norm": 1.1313653705450164, "learning_rate": 5.970149253731343e-06, "loss": 1.5306, "step": 33 }, { "epoch": 0.009515813042261404, "grad_norm": 1.0408280246236337, "learning_rate": 6.156716417910448e-06, "loss": 1.5471, "step": 34 }, { "epoch": 0.009795689896445563, "grad_norm": 1.144505928801738, "learning_rate": 6.343283582089552e-06, "loss": 1.4912, "step": 35 }, { "epoch": 0.010075566750629723, "grad_norm": 1.0617565147825843, "learning_rate": 6.529850746268657e-06, "loss": 1.5086, "step": 36 }, { "epoch": 0.010355443604813882, "grad_norm": 1.0091951750456902, "learning_rate": 6.716417910447762e-06, "loss": 1.4814, "step": 37 }, { "epoch": 0.010635320458998041, "grad_norm": 0.9363987444801333, "learning_rate": 6.902985074626867e-06, "loss": 1.4334, "step": 38 }, { "epoch": 0.0109151973131822, "grad_norm": 0.8681716751168916, "learning_rate": 7.08955223880597e-06, "loss": 1.4477, "step": 39 }, { "epoch": 0.01119507416736636, "grad_norm": 0.8320202206333529, "learning_rate": 7.276119402985075e-06, "loss": 1.4034, "step": 40 }, { "epoch": 0.011474951021550517, "grad_norm": 0.821194741981735, "learning_rate": 7.4626865671641785e-06, "loss": 1.3807, "step": 41 }, { "epoch": 0.011754827875734676, "grad_norm": 0.7123537368766032, "learning_rate": 7.649253731343284e-06, "loss": 1.343, "step": 42 }, { "epoch": 0.012034704729918836, "grad_norm": 0.6977173373063026, "learning_rate": 7.835820895522389e-06, "loss": 1.3257, "step": 43 }, { "epoch": 0.012314581584102995, "grad_norm": 0.8881966783800591, "learning_rate": 8.022388059701493e-06, "loss": 1.3192, "step": 44 }, { "epoch": 0.012594458438287154, "grad_norm": 0.7281807244161581, "learning_rate": 8.208955223880597e-06, "loss": 1.2733, "step": 45 }, { "epoch": 0.012874335292471313, "grad_norm": 0.5786930321019962, "learning_rate": 8.395522388059703e-06, "loss": 1.2658, "step": 46 }, { "epoch": 0.013154212146655471, "grad_norm": 0.5312407865275912, "learning_rate": 8.582089552238805e-06, "loss": 1.2432, "step": 47 }, { "epoch": 0.01343408900083963, "grad_norm": 0.5018260207663885, "learning_rate": 8.768656716417911e-06, "loss": 1.2261, "step": 48 }, { "epoch": 0.01371396585502379, "grad_norm": 0.5587813650021505, "learning_rate": 8.955223880597016e-06, "loss": 1.2251, "step": 49 }, { "epoch": 0.013993842709207949, "grad_norm": 0.6161639441738711, "learning_rate": 9.14179104477612e-06, "loss": 1.2459, "step": 50 }, { "epoch": 0.014273719563392108, "grad_norm": 0.8289046689676719, "learning_rate": 9.328358208955226e-06, "loss": 1.222, "step": 51 }, { "epoch": 0.014553596417576267, "grad_norm": 0.47193853390904894, "learning_rate": 9.514925373134328e-06, "loss": 1.202, "step": 52 }, { "epoch": 0.014833473271760425, "grad_norm": 0.39196836071066965, "learning_rate": 9.701492537313434e-06, "loss": 1.2004, "step": 53 }, { "epoch": 0.015113350125944584, "grad_norm": 0.3947951825723403, "learning_rate": 9.888059701492537e-06, "loss": 1.1622, "step": 54 }, { "epoch": 0.015393226980128743, "grad_norm": 0.41206724699457215, "learning_rate": 1.0074626865671643e-05, "loss": 1.1594, "step": 55 }, { "epoch": 0.0156731038343129, "grad_norm": 0.4376944946677843, "learning_rate": 1.0261194029850747e-05, "loss": 1.1592, "step": 56 }, { "epoch": 0.01595298068849706, "grad_norm": 0.3668202168603127, "learning_rate": 1.0447761194029851e-05, "loss": 1.1709, "step": 57 }, { "epoch": 0.01623285754268122, "grad_norm": 0.4159298936193091, "learning_rate": 1.0634328358208955e-05, "loss": 1.1241, "step": 58 }, { "epoch": 0.01651273439686538, "grad_norm": 0.35125578222565534, "learning_rate": 1.082089552238806e-05, "loss": 1.1084, "step": 59 }, { "epoch": 0.016792611251049538, "grad_norm": 0.5341734892876742, "learning_rate": 1.1007462686567165e-05, "loss": 1.1317, "step": 60 }, { "epoch": 0.017072488105233697, "grad_norm": 0.834819597810914, "learning_rate": 1.119402985074627e-05, "loss": 1.1106, "step": 61 }, { "epoch": 0.017352364959417856, "grad_norm": 0.3210562794260083, "learning_rate": 1.1380597014925374e-05, "loss": 1.0916, "step": 62 }, { "epoch": 0.017632241813602016, "grad_norm": 0.380813227684211, "learning_rate": 1.1567164179104478e-05, "loss": 1.1158, "step": 63 }, { "epoch": 0.017912118667786175, "grad_norm": 0.3239524649011721, "learning_rate": 1.1753731343283584e-05, "loss": 1.1236, "step": 64 }, { "epoch": 0.018191995521970334, "grad_norm": 0.9504123736544604, "learning_rate": 1.1940298507462686e-05, "loss": 1.0827, "step": 65 }, { "epoch": 0.018471872376154493, "grad_norm": 0.2923251684511702, "learning_rate": 1.2126865671641792e-05, "loss": 1.0691, "step": 66 }, { "epoch": 0.018751749230338653, "grad_norm": 0.31695938515870953, "learning_rate": 1.2313432835820896e-05, "loss": 1.0576, "step": 67 }, { "epoch": 0.01903162608452281, "grad_norm": 0.37524185307654057, "learning_rate": 1.25e-05, "loss": 1.0269, "step": 68 }, { "epoch": 0.019311502938706968, "grad_norm": 0.43494475956498324, "learning_rate": 1.2686567164179105e-05, "loss": 1.0618, "step": 69 }, { "epoch": 0.019591379792891127, "grad_norm": 0.3634493682472731, "learning_rate": 1.287313432835821e-05, "loss": 1.0541, "step": 70 }, { "epoch": 0.019871256647075286, "grad_norm": 0.3831070498006833, "learning_rate": 1.3059701492537313e-05, "loss": 1.0448, "step": 71 }, { "epoch": 0.020151133501259445, "grad_norm": 0.33594175735888865, "learning_rate": 1.3246268656716417e-05, "loss": 1.0254, "step": 72 }, { "epoch": 0.020431010355443605, "grad_norm": 0.31128046999844944, "learning_rate": 1.3432835820895523e-05, "loss": 1.027, "step": 73 }, { "epoch": 0.020710887209627764, "grad_norm": 0.29518135623840586, "learning_rate": 1.3619402985074628e-05, "loss": 1.0395, "step": 74 }, { "epoch": 0.020990764063811923, "grad_norm": 0.405888150308363, "learning_rate": 1.3805970149253733e-05, "loss": 1.0591, "step": 75 }, { "epoch": 0.021270640917996082, "grad_norm": 0.33804780440415055, "learning_rate": 1.3992537313432836e-05, "loss": 1.0109, "step": 76 }, { "epoch": 0.02155051777218024, "grad_norm": 0.44351887804070345, "learning_rate": 1.417910447761194e-05, "loss": 0.9729, "step": 77 }, { "epoch": 0.0218303946263644, "grad_norm": 0.3591134895473745, "learning_rate": 1.4365671641791046e-05, "loss": 1.0168, "step": 78 }, { "epoch": 0.02211027148054856, "grad_norm": 0.46503349012770845, "learning_rate": 1.455223880597015e-05, "loss": 1.0353, "step": 79 }, { "epoch": 0.02239014833473272, "grad_norm": 0.29491222542801054, "learning_rate": 1.4738805970149256e-05, "loss": 1.0137, "step": 80 }, { "epoch": 0.022670025188916875, "grad_norm": 0.3312727144725203, "learning_rate": 1.4925373134328357e-05, "loss": 1.0049, "step": 81 }, { "epoch": 0.022949902043101034, "grad_norm": 0.4164987168137994, "learning_rate": 1.5111940298507463e-05, "loss": 0.9981, "step": 82 }, { "epoch": 0.023229778897285194, "grad_norm": 0.31812835092237385, "learning_rate": 1.529850746268657e-05, "loss": 0.9707, "step": 83 }, { "epoch": 0.023509655751469353, "grad_norm": 0.38858425420869547, "learning_rate": 1.5485074626865673e-05, "loss": 1.01, "step": 84 }, { "epoch": 0.023789532605653512, "grad_norm": 0.29944476492402083, "learning_rate": 1.5671641791044777e-05, "loss": 1.0048, "step": 85 }, { "epoch": 0.02406940945983767, "grad_norm": 0.40597927338494716, "learning_rate": 1.585820895522388e-05, "loss": 0.9852, "step": 86 }, { "epoch": 0.02434928631402183, "grad_norm": 0.5099191281965668, "learning_rate": 1.6044776119402986e-05, "loss": 0.9473, "step": 87 }, { "epoch": 0.02462916316820599, "grad_norm": 0.3478753122305574, "learning_rate": 1.623134328358209e-05, "loss": 0.9482, "step": 88 }, { "epoch": 0.02490904002239015, "grad_norm": 0.2991341678437568, "learning_rate": 1.6417910447761194e-05, "loss": 0.98, "step": 89 }, { "epoch": 0.02518891687657431, "grad_norm": 1.2717574046483888, "learning_rate": 1.66044776119403e-05, "loss": 0.9714, "step": 90 }, { "epoch": 0.025468793730758468, "grad_norm": 0.36168485005523193, "learning_rate": 1.6791044776119406e-05, "loss": 0.9864, "step": 91 }, { "epoch": 0.025748670584942627, "grad_norm": 0.2553235723041327, "learning_rate": 1.6977611940298507e-05, "loss": 0.9544, "step": 92 }, { "epoch": 0.026028547439126783, "grad_norm": 0.38968799546815525, "learning_rate": 1.716417910447761e-05, "loss": 0.9673, "step": 93 }, { "epoch": 0.026308424293310942, "grad_norm": 0.29040443572150015, "learning_rate": 1.735074626865672e-05, "loss": 0.9604, "step": 94 }, { "epoch": 0.0265883011474951, "grad_norm": 0.33718051088371714, "learning_rate": 1.7537313432835823e-05, "loss": 0.9513, "step": 95 }, { "epoch": 0.02686817800167926, "grad_norm": 0.25695938572493227, "learning_rate": 1.7723880597014927e-05, "loss": 0.9611, "step": 96 }, { "epoch": 0.02714805485586342, "grad_norm": 0.37451233625994973, "learning_rate": 1.791044776119403e-05, "loss": 0.9304, "step": 97 }, { "epoch": 0.02742793171004758, "grad_norm": 0.2770675190804006, "learning_rate": 1.8097014925373135e-05, "loss": 0.9458, "step": 98 }, { "epoch": 0.027707808564231738, "grad_norm": 0.6986110230661327, "learning_rate": 1.828358208955224e-05, "loss": 0.9232, "step": 99 }, { "epoch": 0.027987685418415897, "grad_norm": 0.5685133471176088, "learning_rate": 1.8470149253731344e-05, "loss": 0.9216, "step": 100 }, { "epoch": 0.028267562272600057, "grad_norm": 0.3056276167497264, "learning_rate": 1.865671641791045e-05, "loss": 0.9487, "step": 101 }, { "epoch": 0.028547439126784216, "grad_norm": 0.690496142181496, "learning_rate": 1.8843283582089552e-05, "loss": 0.9603, "step": 102 }, { "epoch": 0.028827315980968375, "grad_norm": 0.2512661906485543, "learning_rate": 1.9029850746268656e-05, "loss": 0.912, "step": 103 }, { "epoch": 0.029107192835152534, "grad_norm": 0.2962493395206657, "learning_rate": 1.921641791044776e-05, "loss": 0.9254, "step": 104 }, { "epoch": 0.02938706968933669, "grad_norm": 0.3267774778813336, "learning_rate": 1.9402985074626868e-05, "loss": 0.9106, "step": 105 }, { "epoch": 0.02966694654352085, "grad_norm": 0.30294232804776927, "learning_rate": 1.9589552238805972e-05, "loss": 0.9074, "step": 106 }, { "epoch": 0.02994682339770501, "grad_norm": 0.3343955678765164, "learning_rate": 1.9776119402985073e-05, "loss": 0.9306, "step": 107 }, { "epoch": 0.030226700251889168, "grad_norm": 0.33550956325604053, "learning_rate": 1.996268656716418e-05, "loss": 0.8916, "step": 108 }, { "epoch": 0.030506577106073327, "grad_norm": 0.2899729584402835, "learning_rate": 2.0149253731343285e-05, "loss": 0.8908, "step": 109 }, { "epoch": 0.030786453960257486, "grad_norm": 0.47310364998373344, "learning_rate": 2.033582089552239e-05, "loss": 0.9328, "step": 110 }, { "epoch": 0.031066330814441646, "grad_norm": 0.28797628991236673, "learning_rate": 2.0522388059701493e-05, "loss": 0.9037, "step": 111 }, { "epoch": 0.0313462076686258, "grad_norm": 0.33945200140239673, "learning_rate": 2.0708955223880598e-05, "loss": 0.9206, "step": 112 }, { "epoch": 0.03162608452280996, "grad_norm": 0.5558734834690924, "learning_rate": 2.0895522388059702e-05, "loss": 0.9297, "step": 113 }, { "epoch": 0.03190596137699412, "grad_norm": 0.29382478566663683, "learning_rate": 2.1082089552238806e-05, "loss": 0.8795, "step": 114 }, { "epoch": 0.03218583823117828, "grad_norm": 0.3519871926142322, "learning_rate": 2.126865671641791e-05, "loss": 0.9089, "step": 115 }, { "epoch": 0.03246571508536244, "grad_norm": 0.31201842451781076, "learning_rate": 2.1455223880597018e-05, "loss": 0.8517, "step": 116 }, { "epoch": 0.0327455919395466, "grad_norm": 0.6019828532548496, "learning_rate": 2.164179104477612e-05, "loss": 0.8786, "step": 117 }, { "epoch": 0.03302546879373076, "grad_norm": 0.5405213522708816, "learning_rate": 2.1828358208955223e-05, "loss": 0.8566, "step": 118 }, { "epoch": 0.033305345647914916, "grad_norm": 0.4757647550403344, "learning_rate": 2.201492537313433e-05, "loss": 0.8996, "step": 119 }, { "epoch": 0.033585222502099076, "grad_norm": 0.3453825177094335, "learning_rate": 2.2201492537313435e-05, "loss": 0.8582, "step": 120 }, { "epoch": 0.033865099356283235, "grad_norm": 0.2944792995014774, "learning_rate": 2.238805970149254e-05, "loss": 0.8602, "step": 121 }, { "epoch": 0.034144976210467394, "grad_norm": 0.37379657884593837, "learning_rate": 2.2574626865671643e-05, "loss": 0.8562, "step": 122 }, { "epoch": 0.03442485306465155, "grad_norm": 0.382340008793532, "learning_rate": 2.2761194029850747e-05, "loss": 0.9138, "step": 123 }, { "epoch": 0.03470472991883571, "grad_norm": 0.5434059277538503, "learning_rate": 2.294776119402985e-05, "loss": 0.8562, "step": 124 }, { "epoch": 0.03498460677301987, "grad_norm": 0.34085582829165434, "learning_rate": 2.3134328358208956e-05, "loss": 0.8917, "step": 125 }, { "epoch": 0.03526448362720403, "grad_norm": 0.36563512573885215, "learning_rate": 2.332089552238806e-05, "loss": 0.8787, "step": 126 }, { "epoch": 0.03554436048138819, "grad_norm": 0.30805995301934036, "learning_rate": 2.3507462686567168e-05, "loss": 0.8695, "step": 127 }, { "epoch": 0.03582423733557235, "grad_norm": 0.32064094869892495, "learning_rate": 2.369402985074627e-05, "loss": 0.8869, "step": 128 }, { "epoch": 0.03610411418975651, "grad_norm": 0.35001466763801176, "learning_rate": 2.3880597014925373e-05, "loss": 0.8405, "step": 129 }, { "epoch": 0.03638399104394067, "grad_norm": 0.3051929273057518, "learning_rate": 2.406716417910448e-05, "loss": 0.8695, "step": 130 }, { "epoch": 0.03666386789812483, "grad_norm": 0.31303531514779737, "learning_rate": 2.4253731343283584e-05, "loss": 0.8231, "step": 131 }, { "epoch": 0.036943744752308987, "grad_norm": 0.3327959941927146, "learning_rate": 2.444029850746269e-05, "loss": 0.8222, "step": 132 }, { "epoch": 0.037223621606493146, "grad_norm": 0.31649436249699664, "learning_rate": 2.4626865671641793e-05, "loss": 0.8646, "step": 133 }, { "epoch": 0.037503498460677305, "grad_norm": 0.31248535586119264, "learning_rate": 2.4813432835820897e-05, "loss": 0.8859, "step": 134 }, { "epoch": 0.037783375314861464, "grad_norm": 0.31221012646478513, "learning_rate": 2.5e-05, "loss": 0.8687, "step": 135 }, { "epoch": 0.03806325216904562, "grad_norm": 0.31379766380092944, "learning_rate": 2.5186567164179102e-05, "loss": 0.8406, "step": 136 }, { "epoch": 0.038343129023229776, "grad_norm": 0.3342098492012485, "learning_rate": 2.537313432835821e-05, "loss": 0.8505, "step": 137 }, { "epoch": 0.038623005877413935, "grad_norm": 0.30891061812009696, "learning_rate": 2.5559701492537314e-05, "loss": 0.8608, "step": 138 }, { "epoch": 0.038902882731598094, "grad_norm": 0.31111148239775727, "learning_rate": 2.574626865671642e-05, "loss": 0.8647, "step": 139 }, { "epoch": 0.039182759585782254, "grad_norm": 0.3235359516250046, "learning_rate": 2.5932835820895522e-05, "loss": 0.8217, "step": 140 }, { "epoch": 0.03946263643996641, "grad_norm": 0.33378846630471376, "learning_rate": 2.6119402985074626e-05, "loss": 0.846, "step": 141 }, { "epoch": 0.03974251329415057, "grad_norm": 0.3432784883296147, "learning_rate": 2.6305970149253734e-05, "loss": 0.8393, "step": 142 }, { "epoch": 0.04002239014833473, "grad_norm": 0.35622120890161457, "learning_rate": 2.6492537313432835e-05, "loss": 0.8608, "step": 143 }, { "epoch": 0.04030226700251889, "grad_norm": 0.35471829729039506, "learning_rate": 2.6679104477611942e-05, "loss": 0.8326, "step": 144 }, { "epoch": 0.04058214385670305, "grad_norm": 0.33749459736584697, "learning_rate": 2.6865671641791047e-05, "loss": 0.8368, "step": 145 }, { "epoch": 0.04086202071088721, "grad_norm": 0.32296179654799884, "learning_rate": 2.7052238805970147e-05, "loss": 0.8323, "step": 146 }, { "epoch": 0.04114189756507137, "grad_norm": 0.3151895401016503, "learning_rate": 2.7238805970149255e-05, "loss": 0.8555, "step": 147 }, { "epoch": 0.04142177441925553, "grad_norm": 0.29806035262199587, "learning_rate": 2.742537313432836e-05, "loss": 0.8048, "step": 148 }, { "epoch": 0.04170165127343969, "grad_norm": 0.3434423472592504, "learning_rate": 2.7611940298507467e-05, "loss": 0.782, "step": 149 }, { "epoch": 0.041981528127623846, "grad_norm": 0.34313430189812544, "learning_rate": 2.7798507462686568e-05, "loss": 0.827, "step": 150 }, { "epoch": 0.042261404981808005, "grad_norm": 0.3600176344186182, "learning_rate": 2.7985074626865672e-05, "loss": 0.8383, "step": 151 }, { "epoch": 0.042541281835992165, "grad_norm": 0.350304616705117, "learning_rate": 2.817164179104478e-05, "loss": 0.8405, "step": 152 }, { "epoch": 0.042821158690176324, "grad_norm": 0.35965814336077234, "learning_rate": 2.835820895522388e-05, "loss": 0.8402, "step": 153 }, { "epoch": 0.04310103554436048, "grad_norm": 0.32598483337211687, "learning_rate": 2.8544776119402988e-05, "loss": 0.831, "step": 154 }, { "epoch": 0.04338091239854464, "grad_norm": 0.3423837739076617, "learning_rate": 2.8731343283582092e-05, "loss": 0.7902, "step": 155 }, { "epoch": 0.0436607892527288, "grad_norm": 0.3380164512912703, "learning_rate": 2.8917910447761193e-05, "loss": 0.7833, "step": 156 }, { "epoch": 0.04394066610691296, "grad_norm": 0.3380512948518286, "learning_rate": 2.91044776119403e-05, "loss": 0.8125, "step": 157 }, { "epoch": 0.04422054296109712, "grad_norm": 0.3587187062888246, "learning_rate": 2.92910447761194e-05, "loss": 0.8129, "step": 158 }, { "epoch": 0.04450041981528128, "grad_norm": 0.3237088416973387, "learning_rate": 2.9477611940298512e-05, "loss": 0.8083, "step": 159 }, { "epoch": 0.04478029666946544, "grad_norm": 0.45938766048445673, "learning_rate": 2.9664179104477613e-05, "loss": 0.8071, "step": 160 }, { "epoch": 0.04506017352364959, "grad_norm": 0.34486561942202365, "learning_rate": 2.9850746268656714e-05, "loss": 0.8334, "step": 161 }, { "epoch": 0.04534005037783375, "grad_norm": 0.3453695395177396, "learning_rate": 3.003731343283582e-05, "loss": 0.8213, "step": 162 }, { "epoch": 0.04561992723201791, "grad_norm": 0.36176570579211076, "learning_rate": 3.0223880597014926e-05, "loss": 0.8032, "step": 163 }, { "epoch": 0.04589980408620207, "grad_norm": 0.4033961981787797, "learning_rate": 3.0410447761194033e-05, "loss": 0.806, "step": 164 }, { "epoch": 0.04617968094038623, "grad_norm": 0.38152797111667225, "learning_rate": 3.059701492537314e-05, "loss": 0.8116, "step": 165 }, { "epoch": 0.04645955779457039, "grad_norm": 0.3867278932508252, "learning_rate": 3.078358208955224e-05, "loss": 0.8079, "step": 166 }, { "epoch": 0.046739434648754546, "grad_norm": 0.3662832007508961, "learning_rate": 3.0970149253731346e-05, "loss": 0.8502, "step": 167 }, { "epoch": 0.047019311502938706, "grad_norm": 0.39805167295405547, "learning_rate": 3.115671641791045e-05, "loss": 0.7612, "step": 168 }, { "epoch": 0.047299188357122865, "grad_norm": 0.4207759157399436, "learning_rate": 3.1343283582089554e-05, "loss": 0.78, "step": 169 }, { "epoch": 0.047579065211307024, "grad_norm": 0.4428754011821389, "learning_rate": 3.1529850746268655e-05, "loss": 0.78, "step": 170 }, { "epoch": 0.04785894206549118, "grad_norm": 0.35968432621077273, "learning_rate": 3.171641791044776e-05, "loss": 0.7916, "step": 171 }, { "epoch": 0.04813881891967534, "grad_norm": 0.7517748076336829, "learning_rate": 3.190298507462687e-05, "loss": 0.7901, "step": 172 }, { "epoch": 0.0484186957738595, "grad_norm": 0.4179752380360452, "learning_rate": 3.208955223880597e-05, "loss": 0.7783, "step": 173 }, { "epoch": 0.04869857262804366, "grad_norm": 0.3651217198371012, "learning_rate": 3.227611940298508e-05, "loss": 0.7923, "step": 174 }, { "epoch": 0.04897844948222782, "grad_norm": 0.42966943123987805, "learning_rate": 3.246268656716418e-05, "loss": 0.7719, "step": 175 }, { "epoch": 0.04925832633641198, "grad_norm": 0.3829077114727494, "learning_rate": 3.264925373134329e-05, "loss": 0.8053, "step": 176 }, { "epoch": 0.04953820319059614, "grad_norm": 0.4143034582254228, "learning_rate": 3.283582089552239e-05, "loss": 0.8113, "step": 177 }, { "epoch": 0.0498180800447803, "grad_norm": 0.38297119283356057, "learning_rate": 3.302238805970149e-05, "loss": 0.8041, "step": 178 }, { "epoch": 0.05009795689896446, "grad_norm": 0.3961083935491084, "learning_rate": 3.32089552238806e-05, "loss": 0.8226, "step": 179 }, { "epoch": 0.05037783375314862, "grad_norm": 0.3604601053896823, "learning_rate": 3.3395522388059704e-05, "loss": 0.7905, "step": 180 }, { "epoch": 0.050657710607332776, "grad_norm": 0.4452392053944784, "learning_rate": 3.358208955223881e-05, "loss": 0.7781, "step": 181 }, { "epoch": 0.050937587461516935, "grad_norm": 0.36090384291409017, "learning_rate": 3.376865671641791e-05, "loss": 0.8145, "step": 182 }, { "epoch": 0.051217464315701094, "grad_norm": 0.41637973003542017, "learning_rate": 3.395522388059701e-05, "loss": 0.7799, "step": 183 }, { "epoch": 0.051497341169885254, "grad_norm": 0.4175389089983213, "learning_rate": 3.414179104477612e-05, "loss": 0.7815, "step": 184 }, { "epoch": 0.051777218024069406, "grad_norm": 0.36591396795760794, "learning_rate": 3.432835820895522e-05, "loss": 0.7616, "step": 185 }, { "epoch": 0.052057094878253565, "grad_norm": 0.39343179849771737, "learning_rate": 3.451492537313433e-05, "loss": 0.7979, "step": 186 }, { "epoch": 0.052336971732437725, "grad_norm": 0.4168663914343234, "learning_rate": 3.470149253731344e-05, "loss": 0.8179, "step": 187 }, { "epoch": 0.052616848586621884, "grad_norm": 0.42519235894982943, "learning_rate": 3.488805970149254e-05, "loss": 0.7615, "step": 188 }, { "epoch": 0.05289672544080604, "grad_norm": 0.3808260408144091, "learning_rate": 3.5074626865671645e-05, "loss": 0.7641, "step": 189 }, { "epoch": 0.0531766022949902, "grad_norm": 0.41167014625126297, "learning_rate": 3.5261194029850746e-05, "loss": 0.7857, "step": 190 }, { "epoch": 0.05345647914917436, "grad_norm": 0.38876907205109745, "learning_rate": 3.5447761194029854e-05, "loss": 0.7546, "step": 191 }, { "epoch": 0.05373635600335852, "grad_norm": 0.41160971741402497, "learning_rate": 3.5634328358208955e-05, "loss": 0.7756, "step": 192 }, { "epoch": 0.05401623285754268, "grad_norm": 0.3923391334923292, "learning_rate": 3.582089552238806e-05, "loss": 0.7451, "step": 193 }, { "epoch": 0.05429610971172684, "grad_norm": 0.4339472355223145, "learning_rate": 3.600746268656717e-05, "loss": 0.7458, "step": 194 }, { "epoch": 0.054575986565911, "grad_norm": 0.4220123704322767, "learning_rate": 3.619402985074627e-05, "loss": 0.787, "step": 195 }, { "epoch": 0.05485586342009516, "grad_norm": 0.3945992666601785, "learning_rate": 3.638059701492538e-05, "loss": 0.7695, "step": 196 }, { "epoch": 0.05513574027427932, "grad_norm": 0.45369858609918495, "learning_rate": 3.656716417910448e-05, "loss": 0.7653, "step": 197 }, { "epoch": 0.055415617128463476, "grad_norm": 0.4177283408518827, "learning_rate": 3.675373134328358e-05, "loss": 0.7447, "step": 198 }, { "epoch": 0.055695493982647636, "grad_norm": 0.39914383848741325, "learning_rate": 3.694029850746269e-05, "loss": 0.7538, "step": 199 }, { "epoch": 0.055975370836831795, "grad_norm": 0.4246872629249871, "learning_rate": 3.7126865671641795e-05, "loss": 0.7802, "step": 200 }, { "epoch": 0.056255247691015954, "grad_norm": 0.42907384825838263, "learning_rate": 3.73134328358209e-05, "loss": 0.7674, "step": 201 }, { "epoch": 0.05653512454520011, "grad_norm": 0.4542528371439862, "learning_rate": 3.7500000000000003e-05, "loss": 0.7694, "step": 202 }, { "epoch": 0.05681500139938427, "grad_norm": 0.43788777933290146, "learning_rate": 3.7686567164179104e-05, "loss": 0.7658, "step": 203 }, { "epoch": 0.05709487825356843, "grad_norm": 0.47259008763699245, "learning_rate": 3.787313432835821e-05, "loss": 0.7811, "step": 204 }, { "epoch": 0.05737475510775259, "grad_norm": 0.4279490303137887, "learning_rate": 3.805970149253731e-05, "loss": 0.7597, "step": 205 }, { "epoch": 0.05765463196193675, "grad_norm": 0.40970401648855564, "learning_rate": 3.824626865671642e-05, "loss": 0.7598, "step": 206 }, { "epoch": 0.05793450881612091, "grad_norm": 0.4299596238876096, "learning_rate": 3.843283582089552e-05, "loss": 0.7486, "step": 207 }, { "epoch": 0.05821438567030507, "grad_norm": 0.46847839915315687, "learning_rate": 3.861940298507463e-05, "loss": 0.8066, "step": 208 }, { "epoch": 0.05849426252448923, "grad_norm": 0.43393799277461775, "learning_rate": 3.8805970149253736e-05, "loss": 0.7618, "step": 209 }, { "epoch": 0.05877413937867338, "grad_norm": 0.4453971304180821, "learning_rate": 3.899253731343284e-05, "loss": 0.7554, "step": 210 }, { "epoch": 0.05905401623285754, "grad_norm": 0.45614809752187924, "learning_rate": 3.9179104477611945e-05, "loss": 0.7437, "step": 211 }, { "epoch": 0.0593338930870417, "grad_norm": 0.4422390427833751, "learning_rate": 3.9365671641791046e-05, "loss": 0.7568, "step": 212 }, { "epoch": 0.05961376994122586, "grad_norm": 0.38638591200368005, "learning_rate": 3.9552238805970146e-05, "loss": 0.7648, "step": 213 }, { "epoch": 0.05989364679541002, "grad_norm": 0.4097213277018535, "learning_rate": 3.9738805970149254e-05, "loss": 0.7356, "step": 214 }, { "epoch": 0.06017352364959418, "grad_norm": 0.453249742519671, "learning_rate": 3.992537313432836e-05, "loss": 0.7158, "step": 215 }, { "epoch": 0.060453400503778336, "grad_norm": 0.4845923097087401, "learning_rate": 4.011194029850747e-05, "loss": 0.7282, "step": 216 }, { "epoch": 0.060733277357962495, "grad_norm": 0.4425865451015721, "learning_rate": 4.029850746268657e-05, "loss": 0.764, "step": 217 }, { "epoch": 0.061013154212146654, "grad_norm": 0.459102974798196, "learning_rate": 4.048507462686567e-05, "loss": 0.7314, "step": 218 }, { "epoch": 0.061293031066330814, "grad_norm": 0.4595061468832727, "learning_rate": 4.067164179104478e-05, "loss": 0.7579, "step": 219 }, { "epoch": 0.06157290792051497, "grad_norm": 0.4823737095308922, "learning_rate": 4.085820895522388e-05, "loss": 0.7162, "step": 220 }, { "epoch": 0.06185278477469913, "grad_norm": 0.4261734637156516, "learning_rate": 4.104477611940299e-05, "loss": 0.7227, "step": 221 }, { "epoch": 0.06213266162888329, "grad_norm": 0.4350348027694342, "learning_rate": 4.1231343283582094e-05, "loss": 0.7365, "step": 222 }, { "epoch": 0.06241253848306745, "grad_norm": 0.4832999574991559, "learning_rate": 4.1417910447761195e-05, "loss": 0.7522, "step": 223 }, { "epoch": 0.0626924153372516, "grad_norm": 0.5057681114795001, "learning_rate": 4.16044776119403e-05, "loss": 0.7586, "step": 224 }, { "epoch": 0.06297229219143577, "grad_norm": 0.44285792537328683, "learning_rate": 4.1791044776119404e-05, "loss": 0.7792, "step": 225 }, { "epoch": 0.06325216904561992, "grad_norm": 0.45762174110305115, "learning_rate": 4.197761194029851e-05, "loss": 0.7793, "step": 226 }, { "epoch": 0.06353204589980409, "grad_norm": 0.4468144859741723, "learning_rate": 4.216417910447761e-05, "loss": 0.7242, "step": 227 }, { "epoch": 0.06381192275398824, "grad_norm": 0.4370998673209247, "learning_rate": 4.235074626865671e-05, "loss": 0.7179, "step": 228 }, { "epoch": 0.0640917996081724, "grad_norm": 0.44335959563588795, "learning_rate": 4.253731343283582e-05, "loss": 0.7376, "step": 229 }, { "epoch": 0.06437167646235656, "grad_norm": 0.4273577942277025, "learning_rate": 4.272388059701493e-05, "loss": 0.7578, "step": 230 }, { "epoch": 0.06465155331654072, "grad_norm": 0.40467771476605674, "learning_rate": 4.2910447761194036e-05, "loss": 0.7661, "step": 231 }, { "epoch": 0.06493143017072488, "grad_norm": 0.45246247401021156, "learning_rate": 4.3097014925373137e-05, "loss": 0.7611, "step": 232 }, { "epoch": 0.06521130702490904, "grad_norm": 0.45481303561804026, "learning_rate": 4.328358208955224e-05, "loss": 0.7308, "step": 233 }, { "epoch": 0.0654911838790932, "grad_norm": 0.42823127005374406, "learning_rate": 4.3470149253731345e-05, "loss": 0.7122, "step": 234 }, { "epoch": 0.06577106073327736, "grad_norm": 0.42606389284139023, "learning_rate": 4.3656716417910446e-05, "loss": 0.7253, "step": 235 }, { "epoch": 0.06605093758746151, "grad_norm": 0.40394923201625044, "learning_rate": 4.384328358208955e-05, "loss": 0.7386, "step": 236 }, { "epoch": 0.06633081444164568, "grad_norm": 0.4219423013130042, "learning_rate": 4.402985074626866e-05, "loss": 0.7524, "step": 237 }, { "epoch": 0.06661069129582983, "grad_norm": 0.46896382361198863, "learning_rate": 4.421641791044777e-05, "loss": 0.7176, "step": 238 }, { "epoch": 0.066890568150014, "grad_norm": 0.4467940315104462, "learning_rate": 4.440298507462687e-05, "loss": 0.7209, "step": 239 }, { "epoch": 0.06717044500419815, "grad_norm": 0.42497709063428196, "learning_rate": 4.458955223880597e-05, "loss": 0.6983, "step": 240 }, { "epoch": 0.06745032185838232, "grad_norm": 0.44655477362737434, "learning_rate": 4.477611940298508e-05, "loss": 0.7065, "step": 241 }, { "epoch": 0.06773019871256647, "grad_norm": 0.4677011150337956, "learning_rate": 4.496268656716418e-05, "loss": 0.722, "step": 242 }, { "epoch": 0.06801007556675064, "grad_norm": 0.4226232080324054, "learning_rate": 4.5149253731343286e-05, "loss": 0.7436, "step": 243 }, { "epoch": 0.06828995242093479, "grad_norm": 0.48006747711595266, "learning_rate": 4.5335820895522394e-05, "loss": 0.7589, "step": 244 }, { "epoch": 0.06856982927511895, "grad_norm": 0.43447772456651546, "learning_rate": 4.5522388059701495e-05, "loss": 0.7341, "step": 245 }, { "epoch": 0.0688497061293031, "grad_norm": 0.4672355481768206, "learning_rate": 4.57089552238806e-05, "loss": 0.7264, "step": 246 }, { "epoch": 0.06912958298348726, "grad_norm": 0.44525158274162946, "learning_rate": 4.58955223880597e-05, "loss": 0.7511, "step": 247 }, { "epoch": 0.06940945983767143, "grad_norm": 0.4213977009056596, "learning_rate": 4.608208955223881e-05, "loss": 0.7391, "step": 248 }, { "epoch": 0.06968933669185558, "grad_norm": 0.4297280843902765, "learning_rate": 4.626865671641791e-05, "loss": 0.7304, "step": 249 }, { "epoch": 0.06996921354603974, "grad_norm": 0.49130561822426916, "learning_rate": 4.645522388059701e-05, "loss": 0.7591, "step": 250 }, { "epoch": 0.0702490904002239, "grad_norm": 0.4588404235287188, "learning_rate": 4.664179104477612e-05, "loss": 0.7224, "step": 251 }, { "epoch": 0.07052896725440806, "grad_norm": 0.4170892489330508, "learning_rate": 4.682835820895523e-05, "loss": 0.7187, "step": 252 }, { "epoch": 0.07080884410859221, "grad_norm": 0.48174211630070884, "learning_rate": 4.7014925373134335e-05, "loss": 0.7211, "step": 253 }, { "epoch": 0.07108872096277638, "grad_norm": 0.44419470582928033, "learning_rate": 4.7201492537313436e-05, "loss": 0.7363, "step": 254 }, { "epoch": 0.07136859781696053, "grad_norm": 0.4661471299819716, "learning_rate": 4.738805970149254e-05, "loss": 0.7318, "step": 255 }, { "epoch": 0.0716484746711447, "grad_norm": 0.44815383924055374, "learning_rate": 4.7574626865671644e-05, "loss": 0.7028, "step": 256 }, { "epoch": 0.07192835152532885, "grad_norm": 0.4347681423244314, "learning_rate": 4.7761194029850745e-05, "loss": 0.738, "step": 257 }, { "epoch": 0.07220822837951302, "grad_norm": 0.42103800902759775, "learning_rate": 4.794776119402985e-05, "loss": 0.7197, "step": 258 }, { "epoch": 0.07248810523369717, "grad_norm": 0.43609163124254846, "learning_rate": 4.813432835820896e-05, "loss": 0.7097, "step": 259 }, { "epoch": 0.07276798208788134, "grad_norm": 0.4513244060976077, "learning_rate": 4.832089552238806e-05, "loss": 0.6991, "step": 260 }, { "epoch": 0.07304785894206549, "grad_norm": 0.46046386444527604, "learning_rate": 4.850746268656717e-05, "loss": 0.7256, "step": 261 }, { "epoch": 0.07332773579624965, "grad_norm": 0.4561647252356896, "learning_rate": 4.869402985074627e-05, "loss": 0.7315, "step": 262 }, { "epoch": 0.0736076126504338, "grad_norm": 0.4884637427080716, "learning_rate": 4.888059701492538e-05, "loss": 0.7403, "step": 263 }, { "epoch": 0.07388748950461797, "grad_norm": 0.7652121557299206, "learning_rate": 4.906716417910448e-05, "loss": 0.7185, "step": 264 }, { "epoch": 0.07416736635880213, "grad_norm": 0.45244064682449026, "learning_rate": 4.9253731343283586e-05, "loss": 0.7288, "step": 265 }, { "epoch": 0.07444724321298629, "grad_norm": 0.45889296478066427, "learning_rate": 4.944029850746269e-05, "loss": 0.7176, "step": 266 }, { "epoch": 0.07472712006717044, "grad_norm": 0.45826546985668176, "learning_rate": 4.9626865671641794e-05, "loss": 0.7159, "step": 267 }, { "epoch": 0.07500699692135461, "grad_norm": 0.46389858384982025, "learning_rate": 4.98134328358209e-05, "loss": 0.7046, "step": 268 }, { "epoch": 0.07528687377553876, "grad_norm": 0.4528735717410825, "learning_rate": 5e-05, "loss": 0.7375, "step": 269 }, { "epoch": 0.07556675062972293, "grad_norm": 0.43357562424446217, "learning_rate": 5.018656716417911e-05, "loss": 0.7291, "step": 270 }, { "epoch": 0.07584662748390708, "grad_norm": 0.47087363708784613, "learning_rate": 5.0373134328358204e-05, "loss": 0.7161, "step": 271 }, { "epoch": 0.07612650433809123, "grad_norm": 0.5002887404008286, "learning_rate": 5.055970149253731e-05, "loss": 0.7027, "step": 272 }, { "epoch": 0.0764063811922754, "grad_norm": 0.44100264957876795, "learning_rate": 5.074626865671642e-05, "loss": 0.7028, "step": 273 }, { "epoch": 0.07668625804645955, "grad_norm": 0.45791609628530144, "learning_rate": 5.0932835820895534e-05, "loss": 0.7079, "step": 274 }, { "epoch": 0.07696613490064372, "grad_norm": 0.42461907033295915, "learning_rate": 5.111940298507463e-05, "loss": 0.7408, "step": 275 }, { "epoch": 0.07724601175482787, "grad_norm": 0.474800866810756, "learning_rate": 5.1305970149253735e-05, "loss": 0.7328, "step": 276 }, { "epoch": 0.07752588860901204, "grad_norm": 0.4349633463201631, "learning_rate": 5.149253731343284e-05, "loss": 0.6997, "step": 277 }, { "epoch": 0.07780576546319619, "grad_norm": 0.4301040535810014, "learning_rate": 5.167910447761194e-05, "loss": 0.7072, "step": 278 }, { "epoch": 0.07808564231738035, "grad_norm": 0.450034297489181, "learning_rate": 5.1865671641791044e-05, "loss": 0.7213, "step": 279 }, { "epoch": 0.07836551917156451, "grad_norm": 0.44624329150212216, "learning_rate": 5.205223880597015e-05, "loss": 0.7426, "step": 280 }, { "epoch": 0.07864539602574867, "grad_norm": 0.4343366431937922, "learning_rate": 5.223880597014925e-05, "loss": 0.6915, "step": 281 }, { "epoch": 0.07892527287993283, "grad_norm": 0.45204300206030235, "learning_rate": 5.242537313432836e-05, "loss": 0.7232, "step": 282 }, { "epoch": 0.07920514973411699, "grad_norm": 0.4548205588152112, "learning_rate": 5.261194029850747e-05, "loss": 0.7138, "step": 283 }, { "epoch": 0.07948502658830114, "grad_norm": 0.45435492373302583, "learning_rate": 5.2798507462686576e-05, "loss": 0.6934, "step": 284 }, { "epoch": 0.07976490344248531, "grad_norm": 0.4656914294998773, "learning_rate": 5.298507462686567e-05, "loss": 0.7082, "step": 285 }, { "epoch": 0.08004478029666946, "grad_norm": 0.47456640793620797, "learning_rate": 5.317164179104478e-05, "loss": 0.7129, "step": 286 }, { "epoch": 0.08032465715085363, "grad_norm": 0.4318842240940535, "learning_rate": 5.3358208955223885e-05, "loss": 0.6785, "step": 287 }, { "epoch": 0.08060453400503778, "grad_norm": 0.44584560135570095, "learning_rate": 5.3544776119402986e-05, "loss": 0.7086, "step": 288 }, { "epoch": 0.08088441085922195, "grad_norm": 0.47857178698148334, "learning_rate": 5.373134328358209e-05, "loss": 0.7083, "step": 289 }, { "epoch": 0.0811642877134061, "grad_norm": 0.4625177089964261, "learning_rate": 5.39179104477612e-05, "loss": 0.7112, "step": 290 }, { "epoch": 0.08144416456759027, "grad_norm": 0.4313745318502042, "learning_rate": 5.4104477611940295e-05, "loss": 0.712, "step": 291 }, { "epoch": 0.08172404142177442, "grad_norm": 0.47216163058190774, "learning_rate": 5.42910447761194e-05, "loss": 0.7034, "step": 292 }, { "epoch": 0.08200391827595858, "grad_norm": 0.47780319900261636, "learning_rate": 5.447761194029851e-05, "loss": 0.6932, "step": 293 }, { "epoch": 0.08228379513014274, "grad_norm": 0.4137823178775821, "learning_rate": 5.466417910447762e-05, "loss": 0.7149, "step": 294 }, { "epoch": 0.0825636719843269, "grad_norm": 0.437078138593924, "learning_rate": 5.485074626865672e-05, "loss": 0.7171, "step": 295 }, { "epoch": 0.08284354883851106, "grad_norm": 0.4238041082775488, "learning_rate": 5.5037313432835826e-05, "loss": 0.7202, "step": 296 }, { "epoch": 0.08312342569269521, "grad_norm": 0.4794628101196632, "learning_rate": 5.5223880597014934e-05, "loss": 0.6946, "step": 297 }, { "epoch": 0.08340330254687937, "grad_norm": 0.4423732648514037, "learning_rate": 5.541044776119403e-05, "loss": 0.7228, "step": 298 }, { "epoch": 0.08368317940106353, "grad_norm": 0.44145759084944025, "learning_rate": 5.5597014925373135e-05, "loss": 0.7087, "step": 299 }, { "epoch": 0.08396305625524769, "grad_norm": 0.40934388363608293, "learning_rate": 5.578358208955224e-05, "loss": 0.6979, "step": 300 }, { "epoch": 0.08424293310943184, "grad_norm": 0.4234180639188906, "learning_rate": 5.5970149253731344e-05, "loss": 0.7229, "step": 301 }, { "epoch": 0.08452280996361601, "grad_norm": 0.4472825373799119, "learning_rate": 5.615671641791045e-05, "loss": 0.7135, "step": 302 }, { "epoch": 0.08480268681780016, "grad_norm": 0.45272708650047383, "learning_rate": 5.634328358208956e-05, "loss": 0.6826, "step": 303 }, { "epoch": 0.08508256367198433, "grad_norm": 0.4494694600775036, "learning_rate": 5.652985074626867e-05, "loss": 0.7313, "step": 304 }, { "epoch": 0.08536244052616848, "grad_norm": 0.7339141540328916, "learning_rate": 5.671641791044776e-05, "loss": 0.6984, "step": 305 }, { "epoch": 0.08564231738035265, "grad_norm": 0.45248907297003077, "learning_rate": 5.690298507462687e-05, "loss": 0.7477, "step": 306 }, { "epoch": 0.0859221942345368, "grad_norm": 0.4482803119122466, "learning_rate": 5.7089552238805976e-05, "loss": 0.7186, "step": 307 }, { "epoch": 0.08620207108872097, "grad_norm": 0.4355901897442642, "learning_rate": 5.727611940298508e-05, "loss": 0.7257, "step": 308 }, { "epoch": 0.08648194794290512, "grad_norm": 0.4161768534777681, "learning_rate": 5.7462686567164184e-05, "loss": 0.7166, "step": 309 }, { "epoch": 0.08676182479708928, "grad_norm": 0.5497629735001676, "learning_rate": 5.764925373134329e-05, "loss": 0.7034, "step": 310 }, { "epoch": 0.08704170165127344, "grad_norm": 0.43493036515863065, "learning_rate": 5.7835820895522386e-05, "loss": 0.7041, "step": 311 }, { "epoch": 0.0873215785054576, "grad_norm": 0.4087448488869078, "learning_rate": 5.8022388059701494e-05, "loss": 0.7112, "step": 312 }, { "epoch": 0.08760145535964176, "grad_norm": 0.44234991171225274, "learning_rate": 5.82089552238806e-05, "loss": 0.7053, "step": 313 }, { "epoch": 0.08788133221382592, "grad_norm": 0.5291674247547381, "learning_rate": 5.839552238805971e-05, "loss": 0.7405, "step": 314 }, { "epoch": 0.08816120906801007, "grad_norm": 0.4607497702179738, "learning_rate": 5.85820895522388e-05, "loss": 0.7009, "step": 315 }, { "epoch": 0.08844108592219424, "grad_norm": 0.4023643317276832, "learning_rate": 5.876865671641791e-05, "loss": 0.6958, "step": 316 }, { "epoch": 0.08872096277637839, "grad_norm": 0.4536954164659193, "learning_rate": 5.8955223880597025e-05, "loss": 0.7297, "step": 317 }, { "epoch": 0.08900083963056256, "grad_norm": 0.4161966261264851, "learning_rate": 5.914179104477612e-05, "loss": 0.6986, "step": 318 }, { "epoch": 0.08928071648474671, "grad_norm": 0.4289977366688839, "learning_rate": 5.9328358208955226e-05, "loss": 0.6719, "step": 319 }, { "epoch": 0.08956059333893088, "grad_norm": 0.4200723677019632, "learning_rate": 5.9514925373134334e-05, "loss": 0.7129, "step": 320 }, { "epoch": 0.08984047019311503, "grad_norm": 0.42635737483866826, "learning_rate": 5.970149253731343e-05, "loss": 0.723, "step": 321 }, { "epoch": 0.09012034704729918, "grad_norm": 0.4293620901269125, "learning_rate": 5.9888059701492536e-05, "loss": 0.6997, "step": 322 }, { "epoch": 0.09040022390148335, "grad_norm": 0.4050924506501903, "learning_rate": 6.007462686567164e-05, "loss": 0.7089, "step": 323 }, { "epoch": 0.0906801007556675, "grad_norm": 0.43906366645519723, "learning_rate": 6.026119402985075e-05, "loss": 0.7261, "step": 324 }, { "epoch": 0.09095997760985167, "grad_norm": 0.4486632214065769, "learning_rate": 6.044776119402985e-05, "loss": 0.7127, "step": 325 }, { "epoch": 0.09123985446403582, "grad_norm": 0.42554371690002163, "learning_rate": 6.063432835820896e-05, "loss": 0.723, "step": 326 }, { "epoch": 0.09151973131821999, "grad_norm": 0.4208462259883393, "learning_rate": 6.082089552238807e-05, "loss": 0.7123, "step": 327 }, { "epoch": 0.09179960817240414, "grad_norm": 0.4649806874138297, "learning_rate": 6.100746268656716e-05, "loss": 0.7142, "step": 328 }, { "epoch": 0.0920794850265883, "grad_norm": 0.43028372538351295, "learning_rate": 6.119402985074628e-05, "loss": 0.689, "step": 329 }, { "epoch": 0.09235936188077246, "grad_norm": 0.4558325684400419, "learning_rate": 6.138059701492538e-05, "loss": 0.6912, "step": 330 }, { "epoch": 0.09263923873495662, "grad_norm": 0.4381458483310585, "learning_rate": 6.156716417910448e-05, "loss": 0.6814, "step": 331 }, { "epoch": 0.09291911558914077, "grad_norm": 0.406646618290278, "learning_rate": 6.175373134328358e-05, "loss": 0.6712, "step": 332 }, { "epoch": 0.09319899244332494, "grad_norm": 0.4980160165116508, "learning_rate": 6.194029850746269e-05, "loss": 0.7163, "step": 333 }, { "epoch": 0.09347886929750909, "grad_norm": 0.4574980195335612, "learning_rate": 6.21268656716418e-05, "loss": 0.6838, "step": 334 }, { "epoch": 0.09375874615169326, "grad_norm": 0.44414868273725466, "learning_rate": 6.23134328358209e-05, "loss": 0.682, "step": 335 }, { "epoch": 0.09403862300587741, "grad_norm": 0.4248528405896676, "learning_rate": 6.25e-05, "loss": 0.6904, "step": 336 }, { "epoch": 0.09431849986006158, "grad_norm": 0.4919839023335616, "learning_rate": 6.268656716417911e-05, "loss": 0.6897, "step": 337 }, { "epoch": 0.09459837671424573, "grad_norm": 1.3161816435263172, "learning_rate": 6.28731343283582e-05, "loss": 0.6755, "step": 338 }, { "epoch": 0.0948782535684299, "grad_norm": 0.513774926789864, "learning_rate": 6.305970149253731e-05, "loss": 0.6984, "step": 339 }, { "epoch": 0.09515813042261405, "grad_norm": 0.47769900312879693, "learning_rate": 6.324626865671642e-05, "loss": 0.6736, "step": 340 }, { "epoch": 0.09543800727679821, "grad_norm": 0.5872335086531077, "learning_rate": 6.343283582089553e-05, "loss": 0.6924, "step": 341 }, { "epoch": 0.09571788413098237, "grad_norm": 0.6685685565941328, "learning_rate": 6.361940298507463e-05, "loss": 0.7139, "step": 342 }, { "epoch": 0.09599776098516653, "grad_norm": 0.644943432147974, "learning_rate": 6.380597014925374e-05, "loss": 0.6985, "step": 343 }, { "epoch": 0.09627763783935069, "grad_norm": 0.8158341804464563, "learning_rate": 6.399253731343285e-05, "loss": 0.6909, "step": 344 }, { "epoch": 0.09655751469353484, "grad_norm": 0.442615963515534, "learning_rate": 6.417910447761194e-05, "loss": 0.6766, "step": 345 }, { "epoch": 0.096837391547719, "grad_norm": 0.4410297475354073, "learning_rate": 6.436567164179105e-05, "loss": 0.6973, "step": 346 }, { "epoch": 0.09711726840190316, "grad_norm": 0.7799315559914135, "learning_rate": 6.455223880597016e-05, "loss": 0.6911, "step": 347 }, { "epoch": 0.09739714525608732, "grad_norm": 0.8056737357080603, "learning_rate": 6.473880597014925e-05, "loss": 0.6887, "step": 348 }, { "epoch": 0.09767702211027147, "grad_norm": 4.298578143710853, "learning_rate": 6.492537313432836e-05, "loss": 0.6869, "step": 349 }, { "epoch": 0.09795689896445564, "grad_norm": 0.8435981133020648, "learning_rate": 6.511194029850747e-05, "loss": 0.723, "step": 350 }, { "epoch": 0.0982367758186398, "grad_norm": 0.7371117969719428, "learning_rate": 6.529850746268657e-05, "loss": 0.6868, "step": 351 }, { "epoch": 0.09851665267282396, "grad_norm": 0.6164397349080035, "learning_rate": 6.548507462686567e-05, "loss": 0.6882, "step": 352 }, { "epoch": 0.09879652952700811, "grad_norm": 0.5317239294299322, "learning_rate": 6.567164179104478e-05, "loss": 0.6873, "step": 353 }, { "epoch": 0.09907640638119228, "grad_norm": 0.8761702871109263, "learning_rate": 6.585820895522388e-05, "loss": 0.6833, "step": 354 }, { "epoch": 0.09935628323537643, "grad_norm": 0.4582754633466804, "learning_rate": 6.604477611940298e-05, "loss": 0.7004, "step": 355 }, { "epoch": 0.0996361600895606, "grad_norm": 0.447018180737809, "learning_rate": 6.62313432835821e-05, "loss": 0.7102, "step": 356 }, { "epoch": 0.09991603694374475, "grad_norm": 0.6708485391162204, "learning_rate": 6.64179104477612e-05, "loss": 0.7077, "step": 357 }, { "epoch": 0.10019591379792891, "grad_norm": 0.7193088718027209, "learning_rate": 6.66044776119403e-05, "loss": 0.7, "step": 358 }, { "epoch": 0.10047579065211307, "grad_norm": 0.5130518893188671, "learning_rate": 6.679104477611941e-05, "loss": 0.7128, "step": 359 }, { "epoch": 0.10075566750629723, "grad_norm": 0.4730820378689476, "learning_rate": 6.697761194029852e-05, "loss": 0.668, "step": 360 }, { "epoch": 0.10103554436048139, "grad_norm": 0.5984318381359485, "learning_rate": 6.716417910447762e-05, "loss": 0.7275, "step": 361 }, { "epoch": 0.10131542121466555, "grad_norm": 1.3919482365808233, "learning_rate": 6.735074626865672e-05, "loss": 0.6905, "step": 362 }, { "epoch": 0.1015952980688497, "grad_norm": 0.6069392215504623, "learning_rate": 6.753731343283583e-05, "loss": 0.6927, "step": 363 }, { "epoch": 0.10187517492303387, "grad_norm": 0.49002026414282235, "learning_rate": 6.772388059701493e-05, "loss": 0.7032, "step": 364 }, { "epoch": 0.10215505177721802, "grad_norm": 0.45719536473500955, "learning_rate": 6.791044776119403e-05, "loss": 0.6808, "step": 365 }, { "epoch": 0.10243492863140219, "grad_norm": 0.48445489023836774, "learning_rate": 6.809701492537313e-05, "loss": 0.6811, "step": 366 }, { "epoch": 0.10271480548558634, "grad_norm": 0.466557292066106, "learning_rate": 6.828358208955224e-05, "loss": 0.6586, "step": 367 }, { "epoch": 0.10299468233977051, "grad_norm": 0.4717880558758552, "learning_rate": 6.847014925373134e-05, "loss": 0.672, "step": 368 }, { "epoch": 0.10327455919395466, "grad_norm": 0.4877821397302723, "learning_rate": 6.865671641791044e-05, "loss": 0.6826, "step": 369 }, { "epoch": 0.10355443604813881, "grad_norm": 0.55713406216083, "learning_rate": 6.884328358208955e-05, "loss": 0.6796, "step": 370 }, { "epoch": 0.10383431290232298, "grad_norm": 0.47581625927326593, "learning_rate": 6.902985074626866e-05, "loss": 0.6714, "step": 371 }, { "epoch": 0.10411418975650713, "grad_norm": 0.43257191406789336, "learning_rate": 6.921641791044777e-05, "loss": 0.6863, "step": 372 }, { "epoch": 0.1043940666106913, "grad_norm": 0.4437395414763435, "learning_rate": 6.940298507462687e-05, "loss": 0.6635, "step": 373 }, { "epoch": 0.10467394346487545, "grad_norm": 0.7068875446240742, "learning_rate": 6.958955223880598e-05, "loss": 0.6806, "step": 374 }, { "epoch": 0.10495382031905962, "grad_norm": 0.44911225867373566, "learning_rate": 6.977611940298508e-05, "loss": 0.6571, "step": 375 }, { "epoch": 0.10523369717324377, "grad_norm": 0.46333664540903235, "learning_rate": 6.996268656716418e-05, "loss": 0.6974, "step": 376 }, { "epoch": 0.10551357402742793, "grad_norm": 0.5054230893447995, "learning_rate": 7.014925373134329e-05, "loss": 0.6998, "step": 377 }, { "epoch": 0.10579345088161209, "grad_norm": 0.4490793398584244, "learning_rate": 7.033582089552238e-05, "loss": 0.7085, "step": 378 }, { "epoch": 0.10607332773579625, "grad_norm": 0.4290140309326081, "learning_rate": 7.052238805970149e-05, "loss": 0.7181, "step": 379 }, { "epoch": 0.1063532045899804, "grad_norm": 0.45362497264391827, "learning_rate": 7.07089552238806e-05, "loss": 0.691, "step": 380 }, { "epoch": 0.10663308144416457, "grad_norm": 0.42874926552817305, "learning_rate": 7.089552238805971e-05, "loss": 0.6493, "step": 381 }, { "epoch": 0.10691295829834872, "grad_norm": 0.47237696937944235, "learning_rate": 7.10820895522388e-05, "loss": 0.6987, "step": 382 }, { "epoch": 0.10719283515253289, "grad_norm": 0.38855172325103726, "learning_rate": 7.126865671641791e-05, "loss": 0.6977, "step": 383 }, { "epoch": 0.10747271200671704, "grad_norm": 0.44911430898712273, "learning_rate": 7.145522388059702e-05, "loss": 0.6957, "step": 384 }, { "epoch": 0.10775258886090121, "grad_norm": 0.4280200749942071, "learning_rate": 7.164179104477612e-05, "loss": 0.681, "step": 385 }, { "epoch": 0.10803246571508536, "grad_norm": 0.41670607217895367, "learning_rate": 7.182835820895523e-05, "loss": 0.6779, "step": 386 }, { "epoch": 0.10831234256926953, "grad_norm": 0.4409124040164888, "learning_rate": 7.201492537313434e-05, "loss": 0.6851, "step": 387 }, { "epoch": 0.10859221942345368, "grad_norm": 0.3960561625846798, "learning_rate": 7.220149253731343e-05, "loss": 0.7148, "step": 388 }, { "epoch": 0.10887209627763784, "grad_norm": 0.41019388499923165, "learning_rate": 7.238805970149254e-05, "loss": 0.6924, "step": 389 }, { "epoch": 0.109151973131822, "grad_norm": 0.4144681082109547, "learning_rate": 7.257462686567165e-05, "loss": 0.6862, "step": 390 }, { "epoch": 0.10943184998600616, "grad_norm": 1.6774478100169978, "learning_rate": 7.276119402985076e-05, "loss": 0.6802, "step": 391 }, { "epoch": 0.10971172684019032, "grad_norm": 0.3987937834141996, "learning_rate": 7.294776119402985e-05, "loss": 0.6487, "step": 392 }, { "epoch": 0.10999160369437448, "grad_norm": 0.4296763054463155, "learning_rate": 7.313432835820896e-05, "loss": 0.682, "step": 393 }, { "epoch": 0.11027148054855863, "grad_norm": 0.4249814109394962, "learning_rate": 7.332089552238807e-05, "loss": 0.641, "step": 394 }, { "epoch": 0.11055135740274279, "grad_norm": 0.5429804546291697, "learning_rate": 7.350746268656716e-05, "loss": 0.6468, "step": 395 }, { "epoch": 0.11083123425692695, "grad_norm": 0.4093190306279177, "learning_rate": 7.369402985074627e-05, "loss": 0.6743, "step": 396 }, { "epoch": 0.1111111111111111, "grad_norm": 0.7033065138757928, "learning_rate": 7.388059701492537e-05, "loss": 0.6988, "step": 397 }, { "epoch": 0.11139098796529527, "grad_norm": 0.5570947590054259, "learning_rate": 7.406716417910447e-05, "loss": 0.7029, "step": 398 }, { "epoch": 0.11167086481947942, "grad_norm": 0.40859828053308395, "learning_rate": 7.425373134328359e-05, "loss": 0.668, "step": 399 }, { "epoch": 0.11195074167366359, "grad_norm": 0.37920101805001655, "learning_rate": 7.44402985074627e-05, "loss": 0.6638, "step": 400 }, { "epoch": 0.11223061852784774, "grad_norm": 0.4606413966283373, "learning_rate": 7.46268656716418e-05, "loss": 0.6705, "step": 401 }, { "epoch": 0.11251049538203191, "grad_norm": 0.4117466938030516, "learning_rate": 7.48134328358209e-05, "loss": 0.6503, "step": 402 }, { "epoch": 0.11279037223621606, "grad_norm": 0.41733242064764725, "learning_rate": 7.500000000000001e-05, "loss": 0.6972, "step": 403 }, { "epoch": 0.11307024909040023, "grad_norm": 0.44048880975860893, "learning_rate": 7.518656716417911e-05, "loss": 0.6473, "step": 404 }, { "epoch": 0.11335012594458438, "grad_norm": 0.4093052594431773, "learning_rate": 7.537313432835821e-05, "loss": 0.6748, "step": 405 }, { "epoch": 0.11363000279876855, "grad_norm": 0.422883608137739, "learning_rate": 7.555970149253732e-05, "loss": 0.6962, "step": 406 }, { "epoch": 0.1139098796529527, "grad_norm": 0.4622367714089799, "learning_rate": 7.574626865671642e-05, "loss": 0.6444, "step": 407 }, { "epoch": 0.11418975650713686, "grad_norm": 0.4179728359996031, "learning_rate": 7.593283582089553e-05, "loss": 0.6751, "step": 408 }, { "epoch": 0.11446963336132102, "grad_norm": 0.4081264989680072, "learning_rate": 7.611940298507463e-05, "loss": 0.6379, "step": 409 }, { "epoch": 0.11474951021550518, "grad_norm": 0.39085981520536767, "learning_rate": 7.630597014925373e-05, "loss": 0.6688, "step": 410 }, { "epoch": 0.11502938706968933, "grad_norm": 0.43031742568193565, "learning_rate": 7.649253731343284e-05, "loss": 0.6894, "step": 411 }, { "epoch": 0.1153092639238735, "grad_norm": 0.4389085433902972, "learning_rate": 7.667910447761193e-05, "loss": 0.6433, "step": 412 }, { "epoch": 0.11558914077805765, "grad_norm": 0.42220390576147654, "learning_rate": 7.686567164179104e-05, "loss": 0.6718, "step": 413 }, { "epoch": 0.11586901763224182, "grad_norm": 0.41024362930644254, "learning_rate": 7.705223880597015e-05, "loss": 0.6668, "step": 414 }, { "epoch": 0.11614889448642597, "grad_norm": 0.41757579668923, "learning_rate": 7.723880597014926e-05, "loss": 0.6848, "step": 415 }, { "epoch": 0.11642877134061014, "grad_norm": 0.4231612312524277, "learning_rate": 7.742537313432837e-05, "loss": 0.6994, "step": 416 }, { "epoch": 0.11670864819479429, "grad_norm": 0.39796462051489095, "learning_rate": 7.761194029850747e-05, "loss": 0.6587, "step": 417 }, { "epoch": 0.11698852504897846, "grad_norm": 0.445462589657519, "learning_rate": 7.779850746268658e-05, "loss": 0.655, "step": 418 }, { "epoch": 0.11726840190316261, "grad_norm": 0.45089633882777275, "learning_rate": 7.798507462686567e-05, "loss": 0.6638, "step": 419 }, { "epoch": 0.11754827875734676, "grad_norm": 0.42457883121608503, "learning_rate": 7.817164179104478e-05, "loss": 0.6377, "step": 420 }, { "epoch": 0.11782815561153093, "grad_norm": 0.43788212692058714, "learning_rate": 7.835820895522389e-05, "loss": 0.6706, "step": 421 }, { "epoch": 0.11810803246571508, "grad_norm": 0.408158060527127, "learning_rate": 7.854477611940298e-05, "loss": 0.6743, "step": 422 }, { "epoch": 0.11838790931989925, "grad_norm": 0.40421527642634947, "learning_rate": 7.873134328358209e-05, "loss": 0.6906, "step": 423 }, { "epoch": 0.1186677861740834, "grad_norm": 0.5528953780977632, "learning_rate": 7.89179104477612e-05, "loss": 0.6959, "step": 424 }, { "epoch": 0.11894766302826756, "grad_norm": 0.403342469392748, "learning_rate": 7.910447761194029e-05, "loss": 0.6871, "step": 425 }, { "epoch": 0.11922753988245172, "grad_norm": 0.41377648915254395, "learning_rate": 7.92910447761194e-05, "loss": 0.6691, "step": 426 }, { "epoch": 0.11950741673663588, "grad_norm": 0.39062244201193275, "learning_rate": 7.947761194029851e-05, "loss": 0.6646, "step": 427 }, { "epoch": 0.11978729359082003, "grad_norm": 0.38607174947535255, "learning_rate": 7.966417910447762e-05, "loss": 0.6848, "step": 428 }, { "epoch": 0.1200671704450042, "grad_norm": 0.4270778478137736, "learning_rate": 7.985074626865672e-05, "loss": 0.6699, "step": 429 }, { "epoch": 0.12034704729918835, "grad_norm": 0.4313322992886548, "learning_rate": 8.003731343283583e-05, "loss": 0.6771, "step": 430 }, { "epoch": 0.12062692415337252, "grad_norm": 0.40835846468892506, "learning_rate": 8.022388059701494e-05, "loss": 0.667, "step": 431 }, { "epoch": 0.12090680100755667, "grad_norm": 0.42088882100934216, "learning_rate": 8.041044776119403e-05, "loss": 0.6475, "step": 432 }, { "epoch": 0.12118667786174084, "grad_norm": 0.6464518775485, "learning_rate": 8.059701492537314e-05, "loss": 0.6888, "step": 433 }, { "epoch": 0.12146655471592499, "grad_norm": 0.4125284299211265, "learning_rate": 8.078358208955225e-05, "loss": 0.6227, "step": 434 }, { "epoch": 0.12174643157010916, "grad_norm": 0.47989278107702377, "learning_rate": 8.097014925373134e-05, "loss": 0.6936, "step": 435 }, { "epoch": 0.12202630842429331, "grad_norm": 0.4261772840242414, "learning_rate": 8.115671641791045e-05, "loss": 0.676, "step": 436 }, { "epoch": 0.12230618527847748, "grad_norm": 0.4066754280472226, "learning_rate": 8.134328358208956e-05, "loss": 0.6876, "step": 437 }, { "epoch": 0.12258606213266163, "grad_norm": 0.43891426861460775, "learning_rate": 8.152985074626866e-05, "loss": 0.6616, "step": 438 }, { "epoch": 0.1228659389868458, "grad_norm": 0.3676904720946436, "learning_rate": 8.171641791044776e-05, "loss": 0.6609, "step": 439 }, { "epoch": 0.12314581584102995, "grad_norm": 0.3892126784524761, "learning_rate": 8.190298507462687e-05, "loss": 0.6846, "step": 440 }, { "epoch": 0.12342569269521411, "grad_norm": 0.38049231210876644, "learning_rate": 8.208955223880597e-05, "loss": 0.6585, "step": 441 }, { "epoch": 0.12370556954939826, "grad_norm": 0.37520534882657697, "learning_rate": 8.227611940298508e-05, "loss": 0.6606, "step": 442 }, { "epoch": 0.12398544640358243, "grad_norm": 0.39513205718163796, "learning_rate": 8.246268656716419e-05, "loss": 0.6729, "step": 443 }, { "epoch": 0.12426532325776658, "grad_norm": 0.42998721974753285, "learning_rate": 8.26492537313433e-05, "loss": 0.6704, "step": 444 }, { "epoch": 0.12454520011195074, "grad_norm": 0.408931714944995, "learning_rate": 8.283582089552239e-05, "loss": 0.6596, "step": 445 }, { "epoch": 0.1248250769661349, "grad_norm": 0.43411264954711126, "learning_rate": 8.30223880597015e-05, "loss": 0.6563, "step": 446 }, { "epoch": 0.12510495382031905, "grad_norm": 0.40133533031153473, "learning_rate": 8.32089552238806e-05, "loss": 0.6767, "step": 447 }, { "epoch": 0.1253848306745032, "grad_norm": 0.40872477830181203, "learning_rate": 8.339552238805971e-05, "loss": 0.6775, "step": 448 }, { "epoch": 0.12566470752868739, "grad_norm": 0.3925395199407519, "learning_rate": 8.358208955223881e-05, "loss": 0.6504, "step": 449 }, { "epoch": 0.12594458438287154, "grad_norm": 0.39825635877986537, "learning_rate": 8.376865671641791e-05, "loss": 0.6556, "step": 450 }, { "epoch": 0.1262244612370557, "grad_norm": 0.4107531606961135, "learning_rate": 8.395522388059702e-05, "loss": 0.6698, "step": 451 }, { "epoch": 0.12650433809123984, "grad_norm": 0.41710025849144117, "learning_rate": 8.414179104477612e-05, "loss": 0.6702, "step": 452 }, { "epoch": 0.12678421494542402, "grad_norm": 0.3833524739876693, "learning_rate": 8.432835820895522e-05, "loss": 0.6424, "step": 453 }, { "epoch": 0.12706409179960818, "grad_norm": 0.37471559013898337, "learning_rate": 8.451492537313433e-05, "loss": 0.684, "step": 454 }, { "epoch": 0.12734396865379233, "grad_norm": 0.41349519361632053, "learning_rate": 8.470149253731343e-05, "loss": 0.7108, "step": 455 }, { "epoch": 0.12762384550797648, "grad_norm": 0.38043562798558783, "learning_rate": 8.488805970149253e-05, "loss": 0.6782, "step": 456 }, { "epoch": 0.12790372236216066, "grad_norm": 0.3630384514666315, "learning_rate": 8.507462686567164e-05, "loss": 0.6387, "step": 457 }, { "epoch": 0.1281835992163448, "grad_norm": 0.39964441377131393, "learning_rate": 8.526119402985075e-05, "loss": 0.654, "step": 458 }, { "epoch": 0.12846347607052896, "grad_norm": 0.4171728266265153, "learning_rate": 8.544776119402986e-05, "loss": 0.6777, "step": 459 }, { "epoch": 0.12874335292471312, "grad_norm": 0.40790236781406547, "learning_rate": 8.563432835820896e-05, "loss": 0.6853, "step": 460 }, { "epoch": 0.1290232297788973, "grad_norm": 0.4110683142830724, "learning_rate": 8.582089552238807e-05, "loss": 0.6642, "step": 461 }, { "epoch": 0.12930310663308145, "grad_norm": 0.43782266552108556, "learning_rate": 8.600746268656717e-05, "loss": 0.6753, "step": 462 }, { "epoch": 0.1295829834872656, "grad_norm": 0.3972085783903419, "learning_rate": 8.619402985074627e-05, "loss": 0.681, "step": 463 }, { "epoch": 0.12986286034144975, "grad_norm": 0.44630647399233214, "learning_rate": 8.638059701492538e-05, "loss": 0.668, "step": 464 }, { "epoch": 0.13014273719563393, "grad_norm": 0.3731598467477508, "learning_rate": 8.656716417910447e-05, "loss": 0.638, "step": 465 }, { "epoch": 0.1304226140498181, "grad_norm": 0.6582886918597636, "learning_rate": 8.675373134328358e-05, "loss": 0.6697, "step": 466 }, { "epoch": 0.13070249090400224, "grad_norm": 0.6873071146539492, "learning_rate": 8.694029850746269e-05, "loss": 0.6664, "step": 467 }, { "epoch": 0.1309823677581864, "grad_norm": 0.3831571859149306, "learning_rate": 8.71268656716418e-05, "loss": 0.6593, "step": 468 }, { "epoch": 0.13126224461237054, "grad_norm": 0.35676734914188696, "learning_rate": 8.731343283582089e-05, "loss": 0.6584, "step": 469 }, { "epoch": 0.13154212146655472, "grad_norm": 0.37186744217198814, "learning_rate": 8.75e-05, "loss": 0.6843, "step": 470 }, { "epoch": 0.13182199832073888, "grad_norm": 0.390404389872634, "learning_rate": 8.76865671641791e-05, "loss": 0.6483, "step": 471 }, { "epoch": 0.13210187517492303, "grad_norm": 0.3738856376207189, "learning_rate": 8.787313432835821e-05, "loss": 0.6551, "step": 472 }, { "epoch": 0.13238175202910718, "grad_norm": 0.48004224555935937, "learning_rate": 8.805970149253732e-05, "loss": 0.6493, "step": 473 }, { "epoch": 0.13266162888329136, "grad_norm": 0.3736785443142544, "learning_rate": 8.824626865671643e-05, "loss": 0.6643, "step": 474 }, { "epoch": 0.1329415057374755, "grad_norm": 0.36882550460560304, "learning_rate": 8.843283582089554e-05, "loss": 0.6903, "step": 475 }, { "epoch": 0.13322138259165967, "grad_norm": 0.40272658663853805, "learning_rate": 8.861940298507463e-05, "loss": 0.6539, "step": 476 }, { "epoch": 0.13350125944584382, "grad_norm": 0.35544347325052605, "learning_rate": 8.880597014925374e-05, "loss": 0.6493, "step": 477 }, { "epoch": 0.133781136300028, "grad_norm": 0.3614647777559769, "learning_rate": 8.899253731343285e-05, "loss": 0.6763, "step": 478 }, { "epoch": 0.13406101315421215, "grad_norm": 0.36971957161130825, "learning_rate": 8.917910447761194e-05, "loss": 0.6904, "step": 479 }, { "epoch": 0.1343408900083963, "grad_norm": 0.38740279571359765, "learning_rate": 8.936567164179105e-05, "loss": 0.6316, "step": 480 }, { "epoch": 0.13462076686258045, "grad_norm": 0.3746089452612046, "learning_rate": 8.955223880597016e-05, "loss": 0.6775, "step": 481 }, { "epoch": 0.13490064371676463, "grad_norm": 0.4096573183041176, "learning_rate": 8.973880597014925e-05, "loss": 0.6331, "step": 482 }, { "epoch": 0.1351805205709488, "grad_norm": 0.39578042975326794, "learning_rate": 8.992537313432836e-05, "loss": 0.6527, "step": 483 }, { "epoch": 0.13546039742513294, "grad_norm": 0.3750647028289509, "learning_rate": 9.011194029850746e-05, "loss": 0.6282, "step": 484 }, { "epoch": 0.1357402742793171, "grad_norm": 0.3787020948866338, "learning_rate": 9.029850746268657e-05, "loss": 0.6571, "step": 485 }, { "epoch": 0.13602015113350127, "grad_norm": 0.4113950094446887, "learning_rate": 9.048507462686568e-05, "loss": 0.6331, "step": 486 }, { "epoch": 0.13630002798768542, "grad_norm": 1.0715952785604606, "learning_rate": 9.067164179104479e-05, "loss": 0.6272, "step": 487 }, { "epoch": 0.13657990484186958, "grad_norm": 0.3983710454128697, "learning_rate": 9.08582089552239e-05, "loss": 0.6616, "step": 488 }, { "epoch": 0.13685978169605373, "grad_norm": 0.4495974542727547, "learning_rate": 9.104477611940299e-05, "loss": 0.6623, "step": 489 }, { "epoch": 0.1371396585502379, "grad_norm": 0.36281685504496297, "learning_rate": 9.12313432835821e-05, "loss": 0.6636, "step": 490 }, { "epoch": 0.13741953540442206, "grad_norm": 0.353930752331264, "learning_rate": 9.14179104477612e-05, "loss": 0.6545, "step": 491 }, { "epoch": 0.1376994122586062, "grad_norm": 0.37500739942877725, "learning_rate": 9.16044776119403e-05, "loss": 0.6569, "step": 492 }, { "epoch": 0.13797928911279037, "grad_norm": 0.3820874791181939, "learning_rate": 9.17910447761194e-05, "loss": 0.6458, "step": 493 }, { "epoch": 0.13825916596697452, "grad_norm": 0.400717183706268, "learning_rate": 9.197761194029851e-05, "loss": 0.6473, "step": 494 }, { "epoch": 0.1385390428211587, "grad_norm": 0.4015144031719116, "learning_rate": 9.216417910447762e-05, "loss": 0.6177, "step": 495 }, { "epoch": 0.13881891967534285, "grad_norm": 0.7088085383546942, "learning_rate": 9.235074626865672e-05, "loss": 0.6584, "step": 496 }, { "epoch": 0.139098796529527, "grad_norm": 1.0178752109753382, "learning_rate": 9.253731343283582e-05, "loss": 0.6813, "step": 497 }, { "epoch": 0.13937867338371115, "grad_norm": 0.5102057156876113, "learning_rate": 9.272388059701493e-05, "loss": 0.6592, "step": 498 }, { "epoch": 0.13965855023789533, "grad_norm": 2.7074502386451083, "learning_rate": 9.291044776119402e-05, "loss": 0.6909, "step": 499 }, { "epoch": 0.1399384270920795, "grad_norm": 0.5944929804172767, "learning_rate": 9.309701492537313e-05, "loss": 0.6575, "step": 500 }, { "epoch": 0.14021830394626364, "grad_norm": 1.8487977049802586, "learning_rate": 9.328358208955224e-05, "loss": 0.6701, "step": 501 }, { "epoch": 0.1404981808004478, "grad_norm": 1.1173366320876923, "learning_rate": 9.347014925373135e-05, "loss": 0.6922, "step": 502 }, { "epoch": 0.14077805765463197, "grad_norm": 0.4780147783751096, "learning_rate": 9.365671641791045e-05, "loss": 0.6613, "step": 503 }, { "epoch": 0.14105793450881612, "grad_norm": 0.35430385583757923, "learning_rate": 9.384328358208956e-05, "loss": 0.675, "step": 504 }, { "epoch": 0.14133781136300028, "grad_norm": 0.48386478637752267, "learning_rate": 9.402985074626867e-05, "loss": 0.6585, "step": 505 }, { "epoch": 0.14161768821718443, "grad_norm": 0.38156271567770533, "learning_rate": 9.421641791044776e-05, "loss": 0.6599, "step": 506 }, { "epoch": 0.1418975650713686, "grad_norm": 0.3788471218505237, "learning_rate": 9.440298507462687e-05, "loss": 0.6478, "step": 507 }, { "epoch": 0.14217744192555276, "grad_norm": 0.35070799994627283, "learning_rate": 9.458955223880598e-05, "loss": 0.6221, "step": 508 }, { "epoch": 0.1424573187797369, "grad_norm": 0.3801612235838529, "learning_rate": 9.477611940298507e-05, "loss": 0.6298, "step": 509 }, { "epoch": 0.14273719563392107, "grad_norm": 0.375699707803378, "learning_rate": 9.496268656716418e-05, "loss": 0.6601, "step": 510 }, { "epoch": 0.14301707248810525, "grad_norm": 0.3453134958214971, "learning_rate": 9.514925373134329e-05, "loss": 0.6312, "step": 511 }, { "epoch": 0.1432969493422894, "grad_norm": 0.5285573505383262, "learning_rate": 9.533582089552238e-05, "loss": 0.648, "step": 512 }, { "epoch": 0.14357682619647355, "grad_norm": 0.36405062708691716, "learning_rate": 9.552238805970149e-05, "loss": 0.648, "step": 513 }, { "epoch": 0.1438567030506577, "grad_norm": 0.3693638605210676, "learning_rate": 9.57089552238806e-05, "loss": 0.6903, "step": 514 }, { "epoch": 0.14413657990484188, "grad_norm": 0.40528808147040785, "learning_rate": 9.58955223880597e-05, "loss": 0.6474, "step": 515 }, { "epoch": 0.14441645675902604, "grad_norm": 0.4271064032840139, "learning_rate": 9.608208955223881e-05, "loss": 0.6617, "step": 516 }, { "epoch": 0.1446963336132102, "grad_norm": 0.3704408892050665, "learning_rate": 9.626865671641792e-05, "loss": 0.6527, "step": 517 }, { "epoch": 0.14497621046739434, "grad_norm": 0.36416815625544374, "learning_rate": 9.645522388059703e-05, "loss": 0.6775, "step": 518 }, { "epoch": 0.1452560873215785, "grad_norm": 0.37214211322290497, "learning_rate": 9.664179104477612e-05, "loss": 0.6693, "step": 519 }, { "epoch": 0.14553596417576267, "grad_norm": 0.34813096434431123, "learning_rate": 9.682835820895523e-05, "loss": 0.6438, "step": 520 }, { "epoch": 0.14581584102994682, "grad_norm": 0.37853947367790136, "learning_rate": 9.701492537313434e-05, "loss": 0.6482, "step": 521 }, { "epoch": 0.14609571788413098, "grad_norm": 0.36937575889952384, "learning_rate": 9.720149253731343e-05, "loss": 0.672, "step": 522 }, { "epoch": 0.14637559473831513, "grad_norm": 0.3584928313519803, "learning_rate": 9.738805970149254e-05, "loss": 0.6744, "step": 523 }, { "epoch": 0.1466554715924993, "grad_norm": 0.34468056884846726, "learning_rate": 9.757462686567165e-05, "loss": 0.6451, "step": 524 }, { "epoch": 0.14693534844668346, "grad_norm": 0.3467550680805756, "learning_rate": 9.776119402985075e-05, "loss": 0.6327, "step": 525 }, { "epoch": 0.1472152253008676, "grad_norm": 0.393307955817238, "learning_rate": 9.794776119402985e-05, "loss": 0.6529, "step": 526 }, { "epoch": 0.14749510215505177, "grad_norm": 0.3706696868767734, "learning_rate": 9.813432835820896e-05, "loss": 0.6944, "step": 527 }, { "epoch": 0.14777497900923595, "grad_norm": 0.35440153514955575, "learning_rate": 9.832089552238806e-05, "loss": 0.6627, "step": 528 }, { "epoch": 0.1480548558634201, "grad_norm": 0.35011677077474185, "learning_rate": 9.850746268656717e-05, "loss": 0.6446, "step": 529 }, { "epoch": 0.14833473271760425, "grad_norm": 0.353375329903762, "learning_rate": 9.869402985074628e-05, "loss": 0.6804, "step": 530 }, { "epoch": 0.1486146095717884, "grad_norm": 0.35084665576786195, "learning_rate": 9.888059701492539e-05, "loss": 0.662, "step": 531 }, { "epoch": 0.14889448642597258, "grad_norm": 0.3447708979608197, "learning_rate": 9.906716417910448e-05, "loss": 0.6669, "step": 532 }, { "epoch": 0.14917436328015674, "grad_norm": 0.35527684031521845, "learning_rate": 9.925373134328359e-05, "loss": 0.6684, "step": 533 }, { "epoch": 0.1494542401343409, "grad_norm": 0.3610149287886694, "learning_rate": 9.94402985074627e-05, "loss": 0.6629, "step": 534 }, { "epoch": 0.14973411698852504, "grad_norm": 0.32069706799707914, "learning_rate": 9.96268656716418e-05, "loss": 0.6494, "step": 535 }, { "epoch": 0.15001399384270922, "grad_norm": 0.3528379810923722, "learning_rate": 9.98134328358209e-05, "loss": 0.6755, "step": 536 }, { "epoch": 0.15029387069689337, "grad_norm": 0.352761540563393, "learning_rate": 0.0001, "loss": 0.6744, "step": 537 }, { "epoch": 0.15057374755107752, "grad_norm": 0.3192361239700303, "learning_rate": 9.999999762048602e-05, "loss": 0.6382, "step": 538 }, { "epoch": 0.15085362440526168, "grad_norm": 0.35496003453929414, "learning_rate": 9.999999048194425e-05, "loss": 0.6568, "step": 539 }, { "epoch": 0.15113350125944586, "grad_norm": 0.35732688372914906, "learning_rate": 9.999997858437541e-05, "loss": 0.6835, "step": 540 }, { "epoch": 0.15141337811363, "grad_norm": 0.3459422957498091, "learning_rate": 9.999996192778065e-05, "loss": 0.671, "step": 541 }, { "epoch": 0.15169325496781416, "grad_norm": 0.3629900969756224, "learning_rate": 9.999994051216151e-05, "loss": 0.6527, "step": 542 }, { "epoch": 0.15197313182199831, "grad_norm": 0.3359649863697183, "learning_rate": 9.999991433752003e-05, "loss": 0.6276, "step": 543 }, { "epoch": 0.15225300867618247, "grad_norm": 0.33179698726550116, "learning_rate": 9.999988340385873e-05, "loss": 0.6717, "step": 544 }, { "epoch": 0.15253288553036665, "grad_norm": 0.3387592133630175, "learning_rate": 9.999984771118054e-05, "loss": 0.6677, "step": 545 }, { "epoch": 0.1528127623845508, "grad_norm": 0.3949036193320457, "learning_rate": 9.999980725948886e-05, "loss": 0.6305, "step": 546 }, { "epoch": 0.15309263923873495, "grad_norm": 0.3296071177268487, "learning_rate": 9.999976204878753e-05, "loss": 0.6535, "step": 547 }, { "epoch": 0.1533725160929191, "grad_norm": 0.3035639208800312, "learning_rate": 9.999971207908087e-05, "loss": 0.6459, "step": 548 }, { "epoch": 0.15365239294710328, "grad_norm": 0.34793917616897585, "learning_rate": 9.999965735037364e-05, "loss": 0.6555, "step": 549 }, { "epoch": 0.15393226980128744, "grad_norm": 0.33941641898613595, "learning_rate": 9.999959786267103e-05, "loss": 0.6534, "step": 550 }, { "epoch": 0.1542121466554716, "grad_norm": 0.35110231378849793, "learning_rate": 9.99995336159787e-05, "loss": 0.6612, "step": 551 }, { "epoch": 0.15449202350965574, "grad_norm": 0.3272549474936474, "learning_rate": 9.999946461030279e-05, "loss": 0.6331, "step": 552 }, { "epoch": 0.15477190036383992, "grad_norm": 0.3343686163686602, "learning_rate": 9.999939084564985e-05, "loss": 0.6498, "step": 553 }, { "epoch": 0.15505177721802407, "grad_norm": 0.3449979088086382, "learning_rate": 9.999931232202689e-05, "loss": 0.6539, "step": 554 }, { "epoch": 0.15533165407220823, "grad_norm": 0.3447312307894874, "learning_rate": 9.999922903944139e-05, "loss": 0.6201, "step": 555 }, { "epoch": 0.15561153092639238, "grad_norm": 0.31637501434858883, "learning_rate": 9.99991409979013e-05, "loss": 0.6073, "step": 556 }, { "epoch": 0.15589140778057656, "grad_norm": 0.3316495181989054, "learning_rate": 9.999904819741499e-05, "loss": 0.6347, "step": 557 }, { "epoch": 0.1561712846347607, "grad_norm": 0.3407282822121997, "learning_rate": 9.999895063799127e-05, "loss": 0.6241, "step": 558 }, { "epoch": 0.15645116148894486, "grad_norm": 0.3418343751138157, "learning_rate": 9.999884831963946e-05, "loss": 0.6738, "step": 559 }, { "epoch": 0.15673103834312901, "grad_norm": 0.35359280098762447, "learning_rate": 9.999874124236927e-05, "loss": 0.6493, "step": 560 }, { "epoch": 0.1570109151973132, "grad_norm": 0.3444280324334195, "learning_rate": 9.99986294061909e-05, "loss": 0.639, "step": 561 }, { "epoch": 0.15729079205149735, "grad_norm": 0.33392447648348894, "learning_rate": 9.999851281111501e-05, "loss": 0.6583, "step": 562 }, { "epoch": 0.1575706689056815, "grad_norm": 0.339319192915043, "learning_rate": 9.999839145715269e-05, "loss": 0.6578, "step": 563 }, { "epoch": 0.15785054575986565, "grad_norm": 0.33436449611237556, "learning_rate": 9.999826534431546e-05, "loss": 0.6577, "step": 564 }, { "epoch": 0.15813042261404983, "grad_norm": 0.31870643898655826, "learning_rate": 9.999813447261536e-05, "loss": 0.6507, "step": 565 }, { "epoch": 0.15841029946823398, "grad_norm": 0.31539297776414027, "learning_rate": 9.999799884206484e-05, "loss": 0.625, "step": 566 }, { "epoch": 0.15869017632241814, "grad_norm": 0.33819593568049605, "learning_rate": 9.999785845267681e-05, "loss": 0.6573, "step": 567 }, { "epoch": 0.1589700531766023, "grad_norm": 0.32485623049274076, "learning_rate": 9.999771330446462e-05, "loss": 0.6514, "step": 568 }, { "epoch": 0.15924993003078644, "grad_norm": 0.3308253870901051, "learning_rate": 9.99975633974421e-05, "loss": 0.644, "step": 569 }, { "epoch": 0.15952980688497062, "grad_norm": 0.3262387922383822, "learning_rate": 9.99974087316235e-05, "loss": 0.6539, "step": 570 }, { "epoch": 0.15980968373915477, "grad_norm": 0.3249032849983034, "learning_rate": 9.999724930702356e-05, "loss": 0.641, "step": 571 }, { "epoch": 0.16008956059333893, "grad_norm": 0.3546281299107742, "learning_rate": 9.999708512365744e-05, "loss": 0.6301, "step": 572 }, { "epoch": 0.16036943744752308, "grad_norm": 0.349394735113186, "learning_rate": 9.999691618154077e-05, "loss": 0.6697, "step": 573 }, { "epoch": 0.16064931430170726, "grad_norm": 0.3388422869594151, "learning_rate": 9.999674248068964e-05, "loss": 0.6493, "step": 574 }, { "epoch": 0.1609291911558914, "grad_norm": 0.3366125852937771, "learning_rate": 9.999656402112059e-05, "loss": 0.6218, "step": 575 }, { "epoch": 0.16120906801007556, "grad_norm": 0.33380398174771947, "learning_rate": 9.999638080285058e-05, "loss": 0.6166, "step": 576 }, { "epoch": 0.16148894486425971, "grad_norm": 0.36646577585598783, "learning_rate": 9.999619282589705e-05, "loss": 0.6541, "step": 577 }, { "epoch": 0.1617688217184439, "grad_norm": 0.3370800941263289, "learning_rate": 9.999600009027792e-05, "loss": 0.6578, "step": 578 }, { "epoch": 0.16204869857262805, "grad_norm": 0.31956453412198904, "learning_rate": 9.999580259601151e-05, "loss": 0.6209, "step": 579 }, { "epoch": 0.1623285754268122, "grad_norm": 0.3361863837783066, "learning_rate": 9.999560034311663e-05, "loss": 0.6306, "step": 580 }, { "epoch": 0.16260845228099635, "grad_norm": 0.3276060075852739, "learning_rate": 9.999539333161251e-05, "loss": 0.6532, "step": 581 }, { "epoch": 0.16288832913518053, "grad_norm": 0.30105154207683, "learning_rate": 9.999518156151888e-05, "loss": 0.6619, "step": 582 }, { "epoch": 0.16316820598936468, "grad_norm": 0.3222246686403399, "learning_rate": 9.999496503285589e-05, "loss": 0.6415, "step": 583 }, { "epoch": 0.16344808284354884, "grad_norm": 0.3478766083008775, "learning_rate": 9.999474374564415e-05, "loss": 0.6603, "step": 584 }, { "epoch": 0.163727959697733, "grad_norm": 0.3236462403946272, "learning_rate": 9.99945176999047e-05, "loss": 0.6671, "step": 585 }, { "epoch": 0.16400783655191717, "grad_norm": 0.3140274802138082, "learning_rate": 9.999428689565909e-05, "loss": 0.6423, "step": 586 }, { "epoch": 0.16428771340610132, "grad_norm": 0.3500979849784527, "learning_rate": 9.999405133292925e-05, "loss": 0.6618, "step": 587 }, { "epoch": 0.16456759026028547, "grad_norm": 0.31359317793341923, "learning_rate": 9.999381101173764e-05, "loss": 0.6325, "step": 588 }, { "epoch": 0.16484746711446963, "grad_norm": 0.3355690039308301, "learning_rate": 9.999356593210709e-05, "loss": 0.6836, "step": 589 }, { "epoch": 0.1651273439686538, "grad_norm": 0.33351754121946403, "learning_rate": 9.999331609406098e-05, "loss": 0.6632, "step": 590 }, { "epoch": 0.16540722082283796, "grad_norm": 0.32338398053521783, "learning_rate": 9.999306149762304e-05, "loss": 0.6462, "step": 591 }, { "epoch": 0.1656870976770221, "grad_norm": 0.33566937706531347, "learning_rate": 9.999280214281754e-05, "loss": 0.6659, "step": 592 }, { "epoch": 0.16596697453120626, "grad_norm": 0.3183972843362386, "learning_rate": 9.999253802966914e-05, "loss": 0.649, "step": 593 }, { "epoch": 0.16624685138539042, "grad_norm": 0.3402072648711224, "learning_rate": 9.999226915820298e-05, "loss": 0.6821, "step": 594 }, { "epoch": 0.1665267282395746, "grad_norm": 0.31877277389056163, "learning_rate": 9.999199552844469e-05, "loss": 0.6205, "step": 595 }, { "epoch": 0.16680660509375875, "grad_norm": 0.33758778123086086, "learning_rate": 9.999171714042026e-05, "loss": 0.6621, "step": 596 }, { "epoch": 0.1670864819479429, "grad_norm": 0.3223090328122372, "learning_rate": 9.999143399415622e-05, "loss": 0.6125, "step": 597 }, { "epoch": 0.16736635880212705, "grad_norm": 0.31000927984882737, "learning_rate": 9.99911460896795e-05, "loss": 0.6385, "step": 598 }, { "epoch": 0.16764623565631123, "grad_norm": 0.33246563079295793, "learning_rate": 9.999085342701753e-05, "loss": 0.6626, "step": 599 }, { "epoch": 0.16792611251049538, "grad_norm": 0.312302187510902, "learning_rate": 9.999055600619814e-05, "loss": 0.6684, "step": 600 }, { "epoch": 0.16820598936467954, "grad_norm": 0.34065399079592823, "learning_rate": 9.999025382724965e-05, "loss": 0.6076, "step": 601 }, { "epoch": 0.1684858662188637, "grad_norm": 0.32782160106703323, "learning_rate": 9.998994689020082e-05, "loss": 0.6354, "step": 602 }, { "epoch": 0.16876574307304787, "grad_norm": 0.30407057283641703, "learning_rate": 9.998963519508087e-05, "loss": 0.6353, "step": 603 }, { "epoch": 0.16904561992723202, "grad_norm": 0.3318246992586628, "learning_rate": 9.998931874191945e-05, "loss": 0.6626, "step": 604 }, { "epoch": 0.16932549678141617, "grad_norm": 0.3255311682463083, "learning_rate": 9.998899753074669e-05, "loss": 0.634, "step": 605 }, { "epoch": 0.16960537363560033, "grad_norm": 0.3159736551954797, "learning_rate": 9.998867156159318e-05, "loss": 0.6412, "step": 606 }, { "epoch": 0.1698852504897845, "grad_norm": 0.3151525995521645, "learning_rate": 9.998834083448991e-05, "loss": 0.6233, "step": 607 }, { "epoch": 0.17016512734396866, "grad_norm": 0.31240621793504697, "learning_rate": 9.998800534946839e-05, "loss": 0.6199, "step": 608 }, { "epoch": 0.1704450041981528, "grad_norm": 0.32750340408237494, "learning_rate": 9.998766510656056e-05, "loss": 0.6482, "step": 609 }, { "epoch": 0.17072488105233696, "grad_norm": 0.3243616902352178, "learning_rate": 9.998732010579876e-05, "loss": 0.6088, "step": 610 }, { "epoch": 0.17100475790652114, "grad_norm": 0.32607558121529656, "learning_rate": 9.998697034721587e-05, "loss": 0.6368, "step": 611 }, { "epoch": 0.1712846347607053, "grad_norm": 0.31635826408054585, "learning_rate": 9.998661583084516e-05, "loss": 0.6464, "step": 612 }, { "epoch": 0.17156451161488945, "grad_norm": 0.32383391526072597, "learning_rate": 9.998625655672037e-05, "loss": 0.6498, "step": 613 }, { "epoch": 0.1718443884690736, "grad_norm": 0.32236318872224334, "learning_rate": 9.998589252487571e-05, "loss": 0.6295, "step": 614 }, { "epoch": 0.17212426532325778, "grad_norm": 0.32684039910383994, "learning_rate": 9.998552373534582e-05, "loss": 0.6603, "step": 615 }, { "epoch": 0.17240414217744193, "grad_norm": 0.3140580905751736, "learning_rate": 9.998515018816579e-05, "loss": 0.6284, "step": 616 }, { "epoch": 0.17268401903162608, "grad_norm": 0.3341019894639354, "learning_rate": 9.99847718833712e-05, "loss": 0.657, "step": 617 }, { "epoch": 0.17296389588581024, "grad_norm": 0.32375888251616164, "learning_rate": 9.998438882099805e-05, "loss": 0.6398, "step": 618 }, { "epoch": 0.1732437727399944, "grad_norm": 0.3200189127041982, "learning_rate": 9.998400100108279e-05, "loss": 0.6495, "step": 619 }, { "epoch": 0.17352364959417857, "grad_norm": 0.33045751781447524, "learning_rate": 9.998360842366232e-05, "loss": 0.6394, "step": 620 }, { "epoch": 0.17380352644836272, "grad_norm": 0.3109262315892561, "learning_rate": 9.998321108877405e-05, "loss": 0.6312, "step": 621 }, { "epoch": 0.17408340330254687, "grad_norm": 0.31570657112812556, "learning_rate": 9.998280899645574e-05, "loss": 0.6467, "step": 622 }, { "epoch": 0.17436328015673103, "grad_norm": 0.3071127908154649, "learning_rate": 9.998240214674572e-05, "loss": 0.6412, "step": 623 }, { "epoch": 0.1746431570109152, "grad_norm": 0.3142523728280045, "learning_rate": 9.998199053968267e-05, "loss": 0.6312, "step": 624 }, { "epoch": 0.17492303386509936, "grad_norm": 0.31095019347566755, "learning_rate": 9.99815741753058e-05, "loss": 0.6012, "step": 625 }, { "epoch": 0.1752029107192835, "grad_norm": 0.3186015632457505, "learning_rate": 9.998115305365471e-05, "loss": 0.6178, "step": 626 }, { "epoch": 0.17548278757346766, "grad_norm": 0.3391631997299979, "learning_rate": 9.998072717476951e-05, "loss": 0.6538, "step": 627 }, { "epoch": 0.17576266442765184, "grad_norm": 0.3342822626637587, "learning_rate": 9.998029653869071e-05, "loss": 0.6293, "step": 628 }, { "epoch": 0.176042541281836, "grad_norm": 0.3015306664313235, "learning_rate": 9.997986114545932e-05, "loss": 0.6458, "step": 629 }, { "epoch": 0.17632241813602015, "grad_norm": 0.31440909073843937, "learning_rate": 9.997942099511676e-05, "loss": 0.6206, "step": 630 }, { "epoch": 0.1766022949902043, "grad_norm": 0.32982370779234377, "learning_rate": 9.997897608770495e-05, "loss": 0.6538, "step": 631 }, { "epoch": 0.17688217184438848, "grad_norm": 0.3124634522078974, "learning_rate": 9.997852642326622e-05, "loss": 0.6376, "step": 632 }, { "epoch": 0.17716204869857263, "grad_norm": 0.31262914391944435, "learning_rate": 9.997807200184335e-05, "loss": 0.6232, "step": 633 }, { "epoch": 0.17744192555275679, "grad_norm": 0.3175264108164007, "learning_rate": 9.997761282347963e-05, "loss": 0.6646, "step": 634 }, { "epoch": 0.17772180240694094, "grad_norm": 0.30590158489987684, "learning_rate": 9.997714888821874e-05, "loss": 0.6159, "step": 635 }, { "epoch": 0.17800167926112512, "grad_norm": 0.3317089000758993, "learning_rate": 9.997668019610486e-05, "loss": 0.6697, "step": 636 }, { "epoch": 0.17828155611530927, "grad_norm": 0.3101179298204563, "learning_rate": 9.997620674718257e-05, "loss": 0.6339, "step": 637 }, { "epoch": 0.17856143296949342, "grad_norm": 0.303044616721775, "learning_rate": 9.997572854149696e-05, "loss": 0.6395, "step": 638 }, { "epoch": 0.17884130982367757, "grad_norm": 0.3110256568397774, "learning_rate": 9.997524557909352e-05, "loss": 0.5983, "step": 639 }, { "epoch": 0.17912118667786175, "grad_norm": 0.3149989233307906, "learning_rate": 9.997475786001826e-05, "loss": 0.6286, "step": 640 }, { "epoch": 0.1794010635320459, "grad_norm": 0.2845203322534443, "learning_rate": 9.997426538431755e-05, "loss": 0.6055, "step": 641 }, { "epoch": 0.17968094038623006, "grad_norm": 0.3350427459911618, "learning_rate": 9.997376815203829e-05, "loss": 0.6226, "step": 642 }, { "epoch": 0.1799608172404142, "grad_norm": 0.3148746819181946, "learning_rate": 9.997326616322782e-05, "loss": 0.6547, "step": 643 }, { "epoch": 0.18024069409459836, "grad_norm": 0.3028224259698831, "learning_rate": 9.997275941793389e-05, "loss": 0.6193, "step": 644 }, { "epoch": 0.18052057094878254, "grad_norm": 0.29673426752395654, "learning_rate": 9.997224791620476e-05, "loss": 0.62, "step": 645 }, { "epoch": 0.1808004478029667, "grad_norm": 0.3058890686267457, "learning_rate": 9.99717316580891e-05, "loss": 0.658, "step": 646 }, { "epoch": 0.18108032465715085, "grad_norm": 0.40700293516024183, "learning_rate": 9.997121064363606e-05, "loss": 0.6653, "step": 647 }, { "epoch": 0.181360201511335, "grad_norm": 0.323877426832963, "learning_rate": 9.99706848728952e-05, "loss": 0.6556, "step": 648 }, { "epoch": 0.18164007836551918, "grad_norm": 0.29657675077306456, "learning_rate": 9.997015434591659e-05, "loss": 0.6384, "step": 649 }, { "epoch": 0.18191995521970333, "grad_norm": 0.30619817479643974, "learning_rate": 9.996961906275073e-05, "loss": 0.62, "step": 650 }, { "epoch": 0.18219983207388749, "grad_norm": 0.32270553389437934, "learning_rate": 9.996907902344856e-05, "loss": 0.6491, "step": 651 }, { "epoch": 0.18247970892807164, "grad_norm": 0.2969028118634236, "learning_rate": 9.996853422806146e-05, "loss": 0.6528, "step": 652 }, { "epoch": 0.18275958578225582, "grad_norm": 0.29891300543350263, "learning_rate": 9.996798467664132e-05, "loss": 0.6387, "step": 653 }, { "epoch": 0.18303946263643997, "grad_norm": 0.3134949112446973, "learning_rate": 9.996743036924042e-05, "loss": 0.6351, "step": 654 }, { "epoch": 0.18331933949062412, "grad_norm": 0.3264595354016295, "learning_rate": 9.996687130591153e-05, "loss": 0.6308, "step": 655 }, { "epoch": 0.18359921634480827, "grad_norm": 0.2963404807242696, "learning_rate": 9.996630748670787e-05, "loss": 0.6485, "step": 656 }, { "epoch": 0.18387909319899245, "grad_norm": 0.3209518691253152, "learning_rate": 9.99657389116831e-05, "loss": 0.6568, "step": 657 }, { "epoch": 0.1841589700531766, "grad_norm": 0.320733443890241, "learning_rate": 9.996516558089133e-05, "loss": 0.6192, "step": 658 }, { "epoch": 0.18443884690736076, "grad_norm": 0.3004612938137131, "learning_rate": 9.996458749438712e-05, "loss": 0.6372, "step": 659 }, { "epoch": 0.1847187237615449, "grad_norm": 0.3240748172972207, "learning_rate": 9.996400465222551e-05, "loss": 0.6367, "step": 660 }, { "epoch": 0.1849986006157291, "grad_norm": 0.30365473088989753, "learning_rate": 9.996341705446197e-05, "loss": 0.655, "step": 661 }, { "epoch": 0.18527847746991324, "grad_norm": 0.3101674467288344, "learning_rate": 9.996282470115245e-05, "loss": 0.6254, "step": 662 }, { "epoch": 0.1855583543240974, "grad_norm": 0.30271824363428407, "learning_rate": 9.996222759235329e-05, "loss": 0.6263, "step": 663 }, { "epoch": 0.18583823117828155, "grad_norm": 0.3079318380313509, "learning_rate": 9.996162572812135e-05, "loss": 0.6221, "step": 664 }, { "epoch": 0.18611810803246573, "grad_norm": 0.3051785871844845, "learning_rate": 9.99610191085139e-05, "loss": 0.6227, "step": 665 }, { "epoch": 0.18639798488664988, "grad_norm": 0.3247709985735292, "learning_rate": 9.99604077335887e-05, "loss": 0.6214, "step": 666 }, { "epoch": 0.18667786174083403, "grad_norm": 0.3223222446657553, "learning_rate": 9.995979160340392e-05, "loss": 0.6688, "step": 667 }, { "epoch": 0.18695773859501819, "grad_norm": 0.30940214214876993, "learning_rate": 9.995917071801821e-05, "loss": 0.6162, "step": 668 }, { "epoch": 0.18723761544920234, "grad_norm": 0.3153905402430541, "learning_rate": 9.995854507749068e-05, "loss": 0.6316, "step": 669 }, { "epoch": 0.18751749230338652, "grad_norm": 0.31896302875409144, "learning_rate": 9.995791468188083e-05, "loss": 0.6546, "step": 670 }, { "epoch": 0.18779736915757067, "grad_norm": 0.3020089911619597, "learning_rate": 9.995727953124874e-05, "loss": 0.6266, "step": 671 }, { "epoch": 0.18807724601175482, "grad_norm": 0.2863324313594203, "learning_rate": 9.99566396256548e-05, "loss": 0.6282, "step": 672 }, { "epoch": 0.18835712286593898, "grad_norm": 0.3251047115450784, "learning_rate": 9.995599496515995e-05, "loss": 0.6637, "step": 673 }, { "epoch": 0.18863699972012316, "grad_norm": 0.31740822333464325, "learning_rate": 9.995534554982553e-05, "loss": 0.5948, "step": 674 }, { "epoch": 0.1889168765743073, "grad_norm": 0.2995959785820535, "learning_rate": 9.995469137971337e-05, "loss": 0.5992, "step": 675 }, { "epoch": 0.18919675342849146, "grad_norm": 0.31641331477201434, "learning_rate": 9.99540324548857e-05, "loss": 0.6299, "step": 676 }, { "epoch": 0.1894766302826756, "grad_norm": 0.31194497903046237, "learning_rate": 9.995336877540527e-05, "loss": 0.6163, "step": 677 }, { "epoch": 0.1897565071368598, "grad_norm": 0.2901853783690715, "learning_rate": 9.995270034133525e-05, "loss": 0.5985, "step": 678 }, { "epoch": 0.19003638399104394, "grad_norm": 0.312865923561726, "learning_rate": 9.995202715273925e-05, "loss": 0.6312, "step": 679 }, { "epoch": 0.1903162608452281, "grad_norm": 0.3149652925861172, "learning_rate": 9.995134920968135e-05, "loss": 0.6212, "step": 680 }, { "epoch": 0.19059613769941225, "grad_norm": 0.29455550139493053, "learning_rate": 9.995066651222606e-05, "loss": 0.6105, "step": 681 }, { "epoch": 0.19087601455359643, "grad_norm": 0.31123731050816483, "learning_rate": 9.994997906043837e-05, "loss": 0.6152, "step": 682 }, { "epoch": 0.19115589140778058, "grad_norm": 0.3322634981253788, "learning_rate": 9.994928685438373e-05, "loss": 0.6159, "step": 683 }, { "epoch": 0.19143576826196473, "grad_norm": 0.3275257284931996, "learning_rate": 9.994858989412801e-05, "loss": 0.6117, "step": 684 }, { "epoch": 0.1917156451161489, "grad_norm": 0.3040485430620487, "learning_rate": 9.994788817973753e-05, "loss": 0.6015, "step": 685 }, { "epoch": 0.19199552197033307, "grad_norm": 0.3095529932531528, "learning_rate": 9.99471817112791e-05, "loss": 0.6451, "step": 686 }, { "epoch": 0.19227539882451722, "grad_norm": 0.29265260205457316, "learning_rate": 9.994647048881998e-05, "loss": 0.6398, "step": 687 }, { "epoch": 0.19255527567870137, "grad_norm": 0.290284836365612, "learning_rate": 9.994575451242782e-05, "loss": 0.6342, "step": 688 }, { "epoch": 0.19283515253288552, "grad_norm": 0.2977989328108598, "learning_rate": 9.994503378217079e-05, "loss": 0.6454, "step": 689 }, { "epoch": 0.19311502938706968, "grad_norm": 0.31642739862391606, "learning_rate": 9.99443082981175e-05, "loss": 0.6274, "step": 690 }, { "epoch": 0.19339490624125386, "grad_norm": 0.2866466996125448, "learning_rate": 9.994357806033699e-05, "loss": 0.6053, "step": 691 }, { "epoch": 0.193674783095438, "grad_norm": 0.30120527019938376, "learning_rate": 9.994284306889876e-05, "loss": 0.6059, "step": 692 }, { "epoch": 0.19395465994962216, "grad_norm": 0.2988298937768542, "learning_rate": 9.994210332387277e-05, "loss": 0.6346, "step": 693 }, { "epoch": 0.1942345368038063, "grad_norm": 0.32095988302603706, "learning_rate": 9.994135882532943e-05, "loss": 0.6202, "step": 694 }, { "epoch": 0.1945144136579905, "grad_norm": 0.2980296981986961, "learning_rate": 9.994060957333962e-05, "loss": 0.6183, "step": 695 }, { "epoch": 0.19479429051217464, "grad_norm": 0.3184631025074255, "learning_rate": 9.993985556797463e-05, "loss": 0.6459, "step": 696 }, { "epoch": 0.1950741673663588, "grad_norm": 0.28480537458562444, "learning_rate": 9.993909680930622e-05, "loss": 0.6469, "step": 697 }, { "epoch": 0.19535404422054295, "grad_norm": 0.29814808673431314, "learning_rate": 9.993833329740663e-05, "loss": 0.6171, "step": 698 }, { "epoch": 0.19563392107472713, "grad_norm": 0.3234399945777799, "learning_rate": 9.993756503234855e-05, "loss": 0.6165, "step": 699 }, { "epoch": 0.19591379792891128, "grad_norm": 0.30129220271900264, "learning_rate": 9.993679201420505e-05, "loss": 0.6121, "step": 700 }, { "epoch": 0.19619367478309543, "grad_norm": 0.3064131778971802, "learning_rate": 9.993601424304975e-05, "loss": 0.6359, "step": 701 }, { "epoch": 0.1964735516372796, "grad_norm": 0.30456250610209257, "learning_rate": 9.993523171895664e-05, "loss": 0.6157, "step": 702 }, { "epoch": 0.19675342849146377, "grad_norm": 0.3058162494981979, "learning_rate": 9.993444444200026e-05, "loss": 0.6158, "step": 703 }, { "epoch": 0.19703330534564792, "grad_norm": 0.31695347305058746, "learning_rate": 9.993365241225548e-05, "loss": 0.631, "step": 704 }, { "epoch": 0.19731318219983207, "grad_norm": 0.2804550533874116, "learning_rate": 9.993285562979773e-05, "loss": 0.6495, "step": 705 }, { "epoch": 0.19759305905401622, "grad_norm": 0.29588930540308134, "learning_rate": 9.993205409470283e-05, "loss": 0.6583, "step": 706 }, { "epoch": 0.1978729359082004, "grad_norm": 0.2938872096547923, "learning_rate": 9.993124780704707e-05, "loss": 0.6035, "step": 707 }, { "epoch": 0.19815281276238456, "grad_norm": 0.3026828687774274, "learning_rate": 9.993043676690721e-05, "loss": 0.6521, "step": 708 }, { "epoch": 0.1984326896165687, "grad_norm": 0.29562997826441406, "learning_rate": 9.99296209743604e-05, "loss": 0.6247, "step": 709 }, { "epoch": 0.19871256647075286, "grad_norm": 0.2915499622213924, "learning_rate": 9.992880042948435e-05, "loss": 0.6107, "step": 710 }, { "epoch": 0.19899244332493704, "grad_norm": 0.3001574440988939, "learning_rate": 9.992797513235713e-05, "loss": 0.6422, "step": 711 }, { "epoch": 0.1992723201791212, "grad_norm": 0.30837916317033914, "learning_rate": 9.992714508305727e-05, "loss": 0.6172, "step": 712 }, { "epoch": 0.19955219703330535, "grad_norm": 0.29558819715261386, "learning_rate": 9.99263102816638e-05, "loss": 0.6081, "step": 713 }, { "epoch": 0.1998320738874895, "grad_norm": 0.3047160383997923, "learning_rate": 9.992547072825618e-05, "loss": 0.611, "step": 714 }, { "epoch": 0.20011195074167365, "grad_norm": 0.30541138661385725, "learning_rate": 9.99246264229143e-05, "loss": 0.5902, "step": 715 }, { "epoch": 0.20039182759585783, "grad_norm": 0.28570029418490417, "learning_rate": 9.992377736571853e-05, "loss": 0.6181, "step": 716 }, { "epoch": 0.20067170445004198, "grad_norm": 0.29682223835244825, "learning_rate": 9.992292355674971e-05, "loss": 0.6033, "step": 717 }, { "epoch": 0.20095158130422613, "grad_norm": 0.2979443560789934, "learning_rate": 9.992206499608907e-05, "loss": 0.6266, "step": 718 }, { "epoch": 0.2012314581584103, "grad_norm": 0.30588550823118615, "learning_rate": 9.992120168381834e-05, "loss": 0.6287, "step": 719 }, { "epoch": 0.20151133501259447, "grad_norm": 0.4100123853154365, "learning_rate": 9.992033362001969e-05, "loss": 0.6247, "step": 720 }, { "epoch": 0.20179121186677862, "grad_norm": 0.29072001475354536, "learning_rate": 9.991946080477574e-05, "loss": 0.6464, "step": 721 }, { "epoch": 0.20207108872096277, "grad_norm": 0.30962293495077287, "learning_rate": 9.991858323816958e-05, "loss": 0.6335, "step": 722 }, { "epoch": 0.20235096557514692, "grad_norm": 0.3053900143595763, "learning_rate": 9.991770092028473e-05, "loss": 0.6345, "step": 723 }, { "epoch": 0.2026308424293311, "grad_norm": 0.31353933176985543, "learning_rate": 9.991681385120515e-05, "loss": 0.6249, "step": 724 }, { "epoch": 0.20291071928351526, "grad_norm": 0.3068086339106254, "learning_rate": 9.99159220310153e-05, "loss": 0.6147, "step": 725 }, { "epoch": 0.2031905961376994, "grad_norm": 0.3074442656071545, "learning_rate": 9.991502545980005e-05, "loss": 0.6119, "step": 726 }, { "epoch": 0.20347047299188356, "grad_norm": 0.28818327843923686, "learning_rate": 9.991412413764474e-05, "loss": 0.6103, "step": 727 }, { "epoch": 0.20375034984606774, "grad_norm": 0.2978307288549914, "learning_rate": 9.991321806463516e-05, "loss": 0.6073, "step": 728 }, { "epoch": 0.2040302267002519, "grad_norm": 0.3018786599383204, "learning_rate": 9.991230724085754e-05, "loss": 0.6129, "step": 729 }, { "epoch": 0.20431010355443605, "grad_norm": 0.29415963739630785, "learning_rate": 9.991139166639859e-05, "loss": 0.6364, "step": 730 }, { "epoch": 0.2045899804086202, "grad_norm": 0.2946680094302844, "learning_rate": 9.991047134134541e-05, "loss": 0.61, "step": 731 }, { "epoch": 0.20486985726280438, "grad_norm": 0.3015152834518143, "learning_rate": 9.990954626578567e-05, "loss": 0.6138, "step": 732 }, { "epoch": 0.20514973411698853, "grad_norm": 0.30186507932408085, "learning_rate": 9.990861643980735e-05, "loss": 0.6355, "step": 733 }, { "epoch": 0.20542961097117268, "grad_norm": 0.31699518043113645, "learning_rate": 9.990768186349902e-05, "loss": 0.6152, "step": 734 }, { "epoch": 0.20570948782535683, "grad_norm": 0.3063390693369908, "learning_rate": 9.990674253694957e-05, "loss": 0.648, "step": 735 }, { "epoch": 0.20598936467954101, "grad_norm": 0.29032196511694675, "learning_rate": 9.990579846024844e-05, "loss": 0.607, "step": 736 }, { "epoch": 0.20626924153372517, "grad_norm": 0.30391551611435835, "learning_rate": 9.990484963348547e-05, "loss": 0.6027, "step": 737 }, { "epoch": 0.20654911838790932, "grad_norm": 0.3021305427216307, "learning_rate": 9.990389605675098e-05, "loss": 0.6201, "step": 738 }, { "epoch": 0.20682899524209347, "grad_norm": 0.3072286928000085, "learning_rate": 9.990293773013575e-05, "loss": 0.6285, "step": 739 }, { "epoch": 0.20710887209627762, "grad_norm": 0.29643099870704986, "learning_rate": 9.990197465373095e-05, "loss": 0.631, "step": 740 }, { "epoch": 0.2073887489504618, "grad_norm": 0.2892199981073291, "learning_rate": 9.990100682762828e-05, "loss": 0.6204, "step": 741 }, { "epoch": 0.20766862580464596, "grad_norm": 0.29927602855446483, "learning_rate": 9.990003425191985e-05, "loss": 0.6067, "step": 742 }, { "epoch": 0.2079485026588301, "grad_norm": 0.3013840368630654, "learning_rate": 9.989905692669823e-05, "loss": 0.6478, "step": 743 }, { "epoch": 0.20822837951301426, "grad_norm": 0.278203984020985, "learning_rate": 9.989807485205645e-05, "loss": 0.6369, "step": 744 }, { "epoch": 0.20850825636719844, "grad_norm": 0.30116207295894537, "learning_rate": 9.989708802808797e-05, "loss": 0.6307, "step": 745 }, { "epoch": 0.2087881332213826, "grad_norm": 0.3139662061574362, "learning_rate": 9.989609645488671e-05, "loss": 0.6237, "step": 746 }, { "epoch": 0.20906801007556675, "grad_norm": 0.29459551907956566, "learning_rate": 9.989510013254708e-05, "loss": 0.6299, "step": 747 }, { "epoch": 0.2093478869297509, "grad_norm": 0.2877917475806658, "learning_rate": 9.98940990611639e-05, "loss": 0.6289, "step": 748 }, { "epoch": 0.20962776378393508, "grad_norm": 0.3148595914832477, "learning_rate": 9.989309324083241e-05, "loss": 0.6381, "step": 749 }, { "epoch": 0.20990764063811923, "grad_norm": 0.29523633254911474, "learning_rate": 9.98920826716484e-05, "loss": 0.6236, "step": 750 }, { "epoch": 0.21018751749230338, "grad_norm": 0.2982350541251637, "learning_rate": 9.989106735370803e-05, "loss": 0.6046, "step": 751 }, { "epoch": 0.21046739434648754, "grad_norm": 0.28857266833371326, "learning_rate": 9.989004728710795e-05, "loss": 0.6033, "step": 752 }, { "epoch": 0.21074727120067172, "grad_norm": 0.2994922713392514, "learning_rate": 9.988902247194525e-05, "loss": 0.6359, "step": 753 }, { "epoch": 0.21102714805485587, "grad_norm": 0.29393640928691533, "learning_rate": 9.988799290831746e-05, "loss": 0.617, "step": 754 }, { "epoch": 0.21130702490904002, "grad_norm": 0.2786124339094459, "learning_rate": 9.988695859632257e-05, "loss": 0.6103, "step": 755 }, { "epoch": 0.21158690176322417, "grad_norm": 0.26576693865200707, "learning_rate": 9.988591953605906e-05, "loss": 0.6107, "step": 756 }, { "epoch": 0.21186677861740835, "grad_norm": 0.2846284299244608, "learning_rate": 9.98848757276258e-05, "loss": 0.6568, "step": 757 }, { "epoch": 0.2121466554715925, "grad_norm": 0.30853876057537716, "learning_rate": 9.988382717112213e-05, "loss": 0.5932, "step": 758 }, { "epoch": 0.21242653232577666, "grad_norm": 0.2849933204804723, "learning_rate": 9.988277386664788e-05, "loss": 0.6012, "step": 759 }, { "epoch": 0.2127064091799608, "grad_norm": 0.2845166335515254, "learning_rate": 9.98817158143033e-05, "loss": 0.6323, "step": 760 }, { "epoch": 0.212986286034145, "grad_norm": 0.28949031191183927, "learning_rate": 9.988065301418907e-05, "loss": 0.6286, "step": 761 }, { "epoch": 0.21326616288832914, "grad_norm": 0.2996863267327104, "learning_rate": 9.987958546640639e-05, "loss": 0.6261, "step": 762 }, { "epoch": 0.2135460397425133, "grad_norm": 0.2895413025607892, "learning_rate": 9.987851317105683e-05, "loss": 0.6012, "step": 763 }, { "epoch": 0.21382591659669745, "grad_norm": 0.2787834235743242, "learning_rate": 9.987743612824248e-05, "loss": 0.6412, "step": 764 }, { "epoch": 0.2141057934508816, "grad_norm": 0.285344899631201, "learning_rate": 9.987635433806582e-05, "loss": 0.6048, "step": 765 }, { "epoch": 0.21438567030506578, "grad_norm": 0.29709825924990785, "learning_rate": 9.987526780062986e-05, "loss": 0.6306, "step": 766 }, { "epoch": 0.21466554715924993, "grad_norm": 0.2955171277517626, "learning_rate": 9.987417651603799e-05, "loss": 0.6299, "step": 767 }, { "epoch": 0.21494542401343408, "grad_norm": 0.2801877576532585, "learning_rate": 9.987308048439408e-05, "loss": 0.598, "step": 768 }, { "epoch": 0.21522530086761824, "grad_norm": 0.27618254097083866, "learning_rate": 9.987197970580244e-05, "loss": 0.6148, "step": 769 }, { "epoch": 0.21550517772180242, "grad_norm": 0.2977612642300702, "learning_rate": 9.987087418036788e-05, "loss": 0.62, "step": 770 }, { "epoch": 0.21578505457598657, "grad_norm": 0.28309302062153524, "learning_rate": 9.986976390819559e-05, "loss": 0.6046, "step": 771 }, { "epoch": 0.21606493143017072, "grad_norm": 0.2892016544151395, "learning_rate": 9.986864888939126e-05, "loss": 0.624, "step": 772 }, { "epoch": 0.21634480828435487, "grad_norm": 0.29115742297952063, "learning_rate": 9.986752912406101e-05, "loss": 0.6199, "step": 773 }, { "epoch": 0.21662468513853905, "grad_norm": 0.3076197675401591, "learning_rate": 9.986640461231144e-05, "loss": 0.6066, "step": 774 }, { "epoch": 0.2169045619927232, "grad_norm": 0.2987549026154578, "learning_rate": 9.986527535424957e-05, "loss": 0.6214, "step": 775 }, { "epoch": 0.21718443884690736, "grad_norm": 0.30218128492263907, "learning_rate": 9.986414134998286e-05, "loss": 0.6294, "step": 776 }, { "epoch": 0.2174643157010915, "grad_norm": 0.2747490065906423, "learning_rate": 9.986300259961927e-05, "loss": 0.6338, "step": 777 }, { "epoch": 0.2177441925552757, "grad_norm": 0.2819702498657955, "learning_rate": 9.98618591032672e-05, "loss": 0.5994, "step": 778 }, { "epoch": 0.21802406940945984, "grad_norm": 0.28472568974607454, "learning_rate": 9.986071086103545e-05, "loss": 0.6065, "step": 779 }, { "epoch": 0.218303946263644, "grad_norm": 0.25512175714572793, "learning_rate": 9.985955787303335e-05, "loss": 0.6252, "step": 780 }, { "epoch": 0.21858382311782815, "grad_norm": 0.2833178477240765, "learning_rate": 9.985840013937062e-05, "loss": 0.609, "step": 781 }, { "epoch": 0.21886369997201233, "grad_norm": 0.29001540439460133, "learning_rate": 9.985723766015744e-05, "loss": 0.6513, "step": 782 }, { "epoch": 0.21914357682619648, "grad_norm": 0.28256802894257993, "learning_rate": 9.98560704355045e-05, "loss": 0.6078, "step": 783 }, { "epoch": 0.21942345368038063, "grad_norm": 0.31307453923452017, "learning_rate": 9.985489846552286e-05, "loss": 0.633, "step": 784 }, { "epoch": 0.21970333053456478, "grad_norm": 0.28850734424795094, "learning_rate": 9.985372175032408e-05, "loss": 0.6214, "step": 785 }, { "epoch": 0.21998320738874896, "grad_norm": 0.2791079028943534, "learning_rate": 9.985254029002015e-05, "loss": 0.6264, "step": 786 }, { "epoch": 0.22026308424293312, "grad_norm": 0.28860212120936385, "learning_rate": 9.985135408472355e-05, "loss": 0.6122, "step": 787 }, { "epoch": 0.22054296109711727, "grad_norm": 0.27794402322042944, "learning_rate": 9.985016313454715e-05, "loss": 0.621, "step": 788 }, { "epoch": 0.22082283795130142, "grad_norm": 0.26834532886232865, "learning_rate": 9.984896743960432e-05, "loss": 0.6267, "step": 789 }, { "epoch": 0.22110271480548557, "grad_norm": 0.29474895680880275, "learning_rate": 9.984776700000886e-05, "loss": 0.6023, "step": 790 }, { "epoch": 0.22138259165966975, "grad_norm": 0.29769900879551847, "learning_rate": 9.984656181587506e-05, "loss": 0.6205, "step": 791 }, { "epoch": 0.2216624685138539, "grad_norm": 0.274009754957161, "learning_rate": 9.984535188731759e-05, "loss": 0.5994, "step": 792 }, { "epoch": 0.22194234536803806, "grad_norm": 0.2988779618403234, "learning_rate": 9.984413721445162e-05, "loss": 0.6426, "step": 793 }, { "epoch": 0.2222222222222222, "grad_norm": 0.29594649319588723, "learning_rate": 9.984291779739278e-05, "loss": 0.615, "step": 794 }, { "epoch": 0.2225020990764064, "grad_norm": 0.30013434264676025, "learning_rate": 9.984169363625715e-05, "loss": 0.6371, "step": 795 }, { "epoch": 0.22278197593059054, "grad_norm": 0.2879459082697263, "learning_rate": 9.98404647311612e-05, "loss": 0.6141, "step": 796 }, { "epoch": 0.2230618527847747, "grad_norm": 0.2810009862527073, "learning_rate": 9.983923108222191e-05, "loss": 0.6065, "step": 797 }, { "epoch": 0.22334172963895885, "grad_norm": 0.29299520183344274, "learning_rate": 9.983799268955672e-05, "loss": 0.6099, "step": 798 }, { "epoch": 0.22362160649314303, "grad_norm": 0.2816039311381955, "learning_rate": 9.983674955328349e-05, "loss": 0.5978, "step": 799 }, { "epoch": 0.22390148334732718, "grad_norm": 0.2809159135916206, "learning_rate": 9.983550167352054e-05, "loss": 0.6027, "step": 800 }, { "epoch": 0.22418136020151133, "grad_norm": 0.288265729397608, "learning_rate": 9.983424905038666e-05, "loss": 0.6064, "step": 801 }, { "epoch": 0.22446123705569548, "grad_norm": 0.2741336820446325, "learning_rate": 9.983299168400105e-05, "loss": 0.6153, "step": 802 }, { "epoch": 0.22474111390987966, "grad_norm": 0.29156434791130137, "learning_rate": 9.983172957448339e-05, "loss": 0.6191, "step": 803 }, { "epoch": 0.22502099076406382, "grad_norm": 0.27991759137428984, "learning_rate": 9.983046272195384e-05, "loss": 0.6391, "step": 804 }, { "epoch": 0.22530086761824797, "grad_norm": 0.29192928869818263, "learning_rate": 9.982919112653293e-05, "loss": 0.6171, "step": 805 }, { "epoch": 0.22558074447243212, "grad_norm": 0.2893461284592979, "learning_rate": 9.982791478834173e-05, "loss": 0.6075, "step": 806 }, { "epoch": 0.2258606213266163, "grad_norm": 0.27061523394207493, "learning_rate": 9.982663370750172e-05, "loss": 0.6058, "step": 807 }, { "epoch": 0.22614049818080045, "grad_norm": 0.29574318337062955, "learning_rate": 9.982534788413482e-05, "loss": 0.6146, "step": 808 }, { "epoch": 0.2264203750349846, "grad_norm": 0.26800247836730356, "learning_rate": 9.982405731836342e-05, "loss": 0.6038, "step": 809 }, { "epoch": 0.22670025188916876, "grad_norm": 0.2803212952525355, "learning_rate": 9.982276201031034e-05, "loss": 0.5969, "step": 810 }, { "epoch": 0.22698012874335294, "grad_norm": 0.2966399380284488, "learning_rate": 9.98214619600989e-05, "loss": 0.5894, "step": 811 }, { "epoch": 0.2272600055975371, "grad_norm": 0.2686952657009844, "learning_rate": 9.982015716785282e-05, "loss": 0.6235, "step": 812 }, { "epoch": 0.22753988245172124, "grad_norm": 0.2939487483296079, "learning_rate": 9.98188476336963e-05, "loss": 0.6202, "step": 813 }, { "epoch": 0.2278197593059054, "grad_norm": 0.2722218703101461, "learning_rate": 9.981753335775398e-05, "loss": 0.6021, "step": 814 }, { "epoch": 0.22809963616008955, "grad_norm": 0.2839592936475026, "learning_rate": 9.981621434015094e-05, "loss": 0.6084, "step": 815 }, { "epoch": 0.22837951301427373, "grad_norm": 0.28133850499751295, "learning_rate": 9.981489058101275e-05, "loss": 0.6152, "step": 816 }, { "epoch": 0.22865938986845788, "grad_norm": 0.2867889951147302, "learning_rate": 9.981356208046537e-05, "loss": 0.6417, "step": 817 }, { "epoch": 0.22893926672264203, "grad_norm": 0.274845590590577, "learning_rate": 9.98122288386353e-05, "loss": 0.6004, "step": 818 }, { "epoch": 0.22921914357682618, "grad_norm": 0.29619409262200774, "learning_rate": 9.981089085564939e-05, "loss": 0.602, "step": 819 }, { "epoch": 0.22949902043101036, "grad_norm": 0.28827763748484586, "learning_rate": 9.9809548131635e-05, "loss": 0.6079, "step": 820 }, { "epoch": 0.22977889728519452, "grad_norm": 0.29019480587941326, "learning_rate": 9.980820066671994e-05, "loss": 0.6014, "step": 821 }, { "epoch": 0.23005877413937867, "grad_norm": 0.27407811837477164, "learning_rate": 9.980684846103247e-05, "loss": 0.5921, "step": 822 }, { "epoch": 0.23033865099356282, "grad_norm": 0.28569684349982677, "learning_rate": 9.980549151470129e-05, "loss": 0.6188, "step": 823 }, { "epoch": 0.230618527847747, "grad_norm": 0.29290777334920826, "learning_rate": 9.980412982785555e-05, "loss": 0.6273, "step": 824 }, { "epoch": 0.23089840470193115, "grad_norm": 0.28439951018793513, "learning_rate": 9.980276340062484e-05, "loss": 0.6133, "step": 825 }, { "epoch": 0.2311782815561153, "grad_norm": 0.2881461107716947, "learning_rate": 9.980139223313925e-05, "loss": 0.6233, "step": 826 }, { "epoch": 0.23145815841029946, "grad_norm": 0.29024146744075585, "learning_rate": 9.980001632552927e-05, "loss": 0.5906, "step": 827 }, { "epoch": 0.23173803526448364, "grad_norm": 0.2789044611831552, "learning_rate": 9.979863567792586e-05, "loss": 0.6049, "step": 828 }, { "epoch": 0.2320179121186678, "grad_norm": 0.2882599052261539, "learning_rate": 9.979725029046043e-05, "loss": 0.6007, "step": 829 }, { "epoch": 0.23229778897285194, "grad_norm": 0.27197359513956976, "learning_rate": 9.979586016326485e-05, "loss": 0.5905, "step": 830 }, { "epoch": 0.2325776658270361, "grad_norm": 0.26372064319553057, "learning_rate": 9.979446529647142e-05, "loss": 0.6102, "step": 831 }, { "epoch": 0.23285754268122028, "grad_norm": 0.2848276573029936, "learning_rate": 9.979306569021291e-05, "loss": 0.6252, "step": 832 }, { "epoch": 0.23313741953540443, "grad_norm": 0.2974637344184027, "learning_rate": 9.979166134462256e-05, "loss": 0.6212, "step": 833 }, { "epoch": 0.23341729638958858, "grad_norm": 0.2768186580125523, "learning_rate": 9.9790252259834e-05, "loss": 0.6105, "step": 834 }, { "epoch": 0.23369717324377273, "grad_norm": 0.2862169736585286, "learning_rate": 9.978883843598134e-05, "loss": 0.6201, "step": 835 }, { "epoch": 0.2339770500979569, "grad_norm": 0.2790379481170973, "learning_rate": 9.97874198731992e-05, "loss": 0.6285, "step": 836 }, { "epoch": 0.23425692695214106, "grad_norm": 0.27596925432949154, "learning_rate": 9.978599657162255e-05, "loss": 0.6091, "step": 837 }, { "epoch": 0.23453680380632522, "grad_norm": 0.27854083765069304, "learning_rate": 9.978456853138689e-05, "loss": 0.5935, "step": 838 }, { "epoch": 0.23481668066050937, "grad_norm": 0.2788150865363137, "learning_rate": 9.978313575262814e-05, "loss": 0.6339, "step": 839 }, { "epoch": 0.23509655751469352, "grad_norm": 0.2986905483488892, "learning_rate": 9.978169823548264e-05, "loss": 0.6049, "step": 840 }, { "epoch": 0.2353764343688777, "grad_norm": 0.26777121733565923, "learning_rate": 9.978025598008725e-05, "loss": 0.5833, "step": 841 }, { "epoch": 0.23565631122306185, "grad_norm": 0.26613159796298175, "learning_rate": 9.977880898657923e-05, "loss": 0.5819, "step": 842 }, { "epoch": 0.235936188077246, "grad_norm": 0.27348517758743046, "learning_rate": 9.977735725509632e-05, "loss": 0.6056, "step": 843 }, { "epoch": 0.23621606493143016, "grad_norm": 0.2675358231011222, "learning_rate": 9.977590078577667e-05, "loss": 0.6196, "step": 844 }, { "epoch": 0.23649594178561434, "grad_norm": 0.3033349991064293, "learning_rate": 9.977443957875894e-05, "loss": 0.627, "step": 845 }, { "epoch": 0.2367758186397985, "grad_norm": 0.28645441659428106, "learning_rate": 9.977297363418217e-05, "loss": 0.6184, "step": 846 }, { "epoch": 0.23705569549398264, "grad_norm": 0.27961588977671503, "learning_rate": 9.977150295218591e-05, "loss": 0.6278, "step": 847 }, { "epoch": 0.2373355723481668, "grad_norm": 0.27714345397000484, "learning_rate": 9.977002753291016e-05, "loss": 0.6039, "step": 848 }, { "epoch": 0.23761544920235098, "grad_norm": 0.28921622716812045, "learning_rate": 9.976854737649532e-05, "loss": 0.6138, "step": 849 }, { "epoch": 0.23789532605653513, "grad_norm": 0.28740298178395807, "learning_rate": 9.97670624830823e-05, "loss": 0.6326, "step": 850 }, { "epoch": 0.23817520291071928, "grad_norm": 0.3028247639351463, "learning_rate": 9.976557285281239e-05, "loss": 0.6345, "step": 851 }, { "epoch": 0.23845507976490343, "grad_norm": 0.29368597273038405, "learning_rate": 9.976407848582742e-05, "loss": 0.6202, "step": 852 }, { "epoch": 0.2387349566190876, "grad_norm": 0.2960219096193092, "learning_rate": 9.97625793822696e-05, "loss": 0.5859, "step": 853 }, { "epoch": 0.23901483347327176, "grad_norm": 0.28193812478404506, "learning_rate": 9.976107554228163e-05, "loss": 0.5885, "step": 854 }, { "epoch": 0.23929471032745592, "grad_norm": 0.2603374063558071, "learning_rate": 9.975956696600662e-05, "loss": 0.5936, "step": 855 }, { "epoch": 0.23957458718164007, "grad_norm": 0.2724833017435636, "learning_rate": 9.975805365358818e-05, "loss": 0.6019, "step": 856 }, { "epoch": 0.23985446403582425, "grad_norm": 0.28411268929988087, "learning_rate": 9.975653560517035e-05, "loss": 0.6093, "step": 857 }, { "epoch": 0.2401343408900084, "grad_norm": 0.27073318243611616, "learning_rate": 9.97550128208976e-05, "loss": 0.618, "step": 858 }, { "epoch": 0.24041421774419255, "grad_norm": 0.27563149813291027, "learning_rate": 9.975348530091488e-05, "loss": 0.5827, "step": 859 }, { "epoch": 0.2406940945983767, "grad_norm": 0.2730665314327667, "learning_rate": 9.97519530453676e-05, "loss": 0.5936, "step": 860 }, { "epoch": 0.2409739714525609, "grad_norm": 0.2891190589294283, "learning_rate": 9.975041605440157e-05, "loss": 0.5869, "step": 861 }, { "epoch": 0.24125384830674504, "grad_norm": 0.2784758939476977, "learning_rate": 9.974887432816309e-05, "loss": 0.5875, "step": 862 }, { "epoch": 0.2415337251609292, "grad_norm": 0.2738695742355149, "learning_rate": 9.974732786679892e-05, "loss": 0.569, "step": 863 }, { "epoch": 0.24181360201511334, "grad_norm": 0.3014195796535495, "learning_rate": 9.974577667045622e-05, "loss": 0.617, "step": 864 }, { "epoch": 0.2420934788692975, "grad_norm": 0.28435696969419094, "learning_rate": 9.974422073928266e-05, "loss": 0.6171, "step": 865 }, { "epoch": 0.24237335572348168, "grad_norm": 0.27668635302453437, "learning_rate": 9.974266007342632e-05, "loss": 0.6019, "step": 866 }, { "epoch": 0.24265323257766583, "grad_norm": 0.29211185935471046, "learning_rate": 9.974109467303576e-05, "loss": 0.649, "step": 867 }, { "epoch": 0.24293310943184998, "grad_norm": 0.3056363778877445, "learning_rate": 9.973952453825997e-05, "loss": 0.6185, "step": 868 }, { "epoch": 0.24321298628603413, "grad_norm": 0.27136141444147716, "learning_rate": 9.973794966924839e-05, "loss": 0.6409, "step": 869 }, { "epoch": 0.2434928631402183, "grad_norm": 0.27814182446754976, "learning_rate": 9.973637006615094e-05, "loss": 0.5939, "step": 870 }, { "epoch": 0.24377273999440247, "grad_norm": 0.27448637013656907, "learning_rate": 9.973478572911792e-05, "loss": 0.6398, "step": 871 }, { "epoch": 0.24405261684858662, "grad_norm": 0.27623380550060017, "learning_rate": 9.973319665830016e-05, "loss": 0.6351, "step": 872 }, { "epoch": 0.24433249370277077, "grad_norm": 0.2726881086669371, "learning_rate": 9.973160285384891e-05, "loss": 0.5702, "step": 873 }, { "epoch": 0.24461237055695495, "grad_norm": 0.27001277974661575, "learning_rate": 9.973000431591587e-05, "loss": 0.6309, "step": 874 }, { "epoch": 0.2448922474111391, "grad_norm": 0.285476081481007, "learning_rate": 9.972840104465318e-05, "loss": 0.5909, "step": 875 }, { "epoch": 0.24517212426532325, "grad_norm": 0.29280323111517875, "learning_rate": 9.972679304021344e-05, "loss": 0.6203, "step": 876 }, { "epoch": 0.2454520011195074, "grad_norm": 0.29271860848920134, "learning_rate": 9.972518030274971e-05, "loss": 0.6014, "step": 877 }, { "epoch": 0.2457318779736916, "grad_norm": 0.2901211225693263, "learning_rate": 9.97235628324155e-05, "loss": 0.6282, "step": 878 }, { "epoch": 0.24601175482787574, "grad_norm": 0.28248728430486114, "learning_rate": 9.972194062936473e-05, "loss": 0.6157, "step": 879 }, { "epoch": 0.2462916316820599, "grad_norm": 0.2874781275011946, "learning_rate": 9.972031369375182e-05, "loss": 0.5917, "step": 880 }, { "epoch": 0.24657150853624404, "grad_norm": 0.2911616785918501, "learning_rate": 9.971868202573162e-05, "loss": 0.6075, "step": 881 }, { "epoch": 0.24685138539042822, "grad_norm": 0.279464811855995, "learning_rate": 9.971704562545945e-05, "loss": 0.6248, "step": 882 }, { "epoch": 0.24713126224461238, "grad_norm": 0.29786012151021235, "learning_rate": 9.971540449309104e-05, "loss": 0.6231, "step": 883 }, { "epoch": 0.24741113909879653, "grad_norm": 0.28285506113295183, "learning_rate": 9.971375862878262e-05, "loss": 0.6162, "step": 884 }, { "epoch": 0.24769101595298068, "grad_norm": 0.2850500642631151, "learning_rate": 9.971210803269081e-05, "loss": 0.5769, "step": 885 }, { "epoch": 0.24797089280716486, "grad_norm": 0.27139297175942056, "learning_rate": 9.971045270497273e-05, "loss": 0.5813, "step": 886 }, { "epoch": 0.248250769661349, "grad_norm": 0.26038124190330875, "learning_rate": 9.970879264578595e-05, "loss": 0.6071, "step": 887 }, { "epoch": 0.24853064651553317, "grad_norm": 0.2778961647917277, "learning_rate": 9.970712785528846e-05, "loss": 0.5925, "step": 888 }, { "epoch": 0.24881052336971732, "grad_norm": 0.28236769242362414, "learning_rate": 9.970545833363871e-05, "loss": 0.6289, "step": 889 }, { "epoch": 0.24909040022390147, "grad_norm": 0.2939545197445072, "learning_rate": 9.970378408099562e-05, "loss": 0.6006, "step": 890 }, { "epoch": 0.24937027707808565, "grad_norm": 0.27056136323662533, "learning_rate": 9.970210509751854e-05, "loss": 0.6116, "step": 891 }, { "epoch": 0.2496501539322698, "grad_norm": 0.2647281072320447, "learning_rate": 9.970042138336728e-05, "loss": 0.6102, "step": 892 }, { "epoch": 0.24993003078645395, "grad_norm": 0.2761621321826647, "learning_rate": 9.969873293870209e-05, "loss": 0.5794, "step": 893 }, { "epoch": 0.2502099076406381, "grad_norm": 0.28508553823962185, "learning_rate": 9.969703976368368e-05, "loss": 0.6215, "step": 894 }, { "epoch": 0.2504897844948223, "grad_norm": 0.2924319229186995, "learning_rate": 9.969534185847322e-05, "loss": 0.6241, "step": 895 }, { "epoch": 0.2507696613490064, "grad_norm": 0.26778201393431406, "learning_rate": 9.96936392232323e-05, "loss": 0.624, "step": 896 }, { "epoch": 0.2510495382031906, "grad_norm": 0.29427879854368805, "learning_rate": 9.969193185812298e-05, "loss": 0.6199, "step": 897 }, { "epoch": 0.25132941505737477, "grad_norm": 0.2948563585971674, "learning_rate": 9.969021976330777e-05, "loss": 0.5879, "step": 898 }, { "epoch": 0.2516092919115589, "grad_norm": 0.28148667221036944, "learning_rate": 9.968850293894964e-05, "loss": 0.5579, "step": 899 }, { "epoch": 0.2518891687657431, "grad_norm": 0.25900686462303496, "learning_rate": 9.968678138521198e-05, "loss": 0.6123, "step": 900 }, { "epoch": 0.25216904561992726, "grad_norm": 0.2626827498089003, "learning_rate": 9.968505510225866e-05, "loss": 0.5784, "step": 901 }, { "epoch": 0.2524489224741114, "grad_norm": 0.28274277452405416, "learning_rate": 9.968332409025398e-05, "loss": 0.595, "step": 902 }, { "epoch": 0.25272879932829556, "grad_norm": 0.271630482547548, "learning_rate": 9.968158834936272e-05, "loss": 0.6056, "step": 903 }, { "epoch": 0.2530086761824797, "grad_norm": 0.3051229110087096, "learning_rate": 9.967984787975007e-05, "loss": 0.6362, "step": 904 }, { "epoch": 0.25328855303666387, "grad_norm": 0.28806620249122716, "learning_rate": 9.967810268158167e-05, "loss": 0.5967, "step": 905 }, { "epoch": 0.25356842989084805, "grad_norm": 0.27491245903789907, "learning_rate": 9.967635275502368e-05, "loss": 0.6202, "step": 906 }, { "epoch": 0.25384830674503217, "grad_norm": 0.28456405223038556, "learning_rate": 9.96745981002426e-05, "loss": 0.6083, "step": 907 }, { "epoch": 0.25412818359921635, "grad_norm": 0.26460147110990734, "learning_rate": 9.967283871740548e-05, "loss": 0.5917, "step": 908 }, { "epoch": 0.25440806045340053, "grad_norm": 0.26918516291180145, "learning_rate": 9.967107460667978e-05, "loss": 0.5497, "step": 909 }, { "epoch": 0.25468793730758466, "grad_norm": 0.2840242578443607, "learning_rate": 9.966930576823338e-05, "loss": 0.6493, "step": 910 }, { "epoch": 0.25496781416176884, "grad_norm": 0.283115742278163, "learning_rate": 9.966753220223465e-05, "loss": 0.6049, "step": 911 }, { "epoch": 0.25524769101595296, "grad_norm": 0.2714744996140369, "learning_rate": 9.966575390885244e-05, "loss": 0.5958, "step": 912 }, { "epoch": 0.25552756787013714, "grad_norm": 0.26846164422872715, "learning_rate": 9.966397088825594e-05, "loss": 0.6164, "step": 913 }, { "epoch": 0.2558074447243213, "grad_norm": 0.27508859334968633, "learning_rate": 9.96621831406149e-05, "loss": 0.5992, "step": 914 }, { "epoch": 0.25608732157850544, "grad_norm": 0.27333482422132627, "learning_rate": 9.966039066609949e-05, "loss": 0.601, "step": 915 }, { "epoch": 0.2563671984326896, "grad_norm": 0.29591881112266905, "learning_rate": 9.96585934648803e-05, "loss": 0.6307, "step": 916 }, { "epoch": 0.25664707528687375, "grad_norm": 0.2599796159340984, "learning_rate": 9.965679153712836e-05, "loss": 0.6164, "step": 917 }, { "epoch": 0.25692695214105793, "grad_norm": 0.26053285881351435, "learning_rate": 9.965498488301522e-05, "loss": 0.5906, "step": 918 }, { "epoch": 0.2572068289952421, "grad_norm": 0.2759319240086795, "learning_rate": 9.965317350271284e-05, "loss": 0.5892, "step": 919 }, { "epoch": 0.25748670584942623, "grad_norm": 0.27698261367548277, "learning_rate": 9.965135739639359e-05, "loss": 0.5982, "step": 920 }, { "epoch": 0.2577665827036104, "grad_norm": 0.3357178697038624, "learning_rate": 9.964953656423038e-05, "loss": 0.6202, "step": 921 }, { "epoch": 0.2580464595577946, "grad_norm": 0.28472242803358416, "learning_rate": 9.964771100639646e-05, "loss": 0.5902, "step": 922 }, { "epoch": 0.2583263364119787, "grad_norm": 0.2918879034888202, "learning_rate": 9.964588072306563e-05, "loss": 0.6003, "step": 923 }, { "epoch": 0.2586062132661629, "grad_norm": 0.2595496788501369, "learning_rate": 9.964404571441208e-05, "loss": 0.6095, "step": 924 }, { "epoch": 0.258886090120347, "grad_norm": 0.28799642407440884, "learning_rate": 9.964220598061049e-05, "loss": 0.5924, "step": 925 }, { "epoch": 0.2591659669745312, "grad_norm": 0.28278716744834675, "learning_rate": 9.964036152183593e-05, "loss": 0.589, "step": 926 }, { "epoch": 0.2594458438287154, "grad_norm": 0.2777978462846363, "learning_rate": 9.963851233826397e-05, "loss": 0.5989, "step": 927 }, { "epoch": 0.2597257206828995, "grad_norm": 0.28105828021526974, "learning_rate": 9.963665843007064e-05, "loss": 0.6118, "step": 928 }, { "epoch": 0.2600055975370837, "grad_norm": 0.27873172937419916, "learning_rate": 9.963479979743237e-05, "loss": 0.6148, "step": 929 }, { "epoch": 0.26028547439126787, "grad_norm": 0.28443215714757414, "learning_rate": 9.963293644052609e-05, "loss": 0.6184, "step": 930 }, { "epoch": 0.260565351245452, "grad_norm": 0.2828574419909584, "learning_rate": 9.963106835952912e-05, "loss": 0.6126, "step": 931 }, { "epoch": 0.2608452280996362, "grad_norm": 0.28167140161768456, "learning_rate": 9.96291955546193e-05, "loss": 0.6351, "step": 932 }, { "epoch": 0.2611251049538203, "grad_norm": 0.25233320469051, "learning_rate": 9.962731802597484e-05, "loss": 0.5965, "step": 933 }, { "epoch": 0.2614049818080045, "grad_norm": 0.27303090462669166, "learning_rate": 9.96254357737745e-05, "loss": 0.6062, "step": 934 }, { "epoch": 0.26168485866218866, "grad_norm": 0.25705929063520483, "learning_rate": 9.96235487981974e-05, "loss": 0.5998, "step": 935 }, { "epoch": 0.2619647355163728, "grad_norm": 0.2640967425519281, "learning_rate": 9.962165709942313e-05, "loss": 0.5946, "step": 936 }, { "epoch": 0.26224461237055696, "grad_norm": 0.2735664946761113, "learning_rate": 9.961976067763179e-05, "loss": 0.6025, "step": 937 }, { "epoch": 0.2625244892247411, "grad_norm": 0.27131297193419807, "learning_rate": 9.961785953300385e-05, "loss": 0.6231, "step": 938 }, { "epoch": 0.26280436607892527, "grad_norm": 0.2684145277146189, "learning_rate": 9.961595366572025e-05, "loss": 0.6134, "step": 939 }, { "epoch": 0.26308424293310945, "grad_norm": 0.2514536858988313, "learning_rate": 9.961404307596243e-05, "loss": 0.5714, "step": 940 }, { "epoch": 0.26336411978729357, "grad_norm": 0.26661765756611233, "learning_rate": 9.96121277639122e-05, "loss": 0.629, "step": 941 }, { "epoch": 0.26364399664147775, "grad_norm": 0.28014701288949345, "learning_rate": 9.961020772975189e-05, "loss": 0.6151, "step": 942 }, { "epoch": 0.26392387349566193, "grad_norm": 0.2754571173200175, "learning_rate": 9.960828297366425e-05, "loss": 0.597, "step": 943 }, { "epoch": 0.26420375034984606, "grad_norm": 0.2730088822424412, "learning_rate": 9.960635349583245e-05, "loss": 0.6072, "step": 944 }, { "epoch": 0.26448362720403024, "grad_norm": 0.2703578819692016, "learning_rate": 9.960441929644017e-05, "loss": 0.6144, "step": 945 }, { "epoch": 0.26476350405821436, "grad_norm": 0.27573868394189366, "learning_rate": 9.960248037567149e-05, "loss": 0.6014, "step": 946 }, { "epoch": 0.26504338091239854, "grad_norm": 0.2908649459980856, "learning_rate": 9.960053673371097e-05, "loss": 0.5792, "step": 947 }, { "epoch": 0.2653232577665827, "grad_norm": 0.28223122241673676, "learning_rate": 9.959858837074361e-05, "loss": 0.5889, "step": 948 }, { "epoch": 0.26560313462076685, "grad_norm": 0.26432974167954687, "learning_rate": 9.959663528695482e-05, "loss": 0.5978, "step": 949 }, { "epoch": 0.265883011474951, "grad_norm": 0.2723457851349199, "learning_rate": 9.959467748253055e-05, "loss": 0.6224, "step": 950 }, { "epoch": 0.2661628883291352, "grad_norm": 0.27948914118114576, "learning_rate": 9.95927149576571e-05, "loss": 0.6059, "step": 951 }, { "epoch": 0.26644276518331933, "grad_norm": 0.2626681250038233, "learning_rate": 9.959074771252131e-05, "loss": 0.6011, "step": 952 }, { "epoch": 0.2667226420375035, "grad_norm": 0.2762883435064009, "learning_rate": 9.958877574731037e-05, "loss": 0.621, "step": 953 }, { "epoch": 0.26700251889168763, "grad_norm": 0.26163482978376007, "learning_rate": 9.9586799062212e-05, "loss": 0.6056, "step": 954 }, { "epoch": 0.2672823957458718, "grad_norm": 0.27237691070952597, "learning_rate": 9.958481765741434e-05, "loss": 0.6044, "step": 955 }, { "epoch": 0.267562272600056, "grad_norm": 0.26363886412093257, "learning_rate": 9.958283153310599e-05, "loss": 0.5942, "step": 956 }, { "epoch": 0.2678421494542401, "grad_norm": 0.27270700436482376, "learning_rate": 9.958084068947598e-05, "loss": 0.5887, "step": 957 }, { "epoch": 0.2681220263084243, "grad_norm": 0.2765738289591348, "learning_rate": 9.95788451267138e-05, "loss": 0.5823, "step": 958 }, { "epoch": 0.2684019031626085, "grad_norm": 0.2662914222098519, "learning_rate": 9.957684484500938e-05, "loss": 0.5842, "step": 959 }, { "epoch": 0.2686817800167926, "grad_norm": 0.2693829553518625, "learning_rate": 9.957483984455313e-05, "loss": 0.612, "step": 960 }, { "epoch": 0.2689616568709768, "grad_norm": 0.28198638950057897, "learning_rate": 9.957283012553587e-05, "loss": 0.6308, "step": 961 }, { "epoch": 0.2692415337251609, "grad_norm": 0.2805583044437407, "learning_rate": 9.95708156881489e-05, "loss": 0.617, "step": 962 }, { "epoch": 0.2695214105793451, "grad_norm": 0.28291665878839123, "learning_rate": 9.956879653258394e-05, "loss": 0.5906, "step": 963 }, { "epoch": 0.26980128743352927, "grad_norm": 0.28059897948775586, "learning_rate": 9.956677265903318e-05, "loss": 0.5817, "step": 964 }, { "epoch": 0.2700811642877134, "grad_norm": 0.2622792783790028, "learning_rate": 9.956474406768925e-05, "loss": 0.6236, "step": 965 }, { "epoch": 0.2703610411418976, "grad_norm": 0.2716935989110165, "learning_rate": 9.956271075874526e-05, "loss": 0.5948, "step": 966 }, { "epoch": 0.2706409179960817, "grad_norm": 0.27814557340457063, "learning_rate": 9.95606727323947e-05, "loss": 0.5819, "step": 967 }, { "epoch": 0.2709207948502659, "grad_norm": 0.25713661611856053, "learning_rate": 9.955862998883157e-05, "loss": 0.6038, "step": 968 }, { "epoch": 0.27120067170445006, "grad_norm": 0.35186999778057515, "learning_rate": 9.95565825282503e-05, "loss": 0.5926, "step": 969 }, { "epoch": 0.2714805485586342, "grad_norm": 0.267435250594246, "learning_rate": 9.955453035084576e-05, "loss": 0.5834, "step": 970 }, { "epoch": 0.27176042541281836, "grad_norm": 0.2813180143133331, "learning_rate": 9.95524734568133e-05, "loss": 0.6113, "step": 971 }, { "epoch": 0.27204030226700254, "grad_norm": 0.27599248130038134, "learning_rate": 9.955041184634867e-05, "loss": 0.6339, "step": 972 }, { "epoch": 0.27232017912118667, "grad_norm": 0.2759096761375916, "learning_rate": 9.95483455196481e-05, "loss": 0.5978, "step": 973 }, { "epoch": 0.27260005597537085, "grad_norm": 0.2748515788564237, "learning_rate": 9.954627447690828e-05, "loss": 0.6157, "step": 974 }, { "epoch": 0.27287993282955497, "grad_norm": 0.27312379599353603, "learning_rate": 9.954419871832632e-05, "loss": 0.6021, "step": 975 }, { "epoch": 0.27315980968373915, "grad_norm": 0.26254830363154275, "learning_rate": 9.95421182440998e-05, "loss": 0.5949, "step": 976 }, { "epoch": 0.27343968653792333, "grad_norm": 0.26614398085798086, "learning_rate": 9.954003305442673e-05, "loss": 0.5834, "step": 977 }, { "epoch": 0.27371956339210746, "grad_norm": 0.25638339807793464, "learning_rate": 9.95379431495056e-05, "loss": 0.5787, "step": 978 }, { "epoch": 0.27399944024629164, "grad_norm": 0.2936388532502744, "learning_rate": 9.953584852953529e-05, "loss": 0.5972, "step": 979 }, { "epoch": 0.2742793171004758, "grad_norm": 0.2742154692474115, "learning_rate": 9.953374919471522e-05, "loss": 0.5862, "step": 980 }, { "epoch": 0.27455919395465994, "grad_norm": 0.28776549392919626, "learning_rate": 9.953164514524513e-05, "loss": 0.6145, "step": 981 }, { "epoch": 0.2748390708088441, "grad_norm": 0.26552937227956486, "learning_rate": 9.952953638132536e-05, "loss": 0.6087, "step": 982 }, { "epoch": 0.27511894766302825, "grad_norm": 0.2786682092441462, "learning_rate": 9.95274229031566e-05, "loss": 0.6083, "step": 983 }, { "epoch": 0.2753988245172124, "grad_norm": 0.271474680333771, "learning_rate": 9.952530471094e-05, "loss": 0.6017, "step": 984 }, { "epoch": 0.2756787013713966, "grad_norm": 0.28260129028292885, "learning_rate": 9.952318180487717e-05, "loss": 0.5846, "step": 985 }, { "epoch": 0.27595857822558073, "grad_norm": 0.27022931757510327, "learning_rate": 9.95210541851702e-05, "loss": 0.6158, "step": 986 }, { "epoch": 0.2762384550797649, "grad_norm": 0.2680005624160554, "learning_rate": 9.951892185202154e-05, "loss": 0.6172, "step": 987 }, { "epoch": 0.27651833193394904, "grad_norm": 0.27830258224366494, "learning_rate": 9.95167848056342e-05, "loss": 0.5998, "step": 988 }, { "epoch": 0.2767982087881332, "grad_norm": 0.2792144180130348, "learning_rate": 9.951464304621156e-05, "loss": 0.6098, "step": 989 }, { "epoch": 0.2770780856423174, "grad_norm": 0.2725138960107155, "learning_rate": 9.95124965739575e-05, "loss": 0.6086, "step": 990 }, { "epoch": 0.2773579624965015, "grad_norm": 0.2627344099137454, "learning_rate": 9.951034538907628e-05, "loss": 0.5989, "step": 991 }, { "epoch": 0.2776378393506857, "grad_norm": 0.27102831176084596, "learning_rate": 9.950818949177268e-05, "loss": 0.5889, "step": 992 }, { "epoch": 0.2779177162048699, "grad_norm": 0.26694858006916766, "learning_rate": 9.950602888225189e-05, "loss": 0.5933, "step": 993 }, { "epoch": 0.278197593059054, "grad_norm": 0.2625987723781404, "learning_rate": 9.950386356071957e-05, "loss": 0.5788, "step": 994 }, { "epoch": 0.2784774699132382, "grad_norm": 0.26360059118488244, "learning_rate": 9.950169352738181e-05, "loss": 0.5985, "step": 995 }, { "epoch": 0.2787573467674223, "grad_norm": 0.2615450758854652, "learning_rate": 9.949951878244515e-05, "loss": 0.5909, "step": 996 }, { "epoch": 0.2790372236216065, "grad_norm": 0.27196421232001006, "learning_rate": 9.949733932611658e-05, "loss": 0.622, "step": 997 }, { "epoch": 0.27931710047579067, "grad_norm": 0.2910453173297014, "learning_rate": 9.949515515860354e-05, "loss": 0.6075, "step": 998 }, { "epoch": 0.2795969773299748, "grad_norm": 0.26061247085466427, "learning_rate": 9.949296628011394e-05, "loss": 0.5884, "step": 999 }, { "epoch": 0.279876854184159, "grad_norm": 0.27444733214256306, "learning_rate": 9.949077269085612e-05, "loss": 0.603, "step": 1000 }, { "epoch": 0.28015673103834315, "grad_norm": 0.27391616009521086, "learning_rate": 9.948857439103882e-05, "loss": 0.5959, "step": 1001 }, { "epoch": 0.2804366078925273, "grad_norm": 0.2760842761307169, "learning_rate": 9.948637138087133e-05, "loss": 0.5807, "step": 1002 }, { "epoch": 0.28071648474671146, "grad_norm": 0.2669814240346824, "learning_rate": 9.948416366056332e-05, "loss": 0.5718, "step": 1003 }, { "epoch": 0.2809963616008956, "grad_norm": 0.28233653487539345, "learning_rate": 9.948195123032491e-05, "loss": 0.65, "step": 1004 }, { "epoch": 0.28127623845507976, "grad_norm": 0.27260826487029294, "learning_rate": 9.947973409036669e-05, "loss": 0.5911, "step": 1005 }, { "epoch": 0.28155611530926394, "grad_norm": 0.28098283552885095, "learning_rate": 9.947751224089968e-05, "loss": 0.5897, "step": 1006 }, { "epoch": 0.28183599216344807, "grad_norm": 0.2676544920210222, "learning_rate": 9.947528568213536e-05, "loss": 0.5909, "step": 1007 }, { "epoch": 0.28211586901763225, "grad_norm": 0.2587664698367932, "learning_rate": 9.947305441428565e-05, "loss": 0.6, "step": 1008 }, { "epoch": 0.28239574587181643, "grad_norm": 0.26104593545132293, "learning_rate": 9.947081843756293e-05, "loss": 0.5701, "step": 1009 }, { "epoch": 0.28267562272600055, "grad_norm": 0.2777624470132468, "learning_rate": 9.946857775218003e-05, "loss": 0.5734, "step": 1010 }, { "epoch": 0.28295549958018473, "grad_norm": 0.271606897904836, "learning_rate": 9.94663323583502e-05, "loss": 0.6035, "step": 1011 }, { "epoch": 0.28323537643436886, "grad_norm": 0.2663552253008882, "learning_rate": 9.946408225628719e-05, "loss": 0.5923, "step": 1012 }, { "epoch": 0.28351525328855304, "grad_norm": 0.2627440179998988, "learning_rate": 9.946182744620512e-05, "loss": 0.5849, "step": 1013 }, { "epoch": 0.2837951301427372, "grad_norm": 0.2662012385006806, "learning_rate": 9.945956792831863e-05, "loss": 0.5875, "step": 1014 }, { "epoch": 0.28407500699692134, "grad_norm": 0.28504881278858585, "learning_rate": 9.94573037028428e-05, "loss": 0.5744, "step": 1015 }, { "epoch": 0.2843548838511055, "grad_norm": 0.288989312859365, "learning_rate": 9.945503476999311e-05, "loss": 0.6071, "step": 1016 }, { "epoch": 0.28463476070528965, "grad_norm": 0.2645334711064779, "learning_rate": 9.945276112998553e-05, "loss": 0.6271, "step": 1017 }, { "epoch": 0.2849146375594738, "grad_norm": 0.2588427238423153, "learning_rate": 9.945048278303645e-05, "loss": 0.6045, "step": 1018 }, { "epoch": 0.285194514413658, "grad_norm": 0.27436590182121146, "learning_rate": 9.944819972936277e-05, "loss": 0.6195, "step": 1019 }, { "epoch": 0.28547439126784213, "grad_norm": 0.275966442208794, "learning_rate": 9.944591196918175e-05, "loss": 0.5998, "step": 1020 }, { "epoch": 0.2857542681220263, "grad_norm": 0.2794753908066549, "learning_rate": 9.944361950271115e-05, "loss": 0.6101, "step": 1021 }, { "epoch": 0.2860341449762105, "grad_norm": 0.27313870946188806, "learning_rate": 9.944132233016916e-05, "loss": 0.6022, "step": 1022 }, { "epoch": 0.2863140218303946, "grad_norm": 0.26652648188303374, "learning_rate": 9.943902045177445e-05, "loss": 0.5968, "step": 1023 }, { "epoch": 0.2865938986845788, "grad_norm": 0.2794085723982734, "learning_rate": 9.943671386774611e-05, "loss": 0.5927, "step": 1024 }, { "epoch": 0.2868737755387629, "grad_norm": 0.2796160365142537, "learning_rate": 9.943440257830366e-05, "loss": 0.6057, "step": 1025 }, { "epoch": 0.2871536523929471, "grad_norm": 0.26291805107032756, "learning_rate": 9.943208658366711e-05, "loss": 0.5876, "step": 1026 }, { "epoch": 0.2874335292471313, "grad_norm": 0.26187642395698535, "learning_rate": 9.942976588405689e-05, "loss": 0.5436, "step": 1027 }, { "epoch": 0.2877134061013154, "grad_norm": 0.2744183024872371, "learning_rate": 9.942744047969388e-05, "loss": 0.6115, "step": 1028 }, { "epoch": 0.2879932829554996, "grad_norm": 0.264826759070424, "learning_rate": 9.942511037079942e-05, "loss": 0.6198, "step": 1029 }, { "epoch": 0.28827315980968377, "grad_norm": 0.27401931788791184, "learning_rate": 9.942277555759529e-05, "loss": 0.6001, "step": 1030 }, { "epoch": 0.2885530366638679, "grad_norm": 0.26733469295639417, "learning_rate": 9.942043604030372e-05, "loss": 0.5619, "step": 1031 }, { "epoch": 0.28883291351805207, "grad_norm": 0.26524110787728267, "learning_rate": 9.941809181914738e-05, "loss": 0.5811, "step": 1032 }, { "epoch": 0.2891127903722362, "grad_norm": 0.25400137970600717, "learning_rate": 9.941574289434941e-05, "loss": 0.5917, "step": 1033 }, { "epoch": 0.2893926672264204, "grad_norm": 0.263302696284142, "learning_rate": 9.941338926613337e-05, "loss": 0.6041, "step": 1034 }, { "epoch": 0.28967254408060455, "grad_norm": 0.2555759343619727, "learning_rate": 9.941103093472329e-05, "loss": 0.5863, "step": 1035 }, { "epoch": 0.2899524209347887, "grad_norm": 0.2728911085684263, "learning_rate": 9.940866790034363e-05, "loss": 0.5894, "step": 1036 }, { "epoch": 0.29023229778897286, "grad_norm": 0.2586285672017633, "learning_rate": 9.940630016321928e-05, "loss": 0.5899, "step": 1037 }, { "epoch": 0.290512174643157, "grad_norm": 0.2596358831445758, "learning_rate": 9.940392772357565e-05, "loss": 0.6286, "step": 1038 }, { "epoch": 0.29079205149734116, "grad_norm": 0.2561486933508156, "learning_rate": 9.940155058163851e-05, "loss": 0.5794, "step": 1039 }, { "epoch": 0.29107192835152534, "grad_norm": 0.27378265122860135, "learning_rate": 9.939916873763415e-05, "loss": 0.6152, "step": 1040 }, { "epoch": 0.29135180520570947, "grad_norm": 0.26343237493237315, "learning_rate": 9.939678219178925e-05, "loss": 0.5963, "step": 1041 }, { "epoch": 0.29163168205989365, "grad_norm": 0.2692789610845327, "learning_rate": 9.939439094433098e-05, "loss": 0.5756, "step": 1042 }, { "epoch": 0.29191155891407783, "grad_norm": 0.265489619392448, "learning_rate": 9.939199499548692e-05, "loss": 0.6048, "step": 1043 }, { "epoch": 0.29219143576826195, "grad_norm": 0.26159473965570745, "learning_rate": 9.938959434548513e-05, "loss": 0.5992, "step": 1044 }, { "epoch": 0.29247131262244613, "grad_norm": 0.2651332960627244, "learning_rate": 9.938718899455413e-05, "loss": 0.5668, "step": 1045 }, { "epoch": 0.29275118947663026, "grad_norm": 0.2713938373931137, "learning_rate": 9.938477894292281e-05, "loss": 0.579, "step": 1046 }, { "epoch": 0.29303106633081444, "grad_norm": 0.2629954882749189, "learning_rate": 9.938236419082061e-05, "loss": 0.5852, "step": 1047 }, { "epoch": 0.2933109431849986, "grad_norm": 0.2748138533245085, "learning_rate": 9.937994473847733e-05, "loss": 0.6048, "step": 1048 }, { "epoch": 0.29359082003918274, "grad_norm": 0.2687295022545514, "learning_rate": 9.937752058612328e-05, "loss": 0.608, "step": 1049 }, { "epoch": 0.2938706968933669, "grad_norm": 0.26658527628367473, "learning_rate": 9.937509173398918e-05, "loss": 0.5851, "step": 1050 }, { "epoch": 0.2941505737475511, "grad_norm": 0.2728718605298641, "learning_rate": 9.93726581823062e-05, "loss": 0.5877, "step": 1051 }, { "epoch": 0.2944304506017352, "grad_norm": 0.26644686401704976, "learning_rate": 9.9370219931306e-05, "loss": 0.5696, "step": 1052 }, { "epoch": 0.2947103274559194, "grad_norm": 0.25271495199119975, "learning_rate": 9.936777698122061e-05, "loss": 0.591, "step": 1053 }, { "epoch": 0.29499020431010353, "grad_norm": 0.2598427046722628, "learning_rate": 9.93653293322826e-05, "loss": 0.6115, "step": 1054 }, { "epoch": 0.2952700811642877, "grad_norm": 0.2725678140201863, "learning_rate": 9.93628769847249e-05, "loss": 0.5837, "step": 1055 }, { "epoch": 0.2955499580184719, "grad_norm": 0.2704809174511505, "learning_rate": 9.936041993878093e-05, "loss": 0.6075, "step": 1056 }, { "epoch": 0.295829834872656, "grad_norm": 0.2693829026217594, "learning_rate": 9.935795819468459e-05, "loss": 0.5817, "step": 1057 }, { "epoch": 0.2961097117268402, "grad_norm": 0.2632695991665687, "learning_rate": 9.935549175267013e-05, "loss": 0.588, "step": 1058 }, { "epoch": 0.2963895885810244, "grad_norm": 0.2666915384441575, "learning_rate": 9.935302061297236e-05, "loss": 0.5951, "step": 1059 }, { "epoch": 0.2966694654352085, "grad_norm": 0.2528790162576626, "learning_rate": 9.935054477582646e-05, "loss": 0.6159, "step": 1060 }, { "epoch": 0.2969493422893927, "grad_norm": 0.258695386401773, "learning_rate": 9.934806424146809e-05, "loss": 0.6317, "step": 1061 }, { "epoch": 0.2972292191435768, "grad_norm": 0.25882616695355243, "learning_rate": 9.934557901013333e-05, "loss": 0.5475, "step": 1062 }, { "epoch": 0.297509095997761, "grad_norm": 0.2547630767859152, "learning_rate": 9.934308908205875e-05, "loss": 0.59, "step": 1063 }, { "epoch": 0.29778897285194517, "grad_norm": 0.2581350059717819, "learning_rate": 9.934059445748134e-05, "loss": 0.582, "step": 1064 }, { "epoch": 0.2980688497061293, "grad_norm": 0.27509269212921794, "learning_rate": 9.93380951366385e-05, "loss": 0.5981, "step": 1065 }, { "epoch": 0.29834872656031347, "grad_norm": 0.2604372488330954, "learning_rate": 9.933559111976818e-05, "loss": 0.5569, "step": 1066 }, { "epoch": 0.2986286034144976, "grad_norm": 0.2645858515338992, "learning_rate": 9.933308240710868e-05, "loss": 0.5852, "step": 1067 }, { "epoch": 0.2989084802686818, "grad_norm": 0.2727455621721883, "learning_rate": 9.933056899889878e-05, "loss": 0.5923, "step": 1068 }, { "epoch": 0.29918835712286596, "grad_norm": 0.2639919791582596, "learning_rate": 9.932805089537771e-05, "loss": 0.6059, "step": 1069 }, { "epoch": 0.2994682339770501, "grad_norm": 0.2689253534280279, "learning_rate": 9.932552809678515e-05, "loss": 0.5812, "step": 1070 }, { "epoch": 0.29974811083123426, "grad_norm": 0.2729777989503698, "learning_rate": 9.93230006033612e-05, "loss": 0.6067, "step": 1071 }, { "epoch": 0.30002798768541844, "grad_norm": 0.2705257986314921, "learning_rate": 9.932046841534646e-05, "loss": 0.5968, "step": 1072 }, { "epoch": 0.30030786453960256, "grad_norm": 0.2640730489370255, "learning_rate": 9.931793153298192e-05, "loss": 0.5716, "step": 1073 }, { "epoch": 0.30058774139378674, "grad_norm": 0.2610573382932847, "learning_rate": 9.931538995650907e-05, "loss": 0.5639, "step": 1074 }, { "epoch": 0.30086761824797087, "grad_norm": 0.2692689996451498, "learning_rate": 9.931284368616978e-05, "loss": 0.5806, "step": 1075 }, { "epoch": 0.30114749510215505, "grad_norm": 0.2815554921303491, "learning_rate": 9.931029272220644e-05, "loss": 0.5798, "step": 1076 }, { "epoch": 0.30142737195633923, "grad_norm": 0.2655026281350125, "learning_rate": 9.930773706486185e-05, "loss": 0.5986, "step": 1077 }, { "epoch": 0.30170724881052335, "grad_norm": 0.27030316189420567, "learning_rate": 9.930517671437923e-05, "loss": 0.5976, "step": 1078 }, { "epoch": 0.30198712566470753, "grad_norm": 0.2590977730284289, "learning_rate": 9.930261167100229e-05, "loss": 0.5955, "step": 1079 }, { "epoch": 0.3022670025188917, "grad_norm": 0.2620915114177364, "learning_rate": 9.930004193497519e-05, "loss": 0.6212, "step": 1080 }, { "epoch": 0.30254687937307584, "grad_norm": 0.27777303725376995, "learning_rate": 9.929746750654249e-05, "loss": 0.6132, "step": 1081 }, { "epoch": 0.30282675622726, "grad_norm": 0.2499508096326113, "learning_rate": 9.929488838594925e-05, "loss": 0.5995, "step": 1082 }, { "epoch": 0.30310663308144414, "grad_norm": 0.2504456280815404, "learning_rate": 9.929230457344093e-05, "loss": 0.5854, "step": 1083 }, { "epoch": 0.3033865099356283, "grad_norm": 0.2756607289113451, "learning_rate": 9.928971606926347e-05, "loss": 0.5941, "step": 1084 }, { "epoch": 0.3036663867898125, "grad_norm": 0.26535612580493123, "learning_rate": 9.928712287366326e-05, "loss": 0.6104, "step": 1085 }, { "epoch": 0.30394626364399663, "grad_norm": 0.24719193247256463, "learning_rate": 9.928452498688711e-05, "loss": 0.5803, "step": 1086 }, { "epoch": 0.3042261404981808, "grad_norm": 0.26520607157065035, "learning_rate": 9.928192240918227e-05, "loss": 0.5923, "step": 1087 }, { "epoch": 0.30450601735236493, "grad_norm": 0.2664832595562186, "learning_rate": 9.927931514079648e-05, "loss": 0.5693, "step": 1088 }, { "epoch": 0.3047858942065491, "grad_norm": 0.2653592619200589, "learning_rate": 9.927670318197789e-05, "loss": 0.5971, "step": 1089 }, { "epoch": 0.3050657710607333, "grad_norm": 0.2698349805193774, "learning_rate": 9.92740865329751e-05, "loss": 0.5755, "step": 1090 }, { "epoch": 0.3053456479149174, "grad_norm": 0.2657086659526615, "learning_rate": 9.92714651940372e-05, "loss": 0.5801, "step": 1091 }, { "epoch": 0.3056255247691016, "grad_norm": 0.2713662771623878, "learning_rate": 9.926883916541364e-05, "loss": 0.5963, "step": 1092 }, { "epoch": 0.3059054016232858, "grad_norm": 0.27085680359250447, "learning_rate": 9.92662084473544e-05, "loss": 0.5884, "step": 1093 }, { "epoch": 0.3061852784774699, "grad_norm": 0.2738147342608081, "learning_rate": 9.926357304010987e-05, "loss": 0.5809, "step": 1094 }, { "epoch": 0.3064651553316541, "grad_norm": 0.2635383752148601, "learning_rate": 9.926093294393087e-05, "loss": 0.5855, "step": 1095 }, { "epoch": 0.3067450321858382, "grad_norm": 0.2584319865589908, "learning_rate": 9.925828815906871e-05, "loss": 0.5692, "step": 1096 }, { "epoch": 0.3070249090400224, "grad_norm": 0.24917565418840854, "learning_rate": 9.925563868577511e-05, "loss": 0.5938, "step": 1097 }, { "epoch": 0.30730478589420657, "grad_norm": 0.27764442234697434, "learning_rate": 9.925298452430226e-05, "loss": 0.594, "step": 1098 }, { "epoch": 0.3075846627483907, "grad_norm": 0.26175790946980354, "learning_rate": 9.925032567490275e-05, "loss": 0.5942, "step": 1099 }, { "epoch": 0.30786453960257487, "grad_norm": 0.2641376921332692, "learning_rate": 9.92476621378297e-05, "loss": 0.5859, "step": 1100 }, { "epoch": 0.30814441645675905, "grad_norm": 0.2624741488450959, "learning_rate": 9.924499391333659e-05, "loss": 0.6135, "step": 1101 }, { "epoch": 0.3084242933109432, "grad_norm": 0.258648592673151, "learning_rate": 9.924232100167741e-05, "loss": 0.5821, "step": 1102 }, { "epoch": 0.30870417016512736, "grad_norm": 0.2520646311408594, "learning_rate": 9.923964340310654e-05, "loss": 0.5903, "step": 1103 }, { "epoch": 0.3089840470193115, "grad_norm": 0.2508943082834356, "learning_rate": 9.923696111787884e-05, "loss": 0.5536, "step": 1104 }, { "epoch": 0.30926392387349566, "grad_norm": 0.25590191362612047, "learning_rate": 9.923427414624964e-05, "loss": 0.6122, "step": 1105 }, { "epoch": 0.30954380072767984, "grad_norm": 0.2620523692519131, "learning_rate": 9.923158248847466e-05, "loss": 0.5788, "step": 1106 }, { "epoch": 0.30982367758186397, "grad_norm": 0.26371485722936605, "learning_rate": 9.922888614481012e-05, "loss": 0.5943, "step": 1107 }, { "epoch": 0.31010355443604815, "grad_norm": 0.26383852093572285, "learning_rate": 9.922618511551263e-05, "loss": 0.5494, "step": 1108 }, { "epoch": 0.31038343129023227, "grad_norm": 0.2601015265137906, "learning_rate": 9.922347940083928e-05, "loss": 0.5963, "step": 1109 }, { "epoch": 0.31066330814441645, "grad_norm": 0.2584865584377887, "learning_rate": 9.922076900104762e-05, "loss": 0.598, "step": 1110 }, { "epoch": 0.31094318499860063, "grad_norm": 0.2566279120727724, "learning_rate": 9.921805391639561e-05, "loss": 0.5857, "step": 1111 }, { "epoch": 0.31122306185278475, "grad_norm": 0.2517603449460284, "learning_rate": 9.921533414714168e-05, "loss": 0.6075, "step": 1112 }, { "epoch": 0.31150293870696893, "grad_norm": 0.24381384656835792, "learning_rate": 9.921260969354471e-05, "loss": 0.6012, "step": 1113 }, { "epoch": 0.3117828155611531, "grad_norm": 0.25356308624131946, "learning_rate": 9.9209880555864e-05, "loss": 0.5975, "step": 1114 }, { "epoch": 0.31206269241533724, "grad_norm": 0.24198658009302337, "learning_rate": 9.92071467343593e-05, "loss": 0.572, "step": 1115 }, { "epoch": 0.3123425692695214, "grad_norm": 0.25933353238025053, "learning_rate": 9.920440822929085e-05, "loss": 0.5839, "step": 1116 }, { "epoch": 0.31262244612370554, "grad_norm": 0.26002323769885366, "learning_rate": 9.920166504091927e-05, "loss": 0.5932, "step": 1117 }, { "epoch": 0.3129023229778897, "grad_norm": 0.2572780922199821, "learning_rate": 9.919891716950566e-05, "loss": 0.5634, "step": 1118 }, { "epoch": 0.3131821998320739, "grad_norm": 0.2594440372689817, "learning_rate": 9.91961646153116e-05, "loss": 0.5845, "step": 1119 }, { "epoch": 0.31346207668625803, "grad_norm": 0.24926425960120063, "learning_rate": 9.919340737859906e-05, "loss": 0.5778, "step": 1120 }, { "epoch": 0.3137419535404422, "grad_norm": 0.2611456735182473, "learning_rate": 9.919064545963046e-05, "loss": 0.5766, "step": 1121 }, { "epoch": 0.3140218303946264, "grad_norm": 0.282742563833002, "learning_rate": 9.91878788586687e-05, "loss": 0.6143, "step": 1122 }, { "epoch": 0.3143017072488105, "grad_norm": 0.2665321164338926, "learning_rate": 9.918510757597708e-05, "loss": 0.5967, "step": 1123 }, { "epoch": 0.3145815841029947, "grad_norm": 0.25822441174138605, "learning_rate": 9.91823316118194e-05, "loss": 0.5771, "step": 1124 }, { "epoch": 0.3148614609571788, "grad_norm": 0.265687585828365, "learning_rate": 9.917955096645987e-05, "loss": 0.5731, "step": 1125 }, { "epoch": 0.315141337811363, "grad_norm": 0.2531331848624826, "learning_rate": 9.917676564016315e-05, "loss": 0.5721, "step": 1126 }, { "epoch": 0.3154212146655472, "grad_norm": 0.26311383941298366, "learning_rate": 9.917397563319434e-05, "loss": 0.5839, "step": 1127 }, { "epoch": 0.3157010915197313, "grad_norm": 0.2546061798405397, "learning_rate": 9.917118094581903e-05, "loss": 0.5765, "step": 1128 }, { "epoch": 0.3159809683739155, "grad_norm": 0.255912334062152, "learning_rate": 9.916838157830319e-05, "loss": 0.5878, "step": 1129 }, { "epoch": 0.31626084522809966, "grad_norm": 0.25087761510058526, "learning_rate": 9.916557753091326e-05, "loss": 0.6024, "step": 1130 }, { "epoch": 0.3165407220822838, "grad_norm": 0.2638276003803527, "learning_rate": 9.916276880391614e-05, "loss": 0.5871, "step": 1131 }, { "epoch": 0.31682059893646797, "grad_norm": 0.24961641220572936, "learning_rate": 9.915995539757917e-05, "loss": 0.5902, "step": 1132 }, { "epoch": 0.3171004757906521, "grad_norm": 0.2572607629430118, "learning_rate": 9.915713731217014e-05, "loss": 0.5973, "step": 1133 }, { "epoch": 0.31738035264483627, "grad_norm": 0.25485521505291603, "learning_rate": 9.915431454795725e-05, "loss": 0.5671, "step": 1134 }, { "epoch": 0.31766022949902045, "grad_norm": 0.24656253410737883, "learning_rate": 9.915148710520921e-05, "loss": 0.5757, "step": 1135 }, { "epoch": 0.3179401063532046, "grad_norm": 0.2712707220725371, "learning_rate": 9.91486549841951e-05, "loss": 0.6036, "step": 1136 }, { "epoch": 0.31821998320738876, "grad_norm": 0.27167567171785995, "learning_rate": 9.91458181851845e-05, "loss": 0.5816, "step": 1137 }, { "epoch": 0.3184998600615729, "grad_norm": 0.2613591558250261, "learning_rate": 9.914297670844742e-05, "loss": 0.5973, "step": 1138 }, { "epoch": 0.31877973691575706, "grad_norm": 0.256824803592396, "learning_rate": 9.914013055425431e-05, "loss": 0.5816, "step": 1139 }, { "epoch": 0.31905961376994124, "grad_norm": 0.2652728041759387, "learning_rate": 9.913727972287606e-05, "loss": 0.598, "step": 1140 }, { "epoch": 0.31933949062412537, "grad_norm": 0.2555407037193432, "learning_rate": 9.913442421458404e-05, "loss": 0.6037, "step": 1141 }, { "epoch": 0.31961936747830955, "grad_norm": 0.2601481703232158, "learning_rate": 9.913156402965001e-05, "loss": 0.5651, "step": 1142 }, { "epoch": 0.3198992443324937, "grad_norm": 0.2509235089023841, "learning_rate": 9.912869916834622e-05, "loss": 0.575, "step": 1143 }, { "epoch": 0.32017912118667785, "grad_norm": 0.2664948588632181, "learning_rate": 9.912582963094533e-05, "loss": 0.6087, "step": 1144 }, { "epoch": 0.32045899804086203, "grad_norm": 0.25430703442836183, "learning_rate": 9.91229554177205e-05, "loss": 0.579, "step": 1145 }, { "epoch": 0.32073887489504616, "grad_norm": 0.2761772671368232, "learning_rate": 9.912007652894526e-05, "loss": 0.6283, "step": 1146 }, { "epoch": 0.32101875174923034, "grad_norm": 0.26450694721226353, "learning_rate": 9.911719296489366e-05, "loss": 0.575, "step": 1147 }, { "epoch": 0.3212986286034145, "grad_norm": 0.2675400274143289, "learning_rate": 9.911430472584013e-05, "loss": 0.5817, "step": 1148 }, { "epoch": 0.32157850545759864, "grad_norm": 0.2647524034579435, "learning_rate": 9.911141181205958e-05, "loss": 0.6011, "step": 1149 }, { "epoch": 0.3218583823117828, "grad_norm": 0.2505623645611872, "learning_rate": 9.910851422382739e-05, "loss": 0.5626, "step": 1150 }, { "epoch": 0.322138259165967, "grad_norm": 0.2663609883611856, "learning_rate": 9.91056119614193e-05, "loss": 0.5983, "step": 1151 }, { "epoch": 0.3224181360201511, "grad_norm": 0.24503997090574697, "learning_rate": 9.910270502511159e-05, "loss": 0.5809, "step": 1152 }, { "epoch": 0.3226980128743353, "grad_norm": 0.2790500311043266, "learning_rate": 9.909979341518093e-05, "loss": 0.6015, "step": 1153 }, { "epoch": 0.32297788972851943, "grad_norm": 0.2705731083416747, "learning_rate": 9.909687713190445e-05, "loss": 0.5702, "step": 1154 }, { "epoch": 0.3232577665827036, "grad_norm": 0.25550553982629376, "learning_rate": 9.909395617555973e-05, "loss": 0.5716, "step": 1155 }, { "epoch": 0.3235376434368878, "grad_norm": 0.2685885166862554, "learning_rate": 9.909103054642478e-05, "loss": 0.6136, "step": 1156 }, { "epoch": 0.3238175202910719, "grad_norm": 0.2620236933847693, "learning_rate": 9.908810024477807e-05, "loss": 0.5949, "step": 1157 }, { "epoch": 0.3240973971452561, "grad_norm": 0.26623920075723845, "learning_rate": 9.908516527089848e-05, "loss": 0.5799, "step": 1158 }, { "epoch": 0.3243772739994402, "grad_norm": 0.2822439756971142, "learning_rate": 9.908222562506542e-05, "loss": 0.602, "step": 1159 }, { "epoch": 0.3246571508536244, "grad_norm": 0.2637678258803881, "learning_rate": 9.907928130755862e-05, "loss": 0.5605, "step": 1160 }, { "epoch": 0.3249370277078086, "grad_norm": 0.261430455404208, "learning_rate": 9.907633231865838e-05, "loss": 0.5925, "step": 1161 }, { "epoch": 0.3252169045619927, "grad_norm": 0.2583250833297409, "learning_rate": 9.907337865864534e-05, "loss": 0.5585, "step": 1162 }, { "epoch": 0.3254967814161769, "grad_norm": 0.2567489982772435, "learning_rate": 9.907042032780067e-05, "loss": 0.5715, "step": 1163 }, { "epoch": 0.32577665827036106, "grad_norm": 0.25405686607101835, "learning_rate": 9.906745732640592e-05, "loss": 0.5685, "step": 1164 }, { "epoch": 0.3260565351245452, "grad_norm": 0.3087236259561368, "learning_rate": 9.906448965474312e-05, "loss": 0.6123, "step": 1165 }, { "epoch": 0.32633641197872937, "grad_norm": 0.28014190343761486, "learning_rate": 9.906151731309472e-05, "loss": 0.5644, "step": 1166 }, { "epoch": 0.3266162888329135, "grad_norm": 0.27121399211710767, "learning_rate": 9.905854030174364e-05, "loss": 0.5987, "step": 1167 }, { "epoch": 0.3268961656870977, "grad_norm": 0.26693600160204783, "learning_rate": 9.905555862097324e-05, "loss": 0.5981, "step": 1168 }, { "epoch": 0.32717604254128185, "grad_norm": 0.25613297806326796, "learning_rate": 9.905257227106733e-05, "loss": 0.5908, "step": 1169 }, { "epoch": 0.327455919395466, "grad_norm": 0.26904417275188486, "learning_rate": 9.904958125231012e-05, "loss": 0.5788, "step": 1170 }, { "epoch": 0.32773579624965016, "grad_norm": 0.2541131649139711, "learning_rate": 9.904658556498631e-05, "loss": 0.5922, "step": 1171 }, { "epoch": 0.32801567310383434, "grad_norm": 0.29144181026230515, "learning_rate": 9.904358520938104e-05, "loss": 0.6014, "step": 1172 }, { "epoch": 0.32829554995801846, "grad_norm": 0.2633395039432903, "learning_rate": 9.904058018577987e-05, "loss": 0.5986, "step": 1173 }, { "epoch": 0.32857542681220264, "grad_norm": 0.269393619976543, "learning_rate": 9.903757049446884e-05, "loss": 0.6017, "step": 1174 }, { "epoch": 0.32885530366638677, "grad_norm": 0.2564490743444378, "learning_rate": 9.90345561357344e-05, "loss": 0.5759, "step": 1175 }, { "epoch": 0.32913518052057095, "grad_norm": 0.2486777003142995, "learning_rate": 9.903153710986346e-05, "loss": 0.5706, "step": 1176 }, { "epoch": 0.3294150573747551, "grad_norm": 0.262014400862415, "learning_rate": 9.902851341714337e-05, "loss": 0.5938, "step": 1177 }, { "epoch": 0.32969493422893925, "grad_norm": 0.26309639336992313, "learning_rate": 9.902548505786193e-05, "loss": 0.5717, "step": 1178 }, { "epoch": 0.32997481108312343, "grad_norm": 0.2526464138072752, "learning_rate": 9.902245203230738e-05, "loss": 0.5937, "step": 1179 }, { "epoch": 0.3302546879373076, "grad_norm": 0.28039963690433434, "learning_rate": 9.901941434076841e-05, "loss": 0.6125, "step": 1180 }, { "epoch": 0.33053456479149174, "grad_norm": 0.24737630654081313, "learning_rate": 9.901637198353415e-05, "loss": 0.5755, "step": 1181 }, { "epoch": 0.3308144416456759, "grad_norm": 0.2532820655849239, "learning_rate": 9.901332496089417e-05, "loss": 0.5821, "step": 1182 }, { "epoch": 0.33109431849986004, "grad_norm": 0.2511166115916497, "learning_rate": 9.901027327313848e-05, "loss": 0.5641, "step": 1183 }, { "epoch": 0.3313741953540442, "grad_norm": 0.2622700043966974, "learning_rate": 9.900721692055755e-05, "loss": 0.5829, "step": 1184 }, { "epoch": 0.3316540722082284, "grad_norm": 0.26469762684272724, "learning_rate": 9.900415590344227e-05, "loss": 0.5592, "step": 1185 }, { "epoch": 0.3319339490624125, "grad_norm": 0.2685833366185621, "learning_rate": 9.900109022208403e-05, "loss": 0.58, "step": 1186 }, { "epoch": 0.3322138259165967, "grad_norm": 0.2565920420694011, "learning_rate": 9.899801987677457e-05, "loss": 0.5737, "step": 1187 }, { "epoch": 0.33249370277078083, "grad_norm": 0.25752184536440154, "learning_rate": 9.899494486780616e-05, "loss": 0.5877, "step": 1188 }, { "epoch": 0.332773579624965, "grad_norm": 0.24842346723141795, "learning_rate": 9.899186519547147e-05, "loss": 0.5683, "step": 1189 }, { "epoch": 0.3330534564791492, "grad_norm": 0.252341889344035, "learning_rate": 9.898878086006364e-05, "loss": 0.5845, "step": 1190 }, { "epoch": 0.3333333333333333, "grad_norm": 0.2596260336526809, "learning_rate": 9.898569186187622e-05, "loss": 0.5799, "step": 1191 }, { "epoch": 0.3336132101875175, "grad_norm": 0.26315295565046337, "learning_rate": 9.898259820120325e-05, "loss": 0.5988, "step": 1192 }, { "epoch": 0.3338930870417017, "grad_norm": 0.25577823881308154, "learning_rate": 9.897949987833915e-05, "loss": 0.5811, "step": 1193 }, { "epoch": 0.3341729638958858, "grad_norm": 0.2616508614494166, "learning_rate": 9.897639689357883e-05, "loss": 0.5875, "step": 1194 }, { "epoch": 0.33445284075007, "grad_norm": 0.24829264184735783, "learning_rate": 9.897328924721765e-05, "loss": 0.566, "step": 1195 }, { "epoch": 0.3347327176042541, "grad_norm": 0.24936120561320013, "learning_rate": 9.897017693955139e-05, "loss": 0.5766, "step": 1196 }, { "epoch": 0.3350125944584383, "grad_norm": 0.25117922631543743, "learning_rate": 9.896705997087626e-05, "loss": 0.6102, "step": 1197 }, { "epoch": 0.33529247131262246, "grad_norm": 0.2616185379504625, "learning_rate": 9.896393834148898e-05, "loss": 0.6022, "step": 1198 }, { "epoch": 0.3355723481668066, "grad_norm": 0.25387793292589556, "learning_rate": 9.896081205168662e-05, "loss": 0.6053, "step": 1199 }, { "epoch": 0.33585222502099077, "grad_norm": 0.26318212341326885, "learning_rate": 9.895768110176678e-05, "loss": 0.5864, "step": 1200 }, { "epoch": 0.33613210187517495, "grad_norm": 0.24747602735444504, "learning_rate": 9.895454549202745e-05, "loss": 0.5945, "step": 1201 }, { "epoch": 0.3364119787293591, "grad_norm": 0.24962327359025266, "learning_rate": 9.895140522276707e-05, "loss": 0.5929, "step": 1202 }, { "epoch": 0.33669185558354325, "grad_norm": 0.25896810155293915, "learning_rate": 9.894826029428454e-05, "loss": 0.6084, "step": 1203 }, { "epoch": 0.3369717324377274, "grad_norm": 0.25672321740795007, "learning_rate": 9.894511070687919e-05, "loss": 0.5897, "step": 1204 }, { "epoch": 0.33725160929191156, "grad_norm": 0.2518349813338347, "learning_rate": 9.894195646085083e-05, "loss": 0.576, "step": 1205 }, { "epoch": 0.33753148614609574, "grad_norm": 0.2643446814910832, "learning_rate": 9.893879755649965e-05, "loss": 0.5666, "step": 1206 }, { "epoch": 0.33781136300027986, "grad_norm": 0.26047434907347133, "learning_rate": 9.893563399412634e-05, "loss": 0.6044, "step": 1207 }, { "epoch": 0.33809123985446404, "grad_norm": 0.2549770752064979, "learning_rate": 9.893246577403197e-05, "loss": 0.546, "step": 1208 }, { "epoch": 0.33837111670864817, "grad_norm": 0.2639226655722739, "learning_rate": 9.892929289651813e-05, "loss": 0.6111, "step": 1209 }, { "epoch": 0.33865099356283235, "grad_norm": 0.25276014704600214, "learning_rate": 9.892611536188681e-05, "loss": 0.5766, "step": 1210 }, { "epoch": 0.3389308704170165, "grad_norm": 0.25611846388371784, "learning_rate": 9.892293317044043e-05, "loss": 0.587, "step": 1211 }, { "epoch": 0.33921074727120065, "grad_norm": 0.2562736816159841, "learning_rate": 9.891974632248192e-05, "loss": 0.606, "step": 1212 }, { "epoch": 0.33949062412538483, "grad_norm": 0.25325327589508856, "learning_rate": 9.891655481831453e-05, "loss": 0.5876, "step": 1213 }, { "epoch": 0.339770500979569, "grad_norm": 0.2511111558833725, "learning_rate": 9.89133586582421e-05, "loss": 0.5886, "step": 1214 }, { "epoch": 0.34005037783375314, "grad_norm": 0.25908982198478825, "learning_rate": 9.891015784256881e-05, "loss": 0.5984, "step": 1215 }, { "epoch": 0.3403302546879373, "grad_norm": 0.2648801626570116, "learning_rate": 9.890695237159931e-05, "loss": 0.5756, "step": 1216 }, { "epoch": 0.34061013154212144, "grad_norm": 0.25204186393477696, "learning_rate": 9.890374224563872e-05, "loss": 0.597, "step": 1217 }, { "epoch": 0.3408900083963056, "grad_norm": 0.2537507218509391, "learning_rate": 9.890052746499256e-05, "loss": 0.5812, "step": 1218 }, { "epoch": 0.3411698852504898, "grad_norm": 0.2613338341516281, "learning_rate": 9.889730802996683e-05, "loss": 0.5852, "step": 1219 }, { "epoch": 0.3414497621046739, "grad_norm": 0.2513548166389704, "learning_rate": 9.889408394086796e-05, "loss": 0.5968, "step": 1220 }, { "epoch": 0.3417296389588581, "grad_norm": 0.24822866983712577, "learning_rate": 9.889085519800282e-05, "loss": 0.573, "step": 1221 }, { "epoch": 0.3420095158130423, "grad_norm": 0.24594567150746716, "learning_rate": 9.888762180167871e-05, "loss": 0.5551, "step": 1222 }, { "epoch": 0.3422893926672264, "grad_norm": 0.2616874323983542, "learning_rate": 9.888438375220339e-05, "loss": 0.5845, "step": 1223 }, { "epoch": 0.3425692695214106, "grad_norm": 0.25449712886631054, "learning_rate": 9.888114104988506e-05, "loss": 0.5544, "step": 1224 }, { "epoch": 0.3428491463755947, "grad_norm": 0.2651541726904754, "learning_rate": 9.887789369503237e-05, "loss": 0.5912, "step": 1225 }, { "epoch": 0.3431290232297789, "grad_norm": 0.24951492881588697, "learning_rate": 9.887464168795439e-05, "loss": 0.5737, "step": 1226 }, { "epoch": 0.3434089000839631, "grad_norm": 0.26051669703885516, "learning_rate": 9.887138502896067e-05, "loss": 0.5922, "step": 1227 }, { "epoch": 0.3436887769381472, "grad_norm": 0.25618708803578677, "learning_rate": 9.886812371836116e-05, "loss": 0.5673, "step": 1228 }, { "epoch": 0.3439686537923314, "grad_norm": 0.24094067673672262, "learning_rate": 9.886485775646629e-05, "loss": 0.5947, "step": 1229 }, { "epoch": 0.34424853064651556, "grad_norm": 0.25019065804154744, "learning_rate": 9.886158714358691e-05, "loss": 0.5641, "step": 1230 }, { "epoch": 0.3445284075006997, "grad_norm": 0.2559694149536015, "learning_rate": 9.88583118800343e-05, "loss": 0.612, "step": 1231 }, { "epoch": 0.34480828435488386, "grad_norm": 0.2946784708107338, "learning_rate": 9.885503196612022e-05, "loss": 0.6035, "step": 1232 }, { "epoch": 0.345088161209068, "grad_norm": 0.26396022206734865, "learning_rate": 9.885174740215687e-05, "loss": 0.5713, "step": 1233 }, { "epoch": 0.34536803806325217, "grad_norm": 0.26166485957468827, "learning_rate": 9.884845818845685e-05, "loss": 0.5764, "step": 1234 }, { "epoch": 0.34564791491743635, "grad_norm": 0.24931389332771994, "learning_rate": 9.884516432533324e-05, "loss": 0.5852, "step": 1235 }, { "epoch": 0.3459277917716205, "grad_norm": 0.2532975570578218, "learning_rate": 9.884186581309954e-05, "loss": 0.5678, "step": 1236 }, { "epoch": 0.34620766862580465, "grad_norm": 0.2569653422063483, "learning_rate": 9.883856265206972e-05, "loss": 0.5856, "step": 1237 }, { "epoch": 0.3464875454799888, "grad_norm": 0.262440592618675, "learning_rate": 9.883525484255817e-05, "loss": 0.6175, "step": 1238 }, { "epoch": 0.34676742233417296, "grad_norm": 0.2530545077274828, "learning_rate": 9.883194238487974e-05, "loss": 0.5919, "step": 1239 }, { "epoch": 0.34704729918835714, "grad_norm": 0.2519144804995559, "learning_rate": 9.882862527934968e-05, "loss": 0.6003, "step": 1240 }, { "epoch": 0.34732717604254126, "grad_norm": 0.25535977220109574, "learning_rate": 9.882530352628375e-05, "loss": 0.5827, "step": 1241 }, { "epoch": 0.34760705289672544, "grad_norm": 0.2485332607677699, "learning_rate": 9.88219771259981e-05, "loss": 0.5708, "step": 1242 }, { "epoch": 0.3478869297509096, "grad_norm": 0.24740278514232453, "learning_rate": 9.881864607880934e-05, "loss": 0.6051, "step": 1243 }, { "epoch": 0.34816680660509375, "grad_norm": 0.2605790936490167, "learning_rate": 9.881531038503454e-05, "loss": 0.5822, "step": 1244 }, { "epoch": 0.34844668345927793, "grad_norm": 0.2556716841241045, "learning_rate": 9.881197004499114e-05, "loss": 0.5855, "step": 1245 }, { "epoch": 0.34872656031346205, "grad_norm": 0.2514842683099283, "learning_rate": 9.880862505899714e-05, "loss": 0.5886, "step": 1246 }, { "epoch": 0.34900643716764623, "grad_norm": 0.24987138476583054, "learning_rate": 9.880527542737085e-05, "loss": 0.5709, "step": 1247 }, { "epoch": 0.3492863140218304, "grad_norm": 0.2574864063751009, "learning_rate": 9.880192115043115e-05, "loss": 0.6387, "step": 1248 }, { "epoch": 0.34956619087601454, "grad_norm": 0.24946089622378548, "learning_rate": 9.879856222849728e-05, "loss": 0.5708, "step": 1249 }, { "epoch": 0.3498460677301987, "grad_norm": 0.24532522184225758, "learning_rate": 9.879519866188896e-05, "loss": 0.5662, "step": 1250 }, { "epoch": 0.3501259445843829, "grad_norm": 0.2465495625404283, "learning_rate": 9.879183045092628e-05, "loss": 0.5767, "step": 1251 }, { "epoch": 0.350405821438567, "grad_norm": 0.25297900433606335, "learning_rate": 9.87884575959299e-05, "loss": 0.5788, "step": 1252 }, { "epoch": 0.3506856982927512, "grad_norm": 0.2462356056449759, "learning_rate": 9.87850800972208e-05, "loss": 0.5928, "step": 1253 }, { "epoch": 0.3509655751469353, "grad_norm": 0.24973311591582598, "learning_rate": 9.878169795512049e-05, "loss": 0.5776, "step": 1254 }, { "epoch": 0.3512454520011195, "grad_norm": 0.23920281263452609, "learning_rate": 9.877831116995084e-05, "loss": 0.5573, "step": 1255 }, { "epoch": 0.3515253288553037, "grad_norm": 0.25725393191303575, "learning_rate": 9.877491974203426e-05, "loss": 0.5836, "step": 1256 }, { "epoch": 0.3518052057094878, "grad_norm": 0.25232408274091755, "learning_rate": 9.877152367169349e-05, "loss": 0.5762, "step": 1257 }, { "epoch": 0.352085082563672, "grad_norm": 0.24134526587445634, "learning_rate": 9.87681229592518e-05, "loss": 0.5497, "step": 1258 }, { "epoch": 0.3523649594178561, "grad_norm": 0.23982988589107124, "learning_rate": 9.876471760503288e-05, "loss": 0.554, "step": 1259 }, { "epoch": 0.3526448362720403, "grad_norm": 0.23890864878221263, "learning_rate": 9.876130760936085e-05, "loss": 0.5627, "step": 1260 }, { "epoch": 0.3529247131262245, "grad_norm": 0.2497327225548309, "learning_rate": 9.875789297256027e-05, "loss": 0.5605, "step": 1261 }, { "epoch": 0.3532045899804086, "grad_norm": 0.25367700851397396, "learning_rate": 9.875447369495613e-05, "loss": 0.5674, "step": 1262 }, { "epoch": 0.3534844668345928, "grad_norm": 0.25314163600853484, "learning_rate": 9.875104977687391e-05, "loss": 0.5738, "step": 1263 }, { "epoch": 0.35376434368877696, "grad_norm": 0.2659056574428081, "learning_rate": 9.874762121863947e-05, "loss": 0.6137, "step": 1264 }, { "epoch": 0.3540442205429611, "grad_norm": 0.24822887643168823, "learning_rate": 9.874418802057917e-05, "loss": 0.551, "step": 1265 }, { "epoch": 0.35432409739714527, "grad_norm": 0.25592040095777707, "learning_rate": 9.874075018301976e-05, "loss": 0.5654, "step": 1266 }, { "epoch": 0.3546039742513294, "grad_norm": 0.24619356115307459, "learning_rate": 9.873730770628847e-05, "loss": 0.5854, "step": 1267 }, { "epoch": 0.35488385110551357, "grad_norm": 0.2556041539363497, "learning_rate": 9.873386059071294e-05, "loss": 0.602, "step": 1268 }, { "epoch": 0.35516372795969775, "grad_norm": 0.24159093702159906, "learning_rate": 9.873040883662131e-05, "loss": 0.591, "step": 1269 }, { "epoch": 0.3554436048138819, "grad_norm": 0.2557836704157818, "learning_rate": 9.872695244434207e-05, "loss": 0.6033, "step": 1270 }, { "epoch": 0.35572348166806605, "grad_norm": 0.26359043655843445, "learning_rate": 9.872349141420423e-05, "loss": 0.6023, "step": 1271 }, { "epoch": 0.35600335852225024, "grad_norm": 0.25715716587656756, "learning_rate": 9.872002574653722e-05, "loss": 0.5904, "step": 1272 }, { "epoch": 0.35628323537643436, "grad_norm": 0.2674975391039773, "learning_rate": 9.871655544167087e-05, "loss": 0.5724, "step": 1273 }, { "epoch": 0.35656311223061854, "grad_norm": 0.23916191244118243, "learning_rate": 9.871308049993551e-05, "loss": 0.5614, "step": 1274 }, { "epoch": 0.35684298908480266, "grad_norm": 0.2546830151355151, "learning_rate": 9.870960092166188e-05, "loss": 0.5699, "step": 1275 }, { "epoch": 0.35712286593898684, "grad_norm": 0.25398189363013207, "learning_rate": 9.870611670718118e-05, "loss": 0.5658, "step": 1276 }, { "epoch": 0.357402742793171, "grad_norm": 0.2399760785879194, "learning_rate": 9.870262785682502e-05, "loss": 0.606, "step": 1277 }, { "epoch": 0.35768261964735515, "grad_norm": 0.24778415767989007, "learning_rate": 9.869913437092549e-05, "loss": 0.5952, "step": 1278 }, { "epoch": 0.35796249650153933, "grad_norm": 0.24623543250412283, "learning_rate": 9.869563624981507e-05, "loss": 0.5615, "step": 1279 }, { "epoch": 0.3582423733557235, "grad_norm": 0.25371512789372525, "learning_rate": 9.869213349382676e-05, "loss": 0.5877, "step": 1280 }, { "epoch": 0.35852225020990763, "grad_norm": 0.24454272947446215, "learning_rate": 9.868862610329391e-05, "loss": 0.5897, "step": 1281 }, { "epoch": 0.3588021270640918, "grad_norm": 0.26926391171765396, "learning_rate": 9.868511407855039e-05, "loss": 0.5979, "step": 1282 }, { "epoch": 0.35908200391827594, "grad_norm": 0.25281226346086155, "learning_rate": 9.868159741993046e-05, "loss": 0.5683, "step": 1283 }, { "epoch": 0.3593618807724601, "grad_norm": 0.25819032163170924, "learning_rate": 9.867807612776884e-05, "loss": 0.5667, "step": 1284 }, { "epoch": 0.3596417576266443, "grad_norm": 0.2574286159352014, "learning_rate": 9.867455020240069e-05, "loss": 0.5614, "step": 1285 }, { "epoch": 0.3599216344808284, "grad_norm": 0.24387447608482338, "learning_rate": 9.867101964416159e-05, "loss": 0.5875, "step": 1286 }, { "epoch": 0.3602015113350126, "grad_norm": 0.2569257456738481, "learning_rate": 9.866748445338761e-05, "loss": 0.589, "step": 1287 }, { "epoch": 0.3604813881891967, "grad_norm": 0.2363017877964043, "learning_rate": 9.866394463041522e-05, "loss": 0.5765, "step": 1288 }, { "epoch": 0.3607612650433809, "grad_norm": 0.26516592444385784, "learning_rate": 9.866040017558135e-05, "loss": 0.5848, "step": 1289 }, { "epoch": 0.3610411418975651, "grad_norm": 0.2551001849367737, "learning_rate": 9.865685108922333e-05, "loss": 0.584, "step": 1290 }, { "epoch": 0.3613210187517492, "grad_norm": 0.2794469256119939, "learning_rate": 9.8653297371679e-05, "loss": 0.599, "step": 1291 }, { "epoch": 0.3616008956059334, "grad_norm": 0.2703219263382521, "learning_rate": 9.864973902328661e-05, "loss": 0.5768, "step": 1292 }, { "epoch": 0.3618807724601176, "grad_norm": 0.24803881599949243, "learning_rate": 9.864617604438482e-05, "loss": 0.5591, "step": 1293 }, { "epoch": 0.3621606493143017, "grad_norm": 0.2477846881637209, "learning_rate": 9.864260843531276e-05, "loss": 0.5647, "step": 1294 }, { "epoch": 0.3624405261684859, "grad_norm": 0.26701383956206354, "learning_rate": 9.863903619641002e-05, "loss": 0.5692, "step": 1295 }, { "epoch": 0.36272040302267, "grad_norm": 0.25059678055974044, "learning_rate": 9.863545932801656e-05, "loss": 0.5871, "step": 1296 }, { "epoch": 0.3630002798768542, "grad_norm": 0.2456024730485762, "learning_rate": 9.863187783047289e-05, "loss": 0.5649, "step": 1297 }, { "epoch": 0.36328015673103836, "grad_norm": 0.23771467036633712, "learning_rate": 9.862829170411985e-05, "loss": 0.5785, "step": 1298 }, { "epoch": 0.3635600335852225, "grad_norm": 0.25459202670443437, "learning_rate": 9.862470094929879e-05, "loss": 0.5877, "step": 1299 }, { "epoch": 0.36383991043940667, "grad_norm": 0.24400139112438918, "learning_rate": 9.862110556635148e-05, "loss": 0.5769, "step": 1300 }, { "epoch": 0.36411978729359085, "grad_norm": 0.25462037753744304, "learning_rate": 9.861750555562012e-05, "loss": 0.5617, "step": 1301 }, { "epoch": 0.36439966414777497, "grad_norm": 0.2437847338961068, "learning_rate": 9.861390091744737e-05, "loss": 0.5592, "step": 1302 }, { "epoch": 0.36467954100195915, "grad_norm": 0.25360373464902747, "learning_rate": 9.861029165217633e-05, "loss": 0.5719, "step": 1303 }, { "epoch": 0.3649594178561433, "grad_norm": 0.2522267922475495, "learning_rate": 9.860667776015052e-05, "loss": 0.6024, "step": 1304 }, { "epoch": 0.36523929471032746, "grad_norm": 0.24090443131476025, "learning_rate": 9.860305924171392e-05, "loss": 0.5789, "step": 1305 }, { "epoch": 0.36551917156451164, "grad_norm": 0.24234901539669954, "learning_rate": 9.859943609721092e-05, "loss": 0.5498, "step": 1306 }, { "epoch": 0.36579904841869576, "grad_norm": 0.2601574128209228, "learning_rate": 9.85958083269864e-05, "loss": 0.5848, "step": 1307 }, { "epoch": 0.36607892527287994, "grad_norm": 0.24408752147616794, "learning_rate": 9.859217593138564e-05, "loss": 0.5473, "step": 1308 }, { "epoch": 0.36635880212706406, "grad_norm": 0.25108140346626096, "learning_rate": 9.858853891075437e-05, "loss": 0.5678, "step": 1309 }, { "epoch": 0.36663867898124824, "grad_norm": 0.26693222797979294, "learning_rate": 9.858489726543878e-05, "loss": 0.5867, "step": 1310 }, { "epoch": 0.3669185558354324, "grad_norm": 0.252675663643795, "learning_rate": 9.858125099578547e-05, "loss": 0.61, "step": 1311 }, { "epoch": 0.36719843268961655, "grad_norm": 0.25287964475016456, "learning_rate": 9.85776001021415e-05, "loss": 0.5606, "step": 1312 }, { "epoch": 0.36747830954380073, "grad_norm": 0.23667971209316457, "learning_rate": 9.857394458485436e-05, "loss": 0.5495, "step": 1313 }, { "epoch": 0.3677581863979849, "grad_norm": 0.2577794731055881, "learning_rate": 9.8570284444272e-05, "loss": 0.5967, "step": 1314 }, { "epoch": 0.36803806325216903, "grad_norm": 0.25147450268312077, "learning_rate": 9.856661968074277e-05, "loss": 0.5661, "step": 1315 }, { "epoch": 0.3683179401063532, "grad_norm": 0.2499365765595922, "learning_rate": 9.856295029461548e-05, "loss": 0.5827, "step": 1316 }, { "epoch": 0.36859781696053734, "grad_norm": 0.2654215393452005, "learning_rate": 9.855927628623943e-05, "loss": 0.592, "step": 1317 }, { "epoch": 0.3688776938147215, "grad_norm": 0.25165760068861626, "learning_rate": 9.855559765596426e-05, "loss": 0.5634, "step": 1318 }, { "epoch": 0.3691575706689057, "grad_norm": 0.2343431409837341, "learning_rate": 9.855191440414013e-05, "loss": 0.5605, "step": 1319 }, { "epoch": 0.3694374475230898, "grad_norm": 0.23468194509285104, "learning_rate": 9.854822653111761e-05, "loss": 0.5734, "step": 1320 }, { "epoch": 0.369717324377274, "grad_norm": 0.2528275233327272, "learning_rate": 9.854453403724773e-05, "loss": 0.5879, "step": 1321 }, { "epoch": 0.3699972012314582, "grad_norm": 0.24929497405022225, "learning_rate": 9.854083692288192e-05, "loss": 0.5964, "step": 1322 }, { "epoch": 0.3702770780856423, "grad_norm": 0.24606277157925519, "learning_rate": 9.853713518837209e-05, "loss": 0.538, "step": 1323 }, { "epoch": 0.3705569549398265, "grad_norm": 0.24479708249049445, "learning_rate": 9.853342883407055e-05, "loss": 0.5761, "step": 1324 }, { "epoch": 0.3708368317940106, "grad_norm": 0.25155137261806565, "learning_rate": 9.852971786033009e-05, "loss": 0.5751, "step": 1325 }, { "epoch": 0.3711167086481948, "grad_norm": 0.24372667494850433, "learning_rate": 9.852600226750393e-05, "loss": 0.5766, "step": 1326 }, { "epoch": 0.371396585502379, "grad_norm": 0.24230425494337146, "learning_rate": 9.852228205594571e-05, "loss": 0.5795, "step": 1327 }, { "epoch": 0.3716764623565631, "grad_norm": 0.237209943944504, "learning_rate": 9.851855722600952e-05, "loss": 0.5977, "step": 1328 }, { "epoch": 0.3719563392107473, "grad_norm": 0.26817736687336596, "learning_rate": 9.85148277780499e-05, "loss": 0.5706, "step": 1329 }, { "epoch": 0.37223621606493146, "grad_norm": 0.2496500975473135, "learning_rate": 9.851109371242182e-05, "loss": 0.5852, "step": 1330 }, { "epoch": 0.3725160929191156, "grad_norm": 0.25342103629148044, "learning_rate": 9.850735502948069e-05, "loss": 0.5772, "step": 1331 }, { "epoch": 0.37279596977329976, "grad_norm": 0.24207736564553414, "learning_rate": 9.850361172958234e-05, "loss": 0.5803, "step": 1332 }, { "epoch": 0.3730758466274839, "grad_norm": 0.24826864519216052, "learning_rate": 9.84998638130831e-05, "loss": 0.5626, "step": 1333 }, { "epoch": 0.37335572348166807, "grad_norm": 0.2549209969409992, "learning_rate": 9.849611128033967e-05, "loss": 0.5617, "step": 1334 }, { "epoch": 0.37363560033585225, "grad_norm": 0.2548161261358772, "learning_rate": 9.84923541317092e-05, "loss": 0.5645, "step": 1335 }, { "epoch": 0.37391547719003637, "grad_norm": 0.25086816394183653, "learning_rate": 9.848859236754935e-05, "loss": 0.5668, "step": 1336 }, { "epoch": 0.37419535404422055, "grad_norm": 0.2524264803100986, "learning_rate": 9.848482598821813e-05, "loss": 0.6069, "step": 1337 }, { "epoch": 0.3744752308984047, "grad_norm": 0.25162801199296647, "learning_rate": 9.848105499407403e-05, "loss": 0.5633, "step": 1338 }, { "epoch": 0.37475510775258886, "grad_norm": 0.2527560152710071, "learning_rate": 9.847727938547599e-05, "loss": 0.5695, "step": 1339 }, { "epoch": 0.37503498460677304, "grad_norm": 0.25737762181379004, "learning_rate": 9.847349916278335e-05, "loss": 0.5845, "step": 1340 }, { "epoch": 0.37531486146095716, "grad_norm": 0.23523597902347113, "learning_rate": 9.846971432635593e-05, "loss": 0.5688, "step": 1341 }, { "epoch": 0.37559473831514134, "grad_norm": 0.24654863143444625, "learning_rate": 9.846592487655398e-05, "loss": 0.6078, "step": 1342 }, { "epoch": 0.3758746151693255, "grad_norm": 0.25188389597355215, "learning_rate": 9.846213081373816e-05, "loss": 0.583, "step": 1343 }, { "epoch": 0.37615449202350965, "grad_norm": 0.24664471800837676, "learning_rate": 9.845833213826962e-05, "loss": 0.5919, "step": 1344 }, { "epoch": 0.3764343688776938, "grad_norm": 0.255510689169472, "learning_rate": 9.84545288505099e-05, "loss": 0.5674, "step": 1345 }, { "epoch": 0.37671424573187795, "grad_norm": 0.23922536995893914, "learning_rate": 9.8450720950821e-05, "loss": 0.5674, "step": 1346 }, { "epoch": 0.37699412258606213, "grad_norm": 0.23943968149671538, "learning_rate": 9.844690843956534e-05, "loss": 0.5587, "step": 1347 }, { "epoch": 0.3772739994402463, "grad_norm": 0.2404224740926476, "learning_rate": 9.844309131710585e-05, "loss": 0.5493, "step": 1348 }, { "epoch": 0.37755387629443043, "grad_norm": 0.2461865431891018, "learning_rate": 9.843926958380581e-05, "loss": 0.563, "step": 1349 }, { "epoch": 0.3778337531486146, "grad_norm": 0.24682489667591118, "learning_rate": 9.843544324002895e-05, "loss": 0.5964, "step": 1350 }, { "epoch": 0.3781136300027988, "grad_norm": 0.2534555642684441, "learning_rate": 9.843161228613951e-05, "loss": 0.5945, "step": 1351 }, { "epoch": 0.3783935068569829, "grad_norm": 0.24827491742347144, "learning_rate": 9.842777672250212e-05, "loss": 0.5593, "step": 1352 }, { "epoch": 0.3786733837111671, "grad_norm": 0.2434939810912754, "learning_rate": 9.842393654948181e-05, "loss": 0.5761, "step": 1353 }, { "epoch": 0.3789532605653512, "grad_norm": 0.25475309432094806, "learning_rate": 9.842009176744413e-05, "loss": 0.5862, "step": 1354 }, { "epoch": 0.3792331374195354, "grad_norm": 0.24077707514403346, "learning_rate": 9.841624237675499e-05, "loss": 0.5659, "step": 1355 }, { "epoch": 0.3795130142737196, "grad_norm": 0.2438247277705193, "learning_rate": 9.841238837778084e-05, "loss": 0.5434, "step": 1356 }, { "epoch": 0.3797928911279037, "grad_norm": 0.24783230207209245, "learning_rate": 9.840852977088844e-05, "loss": 0.572, "step": 1357 }, { "epoch": 0.3800727679820879, "grad_norm": 0.2542560008900286, "learning_rate": 9.840466655644509e-05, "loss": 0.5706, "step": 1358 }, { "epoch": 0.380352644836272, "grad_norm": 0.2587275663743225, "learning_rate": 9.840079873481847e-05, "loss": 0.5581, "step": 1359 }, { "epoch": 0.3806325216904562, "grad_norm": 0.24411845202564678, "learning_rate": 9.839692630637676e-05, "loss": 0.5738, "step": 1360 }, { "epoch": 0.3809123985446404, "grad_norm": 0.2657088118008263, "learning_rate": 9.839304927148849e-05, "loss": 0.5952, "step": 1361 }, { "epoch": 0.3811922753988245, "grad_norm": 0.24120355667171373, "learning_rate": 9.838916763052273e-05, "loss": 0.5716, "step": 1362 }, { "epoch": 0.3814721522530087, "grad_norm": 0.25988001922025383, "learning_rate": 9.838528138384888e-05, "loss": 0.583, "step": 1363 }, { "epoch": 0.38175202910719286, "grad_norm": 0.251565534572992, "learning_rate": 9.83813905318369e-05, "loss": 0.5501, "step": 1364 }, { "epoch": 0.382031905961377, "grad_norm": 0.2526421398627077, "learning_rate": 9.837749507485706e-05, "loss": 0.5936, "step": 1365 }, { "epoch": 0.38231178281556116, "grad_norm": 0.24433714214079807, "learning_rate": 9.837359501328017e-05, "loss": 0.5472, "step": 1366 }, { "epoch": 0.3825916596697453, "grad_norm": 0.2536070233357884, "learning_rate": 9.836969034747743e-05, "loss": 0.5869, "step": 1367 }, { "epoch": 0.38287153652392947, "grad_norm": 0.2380215963557464, "learning_rate": 9.836578107782049e-05, "loss": 0.5823, "step": 1368 }, { "epoch": 0.38315141337811365, "grad_norm": 0.24701908840173167, "learning_rate": 9.836186720468144e-05, "loss": 0.6023, "step": 1369 }, { "epoch": 0.3834312902322978, "grad_norm": 0.25839264139407375, "learning_rate": 9.83579487284328e-05, "loss": 0.5816, "step": 1370 }, { "epoch": 0.38371116708648195, "grad_norm": 0.24147497736430357, "learning_rate": 9.835402564944752e-05, "loss": 0.5957, "step": 1371 }, { "epoch": 0.38399104394066613, "grad_norm": 0.2479587500985481, "learning_rate": 9.835009796809902e-05, "loss": 0.5601, "step": 1372 }, { "epoch": 0.38427092079485026, "grad_norm": 0.24172569401654465, "learning_rate": 9.834616568476114e-05, "loss": 0.5612, "step": 1373 }, { "epoch": 0.38455079764903444, "grad_norm": 0.24368403167640812, "learning_rate": 9.834222879980815e-05, "loss": 0.5533, "step": 1374 }, { "epoch": 0.38483067450321856, "grad_norm": 0.2411430338474899, "learning_rate": 9.833828731361476e-05, "loss": 0.5579, "step": 1375 }, { "epoch": 0.38511055135740274, "grad_norm": 0.24266943199089647, "learning_rate": 9.833434122655612e-05, "loss": 0.5784, "step": 1376 }, { "epoch": 0.3853904282115869, "grad_norm": 0.23968345541464048, "learning_rate": 9.833039053900783e-05, "loss": 0.5715, "step": 1377 }, { "epoch": 0.38567030506577105, "grad_norm": 0.25497451001939403, "learning_rate": 9.832643525134593e-05, "loss": 0.5743, "step": 1378 }, { "epoch": 0.3859501819199552, "grad_norm": 0.23794379443832098, "learning_rate": 9.832247536394687e-05, "loss": 0.5688, "step": 1379 }, { "epoch": 0.38623005877413935, "grad_norm": 0.2407260271800774, "learning_rate": 9.831851087718755e-05, "loss": 0.5494, "step": 1380 }, { "epoch": 0.38650993562832353, "grad_norm": 0.23980211745135735, "learning_rate": 9.831454179144532e-05, "loss": 0.5525, "step": 1381 }, { "epoch": 0.3867898124825077, "grad_norm": 0.2449405522761386, "learning_rate": 9.831056810709796e-05, "loss": 0.5892, "step": 1382 }, { "epoch": 0.38706968933669184, "grad_norm": 0.24433135015995563, "learning_rate": 9.83065898245237e-05, "loss": 0.5563, "step": 1383 }, { "epoch": 0.387349566190876, "grad_norm": 0.24674073855883902, "learning_rate": 9.830260694410116e-05, "loss": 0.5523, "step": 1384 }, { "epoch": 0.3876294430450602, "grad_norm": 0.2407361444313013, "learning_rate": 9.829861946620946e-05, "loss": 0.5786, "step": 1385 }, { "epoch": 0.3879093198992443, "grad_norm": 0.24374340567894476, "learning_rate": 9.829462739122814e-05, "loss": 0.5794, "step": 1386 }, { "epoch": 0.3881891967534285, "grad_norm": 0.23433303313796344, "learning_rate": 9.829063071953714e-05, "loss": 0.5705, "step": 1387 }, { "epoch": 0.3884690736076126, "grad_norm": 0.24789014314786506, "learning_rate": 9.828662945151688e-05, "loss": 0.5572, "step": 1388 }, { "epoch": 0.3887489504617968, "grad_norm": 0.25171929518719044, "learning_rate": 9.828262358754821e-05, "loss": 0.5976, "step": 1389 }, { "epoch": 0.389028827315981, "grad_norm": 0.24431104445326382, "learning_rate": 9.82786131280124e-05, "loss": 0.5732, "step": 1390 }, { "epoch": 0.3893087041701651, "grad_norm": 0.2479963265314493, "learning_rate": 9.827459807329116e-05, "loss": 0.5941, "step": 1391 }, { "epoch": 0.3895885810243493, "grad_norm": 0.2342558038183622, "learning_rate": 9.827057842376665e-05, "loss": 0.5763, "step": 1392 }, { "epoch": 0.38986845787853347, "grad_norm": 0.24997810891787892, "learning_rate": 9.82665541798215e-05, "loss": 0.5798, "step": 1393 }, { "epoch": 0.3901483347327176, "grad_norm": 0.2588555058785034, "learning_rate": 9.826252534183869e-05, "loss": 0.5922, "step": 1394 }, { "epoch": 0.3904282115869018, "grad_norm": 0.2566940966721624, "learning_rate": 9.825849191020169e-05, "loss": 0.5731, "step": 1395 }, { "epoch": 0.3907080884410859, "grad_norm": 0.24224772705378775, "learning_rate": 9.825445388529443e-05, "loss": 0.5753, "step": 1396 }, { "epoch": 0.3909879652952701, "grad_norm": 0.23802043040677046, "learning_rate": 9.825041126750123e-05, "loss": 0.5689, "step": 1397 }, { "epoch": 0.39126784214945426, "grad_norm": 0.24010833324273848, "learning_rate": 9.82463640572069e-05, "loss": 0.5932, "step": 1398 }, { "epoch": 0.3915477190036384, "grad_norm": 0.23942869227945068, "learning_rate": 9.82423122547966e-05, "loss": 0.578, "step": 1399 }, { "epoch": 0.39182759585782256, "grad_norm": 0.23922324829498465, "learning_rate": 9.823825586065604e-05, "loss": 0.567, "step": 1400 }, { "epoch": 0.39210747271200674, "grad_norm": 0.24430202942016468, "learning_rate": 9.823419487517129e-05, "loss": 0.5757, "step": 1401 }, { "epoch": 0.39238734956619087, "grad_norm": 0.2518377452208979, "learning_rate": 9.823012929872888e-05, "loss": 0.572, "step": 1402 }, { "epoch": 0.39266722642037505, "grad_norm": 0.22723461063326472, "learning_rate": 9.822605913171576e-05, "loss": 0.5793, "step": 1403 }, { "epoch": 0.3929471032745592, "grad_norm": 0.23563441315632314, "learning_rate": 9.822198437451932e-05, "loss": 0.5593, "step": 1404 }, { "epoch": 0.39322698012874335, "grad_norm": 0.24457109666750093, "learning_rate": 9.821790502752745e-05, "loss": 0.5923, "step": 1405 }, { "epoch": 0.39350685698292753, "grad_norm": 0.24189164632769405, "learning_rate": 9.821382109112836e-05, "loss": 0.5565, "step": 1406 }, { "epoch": 0.39378673383711166, "grad_norm": 0.24660376801379894, "learning_rate": 9.82097325657108e-05, "loss": 0.569, "step": 1407 }, { "epoch": 0.39406661069129584, "grad_norm": 0.24932022315992264, "learning_rate": 9.820563945166393e-05, "loss": 0.5935, "step": 1408 }, { "epoch": 0.39434648754547996, "grad_norm": 0.2371698308262719, "learning_rate": 9.82015417493773e-05, "loss": 0.5674, "step": 1409 }, { "epoch": 0.39462636439966414, "grad_norm": 0.24530065971135384, "learning_rate": 9.819743945924095e-05, "loss": 0.6034, "step": 1410 }, { "epoch": 0.3949062412538483, "grad_norm": 0.25000585371231543, "learning_rate": 9.819333258164534e-05, "loss": 0.5706, "step": 1411 }, { "epoch": 0.39518611810803245, "grad_norm": 0.25145226169082646, "learning_rate": 9.818922111698135e-05, "loss": 0.61, "step": 1412 }, { "epoch": 0.3954659949622166, "grad_norm": 0.24653399767282239, "learning_rate": 9.818510506564032e-05, "loss": 0.5415, "step": 1413 }, { "epoch": 0.3957458718164008, "grad_norm": 0.24944047338705888, "learning_rate": 9.818098442801403e-05, "loss": 0.5844, "step": 1414 }, { "epoch": 0.39602574867058493, "grad_norm": 0.23122977585694265, "learning_rate": 9.817685920449465e-05, "loss": 0.5971, "step": 1415 }, { "epoch": 0.3963056255247691, "grad_norm": 0.24685199164459454, "learning_rate": 9.817272939547487e-05, "loss": 0.5672, "step": 1416 }, { "epoch": 0.39658550237895324, "grad_norm": 0.24948841772290037, "learning_rate": 9.816859500134772e-05, "loss": 0.5692, "step": 1417 }, { "epoch": 0.3968653792331374, "grad_norm": 0.24779947022891766, "learning_rate": 9.816445602250676e-05, "loss": 0.5672, "step": 1418 }, { "epoch": 0.3971452560873216, "grad_norm": 0.25052807070496963, "learning_rate": 9.816031245934592e-05, "loss": 0.5853, "step": 1419 }, { "epoch": 0.3974251329415057, "grad_norm": 0.25698634743083887, "learning_rate": 9.815616431225956e-05, "loss": 0.5765, "step": 1420 }, { "epoch": 0.3977050097956899, "grad_norm": 0.2598518341256616, "learning_rate": 9.815201158164254e-05, "loss": 0.5561, "step": 1421 }, { "epoch": 0.3979848866498741, "grad_norm": 0.2527933730835377, "learning_rate": 9.81478542678901e-05, "loss": 0.5832, "step": 1422 }, { "epoch": 0.3982647635040582, "grad_norm": 0.2330086346185573, "learning_rate": 9.814369237139795e-05, "loss": 0.5699, "step": 1423 }, { "epoch": 0.3985446403582424, "grad_norm": 0.2416250707522738, "learning_rate": 9.813952589256221e-05, "loss": 0.5535, "step": 1424 }, { "epoch": 0.3988245172124265, "grad_norm": 0.25028962040266256, "learning_rate": 9.813535483177945e-05, "loss": 0.5727, "step": 1425 }, { "epoch": 0.3991043940666107, "grad_norm": 0.2580212153823351, "learning_rate": 9.813117918944667e-05, "loss": 0.5769, "step": 1426 }, { "epoch": 0.39938427092079487, "grad_norm": 0.2395087042527386, "learning_rate": 9.812699896596132e-05, "loss": 0.5715, "step": 1427 }, { "epoch": 0.399664147774979, "grad_norm": 0.2370166418531977, "learning_rate": 9.812281416172127e-05, "loss": 0.5871, "step": 1428 }, { "epoch": 0.3999440246291632, "grad_norm": 0.2544828790207878, "learning_rate": 9.811862477712484e-05, "loss": 0.5876, "step": 1429 }, { "epoch": 0.4002239014833473, "grad_norm": 0.24156333772037894, "learning_rate": 9.811443081257075e-05, "loss": 0.544, "step": 1430 }, { "epoch": 0.4005037783375315, "grad_norm": 0.2534180250079355, "learning_rate": 9.811023226845822e-05, "loss": 0.5503, "step": 1431 }, { "epoch": 0.40078365519171566, "grad_norm": 0.2396171926593597, "learning_rate": 9.810602914518685e-05, "loss": 0.5644, "step": 1432 }, { "epoch": 0.4010635320458998, "grad_norm": 0.2580740191270764, "learning_rate": 9.810182144315669e-05, "loss": 0.6111, "step": 1433 }, { "epoch": 0.40134340890008396, "grad_norm": 0.25392432270498255, "learning_rate": 9.809760916276826e-05, "loss": 0.5737, "step": 1434 }, { "epoch": 0.40162328575426814, "grad_norm": 0.25951610318742846, "learning_rate": 9.809339230442247e-05, "loss": 0.5735, "step": 1435 }, { "epoch": 0.40190316260845227, "grad_norm": 0.2505532584485498, "learning_rate": 9.808917086852067e-05, "loss": 0.5649, "step": 1436 }, { "epoch": 0.40218303946263645, "grad_norm": 0.24601010187993994, "learning_rate": 9.808494485546467e-05, "loss": 0.5756, "step": 1437 }, { "epoch": 0.4024629163168206, "grad_norm": 0.26329883872874177, "learning_rate": 9.808071426565671e-05, "loss": 0.5954, "step": 1438 }, { "epoch": 0.40274279317100475, "grad_norm": 0.2425089899801096, "learning_rate": 9.807647909949944e-05, "loss": 0.5687, "step": 1439 }, { "epoch": 0.40302267002518893, "grad_norm": 0.2554382173259353, "learning_rate": 9.807223935739598e-05, "loss": 0.5923, "step": 1440 }, { "epoch": 0.40330254687937306, "grad_norm": 0.23716757627054405, "learning_rate": 9.806799503974988e-05, "loss": 0.5502, "step": 1441 }, { "epoch": 0.40358242373355724, "grad_norm": 0.22925249816015852, "learning_rate": 9.806374614696512e-05, "loss": 0.5585, "step": 1442 }, { "epoch": 0.4038623005877414, "grad_norm": 0.2402121585867316, "learning_rate": 9.805949267944609e-05, "loss": 0.5753, "step": 1443 }, { "epoch": 0.40414217744192554, "grad_norm": 0.24862370331193331, "learning_rate": 9.805523463759764e-05, "loss": 0.5682, "step": 1444 }, { "epoch": 0.4044220542961097, "grad_norm": 0.26093849074195435, "learning_rate": 9.805097202182506e-05, "loss": 0.5719, "step": 1445 }, { "epoch": 0.40470193115029385, "grad_norm": 0.2450551233086624, "learning_rate": 9.804670483253407e-05, "loss": 0.5622, "step": 1446 }, { "epoch": 0.404981808004478, "grad_norm": 0.2462324177887218, "learning_rate": 9.804243307013083e-05, "loss": 0.5678, "step": 1447 }, { "epoch": 0.4052616848586622, "grad_norm": 0.24844948626045485, "learning_rate": 9.80381567350219e-05, "loss": 0.571, "step": 1448 }, { "epoch": 0.40554156171284633, "grad_norm": 0.2618704536066427, "learning_rate": 9.803387582761435e-05, "loss": 0.5695, "step": 1449 }, { "epoch": 0.4058214385670305, "grad_norm": 0.2437642348316598, "learning_rate": 9.80295903483156e-05, "loss": 0.5867, "step": 1450 }, { "epoch": 0.4061013154212147, "grad_norm": 0.24824252189949325, "learning_rate": 9.802530029753354e-05, "loss": 0.5529, "step": 1451 }, { "epoch": 0.4063811922753988, "grad_norm": 0.2517819351187458, "learning_rate": 9.802100567567654e-05, "loss": 0.5894, "step": 1452 }, { "epoch": 0.406661069129583, "grad_norm": 0.25033833944703193, "learning_rate": 9.801670648315333e-05, "loss": 0.585, "step": 1453 }, { "epoch": 0.4069409459837671, "grad_norm": 0.23891449311006835, "learning_rate": 9.801240272037313e-05, "loss": 0.5842, "step": 1454 }, { "epoch": 0.4072208228379513, "grad_norm": 0.26300638380055125, "learning_rate": 9.800809438774556e-05, "loss": 0.5767, "step": 1455 }, { "epoch": 0.4075006996921355, "grad_norm": 0.2476856874567442, "learning_rate": 9.80037814856807e-05, "loss": 0.5632, "step": 1456 }, { "epoch": 0.4077805765463196, "grad_norm": 0.2456684512036152, "learning_rate": 9.799946401458904e-05, "loss": 0.5783, "step": 1457 }, { "epoch": 0.4080604534005038, "grad_norm": 0.2442164088958097, "learning_rate": 9.799514197488153e-05, "loss": 0.5891, "step": 1458 }, { "epoch": 0.4083403302546879, "grad_norm": 0.24458087286921504, "learning_rate": 9.799081536696954e-05, "loss": 0.5514, "step": 1459 }, { "epoch": 0.4086202071088721, "grad_norm": 0.24478380799919897, "learning_rate": 9.79864841912649e-05, "loss": 0.5592, "step": 1460 }, { "epoch": 0.40890008396305627, "grad_norm": 0.26026549588069137, "learning_rate": 9.798214844817983e-05, "loss": 0.5553, "step": 1461 }, { "epoch": 0.4091799608172404, "grad_norm": 0.24356732058277175, "learning_rate": 9.7977808138127e-05, "loss": 0.5615, "step": 1462 }, { "epoch": 0.4094598376714246, "grad_norm": 0.24658345703383888, "learning_rate": 9.797346326151955e-05, "loss": 0.5853, "step": 1463 }, { "epoch": 0.40973971452560876, "grad_norm": 0.2449348078252221, "learning_rate": 9.7969113818771e-05, "loss": 0.5593, "step": 1464 }, { "epoch": 0.4100195913797929, "grad_norm": 0.24113495521809883, "learning_rate": 9.796475981029536e-05, "loss": 0.5563, "step": 1465 }, { "epoch": 0.41029946823397706, "grad_norm": 0.25061954422907295, "learning_rate": 9.796040123650702e-05, "loss": 0.5724, "step": 1466 }, { "epoch": 0.4105793450881612, "grad_norm": 0.2495813782898103, "learning_rate": 9.795603809782086e-05, "loss": 0.6059, "step": 1467 }, { "epoch": 0.41085922194234537, "grad_norm": 0.2463598012700213, "learning_rate": 9.795167039465216e-05, "loss": 0.5734, "step": 1468 }, { "epoch": 0.41113909879652955, "grad_norm": 0.2378664592520467, "learning_rate": 9.794729812741661e-05, "loss": 0.5719, "step": 1469 }, { "epoch": 0.41141897565071367, "grad_norm": 0.2533971400126434, "learning_rate": 9.79429212965304e-05, "loss": 0.5542, "step": 1470 }, { "epoch": 0.41169885250489785, "grad_norm": 0.25207117761398184, "learning_rate": 9.79385399024101e-05, "loss": 0.5379, "step": 1471 }, { "epoch": 0.41197872935908203, "grad_norm": 0.25312302669436926, "learning_rate": 9.793415394547274e-05, "loss": 0.5677, "step": 1472 }, { "epoch": 0.41225860621326615, "grad_norm": 0.25635865763607046, "learning_rate": 9.792976342613577e-05, "loss": 0.5526, "step": 1473 }, { "epoch": 0.41253848306745033, "grad_norm": 0.24001395354032368, "learning_rate": 9.792536834481711e-05, "loss": 0.5882, "step": 1474 }, { "epoch": 0.41281835992163446, "grad_norm": 0.23611612710272417, "learning_rate": 9.792096870193506e-05, "loss": 0.542, "step": 1475 }, { "epoch": 0.41309823677581864, "grad_norm": 0.2426571369583708, "learning_rate": 9.791656449790838e-05, "loss": 0.5707, "step": 1476 }, { "epoch": 0.4133781136300028, "grad_norm": 0.24876165754893334, "learning_rate": 9.791215573315628e-05, "loss": 0.5789, "step": 1477 }, { "epoch": 0.41365799048418694, "grad_norm": 0.25134751542763856, "learning_rate": 9.790774240809837e-05, "loss": 0.5694, "step": 1478 }, { "epoch": 0.4139378673383711, "grad_norm": 0.24209618480153364, "learning_rate": 9.790332452315471e-05, "loss": 0.597, "step": 1479 }, { "epoch": 0.41421774419255525, "grad_norm": 0.24343182478483422, "learning_rate": 9.789890207874584e-05, "loss": 0.5827, "step": 1480 }, { "epoch": 0.41449762104673943, "grad_norm": 0.25417129479575934, "learning_rate": 9.789447507529263e-05, "loss": 0.5697, "step": 1481 }, { "epoch": 0.4147774979009236, "grad_norm": 0.23407711090966488, "learning_rate": 9.78900435132165e-05, "loss": 0.5802, "step": 1482 }, { "epoch": 0.41505737475510773, "grad_norm": 0.24349506605245003, "learning_rate": 9.788560739293921e-05, "loss": 0.5787, "step": 1483 }, { "epoch": 0.4153372516092919, "grad_norm": 0.2439399587049185, "learning_rate": 9.788116671488301e-05, "loss": 0.5823, "step": 1484 }, { "epoch": 0.4156171284634761, "grad_norm": 0.23938444715747859, "learning_rate": 9.787672147947055e-05, "loss": 0.5869, "step": 1485 }, { "epoch": 0.4158970053176602, "grad_norm": 0.23789190270077706, "learning_rate": 9.787227168712496e-05, "loss": 0.5518, "step": 1486 }, { "epoch": 0.4161768821718444, "grad_norm": 0.2511551205740477, "learning_rate": 9.786781733826975e-05, "loss": 0.5698, "step": 1487 }, { "epoch": 0.4164567590260285, "grad_norm": 0.24037779375713417, "learning_rate": 9.786335843332888e-05, "loss": 0.5774, "step": 1488 }, { "epoch": 0.4167366358802127, "grad_norm": 0.2463066275549332, "learning_rate": 9.785889497272677e-05, "loss": 0.5691, "step": 1489 }, { "epoch": 0.4170165127343969, "grad_norm": 0.24876426837688265, "learning_rate": 9.785442695688826e-05, "loss": 0.5734, "step": 1490 }, { "epoch": 0.417296389588581, "grad_norm": 0.2607685450251353, "learning_rate": 9.784995438623861e-05, "loss": 0.5504, "step": 1491 }, { "epoch": 0.4175762664427652, "grad_norm": 0.2560398653950305, "learning_rate": 9.78454772612035e-05, "loss": 0.5578, "step": 1492 }, { "epoch": 0.41785614329694937, "grad_norm": 0.2570477421034459, "learning_rate": 9.784099558220909e-05, "loss": 0.5777, "step": 1493 }, { "epoch": 0.4181360201511335, "grad_norm": 0.24709753942346674, "learning_rate": 9.783650934968196e-05, "loss": 0.5692, "step": 1494 }, { "epoch": 0.41841589700531767, "grad_norm": 0.2418074136805273, "learning_rate": 9.783201856404907e-05, "loss": 0.5849, "step": 1495 }, { "epoch": 0.4186957738595018, "grad_norm": 0.24516351210637674, "learning_rate": 9.782752322573789e-05, "loss": 0.579, "step": 1496 }, { "epoch": 0.418975650713686, "grad_norm": 0.2317373143926596, "learning_rate": 9.782302333517628e-05, "loss": 0.5748, "step": 1497 }, { "epoch": 0.41925552756787016, "grad_norm": 0.23625423054299133, "learning_rate": 9.781851889279255e-05, "loss": 0.5609, "step": 1498 }, { "epoch": 0.4195354044220543, "grad_norm": 0.24961648317309634, "learning_rate": 9.781400989901541e-05, "loss": 0.5644, "step": 1499 }, { "epoch": 0.41981528127623846, "grad_norm": 0.25003382796083357, "learning_rate": 9.780949635427406e-05, "loss": 0.5789, "step": 1500 }, { "epoch": 0.42009515813042264, "grad_norm": 0.23860510714669342, "learning_rate": 9.780497825899807e-05, "loss": 0.5395, "step": 1501 }, { "epoch": 0.42037503498460677, "grad_norm": 0.23884887456756274, "learning_rate": 9.78004556136175e-05, "loss": 0.5553, "step": 1502 }, { "epoch": 0.42065491183879095, "grad_norm": 0.24634040348102276, "learning_rate": 9.779592841856282e-05, "loss": 0.5735, "step": 1503 }, { "epoch": 0.42093478869297507, "grad_norm": 0.235482109411473, "learning_rate": 9.77913966742649e-05, "loss": 0.5867, "step": 1504 }, { "epoch": 0.42121466554715925, "grad_norm": 0.24696965456900874, "learning_rate": 9.77868603811551e-05, "loss": 0.5865, "step": 1505 }, { "epoch": 0.42149454240134343, "grad_norm": 0.25493276759446354, "learning_rate": 9.778231953966519e-05, "loss": 0.5481, "step": 1506 }, { "epoch": 0.42177441925552756, "grad_norm": 0.2622234052682419, "learning_rate": 9.777777415022736e-05, "loss": 0.5786, "step": 1507 }, { "epoch": 0.42205429610971174, "grad_norm": 0.2357278357950739, "learning_rate": 9.777322421327424e-05, "loss": 0.555, "step": 1508 }, { "epoch": 0.42233417296389586, "grad_norm": 0.25881963337215835, "learning_rate": 9.776866972923891e-05, "loss": 0.5609, "step": 1509 }, { "epoch": 0.42261404981808004, "grad_norm": 0.25257102830171657, "learning_rate": 9.776411069855485e-05, "loss": 0.5502, "step": 1510 }, { "epoch": 0.4228939266722642, "grad_norm": 0.24171241739880903, "learning_rate": 9.7759547121656e-05, "loss": 0.5907, "step": 1511 }, { "epoch": 0.42317380352644834, "grad_norm": 0.2390417244023811, "learning_rate": 9.775497899897672e-05, "loss": 0.5541, "step": 1512 }, { "epoch": 0.4234536803806325, "grad_norm": 0.24817087359921536, "learning_rate": 9.77504063309518e-05, "loss": 0.5823, "step": 1513 }, { "epoch": 0.4237335572348167, "grad_norm": 0.29322666574088346, "learning_rate": 9.77458291180165e-05, "loss": 0.5645, "step": 1514 }, { "epoch": 0.42401343408900083, "grad_norm": 0.23208189125798345, "learning_rate": 9.774124736060644e-05, "loss": 0.582, "step": 1515 }, { "epoch": 0.424293310943185, "grad_norm": 0.23723817455860452, "learning_rate": 9.773666105915776e-05, "loss": 0.5581, "step": 1516 }, { "epoch": 0.42457318779736913, "grad_norm": 0.25083464625496504, "learning_rate": 9.773207021410693e-05, "loss": 0.5738, "step": 1517 }, { "epoch": 0.4248530646515533, "grad_norm": 0.23059374288413967, "learning_rate": 9.772747482589096e-05, "loss": 0.5561, "step": 1518 }, { "epoch": 0.4251329415057375, "grad_norm": 0.25229853454428236, "learning_rate": 9.772287489494723e-05, "loss": 0.5642, "step": 1519 }, { "epoch": 0.4254128183599216, "grad_norm": 0.23799928620613445, "learning_rate": 9.771827042171353e-05, "loss": 0.5618, "step": 1520 }, { "epoch": 0.4256926952141058, "grad_norm": 0.2425613119222005, "learning_rate": 9.771366140662816e-05, "loss": 0.5904, "step": 1521 }, { "epoch": 0.42597257206829, "grad_norm": 0.25360883243210447, "learning_rate": 9.770904785012978e-05, "loss": 0.6064, "step": 1522 }, { "epoch": 0.4262524489224741, "grad_norm": 0.23903796352532092, "learning_rate": 9.770442975265752e-05, "loss": 0.5807, "step": 1523 }, { "epoch": 0.4265323257766583, "grad_norm": 0.2367605837276456, "learning_rate": 9.769980711465094e-05, "loss": 0.5687, "step": 1524 }, { "epoch": 0.4268122026308424, "grad_norm": 0.2589316099254001, "learning_rate": 9.769517993655003e-05, "loss": 0.5608, "step": 1525 }, { "epoch": 0.4270920794850266, "grad_norm": 0.2443355361513885, "learning_rate": 9.76905482187952e-05, "loss": 0.5851, "step": 1526 }, { "epoch": 0.42737195633921077, "grad_norm": 0.25885465164488697, "learning_rate": 9.768591196182729e-05, "loss": 0.5841, "step": 1527 }, { "epoch": 0.4276518331933949, "grad_norm": 0.25052779405674647, "learning_rate": 9.768127116608758e-05, "loss": 0.5751, "step": 1528 }, { "epoch": 0.4279317100475791, "grad_norm": 0.23999292142611786, "learning_rate": 9.767662583201779e-05, "loss": 0.5597, "step": 1529 }, { "epoch": 0.4282115869017632, "grad_norm": 0.2296914558111579, "learning_rate": 9.767197596006008e-05, "loss": 0.5628, "step": 1530 }, { "epoch": 0.4284914637559474, "grad_norm": 0.2460827362281314, "learning_rate": 9.7667321550657e-05, "loss": 0.5662, "step": 1531 }, { "epoch": 0.42877134061013156, "grad_norm": 0.2480536926331666, "learning_rate": 9.766266260425159e-05, "loss": 0.5888, "step": 1532 }, { "epoch": 0.4290512174643157, "grad_norm": 0.24603486571963212, "learning_rate": 9.765799912128725e-05, "loss": 0.6004, "step": 1533 }, { "epoch": 0.42933109431849986, "grad_norm": 0.2332324033244973, "learning_rate": 9.765333110220792e-05, "loss": 0.5453, "step": 1534 }, { "epoch": 0.42961097117268404, "grad_norm": 0.25109569714469726, "learning_rate": 9.764865854745784e-05, "loss": 0.5367, "step": 1535 }, { "epoch": 0.42989084802686817, "grad_norm": 0.2407370658163797, "learning_rate": 9.764398145748176e-05, "loss": 0.5739, "step": 1536 }, { "epoch": 0.43017072488105235, "grad_norm": 0.2452166003329777, "learning_rate": 9.763929983272486e-05, "loss": 0.5526, "step": 1537 }, { "epoch": 0.43045060173523647, "grad_norm": 0.23973167204459642, "learning_rate": 9.763461367363276e-05, "loss": 0.5625, "step": 1538 }, { "epoch": 0.43073047858942065, "grad_norm": 0.24240553263222253, "learning_rate": 9.762992298065144e-05, "loss": 0.5596, "step": 1539 }, { "epoch": 0.43101035544360483, "grad_norm": 0.24681000971969366, "learning_rate": 9.762522775422741e-05, "loss": 0.5452, "step": 1540 }, { "epoch": 0.43129023229778896, "grad_norm": 0.2557476843343845, "learning_rate": 9.762052799480755e-05, "loss": 0.58, "step": 1541 }, { "epoch": 0.43157010915197314, "grad_norm": 0.23828427931263624, "learning_rate": 9.761582370283915e-05, "loss": 0.5738, "step": 1542 }, { "epoch": 0.4318499860061573, "grad_norm": 0.256106565070577, "learning_rate": 9.761111487877001e-05, "loss": 0.5881, "step": 1543 }, { "epoch": 0.43212986286034144, "grad_norm": 0.2464782551768715, "learning_rate": 9.760640152304833e-05, "loss": 0.5608, "step": 1544 }, { "epoch": 0.4324097397145256, "grad_norm": 0.23747551380698148, "learning_rate": 9.76016836361227e-05, "loss": 0.5713, "step": 1545 }, { "epoch": 0.43268961656870975, "grad_norm": 0.2548872684408437, "learning_rate": 9.759696121844215e-05, "loss": 0.5618, "step": 1546 }, { "epoch": 0.4329694934228939, "grad_norm": 0.23686173170856198, "learning_rate": 9.759223427045622e-05, "loss": 0.5847, "step": 1547 }, { "epoch": 0.4332493702770781, "grad_norm": 0.2324906952052999, "learning_rate": 9.758750279261478e-05, "loss": 0.5596, "step": 1548 }, { "epoch": 0.43352924713126223, "grad_norm": 0.22842513283373136, "learning_rate": 9.75827667853682e-05, "loss": 0.5743, "step": 1549 }, { "epoch": 0.4338091239854464, "grad_norm": 0.23168661869478185, "learning_rate": 9.757802624916723e-05, "loss": 0.5697, "step": 1550 }, { "epoch": 0.4340890008396306, "grad_norm": 0.25140947684336423, "learning_rate": 9.757328118446309e-05, "loss": 0.5819, "step": 1551 }, { "epoch": 0.4343688776938147, "grad_norm": 0.2417463533284766, "learning_rate": 9.756853159170742e-05, "loss": 0.5475, "step": 1552 }, { "epoch": 0.4346487545479989, "grad_norm": 0.22934434928297026, "learning_rate": 9.75637774713523e-05, "loss": 0.5475, "step": 1553 }, { "epoch": 0.434928631402183, "grad_norm": 0.24110459703507403, "learning_rate": 9.755901882385021e-05, "loss": 0.5679, "step": 1554 }, { "epoch": 0.4352085082563672, "grad_norm": 0.2435000187314083, "learning_rate": 9.75542556496541e-05, "loss": 0.5409, "step": 1555 }, { "epoch": 0.4354883851105514, "grad_norm": 0.2512030616213865, "learning_rate": 9.75494879492173e-05, "loss": 0.5712, "step": 1556 }, { "epoch": 0.4357682619647355, "grad_norm": 0.2490874474172369, "learning_rate": 9.754471572299363e-05, "loss": 0.5814, "step": 1557 }, { "epoch": 0.4360481388189197, "grad_norm": 0.2516458467604343, "learning_rate": 9.753993897143731e-05, "loss": 0.5688, "step": 1558 }, { "epoch": 0.4363280156731038, "grad_norm": 0.23361860635570805, "learning_rate": 9.753515769500299e-05, "loss": 0.5689, "step": 1559 }, { "epoch": 0.436607892527288, "grad_norm": 0.2587815211957352, "learning_rate": 9.753037189414575e-05, "loss": 0.582, "step": 1560 }, { "epoch": 0.43688776938147217, "grad_norm": 0.24267621433328138, "learning_rate": 9.75255815693211e-05, "loss": 0.5866, "step": 1561 }, { "epoch": 0.4371676462356563, "grad_norm": 0.24249945491986472, "learning_rate": 9.752078672098502e-05, "loss": 0.561, "step": 1562 }, { "epoch": 0.4374475230898405, "grad_norm": 0.2475570815863539, "learning_rate": 9.751598734959384e-05, "loss": 0.5814, "step": 1563 }, { "epoch": 0.43772739994402465, "grad_norm": 0.231386974038659, "learning_rate": 9.75111834556044e-05, "loss": 0.5391, "step": 1564 }, { "epoch": 0.4380072767982088, "grad_norm": 0.24466180538359716, "learning_rate": 9.750637503947391e-05, "loss": 0.5837, "step": 1565 }, { "epoch": 0.43828715365239296, "grad_norm": 0.2417070291325543, "learning_rate": 9.750156210166006e-05, "loss": 0.5797, "step": 1566 }, { "epoch": 0.4385670305065771, "grad_norm": 0.23817213169576182, "learning_rate": 9.749674464262094e-05, "loss": 0.5782, "step": 1567 }, { "epoch": 0.43884690736076126, "grad_norm": 0.26182755912460104, "learning_rate": 9.749192266281508e-05, "loss": 0.5454, "step": 1568 }, { "epoch": 0.43912678421494544, "grad_norm": 0.23464678913914763, "learning_rate": 9.748709616270144e-05, "loss": 0.5851, "step": 1569 }, { "epoch": 0.43940666106912957, "grad_norm": 0.24434762824944903, "learning_rate": 9.748226514273941e-05, "loss": 0.5386, "step": 1570 }, { "epoch": 0.43968653792331375, "grad_norm": 0.24697878019438874, "learning_rate": 9.747742960338881e-05, "loss": 0.5731, "step": 1571 }, { "epoch": 0.4399664147774979, "grad_norm": 0.23124415337553447, "learning_rate": 9.747258954510989e-05, "loss": 0.5824, "step": 1572 }, { "epoch": 0.44024629163168205, "grad_norm": 0.23813567944378347, "learning_rate": 9.746774496836332e-05, "loss": 0.5638, "step": 1573 }, { "epoch": 0.44052616848586623, "grad_norm": 0.24249100484216313, "learning_rate": 9.746289587361021e-05, "loss": 0.5721, "step": 1574 }, { "epoch": 0.44080604534005036, "grad_norm": 0.25464163267322554, "learning_rate": 9.745804226131211e-05, "loss": 0.5585, "step": 1575 }, { "epoch": 0.44108592219423454, "grad_norm": 0.24342188484477878, "learning_rate": 9.745318413193099e-05, "loss": 0.5801, "step": 1576 }, { "epoch": 0.4413657990484187, "grad_norm": 0.2563628080106205, "learning_rate": 9.744832148592923e-05, "loss": 0.5895, "step": 1577 }, { "epoch": 0.44164567590260284, "grad_norm": 0.23789494903848302, "learning_rate": 9.744345432376967e-05, "loss": 0.5859, "step": 1578 }, { "epoch": 0.441925552756787, "grad_norm": 0.2462756686319238, "learning_rate": 9.743858264591559e-05, "loss": 0.5451, "step": 1579 }, { "epoch": 0.44220542961097115, "grad_norm": 0.23791103680326336, "learning_rate": 9.743370645283066e-05, "loss": 0.5452, "step": 1580 }, { "epoch": 0.4424853064651553, "grad_norm": 0.2279743487899946, "learning_rate": 9.742882574497898e-05, "loss": 0.5452, "step": 1581 }, { "epoch": 0.4427651833193395, "grad_norm": 0.22940390173471523, "learning_rate": 9.742394052282513e-05, "loss": 0.574, "step": 1582 }, { "epoch": 0.44304506017352363, "grad_norm": 0.23052987503067043, "learning_rate": 9.741905078683407e-05, "loss": 0.5448, "step": 1583 }, { "epoch": 0.4433249370277078, "grad_norm": 0.24075917632009064, "learning_rate": 9.741415653747123e-05, "loss": 0.5501, "step": 1584 }, { "epoch": 0.443604813881892, "grad_norm": 0.23917340606953735, "learning_rate": 9.740925777520242e-05, "loss": 0.5458, "step": 1585 }, { "epoch": 0.4438846907360761, "grad_norm": 0.23908385812899932, "learning_rate": 9.740435450049392e-05, "loss": 0.5795, "step": 1586 }, { "epoch": 0.4441645675902603, "grad_norm": 0.2401723385481669, "learning_rate": 9.739944671381243e-05, "loss": 0.5602, "step": 1587 }, { "epoch": 0.4444444444444444, "grad_norm": 0.23661393948970766, "learning_rate": 9.739453441562507e-05, "loss": 0.5552, "step": 1588 }, { "epoch": 0.4447243212986286, "grad_norm": 0.2345794145401572, "learning_rate": 9.73896176063994e-05, "loss": 0.5464, "step": 1589 }, { "epoch": 0.4450041981528128, "grad_norm": 0.23041977175787082, "learning_rate": 9.73846962866034e-05, "loss": 0.5612, "step": 1590 }, { "epoch": 0.4452840750069969, "grad_norm": 0.2530760752289133, "learning_rate": 9.737977045670548e-05, "loss": 0.5611, "step": 1591 }, { "epoch": 0.4455639518611811, "grad_norm": 0.24151200181727236, "learning_rate": 9.737484011717448e-05, "loss": 0.5486, "step": 1592 }, { "epoch": 0.44584382871536526, "grad_norm": 0.24737202608433992, "learning_rate": 9.73699052684797e-05, "loss": 0.5466, "step": 1593 }, { "epoch": 0.4461237055695494, "grad_norm": 0.22787072108928844, "learning_rate": 9.736496591109081e-05, "loss": 0.5498, "step": 1594 }, { "epoch": 0.44640358242373357, "grad_norm": 0.23838951228491187, "learning_rate": 9.736002204547795e-05, "loss": 0.5587, "step": 1595 }, { "epoch": 0.4466834592779177, "grad_norm": 0.23089190730961634, "learning_rate": 9.73550736721117e-05, "loss": 0.5546, "step": 1596 }, { "epoch": 0.4469633361321019, "grad_norm": 0.22923545709162893, "learning_rate": 9.735012079146302e-05, "loss": 0.5705, "step": 1597 }, { "epoch": 0.44724321298628605, "grad_norm": 0.23597042843706925, "learning_rate": 9.734516340400335e-05, "loss": 0.5516, "step": 1598 }, { "epoch": 0.4475230898404702, "grad_norm": 0.24282398200242442, "learning_rate": 9.73402015102045e-05, "loss": 0.5961, "step": 1599 }, { "epoch": 0.44780296669465436, "grad_norm": 0.23585467598423718, "learning_rate": 9.73352351105388e-05, "loss": 0.5841, "step": 1600 }, { "epoch": 0.44808284354883854, "grad_norm": 0.2630123217166499, "learning_rate": 9.733026420547892e-05, "loss": 0.6106, "step": 1601 }, { "epoch": 0.44836272040302266, "grad_norm": 0.24183326676579633, "learning_rate": 9.732528879549801e-05, "loss": 0.5877, "step": 1602 }, { "epoch": 0.44864259725720684, "grad_norm": 0.24433069568397509, "learning_rate": 9.73203088810696e-05, "loss": 0.5749, "step": 1603 }, { "epoch": 0.44892247411139097, "grad_norm": 0.23445527046913978, "learning_rate": 9.731532446266772e-05, "loss": 0.5982, "step": 1604 }, { "epoch": 0.44920235096557515, "grad_norm": 0.23672023092324654, "learning_rate": 9.731033554076678e-05, "loss": 0.5656, "step": 1605 }, { "epoch": 0.44948222781975933, "grad_norm": 0.25124651998982117, "learning_rate": 9.730534211584161e-05, "loss": 0.5578, "step": 1606 }, { "epoch": 0.44976210467394345, "grad_norm": 0.24062654391573005, "learning_rate": 9.73003441883675e-05, "loss": 0.5706, "step": 1607 }, { "epoch": 0.45004198152812763, "grad_norm": 0.23863690695406042, "learning_rate": 9.729534175882016e-05, "loss": 0.5803, "step": 1608 }, { "epoch": 0.45032185838231176, "grad_norm": 0.2364801765216035, "learning_rate": 9.729033482767572e-05, "loss": 0.5432, "step": 1609 }, { "epoch": 0.45060173523649594, "grad_norm": 0.24493619611199438, "learning_rate": 9.728532339541074e-05, "loss": 0.541, "step": 1610 }, { "epoch": 0.4508816120906801, "grad_norm": 0.23030588426816914, "learning_rate": 9.728030746250221e-05, "loss": 0.5613, "step": 1611 }, { "epoch": 0.45116148894486424, "grad_norm": 0.2435388946534101, "learning_rate": 9.727528702942755e-05, "loss": 0.574, "step": 1612 }, { "epoch": 0.4514413657990484, "grad_norm": 0.2335591545712549, "learning_rate": 9.727026209666461e-05, "loss": 0.5432, "step": 1613 }, { "epoch": 0.4517212426532326, "grad_norm": 0.25186953366070236, "learning_rate": 9.726523266469167e-05, "loss": 0.568, "step": 1614 }, { "epoch": 0.4520011195074167, "grad_norm": 0.2452529418604345, "learning_rate": 9.726019873398742e-05, "loss": 0.5742, "step": 1615 }, { "epoch": 0.4522809963616009, "grad_norm": 0.24267589547772744, "learning_rate": 9.725516030503101e-05, "loss": 0.5635, "step": 1616 }, { "epoch": 0.45256087321578503, "grad_norm": 0.24073431048985094, "learning_rate": 9.7250117378302e-05, "loss": 0.5799, "step": 1617 }, { "epoch": 0.4528407500699692, "grad_norm": 0.23325718270101103, "learning_rate": 9.724506995428036e-05, "loss": 0.5849, "step": 1618 }, { "epoch": 0.4531206269241534, "grad_norm": 0.2314095553527315, "learning_rate": 9.724001803344652e-05, "loss": 0.5392, "step": 1619 }, { "epoch": 0.4534005037783375, "grad_norm": 0.24920588938072338, "learning_rate": 9.723496161628132e-05, "loss": 0.5367, "step": 1620 }, { "epoch": 0.4536803806325217, "grad_norm": 0.22744504084522982, "learning_rate": 9.722990070326604e-05, "loss": 0.557, "step": 1621 }, { "epoch": 0.4539602574867059, "grad_norm": 0.25589444054133037, "learning_rate": 9.722483529488238e-05, "loss": 0.5929, "step": 1622 }, { "epoch": 0.45424013434089, "grad_norm": 0.23229518165002788, "learning_rate": 9.721976539161245e-05, "loss": 0.5634, "step": 1623 }, { "epoch": 0.4545200111950742, "grad_norm": 0.23435892514032364, "learning_rate": 9.721469099393883e-05, "loss": 0.5753, "step": 1624 }, { "epoch": 0.4547998880492583, "grad_norm": 0.22410564597322272, "learning_rate": 9.720961210234449e-05, "loss": 0.5382, "step": 1625 }, { "epoch": 0.4550797649034425, "grad_norm": 0.2368563135184631, "learning_rate": 9.720452871731285e-05, "loss": 0.582, "step": 1626 }, { "epoch": 0.45535964175762667, "grad_norm": 0.24090942265379905, "learning_rate": 9.719944083932773e-05, "loss": 0.5496, "step": 1627 }, { "epoch": 0.4556395186118108, "grad_norm": 0.24205985416292858, "learning_rate": 9.719434846887343e-05, "loss": 0.5972, "step": 1628 }, { "epoch": 0.45591939546599497, "grad_norm": 0.24637174261436595, "learning_rate": 9.718925160643461e-05, "loss": 0.5547, "step": 1629 }, { "epoch": 0.4561992723201791, "grad_norm": 0.24242927759220934, "learning_rate": 9.718415025249644e-05, "loss": 0.5608, "step": 1630 }, { "epoch": 0.4564791491743633, "grad_norm": 0.2398370017922551, "learning_rate": 9.71790444075444e-05, "loss": 0.5941, "step": 1631 }, { "epoch": 0.45675902602854745, "grad_norm": 0.22464648132157194, "learning_rate": 9.717393407206453e-05, "loss": 0.5785, "step": 1632 }, { "epoch": 0.4570389028827316, "grad_norm": 0.2359371075076253, "learning_rate": 9.71688192465432e-05, "loss": 0.562, "step": 1633 }, { "epoch": 0.45731877973691576, "grad_norm": 0.23216743239512033, "learning_rate": 9.716369993146725e-05, "loss": 0.5873, "step": 1634 }, { "epoch": 0.45759865659109994, "grad_norm": 0.24372612182360875, "learning_rate": 9.715857612732397e-05, "loss": 0.6053, "step": 1635 }, { "epoch": 0.45787853344528406, "grad_norm": 0.2424657932214619, "learning_rate": 9.715344783460098e-05, "loss": 0.5714, "step": 1636 }, { "epoch": 0.45815841029946824, "grad_norm": 0.23375270597332154, "learning_rate": 9.714831505378646e-05, "loss": 0.5747, "step": 1637 }, { "epoch": 0.45843828715365237, "grad_norm": 0.24199967270319583, "learning_rate": 9.714317778536891e-05, "loss": 0.5743, "step": 1638 }, { "epoch": 0.45871816400783655, "grad_norm": 0.2531179895334508, "learning_rate": 9.713803602983731e-05, "loss": 0.568, "step": 1639 }, { "epoch": 0.45899804086202073, "grad_norm": 0.25284719666112554, "learning_rate": 9.713288978768107e-05, "loss": 0.5677, "step": 1640 }, { "epoch": 0.45927791771620485, "grad_norm": 0.245645975031102, "learning_rate": 9.712773905938999e-05, "loss": 0.5799, "step": 1641 }, { "epoch": 0.45955779457038903, "grad_norm": 0.23607686469748843, "learning_rate": 9.712258384545432e-05, "loss": 0.5681, "step": 1642 }, { "epoch": 0.4598376714245732, "grad_norm": 0.24529416278386937, "learning_rate": 9.711742414636476e-05, "loss": 0.5939, "step": 1643 }, { "epoch": 0.46011754827875734, "grad_norm": 0.24808453699518654, "learning_rate": 9.711225996261238e-05, "loss": 0.5815, "step": 1644 }, { "epoch": 0.4603974251329415, "grad_norm": 0.24072875374815905, "learning_rate": 9.710709129468873e-05, "loss": 0.5911, "step": 1645 }, { "epoch": 0.46067730198712564, "grad_norm": 0.24379613392778396, "learning_rate": 9.710191814308577e-05, "loss": 0.5782, "step": 1646 }, { "epoch": 0.4609571788413098, "grad_norm": 0.23454801611000467, "learning_rate": 9.709674050829588e-05, "loss": 0.5696, "step": 1647 }, { "epoch": 0.461237055695494, "grad_norm": 0.24632377157127103, "learning_rate": 9.709155839081186e-05, "loss": 0.575, "step": 1648 }, { "epoch": 0.4615169325496781, "grad_norm": 0.24485361784242987, "learning_rate": 9.708637179112696e-05, "loss": 0.5629, "step": 1649 }, { "epoch": 0.4617968094038623, "grad_norm": 0.2429849320197462, "learning_rate": 9.708118070973483e-05, "loss": 0.5582, "step": 1650 }, { "epoch": 0.46207668625804643, "grad_norm": 0.23895538009941178, "learning_rate": 9.707598514712956e-05, "loss": 0.5516, "step": 1651 }, { "epoch": 0.4623565631122306, "grad_norm": 0.2355035827858529, "learning_rate": 9.707078510380569e-05, "loss": 0.5502, "step": 1652 }, { "epoch": 0.4626364399664148, "grad_norm": 0.25052010000277225, "learning_rate": 9.706558058025815e-05, "loss": 0.5735, "step": 1653 }, { "epoch": 0.4629163168205989, "grad_norm": 0.2449698774305263, "learning_rate": 9.70603715769823e-05, "loss": 0.5363, "step": 1654 }, { "epoch": 0.4631961936747831, "grad_norm": 0.2362853939678543, "learning_rate": 9.705515809447394e-05, "loss": 0.5724, "step": 1655 }, { "epoch": 0.4634760705289673, "grad_norm": 0.2568714356965974, "learning_rate": 9.70499401332293e-05, "loss": 0.575, "step": 1656 }, { "epoch": 0.4637559473831514, "grad_norm": 0.24169809634696723, "learning_rate": 9.704471769374501e-05, "loss": 0.5684, "step": 1657 }, { "epoch": 0.4640358242373356, "grad_norm": 0.24687664486505012, "learning_rate": 9.703949077651817e-05, "loss": 0.5745, "step": 1658 }, { "epoch": 0.4643157010915197, "grad_norm": 0.2444165408649831, "learning_rate": 9.703425938204627e-05, "loss": 0.5369, "step": 1659 }, { "epoch": 0.4645955779457039, "grad_norm": 0.2431557857220034, "learning_rate": 9.702902351082723e-05, "loss": 0.5499, "step": 1660 }, { "epoch": 0.46487545479988807, "grad_norm": 0.23202598566873428, "learning_rate": 9.702378316335942e-05, "loss": 0.5755, "step": 1661 }, { "epoch": 0.4651553316540722, "grad_norm": 0.2288877686751276, "learning_rate": 9.70185383401416e-05, "loss": 0.5436, "step": 1662 }, { "epoch": 0.46543520850825637, "grad_norm": 0.24748492346111, "learning_rate": 9.701328904167298e-05, "loss": 0.5757, "step": 1663 }, { "epoch": 0.46571508536244055, "grad_norm": 0.23227249736511413, "learning_rate": 9.700803526845319e-05, "loss": 0.5808, "step": 1664 }, { "epoch": 0.4659949622166247, "grad_norm": 0.23359697484490272, "learning_rate": 9.700277702098231e-05, "loss": 0.5551, "step": 1665 }, { "epoch": 0.46627483907080886, "grad_norm": 0.24117964707666567, "learning_rate": 9.699751429976079e-05, "loss": 0.572, "step": 1666 }, { "epoch": 0.466554715924993, "grad_norm": 0.24306895681163349, "learning_rate": 9.699224710528955e-05, "loss": 0.5722, "step": 1667 }, { "epoch": 0.46683459277917716, "grad_norm": 0.23446319214186462, "learning_rate": 9.698697543806994e-05, "loss": 0.5729, "step": 1668 }, { "epoch": 0.46711446963336134, "grad_norm": 0.24165369931898403, "learning_rate": 9.69816992986037e-05, "loss": 0.5557, "step": 1669 }, { "epoch": 0.46739434648754546, "grad_norm": 0.22916898589779047, "learning_rate": 9.697641868739303e-05, "loss": 0.5581, "step": 1670 }, { "epoch": 0.46767422334172964, "grad_norm": 0.24919921268999934, "learning_rate": 9.697113360494052e-05, "loss": 0.5784, "step": 1671 }, { "epoch": 0.4679541001959138, "grad_norm": 0.23349556147225378, "learning_rate": 9.696584405174925e-05, "loss": 0.5531, "step": 1672 }, { "epoch": 0.46823397705009795, "grad_norm": 0.23301988889837738, "learning_rate": 9.696055002832263e-05, "loss": 0.5698, "step": 1673 }, { "epoch": 0.46851385390428213, "grad_norm": 0.23371805922133018, "learning_rate": 9.695525153516459e-05, "loss": 0.5781, "step": 1674 }, { "epoch": 0.46879373075846625, "grad_norm": 0.2445795859274827, "learning_rate": 9.694994857277942e-05, "loss": 0.5807, "step": 1675 }, { "epoch": 0.46907360761265043, "grad_norm": 0.24647542564702576, "learning_rate": 9.694464114167186e-05, "loss": 0.5602, "step": 1676 }, { "epoch": 0.4693534844668346, "grad_norm": 0.24022018858423302, "learning_rate": 9.693932924234708e-05, "loss": 0.5654, "step": 1677 }, { "epoch": 0.46963336132101874, "grad_norm": 0.23281322118283435, "learning_rate": 9.693401287531067e-05, "loss": 0.5574, "step": 1678 }, { "epoch": 0.4699132381752029, "grad_norm": 0.23754798733117446, "learning_rate": 9.692869204106866e-05, "loss": 0.5647, "step": 1679 }, { "epoch": 0.47019311502938704, "grad_norm": 0.23969572637873995, "learning_rate": 9.692336674012746e-05, "loss": 0.5678, "step": 1680 }, { "epoch": 0.4704729918835712, "grad_norm": 0.23776421914820323, "learning_rate": 9.691803697299396e-05, "loss": 0.5666, "step": 1681 }, { "epoch": 0.4707528687377554, "grad_norm": 0.2245483928489407, "learning_rate": 9.691270274017543e-05, "loss": 0.541, "step": 1682 }, { "epoch": 0.47103274559193953, "grad_norm": 0.23614537930723442, "learning_rate": 9.690736404217959e-05, "loss": 0.5552, "step": 1683 }, { "epoch": 0.4713126224461237, "grad_norm": 0.23576106419016976, "learning_rate": 9.69020208795146e-05, "loss": 0.5689, "step": 1684 }, { "epoch": 0.4715924993003079, "grad_norm": 0.2423673322680066, "learning_rate": 9.6896673252689e-05, "loss": 0.5606, "step": 1685 }, { "epoch": 0.471872376154492, "grad_norm": 0.2343894728813448, "learning_rate": 9.68913211622118e-05, "loss": 0.5305, "step": 1686 }, { "epoch": 0.4721522530086762, "grad_norm": 0.23548944963250124, "learning_rate": 9.68859646085924e-05, "loss": 0.567, "step": 1687 }, { "epoch": 0.4724321298628603, "grad_norm": 0.23966407867775644, "learning_rate": 9.688060359234064e-05, "loss": 0.5747, "step": 1688 }, { "epoch": 0.4727120067170445, "grad_norm": 0.24706733837968547, "learning_rate": 9.687523811396679e-05, "loss": 0.5548, "step": 1689 }, { "epoch": 0.4729918835712287, "grad_norm": 0.23784005195636673, "learning_rate": 9.686986817398155e-05, "loss": 0.5641, "step": 1690 }, { "epoch": 0.4732717604254128, "grad_norm": 0.24228957192858178, "learning_rate": 9.686449377289601e-05, "loss": 0.5598, "step": 1691 }, { "epoch": 0.473551637279597, "grad_norm": 0.24173114213297364, "learning_rate": 9.685911491122175e-05, "loss": 0.5577, "step": 1692 }, { "epoch": 0.47383151413378116, "grad_norm": 0.22701719607890072, "learning_rate": 9.685373158947067e-05, "loss": 0.5475, "step": 1693 }, { "epoch": 0.4741113909879653, "grad_norm": 0.24187651396474402, "learning_rate": 9.684834380815522e-05, "loss": 0.5488, "step": 1694 }, { "epoch": 0.47439126784214947, "grad_norm": 0.2508222727693515, "learning_rate": 9.684295156778815e-05, "loss": 0.5827, "step": 1695 }, { "epoch": 0.4746711446963336, "grad_norm": 0.22557444004295016, "learning_rate": 9.683755486888277e-05, "loss": 0.5873, "step": 1696 }, { "epoch": 0.47495102155051777, "grad_norm": 0.2339936997134689, "learning_rate": 9.683215371195267e-05, "loss": 0.5967, "step": 1697 }, { "epoch": 0.47523089840470195, "grad_norm": 0.23302300536397813, "learning_rate": 9.682674809751198e-05, "loss": 0.5778, "step": 1698 }, { "epoch": 0.4755107752588861, "grad_norm": 0.2284584881896518, "learning_rate": 9.682133802607519e-05, "loss": 0.5612, "step": 1699 }, { "epoch": 0.47579065211307026, "grad_norm": 0.2482456132223044, "learning_rate": 9.681592349815725e-05, "loss": 0.5969, "step": 1700 }, { "epoch": 0.4760705289672544, "grad_norm": 0.2371325439267351, "learning_rate": 9.681050451427349e-05, "loss": 0.5759, "step": 1701 }, { "epoch": 0.47635040582143856, "grad_norm": 0.2276233032410326, "learning_rate": 9.680508107493974e-05, "loss": 0.5692, "step": 1702 }, { "epoch": 0.47663028267562274, "grad_norm": 0.23006587175018722, "learning_rate": 9.679965318067214e-05, "loss": 0.5646, "step": 1703 }, { "epoch": 0.47691015952980687, "grad_norm": 0.23438779914728478, "learning_rate": 9.679422083198738e-05, "loss": 0.5521, "step": 1704 }, { "epoch": 0.47719003638399105, "grad_norm": 0.2376983080464345, "learning_rate": 9.678878402940249e-05, "loss": 0.5657, "step": 1705 }, { "epoch": 0.4774699132381752, "grad_norm": 0.2533956785568868, "learning_rate": 9.678334277343493e-05, "loss": 0.5749, "step": 1706 }, { "epoch": 0.47774979009235935, "grad_norm": 0.24240802881417522, "learning_rate": 9.677789706460263e-05, "loss": 0.5844, "step": 1707 }, { "epoch": 0.47802966694654353, "grad_norm": 0.2368764108420469, "learning_rate": 9.67724469034239e-05, "loss": 0.5707, "step": 1708 }, { "epoch": 0.47830954380072765, "grad_norm": 0.23976498497368334, "learning_rate": 9.676699229041749e-05, "loss": 0.5611, "step": 1709 }, { "epoch": 0.47858942065491183, "grad_norm": 0.23873554876842037, "learning_rate": 9.676153322610259e-05, "loss": 0.5582, "step": 1710 }, { "epoch": 0.478869297509096, "grad_norm": 0.24447888689261207, "learning_rate": 9.675606971099878e-05, "loss": 0.561, "step": 1711 }, { "epoch": 0.47914917436328014, "grad_norm": 0.23776601617560683, "learning_rate": 9.675060174562607e-05, "loss": 0.5662, "step": 1712 }, { "epoch": 0.4794290512174643, "grad_norm": 0.2287431865973953, "learning_rate": 9.674512933050493e-05, "loss": 0.5485, "step": 1713 }, { "epoch": 0.4797089280716485, "grad_norm": 0.2460925836878855, "learning_rate": 9.673965246615621e-05, "loss": 0.5714, "step": 1714 }, { "epoch": 0.4799888049258326, "grad_norm": 0.23900950341817997, "learning_rate": 9.673417115310121e-05, "loss": 0.5473, "step": 1715 }, { "epoch": 0.4802686817800168, "grad_norm": 0.22945766993352582, "learning_rate": 9.672868539186166e-05, "loss": 0.5541, "step": 1716 }, { "epoch": 0.48054855863420093, "grad_norm": 0.2474126864137905, "learning_rate": 9.672319518295965e-05, "loss": 0.5437, "step": 1717 }, { "epoch": 0.4808284354883851, "grad_norm": 0.2253507794048116, "learning_rate": 9.67177005269178e-05, "loss": 0.5754, "step": 1718 }, { "epoch": 0.4811083123425693, "grad_norm": 0.2214830764551037, "learning_rate": 9.671220142425905e-05, "loss": 0.5437, "step": 1719 }, { "epoch": 0.4813881891967534, "grad_norm": 0.24453365086702913, "learning_rate": 9.670669787550682e-05, "loss": 0.5771, "step": 1720 }, { "epoch": 0.4816680660509376, "grad_norm": 0.22646871005462496, "learning_rate": 9.670118988118493e-05, "loss": 0.5701, "step": 1721 }, { "epoch": 0.4819479429051218, "grad_norm": 0.24667167448757019, "learning_rate": 9.669567744181767e-05, "loss": 0.5627, "step": 1722 }, { "epoch": 0.4822278197593059, "grad_norm": 0.2247237263507309, "learning_rate": 9.669016055792967e-05, "loss": 0.5553, "step": 1723 }, { "epoch": 0.4825076966134901, "grad_norm": 0.8916908358012535, "learning_rate": 9.668463923004608e-05, "loss": 0.5348, "step": 1724 }, { "epoch": 0.4827875734676742, "grad_norm": 0.2292032131128853, "learning_rate": 9.667911345869239e-05, "loss": 0.5717, "step": 1725 }, { "epoch": 0.4830674503218584, "grad_norm": 0.23436419844039819, "learning_rate": 9.667358324439455e-05, "loss": 0.5577, "step": 1726 }, { "epoch": 0.48334732717604256, "grad_norm": 0.23307361568857196, "learning_rate": 9.666804858767894e-05, "loss": 0.5396, "step": 1727 }, { "epoch": 0.4836272040302267, "grad_norm": 0.24883370720128642, "learning_rate": 9.666250948907234e-05, "loss": 0.5898, "step": 1728 }, { "epoch": 0.48390708088441087, "grad_norm": 0.2380820186350895, "learning_rate": 9.665696594910196e-05, "loss": 0.5791, "step": 1729 }, { "epoch": 0.484186957738595, "grad_norm": 0.2421076443682589, "learning_rate": 9.665141796829545e-05, "loss": 0.5444, "step": 1730 }, { "epoch": 0.48446683459277917, "grad_norm": 0.24762436331514348, "learning_rate": 9.664586554718086e-05, "loss": 0.5533, "step": 1731 }, { "epoch": 0.48474671144696335, "grad_norm": 0.2496491363838159, "learning_rate": 9.66403086862867e-05, "loss": 0.5731, "step": 1732 }, { "epoch": 0.4850265883011475, "grad_norm": 0.23970275567042595, "learning_rate": 9.663474738614185e-05, "loss": 0.5886, "step": 1733 }, { "epoch": 0.48530646515533166, "grad_norm": 0.24925697169184896, "learning_rate": 9.662918164727563e-05, "loss": 0.5503, "step": 1734 }, { "epoch": 0.48558634200951584, "grad_norm": 0.24197149218541705, "learning_rate": 9.662361147021779e-05, "loss": 0.5749, "step": 1735 }, { "epoch": 0.48586621886369996, "grad_norm": 0.2248961182333366, "learning_rate": 9.661803685549853e-05, "loss": 0.5562, "step": 1736 }, { "epoch": 0.48614609571788414, "grad_norm": 0.23254674182459417, "learning_rate": 9.661245780364843e-05, "loss": 0.5697, "step": 1737 }, { "epoch": 0.48642597257206827, "grad_norm": 0.23923114242653176, "learning_rate": 9.66068743151985e-05, "loss": 0.5622, "step": 1738 }, { "epoch": 0.48670584942625245, "grad_norm": 0.23552215352744707, "learning_rate": 9.660128639068018e-05, "loss": 0.5411, "step": 1739 }, { "epoch": 0.4869857262804366, "grad_norm": 0.23970839749696465, "learning_rate": 9.659569403062535e-05, "loss": 0.5437, "step": 1740 }, { "epoch": 0.48726560313462075, "grad_norm": 0.2367322032905853, "learning_rate": 9.659009723556627e-05, "loss": 0.5724, "step": 1741 }, { "epoch": 0.48754547998880493, "grad_norm": 0.22170711596683768, "learning_rate": 9.658449600603568e-05, "loss": 0.5712, "step": 1742 }, { "epoch": 0.4878253568429891, "grad_norm": 0.24393327932601608, "learning_rate": 9.657889034256666e-05, "loss": 0.5631, "step": 1743 }, { "epoch": 0.48810523369717324, "grad_norm": 0.2598732846882264, "learning_rate": 9.65732802456928e-05, "loss": 0.6077, "step": 1744 }, { "epoch": 0.4883851105513574, "grad_norm": 0.23872536309296974, "learning_rate": 9.656766571594805e-05, "loss": 0.5413, "step": 1745 }, { "epoch": 0.48866498740554154, "grad_norm": 0.23088966250187232, "learning_rate": 9.656204675386682e-05, "loss": 0.5669, "step": 1746 }, { "epoch": 0.4889448642597257, "grad_norm": 0.228682225348453, "learning_rate": 9.655642335998391e-05, "loss": 0.563, "step": 1747 }, { "epoch": 0.4892247411139099, "grad_norm": 0.24228207172083097, "learning_rate": 9.655079553483457e-05, "loss": 0.5695, "step": 1748 }, { "epoch": 0.489504617968094, "grad_norm": 0.23134302629555473, "learning_rate": 9.654516327895445e-05, "loss": 0.5505, "step": 1749 }, { "epoch": 0.4897844948222782, "grad_norm": 0.24081654112806575, "learning_rate": 9.653952659287963e-05, "loss": 0.5868, "step": 1750 }, { "epoch": 0.49006437167646233, "grad_norm": 0.2301704112159972, "learning_rate": 9.653388547714665e-05, "loss": 0.5883, "step": 1751 }, { "epoch": 0.4903442485306465, "grad_norm": 0.23815098899205434, "learning_rate": 9.652823993229239e-05, "loss": 0.5508, "step": 1752 }, { "epoch": 0.4906241253848307, "grad_norm": 0.2480390974116189, "learning_rate": 9.65225899588542e-05, "loss": 0.5623, "step": 1753 }, { "epoch": 0.4909040022390148, "grad_norm": 0.23597688623387672, "learning_rate": 9.651693555736986e-05, "loss": 0.5768, "step": 1754 }, { "epoch": 0.491183879093199, "grad_norm": 0.23803132760044657, "learning_rate": 9.651127672837757e-05, "loss": 0.5698, "step": 1755 }, { "epoch": 0.4914637559473832, "grad_norm": 0.2252120497537188, "learning_rate": 9.650561347241592e-05, "loss": 0.5561, "step": 1756 }, { "epoch": 0.4917436328015673, "grad_norm": 0.2338161916293681, "learning_rate": 9.649994579002392e-05, "loss": 0.5761, "step": 1757 }, { "epoch": 0.4920235096557515, "grad_norm": 0.22758790036460252, "learning_rate": 9.649427368174109e-05, "loss": 0.5487, "step": 1758 }, { "epoch": 0.4923033865099356, "grad_norm": 0.23630527762162581, "learning_rate": 9.648859714810725e-05, "loss": 0.545, "step": 1759 }, { "epoch": 0.4925832633641198, "grad_norm": 0.24074390662221748, "learning_rate": 9.648291618966273e-05, "loss": 0.59, "step": 1760 }, { "epoch": 0.49286314021830396, "grad_norm": 0.2322264115487179, "learning_rate": 9.647723080694821e-05, "loss": 0.5619, "step": 1761 }, { "epoch": 0.4931430170724881, "grad_norm": 0.25287558279676103, "learning_rate": 9.647154100050486e-05, "loss": 0.5504, "step": 1762 }, { "epoch": 0.49342289392667227, "grad_norm": 0.2383217604267359, "learning_rate": 9.646584677087422e-05, "loss": 0.5319, "step": 1763 }, { "epoch": 0.49370277078085645, "grad_norm": 0.23082191881659192, "learning_rate": 9.646014811859829e-05, "loss": 0.5466, "step": 1764 }, { "epoch": 0.4939826476350406, "grad_norm": 0.23398471193474196, "learning_rate": 9.645444504421944e-05, "loss": 0.5571, "step": 1765 }, { "epoch": 0.49426252448922475, "grad_norm": 0.23633229687218657, "learning_rate": 9.644873754828052e-05, "loss": 0.5561, "step": 1766 }, { "epoch": 0.4945424013434089, "grad_norm": 0.25790489199568895, "learning_rate": 9.644302563132475e-05, "loss": 0.5523, "step": 1767 }, { "epoch": 0.49482227819759306, "grad_norm": 0.23756523765843604, "learning_rate": 9.64373092938958e-05, "loss": 0.5772, "step": 1768 }, { "epoch": 0.49510215505177724, "grad_norm": 0.24904463596657922, "learning_rate": 9.643158853653778e-05, "loss": 0.5706, "step": 1769 }, { "epoch": 0.49538203190596136, "grad_norm": 0.2302259622916314, "learning_rate": 9.642586335979517e-05, "loss": 0.5593, "step": 1770 }, { "epoch": 0.49566190876014554, "grad_norm": 0.23618995273954105, "learning_rate": 9.64201337642129e-05, "loss": 0.5385, "step": 1771 }, { "epoch": 0.4959417856143297, "grad_norm": 0.25011648673412507, "learning_rate": 9.641439975033631e-05, "loss": 0.592, "step": 1772 }, { "epoch": 0.49622166246851385, "grad_norm": 0.23046022743268094, "learning_rate": 9.640866131871115e-05, "loss": 0.5798, "step": 1773 }, { "epoch": 0.496501539322698, "grad_norm": 0.24263488443094086, "learning_rate": 9.640291846988367e-05, "loss": 0.5824, "step": 1774 }, { "epoch": 0.49678141617688215, "grad_norm": 0.23480203611455353, "learning_rate": 9.639717120440042e-05, "loss": 0.5492, "step": 1775 }, { "epoch": 0.49706129303106633, "grad_norm": 0.2324077757263914, "learning_rate": 9.639141952280845e-05, "loss": 0.5806, "step": 1776 }, { "epoch": 0.4973411698852505, "grad_norm": 0.23674735406929293, "learning_rate": 9.63856634256552e-05, "loss": 0.5632, "step": 1777 }, { "epoch": 0.49762104673943464, "grad_norm": 0.22564044893242224, "learning_rate": 9.637990291348853e-05, "loss": 0.5419, "step": 1778 }, { "epoch": 0.4979009235936188, "grad_norm": 0.23733544029512643, "learning_rate": 9.637413798685675e-05, "loss": 0.5387, "step": 1779 }, { "epoch": 0.49818080044780294, "grad_norm": 0.23226211989695644, "learning_rate": 9.636836864630856e-05, "loss": 0.5401, "step": 1780 }, { "epoch": 0.4984606773019871, "grad_norm": 0.25178990006315966, "learning_rate": 9.63625948923931e-05, "loss": 0.5543, "step": 1781 }, { "epoch": 0.4987405541561713, "grad_norm": 0.24134903900795754, "learning_rate": 9.635681672565989e-05, "loss": 0.5811, "step": 1782 }, { "epoch": 0.4990204310103554, "grad_norm": 0.2464353452542341, "learning_rate": 9.635103414665893e-05, "loss": 0.561, "step": 1783 }, { "epoch": 0.4993003078645396, "grad_norm": 0.23746760502538525, "learning_rate": 9.634524715594058e-05, "loss": 0.5918, "step": 1784 }, { "epoch": 0.4995801847187238, "grad_norm": 0.24264934578582315, "learning_rate": 9.633945575405567e-05, "loss": 0.5603, "step": 1785 }, { "epoch": 0.4998600615729079, "grad_norm": 0.24157740893727817, "learning_rate": 9.633365994155544e-05, "loss": 0.5562, "step": 1786 }, { "epoch": 0.500139938427092, "grad_norm": 0.23872271292004676, "learning_rate": 9.632785971899151e-05, "loss": 0.5607, "step": 1787 }, { "epoch": 0.5004198152812762, "grad_norm": 0.29377607225803737, "learning_rate": 9.632205508691596e-05, "loss": 0.5434, "step": 1788 }, { "epoch": 0.5006996921354604, "grad_norm": 0.22816170933325136, "learning_rate": 9.631624604588129e-05, "loss": 0.5696, "step": 1789 }, { "epoch": 0.5009795689896446, "grad_norm": 0.22804984775922338, "learning_rate": 9.631043259644039e-05, "loss": 0.5553, "step": 1790 }, { "epoch": 0.5012594458438288, "grad_norm": 0.23836259743233856, "learning_rate": 9.63046147391466e-05, "loss": 0.5881, "step": 1791 }, { "epoch": 0.5015393226980128, "grad_norm": 0.2411643961682015, "learning_rate": 9.629879247455365e-05, "loss": 0.5503, "step": 1792 }, { "epoch": 0.501819199552197, "grad_norm": 0.22687508674274784, "learning_rate": 9.629296580321571e-05, "loss": 0.5418, "step": 1793 }, { "epoch": 0.5020990764063812, "grad_norm": 0.23509273299700292, "learning_rate": 9.62871347256874e-05, "loss": 0.5437, "step": 1794 }, { "epoch": 0.5023789532605654, "grad_norm": 0.2430667135329794, "learning_rate": 9.628129924252369e-05, "loss": 0.5455, "step": 1795 }, { "epoch": 0.5026588301147495, "grad_norm": 0.24672159908029315, "learning_rate": 9.627545935427999e-05, "loss": 0.5866, "step": 1796 }, { "epoch": 0.5029387069689337, "grad_norm": 0.242629180649455, "learning_rate": 9.62696150615122e-05, "loss": 0.5632, "step": 1797 }, { "epoch": 0.5032185838231178, "grad_norm": 0.24438038191497316, "learning_rate": 9.626376636477653e-05, "loss": 0.6019, "step": 1798 }, { "epoch": 0.503498460677302, "grad_norm": 0.23661072125513372, "learning_rate": 9.625791326462969e-05, "loss": 0.5588, "step": 1799 }, { "epoch": 0.5037783375314862, "grad_norm": 0.24995935229611654, "learning_rate": 9.625205576162877e-05, "loss": 0.5487, "step": 1800 }, { "epoch": 0.5040582143856703, "grad_norm": 0.24343740056074598, "learning_rate": 9.62461938563313e-05, "loss": 0.5585, "step": 1801 }, { "epoch": 0.5043380912398545, "grad_norm": 0.24236120456339055, "learning_rate": 9.624032754929522e-05, "loss": 0.5816, "step": 1802 }, { "epoch": 0.5046179680940386, "grad_norm": 0.25800761709487613, "learning_rate": 9.623445684107886e-05, "loss": 0.5985, "step": 1803 }, { "epoch": 0.5048978449482228, "grad_norm": 0.22645047786414052, "learning_rate": 9.622858173224103e-05, "loss": 0.5352, "step": 1804 }, { "epoch": 0.5051777218024069, "grad_norm": 0.24908389535441078, "learning_rate": 9.622270222334092e-05, "loss": 0.5509, "step": 1805 }, { "epoch": 0.5054575986565911, "grad_norm": 0.24691042377307731, "learning_rate": 9.621681831493814e-05, "loss": 0.5672, "step": 1806 }, { "epoch": 0.5057374755107753, "grad_norm": 0.22132863335088446, "learning_rate": 9.621093000759271e-05, "loss": 0.5806, "step": 1807 }, { "epoch": 0.5060173523649594, "grad_norm": 0.22962279406791766, "learning_rate": 9.620503730186512e-05, "loss": 0.5724, "step": 1808 }, { "epoch": 0.5062972292191436, "grad_norm": 0.2249337051683322, "learning_rate": 9.619914019831619e-05, "loss": 0.5284, "step": 1809 }, { "epoch": 0.5065771060733277, "grad_norm": 0.23783368590261306, "learning_rate": 9.619323869750727e-05, "loss": 0.5638, "step": 1810 }, { "epoch": 0.5068569829275119, "grad_norm": 0.22446658791123203, "learning_rate": 9.618733280000001e-05, "loss": 0.5519, "step": 1811 }, { "epoch": 0.5071368597816961, "grad_norm": 0.23534736176816506, "learning_rate": 9.618142250635658e-05, "loss": 0.5837, "step": 1812 }, { "epoch": 0.5074167366358802, "grad_norm": 0.22303012234251496, "learning_rate": 9.617550781713949e-05, "loss": 0.5414, "step": 1813 }, { "epoch": 0.5076966134900643, "grad_norm": 0.238460413121849, "learning_rate": 9.616958873291173e-05, "loss": 0.5758, "step": 1814 }, { "epoch": 0.5079764903442485, "grad_norm": 0.2389949338953074, "learning_rate": 9.616366525423666e-05, "loss": 0.5766, "step": 1815 }, { "epoch": 0.5082563671984327, "grad_norm": 0.22642327324417794, "learning_rate": 9.61577373816781e-05, "loss": 0.5595, "step": 1816 }, { "epoch": 0.5085362440526169, "grad_norm": 0.23824066188804274, "learning_rate": 9.615180511580026e-05, "loss": 0.5638, "step": 1817 }, { "epoch": 0.5088161209068011, "grad_norm": 0.23189431339771868, "learning_rate": 9.614586845716777e-05, "loss": 0.5662, "step": 1818 }, { "epoch": 0.5090959977609851, "grad_norm": 0.23194498047736026, "learning_rate": 9.613992740634572e-05, "loss": 0.563, "step": 1819 }, { "epoch": 0.5093758746151693, "grad_norm": 0.23745188110537496, "learning_rate": 9.613398196389954e-05, "loss": 0.5478, "step": 1820 }, { "epoch": 0.5096557514693535, "grad_norm": 0.23459963343848525, "learning_rate": 9.612803213039512e-05, "loss": 0.5739, "step": 1821 }, { "epoch": 0.5099356283235377, "grad_norm": 0.23694885368323856, "learning_rate": 9.612207790639879e-05, "loss": 0.54, "step": 1822 }, { "epoch": 0.5102155051777219, "grad_norm": 0.24602605516224743, "learning_rate": 9.611611929247726e-05, "loss": 0.5688, "step": 1823 }, { "epoch": 0.5104953820319059, "grad_norm": 0.22630239119332318, "learning_rate": 9.61101562891977e-05, "loss": 0.541, "step": 1824 }, { "epoch": 0.5107752588860901, "grad_norm": 0.23519921347325817, "learning_rate": 9.610418889712765e-05, "loss": 0.5628, "step": 1825 }, { "epoch": 0.5110551357402743, "grad_norm": 0.23070811933354385, "learning_rate": 9.609821711683509e-05, "loss": 0.5679, "step": 1826 }, { "epoch": 0.5113350125944585, "grad_norm": 0.23724394099776144, "learning_rate": 9.609224094888842e-05, "loss": 0.548, "step": 1827 }, { "epoch": 0.5116148894486426, "grad_norm": 0.24225479851705234, "learning_rate": 9.608626039385648e-05, "loss": 0.5702, "step": 1828 }, { "epoch": 0.5118947663028267, "grad_norm": 0.2461919864351795, "learning_rate": 9.608027545230847e-05, "loss": 0.5542, "step": 1829 }, { "epoch": 0.5121746431570109, "grad_norm": 0.22350936533247612, "learning_rate": 9.607428612481404e-05, "loss": 0.5234, "step": 1830 }, { "epoch": 0.5124545200111951, "grad_norm": 0.22097363018732752, "learning_rate": 9.606829241194327e-05, "loss": 0.5296, "step": 1831 }, { "epoch": 0.5127343968653792, "grad_norm": 0.23235613494758678, "learning_rate": 9.606229431426663e-05, "loss": 0.5435, "step": 1832 }, { "epoch": 0.5130142737195634, "grad_norm": 0.2319305788291165, "learning_rate": 9.605629183235506e-05, "loss": 0.5514, "step": 1833 }, { "epoch": 0.5132941505737475, "grad_norm": 0.2441169559939731, "learning_rate": 9.605028496677983e-05, "loss": 0.5691, "step": 1834 }, { "epoch": 0.5135740274279317, "grad_norm": 0.22513807184150386, "learning_rate": 9.604427371811273e-05, "loss": 0.5791, "step": 1835 }, { "epoch": 0.5138539042821159, "grad_norm": 0.2283649347192075, "learning_rate": 9.603825808692587e-05, "loss": 0.5439, "step": 1836 }, { "epoch": 0.5141337811363, "grad_norm": 0.23833928056167464, "learning_rate": 9.603223807379183e-05, "loss": 0.558, "step": 1837 }, { "epoch": 0.5144136579904842, "grad_norm": 0.2370905233182266, "learning_rate": 9.602621367928362e-05, "loss": 0.5729, "step": 1838 }, { "epoch": 0.5146935348446684, "grad_norm": 0.23328697868395074, "learning_rate": 9.602018490397462e-05, "loss": 0.5557, "step": 1839 }, { "epoch": 0.5149734116988525, "grad_norm": 0.23919119334639152, "learning_rate": 9.601415174843866e-05, "loss": 0.54, "step": 1840 }, { "epoch": 0.5152532885530366, "grad_norm": 0.23209531414825327, "learning_rate": 9.600811421324999e-05, "loss": 0.5385, "step": 1841 }, { "epoch": 0.5155331654072208, "grad_norm": 0.23622064587622715, "learning_rate": 9.600207229898325e-05, "loss": 0.5378, "step": 1842 }, { "epoch": 0.515813042261405, "grad_norm": 0.23316704314698783, "learning_rate": 9.599602600621353e-05, "loss": 0.5434, "step": 1843 }, { "epoch": 0.5160929191155892, "grad_norm": 0.2365672488806659, "learning_rate": 9.598997533551631e-05, "loss": 0.5536, "step": 1844 }, { "epoch": 0.5163727959697733, "grad_norm": 0.24001992184581208, "learning_rate": 9.598392028746748e-05, "loss": 0.547, "step": 1845 }, { "epoch": 0.5166526728239574, "grad_norm": 0.2270027847107539, "learning_rate": 9.597786086264338e-05, "loss": 0.5757, "step": 1846 }, { "epoch": 0.5169325496781416, "grad_norm": 0.24629004358677026, "learning_rate": 9.597179706162076e-05, "loss": 0.5712, "step": 1847 }, { "epoch": 0.5172124265323258, "grad_norm": 0.22870468213578413, "learning_rate": 9.596572888497677e-05, "loss": 0.5403, "step": 1848 }, { "epoch": 0.51749230338651, "grad_norm": 0.22444490352606403, "learning_rate": 9.595965633328897e-05, "loss": 0.5862, "step": 1849 }, { "epoch": 0.517772180240694, "grad_norm": 0.23245352945553727, "learning_rate": 9.595357940713534e-05, "loss": 0.582, "step": 1850 }, { "epoch": 0.5180520570948782, "grad_norm": 0.23265760512745268, "learning_rate": 9.594749810709432e-05, "loss": 0.5443, "step": 1851 }, { "epoch": 0.5183319339490624, "grad_norm": 0.23765425762096956, "learning_rate": 9.59414124337447e-05, "loss": 0.5586, "step": 1852 }, { "epoch": 0.5186118108032466, "grad_norm": 0.22943262496098119, "learning_rate": 9.593532238766574e-05, "loss": 0.5547, "step": 1853 }, { "epoch": 0.5188916876574308, "grad_norm": 0.24586384683093845, "learning_rate": 9.592922796943707e-05, "loss": 0.5618, "step": 1854 }, { "epoch": 0.5191715645116148, "grad_norm": 0.23118537262405697, "learning_rate": 9.592312917963878e-05, "loss": 0.5403, "step": 1855 }, { "epoch": 0.519451441365799, "grad_norm": 0.23040203657112565, "learning_rate": 9.591702601885135e-05, "loss": 0.5599, "step": 1856 }, { "epoch": 0.5197313182199832, "grad_norm": 0.23825571871577292, "learning_rate": 9.59109184876557e-05, "loss": 0.5767, "step": 1857 }, { "epoch": 0.5200111950741674, "grad_norm": 0.22947366405695055, "learning_rate": 9.59048065866331e-05, "loss": 0.5417, "step": 1858 }, { "epoch": 0.5202910719283516, "grad_norm": 0.23364051222262436, "learning_rate": 9.589869031636533e-05, "loss": 0.5335, "step": 1859 }, { "epoch": 0.5205709487825357, "grad_norm": 0.24928639073870468, "learning_rate": 9.589256967743453e-05, "loss": 0.5496, "step": 1860 }, { "epoch": 0.5208508256367198, "grad_norm": 0.223264011896636, "learning_rate": 9.588644467042327e-05, "loss": 0.5532, "step": 1861 }, { "epoch": 0.521130702490904, "grad_norm": 0.24565336079633493, "learning_rate": 9.58803152959145e-05, "loss": 0.5634, "step": 1862 }, { "epoch": 0.5214105793450882, "grad_norm": 0.22199641199925949, "learning_rate": 9.587418155449167e-05, "loss": 0.5409, "step": 1863 }, { "epoch": 0.5216904561992723, "grad_norm": 0.22608326732484155, "learning_rate": 9.586804344673853e-05, "loss": 0.5509, "step": 1864 }, { "epoch": 0.5219703330534565, "grad_norm": 0.23813917903969617, "learning_rate": 9.586190097323934e-05, "loss": 0.5488, "step": 1865 }, { "epoch": 0.5222502099076406, "grad_norm": 0.24733511129134678, "learning_rate": 9.585575413457877e-05, "loss": 0.6012, "step": 1866 }, { "epoch": 0.5225300867618248, "grad_norm": 0.24835802315135277, "learning_rate": 9.584960293134184e-05, "loss": 0.5847, "step": 1867 }, { "epoch": 0.522809963616009, "grad_norm": 0.22261323703635638, "learning_rate": 9.584344736411405e-05, "loss": 0.5555, "step": 1868 }, { "epoch": 0.5230898404701931, "grad_norm": 0.23134104657282056, "learning_rate": 9.583728743348128e-05, "loss": 0.5683, "step": 1869 }, { "epoch": 0.5233697173243773, "grad_norm": 0.23069707962901834, "learning_rate": 9.583112314002983e-05, "loss": 0.5687, "step": 1870 }, { "epoch": 0.5236495941785614, "grad_norm": 0.23107051966706807, "learning_rate": 9.582495448434643e-05, "loss": 0.538, "step": 1871 }, { "epoch": 0.5239294710327456, "grad_norm": 0.23540799518395647, "learning_rate": 9.581878146701821e-05, "loss": 0.5768, "step": 1872 }, { "epoch": 0.5242093478869297, "grad_norm": 0.2231158508992458, "learning_rate": 9.581260408863272e-05, "loss": 0.5605, "step": 1873 }, { "epoch": 0.5244892247411139, "grad_norm": 0.23665288817701655, "learning_rate": 9.580642234977792e-05, "loss": 0.5521, "step": 1874 }, { "epoch": 0.5247691015952981, "grad_norm": 0.22260080585557304, "learning_rate": 9.580023625104223e-05, "loss": 0.5609, "step": 1875 }, { "epoch": 0.5250489784494822, "grad_norm": 0.2292353822993223, "learning_rate": 9.579404579301441e-05, "loss": 0.5579, "step": 1876 }, { "epoch": 0.5253288553036664, "grad_norm": 0.23344828178509996, "learning_rate": 9.578785097628367e-05, "loss": 0.5791, "step": 1877 }, { "epoch": 0.5256087321578505, "grad_norm": 0.23980135171832784, "learning_rate": 9.578165180143965e-05, "loss": 0.5871, "step": 1878 }, { "epoch": 0.5258886090120347, "grad_norm": 0.23034237676380445, "learning_rate": 9.577544826907238e-05, "loss": 0.5401, "step": 1879 }, { "epoch": 0.5261684858662189, "grad_norm": 0.2379282172099179, "learning_rate": 9.576924037977233e-05, "loss": 0.5398, "step": 1880 }, { "epoch": 0.5264483627204031, "grad_norm": 0.23368080388160048, "learning_rate": 9.576302813413036e-05, "loss": 0.583, "step": 1881 }, { "epoch": 0.5267282395745871, "grad_norm": 0.23033677903394748, "learning_rate": 9.575681153273776e-05, "loss": 0.5751, "step": 1882 }, { "epoch": 0.5270081164287713, "grad_norm": 0.23594766017150454, "learning_rate": 9.575059057618623e-05, "loss": 0.5553, "step": 1883 }, { "epoch": 0.5272879932829555, "grad_norm": 0.24174536855126785, "learning_rate": 9.574436526506788e-05, "loss": 0.5887, "step": 1884 }, { "epoch": 0.5275678701371397, "grad_norm": 0.23461999323173222, "learning_rate": 9.573813559997522e-05, "loss": 0.5494, "step": 1885 }, { "epoch": 0.5278477469913239, "grad_norm": 0.23800563209230874, "learning_rate": 9.573190158150122e-05, "loss": 0.5688, "step": 1886 }, { "epoch": 0.5281276238455079, "grad_norm": 0.2198457840744784, "learning_rate": 9.572566321023925e-05, "loss": 0.5599, "step": 1887 }, { "epoch": 0.5284075006996921, "grad_norm": 0.2227507524362117, "learning_rate": 9.571942048678306e-05, "loss": 0.5601, "step": 1888 }, { "epoch": 0.5286873775538763, "grad_norm": 0.23017247054839587, "learning_rate": 9.571317341172681e-05, "loss": 0.5635, "step": 1889 }, { "epoch": 0.5289672544080605, "grad_norm": 0.23356722144675973, "learning_rate": 9.570692198566515e-05, "loss": 0.5363, "step": 1890 }, { "epoch": 0.5292471312622447, "grad_norm": 0.22272007526605261, "learning_rate": 9.570066620919307e-05, "loss": 0.5772, "step": 1891 }, { "epoch": 0.5295270081164287, "grad_norm": 0.22992922392480677, "learning_rate": 9.569440608290601e-05, "loss": 0.5502, "step": 1892 }, { "epoch": 0.5298068849706129, "grad_norm": 0.25133674677929696, "learning_rate": 9.568814160739978e-05, "loss": 0.5577, "step": 1893 }, { "epoch": 0.5300867618247971, "grad_norm": 0.23222311214547842, "learning_rate": 9.568187278327067e-05, "loss": 0.5655, "step": 1894 }, { "epoch": 0.5303666386789813, "grad_norm": 0.23073644663937348, "learning_rate": 9.567559961111534e-05, "loss": 0.5331, "step": 1895 }, { "epoch": 0.5306465155331654, "grad_norm": 0.23651252528460873, "learning_rate": 9.566932209153088e-05, "loss": 0.5549, "step": 1896 }, { "epoch": 0.5309263923873496, "grad_norm": 0.244109438111545, "learning_rate": 9.566304022511477e-05, "loss": 0.5634, "step": 1897 }, { "epoch": 0.5312062692415337, "grad_norm": 0.23293967146364056, "learning_rate": 9.565675401246494e-05, "loss": 0.5852, "step": 1898 }, { "epoch": 0.5314861460957179, "grad_norm": 0.23870681376181876, "learning_rate": 9.565046345417969e-05, "loss": 0.5733, "step": 1899 }, { "epoch": 0.531766022949902, "grad_norm": 0.23187763560436186, "learning_rate": 9.564416855085781e-05, "loss": 0.5661, "step": 1900 }, { "epoch": 0.5320458998040862, "grad_norm": 0.24612542749443875, "learning_rate": 9.56378693030984e-05, "loss": 0.5465, "step": 1901 }, { "epoch": 0.5323257766582704, "grad_norm": 0.23013071753066777, "learning_rate": 9.563156571150105e-05, "loss": 0.5661, "step": 1902 }, { "epoch": 0.5326056535124545, "grad_norm": 0.23472383974694802, "learning_rate": 9.562525777666572e-05, "loss": 0.5559, "step": 1903 }, { "epoch": 0.5328855303666387, "grad_norm": 0.23324049855111026, "learning_rate": 9.561894549919283e-05, "loss": 0.5505, "step": 1904 }, { "epoch": 0.5331654072208228, "grad_norm": 0.2406931330869733, "learning_rate": 9.561262887968317e-05, "loss": 0.5708, "step": 1905 }, { "epoch": 0.533445284075007, "grad_norm": 0.21556770800085526, "learning_rate": 9.560630791873797e-05, "loss": 0.5675, "step": 1906 }, { "epoch": 0.5337251609291912, "grad_norm": 0.23629431823799524, "learning_rate": 9.559998261695883e-05, "loss": 0.5668, "step": 1907 }, { "epoch": 0.5340050377833753, "grad_norm": 0.23188856043931416, "learning_rate": 9.559365297494784e-05, "loss": 0.552, "step": 1908 }, { "epoch": 0.5342849146375594, "grad_norm": 0.2309807204022842, "learning_rate": 9.558731899330745e-05, "loss": 0.5657, "step": 1909 }, { "epoch": 0.5345647914917436, "grad_norm": 0.22570161807706715, "learning_rate": 9.558098067264052e-05, "loss": 0.5946, "step": 1910 }, { "epoch": 0.5348446683459278, "grad_norm": 0.2361099600158028, "learning_rate": 9.557463801355032e-05, "loss": 0.5611, "step": 1911 }, { "epoch": 0.535124545200112, "grad_norm": 0.2348866086463897, "learning_rate": 9.556829101664057e-05, "loss": 0.5482, "step": 1912 }, { "epoch": 0.5354044220542961, "grad_norm": 0.22647267846564423, "learning_rate": 9.55619396825154e-05, "loss": 0.545, "step": 1913 }, { "epoch": 0.5356842989084802, "grad_norm": 0.23909560172612637, "learning_rate": 9.555558401177926e-05, "loss": 0.5503, "step": 1914 }, { "epoch": 0.5359641757626644, "grad_norm": 0.23514676112645405, "learning_rate": 9.554922400503718e-05, "loss": 0.5464, "step": 1915 }, { "epoch": 0.5362440526168486, "grad_norm": 0.24219390194935297, "learning_rate": 9.554285966289445e-05, "loss": 0.581, "step": 1916 }, { "epoch": 0.5365239294710328, "grad_norm": 0.26667302882813204, "learning_rate": 9.553649098595682e-05, "loss": 0.5611, "step": 1917 }, { "epoch": 0.536803806325217, "grad_norm": 0.25536556951793493, "learning_rate": 9.553011797483052e-05, "loss": 0.5867, "step": 1918 }, { "epoch": 0.537083683179401, "grad_norm": 0.24412883483923561, "learning_rate": 9.55237406301221e-05, "loss": 0.5593, "step": 1919 }, { "epoch": 0.5373635600335852, "grad_norm": 0.2426078288284984, "learning_rate": 9.551735895243857e-05, "loss": 0.555, "step": 1920 }, { "epoch": 0.5376434368877694, "grad_norm": 0.23475525081658452, "learning_rate": 9.551097294238734e-05, "loss": 0.5845, "step": 1921 }, { "epoch": 0.5379233137419536, "grad_norm": 0.24906252543249288, "learning_rate": 9.550458260057622e-05, "loss": 0.553, "step": 1922 }, { "epoch": 0.5382031905961377, "grad_norm": 0.23275761033329403, "learning_rate": 9.549818792761347e-05, "loss": 0.5779, "step": 1923 }, { "epoch": 0.5384830674503218, "grad_norm": 0.2348949256678806, "learning_rate": 9.549178892410772e-05, "loss": 0.5668, "step": 1924 }, { "epoch": 0.538762944304506, "grad_norm": 0.24253072449017118, "learning_rate": 9.548538559066804e-05, "loss": 0.5672, "step": 1925 }, { "epoch": 0.5390428211586902, "grad_norm": 0.24269874600361666, "learning_rate": 9.54789779279039e-05, "loss": 0.5849, "step": 1926 }, { "epoch": 0.5393226980128744, "grad_norm": 0.22165250504144346, "learning_rate": 9.547256593642517e-05, "loss": 0.547, "step": 1927 }, { "epoch": 0.5396025748670585, "grad_norm": 0.23225886142882915, "learning_rate": 9.546614961684217e-05, "loss": 0.583, "step": 1928 }, { "epoch": 0.5398824517212426, "grad_norm": 0.24075858244204984, "learning_rate": 9.545972896976561e-05, "loss": 0.5821, "step": 1929 }, { "epoch": 0.5401623285754268, "grad_norm": 0.2289433754905644, "learning_rate": 9.545330399580659e-05, "loss": 0.5443, "step": 1930 }, { "epoch": 0.540442205429611, "grad_norm": 0.23766814224236468, "learning_rate": 9.544687469557666e-05, "loss": 0.5623, "step": 1931 }, { "epoch": 0.5407220822837951, "grad_norm": 0.2356527757625123, "learning_rate": 9.544044106968777e-05, "loss": 0.5587, "step": 1932 }, { "epoch": 0.5410019591379793, "grad_norm": 0.23257587339724806, "learning_rate": 9.543400311875225e-05, "loss": 0.562, "step": 1933 }, { "epoch": 0.5412818359921634, "grad_norm": 0.23234528561284273, "learning_rate": 9.542756084338289e-05, "loss": 0.5363, "step": 1934 }, { "epoch": 0.5415617128463476, "grad_norm": 0.22934883021742244, "learning_rate": 9.542111424419286e-05, "loss": 0.569, "step": 1935 }, { "epoch": 0.5418415897005318, "grad_norm": 0.22697340158798782, "learning_rate": 9.541466332179576e-05, "loss": 0.5651, "step": 1936 }, { "epoch": 0.5421214665547159, "grad_norm": 0.23855856743322248, "learning_rate": 9.540820807680557e-05, "loss": 0.542, "step": 1937 }, { "epoch": 0.5424013434089001, "grad_norm": 0.23493788787767833, "learning_rate": 9.540174850983673e-05, "loss": 0.542, "step": 1938 }, { "epoch": 0.5426812202630843, "grad_norm": 0.2350779825278462, "learning_rate": 9.539528462150405e-05, "loss": 0.5648, "step": 1939 }, { "epoch": 0.5429610971172684, "grad_norm": 0.2391293156386913, "learning_rate": 9.538881641242276e-05, "loss": 0.5532, "step": 1940 }, { "epoch": 0.5432409739714525, "grad_norm": 0.23997894628745364, "learning_rate": 9.538234388320855e-05, "loss": 0.5667, "step": 1941 }, { "epoch": 0.5435208508256367, "grad_norm": 0.23574334521382298, "learning_rate": 9.537586703447743e-05, "loss": 0.576, "step": 1942 }, { "epoch": 0.5438007276798209, "grad_norm": 0.2385474638070604, "learning_rate": 9.536938586684587e-05, "loss": 0.5424, "step": 1943 }, { "epoch": 0.5440806045340051, "grad_norm": 0.24772515180100296, "learning_rate": 9.536290038093078e-05, "loss": 0.5532, "step": 1944 }, { "epoch": 0.5443604813881892, "grad_norm": 0.23021456892542566, "learning_rate": 9.535641057734945e-05, "loss": 0.5579, "step": 1945 }, { "epoch": 0.5446403582423733, "grad_norm": 0.2311219424121253, "learning_rate": 9.534991645671958e-05, "loss": 0.5678, "step": 1946 }, { "epoch": 0.5449202350965575, "grad_norm": 0.22398028919231158, "learning_rate": 9.534341801965926e-05, "loss": 0.5305, "step": 1947 }, { "epoch": 0.5452001119507417, "grad_norm": 0.23108011550849794, "learning_rate": 9.533691526678705e-05, "loss": 0.5838, "step": 1948 }, { "epoch": 0.5454799888049259, "grad_norm": 0.23265157506126793, "learning_rate": 9.533040819872185e-05, "loss": 0.5656, "step": 1949 }, { "epoch": 0.5457598656591099, "grad_norm": 0.288365000981492, "learning_rate": 9.532389681608305e-05, "loss": 0.5606, "step": 1950 }, { "epoch": 0.5460397425132941, "grad_norm": 0.2321060095008162, "learning_rate": 9.531738111949036e-05, "loss": 0.5795, "step": 1951 }, { "epoch": 0.5463196193674783, "grad_norm": 0.22758287819714584, "learning_rate": 9.531086110956398e-05, "loss": 0.5372, "step": 1952 }, { "epoch": 0.5465994962216625, "grad_norm": 0.23224673201452703, "learning_rate": 9.530433678692447e-05, "loss": 0.5572, "step": 1953 }, { "epoch": 0.5468793730758467, "grad_norm": 0.21852267247765625, "learning_rate": 9.529780815219284e-05, "loss": 0.535, "step": 1954 }, { "epoch": 0.5471592499300307, "grad_norm": 0.2297540027147699, "learning_rate": 9.529127520599046e-05, "loss": 0.5504, "step": 1955 }, { "epoch": 0.5474391267842149, "grad_norm": 0.23915485872593667, "learning_rate": 9.528473794893917e-05, "loss": 0.5707, "step": 1956 }, { "epoch": 0.5477190036383991, "grad_norm": 0.22453867541566563, "learning_rate": 9.527819638166117e-05, "loss": 0.5759, "step": 1957 }, { "epoch": 0.5479988804925833, "grad_norm": 0.22199196523457324, "learning_rate": 9.527165050477909e-05, "loss": 0.537, "step": 1958 }, { "epoch": 0.5482787573467675, "grad_norm": 0.24859979390155498, "learning_rate": 9.526510031891598e-05, "loss": 0.5718, "step": 1959 }, { "epoch": 0.5485586342009516, "grad_norm": 0.228265159824563, "learning_rate": 9.525854582469528e-05, "loss": 0.5482, "step": 1960 }, { "epoch": 0.5488385110551357, "grad_norm": 0.22451337588017242, "learning_rate": 9.525198702274087e-05, "loss": 0.5296, "step": 1961 }, { "epoch": 0.5491183879093199, "grad_norm": 0.2314849500943932, "learning_rate": 9.524542391367699e-05, "loss": 0.5706, "step": 1962 }, { "epoch": 0.5493982647635041, "grad_norm": 0.22769313524812834, "learning_rate": 9.523885649812833e-05, "loss": 0.5715, "step": 1963 }, { "epoch": 0.5496781416176882, "grad_norm": 0.23231869619896636, "learning_rate": 9.523228477672001e-05, "loss": 0.5658, "step": 1964 }, { "epoch": 0.5499580184718724, "grad_norm": 0.24055911201147767, "learning_rate": 9.52257087500775e-05, "loss": 0.5708, "step": 1965 }, { "epoch": 0.5502378953260565, "grad_norm": 0.22845147101402621, "learning_rate": 9.521912841882672e-05, "loss": 0.5656, "step": 1966 }, { "epoch": 0.5505177721802407, "grad_norm": 0.23328848827384172, "learning_rate": 9.521254378359398e-05, "loss": 0.5321, "step": 1967 }, { "epoch": 0.5507976490344249, "grad_norm": 0.22460346332344272, "learning_rate": 9.520595484500602e-05, "loss": 0.546, "step": 1968 }, { "epoch": 0.551077525888609, "grad_norm": 0.23776827874011325, "learning_rate": 9.519936160368998e-05, "loss": 0.5699, "step": 1969 }, { "epoch": 0.5513574027427932, "grad_norm": 0.23351372187371927, "learning_rate": 9.519276406027339e-05, "loss": 0.5493, "step": 1970 }, { "epoch": 0.5516372795969773, "grad_norm": 0.2304708018654687, "learning_rate": 9.518616221538424e-05, "loss": 0.5427, "step": 1971 }, { "epoch": 0.5519171564511615, "grad_norm": 0.23232900384938365, "learning_rate": 9.517955606965086e-05, "loss": 0.5744, "step": 1972 }, { "epoch": 0.5521970333053456, "grad_norm": 0.22698205075703637, "learning_rate": 9.517294562370205e-05, "loss": 0.542, "step": 1973 }, { "epoch": 0.5524769101595298, "grad_norm": 0.22764760603881273, "learning_rate": 9.516633087816699e-05, "loss": 0.574, "step": 1974 }, { "epoch": 0.552756787013714, "grad_norm": 0.22869155706946398, "learning_rate": 9.515971183367527e-05, "loss": 0.5568, "step": 1975 }, { "epoch": 0.5530366638678981, "grad_norm": 0.23575186347623067, "learning_rate": 9.515308849085691e-05, "loss": 0.5566, "step": 1976 }, { "epoch": 0.5533165407220823, "grad_norm": 0.24052513468678355, "learning_rate": 9.514646085034232e-05, "loss": 0.5533, "step": 1977 }, { "epoch": 0.5535964175762664, "grad_norm": 0.23553457119270182, "learning_rate": 9.513982891276232e-05, "loss": 0.5643, "step": 1978 }, { "epoch": 0.5538762944304506, "grad_norm": 0.2278446877532151, "learning_rate": 9.513319267874812e-05, "loss": 0.5753, "step": 1979 }, { "epoch": 0.5541561712846348, "grad_norm": 0.2233103849978068, "learning_rate": 9.512655214893138e-05, "loss": 0.5532, "step": 1980 }, { "epoch": 0.554436048138819, "grad_norm": 0.2366373117514305, "learning_rate": 9.511990732394416e-05, "loss": 0.5749, "step": 1981 }, { "epoch": 0.554715924993003, "grad_norm": 0.24265896290156705, "learning_rate": 9.51132582044189e-05, "loss": 0.5791, "step": 1982 }, { "epoch": 0.5549958018471872, "grad_norm": 0.2220396722562633, "learning_rate": 9.510660479098847e-05, "loss": 0.5254, "step": 1983 }, { "epoch": 0.5552756787013714, "grad_norm": 0.23307690627014327, "learning_rate": 9.509994708428615e-05, "loss": 0.5479, "step": 1984 }, { "epoch": 0.5555555555555556, "grad_norm": 0.23876319960416012, "learning_rate": 9.509328508494563e-05, "loss": 0.5527, "step": 1985 }, { "epoch": 0.5558354324097398, "grad_norm": 0.23872847969233363, "learning_rate": 9.5086618793601e-05, "loss": 0.5706, "step": 1986 }, { "epoch": 0.5561153092639238, "grad_norm": 0.22964560955999136, "learning_rate": 9.507994821088675e-05, "loss": 0.5516, "step": 1987 }, { "epoch": 0.556395186118108, "grad_norm": 0.2300913379025995, "learning_rate": 9.50732733374378e-05, "loss": 0.5738, "step": 1988 }, { "epoch": 0.5566750629722922, "grad_norm": 0.22915071668113138, "learning_rate": 9.506659417388945e-05, "loss": 0.5657, "step": 1989 }, { "epoch": 0.5569549398264764, "grad_norm": 0.23357691835678449, "learning_rate": 9.505991072087747e-05, "loss": 0.5728, "step": 1990 }, { "epoch": 0.5572348166806605, "grad_norm": 0.23183903513631868, "learning_rate": 9.505322297903794e-05, "loss": 0.5484, "step": 1991 }, { "epoch": 0.5575146935348446, "grad_norm": 0.2349802066578527, "learning_rate": 9.504653094900744e-05, "loss": 0.5647, "step": 1992 }, { "epoch": 0.5577945703890288, "grad_norm": 0.2371316694083046, "learning_rate": 9.503983463142292e-05, "loss": 0.5898, "step": 1993 }, { "epoch": 0.558074447243213, "grad_norm": 0.2273397184762064, "learning_rate": 9.50331340269217e-05, "loss": 0.5588, "step": 1994 }, { "epoch": 0.5583543240973972, "grad_norm": 0.23348817571906577, "learning_rate": 9.502642913614161e-05, "loss": 0.5592, "step": 1995 }, { "epoch": 0.5586342009515813, "grad_norm": 0.2152881868863065, "learning_rate": 9.501971995972078e-05, "loss": 0.562, "step": 1996 }, { "epoch": 0.5589140778057654, "grad_norm": 0.21872152794288896, "learning_rate": 9.501300649829781e-05, "loss": 0.5823, "step": 1997 }, { "epoch": 0.5591939546599496, "grad_norm": 0.2386995145124459, "learning_rate": 9.500628875251168e-05, "loss": 0.5693, "step": 1998 }, { "epoch": 0.5594738315141338, "grad_norm": 0.22862033139971408, "learning_rate": 9.499956672300178e-05, "loss": 0.5944, "step": 1999 }, { "epoch": 0.559753708368318, "grad_norm": 0.22519690744331325, "learning_rate": 9.499284041040797e-05, "loss": 0.5784, "step": 2000 }, { "epoch": 0.5600335852225021, "grad_norm": 0.2210358036618059, "learning_rate": 9.49861098153704e-05, "loss": 0.5369, "step": 2001 }, { "epoch": 0.5603134620766863, "grad_norm": 0.22415424156405234, "learning_rate": 9.497937493852971e-05, "loss": 0.5639, "step": 2002 }, { "epoch": 0.5605933389308704, "grad_norm": 0.23794546338939157, "learning_rate": 9.497263578052695e-05, "loss": 0.5712, "step": 2003 }, { "epoch": 0.5608732157850546, "grad_norm": 0.2183833771180705, "learning_rate": 9.496589234200355e-05, "loss": 0.5306, "step": 2004 }, { "epoch": 0.5611530926392387, "grad_norm": 0.22839249865463046, "learning_rate": 9.495914462360134e-05, "loss": 0.5573, "step": 2005 }, { "epoch": 0.5614329694934229, "grad_norm": 0.23122609821488663, "learning_rate": 9.495239262596258e-05, "loss": 0.5494, "step": 2006 }, { "epoch": 0.5617128463476071, "grad_norm": 0.22952413883185982, "learning_rate": 9.494563634972994e-05, "loss": 0.5246, "step": 2007 }, { "epoch": 0.5619927232017912, "grad_norm": 0.2227247431464232, "learning_rate": 9.493887579554647e-05, "loss": 0.5555, "step": 2008 }, { "epoch": 0.5622726000559753, "grad_norm": 0.22670809691968316, "learning_rate": 9.493211096405564e-05, "loss": 0.5216, "step": 2009 }, { "epoch": 0.5625524769101595, "grad_norm": 0.23020903542611063, "learning_rate": 9.492534185590134e-05, "loss": 0.5717, "step": 2010 }, { "epoch": 0.5628323537643437, "grad_norm": 0.22910534158633578, "learning_rate": 9.491856847172786e-05, "loss": 0.551, "step": 2011 }, { "epoch": 0.5631122306185279, "grad_norm": 0.24505507442272167, "learning_rate": 9.491179081217989e-05, "loss": 0.5409, "step": 2012 }, { "epoch": 0.563392107472712, "grad_norm": 0.22108833619567092, "learning_rate": 9.490500887790255e-05, "loss": 0.5427, "step": 2013 }, { "epoch": 0.5636719843268961, "grad_norm": 0.23810167959392928, "learning_rate": 9.48982226695413e-05, "loss": 0.571, "step": 2014 }, { "epoch": 0.5639518611810803, "grad_norm": 0.23758128510274953, "learning_rate": 9.48914321877421e-05, "loss": 0.5501, "step": 2015 }, { "epoch": 0.5642317380352645, "grad_norm": 0.24894816413938858, "learning_rate": 9.488463743315126e-05, "loss": 0.5701, "step": 2016 }, { "epoch": 0.5645116148894487, "grad_norm": 0.23635924382790377, "learning_rate": 9.487783840641551e-05, "loss": 0.5672, "step": 2017 }, { "epoch": 0.5647914917436329, "grad_norm": 0.222049319952158, "learning_rate": 9.487103510818197e-05, "loss": 0.559, "step": 2018 }, { "epoch": 0.5650713685978169, "grad_norm": 0.23100028165490633, "learning_rate": 9.486422753909819e-05, "loss": 0.5255, "step": 2019 }, { "epoch": 0.5653512454520011, "grad_norm": 0.22600325418508446, "learning_rate": 9.485741569981214e-05, "loss": 0.563, "step": 2020 }, { "epoch": 0.5656311223061853, "grad_norm": 0.2469821168912917, "learning_rate": 9.485059959097213e-05, "loss": 0.5394, "step": 2021 }, { "epoch": 0.5659109991603695, "grad_norm": 0.23842274970505892, "learning_rate": 9.484377921322697e-05, "loss": 0.5588, "step": 2022 }, { "epoch": 0.5661908760145536, "grad_norm": 0.2277464279718841, "learning_rate": 9.483695456722579e-05, "loss": 0.5651, "step": 2023 }, { "epoch": 0.5664707528687377, "grad_norm": 0.2289879156338395, "learning_rate": 9.483012565361819e-05, "loss": 0.591, "step": 2024 }, { "epoch": 0.5667506297229219, "grad_norm": 0.24315991038974277, "learning_rate": 9.482329247305413e-05, "loss": 0.5579, "step": 2025 }, { "epoch": 0.5670305065771061, "grad_norm": 0.23791478263652036, "learning_rate": 9.4816455026184e-05, "loss": 0.5618, "step": 2026 }, { "epoch": 0.5673103834312903, "grad_norm": 0.22770686606928436, "learning_rate": 9.48096133136586e-05, "loss": 0.5658, "step": 2027 }, { "epoch": 0.5675902602854744, "grad_norm": 0.22992843543633568, "learning_rate": 9.480276733612914e-05, "loss": 0.5565, "step": 2028 }, { "epoch": 0.5678701371396585, "grad_norm": 0.2324655434035538, "learning_rate": 9.479591709424717e-05, "loss": 0.5427, "step": 2029 }, { "epoch": 0.5681500139938427, "grad_norm": 0.22765992848354477, "learning_rate": 9.478906258866478e-05, "loss": 0.5453, "step": 2030 }, { "epoch": 0.5684298908480269, "grad_norm": 0.23022522938379125, "learning_rate": 9.478220382003431e-05, "loss": 0.5764, "step": 2031 }, { "epoch": 0.568709767702211, "grad_norm": 0.2408048445919462, "learning_rate": 9.477534078900864e-05, "loss": 0.5716, "step": 2032 }, { "epoch": 0.5689896445563952, "grad_norm": 0.2342047777837725, "learning_rate": 9.476847349624097e-05, "loss": 0.5481, "step": 2033 }, { "epoch": 0.5692695214105793, "grad_norm": 0.22897053654830277, "learning_rate": 9.47616019423849e-05, "loss": 0.5698, "step": 2034 }, { "epoch": 0.5695493982647635, "grad_norm": 0.2295180393979441, "learning_rate": 9.475472612809452e-05, "loss": 0.547, "step": 2035 }, { "epoch": 0.5698292751189477, "grad_norm": 0.2332745513887627, "learning_rate": 9.474784605402428e-05, "loss": 0.5625, "step": 2036 }, { "epoch": 0.5701091519731318, "grad_norm": 0.23148919434798484, "learning_rate": 9.4740961720829e-05, "loss": 0.5834, "step": 2037 }, { "epoch": 0.570389028827316, "grad_norm": 0.2252512855750053, "learning_rate": 9.473407312916393e-05, "loss": 0.5468, "step": 2038 }, { "epoch": 0.5706689056815002, "grad_norm": 0.22787780671938834, "learning_rate": 9.472718027968474e-05, "loss": 0.5723, "step": 2039 }, { "epoch": 0.5709487825356843, "grad_norm": 0.23796730720112072, "learning_rate": 9.472028317304748e-05, "loss": 0.5626, "step": 2040 }, { "epoch": 0.5712286593898684, "grad_norm": 0.23279731883326624, "learning_rate": 9.471338180990868e-05, "loss": 0.5663, "step": 2041 }, { "epoch": 0.5715085362440526, "grad_norm": 0.22774061863041195, "learning_rate": 9.470647619092514e-05, "loss": 0.5417, "step": 2042 }, { "epoch": 0.5717884130982368, "grad_norm": 0.23274343346879015, "learning_rate": 9.469956631675418e-05, "loss": 0.5333, "step": 2043 }, { "epoch": 0.572068289952421, "grad_norm": 0.2214570256037127, "learning_rate": 9.469265218805348e-05, "loss": 0.5541, "step": 2044 }, { "epoch": 0.572348166806605, "grad_norm": 0.2347970692270651, "learning_rate": 9.468573380548112e-05, "loss": 0.5399, "step": 2045 }, { "epoch": 0.5726280436607892, "grad_norm": 0.23428890664169302, "learning_rate": 9.467881116969561e-05, "loss": 0.5659, "step": 2046 }, { "epoch": 0.5729079205149734, "grad_norm": 0.2284076394558981, "learning_rate": 9.467188428135585e-05, "loss": 0.5509, "step": 2047 }, { "epoch": 0.5731877973691576, "grad_norm": 0.22483696040078632, "learning_rate": 9.466495314112114e-05, "loss": 0.5367, "step": 2048 }, { "epoch": 0.5734676742233418, "grad_norm": 0.2354170577344445, "learning_rate": 9.465801774965118e-05, "loss": 0.5399, "step": 2049 }, { "epoch": 0.5737475510775258, "grad_norm": 0.21579766700944755, "learning_rate": 9.46510781076061e-05, "loss": 0.5206, "step": 2050 }, { "epoch": 0.57402742793171, "grad_norm": 0.2394430793632505, "learning_rate": 9.464413421564642e-05, "loss": 0.5456, "step": 2051 }, { "epoch": 0.5743073047858942, "grad_norm": 0.227974969171813, "learning_rate": 9.463718607443307e-05, "loss": 0.5348, "step": 2052 }, { "epoch": 0.5745871816400784, "grad_norm": 0.2319299252666734, "learning_rate": 9.463023368462733e-05, "loss": 0.5631, "step": 2053 }, { "epoch": 0.5748670584942626, "grad_norm": 0.22631513111151783, "learning_rate": 9.462327704689098e-05, "loss": 0.5554, "step": 2054 }, { "epoch": 0.5751469353484466, "grad_norm": 0.23219123547444884, "learning_rate": 9.461631616188616e-05, "loss": 0.5574, "step": 2055 }, { "epoch": 0.5754268122026308, "grad_norm": 0.22546526779744472, "learning_rate": 9.460935103027538e-05, "loss": 0.5432, "step": 2056 }, { "epoch": 0.575706689056815, "grad_norm": 0.22897797719256435, "learning_rate": 9.460238165272159e-05, "loss": 0.5477, "step": 2057 }, { "epoch": 0.5759865659109992, "grad_norm": 0.227820803915676, "learning_rate": 9.459540802988817e-05, "loss": 0.531, "step": 2058 }, { "epoch": 0.5762664427651834, "grad_norm": 0.23181545499850942, "learning_rate": 9.458843016243884e-05, "loss": 0.5718, "step": 2059 }, { "epoch": 0.5765463196193675, "grad_norm": 0.22065737580260858, "learning_rate": 9.458144805103778e-05, "loss": 0.5657, "step": 2060 }, { "epoch": 0.5768261964735516, "grad_norm": 0.24229414516266962, "learning_rate": 9.457446169634953e-05, "loss": 0.5426, "step": 2061 }, { "epoch": 0.5771060733277358, "grad_norm": 0.23351271188728046, "learning_rate": 9.456747109903907e-05, "loss": 0.5701, "step": 2062 }, { "epoch": 0.57738595018192, "grad_norm": 0.2275805490827519, "learning_rate": 9.456047625977178e-05, "loss": 0.544, "step": 2063 }, { "epoch": 0.5776658270361041, "grad_norm": 0.23367033225052225, "learning_rate": 9.455347717921341e-05, "loss": 0.562, "step": 2064 }, { "epoch": 0.5779457038902883, "grad_norm": 0.23151158124130852, "learning_rate": 9.454647385803012e-05, "loss": 0.5403, "step": 2065 }, { "epoch": 0.5782255807444724, "grad_norm": 0.23553604374552775, "learning_rate": 9.453946629688855e-05, "loss": 0.5518, "step": 2066 }, { "epoch": 0.5785054575986566, "grad_norm": 0.22536131300420634, "learning_rate": 9.453245449645563e-05, "loss": 0.537, "step": 2067 }, { "epoch": 0.5787853344528407, "grad_norm": 0.23704728389979268, "learning_rate": 9.452543845739876e-05, "loss": 0.5375, "step": 2068 }, { "epoch": 0.5790652113070249, "grad_norm": 0.2343371150333796, "learning_rate": 9.451841818038575e-05, "loss": 0.5819, "step": 2069 }, { "epoch": 0.5793450881612091, "grad_norm": 0.24122881266900179, "learning_rate": 9.451139366608477e-05, "loss": 0.5602, "step": 2070 }, { "epoch": 0.5796249650153932, "grad_norm": 0.22664015045862007, "learning_rate": 9.450436491516444e-05, "loss": 0.5506, "step": 2071 }, { "epoch": 0.5799048418695774, "grad_norm": 0.22147445817593867, "learning_rate": 9.449733192829373e-05, "loss": 0.5587, "step": 2072 }, { "epoch": 0.5801847187237615, "grad_norm": 0.2271617764396628, "learning_rate": 9.449029470614206e-05, "loss": 0.5761, "step": 2073 }, { "epoch": 0.5804645955779457, "grad_norm": 0.22847100595534037, "learning_rate": 9.448325324937925e-05, "loss": 0.5761, "step": 2074 }, { "epoch": 0.5807444724321299, "grad_norm": 0.22553840101117412, "learning_rate": 9.447620755867548e-05, "loss": 0.5141, "step": 2075 }, { "epoch": 0.581024349286314, "grad_norm": 0.22621686515287362, "learning_rate": 9.44691576347014e-05, "loss": 0.5453, "step": 2076 }, { "epoch": 0.5813042261404981, "grad_norm": 0.22331804582221645, "learning_rate": 9.446210347812801e-05, "loss": 0.5601, "step": 2077 }, { "epoch": 0.5815841029946823, "grad_norm": 0.22232235898316122, "learning_rate": 9.445504508962671e-05, "loss": 0.5644, "step": 2078 }, { "epoch": 0.5818639798488665, "grad_norm": 0.2336489789496591, "learning_rate": 9.444798246986933e-05, "loss": 0.5323, "step": 2079 }, { "epoch": 0.5821438567030507, "grad_norm": 0.2416382159368307, "learning_rate": 9.44409156195281e-05, "loss": 0.5601, "step": 2080 }, { "epoch": 0.5824237335572349, "grad_norm": 0.24598405587353261, "learning_rate": 9.443384453927567e-05, "loss": 0.5576, "step": 2081 }, { "epoch": 0.5827036104114189, "grad_norm": 0.23820692785331665, "learning_rate": 9.442676922978503e-05, "loss": 0.5525, "step": 2082 }, { "epoch": 0.5829834872656031, "grad_norm": 0.22371910029075298, "learning_rate": 9.441968969172964e-05, "loss": 0.5475, "step": 2083 }, { "epoch": 0.5832633641197873, "grad_norm": 0.23579026492566837, "learning_rate": 9.441260592578329e-05, "loss": 0.5483, "step": 2084 }, { "epoch": 0.5835432409739715, "grad_norm": 0.23040794838373302, "learning_rate": 9.440551793262027e-05, "loss": 0.5787, "step": 2085 }, { "epoch": 0.5838231178281557, "grad_norm": 0.21907945983694874, "learning_rate": 9.439842571291521e-05, "loss": 0.5086, "step": 2086 }, { "epoch": 0.5841029946823397, "grad_norm": 0.2319913619380196, "learning_rate": 9.439132926734313e-05, "loss": 0.5672, "step": 2087 }, { "epoch": 0.5843828715365239, "grad_norm": 0.39337399651224053, "learning_rate": 9.438422859657947e-05, "loss": 0.5676, "step": 2088 }, { "epoch": 0.5846627483907081, "grad_norm": 0.2183883009142669, "learning_rate": 9.43771237013001e-05, "loss": 0.5744, "step": 2089 }, { "epoch": 0.5849426252448923, "grad_norm": 5.779487133580226, "learning_rate": 9.437001458218127e-05, "loss": 0.6276, "step": 2090 }, { "epoch": 0.5852225020990764, "grad_norm": 0.22071823141183178, "learning_rate": 9.43629012398996e-05, "loss": 0.5385, "step": 2091 }, { "epoch": 0.5855023789532605, "grad_norm": 0.22604144228491943, "learning_rate": 9.435578367513215e-05, "loss": 0.5399, "step": 2092 }, { "epoch": 0.5857822558074447, "grad_norm": 0.23461762169954076, "learning_rate": 9.434866188855641e-05, "loss": 0.5404, "step": 2093 }, { "epoch": 0.5860621326616289, "grad_norm": 0.22303629652425513, "learning_rate": 9.434153588085019e-05, "loss": 0.5256, "step": 2094 }, { "epoch": 0.5863420095158131, "grad_norm": 0.47065174666148146, "learning_rate": 9.433440565269178e-05, "loss": 0.5437, "step": 2095 }, { "epoch": 0.5866218863699972, "grad_norm": 0.24230721619534515, "learning_rate": 9.432727120475981e-05, "loss": 0.5403, "step": 2096 }, { "epoch": 0.5869017632241813, "grad_norm": 0.25008885174940576, "learning_rate": 9.432013253773337e-05, "loss": 0.5488, "step": 2097 }, { "epoch": 0.5871816400783655, "grad_norm": 0.2436018783912833, "learning_rate": 9.43129896522919e-05, "loss": 0.5437, "step": 2098 }, { "epoch": 0.5874615169325497, "grad_norm": 0.2377397180240944, "learning_rate": 9.430584254911527e-05, "loss": 0.5373, "step": 2099 }, { "epoch": 0.5877413937867338, "grad_norm": 0.2554239950571331, "learning_rate": 9.429869122888375e-05, "loss": 0.5634, "step": 2100 }, { "epoch": 0.588021270640918, "grad_norm": 0.3419522755702989, "learning_rate": 9.4291535692278e-05, "loss": 0.5568, "step": 2101 }, { "epoch": 0.5883011474951022, "grad_norm": 0.22386569521000957, "learning_rate": 9.428437593997909e-05, "loss": 0.5492, "step": 2102 }, { "epoch": 0.5885810243492863, "grad_norm": 0.23803678936879716, "learning_rate": 9.42772119726685e-05, "loss": 0.542, "step": 2103 }, { "epoch": 0.5888609012034705, "grad_norm": 0.23133509004822347, "learning_rate": 9.427004379102809e-05, "loss": 0.5495, "step": 2104 }, { "epoch": 0.5891407780576546, "grad_norm": 0.23135027895129517, "learning_rate": 9.426287139574012e-05, "loss": 0.5457, "step": 2105 }, { "epoch": 0.5894206549118388, "grad_norm": 0.22859640564954456, "learning_rate": 9.42556947874873e-05, "loss": 0.5468, "step": 2106 }, { "epoch": 0.589700531766023, "grad_norm": 0.3210496124769348, "learning_rate": 9.424851396695267e-05, "loss": 0.5367, "step": 2107 }, { "epoch": 0.5899804086202071, "grad_norm": 0.23479468230687703, "learning_rate": 9.42413289348197e-05, "loss": 0.5442, "step": 2108 }, { "epoch": 0.5902602854743912, "grad_norm": 0.3436130182221491, "learning_rate": 9.423413969177229e-05, "loss": 0.5487, "step": 2109 }, { "epoch": 0.5905401623285754, "grad_norm": 0.23276523609630337, "learning_rate": 9.422694623849469e-05, "loss": 0.5769, "step": 2110 }, { "epoch": 0.5908200391827596, "grad_norm": 0.22712292607444542, "learning_rate": 9.421974857567162e-05, "loss": 0.5743, "step": 2111 }, { "epoch": 0.5910999160369438, "grad_norm": 0.23743817390424976, "learning_rate": 9.421254670398811e-05, "loss": 0.5301, "step": 2112 }, { "epoch": 0.5913797928911279, "grad_norm": 0.2481966408048911, "learning_rate": 9.420534062412966e-05, "loss": 0.5369, "step": 2113 }, { "epoch": 0.591659669745312, "grad_norm": 0.2318159884307774, "learning_rate": 9.419813033678215e-05, "loss": 0.5313, "step": 2114 }, { "epoch": 0.5919395465994962, "grad_norm": 0.23293490976835007, "learning_rate": 9.419091584263184e-05, "loss": 0.5691, "step": 2115 }, { "epoch": 0.5922194234536804, "grad_norm": 0.23663246086893724, "learning_rate": 9.418369714236546e-05, "loss": 0.574, "step": 2116 }, { "epoch": 0.5924993003078646, "grad_norm": 0.2690452252940053, "learning_rate": 9.417647423667002e-05, "loss": 0.5422, "step": 2117 }, { "epoch": 0.5927791771620488, "grad_norm": 0.23220328242643445, "learning_rate": 9.416924712623305e-05, "loss": 0.5551, "step": 2118 }, { "epoch": 0.5930590540162328, "grad_norm": 0.23303329094420952, "learning_rate": 9.416201581174242e-05, "loss": 0.5342, "step": 2119 }, { "epoch": 0.593338930870417, "grad_norm": 0.232614172873898, "learning_rate": 9.41547802938864e-05, "loss": 0.5583, "step": 2120 }, { "epoch": 0.5936188077246012, "grad_norm": 0.23504105712801626, "learning_rate": 9.414754057335369e-05, "loss": 0.5421, "step": 2121 }, { "epoch": 0.5938986845787854, "grad_norm": 0.22532357528964758, "learning_rate": 9.414029665083335e-05, "loss": 0.5757, "step": 2122 }, { "epoch": 0.5941785614329695, "grad_norm": 0.23439540126628672, "learning_rate": 9.413304852701486e-05, "loss": 0.5539, "step": 2123 }, { "epoch": 0.5944584382871536, "grad_norm": 0.23902983294180502, "learning_rate": 9.412579620258812e-05, "loss": 0.5524, "step": 2124 }, { "epoch": 0.5947383151413378, "grad_norm": 0.24190922233112297, "learning_rate": 9.411853967824339e-05, "loss": 0.5617, "step": 2125 }, { "epoch": 0.595018191995522, "grad_norm": 0.23759151697581649, "learning_rate": 9.411127895467135e-05, "loss": 0.549, "step": 2126 }, { "epoch": 0.5952980688497062, "grad_norm": 0.22968291636303048, "learning_rate": 9.410401403256312e-05, "loss": 0.5864, "step": 2127 }, { "epoch": 0.5955779457038903, "grad_norm": 0.23413373281241773, "learning_rate": 9.409674491261014e-05, "loss": 0.5571, "step": 2128 }, { "epoch": 0.5958578225580744, "grad_norm": 0.22989266756331098, "learning_rate": 9.408947159550428e-05, "loss": 0.566, "step": 2129 }, { "epoch": 0.5961376994122586, "grad_norm": 0.25859836940334974, "learning_rate": 9.408219408193783e-05, "loss": 0.5415, "step": 2130 }, { "epoch": 0.5964175762664428, "grad_norm": 0.22716914480550707, "learning_rate": 9.40749123726035e-05, "loss": 0.5399, "step": 2131 }, { "epoch": 0.5966974531206269, "grad_norm": 0.23777124759659524, "learning_rate": 9.406762646819433e-05, "loss": 0.5438, "step": 2132 }, { "epoch": 0.5969773299748111, "grad_norm": 0.23100395831119283, "learning_rate": 9.406033636940378e-05, "loss": 0.5525, "step": 2133 }, { "epoch": 0.5972572068289952, "grad_norm": 0.24430225003224879, "learning_rate": 9.40530420769258e-05, "loss": 0.5706, "step": 2134 }, { "epoch": 0.5975370836831794, "grad_norm": 0.34331338348630325, "learning_rate": 9.404574359145459e-05, "loss": 0.5635, "step": 2135 }, { "epoch": 0.5978169605373636, "grad_norm": 0.2369012608363964, "learning_rate": 9.403844091368486e-05, "loss": 0.5631, "step": 2136 }, { "epoch": 0.5980968373915477, "grad_norm": 0.23748420549451538, "learning_rate": 9.403113404431167e-05, "loss": 0.5614, "step": 2137 }, { "epoch": 0.5983767142457319, "grad_norm": 0.23140857941781828, "learning_rate": 9.40238229840305e-05, "loss": 0.5961, "step": 2138 }, { "epoch": 0.5986565910999161, "grad_norm": 0.24126547630721637, "learning_rate": 9.401650773353721e-05, "loss": 0.5721, "step": 2139 }, { "epoch": 0.5989364679541002, "grad_norm": 0.22664415941602947, "learning_rate": 9.400918829352807e-05, "loss": 0.5742, "step": 2140 }, { "epoch": 0.5992163448082843, "grad_norm": 0.22951037537087327, "learning_rate": 9.400186466469978e-05, "loss": 0.5562, "step": 2141 }, { "epoch": 0.5994962216624685, "grad_norm": 0.22282961316159394, "learning_rate": 9.399453684774937e-05, "loss": 0.5396, "step": 2142 }, { "epoch": 0.5997760985166527, "grad_norm": 0.23344191933282923, "learning_rate": 9.398720484337431e-05, "loss": 0.5694, "step": 2143 }, { "epoch": 0.6000559753708369, "grad_norm": 0.226237346259715, "learning_rate": 9.397986865227248e-05, "loss": 0.5695, "step": 2144 }, { "epoch": 0.600335852225021, "grad_norm": 0.23347717906049928, "learning_rate": 9.397252827514214e-05, "loss": 0.5515, "step": 2145 }, { "epoch": 0.6006157290792051, "grad_norm": 0.23303486001101228, "learning_rate": 9.396518371268192e-05, "loss": 0.5504, "step": 2146 }, { "epoch": 0.6008956059333893, "grad_norm": 0.22700010739843296, "learning_rate": 9.395783496559094e-05, "loss": 0.5554, "step": 2147 }, { "epoch": 0.6011754827875735, "grad_norm": 0.22397350220572565, "learning_rate": 9.395048203456861e-05, "loss": 0.5552, "step": 2148 }, { "epoch": 0.6014553596417577, "grad_norm": 0.22672012483051324, "learning_rate": 9.394312492031479e-05, "loss": 0.544, "step": 2149 }, { "epoch": 0.6017352364959417, "grad_norm": 0.24590612945582327, "learning_rate": 9.393576362352977e-05, "loss": 0.5519, "step": 2150 }, { "epoch": 0.6020151133501259, "grad_norm": 0.2412160023677888, "learning_rate": 9.392839814491416e-05, "loss": 0.5577, "step": 2151 }, { "epoch": 0.6022949902043101, "grad_norm": 0.23613718554981, "learning_rate": 9.392102848516901e-05, "loss": 0.5701, "step": 2152 }, { "epoch": 0.6025748670584943, "grad_norm": 0.23046699755852104, "learning_rate": 9.391365464499581e-05, "loss": 0.5497, "step": 2153 }, { "epoch": 0.6028547439126785, "grad_norm": 0.22353637618584268, "learning_rate": 9.390627662509637e-05, "loss": 0.5656, "step": 2154 }, { "epoch": 0.6031346207668625, "grad_norm": 0.22387346115727752, "learning_rate": 9.389889442617295e-05, "loss": 0.5546, "step": 2155 }, { "epoch": 0.6034144976210467, "grad_norm": 0.2251342211026499, "learning_rate": 9.389150804892819e-05, "loss": 0.5858, "step": 2156 }, { "epoch": 0.6036943744752309, "grad_norm": 0.22973229416437546, "learning_rate": 9.388411749406512e-05, "loss": 0.5415, "step": 2157 }, { "epoch": 0.6039742513294151, "grad_norm": 0.23055150324750887, "learning_rate": 9.387672276228719e-05, "loss": 0.5374, "step": 2158 }, { "epoch": 0.6042541281835992, "grad_norm": 0.23143025596635663, "learning_rate": 9.386932385429822e-05, "loss": 0.5583, "step": 2159 }, { "epoch": 0.6045340050377834, "grad_norm": 0.21611838843533412, "learning_rate": 9.386192077080245e-05, "loss": 0.5294, "step": 2160 }, { "epoch": 0.6048138818919675, "grad_norm": 0.2137986906121095, "learning_rate": 9.385451351250452e-05, "loss": 0.5515, "step": 2161 }, { "epoch": 0.6050937587461517, "grad_norm": 0.23435303738485605, "learning_rate": 9.384710208010945e-05, "loss": 0.5687, "step": 2162 }, { "epoch": 0.6053736356003359, "grad_norm": 0.2180667344257166, "learning_rate": 9.383968647432265e-05, "loss": 0.5324, "step": 2163 }, { "epoch": 0.60565351245452, "grad_norm": 0.2282908475437685, "learning_rate": 9.383226669584995e-05, "loss": 0.5349, "step": 2164 }, { "epoch": 0.6059333893087042, "grad_norm": 0.22937195448161374, "learning_rate": 9.382484274539758e-05, "loss": 0.5628, "step": 2165 }, { "epoch": 0.6062132661628883, "grad_norm": 0.23805023067234415, "learning_rate": 9.381741462367215e-05, "loss": 0.5644, "step": 2166 }, { "epoch": 0.6064931430170725, "grad_norm": 0.23109045218425225, "learning_rate": 9.380998233138068e-05, "loss": 0.5277, "step": 2167 }, { "epoch": 0.6067730198712566, "grad_norm": 0.23104619170007182, "learning_rate": 9.380254586923056e-05, "loss": 0.5651, "step": 2168 }, { "epoch": 0.6070528967254408, "grad_norm": 0.2343794987860335, "learning_rate": 9.379510523792961e-05, "loss": 0.5873, "step": 2169 }, { "epoch": 0.607332773579625, "grad_norm": 0.2185645973439327, "learning_rate": 9.378766043818601e-05, "loss": 0.5522, "step": 2170 }, { "epoch": 0.6076126504338091, "grad_norm": 0.22412756839460132, "learning_rate": 9.37802114707084e-05, "loss": 0.5506, "step": 2171 }, { "epoch": 0.6078925272879933, "grad_norm": 0.2144459810271091, "learning_rate": 9.377275833620576e-05, "loss": 0.5271, "step": 2172 }, { "epoch": 0.6081724041421774, "grad_norm": 0.2109310703601954, "learning_rate": 9.376530103538748e-05, "loss": 0.5454, "step": 2173 }, { "epoch": 0.6084522809963616, "grad_norm": 0.22814283994446383, "learning_rate": 9.375783956896333e-05, "loss": 0.5517, "step": 2174 }, { "epoch": 0.6087321578505458, "grad_norm": 0.21725478847051968, "learning_rate": 9.375037393764355e-05, "loss": 0.5472, "step": 2175 }, { "epoch": 0.6090120347047299, "grad_norm": 0.224529319802153, "learning_rate": 9.374290414213867e-05, "loss": 0.5483, "step": 2176 }, { "epoch": 0.609291911558914, "grad_norm": 0.22342063582437635, "learning_rate": 9.373543018315969e-05, "loss": 0.547, "step": 2177 }, { "epoch": 0.6095717884130982, "grad_norm": 0.2358182591902062, "learning_rate": 9.372795206141798e-05, "loss": 0.5509, "step": 2178 }, { "epoch": 0.6098516652672824, "grad_norm": 0.22835904537672128, "learning_rate": 9.372046977762533e-05, "loss": 0.5542, "step": 2179 }, { "epoch": 0.6101315421214666, "grad_norm": 0.2301374730540312, "learning_rate": 9.37129833324939e-05, "loss": 0.5374, "step": 2180 }, { "epoch": 0.6104114189756508, "grad_norm": 0.21823760601817832, "learning_rate": 9.370549272673623e-05, "loss": 0.543, "step": 2181 }, { "epoch": 0.6106912958298348, "grad_norm": 0.24705484430025587, "learning_rate": 9.369799796106531e-05, "loss": 0.5657, "step": 2182 }, { "epoch": 0.610971172684019, "grad_norm": 0.23558866131221204, "learning_rate": 9.36904990361945e-05, "loss": 0.5522, "step": 2183 }, { "epoch": 0.6112510495382032, "grad_norm": 0.2413614541787735, "learning_rate": 9.368299595283751e-05, "loss": 0.5585, "step": 2184 }, { "epoch": 0.6115309263923874, "grad_norm": 0.23405538131421783, "learning_rate": 9.367548871170853e-05, "loss": 0.5702, "step": 2185 }, { "epoch": 0.6118108032465716, "grad_norm": 0.22141123795458678, "learning_rate": 9.366797731352209e-05, "loss": 0.5319, "step": 2186 }, { "epoch": 0.6120906801007556, "grad_norm": 0.2225962903572838, "learning_rate": 9.366046175899311e-05, "loss": 0.5688, "step": 2187 }, { "epoch": 0.6123705569549398, "grad_norm": 0.22959480058064163, "learning_rate": 9.365294204883696e-05, "loss": 0.5749, "step": 2188 }, { "epoch": 0.612650433809124, "grad_norm": 0.23519338282340332, "learning_rate": 9.364541818376934e-05, "loss": 0.5711, "step": 2189 }, { "epoch": 0.6129303106633082, "grad_norm": 0.24498715468104948, "learning_rate": 9.36378901645064e-05, "loss": 0.5305, "step": 2190 }, { "epoch": 0.6132101875174923, "grad_norm": 1.2655857992676953, "learning_rate": 9.363035799176463e-05, "loss": 0.5614, "step": 2191 }, { "epoch": 0.6134900643716764, "grad_norm": 0.23654286776943562, "learning_rate": 9.362282166626098e-05, "loss": 0.5686, "step": 2192 }, { "epoch": 0.6137699412258606, "grad_norm": 0.22742679400884647, "learning_rate": 9.361528118871274e-05, "loss": 0.5432, "step": 2193 }, { "epoch": 0.6140498180800448, "grad_norm": 0.24458561478471183, "learning_rate": 9.360773655983763e-05, "loss": 0.5544, "step": 2194 }, { "epoch": 0.614329694934229, "grad_norm": 0.2421957545425036, "learning_rate": 9.360018778035375e-05, "loss": 0.5336, "step": 2195 }, { "epoch": 0.6146095717884131, "grad_norm": 0.23387462121446417, "learning_rate": 9.35926348509796e-05, "loss": 0.5727, "step": 2196 }, { "epoch": 0.6148894486425972, "grad_norm": 0.22734956188694808, "learning_rate": 9.358507777243403e-05, "loss": 0.537, "step": 2197 }, { "epoch": 0.6151693254967814, "grad_norm": 0.223586012193236, "learning_rate": 9.35775165454364e-05, "loss": 0.5297, "step": 2198 }, { "epoch": 0.6154492023509656, "grad_norm": 0.23523420171430215, "learning_rate": 9.356995117070634e-05, "loss": 0.5578, "step": 2199 }, { "epoch": 0.6157290792051497, "grad_norm": 0.4100477560788732, "learning_rate": 9.356238164896393e-05, "loss": 0.5489, "step": 2200 }, { "epoch": 0.6160089560593339, "grad_norm": 0.2462234975316756, "learning_rate": 9.355480798092967e-05, "loss": 0.5741, "step": 2201 }, { "epoch": 0.6162888329135181, "grad_norm": 0.23506519804551732, "learning_rate": 9.35472301673244e-05, "loss": 0.5601, "step": 2202 }, { "epoch": 0.6165687097677022, "grad_norm": 0.2526039051321268, "learning_rate": 9.353964820886938e-05, "loss": 0.5493, "step": 2203 }, { "epoch": 0.6168485866218864, "grad_norm": 0.238125342569292, "learning_rate": 9.353206210628629e-05, "loss": 0.5553, "step": 2204 }, { "epoch": 0.6171284634760705, "grad_norm": 0.2326993166713717, "learning_rate": 9.352447186029714e-05, "loss": 0.5836, "step": 2205 }, { "epoch": 0.6174083403302547, "grad_norm": 0.23815229620152004, "learning_rate": 9.35168774716244e-05, "loss": 0.5503, "step": 2206 }, { "epoch": 0.6176882171844389, "grad_norm": 0.23278379399657392, "learning_rate": 9.350927894099093e-05, "loss": 0.5447, "step": 2207 }, { "epoch": 0.617968094038623, "grad_norm": 0.23129700841609557, "learning_rate": 9.350167626911993e-05, "loss": 0.5619, "step": 2208 }, { "epoch": 0.6182479708928071, "grad_norm": 0.232947905522226, "learning_rate": 9.349406945673502e-05, "loss": 0.5465, "step": 2209 }, { "epoch": 0.6185278477469913, "grad_norm": 0.22936914223681212, "learning_rate": 9.348645850456024e-05, "loss": 0.5316, "step": 2210 }, { "epoch": 0.6188077246011755, "grad_norm": 0.23364232892207495, "learning_rate": 9.347884341332e-05, "loss": 0.5464, "step": 2211 }, { "epoch": 0.6190876014553597, "grad_norm": 0.24271945753437266, "learning_rate": 9.34712241837391e-05, "loss": 0.5371, "step": 2212 }, { "epoch": 0.6193674783095438, "grad_norm": 0.2201927376784854, "learning_rate": 9.346360081654275e-05, "loss": 0.5659, "step": 2213 }, { "epoch": 0.6196473551637279, "grad_norm": 0.2364922733794689, "learning_rate": 9.345597331245657e-05, "loss": 0.5526, "step": 2214 }, { "epoch": 0.6199272320179121, "grad_norm": 0.22070329983117307, "learning_rate": 9.344834167220651e-05, "loss": 0.5343, "step": 2215 }, { "epoch": 0.6202071088720963, "grad_norm": 0.2354897372974203, "learning_rate": 9.344070589651897e-05, "loss": 0.5697, "step": 2216 }, { "epoch": 0.6204869857262805, "grad_norm": 0.22433146622437808, "learning_rate": 9.343306598612071e-05, "loss": 0.5523, "step": 2217 }, { "epoch": 0.6207668625804645, "grad_norm": 0.22235688023135813, "learning_rate": 9.342542194173896e-05, "loss": 0.5618, "step": 2218 }, { "epoch": 0.6210467394346487, "grad_norm": 0.22201231251278364, "learning_rate": 9.341777376410122e-05, "loss": 0.5366, "step": 2219 }, { "epoch": 0.6213266162888329, "grad_norm": 0.21543991405034435, "learning_rate": 9.341012145393547e-05, "loss": 0.5506, "step": 2220 }, { "epoch": 0.6216064931430171, "grad_norm": 0.22190770928380732, "learning_rate": 9.340246501197005e-05, "loss": 0.5809, "step": 2221 }, { "epoch": 0.6218863699972013, "grad_norm": 0.22244013944186045, "learning_rate": 9.339480443893374e-05, "loss": 0.511, "step": 2222 }, { "epoch": 0.6221662468513854, "grad_norm": 0.22395845487584806, "learning_rate": 9.338713973555564e-05, "loss": 0.5602, "step": 2223 }, { "epoch": 0.6224461237055695, "grad_norm": 0.23139619395019506, "learning_rate": 9.33794709025653e-05, "loss": 0.5741, "step": 2224 }, { "epoch": 0.6227260005597537, "grad_norm": 0.23752388942609212, "learning_rate": 9.337179794069264e-05, "loss": 0.5286, "step": 2225 }, { "epoch": 0.6230058774139379, "grad_norm": 0.2274071498186889, "learning_rate": 9.336412085066798e-05, "loss": 0.5483, "step": 2226 }, { "epoch": 0.623285754268122, "grad_norm": 0.22486488581133954, "learning_rate": 9.335643963322203e-05, "loss": 0.5243, "step": 2227 }, { "epoch": 0.6235656311223062, "grad_norm": 0.2487354662580747, "learning_rate": 9.334875428908588e-05, "loss": 0.5429, "step": 2228 }, { "epoch": 0.6238455079764903, "grad_norm": 0.22803775898340814, "learning_rate": 9.334106481899102e-05, "loss": 0.5576, "step": 2229 }, { "epoch": 0.6241253848306745, "grad_norm": 0.22072757617052552, "learning_rate": 9.333337122366937e-05, "loss": 0.5417, "step": 2230 }, { "epoch": 0.6244052616848587, "grad_norm": 0.234083176200024, "learning_rate": 9.332567350385317e-05, "loss": 0.5316, "step": 2231 }, { "epoch": 0.6246851385390428, "grad_norm": 0.2325929584790608, "learning_rate": 9.331797166027514e-05, "loss": 0.5368, "step": 2232 }, { "epoch": 0.624965015393227, "grad_norm": 0.22818549979562075, "learning_rate": 9.331026569366832e-05, "loss": 0.5373, "step": 2233 }, { "epoch": 0.6252448922474111, "grad_norm": 0.22432409845951193, "learning_rate": 9.330255560476616e-05, "loss": 0.5415, "step": 2234 }, { "epoch": 0.6255247691015953, "grad_norm": 0.23425028225855624, "learning_rate": 9.329484139430252e-05, "loss": 0.5431, "step": 2235 }, { "epoch": 0.6258046459557794, "grad_norm": 0.22186903257274523, "learning_rate": 9.328712306301163e-05, "loss": 0.5281, "step": 2236 }, { "epoch": 0.6260845228099636, "grad_norm": 0.25764056170581956, "learning_rate": 9.327940061162817e-05, "loss": 0.5481, "step": 2237 }, { "epoch": 0.6263643996641478, "grad_norm": 0.2219924366960302, "learning_rate": 9.32716740408871e-05, "loss": 0.5398, "step": 2238 }, { "epoch": 0.626644276518332, "grad_norm": 0.24346931514182896, "learning_rate": 9.326394335152391e-05, "loss": 0.5654, "step": 2239 }, { "epoch": 0.6269241533725161, "grad_norm": 0.22636605160474477, "learning_rate": 9.325620854427435e-05, "loss": 0.5421, "step": 2240 }, { "epoch": 0.6272040302267002, "grad_norm": 0.22340805320347967, "learning_rate": 9.324846961987466e-05, "loss": 0.5583, "step": 2241 }, { "epoch": 0.6274839070808844, "grad_norm": 0.21464910424219602, "learning_rate": 9.324072657906142e-05, "loss": 0.5471, "step": 2242 }, { "epoch": 0.6277637839350686, "grad_norm": 0.22745971241035404, "learning_rate": 9.323297942257163e-05, "loss": 0.5451, "step": 2243 }, { "epoch": 0.6280436607892528, "grad_norm": 0.23149705136886, "learning_rate": 9.322522815114265e-05, "loss": 0.5142, "step": 2244 }, { "epoch": 0.6283235376434368, "grad_norm": 0.22838987642013736, "learning_rate": 9.321747276551227e-05, "loss": 0.5394, "step": 2245 }, { "epoch": 0.628603414497621, "grad_norm": 0.23142203663897262, "learning_rate": 9.320971326641863e-05, "loss": 0.5328, "step": 2246 }, { "epoch": 0.6288832913518052, "grad_norm": 0.23225764513487954, "learning_rate": 9.32019496546003e-05, "loss": 0.5651, "step": 2247 }, { "epoch": 0.6291631682059894, "grad_norm": 0.2320281458733709, "learning_rate": 9.319418193079622e-05, "loss": 0.5751, "step": 2248 }, { "epoch": 0.6294430450601736, "grad_norm": 0.24492992978803949, "learning_rate": 9.318641009574573e-05, "loss": 0.5466, "step": 2249 }, { "epoch": 0.6297229219143576, "grad_norm": 0.22022199272503803, "learning_rate": 9.317863415018856e-05, "loss": 0.556, "step": 2250 }, { "epoch": 0.6300027987685418, "grad_norm": 0.2304301736703019, "learning_rate": 9.317085409486481e-05, "loss": 0.571, "step": 2251 }, { "epoch": 0.630282675622726, "grad_norm": 0.22509218496281094, "learning_rate": 9.316306993051501e-05, "loss": 0.5465, "step": 2252 }, { "epoch": 0.6305625524769102, "grad_norm": 0.22252812690646834, "learning_rate": 9.315528165788006e-05, "loss": 0.5411, "step": 2253 }, { "epoch": 0.6308424293310944, "grad_norm": 0.2328350894152955, "learning_rate": 9.314748927770125e-05, "loss": 0.5604, "step": 2254 }, { "epoch": 0.6311223061852784, "grad_norm": 0.2213710380114181, "learning_rate": 9.313969279072025e-05, "loss": 0.5377, "step": 2255 }, { "epoch": 0.6314021830394626, "grad_norm": 0.22193669309360173, "learning_rate": 9.313189219767915e-05, "loss": 0.5125, "step": 2256 }, { "epoch": 0.6316820598936468, "grad_norm": 0.22549734138875063, "learning_rate": 9.31240874993204e-05, "loss": 0.5564, "step": 2257 }, { "epoch": 0.631961936747831, "grad_norm": 0.2259199231381941, "learning_rate": 9.311627869638686e-05, "loss": 0.5523, "step": 2258 }, { "epoch": 0.6322418136020151, "grad_norm": 0.229056475520519, "learning_rate": 9.31084657896218e-05, "loss": 0.5336, "step": 2259 }, { "epoch": 0.6325216904561993, "grad_norm": 0.23347803243129697, "learning_rate": 9.310064877976883e-05, "loss": 0.5255, "step": 2260 }, { "epoch": 0.6328015673103834, "grad_norm": 0.22553841163286872, "learning_rate": 9.309282766757197e-05, "loss": 0.5473, "step": 2261 }, { "epoch": 0.6330814441645676, "grad_norm": 0.2259053835740961, "learning_rate": 9.308500245377567e-05, "loss": 0.5612, "step": 2262 }, { "epoch": 0.6333613210187518, "grad_norm": 0.22324138836960267, "learning_rate": 9.307717313912472e-05, "loss": 0.5289, "step": 2263 }, { "epoch": 0.6336411978729359, "grad_norm": 0.235700850033682, "learning_rate": 9.306933972436431e-05, "loss": 0.5303, "step": 2264 }, { "epoch": 0.6339210747271201, "grad_norm": 0.22274464641205288, "learning_rate": 9.306150221024004e-05, "loss": 0.5521, "step": 2265 }, { "epoch": 0.6342009515813042, "grad_norm": 0.23536595808718414, "learning_rate": 9.305366059749788e-05, "loss": 0.5501, "step": 2266 }, { "epoch": 0.6344808284354884, "grad_norm": 0.23187842569592845, "learning_rate": 9.304581488688422e-05, "loss": 0.5611, "step": 2267 }, { "epoch": 0.6347607052896725, "grad_norm": 0.22053474331324538, "learning_rate": 9.30379650791458e-05, "loss": 0.5529, "step": 2268 }, { "epoch": 0.6350405821438567, "grad_norm": 0.22749795353557378, "learning_rate": 9.303011117502978e-05, "loss": 0.5507, "step": 2269 }, { "epoch": 0.6353204589980409, "grad_norm": 0.2208736218934104, "learning_rate": 9.302225317528368e-05, "loss": 0.5401, "step": 2270 }, { "epoch": 0.635600335852225, "grad_norm": 0.23811281327022943, "learning_rate": 9.301439108065546e-05, "loss": 0.5554, "step": 2271 }, { "epoch": 0.6358802127064092, "grad_norm": 0.2200082176528252, "learning_rate": 9.300652489189342e-05, "loss": 0.546, "step": 2272 }, { "epoch": 0.6361600895605933, "grad_norm": 0.22694066876811875, "learning_rate": 9.299865460974624e-05, "loss": 0.5597, "step": 2273 }, { "epoch": 0.6364399664147775, "grad_norm": 0.23316692349991483, "learning_rate": 9.299078023496307e-05, "loss": 0.56, "step": 2274 }, { "epoch": 0.6367198432689617, "grad_norm": 0.22515063137419997, "learning_rate": 9.298290176829338e-05, "loss": 0.5593, "step": 2275 }, { "epoch": 0.6369997201231458, "grad_norm": 0.23614366525635452, "learning_rate": 9.297501921048703e-05, "loss": 0.5536, "step": 2276 }, { "epoch": 0.6372795969773299, "grad_norm": 0.22514963570539365, "learning_rate": 9.29671325622943e-05, "loss": 0.5357, "step": 2277 }, { "epoch": 0.6375594738315141, "grad_norm": 0.22528905629020501, "learning_rate": 9.295924182446584e-05, "loss": 0.5465, "step": 2278 }, { "epoch": 0.6378393506856983, "grad_norm": 0.23085147748649962, "learning_rate": 9.29513469977527e-05, "loss": 0.5374, "step": 2279 }, { "epoch": 0.6381192275398825, "grad_norm": 0.2356490016621578, "learning_rate": 9.294344808290632e-05, "loss": 0.5674, "step": 2280 }, { "epoch": 0.6383991043940667, "grad_norm": 0.2289003216162852, "learning_rate": 9.29355450806785e-05, "loss": 0.538, "step": 2281 }, { "epoch": 0.6386789812482507, "grad_norm": 0.22565129640786685, "learning_rate": 9.292763799182147e-05, "loss": 0.5413, "step": 2282 }, { "epoch": 0.6389588581024349, "grad_norm": 0.22970949286117814, "learning_rate": 9.291972681708782e-05, "loss": 0.5548, "step": 2283 }, { "epoch": 0.6392387349566191, "grad_norm": 0.22374611908346378, "learning_rate": 9.291181155723056e-05, "loss": 0.5677, "step": 2284 }, { "epoch": 0.6395186118108033, "grad_norm": 0.23480068531041298, "learning_rate": 9.290389221300308e-05, "loss": 0.5414, "step": 2285 }, { "epoch": 0.6397984886649875, "grad_norm": 0.22994014727408524, "learning_rate": 9.28959687851591e-05, "loss": 0.5589, "step": 2286 }, { "epoch": 0.6400783655191715, "grad_norm": 0.22794352806305146, "learning_rate": 9.288804127445279e-05, "loss": 0.5533, "step": 2287 }, { "epoch": 0.6403582423733557, "grad_norm": 0.2157477812026282, "learning_rate": 9.288010968163872e-05, "loss": 0.5497, "step": 2288 }, { "epoch": 0.6406381192275399, "grad_norm": 0.2501669333990063, "learning_rate": 9.287217400747182e-05, "loss": 0.5685, "step": 2289 }, { "epoch": 0.6409179960817241, "grad_norm": 0.22656067225401694, "learning_rate": 9.286423425270738e-05, "loss": 0.5191, "step": 2290 }, { "epoch": 0.6411978729359082, "grad_norm": 0.22767658426171816, "learning_rate": 9.285629041810115e-05, "loss": 0.5397, "step": 2291 }, { "epoch": 0.6414777497900923, "grad_norm": 0.22180886853440163, "learning_rate": 9.284834250440921e-05, "loss": 0.5553, "step": 2292 }, { "epoch": 0.6417576266442765, "grad_norm": 0.22099414569596135, "learning_rate": 9.284039051238804e-05, "loss": 0.5544, "step": 2293 }, { "epoch": 0.6420375034984607, "grad_norm": 0.232736470506679, "learning_rate": 9.283243444279452e-05, "loss": 0.5643, "step": 2294 }, { "epoch": 0.6423173803526449, "grad_norm": 0.22869140915855235, "learning_rate": 9.282447429638592e-05, "loss": 0.5624, "step": 2295 }, { "epoch": 0.642597257206829, "grad_norm": 0.22348008058789776, "learning_rate": 9.281651007391987e-05, "loss": 0.5559, "step": 2296 }, { "epoch": 0.6428771340610131, "grad_norm": 0.21958367303055723, "learning_rate": 9.280854177615444e-05, "loss": 0.5493, "step": 2297 }, { "epoch": 0.6431570109151973, "grad_norm": 0.21565247233331253, "learning_rate": 9.280056940384805e-05, "loss": 0.5536, "step": 2298 }, { "epoch": 0.6434368877693815, "grad_norm": 0.22416765670334324, "learning_rate": 9.27925929577595e-05, "loss": 0.5486, "step": 2299 }, { "epoch": 0.6437167646235656, "grad_norm": 0.22088911741615, "learning_rate": 9.2784612438648e-05, "loss": 0.5471, "step": 2300 }, { "epoch": 0.6439966414777498, "grad_norm": 0.23209657090685998, "learning_rate": 9.277662784727315e-05, "loss": 0.5321, "step": 2301 }, { "epoch": 0.644276518331934, "grad_norm": 0.23182387131170437, "learning_rate": 9.27686391843949e-05, "loss": 0.5606, "step": 2302 }, { "epoch": 0.6445563951861181, "grad_norm": 0.2339164369879079, "learning_rate": 9.276064645077365e-05, "loss": 0.5506, "step": 2303 }, { "epoch": 0.6448362720403022, "grad_norm": 0.21714544722292511, "learning_rate": 9.275264964717012e-05, "loss": 0.5415, "step": 2304 }, { "epoch": 0.6451161488944864, "grad_norm": 0.2201203799089658, "learning_rate": 9.274464877434548e-05, "loss": 0.5601, "step": 2305 }, { "epoch": 0.6453960257486706, "grad_norm": 0.21866346921146546, "learning_rate": 9.273664383306125e-05, "loss": 0.5384, "step": 2306 }, { "epoch": 0.6456759026028548, "grad_norm": 0.22511168679753563, "learning_rate": 9.272863482407931e-05, "loss": 0.5593, "step": 2307 }, { "epoch": 0.6459557794570389, "grad_norm": 0.22442764655366385, "learning_rate": 9.272062174816202e-05, "loss": 0.5548, "step": 2308 }, { "epoch": 0.646235656311223, "grad_norm": 0.22902751071678393, "learning_rate": 9.271260460607202e-05, "loss": 0.5474, "step": 2309 }, { "epoch": 0.6465155331654072, "grad_norm": 0.22939826127270602, "learning_rate": 9.270458339857244e-05, "loss": 0.5443, "step": 2310 }, { "epoch": 0.6467954100195914, "grad_norm": 0.2303813527730354, "learning_rate": 9.269655812642668e-05, "loss": 0.5661, "step": 2311 }, { "epoch": 0.6470752868737756, "grad_norm": 0.2260651119592067, "learning_rate": 9.268852879039863e-05, "loss": 0.5618, "step": 2312 }, { "epoch": 0.6473551637279596, "grad_norm": 0.242050014211222, "learning_rate": 9.26804953912525e-05, "loss": 0.5577, "step": 2313 }, { "epoch": 0.6476350405821438, "grad_norm": 0.22424313698253537, "learning_rate": 9.267245792975294e-05, "loss": 0.5442, "step": 2314 }, { "epoch": 0.647914917436328, "grad_norm": 0.24264178315852836, "learning_rate": 9.266441640666496e-05, "loss": 0.587, "step": 2315 }, { "epoch": 0.6481947942905122, "grad_norm": 0.230444323299661, "learning_rate": 9.265637082275393e-05, "loss": 0.5519, "step": 2316 }, { "epoch": 0.6484746711446964, "grad_norm": 0.23137614429545186, "learning_rate": 9.264832117878566e-05, "loss": 0.5306, "step": 2317 }, { "epoch": 0.6487545479988804, "grad_norm": 0.21618124130299693, "learning_rate": 9.26402674755263e-05, "loss": 0.5386, "step": 2318 }, { "epoch": 0.6490344248530646, "grad_norm": 0.22619207063263208, "learning_rate": 9.263220971374243e-05, "loss": 0.5485, "step": 2319 }, { "epoch": 0.6493143017072488, "grad_norm": 0.22718565387746364, "learning_rate": 9.262414789420097e-05, "loss": 0.5456, "step": 2320 }, { "epoch": 0.649594178561433, "grad_norm": 0.22771105387052398, "learning_rate": 9.261608201766925e-05, "loss": 0.5229, "step": 2321 }, { "epoch": 0.6498740554156172, "grad_norm": 0.2291366099851266, "learning_rate": 9.260801208491498e-05, "loss": 0.5604, "step": 2322 }, { "epoch": 0.6501539322698013, "grad_norm": 0.2289666255387694, "learning_rate": 9.25999380967063e-05, "loss": 0.5468, "step": 2323 }, { "epoch": 0.6504338091239854, "grad_norm": 0.23417748564258375, "learning_rate": 9.259186005381164e-05, "loss": 0.5475, "step": 2324 }, { "epoch": 0.6507136859781696, "grad_norm": 0.24046818943969445, "learning_rate": 9.258377795699992e-05, "loss": 0.5756, "step": 2325 }, { "epoch": 0.6509935628323538, "grad_norm": 0.22598414155699229, "learning_rate": 9.257569180704038e-05, "loss": 0.548, "step": 2326 }, { "epoch": 0.651273439686538, "grad_norm": 0.2341541064036925, "learning_rate": 9.256760160470264e-05, "loss": 0.5362, "step": 2327 }, { "epoch": 0.6515533165407221, "grad_norm": 0.23515841156435047, "learning_rate": 9.255950735075678e-05, "loss": 0.5825, "step": 2328 }, { "epoch": 0.6518331933949062, "grad_norm": 0.23173010860238322, "learning_rate": 9.255140904597317e-05, "loss": 0.526, "step": 2329 }, { "epoch": 0.6521130702490904, "grad_norm": 0.2297433185972963, "learning_rate": 9.254330669112264e-05, "loss": 0.5349, "step": 2330 }, { "epoch": 0.6523929471032746, "grad_norm": 0.23215511229319558, "learning_rate": 9.253520028697636e-05, "loss": 0.5418, "step": 2331 }, { "epoch": 0.6526728239574587, "grad_norm": 0.23788094793990464, "learning_rate": 9.252708983430592e-05, "loss": 0.5445, "step": 2332 }, { "epoch": 0.6529527008116429, "grad_norm": 0.22767547559622842, "learning_rate": 9.251897533388325e-05, "loss": 0.5445, "step": 2333 }, { "epoch": 0.653232577665827, "grad_norm": 0.23724286683345358, "learning_rate": 9.251085678648072e-05, "loss": 0.5714, "step": 2334 }, { "epoch": 0.6535124545200112, "grad_norm": 0.2297351591977234, "learning_rate": 9.250273419287103e-05, "loss": 0.5533, "step": 2335 }, { "epoch": 0.6537923313741953, "grad_norm": 0.23837270156352452, "learning_rate": 9.249460755382733e-05, "loss": 0.5516, "step": 2336 }, { "epoch": 0.6540722082283795, "grad_norm": 0.2275643138395253, "learning_rate": 9.248647687012308e-05, "loss": 0.5642, "step": 2337 }, { "epoch": 0.6543520850825637, "grad_norm": 0.2089555358270792, "learning_rate": 9.247834214253219e-05, "loss": 0.5237, "step": 2338 }, { "epoch": 0.6546319619367479, "grad_norm": 0.22375962164740998, "learning_rate": 9.247020337182893e-05, "loss": 0.5583, "step": 2339 }, { "epoch": 0.654911838790932, "grad_norm": 0.23154662092022185, "learning_rate": 9.246206055878793e-05, "loss": 0.5636, "step": 2340 }, { "epoch": 0.6551917156451161, "grad_norm": 0.22140482953493013, "learning_rate": 9.245391370418423e-05, "loss": 0.5278, "step": 2341 }, { "epoch": 0.6554715924993003, "grad_norm": 0.22372665401886213, "learning_rate": 9.244576280879329e-05, "loss": 0.554, "step": 2342 }, { "epoch": 0.6557514693534845, "grad_norm": 0.22183815887232886, "learning_rate": 9.243760787339086e-05, "loss": 0.5415, "step": 2343 }, { "epoch": 0.6560313462076687, "grad_norm": 0.21311967326441986, "learning_rate": 9.242944889875318e-05, "loss": 0.549, "step": 2344 }, { "epoch": 0.6563112230618527, "grad_norm": 0.2336176164793105, "learning_rate": 9.24212858856568e-05, "loss": 0.5576, "step": 2345 }, { "epoch": 0.6565910999160369, "grad_norm": 0.2296357453534462, "learning_rate": 9.241311883487866e-05, "loss": 0.5655, "step": 2346 }, { "epoch": 0.6568709767702211, "grad_norm": 0.22485073089543192, "learning_rate": 9.240494774719614e-05, "loss": 0.5584, "step": 2347 }, { "epoch": 0.6571508536244053, "grad_norm": 0.2409800791342636, "learning_rate": 9.239677262338697e-05, "loss": 0.5374, "step": 2348 }, { "epoch": 0.6574307304785895, "grad_norm": 0.23198325497364455, "learning_rate": 9.238859346422925e-05, "loss": 0.5615, "step": 2349 }, { "epoch": 0.6577106073327735, "grad_norm": 0.22246276638503817, "learning_rate": 9.238041027050148e-05, "loss": 0.5382, "step": 2350 }, { "epoch": 0.6579904841869577, "grad_norm": 0.22748624442445645, "learning_rate": 9.237222304298254e-05, "loss": 0.5218, "step": 2351 }, { "epoch": 0.6582703610411419, "grad_norm": 0.21900229802920876, "learning_rate": 9.236403178245168e-05, "loss": 0.5618, "step": 2352 }, { "epoch": 0.6585502378953261, "grad_norm": 0.2240368013808672, "learning_rate": 9.235583648968858e-05, "loss": 0.5389, "step": 2353 }, { "epoch": 0.6588301147495103, "grad_norm": 0.23200122166044623, "learning_rate": 9.234763716547326e-05, "loss": 0.5713, "step": 2354 }, { "epoch": 0.6591099916036943, "grad_norm": 0.22746746470463916, "learning_rate": 9.233943381058611e-05, "loss": 0.5516, "step": 2355 }, { "epoch": 0.6593898684578785, "grad_norm": 0.2338898749230664, "learning_rate": 9.233122642580796e-05, "loss": 0.5253, "step": 2356 }, { "epoch": 0.6596697453120627, "grad_norm": 0.2273430072844073, "learning_rate": 9.232301501191997e-05, "loss": 0.5359, "step": 2357 }, { "epoch": 0.6599496221662469, "grad_norm": 0.22364010827333952, "learning_rate": 9.231479956970375e-05, "loss": 0.5656, "step": 2358 }, { "epoch": 0.660229499020431, "grad_norm": 0.23121257126929268, "learning_rate": 9.230658009994122e-05, "loss": 0.5461, "step": 2359 }, { "epoch": 0.6605093758746152, "grad_norm": 0.2199409482019848, "learning_rate": 9.22983566034147e-05, "loss": 0.5342, "step": 2360 }, { "epoch": 0.6607892527287993, "grad_norm": 0.2170295285108138, "learning_rate": 9.229012908090693e-05, "loss": 0.528, "step": 2361 }, { "epoch": 0.6610691295829835, "grad_norm": 0.22830110850500412, "learning_rate": 9.2281897533201e-05, "loss": 0.5527, "step": 2362 }, { "epoch": 0.6613490064371677, "grad_norm": 0.21411374168762964, "learning_rate": 9.227366196108041e-05, "loss": 0.5205, "step": 2363 }, { "epoch": 0.6616288832913518, "grad_norm": 0.2410665757103906, "learning_rate": 9.226542236532899e-05, "loss": 0.5619, "step": 2364 }, { "epoch": 0.661908760145536, "grad_norm": 0.2366806580469498, "learning_rate": 9.225717874673102e-05, "loss": 0.5524, "step": 2365 }, { "epoch": 0.6621886369997201, "grad_norm": 0.22475522821299487, "learning_rate": 9.224893110607114e-05, "loss": 0.5433, "step": 2366 }, { "epoch": 0.6624685138539043, "grad_norm": 0.22504798697278638, "learning_rate": 9.224067944413432e-05, "loss": 0.5479, "step": 2367 }, { "epoch": 0.6627483907080884, "grad_norm": 0.22805381693945523, "learning_rate": 9.223242376170602e-05, "loss": 0.5496, "step": 2368 }, { "epoch": 0.6630282675622726, "grad_norm": 0.2384274415138598, "learning_rate": 9.222416405957197e-05, "loss": 0.5268, "step": 2369 }, { "epoch": 0.6633081444164568, "grad_norm": 0.21116076140816192, "learning_rate": 9.221590033851835e-05, "loss": 0.5301, "step": 2370 }, { "epoch": 0.6635880212706409, "grad_norm": 0.2269853204511418, "learning_rate": 9.220763259933172e-05, "loss": 0.5401, "step": 2371 }, { "epoch": 0.663867898124825, "grad_norm": 0.2230449755853807, "learning_rate": 9.219936084279898e-05, "loss": 0.5667, "step": 2372 }, { "epoch": 0.6641477749790092, "grad_norm": 0.22065782443329063, "learning_rate": 9.219108506970746e-05, "loss": 0.5564, "step": 2373 }, { "epoch": 0.6644276518331934, "grad_norm": 0.2201001629076314, "learning_rate": 9.218280528084485e-05, "loss": 0.5446, "step": 2374 }, { "epoch": 0.6647075286873776, "grad_norm": 0.22674262537396084, "learning_rate": 9.217452147699921e-05, "loss": 0.5508, "step": 2375 }, { "epoch": 0.6649874055415617, "grad_norm": 0.22583712914169812, "learning_rate": 9.216623365895903e-05, "loss": 0.5615, "step": 2376 }, { "epoch": 0.6652672823957458, "grad_norm": 0.22867332059589804, "learning_rate": 9.215794182751312e-05, "loss": 0.5584, "step": 2377 }, { "epoch": 0.66554715924993, "grad_norm": 0.22269331756145164, "learning_rate": 9.21496459834507e-05, "loss": 0.5347, "step": 2378 }, { "epoch": 0.6658270361041142, "grad_norm": 0.22174083315356938, "learning_rate": 9.21413461275614e-05, "loss": 0.5514, "step": 2379 }, { "epoch": 0.6661069129582984, "grad_norm": 0.22052978174508833, "learning_rate": 9.213304226063517e-05, "loss": 0.5451, "step": 2380 }, { "epoch": 0.6663867898124826, "grad_norm": 0.21348865687005658, "learning_rate": 9.212473438346241e-05, "loss": 0.5117, "step": 2381 }, { "epoch": 0.6666666666666666, "grad_norm": 0.23190312932544782, "learning_rate": 9.211642249683385e-05, "loss": 0.5329, "step": 2382 }, { "epoch": 0.6669465435208508, "grad_norm": 0.22492833839773166, "learning_rate": 9.210810660154062e-05, "loss": 0.5315, "step": 2383 }, { "epoch": 0.667226420375035, "grad_norm": 0.2251329828911091, "learning_rate": 9.209978669837424e-05, "loss": 0.5361, "step": 2384 }, { "epoch": 0.6675062972292192, "grad_norm": 0.2205152237600707, "learning_rate": 9.20914627881266e-05, "loss": 0.5353, "step": 2385 }, { "epoch": 0.6677861740834033, "grad_norm": 0.2264829193519305, "learning_rate": 9.208313487158996e-05, "loss": 0.5364, "step": 2386 }, { "epoch": 0.6680660509375874, "grad_norm": 0.22027286795041154, "learning_rate": 9.2074802949557e-05, "loss": 0.515, "step": 2387 }, { "epoch": 0.6683459277917716, "grad_norm": 0.22548829241160073, "learning_rate": 9.206646702282073e-05, "loss": 0.5241, "step": 2388 }, { "epoch": 0.6686258046459558, "grad_norm": 0.2270820347736705, "learning_rate": 9.205812709217459e-05, "loss": 0.5353, "step": 2389 }, { "epoch": 0.66890568150014, "grad_norm": 0.23210343238307118, "learning_rate": 9.204978315841237e-05, "loss": 0.5452, "step": 2390 }, { "epoch": 0.6691855583543241, "grad_norm": 0.23331168699302035, "learning_rate": 9.204143522232826e-05, "loss": 0.5415, "step": 2391 }, { "epoch": 0.6694654352085082, "grad_norm": 0.23412565351809025, "learning_rate": 9.20330832847168e-05, "loss": 0.5211, "step": 2392 }, { "epoch": 0.6697453120626924, "grad_norm": 0.22973650679730895, "learning_rate": 9.202472734637298e-05, "loss": 0.532, "step": 2393 }, { "epoch": 0.6700251889168766, "grad_norm": 0.24068726671626947, "learning_rate": 9.201636740809205e-05, "loss": 0.5871, "step": 2394 }, { "epoch": 0.6703050657710607, "grad_norm": 0.2215762036776492, "learning_rate": 9.200800347066977e-05, "loss": 0.5461, "step": 2395 }, { "epoch": 0.6705849426252449, "grad_norm": 0.2275544008034563, "learning_rate": 9.199963553490221e-05, "loss": 0.5465, "step": 2396 }, { "epoch": 0.670864819479429, "grad_norm": 0.21223357979497362, "learning_rate": 9.199126360158582e-05, "loss": 0.5535, "step": 2397 }, { "epoch": 0.6711446963336132, "grad_norm": 0.23540388723962063, "learning_rate": 9.198288767151747e-05, "loss": 0.5853, "step": 2398 }, { "epoch": 0.6714245731877974, "grad_norm": 0.22455122087448684, "learning_rate": 9.197450774549437e-05, "loss": 0.5368, "step": 2399 }, { "epoch": 0.6717044500419815, "grad_norm": 0.234263871491455, "learning_rate": 9.196612382431412e-05, "loss": 0.5527, "step": 2400 }, { "epoch": 0.6719843268961657, "grad_norm": 0.21848303921515058, "learning_rate": 9.195773590877473e-05, "loss": 0.5511, "step": 2401 }, { "epoch": 0.6722642037503499, "grad_norm": 0.24333069219894635, "learning_rate": 9.194934399967453e-05, "loss": 0.5436, "step": 2402 }, { "epoch": 0.672544080604534, "grad_norm": 0.22364954864683115, "learning_rate": 9.194094809781231e-05, "loss": 0.5638, "step": 2403 }, { "epoch": 0.6728239574587181, "grad_norm": 0.22776773701161734, "learning_rate": 9.193254820398716e-05, "loss": 0.5702, "step": 2404 }, { "epoch": 0.6731038343129023, "grad_norm": 0.2361587046478995, "learning_rate": 9.19241443189986e-05, "loss": 0.5393, "step": 2405 }, { "epoch": 0.6733837111670865, "grad_norm": 0.21742465711363043, "learning_rate": 9.191573644364653e-05, "loss": 0.5233, "step": 2406 }, { "epoch": 0.6736635880212707, "grad_norm": 0.21674312156195077, "learning_rate": 9.190732457873119e-05, "loss": 0.5694, "step": 2407 }, { "epoch": 0.6739434648754548, "grad_norm": 0.2313842918322785, "learning_rate": 9.189890872505325e-05, "loss": 0.5525, "step": 2408 }, { "epoch": 0.6742233417296389, "grad_norm": 0.2203117668983562, "learning_rate": 9.189048888341372e-05, "loss": 0.5261, "step": 2409 }, { "epoch": 0.6745032185838231, "grad_norm": 0.22571547951266857, "learning_rate": 9.188206505461401e-05, "loss": 0.5293, "step": 2410 }, { "epoch": 0.6747830954380073, "grad_norm": 0.22703033150078905, "learning_rate": 9.187363723945591e-05, "loss": 0.5309, "step": 2411 }, { "epoch": 0.6750629722921915, "grad_norm": 0.23310980729189318, "learning_rate": 9.186520543874157e-05, "loss": 0.542, "step": 2412 }, { "epoch": 0.6753428491463755, "grad_norm": 0.22296474878704078, "learning_rate": 9.185676965327356e-05, "loss": 0.5458, "step": 2413 }, { "epoch": 0.6756227260005597, "grad_norm": 0.2304910791578109, "learning_rate": 9.184832988385478e-05, "loss": 0.5719, "step": 2414 }, { "epoch": 0.6759026028547439, "grad_norm": 0.22024656460652256, "learning_rate": 9.183988613128853e-05, "loss": 0.5375, "step": 2415 }, { "epoch": 0.6761824797089281, "grad_norm": 0.22196935076754826, "learning_rate": 9.183143839637851e-05, "loss": 0.5274, "step": 2416 }, { "epoch": 0.6764623565631123, "grad_norm": 0.22362608921611213, "learning_rate": 9.182298667992876e-05, "loss": 0.532, "step": 2417 }, { "epoch": 0.6767422334172963, "grad_norm": 0.2372497581911805, "learning_rate": 9.181453098274373e-05, "loss": 0.536, "step": 2418 }, { "epoch": 0.6770221102714805, "grad_norm": 0.22115460519012234, "learning_rate": 9.180607130562825e-05, "loss": 0.5245, "step": 2419 }, { "epoch": 0.6773019871256647, "grad_norm": 0.22168503602089848, "learning_rate": 9.17976076493875e-05, "loss": 0.5236, "step": 2420 }, { "epoch": 0.6775818639798489, "grad_norm": 0.23592879638708428, "learning_rate": 9.178914001482704e-05, "loss": 0.5702, "step": 2421 }, { "epoch": 0.677861740834033, "grad_norm": 0.22994135134672122, "learning_rate": 9.178066840275286e-05, "loss": 0.5621, "step": 2422 }, { "epoch": 0.6781416176882172, "grad_norm": 0.2252303515878882, "learning_rate": 9.177219281397127e-05, "loss": 0.5861, "step": 2423 }, { "epoch": 0.6784214945424013, "grad_norm": 0.21099309134814076, "learning_rate": 9.176371324928899e-05, "loss": 0.5287, "step": 2424 }, { "epoch": 0.6787013713965855, "grad_norm": 0.21214908924130665, "learning_rate": 9.175522970951311e-05, "loss": 0.5476, "step": 2425 }, { "epoch": 0.6789812482507697, "grad_norm": 0.20941477744330658, "learning_rate": 9.174674219545108e-05, "loss": 0.5493, "step": 2426 }, { "epoch": 0.6792611251049538, "grad_norm": 0.22236387650826203, "learning_rate": 9.173825070791078e-05, "loss": 0.5412, "step": 2427 }, { "epoch": 0.679541001959138, "grad_norm": 0.2227943379605569, "learning_rate": 9.17297552477004e-05, "loss": 0.5347, "step": 2428 }, { "epoch": 0.6798208788133221, "grad_norm": 0.233040637554221, "learning_rate": 9.172125581562857e-05, "loss": 0.5218, "step": 2429 }, { "epoch": 0.6801007556675063, "grad_norm": 0.2155492753595792, "learning_rate": 9.171275241250426e-05, "loss": 0.5401, "step": 2430 }, { "epoch": 0.6803806325216905, "grad_norm": 0.22573562829812002, "learning_rate": 9.170424503913681e-05, "loss": 0.5192, "step": 2431 }, { "epoch": 0.6806605093758746, "grad_norm": 0.22200806427149247, "learning_rate": 9.1695733696336e-05, "loss": 0.532, "step": 2432 }, { "epoch": 0.6809403862300588, "grad_norm": 0.2491249646322403, "learning_rate": 9.16872183849119e-05, "loss": 0.5203, "step": 2433 }, { "epoch": 0.6812202630842429, "grad_norm": 0.23987168728778543, "learning_rate": 9.167869910567501e-05, "loss": 0.5492, "step": 2434 }, { "epoch": 0.6815001399384271, "grad_norm": 0.22391714684958228, "learning_rate": 9.167017585943623e-05, "loss": 0.5354, "step": 2435 }, { "epoch": 0.6817800167926112, "grad_norm": 0.2350819672659733, "learning_rate": 9.16616486470068e-05, "loss": 0.539, "step": 2436 }, { "epoch": 0.6820598936467954, "grad_norm": 0.24275550843007201, "learning_rate": 9.165311746919831e-05, "loss": 0.5498, "step": 2437 }, { "epoch": 0.6823397705009796, "grad_norm": 0.21304284629654555, "learning_rate": 9.164458232682278e-05, "loss": 0.542, "step": 2438 }, { "epoch": 0.6826196473551638, "grad_norm": 0.21884367177244404, "learning_rate": 9.163604322069261e-05, "loss": 0.5601, "step": 2439 }, { "epoch": 0.6828995242093479, "grad_norm": 0.22410444834241158, "learning_rate": 9.162750015162054e-05, "loss": 0.5368, "step": 2440 }, { "epoch": 0.683179401063532, "grad_norm": 0.2271307266049798, "learning_rate": 9.161895312041971e-05, "loss": 0.5675, "step": 2441 }, { "epoch": 0.6834592779177162, "grad_norm": 0.22202818191834683, "learning_rate": 9.161040212790362e-05, "loss": 0.5421, "step": 2442 }, { "epoch": 0.6837391547719004, "grad_norm": 0.22530864987073587, "learning_rate": 9.160184717488616e-05, "loss": 0.5494, "step": 2443 }, { "epoch": 0.6840190316260846, "grad_norm": 0.2260749470138487, "learning_rate": 9.159328826218161e-05, "loss": 0.5501, "step": 2444 }, { "epoch": 0.6842989084802686, "grad_norm": 0.24187502091301433, "learning_rate": 9.158472539060458e-05, "loss": 0.5322, "step": 2445 }, { "epoch": 0.6845787853344528, "grad_norm": 0.23046316147105173, "learning_rate": 9.157615856097015e-05, "loss": 0.5389, "step": 2446 }, { "epoch": 0.684858662188637, "grad_norm": 0.227924703026395, "learning_rate": 9.156758777409364e-05, "loss": 0.5541, "step": 2447 }, { "epoch": 0.6851385390428212, "grad_norm": 0.21384638121163238, "learning_rate": 9.155901303079089e-05, "loss": 0.5227, "step": 2448 }, { "epoch": 0.6854184158970054, "grad_norm": 0.2258287798205943, "learning_rate": 9.1550434331878e-05, "loss": 0.5399, "step": 2449 }, { "epoch": 0.6856982927511894, "grad_norm": 0.2289440302608055, "learning_rate": 9.15418516781715e-05, "loss": 0.5713, "step": 2450 }, { "epoch": 0.6859781696053736, "grad_norm": 0.21971883054547414, "learning_rate": 9.153326507048834e-05, "loss": 0.5455, "step": 2451 }, { "epoch": 0.6862580464595578, "grad_norm": 0.2111801448115332, "learning_rate": 9.152467450964574e-05, "loss": 0.5295, "step": 2452 }, { "epoch": 0.686537923313742, "grad_norm": 0.2245461783271496, "learning_rate": 9.15160799964614e-05, "loss": 0.5478, "step": 2453 }, { "epoch": 0.6868178001679262, "grad_norm": 0.22719775795417232, "learning_rate": 9.15074815317533e-05, "loss": 0.5364, "step": 2454 }, { "epoch": 0.6870976770221102, "grad_norm": 0.22141104365177822, "learning_rate": 9.149887911633989e-05, "loss": 0.508, "step": 2455 }, { "epoch": 0.6873775538762944, "grad_norm": 0.22725076881125048, "learning_rate": 9.149027275103993e-05, "loss": 0.5376, "step": 2456 }, { "epoch": 0.6876574307304786, "grad_norm": 0.21874077073861875, "learning_rate": 9.14816624366726e-05, "loss": 0.5518, "step": 2457 }, { "epoch": 0.6879373075846628, "grad_norm": 0.22733593578986663, "learning_rate": 9.14730481740574e-05, "loss": 0.5476, "step": 2458 }, { "epoch": 0.6882171844388469, "grad_norm": 0.22468116668619806, "learning_rate": 9.146442996401428e-05, "loss": 0.5526, "step": 2459 }, { "epoch": 0.6884970612930311, "grad_norm": 0.22269294666763426, "learning_rate": 9.145580780736349e-05, "loss": 0.5117, "step": 2460 }, { "epoch": 0.6887769381472152, "grad_norm": 0.23002433824113408, "learning_rate": 9.144718170492572e-05, "loss": 0.5444, "step": 2461 }, { "epoch": 0.6890568150013994, "grad_norm": 0.2314978853312327, "learning_rate": 9.143855165752199e-05, "loss": 0.5357, "step": 2462 }, { "epoch": 0.6893366918555835, "grad_norm": 0.21741171624768588, "learning_rate": 9.142991766597374e-05, "loss": 0.5301, "step": 2463 }, { "epoch": 0.6896165687097677, "grad_norm": 0.23379671587908718, "learning_rate": 9.142127973110273e-05, "loss": 0.5439, "step": 2464 }, { "epoch": 0.6898964455639519, "grad_norm": 0.2277896226934478, "learning_rate": 9.141263785373111e-05, "loss": 0.5667, "step": 2465 }, { "epoch": 0.690176322418136, "grad_norm": 0.2376465386077255, "learning_rate": 9.140399203468146e-05, "loss": 0.5599, "step": 2466 }, { "epoch": 0.6904561992723202, "grad_norm": 0.2383134382595478, "learning_rate": 9.139534227477667e-05, "loss": 0.5625, "step": 2467 }, { "epoch": 0.6907360761265043, "grad_norm": 0.2195144626812075, "learning_rate": 9.138668857484003e-05, "loss": 0.5384, "step": 2468 }, { "epoch": 0.6910159529806885, "grad_norm": 0.217829592785949, "learning_rate": 9.137803093569519e-05, "loss": 0.5543, "step": 2469 }, { "epoch": 0.6912958298348727, "grad_norm": 0.23624278846074326, "learning_rate": 9.136936935816623e-05, "loss": 0.5423, "step": 2470 }, { "epoch": 0.6915757066890568, "grad_norm": 0.2186568174685527, "learning_rate": 9.136070384307754e-05, "loss": 0.544, "step": 2471 }, { "epoch": 0.691855583543241, "grad_norm": 0.2175408840074656, "learning_rate": 9.135203439125389e-05, "loss": 0.5399, "step": 2472 }, { "epoch": 0.6921354603974251, "grad_norm": 0.22644551440211397, "learning_rate": 9.134336100352046e-05, "loss": 0.5688, "step": 2473 }, { "epoch": 0.6924153372516093, "grad_norm": 0.21841015771728386, "learning_rate": 9.13346836807028e-05, "loss": 0.5554, "step": 2474 }, { "epoch": 0.6926952141057935, "grad_norm": 0.2198257691403368, "learning_rate": 9.132600242362681e-05, "loss": 0.5388, "step": 2475 }, { "epoch": 0.6929750909599776, "grad_norm": 0.22003184324424363, "learning_rate": 9.131731723311877e-05, "loss": 0.5337, "step": 2476 }, { "epoch": 0.6932549678141617, "grad_norm": 0.22790725907279172, "learning_rate": 9.130862811000536e-05, "loss": 0.5426, "step": 2477 }, { "epoch": 0.6935348446683459, "grad_norm": 0.2216557770186376, "learning_rate": 9.12999350551136e-05, "loss": 0.5322, "step": 2478 }, { "epoch": 0.6938147215225301, "grad_norm": 0.2387747292363135, "learning_rate": 9.129123806927088e-05, "loss": 0.5353, "step": 2479 }, { "epoch": 0.6940945983767143, "grad_norm": 0.23886840537708795, "learning_rate": 9.128253715330506e-05, "loss": 0.5813, "step": 2480 }, { "epoch": 0.6943744752308985, "grad_norm": 0.21426295665075762, "learning_rate": 9.127383230804421e-05, "loss": 0.5224, "step": 2481 }, { "epoch": 0.6946543520850825, "grad_norm": 0.22804583074574128, "learning_rate": 9.12651235343169e-05, "loss": 0.5586, "step": 2482 }, { "epoch": 0.6949342289392667, "grad_norm": 0.22531355170150924, "learning_rate": 9.125641083295206e-05, "loss": 0.519, "step": 2483 }, { "epoch": 0.6952141057934509, "grad_norm": 0.22369900699564935, "learning_rate": 9.124769420477892e-05, "loss": 0.5345, "step": 2484 }, { "epoch": 0.6954939826476351, "grad_norm": 0.21667888869213964, "learning_rate": 9.123897365062717e-05, "loss": 0.5139, "step": 2485 }, { "epoch": 0.6957738595018192, "grad_norm": 0.22037004183119002, "learning_rate": 9.123024917132683e-05, "loss": 0.5619, "step": 2486 }, { "epoch": 0.6960537363560033, "grad_norm": 0.23529833658505683, "learning_rate": 9.12215207677083e-05, "loss": 0.5538, "step": 2487 }, { "epoch": 0.6963336132101875, "grad_norm": 0.22814892384509067, "learning_rate": 9.121278844060232e-05, "loss": 0.5442, "step": 2488 }, { "epoch": 0.6966134900643717, "grad_norm": 0.210286542902692, "learning_rate": 9.12040521908401e-05, "loss": 0.5436, "step": 2489 }, { "epoch": 0.6968933669185559, "grad_norm": 0.22496438800070656, "learning_rate": 9.119531201925311e-05, "loss": 0.5618, "step": 2490 }, { "epoch": 0.69717324377274, "grad_norm": 0.2301749121128698, "learning_rate": 9.118656792667328e-05, "loss": 0.5211, "step": 2491 }, { "epoch": 0.6974531206269241, "grad_norm": 0.22979032730530666, "learning_rate": 9.117781991393283e-05, "loss": 0.5385, "step": 2492 }, { "epoch": 0.6977329974811083, "grad_norm": 0.2263644099605242, "learning_rate": 9.116906798186448e-05, "loss": 0.5756, "step": 2493 }, { "epoch": 0.6980128743352925, "grad_norm": 0.2369073042879937, "learning_rate": 9.116031213130117e-05, "loss": 0.5721, "step": 2494 }, { "epoch": 0.6982927511894766, "grad_norm": 0.22793661358143325, "learning_rate": 9.115155236307631e-05, "loss": 0.5267, "step": 2495 }, { "epoch": 0.6985726280436608, "grad_norm": 0.22566745591681991, "learning_rate": 9.114278867802366e-05, "loss": 0.5371, "step": 2496 }, { "epoch": 0.6988525048978449, "grad_norm": 0.22738247065862394, "learning_rate": 9.113402107697737e-05, "loss": 0.5425, "step": 2497 }, { "epoch": 0.6991323817520291, "grad_norm": 0.23734515453707278, "learning_rate": 9.112524956077193e-05, "loss": 0.5499, "step": 2498 }, { "epoch": 0.6994122586062133, "grad_norm": 0.2544658923189295, "learning_rate": 9.11164741302422e-05, "loss": 0.5436, "step": 2499 }, { "epoch": 0.6996921354603974, "grad_norm": 0.23105511764419184, "learning_rate": 9.110769478622346e-05, "loss": 0.5528, "step": 2500 }, { "epoch": 0.6999720123145816, "grad_norm": 0.22154709356239966, "learning_rate": 9.109891152955132e-05, "loss": 0.5569, "step": 2501 }, { "epoch": 0.7002518891687658, "grad_norm": 0.23139612263439707, "learning_rate": 9.109012436106178e-05, "loss": 0.5536, "step": 2502 }, { "epoch": 0.7005317660229499, "grad_norm": 0.22860454581068818, "learning_rate": 9.108133328159121e-05, "loss": 0.5557, "step": 2503 }, { "epoch": 0.700811642877134, "grad_norm": 0.2291406331101458, "learning_rate": 9.107253829197633e-05, "loss": 0.5602, "step": 2504 }, { "epoch": 0.7010915197313182, "grad_norm": 0.22668470166488117, "learning_rate": 9.106373939305426e-05, "loss": 0.5402, "step": 2505 }, { "epoch": 0.7013713965855024, "grad_norm": 0.22880385201909617, "learning_rate": 9.105493658566251e-05, "loss": 0.5356, "step": 2506 }, { "epoch": 0.7016512734396866, "grad_norm": 0.23568857832719925, "learning_rate": 9.104612987063891e-05, "loss": 0.5296, "step": 2507 }, { "epoch": 0.7019311502938707, "grad_norm": 0.22098103092045723, "learning_rate": 9.103731924882169e-05, "loss": 0.5414, "step": 2508 }, { "epoch": 0.7022110271480548, "grad_norm": 0.22701922252440868, "learning_rate": 9.102850472104944e-05, "loss": 0.5541, "step": 2509 }, { "epoch": 0.702490904002239, "grad_norm": 0.22767011589671496, "learning_rate": 9.101968628816118e-05, "loss": 0.5267, "step": 2510 }, { "epoch": 0.7027707808564232, "grad_norm": 0.2333123777940145, "learning_rate": 9.10108639509962e-05, "loss": 0.5327, "step": 2511 }, { "epoch": 0.7030506577106074, "grad_norm": 0.2252300859420707, "learning_rate": 9.100203771039424e-05, "loss": 0.5393, "step": 2512 }, { "epoch": 0.7033305345647914, "grad_norm": 0.22824894530208628, "learning_rate": 9.099320756719537e-05, "loss": 0.5271, "step": 2513 }, { "epoch": 0.7036104114189756, "grad_norm": 0.23138598533223126, "learning_rate": 9.098437352224006e-05, "loss": 0.5532, "step": 2514 }, { "epoch": 0.7038902882731598, "grad_norm": 0.21853894319361244, "learning_rate": 9.097553557636916e-05, "loss": 0.5312, "step": 2515 }, { "epoch": 0.704170165127344, "grad_norm": 0.23278694561533653, "learning_rate": 9.096669373042382e-05, "loss": 0.563, "step": 2516 }, { "epoch": 0.7044500419815282, "grad_norm": 0.23425782484952629, "learning_rate": 9.095784798524566e-05, "loss": 0.5296, "step": 2517 }, { "epoch": 0.7047299188357122, "grad_norm": 0.22751597950416957, "learning_rate": 9.094899834167658e-05, "loss": 0.533, "step": 2518 }, { "epoch": 0.7050097956898964, "grad_norm": 0.23366548542267773, "learning_rate": 9.094014480055895e-05, "loss": 0.5286, "step": 2519 }, { "epoch": 0.7052896725440806, "grad_norm": 0.22636866726907906, "learning_rate": 9.09312873627354e-05, "loss": 0.5603, "step": 2520 }, { "epoch": 0.7055695493982648, "grad_norm": 0.22411400509153592, "learning_rate": 9.092242602904903e-05, "loss": 0.5358, "step": 2521 }, { "epoch": 0.705849426252449, "grad_norm": 0.22564708212854936, "learning_rate": 9.091356080034322e-05, "loss": 0.5046, "step": 2522 }, { "epoch": 0.7061293031066331, "grad_norm": 0.2254724697285052, "learning_rate": 9.09046916774618e-05, "loss": 0.5477, "step": 2523 }, { "epoch": 0.7064091799608172, "grad_norm": 0.2238374914670721, "learning_rate": 9.089581866124894e-05, "loss": 0.5622, "step": 2524 }, { "epoch": 0.7066890568150014, "grad_norm": 0.21942806561676945, "learning_rate": 9.088694175254916e-05, "loss": 0.5398, "step": 2525 }, { "epoch": 0.7069689336691856, "grad_norm": 0.21567431447044527, "learning_rate": 9.087806095220739e-05, "loss": 0.5413, "step": 2526 }, { "epoch": 0.7072488105233697, "grad_norm": 0.22112822696207846, "learning_rate": 9.086917626106889e-05, "loss": 0.5362, "step": 2527 }, { "epoch": 0.7075286873775539, "grad_norm": 0.2230119498609584, "learning_rate": 9.086028767997932e-05, "loss": 0.5163, "step": 2528 }, { "epoch": 0.707808564231738, "grad_norm": 0.22128627734084555, "learning_rate": 9.08513952097847e-05, "loss": 0.5543, "step": 2529 }, { "epoch": 0.7080884410859222, "grad_norm": 0.2248930989689319, "learning_rate": 9.08424988513314e-05, "loss": 0.5584, "step": 2530 }, { "epoch": 0.7083683179401064, "grad_norm": 0.22292807302428705, "learning_rate": 9.083359860546622e-05, "loss": 0.5555, "step": 2531 }, { "epoch": 0.7086481947942905, "grad_norm": 0.2266481146482922, "learning_rate": 9.082469447303627e-05, "loss": 0.5508, "step": 2532 }, { "epoch": 0.7089280716484747, "grad_norm": 0.2236124543070458, "learning_rate": 9.081578645488904e-05, "loss": 0.563, "step": 2533 }, { "epoch": 0.7092079485026588, "grad_norm": 0.21623042480224322, "learning_rate": 9.08068745518724e-05, "loss": 0.5323, "step": 2534 }, { "epoch": 0.709487825356843, "grad_norm": 0.23072175564283595, "learning_rate": 9.079795876483462e-05, "loss": 0.5409, "step": 2535 }, { "epoch": 0.7097677022110271, "grad_norm": 0.22102417472322713, "learning_rate": 9.078903909462428e-05, "loss": 0.5394, "step": 2536 }, { "epoch": 0.7100475790652113, "grad_norm": 0.21099745557956595, "learning_rate": 9.078011554209038e-05, "loss": 0.5345, "step": 2537 }, { "epoch": 0.7103274559193955, "grad_norm": 0.22400036434514708, "learning_rate": 9.077118810808225e-05, "loss": 0.5302, "step": 2538 }, { "epoch": 0.7106073327735796, "grad_norm": 0.2095690846545887, "learning_rate": 9.07622567934496e-05, "loss": 0.545, "step": 2539 }, { "epoch": 0.7108872096277637, "grad_norm": 0.2325850449801102, "learning_rate": 9.075332159904255e-05, "loss": 0.5381, "step": 2540 }, { "epoch": 0.7111670864819479, "grad_norm": 0.22466041408969864, "learning_rate": 9.074438252571153e-05, "loss": 0.5594, "step": 2541 }, { "epoch": 0.7114469633361321, "grad_norm": 0.22859958906579383, "learning_rate": 9.073543957430738e-05, "loss": 0.5506, "step": 2542 }, { "epoch": 0.7117268401903163, "grad_norm": 0.23946774219802966, "learning_rate": 9.072649274568129e-05, "loss": 0.5468, "step": 2543 }, { "epoch": 0.7120067170445005, "grad_norm": 0.22027467570847611, "learning_rate": 9.071754204068482e-05, "loss": 0.5259, "step": 2544 }, { "epoch": 0.7122865938986845, "grad_norm": 0.22514093412641412, "learning_rate": 9.070858746016992e-05, "loss": 0.5397, "step": 2545 }, { "epoch": 0.7125664707528687, "grad_norm": 0.21277314961836397, "learning_rate": 9.069962900498888e-05, "loss": 0.5316, "step": 2546 }, { "epoch": 0.7128463476070529, "grad_norm": 0.22209882294945468, "learning_rate": 9.069066667599434e-05, "loss": 0.5532, "step": 2547 }, { "epoch": 0.7131262244612371, "grad_norm": 0.20645453081914583, "learning_rate": 9.068170047403942e-05, "loss": 0.5144, "step": 2548 }, { "epoch": 0.7134061013154213, "grad_norm": 0.22658946845370995, "learning_rate": 9.067273039997744e-05, "loss": 0.5497, "step": 2549 }, { "epoch": 0.7136859781696053, "grad_norm": 0.23185946591160245, "learning_rate": 9.066375645466222e-05, "loss": 0.5608, "step": 2550 }, { "epoch": 0.7139658550237895, "grad_norm": 0.2201375694693047, "learning_rate": 9.065477863894792e-05, "loss": 0.5324, "step": 2551 }, { "epoch": 0.7142457318779737, "grad_norm": 0.22507634091058842, "learning_rate": 9.064579695368902e-05, "loss": 0.5497, "step": 2552 }, { "epoch": 0.7145256087321579, "grad_norm": 0.22208298299269505, "learning_rate": 9.063681139974041e-05, "loss": 0.5323, "step": 2553 }, { "epoch": 0.714805485586342, "grad_norm": 0.23861968693150126, "learning_rate": 9.062782197795736e-05, "loss": 0.5679, "step": 2554 }, { "epoch": 0.7150853624405261, "grad_norm": 0.23821003166266916, "learning_rate": 9.061882868919546e-05, "loss": 0.5309, "step": 2555 }, { "epoch": 0.7153652392947103, "grad_norm": 0.2245213474819366, "learning_rate": 9.060983153431073e-05, "loss": 0.5348, "step": 2556 }, { "epoch": 0.7156451161488945, "grad_norm": 0.2285669613279405, "learning_rate": 9.060083051415949e-05, "loss": 0.545, "step": 2557 }, { "epoch": 0.7159249930030787, "grad_norm": 0.22544893736292657, "learning_rate": 9.059182562959848e-05, "loss": 0.5533, "step": 2558 }, { "epoch": 0.7162048698572628, "grad_norm": 0.22679876936910812, "learning_rate": 9.058281688148477e-05, "loss": 0.5406, "step": 2559 }, { "epoch": 0.716484746711447, "grad_norm": 0.21540258409652902, "learning_rate": 9.057380427067584e-05, "loss": 0.5388, "step": 2560 }, { "epoch": 0.7167646235656311, "grad_norm": 0.2318314054357572, "learning_rate": 9.056478779802952e-05, "loss": 0.5612, "step": 2561 }, { "epoch": 0.7170445004198153, "grad_norm": 0.2251120346846295, "learning_rate": 9.0555767464404e-05, "loss": 0.5511, "step": 2562 }, { "epoch": 0.7173243772739994, "grad_norm": 0.209993428669391, "learning_rate": 9.054674327065781e-05, "loss": 0.535, "step": 2563 }, { "epoch": 0.7176042541281836, "grad_norm": 0.22113928151644965, "learning_rate": 9.05377152176499e-05, "loss": 0.5305, "step": 2564 }, { "epoch": 0.7178841309823678, "grad_norm": 0.22341179413776524, "learning_rate": 9.052868330623959e-05, "loss": 0.5304, "step": 2565 }, { "epoch": 0.7181640078365519, "grad_norm": 0.22135061017833846, "learning_rate": 9.05196475372865e-05, "loss": 0.5456, "step": 2566 }, { "epoch": 0.7184438846907361, "grad_norm": 0.22129377660549585, "learning_rate": 9.051060791165069e-05, "loss": 0.5289, "step": 2567 }, { "epoch": 0.7187237615449202, "grad_norm": 0.22048892915569251, "learning_rate": 9.050156443019253e-05, "loss": 0.5438, "step": 2568 }, { "epoch": 0.7190036383991044, "grad_norm": 0.21299945876885298, "learning_rate": 9.049251709377282e-05, "loss": 0.5369, "step": 2569 }, { "epoch": 0.7192835152532886, "grad_norm": 0.21964049638600658, "learning_rate": 9.048346590325264e-05, "loss": 0.5372, "step": 2570 }, { "epoch": 0.7195633921074727, "grad_norm": 0.23338691731135155, "learning_rate": 9.047441085949354e-05, "loss": 0.5362, "step": 2571 }, { "epoch": 0.7198432689616568, "grad_norm": 0.22879152170084813, "learning_rate": 9.046535196335735e-05, "loss": 0.5599, "step": 2572 }, { "epoch": 0.720123145815841, "grad_norm": 0.2402491397964616, "learning_rate": 9.04562892157063e-05, "loss": 0.5649, "step": 2573 }, { "epoch": 0.7204030226700252, "grad_norm": 0.2351157095883497, "learning_rate": 9.044722261740301e-05, "loss": 0.5583, "step": 2574 }, { "epoch": 0.7206828995242094, "grad_norm": 0.22494632283570237, "learning_rate": 9.043815216931043e-05, "loss": 0.5319, "step": 2575 }, { "epoch": 0.7209627763783935, "grad_norm": 0.22353460388964774, "learning_rate": 9.042907787229189e-05, "loss": 0.5442, "step": 2576 }, { "epoch": 0.7212426532325776, "grad_norm": 0.23178700738408642, "learning_rate": 9.041999972721109e-05, "loss": 0.5395, "step": 2577 }, { "epoch": 0.7215225300867618, "grad_norm": 0.223273726747122, "learning_rate": 9.04109177349321e-05, "loss": 0.5477, "step": 2578 }, { "epoch": 0.721802406940946, "grad_norm": 0.2238439973920132, "learning_rate": 9.040183189631932e-05, "loss": 0.5485, "step": 2579 }, { "epoch": 0.7220822837951302, "grad_norm": 0.23062693859751235, "learning_rate": 9.039274221223759e-05, "loss": 0.5439, "step": 2580 }, { "epoch": 0.7223621606493144, "grad_norm": 0.22042520477106617, "learning_rate": 9.038364868355204e-05, "loss": 0.5461, "step": 2581 }, { "epoch": 0.7226420375034984, "grad_norm": 0.23343794742484003, "learning_rate": 9.037455131112819e-05, "loss": 0.5579, "step": 2582 }, { "epoch": 0.7229219143576826, "grad_norm": 0.22016660472763663, "learning_rate": 9.036545009583198e-05, "loss": 0.5594, "step": 2583 }, { "epoch": 0.7232017912118668, "grad_norm": 0.22215114217283774, "learning_rate": 9.035634503852961e-05, "loss": 0.5556, "step": 2584 }, { "epoch": 0.723481668066051, "grad_norm": 0.22430675789441773, "learning_rate": 9.034723614008773e-05, "loss": 0.5464, "step": 2585 }, { "epoch": 0.7237615449202351, "grad_norm": 0.2264205841303816, "learning_rate": 9.033812340137334e-05, "loss": 0.5555, "step": 2586 }, { "epoch": 0.7240414217744192, "grad_norm": 0.2165219165080543, "learning_rate": 9.032900682325378e-05, "loss": 0.5178, "step": 2587 }, { "epoch": 0.7243212986286034, "grad_norm": 0.21021652285032288, "learning_rate": 9.031988640659679e-05, "loss": 0.5117, "step": 2588 }, { "epoch": 0.7246011754827876, "grad_norm": 0.23006724448066837, "learning_rate": 9.031076215227043e-05, "loss": 0.5438, "step": 2589 }, { "epoch": 0.7248810523369718, "grad_norm": 0.2372244939552078, "learning_rate": 9.030163406114316e-05, "loss": 0.5438, "step": 2590 }, { "epoch": 0.7251609291911559, "grad_norm": 0.22886659116273975, "learning_rate": 9.029250213408382e-05, "loss": 0.5098, "step": 2591 }, { "epoch": 0.72544080604534, "grad_norm": 0.23270221849125097, "learning_rate": 9.028336637196158e-05, "loss": 0.5314, "step": 2592 }, { "epoch": 0.7257206828995242, "grad_norm": 0.22839365533135098, "learning_rate": 9.027422677564596e-05, "loss": 0.546, "step": 2593 }, { "epoch": 0.7260005597537084, "grad_norm": 0.22766488812730518, "learning_rate": 9.02650833460069e-05, "loss": 0.5595, "step": 2594 }, { "epoch": 0.7262804366078925, "grad_norm": 0.21650086692890955, "learning_rate": 9.025593608391467e-05, "loss": 0.5303, "step": 2595 }, { "epoch": 0.7265603134620767, "grad_norm": 0.21363366533674547, "learning_rate": 9.024678499023991e-05, "loss": 0.5426, "step": 2596 }, { "epoch": 0.7268401903162608, "grad_norm": 0.22551173213075484, "learning_rate": 9.023763006585364e-05, "loss": 0.5421, "step": 2597 }, { "epoch": 0.727120067170445, "grad_norm": 0.22686860980709742, "learning_rate": 9.022847131162721e-05, "loss": 0.5607, "step": 2598 }, { "epoch": 0.7273999440246292, "grad_norm": 0.22405581529218108, "learning_rate": 9.021930872843236e-05, "loss": 0.5463, "step": 2599 }, { "epoch": 0.7276798208788133, "grad_norm": 0.22396964443628806, "learning_rate": 9.02101423171412e-05, "loss": 0.5237, "step": 2600 }, { "epoch": 0.7279596977329975, "grad_norm": 0.21505223093740625, "learning_rate": 9.020097207862617e-05, "loss": 0.5264, "step": 2601 }, { "epoch": 0.7282395745871817, "grad_norm": 0.21557819647463233, "learning_rate": 9.019179801376013e-05, "loss": 0.5175, "step": 2602 }, { "epoch": 0.7285194514413658, "grad_norm": 0.21356209983230737, "learning_rate": 9.018262012341627e-05, "loss": 0.531, "step": 2603 }, { "epoch": 0.7287993282955499, "grad_norm": 0.221118666679804, "learning_rate": 9.017343840846812e-05, "loss": 0.5431, "step": 2604 }, { "epoch": 0.7290792051497341, "grad_norm": 0.21379233115425508, "learning_rate": 9.016425286978961e-05, "loss": 0.5434, "step": 2605 }, { "epoch": 0.7293590820039183, "grad_norm": 0.21863786166249938, "learning_rate": 9.015506350825504e-05, "loss": 0.533, "step": 2606 }, { "epoch": 0.7296389588581025, "grad_norm": 0.2121735124197766, "learning_rate": 9.014587032473906e-05, "loss": 0.5264, "step": 2607 }, { "epoch": 0.7299188357122866, "grad_norm": 0.22337384706295352, "learning_rate": 9.013667332011665e-05, "loss": 0.5407, "step": 2608 }, { "epoch": 0.7301987125664707, "grad_norm": 0.22611911023687936, "learning_rate": 9.012747249526324e-05, "loss": 0.548, "step": 2609 }, { "epoch": 0.7304785894206549, "grad_norm": 0.2208382972947634, "learning_rate": 9.011826785105451e-05, "loss": 0.5284, "step": 2610 }, { "epoch": 0.7307584662748391, "grad_norm": 0.22919286676851466, "learning_rate": 9.010905938836661e-05, "loss": 0.5226, "step": 2611 }, { "epoch": 0.7310383431290233, "grad_norm": 0.22167131435437362, "learning_rate": 9.009984710807595e-05, "loss": 0.5017, "step": 2612 }, { "epoch": 0.7313182199832073, "grad_norm": 0.2212876818656353, "learning_rate": 9.009063101105943e-05, "loss": 0.5542, "step": 2613 }, { "epoch": 0.7315980968373915, "grad_norm": 0.22541444570079514, "learning_rate": 9.00814110981942e-05, "loss": 0.5411, "step": 2614 }, { "epoch": 0.7318779736915757, "grad_norm": 0.2129620860116927, "learning_rate": 9.007218737035782e-05, "loss": 0.5332, "step": 2615 }, { "epoch": 0.7321578505457599, "grad_norm": 0.2216253089851693, "learning_rate": 9.006295982842825e-05, "loss": 0.5379, "step": 2616 }, { "epoch": 0.7324377273999441, "grad_norm": 0.2109998430273148, "learning_rate": 9.00537284732837e-05, "loss": 0.5285, "step": 2617 }, { "epoch": 0.7327176042541281, "grad_norm": 0.2249144197432733, "learning_rate": 9.004449330580288e-05, "loss": 0.5397, "step": 2618 }, { "epoch": 0.7329974811083123, "grad_norm": 0.21344031391081858, "learning_rate": 9.003525432686477e-05, "loss": 0.5551, "step": 2619 }, { "epoch": 0.7332773579624965, "grad_norm": 0.2289174917729721, "learning_rate": 9.002601153734874e-05, "loss": 0.5628, "step": 2620 }, { "epoch": 0.7335572348166807, "grad_norm": 0.20781833809778474, "learning_rate": 9.001676493813452e-05, "loss": 0.5309, "step": 2621 }, { "epoch": 0.7338371116708649, "grad_norm": 0.2160990710039784, "learning_rate": 9.000751453010223e-05, "loss": 0.5297, "step": 2622 }, { "epoch": 0.734116988525049, "grad_norm": 0.22871188542141155, "learning_rate": 8.99982603141323e-05, "loss": 0.586, "step": 2623 }, { "epoch": 0.7343968653792331, "grad_norm": 0.21264166113703792, "learning_rate": 8.998900229110557e-05, "loss": 0.5459, "step": 2624 }, { "epoch": 0.7346767422334173, "grad_norm": 0.2244892589900409, "learning_rate": 8.997974046190323e-05, "loss": 0.5345, "step": 2625 }, { "epoch": 0.7349566190876015, "grad_norm": 0.2252025077322312, "learning_rate": 8.99704748274068e-05, "loss": 0.542, "step": 2626 }, { "epoch": 0.7352364959417856, "grad_norm": 0.21434324820570827, "learning_rate": 8.996120538849822e-05, "loss": 0.5178, "step": 2627 }, { "epoch": 0.7355163727959698, "grad_norm": 0.22143531292288385, "learning_rate": 8.995193214605973e-05, "loss": 0.5215, "step": 2628 }, { "epoch": 0.7357962496501539, "grad_norm": 0.21683386039801417, "learning_rate": 8.994265510097397e-05, "loss": 0.5642, "step": 2629 }, { "epoch": 0.7360761265043381, "grad_norm": 0.22279042121537643, "learning_rate": 8.993337425412396e-05, "loss": 0.5344, "step": 2630 }, { "epoch": 0.7363560033585222, "grad_norm": 0.2242316556716447, "learning_rate": 8.992408960639303e-05, "loss": 0.5341, "step": 2631 }, { "epoch": 0.7366358802127064, "grad_norm": 0.2111941057458277, "learning_rate": 8.991480115866489e-05, "loss": 0.5463, "step": 2632 }, { "epoch": 0.7369157570668906, "grad_norm": 0.22429499663794777, "learning_rate": 8.990550891182365e-05, "loss": 0.5402, "step": 2633 }, { "epoch": 0.7371956339210747, "grad_norm": 0.22019295218467896, "learning_rate": 8.989621286675373e-05, "loss": 0.549, "step": 2634 }, { "epoch": 0.7374755107752589, "grad_norm": 0.227911797879774, "learning_rate": 8.988691302433993e-05, "loss": 0.5627, "step": 2635 }, { "epoch": 0.737755387629443, "grad_norm": 0.21941965086394474, "learning_rate": 8.987760938546744e-05, "loss": 0.5588, "step": 2636 }, { "epoch": 0.7380352644836272, "grad_norm": 0.2179316528240126, "learning_rate": 8.986830195102176e-05, "loss": 0.5446, "step": 2637 }, { "epoch": 0.7383151413378114, "grad_norm": 0.2145935169967857, "learning_rate": 8.98589907218888e-05, "loss": 0.5333, "step": 2638 }, { "epoch": 0.7385950181919955, "grad_norm": 0.2257690582727668, "learning_rate": 8.984967569895477e-05, "loss": 0.5314, "step": 2639 }, { "epoch": 0.7388748950461796, "grad_norm": 0.2303994125554735, "learning_rate": 8.98403568831063e-05, "loss": 0.5333, "step": 2640 }, { "epoch": 0.7391547719003638, "grad_norm": 0.22440134450016616, "learning_rate": 8.983103427523039e-05, "loss": 0.5599, "step": 2641 }, { "epoch": 0.739434648754548, "grad_norm": 0.2203078419742426, "learning_rate": 8.982170787621432e-05, "loss": 0.5592, "step": 2642 }, { "epoch": 0.7397145256087322, "grad_norm": 0.21943756803723738, "learning_rate": 8.981237768694582e-05, "loss": 0.5209, "step": 2643 }, { "epoch": 0.7399944024629164, "grad_norm": 0.2251406932636578, "learning_rate": 8.980304370831292e-05, "loss": 0.5369, "step": 2644 }, { "epoch": 0.7402742793171004, "grad_norm": 0.24029662759994772, "learning_rate": 8.979370594120402e-05, "loss": 0.5635, "step": 2645 }, { "epoch": 0.7405541561712846, "grad_norm": 0.23198592666987902, "learning_rate": 8.978436438650795e-05, "loss": 0.5518, "step": 2646 }, { "epoch": 0.7408340330254688, "grad_norm": 0.2265286213827318, "learning_rate": 8.97750190451138e-05, "loss": 0.5569, "step": 2647 }, { "epoch": 0.741113909879653, "grad_norm": 0.21371175891317676, "learning_rate": 8.976566991791107e-05, "loss": 0.5119, "step": 2648 }, { "epoch": 0.7413937867338372, "grad_norm": 0.2135139892642674, "learning_rate": 8.975631700578962e-05, "loss": 0.5331, "step": 2649 }, { "epoch": 0.7416736635880212, "grad_norm": 0.21976528912048277, "learning_rate": 8.974696030963965e-05, "loss": 0.517, "step": 2650 }, { "epoch": 0.7419535404422054, "grad_norm": 0.2223069175926191, "learning_rate": 8.973759983035177e-05, "loss": 0.5392, "step": 2651 }, { "epoch": 0.7422334172963896, "grad_norm": 0.22494355617310022, "learning_rate": 8.972823556881689e-05, "loss": 0.5556, "step": 2652 }, { "epoch": 0.7425132941505738, "grad_norm": 0.21628539978386174, "learning_rate": 8.971886752592631e-05, "loss": 0.5233, "step": 2653 }, { "epoch": 0.742793171004758, "grad_norm": 0.21273840238289846, "learning_rate": 8.970949570257169e-05, "loss": 0.5502, "step": 2654 }, { "epoch": 0.743073047858942, "grad_norm": 0.22284434464455785, "learning_rate": 8.970012009964503e-05, "loss": 0.521, "step": 2655 }, { "epoch": 0.7433529247131262, "grad_norm": 0.22036668898365644, "learning_rate": 8.969074071803875e-05, "loss": 0.5516, "step": 2656 }, { "epoch": 0.7436328015673104, "grad_norm": 0.2237368242432521, "learning_rate": 8.968135755864553e-05, "loss": 0.5124, "step": 2657 }, { "epoch": 0.7439126784214946, "grad_norm": 0.24465980933910203, "learning_rate": 8.967197062235848e-05, "loss": 0.5416, "step": 2658 }, { "epoch": 0.7441925552756787, "grad_norm": 0.2103241232334168, "learning_rate": 8.966257991007108e-05, "loss": 0.542, "step": 2659 }, { "epoch": 0.7444724321298629, "grad_norm": 0.2309919340598145, "learning_rate": 8.965318542267711e-05, "loss": 0.5556, "step": 2660 }, { "epoch": 0.744752308984047, "grad_norm": 0.22292085304964945, "learning_rate": 8.964378716107076e-05, "loss": 0.5627, "step": 2661 }, { "epoch": 0.7450321858382312, "grad_norm": 0.22509967643412987, "learning_rate": 8.963438512614655e-05, "loss": 0.5494, "step": 2662 }, { "epoch": 0.7453120626924153, "grad_norm": 0.2167942969204872, "learning_rate": 8.96249793187994e-05, "loss": 0.5438, "step": 2663 }, { "epoch": 0.7455919395465995, "grad_norm": 0.2240355642128715, "learning_rate": 8.961556973992452e-05, "loss": 0.5702, "step": 2664 }, { "epoch": 0.7458718164007837, "grad_norm": 0.22188055447651847, "learning_rate": 8.960615639041755e-05, "loss": 0.5208, "step": 2665 }, { "epoch": 0.7461516932549678, "grad_norm": 0.22901006620538905, "learning_rate": 8.959673927117444e-05, "loss": 0.5806, "step": 2666 }, { "epoch": 0.746431570109152, "grad_norm": 0.21738752651697146, "learning_rate": 8.958731838309153e-05, "loss": 0.5207, "step": 2667 }, { "epoch": 0.7467114469633361, "grad_norm": 0.2181564292843853, "learning_rate": 8.957789372706548e-05, "loss": 0.5306, "step": 2668 }, { "epoch": 0.7469913238175203, "grad_norm": 0.22843602708917043, "learning_rate": 8.956846530399338e-05, "loss": 0.5407, "step": 2669 }, { "epoch": 0.7472712006717045, "grad_norm": 0.2139752967512492, "learning_rate": 8.955903311477259e-05, "loss": 0.533, "step": 2670 }, { "epoch": 0.7475510775258886, "grad_norm": 0.23982720290324436, "learning_rate": 8.95495971603009e-05, "loss": 0.55, "step": 2671 }, { "epoch": 0.7478309543800727, "grad_norm": 0.21307316566105344, "learning_rate": 8.954015744147639e-05, "loss": 0.5208, "step": 2672 }, { "epoch": 0.7481108312342569, "grad_norm": 0.21710414532432137, "learning_rate": 8.953071395919757e-05, "loss": 0.5229, "step": 2673 }, { "epoch": 0.7483907080884411, "grad_norm": 0.22802418489130677, "learning_rate": 8.952126671436327e-05, "loss": 0.5352, "step": 2674 }, { "epoch": 0.7486705849426253, "grad_norm": 0.22741647869103276, "learning_rate": 8.95118157078727e-05, "loss": 0.5348, "step": 2675 }, { "epoch": 0.7489504617968094, "grad_norm": 0.23040467034208675, "learning_rate": 8.950236094062537e-05, "loss": 0.518, "step": 2676 }, { "epoch": 0.7492303386509935, "grad_norm": 0.2241049617264922, "learning_rate": 8.949290241352124e-05, "loss": 0.524, "step": 2677 }, { "epoch": 0.7495102155051777, "grad_norm": 0.22660931524184508, "learning_rate": 8.948344012746053e-05, "loss": 0.5555, "step": 2678 }, { "epoch": 0.7497900923593619, "grad_norm": 0.21537979934095933, "learning_rate": 8.947397408334391e-05, "loss": 0.5045, "step": 2679 }, { "epoch": 0.7500699692135461, "grad_norm": 0.21305412259855636, "learning_rate": 8.946450428207233e-05, "loss": 0.5181, "step": 2680 }, { "epoch": 0.7503498460677303, "grad_norm": 0.22846564876789416, "learning_rate": 8.945503072454714e-05, "loss": 0.5426, "step": 2681 }, { "epoch": 0.7506297229219143, "grad_norm": 0.21719840487731035, "learning_rate": 8.944555341167004e-05, "loss": 0.5142, "step": 2682 }, { "epoch": 0.7509095997760985, "grad_norm": 0.20698249269282035, "learning_rate": 8.94360723443431e-05, "loss": 0.5607, "step": 2683 }, { "epoch": 0.7511894766302827, "grad_norm": 0.22016718001568167, "learning_rate": 8.942658752346871e-05, "loss": 0.5707, "step": 2684 }, { "epoch": 0.7514693534844669, "grad_norm": 0.21775658131362371, "learning_rate": 8.941709894994966e-05, "loss": 0.536, "step": 2685 }, { "epoch": 0.751749230338651, "grad_norm": 0.22609157050532247, "learning_rate": 8.940760662468907e-05, "loss": 0.5763, "step": 2686 }, { "epoch": 0.7520291071928351, "grad_norm": 0.22980289403211504, "learning_rate": 8.939811054859042e-05, "loss": 0.5479, "step": 2687 }, { "epoch": 0.7523089840470193, "grad_norm": 0.22606307563622416, "learning_rate": 8.938861072255755e-05, "loss": 0.5421, "step": 2688 }, { "epoch": 0.7525888609012035, "grad_norm": 0.21508725728349393, "learning_rate": 8.937910714749468e-05, "loss": 0.5133, "step": 2689 }, { "epoch": 0.7528687377553877, "grad_norm": 0.22178141643697674, "learning_rate": 8.936959982430634e-05, "loss": 0.5314, "step": 2690 }, { "epoch": 0.7531486146095718, "grad_norm": 0.22299246748814808, "learning_rate": 8.936008875389746e-05, "loss": 0.5476, "step": 2691 }, { "epoch": 0.7534284914637559, "grad_norm": 0.20794294107003272, "learning_rate": 8.935057393717329e-05, "loss": 0.5537, "step": 2692 }, { "epoch": 0.7537083683179401, "grad_norm": 0.21841321736125574, "learning_rate": 8.934105537503947e-05, "loss": 0.5323, "step": 2693 }, { "epoch": 0.7539882451721243, "grad_norm": 0.23010265456635823, "learning_rate": 8.933153306840199e-05, "loss": 0.557, "step": 2694 }, { "epoch": 0.7542681220263084, "grad_norm": 0.21571026554168998, "learning_rate": 8.932200701816716e-05, "loss": 0.5475, "step": 2695 }, { "epoch": 0.7545479988804926, "grad_norm": 0.20343509482874472, "learning_rate": 8.931247722524169e-05, "loss": 0.5128, "step": 2696 }, { "epoch": 0.7548278757346767, "grad_norm": 0.21885553947566588, "learning_rate": 8.930294369053265e-05, "loss": 0.5391, "step": 2697 }, { "epoch": 0.7551077525888609, "grad_norm": 0.21544555079315492, "learning_rate": 8.929340641494743e-05, "loss": 0.5403, "step": 2698 }, { "epoch": 0.755387629443045, "grad_norm": 0.21953091916737594, "learning_rate": 8.92838653993938e-05, "loss": 0.5456, "step": 2699 }, { "epoch": 0.7556675062972292, "grad_norm": 0.20638901705605822, "learning_rate": 8.927432064477985e-05, "loss": 0.5377, "step": 2700 }, { "epoch": 0.7559473831514134, "grad_norm": 0.22355038181969095, "learning_rate": 8.92647721520141e-05, "loss": 0.5483, "step": 2701 }, { "epoch": 0.7562272600055976, "grad_norm": 0.22500535830489837, "learning_rate": 8.925521992200536e-05, "loss": 0.5274, "step": 2702 }, { "epoch": 0.7565071368597817, "grad_norm": 0.2253729760858358, "learning_rate": 8.924566395566279e-05, "loss": 0.5396, "step": 2703 }, { "epoch": 0.7567870137139658, "grad_norm": 0.2122121189260114, "learning_rate": 8.923610425389599e-05, "loss": 0.5469, "step": 2704 }, { "epoch": 0.75706689056815, "grad_norm": 0.22644094592631228, "learning_rate": 8.92265408176148e-05, "loss": 0.5317, "step": 2705 }, { "epoch": 0.7573467674223342, "grad_norm": 0.22333687597978505, "learning_rate": 8.92169736477295e-05, "loss": 0.5554, "step": 2706 }, { "epoch": 0.7576266442765184, "grad_norm": 0.2080552531340251, "learning_rate": 8.920740274515072e-05, "loss": 0.5019, "step": 2707 }, { "epoch": 0.7579065211307024, "grad_norm": 0.23186829630086137, "learning_rate": 8.919782811078938e-05, "loss": 0.5198, "step": 2708 }, { "epoch": 0.7581863979848866, "grad_norm": 0.21678582425035686, "learning_rate": 8.918824974555682e-05, "loss": 0.5361, "step": 2709 }, { "epoch": 0.7584662748390708, "grad_norm": 0.22451487293338224, "learning_rate": 8.917866765036473e-05, "loss": 0.5262, "step": 2710 }, { "epoch": 0.758746151693255, "grad_norm": 0.220116295849953, "learning_rate": 8.916908182612511e-05, "loss": 0.541, "step": 2711 }, { "epoch": 0.7590260285474392, "grad_norm": 0.21177038061510053, "learning_rate": 8.915949227375037e-05, "loss": 0.5211, "step": 2712 }, { "epoch": 0.7593059054016232, "grad_norm": 0.21529028775339493, "learning_rate": 8.914989899415323e-05, "loss": 0.5371, "step": 2713 }, { "epoch": 0.7595857822558074, "grad_norm": 0.23356289952438744, "learning_rate": 8.91403019882468e-05, "loss": 0.6028, "step": 2714 }, { "epoch": 0.7598656591099916, "grad_norm": 0.21624377736402656, "learning_rate": 8.913070125694452e-05, "loss": 0.5405, "step": 2715 }, { "epoch": 0.7601455359641758, "grad_norm": 0.22031622012182514, "learning_rate": 8.912109680116016e-05, "loss": 0.5367, "step": 2716 }, { "epoch": 0.76042541281836, "grad_norm": 0.21890336861203946, "learning_rate": 8.911148862180794e-05, "loss": 0.5365, "step": 2717 }, { "epoch": 0.760705289672544, "grad_norm": 0.22556741663629698, "learning_rate": 8.910187671980233e-05, "loss": 0.5598, "step": 2718 }, { "epoch": 0.7609851665267282, "grad_norm": 0.2170309750725635, "learning_rate": 8.909226109605822e-05, "loss": 0.5131, "step": 2719 }, { "epoch": 0.7612650433809124, "grad_norm": 0.2235836195817433, "learning_rate": 8.908264175149081e-05, "loss": 0.5374, "step": 2720 }, { "epoch": 0.7615449202350966, "grad_norm": 0.20347233313344, "learning_rate": 8.907301868701567e-05, "loss": 0.5171, "step": 2721 }, { "epoch": 0.7618247970892807, "grad_norm": 0.21826991398296136, "learning_rate": 8.906339190354875e-05, "loss": 0.5435, "step": 2722 }, { "epoch": 0.7621046739434649, "grad_norm": 0.2190244617422154, "learning_rate": 8.905376140200635e-05, "loss": 0.5426, "step": 2723 }, { "epoch": 0.762384550797649, "grad_norm": 0.21375749928212628, "learning_rate": 8.904412718330504e-05, "loss": 0.5213, "step": 2724 }, { "epoch": 0.7626644276518332, "grad_norm": 0.23205902236105794, "learning_rate": 8.903448924836188e-05, "loss": 0.5342, "step": 2725 }, { "epoch": 0.7629443045060174, "grad_norm": 0.21547996683498147, "learning_rate": 8.902484759809416e-05, "loss": 0.519, "step": 2726 }, { "epoch": 0.7632241813602015, "grad_norm": 0.22224134483386554, "learning_rate": 8.901520223341961e-05, "loss": 0.5205, "step": 2727 }, { "epoch": 0.7635040582143857, "grad_norm": 0.2202861538829083, "learning_rate": 8.900555315525629e-05, "loss": 0.5359, "step": 2728 }, { "epoch": 0.7637839350685698, "grad_norm": 0.22905506573800075, "learning_rate": 8.899590036452257e-05, "loss": 0.5444, "step": 2729 }, { "epoch": 0.764063811922754, "grad_norm": 0.22835262069963058, "learning_rate": 8.898624386213725e-05, "loss": 0.5464, "step": 2730 }, { "epoch": 0.7643436887769381, "grad_norm": 0.2163484683591616, "learning_rate": 8.89765836490194e-05, "loss": 0.5265, "step": 2731 }, { "epoch": 0.7646235656311223, "grad_norm": 0.21830680694234028, "learning_rate": 8.896691972608849e-05, "loss": 0.5472, "step": 2732 }, { "epoch": 0.7649034424853065, "grad_norm": 0.21113697085923283, "learning_rate": 8.895725209426436e-05, "loss": 0.532, "step": 2733 }, { "epoch": 0.7651833193394906, "grad_norm": 0.22708482386870013, "learning_rate": 8.894758075446718e-05, "loss": 0.5669, "step": 2734 }, { "epoch": 0.7654631961936748, "grad_norm": 0.2254664501566954, "learning_rate": 8.893790570761746e-05, "loss": 0.5324, "step": 2735 }, { "epoch": 0.7657430730478589, "grad_norm": 0.22068391267207088, "learning_rate": 8.892822695463607e-05, "loss": 0.559, "step": 2736 }, { "epoch": 0.7660229499020431, "grad_norm": 0.2168443428728145, "learning_rate": 8.891854449644426e-05, "loss": 0.5379, "step": 2737 }, { "epoch": 0.7663028267562273, "grad_norm": 0.21181212036692257, "learning_rate": 8.890885833396361e-05, "loss": 0.551, "step": 2738 }, { "epoch": 0.7665827036104114, "grad_norm": 0.2194815162367123, "learning_rate": 8.889916846811602e-05, "loss": 0.5625, "step": 2739 }, { "epoch": 0.7668625804645955, "grad_norm": 0.22642300385422914, "learning_rate": 8.888947489982382e-05, "loss": 0.5429, "step": 2740 }, { "epoch": 0.7671424573187797, "grad_norm": 0.21777346578717538, "learning_rate": 8.887977763000963e-05, "loss": 0.5217, "step": 2741 }, { "epoch": 0.7674223341729639, "grad_norm": 0.21351032496663272, "learning_rate": 8.887007665959643e-05, "loss": 0.5345, "step": 2742 }, { "epoch": 0.7677022110271481, "grad_norm": 0.22136583939301327, "learning_rate": 8.88603719895076e-05, "loss": 0.5195, "step": 2743 }, { "epoch": 0.7679820878813323, "grad_norm": 0.21902060567153697, "learning_rate": 8.885066362066679e-05, "loss": 0.5438, "step": 2744 }, { "epoch": 0.7682619647355163, "grad_norm": 0.22396010992489948, "learning_rate": 8.884095155399808e-05, "loss": 0.5335, "step": 2745 }, { "epoch": 0.7685418415897005, "grad_norm": 0.2248353337599658, "learning_rate": 8.883123579042587e-05, "loss": 0.512, "step": 2746 }, { "epoch": 0.7688217184438847, "grad_norm": 0.22778594293934562, "learning_rate": 8.88215163308749e-05, "loss": 0.5367, "step": 2747 }, { "epoch": 0.7691015952980689, "grad_norm": 0.22070845746206372, "learning_rate": 8.881179317627027e-05, "loss": 0.5526, "step": 2748 }, { "epoch": 0.769381472152253, "grad_norm": 0.22597830242137348, "learning_rate": 8.880206632753744e-05, "loss": 0.5422, "step": 2749 }, { "epoch": 0.7696613490064371, "grad_norm": 0.21202209406816486, "learning_rate": 8.879233578560222e-05, "loss": 0.5309, "step": 2750 }, { "epoch": 0.7699412258606213, "grad_norm": 0.2339424440918282, "learning_rate": 8.878260155139078e-05, "loss": 0.5406, "step": 2751 }, { "epoch": 0.7702211027148055, "grad_norm": 0.2199231232761973, "learning_rate": 8.877286362582959e-05, "loss": 0.5382, "step": 2752 }, { "epoch": 0.7705009795689897, "grad_norm": 0.23479978285061998, "learning_rate": 8.876312200984556e-05, "loss": 0.5593, "step": 2753 }, { "epoch": 0.7707808564231738, "grad_norm": 0.22459337014686442, "learning_rate": 8.875337670436587e-05, "loss": 0.5594, "step": 2754 }, { "epoch": 0.7710607332773579, "grad_norm": 0.22300395102172538, "learning_rate": 8.87436277103181e-05, "loss": 0.5329, "step": 2755 }, { "epoch": 0.7713406101315421, "grad_norm": 0.2150771771958537, "learning_rate": 8.873387502863015e-05, "loss": 0.5249, "step": 2756 }, { "epoch": 0.7716204869857263, "grad_norm": 0.22949069847397122, "learning_rate": 8.872411866023031e-05, "loss": 0.5563, "step": 2757 }, { "epoch": 0.7719003638399105, "grad_norm": 0.20382007322682827, "learning_rate": 8.871435860604717e-05, "loss": 0.5228, "step": 2758 }, { "epoch": 0.7721802406940946, "grad_norm": 0.21711045845545285, "learning_rate": 8.87045948670097e-05, "loss": 0.5237, "step": 2759 }, { "epoch": 0.7724601175482787, "grad_norm": 0.22083055732760784, "learning_rate": 8.869482744404724e-05, "loss": 0.5343, "step": 2760 }, { "epoch": 0.7727399944024629, "grad_norm": 0.22910669410226342, "learning_rate": 8.868505633808946e-05, "loss": 0.5212, "step": 2761 }, { "epoch": 0.7730198712566471, "grad_norm": 0.2149562405455874, "learning_rate": 8.867528155006633e-05, "loss": 0.535, "step": 2762 }, { "epoch": 0.7732997481108312, "grad_norm": 0.20511754519125563, "learning_rate": 8.866550308090828e-05, "loss": 0.5079, "step": 2763 }, { "epoch": 0.7735796249650154, "grad_norm": 0.23127912590994432, "learning_rate": 8.8655720931546e-05, "loss": 0.5455, "step": 2764 }, { "epoch": 0.7738595018191996, "grad_norm": 0.2267108511790525, "learning_rate": 8.864593510291055e-05, "loss": 0.5465, "step": 2765 }, { "epoch": 0.7741393786733837, "grad_norm": 0.2254690183852538, "learning_rate": 8.863614559593339e-05, "loss": 0.5399, "step": 2766 }, { "epoch": 0.7744192555275679, "grad_norm": 0.22791125506573726, "learning_rate": 8.862635241154625e-05, "loss": 0.5502, "step": 2767 }, { "epoch": 0.774699132381752, "grad_norm": 0.21637335970823557, "learning_rate": 8.861655555068127e-05, "loss": 0.5618, "step": 2768 }, { "epoch": 0.7749790092359362, "grad_norm": 0.21816634264789905, "learning_rate": 8.860675501427091e-05, "loss": 0.5314, "step": 2769 }, { "epoch": 0.7752588860901204, "grad_norm": 0.22643928807573033, "learning_rate": 8.859695080324801e-05, "loss": 0.5249, "step": 2770 }, { "epoch": 0.7755387629443045, "grad_norm": 0.22942199525901047, "learning_rate": 8.858714291854573e-05, "loss": 0.5652, "step": 2771 }, { "epoch": 0.7758186397984886, "grad_norm": 0.2169522453773073, "learning_rate": 8.857733136109758e-05, "loss": 0.5333, "step": 2772 }, { "epoch": 0.7760985166526728, "grad_norm": 0.2246009564360003, "learning_rate": 8.856751613183745e-05, "loss": 0.5364, "step": 2773 }, { "epoch": 0.776378393506857, "grad_norm": 0.23165635218522246, "learning_rate": 8.855769723169954e-05, "loss": 0.576, "step": 2774 }, { "epoch": 0.7766582703610412, "grad_norm": 0.22344252442325427, "learning_rate": 8.854787466161842e-05, "loss": 0.5607, "step": 2775 }, { "epoch": 0.7769381472152252, "grad_norm": 0.22061510174243557, "learning_rate": 8.853804842252903e-05, "loss": 0.5355, "step": 2776 }, { "epoch": 0.7772180240694094, "grad_norm": 0.21645191801589111, "learning_rate": 8.852821851536661e-05, "loss": 0.5365, "step": 2777 }, { "epoch": 0.7774979009235936, "grad_norm": 0.2175101734293596, "learning_rate": 8.851838494106678e-05, "loss": 0.5434, "step": 2778 }, { "epoch": 0.7777777777777778, "grad_norm": 0.21406000462718366, "learning_rate": 8.850854770056554e-05, "loss": 0.5381, "step": 2779 }, { "epoch": 0.778057654631962, "grad_norm": 0.22331999551569442, "learning_rate": 8.849870679479915e-05, "loss": 0.5199, "step": 2780 }, { "epoch": 0.7783375314861462, "grad_norm": 0.21943748138385877, "learning_rate": 8.84888622247043e-05, "loss": 0.5575, "step": 2781 }, { "epoch": 0.7786174083403302, "grad_norm": 0.20529141525084196, "learning_rate": 8.8479013991218e-05, "loss": 0.5208, "step": 2782 }, { "epoch": 0.7788972851945144, "grad_norm": 0.2262937696827364, "learning_rate": 8.846916209527763e-05, "loss": 0.559, "step": 2783 }, { "epoch": 0.7791771620486986, "grad_norm": 0.22596654737586358, "learning_rate": 8.845930653782086e-05, "loss": 0.5414, "step": 2784 }, { "epoch": 0.7794570389028828, "grad_norm": 0.2274190341430264, "learning_rate": 8.844944731978577e-05, "loss": 0.5374, "step": 2785 }, { "epoch": 0.7797369157570669, "grad_norm": 0.22649843895076985, "learning_rate": 8.843958444211075e-05, "loss": 0.5258, "step": 2786 }, { "epoch": 0.780016792611251, "grad_norm": 0.22725924280520562, "learning_rate": 8.84297179057346e-05, "loss": 0.5388, "step": 2787 }, { "epoch": 0.7802966694654352, "grad_norm": 0.22689835726130753, "learning_rate": 8.841984771159636e-05, "loss": 0.5034, "step": 2788 }, { "epoch": 0.7805765463196194, "grad_norm": 0.23329504821929226, "learning_rate": 8.840997386063553e-05, "loss": 0.5269, "step": 2789 }, { "epoch": 0.7808564231738035, "grad_norm": 0.216598053395184, "learning_rate": 8.840009635379187e-05, "loss": 0.5629, "step": 2790 }, { "epoch": 0.7811363000279877, "grad_norm": 0.22091397007640448, "learning_rate": 8.839021519200557e-05, "loss": 0.5364, "step": 2791 }, { "epoch": 0.7814161768821718, "grad_norm": 0.22454445877781262, "learning_rate": 8.838033037621708e-05, "loss": 0.5398, "step": 2792 }, { "epoch": 0.781696053736356, "grad_norm": 0.22364940410539516, "learning_rate": 8.837044190736726e-05, "loss": 0.5298, "step": 2793 }, { "epoch": 0.7819759305905402, "grad_norm": 0.20652769204510682, "learning_rate": 8.836054978639732e-05, "loss": 0.5264, "step": 2794 }, { "epoch": 0.7822558074447243, "grad_norm": 0.21276771356881036, "learning_rate": 8.835065401424877e-05, "loss": 0.515, "step": 2795 }, { "epoch": 0.7825356842989085, "grad_norm": 0.21361040126559733, "learning_rate": 8.83407545918635e-05, "loss": 0.535, "step": 2796 }, { "epoch": 0.7828155611530926, "grad_norm": 0.23286320904578853, "learning_rate": 8.833085152018375e-05, "loss": 0.5716, "step": 2797 }, { "epoch": 0.7830954380072768, "grad_norm": 0.22105089026834285, "learning_rate": 8.83209448001521e-05, "loss": 0.5376, "step": 2798 }, { "epoch": 0.783375314861461, "grad_norm": 0.2117441316678202, "learning_rate": 8.83110344327115e-05, "loss": 0.492, "step": 2799 }, { "epoch": 0.7836551917156451, "grad_norm": 0.22413333754161954, "learning_rate": 8.830112041880516e-05, "loss": 0.5259, "step": 2800 }, { "epoch": 0.7839350685698293, "grad_norm": 0.22303533904875358, "learning_rate": 8.829120275937675e-05, "loss": 0.5289, "step": 2801 }, { "epoch": 0.7842149454240135, "grad_norm": 0.23224013663200205, "learning_rate": 8.828128145537024e-05, "loss": 0.5378, "step": 2802 }, { "epoch": 0.7844948222781976, "grad_norm": 0.21037489494798792, "learning_rate": 8.827135650772994e-05, "loss": 0.5339, "step": 2803 }, { "epoch": 0.7847746991323817, "grad_norm": 0.22897771454277885, "learning_rate": 8.826142791740049e-05, "loss": 0.5531, "step": 2804 }, { "epoch": 0.7850545759865659, "grad_norm": 0.219861900135347, "learning_rate": 8.825149568532691e-05, "loss": 0.5298, "step": 2805 }, { "epoch": 0.7853344528407501, "grad_norm": 0.23432080612454642, "learning_rate": 8.824155981245457e-05, "loss": 0.5352, "step": 2806 }, { "epoch": 0.7856143296949343, "grad_norm": 0.21523092077944095, "learning_rate": 8.823162029972917e-05, "loss": 0.5283, "step": 2807 }, { "epoch": 0.7858942065491183, "grad_norm": 0.2201121770858865, "learning_rate": 8.822167714809673e-05, "loss": 0.5473, "step": 2808 }, { "epoch": 0.7861740834033025, "grad_norm": 0.22845228302948312, "learning_rate": 8.821173035850368e-05, "loss": 0.5447, "step": 2809 }, { "epoch": 0.7864539602574867, "grad_norm": 0.2177072634790322, "learning_rate": 8.820177993189675e-05, "loss": 0.5308, "step": 2810 }, { "epoch": 0.7867338371116709, "grad_norm": 0.21641334292617864, "learning_rate": 8.819182586922302e-05, "loss": 0.5322, "step": 2811 }, { "epoch": 0.7870137139658551, "grad_norm": 0.221026995060422, "learning_rate": 8.818186817142993e-05, "loss": 0.5378, "step": 2812 }, { "epoch": 0.7872935908200391, "grad_norm": 0.22261066108655456, "learning_rate": 8.817190683946526e-05, "loss": 0.5218, "step": 2813 }, { "epoch": 0.7875734676742233, "grad_norm": 0.2180297967634272, "learning_rate": 8.816194187427711e-05, "loss": 0.5211, "step": 2814 }, { "epoch": 0.7878533445284075, "grad_norm": 0.2219325714430237, "learning_rate": 8.815197327681399e-05, "loss": 0.5303, "step": 2815 }, { "epoch": 0.7881332213825917, "grad_norm": 0.21901054380779303, "learning_rate": 8.814200104802469e-05, "loss": 0.5541, "step": 2816 }, { "epoch": 0.7884130982367759, "grad_norm": 0.2182971480723001, "learning_rate": 8.81320251888584e-05, "loss": 0.5184, "step": 2817 }, { "epoch": 0.7886929750909599, "grad_norm": 0.2234946616304098, "learning_rate": 8.812204570026458e-05, "loss": 0.5167, "step": 2818 }, { "epoch": 0.7889728519451441, "grad_norm": 0.22267377011421308, "learning_rate": 8.811206258319313e-05, "loss": 0.5507, "step": 2819 }, { "epoch": 0.7892527287993283, "grad_norm": 0.2247193866649521, "learning_rate": 8.810207583859423e-05, "loss": 0.5305, "step": 2820 }, { "epoch": 0.7895326056535125, "grad_norm": 0.2233578217040272, "learning_rate": 8.809208546741842e-05, "loss": 0.551, "step": 2821 }, { "epoch": 0.7898124825076966, "grad_norm": 0.21971966516217226, "learning_rate": 8.808209147061659e-05, "loss": 0.564, "step": 2822 }, { "epoch": 0.7900923593618808, "grad_norm": 0.25814484841942725, "learning_rate": 8.807209384913999e-05, "loss": 0.5492, "step": 2823 }, { "epoch": 0.7903722362160649, "grad_norm": 0.22185428438583155, "learning_rate": 8.806209260394018e-05, "loss": 0.5468, "step": 2824 }, { "epoch": 0.7906521130702491, "grad_norm": 0.2233594052708055, "learning_rate": 8.80520877359691e-05, "loss": 0.5625, "step": 2825 }, { "epoch": 0.7909319899244333, "grad_norm": 0.2256990651820279, "learning_rate": 8.8042079246179e-05, "loss": 0.5407, "step": 2826 }, { "epoch": 0.7912118667786174, "grad_norm": 0.22760471565206933, "learning_rate": 8.803206713552252e-05, "loss": 0.5411, "step": 2827 }, { "epoch": 0.7914917436328016, "grad_norm": 0.21692049590750304, "learning_rate": 8.802205140495259e-05, "loss": 0.5204, "step": 2828 }, { "epoch": 0.7917716204869857, "grad_norm": 0.21457081307008663, "learning_rate": 8.801203205542252e-05, "loss": 0.5508, "step": 2829 }, { "epoch": 0.7920514973411699, "grad_norm": 0.22345148575294949, "learning_rate": 8.800200908788598e-05, "loss": 0.5435, "step": 2830 }, { "epoch": 0.792331374195354, "grad_norm": 0.2101856819329527, "learning_rate": 8.799198250329692e-05, "loss": 0.5258, "step": 2831 }, { "epoch": 0.7926112510495382, "grad_norm": 0.21800264042889614, "learning_rate": 8.798195230260973e-05, "loss": 0.5197, "step": 2832 }, { "epoch": 0.7928911279037224, "grad_norm": 0.21075015346930503, "learning_rate": 8.797191848677904e-05, "loss": 0.5282, "step": 2833 }, { "epoch": 0.7931710047579065, "grad_norm": 0.24180470598921924, "learning_rate": 8.79618810567599e-05, "loss": 0.5311, "step": 2834 }, { "epoch": 0.7934508816120907, "grad_norm": 0.21314976378368844, "learning_rate": 8.795184001350767e-05, "loss": 0.5351, "step": 2835 }, { "epoch": 0.7937307584662748, "grad_norm": 0.2262732327054876, "learning_rate": 8.794179535797806e-05, "loss": 0.5557, "step": 2836 }, { "epoch": 0.794010635320459, "grad_norm": 0.2109717137303336, "learning_rate": 8.793174709112714e-05, "loss": 0.5205, "step": 2837 }, { "epoch": 0.7942905121746432, "grad_norm": 0.22044485605307892, "learning_rate": 8.792169521391132e-05, "loss": 0.5456, "step": 2838 }, { "epoch": 0.7945703890288273, "grad_norm": 0.23439227561476458, "learning_rate": 8.791163972728729e-05, "loss": 0.5322, "step": 2839 }, { "epoch": 0.7948502658830114, "grad_norm": 0.22338269897225688, "learning_rate": 8.790158063221219e-05, "loss": 0.5499, "step": 2840 }, { "epoch": 0.7951301427371956, "grad_norm": 0.2232145969653595, "learning_rate": 8.789151792964342e-05, "loss": 0.5331, "step": 2841 }, { "epoch": 0.7954100195913798, "grad_norm": 0.2185630854730285, "learning_rate": 8.788145162053877e-05, "loss": 0.53, "step": 2842 }, { "epoch": 0.795689896445564, "grad_norm": 0.20638335673994054, "learning_rate": 8.787138170585635e-05, "loss": 0.5578, "step": 2843 }, { "epoch": 0.7959697732997482, "grad_norm": 0.21157362620188566, "learning_rate": 8.786130818655462e-05, "loss": 0.523, "step": 2844 }, { "epoch": 0.7962496501539322, "grad_norm": 0.2146520898061125, "learning_rate": 8.785123106359238e-05, "loss": 0.5405, "step": 2845 }, { "epoch": 0.7965295270081164, "grad_norm": 0.21887109159244958, "learning_rate": 8.78411503379288e-05, "loss": 0.5465, "step": 2846 }, { "epoch": 0.7968094038623006, "grad_norm": 0.21665261854698148, "learning_rate": 8.783106601052333e-05, "loss": 0.545, "step": 2847 }, { "epoch": 0.7970892807164848, "grad_norm": 0.21475423505975183, "learning_rate": 8.782097808233584e-05, "loss": 0.5137, "step": 2848 }, { "epoch": 0.797369157570669, "grad_norm": 0.21469560760181244, "learning_rate": 8.781088655432648e-05, "loss": 0.518, "step": 2849 }, { "epoch": 0.797649034424853, "grad_norm": 0.21697308410113145, "learning_rate": 8.780079142745576e-05, "loss": 0.5152, "step": 2850 }, { "epoch": 0.7979289112790372, "grad_norm": 0.22059316431053475, "learning_rate": 8.779069270268456e-05, "loss": 0.5509, "step": 2851 }, { "epoch": 0.7982087881332214, "grad_norm": 0.21439874759153993, "learning_rate": 8.778059038097409e-05, "loss": 0.5217, "step": 2852 }, { "epoch": 0.7984886649874056, "grad_norm": 0.24195536709810261, "learning_rate": 8.777048446328588e-05, "loss": 0.4948, "step": 2853 }, { "epoch": 0.7987685418415897, "grad_norm": 0.22542568973369742, "learning_rate": 8.77603749505818e-05, "loss": 0.5508, "step": 2854 }, { "epoch": 0.7990484186957738, "grad_norm": 0.2340567086340302, "learning_rate": 8.775026184382411e-05, "loss": 0.5454, "step": 2855 }, { "epoch": 0.799328295549958, "grad_norm": 0.2578561350080951, "learning_rate": 8.774014514397537e-05, "loss": 0.5566, "step": 2856 }, { "epoch": 0.7996081724041422, "grad_norm": 0.22763271026186724, "learning_rate": 8.773002485199849e-05, "loss": 0.5322, "step": 2857 }, { "epoch": 0.7998880492583264, "grad_norm": 0.22724074095648678, "learning_rate": 8.771990096885672e-05, "loss": 0.5228, "step": 2858 }, { "epoch": 0.8001679261125105, "grad_norm": 0.23037448798055957, "learning_rate": 8.770977349551366e-05, "loss": 0.5394, "step": 2859 }, { "epoch": 0.8004478029666946, "grad_norm": 0.21513282819816093, "learning_rate": 8.769964243293326e-05, "loss": 0.5252, "step": 2860 }, { "epoch": 0.8007276798208788, "grad_norm": 0.2191177699463853, "learning_rate": 8.768950778207978e-05, "loss": 0.5447, "step": 2861 }, { "epoch": 0.801007556675063, "grad_norm": 0.20963860250391247, "learning_rate": 8.767936954391787e-05, "loss": 0.5324, "step": 2862 }, { "epoch": 0.8012874335292471, "grad_norm": 0.22403841844799766, "learning_rate": 8.766922771941246e-05, "loss": 0.5572, "step": 2863 }, { "epoch": 0.8015673103834313, "grad_norm": 0.23077398062211876, "learning_rate": 8.765908230952888e-05, "loss": 0.5569, "step": 2864 }, { "epoch": 0.8018471872376155, "grad_norm": 0.23387319310509821, "learning_rate": 8.764893331523278e-05, "loss": 0.5166, "step": 2865 }, { "epoch": 0.8021270640917996, "grad_norm": 0.2221398047613915, "learning_rate": 8.763878073749012e-05, "loss": 0.5355, "step": 2866 }, { "epoch": 0.8024069409459837, "grad_norm": 0.21527982155363673, "learning_rate": 8.762862457726723e-05, "loss": 0.5259, "step": 2867 }, { "epoch": 0.8026868178001679, "grad_norm": 0.2273342193569453, "learning_rate": 8.761846483553083e-05, "loss": 0.5218, "step": 2868 }, { "epoch": 0.8029666946543521, "grad_norm": 0.22705463249266675, "learning_rate": 8.760830151324786e-05, "loss": 0.5466, "step": 2869 }, { "epoch": 0.8032465715085363, "grad_norm": 0.21478700010646237, "learning_rate": 8.75981346113857e-05, "loss": 0.525, "step": 2870 }, { "epoch": 0.8035264483627204, "grad_norm": 0.2303179163497238, "learning_rate": 8.758796413091204e-05, "loss": 0.5581, "step": 2871 }, { "epoch": 0.8038063252169045, "grad_norm": 0.21606750681499284, "learning_rate": 8.757779007279493e-05, "loss": 0.5031, "step": 2872 }, { "epoch": 0.8040862020710887, "grad_norm": 0.2218226313286871, "learning_rate": 8.756761243800273e-05, "loss": 0.5111, "step": 2873 }, { "epoch": 0.8043660789252729, "grad_norm": 0.2285726570299832, "learning_rate": 8.755743122750415e-05, "loss": 0.5335, "step": 2874 }, { "epoch": 0.8046459557794571, "grad_norm": 0.22297193493897877, "learning_rate": 8.754724644226824e-05, "loss": 0.518, "step": 2875 }, { "epoch": 0.8049258326336411, "grad_norm": 0.21257692593885053, "learning_rate": 8.753705808326438e-05, "loss": 0.5354, "step": 2876 }, { "epoch": 0.8052057094878253, "grad_norm": 0.22159427074143495, "learning_rate": 8.752686615146234e-05, "loss": 0.5499, "step": 2877 }, { "epoch": 0.8054855863420095, "grad_norm": 0.20808489492074214, "learning_rate": 8.751667064783218e-05, "loss": 0.5011, "step": 2878 }, { "epoch": 0.8057654631961937, "grad_norm": 0.2321803174350652, "learning_rate": 8.75064715733443e-05, "loss": 0.5172, "step": 2879 }, { "epoch": 0.8060453400503779, "grad_norm": 0.21486242311207676, "learning_rate": 8.749626892896944e-05, "loss": 0.5331, "step": 2880 }, { "epoch": 0.806325216904562, "grad_norm": 0.2618784125152297, "learning_rate": 8.748606271567876e-05, "loss": 0.5367, "step": 2881 }, { "epoch": 0.8066050937587461, "grad_norm": 0.22166037267218722, "learning_rate": 8.747585293444363e-05, "loss": 0.5285, "step": 2882 }, { "epoch": 0.8068849706129303, "grad_norm": 0.223680190345337, "learning_rate": 8.746563958623584e-05, "loss": 0.5709, "step": 2883 }, { "epoch": 0.8071648474671145, "grad_norm": 0.23106289340452013, "learning_rate": 8.745542267202749e-05, "loss": 0.5546, "step": 2884 }, { "epoch": 0.8074447243212987, "grad_norm": 0.2257407444251882, "learning_rate": 8.744520219279106e-05, "loss": 0.5279, "step": 2885 }, { "epoch": 0.8077246011754828, "grad_norm": 0.22253837697732912, "learning_rate": 8.743497814949932e-05, "loss": 0.5435, "step": 2886 }, { "epoch": 0.8080044780296669, "grad_norm": 0.22287026196738555, "learning_rate": 8.742475054312542e-05, "loss": 0.5546, "step": 2887 }, { "epoch": 0.8082843548838511, "grad_norm": 0.22115599643611797, "learning_rate": 8.74145193746428e-05, "loss": 0.5405, "step": 2888 }, { "epoch": 0.8085642317380353, "grad_norm": 0.21972420074671384, "learning_rate": 8.740428464502529e-05, "loss": 0.5306, "step": 2889 }, { "epoch": 0.8088441085922194, "grad_norm": 0.2181093091866074, "learning_rate": 8.739404635524704e-05, "loss": 0.5124, "step": 2890 }, { "epoch": 0.8091239854464036, "grad_norm": 0.22584053887315925, "learning_rate": 8.738380450628252e-05, "loss": 0.5315, "step": 2891 }, { "epoch": 0.8094038623005877, "grad_norm": 0.22101277900065378, "learning_rate": 8.737355909910656e-05, "loss": 0.5343, "step": 2892 }, { "epoch": 0.8096837391547719, "grad_norm": 0.21226432343971377, "learning_rate": 8.736331013469432e-05, "loss": 0.5179, "step": 2893 }, { "epoch": 0.809963616008956, "grad_norm": 0.2281987608223546, "learning_rate": 8.735305761402132e-05, "loss": 0.5395, "step": 2894 }, { "epoch": 0.8102434928631402, "grad_norm": 0.2330515088733716, "learning_rate": 8.734280153806338e-05, "loss": 0.5472, "step": 2895 }, { "epoch": 0.8105233697173244, "grad_norm": 0.2303513278523418, "learning_rate": 8.73325419077967e-05, "loss": 0.5476, "step": 2896 }, { "epoch": 0.8108032465715085, "grad_norm": 0.22998588585301685, "learning_rate": 8.732227872419778e-05, "loss": 0.5233, "step": 2897 }, { "epoch": 0.8110831234256927, "grad_norm": 0.2196595321225993, "learning_rate": 8.731201198824347e-05, "loss": 0.5485, "step": 2898 }, { "epoch": 0.8113630002798768, "grad_norm": 0.21344379675029646, "learning_rate": 8.730174170091099e-05, "loss": 0.5185, "step": 2899 }, { "epoch": 0.811642877134061, "grad_norm": 0.21515576014505106, "learning_rate": 8.729146786317786e-05, "loss": 0.5471, "step": 2900 }, { "epoch": 0.8119227539882452, "grad_norm": 0.22280304383786542, "learning_rate": 8.728119047602192e-05, "loss": 0.5501, "step": 2901 }, { "epoch": 0.8122026308424294, "grad_norm": 0.21889943027225472, "learning_rate": 8.727090954042142e-05, "loss": 0.5373, "step": 2902 }, { "epoch": 0.8124825076966135, "grad_norm": 0.21020812287582277, "learning_rate": 8.726062505735489e-05, "loss": 0.5387, "step": 2903 }, { "epoch": 0.8127623845507976, "grad_norm": 0.2175228949728657, "learning_rate": 8.725033702780121e-05, "loss": 0.4943, "step": 2904 }, { "epoch": 0.8130422614049818, "grad_norm": 0.22803427588451333, "learning_rate": 8.72400454527396e-05, "loss": 0.5579, "step": 2905 }, { "epoch": 0.813322138259166, "grad_norm": 0.22717917846053645, "learning_rate": 8.722975033314962e-05, "loss": 0.5445, "step": 2906 }, { "epoch": 0.8136020151133502, "grad_norm": 0.21615324377825143, "learning_rate": 8.721945167001116e-05, "loss": 0.5425, "step": 2907 }, { "epoch": 0.8138818919675342, "grad_norm": 0.21621814365605796, "learning_rate": 8.720914946430446e-05, "loss": 0.504, "step": 2908 }, { "epoch": 0.8141617688217184, "grad_norm": 0.21309418385195072, "learning_rate": 8.71988437170101e-05, "loss": 0.5451, "step": 2909 }, { "epoch": 0.8144416456759026, "grad_norm": 0.22841599010793856, "learning_rate": 8.718853442910896e-05, "loss": 0.5242, "step": 2910 }, { "epoch": 0.8147215225300868, "grad_norm": 0.2168662958041549, "learning_rate": 8.71782216015823e-05, "loss": 0.5267, "step": 2911 }, { "epoch": 0.815001399384271, "grad_norm": 0.23129303357103495, "learning_rate": 8.71679052354117e-05, "loss": 0.5458, "step": 2912 }, { "epoch": 0.815281276238455, "grad_norm": 0.22375846753309966, "learning_rate": 8.715758533157907e-05, "loss": 0.5418, "step": 2913 }, { "epoch": 0.8155611530926392, "grad_norm": 0.21948924723541993, "learning_rate": 8.714726189106668e-05, "loss": 0.5321, "step": 2914 }, { "epoch": 0.8158410299468234, "grad_norm": 0.21981769355357852, "learning_rate": 8.713693491485711e-05, "loss": 0.5161, "step": 2915 }, { "epoch": 0.8161209068010076, "grad_norm": 0.2213284784587669, "learning_rate": 8.712660440393328e-05, "loss": 0.5443, "step": 2916 }, { "epoch": 0.8164007836551918, "grad_norm": 0.2221214987354716, "learning_rate": 8.711627035927847e-05, "loss": 0.5322, "step": 2917 }, { "epoch": 0.8166806605093758, "grad_norm": 0.2183012593576887, "learning_rate": 8.710593278187626e-05, "loss": 0.5099, "step": 2918 }, { "epoch": 0.81696053736356, "grad_norm": 0.2229608818399287, "learning_rate": 8.709559167271061e-05, "loss": 0.5373, "step": 2919 }, { "epoch": 0.8172404142177442, "grad_norm": 0.22985599509228125, "learning_rate": 8.708524703276578e-05, "loss": 0.56, "step": 2920 }, { "epoch": 0.8175202910719284, "grad_norm": 0.20196807128492697, "learning_rate": 8.707489886302638e-05, "loss": 0.5317, "step": 2921 }, { "epoch": 0.8178001679261125, "grad_norm": 0.2076045300988094, "learning_rate": 8.706454716447736e-05, "loss": 0.5487, "step": 2922 }, { "epoch": 0.8180800447802967, "grad_norm": 0.2086327737778983, "learning_rate": 8.705419193810398e-05, "loss": 0.5003, "step": 2923 }, { "epoch": 0.8183599216344808, "grad_norm": 0.21564421210403176, "learning_rate": 8.704383318489189e-05, "loss": 0.5165, "step": 2924 }, { "epoch": 0.818639798488665, "grad_norm": 0.21993509376897255, "learning_rate": 8.7033470905827e-05, "loss": 0.5482, "step": 2925 }, { "epoch": 0.8189196753428492, "grad_norm": 0.22198759878988114, "learning_rate": 8.702310510189563e-05, "loss": 0.5562, "step": 2926 }, { "epoch": 0.8191995521970333, "grad_norm": 0.22435205545335035, "learning_rate": 8.701273577408441e-05, "loss": 0.5525, "step": 2927 }, { "epoch": 0.8194794290512175, "grad_norm": 0.2127898962581027, "learning_rate": 8.700236292338026e-05, "loss": 0.5326, "step": 2928 }, { "epoch": 0.8197593059054016, "grad_norm": 0.22537423412478533, "learning_rate": 8.699198655077053e-05, "loss": 0.5474, "step": 2929 }, { "epoch": 0.8200391827595858, "grad_norm": 0.21328517915897816, "learning_rate": 8.698160665724278e-05, "loss": 0.553, "step": 2930 }, { "epoch": 0.8203190596137699, "grad_norm": 0.22641606256105556, "learning_rate": 8.697122324378502e-05, "loss": 0.5399, "step": 2931 }, { "epoch": 0.8205989364679541, "grad_norm": 0.21823819490837804, "learning_rate": 8.696083631138553e-05, "loss": 0.5256, "step": 2932 }, { "epoch": 0.8208788133221383, "grad_norm": 0.21836411962141716, "learning_rate": 8.695044586103296e-05, "loss": 0.5137, "step": 2933 }, { "epoch": 0.8211586901763224, "grad_norm": 0.2255160147723362, "learning_rate": 8.694005189371627e-05, "loss": 0.5363, "step": 2934 }, { "epoch": 0.8214385670305066, "grad_norm": 0.21757007853250637, "learning_rate": 8.692965441042477e-05, "loss": 0.5559, "step": 2935 }, { "epoch": 0.8217184438846907, "grad_norm": 0.2185395628240894, "learning_rate": 8.691925341214808e-05, "loss": 0.5106, "step": 2936 }, { "epoch": 0.8219983207388749, "grad_norm": 0.2104799722351941, "learning_rate": 8.690884889987619e-05, "loss": 0.5412, "step": 2937 }, { "epoch": 0.8222781975930591, "grad_norm": 0.22737278191374266, "learning_rate": 8.689844087459941e-05, "loss": 0.5421, "step": 2938 }, { "epoch": 0.8225580744472432, "grad_norm": 0.21825281303841354, "learning_rate": 8.688802933730838e-05, "loss": 0.5204, "step": 2939 }, { "epoch": 0.8228379513014273, "grad_norm": 0.22366010628868704, "learning_rate": 8.687761428899406e-05, "loss": 0.5297, "step": 2940 }, { "epoch": 0.8231178281556115, "grad_norm": 0.2211469477225885, "learning_rate": 8.686719573064776e-05, "loss": 0.5452, "step": 2941 }, { "epoch": 0.8233977050097957, "grad_norm": 0.22025487840882585, "learning_rate": 8.685677366326115e-05, "loss": 0.5747, "step": 2942 }, { "epoch": 0.8236775818639799, "grad_norm": 0.2156221179343058, "learning_rate": 8.68463480878262e-05, "loss": 0.5387, "step": 2943 }, { "epoch": 0.8239574587181641, "grad_norm": 0.2180794735528772, "learning_rate": 8.683591900533519e-05, "loss": 0.5366, "step": 2944 }, { "epoch": 0.8242373355723481, "grad_norm": 0.22272214971857127, "learning_rate": 8.68254864167808e-05, "loss": 0.533, "step": 2945 }, { "epoch": 0.8245172124265323, "grad_norm": 0.21944632741313155, "learning_rate": 8.681505032315602e-05, "loss": 0.5588, "step": 2946 }, { "epoch": 0.8247970892807165, "grad_norm": 0.2230834224586324, "learning_rate": 8.680461072545411e-05, "loss": 0.5553, "step": 2947 }, { "epoch": 0.8250769661349007, "grad_norm": 0.21119652852437684, "learning_rate": 8.67941676246688e-05, "loss": 0.5556, "step": 2948 }, { "epoch": 0.8253568429890848, "grad_norm": 0.21568690682339875, "learning_rate": 8.678372102179397e-05, "loss": 0.5507, "step": 2949 }, { "epoch": 0.8256367198432689, "grad_norm": 0.22047401512478768, "learning_rate": 8.677327091782401e-05, "loss": 0.5194, "step": 2950 }, { "epoch": 0.8259165966974531, "grad_norm": 0.2203767571422379, "learning_rate": 8.676281731375353e-05, "loss": 0.5107, "step": 2951 }, { "epoch": 0.8261964735516373, "grad_norm": 0.21433731401494893, "learning_rate": 8.675236021057754e-05, "loss": 0.5209, "step": 2952 }, { "epoch": 0.8264763504058215, "grad_norm": 0.21481760769970837, "learning_rate": 8.674189960929132e-05, "loss": 0.5301, "step": 2953 }, { "epoch": 0.8267562272600056, "grad_norm": 0.21487242001534035, "learning_rate": 8.673143551089054e-05, "loss": 0.5322, "step": 2954 }, { "epoch": 0.8270361041141897, "grad_norm": 0.22973971830681553, "learning_rate": 8.672096791637115e-05, "loss": 0.5298, "step": 2955 }, { "epoch": 0.8273159809683739, "grad_norm": 0.22644056273751867, "learning_rate": 8.67104968267295e-05, "loss": 0.5325, "step": 2956 }, { "epoch": 0.8275958578225581, "grad_norm": 0.21955045194299588, "learning_rate": 8.670002224296221e-05, "loss": 0.5431, "step": 2957 }, { "epoch": 0.8278757346767422, "grad_norm": 0.23781902841644872, "learning_rate": 8.668954416606625e-05, "loss": 0.5121, "step": 2958 }, { "epoch": 0.8281556115309264, "grad_norm": 0.22609994299163855, "learning_rate": 8.667906259703895e-05, "loss": 0.5512, "step": 2959 }, { "epoch": 0.8284354883851105, "grad_norm": 0.21714338783466877, "learning_rate": 8.666857753687793e-05, "loss": 0.526, "step": 2960 }, { "epoch": 0.8287153652392947, "grad_norm": 0.2169107567494965, "learning_rate": 8.665808898658119e-05, "loss": 0.5501, "step": 2961 }, { "epoch": 0.8289952420934789, "grad_norm": 0.225957160407284, "learning_rate": 8.664759694714701e-05, "loss": 0.5392, "step": 2962 }, { "epoch": 0.829275118947663, "grad_norm": 0.21152421887093917, "learning_rate": 8.663710141957405e-05, "loss": 0.5422, "step": 2963 }, { "epoch": 0.8295549958018472, "grad_norm": 0.2235772061757988, "learning_rate": 8.662660240486127e-05, "loss": 0.5147, "step": 2964 }, { "epoch": 0.8298348726560314, "grad_norm": 0.2605627305586067, "learning_rate": 8.661609990400796e-05, "loss": 0.5369, "step": 2965 }, { "epoch": 0.8301147495102155, "grad_norm": 0.2181372860032632, "learning_rate": 8.660559391801377e-05, "loss": 0.5435, "step": 2966 }, { "epoch": 0.8303946263643996, "grad_norm": 0.21369808087418557, "learning_rate": 8.659508444787867e-05, "loss": 0.5299, "step": 2967 }, { "epoch": 0.8306745032185838, "grad_norm": 0.22535358055775553, "learning_rate": 8.658457149460295e-05, "loss": 0.5104, "step": 2968 }, { "epoch": 0.830954380072768, "grad_norm": 0.21689791959655993, "learning_rate": 8.657405505918723e-05, "loss": 0.5261, "step": 2969 }, { "epoch": 0.8312342569269522, "grad_norm": 0.2186281600265367, "learning_rate": 8.65635351426325e-05, "loss": 0.5095, "step": 2970 }, { "epoch": 0.8315141337811363, "grad_norm": 0.21441682748376117, "learning_rate": 8.655301174593999e-05, "loss": 0.5299, "step": 2971 }, { "epoch": 0.8317940106353204, "grad_norm": 0.21736371444094799, "learning_rate": 8.654248487011139e-05, "loss": 0.5112, "step": 2972 }, { "epoch": 0.8320738874895046, "grad_norm": 0.21915618735838882, "learning_rate": 8.653195451614862e-05, "loss": 0.5353, "step": 2973 }, { "epoch": 0.8323537643436888, "grad_norm": 0.22543908758416845, "learning_rate": 8.652142068505398e-05, "loss": 0.5525, "step": 2974 }, { "epoch": 0.832633641197873, "grad_norm": 0.23538984408900665, "learning_rate": 8.651088337783007e-05, "loss": 0.5394, "step": 2975 }, { "epoch": 0.832913518052057, "grad_norm": 0.2186494700659604, "learning_rate": 8.650034259547983e-05, "loss": 0.5284, "step": 2976 }, { "epoch": 0.8331933949062412, "grad_norm": 0.22811388308632696, "learning_rate": 8.648979833900656e-05, "loss": 0.5362, "step": 2977 }, { "epoch": 0.8334732717604254, "grad_norm": 0.21611044864898632, "learning_rate": 8.647925060941386e-05, "loss": 0.5234, "step": 2978 }, { "epoch": 0.8337531486146096, "grad_norm": 0.2087566925854075, "learning_rate": 8.646869940770567e-05, "loss": 0.5158, "step": 2979 }, { "epoch": 0.8340330254687938, "grad_norm": 0.22616478661119407, "learning_rate": 8.645814473488627e-05, "loss": 0.514, "step": 2980 }, { "epoch": 0.834312902322978, "grad_norm": 0.2155972000331264, "learning_rate": 8.644758659196023e-05, "loss": 0.5197, "step": 2981 }, { "epoch": 0.834592779177162, "grad_norm": 0.2142662820181751, "learning_rate": 8.643702497993251e-05, "loss": 0.5418, "step": 2982 }, { "epoch": 0.8348726560313462, "grad_norm": 0.22435896598531738, "learning_rate": 8.642645989980836e-05, "loss": 0.5373, "step": 2983 }, { "epoch": 0.8351525328855304, "grad_norm": 0.20422818906927728, "learning_rate": 8.641589135259336e-05, "loss": 0.5166, "step": 2984 }, { "epoch": 0.8354324097397146, "grad_norm": 0.2178930729884611, "learning_rate": 8.640531933929344e-05, "loss": 0.5199, "step": 2985 }, { "epoch": 0.8357122865938987, "grad_norm": 0.20772686523058428, "learning_rate": 8.639474386091485e-05, "loss": 0.5138, "step": 2986 }, { "epoch": 0.8359921634480828, "grad_norm": 0.21310742868775848, "learning_rate": 8.638416491846417e-05, "loss": 0.5158, "step": 2987 }, { "epoch": 0.836272040302267, "grad_norm": 0.21822248030969937, "learning_rate": 8.63735825129483e-05, "loss": 0.5274, "step": 2988 }, { "epoch": 0.8365519171564512, "grad_norm": 0.21565497743306078, "learning_rate": 8.636299664537449e-05, "loss": 0.5521, "step": 2989 }, { "epoch": 0.8368317940106353, "grad_norm": 0.22967598513625365, "learning_rate": 8.63524073167503e-05, "loss": 0.5351, "step": 2990 }, { "epoch": 0.8371116708648195, "grad_norm": 0.22272323520405976, "learning_rate": 8.634181452808366e-05, "loss": 0.5286, "step": 2991 }, { "epoch": 0.8373915477190036, "grad_norm": 0.2122248361469332, "learning_rate": 8.633121828038275e-05, "loss": 0.5204, "step": 2992 }, { "epoch": 0.8376714245731878, "grad_norm": 0.2224232128088561, "learning_rate": 8.632061857465614e-05, "loss": 0.5373, "step": 2993 }, { "epoch": 0.837951301427372, "grad_norm": 0.22770490944769647, "learning_rate": 8.631001541191275e-05, "loss": 0.5305, "step": 2994 }, { "epoch": 0.8382311782815561, "grad_norm": 0.22115756576959772, "learning_rate": 8.629940879316175e-05, "loss": 0.5416, "step": 2995 }, { "epoch": 0.8385110551357403, "grad_norm": 0.2137085524719575, "learning_rate": 8.628879871941271e-05, "loss": 0.5207, "step": 2996 }, { "epoch": 0.8387909319899244, "grad_norm": 0.2647897668434767, "learning_rate": 8.62781851916755e-05, "loss": 0.5319, "step": 2997 }, { "epoch": 0.8390708088441086, "grad_norm": 0.22114974994858042, "learning_rate": 8.62675682109603e-05, "loss": 0.5624, "step": 2998 }, { "epoch": 0.8393506856982927, "grad_norm": 0.21597740903387966, "learning_rate": 8.625694777827768e-05, "loss": 0.5377, "step": 2999 }, { "epoch": 0.8396305625524769, "grad_norm": 0.23160460862914972, "learning_rate": 8.624632389463847e-05, "loss": 0.5217, "step": 3000 }, { "epoch": 0.8399104394066611, "grad_norm": 0.22198800038480565, "learning_rate": 8.623569656105386e-05, "loss": 0.5398, "step": 3001 }, { "epoch": 0.8401903162608453, "grad_norm": 0.22880709948052833, "learning_rate": 8.622506577853538e-05, "loss": 0.5477, "step": 3002 }, { "epoch": 0.8404701931150294, "grad_norm": 0.22341643822669172, "learning_rate": 8.621443154809484e-05, "loss": 0.5247, "step": 3003 }, { "epoch": 0.8407500699692135, "grad_norm": 0.21597317363433163, "learning_rate": 8.620379387074445e-05, "loss": 0.5344, "step": 3004 }, { "epoch": 0.8410299468233977, "grad_norm": 0.2192461822601934, "learning_rate": 8.619315274749669e-05, "loss": 0.5304, "step": 3005 }, { "epoch": 0.8413098236775819, "grad_norm": 0.21837947530897744, "learning_rate": 8.61825081793644e-05, "loss": 0.5271, "step": 3006 }, { "epoch": 0.8415897005317661, "grad_norm": 0.23056929005798127, "learning_rate": 8.617186016736073e-05, "loss": 0.5309, "step": 3007 }, { "epoch": 0.8418695773859501, "grad_norm": 0.48552992745090096, "learning_rate": 8.616120871249915e-05, "loss": 0.534, "step": 3008 }, { "epoch": 0.8421494542401343, "grad_norm": 0.22014967001203464, "learning_rate": 8.615055381579351e-05, "loss": 0.5079, "step": 3009 }, { "epoch": 0.8424293310943185, "grad_norm": 0.22228896792519673, "learning_rate": 8.613989547825791e-05, "loss": 0.554, "step": 3010 }, { "epoch": 0.8427092079485027, "grad_norm": 0.3271222337534686, "learning_rate": 8.612923370090681e-05, "loss": 0.5256, "step": 3011 }, { "epoch": 0.8429890848026869, "grad_norm": 0.23895832466196784, "learning_rate": 8.611856848475506e-05, "loss": 0.54, "step": 3012 }, { "epoch": 0.8432689616568709, "grad_norm": 0.23813344642390777, "learning_rate": 8.610789983081773e-05, "loss": 0.5428, "step": 3013 }, { "epoch": 0.8435488385110551, "grad_norm": 0.2150596947662532, "learning_rate": 8.609722774011027e-05, "loss": 0.5245, "step": 3014 }, { "epoch": 0.8438287153652393, "grad_norm": 0.4205050738415862, "learning_rate": 8.608655221364848e-05, "loss": 0.5225, "step": 3015 }, { "epoch": 0.8441085922194235, "grad_norm": 0.48182958198991427, "learning_rate": 8.607587325244845e-05, "loss": 0.5287, "step": 3016 }, { "epoch": 0.8443884690736077, "grad_norm": 0.2357786403355586, "learning_rate": 8.606519085752661e-05, "loss": 0.5474, "step": 3017 }, { "epoch": 0.8446683459277917, "grad_norm": 0.2325231976984887, "learning_rate": 8.605450502989974e-05, "loss": 0.5393, "step": 3018 }, { "epoch": 0.8449482227819759, "grad_norm": 0.30687287538085123, "learning_rate": 8.604381577058486e-05, "loss": 0.5103, "step": 3019 }, { "epoch": 0.8452280996361601, "grad_norm": 0.28608036056032426, "learning_rate": 8.603312308059944e-05, "loss": 0.544, "step": 3020 }, { "epoch": 0.8455079764903443, "grad_norm": 0.7097432176114093, "learning_rate": 8.602242696096121e-05, "loss": 0.5325, "step": 3021 }, { "epoch": 0.8457878533445284, "grad_norm": 0.3829433514503948, "learning_rate": 8.60117274126882e-05, "loss": 0.5548, "step": 3022 }, { "epoch": 0.8460677301987126, "grad_norm": 0.20884200146313728, "learning_rate": 8.600102443679882e-05, "loss": 0.5269, "step": 3023 }, { "epoch": 0.8463476070528967, "grad_norm": 0.5019778171462909, "learning_rate": 8.599031803431179e-05, "loss": 0.5346, "step": 3024 }, { "epoch": 0.8466274839070809, "grad_norm": 0.21645085459579996, "learning_rate": 8.597960820624615e-05, "loss": 0.5216, "step": 3025 }, { "epoch": 0.846907360761265, "grad_norm": 0.22013350170302393, "learning_rate": 8.596889495362126e-05, "loss": 0.5371, "step": 3026 }, { "epoch": 0.8471872376154492, "grad_norm": 0.23428537071912092, "learning_rate": 8.595817827745681e-05, "loss": 0.5389, "step": 3027 }, { "epoch": 0.8474671144696334, "grad_norm": 0.26038829635626154, "learning_rate": 8.594745817877283e-05, "loss": 0.5339, "step": 3028 }, { "epoch": 0.8477469913238175, "grad_norm": 0.2526970486204993, "learning_rate": 8.593673465858964e-05, "loss": 0.5369, "step": 3029 }, { "epoch": 0.8480268681780017, "grad_norm": 0.22912139308680848, "learning_rate": 8.592600771792796e-05, "loss": 0.5573, "step": 3030 }, { "epoch": 0.8483067450321858, "grad_norm": 0.23083891019087724, "learning_rate": 8.591527735780874e-05, "loss": 0.5307, "step": 3031 }, { "epoch": 0.84858662188637, "grad_norm": 0.2287937314794176, "learning_rate": 8.590454357925333e-05, "loss": 0.5329, "step": 3032 }, { "epoch": 0.8488664987405542, "grad_norm": 0.2311557611551994, "learning_rate": 8.589380638328335e-05, "loss": 0.5061, "step": 3033 }, { "epoch": 0.8491463755947383, "grad_norm": 0.24368922031902182, "learning_rate": 8.58830657709208e-05, "loss": 0.5359, "step": 3034 }, { "epoch": 0.8494262524489224, "grad_norm": 0.2190276350247355, "learning_rate": 8.587232174318795e-05, "loss": 0.5562, "step": 3035 }, { "epoch": 0.8497061293031066, "grad_norm": 0.23802916448732422, "learning_rate": 8.586157430110747e-05, "loss": 0.5116, "step": 3036 }, { "epoch": 0.8499860061572908, "grad_norm": 0.2316176129573859, "learning_rate": 8.585082344570224e-05, "loss": 0.5207, "step": 3037 }, { "epoch": 0.850265883011475, "grad_norm": 0.24220759817972742, "learning_rate": 8.584006917799559e-05, "loss": 0.5282, "step": 3038 }, { "epoch": 0.8505457598656591, "grad_norm": 0.20917558157479316, "learning_rate": 8.582931149901108e-05, "loss": 0.5239, "step": 3039 }, { "epoch": 0.8508256367198432, "grad_norm": 0.21993803371135584, "learning_rate": 8.581855040977265e-05, "loss": 0.5234, "step": 3040 }, { "epoch": 0.8511055135740274, "grad_norm": 0.21482793953146043, "learning_rate": 8.580778591130455e-05, "loss": 0.5371, "step": 3041 }, { "epoch": 0.8513853904282116, "grad_norm": 0.2300206860974878, "learning_rate": 8.579701800463133e-05, "loss": 0.5537, "step": 3042 }, { "epoch": 0.8516652672823958, "grad_norm": 0.23620665970415572, "learning_rate": 8.578624669077792e-05, "loss": 0.5639, "step": 3043 }, { "epoch": 0.85194514413658, "grad_norm": 0.23789290559836274, "learning_rate": 8.577547197076951e-05, "loss": 0.5328, "step": 3044 }, { "epoch": 0.852225020990764, "grad_norm": 0.23353656471451908, "learning_rate": 8.576469384563167e-05, "loss": 0.5403, "step": 3045 }, { "epoch": 0.8525048978449482, "grad_norm": 0.23339557126696017, "learning_rate": 8.575391231639023e-05, "loss": 0.5285, "step": 3046 }, { "epoch": 0.8527847746991324, "grad_norm": 0.2192162077558338, "learning_rate": 8.57431273840714e-05, "loss": 0.5331, "step": 3047 }, { "epoch": 0.8530646515533166, "grad_norm": 0.23200636869438518, "learning_rate": 8.573233904970171e-05, "loss": 0.5543, "step": 3048 }, { "epoch": 0.8533445284075007, "grad_norm": 0.228854877015622, "learning_rate": 8.5721547314308e-05, "loss": 0.5556, "step": 3049 }, { "epoch": 0.8536244052616848, "grad_norm": 0.23432131992320163, "learning_rate": 8.57107521789174e-05, "loss": 0.5397, "step": 3050 }, { "epoch": 0.853904282115869, "grad_norm": 0.21593071073555029, "learning_rate": 8.569995364455743e-05, "loss": 0.5167, "step": 3051 }, { "epoch": 0.8541841589700532, "grad_norm": 0.2334519803327315, "learning_rate": 8.568915171225589e-05, "loss": 0.5336, "step": 3052 }, { "epoch": 0.8544640358242374, "grad_norm": 0.2393187471142604, "learning_rate": 8.56783463830409e-05, "loss": 0.5642, "step": 3053 }, { "epoch": 0.8547439126784215, "grad_norm": 0.23015236105141004, "learning_rate": 8.566753765794096e-05, "loss": 0.5441, "step": 3054 }, { "epoch": 0.8550237895326056, "grad_norm": 0.40683635224973136, "learning_rate": 8.56567255379848e-05, "loss": 0.5403, "step": 3055 }, { "epoch": 0.8553036663867898, "grad_norm": 0.21313781378496474, "learning_rate": 8.564591002420155e-05, "loss": 0.5052, "step": 3056 }, { "epoch": 0.855583543240974, "grad_norm": 0.21644306822669965, "learning_rate": 8.563509111762064e-05, "loss": 0.5316, "step": 3057 }, { "epoch": 0.8558634200951581, "grad_norm": 0.222839599469616, "learning_rate": 8.562426881927181e-05, "loss": 0.5362, "step": 3058 }, { "epoch": 0.8561432969493423, "grad_norm": 0.22677337413030232, "learning_rate": 8.561344313018513e-05, "loss": 0.5377, "step": 3059 }, { "epoch": 0.8564231738035264, "grad_norm": 0.22181193529032409, "learning_rate": 8.560261405139098e-05, "loss": 0.5332, "step": 3060 }, { "epoch": 0.8567030506577106, "grad_norm": 0.21684088220810097, "learning_rate": 8.559178158392012e-05, "loss": 0.5087, "step": 3061 }, { "epoch": 0.8569829275118948, "grad_norm": 0.22488537452868168, "learning_rate": 8.558094572880357e-05, "loss": 0.5456, "step": 3062 }, { "epoch": 0.8572628043660789, "grad_norm": 0.2160019014092832, "learning_rate": 8.557010648707268e-05, "loss": 0.5228, "step": 3063 }, { "epoch": 0.8575426812202631, "grad_norm": 0.21151813117300422, "learning_rate": 8.555926385975914e-05, "loss": 0.5368, "step": 3064 }, { "epoch": 0.8578225580744473, "grad_norm": 0.22124207366336962, "learning_rate": 8.554841784789499e-05, "loss": 0.5284, "step": 3065 }, { "epoch": 0.8581024349286314, "grad_norm": 0.22389008378654957, "learning_rate": 8.553756845251251e-05, "loss": 0.5456, "step": 3066 }, { "epoch": 0.8583823117828155, "grad_norm": 0.22294828670104347, "learning_rate": 8.552671567464436e-05, "loss": 0.5342, "step": 3067 }, { "epoch": 0.8586621886369997, "grad_norm": 0.22699959701765185, "learning_rate": 8.551585951532355e-05, "loss": 0.5535, "step": 3068 }, { "epoch": 0.8589420654911839, "grad_norm": 0.21951338821931762, "learning_rate": 8.550499997558335e-05, "loss": 0.5352, "step": 3069 }, { "epoch": 0.8592219423453681, "grad_norm": 0.2207427299372135, "learning_rate": 8.549413705645737e-05, "loss": 0.5292, "step": 3070 }, { "epoch": 0.8595018191995522, "grad_norm": 0.2158791998334058, "learning_rate": 8.548327075897955e-05, "loss": 0.52, "step": 3071 }, { "epoch": 0.8597816960537363, "grad_norm": 0.22515172406642706, "learning_rate": 8.547240108418417e-05, "loss": 0.5263, "step": 3072 }, { "epoch": 0.8600615729079205, "grad_norm": 0.21368624688447932, "learning_rate": 8.54615280331058e-05, "loss": 0.5593, "step": 3073 }, { "epoch": 0.8603414497621047, "grad_norm": 0.22572367960252235, "learning_rate": 8.545065160677935e-05, "loss": 0.551, "step": 3074 }, { "epoch": 0.8606213266162889, "grad_norm": 0.2228799855804906, "learning_rate": 8.543977180624003e-05, "loss": 0.5275, "step": 3075 }, { "epoch": 0.8609012034704729, "grad_norm": 0.22250102641620206, "learning_rate": 8.54288886325234e-05, "loss": 0.537, "step": 3076 }, { "epoch": 0.8611810803246571, "grad_norm": 0.22202228022932616, "learning_rate": 8.54180020866653e-05, "loss": 0.5346, "step": 3077 }, { "epoch": 0.8614609571788413, "grad_norm": 0.22936964314002728, "learning_rate": 8.540711216970197e-05, "loss": 0.5206, "step": 3078 }, { "epoch": 0.8617408340330255, "grad_norm": 0.3770062819332392, "learning_rate": 8.539621888266985e-05, "loss": 0.5517, "step": 3079 }, { "epoch": 0.8620207108872097, "grad_norm": 0.2228015196743566, "learning_rate": 8.538532222660583e-05, "loss": 0.522, "step": 3080 }, { "epoch": 0.8623005877413937, "grad_norm": 0.2228220545406724, "learning_rate": 8.537442220254703e-05, "loss": 0.5256, "step": 3081 }, { "epoch": 0.8625804645955779, "grad_norm": 0.23350733934227055, "learning_rate": 8.53635188115309e-05, "loss": 0.5485, "step": 3082 }, { "epoch": 0.8628603414497621, "grad_norm": 0.21846066077770462, "learning_rate": 8.53526120545953e-05, "loss": 0.5276, "step": 3083 }, { "epoch": 0.8631402183039463, "grad_norm": 0.2335962307244182, "learning_rate": 8.534170193277827e-05, "loss": 0.5548, "step": 3084 }, { "epoch": 0.8634200951581305, "grad_norm": 0.2201364789143078, "learning_rate": 8.533078844711826e-05, "loss": 0.5426, "step": 3085 }, { "epoch": 0.8636999720123146, "grad_norm": 0.23065771245439703, "learning_rate": 8.531987159865405e-05, "loss": 0.5716, "step": 3086 }, { "epoch": 0.8639798488664987, "grad_norm": 0.23289686034673565, "learning_rate": 8.530895138842467e-05, "loss": 0.5299, "step": 3087 }, { "epoch": 0.8642597257206829, "grad_norm": 0.21787168916983998, "learning_rate": 8.529802781746956e-05, "loss": 0.5342, "step": 3088 }, { "epoch": 0.8645396025748671, "grad_norm": 0.22481660268884418, "learning_rate": 8.528710088682839e-05, "loss": 0.5174, "step": 3089 }, { "epoch": 0.8648194794290512, "grad_norm": 0.22367814560169486, "learning_rate": 8.52761705975412e-05, "loss": 0.5474, "step": 3090 }, { "epoch": 0.8650993562832354, "grad_norm": 0.2179802466288477, "learning_rate": 8.526523695064836e-05, "loss": 0.4985, "step": 3091 }, { "epoch": 0.8653792331374195, "grad_norm": 0.22584878578261564, "learning_rate": 8.525429994719052e-05, "loss": 0.5362, "step": 3092 }, { "epoch": 0.8656591099916037, "grad_norm": 0.21681602528527902, "learning_rate": 8.524335958820868e-05, "loss": 0.5399, "step": 3093 }, { "epoch": 0.8659389868457879, "grad_norm": 0.21004233979125894, "learning_rate": 8.523241587474416e-05, "loss": 0.5279, "step": 3094 }, { "epoch": 0.866218863699972, "grad_norm": 0.22216773769256115, "learning_rate": 8.522146880783855e-05, "loss": 0.5254, "step": 3095 }, { "epoch": 0.8664987405541562, "grad_norm": 0.22621255953137892, "learning_rate": 8.521051838853385e-05, "loss": 0.5328, "step": 3096 }, { "epoch": 0.8667786174083403, "grad_norm": 0.213235618976495, "learning_rate": 8.519956461787228e-05, "loss": 0.5247, "step": 3097 }, { "epoch": 0.8670584942625245, "grad_norm": 0.21174058618586977, "learning_rate": 8.518860749689648e-05, "loss": 0.5372, "step": 3098 }, { "epoch": 0.8673383711167086, "grad_norm": 0.2329901448660573, "learning_rate": 8.51776470266493e-05, "loss": 0.507, "step": 3099 }, { "epoch": 0.8676182479708928, "grad_norm": 0.2345445756356732, "learning_rate": 8.516668320817399e-05, "loss": 0.5254, "step": 3100 }, { "epoch": 0.867898124825077, "grad_norm": 0.22585171397064097, "learning_rate": 8.51557160425141e-05, "loss": 0.5053, "step": 3101 }, { "epoch": 0.8681780016792612, "grad_norm": 0.22080060758473458, "learning_rate": 8.514474553071345e-05, "loss": 0.5214, "step": 3102 }, { "epoch": 0.8684578785334452, "grad_norm": 0.2248032890887987, "learning_rate": 8.513377167381626e-05, "loss": 0.5314, "step": 3103 }, { "epoch": 0.8687377553876294, "grad_norm": 0.2192324263951842, "learning_rate": 8.512279447286703e-05, "loss": 0.5145, "step": 3104 }, { "epoch": 0.8690176322418136, "grad_norm": 0.2257624455841242, "learning_rate": 8.511181392891055e-05, "loss": 0.5195, "step": 3105 }, { "epoch": 0.8692975090959978, "grad_norm": 0.2328728079571696, "learning_rate": 8.510083004299199e-05, "loss": 0.5448, "step": 3106 }, { "epoch": 0.869577385950182, "grad_norm": 0.2203283253375442, "learning_rate": 8.508984281615675e-05, "loss": 0.5536, "step": 3107 }, { "epoch": 0.869857262804366, "grad_norm": 0.22063659465343083, "learning_rate": 8.507885224945065e-05, "loss": 0.5381, "step": 3108 }, { "epoch": 0.8701371396585502, "grad_norm": 0.22939783313111242, "learning_rate": 8.506785834391975e-05, "loss": 0.5565, "step": 3109 }, { "epoch": 0.8704170165127344, "grad_norm": 0.2294001337165578, "learning_rate": 8.505686110061046e-05, "loss": 0.5214, "step": 3110 }, { "epoch": 0.8706968933669186, "grad_norm": 0.21851962678707973, "learning_rate": 8.504586052056951e-05, "loss": 0.5474, "step": 3111 }, { "epoch": 0.8709767702211028, "grad_norm": 0.21350852258826278, "learning_rate": 8.503485660484396e-05, "loss": 0.5049, "step": 3112 }, { "epoch": 0.8712566470752868, "grad_norm": 0.22786109805847043, "learning_rate": 8.502384935448112e-05, "loss": 0.5214, "step": 3113 }, { "epoch": 0.871536523929471, "grad_norm": 0.2236176254734842, "learning_rate": 8.50128387705287e-05, "loss": 0.5342, "step": 3114 }, { "epoch": 0.8718164007836552, "grad_norm": 0.21937941632307223, "learning_rate": 8.500182485403471e-05, "loss": 0.5551, "step": 3115 }, { "epoch": 0.8720962776378394, "grad_norm": 0.22777612967246738, "learning_rate": 8.499080760604742e-05, "loss": 0.5145, "step": 3116 }, { "epoch": 0.8723761544920235, "grad_norm": 0.20649570660870228, "learning_rate": 8.497978702761547e-05, "loss": 0.526, "step": 3117 }, { "epoch": 0.8726560313462076, "grad_norm": 0.21192410040800452, "learning_rate": 8.496876311978784e-05, "loss": 0.5276, "step": 3118 }, { "epoch": 0.8729359082003918, "grad_norm": 0.24903919474732164, "learning_rate": 8.495773588361374e-05, "loss": 0.5246, "step": 3119 }, { "epoch": 0.873215785054576, "grad_norm": 0.22174705900135905, "learning_rate": 8.494670532014277e-05, "loss": 0.5367, "step": 3120 }, { "epoch": 0.8734956619087602, "grad_norm": 0.22209745002607398, "learning_rate": 8.493567143042485e-05, "loss": 0.5531, "step": 3121 }, { "epoch": 0.8737755387629443, "grad_norm": 0.21573048363141414, "learning_rate": 8.492463421551016e-05, "loss": 0.5532, "step": 3122 }, { "epoch": 0.8740554156171285, "grad_norm": 0.2186833501586502, "learning_rate": 8.491359367644922e-05, "loss": 0.5235, "step": 3123 }, { "epoch": 0.8743352924713126, "grad_norm": 0.2233053486143271, "learning_rate": 8.490254981429291e-05, "loss": 0.5531, "step": 3124 }, { "epoch": 0.8746151693254968, "grad_norm": 0.21989657534322912, "learning_rate": 8.489150263009236e-05, "loss": 0.5343, "step": 3125 }, { "epoch": 0.874895046179681, "grad_norm": 0.22215578956680457, "learning_rate": 8.488045212489906e-05, "loss": 0.5026, "step": 3126 }, { "epoch": 0.8751749230338651, "grad_norm": 0.2144000586652832, "learning_rate": 8.486939829976481e-05, "loss": 0.5499, "step": 3127 }, { "epoch": 0.8754547998880493, "grad_norm": 0.21964329639737248, "learning_rate": 8.485834115574171e-05, "loss": 0.5309, "step": 3128 }, { "epoch": 0.8757346767422334, "grad_norm": 0.2250524115638172, "learning_rate": 8.48472806938822e-05, "loss": 0.5273, "step": 3129 }, { "epoch": 0.8760145535964176, "grad_norm": 0.21934176725743307, "learning_rate": 8.4836216915239e-05, "loss": 0.5471, "step": 3130 }, { "epoch": 0.8762944304506017, "grad_norm": 0.21883613562376256, "learning_rate": 8.482514982086517e-05, "loss": 0.5424, "step": 3131 }, { "epoch": 0.8765743073047859, "grad_norm": 0.21740762627582735, "learning_rate": 8.48140794118141e-05, "loss": 0.5239, "step": 3132 }, { "epoch": 0.8768541841589701, "grad_norm": 0.2181406659999808, "learning_rate": 8.480300568913945e-05, "loss": 0.5207, "step": 3133 }, { "epoch": 0.8771340610131542, "grad_norm": 0.22269041831994688, "learning_rate": 8.479192865389525e-05, "loss": 0.5611, "step": 3134 }, { "epoch": 0.8774139378673383, "grad_norm": 0.22622911694068062, "learning_rate": 8.47808483071358e-05, "loss": 0.5355, "step": 3135 }, { "epoch": 0.8776938147215225, "grad_norm": 0.22244603082827713, "learning_rate": 8.476976464991573e-05, "loss": 0.5024, "step": 3136 }, { "epoch": 0.8779736915757067, "grad_norm": 0.2321149946809836, "learning_rate": 8.475867768329002e-05, "loss": 0.5107, "step": 3137 }, { "epoch": 0.8782535684298909, "grad_norm": 0.24475291898145704, "learning_rate": 8.47475874083139e-05, "loss": 0.533, "step": 3138 }, { "epoch": 0.878533445284075, "grad_norm": 0.22701519899541858, "learning_rate": 8.473649382604296e-05, "loss": 0.5515, "step": 3139 }, { "epoch": 0.8788133221382591, "grad_norm": 0.2187927170120886, "learning_rate": 8.472539693753311e-05, "loss": 0.5406, "step": 3140 }, { "epoch": 0.8790931989924433, "grad_norm": 0.217235931346614, "learning_rate": 8.471429674384052e-05, "loss": 0.5321, "step": 3141 }, { "epoch": 0.8793730758466275, "grad_norm": 0.21597888070041155, "learning_rate": 8.470319324602176e-05, "loss": 0.5554, "step": 3142 }, { "epoch": 0.8796529527008117, "grad_norm": 0.21855940183015699, "learning_rate": 8.469208644513363e-05, "loss": 0.533, "step": 3143 }, { "epoch": 0.8799328295549959, "grad_norm": 0.22623110729627563, "learning_rate": 8.46809763422333e-05, "loss": 0.5367, "step": 3144 }, { "epoch": 0.8802127064091799, "grad_norm": 0.22743398846355872, "learning_rate": 8.466986293837822e-05, "loss": 0.5538, "step": 3145 }, { "epoch": 0.8804925832633641, "grad_norm": 0.21509057495178288, "learning_rate": 8.465874623462619e-05, "loss": 0.5249, "step": 3146 }, { "epoch": 0.8807724601175483, "grad_norm": 0.21457145431495728, "learning_rate": 8.46476262320353e-05, "loss": 0.5452, "step": 3147 }, { "epoch": 0.8810523369717325, "grad_norm": 0.21222960094378254, "learning_rate": 8.463650293166393e-05, "loss": 0.5192, "step": 3148 }, { "epoch": 0.8813322138259166, "grad_norm": 0.20859749016140702, "learning_rate": 8.462537633457084e-05, "loss": 0.5341, "step": 3149 }, { "epoch": 0.8816120906801007, "grad_norm": 0.21203505951889612, "learning_rate": 8.461424644181503e-05, "loss": 0.5472, "step": 3150 }, { "epoch": 0.8818919675342849, "grad_norm": 0.22782042811565067, "learning_rate": 8.460311325445589e-05, "loss": 0.533, "step": 3151 }, { "epoch": 0.8821718443884691, "grad_norm": 0.21469946099009463, "learning_rate": 8.459197677355306e-05, "loss": 0.5261, "step": 3152 }, { "epoch": 0.8824517212426533, "grad_norm": 0.20998221489633284, "learning_rate": 8.458083700016653e-05, "loss": 0.4949, "step": 3153 }, { "epoch": 0.8827315980968374, "grad_norm": 0.22004558444916747, "learning_rate": 8.456969393535655e-05, "loss": 0.5338, "step": 3154 }, { "epoch": 0.8830114749510215, "grad_norm": 0.22344979424863592, "learning_rate": 8.455854758018376e-05, "loss": 0.555, "step": 3155 }, { "epoch": 0.8832913518052057, "grad_norm": 0.23319072215414186, "learning_rate": 8.454739793570909e-05, "loss": 0.5402, "step": 3156 }, { "epoch": 0.8835712286593899, "grad_norm": 0.21611772173232785, "learning_rate": 8.453624500299373e-05, "loss": 0.5281, "step": 3157 }, { "epoch": 0.883851105513574, "grad_norm": 0.2432682966502491, "learning_rate": 8.452508878309923e-05, "loss": 0.5538, "step": 3158 }, { "epoch": 0.8841309823677582, "grad_norm": 0.21309772960948314, "learning_rate": 8.451392927708747e-05, "loss": 0.5405, "step": 3159 }, { "epoch": 0.8844108592219423, "grad_norm": 0.2324534497746261, "learning_rate": 8.450276648602061e-05, "loss": 0.5485, "step": 3160 }, { "epoch": 0.8846907360761265, "grad_norm": 0.22615165140933946, "learning_rate": 8.44916004109611e-05, "loss": 0.542, "step": 3161 }, { "epoch": 0.8849706129303107, "grad_norm": 0.22573627988782002, "learning_rate": 8.448043105297178e-05, "loss": 0.5553, "step": 3162 }, { "epoch": 0.8852504897844948, "grad_norm": 0.21403239299002255, "learning_rate": 8.446925841311572e-05, "loss": 0.5219, "step": 3163 }, { "epoch": 0.885530366638679, "grad_norm": 0.22349300654120277, "learning_rate": 8.445808249245634e-05, "loss": 0.5244, "step": 3164 }, { "epoch": 0.8858102434928632, "grad_norm": 0.220638465210544, "learning_rate": 8.444690329205742e-05, "loss": 0.5189, "step": 3165 }, { "epoch": 0.8860901203470473, "grad_norm": 0.2092624701038376, "learning_rate": 8.443572081298294e-05, "loss": 0.5236, "step": 3166 }, { "epoch": 0.8863699972012314, "grad_norm": 0.21465614741067168, "learning_rate": 8.442453505629726e-05, "loss": 0.5191, "step": 3167 }, { "epoch": 0.8866498740554156, "grad_norm": 0.21519642727409988, "learning_rate": 8.441334602306509e-05, "loss": 0.5123, "step": 3168 }, { "epoch": 0.8869297509095998, "grad_norm": 0.21478883558439016, "learning_rate": 8.440215371435137e-05, "loss": 0.5275, "step": 3169 }, { "epoch": 0.887209627763784, "grad_norm": 0.21644613627124493, "learning_rate": 8.439095813122143e-05, "loss": 0.5313, "step": 3170 }, { "epoch": 0.887489504617968, "grad_norm": 0.2291596989569196, "learning_rate": 8.43797592747408e-05, "loss": 0.564, "step": 3171 }, { "epoch": 0.8877693814721522, "grad_norm": 0.21334073065507236, "learning_rate": 8.436855714597546e-05, "loss": 0.534, "step": 3172 }, { "epoch": 0.8880492583263364, "grad_norm": 0.22089687449675843, "learning_rate": 8.435735174599165e-05, "loss": 0.5205, "step": 3173 }, { "epoch": 0.8883291351805206, "grad_norm": 0.23584516821934443, "learning_rate": 8.434614307585582e-05, "loss": 0.5656, "step": 3174 }, { "epoch": 0.8886090120347048, "grad_norm": 0.226205757196865, "learning_rate": 8.433493113663489e-05, "loss": 0.5481, "step": 3175 }, { "epoch": 0.8888888888888888, "grad_norm": 0.21360041855627168, "learning_rate": 8.432371592939599e-05, "loss": 0.5363, "step": 3176 }, { "epoch": 0.889168765743073, "grad_norm": 0.21904486669173082, "learning_rate": 8.43124974552066e-05, "loss": 0.5203, "step": 3177 }, { "epoch": 0.8894486425972572, "grad_norm": 0.22179102451077717, "learning_rate": 8.43012757151345e-05, "loss": 0.5451, "step": 3178 }, { "epoch": 0.8897285194514414, "grad_norm": 0.22856237475782754, "learning_rate": 8.429005071024778e-05, "loss": 0.5414, "step": 3179 }, { "epoch": 0.8900083963056256, "grad_norm": 0.2158331073274858, "learning_rate": 8.427882244161482e-05, "loss": 0.5316, "step": 3180 }, { "epoch": 0.8902882731598096, "grad_norm": 0.21357794510832812, "learning_rate": 8.426759091030437e-05, "loss": 0.5152, "step": 3181 }, { "epoch": 0.8905681500139938, "grad_norm": 0.22180266892326359, "learning_rate": 8.425635611738543e-05, "loss": 0.5469, "step": 3182 }, { "epoch": 0.890848026868178, "grad_norm": 0.2087533268175315, "learning_rate": 8.424511806392736e-05, "loss": 0.5245, "step": 3183 }, { "epoch": 0.8911279037223622, "grad_norm": 0.2199988164234642, "learning_rate": 8.423387675099977e-05, "loss": 0.5301, "step": 3184 }, { "epoch": 0.8914077805765463, "grad_norm": 0.21437648175825566, "learning_rate": 8.422263217967263e-05, "loss": 0.5351, "step": 3185 }, { "epoch": 0.8916876574307305, "grad_norm": 0.2198696949107264, "learning_rate": 8.421138435101618e-05, "loss": 0.5118, "step": 3186 }, { "epoch": 0.8919675342849146, "grad_norm": 0.22454727769491456, "learning_rate": 8.420013326610105e-05, "loss": 0.5471, "step": 3187 }, { "epoch": 0.8922474111390988, "grad_norm": 0.21990546237502193, "learning_rate": 8.418887892599809e-05, "loss": 0.5488, "step": 3188 }, { "epoch": 0.892527287993283, "grad_norm": 0.21714014362804088, "learning_rate": 8.417762133177848e-05, "loss": 0.5081, "step": 3189 }, { "epoch": 0.8928071648474671, "grad_norm": 0.2231419036615365, "learning_rate": 8.416636048451376e-05, "loss": 0.5307, "step": 3190 }, { "epoch": 0.8930870417016513, "grad_norm": 0.22930027763459426, "learning_rate": 8.415509638527572e-05, "loss": 0.5383, "step": 3191 }, { "epoch": 0.8933669185558354, "grad_norm": 0.2246271030278197, "learning_rate": 8.414382903513649e-05, "loss": 0.5494, "step": 3192 }, { "epoch": 0.8936467954100196, "grad_norm": 0.22405343698134375, "learning_rate": 8.413255843516851e-05, "loss": 0.5178, "step": 3193 }, { "epoch": 0.8939266722642037, "grad_norm": 0.24221654128655928, "learning_rate": 8.412128458644449e-05, "loss": 0.5563, "step": 3194 }, { "epoch": 0.8942065491183879, "grad_norm": 0.21815434040286807, "learning_rate": 8.411000749003753e-05, "loss": 0.5336, "step": 3195 }, { "epoch": 0.8944864259725721, "grad_norm": 0.22157664448096417, "learning_rate": 8.409872714702093e-05, "loss": 0.5423, "step": 3196 }, { "epoch": 0.8947663028267562, "grad_norm": 0.2105901061172276, "learning_rate": 8.408744355846842e-05, "loss": 0.5443, "step": 3197 }, { "epoch": 0.8950461796809404, "grad_norm": 0.244545495591243, "learning_rate": 8.407615672545396e-05, "loss": 0.5522, "step": 3198 }, { "epoch": 0.8953260565351245, "grad_norm": 0.2081948552672021, "learning_rate": 8.406486664905183e-05, "loss": 0.5295, "step": 3199 }, { "epoch": 0.8956059333893087, "grad_norm": 0.22325001321795235, "learning_rate": 8.405357333033659e-05, "loss": 0.5158, "step": 3200 }, { "epoch": 0.8958858102434929, "grad_norm": 0.2158764769238209, "learning_rate": 8.404227677038322e-05, "loss": 0.5282, "step": 3201 }, { "epoch": 0.8961656870976771, "grad_norm": 0.22807617422393783, "learning_rate": 8.403097697026687e-05, "loss": 0.551, "step": 3202 }, { "epoch": 0.8964455639518611, "grad_norm": 0.20899930240384806, "learning_rate": 8.401967393106309e-05, "loss": 0.5334, "step": 3203 }, { "epoch": 0.8967254408060453, "grad_norm": 0.2182254048702635, "learning_rate": 8.40083676538477e-05, "loss": 0.5129, "step": 3204 }, { "epoch": 0.8970053176602295, "grad_norm": 0.2195985871550385, "learning_rate": 8.399705813969685e-05, "loss": 0.534, "step": 3205 }, { "epoch": 0.8972851945144137, "grad_norm": 0.21838456840893322, "learning_rate": 8.398574538968697e-05, "loss": 0.5077, "step": 3206 }, { "epoch": 0.8975650713685979, "grad_norm": 0.20531380979348746, "learning_rate": 8.397442940489481e-05, "loss": 0.5403, "step": 3207 }, { "epoch": 0.8978449482227819, "grad_norm": 0.2249946706098934, "learning_rate": 8.396311018639746e-05, "loss": 0.5483, "step": 3208 }, { "epoch": 0.8981248250769661, "grad_norm": 0.21855501304245706, "learning_rate": 8.395178773527229e-05, "loss": 0.5377, "step": 3209 }, { "epoch": 0.8984047019311503, "grad_norm": 0.20427182425970294, "learning_rate": 8.394046205259693e-05, "loss": 0.5149, "step": 3210 }, { "epoch": 0.8986845787853345, "grad_norm": 0.2193552303123338, "learning_rate": 8.39291331394494e-05, "loss": 0.517, "step": 3211 }, { "epoch": 0.8989644556395187, "grad_norm": 0.21587873171075797, "learning_rate": 8.391780099690799e-05, "loss": 0.4998, "step": 3212 }, { "epoch": 0.8992443324937027, "grad_norm": 0.21433001058656947, "learning_rate": 8.390646562605129e-05, "loss": 0.5226, "step": 3213 }, { "epoch": 0.8995242093478869, "grad_norm": 0.22970641247922163, "learning_rate": 8.389512702795823e-05, "loss": 0.5353, "step": 3214 }, { "epoch": 0.8998040862020711, "grad_norm": 0.2182246871696593, "learning_rate": 8.388378520370799e-05, "loss": 0.5576, "step": 3215 }, { "epoch": 0.9000839630562553, "grad_norm": 0.2228115798851557, "learning_rate": 8.387244015438012e-05, "loss": 0.5402, "step": 3216 }, { "epoch": 0.9003638399104394, "grad_norm": 0.21320783404344404, "learning_rate": 8.386109188105442e-05, "loss": 0.5249, "step": 3217 }, { "epoch": 0.9006437167646235, "grad_norm": 0.23125313491309887, "learning_rate": 8.384974038481105e-05, "loss": 0.5465, "step": 3218 }, { "epoch": 0.9009235936188077, "grad_norm": 0.2159942191919595, "learning_rate": 8.383838566673046e-05, "loss": 0.5355, "step": 3219 }, { "epoch": 0.9012034704729919, "grad_norm": 0.2151240015535236, "learning_rate": 8.382702772789337e-05, "loss": 0.5443, "step": 3220 }, { "epoch": 0.901483347327176, "grad_norm": 0.20777649848856372, "learning_rate": 8.381566656938085e-05, "loss": 0.5121, "step": 3221 }, { "epoch": 0.9017632241813602, "grad_norm": 0.21754586222299052, "learning_rate": 8.380430219227427e-05, "loss": 0.5195, "step": 3222 }, { "epoch": 0.9020431010355444, "grad_norm": 0.21248672937134955, "learning_rate": 8.379293459765526e-05, "loss": 0.5351, "step": 3223 }, { "epoch": 0.9023229778897285, "grad_norm": 0.2206266391186216, "learning_rate": 8.378156378660584e-05, "loss": 0.5223, "step": 3224 }, { "epoch": 0.9026028547439127, "grad_norm": 0.21535885831813834, "learning_rate": 8.377018976020824e-05, "loss": 0.5227, "step": 3225 }, { "epoch": 0.9028827315980968, "grad_norm": 0.22093169414907043, "learning_rate": 8.375881251954512e-05, "loss": 0.5511, "step": 3226 }, { "epoch": 0.903162608452281, "grad_norm": 0.21838882264546794, "learning_rate": 8.374743206569931e-05, "loss": 0.5245, "step": 3227 }, { "epoch": 0.9034424853064652, "grad_norm": 0.23592055142362406, "learning_rate": 8.373604839975403e-05, "loss": 0.5511, "step": 3228 }, { "epoch": 0.9037223621606493, "grad_norm": 0.2188882784893106, "learning_rate": 8.372466152279278e-05, "loss": 0.5267, "step": 3229 }, { "epoch": 0.9040022390148335, "grad_norm": 0.22086254230761396, "learning_rate": 8.371327143589935e-05, "loss": 0.5235, "step": 3230 }, { "epoch": 0.9042821158690176, "grad_norm": 0.21454242664136042, "learning_rate": 8.370187814015788e-05, "loss": 0.538, "step": 3231 }, { "epoch": 0.9045619927232018, "grad_norm": 0.23239524589251495, "learning_rate": 8.369048163665281e-05, "loss": 0.5507, "step": 3232 }, { "epoch": 0.904841869577386, "grad_norm": 0.22496197247981775, "learning_rate": 8.367908192646882e-05, "loss": 0.5093, "step": 3233 }, { "epoch": 0.9051217464315701, "grad_norm": 0.2266084138636589, "learning_rate": 8.366767901069097e-05, "loss": 0.5493, "step": 3234 }, { "epoch": 0.9054016232857542, "grad_norm": 0.223294142829303, "learning_rate": 8.365627289040457e-05, "loss": 0.5297, "step": 3235 }, { "epoch": 0.9056815001399384, "grad_norm": 0.2463666179530782, "learning_rate": 8.36448635666953e-05, "loss": 0.5465, "step": 3236 }, { "epoch": 0.9059613769941226, "grad_norm": 0.21553561426283782, "learning_rate": 8.363345104064907e-05, "loss": 0.505, "step": 3237 }, { "epoch": 0.9062412538483068, "grad_norm": 0.22666488857798106, "learning_rate": 8.362203531335216e-05, "loss": 0.5507, "step": 3238 }, { "epoch": 0.9065211307024909, "grad_norm": 0.21703062964799782, "learning_rate": 8.361061638589109e-05, "loss": 0.5511, "step": 3239 }, { "epoch": 0.906801007556675, "grad_norm": 0.21897725160879003, "learning_rate": 8.359919425935275e-05, "loss": 0.5498, "step": 3240 }, { "epoch": 0.9070808844108592, "grad_norm": 0.22635000840894065, "learning_rate": 8.35877689348243e-05, "loss": 0.5341, "step": 3241 }, { "epoch": 0.9073607612650434, "grad_norm": 0.21723297443205186, "learning_rate": 8.35763404133932e-05, "loss": 0.5077, "step": 3242 }, { "epoch": 0.9076406381192276, "grad_norm": 0.22043675691302084, "learning_rate": 8.356490869614722e-05, "loss": 0.5272, "step": 3243 }, { "epoch": 0.9079205149734118, "grad_norm": 0.21856138350677917, "learning_rate": 8.355347378417444e-05, "loss": 0.523, "step": 3244 }, { "epoch": 0.9082003918275958, "grad_norm": 0.21218178468442178, "learning_rate": 8.354203567856325e-05, "loss": 0.5375, "step": 3245 }, { "epoch": 0.90848026868178, "grad_norm": 0.2034089891436105, "learning_rate": 8.353059438040234e-05, "loss": 0.5424, "step": 3246 }, { "epoch": 0.9087601455359642, "grad_norm": 0.21769263669876449, "learning_rate": 8.351914989078067e-05, "loss": 0.5218, "step": 3247 }, { "epoch": 0.9090400223901484, "grad_norm": 0.22274418924076697, "learning_rate": 8.350770221078756e-05, "loss": 0.5144, "step": 3248 }, { "epoch": 0.9093198992443325, "grad_norm": 0.22355166839093885, "learning_rate": 8.34962513415126e-05, "loss": 0.53, "step": 3249 }, { "epoch": 0.9095997760985166, "grad_norm": 0.2112130469842736, "learning_rate": 8.348479728404568e-05, "loss": 0.5327, "step": 3250 }, { "epoch": 0.9098796529527008, "grad_norm": 0.2184856519406378, "learning_rate": 8.347334003947702e-05, "loss": 0.5393, "step": 3251 }, { "epoch": 0.910159529806885, "grad_norm": 0.23537860474721548, "learning_rate": 8.346187960889713e-05, "loss": 0.5296, "step": 3252 }, { "epoch": 0.9104394066610692, "grad_norm": 0.2251462655166089, "learning_rate": 8.345041599339679e-05, "loss": 0.5313, "step": 3253 }, { "epoch": 0.9107192835152533, "grad_norm": 0.21360222223163164, "learning_rate": 8.343894919406715e-05, "loss": 0.5324, "step": 3254 }, { "epoch": 0.9109991603694374, "grad_norm": 0.21448874261285444, "learning_rate": 8.34274792119996e-05, "loss": 0.5341, "step": 3255 }, { "epoch": 0.9112790372236216, "grad_norm": 0.22939842431716173, "learning_rate": 8.341600604828587e-05, "loss": 0.5194, "step": 3256 }, { "epoch": 0.9115589140778058, "grad_norm": 0.22299870746661596, "learning_rate": 8.340452970401797e-05, "loss": 0.525, "step": 3257 }, { "epoch": 0.9118387909319899, "grad_norm": 0.2153913886879192, "learning_rate": 8.339305018028825e-05, "loss": 0.5038, "step": 3258 }, { "epoch": 0.9121186677861741, "grad_norm": 0.22649404750533841, "learning_rate": 8.338156747818932e-05, "loss": 0.5396, "step": 3259 }, { "epoch": 0.9123985446403582, "grad_norm": 0.22601307943292215, "learning_rate": 8.33700815988141e-05, "loss": 0.55, "step": 3260 }, { "epoch": 0.9126784214945424, "grad_norm": 0.2249755063093647, "learning_rate": 8.335859254325586e-05, "loss": 0.5343, "step": 3261 }, { "epoch": 0.9129582983487265, "grad_norm": 0.2187030142320576, "learning_rate": 8.334710031260809e-05, "loss": 0.5097, "step": 3262 }, { "epoch": 0.9132381752029107, "grad_norm": 0.21936405476969345, "learning_rate": 8.333560490796466e-05, "loss": 0.5478, "step": 3263 }, { "epoch": 0.9135180520570949, "grad_norm": 0.2296443547918883, "learning_rate": 8.332410633041969e-05, "loss": 0.5337, "step": 3264 }, { "epoch": 0.9137979289112791, "grad_norm": 0.2235279234121592, "learning_rate": 8.331260458106763e-05, "loss": 0.5303, "step": 3265 }, { "epoch": 0.9140778057654632, "grad_norm": 0.20620001354942905, "learning_rate": 8.330109966100321e-05, "loss": 0.5382, "step": 3266 }, { "epoch": 0.9143576826196473, "grad_norm": 0.24224294750708605, "learning_rate": 8.328959157132149e-05, "loss": 0.5307, "step": 3267 }, { "epoch": 0.9146375594738315, "grad_norm": 0.22520339984118107, "learning_rate": 8.327808031311781e-05, "loss": 0.5374, "step": 3268 }, { "epoch": 0.9149174363280157, "grad_norm": 0.21908728497768143, "learning_rate": 8.326656588748783e-05, "loss": 0.511, "step": 3269 }, { "epoch": 0.9151973131821999, "grad_norm": 0.22056425570357524, "learning_rate": 8.325504829552748e-05, "loss": 0.538, "step": 3270 }, { "epoch": 0.915477190036384, "grad_norm": 0.2169779119488914, "learning_rate": 8.3243527538333e-05, "loss": 0.5476, "step": 3271 }, { "epoch": 0.9157570668905681, "grad_norm": 0.22693785290832416, "learning_rate": 8.323200361700099e-05, "loss": 0.5532, "step": 3272 }, { "epoch": 0.9160369437447523, "grad_norm": 0.21859137257035782, "learning_rate": 8.322047653262828e-05, "loss": 0.5396, "step": 3273 }, { "epoch": 0.9163168205989365, "grad_norm": 0.22368091938009044, "learning_rate": 8.3208946286312e-05, "loss": 0.5238, "step": 3274 }, { "epoch": 0.9165966974531207, "grad_norm": 0.2234209802865883, "learning_rate": 8.319741287914963e-05, "loss": 0.5248, "step": 3275 }, { "epoch": 0.9168765743073047, "grad_norm": 0.22535030226742628, "learning_rate": 8.318587631223893e-05, "loss": 0.5239, "step": 3276 }, { "epoch": 0.9171564511614889, "grad_norm": 0.21695281932030427, "learning_rate": 8.317433658667792e-05, "loss": 0.5468, "step": 3277 }, { "epoch": 0.9174363280156731, "grad_norm": 0.22308396551305662, "learning_rate": 8.316279370356502e-05, "loss": 0.5379, "step": 3278 }, { "epoch": 0.9177162048698573, "grad_norm": 0.20666742442512492, "learning_rate": 8.315124766399884e-05, "loss": 0.5089, "step": 3279 }, { "epoch": 0.9179960817240415, "grad_norm": 0.21970520284983006, "learning_rate": 8.313969846907834e-05, "loss": 0.5111, "step": 3280 }, { "epoch": 0.9182759585782255, "grad_norm": 0.21344841959442235, "learning_rate": 8.31281461199028e-05, "loss": 0.4997, "step": 3281 }, { "epoch": 0.9185558354324097, "grad_norm": 0.21641038559178405, "learning_rate": 8.311659061757177e-05, "loss": 0.5144, "step": 3282 }, { "epoch": 0.9188357122865939, "grad_norm": 0.22139600923036631, "learning_rate": 8.310503196318511e-05, "loss": 0.533, "step": 3283 }, { "epoch": 0.9191155891407781, "grad_norm": 0.2184096652559976, "learning_rate": 8.309347015784298e-05, "loss": 0.4991, "step": 3284 }, { "epoch": 0.9193954659949622, "grad_norm": 0.22664252510152488, "learning_rate": 8.308190520264583e-05, "loss": 0.5185, "step": 3285 }, { "epoch": 0.9196753428491464, "grad_norm": 0.23589148483698946, "learning_rate": 8.307033709869443e-05, "loss": 0.5313, "step": 3286 }, { "epoch": 0.9199552197033305, "grad_norm": 0.23207782520666115, "learning_rate": 8.305876584708982e-05, "loss": 0.5357, "step": 3287 }, { "epoch": 0.9202350965575147, "grad_norm": 0.23192084926906445, "learning_rate": 8.30471914489334e-05, "loss": 0.5614, "step": 3288 }, { "epoch": 0.9205149734116989, "grad_norm": 0.21986567938578405, "learning_rate": 8.30356139053268e-05, "loss": 0.532, "step": 3289 }, { "epoch": 0.920794850265883, "grad_norm": 0.213230683261702, "learning_rate": 8.302403321737195e-05, "loss": 0.526, "step": 3290 }, { "epoch": 0.9210747271200672, "grad_norm": 0.22119328715545494, "learning_rate": 8.301244938617116e-05, "loss": 0.537, "step": 3291 }, { "epoch": 0.9213546039742513, "grad_norm": 0.21757587067757342, "learning_rate": 8.300086241282694e-05, "loss": 0.5421, "step": 3292 }, { "epoch": 0.9216344808284355, "grad_norm": 0.2146811201840199, "learning_rate": 8.298927229844218e-05, "loss": 0.5029, "step": 3293 }, { "epoch": 0.9219143576826196, "grad_norm": 0.22371432424042492, "learning_rate": 8.297767904412002e-05, "loss": 0.5317, "step": 3294 }, { "epoch": 0.9221942345368038, "grad_norm": 0.20763289845884048, "learning_rate": 8.296608265096388e-05, "loss": 0.5373, "step": 3295 }, { "epoch": 0.922474111390988, "grad_norm": 0.21430336874827588, "learning_rate": 8.295448312007756e-05, "loss": 0.5215, "step": 3296 }, { "epoch": 0.9227539882451721, "grad_norm": 0.21709980732777526, "learning_rate": 8.294288045256511e-05, "loss": 0.5519, "step": 3297 }, { "epoch": 0.9230338650993563, "grad_norm": 0.2103398948722985, "learning_rate": 8.293127464953083e-05, "loss": 0.5196, "step": 3298 }, { "epoch": 0.9233137419535404, "grad_norm": 0.2183885312635107, "learning_rate": 8.291966571207943e-05, "loss": 0.5359, "step": 3299 }, { "epoch": 0.9235936188077246, "grad_norm": 0.22417649082857644, "learning_rate": 8.29080536413158e-05, "loss": 0.5508, "step": 3300 }, { "epoch": 0.9238734956619088, "grad_norm": 0.2242180021228506, "learning_rate": 8.28964384383452e-05, "loss": 0.5165, "step": 3301 }, { "epoch": 0.9241533725160929, "grad_norm": 0.22026698067633488, "learning_rate": 8.288482010427319e-05, "loss": 0.5367, "step": 3302 }, { "epoch": 0.924433249370277, "grad_norm": 0.22307198854070948, "learning_rate": 8.287319864020558e-05, "loss": 0.5269, "step": 3303 }, { "epoch": 0.9247131262244612, "grad_norm": 0.2161676425004044, "learning_rate": 8.286157404724853e-05, "loss": 0.5049, "step": 3304 }, { "epoch": 0.9249930030786454, "grad_norm": 0.2178264056156661, "learning_rate": 8.284994632650847e-05, "loss": 0.5336, "step": 3305 }, { "epoch": 0.9252728799328296, "grad_norm": 0.2220693587557796, "learning_rate": 8.283831547909213e-05, "loss": 0.5383, "step": 3306 }, { "epoch": 0.9255527567870138, "grad_norm": 0.22821604070015108, "learning_rate": 8.282668150610655e-05, "loss": 0.5213, "step": 3307 }, { "epoch": 0.9258326336411978, "grad_norm": 0.20891709009526233, "learning_rate": 8.281504440865905e-05, "loss": 0.553, "step": 3308 }, { "epoch": 0.926112510495382, "grad_norm": 0.2522342969506194, "learning_rate": 8.280340418785726e-05, "loss": 0.5359, "step": 3309 }, { "epoch": 0.9263923873495662, "grad_norm": 0.21913336319177137, "learning_rate": 8.279176084480909e-05, "loss": 0.5231, "step": 3310 }, { "epoch": 0.9266722642037504, "grad_norm": 0.22496493748652746, "learning_rate": 8.278011438062276e-05, "loss": 0.5362, "step": 3311 }, { "epoch": 0.9269521410579346, "grad_norm": 0.2135751325818056, "learning_rate": 8.27684647964068e-05, "loss": 0.511, "step": 3312 }, { "epoch": 0.9272320179121186, "grad_norm": 0.22436336250045402, "learning_rate": 8.275681209327002e-05, "loss": 0.5526, "step": 3313 }, { "epoch": 0.9275118947663028, "grad_norm": 0.2999163446230905, "learning_rate": 8.274515627232153e-05, "loss": 0.5456, "step": 3314 }, { "epoch": 0.927791771620487, "grad_norm": 0.2322438897760758, "learning_rate": 8.273349733467076e-05, "loss": 0.5244, "step": 3315 }, { "epoch": 0.9280716484746712, "grad_norm": 0.22875002476722534, "learning_rate": 8.272183528142737e-05, "loss": 0.5394, "step": 3316 }, { "epoch": 0.9283515253288553, "grad_norm": 0.23141460413703294, "learning_rate": 8.271017011370136e-05, "loss": 0.525, "step": 3317 }, { "epoch": 0.9286314021830394, "grad_norm": 0.23110037584971857, "learning_rate": 8.269850183260309e-05, "loss": 0.5216, "step": 3318 }, { "epoch": 0.9289112790372236, "grad_norm": 0.24909584556341074, "learning_rate": 8.26868304392431e-05, "loss": 0.5143, "step": 3319 }, { "epoch": 0.9291911558914078, "grad_norm": 0.22769543995760647, "learning_rate": 8.26751559347323e-05, "loss": 0.5303, "step": 3320 }, { "epoch": 0.929471032745592, "grad_norm": 0.2190293330582465, "learning_rate": 8.266347832018185e-05, "loss": 0.5116, "step": 3321 }, { "epoch": 0.9297509095997761, "grad_norm": 0.2154438364528873, "learning_rate": 8.265179759670326e-05, "loss": 0.525, "step": 3322 }, { "epoch": 0.9300307864539603, "grad_norm": 0.21866564123388582, "learning_rate": 8.26401137654083e-05, "loss": 0.5515, "step": 3323 }, { "epoch": 0.9303106633081444, "grad_norm": 0.22965351157119618, "learning_rate": 8.262842682740905e-05, "loss": 0.5275, "step": 3324 }, { "epoch": 0.9305905401623286, "grad_norm": 0.23206410004529923, "learning_rate": 8.261673678381786e-05, "loss": 0.5497, "step": 3325 }, { "epoch": 0.9308704170165127, "grad_norm": 0.22020100188994954, "learning_rate": 8.260504363574741e-05, "loss": 0.5329, "step": 3326 }, { "epoch": 0.9311502938706969, "grad_norm": 0.2280782425820405, "learning_rate": 8.259334738431066e-05, "loss": 0.5187, "step": 3327 }, { "epoch": 0.9314301707248811, "grad_norm": 0.21920488734262566, "learning_rate": 8.258164803062088e-05, "loss": 0.5287, "step": 3328 }, { "epoch": 0.9317100475790652, "grad_norm": 0.22547720610684008, "learning_rate": 8.256994557579156e-05, "loss": 0.5385, "step": 3329 }, { "epoch": 0.9319899244332494, "grad_norm": 0.22394556448498107, "learning_rate": 8.255824002093662e-05, "loss": 0.53, "step": 3330 }, { "epoch": 0.9322698012874335, "grad_norm": 0.22206432199514495, "learning_rate": 8.254653136717016e-05, "loss": 0.537, "step": 3331 }, { "epoch": 0.9325496781416177, "grad_norm": 0.21538828155939793, "learning_rate": 8.253481961560665e-05, "loss": 0.5571, "step": 3332 }, { "epoch": 0.9328295549958019, "grad_norm": 0.2206053910702642, "learning_rate": 8.252310476736077e-05, "loss": 0.5253, "step": 3333 }, { "epoch": 0.933109431849986, "grad_norm": 0.23229006057926363, "learning_rate": 8.251138682354758e-05, "loss": 0.533, "step": 3334 }, { "epoch": 0.9333893087041701, "grad_norm": 0.2354313195974534, "learning_rate": 8.249966578528241e-05, "loss": 0.5326, "step": 3335 }, { "epoch": 0.9336691855583543, "grad_norm": 0.23758558910894242, "learning_rate": 8.248794165368085e-05, "loss": 0.5222, "step": 3336 }, { "epoch": 0.9339490624125385, "grad_norm": 0.21839564498117695, "learning_rate": 8.247621442985883e-05, "loss": 0.5472, "step": 3337 }, { "epoch": 0.9342289392667227, "grad_norm": 0.21572044108209534, "learning_rate": 8.246448411493252e-05, "loss": 0.5424, "step": 3338 }, { "epoch": 0.9345088161209067, "grad_norm": 0.22118750044316762, "learning_rate": 8.245275071001846e-05, "loss": 0.5251, "step": 3339 }, { "epoch": 0.9347886929750909, "grad_norm": 0.21007158618082628, "learning_rate": 8.244101421623341e-05, "loss": 0.5389, "step": 3340 }, { "epoch": 0.9350685698292751, "grad_norm": 0.21516354443097485, "learning_rate": 8.242927463469448e-05, "loss": 0.4985, "step": 3341 }, { "epoch": 0.9353484466834593, "grad_norm": 0.21503581987178988, "learning_rate": 8.241753196651902e-05, "loss": 0.5335, "step": 3342 }, { "epoch": 0.9356283235376435, "grad_norm": 0.2214884310011914, "learning_rate": 8.240578621282474e-05, "loss": 0.54, "step": 3343 }, { "epoch": 0.9359082003918276, "grad_norm": 0.22379935880219792, "learning_rate": 8.239403737472958e-05, "loss": 0.5275, "step": 3344 }, { "epoch": 0.9361880772460117, "grad_norm": 0.22171699044684767, "learning_rate": 8.238228545335183e-05, "loss": 0.521, "step": 3345 }, { "epoch": 0.9364679541001959, "grad_norm": 0.21222872526636102, "learning_rate": 8.237053044981001e-05, "loss": 0.5022, "step": 3346 }, { "epoch": 0.9367478309543801, "grad_norm": 0.22115768080723117, "learning_rate": 8.235877236522298e-05, "loss": 0.5197, "step": 3347 }, { "epoch": 0.9370277078085643, "grad_norm": 0.22310515684936527, "learning_rate": 8.234701120070989e-05, "loss": 0.546, "step": 3348 }, { "epoch": 0.9373075846627484, "grad_norm": 0.23072090710835966, "learning_rate": 8.233524695739017e-05, "loss": 0.5553, "step": 3349 }, { "epoch": 0.9375874615169325, "grad_norm": 0.22580325936311144, "learning_rate": 8.232347963638354e-05, "loss": 0.5366, "step": 3350 }, { "epoch": 0.9378673383711167, "grad_norm": 0.2168037341491656, "learning_rate": 8.231170923881005e-05, "loss": 0.5305, "step": 3351 }, { "epoch": 0.9381472152253009, "grad_norm": 0.21691770135249946, "learning_rate": 8.229993576578995e-05, "loss": 0.5061, "step": 3352 }, { "epoch": 0.938427092079485, "grad_norm": 0.21486304767912126, "learning_rate": 8.22881592184439e-05, "loss": 0.5179, "step": 3353 }, { "epoch": 0.9387069689336692, "grad_norm": 0.210964042393187, "learning_rate": 8.227637959789279e-05, "loss": 0.5191, "step": 3354 }, { "epoch": 0.9389868457878533, "grad_norm": 0.21109165296573837, "learning_rate": 8.22645969052578e-05, "loss": 0.5474, "step": 3355 }, { "epoch": 0.9392667226420375, "grad_norm": 0.21391586189902217, "learning_rate": 8.225281114166043e-05, "loss": 0.549, "step": 3356 }, { "epoch": 0.9395465994962217, "grad_norm": 0.23001874093667968, "learning_rate": 8.224102230822242e-05, "loss": 0.524, "step": 3357 }, { "epoch": 0.9398264763504058, "grad_norm": 0.21120555198380334, "learning_rate": 8.222923040606588e-05, "loss": 0.5254, "step": 3358 }, { "epoch": 0.94010635320459, "grad_norm": 0.21607266231750683, "learning_rate": 8.221743543631313e-05, "loss": 0.5424, "step": 3359 }, { "epoch": 0.9403862300587741, "grad_norm": 0.2167694700672283, "learning_rate": 8.220563740008687e-05, "loss": 0.5304, "step": 3360 }, { "epoch": 0.9406661069129583, "grad_norm": 0.20829066435264945, "learning_rate": 8.219383629850998e-05, "loss": 0.5074, "step": 3361 }, { "epoch": 0.9409459837671424, "grad_norm": 0.21946557578426373, "learning_rate": 8.218203213270576e-05, "loss": 0.525, "step": 3362 }, { "epoch": 0.9412258606213266, "grad_norm": 0.21422867572085813, "learning_rate": 8.21702249037977e-05, "loss": 0.5343, "step": 3363 }, { "epoch": 0.9415057374755108, "grad_norm": 0.21225627725671894, "learning_rate": 8.215841461290963e-05, "loss": 0.5166, "step": 3364 }, { "epoch": 0.941785614329695, "grad_norm": 0.2125501539626726, "learning_rate": 8.214660126116566e-05, "loss": 0.5356, "step": 3365 }, { "epoch": 0.9420654911838791, "grad_norm": 0.22675920099238844, "learning_rate": 8.213478484969017e-05, "loss": 0.5314, "step": 3366 }, { "epoch": 0.9423453680380632, "grad_norm": 0.23108570578589452, "learning_rate": 8.21229653796079e-05, "loss": 0.5298, "step": 3367 }, { "epoch": 0.9426252448922474, "grad_norm": 0.2218603161067393, "learning_rate": 8.211114285204378e-05, "loss": 0.5314, "step": 3368 }, { "epoch": 0.9429051217464316, "grad_norm": 0.23463134342368347, "learning_rate": 8.209931726812312e-05, "loss": 0.5491, "step": 3369 }, { "epoch": 0.9431849986006158, "grad_norm": 0.22482437164367458, "learning_rate": 8.208748862897147e-05, "loss": 0.5119, "step": 3370 }, { "epoch": 0.9434648754547998, "grad_norm": 0.2181993821710289, "learning_rate": 8.20756569357147e-05, "loss": 0.4967, "step": 3371 }, { "epoch": 0.943744752308984, "grad_norm": 0.21704410621928005, "learning_rate": 8.206382218947895e-05, "loss": 0.5282, "step": 3372 }, { "epoch": 0.9440246291631682, "grad_norm": 0.21578762209142244, "learning_rate": 8.205198439139066e-05, "loss": 0.5426, "step": 3373 }, { "epoch": 0.9443045060173524, "grad_norm": 0.22317736739177998, "learning_rate": 8.204014354257654e-05, "loss": 0.5132, "step": 3374 }, { "epoch": 0.9445843828715366, "grad_norm": 0.22459777955028615, "learning_rate": 8.202829964416364e-05, "loss": 0.5611, "step": 3375 }, { "epoch": 0.9448642597257206, "grad_norm": 0.20569382551072954, "learning_rate": 8.201645269727925e-05, "loss": 0.5087, "step": 3376 }, { "epoch": 0.9451441365799048, "grad_norm": 0.22181050657136128, "learning_rate": 8.200460270305097e-05, "loss": 0.5086, "step": 3377 }, { "epoch": 0.945424013434089, "grad_norm": 0.22011878276120816, "learning_rate": 8.199274966260669e-05, "loss": 0.5208, "step": 3378 }, { "epoch": 0.9457038902882732, "grad_norm": 0.22850994647719924, "learning_rate": 8.198089357707458e-05, "loss": 0.5281, "step": 3379 }, { "epoch": 0.9459837671424574, "grad_norm": 0.21889111596981828, "learning_rate": 8.196903444758312e-05, "loss": 0.5356, "step": 3380 }, { "epoch": 0.9462636439966414, "grad_norm": 0.2158418041627815, "learning_rate": 8.195717227526109e-05, "loss": 0.5237, "step": 3381 }, { "epoch": 0.9465435208508256, "grad_norm": 0.2089107590248314, "learning_rate": 8.19453070612375e-05, "loss": 0.5309, "step": 3382 }, { "epoch": 0.9468233977050098, "grad_norm": 0.2286941673056431, "learning_rate": 8.19334388066417e-05, "loss": 0.5195, "step": 3383 }, { "epoch": 0.947103274559194, "grad_norm": 0.2141772255391924, "learning_rate": 8.192156751260332e-05, "loss": 0.5284, "step": 3384 }, { "epoch": 0.9473831514133781, "grad_norm": 0.21727007644459415, "learning_rate": 8.190969318025228e-05, "loss": 0.5296, "step": 3385 }, { "epoch": 0.9476630282675623, "grad_norm": 0.21485065824015953, "learning_rate": 8.189781581071879e-05, "loss": 0.5279, "step": 3386 }, { "epoch": 0.9479429051217464, "grad_norm": 0.2200669410310803, "learning_rate": 8.188593540513334e-05, "loss": 0.5366, "step": 3387 }, { "epoch": 0.9482227819759306, "grad_norm": 0.2109932805242757, "learning_rate": 8.18740519646267e-05, "loss": 0.5347, "step": 3388 }, { "epoch": 0.9485026588301148, "grad_norm": 0.2213002976546276, "learning_rate": 8.186216549032995e-05, "loss": 0.5469, "step": 3389 }, { "epoch": 0.9487825356842989, "grad_norm": 0.21103488566562068, "learning_rate": 8.185027598337446e-05, "loss": 0.5183, "step": 3390 }, { "epoch": 0.9490624125384831, "grad_norm": 0.23950365831087736, "learning_rate": 8.183838344489187e-05, "loss": 0.5366, "step": 3391 }, { "epoch": 0.9493422893926672, "grad_norm": 0.21496148762292563, "learning_rate": 8.182648787601414e-05, "loss": 0.531, "step": 3392 }, { "epoch": 0.9496221662468514, "grad_norm": 0.2239322942326302, "learning_rate": 8.181458927787347e-05, "loss": 0.5209, "step": 3393 }, { "epoch": 0.9499020431010355, "grad_norm": 0.23274600817797758, "learning_rate": 8.180268765160237e-05, "loss": 0.5488, "step": 3394 }, { "epoch": 0.9501819199552197, "grad_norm": 0.2188450011394307, "learning_rate": 8.179078299833367e-05, "loss": 0.5333, "step": 3395 }, { "epoch": 0.9504617968094039, "grad_norm": 0.22476830987697433, "learning_rate": 8.177887531920045e-05, "loss": 0.5284, "step": 3396 }, { "epoch": 0.950741673663588, "grad_norm": 0.21649046045356746, "learning_rate": 8.17669646153361e-05, "loss": 0.5277, "step": 3397 }, { "epoch": 0.9510215505177722, "grad_norm": 0.22288881244129846, "learning_rate": 8.175505088787426e-05, "loss": 0.5371, "step": 3398 }, { "epoch": 0.9513014273719563, "grad_norm": 0.22731584658365314, "learning_rate": 8.174313413794892e-05, "loss": 0.5338, "step": 3399 }, { "epoch": 0.9515813042261405, "grad_norm": 0.22274506340840244, "learning_rate": 8.173121436669428e-05, "loss": 0.5086, "step": 3400 }, { "epoch": 0.9518611810803247, "grad_norm": 0.22497675125736963, "learning_rate": 8.171929157524491e-05, "loss": 0.5259, "step": 3401 }, { "epoch": 0.9521410579345088, "grad_norm": 0.22830048832712135, "learning_rate": 8.170736576473563e-05, "loss": 0.512, "step": 3402 }, { "epoch": 0.9524209347886929, "grad_norm": 0.25452996809266615, "learning_rate": 8.169543693630151e-05, "loss": 0.5203, "step": 3403 }, { "epoch": 0.9527008116428771, "grad_norm": 0.26772054627038366, "learning_rate": 8.168350509107795e-05, "loss": 0.5571, "step": 3404 }, { "epoch": 0.9529806884970613, "grad_norm": 0.22907334090385764, "learning_rate": 8.167157023020066e-05, "loss": 0.5319, "step": 3405 }, { "epoch": 0.9532605653512455, "grad_norm": 0.47850244409919623, "learning_rate": 8.16596323548056e-05, "loss": 0.5498, "step": 3406 }, { "epoch": 0.9535404422054297, "grad_norm": 0.26977769050935135, "learning_rate": 8.164769146602899e-05, "loss": 0.5467, "step": 3407 }, { "epoch": 0.9538203190596137, "grad_norm": 0.47410180986287737, "learning_rate": 8.16357475650074e-05, "loss": 0.5786, "step": 3408 }, { "epoch": 0.9541001959137979, "grad_norm": 1.8304529902198994, "learning_rate": 8.162380065287766e-05, "loss": 0.5325, "step": 3409 }, { "epoch": 0.9543800727679821, "grad_norm": 0.2377845171142009, "learning_rate": 8.161185073077686e-05, "loss": 0.5227, "step": 3410 }, { "epoch": 0.9546599496221663, "grad_norm": 0.3081705322827635, "learning_rate": 8.159989779984242e-05, "loss": 0.5485, "step": 3411 }, { "epoch": 0.9549398264763505, "grad_norm": 0.2184041246946621, "learning_rate": 8.158794186121202e-05, "loss": 0.5656, "step": 3412 }, { "epoch": 0.9552197033305345, "grad_norm": 0.22391711746891552, "learning_rate": 8.157598291602362e-05, "loss": 0.5335, "step": 3413 }, { "epoch": 0.9554995801847187, "grad_norm": 0.2144146398918876, "learning_rate": 8.15640209654155e-05, "loss": 0.5428, "step": 3414 }, { "epoch": 0.9557794570389029, "grad_norm": 0.22302761311139452, "learning_rate": 8.15520560105262e-05, "loss": 0.5397, "step": 3415 }, { "epoch": 0.9560593338930871, "grad_norm": 0.22297966431799787, "learning_rate": 8.154008805249454e-05, "loss": 0.5183, "step": 3416 }, { "epoch": 0.9563392107472712, "grad_norm": 0.2161617150362051, "learning_rate": 8.152811709245967e-05, "loss": 0.5467, "step": 3417 }, { "epoch": 0.9566190876014553, "grad_norm": 0.23247263670525822, "learning_rate": 8.151614313156095e-05, "loss": 0.5368, "step": 3418 }, { "epoch": 0.9568989644556395, "grad_norm": 0.21350355080837546, "learning_rate": 8.150416617093808e-05, "loss": 0.5536, "step": 3419 }, { "epoch": 0.9571788413098237, "grad_norm": 0.22149224523024807, "learning_rate": 8.149218621173105e-05, "loss": 0.5314, "step": 3420 }, { "epoch": 0.9574587181640078, "grad_norm": 0.22045822843111815, "learning_rate": 8.14802032550801e-05, "loss": 0.5318, "step": 3421 }, { "epoch": 0.957738595018192, "grad_norm": 0.21141350617451612, "learning_rate": 8.14682173021258e-05, "loss": 0.5213, "step": 3422 }, { "epoch": 0.9580184718723762, "grad_norm": 0.23370290276535358, "learning_rate": 8.145622835400895e-05, "loss": 0.5603, "step": 3423 }, { "epoch": 0.9582983487265603, "grad_norm": 0.22957506378932596, "learning_rate": 8.144423641187069e-05, "loss": 0.5256, "step": 3424 }, { "epoch": 0.9585782255807445, "grad_norm": 0.23654506009966345, "learning_rate": 8.14322414768524e-05, "loss": 0.5463, "step": 3425 }, { "epoch": 0.9588581024349286, "grad_norm": 0.235485318675011, "learning_rate": 8.142024355009577e-05, "loss": 0.5059, "step": 3426 }, { "epoch": 0.9591379792891128, "grad_norm": 0.23097179799511522, "learning_rate": 8.140824263274279e-05, "loss": 0.5356, "step": 3427 }, { "epoch": 0.959417856143297, "grad_norm": 0.2302862749501254, "learning_rate": 8.139623872593567e-05, "loss": 0.5787, "step": 3428 }, { "epoch": 0.9596977329974811, "grad_norm": 0.2193323203769243, "learning_rate": 8.138423183081699e-05, "loss": 0.5417, "step": 3429 }, { "epoch": 0.9599776098516652, "grad_norm": 0.2277662806104916, "learning_rate": 8.137222194852955e-05, "loss": 0.5642, "step": 3430 }, { "epoch": 0.9602574867058494, "grad_norm": 0.2141847213081554, "learning_rate": 8.136020908021647e-05, "loss": 0.5221, "step": 3431 }, { "epoch": 0.9605373635600336, "grad_norm": 0.21106804184196115, "learning_rate": 8.134819322702114e-05, "loss": 0.5198, "step": 3432 }, { "epoch": 0.9608172404142178, "grad_norm": 0.23110787874906272, "learning_rate": 8.133617439008723e-05, "loss": 0.4946, "step": 3433 }, { "epoch": 0.9610971172684019, "grad_norm": 0.21659770119960287, "learning_rate": 8.13241525705587e-05, "loss": 0.5259, "step": 3434 }, { "epoch": 0.961376994122586, "grad_norm": 0.22003405504635687, "learning_rate": 8.13121277695798e-05, "loss": 0.5285, "step": 3435 }, { "epoch": 0.9616568709767702, "grad_norm": 0.22217668462938064, "learning_rate": 8.130009998829504e-05, "loss": 0.5224, "step": 3436 }, { "epoch": 0.9619367478309544, "grad_norm": 0.23345429987225355, "learning_rate": 8.128806922784927e-05, "loss": 0.5642, "step": 3437 }, { "epoch": 0.9622166246851386, "grad_norm": 0.22556320906052635, "learning_rate": 8.127603548938754e-05, "loss": 0.5116, "step": 3438 }, { "epoch": 0.9624965015393226, "grad_norm": 0.2256630583184537, "learning_rate": 8.126399877405524e-05, "loss": 0.5292, "step": 3439 }, { "epoch": 0.9627763783935068, "grad_norm": 0.22957429736417528, "learning_rate": 8.125195908299804e-05, "loss": 0.5326, "step": 3440 }, { "epoch": 0.963056255247691, "grad_norm": 0.21724747228624117, "learning_rate": 8.123991641736189e-05, "loss": 0.5214, "step": 3441 }, { "epoch": 0.9633361321018752, "grad_norm": 0.21776413520686494, "learning_rate": 8.1227870778293e-05, "loss": 0.5398, "step": 3442 }, { "epoch": 0.9636160089560594, "grad_norm": 0.22292545990751855, "learning_rate": 8.121582216693791e-05, "loss": 0.5281, "step": 3443 }, { "epoch": 0.9638958858102435, "grad_norm": 0.22405172895322287, "learning_rate": 8.120377058444336e-05, "loss": 0.5454, "step": 3444 }, { "epoch": 0.9641757626644276, "grad_norm": 0.22737436469087888, "learning_rate": 8.11917160319565e-05, "loss": 0.5463, "step": 3445 }, { "epoch": 0.9644556395186118, "grad_norm": 0.23344539867249173, "learning_rate": 8.117965851062463e-05, "loss": 0.5177, "step": 3446 }, { "epoch": 0.964735516372796, "grad_norm": 0.2101934078395959, "learning_rate": 8.11675980215954e-05, "loss": 0.5472, "step": 3447 }, { "epoch": 0.9650153932269802, "grad_norm": 0.21299782200792122, "learning_rate": 8.115553456601676e-05, "loss": 0.5107, "step": 3448 }, { "epoch": 0.9652952700811643, "grad_norm": 0.20530871191472141, "learning_rate": 8.114346814503689e-05, "loss": 0.5159, "step": 3449 }, { "epoch": 0.9655751469353484, "grad_norm": 0.21161623004748173, "learning_rate": 8.11313987598043e-05, "loss": 0.5179, "step": 3450 }, { "epoch": 0.9658550237895326, "grad_norm": 0.21483478174616114, "learning_rate": 8.111932641146775e-05, "loss": 0.5222, "step": 3451 }, { "epoch": 0.9661349006437168, "grad_norm": 0.255745208170339, "learning_rate": 8.110725110117629e-05, "loss": 0.5141, "step": 3452 }, { "epoch": 0.966414777497901, "grad_norm": 0.22620658357394077, "learning_rate": 8.109517283007926e-05, "loss": 0.5472, "step": 3453 }, { "epoch": 0.9666946543520851, "grad_norm": 0.22280728342450096, "learning_rate": 8.108309159932629e-05, "loss": 0.5164, "step": 3454 }, { "epoch": 0.9669745312062692, "grad_norm": 0.2117522196193627, "learning_rate": 8.107100741006724e-05, "loss": 0.5041, "step": 3455 }, { "epoch": 0.9672544080604534, "grad_norm": 0.22689956471804157, "learning_rate": 8.105892026345232e-05, "loss": 0.5265, "step": 3456 }, { "epoch": 0.9675342849146376, "grad_norm": 0.22073307051907443, "learning_rate": 8.104683016063199e-05, "loss": 0.5392, "step": 3457 }, { "epoch": 0.9678141617688217, "grad_norm": 0.22294195192815078, "learning_rate": 8.1034737102757e-05, "loss": 0.5536, "step": 3458 }, { "epoch": 0.9680940386230059, "grad_norm": 0.22542057272774796, "learning_rate": 8.102264109097834e-05, "loss": 0.5474, "step": 3459 }, { "epoch": 0.96837391547719, "grad_norm": 0.222310929724708, "learning_rate": 8.101054212644734e-05, "loss": 0.5301, "step": 3460 }, { "epoch": 0.9686537923313742, "grad_norm": 0.2220964572142937, "learning_rate": 8.09984402103156e-05, "loss": 0.5349, "step": 3461 }, { "epoch": 0.9689336691855583, "grad_norm": 0.2604818174403925, "learning_rate": 8.098633534373495e-05, "loss": 0.5201, "step": 3462 }, { "epoch": 0.9692135460397425, "grad_norm": 0.2720256034646371, "learning_rate": 8.097422752785757e-05, "loss": 0.5205, "step": 3463 }, { "epoch": 0.9694934228939267, "grad_norm": 0.21645967778039324, "learning_rate": 8.096211676383587e-05, "loss": 0.519, "step": 3464 }, { "epoch": 0.9697732997481109, "grad_norm": 0.22463050221626324, "learning_rate": 8.095000305282256e-05, "loss": 0.5471, "step": 3465 }, { "epoch": 0.970053176602295, "grad_norm": 0.21294014795145857, "learning_rate": 8.093788639597066e-05, "loss": 0.5184, "step": 3466 }, { "epoch": 0.9703330534564791, "grad_norm": 0.22764952208155148, "learning_rate": 8.09257667944334e-05, "loss": 0.5385, "step": 3467 }, { "epoch": 0.9706129303106633, "grad_norm": 0.22290847883624781, "learning_rate": 8.091364424936434e-05, "loss": 0.5316, "step": 3468 }, { "epoch": 0.9708928071648475, "grad_norm": 0.22385138132278512, "learning_rate": 8.090151876191732e-05, "loss": 0.5138, "step": 3469 }, { "epoch": 0.9711726840190317, "grad_norm": 0.22536306681068463, "learning_rate": 8.088939033324646e-05, "loss": 0.5202, "step": 3470 }, { "epoch": 0.9714525608732157, "grad_norm": 0.2179320847304912, "learning_rate": 8.087725896450613e-05, "loss": 0.5404, "step": 3471 }, { "epoch": 0.9717324377273999, "grad_norm": 0.23475091902179873, "learning_rate": 8.086512465685102e-05, "loss": 0.5253, "step": 3472 }, { "epoch": 0.9720123145815841, "grad_norm": 0.21569738041156963, "learning_rate": 8.085298741143605e-05, "loss": 0.533, "step": 3473 }, { "epoch": 0.9722921914357683, "grad_norm": 0.21294782426391667, "learning_rate": 8.084084722941648e-05, "loss": 0.4959, "step": 3474 }, { "epoch": 0.9725720682899525, "grad_norm": 0.2590805337167687, "learning_rate": 8.08287041119478e-05, "loss": 0.5243, "step": 3475 }, { "epoch": 0.9728519451441365, "grad_norm": 0.22947616671633794, "learning_rate": 8.081655806018582e-05, "loss": 0.5192, "step": 3476 }, { "epoch": 0.9731318219983207, "grad_norm": 0.21757589819921253, "learning_rate": 8.080440907528659e-05, "loss": 0.5185, "step": 3477 }, { "epoch": 0.9734116988525049, "grad_norm": 0.27752038602754175, "learning_rate": 8.079225715840646e-05, "loss": 0.5388, "step": 3478 }, { "epoch": 0.9736915757066891, "grad_norm": 0.21265504628185963, "learning_rate": 8.078010231070206e-05, "loss": 0.5654, "step": 3479 }, { "epoch": 0.9739714525608733, "grad_norm": 0.2565727224751588, "learning_rate": 8.07679445333303e-05, "loss": 0.5093, "step": 3480 }, { "epoch": 0.9742513294150573, "grad_norm": 0.2266504434173381, "learning_rate": 8.075578382744835e-05, "loss": 0.5269, "step": 3481 }, { "epoch": 0.9745312062692415, "grad_norm": 0.2530018381690689, "learning_rate": 8.074362019421369e-05, "loss": 0.5493, "step": 3482 }, { "epoch": 0.9748110831234257, "grad_norm": 0.22279415055269902, "learning_rate": 8.073145363478404e-05, "loss": 0.528, "step": 3483 }, { "epoch": 0.9750909599776099, "grad_norm": 0.2203374998023382, "learning_rate": 8.071928415031744e-05, "loss": 0.5315, "step": 3484 }, { "epoch": 0.975370836831794, "grad_norm": 0.2329537787485757, "learning_rate": 8.070711174197216e-05, "loss": 0.5163, "step": 3485 }, { "epoch": 0.9756507136859782, "grad_norm": 0.23049755696239946, "learning_rate": 8.069493641090682e-05, "loss": 0.5287, "step": 3486 }, { "epoch": 0.9759305905401623, "grad_norm": 0.326471449083994, "learning_rate": 8.068275815828025e-05, "loss": 0.5114, "step": 3487 }, { "epoch": 0.9762104673943465, "grad_norm": 0.2111422990739202, "learning_rate": 8.067057698525158e-05, "loss": 0.503, "step": 3488 }, { "epoch": 0.9764903442485307, "grad_norm": 0.37376760279297455, "learning_rate": 8.065839289298022e-05, "loss": 0.5139, "step": 3489 }, { "epoch": 0.9767702211027148, "grad_norm": 0.25463993604496876, "learning_rate": 8.064620588262588e-05, "loss": 0.5351, "step": 3490 }, { "epoch": 0.977050097956899, "grad_norm": 9.762766576942438, "learning_rate": 8.06340159553485e-05, "loss": 0.5325, "step": 3491 }, { "epoch": 0.9773299748110831, "grad_norm": 0.21411387277434157, "learning_rate": 8.062182311230832e-05, "loss": 0.5146, "step": 3492 }, { "epoch": 0.9776098516652673, "grad_norm": 0.2554389080701559, "learning_rate": 8.06096273546659e-05, "loss": 0.5352, "step": 3493 }, { "epoch": 0.9778897285194514, "grad_norm": 0.22578356573796832, "learning_rate": 8.0597428683582e-05, "loss": 0.5443, "step": 3494 }, { "epoch": 0.9781696053736356, "grad_norm": 0.2209885227506747, "learning_rate": 8.058522710021772e-05, "loss": 0.5419, "step": 3495 }, { "epoch": 0.9784494822278198, "grad_norm": 0.23048565206375848, "learning_rate": 8.05730226057344e-05, "loss": 0.5101, "step": 3496 }, { "epoch": 0.9787293590820039, "grad_norm": 0.21212395218447444, "learning_rate": 8.056081520129368e-05, "loss": 0.5107, "step": 3497 }, { "epoch": 0.979009235936188, "grad_norm": 0.31711299756514666, "learning_rate": 8.054860488805746e-05, "loss": 0.5404, "step": 3498 }, { "epoch": 0.9792891127903722, "grad_norm": 0.23485744603348208, "learning_rate": 8.053639166718794e-05, "loss": 0.5352, "step": 3499 }, { "epoch": 0.9795689896445564, "grad_norm": 0.29408204964329543, "learning_rate": 8.052417553984755e-05, "loss": 0.5427, "step": 3500 }, { "epoch": 0.9798488664987406, "grad_norm": 0.22809336985401973, "learning_rate": 8.051195650719906e-05, "loss": 0.5285, "step": 3501 }, { "epoch": 0.9801287433529247, "grad_norm": 0.22648004082745615, "learning_rate": 8.049973457040547e-05, "loss": 0.5103, "step": 3502 }, { "epoch": 0.9804086202071088, "grad_norm": 0.22843520715879986, "learning_rate": 8.048750973063008e-05, "loss": 0.5002, "step": 3503 }, { "epoch": 0.980688497061293, "grad_norm": 0.29856449350201997, "learning_rate": 8.047528198903643e-05, "loss": 0.5038, "step": 3504 }, { "epoch": 0.9809683739154772, "grad_norm": 0.23217715481736995, "learning_rate": 8.046305134678839e-05, "loss": 0.5437, "step": 3505 }, { "epoch": 0.9812482507696614, "grad_norm": 0.25092258580232923, "learning_rate": 8.045081780505007e-05, "loss": 0.5252, "step": 3506 }, { "epoch": 0.9815281276238456, "grad_norm": 0.24238971814228671, "learning_rate": 8.043858136498587e-05, "loss": 0.4997, "step": 3507 }, { "epoch": 0.9818080044780296, "grad_norm": 0.2157044047072059, "learning_rate": 8.042634202776048e-05, "loss": 0.5261, "step": 3508 }, { "epoch": 0.9820878813322138, "grad_norm": 0.23811875278582165, "learning_rate": 8.04140997945388e-05, "loss": 0.5324, "step": 3509 }, { "epoch": 0.982367758186398, "grad_norm": 0.22645701004467556, "learning_rate": 8.040185466648608e-05, "loss": 0.5334, "step": 3510 }, { "epoch": 0.9826476350405822, "grad_norm": 0.21380010388065268, "learning_rate": 8.038960664476782e-05, "loss": 0.5134, "step": 3511 }, { "epoch": 0.9829275118947663, "grad_norm": 0.20867270737824553, "learning_rate": 8.037735573054979e-05, "loss": 0.5184, "step": 3512 }, { "epoch": 0.9832073887489504, "grad_norm": 0.6408505850508476, "learning_rate": 8.036510192499803e-05, "loss": 0.5233, "step": 3513 }, { "epoch": 0.9834872656031346, "grad_norm": 0.2604155985197762, "learning_rate": 8.035284522927888e-05, "loss": 0.5459, "step": 3514 }, { "epoch": 0.9837671424573188, "grad_norm": 0.2143759329653001, "learning_rate": 8.034058564455892e-05, "loss": 0.5126, "step": 3515 }, { "epoch": 0.984047019311503, "grad_norm": 0.2254522149470925, "learning_rate": 8.032832317200506e-05, "loss": 0.5244, "step": 3516 }, { "epoch": 0.9843268961656871, "grad_norm": 0.22296227768383173, "learning_rate": 8.031605781278442e-05, "loss": 0.5425, "step": 3517 }, { "epoch": 0.9846067730198712, "grad_norm": 0.23808888295866004, "learning_rate": 8.030378956806442e-05, "loss": 0.5121, "step": 3518 }, { "epoch": 0.9848866498740554, "grad_norm": 0.21628251679951602, "learning_rate": 8.029151843901277e-05, "loss": 0.5105, "step": 3519 }, { "epoch": 0.9851665267282396, "grad_norm": 0.22557051976823203, "learning_rate": 8.027924442679744e-05, "loss": 0.5168, "step": 3520 }, { "epoch": 0.9854464035824237, "grad_norm": 0.8047584846343594, "learning_rate": 8.026696753258666e-05, "loss": 0.5362, "step": 3521 }, { "epoch": 0.9857262804366079, "grad_norm": 0.2257322789750837, "learning_rate": 8.025468775754899e-05, "loss": 0.5411, "step": 3522 }, { "epoch": 0.9860061572907921, "grad_norm": 0.21725479117049543, "learning_rate": 8.024240510285321e-05, "loss": 0.5273, "step": 3523 }, { "epoch": 0.9862860341449762, "grad_norm": 0.2194162271385188, "learning_rate": 8.023011956966837e-05, "loss": 0.5359, "step": 3524 }, { "epoch": 0.9865659109991604, "grad_norm": 0.25876227964758386, "learning_rate": 8.021783115916384e-05, "loss": 0.535, "step": 3525 }, { "epoch": 0.9868457878533445, "grad_norm": 0.2191099552855977, "learning_rate": 8.020553987250921e-05, "loss": 0.4944, "step": 3526 }, { "epoch": 0.9871256647075287, "grad_norm": 0.217320249188783, "learning_rate": 8.019324571087441e-05, "loss": 0.5326, "step": 3527 }, { "epoch": 0.9874055415617129, "grad_norm": 0.2204292530846392, "learning_rate": 8.018094867542956e-05, "loss": 0.5304, "step": 3528 }, { "epoch": 0.987685418415897, "grad_norm": 0.48734436501989903, "learning_rate": 8.016864876734514e-05, "loss": 0.5339, "step": 3529 }, { "epoch": 0.9879652952700811, "grad_norm": 0.22022547951211005, "learning_rate": 8.015634598779185e-05, "loss": 0.5736, "step": 3530 }, { "epoch": 0.9882451721242653, "grad_norm": 0.7229887197064508, "learning_rate": 8.014404033794065e-05, "loss": 0.5232, "step": 3531 }, { "epoch": 0.9885250489784495, "grad_norm": 0.3084444243431006, "learning_rate": 8.013173181896283e-05, "loss": 0.5266, "step": 3532 }, { "epoch": 0.9888049258326337, "grad_norm": 0.22077699051425648, "learning_rate": 8.01194204320299e-05, "loss": 0.509, "step": 3533 }, { "epoch": 0.9890848026868178, "grad_norm": 0.21499207519422178, "learning_rate": 8.010710617831367e-05, "loss": 0.5092, "step": 3534 }, { "epoch": 0.9893646795410019, "grad_norm": 0.21870865777949547, "learning_rate": 8.009478905898624e-05, "loss": 0.5157, "step": 3535 }, { "epoch": 0.9896445563951861, "grad_norm": 0.23674757663998383, "learning_rate": 8.008246907521995e-05, "loss": 0.5036, "step": 3536 }, { "epoch": 0.9899244332493703, "grad_norm": 0.22855947816976926, "learning_rate": 8.00701462281874e-05, "loss": 0.5349, "step": 3537 }, { "epoch": 0.9902043101035545, "grad_norm": 0.2087842790298733, "learning_rate": 8.00578205190615e-05, "loss": 0.491, "step": 3538 }, { "epoch": 0.9904841869577385, "grad_norm": 0.21314197387860112, "learning_rate": 8.004549194901542e-05, "loss": 0.5285, "step": 3539 }, { "epoch": 0.9907640638119227, "grad_norm": 0.2447348765486558, "learning_rate": 8.00331605192226e-05, "loss": 0.5288, "step": 3540 }, { "epoch": 0.9910439406661069, "grad_norm": 0.4706807986021696, "learning_rate": 8.002082623085675e-05, "loss": 0.5015, "step": 3541 }, { "epoch": 0.9913238175202911, "grad_norm": 0.22781388811872166, "learning_rate": 8.000848908509187e-05, "loss": 0.5325, "step": 3542 }, { "epoch": 0.9916036943744753, "grad_norm": 0.2190282990322059, "learning_rate": 7.999614908310218e-05, "loss": 0.5385, "step": 3543 }, { "epoch": 0.9918835712286594, "grad_norm": 0.22792660575310014, "learning_rate": 7.998380622606224e-05, "loss": 0.5231, "step": 3544 }, { "epoch": 0.9921634480828435, "grad_norm": 0.2266927437256646, "learning_rate": 7.997146051514685e-05, "loss": 0.5361, "step": 3545 }, { "epoch": 0.9924433249370277, "grad_norm": 0.30535923620371125, "learning_rate": 7.995911195153105e-05, "loss": 0.5236, "step": 3546 }, { "epoch": 0.9927232017912119, "grad_norm": 0.23941019401490524, "learning_rate": 7.994676053639024e-05, "loss": 0.5475, "step": 3547 }, { "epoch": 0.993003078645396, "grad_norm": 0.22715410710121234, "learning_rate": 7.993440627089996e-05, "loss": 0.4979, "step": 3548 }, { "epoch": 0.9932829554995802, "grad_norm": 0.3993189271581588, "learning_rate": 7.992204915623615e-05, "loss": 0.518, "step": 3549 }, { "epoch": 0.9935628323537643, "grad_norm": 0.2214579350273138, "learning_rate": 7.990968919357498e-05, "loss": 0.5389, "step": 3550 }, { "epoch": 0.9938427092079485, "grad_norm": 0.22771130491834968, "learning_rate": 7.989732638409282e-05, "loss": 0.5167, "step": 3551 }, { "epoch": 0.9941225860621327, "grad_norm": 0.22670484308075003, "learning_rate": 7.98849607289664e-05, "loss": 0.5346, "step": 3552 }, { "epoch": 0.9944024629163168, "grad_norm": 0.2112443650005292, "learning_rate": 7.987259222937272e-05, "loss": 0.5308, "step": 3553 }, { "epoch": 0.994682339770501, "grad_norm": 0.27579278071876434, "learning_rate": 7.986022088648896e-05, "loss": 0.5533, "step": 3554 }, { "epoch": 0.9949622166246851, "grad_norm": 0.20925224837009185, "learning_rate": 7.984784670149267e-05, "loss": 0.4939, "step": 3555 }, { "epoch": 0.9952420934788693, "grad_norm": 0.2269621090205349, "learning_rate": 7.983546967556165e-05, "loss": 0.5224, "step": 3556 }, { "epoch": 0.9955219703330535, "grad_norm": 0.22863293485275069, "learning_rate": 7.982308980987389e-05, "loss": 0.5326, "step": 3557 }, { "epoch": 0.9958018471872376, "grad_norm": 0.22493675349205328, "learning_rate": 7.981070710560777e-05, "loss": 0.5298, "step": 3558 }, { "epoch": 0.9960817240414218, "grad_norm": 0.23209458196468175, "learning_rate": 7.979832156394185e-05, "loss": 0.5164, "step": 3559 }, { "epoch": 0.9963616008956059, "grad_norm": 0.2128829501010205, "learning_rate": 7.978593318605502e-05, "loss": 0.5206, "step": 3560 }, { "epoch": 0.9966414777497901, "grad_norm": 0.219681850578271, "learning_rate": 7.977354197312638e-05, "loss": 0.5278, "step": 3561 }, { "epoch": 0.9969213546039742, "grad_norm": 0.22295759058538542, "learning_rate": 7.976114792633536e-05, "loss": 0.5179, "step": 3562 }, { "epoch": 0.9972012314581584, "grad_norm": 0.2113438981839933, "learning_rate": 7.974875104686163e-05, "loss": 0.4972, "step": 3563 }, { "epoch": 0.9974811083123426, "grad_norm": 0.2138013327802012, "learning_rate": 7.973635133588513e-05, "loss": 0.521, "step": 3564 }, { "epoch": 0.9977609851665268, "grad_norm": 0.22029413818449997, "learning_rate": 7.972394879458605e-05, "loss": 0.546, "step": 3565 }, { "epoch": 0.9980408620207109, "grad_norm": 0.22049509263757874, "learning_rate": 7.971154342414489e-05, "loss": 0.5145, "step": 3566 }, { "epoch": 0.998320738874895, "grad_norm": 0.21986872037743224, "learning_rate": 7.96991352257424e-05, "loss": 0.5127, "step": 3567 }, { "epoch": 0.9986006157290792, "grad_norm": 0.21480916211150758, "learning_rate": 7.968672420055958e-05, "loss": 0.5257, "step": 3568 }, { "epoch": 0.9988804925832634, "grad_norm": 0.23421886641376274, "learning_rate": 7.967431034977775e-05, "loss": 0.5385, "step": 3569 }, { "epoch": 0.9991603694374476, "grad_norm": 0.23289558241292804, "learning_rate": 7.966189367457844e-05, "loss": 0.5485, "step": 3570 }, { "epoch": 0.9994402462916316, "grad_norm": 0.2140992292192523, "learning_rate": 7.96494741761435e-05, "loss": 0.5122, "step": 3571 }, { "epoch": 0.9997201231458158, "grad_norm": 0.22440790715798145, "learning_rate": 7.9637051855655e-05, "loss": 0.5258, "step": 3572 }, { "epoch": 1.0, "grad_norm": 0.26865370894700985, "learning_rate": 7.962462671429532e-05, "loss": 0.5524, "step": 3573 }, { "epoch": 1.000279876854184, "grad_norm": 0.2151522824258909, "learning_rate": 7.961219875324709e-05, "loss": 0.5202, "step": 3574 }, { "epoch": 1.0005597537083684, "grad_norm": 0.2145821490209512, "learning_rate": 7.959976797369322e-05, "loss": 0.505, "step": 3575 }, { "epoch": 1.0008396305625524, "grad_norm": 0.2397604540898404, "learning_rate": 7.958733437681685e-05, "loss": 0.505, "step": 3576 }, { "epoch": 1.0011195074167367, "grad_norm": 0.21141014949735462, "learning_rate": 7.957489796380143e-05, "loss": 0.4845, "step": 3577 }, { "epoch": 1.0013993842709208, "grad_norm": 0.2579292044721452, "learning_rate": 7.956245873583068e-05, "loss": 0.5226, "step": 3578 }, { "epoch": 1.0016792611251049, "grad_norm": 0.2132179301411428, "learning_rate": 7.955001669408854e-05, "loss": 0.5024, "step": 3579 }, { "epoch": 1.0019591379792891, "grad_norm": 0.21816584767506073, "learning_rate": 7.95375718397593e-05, "loss": 0.5057, "step": 3580 }, { "epoch": 1.0022390148334732, "grad_norm": 0.21752783780863907, "learning_rate": 7.952512417402743e-05, "loss": 0.475, "step": 3581 }, { "epoch": 1.0025188916876575, "grad_norm": 0.22963190478577222, "learning_rate": 7.95126736980777e-05, "loss": 0.489, "step": 3582 }, { "epoch": 1.0027987685418416, "grad_norm": 0.23025272572685854, "learning_rate": 7.95002204130952e-05, "loss": 0.5092, "step": 3583 }, { "epoch": 1.0030786453960256, "grad_norm": 0.2260023492559017, "learning_rate": 7.94877643202652e-05, "loss": 0.501, "step": 3584 }, { "epoch": 1.00335852225021, "grad_norm": 0.255758955202744, "learning_rate": 7.947530542077326e-05, "loss": 0.5082, "step": 3585 }, { "epoch": 1.003638399104394, "grad_norm": 0.2229700476877229, "learning_rate": 7.946284371580526e-05, "loss": 0.5073, "step": 3586 }, { "epoch": 1.0039182759585783, "grad_norm": 0.21423101759819524, "learning_rate": 7.945037920654733e-05, "loss": 0.504, "step": 3587 }, { "epoch": 1.0041981528127624, "grad_norm": 0.22091810849257734, "learning_rate": 7.943791189418579e-05, "loss": 0.5041, "step": 3588 }, { "epoch": 1.0044780296669464, "grad_norm": 0.22291839525112908, "learning_rate": 7.942544177990734e-05, "loss": 0.5065, "step": 3589 }, { "epoch": 1.0047579065211307, "grad_norm": 0.21788969754061052, "learning_rate": 7.941296886489888e-05, "loss": 0.5031, "step": 3590 }, { "epoch": 1.0050377833753148, "grad_norm": 0.22905497358669408, "learning_rate": 7.940049315034755e-05, "loss": 0.5205, "step": 3591 }, { "epoch": 1.005317660229499, "grad_norm": 0.22599453526951566, "learning_rate": 7.938801463744084e-05, "loss": 0.5014, "step": 3592 }, { "epoch": 1.0055975370836832, "grad_norm": 0.2308528471667276, "learning_rate": 7.937553332736646e-05, "loss": 0.5017, "step": 3593 }, { "epoch": 1.0058774139378674, "grad_norm": 0.2263486323970854, "learning_rate": 7.936304922131238e-05, "loss": 0.494, "step": 3594 }, { "epoch": 1.0061572907920515, "grad_norm": 0.2257292243070338, "learning_rate": 7.93505623204668e-05, "loss": 0.5268, "step": 3595 }, { "epoch": 1.0064371676462356, "grad_norm": 0.22837865013465125, "learning_rate": 7.93380726260183e-05, "loss": 0.49, "step": 3596 }, { "epoch": 1.0067170445004199, "grad_norm": 0.22308161880337282, "learning_rate": 7.932558013915562e-05, "loss": 0.5322, "step": 3597 }, { "epoch": 1.006996921354604, "grad_norm": 0.21222487192372674, "learning_rate": 7.931308486106782e-05, "loss": 0.4966, "step": 3598 }, { "epoch": 1.0072767982087882, "grad_norm": 0.21715491769174602, "learning_rate": 7.930058679294418e-05, "loss": 0.4965, "step": 3599 }, { "epoch": 1.0075566750629723, "grad_norm": 0.21062504848809954, "learning_rate": 7.92880859359743e-05, "loss": 0.4712, "step": 3600 }, { "epoch": 1.0078365519171564, "grad_norm": 0.2254344063296754, "learning_rate": 7.9275582291348e-05, "loss": 0.501, "step": 3601 }, { "epoch": 1.0081164287713407, "grad_norm": 0.2193290328015753, "learning_rate": 7.926307586025539e-05, "loss": 0.5079, "step": 3602 }, { "epoch": 1.0083963056255247, "grad_norm": 0.2209881490859096, "learning_rate": 7.925056664388683e-05, "loss": 0.5096, "step": 3603 }, { "epoch": 1.008676182479709, "grad_norm": 0.23068930444761246, "learning_rate": 7.9238054643433e-05, "loss": 0.5264, "step": 3604 }, { "epoch": 1.008956059333893, "grad_norm": 0.21715947474168884, "learning_rate": 7.922553986008472e-05, "loss": 0.4934, "step": 3605 }, { "epoch": 1.0092359361880772, "grad_norm": 0.22774236254353156, "learning_rate": 7.921302229503323e-05, "loss": 0.5219, "step": 3606 }, { "epoch": 1.0095158130422615, "grad_norm": 0.2434574119410726, "learning_rate": 7.92005019494699e-05, "loss": 0.5219, "step": 3607 }, { "epoch": 1.0097956898964455, "grad_norm": 0.21282961651133284, "learning_rate": 7.918797882458649e-05, "loss": 0.5087, "step": 3608 }, { "epoch": 1.0100755667506298, "grad_norm": 0.2298090155797842, "learning_rate": 7.917545292157489e-05, "loss": 0.5081, "step": 3609 }, { "epoch": 1.0103554436048139, "grad_norm": 0.22865017098908946, "learning_rate": 7.916292424162735e-05, "loss": 0.506, "step": 3610 }, { "epoch": 1.010635320458998, "grad_norm": 0.22999448688734112, "learning_rate": 7.915039278593637e-05, "loss": 0.5068, "step": 3611 }, { "epoch": 1.0109151973131822, "grad_norm": 0.2301902343868322, "learning_rate": 7.913785855569466e-05, "loss": 0.51, "step": 3612 }, { "epoch": 1.0111950741673663, "grad_norm": 0.22166767646491686, "learning_rate": 7.91253215520953e-05, "loss": 0.4878, "step": 3613 }, { "epoch": 1.0114749510215506, "grad_norm": 0.22027238357501053, "learning_rate": 7.911278177633151e-05, "loss": 0.4995, "step": 3614 }, { "epoch": 1.0117548278757347, "grad_norm": 0.23521438900435754, "learning_rate": 7.910023922959686e-05, "loss": 0.5192, "step": 3615 }, { "epoch": 1.0120347047299187, "grad_norm": 0.23505219062362195, "learning_rate": 7.908769391308517e-05, "loss": 0.5199, "step": 3616 }, { "epoch": 1.012314581584103, "grad_norm": 0.22162025503854033, "learning_rate": 7.907514582799047e-05, "loss": 0.4905, "step": 3617 }, { "epoch": 1.012594458438287, "grad_norm": 0.22840127255661585, "learning_rate": 7.906259497550712e-05, "loss": 0.5159, "step": 3618 }, { "epoch": 1.0128743352924714, "grad_norm": 0.21422106628265694, "learning_rate": 7.905004135682971e-05, "loss": 0.5029, "step": 3619 }, { "epoch": 1.0131542121466555, "grad_norm": 0.21321876589631053, "learning_rate": 7.903748497315312e-05, "loss": 0.5045, "step": 3620 }, { "epoch": 1.0134340890008395, "grad_norm": 0.2271247535376571, "learning_rate": 7.902492582567244e-05, "loss": 0.4885, "step": 3621 }, { "epoch": 1.0137139658550238, "grad_norm": 0.22542713081875076, "learning_rate": 7.901236391558309e-05, "loss": 0.517, "step": 3622 }, { "epoch": 1.013993842709208, "grad_norm": 0.21738520054120622, "learning_rate": 7.899979924408069e-05, "loss": 0.4939, "step": 3623 }, { "epoch": 1.0142737195633922, "grad_norm": 0.22862862115632895, "learning_rate": 7.898723181236116e-05, "loss": 0.5143, "step": 3624 }, { "epoch": 1.0145535964175763, "grad_norm": 0.22251220885729586, "learning_rate": 7.897466162162071e-05, "loss": 0.4944, "step": 3625 }, { "epoch": 1.0148334732717603, "grad_norm": 0.21495788914755032, "learning_rate": 7.896208867305572e-05, "loss": 0.4935, "step": 3626 }, { "epoch": 1.0151133501259446, "grad_norm": 0.2223636056894036, "learning_rate": 7.894951296786292e-05, "loss": 0.5013, "step": 3627 }, { "epoch": 1.0153932269801287, "grad_norm": 0.21313442084895964, "learning_rate": 7.89369345072393e-05, "loss": 0.5075, "step": 3628 }, { "epoch": 1.015673103834313, "grad_norm": 0.21419141410352954, "learning_rate": 7.892435329238204e-05, "loss": 0.5078, "step": 3629 }, { "epoch": 1.015952980688497, "grad_norm": 0.21199822486315878, "learning_rate": 7.891176932448864e-05, "loss": 0.5208, "step": 3630 }, { "epoch": 1.0162328575426811, "grad_norm": 0.2178367464258803, "learning_rate": 7.889918260475685e-05, "loss": 0.4889, "step": 3631 }, { "epoch": 1.0165127343968654, "grad_norm": 0.21567705751207655, "learning_rate": 7.88865931343847e-05, "loss": 0.4906, "step": 3632 }, { "epoch": 1.0167926112510495, "grad_norm": 0.2217669672523888, "learning_rate": 7.887400091457043e-05, "loss": 0.5106, "step": 3633 }, { "epoch": 1.0170724881052338, "grad_norm": 0.20893848571473597, "learning_rate": 7.886140594651259e-05, "loss": 0.4839, "step": 3634 }, { "epoch": 1.0173523649594178, "grad_norm": 0.21881956483322057, "learning_rate": 7.884880823140998e-05, "loss": 0.4953, "step": 3635 }, { "epoch": 1.0176322418136021, "grad_norm": 0.22237682379708062, "learning_rate": 7.883620777046167e-05, "loss": 0.4864, "step": 3636 }, { "epoch": 1.0179121186677862, "grad_norm": 0.2293932877642561, "learning_rate": 7.882360456486696e-05, "loss": 0.5126, "step": 3637 }, { "epoch": 1.0181919955219703, "grad_norm": 0.22595454310736232, "learning_rate": 7.881099861582542e-05, "loss": 0.5028, "step": 3638 }, { "epoch": 1.0184718723761546, "grad_norm": 0.21979301425980904, "learning_rate": 7.879838992453691e-05, "loss": 0.5221, "step": 3639 }, { "epoch": 1.0187517492303386, "grad_norm": 0.22012884916710354, "learning_rate": 7.878577849220154e-05, "loss": 0.4979, "step": 3640 }, { "epoch": 1.019031626084523, "grad_norm": 0.2172238980519223, "learning_rate": 7.877316432001965e-05, "loss": 0.5153, "step": 3641 }, { "epoch": 1.019311502938707, "grad_norm": 0.2420916094594506, "learning_rate": 7.876054740919188e-05, "loss": 0.5226, "step": 3642 }, { "epoch": 1.019591379792891, "grad_norm": 0.220317374341056, "learning_rate": 7.874792776091911e-05, "loss": 0.4988, "step": 3643 }, { "epoch": 1.0198712566470753, "grad_norm": 0.22291240894867526, "learning_rate": 7.87353053764025e-05, "loss": 0.5225, "step": 3644 }, { "epoch": 1.0201511335012594, "grad_norm": 0.21868923910777843, "learning_rate": 7.872268025684342e-05, "loss": 0.5158, "step": 3645 }, { "epoch": 1.0204310103554437, "grad_norm": 0.23874857903317756, "learning_rate": 7.871005240344356e-05, "loss": 0.5116, "step": 3646 }, { "epoch": 1.0207108872096278, "grad_norm": 0.23305321859946382, "learning_rate": 7.869742181740484e-05, "loss": 0.5079, "step": 3647 }, { "epoch": 1.0209907640638118, "grad_norm": 0.22456830528366797, "learning_rate": 7.868478849992945e-05, "loss": 0.5181, "step": 3648 }, { "epoch": 1.0212706409179961, "grad_norm": 0.2201775470465584, "learning_rate": 7.867215245221983e-05, "loss": 0.4907, "step": 3649 }, { "epoch": 1.0215505177721802, "grad_norm": 0.2226429327885552, "learning_rate": 7.86595136754787e-05, "loss": 0.4961, "step": 3650 }, { "epoch": 1.0218303946263645, "grad_norm": 0.23263872512360653, "learning_rate": 7.864687217090901e-05, "loss": 0.5159, "step": 3651 }, { "epoch": 1.0221102714805486, "grad_norm": 0.22524051741579265, "learning_rate": 7.8634227939714e-05, "loss": 0.5273, "step": 3652 }, { "epoch": 1.0223901483347326, "grad_norm": 0.22234250746513734, "learning_rate": 7.862158098309715e-05, "loss": 0.4821, "step": 3653 }, { "epoch": 1.022670025188917, "grad_norm": 0.22423880484357808, "learning_rate": 7.860893130226219e-05, "loss": 0.5169, "step": 3654 }, { "epoch": 1.022949902043101, "grad_norm": 0.22416996618762852, "learning_rate": 7.859627889841314e-05, "loss": 0.4965, "step": 3655 }, { "epoch": 1.0232297788972853, "grad_norm": 0.22406438174150115, "learning_rate": 7.858362377275426e-05, "loss": 0.5221, "step": 3656 }, { "epoch": 1.0235096557514693, "grad_norm": 0.2318239704342264, "learning_rate": 7.857096592649007e-05, "loss": 0.5206, "step": 3657 }, { "epoch": 1.0237895326056534, "grad_norm": 0.21836130956054062, "learning_rate": 7.855830536082536e-05, "loss": 0.5063, "step": 3658 }, { "epoch": 1.0240694094598377, "grad_norm": 0.2219583339403444, "learning_rate": 7.854564207696514e-05, "loss": 0.4789, "step": 3659 }, { "epoch": 1.0243492863140218, "grad_norm": 0.2370270364275783, "learning_rate": 7.853297607611474e-05, "loss": 0.4862, "step": 3660 }, { "epoch": 1.024629163168206, "grad_norm": 0.22431161646064307, "learning_rate": 7.852030735947972e-05, "loss": 0.5118, "step": 3661 }, { "epoch": 1.0249090400223901, "grad_norm": 0.2283212493299846, "learning_rate": 7.850763592826587e-05, "loss": 0.5303, "step": 3662 }, { "epoch": 1.0251889168765742, "grad_norm": 0.23409502376978944, "learning_rate": 7.849496178367928e-05, "loss": 0.494, "step": 3663 }, { "epoch": 1.0254687937307585, "grad_norm": 0.2147483425223085, "learning_rate": 7.848228492692626e-05, "loss": 0.5265, "step": 3664 }, { "epoch": 1.0257486705849426, "grad_norm": 0.22011900065948645, "learning_rate": 7.846960535921344e-05, "loss": 0.4961, "step": 3665 }, { "epoch": 1.0260285474391269, "grad_norm": 0.21620581271904024, "learning_rate": 7.845692308174763e-05, "loss": 0.5238, "step": 3666 }, { "epoch": 1.026308424293311, "grad_norm": 0.20659401458152649, "learning_rate": 7.844423809573598e-05, "loss": 0.5136, "step": 3667 }, { "epoch": 1.026588301147495, "grad_norm": 0.21696794715860793, "learning_rate": 7.84315504023858e-05, "loss": 0.5125, "step": 3668 }, { "epoch": 1.0268681780016793, "grad_norm": 0.226905351752765, "learning_rate": 7.841886000290475e-05, "loss": 0.5131, "step": 3669 }, { "epoch": 1.0271480548558634, "grad_norm": 0.22998961957295685, "learning_rate": 7.840616689850068e-05, "loss": 0.5046, "step": 3670 }, { "epoch": 1.0274279317100476, "grad_norm": 0.21251326918552876, "learning_rate": 7.839347109038177e-05, "loss": 0.493, "step": 3671 }, { "epoch": 1.0277078085642317, "grad_norm": 0.22252036125757305, "learning_rate": 7.83807725797564e-05, "loss": 0.4958, "step": 3672 }, { "epoch": 1.0279876854184158, "grad_norm": 0.21811964113001786, "learning_rate": 7.836807136783319e-05, "loss": 0.5041, "step": 3673 }, { "epoch": 1.0282675622726, "grad_norm": 0.23204598074564745, "learning_rate": 7.835536745582107e-05, "loss": 0.5074, "step": 3674 }, { "epoch": 1.0285474391267841, "grad_norm": 0.22050068047631413, "learning_rate": 7.83426608449292e-05, "loss": 0.5009, "step": 3675 }, { "epoch": 1.0288273159809684, "grad_norm": 0.22693963333949582, "learning_rate": 7.832995153636701e-05, "loss": 0.5001, "step": 3676 }, { "epoch": 1.0291071928351525, "grad_norm": 0.22729119103749862, "learning_rate": 7.831723953134418e-05, "loss": 0.4972, "step": 3677 }, { "epoch": 1.0293870696893368, "grad_norm": 0.21855378715453477, "learning_rate": 7.830452483107063e-05, "loss": 0.5291, "step": 3678 }, { "epoch": 1.0296669465435209, "grad_norm": 0.23079382860819328, "learning_rate": 7.829180743675657e-05, "loss": 0.5158, "step": 3679 }, { "epoch": 1.029946823397705, "grad_norm": 0.21558792962330395, "learning_rate": 7.827908734961245e-05, "loss": 0.518, "step": 3680 }, { "epoch": 1.0302267002518892, "grad_norm": 0.21739769261590977, "learning_rate": 7.826636457084897e-05, "loss": 0.4882, "step": 3681 }, { "epoch": 1.0305065771060733, "grad_norm": 0.22069486679964603, "learning_rate": 7.825363910167708e-05, "loss": 0.5054, "step": 3682 }, { "epoch": 1.0307864539602576, "grad_norm": 0.2370634533060172, "learning_rate": 7.8240910943308e-05, "loss": 0.4993, "step": 3683 }, { "epoch": 1.0310663308144417, "grad_norm": 0.2360609192064416, "learning_rate": 7.822818009695322e-05, "loss": 0.4821, "step": 3684 }, { "epoch": 1.0313462076686257, "grad_norm": 0.21923797582612406, "learning_rate": 7.821544656382445e-05, "loss": 0.5123, "step": 3685 }, { "epoch": 1.03162608452281, "grad_norm": 0.21959227309143034, "learning_rate": 7.820271034513369e-05, "loss": 0.4917, "step": 3686 }, { "epoch": 1.031905961376994, "grad_norm": 0.22578074050452607, "learning_rate": 7.818997144209317e-05, "loss": 0.4999, "step": 3687 }, { "epoch": 1.0321858382311784, "grad_norm": 0.22418214779760068, "learning_rate": 7.81772298559154e-05, "loss": 0.5035, "step": 3688 }, { "epoch": 1.0324657150853624, "grad_norm": 0.2181856244345083, "learning_rate": 7.81644855878131e-05, "loss": 0.518, "step": 3689 }, { "epoch": 1.0327455919395465, "grad_norm": 0.21754485220945913, "learning_rate": 7.815173863899932e-05, "loss": 0.469, "step": 3690 }, { "epoch": 1.0330254687937308, "grad_norm": 0.23659125740358572, "learning_rate": 7.813898901068727e-05, "loss": 0.5033, "step": 3691 }, { "epoch": 1.0333053456479149, "grad_norm": 0.22440086793698985, "learning_rate": 7.812623670409052e-05, "loss": 0.5136, "step": 3692 }, { "epoch": 1.0335852225020992, "grad_norm": 0.22152283188669944, "learning_rate": 7.811348172042282e-05, "loss": 0.4964, "step": 3693 }, { "epoch": 1.0338650993562832, "grad_norm": 0.22666937105652316, "learning_rate": 7.81007240608982e-05, "loss": 0.4898, "step": 3694 }, { "epoch": 1.0341449762104673, "grad_norm": 0.22737577897722452, "learning_rate": 7.80879637267309e-05, "loss": 0.4793, "step": 3695 }, { "epoch": 1.0344248530646516, "grad_norm": 0.22454308849167792, "learning_rate": 7.807520071913553e-05, "loss": 0.5043, "step": 3696 }, { "epoch": 1.0347047299188357, "grad_norm": 0.22583644746256398, "learning_rate": 7.806243503932681e-05, "loss": 0.5248, "step": 3697 }, { "epoch": 1.03498460677302, "grad_norm": 0.24001174737920145, "learning_rate": 7.804966668851984e-05, "loss": 0.4972, "step": 3698 }, { "epoch": 1.035264483627204, "grad_norm": 0.23204276896869927, "learning_rate": 7.803689566792989e-05, "loss": 0.5061, "step": 3699 }, { "epoch": 1.035544360481388, "grad_norm": 0.22768586153412737, "learning_rate": 7.80241219787725e-05, "loss": 0.5019, "step": 3700 }, { "epoch": 1.0358242373355724, "grad_norm": 0.2278122899786938, "learning_rate": 7.801134562226351e-05, "loss": 0.5106, "step": 3701 }, { "epoch": 1.0361041141897565, "grad_norm": 0.23306330492410876, "learning_rate": 7.799856659961896e-05, "loss": 0.5611, "step": 3702 }, { "epoch": 1.0363839910439407, "grad_norm": 0.2279186512689613, "learning_rate": 7.798578491205517e-05, "loss": 0.5034, "step": 3703 }, { "epoch": 1.0366638678981248, "grad_norm": 0.2176282765030379, "learning_rate": 7.79730005607887e-05, "loss": 0.4993, "step": 3704 }, { "epoch": 1.0369437447523089, "grad_norm": 0.2389784110027637, "learning_rate": 7.796021354703638e-05, "loss": 0.4983, "step": 3705 }, { "epoch": 1.0372236216064932, "grad_norm": 0.227742898410014, "learning_rate": 7.79474238720153e-05, "loss": 0.5365, "step": 3706 }, { "epoch": 1.0375034984606772, "grad_norm": 0.22046182855204585, "learning_rate": 7.793463153694277e-05, "loss": 0.503, "step": 3707 }, { "epoch": 1.0377833753148615, "grad_norm": 0.22869558767230014, "learning_rate": 7.792183654303638e-05, "loss": 0.5084, "step": 3708 }, { "epoch": 1.0380632521690456, "grad_norm": 0.21523346168863788, "learning_rate": 7.790903889151393e-05, "loss": 0.4982, "step": 3709 }, { "epoch": 1.0383431290232297, "grad_norm": 0.215141081448551, "learning_rate": 7.789623858359356e-05, "loss": 0.4773, "step": 3710 }, { "epoch": 1.038623005877414, "grad_norm": 0.23749807648684113, "learning_rate": 7.788343562049359e-05, "loss": 0.5113, "step": 3711 }, { "epoch": 1.038902882731598, "grad_norm": 0.2263293463190765, "learning_rate": 7.78706300034326e-05, "loss": 0.5105, "step": 3712 }, { "epoch": 1.0391827595857823, "grad_norm": 0.22836076680374096, "learning_rate": 7.785782173362945e-05, "loss": 0.5284, "step": 3713 }, { "epoch": 1.0394626364399664, "grad_norm": 0.2210892686929637, "learning_rate": 7.784501081230323e-05, "loss": 0.5027, "step": 3714 }, { "epoch": 1.0397425132941507, "grad_norm": 0.22263999621691236, "learning_rate": 7.78321972406733e-05, "loss": 0.4852, "step": 3715 }, { "epoch": 1.0400223901483348, "grad_norm": 0.22576600416532278, "learning_rate": 7.781938101995927e-05, "loss": 0.4973, "step": 3716 }, { "epoch": 1.0403022670025188, "grad_norm": 0.2190389765571684, "learning_rate": 7.780656215138097e-05, "loss": 0.5181, "step": 3717 }, { "epoch": 1.0405821438567031, "grad_norm": 0.22358476178617778, "learning_rate": 7.779374063615851e-05, "loss": 0.4923, "step": 3718 }, { "epoch": 1.0408620207108872, "grad_norm": 0.21674036560877505, "learning_rate": 7.778091647551228e-05, "loss": 0.4751, "step": 3719 }, { "epoch": 1.0411418975650715, "grad_norm": 0.22126498915078868, "learning_rate": 7.776808967066285e-05, "loss": 0.5283, "step": 3720 }, { "epoch": 1.0414217744192555, "grad_norm": 0.2765440597539088, "learning_rate": 7.775526022283113e-05, "loss": 0.4908, "step": 3721 }, { "epoch": 1.0417016512734396, "grad_norm": 0.23209777214489224, "learning_rate": 7.774242813323817e-05, "loss": 0.5457, "step": 3722 }, { "epoch": 1.041981528127624, "grad_norm": 0.23080917925574035, "learning_rate": 7.772959340310541e-05, "loss": 0.5095, "step": 3723 }, { "epoch": 1.042261404981808, "grad_norm": 0.25309199876030164, "learning_rate": 7.771675603365441e-05, "loss": 0.5181, "step": 3724 }, { "epoch": 1.0425412818359923, "grad_norm": 0.22190312983812366, "learning_rate": 7.770391602610706e-05, "loss": 0.5207, "step": 3725 }, { "epoch": 1.0428211586901763, "grad_norm": 0.22668013820155763, "learning_rate": 7.769107338168548e-05, "loss": 0.4917, "step": 3726 }, { "epoch": 1.0431010355443604, "grad_norm": 0.25886643025629247, "learning_rate": 7.767822810161203e-05, "loss": 0.5176, "step": 3727 }, { "epoch": 1.0433809123985447, "grad_norm": 0.2273589243644261, "learning_rate": 7.766538018710934e-05, "loss": 0.4813, "step": 3728 }, { "epoch": 1.0436607892527288, "grad_norm": 0.2288098441950441, "learning_rate": 7.765252963940026e-05, "loss": 0.5261, "step": 3729 }, { "epoch": 1.043940666106913, "grad_norm": 0.23432135225594952, "learning_rate": 7.763967645970796e-05, "loss": 0.5026, "step": 3730 }, { "epoch": 1.0442205429610971, "grad_norm": 0.2257468875164591, "learning_rate": 7.762682064925578e-05, "loss": 0.5091, "step": 3731 }, { "epoch": 1.0445004198152812, "grad_norm": 0.2258328663868932, "learning_rate": 7.761396220926733e-05, "loss": 0.5069, "step": 3732 }, { "epoch": 1.0447802966694655, "grad_norm": 0.23853146649917603, "learning_rate": 7.76011011409665e-05, "loss": 0.5042, "step": 3733 }, { "epoch": 1.0450601735236495, "grad_norm": 0.22491834872133007, "learning_rate": 7.758823744557744e-05, "loss": 0.5223, "step": 3734 }, { "epoch": 1.0453400503778338, "grad_norm": 0.22982063950382134, "learning_rate": 7.757537112432448e-05, "loss": 0.5022, "step": 3735 }, { "epoch": 1.045619927232018, "grad_norm": 0.22254638081289047, "learning_rate": 7.756250217843226e-05, "loss": 0.498, "step": 3736 }, { "epoch": 1.045899804086202, "grad_norm": 0.2267195513303436, "learning_rate": 7.754963060912565e-05, "loss": 0.5126, "step": 3737 }, { "epoch": 1.0461796809403863, "grad_norm": 0.2286198041088637, "learning_rate": 7.753675641762981e-05, "loss": 0.4757, "step": 3738 }, { "epoch": 1.0464595577945703, "grad_norm": 0.22037508345309462, "learning_rate": 7.752387960517004e-05, "loss": 0.4988, "step": 3739 }, { "epoch": 1.0467394346487546, "grad_norm": 0.21687297637714298, "learning_rate": 7.751100017297204e-05, "loss": 0.4794, "step": 3740 }, { "epoch": 1.0470193115029387, "grad_norm": 0.22023636752093112, "learning_rate": 7.749811812226161e-05, "loss": 0.5087, "step": 3741 }, { "epoch": 1.0472991883571228, "grad_norm": 0.22566517206426936, "learning_rate": 7.748523345426493e-05, "loss": 0.5206, "step": 3742 }, { "epoch": 1.047579065211307, "grad_norm": 0.2291302872555876, "learning_rate": 7.747234617020835e-05, "loss": 0.505, "step": 3743 }, { "epoch": 1.0478589420654911, "grad_norm": 0.21768976187721323, "learning_rate": 7.745945627131848e-05, "loss": 0.493, "step": 3744 }, { "epoch": 1.0481388189196754, "grad_norm": 0.21776900587766715, "learning_rate": 7.744656375882218e-05, "loss": 0.486, "step": 3745 }, { "epoch": 1.0484186957738595, "grad_norm": 0.2167297075444961, "learning_rate": 7.743366863394658e-05, "loss": 0.5071, "step": 3746 }, { "epoch": 1.0486985726280436, "grad_norm": 0.22121549586597575, "learning_rate": 7.742077089791904e-05, "loss": 0.5113, "step": 3747 }, { "epoch": 1.0489784494822278, "grad_norm": 0.21653569757474664, "learning_rate": 7.740787055196718e-05, "loss": 0.4833, "step": 3748 }, { "epoch": 1.049258326336412, "grad_norm": 0.22610627454716303, "learning_rate": 7.739496759731888e-05, "loss": 0.5147, "step": 3749 }, { "epoch": 1.0495382031905962, "grad_norm": 0.22583420842560206, "learning_rate": 7.738206203520222e-05, "loss": 0.5204, "step": 3750 }, { "epoch": 1.0498180800447803, "grad_norm": 0.23854311733296116, "learning_rate": 7.736915386684556e-05, "loss": 0.5225, "step": 3751 }, { "epoch": 1.0500979568989646, "grad_norm": 0.2067873549877098, "learning_rate": 7.735624309347753e-05, "loss": 0.5012, "step": 3752 }, { "epoch": 1.0503778337531486, "grad_norm": 0.21602368238643985, "learning_rate": 7.734332971632696e-05, "loss": 0.4904, "step": 3753 }, { "epoch": 1.0506577106073327, "grad_norm": 0.2278415591193141, "learning_rate": 7.733041373662297e-05, "loss": 0.5086, "step": 3754 }, { "epoch": 1.050937587461517, "grad_norm": 0.2174762455042572, "learning_rate": 7.73174951555949e-05, "loss": 0.5061, "step": 3755 }, { "epoch": 1.051217464315701, "grad_norm": 0.2187822877203153, "learning_rate": 7.730457397447235e-05, "loss": 0.496, "step": 3756 }, { "epoch": 1.0514973411698854, "grad_norm": 0.213077764698822, "learning_rate": 7.729165019448517e-05, "loss": 0.4848, "step": 3757 }, { "epoch": 1.0517772180240694, "grad_norm": 0.23034650377157573, "learning_rate": 7.727872381686343e-05, "loss": 0.4874, "step": 3758 }, { "epoch": 1.0520570948782535, "grad_norm": 0.2162098522324769, "learning_rate": 7.72657948428375e-05, "loss": 0.5007, "step": 3759 }, { "epoch": 1.0523369717324378, "grad_norm": 0.22197862336329033, "learning_rate": 7.725286327363797e-05, "loss": 0.4821, "step": 3760 }, { "epoch": 1.0526168485866219, "grad_norm": 0.23258034245390838, "learning_rate": 7.723992911049565e-05, "loss": 0.513, "step": 3761 }, { "epoch": 1.0528967254408061, "grad_norm": 0.21320431812644003, "learning_rate": 7.722699235464163e-05, "loss": 0.4911, "step": 3762 }, { "epoch": 1.0531766022949902, "grad_norm": 0.2231566023024273, "learning_rate": 7.721405300730723e-05, "loss": 0.4801, "step": 3763 }, { "epoch": 1.0534564791491743, "grad_norm": 0.21994583286566283, "learning_rate": 7.720111106972402e-05, "loss": 0.486, "step": 3764 }, { "epoch": 1.0537363560033586, "grad_norm": 0.2355246280315124, "learning_rate": 7.718816654312386e-05, "loss": 0.517, "step": 3765 }, { "epoch": 1.0540162328575426, "grad_norm": 0.23056238675580698, "learning_rate": 7.71752194287388e-05, "loss": 0.5317, "step": 3766 }, { "epoch": 1.054296109711727, "grad_norm": 0.22877985408701595, "learning_rate": 7.716226972780112e-05, "loss": 0.4998, "step": 3767 }, { "epoch": 1.054575986565911, "grad_norm": 0.21833274013072324, "learning_rate": 7.714931744154342e-05, "loss": 0.5015, "step": 3768 }, { "epoch": 1.054855863420095, "grad_norm": 0.2121862112484023, "learning_rate": 7.713636257119848e-05, "loss": 0.4948, "step": 3769 }, { "epoch": 1.0551357402742794, "grad_norm": 0.23148855835565083, "learning_rate": 7.712340511799934e-05, "loss": 0.5171, "step": 3770 }, { "epoch": 1.0554156171284634, "grad_norm": 0.21933393639811025, "learning_rate": 7.711044508317935e-05, "loss": 0.4776, "step": 3771 }, { "epoch": 1.0556954939826477, "grad_norm": 0.22031419590569426, "learning_rate": 7.709748246797201e-05, "loss": 0.5263, "step": 3772 }, { "epoch": 1.0559753708368318, "grad_norm": 0.22683983817719575, "learning_rate": 7.708451727361113e-05, "loss": 0.4929, "step": 3773 }, { "epoch": 1.0562552476910159, "grad_norm": 0.23286618883029137, "learning_rate": 7.707154950133073e-05, "loss": 0.4773, "step": 3774 }, { "epoch": 1.0565351245452002, "grad_norm": 0.22956515759896887, "learning_rate": 7.70585791523651e-05, "loss": 0.5414, "step": 3775 }, { "epoch": 1.0568150013993842, "grad_norm": 0.22520125737178678, "learning_rate": 7.704560622794875e-05, "loss": 0.5009, "step": 3776 }, { "epoch": 1.0570948782535685, "grad_norm": 0.21589269098650588, "learning_rate": 7.703263072931648e-05, "loss": 0.4783, "step": 3777 }, { "epoch": 1.0573747551077526, "grad_norm": 0.2365087413936952, "learning_rate": 7.701965265770326e-05, "loss": 0.5391, "step": 3778 }, { "epoch": 1.0576546319619367, "grad_norm": 0.21983360387197257, "learning_rate": 7.70066720143444e-05, "loss": 0.4941, "step": 3779 }, { "epoch": 1.057934508816121, "grad_norm": 0.22353050709079425, "learning_rate": 7.699368880047538e-05, "loss": 0.5269, "step": 3780 }, { "epoch": 1.058214385670305, "grad_norm": 0.22967064761977415, "learning_rate": 7.698070301733193e-05, "loss": 0.507, "step": 3781 }, { "epoch": 1.0584942625244893, "grad_norm": 0.22850020289574346, "learning_rate": 7.69677146661501e-05, "loss": 0.5163, "step": 3782 }, { "epoch": 1.0587741393786734, "grad_norm": 0.2374433412315277, "learning_rate": 7.695472374816606e-05, "loss": 0.5289, "step": 3783 }, { "epoch": 1.0590540162328574, "grad_norm": 0.250525413138875, "learning_rate": 7.694173026461634e-05, "loss": 0.4979, "step": 3784 }, { "epoch": 1.0593338930870417, "grad_norm": 0.22186768349601863, "learning_rate": 7.692873421673765e-05, "loss": 0.5138, "step": 3785 }, { "epoch": 1.0596137699412258, "grad_norm": 0.2342367806949055, "learning_rate": 7.691573560576696e-05, "loss": 0.4844, "step": 3786 }, { "epoch": 1.05989364679541, "grad_norm": 0.2220425010818297, "learning_rate": 7.690273443294151e-05, "loss": 0.4974, "step": 3787 }, { "epoch": 1.0601735236495942, "grad_norm": 0.21519697584239855, "learning_rate": 7.688973069949871e-05, "loss": 0.4885, "step": 3788 }, { "epoch": 1.0604534005037782, "grad_norm": 0.2187205362803618, "learning_rate": 7.687672440667633e-05, "loss": 0.4987, "step": 3789 }, { "epoch": 1.0607332773579625, "grad_norm": 0.21744890897097585, "learning_rate": 7.686371555571224e-05, "loss": 0.488, "step": 3790 }, { "epoch": 1.0610131542121466, "grad_norm": 0.21004707710627715, "learning_rate": 7.685070414784468e-05, "loss": 0.4889, "step": 3791 }, { "epoch": 1.0612930310663309, "grad_norm": 0.21830103524657823, "learning_rate": 7.683769018431208e-05, "loss": 0.5386, "step": 3792 }, { "epoch": 1.061572907920515, "grad_norm": 0.22674849870313224, "learning_rate": 7.68246736663531e-05, "loss": 0.5069, "step": 3793 }, { "epoch": 1.061852784774699, "grad_norm": 0.2143074131367963, "learning_rate": 7.681165459520666e-05, "loss": 0.4915, "step": 3794 }, { "epoch": 1.0621326616288833, "grad_norm": 0.22081785671530887, "learning_rate": 7.679863297211195e-05, "loss": 0.5092, "step": 3795 }, { "epoch": 1.0624125384830674, "grad_norm": 0.20953164470217545, "learning_rate": 7.678560879830832e-05, "loss": 0.5172, "step": 3796 }, { "epoch": 1.0626924153372517, "grad_norm": 0.23091093486011183, "learning_rate": 7.677258207503547e-05, "loss": 0.505, "step": 3797 }, { "epoch": 1.0629722921914357, "grad_norm": 0.23316138255867908, "learning_rate": 7.675955280353328e-05, "loss": 0.5305, "step": 3798 }, { "epoch": 1.06325216904562, "grad_norm": 0.22396546236292414, "learning_rate": 7.674652098504186e-05, "loss": 0.495, "step": 3799 }, { "epoch": 1.063532045899804, "grad_norm": 0.2245788406883771, "learning_rate": 7.673348662080161e-05, "loss": 0.5167, "step": 3800 }, { "epoch": 1.0638119227539882, "grad_norm": 0.2205397897839526, "learning_rate": 7.672044971205314e-05, "loss": 0.507, "step": 3801 }, { "epoch": 1.0640917996081725, "grad_norm": 0.2227383971878762, "learning_rate": 7.670741026003731e-05, "loss": 0.5235, "step": 3802 }, { "epoch": 1.0643716764623565, "grad_norm": 0.22539336501337817, "learning_rate": 7.669436826599522e-05, "loss": 0.5347, "step": 3803 }, { "epoch": 1.0646515533165408, "grad_norm": 0.22719825863243975, "learning_rate": 7.668132373116822e-05, "loss": 0.5049, "step": 3804 }, { "epoch": 1.064931430170725, "grad_norm": 0.2214197606529923, "learning_rate": 7.66682766567979e-05, "loss": 0.4873, "step": 3805 }, { "epoch": 1.065211307024909, "grad_norm": 0.21954336514362585, "learning_rate": 7.665522704412607e-05, "loss": 0.4912, "step": 3806 }, { "epoch": 1.0654911838790933, "grad_norm": 0.22102130479639134, "learning_rate": 7.66421748943948e-05, "loss": 0.5294, "step": 3807 }, { "epoch": 1.0657710607332773, "grad_norm": 0.21446008680300668, "learning_rate": 7.662912020884643e-05, "loss": 0.5019, "step": 3808 }, { "epoch": 1.0660509375874616, "grad_norm": 0.2206328164969449, "learning_rate": 7.661606298872349e-05, "loss": 0.5047, "step": 3809 }, { "epoch": 1.0663308144416457, "grad_norm": 0.21756584651981553, "learning_rate": 7.660300323526878e-05, "loss": 0.4964, "step": 3810 }, { "epoch": 1.0666106912958297, "grad_norm": 0.22430413786534004, "learning_rate": 7.658994094972533e-05, "loss": 0.5041, "step": 3811 }, { "epoch": 1.066890568150014, "grad_norm": 0.21934530109163514, "learning_rate": 7.657687613333642e-05, "loss": 0.526, "step": 3812 }, { "epoch": 1.067170445004198, "grad_norm": 0.22283384020942965, "learning_rate": 7.656380878734555e-05, "loss": 0.5078, "step": 3813 }, { "epoch": 1.0674503218583824, "grad_norm": 0.21966713305042523, "learning_rate": 7.65507389129965e-05, "loss": 0.4956, "step": 3814 }, { "epoch": 1.0677301987125665, "grad_norm": 0.2144153612144557, "learning_rate": 7.653766651153326e-05, "loss": 0.5026, "step": 3815 }, { "epoch": 1.0680100755667505, "grad_norm": 0.22565636445137668, "learning_rate": 7.652459158420007e-05, "loss": 0.4879, "step": 3816 }, { "epoch": 1.0682899524209348, "grad_norm": 0.22303918321379382, "learning_rate": 7.65115141322414e-05, "loss": 0.4946, "step": 3817 }, { "epoch": 1.068569829275119, "grad_norm": 0.23063572648720812, "learning_rate": 7.649843415690198e-05, "loss": 0.5176, "step": 3818 }, { "epoch": 1.0688497061293032, "grad_norm": 0.21981819761370128, "learning_rate": 7.648535165942677e-05, "loss": 0.4891, "step": 3819 }, { "epoch": 1.0691295829834873, "grad_norm": 0.24644240232076658, "learning_rate": 7.647226664106095e-05, "loss": 0.5078, "step": 3820 }, { "epoch": 1.0694094598376713, "grad_norm": 0.23528066256126917, "learning_rate": 7.645917910304998e-05, "loss": 0.5336, "step": 3821 }, { "epoch": 1.0696893366918556, "grad_norm": 0.2397239247242819, "learning_rate": 7.644608904663955e-05, "loss": 0.5175, "step": 3822 }, { "epoch": 1.0699692135460397, "grad_norm": 0.232170783854431, "learning_rate": 7.643299647307554e-05, "loss": 0.5035, "step": 3823 }, { "epoch": 1.070249090400224, "grad_norm": 0.21433696654298434, "learning_rate": 7.641990138360412e-05, "loss": 0.5099, "step": 3824 }, { "epoch": 1.070528967254408, "grad_norm": 0.21173823050745963, "learning_rate": 7.640680377947173e-05, "loss": 0.496, "step": 3825 }, { "epoch": 1.0708088441085921, "grad_norm": 0.2290236926495319, "learning_rate": 7.639370366192496e-05, "loss": 0.4894, "step": 3826 }, { "epoch": 1.0710887209627764, "grad_norm": 0.22753587593982447, "learning_rate": 7.638060103221072e-05, "loss": 0.4933, "step": 3827 }, { "epoch": 1.0713685978169605, "grad_norm": 0.2258659794897729, "learning_rate": 7.636749589157608e-05, "loss": 0.5112, "step": 3828 }, { "epoch": 1.0716484746711448, "grad_norm": 0.23164992793209913, "learning_rate": 7.635438824126843e-05, "loss": 0.5043, "step": 3829 }, { "epoch": 1.0719283515253288, "grad_norm": 0.21650578338652135, "learning_rate": 7.634127808253537e-05, "loss": 0.5256, "step": 3830 }, { "epoch": 1.072208228379513, "grad_norm": 0.22089266761764356, "learning_rate": 7.632816541662471e-05, "loss": 0.5065, "step": 3831 }, { "epoch": 1.0724881052336972, "grad_norm": 0.21431430064386633, "learning_rate": 7.631505024478452e-05, "loss": 0.4938, "step": 3832 }, { "epoch": 1.0727679820878813, "grad_norm": 0.20856808669043467, "learning_rate": 7.630193256826313e-05, "loss": 0.4727, "step": 3833 }, { "epoch": 1.0730478589420656, "grad_norm": 0.22296633001784025, "learning_rate": 7.628881238830907e-05, "loss": 0.524, "step": 3834 }, { "epoch": 1.0733277357962496, "grad_norm": 0.2240612228926075, "learning_rate": 7.627568970617113e-05, "loss": 0.4838, "step": 3835 }, { "epoch": 1.073607612650434, "grad_norm": 0.21351479114079525, "learning_rate": 7.626256452309836e-05, "loss": 0.4966, "step": 3836 }, { "epoch": 1.073887489504618, "grad_norm": 0.22141062969527397, "learning_rate": 7.624943684033998e-05, "loss": 0.4952, "step": 3837 }, { "epoch": 1.074167366358802, "grad_norm": 0.22088480272505118, "learning_rate": 7.623630665914551e-05, "loss": 0.4856, "step": 3838 }, { "epoch": 1.0744472432129863, "grad_norm": 0.22057886913820374, "learning_rate": 7.622317398076468e-05, "loss": 0.5046, "step": 3839 }, { "epoch": 1.0747271200671704, "grad_norm": 0.22254489674410552, "learning_rate": 7.621003880644748e-05, "loss": 0.5045, "step": 3840 }, { "epoch": 1.0750069969213547, "grad_norm": 0.22828366945400475, "learning_rate": 7.619690113744412e-05, "loss": 0.4912, "step": 3841 }, { "epoch": 1.0752868737755388, "grad_norm": 0.2373877195829569, "learning_rate": 7.618376097500504e-05, "loss": 0.4948, "step": 3842 }, { "epoch": 1.0755667506297228, "grad_norm": 0.21642635871544502, "learning_rate": 7.617061832038095e-05, "loss": 0.5043, "step": 3843 }, { "epoch": 1.0758466274839071, "grad_norm": 0.20746003075145658, "learning_rate": 7.615747317482274e-05, "loss": 0.4975, "step": 3844 }, { "epoch": 1.0761265043380912, "grad_norm": 0.22922300715140198, "learning_rate": 7.61443255395816e-05, "loss": 0.5002, "step": 3845 }, { "epoch": 1.0764063811922755, "grad_norm": 0.22262266059975339, "learning_rate": 7.613117541590892e-05, "loss": 0.47, "step": 3846 }, { "epoch": 1.0766862580464596, "grad_norm": 0.21922009104427154, "learning_rate": 7.611802280505634e-05, "loss": 0.4931, "step": 3847 }, { "epoch": 1.0769661349006436, "grad_norm": 0.24920565933246105, "learning_rate": 7.610486770827573e-05, "loss": 0.5226, "step": 3848 }, { "epoch": 1.077246011754828, "grad_norm": 0.22925813151376365, "learning_rate": 7.609171012681919e-05, "loss": 0.5073, "step": 3849 }, { "epoch": 1.077525888609012, "grad_norm": 0.22185591528917906, "learning_rate": 7.607855006193908e-05, "loss": 0.5166, "step": 3850 }, { "epoch": 1.0778057654631963, "grad_norm": 0.23176119898527425, "learning_rate": 7.6065387514888e-05, "loss": 0.5118, "step": 3851 }, { "epoch": 1.0780856423173804, "grad_norm": 0.230844068591777, "learning_rate": 7.605222248691872e-05, "loss": 0.4976, "step": 3852 }, { "epoch": 1.0783655191715644, "grad_norm": 0.22967617877081778, "learning_rate": 7.603905497928434e-05, "loss": 0.5118, "step": 3853 }, { "epoch": 1.0786453960257487, "grad_norm": 0.22235229326238526, "learning_rate": 7.602588499323812e-05, "loss": 0.5327, "step": 3854 }, { "epoch": 1.0789252728799328, "grad_norm": 0.2222409391099783, "learning_rate": 7.601271253003361e-05, "loss": 0.4979, "step": 3855 }, { "epoch": 1.079205149734117, "grad_norm": 0.23195003249182133, "learning_rate": 7.599953759092455e-05, "loss": 0.4961, "step": 3856 }, { "epoch": 1.0794850265883011, "grad_norm": 0.22716270571121247, "learning_rate": 7.598636017716496e-05, "loss": 0.4911, "step": 3857 }, { "epoch": 1.0797649034424852, "grad_norm": 0.21778475956550405, "learning_rate": 7.597318029000906e-05, "loss": 0.4969, "step": 3858 }, { "epoch": 1.0800447802966695, "grad_norm": 0.22821702121996498, "learning_rate": 7.595999793071131e-05, "loss": 0.506, "step": 3859 }, { "epoch": 1.0803246571508536, "grad_norm": 0.23175362531939797, "learning_rate": 7.594681310052645e-05, "loss": 0.484, "step": 3860 }, { "epoch": 1.0806045340050379, "grad_norm": 0.22531728623967945, "learning_rate": 7.593362580070937e-05, "loss": 0.5081, "step": 3861 }, { "epoch": 1.080884410859222, "grad_norm": 0.21878111139174958, "learning_rate": 7.592043603251529e-05, "loss": 0.4851, "step": 3862 }, { "epoch": 1.081164287713406, "grad_norm": 0.21420800413672003, "learning_rate": 7.59072437971996e-05, "loss": 0.4936, "step": 3863 }, { "epoch": 1.0814441645675903, "grad_norm": 0.21895571432038985, "learning_rate": 7.589404909601793e-05, "loss": 0.518, "step": 3864 }, { "epoch": 1.0817240414217744, "grad_norm": 0.22844836395438742, "learning_rate": 7.588085193022618e-05, "loss": 0.4873, "step": 3865 }, { "epoch": 1.0820039182759587, "grad_norm": 0.2459045119791399, "learning_rate": 7.586765230108046e-05, "loss": 0.5021, "step": 3866 }, { "epoch": 1.0822837951301427, "grad_norm": 0.23175907115140723, "learning_rate": 7.585445020983711e-05, "loss": 0.4952, "step": 3867 }, { "epoch": 1.0825636719843268, "grad_norm": 0.23330686736631892, "learning_rate": 7.584124565775272e-05, "loss": 0.4911, "step": 3868 }, { "epoch": 1.082843548838511, "grad_norm": 0.22702907790495536, "learning_rate": 7.582803864608411e-05, "loss": 0.5375, "step": 3869 }, { "epoch": 1.0831234256926952, "grad_norm": 0.22027379206298642, "learning_rate": 7.581482917608832e-05, "loss": 0.5259, "step": 3870 }, { "epoch": 1.0834033025468794, "grad_norm": 0.22687339401866688, "learning_rate": 7.580161724902263e-05, "loss": 0.5174, "step": 3871 }, { "epoch": 1.0836831794010635, "grad_norm": 0.22648503128740832, "learning_rate": 7.578840286614459e-05, "loss": 0.5211, "step": 3872 }, { "epoch": 1.0839630562552478, "grad_norm": 0.22312352399527574, "learning_rate": 7.577518602871192e-05, "loss": 0.4809, "step": 3873 }, { "epoch": 1.0842429331094319, "grad_norm": 0.22815285561006848, "learning_rate": 7.576196673798262e-05, "loss": 0.5314, "step": 3874 }, { "epoch": 1.084522809963616, "grad_norm": 0.20861969161496066, "learning_rate": 7.57487449952149e-05, "loss": 0.4906, "step": 3875 }, { "epoch": 1.0848026868178002, "grad_norm": 0.22163694118804153, "learning_rate": 7.573552080166722e-05, "loss": 0.5199, "step": 3876 }, { "epoch": 1.0850825636719843, "grad_norm": 0.2249256201028353, "learning_rate": 7.572229415859827e-05, "loss": 0.5099, "step": 3877 }, { "epoch": 1.0853624405261684, "grad_norm": 0.2378261645205978, "learning_rate": 7.570906506726697e-05, "loss": 0.5399, "step": 3878 }, { "epoch": 1.0856423173803527, "grad_norm": 0.21914759134732284, "learning_rate": 7.569583352893245e-05, "loss": 0.5009, "step": 3879 }, { "epoch": 1.0859221942345367, "grad_norm": 0.22379024983302978, "learning_rate": 7.568259954485411e-05, "loss": 0.4887, "step": 3880 }, { "epoch": 1.086202071088721, "grad_norm": 0.22892909741472098, "learning_rate": 7.566936311629158e-05, "loss": 0.5098, "step": 3881 }, { "epoch": 1.086481947942905, "grad_norm": 0.2201131590342588, "learning_rate": 7.565612424450471e-05, "loss": 0.5016, "step": 3882 }, { "epoch": 1.0867618247970894, "grad_norm": 0.22667326283362565, "learning_rate": 7.564288293075357e-05, "loss": 0.4997, "step": 3883 }, { "epoch": 1.0870417016512735, "grad_norm": 0.22168560884901403, "learning_rate": 7.562963917629847e-05, "loss": 0.4901, "step": 3884 }, { "epoch": 1.0873215785054575, "grad_norm": 0.22631729313244486, "learning_rate": 7.561639298239997e-05, "loss": 0.4758, "step": 3885 }, { "epoch": 1.0876014553596418, "grad_norm": 0.2338524440250818, "learning_rate": 7.560314435031885e-05, "loss": 0.5054, "step": 3886 }, { "epoch": 1.0878813322138259, "grad_norm": 0.22119235290818326, "learning_rate": 7.558989328131613e-05, "loss": 0.474, "step": 3887 }, { "epoch": 1.0881612090680102, "grad_norm": 0.22800219538548638, "learning_rate": 7.557663977665304e-05, "loss": 0.5154, "step": 3888 }, { "epoch": 1.0884410859221942, "grad_norm": 0.2146893923232453, "learning_rate": 7.556338383759105e-05, "loss": 0.4989, "step": 3889 }, { "epoch": 1.0887209627763783, "grad_norm": 0.21957207199801138, "learning_rate": 7.555012546539188e-05, "loss": 0.4885, "step": 3890 }, { "epoch": 1.0890008396305626, "grad_norm": 0.2283523860823396, "learning_rate": 7.553686466131747e-05, "loss": 0.512, "step": 3891 }, { "epoch": 1.0892807164847467, "grad_norm": 0.23672055375470924, "learning_rate": 7.552360142662999e-05, "loss": 0.5125, "step": 3892 }, { "epoch": 1.089560593338931, "grad_norm": 0.22939385152277814, "learning_rate": 7.551033576259183e-05, "loss": 0.5431, "step": 3893 }, { "epoch": 1.089840470193115, "grad_norm": 0.2285399202759489, "learning_rate": 7.549706767046565e-05, "loss": 0.4925, "step": 3894 }, { "epoch": 1.090120347047299, "grad_norm": 0.22353177443499367, "learning_rate": 7.548379715151428e-05, "loss": 0.4951, "step": 3895 }, { "epoch": 1.0904002239014834, "grad_norm": 0.22202912430917907, "learning_rate": 7.547052420700085e-05, "loss": 0.4806, "step": 3896 }, { "epoch": 1.0906801007556675, "grad_norm": 0.21661626552023552, "learning_rate": 7.545724883818865e-05, "loss": 0.5037, "step": 3897 }, { "epoch": 1.0909599776098517, "grad_norm": 0.22988693042275354, "learning_rate": 7.544397104634128e-05, "loss": 0.504, "step": 3898 }, { "epoch": 1.0912398544640358, "grad_norm": 0.22080517506414357, "learning_rate": 7.543069083272249e-05, "loss": 0.5186, "step": 3899 }, { "epoch": 1.0915197313182199, "grad_norm": 0.22531274355080877, "learning_rate": 7.541740819859632e-05, "loss": 0.5107, "step": 3900 }, { "epoch": 1.0917996081724042, "grad_norm": 0.21995059903334982, "learning_rate": 7.5404123145227e-05, "loss": 0.5162, "step": 3901 }, { "epoch": 1.0920794850265882, "grad_norm": 0.2237659393370415, "learning_rate": 7.539083567387904e-05, "loss": 0.5005, "step": 3902 }, { "epoch": 1.0923593618807725, "grad_norm": 0.22888919607289115, "learning_rate": 7.53775457858171e-05, "loss": 0.502, "step": 3903 }, { "epoch": 1.0926392387349566, "grad_norm": 0.22955372167578642, "learning_rate": 7.536425348230617e-05, "loss": 0.4971, "step": 3904 }, { "epoch": 1.0929191155891407, "grad_norm": 0.22301297637817336, "learning_rate": 7.535095876461138e-05, "loss": 0.5007, "step": 3905 }, { "epoch": 1.093198992443325, "grad_norm": 0.23809298142797114, "learning_rate": 7.533766163399816e-05, "loss": 0.5244, "step": 3906 }, { "epoch": 1.093478869297509, "grad_norm": 0.2190281078249623, "learning_rate": 7.532436209173213e-05, "loss": 0.492, "step": 3907 }, { "epoch": 1.0937587461516933, "grad_norm": 0.2267505963590063, "learning_rate": 7.531106013907911e-05, "loss": 0.4972, "step": 3908 }, { "epoch": 1.0940386230058774, "grad_norm": 0.23097391565857056, "learning_rate": 7.529775577730525e-05, "loss": 0.5227, "step": 3909 }, { "epoch": 1.0943184998600617, "grad_norm": 0.22143711436518954, "learning_rate": 7.528444900767682e-05, "loss": 0.4953, "step": 3910 }, { "epoch": 1.0945983767142458, "grad_norm": 0.2144417384807327, "learning_rate": 7.527113983146038e-05, "loss": 0.5174, "step": 3911 }, { "epoch": 1.0948782535684298, "grad_norm": 0.22592666799398026, "learning_rate": 7.525782824992271e-05, "loss": 0.488, "step": 3912 }, { "epoch": 1.0951581304226141, "grad_norm": 0.2494883368003387, "learning_rate": 7.52445142643308e-05, "loss": 0.5015, "step": 3913 }, { "epoch": 1.0954380072767982, "grad_norm": 0.22054061266688652, "learning_rate": 7.52311978759519e-05, "loss": 0.49, "step": 3914 }, { "epoch": 1.0957178841309823, "grad_norm": 0.23958747852787587, "learning_rate": 7.521787908605349e-05, "loss": 0.5218, "step": 3915 }, { "epoch": 1.0959977609851665, "grad_norm": 0.2157341944758859, "learning_rate": 7.520455789590319e-05, "loss": 0.5011, "step": 3916 }, { "epoch": 1.0962776378393506, "grad_norm": 0.22119871039538472, "learning_rate": 7.519123430676899e-05, "loss": 0.5135, "step": 3917 }, { "epoch": 1.096557514693535, "grad_norm": 0.23261646419392415, "learning_rate": 7.5177908319919e-05, "loss": 0.5237, "step": 3918 }, { "epoch": 1.096837391547719, "grad_norm": 0.21912662190164525, "learning_rate": 7.516457993662161e-05, "loss": 0.4969, "step": 3919 }, { "epoch": 1.0971172684019033, "grad_norm": 0.21119565994755876, "learning_rate": 7.51512491581454e-05, "loss": 0.4891, "step": 3920 }, { "epoch": 1.0973971452560873, "grad_norm": 0.21202423716618202, "learning_rate": 7.513791598575923e-05, "loss": 0.4862, "step": 3921 }, { "epoch": 1.0976770221102714, "grad_norm": 0.22509411180443678, "learning_rate": 7.512458042073214e-05, "loss": 0.4971, "step": 3922 }, { "epoch": 1.0979568989644557, "grad_norm": 0.21635296175845792, "learning_rate": 7.511124246433342e-05, "loss": 0.5044, "step": 3923 }, { "epoch": 1.0982367758186398, "grad_norm": 0.21496274544460234, "learning_rate": 7.509790211783261e-05, "loss": 0.4733, "step": 3924 }, { "epoch": 1.098516652672824, "grad_norm": 0.22632612773336558, "learning_rate": 7.508455938249942e-05, "loss": 0.5074, "step": 3925 }, { "epoch": 1.0987965295270081, "grad_norm": 0.227020819243741, "learning_rate": 7.50712142596038e-05, "loss": 0.5057, "step": 3926 }, { "epoch": 1.0990764063811922, "grad_norm": 0.2231419819317831, "learning_rate": 7.5057866750416e-05, "loss": 0.5048, "step": 3927 }, { "epoch": 1.0993562832353765, "grad_norm": 0.22439510327942658, "learning_rate": 7.50445168562064e-05, "loss": 0.4935, "step": 3928 }, { "epoch": 1.0996361600895606, "grad_norm": 0.23708832947391364, "learning_rate": 7.503116457824568e-05, "loss": 0.4896, "step": 3929 }, { "epoch": 1.0999160369437448, "grad_norm": 0.22151595670376018, "learning_rate": 7.50178099178047e-05, "loss": 0.5061, "step": 3930 }, { "epoch": 1.100195913797929, "grad_norm": 0.5290360487146467, "learning_rate": 7.500445287615456e-05, "loss": 0.5242, "step": 3931 }, { "epoch": 1.100475790652113, "grad_norm": 0.21946455042509816, "learning_rate": 7.499109345456662e-05, "loss": 0.493, "step": 3932 }, { "epoch": 1.1007556675062973, "grad_norm": 0.24430199441378006, "learning_rate": 7.497773165431241e-05, "loss": 0.5281, "step": 3933 }, { "epoch": 1.1010355443604813, "grad_norm": 0.2158165078305361, "learning_rate": 7.496436747666372e-05, "loss": 0.5065, "step": 3934 }, { "epoch": 1.1013154212146656, "grad_norm": 0.2170809312137706, "learning_rate": 7.495100092289256e-05, "loss": 0.5104, "step": 3935 }, { "epoch": 1.1015952980688497, "grad_norm": 0.2157092122895801, "learning_rate": 7.493763199427117e-05, "loss": 0.4946, "step": 3936 }, { "epoch": 1.1018751749230338, "grad_norm": 0.22601878455235838, "learning_rate": 7.4924260692072e-05, "loss": 0.5061, "step": 3937 }, { "epoch": 1.102155051777218, "grad_norm": 0.2200046203941348, "learning_rate": 7.491088701756775e-05, "loss": 0.5304, "step": 3938 }, { "epoch": 1.1024349286314021, "grad_norm": 0.22308895038824725, "learning_rate": 7.489751097203133e-05, "loss": 0.504, "step": 3939 }, { "epoch": 1.1027148054855864, "grad_norm": 0.22743067215840185, "learning_rate": 7.488413255673588e-05, "loss": 0.4983, "step": 3940 }, { "epoch": 1.1029946823397705, "grad_norm": 0.22978696573372845, "learning_rate": 7.487075177295477e-05, "loss": 0.5119, "step": 3941 }, { "epoch": 1.1032745591939546, "grad_norm": 0.21442187635270676, "learning_rate": 7.485736862196157e-05, "loss": 0.493, "step": 3942 }, { "epoch": 1.1035544360481389, "grad_norm": 0.2391998819954397, "learning_rate": 7.484398310503014e-05, "loss": 0.4936, "step": 3943 }, { "epoch": 1.103834312902323, "grad_norm": 0.22774254590835027, "learning_rate": 7.483059522343448e-05, "loss": 0.5015, "step": 3944 }, { "epoch": 1.1041141897565072, "grad_norm": 0.22042592966709393, "learning_rate": 7.481720497844885e-05, "loss": 0.4996, "step": 3945 }, { "epoch": 1.1043940666106913, "grad_norm": 0.5397382889686548, "learning_rate": 7.480381237134777e-05, "loss": 0.4879, "step": 3946 }, { "epoch": 1.1046739434648754, "grad_norm": 0.22660537818506746, "learning_rate": 7.479041740340595e-05, "loss": 0.5128, "step": 3947 }, { "epoch": 1.1049538203190596, "grad_norm": 0.2259600732655642, "learning_rate": 7.477702007589832e-05, "loss": 0.4873, "step": 3948 }, { "epoch": 1.1052336971732437, "grad_norm": 0.22081623754235322, "learning_rate": 7.476362039010005e-05, "loss": 0.5058, "step": 3949 }, { "epoch": 1.105513574027428, "grad_norm": 0.23403480837563395, "learning_rate": 7.475021834728654e-05, "loss": 0.4883, "step": 3950 }, { "epoch": 1.105793450881612, "grad_norm": 0.23872258169546243, "learning_rate": 7.47368139487334e-05, "loss": 0.5118, "step": 3951 }, { "epoch": 1.1060733277357961, "grad_norm": 0.23105921460824574, "learning_rate": 7.472340719571645e-05, "loss": 0.5104, "step": 3952 }, { "epoch": 1.1063532045899804, "grad_norm": 0.2245049549252369, "learning_rate": 7.470999808951176e-05, "loss": 0.4996, "step": 3953 }, { "epoch": 1.1066330814441645, "grad_norm": 0.2373984025689569, "learning_rate": 7.469658663139563e-05, "loss": 0.514, "step": 3954 }, { "epoch": 1.1069129582983488, "grad_norm": 0.21771894540135592, "learning_rate": 7.468317282264456e-05, "loss": 0.4899, "step": 3955 }, { "epoch": 1.1071928351525329, "grad_norm": 0.21674073993322088, "learning_rate": 7.466975666453528e-05, "loss": 0.5294, "step": 3956 }, { "epoch": 1.1074727120067172, "grad_norm": 0.22217151016513056, "learning_rate": 7.465633815834473e-05, "loss": 0.5052, "step": 3957 }, { "epoch": 1.1077525888609012, "grad_norm": 0.229801954450481, "learning_rate": 7.464291730535016e-05, "loss": 0.5037, "step": 3958 }, { "epoch": 1.1080324657150853, "grad_norm": 0.22920707926710376, "learning_rate": 7.46294941068289e-05, "loss": 0.4939, "step": 3959 }, { "epoch": 1.1083123425692696, "grad_norm": 0.21907021312456276, "learning_rate": 7.461606856405862e-05, "loss": 0.515, "step": 3960 }, { "epoch": 1.1085922194234537, "grad_norm": 0.23781660925955275, "learning_rate": 7.460264067831715e-05, "loss": 0.5023, "step": 3961 }, { "epoch": 1.108872096277638, "grad_norm": 0.20439890808463512, "learning_rate": 7.458921045088258e-05, "loss": 0.5104, "step": 3962 }, { "epoch": 1.109151973131822, "grad_norm": 0.2240262499431877, "learning_rate": 7.457577788303318e-05, "loss": 0.5143, "step": 3963 }, { "epoch": 1.109431849986006, "grad_norm": 0.23717555660734368, "learning_rate": 7.456234297604749e-05, "loss": 0.5095, "step": 3964 }, { "epoch": 1.1097117268401904, "grad_norm": 0.2242753396687637, "learning_rate": 7.454890573120424e-05, "loss": 0.518, "step": 3965 }, { "epoch": 1.1099916036943744, "grad_norm": 0.237628458375431, "learning_rate": 7.453546614978239e-05, "loss": 0.5184, "step": 3966 }, { "epoch": 1.1102714805485587, "grad_norm": 0.2232154358193692, "learning_rate": 7.452202423306116e-05, "loss": 0.5263, "step": 3967 }, { "epoch": 1.1105513574027428, "grad_norm": 0.21611304074653276, "learning_rate": 7.450857998231995e-05, "loss": 0.5069, "step": 3968 }, { "epoch": 1.1108312342569269, "grad_norm": 0.22557895807258563, "learning_rate": 7.449513339883836e-05, "loss": 0.5014, "step": 3969 }, { "epoch": 1.1111111111111112, "grad_norm": 0.205553625116001, "learning_rate": 7.448168448389627e-05, "loss": 0.4678, "step": 3970 }, { "epoch": 1.1113909879652952, "grad_norm": 0.22628077462935112, "learning_rate": 7.446823323877375e-05, "loss": 0.533, "step": 3971 }, { "epoch": 1.1116708648194795, "grad_norm": 0.22317889780015895, "learning_rate": 7.445477966475108e-05, "loss": 0.4728, "step": 3972 }, { "epoch": 1.1119507416736636, "grad_norm": 0.2235224351114337, "learning_rate": 7.444132376310881e-05, "loss": 0.505, "step": 3973 }, { "epoch": 1.1122306185278477, "grad_norm": 0.21922304196356124, "learning_rate": 7.442786553512764e-05, "loss": 0.4965, "step": 3974 }, { "epoch": 1.112510495382032, "grad_norm": 0.2244625761338939, "learning_rate": 7.441440498208858e-05, "loss": 0.4955, "step": 3975 }, { "epoch": 1.112790372236216, "grad_norm": 0.2277128080284073, "learning_rate": 7.440094210527277e-05, "loss": 0.5432, "step": 3976 }, { "epoch": 1.1130702490904003, "grad_norm": 0.22637258287783846, "learning_rate": 7.438747690596165e-05, "loss": 0.5216, "step": 3977 }, { "epoch": 1.1133501259445844, "grad_norm": 0.2286180614189198, "learning_rate": 7.437400938543682e-05, "loss": 0.4783, "step": 3978 }, { "epoch": 1.1136300027987684, "grad_norm": 0.23422347281845565, "learning_rate": 7.436053954498015e-05, "loss": 0.5181, "step": 3979 }, { "epoch": 1.1139098796529527, "grad_norm": 0.221375051592428, "learning_rate": 7.434706738587368e-05, "loss": 0.5028, "step": 3980 }, { "epoch": 1.1141897565071368, "grad_norm": 0.23079758129720093, "learning_rate": 7.43335929093997e-05, "loss": 0.4971, "step": 3981 }, { "epoch": 1.114469633361321, "grad_norm": 0.22848630298020803, "learning_rate": 7.432011611684073e-05, "loss": 0.5079, "step": 3982 }, { "epoch": 1.1147495102155052, "grad_norm": 0.22222877729702842, "learning_rate": 7.430663700947948e-05, "loss": 0.4861, "step": 3983 }, { "epoch": 1.1150293870696892, "grad_norm": 0.22471377154495475, "learning_rate": 7.429315558859895e-05, "loss": 0.4925, "step": 3984 }, { "epoch": 1.1153092639238735, "grad_norm": 0.2243152332371436, "learning_rate": 7.427967185548224e-05, "loss": 0.4953, "step": 3985 }, { "epoch": 1.1155891407780576, "grad_norm": 0.2489701283125392, "learning_rate": 7.426618581141279e-05, "loss": 0.5047, "step": 3986 }, { "epoch": 1.1158690176322419, "grad_norm": 0.22901357998009123, "learning_rate": 7.42526974576742e-05, "loss": 0.5202, "step": 3987 }, { "epoch": 1.116148894486426, "grad_norm": 0.229652633254522, "learning_rate": 7.423920679555028e-05, "loss": 0.5333, "step": 3988 }, { "epoch": 1.11642877134061, "grad_norm": 0.2264897650671679, "learning_rate": 7.422571382632509e-05, "loss": 0.5195, "step": 3989 }, { "epoch": 1.1167086481947943, "grad_norm": 0.22776081041661542, "learning_rate": 7.42122185512829e-05, "loss": 0.5038, "step": 3990 }, { "epoch": 1.1169885250489784, "grad_norm": 0.22137987701782322, "learning_rate": 7.41987209717082e-05, "loss": 0.4995, "step": 3991 }, { "epoch": 1.1172684019031627, "grad_norm": 0.2411846928455049, "learning_rate": 7.418522108888568e-05, "loss": 0.5231, "step": 3992 }, { "epoch": 1.1175482787573467, "grad_norm": 0.21324438743416152, "learning_rate": 7.417171890410029e-05, "loss": 0.4775, "step": 3993 }, { "epoch": 1.117828155611531, "grad_norm": 0.2268611907829221, "learning_rate": 7.415821441863716e-05, "loss": 0.5278, "step": 3994 }, { "epoch": 1.118108032465715, "grad_norm": 0.22666263789361527, "learning_rate": 7.414470763378166e-05, "loss": 0.5116, "step": 3995 }, { "epoch": 1.1183879093198992, "grad_norm": 0.22154158502340937, "learning_rate": 7.413119855081938e-05, "loss": 0.4956, "step": 3996 }, { "epoch": 1.1186677861740835, "grad_norm": 0.26600970427729226, "learning_rate": 7.411768717103612e-05, "loss": 0.4974, "step": 3997 }, { "epoch": 1.1189476630282675, "grad_norm": 0.2308800817397494, "learning_rate": 7.41041734957179e-05, "loss": 0.5007, "step": 3998 }, { "epoch": 1.1192275398824516, "grad_norm": 0.22563471246040492, "learning_rate": 7.409065752615094e-05, "loss": 0.4882, "step": 3999 }, { "epoch": 1.119507416736636, "grad_norm": 0.2249642920007204, "learning_rate": 7.40771392636217e-05, "loss": 0.5041, "step": 4000 }, { "epoch": 1.11978729359082, "grad_norm": 0.2219281303629957, "learning_rate": 7.406361870941688e-05, "loss": 0.4986, "step": 4001 }, { "epoch": 1.1200671704450043, "grad_norm": 0.2352913759985641, "learning_rate": 7.405009586482336e-05, "loss": 0.5098, "step": 4002 }, { "epoch": 1.1203470472991883, "grad_norm": 0.22423843025035858, "learning_rate": 7.403657073112826e-05, "loss": 0.4886, "step": 4003 }, { "epoch": 1.1206269241533726, "grad_norm": 0.24015195876825654, "learning_rate": 7.402304330961892e-05, "loss": 0.5146, "step": 4004 }, { "epoch": 1.1209068010075567, "grad_norm": 0.22195974971523932, "learning_rate": 7.400951360158284e-05, "loss": 0.4792, "step": 4005 }, { "epoch": 1.1211866778617408, "grad_norm": 0.2124027004858279, "learning_rate": 7.399598160830785e-05, "loss": 0.4949, "step": 4006 }, { "epoch": 1.121466554715925, "grad_norm": 0.22096425979108092, "learning_rate": 7.398244733108188e-05, "loss": 0.5049, "step": 4007 }, { "epoch": 1.1217464315701091, "grad_norm": 0.23352774774966856, "learning_rate": 7.396891077119314e-05, "loss": 0.5336, "step": 4008 }, { "epoch": 1.1220263084242934, "grad_norm": 0.22693250166211001, "learning_rate": 7.395537192993006e-05, "loss": 0.5384, "step": 4009 }, { "epoch": 1.1223061852784775, "grad_norm": 0.22410185812759292, "learning_rate": 7.394183080858128e-05, "loss": 0.4995, "step": 4010 }, { "epoch": 1.1225860621326615, "grad_norm": 0.2159306200814348, "learning_rate": 7.392828740843565e-05, "loss": 0.5065, "step": 4011 }, { "epoch": 1.1228659389868458, "grad_norm": 0.2212926685164199, "learning_rate": 7.391474173078222e-05, "loss": 0.4915, "step": 4012 }, { "epoch": 1.12314581584103, "grad_norm": 0.21902616955118648, "learning_rate": 7.39011937769103e-05, "loss": 0.491, "step": 4013 }, { "epoch": 1.1234256926952142, "grad_norm": 0.21190653907480703, "learning_rate": 7.388764354810935e-05, "loss": 0.4762, "step": 4014 }, { "epoch": 1.1237055695493983, "grad_norm": 0.21731547278169128, "learning_rate": 7.387409104566915e-05, "loss": 0.4676, "step": 4015 }, { "epoch": 1.1239854464035823, "grad_norm": 0.22096594136238323, "learning_rate": 7.386053627087959e-05, "loss": 0.5045, "step": 4016 }, { "epoch": 1.1242653232577666, "grad_norm": 0.223290153323746, "learning_rate": 7.384697922503081e-05, "loss": 0.5084, "step": 4017 }, { "epoch": 1.1245452001119507, "grad_norm": 0.21834334265777824, "learning_rate": 7.383341990941321e-05, "loss": 0.5055, "step": 4018 }, { "epoch": 1.124825076966135, "grad_norm": 0.23158304511179106, "learning_rate": 7.381985832531738e-05, "loss": 0.4942, "step": 4019 }, { "epoch": 1.125104953820319, "grad_norm": 0.2181884728536942, "learning_rate": 7.380629447403408e-05, "loss": 0.4916, "step": 4020 }, { "epoch": 1.1253848306745031, "grad_norm": 0.24216185502464424, "learning_rate": 7.379272835685436e-05, "loss": 0.5089, "step": 4021 }, { "epoch": 1.1256647075286874, "grad_norm": 0.22842620885262918, "learning_rate": 7.377915997506945e-05, "loss": 0.525, "step": 4022 }, { "epoch": 1.1259445843828715, "grad_norm": 0.2131697616716636, "learning_rate": 7.376558932997077e-05, "loss": 0.5101, "step": 4023 }, { "epoch": 1.1262244612370558, "grad_norm": 0.21574241056556848, "learning_rate": 7.375201642285e-05, "loss": 0.5233, "step": 4024 }, { "epoch": 1.1265043380912398, "grad_norm": 0.21973087229331548, "learning_rate": 7.373844125499902e-05, "loss": 0.5032, "step": 4025 }, { "epoch": 1.126784214945424, "grad_norm": 0.2316693277548038, "learning_rate": 7.372486382770988e-05, "loss": 0.4948, "step": 4026 }, { "epoch": 1.1270640917996082, "grad_norm": 0.21643730587161744, "learning_rate": 7.371128414227495e-05, "loss": 0.5045, "step": 4027 }, { "epoch": 1.1273439686537923, "grad_norm": 0.2253032763134499, "learning_rate": 7.369770219998671e-05, "loss": 0.5151, "step": 4028 }, { "epoch": 1.1276238455079766, "grad_norm": 0.21610329540544126, "learning_rate": 7.368411800213792e-05, "loss": 0.4973, "step": 4029 }, { "epoch": 1.1279037223621606, "grad_norm": 0.22582337560928176, "learning_rate": 7.367053155002153e-05, "loss": 0.5107, "step": 4030 }, { "epoch": 1.128183599216345, "grad_norm": 0.22260830561784425, "learning_rate": 7.365694284493067e-05, "loss": 0.4937, "step": 4031 }, { "epoch": 1.128463476070529, "grad_norm": 0.22579513338201246, "learning_rate": 7.364335188815879e-05, "loss": 0.5165, "step": 4032 }, { "epoch": 1.128743352924713, "grad_norm": 0.2196064816768498, "learning_rate": 7.362975868099942e-05, "loss": 0.5031, "step": 4033 }, { "epoch": 1.1290232297788974, "grad_norm": 0.22283353084824703, "learning_rate": 7.361616322474639e-05, "loss": 0.5189, "step": 4034 }, { "epoch": 1.1293031066330814, "grad_norm": 0.22830486174505005, "learning_rate": 7.360256552069373e-05, "loss": 0.5042, "step": 4035 }, { "epoch": 1.1295829834872655, "grad_norm": 0.22881786221022365, "learning_rate": 7.358896557013566e-05, "loss": 0.5124, "step": 4036 }, { "epoch": 1.1298628603414498, "grad_norm": 0.22450305533083373, "learning_rate": 7.357536337436666e-05, "loss": 0.5157, "step": 4037 }, { "epoch": 1.1301427371956339, "grad_norm": 0.2215223008891001, "learning_rate": 7.356175893468137e-05, "loss": 0.5167, "step": 4038 }, { "epoch": 1.1304226140498181, "grad_norm": 0.2210892939529426, "learning_rate": 7.354815225237468e-05, "loss": 0.4938, "step": 4039 }, { "epoch": 1.1307024909040022, "grad_norm": 0.2287015944620728, "learning_rate": 7.353454332874168e-05, "loss": 0.5051, "step": 4040 }, { "epoch": 1.1309823677581865, "grad_norm": 0.22765714439851215, "learning_rate": 7.352093216507767e-05, "loss": 0.4849, "step": 4041 }, { "epoch": 1.1312622446123706, "grad_norm": 0.23468073629206568, "learning_rate": 7.350731876267819e-05, "loss": 0.5182, "step": 4042 }, { "epoch": 1.1315421214665546, "grad_norm": 0.2278784184839403, "learning_rate": 7.349370312283892e-05, "loss": 0.4889, "step": 4043 }, { "epoch": 1.131821998320739, "grad_norm": 0.2302163249529019, "learning_rate": 7.348008524685586e-05, "loss": 0.5035, "step": 4044 }, { "epoch": 1.132101875174923, "grad_norm": 0.22271831637056483, "learning_rate": 7.346646513602513e-05, "loss": 0.4826, "step": 4045 }, { "epoch": 1.132381752029107, "grad_norm": 0.22812934226579695, "learning_rate": 7.345284279164312e-05, "loss": 0.4708, "step": 4046 }, { "epoch": 1.1326616288832914, "grad_norm": 0.22337073647858013, "learning_rate": 7.34392182150064e-05, "loss": 0.4843, "step": 4047 }, { "epoch": 1.1329415057374754, "grad_norm": 0.23428618132006931, "learning_rate": 7.342559140741178e-05, "loss": 0.5195, "step": 4048 }, { "epoch": 1.1332213825916597, "grad_norm": 0.21658888481202418, "learning_rate": 7.341196237015625e-05, "loss": 0.5165, "step": 4049 }, { "epoch": 1.1335012594458438, "grad_norm": 0.22357993130256928, "learning_rate": 7.339833110453705e-05, "loss": 0.4835, "step": 4050 }, { "epoch": 1.133781136300028, "grad_norm": 0.21458289644518913, "learning_rate": 7.338469761185159e-05, "loss": 0.4936, "step": 4051 }, { "epoch": 1.1340610131542121, "grad_norm": 0.22384757186563023, "learning_rate": 7.337106189339751e-05, "loss": 0.4866, "step": 4052 }, { "epoch": 1.1343408900083962, "grad_norm": 0.23985823753627802, "learning_rate": 7.335742395047269e-05, "loss": 0.5006, "step": 4053 }, { "epoch": 1.1346207668625805, "grad_norm": 0.22311980934561043, "learning_rate": 7.334378378437519e-05, "loss": 0.4997, "step": 4054 }, { "epoch": 1.1349006437167646, "grad_norm": 0.2164550009077947, "learning_rate": 7.333014139640327e-05, "loss": 0.5055, "step": 4055 }, { "epoch": 1.1351805205709489, "grad_norm": 0.22534961429285863, "learning_rate": 7.331649678785546e-05, "loss": 0.4853, "step": 4056 }, { "epoch": 1.135460397425133, "grad_norm": 0.23185781552040344, "learning_rate": 7.33028499600304e-05, "loss": 0.4937, "step": 4057 }, { "epoch": 1.135740274279317, "grad_norm": 0.2213397453761991, "learning_rate": 7.328920091422706e-05, "loss": 0.5128, "step": 4058 }, { "epoch": 1.1360201511335013, "grad_norm": 0.2208586969694549, "learning_rate": 7.327554965174454e-05, "loss": 0.5164, "step": 4059 }, { "epoch": 1.1363000279876854, "grad_norm": 0.23022256529255308, "learning_rate": 7.326189617388218e-05, "loss": 0.5255, "step": 4060 }, { "epoch": 1.1365799048418697, "grad_norm": 0.21733781380197187, "learning_rate": 7.32482404819395e-05, "loss": 0.498, "step": 4061 }, { "epoch": 1.1368597816960537, "grad_norm": 0.21849333909572238, "learning_rate": 7.32345825772163e-05, "loss": 0.5279, "step": 4062 }, { "epoch": 1.1371396585502378, "grad_norm": 0.22513243789664358, "learning_rate": 7.32209224610125e-05, "loss": 0.5093, "step": 4063 }, { "epoch": 1.137419535404422, "grad_norm": 0.2163399051652371, "learning_rate": 7.320726013462833e-05, "loss": 0.4955, "step": 4064 }, { "epoch": 1.1376994122586062, "grad_norm": 0.21823966611068824, "learning_rate": 7.319359559936414e-05, "loss": 0.4826, "step": 4065 }, { "epoch": 1.1379792891127904, "grad_norm": 0.22781418531552539, "learning_rate": 7.317992885652055e-05, "loss": 0.4866, "step": 4066 }, { "epoch": 1.1382591659669745, "grad_norm": 0.2273204215532043, "learning_rate": 7.316625990739833e-05, "loss": 0.4924, "step": 4067 }, { "epoch": 1.1385390428211588, "grad_norm": 0.22467194139826965, "learning_rate": 7.315258875329855e-05, "loss": 0.4875, "step": 4068 }, { "epoch": 1.1388189196753429, "grad_norm": 0.2892321884351586, "learning_rate": 7.313891539552241e-05, "loss": 0.5072, "step": 4069 }, { "epoch": 1.139098796529527, "grad_norm": 0.22352433560311688, "learning_rate": 7.312523983537135e-05, "loss": 0.4962, "step": 4070 }, { "epoch": 1.1393786733837112, "grad_norm": 0.23113575822496088, "learning_rate": 7.311156207414702e-05, "loss": 0.5091, "step": 4071 }, { "epoch": 1.1396585502378953, "grad_norm": 0.22610375100898752, "learning_rate": 7.309788211315126e-05, "loss": 0.5098, "step": 4072 }, { "epoch": 1.1399384270920794, "grad_norm": 0.22518720418436866, "learning_rate": 7.308419995368616e-05, "loss": 0.5146, "step": 4073 }, { "epoch": 1.1402183039462637, "grad_norm": 0.23821428509493398, "learning_rate": 7.307051559705399e-05, "loss": 0.5393, "step": 4074 }, { "epoch": 1.1404981808004477, "grad_norm": 0.22244560903461974, "learning_rate": 7.305682904455723e-05, "loss": 0.4794, "step": 4075 }, { "epoch": 1.140778057654632, "grad_norm": 0.21767576164824462, "learning_rate": 7.304314029749859e-05, "loss": 0.4936, "step": 4076 }, { "epoch": 1.141057934508816, "grad_norm": 0.21992091203741246, "learning_rate": 7.302944935718095e-05, "loss": 0.4904, "step": 4077 }, { "epoch": 1.1413378113630004, "grad_norm": 0.2188229441835584, "learning_rate": 7.301575622490742e-05, "loss": 0.4983, "step": 4078 }, { "epoch": 1.1416176882171845, "grad_norm": 0.22609618421285535, "learning_rate": 7.300206090198134e-05, "loss": 0.5382, "step": 4079 }, { "epoch": 1.1418975650713685, "grad_norm": 0.2218307408425978, "learning_rate": 7.298836338970622e-05, "loss": 0.482, "step": 4080 }, { "epoch": 1.1421774419255528, "grad_norm": 0.22681541463674998, "learning_rate": 7.297466368938581e-05, "loss": 0.5243, "step": 4081 }, { "epoch": 1.1424573187797369, "grad_norm": 0.22496500993361587, "learning_rate": 7.296096180232406e-05, "loss": 0.5002, "step": 4082 }, { "epoch": 1.142737195633921, "grad_norm": 0.23006353150031264, "learning_rate": 7.29472577298251e-05, "loss": 0.5081, "step": 4083 }, { "epoch": 1.1430170724881052, "grad_norm": 0.21456278073237026, "learning_rate": 7.293355147319331e-05, "loss": 0.4965, "step": 4084 }, { "epoch": 1.1432969493422893, "grad_norm": 0.2204601380611181, "learning_rate": 7.291984303373326e-05, "loss": 0.5226, "step": 4085 }, { "epoch": 1.1435768261964736, "grad_norm": 0.22113479429311303, "learning_rate": 7.290613241274972e-05, "loss": 0.5146, "step": 4086 }, { "epoch": 1.1438567030506577, "grad_norm": 0.22229384157109938, "learning_rate": 7.289241961154766e-05, "loss": 0.4957, "step": 4087 }, { "epoch": 1.144136579904842, "grad_norm": 0.2193742084806028, "learning_rate": 7.287870463143232e-05, "loss": 0.4763, "step": 4088 }, { "epoch": 1.144416456759026, "grad_norm": 0.22541904725437054, "learning_rate": 7.286498747370904e-05, "loss": 0.5091, "step": 4089 }, { "epoch": 1.14469633361321, "grad_norm": 0.22786417978835122, "learning_rate": 7.285126813968346e-05, "loss": 0.5159, "step": 4090 }, { "epoch": 1.1449762104673944, "grad_norm": 0.21919232932500993, "learning_rate": 7.28375466306614e-05, "loss": 0.5137, "step": 4091 }, { "epoch": 1.1452560873215785, "grad_norm": 0.22803902956656727, "learning_rate": 7.282382294794884e-05, "loss": 0.5113, "step": 4092 }, { "epoch": 1.1455359641757628, "grad_norm": 0.23313279194119976, "learning_rate": 7.281009709285207e-05, "loss": 0.5349, "step": 4093 }, { "epoch": 1.1458158410299468, "grad_norm": 0.235238941798849, "learning_rate": 7.279636906667747e-05, "loss": 0.5125, "step": 4094 }, { "epoch": 1.146095717884131, "grad_norm": 0.23183299771869897, "learning_rate": 7.278263887073172e-05, "loss": 0.5029, "step": 4095 }, { "epoch": 1.1463755947383152, "grad_norm": 0.22936431756748155, "learning_rate": 7.276890650632163e-05, "loss": 0.4942, "step": 4096 }, { "epoch": 1.1466554715924993, "grad_norm": 0.22496097541144508, "learning_rate": 7.275517197475429e-05, "loss": 0.519, "step": 4097 }, { "epoch": 1.1469353484466835, "grad_norm": 0.22325961646468948, "learning_rate": 7.274143527733695e-05, "loss": 0.4933, "step": 4098 }, { "epoch": 1.1472152253008676, "grad_norm": 0.22062466364741165, "learning_rate": 7.272769641537705e-05, "loss": 0.5147, "step": 4099 }, { "epoch": 1.1474951021550517, "grad_norm": 0.21330436108753204, "learning_rate": 7.27139553901823e-05, "loss": 0.5201, "step": 4100 }, { "epoch": 1.147774979009236, "grad_norm": 0.24501083098398252, "learning_rate": 7.270021220306056e-05, "loss": 0.4954, "step": 4101 }, { "epoch": 1.14805485586342, "grad_norm": 0.22131722594544098, "learning_rate": 7.268646685531991e-05, "loss": 0.5157, "step": 4102 }, { "epoch": 1.1483347327176043, "grad_norm": 0.22862286024502293, "learning_rate": 7.267271934826865e-05, "loss": 0.491, "step": 4103 }, { "epoch": 1.1486146095717884, "grad_norm": 0.2225709660910424, "learning_rate": 7.265896968321527e-05, "loss": 0.5188, "step": 4104 }, { "epoch": 1.1488944864259727, "grad_norm": 0.24685751888948226, "learning_rate": 7.264521786146847e-05, "loss": 0.493, "step": 4105 }, { "epoch": 1.1491743632801568, "grad_norm": 0.22542089142703023, "learning_rate": 7.263146388433717e-05, "loss": 0.5055, "step": 4106 }, { "epoch": 1.1494542401343408, "grad_norm": 0.23744703131267442, "learning_rate": 7.261770775313046e-05, "loss": 0.5044, "step": 4107 }, { "epoch": 1.1497341169885251, "grad_norm": 0.21399538877733398, "learning_rate": 7.260394946915767e-05, "loss": 0.4867, "step": 4108 }, { "epoch": 1.1500139938427092, "grad_norm": 0.21812370534388678, "learning_rate": 7.259018903372832e-05, "loss": 0.4928, "step": 4109 }, { "epoch": 1.1502938706968933, "grad_norm": 0.22271726533539943, "learning_rate": 7.257642644815213e-05, "loss": 0.4996, "step": 4110 }, { "epoch": 1.1505737475510776, "grad_norm": 0.22232448280925565, "learning_rate": 7.256266171373905e-05, "loss": 0.5301, "step": 4111 }, { "epoch": 1.1508536244052616, "grad_norm": 0.21768880214177133, "learning_rate": 7.254889483179918e-05, "loss": 0.5005, "step": 4112 }, { "epoch": 1.151133501259446, "grad_norm": 0.23532171554645165, "learning_rate": 7.253512580364288e-05, "loss": 0.5096, "step": 4113 }, { "epoch": 1.15141337811363, "grad_norm": 0.2235180972449941, "learning_rate": 7.25213546305807e-05, "loss": 0.4732, "step": 4114 }, { "epoch": 1.1516932549678143, "grad_norm": 0.22504699821899796, "learning_rate": 7.250758131392336e-05, "loss": 0.4927, "step": 4115 }, { "epoch": 1.1519731318219983, "grad_norm": 0.2310752269631708, "learning_rate": 7.249380585498185e-05, "loss": 0.4986, "step": 4116 }, { "epoch": 1.1522530086761824, "grad_norm": 0.21324067512538059, "learning_rate": 7.248002825506731e-05, "loss": 0.4998, "step": 4117 }, { "epoch": 1.1525328855303667, "grad_norm": 0.25504846748980925, "learning_rate": 7.24662485154911e-05, "loss": 0.5074, "step": 4118 }, { "epoch": 1.1528127623845508, "grad_norm": 0.2207465599901764, "learning_rate": 7.245246663756477e-05, "loss": 0.5229, "step": 4119 }, { "epoch": 1.1530926392387348, "grad_norm": 0.22748257967342314, "learning_rate": 7.243868262260011e-05, "loss": 0.5194, "step": 4120 }, { "epoch": 1.1533725160929191, "grad_norm": 0.23415114877001775, "learning_rate": 7.242489647190907e-05, "loss": 0.523, "step": 4121 }, { "epoch": 1.1536523929471032, "grad_norm": 0.22463898013038625, "learning_rate": 7.241110818680384e-05, "loss": 0.539, "step": 4122 }, { "epoch": 1.1539322698012875, "grad_norm": 0.22484010984193606, "learning_rate": 7.239731776859679e-05, "loss": 0.5062, "step": 4123 }, { "epoch": 1.1542121466554716, "grad_norm": 0.22342956279143927, "learning_rate": 7.238352521860049e-05, "loss": 0.5065, "step": 4124 }, { "epoch": 1.1544920235096559, "grad_norm": 0.23159403676848503, "learning_rate": 7.236973053812774e-05, "loss": 0.4985, "step": 4125 }, { "epoch": 1.15477190036384, "grad_norm": 0.22508726822985886, "learning_rate": 7.235593372849149e-05, "loss": 0.5014, "step": 4126 }, { "epoch": 1.155051777218024, "grad_norm": 0.24508678785258117, "learning_rate": 7.234213479100498e-05, "loss": 0.5241, "step": 4127 }, { "epoch": 1.1553316540722083, "grad_norm": 0.22596941266435985, "learning_rate": 7.232833372698157e-05, "loss": 0.5093, "step": 4128 }, { "epoch": 1.1556115309263923, "grad_norm": 0.21903393064508364, "learning_rate": 7.231453053773486e-05, "loss": 0.5034, "step": 4129 }, { "epoch": 1.1558914077805766, "grad_norm": 0.22118997116070338, "learning_rate": 7.230072522457864e-05, "loss": 0.5053, "step": 4130 }, { "epoch": 1.1561712846347607, "grad_norm": 0.2404046933927392, "learning_rate": 7.228691778882693e-05, "loss": 0.5045, "step": 4131 }, { "epoch": 1.1564511614889448, "grad_norm": 0.22552848637334438, "learning_rate": 7.227310823179388e-05, "loss": 0.5168, "step": 4132 }, { "epoch": 1.156731038343129, "grad_norm": 0.22503819110084003, "learning_rate": 7.225929655479393e-05, "loss": 0.4902, "step": 4133 }, { "epoch": 1.1570109151973131, "grad_norm": 0.21986557301349818, "learning_rate": 7.224548275914169e-05, "loss": 0.4883, "step": 4134 }, { "epoch": 1.1572907920514974, "grad_norm": 0.22799502855172393, "learning_rate": 7.223166684615194e-05, "loss": 0.5064, "step": 4135 }, { "epoch": 1.1575706689056815, "grad_norm": 0.22248076047565904, "learning_rate": 7.22178488171397e-05, "loss": 0.4931, "step": 4136 }, { "epoch": 1.1578505457598656, "grad_norm": 0.2238793104988906, "learning_rate": 7.220402867342015e-05, "loss": 0.5208, "step": 4137 }, { "epoch": 1.1581304226140499, "grad_norm": 0.2252932526520466, "learning_rate": 7.219020641630875e-05, "loss": 0.5022, "step": 4138 }, { "epoch": 1.158410299468234, "grad_norm": 0.2286714548895435, "learning_rate": 7.217638204712107e-05, "loss": 0.509, "step": 4139 }, { "epoch": 1.1586901763224182, "grad_norm": 0.23009376358579406, "learning_rate": 7.216255556717295e-05, "loss": 0.5062, "step": 4140 }, { "epoch": 1.1589700531766023, "grad_norm": 0.23301354568137048, "learning_rate": 7.214872697778037e-05, "loss": 0.4901, "step": 4141 }, { "epoch": 1.1592499300307864, "grad_norm": 0.22337006995086173, "learning_rate": 7.213489628025956e-05, "loss": 0.5082, "step": 4142 }, { "epoch": 1.1595298068849706, "grad_norm": 0.23391547958969938, "learning_rate": 7.212106347592694e-05, "loss": 0.5038, "step": 4143 }, { "epoch": 1.1598096837391547, "grad_norm": 0.24235555652130966, "learning_rate": 7.21072285660991e-05, "loss": 0.5189, "step": 4144 }, { "epoch": 1.160089560593339, "grad_norm": 0.2241935902477743, "learning_rate": 7.209339155209289e-05, "loss": 0.5013, "step": 4145 }, { "epoch": 1.160369437447523, "grad_norm": 0.2275127344469746, "learning_rate": 7.20795524352253e-05, "loss": 0.4829, "step": 4146 }, { "epoch": 1.1606493143017071, "grad_norm": 0.22879314084355956, "learning_rate": 7.206571121681356e-05, "loss": 0.5209, "step": 4147 }, { "epoch": 1.1609291911558914, "grad_norm": 0.21831346078803268, "learning_rate": 7.205186789817506e-05, "loss": 0.5183, "step": 4148 }, { "epoch": 1.1612090680100755, "grad_norm": 0.23003191649847718, "learning_rate": 7.203802248062743e-05, "loss": 0.5195, "step": 4149 }, { "epoch": 1.1614889448642598, "grad_norm": 0.2265791854241328, "learning_rate": 7.20241749654885e-05, "loss": 0.5007, "step": 4150 }, { "epoch": 1.1617688217184439, "grad_norm": 0.22236826227261736, "learning_rate": 7.201032535407626e-05, "loss": 0.4808, "step": 4151 }, { "epoch": 1.1620486985726282, "grad_norm": 0.22475326847282717, "learning_rate": 7.199647364770894e-05, "loss": 0.4987, "step": 4152 }, { "epoch": 1.1623285754268122, "grad_norm": 0.2304447791042639, "learning_rate": 7.198261984770493e-05, "loss": 0.5281, "step": 4153 }, { "epoch": 1.1626084522809963, "grad_norm": 0.22080745978818953, "learning_rate": 7.196876395538288e-05, "loss": 0.5264, "step": 4154 }, { "epoch": 1.1628883291351806, "grad_norm": 0.21563963829000318, "learning_rate": 7.195490597206155e-05, "loss": 0.5037, "step": 4155 }, { "epoch": 1.1631682059893647, "grad_norm": 0.22494081118857215, "learning_rate": 7.194104589906e-05, "loss": 0.5261, "step": 4156 }, { "epoch": 1.1634480828435487, "grad_norm": 0.20843236305617785, "learning_rate": 7.192718373769744e-05, "loss": 0.5171, "step": 4157 }, { "epoch": 1.163727959697733, "grad_norm": 0.21074589056143336, "learning_rate": 7.191331948929323e-05, "loss": 0.4952, "step": 4158 }, { "epoch": 1.164007836551917, "grad_norm": 0.2378016825363355, "learning_rate": 7.189945315516702e-05, "loss": 0.5065, "step": 4159 }, { "epoch": 1.1642877134061014, "grad_norm": 0.2231089660276172, "learning_rate": 7.18855847366386e-05, "loss": 0.5114, "step": 4160 }, { "epoch": 1.1645675902602854, "grad_norm": 0.21688883427276945, "learning_rate": 7.187171423502796e-05, "loss": 0.5037, "step": 4161 }, { "epoch": 1.1648474671144697, "grad_norm": 0.21732412053294106, "learning_rate": 7.185784165165534e-05, "loss": 0.518, "step": 4162 }, { "epoch": 1.1651273439686538, "grad_norm": 0.21816360877095778, "learning_rate": 7.18439669878411e-05, "loss": 0.5123, "step": 4163 }, { "epoch": 1.1654072208228379, "grad_norm": 0.22840278810585588, "learning_rate": 7.183009024490586e-05, "loss": 0.4988, "step": 4164 }, { "epoch": 1.1656870976770222, "grad_norm": 0.22580954724899185, "learning_rate": 7.181621142417041e-05, "loss": 0.4803, "step": 4165 }, { "epoch": 1.1659669745312062, "grad_norm": 0.21973971602955505, "learning_rate": 7.180233052695576e-05, "loss": 0.4917, "step": 4166 }, { "epoch": 1.1662468513853903, "grad_norm": 0.2141544294295572, "learning_rate": 7.178844755458306e-05, "loss": 0.502, "step": 4167 }, { "epoch": 1.1665267282395746, "grad_norm": 0.20975885297654492, "learning_rate": 7.177456250837375e-05, "loss": 0.4885, "step": 4168 }, { "epoch": 1.1668066050937587, "grad_norm": 0.22178780169939083, "learning_rate": 7.176067538964938e-05, "loss": 0.5012, "step": 4169 }, { "epoch": 1.167086481947943, "grad_norm": 0.21691390524881485, "learning_rate": 7.174678619973176e-05, "loss": 0.4961, "step": 4170 }, { "epoch": 1.167366358802127, "grad_norm": 0.2150898334825614, "learning_rate": 7.173289493994284e-05, "loss": 0.5132, "step": 4171 }, { "epoch": 1.1676462356563113, "grad_norm": 0.24284800144882926, "learning_rate": 7.171900161160483e-05, "loss": 0.5265, "step": 4172 }, { "epoch": 1.1679261125104954, "grad_norm": 0.23582397104039957, "learning_rate": 7.170510621604008e-05, "loss": 0.5147, "step": 4173 }, { "epoch": 1.1682059893646795, "grad_norm": 0.2149756660438223, "learning_rate": 7.169120875457117e-05, "loss": 0.4853, "step": 4174 }, { "epoch": 1.1684858662188637, "grad_norm": 0.2256078156739452, "learning_rate": 7.167730922852087e-05, "loss": 0.5093, "step": 4175 }, { "epoch": 1.1687657430730478, "grad_norm": 0.21261342003842387, "learning_rate": 7.166340763921215e-05, "loss": 0.4893, "step": 4176 }, { "epoch": 1.169045619927232, "grad_norm": 0.22808664499214856, "learning_rate": 7.164950398796816e-05, "loss": 0.511, "step": 4177 }, { "epoch": 1.1693254967814162, "grad_norm": 0.2141617495811512, "learning_rate": 7.163559827611227e-05, "loss": 0.4953, "step": 4178 }, { "epoch": 1.1696053736356002, "grad_norm": 0.2253734698665144, "learning_rate": 7.162169050496803e-05, "loss": 0.4878, "step": 4179 }, { "epoch": 1.1698852504897845, "grad_norm": 0.22424223754322692, "learning_rate": 7.160778067585917e-05, "loss": 0.5277, "step": 4180 }, { "epoch": 1.1701651273439686, "grad_norm": 0.230645854225705, "learning_rate": 7.159386879010967e-05, "loss": 0.5069, "step": 4181 }, { "epoch": 1.170445004198153, "grad_norm": 0.24414442942752934, "learning_rate": 7.157995484904362e-05, "loss": 0.5309, "step": 4182 }, { "epoch": 1.170724881052337, "grad_norm": 0.2246046772363703, "learning_rate": 7.156603885398542e-05, "loss": 0.4954, "step": 4183 }, { "epoch": 1.171004757906521, "grad_norm": 0.21183764566664356, "learning_rate": 7.155212080625955e-05, "loss": 0.5322, "step": 4184 }, { "epoch": 1.1712846347607053, "grad_norm": 0.2370294973889655, "learning_rate": 7.153820070719077e-05, "loss": 0.5157, "step": 4185 }, { "epoch": 1.1715645116148894, "grad_norm": 0.22460134322724573, "learning_rate": 7.1524278558104e-05, "loss": 0.5063, "step": 4186 }, { "epoch": 1.1718443884690737, "grad_norm": 0.22926016536982338, "learning_rate": 7.151035436032434e-05, "loss": 0.5261, "step": 4187 }, { "epoch": 1.1721242653232578, "grad_norm": 0.23804642643452714, "learning_rate": 7.149642811517712e-05, "loss": 0.527, "step": 4188 }, { "epoch": 1.172404142177442, "grad_norm": 0.21963346274539094, "learning_rate": 7.148249982398783e-05, "loss": 0.516, "step": 4189 }, { "epoch": 1.1726840190316261, "grad_norm": 0.23088774948417135, "learning_rate": 7.146856948808217e-05, "loss": 0.5006, "step": 4190 }, { "epoch": 1.1729638958858102, "grad_norm": 0.21627511010611178, "learning_rate": 7.145463710878607e-05, "loss": 0.4886, "step": 4191 }, { "epoch": 1.1732437727399945, "grad_norm": 0.22502464689520862, "learning_rate": 7.14407026874256e-05, "loss": 0.5054, "step": 4192 }, { "epoch": 1.1735236495941785, "grad_norm": 0.2138403320112918, "learning_rate": 7.142676622532702e-05, "loss": 0.5093, "step": 4193 }, { "epoch": 1.1738035264483626, "grad_norm": 0.21659948998091277, "learning_rate": 7.141282772381687e-05, "loss": 0.4971, "step": 4194 }, { "epoch": 1.174083403302547, "grad_norm": 0.2263863822165275, "learning_rate": 7.139888718422177e-05, "loss": 0.5011, "step": 4195 }, { "epoch": 1.174363280156731, "grad_norm": 0.22543225536791175, "learning_rate": 7.138494460786864e-05, "loss": 0.4875, "step": 4196 }, { "epoch": 1.1746431570109153, "grad_norm": 0.23198190772915978, "learning_rate": 7.137099999608449e-05, "loss": 0.534, "step": 4197 }, { "epoch": 1.1749230338650993, "grad_norm": 0.22694380435663544, "learning_rate": 7.13570533501966e-05, "loss": 0.5159, "step": 4198 }, { "epoch": 1.1752029107192836, "grad_norm": 0.22794374417325902, "learning_rate": 7.134310467153243e-05, "loss": 0.5135, "step": 4199 }, { "epoch": 1.1754827875734677, "grad_norm": 0.22704528252422798, "learning_rate": 7.132915396141959e-05, "loss": 0.5075, "step": 4200 }, { "epoch": 1.1757626644276518, "grad_norm": 0.2188067658947318, "learning_rate": 7.131520122118594e-05, "loss": 0.5219, "step": 4201 }, { "epoch": 1.176042541281836, "grad_norm": 0.22755473228578177, "learning_rate": 7.130124645215952e-05, "loss": 0.5111, "step": 4202 }, { "epoch": 1.1763224181360201, "grad_norm": 0.21692041672620438, "learning_rate": 7.128728965566853e-05, "loss": 0.5159, "step": 4203 }, { "epoch": 1.1766022949902042, "grad_norm": 0.2301526320218446, "learning_rate": 7.12733308330414e-05, "loss": 0.5236, "step": 4204 }, { "epoch": 1.1768821718443885, "grad_norm": 0.225977373758813, "learning_rate": 7.125936998560676e-05, "loss": 0.4887, "step": 4205 }, { "epoch": 1.1771620486985725, "grad_norm": 0.22064171515702388, "learning_rate": 7.124540711469336e-05, "loss": 0.5013, "step": 4206 }, { "epoch": 1.1774419255527568, "grad_norm": 0.21741836774697432, "learning_rate": 7.123144222163021e-05, "loss": 0.5069, "step": 4207 }, { "epoch": 1.177721802406941, "grad_norm": 0.24009228236769486, "learning_rate": 7.121747530774652e-05, "loss": 0.5293, "step": 4208 }, { "epoch": 1.1780016792611252, "grad_norm": 0.22672478401477372, "learning_rate": 7.120350637437165e-05, "loss": 0.494, "step": 4209 }, { "epoch": 1.1782815561153093, "grad_norm": 0.21899359915851813, "learning_rate": 7.118953542283518e-05, "loss": 0.5135, "step": 4210 }, { "epoch": 1.1785614329694933, "grad_norm": 0.22205936151888048, "learning_rate": 7.117556245446685e-05, "loss": 0.5003, "step": 4211 }, { "epoch": 1.1788413098236776, "grad_norm": 0.22632344881796398, "learning_rate": 7.116158747059664e-05, "loss": 0.5193, "step": 4212 }, { "epoch": 1.1791211866778617, "grad_norm": 0.2198089447085492, "learning_rate": 7.11476104725547e-05, "loss": 0.4851, "step": 4213 }, { "epoch": 1.179401063532046, "grad_norm": 0.21986722770148254, "learning_rate": 7.113363146167138e-05, "loss": 0.4945, "step": 4214 }, { "epoch": 1.17968094038623, "grad_norm": 0.22086848798747755, "learning_rate": 7.111965043927715e-05, "loss": 0.4991, "step": 4215 }, { "epoch": 1.1799608172404141, "grad_norm": 0.21428595212648865, "learning_rate": 7.11056674067028e-05, "loss": 0.5025, "step": 4216 }, { "epoch": 1.1802406940945984, "grad_norm": 0.23039218794510508, "learning_rate": 7.109168236527919e-05, "loss": 0.4839, "step": 4217 }, { "epoch": 1.1805205709487825, "grad_norm": 0.2227231307963435, "learning_rate": 7.107769531633745e-05, "loss": 0.5128, "step": 4218 }, { "epoch": 1.1808004478029668, "grad_norm": 0.22568155647611768, "learning_rate": 7.106370626120887e-05, "loss": 0.5136, "step": 4219 }, { "epoch": 1.1810803246571508, "grad_norm": 0.2310292125877394, "learning_rate": 7.104971520122495e-05, "loss": 0.4963, "step": 4220 }, { "epoch": 1.181360201511335, "grad_norm": 0.21907795572412855, "learning_rate": 7.103572213771734e-05, "loss": 0.4986, "step": 4221 }, { "epoch": 1.1816400783655192, "grad_norm": 0.2307946285519689, "learning_rate": 7.102172707201793e-05, "loss": 0.4923, "step": 4222 }, { "epoch": 1.1819199552197033, "grad_norm": 0.2202551400353409, "learning_rate": 7.100773000545879e-05, "loss": 0.498, "step": 4223 }, { "epoch": 1.1821998320738876, "grad_norm": 0.226422990684998, "learning_rate": 7.099373093937213e-05, "loss": 0.5214, "step": 4224 }, { "epoch": 1.1824797089280716, "grad_norm": 0.2181716687563738, "learning_rate": 7.09797298750904e-05, "loss": 0.5208, "step": 4225 }, { "epoch": 1.182759585782256, "grad_norm": 0.22933086486553092, "learning_rate": 7.096572681394625e-05, "loss": 0.5244, "step": 4226 }, { "epoch": 1.18303946263644, "grad_norm": 0.22881353509664235, "learning_rate": 7.095172175727247e-05, "loss": 0.5068, "step": 4227 }, { "epoch": 1.183319339490624, "grad_norm": 0.22088408171918741, "learning_rate": 7.093771470640211e-05, "loss": 0.5028, "step": 4228 }, { "epoch": 1.1835992163448084, "grad_norm": 0.22761263480559474, "learning_rate": 7.092370566266834e-05, "loss": 0.5163, "step": 4229 }, { "epoch": 1.1838790931989924, "grad_norm": 0.2255300599919595, "learning_rate": 7.090969462740454e-05, "loss": 0.5177, "step": 4230 }, { "epoch": 1.1841589700531765, "grad_norm": 0.22184946442958645, "learning_rate": 7.089568160194431e-05, "loss": 0.4953, "step": 4231 }, { "epoch": 1.1844388469073608, "grad_norm": 0.22584014466341526, "learning_rate": 7.088166658762143e-05, "loss": 0.5073, "step": 4232 }, { "epoch": 1.1847187237615449, "grad_norm": 0.2180202061909379, "learning_rate": 7.086764958576982e-05, "loss": 0.4949, "step": 4233 }, { "epoch": 1.1849986006157291, "grad_norm": 0.22643632655484283, "learning_rate": 7.085363059772364e-05, "loss": 0.5312, "step": 4234 }, { "epoch": 1.1852784774699132, "grad_norm": 0.21482765251570712, "learning_rate": 7.083960962481721e-05, "loss": 0.4775, "step": 4235 }, { "epoch": 1.1855583543240975, "grad_norm": 0.22397023021744042, "learning_rate": 7.082558666838508e-05, "loss": 0.4901, "step": 4236 }, { "epoch": 1.1858382311782816, "grad_norm": 0.2187701538506456, "learning_rate": 7.081156172976197e-05, "loss": 0.4916, "step": 4237 }, { "epoch": 1.1861181080324656, "grad_norm": 0.22276752537598846, "learning_rate": 7.079753481028275e-05, "loss": 0.5131, "step": 4238 }, { "epoch": 1.18639798488665, "grad_norm": 0.2262761299765684, "learning_rate": 7.078350591128253e-05, "loss": 0.498, "step": 4239 }, { "epoch": 1.186677861740834, "grad_norm": 0.21780706068190353, "learning_rate": 7.076947503409659e-05, "loss": 0.5116, "step": 4240 }, { "epoch": 1.186957738595018, "grad_norm": 0.23506301305915292, "learning_rate": 7.07554421800604e-05, "loss": 0.5365, "step": 4241 }, { "epoch": 1.1872376154492024, "grad_norm": 0.2266700959774239, "learning_rate": 7.07414073505096e-05, "loss": 0.507, "step": 4242 }, { "epoch": 1.1875174923033864, "grad_norm": 0.22107972963006747, "learning_rate": 7.072737054678003e-05, "loss": 0.5124, "step": 4243 }, { "epoch": 1.1877973691575707, "grad_norm": 0.21329136604897145, "learning_rate": 7.071333177020774e-05, "loss": 0.4866, "step": 4244 }, { "epoch": 1.1880772460117548, "grad_norm": 0.22066376242840038, "learning_rate": 7.069929102212892e-05, "loss": 0.5257, "step": 4245 }, { "epoch": 1.188357122865939, "grad_norm": 0.22224000689293583, "learning_rate": 7.068524830388e-05, "loss": 0.4757, "step": 4246 }, { "epoch": 1.1886369997201232, "grad_norm": 0.23408366010403492, "learning_rate": 7.067120361679758e-05, "loss": 0.5311, "step": 4247 }, { "epoch": 1.1889168765743072, "grad_norm": 0.23003287864002572, "learning_rate": 7.065715696221843e-05, "loss": 0.5086, "step": 4248 }, { "epoch": 1.1891967534284915, "grad_norm": 0.2195987273406075, "learning_rate": 7.064310834147951e-05, "loss": 0.4967, "step": 4249 }, { "epoch": 1.1894766302826756, "grad_norm": 0.22065475281740798, "learning_rate": 7.0629057755918e-05, "loss": 0.4955, "step": 4250 }, { "epoch": 1.1897565071368599, "grad_norm": 0.2170458307794216, "learning_rate": 7.06150052068712e-05, "loss": 0.5069, "step": 4251 }, { "epoch": 1.190036383991044, "grad_norm": 0.23148532456129006, "learning_rate": 7.060095069567668e-05, "loss": 0.5116, "step": 4252 }, { "epoch": 1.190316260845228, "grad_norm": 0.23364056097506877, "learning_rate": 7.058689422367212e-05, "loss": 0.5063, "step": 4253 }, { "epoch": 1.1905961376994123, "grad_norm": 0.23397608285019136, "learning_rate": 7.057283579219548e-05, "loss": 0.5086, "step": 4254 }, { "epoch": 1.1908760145535964, "grad_norm": 0.21945407843328624, "learning_rate": 7.05587754025848e-05, "loss": 0.4787, "step": 4255 }, { "epoch": 1.1911558914077807, "grad_norm": 0.22055980090323987, "learning_rate": 7.054471305617837e-05, "loss": 0.4912, "step": 4256 }, { "epoch": 1.1914357682619647, "grad_norm": 0.23223658919043302, "learning_rate": 7.053064875431465e-05, "loss": 0.5028, "step": 4257 }, { "epoch": 1.1917156451161488, "grad_norm": 0.22158566582115585, "learning_rate": 7.051658249833228e-05, "loss": 0.5224, "step": 4258 }, { "epoch": 1.191995521970333, "grad_norm": 0.231116800212288, "learning_rate": 7.050251428957013e-05, "loss": 0.5074, "step": 4259 }, { "epoch": 1.1922753988245172, "grad_norm": 0.2209692838660835, "learning_rate": 7.048844412936719e-05, "loss": 0.4882, "step": 4260 }, { "epoch": 1.1925552756787015, "grad_norm": 0.21651888268196262, "learning_rate": 7.047437201906265e-05, "loss": 0.4616, "step": 4261 }, { "epoch": 1.1928351525328855, "grad_norm": 0.23096357615514918, "learning_rate": 7.046029795999592e-05, "loss": 0.504, "step": 4262 }, { "epoch": 1.1931150293870696, "grad_norm": 0.22201221291485465, "learning_rate": 7.044622195350658e-05, "loss": 0.515, "step": 4263 }, { "epoch": 1.1933949062412539, "grad_norm": 0.229046761714028, "learning_rate": 7.04321440009344e-05, "loss": 0.4885, "step": 4264 }, { "epoch": 1.193674783095438, "grad_norm": 0.22760846530361756, "learning_rate": 7.041806410361933e-05, "loss": 0.4941, "step": 4265 }, { "epoch": 1.1939546599496222, "grad_norm": 0.22472571842244649, "learning_rate": 7.040398226290148e-05, "loss": 0.4957, "step": 4266 }, { "epoch": 1.1942345368038063, "grad_norm": 0.22740631147937912, "learning_rate": 7.038989848012116e-05, "loss": 0.498, "step": 4267 }, { "epoch": 1.1945144136579904, "grad_norm": 0.222479304520405, "learning_rate": 7.037581275661891e-05, "loss": 0.5029, "step": 4268 }, { "epoch": 1.1947942905121747, "grad_norm": 0.21828111993437252, "learning_rate": 7.036172509373539e-05, "loss": 0.5176, "step": 4269 }, { "epoch": 1.1950741673663587, "grad_norm": 0.2244098147858522, "learning_rate": 7.034763549281149e-05, "loss": 0.4921, "step": 4270 }, { "epoch": 1.195354044220543, "grad_norm": 0.23001952760016786, "learning_rate": 7.033354395518823e-05, "loss": 0.4849, "step": 4271 }, { "epoch": 1.195633921074727, "grad_norm": 0.2191270609470566, "learning_rate": 7.031945048220689e-05, "loss": 0.4908, "step": 4272 }, { "epoch": 1.1959137979289114, "grad_norm": 0.2197867383422929, "learning_rate": 7.030535507520889e-05, "loss": 0.5046, "step": 4273 }, { "epoch": 1.1961936747830955, "grad_norm": 0.2198926648283125, "learning_rate": 7.02912577355358e-05, "loss": 0.4663, "step": 4274 }, { "epoch": 1.1964735516372795, "grad_norm": 0.22876495991098986, "learning_rate": 7.027715846452947e-05, "loss": 0.5068, "step": 4275 }, { "epoch": 1.1967534284914638, "grad_norm": 0.2186226872262562, "learning_rate": 7.026305726353184e-05, "loss": 0.5076, "step": 4276 }, { "epoch": 1.197033305345648, "grad_norm": 0.23654969360520095, "learning_rate": 7.024895413388508e-05, "loss": 0.5576, "step": 4277 }, { "epoch": 1.197313182199832, "grad_norm": 0.23619070584067375, "learning_rate": 7.023484907693153e-05, "loss": 0.4998, "step": 4278 }, { "epoch": 1.1975930590540163, "grad_norm": 0.2286984082526474, "learning_rate": 7.02207420940137e-05, "loss": 0.4986, "step": 4279 }, { "epoch": 1.1978729359082003, "grad_norm": 0.23121220371959625, "learning_rate": 7.020663318647433e-05, "loss": 0.52, "step": 4280 }, { "epoch": 1.1981528127623846, "grad_norm": 0.22056528872688555, "learning_rate": 7.019252235565632e-05, "loss": 0.5063, "step": 4281 }, { "epoch": 1.1984326896165687, "grad_norm": 0.21282310607009033, "learning_rate": 7.017840960290272e-05, "loss": 0.4889, "step": 4282 }, { "epoch": 1.198712566470753, "grad_norm": 0.2169820340377364, "learning_rate": 7.01642949295568e-05, "loss": 0.5072, "step": 4283 }, { "epoch": 1.198992443324937, "grad_norm": 0.22417296859401595, "learning_rate": 7.015017833696199e-05, "loss": 0.4898, "step": 4284 }, { "epoch": 1.199272320179121, "grad_norm": 0.2292495309509333, "learning_rate": 7.013605982646195e-05, "loss": 0.5063, "step": 4285 }, { "epoch": 1.1995521970333054, "grad_norm": 0.2256952971670304, "learning_rate": 7.012193939940045e-05, "loss": 0.4951, "step": 4286 }, { "epoch": 1.1998320738874895, "grad_norm": 0.22250011293368643, "learning_rate": 7.01078170571215e-05, "loss": 0.4935, "step": 4287 }, { "epoch": 1.2001119507416735, "grad_norm": 0.22529443222396162, "learning_rate": 7.009369280096926e-05, "loss": 0.4904, "step": 4288 }, { "epoch": 1.2003918275958578, "grad_norm": 0.21903130349810393, "learning_rate": 7.007956663228809e-05, "loss": 0.4954, "step": 4289 }, { "epoch": 1.200671704450042, "grad_norm": 0.2225428572951667, "learning_rate": 7.006543855242254e-05, "loss": 0.5008, "step": 4290 }, { "epoch": 1.2009515813042262, "grad_norm": 0.2218727051149631, "learning_rate": 7.005130856271731e-05, "loss": 0.4875, "step": 4291 }, { "epoch": 1.2012314581584103, "grad_norm": 0.21450334615811675, "learning_rate": 7.003717666451732e-05, "loss": 0.4835, "step": 4292 }, { "epoch": 1.2015113350125946, "grad_norm": 0.22975132976395687, "learning_rate": 7.002304285916762e-05, "loss": 0.4843, "step": 4293 }, { "epoch": 1.2017912118667786, "grad_norm": 0.23056674838049043, "learning_rate": 7.000890714801351e-05, "loss": 0.4847, "step": 4294 }, { "epoch": 1.2020710887209627, "grad_norm": 0.22610304577022208, "learning_rate": 6.999476953240042e-05, "loss": 0.5117, "step": 4295 }, { "epoch": 1.202350965575147, "grad_norm": 0.22731879058846147, "learning_rate": 6.998063001367397e-05, "loss": 0.5054, "step": 4296 }, { "epoch": 1.202630842429331, "grad_norm": 0.22615544087622722, "learning_rate": 6.996648859317995e-05, "loss": 0.4931, "step": 4297 }, { "epoch": 1.2029107192835153, "grad_norm": 0.22247817324041005, "learning_rate": 6.99523452722644e-05, "loss": 0.4965, "step": 4298 }, { "epoch": 1.2031905961376994, "grad_norm": 0.2175063462583486, "learning_rate": 6.993820005227343e-05, "loss": 0.5088, "step": 4299 }, { "epoch": 1.2034704729918835, "grad_norm": 0.22253078911614768, "learning_rate": 6.992405293455346e-05, "loss": 0.5166, "step": 4300 }, { "epoch": 1.2037503498460678, "grad_norm": 0.2184481026106758, "learning_rate": 6.990990392045095e-05, "loss": 0.5075, "step": 4301 }, { "epoch": 1.2040302267002518, "grad_norm": 0.23101561958891997, "learning_rate": 6.989575301131264e-05, "loss": 0.5193, "step": 4302 }, { "epoch": 1.2043101035544361, "grad_norm": 0.21959358271302212, "learning_rate": 6.988160020848543e-05, "loss": 0.5274, "step": 4303 }, { "epoch": 1.2045899804086202, "grad_norm": 0.23230253016100438, "learning_rate": 6.98674455133164e-05, "loss": 0.5068, "step": 4304 }, { "epoch": 1.2048698572628043, "grad_norm": 0.2245698445284393, "learning_rate": 6.985328892715275e-05, "loss": 0.479, "step": 4305 }, { "epoch": 1.2051497341169886, "grad_norm": 0.2204339776275824, "learning_rate": 6.983913045134197e-05, "loss": 0.4945, "step": 4306 }, { "epoch": 1.2054296109711726, "grad_norm": 0.21126067215074626, "learning_rate": 6.982497008723164e-05, "loss": 0.508, "step": 4307 }, { "epoch": 1.205709487825357, "grad_norm": 0.21710117348999522, "learning_rate": 6.981080783616958e-05, "loss": 0.5073, "step": 4308 }, { "epoch": 1.205989364679541, "grad_norm": 0.22228130817799446, "learning_rate": 6.979664369950371e-05, "loss": 0.5185, "step": 4309 }, { "epoch": 1.2062692415337253, "grad_norm": 0.23298061886059024, "learning_rate": 6.978247767858224e-05, "loss": 0.5362, "step": 4310 }, { "epoch": 1.2065491183879093, "grad_norm": 0.2213775602242786, "learning_rate": 6.976830977475346e-05, "loss": 0.4994, "step": 4311 }, { "epoch": 1.2068289952420934, "grad_norm": 0.22193225682948606, "learning_rate": 6.97541399893659e-05, "loss": 0.5179, "step": 4312 }, { "epoch": 1.2071088720962777, "grad_norm": 0.2147931682359676, "learning_rate": 6.973996832376823e-05, "loss": 0.4959, "step": 4313 }, { "epoch": 1.2073887489504618, "grad_norm": 0.22157983408144044, "learning_rate": 6.972579477930933e-05, "loss": 0.487, "step": 4314 }, { "epoch": 1.2076686258046458, "grad_norm": 0.221426402302768, "learning_rate": 6.971161935733823e-05, "loss": 0.4954, "step": 4315 }, { "epoch": 1.2079485026588301, "grad_norm": 0.22797227652978325, "learning_rate": 6.969744205920419e-05, "loss": 0.4935, "step": 4316 }, { "epoch": 1.2082283795130142, "grad_norm": 0.22617728962444442, "learning_rate": 6.968326288625658e-05, "loss": 0.5155, "step": 4317 }, { "epoch": 1.2085082563671985, "grad_norm": 0.26518826871724366, "learning_rate": 6.966908183984497e-05, "loss": 0.5305, "step": 4318 }, { "epoch": 1.2087881332213826, "grad_norm": 0.2257023193535358, "learning_rate": 6.965489892131917e-05, "loss": 0.4971, "step": 4319 }, { "epoch": 1.2090680100755669, "grad_norm": 0.22311422674947592, "learning_rate": 6.964071413202907e-05, "loss": 0.492, "step": 4320 }, { "epoch": 1.209347886929751, "grad_norm": 0.2276132950462771, "learning_rate": 6.962652747332481e-05, "loss": 0.504, "step": 4321 }, { "epoch": 1.209627763783935, "grad_norm": 0.23757199110528004, "learning_rate": 6.961233894655669e-05, "loss": 0.508, "step": 4322 }, { "epoch": 1.2099076406381193, "grad_norm": 0.22065205456398929, "learning_rate": 6.959814855307516e-05, "loss": 0.5089, "step": 4323 }, { "epoch": 1.2101875174923034, "grad_norm": 0.22791538874212347, "learning_rate": 6.958395629423088e-05, "loss": 0.5095, "step": 4324 }, { "epoch": 1.2104673943464874, "grad_norm": 0.22710981503104447, "learning_rate": 6.95697621713747e-05, "loss": 0.4876, "step": 4325 }, { "epoch": 1.2107472712006717, "grad_norm": 0.22701901738159178, "learning_rate": 6.955556618585758e-05, "loss": 0.4939, "step": 4326 }, { "epoch": 1.2110271480548558, "grad_norm": 0.2134448349238976, "learning_rate": 6.954136833903073e-05, "loss": 0.5135, "step": 4327 }, { "epoch": 1.21130702490904, "grad_norm": 0.2279914956284989, "learning_rate": 6.952716863224551e-05, "loss": 0.5045, "step": 4328 }, { "epoch": 1.2115869017632241, "grad_norm": 0.22369873232593462, "learning_rate": 6.951296706685344e-05, "loss": 0.5349, "step": 4329 }, { "epoch": 1.2118667786174084, "grad_norm": 0.22437327522413142, "learning_rate": 6.949876364420624e-05, "loss": 0.499, "step": 4330 }, { "epoch": 1.2121466554715925, "grad_norm": 0.2354358076812393, "learning_rate": 6.948455836565582e-05, "loss": 0.5251, "step": 4331 }, { "epoch": 1.2124265323257766, "grad_norm": 0.23018675432170763, "learning_rate": 6.947035123255421e-05, "loss": 0.5113, "step": 4332 }, { "epoch": 1.2127064091799609, "grad_norm": 0.22635467212494143, "learning_rate": 6.945614224625368e-05, "loss": 0.4983, "step": 4333 }, { "epoch": 1.212986286034145, "grad_norm": 0.2296508815181712, "learning_rate": 6.944193140810664e-05, "loss": 0.5045, "step": 4334 }, { "epoch": 1.2132661628883292, "grad_norm": 0.21446795403491542, "learning_rate": 6.94277187194657e-05, "loss": 0.4984, "step": 4335 }, { "epoch": 1.2135460397425133, "grad_norm": 0.22026288884944412, "learning_rate": 6.941350418168359e-05, "loss": 0.5024, "step": 4336 }, { "epoch": 1.2138259165966974, "grad_norm": 0.21403543573421865, "learning_rate": 6.93992877961133e-05, "loss": 0.4955, "step": 4337 }, { "epoch": 1.2141057934508817, "grad_norm": 0.22403749573915716, "learning_rate": 6.938506956410795e-05, "loss": 0.4981, "step": 4338 }, { "epoch": 1.2143856703050657, "grad_norm": 0.22715026917631595, "learning_rate": 6.937084948702081e-05, "loss": 0.5212, "step": 4339 }, { "epoch": 1.21466554715925, "grad_norm": 0.22249576289313944, "learning_rate": 6.93566275662054e-05, "loss": 0.5029, "step": 4340 }, { "epoch": 1.214945424013434, "grad_norm": 0.22576346777078676, "learning_rate": 6.934240380301532e-05, "loss": 0.5128, "step": 4341 }, { "epoch": 1.2152253008676182, "grad_norm": 0.2326453627873477, "learning_rate": 6.932817819880442e-05, "loss": 0.5163, "step": 4342 }, { "epoch": 1.2155051777218024, "grad_norm": 0.22232628361222592, "learning_rate": 6.931395075492671e-05, "loss": 0.4873, "step": 4343 }, { "epoch": 1.2157850545759865, "grad_norm": 0.21402567488069954, "learning_rate": 6.929972147273636e-05, "loss": 0.5165, "step": 4344 }, { "epoch": 1.2160649314301708, "grad_norm": 0.21582883929277186, "learning_rate": 6.928549035358772e-05, "loss": 0.4906, "step": 4345 }, { "epoch": 1.2163448082843549, "grad_norm": 0.23829116546080992, "learning_rate": 6.92712573988353e-05, "loss": 0.4942, "step": 4346 }, { "epoch": 1.2166246851385392, "grad_norm": 0.21699945188045142, "learning_rate": 6.925702260983381e-05, "loss": 0.5, "step": 4347 }, { "epoch": 1.2169045619927232, "grad_norm": 0.22263458258539465, "learning_rate": 6.924278598793814e-05, "loss": 0.4832, "step": 4348 }, { "epoch": 1.2171844388469073, "grad_norm": 0.22228713628757352, "learning_rate": 6.922854753450333e-05, "loss": 0.5084, "step": 4349 }, { "epoch": 1.2174643157010916, "grad_norm": 0.23887434738098665, "learning_rate": 6.92143072508846e-05, "loss": 0.5052, "step": 4350 }, { "epoch": 1.2177441925552757, "grad_norm": 0.23523705013624385, "learning_rate": 6.920006513843736e-05, "loss": 0.5279, "step": 4351 }, { "epoch": 1.2180240694094597, "grad_norm": 0.22399348524879345, "learning_rate": 6.918582119851716e-05, "loss": 0.5155, "step": 4352 }, { "epoch": 1.218303946263644, "grad_norm": 0.22735951210011596, "learning_rate": 6.917157543247976e-05, "loss": 0.4958, "step": 4353 }, { "epoch": 1.218583823117828, "grad_norm": 0.22455010629801977, "learning_rate": 6.915732784168109e-05, "loss": 0.5138, "step": 4354 }, { "epoch": 1.2188636999720124, "grad_norm": 0.22523964003086352, "learning_rate": 6.914307842747723e-05, "loss": 0.479, "step": 4355 }, { "epoch": 1.2191435768261965, "grad_norm": 0.22385931106122062, "learning_rate": 6.912882719122445e-05, "loss": 0.5198, "step": 4356 }, { "epoch": 1.2194234536803807, "grad_norm": 0.22893654538163064, "learning_rate": 6.911457413427917e-05, "loss": 0.4957, "step": 4357 }, { "epoch": 1.2197033305345648, "grad_norm": 0.23360101326660163, "learning_rate": 6.910031925799805e-05, "loss": 0.5094, "step": 4358 }, { "epoch": 1.2199832073887489, "grad_norm": 0.22439865759101557, "learning_rate": 6.908606256373785e-05, "loss": 0.4961, "step": 4359 }, { "epoch": 1.2202630842429332, "grad_norm": 0.2130920644271705, "learning_rate": 6.907180405285552e-05, "loss": 0.4832, "step": 4360 }, { "epoch": 1.2205429610971172, "grad_norm": 0.21962313250065543, "learning_rate": 6.905754372670822e-05, "loss": 0.4953, "step": 4361 }, { "epoch": 1.2208228379513013, "grad_norm": 0.23117775885750053, "learning_rate": 6.904328158665323e-05, "loss": 0.4906, "step": 4362 }, { "epoch": 1.2211027148054856, "grad_norm": 0.22927440489987105, "learning_rate": 6.902901763404805e-05, "loss": 0.482, "step": 4363 }, { "epoch": 1.2213825916596697, "grad_norm": 0.22566949847086684, "learning_rate": 6.901475187025032e-05, "loss": 0.4796, "step": 4364 }, { "epoch": 1.221662468513854, "grad_norm": 0.23315687664279067, "learning_rate": 6.900048429661785e-05, "loss": 0.5197, "step": 4365 }, { "epoch": 1.221942345368038, "grad_norm": 0.23302202277015088, "learning_rate": 6.898621491450867e-05, "loss": 0.4977, "step": 4366 }, { "epoch": 1.2222222222222223, "grad_norm": 0.22656513575201978, "learning_rate": 6.89719437252809e-05, "loss": 0.4817, "step": 4367 }, { "epoch": 1.2225020990764064, "grad_norm": 0.23097869734734172, "learning_rate": 6.895767073029293e-05, "loss": 0.489, "step": 4368 }, { "epoch": 1.2227819759305905, "grad_norm": 0.2190357020412073, "learning_rate": 6.894339593090324e-05, "loss": 0.5145, "step": 4369 }, { "epoch": 1.2230618527847748, "grad_norm": 0.2312220645059619, "learning_rate": 6.892911932847053e-05, "loss": 0.5304, "step": 4370 }, { "epoch": 1.2233417296389588, "grad_norm": 0.22147256958326017, "learning_rate": 6.891484092435364e-05, "loss": 0.5085, "step": 4371 }, { "epoch": 1.223621606493143, "grad_norm": 0.2178564902303058, "learning_rate": 6.89005607199116e-05, "loss": 0.5104, "step": 4372 }, { "epoch": 1.2239014833473272, "grad_norm": 0.2373240342136605, "learning_rate": 6.888627871650362e-05, "loss": 0.5133, "step": 4373 }, { "epoch": 1.2241813602015112, "grad_norm": 0.22315431750372944, "learning_rate": 6.887199491548906e-05, "loss": 0.5342, "step": 4374 }, { "epoch": 1.2244612370556955, "grad_norm": 0.22512542970153293, "learning_rate": 6.885770931822745e-05, "loss": 0.4847, "step": 4375 }, { "epoch": 1.2247411139098796, "grad_norm": 0.23573484698243163, "learning_rate": 6.884342192607853e-05, "loss": 0.5104, "step": 4376 }, { "epoch": 1.225020990764064, "grad_norm": 0.22956595334186586, "learning_rate": 6.882913274040214e-05, "loss": 0.5141, "step": 4377 }, { "epoch": 1.225300867618248, "grad_norm": 0.23092574398464455, "learning_rate": 6.881484176255837e-05, "loss": 0.5041, "step": 4378 }, { "epoch": 1.225580744472432, "grad_norm": 0.23105765714019483, "learning_rate": 6.880054899390744e-05, "loss": 0.4865, "step": 4379 }, { "epoch": 1.2258606213266163, "grad_norm": 0.22009969745888194, "learning_rate": 6.878625443580973e-05, "loss": 0.4787, "step": 4380 }, { "epoch": 1.2261404981808004, "grad_norm": 0.22907396351623255, "learning_rate": 6.877195808962579e-05, "loss": 0.4963, "step": 4381 }, { "epoch": 1.2264203750349847, "grad_norm": 0.2260090364840226, "learning_rate": 6.87576599567164e-05, "loss": 0.516, "step": 4382 }, { "epoch": 1.2267002518891688, "grad_norm": 0.23052936858197257, "learning_rate": 6.874336003844241e-05, "loss": 0.5109, "step": 4383 }, { "epoch": 1.226980128743353, "grad_norm": 0.22102925274263935, "learning_rate": 6.872905833616493e-05, "loss": 0.5198, "step": 4384 }, { "epoch": 1.2272600055975371, "grad_norm": 0.22367362492602536, "learning_rate": 6.871475485124518e-05, "loss": 0.514, "step": 4385 }, { "epoch": 1.2275398824517212, "grad_norm": 0.23236912743197236, "learning_rate": 6.870044958504461e-05, "loss": 0.5114, "step": 4386 }, { "epoch": 1.2278197593059055, "grad_norm": 0.22858973761660165, "learning_rate": 6.868614253892478e-05, "loss": 0.4928, "step": 4387 }, { "epoch": 1.2280996361600895, "grad_norm": 0.23860342464787931, "learning_rate": 6.867183371424744e-05, "loss": 0.5112, "step": 4388 }, { "epoch": 1.2283795130142736, "grad_norm": 0.22155547850155127, "learning_rate": 6.86575231123745e-05, "loss": 0.512, "step": 4389 }, { "epoch": 1.228659389868458, "grad_norm": 0.22476076748312143, "learning_rate": 6.864321073466809e-05, "loss": 0.5067, "step": 4390 }, { "epoch": 1.228939266722642, "grad_norm": 0.23491372461400523, "learning_rate": 6.862889658249044e-05, "loss": 0.5079, "step": 4391 }, { "epoch": 1.2292191435768263, "grad_norm": 0.22948361156194794, "learning_rate": 6.861458065720399e-05, "loss": 0.4997, "step": 4392 }, { "epoch": 1.2294990204310103, "grad_norm": 0.23391799870148752, "learning_rate": 6.860026296017132e-05, "loss": 0.5264, "step": 4393 }, { "epoch": 1.2297788972851946, "grad_norm": 0.22897848051188208, "learning_rate": 6.858594349275522e-05, "loss": 0.5161, "step": 4394 }, { "epoch": 1.2300587741393787, "grad_norm": 0.24500574082564155, "learning_rate": 6.85716222563186e-05, "loss": 0.5172, "step": 4395 }, { "epoch": 1.2303386509935628, "grad_norm": 0.2269097252300049, "learning_rate": 6.855729925222462e-05, "loss": 0.5149, "step": 4396 }, { "epoch": 1.230618527847747, "grad_norm": 0.2307053073066549, "learning_rate": 6.854297448183647e-05, "loss": 0.4934, "step": 4397 }, { "epoch": 1.2308984047019311, "grad_norm": 0.2339722588104781, "learning_rate": 6.852864794651765e-05, "loss": 0.517, "step": 4398 }, { "epoch": 1.2311782815561152, "grad_norm": 0.22895011510671143, "learning_rate": 6.851431964763174e-05, "loss": 0.5015, "step": 4399 }, { "epoch": 1.2314581584102995, "grad_norm": 0.2246427196761417, "learning_rate": 6.849998958654252e-05, "loss": 0.526, "step": 4400 }, { "epoch": 1.2317380352644836, "grad_norm": 0.22561048513210016, "learning_rate": 6.848565776461394e-05, "loss": 0.5124, "step": 4401 }, { "epoch": 1.2320179121186678, "grad_norm": 0.21326768138178004, "learning_rate": 6.847132418321012e-05, "loss": 0.4889, "step": 4402 }, { "epoch": 1.232297788972852, "grad_norm": 0.22435921932213576, "learning_rate": 6.845698884369529e-05, "loss": 0.5111, "step": 4403 }, { "epoch": 1.2325776658270362, "grad_norm": 0.2322836935784332, "learning_rate": 6.844265174743396e-05, "loss": 0.5049, "step": 4404 }, { "epoch": 1.2328575426812203, "grad_norm": 0.2274950055276708, "learning_rate": 6.842831289579071e-05, "loss": 0.5006, "step": 4405 }, { "epoch": 1.2331374195354043, "grad_norm": 0.22198212014618518, "learning_rate": 6.841397229013032e-05, "loss": 0.4784, "step": 4406 }, { "epoch": 1.2334172963895886, "grad_norm": 0.2590475668793141, "learning_rate": 6.839962993181775e-05, "loss": 0.5087, "step": 4407 }, { "epoch": 1.2336971732437727, "grad_norm": 0.22235082191551603, "learning_rate": 6.83852858222181e-05, "loss": 0.5102, "step": 4408 }, { "epoch": 1.233977050097957, "grad_norm": 0.22191258055780216, "learning_rate": 6.837093996269665e-05, "loss": 0.4893, "step": 4409 }, { "epoch": 1.234256926952141, "grad_norm": 0.22963042675137446, "learning_rate": 6.835659235461884e-05, "loss": 0.4991, "step": 4410 }, { "epoch": 1.2345368038063251, "grad_norm": 0.2321101229570174, "learning_rate": 6.83422429993503e-05, "loss": 0.5069, "step": 4411 }, { "epoch": 1.2348166806605094, "grad_norm": 0.2198828652135261, "learning_rate": 6.832789189825681e-05, "loss": 0.4802, "step": 4412 }, { "epoch": 1.2350965575146935, "grad_norm": 0.2301212143679133, "learning_rate": 6.831353905270434e-05, "loss": 0.5047, "step": 4413 }, { "epoch": 1.2353764343688778, "grad_norm": 0.228381496483737, "learning_rate": 6.829918446405894e-05, "loss": 0.5093, "step": 4414 }, { "epoch": 1.2356563112230619, "grad_norm": 0.23159916571391817, "learning_rate": 6.828482813368692e-05, "loss": 0.5193, "step": 4415 }, { "epoch": 1.235936188077246, "grad_norm": 0.22818500716053816, "learning_rate": 6.827047006295473e-05, "loss": 0.5011, "step": 4416 }, { "epoch": 1.2362160649314302, "grad_norm": 0.2314663773681912, "learning_rate": 6.825611025322898e-05, "loss": 0.5018, "step": 4417 }, { "epoch": 1.2364959417856143, "grad_norm": 0.2173769415599354, "learning_rate": 6.824174870587643e-05, "loss": 0.4738, "step": 4418 }, { "epoch": 1.2367758186397986, "grad_norm": 0.23004061754008617, "learning_rate": 6.822738542226402e-05, "loss": 0.5145, "step": 4419 }, { "epoch": 1.2370556954939826, "grad_norm": 0.2144677896012713, "learning_rate": 6.821302040375886e-05, "loss": 0.5236, "step": 4420 }, { "epoch": 1.2373355723481667, "grad_norm": 0.22778656766316274, "learning_rate": 6.819865365172824e-05, "loss": 0.5095, "step": 4421 }, { "epoch": 1.237615449202351, "grad_norm": 0.22309038235696235, "learning_rate": 6.818428516753959e-05, "loss": 0.4989, "step": 4422 }, { "epoch": 1.237895326056535, "grad_norm": 0.2215635131690984, "learning_rate": 6.816991495256047e-05, "loss": 0.4813, "step": 4423 }, { "epoch": 1.2381752029107194, "grad_norm": 0.2230856565953154, "learning_rate": 6.81555430081587e-05, "loss": 0.5037, "step": 4424 }, { "epoch": 1.2384550797649034, "grad_norm": 0.23180567720413758, "learning_rate": 6.814116933570217e-05, "loss": 0.5368, "step": 4425 }, { "epoch": 1.2387349566190875, "grad_norm": 0.21018030181583042, "learning_rate": 6.812679393655898e-05, "loss": 0.492, "step": 4426 }, { "epoch": 1.2390148334732718, "grad_norm": 0.21309149281973794, "learning_rate": 6.811241681209741e-05, "loss": 0.53, "step": 4427 }, { "epoch": 1.2392947103274559, "grad_norm": 0.2260166789213048, "learning_rate": 6.809803796368588e-05, "loss": 0.5033, "step": 4428 }, { "epoch": 1.2395745871816402, "grad_norm": 0.23952326573956984, "learning_rate": 6.808365739269294e-05, "loss": 0.5007, "step": 4429 }, { "epoch": 1.2398544640358242, "grad_norm": 0.21874113451870314, "learning_rate": 6.806927510048738e-05, "loss": 0.4997, "step": 4430 }, { "epoch": 1.2401343408900085, "grad_norm": 0.21426388745862024, "learning_rate": 6.805489108843813e-05, "loss": 0.5068, "step": 4431 }, { "epoch": 1.2404142177441926, "grad_norm": 0.2215734126920041, "learning_rate": 6.80405053579142e-05, "loss": 0.5175, "step": 4432 }, { "epoch": 1.2406940945983767, "grad_norm": 0.23137841249112412, "learning_rate": 6.802611791028489e-05, "loss": 0.5073, "step": 4433 }, { "epoch": 1.240973971452561, "grad_norm": 0.2482104985235173, "learning_rate": 6.801172874691959e-05, "loss": 0.5158, "step": 4434 }, { "epoch": 1.241253848306745, "grad_norm": 0.2167572132339733, "learning_rate": 6.799733786918785e-05, "loss": 0.504, "step": 4435 }, { "epoch": 1.241533725160929, "grad_norm": 0.21886396347628806, "learning_rate": 6.798294527845943e-05, "loss": 0.4887, "step": 4436 }, { "epoch": 1.2418136020151134, "grad_norm": 0.22523846948020232, "learning_rate": 6.79685509761042e-05, "loss": 0.4918, "step": 4437 }, { "epoch": 1.2420934788692974, "grad_norm": 0.22338648580035944, "learning_rate": 6.795415496349224e-05, "loss": 0.4976, "step": 4438 }, { "epoch": 1.2423733557234817, "grad_norm": 0.21679938045963373, "learning_rate": 6.793975724199377e-05, "loss": 0.4948, "step": 4439 }, { "epoch": 1.2426532325776658, "grad_norm": 0.24280504081104756, "learning_rate": 6.792535781297917e-05, "loss": 0.4916, "step": 4440 }, { "epoch": 1.24293310943185, "grad_norm": 0.23459037292940543, "learning_rate": 6.791095667781897e-05, "loss": 0.4941, "step": 4441 }, { "epoch": 1.2432129862860342, "grad_norm": 0.22356252534979063, "learning_rate": 6.78965538378839e-05, "loss": 0.5066, "step": 4442 }, { "epoch": 1.2434928631402182, "grad_norm": 0.22891790303476722, "learning_rate": 6.78821492945448e-05, "loss": 0.51, "step": 4443 }, { "epoch": 1.2437727399944025, "grad_norm": 0.2173379812467162, "learning_rate": 6.786774304917272e-05, "loss": 0.4871, "step": 4444 }, { "epoch": 1.2440526168485866, "grad_norm": 0.2212811210753908, "learning_rate": 6.785333510313886e-05, "loss": 0.5069, "step": 4445 }, { "epoch": 1.2443324937027707, "grad_norm": 0.21885501957202622, "learning_rate": 6.783892545781456e-05, "loss": 0.5033, "step": 4446 }, { "epoch": 1.244612370556955, "grad_norm": 0.22307423176941266, "learning_rate": 6.782451411457137e-05, "loss": 0.4953, "step": 4447 }, { "epoch": 1.244892247411139, "grad_norm": 0.23209156643527004, "learning_rate": 6.781010107478094e-05, "loss": 0.5105, "step": 4448 }, { "epoch": 1.2451721242653233, "grad_norm": 0.23180375537378448, "learning_rate": 6.779568633981514e-05, "loss": 0.5034, "step": 4449 }, { "epoch": 1.2454520011195074, "grad_norm": 0.22218934128045695, "learning_rate": 6.778126991104594e-05, "loss": 0.5122, "step": 4450 }, { "epoch": 1.2457318779736917, "grad_norm": 0.225292661771242, "learning_rate": 6.776685178984551e-05, "loss": 0.5086, "step": 4451 }, { "epoch": 1.2460117548278757, "grad_norm": 0.23538128851183887, "learning_rate": 6.775243197758619e-05, "loss": 0.4958, "step": 4452 }, { "epoch": 1.2462916316820598, "grad_norm": 0.2236835830138261, "learning_rate": 6.773801047564045e-05, "loss": 0.4944, "step": 4453 }, { "epoch": 1.246571508536244, "grad_norm": 0.22707190607036884, "learning_rate": 6.772358728538095e-05, "loss": 0.4943, "step": 4454 }, { "epoch": 1.2468513853904282, "grad_norm": 0.2363436807243141, "learning_rate": 6.770916240818048e-05, "loss": 0.5219, "step": 4455 }, { "epoch": 1.2471312622446125, "grad_norm": 0.22731878144830908, "learning_rate": 6.769473584541203e-05, "loss": 0.5069, "step": 4456 }, { "epoch": 1.2474111390987965, "grad_norm": 0.22436996842966697, "learning_rate": 6.768030759844872e-05, "loss": 0.4823, "step": 4457 }, { "epoch": 1.2476910159529806, "grad_norm": 0.2205881881487971, "learning_rate": 6.766587766866386e-05, "loss": 0.4802, "step": 4458 }, { "epoch": 1.2479708928071649, "grad_norm": 0.22828588267098623, "learning_rate": 6.765144605743084e-05, "loss": 0.5087, "step": 4459 }, { "epoch": 1.248250769661349, "grad_norm": 0.2328653388828949, "learning_rate": 6.763701276612333e-05, "loss": 0.5154, "step": 4460 }, { "epoch": 1.2485306465155332, "grad_norm": 0.23316643588287533, "learning_rate": 6.762257779611505e-05, "loss": 0.5068, "step": 4461 }, { "epoch": 1.2488105233697173, "grad_norm": 0.22243507890188366, "learning_rate": 6.760814114877995e-05, "loss": 0.4869, "step": 4462 }, { "epoch": 1.2490904002239014, "grad_norm": 0.22383762587784456, "learning_rate": 6.759370282549213e-05, "loss": 0.4923, "step": 4463 }, { "epoch": 1.2493702770780857, "grad_norm": 0.21883428619120182, "learning_rate": 6.757926282762583e-05, "loss": 0.5009, "step": 4464 }, { "epoch": 1.2496501539322697, "grad_norm": 0.2225919049797821, "learning_rate": 6.756482115655545e-05, "loss": 0.5179, "step": 4465 }, { "epoch": 1.249930030786454, "grad_norm": 0.2600433388240312, "learning_rate": 6.755037781365557e-05, "loss": 0.5224, "step": 4466 }, { "epoch": 1.250209907640638, "grad_norm": 0.2270385776196222, "learning_rate": 6.75359328003009e-05, "loss": 0.4987, "step": 4467 }, { "epoch": 1.2504897844948224, "grad_norm": 0.22286683184484726, "learning_rate": 6.752148611786633e-05, "loss": 0.4952, "step": 4468 }, { "epoch": 1.2507696613490065, "grad_norm": 0.23560808843530517, "learning_rate": 6.750703776772691e-05, "loss": 0.5111, "step": 4469 }, { "epoch": 1.2510495382031905, "grad_norm": 0.21047351981030982, "learning_rate": 6.749258775125783e-05, "loss": 0.51, "step": 4470 }, { "epoch": 1.2513294150573748, "grad_norm": 0.22618105344721426, "learning_rate": 6.747813606983446e-05, "loss": 0.5043, "step": 4471 }, { "epoch": 1.251609291911559, "grad_norm": 0.22894601947232676, "learning_rate": 6.74636827248323e-05, "loss": 0.5096, "step": 4472 }, { "epoch": 1.251889168765743, "grad_norm": 0.2336288045912679, "learning_rate": 6.744922771762705e-05, "loss": 0.5251, "step": 4473 }, { "epoch": 1.2521690456199273, "grad_norm": 0.2187782199417927, "learning_rate": 6.743477104959455e-05, "loss": 0.5028, "step": 4474 }, { "epoch": 1.2524489224741113, "grad_norm": 0.21555620580526136, "learning_rate": 6.742031272211078e-05, "loss": 0.4703, "step": 4475 }, { "epoch": 1.2527287993282956, "grad_norm": 0.21785022135296023, "learning_rate": 6.74058527365519e-05, "loss": 0.4953, "step": 4476 }, { "epoch": 1.2530086761824797, "grad_norm": 0.22264608063454083, "learning_rate": 6.73913910942942e-05, "loss": 0.5011, "step": 4477 }, { "epoch": 1.253288553036664, "grad_norm": 0.22298368111761738, "learning_rate": 6.737692779671417e-05, "loss": 0.4983, "step": 4478 }, { "epoch": 1.253568429890848, "grad_norm": 0.21456946013095476, "learning_rate": 6.736246284518843e-05, "loss": 0.5151, "step": 4479 }, { "epoch": 1.2538483067450321, "grad_norm": 0.21229148838518647, "learning_rate": 6.734799624109376e-05, "loss": 0.4978, "step": 4480 }, { "epoch": 1.2541281835992164, "grad_norm": 0.22282362281730558, "learning_rate": 6.733352798580708e-05, "loss": 0.4751, "step": 4481 }, { "epoch": 1.2544080604534005, "grad_norm": 0.2292642849466078, "learning_rate": 6.731905808070551e-05, "loss": 0.497, "step": 4482 }, { "epoch": 1.2546879373075845, "grad_norm": 0.21463230053034607, "learning_rate": 6.73045865271663e-05, "loss": 0.5232, "step": 4483 }, { "epoch": 1.2549678141617688, "grad_norm": 0.2272707335182323, "learning_rate": 6.729011332656685e-05, "loss": 0.5037, "step": 4484 }, { "epoch": 1.255247691015953, "grad_norm": 0.2315495126259172, "learning_rate": 6.727563848028478e-05, "loss": 0.5226, "step": 4485 }, { "epoch": 1.2555275678701372, "grad_norm": 0.22264526390434827, "learning_rate": 6.726116198969773e-05, "loss": 0.4986, "step": 4486 }, { "epoch": 1.2558074447243213, "grad_norm": 0.21724436342094056, "learning_rate": 6.724668385618362e-05, "loss": 0.4771, "step": 4487 }, { "epoch": 1.2560873215785056, "grad_norm": 0.22695688304661193, "learning_rate": 6.72322040811205e-05, "loss": 0.4797, "step": 4488 }, { "epoch": 1.2563671984326896, "grad_norm": 0.223153852616965, "learning_rate": 6.721772266588653e-05, "loss": 0.493, "step": 4489 }, { "epoch": 1.2566470752868737, "grad_norm": 0.2287880239038558, "learning_rate": 6.72032396118601e-05, "loss": 0.5151, "step": 4490 }, { "epoch": 1.256926952141058, "grad_norm": 0.22493308899385148, "learning_rate": 6.718875492041968e-05, "loss": 0.4996, "step": 4491 }, { "epoch": 1.257206828995242, "grad_norm": 0.23090239774892557, "learning_rate": 6.717426859294395e-05, "loss": 0.5154, "step": 4492 }, { "epoch": 1.2574867058494261, "grad_norm": 0.2772282550858744, "learning_rate": 6.715978063081174e-05, "loss": 0.5094, "step": 4493 }, { "epoch": 1.2577665827036104, "grad_norm": 0.23126063717392392, "learning_rate": 6.7145291035402e-05, "loss": 0.4935, "step": 4494 }, { "epoch": 1.2580464595577947, "grad_norm": 0.23362686207403668, "learning_rate": 6.713079980809385e-05, "loss": 0.509, "step": 4495 }, { "epoch": 1.2583263364119788, "grad_norm": 0.23498337073517991, "learning_rate": 6.71163069502666e-05, "loss": 0.5124, "step": 4496 }, { "epoch": 1.2586062132661628, "grad_norm": 0.21308792444705082, "learning_rate": 6.710181246329965e-05, "loss": 0.4849, "step": 4497 }, { "epoch": 1.2588860901203471, "grad_norm": 0.2247191896077761, "learning_rate": 6.708731634857263e-05, "loss": 0.5158, "step": 4498 }, { "epoch": 1.2591659669745312, "grad_norm": 0.21883638501941233, "learning_rate": 6.707281860746529e-05, "loss": 0.4787, "step": 4499 }, { "epoch": 1.2594458438287153, "grad_norm": 0.2268980946026509, "learning_rate": 6.705831924135749e-05, "loss": 0.5036, "step": 4500 }, { "epoch": 1.2597257206828996, "grad_norm": 0.23025472531366095, "learning_rate": 6.704381825162934e-05, "loss": 0.4776, "step": 4501 }, { "epoch": 1.2600055975370836, "grad_norm": 0.2262865710994982, "learning_rate": 6.702931563966101e-05, "loss": 0.4971, "step": 4502 }, { "epoch": 1.260285474391268, "grad_norm": 0.2288895381282692, "learning_rate": 6.701481140683291e-05, "loss": 0.5048, "step": 4503 }, { "epoch": 1.260565351245452, "grad_norm": 0.23004911632596733, "learning_rate": 6.700030555452552e-05, "loss": 0.5033, "step": 4504 }, { "epoch": 1.2608452280996363, "grad_norm": 0.22495432634208204, "learning_rate": 6.698579808411954e-05, "loss": 0.5096, "step": 4505 }, { "epoch": 1.2611251049538204, "grad_norm": 0.23058737312728403, "learning_rate": 6.69712889969958e-05, "loss": 0.5085, "step": 4506 }, { "epoch": 1.2614049818080044, "grad_norm": 0.22932014941072562, "learning_rate": 6.695677829453524e-05, "loss": 0.5079, "step": 4507 }, { "epoch": 1.2616848586621887, "grad_norm": 0.21917317206185066, "learning_rate": 6.694226597811906e-05, "loss": 0.5295, "step": 4508 }, { "epoch": 1.2619647355163728, "grad_norm": 0.2212541162421701, "learning_rate": 6.692775204912852e-05, "loss": 0.4812, "step": 4509 }, { "epoch": 1.2622446123705569, "grad_norm": 0.2227090718976145, "learning_rate": 6.691323650894504e-05, "loss": 0.4831, "step": 4510 }, { "epoch": 1.2625244892247411, "grad_norm": 0.2303567506652895, "learning_rate": 6.689871935895026e-05, "loss": 0.5286, "step": 4511 }, { "epoch": 1.2628043660789252, "grad_norm": 0.23590189093245803, "learning_rate": 6.688420060052593e-05, "loss": 0.5132, "step": 4512 }, { "epoch": 1.2630842429331095, "grad_norm": 0.22756965231482607, "learning_rate": 6.686968023505393e-05, "loss": 0.4943, "step": 4513 }, { "epoch": 1.2633641197872936, "grad_norm": 0.2261027838051452, "learning_rate": 6.68551582639163e-05, "loss": 0.5154, "step": 4514 }, { "epoch": 1.2636439966414779, "grad_norm": 0.22471759104126507, "learning_rate": 6.684063468849527e-05, "loss": 0.4975, "step": 4515 }, { "epoch": 1.263923873495662, "grad_norm": 0.22240388619876472, "learning_rate": 6.682610951017323e-05, "loss": 0.4935, "step": 4516 }, { "epoch": 1.264203750349846, "grad_norm": 0.22414050150156797, "learning_rate": 6.681158273033265e-05, "loss": 0.5043, "step": 4517 }, { "epoch": 1.2644836272040303, "grad_norm": 0.21799138634781873, "learning_rate": 6.679705435035622e-05, "loss": 0.5051, "step": 4518 }, { "epoch": 1.2647635040582144, "grad_norm": 0.24016586750733343, "learning_rate": 6.678252437162677e-05, "loss": 0.4907, "step": 4519 }, { "epoch": 1.2650433809123984, "grad_norm": 0.22503684929739645, "learning_rate": 6.676799279552723e-05, "loss": 0.5082, "step": 4520 }, { "epoch": 1.2653232577665827, "grad_norm": 0.2268392453680348, "learning_rate": 6.675345962344078e-05, "loss": 0.4803, "step": 4521 }, { "epoch": 1.2656031346207668, "grad_norm": 0.22869329063653454, "learning_rate": 6.673892485675066e-05, "loss": 0.4835, "step": 4522 }, { "epoch": 1.265883011474951, "grad_norm": 0.22282360543506255, "learning_rate": 6.67243884968403e-05, "loss": 0.49, "step": 4523 }, { "epoch": 1.2661628883291351, "grad_norm": 0.2207826807449118, "learning_rate": 6.670985054509326e-05, "loss": 0.4784, "step": 4524 }, { "epoch": 1.2664427651833194, "grad_norm": 0.22161549480639797, "learning_rate": 6.66953110028933e-05, "loss": 0.5048, "step": 4525 }, { "epoch": 1.2667226420375035, "grad_norm": 0.2237115226263842, "learning_rate": 6.66807698716243e-05, "loss": 0.4922, "step": 4526 }, { "epoch": 1.2670025188916876, "grad_norm": 0.2307389487534476, "learning_rate": 6.66662271526703e-05, "loss": 0.4964, "step": 4527 }, { "epoch": 1.2672823957458719, "grad_norm": 0.2276555245827964, "learning_rate": 6.665168284741545e-05, "loss": 0.4903, "step": 4528 }, { "epoch": 1.267562272600056, "grad_norm": 0.2272755431780669, "learning_rate": 6.663713695724412e-05, "loss": 0.4973, "step": 4529 }, { "epoch": 1.26784214945424, "grad_norm": 0.23393735854339515, "learning_rate": 6.66225894835408e-05, "loss": 0.487, "step": 4530 }, { "epoch": 1.2681220263084243, "grad_norm": 0.22483424192193738, "learning_rate": 6.660804042769008e-05, "loss": 0.5087, "step": 4531 }, { "epoch": 1.2684019031626086, "grad_norm": 0.231993026857539, "learning_rate": 6.659348979107679e-05, "loss": 0.4966, "step": 4532 }, { "epoch": 1.2686817800167927, "grad_norm": 0.225707753150604, "learning_rate": 6.657893757508583e-05, "loss": 0.5229, "step": 4533 }, { "epoch": 1.2689616568709767, "grad_norm": 0.22251313029927405, "learning_rate": 6.656438378110234e-05, "loss": 0.4667, "step": 4534 }, { "epoch": 1.269241533725161, "grad_norm": 0.22719061758486747, "learning_rate": 6.654982841051151e-05, "loss": 0.4864, "step": 4535 }, { "epoch": 1.269521410579345, "grad_norm": 0.22339650100648326, "learning_rate": 6.653527146469877e-05, "loss": 0.5132, "step": 4536 }, { "epoch": 1.2698012874335292, "grad_norm": 0.22253956314267231, "learning_rate": 6.652071294504963e-05, "loss": 0.4971, "step": 4537 }, { "epoch": 1.2700811642877134, "grad_norm": 0.2275811964542699, "learning_rate": 6.650615285294977e-05, "loss": 0.4942, "step": 4538 }, { "epoch": 1.2703610411418975, "grad_norm": 0.22768182303817025, "learning_rate": 6.649159118978506e-05, "loss": 0.4759, "step": 4539 }, { "epoch": 1.2706409179960816, "grad_norm": 0.2283229731914254, "learning_rate": 6.647702795694146e-05, "loss": 0.5152, "step": 4540 }, { "epoch": 1.2709207948502659, "grad_norm": 0.2964667401302082, "learning_rate": 6.64624631558051e-05, "loss": 0.4978, "step": 4541 }, { "epoch": 1.2712006717044502, "grad_norm": 0.22264768078734123, "learning_rate": 6.64478967877623e-05, "loss": 0.4911, "step": 4542 }, { "epoch": 1.2714805485586342, "grad_norm": 0.2262480614267082, "learning_rate": 6.643332885419949e-05, "loss": 0.5077, "step": 4543 }, { "epoch": 1.2717604254128183, "grad_norm": 0.22698807994529827, "learning_rate": 6.641875935650324e-05, "loss": 0.5027, "step": 4544 }, { "epoch": 1.2720403022670026, "grad_norm": 0.22790119660781558, "learning_rate": 6.640418829606026e-05, "loss": 0.5058, "step": 4545 }, { "epoch": 1.2723201791211867, "grad_norm": 0.23398749484193088, "learning_rate": 6.638961567425747e-05, "loss": 0.5208, "step": 4546 }, { "epoch": 1.2726000559753707, "grad_norm": 0.2129361266639578, "learning_rate": 6.637504149248191e-05, "loss": 0.5143, "step": 4547 }, { "epoch": 1.272879932829555, "grad_norm": 0.2217325763753521, "learning_rate": 6.636046575212072e-05, "loss": 0.4922, "step": 4548 }, { "epoch": 1.273159809683739, "grad_norm": 0.21700462244893803, "learning_rate": 6.634588845456123e-05, "loss": 0.4788, "step": 4549 }, { "epoch": 1.2734396865379234, "grad_norm": 0.24376781045059606, "learning_rate": 6.633130960119092e-05, "loss": 0.4911, "step": 4550 }, { "epoch": 1.2737195633921075, "grad_norm": 0.23127889698525972, "learning_rate": 6.631672919339743e-05, "loss": 0.5012, "step": 4551 }, { "epoch": 1.2739994402462917, "grad_norm": 0.22248802603051918, "learning_rate": 6.630214723256853e-05, "loss": 0.4832, "step": 4552 }, { "epoch": 1.2742793171004758, "grad_norm": 0.22005460542819927, "learning_rate": 6.628756372009213e-05, "loss": 0.4931, "step": 4553 }, { "epoch": 1.2745591939546599, "grad_norm": 0.2314092087083626, "learning_rate": 6.627297865735629e-05, "loss": 0.4915, "step": 4554 }, { "epoch": 1.2748390708088442, "grad_norm": 0.2247683211760867, "learning_rate": 6.625839204574925e-05, "loss": 0.4816, "step": 4555 }, { "epoch": 1.2751189476630282, "grad_norm": 0.21776217439031326, "learning_rate": 6.624380388665934e-05, "loss": 0.5035, "step": 4556 }, { "epoch": 1.2753988245172123, "grad_norm": 0.2209502526007058, "learning_rate": 6.622921418147509e-05, "loss": 0.497, "step": 4557 }, { "epoch": 1.2756787013713966, "grad_norm": 0.23819498739874898, "learning_rate": 6.621462293158514e-05, "loss": 0.5042, "step": 4558 }, { "epoch": 1.2759585782255807, "grad_norm": 0.23453751221019697, "learning_rate": 6.620003013837832e-05, "loss": 0.5094, "step": 4559 }, { "epoch": 1.276238455079765, "grad_norm": 0.23350259985026242, "learning_rate": 6.618543580324355e-05, "loss": 0.4976, "step": 4560 }, { "epoch": 1.276518331933949, "grad_norm": 0.21956856279332757, "learning_rate": 6.617083992756994e-05, "loss": 0.4983, "step": 4561 }, { "epoch": 1.2767982087881333, "grad_norm": 0.21809874334246912, "learning_rate": 6.615624251274676e-05, "loss": 0.508, "step": 4562 }, { "epoch": 1.2770780856423174, "grad_norm": 0.2255808899958128, "learning_rate": 6.614164356016335e-05, "loss": 0.5093, "step": 4563 }, { "epoch": 1.2773579624965015, "grad_norm": 0.21786145442604976, "learning_rate": 6.612704307120928e-05, "loss": 0.4797, "step": 4564 }, { "epoch": 1.2776378393506858, "grad_norm": 0.22242215099322205, "learning_rate": 6.611244104727422e-05, "loss": 0.493, "step": 4565 }, { "epoch": 1.2779177162048698, "grad_norm": 0.23106558933286547, "learning_rate": 6.609783748974802e-05, "loss": 0.4773, "step": 4566 }, { "epoch": 1.278197593059054, "grad_norm": 0.21779580434800108, "learning_rate": 6.608323240002061e-05, "loss": 0.4943, "step": 4567 }, { "epoch": 1.2784774699132382, "grad_norm": 0.22340662539705994, "learning_rate": 6.606862577948214e-05, "loss": 0.5164, "step": 4568 }, { "epoch": 1.2787573467674223, "grad_norm": 0.215103577951189, "learning_rate": 6.60540176295229e-05, "loss": 0.5019, "step": 4569 }, { "epoch": 1.2790372236216065, "grad_norm": 0.2253343365947351, "learning_rate": 6.603940795153325e-05, "loss": 0.5093, "step": 4570 }, { "epoch": 1.2793171004757906, "grad_norm": 0.22132564801684748, "learning_rate": 6.602479674690378e-05, "loss": 0.5178, "step": 4571 }, { "epoch": 1.279596977329975, "grad_norm": 0.22016093350012908, "learning_rate": 6.60101840170252e-05, "loss": 0.4912, "step": 4572 }, { "epoch": 1.279876854184159, "grad_norm": 0.2145943732816903, "learning_rate": 6.599556976328833e-05, "loss": 0.4993, "step": 4573 }, { "epoch": 1.280156731038343, "grad_norm": 0.23687774823603283, "learning_rate": 6.598095398708417e-05, "loss": 0.4875, "step": 4574 }, { "epoch": 1.2804366078925273, "grad_norm": 0.21423686934006037, "learning_rate": 6.596633668980388e-05, "loss": 0.502, "step": 4575 }, { "epoch": 1.2807164847467114, "grad_norm": 0.2123447415833241, "learning_rate": 6.595171787283871e-05, "loss": 0.4921, "step": 4576 }, { "epoch": 1.2809963616008955, "grad_norm": 0.2224877807022716, "learning_rate": 6.593709753758013e-05, "loss": 0.5066, "step": 4577 }, { "epoch": 1.2812762384550798, "grad_norm": 0.23941024051589668, "learning_rate": 6.592247568541967e-05, "loss": 0.503, "step": 4578 }, { "epoch": 1.281556115309264, "grad_norm": 0.2304541410031591, "learning_rate": 6.590785231774907e-05, "loss": 0.5054, "step": 4579 }, { "epoch": 1.2818359921634481, "grad_norm": 0.23324628828292474, "learning_rate": 6.589322743596018e-05, "loss": 0.5123, "step": 4580 }, { "epoch": 1.2821158690176322, "grad_norm": 0.21602298932667466, "learning_rate": 6.587860104144499e-05, "loss": 0.4896, "step": 4581 }, { "epoch": 1.2823957458718165, "grad_norm": 0.22259542764176052, "learning_rate": 6.586397313559568e-05, "loss": 0.5054, "step": 4582 }, { "epoch": 1.2826756227260006, "grad_norm": 0.22714730682310053, "learning_rate": 6.584934371980453e-05, "loss": 0.5146, "step": 4583 }, { "epoch": 1.2829554995801846, "grad_norm": 0.222548577851698, "learning_rate": 6.583471279546398e-05, "loss": 0.5052, "step": 4584 }, { "epoch": 1.283235376434369, "grad_norm": 0.21497085516350536, "learning_rate": 6.582008036396658e-05, "loss": 0.4959, "step": 4585 }, { "epoch": 1.283515253288553, "grad_norm": 0.22752922366939657, "learning_rate": 6.580544642670509e-05, "loss": 0.4985, "step": 4586 }, { "epoch": 1.2837951301427373, "grad_norm": 0.21605540825063918, "learning_rate": 6.579081098507236e-05, "loss": 0.4903, "step": 4587 }, { "epoch": 1.2840750069969213, "grad_norm": 0.21362644909682582, "learning_rate": 6.57761740404614e-05, "loss": 0.5166, "step": 4588 }, { "epoch": 1.2843548838511056, "grad_norm": 0.23245160827101932, "learning_rate": 6.576153559426537e-05, "loss": 0.5227, "step": 4589 }, { "epoch": 1.2846347607052897, "grad_norm": 0.22673442602767166, "learning_rate": 6.574689564787756e-05, "loss": 0.5083, "step": 4590 }, { "epoch": 1.2849146375594738, "grad_norm": 0.2201733292431062, "learning_rate": 6.57322542026914e-05, "loss": 0.5009, "step": 4591 }, { "epoch": 1.285194514413658, "grad_norm": 0.2218832645586175, "learning_rate": 6.571761126010049e-05, "loss": 0.5012, "step": 4592 }, { "epoch": 1.2854743912678421, "grad_norm": 0.22575243646899995, "learning_rate": 6.570296682149854e-05, "loss": 0.5004, "step": 4593 }, { "epoch": 1.2857542681220262, "grad_norm": 0.22551316113104805, "learning_rate": 6.568832088827941e-05, "loss": 0.5005, "step": 4594 }, { "epoch": 1.2860341449762105, "grad_norm": 0.2238456767561351, "learning_rate": 6.567367346183713e-05, "loss": 0.512, "step": 4595 }, { "epoch": 1.2863140218303946, "grad_norm": 0.2269691834643914, "learning_rate": 6.565902454356583e-05, "loss": 0.5303, "step": 4596 }, { "epoch": 1.2865938986845789, "grad_norm": 0.22855335149517753, "learning_rate": 6.564437413485981e-05, "loss": 0.5176, "step": 4597 }, { "epoch": 1.286873775538763, "grad_norm": 0.21678142607897863, "learning_rate": 6.56297222371135e-05, "loss": 0.5206, "step": 4598 }, { "epoch": 1.2871536523929472, "grad_norm": 0.22658488233250337, "learning_rate": 6.561506885172149e-05, "loss": 0.4901, "step": 4599 }, { "epoch": 1.2874335292471313, "grad_norm": 0.22615414039556447, "learning_rate": 6.560041398007847e-05, "loss": 0.4823, "step": 4600 }, { "epoch": 1.2877134061013153, "grad_norm": 0.2213057307234734, "learning_rate": 6.558575762357933e-05, "loss": 0.5024, "step": 4601 }, { "epoch": 1.2879932829554996, "grad_norm": 0.2174894780415531, "learning_rate": 6.557109978361904e-05, "loss": 0.4979, "step": 4602 }, { "epoch": 1.2882731598096837, "grad_norm": 0.21131699835689913, "learning_rate": 6.555644046159277e-05, "loss": 0.4867, "step": 4603 }, { "epoch": 1.2885530366638678, "grad_norm": 0.22554621064216013, "learning_rate": 6.554177965889578e-05, "loss": 0.5123, "step": 4604 }, { "epoch": 1.288832913518052, "grad_norm": 0.2176737111156412, "learning_rate": 6.552711737692351e-05, "loss": 0.5085, "step": 4605 }, { "epoch": 1.2891127903722361, "grad_norm": 0.2155271472547195, "learning_rate": 6.551245361707152e-05, "loss": 0.4895, "step": 4606 }, { "epoch": 1.2893926672264204, "grad_norm": 0.21550969305176285, "learning_rate": 6.54977883807355e-05, "loss": 0.471, "step": 4607 }, { "epoch": 1.2896725440806045, "grad_norm": 0.22382560786400904, "learning_rate": 6.548312166931131e-05, "loss": 0.4848, "step": 4608 }, { "epoch": 1.2899524209347888, "grad_norm": 0.2287736043856429, "learning_rate": 6.546845348419494e-05, "loss": 0.5113, "step": 4609 }, { "epoch": 1.2902322977889729, "grad_norm": 0.22632328103882504, "learning_rate": 6.545378382678252e-05, "loss": 0.487, "step": 4610 }, { "epoch": 1.290512174643157, "grad_norm": 0.224941686173709, "learning_rate": 6.54391126984703e-05, "loss": 0.5142, "step": 4611 }, { "epoch": 1.2907920514973412, "grad_norm": 0.2271752358038953, "learning_rate": 6.542444010065468e-05, "loss": 0.514, "step": 4612 }, { "epoch": 1.2910719283515253, "grad_norm": 0.22085345432306197, "learning_rate": 6.540976603473223e-05, "loss": 0.5007, "step": 4613 }, { "epoch": 1.2913518052057094, "grad_norm": 0.22708975928677427, "learning_rate": 6.539509050209961e-05, "loss": 0.4772, "step": 4614 }, { "epoch": 1.2916316820598936, "grad_norm": 0.21885416416242665, "learning_rate": 6.538041350415368e-05, "loss": 0.492, "step": 4615 }, { "epoch": 1.291911558914078, "grad_norm": 0.23595705137744344, "learning_rate": 6.536573504229135e-05, "loss": 0.504, "step": 4616 }, { "epoch": 1.292191435768262, "grad_norm": 0.2222855519629023, "learning_rate": 6.53510551179098e-05, "loss": 0.501, "step": 4617 }, { "epoch": 1.292471312622446, "grad_norm": 0.2202684290052884, "learning_rate": 6.53363737324062e-05, "loss": 0.5159, "step": 4618 }, { "epoch": 1.2927511894766304, "grad_norm": 0.22828953849570324, "learning_rate": 6.532169088717797e-05, "loss": 0.4997, "step": 4619 }, { "epoch": 1.2930310663308144, "grad_norm": 0.22001787503268458, "learning_rate": 6.530700658362263e-05, "loss": 0.5038, "step": 4620 }, { "epoch": 1.2933109431849985, "grad_norm": 0.22919760384205226, "learning_rate": 6.529232082313783e-05, "loss": 0.4968, "step": 4621 }, { "epoch": 1.2935908200391828, "grad_norm": 0.21539147881567156, "learning_rate": 6.527763360712138e-05, "loss": 0.4912, "step": 4622 }, { "epoch": 1.2938706968933669, "grad_norm": 0.22598274573206428, "learning_rate": 6.52629449369712e-05, "loss": 0.5142, "step": 4623 }, { "epoch": 1.2941505737475512, "grad_norm": 0.23171482903840254, "learning_rate": 6.52482548140854e-05, "loss": 0.5022, "step": 4624 }, { "epoch": 1.2944304506017352, "grad_norm": 0.22221397820711014, "learning_rate": 6.523356323986216e-05, "loss": 0.5027, "step": 4625 }, { "epoch": 1.2947103274559195, "grad_norm": 0.23262272297796627, "learning_rate": 6.521887021569984e-05, "loss": 0.5042, "step": 4626 }, { "epoch": 1.2949902043101036, "grad_norm": 0.2229541251763377, "learning_rate": 6.520417574299695e-05, "loss": 0.4987, "step": 4627 }, { "epoch": 1.2952700811642877, "grad_norm": 0.21670560844131473, "learning_rate": 6.518947982315209e-05, "loss": 0.4978, "step": 4628 }, { "epoch": 1.295549958018472, "grad_norm": 0.23215184350732523, "learning_rate": 6.517478245756403e-05, "loss": 0.5115, "step": 4629 }, { "epoch": 1.295829834872656, "grad_norm": 0.21806720642178692, "learning_rate": 6.51600836476317e-05, "loss": 0.5142, "step": 4630 }, { "epoch": 1.29610971172684, "grad_norm": 0.22185631840174178, "learning_rate": 6.514538339475411e-05, "loss": 0.5102, "step": 4631 }, { "epoch": 1.2963895885810244, "grad_norm": 0.21006387393433928, "learning_rate": 6.513068170033047e-05, "loss": 0.5116, "step": 4632 }, { "epoch": 1.2966694654352084, "grad_norm": 0.22376905117718077, "learning_rate": 6.511597856576006e-05, "loss": 0.5129, "step": 4633 }, { "epoch": 1.2969493422893927, "grad_norm": 0.23116756610449993, "learning_rate": 6.510127399244234e-05, "loss": 0.5064, "step": 4634 }, { "epoch": 1.2972292191435768, "grad_norm": 0.21770023154106802, "learning_rate": 6.508656798177692e-05, "loss": 0.4937, "step": 4635 }, { "epoch": 1.297509095997761, "grad_norm": 0.21374486310641394, "learning_rate": 6.507186053516351e-05, "loss": 0.4903, "step": 4636 }, { "epoch": 1.2977889728519452, "grad_norm": 0.217338272468009, "learning_rate": 6.5057151654002e-05, "loss": 0.5117, "step": 4637 }, { "epoch": 1.2980688497061292, "grad_norm": 0.21938217567391707, "learning_rate": 6.504244133969235e-05, "loss": 0.4978, "step": 4638 }, { "epoch": 1.2983487265603135, "grad_norm": 0.23609496333007188, "learning_rate": 6.502772959363472e-05, "loss": 0.4873, "step": 4639 }, { "epoch": 1.2986286034144976, "grad_norm": 0.2303756434279393, "learning_rate": 6.501301641722937e-05, "loss": 0.5106, "step": 4640 }, { "epoch": 1.2989084802686817, "grad_norm": 0.2312830221919868, "learning_rate": 6.499830181187672e-05, "loss": 0.4806, "step": 4641 }, { "epoch": 1.299188357122866, "grad_norm": 0.2324375801687925, "learning_rate": 6.498358577897731e-05, "loss": 0.4837, "step": 4642 }, { "epoch": 1.29946823397705, "grad_norm": 0.23304549317929413, "learning_rate": 6.49688683199318e-05, "loss": 0.5126, "step": 4643 }, { "epoch": 1.2997481108312343, "grad_norm": 0.2259910405244759, "learning_rate": 6.495414943614105e-05, "loss": 0.487, "step": 4644 }, { "epoch": 1.3000279876854184, "grad_norm": 0.23609529250810166, "learning_rate": 6.493942912900597e-05, "loss": 0.5076, "step": 4645 }, { "epoch": 1.3003078645396027, "grad_norm": 0.23387623580651815, "learning_rate": 6.492470739992767e-05, "loss": 0.4848, "step": 4646 }, { "epoch": 1.3005877413937867, "grad_norm": 0.22397861859626766, "learning_rate": 6.490998425030735e-05, "loss": 0.4939, "step": 4647 }, { "epoch": 1.3008676182479708, "grad_norm": 0.22400745922601117, "learning_rate": 6.489525968154642e-05, "loss": 0.5165, "step": 4648 }, { "epoch": 1.301147495102155, "grad_norm": 0.23050838486974656, "learning_rate": 6.48805336950463e-05, "loss": 0.5036, "step": 4649 }, { "epoch": 1.3014273719563392, "grad_norm": 0.22045488080662087, "learning_rate": 6.486580629220868e-05, "loss": 0.4918, "step": 4650 }, { "epoch": 1.3017072488105232, "grad_norm": 0.2269785178860283, "learning_rate": 6.485107747443528e-05, "loss": 0.5034, "step": 4651 }, { "epoch": 1.3019871256647075, "grad_norm": 0.2278074174643306, "learning_rate": 6.483634724312802e-05, "loss": 0.4936, "step": 4652 }, { "epoch": 1.3022670025188918, "grad_norm": 0.22577671659450127, "learning_rate": 6.482161559968894e-05, "loss": 0.4945, "step": 4653 }, { "epoch": 1.302546879373076, "grad_norm": 0.22740257340647596, "learning_rate": 6.480688254552016e-05, "loss": 0.5235, "step": 4654 }, { "epoch": 1.30282675622726, "grad_norm": 0.23350044835590317, "learning_rate": 6.479214808202403e-05, "loss": 0.4841, "step": 4655 }, { "epoch": 1.3031066330814443, "grad_norm": 0.22544348184644067, "learning_rate": 6.477741221060295e-05, "loss": 0.481, "step": 4656 }, { "epoch": 1.3033865099356283, "grad_norm": 0.2269730372463868, "learning_rate": 6.476267493265953e-05, "loss": 0.4958, "step": 4657 }, { "epoch": 1.3036663867898124, "grad_norm": 0.23736714312852122, "learning_rate": 6.474793624959643e-05, "loss": 0.5077, "step": 4658 }, { "epoch": 1.3039462636439967, "grad_norm": 0.22662145697682098, "learning_rate": 6.473319616281652e-05, "loss": 0.5225, "step": 4659 }, { "epoch": 1.3042261404981808, "grad_norm": 0.21929342074079694, "learning_rate": 6.471845467372273e-05, "loss": 0.4701, "step": 4660 }, { "epoch": 1.3045060173523648, "grad_norm": 0.2265212872451589, "learning_rate": 6.47037117837182e-05, "loss": 0.4989, "step": 4661 }, { "epoch": 1.3047858942065491, "grad_norm": 0.21358014649033347, "learning_rate": 6.468896749420614e-05, "loss": 0.5159, "step": 4662 }, { "epoch": 1.3050657710607334, "grad_norm": 0.22053449437667336, "learning_rate": 6.467422180658994e-05, "loss": 0.5198, "step": 4663 }, { "epoch": 1.3053456479149175, "grad_norm": 0.21856246184017916, "learning_rate": 6.46594747222731e-05, "loss": 0.4871, "step": 4664 }, { "epoch": 1.3056255247691015, "grad_norm": 0.22481669624283085, "learning_rate": 6.464472624265923e-05, "loss": 0.4761, "step": 4665 }, { "epoch": 1.3059054016232858, "grad_norm": 0.2375178895669889, "learning_rate": 6.462997636915215e-05, "loss": 0.4875, "step": 4666 }, { "epoch": 1.30618527847747, "grad_norm": 0.21897222128053973, "learning_rate": 6.461522510315571e-05, "loss": 0.5067, "step": 4667 }, { "epoch": 1.306465155331654, "grad_norm": 0.22033388668398798, "learning_rate": 6.460047244607397e-05, "loss": 0.4813, "step": 4668 }, { "epoch": 1.3067450321858383, "grad_norm": 0.2199675990516879, "learning_rate": 6.458571839931109e-05, "loss": 0.5002, "step": 4669 }, { "epoch": 1.3070249090400223, "grad_norm": 0.22006752103480154, "learning_rate": 6.457096296427137e-05, "loss": 0.5089, "step": 4670 }, { "epoch": 1.3073047858942066, "grad_norm": 0.2201068651261749, "learning_rate": 6.455620614235924e-05, "loss": 0.4823, "step": 4671 }, { "epoch": 1.3075846627483907, "grad_norm": 0.22125638999287564, "learning_rate": 6.454144793497926e-05, "loss": 0.4937, "step": 4672 }, { "epoch": 1.307864539602575, "grad_norm": 0.23477307884019413, "learning_rate": 6.452668834353611e-05, "loss": 0.4984, "step": 4673 }, { "epoch": 1.308144416456759, "grad_norm": 0.21975294977691442, "learning_rate": 6.451192736943466e-05, "loss": 0.4793, "step": 4674 }, { "epoch": 1.3084242933109431, "grad_norm": 0.23699559958971625, "learning_rate": 6.449716501407984e-05, "loss": 0.494, "step": 4675 }, { "epoch": 1.3087041701651274, "grad_norm": 0.2258106266233653, "learning_rate": 6.448240127887671e-05, "loss": 0.4966, "step": 4676 }, { "epoch": 1.3089840470193115, "grad_norm": 0.22730883819000816, "learning_rate": 6.446763616523055e-05, "loss": 0.5106, "step": 4677 }, { "epoch": 1.3092639238734955, "grad_norm": 0.2163831582075994, "learning_rate": 6.445286967454666e-05, "loss": 0.4877, "step": 4678 }, { "epoch": 1.3095438007276798, "grad_norm": 0.2330624101798806, "learning_rate": 6.443810180823056e-05, "loss": 0.5108, "step": 4679 }, { "epoch": 1.309823677581864, "grad_norm": 0.21603565802881391, "learning_rate": 6.442333256768784e-05, "loss": 0.5047, "step": 4680 }, { "epoch": 1.3101035544360482, "grad_norm": 0.21938758607667536, "learning_rate": 6.440856195432426e-05, "loss": 0.5042, "step": 4681 }, { "epoch": 1.3103834312902323, "grad_norm": 0.23806358505288744, "learning_rate": 6.439378996954568e-05, "loss": 0.492, "step": 4682 }, { "epoch": 1.3106633081444166, "grad_norm": 0.22318586284509329, "learning_rate": 6.437901661475811e-05, "loss": 0.4839, "step": 4683 }, { "epoch": 1.3109431849986006, "grad_norm": 0.21871111013503194, "learning_rate": 6.436424189136772e-05, "loss": 0.4931, "step": 4684 }, { "epoch": 1.3112230618527847, "grad_norm": 0.21499676471656837, "learning_rate": 6.434946580078072e-05, "loss": 0.498, "step": 4685 }, { "epoch": 1.311502938706969, "grad_norm": 0.21720831314150404, "learning_rate": 6.433468834440356e-05, "loss": 0.4927, "step": 4686 }, { "epoch": 1.311782815561153, "grad_norm": 0.23401604712471621, "learning_rate": 6.431990952364271e-05, "loss": 0.513, "step": 4687 }, { "epoch": 1.3120626924153371, "grad_norm": 0.21885905440437273, "learning_rate": 6.430512933990486e-05, "loss": 0.492, "step": 4688 }, { "epoch": 1.3123425692695214, "grad_norm": 0.22498399331645105, "learning_rate": 6.429034779459679e-05, "loss": 0.4972, "step": 4689 }, { "epoch": 1.3126224461237055, "grad_norm": 0.21826256190213422, "learning_rate": 6.427556488912543e-05, "loss": 0.4971, "step": 4690 }, { "epoch": 1.3129023229778898, "grad_norm": 0.22789436404685254, "learning_rate": 6.42607806248978e-05, "loss": 0.5085, "step": 4691 }, { "epoch": 1.3131821998320738, "grad_norm": 0.2219310789830236, "learning_rate": 6.42459950033211e-05, "loss": 0.4894, "step": 4692 }, { "epoch": 1.3134620766862581, "grad_norm": 0.22507886237445038, "learning_rate": 6.423120802580263e-05, "loss": 0.4928, "step": 4693 }, { "epoch": 1.3137419535404422, "grad_norm": 0.24038631090046275, "learning_rate": 6.42164196937498e-05, "loss": 0.5156, "step": 4694 }, { "epoch": 1.3140218303946263, "grad_norm": 0.22047131400075695, "learning_rate": 6.42016300085702e-05, "loss": 0.4992, "step": 4695 }, { "epoch": 1.3143017072488106, "grad_norm": 0.22230506787803977, "learning_rate": 6.41868389716715e-05, "loss": 0.497, "step": 4696 }, { "epoch": 1.3145815841029946, "grad_norm": 0.22722650947558032, "learning_rate": 6.417204658446152e-05, "loss": 0.4982, "step": 4697 }, { "epoch": 1.3148614609571787, "grad_norm": 0.2254129475987711, "learning_rate": 6.415725284834822e-05, "loss": 0.4927, "step": 4698 }, { "epoch": 1.315141337811363, "grad_norm": 0.22211096551732776, "learning_rate": 6.414245776473964e-05, "loss": 0.4829, "step": 4699 }, { "epoch": 1.3154212146655473, "grad_norm": 0.22199031809195682, "learning_rate": 6.412766133504405e-05, "loss": 0.5126, "step": 4700 }, { "epoch": 1.3157010915197314, "grad_norm": 0.22396834599543242, "learning_rate": 6.411286356066975e-05, "loss": 0.4927, "step": 4701 }, { "epoch": 1.3159809683739154, "grad_norm": 0.22881830847296752, "learning_rate": 6.409806444302518e-05, "loss": 0.4868, "step": 4702 }, { "epoch": 1.3162608452280997, "grad_norm": 0.23468255489824635, "learning_rate": 6.408326398351895e-05, "loss": 0.5051, "step": 4703 }, { "epoch": 1.3165407220822838, "grad_norm": 0.23152761908826994, "learning_rate": 6.406846218355977e-05, "loss": 0.511, "step": 4704 }, { "epoch": 1.3168205989364679, "grad_norm": 0.22458082062715418, "learning_rate": 6.40536590445565e-05, "loss": 0.4861, "step": 4705 }, { "epoch": 1.3171004757906521, "grad_norm": 0.21977348364975618, "learning_rate": 6.403885456791807e-05, "loss": 0.4952, "step": 4706 }, { "epoch": 1.3173803526448362, "grad_norm": 0.2143295074476737, "learning_rate": 6.402404875505362e-05, "loss": 0.4889, "step": 4707 }, { "epoch": 1.3176602294990205, "grad_norm": 0.22485304045464563, "learning_rate": 6.400924160737234e-05, "loss": 0.513, "step": 4708 }, { "epoch": 1.3179401063532046, "grad_norm": 0.21490462207915342, "learning_rate": 6.399443312628363e-05, "loss": 0.4884, "step": 4709 }, { "epoch": 1.3182199832073889, "grad_norm": 0.22718250602188456, "learning_rate": 6.397962331319692e-05, "loss": 0.4947, "step": 4710 }, { "epoch": 1.318499860061573, "grad_norm": 0.22543975476627806, "learning_rate": 6.396481216952186e-05, "loss": 0.4947, "step": 4711 }, { "epoch": 1.318779736915757, "grad_norm": 0.2195984700756812, "learning_rate": 6.394999969666815e-05, "loss": 0.4878, "step": 4712 }, { "epoch": 1.3190596137699413, "grad_norm": 0.23930971128941994, "learning_rate": 6.393518589604567e-05, "loss": 0.5046, "step": 4713 }, { "epoch": 1.3193394906241254, "grad_norm": 0.2330286582031531, "learning_rate": 6.392037076906439e-05, "loss": 0.4952, "step": 4714 }, { "epoch": 1.3196193674783094, "grad_norm": 0.212829444351127, "learning_rate": 6.390555431713442e-05, "loss": 0.4782, "step": 4715 }, { "epoch": 1.3198992443324937, "grad_norm": 0.22048334769377406, "learning_rate": 6.389073654166602e-05, "loss": 0.4889, "step": 4716 }, { "epoch": 1.3201791211866778, "grad_norm": 0.2328567892686063, "learning_rate": 6.387591744406951e-05, "loss": 0.5119, "step": 4717 }, { "epoch": 1.320458998040862, "grad_norm": 0.22954351194606487, "learning_rate": 6.386109702575545e-05, "loss": 0.5118, "step": 4718 }, { "epoch": 1.3207388748950462, "grad_norm": 0.22259220419694478, "learning_rate": 6.38462752881344e-05, "loss": 0.4906, "step": 4719 }, { "epoch": 1.3210187517492304, "grad_norm": 0.2267080847553509, "learning_rate": 6.383145223261712e-05, "loss": 0.5052, "step": 4720 }, { "epoch": 1.3212986286034145, "grad_norm": 0.21041299621266424, "learning_rate": 6.381662786061448e-05, "loss": 0.493, "step": 4721 }, { "epoch": 1.3215785054575986, "grad_norm": 0.22450646041364145, "learning_rate": 6.380180217353746e-05, "loss": 0.5041, "step": 4722 }, { "epoch": 1.3218583823117829, "grad_norm": 0.22175856286452167, "learning_rate": 6.378697517279718e-05, "loss": 0.4701, "step": 4723 }, { "epoch": 1.322138259165967, "grad_norm": 0.22275708747147227, "learning_rate": 6.377214685980489e-05, "loss": 0.4969, "step": 4724 }, { "epoch": 1.322418136020151, "grad_norm": 0.22013441784850707, "learning_rate": 6.375731723597194e-05, "loss": 0.5121, "step": 4725 }, { "epoch": 1.3226980128743353, "grad_norm": 0.21238260380550128, "learning_rate": 6.374248630270985e-05, "loss": 0.4964, "step": 4726 }, { "epoch": 1.3229778897285194, "grad_norm": 0.23561985632728638, "learning_rate": 6.372765406143023e-05, "loss": 0.5025, "step": 4727 }, { "epoch": 1.3232577665827037, "grad_norm": 0.2242488728215307, "learning_rate": 6.37128205135448e-05, "loss": 0.5204, "step": 4728 }, { "epoch": 1.3235376434368877, "grad_norm": 0.21401293399237717, "learning_rate": 6.369798566046545e-05, "loss": 0.504, "step": 4729 }, { "epoch": 1.323817520291072, "grad_norm": 0.22034017856381255, "learning_rate": 6.368314950360415e-05, "loss": 0.4982, "step": 4730 }, { "epoch": 1.324097397145256, "grad_norm": 0.21056206469299288, "learning_rate": 6.366831204437302e-05, "loss": 0.4761, "step": 4731 }, { "epoch": 1.3243772739994402, "grad_norm": 0.2329875674472275, "learning_rate": 6.36534732841843e-05, "loss": 0.5098, "step": 4732 }, { "epoch": 1.3246571508536245, "grad_norm": 0.23177788945168468, "learning_rate": 6.363863322445036e-05, "loss": 0.4898, "step": 4733 }, { "epoch": 1.3249370277078085, "grad_norm": 0.22351103686194437, "learning_rate": 6.362379186658365e-05, "loss": 0.4962, "step": 4734 }, { "epoch": 1.3252169045619926, "grad_norm": 0.2280168836124819, "learning_rate": 6.360894921199682e-05, "loss": 0.5199, "step": 4735 }, { "epoch": 1.3254967814161769, "grad_norm": 0.23481066826371386, "learning_rate": 6.359410526210258e-05, "loss": 0.486, "step": 4736 }, { "epoch": 1.3257766582703612, "grad_norm": 0.22151126008727215, "learning_rate": 6.357926001831381e-05, "loss": 0.496, "step": 4737 }, { "epoch": 1.3260565351245452, "grad_norm": 0.22161530996309398, "learning_rate": 6.356441348204348e-05, "loss": 0.4819, "step": 4738 }, { "epoch": 1.3263364119787293, "grad_norm": 0.22943625558146605, "learning_rate": 6.354956565470466e-05, "loss": 0.4931, "step": 4739 }, { "epoch": 1.3266162888329136, "grad_norm": 0.22540649565140475, "learning_rate": 6.353471653771059e-05, "loss": 0.4903, "step": 4740 }, { "epoch": 1.3268961656870977, "grad_norm": 0.22683525578616123, "learning_rate": 6.351986613247465e-05, "loss": 0.4995, "step": 4741 }, { "epoch": 1.3271760425412817, "grad_norm": 0.2182989044801757, "learning_rate": 6.350501444041026e-05, "loss": 0.5032, "step": 4742 }, { "epoch": 1.327455919395466, "grad_norm": 0.22302126704546218, "learning_rate": 6.349016146293103e-05, "loss": 0.4981, "step": 4743 }, { "epoch": 1.32773579624965, "grad_norm": 0.23159155905157916, "learning_rate": 6.34753072014507e-05, "loss": 0.4965, "step": 4744 }, { "epoch": 1.3280156731038344, "grad_norm": 0.22139141956605743, "learning_rate": 6.346045165738309e-05, "loss": 0.4998, "step": 4745 }, { "epoch": 1.3282955499580185, "grad_norm": 0.23315805768894807, "learning_rate": 6.344559483214215e-05, "loss": 0.5043, "step": 4746 }, { "epoch": 1.3285754268122028, "grad_norm": 0.22841483405260213, "learning_rate": 6.343073672714199e-05, "loss": 0.4905, "step": 4747 }, { "epoch": 1.3288553036663868, "grad_norm": 0.22947480262023023, "learning_rate": 6.341587734379676e-05, "loss": 0.4946, "step": 4748 }, { "epoch": 1.329135180520571, "grad_norm": 0.22656550982068765, "learning_rate": 6.340101668352083e-05, "loss": 0.4789, "step": 4749 }, { "epoch": 1.3294150573747552, "grad_norm": 0.23220779940363576, "learning_rate": 6.338615474772862e-05, "loss": 0.4944, "step": 4750 }, { "epoch": 1.3296949342289393, "grad_norm": 0.2273617717651779, "learning_rate": 6.337129153783471e-05, "loss": 0.5034, "step": 4751 }, { "epoch": 1.3299748110831233, "grad_norm": 0.21960534139981308, "learning_rate": 6.335642705525378e-05, "loss": 0.4999, "step": 4752 }, { "epoch": 1.3302546879373076, "grad_norm": 0.2360726375869372, "learning_rate": 6.334156130140068e-05, "loss": 0.5004, "step": 4753 }, { "epoch": 1.3305345647914917, "grad_norm": 0.21337455412485573, "learning_rate": 6.332669427769028e-05, "loss": 0.501, "step": 4754 }, { "epoch": 1.330814441645676, "grad_norm": 0.2257426587297062, "learning_rate": 6.331182598553766e-05, "loss": 0.5086, "step": 4755 }, { "epoch": 1.33109431849986, "grad_norm": 0.23008188954974348, "learning_rate": 6.3296956426358e-05, "loss": 0.4809, "step": 4756 }, { "epoch": 1.3313741953540443, "grad_norm": 0.23182473779450402, "learning_rate": 6.32820856015666e-05, "loss": 0.4832, "step": 4757 }, { "epoch": 1.3316540722082284, "grad_norm": 0.22953960976525958, "learning_rate": 6.326721351257884e-05, "loss": 0.5066, "step": 4758 }, { "epoch": 1.3319339490624125, "grad_norm": 0.21807919472092635, "learning_rate": 6.325234016081027e-05, "loss": 0.4981, "step": 4759 }, { "epoch": 1.3322138259165968, "grad_norm": 0.2295915976641455, "learning_rate": 6.323746554767653e-05, "loss": 0.5026, "step": 4760 }, { "epoch": 1.3324937027707808, "grad_norm": 0.21817698600915825, "learning_rate": 6.322258967459344e-05, "loss": 0.4999, "step": 4761 }, { "epoch": 1.332773579624965, "grad_norm": 0.217287414692134, "learning_rate": 6.320771254297684e-05, "loss": 0.4983, "step": 4762 }, { "epoch": 1.3330534564791492, "grad_norm": 0.21927009183186255, "learning_rate": 6.319283415424279e-05, "loss": 0.4766, "step": 4763 }, { "epoch": 1.3333333333333333, "grad_norm": 0.2213969872620352, "learning_rate": 6.31779545098074e-05, "loss": 0.4948, "step": 4764 }, { "epoch": 1.3336132101875176, "grad_norm": 0.21969073392087798, "learning_rate": 6.316307361108692e-05, "loss": 0.504, "step": 4765 }, { "epoch": 1.3338930870417016, "grad_norm": 0.22088737612147463, "learning_rate": 6.314819145949772e-05, "loss": 0.4627, "step": 4766 }, { "epoch": 1.334172963895886, "grad_norm": 0.2153264695661567, "learning_rate": 6.31333080564563e-05, "loss": 0.5108, "step": 4767 }, { "epoch": 1.33445284075007, "grad_norm": 0.21888635991664576, "learning_rate": 6.311842340337925e-05, "loss": 0.4821, "step": 4768 }, { "epoch": 1.334732717604254, "grad_norm": 0.22741872715040778, "learning_rate": 6.310353750168333e-05, "loss": 0.4938, "step": 4769 }, { "epoch": 1.3350125944584383, "grad_norm": 0.23285899827112347, "learning_rate": 6.30886503527854e-05, "loss": 0.4915, "step": 4770 }, { "epoch": 1.3352924713126224, "grad_norm": 0.21963809122637543, "learning_rate": 6.307376195810238e-05, "loss": 0.5014, "step": 4771 }, { "epoch": 1.3355723481668065, "grad_norm": 0.2275057665413845, "learning_rate": 6.305887231905138e-05, "loss": 0.5351, "step": 4772 }, { "epoch": 1.3358522250209908, "grad_norm": 0.21937968106171388, "learning_rate": 6.30439814370496e-05, "loss": 0.4837, "step": 4773 }, { "epoch": 1.336132101875175, "grad_norm": 0.23340339542763372, "learning_rate": 6.302908931351438e-05, "loss": 0.5266, "step": 4774 }, { "epoch": 1.3364119787293591, "grad_norm": 0.21622934229164417, "learning_rate": 6.301419594986314e-05, "loss": 0.4817, "step": 4775 }, { "epoch": 1.3366918555835432, "grad_norm": 0.2306834778997054, "learning_rate": 6.299930134751344e-05, "loss": 0.504, "step": 4776 }, { "epoch": 1.3369717324377275, "grad_norm": 0.2384546455505702, "learning_rate": 6.298440550788296e-05, "loss": 0.5239, "step": 4777 }, { "epoch": 1.3372516092919116, "grad_norm": 0.22504121494751755, "learning_rate": 6.29695084323895e-05, "loss": 0.4876, "step": 4778 }, { "epoch": 1.3375314861460956, "grad_norm": 0.2216634018412408, "learning_rate": 6.295461012245097e-05, "loss": 0.5133, "step": 4779 }, { "epoch": 1.33781136300028, "grad_norm": 0.2207626799054461, "learning_rate": 6.29397105794854e-05, "loss": 0.4951, "step": 4780 }, { "epoch": 1.338091239854464, "grad_norm": 0.2322294653571303, "learning_rate": 6.292480980491094e-05, "loss": 0.5001, "step": 4781 }, { "epoch": 1.338371116708648, "grad_norm": 0.22187636224284968, "learning_rate": 6.290990780014582e-05, "loss": 0.4907, "step": 4782 }, { "epoch": 1.3386509935628323, "grad_norm": 0.22467144852771262, "learning_rate": 6.289500456660848e-05, "loss": 0.495, "step": 4783 }, { "epoch": 1.3389308704170166, "grad_norm": 0.23455136872692678, "learning_rate": 6.288010010571738e-05, "loss": 0.5156, "step": 4784 }, { "epoch": 1.3392107472712007, "grad_norm": 0.2189676584244879, "learning_rate": 6.286519441889114e-05, "loss": 0.5094, "step": 4785 }, { "epoch": 1.3394906241253848, "grad_norm": 0.22244557228116685, "learning_rate": 6.285028750754849e-05, "loss": 0.4909, "step": 4786 }, { "epoch": 1.339770500979569, "grad_norm": 0.2153146699586078, "learning_rate": 6.283537937310828e-05, "loss": 0.4981, "step": 4787 }, { "epoch": 1.3400503778337531, "grad_norm": 0.21528893540817695, "learning_rate": 6.282047001698948e-05, "loss": 0.496, "step": 4788 }, { "epoch": 1.3403302546879372, "grad_norm": 0.22388445323361997, "learning_rate": 6.280555944061118e-05, "loss": 0.4991, "step": 4789 }, { "epoch": 1.3406101315421215, "grad_norm": 0.22370320505369892, "learning_rate": 6.279064764539256e-05, "loss": 0.5196, "step": 4790 }, { "epoch": 1.3408900083963056, "grad_norm": 0.2259520871203192, "learning_rate": 6.277573463275293e-05, "loss": 0.5197, "step": 4791 }, { "epoch": 1.3411698852504899, "grad_norm": 0.22594160224656487, "learning_rate": 6.276082040411174e-05, "loss": 0.4793, "step": 4792 }, { "epoch": 1.341449762104674, "grad_norm": 0.239789985501656, "learning_rate": 6.274590496088852e-05, "loss": 0.5342, "step": 4793 }, { "epoch": 1.3417296389588582, "grad_norm": 0.23199647815813662, "learning_rate": 6.27309883045029e-05, "loss": 0.5066, "step": 4794 }, { "epoch": 1.3420095158130423, "grad_norm": 0.21526405986152142, "learning_rate": 6.271607043637472e-05, "loss": 0.4823, "step": 4795 }, { "epoch": 1.3422893926672264, "grad_norm": 0.21958703219627543, "learning_rate": 6.270115135792383e-05, "loss": 0.5053, "step": 4796 }, { "epoch": 1.3425692695214106, "grad_norm": 0.22609464409833172, "learning_rate": 6.268623107057023e-05, "loss": 0.5056, "step": 4797 }, { "epoch": 1.3428491463755947, "grad_norm": 0.22194077400812287, "learning_rate": 6.267130957573408e-05, "loss": 0.5087, "step": 4798 }, { "epoch": 1.3431290232297788, "grad_norm": 0.23204024727785122, "learning_rate": 6.265638687483558e-05, "loss": 0.5043, "step": 4799 }, { "epoch": 1.343408900083963, "grad_norm": 0.22938061058411732, "learning_rate": 6.264146296929509e-05, "loss": 0.4891, "step": 4800 }, { "epoch": 1.3436887769381471, "grad_norm": 0.22274245548079766, "learning_rate": 6.26265378605331e-05, "loss": 0.4812, "step": 4801 }, { "epoch": 1.3439686537923314, "grad_norm": 0.2171688869158793, "learning_rate": 6.261161154997016e-05, "loss": 0.4798, "step": 4802 }, { "epoch": 1.3442485306465155, "grad_norm": 0.21930534844838145, "learning_rate": 6.259668403902695e-05, "loss": 0.4972, "step": 4803 }, { "epoch": 1.3445284075006998, "grad_norm": 0.23115945927184667, "learning_rate": 6.258175532912431e-05, "loss": 0.4867, "step": 4804 }, { "epoch": 1.3448082843548839, "grad_norm": 0.2233713975022183, "learning_rate": 6.256682542168318e-05, "loss": 0.4997, "step": 4805 }, { "epoch": 1.345088161209068, "grad_norm": 0.236053453803603, "learning_rate": 6.255189431812455e-05, "loss": 0.4804, "step": 4806 }, { "epoch": 1.3453680380632522, "grad_norm": 0.22318309010050524, "learning_rate": 6.253696201986961e-05, "loss": 0.4918, "step": 4807 }, { "epoch": 1.3456479149174363, "grad_norm": 0.22434174613416846, "learning_rate": 6.25220285283396e-05, "loss": 0.5007, "step": 4808 }, { "epoch": 1.3459277917716204, "grad_norm": 0.230362751286978, "learning_rate": 6.25070938449559e-05, "loss": 0.5054, "step": 4809 }, { "epoch": 1.3462076686258047, "grad_norm": 0.2198229960126399, "learning_rate": 6.249215797114e-05, "loss": 0.4772, "step": 4810 }, { "epoch": 1.3464875454799887, "grad_norm": 0.23163298555086242, "learning_rate": 6.247722090831354e-05, "loss": 0.5157, "step": 4811 }, { "epoch": 1.346767422334173, "grad_norm": 0.22279990270725775, "learning_rate": 6.246228265789819e-05, "loss": 0.4687, "step": 4812 }, { "epoch": 1.347047299188357, "grad_norm": 0.2361531773223951, "learning_rate": 6.24473432213158e-05, "loss": 0.5136, "step": 4813 }, { "epoch": 1.3473271760425414, "grad_norm": 0.2287702209214274, "learning_rate": 6.243240259998831e-05, "loss": 0.5033, "step": 4814 }, { "epoch": 1.3476070528967254, "grad_norm": 0.2356348596820923, "learning_rate": 6.24174607953378e-05, "loss": 0.5074, "step": 4815 }, { "epoch": 1.3478869297509095, "grad_norm": 0.23262247919309248, "learning_rate": 6.240251780878641e-05, "loss": 0.5238, "step": 4816 }, { "epoch": 1.3481668066050938, "grad_norm": 0.2173593030771436, "learning_rate": 6.238757364175644e-05, "loss": 0.4791, "step": 4817 }, { "epoch": 1.3484466834592779, "grad_norm": 0.23331170999229453, "learning_rate": 6.237262829567027e-05, "loss": 0.4954, "step": 4818 }, { "epoch": 1.348726560313462, "grad_norm": 0.23005691137462347, "learning_rate": 6.235768177195042e-05, "loss": 0.4864, "step": 4819 }, { "epoch": 1.3490064371676462, "grad_norm": 0.2191333581429469, "learning_rate": 6.23427340720195e-05, "loss": 0.4958, "step": 4820 }, { "epoch": 1.3492863140218305, "grad_norm": 0.22999061796024522, "learning_rate": 6.232778519730023e-05, "loss": 0.5156, "step": 4821 }, { "epoch": 1.3495661908760146, "grad_norm": 0.2301668625919981, "learning_rate": 6.231283514921547e-05, "loss": 0.5048, "step": 4822 }, { "epoch": 1.3498460677301987, "grad_norm": 0.22473564312347116, "learning_rate": 6.229788392918819e-05, "loss": 0.4871, "step": 4823 }, { "epoch": 1.350125944584383, "grad_norm": 0.2259976525067703, "learning_rate": 6.228293153864142e-05, "loss": 0.502, "step": 4824 }, { "epoch": 1.350405821438567, "grad_norm": 0.23636246438309857, "learning_rate": 6.226797797899835e-05, "loss": 0.5243, "step": 4825 }, { "epoch": 1.350685698292751, "grad_norm": 0.21997183945333693, "learning_rate": 6.225302325168227e-05, "loss": 0.5145, "step": 4826 }, { "epoch": 1.3509655751469354, "grad_norm": 0.22055693597634543, "learning_rate": 6.223806735811657e-05, "loss": 0.4995, "step": 4827 }, { "epoch": 1.3512454520011195, "grad_norm": 0.22283424242343022, "learning_rate": 6.222311029972477e-05, "loss": 0.4863, "step": 4828 }, { "epoch": 1.3515253288553037, "grad_norm": 0.2227311932162068, "learning_rate": 6.22081520779305e-05, "loss": 0.4889, "step": 4829 }, { "epoch": 1.3518052057094878, "grad_norm": 0.21546280503278523, "learning_rate": 6.219319269415747e-05, "loss": 0.5107, "step": 4830 }, { "epoch": 1.352085082563672, "grad_norm": 0.23060313961191806, "learning_rate": 6.217823214982955e-05, "loss": 0.5085, "step": 4831 }, { "epoch": 1.3523649594178562, "grad_norm": 0.23194577730240898, "learning_rate": 6.216327044637067e-05, "loss": 0.488, "step": 4832 }, { "epoch": 1.3526448362720402, "grad_norm": 0.23049515631372122, "learning_rate": 6.214830758520491e-05, "loss": 0.4915, "step": 4833 }, { "epoch": 1.3529247131262245, "grad_norm": 0.22403765225918498, "learning_rate": 6.213334356775642e-05, "loss": 0.4963, "step": 4834 }, { "epoch": 1.3532045899804086, "grad_norm": 0.22720845592377828, "learning_rate": 6.21183783954495e-05, "loss": 0.5111, "step": 4835 }, { "epoch": 1.3534844668345927, "grad_norm": 0.22105383560152483, "learning_rate": 6.210341206970855e-05, "loss": 0.4922, "step": 4836 }, { "epoch": 1.353764343688777, "grad_norm": 0.22698275434901025, "learning_rate": 6.208844459195806e-05, "loss": 0.4778, "step": 4837 }, { "epoch": 1.354044220542961, "grad_norm": 0.2324373874536657, "learning_rate": 6.207347596362265e-05, "loss": 0.508, "step": 4838 }, { "epoch": 1.3543240973971453, "grad_norm": 0.23648326306698714, "learning_rate": 6.205850618612703e-05, "loss": 0.5025, "step": 4839 }, { "epoch": 1.3546039742513294, "grad_norm": 0.22326814690064614, "learning_rate": 6.204353526089606e-05, "loss": 0.4822, "step": 4840 }, { "epoch": 1.3548838511055137, "grad_norm": 0.2183293592342329, "learning_rate": 6.202856318935463e-05, "loss": 0.4829, "step": 4841 }, { "epoch": 1.3551637279596978, "grad_norm": 0.22675519081505494, "learning_rate": 6.201358997292786e-05, "loss": 0.5089, "step": 4842 }, { "epoch": 1.3554436048138818, "grad_norm": 0.2284883687367303, "learning_rate": 6.199861561304086e-05, "loss": 0.4875, "step": 4843 }, { "epoch": 1.355723481668066, "grad_norm": 0.2319921494654131, "learning_rate": 6.19836401111189e-05, "loss": 0.515, "step": 4844 }, { "epoch": 1.3560033585222502, "grad_norm": 0.2315120662356948, "learning_rate": 6.196866346858738e-05, "loss": 0.4998, "step": 4845 }, { "epoch": 1.3562832353764342, "grad_norm": 0.22493496369229218, "learning_rate": 6.195368568687176e-05, "loss": 0.5216, "step": 4846 }, { "epoch": 1.3565631122306185, "grad_norm": 0.2268476164734343, "learning_rate": 6.193870676739765e-05, "loss": 0.4953, "step": 4847 }, { "epoch": 1.3568429890848026, "grad_norm": 0.2299974124752463, "learning_rate": 6.192372671159075e-05, "loss": 0.5088, "step": 4848 }, { "epoch": 1.357122865938987, "grad_norm": 0.23282050525171238, "learning_rate": 6.190874552087686e-05, "loss": 0.5126, "step": 4849 }, { "epoch": 1.357402742793171, "grad_norm": 0.23778468068019334, "learning_rate": 6.189376319668192e-05, "loss": 0.5093, "step": 4850 }, { "epoch": 1.3576826196473553, "grad_norm": 0.2324677545913148, "learning_rate": 6.187877974043192e-05, "loss": 0.5056, "step": 4851 }, { "epoch": 1.3579624965015393, "grad_norm": 0.23153802153864753, "learning_rate": 6.186379515355304e-05, "loss": 0.5152, "step": 4852 }, { "epoch": 1.3582423733557234, "grad_norm": 0.21973354509480217, "learning_rate": 6.184880943747148e-05, "loss": 0.502, "step": 4853 }, { "epoch": 1.3585222502099077, "grad_norm": 0.2369756551082989, "learning_rate": 6.183382259361362e-05, "loss": 0.4752, "step": 4854 }, { "epoch": 1.3588021270640918, "grad_norm": 0.23540156776186763, "learning_rate": 6.181883462340588e-05, "loss": 0.5003, "step": 4855 }, { "epoch": 1.3590820039182758, "grad_norm": 0.2202761181726255, "learning_rate": 6.180384552827487e-05, "loss": 0.511, "step": 4856 }, { "epoch": 1.3593618807724601, "grad_norm": 0.21997182463050327, "learning_rate": 6.178885530964722e-05, "loss": 0.5014, "step": 4857 }, { "epoch": 1.3596417576266444, "grad_norm": 0.2307998778689992, "learning_rate": 6.177386396894973e-05, "loss": 0.5087, "step": 4858 }, { "epoch": 1.3599216344808285, "grad_norm": 0.22173137184083913, "learning_rate": 6.175887150760927e-05, "loss": 0.4698, "step": 4859 }, { "epoch": 1.3602015113350125, "grad_norm": 0.23534290672162, "learning_rate": 6.174387792705286e-05, "loss": 0.4982, "step": 4860 }, { "epoch": 1.3604813881891968, "grad_norm": 0.2229138478331696, "learning_rate": 6.172888322870754e-05, "loss": 0.4962, "step": 4861 }, { "epoch": 1.360761265043381, "grad_norm": 0.2274025469077541, "learning_rate": 6.171388741400057e-05, "loss": 0.4896, "step": 4862 }, { "epoch": 1.361041141897565, "grad_norm": 0.22381917227607304, "learning_rate": 6.169889048435924e-05, "loss": 0.489, "step": 4863 }, { "epoch": 1.3613210187517493, "grad_norm": 0.22810534932635093, "learning_rate": 6.168389244121096e-05, "loss": 0.5079, "step": 4864 }, { "epoch": 1.3616008956059333, "grad_norm": 0.2128500277491175, "learning_rate": 6.166889328598326e-05, "loss": 0.5037, "step": 4865 }, { "epoch": 1.3618807724601176, "grad_norm": 0.22321074914783084, "learning_rate": 6.165389302010377e-05, "loss": 0.5147, "step": 4866 }, { "epoch": 1.3621606493143017, "grad_norm": 0.2401386710456637, "learning_rate": 6.163889164500022e-05, "loss": 0.5111, "step": 4867 }, { "epoch": 1.362440526168486, "grad_norm": 0.2391316111387079, "learning_rate": 6.162388916210044e-05, "loss": 0.5158, "step": 4868 }, { "epoch": 1.36272040302267, "grad_norm": 0.3199516716609756, "learning_rate": 6.160888557283239e-05, "loss": 0.5238, "step": 4869 }, { "epoch": 1.3630002798768541, "grad_norm": 0.21661828950826603, "learning_rate": 6.159388087862412e-05, "loss": 0.4873, "step": 4870 }, { "epoch": 1.3632801567310384, "grad_norm": 0.22277479866780228, "learning_rate": 6.157887508090375e-05, "loss": 0.4735, "step": 4871 }, { "epoch": 1.3635600335852225, "grad_norm": 0.22983980627900288, "learning_rate": 6.15638681810996e-05, "loss": 0.5243, "step": 4872 }, { "epoch": 1.3638399104394066, "grad_norm": 0.2265029377674558, "learning_rate": 6.154886018063999e-05, "loss": 0.5044, "step": 4873 }, { "epoch": 1.3641197872935908, "grad_norm": 0.22217929053543983, "learning_rate": 6.15338510809534e-05, "loss": 0.4971, "step": 4874 }, { "epoch": 1.364399664147775, "grad_norm": 0.22261889453846326, "learning_rate": 6.151884088346842e-05, "loss": 0.4827, "step": 4875 }, { "epoch": 1.3646795410019592, "grad_norm": 0.21438655916459587, "learning_rate": 6.15038295896137e-05, "loss": 0.4868, "step": 4876 }, { "epoch": 1.3649594178561433, "grad_norm": 0.22544405100383874, "learning_rate": 6.148881720081806e-05, "loss": 0.4943, "step": 4877 }, { "epoch": 1.3652392947103276, "grad_norm": 0.22266497608271735, "learning_rate": 6.147380371851036e-05, "loss": 0.4814, "step": 4878 }, { "epoch": 1.3655191715645116, "grad_norm": 0.223885701707682, "learning_rate": 6.14587891441196e-05, "loss": 0.5012, "step": 4879 }, { "epoch": 1.3657990484186957, "grad_norm": 0.23269224202408606, "learning_rate": 6.144377347907487e-05, "loss": 0.5239, "step": 4880 }, { "epoch": 1.36607892527288, "grad_norm": 0.22609485478475636, "learning_rate": 6.14287567248054e-05, "loss": 0.5014, "step": 4881 }, { "epoch": 1.366358802127064, "grad_norm": 0.23157140715010618, "learning_rate": 6.141373888274044e-05, "loss": 0.5326, "step": 4882 }, { "epoch": 1.3666386789812481, "grad_norm": 0.22651333469638466, "learning_rate": 6.139871995430944e-05, "loss": 0.5387, "step": 4883 }, { "epoch": 1.3669185558354324, "grad_norm": 0.22906579691133122, "learning_rate": 6.138369994094188e-05, "loss": 0.4838, "step": 4884 }, { "epoch": 1.3671984326896165, "grad_norm": 0.21872294356138144, "learning_rate": 6.13686788440674e-05, "loss": 0.501, "step": 4885 }, { "epoch": 1.3674783095438008, "grad_norm": 0.2438449946521116, "learning_rate": 6.13536566651157e-05, "loss": 0.4875, "step": 4886 }, { "epoch": 1.3677581863979849, "grad_norm": 0.2308407569999151, "learning_rate": 6.13386334055166e-05, "loss": 0.4991, "step": 4887 }, { "epoch": 1.3680380632521691, "grad_norm": 0.22926425337022524, "learning_rate": 6.132360906670002e-05, "loss": 0.4856, "step": 4888 }, { "epoch": 1.3683179401063532, "grad_norm": 0.2212095014222958, "learning_rate": 6.1308583650096e-05, "loss": 0.5108, "step": 4889 }, { "epoch": 1.3685978169605373, "grad_norm": 0.22955795622145608, "learning_rate": 6.129355715713468e-05, "loss": 0.476, "step": 4890 }, { "epoch": 1.3688776938147216, "grad_norm": 0.21778846604224314, "learning_rate": 6.127852958924624e-05, "loss": 0.4957, "step": 4891 }, { "epoch": 1.3691575706689056, "grad_norm": 0.224328153450806, "learning_rate": 6.126350094786105e-05, "loss": 0.5033, "step": 4892 }, { "epoch": 1.3694374475230897, "grad_norm": 0.22334866258573038, "learning_rate": 6.124847123440953e-05, "loss": 0.4965, "step": 4893 }, { "epoch": 1.369717324377274, "grad_norm": 0.23176984056081099, "learning_rate": 6.123344045032223e-05, "loss": 0.4969, "step": 4894 }, { "epoch": 1.3699972012314583, "grad_norm": 0.2186322897741303, "learning_rate": 6.121840859702977e-05, "loss": 0.4829, "step": 4895 }, { "epoch": 1.3702770780856424, "grad_norm": 0.23066094336528212, "learning_rate": 6.12033756759629e-05, "loss": 0.4914, "step": 4896 }, { "epoch": 1.3705569549398264, "grad_norm": 0.23044860403211298, "learning_rate": 6.118834168855248e-05, "loss": 0.482, "step": 4897 }, { "epoch": 1.3708368317940107, "grad_norm": 0.223957109973142, "learning_rate": 6.117330663622942e-05, "loss": 0.4826, "step": 4898 }, { "epoch": 1.3711167086481948, "grad_norm": 0.21772359136514263, "learning_rate": 6.115827052042479e-05, "loss": 0.489, "step": 4899 }, { "epoch": 1.3713965855023789, "grad_norm": 0.22640568563848076, "learning_rate": 6.114323334256972e-05, "loss": 0.4999, "step": 4900 }, { "epoch": 1.3716764623565632, "grad_norm": 0.21726592509754597, "learning_rate": 6.112819510409547e-05, "loss": 0.4839, "step": 4901 }, { "epoch": 1.3719563392107472, "grad_norm": 0.23134599121488977, "learning_rate": 6.111315580643337e-05, "loss": 0.5139, "step": 4902 }, { "epoch": 1.3722362160649315, "grad_norm": 0.224669563980622, "learning_rate": 6.109811545101488e-05, "loss": 0.5005, "step": 4903 }, { "epoch": 1.3725160929191156, "grad_norm": 0.2360258757364829, "learning_rate": 6.108307403927156e-05, "loss": 0.4819, "step": 4904 }, { "epoch": 1.3727959697732999, "grad_norm": 0.49855816430372935, "learning_rate": 6.106803157263504e-05, "loss": 0.5051, "step": 4905 }, { "epoch": 1.373075846627484, "grad_norm": 0.21973509941982283, "learning_rate": 6.105298805253708e-05, "loss": 0.5112, "step": 4906 }, { "epoch": 1.373355723481668, "grad_norm": 0.22033958319892305, "learning_rate": 6.103794348040954e-05, "loss": 0.4948, "step": 4907 }, { "epoch": 1.3736356003358523, "grad_norm": 0.2249123039487554, "learning_rate": 6.102289785768436e-05, "loss": 0.493, "step": 4908 }, { "epoch": 1.3739154771900364, "grad_norm": 0.217968458008853, "learning_rate": 6.1007851185793575e-05, "loss": 0.4968, "step": 4909 }, { "epoch": 1.3741953540442204, "grad_norm": 0.2332396971806336, "learning_rate": 6.099280346616938e-05, "loss": 0.5023, "step": 4910 }, { "epoch": 1.3744752308984047, "grad_norm": 0.22387058472821672, "learning_rate": 6.0977754700243986e-05, "loss": 0.5086, "step": 4911 }, { "epoch": 1.3747551077525888, "grad_norm": 0.2270411436043115, "learning_rate": 6.096270488944975e-05, "loss": 0.4935, "step": 4912 }, { "epoch": 1.375034984606773, "grad_norm": 0.22162581729918035, "learning_rate": 6.0947654035219135e-05, "loss": 0.4822, "step": 4913 }, { "epoch": 1.3753148614609572, "grad_norm": 0.2235969551155392, "learning_rate": 6.0932602138984675e-05, "loss": 0.4709, "step": 4914 }, { "epoch": 1.3755947383151415, "grad_norm": 0.24945799445412362, "learning_rate": 6.091754920217903e-05, "loss": 0.5103, "step": 4915 }, { "epoch": 1.3758746151693255, "grad_norm": 0.2531319547808986, "learning_rate": 6.090249522623493e-05, "loss": 0.5231, "step": 4916 }, { "epoch": 1.3761544920235096, "grad_norm": 0.21215161998682955, "learning_rate": 6.0887440212585244e-05, "loss": 0.4839, "step": 4917 }, { "epoch": 1.3764343688776939, "grad_norm": 0.22563609490813957, "learning_rate": 6.0872384162662886e-05, "loss": 0.5046, "step": 4918 }, { "epoch": 1.376714245731878, "grad_norm": 0.22422489178315794, "learning_rate": 6.085732707790095e-05, "loss": 0.4959, "step": 4919 }, { "epoch": 1.376994122586062, "grad_norm": 0.22083064508632638, "learning_rate": 6.0842268959732515e-05, "loss": 0.482, "step": 4920 }, { "epoch": 1.3772739994402463, "grad_norm": 0.22922582638465872, "learning_rate": 6.082720980959087e-05, "loss": 0.494, "step": 4921 }, { "epoch": 1.3775538762944304, "grad_norm": 0.2251895411465758, "learning_rate": 6.081214962890933e-05, "loss": 0.4815, "step": 4922 }, { "epoch": 1.3778337531486147, "grad_norm": 0.21360756897421826, "learning_rate": 6.079708841912133e-05, "loss": 0.508, "step": 4923 }, { "epoch": 1.3781136300027987, "grad_norm": 0.22467858319617595, "learning_rate": 6.0782026181660425e-05, "loss": 0.507, "step": 4924 }, { "epoch": 1.378393506856983, "grad_norm": 0.22496655839864346, "learning_rate": 6.0766962917960226e-05, "loss": 0.5045, "step": 4925 }, { "epoch": 1.378673383711167, "grad_norm": 0.2202228953655769, "learning_rate": 6.075189862945446e-05, "loss": 0.5023, "step": 4926 }, { "epoch": 1.3789532605653512, "grad_norm": 0.2359178000522644, "learning_rate": 6.073683331757696e-05, "loss": 0.5114, "step": 4927 }, { "epoch": 1.3792331374195355, "grad_norm": 0.21847078746225165, "learning_rate": 6.072176698376168e-05, "loss": 0.5042, "step": 4928 }, { "epoch": 1.3795130142737195, "grad_norm": 0.22474509016118469, "learning_rate": 6.070669962944261e-05, "loss": 0.4797, "step": 4929 }, { "epoch": 1.3797928911279036, "grad_norm": 0.21644842941771736, "learning_rate": 6.069163125605387e-05, "loss": 0.4917, "step": 4930 }, { "epoch": 1.380072767982088, "grad_norm": 0.2220378729056806, "learning_rate": 6.067656186502968e-05, "loss": 0.4936, "step": 4931 }, { "epoch": 1.380352644836272, "grad_norm": 0.22442868456390033, "learning_rate": 6.0661491457804365e-05, "loss": 0.5211, "step": 4932 }, { "epoch": 1.3806325216904562, "grad_norm": 0.2341319335577658, "learning_rate": 6.064642003581231e-05, "loss": 0.4931, "step": 4933 }, { "epoch": 1.3809123985446403, "grad_norm": 0.22816881579358053, "learning_rate": 6.0631347600488055e-05, "loss": 0.507, "step": 4934 }, { "epoch": 1.3811922753988246, "grad_norm": 0.21892281487675827, "learning_rate": 6.061627415326616e-05, "loss": 0.4975, "step": 4935 }, { "epoch": 1.3814721522530087, "grad_norm": 0.22137019103946362, "learning_rate": 6.0601199695581365e-05, "loss": 0.5011, "step": 4936 }, { "epoch": 1.3817520291071927, "grad_norm": 0.21767486976382813, "learning_rate": 6.058612422886847e-05, "loss": 0.5143, "step": 4937 }, { "epoch": 1.382031905961377, "grad_norm": 0.2219460822095807, "learning_rate": 6.057104775456233e-05, "loss": 0.4911, "step": 4938 }, { "epoch": 1.382311782815561, "grad_norm": 0.22738339474408734, "learning_rate": 6.055597027409794e-05, "loss": 0.5077, "step": 4939 }, { "epoch": 1.3825916596697452, "grad_norm": 0.22200654926719496, "learning_rate": 6.054089178891039e-05, "loss": 0.4902, "step": 4940 }, { "epoch": 1.3828715365239295, "grad_norm": 0.22430847631014658, "learning_rate": 6.052581230043487e-05, "loss": 0.4761, "step": 4941 }, { "epoch": 1.3831514133781138, "grad_norm": 0.2198314918943528, "learning_rate": 6.0510731810106635e-05, "loss": 0.5191, "step": 4942 }, { "epoch": 1.3834312902322978, "grad_norm": 0.23671993116774223, "learning_rate": 6.049565031936106e-05, "loss": 0.4956, "step": 4943 }, { "epoch": 1.383711167086482, "grad_norm": 0.2217175544356953, "learning_rate": 6.048056782963362e-05, "loss": 0.4867, "step": 4944 }, { "epoch": 1.3839910439406662, "grad_norm": 0.23732346326989096, "learning_rate": 6.046548434235986e-05, "loss": 0.5274, "step": 4945 }, { "epoch": 1.3842709207948503, "grad_norm": 0.22172463375245813, "learning_rate": 6.045039985897546e-05, "loss": 0.4908, "step": 4946 }, { "epoch": 1.3845507976490343, "grad_norm": 0.22860434100991603, "learning_rate": 6.043531438091614e-05, "loss": 0.5121, "step": 4947 }, { "epoch": 1.3848306745032186, "grad_norm": 0.23924233802678888, "learning_rate": 6.0420227909617754e-05, "loss": 0.5166, "step": 4948 }, { "epoch": 1.3851105513574027, "grad_norm": 0.22391349971425006, "learning_rate": 6.040514044651625e-05, "loss": 0.5166, "step": 4949 }, { "epoch": 1.385390428211587, "grad_norm": 0.21978485436046194, "learning_rate": 6.039005199304765e-05, "loss": 0.506, "step": 4950 }, { "epoch": 1.385670305065771, "grad_norm": 0.23175563724704498, "learning_rate": 6.037496255064808e-05, "loss": 0.5066, "step": 4951 }, { "epoch": 1.3859501819199553, "grad_norm": 0.23372162575135808, "learning_rate": 6.035987212075378e-05, "loss": 0.4748, "step": 4952 }, { "epoch": 1.3862300587741394, "grad_norm": 0.2352190843452797, "learning_rate": 6.034478070480104e-05, "loss": 0.5189, "step": 4953 }, { "epoch": 1.3865099356283235, "grad_norm": 0.22783618571883854, "learning_rate": 6.032968830422628e-05, "loss": 0.5101, "step": 4954 }, { "epoch": 1.3867898124825078, "grad_norm": 0.22408681222800939, "learning_rate": 6.0314594920466025e-05, "loss": 0.4963, "step": 4955 }, { "epoch": 1.3870696893366918, "grad_norm": 0.23597349527187633, "learning_rate": 6.0299500554956845e-05, "loss": 0.513, "step": 4956 }, { "epoch": 1.387349566190876, "grad_norm": 0.2447697911650183, "learning_rate": 6.028440520913544e-05, "loss": 0.5048, "step": 4957 }, { "epoch": 1.3876294430450602, "grad_norm": 0.2227897136060564, "learning_rate": 6.026930888443858e-05, "loss": 0.4852, "step": 4958 }, { "epoch": 1.3879093198992443, "grad_norm": 0.2322352415827976, "learning_rate": 6.0254211582303154e-05, "loss": 0.5065, "step": 4959 }, { "epoch": 1.3881891967534286, "grad_norm": 0.21090238770420058, "learning_rate": 6.0239113304166136e-05, "loss": 0.4699, "step": 4960 }, { "epoch": 1.3884690736076126, "grad_norm": 0.2226884079788672, "learning_rate": 6.022401405146457e-05, "loss": 0.4933, "step": 4961 }, { "epoch": 1.388748950461797, "grad_norm": 0.23210330842943613, "learning_rate": 6.0208913825635636e-05, "loss": 0.4949, "step": 4962 }, { "epoch": 1.389028827315981, "grad_norm": 0.21929677966562106, "learning_rate": 6.019381262811656e-05, "loss": 0.5, "step": 4963 }, { "epoch": 1.389308704170165, "grad_norm": 0.2301597174744963, "learning_rate": 6.017871046034471e-05, "loss": 0.5067, "step": 4964 }, { "epoch": 1.3895885810243493, "grad_norm": 0.22117009999570827, "learning_rate": 6.01636073237575e-05, "loss": 0.5129, "step": 4965 }, { "epoch": 1.3898684578785334, "grad_norm": 0.21357227768456047, "learning_rate": 6.014850321979244e-05, "loss": 0.4745, "step": 4966 }, { "epoch": 1.3901483347327175, "grad_norm": 0.2301716139823612, "learning_rate": 6.013339814988718e-05, "loss": 0.5154, "step": 4967 }, { "epoch": 1.3904282115869018, "grad_norm": 0.22082139725228092, "learning_rate": 6.0118292115479405e-05, "loss": 0.4911, "step": 4968 }, { "epoch": 1.3907080884410858, "grad_norm": 0.22858146113004257, "learning_rate": 6.010318511800691e-05, "loss": 0.489, "step": 4969 }, { "epoch": 1.3909879652952701, "grad_norm": 0.21903256391982898, "learning_rate": 6.0088077158907605e-05, "loss": 0.4786, "step": 4970 }, { "epoch": 1.3912678421494542, "grad_norm": 0.22212827669015747, "learning_rate": 6.007296823961947e-05, "loss": 0.4959, "step": 4971 }, { "epoch": 1.3915477190036385, "grad_norm": 0.22735127002345915, "learning_rate": 6.005785836158059e-05, "loss": 0.5067, "step": 4972 }, { "epoch": 1.3918275958578226, "grad_norm": 0.22553672459643198, "learning_rate": 6.004274752622913e-05, "loss": 0.5012, "step": 4973 }, { "epoch": 1.3921074727120066, "grad_norm": 0.2315360169513721, "learning_rate": 6.0027635735003316e-05, "loss": 0.5126, "step": 4974 }, { "epoch": 1.392387349566191, "grad_norm": 0.224431775744776, "learning_rate": 6.001252298934154e-05, "loss": 0.4965, "step": 4975 }, { "epoch": 1.392667226420375, "grad_norm": 0.2181392716101542, "learning_rate": 5.9997409290682206e-05, "loss": 0.4878, "step": 4976 }, { "epoch": 1.392947103274559, "grad_norm": 0.23699397041647086, "learning_rate": 5.9982294640463865e-05, "loss": 0.5045, "step": 4977 }, { "epoch": 1.3932269801287434, "grad_norm": 0.22519207347435027, "learning_rate": 5.996717904012514e-05, "loss": 0.4924, "step": 4978 }, { "epoch": 1.3935068569829276, "grad_norm": 0.23364550992987365, "learning_rate": 5.995206249110472e-05, "loss": 0.5084, "step": 4979 }, { "epoch": 1.3937867338371117, "grad_norm": 0.2128467269144305, "learning_rate": 5.993694499484143e-05, "loss": 0.4656, "step": 4980 }, { "epoch": 1.3940666106912958, "grad_norm": 0.2186018293630736, "learning_rate": 5.992182655277415e-05, "loss": 0.4947, "step": 4981 }, { "epoch": 1.39434648754548, "grad_norm": 0.2105537455267023, "learning_rate": 5.990670716634188e-05, "loss": 0.4693, "step": 4982 }, { "epoch": 1.3946263643996641, "grad_norm": 0.22416727318435012, "learning_rate": 5.989158683698366e-05, "loss": 0.5024, "step": 4983 }, { "epoch": 1.3949062412538482, "grad_norm": 0.2234911347900639, "learning_rate": 5.987646556613868e-05, "loss": 0.4814, "step": 4984 }, { "epoch": 1.3951861181080325, "grad_norm": 0.22608029505877594, "learning_rate": 5.986134335524617e-05, "loss": 0.4851, "step": 4985 }, { "epoch": 1.3954659949622166, "grad_norm": 0.21696160680906468, "learning_rate": 5.9846220205745486e-05, "loss": 0.4886, "step": 4986 }, { "epoch": 1.3957458718164009, "grad_norm": 0.22680582852428668, "learning_rate": 5.9831096119076034e-05, "loss": 0.486, "step": 4987 }, { "epoch": 1.396025748670585, "grad_norm": 0.2123468287750178, "learning_rate": 5.981597109667737e-05, "loss": 0.5121, "step": 4988 }, { "epoch": 1.3963056255247692, "grad_norm": 0.2379904396157953, "learning_rate": 5.980084513998908e-05, "loss": 0.5206, "step": 4989 }, { "epoch": 1.3965855023789533, "grad_norm": 0.22597013044968492, "learning_rate": 5.978571825045086e-05, "loss": 0.492, "step": 4990 }, { "epoch": 1.3968653792331374, "grad_norm": 0.23010187260971798, "learning_rate": 5.9770590429502516e-05, "loss": 0.5024, "step": 4991 }, { "epoch": 1.3971452560873217, "grad_norm": 0.21997489110166854, "learning_rate": 5.975546167858389e-05, "loss": 0.4809, "step": 4992 }, { "epoch": 1.3974251329415057, "grad_norm": 0.21762511882155733, "learning_rate": 5.974033199913496e-05, "loss": 0.5147, "step": 4993 }, { "epoch": 1.3977050097956898, "grad_norm": 0.2124238924762238, "learning_rate": 5.9725201392595785e-05, "loss": 0.4521, "step": 4994 }, { "epoch": 1.397984886649874, "grad_norm": 0.22673216640657248, "learning_rate": 5.971006986040649e-05, "loss": 0.4995, "step": 4995 }, { "epoch": 1.3982647635040582, "grad_norm": 0.21788438437220065, "learning_rate": 5.9694937404007304e-05, "loss": 0.506, "step": 4996 }, { "epoch": 1.3985446403582424, "grad_norm": 0.22551416929690996, "learning_rate": 5.967980402483856e-05, "loss": 0.5019, "step": 4997 }, { "epoch": 1.3988245172124265, "grad_norm": 0.28356064717799917, "learning_rate": 5.966466972434065e-05, "loss": 0.515, "step": 4998 }, { "epoch": 1.3991043940666108, "grad_norm": 0.2473033755647614, "learning_rate": 5.9649534503954075e-05, "loss": 0.5146, "step": 4999 }, { "epoch": 1.3993842709207949, "grad_norm": 0.23527890407280222, "learning_rate": 5.96343983651194e-05, "loss": 0.5275, "step": 5000 }, { "epoch": 1.399664147774979, "grad_norm": 0.23767422093574717, "learning_rate": 5.9619261309277296e-05, "loss": 0.4832, "step": 5001 }, { "epoch": 1.3999440246291632, "grad_norm": 0.2196809284577659, "learning_rate": 5.960412333786851e-05, "loss": 0.4906, "step": 5002 }, { "epoch": 1.4002239014833473, "grad_norm": 0.22609996239196942, "learning_rate": 5.9588984452333894e-05, "loss": 0.5003, "step": 5003 }, { "epoch": 1.4005037783375314, "grad_norm": 0.22196006951410982, "learning_rate": 5.9573844654114366e-05, "loss": 0.511, "step": 5004 }, { "epoch": 1.4007836551917157, "grad_norm": 0.2299174173773503, "learning_rate": 5.955870394465094e-05, "loss": 0.4924, "step": 5005 }, { "epoch": 1.4010635320458997, "grad_norm": 0.223593199641648, "learning_rate": 5.954356232538474e-05, "loss": 0.493, "step": 5006 }, { "epoch": 1.401343408900084, "grad_norm": 0.2234269638659027, "learning_rate": 5.952841979775692e-05, "loss": 0.5018, "step": 5007 }, { "epoch": 1.401623285754268, "grad_norm": 0.23371931602661175, "learning_rate": 5.9513276363208784e-05, "loss": 0.5214, "step": 5008 }, { "epoch": 1.4019031626084524, "grad_norm": 0.22229756297983255, "learning_rate": 5.9498132023181665e-05, "loss": 0.4972, "step": 5009 }, { "epoch": 1.4021830394626364, "grad_norm": 0.21886655329394156, "learning_rate": 5.948298677911705e-05, "loss": 0.486, "step": 5010 }, { "epoch": 1.4024629163168205, "grad_norm": 0.22710976531252083, "learning_rate": 5.946784063245642e-05, "loss": 0.4989, "step": 5011 }, { "epoch": 1.4027427931710048, "grad_norm": 0.23140190148043827, "learning_rate": 5.945269358464143e-05, "loss": 0.5041, "step": 5012 }, { "epoch": 1.4030226700251889, "grad_norm": 0.21961096827576404, "learning_rate": 5.943754563711376e-05, "loss": 0.4905, "step": 5013 }, { "epoch": 1.403302546879373, "grad_norm": 0.22983839026146266, "learning_rate": 5.942239679131521e-05, "loss": 0.496, "step": 5014 }, { "epoch": 1.4035824237335572, "grad_norm": 0.22969809349749876, "learning_rate": 5.9407247048687675e-05, "loss": 0.5194, "step": 5015 }, { "epoch": 1.4038623005877415, "grad_norm": 0.22027628783970996, "learning_rate": 5.93920964106731e-05, "loss": 0.4902, "step": 5016 }, { "epoch": 1.4041421774419256, "grad_norm": 0.2214674881750312, "learning_rate": 5.9376944878713536e-05, "loss": 0.4973, "step": 5017 }, { "epoch": 1.4044220542961097, "grad_norm": 0.21576432232474238, "learning_rate": 5.9361792454251094e-05, "loss": 0.4857, "step": 5018 }, { "epoch": 1.404701931150294, "grad_norm": 0.22104374925337936, "learning_rate": 5.934663913872803e-05, "loss": 0.484, "step": 5019 }, { "epoch": 1.404981808004478, "grad_norm": 0.22502827769670583, "learning_rate": 5.933148493358661e-05, "loss": 0.5049, "step": 5020 }, { "epoch": 1.405261684858662, "grad_norm": 0.23289360228746306, "learning_rate": 5.9316329840269226e-05, "loss": 0.4844, "step": 5021 }, { "epoch": 1.4055415617128464, "grad_norm": 0.21772145567277384, "learning_rate": 5.930117386021835e-05, "loss": 0.5249, "step": 5022 }, { "epoch": 1.4058214385670305, "grad_norm": 0.2177600665874925, "learning_rate": 5.9286016994876536e-05, "loss": 0.4923, "step": 5023 }, { "epoch": 1.4061013154212147, "grad_norm": 0.23106005486503775, "learning_rate": 5.9270859245686436e-05, "loss": 0.5098, "step": 5024 }, { "epoch": 1.4063811922753988, "grad_norm": 0.21462041939366736, "learning_rate": 5.925570061409077e-05, "loss": 0.4775, "step": 5025 }, { "epoch": 1.406661069129583, "grad_norm": 0.22339497204583186, "learning_rate": 5.924054110153233e-05, "loss": 0.507, "step": 5026 }, { "epoch": 1.4069409459837672, "grad_norm": 0.22864504868173752, "learning_rate": 5.922538070945402e-05, "loss": 0.481, "step": 5027 }, { "epoch": 1.4072208228379512, "grad_norm": 0.22863576947528313, "learning_rate": 5.921021943929882e-05, "loss": 0.4889, "step": 5028 }, { "epoch": 1.4075006996921355, "grad_norm": 0.23413466753896148, "learning_rate": 5.919505729250977e-05, "loss": 0.5141, "step": 5029 }, { "epoch": 1.4077805765463196, "grad_norm": 0.2209986247340032, "learning_rate": 5.917989427053e-05, "loss": 0.4662, "step": 5030 }, { "epoch": 1.4080604534005037, "grad_norm": 0.23618778371542407, "learning_rate": 5.916473037480278e-05, "loss": 0.5019, "step": 5031 }, { "epoch": 1.408340330254688, "grad_norm": 0.22161486533350255, "learning_rate": 5.9149565606771386e-05, "loss": 0.491, "step": 5032 }, { "epoch": 1.408620207108872, "grad_norm": 0.23550043411825708, "learning_rate": 5.913439996787922e-05, "loss": 0.5119, "step": 5033 }, { "epoch": 1.4089000839630563, "grad_norm": 0.2290820612203507, "learning_rate": 5.9119233459569745e-05, "loss": 0.4965, "step": 5034 }, { "epoch": 1.4091799608172404, "grad_norm": 0.2273188319385308, "learning_rate": 5.910406608328652e-05, "loss": 0.4791, "step": 5035 }, { "epoch": 1.4094598376714247, "grad_norm": 0.2315924812143491, "learning_rate": 5.90888978404732e-05, "loss": 0.4968, "step": 5036 }, { "epoch": 1.4097397145256088, "grad_norm": 0.22312571758129626, "learning_rate": 5.907372873257351e-05, "loss": 0.5036, "step": 5037 }, { "epoch": 1.4100195913797928, "grad_norm": 0.23115034375508464, "learning_rate": 5.905855876103122e-05, "loss": 0.4971, "step": 5038 }, { "epoch": 1.4102994682339771, "grad_norm": 0.23239591964968542, "learning_rate": 5.9043387927290236e-05, "loss": 0.4688, "step": 5039 }, { "epoch": 1.4105793450881612, "grad_norm": 0.23394890424832476, "learning_rate": 5.902821623279453e-05, "loss": 0.4916, "step": 5040 }, { "epoch": 1.4108592219423453, "grad_norm": 0.22836562675520436, "learning_rate": 5.901304367898815e-05, "loss": 0.4986, "step": 5041 }, { "epoch": 1.4111390987965295, "grad_norm": 0.2570095442819792, "learning_rate": 5.8997870267315234e-05, "loss": 0.5019, "step": 5042 }, { "epoch": 1.4114189756507136, "grad_norm": 0.22527101983838463, "learning_rate": 5.898269599921998e-05, "loss": 0.4874, "step": 5043 }, { "epoch": 1.411698852504898, "grad_norm": 0.2286594264095889, "learning_rate": 5.896752087614671e-05, "loss": 0.5181, "step": 5044 }, { "epoch": 1.411978729359082, "grad_norm": 0.22783639873794284, "learning_rate": 5.895234489953977e-05, "loss": 0.4997, "step": 5045 }, { "epoch": 1.4122586062132663, "grad_norm": 0.22781834355265154, "learning_rate": 5.893716807084364e-05, "loss": 0.4912, "step": 5046 }, { "epoch": 1.4125384830674503, "grad_norm": 0.23013603717321154, "learning_rate": 5.8921990391502845e-05, "loss": 0.5124, "step": 5047 }, { "epoch": 1.4128183599216344, "grad_norm": 0.2254745207543635, "learning_rate": 5.8906811862962005e-05, "loss": 0.5052, "step": 5048 }, { "epoch": 1.4130982367758187, "grad_norm": 0.22267036138148985, "learning_rate": 5.889163248666583e-05, "loss": 0.505, "step": 5049 }, { "epoch": 1.4133781136300028, "grad_norm": 0.2241121222711816, "learning_rate": 5.88764522640591e-05, "loss": 0.5208, "step": 5050 }, { "epoch": 1.4136579904841868, "grad_norm": 0.2221941014441487, "learning_rate": 5.8861271196586676e-05, "loss": 0.4722, "step": 5051 }, { "epoch": 1.4139378673383711, "grad_norm": 0.2280223143118141, "learning_rate": 5.8846089285693495e-05, "loss": 0.5033, "step": 5052 }, { "epoch": 1.4142177441925552, "grad_norm": 0.2253822856763662, "learning_rate": 5.8830906532824594e-05, "loss": 0.5158, "step": 5053 }, { "epoch": 1.4144976210467395, "grad_norm": 0.2191644446815118, "learning_rate": 5.881572293942506e-05, "loss": 0.4695, "step": 5054 }, { "epoch": 1.4147774979009236, "grad_norm": 0.21817385434715142, "learning_rate": 5.8800538506940096e-05, "loss": 0.4846, "step": 5055 }, { "epoch": 1.4150573747551078, "grad_norm": 0.2238080059259505, "learning_rate": 5.878535323681492e-05, "loss": 0.4902, "step": 5056 }, { "epoch": 1.415337251609292, "grad_norm": 0.2217783404490955, "learning_rate": 5.8770167130494924e-05, "loss": 0.5072, "step": 5057 }, { "epoch": 1.415617128463476, "grad_norm": 0.21282959711668764, "learning_rate": 5.875498018942551e-05, "loss": 0.4763, "step": 5058 }, { "epoch": 1.4158970053176603, "grad_norm": 0.22241242725168442, "learning_rate": 5.873979241505218e-05, "loss": 0.4931, "step": 5059 }, { "epoch": 1.4161768821718443, "grad_norm": 0.22465901428241183, "learning_rate": 5.8724603808820525e-05, "loss": 0.4983, "step": 5060 }, { "epoch": 1.4164567590260284, "grad_norm": 0.23378838641919514, "learning_rate": 5.870941437217618e-05, "loss": 0.53, "step": 5061 }, { "epoch": 1.4167366358802127, "grad_norm": 0.2206834054188844, "learning_rate": 5.869422410656492e-05, "loss": 0.4919, "step": 5062 }, { "epoch": 1.417016512734397, "grad_norm": 0.21967802320622712, "learning_rate": 5.867903301343253e-05, "loss": 0.4706, "step": 5063 }, { "epoch": 1.417296389588581, "grad_norm": 0.23081947704695172, "learning_rate": 5.866384109422494e-05, "loss": 0.487, "step": 5064 }, { "epoch": 1.4175762664427651, "grad_norm": 0.22330659531424463, "learning_rate": 5.864864835038807e-05, "loss": 0.5036, "step": 5065 }, { "epoch": 1.4178561432969494, "grad_norm": 0.21905444711381003, "learning_rate": 5.8633454783368034e-05, "loss": 0.5101, "step": 5066 }, { "epoch": 1.4181360201511335, "grad_norm": 0.22362906053773424, "learning_rate": 5.8618260394610923e-05, "loss": 0.5005, "step": 5067 }, { "epoch": 1.4184158970053176, "grad_norm": 0.22619063934025008, "learning_rate": 5.8603065185562976e-05, "loss": 0.5098, "step": 5068 }, { "epoch": 1.4186957738595019, "grad_norm": 0.22980843456710603, "learning_rate": 5.858786915767047e-05, "loss": 0.4836, "step": 5069 }, { "epoch": 1.418975650713686, "grad_norm": 0.22520190570306362, "learning_rate": 5.8572672312379773e-05, "loss": 0.4856, "step": 5070 }, { "epoch": 1.4192555275678702, "grad_norm": 0.23043142938009362, "learning_rate": 5.855747465113731e-05, "loss": 0.5054, "step": 5071 }, { "epoch": 1.4195354044220543, "grad_norm": 0.22230338189102747, "learning_rate": 5.8542276175389635e-05, "loss": 0.5231, "step": 5072 }, { "epoch": 1.4198152812762386, "grad_norm": 0.23568286790250229, "learning_rate": 5.852707688658334e-05, "loss": 0.5045, "step": 5073 }, { "epoch": 1.4200951581304226, "grad_norm": 0.22574382109006813, "learning_rate": 5.851187678616508e-05, "loss": 0.5107, "step": 5074 }, { "epoch": 1.4203750349846067, "grad_norm": 0.23709819701337603, "learning_rate": 5.849667587558162e-05, "loss": 0.4989, "step": 5075 }, { "epoch": 1.420654911838791, "grad_norm": 0.2286566361517191, "learning_rate": 5.84814741562798e-05, "loss": 0.5202, "step": 5076 }, { "epoch": 1.420934788692975, "grad_norm": 0.2204586319662191, "learning_rate": 5.8466271629706526e-05, "loss": 0.5218, "step": 5077 }, { "epoch": 1.4212146655471591, "grad_norm": 0.2149152439134319, "learning_rate": 5.8451068297308774e-05, "loss": 0.4962, "step": 5078 }, { "epoch": 1.4214945424013434, "grad_norm": 0.22085380067325006, "learning_rate": 5.843586416053362e-05, "loss": 0.4947, "step": 5079 }, { "epoch": 1.4217744192555275, "grad_norm": 0.2174060073356206, "learning_rate": 5.842065922082818e-05, "loss": 0.4787, "step": 5080 }, { "epoch": 1.4220542961097118, "grad_norm": 0.2255742449065406, "learning_rate": 5.8405453479639684e-05, "loss": 0.4813, "step": 5081 }, { "epoch": 1.4223341729638959, "grad_norm": 0.2227538236941638, "learning_rate": 5.839024693841543e-05, "loss": 0.4914, "step": 5082 }, { "epoch": 1.4226140498180802, "grad_norm": 0.2082428477566117, "learning_rate": 5.8375039598602774e-05, "loss": 0.5206, "step": 5083 }, { "epoch": 1.4228939266722642, "grad_norm": 0.22473516709512023, "learning_rate": 5.835983146164916e-05, "loss": 0.4884, "step": 5084 }, { "epoch": 1.4231738035264483, "grad_norm": 0.23382157834934864, "learning_rate": 5.8344622529002105e-05, "loss": 0.4924, "step": 5085 }, { "epoch": 1.4234536803806326, "grad_norm": 0.2308118760235207, "learning_rate": 5.8329412802109203e-05, "loss": 0.4941, "step": 5086 }, { "epoch": 1.4237335572348166, "grad_norm": 0.2344467641014014, "learning_rate": 5.831420228241814e-05, "loss": 0.5103, "step": 5087 }, { "epoch": 1.4240134340890007, "grad_norm": 0.2300614074466648, "learning_rate": 5.829899097137664e-05, "loss": 0.5031, "step": 5088 }, { "epoch": 1.424293310943185, "grad_norm": 0.22412137344537142, "learning_rate": 5.828377887043255e-05, "loss": 0.4917, "step": 5089 }, { "epoch": 1.424573187797369, "grad_norm": 0.2457724772272532, "learning_rate": 5.826856598103374e-05, "loss": 0.5309, "step": 5090 }, { "epoch": 1.4248530646515534, "grad_norm": 0.22946844825061005, "learning_rate": 5.8253352304628185e-05, "loss": 0.4976, "step": 5091 }, { "epoch": 1.4251329415057374, "grad_norm": 0.22740190593370493, "learning_rate": 5.823813784266394e-05, "loss": 0.4988, "step": 5092 }, { "epoch": 1.4254128183599217, "grad_norm": 0.21465493384224607, "learning_rate": 5.822292259658914e-05, "loss": 0.4791, "step": 5093 }, { "epoch": 1.4256926952141058, "grad_norm": 0.22384317574679752, "learning_rate": 5.820770656785195e-05, "loss": 0.479, "step": 5094 }, { "epoch": 1.4259725720682899, "grad_norm": 0.2330975977727562, "learning_rate": 5.819248975790066e-05, "loss": 0.5108, "step": 5095 }, { "epoch": 1.4262524489224742, "grad_norm": 0.22474847282534643, "learning_rate": 5.81772721681836e-05, "loss": 0.4877, "step": 5096 }, { "epoch": 1.4265323257766582, "grad_norm": 0.2266302760241128, "learning_rate": 5.816205380014921e-05, "loss": 0.4988, "step": 5097 }, { "epoch": 1.4268122026308423, "grad_norm": 0.2272754251670803, "learning_rate": 5.814683465524596e-05, "loss": 0.4779, "step": 5098 }, { "epoch": 1.4270920794850266, "grad_norm": 0.2257802828793489, "learning_rate": 5.813161473492245e-05, "loss": 0.498, "step": 5099 }, { "epoch": 1.4273719563392109, "grad_norm": 0.22761805180770237, "learning_rate": 5.811639404062727e-05, "loss": 0.4931, "step": 5100 }, { "epoch": 1.427651833193395, "grad_norm": 0.2215397250599594, "learning_rate": 5.8101172573809184e-05, "loss": 0.5294, "step": 5101 }, { "epoch": 1.427931710047579, "grad_norm": 0.22818676960933698, "learning_rate": 5.808595033591694e-05, "loss": 0.4994, "step": 5102 }, { "epoch": 1.4282115869017633, "grad_norm": 0.21807604927218227, "learning_rate": 5.8070727328399424e-05, "loss": 0.4853, "step": 5103 }, { "epoch": 1.4284914637559474, "grad_norm": 0.227821285830176, "learning_rate": 5.805550355270557e-05, "loss": 0.4844, "step": 5104 }, { "epoch": 1.4287713406101314, "grad_norm": 0.2330054532629367, "learning_rate": 5.804027901028437e-05, "loss": 0.4992, "step": 5105 }, { "epoch": 1.4290512174643157, "grad_norm": 0.22922692229530123, "learning_rate": 5.802505370258492e-05, "loss": 0.4881, "step": 5106 }, { "epoch": 1.4293310943184998, "grad_norm": 0.21999471584967822, "learning_rate": 5.8009827631056366e-05, "loss": 0.5016, "step": 5107 }, { "epoch": 1.429610971172684, "grad_norm": 0.22227379808463635, "learning_rate": 5.799460079714793e-05, "loss": 0.4926, "step": 5108 }, { "epoch": 1.4298908480268682, "grad_norm": 0.2160585080824326, "learning_rate": 5.7979373202308916e-05, "loss": 0.4996, "step": 5109 }, { "epoch": 1.4301707248810525, "grad_norm": 0.22022489562667785, "learning_rate": 5.79641448479887e-05, "loss": 0.5135, "step": 5110 }, { "epoch": 1.4304506017352365, "grad_norm": 0.21799375721861491, "learning_rate": 5.794891573563671e-05, "loss": 0.5084, "step": 5111 }, { "epoch": 1.4307304785894206, "grad_norm": 0.21586980033273376, "learning_rate": 5.793368586670248e-05, "loss": 0.4667, "step": 5112 }, { "epoch": 1.4310103554436049, "grad_norm": 0.21824290616845962, "learning_rate": 5.791845524263559e-05, "loss": 0.5119, "step": 5113 }, { "epoch": 1.431290232297789, "grad_norm": 0.23236991119315753, "learning_rate": 5.790322386488569e-05, "loss": 0.4889, "step": 5114 }, { "epoch": 1.431570109151973, "grad_norm": 0.2175335504781246, "learning_rate": 5.788799173490252e-05, "loss": 0.5001, "step": 5115 }, { "epoch": 1.4318499860061573, "grad_norm": 0.2195394483250366, "learning_rate": 5.787275885413589e-05, "loss": 0.4928, "step": 5116 }, { "epoch": 1.4321298628603414, "grad_norm": 0.22265226840877025, "learning_rate": 5.7857525224035655e-05, "loss": 0.4874, "step": 5117 }, { "epoch": 1.4324097397145257, "grad_norm": 0.22134877298355587, "learning_rate": 5.7842290846051784e-05, "loss": 0.4928, "step": 5118 }, { "epoch": 1.4326896165687097, "grad_norm": 0.2347206470548645, "learning_rate": 5.782705572163427e-05, "loss": 0.5051, "step": 5119 }, { "epoch": 1.432969493422894, "grad_norm": 0.40032604481325523, "learning_rate": 5.781181985223322e-05, "loss": 0.4919, "step": 5120 }, { "epoch": 1.433249370277078, "grad_norm": 0.23219106263107278, "learning_rate": 5.7796583239298776e-05, "loss": 0.4809, "step": 5121 }, { "epoch": 1.4335292471312622, "grad_norm": 0.23372318208069728, "learning_rate": 5.7781345884281165e-05, "loss": 0.5083, "step": 5122 }, { "epoch": 1.4338091239854465, "grad_norm": 0.2205676570277828, "learning_rate": 5.776610778863072e-05, "loss": 0.4899, "step": 5123 }, { "epoch": 1.4340890008396305, "grad_norm": 0.23020409363981567, "learning_rate": 5.775086895379778e-05, "loss": 0.4706, "step": 5124 }, { "epoch": 1.4343688776938146, "grad_norm": 0.23267482291393507, "learning_rate": 5.7735629381232795e-05, "loss": 0.5059, "step": 5125 }, { "epoch": 1.434648754547999, "grad_norm": 0.2328435937625657, "learning_rate": 5.772038907238627e-05, "loss": 0.5225, "step": 5126 }, { "epoch": 1.434928631402183, "grad_norm": 0.22604008472819107, "learning_rate": 5.770514802870879e-05, "loss": 0.4991, "step": 5127 }, { "epoch": 1.4352085082563673, "grad_norm": 0.21336922426089364, "learning_rate": 5.7689906251651016e-05, "loss": 0.474, "step": 5128 }, { "epoch": 1.4354883851105513, "grad_norm": 0.23368535426931242, "learning_rate": 5.767466374266366e-05, "loss": 0.4804, "step": 5129 }, { "epoch": 1.4357682619647356, "grad_norm": 0.23429336490785493, "learning_rate": 5.7659420503197514e-05, "loss": 0.4892, "step": 5130 }, { "epoch": 1.4360481388189197, "grad_norm": 0.22699847341582347, "learning_rate": 5.764417653470343e-05, "loss": 0.496, "step": 5131 }, { "epoch": 1.4363280156731038, "grad_norm": 0.2176473038796731, "learning_rate": 5.762893183863235e-05, "loss": 0.4854, "step": 5132 }, { "epoch": 1.436607892527288, "grad_norm": 0.22785538450447065, "learning_rate": 5.7613686416435273e-05, "loss": 0.5282, "step": 5133 }, { "epoch": 1.4368877693814721, "grad_norm": 0.22131021495121425, "learning_rate": 5.7598440269563245e-05, "loss": 0.4966, "step": 5134 }, { "epoch": 1.4371676462356562, "grad_norm": 0.23120351661154448, "learning_rate": 5.758319339946744e-05, "loss": 0.5099, "step": 5135 }, { "epoch": 1.4374475230898405, "grad_norm": 0.23538010852021032, "learning_rate": 5.7567945807599035e-05, "loss": 0.4718, "step": 5136 }, { "epoch": 1.4377273999440248, "grad_norm": 0.22925175083253585, "learning_rate": 5.7552697495409304e-05, "loss": 0.4831, "step": 5137 }, { "epoch": 1.4380072767982088, "grad_norm": 0.22124896147976106, "learning_rate": 5.753744846434961e-05, "loss": 0.4856, "step": 5138 }, { "epoch": 1.438287153652393, "grad_norm": 0.21536526251900118, "learning_rate": 5.752219871587134e-05, "loss": 0.4927, "step": 5139 }, { "epoch": 1.4385670305065772, "grad_norm": 0.2332442620797546, "learning_rate": 5.7506948251426e-05, "loss": 0.4887, "step": 5140 }, { "epoch": 1.4388469073607613, "grad_norm": 0.21917960264428496, "learning_rate": 5.7491697072465114e-05, "loss": 0.5107, "step": 5141 }, { "epoch": 1.4391267842149453, "grad_norm": 0.2332156472110391, "learning_rate": 5.74764451804403e-05, "loss": 0.5142, "step": 5142 }, { "epoch": 1.4394066610691296, "grad_norm": 0.21790915008402828, "learning_rate": 5.7461192576803256e-05, "loss": 0.4878, "step": 5143 }, { "epoch": 1.4396865379233137, "grad_norm": 0.22261157577003818, "learning_rate": 5.7445939263005734e-05, "loss": 0.4902, "step": 5144 }, { "epoch": 1.439966414777498, "grad_norm": 0.21779936210905748, "learning_rate": 5.743068524049954e-05, "loss": 0.5062, "step": 5145 }, { "epoch": 1.440246291631682, "grad_norm": 0.22274628015121947, "learning_rate": 5.7415430510736555e-05, "loss": 0.5088, "step": 5146 }, { "epoch": 1.4405261684858663, "grad_norm": 0.22570440775155368, "learning_rate": 5.740017507516876e-05, "loss": 0.4709, "step": 5147 }, { "epoch": 1.4408060453400504, "grad_norm": 0.21426655450686172, "learning_rate": 5.738491893524816e-05, "loss": 0.5096, "step": 5148 }, { "epoch": 1.4410859221942345, "grad_norm": 0.21895351701900131, "learning_rate": 5.736966209242682e-05, "loss": 0.5018, "step": 5149 }, { "epoch": 1.4413657990484188, "grad_norm": 0.21923286726146438, "learning_rate": 5.735440454815694e-05, "loss": 0.4973, "step": 5150 }, { "epoch": 1.4416456759026028, "grad_norm": 0.21408259235103064, "learning_rate": 5.73391463038907e-05, "loss": 0.4773, "step": 5151 }, { "epoch": 1.441925552756787, "grad_norm": 0.21924257695252328, "learning_rate": 5.732388736108042e-05, "loss": 0.5044, "step": 5152 }, { "epoch": 1.4422054296109712, "grad_norm": 0.22754723786755976, "learning_rate": 5.730862772117844e-05, "loss": 0.4802, "step": 5153 }, { "epoch": 1.4424853064651553, "grad_norm": 0.22664994828210788, "learning_rate": 5.7293367385637186e-05, "loss": 0.4974, "step": 5154 }, { "epoch": 1.4427651833193396, "grad_norm": 0.22815602094981982, "learning_rate": 5.7278106355909136e-05, "loss": 0.4773, "step": 5155 }, { "epoch": 1.4430450601735236, "grad_norm": 0.22836029867656918, "learning_rate": 5.726284463344686e-05, "loss": 0.4879, "step": 5156 }, { "epoch": 1.443324937027708, "grad_norm": 0.21959018773379563, "learning_rate": 5.7247582219702946e-05, "loss": 0.4524, "step": 5157 }, { "epoch": 1.443604813881892, "grad_norm": 0.24105814344774842, "learning_rate": 5.723231911613012e-05, "loss": 0.5236, "step": 5158 }, { "epoch": 1.443884690736076, "grad_norm": 0.2173295053647425, "learning_rate": 5.72170553241811e-05, "loss": 0.4619, "step": 5159 }, { "epoch": 1.4441645675902604, "grad_norm": 0.2348393430708412, "learning_rate": 5.720179084530871e-05, "loss": 0.5036, "step": 5160 }, { "epoch": 1.4444444444444444, "grad_norm": 0.226209054313454, "learning_rate": 5.718652568096585e-05, "loss": 0.4984, "step": 5161 }, { "epoch": 1.4447243212986285, "grad_norm": 0.23282268334346848, "learning_rate": 5.717125983260545e-05, "loss": 0.5118, "step": 5162 }, { "epoch": 1.4450041981528128, "grad_norm": 0.2203782380160922, "learning_rate": 5.715599330168052e-05, "loss": 0.4869, "step": 5163 }, { "epoch": 1.4452840750069968, "grad_norm": 0.21882686743677007, "learning_rate": 5.714072608964415e-05, "loss": 0.4922, "step": 5164 }, { "epoch": 1.4455639518611811, "grad_norm": 0.2278236064292984, "learning_rate": 5.7125458197949464e-05, "loss": 0.5061, "step": 5165 }, { "epoch": 1.4458438287153652, "grad_norm": 0.22791485905826125, "learning_rate": 5.711018962804968e-05, "loss": 0.4962, "step": 5166 }, { "epoch": 1.4461237055695495, "grad_norm": 0.2251294062949208, "learning_rate": 5.7094920381398075e-05, "loss": 0.4799, "step": 5167 }, { "epoch": 1.4464035824237336, "grad_norm": 0.22602341455673444, "learning_rate": 5.7079650459447975e-05, "loss": 0.5078, "step": 5168 }, { "epoch": 1.4466834592779176, "grad_norm": 0.21981970982390575, "learning_rate": 5.706437986365278e-05, "loss": 0.4822, "step": 5169 }, { "epoch": 1.446963336132102, "grad_norm": 0.22519556170862506, "learning_rate": 5.704910859546595e-05, "loss": 0.4918, "step": 5170 }, { "epoch": 1.447243212986286, "grad_norm": 0.21841411859899734, "learning_rate": 5.703383665634101e-05, "loss": 0.4779, "step": 5171 }, { "epoch": 1.44752308984047, "grad_norm": 0.221552263703885, "learning_rate": 5.701856404773159e-05, "loss": 0.5063, "step": 5172 }, { "epoch": 1.4478029666946544, "grad_norm": 0.2194652347724914, "learning_rate": 5.70032907710913e-05, "loss": 0.4858, "step": 5173 }, { "epoch": 1.4480828435488386, "grad_norm": 0.21714844506779132, "learning_rate": 5.698801682787387e-05, "loss": 0.4994, "step": 5174 }, { "epoch": 1.4483627204030227, "grad_norm": 0.2239726565976733, "learning_rate": 5.697274221953309e-05, "loss": 0.4913, "step": 5175 }, { "epoch": 1.4486425972572068, "grad_norm": 0.2184591734505271, "learning_rate": 5.695746694752281e-05, "loss": 0.518, "step": 5176 }, { "epoch": 1.448922474111391, "grad_norm": 0.21352170695384015, "learning_rate": 5.694219101329692e-05, "loss": 0.4755, "step": 5177 }, { "epoch": 1.4492023509655751, "grad_norm": 0.2298478154080551, "learning_rate": 5.692691441830941e-05, "loss": 0.5125, "step": 5178 }, { "epoch": 1.4494822278197592, "grad_norm": 0.22984436924526433, "learning_rate": 5.691163716401431e-05, "loss": 0.5096, "step": 5179 }, { "epoch": 1.4497621046739435, "grad_norm": 0.2154386140166561, "learning_rate": 5.6896359251865695e-05, "loss": 0.4629, "step": 5180 }, { "epoch": 1.4500419815281276, "grad_norm": 0.22045415166240703, "learning_rate": 5.688108068331778e-05, "loss": 0.4747, "step": 5181 }, { "epoch": 1.4503218583823116, "grad_norm": 0.22369895885338856, "learning_rate": 5.686580145982473e-05, "loss": 0.5064, "step": 5182 }, { "epoch": 1.450601735236496, "grad_norm": 0.23857189418531322, "learning_rate": 5.685052158284087e-05, "loss": 0.5091, "step": 5183 }, { "epoch": 1.4508816120906802, "grad_norm": 0.23972159953548952, "learning_rate": 5.683524105382052e-05, "loss": 0.4993, "step": 5184 }, { "epoch": 1.4511614889448643, "grad_norm": 0.22379421206513053, "learning_rate": 5.6819959874218106e-05, "loss": 0.4961, "step": 5185 }, { "epoch": 1.4514413657990484, "grad_norm": 0.23106690502050894, "learning_rate": 5.680467804548809e-05, "loss": 0.5074, "step": 5186 }, { "epoch": 1.4517212426532327, "grad_norm": 0.21852188404279954, "learning_rate": 5.678939556908501e-05, "loss": 0.4707, "step": 5187 }, { "epoch": 1.4520011195074167, "grad_norm": 0.2298226129085423, "learning_rate": 5.6774112446463465e-05, "loss": 0.4899, "step": 5188 }, { "epoch": 1.4522809963616008, "grad_norm": 0.2368934336936178, "learning_rate": 5.675882867907809e-05, "loss": 0.5274, "step": 5189 }, { "epoch": 1.452560873215785, "grad_norm": 0.23342324032605585, "learning_rate": 5.674354426838364e-05, "loss": 0.501, "step": 5190 }, { "epoch": 1.4528407500699692, "grad_norm": 0.22532627937468913, "learning_rate": 5.672825921583487e-05, "loss": 0.482, "step": 5191 }, { "epoch": 1.4531206269241534, "grad_norm": 0.21818767199489672, "learning_rate": 5.67129735228866e-05, "loss": 0.4931, "step": 5192 }, { "epoch": 1.4534005037783375, "grad_norm": 0.22983193225464882, "learning_rate": 5.6697687190993775e-05, "loss": 0.5101, "step": 5193 }, { "epoch": 1.4536803806325218, "grad_norm": 0.22830254759515695, "learning_rate": 5.668240022161132e-05, "loss": 0.5066, "step": 5194 }, { "epoch": 1.4539602574867059, "grad_norm": 0.22800427004652327, "learning_rate": 5.666711261619428e-05, "loss": 0.5008, "step": 5195 }, { "epoch": 1.45424013434089, "grad_norm": 0.23164596534267978, "learning_rate": 5.665182437619773e-05, "loss": 0.5002, "step": 5196 }, { "epoch": 1.4545200111950742, "grad_norm": 0.22864191762300512, "learning_rate": 5.6636535503076796e-05, "loss": 0.4838, "step": 5197 }, { "epoch": 1.4547998880492583, "grad_norm": 0.22163210383934065, "learning_rate": 5.662124599828671e-05, "loss": 0.5134, "step": 5198 }, { "epoch": 1.4550797649034424, "grad_norm": 0.21822141625786304, "learning_rate": 5.6605955863282736e-05, "loss": 0.4745, "step": 5199 }, { "epoch": 1.4553596417576267, "grad_norm": 0.21976683123079507, "learning_rate": 5.659066509952018e-05, "loss": 0.4938, "step": 5200 }, { "epoch": 1.4556395186118107, "grad_norm": 0.23161957364357091, "learning_rate": 5.657537370845441e-05, "loss": 0.4852, "step": 5201 }, { "epoch": 1.455919395465995, "grad_norm": 0.23122405168374782, "learning_rate": 5.656008169154091e-05, "loss": 0.5011, "step": 5202 }, { "epoch": 1.456199272320179, "grad_norm": 0.22373361029429523, "learning_rate": 5.654478905023516e-05, "loss": 0.49, "step": 5203 }, { "epoch": 1.4564791491743634, "grad_norm": 0.21456645930140103, "learning_rate": 5.6529495785992725e-05, "loss": 0.5002, "step": 5204 }, { "epoch": 1.4567590260285475, "grad_norm": 0.22801884562895566, "learning_rate": 5.651420190026922e-05, "loss": 0.4992, "step": 5205 }, { "epoch": 1.4570389028827315, "grad_norm": 0.22717453576260827, "learning_rate": 5.649890739452033e-05, "loss": 0.4776, "step": 5206 }, { "epoch": 1.4573187797369158, "grad_norm": 0.22945912550546757, "learning_rate": 5.6483612270201804e-05, "loss": 0.5201, "step": 5207 }, { "epoch": 1.4575986565910999, "grad_norm": 0.23501778884197766, "learning_rate": 5.646831652876945e-05, "loss": 0.525, "step": 5208 }, { "epoch": 1.457878533445284, "grad_norm": 0.2190781689178128, "learning_rate": 5.645302017167908e-05, "loss": 0.5224, "step": 5209 }, { "epoch": 1.4581584102994682, "grad_norm": 0.22367722761054348, "learning_rate": 5.643772320038665e-05, "loss": 0.4816, "step": 5210 }, { "epoch": 1.4584382871536523, "grad_norm": 0.21706226354460845, "learning_rate": 5.642242561634812e-05, "loss": 0.4874, "step": 5211 }, { "epoch": 1.4587181640078366, "grad_norm": 0.2327980171449258, "learning_rate": 5.6407127421019534e-05, "loss": 0.5045, "step": 5212 }, { "epoch": 1.4589980408620207, "grad_norm": 0.22404302537377715, "learning_rate": 5.639182861585697e-05, "loss": 0.4817, "step": 5213 }, { "epoch": 1.459277917716205, "grad_norm": 0.2262302122989276, "learning_rate": 5.6376529202316554e-05, "loss": 0.4716, "step": 5214 }, { "epoch": 1.459557794570389, "grad_norm": 0.2272961937678117, "learning_rate": 5.636122918185455e-05, "loss": 0.5025, "step": 5215 }, { "epoch": 1.459837671424573, "grad_norm": 0.21677487509548762, "learning_rate": 5.634592855592717e-05, "loss": 0.4994, "step": 5216 }, { "epoch": 1.4601175482787574, "grad_norm": 0.2159696268066842, "learning_rate": 5.633062732599078e-05, "loss": 0.4568, "step": 5217 }, { "epoch": 1.4603974251329415, "grad_norm": 0.20843525204322297, "learning_rate": 5.6315325493501745e-05, "loss": 0.4718, "step": 5218 }, { "epoch": 1.4606773019871255, "grad_norm": 0.21621720695856078, "learning_rate": 5.630002305991647e-05, "loss": 0.4907, "step": 5219 }, { "epoch": 1.4609571788413098, "grad_norm": 0.22486264201399042, "learning_rate": 5.6284720026691494e-05, "loss": 0.4825, "step": 5220 }, { "epoch": 1.4612370556954941, "grad_norm": 0.2138664838463663, "learning_rate": 5.626941639528334e-05, "loss": 0.4965, "step": 5221 }, { "epoch": 1.4615169325496782, "grad_norm": 0.22227984300138978, "learning_rate": 5.6254112167148623e-05, "loss": 0.5035, "step": 5222 }, { "epoch": 1.4617968094038623, "grad_norm": 0.23560786340253842, "learning_rate": 5.6238807343743995e-05, "loss": 0.495, "step": 5223 }, { "epoch": 1.4620766862580465, "grad_norm": 0.2222486262089791, "learning_rate": 5.62235019265262e-05, "loss": 0.496, "step": 5224 }, { "epoch": 1.4623565631122306, "grad_norm": 0.2153877918829292, "learning_rate": 5.620819591695201e-05, "loss": 0.4799, "step": 5225 }, { "epoch": 1.4626364399664147, "grad_norm": 0.2313760021948021, "learning_rate": 5.619288931647827e-05, "loss": 0.5008, "step": 5226 }, { "epoch": 1.462916316820599, "grad_norm": 0.21184068885958354, "learning_rate": 5.6177582126561853e-05, "loss": 0.4822, "step": 5227 }, { "epoch": 1.463196193674783, "grad_norm": 0.24409516586058003, "learning_rate": 5.616227434865972e-05, "loss": 0.5304, "step": 5228 }, { "epoch": 1.4634760705289673, "grad_norm": 0.2290601368355482, "learning_rate": 5.614696598422885e-05, "loss": 0.489, "step": 5229 }, { "epoch": 1.4637559473831514, "grad_norm": 0.232763262317614, "learning_rate": 5.613165703472632e-05, "loss": 0.4942, "step": 5230 }, { "epoch": 1.4640358242373357, "grad_norm": 0.22710153940008881, "learning_rate": 5.611634750160924e-05, "loss": 0.4905, "step": 5231 }, { "epoch": 1.4643157010915198, "grad_norm": 0.22411737403719634, "learning_rate": 5.610103738633477e-05, "loss": 0.5022, "step": 5232 }, { "epoch": 1.4645955779457038, "grad_norm": 0.25103886274908743, "learning_rate": 5.6085726690360165e-05, "loss": 0.5184, "step": 5233 }, { "epoch": 1.4648754547998881, "grad_norm": 0.2393366002243448, "learning_rate": 5.607041541514268e-05, "loss": 0.5032, "step": 5234 }, { "epoch": 1.4651553316540722, "grad_norm": 0.22221812011519437, "learning_rate": 5.6055103562139656e-05, "loss": 0.4867, "step": 5235 }, { "epoch": 1.4654352085082563, "grad_norm": 0.23326981284420606, "learning_rate": 5.6039791132808505e-05, "loss": 0.5069, "step": 5236 }, { "epoch": 1.4657150853624406, "grad_norm": 0.22326394555032164, "learning_rate": 5.602447812860664e-05, "loss": 0.5059, "step": 5237 }, { "epoch": 1.4659949622166246, "grad_norm": 0.23551628104814457, "learning_rate": 5.6009164550991565e-05, "loss": 0.5015, "step": 5238 }, { "epoch": 1.466274839070809, "grad_norm": 0.21941415535650327, "learning_rate": 5.5993850401420856e-05, "loss": 0.4906, "step": 5239 }, { "epoch": 1.466554715924993, "grad_norm": 0.21585708426300135, "learning_rate": 5.59785356813521e-05, "loss": 0.493, "step": 5240 }, { "epoch": 1.4668345927791773, "grad_norm": 0.22399307811497346, "learning_rate": 5.5963220392242975e-05, "loss": 0.491, "step": 5241 }, { "epoch": 1.4671144696333613, "grad_norm": 0.22221753004680211, "learning_rate": 5.59479045355512e-05, "loss": 0.495, "step": 5242 }, { "epoch": 1.4673943464875454, "grad_norm": 0.2322004302720725, "learning_rate": 5.593258811273454e-05, "loss": 0.4957, "step": 5243 }, { "epoch": 1.4676742233417297, "grad_norm": 0.22743787157192655, "learning_rate": 5.5917271125250824e-05, "loss": 0.4968, "step": 5244 }, { "epoch": 1.4679541001959138, "grad_norm": 0.2234875833928903, "learning_rate": 5.5901953574557945e-05, "loss": 0.5145, "step": 5245 }, { "epoch": 1.4682339770500978, "grad_norm": 0.2337471811956906, "learning_rate": 5.5886635462113804e-05, "loss": 0.4813, "step": 5246 }, { "epoch": 1.4685138539042821, "grad_norm": 0.22750366413705767, "learning_rate": 5.58713167893764e-05, "loss": 0.4777, "step": 5247 }, { "epoch": 1.4687937307584662, "grad_norm": 0.22660182890774772, "learning_rate": 5.58559975578038e-05, "loss": 0.501, "step": 5248 }, { "epoch": 1.4690736076126505, "grad_norm": 0.23145870816757763, "learning_rate": 5.584067776885404e-05, "loss": 0.4788, "step": 5249 }, { "epoch": 1.4693534844668346, "grad_norm": 0.2207247354311303, "learning_rate": 5.582535742398533e-05, "loss": 0.5002, "step": 5250 }, { "epoch": 1.4696333613210188, "grad_norm": 0.22446853877397394, "learning_rate": 5.581003652465583e-05, "loss": 0.4932, "step": 5251 }, { "epoch": 1.469913238175203, "grad_norm": 0.22604299120318153, "learning_rate": 5.57947150723238e-05, "loss": 0.4886, "step": 5252 }, { "epoch": 1.470193115029387, "grad_norm": 0.22934992109241698, "learning_rate": 5.577939306844755e-05, "loss": 0.4954, "step": 5253 }, { "epoch": 1.4704729918835713, "grad_norm": 0.228733971565801, "learning_rate": 5.5764070514485435e-05, "loss": 0.4826, "step": 5254 }, { "epoch": 1.4707528687377553, "grad_norm": 0.21659572130593793, "learning_rate": 5.5748747411895865e-05, "loss": 0.4975, "step": 5255 }, { "epoch": 1.4710327455919394, "grad_norm": 0.2212187681174885, "learning_rate": 5.573342376213728e-05, "loss": 0.488, "step": 5256 }, { "epoch": 1.4713126224461237, "grad_norm": 0.233789400573396, "learning_rate": 5.571809956666822e-05, "loss": 0.4907, "step": 5257 }, { "epoch": 1.471592499300308, "grad_norm": 0.22737192042317877, "learning_rate": 5.570277482694725e-05, "loss": 0.4825, "step": 5258 }, { "epoch": 1.471872376154492, "grad_norm": 0.22242946443921727, "learning_rate": 5.568744954443297e-05, "loss": 0.4824, "step": 5259 }, { "epoch": 1.4721522530086761, "grad_norm": 0.23064635878022757, "learning_rate": 5.567212372058407e-05, "loss": 0.5146, "step": 5260 }, { "epoch": 1.4724321298628604, "grad_norm": 0.2330896871546213, "learning_rate": 5.565679735685925e-05, "loss": 0.5143, "step": 5261 }, { "epoch": 1.4727120067170445, "grad_norm": 0.2307403627780489, "learning_rate": 5.56414704547173e-05, "loss": 0.5101, "step": 5262 }, { "epoch": 1.4729918835712286, "grad_norm": 0.22428272350933046, "learning_rate": 5.562614301561704e-05, "loss": 0.4815, "step": 5263 }, { "epoch": 1.4732717604254129, "grad_norm": 0.23103900170190556, "learning_rate": 5.561081504101733e-05, "loss": 0.4893, "step": 5264 }, { "epoch": 1.473551637279597, "grad_norm": 0.22658422565410644, "learning_rate": 5.559548653237711e-05, "loss": 0.495, "step": 5265 }, { "epoch": 1.4738315141337812, "grad_norm": 0.2315015655369644, "learning_rate": 5.558015749115533e-05, "loss": 0.4957, "step": 5266 }, { "epoch": 1.4741113909879653, "grad_norm": 0.22864661559358873, "learning_rate": 5.556482791881105e-05, "loss": 0.4829, "step": 5267 }, { "epoch": 1.4743912678421496, "grad_norm": 0.23110443055954935, "learning_rate": 5.554949781680333e-05, "loss": 0.491, "step": 5268 }, { "epoch": 1.4746711446963336, "grad_norm": 0.22929566401500065, "learning_rate": 5.55341671865913e-05, "loss": 0.4865, "step": 5269 }, { "epoch": 1.4749510215505177, "grad_norm": 0.22677701838405592, "learning_rate": 5.5518836029634145e-05, "loss": 0.5081, "step": 5270 }, { "epoch": 1.475230898404702, "grad_norm": 0.22090661176482015, "learning_rate": 5.550350434739109e-05, "loss": 0.5021, "step": 5271 }, { "epoch": 1.475510775258886, "grad_norm": 0.23453787157715336, "learning_rate": 5.548817214132143e-05, "loss": 0.4788, "step": 5272 }, { "epoch": 1.4757906521130701, "grad_norm": 0.22712003870974973, "learning_rate": 5.547283941288445e-05, "loss": 0.4986, "step": 5273 }, { "epoch": 1.4760705289672544, "grad_norm": 0.2204667160719935, "learning_rate": 5.545750616353955e-05, "loss": 0.4953, "step": 5274 }, { "epoch": 1.4763504058214385, "grad_norm": 0.2143925115875876, "learning_rate": 5.544217239474615e-05, "loss": 0.4911, "step": 5275 }, { "epoch": 1.4766302826756228, "grad_norm": 0.22585843148824863, "learning_rate": 5.542683810796374e-05, "loss": 0.4965, "step": 5276 }, { "epoch": 1.4769101595298069, "grad_norm": 0.22279290043328198, "learning_rate": 5.541150330465186e-05, "loss": 0.5046, "step": 5277 }, { "epoch": 1.4771900363839912, "grad_norm": 0.21904534514230872, "learning_rate": 5.539616798627005e-05, "loss": 0.4913, "step": 5278 }, { "epoch": 1.4774699132381752, "grad_norm": 0.2275017475919835, "learning_rate": 5.538083215427796e-05, "loss": 0.5126, "step": 5279 }, { "epoch": 1.4777497900923593, "grad_norm": 0.2273895244217181, "learning_rate": 5.536549581013525e-05, "loss": 0.4924, "step": 5280 }, { "epoch": 1.4780296669465436, "grad_norm": 0.22152081037512972, "learning_rate": 5.5350158955301657e-05, "loss": 0.4695, "step": 5281 }, { "epoch": 1.4783095438007277, "grad_norm": 0.22460405259049548, "learning_rate": 5.533482159123693e-05, "loss": 0.4724, "step": 5282 }, { "epoch": 1.4785894206549117, "grad_norm": 0.22040711529727752, "learning_rate": 5.531948371940089e-05, "loss": 0.4899, "step": 5283 }, { "epoch": 1.478869297509096, "grad_norm": 0.23397617899090678, "learning_rate": 5.530414534125341e-05, "loss": 0.4925, "step": 5284 }, { "epoch": 1.47914917436328, "grad_norm": 0.22775382648696416, "learning_rate": 5.5288806458254414e-05, "loss": 0.4994, "step": 5285 }, { "epoch": 1.4794290512174644, "grad_norm": 0.23539696449203906, "learning_rate": 5.527346707186386e-05, "loss": 0.4811, "step": 5286 }, { "epoch": 1.4797089280716484, "grad_norm": 0.22867356620474527, "learning_rate": 5.5258127183541766e-05, "loss": 0.4762, "step": 5287 }, { "epoch": 1.4799888049258327, "grad_norm": 0.21167606898677446, "learning_rate": 5.524278679474817e-05, "loss": 0.4723, "step": 5288 }, { "epoch": 1.4802686817800168, "grad_norm": 0.22849129149195513, "learning_rate": 5.52274459069432e-05, "loss": 0.504, "step": 5289 }, { "epoch": 1.4805485586342009, "grad_norm": 0.22557473301195613, "learning_rate": 5.5212104521587016e-05, "loss": 0.4877, "step": 5290 }, { "epoch": 1.4808284354883852, "grad_norm": 0.21591100269906305, "learning_rate": 5.5196762640139786e-05, "loss": 0.4901, "step": 5291 }, { "epoch": 1.4811083123425692, "grad_norm": 0.2168375814509893, "learning_rate": 5.518142026406178e-05, "loss": 0.4995, "step": 5292 }, { "epoch": 1.4813881891967533, "grad_norm": 0.2308367184534482, "learning_rate": 5.5166077394813296e-05, "loss": 0.4981, "step": 5293 }, { "epoch": 1.4816680660509376, "grad_norm": 0.23033343237219403, "learning_rate": 5.515073403385468e-05, "loss": 0.4775, "step": 5294 }, { "epoch": 1.4819479429051219, "grad_norm": 0.22540094381086165, "learning_rate": 5.5135390182646304e-05, "loss": 0.4959, "step": 5295 }, { "epoch": 1.482227819759306, "grad_norm": 0.22041950776340755, "learning_rate": 5.512004584264864e-05, "loss": 0.49, "step": 5296 }, { "epoch": 1.48250769661349, "grad_norm": 0.22465911313300704, "learning_rate": 5.5104701015322125e-05, "loss": 0.4834, "step": 5297 }, { "epoch": 1.4827875734676743, "grad_norm": 0.2237692653976678, "learning_rate": 5.508935570212732e-05, "loss": 0.4779, "step": 5298 }, { "epoch": 1.4830674503218584, "grad_norm": 0.2075798796732379, "learning_rate": 5.507400990452479e-05, "loss": 0.466, "step": 5299 }, { "epoch": 1.4833473271760425, "grad_norm": 0.22768642302095388, "learning_rate": 5.505866362397516e-05, "loss": 0.4867, "step": 5300 }, { "epoch": 1.4836272040302267, "grad_norm": 0.22653079807440119, "learning_rate": 5.504331686193907e-05, "loss": 0.4902, "step": 5301 }, { "epoch": 1.4839070808844108, "grad_norm": 0.22136596493574517, "learning_rate": 5.502796961987728e-05, "loss": 0.4952, "step": 5302 }, { "epoch": 1.4841869577385949, "grad_norm": 0.22922252527003512, "learning_rate": 5.501262189925053e-05, "loss": 0.5016, "step": 5303 }, { "epoch": 1.4844668345927792, "grad_norm": 0.22846620736430626, "learning_rate": 5.4997273701519615e-05, "loss": 0.4862, "step": 5304 }, { "epoch": 1.4847467114469635, "grad_norm": 0.22791579215918112, "learning_rate": 5.4981925028145385e-05, "loss": 0.4836, "step": 5305 }, { "epoch": 1.4850265883011475, "grad_norm": 0.2326806099711271, "learning_rate": 5.4966575880588755e-05, "loss": 0.4942, "step": 5306 }, { "epoch": 1.4853064651553316, "grad_norm": 0.23153575433444576, "learning_rate": 5.495122626031065e-05, "loss": 0.4946, "step": 5307 }, { "epoch": 1.485586342009516, "grad_norm": 0.22958469706081977, "learning_rate": 5.493587616877207e-05, "loss": 0.4935, "step": 5308 }, { "epoch": 1.4858662188637, "grad_norm": 0.2405831372653318, "learning_rate": 5.492052560743402e-05, "loss": 0.4871, "step": 5309 }, { "epoch": 1.486146095717884, "grad_norm": 0.22904421275674433, "learning_rate": 5.490517457775758e-05, "loss": 0.4831, "step": 5310 }, { "epoch": 1.4864259725720683, "grad_norm": 0.2251918346907918, "learning_rate": 5.4889823081203884e-05, "loss": 0.4882, "step": 5311 }, { "epoch": 1.4867058494262524, "grad_norm": 0.2270591895873666, "learning_rate": 5.4874471119234096e-05, "loss": 0.4862, "step": 5312 }, { "epoch": 1.4869857262804367, "grad_norm": 0.2118106516721711, "learning_rate": 5.485911869330942e-05, "loss": 0.4878, "step": 5313 }, { "epoch": 1.4872656031346208, "grad_norm": 0.22146512440368507, "learning_rate": 5.48437658048911e-05, "loss": 0.4688, "step": 5314 }, { "epoch": 1.487545479988805, "grad_norm": 0.22131754103121587, "learning_rate": 5.482841245544044e-05, "loss": 0.5199, "step": 5315 }, { "epoch": 1.487825356842989, "grad_norm": 0.22080796039787157, "learning_rate": 5.481305864641878e-05, "loss": 0.4681, "step": 5316 }, { "epoch": 1.4881052336971732, "grad_norm": 0.22997572194429822, "learning_rate": 5.479770437928752e-05, "loss": 0.5235, "step": 5317 }, { "epoch": 1.4883851105513575, "grad_norm": 0.23238507853581836, "learning_rate": 5.478234965550805e-05, "loss": 0.4784, "step": 5318 }, { "epoch": 1.4886649874055415, "grad_norm": 0.2176568295354096, "learning_rate": 5.4766994476541864e-05, "loss": 0.4892, "step": 5319 }, { "epoch": 1.4889448642597256, "grad_norm": 0.21519620095808345, "learning_rate": 5.4751638843850485e-05, "loss": 0.4837, "step": 5320 }, { "epoch": 1.48922474111391, "grad_norm": 0.26714408771298775, "learning_rate": 5.4736282758895466e-05, "loss": 0.4881, "step": 5321 }, { "epoch": 1.489504617968094, "grad_norm": 0.23504972996274257, "learning_rate": 5.472092622313839e-05, "loss": 0.4959, "step": 5322 }, { "epoch": 1.4897844948222783, "grad_norm": 0.22993518344546857, "learning_rate": 5.470556923804092e-05, "loss": 0.4993, "step": 5323 }, { "epoch": 1.4900643716764623, "grad_norm": 0.2222276901233184, "learning_rate": 5.4690211805064725e-05, "loss": 0.4903, "step": 5324 }, { "epoch": 1.4903442485306466, "grad_norm": 0.21738014706958292, "learning_rate": 5.4674853925671566e-05, "loss": 0.516, "step": 5325 }, { "epoch": 1.4906241253848307, "grad_norm": 0.22262884554108853, "learning_rate": 5.46594956013232e-05, "loss": 0.4913, "step": 5326 }, { "epoch": 1.4909040022390148, "grad_norm": 0.23295215853361276, "learning_rate": 5.4644136833481395e-05, "loss": 0.5027, "step": 5327 }, { "epoch": 1.491183879093199, "grad_norm": 0.24375183887012075, "learning_rate": 5.462877762360808e-05, "loss": 0.5255, "step": 5328 }, { "epoch": 1.4914637559473831, "grad_norm": 0.2142324763075613, "learning_rate": 5.4613417973165106e-05, "loss": 0.4851, "step": 5329 }, { "epoch": 1.4917436328015672, "grad_norm": 0.2227648490124219, "learning_rate": 5.459805788361443e-05, "loss": 0.4771, "step": 5330 }, { "epoch": 1.4920235096557515, "grad_norm": 0.2234673346516884, "learning_rate": 5.4582697356418034e-05, "loss": 0.5016, "step": 5331 }, { "epoch": 1.4923033865099355, "grad_norm": 0.23108008144566453, "learning_rate": 5.4567336393037925e-05, "loss": 0.4808, "step": 5332 }, { "epoch": 1.4925832633641198, "grad_norm": 0.2394730006065942, "learning_rate": 5.455197499493621e-05, "loss": 0.5208, "step": 5333 }, { "epoch": 1.492863140218304, "grad_norm": 0.22679622242621722, "learning_rate": 5.453661316357495e-05, "loss": 0.4827, "step": 5334 }, { "epoch": 1.4931430170724882, "grad_norm": 0.2249228663322211, "learning_rate": 5.452125090041631e-05, "loss": 0.5112, "step": 5335 }, { "epoch": 1.4934228939266723, "grad_norm": 0.22804097193667003, "learning_rate": 5.4505888206922475e-05, "loss": 0.518, "step": 5336 }, { "epoch": 1.4937027707808563, "grad_norm": 0.23804668663717438, "learning_rate": 5.449052508455568e-05, "loss": 0.5165, "step": 5337 }, { "epoch": 1.4939826476350406, "grad_norm": 0.22062720030244787, "learning_rate": 5.44751615347782e-05, "loss": 0.5085, "step": 5338 }, { "epoch": 1.4942625244892247, "grad_norm": 0.21912947508418557, "learning_rate": 5.4459797559052325e-05, "loss": 0.4873, "step": 5339 }, { "epoch": 1.4945424013434088, "grad_norm": 0.22000486459954285, "learning_rate": 5.4444433158840436e-05, "loss": 0.4703, "step": 5340 }, { "epoch": 1.494822278197593, "grad_norm": 0.22053919747764167, "learning_rate": 5.4429068335604906e-05, "loss": 0.4967, "step": 5341 }, { "epoch": 1.4951021550517773, "grad_norm": 0.23365904599562048, "learning_rate": 5.441370309080818e-05, "loss": 0.4904, "step": 5342 }, { "epoch": 1.4953820319059614, "grad_norm": 0.22269707032354097, "learning_rate": 5.4398337425912715e-05, "loss": 0.5094, "step": 5343 }, { "epoch": 1.4956619087601455, "grad_norm": 0.2329760203926926, "learning_rate": 5.438297134238104e-05, "loss": 0.4969, "step": 5344 }, { "epoch": 1.4959417856143298, "grad_norm": 0.22746772287351746, "learning_rate": 5.436760484167569e-05, "loss": 0.4893, "step": 5345 }, { "epoch": 1.4962216624685138, "grad_norm": 0.23883262642649386, "learning_rate": 5.435223792525928e-05, "loss": 0.5169, "step": 5346 }, { "epoch": 1.496501539322698, "grad_norm": 0.2338676465082267, "learning_rate": 5.433687059459441e-05, "loss": 0.5161, "step": 5347 }, { "epoch": 1.4967814161768822, "grad_norm": 0.2363460086159002, "learning_rate": 5.432150285114378e-05, "loss": 0.5047, "step": 5348 }, { "epoch": 1.4970612930310663, "grad_norm": 0.2178005445609828, "learning_rate": 5.430613469637009e-05, "loss": 0.4908, "step": 5349 }, { "epoch": 1.4973411698852506, "grad_norm": 0.2277203144839674, "learning_rate": 5.429076613173609e-05, "loss": 0.4884, "step": 5350 }, { "epoch": 1.4976210467394346, "grad_norm": 0.225794560934591, "learning_rate": 5.427539715870457e-05, "loss": 0.477, "step": 5351 }, { "epoch": 1.497900923593619, "grad_norm": 0.2243749753222779, "learning_rate": 5.4260027778738354e-05, "loss": 0.5022, "step": 5352 }, { "epoch": 1.498180800447803, "grad_norm": 0.2334257872575213, "learning_rate": 5.42446579933003e-05, "loss": 0.509, "step": 5353 }, { "epoch": 1.498460677301987, "grad_norm": 0.23904002604805577, "learning_rate": 5.422928780385333e-05, "loss": 0.4877, "step": 5354 }, { "epoch": 1.4987405541561714, "grad_norm": 0.22873236866181154, "learning_rate": 5.4213917211860375e-05, "loss": 0.4899, "step": 5355 }, { "epoch": 1.4990204310103554, "grad_norm": 0.21349431724529108, "learning_rate": 5.419854621878443e-05, "loss": 0.5047, "step": 5356 }, { "epoch": 1.4993003078645395, "grad_norm": 0.21486505820150203, "learning_rate": 5.41831748260885e-05, "loss": 0.4784, "step": 5357 }, { "epoch": 1.4995801847187238, "grad_norm": 0.231161793878974, "learning_rate": 5.416780303523565e-05, "loss": 0.496, "step": 5358 }, { "epoch": 1.4998600615729079, "grad_norm": 0.22658153518783808, "learning_rate": 5.415243084768897e-05, "loss": 0.493, "step": 5359 }, { "epoch": 1.500139938427092, "grad_norm": 0.2319266323750032, "learning_rate": 5.413705826491161e-05, "loss": 0.5139, "step": 5360 }, { "epoch": 1.5004198152812762, "grad_norm": 0.2339520560162951, "learning_rate": 5.412168528836672e-05, "loss": 0.4844, "step": 5361 }, { "epoch": 1.5006996921354605, "grad_norm": 0.2310069006519829, "learning_rate": 5.410631191951752e-05, "loss": 0.5026, "step": 5362 }, { "epoch": 1.5009795689896446, "grad_norm": 0.21884529678923517, "learning_rate": 5.409093815982724e-05, "loss": 0.4705, "step": 5363 }, { "epoch": 1.5012594458438286, "grad_norm": 0.22179252483945588, "learning_rate": 5.407556401075919e-05, "loss": 0.486, "step": 5364 }, { "epoch": 1.501539322698013, "grad_norm": 0.22721510546534038, "learning_rate": 5.4060189473776676e-05, "loss": 0.49, "step": 5365 }, { "epoch": 1.501819199552197, "grad_norm": 0.22306617178167543, "learning_rate": 5.404481455034305e-05, "loss": 0.4755, "step": 5366 }, { "epoch": 1.502099076406381, "grad_norm": 0.22766584094233727, "learning_rate": 5.402943924192172e-05, "loss": 0.4863, "step": 5367 }, { "epoch": 1.5023789532605654, "grad_norm": 0.22418659756892878, "learning_rate": 5.40140635499761e-05, "loss": 0.495, "step": 5368 }, { "epoch": 1.5026588301147497, "grad_norm": 0.21751904435545283, "learning_rate": 5.3998687475969666e-05, "loss": 0.4814, "step": 5369 }, { "epoch": 1.5029387069689337, "grad_norm": 0.22492589981571987, "learning_rate": 5.398331102136591e-05, "loss": 0.4821, "step": 5370 }, { "epoch": 1.5032185838231178, "grad_norm": 0.2155570134958645, "learning_rate": 5.39679341876284e-05, "loss": 0.485, "step": 5371 }, { "epoch": 1.503498460677302, "grad_norm": 0.22664020079766536, "learning_rate": 5.395255697622068e-05, "loss": 0.4983, "step": 5372 }, { "epoch": 1.5037783375314862, "grad_norm": 0.22532655513223326, "learning_rate": 5.393717938860638e-05, "loss": 0.4534, "step": 5373 }, { "epoch": 1.5040582143856702, "grad_norm": 0.22792151749075482, "learning_rate": 5.392180142624914e-05, "loss": 0.4911, "step": 5374 }, { "epoch": 1.5043380912398545, "grad_norm": 0.24385146635223562, "learning_rate": 5.390642309061264e-05, "loss": 0.5051, "step": 5375 }, { "epoch": 1.5046179680940386, "grad_norm": 0.21833874786159185, "learning_rate": 5.3891044383160615e-05, "loss": 0.4925, "step": 5376 }, { "epoch": 1.5048978449482227, "grad_norm": 0.22136799527665596, "learning_rate": 5.38756653053568e-05, "loss": 0.491, "step": 5377 }, { "epoch": 1.505177721802407, "grad_norm": 0.22621569826533633, "learning_rate": 5.3860285858665e-05, "loss": 0.5002, "step": 5378 }, { "epoch": 1.5054575986565912, "grad_norm": 0.23668566739683558, "learning_rate": 5.384490604454903e-05, "loss": 0.4896, "step": 5379 }, { "epoch": 1.5057374755107753, "grad_norm": 0.23314549281402172, "learning_rate": 5.382952586447274e-05, "loss": 0.4927, "step": 5380 }, { "epoch": 1.5060173523649594, "grad_norm": 0.23345373624989663, "learning_rate": 5.3814145319900045e-05, "loss": 0.5174, "step": 5381 }, { "epoch": 1.5062972292191437, "grad_norm": 0.2240940396842753, "learning_rate": 5.379876441229486e-05, "loss": 0.5021, "step": 5382 }, { "epoch": 1.5065771060733277, "grad_norm": 0.22182790957879425, "learning_rate": 5.378338314312115e-05, "loss": 0.5035, "step": 5383 }, { "epoch": 1.5068569829275118, "grad_norm": 0.23044292401060051, "learning_rate": 5.3768001513842915e-05, "loss": 0.4842, "step": 5384 }, { "epoch": 1.507136859781696, "grad_norm": 0.22103487297910235, "learning_rate": 5.375261952592418e-05, "loss": 0.521, "step": 5385 }, { "epoch": 1.5074167366358802, "grad_norm": 0.23267000511855776, "learning_rate": 5.373723718082904e-05, "loss": 0.4874, "step": 5386 }, { "epoch": 1.5076966134900642, "grad_norm": 0.22475563162231108, "learning_rate": 5.372185448002155e-05, "loss": 0.5027, "step": 5387 }, { "epoch": 1.5079764903442485, "grad_norm": 0.22201077261229946, "learning_rate": 5.3706471424965875e-05, "loss": 0.4739, "step": 5388 }, { "epoch": 1.5082563671984328, "grad_norm": 0.23888457805830648, "learning_rate": 5.369108801712618e-05, "loss": 0.4969, "step": 5389 }, { "epoch": 1.5085362440526169, "grad_norm": 0.2329132526064181, "learning_rate": 5.3675704257966665e-05, "loss": 0.4988, "step": 5390 }, { "epoch": 1.508816120906801, "grad_norm": 0.22335102429751483, "learning_rate": 5.366032014895155e-05, "loss": 0.4832, "step": 5391 }, { "epoch": 1.5090959977609852, "grad_norm": 0.22084691242722718, "learning_rate": 5.3644935691545116e-05, "loss": 0.4899, "step": 5392 }, { "epoch": 1.5093758746151693, "grad_norm": 0.35612480869485336, "learning_rate": 5.3629550887211666e-05, "loss": 0.5011, "step": 5393 }, { "epoch": 1.5096557514693534, "grad_norm": 0.222333798556437, "learning_rate": 5.361416573741554e-05, "loss": 0.4796, "step": 5394 }, { "epoch": 1.5099356283235377, "grad_norm": 0.228078232568696, "learning_rate": 5.359878024362108e-05, "loss": 0.5145, "step": 5395 }, { "epoch": 1.510215505177722, "grad_norm": 0.23042506286294107, "learning_rate": 5.3583394407292706e-05, "loss": 0.523, "step": 5396 }, { "epoch": 1.5104953820319058, "grad_norm": 0.22279054198944678, "learning_rate": 5.356800822989486e-05, "loss": 0.5128, "step": 5397 }, { "epoch": 1.51077525888609, "grad_norm": 0.21799199011198456, "learning_rate": 5.355262171289198e-05, "loss": 0.4674, "step": 5398 }, { "epoch": 1.5110551357402744, "grad_norm": 0.2271852323952012, "learning_rate": 5.3537234857748584e-05, "loss": 0.4992, "step": 5399 }, { "epoch": 1.5113350125944585, "grad_norm": 0.2358545110627064, "learning_rate": 5.3521847665929194e-05, "loss": 0.5086, "step": 5400 }, { "epoch": 1.5116148894486425, "grad_norm": 0.22501120662928606, "learning_rate": 5.3506460138898364e-05, "loss": 0.4967, "step": 5401 }, { "epoch": 1.5118947663028268, "grad_norm": 0.22456639835775383, "learning_rate": 5.3491072278120704e-05, "loss": 0.4861, "step": 5402 }, { "epoch": 1.512174643157011, "grad_norm": 0.2187884148915267, "learning_rate": 5.347568408506082e-05, "loss": 0.5045, "step": 5403 }, { "epoch": 1.512454520011195, "grad_norm": 0.22347995490945863, "learning_rate": 5.346029556118338e-05, "loss": 0.5031, "step": 5404 }, { "epoch": 1.5127343968653792, "grad_norm": 0.22946628984463002, "learning_rate": 5.344490670795308e-05, "loss": 0.5148, "step": 5405 }, { "epoch": 1.5130142737195635, "grad_norm": 0.23206586802458026, "learning_rate": 5.342951752683464e-05, "loss": 0.4893, "step": 5406 }, { "epoch": 1.5132941505737474, "grad_norm": 0.22901279293918392, "learning_rate": 5.3414128019292785e-05, "loss": 0.5041, "step": 5407 }, { "epoch": 1.5135740274279317, "grad_norm": 0.2311481037821431, "learning_rate": 5.339873818679232e-05, "loss": 0.5154, "step": 5408 }, { "epoch": 1.513853904282116, "grad_norm": 0.2242858288544814, "learning_rate": 5.3383348030798056e-05, "loss": 0.4854, "step": 5409 }, { "epoch": 1.5141337811363, "grad_norm": 0.23104327232500133, "learning_rate": 5.336795755277483e-05, "loss": 0.477, "step": 5410 }, { "epoch": 1.514413657990484, "grad_norm": 0.22600539690273602, "learning_rate": 5.335256675418752e-05, "loss": 0.5054, "step": 5411 }, { "epoch": 1.5146935348446684, "grad_norm": 0.228359395355165, "learning_rate": 5.3337175636501024e-05, "loss": 0.4993, "step": 5412 }, { "epoch": 1.5149734116988525, "grad_norm": 0.22401380622820097, "learning_rate": 5.332178420118028e-05, "loss": 0.4838, "step": 5413 }, { "epoch": 1.5152532885530365, "grad_norm": 0.23032159907474142, "learning_rate": 5.3306392449690266e-05, "loss": 0.5123, "step": 5414 }, { "epoch": 1.5155331654072208, "grad_norm": 0.22346671937052898, "learning_rate": 5.329100038349597e-05, "loss": 0.4803, "step": 5415 }, { "epoch": 1.5158130422614051, "grad_norm": 0.2287473712593248, "learning_rate": 5.327560800406241e-05, "loss": 0.517, "step": 5416 }, { "epoch": 1.5160929191155892, "grad_norm": 0.2240763556185364, "learning_rate": 5.3260215312854644e-05, "loss": 0.4953, "step": 5417 }, { "epoch": 1.5163727959697733, "grad_norm": 0.22193937162416424, "learning_rate": 5.3244822311337764e-05, "loss": 0.5142, "step": 5418 }, { "epoch": 1.5166526728239575, "grad_norm": 0.2159287036872767, "learning_rate": 5.322942900097688e-05, "loss": 0.4774, "step": 5419 }, { "epoch": 1.5169325496781416, "grad_norm": 0.2355219929023816, "learning_rate": 5.3214035383237135e-05, "loss": 0.5015, "step": 5420 }, { "epoch": 1.5172124265323257, "grad_norm": 0.22157497736140797, "learning_rate": 5.319864145958371e-05, "loss": 0.4953, "step": 5421 }, { "epoch": 1.51749230338651, "grad_norm": 0.22879521258183022, "learning_rate": 5.318324723148179e-05, "loss": 0.5156, "step": 5422 }, { "epoch": 1.517772180240694, "grad_norm": 0.22218590136515592, "learning_rate": 5.3167852700396614e-05, "loss": 0.5034, "step": 5423 }, { "epoch": 1.5180520570948781, "grad_norm": 0.22744342293880013, "learning_rate": 5.3152457867793446e-05, "loss": 0.5053, "step": 5424 }, { "epoch": 1.5183319339490624, "grad_norm": 0.22860124369276388, "learning_rate": 5.313706273513758e-05, "loss": 0.506, "step": 5425 }, { "epoch": 1.5186118108032467, "grad_norm": 0.21901078600220478, "learning_rate": 5.312166730389434e-05, "loss": 0.4745, "step": 5426 }, { "epoch": 1.5188916876574308, "grad_norm": 0.222248326215959, "learning_rate": 5.310627157552904e-05, "loss": 0.5012, "step": 5427 }, { "epoch": 1.5191715645116148, "grad_norm": 0.2327393237346538, "learning_rate": 5.309087555150708e-05, "loss": 0.4937, "step": 5428 }, { "epoch": 1.5194514413657991, "grad_norm": 0.22886242942321186, "learning_rate": 5.307547923329386e-05, "loss": 0.487, "step": 5429 }, { "epoch": 1.5197313182199832, "grad_norm": 0.24026441635054493, "learning_rate": 5.306008262235479e-05, "loss": 0.5019, "step": 5430 }, { "epoch": 1.5200111950741673, "grad_norm": 0.23357057655379512, "learning_rate": 5.304468572015535e-05, "loss": 0.5054, "step": 5431 }, { "epoch": 1.5202910719283516, "grad_norm": 0.23349083419089312, "learning_rate": 5.302928852816102e-05, "loss": 0.5036, "step": 5432 }, { "epoch": 1.5205709487825358, "grad_norm": 0.22110916014226956, "learning_rate": 5.30138910478373e-05, "loss": 0.4749, "step": 5433 }, { "epoch": 1.5208508256367197, "grad_norm": 0.22426314741174758, "learning_rate": 5.299849328064976e-05, "loss": 0.5053, "step": 5434 }, { "epoch": 1.521130702490904, "grad_norm": 0.22431996803172644, "learning_rate": 5.2983095228063964e-05, "loss": 0.485, "step": 5435 }, { "epoch": 1.5214105793450883, "grad_norm": 0.22836923342568963, "learning_rate": 5.296769689154547e-05, "loss": 0.4857, "step": 5436 }, { "epoch": 1.5216904561992723, "grad_norm": 0.36549331515179856, "learning_rate": 5.295229827255993e-05, "loss": 0.5039, "step": 5437 }, { "epoch": 1.5219703330534564, "grad_norm": 0.2096493734106698, "learning_rate": 5.293689937257299e-05, "loss": 0.4847, "step": 5438 }, { "epoch": 1.5222502099076407, "grad_norm": 0.22195267794046467, "learning_rate": 5.292150019305033e-05, "loss": 0.4957, "step": 5439 }, { "epoch": 1.5225300867618248, "grad_norm": 0.22974226173599646, "learning_rate": 5.290610073545764e-05, "loss": 0.4731, "step": 5440 }, { "epoch": 1.5228099636160088, "grad_norm": 0.22637494741228384, "learning_rate": 5.289070100126066e-05, "loss": 0.4881, "step": 5441 }, { "epoch": 1.5230898404701931, "grad_norm": 0.2284943980225406, "learning_rate": 5.2875300991925114e-05, "loss": 0.4811, "step": 5442 }, { "epoch": 1.5233697173243774, "grad_norm": 0.22132666260049025, "learning_rate": 5.2859900708916844e-05, "loss": 0.4863, "step": 5443 }, { "epoch": 1.5236495941785613, "grad_norm": 0.23433782492196434, "learning_rate": 5.2844500153701615e-05, "loss": 0.4985, "step": 5444 }, { "epoch": 1.5239294710327456, "grad_norm": 0.23414573490537904, "learning_rate": 5.2829099327745266e-05, "loss": 0.5004, "step": 5445 }, { "epoch": 1.5242093478869299, "grad_norm": 0.2250347831621673, "learning_rate": 5.281369823251366e-05, "loss": 0.4866, "step": 5446 }, { "epoch": 1.524489224741114, "grad_norm": 0.22436169247133989, "learning_rate": 5.279829686947269e-05, "loss": 0.477, "step": 5447 }, { "epoch": 1.524769101595298, "grad_norm": 0.22536271246100767, "learning_rate": 5.278289524008825e-05, "loss": 0.497, "step": 5448 }, { "epoch": 1.5250489784494823, "grad_norm": 0.23488341377345673, "learning_rate": 5.276749334582628e-05, "loss": 0.5029, "step": 5449 }, { "epoch": 1.5253288553036664, "grad_norm": 0.2315441593858389, "learning_rate": 5.275209118815273e-05, "loss": 0.5065, "step": 5450 }, { "epoch": 1.5256087321578504, "grad_norm": 0.25428653237842314, "learning_rate": 5.273668876853361e-05, "loss": 0.5263, "step": 5451 }, { "epoch": 1.5258886090120347, "grad_norm": 0.22391293143001556, "learning_rate": 5.272128608843494e-05, "loss": 0.4808, "step": 5452 }, { "epoch": 1.526168485866219, "grad_norm": 0.2337345910607096, "learning_rate": 5.270588314932273e-05, "loss": 0.5286, "step": 5453 }, { "epoch": 1.526448362720403, "grad_norm": 0.23036909516695217, "learning_rate": 5.2690479952663054e-05, "loss": 0.4915, "step": 5454 }, { "epoch": 1.5267282395745871, "grad_norm": 0.21697624969892057, "learning_rate": 5.267507649992197e-05, "loss": 0.4823, "step": 5455 }, { "epoch": 1.5270081164287714, "grad_norm": 0.23912664018691718, "learning_rate": 5.2659672792565615e-05, "loss": 0.5188, "step": 5456 }, { "epoch": 1.5272879932829555, "grad_norm": 0.23041920103790217, "learning_rate": 5.2644268832060114e-05, "loss": 0.4853, "step": 5457 }, { "epoch": 1.5275678701371396, "grad_norm": 0.21799483053785548, "learning_rate": 5.2628864619871635e-05, "loss": 0.5059, "step": 5458 }, { "epoch": 1.5278477469913239, "grad_norm": 0.21065053181165194, "learning_rate": 5.261346015746633e-05, "loss": 0.4882, "step": 5459 }, { "epoch": 1.528127623845508, "grad_norm": 0.21809702296056538, "learning_rate": 5.259805544631043e-05, "loss": 0.4825, "step": 5460 }, { "epoch": 1.528407500699692, "grad_norm": 0.21134778412815755, "learning_rate": 5.258265048787018e-05, "loss": 0.5025, "step": 5461 }, { "epoch": 1.5286873775538763, "grad_norm": 0.2302931779222548, "learning_rate": 5.25672452836118e-05, "loss": 0.5098, "step": 5462 }, { "epoch": 1.5289672544080606, "grad_norm": 0.23252348128385064, "learning_rate": 5.255183983500157e-05, "loss": 0.5031, "step": 5463 }, { "epoch": 1.5292471312622447, "grad_norm": 0.21668314929212756, "learning_rate": 5.2536434143505806e-05, "loss": 0.4751, "step": 5464 }, { "epoch": 1.5295270081164287, "grad_norm": 0.2207455463779076, "learning_rate": 5.2521028210590806e-05, "loss": 0.5001, "step": 5465 }, { "epoch": 1.529806884970613, "grad_norm": 0.2293560980736755, "learning_rate": 5.2505622037722945e-05, "loss": 0.4932, "step": 5466 }, { "epoch": 1.530086761824797, "grad_norm": 0.21491334500688608, "learning_rate": 5.249021562636857e-05, "loss": 0.4877, "step": 5467 }, { "epoch": 1.5303666386789812, "grad_norm": 0.22356099799464063, "learning_rate": 5.247480897799406e-05, "loss": 0.4879, "step": 5468 }, { "epoch": 1.5306465155331654, "grad_norm": 0.22047742493084346, "learning_rate": 5.245940209406587e-05, "loss": 0.4982, "step": 5469 }, { "epoch": 1.5309263923873497, "grad_norm": 0.2243889938386529, "learning_rate": 5.24439949760504e-05, "loss": 0.4901, "step": 5470 }, { "epoch": 1.5312062692415336, "grad_norm": 0.22565115953567208, "learning_rate": 5.242858762541414e-05, "loss": 0.4746, "step": 5471 }, { "epoch": 1.5314861460957179, "grad_norm": 0.22541263446564486, "learning_rate": 5.241318004362353e-05, "loss": 0.4903, "step": 5472 }, { "epoch": 1.5317660229499022, "grad_norm": 0.23603589753057705, "learning_rate": 5.2397772232145105e-05, "loss": 0.4955, "step": 5473 }, { "epoch": 1.5320458998040862, "grad_norm": 0.2302453543942871, "learning_rate": 5.238236419244537e-05, "loss": 0.4914, "step": 5474 }, { "epoch": 1.5323257766582703, "grad_norm": 0.24355197085429647, "learning_rate": 5.236695592599088e-05, "loss": 0.5246, "step": 5475 }, { "epoch": 1.5326056535124546, "grad_norm": 0.22854580307770703, "learning_rate": 5.235154743424818e-05, "loss": 0.4868, "step": 5476 }, { "epoch": 1.5328855303666387, "grad_norm": 0.2228751796284566, "learning_rate": 5.23361387186839e-05, "loss": 0.4765, "step": 5477 }, { "epoch": 1.5331654072208227, "grad_norm": 0.2259556807149954, "learning_rate": 5.2320729780764635e-05, "loss": 0.4854, "step": 5478 }, { "epoch": 1.533445284075007, "grad_norm": 0.22914257075987968, "learning_rate": 5.2305320621957e-05, "loss": 0.4947, "step": 5479 }, { "epoch": 1.5337251609291913, "grad_norm": 0.22139845150999246, "learning_rate": 5.2289911243727665e-05, "loss": 0.4677, "step": 5480 }, { "epoch": 1.5340050377833752, "grad_norm": 0.21705017782435324, "learning_rate": 5.22745016475433e-05, "loss": 0.4904, "step": 5481 }, { "epoch": 1.5342849146375594, "grad_norm": 0.22065078716060177, "learning_rate": 5.2259091834870575e-05, "loss": 0.4838, "step": 5482 }, { "epoch": 1.5345647914917437, "grad_norm": 0.21302127219688868, "learning_rate": 5.2243681807176236e-05, "loss": 0.5126, "step": 5483 }, { "epoch": 1.5348446683459278, "grad_norm": 0.2245992878314648, "learning_rate": 5.222827156592701e-05, "loss": 0.4855, "step": 5484 }, { "epoch": 1.5351245452001119, "grad_norm": 0.2237374298522733, "learning_rate": 5.221286111258963e-05, "loss": 0.4964, "step": 5485 }, { "epoch": 1.5354044220542962, "grad_norm": 0.23570909450106375, "learning_rate": 5.219745044863091e-05, "loss": 0.5162, "step": 5486 }, { "epoch": 1.5356842989084802, "grad_norm": 0.22737616594999852, "learning_rate": 5.2182039575517616e-05, "loss": 0.4874, "step": 5487 }, { "epoch": 1.5359641757626643, "grad_norm": 0.223409371172766, "learning_rate": 5.2166628494716585e-05, "loss": 0.5021, "step": 5488 }, { "epoch": 1.5362440526168486, "grad_norm": 0.22576016070750649, "learning_rate": 5.215121720769465e-05, "loss": 0.5042, "step": 5489 }, { "epoch": 1.536523929471033, "grad_norm": 0.22072691297681046, "learning_rate": 5.213580571591864e-05, "loss": 0.4864, "step": 5490 }, { "epoch": 1.536803806325217, "grad_norm": 0.2305464803155683, "learning_rate": 5.2120394020855456e-05, "loss": 0.5024, "step": 5491 }, { "epoch": 1.537083683179401, "grad_norm": 0.22406606473296256, "learning_rate": 5.2104982123971967e-05, "loss": 0.5135, "step": 5492 }, { "epoch": 1.5373635600335853, "grad_norm": 0.22949407945698136, "learning_rate": 5.208957002673511e-05, "loss": 0.5018, "step": 5493 }, { "epoch": 1.5376434368877694, "grad_norm": 0.22522436493783196, "learning_rate": 5.2074157730611805e-05, "loss": 0.5089, "step": 5494 }, { "epoch": 1.5379233137419535, "grad_norm": 0.21835002861091665, "learning_rate": 5.2058745237069004e-05, "loss": 0.5025, "step": 5495 }, { "epoch": 1.5382031905961377, "grad_norm": 0.22598496581301414, "learning_rate": 5.204333254757369e-05, "loss": 0.4954, "step": 5496 }, { "epoch": 1.5384830674503218, "grad_norm": 0.21953127951438886, "learning_rate": 5.202791966359284e-05, "loss": 0.4867, "step": 5497 }, { "epoch": 1.5387629443045059, "grad_norm": 0.2321845195378221, "learning_rate": 5.201250658659347e-05, "loss": 0.4951, "step": 5498 }, { "epoch": 1.5390428211586902, "grad_norm": 0.22580315013759258, "learning_rate": 5.199709331804258e-05, "loss": 0.482, "step": 5499 }, { "epoch": 1.5393226980128745, "grad_norm": 0.2106715632460807, "learning_rate": 5.198167985940723e-05, "loss": 0.4826, "step": 5500 }, { "epoch": 1.5396025748670585, "grad_norm": 0.22257357891325769, "learning_rate": 5.196626621215449e-05, "loss": 0.4784, "step": 5501 }, { "epoch": 1.5398824517212426, "grad_norm": 0.22051229132963152, "learning_rate": 5.195085237775141e-05, "loss": 0.4776, "step": 5502 }, { "epoch": 1.540162328575427, "grad_norm": 0.22602824434366847, "learning_rate": 5.193543835766513e-05, "loss": 0.508, "step": 5503 }, { "epoch": 1.540442205429611, "grad_norm": 0.2326471364228337, "learning_rate": 5.192002415336273e-05, "loss": 0.5088, "step": 5504 }, { "epoch": 1.540722082283795, "grad_norm": 0.2157668784817698, "learning_rate": 5.1904609766311374e-05, "loss": 0.4859, "step": 5505 }, { "epoch": 1.5410019591379793, "grad_norm": 0.21311169923914217, "learning_rate": 5.1889195197978194e-05, "loss": 0.4836, "step": 5506 }, { "epoch": 1.5412818359921634, "grad_norm": 0.2280535170130329, "learning_rate": 5.1873780449830355e-05, "loss": 0.5156, "step": 5507 }, { "epoch": 1.5415617128463475, "grad_norm": 0.22595258533730095, "learning_rate": 5.185836552333504e-05, "loss": 0.4869, "step": 5508 }, { "epoch": 1.5418415897005318, "grad_norm": 0.2311339774670807, "learning_rate": 5.1842950419959445e-05, "loss": 0.4919, "step": 5509 }, { "epoch": 1.542121466554716, "grad_norm": 0.22176105493539253, "learning_rate": 5.1827535141170814e-05, "loss": 0.4886, "step": 5510 }, { "epoch": 1.5424013434089001, "grad_norm": 0.20993472889014483, "learning_rate": 5.1812119688436345e-05, "loss": 0.4817, "step": 5511 }, { "epoch": 1.5426812202630842, "grad_norm": 0.23066979961305653, "learning_rate": 5.179670406322332e-05, "loss": 0.5261, "step": 5512 }, { "epoch": 1.5429610971172685, "grad_norm": 0.22212220602819874, "learning_rate": 5.1781288266998994e-05, "loss": 0.4723, "step": 5513 }, { "epoch": 1.5432409739714525, "grad_norm": 0.22180880486084367, "learning_rate": 5.176587230123067e-05, "loss": 0.4986, "step": 5514 }, { "epoch": 1.5435208508256366, "grad_norm": 0.22771426437422765, "learning_rate": 5.175045616738561e-05, "loss": 0.4985, "step": 5515 }, { "epoch": 1.543800727679821, "grad_norm": 0.22737842481544127, "learning_rate": 5.173503986693118e-05, "loss": 0.4884, "step": 5516 }, { "epoch": 1.5440806045340052, "grad_norm": 0.22372893906589034, "learning_rate": 5.171962340133466e-05, "loss": 0.4792, "step": 5517 }, { "epoch": 1.544360481388189, "grad_norm": 0.22725234823286433, "learning_rate": 5.170420677206343e-05, "loss": 0.4921, "step": 5518 }, { "epoch": 1.5446403582423733, "grad_norm": 0.2272673329241304, "learning_rate": 5.168878998058485e-05, "loss": 0.5053, "step": 5519 }, { "epoch": 1.5449202350965576, "grad_norm": 0.2175128874622248, "learning_rate": 5.167337302836628e-05, "loss": 0.4734, "step": 5520 }, { "epoch": 1.5452001119507417, "grad_norm": 0.22463279961009588, "learning_rate": 5.165795591687513e-05, "loss": 0.496, "step": 5521 }, { "epoch": 1.5454799888049258, "grad_norm": 0.21911222931517413, "learning_rate": 5.164253864757882e-05, "loss": 0.4978, "step": 5522 }, { "epoch": 1.54575986565911, "grad_norm": 0.2144687118973654, "learning_rate": 5.162712122194475e-05, "loss": 0.4864, "step": 5523 }, { "epoch": 1.5460397425132941, "grad_norm": 0.2234349525817024, "learning_rate": 5.161170364144038e-05, "loss": 0.4913, "step": 5524 }, { "epoch": 1.5463196193674782, "grad_norm": 0.23038865018825, "learning_rate": 5.159628590753317e-05, "loss": 0.479, "step": 5525 }, { "epoch": 1.5465994962216625, "grad_norm": 0.22704121635477, "learning_rate": 5.1580868021690554e-05, "loss": 0.4737, "step": 5526 }, { "epoch": 1.5468793730758468, "grad_norm": 0.22058861548784583, "learning_rate": 5.1565449985380045e-05, "loss": 0.4794, "step": 5527 }, { "epoch": 1.5471592499300306, "grad_norm": 0.22361203755141656, "learning_rate": 5.155003180006911e-05, "loss": 0.505, "step": 5528 }, { "epoch": 1.547439126784215, "grad_norm": 0.2246715982507394, "learning_rate": 5.153461346722529e-05, "loss": 0.4919, "step": 5529 }, { "epoch": 1.5477190036383992, "grad_norm": 0.22097702675372954, "learning_rate": 5.151919498831611e-05, "loss": 0.4857, "step": 5530 }, { "epoch": 1.5479988804925833, "grad_norm": 0.22288398323463426, "learning_rate": 5.1503776364809095e-05, "loss": 0.5018, "step": 5531 }, { "epoch": 1.5482787573467673, "grad_norm": 0.22599295507683007, "learning_rate": 5.1488357598171796e-05, "loss": 0.4731, "step": 5532 }, { "epoch": 1.5485586342009516, "grad_norm": 0.22634596383355085, "learning_rate": 5.147293868987181e-05, "loss": 0.4803, "step": 5533 }, { "epoch": 1.5488385110551357, "grad_norm": 0.23343780091190894, "learning_rate": 5.145751964137669e-05, "loss": 0.4933, "step": 5534 }, { "epoch": 1.5491183879093198, "grad_norm": 0.23179343716192186, "learning_rate": 5.144210045415402e-05, "loss": 0.5037, "step": 5535 }, { "epoch": 1.549398264763504, "grad_norm": 0.22337254319140473, "learning_rate": 5.142668112967143e-05, "loss": 0.4718, "step": 5536 }, { "epoch": 1.5496781416176884, "grad_norm": 0.22856473071054442, "learning_rate": 5.141126166939652e-05, "loss": 0.4972, "step": 5537 }, { "epoch": 1.5499580184718724, "grad_norm": 0.22682415196387343, "learning_rate": 5.139584207479694e-05, "loss": 0.4788, "step": 5538 }, { "epoch": 1.5502378953260565, "grad_norm": 0.2342114551926887, "learning_rate": 5.138042234734034e-05, "loss": 0.4926, "step": 5539 }, { "epoch": 1.5505177721802408, "grad_norm": 0.22073298235366773, "learning_rate": 5.136500248849436e-05, "loss": 0.4827, "step": 5540 }, { "epoch": 1.5507976490344249, "grad_norm": 0.2308918394389832, "learning_rate": 5.1349582499726675e-05, "loss": 0.4794, "step": 5541 }, { "epoch": 1.551077525888609, "grad_norm": 0.2373222903561083, "learning_rate": 5.133416238250499e-05, "loss": 0.5264, "step": 5542 }, { "epoch": 1.5513574027427932, "grad_norm": 0.23196388741003965, "learning_rate": 5.131874213829698e-05, "loss": 0.5137, "step": 5543 }, { "epoch": 1.5516372795969773, "grad_norm": 0.23835911241143926, "learning_rate": 5.1303321768570345e-05, "loss": 0.544, "step": 5544 }, { "epoch": 1.5519171564511614, "grad_norm": 0.22224209128980993, "learning_rate": 5.128790127479281e-05, "loss": 0.4761, "step": 5545 }, { "epoch": 1.5521970333053456, "grad_norm": 0.2266650114888923, "learning_rate": 5.127248065843211e-05, "loss": 0.4986, "step": 5546 }, { "epoch": 1.55247691015953, "grad_norm": 0.21559488524631035, "learning_rate": 5.1257059920955995e-05, "loss": 0.4744, "step": 5547 }, { "epoch": 1.552756787013714, "grad_norm": 0.20869256256699395, "learning_rate": 5.124163906383223e-05, "loss": 0.4897, "step": 5548 }, { "epoch": 1.553036663867898, "grad_norm": 0.22554354805913607, "learning_rate": 5.122621808852853e-05, "loss": 0.5028, "step": 5549 }, { "epoch": 1.5533165407220824, "grad_norm": 0.22404022736807438, "learning_rate": 5.121079699651273e-05, "loss": 0.476, "step": 5550 }, { "epoch": 1.5535964175762664, "grad_norm": 0.21840184697867143, "learning_rate": 5.119537578925259e-05, "loss": 0.4786, "step": 5551 }, { "epoch": 1.5538762944304505, "grad_norm": 0.21642551836098425, "learning_rate": 5.1179954468215915e-05, "loss": 0.4923, "step": 5552 }, { "epoch": 1.5541561712846348, "grad_norm": 0.22562525130432082, "learning_rate": 5.116453303487052e-05, "loss": 0.4876, "step": 5553 }, { "epoch": 1.554436048138819, "grad_norm": 0.22369043937979324, "learning_rate": 5.11491114906842e-05, "loss": 0.4734, "step": 5554 }, { "epoch": 1.554715924993003, "grad_norm": 0.21331217522552084, "learning_rate": 5.113368983712481e-05, "loss": 0.5092, "step": 5555 }, { "epoch": 1.5549958018471872, "grad_norm": 0.2285230341412732, "learning_rate": 5.111826807566019e-05, "loss": 0.5007, "step": 5556 }, { "epoch": 1.5552756787013715, "grad_norm": 0.22319739454818105, "learning_rate": 5.1102846207758195e-05, "loss": 0.4733, "step": 5557 }, { "epoch": 1.5555555555555556, "grad_norm": 0.23825240682033183, "learning_rate": 5.108742423488667e-05, "loss": 0.5241, "step": 5558 }, { "epoch": 1.5558354324097396, "grad_norm": 0.2300284898939921, "learning_rate": 5.10720021585135e-05, "loss": 0.4836, "step": 5559 }, { "epoch": 1.556115309263924, "grad_norm": 0.22652834007336548, "learning_rate": 5.1056579980106564e-05, "loss": 0.5129, "step": 5560 }, { "epoch": 1.556395186118108, "grad_norm": 0.23259689587392177, "learning_rate": 5.104115770113377e-05, "loss": 0.5042, "step": 5561 }, { "epoch": 1.556675062972292, "grad_norm": 0.22243522953532718, "learning_rate": 5.1025735323063e-05, "loss": 0.4757, "step": 5562 }, { "epoch": 1.5569549398264764, "grad_norm": 0.23530880200349663, "learning_rate": 5.101031284736214e-05, "loss": 0.5087, "step": 5563 }, { "epoch": 1.5572348166806607, "grad_norm": 0.22853810652850762, "learning_rate": 5.0994890275499155e-05, "loss": 0.4821, "step": 5564 }, { "epoch": 1.5575146935348445, "grad_norm": 0.22454368425628163, "learning_rate": 5.097946760894195e-05, "loss": 0.4842, "step": 5565 }, { "epoch": 1.5577945703890288, "grad_norm": 0.22903907096900009, "learning_rate": 5.096404484915849e-05, "loss": 0.5012, "step": 5566 }, { "epoch": 1.558074447243213, "grad_norm": 0.22171177976385112, "learning_rate": 5.094862199761669e-05, "loss": 0.4762, "step": 5567 }, { "epoch": 1.5583543240973972, "grad_norm": 0.22613511012567147, "learning_rate": 5.0933199055784505e-05, "loss": 0.4895, "step": 5568 }, { "epoch": 1.5586342009515812, "grad_norm": 0.23439319379854245, "learning_rate": 5.0917776025129926e-05, "loss": 0.5054, "step": 5569 }, { "epoch": 1.5589140778057655, "grad_norm": 0.2342005317269749, "learning_rate": 5.090235290712092e-05, "loss": 0.4969, "step": 5570 }, { "epoch": 1.5591939546599496, "grad_norm": 0.24274956040005172, "learning_rate": 5.088692970322545e-05, "loss": 0.4997, "step": 5571 }, { "epoch": 1.5594738315141337, "grad_norm": 0.2242674755747388, "learning_rate": 5.08715064149115e-05, "loss": 0.5054, "step": 5572 }, { "epoch": 1.559753708368318, "grad_norm": 0.23541270133664366, "learning_rate": 5.085608304364708e-05, "loss": 0.4977, "step": 5573 }, { "epoch": 1.5600335852225022, "grad_norm": 0.2312343919974206, "learning_rate": 5.084065959090022e-05, "loss": 0.4944, "step": 5574 }, { "epoch": 1.5603134620766863, "grad_norm": 0.2248216189596048, "learning_rate": 5.0825236058138906e-05, "loss": 0.4895, "step": 5575 }, { "epoch": 1.5605933389308704, "grad_norm": 0.2359319249211282, "learning_rate": 5.080981244683115e-05, "loss": 0.484, "step": 5576 }, { "epoch": 1.5608732157850547, "grad_norm": 0.2314013255489555, "learning_rate": 5.0794388758445e-05, "loss": 0.4982, "step": 5577 }, { "epoch": 1.5611530926392387, "grad_norm": 0.22130249344904962, "learning_rate": 5.077896499444847e-05, "loss": 0.4944, "step": 5578 }, { "epoch": 1.5614329694934228, "grad_norm": 0.22318504258286237, "learning_rate": 5.0763541156309646e-05, "loss": 0.4903, "step": 5579 }, { "epoch": 1.561712846347607, "grad_norm": 0.2253565705016098, "learning_rate": 5.074811724549652e-05, "loss": 0.4949, "step": 5580 }, { "epoch": 1.5619927232017912, "grad_norm": 0.22285492366295562, "learning_rate": 5.0732693263477185e-05, "loss": 0.4894, "step": 5581 }, { "epoch": 1.5622726000559752, "grad_norm": 0.2297401192687475, "learning_rate": 5.0717269211719685e-05, "loss": 0.4832, "step": 5582 }, { "epoch": 1.5625524769101595, "grad_norm": 0.25651403582689786, "learning_rate": 5.0701845091692116e-05, "loss": 0.466, "step": 5583 }, { "epoch": 1.5628323537643438, "grad_norm": 0.23471078254796965, "learning_rate": 5.0686420904862534e-05, "loss": 0.4863, "step": 5584 }, { "epoch": 1.5631122306185279, "grad_norm": 0.22602221563616173, "learning_rate": 5.0670996652699024e-05, "loss": 0.4859, "step": 5585 }, { "epoch": 1.563392107472712, "grad_norm": 0.2170110679437644, "learning_rate": 5.065557233666968e-05, "loss": 0.4755, "step": 5586 }, { "epoch": 1.5636719843268962, "grad_norm": 0.22292328836669514, "learning_rate": 5.064014795824258e-05, "loss": 0.4978, "step": 5587 }, { "epoch": 1.5639518611810803, "grad_norm": 0.22283976419155824, "learning_rate": 5.0624723518885864e-05, "loss": 0.503, "step": 5588 }, { "epoch": 1.5642317380352644, "grad_norm": 0.22697066784965056, "learning_rate": 5.0609299020067594e-05, "loss": 0.4915, "step": 5589 }, { "epoch": 1.5645116148894487, "grad_norm": 0.2237210696005833, "learning_rate": 5.05938744632559e-05, "loss": 0.488, "step": 5590 }, { "epoch": 1.564791491743633, "grad_norm": 0.23033565315694643, "learning_rate": 5.057844984991891e-05, "loss": 0.4818, "step": 5591 }, { "epoch": 1.5650713685978168, "grad_norm": 0.22594536289051512, "learning_rate": 5.0563025181524736e-05, "loss": 0.4924, "step": 5592 }, { "epoch": 1.565351245452001, "grad_norm": 0.22902664035682394, "learning_rate": 5.054760045954152e-05, "loss": 0.4825, "step": 5593 }, { "epoch": 1.5656311223061854, "grad_norm": 0.2284375583849929, "learning_rate": 5.053217568543738e-05, "loss": 0.4993, "step": 5594 }, { "epoch": 1.5659109991603695, "grad_norm": 0.21817010761159086, "learning_rate": 5.051675086068045e-05, "loss": 0.4869, "step": 5595 }, { "epoch": 1.5661908760145535, "grad_norm": 0.21837069183033944, "learning_rate": 5.05013259867389e-05, "loss": 0.4762, "step": 5596 }, { "epoch": 1.5664707528687378, "grad_norm": 0.2292995169660129, "learning_rate": 5.048590106508086e-05, "loss": 0.471, "step": 5597 }, { "epoch": 1.566750629722922, "grad_norm": 0.23333384974605587, "learning_rate": 5.047047609717448e-05, "loss": 0.5175, "step": 5598 }, { "epoch": 1.567030506577106, "grad_norm": 0.22607909993090125, "learning_rate": 5.0455051084487915e-05, "loss": 0.4879, "step": 5599 }, { "epoch": 1.5673103834312903, "grad_norm": 0.2188263315194909, "learning_rate": 5.043962602848934e-05, "loss": 0.4783, "step": 5600 }, { "epoch": 1.5675902602854745, "grad_norm": 0.22301232445952954, "learning_rate": 5.0424200930646914e-05, "loss": 0.503, "step": 5601 }, { "epoch": 1.5678701371396584, "grad_norm": 0.23886610604380776, "learning_rate": 5.040877579242881e-05, "loss": 0.4975, "step": 5602 }, { "epoch": 1.5681500139938427, "grad_norm": 0.2350143648331525, "learning_rate": 5.039335061530319e-05, "loss": 0.5022, "step": 5603 }, { "epoch": 1.568429890848027, "grad_norm": 0.22150415708990884, "learning_rate": 5.037792540073823e-05, "loss": 0.4857, "step": 5604 }, { "epoch": 1.568709767702211, "grad_norm": 0.2303358839947542, "learning_rate": 5.036250015020214e-05, "loss": 0.5091, "step": 5605 }, { "epoch": 1.5689896445563951, "grad_norm": 0.2217424316390478, "learning_rate": 5.034707486516307e-05, "loss": 0.4799, "step": 5606 }, { "epoch": 1.5692695214105794, "grad_norm": 0.23553254094918738, "learning_rate": 5.033164954708922e-05, "loss": 0.509, "step": 5607 }, { "epoch": 1.5695493982647635, "grad_norm": 0.2235691534728348, "learning_rate": 5.031622419744879e-05, "loss": 0.4867, "step": 5608 }, { "epoch": 1.5698292751189475, "grad_norm": 0.22532176345221014, "learning_rate": 5.030079881770996e-05, "loss": 0.4909, "step": 5609 }, { "epoch": 1.5701091519731318, "grad_norm": 0.2236565549519103, "learning_rate": 5.028537340934092e-05, "loss": 0.4938, "step": 5610 }, { "epoch": 1.5703890288273161, "grad_norm": 0.22500947527017523, "learning_rate": 5.0269947973809886e-05, "loss": 0.4907, "step": 5611 }, { "epoch": 1.5706689056815002, "grad_norm": 0.2323955181170479, "learning_rate": 5.0254522512585056e-05, "loss": 0.4987, "step": 5612 }, { "epoch": 1.5709487825356843, "grad_norm": 0.22234777205149137, "learning_rate": 5.023909702713463e-05, "loss": 0.4779, "step": 5613 }, { "epoch": 1.5712286593898686, "grad_norm": 0.21985912973673657, "learning_rate": 5.0223671518926806e-05, "loss": 0.4798, "step": 5614 }, { "epoch": 1.5715085362440526, "grad_norm": 0.23034250758950078, "learning_rate": 5.020824598942981e-05, "loss": 0.491, "step": 5615 }, { "epoch": 1.5717884130982367, "grad_norm": 0.2228880479318296, "learning_rate": 5.019282044011184e-05, "loss": 0.5168, "step": 5616 }, { "epoch": 1.572068289952421, "grad_norm": 0.22690103597598169, "learning_rate": 5.017739487244112e-05, "loss": 0.4984, "step": 5617 }, { "epoch": 1.572348166806605, "grad_norm": 0.21771862196970726, "learning_rate": 5.016196928788586e-05, "loss": 0.4724, "step": 5618 }, { "epoch": 1.5726280436607891, "grad_norm": 0.2314285834647458, "learning_rate": 5.014654368791426e-05, "loss": 0.4915, "step": 5619 }, { "epoch": 1.5729079205149734, "grad_norm": 0.2325948459261361, "learning_rate": 5.0131118073994556e-05, "loss": 0.4928, "step": 5620 }, { "epoch": 1.5731877973691577, "grad_norm": 0.222898035682407, "learning_rate": 5.0115692447594954e-05, "loss": 0.4787, "step": 5621 }, { "epoch": 1.5734676742233418, "grad_norm": 0.2275370877876276, "learning_rate": 5.010026681018368e-05, "loss": 0.4814, "step": 5622 }, { "epoch": 1.5737475510775258, "grad_norm": 0.24306375118915882, "learning_rate": 5.0084841163228966e-05, "loss": 0.506, "step": 5623 }, { "epoch": 1.5740274279317101, "grad_norm": 0.2367990653165563, "learning_rate": 5.006941550819901e-05, "loss": 0.493, "step": 5624 }, { "epoch": 1.5743073047858942, "grad_norm": 0.2255838024038827, "learning_rate": 5.005398984656205e-05, "loss": 0.492, "step": 5625 }, { "epoch": 1.5745871816400783, "grad_norm": 0.2290702472706961, "learning_rate": 5.003856417978632e-05, "loss": 0.4938, "step": 5626 }, { "epoch": 1.5748670584942626, "grad_norm": 0.239322202470668, "learning_rate": 5.0023138509340016e-05, "loss": 0.5062, "step": 5627 }, { "epoch": 1.5751469353484466, "grad_norm": 0.2258696469173803, "learning_rate": 5.000771283669138e-05, "loss": 0.4786, "step": 5628 }, { "epoch": 1.5754268122026307, "grad_norm": 0.21587624527192376, "learning_rate": 4.999228716330864e-05, "loss": 0.4804, "step": 5629 }, { "epoch": 1.575706689056815, "grad_norm": 0.22861948012999467, "learning_rate": 4.997686149066e-05, "loss": 0.5041, "step": 5630 }, { "epoch": 1.5759865659109993, "grad_norm": 0.22952288917208172, "learning_rate": 4.9961435820213704e-05, "loss": 0.4678, "step": 5631 }, { "epoch": 1.5762664427651834, "grad_norm": 0.22081528456431063, "learning_rate": 4.994601015343796e-05, "loss": 0.5185, "step": 5632 }, { "epoch": 1.5765463196193674, "grad_norm": 0.23351140051001967, "learning_rate": 4.9930584491801e-05, "loss": 0.5161, "step": 5633 }, { "epoch": 1.5768261964735517, "grad_norm": 0.2243394134223082, "learning_rate": 4.9915158836771046e-05, "loss": 0.4846, "step": 5634 }, { "epoch": 1.5771060733277358, "grad_norm": 0.2217145560085866, "learning_rate": 4.9899733189816326e-05, "loss": 0.4935, "step": 5635 }, { "epoch": 1.5773859501819198, "grad_norm": 0.22724535119791522, "learning_rate": 4.988430755240506e-05, "loss": 0.4795, "step": 5636 }, { "epoch": 1.5776658270361041, "grad_norm": 0.21932074450067263, "learning_rate": 4.986888192600546e-05, "loss": 0.498, "step": 5637 }, { "epoch": 1.5779457038902884, "grad_norm": 0.21841368179356946, "learning_rate": 4.985345631208576e-05, "loss": 0.4711, "step": 5638 }, { "epoch": 1.5782255807444723, "grad_norm": 0.230609290062492, "learning_rate": 4.983803071211416e-05, "loss": 0.4965, "step": 5639 }, { "epoch": 1.5785054575986566, "grad_norm": 0.2415922466704433, "learning_rate": 4.982260512755889e-05, "loss": 0.4885, "step": 5640 }, { "epoch": 1.5787853344528409, "grad_norm": 0.2354073525679611, "learning_rate": 4.980717955988817e-05, "loss": 0.5004, "step": 5641 }, { "epoch": 1.579065211307025, "grad_norm": 0.2303289745490306, "learning_rate": 4.97917540105702e-05, "loss": 0.5003, "step": 5642 }, { "epoch": 1.579345088161209, "grad_norm": 0.2230092275624761, "learning_rate": 4.977632848107319e-05, "loss": 0.4791, "step": 5643 }, { "epoch": 1.5796249650153933, "grad_norm": 0.23070455125285247, "learning_rate": 4.9760902972865376e-05, "loss": 0.496, "step": 5644 }, { "epoch": 1.5799048418695774, "grad_norm": 0.23893053439705275, "learning_rate": 4.974547748741494e-05, "loss": 0.4724, "step": 5645 }, { "epoch": 1.5801847187237614, "grad_norm": 0.22555313781105923, "learning_rate": 4.973005202619011e-05, "loss": 0.4678, "step": 5646 }, { "epoch": 1.5804645955779457, "grad_norm": 0.21855551936057607, "learning_rate": 4.9714626590659104e-05, "loss": 0.4702, "step": 5647 }, { "epoch": 1.58074447243213, "grad_norm": 0.23351187271333862, "learning_rate": 4.9699201182290065e-05, "loss": 0.4795, "step": 5648 }, { "epoch": 1.5810243492863139, "grad_norm": 0.22358200316100899, "learning_rate": 4.968377580255123e-05, "loss": 0.4816, "step": 5649 }, { "epoch": 1.5813042261404981, "grad_norm": 0.23157501059580052, "learning_rate": 4.966835045291079e-05, "loss": 0.4999, "step": 5650 }, { "epoch": 1.5815841029946824, "grad_norm": 0.2144969406219859, "learning_rate": 4.965292513483694e-05, "loss": 0.4652, "step": 5651 }, { "epoch": 1.5818639798488665, "grad_norm": 0.23900376296684603, "learning_rate": 4.963749984979787e-05, "loss": 0.5058, "step": 5652 }, { "epoch": 1.5821438567030506, "grad_norm": 0.23008662543329655, "learning_rate": 4.962207459926177e-05, "loss": 0.4895, "step": 5653 }, { "epoch": 1.5824237335572349, "grad_norm": 0.22020178253816597, "learning_rate": 4.9606649384696826e-05, "loss": 0.4952, "step": 5654 }, { "epoch": 1.582703610411419, "grad_norm": 0.22277943987091794, "learning_rate": 4.959122420757121e-05, "loss": 0.4903, "step": 5655 }, { "epoch": 1.582983487265603, "grad_norm": 0.23096417077931158, "learning_rate": 4.95757990693531e-05, "loss": 0.5208, "step": 5656 }, { "epoch": 1.5832633641197873, "grad_norm": 0.22222705436929513, "learning_rate": 4.956037397151067e-05, "loss": 0.48, "step": 5657 }, { "epoch": 1.5835432409739716, "grad_norm": 0.23613640831850014, "learning_rate": 4.95449489155121e-05, "loss": 0.4863, "step": 5658 }, { "epoch": 1.5838231178281557, "grad_norm": 0.23204011606346303, "learning_rate": 4.9529523902825534e-05, "loss": 0.4981, "step": 5659 }, { "epoch": 1.5841029946823397, "grad_norm": 0.22282931745119186, "learning_rate": 4.951409893491914e-05, "loss": 0.4811, "step": 5660 }, { "epoch": 1.584382871536524, "grad_norm": 0.21492766007267308, "learning_rate": 4.94986740132611e-05, "loss": 0.4848, "step": 5661 }, { "epoch": 1.584662748390708, "grad_norm": 0.2311786884387169, "learning_rate": 4.948324913931954e-05, "loss": 0.4805, "step": 5662 }, { "epoch": 1.5849426252448922, "grad_norm": 0.21940614258724447, "learning_rate": 4.946782431456262e-05, "loss": 0.4871, "step": 5663 }, { "epoch": 1.5852225020990764, "grad_norm": 0.23041075544554102, "learning_rate": 4.9452399540458484e-05, "loss": 0.482, "step": 5664 }, { "epoch": 1.5855023789532605, "grad_norm": 0.2210787333418233, "learning_rate": 4.943697481847528e-05, "loss": 0.4974, "step": 5665 }, { "epoch": 1.5857822558074446, "grad_norm": 0.2230033247855133, "learning_rate": 4.942155015008111e-05, "loss": 0.4939, "step": 5666 }, { "epoch": 1.5860621326616289, "grad_norm": 0.22711898224752045, "learning_rate": 4.940612553674411e-05, "loss": 0.4866, "step": 5667 }, { "epoch": 1.5863420095158132, "grad_norm": 0.22210416401114763, "learning_rate": 4.9390700979932425e-05, "loss": 0.4793, "step": 5668 }, { "epoch": 1.5866218863699972, "grad_norm": 0.23036242768367657, "learning_rate": 4.937527648111416e-05, "loss": 0.4666, "step": 5669 }, { "epoch": 1.5869017632241813, "grad_norm": 0.23307610569854123, "learning_rate": 4.935985204175743e-05, "loss": 0.5148, "step": 5670 }, { "epoch": 1.5871816400783656, "grad_norm": 0.22876382302810352, "learning_rate": 4.934442766333034e-05, "loss": 0.5002, "step": 5671 }, { "epoch": 1.5874615169325497, "grad_norm": 0.2300745790895094, "learning_rate": 4.932900334730099e-05, "loss": 0.4718, "step": 5672 }, { "epoch": 1.5877413937867337, "grad_norm": 0.23143121589506777, "learning_rate": 4.931357909513748e-05, "loss": 0.4986, "step": 5673 }, { "epoch": 1.588021270640918, "grad_norm": 0.2426854556143326, "learning_rate": 4.9298154908307896e-05, "loss": 0.4778, "step": 5674 }, { "epoch": 1.5883011474951023, "grad_norm": 0.22520928699315454, "learning_rate": 4.928273078828032e-05, "loss": 0.4875, "step": 5675 }, { "epoch": 1.5885810243492862, "grad_norm": 0.23469119149336215, "learning_rate": 4.926730673652283e-05, "loss": 0.4993, "step": 5676 }, { "epoch": 1.5888609012034705, "grad_norm": 0.23477085799090816, "learning_rate": 4.9251882754503494e-05, "loss": 0.4927, "step": 5677 }, { "epoch": 1.5891407780576547, "grad_norm": 0.2277194693858088, "learning_rate": 4.9236458843690366e-05, "loss": 0.4788, "step": 5678 }, { "epoch": 1.5894206549118388, "grad_norm": 0.2315005439338584, "learning_rate": 4.922103500555152e-05, "loss": 0.4772, "step": 5679 }, { "epoch": 1.5897005317660229, "grad_norm": 0.2148963207070413, "learning_rate": 4.9205611241555005e-05, "loss": 0.4727, "step": 5680 }, { "epoch": 1.5899804086202072, "grad_norm": 0.21648755482076593, "learning_rate": 4.9190187553168845e-05, "loss": 0.5016, "step": 5681 }, { "epoch": 1.5902602854743912, "grad_norm": 0.2650990713965363, "learning_rate": 4.91747639418611e-05, "loss": 0.5108, "step": 5682 }, { "epoch": 1.5905401623285753, "grad_norm": 0.22934040499718267, "learning_rate": 4.91593404090998e-05, "loss": 0.5056, "step": 5683 }, { "epoch": 1.5908200391827596, "grad_norm": 0.23879577773825494, "learning_rate": 4.914391695635292e-05, "loss": 0.5016, "step": 5684 }, { "epoch": 1.591099916036944, "grad_norm": 0.22643280906868846, "learning_rate": 4.912849358508851e-05, "loss": 0.481, "step": 5685 }, { "epoch": 1.5913797928911277, "grad_norm": 0.21949304829353772, "learning_rate": 4.9113070296774575e-05, "loss": 0.5085, "step": 5686 }, { "epoch": 1.591659669745312, "grad_norm": 0.23354786840345054, "learning_rate": 4.90976470928791e-05, "loss": 0.4891, "step": 5687 }, { "epoch": 1.5919395465994963, "grad_norm": 0.22745535886127166, "learning_rate": 4.9082223974870086e-05, "loss": 0.4946, "step": 5688 }, { "epoch": 1.5922194234536804, "grad_norm": 0.20991743426911497, "learning_rate": 4.90668009442155e-05, "loss": 0.4869, "step": 5689 }, { "epoch": 1.5924993003078645, "grad_norm": 0.21985904149845484, "learning_rate": 4.905137800238333e-05, "loss": 0.49, "step": 5690 }, { "epoch": 1.5927791771620488, "grad_norm": 0.22658574255004135, "learning_rate": 4.903595515084153e-05, "loss": 0.494, "step": 5691 }, { "epoch": 1.5930590540162328, "grad_norm": 0.2203899452351098, "learning_rate": 4.902053239105805e-05, "loss": 0.4939, "step": 5692 }, { "epoch": 1.593338930870417, "grad_norm": 0.23498014264086428, "learning_rate": 4.900510972450086e-05, "loss": 0.5185, "step": 5693 }, { "epoch": 1.5936188077246012, "grad_norm": 0.23073398037069082, "learning_rate": 4.898968715263787e-05, "loss": 0.4849, "step": 5694 }, { "epoch": 1.5938986845787855, "grad_norm": 0.24388999910783205, "learning_rate": 4.897426467693702e-05, "loss": 0.4741, "step": 5695 }, { "epoch": 1.5941785614329695, "grad_norm": 0.22466535931596224, "learning_rate": 4.895884229886624e-05, "loss": 0.493, "step": 5696 }, { "epoch": 1.5944584382871536, "grad_norm": 0.2319889488478196, "learning_rate": 4.8943420019893434e-05, "loss": 0.5071, "step": 5697 }, { "epoch": 1.594738315141338, "grad_norm": 0.2197193456644569, "learning_rate": 4.89279978414865e-05, "loss": 0.4796, "step": 5698 }, { "epoch": 1.595018191995522, "grad_norm": 0.24180193937634586, "learning_rate": 4.8912575765113336e-05, "loss": 0.4856, "step": 5699 }, { "epoch": 1.595298068849706, "grad_norm": 0.22368090012983122, "learning_rate": 4.889715379224181e-05, "loss": 0.5004, "step": 5700 }, { "epoch": 1.5955779457038903, "grad_norm": 0.22759770021300402, "learning_rate": 4.888173192433982e-05, "loss": 0.4882, "step": 5701 }, { "epoch": 1.5958578225580744, "grad_norm": 0.22742258388402647, "learning_rate": 4.8866310162875204e-05, "loss": 0.4998, "step": 5702 }, { "epoch": 1.5961376994122585, "grad_norm": 0.22141383827545275, "learning_rate": 4.885088850931582e-05, "loss": 0.4694, "step": 5703 }, { "epoch": 1.5964175762664428, "grad_norm": 0.2248426734194824, "learning_rate": 4.8835466965129503e-05, "loss": 0.5102, "step": 5704 }, { "epoch": 1.596697453120627, "grad_norm": 0.22648678235381703, "learning_rate": 4.8820045531784096e-05, "loss": 0.4878, "step": 5705 }, { "epoch": 1.5969773299748111, "grad_norm": 0.23415672028477244, "learning_rate": 4.8804624210747424e-05, "loss": 0.4917, "step": 5706 }, { "epoch": 1.5972572068289952, "grad_norm": 0.21833155102939192, "learning_rate": 4.8789203003487274e-05, "loss": 0.4946, "step": 5707 }, { "epoch": 1.5975370836831795, "grad_norm": 0.22172592701075167, "learning_rate": 4.877378191147147e-05, "loss": 0.4761, "step": 5708 }, { "epoch": 1.5978169605373636, "grad_norm": 0.22807897163810353, "learning_rate": 4.875836093616779e-05, "loss": 0.4774, "step": 5709 }, { "epoch": 1.5980968373915476, "grad_norm": 0.23259623595043447, "learning_rate": 4.874294007904401e-05, "loss": 0.5043, "step": 5710 }, { "epoch": 1.598376714245732, "grad_norm": 0.2324499737480055, "learning_rate": 4.8727519341567895e-05, "loss": 0.4906, "step": 5711 }, { "epoch": 1.5986565910999162, "grad_norm": 0.2400088947626389, "learning_rate": 4.871209872520719e-05, "loss": 0.4934, "step": 5712 }, { "epoch": 1.5989364679541, "grad_norm": 0.22025664699223768, "learning_rate": 4.8696678231429666e-05, "loss": 0.4722, "step": 5713 }, { "epoch": 1.5992163448082843, "grad_norm": 0.23673944026346538, "learning_rate": 4.868125786170303e-05, "loss": 0.4969, "step": 5714 }, { "epoch": 1.5994962216624686, "grad_norm": 0.23613047153648795, "learning_rate": 4.866583761749501e-05, "loss": 0.4936, "step": 5715 }, { "epoch": 1.5997760985166527, "grad_norm": 0.21196338207099613, "learning_rate": 4.8650417500273316e-05, "loss": 0.4937, "step": 5716 }, { "epoch": 1.6000559753708368, "grad_norm": 0.22503696481116564, "learning_rate": 4.8634997511505636e-05, "loss": 0.491, "step": 5717 }, { "epoch": 1.600335852225021, "grad_norm": 0.22896987780471204, "learning_rate": 4.861957765265966e-05, "loss": 0.4764, "step": 5718 }, { "epoch": 1.6006157290792051, "grad_norm": 0.22550125568022933, "learning_rate": 4.8604157925203064e-05, "loss": 0.4887, "step": 5719 }, { "epoch": 1.6008956059333892, "grad_norm": 0.22001401426355968, "learning_rate": 4.858873833060349e-05, "loss": 0.513, "step": 5720 }, { "epoch": 1.6011754827875735, "grad_norm": 0.23222746846310632, "learning_rate": 4.857331887032859e-05, "loss": 0.5125, "step": 5721 }, { "epoch": 1.6014553596417578, "grad_norm": 0.22786792983349033, "learning_rate": 4.8557899545846e-05, "loss": 0.4646, "step": 5722 }, { "epoch": 1.6017352364959416, "grad_norm": 0.21720613549163814, "learning_rate": 4.854248035862333e-05, "loss": 0.479, "step": 5723 }, { "epoch": 1.602015113350126, "grad_norm": 0.22555211669472716, "learning_rate": 4.852706131012821e-05, "loss": 0.5052, "step": 5724 }, { "epoch": 1.6022949902043102, "grad_norm": 0.22159626115305234, "learning_rate": 4.851164240182821e-05, "loss": 0.5021, "step": 5725 }, { "epoch": 1.6025748670584943, "grad_norm": 0.22149375881581837, "learning_rate": 4.8496223635190916e-05, "loss": 0.4923, "step": 5726 }, { "epoch": 1.6028547439126783, "grad_norm": 0.21561536936147985, "learning_rate": 4.8480805011683903e-05, "loss": 0.4842, "step": 5727 }, { "epoch": 1.6031346207668626, "grad_norm": 0.22233172955508182, "learning_rate": 4.846538653277472e-05, "loss": 0.4722, "step": 5728 }, { "epoch": 1.6034144976210467, "grad_norm": 0.223820077918928, "learning_rate": 4.8449968199930903e-05, "loss": 0.4892, "step": 5729 }, { "epoch": 1.6036943744752308, "grad_norm": 0.22504862664574823, "learning_rate": 4.8434550014619974e-05, "loss": 0.4922, "step": 5730 }, { "epoch": 1.603974251329415, "grad_norm": 0.23114975781002625, "learning_rate": 4.841913197830946e-05, "loss": 0.4864, "step": 5731 }, { "epoch": 1.6042541281835994, "grad_norm": 0.21962614629382674, "learning_rate": 4.840371409246684e-05, "loss": 0.4935, "step": 5732 }, { "epoch": 1.6045340050377834, "grad_norm": 0.22062467171930908, "learning_rate": 4.838829635855962e-05, "loss": 0.4768, "step": 5733 }, { "epoch": 1.6048138818919675, "grad_norm": 0.22731014871243183, "learning_rate": 4.8372878778055245e-05, "loss": 0.4709, "step": 5734 }, { "epoch": 1.6050937587461518, "grad_norm": 0.23312215764311575, "learning_rate": 4.835746135242118e-05, "loss": 0.5032, "step": 5735 }, { "epoch": 1.6053736356003359, "grad_norm": 0.23382333183840684, "learning_rate": 4.834204408312487e-05, "loss": 0.4783, "step": 5736 }, { "epoch": 1.60565351245452, "grad_norm": 0.22949331194837824, "learning_rate": 4.832662697163373e-05, "loss": 0.4918, "step": 5737 }, { "epoch": 1.6059333893087042, "grad_norm": 0.23931647766632208, "learning_rate": 4.8311210019415174e-05, "loss": 0.4775, "step": 5738 }, { "epoch": 1.6062132661628883, "grad_norm": 0.22694283907923388, "learning_rate": 4.829579322793659e-05, "loss": 0.5003, "step": 5739 }, { "epoch": 1.6064931430170724, "grad_norm": 0.24169102480458646, "learning_rate": 4.8280376598665364e-05, "loss": 0.5015, "step": 5740 }, { "epoch": 1.6067730198712566, "grad_norm": 0.23385063482340737, "learning_rate": 4.8264960133068846e-05, "loss": 0.4996, "step": 5741 }, { "epoch": 1.607052896725441, "grad_norm": 0.2365497882143094, "learning_rate": 4.82495438326144e-05, "loss": 0.5006, "step": 5742 }, { "epoch": 1.607332773579625, "grad_norm": 0.22155722089844201, "learning_rate": 4.823412769876935e-05, "loss": 0.4537, "step": 5743 }, { "epoch": 1.607612650433809, "grad_norm": 0.22160297539346716, "learning_rate": 4.821871173300101e-05, "loss": 0.5077, "step": 5744 }, { "epoch": 1.6078925272879934, "grad_norm": 0.21528485247190282, "learning_rate": 4.820329593677669e-05, "loss": 0.4781, "step": 5745 }, { "epoch": 1.6081724041421774, "grad_norm": 0.22039165194741256, "learning_rate": 4.818788031156367e-05, "loss": 0.4704, "step": 5746 }, { "epoch": 1.6084522809963615, "grad_norm": 0.22621626635198547, "learning_rate": 4.81724648588292e-05, "loss": 0.5048, "step": 5747 }, { "epoch": 1.6087321578505458, "grad_norm": 0.23215933840603425, "learning_rate": 4.815704958004056e-05, "loss": 0.514, "step": 5748 }, { "epoch": 1.6090120347047299, "grad_norm": 0.23004859929167618, "learning_rate": 4.814163447666498e-05, "loss": 0.498, "step": 5749 }, { "epoch": 1.609291911558914, "grad_norm": 0.22818970963295204, "learning_rate": 4.812621955016966e-05, "loss": 0.4717, "step": 5750 }, { "epoch": 1.6095717884130982, "grad_norm": 0.22221224321953403, "learning_rate": 4.811080480202181e-05, "loss": 0.4497, "step": 5751 }, { "epoch": 1.6098516652672825, "grad_norm": 0.2164486914325114, "learning_rate": 4.8095390233688624e-05, "loss": 0.4698, "step": 5752 }, { "epoch": 1.6101315421214666, "grad_norm": 0.22699179329191232, "learning_rate": 4.807997584663726e-05, "loss": 0.5135, "step": 5753 }, { "epoch": 1.6104114189756507, "grad_norm": 0.22011899742096697, "learning_rate": 4.806456164233487e-05, "loss": 0.4858, "step": 5754 }, { "epoch": 1.610691295829835, "grad_norm": 0.22914681138089035, "learning_rate": 4.8049147622248586e-05, "loss": 0.4927, "step": 5755 }, { "epoch": 1.610971172684019, "grad_norm": 0.225856117082813, "learning_rate": 4.8033733787845535e-05, "loss": 0.5069, "step": 5756 }, { "epoch": 1.611251049538203, "grad_norm": 0.22067990693800643, "learning_rate": 4.801832014059279e-05, "loss": 0.4802, "step": 5757 }, { "epoch": 1.6115309263923874, "grad_norm": 0.23779024106537164, "learning_rate": 4.8002906681957444e-05, "loss": 0.5095, "step": 5758 }, { "epoch": 1.6118108032465717, "grad_norm": 0.22280889650205968, "learning_rate": 4.798749341340656e-05, "loss": 0.466, "step": 5759 }, { "epoch": 1.6120906801007555, "grad_norm": 0.22707467688410218, "learning_rate": 4.797208033640718e-05, "loss": 0.4826, "step": 5760 }, { "epoch": 1.6123705569549398, "grad_norm": 0.22367707952061988, "learning_rate": 4.7956667452426315e-05, "loss": 0.4943, "step": 5761 }, { "epoch": 1.612650433809124, "grad_norm": 0.2274460927084838, "learning_rate": 4.7941254762931e-05, "loss": 0.5029, "step": 5762 }, { "epoch": 1.6129303106633082, "grad_norm": 0.22289882798046468, "learning_rate": 4.7925842269388206e-05, "loss": 0.4829, "step": 5763 }, { "epoch": 1.6132101875174922, "grad_norm": 0.2271771812863479, "learning_rate": 4.791042997326489e-05, "loss": 0.4935, "step": 5764 }, { "epoch": 1.6134900643716765, "grad_norm": 0.2267766613971914, "learning_rate": 4.789501787602804e-05, "loss": 0.4851, "step": 5765 }, { "epoch": 1.6137699412258606, "grad_norm": 0.23638489851748645, "learning_rate": 4.787960597914456e-05, "loss": 0.51, "step": 5766 }, { "epoch": 1.6140498180800447, "grad_norm": 0.2271291952099077, "learning_rate": 4.786419428408137e-05, "loss": 0.5048, "step": 5767 }, { "epoch": 1.614329694934229, "grad_norm": 0.22745023606135853, "learning_rate": 4.784878279230536e-05, "loss": 0.4881, "step": 5768 }, { "epoch": 1.6146095717884132, "grad_norm": 0.2263025599007156, "learning_rate": 4.783337150528341e-05, "loss": 0.4861, "step": 5769 }, { "epoch": 1.614889448642597, "grad_norm": 0.2294863080810387, "learning_rate": 4.7817960424482375e-05, "loss": 0.4778, "step": 5770 }, { "epoch": 1.6151693254967814, "grad_norm": 0.23131908954082656, "learning_rate": 4.780254955136909e-05, "loss": 0.4879, "step": 5771 }, { "epoch": 1.6154492023509657, "grad_norm": 0.22520059969785933, "learning_rate": 4.778713888741036e-05, "loss": 0.4877, "step": 5772 }, { "epoch": 1.6157290792051497, "grad_norm": 0.2260490977803011, "learning_rate": 4.7771728434073e-05, "loss": 0.5019, "step": 5773 }, { "epoch": 1.6160089560593338, "grad_norm": 0.2231625875053975, "learning_rate": 4.775631819282378e-05, "loss": 0.4636, "step": 5774 }, { "epoch": 1.616288832913518, "grad_norm": 0.22510355582497066, "learning_rate": 4.774090816512944e-05, "loss": 0.491, "step": 5775 }, { "epoch": 1.6165687097677022, "grad_norm": 0.24549719427424876, "learning_rate": 4.7725498352456735e-05, "loss": 0.5195, "step": 5776 }, { "epoch": 1.6168485866218862, "grad_norm": 0.22356303132998295, "learning_rate": 4.771008875627236e-05, "loss": 0.4998, "step": 5777 }, { "epoch": 1.6171284634760705, "grad_norm": 0.22229706812601246, "learning_rate": 4.7694679378043014e-05, "loss": 0.4688, "step": 5778 }, { "epoch": 1.6174083403302548, "grad_norm": 0.22785819550643896, "learning_rate": 4.7679270219235384e-05, "loss": 0.5035, "step": 5779 }, { "epoch": 1.617688217184439, "grad_norm": 0.231961879890167, "learning_rate": 4.766386128131611e-05, "loss": 0.4939, "step": 5780 }, { "epoch": 1.617968094038623, "grad_norm": 0.23494886687096408, "learning_rate": 4.764845256575183e-05, "loss": 0.4753, "step": 5781 }, { "epoch": 1.6182479708928073, "grad_norm": 0.22048812903283166, "learning_rate": 4.7633044074009134e-05, "loss": 0.4864, "step": 5782 }, { "epoch": 1.6185278477469913, "grad_norm": 0.23580651142556416, "learning_rate": 4.7617635807554644e-05, "loss": 0.5018, "step": 5783 }, { "epoch": 1.6188077246011754, "grad_norm": 0.23905429525768718, "learning_rate": 4.7602227767854906e-05, "loss": 0.5257, "step": 5784 }, { "epoch": 1.6190876014553597, "grad_norm": 0.2134645462747951, "learning_rate": 4.758681995637648e-05, "loss": 0.4698, "step": 5785 }, { "epoch": 1.6193674783095438, "grad_norm": 0.21912439150211308, "learning_rate": 4.757141237458587e-05, "loss": 0.4953, "step": 5786 }, { "epoch": 1.6196473551637278, "grad_norm": 0.23041845944682493, "learning_rate": 4.75560050239496e-05, "loss": 0.486, "step": 5787 }, { "epoch": 1.619927232017912, "grad_norm": 0.22738879643277715, "learning_rate": 4.7540597905934136e-05, "loss": 0.502, "step": 5788 }, { "epoch": 1.6202071088720964, "grad_norm": 0.21373301400501682, "learning_rate": 4.7525191022005935e-05, "loss": 0.5089, "step": 5789 }, { "epoch": 1.6204869857262805, "grad_norm": 0.2326314009368342, "learning_rate": 4.7509784373631444e-05, "loss": 0.5047, "step": 5790 }, { "epoch": 1.6207668625804645, "grad_norm": 0.22847212324678506, "learning_rate": 4.749437796227707e-05, "loss": 0.4866, "step": 5791 }, { "epoch": 1.6210467394346488, "grad_norm": 0.22142696297588046, "learning_rate": 4.747897178940921e-05, "loss": 0.4793, "step": 5792 }, { "epoch": 1.621326616288833, "grad_norm": 0.23040322172517957, "learning_rate": 4.746356585649422e-05, "loss": 0.4813, "step": 5793 }, { "epoch": 1.621606493143017, "grad_norm": 0.2280228144904133, "learning_rate": 4.744816016499845e-05, "loss": 0.4785, "step": 5794 }, { "epoch": 1.6218863699972013, "grad_norm": 0.22175047501904213, "learning_rate": 4.7432754716388224e-05, "loss": 0.4742, "step": 5795 }, { "epoch": 1.6221662468513856, "grad_norm": 0.21681999451189307, "learning_rate": 4.741734951212984e-05, "loss": 0.469, "step": 5796 }, { "epoch": 1.6224461237055694, "grad_norm": 0.2291453645582424, "learning_rate": 4.740194455368957e-05, "loss": 0.4807, "step": 5797 }, { "epoch": 1.6227260005597537, "grad_norm": 0.21659639889454546, "learning_rate": 4.738653984253368e-05, "loss": 0.4974, "step": 5798 }, { "epoch": 1.623005877413938, "grad_norm": 0.22857678859931882, "learning_rate": 4.737113538012838e-05, "loss": 0.4767, "step": 5799 }, { "epoch": 1.623285754268122, "grad_norm": 0.22695758737965813, "learning_rate": 4.735573116793989e-05, "loss": 0.4908, "step": 5800 }, { "epoch": 1.6235656311223061, "grad_norm": 0.2241580374931068, "learning_rate": 4.734032720743439e-05, "loss": 0.4954, "step": 5801 }, { "epoch": 1.6238455079764904, "grad_norm": 0.2295402350886931, "learning_rate": 4.732492350007804e-05, "loss": 0.5076, "step": 5802 }, { "epoch": 1.6241253848306745, "grad_norm": 0.225986721024452, "learning_rate": 4.7309520047336964e-05, "loss": 0.4669, "step": 5803 }, { "epoch": 1.6244052616848585, "grad_norm": 0.23057794100971507, "learning_rate": 4.729411685067728e-05, "loss": 0.4836, "step": 5804 }, { "epoch": 1.6246851385390428, "grad_norm": 0.22842679919809245, "learning_rate": 4.727871391156507e-05, "loss": 0.4877, "step": 5805 }, { "epoch": 1.6249650153932271, "grad_norm": 0.2239617920477314, "learning_rate": 4.726331123146638e-05, "loss": 0.4781, "step": 5806 }, { "epoch": 1.625244892247411, "grad_norm": 0.2322547555875918, "learning_rate": 4.724790881184727e-05, "loss": 0.493, "step": 5807 }, { "epoch": 1.6255247691015953, "grad_norm": 0.22273928750984806, "learning_rate": 4.723250665417374e-05, "loss": 0.4685, "step": 5808 }, { "epoch": 1.6258046459557796, "grad_norm": 0.22855695969211148, "learning_rate": 4.721710475991177e-05, "loss": 0.4709, "step": 5809 }, { "epoch": 1.6260845228099636, "grad_norm": 0.23504133375235953, "learning_rate": 4.720170313052734e-05, "loss": 0.4944, "step": 5810 }, { "epoch": 1.6263643996641477, "grad_norm": 0.2199186142727277, "learning_rate": 4.718630176748636e-05, "loss": 0.4761, "step": 5811 }, { "epoch": 1.626644276518332, "grad_norm": 0.2275762801104239, "learning_rate": 4.717090067225475e-05, "loss": 0.4682, "step": 5812 }, { "epoch": 1.626924153372516, "grad_norm": 0.2275213474136106, "learning_rate": 4.7155499846298404e-05, "loss": 0.4917, "step": 5813 }, { "epoch": 1.6272040302267001, "grad_norm": 0.22296716415371437, "learning_rate": 4.7140099291083174e-05, "loss": 0.4915, "step": 5814 }, { "epoch": 1.6274839070808844, "grad_norm": 0.22721460994923723, "learning_rate": 4.712469900807489e-05, "loss": 0.4967, "step": 5815 }, { "epoch": 1.6277637839350687, "grad_norm": 0.2322878810055175, "learning_rate": 4.710929899873936e-05, "loss": 0.4911, "step": 5816 }, { "epoch": 1.6280436607892528, "grad_norm": 0.2282839435903056, "learning_rate": 4.709389926454237e-05, "loss": 0.4971, "step": 5817 }, { "epoch": 1.6283235376434368, "grad_norm": 0.2225925443350299, "learning_rate": 4.7078499806949685e-05, "loss": 0.4995, "step": 5818 }, { "epoch": 1.6286034144976211, "grad_norm": 0.22744894693790926, "learning_rate": 4.706310062742702e-05, "loss": 0.4741, "step": 5819 }, { "epoch": 1.6288832913518052, "grad_norm": 0.22262399579480868, "learning_rate": 4.704770172744008e-05, "loss": 0.4994, "step": 5820 }, { "epoch": 1.6291631682059893, "grad_norm": 0.23047125974994434, "learning_rate": 4.703230310845454e-05, "loss": 0.5055, "step": 5821 }, { "epoch": 1.6294430450601736, "grad_norm": 0.2308635977951529, "learning_rate": 4.7016904771936054e-05, "loss": 0.5175, "step": 5822 }, { "epoch": 1.6297229219143576, "grad_norm": 0.23588138138650422, "learning_rate": 4.700150671935024e-05, "loss": 0.4896, "step": 5823 }, { "epoch": 1.6300027987685417, "grad_norm": 0.2315473408984742, "learning_rate": 4.6986108952162695e-05, "loss": 0.496, "step": 5824 }, { "epoch": 1.630282675622726, "grad_norm": 0.22932480749325715, "learning_rate": 4.697071147183899e-05, "loss": 0.5087, "step": 5825 }, { "epoch": 1.6305625524769103, "grad_norm": 0.2321368033950167, "learning_rate": 4.695531427984466e-05, "loss": 0.4858, "step": 5826 }, { "epoch": 1.6308424293310944, "grad_norm": 0.2220572018861899, "learning_rate": 4.693991737764521e-05, "loss": 0.466, "step": 5827 }, { "epoch": 1.6311223061852784, "grad_norm": 0.22144207840823874, "learning_rate": 4.692452076670617e-05, "loss": 0.4942, "step": 5828 }, { "epoch": 1.6314021830394627, "grad_norm": 0.2241728442411565, "learning_rate": 4.690912444849294e-05, "loss": 0.5083, "step": 5829 }, { "epoch": 1.6316820598936468, "grad_norm": 0.2238142220763365, "learning_rate": 4.6893728424470976e-05, "loss": 0.4913, "step": 5830 }, { "epoch": 1.6319619367478309, "grad_norm": 0.21963971541187846, "learning_rate": 4.6878332696105685e-05, "loss": 0.5038, "step": 5831 }, { "epoch": 1.6322418136020151, "grad_norm": 0.22034137768128578, "learning_rate": 4.6862937264862435e-05, "loss": 0.4751, "step": 5832 }, { "epoch": 1.6325216904561994, "grad_norm": 0.23384429528325093, "learning_rate": 4.6847542132206566e-05, "loss": 0.4861, "step": 5833 }, { "epoch": 1.6328015673103833, "grad_norm": 0.23162851689482852, "learning_rate": 4.683214729960339e-05, "loss": 0.4917, "step": 5834 }, { "epoch": 1.6330814441645676, "grad_norm": 0.2205854299944986, "learning_rate": 4.681675276851822e-05, "loss": 0.4724, "step": 5835 }, { "epoch": 1.6333613210187519, "grad_norm": 0.22251589089958843, "learning_rate": 4.6801358540416304e-05, "loss": 0.455, "step": 5836 }, { "epoch": 1.633641197872936, "grad_norm": 0.23482068124776187, "learning_rate": 4.678596461676288e-05, "loss": 0.4969, "step": 5837 }, { "epoch": 1.63392107472712, "grad_norm": 0.23365477138352836, "learning_rate": 4.677057099902313e-05, "loss": 0.5035, "step": 5838 }, { "epoch": 1.6342009515813043, "grad_norm": 0.2335303523267849, "learning_rate": 4.675517768866224e-05, "loss": 0.4858, "step": 5839 }, { "epoch": 1.6344808284354884, "grad_norm": 0.23195225972423889, "learning_rate": 4.673978468714537e-05, "loss": 0.4971, "step": 5840 }, { "epoch": 1.6347607052896724, "grad_norm": 0.21817598108023795, "learning_rate": 4.6724391995937604e-05, "loss": 0.4643, "step": 5841 }, { "epoch": 1.6350405821438567, "grad_norm": 0.21250469154648938, "learning_rate": 4.670899961650405e-05, "loss": 0.4755, "step": 5842 }, { "epoch": 1.635320458998041, "grad_norm": 0.23772420948105108, "learning_rate": 4.6693607550309746e-05, "loss": 0.496, "step": 5843 }, { "epoch": 1.6356003358522249, "grad_norm": 0.23044886190912658, "learning_rate": 4.667821579881973e-05, "loss": 0.4909, "step": 5844 }, { "epoch": 1.6358802127064092, "grad_norm": 0.2286914092546674, "learning_rate": 4.666282436349898e-05, "loss": 0.4789, "step": 5845 }, { "epoch": 1.6361600895605934, "grad_norm": 0.233776104459152, "learning_rate": 4.664743324581251e-05, "loss": 0.463, "step": 5846 }, { "epoch": 1.6364399664147775, "grad_norm": 0.2197032189289928, "learning_rate": 4.66320424472252e-05, "loss": 0.5025, "step": 5847 }, { "epoch": 1.6367198432689616, "grad_norm": 0.22844563758750308, "learning_rate": 4.661665196920197e-05, "loss": 0.486, "step": 5848 }, { "epoch": 1.6369997201231459, "grad_norm": 0.23659771077719677, "learning_rate": 4.66012618132077e-05, "loss": 0.5113, "step": 5849 }, { "epoch": 1.63727959697733, "grad_norm": 0.21783685094359637, "learning_rate": 4.658587198070723e-05, "loss": 0.5006, "step": 5850 }, { "epoch": 1.637559473831514, "grad_norm": 0.22521610038427864, "learning_rate": 4.657048247316538e-05, "loss": 0.4972, "step": 5851 }, { "epoch": 1.6378393506856983, "grad_norm": 0.23846270943962297, "learning_rate": 4.655509329204692e-05, "loss": 0.5103, "step": 5852 }, { "epoch": 1.6381192275398826, "grad_norm": 0.23309715924226423, "learning_rate": 4.653970443881662e-05, "loss": 0.5049, "step": 5853 }, { "epoch": 1.6383991043940667, "grad_norm": 0.22791721167712203, "learning_rate": 4.6524315914939184e-05, "loss": 0.4753, "step": 5854 }, { "epoch": 1.6386789812482507, "grad_norm": 0.21530879095648967, "learning_rate": 4.6508927721879315e-05, "loss": 0.4819, "step": 5855 }, { "epoch": 1.638958858102435, "grad_norm": 0.21904974706164682, "learning_rate": 4.649353986110165e-05, "loss": 0.4931, "step": 5856 }, { "epoch": 1.639238734956619, "grad_norm": 0.21927640189226147, "learning_rate": 4.6478152334070825e-05, "loss": 0.4786, "step": 5857 }, { "epoch": 1.6395186118108032, "grad_norm": 0.22423225093334115, "learning_rate": 4.646276514225143e-05, "loss": 0.4932, "step": 5858 }, { "epoch": 1.6397984886649875, "grad_norm": 0.23389077452977902, "learning_rate": 4.644737828710803e-05, "loss": 0.4889, "step": 5859 }, { "epoch": 1.6400783655191715, "grad_norm": 0.22798625696329272, "learning_rate": 4.643199177010515e-05, "loss": 0.5063, "step": 5860 }, { "epoch": 1.6403582423733556, "grad_norm": 0.22614433623546817, "learning_rate": 4.64166055927073e-05, "loss": 0.4863, "step": 5861 }, { "epoch": 1.6406381192275399, "grad_norm": 0.24011361772060963, "learning_rate": 4.640121975637892e-05, "loss": 0.4931, "step": 5862 }, { "epoch": 1.6409179960817242, "grad_norm": 0.22126510942477237, "learning_rate": 4.638583426258447e-05, "loss": 0.4858, "step": 5863 }, { "epoch": 1.6411978729359082, "grad_norm": 0.2303787783755845, "learning_rate": 4.637044911278835e-05, "loss": 0.4924, "step": 5864 }, { "epoch": 1.6414777497900923, "grad_norm": 0.2291733480745904, "learning_rate": 4.6355064308454896e-05, "loss": 0.4915, "step": 5865 }, { "epoch": 1.6417576266442766, "grad_norm": 0.22438927776433693, "learning_rate": 4.633967985104847e-05, "loss": 0.4928, "step": 5866 }, { "epoch": 1.6420375034984607, "grad_norm": 0.22222514556871162, "learning_rate": 4.632429574203337e-05, "loss": 0.4942, "step": 5867 }, { "epoch": 1.6423173803526447, "grad_norm": 0.23048259448382782, "learning_rate": 4.6308911982873827e-05, "loss": 0.5152, "step": 5868 }, { "epoch": 1.642597257206829, "grad_norm": 0.23404076649464106, "learning_rate": 4.629352857503413e-05, "loss": 0.4885, "step": 5869 }, { "epoch": 1.642877134061013, "grad_norm": 0.22963516782425958, "learning_rate": 4.627814551997845e-05, "loss": 0.4869, "step": 5870 }, { "epoch": 1.6431570109151972, "grad_norm": 0.23071543276779807, "learning_rate": 4.626276281917098e-05, "loss": 0.4776, "step": 5871 }, { "epoch": 1.6434368877693815, "grad_norm": 0.22203563859459977, "learning_rate": 4.624738047407582e-05, "loss": 0.4821, "step": 5872 }, { "epoch": 1.6437167646235658, "grad_norm": 0.24155444106265953, "learning_rate": 4.6231998486157096e-05, "loss": 0.5049, "step": 5873 }, { "epoch": 1.6439966414777498, "grad_norm": 0.22196095420075007, "learning_rate": 4.621661685687886e-05, "loss": 0.47, "step": 5874 }, { "epoch": 1.644276518331934, "grad_norm": 0.21753029932178544, "learning_rate": 4.6201235587705154e-05, "loss": 0.4843, "step": 5875 }, { "epoch": 1.6445563951861182, "grad_norm": 0.2201254291133512, "learning_rate": 4.6185854680099974e-05, "loss": 0.4876, "step": 5876 }, { "epoch": 1.6448362720403022, "grad_norm": 0.23444558673827312, "learning_rate": 4.617047413552727e-05, "loss": 0.5071, "step": 5877 }, { "epoch": 1.6451161488944863, "grad_norm": 0.23655398524443977, "learning_rate": 4.6155093955450985e-05, "loss": 0.4718, "step": 5878 }, { "epoch": 1.6453960257486706, "grad_norm": 0.22630376536241545, "learning_rate": 4.6139714141335e-05, "loss": 0.4784, "step": 5879 }, { "epoch": 1.645675902602855, "grad_norm": 0.22067053937881898, "learning_rate": 4.61243346946432e-05, "loss": 0.4799, "step": 5880 }, { "epoch": 1.6459557794570387, "grad_norm": 0.23505338311486845, "learning_rate": 4.610895561683938e-05, "loss": 0.4818, "step": 5881 }, { "epoch": 1.646235656311223, "grad_norm": 0.22452848634660116, "learning_rate": 4.6093576909387376e-05, "loss": 0.475, "step": 5882 }, { "epoch": 1.6465155331654073, "grad_norm": 0.23065555689457484, "learning_rate": 4.607819857375088e-05, "loss": 0.5123, "step": 5883 }, { "epoch": 1.6467954100195914, "grad_norm": 0.219445416008085, "learning_rate": 4.606282061139364e-05, "loss": 0.4816, "step": 5884 }, { "epoch": 1.6470752868737755, "grad_norm": 0.22709438830454265, "learning_rate": 4.604744302377933e-05, "loss": 0.5004, "step": 5885 }, { "epoch": 1.6473551637279598, "grad_norm": 0.2355941425384972, "learning_rate": 4.6032065812371614e-05, "loss": 0.4682, "step": 5886 }, { "epoch": 1.6476350405821438, "grad_norm": 0.22510368357791385, "learning_rate": 4.6016688978634095e-05, "loss": 0.506, "step": 5887 }, { "epoch": 1.647914917436328, "grad_norm": 0.2258353823731744, "learning_rate": 4.600131252403035e-05, "loss": 0.4832, "step": 5888 }, { "epoch": 1.6481947942905122, "grad_norm": 0.22288426387673418, "learning_rate": 4.598593645002392e-05, "loss": 0.481, "step": 5889 }, { "epoch": 1.6484746711446965, "grad_norm": 0.22088939892550016, "learning_rate": 4.597056075807829e-05, "loss": 0.4951, "step": 5890 }, { "epoch": 1.6487545479988803, "grad_norm": 0.23297039859727472, "learning_rate": 4.5955185449656956e-05, "loss": 0.4725, "step": 5891 }, { "epoch": 1.6490344248530646, "grad_norm": 0.22609904076997075, "learning_rate": 4.5939810526223336e-05, "loss": 0.5165, "step": 5892 }, { "epoch": 1.649314301707249, "grad_norm": 0.21915577383921972, "learning_rate": 4.5924435989240813e-05, "loss": 0.5098, "step": 5893 }, { "epoch": 1.649594178561433, "grad_norm": 0.2169439502194135, "learning_rate": 4.5909061840172764e-05, "loss": 0.4764, "step": 5894 }, { "epoch": 1.649874055415617, "grad_norm": 0.22813028526847892, "learning_rate": 4.5893688080482494e-05, "loss": 0.4878, "step": 5895 }, { "epoch": 1.6501539322698013, "grad_norm": 0.22952508110132797, "learning_rate": 4.587831471163328e-05, "loss": 0.4893, "step": 5896 }, { "epoch": 1.6504338091239854, "grad_norm": 0.21842203635432386, "learning_rate": 4.586294173508839e-05, "loss": 0.4738, "step": 5897 }, { "epoch": 1.6507136859781695, "grad_norm": 0.22715534977074694, "learning_rate": 4.5847569152311025e-05, "loss": 0.4734, "step": 5898 }, { "epoch": 1.6509935628323538, "grad_norm": 0.23395063346934178, "learning_rate": 4.5832196964764354e-05, "loss": 0.475, "step": 5899 }, { "epoch": 1.651273439686538, "grad_norm": 0.22177215733046468, "learning_rate": 4.5816825173911524e-05, "loss": 0.4698, "step": 5900 }, { "epoch": 1.6515533165407221, "grad_norm": 0.2355590874464105, "learning_rate": 4.580145378121559e-05, "loss": 0.4928, "step": 5901 }, { "epoch": 1.6518331933949062, "grad_norm": 0.23261071282838638, "learning_rate": 4.578608278813964e-05, "loss": 0.5261, "step": 5902 }, { "epoch": 1.6521130702490905, "grad_norm": 0.21952656709338766, "learning_rate": 4.577071219614668e-05, "loss": 0.4746, "step": 5903 }, { "epoch": 1.6523929471032746, "grad_norm": 0.2324125419181912, "learning_rate": 4.5755342006699706e-05, "loss": 0.5151, "step": 5904 }, { "epoch": 1.6526728239574586, "grad_norm": 0.22493144406925525, "learning_rate": 4.5739972221261664e-05, "loss": 0.4883, "step": 5905 }, { "epoch": 1.652952700811643, "grad_norm": 0.23110646357607306, "learning_rate": 4.572460284129544e-05, "loss": 0.4691, "step": 5906 }, { "epoch": 1.653232577665827, "grad_norm": 0.22352867732678683, "learning_rate": 4.5709233868263926e-05, "loss": 0.4825, "step": 5907 }, { "epoch": 1.653512454520011, "grad_norm": 0.22669544003950048, "learning_rate": 4.569386530362992e-05, "loss": 0.4984, "step": 5908 }, { "epoch": 1.6537923313741953, "grad_norm": 0.21539919627219203, "learning_rate": 4.567849714885623e-05, "loss": 0.4809, "step": 5909 }, { "epoch": 1.6540722082283796, "grad_norm": 0.22892231206163438, "learning_rate": 4.5663129405405594e-05, "loss": 0.4964, "step": 5910 }, { "epoch": 1.6543520850825637, "grad_norm": 0.22588310995304536, "learning_rate": 4.5647762074740733e-05, "loss": 0.4896, "step": 5911 }, { "epoch": 1.6546319619367478, "grad_norm": 0.22702448418150784, "learning_rate": 4.563239515832432e-05, "loss": 0.4736, "step": 5912 }, { "epoch": 1.654911838790932, "grad_norm": 0.21587541396299906, "learning_rate": 4.561702865761897e-05, "loss": 0.4635, "step": 5913 }, { "epoch": 1.6551917156451161, "grad_norm": 0.21924899188902525, "learning_rate": 4.560166257408728e-05, "loss": 0.4737, "step": 5914 }, { "epoch": 1.6554715924993002, "grad_norm": 0.224600179807053, "learning_rate": 4.558629690919182e-05, "loss": 0.4944, "step": 5915 }, { "epoch": 1.6557514693534845, "grad_norm": 0.23126227922062179, "learning_rate": 4.5570931664395086e-05, "loss": 0.4833, "step": 5916 }, { "epoch": 1.6560313462076688, "grad_norm": 0.24263733339694077, "learning_rate": 4.555556684115956e-05, "loss": 0.4934, "step": 5917 }, { "epoch": 1.6563112230618526, "grad_norm": 0.22776045189029373, "learning_rate": 4.5540202440947694e-05, "loss": 0.4959, "step": 5918 }, { "epoch": 1.656591099916037, "grad_norm": 0.23660381641151323, "learning_rate": 4.5524838465221834e-05, "loss": 0.5021, "step": 5919 }, { "epoch": 1.6568709767702212, "grad_norm": 0.23757745635885563, "learning_rate": 4.550947491544433e-05, "loss": 0.4886, "step": 5920 }, { "epoch": 1.6571508536244053, "grad_norm": 0.2308209439062421, "learning_rate": 4.5494111793077544e-05, "loss": 0.5043, "step": 5921 }, { "epoch": 1.6574307304785894, "grad_norm": 0.23364151807682357, "learning_rate": 4.5478749099583715e-05, "loss": 0.4823, "step": 5922 }, { "epoch": 1.6577106073327736, "grad_norm": 0.214234033539558, "learning_rate": 4.546338683642507e-05, "loss": 0.4854, "step": 5923 }, { "epoch": 1.6579904841869577, "grad_norm": 0.23334496890303244, "learning_rate": 4.544802500506381e-05, "loss": 0.5016, "step": 5924 }, { "epoch": 1.6582703610411418, "grad_norm": 0.2211220584306918, "learning_rate": 4.543266360696208e-05, "loss": 0.4759, "step": 5925 }, { "epoch": 1.658550237895326, "grad_norm": 0.22684648721566092, "learning_rate": 4.5417302643581985e-05, "loss": 0.4795, "step": 5926 }, { "epoch": 1.6588301147495104, "grad_norm": 0.2245276472669048, "learning_rate": 4.5401942116385584e-05, "loss": 0.4801, "step": 5927 }, { "epoch": 1.6591099916036942, "grad_norm": 0.22804224758993163, "learning_rate": 4.5386582026834906e-05, "loss": 0.4978, "step": 5928 }, { "epoch": 1.6593898684578785, "grad_norm": 0.22697568082009562, "learning_rate": 4.5371222376391935e-05, "loss": 0.4914, "step": 5929 }, { "epoch": 1.6596697453120628, "grad_norm": 0.23634017328640117, "learning_rate": 4.5355863166518616e-05, "loss": 0.475, "step": 5930 }, { "epoch": 1.6599496221662469, "grad_norm": 0.22538460152394849, "learning_rate": 4.534050439867682e-05, "loss": 0.4762, "step": 5931 }, { "epoch": 1.660229499020431, "grad_norm": 0.21779274669211907, "learning_rate": 4.532514607432843e-05, "loss": 0.4813, "step": 5932 }, { "epoch": 1.6605093758746152, "grad_norm": 0.22294375761213864, "learning_rate": 4.5309788194935266e-05, "loss": 0.469, "step": 5933 }, { "epoch": 1.6607892527287993, "grad_norm": 0.21893675036246654, "learning_rate": 4.5294430761959086e-05, "loss": 0.4788, "step": 5934 }, { "epoch": 1.6610691295829834, "grad_norm": 0.2334628749230677, "learning_rate": 4.527907377686161e-05, "loss": 0.4925, "step": 5935 }, { "epoch": 1.6613490064371677, "grad_norm": 0.23360420114826788, "learning_rate": 4.5263717241104566e-05, "loss": 0.4958, "step": 5936 }, { "epoch": 1.661628883291352, "grad_norm": 0.2263676043324893, "learning_rate": 4.5248361156149526e-05, "loss": 0.4867, "step": 5937 }, { "epoch": 1.661908760145536, "grad_norm": 0.2330537673372661, "learning_rate": 4.523300552345814e-05, "loss": 0.4953, "step": 5938 }, { "epoch": 1.66218863699972, "grad_norm": 0.23512582798314416, "learning_rate": 4.521765034449197e-05, "loss": 0.4913, "step": 5939 }, { "epoch": 1.6624685138539044, "grad_norm": 0.22191100656784873, "learning_rate": 4.5202295620712505e-05, "loss": 0.4736, "step": 5940 }, { "epoch": 1.6627483907080884, "grad_norm": 0.22857678048063068, "learning_rate": 4.518694135358123e-05, "loss": 0.4974, "step": 5941 }, { "epoch": 1.6630282675622725, "grad_norm": 0.23791749099365347, "learning_rate": 4.517158754455957e-05, "loss": 0.4997, "step": 5942 }, { "epoch": 1.6633081444164568, "grad_norm": 0.22313267681411098, "learning_rate": 4.5156234195108916e-05, "loss": 0.4862, "step": 5943 }, { "epoch": 1.6635880212706409, "grad_norm": 0.2280697590093085, "learning_rate": 4.5140881306690594e-05, "loss": 0.4988, "step": 5944 }, { "epoch": 1.663867898124825, "grad_norm": 0.22302134670852036, "learning_rate": 4.5125528880765916e-05, "loss": 0.5072, "step": 5945 }, { "epoch": 1.6641477749790092, "grad_norm": 0.23689990892063556, "learning_rate": 4.511017691879613e-05, "loss": 0.4952, "step": 5946 }, { "epoch": 1.6644276518331935, "grad_norm": 0.2377504636023883, "learning_rate": 4.509482542224243e-05, "loss": 0.4788, "step": 5947 }, { "epoch": 1.6647075286873776, "grad_norm": 0.2242851228251371, "learning_rate": 4.507947439256599e-05, "loss": 0.4843, "step": 5948 }, { "epoch": 1.6649874055415617, "grad_norm": 0.23313302147950857, "learning_rate": 4.506412383122794e-05, "loss": 0.4704, "step": 5949 }, { "epoch": 1.665267282395746, "grad_norm": 0.23475206775431098, "learning_rate": 4.504877373968935e-05, "loss": 0.489, "step": 5950 }, { "epoch": 1.66554715924993, "grad_norm": 0.24143362398417503, "learning_rate": 4.503342411941124e-05, "loss": 0.5068, "step": 5951 }, { "epoch": 1.665827036104114, "grad_norm": 0.21870969193252662, "learning_rate": 4.5018074971854606e-05, "loss": 0.4911, "step": 5952 }, { "epoch": 1.6661069129582984, "grad_norm": 0.2267434263891294, "learning_rate": 4.5002726298480383e-05, "loss": 0.499, "step": 5953 }, { "epoch": 1.6663867898124827, "grad_norm": 0.21980272024638106, "learning_rate": 4.4987378100749475e-05, "loss": 0.4954, "step": 5954 }, { "epoch": 1.6666666666666665, "grad_norm": 0.21489239501568574, "learning_rate": 4.497203038012272e-05, "loss": 0.4776, "step": 5955 }, { "epoch": 1.6669465435208508, "grad_norm": 0.22328585712202512, "learning_rate": 4.495668313806093e-05, "loss": 0.4715, "step": 5956 }, { "epoch": 1.667226420375035, "grad_norm": 0.22063427316487816, "learning_rate": 4.494133637602487e-05, "loss": 0.4998, "step": 5957 }, { "epoch": 1.6675062972292192, "grad_norm": 0.2247994619535898, "learning_rate": 4.492599009547523e-05, "loss": 0.4851, "step": 5958 }, { "epoch": 1.6677861740834032, "grad_norm": 0.23070369528670504, "learning_rate": 4.491064429787269e-05, "loss": 0.4945, "step": 5959 }, { "epoch": 1.6680660509375875, "grad_norm": 0.22756132306061724, "learning_rate": 4.4895298984677886e-05, "loss": 0.4875, "step": 5960 }, { "epoch": 1.6683459277917716, "grad_norm": 0.21784003112082723, "learning_rate": 4.487995415735138e-05, "loss": 0.4794, "step": 5961 }, { "epoch": 1.6686258046459557, "grad_norm": 0.2203660024385122, "learning_rate": 4.48646098173537e-05, "loss": 0.4654, "step": 5962 }, { "epoch": 1.66890568150014, "grad_norm": 0.22296256835570522, "learning_rate": 4.484926596614533e-05, "loss": 0.474, "step": 5963 }, { "epoch": 1.6691855583543243, "grad_norm": 0.224512619034478, "learning_rate": 4.483392260518671e-05, "loss": 0.5039, "step": 5964 }, { "epoch": 1.669465435208508, "grad_norm": 0.2293958680434901, "learning_rate": 4.4818579735938234e-05, "loss": 0.5024, "step": 5965 }, { "epoch": 1.6697453120626924, "grad_norm": 0.2156340768395178, "learning_rate": 4.480323735986022e-05, "loss": 0.4879, "step": 5966 }, { "epoch": 1.6700251889168767, "grad_norm": 0.2200493844929716, "learning_rate": 4.4787895478412996e-05, "loss": 0.5048, "step": 5967 }, { "epoch": 1.6703050657710607, "grad_norm": 0.2283927289000887, "learning_rate": 4.4772554093056794e-05, "loss": 0.4835, "step": 5968 }, { "epoch": 1.6705849426252448, "grad_norm": 0.22184732269432453, "learning_rate": 4.4757213205251826e-05, "loss": 0.4973, "step": 5969 }, { "epoch": 1.670864819479429, "grad_norm": 0.2323183695512758, "learning_rate": 4.474187281645824e-05, "loss": 0.4947, "step": 5970 }, { "epoch": 1.6711446963336132, "grad_norm": 0.22148138120751784, "learning_rate": 4.472653292813614e-05, "loss": 0.4751, "step": 5971 }, { "epoch": 1.6714245731877972, "grad_norm": 0.2307676613015895, "learning_rate": 4.4711193541745584e-05, "loss": 0.4809, "step": 5972 }, { "epoch": 1.6717044500419815, "grad_norm": 0.2151710300959927, "learning_rate": 4.46958546587466e-05, "loss": 0.4666, "step": 5973 }, { "epoch": 1.6719843268961658, "grad_norm": 0.23775857612586254, "learning_rate": 4.4680516280599136e-05, "loss": 0.5043, "step": 5974 }, { "epoch": 1.67226420375035, "grad_norm": 0.23148145914124751, "learning_rate": 4.46651784087631e-05, "loss": 0.5176, "step": 5975 }, { "epoch": 1.672544080604534, "grad_norm": 0.23257994507830823, "learning_rate": 4.464984104469837e-05, "loss": 0.5227, "step": 5976 }, { "epoch": 1.6728239574587183, "grad_norm": 0.23169417162029926, "learning_rate": 4.4634504189864765e-05, "loss": 0.4867, "step": 5977 }, { "epoch": 1.6731038343129023, "grad_norm": 0.2202409752353094, "learning_rate": 4.4619167845722056e-05, "loss": 0.4904, "step": 5978 }, { "epoch": 1.6733837111670864, "grad_norm": 0.2324491677944803, "learning_rate": 4.4603832013729966e-05, "loss": 0.507, "step": 5979 }, { "epoch": 1.6736635880212707, "grad_norm": 0.22796481642585026, "learning_rate": 4.4588496695348155e-05, "loss": 0.4838, "step": 5980 }, { "epoch": 1.6739434648754548, "grad_norm": 0.22547180308225825, "learning_rate": 4.4573161892036264e-05, "loss": 0.4839, "step": 5981 }, { "epoch": 1.6742233417296388, "grad_norm": 0.22324709385121597, "learning_rate": 4.455782760525386e-05, "loss": 0.4676, "step": 5982 }, { "epoch": 1.6745032185838231, "grad_norm": 0.21827408215289565, "learning_rate": 4.454249383646046e-05, "loss": 0.4838, "step": 5983 }, { "epoch": 1.6747830954380074, "grad_norm": 0.2178667398543891, "learning_rate": 4.452716058711557e-05, "loss": 0.4762, "step": 5984 }, { "epoch": 1.6750629722921915, "grad_norm": 0.21918654796609172, "learning_rate": 4.451182785867859e-05, "loss": 0.5008, "step": 5985 }, { "epoch": 1.6753428491463755, "grad_norm": 0.22649588054297143, "learning_rate": 4.4496495652608904e-05, "loss": 0.4837, "step": 5986 }, { "epoch": 1.6756227260005598, "grad_norm": 0.23231101716757543, "learning_rate": 4.4481163970365846e-05, "loss": 0.4907, "step": 5987 }, { "epoch": 1.675902602854744, "grad_norm": 0.22660506943019057, "learning_rate": 4.446583281340869e-05, "loss": 0.4705, "step": 5988 }, { "epoch": 1.676182479708928, "grad_norm": 0.24184240414493793, "learning_rate": 4.445050218319667e-05, "loss": 0.4793, "step": 5989 }, { "epoch": 1.6764623565631123, "grad_norm": 0.22491414842907215, "learning_rate": 4.443517208118895e-05, "loss": 0.4876, "step": 5990 }, { "epoch": 1.6767422334172963, "grad_norm": 0.23170841467206285, "learning_rate": 4.441984250884468e-05, "loss": 0.4831, "step": 5991 }, { "epoch": 1.6770221102714804, "grad_norm": 0.22880432470515008, "learning_rate": 4.440451346762292e-05, "loss": 0.4973, "step": 5992 }, { "epoch": 1.6773019871256647, "grad_norm": 0.228564804999199, "learning_rate": 4.438918495898269e-05, "loss": 0.4643, "step": 5993 }, { "epoch": 1.677581863979849, "grad_norm": 0.2332438590298918, "learning_rate": 4.4373856984382984e-05, "loss": 0.5096, "step": 5994 }, { "epoch": 1.677861740834033, "grad_norm": 0.2249740894865392, "learning_rate": 4.435852954528271e-05, "loss": 0.5285, "step": 5995 }, { "epoch": 1.6781416176882171, "grad_norm": 0.23589221551388054, "learning_rate": 4.434320264314076e-05, "loss": 0.4954, "step": 5996 }, { "epoch": 1.6784214945424014, "grad_norm": 0.22070208814414577, "learning_rate": 4.432787627941594e-05, "loss": 0.4601, "step": 5997 }, { "epoch": 1.6787013713965855, "grad_norm": 0.22806956980514126, "learning_rate": 4.431255045556704e-05, "loss": 0.4991, "step": 5998 }, { "epoch": 1.6789812482507696, "grad_norm": 0.22335521181772075, "learning_rate": 4.429722517305276e-05, "loss": 0.4932, "step": 5999 }, { "epoch": 1.6792611251049538, "grad_norm": 0.2201566645866044, "learning_rate": 4.428190043333178e-05, "loss": 0.4876, "step": 6000 }, { "epoch": 1.6795410019591381, "grad_norm": 0.21884538054303299, "learning_rate": 4.426657623786272e-05, "loss": 0.4914, "step": 6001 }, { "epoch": 1.679820878813322, "grad_norm": 0.23234277992764904, "learning_rate": 4.4251252588104153e-05, "loss": 0.5001, "step": 6002 }, { "epoch": 1.6801007556675063, "grad_norm": 0.2218292395366639, "learning_rate": 4.4235929485514576e-05, "loss": 0.4644, "step": 6003 }, { "epoch": 1.6803806325216906, "grad_norm": 0.21673290973954457, "learning_rate": 4.4220606931552454e-05, "loss": 0.4808, "step": 6004 }, { "epoch": 1.6806605093758746, "grad_norm": 0.2374249951356412, "learning_rate": 4.420528492767621e-05, "loss": 0.4766, "step": 6005 }, { "epoch": 1.6809403862300587, "grad_norm": 0.2150067457956711, "learning_rate": 4.418996347534418e-05, "loss": 0.4516, "step": 6006 }, { "epoch": 1.681220263084243, "grad_norm": 0.22367424033055425, "learning_rate": 4.4174642576014677e-05, "loss": 0.5006, "step": 6007 }, { "epoch": 1.681500139938427, "grad_norm": 0.2277806770007238, "learning_rate": 4.4159322231145956e-05, "loss": 0.4803, "step": 6008 }, { "epoch": 1.6817800167926111, "grad_norm": 0.2209137338224397, "learning_rate": 4.414400244219623e-05, "loss": 0.4828, "step": 6009 }, { "epoch": 1.6820598936467954, "grad_norm": 0.2269520107756094, "learning_rate": 4.412868321062361e-05, "loss": 0.4935, "step": 6010 }, { "epoch": 1.6823397705009797, "grad_norm": 0.21970491392431596, "learning_rate": 4.4113364537886215e-05, "loss": 0.4775, "step": 6011 }, { "epoch": 1.6826196473551638, "grad_norm": 0.23179244760445664, "learning_rate": 4.409804642544208e-05, "loss": 0.4977, "step": 6012 }, { "epoch": 1.6828995242093479, "grad_norm": 0.23282758041738078, "learning_rate": 4.408272887474919e-05, "loss": 0.5092, "step": 6013 }, { "epoch": 1.6831794010635321, "grad_norm": 0.22119181773137908, "learning_rate": 4.406741188726547e-05, "loss": 0.4692, "step": 6014 }, { "epoch": 1.6834592779177162, "grad_norm": 0.23504555619711215, "learning_rate": 4.405209546444881e-05, "loss": 0.4934, "step": 6015 }, { "epoch": 1.6837391547719003, "grad_norm": 0.2258657833541981, "learning_rate": 4.403677960775704e-05, "loss": 0.4939, "step": 6016 }, { "epoch": 1.6840190316260846, "grad_norm": 0.22764896633116333, "learning_rate": 4.402146431864791e-05, "loss": 0.4862, "step": 6017 }, { "epoch": 1.6842989084802686, "grad_norm": 0.2221145229758645, "learning_rate": 4.4006149598579155e-05, "loss": 0.509, "step": 6018 }, { "epoch": 1.6845787853344527, "grad_norm": 0.22262471785163387, "learning_rate": 4.399083544900845e-05, "loss": 0.4824, "step": 6019 }, { "epoch": 1.684858662188637, "grad_norm": 0.22314588114794257, "learning_rate": 4.3975521871393374e-05, "loss": 0.4692, "step": 6020 }, { "epoch": 1.6851385390428213, "grad_norm": 0.23523248038800929, "learning_rate": 4.396020886719151e-05, "loss": 0.5041, "step": 6021 }, { "epoch": 1.6854184158970054, "grad_norm": 0.23084253004983865, "learning_rate": 4.394489643786034e-05, "loss": 0.4845, "step": 6022 }, { "epoch": 1.6856982927511894, "grad_norm": 0.23477951351420004, "learning_rate": 4.392958458485733e-05, "loss": 0.5015, "step": 6023 }, { "epoch": 1.6859781696053737, "grad_norm": 0.23127872384997272, "learning_rate": 4.391427330963984e-05, "loss": 0.4925, "step": 6024 }, { "epoch": 1.6862580464595578, "grad_norm": 0.22680649351589788, "learning_rate": 4.389896261366523e-05, "loss": 0.4778, "step": 6025 }, { "epoch": 1.6865379233137419, "grad_norm": 0.2325083981656526, "learning_rate": 4.388365249839077e-05, "loss": 0.489, "step": 6026 }, { "epoch": 1.6868178001679262, "grad_norm": 0.2327805304002526, "learning_rate": 4.386834296527371e-05, "loss": 0.4944, "step": 6027 }, { "epoch": 1.6870976770221102, "grad_norm": 0.2239983250870412, "learning_rate": 4.385303401577118e-05, "loss": 0.4778, "step": 6028 }, { "epoch": 1.6873775538762943, "grad_norm": 0.2321190798666152, "learning_rate": 4.3837725651340314e-05, "loss": 0.4714, "step": 6029 }, { "epoch": 1.6876574307304786, "grad_norm": 0.2512752993266075, "learning_rate": 4.3822417873438165e-05, "loss": 0.4904, "step": 6030 }, { "epoch": 1.6879373075846629, "grad_norm": 0.2334635714913292, "learning_rate": 4.3807110683521744e-05, "loss": 0.4754, "step": 6031 }, { "epoch": 1.688217184438847, "grad_norm": 0.2374893210946379, "learning_rate": 4.3791804083047997e-05, "loss": 0.5237, "step": 6032 }, { "epoch": 1.688497061293031, "grad_norm": 0.2308533756499172, "learning_rate": 4.377649807347381e-05, "loss": 0.4938, "step": 6033 }, { "epoch": 1.6887769381472153, "grad_norm": 0.23803728355173645, "learning_rate": 4.376119265625602e-05, "loss": 0.4918, "step": 6034 }, { "epoch": 1.6890568150013994, "grad_norm": 0.23214402677120147, "learning_rate": 4.3745887832851395e-05, "loss": 0.4882, "step": 6035 }, { "epoch": 1.6893366918555834, "grad_norm": 0.2145404758363729, "learning_rate": 4.373058360471668e-05, "loss": 0.4714, "step": 6036 }, { "epoch": 1.6896165687097677, "grad_norm": 0.22197510657771885, "learning_rate": 4.371527997330852e-05, "loss": 0.4564, "step": 6037 }, { "epoch": 1.689896445563952, "grad_norm": 0.2342687179885491, "learning_rate": 4.3699976940083534e-05, "loss": 0.487, "step": 6038 }, { "epoch": 1.6901763224181359, "grad_norm": 0.22799044299008367, "learning_rate": 4.3684674506498266e-05, "loss": 0.4946, "step": 6039 }, { "epoch": 1.6904561992723202, "grad_norm": 0.23406817373569913, "learning_rate": 4.366937267400922e-05, "loss": 0.5055, "step": 6040 }, { "epoch": 1.6907360761265045, "grad_norm": 0.22671184495415245, "learning_rate": 4.365407144407282e-05, "loss": 0.4862, "step": 6041 }, { "epoch": 1.6910159529806885, "grad_norm": 0.23380234311764664, "learning_rate": 4.3638770818145455e-05, "loss": 0.457, "step": 6042 }, { "epoch": 1.6912958298348726, "grad_norm": 0.23079266529705078, "learning_rate": 4.3623470797683444e-05, "loss": 0.472, "step": 6043 }, { "epoch": 1.6915757066890569, "grad_norm": 0.23830170302766132, "learning_rate": 4.360817138414305e-05, "loss": 0.4936, "step": 6044 }, { "epoch": 1.691855583543241, "grad_norm": 0.23149585393763558, "learning_rate": 4.359287257898049e-05, "loss": 0.4891, "step": 6045 }, { "epoch": 1.692135460397425, "grad_norm": 0.22228461204854483, "learning_rate": 4.35775743836519e-05, "loss": 0.4601, "step": 6046 }, { "epoch": 1.6924153372516093, "grad_norm": 0.23799397030513383, "learning_rate": 4.356227679961337e-05, "loss": 0.4945, "step": 6047 }, { "epoch": 1.6926952141057936, "grad_norm": 0.2295661357899793, "learning_rate": 4.354697982832094e-05, "loss": 0.4995, "step": 6048 }, { "epoch": 1.6929750909599774, "grad_norm": 0.22878351557382878, "learning_rate": 4.353168347123058e-05, "loss": 0.5071, "step": 6049 }, { "epoch": 1.6932549678141617, "grad_norm": 0.22099703818909397, "learning_rate": 4.351638772979821e-05, "loss": 0.4929, "step": 6050 }, { "epoch": 1.693534844668346, "grad_norm": 0.2322131541717201, "learning_rate": 4.350109260547968e-05, "loss": 0.484, "step": 6051 }, { "epoch": 1.69381472152253, "grad_norm": 0.23344038059601066, "learning_rate": 4.348579809973078e-05, "loss": 0.4944, "step": 6052 }, { "epoch": 1.6940945983767142, "grad_norm": 0.23085834900844993, "learning_rate": 4.3470504214007286e-05, "loss": 0.489, "step": 6053 }, { "epoch": 1.6943744752308985, "grad_norm": 0.21827723584864406, "learning_rate": 4.345521094976485e-05, "loss": 0.4846, "step": 6054 }, { "epoch": 1.6946543520850825, "grad_norm": 0.22716920261273174, "learning_rate": 4.3439918308459096e-05, "loss": 0.4849, "step": 6055 }, { "epoch": 1.6949342289392666, "grad_norm": 0.2341787702407808, "learning_rate": 4.342462629154559e-05, "loss": 0.4872, "step": 6056 }, { "epoch": 1.6952141057934509, "grad_norm": 0.23229886711449935, "learning_rate": 4.3409334900479836e-05, "loss": 0.4619, "step": 6057 }, { "epoch": 1.6954939826476352, "grad_norm": 0.22132737650463888, "learning_rate": 4.3394044136717276e-05, "loss": 0.4665, "step": 6058 }, { "epoch": 1.6957738595018192, "grad_norm": 0.22553467244797953, "learning_rate": 4.337875400171329e-05, "loss": 0.4635, "step": 6059 }, { "epoch": 1.6960537363560033, "grad_norm": 0.2385463024188196, "learning_rate": 4.336346449692321e-05, "loss": 0.4988, "step": 6060 }, { "epoch": 1.6963336132101876, "grad_norm": 0.2254335937544675, "learning_rate": 4.3348175623802284e-05, "loss": 0.4875, "step": 6061 }, { "epoch": 1.6966134900643717, "grad_norm": 0.2278016706264, "learning_rate": 4.333288738380573e-05, "loss": 0.513, "step": 6062 }, { "epoch": 1.6968933669185557, "grad_norm": 0.22408095344803847, "learning_rate": 4.33175997783887e-05, "loss": 0.4716, "step": 6063 }, { "epoch": 1.69717324377274, "grad_norm": 0.2408144700089221, "learning_rate": 4.330231280900625e-05, "loss": 0.4994, "step": 6064 }, { "epoch": 1.697453120626924, "grad_norm": 0.22873174835259108, "learning_rate": 4.328702647711342e-05, "loss": 0.4892, "step": 6065 }, { "epoch": 1.6977329974811082, "grad_norm": 0.23249504156013415, "learning_rate": 4.327174078416516e-05, "loss": 0.484, "step": 6066 }, { "epoch": 1.6980128743352925, "grad_norm": 0.23536992910481908, "learning_rate": 4.3256455731616385e-05, "loss": 0.4891, "step": 6067 }, { "epoch": 1.6982927511894768, "grad_norm": 0.23425136163775556, "learning_rate": 4.324117132092193e-05, "loss": 0.475, "step": 6068 }, { "epoch": 1.6985726280436608, "grad_norm": 0.23819526558478157, "learning_rate": 4.3225887553536546e-05, "loss": 0.4904, "step": 6069 }, { "epoch": 1.698852504897845, "grad_norm": 0.22210495860003826, "learning_rate": 4.3210604430914995e-05, "loss": 0.4859, "step": 6070 }, { "epoch": 1.6991323817520292, "grad_norm": 0.24111902482093175, "learning_rate": 4.319532195451192e-05, "loss": 0.4911, "step": 6071 }, { "epoch": 1.6994122586062133, "grad_norm": 0.218296239987164, "learning_rate": 4.3180040125781905e-05, "loss": 0.4821, "step": 6072 }, { "epoch": 1.6996921354603973, "grad_norm": 0.23173927481366371, "learning_rate": 4.316475894617949e-05, "loss": 0.51, "step": 6073 }, { "epoch": 1.6999720123145816, "grad_norm": 0.22818910508063336, "learning_rate": 4.314947841715914e-05, "loss": 0.4996, "step": 6074 }, { "epoch": 1.700251889168766, "grad_norm": 0.23734091010495434, "learning_rate": 4.313419854017528e-05, "loss": 0.4831, "step": 6075 }, { "epoch": 1.7005317660229498, "grad_norm": 0.22723183806207628, "learning_rate": 4.311891931668223e-05, "loss": 0.495, "step": 6076 }, { "epoch": 1.700811642877134, "grad_norm": 0.2325060112545814, "learning_rate": 4.31036407481343e-05, "loss": 0.4872, "step": 6077 }, { "epoch": 1.7010915197313183, "grad_norm": 0.22319014948993274, "learning_rate": 4.308836283598571e-05, "loss": 0.4972, "step": 6078 }, { "epoch": 1.7013713965855024, "grad_norm": 0.2286954356567191, "learning_rate": 4.3073085581690605e-05, "loss": 0.4978, "step": 6079 }, { "epoch": 1.7016512734396865, "grad_norm": 0.22910917223706653, "learning_rate": 4.305780898670308e-05, "loss": 0.4995, "step": 6080 }, { "epoch": 1.7019311502938708, "grad_norm": 0.21654689465526492, "learning_rate": 4.304253305247722e-05, "loss": 0.4879, "step": 6081 }, { "epoch": 1.7022110271480548, "grad_norm": 0.2318852139598937, "learning_rate": 4.302725778046693e-05, "loss": 0.5043, "step": 6082 }, { "epoch": 1.702490904002239, "grad_norm": 0.23702870671028914, "learning_rate": 4.301198317212615e-05, "loss": 0.5109, "step": 6083 }, { "epoch": 1.7027707808564232, "grad_norm": 0.22319624244566316, "learning_rate": 4.299670922890873e-05, "loss": 0.505, "step": 6084 }, { "epoch": 1.7030506577106075, "grad_norm": 0.22340990152011508, "learning_rate": 4.298143595226843e-05, "loss": 0.4995, "step": 6085 }, { "epoch": 1.7033305345647913, "grad_norm": 0.21696548119035303, "learning_rate": 4.2966163343658994e-05, "loss": 0.4749, "step": 6086 }, { "epoch": 1.7036104114189756, "grad_norm": 0.2234274616324956, "learning_rate": 4.2950891404534056e-05, "loss": 0.4828, "step": 6087 }, { "epoch": 1.70389028827316, "grad_norm": 0.22168470891317238, "learning_rate": 4.293562013634723e-05, "loss": 0.4793, "step": 6088 }, { "epoch": 1.704170165127344, "grad_norm": 0.2289638613127193, "learning_rate": 4.292034954055204e-05, "loss": 0.5193, "step": 6089 }, { "epoch": 1.704450041981528, "grad_norm": 0.2312629677688086, "learning_rate": 4.290507961860194e-05, "loss": 0.4982, "step": 6090 }, { "epoch": 1.7047299188357123, "grad_norm": 0.23012149422157516, "learning_rate": 4.2889810371950325e-05, "loss": 0.4896, "step": 6091 }, { "epoch": 1.7050097956898964, "grad_norm": 0.22197492836476815, "learning_rate": 4.287454180205055e-05, "loss": 0.4881, "step": 6092 }, { "epoch": 1.7052896725440805, "grad_norm": 0.23131945807116241, "learning_rate": 4.285927391035587e-05, "loss": 0.4764, "step": 6093 }, { "epoch": 1.7055695493982648, "grad_norm": 0.22619551209841873, "learning_rate": 4.284400669831949e-05, "loss": 0.5032, "step": 6094 }, { "epoch": 1.705849426252449, "grad_norm": 0.21874781523908743, "learning_rate": 4.282874016739456e-05, "loss": 0.4633, "step": 6095 }, { "epoch": 1.7061293031066331, "grad_norm": 0.23408565220552005, "learning_rate": 4.281347431903416e-05, "loss": 0.5201, "step": 6096 }, { "epoch": 1.7064091799608172, "grad_norm": 0.21388880137663957, "learning_rate": 4.2798209154691294e-05, "loss": 0.4691, "step": 6097 }, { "epoch": 1.7066890568150015, "grad_norm": 0.21823192167589522, "learning_rate": 4.2782944675818905e-05, "loss": 0.486, "step": 6098 }, { "epoch": 1.7069689336691856, "grad_norm": 0.23560885770576326, "learning_rate": 4.276768088386991e-05, "loss": 0.4867, "step": 6099 }, { "epoch": 1.7072488105233696, "grad_norm": 0.23159807051987655, "learning_rate": 4.275241778029707e-05, "loss": 0.4874, "step": 6100 }, { "epoch": 1.707528687377554, "grad_norm": 0.2291395922136475, "learning_rate": 4.2737155366553174e-05, "loss": 0.4702, "step": 6101 }, { "epoch": 1.707808564231738, "grad_norm": 0.2333931478112489, "learning_rate": 4.272189364409088e-05, "loss": 0.4955, "step": 6102 }, { "epoch": 1.708088441085922, "grad_norm": 0.23706638507750027, "learning_rate": 4.270663261436284e-05, "loss": 0.5035, "step": 6103 }, { "epoch": 1.7083683179401064, "grad_norm": 0.22135302301079363, "learning_rate": 4.269137227882157e-05, "loss": 0.4602, "step": 6104 }, { "epoch": 1.7086481947942906, "grad_norm": 0.23384398860035133, "learning_rate": 4.2676112638919584e-05, "loss": 0.4814, "step": 6105 }, { "epoch": 1.7089280716484747, "grad_norm": 0.23066932047048283, "learning_rate": 4.26608536961093e-05, "loss": 0.5053, "step": 6106 }, { "epoch": 1.7092079485026588, "grad_norm": 0.22072806062248457, "learning_rate": 4.2645595451843075e-05, "loss": 0.4765, "step": 6107 }, { "epoch": 1.709487825356843, "grad_norm": 0.22973694101920933, "learning_rate": 4.263033790757319e-05, "loss": 0.4999, "step": 6108 }, { "epoch": 1.7097677022110271, "grad_norm": 0.23426867632197704, "learning_rate": 4.261508106475186e-05, "loss": 0.4965, "step": 6109 }, { "epoch": 1.7100475790652112, "grad_norm": 0.21082326494989032, "learning_rate": 4.2599824924831254e-05, "loss": 0.4932, "step": 6110 }, { "epoch": 1.7103274559193955, "grad_norm": 0.2260451150003131, "learning_rate": 4.258456948926345e-05, "loss": 0.4795, "step": 6111 }, { "epoch": 1.7106073327735796, "grad_norm": 0.2302765994078192, "learning_rate": 4.256931475950048e-05, "loss": 0.4866, "step": 6112 }, { "epoch": 1.7108872096277636, "grad_norm": 0.2247863162171495, "learning_rate": 4.2554060736994284e-05, "loss": 0.4895, "step": 6113 }, { "epoch": 1.711167086481948, "grad_norm": 0.23224152782161891, "learning_rate": 4.2538807423196755e-05, "loss": 0.4795, "step": 6114 }, { "epoch": 1.7114469633361322, "grad_norm": 0.22876991671248526, "learning_rate": 4.25235548195597e-05, "loss": 0.4911, "step": 6115 }, { "epoch": 1.7117268401903163, "grad_norm": 0.23545289575333217, "learning_rate": 4.250830292753489e-05, "loss": 0.5082, "step": 6116 }, { "epoch": 1.7120067170445004, "grad_norm": 0.22156199507874028, "learning_rate": 4.249305174857403e-05, "loss": 0.4847, "step": 6117 }, { "epoch": 1.7122865938986847, "grad_norm": 0.22339088924553077, "learning_rate": 4.247780128412868e-05, "loss": 0.4956, "step": 6118 }, { "epoch": 1.7125664707528687, "grad_norm": 0.2310339784394574, "learning_rate": 4.246255153565042e-05, "loss": 0.4957, "step": 6119 }, { "epoch": 1.7128463476070528, "grad_norm": 0.2232197000183747, "learning_rate": 4.244730250459072e-05, "loss": 0.4688, "step": 6120 }, { "epoch": 1.713126224461237, "grad_norm": 0.22006292518528192, "learning_rate": 4.2432054192400983e-05, "loss": 0.4819, "step": 6121 }, { "epoch": 1.7134061013154214, "grad_norm": 0.23648859392172267, "learning_rate": 4.241680660053258e-05, "loss": 0.5008, "step": 6122 }, { "epoch": 1.7136859781696052, "grad_norm": 0.21708059395993587, "learning_rate": 4.240155973043676e-05, "loss": 0.4826, "step": 6123 }, { "epoch": 1.7139658550237895, "grad_norm": 0.2336780630673386, "learning_rate": 4.238631358356474e-05, "loss": 0.4981, "step": 6124 }, { "epoch": 1.7142457318779738, "grad_norm": 0.22097470300748698, "learning_rate": 4.2371068161367655e-05, "loss": 0.4634, "step": 6125 }, { "epoch": 1.7145256087321579, "grad_norm": 0.21234749760716135, "learning_rate": 4.235582346529658e-05, "loss": 0.4549, "step": 6126 }, { "epoch": 1.714805485586342, "grad_norm": 0.22530451777881053, "learning_rate": 4.2340579496802505e-05, "loss": 0.4986, "step": 6127 }, { "epoch": 1.7150853624405262, "grad_norm": 0.23549957729924606, "learning_rate": 4.232533625733635e-05, "loss": 0.497, "step": 6128 }, { "epoch": 1.7153652392947103, "grad_norm": 0.22971748808940273, "learning_rate": 4.2310093748348995e-05, "loss": 0.4939, "step": 6129 }, { "epoch": 1.7156451161488944, "grad_norm": 0.23289807795750575, "learning_rate": 4.229485197129122e-05, "loss": 0.4977, "step": 6130 }, { "epoch": 1.7159249930030787, "grad_norm": 0.24009905850185356, "learning_rate": 4.227961092761374e-05, "loss": 0.4764, "step": 6131 }, { "epoch": 1.716204869857263, "grad_norm": 0.2652582748926836, "learning_rate": 4.226437061876721e-05, "loss": 0.4748, "step": 6132 }, { "epoch": 1.716484746711447, "grad_norm": 0.22182048827203818, "learning_rate": 4.2249131046202225e-05, "loss": 0.4977, "step": 6133 }, { "epoch": 1.716764623565631, "grad_norm": 0.22451475379748234, "learning_rate": 4.2233892211369286e-05, "loss": 0.5034, "step": 6134 }, { "epoch": 1.7170445004198154, "grad_norm": 0.23047466351686094, "learning_rate": 4.2218654115718846e-05, "loss": 0.5037, "step": 6135 }, { "epoch": 1.7173243772739994, "grad_norm": 0.2320646993137256, "learning_rate": 4.2203416760701256e-05, "loss": 0.4801, "step": 6136 }, { "epoch": 1.7176042541281835, "grad_norm": 0.22834671959408195, "learning_rate": 4.218818014776681e-05, "loss": 0.496, "step": 6137 }, { "epoch": 1.7178841309823678, "grad_norm": 0.23205588739717026, "learning_rate": 4.2172944278365755e-05, "loss": 0.4876, "step": 6138 }, { "epoch": 1.7181640078365519, "grad_norm": 0.2301065946049179, "learning_rate": 4.2157709153948234e-05, "loss": 0.4884, "step": 6139 }, { "epoch": 1.718443884690736, "grad_norm": 0.22658124411305183, "learning_rate": 4.214247477596435e-05, "loss": 0.4641, "step": 6140 }, { "epoch": 1.7187237615449202, "grad_norm": 0.22846579796177366, "learning_rate": 4.212724114586412e-05, "loss": 0.4899, "step": 6141 }, { "epoch": 1.7190036383991045, "grad_norm": 0.22292216756622865, "learning_rate": 4.211200826509749e-05, "loss": 0.4755, "step": 6142 }, { "epoch": 1.7192835152532886, "grad_norm": 0.23448196005844724, "learning_rate": 4.209677613511432e-05, "loss": 0.5053, "step": 6143 }, { "epoch": 1.7195633921074727, "grad_norm": 0.22431288444980207, "learning_rate": 4.208154475736442e-05, "loss": 0.4776, "step": 6144 }, { "epoch": 1.719843268961657, "grad_norm": 0.2178878211374519, "learning_rate": 4.2066314133297526e-05, "loss": 0.4595, "step": 6145 }, { "epoch": 1.720123145815841, "grad_norm": 0.22213929533746019, "learning_rate": 4.20510842643633e-05, "loss": 0.4704, "step": 6146 }, { "epoch": 1.720403022670025, "grad_norm": 0.23103599253361262, "learning_rate": 4.203585515201131e-05, "loss": 0.4715, "step": 6147 }, { "epoch": 1.7206828995242094, "grad_norm": 0.218019831707151, "learning_rate": 4.2020626797691096e-05, "loss": 0.4908, "step": 6148 }, { "epoch": 1.7209627763783935, "grad_norm": 0.2293186538753741, "learning_rate": 4.200539920285208e-05, "loss": 0.4889, "step": 6149 }, { "epoch": 1.7212426532325775, "grad_norm": 0.22837270173547924, "learning_rate": 4.199017236894364e-05, "loss": 0.4829, "step": 6150 }, { "epoch": 1.7215225300867618, "grad_norm": 0.22800302529739508, "learning_rate": 4.1974946297415077e-05, "loss": 0.4794, "step": 6151 }, { "epoch": 1.721802406940946, "grad_norm": 0.24263392801028452, "learning_rate": 4.1959720989715626e-05, "loss": 0.5092, "step": 6152 }, { "epoch": 1.7220822837951302, "grad_norm": 0.21515296149303667, "learning_rate": 4.194449644729444e-05, "loss": 0.4974, "step": 6153 }, { "epoch": 1.7223621606493142, "grad_norm": 0.23012271944667229, "learning_rate": 4.1929272671600594e-05, "loss": 0.491, "step": 6154 }, { "epoch": 1.7226420375034985, "grad_norm": 0.2463645417102324, "learning_rate": 4.191404966408308e-05, "loss": 0.4851, "step": 6155 }, { "epoch": 1.7229219143576826, "grad_norm": 0.2269257728365019, "learning_rate": 4.1898827426190834e-05, "loss": 0.4902, "step": 6156 }, { "epoch": 1.7232017912118667, "grad_norm": 0.27498288725529796, "learning_rate": 4.188360595937274e-05, "loss": 0.4782, "step": 6157 }, { "epoch": 1.723481668066051, "grad_norm": 0.23911156220462287, "learning_rate": 4.186838526507757e-05, "loss": 0.5124, "step": 6158 }, { "epoch": 1.7237615449202353, "grad_norm": 0.2312944792273134, "learning_rate": 4.185316534475404e-05, "loss": 0.4978, "step": 6159 }, { "epoch": 1.724041421774419, "grad_norm": 0.22707976307773028, "learning_rate": 4.18379461998508e-05, "loss": 0.4673, "step": 6160 }, { "epoch": 1.7243212986286034, "grad_norm": 0.239679140818221, "learning_rate": 4.18227278318164e-05, "loss": 0.4783, "step": 6161 }, { "epoch": 1.7246011754827877, "grad_norm": 0.23343532877153514, "learning_rate": 4.180751024209935e-05, "loss": 0.4955, "step": 6162 }, { "epoch": 1.7248810523369718, "grad_norm": 0.2304604774398203, "learning_rate": 4.179229343214806e-05, "loss": 0.4941, "step": 6163 }, { "epoch": 1.7251609291911558, "grad_norm": 0.22100401338558318, "learning_rate": 4.177707740341087e-05, "loss": 0.483, "step": 6164 }, { "epoch": 1.7254408060453401, "grad_norm": 0.23943947980530428, "learning_rate": 4.1761862157336065e-05, "loss": 0.4971, "step": 6165 }, { "epoch": 1.7257206828995242, "grad_norm": 0.23364708009853402, "learning_rate": 4.174664769537182e-05, "loss": 0.4994, "step": 6166 }, { "epoch": 1.7260005597537083, "grad_norm": 0.2177972882653534, "learning_rate": 4.1731434018966265e-05, "loss": 0.4832, "step": 6167 }, { "epoch": 1.7262804366078925, "grad_norm": 0.22638760788070802, "learning_rate": 4.1716221129567455e-05, "loss": 0.4819, "step": 6168 }, { "epoch": 1.7265603134620768, "grad_norm": 0.23315084038473147, "learning_rate": 4.170100902862335e-05, "loss": 0.4755, "step": 6169 }, { "epoch": 1.7268401903162607, "grad_norm": 0.2168653007820914, "learning_rate": 4.168579771758186e-05, "loss": 0.4905, "step": 6170 }, { "epoch": 1.727120067170445, "grad_norm": 0.4397010930184802, "learning_rate": 4.1670587197890795e-05, "loss": 0.4947, "step": 6171 }, { "epoch": 1.7273999440246293, "grad_norm": 0.23897181759928504, "learning_rate": 4.165537747099791e-05, "loss": 0.4993, "step": 6172 }, { "epoch": 1.7276798208788133, "grad_norm": 0.2436988300206224, "learning_rate": 4.164016853835085e-05, "loss": 0.4878, "step": 6173 }, { "epoch": 1.7279596977329974, "grad_norm": 0.234216966427448, "learning_rate": 4.162496040139724e-05, "loss": 0.501, "step": 6174 }, { "epoch": 1.7282395745871817, "grad_norm": 0.23058344419520013, "learning_rate": 4.160975306158458e-05, "loss": 0.4741, "step": 6175 }, { "epoch": 1.7285194514413658, "grad_norm": 0.226064221841301, "learning_rate": 4.159454652036032e-05, "loss": 0.4812, "step": 6176 }, { "epoch": 1.7287993282955498, "grad_norm": 0.23533501416134883, "learning_rate": 4.157934077917183e-05, "loss": 0.4985, "step": 6177 }, { "epoch": 1.7290792051497341, "grad_norm": 0.2248949857682273, "learning_rate": 4.15641358394664e-05, "loss": 0.4921, "step": 6178 }, { "epoch": 1.7293590820039184, "grad_norm": 0.21832457514267647, "learning_rate": 4.154893170269124e-05, "loss": 0.4687, "step": 6179 }, { "epoch": 1.7296389588581025, "grad_norm": 0.22864792597259992, "learning_rate": 4.153372837029349e-05, "loss": 0.465, "step": 6180 }, { "epoch": 1.7299188357122866, "grad_norm": 0.2189754608542842, "learning_rate": 4.1518525843720216e-05, "loss": 0.4822, "step": 6181 }, { "epoch": 1.7301987125664708, "grad_norm": 0.2270261070504502, "learning_rate": 4.150332412441839e-05, "loss": 0.47, "step": 6182 }, { "epoch": 1.730478589420655, "grad_norm": 0.2343048346316914, "learning_rate": 4.148812321383494e-05, "loss": 0.4763, "step": 6183 }, { "epoch": 1.730758466274839, "grad_norm": 0.2293296292049538, "learning_rate": 4.147292311341667e-05, "loss": 0.498, "step": 6184 }, { "epoch": 1.7310383431290233, "grad_norm": 0.23190044498144893, "learning_rate": 4.145772382461036e-05, "loss": 0.4924, "step": 6185 }, { "epoch": 1.7313182199832073, "grad_norm": 0.22096446277113688, "learning_rate": 4.144252534886268e-05, "loss": 0.5141, "step": 6186 }, { "epoch": 1.7315980968373914, "grad_norm": 0.23537546826112543, "learning_rate": 4.142732768762023e-05, "loss": 0.4994, "step": 6187 }, { "epoch": 1.7318779736915757, "grad_norm": 0.22247615205398874, "learning_rate": 4.1412130842329534e-05, "loss": 0.479, "step": 6188 }, { "epoch": 1.73215785054576, "grad_norm": 0.2318827393974484, "learning_rate": 4.139693481443702e-05, "loss": 0.4861, "step": 6189 }, { "epoch": 1.732437727399944, "grad_norm": 0.22755283443826474, "learning_rate": 4.1381739605389095e-05, "loss": 0.4839, "step": 6190 }, { "epoch": 1.7327176042541281, "grad_norm": 0.22817109923223067, "learning_rate": 4.136654521663198e-05, "loss": 0.4777, "step": 6191 }, { "epoch": 1.7329974811083124, "grad_norm": 0.22596647868744635, "learning_rate": 4.1351351649611933e-05, "loss": 0.4612, "step": 6192 }, { "epoch": 1.7332773579624965, "grad_norm": 0.23397970580814761, "learning_rate": 4.1336158905775086e-05, "loss": 0.4844, "step": 6193 }, { "epoch": 1.7335572348166806, "grad_norm": 0.2355951420758951, "learning_rate": 4.1320966986567476e-05, "loss": 0.4744, "step": 6194 }, { "epoch": 1.7338371116708649, "grad_norm": 0.24338253587553524, "learning_rate": 4.13057758934351e-05, "loss": 0.4749, "step": 6195 }, { "epoch": 1.7341169885250491, "grad_norm": 0.21927606053474624, "learning_rate": 4.129058562782382e-05, "loss": 0.4829, "step": 6196 }, { "epoch": 1.734396865379233, "grad_norm": 0.22479603677132748, "learning_rate": 4.127539619117948e-05, "loss": 0.483, "step": 6197 }, { "epoch": 1.7346767422334173, "grad_norm": 0.23048192785742735, "learning_rate": 4.126020758494782e-05, "loss": 0.4874, "step": 6198 }, { "epoch": 1.7349566190876016, "grad_norm": 0.22266574526724875, "learning_rate": 4.1245019810574495e-05, "loss": 0.4798, "step": 6199 }, { "epoch": 1.7352364959417856, "grad_norm": 0.22664210498131207, "learning_rate": 4.122983286950508e-05, "loss": 0.4851, "step": 6200 }, { "epoch": 1.7355163727959697, "grad_norm": 0.2330936722013338, "learning_rate": 4.121464676318509e-05, "loss": 0.469, "step": 6201 }, { "epoch": 1.735796249650154, "grad_norm": 0.23141386256000987, "learning_rate": 4.119946149305992e-05, "loss": 0.4854, "step": 6202 }, { "epoch": 1.736076126504338, "grad_norm": 0.22741908377211895, "learning_rate": 4.118427706057494e-05, "loss": 0.5071, "step": 6203 }, { "epoch": 1.7363560033585221, "grad_norm": 0.23079084357341084, "learning_rate": 4.1169093467175404e-05, "loss": 0.5085, "step": 6204 }, { "epoch": 1.7366358802127064, "grad_norm": 0.221600111079701, "learning_rate": 4.1153910714306496e-05, "loss": 0.5207, "step": 6205 }, { "epoch": 1.7369157570668907, "grad_norm": 0.23615707134305802, "learning_rate": 4.113872880341332e-05, "loss": 0.5139, "step": 6206 }, { "epoch": 1.7371956339210746, "grad_norm": 0.22398368347694772, "learning_rate": 4.11235477359409e-05, "loss": 0.4764, "step": 6207 }, { "epoch": 1.7374755107752589, "grad_norm": 0.21892919892376989, "learning_rate": 4.110836751333418e-05, "loss": 0.4723, "step": 6208 }, { "epoch": 1.7377553876294431, "grad_norm": 0.22126669964411433, "learning_rate": 4.109318813703801e-05, "loss": 0.4675, "step": 6209 }, { "epoch": 1.7380352644836272, "grad_norm": 0.2249764201476906, "learning_rate": 4.107800960849717e-05, "loss": 0.482, "step": 6210 }, { "epoch": 1.7383151413378113, "grad_norm": 0.22660197424580764, "learning_rate": 4.106283192915638e-05, "loss": 0.4635, "step": 6211 }, { "epoch": 1.7385950181919956, "grad_norm": 0.24296870443709023, "learning_rate": 4.104765510046024e-05, "loss": 0.4906, "step": 6212 }, { "epoch": 1.7388748950461796, "grad_norm": 0.2310097726134897, "learning_rate": 4.1032479123853306e-05, "loss": 0.4988, "step": 6213 }, { "epoch": 1.7391547719003637, "grad_norm": 0.22724682084757694, "learning_rate": 4.1017304000780025e-05, "loss": 0.4881, "step": 6214 }, { "epoch": 1.739434648754548, "grad_norm": 0.22766719539469013, "learning_rate": 4.100212973268478e-05, "loss": 0.4852, "step": 6215 }, { "epoch": 1.7397145256087323, "grad_norm": 0.23270993851446942, "learning_rate": 4.098695632101186e-05, "loss": 0.494, "step": 6216 }, { "epoch": 1.7399944024629164, "grad_norm": 0.2257314465143826, "learning_rate": 4.097178376720548e-05, "loss": 0.5083, "step": 6217 }, { "epoch": 1.7402742793171004, "grad_norm": 0.21487172598219728, "learning_rate": 4.0956612072709775e-05, "loss": 0.4795, "step": 6218 }, { "epoch": 1.7405541561712847, "grad_norm": 0.22280803848247047, "learning_rate": 4.0941441238968784e-05, "loss": 0.4952, "step": 6219 }, { "epoch": 1.7408340330254688, "grad_norm": 0.2288712931118916, "learning_rate": 4.0926271267426505e-05, "loss": 0.481, "step": 6220 }, { "epoch": 1.7411139098796529, "grad_norm": 0.2317947148548979, "learning_rate": 4.091110215952679e-05, "loss": 0.484, "step": 6221 }, { "epoch": 1.7413937867338372, "grad_norm": 0.24196046213019462, "learning_rate": 4.0895933916713475e-05, "loss": 0.4967, "step": 6222 }, { "epoch": 1.7416736635880212, "grad_norm": 0.22802728951721227, "learning_rate": 4.088076654043026e-05, "loss": 0.4833, "step": 6223 }, { "epoch": 1.7419535404422053, "grad_norm": 0.23532017702606559, "learning_rate": 4.086560003212079e-05, "loss": 0.4566, "step": 6224 }, { "epoch": 1.7422334172963896, "grad_norm": 0.22664788392552132, "learning_rate": 4.085043439322861e-05, "loss": 0.4931, "step": 6225 }, { "epoch": 1.7425132941505739, "grad_norm": 0.22804682386682001, "learning_rate": 4.0835269625197235e-05, "loss": 0.4951, "step": 6226 }, { "epoch": 1.742793171004758, "grad_norm": 0.22851141767746166, "learning_rate": 4.0820105729470005e-05, "loss": 0.4711, "step": 6227 }, { "epoch": 1.743073047858942, "grad_norm": 0.23419407066537856, "learning_rate": 4.0804942707490254e-05, "loss": 0.4985, "step": 6228 }, { "epoch": 1.7433529247131263, "grad_norm": 0.2281211997665903, "learning_rate": 4.0789780560701204e-05, "loss": 0.4958, "step": 6229 }, { "epoch": 1.7436328015673104, "grad_norm": 0.23278838235072313, "learning_rate": 4.077461929054599e-05, "loss": 0.4876, "step": 6230 }, { "epoch": 1.7439126784214944, "grad_norm": 0.23339218741007328, "learning_rate": 4.075945889846768e-05, "loss": 0.4867, "step": 6231 }, { "epoch": 1.7441925552756787, "grad_norm": 0.23597406995369719, "learning_rate": 4.074429938590924e-05, "loss": 0.4854, "step": 6232 }, { "epoch": 1.744472432129863, "grad_norm": 0.2181253638617583, "learning_rate": 4.072914075431357e-05, "loss": 0.479, "step": 6233 }, { "epoch": 1.7447523089840469, "grad_norm": 0.2375729240684907, "learning_rate": 4.071398300512347e-05, "loss": 0.4863, "step": 6234 }, { "epoch": 1.7450321858382312, "grad_norm": 0.21824965754073894, "learning_rate": 4.069882613978167e-05, "loss": 0.4745, "step": 6235 }, { "epoch": 1.7453120626924155, "grad_norm": 0.2423222093731873, "learning_rate": 4.068367015973078e-05, "loss": 0.511, "step": 6236 }, { "epoch": 1.7455919395465995, "grad_norm": 0.2225530815769493, "learning_rate": 4.066851506641341e-05, "loss": 0.466, "step": 6237 }, { "epoch": 1.7458718164007836, "grad_norm": 0.23503900746030318, "learning_rate": 4.0653360861271974e-05, "loss": 0.4776, "step": 6238 }, { "epoch": 1.7461516932549679, "grad_norm": 0.23179912266962155, "learning_rate": 4.06382075457489e-05, "loss": 0.4961, "step": 6239 }, { "epoch": 1.746431570109152, "grad_norm": 0.23010916446882304, "learning_rate": 4.062305512128647e-05, "loss": 0.5108, "step": 6240 }, { "epoch": 1.746711446963336, "grad_norm": 0.22742092032785105, "learning_rate": 4.0607903589326896e-05, "loss": 0.4692, "step": 6241 }, { "epoch": 1.7469913238175203, "grad_norm": 0.22283313750520264, "learning_rate": 4.0592752951312316e-05, "loss": 0.4917, "step": 6242 }, { "epoch": 1.7472712006717046, "grad_norm": 0.22105652456579566, "learning_rate": 4.0577603208684785e-05, "loss": 0.4947, "step": 6243 }, { "epoch": 1.7475510775258885, "grad_norm": 0.2306414346923748, "learning_rate": 4.056245436288626e-05, "loss": 0.4713, "step": 6244 }, { "epoch": 1.7478309543800727, "grad_norm": 0.22984458746847602, "learning_rate": 4.05473064153586e-05, "loss": 0.4944, "step": 6245 }, { "epoch": 1.748110831234257, "grad_norm": 0.2213908462787107, "learning_rate": 4.05321593675436e-05, "loss": 0.4637, "step": 6246 }, { "epoch": 1.748390708088441, "grad_norm": 0.23214944822098388, "learning_rate": 4.051701322088298e-05, "loss": 0.486, "step": 6247 }, { "epoch": 1.7486705849426252, "grad_norm": 0.22587110510421393, "learning_rate": 4.050186797681834e-05, "loss": 0.5056, "step": 6248 }, { "epoch": 1.7489504617968095, "grad_norm": 0.23345726596750907, "learning_rate": 4.0486723636791234e-05, "loss": 0.4893, "step": 6249 }, { "epoch": 1.7492303386509935, "grad_norm": 0.23430229132783856, "learning_rate": 4.047158020224309e-05, "loss": 0.4704, "step": 6250 }, { "epoch": 1.7495102155051776, "grad_norm": 0.22962546937050268, "learning_rate": 4.045643767461528e-05, "loss": 0.4866, "step": 6251 }, { "epoch": 1.749790092359362, "grad_norm": 0.2256801577148114, "learning_rate": 4.044129605534907e-05, "loss": 0.5009, "step": 6252 }, { "epoch": 1.7500699692135462, "grad_norm": 0.24114583256973654, "learning_rate": 4.042615534588565e-05, "loss": 0.5052, "step": 6253 }, { "epoch": 1.7503498460677303, "grad_norm": 0.22799986588431403, "learning_rate": 4.041101554766612e-05, "loss": 0.4999, "step": 6254 }, { "epoch": 1.7506297229219143, "grad_norm": 0.23193963651872831, "learning_rate": 4.0395876662131494e-05, "loss": 0.5025, "step": 6255 }, { "epoch": 1.7509095997760986, "grad_norm": 0.24000487006075624, "learning_rate": 4.0380738690722715e-05, "loss": 0.4988, "step": 6256 }, { "epoch": 1.7511894766302827, "grad_norm": 0.22289020086343436, "learning_rate": 4.0365601634880604e-05, "loss": 0.4807, "step": 6257 }, { "epoch": 1.7514693534844668, "grad_norm": 0.22062471093548774, "learning_rate": 4.035046549604593e-05, "loss": 0.504, "step": 6258 }, { "epoch": 1.751749230338651, "grad_norm": 0.24988358237523547, "learning_rate": 4.0335330275659346e-05, "loss": 0.4961, "step": 6259 }, { "epoch": 1.7520291071928351, "grad_norm": 0.23267036115777692, "learning_rate": 4.032019597516144e-05, "loss": 0.4985, "step": 6260 }, { "epoch": 1.7523089840470192, "grad_norm": 0.22988664272037695, "learning_rate": 4.0305062595992694e-05, "loss": 0.4991, "step": 6261 }, { "epoch": 1.7525888609012035, "grad_norm": 0.22123590529816817, "learning_rate": 4.028993013959353e-05, "loss": 0.4859, "step": 6262 }, { "epoch": 1.7528687377553878, "grad_norm": 0.2226616011786503, "learning_rate": 4.027479860740424e-05, "loss": 0.4736, "step": 6263 }, { "epoch": 1.7531486146095718, "grad_norm": 0.23406933748112344, "learning_rate": 4.025966800086506e-05, "loss": 0.5066, "step": 6264 }, { "epoch": 1.753428491463756, "grad_norm": 0.23290244909657634, "learning_rate": 4.024453832141613e-05, "loss": 0.4798, "step": 6265 }, { "epoch": 1.7537083683179402, "grad_norm": 0.23299853930618206, "learning_rate": 4.022940957049751e-05, "loss": 0.487, "step": 6266 }, { "epoch": 1.7539882451721243, "grad_norm": 0.22798329834562744, "learning_rate": 4.021428174954915e-05, "loss": 0.4771, "step": 6267 }, { "epoch": 1.7542681220263083, "grad_norm": 0.23048191503476212, "learning_rate": 4.0199154860010936e-05, "loss": 0.4972, "step": 6268 }, { "epoch": 1.7545479988804926, "grad_norm": 0.2178656870948323, "learning_rate": 4.018402890332264e-05, "loss": 0.4957, "step": 6269 }, { "epoch": 1.7548278757346767, "grad_norm": 0.2270165658527656, "learning_rate": 4.016890388092398e-05, "loss": 0.4823, "step": 6270 }, { "epoch": 1.7551077525888608, "grad_norm": 0.23299633722136034, "learning_rate": 4.0153779794254525e-05, "loss": 0.4797, "step": 6271 }, { "epoch": 1.755387629443045, "grad_norm": 0.23142542256353943, "learning_rate": 4.013865664475384e-05, "loss": 0.4818, "step": 6272 }, { "epoch": 1.7556675062972293, "grad_norm": 0.22913844147841553, "learning_rate": 4.012353443386133e-05, "loss": 0.4926, "step": 6273 }, { "epoch": 1.7559473831514134, "grad_norm": 0.22416087593041847, "learning_rate": 4.010841316301634e-05, "loss": 0.4894, "step": 6274 }, { "epoch": 1.7562272600055975, "grad_norm": 0.21741814918138835, "learning_rate": 4.009329283365813e-05, "loss": 0.494, "step": 6275 }, { "epoch": 1.7565071368597818, "grad_norm": 0.22517899957162932, "learning_rate": 4.007817344722585e-05, "loss": 0.5039, "step": 6276 }, { "epoch": 1.7567870137139658, "grad_norm": 0.23003578969254598, "learning_rate": 4.0063055005158576e-05, "loss": 0.5039, "step": 6277 }, { "epoch": 1.75706689056815, "grad_norm": 0.24009596113519552, "learning_rate": 4.004793750889528e-05, "loss": 0.495, "step": 6278 }, { "epoch": 1.7573467674223342, "grad_norm": 0.23205304974684965, "learning_rate": 4.0032820959874875e-05, "loss": 0.4815, "step": 6279 }, { "epoch": 1.7576266442765185, "grad_norm": 0.22990380313040454, "learning_rate": 4.0017705359536153e-05, "loss": 0.4962, "step": 6280 }, { "epoch": 1.7579065211307023, "grad_norm": 0.23529770235912076, "learning_rate": 4.000259070931781e-05, "loss": 0.5154, "step": 6281 }, { "epoch": 1.7581863979848866, "grad_norm": 0.22976403488340347, "learning_rate": 3.998747701065849e-05, "loss": 0.4849, "step": 6282 }, { "epoch": 1.758466274839071, "grad_norm": 0.2218847105560266, "learning_rate": 3.9972364264996696e-05, "loss": 0.4548, "step": 6283 }, { "epoch": 1.758746151693255, "grad_norm": 0.22186692728920557, "learning_rate": 3.99572524737709e-05, "loss": 0.4967, "step": 6284 }, { "epoch": 1.759026028547439, "grad_norm": 0.22254185623061357, "learning_rate": 3.994214163841942e-05, "loss": 0.4958, "step": 6285 }, { "epoch": 1.7593059054016233, "grad_norm": 0.22156417669810374, "learning_rate": 3.992703176038054e-05, "loss": 0.4816, "step": 6286 }, { "epoch": 1.7595857822558074, "grad_norm": 0.23901536957158068, "learning_rate": 3.991192284109241e-05, "loss": 0.4871, "step": 6287 }, { "epoch": 1.7598656591099915, "grad_norm": 0.22301450682162732, "learning_rate": 3.989681488199309e-05, "loss": 0.4591, "step": 6288 }, { "epoch": 1.7601455359641758, "grad_norm": 0.22377759256850285, "learning_rate": 3.9881707884520613e-05, "loss": 0.4844, "step": 6289 }, { "epoch": 1.76042541281836, "grad_norm": 0.24620730002455676, "learning_rate": 3.986660185011283e-05, "loss": 0.5027, "step": 6290 }, { "epoch": 1.760705289672544, "grad_norm": 0.2261648256000998, "learning_rate": 3.985149678020756e-05, "loss": 0.499, "step": 6291 }, { "epoch": 1.7609851665267282, "grad_norm": 0.23200553036089605, "learning_rate": 3.983639267624251e-05, "loss": 0.4994, "step": 6292 }, { "epoch": 1.7612650433809125, "grad_norm": 0.22256074334168915, "learning_rate": 3.9821289539655297e-05, "loss": 0.5009, "step": 6293 }, { "epoch": 1.7615449202350966, "grad_norm": 0.21606778749430056, "learning_rate": 3.9806187371883435e-05, "loss": 0.4703, "step": 6294 }, { "epoch": 1.7618247970892806, "grad_norm": 0.23302426125720777, "learning_rate": 3.979108617436437e-05, "loss": 0.4838, "step": 6295 }, { "epoch": 1.762104673943465, "grad_norm": 0.2325499111792386, "learning_rate": 3.977598594853543e-05, "loss": 0.4997, "step": 6296 }, { "epoch": 1.762384550797649, "grad_norm": 0.230697920236471, "learning_rate": 3.976088669583387e-05, "loss": 0.4824, "step": 6297 }, { "epoch": 1.762664427651833, "grad_norm": 0.22450802207118495, "learning_rate": 3.974578841769686e-05, "loss": 0.4695, "step": 6298 }, { "epoch": 1.7629443045060174, "grad_norm": 0.22781091871879922, "learning_rate": 3.973069111556144e-05, "loss": 0.4842, "step": 6299 }, { "epoch": 1.7632241813602016, "grad_norm": 0.2304046252653234, "learning_rate": 3.9715594790864586e-05, "loss": 0.4881, "step": 6300 }, { "epoch": 1.7635040582143857, "grad_norm": 0.23292913610813235, "learning_rate": 3.970049944504317e-05, "loss": 0.4732, "step": 6301 }, { "epoch": 1.7637839350685698, "grad_norm": 0.23933804793398222, "learning_rate": 3.9685405079533986e-05, "loss": 0.4829, "step": 6302 }, { "epoch": 1.764063811922754, "grad_norm": 0.23299746176743352, "learning_rate": 3.967031169577373e-05, "loss": 0.4874, "step": 6303 }, { "epoch": 1.7643436887769381, "grad_norm": 0.2342529534679421, "learning_rate": 3.9655219295198976e-05, "loss": 0.4867, "step": 6304 }, { "epoch": 1.7646235656311222, "grad_norm": 0.22562566201708564, "learning_rate": 3.964012787924623e-05, "loss": 0.4785, "step": 6305 }, { "epoch": 1.7649034424853065, "grad_norm": 0.23661630561981475, "learning_rate": 3.962503744935192e-05, "loss": 0.4892, "step": 6306 }, { "epoch": 1.7651833193394906, "grad_norm": 0.22968855464150276, "learning_rate": 3.960994800695236e-05, "loss": 0.4864, "step": 6307 }, { "epoch": 1.7654631961936746, "grad_norm": 0.23394090987024926, "learning_rate": 3.959485955348376e-05, "loss": 0.4874, "step": 6308 }, { "epoch": 1.765743073047859, "grad_norm": 0.24092999284959826, "learning_rate": 3.957977209038226e-05, "loss": 0.5025, "step": 6309 }, { "epoch": 1.7660229499020432, "grad_norm": 0.2333841671522603, "learning_rate": 3.9564685619083875e-05, "loss": 0.4828, "step": 6310 }, { "epoch": 1.7663028267562273, "grad_norm": 0.24408625874166232, "learning_rate": 3.954960014102455e-05, "loss": 0.5011, "step": 6311 }, { "epoch": 1.7665827036104114, "grad_norm": 0.2218998319541268, "learning_rate": 3.953451565764014e-05, "loss": 0.4815, "step": 6312 }, { "epoch": 1.7668625804645957, "grad_norm": 0.233599747588889, "learning_rate": 3.951943217036639e-05, "loss": 0.4731, "step": 6313 }, { "epoch": 1.7671424573187797, "grad_norm": 0.22581810206686742, "learning_rate": 3.9504349680638944e-05, "loss": 0.4801, "step": 6314 }, { "epoch": 1.7674223341729638, "grad_norm": 0.22230328041234768, "learning_rate": 3.948926818989338e-05, "loss": 0.4719, "step": 6315 }, { "epoch": 1.767702211027148, "grad_norm": 0.23914765385746733, "learning_rate": 3.9474187699565155e-05, "loss": 0.4966, "step": 6316 }, { "epoch": 1.7679820878813324, "grad_norm": 0.22668827409996398, "learning_rate": 3.945910821108963e-05, "loss": 0.4913, "step": 6317 }, { "epoch": 1.7682619647355162, "grad_norm": 0.23524978952363726, "learning_rate": 3.944402972590209e-05, "loss": 0.495, "step": 6318 }, { "epoch": 1.7685418415897005, "grad_norm": 0.22704137986481232, "learning_rate": 3.9428952245437703e-05, "loss": 0.4779, "step": 6319 }, { "epoch": 1.7688217184438848, "grad_norm": 0.22448072357965906, "learning_rate": 3.9413875771131555e-05, "loss": 0.4914, "step": 6320 }, { "epoch": 1.7691015952980689, "grad_norm": 0.2243162231529199, "learning_rate": 3.939880030441864e-05, "loss": 0.4734, "step": 6321 }, { "epoch": 1.769381472152253, "grad_norm": 0.22234487866670052, "learning_rate": 3.9383725846733845e-05, "loss": 0.4872, "step": 6322 }, { "epoch": 1.7696613490064372, "grad_norm": 0.23020609811624973, "learning_rate": 3.9368652399511956e-05, "loss": 0.5028, "step": 6323 }, { "epoch": 1.7699412258606213, "grad_norm": 0.21793451134665903, "learning_rate": 3.935357996418769e-05, "loss": 0.4961, "step": 6324 }, { "epoch": 1.7702211027148054, "grad_norm": 0.24087508446996647, "learning_rate": 3.9338508542195654e-05, "loss": 0.5188, "step": 6325 }, { "epoch": 1.7705009795689897, "grad_norm": 0.22854684206317447, "learning_rate": 3.932343813497033e-05, "loss": 0.4797, "step": 6326 }, { "epoch": 1.770780856423174, "grad_norm": 0.2196907509152965, "learning_rate": 3.930836874394615e-05, "loss": 0.4937, "step": 6327 }, { "epoch": 1.7710607332773578, "grad_norm": 0.23307236479312057, "learning_rate": 3.9293300370557404e-05, "loss": 0.5012, "step": 6328 }, { "epoch": 1.771340610131542, "grad_norm": 0.22377438635370053, "learning_rate": 3.927823301623833e-05, "loss": 0.4956, "step": 6329 }, { "epoch": 1.7716204869857264, "grad_norm": 0.2374136571358909, "learning_rate": 3.926316668242304e-05, "loss": 0.5015, "step": 6330 }, { "epoch": 1.7719003638399105, "grad_norm": 0.23691055635127978, "learning_rate": 3.924810137054555e-05, "loss": 0.4839, "step": 6331 }, { "epoch": 1.7721802406940945, "grad_norm": 0.22235079629447116, "learning_rate": 3.923303708203979e-05, "loss": 0.4761, "step": 6332 }, { "epoch": 1.7724601175482788, "grad_norm": 0.23762699681876637, "learning_rate": 3.9217973818339593e-05, "loss": 0.4972, "step": 6333 }, { "epoch": 1.7727399944024629, "grad_norm": 0.22485042309032557, "learning_rate": 3.920291158087869e-05, "loss": 0.4842, "step": 6334 }, { "epoch": 1.773019871256647, "grad_norm": 0.2247140994121346, "learning_rate": 3.918785037109069e-05, "loss": 0.4886, "step": 6335 }, { "epoch": 1.7732997481108312, "grad_norm": 0.21513677179734114, "learning_rate": 3.9172790190409156e-05, "loss": 0.4937, "step": 6336 }, { "epoch": 1.7735796249650155, "grad_norm": 0.24485179374287883, "learning_rate": 3.91577310402675e-05, "loss": 0.4939, "step": 6337 }, { "epoch": 1.7738595018191996, "grad_norm": 0.22785343433801122, "learning_rate": 3.914267292209908e-05, "loss": 0.4643, "step": 6338 }, { "epoch": 1.7741393786733837, "grad_norm": 0.2245338842317454, "learning_rate": 3.9127615837337126e-05, "loss": 0.4948, "step": 6339 }, { "epoch": 1.774419255527568, "grad_norm": 0.2285664646257787, "learning_rate": 3.911255978741477e-05, "loss": 0.5124, "step": 6340 }, { "epoch": 1.774699132381752, "grad_norm": 0.2245933140485806, "learning_rate": 3.909750477376508e-05, "loss": 0.4879, "step": 6341 }, { "epoch": 1.774979009235936, "grad_norm": 0.2208241866180384, "learning_rate": 3.908245079782098e-05, "loss": 0.4977, "step": 6342 }, { "epoch": 1.7752588860901204, "grad_norm": 0.23605904644450681, "learning_rate": 3.906739786101533e-05, "loss": 0.4851, "step": 6343 }, { "epoch": 1.7755387629443045, "grad_norm": 0.22801536193033944, "learning_rate": 3.9052345964780876e-05, "loss": 0.474, "step": 6344 }, { "epoch": 1.7758186397984885, "grad_norm": 0.22982947570701895, "learning_rate": 3.9037295110550254e-05, "loss": 0.4867, "step": 6345 }, { "epoch": 1.7760985166526728, "grad_norm": 0.21889992668874073, "learning_rate": 3.902224529975602e-05, "loss": 0.479, "step": 6346 }, { "epoch": 1.7763783935068571, "grad_norm": 0.2359937738567436, "learning_rate": 3.900719653383063e-05, "loss": 0.4913, "step": 6347 }, { "epoch": 1.7766582703610412, "grad_norm": 0.22221531659324922, "learning_rate": 3.899214881420642e-05, "loss": 0.4932, "step": 6348 }, { "epoch": 1.7769381472152252, "grad_norm": 0.23793621342928006, "learning_rate": 3.8977102142315645e-05, "loss": 0.4904, "step": 6349 }, { "epoch": 1.7772180240694095, "grad_norm": 0.2258191464006303, "learning_rate": 3.8962056519590465e-05, "loss": 0.4933, "step": 6350 }, { "epoch": 1.7774979009235936, "grad_norm": 0.2244462131808889, "learning_rate": 3.894701194746291e-05, "loss": 0.4742, "step": 6351 }, { "epoch": 1.7777777777777777, "grad_norm": 0.22935811664036207, "learning_rate": 3.8931968427364976e-05, "loss": 0.4815, "step": 6352 }, { "epoch": 1.778057654631962, "grad_norm": 0.2283719653872888, "learning_rate": 3.891692596072846e-05, "loss": 0.4958, "step": 6353 }, { "epoch": 1.7783375314861463, "grad_norm": 0.22735346225222564, "learning_rate": 3.890188454898514e-05, "loss": 0.4862, "step": 6354 }, { "epoch": 1.77861740834033, "grad_norm": 0.22510619282237004, "learning_rate": 3.8886844193566655e-05, "loss": 0.5052, "step": 6355 }, { "epoch": 1.7788972851945144, "grad_norm": 0.23266100370870244, "learning_rate": 3.887180489590456e-05, "loss": 0.5101, "step": 6356 }, { "epoch": 1.7791771620486987, "grad_norm": 0.2366196477148252, "learning_rate": 3.885676665743029e-05, "loss": 0.4814, "step": 6357 }, { "epoch": 1.7794570389028828, "grad_norm": 0.23246604857315453, "learning_rate": 3.8841729479575225e-05, "loss": 0.4884, "step": 6358 }, { "epoch": 1.7797369157570668, "grad_norm": 0.22894957258348297, "learning_rate": 3.882669336377059e-05, "loss": 0.4946, "step": 6359 }, { "epoch": 1.7800167926112511, "grad_norm": 0.22782382461275164, "learning_rate": 3.8811658311447535e-05, "loss": 0.4813, "step": 6360 }, { "epoch": 1.7802966694654352, "grad_norm": 0.2235324323506944, "learning_rate": 3.87966243240371e-05, "loss": 0.4987, "step": 6361 }, { "epoch": 1.7805765463196193, "grad_norm": 0.22887829375861352, "learning_rate": 3.878159140297024e-05, "loss": 0.5041, "step": 6362 }, { "epoch": 1.7808564231738035, "grad_norm": 0.22269130801298512, "learning_rate": 3.8766559549677786e-05, "loss": 0.4696, "step": 6363 }, { "epoch": 1.7811363000279878, "grad_norm": 0.21900743631481634, "learning_rate": 3.8751528765590485e-05, "loss": 0.4924, "step": 6364 }, { "epoch": 1.7814161768821717, "grad_norm": 0.2344240137873727, "learning_rate": 3.8736499052138965e-05, "loss": 0.4932, "step": 6365 }, { "epoch": 1.781696053736356, "grad_norm": 0.2247852256750384, "learning_rate": 3.872147041075378e-05, "loss": 0.4954, "step": 6366 }, { "epoch": 1.7819759305905403, "grad_norm": 0.23149465846396255, "learning_rate": 3.870644284286534e-05, "loss": 0.4864, "step": 6367 }, { "epoch": 1.7822558074447243, "grad_norm": 0.2251673707119352, "learning_rate": 3.869141634990399e-05, "loss": 0.4927, "step": 6368 }, { "epoch": 1.7825356842989084, "grad_norm": 0.2380626169847424, "learning_rate": 3.8676390933299974e-05, "loss": 0.482, "step": 6369 }, { "epoch": 1.7828155611530927, "grad_norm": 0.22734488945781423, "learning_rate": 3.86613665944834e-05, "loss": 0.4882, "step": 6370 }, { "epoch": 1.7830954380072768, "grad_norm": 0.23057205016926202, "learning_rate": 3.864634333488433e-05, "loss": 0.476, "step": 6371 }, { "epoch": 1.7833753148614608, "grad_norm": 0.23982531779577837, "learning_rate": 3.863132115593263e-05, "loss": 0.4823, "step": 6372 }, { "epoch": 1.7836551917156451, "grad_norm": 0.22970950953957034, "learning_rate": 3.8616300059058144e-05, "loss": 0.4971, "step": 6373 }, { "epoch": 1.7839350685698294, "grad_norm": 0.2258159796651948, "learning_rate": 3.860128004569059e-05, "loss": 0.4829, "step": 6374 }, { "epoch": 1.7842149454240135, "grad_norm": 0.23586354154872846, "learning_rate": 3.8586261117259575e-05, "loss": 0.4871, "step": 6375 }, { "epoch": 1.7844948222781976, "grad_norm": 0.23842329177186955, "learning_rate": 3.857124327519463e-05, "loss": 0.5011, "step": 6376 }, { "epoch": 1.7847746991323818, "grad_norm": 0.23156115393268714, "learning_rate": 3.8556226520925134e-05, "loss": 0.4647, "step": 6377 }, { "epoch": 1.785054575986566, "grad_norm": 0.2309254906804709, "learning_rate": 3.854121085588041e-05, "loss": 0.4879, "step": 6378 }, { "epoch": 1.78533445284075, "grad_norm": 0.23286436381940664, "learning_rate": 3.8526196281489655e-05, "loss": 0.4879, "step": 6379 }, { "epoch": 1.7856143296949343, "grad_norm": 0.23320900715097417, "learning_rate": 3.851118279918195e-05, "loss": 0.4818, "step": 6380 }, { "epoch": 1.7858942065491183, "grad_norm": 0.23225467578973877, "learning_rate": 3.84961704103863e-05, "loss": 0.4769, "step": 6381 }, { "epoch": 1.7861740834033024, "grad_norm": 0.22808060063133884, "learning_rate": 3.8481159116531596e-05, "loss": 0.4898, "step": 6382 }, { "epoch": 1.7864539602574867, "grad_norm": 0.2282644479443838, "learning_rate": 3.846614891904661e-05, "loss": 0.4759, "step": 6383 }, { "epoch": 1.786733837111671, "grad_norm": 0.23846114335204405, "learning_rate": 3.8451139819360024e-05, "loss": 0.4891, "step": 6384 }, { "epoch": 1.787013713965855, "grad_norm": 0.22896905378144364, "learning_rate": 3.8436131818900416e-05, "loss": 0.4646, "step": 6385 }, { "epoch": 1.7872935908200391, "grad_norm": 0.22096322501271812, "learning_rate": 3.842112491909624e-05, "loss": 0.4616, "step": 6386 }, { "epoch": 1.7875734676742234, "grad_norm": 0.2360268565352658, "learning_rate": 3.8406119121375895e-05, "loss": 0.4927, "step": 6387 }, { "epoch": 1.7878533445284075, "grad_norm": 0.2276378931920508, "learning_rate": 3.839111442716761e-05, "loss": 0.4745, "step": 6388 }, { "epoch": 1.7881332213825916, "grad_norm": 0.225107392841617, "learning_rate": 3.837611083789958e-05, "loss": 0.4779, "step": 6389 }, { "epoch": 1.7884130982367759, "grad_norm": 0.22067837867254925, "learning_rate": 3.8361108354999805e-05, "loss": 0.4652, "step": 6390 }, { "epoch": 1.78869297509096, "grad_norm": 0.22840495579109682, "learning_rate": 3.834610697989625e-05, "loss": 0.4784, "step": 6391 }, { "epoch": 1.788972851945144, "grad_norm": 0.23273210232128885, "learning_rate": 3.833110671401675e-05, "loss": 0.4944, "step": 6392 }, { "epoch": 1.7892527287993283, "grad_norm": 0.2373862840506717, "learning_rate": 3.831610755878905e-05, "loss": 0.512, "step": 6393 }, { "epoch": 1.7895326056535126, "grad_norm": 0.23747946030365935, "learning_rate": 3.830110951564077e-05, "loss": 0.4902, "step": 6394 }, { "epoch": 1.7898124825076966, "grad_norm": 0.23891899288686125, "learning_rate": 3.828611258599944e-05, "loss": 0.4834, "step": 6395 }, { "epoch": 1.7900923593618807, "grad_norm": 0.23151568879370174, "learning_rate": 3.8271116771292465e-05, "loss": 0.4863, "step": 6396 }, { "epoch": 1.790372236216065, "grad_norm": 0.2345709172082861, "learning_rate": 3.825612207294716e-05, "loss": 0.4816, "step": 6397 }, { "epoch": 1.790652113070249, "grad_norm": 0.24314278190424068, "learning_rate": 3.824112849239074e-05, "loss": 0.4779, "step": 6398 }, { "epoch": 1.7909319899244331, "grad_norm": 0.24296716811599, "learning_rate": 3.822613603105028e-05, "loss": 0.4901, "step": 6399 }, { "epoch": 1.7912118667786174, "grad_norm": 0.2340895854999377, "learning_rate": 3.821114469035279e-05, "loss": 0.4665, "step": 6400 }, { "epoch": 1.7914917436328017, "grad_norm": 0.22546872352764571, "learning_rate": 3.819615447172514e-05, "loss": 0.4745, "step": 6401 }, { "epoch": 1.7917716204869856, "grad_norm": 0.23117676908656137, "learning_rate": 3.818116537659412e-05, "loss": 0.4933, "step": 6402 }, { "epoch": 1.7920514973411699, "grad_norm": 0.223562285981355, "learning_rate": 3.816617740638638e-05, "loss": 0.4813, "step": 6403 }, { "epoch": 1.7923313741953542, "grad_norm": 0.22966780341359835, "learning_rate": 3.815119056252851e-05, "loss": 0.5076, "step": 6404 }, { "epoch": 1.7926112510495382, "grad_norm": 0.22153350027548668, "learning_rate": 3.813620484644696e-05, "loss": 0.464, "step": 6405 }, { "epoch": 1.7928911279037223, "grad_norm": 0.2291257171279058, "learning_rate": 3.812122025956807e-05, "loss": 0.4692, "step": 6406 }, { "epoch": 1.7931710047579066, "grad_norm": 0.2446160171924654, "learning_rate": 3.810623680331811e-05, "loss": 0.494, "step": 6407 }, { "epoch": 1.7934508816120907, "grad_norm": 0.22334207412154658, "learning_rate": 3.8091254479123156e-05, "loss": 0.5052, "step": 6408 }, { "epoch": 1.7937307584662747, "grad_norm": 0.2287809965363887, "learning_rate": 3.8076273288409256e-05, "loss": 0.4681, "step": 6409 }, { "epoch": 1.794010635320459, "grad_norm": 0.23459820544496301, "learning_rate": 3.806129323260236e-05, "loss": 0.5016, "step": 6410 }, { "epoch": 1.7942905121746433, "grad_norm": 0.23928913174996086, "learning_rate": 3.8046314313128253e-05, "loss": 0.4891, "step": 6411 }, { "epoch": 1.7945703890288272, "grad_norm": 0.23311269055296494, "learning_rate": 3.8031336531412636e-05, "loss": 0.4928, "step": 6412 }, { "epoch": 1.7948502658830114, "grad_norm": 0.23535177840249255, "learning_rate": 3.801635988888111e-05, "loss": 0.4765, "step": 6413 }, { "epoch": 1.7951301427371957, "grad_norm": 0.2239102737937147, "learning_rate": 3.8001384386959156e-05, "loss": 0.488, "step": 6414 }, { "epoch": 1.7954100195913798, "grad_norm": 0.23349315077746952, "learning_rate": 3.798641002707215e-05, "loss": 0.4838, "step": 6415 }, { "epoch": 1.7956898964455639, "grad_norm": 0.23442839874047128, "learning_rate": 3.7971436810645366e-05, "loss": 0.4855, "step": 6416 }, { "epoch": 1.7959697732997482, "grad_norm": 0.23515838770678582, "learning_rate": 3.795646473910396e-05, "loss": 0.4978, "step": 6417 }, { "epoch": 1.7962496501539322, "grad_norm": 0.23190432868173524, "learning_rate": 3.794149381387297e-05, "loss": 0.4781, "step": 6418 }, { "epoch": 1.7965295270081163, "grad_norm": 0.23091824593707724, "learning_rate": 3.7926524036377364e-05, "loss": 0.4752, "step": 6419 }, { "epoch": 1.7968094038623006, "grad_norm": 0.23437018190683176, "learning_rate": 3.791155540804194e-05, "loss": 0.5035, "step": 6420 }, { "epoch": 1.7970892807164849, "grad_norm": 0.21984239262466293, "learning_rate": 3.789658793029145e-05, "loss": 0.4884, "step": 6421 }, { "epoch": 1.797369157570669, "grad_norm": 0.22807071931345446, "learning_rate": 3.78816216045505e-05, "loss": 0.4783, "step": 6422 }, { "epoch": 1.797649034424853, "grad_norm": 0.22784355353303584, "learning_rate": 3.786665643224358e-05, "loss": 0.4728, "step": 6423 }, { "epoch": 1.7979289112790373, "grad_norm": 0.2347225972423178, "learning_rate": 3.785169241479509e-05, "loss": 0.5039, "step": 6424 }, { "epoch": 1.7982087881332214, "grad_norm": 0.22206278320218145, "learning_rate": 3.783672955362935e-05, "loss": 0.5007, "step": 6425 }, { "epoch": 1.7984886649874054, "grad_norm": 0.2351551878025639, "learning_rate": 3.7821767850170466e-05, "loss": 0.4912, "step": 6426 }, { "epoch": 1.7987685418415897, "grad_norm": 0.23269033290744898, "learning_rate": 3.780680730584253e-05, "loss": 0.4968, "step": 6427 }, { "epoch": 1.7990484186957738, "grad_norm": 0.22914788941688, "learning_rate": 3.779184792206951e-05, "loss": 0.475, "step": 6428 }, { "epoch": 1.7993282955499579, "grad_norm": 0.22543778209049295, "learning_rate": 3.777688970027524e-05, "loss": 0.4863, "step": 6429 }, { "epoch": 1.7996081724041422, "grad_norm": 0.22460144271306612, "learning_rate": 3.776193264188344e-05, "loss": 0.4947, "step": 6430 }, { "epoch": 1.7998880492583265, "grad_norm": 0.21557219381407622, "learning_rate": 3.774697674831775e-05, "loss": 0.4765, "step": 6431 }, { "epoch": 1.8001679261125105, "grad_norm": 0.23275502909831608, "learning_rate": 3.7732022021001664e-05, "loss": 0.4858, "step": 6432 }, { "epoch": 1.8004478029666946, "grad_norm": 0.23199607288966145, "learning_rate": 3.7717068461358596e-05, "loss": 0.4903, "step": 6433 }, { "epoch": 1.800727679820879, "grad_norm": 0.2278125739350418, "learning_rate": 3.770211607081183e-05, "loss": 0.4872, "step": 6434 }, { "epoch": 1.801007556675063, "grad_norm": 0.2379301071524677, "learning_rate": 3.7687164850784526e-05, "loss": 0.479, "step": 6435 }, { "epoch": 1.801287433529247, "grad_norm": 0.22751805524157373, "learning_rate": 3.767221480269978e-05, "loss": 0.4873, "step": 6436 }, { "epoch": 1.8015673103834313, "grad_norm": 0.2309160753830912, "learning_rate": 3.765726592798052e-05, "loss": 0.4618, "step": 6437 }, { "epoch": 1.8018471872376156, "grad_norm": 0.21758202907819693, "learning_rate": 3.764231822804958e-05, "loss": 0.4732, "step": 6438 }, { "epoch": 1.8021270640917995, "grad_norm": 0.23736044965375436, "learning_rate": 3.762737170432973e-05, "loss": 0.4818, "step": 6439 }, { "epoch": 1.8024069409459837, "grad_norm": 0.23311279739565727, "learning_rate": 3.761242635824356e-05, "loss": 0.494, "step": 6440 }, { "epoch": 1.802686817800168, "grad_norm": 0.22807656196516024, "learning_rate": 3.759748219121359e-05, "loss": 0.4803, "step": 6441 }, { "epoch": 1.802966694654352, "grad_norm": 0.2321034621612971, "learning_rate": 3.75825392046622e-05, "loss": 0.4779, "step": 6442 }, { "epoch": 1.8032465715085362, "grad_norm": 0.22768239233460297, "learning_rate": 3.7567597400011703e-05, "loss": 0.4735, "step": 6443 }, { "epoch": 1.8035264483627205, "grad_norm": 0.22936595037518426, "learning_rate": 3.7552656778684216e-05, "loss": 0.4825, "step": 6444 }, { "epoch": 1.8038063252169045, "grad_norm": 0.2237806501924362, "learning_rate": 3.753771734210183e-05, "loss": 0.4796, "step": 6445 }, { "epoch": 1.8040862020710886, "grad_norm": 0.23211481534268055, "learning_rate": 3.752277909168648e-05, "loss": 0.4845, "step": 6446 }, { "epoch": 1.804366078925273, "grad_norm": 0.23266260150977672, "learning_rate": 3.750784202886001e-05, "loss": 0.4851, "step": 6447 }, { "epoch": 1.8046459557794572, "grad_norm": 0.23755488035578567, "learning_rate": 3.7492906155044114e-05, "loss": 0.5005, "step": 6448 }, { "epoch": 1.804925832633641, "grad_norm": 0.2287752327500001, "learning_rate": 3.747797147166042e-05, "loss": 0.4707, "step": 6449 }, { "epoch": 1.8052057094878253, "grad_norm": 0.22820070453364244, "learning_rate": 3.746303798013041e-05, "loss": 0.4666, "step": 6450 }, { "epoch": 1.8054855863420096, "grad_norm": 0.22947505886552355, "learning_rate": 3.744810568187545e-05, "loss": 0.489, "step": 6451 }, { "epoch": 1.8057654631961937, "grad_norm": 0.22844334265879734, "learning_rate": 3.7433174578316835e-05, "loss": 0.5023, "step": 6452 }, { "epoch": 1.8060453400503778, "grad_norm": 0.21741136174269188, "learning_rate": 3.741824467087569e-05, "loss": 0.475, "step": 6453 }, { "epoch": 1.806325216904562, "grad_norm": 0.2148390706314293, "learning_rate": 3.7403315960973064e-05, "loss": 0.505, "step": 6454 }, { "epoch": 1.8066050937587461, "grad_norm": 0.2288312399873347, "learning_rate": 3.7388388450029855e-05, "loss": 0.4684, "step": 6455 }, { "epoch": 1.8068849706129302, "grad_norm": 0.2285322793435309, "learning_rate": 3.737346213946691e-05, "loss": 0.4773, "step": 6456 }, { "epoch": 1.8071648474671145, "grad_norm": 0.2239650476414393, "learning_rate": 3.7358537030704896e-05, "loss": 0.4818, "step": 6457 }, { "epoch": 1.8074447243212988, "grad_norm": 0.22043383339681774, "learning_rate": 3.734361312516442e-05, "loss": 0.508, "step": 6458 }, { "epoch": 1.8077246011754828, "grad_norm": 0.2242485484748912, "learning_rate": 3.732869042426592e-05, "loss": 0.4914, "step": 6459 }, { "epoch": 1.808004478029667, "grad_norm": 0.24177245495128274, "learning_rate": 3.731376892942976e-05, "loss": 0.4846, "step": 6460 }, { "epoch": 1.8082843548838512, "grad_norm": 0.22739999611614095, "learning_rate": 3.729884864207618e-05, "loss": 0.477, "step": 6461 }, { "epoch": 1.8085642317380353, "grad_norm": 0.23692348971431781, "learning_rate": 3.7283929563625294e-05, "loss": 0.4876, "step": 6462 }, { "epoch": 1.8088441085922193, "grad_norm": 0.22754627198776253, "learning_rate": 3.72690116954971e-05, "loss": 0.4812, "step": 6463 }, { "epoch": 1.8091239854464036, "grad_norm": 0.2282740894772259, "learning_rate": 3.725409503911151e-05, "loss": 0.4742, "step": 6464 }, { "epoch": 1.8094038623005877, "grad_norm": 0.22680885610375315, "learning_rate": 3.723917959588828e-05, "loss": 0.4716, "step": 6465 }, { "epoch": 1.8096837391547718, "grad_norm": 0.2397221881680731, "learning_rate": 3.722426536724708e-05, "loss": 0.4512, "step": 6466 }, { "epoch": 1.809963616008956, "grad_norm": 0.2357184203711619, "learning_rate": 3.720935235460745e-05, "loss": 0.5143, "step": 6467 }, { "epoch": 1.8102434928631403, "grad_norm": 0.23959450410895225, "learning_rate": 3.719444055938883e-05, "loss": 0.4687, "step": 6468 }, { "epoch": 1.8105233697173244, "grad_norm": 0.23308362789130577, "learning_rate": 3.717952998301052e-05, "loss": 0.4992, "step": 6469 }, { "epoch": 1.8108032465715085, "grad_norm": 0.231560784337764, "learning_rate": 3.716462062689172e-05, "loss": 0.4846, "step": 6470 }, { "epoch": 1.8110831234256928, "grad_norm": 0.22361674901817202, "learning_rate": 3.714971249245152e-05, "loss": 0.4763, "step": 6471 }, { "epoch": 1.8113630002798768, "grad_norm": 0.2192466550776247, "learning_rate": 3.713480558110887e-05, "loss": 0.4574, "step": 6472 }, { "epoch": 1.811642877134061, "grad_norm": 0.2245220027892542, "learning_rate": 3.711989989428263e-05, "loss": 0.4688, "step": 6473 }, { "epoch": 1.8119227539882452, "grad_norm": 0.22460989978525742, "learning_rate": 3.710499543339152e-05, "loss": 0.5207, "step": 6474 }, { "epoch": 1.8122026308424295, "grad_norm": 0.22365795712389236, "learning_rate": 3.709009219985417e-05, "loss": 0.4746, "step": 6475 }, { "epoch": 1.8124825076966133, "grad_norm": 0.22742510462896132, "learning_rate": 3.707519019508907e-05, "loss": 0.4616, "step": 6476 }, { "epoch": 1.8127623845507976, "grad_norm": 0.23270651754693447, "learning_rate": 3.706028942051461e-05, "loss": 0.4919, "step": 6477 }, { "epoch": 1.813042261404982, "grad_norm": 0.22924717727397215, "learning_rate": 3.704538987754903e-05, "loss": 0.4571, "step": 6478 }, { "epoch": 1.813322138259166, "grad_norm": 0.2324567980807715, "learning_rate": 3.7030491567610506e-05, "loss": 0.4908, "step": 6479 }, { "epoch": 1.81360201511335, "grad_norm": 0.22630809404807603, "learning_rate": 3.701559449211705e-05, "loss": 0.4715, "step": 6480 }, { "epoch": 1.8138818919675344, "grad_norm": 0.2325789468179387, "learning_rate": 3.700069865248658e-05, "loss": 0.4995, "step": 6481 }, { "epoch": 1.8141617688217184, "grad_norm": 0.22868875421283755, "learning_rate": 3.698580405013688e-05, "loss": 0.4681, "step": 6482 }, { "epoch": 1.8144416456759025, "grad_norm": 0.2228385106509269, "learning_rate": 3.697091068648564e-05, "loss": 0.4979, "step": 6483 }, { "epoch": 1.8147215225300868, "grad_norm": 0.23306944761670445, "learning_rate": 3.6956018562950415e-05, "loss": 0.4873, "step": 6484 }, { "epoch": 1.815001399384271, "grad_norm": 0.24804230272369826, "learning_rate": 3.694112768094864e-05, "loss": 0.4971, "step": 6485 }, { "epoch": 1.815281276238455, "grad_norm": 0.2140979449228239, "learning_rate": 3.692623804189764e-05, "loss": 0.4893, "step": 6486 }, { "epoch": 1.8155611530926392, "grad_norm": 0.2312693550145618, "learning_rate": 3.691134964721462e-05, "loss": 0.4692, "step": 6487 }, { "epoch": 1.8158410299468235, "grad_norm": 0.22042986305597487, "learning_rate": 3.689646249831668e-05, "loss": 0.4956, "step": 6488 }, { "epoch": 1.8161209068010076, "grad_norm": 0.23717865677569938, "learning_rate": 3.688157659662076e-05, "loss": 0.5061, "step": 6489 }, { "epoch": 1.8164007836551916, "grad_norm": 0.22830538337239978, "learning_rate": 3.686669194354371e-05, "loss": 0.4825, "step": 6490 }, { "epoch": 1.816680660509376, "grad_norm": 0.23548809670770163, "learning_rate": 3.6851808540502295e-05, "loss": 0.4824, "step": 6491 }, { "epoch": 1.81696053736356, "grad_norm": 0.23247411953207245, "learning_rate": 3.6836926388913095e-05, "loss": 0.4702, "step": 6492 }, { "epoch": 1.817240414217744, "grad_norm": 0.2322560620840003, "learning_rate": 3.682204549019261e-05, "loss": 0.4958, "step": 6493 }, { "epoch": 1.8175202910719284, "grad_norm": 0.2367174302539643, "learning_rate": 3.680716584575721e-05, "loss": 0.4842, "step": 6494 }, { "epoch": 1.8178001679261127, "grad_norm": 0.2262330883899346, "learning_rate": 3.679228745702315e-05, "loss": 0.4709, "step": 6495 }, { "epoch": 1.8180800447802967, "grad_norm": 0.23941925398908787, "learning_rate": 3.677741032540656e-05, "loss": 0.4847, "step": 6496 }, { "epoch": 1.8183599216344808, "grad_norm": 0.23766584439123845, "learning_rate": 3.6762534452323473e-05, "loss": 0.4977, "step": 6497 }, { "epoch": 1.818639798488665, "grad_norm": 0.22628950665675004, "learning_rate": 3.674765983918975e-05, "loss": 0.4898, "step": 6498 }, { "epoch": 1.8189196753428492, "grad_norm": 0.2325501537253925, "learning_rate": 3.673278648742118e-05, "loss": 0.5121, "step": 6499 }, { "epoch": 1.8191995521970332, "grad_norm": 0.21975349812296702, "learning_rate": 3.671791439843343e-05, "loss": 0.4696, "step": 6500 }, { "epoch": 1.8194794290512175, "grad_norm": 0.22476305581145867, "learning_rate": 3.6703043573642006e-05, "loss": 0.4782, "step": 6501 }, { "epoch": 1.8197593059054016, "grad_norm": 0.2303965136921295, "learning_rate": 3.668817401446234e-05, "loss": 0.5142, "step": 6502 }, { "epoch": 1.8200391827595856, "grad_norm": 0.21528946747237063, "learning_rate": 3.667330572230974e-05, "loss": 0.4836, "step": 6503 }, { "epoch": 1.82031905961377, "grad_norm": 0.2272320800398011, "learning_rate": 3.665843869859934e-05, "loss": 0.4773, "step": 6504 }, { "epoch": 1.8205989364679542, "grad_norm": 0.22549971262125146, "learning_rate": 3.664357294474622e-05, "loss": 0.4868, "step": 6505 }, { "epoch": 1.8208788133221383, "grad_norm": 0.2268554758779282, "learning_rate": 3.6628708462165305e-05, "loss": 0.5041, "step": 6506 }, { "epoch": 1.8211586901763224, "grad_norm": 0.234173341629569, "learning_rate": 3.6613845252271384e-05, "loss": 0.4934, "step": 6507 }, { "epoch": 1.8214385670305067, "grad_norm": 0.22910008037893956, "learning_rate": 3.659898331647918e-05, "loss": 0.4912, "step": 6508 }, { "epoch": 1.8217184438846907, "grad_norm": 0.2292380469118307, "learning_rate": 3.658412265620325e-05, "loss": 0.505, "step": 6509 }, { "epoch": 1.8219983207388748, "grad_norm": 0.2117386117147549, "learning_rate": 3.656926327285803e-05, "loss": 0.484, "step": 6510 }, { "epoch": 1.822278197593059, "grad_norm": 0.22782567886548166, "learning_rate": 3.655440516785785e-05, "loss": 0.4701, "step": 6511 }, { "epoch": 1.8225580744472432, "grad_norm": 0.23506440586366945, "learning_rate": 3.6539548342616915e-05, "loss": 0.4761, "step": 6512 }, { "epoch": 1.8228379513014272, "grad_norm": 0.22249264011253417, "learning_rate": 3.65246927985493e-05, "loss": 0.4787, "step": 6513 }, { "epoch": 1.8231178281556115, "grad_norm": 0.23584814428475087, "learning_rate": 3.650983853706896e-05, "loss": 0.4827, "step": 6514 }, { "epoch": 1.8233977050097958, "grad_norm": 0.23505181554847834, "learning_rate": 3.6494985559589756e-05, "loss": 0.4771, "step": 6515 }, { "epoch": 1.8236775818639799, "grad_norm": 0.22564307430272826, "learning_rate": 3.648013386752538e-05, "loss": 0.4863, "step": 6516 }, { "epoch": 1.823957458718164, "grad_norm": 0.22307986252388057, "learning_rate": 3.6465283462289425e-05, "loss": 0.4731, "step": 6517 }, { "epoch": 1.8242373355723482, "grad_norm": 0.23434808973815136, "learning_rate": 3.6450434345295365e-05, "loss": 0.4793, "step": 6518 }, { "epoch": 1.8245172124265323, "grad_norm": 0.23478699045619666, "learning_rate": 3.643558651795654e-05, "loss": 0.4981, "step": 6519 }, { "epoch": 1.8247970892807164, "grad_norm": 0.2247815529834582, "learning_rate": 3.6420739981686204e-05, "loss": 0.4651, "step": 6520 }, { "epoch": 1.8250769661349007, "grad_norm": 0.22969336588368408, "learning_rate": 3.6405894737897414e-05, "loss": 0.4951, "step": 6521 }, { "epoch": 1.825356842989085, "grad_norm": 0.22075444109330236, "learning_rate": 3.639105078800319e-05, "loss": 0.4958, "step": 6522 }, { "epoch": 1.8256367198432688, "grad_norm": 0.224078144205714, "learning_rate": 3.637620813341636e-05, "loss": 0.4938, "step": 6523 }, { "epoch": 1.825916596697453, "grad_norm": 0.2291087961429806, "learning_rate": 3.6361366775549656e-05, "loss": 0.4964, "step": 6524 }, { "epoch": 1.8261964735516374, "grad_norm": 0.2433034533933429, "learning_rate": 3.6346526715815705e-05, "loss": 0.4832, "step": 6525 }, { "epoch": 1.8264763504058215, "grad_norm": 0.23054066820239014, "learning_rate": 3.633168795562698e-05, "loss": 0.4901, "step": 6526 }, { "epoch": 1.8267562272600055, "grad_norm": 0.22596352161533168, "learning_rate": 3.631685049639586e-05, "loss": 0.491, "step": 6527 }, { "epoch": 1.8270361041141898, "grad_norm": 0.23062114972409564, "learning_rate": 3.630201433953456e-05, "loss": 0.4836, "step": 6528 }, { "epoch": 1.8273159809683739, "grad_norm": 0.23658848890530146, "learning_rate": 3.6287179486455206e-05, "loss": 0.4775, "step": 6529 }, { "epoch": 1.827595857822558, "grad_norm": 0.22364569244115756, "learning_rate": 3.6272345938569777e-05, "loss": 0.4859, "step": 6530 }, { "epoch": 1.8278757346767422, "grad_norm": 0.22561869333344586, "learning_rate": 3.6257513697290145e-05, "loss": 0.4676, "step": 6531 }, { "epoch": 1.8281556115309265, "grad_norm": 0.23534361999312964, "learning_rate": 3.624268276402806e-05, "loss": 0.4816, "step": 6532 }, { "epoch": 1.8284354883851104, "grad_norm": 0.22658793404892805, "learning_rate": 3.622785314019513e-05, "loss": 0.4779, "step": 6533 }, { "epoch": 1.8287153652392947, "grad_norm": 0.23492950159751907, "learning_rate": 3.621302482720284e-05, "loss": 0.4906, "step": 6534 }, { "epoch": 1.828995242093479, "grad_norm": 0.23279797475181524, "learning_rate": 3.619819782646256e-05, "loss": 0.4961, "step": 6535 }, { "epoch": 1.829275118947663, "grad_norm": 0.2425314395695167, "learning_rate": 3.6183372139385546e-05, "loss": 0.4855, "step": 6536 }, { "epoch": 1.829554995801847, "grad_norm": 0.22813066719609001, "learning_rate": 3.61685477673829e-05, "loss": 0.4988, "step": 6537 }, { "epoch": 1.8298348726560314, "grad_norm": 0.24460833182073274, "learning_rate": 3.615372471186562e-05, "loss": 0.4935, "step": 6538 }, { "epoch": 1.8301147495102155, "grad_norm": 0.225970454542716, "learning_rate": 3.613890297424457e-05, "loss": 0.4664, "step": 6539 }, { "epoch": 1.8303946263643995, "grad_norm": 0.22757166180289132, "learning_rate": 3.612408255593049e-05, "loss": 0.4829, "step": 6540 }, { "epoch": 1.8306745032185838, "grad_norm": 0.2276845648399905, "learning_rate": 3.6109263458333995e-05, "loss": 0.4789, "step": 6541 }, { "epoch": 1.8309543800727681, "grad_norm": 0.23151724478412966, "learning_rate": 3.609444568286559e-05, "loss": 0.47, "step": 6542 }, { "epoch": 1.8312342569269522, "grad_norm": 0.23053407200540227, "learning_rate": 3.607962923093563e-05, "loss": 0.5052, "step": 6543 }, { "epoch": 1.8315141337811363, "grad_norm": 0.2377910574957702, "learning_rate": 3.606481410395435e-05, "loss": 0.4953, "step": 6544 }, { "epoch": 1.8317940106353205, "grad_norm": 0.23321664208838241, "learning_rate": 3.605000030333185e-05, "loss": 0.5014, "step": 6545 }, { "epoch": 1.8320738874895046, "grad_norm": 0.23429185721320184, "learning_rate": 3.6035187830478144e-05, "loss": 0.5034, "step": 6546 }, { "epoch": 1.8323537643436887, "grad_norm": 0.230345886930467, "learning_rate": 3.602037668680308e-05, "loss": 0.4694, "step": 6547 }, { "epoch": 1.832633641197873, "grad_norm": 0.21593717678789537, "learning_rate": 3.6005566873716376e-05, "loss": 0.4658, "step": 6548 }, { "epoch": 1.832913518052057, "grad_norm": 0.21838213114269892, "learning_rate": 3.5990758392627655e-05, "loss": 0.481, "step": 6549 }, { "epoch": 1.8331933949062411, "grad_norm": 0.2223844140768618, "learning_rate": 3.597595124494639e-05, "loss": 0.4811, "step": 6550 }, { "epoch": 1.8334732717604254, "grad_norm": 0.2237573796585568, "learning_rate": 3.596114543208194e-05, "loss": 0.4745, "step": 6551 }, { "epoch": 1.8337531486146097, "grad_norm": 0.22706248415344202, "learning_rate": 3.594634095544353e-05, "loss": 0.4772, "step": 6552 }, { "epoch": 1.8340330254687938, "grad_norm": 0.2399243951861105, "learning_rate": 3.593153781644024e-05, "loss": 0.4908, "step": 6553 }, { "epoch": 1.8343129023229778, "grad_norm": 0.22794553449691923, "learning_rate": 3.5916736016481065e-05, "loss": 0.4951, "step": 6554 }, { "epoch": 1.8345927791771621, "grad_norm": 0.21512098560738155, "learning_rate": 3.5901935556974834e-05, "loss": 0.474, "step": 6555 }, { "epoch": 1.8348726560313462, "grad_norm": 0.2297676345335937, "learning_rate": 3.588713643933027e-05, "loss": 0.4825, "step": 6556 }, { "epoch": 1.8351525328855303, "grad_norm": 0.22045759188560637, "learning_rate": 3.587233866495596e-05, "loss": 0.4857, "step": 6557 }, { "epoch": 1.8354324097397146, "grad_norm": 0.23160230731971757, "learning_rate": 3.5857542235260354e-05, "loss": 0.4833, "step": 6558 }, { "epoch": 1.8357122865938988, "grad_norm": 0.23279517202259958, "learning_rate": 3.584274715165179e-05, "loss": 0.518, "step": 6559 }, { "epoch": 1.8359921634480827, "grad_norm": 0.22913301949792936, "learning_rate": 3.5827953415538495e-05, "loss": 0.493, "step": 6560 }, { "epoch": 1.836272040302267, "grad_norm": 0.22332197541170207, "learning_rate": 3.581316102832851e-05, "loss": 0.4582, "step": 6561 }, { "epoch": 1.8365519171564513, "grad_norm": 0.21537955298112685, "learning_rate": 3.579836999142981e-05, "loss": 0.479, "step": 6562 }, { "epoch": 1.8368317940106353, "grad_norm": 0.22208188083115105, "learning_rate": 3.578358030625021e-05, "loss": 0.4949, "step": 6563 }, { "epoch": 1.8371116708648194, "grad_norm": 0.221294918604475, "learning_rate": 3.576879197419738e-05, "loss": 0.4923, "step": 6564 }, { "epoch": 1.8373915477190037, "grad_norm": 0.23670146131103706, "learning_rate": 3.57540049966789e-05, "loss": 0.4874, "step": 6565 }, { "epoch": 1.8376714245731878, "grad_norm": 0.22769863797499706, "learning_rate": 3.5739219375102195e-05, "loss": 0.4438, "step": 6566 }, { "epoch": 1.8379513014273718, "grad_norm": 0.23301064843160793, "learning_rate": 3.5724435110874575e-05, "loss": 0.4984, "step": 6567 }, { "epoch": 1.8382311782815561, "grad_norm": 0.2361435753693997, "learning_rate": 3.570965220540321e-05, "loss": 0.4968, "step": 6568 }, { "epoch": 1.8385110551357404, "grad_norm": 0.22412440380364346, "learning_rate": 3.5694870660095155e-05, "loss": 0.4815, "step": 6569 }, { "epoch": 1.8387909319899243, "grad_norm": 0.23031003962032542, "learning_rate": 3.568009047635732e-05, "loss": 0.4977, "step": 6570 }, { "epoch": 1.8390708088441086, "grad_norm": 0.2265965665561193, "learning_rate": 3.5665311655596477e-05, "loss": 0.4907, "step": 6571 }, { "epoch": 1.8393506856982929, "grad_norm": 0.21941080840045463, "learning_rate": 3.5650534199219296e-05, "loss": 0.4874, "step": 6572 }, { "epoch": 1.839630562552477, "grad_norm": 0.2307372989342663, "learning_rate": 3.563575810863231e-05, "loss": 0.5001, "step": 6573 }, { "epoch": 1.839910439406661, "grad_norm": 0.23681930184012304, "learning_rate": 3.562098338524189e-05, "loss": 0.4887, "step": 6574 }, { "epoch": 1.8401903162608453, "grad_norm": 0.22566788746641836, "learning_rate": 3.5606210030454336e-05, "loss": 0.4873, "step": 6575 }, { "epoch": 1.8404701931150294, "grad_norm": 0.23045563489193585, "learning_rate": 3.5591438045675754e-05, "loss": 0.4746, "step": 6576 }, { "epoch": 1.8407500699692134, "grad_norm": 0.22970062919591314, "learning_rate": 3.557666743231216e-05, "loss": 0.4868, "step": 6577 }, { "epoch": 1.8410299468233977, "grad_norm": 0.232551428834001, "learning_rate": 3.556189819176945e-05, "loss": 0.5005, "step": 6578 }, { "epoch": 1.841309823677582, "grad_norm": 0.2512464483416976, "learning_rate": 3.554713032545334e-05, "loss": 0.5052, "step": 6579 }, { "epoch": 1.841589700531766, "grad_norm": 0.2561037995866109, "learning_rate": 3.5532363834769466e-05, "loss": 0.4866, "step": 6580 }, { "epoch": 1.8418695773859501, "grad_norm": 0.23349099698411724, "learning_rate": 3.551759872112329e-05, "loss": 0.4706, "step": 6581 }, { "epoch": 1.8421494542401344, "grad_norm": 0.2400292340362861, "learning_rate": 3.550283498592018e-05, "loss": 0.4975, "step": 6582 }, { "epoch": 1.8424293310943185, "grad_norm": 0.23739387080033075, "learning_rate": 3.548807263056535e-05, "loss": 0.472, "step": 6583 }, { "epoch": 1.8427092079485026, "grad_norm": 0.23018052737349742, "learning_rate": 3.547331165646389e-05, "loss": 0.4953, "step": 6584 }, { "epoch": 1.8429890848026869, "grad_norm": 0.23040655492611717, "learning_rate": 3.545855206502076e-05, "loss": 0.4842, "step": 6585 }, { "epoch": 1.843268961656871, "grad_norm": 0.22808817307408025, "learning_rate": 3.544379385764077e-05, "loss": 0.4702, "step": 6586 }, { "epoch": 1.843548838511055, "grad_norm": 0.22607561763707232, "learning_rate": 3.542903703572863e-05, "loss": 0.4764, "step": 6587 }, { "epoch": 1.8438287153652393, "grad_norm": 0.22690591780656705, "learning_rate": 3.541428160068893e-05, "loss": 0.4811, "step": 6588 }, { "epoch": 1.8441085922194236, "grad_norm": 0.22569890558528055, "learning_rate": 3.539952755392605e-05, "loss": 0.4887, "step": 6589 }, { "epoch": 1.8443884690736077, "grad_norm": 0.22791151144576147, "learning_rate": 3.538477489684431e-05, "loss": 0.4849, "step": 6590 }, { "epoch": 1.8446683459277917, "grad_norm": 0.2309855937697562, "learning_rate": 3.537002363084788e-05, "loss": 0.4751, "step": 6591 }, { "epoch": 1.844948222781976, "grad_norm": 0.22107399622057003, "learning_rate": 3.535527375734078e-05, "loss": 0.4722, "step": 6592 }, { "epoch": 1.84522809963616, "grad_norm": 0.22591956207176844, "learning_rate": 3.534052527772692e-05, "loss": 0.4705, "step": 6593 }, { "epoch": 1.8455079764903441, "grad_norm": 0.22394642942919243, "learning_rate": 3.5325778193410066e-05, "loss": 0.4834, "step": 6594 }, { "epoch": 1.8457878533445284, "grad_norm": 0.23104025724382923, "learning_rate": 3.5311032505793875e-05, "loss": 0.4872, "step": 6595 }, { "epoch": 1.8460677301987127, "grad_norm": 0.23423258362383212, "learning_rate": 3.5296288216281816e-05, "loss": 0.4748, "step": 6596 }, { "epoch": 1.8463476070528966, "grad_norm": 0.2291913005070103, "learning_rate": 3.528154532627728e-05, "loss": 0.4748, "step": 6597 }, { "epoch": 1.8466274839070809, "grad_norm": 0.22803791237575421, "learning_rate": 3.5266803837183507e-05, "loss": 0.4768, "step": 6598 }, { "epoch": 1.8469073607612652, "grad_norm": 0.23021079120937982, "learning_rate": 3.525206375040358e-05, "loss": 0.4951, "step": 6599 }, { "epoch": 1.8471872376154492, "grad_norm": 0.23460469932875438, "learning_rate": 3.5237325067340485e-05, "loss": 0.4734, "step": 6600 }, { "epoch": 1.8474671144696333, "grad_norm": 0.21942483725409273, "learning_rate": 3.5222587789397046e-05, "loss": 0.4691, "step": 6601 }, { "epoch": 1.8477469913238176, "grad_norm": 0.2435986519977707, "learning_rate": 3.520785191797598e-05, "loss": 0.5043, "step": 6602 }, { "epoch": 1.8480268681780017, "grad_norm": 0.22883687635578676, "learning_rate": 3.519311745447985e-05, "loss": 0.501, "step": 6603 }, { "epoch": 1.8483067450321857, "grad_norm": 0.23179054577381408, "learning_rate": 3.517838440031107e-05, "loss": 0.4677, "step": 6604 }, { "epoch": 1.84858662188637, "grad_norm": 0.21903980354418212, "learning_rate": 3.516365275687198e-05, "loss": 0.4802, "step": 6605 }, { "epoch": 1.8488664987405543, "grad_norm": 0.22525928956838956, "learning_rate": 3.514892252556474e-05, "loss": 0.5016, "step": 6606 }, { "epoch": 1.8491463755947382, "grad_norm": 0.23713793510283132, "learning_rate": 3.513419370779134e-05, "loss": 0.5043, "step": 6607 }, { "epoch": 1.8494262524489224, "grad_norm": 0.2277633125686585, "learning_rate": 3.511946630495371e-05, "loss": 0.4862, "step": 6608 }, { "epoch": 1.8497061293031067, "grad_norm": 0.22653383162702354, "learning_rate": 3.51047403184536e-05, "loss": 0.4469, "step": 6609 }, { "epoch": 1.8499860061572908, "grad_norm": 0.22371585729474508, "learning_rate": 3.509001574969265e-05, "loss": 0.4788, "step": 6610 }, { "epoch": 1.8502658830114749, "grad_norm": 0.23568903915808184, "learning_rate": 3.507529260007234e-05, "loss": 0.473, "step": 6611 }, { "epoch": 1.8505457598656592, "grad_norm": 0.23328271347663027, "learning_rate": 3.506057087099404e-05, "loss": 0.5062, "step": 6612 }, { "epoch": 1.8508256367198432, "grad_norm": 0.22975755415061422, "learning_rate": 3.504585056385896e-05, "loss": 0.4913, "step": 6613 }, { "epoch": 1.8511055135740273, "grad_norm": 0.23899740895978278, "learning_rate": 3.50311316800682e-05, "loss": 0.4964, "step": 6614 }, { "epoch": 1.8513853904282116, "grad_norm": 0.2324594531990676, "learning_rate": 3.501641422102271e-05, "loss": 0.4926, "step": 6615 }, { "epoch": 1.8516652672823959, "grad_norm": 0.2240877633476286, "learning_rate": 3.500169818812329e-05, "loss": 0.4939, "step": 6616 }, { "epoch": 1.85194514413658, "grad_norm": 0.21802692671464374, "learning_rate": 3.498698358277064e-05, "loss": 0.4685, "step": 6617 }, { "epoch": 1.852225020990764, "grad_norm": 0.23417188850486598, "learning_rate": 3.497227040636529e-05, "loss": 0.4927, "step": 6618 }, { "epoch": 1.8525048978449483, "grad_norm": 0.22578949964344666, "learning_rate": 3.4957558660307655e-05, "loss": 0.4668, "step": 6619 }, { "epoch": 1.8527847746991324, "grad_norm": 0.2337330880335226, "learning_rate": 3.494284834599801e-05, "loss": 0.4906, "step": 6620 }, { "epoch": 1.8530646515533165, "grad_norm": 0.21849890336454536, "learning_rate": 3.4928139464836484e-05, "loss": 0.4718, "step": 6621 }, { "epoch": 1.8533445284075007, "grad_norm": 0.22638760452896164, "learning_rate": 3.491343201822307e-05, "loss": 0.4633, "step": 6622 }, { "epoch": 1.8536244052616848, "grad_norm": 0.23730653988761322, "learning_rate": 3.489872600755765e-05, "loss": 0.4808, "step": 6623 }, { "epoch": 1.8539042821158689, "grad_norm": 0.23330392054852062, "learning_rate": 3.488402143423997e-05, "loss": 0.4954, "step": 6624 }, { "epoch": 1.8541841589700532, "grad_norm": 0.2352002684184638, "learning_rate": 3.486931829966956e-05, "loss": 0.4723, "step": 6625 }, { "epoch": 1.8544640358242375, "grad_norm": 0.2262474444702206, "learning_rate": 3.48546166052459e-05, "loss": 0.4855, "step": 6626 }, { "epoch": 1.8547439126784215, "grad_norm": 0.23792312337110524, "learning_rate": 3.483991635236832e-05, "loss": 0.4909, "step": 6627 }, { "epoch": 1.8550237895326056, "grad_norm": 0.23675326339490255, "learning_rate": 3.4825217542435976e-05, "loss": 0.4925, "step": 6628 }, { "epoch": 1.85530366638679, "grad_norm": 0.2322328205565557, "learning_rate": 3.4810520176847926e-05, "loss": 0.478, "step": 6629 }, { "epoch": 1.855583543240974, "grad_norm": 0.23451994931984804, "learning_rate": 3.4795824257003066e-05, "loss": 0.505, "step": 6630 }, { "epoch": 1.855863420095158, "grad_norm": 0.23738871132729128, "learning_rate": 3.4781129784300173e-05, "loss": 0.4692, "step": 6631 }, { "epoch": 1.8561432969493423, "grad_norm": 0.23858813090463596, "learning_rate": 3.4766436760137855e-05, "loss": 0.5056, "step": 6632 }, { "epoch": 1.8564231738035264, "grad_norm": 0.23120069797033554, "learning_rate": 3.4751745185914616e-05, "loss": 0.4864, "step": 6633 }, { "epoch": 1.8567030506577105, "grad_norm": 0.2311163610037621, "learning_rate": 3.47370550630288e-05, "loss": 0.4959, "step": 6634 }, { "epoch": 1.8569829275118948, "grad_norm": 0.23299874761747652, "learning_rate": 3.472236639287863e-05, "loss": 0.4871, "step": 6635 }, { "epoch": 1.857262804366079, "grad_norm": 0.23315580319021786, "learning_rate": 3.470767917686218e-05, "loss": 0.4946, "step": 6636 }, { "epoch": 1.8575426812202631, "grad_norm": 0.21697351826870254, "learning_rate": 3.4692993416377384e-05, "loss": 0.4817, "step": 6637 }, { "epoch": 1.8578225580744472, "grad_norm": 0.23157225537121584, "learning_rate": 3.4678309112822045e-05, "loss": 0.5037, "step": 6638 }, { "epoch": 1.8581024349286315, "grad_norm": 0.21691501081625855, "learning_rate": 3.4663626267593804e-05, "loss": 0.482, "step": 6639 }, { "epoch": 1.8583823117828155, "grad_norm": 0.2255015824527897, "learning_rate": 3.464894488209022e-05, "loss": 0.4784, "step": 6640 }, { "epoch": 1.8586621886369996, "grad_norm": 0.2270068270032285, "learning_rate": 3.463426495770865e-05, "loss": 0.4746, "step": 6641 }, { "epoch": 1.858942065491184, "grad_norm": 0.23644966889917676, "learning_rate": 3.4619586495846357e-05, "loss": 0.4982, "step": 6642 }, { "epoch": 1.8592219423453682, "grad_norm": 0.23171323603454722, "learning_rate": 3.460490949790041e-05, "loss": 0.4941, "step": 6643 }, { "epoch": 1.859501819199552, "grad_norm": 0.2429913281245455, "learning_rate": 3.45902339652678e-05, "loss": 0.4936, "step": 6644 }, { "epoch": 1.8597816960537363, "grad_norm": 0.23158854033323945, "learning_rate": 3.457555989934533e-05, "loss": 0.4722, "step": 6645 }, { "epoch": 1.8600615729079206, "grad_norm": 0.2372162625718152, "learning_rate": 3.456088730152972e-05, "loss": 0.4812, "step": 6646 }, { "epoch": 1.8603414497621047, "grad_norm": 0.2380636609178275, "learning_rate": 3.454621617321749e-05, "loss": 0.4957, "step": 6647 }, { "epoch": 1.8606213266162888, "grad_norm": 0.23527489381309008, "learning_rate": 3.4531546515805056e-05, "loss": 0.4883, "step": 6648 }, { "epoch": 1.860901203470473, "grad_norm": 0.22678876118962954, "learning_rate": 3.4516878330688684e-05, "loss": 0.4744, "step": 6649 }, { "epoch": 1.8611810803246571, "grad_norm": 0.23155560539218384, "learning_rate": 3.45022116192645e-05, "loss": 0.4848, "step": 6650 }, { "epoch": 1.8614609571788412, "grad_norm": 0.22284067438286498, "learning_rate": 3.4487546382928495e-05, "loss": 0.477, "step": 6651 }, { "epoch": 1.8617408340330255, "grad_norm": 0.23531975822926823, "learning_rate": 3.44728826230765e-05, "loss": 0.4859, "step": 6652 }, { "epoch": 1.8620207108872098, "grad_norm": 0.22356410580624714, "learning_rate": 3.445822034110422e-05, "loss": 0.4721, "step": 6653 }, { "epoch": 1.8623005877413936, "grad_norm": 0.23753243385983955, "learning_rate": 3.444355953840724e-05, "loss": 0.4741, "step": 6654 }, { "epoch": 1.862580464595578, "grad_norm": 0.22271276532637385, "learning_rate": 3.442890021638097e-05, "loss": 0.4554, "step": 6655 }, { "epoch": 1.8628603414497622, "grad_norm": 0.23500052714654035, "learning_rate": 3.441424237642068e-05, "loss": 0.4737, "step": 6656 }, { "epoch": 1.8631402183039463, "grad_norm": 0.2211684202951456, "learning_rate": 3.4399586019921534e-05, "loss": 0.494, "step": 6657 }, { "epoch": 1.8634200951581303, "grad_norm": 0.228229708890135, "learning_rate": 3.438493114827852e-05, "loss": 0.5004, "step": 6658 }, { "epoch": 1.8636999720123146, "grad_norm": 0.2304615792709606, "learning_rate": 3.437027776288651e-05, "loss": 0.4975, "step": 6659 }, { "epoch": 1.8639798488664987, "grad_norm": 0.22220549585512975, "learning_rate": 3.4355625865140216e-05, "loss": 0.4695, "step": 6660 }, { "epoch": 1.8642597257206828, "grad_norm": 0.22582991796289023, "learning_rate": 3.4340975456434194e-05, "loss": 0.4763, "step": 6661 }, { "epoch": 1.864539602574867, "grad_norm": 0.2339641001908726, "learning_rate": 3.432632653816289e-05, "loss": 0.5006, "step": 6662 }, { "epoch": 1.8648194794290514, "grad_norm": 0.24080089659130607, "learning_rate": 3.43116791117206e-05, "loss": 0.4743, "step": 6663 }, { "epoch": 1.8650993562832354, "grad_norm": 0.22493271396098588, "learning_rate": 3.429703317850147e-05, "loss": 0.4905, "step": 6664 }, { "epoch": 1.8653792331374195, "grad_norm": 0.22651811462105334, "learning_rate": 3.428238873989952e-05, "loss": 0.4594, "step": 6665 }, { "epoch": 1.8656591099916038, "grad_norm": 0.22980238210861795, "learning_rate": 3.4267745797308603e-05, "loss": 0.4772, "step": 6666 }, { "epoch": 1.8659389868457879, "grad_norm": 0.22222766889007314, "learning_rate": 3.4253104352122455e-05, "loss": 0.4574, "step": 6667 }, { "epoch": 1.866218863699972, "grad_norm": 0.22346612379582007, "learning_rate": 3.423846440573464e-05, "loss": 0.4651, "step": 6668 }, { "epoch": 1.8664987405541562, "grad_norm": 0.2391847394314958, "learning_rate": 3.4223825959538605e-05, "loss": 0.509, "step": 6669 }, { "epoch": 1.8667786174083403, "grad_norm": 0.2378791997329002, "learning_rate": 3.420918901492765e-05, "loss": 0.5103, "step": 6670 }, { "epoch": 1.8670584942625243, "grad_norm": 0.2291997970568722, "learning_rate": 3.4194553573294915e-05, "loss": 0.5246, "step": 6671 }, { "epoch": 1.8673383711167086, "grad_norm": 0.23050602087609529, "learning_rate": 3.417991963603343e-05, "loss": 0.4878, "step": 6672 }, { "epoch": 1.867618247970893, "grad_norm": 0.22618728410437525, "learning_rate": 3.416528720453604e-05, "loss": 0.458, "step": 6673 }, { "epoch": 1.867898124825077, "grad_norm": 0.22957725079065297, "learning_rate": 3.415065628019547e-05, "loss": 0.486, "step": 6674 }, { "epoch": 1.868178001679261, "grad_norm": 0.21686842573351836, "learning_rate": 3.4136026864404317e-05, "loss": 0.474, "step": 6675 }, { "epoch": 1.8684578785334454, "grad_norm": 0.22803966934711845, "learning_rate": 3.412139895855501e-05, "loss": 0.4867, "step": 6676 }, { "epoch": 1.8687377553876294, "grad_norm": 0.22594656416803122, "learning_rate": 3.4106772564039836e-05, "loss": 0.5091, "step": 6677 }, { "epoch": 1.8690176322418135, "grad_norm": 0.22327474274885217, "learning_rate": 3.4092147682250955e-05, "loss": 0.508, "step": 6678 }, { "epoch": 1.8692975090959978, "grad_norm": 0.23355716789072498, "learning_rate": 3.407752431458036e-05, "loss": 0.4902, "step": 6679 }, { "epoch": 1.869577385950182, "grad_norm": 0.2274599028974708, "learning_rate": 3.4062902462419885e-05, "loss": 0.5023, "step": 6680 }, { "epoch": 1.869857262804366, "grad_norm": 0.23691963820836076, "learning_rate": 3.404828212716129e-05, "loss": 0.4974, "step": 6681 }, { "epoch": 1.8701371396585502, "grad_norm": 0.22293792749485655, "learning_rate": 3.403366331019613e-05, "loss": 0.4766, "step": 6682 }, { "epoch": 1.8704170165127345, "grad_norm": 0.2243697088365475, "learning_rate": 3.4019046012915836e-05, "loss": 0.4759, "step": 6683 }, { "epoch": 1.8706968933669186, "grad_norm": 0.23707864223862304, "learning_rate": 3.400443023671169e-05, "loss": 0.5076, "step": 6684 }, { "epoch": 1.8709767702211026, "grad_norm": 0.22529443944272834, "learning_rate": 3.398981598297482e-05, "loss": 0.4754, "step": 6685 }, { "epoch": 1.871256647075287, "grad_norm": 0.23318736667270665, "learning_rate": 3.397520325309623e-05, "loss": 0.5036, "step": 6686 }, { "epoch": 1.871536523929471, "grad_norm": 0.22390308841712703, "learning_rate": 3.3960592048466764e-05, "loss": 0.4811, "step": 6687 }, { "epoch": 1.871816400783655, "grad_norm": 0.22970308023106925, "learning_rate": 3.394598237047713e-05, "loss": 0.4783, "step": 6688 }, { "epoch": 1.8720962776378394, "grad_norm": 0.22221704198782882, "learning_rate": 3.3931374220517866e-05, "loss": 0.4755, "step": 6689 }, { "epoch": 1.8723761544920237, "grad_norm": 0.22796392224910958, "learning_rate": 3.3916767599979407e-05, "loss": 0.4845, "step": 6690 }, { "epoch": 1.8726560313462075, "grad_norm": 0.22382994048146973, "learning_rate": 3.3902162510252e-05, "loss": 0.5051, "step": 6691 }, { "epoch": 1.8729359082003918, "grad_norm": 0.22864055488099297, "learning_rate": 3.388755895272578e-05, "loss": 0.4845, "step": 6692 }, { "epoch": 1.873215785054576, "grad_norm": 0.23888721778466862, "learning_rate": 3.387295692879072e-05, "loss": 0.496, "step": 6693 }, { "epoch": 1.8734956619087602, "grad_norm": 0.2272996783429827, "learning_rate": 3.3858356439836644e-05, "loss": 0.473, "step": 6694 }, { "epoch": 1.8737755387629442, "grad_norm": 0.23718106520445464, "learning_rate": 3.384375748725325e-05, "loss": 0.479, "step": 6695 }, { "epoch": 1.8740554156171285, "grad_norm": 0.2319471396994759, "learning_rate": 3.382916007243007e-05, "loss": 0.472, "step": 6696 }, { "epoch": 1.8743352924713126, "grad_norm": 0.22802839646117284, "learning_rate": 3.381456419675645e-05, "loss": 0.4745, "step": 6697 }, { "epoch": 1.8746151693254967, "grad_norm": 0.23381792574372254, "learning_rate": 3.37999698616217e-05, "loss": 0.4878, "step": 6698 }, { "epoch": 1.874895046179681, "grad_norm": 0.23649436863047152, "learning_rate": 3.3785377068414865e-05, "loss": 0.4807, "step": 6699 }, { "epoch": 1.8751749230338652, "grad_norm": 0.23406065361013736, "learning_rate": 3.377078581852492e-05, "loss": 0.4652, "step": 6700 }, { "epoch": 1.8754547998880493, "grad_norm": 0.25478096416382523, "learning_rate": 3.375619611334067e-05, "loss": 0.467, "step": 6701 }, { "epoch": 1.8757346767422334, "grad_norm": 0.22884520788962845, "learning_rate": 3.3741607954250765e-05, "loss": 0.4725, "step": 6702 }, { "epoch": 1.8760145535964177, "grad_norm": 0.2500879542893693, "learning_rate": 3.3727021342643714e-05, "loss": 0.4955, "step": 6703 }, { "epoch": 1.8762944304506017, "grad_norm": 0.2315372897410905, "learning_rate": 3.371243627990788e-05, "loss": 0.4754, "step": 6704 }, { "epoch": 1.8765743073047858, "grad_norm": 0.2343821662968564, "learning_rate": 3.3697852767431484e-05, "loss": 0.4614, "step": 6705 }, { "epoch": 1.87685418415897, "grad_norm": 0.22417983360116148, "learning_rate": 3.3683270806602575e-05, "loss": 0.4779, "step": 6706 }, { "epoch": 1.8771340610131542, "grad_norm": 0.2269968776273528, "learning_rate": 3.366869039880909e-05, "loss": 0.494, "step": 6707 }, { "epoch": 1.8774139378673382, "grad_norm": 0.22291882165936222, "learning_rate": 3.365411154543878e-05, "loss": 0.4575, "step": 6708 }, { "epoch": 1.8776938147215225, "grad_norm": 0.22856643072056357, "learning_rate": 3.36395342478793e-05, "loss": 0.4682, "step": 6709 }, { "epoch": 1.8779736915757068, "grad_norm": 0.22809095891160383, "learning_rate": 3.36249585075181e-05, "loss": 0.477, "step": 6710 }, { "epoch": 1.8782535684298909, "grad_norm": 0.22944389585931602, "learning_rate": 3.3610384325742514e-05, "loss": 0.4827, "step": 6711 }, { "epoch": 1.878533445284075, "grad_norm": 0.23926445809624353, "learning_rate": 3.3595811703939726e-05, "loss": 0.483, "step": 6712 }, { "epoch": 1.8788133221382592, "grad_norm": 0.2270824439574971, "learning_rate": 3.3581240643496763e-05, "loss": 0.4928, "step": 6713 }, { "epoch": 1.8790931989924433, "grad_norm": 0.23341885127196485, "learning_rate": 3.356667114580052e-05, "loss": 0.4832, "step": 6714 }, { "epoch": 1.8793730758466274, "grad_norm": 0.2264185400000116, "learning_rate": 3.355210321223769e-05, "loss": 0.4941, "step": 6715 }, { "epoch": 1.8796529527008117, "grad_norm": 0.22818613255724515, "learning_rate": 3.353753684419489e-05, "loss": 0.4886, "step": 6716 }, { "epoch": 1.879932829554996, "grad_norm": 0.25188605457591406, "learning_rate": 3.3522972043058555e-05, "loss": 0.4732, "step": 6717 }, { "epoch": 1.8802127064091798, "grad_norm": 0.22689699208048122, "learning_rate": 3.350840881021496e-05, "loss": 0.4806, "step": 6718 }, { "epoch": 1.880492583263364, "grad_norm": 0.22009302848574322, "learning_rate": 3.349384714705024e-05, "loss": 0.509, "step": 6719 }, { "epoch": 1.8807724601175484, "grad_norm": 0.21810958134175087, "learning_rate": 3.347928705495039e-05, "loss": 0.4671, "step": 6720 }, { "epoch": 1.8810523369717325, "grad_norm": 0.22203103242697145, "learning_rate": 3.346472853530125e-05, "loss": 0.4761, "step": 6721 }, { "epoch": 1.8813322138259165, "grad_norm": 0.23112227639094682, "learning_rate": 3.34501715894885e-05, "loss": 0.4864, "step": 6722 }, { "epoch": 1.8816120906801008, "grad_norm": 0.22943863932519792, "learning_rate": 3.3435616218897674e-05, "loss": 0.5088, "step": 6723 }, { "epoch": 1.881891967534285, "grad_norm": 0.22710685521934543, "learning_rate": 3.342106242491417e-05, "loss": 0.478, "step": 6724 }, { "epoch": 1.882171844388469, "grad_norm": 0.230110155385769, "learning_rate": 3.3406510208923224e-05, "loss": 0.4833, "step": 6725 }, { "epoch": 1.8824517212426533, "grad_norm": 0.22411641904965765, "learning_rate": 3.339195957230993e-05, "loss": 0.4576, "step": 6726 }, { "epoch": 1.8827315980968375, "grad_norm": 0.22616843299935008, "learning_rate": 3.337741051645922e-05, "loss": 0.4855, "step": 6727 }, { "epoch": 1.8830114749510214, "grad_norm": 0.23439793114301566, "learning_rate": 3.3362863042755876e-05, "loss": 0.47, "step": 6728 }, { "epoch": 1.8832913518052057, "grad_norm": 0.23099348044755166, "learning_rate": 3.334831715258454e-05, "loss": 0.4729, "step": 6729 }, { "epoch": 1.88357122865939, "grad_norm": 0.247098590957284, "learning_rate": 3.33337728473297e-05, "loss": 0.4891, "step": 6730 }, { "epoch": 1.883851105513574, "grad_norm": 0.23603307439394292, "learning_rate": 3.331923012837569e-05, "loss": 0.4804, "step": 6731 }, { "epoch": 1.8841309823677581, "grad_norm": 0.2241205392850302, "learning_rate": 3.33046889971067e-05, "loss": 0.4798, "step": 6732 }, { "epoch": 1.8844108592219424, "grad_norm": 0.23273793641263346, "learning_rate": 3.329014945490675e-05, "loss": 0.4578, "step": 6733 }, { "epoch": 1.8846907360761265, "grad_norm": 0.23846937477481026, "learning_rate": 3.327561150315973e-05, "loss": 0.4626, "step": 6734 }, { "epoch": 1.8849706129303105, "grad_norm": 0.22400149776889866, "learning_rate": 3.326107514324936e-05, "loss": 0.4957, "step": 6735 }, { "epoch": 1.8852504897844948, "grad_norm": 0.24377813442499874, "learning_rate": 3.3246540376559234e-05, "loss": 0.4978, "step": 6736 }, { "epoch": 1.8855303666386791, "grad_norm": 0.22339977902939695, "learning_rate": 3.323200720447277e-05, "loss": 0.5022, "step": 6737 }, { "epoch": 1.8858102434928632, "grad_norm": 0.2412162155364134, "learning_rate": 3.3217475628373245e-05, "loss": 0.5013, "step": 6738 }, { "epoch": 1.8860901203470473, "grad_norm": 0.23513602950112825, "learning_rate": 3.320294564964379e-05, "loss": 0.4944, "step": 6739 }, { "epoch": 1.8863699972012316, "grad_norm": 0.2298080315654412, "learning_rate": 3.318841726966736e-05, "loss": 0.4745, "step": 6740 }, { "epoch": 1.8866498740554156, "grad_norm": 0.23436143477223603, "learning_rate": 3.3173890489826784e-05, "loss": 0.4821, "step": 6741 }, { "epoch": 1.8869297509095997, "grad_norm": 0.23349787346008202, "learning_rate": 3.315936531150473e-05, "loss": 0.4906, "step": 6742 }, { "epoch": 1.887209627763784, "grad_norm": 0.23196108339389324, "learning_rate": 3.314484173608371e-05, "loss": 0.487, "step": 6743 }, { "epoch": 1.887489504617968, "grad_norm": 0.22526643770602886, "learning_rate": 3.313031976494609e-05, "loss": 0.4957, "step": 6744 }, { "epoch": 1.8877693814721521, "grad_norm": 0.22093999000554704, "learning_rate": 3.3115799399474077e-05, "loss": 0.4776, "step": 6745 }, { "epoch": 1.8880492583263364, "grad_norm": 0.2289801069621866, "learning_rate": 3.310128064104974e-05, "loss": 0.4782, "step": 6746 }, { "epoch": 1.8883291351805207, "grad_norm": 0.22843801143379772, "learning_rate": 3.308676349105495e-05, "loss": 0.4753, "step": 6747 }, { "epoch": 1.8886090120347048, "grad_norm": 0.23811988591323568, "learning_rate": 3.3072247950871496e-05, "loss": 0.4804, "step": 6748 }, { "epoch": 1.8888888888888888, "grad_norm": 0.23532957675361993, "learning_rate": 3.305773402188095e-05, "loss": 0.4921, "step": 6749 }, { "epoch": 1.8891687657430731, "grad_norm": 0.22624397351583414, "learning_rate": 3.3043221705464766e-05, "loss": 0.4809, "step": 6750 }, { "epoch": 1.8894486425972572, "grad_norm": 0.23696380018089494, "learning_rate": 3.302871100300423e-05, "loss": 0.4797, "step": 6751 }, { "epoch": 1.8897285194514413, "grad_norm": 0.23025909142493592, "learning_rate": 3.301420191588048e-05, "loss": 0.4917, "step": 6752 }, { "epoch": 1.8900083963056256, "grad_norm": 0.2222611228566544, "learning_rate": 3.2999694445474494e-05, "loss": 0.4805, "step": 6753 }, { "epoch": 1.8902882731598096, "grad_norm": 0.22800570578446014, "learning_rate": 3.298518859316711e-05, "loss": 0.5008, "step": 6754 }, { "epoch": 1.8905681500139937, "grad_norm": 0.23659529528159529, "learning_rate": 3.297068436033899e-05, "loss": 0.471, "step": 6755 }, { "epoch": 1.890848026868178, "grad_norm": 0.23670723026194176, "learning_rate": 3.2956181748370674e-05, "loss": 0.4957, "step": 6756 }, { "epoch": 1.8911279037223623, "grad_norm": 0.23089372161918936, "learning_rate": 3.294168075864252e-05, "loss": 0.4883, "step": 6757 }, { "epoch": 1.8914077805765463, "grad_norm": 0.22239354153578902, "learning_rate": 3.292718139253473e-05, "loss": 0.4789, "step": 6758 }, { "epoch": 1.8916876574307304, "grad_norm": 0.2256668861696989, "learning_rate": 3.291268365142738e-05, "loss": 0.4982, "step": 6759 }, { "epoch": 1.8919675342849147, "grad_norm": 0.2429305644567666, "learning_rate": 3.289818753670035e-05, "loss": 0.4695, "step": 6760 }, { "epoch": 1.8922474111390988, "grad_norm": 0.23982032082301433, "learning_rate": 3.288369304973342e-05, "loss": 0.497, "step": 6761 }, { "epoch": 1.8925272879932828, "grad_norm": 0.2236843177274853, "learning_rate": 3.2869200191906157e-05, "loss": 0.4893, "step": 6762 }, { "epoch": 1.8928071648474671, "grad_norm": 0.2307508653621394, "learning_rate": 3.2854708964598014e-05, "loss": 0.4804, "step": 6763 }, { "epoch": 1.8930870417016514, "grad_norm": 0.23444924896112015, "learning_rate": 3.2840219369188265e-05, "loss": 0.4881, "step": 6764 }, { "epoch": 1.8933669185558353, "grad_norm": 0.22349152559402358, "learning_rate": 3.282573140705604e-05, "loss": 0.4651, "step": 6765 }, { "epoch": 1.8936467954100196, "grad_norm": 0.23378393901685646, "learning_rate": 3.2811245079580316e-05, "loss": 0.4947, "step": 6766 }, { "epoch": 1.8939266722642039, "grad_norm": 0.23377842235723953, "learning_rate": 3.27967603881399e-05, "loss": 0.4805, "step": 6767 }, { "epoch": 1.894206549118388, "grad_norm": 0.23044283625762985, "learning_rate": 3.278227733411348e-05, "loss": 0.4677, "step": 6768 }, { "epoch": 1.894486425972572, "grad_norm": 0.22821637898356942, "learning_rate": 3.276779591887952e-05, "loss": 0.489, "step": 6769 }, { "epoch": 1.8947663028267563, "grad_norm": 0.2349851898158919, "learning_rate": 3.27533161438164e-05, "loss": 0.5062, "step": 6770 }, { "epoch": 1.8950461796809404, "grad_norm": 0.22807032935111787, "learning_rate": 3.2738838010302295e-05, "loss": 0.4549, "step": 6771 }, { "epoch": 1.8953260565351244, "grad_norm": 0.23021950686549847, "learning_rate": 3.2724361519715254e-05, "loss": 0.4869, "step": 6772 }, { "epoch": 1.8956059333893087, "grad_norm": 0.2257923102895986, "learning_rate": 3.270988667343315e-05, "loss": 0.4766, "step": 6773 }, { "epoch": 1.895885810243493, "grad_norm": 0.22582074092110432, "learning_rate": 3.269541347283371e-05, "loss": 0.475, "step": 6774 }, { "epoch": 1.896165687097677, "grad_norm": 0.2542424124741826, "learning_rate": 3.268094191929451e-05, "loss": 0.4913, "step": 6775 }, { "epoch": 1.8964455639518611, "grad_norm": 0.2244689958047732, "learning_rate": 3.266647201419294e-05, "loss": 0.4724, "step": 6776 }, { "epoch": 1.8967254408060454, "grad_norm": 0.2344822286633166, "learning_rate": 3.265200375890626e-05, "loss": 0.4826, "step": 6777 }, { "epoch": 1.8970053176602295, "grad_norm": 0.22813453503741773, "learning_rate": 3.263753715481158e-05, "loss": 0.4631, "step": 6778 }, { "epoch": 1.8972851945144136, "grad_norm": 0.23215944271930963, "learning_rate": 3.262307220328583e-05, "loss": 0.4744, "step": 6779 }, { "epoch": 1.8975650713685979, "grad_norm": 0.2419283337502692, "learning_rate": 3.26086089057058e-05, "loss": 0.4807, "step": 6780 }, { "epoch": 1.897844948222782, "grad_norm": 0.23628743917134987, "learning_rate": 3.2594147263448106e-05, "loss": 0.4974, "step": 6781 }, { "epoch": 1.898124825076966, "grad_norm": 0.2259527845255626, "learning_rate": 3.257968727788922e-05, "loss": 0.4708, "step": 6782 }, { "epoch": 1.8984047019311503, "grad_norm": 0.23373185042166159, "learning_rate": 3.256522895040545e-05, "loss": 0.513, "step": 6783 }, { "epoch": 1.8986845787853346, "grad_norm": 0.22550441473117894, "learning_rate": 3.2550772282372945e-05, "loss": 0.4756, "step": 6784 }, { "epoch": 1.8989644556395187, "grad_norm": 0.23326936591686565, "learning_rate": 3.253631727516771e-05, "loss": 0.4633, "step": 6785 }, { "epoch": 1.8992443324937027, "grad_norm": 0.22180939238271521, "learning_rate": 3.2521863930165555e-05, "loss": 0.4943, "step": 6786 }, { "epoch": 1.899524209347887, "grad_norm": 0.22088803819986813, "learning_rate": 3.2507412248742195e-05, "loss": 0.4722, "step": 6787 }, { "epoch": 1.899804086202071, "grad_norm": 0.22825648960038597, "learning_rate": 3.2492962232273115e-05, "loss": 0.4788, "step": 6788 }, { "epoch": 1.9000839630562552, "grad_norm": 0.2349773301452994, "learning_rate": 3.247851388213369e-05, "loss": 0.4838, "step": 6789 }, { "epoch": 1.9003638399104394, "grad_norm": 0.22767266404411718, "learning_rate": 3.246406719969912e-05, "loss": 0.4626, "step": 6790 }, { "epoch": 1.9006437167646235, "grad_norm": 0.2253672913354678, "learning_rate": 3.2449622186344445e-05, "loss": 0.4648, "step": 6791 }, { "epoch": 1.9009235936188076, "grad_norm": 0.23378096511454644, "learning_rate": 3.243517884344456e-05, "loss": 0.4808, "step": 6792 }, { "epoch": 1.9012034704729919, "grad_norm": 0.23490868833061007, "learning_rate": 3.242073717237418e-05, "loss": 0.4832, "step": 6793 }, { "epoch": 1.9014833473271762, "grad_norm": 0.22865554902914265, "learning_rate": 3.240629717450788e-05, "loss": 0.4704, "step": 6794 }, { "epoch": 1.9017632241813602, "grad_norm": 0.22979670192876384, "learning_rate": 3.239185885122005e-05, "loss": 0.4763, "step": 6795 }, { "epoch": 1.9020431010355443, "grad_norm": 0.23161283702216043, "learning_rate": 3.2377422203884963e-05, "loss": 0.4904, "step": 6796 }, { "epoch": 1.9023229778897286, "grad_norm": 0.22735673238208506, "learning_rate": 3.236298723387669e-05, "loss": 0.4802, "step": 6797 }, { "epoch": 1.9026028547439127, "grad_norm": 0.2236196980638249, "learning_rate": 3.234855394256917e-05, "loss": 0.4883, "step": 6798 }, { "epoch": 1.9028827315980967, "grad_norm": 0.24825711839058326, "learning_rate": 3.233412233133616e-05, "loss": 0.4953, "step": 6799 }, { "epoch": 1.903162608452281, "grad_norm": 0.21784708167937517, "learning_rate": 3.231969240155127e-05, "loss": 0.477, "step": 6800 }, { "epoch": 1.9034424853064653, "grad_norm": 0.22480391886199635, "learning_rate": 3.2305264154587964e-05, "loss": 0.4774, "step": 6801 }, { "epoch": 1.9037223621606492, "grad_norm": 0.24005887709323095, "learning_rate": 3.2290837591819515e-05, "loss": 0.473, "step": 6802 }, { "epoch": 1.9040022390148335, "grad_norm": 0.23900000138365213, "learning_rate": 3.227641271461906e-05, "loss": 0.4758, "step": 6803 }, { "epoch": 1.9042821158690177, "grad_norm": 0.23707660125217198, "learning_rate": 3.226198952435956e-05, "loss": 0.4903, "step": 6804 }, { "epoch": 1.9045619927232018, "grad_norm": 0.23458362369244137, "learning_rate": 3.224756802241383e-05, "loss": 0.5005, "step": 6805 }, { "epoch": 1.9048418695773859, "grad_norm": 0.22772437266238116, "learning_rate": 3.2233148210154505e-05, "loss": 0.48, "step": 6806 }, { "epoch": 1.9051217464315702, "grad_norm": 0.23658507405210563, "learning_rate": 3.2218730088954085e-05, "loss": 0.5001, "step": 6807 }, { "epoch": 1.9054016232857542, "grad_norm": 0.2347934669458113, "learning_rate": 3.220431366018488e-05, "loss": 0.4981, "step": 6808 }, { "epoch": 1.9056815001399383, "grad_norm": 0.2386236334617226, "learning_rate": 3.218989892521907e-05, "loss": 0.4966, "step": 6809 }, { "epoch": 1.9059613769941226, "grad_norm": 0.22405168964152639, "learning_rate": 3.217548588542864e-05, "loss": 0.5002, "step": 6810 }, { "epoch": 1.906241253848307, "grad_norm": 0.22443528202264984, "learning_rate": 3.2161074542185446e-05, "loss": 0.4716, "step": 6811 }, { "epoch": 1.9065211307024907, "grad_norm": 0.22918651917991598, "learning_rate": 3.214666489686115e-05, "loss": 0.4711, "step": 6812 }, { "epoch": 1.906801007556675, "grad_norm": 0.23550169298562623, "learning_rate": 3.213225695082729e-05, "loss": 0.4957, "step": 6813 }, { "epoch": 1.9070808844108593, "grad_norm": 0.23245829516667219, "learning_rate": 3.211785070545522e-05, "loss": 0.4958, "step": 6814 }, { "epoch": 1.9073607612650434, "grad_norm": 0.23398829095356824, "learning_rate": 3.2103446162116123e-05, "loss": 0.4737, "step": 6815 }, { "epoch": 1.9076406381192275, "grad_norm": 0.23008193940614188, "learning_rate": 3.208904332218104e-05, "loss": 0.484, "step": 6816 }, { "epoch": 1.9079205149734118, "grad_norm": 0.2321203631771179, "learning_rate": 3.2074642187020844e-05, "loss": 0.4659, "step": 6817 }, { "epoch": 1.9082003918275958, "grad_norm": 0.2378090521192645, "learning_rate": 3.2060242758006224e-05, "loss": 0.4779, "step": 6818 }, { "epoch": 1.90848026868178, "grad_norm": 0.2375734569737336, "learning_rate": 3.204584503650775e-05, "loss": 0.5021, "step": 6819 }, { "epoch": 1.9087601455359642, "grad_norm": 0.22816905535331417, "learning_rate": 3.2031449023895796e-05, "loss": 0.4792, "step": 6820 }, { "epoch": 1.9090400223901485, "grad_norm": 0.2330459331434565, "learning_rate": 3.2017054721540574e-05, "loss": 0.4663, "step": 6821 }, { "epoch": 1.9093198992443325, "grad_norm": 0.2336438924530243, "learning_rate": 3.200266213081216e-05, "loss": 0.4634, "step": 6822 }, { "epoch": 1.9095997760985166, "grad_norm": 0.23412554809101752, "learning_rate": 3.1988271253080435e-05, "loss": 0.5008, "step": 6823 }, { "epoch": 1.909879652952701, "grad_norm": 0.22469914491950652, "learning_rate": 3.1973882089715134e-05, "loss": 0.4814, "step": 6824 }, { "epoch": 1.910159529806885, "grad_norm": 0.21854263756921014, "learning_rate": 3.195949464208582e-05, "loss": 0.5001, "step": 6825 }, { "epoch": 1.910439406661069, "grad_norm": 0.22998329956911545, "learning_rate": 3.1945108911561905e-05, "loss": 0.4831, "step": 6826 }, { "epoch": 1.9107192835152533, "grad_norm": 0.22654095187481313, "learning_rate": 3.193072489951263e-05, "loss": 0.4668, "step": 6827 }, { "epoch": 1.9109991603694374, "grad_norm": 0.2325386858114916, "learning_rate": 3.191634260730707e-05, "loss": 0.4807, "step": 6828 }, { "epoch": 1.9112790372236215, "grad_norm": 0.22987321747102826, "learning_rate": 3.1901962036314135e-05, "loss": 0.4721, "step": 6829 }, { "epoch": 1.9115589140778058, "grad_norm": 0.23193940874677382, "learning_rate": 3.1887583187902594e-05, "loss": 0.5256, "step": 6830 }, { "epoch": 1.91183879093199, "grad_norm": 0.2274389175956893, "learning_rate": 3.187320606344102e-05, "loss": 0.4872, "step": 6831 }, { "epoch": 1.9121186677861741, "grad_norm": 0.23002855024204658, "learning_rate": 3.185883066429784e-05, "loss": 0.5087, "step": 6832 }, { "epoch": 1.9123985446403582, "grad_norm": 0.23766752700212365, "learning_rate": 3.1844456991841316e-05, "loss": 0.499, "step": 6833 }, { "epoch": 1.9126784214945425, "grad_norm": 0.2369432673308841, "learning_rate": 3.1830085047439535e-05, "loss": 0.4971, "step": 6834 }, { "epoch": 1.9129582983487265, "grad_norm": 0.2291129532696114, "learning_rate": 3.181571483246043e-05, "loss": 0.4531, "step": 6835 }, { "epoch": 1.9132381752029106, "grad_norm": 0.23528789356582233, "learning_rate": 3.180134634827177e-05, "loss": 0.4949, "step": 6836 }, { "epoch": 1.913518052057095, "grad_norm": 0.23423299731789948, "learning_rate": 3.178697959624114e-05, "loss": 0.4633, "step": 6837 }, { "epoch": 1.9137979289112792, "grad_norm": 0.227982563824072, "learning_rate": 3.1772614577735986e-05, "loss": 0.4606, "step": 6838 }, { "epoch": 1.914077805765463, "grad_norm": 0.24287594115101296, "learning_rate": 3.175825129412359e-05, "loss": 0.4902, "step": 6839 }, { "epoch": 1.9143576826196473, "grad_norm": 0.23235641962549547, "learning_rate": 3.174388974677103e-05, "loss": 0.4824, "step": 6840 }, { "epoch": 1.9146375594738316, "grad_norm": 0.21938849222606618, "learning_rate": 3.172952993704529e-05, "loss": 0.4674, "step": 6841 }, { "epoch": 1.9149174363280157, "grad_norm": 0.2331902976043227, "learning_rate": 3.1715171866313096e-05, "loss": 0.4907, "step": 6842 }, { "epoch": 1.9151973131821998, "grad_norm": 0.2292911227099199, "learning_rate": 3.1700815535941084e-05, "loss": 0.4916, "step": 6843 }, { "epoch": 1.915477190036384, "grad_norm": 0.23688346430017465, "learning_rate": 3.1686460947295695e-05, "loss": 0.498, "step": 6844 }, { "epoch": 1.9157570668905681, "grad_norm": 0.2282263419118602, "learning_rate": 3.167210810174319e-05, "loss": 0.4707, "step": 6845 }, { "epoch": 1.9160369437447522, "grad_norm": 0.22748798163523531, "learning_rate": 3.1657757000649704e-05, "loss": 0.4669, "step": 6846 }, { "epoch": 1.9163168205989365, "grad_norm": 0.23148091960667105, "learning_rate": 3.1643407645381164e-05, "loss": 0.5006, "step": 6847 }, { "epoch": 1.9165966974531208, "grad_norm": 0.22499136668384834, "learning_rate": 3.162906003730337e-05, "loss": 0.4706, "step": 6848 }, { "epoch": 1.9168765743073046, "grad_norm": 0.2268158645626959, "learning_rate": 3.1614714177781915e-05, "loss": 0.4729, "step": 6849 }, { "epoch": 1.917156451161489, "grad_norm": 0.22077174740142377, "learning_rate": 3.1600370068182264e-05, "loss": 0.4623, "step": 6850 }, { "epoch": 1.9174363280156732, "grad_norm": 0.22790442444950615, "learning_rate": 3.158602770986968e-05, "loss": 0.4837, "step": 6851 }, { "epoch": 1.9177162048698573, "grad_norm": 0.23374442369929613, "learning_rate": 3.15716871042093e-05, "loss": 0.4879, "step": 6852 }, { "epoch": 1.9179960817240413, "grad_norm": 0.23013580297805894, "learning_rate": 3.155734825256604e-05, "loss": 0.4994, "step": 6853 }, { "epoch": 1.9182759585782256, "grad_norm": 0.2266661411073759, "learning_rate": 3.154301115630471e-05, "loss": 0.4692, "step": 6854 }, { "epoch": 1.9185558354324097, "grad_norm": 0.23331203969907446, "learning_rate": 3.15286758167899e-05, "loss": 0.4765, "step": 6855 }, { "epoch": 1.9188357122865938, "grad_norm": 0.2262449587177227, "learning_rate": 3.1514342235386064e-05, "loss": 0.4788, "step": 6856 }, { "epoch": 1.919115589140778, "grad_norm": 0.24121557769879817, "learning_rate": 3.1500010413457484e-05, "loss": 0.4841, "step": 6857 }, { "epoch": 1.9193954659949624, "grad_norm": 0.2366969780444885, "learning_rate": 3.148568035236826e-05, "loss": 0.4866, "step": 6858 }, { "epoch": 1.9196753428491464, "grad_norm": 0.23380953792861275, "learning_rate": 3.147135205348237e-05, "loss": 0.4957, "step": 6859 }, { "epoch": 1.9199552197033305, "grad_norm": 0.2227770637262004, "learning_rate": 3.145702551816354e-05, "loss": 0.4794, "step": 6860 }, { "epoch": 1.9202350965575148, "grad_norm": 0.22126815045739895, "learning_rate": 3.1442700747775414e-05, "loss": 0.4806, "step": 6861 }, { "epoch": 1.9205149734116989, "grad_norm": 0.21848077923144377, "learning_rate": 3.1428377743681405e-05, "loss": 0.4625, "step": 6862 }, { "epoch": 1.920794850265883, "grad_norm": 0.23327747417855133, "learning_rate": 3.1414056507244794e-05, "loss": 0.4868, "step": 6863 }, { "epoch": 1.9210747271200672, "grad_norm": 0.22302391172410224, "learning_rate": 3.139973703982869e-05, "loss": 0.4625, "step": 6864 }, { "epoch": 1.9213546039742513, "grad_norm": 0.23149430994358766, "learning_rate": 3.138541934279603e-05, "loss": 0.464, "step": 6865 }, { "epoch": 1.9216344808284354, "grad_norm": 0.21861418479211353, "learning_rate": 3.137110341750957e-05, "loss": 0.4839, "step": 6866 }, { "epoch": 1.9219143576826196, "grad_norm": 0.22460244112099875, "learning_rate": 3.135678926533192e-05, "loss": 0.4669, "step": 6867 }, { "epoch": 1.922194234536804, "grad_norm": 0.22979776423988815, "learning_rate": 3.13424768876255e-05, "loss": 0.4842, "step": 6868 }, { "epoch": 1.922474111390988, "grad_norm": 0.227849370538838, "learning_rate": 3.132816628575257e-05, "loss": 0.486, "step": 6869 }, { "epoch": 1.922753988245172, "grad_norm": 0.2352057090343283, "learning_rate": 3.131385746107523e-05, "loss": 0.4568, "step": 6870 }, { "epoch": 1.9230338650993564, "grad_norm": 0.24022913515036032, "learning_rate": 3.1299550414955394e-05, "loss": 0.475, "step": 6871 }, { "epoch": 1.9233137419535404, "grad_norm": 0.23218267938665885, "learning_rate": 3.1285245148754815e-05, "loss": 0.4434, "step": 6872 }, { "epoch": 1.9235936188077245, "grad_norm": 0.23746534284199786, "learning_rate": 3.127094166383509e-05, "loss": 0.4746, "step": 6873 }, { "epoch": 1.9238734956619088, "grad_norm": 0.2312195414137495, "learning_rate": 3.12566399615576e-05, "loss": 0.494, "step": 6874 }, { "epoch": 1.9241533725160929, "grad_norm": 0.22627180660925544, "learning_rate": 3.1242340043283614e-05, "loss": 0.4756, "step": 6875 }, { "epoch": 1.924433249370277, "grad_norm": 0.23208926301994978, "learning_rate": 3.1228041910374205e-05, "loss": 0.5087, "step": 6876 }, { "epoch": 1.9247131262244612, "grad_norm": 0.22887270094402065, "learning_rate": 3.12137455641903e-05, "loss": 0.4687, "step": 6877 }, { "epoch": 1.9249930030786455, "grad_norm": 0.22519232721995053, "learning_rate": 3.1199451006092584e-05, "loss": 0.4807, "step": 6878 }, { "epoch": 1.9252728799328296, "grad_norm": 0.23411256382615975, "learning_rate": 3.1185158237441644e-05, "loss": 0.4714, "step": 6879 }, { "epoch": 1.9255527567870137, "grad_norm": 0.219722626424481, "learning_rate": 3.117086725959787e-05, "loss": 0.4781, "step": 6880 }, { "epoch": 1.925832633641198, "grad_norm": 0.23152207410977735, "learning_rate": 3.1156578073921486e-05, "loss": 0.484, "step": 6881 }, { "epoch": 1.926112510495382, "grad_norm": 0.22507155641696094, "learning_rate": 3.114229068177256e-05, "loss": 0.4631, "step": 6882 }, { "epoch": 1.926392387349566, "grad_norm": 0.22872350848907855, "learning_rate": 3.1128005084510955e-05, "loss": 0.4747, "step": 6883 }, { "epoch": 1.9266722642037504, "grad_norm": 0.2398090353730163, "learning_rate": 3.111372128349639e-05, "loss": 0.4873, "step": 6884 }, { "epoch": 1.9269521410579347, "grad_norm": 0.23516847024261786, "learning_rate": 3.109943928008841e-05, "loss": 0.4985, "step": 6885 }, { "epoch": 1.9272320179121185, "grad_norm": 0.2400545689646885, "learning_rate": 3.108515907564638e-05, "loss": 0.4731, "step": 6886 }, { "epoch": 1.9275118947663028, "grad_norm": 0.23056596783776126, "learning_rate": 3.107088067152948e-05, "loss": 0.4816, "step": 6887 }, { "epoch": 1.927791771620487, "grad_norm": 0.23462545850016983, "learning_rate": 3.105660406909677e-05, "loss": 0.486, "step": 6888 }, { "epoch": 1.9280716484746712, "grad_norm": 0.24154000300239664, "learning_rate": 3.104232926970708e-05, "loss": 0.4858, "step": 6889 }, { "epoch": 1.9283515253288552, "grad_norm": 0.22110620665817354, "learning_rate": 3.1028056274719105e-05, "loss": 0.4443, "step": 6890 }, { "epoch": 1.9286314021830395, "grad_norm": 0.2352114639897142, "learning_rate": 3.101378508549135e-05, "loss": 0.508, "step": 6891 }, { "epoch": 1.9289112790372236, "grad_norm": 0.225769653804193, "learning_rate": 3.0999515703382144e-05, "loss": 0.4903, "step": 6892 }, { "epoch": 1.9291911558914077, "grad_norm": 0.23538240263250854, "learning_rate": 3.098524812974969e-05, "loss": 0.4891, "step": 6893 }, { "epoch": 1.929471032745592, "grad_norm": 0.23268110961277907, "learning_rate": 3.097098236595195e-05, "loss": 0.4785, "step": 6894 }, { "epoch": 1.9297509095997762, "grad_norm": 0.24068440132113061, "learning_rate": 3.095671841334678e-05, "loss": 0.5166, "step": 6895 }, { "epoch": 1.9300307864539603, "grad_norm": 0.22853175972526274, "learning_rate": 3.0942456273291796e-05, "loss": 0.4891, "step": 6896 }, { "epoch": 1.9303106633081444, "grad_norm": 0.23087725893848762, "learning_rate": 3.092819594714449e-05, "loss": 0.4868, "step": 6897 }, { "epoch": 1.9305905401623287, "grad_norm": 0.23132547733018058, "learning_rate": 3.0913937436262175e-05, "loss": 0.4874, "step": 6898 }, { "epoch": 1.9308704170165127, "grad_norm": 0.23381523443609475, "learning_rate": 3.0899680742001956e-05, "loss": 0.4879, "step": 6899 }, { "epoch": 1.9311502938706968, "grad_norm": 0.22696264033813296, "learning_rate": 3.088542586572083e-05, "loss": 0.4754, "step": 6900 }, { "epoch": 1.931430170724881, "grad_norm": 0.22362342375231267, "learning_rate": 3.087117280877557e-05, "loss": 0.4697, "step": 6901 }, { "epoch": 1.9317100475790652, "grad_norm": 0.23054527726771223, "learning_rate": 3.085692157252279e-05, "loss": 0.4659, "step": 6902 }, { "epoch": 1.9319899244332492, "grad_norm": 0.23376377890307531, "learning_rate": 3.084267215831893e-05, "loss": 0.4749, "step": 6903 }, { "epoch": 1.9322698012874335, "grad_norm": 0.23148748010655892, "learning_rate": 3.082842456752025e-05, "loss": 0.4968, "step": 6904 }, { "epoch": 1.9325496781416178, "grad_norm": 0.23592315242289705, "learning_rate": 3.081417880148285e-05, "loss": 0.4683, "step": 6905 }, { "epoch": 1.932829554995802, "grad_norm": 0.24650908244682188, "learning_rate": 3.0799934861562655e-05, "loss": 0.4693, "step": 6906 }, { "epoch": 1.933109431849986, "grad_norm": 0.22955108605469735, "learning_rate": 3.0785692749115414e-05, "loss": 0.4858, "step": 6907 }, { "epoch": 1.9333893087041703, "grad_norm": 0.2236911996856329, "learning_rate": 3.0771452465496684e-05, "loss": 0.4779, "step": 6908 }, { "epoch": 1.9336691855583543, "grad_norm": 0.24161723810605487, "learning_rate": 3.075721401206187e-05, "loss": 0.4721, "step": 6909 }, { "epoch": 1.9339490624125384, "grad_norm": 0.2303975187687021, "learning_rate": 3.074297739016618e-05, "loss": 0.4758, "step": 6910 }, { "epoch": 1.9342289392667227, "grad_norm": 0.23451034142286228, "learning_rate": 3.0728742601164705e-05, "loss": 0.5024, "step": 6911 }, { "epoch": 1.9345088161209067, "grad_norm": 0.23476269244260678, "learning_rate": 3.0714509646412296e-05, "loss": 0.4575, "step": 6912 }, { "epoch": 1.9347886929750908, "grad_norm": 0.2394929049749627, "learning_rate": 3.070027852726366e-05, "loss": 0.4853, "step": 6913 }, { "epoch": 1.935068569829275, "grad_norm": 0.2427409412793654, "learning_rate": 3.068604924507331e-05, "loss": 0.47, "step": 6914 }, { "epoch": 1.9353484466834594, "grad_norm": 0.23343392326369555, "learning_rate": 3.0671821801195594e-05, "loss": 0.4833, "step": 6915 }, { "epoch": 1.9356283235376435, "grad_norm": 0.22446464120246062, "learning_rate": 3.0657596196984686e-05, "loss": 0.4544, "step": 6916 }, { "epoch": 1.9359082003918275, "grad_norm": 0.2302097824648424, "learning_rate": 3.064337243379462e-05, "loss": 0.5017, "step": 6917 }, { "epoch": 1.9361880772460118, "grad_norm": 0.23476317780853365, "learning_rate": 3.062915051297919e-05, "loss": 0.493, "step": 6918 }, { "epoch": 1.936467954100196, "grad_norm": 0.23094801660810896, "learning_rate": 3.061493043589206e-05, "loss": 0.4744, "step": 6919 }, { "epoch": 1.93674783095438, "grad_norm": 0.24366671092119307, "learning_rate": 3.06007122038867e-05, "loss": 0.4844, "step": 6920 }, { "epoch": 1.9370277078085643, "grad_norm": 0.22928569606518756, "learning_rate": 3.0586495818316405e-05, "loss": 0.4557, "step": 6921 }, { "epoch": 1.9373075846627485, "grad_norm": 0.2357603007546275, "learning_rate": 3.0572281280534324e-05, "loss": 0.4751, "step": 6922 }, { "epoch": 1.9375874615169324, "grad_norm": 0.22876167350947205, "learning_rate": 3.055806859189336e-05, "loss": 0.4853, "step": 6923 }, { "epoch": 1.9378673383711167, "grad_norm": 0.22487240855797255, "learning_rate": 3.054385775374632e-05, "loss": 0.4663, "step": 6924 }, { "epoch": 1.938147215225301, "grad_norm": 0.2332209486511338, "learning_rate": 3.05296487674458e-05, "loss": 0.4957, "step": 6925 }, { "epoch": 1.938427092079485, "grad_norm": 0.22952610403124868, "learning_rate": 3.0515441634344195e-05, "loss": 0.4868, "step": 6926 }, { "epoch": 1.9387069689336691, "grad_norm": 0.22732984211747553, "learning_rate": 3.0501236355793754e-05, "loss": 0.469, "step": 6927 }, { "epoch": 1.9389868457878534, "grad_norm": 0.23296474093773015, "learning_rate": 3.0487032933146564e-05, "loss": 0.4933, "step": 6928 }, { "epoch": 1.9392667226420375, "grad_norm": 0.2396372070201162, "learning_rate": 3.0472831367754494e-05, "loss": 0.4876, "step": 6929 }, { "epoch": 1.9395465994962215, "grad_norm": 0.23398515721260013, "learning_rate": 3.0458631660969273e-05, "loss": 0.4757, "step": 6930 }, { "epoch": 1.9398264763504058, "grad_norm": 0.23799658394799308, "learning_rate": 3.044443381414244e-05, "loss": 0.4932, "step": 6931 }, { "epoch": 1.9401063532045901, "grad_norm": 0.23882297071006706, "learning_rate": 3.043023782862533e-05, "loss": 0.4845, "step": 6932 }, { "epoch": 1.940386230058774, "grad_norm": 0.2289941761074013, "learning_rate": 3.0416043705769125e-05, "loss": 0.469, "step": 6933 }, { "epoch": 1.9406661069129583, "grad_norm": 0.2280878658820877, "learning_rate": 3.0401851446924846e-05, "loss": 0.4738, "step": 6934 }, { "epoch": 1.9409459837671426, "grad_norm": 0.23462376522494938, "learning_rate": 3.0387661053443324e-05, "loss": 0.5245, "step": 6935 }, { "epoch": 1.9412258606213266, "grad_norm": 0.2226797572636581, "learning_rate": 3.0373472526675197e-05, "loss": 0.4704, "step": 6936 }, { "epoch": 1.9415057374755107, "grad_norm": 0.22545363218823844, "learning_rate": 3.035928586797094e-05, "loss": 0.4903, "step": 6937 }, { "epoch": 1.941785614329695, "grad_norm": 0.22948329099575007, "learning_rate": 3.0345101078680848e-05, "loss": 0.4961, "step": 6938 }, { "epoch": 1.942065491183879, "grad_norm": 0.23123306372187905, "learning_rate": 3.0330918160155035e-05, "loss": 0.4795, "step": 6939 }, { "epoch": 1.9423453680380631, "grad_norm": 0.2334816833466265, "learning_rate": 3.0316737113743442e-05, "loss": 0.4702, "step": 6940 }, { "epoch": 1.9426252448922474, "grad_norm": 0.23592211266861282, "learning_rate": 3.0302557940795828e-05, "loss": 0.4814, "step": 6941 }, { "epoch": 1.9429051217464317, "grad_norm": 0.21555884914805812, "learning_rate": 3.0288380642661774e-05, "loss": 0.4812, "step": 6942 }, { "epoch": 1.9431849986006158, "grad_norm": 0.22766833160207603, "learning_rate": 3.0274205220690686e-05, "loss": 0.4642, "step": 6943 }, { "epoch": 1.9434648754547998, "grad_norm": 0.24708929888125925, "learning_rate": 3.0260031676231772e-05, "loss": 0.4925, "step": 6944 }, { "epoch": 1.9437447523089841, "grad_norm": 0.233628491339286, "learning_rate": 3.0245860010634104e-05, "loss": 0.4918, "step": 6945 }, { "epoch": 1.9440246291631682, "grad_norm": 0.22982334012920114, "learning_rate": 3.0231690225246535e-05, "loss": 0.4871, "step": 6946 }, { "epoch": 1.9443045060173523, "grad_norm": 0.2237908785868204, "learning_rate": 3.0217522321417758e-05, "loss": 0.458, "step": 6947 }, { "epoch": 1.9445843828715366, "grad_norm": 0.24540427596737752, "learning_rate": 3.0203356300496277e-05, "loss": 0.4849, "step": 6948 }, { "epoch": 1.9448642597257206, "grad_norm": 0.22946116864867663, "learning_rate": 3.018919216383045e-05, "loss": 0.488, "step": 6949 }, { "epoch": 1.9451441365799047, "grad_norm": 0.25076988348664525, "learning_rate": 3.017502991276836e-05, "loss": 0.5008, "step": 6950 }, { "epoch": 1.945424013434089, "grad_norm": 0.2281895669330235, "learning_rate": 3.016086954865804e-05, "loss": 0.4904, "step": 6951 }, { "epoch": 1.9457038902882733, "grad_norm": 0.23592514096754377, "learning_rate": 3.0146711072847257e-05, "loss": 0.4791, "step": 6952 }, { "epoch": 1.9459837671424574, "grad_norm": 0.2332401196359121, "learning_rate": 3.0132554486683628e-05, "loss": 0.4945, "step": 6953 }, { "epoch": 1.9462636439966414, "grad_norm": 0.2407452785702372, "learning_rate": 3.011839979151458e-05, "loss": 0.4773, "step": 6954 }, { "epoch": 1.9465435208508257, "grad_norm": 0.2325901569668764, "learning_rate": 3.0104246988687368e-05, "loss": 0.4964, "step": 6955 }, { "epoch": 1.9468233977050098, "grad_norm": 0.23238008213955866, "learning_rate": 3.009009607954907e-05, "loss": 0.4803, "step": 6956 }, { "epoch": 1.9471032745591939, "grad_norm": 0.2382190319205473, "learning_rate": 3.0075947065446563e-05, "loss": 0.4959, "step": 6957 }, { "epoch": 1.9473831514133781, "grad_norm": 0.2419764363830295, "learning_rate": 3.0061799947726565e-05, "loss": 0.5032, "step": 6958 }, { "epoch": 1.9476630282675624, "grad_norm": 0.2297780721980634, "learning_rate": 3.004765472773562e-05, "loss": 0.4674, "step": 6959 }, { "epoch": 1.9479429051217463, "grad_norm": 0.2514690104952155, "learning_rate": 3.0033511406820058e-05, "loss": 0.468, "step": 6960 }, { "epoch": 1.9482227819759306, "grad_norm": 0.2294590882490544, "learning_rate": 3.001936998632604e-05, "loss": 0.4799, "step": 6961 }, { "epoch": 1.9485026588301149, "grad_norm": 0.23477045519017806, "learning_rate": 3.000523046759959e-05, "loss": 0.4708, "step": 6962 }, { "epoch": 1.948782535684299, "grad_norm": 0.227380390921727, "learning_rate": 2.999109285198649e-05, "loss": 0.4658, "step": 6963 }, { "epoch": 1.949062412538483, "grad_norm": 0.22534009942285327, "learning_rate": 2.9976957140832374e-05, "loss": 0.4906, "step": 6964 }, { "epoch": 1.9493422893926673, "grad_norm": 0.23131293568262534, "learning_rate": 2.9962823335482693e-05, "loss": 0.4915, "step": 6965 }, { "epoch": 1.9496221662468514, "grad_norm": 0.24348116094107597, "learning_rate": 2.994869143728269e-05, "loss": 0.467, "step": 6966 }, { "epoch": 1.9499020431010354, "grad_norm": 0.2356463252888148, "learning_rate": 2.993456144757748e-05, "loss": 0.4937, "step": 6967 }, { "epoch": 1.9501819199552197, "grad_norm": 0.22808636286042844, "learning_rate": 2.992043336771192e-05, "loss": 0.4691, "step": 6968 }, { "epoch": 1.950461796809404, "grad_norm": 0.22755773681479166, "learning_rate": 2.9906307199030758e-05, "loss": 0.4666, "step": 6969 }, { "epoch": 1.9507416736635879, "grad_norm": 0.2535203021836645, "learning_rate": 2.9892182942878522e-05, "loss": 0.5067, "step": 6970 }, { "epoch": 1.9510215505177722, "grad_norm": 0.23077107892665602, "learning_rate": 2.9878060600599565e-05, "loss": 0.4912, "step": 6971 }, { "epoch": 1.9513014273719564, "grad_norm": 0.23179166256489142, "learning_rate": 2.9863940173538074e-05, "loss": 0.4941, "step": 6972 }, { "epoch": 1.9515813042261405, "grad_norm": 0.2295845434567394, "learning_rate": 2.984982166303802e-05, "loss": 0.5013, "step": 6973 }, { "epoch": 1.9518611810803246, "grad_norm": 0.23301915929344238, "learning_rate": 2.983570507044322e-05, "loss": 0.4806, "step": 6974 }, { "epoch": 1.9521410579345089, "grad_norm": 0.2364962286582459, "learning_rate": 2.9821590397097298e-05, "loss": 0.49, "step": 6975 }, { "epoch": 1.952420934788693, "grad_norm": 0.22415602367856208, "learning_rate": 2.9807477644343695e-05, "loss": 0.4744, "step": 6976 }, { "epoch": 1.952700811642877, "grad_norm": 0.23720920427411543, "learning_rate": 2.979336681352567e-05, "loss": 0.4687, "step": 6977 }, { "epoch": 1.9529806884970613, "grad_norm": 0.227201964135838, "learning_rate": 2.9779257905986302e-05, "loss": 0.4894, "step": 6978 }, { "epoch": 1.9532605653512456, "grad_norm": 0.22895255908481088, "learning_rate": 2.976515092306848e-05, "loss": 0.4809, "step": 6979 }, { "epoch": 1.9535404422054297, "grad_norm": 0.2285392463168476, "learning_rate": 2.9751045866114922e-05, "loss": 0.507, "step": 6980 }, { "epoch": 1.9538203190596137, "grad_norm": 0.22626235418131851, "learning_rate": 2.9736942736468166e-05, "loss": 0.5094, "step": 6981 }, { "epoch": 1.954100195913798, "grad_norm": 0.2335110819416272, "learning_rate": 2.9722841535470524e-05, "loss": 0.4817, "step": 6982 }, { "epoch": 1.954380072767982, "grad_norm": 0.24596529967737227, "learning_rate": 2.9708742264464185e-05, "loss": 0.4868, "step": 6983 }, { "epoch": 1.9546599496221662, "grad_norm": 0.24755451917883628, "learning_rate": 2.9694644924791116e-05, "loss": 0.4876, "step": 6984 }, { "epoch": 1.9549398264763505, "grad_norm": 0.2394044335536084, "learning_rate": 2.9680549517793106e-05, "loss": 0.4937, "step": 6985 }, { "epoch": 1.9552197033305345, "grad_norm": 0.23541114527914012, "learning_rate": 2.9666456044811774e-05, "loss": 0.4839, "step": 6986 }, { "epoch": 1.9554995801847186, "grad_norm": 0.23128141273377667, "learning_rate": 2.965236450718853e-05, "loss": 0.4928, "step": 6987 }, { "epoch": 1.9557794570389029, "grad_norm": 0.23090437271847997, "learning_rate": 2.963827490626462e-05, "loss": 0.4798, "step": 6988 }, { "epoch": 1.9560593338930872, "grad_norm": 0.22273393555307908, "learning_rate": 2.9624187243381095e-05, "loss": 0.4607, "step": 6989 }, { "epoch": 1.9563392107472712, "grad_norm": 0.2304498043001874, "learning_rate": 2.961010151987884e-05, "loss": 0.4937, "step": 6990 }, { "epoch": 1.9566190876014553, "grad_norm": 0.2330779304346798, "learning_rate": 2.9596017737098536e-05, "loss": 0.4659, "step": 6991 }, { "epoch": 1.9568989644556396, "grad_norm": 0.25328248738358466, "learning_rate": 2.9581935896380685e-05, "loss": 0.4923, "step": 6992 }, { "epoch": 1.9571788413098237, "grad_norm": 0.2325139361543725, "learning_rate": 2.9567855999065596e-05, "loss": 0.4781, "step": 6993 }, { "epoch": 1.9574587181640077, "grad_norm": 0.22713006650382206, "learning_rate": 2.955377804649342e-05, "loss": 0.4727, "step": 6994 }, { "epoch": 1.957738595018192, "grad_norm": 0.23001038600887078, "learning_rate": 2.9539702040004085e-05, "loss": 0.4837, "step": 6995 }, { "epoch": 1.9580184718723763, "grad_norm": 0.23198450068624474, "learning_rate": 2.9525627980937355e-05, "loss": 0.4872, "step": 6996 }, { "epoch": 1.9582983487265602, "grad_norm": 0.2348553910644492, "learning_rate": 2.9511555870632824e-05, "loss": 0.4777, "step": 6997 }, { "epoch": 1.9585782255807445, "grad_norm": 0.22722001530923186, "learning_rate": 2.9497485710429873e-05, "loss": 0.4715, "step": 6998 }, { "epoch": 1.9588581024349287, "grad_norm": 0.23336732850772657, "learning_rate": 2.948341750166771e-05, "loss": 0.4931, "step": 6999 }, { "epoch": 1.9591379792891128, "grad_norm": 0.232762538670184, "learning_rate": 2.946935124568535e-05, "loss": 0.4728, "step": 7000 }, { "epoch": 1.9594178561432969, "grad_norm": 0.2315457543303341, "learning_rate": 2.9455286943821638e-05, "loss": 0.461, "step": 7001 }, { "epoch": 1.9596977329974812, "grad_norm": 0.23351845968891322, "learning_rate": 2.94412245974152e-05, "loss": 0.4728, "step": 7002 }, { "epoch": 1.9599776098516652, "grad_norm": 0.23551139242126456, "learning_rate": 2.942716420780452e-05, "loss": 0.4704, "step": 7003 }, { "epoch": 1.9602574867058493, "grad_norm": 0.23262927457816346, "learning_rate": 2.9413105776327877e-05, "loss": 0.504, "step": 7004 }, { "epoch": 1.9605373635600336, "grad_norm": 0.22498542500355775, "learning_rate": 2.9399049304323334e-05, "loss": 0.4709, "step": 7005 }, { "epoch": 1.960817240414218, "grad_norm": 0.2364436633012764, "learning_rate": 2.938499479312882e-05, "loss": 0.4597, "step": 7006 }, { "epoch": 1.9610971172684017, "grad_norm": 0.23580077097171936, "learning_rate": 2.9370942244082022e-05, "loss": 0.4692, "step": 7007 }, { "epoch": 1.961376994122586, "grad_norm": 0.2196275596296198, "learning_rate": 2.9356891658520502e-05, "loss": 0.4782, "step": 7008 }, { "epoch": 1.9616568709767703, "grad_norm": 0.22992860368248752, "learning_rate": 2.9342843037781587e-05, "loss": 0.4698, "step": 7009 }, { "epoch": 1.9619367478309544, "grad_norm": 0.22885797763020418, "learning_rate": 2.9328796383202427e-05, "loss": 0.4898, "step": 7010 }, { "epoch": 1.9622166246851385, "grad_norm": 0.24773494403951035, "learning_rate": 2.9314751696120003e-05, "loss": 0.4944, "step": 7011 }, { "epoch": 1.9624965015393228, "grad_norm": 0.2370585113625114, "learning_rate": 2.9300708977871095e-05, "loss": 0.4881, "step": 7012 }, { "epoch": 1.9627763783935068, "grad_norm": 0.24828605566473014, "learning_rate": 2.9286668229792274e-05, "loss": 0.4751, "step": 7013 }, { "epoch": 1.963056255247691, "grad_norm": 0.23103862341489811, "learning_rate": 2.927262945321998e-05, "loss": 0.4528, "step": 7014 }, { "epoch": 1.9633361321018752, "grad_norm": 0.23100437274171104, "learning_rate": 2.9258592649490413e-05, "loss": 0.4619, "step": 7015 }, { "epoch": 1.9636160089560595, "grad_norm": 0.2229633172860549, "learning_rate": 2.9244557819939606e-05, "loss": 0.4739, "step": 7016 }, { "epoch": 1.9638958858102435, "grad_norm": 0.2327869635447997, "learning_rate": 2.9230524965903406e-05, "loss": 0.4764, "step": 7017 }, { "epoch": 1.9641757626644276, "grad_norm": 0.24366798841238196, "learning_rate": 2.9216494088717463e-05, "loss": 0.5019, "step": 7018 }, { "epoch": 1.964455639518612, "grad_norm": 0.23207685832803884, "learning_rate": 2.920246518971724e-05, "loss": 0.4697, "step": 7019 }, { "epoch": 1.964735516372796, "grad_norm": 0.23765465654318918, "learning_rate": 2.9188438270238032e-05, "loss": 0.4789, "step": 7020 }, { "epoch": 1.96501539322698, "grad_norm": 0.23208967128417798, "learning_rate": 2.9174413331614915e-05, "loss": 0.4751, "step": 7021 }, { "epoch": 1.9652952700811643, "grad_norm": 0.2294397803522859, "learning_rate": 2.916039037518281e-05, "loss": 0.5058, "step": 7022 }, { "epoch": 1.9655751469353484, "grad_norm": 0.22858794716313705, "learning_rate": 2.9146369402276395e-05, "loss": 0.4738, "step": 7023 }, { "epoch": 1.9658550237895325, "grad_norm": 0.2273453226076543, "learning_rate": 2.913235041423022e-05, "loss": 0.481, "step": 7024 }, { "epoch": 1.9661349006437168, "grad_norm": 0.22862180788730038, "learning_rate": 2.9118333412378586e-05, "loss": 0.4565, "step": 7025 }, { "epoch": 1.966414777497901, "grad_norm": 0.23703629882129632, "learning_rate": 2.9104318398055684e-05, "loss": 0.4687, "step": 7026 }, { "epoch": 1.9666946543520851, "grad_norm": 0.21975098352834288, "learning_rate": 2.9090305372595457e-05, "loss": 0.4765, "step": 7027 }, { "epoch": 1.9669745312062692, "grad_norm": 0.22575644568142314, "learning_rate": 2.9076294337331666e-05, "loss": 0.4931, "step": 7028 }, { "epoch": 1.9672544080604535, "grad_norm": 0.22853639099213402, "learning_rate": 2.906228529359789e-05, "loss": 0.4727, "step": 7029 }, { "epoch": 1.9675342849146376, "grad_norm": 0.2315150628974756, "learning_rate": 2.9048278242727524e-05, "loss": 0.4725, "step": 7030 }, { "epoch": 1.9678141617688216, "grad_norm": 0.2341757688214872, "learning_rate": 2.9034273186053755e-05, "loss": 0.4701, "step": 7031 }, { "epoch": 1.968094038623006, "grad_norm": 0.23047135269148536, "learning_rate": 2.902027012490961e-05, "loss": 0.4831, "step": 7032 }, { "epoch": 1.96837391547719, "grad_norm": 0.2222554430448998, "learning_rate": 2.900626906062789e-05, "loss": 0.487, "step": 7033 }, { "epoch": 1.968653792331374, "grad_norm": 0.22936272671328484, "learning_rate": 2.8992269994541233e-05, "loss": 0.4701, "step": 7034 }, { "epoch": 1.9689336691855583, "grad_norm": 0.23650867361925126, "learning_rate": 2.897827292798207e-05, "loss": 0.4626, "step": 7035 }, { "epoch": 1.9692135460397426, "grad_norm": 0.23024960992005195, "learning_rate": 2.8964277862282664e-05, "loss": 0.4765, "step": 7036 }, { "epoch": 1.9694934228939267, "grad_norm": 0.2217600833787514, "learning_rate": 2.8950284798775064e-05, "loss": 0.4772, "step": 7037 }, { "epoch": 1.9697732997481108, "grad_norm": 0.23007353371239403, "learning_rate": 2.8936293738791132e-05, "loss": 0.4622, "step": 7038 }, { "epoch": 1.970053176602295, "grad_norm": 0.22376176935042869, "learning_rate": 2.892230468366256e-05, "loss": 0.4852, "step": 7039 }, { "epoch": 1.9703330534564791, "grad_norm": 0.23936365390929332, "learning_rate": 2.8908317634720845e-05, "loss": 0.4987, "step": 7040 }, { "epoch": 1.9706129303106632, "grad_norm": 0.22464425664828297, "learning_rate": 2.889433259329724e-05, "loss": 0.4837, "step": 7041 }, { "epoch": 1.9708928071648475, "grad_norm": 0.23670794125702688, "learning_rate": 2.888034956072285e-05, "loss": 0.4919, "step": 7042 }, { "epoch": 1.9711726840190318, "grad_norm": 0.22692560651586183, "learning_rate": 2.8866368538328636e-05, "loss": 0.4967, "step": 7043 }, { "epoch": 1.9714525608732156, "grad_norm": 0.22788833578279027, "learning_rate": 2.885238952744529e-05, "loss": 0.4787, "step": 7044 }, { "epoch": 1.9717324377274, "grad_norm": 0.2433319210610914, "learning_rate": 2.883841252940335e-05, "loss": 0.5063, "step": 7045 }, { "epoch": 1.9720123145815842, "grad_norm": 0.22761454388568558, "learning_rate": 2.8824437545533144e-05, "loss": 0.475, "step": 7046 }, { "epoch": 1.9722921914357683, "grad_norm": 0.2307141235514982, "learning_rate": 2.881046457716483e-05, "loss": 0.4896, "step": 7047 }, { "epoch": 1.9725720682899524, "grad_norm": 0.23221708734677798, "learning_rate": 2.8796493625628356e-05, "loss": 0.4752, "step": 7048 }, { "epoch": 1.9728519451441366, "grad_norm": 0.23345716338046027, "learning_rate": 2.878252469225349e-05, "loss": 0.4998, "step": 7049 }, { "epoch": 1.9731318219983207, "grad_norm": 0.25912862470150677, "learning_rate": 2.8768557778369793e-05, "loss": 0.5138, "step": 7050 }, { "epoch": 1.9734116988525048, "grad_norm": 0.2384157264648219, "learning_rate": 2.875459288530665e-05, "loss": 0.4774, "step": 7051 }, { "epoch": 1.973691575706689, "grad_norm": 0.22621812786139864, "learning_rate": 2.8740630014393254e-05, "loss": 0.4582, "step": 7052 }, { "epoch": 1.9739714525608734, "grad_norm": 0.23510762856967313, "learning_rate": 2.8726669166958592e-05, "loss": 0.5015, "step": 7053 }, { "epoch": 1.9742513294150572, "grad_norm": 0.23103924219529073, "learning_rate": 2.871271034433146e-05, "loss": 0.4715, "step": 7054 }, { "epoch": 1.9745312062692415, "grad_norm": 0.23620001333983057, "learning_rate": 2.869875354784048e-05, "loss": 0.486, "step": 7055 }, { "epoch": 1.9748110831234258, "grad_norm": 0.234569119182929, "learning_rate": 2.868479877881406e-05, "loss": 0.4825, "step": 7056 }, { "epoch": 1.9750909599776099, "grad_norm": 0.23410974720603886, "learning_rate": 2.8670846038580412e-05, "loss": 0.472, "step": 7057 }, { "epoch": 1.975370836831794, "grad_norm": 0.22408059353002907, "learning_rate": 2.8656895328467603e-05, "loss": 0.465, "step": 7058 }, { "epoch": 1.9756507136859782, "grad_norm": 0.22850538612943877, "learning_rate": 2.8642946649803425e-05, "loss": 0.4883, "step": 7059 }, { "epoch": 1.9759305905401623, "grad_norm": 0.2429574859448099, "learning_rate": 2.8629000003915518e-05, "loss": 0.4938, "step": 7060 }, { "epoch": 1.9762104673943464, "grad_norm": 0.23163296805534375, "learning_rate": 2.8615055392131372e-05, "loss": 0.461, "step": 7061 }, { "epoch": 1.9764903442485307, "grad_norm": 0.2355781056223335, "learning_rate": 2.8601112815778223e-05, "loss": 0.4767, "step": 7062 }, { "epoch": 1.976770221102715, "grad_norm": 0.22809593095417793, "learning_rate": 2.858717227618314e-05, "loss": 0.4838, "step": 7063 }, { "epoch": 1.977050097956899, "grad_norm": 0.23390999068278936, "learning_rate": 2.8573233774672975e-05, "loss": 0.5037, "step": 7064 }, { "epoch": 1.977329974811083, "grad_norm": 0.23022170186616017, "learning_rate": 2.8559297312574417e-05, "loss": 0.47, "step": 7065 }, { "epoch": 1.9776098516652674, "grad_norm": 0.2244910351811928, "learning_rate": 2.8545362891213944e-05, "loss": 0.4753, "step": 7066 }, { "epoch": 1.9778897285194514, "grad_norm": 0.23452644856574384, "learning_rate": 2.8531430511917834e-05, "loss": 0.4863, "step": 7067 }, { "epoch": 1.9781696053736355, "grad_norm": 0.23093788826410583, "learning_rate": 2.8517500176012192e-05, "loss": 0.5043, "step": 7068 }, { "epoch": 1.9784494822278198, "grad_norm": 0.22957059730720628, "learning_rate": 2.85035718848229e-05, "loss": 0.4709, "step": 7069 }, { "epoch": 1.9787293590820039, "grad_norm": 0.22844945758409066, "learning_rate": 2.8489645639675672e-05, "loss": 0.4629, "step": 7070 }, { "epoch": 1.979009235936188, "grad_norm": 0.24689877723442868, "learning_rate": 2.8475721441896008e-05, "loss": 0.5032, "step": 7071 }, { "epoch": 1.9792891127903722, "grad_norm": 0.23283754577445992, "learning_rate": 2.8461799292809234e-05, "loss": 0.4743, "step": 7072 }, { "epoch": 1.9795689896445565, "grad_norm": 0.23100739927796426, "learning_rate": 2.8447879193740445e-05, "loss": 0.4904, "step": 7073 }, { "epoch": 1.9798488664987406, "grad_norm": 0.23567442996764618, "learning_rate": 2.8433961146014588e-05, "loss": 0.4799, "step": 7074 }, { "epoch": 1.9801287433529247, "grad_norm": 0.22812684774489528, "learning_rate": 2.8420045150956374e-05, "loss": 0.4875, "step": 7075 }, { "epoch": 1.980408620207109, "grad_norm": 0.23086532968492773, "learning_rate": 2.840613120989037e-05, "loss": 0.4796, "step": 7076 }, { "epoch": 1.980688497061293, "grad_norm": 0.23034140314456664, "learning_rate": 2.8392219324140835e-05, "loss": 0.479, "step": 7077 }, { "epoch": 1.980968373915477, "grad_norm": 0.2313665761715524, "learning_rate": 2.8378309495031984e-05, "loss": 0.4889, "step": 7078 }, { "epoch": 1.9812482507696614, "grad_norm": 0.22063695647756695, "learning_rate": 2.8364401723887735e-05, "loss": 0.4625, "step": 7079 }, { "epoch": 1.9815281276238457, "grad_norm": 0.2521820924444219, "learning_rate": 2.8350496012031847e-05, "loss": 0.506, "step": 7080 }, { "epoch": 1.9818080044780295, "grad_norm": 0.2285491258933469, "learning_rate": 2.833659236078786e-05, "loss": 0.4606, "step": 7081 }, { "epoch": 1.9820878813322138, "grad_norm": 0.23882669110070884, "learning_rate": 2.832269077147913e-05, "loss": 0.5018, "step": 7082 }, { "epoch": 1.982367758186398, "grad_norm": 0.22985496995197235, "learning_rate": 2.830879124542884e-05, "loss": 0.4748, "step": 7083 }, { "epoch": 1.9826476350405822, "grad_norm": 0.23402777728462829, "learning_rate": 2.829489378395993e-05, "loss": 0.4837, "step": 7084 }, { "epoch": 1.9829275118947662, "grad_norm": 0.24467558647557744, "learning_rate": 2.8280998388395185e-05, "loss": 0.5006, "step": 7085 }, { "epoch": 1.9832073887489505, "grad_norm": 0.22804699437165032, "learning_rate": 2.826710506005717e-05, "loss": 0.4617, "step": 7086 }, { "epoch": 1.9834872656031346, "grad_norm": 0.2296227757594068, "learning_rate": 2.8253213800268256e-05, "loss": 0.4551, "step": 7087 }, { "epoch": 1.9837671424573187, "grad_norm": 0.240150003055722, "learning_rate": 2.8239324610350625e-05, "loss": 0.4761, "step": 7088 }, { "epoch": 1.984047019311503, "grad_norm": 0.23114381456106, "learning_rate": 2.822543749162626e-05, "loss": 0.4639, "step": 7089 }, { "epoch": 1.9843268961656872, "grad_norm": 0.24736307218798204, "learning_rate": 2.8211552445416946e-05, "loss": 0.4985, "step": 7090 }, { "epoch": 1.984606773019871, "grad_norm": 0.22653634632324202, "learning_rate": 2.8197669473044257e-05, "loss": 0.4674, "step": 7091 }, { "epoch": 1.9848866498740554, "grad_norm": 0.2417910132935196, "learning_rate": 2.8183788575829596e-05, "loss": 0.4839, "step": 7092 }, { "epoch": 1.9851665267282397, "grad_norm": 0.22664370137669457, "learning_rate": 2.816990975509415e-05, "loss": 0.4805, "step": 7093 }, { "epoch": 1.9854464035824237, "grad_norm": 0.22736144605270978, "learning_rate": 2.815603301215891e-05, "loss": 0.4806, "step": 7094 }, { "epoch": 1.9857262804366078, "grad_norm": 0.2369043453440284, "learning_rate": 2.8142158348344673e-05, "loss": 0.4734, "step": 7095 }, { "epoch": 1.986006157290792, "grad_norm": 0.2442381808375003, "learning_rate": 2.812828576497204e-05, "loss": 0.4869, "step": 7096 }, { "epoch": 1.9862860341449762, "grad_norm": 0.2382920120174186, "learning_rate": 2.8114415263361416e-05, "loss": 0.4851, "step": 7097 }, { "epoch": 1.9865659109991602, "grad_norm": 0.23822704343586873, "learning_rate": 2.8100546844832988e-05, "loss": 0.4821, "step": 7098 }, { "epoch": 1.9868457878533445, "grad_norm": 0.22946593045084673, "learning_rate": 2.8086680510706774e-05, "loss": 0.5088, "step": 7099 }, { "epoch": 1.9871256647075288, "grad_norm": 0.2314498711980787, "learning_rate": 2.807281626230257e-05, "loss": 0.4606, "step": 7100 }, { "epoch": 1.987405541561713, "grad_norm": 0.2249556893406552, "learning_rate": 2.8058954100939992e-05, "loss": 0.4815, "step": 7101 }, { "epoch": 1.987685418415897, "grad_norm": 0.23023146470575578, "learning_rate": 2.8045094027938447e-05, "loss": 0.5015, "step": 7102 }, { "epoch": 1.9879652952700813, "grad_norm": 0.22364453842125873, "learning_rate": 2.8031236044617137e-05, "loss": 0.4721, "step": 7103 }, { "epoch": 1.9882451721242653, "grad_norm": 0.23806221721710097, "learning_rate": 2.801738015229507e-05, "loss": 0.4975, "step": 7104 }, { "epoch": 1.9885250489784494, "grad_norm": 0.2243512238062485, "learning_rate": 2.8003526352291077e-05, "loss": 0.4773, "step": 7105 }, { "epoch": 1.9888049258326337, "grad_norm": 0.23878315834031513, "learning_rate": 2.7989674645923747e-05, "loss": 0.4997, "step": 7106 }, { "epoch": 1.9890848026868178, "grad_norm": 0.22136624159924373, "learning_rate": 2.797582503451151e-05, "loss": 0.4785, "step": 7107 }, { "epoch": 1.9893646795410018, "grad_norm": 0.22851727181910395, "learning_rate": 2.7961977519372575e-05, "loss": 0.4656, "step": 7108 }, { "epoch": 1.9896445563951861, "grad_norm": 0.22676215976761419, "learning_rate": 2.7948132101824946e-05, "loss": 0.4689, "step": 7109 }, { "epoch": 1.9899244332493704, "grad_norm": 0.22903461841804382, "learning_rate": 2.7934288783186458e-05, "loss": 0.496, "step": 7110 }, { "epoch": 1.9902043101035545, "grad_norm": 0.23399417725757138, "learning_rate": 2.7920447564774704e-05, "loss": 0.4984, "step": 7111 }, { "epoch": 1.9904841869577385, "grad_norm": 0.23468405703801676, "learning_rate": 2.7906608447907113e-05, "loss": 0.4789, "step": 7112 }, { "epoch": 1.9907640638119228, "grad_norm": 0.22461133254682347, "learning_rate": 2.78927714339009e-05, "loss": 0.4832, "step": 7113 }, { "epoch": 1.991043940666107, "grad_norm": 0.2361043309281983, "learning_rate": 2.7878936524073074e-05, "loss": 0.4804, "step": 7114 }, { "epoch": 1.991323817520291, "grad_norm": 0.24284029579036037, "learning_rate": 2.786510371974045e-05, "loss": 0.4745, "step": 7115 }, { "epoch": 1.9916036943744753, "grad_norm": 0.22604323427116058, "learning_rate": 2.7851273022219644e-05, "loss": 0.4926, "step": 7116 }, { "epoch": 1.9918835712286596, "grad_norm": 0.22322252295804929, "learning_rate": 2.7837444432827066e-05, "loss": 0.478, "step": 7117 }, { "epoch": 1.9921634480828434, "grad_norm": 0.23382189009195833, "learning_rate": 2.7823617952878932e-05, "loss": 0.4776, "step": 7118 }, { "epoch": 1.9924433249370277, "grad_norm": 0.22790903388172398, "learning_rate": 2.7809793583691258e-05, "loss": 0.4936, "step": 7119 }, { "epoch": 1.992723201791212, "grad_norm": 0.23520628706779156, "learning_rate": 2.779597132657985e-05, "loss": 0.5012, "step": 7120 }, { "epoch": 1.993003078645396, "grad_norm": 0.2262907996432626, "learning_rate": 2.7782151182860318e-05, "loss": 0.4829, "step": 7121 }, { "epoch": 1.9932829554995801, "grad_norm": 0.2193334470655366, "learning_rate": 2.7768333153848075e-05, "loss": 0.4855, "step": 7122 }, { "epoch": 1.9935628323537644, "grad_norm": 0.2197528998059351, "learning_rate": 2.7754517240858325e-05, "loss": 0.4629, "step": 7123 }, { "epoch": 1.9938427092079485, "grad_norm": 0.22762622975934896, "learning_rate": 2.7740703445206072e-05, "loss": 0.4816, "step": 7124 }, { "epoch": 1.9941225860621326, "grad_norm": 0.22736377592299664, "learning_rate": 2.7726891768206132e-05, "loss": 0.491, "step": 7125 }, { "epoch": 1.9944024629163168, "grad_norm": 0.2921883336014387, "learning_rate": 2.771308221117309e-05, "loss": 0.4808, "step": 7126 }, { "epoch": 1.9946823397705011, "grad_norm": 0.2321603110306611, "learning_rate": 2.7699274775421363e-05, "loss": 0.4787, "step": 7127 }, { "epoch": 1.994962216624685, "grad_norm": 0.2341763652512763, "learning_rate": 2.7685469462265144e-05, "loss": 0.5024, "step": 7128 }, { "epoch": 1.9952420934788693, "grad_norm": 0.2262431486301722, "learning_rate": 2.7671666273018433e-05, "loss": 0.4699, "step": 7129 }, { "epoch": 1.9955219703330536, "grad_norm": 0.2292033135830934, "learning_rate": 2.7657865208995025e-05, "loss": 0.4998, "step": 7130 }, { "epoch": 1.9958018471872376, "grad_norm": 0.22982800104503978, "learning_rate": 2.7644066271508506e-05, "loss": 0.4826, "step": 7131 }, { "epoch": 1.9960817240414217, "grad_norm": 0.23537974193570985, "learning_rate": 2.763026946187228e-05, "loss": 0.4996, "step": 7132 }, { "epoch": 1.996361600895606, "grad_norm": 0.2198193987375018, "learning_rate": 2.7616474781399526e-05, "loss": 0.4559, "step": 7133 }, { "epoch": 1.99664147774979, "grad_norm": 0.22833082201651922, "learning_rate": 2.7602682231403228e-05, "loss": 0.4969, "step": 7134 }, { "epoch": 1.9969213546039741, "grad_norm": 0.23158821295374815, "learning_rate": 2.758889181319617e-05, "loss": 0.5089, "step": 7135 }, { "epoch": 1.9972012314581584, "grad_norm": 0.2372044565687655, "learning_rate": 2.7575103528090935e-05, "loss": 0.4648, "step": 7136 }, { "epoch": 1.9974811083123427, "grad_norm": 0.2340136785012341, "learning_rate": 2.7561317377399897e-05, "loss": 0.4687, "step": 7137 }, { "epoch": 1.9977609851665268, "grad_norm": 0.23109783645478796, "learning_rate": 2.7547533362435234e-05, "loss": 0.5156, "step": 7138 }, { "epoch": 1.9980408620207109, "grad_norm": 0.2159904106190594, "learning_rate": 2.7533751484508907e-05, "loss": 0.4507, "step": 7139 }, { "epoch": 1.9983207388748951, "grad_norm": 0.2411622800166489, "learning_rate": 2.751997174493269e-05, "loss": 0.483, "step": 7140 }, { "epoch": 1.9986006157290792, "grad_norm": 0.2316522269934337, "learning_rate": 2.750619414501815e-05, "loss": 0.4801, "step": 7141 }, { "epoch": 1.9988804925832633, "grad_norm": 0.24186152949997194, "learning_rate": 2.7492418686076644e-05, "loss": 0.4845, "step": 7142 }, { "epoch": 1.9991603694374476, "grad_norm": 0.23094396636398243, "learning_rate": 2.747864536941932e-05, "loss": 0.4713, "step": 7143 }, { "epoch": 1.9994402462916316, "grad_norm": 0.23730511172168406, "learning_rate": 2.746487419635714e-05, "loss": 0.4823, "step": 7144 }, { "epoch": 1.9997201231458157, "grad_norm": 0.2377301352358611, "learning_rate": 2.745110516820084e-05, "loss": 0.4704, "step": 7145 }, { "epoch": 2.0, "grad_norm": 0.2342240748180366, "learning_rate": 2.743733828626097e-05, "loss": 0.4657, "step": 7146 }, { "epoch": 2.0002798768541843, "grad_norm": 0.23536629569011525, "learning_rate": 2.742357355184788e-05, "loss": 0.4528, "step": 7147 }, { "epoch": 2.000559753708368, "grad_norm": 0.24180568723549165, "learning_rate": 2.7409810966271687e-05, "loss": 0.4775, "step": 7148 }, { "epoch": 2.0008396305625524, "grad_norm": 0.2266540329366163, "learning_rate": 2.7396050530842338e-05, "loss": 0.4431, "step": 7149 }, { "epoch": 2.0011195074167367, "grad_norm": 0.2318668980184379, "learning_rate": 2.7382292246869547e-05, "loss": 0.4576, "step": 7150 }, { "epoch": 2.001399384270921, "grad_norm": 0.23099092697837148, "learning_rate": 2.7368536115662846e-05, "loss": 0.4608, "step": 7151 }, { "epoch": 2.001679261125105, "grad_norm": 0.2289346802474108, "learning_rate": 2.7354782138531536e-05, "loss": 0.4628, "step": 7152 }, { "epoch": 2.001959137979289, "grad_norm": 0.23500463684622683, "learning_rate": 2.7341030316784742e-05, "loss": 0.4529, "step": 7153 }, { "epoch": 2.0022390148334734, "grad_norm": 0.23324874847693192, "learning_rate": 2.732728065173136e-05, "loss": 0.4544, "step": 7154 }, { "epoch": 2.0025188916876573, "grad_norm": 0.24384206284288545, "learning_rate": 2.7313533144680104e-05, "loss": 0.468, "step": 7155 }, { "epoch": 2.0027987685418416, "grad_norm": 0.24620629471749755, "learning_rate": 2.7299787796939456e-05, "loss": 0.4621, "step": 7156 }, { "epoch": 2.003078645396026, "grad_norm": 0.2491240336762495, "learning_rate": 2.7286044609817718e-05, "loss": 0.4462, "step": 7157 }, { "epoch": 2.0033585222502097, "grad_norm": 0.23485142680080093, "learning_rate": 2.727230358462296e-05, "loss": 0.4477, "step": 7158 }, { "epoch": 2.003638399104394, "grad_norm": 0.26540711606896583, "learning_rate": 2.725856472266307e-05, "loss": 0.4732, "step": 7159 }, { "epoch": 2.0039182759585783, "grad_norm": 0.2951729471292445, "learning_rate": 2.7244828025245716e-05, "loss": 0.4654, "step": 7160 }, { "epoch": 2.0041981528127626, "grad_norm": 0.24255185428714476, "learning_rate": 2.7231093493678373e-05, "loss": 0.4441, "step": 7161 }, { "epoch": 2.0044780296669464, "grad_norm": 0.257612487412905, "learning_rate": 2.721736112926829e-05, "loss": 0.4576, "step": 7162 }, { "epoch": 2.0047579065211307, "grad_norm": 0.3496534649361711, "learning_rate": 2.720363093332253e-05, "loss": 0.4636, "step": 7163 }, { "epoch": 2.005037783375315, "grad_norm": 0.281179917706595, "learning_rate": 2.718990290714794e-05, "loss": 0.4383, "step": 7164 }, { "epoch": 2.005317660229499, "grad_norm": 0.28473189327032133, "learning_rate": 2.7176177052051153e-05, "loss": 0.4545, "step": 7165 }, { "epoch": 2.005597537083683, "grad_norm": 0.27743126173059895, "learning_rate": 2.7162453369338614e-05, "loss": 0.4391, "step": 7166 }, { "epoch": 2.0058774139378674, "grad_norm": 0.2429334979008607, "learning_rate": 2.7148731860316546e-05, "loss": 0.4569, "step": 7167 }, { "epoch": 2.0061572907920513, "grad_norm": 0.2475676642145208, "learning_rate": 2.7135012526290972e-05, "loss": 0.4769, "step": 7168 }, { "epoch": 2.0064371676462356, "grad_norm": 0.2565373673679602, "learning_rate": 2.7121295368567702e-05, "loss": 0.4845, "step": 7169 }, { "epoch": 2.00671704450042, "grad_norm": 0.24954284048023126, "learning_rate": 2.7107580388452335e-05, "loss": 0.4566, "step": 7170 }, { "epoch": 2.006996921354604, "grad_norm": 0.25129636868291216, "learning_rate": 2.7093867587250288e-05, "loss": 0.448, "step": 7171 }, { "epoch": 2.007276798208788, "grad_norm": 0.2369952002730348, "learning_rate": 2.7080156966266745e-05, "loss": 0.4562, "step": 7172 }, { "epoch": 2.0075566750629723, "grad_norm": 0.25261368158797665, "learning_rate": 2.7066448526806697e-05, "loss": 0.4723, "step": 7173 }, { "epoch": 2.0078365519171566, "grad_norm": 0.23919265204567164, "learning_rate": 2.7052742270174902e-05, "loss": 0.4399, "step": 7174 }, { "epoch": 2.0081164287713404, "grad_norm": 0.2402531935253443, "learning_rate": 2.703903819767595e-05, "loss": 0.4544, "step": 7175 }, { "epoch": 2.0083963056255247, "grad_norm": 0.24190698892130647, "learning_rate": 2.702533631061419e-05, "loss": 0.4616, "step": 7176 }, { "epoch": 2.008676182479709, "grad_norm": 0.2584528292180475, "learning_rate": 2.701163661029379e-05, "loss": 0.4507, "step": 7177 }, { "epoch": 2.008956059333893, "grad_norm": 0.243630883588506, "learning_rate": 2.6997939098018678e-05, "loss": 0.4453, "step": 7178 }, { "epoch": 2.009235936188077, "grad_norm": 0.24893016839360596, "learning_rate": 2.698424377509259e-05, "loss": 0.4522, "step": 7179 }, { "epoch": 2.0095158130422615, "grad_norm": 0.24682927072592956, "learning_rate": 2.697055064281907e-05, "loss": 0.4692, "step": 7180 }, { "epoch": 2.0097956898964457, "grad_norm": 0.2440123746660851, "learning_rate": 2.6956859702501426e-05, "loss": 0.4402, "step": 7181 }, { "epoch": 2.0100755667506296, "grad_norm": 0.27723488664785007, "learning_rate": 2.6943170955442774e-05, "loss": 0.4522, "step": 7182 }, { "epoch": 2.010355443604814, "grad_norm": 0.23837291179707326, "learning_rate": 2.6929484402946014e-05, "loss": 0.4645, "step": 7183 }, { "epoch": 2.010635320458998, "grad_norm": 0.2267454845076299, "learning_rate": 2.6915800046313848e-05, "loss": 0.4598, "step": 7184 }, { "epoch": 2.010915197313182, "grad_norm": 0.23354531967932965, "learning_rate": 2.6902117886848755e-05, "loss": 0.4606, "step": 7185 }, { "epoch": 2.0111950741673663, "grad_norm": 0.23231625117562205, "learning_rate": 2.6888437925853005e-05, "loss": 0.443, "step": 7186 }, { "epoch": 2.0114749510215506, "grad_norm": 0.2409498444756786, "learning_rate": 2.6874760164628666e-05, "loss": 0.4631, "step": 7187 }, { "epoch": 2.011754827875735, "grad_norm": 0.247697741758576, "learning_rate": 2.6861084604477604e-05, "loss": 0.4608, "step": 7188 }, { "epoch": 2.0120347047299187, "grad_norm": 0.23719228635563858, "learning_rate": 2.684741124670146e-05, "loss": 0.4608, "step": 7189 }, { "epoch": 2.012314581584103, "grad_norm": 0.24805011363637294, "learning_rate": 2.6833740092601673e-05, "loss": 0.4532, "step": 7190 }, { "epoch": 2.0125944584382873, "grad_norm": 0.24152105620775793, "learning_rate": 2.6820071143479468e-05, "loss": 0.4497, "step": 7191 }, { "epoch": 2.012874335292471, "grad_norm": 0.23978609357587777, "learning_rate": 2.680640440063587e-05, "loss": 0.4353, "step": 7192 }, { "epoch": 2.0131542121466555, "grad_norm": 0.23870573736222256, "learning_rate": 2.679273986537168e-05, "loss": 0.4674, "step": 7193 }, { "epoch": 2.0134340890008398, "grad_norm": 0.2458402418123988, "learning_rate": 2.67790775389875e-05, "loss": 0.4521, "step": 7194 }, { "epoch": 2.0137139658550236, "grad_norm": 0.23847230539227382, "learning_rate": 2.676541742278372e-05, "loss": 0.4531, "step": 7195 }, { "epoch": 2.013993842709208, "grad_norm": 0.23306581560973855, "learning_rate": 2.675175951806051e-05, "loss": 0.4339, "step": 7196 }, { "epoch": 2.014273719563392, "grad_norm": 0.25087566501309366, "learning_rate": 2.6738103826117843e-05, "loss": 0.4546, "step": 7197 }, { "epoch": 2.0145535964175765, "grad_norm": 0.24016871065332204, "learning_rate": 2.6724450348255477e-05, "loss": 0.4685, "step": 7198 }, { "epoch": 2.0148334732717603, "grad_norm": 0.23767573246241336, "learning_rate": 2.6710799085772954e-05, "loss": 0.4372, "step": 7199 }, { "epoch": 2.0151133501259446, "grad_norm": 0.24510462731356397, "learning_rate": 2.6697150039969603e-05, "loss": 0.4467, "step": 7200 }, { "epoch": 2.015393226980129, "grad_norm": 0.2546874855617003, "learning_rate": 2.6683503212144563e-05, "loss": 0.4463, "step": 7201 }, { "epoch": 2.0156731038343128, "grad_norm": 0.23618363043472076, "learning_rate": 2.666985860359673e-05, "loss": 0.4515, "step": 7202 }, { "epoch": 2.015952980688497, "grad_norm": 0.2528679248266655, "learning_rate": 2.6656216215624818e-05, "loss": 0.455, "step": 7203 }, { "epoch": 2.0162328575426813, "grad_norm": 0.2460945714151176, "learning_rate": 2.6642576049527313e-05, "loss": 0.4576, "step": 7204 }, { "epoch": 2.016512734396865, "grad_norm": 0.24045379737398392, "learning_rate": 2.6628938106602497e-05, "loss": 0.4547, "step": 7205 }, { "epoch": 2.0167926112510495, "grad_norm": 0.2431107303219301, "learning_rate": 2.6615302388148428e-05, "loss": 0.4724, "step": 7206 }, { "epoch": 2.0170724881052338, "grad_norm": 0.2569350048873161, "learning_rate": 2.6601668895462973e-05, "loss": 0.4745, "step": 7207 }, { "epoch": 2.017352364959418, "grad_norm": 0.2500396671786939, "learning_rate": 2.658803762984376e-05, "loss": 0.4739, "step": 7208 }, { "epoch": 2.017632241813602, "grad_norm": 0.2399155743260989, "learning_rate": 2.6574408592588234e-05, "loss": 0.4272, "step": 7209 }, { "epoch": 2.017912118667786, "grad_norm": 0.25007912665628107, "learning_rate": 2.656078178499361e-05, "loss": 0.4611, "step": 7210 }, { "epoch": 2.0181919955219705, "grad_norm": 0.23622471689458255, "learning_rate": 2.65471572083569e-05, "loss": 0.4584, "step": 7211 }, { "epoch": 2.0184718723761543, "grad_norm": 0.2359639700080925, "learning_rate": 2.6533534863974886e-05, "loss": 0.4767, "step": 7212 }, { "epoch": 2.0187517492303386, "grad_norm": 0.2418095161208412, "learning_rate": 2.6519914753144158e-05, "loss": 0.4571, "step": 7213 }, { "epoch": 2.019031626084523, "grad_norm": 0.25041641986788166, "learning_rate": 2.6506296877161092e-05, "loss": 0.4662, "step": 7214 }, { "epoch": 2.0193115029387068, "grad_norm": 0.23799631436914484, "learning_rate": 2.6492681237321836e-05, "loss": 0.4542, "step": 7215 }, { "epoch": 2.019591379792891, "grad_norm": 0.2357176967359638, "learning_rate": 2.647906783492234e-05, "loss": 0.4636, "step": 7216 }, { "epoch": 2.0198712566470753, "grad_norm": 0.2361599119201025, "learning_rate": 2.6465456671258333e-05, "loss": 0.4482, "step": 7217 }, { "epoch": 2.0201511335012596, "grad_norm": 0.24568280575614648, "learning_rate": 2.645184774762533e-05, "loss": 0.4613, "step": 7218 }, { "epoch": 2.0204310103554435, "grad_norm": 0.23437213348064267, "learning_rate": 2.6438241065318637e-05, "loss": 0.4443, "step": 7219 }, { "epoch": 2.0207108872096278, "grad_norm": 0.23810477968970578, "learning_rate": 2.6424636625633337e-05, "loss": 0.444, "step": 7220 }, { "epoch": 2.020990764063812, "grad_norm": 0.24182078537158005, "learning_rate": 2.6411034429864347e-05, "loss": 0.4608, "step": 7221 }, { "epoch": 2.021270640917996, "grad_norm": 0.5722824858401844, "learning_rate": 2.6397434479306294e-05, "loss": 0.4622, "step": 7222 }, { "epoch": 2.02155051777218, "grad_norm": 0.24109465311373007, "learning_rate": 2.638383677525363e-05, "loss": 0.4527, "step": 7223 }, { "epoch": 2.0218303946263645, "grad_norm": 0.23539187624819047, "learning_rate": 2.63702413190006e-05, "loss": 0.4535, "step": 7224 }, { "epoch": 2.022110271480549, "grad_norm": 0.23084401150477463, "learning_rate": 2.635664811184123e-05, "loss": 0.4645, "step": 7225 }, { "epoch": 2.0223901483347326, "grad_norm": 0.24562055324409324, "learning_rate": 2.6343057155069328e-05, "loss": 0.4832, "step": 7226 }, { "epoch": 2.022670025188917, "grad_norm": 0.2333027891352052, "learning_rate": 2.632946844997849e-05, "loss": 0.4489, "step": 7227 }, { "epoch": 2.022949902043101, "grad_norm": 0.2432113946363506, "learning_rate": 2.6315881997862086e-05, "loss": 0.4549, "step": 7228 }, { "epoch": 2.023229778897285, "grad_norm": 0.23477990070633872, "learning_rate": 2.6302297800013297e-05, "loss": 0.4668, "step": 7229 }, { "epoch": 2.0235096557514693, "grad_norm": 0.2329525268170207, "learning_rate": 2.6288715857725067e-05, "loss": 0.463, "step": 7230 }, { "epoch": 2.0237895326056536, "grad_norm": 0.23645176432539472, "learning_rate": 2.6275136172290127e-05, "loss": 0.4549, "step": 7231 }, { "epoch": 2.0240694094598375, "grad_norm": 0.2256120580521212, "learning_rate": 2.626155874500101e-05, "loss": 0.4532, "step": 7232 }, { "epoch": 2.024349286314022, "grad_norm": 0.24494509486609295, "learning_rate": 2.6247983577150016e-05, "loss": 0.4415, "step": 7233 }, { "epoch": 2.024629163168206, "grad_norm": 0.25558526117028796, "learning_rate": 2.6234410670029243e-05, "loss": 0.4493, "step": 7234 }, { "epoch": 2.0249090400223904, "grad_norm": 0.25373525330697044, "learning_rate": 2.622084002493056e-05, "loss": 0.4666, "step": 7235 }, { "epoch": 2.025188916876574, "grad_norm": 0.24092647473756687, "learning_rate": 2.6207271643145635e-05, "loss": 0.463, "step": 7236 }, { "epoch": 2.0254687937307585, "grad_norm": 0.2398350079466824, "learning_rate": 2.619370552596592e-05, "loss": 0.4568, "step": 7237 }, { "epoch": 2.025748670584943, "grad_norm": 0.2402813065093519, "learning_rate": 2.6180141674682612e-05, "loss": 0.454, "step": 7238 }, { "epoch": 2.0260285474391266, "grad_norm": 0.24515891879724147, "learning_rate": 2.616658009058679e-05, "loss": 0.4659, "step": 7239 }, { "epoch": 2.026308424293311, "grad_norm": 0.25237161543010617, "learning_rate": 2.61530207749692e-05, "loss": 0.4639, "step": 7240 }, { "epoch": 2.026588301147495, "grad_norm": 0.2428784920326867, "learning_rate": 2.613946372912044e-05, "loss": 0.4443, "step": 7241 }, { "epoch": 2.026868178001679, "grad_norm": 0.24933667845887866, "learning_rate": 2.6125908954330868e-05, "loss": 0.4591, "step": 7242 }, { "epoch": 2.0271480548558634, "grad_norm": 0.2343228016709955, "learning_rate": 2.611235645189065e-05, "loss": 0.4512, "step": 7243 }, { "epoch": 2.0274279317100476, "grad_norm": 0.2387259647127503, "learning_rate": 2.6098806223089723e-05, "loss": 0.4852, "step": 7244 }, { "epoch": 2.027707808564232, "grad_norm": 0.23711731697612165, "learning_rate": 2.6085258269217795e-05, "loss": 0.4327, "step": 7245 }, { "epoch": 2.027987685418416, "grad_norm": 0.23941383218364568, "learning_rate": 2.6071712591564367e-05, "loss": 0.4418, "step": 7246 }, { "epoch": 2.0282675622726, "grad_norm": 0.24611736084243424, "learning_rate": 2.6058169191418725e-05, "loss": 0.4651, "step": 7247 }, { "epoch": 2.0285474391267844, "grad_norm": 0.2393564650461905, "learning_rate": 2.6044628070069945e-05, "loss": 0.4428, "step": 7248 }, { "epoch": 2.028827315980968, "grad_norm": 0.24443034810960743, "learning_rate": 2.603108922880687e-05, "loss": 0.439, "step": 7249 }, { "epoch": 2.0291071928351525, "grad_norm": 0.24455955218091963, "learning_rate": 2.6017552668918143e-05, "loss": 0.4614, "step": 7250 }, { "epoch": 2.029387069689337, "grad_norm": 0.23920551494992115, "learning_rate": 2.6004018391692175e-05, "loss": 0.4391, "step": 7251 }, { "epoch": 2.0296669465435206, "grad_norm": 0.23962490949893656, "learning_rate": 2.599048639841717e-05, "loss": 0.4405, "step": 7252 }, { "epoch": 2.029946823397705, "grad_norm": 0.2461391762251693, "learning_rate": 2.59769566903811e-05, "loss": 0.4657, "step": 7253 }, { "epoch": 2.0302267002518892, "grad_norm": 0.24859487862695334, "learning_rate": 2.5963429268871743e-05, "loss": 0.4527, "step": 7254 }, { "epoch": 2.0305065771060735, "grad_norm": 0.23903524348802838, "learning_rate": 2.5949904135176624e-05, "loss": 0.4395, "step": 7255 }, { "epoch": 2.0307864539602574, "grad_norm": 0.2380227664798421, "learning_rate": 2.5936381290583112e-05, "loss": 0.4546, "step": 7256 }, { "epoch": 2.0310663308144417, "grad_norm": 0.2474716956010892, "learning_rate": 2.5922860736378314e-05, "loss": 0.4415, "step": 7257 }, { "epoch": 2.031346207668626, "grad_norm": 0.23970092978536683, "learning_rate": 2.5909342473849087e-05, "loss": 0.4496, "step": 7258 }, { "epoch": 2.03162608452281, "grad_norm": 0.2493147804591437, "learning_rate": 2.5895826504282127e-05, "loss": 0.4514, "step": 7259 }, { "epoch": 2.031905961376994, "grad_norm": 0.26862492448438524, "learning_rate": 2.5882312828963895e-05, "loss": 0.4567, "step": 7260 }, { "epoch": 2.0321858382311784, "grad_norm": 0.2475580247703966, "learning_rate": 2.5868801449180625e-05, "loss": 0.4424, "step": 7261 }, { "epoch": 2.0324657150853622, "grad_norm": 0.24809876655430108, "learning_rate": 2.585529236621834e-05, "loss": 0.4612, "step": 7262 }, { "epoch": 2.0327455919395465, "grad_norm": 0.24451421788854538, "learning_rate": 2.584178558136285e-05, "loss": 0.4426, "step": 7263 }, { "epoch": 2.033025468793731, "grad_norm": 0.23848747788817679, "learning_rate": 2.582828109589972e-05, "loss": 0.4627, "step": 7264 }, { "epoch": 2.033305345647915, "grad_norm": 0.2349102396252516, "learning_rate": 2.581477891111433e-05, "loss": 0.4589, "step": 7265 }, { "epoch": 2.033585222502099, "grad_norm": 0.24949051830562546, "learning_rate": 2.580127902829182e-05, "loss": 0.4594, "step": 7266 }, { "epoch": 2.0338650993562832, "grad_norm": 0.24670318721825205, "learning_rate": 2.5787781448717112e-05, "loss": 0.4484, "step": 7267 }, { "epoch": 2.0341449762104675, "grad_norm": 0.25083849647804124, "learning_rate": 2.577428617367492e-05, "loss": 0.469, "step": 7268 }, { "epoch": 2.0344248530646514, "grad_norm": 0.24612589767460558, "learning_rate": 2.5760793204449735e-05, "loss": 0.4629, "step": 7269 }, { "epoch": 2.0347047299188357, "grad_norm": 0.24557294540946742, "learning_rate": 2.5747302542325813e-05, "loss": 0.4792, "step": 7270 }, { "epoch": 2.03498460677302, "grad_norm": 0.25051018400826347, "learning_rate": 2.5733814188587213e-05, "loss": 0.4522, "step": 7271 }, { "epoch": 2.0352644836272042, "grad_norm": 0.24763663644100886, "learning_rate": 2.5720328144517748e-05, "loss": 0.4527, "step": 7272 }, { "epoch": 2.035544360481388, "grad_norm": 0.25407321940786803, "learning_rate": 2.570684441140105e-05, "loss": 0.4803, "step": 7273 }, { "epoch": 2.0358242373355724, "grad_norm": 0.2562832117551291, "learning_rate": 2.5693362990520498e-05, "loss": 0.4822, "step": 7274 }, { "epoch": 2.0361041141897567, "grad_norm": 0.24653770990649573, "learning_rate": 2.5679883883159283e-05, "loss": 0.4829, "step": 7275 }, { "epoch": 2.0363839910439405, "grad_norm": 0.25026215764599047, "learning_rate": 2.566640709060032e-05, "loss": 0.4713, "step": 7276 }, { "epoch": 2.036663867898125, "grad_norm": 0.23154260517396444, "learning_rate": 2.5652932614126345e-05, "loss": 0.4283, "step": 7277 }, { "epoch": 2.036943744752309, "grad_norm": 0.23802200814652452, "learning_rate": 2.563946045501987e-05, "loss": 0.4712, "step": 7278 }, { "epoch": 2.037223621606493, "grad_norm": 0.24291863177250364, "learning_rate": 2.5625990614563184e-05, "loss": 0.4654, "step": 7279 }, { "epoch": 2.0375034984606772, "grad_norm": 0.24164486995545642, "learning_rate": 2.5612523094038355e-05, "loss": 0.4478, "step": 7280 }, { "epoch": 2.0377833753148615, "grad_norm": 0.25548731208347847, "learning_rate": 2.559905789472723e-05, "loss": 0.4592, "step": 7281 }, { "epoch": 2.038063252169046, "grad_norm": 0.25126792292383143, "learning_rate": 2.558559501791143e-05, "loss": 0.4479, "step": 7282 }, { "epoch": 2.0383431290232297, "grad_norm": 0.25616064956120344, "learning_rate": 2.5572134464872364e-05, "loss": 0.4389, "step": 7283 }, { "epoch": 2.038623005877414, "grad_norm": 0.2495229580549435, "learning_rate": 2.555867623689121e-05, "loss": 0.451, "step": 7284 }, { "epoch": 2.0389028827315983, "grad_norm": 0.24825812008906087, "learning_rate": 2.554522033524893e-05, "loss": 0.4718, "step": 7285 }, { "epoch": 2.039182759585782, "grad_norm": 0.22993352633384567, "learning_rate": 2.5531766761226272e-05, "loss": 0.4377, "step": 7286 }, { "epoch": 2.0394626364399664, "grad_norm": 0.23348585502524938, "learning_rate": 2.5518315516103748e-05, "loss": 0.4505, "step": 7287 }, { "epoch": 2.0397425132941507, "grad_norm": 0.2505855142977987, "learning_rate": 2.5504866601161652e-05, "loss": 0.4748, "step": 7288 }, { "epoch": 2.0400223901483345, "grad_norm": 0.24675514469301155, "learning_rate": 2.5491420017680047e-05, "loss": 0.4576, "step": 7289 }, { "epoch": 2.040302267002519, "grad_norm": 0.24383144668329754, "learning_rate": 2.5477975766938824e-05, "loss": 0.4498, "step": 7290 }, { "epoch": 2.040582143856703, "grad_norm": 0.24710370847883345, "learning_rate": 2.546453385021759e-05, "loss": 0.465, "step": 7291 }, { "epoch": 2.0408620207108874, "grad_norm": 0.2500316025046973, "learning_rate": 2.545109426879576e-05, "loss": 0.4503, "step": 7292 }, { "epoch": 2.0411418975650713, "grad_norm": 0.23595454209093394, "learning_rate": 2.543765702395253e-05, "loss": 0.4468, "step": 7293 }, { "epoch": 2.0414217744192555, "grad_norm": 0.2506616203082804, "learning_rate": 2.5424222116966844e-05, "loss": 0.4563, "step": 7294 }, { "epoch": 2.04170165127344, "grad_norm": 0.24512988782659414, "learning_rate": 2.5410789549117447e-05, "loss": 0.4497, "step": 7295 }, { "epoch": 2.0419815281276237, "grad_norm": 0.2442620989671407, "learning_rate": 2.539735932168287e-05, "loss": 0.4463, "step": 7296 }, { "epoch": 2.042261404981808, "grad_norm": 0.2362726934455694, "learning_rate": 2.5383931435941394e-05, "loss": 0.4595, "step": 7297 }, { "epoch": 2.0425412818359923, "grad_norm": 0.2446448559543068, "learning_rate": 2.5370505893171104e-05, "loss": 0.4667, "step": 7298 }, { "epoch": 2.042821158690176, "grad_norm": 0.2424844078232981, "learning_rate": 2.5357082694649852e-05, "loss": 0.4636, "step": 7299 }, { "epoch": 2.0431010355443604, "grad_norm": 0.24831067498702217, "learning_rate": 2.5343661841655263e-05, "loss": 0.4685, "step": 7300 }, { "epoch": 2.0433809123985447, "grad_norm": 0.2473059494485125, "learning_rate": 2.5330243335464737e-05, "loss": 0.4638, "step": 7301 }, { "epoch": 2.043660789252729, "grad_norm": 0.24366057860506135, "learning_rate": 2.5316827177355464e-05, "loss": 0.4491, "step": 7302 }, { "epoch": 2.043940666106913, "grad_norm": 0.2504327054990302, "learning_rate": 2.530341336860439e-05, "loss": 0.4546, "step": 7303 }, { "epoch": 2.044220542961097, "grad_norm": 0.24348921913908614, "learning_rate": 2.5290001910488257e-05, "loss": 0.4725, "step": 7304 }, { "epoch": 2.0445004198152814, "grad_norm": 0.2495963566684416, "learning_rate": 2.5276592804283573e-05, "loss": 0.4689, "step": 7305 }, { "epoch": 2.0447802966694653, "grad_norm": 0.2546161261898743, "learning_rate": 2.526318605126663e-05, "loss": 0.4632, "step": 7306 }, { "epoch": 2.0450601735236495, "grad_norm": 0.24076784172964433, "learning_rate": 2.5249781652713457e-05, "loss": 0.4594, "step": 7307 }, { "epoch": 2.045340050377834, "grad_norm": 0.24357188961456142, "learning_rate": 2.523637960989994e-05, "loss": 0.4579, "step": 7308 }, { "epoch": 2.0456199272320177, "grad_norm": 0.23903357537864758, "learning_rate": 2.5222979924101675e-05, "loss": 0.4742, "step": 7309 }, { "epoch": 2.045899804086202, "grad_norm": 0.24550663594381486, "learning_rate": 2.520958259659405e-05, "loss": 0.4608, "step": 7310 }, { "epoch": 2.0461796809403863, "grad_norm": 0.2583528489198306, "learning_rate": 2.5196187628652247e-05, "loss": 0.448, "step": 7311 }, { "epoch": 2.0464595577945706, "grad_norm": 0.24482717544201815, "learning_rate": 2.5182795021551163e-05, "loss": 0.4677, "step": 7312 }, { "epoch": 2.0467394346487544, "grad_norm": 0.25048101638936504, "learning_rate": 2.5169404776565553e-05, "loss": 0.465, "step": 7313 }, { "epoch": 2.0470193115029387, "grad_norm": 0.24573404057412682, "learning_rate": 2.5156016894969887e-05, "loss": 0.4634, "step": 7314 }, { "epoch": 2.047299188357123, "grad_norm": 0.25471144676227236, "learning_rate": 2.5142631378038438e-05, "loss": 0.4767, "step": 7315 }, { "epoch": 2.047579065211307, "grad_norm": 0.24112860647988485, "learning_rate": 2.5129248227045248e-05, "loss": 0.457, "step": 7316 }, { "epoch": 2.047858942065491, "grad_norm": 0.24418834987738441, "learning_rate": 2.5115867443264136e-05, "loss": 0.4351, "step": 7317 }, { "epoch": 2.0481388189196754, "grad_norm": 0.2370929090253794, "learning_rate": 2.510248902796869e-05, "loss": 0.456, "step": 7318 }, { "epoch": 2.0484186957738597, "grad_norm": 0.2379488471734615, "learning_rate": 2.5089112982432268e-05, "loss": 0.4582, "step": 7319 }, { "epoch": 2.0486985726280436, "grad_norm": 0.24573479537604365, "learning_rate": 2.5075739307928014e-05, "loss": 0.4739, "step": 7320 }, { "epoch": 2.048978449482228, "grad_norm": 0.25121647950701465, "learning_rate": 2.5062368005728855e-05, "loss": 0.4604, "step": 7321 }, { "epoch": 2.049258326336412, "grad_norm": 0.246351919919773, "learning_rate": 2.504899907710746e-05, "loss": 0.4605, "step": 7322 }, { "epoch": 2.049538203190596, "grad_norm": 0.24631304878976065, "learning_rate": 2.5035632523336293e-05, "loss": 0.4559, "step": 7323 }, { "epoch": 2.0498180800447803, "grad_norm": 0.24343820253286605, "learning_rate": 2.502226834568758e-05, "loss": 0.4606, "step": 7324 }, { "epoch": 2.0500979568989646, "grad_norm": 0.2654916928538729, "learning_rate": 2.5008906545433375e-05, "loss": 0.4518, "step": 7325 }, { "epoch": 2.0503778337531484, "grad_norm": 0.24908078981837442, "learning_rate": 2.4995547123845426e-05, "loss": 0.4553, "step": 7326 }, { "epoch": 2.0506577106073327, "grad_norm": 0.2484741919983697, "learning_rate": 2.4982190082195293e-05, "loss": 0.4649, "step": 7327 }, { "epoch": 2.050937587461517, "grad_norm": 0.2534526358928263, "learning_rate": 2.4968835421754316e-05, "loss": 0.4807, "step": 7328 }, { "epoch": 2.0512174643157013, "grad_norm": 0.24992970447352875, "learning_rate": 2.4955483143793613e-05, "loss": 0.4661, "step": 7329 }, { "epoch": 2.051497341169885, "grad_norm": 0.2326036789805862, "learning_rate": 2.494213324958402e-05, "loss": 0.4676, "step": 7330 }, { "epoch": 2.0517772180240694, "grad_norm": 0.24315149972019612, "learning_rate": 2.4928785740396215e-05, "loss": 0.4444, "step": 7331 }, { "epoch": 2.0520570948782537, "grad_norm": 0.2363183917864702, "learning_rate": 2.4915440617500613e-05, "loss": 0.4741, "step": 7332 }, { "epoch": 2.0523369717324376, "grad_norm": 0.24517539840395006, "learning_rate": 2.4902097882167415e-05, "loss": 0.4461, "step": 7333 }, { "epoch": 2.052616848586622, "grad_norm": 0.240754716123818, "learning_rate": 2.488875753566659e-05, "loss": 0.4702, "step": 7334 }, { "epoch": 2.052896725440806, "grad_norm": 0.24546696196290843, "learning_rate": 2.4875419579267873e-05, "loss": 0.4443, "step": 7335 }, { "epoch": 2.05317660229499, "grad_norm": 0.24602789936627345, "learning_rate": 2.486208401424079e-05, "loss": 0.458, "step": 7336 }, { "epoch": 2.0534564791491743, "grad_norm": 0.2549588748124507, "learning_rate": 2.4848750841854616e-05, "loss": 0.4526, "step": 7337 }, { "epoch": 2.0537363560033586, "grad_norm": 0.24879893937875075, "learning_rate": 2.4835420063378418e-05, "loss": 0.479, "step": 7338 }, { "epoch": 2.054016232857543, "grad_norm": 0.24536255209201727, "learning_rate": 2.4822091680081018e-05, "loss": 0.4428, "step": 7339 }, { "epoch": 2.0542961097117267, "grad_norm": 0.2400429868181301, "learning_rate": 2.480876569323103e-05, "loss": 0.4697, "step": 7340 }, { "epoch": 2.054575986565911, "grad_norm": 0.24794095141216924, "learning_rate": 2.47954421040968e-05, "loss": 0.4669, "step": 7341 }, { "epoch": 2.0548558634200953, "grad_norm": 0.24993591672310664, "learning_rate": 2.4782120913946523e-05, "loss": 0.4416, "step": 7342 }, { "epoch": 2.055135740274279, "grad_norm": 0.24633001747357577, "learning_rate": 2.4768802124048085e-05, "loss": 0.4632, "step": 7343 }, { "epoch": 2.0554156171284634, "grad_norm": 0.2444112442902117, "learning_rate": 2.4755485735669188e-05, "loss": 0.4499, "step": 7344 }, { "epoch": 2.0556954939826477, "grad_norm": 0.24607674096120183, "learning_rate": 2.4742171750077286e-05, "loss": 0.4681, "step": 7345 }, { "epoch": 2.0559753708368316, "grad_norm": 0.2650090843559069, "learning_rate": 2.4728860168539618e-05, "loss": 0.4784, "step": 7346 }, { "epoch": 2.056255247691016, "grad_norm": 0.24861802624393378, "learning_rate": 2.47155509923232e-05, "loss": 0.4575, "step": 7347 }, { "epoch": 2.0565351245452, "grad_norm": 0.24588781256107936, "learning_rate": 2.4702244222694776e-05, "loss": 0.4609, "step": 7348 }, { "epoch": 2.0568150013993844, "grad_norm": 0.24159597154506285, "learning_rate": 2.46889398609209e-05, "loss": 0.437, "step": 7349 }, { "epoch": 2.0570948782535683, "grad_norm": 0.2471740579027645, "learning_rate": 2.4675637908267903e-05, "loss": 0.4278, "step": 7350 }, { "epoch": 2.0573747551077526, "grad_norm": 0.2409864273656536, "learning_rate": 2.466233836600186e-05, "loss": 0.4491, "step": 7351 }, { "epoch": 2.057654631961937, "grad_norm": 0.24855932017320295, "learning_rate": 2.4649041235388633e-05, "loss": 0.4622, "step": 7352 }, { "epoch": 2.0579345088161207, "grad_norm": 0.24838985966907967, "learning_rate": 2.4635746517693853e-05, "loss": 0.482, "step": 7353 }, { "epoch": 2.058214385670305, "grad_norm": 0.24954842637061003, "learning_rate": 2.4622454214182917e-05, "loss": 0.4653, "step": 7354 }, { "epoch": 2.0584942625244893, "grad_norm": 0.23712606161658334, "learning_rate": 2.4609164326120986e-05, "loss": 0.4565, "step": 7355 }, { "epoch": 2.0587741393786736, "grad_norm": 0.2423155389090088, "learning_rate": 2.4595876854773016e-05, "loss": 0.4532, "step": 7356 }, { "epoch": 2.0590540162328574, "grad_norm": 0.2375487504413494, "learning_rate": 2.4582591801403705e-05, "loss": 0.4621, "step": 7357 }, { "epoch": 2.0593338930870417, "grad_norm": 0.24704218164447447, "learning_rate": 2.4569309167277528e-05, "loss": 0.4706, "step": 7358 }, { "epoch": 2.059613769941226, "grad_norm": 0.26403400576395336, "learning_rate": 2.4556028953658722e-05, "loss": 0.4841, "step": 7359 }, { "epoch": 2.05989364679541, "grad_norm": 0.24067560034869967, "learning_rate": 2.454275116181134e-05, "loss": 0.4509, "step": 7360 }, { "epoch": 2.060173523649594, "grad_norm": 0.2390827251944361, "learning_rate": 2.452947579299915e-05, "loss": 0.4659, "step": 7361 }, { "epoch": 2.0604534005037785, "grad_norm": 0.2404377544195593, "learning_rate": 2.4516202848485713e-05, "loss": 0.4399, "step": 7362 }, { "epoch": 2.0607332773579623, "grad_norm": 0.24907212792594013, "learning_rate": 2.4502932329534356e-05, "loss": 0.4732, "step": 7363 }, { "epoch": 2.0610131542121466, "grad_norm": 0.23203863169907962, "learning_rate": 2.4489664237408165e-05, "loss": 0.4568, "step": 7364 }, { "epoch": 2.061293031066331, "grad_norm": 0.2501366444762416, "learning_rate": 2.4476398573370035e-05, "loss": 0.4648, "step": 7365 }, { "epoch": 2.061572907920515, "grad_norm": 0.2525304994937337, "learning_rate": 2.446313533868255e-05, "loss": 0.4629, "step": 7366 }, { "epoch": 2.061852784774699, "grad_norm": 0.23931456218056962, "learning_rate": 2.444987453460814e-05, "loss": 0.4478, "step": 7367 }, { "epoch": 2.0621326616288833, "grad_norm": 0.24774046523785284, "learning_rate": 2.4436616162408975e-05, "loss": 0.4497, "step": 7368 }, { "epoch": 2.0624125384830676, "grad_norm": 0.24952988350952923, "learning_rate": 2.442336022334699e-05, "loss": 0.4616, "step": 7369 }, { "epoch": 2.0626924153372515, "grad_norm": 0.23974477546908268, "learning_rate": 2.4410106718683896e-05, "loss": 0.4442, "step": 7370 }, { "epoch": 2.0629722921914357, "grad_norm": 0.2497246097093398, "learning_rate": 2.4396855649681166e-05, "loss": 0.4425, "step": 7371 }, { "epoch": 2.06325216904562, "grad_norm": 0.24174250745367293, "learning_rate": 2.4383607017600048e-05, "loss": 0.4454, "step": 7372 }, { "epoch": 2.063532045899804, "grad_norm": 0.2537840961291248, "learning_rate": 2.437036082370155e-05, "loss": 0.4565, "step": 7373 }, { "epoch": 2.063811922753988, "grad_norm": 0.25333088689501954, "learning_rate": 2.4357117069246455e-05, "loss": 0.4557, "step": 7374 }, { "epoch": 2.0640917996081725, "grad_norm": 0.24056535254910136, "learning_rate": 2.434387575549531e-05, "loss": 0.4525, "step": 7375 }, { "epoch": 2.0643716764623568, "grad_norm": 0.24208215006306735, "learning_rate": 2.4330636883708412e-05, "loss": 0.4392, "step": 7376 }, { "epoch": 2.0646515533165406, "grad_norm": 0.2620641537026158, "learning_rate": 2.4317400455145882e-05, "loss": 0.4637, "step": 7377 }, { "epoch": 2.064931430170725, "grad_norm": 0.23863057542189706, "learning_rate": 2.430416647106756e-05, "loss": 0.4502, "step": 7378 }, { "epoch": 2.065211307024909, "grad_norm": 0.24783973345151425, "learning_rate": 2.4290934932733045e-05, "loss": 0.4775, "step": 7379 }, { "epoch": 2.065491183879093, "grad_norm": 0.24599111696734668, "learning_rate": 2.4277705841401737e-05, "loss": 0.4692, "step": 7380 }, { "epoch": 2.0657710607332773, "grad_norm": 0.24491120001851938, "learning_rate": 2.4264479198332785e-05, "loss": 0.4358, "step": 7381 }, { "epoch": 2.0660509375874616, "grad_norm": 0.24505219777758316, "learning_rate": 2.4251255004785106e-05, "loss": 0.4551, "step": 7382 }, { "epoch": 2.0663308144416455, "grad_norm": 0.23700138605356436, "learning_rate": 2.4238033262017405e-05, "loss": 0.4586, "step": 7383 }, { "epoch": 2.0666106912958297, "grad_norm": 0.2461152083478605, "learning_rate": 2.42248139712881e-05, "loss": 0.4578, "step": 7384 }, { "epoch": 2.066890568150014, "grad_norm": 0.24478452390468058, "learning_rate": 2.421159713385543e-05, "loss": 0.4479, "step": 7385 }, { "epoch": 2.0671704450041983, "grad_norm": 0.24983949345962733, "learning_rate": 2.4198382750977384e-05, "loss": 0.4407, "step": 7386 }, { "epoch": 2.067450321858382, "grad_norm": 0.2441473963548969, "learning_rate": 2.41851708239117e-05, "loss": 0.4665, "step": 7387 }, { "epoch": 2.0677301987125665, "grad_norm": 0.23834976405597819, "learning_rate": 2.417196135391591e-05, "loss": 0.4532, "step": 7388 }, { "epoch": 2.0680100755667508, "grad_norm": 0.3029073235707665, "learning_rate": 2.41587543422473e-05, "loss": 0.4705, "step": 7389 }, { "epoch": 2.0682899524209346, "grad_norm": 0.26545078973583597, "learning_rate": 2.4145549790162906e-05, "loss": 0.473, "step": 7390 }, { "epoch": 2.068569829275119, "grad_norm": 0.23377478818315944, "learning_rate": 2.413234769891956e-05, "loss": 0.4174, "step": 7391 }, { "epoch": 2.068849706129303, "grad_norm": 0.24501899558071738, "learning_rate": 2.411914806977384e-05, "loss": 0.455, "step": 7392 }, { "epoch": 2.0691295829834875, "grad_norm": 0.2509900879447519, "learning_rate": 2.410595090398207e-05, "loss": 0.4631, "step": 7393 }, { "epoch": 2.0694094598376713, "grad_norm": 0.24425155333154205, "learning_rate": 2.409275620280041e-05, "loss": 0.4459, "step": 7394 }, { "epoch": 2.0696893366918556, "grad_norm": 0.24231387445788757, "learning_rate": 2.4079563967484713e-05, "loss": 0.444, "step": 7395 }, { "epoch": 2.06996921354604, "grad_norm": 0.24994496182304815, "learning_rate": 2.4066374199290626e-05, "loss": 0.4568, "step": 7396 }, { "epoch": 2.0702490904002238, "grad_norm": 0.23904145426295392, "learning_rate": 2.4053186899473557e-05, "loss": 0.4761, "step": 7397 }, { "epoch": 2.070528967254408, "grad_norm": 0.2544856406150435, "learning_rate": 2.4040002069288687e-05, "loss": 0.4529, "step": 7398 }, { "epoch": 2.0708088441085923, "grad_norm": 0.238774996920386, "learning_rate": 2.4026819709990945e-05, "loss": 0.4433, "step": 7399 }, { "epoch": 2.071088720962776, "grad_norm": 0.24918531664750443, "learning_rate": 2.4013639822835042e-05, "loss": 0.4653, "step": 7400 }, { "epoch": 2.0713685978169605, "grad_norm": 0.24042479136433584, "learning_rate": 2.4000462409075447e-05, "loss": 0.4439, "step": 7401 }, { "epoch": 2.0716484746711448, "grad_norm": 0.24644526224999894, "learning_rate": 2.3987287469966413e-05, "loss": 0.4543, "step": 7402 }, { "epoch": 2.071928351525329, "grad_norm": 0.24734302322857166, "learning_rate": 2.3974115006761894e-05, "loss": 0.4411, "step": 7403 }, { "epoch": 2.072208228379513, "grad_norm": 0.24448473335472756, "learning_rate": 2.396094502071568e-05, "loss": 0.4591, "step": 7404 }, { "epoch": 2.072488105233697, "grad_norm": 0.2465905535816699, "learning_rate": 2.3947777513081292e-05, "loss": 0.4421, "step": 7405 }, { "epoch": 2.0727679820878815, "grad_norm": 0.24346649080667676, "learning_rate": 2.3934612485112024e-05, "loss": 0.4449, "step": 7406 }, { "epoch": 2.0730478589420653, "grad_norm": 0.24604002050814763, "learning_rate": 2.3921449938060924e-05, "loss": 0.4467, "step": 7407 }, { "epoch": 2.0733277357962496, "grad_norm": 0.24562926902681928, "learning_rate": 2.3908289873180823e-05, "loss": 0.4599, "step": 7408 }, { "epoch": 2.073607612650434, "grad_norm": 0.24589573249544378, "learning_rate": 2.389513229172429e-05, "loss": 0.4539, "step": 7409 }, { "epoch": 2.0738874895046178, "grad_norm": 0.24678813561369586, "learning_rate": 2.3881977194943677e-05, "loss": 0.4579, "step": 7410 }, { "epoch": 2.074167366358802, "grad_norm": 0.2662776923644153, "learning_rate": 2.386882458409108e-05, "loss": 0.4671, "step": 7411 }, { "epoch": 2.0744472432129863, "grad_norm": 0.25978459638648005, "learning_rate": 2.3855674460418404e-05, "loss": 0.4596, "step": 7412 }, { "epoch": 2.0747271200671706, "grad_norm": 0.23503045811023623, "learning_rate": 2.384252682517726e-05, "loss": 0.4432, "step": 7413 }, { "epoch": 2.0750069969213545, "grad_norm": 0.23601056429318246, "learning_rate": 2.3829381679619058e-05, "loss": 0.4394, "step": 7414 }, { "epoch": 2.0752868737755388, "grad_norm": 0.24795182253458156, "learning_rate": 2.3816239024994957e-05, "loss": 0.4549, "step": 7415 }, { "epoch": 2.075566750629723, "grad_norm": 0.2425400511452179, "learning_rate": 2.3803098862555877e-05, "loss": 0.439, "step": 7416 }, { "epoch": 2.075846627483907, "grad_norm": 0.2573212425961692, "learning_rate": 2.378996119355251e-05, "loss": 0.4557, "step": 7417 }, { "epoch": 2.076126504338091, "grad_norm": 0.25253090866307665, "learning_rate": 2.3776826019235315e-05, "loss": 0.4568, "step": 7418 }, { "epoch": 2.0764063811922755, "grad_norm": 0.25066486315121606, "learning_rate": 2.3763693340854493e-05, "loss": 0.4416, "step": 7419 }, { "epoch": 2.0766862580464593, "grad_norm": 0.24113915131918504, "learning_rate": 2.3750563159660044e-05, "loss": 0.4516, "step": 7420 }, { "epoch": 2.0769661349006436, "grad_norm": 0.24712480325430675, "learning_rate": 2.3737435476901663e-05, "loss": 0.4485, "step": 7421 }, { "epoch": 2.077246011754828, "grad_norm": 0.2312072937319624, "learning_rate": 2.372431029382888e-05, "loss": 0.4344, "step": 7422 }, { "epoch": 2.077525888609012, "grad_norm": 0.24605372198296094, "learning_rate": 2.3711187611690944e-05, "loss": 0.4625, "step": 7423 }, { "epoch": 2.077805765463196, "grad_norm": 0.23906096090817333, "learning_rate": 2.3698067431736887e-05, "loss": 0.4799, "step": 7424 }, { "epoch": 2.0780856423173804, "grad_norm": 0.2489263103624333, "learning_rate": 2.3684949755215492e-05, "loss": 0.4669, "step": 7425 }, { "epoch": 2.0783655191715646, "grad_norm": 0.25040341531072063, "learning_rate": 2.3671834583375313e-05, "loss": 0.4677, "step": 7426 }, { "epoch": 2.0786453960257485, "grad_norm": 0.2490478099157188, "learning_rate": 2.365872191746465e-05, "loss": 0.4458, "step": 7427 }, { "epoch": 2.078925272879933, "grad_norm": 0.2433664703471242, "learning_rate": 2.364561175873156e-05, "loss": 0.4501, "step": 7428 }, { "epoch": 2.079205149734117, "grad_norm": 0.24597492205580676, "learning_rate": 2.363250410842392e-05, "loss": 0.4705, "step": 7429 }, { "epoch": 2.0794850265883014, "grad_norm": 0.24294513003802817, "learning_rate": 2.3619398967789292e-05, "loss": 0.4438, "step": 7430 }, { "epoch": 2.079764903442485, "grad_norm": 0.24488962710679907, "learning_rate": 2.3606296338075034e-05, "loss": 0.4556, "step": 7431 }, { "epoch": 2.0800447802966695, "grad_norm": 0.257853787991794, "learning_rate": 2.359319622052827e-05, "loss": 0.457, "step": 7432 }, { "epoch": 2.080324657150854, "grad_norm": 0.2494678467193366, "learning_rate": 2.3580098616395863e-05, "loss": 0.4782, "step": 7433 }, { "epoch": 2.0806045340050376, "grad_norm": 0.2369808762035699, "learning_rate": 2.3567003526924463e-05, "loss": 0.4293, "step": 7434 }, { "epoch": 2.080884410859222, "grad_norm": 0.23811384961661042, "learning_rate": 2.355391095336046e-05, "loss": 0.4509, "step": 7435 }, { "epoch": 2.0811642877134062, "grad_norm": 0.24552576073462928, "learning_rate": 2.3540820896950016e-05, "loss": 0.4626, "step": 7436 }, { "epoch": 2.08144416456759, "grad_norm": 0.24413988388687807, "learning_rate": 2.3527733358939046e-05, "loss": 0.4546, "step": 7437 }, { "epoch": 2.0817240414217744, "grad_norm": 0.2485981838183438, "learning_rate": 2.3514648340573257e-05, "loss": 0.4599, "step": 7438 }, { "epoch": 2.0820039182759587, "grad_norm": 0.24454659192468253, "learning_rate": 2.350156584309804e-05, "loss": 0.444, "step": 7439 }, { "epoch": 2.082283795130143, "grad_norm": 0.24074442743098742, "learning_rate": 2.348848586775862e-05, "loss": 0.4703, "step": 7440 }, { "epoch": 2.082563671984327, "grad_norm": 0.23775973111168042, "learning_rate": 2.3475408415799953e-05, "loss": 0.4436, "step": 7441 }, { "epoch": 2.082843548838511, "grad_norm": 0.23971493006095523, "learning_rate": 2.3462333488466758e-05, "loss": 0.4513, "step": 7442 }, { "epoch": 2.0831234256926954, "grad_norm": 0.24551777555783094, "learning_rate": 2.344926108700352e-05, "loss": 0.4517, "step": 7443 }, { "epoch": 2.083403302546879, "grad_norm": 0.24608179058927987, "learning_rate": 2.343619121265447e-05, "loss": 0.4543, "step": 7444 }, { "epoch": 2.0836831794010635, "grad_norm": 0.23219516309417146, "learning_rate": 2.342312386666359e-05, "loss": 0.4364, "step": 7445 }, { "epoch": 2.083963056255248, "grad_norm": 0.24001757191204898, "learning_rate": 2.3410059050274674e-05, "loss": 0.448, "step": 7446 }, { "epoch": 2.0842429331094317, "grad_norm": 0.24052130703717217, "learning_rate": 2.339699676473122e-05, "loss": 0.4356, "step": 7447 }, { "epoch": 2.084522809963616, "grad_norm": 0.2576403626158046, "learning_rate": 2.338393701127651e-05, "loss": 0.4737, "step": 7448 }, { "epoch": 2.0848026868178002, "grad_norm": 0.24973816696897705, "learning_rate": 2.3370879791153566e-05, "loss": 0.4641, "step": 7449 }, { "epoch": 2.0850825636719845, "grad_norm": 0.24707987841698237, "learning_rate": 2.335782510560519e-05, "loss": 0.4485, "step": 7450 }, { "epoch": 2.0853624405261684, "grad_norm": 0.24198995221156094, "learning_rate": 2.3344772955873934e-05, "loss": 0.4763, "step": 7451 }, { "epoch": 2.0856423173803527, "grad_norm": 0.24573034712654576, "learning_rate": 2.3331723343202106e-05, "loss": 0.4598, "step": 7452 }, { "epoch": 2.085922194234537, "grad_norm": 0.24392825108681687, "learning_rate": 2.331867626883178e-05, "loss": 0.4611, "step": 7453 }, { "epoch": 2.086202071088721, "grad_norm": 0.25078868089134737, "learning_rate": 2.330563173400478e-05, "loss": 0.4664, "step": 7454 }, { "epoch": 2.086481947942905, "grad_norm": 0.25552493357833944, "learning_rate": 2.3292589739962695e-05, "loss": 0.463, "step": 7455 }, { "epoch": 2.0867618247970894, "grad_norm": 0.25152576521590647, "learning_rate": 2.327955028794688e-05, "loss": 0.4657, "step": 7456 }, { "epoch": 2.0870417016512732, "grad_norm": 0.2540216750184329, "learning_rate": 2.3266513379198413e-05, "loss": 0.4431, "step": 7457 }, { "epoch": 2.0873215785054575, "grad_norm": 0.257161622211932, "learning_rate": 2.3253479014958164e-05, "loss": 0.4471, "step": 7458 }, { "epoch": 2.087601455359642, "grad_norm": 0.24570160762022308, "learning_rate": 2.3240447196466748e-05, "loss": 0.4429, "step": 7459 }, { "epoch": 2.087881332213826, "grad_norm": 0.24817088308505086, "learning_rate": 2.322741792496455e-05, "loss": 0.471, "step": 7460 }, { "epoch": 2.08816120906801, "grad_norm": 0.24166357490471427, "learning_rate": 2.32143912016917e-05, "loss": 0.4488, "step": 7461 }, { "epoch": 2.0884410859221942, "grad_norm": 0.24797840483804628, "learning_rate": 2.3201367027888083e-05, "loss": 0.4661, "step": 7462 }, { "epoch": 2.0887209627763785, "grad_norm": 0.24327516494509666, "learning_rate": 2.3188345404793338e-05, "loss": 0.45, "step": 7463 }, { "epoch": 2.0890008396305624, "grad_norm": 0.2494607268291219, "learning_rate": 2.31753263336469e-05, "loss": 0.4706, "step": 7464 }, { "epoch": 2.0892807164847467, "grad_norm": 0.2413127471468335, "learning_rate": 2.3162309815687922e-05, "loss": 0.4474, "step": 7465 }, { "epoch": 2.089560593338931, "grad_norm": 0.23146437649500243, "learning_rate": 2.3149295852155313e-05, "loss": 0.452, "step": 7466 }, { "epoch": 2.0898404701931153, "grad_norm": 0.24190359388748187, "learning_rate": 2.3136284444287755e-05, "loss": 0.455, "step": 7467 }, { "epoch": 2.090120347047299, "grad_norm": 0.2480340305458742, "learning_rate": 2.312327559332368e-05, "loss": 0.4677, "step": 7468 }, { "epoch": 2.0904002239014834, "grad_norm": 0.25900478336119287, "learning_rate": 2.3110269300501275e-05, "loss": 0.4716, "step": 7469 }, { "epoch": 2.0906801007556677, "grad_norm": 0.25055030068392614, "learning_rate": 2.309726556705849e-05, "loss": 0.4553, "step": 7470 }, { "epoch": 2.0909599776098515, "grad_norm": 0.24567747066069287, "learning_rate": 2.308426439423303e-05, "loss": 0.4482, "step": 7471 }, { "epoch": 2.091239854464036, "grad_norm": 0.24812686237349435, "learning_rate": 2.3071265783262345e-05, "loss": 0.4717, "step": 7472 }, { "epoch": 2.09151973131822, "grad_norm": 0.2549949899850669, "learning_rate": 2.305826973538366e-05, "loss": 0.4695, "step": 7473 }, { "epoch": 2.091799608172404, "grad_norm": 0.2504635907404072, "learning_rate": 2.304527625183396e-05, "loss": 0.4495, "step": 7474 }, { "epoch": 2.0920794850265882, "grad_norm": 0.23500629964294462, "learning_rate": 2.303228533384993e-05, "loss": 0.4415, "step": 7475 }, { "epoch": 2.0923593618807725, "grad_norm": 0.24579359852312388, "learning_rate": 2.3019296982668083e-05, "loss": 0.4419, "step": 7476 }, { "epoch": 2.092639238734957, "grad_norm": 0.24262114918616326, "learning_rate": 2.3006311199524645e-05, "loss": 0.4435, "step": 7477 }, { "epoch": 2.0929191155891407, "grad_norm": 0.24015906914268179, "learning_rate": 2.2993327985655614e-05, "loss": 0.4368, "step": 7478 }, { "epoch": 2.093198992443325, "grad_norm": 0.25274943579405135, "learning_rate": 2.2980347342296747e-05, "loss": 0.4523, "step": 7479 }, { "epoch": 2.0934788692975093, "grad_norm": 0.24605545727398748, "learning_rate": 2.2967369270683525e-05, "loss": 0.446, "step": 7480 }, { "epoch": 2.093758746151693, "grad_norm": 0.24493701274465995, "learning_rate": 2.2954393772051245e-05, "loss": 0.4429, "step": 7481 }, { "epoch": 2.0940386230058774, "grad_norm": 0.23577548000071086, "learning_rate": 2.2941420847634905e-05, "loss": 0.4612, "step": 7482 }, { "epoch": 2.0943184998600617, "grad_norm": 0.25489438452229934, "learning_rate": 2.2928450498669274e-05, "loss": 0.4528, "step": 7483 }, { "epoch": 2.0945983767142455, "grad_norm": 0.24634515710529292, "learning_rate": 2.291548272638887e-05, "loss": 0.4512, "step": 7484 }, { "epoch": 2.09487825356843, "grad_norm": 0.2542990576902803, "learning_rate": 2.290251753202799e-05, "loss": 0.4846, "step": 7485 }, { "epoch": 2.095158130422614, "grad_norm": 0.2551101021992683, "learning_rate": 2.2889554916820655e-05, "loss": 0.4598, "step": 7486 }, { "epoch": 2.0954380072767984, "grad_norm": 0.25317851487491305, "learning_rate": 2.287659488200065e-05, "loss": 0.4619, "step": 7487 }, { "epoch": 2.0957178841309823, "grad_norm": 0.24711890584810983, "learning_rate": 2.2863637428801536e-05, "loss": 0.4702, "step": 7488 }, { "epoch": 2.0959977609851665, "grad_norm": 0.2435421357837326, "learning_rate": 2.2850682558456593e-05, "loss": 0.4518, "step": 7489 }, { "epoch": 2.096277637839351, "grad_norm": 0.2517223552482673, "learning_rate": 2.2837730272198888e-05, "loss": 0.4485, "step": 7490 }, { "epoch": 2.0965575146935347, "grad_norm": 0.25108703790852543, "learning_rate": 2.2824780571261213e-05, "loss": 0.4534, "step": 7491 }, { "epoch": 2.096837391547719, "grad_norm": 0.24242185222208118, "learning_rate": 2.2811833456876152e-05, "loss": 0.4414, "step": 7492 }, { "epoch": 2.0971172684019033, "grad_norm": 0.23892286033892943, "learning_rate": 2.2798888930275984e-05, "loss": 0.4454, "step": 7493 }, { "epoch": 2.097397145256087, "grad_norm": 0.24753543768992284, "learning_rate": 2.2785946992692796e-05, "loss": 0.4627, "step": 7494 }, { "epoch": 2.0976770221102714, "grad_norm": 0.24247874550019946, "learning_rate": 2.2773007645358403e-05, "loss": 0.4569, "step": 7495 }, { "epoch": 2.0979568989644557, "grad_norm": 0.2450691802857712, "learning_rate": 2.2760070889504382e-05, "loss": 0.4325, "step": 7496 }, { "epoch": 2.09823677581864, "grad_norm": 0.24571952118794047, "learning_rate": 2.2747136726362038e-05, "loss": 0.4554, "step": 7497 }, { "epoch": 2.098516652672824, "grad_norm": 0.23875697882298036, "learning_rate": 2.2734205157162498e-05, "loss": 0.4283, "step": 7498 }, { "epoch": 2.098796529527008, "grad_norm": 0.2533906134623975, "learning_rate": 2.272127618313657e-05, "loss": 0.465, "step": 7499 }, { "epoch": 2.0990764063811924, "grad_norm": 0.251507126168585, "learning_rate": 2.2708349805514846e-05, "loss": 0.4604, "step": 7500 }, { "epoch": 2.0993562832353763, "grad_norm": 0.24871299306195427, "learning_rate": 2.269542602552766e-05, "loss": 0.4489, "step": 7501 }, { "epoch": 2.0996361600895606, "grad_norm": 0.2577185184620966, "learning_rate": 2.268250484440511e-05, "loss": 0.4775, "step": 7502 }, { "epoch": 2.099916036943745, "grad_norm": 0.2560814309266284, "learning_rate": 2.2669586263377033e-05, "loss": 0.4412, "step": 7503 }, { "epoch": 2.100195913797929, "grad_norm": 0.2350102493647487, "learning_rate": 2.2656670283673042e-05, "loss": 0.4512, "step": 7504 }, { "epoch": 2.100475790652113, "grad_norm": 0.25754304347631335, "learning_rate": 2.2643756906522478e-05, "loss": 0.4509, "step": 7505 }, { "epoch": 2.1007556675062973, "grad_norm": 0.2497629763548119, "learning_rate": 2.2630846133154436e-05, "loss": 0.4402, "step": 7506 }, { "epoch": 2.1010355443604816, "grad_norm": 0.24545977648616682, "learning_rate": 2.2617937964797785e-05, "loss": 0.4396, "step": 7507 }, { "epoch": 2.1013154212146654, "grad_norm": 0.25390296199369455, "learning_rate": 2.260503240268112e-05, "loss": 0.4612, "step": 7508 }, { "epoch": 2.1015952980688497, "grad_norm": 0.2360792832276678, "learning_rate": 2.259212944803281e-05, "loss": 0.4438, "step": 7509 }, { "epoch": 2.101875174923034, "grad_norm": 0.23824582001171501, "learning_rate": 2.2579229102080973e-05, "loss": 0.4824, "step": 7510 }, { "epoch": 2.102155051777218, "grad_norm": 0.23849784399502194, "learning_rate": 2.2566331366053446e-05, "loss": 0.4501, "step": 7511 }, { "epoch": 2.102434928631402, "grad_norm": 0.2564225630725051, "learning_rate": 2.255343624117785e-05, "loss": 0.4534, "step": 7512 }, { "epoch": 2.1027148054855864, "grad_norm": 0.2575461076175124, "learning_rate": 2.2540543728681556e-05, "loss": 0.4621, "step": 7513 }, { "epoch": 2.1029946823397707, "grad_norm": 0.25905110465218073, "learning_rate": 2.2527653829791662e-05, "loss": 0.4589, "step": 7514 }, { "epoch": 2.1032745591939546, "grad_norm": 0.2561865667351127, "learning_rate": 2.251476654573507e-05, "loss": 0.4399, "step": 7515 }, { "epoch": 2.103554436048139, "grad_norm": 0.24058587130721956, "learning_rate": 2.2501881877738383e-05, "loss": 0.4261, "step": 7516 }, { "epoch": 2.103834312902323, "grad_norm": 0.2515336882108558, "learning_rate": 2.2488999827027972e-05, "loss": 0.4466, "step": 7517 }, { "epoch": 2.104114189756507, "grad_norm": 0.2451665174890162, "learning_rate": 2.2476120394829952e-05, "loss": 0.4719, "step": 7518 }, { "epoch": 2.1043940666106913, "grad_norm": 0.25910932881308824, "learning_rate": 2.24632435823702e-05, "loss": 0.4741, "step": 7519 }, { "epoch": 2.1046739434648756, "grad_norm": 0.24546199751436237, "learning_rate": 2.2450369390874344e-05, "loss": 0.4778, "step": 7520 }, { "epoch": 2.1049538203190594, "grad_norm": 0.2474355420866834, "learning_rate": 2.2437497821567744e-05, "loss": 0.4541, "step": 7521 }, { "epoch": 2.1052336971732437, "grad_norm": 0.24332045321952375, "learning_rate": 2.2424628875675524e-05, "loss": 0.4688, "step": 7522 }, { "epoch": 2.105513574027428, "grad_norm": 0.23802478038156527, "learning_rate": 2.241176255442257e-05, "loss": 0.4574, "step": 7523 }, { "epoch": 2.1057934508816123, "grad_norm": 0.25560288102315903, "learning_rate": 2.2398898859033494e-05, "loss": 0.4644, "step": 7524 }, { "epoch": 2.106073327735796, "grad_norm": 0.2490834736430194, "learning_rate": 2.2386037790732673e-05, "loss": 0.4479, "step": 7525 }, { "epoch": 2.1063532045899804, "grad_norm": 0.24455409618143778, "learning_rate": 2.2373179350744235e-05, "loss": 0.488, "step": 7526 }, { "epoch": 2.1066330814441647, "grad_norm": 0.25081577247405235, "learning_rate": 2.2360323540292044e-05, "loss": 0.4606, "step": 7527 }, { "epoch": 2.1069129582983486, "grad_norm": 0.23562210124486416, "learning_rate": 2.2347470360599754e-05, "loss": 0.4474, "step": 7528 }, { "epoch": 2.107192835152533, "grad_norm": 0.2548395480913713, "learning_rate": 2.233461981289069e-05, "loss": 0.46, "step": 7529 }, { "epoch": 2.107472712006717, "grad_norm": 0.2379713472145678, "learning_rate": 2.2321771898388e-05, "loss": 0.4555, "step": 7530 }, { "epoch": 2.107752588860901, "grad_norm": 0.251286433144628, "learning_rate": 2.2308926618314553e-05, "loss": 0.4684, "step": 7531 }, { "epoch": 2.1080324657150853, "grad_norm": 0.23615427735493252, "learning_rate": 2.2296083973892945e-05, "loss": 0.4599, "step": 7532 }, { "epoch": 2.1083123425692696, "grad_norm": 0.24847602036792382, "learning_rate": 2.2283243966345596e-05, "loss": 0.4361, "step": 7533 }, { "epoch": 2.108592219423454, "grad_norm": 0.24301117802883626, "learning_rate": 2.22704065968946e-05, "loss": 0.4253, "step": 7534 }, { "epoch": 2.1088720962776377, "grad_norm": 0.23710501731491193, "learning_rate": 2.2257571866761824e-05, "loss": 0.43, "step": 7535 }, { "epoch": 2.109151973131822, "grad_norm": 0.2529835935009079, "learning_rate": 2.224473977716888e-05, "loss": 0.4465, "step": 7536 }, { "epoch": 2.1094318499860063, "grad_norm": 0.2408870811268891, "learning_rate": 2.2231910329337147e-05, "loss": 0.455, "step": 7537 }, { "epoch": 2.10971172684019, "grad_norm": 0.24820848610356108, "learning_rate": 2.2219083524487726e-05, "loss": 0.466, "step": 7538 }, { "epoch": 2.1099916036943744, "grad_norm": 0.2512888152568631, "learning_rate": 2.220625936384149e-05, "loss": 0.4473, "step": 7539 }, { "epoch": 2.1102714805485587, "grad_norm": 0.24460559721548294, "learning_rate": 2.219343784861904e-05, "loss": 0.4505, "step": 7540 }, { "epoch": 2.1105513574027426, "grad_norm": 0.24237395765297018, "learning_rate": 2.2180618980040747e-05, "loss": 0.4444, "step": 7541 }, { "epoch": 2.110831234256927, "grad_norm": 0.2472666300963442, "learning_rate": 2.2167802759326704e-05, "loss": 0.4739, "step": 7542 }, { "epoch": 2.111111111111111, "grad_norm": 0.24105355979943674, "learning_rate": 2.2154989187696772e-05, "loss": 0.4328, "step": 7543 }, { "epoch": 2.1113909879652955, "grad_norm": 0.24678765407012285, "learning_rate": 2.2142178266370557e-05, "loss": 0.4398, "step": 7544 }, { "epoch": 2.1116708648194793, "grad_norm": 0.2444408885095439, "learning_rate": 2.2129369996567406e-05, "loss": 0.4418, "step": 7545 }, { "epoch": 2.1119507416736636, "grad_norm": 0.24095764617311172, "learning_rate": 2.2116564379506437e-05, "loss": 0.4487, "step": 7546 }, { "epoch": 2.112230618527848, "grad_norm": 0.23834019563772968, "learning_rate": 2.2103761416406466e-05, "loss": 0.475, "step": 7547 }, { "epoch": 2.1125104953820317, "grad_norm": 0.24684141566440712, "learning_rate": 2.2090961108486092e-05, "loss": 0.4794, "step": 7548 }, { "epoch": 2.112790372236216, "grad_norm": 0.24724331112303274, "learning_rate": 2.207816345696364e-05, "loss": 0.4581, "step": 7549 }, { "epoch": 2.1130702490904003, "grad_norm": 0.2377554401642799, "learning_rate": 2.206536846305724e-05, "loss": 0.4588, "step": 7550 }, { "epoch": 2.113350125944584, "grad_norm": 0.2551818636759096, "learning_rate": 2.2052576127984704e-05, "loss": 0.4605, "step": 7551 }, { "epoch": 2.1136300027987684, "grad_norm": 0.2472219607296317, "learning_rate": 2.203978645296362e-05, "loss": 0.4645, "step": 7552 }, { "epoch": 2.1139098796529527, "grad_norm": 0.25375763961112763, "learning_rate": 2.2026999439211305e-05, "loss": 0.4618, "step": 7553 }, { "epoch": 2.114189756507137, "grad_norm": 0.24460683253482174, "learning_rate": 2.201421508794484e-05, "loss": 0.4429, "step": 7554 }, { "epoch": 2.114469633361321, "grad_norm": 0.24165091989308077, "learning_rate": 2.200143340038105e-05, "loss": 0.4587, "step": 7555 }, { "epoch": 2.114749510215505, "grad_norm": 0.24218243076459725, "learning_rate": 2.19886543777365e-05, "loss": 0.4675, "step": 7556 }, { "epoch": 2.1150293870696895, "grad_norm": 0.2437905324988868, "learning_rate": 2.1975878021227507e-05, "loss": 0.4573, "step": 7557 }, { "epoch": 2.1153092639238733, "grad_norm": 0.2422809195703653, "learning_rate": 2.1963104332070127e-05, "loss": 0.4544, "step": 7558 }, { "epoch": 2.1155891407780576, "grad_norm": 0.24630979034974235, "learning_rate": 2.195033331148017e-05, "loss": 0.4797, "step": 7559 }, { "epoch": 2.115869017632242, "grad_norm": 0.240337674393747, "learning_rate": 2.193756496067319e-05, "loss": 0.449, "step": 7560 }, { "epoch": 2.116148894486426, "grad_norm": 0.25419261804892673, "learning_rate": 2.192479928086448e-05, "loss": 0.4546, "step": 7561 }, { "epoch": 2.11642877134061, "grad_norm": 0.2573381459024405, "learning_rate": 2.19120362732691e-05, "loss": 0.4913, "step": 7562 }, { "epoch": 2.1167086481947943, "grad_norm": 0.23587256095619225, "learning_rate": 2.189927593910182e-05, "loss": 0.4383, "step": 7563 }, { "epoch": 2.1169885250489786, "grad_norm": 0.24591806879134256, "learning_rate": 2.18865182795772e-05, "loss": 0.4572, "step": 7564 }, { "epoch": 2.1172684019031625, "grad_norm": 0.236748623891741, "learning_rate": 2.1873763295909492e-05, "loss": 0.4569, "step": 7565 }, { "epoch": 2.1175482787573467, "grad_norm": 0.2551511152984237, "learning_rate": 2.186101098931272e-05, "loss": 0.4723, "step": 7566 }, { "epoch": 2.117828155611531, "grad_norm": 0.25639745673483555, "learning_rate": 2.1848261361000687e-05, "loss": 0.4627, "step": 7567 }, { "epoch": 2.118108032465715, "grad_norm": 0.2575518876408739, "learning_rate": 2.1835514412186896e-05, "loss": 0.4501, "step": 7568 }, { "epoch": 2.118387909319899, "grad_norm": 0.23815492828339324, "learning_rate": 2.1822770144084616e-05, "loss": 0.4431, "step": 7569 }, { "epoch": 2.1186677861740835, "grad_norm": 0.2586557287359828, "learning_rate": 2.1810028557906832e-05, "loss": 0.4785, "step": 7570 }, { "epoch": 2.1189476630282678, "grad_norm": 0.2584600684594632, "learning_rate": 2.1797289654866314e-05, "loss": 0.4646, "step": 7571 }, { "epoch": 2.1192275398824516, "grad_norm": 0.2473268074022004, "learning_rate": 2.1784553436175553e-05, "loss": 0.4476, "step": 7572 }, { "epoch": 2.119507416736636, "grad_norm": 0.23380845612844886, "learning_rate": 2.1771819903046785e-05, "loss": 0.4431, "step": 7573 }, { "epoch": 2.11978729359082, "grad_norm": 0.24317569820214538, "learning_rate": 2.1759089056692006e-05, "loss": 0.441, "step": 7574 }, { "epoch": 2.120067170445004, "grad_norm": 0.25405334047753797, "learning_rate": 2.1746360898322933e-05, "loss": 0.472, "step": 7575 }, { "epoch": 2.1203470472991883, "grad_norm": 0.2529813566209135, "learning_rate": 2.1733635429151046e-05, "loss": 0.4727, "step": 7576 }, { "epoch": 2.1206269241533726, "grad_norm": 0.24345463428834613, "learning_rate": 2.1720912650387554e-05, "loss": 0.4596, "step": 7577 }, { "epoch": 2.1209068010075565, "grad_norm": 0.24367450650630482, "learning_rate": 2.170819256324343e-05, "loss": 0.4416, "step": 7578 }, { "epoch": 2.1211866778617408, "grad_norm": 0.2400383852178851, "learning_rate": 2.1695475168929375e-05, "loss": 0.4754, "step": 7579 }, { "epoch": 2.121466554715925, "grad_norm": 0.24937719815034812, "learning_rate": 2.1682760468655834e-05, "loss": 0.4489, "step": 7580 }, { "epoch": 2.1217464315701093, "grad_norm": 0.24596825255221352, "learning_rate": 2.1670048463632996e-05, "loss": 0.452, "step": 7581 }, { "epoch": 2.122026308424293, "grad_norm": 0.2452061773321738, "learning_rate": 2.1657339155070828e-05, "loss": 0.4769, "step": 7582 }, { "epoch": 2.1223061852784775, "grad_norm": 0.23904809431617988, "learning_rate": 2.1644632544178967e-05, "loss": 0.4764, "step": 7583 }, { "epoch": 2.1225860621326618, "grad_norm": 0.24015059340537998, "learning_rate": 2.1631928632166827e-05, "loss": 0.4484, "step": 7584 }, { "epoch": 2.1228659389868456, "grad_norm": 0.2516526249697741, "learning_rate": 2.1619227420243615e-05, "loss": 0.4481, "step": 7585 }, { "epoch": 2.12314581584103, "grad_norm": 0.24541177461344438, "learning_rate": 2.160652890961823e-05, "loss": 0.4397, "step": 7586 }, { "epoch": 2.123425692695214, "grad_norm": 0.2371911292223706, "learning_rate": 2.159383310149931e-05, "loss": 0.4617, "step": 7587 }, { "epoch": 2.123705569549398, "grad_norm": 0.2514411025705835, "learning_rate": 2.1581139997095258e-05, "loss": 0.451, "step": 7588 }, { "epoch": 2.1239854464035823, "grad_norm": 0.24660085157883696, "learning_rate": 2.1568449597614205e-05, "loss": 0.4729, "step": 7589 }, { "epoch": 2.1242653232577666, "grad_norm": 0.2565046085253248, "learning_rate": 2.1555761904264034e-05, "loss": 0.4712, "step": 7590 }, { "epoch": 2.124545200111951, "grad_norm": 0.2530203823346398, "learning_rate": 2.154307691825237e-05, "loss": 0.4677, "step": 7591 }, { "epoch": 2.1248250769661348, "grad_norm": 0.24950382280245662, "learning_rate": 2.1530394640786567e-05, "loss": 0.4531, "step": 7592 }, { "epoch": 2.125104953820319, "grad_norm": 0.2485366336476582, "learning_rate": 2.1517715073073742e-05, "loss": 0.4705, "step": 7593 }, { "epoch": 2.1253848306745033, "grad_norm": 0.24778048156237242, "learning_rate": 2.1505038216320735e-05, "loss": 0.4515, "step": 7594 }, { "epoch": 2.125664707528687, "grad_norm": 0.24125265596005324, "learning_rate": 2.149236407173414e-05, "loss": 0.4502, "step": 7595 }, { "epoch": 2.1259445843828715, "grad_norm": 0.25854875672337907, "learning_rate": 2.1479692640520292e-05, "loss": 0.4565, "step": 7596 }, { "epoch": 2.1262244612370558, "grad_norm": 0.2510271068420979, "learning_rate": 2.146702392388526e-05, "loss": 0.4707, "step": 7597 }, { "epoch": 2.12650433809124, "grad_norm": 0.24128873224968087, "learning_rate": 2.1454357923034864e-05, "loss": 0.4463, "step": 7598 }, { "epoch": 2.126784214945424, "grad_norm": 0.24663224433083245, "learning_rate": 2.1441694639174652e-05, "loss": 0.4687, "step": 7599 }, { "epoch": 2.127064091799608, "grad_norm": 0.24317158373744283, "learning_rate": 2.142903407350995e-05, "loss": 0.4481, "step": 7600 }, { "epoch": 2.1273439686537925, "grad_norm": 0.261255558763693, "learning_rate": 2.1416376227245744e-05, "loss": 0.4714, "step": 7601 }, { "epoch": 2.1276238455079763, "grad_norm": 0.24449693848440052, "learning_rate": 2.1403721101586864e-05, "loss": 0.4335, "step": 7602 }, { "epoch": 2.1279037223621606, "grad_norm": 0.24366650669769435, "learning_rate": 2.1391068697737815e-05, "loss": 0.449, "step": 7603 }, { "epoch": 2.128183599216345, "grad_norm": 0.24012998115601292, "learning_rate": 2.137841901690286e-05, "loss": 0.4626, "step": 7604 }, { "epoch": 2.1284634760705288, "grad_norm": 0.2835087041441265, "learning_rate": 2.1365772060286006e-05, "loss": 0.4836, "step": 7605 }, { "epoch": 2.128743352924713, "grad_norm": 0.24088128251814953, "learning_rate": 2.1353127829090995e-05, "loss": 0.442, "step": 7606 }, { "epoch": 2.1290232297788974, "grad_norm": 0.2573840347580425, "learning_rate": 2.134048632452131e-05, "loss": 0.4833, "step": 7607 }, { "epoch": 2.1293031066330816, "grad_norm": 0.25199802270150407, "learning_rate": 2.132784754778018e-05, "loss": 0.475, "step": 7608 }, { "epoch": 2.1295829834872655, "grad_norm": 0.24247900055381982, "learning_rate": 2.1315211500070558e-05, "loss": 0.4534, "step": 7609 }, { "epoch": 2.12986286034145, "grad_norm": 0.24406454232964037, "learning_rate": 2.1302578182595172e-05, "loss": 0.4585, "step": 7610 }, { "epoch": 2.130142737195634, "grad_norm": 0.2504265524046617, "learning_rate": 2.1289947596556454e-05, "loss": 0.4444, "step": 7611 }, { "epoch": 2.130422614049818, "grad_norm": 0.24761477417319153, "learning_rate": 2.1277319743156593e-05, "loss": 0.48, "step": 7612 }, { "epoch": 2.130702490904002, "grad_norm": 0.24011045291782035, "learning_rate": 2.126469462359752e-05, "loss": 0.451, "step": 7613 }, { "epoch": 2.1309823677581865, "grad_norm": 0.2485102356825258, "learning_rate": 2.1252072239080893e-05, "loss": 0.4717, "step": 7614 }, { "epoch": 2.1312622446123703, "grad_norm": 0.2408935994630069, "learning_rate": 2.1239452590808124e-05, "loss": 0.4539, "step": 7615 }, { "epoch": 2.1315421214665546, "grad_norm": 0.24679220856227196, "learning_rate": 2.122683567998035e-05, "loss": 0.4525, "step": 7616 }, { "epoch": 2.131821998320739, "grad_norm": 0.2454251676747661, "learning_rate": 2.1214221507798466e-05, "loss": 0.4586, "step": 7617 }, { "epoch": 2.1321018751749232, "grad_norm": 0.24173225633486115, "learning_rate": 2.1201610075463085e-05, "loss": 0.4496, "step": 7618 }, { "epoch": 2.132381752029107, "grad_norm": 0.24472665196638912, "learning_rate": 2.1189001384174578e-05, "loss": 0.4493, "step": 7619 }, { "epoch": 2.1326616288832914, "grad_norm": 0.24959590534553086, "learning_rate": 2.1176395435133052e-05, "loss": 0.4457, "step": 7620 }, { "epoch": 2.1329415057374757, "grad_norm": 0.24445113584781797, "learning_rate": 2.1163792229538336e-05, "loss": 0.4539, "step": 7621 }, { "epoch": 2.1332213825916595, "grad_norm": 0.24851884857757958, "learning_rate": 2.1151191768590016e-05, "loss": 0.4394, "step": 7622 }, { "epoch": 2.133501259445844, "grad_norm": 0.2384034792363101, "learning_rate": 2.1138594053487415e-05, "loss": 0.4575, "step": 7623 }, { "epoch": 2.133781136300028, "grad_norm": 0.24618774382705233, "learning_rate": 2.1125999085429583e-05, "loss": 0.4573, "step": 7624 }, { "epoch": 2.134061013154212, "grad_norm": 0.24962501465608256, "learning_rate": 2.1113406865615322e-05, "loss": 0.4587, "step": 7625 }, { "epoch": 2.134340890008396, "grad_norm": 0.2398732566660393, "learning_rate": 2.1100817395243157e-05, "loss": 0.455, "step": 7626 }, { "epoch": 2.1346207668625805, "grad_norm": 0.25842324905438246, "learning_rate": 2.1088230675511372e-05, "loss": 0.4477, "step": 7627 }, { "epoch": 2.134900643716765, "grad_norm": 0.251392968254428, "learning_rate": 2.1075646707617973e-05, "loss": 0.4733, "step": 7628 }, { "epoch": 2.1351805205709486, "grad_norm": 0.2505656721002468, "learning_rate": 2.1063065492760715e-05, "loss": 0.4636, "step": 7629 }, { "epoch": 2.135460397425133, "grad_norm": 0.2572929962069552, "learning_rate": 2.105048703213708e-05, "loss": 0.4581, "step": 7630 }, { "epoch": 2.1357402742793172, "grad_norm": 0.2417165861116719, "learning_rate": 2.1037911326944286e-05, "loss": 0.4631, "step": 7631 }, { "epoch": 2.136020151133501, "grad_norm": 0.24921891767128523, "learning_rate": 2.1025338378379312e-05, "loss": 0.4637, "step": 7632 }, { "epoch": 2.1363000279876854, "grad_norm": 0.2407116134780612, "learning_rate": 2.1012768187638844e-05, "loss": 0.4546, "step": 7633 }, { "epoch": 2.1365799048418697, "grad_norm": 0.24763876381341868, "learning_rate": 2.100020075591932e-05, "loss": 0.4369, "step": 7634 }, { "epoch": 2.136859781696054, "grad_norm": 0.2539968119489963, "learning_rate": 2.0987636084416924e-05, "loss": 0.4434, "step": 7635 }, { "epoch": 2.137139658550238, "grad_norm": 0.25589564146611865, "learning_rate": 2.0975074174327564e-05, "loss": 0.4596, "step": 7636 }, { "epoch": 2.137419535404422, "grad_norm": 0.2376089711663209, "learning_rate": 2.096251502684689e-05, "loss": 0.4436, "step": 7637 }, { "epoch": 2.1376994122586064, "grad_norm": 0.2540651550718165, "learning_rate": 2.0949958643170294e-05, "loss": 0.4765, "step": 7638 }, { "epoch": 2.1379792891127902, "grad_norm": 0.25008346334171755, "learning_rate": 2.093740502449289e-05, "loss": 0.4611, "step": 7639 }, { "epoch": 2.1382591659669745, "grad_norm": 0.24815997334452597, "learning_rate": 2.092485417200954e-05, "loss": 0.443, "step": 7640 }, { "epoch": 2.138539042821159, "grad_norm": 0.24897749979619618, "learning_rate": 2.0912306086914846e-05, "loss": 0.4397, "step": 7641 }, { "epoch": 2.1388189196753427, "grad_norm": 0.24983510933255013, "learning_rate": 2.0899760770403144e-05, "loss": 0.4769, "step": 7642 }, { "epoch": 2.139098796529527, "grad_norm": 0.24567791617937942, "learning_rate": 2.088721822366849e-05, "loss": 0.4653, "step": 7643 }, { "epoch": 2.1393786733837112, "grad_norm": 0.2511893988591538, "learning_rate": 2.0874678447904712e-05, "loss": 0.4655, "step": 7644 }, { "epoch": 2.1396585502378955, "grad_norm": 0.25340979163159116, "learning_rate": 2.086214144430534e-05, "loss": 0.4454, "step": 7645 }, { "epoch": 2.1399384270920794, "grad_norm": 0.24200109811789502, "learning_rate": 2.0849607214063647e-05, "loss": 0.4398, "step": 7646 }, { "epoch": 2.1402183039462637, "grad_norm": 0.24770574144742674, "learning_rate": 2.083707575837266e-05, "loss": 0.4451, "step": 7647 }, { "epoch": 2.140498180800448, "grad_norm": 0.2507817072966352, "learning_rate": 2.0824547078425126e-05, "loss": 0.4472, "step": 7648 }, { "epoch": 2.140778057654632, "grad_norm": 0.23937088937477222, "learning_rate": 2.081202117541353e-05, "loss": 0.4271, "step": 7649 }, { "epoch": 2.141057934508816, "grad_norm": 0.24435448910173604, "learning_rate": 2.0799498050530097e-05, "loss": 0.4521, "step": 7650 }, { "epoch": 2.1413378113630004, "grad_norm": 0.24234436062097528, "learning_rate": 2.0786977704966782e-05, "loss": 0.4566, "step": 7651 }, { "epoch": 2.1416176882171842, "grad_norm": 0.24159361754388, "learning_rate": 2.077446013991528e-05, "loss": 0.4554, "step": 7652 }, { "epoch": 2.1418975650713685, "grad_norm": 0.24298997738763822, "learning_rate": 2.0761945356567025e-05, "loss": 0.4579, "step": 7653 }, { "epoch": 2.142177441925553, "grad_norm": 0.23575064778162888, "learning_rate": 2.0749433356113168e-05, "loss": 0.4305, "step": 7654 }, { "epoch": 2.142457318779737, "grad_norm": 0.2526297090096628, "learning_rate": 2.0736924139744624e-05, "loss": 0.4445, "step": 7655 }, { "epoch": 2.142737195633921, "grad_norm": 0.2467617189365528, "learning_rate": 2.0724417708652017e-05, "loss": 0.4405, "step": 7656 }, { "epoch": 2.1430170724881052, "grad_norm": 0.24657196476447835, "learning_rate": 2.071191406402572e-05, "loss": 0.4621, "step": 7657 }, { "epoch": 2.1432969493422895, "grad_norm": 0.2413834403646487, "learning_rate": 2.0699413207055834e-05, "loss": 0.4569, "step": 7658 }, { "epoch": 2.1435768261964734, "grad_norm": 0.25549227235015254, "learning_rate": 2.0686915138932195e-05, "loss": 0.4604, "step": 7659 }, { "epoch": 2.1438567030506577, "grad_norm": 0.24141759699102472, "learning_rate": 2.0674419860844384e-05, "loss": 0.4619, "step": 7660 }, { "epoch": 2.144136579904842, "grad_norm": 0.24408268820201984, "learning_rate": 2.066192737398171e-05, "loss": 0.4408, "step": 7661 }, { "epoch": 2.144416456759026, "grad_norm": 0.24501183545718927, "learning_rate": 2.0649437679533202e-05, "loss": 0.4641, "step": 7662 }, { "epoch": 2.14469633361321, "grad_norm": 0.2444438500568767, "learning_rate": 2.0636950778687647e-05, "loss": 0.4631, "step": 7663 }, { "epoch": 2.1449762104673944, "grad_norm": 0.23872278645009623, "learning_rate": 2.0624466672633552e-05, "loss": 0.4451, "step": 7664 }, { "epoch": 2.1452560873215787, "grad_norm": 0.24839893809469274, "learning_rate": 2.0611985362559166e-05, "loss": 0.474, "step": 7665 }, { "epoch": 2.1455359641757625, "grad_norm": 0.23810533617093482, "learning_rate": 2.0599506849652456e-05, "loss": 0.4435, "step": 7666 }, { "epoch": 2.145815841029947, "grad_norm": 0.24728729741769404, "learning_rate": 2.058703113510114e-05, "loss": 0.4569, "step": 7667 }, { "epoch": 2.146095717884131, "grad_norm": 0.24522991057602664, "learning_rate": 2.0574558220092665e-05, "loss": 0.4512, "step": 7668 }, { "epoch": 2.146375594738315, "grad_norm": 0.2452430298428381, "learning_rate": 2.0562088105814213e-05, "loss": 0.4384, "step": 7669 }, { "epoch": 2.1466554715924993, "grad_norm": 0.25861421447453514, "learning_rate": 2.054962079345269e-05, "loss": 0.4789, "step": 7670 }, { "epoch": 2.1469353484466835, "grad_norm": 0.24613330428350227, "learning_rate": 2.0537156284194743e-05, "loss": 0.4484, "step": 7671 }, { "epoch": 2.147215225300868, "grad_norm": 0.24725153809854933, "learning_rate": 2.052469457922675e-05, "loss": 0.4309, "step": 7672 }, { "epoch": 2.1474951021550517, "grad_norm": 0.24838110219764029, "learning_rate": 2.0512235679734825e-05, "loss": 0.445, "step": 7673 }, { "epoch": 2.147774979009236, "grad_norm": 0.25241695717372814, "learning_rate": 2.0499779586904815e-05, "loss": 0.458, "step": 7674 }, { "epoch": 2.1480548558634203, "grad_norm": 0.2365323819303878, "learning_rate": 2.04873263019223e-05, "loss": 0.4746, "step": 7675 }, { "epoch": 2.148334732717604, "grad_norm": 0.25602552930195377, "learning_rate": 2.047487582597258e-05, "loss": 0.4699, "step": 7676 }, { "epoch": 2.1486146095717884, "grad_norm": 0.24166625285944415, "learning_rate": 2.046242816024071e-05, "loss": 0.4457, "step": 7677 }, { "epoch": 2.1488944864259727, "grad_norm": 0.25305605131513187, "learning_rate": 2.0449983305911457e-05, "loss": 0.4777, "step": 7678 }, { "epoch": 2.1491743632801565, "grad_norm": 0.2460520677445608, "learning_rate": 2.043754126416933e-05, "loss": 0.4438, "step": 7679 }, { "epoch": 2.149454240134341, "grad_norm": 0.2519080224748392, "learning_rate": 2.042510203619858e-05, "loss": 0.4483, "step": 7680 }, { "epoch": 2.149734116988525, "grad_norm": 0.24991720339072024, "learning_rate": 2.0412665623183162e-05, "loss": 0.4575, "step": 7681 }, { "epoch": 2.1500139938427094, "grad_norm": 0.24773907482820717, "learning_rate": 2.04002320263068e-05, "loss": 0.4661, "step": 7682 }, { "epoch": 2.1502938706968933, "grad_norm": 0.2581008504331832, "learning_rate": 2.038780124675292e-05, "loss": 0.4512, "step": 7683 }, { "epoch": 2.1505737475510776, "grad_norm": 0.2592193497806851, "learning_rate": 2.0375373285704685e-05, "loss": 0.4794, "step": 7684 }, { "epoch": 2.150853624405262, "grad_norm": 0.24498764169014103, "learning_rate": 2.036294814434501e-05, "loss": 0.4489, "step": 7685 }, { "epoch": 2.1511335012594457, "grad_norm": 0.24997505633995695, "learning_rate": 2.0350525823856516e-05, "loss": 0.4485, "step": 7686 }, { "epoch": 2.15141337811363, "grad_norm": 0.24728319568293508, "learning_rate": 2.033810632542157e-05, "loss": 0.4411, "step": 7687 }, { "epoch": 2.1516932549678143, "grad_norm": 0.23170605418281978, "learning_rate": 2.0325689650222268e-05, "loss": 0.4514, "step": 7688 }, { "epoch": 2.151973131821998, "grad_norm": 0.2410678794367316, "learning_rate": 2.0313275799440435e-05, "loss": 0.4482, "step": 7689 }, { "epoch": 2.1522530086761824, "grad_norm": 0.2439369810038682, "learning_rate": 2.030086477425762e-05, "loss": 0.4494, "step": 7690 }, { "epoch": 2.1525328855303667, "grad_norm": 0.24753795944884796, "learning_rate": 2.028845657585513e-05, "loss": 0.4581, "step": 7691 }, { "epoch": 2.152812762384551, "grad_norm": 0.24646798280703414, "learning_rate": 2.0276051205413967e-05, "loss": 0.447, "step": 7692 }, { "epoch": 2.153092639238735, "grad_norm": 0.24227431911441139, "learning_rate": 2.0263648664114886e-05, "loss": 0.4586, "step": 7693 }, { "epoch": 2.153372516092919, "grad_norm": 0.23730019959175033, "learning_rate": 2.0251248953138374e-05, "loss": 0.434, "step": 7694 }, { "epoch": 2.1536523929471034, "grad_norm": 0.2549009708513573, "learning_rate": 2.023885207366464e-05, "loss": 0.4504, "step": 7695 }, { "epoch": 2.1539322698012873, "grad_norm": 0.2433620312879842, "learning_rate": 2.0226458026873616e-05, "loss": 0.4591, "step": 7696 }, { "epoch": 2.1542121466554716, "grad_norm": 0.24673402406570474, "learning_rate": 2.0214066813944988e-05, "loss": 0.4336, "step": 7697 }, { "epoch": 2.154492023509656, "grad_norm": 0.25424975467360705, "learning_rate": 2.0201678436058148e-05, "loss": 0.4394, "step": 7698 }, { "epoch": 2.1547719003638397, "grad_norm": 0.24143025981613622, "learning_rate": 2.0189292894392238e-05, "loss": 0.45, "step": 7699 }, { "epoch": 2.155051777218024, "grad_norm": 0.254837983121012, "learning_rate": 2.0176910190126114e-05, "loss": 0.4801, "step": 7700 }, { "epoch": 2.1553316540722083, "grad_norm": 0.25871164567727056, "learning_rate": 2.0164530324438368e-05, "loss": 0.464, "step": 7701 }, { "epoch": 2.1556115309263926, "grad_norm": 0.2584209085151839, "learning_rate": 2.0152153298507324e-05, "loss": 0.464, "step": 7702 }, { "epoch": 2.1558914077805764, "grad_norm": 0.25135649506036073, "learning_rate": 2.0139779113511044e-05, "loss": 0.4612, "step": 7703 }, { "epoch": 2.1561712846347607, "grad_norm": 0.24974697379436328, "learning_rate": 2.0127407770627298e-05, "loss": 0.4583, "step": 7704 }, { "epoch": 2.156451161488945, "grad_norm": 0.2485539660436353, "learning_rate": 2.01150392710336e-05, "loss": 0.4574, "step": 7705 }, { "epoch": 2.156731038343129, "grad_norm": 0.2536019454279282, "learning_rate": 2.0102673615907193e-05, "loss": 0.46, "step": 7706 }, { "epoch": 2.157010915197313, "grad_norm": 0.24308067268202438, "learning_rate": 2.009031080642504e-05, "loss": 0.4472, "step": 7707 }, { "epoch": 2.1572907920514974, "grad_norm": 0.26105563418332145, "learning_rate": 2.0077950843763847e-05, "loss": 0.4723, "step": 7708 }, { "epoch": 2.1575706689056817, "grad_norm": 0.25978300743181865, "learning_rate": 2.0065593729100046e-05, "loss": 0.4697, "step": 7709 }, { "epoch": 2.1578505457598656, "grad_norm": 0.25504147037350067, "learning_rate": 2.0053239463609785e-05, "loss": 0.448, "step": 7710 }, { "epoch": 2.15813042261405, "grad_norm": 0.2461420145940867, "learning_rate": 2.0040888048468954e-05, "loss": 0.4568, "step": 7711 }, { "epoch": 2.158410299468234, "grad_norm": 0.2401419650973817, "learning_rate": 2.002853948485317e-05, "loss": 0.4349, "step": 7712 }, { "epoch": 2.158690176322418, "grad_norm": 0.2513144966848644, "learning_rate": 2.0016193773937776e-05, "loss": 0.4682, "step": 7713 }, { "epoch": 2.1589700531766023, "grad_norm": 0.2536880790577134, "learning_rate": 2.000385091689783e-05, "loss": 0.4559, "step": 7714 }, { "epoch": 2.1592499300307866, "grad_norm": 0.25917030427131443, "learning_rate": 1.999151091490815e-05, "loss": 0.4528, "step": 7715 }, { "epoch": 2.1595298068849704, "grad_norm": 0.2501556664053448, "learning_rate": 1.997917376914326e-05, "loss": 0.4461, "step": 7716 }, { "epoch": 2.1598096837391547, "grad_norm": 0.24738787444597712, "learning_rate": 1.9966839480777415e-05, "loss": 0.467, "step": 7717 }, { "epoch": 2.160089560593339, "grad_norm": 0.23323438252697498, "learning_rate": 1.9954508050984592e-05, "loss": 0.4322, "step": 7718 }, { "epoch": 2.160369437447523, "grad_norm": 0.2458908563658238, "learning_rate": 1.9942179480938517e-05, "loss": 0.4654, "step": 7719 }, { "epoch": 2.160649314301707, "grad_norm": 0.2584866831872372, "learning_rate": 1.992985377181262e-05, "loss": 0.4725, "step": 7720 }, { "epoch": 2.1609291911558914, "grad_norm": 0.243870700648934, "learning_rate": 1.991753092478007e-05, "loss": 0.4391, "step": 7721 }, { "epoch": 2.1612090680100757, "grad_norm": 0.2420300015222653, "learning_rate": 1.9905210941013765e-05, "loss": 0.4289, "step": 7722 }, { "epoch": 2.1614889448642596, "grad_norm": 0.2511378627198556, "learning_rate": 1.989289382168633e-05, "loss": 0.4623, "step": 7723 }, { "epoch": 2.161768821718444, "grad_norm": 0.24696892614899602, "learning_rate": 1.988057956797011e-05, "loss": 0.4542, "step": 7724 }, { "epoch": 2.162048698572628, "grad_norm": 0.24678778236656013, "learning_rate": 1.9868268181037185e-05, "loss": 0.4601, "step": 7725 }, { "epoch": 2.162328575426812, "grad_norm": 0.2512369548355241, "learning_rate": 1.9855959662059365e-05, "loss": 0.4589, "step": 7726 }, { "epoch": 2.1626084522809963, "grad_norm": 0.25344596384048185, "learning_rate": 1.9843654012208173e-05, "loss": 0.4461, "step": 7727 }, { "epoch": 2.1628883291351806, "grad_norm": 0.23938430816339948, "learning_rate": 1.9831351232654872e-05, "loss": 0.47, "step": 7728 }, { "epoch": 2.163168205989365, "grad_norm": 0.2445065327742629, "learning_rate": 1.9819051324570443e-05, "loss": 0.4535, "step": 7729 }, { "epoch": 2.1634480828435487, "grad_norm": 0.24813418604265364, "learning_rate": 1.9806754289125605e-05, "loss": 0.4763, "step": 7730 }, { "epoch": 2.163727959697733, "grad_norm": 0.2515889228984909, "learning_rate": 1.9794460127490794e-05, "loss": 0.4625, "step": 7731 }, { "epoch": 2.1640078365519173, "grad_norm": 0.243327037487538, "learning_rate": 1.978216884083618e-05, "loss": 0.446, "step": 7732 }, { "epoch": 2.164287713406101, "grad_norm": 0.24996788209547616, "learning_rate": 1.976988043033164e-05, "loss": 0.4591, "step": 7733 }, { "epoch": 2.1645675902602854, "grad_norm": 0.2592706923058631, "learning_rate": 1.9757594897146807e-05, "loss": 0.48, "step": 7734 }, { "epoch": 2.1648474671144697, "grad_norm": 0.26093538918194764, "learning_rate": 1.974531224245102e-05, "loss": 0.4649, "step": 7735 }, { "epoch": 2.1651273439686536, "grad_norm": 0.25467004494827217, "learning_rate": 1.9733032467413343e-05, "loss": 0.4467, "step": 7736 }, { "epoch": 2.165407220822838, "grad_norm": 0.24955975582786427, "learning_rate": 1.972075557320258e-05, "loss": 0.4568, "step": 7737 }, { "epoch": 2.165687097677022, "grad_norm": 0.2391669143405891, "learning_rate": 1.9708481560987245e-05, "loss": 0.448, "step": 7738 }, { "epoch": 2.1659669745312065, "grad_norm": 0.25376949522374376, "learning_rate": 1.9696210431935595e-05, "loss": 0.4708, "step": 7739 }, { "epoch": 2.1662468513853903, "grad_norm": 0.25206443679063284, "learning_rate": 1.9683942187215597e-05, "loss": 0.48, "step": 7740 }, { "epoch": 2.1665267282395746, "grad_norm": 0.24860030256079377, "learning_rate": 1.967167682799495e-05, "loss": 0.4839, "step": 7741 }, { "epoch": 2.166806605093759, "grad_norm": 0.2585097787688693, "learning_rate": 1.965941435544108e-05, "loss": 0.4772, "step": 7742 }, { "epoch": 2.1670864819479427, "grad_norm": 0.2354632217802597, "learning_rate": 1.964715477072113e-05, "loss": 0.4414, "step": 7743 }, { "epoch": 2.167366358802127, "grad_norm": 0.23677024749952494, "learning_rate": 1.9634898075001967e-05, "loss": 0.4629, "step": 7744 }, { "epoch": 2.1676462356563113, "grad_norm": 0.24593218395377459, "learning_rate": 1.962264426945023e-05, "loss": 0.4526, "step": 7745 }, { "epoch": 2.1679261125104956, "grad_norm": 0.24647489372050965, "learning_rate": 1.96103933552322e-05, "loss": 0.4613, "step": 7746 }, { "epoch": 2.1682059893646795, "grad_norm": 0.2577261946690843, "learning_rate": 1.959814533351394e-05, "loss": 0.4628, "step": 7747 }, { "epoch": 2.1684858662188637, "grad_norm": 0.24898198594961482, "learning_rate": 1.9585900205461223e-05, "loss": 0.4615, "step": 7748 }, { "epoch": 2.168765743073048, "grad_norm": 0.244537865743805, "learning_rate": 1.9573657972239546e-05, "loss": 0.4633, "step": 7749 }, { "epoch": 2.169045619927232, "grad_norm": 0.26205331576804664, "learning_rate": 1.956141863501414e-05, "loss": 0.4577, "step": 7750 }, { "epoch": 2.169325496781416, "grad_norm": 0.25195861776390166, "learning_rate": 1.954918219494994e-05, "loss": 0.4781, "step": 7751 }, { "epoch": 2.1696053736356005, "grad_norm": 0.24194083733508762, "learning_rate": 1.9536948653211623e-05, "loss": 0.4428, "step": 7752 }, { "epoch": 2.1698852504897843, "grad_norm": 0.2594792382723162, "learning_rate": 1.9524718010963583e-05, "loss": 0.4777, "step": 7753 }, { "epoch": 2.1701651273439686, "grad_norm": 0.25071856407895177, "learning_rate": 1.9512490269369944e-05, "loss": 0.4666, "step": 7754 }, { "epoch": 2.170445004198153, "grad_norm": 0.23606933457195933, "learning_rate": 1.9500265429594543e-05, "loss": 0.4551, "step": 7755 }, { "epoch": 2.1707248810523367, "grad_norm": 0.2462499962828051, "learning_rate": 1.948804349280095e-05, "loss": 0.4512, "step": 7756 }, { "epoch": 2.171004757906521, "grad_norm": 0.24669402234344157, "learning_rate": 1.9475824460152458e-05, "loss": 0.4508, "step": 7757 }, { "epoch": 2.1712846347607053, "grad_norm": 0.24447062976228986, "learning_rate": 1.946360833281208e-05, "loss": 0.4598, "step": 7758 }, { "epoch": 2.1715645116148896, "grad_norm": 0.24508799052581162, "learning_rate": 1.945139511194255e-05, "loss": 0.4523, "step": 7759 }, { "epoch": 2.1718443884690735, "grad_norm": 0.2494302055038184, "learning_rate": 1.9439184798706334e-05, "loss": 0.4466, "step": 7760 }, { "epoch": 2.1721242653232578, "grad_norm": 0.24283933159113955, "learning_rate": 1.9426977394265593e-05, "loss": 0.4418, "step": 7761 }, { "epoch": 2.172404142177442, "grad_norm": 0.23621243570705355, "learning_rate": 1.9414772899782276e-05, "loss": 0.4496, "step": 7762 }, { "epoch": 2.172684019031626, "grad_norm": 0.2372523608522219, "learning_rate": 1.940257131641801e-05, "loss": 0.4512, "step": 7763 }, { "epoch": 2.17296389588581, "grad_norm": 0.24828497828490664, "learning_rate": 1.939037264533412e-05, "loss": 0.4538, "step": 7764 }, { "epoch": 2.1732437727399945, "grad_norm": 0.25129769055341117, "learning_rate": 1.937817688769169e-05, "loss": 0.4795, "step": 7765 }, { "epoch": 2.1735236495941788, "grad_norm": 0.24392408891951042, "learning_rate": 1.9365984044651525e-05, "loss": 0.4293, "step": 7766 }, { "epoch": 2.1738035264483626, "grad_norm": 0.25498994034625544, "learning_rate": 1.935379411737414e-05, "loss": 0.4562, "step": 7767 }, { "epoch": 2.174083403302547, "grad_norm": 0.2446279437951083, "learning_rate": 1.9341607107019794e-05, "loss": 0.4364, "step": 7768 }, { "epoch": 2.174363280156731, "grad_norm": 0.2493656735787167, "learning_rate": 1.9329423014748437e-05, "loss": 0.455, "step": 7769 }, { "epoch": 2.174643157010915, "grad_norm": 0.2557171108570862, "learning_rate": 1.9317241841719768e-05, "loss": 0.4582, "step": 7770 }, { "epoch": 2.1749230338650993, "grad_norm": 0.24847114191914865, "learning_rate": 1.930506358909319e-05, "loss": 0.4464, "step": 7771 }, { "epoch": 2.1752029107192836, "grad_norm": 0.24827474418241086, "learning_rate": 1.9292888258027842e-05, "loss": 0.4604, "step": 7772 }, { "epoch": 2.1754827875734675, "grad_norm": 0.2562796116528402, "learning_rate": 1.928071584968258e-05, "loss": 0.4394, "step": 7773 }, { "epoch": 2.1757626644276518, "grad_norm": 0.25578007725600227, "learning_rate": 1.9268546365215978e-05, "loss": 0.4388, "step": 7774 }, { "epoch": 2.176042541281836, "grad_norm": 0.24884757763501225, "learning_rate": 1.925637980578633e-05, "loss": 0.4467, "step": 7775 }, { "epoch": 2.1763224181360203, "grad_norm": 0.2606704877742673, "learning_rate": 1.924421617255166e-05, "loss": 0.478, "step": 7776 }, { "epoch": 2.176602294990204, "grad_norm": 0.25730842774347135, "learning_rate": 1.9232055466669714e-05, "loss": 0.4363, "step": 7777 }, { "epoch": 2.1768821718443885, "grad_norm": 0.24536639692149034, "learning_rate": 1.9219897689297944e-05, "loss": 0.464, "step": 7778 }, { "epoch": 2.1771620486985728, "grad_norm": 0.2635693184531069, "learning_rate": 1.920774284159353e-05, "loss": 0.4578, "step": 7779 }, { "epoch": 2.1774419255527566, "grad_norm": 0.2508380517486447, "learning_rate": 1.9195590924713403e-05, "loss": 0.4511, "step": 7780 }, { "epoch": 2.177721802406941, "grad_norm": 0.24561353911377817, "learning_rate": 1.918344193981419e-05, "loss": 0.4449, "step": 7781 }, { "epoch": 2.178001679261125, "grad_norm": 0.2515938483646392, "learning_rate": 1.9171295888052205e-05, "loss": 0.4856, "step": 7782 }, { "epoch": 2.1782815561153095, "grad_norm": 0.2554487519001131, "learning_rate": 1.9159152770583528e-05, "loss": 0.4558, "step": 7783 }, { "epoch": 2.1785614329694933, "grad_norm": 0.24599832289776138, "learning_rate": 1.9147012588563955e-05, "loss": 0.4538, "step": 7784 }, { "epoch": 2.1788413098236776, "grad_norm": 0.24827797552563804, "learning_rate": 1.9134875343149e-05, "loss": 0.4643, "step": 7785 }, { "epoch": 2.179121186677862, "grad_norm": 0.2423392026682831, "learning_rate": 1.912274103549388e-05, "loss": 0.4634, "step": 7786 }, { "epoch": 2.1794010635320458, "grad_norm": 0.2530638548225227, "learning_rate": 1.911060966675355e-05, "loss": 0.4401, "step": 7787 }, { "epoch": 2.17968094038623, "grad_norm": 0.24564953024856784, "learning_rate": 1.9098481238082684e-05, "loss": 0.4578, "step": 7788 }, { "epoch": 2.1799608172404144, "grad_norm": 0.2596550701569648, "learning_rate": 1.908635575063567e-05, "loss": 0.4594, "step": 7789 }, { "epoch": 2.180240694094598, "grad_norm": 0.2498892016284524, "learning_rate": 1.9074233205566617e-05, "loss": 0.4528, "step": 7790 }, { "epoch": 2.1805205709487825, "grad_norm": 0.2464999319142464, "learning_rate": 1.906211360402936e-05, "loss": 0.4565, "step": 7791 }, { "epoch": 2.180800447802967, "grad_norm": 0.24425654605345726, "learning_rate": 1.9049996947177444e-05, "loss": 0.4693, "step": 7792 }, { "epoch": 2.1810803246571506, "grad_norm": 0.24228558200186168, "learning_rate": 1.9037883236164146e-05, "loss": 0.4416, "step": 7793 }, { "epoch": 2.181360201511335, "grad_norm": 0.24514792986010997, "learning_rate": 1.9025772472142443e-05, "loss": 0.4393, "step": 7794 }, { "epoch": 2.181640078365519, "grad_norm": 0.25220357421385875, "learning_rate": 1.9013664656265063e-05, "loss": 0.457, "step": 7795 }, { "epoch": 2.1819199552197035, "grad_norm": 0.2540426741164844, "learning_rate": 1.9001559789684404e-05, "loss": 0.4655, "step": 7796 }, { "epoch": 2.1821998320738873, "grad_norm": 0.25053459425933816, "learning_rate": 1.8989457873552652e-05, "loss": 0.4565, "step": 7797 }, { "epoch": 2.1824797089280716, "grad_norm": 0.24788181896230466, "learning_rate": 1.897735890902166e-05, "loss": 0.468, "step": 7798 }, { "epoch": 2.182759585782256, "grad_norm": 0.24620568822923727, "learning_rate": 1.8965262897243023e-05, "loss": 0.445, "step": 7799 }, { "epoch": 2.1830394626364398, "grad_norm": 0.24546099937554955, "learning_rate": 1.895316983936802e-05, "loss": 0.4381, "step": 7800 }, { "epoch": 2.183319339490624, "grad_norm": 0.25539167735130275, "learning_rate": 1.894107973654769e-05, "loss": 0.4499, "step": 7801 }, { "epoch": 2.1835992163448084, "grad_norm": 0.251713145144343, "learning_rate": 1.8928992589932772e-05, "loss": 0.4373, "step": 7802 }, { "epoch": 2.1838790931989926, "grad_norm": 0.25427802966282137, "learning_rate": 1.8916908400673733e-05, "loss": 0.473, "step": 7803 }, { "epoch": 2.1841589700531765, "grad_norm": 0.2452132412022567, "learning_rate": 1.8904827169920748e-05, "loss": 0.4378, "step": 7804 }, { "epoch": 2.184438846907361, "grad_norm": 0.24816561372083384, "learning_rate": 1.889274889882372e-05, "loss": 0.4458, "step": 7805 }, { "epoch": 2.184718723761545, "grad_norm": 0.2667112882195227, "learning_rate": 1.888067358853226e-05, "loss": 0.4531, "step": 7806 }, { "epoch": 2.184998600615729, "grad_norm": 0.2402304873279501, "learning_rate": 1.886860124019571e-05, "loss": 0.4479, "step": 7807 }, { "epoch": 2.185278477469913, "grad_norm": 0.25721229276055896, "learning_rate": 1.8856531854963123e-05, "loss": 0.4583, "step": 7808 }, { "epoch": 2.1855583543240975, "grad_norm": 0.25904591171456187, "learning_rate": 1.8844465433983256e-05, "loss": 0.4666, "step": 7809 }, { "epoch": 2.1858382311782814, "grad_norm": 0.24044364905075324, "learning_rate": 1.8832401978404612e-05, "loss": 0.4395, "step": 7810 }, { "epoch": 2.1861181080324656, "grad_norm": 0.2542181852723932, "learning_rate": 1.882034148937539e-05, "loss": 0.4665, "step": 7811 }, { "epoch": 2.18639798488665, "grad_norm": 0.24055634571479526, "learning_rate": 1.8808283968043528e-05, "loss": 0.4708, "step": 7812 }, { "epoch": 2.1866778617408342, "grad_norm": 0.2478938980240328, "learning_rate": 1.8796229415556628e-05, "loss": 0.4628, "step": 7813 }, { "epoch": 2.186957738595018, "grad_norm": 0.2559188829558101, "learning_rate": 1.87841778330621e-05, "loss": 0.4548, "step": 7814 }, { "epoch": 2.1872376154492024, "grad_norm": 0.2384135511042799, "learning_rate": 1.8772129221706997e-05, "loss": 0.4675, "step": 7815 }, { "epoch": 2.1875174923033867, "grad_norm": 0.24523900409859284, "learning_rate": 1.876008358263811e-05, "loss": 0.4465, "step": 7816 }, { "epoch": 2.1877973691575705, "grad_norm": 0.2452968286397749, "learning_rate": 1.874804091700196e-05, "loss": 0.452, "step": 7817 }, { "epoch": 2.188077246011755, "grad_norm": 0.2545186025574785, "learning_rate": 1.8736001225944783e-05, "loss": 0.4547, "step": 7818 }, { "epoch": 2.188357122865939, "grad_norm": 0.24134995755574007, "learning_rate": 1.872396451061249e-05, "loss": 0.4534, "step": 7819 }, { "epoch": 2.1886369997201234, "grad_norm": 0.23698879149519445, "learning_rate": 1.871193077215076e-05, "loss": 0.4475, "step": 7820 }, { "epoch": 2.1889168765743072, "grad_norm": 0.24258602822455302, "learning_rate": 1.8699900011704972e-05, "loss": 0.4307, "step": 7821 }, { "epoch": 2.1891967534284915, "grad_norm": 0.24370852116856703, "learning_rate": 1.8687872230420224e-05, "loss": 0.4689, "step": 7822 }, { "epoch": 2.189476630282676, "grad_norm": 0.24094433935092024, "learning_rate": 1.8675847429441317e-05, "loss": 0.4524, "step": 7823 }, { "epoch": 2.1897565071368597, "grad_norm": 0.2393943275153635, "learning_rate": 1.866382560991279e-05, "loss": 0.4646, "step": 7824 }, { "epoch": 2.190036383991044, "grad_norm": 0.24600322975573322, "learning_rate": 1.865180677297888e-05, "loss": 0.4417, "step": 7825 }, { "epoch": 2.1903162608452282, "grad_norm": 0.23653467949928408, "learning_rate": 1.8639790919783547e-05, "loss": 0.4421, "step": 7826 }, { "epoch": 2.190596137699412, "grad_norm": 0.24844045629034311, "learning_rate": 1.8627778051470467e-05, "loss": 0.4658, "step": 7827 }, { "epoch": 2.1908760145535964, "grad_norm": 0.25891714351155704, "learning_rate": 1.861576816918303e-05, "loss": 0.4512, "step": 7828 }, { "epoch": 2.1911558914077807, "grad_norm": 0.24755295731004745, "learning_rate": 1.8603761274064347e-05, "loss": 0.4445, "step": 7829 }, { "epoch": 2.1914357682619645, "grad_norm": 0.24871185991226788, "learning_rate": 1.859175736725724e-05, "loss": 0.4607, "step": 7830 }, { "epoch": 2.191715645116149, "grad_norm": 0.2536417126447717, "learning_rate": 1.8579756449904227e-05, "loss": 0.456, "step": 7831 }, { "epoch": 2.191995521970333, "grad_norm": 0.2533466720717258, "learning_rate": 1.8567758523147606e-05, "loss": 0.487, "step": 7832 }, { "epoch": 2.1922753988245174, "grad_norm": 0.2722920814139466, "learning_rate": 1.8555763588129316e-05, "loss": 0.4738, "step": 7833 }, { "epoch": 2.1925552756787012, "grad_norm": 0.24499078426528714, "learning_rate": 1.8543771645991047e-05, "loss": 0.4573, "step": 7834 }, { "epoch": 2.1928351525328855, "grad_norm": 0.24114442727471078, "learning_rate": 1.85317826978742e-05, "loss": 0.4557, "step": 7835 }, { "epoch": 2.19311502938707, "grad_norm": 0.2479406770568865, "learning_rate": 1.851979674491991e-05, "loss": 0.45, "step": 7836 }, { "epoch": 2.1933949062412537, "grad_norm": 0.24512816090760622, "learning_rate": 1.8507813788268967e-05, "loss": 0.467, "step": 7837 }, { "epoch": 2.193674783095438, "grad_norm": 0.2488999454409054, "learning_rate": 1.8495833829061937e-05, "loss": 0.4659, "step": 7838 }, { "epoch": 2.1939546599496222, "grad_norm": 0.2525505742883965, "learning_rate": 1.848385686843907e-05, "loss": 0.45, "step": 7839 }, { "epoch": 2.1942345368038065, "grad_norm": 0.25118705308969275, "learning_rate": 1.8471882907540355e-05, "loss": 0.4492, "step": 7840 }, { "epoch": 2.1945144136579904, "grad_norm": 0.24164285045356698, "learning_rate": 1.8459911947505464e-05, "loss": 0.4776, "step": 7841 }, { "epoch": 2.1947942905121747, "grad_norm": 0.24733942385268706, "learning_rate": 1.8447943989473814e-05, "loss": 0.4739, "step": 7842 }, { "epoch": 2.195074167366359, "grad_norm": 0.25025012948468583, "learning_rate": 1.843597903458451e-05, "loss": 0.4424, "step": 7843 }, { "epoch": 2.195354044220543, "grad_norm": 0.24455113988511687, "learning_rate": 1.8424017083976393e-05, "loss": 0.467, "step": 7844 }, { "epoch": 2.195633921074727, "grad_norm": 0.24870464909046316, "learning_rate": 1.8412058138788003e-05, "loss": 0.4566, "step": 7845 }, { "epoch": 2.1959137979289114, "grad_norm": 0.25326316139271077, "learning_rate": 1.8400102200157598e-05, "loss": 0.44, "step": 7846 }, { "epoch": 2.1961936747830952, "grad_norm": 0.24756143910448022, "learning_rate": 1.8388149269223153e-05, "loss": 0.4629, "step": 7847 }, { "epoch": 2.1964735516372795, "grad_norm": 0.2494077946205501, "learning_rate": 1.837619934712234e-05, "loss": 0.4901, "step": 7848 }, { "epoch": 2.196753428491464, "grad_norm": 0.2571123514241369, "learning_rate": 1.836425243499259e-05, "loss": 0.4811, "step": 7849 }, { "epoch": 2.197033305345648, "grad_norm": 0.2504517733448195, "learning_rate": 1.8352308533971002e-05, "loss": 0.4354, "step": 7850 }, { "epoch": 2.197313182199832, "grad_norm": 0.26024904980336894, "learning_rate": 1.83403676451944e-05, "loss": 0.4561, "step": 7851 }, { "epoch": 2.1975930590540163, "grad_norm": 0.24541373901313274, "learning_rate": 1.8328429769799323e-05, "loss": 0.4544, "step": 7852 }, { "epoch": 2.1978729359082005, "grad_norm": 0.24228993703254692, "learning_rate": 1.8316494908922033e-05, "loss": 0.4323, "step": 7853 }, { "epoch": 2.1981528127623844, "grad_norm": 0.2511516559277119, "learning_rate": 1.8304563063698504e-05, "loss": 0.4675, "step": 7854 }, { "epoch": 2.1984326896165687, "grad_norm": 0.25999360726883414, "learning_rate": 1.829263423526439e-05, "loss": 0.4498, "step": 7855 }, { "epoch": 2.198712566470753, "grad_norm": 0.2439316770122664, "learning_rate": 1.8280708424755095e-05, "loss": 0.4481, "step": 7856 }, { "epoch": 2.1989924433249373, "grad_norm": 0.24385697889986588, "learning_rate": 1.826878563330573e-05, "loss": 0.4461, "step": 7857 }, { "epoch": 2.199272320179121, "grad_norm": 0.25325081441565767, "learning_rate": 1.8256865862051105e-05, "loss": 0.4598, "step": 7858 }, { "epoch": 2.1995521970333054, "grad_norm": 0.25207055701347525, "learning_rate": 1.8244949112125752e-05, "loss": 0.4571, "step": 7859 }, { "epoch": 2.1998320738874897, "grad_norm": 0.23878169220599021, "learning_rate": 1.823303538466392e-05, "loss": 0.4497, "step": 7860 }, { "epoch": 2.2001119507416735, "grad_norm": 0.24768614949321813, "learning_rate": 1.8221124680799563e-05, "loss": 0.462, "step": 7861 }, { "epoch": 2.200391827595858, "grad_norm": 0.2528266987490449, "learning_rate": 1.8209217001666346e-05, "loss": 0.4702, "step": 7862 }, { "epoch": 2.200671704450042, "grad_norm": 0.24943924240071652, "learning_rate": 1.8197312348397644e-05, "loss": 0.4637, "step": 7863 }, { "epoch": 2.200951581304226, "grad_norm": 0.25086026249232646, "learning_rate": 1.8185410722126556e-05, "loss": 0.4616, "step": 7864 }, { "epoch": 2.2012314581584103, "grad_norm": 0.23726761680461356, "learning_rate": 1.8173512123985868e-05, "loss": 0.4357, "step": 7865 }, { "epoch": 2.2015113350125946, "grad_norm": 0.24414251569135859, "learning_rate": 1.8161616555108124e-05, "loss": 0.4646, "step": 7866 }, { "epoch": 2.2017912118667784, "grad_norm": 0.24469801905230784, "learning_rate": 1.814972401662554e-05, "loss": 0.4502, "step": 7867 }, { "epoch": 2.2020710887209627, "grad_norm": 0.24314284411565362, "learning_rate": 1.813783450967005e-05, "loss": 0.4474, "step": 7868 }, { "epoch": 2.202350965575147, "grad_norm": 0.2493523090663002, "learning_rate": 1.8125948035373302e-05, "loss": 0.4518, "step": 7869 }, { "epoch": 2.2026308424293313, "grad_norm": 0.2501987440792787, "learning_rate": 1.8114064594866663e-05, "loss": 0.4492, "step": 7870 }, { "epoch": 2.202910719283515, "grad_norm": 0.24608784215228605, "learning_rate": 1.810218418928121e-05, "loss": 0.449, "step": 7871 }, { "epoch": 2.2031905961376994, "grad_norm": 0.2545219012417861, "learning_rate": 1.8090306819747727e-05, "loss": 0.4557, "step": 7872 }, { "epoch": 2.2034704729918837, "grad_norm": 0.2498453330510054, "learning_rate": 1.807843248739669e-05, "loss": 0.4664, "step": 7873 }, { "epoch": 2.2037503498460675, "grad_norm": 0.24678614300565516, "learning_rate": 1.8066561193358317e-05, "loss": 0.4325, "step": 7874 }, { "epoch": 2.204030226700252, "grad_norm": 0.2479581754897145, "learning_rate": 1.805469293876252e-05, "loss": 0.4506, "step": 7875 }, { "epoch": 2.204310103554436, "grad_norm": 0.2526399472491942, "learning_rate": 1.804282772473893e-05, "loss": 0.459, "step": 7876 }, { "epoch": 2.2045899804086204, "grad_norm": 0.2556463232245629, "learning_rate": 1.8030965552416885e-05, "loss": 0.4599, "step": 7877 }, { "epoch": 2.2048698572628043, "grad_norm": 0.2348266903089208, "learning_rate": 1.8019106422925436e-05, "loss": 0.4287, "step": 7878 }, { "epoch": 2.2051497341169886, "grad_norm": 0.24295159596583496, "learning_rate": 1.8007250337393334e-05, "loss": 0.4525, "step": 7879 }, { "epoch": 2.205429610971173, "grad_norm": 0.249595405375553, "learning_rate": 1.7995397296949052e-05, "loss": 0.4455, "step": 7880 }, { "epoch": 2.2057094878253567, "grad_norm": 0.24467598585723746, "learning_rate": 1.798354730272077e-05, "loss": 0.4649, "step": 7881 }, { "epoch": 2.205989364679541, "grad_norm": 0.24678481309084288, "learning_rate": 1.7971700355836376e-05, "loss": 0.4455, "step": 7882 }, { "epoch": 2.2062692415337253, "grad_norm": 0.25572344637615263, "learning_rate": 1.7959856457423453e-05, "loss": 0.4407, "step": 7883 }, { "epoch": 2.206549118387909, "grad_norm": 0.25686611477700355, "learning_rate": 1.7948015608609343e-05, "loss": 0.457, "step": 7884 }, { "epoch": 2.2068289952420934, "grad_norm": 0.2357672420576398, "learning_rate": 1.793617781052105e-05, "loss": 0.4332, "step": 7885 }, { "epoch": 2.2071088720962777, "grad_norm": 0.25389890172651997, "learning_rate": 1.7924343064285293e-05, "loss": 0.461, "step": 7886 }, { "epoch": 2.207388748950462, "grad_norm": 0.2430272573712817, "learning_rate": 1.7912511371028524e-05, "loss": 0.4497, "step": 7887 }, { "epoch": 2.207668625804646, "grad_norm": 0.24875172492486222, "learning_rate": 1.7900682731876877e-05, "loss": 0.4535, "step": 7888 }, { "epoch": 2.20794850265883, "grad_norm": 0.24448566149127557, "learning_rate": 1.788885714795622e-05, "loss": 0.4411, "step": 7889 }, { "epoch": 2.2082283795130144, "grad_norm": 0.25529872514782653, "learning_rate": 1.7877034620392126e-05, "loss": 0.468, "step": 7890 }, { "epoch": 2.2085082563671983, "grad_norm": 0.2426010058489337, "learning_rate": 1.786521515030984e-05, "loss": 0.4449, "step": 7891 }, { "epoch": 2.2087881332213826, "grad_norm": 0.25397649146267437, "learning_rate": 1.785339873883436e-05, "loss": 0.4752, "step": 7892 }, { "epoch": 2.209068010075567, "grad_norm": 0.2485246127152016, "learning_rate": 1.784158538709039e-05, "loss": 0.4601, "step": 7893 }, { "epoch": 2.2093478869297507, "grad_norm": 0.25436142306490184, "learning_rate": 1.782977509620231e-05, "loss": 0.4622, "step": 7894 }, { "epoch": 2.209627763783935, "grad_norm": 0.24936267035405102, "learning_rate": 1.7817967867294254e-05, "loss": 0.4475, "step": 7895 }, { "epoch": 2.2099076406381193, "grad_norm": 0.2520182646580836, "learning_rate": 1.7806163701490025e-05, "loss": 0.4486, "step": 7896 }, { "epoch": 2.2101875174923036, "grad_norm": 0.24339898686184286, "learning_rate": 1.7794362599913155e-05, "loss": 0.4472, "step": 7897 }, { "epoch": 2.2104673943464874, "grad_norm": 0.24708836780938634, "learning_rate": 1.7782564563686884e-05, "loss": 0.4584, "step": 7898 }, { "epoch": 2.2107472712006717, "grad_norm": 0.24613254514568578, "learning_rate": 1.7770769593934144e-05, "loss": 0.4588, "step": 7899 }, { "epoch": 2.211027148054856, "grad_norm": 0.2345340256289829, "learning_rate": 1.775897769177758e-05, "loss": 0.4554, "step": 7900 }, { "epoch": 2.21130702490904, "grad_norm": 0.2553194001094716, "learning_rate": 1.7747188858339576e-05, "loss": 0.4535, "step": 7901 }, { "epoch": 2.211586901763224, "grad_norm": 0.24910823911685434, "learning_rate": 1.7735403094742198e-05, "loss": 0.4518, "step": 7902 }, { "epoch": 2.2118667786174084, "grad_norm": 0.24519084778159617, "learning_rate": 1.7723620402107207e-05, "loss": 0.4409, "step": 7903 }, { "epoch": 2.2121466554715923, "grad_norm": 0.25162372860192306, "learning_rate": 1.7711840781556092e-05, "loss": 0.4574, "step": 7904 }, { "epoch": 2.2124265323257766, "grad_norm": 0.2557966513214806, "learning_rate": 1.7700064234210046e-05, "loss": 0.4605, "step": 7905 }, { "epoch": 2.212706409179961, "grad_norm": 0.25660172741415943, "learning_rate": 1.7688290761189967e-05, "loss": 0.4504, "step": 7906 }, { "epoch": 2.212986286034145, "grad_norm": 0.24517902037664688, "learning_rate": 1.7676520363616456e-05, "loss": 0.4448, "step": 7907 }, { "epoch": 2.213266162888329, "grad_norm": 0.25686493409749006, "learning_rate": 1.7664753042609845e-05, "loss": 0.4542, "step": 7908 }, { "epoch": 2.2135460397425133, "grad_norm": 0.24705122241299396, "learning_rate": 1.7652988799290127e-05, "loss": 0.4547, "step": 7909 }, { "epoch": 2.2138259165966976, "grad_norm": 0.252047790992777, "learning_rate": 1.7641227634777035e-05, "loss": 0.4483, "step": 7910 }, { "epoch": 2.2141057934508814, "grad_norm": 0.24962222001761322, "learning_rate": 1.762946955019001e-05, "loss": 0.4395, "step": 7911 }, { "epoch": 2.2143856703050657, "grad_norm": 0.25361686004361983, "learning_rate": 1.761771454664819e-05, "loss": 0.4661, "step": 7912 }, { "epoch": 2.21466554715925, "grad_norm": 0.25256660077060694, "learning_rate": 1.7605962625270428e-05, "loss": 0.4549, "step": 7913 }, { "epoch": 2.2149454240134343, "grad_norm": 0.2499270156603375, "learning_rate": 1.7594213787175274e-05, "loss": 0.4414, "step": 7914 }, { "epoch": 2.215225300867618, "grad_norm": 0.2603825740085603, "learning_rate": 1.7582468033480992e-05, "loss": 0.4639, "step": 7915 }, { "epoch": 2.2155051777218024, "grad_norm": 0.2451397470222861, "learning_rate": 1.7570725365305547e-05, "loss": 0.485, "step": 7916 }, { "epoch": 2.2157850545759867, "grad_norm": 0.2529732044457226, "learning_rate": 1.755898578376659e-05, "loss": 0.4659, "step": 7917 }, { "epoch": 2.2160649314301706, "grad_norm": 0.24203003086204153, "learning_rate": 1.7547249289981548e-05, "loss": 0.444, "step": 7918 }, { "epoch": 2.216344808284355, "grad_norm": 0.25175761527493534, "learning_rate": 1.7535515885067484e-05, "loss": 0.4529, "step": 7919 }, { "epoch": 2.216624685138539, "grad_norm": 0.24528838001752543, "learning_rate": 1.7523785570141182e-05, "loss": 0.4618, "step": 7920 }, { "epoch": 2.216904561992723, "grad_norm": 0.24149357114131356, "learning_rate": 1.7512058346319148e-05, "loss": 0.4216, "step": 7921 }, { "epoch": 2.2171844388469073, "grad_norm": 0.25314218786784454, "learning_rate": 1.750033421471759e-05, "loss": 0.4701, "step": 7922 }, { "epoch": 2.2174643157010916, "grad_norm": 0.24908668695012096, "learning_rate": 1.7488613176452412e-05, "loss": 0.4356, "step": 7923 }, { "epoch": 2.217744192555276, "grad_norm": 0.25202973088004654, "learning_rate": 1.747689523263923e-05, "loss": 0.4551, "step": 7924 }, { "epoch": 2.2180240694094597, "grad_norm": 0.2521952848999994, "learning_rate": 1.746518038439336e-05, "loss": 0.4592, "step": 7925 }, { "epoch": 2.218303946263644, "grad_norm": 0.25766929948130207, "learning_rate": 1.745346863282985e-05, "loss": 0.4449, "step": 7926 }, { "epoch": 2.2185838231178283, "grad_norm": 0.24359965870129346, "learning_rate": 1.7441759979063392e-05, "loss": 0.4495, "step": 7927 }, { "epoch": 2.218863699972012, "grad_norm": 0.24798138730762323, "learning_rate": 1.7430054424208447e-05, "loss": 0.4347, "step": 7928 }, { "epoch": 2.2191435768261965, "grad_norm": 0.25300592307291464, "learning_rate": 1.7418351969379154e-05, "loss": 0.4432, "step": 7929 }, { "epoch": 2.2194234536803807, "grad_norm": 0.2487162651416905, "learning_rate": 1.7406652615689356e-05, "loss": 0.4599, "step": 7930 }, { "epoch": 2.2197033305345646, "grad_norm": 0.24073338574486924, "learning_rate": 1.7394956364252602e-05, "loss": 0.448, "step": 7931 }, { "epoch": 2.219983207388749, "grad_norm": 0.2403837745791843, "learning_rate": 1.7383263216182157e-05, "loss": 0.4338, "step": 7932 }, { "epoch": 2.220263084242933, "grad_norm": 0.24989577766616916, "learning_rate": 1.737157317259097e-05, "loss": 0.443, "step": 7933 }, { "epoch": 2.2205429610971175, "grad_norm": 0.2515323230954262, "learning_rate": 1.7359886234591695e-05, "loss": 0.4535, "step": 7934 }, { "epoch": 2.2208228379513013, "grad_norm": 0.25878382581198806, "learning_rate": 1.7348202403296737e-05, "loss": 0.4466, "step": 7935 }, { "epoch": 2.2211027148054856, "grad_norm": 0.24917245373517516, "learning_rate": 1.7336521679818147e-05, "loss": 0.4341, "step": 7936 }, { "epoch": 2.22138259165967, "grad_norm": 0.2480932593442455, "learning_rate": 1.7324844065267708e-05, "loss": 0.4458, "step": 7937 }, { "epoch": 2.2216624685138537, "grad_norm": 0.2512838503594705, "learning_rate": 1.7313169560756903e-05, "loss": 0.4549, "step": 7938 }, { "epoch": 2.221942345368038, "grad_norm": 0.2632030347489567, "learning_rate": 1.7301498167396914e-05, "loss": 0.4466, "step": 7939 }, { "epoch": 2.2222222222222223, "grad_norm": 0.2501334246477985, "learning_rate": 1.7289829886298624e-05, "loss": 0.4669, "step": 7940 }, { "epoch": 2.222502099076406, "grad_norm": 0.24813646405405118, "learning_rate": 1.7278164718572644e-05, "loss": 0.4735, "step": 7941 }, { "epoch": 2.2227819759305905, "grad_norm": 0.2555327423460743, "learning_rate": 1.7266502665329252e-05, "loss": 0.4369, "step": 7942 }, { "epoch": 2.2230618527847748, "grad_norm": 0.24518656085447507, "learning_rate": 1.7254843727678467e-05, "loss": 0.4618, "step": 7943 }, { "epoch": 2.223341729638959, "grad_norm": 0.24291977999803366, "learning_rate": 1.7243187906729995e-05, "loss": 0.4498, "step": 7944 }, { "epoch": 2.223621606493143, "grad_norm": 0.24063118891127847, "learning_rate": 1.723153520359322e-05, "loss": 0.4485, "step": 7945 }, { "epoch": 2.223901483347327, "grad_norm": 0.2679634417254974, "learning_rate": 1.7219885619377264e-05, "loss": 0.4889, "step": 7946 }, { "epoch": 2.2241813602015115, "grad_norm": 0.25885212572291527, "learning_rate": 1.7208239155190943e-05, "loss": 0.4565, "step": 7947 }, { "epoch": 2.2244612370556953, "grad_norm": 0.24100553328119104, "learning_rate": 1.719659581214277e-05, "loss": 0.4461, "step": 7948 }, { "epoch": 2.2247411139098796, "grad_norm": 0.24928149919491513, "learning_rate": 1.7184955591340974e-05, "loss": 0.4442, "step": 7949 }, { "epoch": 2.225020990764064, "grad_norm": 0.2515400428146291, "learning_rate": 1.717331849389347e-05, "loss": 0.4576, "step": 7950 }, { "epoch": 2.225300867618248, "grad_norm": 0.2515434570758564, "learning_rate": 1.7161684520907883e-05, "loss": 0.4772, "step": 7951 }, { "epoch": 2.225580744472432, "grad_norm": 0.238449405053394, "learning_rate": 1.7150053673491528e-05, "loss": 0.4562, "step": 7952 }, { "epoch": 2.2258606213266163, "grad_norm": 0.2549100803745451, "learning_rate": 1.713842595275147e-05, "loss": 0.4369, "step": 7953 }, { "epoch": 2.2261404981808006, "grad_norm": 0.2503589637496714, "learning_rate": 1.7126801359794418e-05, "loss": 0.4304, "step": 7954 }, { "epoch": 2.2264203750349845, "grad_norm": 0.2512816021218268, "learning_rate": 1.7115179895726823e-05, "loss": 0.4428, "step": 7955 }, { "epoch": 2.2267002518891688, "grad_norm": 0.24388927147551884, "learning_rate": 1.71035615616548e-05, "loss": 0.4592, "step": 7956 }, { "epoch": 2.226980128743353, "grad_norm": 0.24372933422977305, "learning_rate": 1.7091946358684213e-05, "loss": 0.4756, "step": 7957 }, { "epoch": 2.227260005597537, "grad_norm": 0.23988976309214124, "learning_rate": 1.708033428792058e-05, "loss": 0.4266, "step": 7958 }, { "epoch": 2.227539882451721, "grad_norm": 0.2481820869753129, "learning_rate": 1.7068725350469162e-05, "loss": 0.4264, "step": 7959 }, { "epoch": 2.2278197593059055, "grad_norm": 0.24135649240636686, "learning_rate": 1.7057119547434895e-05, "loss": 0.4614, "step": 7960 }, { "epoch": 2.2280996361600893, "grad_norm": 0.252011329197858, "learning_rate": 1.704551687992243e-05, "loss": 0.4441, "step": 7961 }, { "epoch": 2.2283795130142736, "grad_norm": 0.25738068801468555, "learning_rate": 1.7033917349036127e-05, "loss": 0.4659, "step": 7962 }, { "epoch": 2.228659389868458, "grad_norm": 0.2515975015760517, "learning_rate": 1.702232095588001e-05, "loss": 0.4446, "step": 7963 }, { "epoch": 2.228939266722642, "grad_norm": 0.2518176234187117, "learning_rate": 1.7010727701557837e-05, "loss": 0.4284, "step": 7964 }, { "epoch": 2.229219143576826, "grad_norm": 0.24727712793208703, "learning_rate": 1.6999137587173074e-05, "loss": 0.4463, "step": 7965 }, { "epoch": 2.2294990204310103, "grad_norm": 0.24965373613298716, "learning_rate": 1.6987550613828862e-05, "loss": 0.4352, "step": 7966 }, { "epoch": 2.2297788972851946, "grad_norm": 0.24686778507922055, "learning_rate": 1.697596678262806e-05, "loss": 0.4646, "step": 7967 }, { "epoch": 2.2300587741393785, "grad_norm": 0.24507759996160586, "learning_rate": 1.696438609467323e-05, "loss": 0.4511, "step": 7968 }, { "epoch": 2.2303386509935628, "grad_norm": 0.27052804940673875, "learning_rate": 1.69528085510666e-05, "loss": 0.4774, "step": 7969 }, { "epoch": 2.230618527847747, "grad_norm": 0.2668370135729792, "learning_rate": 1.6941234152910168e-05, "loss": 0.485, "step": 7970 }, { "epoch": 2.2308984047019313, "grad_norm": 0.2523499356202924, "learning_rate": 1.692966290130557e-05, "loss": 0.4419, "step": 7971 }, { "epoch": 2.231178281556115, "grad_norm": 0.2437389307602732, "learning_rate": 1.6918094797354174e-05, "loss": 0.4521, "step": 7972 }, { "epoch": 2.2314581584102995, "grad_norm": 0.24168020682706032, "learning_rate": 1.6906529842157027e-05, "loss": 0.4488, "step": 7973 }, { "epoch": 2.2317380352644838, "grad_norm": 0.25162232662424494, "learning_rate": 1.689496803681489e-05, "loss": 0.4706, "step": 7974 }, { "epoch": 2.2320179121186676, "grad_norm": 0.26197643712733076, "learning_rate": 1.6883409382428233e-05, "loss": 0.4635, "step": 7975 }, { "epoch": 2.232297788972852, "grad_norm": 0.2470103189286477, "learning_rate": 1.68718538800972e-05, "loss": 0.4524, "step": 7976 }, { "epoch": 2.232577665827036, "grad_norm": 0.2480387357654966, "learning_rate": 1.6860301530921662e-05, "loss": 0.4536, "step": 7977 }, { "epoch": 2.23285754268122, "grad_norm": 0.2339010891610054, "learning_rate": 1.684875233600117e-05, "loss": 0.4511, "step": 7978 }, { "epoch": 2.2331374195354043, "grad_norm": 0.24500286682754885, "learning_rate": 1.6837206296434987e-05, "loss": 0.4559, "step": 7979 }, { "epoch": 2.2334172963895886, "grad_norm": 0.2430729908238167, "learning_rate": 1.682566341332209e-05, "loss": 0.4493, "step": 7980 }, { "epoch": 2.233697173243773, "grad_norm": 0.25333897406913003, "learning_rate": 1.6814123687761095e-05, "loss": 0.4609, "step": 7981 }, { "epoch": 2.2339770500979568, "grad_norm": 0.24631010567382958, "learning_rate": 1.6802587120850387e-05, "loss": 0.4402, "step": 7982 }, { "epoch": 2.234256926952141, "grad_norm": 0.24177119739089156, "learning_rate": 1.679105371368802e-05, "loss": 0.4709, "step": 7983 }, { "epoch": 2.2345368038063254, "grad_norm": 0.2404616845514908, "learning_rate": 1.677952346737175e-05, "loss": 0.4462, "step": 7984 }, { "epoch": 2.234816680660509, "grad_norm": 0.24421904816616632, "learning_rate": 1.6767996382999024e-05, "loss": 0.4415, "step": 7985 }, { "epoch": 2.2350965575146935, "grad_norm": 0.25773948255866413, "learning_rate": 1.6756472461666988e-05, "loss": 0.456, "step": 7986 }, { "epoch": 2.235376434368878, "grad_norm": 0.24322252207362016, "learning_rate": 1.6744951704472527e-05, "loss": 0.4695, "step": 7987 }, { "epoch": 2.235656311223062, "grad_norm": 0.2429963628251191, "learning_rate": 1.673343411251218e-05, "loss": 0.4522, "step": 7988 }, { "epoch": 2.235936188077246, "grad_norm": 0.24708892921844902, "learning_rate": 1.6721919686882194e-05, "loss": 0.4661, "step": 7989 }, { "epoch": 2.23621606493143, "grad_norm": 0.2441905285899125, "learning_rate": 1.6710408428678513e-05, "loss": 0.4668, "step": 7990 }, { "epoch": 2.2364959417856145, "grad_norm": 0.2502825217382606, "learning_rate": 1.66989003389968e-05, "loss": 0.4496, "step": 7991 }, { "epoch": 2.2367758186397984, "grad_norm": 0.24243456426849705, "learning_rate": 1.6687395418932384e-05, "loss": 0.4278, "step": 7992 }, { "epoch": 2.2370556954939826, "grad_norm": 0.2369164369311766, "learning_rate": 1.6675893669580322e-05, "loss": 0.4585, "step": 7993 }, { "epoch": 2.237335572348167, "grad_norm": 0.24799242223856602, "learning_rate": 1.666439509203535e-05, "loss": 0.4608, "step": 7994 }, { "epoch": 2.237615449202351, "grad_norm": 0.25866548823886354, "learning_rate": 1.6652899687391914e-05, "loss": 0.467, "step": 7995 }, { "epoch": 2.237895326056535, "grad_norm": 0.24710441655216528, "learning_rate": 1.6641407456744152e-05, "loss": 0.4422, "step": 7996 }, { "epoch": 2.2381752029107194, "grad_norm": 0.24306993015065884, "learning_rate": 1.6629918401185894e-05, "loss": 0.4571, "step": 7997 }, { "epoch": 2.238455079764903, "grad_norm": 0.2523588605274083, "learning_rate": 1.66184325218107e-05, "loss": 0.4607, "step": 7998 }, { "epoch": 2.2387349566190875, "grad_norm": 0.2507205117190013, "learning_rate": 1.660694981971177e-05, "loss": 0.4637, "step": 7999 }, { "epoch": 2.239014833473272, "grad_norm": 0.25074998498597595, "learning_rate": 1.6595470295982045e-05, "loss": 0.4503, "step": 8000 }, { "epoch": 2.239294710327456, "grad_norm": 0.25097648213155677, "learning_rate": 1.6583993951714154e-05, "loss": 0.4365, "step": 8001 }, { "epoch": 2.23957458718164, "grad_norm": 0.24297808111841446, "learning_rate": 1.657252078800042e-05, "loss": 0.4497, "step": 8002 }, { "epoch": 2.239854464035824, "grad_norm": 0.2440741001324849, "learning_rate": 1.6561050805932875e-05, "loss": 0.4513, "step": 8003 }, { "epoch": 2.2401343408900085, "grad_norm": 0.2487975890820272, "learning_rate": 1.654958400660321e-05, "loss": 0.4442, "step": 8004 }, { "epoch": 2.2404142177441924, "grad_norm": 0.24342845114817827, "learning_rate": 1.653812039110288e-05, "loss": 0.4495, "step": 8005 }, { "epoch": 2.2406940945983767, "grad_norm": 0.25650397910269307, "learning_rate": 1.652665996052298e-05, "loss": 0.4409, "step": 8006 }, { "epoch": 2.240973971452561, "grad_norm": 0.23901972894006557, "learning_rate": 1.651520271595432e-05, "loss": 0.4528, "step": 8007 }, { "epoch": 2.2412538483067452, "grad_norm": 0.252973107022516, "learning_rate": 1.6503748658487405e-05, "loss": 0.4617, "step": 8008 }, { "epoch": 2.241533725160929, "grad_norm": 0.2470775167754655, "learning_rate": 1.6492297789212445e-05, "loss": 0.4576, "step": 8009 }, { "epoch": 2.2418136020151134, "grad_norm": 0.25849123200894225, "learning_rate": 1.6480850109219335e-05, "loss": 0.4601, "step": 8010 }, { "epoch": 2.2420934788692977, "grad_norm": 0.2849183280028203, "learning_rate": 1.646940561959767e-05, "loss": 0.471, "step": 8011 }, { "epoch": 2.2423733557234815, "grad_norm": 0.24331633282152842, "learning_rate": 1.6457964321436754e-05, "loss": 0.4767, "step": 8012 }, { "epoch": 2.242653232577666, "grad_norm": 0.2363492627788017, "learning_rate": 1.6446526215825564e-05, "loss": 0.4554, "step": 8013 }, { "epoch": 2.24293310943185, "grad_norm": 0.25239999785216144, "learning_rate": 1.643509130385279e-05, "loss": 0.4637, "step": 8014 }, { "epoch": 2.243212986286034, "grad_norm": 0.2490404161533813, "learning_rate": 1.642365958660681e-05, "loss": 0.4799, "step": 8015 }, { "epoch": 2.2434928631402182, "grad_norm": 0.24581997293193475, "learning_rate": 1.6412231065175726e-05, "loss": 0.4605, "step": 8016 }, { "epoch": 2.2437727399944025, "grad_norm": 0.25046046242059733, "learning_rate": 1.6400805740647267e-05, "loss": 0.4634, "step": 8017 }, { "epoch": 2.244052616848587, "grad_norm": 0.24579928775716894, "learning_rate": 1.638938361410893e-05, "loss": 0.4558, "step": 8018 }, { "epoch": 2.2443324937027707, "grad_norm": 0.25121405006280206, "learning_rate": 1.6377964686647868e-05, "loss": 0.4525, "step": 8019 }, { "epoch": 2.244612370556955, "grad_norm": 0.24297240413396543, "learning_rate": 1.6366548959350947e-05, "loss": 0.4435, "step": 8020 }, { "epoch": 2.2448922474111392, "grad_norm": 0.23754474483492782, "learning_rate": 1.635513643330471e-05, "loss": 0.434, "step": 8021 }, { "epoch": 2.245172124265323, "grad_norm": 0.250770465819617, "learning_rate": 1.6343727109595426e-05, "loss": 0.4693, "step": 8022 }, { "epoch": 2.2454520011195074, "grad_norm": 0.24653655580384282, "learning_rate": 1.6332320989309042e-05, "loss": 0.4512, "step": 8023 }, { "epoch": 2.2457318779736917, "grad_norm": 0.2512710812882383, "learning_rate": 1.632091807353119e-05, "loss": 0.4624, "step": 8024 }, { "epoch": 2.246011754827876, "grad_norm": 0.23761548726103143, "learning_rate": 1.6309518363347203e-05, "loss": 0.4534, "step": 8025 }, { "epoch": 2.24629163168206, "grad_norm": 0.2485012937243386, "learning_rate": 1.6298121859842115e-05, "loss": 0.4503, "step": 8026 }, { "epoch": 2.246571508536244, "grad_norm": 0.2639350958290308, "learning_rate": 1.6286728564100657e-05, "loss": 0.4568, "step": 8027 }, { "epoch": 2.2468513853904284, "grad_norm": 0.24878318674886682, "learning_rate": 1.6275338477207243e-05, "loss": 0.4537, "step": 8028 }, { "epoch": 2.2471312622446122, "grad_norm": 0.26098167252836724, "learning_rate": 1.6263951600245986e-05, "loss": 0.4636, "step": 8029 }, { "epoch": 2.2474111390987965, "grad_norm": 0.2728680058475497, "learning_rate": 1.62525679343007e-05, "loss": 0.478, "step": 8030 }, { "epoch": 2.247691015952981, "grad_norm": 0.2509449203301831, "learning_rate": 1.624118748045489e-05, "loss": 0.4641, "step": 8031 }, { "epoch": 2.2479708928071647, "grad_norm": 0.25270506991418473, "learning_rate": 1.622981023979175e-05, "loss": 0.4422, "step": 8032 }, { "epoch": 2.248250769661349, "grad_norm": 0.2546047036652015, "learning_rate": 1.621843621339417e-05, "loss": 0.461, "step": 8033 }, { "epoch": 2.2485306465155332, "grad_norm": 0.25526419135366757, "learning_rate": 1.6207065402344747e-05, "loss": 0.4491, "step": 8034 }, { "epoch": 2.248810523369717, "grad_norm": 0.2558771163863261, "learning_rate": 1.6195697807725763e-05, "loss": 0.4576, "step": 8035 }, { "epoch": 2.2490904002239014, "grad_norm": 0.24170528357334292, "learning_rate": 1.618433343061917e-05, "loss": 0.4578, "step": 8036 }, { "epoch": 2.2493702770780857, "grad_norm": 0.23538827050402764, "learning_rate": 1.6172972272106647e-05, "loss": 0.4464, "step": 8037 }, { "epoch": 2.24965015393227, "grad_norm": 0.24439090793335866, "learning_rate": 1.616161433326954e-05, "loss": 0.4413, "step": 8038 }, { "epoch": 2.249930030786454, "grad_norm": 0.25081941860473644, "learning_rate": 1.6150259615188938e-05, "loss": 0.4673, "step": 8039 }, { "epoch": 2.250209907640638, "grad_norm": 0.2453433926538471, "learning_rate": 1.613890811894557e-05, "loss": 0.443, "step": 8040 }, { "epoch": 2.2504897844948224, "grad_norm": 0.24975312010648723, "learning_rate": 1.612755984561989e-05, "loss": 0.4477, "step": 8041 }, { "epoch": 2.2507696613490062, "grad_norm": 0.24148933691877925, "learning_rate": 1.6116214796292013e-05, "loss": 0.4478, "step": 8042 }, { "epoch": 2.2510495382031905, "grad_norm": 0.23798042129055016, "learning_rate": 1.610487297204178e-05, "loss": 0.4603, "step": 8043 }, { "epoch": 2.251329415057375, "grad_norm": 0.24366092254186447, "learning_rate": 1.609353437394871e-05, "loss": 0.4364, "step": 8044 }, { "epoch": 2.251609291911559, "grad_norm": 0.25532438746680247, "learning_rate": 1.608219900309202e-05, "loss": 0.4796, "step": 8045 }, { "epoch": 2.251889168765743, "grad_norm": 0.24529362408593128, "learning_rate": 1.6070866860550603e-05, "loss": 0.4475, "step": 8046 }, { "epoch": 2.2521690456199273, "grad_norm": 0.2555494009784305, "learning_rate": 1.605953794740308e-05, "loss": 0.4607, "step": 8047 }, { "epoch": 2.2524489224741115, "grad_norm": 0.2412023951279725, "learning_rate": 1.604821226472773e-05, "loss": 0.446, "step": 8048 }, { "epoch": 2.2527287993282954, "grad_norm": 0.26411489823517675, "learning_rate": 1.6036889813602534e-05, "loss": 0.46, "step": 8049 }, { "epoch": 2.2530086761824797, "grad_norm": 0.2577320791532774, "learning_rate": 1.602557059510518e-05, "loss": 0.4484, "step": 8050 }, { "epoch": 2.253288553036664, "grad_norm": 0.24845629287382937, "learning_rate": 1.6014254610313033e-05, "loss": 0.4705, "step": 8051 }, { "epoch": 2.253568429890848, "grad_norm": 0.2515467965214697, "learning_rate": 1.6002941860303163e-05, "loss": 0.4559, "step": 8052 }, { "epoch": 2.253848306745032, "grad_norm": 0.24220978874171253, "learning_rate": 1.5991632346152318e-05, "loss": 0.4698, "step": 8053 }, { "epoch": 2.2541281835992164, "grad_norm": 0.2540236744257015, "learning_rate": 1.5980326068936936e-05, "loss": 0.4638, "step": 8054 }, { "epoch": 2.2544080604534007, "grad_norm": 0.25123858182802145, "learning_rate": 1.5969023029733156e-05, "loss": 0.46, "step": 8055 }, { "epoch": 2.2546879373075845, "grad_norm": 0.24846569597307558, "learning_rate": 1.5957723229616796e-05, "loss": 0.4567, "step": 8056 }, { "epoch": 2.254967814161769, "grad_norm": 0.2385083401685797, "learning_rate": 1.5946426669663406e-05, "loss": 0.4413, "step": 8057 }, { "epoch": 2.255247691015953, "grad_norm": 0.24742727656734848, "learning_rate": 1.593513335094819e-05, "loss": 0.4812, "step": 8058 }, { "epoch": 2.255527567870137, "grad_norm": 0.2475928660820711, "learning_rate": 1.5923843274546047e-05, "loss": 0.4632, "step": 8059 }, { "epoch": 2.2558074447243213, "grad_norm": 0.2466775051598817, "learning_rate": 1.5912556441531577e-05, "loss": 0.4579, "step": 8060 }, { "epoch": 2.2560873215785056, "grad_norm": 0.2482971041756975, "learning_rate": 1.590127285297906e-05, "loss": 0.4539, "step": 8061 }, { "epoch": 2.25636719843269, "grad_norm": 0.23303363154287318, "learning_rate": 1.588999250996248e-05, "loss": 0.4172, "step": 8062 }, { "epoch": 2.2566470752868737, "grad_norm": 0.2475263519986645, "learning_rate": 1.587871541355551e-05, "loss": 0.4588, "step": 8063 }, { "epoch": 2.256926952141058, "grad_norm": 0.2513488351825603, "learning_rate": 1.5867441564831502e-05, "loss": 0.4494, "step": 8064 }, { "epoch": 2.2572068289952423, "grad_norm": 0.2568746568874127, "learning_rate": 1.5856170964863508e-05, "loss": 0.4623, "step": 8065 }, { "epoch": 2.257486705849426, "grad_norm": 0.23994801380931918, "learning_rate": 1.5844903614724278e-05, "loss": 0.4447, "step": 8066 }, { "epoch": 2.2577665827036104, "grad_norm": 0.25522426947200266, "learning_rate": 1.5833639515486237e-05, "loss": 0.4664, "step": 8067 }, { "epoch": 2.2580464595577947, "grad_norm": 0.2651894912794593, "learning_rate": 1.582237866822151e-05, "loss": 0.4814, "step": 8068 }, { "epoch": 2.2583263364119786, "grad_norm": 0.25243363676802255, "learning_rate": 1.5811121074001917e-05, "loss": 0.4619, "step": 8069 }, { "epoch": 2.258606213266163, "grad_norm": 0.2389919454384051, "learning_rate": 1.579986673389895e-05, "loss": 0.4481, "step": 8070 }, { "epoch": 2.258886090120347, "grad_norm": 0.2479257488901504, "learning_rate": 1.5788615648983828e-05, "loss": 0.45, "step": 8071 }, { "epoch": 2.259165966974531, "grad_norm": 0.24832827728218682, "learning_rate": 1.57773678203274e-05, "loss": 0.4545, "step": 8072 }, { "epoch": 2.2594458438287153, "grad_norm": 0.2477948436432771, "learning_rate": 1.5766123249000244e-05, "loss": 0.4498, "step": 8073 }, { "epoch": 2.2597257206828996, "grad_norm": 0.2481975889054319, "learning_rate": 1.5754881936072653e-05, "loss": 0.447, "step": 8074 }, { "epoch": 2.260005597537084, "grad_norm": 0.25287797136419315, "learning_rate": 1.5743643882614566e-05, "loss": 0.4431, "step": 8075 }, { "epoch": 2.2602854743912677, "grad_norm": 0.24869014300598982, "learning_rate": 1.5732409089695632e-05, "loss": 0.4577, "step": 8076 }, { "epoch": 2.260565351245452, "grad_norm": 0.25910950830768154, "learning_rate": 1.572117755838518e-05, "loss": 0.4671, "step": 8077 }, { "epoch": 2.2608452280996363, "grad_norm": 0.2538591097969899, "learning_rate": 1.5709949289752235e-05, "loss": 0.4632, "step": 8078 }, { "epoch": 2.26112510495382, "grad_norm": 0.24107156956953155, "learning_rate": 1.5698724284865506e-05, "loss": 0.4495, "step": 8079 }, { "epoch": 2.2614049818080044, "grad_norm": 0.24782583848826115, "learning_rate": 1.5687502544793405e-05, "loss": 0.4534, "step": 8080 }, { "epoch": 2.2616848586621887, "grad_norm": 0.23074484549061458, "learning_rate": 1.5676284070604015e-05, "loss": 0.4603, "step": 8081 }, { "epoch": 2.261964735516373, "grad_norm": 0.2513337097130585, "learning_rate": 1.5665068863365117e-05, "loss": 0.4482, "step": 8082 }, { "epoch": 2.262244612370557, "grad_norm": 0.23376538153171827, "learning_rate": 1.5653856924144183e-05, "loss": 0.4313, "step": 8083 }, { "epoch": 2.262524489224741, "grad_norm": 0.2507810713962941, "learning_rate": 1.564264825400837e-05, "loss": 0.463, "step": 8084 }, { "epoch": 2.2628043660789254, "grad_norm": 0.26057856417098435, "learning_rate": 1.563144285402453e-05, "loss": 0.4611, "step": 8085 }, { "epoch": 2.2630842429331093, "grad_norm": 0.25263231780762113, "learning_rate": 1.5620240725259194e-05, "loss": 0.4786, "step": 8086 }, { "epoch": 2.2633641197872936, "grad_norm": 0.24399915657311877, "learning_rate": 1.560904186877859e-05, "loss": 0.4609, "step": 8087 }, { "epoch": 2.263643996641478, "grad_norm": 0.2567266375720405, "learning_rate": 1.559784628564863e-05, "loss": 0.4648, "step": 8088 }, { "epoch": 2.2639238734956617, "grad_norm": 0.2533129948185237, "learning_rate": 1.5586653976934936e-05, "loss": 0.453, "step": 8089 }, { "epoch": 2.264203750349846, "grad_norm": 0.2662838390784181, "learning_rate": 1.557546494370274e-05, "loss": 0.4684, "step": 8090 }, { "epoch": 2.2644836272040303, "grad_norm": 0.2563249729476431, "learning_rate": 1.5564279187017077e-05, "loss": 0.4596, "step": 8091 }, { "epoch": 2.264763504058214, "grad_norm": 0.2551216698026058, "learning_rate": 1.55530967079426e-05, "loss": 0.4609, "step": 8092 }, { "epoch": 2.2650433809123984, "grad_norm": 0.23783019435087047, "learning_rate": 1.554191750754365e-05, "loss": 0.4539, "step": 8093 }, { "epoch": 2.2653232577665827, "grad_norm": 0.2557040638757712, "learning_rate": 1.553074158688429e-05, "loss": 0.4551, "step": 8094 }, { "epoch": 2.265603134620767, "grad_norm": 0.25439045422952744, "learning_rate": 1.5519568947028228e-05, "loss": 0.4659, "step": 8095 }, { "epoch": 2.265883011474951, "grad_norm": 0.25520332065671886, "learning_rate": 1.55083995890389e-05, "loss": 0.4741, "step": 8096 }, { "epoch": 2.266162888329135, "grad_norm": 0.2520445436560445, "learning_rate": 1.5497233513979404e-05, "loss": 0.4663, "step": 8097 }, { "epoch": 2.2664427651833194, "grad_norm": 0.2537060451873474, "learning_rate": 1.5486070722912532e-05, "loss": 0.4618, "step": 8098 }, { "epoch": 2.2667226420375037, "grad_norm": 0.2494349200310384, "learning_rate": 1.5474911216900774e-05, "loss": 0.4598, "step": 8099 }, { "epoch": 2.2670025188916876, "grad_norm": 0.23177861020387283, "learning_rate": 1.546375499700628e-05, "loss": 0.4366, "step": 8100 }, { "epoch": 2.267282395745872, "grad_norm": 0.24954003694144591, "learning_rate": 1.5452602064290923e-05, "loss": 0.4439, "step": 8101 }, { "epoch": 2.267562272600056, "grad_norm": 0.2543620032513836, "learning_rate": 1.5441452419816237e-05, "loss": 0.4585, "step": 8102 }, { "epoch": 2.26784214945424, "grad_norm": 0.23552521125261952, "learning_rate": 1.543030606464345e-05, "loss": 0.4432, "step": 8103 }, { "epoch": 2.2681220263084243, "grad_norm": 0.24986342113588852, "learning_rate": 1.5419162999833485e-05, "loss": 0.4577, "step": 8104 }, { "epoch": 2.2684019031626086, "grad_norm": 0.25637804753175986, "learning_rate": 1.5408023226446945e-05, "loss": 0.4722, "step": 8105 }, { "epoch": 2.2686817800167924, "grad_norm": 0.2500963453039843, "learning_rate": 1.539688674554411e-05, "loss": 0.4427, "step": 8106 }, { "epoch": 2.2689616568709767, "grad_norm": 0.25095577334372066, "learning_rate": 1.5385753558184967e-05, "loss": 0.4657, "step": 8107 }, { "epoch": 2.269241533725161, "grad_norm": 0.24959553726593925, "learning_rate": 1.537462366542917e-05, "loss": 0.4416, "step": 8108 }, { "epoch": 2.269521410579345, "grad_norm": 0.24021352227065768, "learning_rate": 1.5363497068336075e-05, "loss": 0.4682, "step": 8109 }, { "epoch": 2.269801287433529, "grad_norm": 0.25741380873199216, "learning_rate": 1.5352373767964717e-05, "loss": 0.474, "step": 8110 }, { "epoch": 2.2700811642877134, "grad_norm": 0.2447863931933491, "learning_rate": 1.534125376537382e-05, "loss": 0.4537, "step": 8111 }, { "epoch": 2.2703610411418977, "grad_norm": 0.2551213740238796, "learning_rate": 1.5330137061621784e-05, "loss": 0.464, "step": 8112 }, { "epoch": 2.2706409179960816, "grad_norm": 0.24024301283981186, "learning_rate": 1.5319023657766708e-05, "loss": 0.4509, "step": 8113 }, { "epoch": 2.270920794850266, "grad_norm": 0.24078770185235696, "learning_rate": 1.5307913554866376e-05, "loss": 0.4659, "step": 8114 }, { "epoch": 2.27120067170445, "grad_norm": 0.23763705871991425, "learning_rate": 1.529680675397825e-05, "loss": 0.4259, "step": 8115 }, { "epoch": 2.271480548558634, "grad_norm": 0.24525825175218002, "learning_rate": 1.5285703256159473e-05, "loss": 0.439, "step": 8116 }, { "epoch": 2.2717604254128183, "grad_norm": 0.24981404349348715, "learning_rate": 1.5274603062466897e-05, "loss": 0.4719, "step": 8117 }, { "epoch": 2.2720403022670026, "grad_norm": 0.25704553808778363, "learning_rate": 1.5263506173957037e-05, "loss": 0.455, "step": 8118 }, { "epoch": 2.272320179121187, "grad_norm": 0.24196506660176514, "learning_rate": 1.5252412591686105e-05, "loss": 0.4423, "step": 8119 }, { "epoch": 2.2726000559753707, "grad_norm": 0.2547124939604785, "learning_rate": 1.5241322316709989e-05, "loss": 0.4688, "step": 8120 }, { "epoch": 2.272879932829555, "grad_norm": 0.24573521875961346, "learning_rate": 1.5230235350084271e-05, "loss": 0.4246, "step": 8121 }, { "epoch": 2.2731598096837393, "grad_norm": 0.25083222391365206, "learning_rate": 1.5219151692864214e-05, "loss": 0.4541, "step": 8122 }, { "epoch": 2.273439686537923, "grad_norm": 0.26043181528542003, "learning_rate": 1.5208071346104764e-05, "loss": 0.4671, "step": 8123 }, { "epoch": 2.2737195633921075, "grad_norm": 0.2525542715754832, "learning_rate": 1.5196994310860562e-05, "loss": 0.4483, "step": 8124 }, { "epoch": 2.2739994402462917, "grad_norm": 0.24809264440721193, "learning_rate": 1.5185920588185914e-05, "loss": 0.4512, "step": 8125 }, { "epoch": 2.2742793171004756, "grad_norm": 0.24484303724741177, "learning_rate": 1.5174850179134837e-05, "loss": 0.4337, "step": 8126 }, { "epoch": 2.27455919395466, "grad_norm": 0.2655569281435212, "learning_rate": 1.5163783084761012e-05, "loss": 0.4684, "step": 8127 }, { "epoch": 2.274839070808844, "grad_norm": 0.24443331183909747, "learning_rate": 1.5152719306117812e-05, "loss": 0.4613, "step": 8128 }, { "epoch": 2.275118947663028, "grad_norm": 0.2500321468947835, "learning_rate": 1.5141658844258289e-05, "loss": 0.4351, "step": 8129 }, { "epoch": 2.2753988245172123, "grad_norm": 0.26528907915259103, "learning_rate": 1.5130601700235192e-05, "loss": 0.4756, "step": 8130 }, { "epoch": 2.2756787013713966, "grad_norm": 0.25078409080690917, "learning_rate": 1.5119547875100943e-05, "loss": 0.4623, "step": 8131 }, { "epoch": 2.275958578225581, "grad_norm": 0.24248842102632745, "learning_rate": 1.5108497369907648e-05, "loss": 0.4516, "step": 8132 }, { "epoch": 2.2762384550797647, "grad_norm": 0.24784659006939802, "learning_rate": 1.5097450185707107e-05, "loss": 0.4694, "step": 8133 }, { "epoch": 2.276518331933949, "grad_norm": 0.2434622522765377, "learning_rate": 1.5086406323550789e-05, "loss": 0.4574, "step": 8134 }, { "epoch": 2.2767982087881333, "grad_norm": 0.2538724860310962, "learning_rate": 1.507536578448986e-05, "loss": 0.4615, "step": 8135 }, { "epoch": 2.2770780856423176, "grad_norm": 0.24416755593258385, "learning_rate": 1.5064328569575165e-05, "loss": 0.4455, "step": 8136 }, { "epoch": 2.2773579624965015, "grad_norm": 0.24009991950767856, "learning_rate": 1.5053294679857226e-05, "loss": 0.4503, "step": 8137 }, { "epoch": 2.2776378393506858, "grad_norm": 0.25349782407749055, "learning_rate": 1.5042264116386267e-05, "loss": 0.4421, "step": 8138 }, { "epoch": 2.27791771620487, "grad_norm": 0.23701714061786106, "learning_rate": 1.5031236880212174e-05, "loss": 0.4424, "step": 8139 }, { "epoch": 2.278197593059054, "grad_norm": 0.24938668142473927, "learning_rate": 1.5020212972384528e-05, "loss": 0.4648, "step": 8140 }, { "epoch": 2.278477469913238, "grad_norm": 0.2581491945048349, "learning_rate": 1.5009192393952588e-05, "loss": 0.4625, "step": 8141 }, { "epoch": 2.2787573467674225, "grad_norm": 0.251193013726438, "learning_rate": 1.4998175145965305e-05, "loss": 0.4671, "step": 8142 }, { "epoch": 2.2790372236216063, "grad_norm": 0.2462144802011149, "learning_rate": 1.4987161229471298e-05, "loss": 0.463, "step": 8143 }, { "epoch": 2.2793171004757906, "grad_norm": 0.2561687695043128, "learning_rate": 1.4976150645518888e-05, "loss": 0.4647, "step": 8144 }, { "epoch": 2.279596977329975, "grad_norm": 0.2582874286202819, "learning_rate": 1.4965143395156057e-05, "loss": 0.4721, "step": 8145 }, { "epoch": 2.2798768541841588, "grad_norm": 0.24647383016131252, "learning_rate": 1.495413947943049e-05, "loss": 0.4436, "step": 8146 }, { "epoch": 2.280156731038343, "grad_norm": 0.2552866334765592, "learning_rate": 1.4943138899389548e-05, "loss": 0.4653, "step": 8147 }, { "epoch": 2.2804366078925273, "grad_norm": 0.24920880928984912, "learning_rate": 1.4932141656080262e-05, "loss": 0.4562, "step": 8148 }, { "epoch": 2.2807164847467116, "grad_norm": 0.2474984339952306, "learning_rate": 1.4921147750549364e-05, "loss": 0.4509, "step": 8149 }, { "epoch": 2.2809963616008955, "grad_norm": 0.2596381026767097, "learning_rate": 1.491015718384326e-05, "loss": 0.4744, "step": 8150 }, { "epoch": 2.2812762384550798, "grad_norm": 0.2594766583217914, "learning_rate": 1.489916995700803e-05, "loss": 0.4444, "step": 8151 }, { "epoch": 2.281556115309264, "grad_norm": 0.26437821310395326, "learning_rate": 1.4888186071089455e-05, "loss": 0.447, "step": 8152 }, { "epoch": 2.281835992163448, "grad_norm": 0.25639370080130053, "learning_rate": 1.4877205527132982e-05, "loss": 0.4531, "step": 8153 }, { "epoch": 2.282115869017632, "grad_norm": 0.24847524370433224, "learning_rate": 1.4866228326183745e-05, "loss": 0.4738, "step": 8154 }, { "epoch": 2.2823957458718165, "grad_norm": 0.23987257178653953, "learning_rate": 1.4855254469286562e-05, "loss": 0.4529, "step": 8155 }, { "epoch": 2.2826756227260008, "grad_norm": 0.250830051068186, "learning_rate": 1.4844283957485926e-05, "loss": 0.4646, "step": 8156 }, { "epoch": 2.2829554995801846, "grad_norm": 0.2535638382021271, "learning_rate": 1.4833316791826024e-05, "loss": 0.4383, "step": 8157 }, { "epoch": 2.283235376434369, "grad_norm": 0.24069102606738224, "learning_rate": 1.4822352973350712e-05, "loss": 0.4295, "step": 8158 }, { "epoch": 2.283515253288553, "grad_norm": 0.2499143944118685, "learning_rate": 1.4811392503103539e-05, "loss": 0.4617, "step": 8159 }, { "epoch": 2.283795130142737, "grad_norm": 0.2563370698706162, "learning_rate": 1.480043538212772e-05, "loss": 0.4634, "step": 8160 }, { "epoch": 2.2840750069969213, "grad_norm": 0.24612872482873813, "learning_rate": 1.478948161146616e-05, "loss": 0.4433, "step": 8161 }, { "epoch": 2.2843548838511056, "grad_norm": 0.25350081895250187, "learning_rate": 1.477853119216145e-05, "loss": 0.4628, "step": 8162 }, { "epoch": 2.2846347607052895, "grad_norm": 0.25489830309456557, "learning_rate": 1.4767584125255856e-05, "loss": 0.4705, "step": 8163 }, { "epoch": 2.2849146375594738, "grad_norm": 0.253489068099149, "learning_rate": 1.4756640411791328e-05, "loss": 0.4623, "step": 8164 }, { "epoch": 2.285194514413658, "grad_norm": 0.25731998172642323, "learning_rate": 1.474570005280949e-05, "loss": 0.4594, "step": 8165 }, { "epoch": 2.285474391267842, "grad_norm": 0.249878739953495, "learning_rate": 1.4734763049351652e-05, "loss": 0.4528, "step": 8166 }, { "epoch": 2.285754268122026, "grad_norm": 0.23429922575446413, "learning_rate": 1.4723829402458812e-05, "loss": 0.4538, "step": 8167 }, { "epoch": 2.2860341449762105, "grad_norm": 0.24121347435384824, "learning_rate": 1.471289911317163e-05, "loss": 0.457, "step": 8168 }, { "epoch": 2.286314021830395, "grad_norm": 0.24431296330442784, "learning_rate": 1.470197218253046e-05, "loss": 0.4508, "step": 8169 }, { "epoch": 2.2865938986845786, "grad_norm": 0.2449363370232026, "learning_rate": 1.4691048611575337e-05, "loss": 0.4355, "step": 8170 }, { "epoch": 2.286873775538763, "grad_norm": 0.24530705472179876, "learning_rate": 1.4680128401345966e-05, "loss": 0.442, "step": 8171 }, { "epoch": 2.287153652392947, "grad_norm": 0.2564336937749055, "learning_rate": 1.466921155288175e-05, "loss": 0.4565, "step": 8172 }, { "epoch": 2.2874335292471315, "grad_norm": 0.2365470389322341, "learning_rate": 1.4658298067221749e-05, "loss": 0.4211, "step": 8173 }, { "epoch": 2.2877134061013153, "grad_norm": 0.24465723777855133, "learning_rate": 1.4647387945404722e-05, "loss": 0.4555, "step": 8174 }, { "epoch": 2.2879932829554996, "grad_norm": 0.26635016025713437, "learning_rate": 1.4636481188469097e-05, "loss": 0.4776, "step": 8175 }, { "epoch": 2.288273159809684, "grad_norm": 0.25383134788551887, "learning_rate": 1.4625577797452988e-05, "loss": 0.4383, "step": 8176 }, { "epoch": 2.288553036663868, "grad_norm": 0.2532454461626958, "learning_rate": 1.4614677773394181e-05, "loss": 0.4453, "step": 8177 }, { "epoch": 2.288832913518052, "grad_norm": 0.25733062906476273, "learning_rate": 1.460378111733015e-05, "loss": 0.4542, "step": 8178 }, { "epoch": 2.2891127903722364, "grad_norm": 0.2509878337226054, "learning_rate": 1.4592887830298046e-05, "loss": 0.4678, "step": 8179 }, { "epoch": 2.28939266722642, "grad_norm": 0.2565161084024464, "learning_rate": 1.4581997913334699e-05, "loss": 0.4613, "step": 8180 }, { "epoch": 2.2896725440806045, "grad_norm": 0.24786064781544728, "learning_rate": 1.4571111367476615e-05, "loss": 0.4506, "step": 8181 }, { "epoch": 2.289952420934789, "grad_norm": 0.25825938258403075, "learning_rate": 1.4560228193759977e-05, "loss": 0.4372, "step": 8182 }, { "epoch": 2.2902322977889726, "grad_norm": 0.2569675973629223, "learning_rate": 1.4549348393220657e-05, "loss": 0.4535, "step": 8183 }, { "epoch": 2.290512174643157, "grad_norm": 0.251514946659093, "learning_rate": 1.4538471966894202e-05, "loss": 0.471, "step": 8184 }, { "epoch": 2.290792051497341, "grad_norm": 0.2488805215828181, "learning_rate": 1.4527598915815832e-05, "loss": 0.448, "step": 8185 }, { "epoch": 2.2910719283515255, "grad_norm": 0.2552764388029711, "learning_rate": 1.4516729241020449e-05, "loss": 0.4355, "step": 8186 }, { "epoch": 2.2913518052057094, "grad_norm": 0.26067368005432057, "learning_rate": 1.4505862943542642e-05, "loss": 0.4534, "step": 8187 }, { "epoch": 2.2916316820598936, "grad_norm": 0.2580691067943451, "learning_rate": 1.4495000024416666e-05, "loss": 0.4703, "step": 8188 }, { "epoch": 2.291911558914078, "grad_norm": 0.25961930704971553, "learning_rate": 1.4484140484676462e-05, "loss": 0.4463, "step": 8189 }, { "epoch": 2.292191435768262, "grad_norm": 0.2489274522281798, "learning_rate": 1.4473284325355647e-05, "loss": 0.4411, "step": 8190 }, { "epoch": 2.292471312622446, "grad_norm": 0.2391449177632102, "learning_rate": 1.446243154748751e-05, "loss": 0.4584, "step": 8191 }, { "epoch": 2.2927511894766304, "grad_norm": 0.26327118042471787, "learning_rate": 1.4451582152105032e-05, "loss": 0.4606, "step": 8192 }, { "epoch": 2.2930310663308147, "grad_norm": 0.24558232982989367, "learning_rate": 1.4440736140240862e-05, "loss": 0.4501, "step": 8193 }, { "epoch": 2.2933109431849985, "grad_norm": 0.2641606051268955, "learning_rate": 1.4429893512927328e-05, "loss": 0.4615, "step": 8194 }, { "epoch": 2.293590820039183, "grad_norm": 0.26140842053268204, "learning_rate": 1.4419054271196441e-05, "loss": 0.4689, "step": 8195 }, { "epoch": 2.293870696893367, "grad_norm": 0.2623345051788462, "learning_rate": 1.4408218416079883e-05, "loss": 0.456, "step": 8196 }, { "epoch": 2.294150573747551, "grad_norm": 0.2606823728849341, "learning_rate": 1.439738594860902e-05, "loss": 0.4547, "step": 8197 }, { "epoch": 2.2944304506017352, "grad_norm": 0.24816251909493267, "learning_rate": 1.4386556869814888e-05, "loss": 0.4766, "step": 8198 }, { "epoch": 2.2947103274559195, "grad_norm": 0.25449515930559313, "learning_rate": 1.4375731180728208e-05, "loss": 0.4631, "step": 8199 }, { "epoch": 2.2949902043101034, "grad_norm": 0.2418384559191895, "learning_rate": 1.4364908882379373e-05, "loss": 0.469, "step": 8200 }, { "epoch": 2.2952700811642877, "grad_norm": 0.25366734954018105, "learning_rate": 1.4354089975798457e-05, "loss": 0.4555, "step": 8201 }, { "epoch": 2.295549958018472, "grad_norm": 0.24591943345385542, "learning_rate": 1.434327446201521e-05, "loss": 0.4393, "step": 8202 }, { "epoch": 2.295829834872656, "grad_norm": 0.24941628216726364, "learning_rate": 1.4332462342059055e-05, "loss": 0.4611, "step": 8203 }, { "epoch": 2.29610971172684, "grad_norm": 0.25261349961534235, "learning_rate": 1.4321653616959097e-05, "loss": 0.436, "step": 8204 }, { "epoch": 2.2963895885810244, "grad_norm": 0.26218510261393874, "learning_rate": 1.4310848287744122e-05, "loss": 0.4636, "step": 8205 }, { "epoch": 2.2966694654352087, "grad_norm": 0.2565325292751425, "learning_rate": 1.4300046355442581e-05, "loss": 0.4766, "step": 8206 }, { "epoch": 2.2969493422893925, "grad_norm": 0.2592283565934206, "learning_rate": 1.4289247821082613e-05, "loss": 0.4566, "step": 8207 }, { "epoch": 2.297229219143577, "grad_norm": 0.26156076257911903, "learning_rate": 1.4278452685692023e-05, "loss": 0.4737, "step": 8208 }, { "epoch": 2.297509095997761, "grad_norm": 0.253839003034228, "learning_rate": 1.4267660950298301e-05, "loss": 0.4742, "step": 8209 }, { "epoch": 2.2977889728519454, "grad_norm": 0.24817123267220337, "learning_rate": 1.4256872615928607e-05, "loss": 0.4531, "step": 8210 }, { "epoch": 2.2980688497061292, "grad_norm": 0.24552989846882364, "learning_rate": 1.4246087683609789e-05, "loss": 0.4432, "step": 8211 }, { "epoch": 2.2983487265603135, "grad_norm": 0.24874254309051902, "learning_rate": 1.4235306154368355e-05, "loss": 0.466, "step": 8212 }, { "epoch": 2.298628603414498, "grad_norm": 0.24071033384046536, "learning_rate": 1.42245280292305e-05, "loss": 0.4353, "step": 8213 }, { "epoch": 2.2989084802686817, "grad_norm": 0.23276705113483642, "learning_rate": 1.4213753309222089e-05, "loss": 0.4398, "step": 8214 }, { "epoch": 2.299188357122866, "grad_norm": 0.2523788869113829, "learning_rate": 1.420298199536867e-05, "loss": 0.4476, "step": 8215 }, { "epoch": 2.2994682339770502, "grad_norm": 0.23698737418413152, "learning_rate": 1.419221408869546e-05, "loss": 0.4337, "step": 8216 }, { "epoch": 2.299748110831234, "grad_norm": 0.2581606112338792, "learning_rate": 1.4181449590227359e-05, "loss": 0.4657, "step": 8217 }, { "epoch": 2.3000279876854184, "grad_norm": 0.2575486533079859, "learning_rate": 1.4170688500988933e-05, "loss": 0.4554, "step": 8218 }, { "epoch": 2.3003078645396027, "grad_norm": 0.24648739397368136, "learning_rate": 1.415993082200443e-05, "loss": 0.4321, "step": 8219 }, { "epoch": 2.3005877413937865, "grad_norm": 0.2550029914456197, "learning_rate": 1.4149176554297772e-05, "loss": 0.4637, "step": 8220 }, { "epoch": 2.300867618247971, "grad_norm": 0.2343995984996954, "learning_rate": 1.4138425698892555e-05, "loss": 0.414, "step": 8221 }, { "epoch": 2.301147495102155, "grad_norm": 0.24341070300162065, "learning_rate": 1.4127678256812055e-05, "loss": 0.4367, "step": 8222 }, { "epoch": 2.3014273719563394, "grad_norm": 0.24704913384199684, "learning_rate": 1.4116934229079215e-05, "loss": 0.4527, "step": 8223 }, { "epoch": 2.3017072488105232, "grad_norm": 0.23806248083878626, "learning_rate": 1.4106193616716662e-05, "loss": 0.4478, "step": 8224 }, { "epoch": 2.3019871256647075, "grad_norm": 0.2553295484170634, "learning_rate": 1.4095456420746688e-05, "loss": 0.4541, "step": 8225 }, { "epoch": 2.302267002518892, "grad_norm": 0.25511032208447576, "learning_rate": 1.4084722642191267e-05, "loss": 0.4418, "step": 8226 }, { "epoch": 2.3025468793730757, "grad_norm": 0.26250324891176086, "learning_rate": 1.4073992282072052e-05, "loss": 0.4686, "step": 8227 }, { "epoch": 2.30282675622726, "grad_norm": 0.2505089484194936, "learning_rate": 1.4063265341410359e-05, "loss": 0.439, "step": 8228 }, { "epoch": 2.3031066330814443, "grad_norm": 0.2492830985896294, "learning_rate": 1.405254182122719e-05, "loss": 0.4368, "step": 8229 }, { "epoch": 2.3033865099356285, "grad_norm": 0.2378450618701384, "learning_rate": 1.4041821722543203e-05, "loss": 0.4474, "step": 8230 }, { "epoch": 2.3036663867898124, "grad_norm": 0.2588730283123059, "learning_rate": 1.4031105046378756e-05, "loss": 0.4239, "step": 8231 }, { "epoch": 2.3039462636439967, "grad_norm": 0.2555618257380051, "learning_rate": 1.4020391793753863e-05, "loss": 0.4599, "step": 8232 }, { "epoch": 2.304226140498181, "grad_norm": 0.25581666827708954, "learning_rate": 1.4009681965688198e-05, "loss": 0.4412, "step": 8233 }, { "epoch": 2.304506017352365, "grad_norm": 0.2560991316176909, "learning_rate": 1.3998975563201183e-05, "loss": 0.4447, "step": 8234 }, { "epoch": 2.304785894206549, "grad_norm": 0.2463972513662189, "learning_rate": 1.3988272587311807e-05, "loss": 0.4533, "step": 8235 }, { "epoch": 2.3050657710607334, "grad_norm": 0.24653116877321052, "learning_rate": 1.3977573039038804e-05, "loss": 0.4473, "step": 8236 }, { "epoch": 2.3053456479149173, "grad_norm": 0.26062490064044275, "learning_rate": 1.3966876919400562e-05, "loss": 0.4437, "step": 8237 }, { "epoch": 2.3056255247691015, "grad_norm": 0.24942429844838426, "learning_rate": 1.3956184229415148e-05, "loss": 0.4625, "step": 8238 }, { "epoch": 2.305905401623286, "grad_norm": 0.2532496257644462, "learning_rate": 1.3945494970100286e-05, "loss": 0.4569, "step": 8239 }, { "epoch": 2.3061852784774697, "grad_norm": 0.25576705083040635, "learning_rate": 1.3934809142473399e-05, "loss": 0.4577, "step": 8240 }, { "epoch": 2.306465155331654, "grad_norm": 0.2497333718141048, "learning_rate": 1.3924126747551557e-05, "loss": 0.4423, "step": 8241 }, { "epoch": 2.3067450321858383, "grad_norm": 0.25013346789155255, "learning_rate": 1.391344778635153e-05, "loss": 0.4641, "step": 8242 }, { "epoch": 2.3070249090400226, "grad_norm": 0.2519152201595828, "learning_rate": 1.390277225988974e-05, "loss": 0.4635, "step": 8243 }, { "epoch": 2.3073047858942064, "grad_norm": 0.2656688470446249, "learning_rate": 1.3892100169182292e-05, "loss": 0.4524, "step": 8244 }, { "epoch": 2.3075846627483907, "grad_norm": 0.24355235271055817, "learning_rate": 1.3881431515244958e-05, "loss": 0.4342, "step": 8245 }, { "epoch": 2.307864539602575, "grad_norm": 0.26436980827927553, "learning_rate": 1.3870766299093191e-05, "loss": 0.4747, "step": 8246 }, { "epoch": 2.3081444164567593, "grad_norm": 0.25831009264726085, "learning_rate": 1.3860104521742106e-05, "loss": 0.4618, "step": 8247 }, { "epoch": 2.308424293310943, "grad_norm": 0.2672249877325695, "learning_rate": 1.3849446184206506e-05, "loss": 0.4448, "step": 8248 }, { "epoch": 2.3087041701651274, "grad_norm": 0.25312586834946443, "learning_rate": 1.3838791287500852e-05, "loss": 0.4581, "step": 8249 }, { "epoch": 2.3089840470193117, "grad_norm": 0.24347548711792347, "learning_rate": 1.3828139832639281e-05, "loss": 0.444, "step": 8250 }, { "epoch": 2.3092639238734955, "grad_norm": 0.24634867984844497, "learning_rate": 1.3817491820635592e-05, "loss": 0.4416, "step": 8251 }, { "epoch": 2.30954380072768, "grad_norm": 0.25697191736106406, "learning_rate": 1.3806847252503313e-05, "loss": 0.4618, "step": 8252 }, { "epoch": 2.309823677581864, "grad_norm": 0.2713855878912528, "learning_rate": 1.3796206129255557e-05, "loss": 0.46, "step": 8253 }, { "epoch": 2.310103554436048, "grad_norm": 0.24101979738734647, "learning_rate": 1.3785568451905167e-05, "loss": 0.4295, "step": 8254 }, { "epoch": 2.3103834312902323, "grad_norm": 0.24317281038151736, "learning_rate": 1.3774934221464642e-05, "loss": 0.4498, "step": 8255 }, { "epoch": 2.3106633081444166, "grad_norm": 0.25011964806713044, "learning_rate": 1.3764303438946152e-05, "loss": 0.4438, "step": 8256 }, { "epoch": 2.3109431849986004, "grad_norm": 0.2450960944441535, "learning_rate": 1.3753676105361545e-05, "loss": 0.4497, "step": 8257 }, { "epoch": 2.3112230618527847, "grad_norm": 0.25587246810033215, "learning_rate": 1.374305222172233e-05, "loss": 0.4559, "step": 8258 }, { "epoch": 2.311502938706969, "grad_norm": 0.25257742847074705, "learning_rate": 1.3732431789039702e-05, "loss": 0.4559, "step": 8259 }, { "epoch": 2.3117828155611533, "grad_norm": 0.24499552379034534, "learning_rate": 1.3721814808324519e-05, "loss": 0.4539, "step": 8260 }, { "epoch": 2.312062692415337, "grad_norm": 0.2519249772197435, "learning_rate": 1.3711201280587305e-05, "loss": 0.4666, "step": 8261 }, { "epoch": 2.3123425692695214, "grad_norm": 0.2454133835186868, "learning_rate": 1.3700591206838264e-05, "loss": 0.4671, "step": 8262 }, { "epoch": 2.3126224461237057, "grad_norm": 0.24724259151477296, "learning_rate": 1.3689984588087268e-05, "loss": 0.4649, "step": 8263 }, { "epoch": 2.3129023229778896, "grad_norm": 0.2526043702303986, "learning_rate": 1.3679381425343867e-05, "loss": 0.4318, "step": 8264 }, { "epoch": 2.313182199832074, "grad_norm": 0.2639671356249918, "learning_rate": 1.3668781719617269e-05, "loss": 0.4431, "step": 8265 }, { "epoch": 2.313462076686258, "grad_norm": 0.2459965316812249, "learning_rate": 1.3658185471916363e-05, "loss": 0.4574, "step": 8266 }, { "epoch": 2.3137419535404424, "grad_norm": 0.258995108849743, "learning_rate": 1.3647592683249706e-05, "loss": 0.4735, "step": 8267 }, { "epoch": 2.3140218303946263, "grad_norm": 0.25323242675205754, "learning_rate": 1.3637003354625505e-05, "loss": 0.4518, "step": 8268 }, { "epoch": 2.3143017072488106, "grad_norm": 0.2569839965095759, "learning_rate": 1.3626417487051695e-05, "loss": 0.4636, "step": 8269 }, { "epoch": 2.314581584102995, "grad_norm": 0.2583915219613413, "learning_rate": 1.3615835081535844e-05, "loss": 0.4772, "step": 8270 }, { "epoch": 2.3148614609571787, "grad_norm": 0.26519040963695417, "learning_rate": 1.3605256139085166e-05, "loss": 0.4751, "step": 8271 }, { "epoch": 2.315141337811363, "grad_norm": 0.2544051272367222, "learning_rate": 1.359468066070657e-05, "loss": 0.4478, "step": 8272 }, { "epoch": 2.3154212146655473, "grad_norm": 0.25904166475846885, "learning_rate": 1.3584108647406651e-05, "loss": 0.4367, "step": 8273 }, { "epoch": 2.315701091519731, "grad_norm": 0.24203488420889496, "learning_rate": 1.357354010019165e-05, "loss": 0.452, "step": 8274 }, { "epoch": 2.3159809683739154, "grad_norm": 0.24408034041809595, "learning_rate": 1.3562975020067498e-05, "loss": 0.476, "step": 8275 }, { "epoch": 2.3162608452280997, "grad_norm": 0.25098600027808327, "learning_rate": 1.3552413408039772e-05, "loss": 0.4441, "step": 8276 }, { "epoch": 2.3165407220822836, "grad_norm": 0.25957133328367177, "learning_rate": 1.354185526511374e-05, "loss": 0.4705, "step": 8277 }, { "epoch": 2.316820598936468, "grad_norm": 0.25482623346727046, "learning_rate": 1.3531300592294333e-05, "loss": 0.459, "step": 8278 }, { "epoch": 2.317100475790652, "grad_norm": 0.2455638731764033, "learning_rate": 1.352074939058614e-05, "loss": 0.4434, "step": 8279 }, { "epoch": 2.3173803526448364, "grad_norm": 0.27113276648181606, "learning_rate": 1.3510201660993449e-05, "loss": 0.4702, "step": 8280 }, { "epoch": 2.3176602294990203, "grad_norm": 0.2583368965508004, "learning_rate": 1.3499657404520182e-05, "loss": 0.4539, "step": 8281 }, { "epoch": 2.3179401063532046, "grad_norm": 0.2412400044041814, "learning_rate": 1.348911662216995e-05, "loss": 0.4405, "step": 8282 }, { "epoch": 2.318219983207389, "grad_norm": 0.24332594539723115, "learning_rate": 1.3478579314946044e-05, "loss": 0.4318, "step": 8283 }, { "epoch": 2.3184998600615727, "grad_norm": 0.24523850364563363, "learning_rate": 1.3468045483851393e-05, "loss": 0.4529, "step": 8284 }, { "epoch": 2.318779736915757, "grad_norm": 0.2502813446703391, "learning_rate": 1.345751512988861e-05, "loss": 0.4599, "step": 8285 }, { "epoch": 2.3190596137699413, "grad_norm": 0.24642028360544369, "learning_rate": 1.344698825406e-05, "loss": 0.454, "step": 8286 }, { "epoch": 2.3193394906241256, "grad_norm": 0.23968596721138646, "learning_rate": 1.3436464857367514e-05, "loss": 0.4592, "step": 8287 }, { "epoch": 2.3196193674783094, "grad_norm": 0.2483871896176604, "learning_rate": 1.342594494081278e-05, "loss": 0.47, "step": 8288 }, { "epoch": 2.3198992443324937, "grad_norm": 0.2542722786790647, "learning_rate": 1.341542850539706e-05, "loss": 0.4811, "step": 8289 }, { "epoch": 2.320179121186678, "grad_norm": 0.25567761198216693, "learning_rate": 1.3404915552121339e-05, "loss": 0.4599, "step": 8290 }, { "epoch": 2.320458998040862, "grad_norm": 0.2403457902353109, "learning_rate": 1.3394406081986233e-05, "loss": 0.4513, "step": 8291 }, { "epoch": 2.320738874895046, "grad_norm": 0.25775077918557937, "learning_rate": 1.3383900095992047e-05, "loss": 0.4445, "step": 8292 }, { "epoch": 2.3210187517492304, "grad_norm": 0.2458236879822526, "learning_rate": 1.3373397595138749e-05, "loss": 0.4582, "step": 8293 }, { "epoch": 2.3212986286034143, "grad_norm": 0.249500610212342, "learning_rate": 1.3362898580425964e-05, "loss": 0.4599, "step": 8294 }, { "epoch": 2.3215785054575986, "grad_norm": 0.25463565599713955, "learning_rate": 1.3352403052853002e-05, "loss": 0.4559, "step": 8295 }, { "epoch": 2.321858382311783, "grad_norm": 0.2525468242326315, "learning_rate": 1.3341911013418823e-05, "loss": 0.4341, "step": 8296 }, { "epoch": 2.322138259165967, "grad_norm": 0.24353159669023183, "learning_rate": 1.3331422463122078e-05, "loss": 0.456, "step": 8297 }, { "epoch": 2.322418136020151, "grad_norm": 0.25635587644285407, "learning_rate": 1.332093740296107e-05, "loss": 0.4445, "step": 8298 }, { "epoch": 2.3226980128743353, "grad_norm": 0.25652748059284, "learning_rate": 1.3310455833933767e-05, "loss": 0.4607, "step": 8299 }, { "epoch": 2.3229778897285196, "grad_norm": 0.2435045692065939, "learning_rate": 1.3299977757037813e-05, "loss": 0.4768, "step": 8300 }, { "epoch": 2.3232577665827034, "grad_norm": 0.2645763871535758, "learning_rate": 1.3289503173270519e-05, "loss": 0.453, "step": 8301 }, { "epoch": 2.3235376434368877, "grad_norm": 0.2594497147849001, "learning_rate": 1.3279032083628856e-05, "loss": 0.4698, "step": 8302 }, { "epoch": 2.323817520291072, "grad_norm": 0.2510893759452567, "learning_rate": 1.3268564489109463e-05, "loss": 0.4556, "step": 8303 }, { "epoch": 2.3240973971452563, "grad_norm": 0.2690127713973988, "learning_rate": 1.3258100390708678e-05, "loss": 0.4804, "step": 8304 }, { "epoch": 2.32437727399944, "grad_norm": 0.25566558146444035, "learning_rate": 1.3247639789422455e-05, "loss": 0.4621, "step": 8305 }, { "epoch": 2.3246571508536245, "grad_norm": 0.2535564702423962, "learning_rate": 1.3237182686246468e-05, "loss": 0.4535, "step": 8306 }, { "epoch": 2.3249370277078087, "grad_norm": 0.29939560314990055, "learning_rate": 1.3226729082175998e-05, "loss": 0.4631, "step": 8307 }, { "epoch": 2.3252169045619926, "grad_norm": 0.24921423058478057, "learning_rate": 1.3216278978206037e-05, "loss": 0.437, "step": 8308 }, { "epoch": 2.325496781416177, "grad_norm": 0.2505100630181456, "learning_rate": 1.3205832375331228e-05, "loss": 0.472, "step": 8309 }, { "epoch": 2.325776658270361, "grad_norm": 0.26485177491182993, "learning_rate": 1.3195389274545888e-05, "loss": 0.4449, "step": 8310 }, { "epoch": 2.326056535124545, "grad_norm": 0.25579492366756157, "learning_rate": 1.3184949676844e-05, "loss": 0.4575, "step": 8311 }, { "epoch": 2.3263364119787293, "grad_norm": 0.25150989588368383, "learning_rate": 1.3174513583219206e-05, "loss": 0.4295, "step": 8312 }, { "epoch": 2.3266162888329136, "grad_norm": 0.25062198093585214, "learning_rate": 1.3164080994664819e-05, "loss": 0.4744, "step": 8313 }, { "epoch": 2.3268961656870975, "grad_norm": 0.25611396668986003, "learning_rate": 1.3153651912173825e-05, "loss": 0.45, "step": 8314 }, { "epoch": 2.3271760425412817, "grad_norm": 0.25084896562845693, "learning_rate": 1.3143226336738862e-05, "loss": 0.4595, "step": 8315 }, { "epoch": 2.327455919395466, "grad_norm": 0.25015225652760464, "learning_rate": 1.313280426935225e-05, "loss": 0.4619, "step": 8316 }, { "epoch": 2.3277357962496503, "grad_norm": 0.26072051902000226, "learning_rate": 1.3122385711005963e-05, "loss": 0.4649, "step": 8317 }, { "epoch": 2.328015673103834, "grad_norm": 0.2620674602640789, "learning_rate": 1.3111970662691641e-05, "loss": 0.4542, "step": 8318 }, { "epoch": 2.3282955499580185, "grad_norm": 0.2462533774998526, "learning_rate": 1.3101559125400603e-05, "loss": 0.4607, "step": 8319 }, { "epoch": 2.3285754268122028, "grad_norm": 0.2486600260013821, "learning_rate": 1.3091151100123806e-05, "loss": 0.4387, "step": 8320 }, { "epoch": 2.3288553036663866, "grad_norm": 0.2578339282929137, "learning_rate": 1.3080746587851917e-05, "loss": 0.4611, "step": 8321 }, { "epoch": 2.329135180520571, "grad_norm": 0.24276440019133075, "learning_rate": 1.3070345589575234e-05, "loss": 0.4372, "step": 8322 }, { "epoch": 2.329415057374755, "grad_norm": 0.26731122717672473, "learning_rate": 1.3059948106283725e-05, "loss": 0.4706, "step": 8323 }, { "epoch": 2.3296949342289395, "grad_norm": 0.250612444759731, "learning_rate": 1.3049554138967051e-05, "loss": 0.4648, "step": 8324 }, { "epoch": 2.3299748110831233, "grad_norm": 0.24418046783448943, "learning_rate": 1.303916368861448e-05, "loss": 0.4243, "step": 8325 }, { "epoch": 2.3302546879373076, "grad_norm": 0.25864633555230676, "learning_rate": 1.3028776756215e-05, "loss": 0.4628, "step": 8326 }, { "epoch": 2.330534564791492, "grad_norm": 0.2596393502184638, "learning_rate": 1.3018393342757235e-05, "loss": 0.4467, "step": 8327 }, { "epoch": 2.3308144416456757, "grad_norm": 0.2502298990120087, "learning_rate": 1.3008013449229494e-05, "loss": 0.4613, "step": 8328 }, { "epoch": 2.33109431849986, "grad_norm": 0.25085265852572536, "learning_rate": 1.299763707661974e-05, "loss": 0.4682, "step": 8329 }, { "epoch": 2.3313741953540443, "grad_norm": 0.2420344453522528, "learning_rate": 1.2987264225915601e-05, "loss": 0.4516, "step": 8330 }, { "epoch": 2.331654072208228, "grad_norm": 0.25060684699525815, "learning_rate": 1.297689489810437e-05, "loss": 0.4456, "step": 8331 }, { "epoch": 2.3319339490624125, "grad_norm": 0.24789521553675894, "learning_rate": 1.2966529094173002e-05, "loss": 0.4354, "step": 8332 }, { "epoch": 2.3322138259165968, "grad_norm": 0.2553686594992788, "learning_rate": 1.2956166815108128e-05, "loss": 0.4716, "step": 8333 }, { "epoch": 2.3324937027707806, "grad_norm": 0.2520626030690028, "learning_rate": 1.2945808061896025e-05, "loss": 0.4497, "step": 8334 }, { "epoch": 2.332773579624965, "grad_norm": 0.26246718718579354, "learning_rate": 1.2935452835522654e-05, "loss": 0.4798, "step": 8335 }, { "epoch": 2.333053456479149, "grad_norm": 0.25996697497970495, "learning_rate": 1.2925101136973627e-05, "loss": 0.4548, "step": 8336 }, { "epoch": 2.3333333333333335, "grad_norm": 0.252207790482802, "learning_rate": 1.2914752967234211e-05, "loss": 0.4464, "step": 8337 }, { "epoch": 2.3336132101875173, "grad_norm": 0.2571888889923139, "learning_rate": 1.290440832728938e-05, "loss": 0.4579, "step": 8338 }, { "epoch": 2.3338930870417016, "grad_norm": 0.2462320302594575, "learning_rate": 1.2894067218123729e-05, "loss": 0.4408, "step": 8339 }, { "epoch": 2.334172963895886, "grad_norm": 0.24476831206771804, "learning_rate": 1.2883729640721531e-05, "loss": 0.437, "step": 8340 }, { "epoch": 2.33445284075007, "grad_norm": 0.24245968798802456, "learning_rate": 1.2873395596066718e-05, "loss": 0.417, "step": 8341 }, { "epoch": 2.334732717604254, "grad_norm": 0.2493309105517221, "learning_rate": 1.2863065085142906e-05, "loss": 0.4459, "step": 8342 }, { "epoch": 2.3350125944584383, "grad_norm": 0.24558585019024368, "learning_rate": 1.2852738108933332e-05, "loss": 0.4524, "step": 8343 }, { "epoch": 2.3352924713126226, "grad_norm": 0.261617174287669, "learning_rate": 1.2842414668420938e-05, "loss": 0.4634, "step": 8344 }, { "epoch": 2.3355723481668065, "grad_norm": 0.2596256719318115, "learning_rate": 1.2832094764588316e-05, "loss": 0.4547, "step": 8345 }, { "epoch": 2.3358522250209908, "grad_norm": 0.24890030706086635, "learning_rate": 1.2821778398417716e-05, "loss": 0.453, "step": 8346 }, { "epoch": 2.336132101875175, "grad_norm": 0.2506008504759721, "learning_rate": 1.2811465570891057e-05, "loss": 0.4562, "step": 8347 }, { "epoch": 2.336411978729359, "grad_norm": 0.24276476872341263, "learning_rate": 1.2801156282989918e-05, "loss": 0.4459, "step": 8348 }, { "epoch": 2.336691855583543, "grad_norm": 0.24679338909226162, "learning_rate": 1.2790850535695548e-05, "loss": 0.4465, "step": 8349 }, { "epoch": 2.3369717324377275, "grad_norm": 0.25632954802168545, "learning_rate": 1.2780548329988851e-05, "loss": 0.4708, "step": 8350 }, { "epoch": 2.3372516092919113, "grad_norm": 0.24926302589325672, "learning_rate": 1.2770249666850399e-05, "loss": 0.4419, "step": 8351 }, { "epoch": 2.3375314861460956, "grad_norm": 0.245843961577653, "learning_rate": 1.2759954547260417e-05, "loss": 0.4566, "step": 8352 }, { "epoch": 2.33781136300028, "grad_norm": 0.26314821525666565, "learning_rate": 1.2749662972198806e-05, "loss": 0.4622, "step": 8353 }, { "epoch": 2.338091239854464, "grad_norm": 0.2606144564266123, "learning_rate": 1.273937494264511e-05, "loss": 0.4645, "step": 8354 }, { "epoch": 2.338371116708648, "grad_norm": 0.27126219153303943, "learning_rate": 1.272909045957858e-05, "loss": 0.4775, "step": 8355 }, { "epoch": 2.3386509935628323, "grad_norm": 0.25245097134514854, "learning_rate": 1.2718809523978077e-05, "loss": 0.4537, "step": 8356 }, { "epoch": 2.3389308704170166, "grad_norm": 0.24209596472168504, "learning_rate": 1.2708532136822155e-05, "loss": 0.4348, "step": 8357 }, { "epoch": 2.3392107472712005, "grad_norm": 0.25556493560390914, "learning_rate": 1.2698258299089011e-05, "loss": 0.4608, "step": 8358 }, { "epoch": 2.3394906241253848, "grad_norm": 0.248939787297999, "learning_rate": 1.2687988011756524e-05, "loss": 0.437, "step": 8359 }, { "epoch": 2.339770500979569, "grad_norm": 0.24750924242357977, "learning_rate": 1.267772127580224e-05, "loss": 0.4555, "step": 8360 }, { "epoch": 2.3400503778337534, "grad_norm": 0.25532401963847123, "learning_rate": 1.2667458092203316e-05, "loss": 0.4495, "step": 8361 }, { "epoch": 2.340330254687937, "grad_norm": 0.24882237368089657, "learning_rate": 1.2657198461936632e-05, "loss": 0.4673, "step": 8362 }, { "epoch": 2.3406101315421215, "grad_norm": 0.25017244077714124, "learning_rate": 1.2646942385978695e-05, "loss": 0.4652, "step": 8363 }, { "epoch": 2.340890008396306, "grad_norm": 0.25600251884801806, "learning_rate": 1.263668986530569e-05, "loss": 0.4641, "step": 8364 }, { "epoch": 2.3411698852504896, "grad_norm": 0.24819184545591527, "learning_rate": 1.2626440900893461e-05, "loss": 0.4309, "step": 8365 }, { "epoch": 2.341449762104674, "grad_norm": 0.2592169963027563, "learning_rate": 1.2616195493717503e-05, "loss": 0.4646, "step": 8366 }, { "epoch": 2.341729638958858, "grad_norm": 0.2401897302540128, "learning_rate": 1.2605953644752983e-05, "loss": 0.4501, "step": 8367 }, { "epoch": 2.342009515813042, "grad_norm": 0.25976734294746356, "learning_rate": 1.2595715354974725e-05, "loss": 0.4697, "step": 8368 }, { "epoch": 2.3422893926672264, "grad_norm": 0.2562485280560772, "learning_rate": 1.2585480625357215e-05, "loss": 0.4522, "step": 8369 }, { "epoch": 2.3425692695214106, "grad_norm": 0.2535452702032081, "learning_rate": 1.25752494568746e-05, "loss": 0.4643, "step": 8370 }, { "epoch": 2.3428491463755945, "grad_norm": 0.26249151307478963, "learning_rate": 1.2565021850500692e-05, "loss": 0.4508, "step": 8371 }, { "epoch": 2.343129023229779, "grad_norm": 0.24245600725469135, "learning_rate": 1.255479780720894e-05, "loss": 0.4602, "step": 8372 }, { "epoch": 2.343408900083963, "grad_norm": 0.24935167034834935, "learning_rate": 1.2544577327972506e-05, "loss": 0.4255, "step": 8373 }, { "epoch": 2.3436887769381474, "grad_norm": 0.24987231561353682, "learning_rate": 1.2534360413764169e-05, "loss": 0.4634, "step": 8374 }, { "epoch": 2.343968653792331, "grad_norm": 0.24784216651321836, "learning_rate": 1.252414706555638e-05, "loss": 0.4402, "step": 8375 }, { "epoch": 2.3442485306465155, "grad_norm": 0.2522270909559883, "learning_rate": 1.2513937284321247e-05, "loss": 0.4517, "step": 8376 }, { "epoch": 2.3445284075007, "grad_norm": 0.2552643636788191, "learning_rate": 1.2503731071030545e-05, "loss": 0.4512, "step": 8377 }, { "epoch": 2.344808284354884, "grad_norm": 0.23577579148392627, "learning_rate": 1.249352842665572e-05, "loss": 0.463, "step": 8378 }, { "epoch": 2.345088161209068, "grad_norm": 0.2478778888263256, "learning_rate": 1.2483329352167845e-05, "loss": 0.4248, "step": 8379 }, { "epoch": 2.3453680380632522, "grad_norm": 0.24702159194094486, "learning_rate": 1.2473133848537672e-05, "loss": 0.443, "step": 8380 }, { "epoch": 2.3456479149174365, "grad_norm": 0.2653285232474737, "learning_rate": 1.2462941916735632e-05, "loss": 0.462, "step": 8381 }, { "epoch": 2.3459277917716204, "grad_norm": 0.2617016511643717, "learning_rate": 1.2452753557731784e-05, "loss": 0.4475, "step": 8382 }, { "epoch": 2.3462076686258047, "grad_norm": 0.25530482866239995, "learning_rate": 1.2442568772495872e-05, "loss": 0.4545, "step": 8383 }, { "epoch": 2.346487545479989, "grad_norm": 0.24709930348089, "learning_rate": 1.2432387561997288e-05, "loss": 0.4354, "step": 8384 }, { "epoch": 2.346767422334173, "grad_norm": 0.25460629853692296, "learning_rate": 1.242220992720508e-05, "loss": 0.4477, "step": 8385 }, { "epoch": 2.347047299188357, "grad_norm": 0.2443884228766934, "learning_rate": 1.2412035869087967e-05, "loss": 0.4487, "step": 8386 }, { "epoch": 2.3473271760425414, "grad_norm": 0.25595280642141804, "learning_rate": 1.2401865388614315e-05, "loss": 0.4485, "step": 8387 }, { "epoch": 2.347607052896725, "grad_norm": 0.2487615002853761, "learning_rate": 1.2391698486752162e-05, "loss": 0.4558, "step": 8388 }, { "epoch": 2.3478869297509095, "grad_norm": 0.24954185753813174, "learning_rate": 1.2381535164469183e-05, "loss": 0.4618, "step": 8389 }, { "epoch": 2.348166806605094, "grad_norm": 0.253669681545023, "learning_rate": 1.2371375422732761e-05, "loss": 0.4451, "step": 8390 }, { "epoch": 2.348446683459278, "grad_norm": 0.262076025150562, "learning_rate": 1.2361219262509883e-05, "loss": 0.4623, "step": 8391 }, { "epoch": 2.348726560313462, "grad_norm": 0.2611064528389205, "learning_rate": 1.2351066684767226e-05, "loss": 0.4491, "step": 8392 }, { "epoch": 2.3490064371676462, "grad_norm": 0.2517744268132719, "learning_rate": 1.234091769047111e-05, "loss": 0.4273, "step": 8393 }, { "epoch": 2.3492863140218305, "grad_norm": 0.256443946686235, "learning_rate": 1.2330772280587532e-05, "loss": 0.4418, "step": 8394 }, { "epoch": 2.3495661908760144, "grad_norm": 0.24454576138412137, "learning_rate": 1.2320630456082133e-05, "loss": 0.4282, "step": 8395 }, { "epoch": 2.3498460677301987, "grad_norm": 0.2563834790872159, "learning_rate": 1.2310492217920227e-05, "loss": 0.4357, "step": 8396 }, { "epoch": 2.350125944584383, "grad_norm": 0.2600202692628765, "learning_rate": 1.2300357567066756e-05, "loss": 0.4734, "step": 8397 }, { "epoch": 2.3504058214385672, "grad_norm": 0.24637218071644576, "learning_rate": 1.2290226504486351e-05, "loss": 0.4459, "step": 8398 }, { "epoch": 2.350685698292751, "grad_norm": 0.24790095006311919, "learning_rate": 1.22800990311433e-05, "loss": 0.4442, "step": 8399 }, { "epoch": 2.3509655751469354, "grad_norm": 0.25465728953280475, "learning_rate": 1.2269975148001534e-05, "loss": 0.4527, "step": 8400 }, { "epoch": 2.3512454520011197, "grad_norm": 0.2532217279249197, "learning_rate": 1.225985485602465e-05, "loss": 0.4621, "step": 8401 }, { "epoch": 2.3515253288553035, "grad_norm": 0.2606280595395842, "learning_rate": 1.2249738156175906e-05, "loss": 0.4529, "step": 8402 }, { "epoch": 2.351805205709488, "grad_norm": 0.25089789120110084, "learning_rate": 1.2239625049418213e-05, "loss": 0.4555, "step": 8403 }, { "epoch": 2.352085082563672, "grad_norm": 0.2633404388003439, "learning_rate": 1.2229515536714143e-05, "loss": 0.464, "step": 8404 }, { "epoch": 2.352364959417856, "grad_norm": 0.24919943604379635, "learning_rate": 1.2219409619025923e-05, "loss": 0.4154, "step": 8405 }, { "epoch": 2.3526448362720402, "grad_norm": 0.24462315372296126, "learning_rate": 1.2209307297315432e-05, "loss": 0.4423, "step": 8406 }, { "epoch": 2.3529247131262245, "grad_norm": 0.25343039410110674, "learning_rate": 1.2199208572544235e-05, "loss": 0.4749, "step": 8407 }, { "epoch": 2.3532045899804084, "grad_norm": 0.24891937821030463, "learning_rate": 1.2189113445673528e-05, "loss": 0.4557, "step": 8408 }, { "epoch": 2.3534844668345927, "grad_norm": 0.2492162822411207, "learning_rate": 1.2179021917664169e-05, "loss": 0.4544, "step": 8409 }, { "epoch": 2.353764343688777, "grad_norm": 0.2537192931013115, "learning_rate": 1.2168933989476667e-05, "loss": 0.4774, "step": 8410 }, { "epoch": 2.3540442205429613, "grad_norm": 0.2506170605975324, "learning_rate": 1.2158849662071203e-05, "loss": 0.4484, "step": 8411 }, { "epoch": 2.354324097397145, "grad_norm": 0.25102479492205515, "learning_rate": 1.2148768936407612e-05, "loss": 0.4526, "step": 8412 }, { "epoch": 2.3546039742513294, "grad_norm": 0.26854370336913724, "learning_rate": 1.213869181344538e-05, "loss": 0.4581, "step": 8413 }, { "epoch": 2.3548838511055137, "grad_norm": 0.2509218105622706, "learning_rate": 1.2128618294143667e-05, "loss": 0.4535, "step": 8414 }, { "epoch": 2.355163727959698, "grad_norm": 0.25942931229597765, "learning_rate": 1.2118548379461247e-05, "loss": 0.4636, "step": 8415 }, { "epoch": 2.355443604813882, "grad_norm": 0.24517258144625204, "learning_rate": 1.2108482070356596e-05, "loss": 0.4555, "step": 8416 }, { "epoch": 2.355723481668066, "grad_norm": 0.24999274596383914, "learning_rate": 1.2098419367787833e-05, "loss": 0.4525, "step": 8417 }, { "epoch": 2.3560033585222504, "grad_norm": 0.241417344554353, "learning_rate": 1.2088360272712728e-05, "loss": 0.4468, "step": 8418 }, { "epoch": 2.3562832353764342, "grad_norm": 0.25576661641780774, "learning_rate": 1.2078304786088707e-05, "loss": 0.472, "step": 8419 }, { "epoch": 2.3565631122306185, "grad_norm": 0.24005259286460717, "learning_rate": 1.2068252908872867e-05, "loss": 0.4407, "step": 8420 }, { "epoch": 2.356842989084803, "grad_norm": 0.24663659363954651, "learning_rate": 1.2058204642021948e-05, "loss": 0.4564, "step": 8421 }, { "epoch": 2.3571228659389867, "grad_norm": 0.2643555755912351, "learning_rate": 1.2048159986492347e-05, "loss": 0.4572, "step": 8422 }, { "epoch": 2.357402742793171, "grad_norm": 0.24345084309527462, "learning_rate": 1.2038118943240118e-05, "loss": 0.4549, "step": 8423 }, { "epoch": 2.3576826196473553, "grad_norm": 0.258733728353715, "learning_rate": 1.2028081513220962e-05, "loss": 0.4549, "step": 8424 }, { "epoch": 2.357962496501539, "grad_norm": 0.2509791103448085, "learning_rate": 1.2018047697390279e-05, "loss": 0.4731, "step": 8425 }, { "epoch": 2.3582423733557234, "grad_norm": 0.24755504442418122, "learning_rate": 1.2008017496703072e-05, "loss": 0.4544, "step": 8426 }, { "epoch": 2.3585222502099077, "grad_norm": 0.2569324588508519, "learning_rate": 1.1997990912114026e-05, "loss": 0.4566, "step": 8427 }, { "epoch": 2.358802127064092, "grad_norm": 0.24303490972506264, "learning_rate": 1.1987967944577477e-05, "loss": 0.4445, "step": 8428 }, { "epoch": 2.359082003918276, "grad_norm": 0.2527798835191069, "learning_rate": 1.1977948595047417e-05, "loss": 0.4385, "step": 8429 }, { "epoch": 2.35936188077246, "grad_norm": 0.25286962790773965, "learning_rate": 1.1967932864477488e-05, "loss": 0.4457, "step": 8430 }, { "epoch": 2.3596417576266444, "grad_norm": 0.2490643236869597, "learning_rate": 1.1957920753820994e-05, "loss": 0.4457, "step": 8431 }, { "epoch": 2.3599216344808283, "grad_norm": 0.25069382241206445, "learning_rate": 1.1947912264030914e-05, "loss": 0.4784, "step": 8432 }, { "epoch": 2.3602015113350125, "grad_norm": 0.26386931472411046, "learning_rate": 1.1937907396059833e-05, "loss": 0.454, "step": 8433 }, { "epoch": 2.360481388189197, "grad_norm": 0.2561981374840693, "learning_rate": 1.1927906150860025e-05, "loss": 0.4629, "step": 8434 }, { "epoch": 2.360761265043381, "grad_norm": 0.26558819123351496, "learning_rate": 1.191790852938342e-05, "loss": 0.4646, "step": 8435 }, { "epoch": 2.361041141897565, "grad_norm": 0.25531702647885207, "learning_rate": 1.19079145325816e-05, "loss": 0.4591, "step": 8436 }, { "epoch": 2.3613210187517493, "grad_norm": 0.27255461611800547, "learning_rate": 1.1897924161405788e-05, "loss": 0.461, "step": 8437 }, { "epoch": 2.3616008956059336, "grad_norm": 0.24334621995825967, "learning_rate": 1.1887937416806888e-05, "loss": 0.4479, "step": 8438 }, { "epoch": 2.3618807724601174, "grad_norm": 0.2537850517740165, "learning_rate": 1.1877954299735434e-05, "loss": 0.4443, "step": 8439 }, { "epoch": 2.3621606493143017, "grad_norm": 0.2495366582398718, "learning_rate": 1.186797481114163e-05, "loss": 0.4293, "step": 8440 }, { "epoch": 2.362440526168486, "grad_norm": 0.24886917301015574, "learning_rate": 1.1857998951975308e-05, "loss": 0.4593, "step": 8441 }, { "epoch": 2.36272040302267, "grad_norm": 0.25535409313463214, "learning_rate": 1.1848026723186012e-05, "loss": 0.4564, "step": 8442 }, { "epoch": 2.363000279876854, "grad_norm": 0.24791219708171475, "learning_rate": 1.1838058125722889e-05, "loss": 0.4333, "step": 8443 }, { "epoch": 2.3632801567310384, "grad_norm": 0.24544745511039154, "learning_rate": 1.1828093160534753e-05, "loss": 0.4615, "step": 8444 }, { "epoch": 2.3635600335852223, "grad_norm": 0.24488233523327232, "learning_rate": 1.1818131828570073e-05, "loss": 0.4547, "step": 8445 }, { "epoch": 2.3638399104394066, "grad_norm": 0.24834077034366778, "learning_rate": 1.1808174130776978e-05, "loss": 0.4404, "step": 8446 }, { "epoch": 2.364119787293591, "grad_norm": 0.2515092195111415, "learning_rate": 1.179822006810325e-05, "loss": 0.447, "step": 8447 }, { "epoch": 2.364399664147775, "grad_norm": 0.24456693293954418, "learning_rate": 1.1788269641496314e-05, "loss": 0.4382, "step": 8448 }, { "epoch": 2.364679541001959, "grad_norm": 0.24135189123791034, "learning_rate": 1.1778322851903262e-05, "loss": 0.4228, "step": 8449 }, { "epoch": 2.3649594178561433, "grad_norm": 0.24672020178430434, "learning_rate": 1.1768379700270837e-05, "loss": 0.4439, "step": 8450 }, { "epoch": 2.3652392947103276, "grad_norm": 0.24364002490914613, "learning_rate": 1.1758440187545444e-05, "loss": 0.4511, "step": 8451 }, { "epoch": 2.365519171564512, "grad_norm": 0.25583148991624116, "learning_rate": 1.1748504314673103e-05, "loss": 0.4371, "step": 8452 }, { "epoch": 2.3657990484186957, "grad_norm": 0.25055055778103663, "learning_rate": 1.173857208259953e-05, "loss": 0.4433, "step": 8453 }, { "epoch": 2.36607892527288, "grad_norm": 0.25223392882482665, "learning_rate": 1.1728643492270086e-05, "loss": 0.4564, "step": 8454 }, { "epoch": 2.3663588021270643, "grad_norm": 0.2552693560851051, "learning_rate": 1.1718718544629775e-05, "loss": 0.4689, "step": 8455 }, { "epoch": 2.366638678981248, "grad_norm": 0.2514387784740974, "learning_rate": 1.170879724062326e-05, "loss": 0.4516, "step": 8456 }, { "epoch": 2.3669185558354324, "grad_norm": 0.25300993156690305, "learning_rate": 1.1698879581194855e-05, "loss": 0.4668, "step": 8457 }, { "epoch": 2.3671984326896167, "grad_norm": 0.24725806328930555, "learning_rate": 1.1688965567288518e-05, "loss": 0.4547, "step": 8458 }, { "epoch": 2.3674783095438006, "grad_norm": 0.2640539988900624, "learning_rate": 1.1679055199847893e-05, "loss": 0.4723, "step": 8459 }, { "epoch": 2.367758186397985, "grad_norm": 0.2553325697968964, "learning_rate": 1.1669148479816244e-05, "loss": 0.432, "step": 8460 }, { "epoch": 2.368038063252169, "grad_norm": 0.25233999820344744, "learning_rate": 1.1659245408136498e-05, "loss": 0.4672, "step": 8461 }, { "epoch": 2.368317940106353, "grad_norm": 0.24428085281649775, "learning_rate": 1.1649345985751237e-05, "loss": 0.4499, "step": 8462 }, { "epoch": 2.3685978169605373, "grad_norm": 0.25982128241925356, "learning_rate": 1.1639450213602687e-05, "loss": 0.4585, "step": 8463 }, { "epoch": 2.3688776938147216, "grad_norm": 0.2442033543668397, "learning_rate": 1.1629558092632736e-05, "loss": 0.4625, "step": 8464 }, { "epoch": 2.369157570668906, "grad_norm": 0.24608761580131538, "learning_rate": 1.1619669623782925e-05, "loss": 0.4485, "step": 8465 }, { "epoch": 2.3694374475230897, "grad_norm": 0.25627952642747487, "learning_rate": 1.1609784807994445e-05, "loss": 0.4437, "step": 8466 }, { "epoch": 2.369717324377274, "grad_norm": 0.26470736320009125, "learning_rate": 1.1599903646208127e-05, "loss": 0.4681, "step": 8467 }, { "epoch": 2.3699972012314583, "grad_norm": 0.2504622206793435, "learning_rate": 1.159002613936448e-05, "loss": 0.4427, "step": 8468 }, { "epoch": 2.370277078085642, "grad_norm": 0.24825332132246183, "learning_rate": 1.1580152288403656e-05, "loss": 0.4409, "step": 8469 }, { "epoch": 2.3705569549398264, "grad_norm": 0.25954707635115476, "learning_rate": 1.1570282094265428e-05, "loss": 0.4522, "step": 8470 }, { "epoch": 2.3708368317940107, "grad_norm": 0.25321343728879336, "learning_rate": 1.156041555788926e-05, "loss": 0.451, "step": 8471 }, { "epoch": 2.371116708648195, "grad_norm": 0.24750246691792838, "learning_rate": 1.1550552680214255e-05, "loss": 0.4601, "step": 8472 }, { "epoch": 2.371396585502379, "grad_norm": 0.2580413676391593, "learning_rate": 1.1540693462179164e-05, "loss": 0.4579, "step": 8473 }, { "epoch": 2.371676462356563, "grad_norm": 0.243592156305927, "learning_rate": 1.1530837904722397e-05, "loss": 0.4402, "step": 8474 }, { "epoch": 2.3719563392107474, "grad_norm": 0.25033382298234824, "learning_rate": 1.1520986008782014e-05, "loss": 0.4385, "step": 8475 }, { "epoch": 2.3722362160649313, "grad_norm": 0.27509721948040766, "learning_rate": 1.1511137775295704e-05, "loss": 0.4441, "step": 8476 }, { "epoch": 2.3725160929191156, "grad_norm": 0.22598811889755624, "learning_rate": 1.1501293205200859e-05, "loss": 0.4515, "step": 8477 }, { "epoch": 2.3727959697733, "grad_norm": 0.2557994410155763, "learning_rate": 1.1491452299434474e-05, "loss": 0.4377, "step": 8478 }, { "epoch": 2.3730758466274837, "grad_norm": 0.25167272119375567, "learning_rate": 1.1481615058933215e-05, "loss": 0.4489, "step": 8479 }, { "epoch": 2.373355723481668, "grad_norm": 0.25403894749544725, "learning_rate": 1.1471781484633393e-05, "loss": 0.4554, "step": 8480 }, { "epoch": 2.3736356003358523, "grad_norm": 0.25835384953502943, "learning_rate": 1.146195157747097e-05, "loss": 0.4611, "step": 8481 }, { "epoch": 2.373915477190036, "grad_norm": 0.24090620200456578, "learning_rate": 1.1452125338381575e-05, "loss": 0.4265, "step": 8482 }, { "epoch": 2.3741953540442204, "grad_norm": 0.26330330751365594, "learning_rate": 1.1442302768300462e-05, "loss": 0.455, "step": 8483 }, { "epoch": 2.3744752308984047, "grad_norm": 0.2647296160726092, "learning_rate": 1.1432483868162553e-05, "loss": 0.4403, "step": 8484 }, { "epoch": 2.374755107752589, "grad_norm": 0.2562155822778771, "learning_rate": 1.1422668638902418e-05, "loss": 0.4604, "step": 8485 }, { "epoch": 2.375034984606773, "grad_norm": 0.24498676345082143, "learning_rate": 1.141285708145427e-05, "loss": 0.4414, "step": 8486 }, { "epoch": 2.375314861460957, "grad_norm": 0.25020471503214947, "learning_rate": 1.1403049196752003e-05, "loss": 0.4504, "step": 8487 }, { "epoch": 2.3755947383151415, "grad_norm": 0.2571397649585966, "learning_rate": 1.1393244985729102e-05, "loss": 0.4608, "step": 8488 }, { "epoch": 2.3758746151693257, "grad_norm": 0.24323730976911384, "learning_rate": 1.1383444449318753e-05, "loss": 0.4497, "step": 8489 }, { "epoch": 2.3761544920235096, "grad_norm": 0.25109532925424516, "learning_rate": 1.1373647588453773e-05, "loss": 0.4685, "step": 8490 }, { "epoch": 2.376434368877694, "grad_norm": 0.2479280759369505, "learning_rate": 1.1363854404066638e-05, "loss": 0.4619, "step": 8491 }, { "epoch": 2.376714245731878, "grad_norm": 0.25209496643912516, "learning_rate": 1.1354064897089462e-05, "loss": 0.4549, "step": 8492 }, { "epoch": 2.376994122586062, "grad_norm": 0.2538103598208296, "learning_rate": 1.1344279068454011e-05, "loss": 0.4564, "step": 8493 }, { "epoch": 2.3772739994402463, "grad_norm": 0.2567942540639802, "learning_rate": 1.1334496919091731e-05, "loss": 0.4469, "step": 8494 }, { "epoch": 2.3775538762944306, "grad_norm": 0.2616791211581202, "learning_rate": 1.1324718449933669e-05, "loss": 0.4646, "step": 8495 }, { "epoch": 2.3778337531486144, "grad_norm": 0.24270226011231513, "learning_rate": 1.131494366191056e-05, "loss": 0.453, "step": 8496 }, { "epoch": 2.3781136300027987, "grad_norm": 0.25307643984953343, "learning_rate": 1.1305172555952758e-05, "loss": 0.4402, "step": 8497 }, { "epoch": 2.378393506856983, "grad_norm": 0.23995819948642394, "learning_rate": 1.1295405132990294e-05, "loss": 0.4228, "step": 8498 }, { "epoch": 2.378673383711167, "grad_norm": 0.25915800116593424, "learning_rate": 1.128564139395284e-05, "loss": 0.4732, "step": 8499 }, { "epoch": 2.378953260565351, "grad_norm": 0.25760410570267217, "learning_rate": 1.12758813397697e-05, "loss": 0.4447, "step": 8500 }, { "epoch": 2.3792331374195355, "grad_norm": 0.24910130559213378, "learning_rate": 1.1266124971369851e-05, "loss": 0.4556, "step": 8501 }, { "epoch": 2.3795130142737198, "grad_norm": 0.2554496544371673, "learning_rate": 1.1256372289681905e-05, "loss": 0.4579, "step": 8502 }, { "epoch": 2.3797928911279036, "grad_norm": 0.25992853781633357, "learning_rate": 1.1246623295634134e-05, "loss": 0.4482, "step": 8503 }, { "epoch": 2.380072767982088, "grad_norm": 0.2515653705583453, "learning_rate": 1.1236877990154449e-05, "loss": 0.4457, "step": 8504 }, { "epoch": 2.380352644836272, "grad_norm": 0.2588612060050965, "learning_rate": 1.1227136374170428e-05, "loss": 0.4693, "step": 8505 }, { "epoch": 2.380632521690456, "grad_norm": 0.23450885032738195, "learning_rate": 1.121739844860925e-05, "loss": 0.4372, "step": 8506 }, { "epoch": 2.3809123985446403, "grad_norm": 0.24992041320964856, "learning_rate": 1.12076642143978e-05, "loss": 0.467, "step": 8507 }, { "epoch": 2.3811922753988246, "grad_norm": 0.2460138063010371, "learning_rate": 1.119793367246258e-05, "loss": 0.4452, "step": 8508 }, { "epoch": 2.381472152253009, "grad_norm": 0.2610456256884649, "learning_rate": 1.1188206823729752e-05, "loss": 0.473, "step": 8509 }, { "epoch": 2.3817520291071927, "grad_norm": 0.24559419442246114, "learning_rate": 1.1178483669125112e-05, "loss": 0.443, "step": 8510 }, { "epoch": 2.382031905961377, "grad_norm": 0.2508224584655094, "learning_rate": 1.1168764209574134e-05, "loss": 0.4487, "step": 8511 }, { "epoch": 2.3823117828155613, "grad_norm": 0.2620645803352295, "learning_rate": 1.1159048446001918e-05, "loss": 0.4419, "step": 8512 }, { "epoch": 2.382591659669745, "grad_norm": 0.24188700477460595, "learning_rate": 1.1149336379333208e-05, "loss": 0.4522, "step": 8513 }, { "epoch": 2.3828715365239295, "grad_norm": 0.241568253265115, "learning_rate": 1.113962801049241e-05, "loss": 0.4411, "step": 8514 }, { "epoch": 2.3831514133781138, "grad_norm": 0.24872667384802113, "learning_rate": 1.112992334040357e-05, "loss": 0.4552, "step": 8515 }, { "epoch": 2.3834312902322976, "grad_norm": 0.24840115162390053, "learning_rate": 1.1120222369990379e-05, "loss": 0.4572, "step": 8516 }, { "epoch": 2.383711167086482, "grad_norm": 0.2478572673421125, "learning_rate": 1.1110525100176183e-05, "loss": 0.4243, "step": 8517 }, { "epoch": 2.383991043940666, "grad_norm": 0.23767018995649516, "learning_rate": 1.1100831531883982e-05, "loss": 0.4451, "step": 8518 }, { "epoch": 2.38427092079485, "grad_norm": 0.2535901641769806, "learning_rate": 1.1091141666036403e-05, "loss": 0.4487, "step": 8519 }, { "epoch": 2.3845507976490343, "grad_norm": 0.26581437337015335, "learning_rate": 1.1081455503555743e-05, "loss": 0.4511, "step": 8520 }, { "epoch": 2.3848306745032186, "grad_norm": 0.26287891550412334, "learning_rate": 1.1071773045363931e-05, "loss": 0.4701, "step": 8521 }, { "epoch": 2.385110551357403, "grad_norm": 0.25444013359480133, "learning_rate": 1.1062094292382547e-05, "loss": 0.4386, "step": 8522 }, { "epoch": 2.3853904282115868, "grad_norm": 0.2505144992278886, "learning_rate": 1.1052419245532842e-05, "loss": 0.4767, "step": 8523 }, { "epoch": 2.385670305065771, "grad_norm": 0.2472367432507622, "learning_rate": 1.1042747905735651e-05, "loss": 0.4735, "step": 8524 }, { "epoch": 2.3859501819199553, "grad_norm": 0.25266064924184417, "learning_rate": 1.1033080273911522e-05, "loss": 0.4655, "step": 8525 }, { "epoch": 2.386230058774139, "grad_norm": 0.2578619442893693, "learning_rate": 1.1023416350980625e-05, "loss": 0.4431, "step": 8526 }, { "epoch": 2.3865099356283235, "grad_norm": 0.2545384617196527, "learning_rate": 1.101375613786278e-05, "loss": 0.4405, "step": 8527 }, { "epoch": 2.3867898124825078, "grad_norm": 0.2574613754642643, "learning_rate": 1.1004099635477427e-05, "loss": 0.4486, "step": 8528 }, { "epoch": 2.387069689336692, "grad_norm": 0.24314583711923138, "learning_rate": 1.0994446844743717e-05, "loss": 0.4427, "step": 8529 }, { "epoch": 2.387349566190876, "grad_norm": 0.2620409814504985, "learning_rate": 1.0984797766580384e-05, "loss": 0.4463, "step": 8530 }, { "epoch": 2.38762944304506, "grad_norm": 0.25234223114246107, "learning_rate": 1.0975152401905842e-05, "loss": 0.4484, "step": 8531 }, { "epoch": 2.3879093198992445, "grad_norm": 0.250087034848043, "learning_rate": 1.0965510751638131e-05, "loss": 0.4757, "step": 8532 }, { "epoch": 2.3881891967534283, "grad_norm": 0.25321507095995915, "learning_rate": 1.0955872816694963e-05, "loss": 0.4505, "step": 8533 }, { "epoch": 2.3884690736076126, "grad_norm": 0.2605523473618677, "learning_rate": 1.094623859799367e-05, "loss": 0.4498, "step": 8534 }, { "epoch": 2.388748950461797, "grad_norm": 0.2541970227069143, "learning_rate": 1.0936608096451245e-05, "loss": 0.4425, "step": 8535 }, { "epoch": 2.3890288273159808, "grad_norm": 0.25825358231260204, "learning_rate": 1.0926981312984324e-05, "loss": 0.4499, "step": 8536 }, { "epoch": 2.389308704170165, "grad_norm": 0.25421668578967516, "learning_rate": 1.0917358248509196e-05, "loss": 0.4433, "step": 8537 }, { "epoch": 2.3895885810243493, "grad_norm": 0.2537661699338842, "learning_rate": 1.0907738903941789e-05, "loss": 0.4414, "step": 8538 }, { "epoch": 2.3898684578785336, "grad_norm": 0.2580294505365846, "learning_rate": 1.089812328019767e-05, "loss": 0.4607, "step": 8539 }, { "epoch": 2.3901483347327175, "grad_norm": 0.2511456533382572, "learning_rate": 1.0888511378192062e-05, "loss": 0.4563, "step": 8540 }, { "epoch": 2.3904282115869018, "grad_norm": 0.2549293949705993, "learning_rate": 1.0878903198839846e-05, "loss": 0.4511, "step": 8541 }, { "epoch": 2.390708088441086, "grad_norm": 0.24967874919311853, "learning_rate": 1.0869298743055512e-05, "loss": 0.45, "step": 8542 }, { "epoch": 2.39098796529527, "grad_norm": 0.2643741079619027, "learning_rate": 1.0859698011753222e-05, "loss": 0.4368, "step": 8543 }, { "epoch": 2.391267842149454, "grad_norm": 0.2370939773182887, "learning_rate": 1.0850101005846786e-05, "loss": 0.4397, "step": 8544 }, { "epoch": 2.3915477190036385, "grad_norm": 0.24725258826462226, "learning_rate": 1.0840507726249632e-05, "loss": 0.4419, "step": 8545 }, { "epoch": 2.391827595857823, "grad_norm": 0.2551646833782642, "learning_rate": 1.0830918173874887e-05, "loss": 0.4516, "step": 8546 }, { "epoch": 2.3921074727120066, "grad_norm": 0.25736879017287306, "learning_rate": 1.0821332349635272e-05, "loss": 0.435, "step": 8547 }, { "epoch": 2.392387349566191, "grad_norm": 0.25750594326208964, "learning_rate": 1.0811750254443177e-05, "loss": 0.4558, "step": 8548 }, { "epoch": 2.392667226420375, "grad_norm": 0.2516396755730355, "learning_rate": 1.0802171889210621e-05, "loss": 0.4559, "step": 8549 }, { "epoch": 2.392947103274559, "grad_norm": 0.2537997014701762, "learning_rate": 1.079259725484929e-05, "loss": 0.4582, "step": 8550 }, { "epoch": 2.3932269801287434, "grad_norm": 0.24586619868940723, "learning_rate": 1.0783026352270497e-05, "loss": 0.4608, "step": 8551 }, { "epoch": 2.3935068569829276, "grad_norm": 0.23958963468288366, "learning_rate": 1.0773459182385203e-05, "loss": 0.4416, "step": 8552 }, { "epoch": 2.3937867338371115, "grad_norm": 0.25757865687385034, "learning_rate": 1.0763895746104025e-05, "loss": 0.4466, "step": 8553 }, { "epoch": 2.394066610691296, "grad_norm": 0.24553407549061387, "learning_rate": 1.075433604433721e-05, "loss": 0.4508, "step": 8554 }, { "epoch": 2.39434648754548, "grad_norm": 0.24820752981238575, "learning_rate": 1.0744780077994654e-05, "loss": 0.46, "step": 8555 }, { "epoch": 2.394626364399664, "grad_norm": 0.2597759636128657, "learning_rate": 1.0735227847985907e-05, "loss": 0.4711, "step": 8556 }, { "epoch": 2.394906241253848, "grad_norm": 0.2565794358175612, "learning_rate": 1.0725679355220147e-05, "loss": 0.4553, "step": 8557 }, { "epoch": 2.3951861181080325, "grad_norm": 0.25660870939499814, "learning_rate": 1.071613460060621e-05, "loss": 0.4579, "step": 8558 }, { "epoch": 2.395465994962217, "grad_norm": 0.2589972903164503, "learning_rate": 1.0706593585052588e-05, "loss": 0.4319, "step": 8559 }, { "epoch": 2.3957458718164006, "grad_norm": 0.25647126213705046, "learning_rate": 1.0697056309467363e-05, "loss": 0.4353, "step": 8560 }, { "epoch": 2.396025748670585, "grad_norm": 0.25211816040062407, "learning_rate": 1.0687522774758319e-05, "loss": 0.4738, "step": 8561 }, { "epoch": 2.3963056255247692, "grad_norm": 0.269195193074081, "learning_rate": 1.0677992981832847e-05, "loss": 0.4631, "step": 8562 }, { "epoch": 2.396585502378953, "grad_norm": 0.2572591613253173, "learning_rate": 1.0668466931598026e-05, "loss": 0.435, "step": 8563 }, { "epoch": 2.3968653792331374, "grad_norm": 0.2433479360365131, "learning_rate": 1.0658944624960537e-05, "loss": 0.4382, "step": 8564 }, { "epoch": 2.3971452560873217, "grad_norm": 0.25964090246581323, "learning_rate": 1.0649426062826717e-05, "loss": 0.4688, "step": 8565 }, { "epoch": 2.397425132941506, "grad_norm": 0.25815098741393544, "learning_rate": 1.0639911246102551e-05, "loss": 0.456, "step": 8566 }, { "epoch": 2.39770500979569, "grad_norm": 0.25282898037390106, "learning_rate": 1.0630400175693667e-05, "loss": 0.4703, "step": 8567 }, { "epoch": 2.397984886649874, "grad_norm": 0.25190562989901727, "learning_rate": 1.0620892852505326e-05, "loss": 0.4586, "step": 8568 }, { "epoch": 2.3982647635040584, "grad_norm": 0.2611461007462448, "learning_rate": 1.0611389277442452e-05, "loss": 0.4848, "step": 8569 }, { "epoch": 2.398544640358242, "grad_norm": 0.24982906183339657, "learning_rate": 1.0601889451409586e-05, "loss": 0.4606, "step": 8570 }, { "epoch": 2.3988245172124265, "grad_norm": 0.26870082819238894, "learning_rate": 1.0592393375310932e-05, "loss": 0.4582, "step": 8571 }, { "epoch": 2.399104394066611, "grad_norm": 0.2591418969042139, "learning_rate": 1.0582901050050343e-05, "loss": 0.4661, "step": 8572 }, { "epoch": 2.3993842709207946, "grad_norm": 0.2562995108371808, "learning_rate": 1.0573412476531286e-05, "loss": 0.4311, "step": 8573 }, { "epoch": 2.399664147774979, "grad_norm": 0.2357820928071976, "learning_rate": 1.0563927655656903e-05, "loss": 0.4284, "step": 8574 }, { "epoch": 2.3999440246291632, "grad_norm": 0.2585138815343089, "learning_rate": 1.0554446588329958e-05, "loss": 0.4669, "step": 8575 }, { "epoch": 2.400223901483347, "grad_norm": 0.24608526786812152, "learning_rate": 1.0544969275452865e-05, "loss": 0.4571, "step": 8576 }, { "epoch": 2.4005037783375314, "grad_norm": 0.25021791058956777, "learning_rate": 1.0535495717927697e-05, "loss": 0.4499, "step": 8577 }, { "epoch": 2.4007836551917157, "grad_norm": 0.2394709493644098, "learning_rate": 1.0526025916656119e-05, "loss": 0.4286, "step": 8578 }, { "epoch": 2.4010635320459, "grad_norm": 0.24850933716896373, "learning_rate": 1.0516559872539472e-05, "loss": 0.4709, "step": 8579 }, { "epoch": 2.401343408900084, "grad_norm": 0.2473285040876372, "learning_rate": 1.050709758647877e-05, "loss": 0.4409, "step": 8580 }, { "epoch": 2.401623285754268, "grad_norm": 0.24585197917796422, "learning_rate": 1.049763905937463e-05, "loss": 0.4441, "step": 8581 }, { "epoch": 2.4019031626084524, "grad_norm": 0.2561980962896979, "learning_rate": 1.0488184292127312e-05, "loss": 0.4559, "step": 8582 }, { "epoch": 2.4021830394626367, "grad_norm": 0.2628419010408199, "learning_rate": 1.0478733285636732e-05, "loss": 0.459, "step": 8583 }, { "epoch": 2.4024629163168205, "grad_norm": 0.26645929706731025, "learning_rate": 1.0469286040802434e-05, "loss": 0.4646, "step": 8584 }, { "epoch": 2.402742793171005, "grad_norm": 0.2498350546475695, "learning_rate": 1.045984255852362e-05, "loss": 0.4455, "step": 8585 }, { "epoch": 2.403022670025189, "grad_norm": 0.24867557897568118, "learning_rate": 1.0450402839699125e-05, "loss": 0.4613, "step": 8586 }, { "epoch": 2.403302546879373, "grad_norm": 0.2589311019287912, "learning_rate": 1.044096688522742e-05, "loss": 0.4583, "step": 8587 }, { "epoch": 2.4035824237335572, "grad_norm": 0.2594301193961265, "learning_rate": 1.0431534696006629e-05, "loss": 0.4581, "step": 8588 }, { "epoch": 2.4038623005877415, "grad_norm": 0.2444535036771609, "learning_rate": 1.0422106272934518e-05, "loss": 0.4344, "step": 8589 }, { "epoch": 2.4041421774419254, "grad_norm": 0.24864364982483306, "learning_rate": 1.0412681616908482e-05, "loss": 0.4372, "step": 8590 }, { "epoch": 2.4044220542961097, "grad_norm": 0.25733610925231926, "learning_rate": 1.0403260728825564e-05, "loss": 0.4641, "step": 8591 }, { "epoch": 2.404701931150294, "grad_norm": 0.24866074269218585, "learning_rate": 1.0393843609582454e-05, "loss": 0.4525, "step": 8592 }, { "epoch": 2.404981808004478, "grad_norm": 0.25090480950398686, "learning_rate": 1.0384430260075483e-05, "loss": 0.4426, "step": 8593 }, { "epoch": 2.405261684858662, "grad_norm": 0.2527307782120318, "learning_rate": 1.037502068120061e-05, "loss": 0.4462, "step": 8594 }, { "epoch": 2.4055415617128464, "grad_norm": 0.26634405280805074, "learning_rate": 1.0365614873853462e-05, "loss": 0.4614, "step": 8595 }, { "epoch": 2.4058214385670307, "grad_norm": 0.2571775265876603, "learning_rate": 1.0356212838929263e-05, "loss": 0.4472, "step": 8596 }, { "epoch": 2.4061013154212145, "grad_norm": 0.27237001904567404, "learning_rate": 1.03468145773229e-05, "loss": 0.4605, "step": 8597 }, { "epoch": 2.406381192275399, "grad_norm": 0.2484780604021942, "learning_rate": 1.0337420089928934e-05, "loss": 0.4328, "step": 8598 }, { "epoch": 2.406661069129583, "grad_norm": 0.2584202081029937, "learning_rate": 1.0328029377641524e-05, "loss": 0.4377, "step": 8599 }, { "epoch": 2.406940945983767, "grad_norm": 0.2535064978309816, "learning_rate": 1.031864244135448e-05, "loss": 0.4631, "step": 8600 }, { "epoch": 2.4072208228379512, "grad_norm": 0.26025377701465413, "learning_rate": 1.0309259281961264e-05, "loss": 0.4417, "step": 8601 }, { "epoch": 2.4075006996921355, "grad_norm": 0.2616665535860985, "learning_rate": 1.0299879900354964e-05, "loss": 0.469, "step": 8602 }, { "epoch": 2.40778057654632, "grad_norm": 0.2413895721759028, "learning_rate": 1.0290504297428316e-05, "loss": 0.4332, "step": 8603 }, { "epoch": 2.4080604534005037, "grad_norm": 0.26131763707584116, "learning_rate": 1.0281132474073697e-05, "loss": 0.4587, "step": 8604 }, { "epoch": 2.408340330254688, "grad_norm": 0.2553180213830489, "learning_rate": 1.0271764431183117e-05, "loss": 0.4513, "step": 8605 }, { "epoch": 2.4086202071088723, "grad_norm": 0.2531893195043381, "learning_rate": 1.0262400169648235e-05, "loss": 0.4548, "step": 8606 }, { "epoch": 2.408900083963056, "grad_norm": 0.2637153486653331, "learning_rate": 1.025303969036035e-05, "loss": 0.4609, "step": 8607 }, { "epoch": 2.4091799608172404, "grad_norm": 0.24966077178018864, "learning_rate": 1.0243682994210396e-05, "loss": 0.4582, "step": 8608 }, { "epoch": 2.4094598376714247, "grad_norm": 0.2477961428780549, "learning_rate": 1.0234330082088944e-05, "loss": 0.4368, "step": 8609 }, { "epoch": 2.4097397145256085, "grad_norm": 0.26031003578224476, "learning_rate": 1.0224980954886215e-05, "loss": 0.4524, "step": 8610 }, { "epoch": 2.410019591379793, "grad_norm": 0.24885377610396178, "learning_rate": 1.0215635613492059e-05, "loss": 0.4661, "step": 8611 }, { "epoch": 2.410299468233977, "grad_norm": 0.2544164546472397, "learning_rate": 1.0206294058795973e-05, "loss": 0.4611, "step": 8612 }, { "epoch": 2.410579345088161, "grad_norm": 0.2605012999203762, "learning_rate": 1.019695629168711e-05, "loss": 0.4638, "step": 8613 }, { "epoch": 2.4108592219423453, "grad_norm": 0.2485367521380268, "learning_rate": 1.0187622313054196e-05, "loss": 0.4499, "step": 8614 }, { "epoch": 2.4111390987965295, "grad_norm": 0.25561012536182637, "learning_rate": 1.0178292123785688e-05, "loss": 0.454, "step": 8615 }, { "epoch": 2.411418975650714, "grad_norm": 0.25548857242052186, "learning_rate": 1.0168965724769624e-05, "loss": 0.4544, "step": 8616 }, { "epoch": 2.4116988525048977, "grad_norm": 0.25797691362024977, "learning_rate": 1.0159643116893697e-05, "loss": 0.4391, "step": 8617 }, { "epoch": 2.411978729359082, "grad_norm": 0.25412226742638816, "learning_rate": 1.015032430104524e-05, "loss": 0.4566, "step": 8618 }, { "epoch": 2.4122586062132663, "grad_norm": 0.25735285136315833, "learning_rate": 1.0141009278111218e-05, "loss": 0.4634, "step": 8619 }, { "epoch": 2.4125384830674506, "grad_norm": 0.25326124853396187, "learning_rate": 1.0131698048978245e-05, "loss": 0.4612, "step": 8620 }, { "epoch": 2.4128183599216344, "grad_norm": 0.251539458502865, "learning_rate": 1.0122390614532567e-05, "loss": 0.4508, "step": 8621 }, { "epoch": 2.4130982367758187, "grad_norm": 0.24558310542459344, "learning_rate": 1.0113086975660069e-05, "loss": 0.4455, "step": 8622 }, { "epoch": 2.413378113630003, "grad_norm": 0.24438323246842192, "learning_rate": 1.0103787133246278e-05, "loss": 0.4411, "step": 8623 }, { "epoch": 2.413657990484187, "grad_norm": 0.2652900454859552, "learning_rate": 1.009449108817636e-05, "loss": 0.4565, "step": 8624 }, { "epoch": 2.413937867338371, "grad_norm": 0.24680876324047746, "learning_rate": 1.0085198841335113e-05, "loss": 0.4718, "step": 8625 }, { "epoch": 2.4142177441925554, "grad_norm": 0.25450331110947566, "learning_rate": 1.0075910393606985e-05, "loss": 0.4459, "step": 8626 }, { "epoch": 2.4144976210467393, "grad_norm": 0.24516058193926904, "learning_rate": 1.0066625745876057e-05, "loss": 0.4678, "step": 8627 }, { "epoch": 2.4147774979009236, "grad_norm": 0.256412199736452, "learning_rate": 1.0057344899026033e-05, "loss": 0.4535, "step": 8628 }, { "epoch": 2.415057374755108, "grad_norm": 0.24002494060523508, "learning_rate": 1.0048067853940285e-05, "loss": 0.4331, "step": 8629 }, { "epoch": 2.4153372516092917, "grad_norm": 0.247358564282145, "learning_rate": 1.00387946115018e-05, "loss": 0.4527, "step": 8630 }, { "epoch": 2.415617128463476, "grad_norm": 0.2591784977096219, "learning_rate": 1.0029525172593207e-05, "loss": 0.4442, "step": 8631 }, { "epoch": 2.4158970053176603, "grad_norm": 0.24240558272495144, "learning_rate": 1.0020259538096783e-05, "loss": 0.4423, "step": 8632 }, { "epoch": 2.4161768821718446, "grad_norm": 0.25622432374280196, "learning_rate": 1.0010997708894431e-05, "loss": 0.454, "step": 8633 }, { "epoch": 2.4164567590260284, "grad_norm": 0.2632338277656419, "learning_rate": 1.0001739685867707e-05, "loss": 0.4441, "step": 8634 }, { "epoch": 2.4167366358802127, "grad_norm": 0.25270250977478576, "learning_rate": 9.992485469897784e-06, "loss": 0.4423, "step": 8635 }, { "epoch": 2.417016512734397, "grad_norm": 0.24681380974500228, "learning_rate": 9.983235061865488e-06, "loss": 0.47, "step": 8636 }, { "epoch": 2.417296389588581, "grad_norm": 0.2686663215069251, "learning_rate": 9.973988462651273e-06, "loss": 0.4626, "step": 8637 }, { "epoch": 2.417576266442765, "grad_norm": 0.26299024284108885, "learning_rate": 9.96474567313524e-06, "loss": 0.4431, "step": 8638 }, { "epoch": 2.4178561432969494, "grad_norm": 0.26772873928657787, "learning_rate": 9.955506694197125e-06, "loss": 0.4559, "step": 8639 }, { "epoch": 2.4181360201511337, "grad_norm": 0.2477671241569365, "learning_rate": 9.9462715267163e-06, "loss": 0.4361, "step": 8640 }, { "epoch": 2.4184158970053176, "grad_norm": 0.24284858624233002, "learning_rate": 9.937040171571766e-06, "loss": 0.4486, "step": 8641 }, { "epoch": 2.418695773859502, "grad_norm": 0.25814165198388794, "learning_rate": 9.927812629642175e-06, "loss": 0.4504, "step": 8642 }, { "epoch": 2.418975650713686, "grad_norm": 0.23605886613592167, "learning_rate": 9.918588901805803e-06, "loss": 0.4407, "step": 8643 }, { "epoch": 2.41925552756787, "grad_norm": 0.2521803210979653, "learning_rate": 9.909368988940576e-06, "loss": 0.4465, "step": 8644 }, { "epoch": 2.4195354044220543, "grad_norm": 0.2530730669884669, "learning_rate": 9.900152891924048e-06, "loss": 0.4451, "step": 8645 }, { "epoch": 2.4198152812762386, "grad_norm": 0.2580819773635043, "learning_rate": 9.890940611633414e-06, "loss": 0.4462, "step": 8646 }, { "epoch": 2.4200951581304224, "grad_norm": 0.24238307584133761, "learning_rate": 9.881732148945506e-06, "loss": 0.4525, "step": 8647 }, { "epoch": 2.4203750349846067, "grad_norm": 0.25469538168173506, "learning_rate": 9.872527504736779e-06, "loss": 0.4343, "step": 8648 }, { "epoch": 2.420654911838791, "grad_norm": 0.25592123097193537, "learning_rate": 9.86332667988335e-06, "loss": 0.4282, "step": 8649 }, { "epoch": 2.420934788692975, "grad_norm": 0.2541152179969242, "learning_rate": 9.854129675260954e-06, "loss": 0.4392, "step": 8650 }, { "epoch": 2.421214665547159, "grad_norm": 0.2519173852407417, "learning_rate": 9.84493649174496e-06, "loss": 0.4615, "step": 8651 }, { "epoch": 2.4214945424013434, "grad_norm": 0.2509635352782236, "learning_rate": 9.835747130210394e-06, "loss": 0.4422, "step": 8652 }, { "epoch": 2.4217744192555277, "grad_norm": 0.25094145497835696, "learning_rate": 9.826561591531891e-06, "loss": 0.4614, "step": 8653 }, { "epoch": 2.4220542961097116, "grad_norm": 0.25343795423147647, "learning_rate": 9.81737987658375e-06, "loss": 0.4655, "step": 8654 }, { "epoch": 2.422334172963896, "grad_norm": 0.26416564626482847, "learning_rate": 9.808201986239873e-06, "loss": 0.4617, "step": 8655 }, { "epoch": 2.42261404981808, "grad_norm": 0.25689984577385283, "learning_rate": 9.799027921373832e-06, "loss": 0.4528, "step": 8656 }, { "epoch": 2.4228939266722644, "grad_norm": 0.29975297082457075, "learning_rate": 9.789857682858816e-06, "loss": 0.4642, "step": 8657 }, { "epoch": 2.4231738035264483, "grad_norm": 0.2606575568398438, "learning_rate": 9.780691271567655e-06, "loss": 0.4683, "step": 8658 }, { "epoch": 2.4234536803806326, "grad_norm": 0.25885281887690115, "learning_rate": 9.771528688372805e-06, "loss": 0.4433, "step": 8659 }, { "epoch": 2.423733557234817, "grad_norm": 0.2516278579206306, "learning_rate": 9.762369934146371e-06, "loss": 0.4694, "step": 8660 }, { "epoch": 2.4240134340890007, "grad_norm": 0.24606825146254277, "learning_rate": 9.753215009760092e-06, "loss": 0.4527, "step": 8661 }, { "epoch": 2.424293310943185, "grad_norm": 0.24880312290263892, "learning_rate": 9.744063916085334e-06, "loss": 0.4358, "step": 8662 }, { "epoch": 2.4245731877973693, "grad_norm": 0.2521487586611111, "learning_rate": 9.734916653993103e-06, "loss": 0.4598, "step": 8663 }, { "epoch": 2.424853064651553, "grad_norm": 0.24923895019671002, "learning_rate": 9.725773224354046e-06, "loss": 0.4237, "step": 8664 }, { "epoch": 2.4251329415057374, "grad_norm": 0.25683108938031485, "learning_rate": 9.716633628038435e-06, "loss": 0.4481, "step": 8665 }, { "epoch": 2.4254128183599217, "grad_norm": 0.2594524193860435, "learning_rate": 9.707497865916181e-06, "loss": 0.4476, "step": 8666 }, { "epoch": 2.4256926952141056, "grad_norm": 0.24778126512126722, "learning_rate": 9.698365938856834e-06, "loss": 0.4378, "step": 8667 }, { "epoch": 2.42597257206829, "grad_norm": 0.26174957377692537, "learning_rate": 9.689237847729576e-06, "loss": 0.4322, "step": 8668 }, { "epoch": 2.426252448922474, "grad_norm": 0.2464064176147182, "learning_rate": 9.680113593403222e-06, "loss": 0.4684, "step": 8669 }, { "epoch": 2.4265323257766584, "grad_norm": 0.25190262983201533, "learning_rate": 9.670993176746223e-06, "loss": 0.4698, "step": 8670 }, { "epoch": 2.4268122026308423, "grad_norm": 0.2457078652813174, "learning_rate": 9.661876598626669e-06, "loss": 0.4503, "step": 8671 }, { "epoch": 2.4270920794850266, "grad_norm": 0.24796744225271272, "learning_rate": 9.652763859912279e-06, "loss": 0.4283, "step": 8672 }, { "epoch": 2.427371956339211, "grad_norm": 0.25001875025402515, "learning_rate": 9.643654961470405e-06, "loss": 0.4471, "step": 8673 }, { "epoch": 2.4276518331933947, "grad_norm": 0.26094082727578394, "learning_rate": 9.634549904168038e-06, "loss": 0.4556, "step": 8674 }, { "epoch": 2.427931710047579, "grad_norm": 0.2527766814496001, "learning_rate": 9.625448688871808e-06, "loss": 0.4396, "step": 8675 }, { "epoch": 2.4282115869017633, "grad_norm": 0.2533891833120829, "learning_rate": 9.61635131644797e-06, "loss": 0.4496, "step": 8676 }, { "epoch": 2.4284914637559476, "grad_norm": 0.24953538158933503, "learning_rate": 9.60725778776242e-06, "loss": 0.4521, "step": 8677 }, { "epoch": 2.4287713406101314, "grad_norm": 0.25194688014968925, "learning_rate": 9.598168103680676e-06, "loss": 0.4422, "step": 8678 }, { "epoch": 2.4290512174643157, "grad_norm": 0.2593759850548933, "learning_rate": 9.58908226506791e-06, "loss": 0.47, "step": 8679 }, { "epoch": 2.4293310943185, "grad_norm": 0.2516560451199984, "learning_rate": 9.580000272788914e-06, "loss": 0.4493, "step": 8680 }, { "epoch": 2.429610971172684, "grad_norm": 0.25406709751230233, "learning_rate": 9.570922127708115e-06, "loss": 0.4541, "step": 8681 }, { "epoch": 2.429890848026868, "grad_norm": 0.2570269532750313, "learning_rate": 9.56184783068958e-06, "loss": 0.4477, "step": 8682 }, { "epoch": 2.4301707248810525, "grad_norm": 0.24971566492724123, "learning_rate": 9.552777382597e-06, "loss": 0.4532, "step": 8683 }, { "epoch": 2.4304506017352363, "grad_norm": 0.25302483303820306, "learning_rate": 9.543710784293709e-06, "loss": 0.4446, "step": 8684 }, { "epoch": 2.4307304785894206, "grad_norm": 0.23827549478306503, "learning_rate": 9.534648036642662e-06, "loss": 0.445, "step": 8685 }, { "epoch": 2.431010355443605, "grad_norm": 0.2533356794578736, "learning_rate": 9.525589140506475e-06, "loss": 0.463, "step": 8686 }, { "epoch": 2.4312902322977887, "grad_norm": 0.2534237929116753, "learning_rate": 9.516534096747365e-06, "loss": 0.46, "step": 8687 }, { "epoch": 2.431570109151973, "grad_norm": 0.26224123572476243, "learning_rate": 9.507482906227193e-06, "loss": 0.4528, "step": 8688 }, { "epoch": 2.4318499860061573, "grad_norm": 0.25835046245255744, "learning_rate": 9.498435569807474e-06, "loss": 0.4575, "step": 8689 }, { "epoch": 2.4321298628603416, "grad_norm": 0.2528107537029733, "learning_rate": 9.48939208834932e-06, "loss": 0.4557, "step": 8690 }, { "epoch": 2.4324097397145255, "grad_norm": 0.24686083665992026, "learning_rate": 9.480352462713505e-06, "loss": 0.4417, "step": 8691 }, { "epoch": 2.4326896165687097, "grad_norm": 0.244128112584507, "learning_rate": 9.471316693760418e-06, "loss": 0.4298, "step": 8692 }, { "epoch": 2.432969493422894, "grad_norm": 0.2642604604375863, "learning_rate": 9.462284782350095e-06, "loss": 0.4629, "step": 8693 }, { "epoch": 2.4332493702770783, "grad_norm": 0.23697579728429405, "learning_rate": 9.453256729342198e-06, "loss": 0.4507, "step": 8694 }, { "epoch": 2.433529247131262, "grad_norm": 0.2598769680279463, "learning_rate": 9.44423253559602e-06, "loss": 0.443, "step": 8695 }, { "epoch": 2.4338091239854465, "grad_norm": 0.2519461630230364, "learning_rate": 9.435212201970488e-06, "loss": 0.4316, "step": 8696 }, { "epoch": 2.4340890008396308, "grad_norm": 0.2570446768156951, "learning_rate": 9.426195729324161e-06, "loss": 0.4467, "step": 8697 }, { "epoch": 2.4343688776938146, "grad_norm": 0.2511001572796495, "learning_rate": 9.417183118515238e-06, "loss": 0.4277, "step": 8698 }, { "epoch": 2.434648754547999, "grad_norm": 0.2534741851012963, "learning_rate": 9.408174370401546e-06, "loss": 0.4632, "step": 8699 }, { "epoch": 2.434928631402183, "grad_norm": 0.24495205335664716, "learning_rate": 9.399169485840531e-06, "loss": 0.4512, "step": 8700 }, { "epoch": 2.435208508256367, "grad_norm": 0.2588232626877536, "learning_rate": 9.390168465689291e-06, "loss": 0.4572, "step": 8701 }, { "epoch": 2.4354883851105513, "grad_norm": 0.2595244918965313, "learning_rate": 9.381171310804549e-06, "loss": 0.4507, "step": 8702 }, { "epoch": 2.4357682619647356, "grad_norm": 0.2509568386919133, "learning_rate": 9.372178022042655e-06, "loss": 0.4537, "step": 8703 }, { "epoch": 2.4360481388189195, "grad_norm": 0.2574851073663655, "learning_rate": 9.363188600259592e-06, "loss": 0.4629, "step": 8704 }, { "epoch": 2.4363280156731038, "grad_norm": 0.24143585573403095, "learning_rate": 9.354203046310989e-06, "loss": 0.4416, "step": 8705 }, { "epoch": 2.436607892527288, "grad_norm": 0.2567156832788759, "learning_rate": 9.345221361052092e-06, "loss": 0.4557, "step": 8706 }, { "epoch": 2.4368877693814723, "grad_norm": 0.2684706355733321, "learning_rate": 9.33624354533778e-06, "loss": 0.4394, "step": 8707 }, { "epoch": 2.437167646235656, "grad_norm": 0.2571773773922458, "learning_rate": 9.327269600022564e-06, "loss": 0.4574, "step": 8708 }, { "epoch": 2.4374475230898405, "grad_norm": 0.25389521166220824, "learning_rate": 9.3182995259606e-06, "loss": 0.4646, "step": 8709 }, { "epoch": 2.4377273999440248, "grad_norm": 0.2573298915143599, "learning_rate": 9.309333324005653e-06, "loss": 0.4627, "step": 8710 }, { "epoch": 2.4380072767982086, "grad_norm": 0.25888647008282245, "learning_rate": 9.300370995011137e-06, "loss": 0.4628, "step": 8711 }, { "epoch": 2.438287153652393, "grad_norm": 0.24873750669651037, "learning_rate": 9.291412539830091e-06, "loss": 0.4358, "step": 8712 }, { "epoch": 2.438567030506577, "grad_norm": 0.2460586067257239, "learning_rate": 9.282457959315183e-06, "loss": 0.4391, "step": 8713 }, { "epoch": 2.4388469073607615, "grad_norm": 0.24809386972930658, "learning_rate": 9.27350725431872e-06, "loss": 0.4659, "step": 8714 }, { "epoch": 2.4391267842149453, "grad_norm": 0.24405633863970483, "learning_rate": 9.264560425692632e-06, "loss": 0.4277, "step": 8715 }, { "epoch": 2.4394066610691296, "grad_norm": 0.2438372099492113, "learning_rate": 9.25561747428848e-06, "loss": 0.4404, "step": 8716 }, { "epoch": 2.439686537923314, "grad_norm": 0.25901714955768335, "learning_rate": 9.246678400957464e-06, "loss": 0.4703, "step": 8717 }, { "epoch": 2.4399664147774978, "grad_norm": 0.26139742145792844, "learning_rate": 9.23774320655041e-06, "loss": 0.4708, "step": 8718 }, { "epoch": 2.440246291631682, "grad_norm": 0.25447992886901827, "learning_rate": 9.228811891917771e-06, "loss": 0.4432, "step": 8719 }, { "epoch": 2.4405261684858663, "grad_norm": 0.24308488526054634, "learning_rate": 9.219884457909634e-06, "loss": 0.45, "step": 8720 }, { "epoch": 2.44080604534005, "grad_norm": 0.25362680347028754, "learning_rate": 9.210960905375726e-06, "loss": 0.4535, "step": 8721 }, { "epoch": 2.4410859221942345, "grad_norm": 0.2549043073450669, "learning_rate": 9.202041235165387e-06, "loss": 0.4497, "step": 8722 }, { "epoch": 2.4413657990484188, "grad_norm": 0.24929151091884463, "learning_rate": 9.193125448127599e-06, "loss": 0.4331, "step": 8723 }, { "epoch": 2.4416456759026026, "grad_norm": 0.2555091617525663, "learning_rate": 9.18421354511097e-06, "loss": 0.4637, "step": 8724 }, { "epoch": 2.441925552756787, "grad_norm": 0.2459175146329819, "learning_rate": 9.175305526963746e-06, "loss": 0.4709, "step": 8725 }, { "epoch": 2.442205429610971, "grad_norm": 0.257298654064916, "learning_rate": 9.166401394533786e-06, "loss": 0.4504, "step": 8726 }, { "epoch": 2.4424853064651555, "grad_norm": 0.25975736680860273, "learning_rate": 9.157501148668602e-06, "loss": 0.4594, "step": 8727 }, { "epoch": 2.4427651833193393, "grad_norm": 0.26699151334811305, "learning_rate": 9.148604790215316e-06, "loss": 0.4449, "step": 8728 }, { "epoch": 2.4430450601735236, "grad_norm": 0.26310916710123866, "learning_rate": 9.139712320020694e-06, "loss": 0.4613, "step": 8729 }, { "epoch": 2.443324937027708, "grad_norm": 0.24990486843781354, "learning_rate": 9.130823738931122e-06, "loss": 0.4572, "step": 8730 }, { "epoch": 2.443604813881892, "grad_norm": 0.2454856718093436, "learning_rate": 9.121939047792621e-06, "loss": 0.4421, "step": 8731 }, { "epoch": 2.443884690736076, "grad_norm": 0.258350340714871, "learning_rate": 9.113058247450846e-06, "loss": 0.4629, "step": 8732 }, { "epoch": 2.4441645675902604, "grad_norm": 0.25948440717570664, "learning_rate": 9.104181338751072e-06, "loss": 0.4498, "step": 8733 }, { "epoch": 2.4444444444444446, "grad_norm": 0.2462499351337228, "learning_rate": 9.095308322538204e-06, "loss": 0.4396, "step": 8734 }, { "epoch": 2.4447243212986285, "grad_norm": 0.2569682798802448, "learning_rate": 9.086439199656787e-06, "loss": 0.445, "step": 8735 }, { "epoch": 2.445004198152813, "grad_norm": 0.246647949016701, "learning_rate": 9.077573970950991e-06, "loss": 0.4683, "step": 8736 }, { "epoch": 2.445284075006997, "grad_norm": 0.24789787281425485, "learning_rate": 9.068712637264604e-06, "loss": 0.4457, "step": 8737 }, { "epoch": 2.445563951861181, "grad_norm": 0.2681755619265382, "learning_rate": 9.059855199441065e-06, "loss": 0.4724, "step": 8738 }, { "epoch": 2.445843828715365, "grad_norm": 0.24597980816150247, "learning_rate": 9.051001658323415e-06, "loss": 0.4506, "step": 8739 }, { "epoch": 2.4461237055695495, "grad_norm": 0.24518445891027368, "learning_rate": 9.042152014754357e-06, "loss": 0.4432, "step": 8740 }, { "epoch": 2.4464035824237333, "grad_norm": 0.2608204879290255, "learning_rate": 9.033306269576186e-06, "loss": 0.4687, "step": 8741 }, { "epoch": 2.4466834592779176, "grad_norm": 0.2512059862644084, "learning_rate": 9.024464423630863e-06, "loss": 0.4539, "step": 8742 }, { "epoch": 2.446963336132102, "grad_norm": 0.2598669445578634, "learning_rate": 9.015626477759942e-06, "loss": 0.4392, "step": 8743 }, { "epoch": 2.447243212986286, "grad_norm": 0.2611220893656895, "learning_rate": 9.00679243280464e-06, "loss": 0.443, "step": 8744 }, { "epoch": 2.44752308984047, "grad_norm": 0.25262976592223696, "learning_rate": 8.997962289605778e-06, "loss": 0.4345, "step": 8745 }, { "epoch": 2.4478029666946544, "grad_norm": 0.2571089164905739, "learning_rate": 8.989136049003816e-06, "loss": 0.4532, "step": 8746 }, { "epoch": 2.4480828435488386, "grad_norm": 0.24915094209957508, "learning_rate": 8.980313711838833e-06, "loss": 0.469, "step": 8747 }, { "epoch": 2.4483627204030225, "grad_norm": 0.2449521949390417, "learning_rate": 8.971495278950559e-06, "loss": 0.4449, "step": 8748 }, { "epoch": 2.448642597257207, "grad_norm": 0.25833922557453676, "learning_rate": 8.96268075117832e-06, "loss": 0.4414, "step": 8749 }, { "epoch": 2.448922474111391, "grad_norm": 0.2561921132027639, "learning_rate": 8.953870129361103e-06, "loss": 0.4751, "step": 8750 }, { "epoch": 2.4492023509655754, "grad_norm": 0.25243468817494663, "learning_rate": 8.945063414337502e-06, "loss": 0.4535, "step": 8751 }, { "epoch": 2.449482227819759, "grad_norm": 0.24407529942373457, "learning_rate": 8.936260606945746e-06, "loss": 0.4455, "step": 8752 }, { "epoch": 2.4497621046739435, "grad_norm": 0.254439271242042, "learning_rate": 8.927461708023682e-06, "loss": 0.45, "step": 8753 }, { "epoch": 2.450041981528128, "grad_norm": 0.2499518236860164, "learning_rate": 8.918666718408807e-06, "loss": 0.4276, "step": 8754 }, { "epoch": 2.4503218583823116, "grad_norm": 0.24972048345014827, "learning_rate": 8.90987563893823e-06, "loss": 0.4638, "step": 8755 }, { "epoch": 2.450601735236496, "grad_norm": 0.2592715456981266, "learning_rate": 8.901088470448688e-06, "loss": 0.4634, "step": 8756 }, { "epoch": 2.4508816120906802, "grad_norm": 0.26717180906496124, "learning_rate": 8.892305213776537e-06, "loss": 0.4551, "step": 8757 }, { "epoch": 2.451161488944864, "grad_norm": 0.2727554500837517, "learning_rate": 8.883525869757808e-06, "loss": 0.4698, "step": 8758 }, { "epoch": 2.4514413657990484, "grad_norm": 0.25320575306537013, "learning_rate": 8.874750439228086e-06, "loss": 0.4599, "step": 8759 }, { "epoch": 2.4517212426532327, "grad_norm": 0.26015165359909925, "learning_rate": 8.865978923022639e-06, "loss": 0.4373, "step": 8760 }, { "epoch": 2.4520011195074165, "grad_norm": 0.24973055709950392, "learning_rate": 8.857211321976345e-06, "loss": 0.4371, "step": 8761 }, { "epoch": 2.452280996361601, "grad_norm": 0.2567892230943019, "learning_rate": 8.8484476369237e-06, "loss": 0.4508, "step": 8762 }, { "epoch": 2.452560873215785, "grad_norm": 0.2651888022491084, "learning_rate": 8.839687868698843e-06, "loss": 0.4512, "step": 8763 }, { "epoch": 2.4528407500699694, "grad_norm": 0.24439360381708025, "learning_rate": 8.830932018135534e-06, "loss": 0.4353, "step": 8764 }, { "epoch": 2.4531206269241532, "grad_norm": 0.26569808054625527, "learning_rate": 8.82218008606716e-06, "loss": 0.4664, "step": 8765 }, { "epoch": 2.4534005037783375, "grad_norm": 0.2604596365649492, "learning_rate": 8.813432073326733e-06, "loss": 0.4555, "step": 8766 }, { "epoch": 2.453680380632522, "grad_norm": 0.25717993946272316, "learning_rate": 8.804687980746895e-06, "loss": 0.4516, "step": 8767 }, { "epoch": 2.453960257486706, "grad_norm": 0.26238604410316096, "learning_rate": 8.795947809159916e-06, "loss": 0.4604, "step": 8768 }, { "epoch": 2.45424013434089, "grad_norm": 0.26518424198451557, "learning_rate": 8.787211559397685e-06, "loss": 0.4413, "step": 8769 }, { "epoch": 2.4545200111950742, "grad_norm": 0.2633362977252646, "learning_rate": 8.778479232291725e-06, "loss": 0.4407, "step": 8770 }, { "epoch": 2.4547998880492585, "grad_norm": 0.2596837173525926, "learning_rate": 8.769750828673185e-06, "loss": 0.4413, "step": 8771 }, { "epoch": 2.4550797649034424, "grad_norm": 0.24849747158814303, "learning_rate": 8.761026349372836e-06, "loss": 0.4538, "step": 8772 }, { "epoch": 2.4553596417576267, "grad_norm": 0.26329439229113977, "learning_rate": 8.752305795221083e-06, "loss": 0.4447, "step": 8773 }, { "epoch": 2.455639518611811, "grad_norm": 0.25238534245377653, "learning_rate": 8.74358916704795e-06, "loss": 0.435, "step": 8774 }, { "epoch": 2.455919395465995, "grad_norm": 0.24898705016741032, "learning_rate": 8.734876465683084e-06, "loss": 0.4338, "step": 8775 }, { "epoch": 2.456199272320179, "grad_norm": 0.24785772107154505, "learning_rate": 8.726167691955795e-06, "loss": 0.4729, "step": 8776 }, { "epoch": 2.4564791491743634, "grad_norm": 0.239248536315287, "learning_rate": 8.717462846694958e-06, "loss": 0.4089, "step": 8777 }, { "epoch": 2.4567590260285472, "grad_norm": 0.2429698015847293, "learning_rate": 8.708761930729108e-06, "loss": 0.4564, "step": 8778 }, { "epoch": 2.4570389028827315, "grad_norm": 0.2546477530163254, "learning_rate": 8.700064944886416e-06, "loss": 0.4613, "step": 8779 }, { "epoch": 2.457318779736916, "grad_norm": 0.25185237329456067, "learning_rate": 8.691371889994653e-06, "loss": 0.4555, "step": 8780 }, { "epoch": 2.4575986565911, "grad_norm": 0.2600163293195142, "learning_rate": 8.68268276688124e-06, "loss": 0.4516, "step": 8781 }, { "epoch": 2.457878533445284, "grad_norm": 0.2522208989063548, "learning_rate": 8.673997576373205e-06, "loss": 0.483, "step": 8782 }, { "epoch": 2.4581584102994682, "grad_norm": 0.25219010364377603, "learning_rate": 8.665316319297211e-06, "loss": 0.4744, "step": 8783 }, { "epoch": 2.4584382871536525, "grad_norm": 0.2460263169031998, "learning_rate": 8.65663899647955e-06, "loss": 0.4654, "step": 8784 }, { "epoch": 2.4587181640078364, "grad_norm": 0.2475874976944104, "learning_rate": 8.647965608746128e-06, "loss": 0.4435, "step": 8785 }, { "epoch": 2.4589980408620207, "grad_norm": 0.26112941218263963, "learning_rate": 8.639296156922482e-06, "loss": 0.447, "step": 8786 }, { "epoch": 2.459277917716205, "grad_norm": 0.2381450065309774, "learning_rate": 8.630630641833782e-06, "loss": 0.4314, "step": 8787 }, { "epoch": 2.4595577945703893, "grad_norm": 0.25006214540187555, "learning_rate": 8.621969064304813e-06, "loss": 0.4659, "step": 8788 }, { "epoch": 2.459837671424573, "grad_norm": 0.24784708367652952, "learning_rate": 8.613311425159986e-06, "loss": 0.4465, "step": 8789 }, { "epoch": 2.4601175482787574, "grad_norm": 0.23901346003043714, "learning_rate": 8.604657725223348e-06, "loss": 0.4502, "step": 8790 }, { "epoch": 2.4603974251329417, "grad_norm": 0.2563017090038037, "learning_rate": 8.596007965318553e-06, "loss": 0.4454, "step": 8791 }, { "epoch": 2.4606773019871255, "grad_norm": 0.2534308207864483, "learning_rate": 8.58736214626888e-06, "loss": 0.4471, "step": 8792 }, { "epoch": 2.46095717884131, "grad_norm": 0.24716832102749217, "learning_rate": 8.578720268897278e-06, "loss": 0.4682, "step": 8793 }, { "epoch": 2.461237055695494, "grad_norm": 0.24589647734389258, "learning_rate": 8.570082334026269e-06, "loss": 0.4238, "step": 8794 }, { "epoch": 2.461516932549678, "grad_norm": 0.2706887481862387, "learning_rate": 8.561448342478006e-06, "loss": 0.4727, "step": 8795 }, { "epoch": 2.4617968094038623, "grad_norm": 0.25009743700279913, "learning_rate": 8.552818295074283e-06, "loss": 0.4653, "step": 8796 }, { "epoch": 2.4620766862580465, "grad_norm": 0.2609228214806486, "learning_rate": 8.544192192636514e-06, "loss": 0.4579, "step": 8797 }, { "epoch": 2.4623565631122304, "grad_norm": 0.2552038023734495, "learning_rate": 8.535570035985735e-06, "loss": 0.4499, "step": 8798 }, { "epoch": 2.4626364399664147, "grad_norm": 0.2623068259763304, "learning_rate": 8.526951825942609e-06, "loss": 0.4722, "step": 8799 }, { "epoch": 2.462916316820599, "grad_norm": 0.2345347434331712, "learning_rate": 8.518337563327416e-06, "loss": 0.4388, "step": 8800 }, { "epoch": 2.4631961936747833, "grad_norm": 0.33316634183098015, "learning_rate": 8.509727248960081e-06, "loss": 0.4658, "step": 8801 }, { "epoch": 2.463476070528967, "grad_norm": 0.2614180155249143, "learning_rate": 8.501120883660124e-06, "loss": 0.4473, "step": 8802 }, { "epoch": 2.4637559473831514, "grad_norm": 0.2578616030305097, "learning_rate": 8.492518468246713e-06, "loss": 0.4632, "step": 8803 }, { "epoch": 2.4640358242373357, "grad_norm": 0.25364120687260067, "learning_rate": 8.483920003538626e-06, "loss": 0.4464, "step": 8804 }, { "epoch": 2.4643157010915195, "grad_norm": 0.2511528232877162, "learning_rate": 8.47532549035427e-06, "loss": 0.4442, "step": 8805 }, { "epoch": 2.464595577945704, "grad_norm": 0.26722745107266116, "learning_rate": 8.466734929511672e-06, "loss": 0.4448, "step": 8806 }, { "epoch": 2.464875454799888, "grad_norm": 0.24661331952045684, "learning_rate": 8.458148321828495e-06, "loss": 0.4411, "step": 8807 }, { "epoch": 2.4651553316540724, "grad_norm": 0.24234777529026694, "learning_rate": 8.449565668122012e-06, "loss": 0.4449, "step": 8808 }, { "epoch": 2.4654352085082563, "grad_norm": 0.25100135931081274, "learning_rate": 8.440986969209114e-06, "loss": 0.4568, "step": 8809 }, { "epoch": 2.4657150853624406, "grad_norm": 0.2616029794356942, "learning_rate": 8.43241222590635e-06, "loss": 0.4488, "step": 8810 }, { "epoch": 2.465994962216625, "grad_norm": 0.2516585904165877, "learning_rate": 8.423841439029856e-06, "loss": 0.4507, "step": 8811 }, { "epoch": 2.4662748390708087, "grad_norm": 0.250785187040853, "learning_rate": 8.415274609395419e-06, "loss": 0.4453, "step": 8812 }, { "epoch": 2.466554715924993, "grad_norm": 0.26842108922718766, "learning_rate": 8.406711737818407e-06, "loss": 0.4515, "step": 8813 }, { "epoch": 2.4668345927791773, "grad_norm": 0.25759536406008543, "learning_rate": 8.398152825113853e-06, "loss": 0.4349, "step": 8814 }, { "epoch": 2.467114469633361, "grad_norm": 0.24877343150791728, "learning_rate": 8.389597872096395e-06, "loss": 0.4577, "step": 8815 }, { "epoch": 2.4673943464875454, "grad_norm": 0.24690516069503435, "learning_rate": 8.381046879580306e-06, "loss": 0.4472, "step": 8816 }, { "epoch": 2.4676742233417297, "grad_norm": 0.2498296130339311, "learning_rate": 8.372499848379473e-06, "loss": 0.4423, "step": 8817 }, { "epoch": 2.467954100195914, "grad_norm": 0.23970722909371703, "learning_rate": 8.363956779307397e-06, "loss": 0.4387, "step": 8818 }, { "epoch": 2.468233977050098, "grad_norm": 0.25215230521910237, "learning_rate": 8.355417673177224e-06, "loss": 0.4764, "step": 8819 }, { "epoch": 2.468513853904282, "grad_norm": 0.2627276042100404, "learning_rate": 8.346882530801703e-06, "loss": 0.4791, "step": 8820 }, { "epoch": 2.4687937307584664, "grad_norm": 0.2634063154876318, "learning_rate": 8.338351352993223e-06, "loss": 0.4523, "step": 8821 }, { "epoch": 2.4690736076126503, "grad_norm": 0.260379878904615, "learning_rate": 8.329824140563774e-06, "loss": 0.4537, "step": 8822 }, { "epoch": 2.4693534844668346, "grad_norm": 0.2474845610484414, "learning_rate": 8.32130089432499e-06, "loss": 0.4466, "step": 8823 }, { "epoch": 2.469633361321019, "grad_norm": 0.24575569815410245, "learning_rate": 8.312781615088117e-06, "loss": 0.4678, "step": 8824 }, { "epoch": 2.469913238175203, "grad_norm": 0.2501725858848952, "learning_rate": 8.304266303664022e-06, "loss": 0.4318, "step": 8825 }, { "epoch": 2.470193115029387, "grad_norm": 0.26158386112949367, "learning_rate": 8.295754960863183e-06, "loss": 0.4641, "step": 8826 }, { "epoch": 2.4704729918835713, "grad_norm": 0.2512532674692248, "learning_rate": 8.287247587495745e-06, "loss": 0.4666, "step": 8827 }, { "epoch": 2.4707528687377556, "grad_norm": 0.25758225105575505, "learning_rate": 8.278744184371428e-06, "loss": 0.4466, "step": 8828 }, { "epoch": 2.4710327455919394, "grad_norm": 0.26442196017313674, "learning_rate": 8.270244752299594e-06, "loss": 0.4453, "step": 8829 }, { "epoch": 2.4713126224461237, "grad_norm": 0.24282226088915165, "learning_rate": 8.261749292089232e-06, "loss": 0.4483, "step": 8830 }, { "epoch": 2.471592499300308, "grad_norm": 0.2653192403347194, "learning_rate": 8.253257804548926e-06, "loss": 0.4606, "step": 8831 }, { "epoch": 2.471872376154492, "grad_norm": 0.256316459436771, "learning_rate": 8.244770290486902e-06, "loss": 0.4357, "step": 8832 }, { "epoch": 2.472152253008676, "grad_norm": 0.257732831978499, "learning_rate": 8.23628675071102e-06, "loss": 0.4578, "step": 8833 }, { "epoch": 2.4724321298628604, "grad_norm": 0.25750455150037393, "learning_rate": 8.227807186028735e-06, "loss": 0.4754, "step": 8834 }, { "epoch": 2.4727120067170443, "grad_norm": 0.2585203961533809, "learning_rate": 8.219331597247149e-06, "loss": 0.4446, "step": 8835 }, { "epoch": 2.4729918835712286, "grad_norm": 0.2502622173079045, "learning_rate": 8.210859985172965e-06, "loss": 0.4393, "step": 8836 }, { "epoch": 2.473271760425413, "grad_norm": 0.26794254234560555, "learning_rate": 8.202392350612515e-06, "loss": 0.4653, "step": 8837 }, { "epoch": 2.473551637279597, "grad_norm": 0.253063023944246, "learning_rate": 8.193928694371762e-06, "loss": 0.4692, "step": 8838 }, { "epoch": 2.473831514133781, "grad_norm": 0.2631953633339251, "learning_rate": 8.18546901725627e-06, "loss": 0.4533, "step": 8839 }, { "epoch": 2.4741113909879653, "grad_norm": 0.25273374524819214, "learning_rate": 8.177013320071242e-06, "loss": 0.4284, "step": 8840 }, { "epoch": 2.4743912678421496, "grad_norm": 0.26540190913298567, "learning_rate": 8.1685616036215e-06, "loss": 0.4653, "step": 8841 }, { "epoch": 2.4746711446963334, "grad_norm": 0.25256252488623404, "learning_rate": 8.160113868711472e-06, "loss": 0.4344, "step": 8842 }, { "epoch": 2.4749510215505177, "grad_norm": 0.2422732339902129, "learning_rate": 8.151670116145232e-06, "loss": 0.4262, "step": 8843 }, { "epoch": 2.475230898404702, "grad_norm": 0.2469530278705053, "learning_rate": 8.143230346726437e-06, "loss": 0.4265, "step": 8844 }, { "epoch": 2.4755107752588863, "grad_norm": 0.23441539423895605, "learning_rate": 8.13479456125842e-06, "loss": 0.4525, "step": 8845 }, { "epoch": 2.47579065211307, "grad_norm": 0.2569000369849948, "learning_rate": 8.126362760544087e-06, "loss": 0.465, "step": 8846 }, { "epoch": 2.4760705289672544, "grad_norm": 0.25036897978886113, "learning_rate": 8.11793494538599e-06, "loss": 0.4373, "step": 8847 }, { "epoch": 2.4763504058214387, "grad_norm": 0.2534382996776073, "learning_rate": 8.109511116586293e-06, "loss": 0.4353, "step": 8848 }, { "epoch": 2.4766302826756226, "grad_norm": 0.2625875150888653, "learning_rate": 8.101091274946766e-06, "loss": 0.4589, "step": 8849 }, { "epoch": 2.476910159529807, "grad_norm": 0.2458607655275804, "learning_rate": 8.092675421268826e-06, "loss": 0.4419, "step": 8850 }, { "epoch": 2.477190036383991, "grad_norm": 0.2695265276415108, "learning_rate": 8.084263556353488e-06, "loss": 0.4528, "step": 8851 }, { "epoch": 2.477469913238175, "grad_norm": 0.23958898865776768, "learning_rate": 8.075855681001415e-06, "loss": 0.453, "step": 8852 }, { "epoch": 2.4777497900923593, "grad_norm": 0.25722658792905134, "learning_rate": 8.067451796012859e-06, "loss": 0.4461, "step": 8853 }, { "epoch": 2.4780296669465436, "grad_norm": 0.2505958536295343, "learning_rate": 8.059051902187714e-06, "loss": 0.4564, "step": 8854 }, { "epoch": 2.4783095438007274, "grad_norm": 0.25713175258741855, "learning_rate": 8.05065600032548e-06, "loss": 0.4703, "step": 8855 }, { "epoch": 2.4785894206549117, "grad_norm": 0.24969849742796021, "learning_rate": 8.042264091225293e-06, "loss": 0.4369, "step": 8856 }, { "epoch": 2.478869297509096, "grad_norm": 0.25187749023349426, "learning_rate": 8.033876175685894e-06, "loss": 0.4327, "step": 8857 }, { "epoch": 2.4791491743632803, "grad_norm": 0.25141787198038745, "learning_rate": 8.025492254505646e-06, "loss": 0.4401, "step": 8858 }, { "epoch": 2.479429051217464, "grad_norm": 0.24336124216872376, "learning_rate": 8.017112328482546e-06, "loss": 0.4411, "step": 8859 }, { "epoch": 2.4797089280716484, "grad_norm": 0.25966246947911736, "learning_rate": 8.008736398414185e-06, "loss": 0.4462, "step": 8860 }, { "epoch": 2.4799888049258327, "grad_norm": 0.2529278877262405, "learning_rate": 8.000364465097792e-06, "loss": 0.4425, "step": 8861 }, { "epoch": 2.480268681780017, "grad_norm": 0.2538931757905225, "learning_rate": 7.991996529330226e-06, "loss": 0.4541, "step": 8862 }, { "epoch": 2.480548558634201, "grad_norm": 0.25134289965651335, "learning_rate": 7.983632591907947e-06, "loss": 0.4533, "step": 8863 }, { "epoch": 2.480828435488385, "grad_norm": 0.24866939522478199, "learning_rate": 7.975272653627031e-06, "loss": 0.4388, "step": 8864 }, { "epoch": 2.4811083123425695, "grad_norm": 0.25517906403239415, "learning_rate": 7.96691671528319e-06, "loss": 0.4493, "step": 8865 }, { "epoch": 2.4813881891967533, "grad_norm": 0.2606694042685941, "learning_rate": 7.95856477767174e-06, "loss": 0.468, "step": 8866 }, { "epoch": 2.4816680660509376, "grad_norm": 0.2692510674753085, "learning_rate": 7.950216841587638e-06, "loss": 0.4843, "step": 8867 }, { "epoch": 2.481947942905122, "grad_norm": 0.2632445380264223, "learning_rate": 7.941872907825421e-06, "loss": 0.4639, "step": 8868 }, { "epoch": 2.4822278197593057, "grad_norm": 0.257729898588726, "learning_rate": 7.933532977179287e-06, "loss": 0.4548, "step": 8869 }, { "epoch": 2.48250769661349, "grad_norm": 0.24741173124978624, "learning_rate": 7.925197050443023e-06, "loss": 0.4403, "step": 8870 }, { "epoch": 2.4827875734676743, "grad_norm": 0.2729225867355483, "learning_rate": 7.916865128410056e-06, "loss": 0.4707, "step": 8871 }, { "epoch": 2.483067450321858, "grad_norm": 0.2538403651257615, "learning_rate": 7.908537211873423e-06, "loss": 0.4738, "step": 8872 }, { "epoch": 2.4833473271760425, "grad_norm": 0.25179095236947746, "learning_rate": 7.900213301625774e-06, "loss": 0.4427, "step": 8873 }, { "epoch": 2.4836272040302267, "grad_norm": 0.2511032981585161, "learning_rate": 7.891893398459393e-06, "loss": 0.4643, "step": 8874 }, { "epoch": 2.483907080884411, "grad_norm": 0.24316285899510243, "learning_rate": 7.883577503166162e-06, "loss": 0.4184, "step": 8875 }, { "epoch": 2.484186957738595, "grad_norm": 0.23892801848790826, "learning_rate": 7.875265616537603e-06, "loss": 0.4482, "step": 8876 }, { "epoch": 2.484466834592779, "grad_norm": 0.2586827045175894, "learning_rate": 7.866957739364838e-06, "loss": 0.4413, "step": 8877 }, { "epoch": 2.4847467114469635, "grad_norm": 0.24991802724800657, "learning_rate": 7.858653872438604e-06, "loss": 0.4474, "step": 8878 }, { "epoch": 2.4850265883011473, "grad_norm": 0.25370606353769065, "learning_rate": 7.850354016549299e-06, "loss": 0.4686, "step": 8879 }, { "epoch": 2.4853064651553316, "grad_norm": 0.2708748813220551, "learning_rate": 7.842058172486888e-06, "loss": 0.473, "step": 8880 }, { "epoch": 2.485586342009516, "grad_norm": 0.27692138171144687, "learning_rate": 7.833766341040977e-06, "loss": 0.4571, "step": 8881 }, { "epoch": 2.4858662188637, "grad_norm": 0.2793897596750318, "learning_rate": 7.825478523000784e-06, "loss": 0.4483, "step": 8882 }, { "epoch": 2.486146095717884, "grad_norm": 0.25167269286679217, "learning_rate": 7.817194719155157e-06, "loss": 0.4409, "step": 8883 }, { "epoch": 2.4864259725720683, "grad_norm": 0.25515552979710937, "learning_rate": 7.808914930292543e-06, "loss": 0.4396, "step": 8884 }, { "epoch": 2.4867058494262526, "grad_norm": 0.2551114566159558, "learning_rate": 7.800639157201034e-06, "loss": 0.4694, "step": 8885 }, { "epoch": 2.4869857262804365, "grad_norm": 0.26029356290704797, "learning_rate": 7.792367400668298e-06, "loss": 0.4627, "step": 8886 }, { "epoch": 2.4872656031346208, "grad_norm": 0.23504242524148974, "learning_rate": 7.784099661481659e-06, "loss": 0.4478, "step": 8887 }, { "epoch": 2.487545479988805, "grad_norm": 0.2512748274880192, "learning_rate": 7.775835940428045e-06, "loss": 0.4537, "step": 8888 }, { "epoch": 2.487825356842989, "grad_norm": 0.25171981143190264, "learning_rate": 7.767576238294e-06, "loss": 0.4581, "step": 8889 }, { "epoch": 2.488105233697173, "grad_norm": 0.24622432633520117, "learning_rate": 7.759320555865685e-06, "loss": 0.4513, "step": 8890 }, { "epoch": 2.4883851105513575, "grad_norm": 0.25616479919867263, "learning_rate": 7.75106889392888e-06, "loss": 0.4391, "step": 8891 }, { "epoch": 2.4886649874055413, "grad_norm": 0.2627191482362859, "learning_rate": 7.74282125326899e-06, "loss": 0.4676, "step": 8892 }, { "epoch": 2.4889448642597256, "grad_norm": 0.2510764610320255, "learning_rate": 7.734577634671026e-06, "loss": 0.4428, "step": 8893 }, { "epoch": 2.48922474111391, "grad_norm": 0.2563019188998492, "learning_rate": 7.726338038919617e-06, "loss": 0.4775, "step": 8894 }, { "epoch": 2.489504617968094, "grad_norm": 0.24968828858127015, "learning_rate": 7.718102466799016e-06, "loss": 0.436, "step": 8895 }, { "epoch": 2.489784494822278, "grad_norm": 0.24668294561710194, "learning_rate": 7.709870919093076e-06, "loss": 0.4428, "step": 8896 }, { "epoch": 2.4900643716764623, "grad_norm": 0.2642183359750441, "learning_rate": 7.701643396585307e-06, "loss": 0.4412, "step": 8897 }, { "epoch": 2.4903442485306466, "grad_norm": 0.25214599009723276, "learning_rate": 7.69341990005879e-06, "loss": 0.4476, "step": 8898 }, { "epoch": 2.490624125384831, "grad_norm": 0.25425374085772656, "learning_rate": 7.685200430296246e-06, "loss": 0.4562, "step": 8899 }, { "epoch": 2.4909040022390148, "grad_norm": 0.2585353507804545, "learning_rate": 7.676984988080016e-06, "loss": 0.459, "step": 8900 }, { "epoch": 2.491183879093199, "grad_norm": 0.2489273363132629, "learning_rate": 7.66877357419204e-06, "loss": 0.4469, "step": 8901 }, { "epoch": 2.4914637559473833, "grad_norm": 0.2540000855545819, "learning_rate": 7.660566189413892e-06, "loss": 0.4511, "step": 8902 }, { "epoch": 2.491743632801567, "grad_norm": 0.25985644349776127, "learning_rate": 7.652362834526766e-06, "loss": 0.449, "step": 8903 }, { "epoch": 2.4920235096557515, "grad_norm": 0.2562089695994192, "learning_rate": 7.64416351031143e-06, "loss": 0.4439, "step": 8904 }, { "epoch": 2.4923033865099358, "grad_norm": 0.25278132260236597, "learning_rate": 7.635968217548322e-06, "loss": 0.4659, "step": 8905 }, { "epoch": 2.4925832633641196, "grad_norm": 0.25888564877155157, "learning_rate": 7.627776957017474e-06, "loss": 0.4399, "step": 8906 }, { "epoch": 2.492863140218304, "grad_norm": 0.24493965083645694, "learning_rate": 7.619589729498527e-06, "loss": 0.4377, "step": 8907 }, { "epoch": 2.493143017072488, "grad_norm": 0.24969443975352626, "learning_rate": 7.6114065357707544e-06, "loss": 0.4644, "step": 8908 }, { "epoch": 2.493422893926672, "grad_norm": 0.2565209300903901, "learning_rate": 7.603227376613031e-06, "loss": 0.4552, "step": 8909 }, { "epoch": 2.4937027707808563, "grad_norm": 0.2452124420124257, "learning_rate": 7.595052252803858e-06, "loss": 0.4517, "step": 8910 }, { "epoch": 2.4939826476350406, "grad_norm": 0.255949702565928, "learning_rate": 7.5868811651213454e-06, "loss": 0.4557, "step": 8911 }, { "epoch": 2.494262524489225, "grad_norm": 0.259286446567972, "learning_rate": 7.578714114343227e-06, "loss": 0.4734, "step": 8912 }, { "epoch": 2.4945424013434088, "grad_norm": 0.2640374675122462, "learning_rate": 7.570551101246831e-06, "loss": 0.4644, "step": 8913 }, { "epoch": 2.494822278197593, "grad_norm": 0.25127260420103387, "learning_rate": 7.5623921266091355e-06, "loss": 0.4435, "step": 8914 }, { "epoch": 2.4951021550517773, "grad_norm": 0.25389631309855887, "learning_rate": 7.55423719120672e-06, "loss": 0.4564, "step": 8915 }, { "epoch": 2.495382031905961, "grad_norm": 0.24305461929854702, "learning_rate": 7.5460862958157575e-06, "loss": 0.44, "step": 8916 }, { "epoch": 2.4956619087601455, "grad_norm": 0.2551714121846865, "learning_rate": 7.537939441212072e-06, "loss": 0.4585, "step": 8917 }, { "epoch": 2.4959417856143298, "grad_norm": 0.2734545899115857, "learning_rate": 7.5297966281710705e-06, "loss": 0.4838, "step": 8918 }, { "epoch": 2.496221662468514, "grad_norm": 0.24950143295608296, "learning_rate": 7.521657857467801e-06, "loss": 0.4266, "step": 8919 }, { "epoch": 2.496501539322698, "grad_norm": 0.25819665620444043, "learning_rate": 7.513523129876915e-06, "loss": 0.4411, "step": 8920 }, { "epoch": 2.496781416176882, "grad_norm": 0.2598365440070756, "learning_rate": 7.505392446172688e-06, "loss": 0.4554, "step": 8921 }, { "epoch": 2.4970612930310665, "grad_norm": 0.23971359460619432, "learning_rate": 7.497265807128978e-06, "loss": 0.4393, "step": 8922 }, { "epoch": 2.4973411698852503, "grad_norm": 0.2568520714516961, "learning_rate": 7.489143213519301e-06, "loss": 0.4371, "step": 8923 }, { "epoch": 2.4976210467394346, "grad_norm": 0.25148870661721523, "learning_rate": 7.481024666116765e-06, "loss": 0.4459, "step": 8924 }, { "epoch": 2.497900923593619, "grad_norm": 0.2588802187966045, "learning_rate": 7.472910165694102e-06, "loss": 0.4425, "step": 8925 }, { "epoch": 2.4981808004478028, "grad_norm": 0.2519353375045778, "learning_rate": 7.464799713023657e-06, "loss": 0.4417, "step": 8926 }, { "epoch": 2.498460677301987, "grad_norm": 0.2550387065917063, "learning_rate": 7.456693308877377e-06, "loss": 0.4535, "step": 8927 }, { "epoch": 2.4987405541561714, "grad_norm": 0.25619742864510653, "learning_rate": 7.448590954026846e-06, "loss": 0.4384, "step": 8928 }, { "epoch": 2.499020431010355, "grad_norm": 0.2577727926770196, "learning_rate": 7.440492649243242e-06, "loss": 0.4519, "step": 8929 }, { "epoch": 2.4993003078645395, "grad_norm": 0.24415656171714847, "learning_rate": 7.432398395297357e-06, "loss": 0.4394, "step": 8930 }, { "epoch": 2.499580184718724, "grad_norm": 0.24783691948030032, "learning_rate": 7.4243081929596336e-06, "loss": 0.4396, "step": 8931 }, { "epoch": 2.499860061572908, "grad_norm": 0.2509008364025493, "learning_rate": 7.416222043000082e-06, "loss": 0.4505, "step": 8932 }, { "epoch": 2.500139938427092, "grad_norm": 0.25113708543384494, "learning_rate": 7.408139946188358e-06, "loss": 0.4582, "step": 8933 }, { "epoch": 2.500419815281276, "grad_norm": 0.25948635498933226, "learning_rate": 7.400061903293709e-06, "loss": 0.4673, "step": 8934 }, { "epoch": 2.5006996921354605, "grad_norm": 0.24315429208856337, "learning_rate": 7.391987915085013e-06, "loss": 0.4348, "step": 8935 }, { "epoch": 2.500979568989645, "grad_norm": 0.26204781206030575, "learning_rate": 7.383917982330757e-06, "loss": 0.434, "step": 8936 }, { "epoch": 2.5012594458438286, "grad_norm": 0.2588636451443479, "learning_rate": 7.375852105799036e-06, "loss": 0.4425, "step": 8937 }, { "epoch": 2.501539322698013, "grad_norm": 0.24993444792058758, "learning_rate": 7.367790286257576e-06, "loss": 0.4478, "step": 8938 }, { "epoch": 2.5018191995521972, "grad_norm": 0.25553144415491774, "learning_rate": 7.3597325244737056e-06, "loss": 0.472, "step": 8939 }, { "epoch": 2.502099076406381, "grad_norm": 0.25597088030086723, "learning_rate": 7.351678821214353e-06, "loss": 0.4211, "step": 8940 }, { "epoch": 2.5023789532605654, "grad_norm": 0.2630377552359938, "learning_rate": 7.343629177246081e-06, "loss": 0.453, "step": 8941 }, { "epoch": 2.5026588301147497, "grad_norm": 0.25873977474103477, "learning_rate": 7.335583593335055e-06, "loss": 0.4524, "step": 8942 }, { "epoch": 2.5029387069689335, "grad_norm": 0.251957955374892, "learning_rate": 7.327542070247068e-06, "loss": 0.4671, "step": 8943 }, { "epoch": 2.503218583823118, "grad_norm": 0.2432185898006891, "learning_rate": 7.319504608747507e-06, "loss": 0.4365, "step": 8944 }, { "epoch": 2.503498460677302, "grad_norm": 0.24733585421860688, "learning_rate": 7.3114712096013894e-06, "loss": 0.465, "step": 8945 }, { "epoch": 2.503778337531486, "grad_norm": 0.2489244920393296, "learning_rate": 7.303441873573335e-06, "loss": 0.4554, "step": 8946 }, { "epoch": 2.5040582143856702, "grad_norm": 0.2621419028480469, "learning_rate": 7.295416601427585e-06, "loss": 0.4565, "step": 8947 }, { "epoch": 2.5043380912398545, "grad_norm": 0.25392129715622236, "learning_rate": 7.287395393927971e-06, "loss": 0.4525, "step": 8948 }, { "epoch": 2.5046179680940384, "grad_norm": 0.2597443364769656, "learning_rate": 7.279378251837982e-06, "loss": 0.4546, "step": 8949 }, { "epoch": 2.5048978449482227, "grad_norm": 0.25569931522305633, "learning_rate": 7.271365175920686e-06, "loss": 0.4467, "step": 8950 }, { "epoch": 2.505177721802407, "grad_norm": 0.2483428630146001, "learning_rate": 7.263356166938762e-06, "loss": 0.448, "step": 8951 }, { "epoch": 2.5054575986565912, "grad_norm": 0.24620526913419488, "learning_rate": 7.255351225654527e-06, "loss": 0.4504, "step": 8952 }, { "epoch": 2.5057374755107755, "grad_norm": 0.24532576029083908, "learning_rate": 7.2473503528298794e-06, "loss": 0.4471, "step": 8953 }, { "epoch": 2.5060173523649594, "grad_norm": 0.24277139236472012, "learning_rate": 7.239353549226363e-06, "loss": 0.4472, "step": 8954 }, { "epoch": 2.5062972292191437, "grad_norm": 0.2507319047245604, "learning_rate": 7.231360815605104e-06, "loss": 0.4522, "step": 8955 }, { "epoch": 2.506577106073328, "grad_norm": 0.24821010930428444, "learning_rate": 7.223372152726865e-06, "loss": 0.4553, "step": 8956 }, { "epoch": 2.506856982927512, "grad_norm": 0.2615670280351122, "learning_rate": 7.215387561352016e-06, "loss": 0.4612, "step": 8957 }, { "epoch": 2.507136859781696, "grad_norm": 0.2600748753959852, "learning_rate": 7.207407042240516e-06, "loss": 0.445, "step": 8958 }, { "epoch": 2.5074167366358804, "grad_norm": 0.25896581280794956, "learning_rate": 7.199430596151968e-06, "loss": 0.456, "step": 8959 }, { "epoch": 2.5076966134900642, "grad_norm": 0.2558477947203808, "learning_rate": 7.19145822384557e-06, "loss": 0.4573, "step": 8960 }, { "epoch": 2.5079764903442485, "grad_norm": 0.2502157967721201, "learning_rate": 7.1834899260801395e-06, "loss": 0.4478, "step": 8961 }, { "epoch": 2.508256367198433, "grad_norm": 0.25875228888298124, "learning_rate": 7.1755257036141e-06, "loss": 0.454, "step": 8962 }, { "epoch": 2.5085362440526167, "grad_norm": 0.25368430084325355, "learning_rate": 7.167565557205497e-06, "loss": 0.4575, "step": 8963 }, { "epoch": 2.508816120906801, "grad_norm": 0.2564756941022119, "learning_rate": 7.159609487611979e-06, "loss": 0.467, "step": 8964 }, { "epoch": 2.5090959977609852, "grad_norm": 0.24892436207172702, "learning_rate": 7.151657495590802e-06, "loss": 0.4814, "step": 8965 }, { "epoch": 2.509375874615169, "grad_norm": 0.26435105508762835, "learning_rate": 7.14370958189885e-06, "loss": 0.4582, "step": 8966 }, { "epoch": 2.5096557514693534, "grad_norm": 0.2517061912480958, "learning_rate": 7.135765747292617e-06, "loss": 0.4379, "step": 8967 }, { "epoch": 2.5099356283235377, "grad_norm": 0.2589595047484895, "learning_rate": 7.127825992528187e-06, "loss": 0.4438, "step": 8968 }, { "epoch": 2.510215505177722, "grad_norm": 0.26025832593462245, "learning_rate": 7.119890318361277e-06, "loss": 0.4434, "step": 8969 }, { "epoch": 2.510495382031906, "grad_norm": 0.2558621471436027, "learning_rate": 7.111958725547208e-06, "loss": 0.4391, "step": 8970 }, { "epoch": 2.51077525888609, "grad_norm": 0.2659495080127039, "learning_rate": 7.104031214840906e-06, "loss": 0.4462, "step": 8971 }, { "epoch": 2.5110551357402744, "grad_norm": 0.2503816090494375, "learning_rate": 7.0961077869969305e-06, "loss": 0.4292, "step": 8972 }, { "epoch": 2.5113350125944587, "grad_norm": 0.2534700631485543, "learning_rate": 7.088188442769428e-06, "loss": 0.4501, "step": 8973 }, { "epoch": 2.5116148894486425, "grad_norm": 0.25580737668576414, "learning_rate": 7.0802731829121724e-06, "loss": 0.4741, "step": 8974 }, { "epoch": 2.511894766302827, "grad_norm": 0.2628449581556826, "learning_rate": 7.072362008178546e-06, "loss": 0.4694, "step": 8975 }, { "epoch": 2.512174643157011, "grad_norm": 0.24390541906259172, "learning_rate": 7.064454919321517e-06, "loss": 0.4471, "step": 8976 }, { "epoch": 2.512454520011195, "grad_norm": 0.2504828534106917, "learning_rate": 7.056551917093707e-06, "loss": 0.4565, "step": 8977 }, { "epoch": 2.5127343968653792, "grad_norm": 0.24321842590206041, "learning_rate": 7.048653002247319e-06, "loss": 0.4345, "step": 8978 }, { "epoch": 2.5130142737195635, "grad_norm": 0.26925868771948464, "learning_rate": 7.040758175534179e-06, "loss": 0.452, "step": 8979 }, { "epoch": 2.5132941505737474, "grad_norm": 0.25258720681110736, "learning_rate": 7.03286743770572e-06, "loss": 0.4676, "step": 8980 }, { "epoch": 2.5135740274279317, "grad_norm": 0.2515970074169978, "learning_rate": 7.024980789512991e-06, "loss": 0.4414, "step": 8981 }, { "epoch": 2.513853904282116, "grad_norm": 0.2513935049927517, "learning_rate": 7.0170982317066315e-06, "loss": 0.4375, "step": 8982 }, { "epoch": 2.5141337811363, "grad_norm": 0.25409520958708476, "learning_rate": 7.009219765036934e-06, "loss": 0.4478, "step": 8983 }, { "epoch": 2.514413657990484, "grad_norm": 0.27062936308402946, "learning_rate": 7.001345390253761e-06, "loss": 0.4617, "step": 8984 }, { "epoch": 2.5146935348446684, "grad_norm": 0.25502899641806187, "learning_rate": 6.993475108106601e-06, "loss": 0.4565, "step": 8985 }, { "epoch": 2.5149734116988522, "grad_norm": 0.25329809166927825, "learning_rate": 6.98560891934455e-06, "loss": 0.4476, "step": 8986 }, { "epoch": 2.5152532885530365, "grad_norm": 0.2646511731091114, "learning_rate": 6.977746824716319e-06, "loss": 0.4733, "step": 8987 }, { "epoch": 2.515533165407221, "grad_norm": 0.2586772269443656, "learning_rate": 6.969888824970228e-06, "loss": 0.4556, "step": 8988 }, { "epoch": 2.515813042261405, "grad_norm": 0.25663256481890245, "learning_rate": 6.962034920854199e-06, "loss": 0.4562, "step": 8989 }, { "epoch": 2.5160929191155894, "grad_norm": 0.2451363489483608, "learning_rate": 6.95418511311578e-06, "loss": 0.4521, "step": 8990 }, { "epoch": 2.5163727959697733, "grad_norm": 0.25364956233507796, "learning_rate": 6.946339402502116e-06, "loss": 0.462, "step": 8991 }, { "epoch": 2.5166526728239575, "grad_norm": 0.24462490587022592, "learning_rate": 6.938497789759968e-06, "loss": 0.4572, "step": 8992 }, { "epoch": 2.516932549678142, "grad_norm": 0.2639739919137141, "learning_rate": 6.930660275635709e-06, "loss": 0.4592, "step": 8993 }, { "epoch": 2.5172124265323257, "grad_norm": 0.25208749476232456, "learning_rate": 6.922826860875303e-06, "loss": 0.4645, "step": 8994 }, { "epoch": 2.51749230338651, "grad_norm": 0.24924361587793836, "learning_rate": 6.91499754622435e-06, "loss": 0.4265, "step": 8995 }, { "epoch": 2.5177721802406943, "grad_norm": 0.248666852870826, "learning_rate": 6.907172332428041e-06, "loss": 0.464, "step": 8996 }, { "epoch": 2.518052057094878, "grad_norm": 0.2530985619314788, "learning_rate": 6.899351220231193e-06, "loss": 0.4464, "step": 8997 }, { "epoch": 2.5183319339490624, "grad_norm": 0.25352600897579103, "learning_rate": 6.89153421037822e-06, "loss": 0.4524, "step": 8998 }, { "epoch": 2.5186118108032467, "grad_norm": 0.25109304395332893, "learning_rate": 6.883721303613139e-06, "loss": 0.4395, "step": 8999 }, { "epoch": 2.5188916876574305, "grad_norm": 0.2581392575479511, "learning_rate": 6.875912500679605e-06, "loss": 0.4562, "step": 9000 }, { "epoch": 2.519171564511615, "grad_norm": 0.26440397696851237, "learning_rate": 6.868107802320861e-06, "loss": 0.4523, "step": 9001 }, { "epoch": 2.519451441365799, "grad_norm": 0.25347825803240537, "learning_rate": 6.860307209279759e-06, "loss": 0.4585, "step": 9002 }, { "epoch": 2.519731318219983, "grad_norm": 0.24683638479026232, "learning_rate": 6.852510722298761e-06, "loss": 0.4479, "step": 9003 }, { "epoch": 2.5200111950741673, "grad_norm": 0.24739050528434142, "learning_rate": 6.844718342119944e-06, "loss": 0.4384, "step": 9004 }, { "epoch": 2.5202910719283516, "grad_norm": 0.2691543647274035, "learning_rate": 6.836930069484987e-06, "loss": 0.4619, "step": 9005 }, { "epoch": 2.520570948782536, "grad_norm": 0.25099324436026205, "learning_rate": 6.829145905135187e-06, "loss": 0.4429, "step": 9006 }, { "epoch": 2.5208508256367197, "grad_norm": 0.24724463802634064, "learning_rate": 6.821365849811445e-06, "loss": 0.4272, "step": 9007 }, { "epoch": 2.521130702490904, "grad_norm": 0.2572366796115523, "learning_rate": 6.813589904254269e-06, "loss": 0.4924, "step": 9008 }, { "epoch": 2.5214105793450883, "grad_norm": 0.25816372621467104, "learning_rate": 6.805818069203779e-06, "loss": 0.4453, "step": 9009 }, { "epoch": 2.5216904561992726, "grad_norm": 0.25110778666278544, "learning_rate": 6.798050345399704e-06, "loss": 0.4646, "step": 9010 }, { "epoch": 2.5219703330534564, "grad_norm": 0.2580243655826121, "learning_rate": 6.790286733581386e-06, "loss": 0.4595, "step": 9011 }, { "epoch": 2.5222502099076407, "grad_norm": 0.2579702369872616, "learning_rate": 6.782527234487751e-06, "loss": 0.4497, "step": 9012 }, { "epoch": 2.522530086761825, "grad_norm": 0.2551010773248085, "learning_rate": 6.7747718488573645e-06, "loss": 0.4612, "step": 9013 }, { "epoch": 2.522809963616009, "grad_norm": 0.25772777460612917, "learning_rate": 6.7670205774283905e-06, "loss": 0.4461, "step": 9014 }, { "epoch": 2.523089840470193, "grad_norm": 0.26076216237241695, "learning_rate": 6.759273420938594e-06, "loss": 0.4769, "step": 9015 }, { "epoch": 2.5233697173243774, "grad_norm": 0.2502091012128725, "learning_rate": 6.751530380125353e-06, "loss": 0.4271, "step": 9016 }, { "epoch": 2.5236495941785613, "grad_norm": 0.26811058278756883, "learning_rate": 6.743791455725651e-06, "loss": 0.4563, "step": 9017 }, { "epoch": 2.5239294710327456, "grad_norm": 0.2586330387799208, "learning_rate": 6.736056648476102e-06, "loss": 0.4579, "step": 9018 }, { "epoch": 2.52420934788693, "grad_norm": 0.2645844473897462, "learning_rate": 6.728325959112891e-06, "loss": 0.4703, "step": 9019 }, { "epoch": 2.5244892247411137, "grad_norm": 0.26234738979855504, "learning_rate": 6.72059938837184e-06, "loss": 0.4759, "step": 9020 }, { "epoch": 2.524769101595298, "grad_norm": 0.2531272337988228, "learning_rate": 6.712876936988366e-06, "loss": 0.4573, "step": 9021 }, { "epoch": 2.5250489784494823, "grad_norm": 0.25923149226462266, "learning_rate": 6.705158605697487e-06, "loss": 0.4449, "step": 9022 }, { "epoch": 2.525328855303666, "grad_norm": 0.2527412087067922, "learning_rate": 6.697444395233849e-06, "loss": 0.4411, "step": 9023 }, { "epoch": 2.5256087321578504, "grad_norm": 0.24458877134965698, "learning_rate": 6.689734306331691e-06, "loss": 0.4696, "step": 9024 }, { "epoch": 2.5258886090120347, "grad_norm": 0.28629513349931024, "learning_rate": 6.682028339724866e-06, "loss": 0.457, "step": 9025 }, { "epoch": 2.526168485866219, "grad_norm": 0.24219808104391424, "learning_rate": 6.6743264961468266e-06, "loss": 0.4486, "step": 9026 }, { "epoch": 2.5264483627204033, "grad_norm": 0.24907028126722813, "learning_rate": 6.666628776330641e-06, "loss": 0.4515, "step": 9027 }, { "epoch": 2.526728239574587, "grad_norm": 0.2428588276750298, "learning_rate": 6.658935181008985e-06, "loss": 0.4444, "step": 9028 }, { "epoch": 2.5270081164287714, "grad_norm": 0.2598518384155076, "learning_rate": 6.65124571091415e-06, "loss": 0.4639, "step": 9029 }, { "epoch": 2.5272879932829557, "grad_norm": 0.26021567833303383, "learning_rate": 6.643560366777995e-06, "loss": 0.4608, "step": 9030 }, { "epoch": 2.5275678701371396, "grad_norm": 0.26185111067548805, "learning_rate": 6.635879149332036e-06, "loss": 0.4412, "step": 9031 }, { "epoch": 2.527847746991324, "grad_norm": 0.2627699129277269, "learning_rate": 6.62820205930737e-06, "loss": 0.4632, "step": 9032 }, { "epoch": 2.528127623845508, "grad_norm": 0.2608584967089512, "learning_rate": 6.620529097434714e-06, "loss": 0.4631, "step": 9033 }, { "epoch": 2.528407500699692, "grad_norm": 0.251428636092459, "learning_rate": 6.612860264444359e-06, "loss": 0.4492, "step": 9034 }, { "epoch": 2.5286873775538763, "grad_norm": 0.24945361347811598, "learning_rate": 6.605195561066269e-06, "loss": 0.4562, "step": 9035 }, { "epoch": 2.5289672544080606, "grad_norm": 0.24857318034272893, "learning_rate": 6.597534988029946e-06, "loss": 0.4576, "step": 9036 }, { "epoch": 2.5292471312622444, "grad_norm": 0.25722373000155074, "learning_rate": 6.589878546064543e-06, "loss": 0.4617, "step": 9037 }, { "epoch": 2.5295270081164287, "grad_norm": 0.25686729673985775, "learning_rate": 6.5822262358987916e-06, "loss": 0.4741, "step": 9038 }, { "epoch": 2.529806884970613, "grad_norm": 0.2560864645379675, "learning_rate": 6.574578058261055e-06, "loss": 0.4418, "step": 9039 }, { "epoch": 2.530086761824797, "grad_norm": 0.256450121733098, "learning_rate": 6.566934013879283e-06, "loss": 0.4464, "step": 9040 }, { "epoch": 2.530366638678981, "grad_norm": 0.24927342458415117, "learning_rate": 6.55929410348104e-06, "loss": 0.4594, "step": 9041 }, { "epoch": 2.5306465155331654, "grad_norm": 0.24086033624755418, "learning_rate": 6.551658327793503e-06, "loss": 0.4611, "step": 9042 }, { "epoch": 2.5309263923873497, "grad_norm": 0.23626263901940917, "learning_rate": 6.544026687543442e-06, "loss": 0.4513, "step": 9043 }, { "epoch": 2.5312062692415336, "grad_norm": 0.25190955097541173, "learning_rate": 6.536399183457248e-06, "loss": 0.4551, "step": 9044 }, { "epoch": 2.531486146095718, "grad_norm": 0.41540484331269534, "learning_rate": 6.528775816260901e-06, "loss": 0.4493, "step": 9045 }, { "epoch": 2.531766022949902, "grad_norm": 0.24828989706009602, "learning_rate": 6.521156586680011e-06, "loss": 0.445, "step": 9046 }, { "epoch": 2.5320458998040865, "grad_norm": 0.2537136199076485, "learning_rate": 6.5135414954397785e-06, "loss": 0.463, "step": 9047 }, { "epoch": 2.5323257766582703, "grad_norm": 0.2461613470711474, "learning_rate": 6.505930543264999e-06, "loss": 0.4803, "step": 9048 }, { "epoch": 2.5326056535124546, "grad_norm": 0.24533467246800178, "learning_rate": 6.498323730880096e-06, "loss": 0.4678, "step": 9049 }, { "epoch": 2.532885530366639, "grad_norm": 0.25797351546042757, "learning_rate": 6.490721059009086e-06, "loss": 0.4339, "step": 9050 }, { "epoch": 2.5331654072208227, "grad_norm": 0.25157856160358383, "learning_rate": 6.483122528375591e-06, "loss": 0.4545, "step": 9051 }, { "epoch": 2.533445284075007, "grad_norm": 0.2465713257227587, "learning_rate": 6.4755281397028576e-06, "loss": 0.455, "step": 9052 }, { "epoch": 2.5337251609291913, "grad_norm": 0.25310491688299097, "learning_rate": 6.4679378937137215e-06, "loss": 0.4426, "step": 9053 }, { "epoch": 2.534005037783375, "grad_norm": 0.25791684141482535, "learning_rate": 6.46035179113062e-06, "loss": 0.4408, "step": 9054 }, { "epoch": 2.5342849146375594, "grad_norm": 0.2572030751913601, "learning_rate": 6.452769832675609e-06, "loss": 0.4476, "step": 9055 }, { "epoch": 2.5345647914917437, "grad_norm": 0.266356508072427, "learning_rate": 6.445192019070334e-06, "loss": 0.4558, "step": 9056 }, { "epoch": 2.5348446683459276, "grad_norm": 0.257384068028928, "learning_rate": 6.437618351036068e-06, "loss": 0.4591, "step": 9057 }, { "epoch": 2.535124545200112, "grad_norm": 0.2546444328612967, "learning_rate": 6.430048829293667e-06, "loss": 0.4528, "step": 9058 }, { "epoch": 2.535404422054296, "grad_norm": 0.26424748863084735, "learning_rate": 6.422483454563605e-06, "loss": 0.4522, "step": 9059 }, { "epoch": 2.53568429890848, "grad_norm": 0.25642044074867987, "learning_rate": 6.414922227565962e-06, "loss": 0.4463, "step": 9060 }, { "epoch": 2.5359641757626643, "grad_norm": 0.2410920603981759, "learning_rate": 6.407365149020411e-06, "loss": 0.4479, "step": 9061 }, { "epoch": 2.5362440526168486, "grad_norm": 0.25678693014949794, "learning_rate": 6.399812219646251e-06, "loss": 0.4389, "step": 9062 }, { "epoch": 2.536523929471033, "grad_norm": 0.2709547706867928, "learning_rate": 6.392263440162366e-06, "loss": 0.4538, "step": 9063 }, { "epoch": 2.536803806325217, "grad_norm": 0.25724482144428323, "learning_rate": 6.384718811287255e-06, "loss": 0.4581, "step": 9064 }, { "epoch": 2.537083683179401, "grad_norm": 0.2630105013815421, "learning_rate": 6.37717833373902e-06, "loss": 0.4371, "step": 9065 }, { "epoch": 2.5373635600335853, "grad_norm": 0.2642686115741411, "learning_rate": 6.369642008235377e-06, "loss": 0.4673, "step": 9066 }, { "epoch": 2.5376434368877696, "grad_norm": 0.25008826463627004, "learning_rate": 6.3621098354936235e-06, "loss": 0.4477, "step": 9067 }, { "epoch": 2.5379233137419535, "grad_norm": 0.24809789646075484, "learning_rate": 6.354581816230676e-06, "loss": 0.4407, "step": 9068 }, { "epoch": 2.5382031905961377, "grad_norm": 0.25287845157192435, "learning_rate": 6.347057951163049e-06, "loss": 0.4293, "step": 9069 }, { "epoch": 2.538483067450322, "grad_norm": 0.25872036606644555, "learning_rate": 6.3395382410068925e-06, "loss": 0.4389, "step": 9070 }, { "epoch": 2.538762944304506, "grad_norm": 0.24919407583183825, "learning_rate": 6.332022686477928e-06, "loss": 0.4447, "step": 9071 }, { "epoch": 2.53904282115869, "grad_norm": 0.24639849359031837, "learning_rate": 6.324511288291479e-06, "loss": 0.4392, "step": 9072 }, { "epoch": 2.5393226980128745, "grad_norm": 0.24467253580160808, "learning_rate": 6.3170040471625005e-06, "loss": 0.4274, "step": 9073 }, { "epoch": 2.5396025748670583, "grad_norm": 0.2514500342020152, "learning_rate": 6.30950096380552e-06, "loss": 0.4507, "step": 9074 }, { "epoch": 2.5398824517212426, "grad_norm": 0.24603248756069154, "learning_rate": 6.302002038934696e-06, "loss": 0.4429, "step": 9075 }, { "epoch": 2.540162328575427, "grad_norm": 0.2629686044634512, "learning_rate": 6.294507273263772e-06, "loss": 0.4683, "step": 9076 }, { "epoch": 2.5404422054296107, "grad_norm": 0.25872850504593214, "learning_rate": 6.287016667506113e-06, "loss": 0.4483, "step": 9077 }, { "epoch": 2.540722082283795, "grad_norm": 0.2480412433276257, "learning_rate": 6.279530222374674e-06, "loss": 0.4386, "step": 9078 }, { "epoch": 2.5410019591379793, "grad_norm": 0.2531260430664589, "learning_rate": 6.27204793858202e-06, "loss": 0.461, "step": 9079 }, { "epoch": 2.541281835992163, "grad_norm": 0.25795413308401616, "learning_rate": 6.264569816840321e-06, "loss": 0.468, "step": 9080 }, { "epoch": 2.5415617128463475, "grad_norm": 0.24996489529895616, "learning_rate": 6.257095857861345e-06, "loss": 0.4521, "step": 9081 }, { "epoch": 2.5418415897005318, "grad_norm": 0.2552172480352286, "learning_rate": 6.249626062356467e-06, "loss": 0.4369, "step": 9082 }, { "epoch": 2.542121466554716, "grad_norm": 0.2561180333740874, "learning_rate": 6.24216043103667e-06, "loss": 0.4309, "step": 9083 }, { "epoch": 2.5424013434089003, "grad_norm": 0.2620053313294719, "learning_rate": 6.234698964612545e-06, "loss": 0.4716, "step": 9084 }, { "epoch": 2.542681220263084, "grad_norm": 0.24833583400042106, "learning_rate": 6.227241663794259e-06, "loss": 0.4415, "step": 9085 }, { "epoch": 2.5429610971172685, "grad_norm": 0.24157989185972797, "learning_rate": 6.219788529291603e-06, "loss": 0.4447, "step": 9086 }, { "epoch": 2.5432409739714528, "grad_norm": 0.24469414025311992, "learning_rate": 6.21233956181399e-06, "loss": 0.4455, "step": 9087 }, { "epoch": 2.5435208508256366, "grad_norm": 0.25049764277045034, "learning_rate": 6.204894762070407e-06, "loss": 0.4402, "step": 9088 }, { "epoch": 2.543800727679821, "grad_norm": 0.2486790610579519, "learning_rate": 6.197454130769448e-06, "loss": 0.4447, "step": 9089 }, { "epoch": 2.544080604534005, "grad_norm": 0.24619858352040716, "learning_rate": 6.19001766861933e-06, "loss": 0.4435, "step": 9090 }, { "epoch": 2.544360481388189, "grad_norm": 0.2558167745797563, "learning_rate": 6.1825853763278485e-06, "loss": 0.4402, "step": 9091 }, { "epoch": 2.5446403582423733, "grad_norm": 0.2353500169975788, "learning_rate": 6.175157254602415e-06, "loss": 0.4262, "step": 9092 }, { "epoch": 2.5449202350965576, "grad_norm": 0.25616219547313984, "learning_rate": 6.167733304150048e-06, "loss": 0.4783, "step": 9093 }, { "epoch": 2.5452001119507415, "grad_norm": 0.2621886960057489, "learning_rate": 6.160313525677353e-06, "loss": 0.4453, "step": 9094 }, { "epoch": 2.5454799888049258, "grad_norm": 0.252862558908742, "learning_rate": 6.15289791989056e-06, "loss": 0.4584, "step": 9095 }, { "epoch": 2.54575986565911, "grad_norm": 0.25631498872333514, "learning_rate": 6.1454864874954834e-06, "loss": 0.4514, "step": 9096 }, { "epoch": 2.546039742513294, "grad_norm": 0.24840661224398433, "learning_rate": 6.138079229197552e-06, "loss": 0.4478, "step": 9097 }, { "epoch": 2.546319619367478, "grad_norm": 0.24389373928599684, "learning_rate": 6.130676145701786e-06, "loss": 0.4381, "step": 9098 }, { "epoch": 2.5465994962216625, "grad_norm": 0.257340560366041, "learning_rate": 6.123277237712821e-06, "loss": 0.456, "step": 9099 }, { "epoch": 2.5468793730758468, "grad_norm": 0.2562883816613838, "learning_rate": 6.115882505934889e-06, "loss": 0.4354, "step": 9100 }, { "epoch": 2.5471592499300306, "grad_norm": 0.25208727499306416, "learning_rate": 6.108491951071821e-06, "loss": 0.4423, "step": 9101 }, { "epoch": 2.547439126784215, "grad_norm": 0.2568919940162497, "learning_rate": 6.101105573827065e-06, "loss": 0.4696, "step": 9102 }, { "epoch": 2.547719003638399, "grad_norm": 0.2568095046552021, "learning_rate": 6.093723374903632e-06, "loss": 0.4231, "step": 9103 }, { "epoch": 2.5479988804925835, "grad_norm": 0.2482858904137207, "learning_rate": 6.086345355004197e-06, "loss": 0.4422, "step": 9104 }, { "epoch": 2.5482787573467673, "grad_norm": 0.25164730562085175, "learning_rate": 6.078971514830989e-06, "loss": 0.4762, "step": 9105 }, { "epoch": 2.5485586342009516, "grad_norm": 0.25451172396853644, "learning_rate": 6.071601855085857e-06, "loss": 0.4449, "step": 9106 }, { "epoch": 2.548838511055136, "grad_norm": 0.24662730540699523, "learning_rate": 6.064236376470245e-06, "loss": 0.4433, "step": 9107 }, { "epoch": 2.5491183879093198, "grad_norm": 0.26930731644611416, "learning_rate": 6.056875079685209e-06, "loss": 0.4539, "step": 9108 }, { "epoch": 2.549398264763504, "grad_norm": 0.24951810190856688, "learning_rate": 6.049517965431401e-06, "loss": 0.4364, "step": 9109 }, { "epoch": 2.5496781416176884, "grad_norm": 0.2588401379405034, "learning_rate": 6.04216503440907e-06, "loss": 0.4617, "step": 9110 }, { "epoch": 2.549958018471872, "grad_norm": 0.2481260845789059, "learning_rate": 6.034816287318073e-06, "loss": 0.4413, "step": 9111 }, { "epoch": 2.5502378953260565, "grad_norm": 0.24731364381426707, "learning_rate": 6.027471724857875e-06, "loss": 0.4459, "step": 9112 }, { "epoch": 2.550517772180241, "grad_norm": 0.258436832246282, "learning_rate": 6.0201313477275256e-06, "loss": 0.4552, "step": 9113 }, { "epoch": 2.5507976490344246, "grad_norm": 0.23769209221220836, "learning_rate": 6.012795156625695e-06, "loss": 0.4323, "step": 9114 }, { "epoch": 2.551077525888609, "grad_norm": 0.2583512286228744, "learning_rate": 6.005463152250645e-06, "loss": 0.4468, "step": 9115 }, { "epoch": 2.551357402742793, "grad_norm": 0.25152119278179613, "learning_rate": 5.998135335300231e-06, "loss": 0.4318, "step": 9116 }, { "epoch": 2.551637279596977, "grad_norm": 0.250313446831586, "learning_rate": 5.990811706471927e-06, "loss": 0.4546, "step": 9117 }, { "epoch": 2.5519171564511614, "grad_norm": 0.2534164940441637, "learning_rate": 5.983492266462798e-06, "loss": 0.4237, "step": 9118 }, { "epoch": 2.5521970333053456, "grad_norm": 0.2558220303793233, "learning_rate": 5.976177015969514e-06, "loss": 0.4629, "step": 9119 }, { "epoch": 2.55247691015953, "grad_norm": 0.2676523221666284, "learning_rate": 5.968865955688352e-06, "loss": 0.4678, "step": 9120 }, { "epoch": 2.5527567870137142, "grad_norm": 0.26340062183579227, "learning_rate": 5.9615590863151495e-06, "loss": 0.482, "step": 9121 }, { "epoch": 2.553036663867898, "grad_norm": 0.2613714215157316, "learning_rate": 5.9542564085454165e-06, "loss": 0.4677, "step": 9122 }, { "epoch": 2.5533165407220824, "grad_norm": 0.25983017898091737, "learning_rate": 5.946957923074209e-06, "loss": 0.4613, "step": 9123 }, { "epoch": 2.5535964175762667, "grad_norm": 0.24451686698187075, "learning_rate": 5.939663630596209e-06, "loss": 0.4591, "step": 9124 }, { "epoch": 2.5538762944304505, "grad_norm": 0.24427666017394079, "learning_rate": 5.932373531805685e-06, "loss": 0.4518, "step": 9125 }, { "epoch": 2.554156171284635, "grad_norm": 0.2456198460591965, "learning_rate": 5.925087627396508e-06, "loss": 0.4607, "step": 9126 }, { "epoch": 2.554436048138819, "grad_norm": 0.25358946251252196, "learning_rate": 5.917805918062169e-06, "loss": 0.4435, "step": 9127 }, { "epoch": 2.554715924993003, "grad_norm": 0.2478587311658339, "learning_rate": 5.91052840449573e-06, "loss": 0.4436, "step": 9128 }, { "epoch": 2.554995801847187, "grad_norm": 0.2526430282536233, "learning_rate": 5.903255087389881e-06, "loss": 0.4382, "step": 9129 }, { "epoch": 2.5552756787013715, "grad_norm": 0.2599604379789864, "learning_rate": 5.89598596743689e-06, "loss": 0.4713, "step": 9130 }, { "epoch": 2.5555555555555554, "grad_norm": 0.26059258912979244, "learning_rate": 5.888721045328644e-06, "loss": 0.4607, "step": 9131 }, { "epoch": 2.5558354324097396, "grad_norm": 0.256164164169617, "learning_rate": 5.8814603217566155e-06, "loss": 0.4543, "step": 9132 }, { "epoch": 2.556115309263924, "grad_norm": 0.2495455717137147, "learning_rate": 5.874203797411887e-06, "loss": 0.4571, "step": 9133 }, { "epoch": 2.556395186118108, "grad_norm": 0.25740927083003284, "learning_rate": 5.866951472985143e-06, "loss": 0.4425, "step": 9134 }, { "epoch": 2.556675062972292, "grad_norm": 0.2542573316512893, "learning_rate": 5.859703349166662e-06, "loss": 0.4586, "step": 9135 }, { "epoch": 2.5569549398264764, "grad_norm": 0.262186312086353, "learning_rate": 5.852459426646317e-06, "loss": 0.4867, "step": 9136 }, { "epoch": 2.5572348166806607, "grad_norm": 0.26371603263245813, "learning_rate": 5.845219706113597e-06, "loss": 0.4626, "step": 9137 }, { "epoch": 2.5575146935348445, "grad_norm": 0.2509764711358416, "learning_rate": 5.837984188257583e-06, "loss": 0.4447, "step": 9138 }, { "epoch": 2.557794570389029, "grad_norm": 0.2579467960049227, "learning_rate": 5.830752873766948e-06, "loss": 0.4457, "step": 9139 }, { "epoch": 2.558074447243213, "grad_norm": 0.25286430913608743, "learning_rate": 5.823525763329979e-06, "loss": 0.4438, "step": 9140 }, { "epoch": 2.5583543240973974, "grad_norm": 0.263430597262002, "learning_rate": 5.816302857634553e-06, "loss": 0.4566, "step": 9141 }, { "epoch": 2.5586342009515812, "grad_norm": 0.25998784760305815, "learning_rate": 5.809084157368155e-06, "loss": 0.4824, "step": 9142 }, { "epoch": 2.5589140778057655, "grad_norm": 0.25757352165151176, "learning_rate": 5.801869663217857e-06, "loss": 0.4561, "step": 9143 }, { "epoch": 2.55919395465995, "grad_norm": 0.2611364733536436, "learning_rate": 5.794659375870348e-06, "loss": 0.4598, "step": 9144 }, { "epoch": 2.5594738315141337, "grad_norm": 0.24673739305454123, "learning_rate": 5.787453296011902e-06, "loss": 0.4472, "step": 9145 }, { "epoch": 2.559753708368318, "grad_norm": 0.2525542203161026, "learning_rate": 5.7802514243283925e-06, "loss": 0.4772, "step": 9146 }, { "epoch": 2.5600335852225022, "grad_norm": 0.24610967119593202, "learning_rate": 5.7730537615053095e-06, "loss": 0.4534, "step": 9147 }, { "epoch": 2.560313462076686, "grad_norm": 0.2508092311732347, "learning_rate": 5.765860308227722e-06, "loss": 0.4444, "step": 9148 }, { "epoch": 2.5605933389308704, "grad_norm": 0.25882638519393514, "learning_rate": 5.75867106518031e-06, "loss": 0.4638, "step": 9149 }, { "epoch": 2.5608732157850547, "grad_norm": 0.25450553546604476, "learning_rate": 5.751486033047349e-06, "loss": 0.4339, "step": 9150 }, { "epoch": 2.5611530926392385, "grad_norm": 0.2669342479300081, "learning_rate": 5.7443052125127125e-06, "loss": 0.4525, "step": 9151 }, { "epoch": 2.561432969493423, "grad_norm": 0.25777195645807727, "learning_rate": 5.737128604259878e-06, "loss": 0.4499, "step": 9152 }, { "epoch": 2.561712846347607, "grad_norm": 0.2597046531032639, "learning_rate": 5.729956208971915e-06, "loss": 0.4729, "step": 9153 }, { "epoch": 2.561992723201791, "grad_norm": 0.2614699484353166, "learning_rate": 5.7227880273315045e-06, "loss": 0.4684, "step": 9154 }, { "epoch": 2.5622726000559752, "grad_norm": 0.2501052573574934, "learning_rate": 5.715624060020908e-06, "loss": 0.4442, "step": 9155 }, { "epoch": 2.5625524769101595, "grad_norm": 0.2520769280854139, "learning_rate": 5.708464307722006e-06, "loss": 0.4622, "step": 9156 }, { "epoch": 2.562832353764344, "grad_norm": 0.2547266031794907, "learning_rate": 5.701308771116254e-06, "loss": 0.4521, "step": 9157 }, { "epoch": 2.563112230618528, "grad_norm": 0.2563651052950184, "learning_rate": 5.694157450884735e-06, "loss": 0.4519, "step": 9158 }, { "epoch": 2.563392107472712, "grad_norm": 0.25744604735413434, "learning_rate": 5.687010347708105e-06, "loss": 0.4623, "step": 9159 }, { "epoch": 2.5636719843268962, "grad_norm": 0.25466997886611975, "learning_rate": 5.679867462266636e-06, "loss": 0.4548, "step": 9160 }, { "epoch": 2.5639518611810805, "grad_norm": 0.23892686124911805, "learning_rate": 5.67272879524019e-06, "loss": 0.435, "step": 9161 }, { "epoch": 2.5642317380352644, "grad_norm": 0.25698434922165514, "learning_rate": 5.665594347308229e-06, "loss": 0.4459, "step": 9162 }, { "epoch": 2.5645116148894487, "grad_norm": 0.257195515088587, "learning_rate": 5.658464119149809e-06, "loss": 0.4641, "step": 9163 }, { "epoch": 2.564791491743633, "grad_norm": 0.25358151850238736, "learning_rate": 5.651338111443594e-06, "loss": 0.4464, "step": 9164 }, { "epoch": 2.565071368597817, "grad_norm": 0.2572662942917605, "learning_rate": 5.644216324867841e-06, "loss": 0.4566, "step": 9165 }, { "epoch": 2.565351245452001, "grad_norm": 0.2558001665540111, "learning_rate": 5.637098760100407e-06, "loss": 0.4704, "step": 9166 }, { "epoch": 2.5656311223061854, "grad_norm": 0.24883429377142569, "learning_rate": 5.62998541781874e-06, "loss": 0.446, "step": 9167 }, { "epoch": 2.5659109991603692, "grad_norm": 0.26564797320747585, "learning_rate": 5.622876298699898e-06, "loss": 0.4608, "step": 9168 }, { "epoch": 2.5661908760145535, "grad_norm": 0.2411278117744789, "learning_rate": 5.6157714034205334e-06, "loss": 0.4393, "step": 9169 }, { "epoch": 2.566470752868738, "grad_norm": 0.25815355494097664, "learning_rate": 5.6086707326568845e-06, "loss": 0.4269, "step": 9170 }, { "epoch": 2.5667506297229217, "grad_norm": 0.2616000562617375, "learning_rate": 5.6015742870847985e-06, "loss": 0.4481, "step": 9171 }, { "epoch": 2.567030506577106, "grad_norm": 0.255265647625433, "learning_rate": 5.594482067379731e-06, "loss": 0.4694, "step": 9172 }, { "epoch": 2.5673103834312903, "grad_norm": 0.25239103218717907, "learning_rate": 5.587394074216712e-06, "loss": 0.4394, "step": 9173 }, { "epoch": 2.5675902602854745, "grad_norm": 0.25854733542183617, "learning_rate": 5.580310308270381e-06, "loss": 0.4495, "step": 9174 }, { "epoch": 2.5678701371396584, "grad_norm": 0.24697676977817049, "learning_rate": 5.573230770214982e-06, "loss": 0.4315, "step": 9175 }, { "epoch": 2.5681500139938427, "grad_norm": 0.26384154704967105, "learning_rate": 5.566155460724343e-06, "loss": 0.4489, "step": 9176 }, { "epoch": 2.568429890848027, "grad_norm": 0.24507750117012964, "learning_rate": 5.559084380471896e-06, "loss": 0.4517, "step": 9177 }, { "epoch": 2.5687097677022113, "grad_norm": 0.27425588684557417, "learning_rate": 5.552017530130676e-06, "loss": 0.4512, "step": 9178 }, { "epoch": 2.568989644556395, "grad_norm": 0.2488587339406811, "learning_rate": 5.5449549103733065e-06, "loss": 0.4371, "step": 9179 }, { "epoch": 2.5692695214105794, "grad_norm": 0.25506453667985635, "learning_rate": 5.537896521872005e-06, "loss": 0.4365, "step": 9180 }, { "epoch": 2.5695493982647637, "grad_norm": 0.2656730131529827, "learning_rate": 5.530842365298605e-06, "loss": 0.468, "step": 9181 }, { "epoch": 2.5698292751189475, "grad_norm": 0.25973678949507073, "learning_rate": 5.523792441324516e-06, "loss": 0.457, "step": 9182 }, { "epoch": 2.570109151973132, "grad_norm": 0.24539048819265274, "learning_rate": 5.51674675062076e-06, "loss": 0.4769, "step": 9183 }, { "epoch": 2.570389028827316, "grad_norm": 0.2531422133017602, "learning_rate": 5.509705293857947e-06, "loss": 0.4574, "step": 9184 }, { "epoch": 2.5706689056815, "grad_norm": 0.2548109395272116, "learning_rate": 5.5026680717062855e-06, "loss": 0.4543, "step": 9185 }, { "epoch": 2.5709487825356843, "grad_norm": 0.25359074586043096, "learning_rate": 5.495635084835582e-06, "loss": 0.4467, "step": 9186 }, { "epoch": 2.5712286593898686, "grad_norm": 0.2617761868678252, "learning_rate": 5.488606333915236e-06, "loss": 0.4542, "step": 9187 }, { "epoch": 2.5715085362440524, "grad_norm": 0.25215950198858933, "learning_rate": 5.481581819614262e-06, "loss": 0.4571, "step": 9188 }, { "epoch": 2.5717884130982367, "grad_norm": 0.2538088206469343, "learning_rate": 5.474561542601242e-06, "loss": 0.449, "step": 9189 }, { "epoch": 2.572068289952421, "grad_norm": 0.2603239345951513, "learning_rate": 5.46754550354438e-06, "loss": 0.4461, "step": 9190 }, { "epoch": 2.572348166806605, "grad_norm": 0.24946940193842998, "learning_rate": 5.460533703111465e-06, "loss": 0.4677, "step": 9191 }, { "epoch": 2.572628043660789, "grad_norm": 0.2461630152636201, "learning_rate": 5.453526141969878e-06, "loss": 0.4672, "step": 9192 }, { "epoch": 2.5729079205149734, "grad_norm": 0.25407980345989045, "learning_rate": 5.4465228207866095e-06, "loss": 0.4551, "step": 9193 }, { "epoch": 2.5731877973691577, "grad_norm": 0.256288476843359, "learning_rate": 5.439523740228236e-06, "loss": 0.4406, "step": 9194 }, { "epoch": 2.573467674223342, "grad_norm": 0.25172429485079223, "learning_rate": 5.432528900960931e-06, "loss": 0.4613, "step": 9195 }, { "epoch": 2.573747551077526, "grad_norm": 0.2459655324435995, "learning_rate": 5.4255383036504736e-06, "loss": 0.4335, "step": 9196 }, { "epoch": 2.57402742793171, "grad_norm": 0.2463592720999968, "learning_rate": 5.41855194896223e-06, "loss": 0.455, "step": 9197 }, { "epoch": 2.5743073047858944, "grad_norm": 0.2599890636185018, "learning_rate": 5.411569837561164e-06, "loss": 0.4407, "step": 9198 }, { "epoch": 2.5745871816400783, "grad_norm": 0.2697605036552731, "learning_rate": 5.404591970111838e-06, "loss": 0.4578, "step": 9199 }, { "epoch": 2.5748670584942626, "grad_norm": 0.25532802799904475, "learning_rate": 5.39761834727841e-06, "loss": 0.4456, "step": 9200 }, { "epoch": 2.575146935348447, "grad_norm": 0.2604318981372324, "learning_rate": 5.390648969724632e-06, "loss": 0.4699, "step": 9201 }, { "epoch": 2.5754268122026307, "grad_norm": 0.24311926826298216, "learning_rate": 5.383683838113856e-06, "loss": 0.4582, "step": 9202 }, { "epoch": 2.575706689056815, "grad_norm": 0.25901859166320634, "learning_rate": 5.376722953109026e-06, "loss": 0.4402, "step": 9203 }, { "epoch": 2.5759865659109993, "grad_norm": 0.23953969446327295, "learning_rate": 5.369766315372676e-06, "loss": 0.4295, "step": 9204 }, { "epoch": 2.576266442765183, "grad_norm": 0.25651576602281145, "learning_rate": 5.3628139255669566e-06, "loss": 0.4586, "step": 9205 }, { "epoch": 2.5765463196193674, "grad_norm": 0.24919727706103317, "learning_rate": 5.3558657843535865e-06, "loss": 0.458, "step": 9206 }, { "epoch": 2.5768261964735517, "grad_norm": 0.25696382072887175, "learning_rate": 5.348921892393904e-06, "loss": 0.4213, "step": 9207 }, { "epoch": 2.5771060733277356, "grad_norm": 0.2553158735417908, "learning_rate": 5.3419822503488256e-06, "loss": 0.4495, "step": 9208 }, { "epoch": 2.57738595018192, "grad_norm": 0.25377523123978984, "learning_rate": 5.335046858878873e-06, "loss": 0.454, "step": 9209 }, { "epoch": 2.577665827036104, "grad_norm": 0.2457531256711823, "learning_rate": 5.328115718644161e-06, "loss": 0.4465, "step": 9210 }, { "epoch": 2.5779457038902884, "grad_norm": 0.2612425526475498, "learning_rate": 5.321188830304397e-06, "loss": 0.4695, "step": 9211 }, { "epoch": 2.5782255807444723, "grad_norm": 0.2553886435888375, "learning_rate": 5.314266194518886e-06, "loss": 0.4415, "step": 9212 }, { "epoch": 2.5785054575986566, "grad_norm": 0.247513891804963, "learning_rate": 5.307347811946534e-06, "loss": 0.4416, "step": 9213 }, { "epoch": 2.578785334452841, "grad_norm": 0.25266220607001677, "learning_rate": 5.300433683245831e-06, "loss": 0.4461, "step": 9214 }, { "epoch": 2.579065211307025, "grad_norm": 0.24858735261916182, "learning_rate": 5.293523809074874e-06, "loss": 0.4606, "step": 9215 }, { "epoch": 2.579345088161209, "grad_norm": 0.2558186712920539, "learning_rate": 5.286618190091341e-06, "loss": 0.464, "step": 9216 }, { "epoch": 2.5796249650153933, "grad_norm": 0.25630614765529586, "learning_rate": 5.279716826952513e-06, "loss": 0.4565, "step": 9217 }, { "epoch": 2.5799048418695776, "grad_norm": 0.2488157396102064, "learning_rate": 5.272819720315275e-06, "loss": 0.4445, "step": 9218 }, { "epoch": 2.5801847187237614, "grad_norm": 0.254845405717992, "learning_rate": 5.265926870836085e-06, "loss": 0.4567, "step": 9219 }, { "epoch": 2.5804645955779457, "grad_norm": 0.2442042932718986, "learning_rate": 5.259038279171014e-06, "loss": 0.4496, "step": 9220 }, { "epoch": 2.58074447243213, "grad_norm": 0.2583695399185578, "learning_rate": 5.252153945975724e-06, "loss": 0.4357, "step": 9221 }, { "epoch": 2.581024349286314, "grad_norm": 0.2599667365110196, "learning_rate": 5.245273871905471e-06, "loss": 0.4552, "step": 9222 }, { "epoch": 2.581304226140498, "grad_norm": 0.24578533495555563, "learning_rate": 5.2383980576150956e-06, "loss": 0.4342, "step": 9223 }, { "epoch": 2.5815841029946824, "grad_norm": 0.25983770635532155, "learning_rate": 5.231526503759054e-06, "loss": 0.4541, "step": 9224 }, { "epoch": 2.5818639798488663, "grad_norm": 0.25978791332708273, "learning_rate": 5.224659210991373e-06, "loss": 0.4513, "step": 9225 }, { "epoch": 2.5821438567030506, "grad_norm": 0.25163242302701827, "learning_rate": 5.21779617996569e-06, "loss": 0.4405, "step": 9226 }, { "epoch": 2.582423733557235, "grad_norm": 0.2543188791677801, "learning_rate": 5.210937411335237e-06, "loss": 0.4565, "step": 9227 }, { "epoch": 2.5827036104114187, "grad_norm": 0.25620720936735775, "learning_rate": 5.204082905752822e-06, "loss": 0.4559, "step": 9228 }, { "epoch": 2.582983487265603, "grad_norm": 0.2550947738601276, "learning_rate": 5.1972326638708765e-06, "loss": 0.4364, "step": 9229 }, { "epoch": 2.5832633641197873, "grad_norm": 0.24627397104248913, "learning_rate": 5.190386686341403e-06, "loss": 0.4428, "step": 9230 }, { "epoch": 2.5835432409739716, "grad_norm": 0.2590679580610083, "learning_rate": 5.183544973816001e-06, "loss": 0.4621, "step": 9231 }, { "epoch": 2.583823117828156, "grad_norm": 0.25292247781032845, "learning_rate": 5.176707526945879e-06, "loss": 0.4679, "step": 9232 }, { "epoch": 2.5841029946823397, "grad_norm": 0.27356158880275805, "learning_rate": 5.16987434638182e-06, "loss": 0.4841, "step": 9233 }, { "epoch": 2.584382871536524, "grad_norm": 0.2490183167988874, "learning_rate": 5.163045432774211e-06, "loss": 0.4343, "step": 9234 }, { "epoch": 2.5846627483907083, "grad_norm": 0.24613135330540556, "learning_rate": 5.156220786773042e-06, "loss": 0.4238, "step": 9235 }, { "epoch": 2.584942625244892, "grad_norm": 0.25588783561535067, "learning_rate": 5.149400409027871e-06, "loss": 0.4785, "step": 9236 }, { "epoch": 2.5852225020990764, "grad_norm": 0.24552521028453203, "learning_rate": 5.142584300187875e-06, "loss": 0.4395, "step": 9237 }, { "epoch": 2.5855023789532607, "grad_norm": 0.2543110945501354, "learning_rate": 5.135772460901816e-06, "loss": 0.4766, "step": 9238 }, { "epoch": 2.5857822558074446, "grad_norm": 0.2527108255469426, "learning_rate": 5.128964891818039e-06, "loss": 0.4556, "step": 9239 }, { "epoch": 2.586062132661629, "grad_norm": 0.24839543549521476, "learning_rate": 5.122161593584507e-06, "loss": 0.4545, "step": 9240 }, { "epoch": 2.586342009515813, "grad_norm": 0.2593936213171141, "learning_rate": 5.115362566848747e-06, "loss": 0.4625, "step": 9241 }, { "epoch": 2.586621886369997, "grad_norm": 0.2517068631168117, "learning_rate": 5.108567812257908e-06, "loss": 0.4543, "step": 9242 }, { "epoch": 2.5869017632241813, "grad_norm": 0.2539529424820145, "learning_rate": 5.101777330458707e-06, "loss": 0.4539, "step": 9243 }, { "epoch": 2.5871816400783656, "grad_norm": 0.28103238452968893, "learning_rate": 5.09499112209747e-06, "loss": 0.4523, "step": 9244 }, { "epoch": 2.5874615169325494, "grad_norm": 0.25606370897440894, "learning_rate": 5.0882091878201145e-06, "loss": 0.4526, "step": 9245 }, { "epoch": 2.5877413937867337, "grad_norm": 0.24275143396759824, "learning_rate": 5.081431528272146e-06, "loss": 0.4337, "step": 9246 }, { "epoch": 2.588021270640918, "grad_norm": 0.24786400825264993, "learning_rate": 5.0746581440986655e-06, "loss": 0.4371, "step": 9247 }, { "epoch": 2.5883011474951023, "grad_norm": 0.2569107452959841, "learning_rate": 5.0678890359443676e-06, "loss": 0.4462, "step": 9248 }, { "epoch": 2.588581024349286, "grad_norm": 0.24840227265534442, "learning_rate": 5.061124204453544e-06, "loss": 0.4416, "step": 9249 }, { "epoch": 2.5888609012034705, "grad_norm": 0.2475647497707761, "learning_rate": 5.054363650270072e-06, "loss": 0.4493, "step": 9250 }, { "epoch": 2.5891407780576547, "grad_norm": 0.2611274030792405, "learning_rate": 5.0476073740374215e-06, "loss": 0.452, "step": 9251 }, { "epoch": 2.589420654911839, "grad_norm": 0.25050214858935405, "learning_rate": 5.040855376398662e-06, "loss": 0.4479, "step": 9252 }, { "epoch": 2.589700531766023, "grad_norm": 0.2508115086874871, "learning_rate": 5.034107657996456e-06, "loss": 0.4429, "step": 9253 }, { "epoch": 2.589980408620207, "grad_norm": 0.25708179010064847, "learning_rate": 5.027364219473052e-06, "loss": 0.4506, "step": 9254 }, { "epoch": 2.5902602854743915, "grad_norm": 0.25173750004236733, "learning_rate": 5.020625061470291e-06, "loss": 0.4486, "step": 9255 }, { "epoch": 2.5905401623285753, "grad_norm": 0.2512137293591381, "learning_rate": 5.013890184629616e-06, "loss": 0.4252, "step": 9256 }, { "epoch": 2.5908200391827596, "grad_norm": 0.2543750373430273, "learning_rate": 5.007159589592047e-06, "loss": 0.4574, "step": 9257 }, { "epoch": 2.591099916036944, "grad_norm": 0.2472024703921494, "learning_rate": 5.000433276998218e-06, "loss": 0.4349, "step": 9258 }, { "epoch": 2.5913797928911277, "grad_norm": 0.2409312346089113, "learning_rate": 4.9937112474883365e-06, "loss": 0.4545, "step": 9259 }, { "epoch": 2.591659669745312, "grad_norm": 0.26704917029047187, "learning_rate": 4.986993501702209e-06, "loss": 0.4465, "step": 9260 }, { "epoch": 2.5919395465994963, "grad_norm": 0.2525748147837304, "learning_rate": 4.980280040279229e-06, "loss": 0.4486, "step": 9261 }, { "epoch": 2.59221942345368, "grad_norm": 0.23942735513286054, "learning_rate": 4.973570863858401e-06, "loss": 0.4473, "step": 9262 }, { "epoch": 2.5924993003078645, "grad_norm": 0.27030908353186944, "learning_rate": 4.9668659730783e-06, "loss": 0.4768, "step": 9263 }, { "epoch": 2.5927791771620488, "grad_norm": 0.2512822128836338, "learning_rate": 4.960165368577096e-06, "loss": 0.426, "step": 9264 }, { "epoch": 2.5930590540162326, "grad_norm": 0.2603896442018894, "learning_rate": 4.953469050992565e-06, "loss": 0.4469, "step": 9265 }, { "epoch": 2.593338930870417, "grad_norm": 0.25378765872640163, "learning_rate": 4.9467770209620675e-06, "loss": 0.4566, "step": 9266 }, { "epoch": 2.593618807724601, "grad_norm": 0.25966225841536006, "learning_rate": 4.9400892791225454e-06, "loss": 0.476, "step": 9267 }, { "epoch": 2.5938986845787855, "grad_norm": 0.25115292088678226, "learning_rate": 4.933405826110549e-06, "loss": 0.4391, "step": 9268 }, { "epoch": 2.5941785614329698, "grad_norm": 0.25405678616581745, "learning_rate": 4.926726662562209e-06, "loss": 0.47, "step": 9269 }, { "epoch": 2.5944584382871536, "grad_norm": 0.2480627202841323, "learning_rate": 4.920051789113256e-06, "loss": 0.4476, "step": 9270 }, { "epoch": 2.594738315141338, "grad_norm": 0.24902788193748368, "learning_rate": 4.913381206399003e-06, "loss": 0.459, "step": 9271 }, { "epoch": 2.595018191995522, "grad_norm": 0.2526288474786995, "learning_rate": 4.906714915054367e-06, "loss": 0.4462, "step": 9272 }, { "epoch": 2.595298068849706, "grad_norm": 0.24683415975415668, "learning_rate": 4.900052915713843e-06, "loss": 0.4354, "step": 9273 }, { "epoch": 2.5955779457038903, "grad_norm": 0.26488285631448083, "learning_rate": 4.893395209011531e-06, "loss": 0.4635, "step": 9274 }, { "epoch": 2.5958578225580746, "grad_norm": 0.25325409943157845, "learning_rate": 4.886741795581101e-06, "loss": 0.4658, "step": 9275 }, { "epoch": 2.5961376994122585, "grad_norm": 0.26043278499621375, "learning_rate": 4.880092676055848e-06, "loss": 0.4406, "step": 9276 }, { "epoch": 2.5964175762664428, "grad_norm": 0.269277412353307, "learning_rate": 4.873447851068619e-06, "loss": 0.5047, "step": 9277 }, { "epoch": 2.596697453120627, "grad_norm": 0.2620382040003155, "learning_rate": 4.866807321251888e-06, "loss": 0.466, "step": 9278 }, { "epoch": 2.596977329974811, "grad_norm": 0.25525522928605204, "learning_rate": 4.860171087237697e-06, "loss": 0.4333, "step": 9279 }, { "epoch": 2.597257206828995, "grad_norm": 0.24995066162807658, "learning_rate": 4.853539149657688e-06, "loss": 0.432, "step": 9280 }, { "epoch": 2.5975370836831795, "grad_norm": 0.2574171365789426, "learning_rate": 4.846911509143082e-06, "loss": 0.4573, "step": 9281 }, { "epoch": 2.5978169605373633, "grad_norm": 0.26075742452234985, "learning_rate": 4.840288166324724e-06, "loss": 0.4543, "step": 9282 }, { "epoch": 2.5980968373915476, "grad_norm": 0.2569715760319337, "learning_rate": 4.833669121833023e-06, "loss": 0.4408, "step": 9283 }, { "epoch": 2.598376714245732, "grad_norm": 0.2484605217426453, "learning_rate": 4.827054376297963e-06, "loss": 0.4331, "step": 9284 }, { "epoch": 2.598656591099916, "grad_norm": 0.24905568287189572, "learning_rate": 4.820443930349156e-06, "loss": 0.4476, "step": 9285 }, { "epoch": 2.5989364679541, "grad_norm": 0.2590631511587284, "learning_rate": 4.813837784615782e-06, "loss": 0.4304, "step": 9286 }, { "epoch": 2.5992163448082843, "grad_norm": 0.25008885615691817, "learning_rate": 4.807235939726617e-06, "loss": 0.4406, "step": 9287 }, { "epoch": 2.5994962216624686, "grad_norm": 0.267542945204892, "learning_rate": 4.800638396310036e-06, "loss": 0.4563, "step": 9288 }, { "epoch": 2.599776098516653, "grad_norm": 0.25736597982979836, "learning_rate": 4.794045154993993e-06, "loss": 0.4474, "step": 9289 }, { "epoch": 2.6000559753708368, "grad_norm": 0.24746449651076893, "learning_rate": 4.787456216406028e-06, "loss": 0.4475, "step": 9290 }, { "epoch": 2.600335852225021, "grad_norm": 0.2453563596198923, "learning_rate": 4.780871581173291e-06, "loss": 0.4207, "step": 9291 }, { "epoch": 2.6006157290792054, "grad_norm": 0.26032544087459286, "learning_rate": 4.774291249922508e-06, "loss": 0.4709, "step": 9292 }, { "epoch": 2.600895605933389, "grad_norm": 0.2580696455857522, "learning_rate": 4.767715223279995e-06, "loss": 0.453, "step": 9293 }, { "epoch": 2.6011754827875735, "grad_norm": 0.25871962509925484, "learning_rate": 4.761143501871667e-06, "loss": 0.4555, "step": 9294 }, { "epoch": 2.601455359641758, "grad_norm": 0.2749257480568536, "learning_rate": 4.75457608632302e-06, "loss": 0.4566, "step": 9295 }, { "epoch": 2.6017352364959416, "grad_norm": 0.2532048996850684, "learning_rate": 4.748012977259147e-06, "loss": 0.4384, "step": 9296 }, { "epoch": 2.602015113350126, "grad_norm": 0.26328460933205455, "learning_rate": 4.741454175304727e-06, "loss": 0.4614, "step": 9297 }, { "epoch": 2.60229499020431, "grad_norm": 0.2664634685783237, "learning_rate": 4.734899681084021e-06, "loss": 0.4457, "step": 9298 }, { "epoch": 2.602574867058494, "grad_norm": 0.25823670038817764, "learning_rate": 4.728349495220908e-06, "loss": 0.4432, "step": 9299 }, { "epoch": 2.6028547439126783, "grad_norm": 0.2440733968119489, "learning_rate": 4.721803618338832e-06, "loss": 0.4337, "step": 9300 }, { "epoch": 2.6031346207668626, "grad_norm": 0.26225762731850727, "learning_rate": 4.7152620510608424e-06, "loss": 0.4775, "step": 9301 }, { "epoch": 2.6034144976210465, "grad_norm": 0.24616384891613666, "learning_rate": 4.708724794009545e-06, "loss": 0.4524, "step": 9302 }, { "epoch": 2.6036943744752308, "grad_norm": 0.24668665097430656, "learning_rate": 4.702191847807169e-06, "loss": 0.445, "step": 9303 }, { "epoch": 2.603974251329415, "grad_norm": 0.2597920558729576, "learning_rate": 4.695663213075535e-06, "loss": 0.4601, "step": 9304 }, { "epoch": 2.6042541281835994, "grad_norm": 0.24607487003505218, "learning_rate": 4.689138890436029e-06, "loss": 0.4604, "step": 9305 }, { "epoch": 2.6045340050377837, "grad_norm": 0.2600636397934303, "learning_rate": 4.6826188805096484e-06, "loss": 0.4569, "step": 9306 }, { "epoch": 2.6048138818919675, "grad_norm": 0.2459774794523608, "learning_rate": 4.676103183916963e-06, "loss": 0.4312, "step": 9307 }, { "epoch": 2.605093758746152, "grad_norm": 0.2469395153022534, "learning_rate": 4.669591801278151e-06, "loss": 0.4445, "step": 9308 }, { "epoch": 2.605373635600336, "grad_norm": 0.2523058020611295, "learning_rate": 4.6630847332129575e-06, "loss": 0.4723, "step": 9309 }, { "epoch": 2.60565351245452, "grad_norm": 0.24619581804301602, "learning_rate": 4.656581980340741e-06, "loss": 0.4586, "step": 9310 }, { "epoch": 2.605933389308704, "grad_norm": 0.25924161343200974, "learning_rate": 4.650083543280431e-06, "loss": 0.4717, "step": 9311 }, { "epoch": 2.6062132661628885, "grad_norm": 0.2531297057253927, "learning_rate": 4.643589422650552e-06, "loss": 0.4374, "step": 9312 }, { "epoch": 2.6064931430170724, "grad_norm": 0.24542663509039195, "learning_rate": 4.637099619069213e-06, "loss": 0.4564, "step": 9313 }, { "epoch": 2.6067730198712566, "grad_norm": 0.2557134244304275, "learning_rate": 4.630614133154132e-06, "loss": 0.4558, "step": 9314 }, { "epoch": 2.607052896725441, "grad_norm": 0.2658720097253333, "learning_rate": 4.6241329655225875e-06, "loss": 0.4734, "step": 9315 }, { "epoch": 2.607332773579625, "grad_norm": 0.2607839304856215, "learning_rate": 4.617656116791458e-06, "loss": 0.4418, "step": 9316 }, { "epoch": 2.607612650433809, "grad_norm": 0.2542006757984473, "learning_rate": 4.611183587577228e-06, "loss": 0.4478, "step": 9317 }, { "epoch": 2.6078925272879934, "grad_norm": 0.26790537167162126, "learning_rate": 4.6047153784959495e-06, "loss": 0.4781, "step": 9318 }, { "epoch": 2.608172404142177, "grad_norm": 0.25588698574798024, "learning_rate": 4.59825149016328e-06, "loss": 0.4895, "step": 9319 }, { "epoch": 2.6084522809963615, "grad_norm": 0.2552989315535823, "learning_rate": 4.591791923194438e-06, "loss": 0.4451, "step": 9320 }, { "epoch": 2.608732157850546, "grad_norm": 0.2441500900857033, "learning_rate": 4.5853366782042555e-06, "loss": 0.4334, "step": 9321 }, { "epoch": 2.6090120347047296, "grad_norm": 0.26821549973815967, "learning_rate": 4.57888575580715e-06, "loss": 0.4662, "step": 9322 }, { "epoch": 2.609291911558914, "grad_norm": 0.26360346162674064, "learning_rate": 4.57243915661712e-06, "loss": 0.4493, "step": 9323 }, { "epoch": 2.6095717884130982, "grad_norm": 0.25924151998896916, "learning_rate": 4.565996881247758e-06, "loss": 0.4556, "step": 9324 }, { "epoch": 2.6098516652672825, "grad_norm": 0.2604518073930235, "learning_rate": 4.559558930312241e-06, "loss": 0.4478, "step": 9325 }, { "epoch": 2.610131542121467, "grad_norm": 0.2562775273881299, "learning_rate": 4.553125304423339e-06, "loss": 0.449, "step": 9326 }, { "epoch": 2.6104114189756507, "grad_norm": 0.26455514338810987, "learning_rate": 4.546696004193413e-06, "loss": 0.4771, "step": 9327 }, { "epoch": 2.610691295829835, "grad_norm": 0.24071992091921762, "learning_rate": 4.5402710302344e-06, "loss": 0.4382, "step": 9328 }, { "epoch": 2.6109711726840192, "grad_norm": 0.2560770546418357, "learning_rate": 4.533850383157834e-06, "loss": 0.4555, "step": 9329 }, { "epoch": 2.611251049538203, "grad_norm": 0.2367427791260587, "learning_rate": 4.527434063574843e-06, "loss": 0.4269, "step": 9330 }, { "epoch": 2.6115309263923874, "grad_norm": 0.25413566039375374, "learning_rate": 4.5210220720961205e-06, "loss": 0.4639, "step": 9331 }, { "epoch": 2.6118108032465717, "grad_norm": 0.25336488873316837, "learning_rate": 4.51461440933198e-06, "loss": 0.454, "step": 9332 }, { "epoch": 2.6120906801007555, "grad_norm": 0.2655120275885839, "learning_rate": 4.508211075892288e-06, "loss": 0.457, "step": 9333 }, { "epoch": 2.61237055695494, "grad_norm": 0.24853946314736047, "learning_rate": 4.5018120723865354e-06, "loss": 0.4475, "step": 9334 }, { "epoch": 2.612650433809124, "grad_norm": 0.26015719641746526, "learning_rate": 4.495417399423779e-06, "loss": 0.4608, "step": 9335 }, { "epoch": 2.612930310663308, "grad_norm": 0.2481190133426938, "learning_rate": 4.489027057612666e-06, "loss": 0.4272, "step": 9336 }, { "epoch": 2.6132101875174922, "grad_norm": 0.2599020825492872, "learning_rate": 4.482641047561437e-06, "loss": 0.4533, "step": 9337 }, { "epoch": 2.6134900643716765, "grad_norm": 0.26310922664294667, "learning_rate": 4.476259369877906e-06, "loss": 0.4599, "step": 9338 }, { "epoch": 2.6137699412258604, "grad_norm": 0.28213603933508197, "learning_rate": 4.469882025169481e-06, "loss": 0.4593, "step": 9339 }, { "epoch": 2.6140498180800447, "grad_norm": 0.26416135955336856, "learning_rate": 4.463509014043177e-06, "loss": 0.4543, "step": 9340 }, { "epoch": 2.614329694934229, "grad_norm": 0.25295242555747477, "learning_rate": 4.45714033710557e-06, "loss": 0.4582, "step": 9341 }, { "epoch": 2.6146095717884132, "grad_norm": 0.25592679760765735, "learning_rate": 4.450775994962836e-06, "loss": 0.4866, "step": 9342 }, { "epoch": 2.614889448642597, "grad_norm": 0.25470082594219184, "learning_rate": 4.44441598822074e-06, "loss": 0.4537, "step": 9343 }, { "epoch": 2.6151693254967814, "grad_norm": 0.29443900741901563, "learning_rate": 4.438060317484627e-06, "loss": 0.4697, "step": 9344 }, { "epoch": 2.6154492023509657, "grad_norm": 0.2631036528520912, "learning_rate": 4.431708983359434e-06, "loss": 0.4307, "step": 9345 }, { "epoch": 2.61572907920515, "grad_norm": 0.2598629462089242, "learning_rate": 4.425361986449689e-06, "loss": 0.4627, "step": 9346 }, { "epoch": 2.616008956059334, "grad_norm": 0.25912034443154314, "learning_rate": 4.419019327359497e-06, "loss": 0.4711, "step": 9347 }, { "epoch": 2.616288832913518, "grad_norm": 0.24671391772179715, "learning_rate": 4.41268100669256e-06, "loss": 0.4573, "step": 9348 }, { "epoch": 2.6165687097677024, "grad_norm": 0.24826633076239904, "learning_rate": 4.40634702505216e-06, "loss": 0.4457, "step": 9349 }, { "epoch": 2.6168485866218862, "grad_norm": 0.2684082894019948, "learning_rate": 4.400017383041161e-06, "loss": 0.4365, "step": 9350 }, { "epoch": 2.6171284634760705, "grad_norm": 0.2622832928493746, "learning_rate": 4.393692081262035e-06, "loss": 0.4539, "step": 9351 }, { "epoch": 2.617408340330255, "grad_norm": 0.2531070945819376, "learning_rate": 4.38737112031683e-06, "loss": 0.4257, "step": 9352 }, { "epoch": 2.6176882171844387, "grad_norm": 0.2721155574974308, "learning_rate": 4.381054500807175e-06, "loss": 0.4669, "step": 9353 }, { "epoch": 2.617968094038623, "grad_norm": 0.24878218136538488, "learning_rate": 4.3747422233342775e-06, "loss": 0.4577, "step": 9354 }, { "epoch": 2.6182479708928073, "grad_norm": 0.2548314632370883, "learning_rate": 4.368434288498968e-06, "loss": 0.4639, "step": 9355 }, { "epoch": 2.618527847746991, "grad_norm": 0.2529519792388573, "learning_rate": 4.362130696901617e-06, "loss": 0.4363, "step": 9356 }, { "epoch": 2.6188077246011754, "grad_norm": 0.2534106158408726, "learning_rate": 4.355831449142206e-06, "loss": 0.4521, "step": 9357 }, { "epoch": 2.6190876014553597, "grad_norm": 0.2536426424989659, "learning_rate": 4.349536545820309e-06, "loss": 0.432, "step": 9358 }, { "epoch": 2.6193674783095435, "grad_norm": 0.2535331169579017, "learning_rate": 4.343245987535072e-06, "loss": 0.441, "step": 9359 }, { "epoch": 2.619647355163728, "grad_norm": 0.2500811714147653, "learning_rate": 4.336959774885241e-06, "loss": 0.4656, "step": 9360 }, { "epoch": 2.619927232017912, "grad_norm": 0.25858977177423315, "learning_rate": 4.330677908469133e-06, "loss": 0.4546, "step": 9361 }, { "epoch": 2.6202071088720964, "grad_norm": 0.2518512386049054, "learning_rate": 4.324400388884664e-06, "loss": 0.4336, "step": 9362 }, { "epoch": 2.6204869857262807, "grad_norm": 0.2699716072155538, "learning_rate": 4.318127216729334e-06, "loss": 0.4531, "step": 9363 }, { "epoch": 2.6207668625804645, "grad_norm": 0.2541266763600309, "learning_rate": 4.311858392600226e-06, "loss": 0.4339, "step": 9364 }, { "epoch": 2.621046739434649, "grad_norm": 0.2634890024901075, "learning_rate": 4.3055939170940086e-06, "loss": 0.4505, "step": 9365 }, { "epoch": 2.621326616288833, "grad_norm": 0.24990794728809038, "learning_rate": 4.2993337908069366e-06, "loss": 0.4287, "step": 9366 }, { "epoch": 2.621606493143017, "grad_norm": 0.2480944203989238, "learning_rate": 4.2930780143348555e-06, "loss": 0.4563, "step": 9367 }, { "epoch": 2.6218863699972013, "grad_norm": 0.24803952381587008, "learning_rate": 4.286826588273185e-06, "loss": 0.4277, "step": 9368 }, { "epoch": 2.6221662468513856, "grad_norm": 0.24203231261637667, "learning_rate": 4.280579513216954e-06, "loss": 0.4485, "step": 9369 }, { "epoch": 2.6224461237055694, "grad_norm": 0.2479442825992165, "learning_rate": 4.274336789760752e-06, "loss": 0.4365, "step": 9370 }, { "epoch": 2.6227260005597537, "grad_norm": 0.24976414773135683, "learning_rate": 4.268098418498773e-06, "loss": 0.4684, "step": 9371 }, { "epoch": 2.623005877413938, "grad_norm": 0.25003686869830055, "learning_rate": 4.2618644000247785e-06, "loss": 0.4199, "step": 9372 }, { "epoch": 2.623285754268122, "grad_norm": 0.26451613692079345, "learning_rate": 4.255634734932146e-06, "loss": 0.4522, "step": 9373 }, { "epoch": 2.623565631122306, "grad_norm": 0.2509294379357824, "learning_rate": 4.249409423813788e-06, "loss": 0.4445, "step": 9374 }, { "epoch": 2.6238455079764904, "grad_norm": 0.2892647018192773, "learning_rate": 4.243188467262255e-06, "loss": 0.468, "step": 9375 }, { "epoch": 2.6241253848306743, "grad_norm": 0.24355127012649377, "learning_rate": 4.236971865869655e-06, "loss": 0.4221, "step": 9376 }, { "epoch": 2.6244052616848585, "grad_norm": 0.24109406623426521, "learning_rate": 4.2307596202276815e-06, "loss": 0.4566, "step": 9377 }, { "epoch": 2.624685138539043, "grad_norm": 0.2470566193498977, "learning_rate": 4.224551730927628e-06, "loss": 0.4417, "step": 9378 }, { "epoch": 2.624965015393227, "grad_norm": 0.256879649992719, "learning_rate": 4.218348198560368e-06, "loss": 0.4509, "step": 9379 }, { "epoch": 2.625244892247411, "grad_norm": 0.254632909125894, "learning_rate": 4.212149023716344e-06, "loss": 0.4429, "step": 9380 }, { "epoch": 2.6255247691015953, "grad_norm": 0.24720789018540543, "learning_rate": 4.205954206985607e-06, "loss": 0.4557, "step": 9381 }, { "epoch": 2.6258046459557796, "grad_norm": 0.25333054501266145, "learning_rate": 4.19976374895778e-06, "loss": 0.4447, "step": 9382 }, { "epoch": 2.626084522809964, "grad_norm": 0.2588455562400075, "learning_rate": 4.19357765022208e-06, "loss": 0.4652, "step": 9383 }, { "epoch": 2.6263643996641477, "grad_norm": 0.2607027020838425, "learning_rate": 4.187395911367292e-06, "loss": 0.4599, "step": 9384 }, { "epoch": 2.626644276518332, "grad_norm": 0.24573620452859748, "learning_rate": 4.181218532981796e-06, "loss": 0.4457, "step": 9385 }, { "epoch": 2.6269241533725163, "grad_norm": 0.24968986893656317, "learning_rate": 4.175045515653575e-06, "loss": 0.4487, "step": 9386 }, { "epoch": 2.6272040302267, "grad_norm": 0.2563148180069053, "learning_rate": 4.168876859970172e-06, "loss": 0.4607, "step": 9387 }, { "epoch": 2.6274839070808844, "grad_norm": 0.25600605587513764, "learning_rate": 4.162712566518723e-06, "loss": 0.4439, "step": 9388 }, { "epoch": 2.6277637839350687, "grad_norm": 0.25005785222597743, "learning_rate": 4.156552635885946e-06, "loss": 0.4615, "step": 9389 }, { "epoch": 2.6280436607892526, "grad_norm": 0.24510843450387793, "learning_rate": 4.1503970686581514e-06, "loss": 0.4413, "step": 9390 }, { "epoch": 2.628323537643437, "grad_norm": 0.24194767758289132, "learning_rate": 4.144245865421237e-06, "loss": 0.4362, "step": 9391 }, { "epoch": 2.628603414497621, "grad_norm": 0.2490759801194332, "learning_rate": 4.138099026760656e-06, "loss": 0.4495, "step": 9392 }, { "epoch": 2.628883291351805, "grad_norm": 0.253358245566839, "learning_rate": 4.131956553261479e-06, "loss": 0.4523, "step": 9393 }, { "epoch": 2.6291631682059893, "grad_norm": 0.2630190315744542, "learning_rate": 4.1258184455083505e-06, "loss": 0.4435, "step": 9394 }, { "epoch": 2.6294430450601736, "grad_norm": 0.25470117843539053, "learning_rate": 4.119684704085502e-06, "loss": 0.4464, "step": 9395 }, { "epoch": 2.6297229219143574, "grad_norm": 0.25859111668674567, "learning_rate": 4.1135553295767455e-06, "loss": 0.4557, "step": 9396 }, { "epoch": 2.6300027987685417, "grad_norm": 0.25524386836612356, "learning_rate": 4.107430322565469e-06, "loss": 0.4599, "step": 9397 }, { "epoch": 2.630282675622726, "grad_norm": 0.24574519551467336, "learning_rate": 4.101309683634669e-06, "loss": 0.4564, "step": 9398 }, { "epoch": 2.6305625524769103, "grad_norm": 0.2574978646230604, "learning_rate": 4.095193413366899e-06, "loss": 0.4527, "step": 9399 }, { "epoch": 2.6308424293310946, "grad_norm": 0.25000210745410584, "learning_rate": 4.089081512344317e-06, "loss": 0.4376, "step": 9400 }, { "epoch": 2.6311223061852784, "grad_norm": 0.2605679393117334, "learning_rate": 4.082973981148653e-06, "loss": 0.4421, "step": 9401 }, { "epoch": 2.6314021830394627, "grad_norm": 0.26409217588005957, "learning_rate": 4.0768708203612175e-06, "loss": 0.4371, "step": 9402 }, { "epoch": 2.631682059893647, "grad_norm": 0.25377680711251804, "learning_rate": 4.07077203056293e-06, "loss": 0.4514, "step": 9403 }, { "epoch": 2.631961936747831, "grad_norm": 0.25210873312266174, "learning_rate": 4.064677612334267e-06, "loss": 0.4387, "step": 9404 }, { "epoch": 2.632241813602015, "grad_norm": 0.24494119559621805, "learning_rate": 4.058587566255306e-06, "loss": 0.4297, "step": 9405 }, { "epoch": 2.6325216904561994, "grad_norm": 0.2596515985559775, "learning_rate": 4.052501892905691e-06, "loss": 0.4564, "step": 9406 }, { "epoch": 2.6328015673103833, "grad_norm": 0.2518905835826524, "learning_rate": 4.046420592864664e-06, "loss": 0.4349, "step": 9407 }, { "epoch": 2.6330814441645676, "grad_norm": 0.2467819090355505, "learning_rate": 4.040343666711044e-06, "loss": 0.4525, "step": 9408 }, { "epoch": 2.633361321018752, "grad_norm": 0.24948075480582393, "learning_rate": 4.034271115023247e-06, "loss": 0.4267, "step": 9409 }, { "epoch": 2.6336411978729357, "grad_norm": 0.2489089465903646, "learning_rate": 4.02820293837925e-06, "loss": 0.4311, "step": 9410 }, { "epoch": 2.63392107472712, "grad_norm": 0.256823975308967, "learning_rate": 4.022139137356623e-06, "loss": 0.4477, "step": 9411 }, { "epoch": 2.6342009515813043, "grad_norm": 0.24609712398915673, "learning_rate": 4.0160797125325335e-06, "loss": 0.4619, "step": 9412 }, { "epoch": 2.634480828435488, "grad_norm": 0.2593112314554741, "learning_rate": 4.010024664483708e-06, "loss": 0.4427, "step": 9413 }, { "epoch": 2.6347607052896724, "grad_norm": 0.2650976512280279, "learning_rate": 4.00397399378648e-06, "loss": 0.458, "step": 9414 }, { "epoch": 2.6350405821438567, "grad_norm": 0.2606263463407475, "learning_rate": 3.997927701016757e-06, "loss": 0.4542, "step": 9415 }, { "epoch": 2.635320458998041, "grad_norm": 0.26155105720954686, "learning_rate": 3.991885786750021e-06, "loss": 0.4437, "step": 9416 }, { "epoch": 2.635600335852225, "grad_norm": 0.27000033674043966, "learning_rate": 3.985848251561347e-06, "loss": 0.4659, "step": 9417 }, { "epoch": 2.635880212706409, "grad_norm": 0.27621755838983997, "learning_rate": 3.979815096025391e-06, "loss": 0.4488, "step": 9418 }, { "epoch": 2.6361600895605934, "grad_norm": 0.26000645509963716, "learning_rate": 3.973786320716394e-06, "loss": 0.4533, "step": 9419 }, { "epoch": 2.6364399664147777, "grad_norm": 0.25805612731916294, "learning_rate": 3.967761926208163e-06, "loss": 0.4493, "step": 9420 }, { "epoch": 2.6367198432689616, "grad_norm": 0.26245896850743844, "learning_rate": 3.961741913074135e-06, "loss": 0.4411, "step": 9421 }, { "epoch": 2.636999720123146, "grad_norm": 0.25100152576965584, "learning_rate": 3.955726281887273e-06, "loss": 0.4511, "step": 9422 }, { "epoch": 2.63727959697733, "grad_norm": 0.2565874408931755, "learning_rate": 3.9497150332201585e-06, "loss": 0.4672, "step": 9423 }, { "epoch": 2.637559473831514, "grad_norm": 0.2656034989017934, "learning_rate": 3.943708167644944e-06, "loss": 0.4567, "step": 9424 }, { "epoch": 2.6378393506856983, "grad_norm": 0.2550238839321128, "learning_rate": 3.937705685733362e-06, "loss": 0.4569, "step": 9425 }, { "epoch": 2.6381192275398826, "grad_norm": 0.2489611089489336, "learning_rate": 3.931707588056738e-06, "loss": 0.4626, "step": 9426 }, { "epoch": 2.6383991043940664, "grad_norm": 0.24179225105831964, "learning_rate": 3.925713875185977e-06, "loss": 0.4363, "step": 9427 }, { "epoch": 2.6386789812482507, "grad_norm": 0.25738028516432154, "learning_rate": 3.919724547691556e-06, "loss": 0.455, "step": 9428 }, { "epoch": 2.638958858102435, "grad_norm": 0.3421980163502586, "learning_rate": 3.91373960614354e-06, "loss": 0.4578, "step": 9429 }, { "epoch": 2.639238734956619, "grad_norm": 0.25132835113339924, "learning_rate": 3.907759051111581e-06, "loss": 0.4484, "step": 9430 }, { "epoch": 2.639518611810803, "grad_norm": 0.2622300126748502, "learning_rate": 3.901782883164918e-06, "loss": 0.4753, "step": 9431 }, { "epoch": 2.6397984886649875, "grad_norm": 0.25253521288343556, "learning_rate": 3.89581110287236e-06, "loss": 0.4446, "step": 9432 }, { "epoch": 2.6400783655191713, "grad_norm": 0.2578958518456159, "learning_rate": 3.88984371080231e-06, "loss": 0.4706, "step": 9433 }, { "epoch": 2.6403582423733556, "grad_norm": 0.25334138612729395, "learning_rate": 3.883880707522747e-06, "loss": 0.4506, "step": 9434 }, { "epoch": 2.64063811922754, "grad_norm": 0.24506347974857887, "learning_rate": 3.8779220936012275e-06, "loss": 0.4417, "step": 9435 }, { "epoch": 2.640917996081724, "grad_norm": 0.25878707958534775, "learning_rate": 3.871967869604898e-06, "loss": 0.4465, "step": 9436 }, { "epoch": 2.6411978729359085, "grad_norm": 0.25511636798625403, "learning_rate": 3.866018036100477e-06, "loss": 0.4254, "step": 9437 }, { "epoch": 2.6414777497900923, "grad_norm": 0.27087832571071385, "learning_rate": 3.86007259365429e-06, "loss": 0.4726, "step": 9438 }, { "epoch": 2.6417576266442766, "grad_norm": 0.2639956803014047, "learning_rate": 3.854131542832218e-06, "loss": 0.4569, "step": 9439 }, { "epoch": 2.642037503498461, "grad_norm": 0.2608292468835911, "learning_rate": 3.848194884199735e-06, "loss": 0.4278, "step": 9440 }, { "epoch": 2.6423173803526447, "grad_norm": 0.257109106994948, "learning_rate": 3.8422626183218956e-06, "loss": 0.461, "step": 9441 }, { "epoch": 2.642597257206829, "grad_norm": 0.25760729945150973, "learning_rate": 3.836334745763337e-06, "loss": 0.4414, "step": 9442 }, { "epoch": 2.6428771340610133, "grad_norm": 0.25075381348627446, "learning_rate": 3.8304112670882796e-06, "loss": 0.4348, "step": 9443 }, { "epoch": 2.643157010915197, "grad_norm": 0.26297703591974236, "learning_rate": 3.824492182860517e-06, "loss": 0.4559, "step": 9444 }, { "epoch": 2.6434368877693815, "grad_norm": 0.24954680414939545, "learning_rate": 3.818577493643444e-06, "loss": 0.4449, "step": 9445 }, { "epoch": 2.6437167646235658, "grad_norm": 0.2530958409819108, "learning_rate": 3.812667200000003e-06, "loss": 0.4522, "step": 9446 }, { "epoch": 2.6439966414777496, "grad_norm": 0.26183666165205954, "learning_rate": 3.8067613024927506e-06, "loss": 0.4612, "step": 9447 }, { "epoch": 2.644276518331934, "grad_norm": 0.2541657204587236, "learning_rate": 3.8008598016838094e-06, "loss": 0.4394, "step": 9448 }, { "epoch": 2.644556395186118, "grad_norm": 0.2620238517426079, "learning_rate": 3.7949626981348964e-06, "loss": 0.4571, "step": 9449 }, { "epoch": 2.644836272040302, "grad_norm": 0.24271353893657108, "learning_rate": 3.789069992407296e-06, "loss": 0.4488, "step": 9450 }, { "epoch": 2.6451161488944863, "grad_norm": 0.2525646148455708, "learning_rate": 3.783181685061876e-06, "loss": 0.4551, "step": 9451 }, { "epoch": 2.6453960257486706, "grad_norm": 0.24362772636513352, "learning_rate": 3.7772977766590943e-06, "loss": 0.4566, "step": 9452 }, { "epoch": 2.645675902602855, "grad_norm": 0.24968313358726013, "learning_rate": 3.7714182677589804e-06, "loss": 0.4522, "step": 9453 }, { "epoch": 2.6459557794570387, "grad_norm": 0.26749174667974374, "learning_rate": 3.7655431589211422e-06, "loss": 0.4662, "step": 9454 }, { "epoch": 2.646235656311223, "grad_norm": 0.25207039781310786, "learning_rate": 3.7596724507047932e-06, "loss": 0.4511, "step": 9455 }, { "epoch": 2.6465155331654073, "grad_norm": 0.26139214837358166, "learning_rate": 3.753806143668703e-06, "loss": 0.4604, "step": 9456 }, { "epoch": 2.6467954100195916, "grad_norm": 0.25471814006145055, "learning_rate": 3.7479442383712316e-06, "loss": 0.4343, "step": 9457 }, { "epoch": 2.6470752868737755, "grad_norm": 0.26040601131187685, "learning_rate": 3.7420867353703147e-06, "loss": 0.4602, "step": 9458 }, { "epoch": 2.6473551637279598, "grad_norm": 0.24031830715623834, "learning_rate": 3.736233635223474e-06, "loss": 0.4458, "step": 9459 }, { "epoch": 2.647635040582144, "grad_norm": 0.24991718967960128, "learning_rate": 3.7303849384878076e-06, "loss": 0.4533, "step": 9460 }, { "epoch": 2.647914917436328, "grad_norm": 0.25381014077048275, "learning_rate": 3.724540645720004e-06, "loss": 0.4533, "step": 9461 }, { "epoch": 2.648194794290512, "grad_norm": 0.24513905116283924, "learning_rate": 3.7187007574763232e-06, "loss": 0.4207, "step": 9462 }, { "epoch": 2.6484746711446965, "grad_norm": 0.2622613581031185, "learning_rate": 3.712865274312616e-06, "loss": 0.4519, "step": 9463 }, { "epoch": 2.6487545479988803, "grad_norm": 0.25305479769519595, "learning_rate": 3.7070341967842926e-06, "loss": 0.4307, "step": 9464 }, { "epoch": 2.6490344248530646, "grad_norm": 0.2369397572733977, "learning_rate": 3.701207525446365e-06, "loss": 0.4281, "step": 9465 }, { "epoch": 2.649314301707249, "grad_norm": 0.25102184052885873, "learning_rate": 3.695385260853418e-06, "loss": 0.4733, "step": 9466 }, { "epoch": 2.6495941785614328, "grad_norm": 0.2892932955279966, "learning_rate": 3.689567403559624e-06, "loss": 0.4563, "step": 9467 }, { "epoch": 2.649874055415617, "grad_norm": 0.2542357110750897, "learning_rate": 3.6837539541187238e-06, "loss": 0.4327, "step": 9468 }, { "epoch": 2.6501539322698013, "grad_norm": 0.2576517441763403, "learning_rate": 3.677944913084047e-06, "loss": 0.4507, "step": 9469 }, { "epoch": 2.650433809123985, "grad_norm": 0.2642765121906414, "learning_rate": 3.6721402810085015e-06, "loss": 0.4581, "step": 9470 }, { "epoch": 2.6507136859781695, "grad_norm": 0.2603716715661713, "learning_rate": 3.6663400584445616e-06, "loss": 0.4487, "step": 9471 }, { "epoch": 2.6509935628323538, "grad_norm": 0.24098686381718468, "learning_rate": 3.660544245944325e-06, "loss": 0.4379, "step": 9472 }, { "epoch": 2.651273439686538, "grad_norm": 0.2604086414853814, "learning_rate": 3.654752844059417e-06, "loss": 0.4596, "step": 9473 }, { "epoch": 2.6515533165407223, "grad_norm": 0.2701012382621206, "learning_rate": 3.6489658533410797e-06, "loss": 0.4603, "step": 9474 }, { "epoch": 2.651833193394906, "grad_norm": 0.255398830252583, "learning_rate": 3.643183274340112e-06, "loss": 0.4407, "step": 9475 }, { "epoch": 2.6521130702490905, "grad_norm": 0.25049589227035957, "learning_rate": 3.6374051076069115e-06, "loss": 0.4466, "step": 9476 }, { "epoch": 2.652392947103275, "grad_norm": 0.25494597161481597, "learning_rate": 3.631631353691439e-06, "loss": 0.4388, "step": 9477 }, { "epoch": 2.6526728239574586, "grad_norm": 0.25292281522898435, "learning_rate": 3.62586201314325e-06, "loss": 0.4354, "step": 9478 }, { "epoch": 2.652952700811643, "grad_norm": 0.2521383821512615, "learning_rate": 3.6200970865114704e-06, "loss": 0.4563, "step": 9479 }, { "epoch": 2.653232577665827, "grad_norm": 0.25445549286093044, "learning_rate": 3.614336574344812e-06, "loss": 0.4499, "step": 9480 }, { "epoch": 2.653512454520011, "grad_norm": 0.2571486008365219, "learning_rate": 3.608580477191559e-06, "loss": 0.4608, "step": 9481 }, { "epoch": 2.6537923313741953, "grad_norm": 0.2515101810049176, "learning_rate": 3.6028287955995943e-06, "loss": 0.4131, "step": 9482 }, { "epoch": 2.6540722082283796, "grad_norm": 0.2600451628950232, "learning_rate": 3.597081530116342e-06, "loss": 0.4628, "step": 9483 }, { "epoch": 2.6543520850825635, "grad_norm": 0.24947153800241603, "learning_rate": 3.5913386812888474e-06, "loss": 0.4456, "step": 9484 }, { "epoch": 2.6546319619367478, "grad_norm": 0.25041000642750527, "learning_rate": 3.585600249663712e-06, "loss": 0.4572, "step": 9485 }, { "epoch": 2.654911838790932, "grad_norm": 0.2531926139912886, "learning_rate": 3.5798662357871217e-06, "loss": 0.4379, "step": 9486 }, { "epoch": 2.655191715645116, "grad_norm": 0.27303601220831636, "learning_rate": 3.574136640204845e-06, "loss": 0.4742, "step": 9487 }, { "epoch": 2.6554715924993, "grad_norm": 0.25279944241663, "learning_rate": 3.568411463462229e-06, "loss": 0.4476, "step": 9488 }, { "epoch": 2.6557514693534845, "grad_norm": 0.25596048011253975, "learning_rate": 3.5626907061041937e-06, "loss": 0.4591, "step": 9489 }, { "epoch": 2.656031346207669, "grad_norm": 0.264374812286928, "learning_rate": 3.556974368675253e-06, "loss": 0.467, "step": 9490 }, { "epoch": 2.6563112230618526, "grad_norm": 0.2633281828279678, "learning_rate": 3.5512624517194893e-06, "loss": 0.4556, "step": 9491 }, { "epoch": 2.656591099916037, "grad_norm": 0.24839076237085658, "learning_rate": 3.545554955780567e-06, "loss": 0.4624, "step": 9492 }, { "epoch": 2.656870976770221, "grad_norm": 0.2552202991168326, "learning_rate": 3.5398518814017177e-06, "loss": 0.436, "step": 9493 }, { "epoch": 2.6571508536244055, "grad_norm": 0.25713779284626853, "learning_rate": 3.5341532291257806e-06, "loss": 0.462, "step": 9494 }, { "epoch": 2.6574307304785894, "grad_norm": 0.2566118638588749, "learning_rate": 3.5284589994951435e-06, "loss": 0.4532, "step": 9495 }, { "epoch": 2.6577106073327736, "grad_norm": 0.2529663695003584, "learning_rate": 3.522769193051789e-06, "loss": 0.4342, "step": 9496 }, { "epoch": 2.657990484186958, "grad_norm": 0.24959068090001982, "learning_rate": 3.517083810337274e-06, "loss": 0.4337, "step": 9497 }, { "epoch": 2.658270361041142, "grad_norm": 0.25888889878029636, "learning_rate": 3.5114028518927476e-06, "loss": 0.4459, "step": 9498 }, { "epoch": 2.658550237895326, "grad_norm": 0.251809556835883, "learning_rate": 3.505726318258912e-06, "loss": 0.4438, "step": 9499 }, { "epoch": 2.6588301147495104, "grad_norm": 0.23883640860702854, "learning_rate": 3.500054209976078e-06, "loss": 0.4742, "step": 9500 }, { "epoch": 2.659109991603694, "grad_norm": 0.2535837618499653, "learning_rate": 3.4943865275841036e-06, "loss": 0.4264, "step": 9501 }, { "epoch": 2.6593898684578785, "grad_norm": 0.24931965791720442, "learning_rate": 3.4887232716224515e-06, "loss": 0.4559, "step": 9502 }, { "epoch": 2.659669745312063, "grad_norm": 0.2566775946151293, "learning_rate": 3.4830644426301516e-06, "loss": 0.4653, "step": 9503 }, { "epoch": 2.6599496221662466, "grad_norm": 0.26540747561173816, "learning_rate": 3.477410041145818e-06, "loss": 0.452, "step": 9504 }, { "epoch": 2.660229499020431, "grad_norm": 0.23606934915419622, "learning_rate": 3.47176006770763e-06, "loss": 0.4565, "step": 9505 }, { "epoch": 2.6605093758746152, "grad_norm": 0.2501107069181252, "learning_rate": 3.4661145228533587e-06, "loss": 0.4552, "step": 9506 }, { "epoch": 2.660789252728799, "grad_norm": 0.268483862398106, "learning_rate": 3.4604734071203625e-06, "loss": 0.457, "step": 9507 }, { "epoch": 2.6610691295829834, "grad_norm": 0.24398110463413233, "learning_rate": 3.4548367210455514e-06, "loss": 0.4344, "step": 9508 }, { "epoch": 2.6613490064371677, "grad_norm": 0.25772212739615136, "learning_rate": 3.4492044651654344e-06, "loss": 0.47, "step": 9509 }, { "epoch": 2.661628883291352, "grad_norm": 0.2649890920852127, "learning_rate": 3.443576640016094e-06, "loss": 0.4575, "step": 9510 }, { "epoch": 2.6619087601455362, "grad_norm": 0.246274944821003, "learning_rate": 3.437953246133191e-06, "loss": 0.4419, "step": 9511 }, { "epoch": 2.66218863699972, "grad_norm": 0.25328972822913165, "learning_rate": 3.4323342840519523e-06, "loss": 0.4425, "step": 9512 }, { "epoch": 2.6624685138539044, "grad_norm": 0.25838070294669013, "learning_rate": 3.426719754307206e-06, "loss": 0.4321, "step": 9513 }, { "epoch": 2.6627483907080887, "grad_norm": 0.2529082247004486, "learning_rate": 3.421109657433341e-06, "loss": 0.428, "step": 9514 }, { "epoch": 2.6630282675622725, "grad_norm": 0.25964210499745244, "learning_rate": 3.4155039939643306e-06, "loss": 0.4622, "step": 9515 }, { "epoch": 2.663308144416457, "grad_norm": 0.25382490703939575, "learning_rate": 3.409902764433726e-06, "loss": 0.4486, "step": 9516 }, { "epoch": 2.663588021270641, "grad_norm": 0.2592529548773974, "learning_rate": 3.4043059693746546e-06, "loss": 0.4728, "step": 9517 }, { "epoch": 2.663867898124825, "grad_norm": 0.25246685281677483, "learning_rate": 3.3987136093198255e-06, "loss": 0.4477, "step": 9518 }, { "epoch": 2.6641477749790092, "grad_norm": 0.2621064650997339, "learning_rate": 3.3931256848015123e-06, "loss": 0.4512, "step": 9519 }, { "epoch": 2.6644276518331935, "grad_norm": 0.25466340259682224, "learning_rate": 3.3875421963515842e-06, "loss": 0.4609, "step": 9520 }, { "epoch": 2.6647075286873774, "grad_norm": 0.26012689481029794, "learning_rate": 3.381963144501482e-06, "loss": 0.4337, "step": 9521 }, { "epoch": 2.6649874055415617, "grad_norm": 0.24874050743360282, "learning_rate": 3.376388529782215e-06, "loss": 0.4477, "step": 9522 }, { "epoch": 2.665267282395746, "grad_norm": 0.2606179998971198, "learning_rate": 3.370818352724381e-06, "loss": 0.4227, "step": 9523 }, { "epoch": 2.66554715924993, "grad_norm": 0.2579114898590253, "learning_rate": 3.3652526138581665e-06, "loss": 0.4414, "step": 9524 }, { "epoch": 2.665827036104114, "grad_norm": 0.25710483426366676, "learning_rate": 3.3596913137133033e-06, "loss": 0.4643, "step": 9525 }, { "epoch": 2.6661069129582984, "grad_norm": 0.2469217533571249, "learning_rate": 3.35413445281913e-06, "loss": 0.4401, "step": 9526 }, { "epoch": 2.6663867898124827, "grad_norm": 0.2670611971362608, "learning_rate": 3.3485820317045503e-06, "loss": 0.4793, "step": 9527 }, { "epoch": 2.6666666666666665, "grad_norm": 0.26109629252961764, "learning_rate": 3.3430340508980418e-06, "loss": 0.4336, "step": 9528 }, { "epoch": 2.666946543520851, "grad_norm": 0.2542831554154449, "learning_rate": 3.3374905109276712e-06, "loss": 0.446, "step": 9529 }, { "epoch": 2.667226420375035, "grad_norm": 0.25660788217170455, "learning_rate": 3.331951412321066e-06, "loss": 0.4537, "step": 9530 }, { "epoch": 2.6675062972292194, "grad_norm": 0.258753500608575, "learning_rate": 3.3264167556054493e-06, "loss": 0.4629, "step": 9531 }, { "epoch": 2.6677861740834032, "grad_norm": 0.24303736973262738, "learning_rate": 3.3208865413076106e-06, "loss": 0.4452, "step": 9532 }, { "epoch": 2.6680660509375875, "grad_norm": 0.2546674566555844, "learning_rate": 3.315360769953918e-06, "loss": 0.4519, "step": 9533 }, { "epoch": 2.668345927791772, "grad_norm": 0.2548655228739014, "learning_rate": 3.3098394420703226e-06, "loss": 0.4639, "step": 9534 }, { "epoch": 2.6686258046459557, "grad_norm": 0.2595714172104206, "learning_rate": 3.304322558182338e-06, "loss": 0.4634, "step": 9535 }, { "epoch": 2.66890568150014, "grad_norm": 0.2591402840263963, "learning_rate": 3.2988101188150767e-06, "loss": 0.4301, "step": 9536 }, { "epoch": 2.6691855583543243, "grad_norm": 0.259532454648795, "learning_rate": 3.2933021244932027e-06, "loss": 0.4635, "step": 9537 }, { "epoch": 2.669465435208508, "grad_norm": 0.24871495624519288, "learning_rate": 3.287798575740969e-06, "loss": 0.4395, "step": 9538 }, { "epoch": 2.6697453120626924, "grad_norm": 0.256465366145869, "learning_rate": 3.2822994730822177e-06, "loss": 0.4419, "step": 9539 }, { "epoch": 2.6700251889168767, "grad_norm": 0.2581741144493468, "learning_rate": 3.276804817040352e-06, "loss": 0.4438, "step": 9540 }, { "epoch": 2.6703050657710605, "grad_norm": 0.2608276603552817, "learning_rate": 3.2713146081383484e-06, "loss": 0.4527, "step": 9541 }, { "epoch": 2.670584942625245, "grad_norm": 0.25669486283378345, "learning_rate": 3.265828846898783e-06, "loss": 0.4629, "step": 9542 }, { "epoch": 2.670864819479429, "grad_norm": 0.2553481115471904, "learning_rate": 3.2603475338437828e-06, "loss": 0.4516, "step": 9543 }, { "epoch": 2.671144696333613, "grad_norm": 0.2568392811345736, "learning_rate": 3.2548706694950693e-06, "loss": 0.4594, "step": 9544 }, { "epoch": 2.6714245731877972, "grad_norm": 0.25504560907747265, "learning_rate": 3.249398254373931e-06, "loss": 0.4539, "step": 9545 }, { "epoch": 2.6717044500419815, "grad_norm": 0.2583480188591699, "learning_rate": 3.2439302890012346e-06, "loss": 0.4332, "step": 9546 }, { "epoch": 2.671984326896166, "grad_norm": 0.2462940740601506, "learning_rate": 3.2384667738974196e-06, "loss": 0.4526, "step": 9547 }, { "epoch": 2.67226420375035, "grad_norm": 0.25364777033394637, "learning_rate": 3.233007709582514e-06, "loss": 0.4453, "step": 9548 }, { "epoch": 2.672544080604534, "grad_norm": 0.25792848970974264, "learning_rate": 3.227553096576108e-06, "loss": 0.448, "step": 9549 }, { "epoch": 2.6728239574587183, "grad_norm": 0.25744147779501747, "learning_rate": 3.222102935397381e-06, "loss": 0.4671, "step": 9550 }, { "epoch": 2.6731038343129025, "grad_norm": 0.24928936236121033, "learning_rate": 3.2166572265650787e-06, "loss": 0.4351, "step": 9551 }, { "epoch": 2.6733837111670864, "grad_norm": 0.25632576413837144, "learning_rate": 3.2112159705975254e-06, "loss": 0.4449, "step": 9552 }, { "epoch": 2.6736635880212707, "grad_norm": 0.2506734509361175, "learning_rate": 3.205779168012624e-06, "loss": 0.4504, "step": 9553 }, { "epoch": 2.673943464875455, "grad_norm": 0.2502185843859141, "learning_rate": 3.200346819327865e-06, "loss": 0.442, "step": 9554 }, { "epoch": 2.674223341729639, "grad_norm": 0.24697086837287455, "learning_rate": 3.194918925060281e-06, "loss": 0.4464, "step": 9555 }, { "epoch": 2.674503218583823, "grad_norm": 0.24180270927745726, "learning_rate": 3.1894954857265126e-06, "loss": 0.4651, "step": 9556 }, { "epoch": 2.6747830954380074, "grad_norm": 0.24510244175229476, "learning_rate": 3.1840765018427642e-06, "loss": 0.4463, "step": 9557 }, { "epoch": 2.6750629722921913, "grad_norm": 0.2627364790670975, "learning_rate": 3.1786619739248123e-06, "loss": 0.456, "step": 9558 }, { "epoch": 2.6753428491463755, "grad_norm": 0.23953547585015808, "learning_rate": 3.1732519024880227e-06, "loss": 0.45, "step": 9559 }, { "epoch": 2.67562272600056, "grad_norm": 0.24941273014969406, "learning_rate": 3.1678462880473324e-06, "loss": 0.4421, "step": 9560 }, { "epoch": 2.6759026028547437, "grad_norm": 0.24889410295356765, "learning_rate": 3.1624451311172475e-06, "loss": 0.4548, "step": 9561 }, { "epoch": 2.676182479708928, "grad_norm": 0.24757414595847224, "learning_rate": 3.1570484322118454e-06, "loss": 0.4619, "step": 9562 }, { "epoch": 2.6764623565631123, "grad_norm": 0.255381341343547, "learning_rate": 3.151656191844793e-06, "loss": 0.4509, "step": 9563 }, { "epoch": 2.676742233417296, "grad_norm": 0.2545565791122728, "learning_rate": 3.1462684105293293e-06, "loss": 0.44, "step": 9564 }, { "epoch": 2.6770221102714804, "grad_norm": 0.2601252350876901, "learning_rate": 3.1408850887782615e-06, "loss": 0.4648, "step": 9565 }, { "epoch": 2.6773019871256647, "grad_norm": 0.2541944562033961, "learning_rate": 3.135506227103985e-06, "loss": 0.4658, "step": 9566 }, { "epoch": 2.677581863979849, "grad_norm": 0.25952499281932945, "learning_rate": 3.130131826018451e-06, "loss": 0.4576, "step": 9567 }, { "epoch": 2.6778617408340333, "grad_norm": 0.24352318330194347, "learning_rate": 3.1247618860332064e-06, "loss": 0.4568, "step": 9568 }, { "epoch": 2.678141617688217, "grad_norm": 0.25483798914707423, "learning_rate": 3.1193964076593583e-06, "loss": 0.4597, "step": 9569 }, { "epoch": 2.6784214945424014, "grad_norm": 0.2664555666188415, "learning_rate": 3.114035391407605e-06, "loss": 0.4431, "step": 9570 }, { "epoch": 2.6787013713965857, "grad_norm": 0.2558920128272072, "learning_rate": 3.1086788377882037e-06, "loss": 0.4584, "step": 9571 }, { "epoch": 2.6789812482507696, "grad_norm": 0.2564401741229892, "learning_rate": 3.103326747311008e-06, "loss": 0.4421, "step": 9572 }, { "epoch": 2.679261125104954, "grad_norm": 0.261257903348952, "learning_rate": 3.0979791204854116e-06, "loss": 0.4442, "step": 9573 }, { "epoch": 2.679541001959138, "grad_norm": 0.24728596928066218, "learning_rate": 3.0926359578204168e-06, "loss": 0.4207, "step": 9574 }, { "epoch": 2.679820878813322, "grad_norm": 0.25384082637236177, "learning_rate": 3.0872972598245785e-06, "loss": 0.4727, "step": 9575 }, { "epoch": 2.6801007556675063, "grad_norm": 0.25155267496967193, "learning_rate": 3.0819630270060518e-06, "loss": 0.4465, "step": 9576 }, { "epoch": 2.6803806325216906, "grad_norm": 0.2514218639113108, "learning_rate": 3.0766332598725413e-06, "loss": 0.4471, "step": 9577 }, { "epoch": 2.6806605093758744, "grad_norm": 0.24556715410688237, "learning_rate": 3.0713079589313466e-06, "loss": 0.456, "step": 9578 }, { "epoch": 2.6809403862300587, "grad_norm": 0.23776124231133908, "learning_rate": 3.0659871246893292e-06, "loss": 0.4488, "step": 9579 }, { "epoch": 2.681220263084243, "grad_norm": 0.25766891290935423, "learning_rate": 3.0606707576529225e-06, "loss": 0.4234, "step": 9580 }, { "epoch": 2.681500139938427, "grad_norm": 0.24452010593592108, "learning_rate": 3.0553588583281444e-06, "loss": 0.4293, "step": 9581 }, { "epoch": 2.681780016792611, "grad_norm": 0.2598302592284333, "learning_rate": 3.0500514272205906e-06, "loss": 0.4497, "step": 9582 }, { "epoch": 2.6820598936467954, "grad_norm": 0.25562772663169386, "learning_rate": 3.044748464835423e-06, "loss": 0.4617, "step": 9583 }, { "epoch": 2.6823397705009797, "grad_norm": 0.25476205270862523, "learning_rate": 3.039449971677377e-06, "loss": 0.4478, "step": 9584 }, { "epoch": 2.682619647355164, "grad_norm": 0.2538151461762847, "learning_rate": 3.0341559482507666e-06, "loss": 0.4395, "step": 9585 }, { "epoch": 2.682899524209348, "grad_norm": 0.26844670171940377, "learning_rate": 3.0288663950594766e-06, "loss": 0.4312, "step": 9586 }, { "epoch": 2.683179401063532, "grad_norm": 0.25131678810625624, "learning_rate": 3.0235813126069822e-06, "loss": 0.4441, "step": 9587 }, { "epoch": 2.6834592779177164, "grad_norm": 0.265383114183847, "learning_rate": 3.0183007013963092e-06, "loss": 0.4467, "step": 9588 }, { "epoch": 2.6837391547719003, "grad_norm": 0.2598004507884646, "learning_rate": 3.0130245619300666e-06, "loss": 0.4436, "step": 9589 }, { "epoch": 2.6840190316260846, "grad_norm": 0.25780086768921223, "learning_rate": 3.0077528947104585e-06, "loss": 0.4396, "step": 9590 }, { "epoch": 2.684298908480269, "grad_norm": 0.25261973652628045, "learning_rate": 3.002485700239227e-06, "loss": 0.4565, "step": 9591 }, { "epoch": 2.6845787853344527, "grad_norm": 0.25393925572197334, "learning_rate": 2.997222979017711e-06, "loss": 0.4396, "step": 9592 }, { "epoch": 2.684858662188637, "grad_norm": 0.2381658079414295, "learning_rate": 2.9919647315468093e-06, "loss": 0.4691, "step": 9593 }, { "epoch": 2.6851385390428213, "grad_norm": 0.2715883406636102, "learning_rate": 2.986710958327027e-06, "loss": 0.4633, "step": 9594 }, { "epoch": 2.685418415897005, "grad_norm": 0.255247208575035, "learning_rate": 2.9814616598584087e-06, "loss": 0.4324, "step": 9595 }, { "epoch": 2.6856982927511894, "grad_norm": 0.29036514800454216, "learning_rate": 2.9762168366405886e-06, "loss": 0.4661, "step": 9596 }, { "epoch": 2.6859781696053737, "grad_norm": 0.2518865790086803, "learning_rate": 2.970976489172772e-06, "loss": 0.4788, "step": 9597 }, { "epoch": 2.6862580464595576, "grad_norm": 0.24712504303747004, "learning_rate": 2.965740617953733e-06, "loss": 0.4477, "step": 9598 }, { "epoch": 2.686537923313742, "grad_norm": 0.2607184257165128, "learning_rate": 2.960509223481828e-06, "loss": 0.4553, "step": 9599 }, { "epoch": 2.686817800167926, "grad_norm": 0.25130344370407304, "learning_rate": 2.955282306254986e-06, "loss": 0.4459, "step": 9600 }, { "epoch": 2.68709767702211, "grad_norm": 0.24468795689000825, "learning_rate": 2.9500598667707045e-06, "loss": 0.4376, "step": 9601 }, { "epoch": 2.6873775538762943, "grad_norm": 0.240846706524123, "learning_rate": 2.9448419055260634e-06, "loss": 0.4311, "step": 9602 }, { "epoch": 2.6876574307304786, "grad_norm": 0.2506620901909148, "learning_rate": 2.939628423017704e-06, "loss": 0.4398, "step": 9603 }, { "epoch": 2.687937307584663, "grad_norm": 0.2546535852948708, "learning_rate": 2.934419419741852e-06, "loss": 0.4481, "step": 9604 }, { "epoch": 2.688217184438847, "grad_norm": 0.25635196813437233, "learning_rate": 2.9292148961943044e-06, "loss": 0.4631, "step": 9605 }, { "epoch": 2.688497061293031, "grad_norm": 0.26961047587511755, "learning_rate": 2.9240148528704326e-06, "loss": 0.4471, "step": 9606 }, { "epoch": 2.6887769381472153, "grad_norm": 0.2502754243244984, "learning_rate": 2.9188192902651736e-06, "loss": 0.4387, "step": 9607 }, { "epoch": 2.6890568150013996, "grad_norm": 0.2557472176361832, "learning_rate": 2.9136282088730536e-06, "loss": 0.4576, "step": 9608 }, { "epoch": 2.6893366918555834, "grad_norm": 0.24776733623736844, "learning_rate": 2.9084416091881506e-06, "loss": 0.4406, "step": 9609 }, { "epoch": 2.6896165687097677, "grad_norm": 0.25696230575270806, "learning_rate": 2.9032594917041244e-06, "loss": 0.4502, "step": 9610 }, { "epoch": 2.689896445563952, "grad_norm": 0.25842164096611264, "learning_rate": 2.898081856914231e-06, "loss": 0.4441, "step": 9611 }, { "epoch": 2.690176322418136, "grad_norm": 0.2621535345165615, "learning_rate": 2.89290870531127e-06, "loss": 0.4611, "step": 9612 }, { "epoch": 2.69045619927232, "grad_norm": 0.2474173482149627, "learning_rate": 2.887740037387626e-06, "loss": 0.4523, "step": 9613 }, { "epoch": 2.6907360761265045, "grad_norm": 0.2490375360888736, "learning_rate": 2.88257585363525e-06, "loss": 0.4476, "step": 9614 }, { "epoch": 2.6910159529806883, "grad_norm": 0.2529025213225243, "learning_rate": 2.877416154545681e-06, "loss": 0.4385, "step": 9615 }, { "epoch": 2.6912958298348726, "grad_norm": 0.2511885081465574, "learning_rate": 2.872260940610022e-06, "loss": 0.4278, "step": 9616 }, { "epoch": 2.691575706689057, "grad_norm": 0.2510840460673405, "learning_rate": 2.8671102123189408e-06, "loss": 0.4429, "step": 9617 }, { "epoch": 2.6918555835432407, "grad_norm": 0.2603164784694938, "learning_rate": 2.86196397016269e-06, "loss": 0.4453, "step": 9618 }, { "epoch": 2.692135460397425, "grad_norm": 0.2583463118208237, "learning_rate": 2.856822214631094e-06, "loss": 0.4746, "step": 9619 }, { "epoch": 2.6924153372516093, "grad_norm": 0.2554763338118534, "learning_rate": 2.8516849462135454e-06, "loss": 0.4507, "step": 9620 }, { "epoch": 2.6926952141057936, "grad_norm": 0.2585290063608683, "learning_rate": 2.8465521653990135e-06, "loss": 0.4483, "step": 9621 }, { "epoch": 2.6929750909599774, "grad_norm": 0.2623714587562958, "learning_rate": 2.8414238726760414e-06, "loss": 0.447, "step": 9622 }, { "epoch": 2.6932549678141617, "grad_norm": 0.2659255301786653, "learning_rate": 2.8363000685327436e-06, "loss": 0.466, "step": 9623 }, { "epoch": 2.693534844668346, "grad_norm": 0.2609359558585346, "learning_rate": 2.8311807534568024e-06, "loss": 0.4496, "step": 9624 }, { "epoch": 2.6938147215225303, "grad_norm": 0.26097482483463635, "learning_rate": 2.826065927935473e-06, "loss": 0.4396, "step": 9625 }, { "epoch": 2.694094598376714, "grad_norm": 0.25537061492759044, "learning_rate": 2.8209555924556042e-06, "loss": 0.4691, "step": 9626 }, { "epoch": 2.6943744752308985, "grad_norm": 0.25011120329684866, "learning_rate": 2.8158497475035792e-06, "loss": 0.4659, "step": 9627 }, { "epoch": 2.6946543520850827, "grad_norm": 0.26208656472724784, "learning_rate": 2.810748393565388e-06, "loss": 0.4572, "step": 9628 }, { "epoch": 2.6949342289392666, "grad_norm": 0.2606565840292971, "learning_rate": 2.8056515311265795e-06, "loss": 0.4587, "step": 9629 }, { "epoch": 2.695214105793451, "grad_norm": 0.2589436096839342, "learning_rate": 2.8005591606722727e-06, "loss": 0.4438, "step": 9630 }, { "epoch": 2.695493982647635, "grad_norm": 0.25800776062159353, "learning_rate": 2.7954712826871622e-06, "loss": 0.4357, "step": 9631 }, { "epoch": 2.695773859501819, "grad_norm": 0.2608140540071773, "learning_rate": 2.7903878976555163e-06, "loss": 0.45, "step": 9632 }, { "epoch": 2.6960537363560033, "grad_norm": 0.2650190509760145, "learning_rate": 2.785309006061176e-06, "loss": 0.4384, "step": 9633 }, { "epoch": 2.6963336132101876, "grad_norm": 0.24931993941455413, "learning_rate": 2.780234608387555e-06, "loss": 0.446, "step": 9634 }, { "epoch": 2.6966134900643715, "grad_norm": 0.2658399973734569, "learning_rate": 2.7751647051176278e-06, "loss": 0.4471, "step": 9635 }, { "epoch": 2.6968933669185557, "grad_norm": 0.24952100274742145, "learning_rate": 2.7700992967339633e-06, "loss": 0.4729, "step": 9636 }, { "epoch": 2.69717324377274, "grad_norm": 0.26158161361345517, "learning_rate": 2.7650383837186823e-06, "loss": 0.4642, "step": 9637 }, { "epoch": 2.697453120626924, "grad_norm": 0.2631291321973273, "learning_rate": 2.759981966553482e-06, "loss": 0.4264, "step": 9638 }, { "epoch": 2.697732997481108, "grad_norm": 0.2575367709015392, "learning_rate": 2.754930045719645e-06, "loss": 0.4597, "step": 9639 }, { "epoch": 2.6980128743352925, "grad_norm": 0.2564659384915359, "learning_rate": 2.7498826216980022e-06, "loss": 0.4723, "step": 9640 }, { "epoch": 2.6982927511894768, "grad_norm": 0.2662945491752931, "learning_rate": 2.744839694968987e-06, "loss": 0.4575, "step": 9641 }, { "epoch": 2.698572628043661, "grad_norm": 0.2607535522450551, "learning_rate": 2.739801266012576e-06, "loss": 0.4441, "step": 9642 }, { "epoch": 2.698852504897845, "grad_norm": 0.2568796561308157, "learning_rate": 2.734767335308336e-06, "loss": 0.4569, "step": 9643 }, { "epoch": 2.699132381752029, "grad_norm": 0.2604173575784273, "learning_rate": 2.7297379033353944e-06, "loss": 0.4486, "step": 9644 }, { "epoch": 2.6994122586062135, "grad_norm": 0.2546225387213733, "learning_rate": 2.7247129705724572e-06, "loss": 0.4546, "step": 9645 }, { "epoch": 2.6996921354603973, "grad_norm": 0.24754408215495408, "learning_rate": 2.7196925374977978e-06, "loss": 0.4539, "step": 9646 }, { "epoch": 2.6999720123145816, "grad_norm": 0.26432126577422416, "learning_rate": 2.7146766045892722e-06, "loss": 0.448, "step": 9647 }, { "epoch": 2.700251889168766, "grad_norm": 0.25476615361813054, "learning_rate": 2.709665172324288e-06, "loss": 0.4516, "step": 9648 }, { "epoch": 2.7005317660229498, "grad_norm": 0.26414146297212104, "learning_rate": 2.7046582411798473e-06, "loss": 0.4379, "step": 9649 }, { "epoch": 2.700811642877134, "grad_norm": 0.2524114589093038, "learning_rate": 2.6996558116325067e-06, "loss": 0.4514, "step": 9650 }, { "epoch": 2.7010915197313183, "grad_norm": 0.25452540330895845, "learning_rate": 2.6946578841583968e-06, "loss": 0.4602, "step": 9651 }, { "epoch": 2.701371396585502, "grad_norm": 0.256703708724523, "learning_rate": 2.689664459233232e-06, "loss": 0.4305, "step": 9652 }, { "epoch": 2.7016512734396865, "grad_norm": 0.26223148502940047, "learning_rate": 2.684675537332287e-06, "loss": 0.4298, "step": 9653 }, { "epoch": 2.7019311502938708, "grad_norm": 0.262019513621444, "learning_rate": 2.6796911189304053e-06, "loss": 0.4517, "step": 9654 }, { "epoch": 2.7022110271480546, "grad_norm": 0.2604320882640914, "learning_rate": 2.6747112045020063e-06, "loss": 0.4731, "step": 9655 }, { "epoch": 2.702490904002239, "grad_norm": 0.2451978939009998, "learning_rate": 2.669735794521089e-06, "loss": 0.4373, "step": 9656 }, { "epoch": 2.702770780856423, "grad_norm": 0.25363039131620124, "learning_rate": 2.6647648894612076e-06, "loss": 0.4396, "step": 9657 }, { "epoch": 2.7030506577106075, "grad_norm": 0.263893339157257, "learning_rate": 2.659798489795495e-06, "loss": 0.4463, "step": 9658 }, { "epoch": 2.7033305345647913, "grad_norm": 0.27918997221012776, "learning_rate": 2.654836595996668e-06, "loss": 0.4727, "step": 9659 }, { "epoch": 2.7036104114189756, "grad_norm": 0.25622222685787027, "learning_rate": 2.6498792085369873e-06, "loss": 0.4431, "step": 9660 }, { "epoch": 2.70389028827316, "grad_norm": 0.25115136845374564, "learning_rate": 2.6449263278883086e-06, "loss": 0.4451, "step": 9661 }, { "epoch": 2.704170165127344, "grad_norm": 0.25241869160083336, "learning_rate": 2.6399779545220493e-06, "loss": 0.4358, "step": 9662 }, { "epoch": 2.704450041981528, "grad_norm": 0.25012657718988424, "learning_rate": 2.635034088909194e-06, "loss": 0.4412, "step": 9663 }, { "epoch": 2.7047299188357123, "grad_norm": 0.2529009342703803, "learning_rate": 2.630094731520311e-06, "loss": 0.4449, "step": 9664 }, { "epoch": 2.7050097956898966, "grad_norm": 0.2529681464102911, "learning_rate": 2.6251598828255177e-06, "loss": 0.4213, "step": 9665 }, { "epoch": 2.7052896725440805, "grad_norm": 0.24697254757555365, "learning_rate": 2.620229543294528e-06, "loss": 0.443, "step": 9666 }, { "epoch": 2.7055695493982648, "grad_norm": 0.2564555043163987, "learning_rate": 2.615303713396611e-06, "loss": 0.4615, "step": 9667 }, { "epoch": 2.705849426252449, "grad_norm": 0.2393032225739067, "learning_rate": 2.6103823936006078e-06, "loss": 0.4458, "step": 9668 }, { "epoch": 2.706129303106633, "grad_norm": 0.254402736502425, "learning_rate": 2.605465584374933e-06, "loss": 0.4527, "step": 9669 }, { "epoch": 2.706409179960817, "grad_norm": 0.25882846930758674, "learning_rate": 2.6005532861875736e-06, "loss": 0.473, "step": 9670 }, { "epoch": 2.7066890568150015, "grad_norm": 0.26023853810341335, "learning_rate": 2.5956454995060774e-06, "loss": 0.4664, "step": 9671 }, { "epoch": 2.7069689336691853, "grad_norm": 0.25544238437269734, "learning_rate": 2.590742224797582e-06, "loss": 0.4568, "step": 9672 }, { "epoch": 2.7072488105233696, "grad_norm": 0.2591892632742536, "learning_rate": 2.5858434625287753e-06, "loss": 0.4673, "step": 9673 }, { "epoch": 2.707528687377554, "grad_norm": 0.2537890934138441, "learning_rate": 2.5809492131659285e-06, "loss": 0.4512, "step": 9674 }, { "epoch": 2.7078085642317378, "grad_norm": 0.2601607784858347, "learning_rate": 2.5760594771748747e-06, "loss": 0.4539, "step": 9675 }, { "epoch": 2.708088441085922, "grad_norm": 0.2675710384101143, "learning_rate": 2.571174255021025e-06, "loss": 0.4671, "step": 9676 }, { "epoch": 2.7083683179401064, "grad_norm": 0.253588464582648, "learning_rate": 2.5662935471693573e-06, "loss": 0.4419, "step": 9677 }, { "epoch": 2.7086481947942906, "grad_norm": 0.25297096701091965, "learning_rate": 2.5614173540844223e-06, "loss": 0.4465, "step": 9678 }, { "epoch": 2.708928071648475, "grad_norm": 0.25726382234904843, "learning_rate": 2.556545676230332e-06, "loss": 0.4463, "step": 9679 }, { "epoch": 2.709207948502659, "grad_norm": 0.27082897903045283, "learning_rate": 2.551678514070782e-06, "loss": 0.4916, "step": 9680 }, { "epoch": 2.709487825356843, "grad_norm": 0.24652697571702764, "learning_rate": 2.5468158680690246e-06, "loss": 0.4405, "step": 9681 }, { "epoch": 2.7097677022110274, "grad_norm": 0.24925217968179736, "learning_rate": 2.5419577386879002e-06, "loss": 0.4427, "step": 9682 }, { "epoch": 2.710047579065211, "grad_norm": 0.26476711391996977, "learning_rate": 2.537104126389794e-06, "loss": 0.4589, "step": 9683 }, { "epoch": 2.7103274559193955, "grad_norm": 0.25447263864085196, "learning_rate": 2.532255031636688e-06, "loss": 0.4353, "step": 9684 }, { "epoch": 2.71060733277358, "grad_norm": 0.2563979710447523, "learning_rate": 2.5274104548901166e-06, "loss": 0.4615, "step": 9685 }, { "epoch": 2.7108872096277636, "grad_norm": 0.25158898126609197, "learning_rate": 2.52257039661119e-06, "loss": 0.4488, "step": 9686 }, { "epoch": 2.711167086481948, "grad_norm": 0.2584423608435234, "learning_rate": 2.51773485726059e-06, "loss": 0.464, "step": 9687 }, { "epoch": 2.711446963336132, "grad_norm": 0.2603578049857069, "learning_rate": 2.512903837298558e-06, "loss": 0.4399, "step": 9688 }, { "epoch": 2.711726840190316, "grad_norm": 0.26123083105238193, "learning_rate": 2.508077337184922e-06, "loss": 0.4648, "step": 9689 }, { "epoch": 2.7120067170445004, "grad_norm": 0.2469999078727203, "learning_rate": 2.503255357379064e-06, "loss": 0.4414, "step": 9690 }, { "epoch": 2.7122865938986847, "grad_norm": 0.25414093190683257, "learning_rate": 2.4984378983399504e-06, "loss": 0.4331, "step": 9691 }, { "epoch": 2.7125664707528685, "grad_norm": 0.24754182443592454, "learning_rate": 2.4936249605261032e-06, "loss": 0.4328, "step": 9692 }, { "epoch": 2.712846347607053, "grad_norm": 0.2720956891822757, "learning_rate": 2.4888165443956225e-06, "loss": 0.4463, "step": 9693 }, { "epoch": 2.713126224461237, "grad_norm": 0.2625198655714074, "learning_rate": 2.484012650406176e-06, "loss": 0.4444, "step": 9694 }, { "epoch": 2.7134061013154214, "grad_norm": 0.250325071522633, "learning_rate": 2.4792132790150026e-06, "loss": 0.4561, "step": 9695 }, { "epoch": 2.713685978169605, "grad_norm": 0.25871354229605087, "learning_rate": 2.4744184306789042e-06, "loss": 0.4501, "step": 9696 }, { "epoch": 2.7139658550237895, "grad_norm": 0.2663409146154799, "learning_rate": 2.469628105854266e-06, "loss": 0.4665, "step": 9697 }, { "epoch": 2.714245731877974, "grad_norm": 0.2572069481207055, "learning_rate": 2.464842304997023e-06, "loss": 0.4605, "step": 9698 }, { "epoch": 2.714525608732158, "grad_norm": 0.25399706969156943, "learning_rate": 2.460061028562699e-06, "loss": 0.4532, "step": 9699 }, { "epoch": 2.714805485586342, "grad_norm": 0.2574344431098343, "learning_rate": 2.4552842770063757e-06, "loss": 0.452, "step": 9700 }, { "epoch": 2.7150853624405262, "grad_norm": 0.2578065467749837, "learning_rate": 2.4505120507827105e-06, "loss": 0.4564, "step": 9701 }, { "epoch": 2.7153652392947105, "grad_norm": 0.2515542162601119, "learning_rate": 2.445744350345919e-06, "loss": 0.4532, "step": 9702 }, { "epoch": 2.7156451161488944, "grad_norm": 0.26410474275459034, "learning_rate": 2.440981176149798e-06, "loss": 0.4465, "step": 9703 }, { "epoch": 2.7159249930030787, "grad_norm": 0.25209619717054277, "learning_rate": 2.436222528647708e-06, "loss": 0.4451, "step": 9704 }, { "epoch": 2.716204869857263, "grad_norm": 0.2510633138055131, "learning_rate": 2.431468408292581e-06, "loss": 0.4477, "step": 9705 }, { "epoch": 2.716484746711447, "grad_norm": 0.27023137077398746, "learning_rate": 2.4267188155369157e-06, "loss": 0.4689, "step": 9706 }, { "epoch": 2.716764623565631, "grad_norm": 0.25424533967816504, "learning_rate": 2.4219737508327788e-06, "loss": 0.4666, "step": 9707 }, { "epoch": 2.7170445004198154, "grad_norm": 0.26711550608448836, "learning_rate": 2.4172332146318145e-06, "loss": 0.444, "step": 9708 }, { "epoch": 2.7173243772739992, "grad_norm": 0.2612666601638906, "learning_rate": 2.412497207385228e-06, "loss": 0.4565, "step": 9709 }, { "epoch": 2.7176042541281835, "grad_norm": 0.25180959549909626, "learning_rate": 2.4077657295437872e-06, "loss": 0.436, "step": 9710 }, { "epoch": 2.717884130982368, "grad_norm": 0.2537135056863, "learning_rate": 2.403038781557848e-06, "loss": 0.4567, "step": 9711 }, { "epoch": 2.7181640078365517, "grad_norm": 0.2666149702062973, "learning_rate": 2.3983163638773175e-06, "loss": 0.4445, "step": 9712 }, { "epoch": 2.718443884690736, "grad_norm": 0.25902707631329347, "learning_rate": 2.3935984769516807e-06, "loss": 0.4209, "step": 9713 }, { "epoch": 2.7187237615449202, "grad_norm": 0.2588325646202454, "learning_rate": 2.3888851212299832e-06, "loss": 0.4675, "step": 9714 }, { "epoch": 2.7190036383991045, "grad_norm": 0.2562007232326913, "learning_rate": 2.3841762971608506e-06, "loss": 0.4555, "step": 9715 }, { "epoch": 2.719283515253289, "grad_norm": 0.25269977295520657, "learning_rate": 2.3794720051924677e-06, "loss": 0.4507, "step": 9716 }, { "epoch": 2.7195633921074727, "grad_norm": 0.2525499154762477, "learning_rate": 2.3747722457725996e-06, "loss": 0.471, "step": 9717 }, { "epoch": 2.719843268961657, "grad_norm": 0.2579482568280547, "learning_rate": 2.37007701934856e-06, "loss": 0.4394, "step": 9718 }, { "epoch": 2.7201231458158412, "grad_norm": 0.2508807784812661, "learning_rate": 2.3653863263672527e-06, "loss": 0.4592, "step": 9719 }, { "epoch": 2.720403022670025, "grad_norm": 0.2530849878278066, "learning_rate": 2.360700167275137e-06, "loss": 0.4403, "step": 9720 }, { "epoch": 2.7206828995242094, "grad_norm": 0.25064367633886986, "learning_rate": 2.3560185425182458e-06, "loss": 0.4546, "step": 9721 }, { "epoch": 2.7209627763783937, "grad_norm": 0.24754252052662992, "learning_rate": 2.351341452542177e-06, "loss": 0.4347, "step": 9722 }, { "epoch": 2.7212426532325775, "grad_norm": 0.24698393700365023, "learning_rate": 2.3466688977920923e-06, "loss": 0.4413, "step": 9723 }, { "epoch": 2.721522530086762, "grad_norm": 0.25850155854762463, "learning_rate": 2.342000878712741e-06, "loss": 0.4613, "step": 9724 }, { "epoch": 2.721802406940946, "grad_norm": 0.2603882550246586, "learning_rate": 2.3373373957484233e-06, "loss": 0.4463, "step": 9725 }, { "epoch": 2.72208228379513, "grad_norm": 0.2619025457880354, "learning_rate": 2.3326784493430066e-06, "loss": 0.4585, "step": 9726 }, { "epoch": 2.7223621606493142, "grad_norm": 0.2463781540172044, "learning_rate": 2.3280240399399356e-06, "loss": 0.4577, "step": 9727 }, { "epoch": 2.7226420375034985, "grad_norm": 0.2491900824207974, "learning_rate": 2.3233741679822173e-06, "loss": 0.4396, "step": 9728 }, { "epoch": 2.7229219143576824, "grad_norm": 0.2570978650724168, "learning_rate": 2.318728833912437e-06, "loss": 0.4268, "step": 9729 }, { "epoch": 2.7232017912118667, "grad_norm": 0.27026489858051084, "learning_rate": 2.314088038172729e-06, "loss": 0.4504, "step": 9730 }, { "epoch": 2.723481668066051, "grad_norm": 0.26183851864325597, "learning_rate": 2.309451781204819e-06, "loss": 0.4676, "step": 9731 }, { "epoch": 2.7237615449202353, "grad_norm": 0.25103174510057086, "learning_rate": 2.3048200634499752e-06, "loss": 0.4604, "step": 9732 }, { "epoch": 2.724041421774419, "grad_norm": 0.288239760592007, "learning_rate": 2.3001928853490563e-06, "loss": 0.443, "step": 9733 }, { "epoch": 2.7243212986286034, "grad_norm": 0.24397708267580392, "learning_rate": 2.2955702473424824e-06, "loss": 0.4574, "step": 9734 }, { "epoch": 2.7246011754827877, "grad_norm": 0.2504832668909398, "learning_rate": 2.290952149870229e-06, "loss": 0.4471, "step": 9735 }, { "epoch": 2.724881052336972, "grad_norm": 0.2578583590442245, "learning_rate": 2.2863385933718506e-06, "loss": 0.4737, "step": 9736 }, { "epoch": 2.725160929191156, "grad_norm": 0.25663532849515097, "learning_rate": 2.281729578286479e-06, "loss": 0.4547, "step": 9737 }, { "epoch": 2.72544080604534, "grad_norm": 0.2548716365918792, "learning_rate": 2.277125105052791e-06, "loss": 0.4399, "step": 9738 }, { "epoch": 2.7257206828995244, "grad_norm": 0.26531838813698677, "learning_rate": 2.272525174109047e-06, "loss": 0.4529, "step": 9739 }, { "epoch": 2.7260005597537083, "grad_norm": 0.2634166447757077, "learning_rate": 2.267929785893069e-06, "loss": 0.4678, "step": 9740 }, { "epoch": 2.7262804366078925, "grad_norm": 0.24376956925000517, "learning_rate": 2.263338940842258e-06, "loss": 0.4472, "step": 9741 }, { "epoch": 2.726560313462077, "grad_norm": 0.2613972036128885, "learning_rate": 2.2587526393935575e-06, "loss": 0.4561, "step": 9742 }, { "epoch": 2.7268401903162607, "grad_norm": 0.26207387859482995, "learning_rate": 2.2541708819835083e-06, "loss": 0.483, "step": 9743 }, { "epoch": 2.727120067170445, "grad_norm": 0.2584530797788964, "learning_rate": 2.2495936690481943e-06, "loss": 0.4611, "step": 9744 }, { "epoch": 2.7273999440246293, "grad_norm": 0.25829123389903674, "learning_rate": 2.2450210010232895e-06, "loss": 0.456, "step": 9745 }, { "epoch": 2.727679820878813, "grad_norm": 0.25503319866970714, "learning_rate": 2.240452878344007e-06, "loss": 0.4476, "step": 9746 }, { "epoch": 2.7279596977329974, "grad_norm": 0.25741869424005215, "learning_rate": 2.23588930144516e-06, "loss": 0.4738, "step": 9747 }, { "epoch": 2.7282395745871817, "grad_norm": 0.2543053628985632, "learning_rate": 2.2313302707611006e-06, "loss": 0.4474, "step": 9748 }, { "epoch": 2.7285194514413655, "grad_norm": 0.2771948450205726, "learning_rate": 2.22677578672576e-06, "loss": 0.4772, "step": 9749 }, { "epoch": 2.72879932829555, "grad_norm": 0.2639708823854919, "learning_rate": 2.222225849772641e-06, "loss": 0.4595, "step": 9750 }, { "epoch": 2.729079205149734, "grad_norm": 0.25593857277096793, "learning_rate": 2.217680460334809e-06, "loss": 0.4306, "step": 9751 }, { "epoch": 2.7293590820039184, "grad_norm": 0.2561656048222062, "learning_rate": 2.2131396188449005e-06, "loss": 0.443, "step": 9752 }, { "epoch": 2.7296389588581027, "grad_norm": 0.26033103658113366, "learning_rate": 2.2086033257351035e-06, "loss": 0.4528, "step": 9753 }, { "epoch": 2.7299188357122866, "grad_norm": 0.2620716770154568, "learning_rate": 2.2040715814371947e-06, "loss": 0.438, "step": 9754 }, { "epoch": 2.730198712566471, "grad_norm": 0.25569202205132774, "learning_rate": 2.199544386382507e-06, "loss": 0.4514, "step": 9755 }, { "epoch": 2.730478589420655, "grad_norm": 0.2457195033787847, "learning_rate": 2.1950217410019348e-06, "loss": 0.4407, "step": 9756 }, { "epoch": 2.730758466274839, "grad_norm": 0.25164064702847944, "learning_rate": 2.1905036457259555e-06, "loss": 0.4479, "step": 9757 }, { "epoch": 2.7310383431290233, "grad_norm": 0.24407379992726314, "learning_rate": 2.1859901009845974e-06, "loss": 0.4382, "step": 9758 }, { "epoch": 2.7313182199832076, "grad_norm": 0.24912827766097292, "learning_rate": 2.181481107207467e-06, "loss": 0.4634, "step": 9759 }, { "epoch": 2.7315980968373914, "grad_norm": 0.2605340914466241, "learning_rate": 2.1769766648237265e-06, "loss": 0.4404, "step": 9760 }, { "epoch": 2.7318779736915757, "grad_norm": 0.2580613226425842, "learning_rate": 2.1724767742621167e-06, "loss": 0.4762, "step": 9761 }, { "epoch": 2.73215785054576, "grad_norm": 0.2603137926796265, "learning_rate": 2.1679814359509333e-06, "loss": 0.4495, "step": 9762 }, { "epoch": 2.732437727399944, "grad_norm": 0.2524330504497244, "learning_rate": 2.1634906503180565e-06, "loss": 0.4324, "step": 9763 }, { "epoch": 2.732717604254128, "grad_norm": 0.260142923673047, "learning_rate": 2.159004417790911e-06, "loss": 0.4768, "step": 9764 }, { "epoch": 2.7329974811083124, "grad_norm": 0.26635278369881427, "learning_rate": 2.154522738796505e-06, "loss": 0.4342, "step": 9765 }, { "epoch": 2.7332773579624963, "grad_norm": 0.2473458112234101, "learning_rate": 2.1500456137614033e-06, "loss": 0.4337, "step": 9766 }, { "epoch": 2.7335572348166806, "grad_norm": 0.25070281196189426, "learning_rate": 2.1455730431117427e-06, "loss": 0.4589, "step": 9767 }, { "epoch": 2.733837111670865, "grad_norm": 0.252400866560634, "learning_rate": 2.141105027273227e-06, "loss": 0.4426, "step": 9768 }, { "epoch": 2.734116988525049, "grad_norm": 0.25260736603149075, "learning_rate": 2.136641566671116e-06, "loss": 0.451, "step": 9769 }, { "epoch": 2.734396865379233, "grad_norm": 0.2515553157562186, "learning_rate": 2.132182661730253e-06, "loss": 0.435, "step": 9770 }, { "epoch": 2.7346767422334173, "grad_norm": 0.26044560295493985, "learning_rate": 2.1277283128750437e-06, "loss": 0.4585, "step": 9771 }, { "epoch": 2.7349566190876016, "grad_norm": 0.2534114378150915, "learning_rate": 2.123278520529448e-06, "loss": 0.4475, "step": 9772 }, { "epoch": 2.735236495941786, "grad_norm": 0.2529724150240257, "learning_rate": 2.1188332851169946e-06, "loss": 0.465, "step": 9773 }, { "epoch": 2.7355163727959697, "grad_norm": 0.2492456543986863, "learning_rate": 2.1143926070607945e-06, "loss": 0.4436, "step": 9774 }, { "epoch": 2.735796249650154, "grad_norm": 0.2505347128345977, "learning_rate": 2.1099564867835098e-06, "loss": 0.4406, "step": 9775 }, { "epoch": 2.7360761265043383, "grad_norm": 0.2562143674896613, "learning_rate": 2.1055249247073638e-06, "loss": 0.4617, "step": 9776 }, { "epoch": 2.736356003358522, "grad_norm": 0.2445628376186289, "learning_rate": 2.1010979212541692e-06, "loss": 0.4317, "step": 9777 }, { "epoch": 2.7366358802127064, "grad_norm": 0.24936755921346454, "learning_rate": 2.0966754768452836e-06, "loss": 0.4415, "step": 9778 }, { "epoch": 2.7369157570668907, "grad_norm": 0.24447213129673334, "learning_rate": 2.092257591901642e-06, "loss": 0.4409, "step": 9779 }, { "epoch": 2.7371956339210746, "grad_norm": 0.2503946767854177, "learning_rate": 2.087844266843736e-06, "loss": 0.4457, "step": 9780 }, { "epoch": 2.737475510775259, "grad_norm": 0.25055245006537563, "learning_rate": 2.0834355020916296e-06, "loss": 0.4338, "step": 9781 }, { "epoch": 2.737755387629443, "grad_norm": 0.26274715786940184, "learning_rate": 2.0790312980649485e-06, "loss": 0.4538, "step": 9782 }, { "epoch": 2.738035264483627, "grad_norm": 0.24467616380968663, "learning_rate": 2.0746316551828958e-06, "loss": 0.4498, "step": 9783 }, { "epoch": 2.7383151413378113, "grad_norm": 0.24718713979995247, "learning_rate": 2.070236573864226e-06, "loss": 0.4317, "step": 9784 }, { "epoch": 2.7385950181919956, "grad_norm": 0.24938040432107095, "learning_rate": 2.065846054527265e-06, "loss": 0.4307, "step": 9785 }, { "epoch": 2.7388748950461794, "grad_norm": 0.25620621588726555, "learning_rate": 2.061460097589907e-06, "loss": 0.4271, "step": 9786 }, { "epoch": 2.7391547719003637, "grad_norm": 0.2443597292643001, "learning_rate": 2.0570787034696117e-06, "loss": 0.4573, "step": 9787 }, { "epoch": 2.739434648754548, "grad_norm": 0.2431003346385221, "learning_rate": 2.05270187258339e-06, "loss": 0.4552, "step": 9788 }, { "epoch": 2.7397145256087323, "grad_norm": 0.2622162635281076, "learning_rate": 2.0483296053478596e-06, "loss": 0.4545, "step": 9789 }, { "epoch": 2.7399944024629166, "grad_norm": 0.24673910627636125, "learning_rate": 2.043961902179148e-06, "loss": 0.4311, "step": 9790 }, { "epoch": 2.7402742793171004, "grad_norm": 0.26986549912269525, "learning_rate": 2.0395987634929835e-06, "loss": 0.4602, "step": 9791 }, { "epoch": 2.7405541561712847, "grad_norm": 0.2584870948790133, "learning_rate": 2.0352401897046514e-06, "loss": 0.4574, "step": 9792 }, { "epoch": 2.740834033025469, "grad_norm": 0.2717921986704552, "learning_rate": 2.030886181229008e-06, "loss": 0.4705, "step": 9793 }, { "epoch": 2.741113909879653, "grad_norm": 0.2598638961512423, "learning_rate": 2.026536738480467e-06, "loss": 0.4484, "step": 9794 }, { "epoch": 2.741393786733837, "grad_norm": 0.2676571347095541, "learning_rate": 2.022191861873013e-06, "loss": 0.4689, "step": 9795 }, { "epoch": 2.7416736635880214, "grad_norm": 0.2555603117542369, "learning_rate": 2.017851551820188e-06, "loss": 0.4426, "step": 9796 }, { "epoch": 2.7419535404422053, "grad_norm": 0.26338405397950065, "learning_rate": 2.0135158087351116e-06, "loss": 0.4632, "step": 9797 }, { "epoch": 2.7422334172963896, "grad_norm": 0.24756470929017552, "learning_rate": 2.0091846330304587e-06, "loss": 0.4338, "step": 9798 }, { "epoch": 2.742513294150574, "grad_norm": 0.24951686147478658, "learning_rate": 2.004858025118472e-06, "loss": 0.4353, "step": 9799 }, { "epoch": 2.7427931710047577, "grad_norm": 0.26044379006654206, "learning_rate": 2.0005359854109674e-06, "loss": 0.443, "step": 9800 }, { "epoch": 2.743073047858942, "grad_norm": 0.2541233637604626, "learning_rate": 1.9962185143193146e-06, "loss": 0.4613, "step": 9801 }, { "epoch": 2.7433529247131263, "grad_norm": 0.2422587448045734, "learning_rate": 1.9919056122544465e-06, "loss": 0.4455, "step": 9802 }, { "epoch": 2.74363280156731, "grad_norm": 0.26598242085573076, "learning_rate": 1.9875972796268796e-06, "loss": 0.4491, "step": 9803 }, { "epoch": 2.7439126784214944, "grad_norm": 0.2367366865239579, "learning_rate": 1.9832935168466737e-06, "loss": 0.4535, "step": 9804 }, { "epoch": 2.7441925552756787, "grad_norm": 0.25565944852140315, "learning_rate": 1.978994324323463e-06, "loss": 0.4411, "step": 9805 }, { "epoch": 2.744472432129863, "grad_norm": 0.265656398995227, "learning_rate": 1.9746997024664537e-06, "loss": 0.4605, "step": 9806 }, { "epoch": 2.744752308984047, "grad_norm": 0.2506083541144176, "learning_rate": 1.9704096516844185e-06, "loss": 0.4279, "step": 9807 }, { "epoch": 2.745032185838231, "grad_norm": 0.2536719791358648, "learning_rate": 1.966124172385664e-06, "loss": 0.4332, "step": 9808 }, { "epoch": 2.7453120626924155, "grad_norm": 0.2491911982119003, "learning_rate": 1.9618432649781026e-06, "loss": 0.4568, "step": 9809 }, { "epoch": 2.7455919395465997, "grad_norm": 0.25381163034410015, "learning_rate": 1.957566929869181e-06, "loss": 0.4625, "step": 9810 }, { "epoch": 2.7458718164007836, "grad_norm": 0.249212606963689, "learning_rate": 1.953295167465935e-06, "loss": 0.4503, "step": 9811 }, { "epoch": 2.746151693254968, "grad_norm": 0.25293889988967516, "learning_rate": 1.9490279781749444e-06, "loss": 0.4497, "step": 9812 }, { "epoch": 2.746431570109152, "grad_norm": 0.25946744619315143, "learning_rate": 1.9447653624023677e-06, "loss": 0.4376, "step": 9813 }, { "epoch": 2.746711446963336, "grad_norm": 0.2522338245764953, "learning_rate": 1.940507320553925e-06, "loss": 0.4538, "step": 9814 }, { "epoch": 2.7469913238175203, "grad_norm": 0.26342991229246765, "learning_rate": 1.9362538530348916e-06, "loss": 0.4576, "step": 9815 }, { "epoch": 2.7472712006717046, "grad_norm": 0.2535452965318185, "learning_rate": 1.9320049602501166e-06, "loss": 0.4542, "step": 9816 }, { "epoch": 2.7475510775258885, "grad_norm": 0.2533952199194951, "learning_rate": 1.9277606426040206e-06, "loss": 0.4594, "step": 9817 }, { "epoch": 2.7478309543800727, "grad_norm": 0.2583910405217963, "learning_rate": 1.9235209005005693e-06, "loss": 0.4669, "step": 9818 }, { "epoch": 2.748110831234257, "grad_norm": 0.2498732511385406, "learning_rate": 1.919285734343307e-06, "loss": 0.4412, "step": 9819 }, { "epoch": 2.748390708088441, "grad_norm": 0.26116816402370263, "learning_rate": 1.915055144535344e-06, "loss": 0.4558, "step": 9820 }, { "epoch": 2.748670584942625, "grad_norm": 0.25829591909850785, "learning_rate": 1.910829131479347e-06, "loss": 0.4623, "step": 9821 }, { "epoch": 2.7489504617968095, "grad_norm": 0.24712886039603282, "learning_rate": 1.9066076955775392e-06, "loss": 0.4601, "step": 9822 }, { "epoch": 2.7492303386509933, "grad_norm": 0.24310791930278508, "learning_rate": 1.9023908372317434e-06, "loss": 0.44, "step": 9823 }, { "epoch": 2.7495102155051776, "grad_norm": 0.24835019957768184, "learning_rate": 1.8981785568433052e-06, "loss": 0.413, "step": 9824 }, { "epoch": 2.749790092359362, "grad_norm": 0.24464392895275985, "learning_rate": 1.8939708548131596e-06, "loss": 0.4617, "step": 9825 }, { "epoch": 2.750069969213546, "grad_norm": 0.2633395189470949, "learning_rate": 1.8897677315417917e-06, "loss": 0.4657, "step": 9826 }, { "epoch": 2.7503498460677305, "grad_norm": 0.2503926669341791, "learning_rate": 1.885569187429259e-06, "loss": 0.4652, "step": 9827 }, { "epoch": 2.7506297229219143, "grad_norm": 0.27174935999295685, "learning_rate": 1.8813752228751813e-06, "loss": 0.4748, "step": 9828 }, { "epoch": 2.7509095997760986, "grad_norm": 0.25368284762597176, "learning_rate": 1.877185838278739e-06, "loss": 0.453, "step": 9829 }, { "epoch": 2.751189476630283, "grad_norm": 0.2604977777841358, "learning_rate": 1.8730010340386906e-06, "loss": 0.4457, "step": 9830 }, { "epoch": 2.7514693534844668, "grad_norm": 0.25199200486936535, "learning_rate": 1.8688208105533345e-06, "loss": 0.4571, "step": 9831 }, { "epoch": 2.751749230338651, "grad_norm": 0.2564707300617125, "learning_rate": 1.8646451682205634e-06, "loss": 0.4473, "step": 9832 }, { "epoch": 2.7520291071928353, "grad_norm": 0.2465814274437075, "learning_rate": 1.8604741074377985e-06, "loss": 0.441, "step": 9833 }, { "epoch": 2.752308984047019, "grad_norm": 0.24819787350402822, "learning_rate": 1.856307628602061e-06, "loss": 0.434, "step": 9834 }, { "epoch": 2.7525888609012035, "grad_norm": 0.2522458208535914, "learning_rate": 1.8521457321099056e-06, "loss": 0.4555, "step": 9835 }, { "epoch": 2.7528687377553878, "grad_norm": 0.25325237798506217, "learning_rate": 1.8479884183574657e-06, "loss": 0.4486, "step": 9836 }, { "epoch": 2.7531486146095716, "grad_norm": 0.2512687359912989, "learning_rate": 1.8438356877404472e-06, "loss": 0.4463, "step": 9837 }, { "epoch": 2.753428491463756, "grad_norm": 0.24482383859662027, "learning_rate": 1.8396875406541003e-06, "loss": 0.4266, "step": 9838 }, { "epoch": 2.75370836831794, "grad_norm": 0.255910446221892, "learning_rate": 1.835543977493248e-06, "loss": 0.4702, "step": 9839 }, { "epoch": 2.753988245172124, "grad_norm": 0.24189453109675438, "learning_rate": 1.8314049986522697e-06, "loss": 0.4319, "step": 9840 }, { "epoch": 2.7542681220263083, "grad_norm": 0.25312095976269955, "learning_rate": 1.8272706045251386e-06, "loss": 0.4498, "step": 9841 }, { "epoch": 2.7545479988804926, "grad_norm": 0.24774475119856862, "learning_rate": 1.8231407955053515e-06, "loss": 0.4422, "step": 9842 }, { "epoch": 2.7548278757346765, "grad_norm": 0.24530932973762928, "learning_rate": 1.8190155719859937e-06, "loss": 0.4435, "step": 9843 }, { "epoch": 2.7551077525888608, "grad_norm": 0.2486010765037517, "learning_rate": 1.8148949343596955e-06, "loss": 0.4402, "step": 9844 }, { "epoch": 2.755387629443045, "grad_norm": 0.25405471905500593, "learning_rate": 1.8107788830186657e-06, "loss": 0.4466, "step": 9845 }, { "epoch": 2.7556675062972293, "grad_norm": 0.25319744713438, "learning_rate": 1.8066674183546796e-06, "loss": 0.466, "step": 9846 }, { "epoch": 2.7559473831514136, "grad_norm": 0.24416571032093162, "learning_rate": 1.802560540759063e-06, "loss": 0.4308, "step": 9847 }, { "epoch": 2.7562272600055975, "grad_norm": 0.25587903696865794, "learning_rate": 1.7984582506227087e-06, "loss": 0.4623, "step": 9848 }, { "epoch": 2.7565071368597818, "grad_norm": 0.27282076814942396, "learning_rate": 1.794360548336077e-06, "loss": 0.4604, "step": 9849 }, { "epoch": 2.756787013713966, "grad_norm": 0.24360921050416073, "learning_rate": 1.7902674342891945e-06, "loss": 0.4324, "step": 9850 }, { "epoch": 2.75706689056815, "grad_norm": 0.2666268305331878, "learning_rate": 1.7861789088716385e-06, "loss": 0.442, "step": 9851 }, { "epoch": 2.757346767422334, "grad_norm": 0.2556956994805931, "learning_rate": 1.7820949724725644e-06, "loss": 0.4492, "step": 9852 }, { "epoch": 2.7576266442765185, "grad_norm": 0.2497913576433295, "learning_rate": 1.7780156254806779e-06, "loss": 0.4321, "step": 9853 }, { "epoch": 2.7579065211307023, "grad_norm": 0.26680275607938037, "learning_rate": 1.7739408682842519e-06, "loss": 0.4459, "step": 9854 }, { "epoch": 2.7581863979848866, "grad_norm": 0.24563229509684617, "learning_rate": 1.7698707012711313e-06, "loss": 0.4331, "step": 9855 }, { "epoch": 2.758466274839071, "grad_norm": 0.2588336788891555, "learning_rate": 1.7658051248287066e-06, "loss": 0.4515, "step": 9856 }, { "epoch": 2.7587461516932548, "grad_norm": 0.26412216104427755, "learning_rate": 1.761744139343946e-06, "loss": 0.4487, "step": 9857 }, { "epoch": 2.759026028547439, "grad_norm": 0.26252899037835437, "learning_rate": 1.7576877452033847e-06, "loss": 0.4389, "step": 9858 }, { "epoch": 2.7593059054016233, "grad_norm": 0.2512836814322738, "learning_rate": 1.7536359427931083e-06, "loss": 0.4403, "step": 9859 }, { "epoch": 2.759585782255807, "grad_norm": 0.26114576787388766, "learning_rate": 1.7495887324987636e-06, "loss": 0.4584, "step": 9860 }, { "epoch": 2.7598656591099915, "grad_norm": 0.24786863869500758, "learning_rate": 1.7455461147055819e-06, "loss": 0.4467, "step": 9861 }, { "epoch": 2.760145535964176, "grad_norm": 0.25527130359432465, "learning_rate": 1.7415080897983215e-06, "loss": 0.4499, "step": 9862 }, { "epoch": 2.76042541281836, "grad_norm": 0.25850781816512897, "learning_rate": 1.737474658161331e-06, "loss": 0.4488, "step": 9863 }, { "epoch": 2.760705289672544, "grad_norm": 0.24088271779080977, "learning_rate": 1.7334458201785143e-06, "loss": 0.4212, "step": 9864 }, { "epoch": 2.760985166526728, "grad_norm": 0.24949668871945238, "learning_rate": 1.7294215762333422e-06, "loss": 0.4284, "step": 9865 }, { "epoch": 2.7612650433809125, "grad_norm": 0.25583877952988887, "learning_rate": 1.7254019267088472e-06, "loss": 0.4682, "step": 9866 }, { "epoch": 2.761544920235097, "grad_norm": 0.2638343758900408, "learning_rate": 1.7213868719876125e-06, "loss": 0.4408, "step": 9867 }, { "epoch": 2.7618247970892806, "grad_norm": 0.26082987065987406, "learning_rate": 1.7173764124517987e-06, "loss": 0.4546, "step": 9868 }, { "epoch": 2.762104673943465, "grad_norm": 0.24988160273768262, "learning_rate": 1.7133705484831231e-06, "loss": 0.4305, "step": 9869 }, { "epoch": 2.762384550797649, "grad_norm": 0.2515710888517263, "learning_rate": 1.7093692804628635e-06, "loss": 0.4361, "step": 9870 }, { "epoch": 2.762664427651833, "grad_norm": 0.2552183979411106, "learning_rate": 1.7053726087718712e-06, "loss": 0.4653, "step": 9871 }, { "epoch": 2.7629443045060174, "grad_norm": 0.2537436909675899, "learning_rate": 1.701380533790542e-06, "loss": 0.4345, "step": 9872 }, { "epoch": 2.7632241813602016, "grad_norm": 0.2500433034108333, "learning_rate": 1.6973930558988438e-06, "loss": 0.4502, "step": 9873 }, { "epoch": 2.7635040582143855, "grad_norm": 0.26169076283954357, "learning_rate": 1.6934101754763122e-06, "loss": 0.4419, "step": 9874 }, { "epoch": 2.76378393506857, "grad_norm": 0.27014196100755844, "learning_rate": 1.6894318929020381e-06, "loss": 0.4479, "step": 9875 }, { "epoch": 2.764063811922754, "grad_norm": 0.2574726657825003, "learning_rate": 1.68545820855468e-06, "loss": 0.4429, "step": 9876 }, { "epoch": 2.764343688776938, "grad_norm": 0.2524880184970193, "learning_rate": 1.6814891228124518e-06, "loss": 0.441, "step": 9877 }, { "epoch": 2.764623565631122, "grad_norm": 0.2441941462031394, "learning_rate": 1.6775246360531348e-06, "loss": 0.4461, "step": 9878 }, { "epoch": 2.7649034424853065, "grad_norm": 0.25143709731337827, "learning_rate": 1.6735647486540773e-06, "loss": 0.4582, "step": 9879 }, { "epoch": 2.7651833193394904, "grad_norm": 0.2600667779290014, "learning_rate": 1.669609460992172e-06, "loss": 0.4495, "step": 9880 }, { "epoch": 2.7654631961936746, "grad_norm": 0.2598431401850228, "learning_rate": 1.6656587734438845e-06, "loss": 0.4418, "step": 9881 }, { "epoch": 2.765743073047859, "grad_norm": 0.24950260170920652, "learning_rate": 1.6617126863852528e-06, "loss": 0.4377, "step": 9882 }, { "epoch": 2.7660229499020432, "grad_norm": 0.2597516854380162, "learning_rate": 1.65777120019186e-06, "loss": 0.4539, "step": 9883 }, { "epoch": 2.7663028267562275, "grad_norm": 0.2582783102350748, "learning_rate": 1.653834315238867e-06, "loss": 0.4445, "step": 9884 }, { "epoch": 2.7665827036104114, "grad_norm": 0.2597813332993534, "learning_rate": 1.6499020319009796e-06, "loss": 0.4449, "step": 9885 }, { "epoch": 2.7668625804645957, "grad_norm": 0.2589579891258187, "learning_rate": 1.645974350552487e-06, "loss": 0.4335, "step": 9886 }, { "epoch": 2.76714245731878, "grad_norm": 0.25390228325926073, "learning_rate": 1.6420512715672131e-06, "loss": 0.4522, "step": 9887 }, { "epoch": 2.767422334172964, "grad_norm": 0.24855966704301372, "learning_rate": 1.6381327953185698e-06, "loss": 0.4431, "step": 9888 }, { "epoch": 2.767702211027148, "grad_norm": 0.26122318036499725, "learning_rate": 1.6342189221795146e-06, "loss": 0.4476, "step": 9889 }, { "epoch": 2.7679820878813324, "grad_norm": 0.26490584141169976, "learning_rate": 1.630309652522577e-06, "loss": 0.4568, "step": 9890 }, { "epoch": 2.7682619647355162, "grad_norm": 0.25900542075149086, "learning_rate": 1.6264049867198316e-06, "loss": 0.4567, "step": 9891 }, { "epoch": 2.7685418415897005, "grad_norm": 0.26407639153594076, "learning_rate": 1.6225049251429424e-06, "loss": 0.4764, "step": 9892 }, { "epoch": 2.768821718443885, "grad_norm": 0.25400032000388784, "learning_rate": 1.6186094681631126e-06, "loss": 0.4611, "step": 9893 }, { "epoch": 2.7691015952980687, "grad_norm": 0.24900510360273606, "learning_rate": 1.614718616151112e-06, "loss": 0.4579, "step": 9894 }, { "epoch": 2.769381472152253, "grad_norm": 0.2637590373757003, "learning_rate": 1.6108323694772775e-06, "loss": 0.4569, "step": 9895 }, { "epoch": 2.7696613490064372, "grad_norm": 0.24799032492316023, "learning_rate": 1.6069507285115027e-06, "loss": 0.4384, "step": 9896 }, { "epoch": 2.769941225860621, "grad_norm": 0.25441732706797976, "learning_rate": 1.6030736936232472e-06, "loss": 0.4416, "step": 9897 }, { "epoch": 2.7702211027148054, "grad_norm": 0.24776083490307405, "learning_rate": 1.599201265181527e-06, "loss": 0.4272, "step": 9898 }, { "epoch": 2.7705009795689897, "grad_norm": 0.26213435984723776, "learning_rate": 1.5953334435549195e-06, "loss": 0.444, "step": 9899 }, { "epoch": 2.770780856423174, "grad_norm": 0.2607649999457887, "learning_rate": 1.591470229111569e-06, "loss": 0.4442, "step": 9900 }, { "epoch": 2.771060733277358, "grad_norm": 0.26314542894095144, "learning_rate": 1.5876116222191762e-06, "loss": 0.4421, "step": 9901 }, { "epoch": 2.771340610131542, "grad_norm": 0.2562781836764644, "learning_rate": 1.5837576232450025e-06, "loss": 0.4556, "step": 9902 }, { "epoch": 2.7716204869857264, "grad_norm": 0.2552996940280876, "learning_rate": 1.5799082325558822e-06, "loss": 0.4571, "step": 9903 }, { "epoch": 2.7719003638399107, "grad_norm": 0.24840947215054535, "learning_rate": 1.5760634505182004e-06, "loss": 0.4705, "step": 9904 }, { "epoch": 2.7721802406940945, "grad_norm": 0.24769111202528704, "learning_rate": 1.5722232774978974e-06, "loss": 0.4342, "step": 9905 }, { "epoch": 2.772460117548279, "grad_norm": 0.2466219726623981, "learning_rate": 1.568387713860492e-06, "loss": 0.4345, "step": 9906 }, { "epoch": 2.772739994402463, "grad_norm": 0.26863136609639976, "learning_rate": 1.5645567599710532e-06, "loss": 0.4508, "step": 9907 }, { "epoch": 2.773019871256647, "grad_norm": 0.26252638536666434, "learning_rate": 1.5607304161942115e-06, "loss": 0.4648, "step": 9908 }, { "epoch": 2.7732997481108312, "grad_norm": 0.26615456002637095, "learning_rate": 1.5569086828941593e-06, "loss": 0.4443, "step": 9909 }, { "epoch": 2.7735796249650155, "grad_norm": 0.25319575151533724, "learning_rate": 1.5530915604346553e-06, "loss": 0.4519, "step": 9910 }, { "epoch": 2.7738595018191994, "grad_norm": 0.25393800338746597, "learning_rate": 1.5492790491790143e-06, "loss": 0.4525, "step": 9911 }, { "epoch": 2.7741393786733837, "grad_norm": 0.25350606794488084, "learning_rate": 1.5454711494901076e-06, "loss": 0.4308, "step": 9912 }, { "epoch": 2.774419255527568, "grad_norm": 0.2678909115242242, "learning_rate": 1.5416678617303838e-06, "loss": 0.4629, "step": 9913 }, { "epoch": 2.774699132381752, "grad_norm": 0.2366863998554562, "learning_rate": 1.5378691862618367e-06, "loss": 0.4591, "step": 9914 }, { "epoch": 2.774979009235936, "grad_norm": 0.25635978902418827, "learning_rate": 1.5340751234460216e-06, "loss": 0.4458, "step": 9915 }, { "epoch": 2.7752588860901204, "grad_norm": 0.258296792425405, "learning_rate": 1.5302856736440773e-06, "loss": 0.4361, "step": 9916 }, { "epoch": 2.7755387629443042, "grad_norm": 0.25931834446669616, "learning_rate": 1.52650083721666e-06, "loss": 0.479, "step": 9917 }, { "epoch": 2.7758186397984885, "grad_norm": 0.26385446568239773, "learning_rate": 1.522720614524026e-06, "loss": 0.4348, "step": 9918 }, { "epoch": 2.776098516652673, "grad_norm": 0.2622619028570416, "learning_rate": 1.5189450059259759e-06, "loss": 0.4665, "step": 9919 }, { "epoch": 2.776378393506857, "grad_norm": 0.2530816873962765, "learning_rate": 1.515174011781878e-06, "loss": 0.4329, "step": 9920 }, { "epoch": 2.7766582703610414, "grad_norm": 0.26841643057303033, "learning_rate": 1.5114076324506565e-06, "loss": 0.4672, "step": 9921 }, { "epoch": 2.7769381472152252, "grad_norm": 0.2766572726103652, "learning_rate": 1.5076458682907967e-06, "loss": 0.4766, "step": 9922 }, { "epoch": 2.7772180240694095, "grad_norm": 0.26976189067539585, "learning_rate": 1.5038887196603458e-06, "loss": 0.4587, "step": 9923 }, { "epoch": 2.777497900923594, "grad_norm": 0.25998431386306364, "learning_rate": 1.5001361869169117e-06, "loss": 0.4425, "step": 9924 }, { "epoch": 2.7777777777777777, "grad_norm": 0.2572445614318216, "learning_rate": 1.496388270417659e-06, "loss": 0.449, "step": 9925 }, { "epoch": 2.778057654631962, "grad_norm": 0.25004335296725344, "learning_rate": 1.492644970519319e-06, "loss": 0.4467, "step": 9926 }, { "epoch": 2.7783375314861463, "grad_norm": 0.2524736550997618, "learning_rate": 1.4889062875781846e-06, "loss": 0.4523, "step": 9927 }, { "epoch": 2.77861740834033, "grad_norm": 0.24379796656378236, "learning_rate": 1.4851722219500986e-06, "loss": 0.4475, "step": 9928 }, { "epoch": 2.7788972851945144, "grad_norm": 0.2637163149355571, "learning_rate": 1.4814427739904824e-06, "loss": 0.4594, "step": 9929 }, { "epoch": 2.7791771620486987, "grad_norm": 0.26285694312144064, "learning_rate": 1.477717944054291e-06, "loss": 0.4508, "step": 9930 }, { "epoch": 2.7794570389028825, "grad_norm": 0.2385400019552132, "learning_rate": 1.4739977324960742e-06, "loss": 0.441, "step": 9931 }, { "epoch": 2.779736915757067, "grad_norm": 0.2488919146263261, "learning_rate": 1.4702821396699095e-06, "loss": 0.4215, "step": 9932 }, { "epoch": 2.780016792611251, "grad_norm": 0.2553818793559371, "learning_rate": 1.4665711659294535e-06, "loss": 0.4433, "step": 9933 }, { "epoch": 2.780296669465435, "grad_norm": 0.24543217828489985, "learning_rate": 1.462864811627923e-06, "loss": 0.442, "step": 9934 }, { "epoch": 2.7805765463196193, "grad_norm": 0.26225398041640935, "learning_rate": 1.4591630771180865e-06, "loss": 0.446, "step": 9935 }, { "epoch": 2.7808564231738035, "grad_norm": 0.2926901503423705, "learning_rate": 1.4554659627522782e-06, "loss": 0.4614, "step": 9936 }, { "epoch": 2.781136300027988, "grad_norm": 0.24399404399959204, "learning_rate": 1.4517734688823893e-06, "loss": 0.4342, "step": 9937 }, { "epoch": 2.7814161768821717, "grad_norm": 0.2533952208000935, "learning_rate": 1.4480855958598715e-06, "loss": 0.469, "step": 9938 }, { "epoch": 2.781696053736356, "grad_norm": 0.27457777970489095, "learning_rate": 1.4444023440357445e-06, "loss": 0.4777, "step": 9939 }, { "epoch": 2.7819759305905403, "grad_norm": 0.2606795452061868, "learning_rate": 1.4407237137605833e-06, "loss": 0.4416, "step": 9940 }, { "epoch": 2.7822558074447246, "grad_norm": 0.2489414056239663, "learning_rate": 1.4370497053845188e-06, "loss": 0.4607, "step": 9941 }, { "epoch": 2.7825356842989084, "grad_norm": 0.2560060466918113, "learning_rate": 1.4333803192572437e-06, "loss": 0.4427, "step": 9942 }, { "epoch": 2.7828155611530927, "grad_norm": 0.2689146407676583, "learning_rate": 1.4297155557280062e-06, "loss": 0.4772, "step": 9943 }, { "epoch": 2.783095438007277, "grad_norm": 0.25076953646542366, "learning_rate": 1.4260554151456329e-06, "loss": 0.446, "step": 9944 }, { "epoch": 2.783375314861461, "grad_norm": 0.25888309966038153, "learning_rate": 1.422399897858495e-06, "loss": 0.4572, "step": 9945 }, { "epoch": 2.783655191715645, "grad_norm": 0.25102148456232454, "learning_rate": 1.4187490042145257e-06, "loss": 0.4505, "step": 9946 }, { "epoch": 2.7839350685698294, "grad_norm": 0.26082987979360156, "learning_rate": 1.4151027345612188e-06, "loss": 0.4677, "step": 9947 }, { "epoch": 2.7842149454240133, "grad_norm": 0.2640450694000547, "learning_rate": 1.4114610892456304e-06, "loss": 0.4523, "step": 9948 }, { "epoch": 2.7844948222781976, "grad_norm": 0.26821185864910446, "learning_rate": 1.4078240686143663e-06, "loss": 0.4528, "step": 9949 }, { "epoch": 2.784774699132382, "grad_norm": 0.25741403841115945, "learning_rate": 1.404191673013605e-06, "loss": 0.4398, "step": 9950 }, { "epoch": 2.7850545759865657, "grad_norm": 0.26085871079329714, "learning_rate": 1.4005639027890871e-06, "loss": 0.4636, "step": 9951 }, { "epoch": 2.78533445284075, "grad_norm": 0.2514087586435767, "learning_rate": 1.3969407582860972e-06, "loss": 0.458, "step": 9952 }, { "epoch": 2.7856143296949343, "grad_norm": 0.24396338601296708, "learning_rate": 1.3933222398494927e-06, "loss": 0.4347, "step": 9953 }, { "epoch": 2.785894206549118, "grad_norm": 0.25646494800800057, "learning_rate": 1.3897083478236761e-06, "loss": 0.4556, "step": 9954 }, { "epoch": 2.7861740834033024, "grad_norm": 0.26563384689169894, "learning_rate": 1.3860990825526333e-06, "loss": 0.4742, "step": 9955 }, { "epoch": 2.7864539602574867, "grad_norm": 0.2572462566583641, "learning_rate": 1.3824944443798838e-06, "loss": 0.4494, "step": 9956 }, { "epoch": 2.786733837111671, "grad_norm": 0.25435689945391043, "learning_rate": 1.378894433648531e-06, "loss": 0.4412, "step": 9957 }, { "epoch": 2.7870137139658553, "grad_norm": 0.25698015099571514, "learning_rate": 1.3752990507012176e-06, "loss": 0.4607, "step": 9958 }, { "epoch": 2.787293590820039, "grad_norm": 0.26095188195834784, "learning_rate": 1.3717082958801586e-06, "loss": 0.4411, "step": 9959 }, { "epoch": 2.7875734676742234, "grad_norm": 0.26137156532226097, "learning_rate": 1.3681221695271195e-06, "loss": 0.4596, "step": 9960 }, { "epoch": 2.7878533445284077, "grad_norm": 0.24339930838086868, "learning_rate": 1.364540671983433e-06, "loss": 0.4391, "step": 9961 }, { "epoch": 2.7881332213825916, "grad_norm": 0.2645758288665304, "learning_rate": 1.3609638035899875e-06, "loss": 0.4299, "step": 9962 }, { "epoch": 2.788413098236776, "grad_norm": 0.25811516683528624, "learning_rate": 1.357391564687238e-06, "loss": 0.4494, "step": 9963 }, { "epoch": 2.78869297509096, "grad_norm": 0.25134969175188127, "learning_rate": 1.35382395561518e-06, "loss": 0.458, "step": 9964 }, { "epoch": 2.788972851945144, "grad_norm": 0.2593673101983797, "learning_rate": 1.3502609767133857e-06, "loss": 0.4344, "step": 9965 }, { "epoch": 2.7892527287993283, "grad_norm": 0.2519434370338427, "learning_rate": 1.3467026283209894e-06, "loss": 0.4559, "step": 9966 }, { "epoch": 2.7895326056535126, "grad_norm": 0.2602830949061682, "learning_rate": 1.3431489107766592e-06, "loss": 0.4368, "step": 9967 }, { "epoch": 2.7898124825076964, "grad_norm": 0.2527936043818087, "learning_rate": 1.3395998244186582e-06, "loss": 0.4555, "step": 9968 }, { "epoch": 2.7900923593618807, "grad_norm": 0.25545981836469733, "learning_rate": 1.3360553695847822e-06, "loss": 0.4569, "step": 9969 }, { "epoch": 2.790372236216065, "grad_norm": 0.25744697068202377, "learning_rate": 1.332515546612395e-06, "loss": 0.4633, "step": 9970 }, { "epoch": 2.790652113070249, "grad_norm": 0.26171932687727784, "learning_rate": 1.3289803558384162e-06, "loss": 0.4534, "step": 9971 }, { "epoch": 2.790931989924433, "grad_norm": 0.2512142623555132, "learning_rate": 1.3254497975993264e-06, "loss": 0.4434, "step": 9972 }, { "epoch": 2.7912118667786174, "grad_norm": 0.2577117878871623, "learning_rate": 1.3219238722311733e-06, "loss": 0.4452, "step": 9973 }, { "epoch": 2.7914917436328017, "grad_norm": 0.2590775441238437, "learning_rate": 1.3184025800695499e-06, "loss": 0.4718, "step": 9974 }, { "epoch": 2.7917716204869856, "grad_norm": 0.26265291378524014, "learning_rate": 1.3148859214496156e-06, "loss": 0.4532, "step": 9975 }, { "epoch": 2.79205149734117, "grad_norm": 0.2561238230788668, "learning_rate": 1.3113738967060918e-06, "loss": 0.4466, "step": 9976 }, { "epoch": 2.792331374195354, "grad_norm": 0.25212904412063863, "learning_rate": 1.3078665061732554e-06, "loss": 0.4465, "step": 9977 }, { "epoch": 2.7926112510495384, "grad_norm": 0.25628654442421034, "learning_rate": 1.3043637501849282e-06, "loss": 0.4539, "step": 9978 }, { "epoch": 2.7928911279037223, "grad_norm": 0.25871583667058373, "learning_rate": 1.3008656290745269e-06, "loss": 0.445, "step": 9979 }, { "epoch": 2.7931710047579066, "grad_norm": 0.2577730113815811, "learning_rate": 1.2973721431749908e-06, "loss": 0.4491, "step": 9980 }, { "epoch": 2.793450881612091, "grad_norm": 0.26107229084098077, "learning_rate": 1.2938832928188316e-06, "loss": 0.4651, "step": 9981 }, { "epoch": 2.7937307584662747, "grad_norm": 0.2612412242372199, "learning_rate": 1.2903990783381226e-06, "loss": 0.4614, "step": 9982 }, { "epoch": 2.794010635320459, "grad_norm": 0.2576072472393539, "learning_rate": 1.2869195000644985e-06, "loss": 0.4436, "step": 9983 }, { "epoch": 2.7942905121746433, "grad_norm": 0.2746996528066174, "learning_rate": 1.2834445583291387e-06, "loss": 0.4732, "step": 9984 }, { "epoch": 2.794570389028827, "grad_norm": 0.2524876940103109, "learning_rate": 1.2799742534627901e-06, "loss": 0.442, "step": 9985 }, { "epoch": 2.7948502658830114, "grad_norm": 0.25568136493755084, "learning_rate": 1.276508585795766e-06, "loss": 0.4442, "step": 9986 }, { "epoch": 2.7951301427371957, "grad_norm": 0.2447715531517239, "learning_rate": 1.2730475556579247e-06, "loss": 0.4433, "step": 9987 }, { "epoch": 2.7954100195913796, "grad_norm": 0.2488736236977702, "learning_rate": 1.2695911633786971e-06, "loss": 0.4359, "step": 9988 }, { "epoch": 2.795689896445564, "grad_norm": 0.2591930365268937, "learning_rate": 1.2661394092870537e-06, "loss": 0.4729, "step": 9989 }, { "epoch": 2.795969773299748, "grad_norm": 0.2573072489842773, "learning_rate": 1.262692293711537e-06, "loss": 0.4508, "step": 9990 }, { "epoch": 2.796249650153932, "grad_norm": 0.25989222524716127, "learning_rate": 1.2592498169802513e-06, "loss": 0.4752, "step": 9991 }, { "epoch": 2.7965295270081163, "grad_norm": 0.25270467365628874, "learning_rate": 1.2558119794208456e-06, "loss": 0.4464, "step": 9992 }, { "epoch": 2.7968094038623006, "grad_norm": 0.25835423310990646, "learning_rate": 1.2523787813605413e-06, "loss": 0.4648, "step": 9993 }, { "epoch": 2.797089280716485, "grad_norm": 0.2529859162387948, "learning_rate": 1.2489502231261052e-06, "loss": 0.4533, "step": 9994 }, { "epoch": 2.797369157570669, "grad_norm": 0.23629321574318973, "learning_rate": 1.2455263050438759e-06, "loss": 0.4524, "step": 9995 }, { "epoch": 2.797649034424853, "grad_norm": 0.25389427162784706, "learning_rate": 1.2421070274397428e-06, "loss": 0.4572, "step": 9996 }, { "epoch": 2.7979289112790373, "grad_norm": 0.2840046132570813, "learning_rate": 1.2386923906391512e-06, "loss": 0.4537, "step": 9997 }, { "epoch": 2.7982087881332216, "grad_norm": 0.2505066776398961, "learning_rate": 1.2352823949671189e-06, "loss": 0.4668, "step": 9998 }, { "epoch": 2.7984886649874054, "grad_norm": 0.2509149628141864, "learning_rate": 1.2318770407481973e-06, "loss": 0.4523, "step": 9999 }, { "epoch": 2.7987685418415897, "grad_norm": 0.2518685026080287, "learning_rate": 1.2284763283065159e-06, "loss": 0.4598, "step": 10000 }, { "epoch": 2.799048418695774, "grad_norm": 0.2623289816675507, "learning_rate": 1.225080257965755e-06, "loss": 0.4304, "step": 10001 }, { "epoch": 2.799328295549958, "grad_norm": 0.25573096800671996, "learning_rate": 1.2216888300491559e-06, "loss": 0.4428, "step": 10002 }, { "epoch": 2.799608172404142, "grad_norm": 0.2668555762116913, "learning_rate": 1.2183020448795212e-06, "loss": 0.4618, "step": 10003 }, { "epoch": 2.7998880492583265, "grad_norm": 0.26229813824137216, "learning_rate": 1.2149199027791992e-06, "loss": 0.4494, "step": 10004 }, { "epoch": 2.8001679261125103, "grad_norm": 0.25880574709037374, "learning_rate": 1.211542404070104e-06, "loss": 0.4527, "step": 10005 }, { "epoch": 2.8004478029666946, "grad_norm": 0.24930135750690222, "learning_rate": 1.2081695490737178e-06, "loss": 0.4423, "step": 10006 }, { "epoch": 2.800727679820879, "grad_norm": 0.25562693305388845, "learning_rate": 1.2048013381110611e-06, "loss": 0.4355, "step": 10007 }, { "epoch": 2.8010075566750627, "grad_norm": 0.2573744662139402, "learning_rate": 1.2014377715027225e-06, "loss": 0.4669, "step": 10008 }, { "epoch": 2.801287433529247, "grad_norm": 0.2521840692937912, "learning_rate": 1.198078849568851e-06, "loss": 0.4362, "step": 10009 }, { "epoch": 2.8015673103834313, "grad_norm": 0.25530231186014696, "learning_rate": 1.1947245726291523e-06, "loss": 0.418, "step": 10010 }, { "epoch": 2.8018471872376156, "grad_norm": 0.25835305322672464, "learning_rate": 1.1913749410028873e-06, "loss": 0.4524, "step": 10011 }, { "epoch": 2.8021270640917995, "grad_norm": 0.25387526679496275, "learning_rate": 1.188029955008868e-06, "loss": 0.4566, "step": 10012 }, { "epoch": 2.8024069409459837, "grad_norm": 0.25688080755971887, "learning_rate": 1.1846896149654785e-06, "loss": 0.4442, "step": 10013 }, { "epoch": 2.802686817800168, "grad_norm": 0.2509635911430038, "learning_rate": 1.1813539211906587e-06, "loss": 0.4494, "step": 10014 }, { "epoch": 2.8029666946543523, "grad_norm": 0.2552682125098111, "learning_rate": 1.1780228740018994e-06, "loss": 0.4557, "step": 10015 }, { "epoch": 2.803246571508536, "grad_norm": 0.26217435155468116, "learning_rate": 1.174696473716247e-06, "loss": 0.4307, "step": 10016 }, { "epoch": 2.8035264483627205, "grad_norm": 0.2523713093152466, "learning_rate": 1.1713747206503145e-06, "loss": 0.4498, "step": 10017 }, { "epoch": 2.8038063252169048, "grad_norm": 0.25678219498147636, "learning_rate": 1.1680576151202717e-06, "loss": 0.4608, "step": 10018 }, { "epoch": 2.8040862020710886, "grad_norm": 0.2531984694325335, "learning_rate": 1.1647451574418321e-06, "loss": 0.4462, "step": 10019 }, { "epoch": 2.804366078925273, "grad_norm": 0.2519919251645383, "learning_rate": 1.1614373479302832e-06, "loss": 0.4429, "step": 10020 }, { "epoch": 2.804645955779457, "grad_norm": 0.2537678668261987, "learning_rate": 1.1581341869004613e-06, "loss": 0.4659, "step": 10021 }, { "epoch": 2.804925832633641, "grad_norm": 0.25675874919280917, "learning_rate": 1.1548356746667655e-06, "loss": 0.435, "step": 10022 }, { "epoch": 2.8052057094878253, "grad_norm": 0.2579020260027916, "learning_rate": 1.1515418115431553e-06, "loss": 0.4521, "step": 10023 }, { "epoch": 2.8054855863420096, "grad_norm": 0.2618712561245846, "learning_rate": 1.1482525978431357e-06, "loss": 0.4553, "step": 10024 }, { "epoch": 2.8057654631961935, "grad_norm": 0.26263634255425594, "learning_rate": 1.1449680338797786e-06, "loss": 0.4383, "step": 10025 }, { "epoch": 2.8060453400503778, "grad_norm": 0.25277594804249975, "learning_rate": 1.1416881199657059e-06, "loss": 0.4436, "step": 10026 }, { "epoch": 2.806325216904562, "grad_norm": 0.24431848116031987, "learning_rate": 1.1384128564131069e-06, "loss": 0.4566, "step": 10027 }, { "epoch": 2.806605093758746, "grad_norm": 0.2609154288450035, "learning_rate": 1.1351422435337211e-06, "loss": 0.4437, "step": 10028 }, { "epoch": 2.80688497061293, "grad_norm": 0.25695753681581046, "learning_rate": 1.131876281638844e-06, "loss": 0.432, "step": 10029 }, { "epoch": 2.8071648474671145, "grad_norm": 0.2538585151636701, "learning_rate": 1.1286149710393323e-06, "loss": 0.4597, "step": 10030 }, { "epoch": 2.8074447243212988, "grad_norm": 0.23442293755687932, "learning_rate": 1.1253583120456102e-06, "loss": 0.4393, "step": 10031 }, { "epoch": 2.807724601175483, "grad_norm": 0.24571792553483443, "learning_rate": 1.122106304967635e-06, "loss": 0.4572, "step": 10032 }, { "epoch": 2.808004478029667, "grad_norm": 0.24975815194917134, "learning_rate": 1.1188589501149427e-06, "loss": 0.4537, "step": 10033 }, { "epoch": 2.808284354883851, "grad_norm": 0.2522156074258436, "learning_rate": 1.1156162477966137e-06, "loss": 0.4228, "step": 10034 }, { "epoch": 2.8085642317380355, "grad_norm": 0.25234913236963324, "learning_rate": 1.1123781983212955e-06, "loss": 0.4632, "step": 10035 }, { "epoch": 2.8088441085922193, "grad_norm": 0.2617953931586556, "learning_rate": 1.1091448019971806e-06, "loss": 0.4499, "step": 10036 }, { "epoch": 2.8091239854464036, "grad_norm": 0.268164279012252, "learning_rate": 1.1059160591320338e-06, "loss": 0.4774, "step": 10037 }, { "epoch": 2.809403862300588, "grad_norm": 0.2532397829516345, "learning_rate": 1.1026919700331596e-06, "loss": 0.4791, "step": 10038 }, { "epoch": 2.8096837391547718, "grad_norm": 0.25454581027538686, "learning_rate": 1.0994725350074342e-06, "loss": 0.4517, "step": 10039 }, { "epoch": 2.809963616008956, "grad_norm": 0.2641984689771092, "learning_rate": 1.0962577543612795e-06, "loss": 0.4472, "step": 10040 }, { "epoch": 2.8102434928631403, "grad_norm": 0.36658894526412195, "learning_rate": 1.0930476284006897e-06, "loss": 0.4572, "step": 10041 }, { "epoch": 2.810523369717324, "grad_norm": 0.25492350997962254, "learning_rate": 1.0898421574312034e-06, "loss": 0.4352, "step": 10042 }, { "epoch": 2.8108032465715085, "grad_norm": 0.2533002255898336, "learning_rate": 1.0866413417579103e-06, "loss": 0.4584, "step": 10043 }, { "epoch": 2.8110831234256928, "grad_norm": 0.25750211312832716, "learning_rate": 1.0834451816854775e-06, "loss": 0.444, "step": 10044 }, { "epoch": 2.8113630002798766, "grad_norm": 0.25995239658813213, "learning_rate": 1.080253677518106e-06, "loss": 0.4479, "step": 10045 }, { "epoch": 2.811642877134061, "grad_norm": 0.25958867059504953, "learning_rate": 1.0770668295595755e-06, "loss": 0.4433, "step": 10046 }, { "epoch": 2.811922753988245, "grad_norm": 0.26457879133391693, "learning_rate": 1.0738846381131983e-06, "loss": 0.4362, "step": 10047 }, { "epoch": 2.8122026308424295, "grad_norm": 0.2597805161367181, "learning_rate": 1.0707071034818773e-06, "loss": 0.4655, "step": 10048 }, { "epoch": 2.8124825076966133, "grad_norm": 0.2573105787220753, "learning_rate": 1.0675342259680366e-06, "loss": 0.4514, "step": 10049 }, { "epoch": 2.8127623845507976, "grad_norm": 0.2626867445065791, "learning_rate": 1.0643660058736793e-06, "loss": 0.4468, "step": 10050 }, { "epoch": 2.813042261404982, "grad_norm": 0.25000800106284804, "learning_rate": 1.0612024435003531e-06, "loss": 0.4472, "step": 10051 }, { "epoch": 2.813322138259166, "grad_norm": 0.2588721745046232, "learning_rate": 1.0580435391491728e-06, "loss": 0.4349, "step": 10052 }, { "epoch": 2.81360201511335, "grad_norm": 0.25125222756847393, "learning_rate": 1.0548892931208033e-06, "loss": 0.4439, "step": 10053 }, { "epoch": 2.8138818919675344, "grad_norm": 0.24978774064115405, "learning_rate": 1.0517397057154654e-06, "loss": 0.4489, "step": 10054 }, { "epoch": 2.8141617688217186, "grad_norm": 0.25557688299734493, "learning_rate": 1.048594777232942e-06, "loss": 0.4524, "step": 10055 }, { "epoch": 2.8144416456759025, "grad_norm": 0.2553222781900778, "learning_rate": 1.0454545079725654e-06, "loss": 0.442, "step": 10056 }, { "epoch": 2.814721522530087, "grad_norm": 0.2625996339854685, "learning_rate": 1.04231889823323e-06, "loss": 0.4616, "step": 10057 }, { "epoch": 2.815001399384271, "grad_norm": 0.26536237523411943, "learning_rate": 1.0391879483133804e-06, "loss": 0.4553, "step": 10058 }, { "epoch": 2.815281276238455, "grad_norm": 0.2518798493314616, "learning_rate": 1.0360616585110338e-06, "loss": 0.444, "step": 10059 }, { "epoch": 2.815561153092639, "grad_norm": 0.2674145425133259, "learning_rate": 1.0329400291237413e-06, "loss": 0.4639, "step": 10060 }, { "epoch": 2.8158410299468235, "grad_norm": 0.25435428497609625, "learning_rate": 1.0298230604486258e-06, "loss": 0.449, "step": 10061 }, { "epoch": 2.8161209068010074, "grad_norm": 0.2590284413753409, "learning_rate": 1.0267107527823617e-06, "loss": 0.4425, "step": 10062 }, { "epoch": 2.8164007836551916, "grad_norm": 0.25201643767313303, "learning_rate": 1.023603106421178e-06, "loss": 0.4552, "step": 10063 }, { "epoch": 2.816680660509376, "grad_norm": 0.26165099284783544, "learning_rate": 1.0205001216608612e-06, "loss": 0.4538, "step": 10064 }, { "epoch": 2.81696053736356, "grad_norm": 0.27776039496702404, "learning_rate": 1.0174017987967577e-06, "loss": 0.4715, "step": 10065 }, { "epoch": 2.817240414217744, "grad_norm": 0.25777329208975347, "learning_rate": 1.0143081381237706e-06, "loss": 0.4371, "step": 10066 }, { "epoch": 2.8175202910719284, "grad_norm": 0.2690546753062122, "learning_rate": 1.0112191399363534e-06, "loss": 0.4494, "step": 10067 }, { "epoch": 2.8178001679261127, "grad_norm": 0.25587850210427726, "learning_rate": 1.008134804528521e-06, "loss": 0.4486, "step": 10068 }, { "epoch": 2.818080044780297, "grad_norm": 0.25385824906574944, "learning_rate": 1.0050551321938384e-06, "loss": 0.4499, "step": 10069 }, { "epoch": 2.818359921634481, "grad_norm": 0.2529017673823629, "learning_rate": 1.0019801232254322e-06, "loss": 0.4263, "step": 10070 }, { "epoch": 2.818639798488665, "grad_norm": 0.2634481033538776, "learning_rate": 9.989097779159796e-07, "loss": 0.4462, "step": 10071 }, { "epoch": 2.8189196753428494, "grad_norm": 0.2487106793995691, "learning_rate": 9.958440965577243e-07, "loss": 0.4597, "step": 10072 }, { "epoch": 2.819199552197033, "grad_norm": 0.2479782160319531, "learning_rate": 9.927830794424553e-07, "loss": 0.4496, "step": 10073 }, { "epoch": 2.8194794290512175, "grad_norm": 0.2533961878193845, "learning_rate": 9.897267268615284e-07, "loss": 0.4307, "step": 10074 }, { "epoch": 2.819759305905402, "grad_norm": 0.2466751349058953, "learning_rate": 9.866750391058389e-07, "loss": 0.4368, "step": 10075 }, { "epoch": 2.8200391827595856, "grad_norm": 0.24374236508360309, "learning_rate": 9.83628016465854e-07, "loss": 0.4352, "step": 10076 }, { "epoch": 2.82031905961377, "grad_norm": 0.2543213559113662, "learning_rate": 9.80585659231592e-07, "loss": 0.4259, "step": 10077 }, { "epoch": 2.8205989364679542, "grad_norm": 0.25099459130558754, "learning_rate": 9.775479676926269e-07, "loss": 0.4377, "step": 10078 }, { "epoch": 2.820878813322138, "grad_norm": 0.2572368803838283, "learning_rate": 9.745149421380828e-07, "loss": 0.4492, "step": 10079 }, { "epoch": 2.8211586901763224, "grad_norm": 0.2601264172895387, "learning_rate": 9.714865828566455e-07, "loss": 0.4614, "step": 10080 }, { "epoch": 2.8214385670305067, "grad_norm": 0.24809535239113745, "learning_rate": 9.684628901365567e-07, "loss": 0.4686, "step": 10081 }, { "epoch": 2.8217184438846905, "grad_norm": 0.2583757231300886, "learning_rate": 9.654438642656083e-07, "loss": 0.4374, "step": 10082 }, { "epoch": 2.821998320738875, "grad_norm": 0.25979970122250534, "learning_rate": 9.624295055311704e-07, "loss": 0.4878, "step": 10083 }, { "epoch": 2.822278197593059, "grad_norm": 0.2627535858238464, "learning_rate": 9.594198142201305e-07, "loss": 0.4486, "step": 10084 }, { "epoch": 2.822558074447243, "grad_norm": 0.24754921991327522, "learning_rate": 9.564147906189703e-07, "loss": 0.4519, "step": 10085 }, { "epoch": 2.8228379513014272, "grad_norm": 0.27070248197086716, "learning_rate": 9.534144350136942e-07, "loss": 0.4693, "step": 10086 }, { "epoch": 2.8231178281556115, "grad_norm": 0.24472943267326952, "learning_rate": 9.504187476898907e-07, "loss": 0.4334, "step": 10087 }, { "epoch": 2.823397705009796, "grad_norm": 0.24744547999958846, "learning_rate": 9.474277289326817e-07, "loss": 0.4531, "step": 10088 }, { "epoch": 2.82367758186398, "grad_norm": 0.25962567214023186, "learning_rate": 9.444413790267558e-07, "loss": 0.4222, "step": 10089 }, { "epoch": 2.823957458718164, "grad_norm": 0.25505431645769044, "learning_rate": 9.41459698256364e-07, "loss": 0.456, "step": 10090 }, { "epoch": 2.8242373355723482, "grad_norm": 0.26849471916892415, "learning_rate": 9.384826869052898e-07, "loss": 0.4498, "step": 10091 }, { "epoch": 2.8245172124265325, "grad_norm": 0.2554869188266186, "learning_rate": 9.355103452568958e-07, "loss": 0.4382, "step": 10092 }, { "epoch": 2.8247970892807164, "grad_norm": 0.253352558640311, "learning_rate": 9.325426735940946e-07, "loss": 0.4439, "step": 10093 }, { "epoch": 2.8250769661349007, "grad_norm": 0.24457205420612252, "learning_rate": 9.295796721993433e-07, "loss": 0.4317, "step": 10094 }, { "epoch": 2.825356842989085, "grad_norm": 0.2611003726754562, "learning_rate": 9.266213413546609e-07, "loss": 0.4707, "step": 10095 }, { "epoch": 2.825636719843269, "grad_norm": 0.2572010642600289, "learning_rate": 9.236676813416334e-07, "loss": 0.4458, "step": 10096 }, { "epoch": 2.825916596697453, "grad_norm": 0.23567654616912856, "learning_rate": 9.207186924413858e-07, "loss": 0.4161, "step": 10097 }, { "epoch": 2.8261964735516374, "grad_norm": 0.2524806826636959, "learning_rate": 9.17774374934599e-07, "loss": 0.4612, "step": 10098 }, { "epoch": 2.8264763504058212, "grad_norm": 0.2583721915497328, "learning_rate": 9.148347291015158e-07, "loss": 0.4534, "step": 10099 }, { "epoch": 2.8267562272600055, "grad_norm": 0.2443844209188402, "learning_rate": 9.1189975522194e-07, "loss": 0.4351, "step": 10100 }, { "epoch": 2.82703610411419, "grad_norm": 0.2592075075953771, "learning_rate": 9.089694535752257e-07, "loss": 0.4483, "step": 10101 }, { "epoch": 2.8273159809683737, "grad_norm": 0.2627400968198566, "learning_rate": 9.060438244402725e-07, "loss": 0.4623, "step": 10102 }, { "epoch": 2.827595857822558, "grad_norm": 0.24670093257172748, "learning_rate": 9.031228680955517e-07, "loss": 0.4376, "step": 10103 }, { "epoch": 2.8278757346767422, "grad_norm": 0.25509140273022224, "learning_rate": 9.002065848190744e-07, "loss": 0.4488, "step": 10104 }, { "epoch": 2.8281556115309265, "grad_norm": 0.2513643430278304, "learning_rate": 8.97294974888413e-07, "loss": 0.4434, "step": 10105 }, { "epoch": 2.8284354883851104, "grad_norm": 0.25221197470587714, "learning_rate": 8.943880385807068e-07, "loss": 0.4452, "step": 10106 }, { "epoch": 2.8287153652392947, "grad_norm": 0.25070011355054483, "learning_rate": 8.914857761726292e-07, "loss": 0.434, "step": 10107 }, { "epoch": 2.828995242093479, "grad_norm": 0.2582137023259711, "learning_rate": 8.885881879404201e-07, "loss": 0.4528, "step": 10108 }, { "epoch": 2.8292751189476633, "grad_norm": 0.2592773347128377, "learning_rate": 8.856952741598756e-07, "loss": 0.4697, "step": 10109 }, { "epoch": 2.829554995801847, "grad_norm": 0.2629416907627084, "learning_rate": 8.828070351063533e-07, "loss": 0.4501, "step": 10110 }, { "epoch": 2.8298348726560314, "grad_norm": 0.2631384755840109, "learning_rate": 8.799234710547444e-07, "loss": 0.45, "step": 10111 }, { "epoch": 2.8301147495102157, "grad_norm": 0.24555445924895378, "learning_rate": 8.770445822795126e-07, "loss": 0.4541, "step": 10112 }, { "epoch": 2.8303946263643995, "grad_norm": 0.25707171887557456, "learning_rate": 8.741703690546721e-07, "loss": 0.4402, "step": 10113 }, { "epoch": 2.830674503218584, "grad_norm": 0.2556283650182902, "learning_rate": 8.713008316537929e-07, "loss": 0.4572, "step": 10114 }, { "epoch": 2.830954380072768, "grad_norm": 0.2600383651816295, "learning_rate": 8.684359703500067e-07, "loss": 0.4621, "step": 10115 }, { "epoch": 2.831234256926952, "grad_norm": 0.267705736677189, "learning_rate": 8.65575785415973e-07, "loss": 0.4598, "step": 10116 }, { "epoch": 2.8315141337811363, "grad_norm": 0.24332224527834756, "learning_rate": 8.627202771239462e-07, "loss": 0.4593, "step": 10117 }, { "epoch": 2.8317940106353205, "grad_norm": 0.2585644170136799, "learning_rate": 8.598694457457035e-07, "loss": 0.4578, "step": 10118 }, { "epoch": 2.8320738874895044, "grad_norm": 0.2528586860303206, "learning_rate": 8.570232915525888e-07, "loss": 0.4631, "step": 10119 }, { "epoch": 2.8323537643436887, "grad_norm": 0.26658090330048756, "learning_rate": 8.541818148155079e-07, "loss": 0.4456, "step": 10120 }, { "epoch": 2.832633641197873, "grad_norm": 0.25805663437445653, "learning_rate": 8.513450158049108e-07, "loss": 0.4587, "step": 10121 }, { "epoch": 2.832913518052057, "grad_norm": 0.26390238260937576, "learning_rate": 8.485128947908039e-07, "loss": 0.4603, "step": 10122 }, { "epoch": 2.833193394906241, "grad_norm": 0.2628755722632489, "learning_rate": 8.456854520427493e-07, "loss": 0.437, "step": 10123 }, { "epoch": 2.8334732717604254, "grad_norm": 0.26083288223354306, "learning_rate": 8.428626878298707e-07, "loss": 0.4331, "step": 10124 }, { "epoch": 2.8337531486146097, "grad_norm": 0.25799908741638145, "learning_rate": 8.400446024208309e-07, "loss": 0.4564, "step": 10125 }, { "epoch": 2.834033025468794, "grad_norm": 0.24081806697538238, "learning_rate": 8.372311960838652e-07, "loss": 0.4382, "step": 10126 }, { "epoch": 2.834312902322978, "grad_norm": 0.2574413268314077, "learning_rate": 8.344224690867485e-07, "loss": 0.4434, "step": 10127 }, { "epoch": 2.834592779177162, "grad_norm": 0.29761285533028925, "learning_rate": 8.316184216968226e-07, "loss": 0.453, "step": 10128 }, { "epoch": 2.8348726560313464, "grad_norm": 0.26541255095918426, "learning_rate": 8.288190541809738e-07, "loss": 0.446, "step": 10129 }, { "epoch": 2.8351525328855303, "grad_norm": 0.2551603304358025, "learning_rate": 8.260243668056555e-07, "loss": 0.4409, "step": 10130 }, { "epoch": 2.8354324097397146, "grad_norm": 0.2681181178604902, "learning_rate": 8.232343598368552e-07, "loss": 0.454, "step": 10131 }, { "epoch": 2.835712286593899, "grad_norm": 0.267916569740636, "learning_rate": 8.204490335401382e-07, "loss": 0.4453, "step": 10132 }, { "epoch": 2.8359921634480827, "grad_norm": 0.25766534832339966, "learning_rate": 8.17668388180609e-07, "loss": 0.444, "step": 10133 }, { "epoch": 2.836272040302267, "grad_norm": 0.25295408765533794, "learning_rate": 8.148924240229283e-07, "loss": 0.4301, "step": 10134 }, { "epoch": 2.8365519171564513, "grad_norm": 0.2590464448281375, "learning_rate": 8.121211413313178e-07, "loss": 0.4527, "step": 10135 }, { "epoch": 2.836831794010635, "grad_norm": 0.27162404825376435, "learning_rate": 8.093545403695502e-07, "loss": 0.4445, "step": 10136 }, { "epoch": 2.8371116708648194, "grad_norm": 0.26622854836678916, "learning_rate": 8.065926214009479e-07, "loss": 0.4467, "step": 10137 }, { "epoch": 2.8373915477190037, "grad_norm": 0.2540515828910898, "learning_rate": 8.038353846884006e-07, "loss": 0.4431, "step": 10138 }, { "epoch": 2.8376714245731876, "grad_norm": 0.25453815423627246, "learning_rate": 8.010828304943318e-07, "loss": 0.4441, "step": 10139 }, { "epoch": 2.837951301427372, "grad_norm": 0.2685794575098386, "learning_rate": 7.983349590807376e-07, "loss": 0.4476, "step": 10140 }, { "epoch": 2.838231178281556, "grad_norm": 0.26090741048944566, "learning_rate": 7.955917707091642e-07, "loss": 0.444, "step": 10141 }, { "epoch": 2.8385110551357404, "grad_norm": 0.24961955398553431, "learning_rate": 7.928532656407029e-07, "loss": 0.4578, "step": 10142 }, { "epoch": 2.8387909319899243, "grad_norm": 0.261480653970001, "learning_rate": 7.901194441360116e-07, "loss": 0.4365, "step": 10143 }, { "epoch": 2.8390708088441086, "grad_norm": 0.26065977212184543, "learning_rate": 7.87390306455299e-07, "loss": 0.4538, "step": 10144 }, { "epoch": 2.839350685698293, "grad_norm": 0.24750876625727614, "learning_rate": 7.846658528583184e-07, "loss": 0.4522, "step": 10145 }, { "epoch": 2.839630562552477, "grad_norm": 0.2553102416355813, "learning_rate": 7.819460836043957e-07, "loss": 0.4559, "step": 10146 }, { "epoch": 2.839910439406661, "grad_norm": 0.24769185971067034, "learning_rate": 7.79230998952385e-07, "loss": 0.4552, "step": 10147 }, { "epoch": 2.8401903162608453, "grad_norm": 0.2672748874110806, "learning_rate": 7.765205991607238e-07, "loss": 0.4659, "step": 10148 }, { "epoch": 2.8404701931150296, "grad_norm": 0.2604953786766096, "learning_rate": 7.738148844873783e-07, "loss": 0.4744, "step": 10149 }, { "epoch": 2.8407500699692134, "grad_norm": 0.2583480179782524, "learning_rate": 7.711138551898867e-07, "loss": 0.4479, "step": 10150 }, { "epoch": 2.8410299468233977, "grad_norm": 0.2546325371505603, "learning_rate": 7.684175115253378e-07, "loss": 0.439, "step": 10151 }, { "epoch": 2.841309823677582, "grad_norm": 0.26613969797264936, "learning_rate": 7.657258537503598e-07, "loss": 0.461, "step": 10152 }, { "epoch": 2.841589700531766, "grad_norm": 0.24737645835701677, "learning_rate": 7.630388821211587e-07, "loss": 0.4282, "step": 10153 }, { "epoch": 2.84186957738595, "grad_norm": 0.2624667092229066, "learning_rate": 7.603565968934689e-07, "loss": 0.4708, "step": 10154 }, { "epoch": 2.8421494542401344, "grad_norm": 0.2420140631051068, "learning_rate": 7.576789983226029e-07, "loss": 0.4299, "step": 10155 }, { "epoch": 2.8424293310943183, "grad_norm": 0.25496191355468883, "learning_rate": 7.550060866634123e-07, "loss": 0.4377, "step": 10156 }, { "epoch": 2.8427092079485026, "grad_norm": 0.24663098820574658, "learning_rate": 7.523378621703048e-07, "loss": 0.4334, "step": 10157 }, { "epoch": 2.842989084802687, "grad_norm": 0.25869772024576165, "learning_rate": 7.496743250972494e-07, "loss": 0.4677, "step": 10158 }, { "epoch": 2.8432689616568707, "grad_norm": 0.259342332895863, "learning_rate": 7.470154756977543e-07, "loss": 0.4556, "step": 10159 }, { "epoch": 2.843548838511055, "grad_norm": 0.25720323808780676, "learning_rate": 7.443613142248951e-07, "loss": 0.447, "step": 10160 }, { "epoch": 2.8438287153652393, "grad_norm": 0.26139628238782625, "learning_rate": 7.417118409312918e-07, "loss": 0.4468, "step": 10161 }, { "epoch": 2.8441085922194236, "grad_norm": 0.2630166216465488, "learning_rate": 7.390670560691315e-07, "loss": 0.455, "step": 10162 }, { "epoch": 2.844388469073608, "grad_norm": 0.2659934121702161, "learning_rate": 7.364269598901408e-07, "loss": 0.4334, "step": 10163 }, { "epoch": 2.8446683459277917, "grad_norm": 0.25686988035527164, "learning_rate": 7.337915526456019e-07, "loss": 0.4514, "step": 10164 }, { "epoch": 2.844948222781976, "grad_norm": 0.251348655283329, "learning_rate": 7.311608345863641e-07, "loss": 0.4539, "step": 10165 }, { "epoch": 2.8452280996361603, "grad_norm": 0.25654330476923476, "learning_rate": 7.285348059628105e-07, "loss": 0.4593, "step": 10166 }, { "epoch": 2.845507976490344, "grad_norm": 0.24776546205526637, "learning_rate": 7.259134670248968e-07, "loss": 0.4672, "step": 10167 }, { "epoch": 2.8457878533445284, "grad_norm": 0.2556660013031803, "learning_rate": 7.232968180221122e-07, "loss": 0.4325, "step": 10168 }, { "epoch": 2.8460677301987127, "grad_norm": 0.2448147767428879, "learning_rate": 7.206848592035242e-07, "loss": 0.4377, "step": 10169 }, { "epoch": 2.8463476070528966, "grad_norm": 0.25705771035811886, "learning_rate": 7.180775908177339e-07, "loss": 0.443, "step": 10170 }, { "epoch": 2.846627483907081, "grad_norm": 0.24945762219879117, "learning_rate": 7.154750131128984e-07, "loss": 0.4569, "step": 10171 }, { "epoch": 2.846907360761265, "grad_norm": 0.2540273156999752, "learning_rate": 7.128771263367418e-07, "loss": 0.4303, "step": 10172 }, { "epoch": 2.847187237615449, "grad_norm": 0.2580889678828135, "learning_rate": 7.102839307365272e-07, "loss": 0.4505, "step": 10173 }, { "epoch": 2.8474671144696333, "grad_norm": 0.2603424115721654, "learning_rate": 7.076954265590741e-07, "loss": 0.467, "step": 10174 }, { "epoch": 2.8477469913238176, "grad_norm": 0.2530692836062023, "learning_rate": 7.05111614050763e-07, "loss": 0.4487, "step": 10175 }, { "epoch": 2.8480268681780014, "grad_norm": 0.2559129157286122, "learning_rate": 7.025324934575139e-07, "loss": 0.4416, "step": 10176 }, { "epoch": 2.8483067450321857, "grad_norm": 0.24773205322018566, "learning_rate": 6.999580650248194e-07, "loss": 0.4329, "step": 10177 }, { "epoch": 2.84858662188637, "grad_norm": 0.25521126431205277, "learning_rate": 6.973883289977112e-07, "loss": 0.4543, "step": 10178 }, { "epoch": 2.8488664987405543, "grad_norm": 0.2595084443543985, "learning_rate": 6.948232856207771e-07, "loss": 0.4529, "step": 10179 }, { "epoch": 2.849146375594738, "grad_norm": 0.2606741603953674, "learning_rate": 6.922629351381604e-07, "loss": 0.4353, "step": 10180 }, { "epoch": 2.8494262524489224, "grad_norm": 0.2557838497999989, "learning_rate": 6.897072777935609e-07, "loss": 0.4424, "step": 10181 }, { "epoch": 2.8497061293031067, "grad_norm": 0.24571771064985215, "learning_rate": 6.871563138302173e-07, "loss": 0.4505, "step": 10182 }, { "epoch": 2.849986006157291, "grad_norm": 0.26667683305745005, "learning_rate": 6.846100434909353e-07, "loss": 0.4542, "step": 10183 }, { "epoch": 2.850265883011475, "grad_norm": 0.2600320263030447, "learning_rate": 6.820684670180766e-07, "loss": 0.435, "step": 10184 }, { "epoch": 2.850545759865659, "grad_norm": 0.2537212753909362, "learning_rate": 6.795315846535422e-07, "loss": 0.4445, "step": 10185 }, { "epoch": 2.8508256367198435, "grad_norm": 0.26412510291968333, "learning_rate": 6.769993966387999e-07, "loss": 0.4496, "step": 10186 }, { "epoch": 2.8511055135740273, "grad_norm": 0.25422280946962045, "learning_rate": 6.744719032148627e-07, "loss": 0.45, "step": 10187 }, { "epoch": 2.8513853904282116, "grad_norm": 0.2533516046710153, "learning_rate": 6.719491046222992e-07, "loss": 0.428, "step": 10188 }, { "epoch": 2.851665267282396, "grad_norm": 0.24407088865412283, "learning_rate": 6.694310011012284e-07, "loss": 0.4572, "step": 10189 }, { "epoch": 2.8519451441365797, "grad_norm": 0.24943438860892966, "learning_rate": 6.669175928913251e-07, "loss": 0.4165, "step": 10190 }, { "epoch": 2.852225020990764, "grad_norm": 0.25713363585411575, "learning_rate": 6.644088802318205e-07, "loss": 0.4453, "step": 10191 }, { "epoch": 2.8525048978449483, "grad_norm": 0.26567928250076694, "learning_rate": 6.619048633614955e-07, "loss": 0.4505, "step": 10192 }, { "epoch": 2.852784774699132, "grad_norm": 0.25818673891369537, "learning_rate": 6.594055425186763e-07, "loss": 0.449, "step": 10193 }, { "epoch": 2.8530646515533165, "grad_norm": 0.2540198476376862, "learning_rate": 6.569109179412558e-07, "loss": 0.4309, "step": 10194 }, { "epoch": 2.8533445284075007, "grad_norm": 0.2545216361991795, "learning_rate": 6.54420989866672e-07, "loss": 0.4281, "step": 10195 }, { "epoch": 2.8536244052616846, "grad_norm": 0.25968360621510583, "learning_rate": 6.519357585319242e-07, "loss": 0.468, "step": 10196 }, { "epoch": 2.853904282115869, "grad_norm": 0.24687151281696984, "learning_rate": 6.494552241735452e-07, "loss": 0.4264, "step": 10197 }, { "epoch": 2.854184158970053, "grad_norm": 0.2583821078491018, "learning_rate": 6.469793870276464e-07, "loss": 0.4752, "step": 10198 }, { "epoch": 2.8544640358242375, "grad_norm": 0.2459137265381298, "learning_rate": 6.445082473298669e-07, "loss": 0.4725, "step": 10199 }, { "epoch": 2.8547439126784218, "grad_norm": 0.24742088586054187, "learning_rate": 6.420418053154243e-07, "loss": 0.4407, "step": 10200 }, { "epoch": 2.8550237895326056, "grad_norm": 0.25517969356297493, "learning_rate": 6.395800612190639e-07, "loss": 0.4355, "step": 10201 }, { "epoch": 2.85530366638679, "grad_norm": 0.2598944885613344, "learning_rate": 6.37123015275104e-07, "loss": 0.4621, "step": 10202 }, { "epoch": 2.855583543240974, "grad_norm": 0.25983576281556636, "learning_rate": 6.346706677174075e-07, "loss": 0.448, "step": 10203 }, { "epoch": 2.855863420095158, "grad_norm": 0.2442368834802085, "learning_rate": 6.322230187793876e-07, "loss": 0.4305, "step": 10204 }, { "epoch": 2.8561432969493423, "grad_norm": 0.25932837759420585, "learning_rate": 6.297800686940081e-07, "loss": 0.4534, "step": 10205 }, { "epoch": 2.8564231738035266, "grad_norm": 0.26187189125601396, "learning_rate": 6.273418176937995e-07, "loss": 0.4604, "step": 10206 }, { "epoch": 2.8567030506577105, "grad_norm": 0.2503901417519331, "learning_rate": 6.249082660108318e-07, "loss": 0.4152, "step": 10207 }, { "epoch": 2.8569829275118948, "grad_norm": 0.2557843101499479, "learning_rate": 6.224794138767309e-07, "loss": 0.4565, "step": 10208 }, { "epoch": 2.857262804366079, "grad_norm": 0.25384466886807133, "learning_rate": 6.200552615226784e-07, "loss": 0.4452, "step": 10209 }, { "epoch": 2.857542681220263, "grad_norm": 0.2548841014459113, "learning_rate": 6.176358091794011e-07, "loss": 0.4368, "step": 10210 }, { "epoch": 2.857822558074447, "grad_norm": 0.25757124576997104, "learning_rate": 6.152210570771921e-07, "loss": 0.4439, "step": 10211 }, { "epoch": 2.8581024349286315, "grad_norm": 0.2569331306938241, "learning_rate": 6.128110054458847e-07, "loss": 0.4612, "step": 10212 }, { "epoch": 2.8583823117828153, "grad_norm": 0.25282275686772765, "learning_rate": 6.104056545148673e-07, "loss": 0.4577, "step": 10213 }, { "epoch": 2.8586621886369996, "grad_norm": 0.25896247226299324, "learning_rate": 6.080050045130847e-07, "loss": 0.4509, "step": 10214 }, { "epoch": 2.858942065491184, "grad_norm": 0.2576693397622, "learning_rate": 6.056090556690319e-07, "loss": 0.4411, "step": 10215 }, { "epoch": 2.859221942345368, "grad_norm": 0.2636597329257613, "learning_rate": 6.032178082107542e-07, "loss": 0.4533, "step": 10216 }, { "epoch": 2.859501819199552, "grad_norm": 0.24697365713431213, "learning_rate": 6.008312623658586e-07, "loss": 0.4389, "step": 10217 }, { "epoch": 2.8597816960537363, "grad_norm": 0.2500930445428214, "learning_rate": 5.984494183614909e-07, "loss": 0.4566, "step": 10218 }, { "epoch": 2.8600615729079206, "grad_norm": 0.26216006480801274, "learning_rate": 5.960722764243587e-07, "loss": 0.467, "step": 10219 }, { "epoch": 2.860341449762105, "grad_norm": 0.24926684599386656, "learning_rate": 5.936998367807201e-07, "loss": 0.4326, "step": 10220 }, { "epoch": 2.8606213266162888, "grad_norm": 0.2562319805043352, "learning_rate": 5.913320996563831e-07, "loss": 0.4631, "step": 10221 }, { "epoch": 2.860901203470473, "grad_norm": 0.2457629153440099, "learning_rate": 5.889690652767177e-07, "loss": 0.4501, "step": 10222 }, { "epoch": 2.8611810803246573, "grad_norm": 0.26272183713329245, "learning_rate": 5.866107338666271e-07, "loss": 0.4568, "step": 10223 }, { "epoch": 2.861460957178841, "grad_norm": 0.25666367549779606, "learning_rate": 5.842571056505875e-07, "loss": 0.4489, "step": 10224 }, { "epoch": 2.8617408340330255, "grad_norm": 0.2705683877873388, "learning_rate": 5.819081808526139e-07, "loss": 0.4643, "step": 10225 }, { "epoch": 2.8620207108872098, "grad_norm": 0.2543820024194871, "learning_rate": 5.795639596962832e-07, "loss": 0.4503, "step": 10226 }, { "epoch": 2.8623005877413936, "grad_norm": 0.25693972439495794, "learning_rate": 5.772244424047169e-07, "loss": 0.4444, "step": 10227 }, { "epoch": 2.862580464595578, "grad_norm": 0.25490621766923677, "learning_rate": 5.748896292005868e-07, "loss": 0.4486, "step": 10228 }, { "epoch": 2.862860341449762, "grad_norm": 0.26485194854054805, "learning_rate": 5.725595203061318e-07, "loss": 0.4659, "step": 10229 }, { "epoch": 2.863140218303946, "grad_norm": 0.24588014051314325, "learning_rate": 5.702341159431246e-07, "loss": 0.4458, "step": 10230 }, { "epoch": 2.8634200951581303, "grad_norm": 0.25924629499595886, "learning_rate": 5.679134163328992e-07, "loss": 0.4423, "step": 10231 }, { "epoch": 2.8636999720123146, "grad_norm": 0.2553099022686705, "learning_rate": 5.655974216963456e-07, "loss": 0.447, "step": 10232 }, { "epoch": 2.8639798488664985, "grad_norm": 0.259680346674402, "learning_rate": 5.632861322538985e-07, "loss": 0.4425, "step": 10233 }, { "epoch": 2.8642597257206828, "grad_norm": 0.26740786597677946, "learning_rate": 5.609795482255486e-07, "loss": 0.4423, "step": 10234 }, { "epoch": 2.864539602574867, "grad_norm": 0.2680068141821788, "learning_rate": 5.586776698308372e-07, "loss": 0.4648, "step": 10235 }, { "epoch": 2.8648194794290514, "grad_norm": 0.2563372948009287, "learning_rate": 5.563804972888609e-07, "loss": 0.4618, "step": 10236 }, { "epoch": 2.8650993562832356, "grad_norm": 0.24611658743221432, "learning_rate": 5.540880308182617e-07, "loss": 0.4475, "step": 10237 }, { "epoch": 2.8653792331374195, "grad_norm": 0.24907400613042632, "learning_rate": 5.518002706372372e-07, "loss": 0.4479, "step": 10238 }, { "epoch": 2.865659109991604, "grad_norm": 0.2481189654103723, "learning_rate": 5.495172169635409e-07, "loss": 0.4535, "step": 10239 }, { "epoch": 2.865938986845788, "grad_norm": 0.24972684085267743, "learning_rate": 5.472388700144771e-07, "loss": 0.4486, "step": 10240 }, { "epoch": 2.866218863699972, "grad_norm": 0.2493928388889635, "learning_rate": 5.449652300068997e-07, "loss": 0.442, "step": 10241 }, { "epoch": 2.866498740554156, "grad_norm": 0.25502178405778514, "learning_rate": 5.426962971572081e-07, "loss": 0.4567, "step": 10242 }, { "epoch": 2.8667786174083405, "grad_norm": 0.25717676914814624, "learning_rate": 5.404320716813683e-07, "loss": 0.4245, "step": 10243 }, { "epoch": 2.8670584942625243, "grad_norm": 0.26027840464450835, "learning_rate": 5.381725537948856e-07, "loss": 0.4528, "step": 10244 }, { "epoch": 2.8673383711167086, "grad_norm": 0.258521387544155, "learning_rate": 5.35917743712827e-07, "loss": 0.4327, "step": 10245 }, { "epoch": 2.867618247970893, "grad_norm": 0.25270403591986484, "learning_rate": 5.336676416498043e-07, "loss": 0.4459, "step": 10246 }, { "epoch": 2.8678981248250768, "grad_norm": 0.2552451258449106, "learning_rate": 5.314222478199793e-07, "loss": 0.4565, "step": 10247 }, { "epoch": 2.868178001679261, "grad_norm": 0.25016155667035667, "learning_rate": 5.291815624370755e-07, "loss": 0.4381, "step": 10248 }, { "epoch": 2.8684578785334454, "grad_norm": 0.25687106582087943, "learning_rate": 5.269455857143613e-07, "loss": 0.4437, "step": 10249 }, { "epoch": 2.868737755387629, "grad_norm": 0.25635311037631414, "learning_rate": 5.247143178646552e-07, "loss": 0.4523, "step": 10250 }, { "epoch": 2.8690176322418135, "grad_norm": 0.2604944605147867, "learning_rate": 5.224877591003374e-07, "loss": 0.4581, "step": 10251 }, { "epoch": 2.869297509095998, "grad_norm": 0.25014607563839375, "learning_rate": 5.202659096333218e-07, "loss": 0.4462, "step": 10252 }, { "epoch": 2.869577385950182, "grad_norm": 0.26158734638337017, "learning_rate": 5.180487696750946e-07, "loss": 0.4418, "step": 10253 }, { "epoch": 2.869857262804366, "grad_norm": 0.2628142041741432, "learning_rate": 5.158363394366816e-07, "loss": 0.4513, "step": 10254 }, { "epoch": 2.87013713965855, "grad_norm": 0.25316528938359184, "learning_rate": 5.136286191286643e-07, "loss": 0.4393, "step": 10255 }, { "epoch": 2.8704170165127345, "grad_norm": 0.2495107263010776, "learning_rate": 5.114256089611747e-07, "loss": 0.445, "step": 10256 }, { "epoch": 2.870696893366919, "grad_norm": 0.25869314266387045, "learning_rate": 5.092273091438948e-07, "loss": 0.4538, "step": 10257 }, { "epoch": 2.8709767702211026, "grad_norm": 0.2554676058510657, "learning_rate": 5.070337198860631e-07, "loss": 0.4623, "step": 10258 }, { "epoch": 2.871256647075287, "grad_norm": 0.24537075832570931, "learning_rate": 5.048448413964624e-07, "loss": 0.4546, "step": 10259 }, { "epoch": 2.8715365239294712, "grad_norm": 0.2672389569675875, "learning_rate": 5.026606738834317e-07, "loss": 0.4557, "step": 10260 }, { "epoch": 2.871816400783655, "grad_norm": 0.24970304197948354, "learning_rate": 5.004812175548656e-07, "loss": 0.4586, "step": 10261 }, { "epoch": 2.8720962776378394, "grad_norm": 0.26349661025087207, "learning_rate": 4.983064726181986e-07, "loss": 0.4649, "step": 10262 }, { "epoch": 2.8723761544920237, "grad_norm": 0.24778771885993947, "learning_rate": 4.961364392804313e-07, "loss": 0.4433, "step": 10263 }, { "epoch": 2.8726560313462075, "grad_norm": 0.26672455734691347, "learning_rate": 4.939711177481099e-07, "loss": 0.4429, "step": 10264 }, { "epoch": 2.872935908200392, "grad_norm": 0.2548923418425755, "learning_rate": 4.91810508227325e-07, "loss": 0.4397, "step": 10265 }, { "epoch": 2.873215785054576, "grad_norm": 0.25746339083089437, "learning_rate": 4.896546109237232e-07, "loss": 0.4322, "step": 10266 }, { "epoch": 2.87349566190876, "grad_norm": 0.2571032628499338, "learning_rate": 4.875034260425126e-07, "loss": 0.4665, "step": 10267 }, { "epoch": 2.8737755387629442, "grad_norm": 0.2611386216444229, "learning_rate": 4.853569537884406e-07, "loss": 0.4496, "step": 10268 }, { "epoch": 2.8740554156171285, "grad_norm": 0.2554305595408042, "learning_rate": 4.832151943658048e-07, "loss": 0.4374, "step": 10269 }, { "epoch": 2.8743352924713124, "grad_norm": 0.26483360105662895, "learning_rate": 4.810781479784588e-07, "loss": 0.455, "step": 10270 }, { "epoch": 2.8746151693254967, "grad_norm": 0.252441584593939, "learning_rate": 4.789458148298176e-07, "loss": 0.4432, "step": 10271 }, { "epoch": 2.874895046179681, "grad_norm": 0.2603269128030735, "learning_rate": 4.768181951228301e-07, "loss": 0.4651, "step": 10272 }, { "epoch": 2.8751749230338652, "grad_norm": 0.24957648626480253, "learning_rate": 4.746952890600065e-07, "loss": 0.455, "step": 10273 }, { "epoch": 2.8754547998880495, "grad_norm": 0.26391183669801865, "learning_rate": 4.725770968434018e-07, "loss": 0.4541, "step": 10274 }, { "epoch": 2.8757346767422334, "grad_norm": 0.25716393251231623, "learning_rate": 4.7046361867463807e-07, "loss": 0.4608, "step": 10275 }, { "epoch": 2.8760145535964177, "grad_norm": 0.26668582501615473, "learning_rate": 4.683548547548655e-07, "loss": 0.4512, "step": 10276 }, { "epoch": 2.876294430450602, "grad_norm": 0.2455419391449943, "learning_rate": 4.6625080528480137e-07, "loss": 0.4527, "step": 10277 }, { "epoch": 2.876574307304786, "grad_norm": 0.2570455687678307, "learning_rate": 4.641514704647132e-07, "loss": 0.457, "step": 10278 }, { "epoch": 2.87685418415897, "grad_norm": 0.24449720654527113, "learning_rate": 4.620568504944134e-07, "loss": 0.4511, "step": 10279 }, { "epoch": 2.8771340610131544, "grad_norm": 0.24351161087254086, "learning_rate": 4.5996694557327025e-07, "loss": 0.439, "step": 10280 }, { "epoch": 2.8774139378673382, "grad_norm": 0.26099511112058876, "learning_rate": 4.5788175590020246e-07, "loss": 0.469, "step": 10281 }, { "epoch": 2.8776938147215225, "grad_norm": 0.2582471683662392, "learning_rate": 4.5580128167367895e-07, "loss": 0.4309, "step": 10282 }, { "epoch": 2.877973691575707, "grad_norm": 0.24795866638590033, "learning_rate": 4.5372552309171925e-07, "loss": 0.4668, "step": 10283 }, { "epoch": 2.8782535684298907, "grad_norm": 0.25154649095883064, "learning_rate": 4.516544803518985e-07, "loss": 0.4477, "step": 10284 }, { "epoch": 2.878533445284075, "grad_norm": 0.2505105586047212, "learning_rate": 4.495881536513369e-07, "loss": 0.419, "step": 10285 }, { "epoch": 2.8788133221382592, "grad_norm": 0.25352983127315026, "learning_rate": 4.4752654318670485e-07, "loss": 0.4531, "step": 10286 }, { "epoch": 2.879093198992443, "grad_norm": 0.25948613905647183, "learning_rate": 4.4546964915423986e-07, "loss": 0.4618, "step": 10287 }, { "epoch": 2.8793730758466274, "grad_norm": 0.2582180835960804, "learning_rate": 4.434174717497075e-07, "loss": 0.4542, "step": 10288 }, { "epoch": 2.8796529527008117, "grad_norm": 0.257995531526806, "learning_rate": 4.413700111684349e-07, "loss": 0.4611, "step": 10289 }, { "epoch": 2.879932829554996, "grad_norm": 0.2536050936538131, "learning_rate": 4.3932726760531064e-07, "loss": 0.4331, "step": 10290 }, { "epoch": 2.88021270640918, "grad_norm": 0.26348346728174327, "learning_rate": 4.3728924125475137e-07, "loss": 0.4652, "step": 10291 }, { "epoch": 2.880492583263364, "grad_norm": 0.25005631660677363, "learning_rate": 4.3525593231074633e-07, "loss": 0.4544, "step": 10292 }, { "epoch": 2.8807724601175484, "grad_norm": 0.2555331171203246, "learning_rate": 4.3322734096682417e-07, "loss": 0.457, "step": 10293 }, { "epoch": 2.8810523369717327, "grad_norm": 0.2545421363229972, "learning_rate": 4.3120346741606367e-07, "loss": 0.4686, "step": 10294 }, { "epoch": 2.8813322138259165, "grad_norm": 0.2617427853569952, "learning_rate": 4.2918431185110517e-07, "loss": 0.4435, "step": 10295 }, { "epoch": 2.881612090680101, "grad_norm": 0.2611070268049796, "learning_rate": 4.271698744641339e-07, "loss": 0.4822, "step": 10296 }, { "epoch": 2.881891967534285, "grad_norm": 0.2651306945435216, "learning_rate": 4.2516015544687426e-07, "loss": 0.4603, "step": 10297 }, { "epoch": 2.882171844388469, "grad_norm": 0.3124262786349235, "learning_rate": 4.2315515499062317e-07, "loss": 0.4459, "step": 10298 }, { "epoch": 2.8824517212426533, "grad_norm": 0.2685649925102902, "learning_rate": 4.211548732862114e-07, "loss": 0.4392, "step": 10299 }, { "epoch": 2.8827315980968375, "grad_norm": 0.2532537109973419, "learning_rate": 4.19159310524031e-07, "loss": 0.442, "step": 10300 }, { "epoch": 2.8830114749510214, "grad_norm": 0.25293340380592855, "learning_rate": 4.17168466894019e-07, "loss": 0.4272, "step": 10301 }, { "epoch": 2.8832913518052057, "grad_norm": 0.2626769114687486, "learning_rate": 4.1518234258566824e-07, "loss": 0.4514, "step": 10302 }, { "epoch": 2.88357122865939, "grad_norm": 0.2548871588665236, "learning_rate": 4.132009377880108e-07, "loss": 0.4563, "step": 10303 }, { "epoch": 2.883851105513574, "grad_norm": 0.2499079155484336, "learning_rate": 4.1122425268964593e-07, "loss": 0.4488, "step": 10304 }, { "epoch": 2.884130982367758, "grad_norm": 0.2774741682164875, "learning_rate": 4.092522874787119e-07, "loss": 0.4595, "step": 10305 }, { "epoch": 2.8844108592219424, "grad_norm": 0.2620330159986628, "learning_rate": 4.072850423428975e-07, "loss": 0.4553, "step": 10306 }, { "epoch": 2.8846907360761262, "grad_norm": 0.24761164989931458, "learning_rate": 4.053225174694586e-07, "loss": 0.4532, "step": 10307 }, { "epoch": 2.8849706129303105, "grad_norm": 0.24479380194816994, "learning_rate": 4.0336471304517897e-07, "loss": 0.4172, "step": 10308 }, { "epoch": 2.885250489784495, "grad_norm": 0.2514815231592224, "learning_rate": 4.014116292564041e-07, "loss": 0.4459, "step": 10309 }, { "epoch": 2.885530366638679, "grad_norm": 0.2923214184067987, "learning_rate": 3.9946326628903516e-07, "loss": 0.4461, "step": 10310 }, { "epoch": 2.8858102434928634, "grad_norm": 0.24988649212900807, "learning_rate": 3.975196243285129e-07, "loss": 0.4624, "step": 10311 }, { "epoch": 2.8860901203470473, "grad_norm": 0.2483774134025153, "learning_rate": 3.9558070355983357e-07, "loss": 0.447, "step": 10312 }, { "epoch": 2.8863699972012316, "grad_norm": 0.2575682884161871, "learning_rate": 3.9364650416755525e-07, "loss": 0.4319, "step": 10313 }, { "epoch": 2.886649874055416, "grad_norm": 0.2563758815757559, "learning_rate": 3.9171702633576945e-07, "loss": 0.423, "step": 10314 }, { "epoch": 2.8869297509095997, "grad_norm": 0.25206218781065665, "learning_rate": 3.8979227024811826e-07, "loss": 0.4592, "step": 10315 }, { "epoch": 2.887209627763784, "grad_norm": 0.25445140057837873, "learning_rate": 3.878722360878051e-07, "loss": 0.4538, "step": 10316 }, { "epoch": 2.8874895046179683, "grad_norm": 0.25892850164373754, "learning_rate": 3.8595692403758376e-07, "loss": 0.4565, "step": 10317 }, { "epoch": 2.887769381472152, "grad_norm": 0.2571744291136916, "learning_rate": 3.8404633427975846e-07, "loss": 0.4458, "step": 10318 }, { "epoch": 2.8880492583263364, "grad_norm": 0.25961005904007983, "learning_rate": 3.82140466996167e-07, "loss": 0.4681, "step": 10319 }, { "epoch": 2.8883291351805207, "grad_norm": 0.2626162006435372, "learning_rate": 3.802393223682199e-07, "loss": 0.4348, "step": 10320 }, { "epoch": 2.8886090120347045, "grad_norm": 0.26403517659005993, "learning_rate": 3.7834290057686684e-07, "loss": 0.4463, "step": 10321 }, { "epoch": 2.888888888888889, "grad_norm": 0.2640114446640457, "learning_rate": 3.764512018026134e-07, "loss": 0.4491, "step": 10322 }, { "epoch": 2.889168765743073, "grad_norm": 0.2567861703073457, "learning_rate": 3.7456422622551e-07, "loss": 0.4629, "step": 10323 }, { "epoch": 2.889448642597257, "grad_norm": 0.26970481734620666, "learning_rate": 3.726819740251575e-07, "loss": 0.4425, "step": 10324 }, { "epoch": 2.8897285194514413, "grad_norm": 0.2573265949733196, "learning_rate": 3.70804445380718e-07, "loss": 0.4339, "step": 10325 }, { "epoch": 2.8900083963056256, "grad_norm": 0.2457024445347563, "learning_rate": 3.68931640470882e-07, "loss": 0.4542, "step": 10326 }, { "epoch": 2.8902882731598094, "grad_norm": 0.2518158589030567, "learning_rate": 3.67063559473918e-07, "loss": 0.4413, "step": 10327 }, { "epoch": 2.8905681500139937, "grad_norm": 0.2541691212113756, "learning_rate": 3.6520020256762267e-07, "loss": 0.4354, "step": 10328 }, { "epoch": 2.890848026868178, "grad_norm": 0.26148669070858493, "learning_rate": 3.6334156992935406e-07, "loss": 0.4758, "step": 10329 }, { "epoch": 2.8911279037223623, "grad_norm": 0.2578137534929587, "learning_rate": 3.614876617360152e-07, "loss": 0.4694, "step": 10330 }, { "epoch": 2.8914077805765466, "grad_norm": 0.25462776023270506, "learning_rate": 3.596384781640705e-07, "loss": 0.4343, "step": 10331 }, { "epoch": 2.8916876574307304, "grad_norm": 0.2574136099033175, "learning_rate": 3.5779401938951794e-07, "loss": 0.4517, "step": 10332 }, { "epoch": 2.8919675342849147, "grad_norm": 0.2528803426108313, "learning_rate": 3.559542855879172e-07, "loss": 0.4427, "step": 10333 }, { "epoch": 2.892247411139099, "grad_norm": 0.2428749181134538, "learning_rate": 3.5411927693437265e-07, "loss": 0.4381, "step": 10334 }, { "epoch": 2.892527287993283, "grad_norm": 0.2563684456973611, "learning_rate": 3.522889936035445e-07, "loss": 0.4615, "step": 10335 }, { "epoch": 2.892807164847467, "grad_norm": 0.2603322799709365, "learning_rate": 3.504634357696379e-07, "loss": 0.469, "step": 10336 }, { "epoch": 2.8930870417016514, "grad_norm": 0.24520091679492342, "learning_rate": 3.4864260360641386e-07, "loss": 0.4374, "step": 10337 }, { "epoch": 2.8933669185558353, "grad_norm": 0.265476476947951, "learning_rate": 3.468264972871782e-07, "loss": 0.463, "step": 10338 }, { "epoch": 2.8936467954100196, "grad_norm": 0.2716861089064996, "learning_rate": 3.4501511698478705e-07, "loss": 0.4538, "step": 10339 }, { "epoch": 2.893926672264204, "grad_norm": 0.24986600046345175, "learning_rate": 3.43208462871647e-07, "loss": 0.4434, "step": 10340 }, { "epoch": 2.8942065491183877, "grad_norm": 0.25661580131767164, "learning_rate": 3.41406535119726e-07, "loss": 0.4358, "step": 10341 }, { "epoch": 2.894486425972572, "grad_norm": 0.26288992965346053, "learning_rate": 3.396093339005202e-07, "loss": 0.4532, "step": 10342 }, { "epoch": 2.8947663028267563, "grad_norm": 0.2598594542669984, "learning_rate": 3.378168593850983e-07, "loss": 0.4471, "step": 10343 }, { "epoch": 2.89504617968094, "grad_norm": 0.2620213552419905, "learning_rate": 3.3602911174406283e-07, "loss": 0.4515, "step": 10344 }, { "epoch": 2.8953260565351244, "grad_norm": 0.33377074231948806, "learning_rate": 3.34246091147572e-07, "loss": 0.4552, "step": 10345 }, { "epoch": 2.8956059333893087, "grad_norm": 0.2577495637966244, "learning_rate": 3.324677977653401e-07, "loss": 0.4417, "step": 10346 }, { "epoch": 2.895885810243493, "grad_norm": 0.275292397605857, "learning_rate": 3.306942317666206e-07, "loss": 0.462, "step": 10347 }, { "epoch": 2.8961656870976773, "grad_norm": 0.24961360541812772, "learning_rate": 3.289253933202285e-07, "loss": 0.4533, "step": 10348 }, { "epoch": 2.896445563951861, "grad_norm": 0.2661825316055105, "learning_rate": 3.2716128259451804e-07, "loss": 0.4429, "step": 10349 }, { "epoch": 2.8967254408060454, "grad_norm": 0.25305679025577094, "learning_rate": 3.254018997574049e-07, "loss": 0.45, "step": 10350 }, { "epoch": 2.8970053176602297, "grad_norm": 0.25119578479911997, "learning_rate": 3.236472449763384e-07, "loss": 0.4816, "step": 10351 }, { "epoch": 2.8972851945144136, "grad_norm": 0.25395181283716584, "learning_rate": 3.218973184183349e-07, "loss": 0.4181, "step": 10352 }, { "epoch": 2.897565071368598, "grad_norm": 0.25487831155720125, "learning_rate": 3.2015212024995024e-07, "loss": 0.4494, "step": 10353 }, { "epoch": 2.897844948222782, "grad_norm": 0.2599533908619156, "learning_rate": 3.184116506372903e-07, "loss": 0.4703, "step": 10354 }, { "epoch": 2.898124825076966, "grad_norm": 0.25358339034142985, "learning_rate": 3.1667590974602256e-07, "loss": 0.4428, "step": 10355 }, { "epoch": 2.8984047019311503, "grad_norm": 0.26625769348057865, "learning_rate": 3.1494489774134273e-07, "loss": 0.4752, "step": 10356 }, { "epoch": 2.8986845787853346, "grad_norm": 0.2534972553995035, "learning_rate": 3.132186147880245e-07, "loss": 0.4547, "step": 10357 }, { "epoch": 2.8989644556395184, "grad_norm": 0.24764399053567493, "learning_rate": 3.114970610503698e-07, "loss": 0.4588, "step": 10358 }, { "epoch": 2.8992443324937027, "grad_norm": 0.2565092323150165, "learning_rate": 3.0978023669223087e-07, "loss": 0.4616, "step": 10359 }, { "epoch": 2.899524209347887, "grad_norm": 0.2525542577336461, "learning_rate": 3.0806814187702703e-07, "loss": 0.4575, "step": 10360 }, { "epoch": 2.899804086202071, "grad_norm": 0.26140960295508986, "learning_rate": 3.0636077676770567e-07, "loss": 0.4497, "step": 10361 }, { "epoch": 2.900083963056255, "grad_norm": 0.2679370880273387, "learning_rate": 3.0465814152678685e-07, "loss": 0.4545, "step": 10362 }, { "epoch": 2.9003638399104394, "grad_norm": 0.2699083430806211, "learning_rate": 3.0296023631631865e-07, "loss": 0.4643, "step": 10363 }, { "epoch": 2.9006437167646233, "grad_norm": 0.26170266192893454, "learning_rate": 3.0126706129791073e-07, "loss": 0.4561, "step": 10364 }, { "epoch": 2.9009235936188076, "grad_norm": 0.24858769529730645, "learning_rate": 2.99578616632723e-07, "loss": 0.453, "step": 10365 }, { "epoch": 2.901203470472992, "grad_norm": 0.2574791076494076, "learning_rate": 2.9789490248146033e-07, "loss": 0.4434, "step": 10366 }, { "epoch": 2.901483347327176, "grad_norm": 0.26240149623407355, "learning_rate": 2.9621591900437784e-07, "loss": 0.4402, "step": 10367 }, { "epoch": 2.9017632241813605, "grad_norm": 0.2507582670062776, "learning_rate": 2.945416663612921e-07, "loss": 0.4458, "step": 10368 }, { "epoch": 2.9020431010355443, "grad_norm": 0.2471406035226927, "learning_rate": 2.928721447115479e-07, "loss": 0.4346, "step": 10369 }, { "epoch": 2.9023229778897286, "grad_norm": 0.25689886731465, "learning_rate": 2.91207354214057e-07, "loss": 0.4527, "step": 10370 }, { "epoch": 2.902602854743913, "grad_norm": 0.25295420038947664, "learning_rate": 2.895472950272704e-07, "loss": 0.4379, "step": 10371 }, { "epoch": 2.9028827315980967, "grad_norm": 0.2560257149664785, "learning_rate": 2.878919673091951e-07, "loss": 0.4282, "step": 10372 }, { "epoch": 2.903162608452281, "grad_norm": 0.2696114038415163, "learning_rate": 2.8624137121738836e-07, "loss": 0.4706, "step": 10373 }, { "epoch": 2.9034424853064653, "grad_norm": 0.2500674589002211, "learning_rate": 2.845955069089579e-07, "loss": 0.4488, "step": 10374 }, { "epoch": 2.903722362160649, "grad_norm": 0.2554546188397463, "learning_rate": 2.829543745405505e-07, "loss": 0.4372, "step": 10375 }, { "epoch": 2.9040022390148335, "grad_norm": 0.24931022196936325, "learning_rate": 2.813179742683747e-07, "loss": 0.4526, "step": 10376 }, { "epoch": 2.9042821158690177, "grad_norm": 0.2928468198012615, "learning_rate": 2.796863062481836e-07, "loss": 0.4511, "step": 10377 }, { "epoch": 2.9045619927232016, "grad_norm": 0.26388584571093515, "learning_rate": 2.780593706352752e-07, "loss": 0.4539, "step": 10378 }, { "epoch": 2.904841869577386, "grad_norm": 0.2498804927398296, "learning_rate": 2.7643716758451457e-07, "loss": 0.4373, "step": 10379 }, { "epoch": 2.90512174643157, "grad_norm": 0.26863275546009896, "learning_rate": 2.748196972502892e-07, "loss": 0.4611, "step": 10380 }, { "epoch": 2.905401623285754, "grad_norm": 0.2517534306363316, "learning_rate": 2.7320695978655943e-07, "loss": 0.4529, "step": 10381 }, { "epoch": 2.9056815001399383, "grad_norm": 0.2560660861816635, "learning_rate": 2.71598955346819e-07, "loss": 0.463, "step": 10382 }, { "epoch": 2.9059613769941226, "grad_norm": 0.2605364040440226, "learning_rate": 2.6999568408413443e-07, "loss": 0.4483, "step": 10383 }, { "epoch": 2.906241253848307, "grad_norm": 0.24860598895959993, "learning_rate": 2.6839714615108926e-07, "loss": 0.4366, "step": 10384 }, { "epoch": 2.9065211307024907, "grad_norm": 0.2540350770123497, "learning_rate": 2.6680334169983946e-07, "loss": 0.4506, "step": 10385 }, { "epoch": 2.906801007556675, "grad_norm": 0.26270021658466275, "learning_rate": 2.652142708820915e-07, "loss": 0.4541, "step": 10386 }, { "epoch": 2.9070808844108593, "grad_norm": 0.2660916319559597, "learning_rate": 2.6362993384907997e-07, "loss": 0.4614, "step": 10387 }, { "epoch": 2.9073607612650436, "grad_norm": 0.25039301873484276, "learning_rate": 2.62050330751612e-07, "loss": 0.4466, "step": 10388 }, { "epoch": 2.9076406381192275, "grad_norm": 0.24631662305707414, "learning_rate": 2.60475461740034e-07, "loss": 0.4223, "step": 10389 }, { "epoch": 2.9079205149734118, "grad_norm": 0.24299943031929547, "learning_rate": 2.5890532696424274e-07, "loss": 0.4428, "step": 10390 }, { "epoch": 2.908200391827596, "grad_norm": 0.25736270614928747, "learning_rate": 2.573399265736798e-07, "loss": 0.4457, "step": 10391 }, { "epoch": 2.90848026868178, "grad_norm": 0.25310670202243307, "learning_rate": 2.557792607173481e-07, "loss": 0.4447, "step": 10392 }, { "epoch": 2.908760145535964, "grad_norm": 0.2601763089070998, "learning_rate": 2.5422332954379014e-07, "loss": 0.4497, "step": 10393 }, { "epoch": 2.9090400223901485, "grad_norm": 0.25673423214608204, "learning_rate": 2.5267213320109283e-07, "loss": 0.4462, "step": 10394 }, { "epoch": 2.9093198992443323, "grad_norm": 0.2732253329291374, "learning_rate": 2.5112567183691595e-07, "loss": 0.4579, "step": 10395 }, { "epoch": 2.9095997760985166, "grad_norm": 0.24489089427254132, "learning_rate": 2.4958394559843614e-07, "loss": 0.4637, "step": 10396 }, { "epoch": 2.909879652952701, "grad_norm": 0.2597770859891086, "learning_rate": 2.4804695463240826e-07, "loss": 0.4524, "step": 10397 }, { "epoch": 2.9101595298068847, "grad_norm": 0.25115464762045403, "learning_rate": 2.4651469908511527e-07, "loss": 0.4323, "step": 10398 }, { "epoch": 2.910439406661069, "grad_norm": 0.25904281835353893, "learning_rate": 2.4498717910240163e-07, "loss": 0.4532, "step": 10399 }, { "epoch": 2.9107192835152533, "grad_norm": 0.26533887036075104, "learning_rate": 2.4346439482965664e-07, "loss": 0.4473, "step": 10400 }, { "epoch": 2.910999160369437, "grad_norm": 0.25950334993597235, "learning_rate": 2.419463464118199e-07, "loss": 0.4503, "step": 10401 }, { "epoch": 2.9112790372236215, "grad_norm": 0.2592923155127461, "learning_rate": 2.404330339933869e-07, "loss": 0.4539, "step": 10402 }, { "epoch": 2.9115589140778058, "grad_norm": 0.24565959064147494, "learning_rate": 2.3892445771838134e-07, "loss": 0.4426, "step": 10403 }, { "epoch": 2.91183879093199, "grad_norm": 0.2564125328004262, "learning_rate": 2.37420617730405e-07, "loss": 0.4456, "step": 10404 }, { "epoch": 2.9121186677861743, "grad_norm": 0.25657770161499827, "learning_rate": 2.359215141725879e-07, "loss": 0.4584, "step": 10405 }, { "epoch": 2.912398544640358, "grad_norm": 0.2671894407257729, "learning_rate": 2.3442714718761027e-07, "loss": 0.4621, "step": 10406 }, { "epoch": 2.9126784214945425, "grad_norm": 0.2676332770972902, "learning_rate": 2.3293751691771394e-07, "loss": 0.4649, "step": 10407 }, { "epoch": 2.9129582983487268, "grad_norm": 0.2594919036123832, "learning_rate": 2.314526235046799e-07, "loss": 0.4604, "step": 10408 }, { "epoch": 2.9132381752029106, "grad_norm": 0.2593525779189398, "learning_rate": 2.299724670898451e-07, "loss": 0.4349, "step": 10409 }, { "epoch": 2.913518052057095, "grad_norm": 0.25144203995667563, "learning_rate": 2.2849704781408577e-07, "loss": 0.4491, "step": 10410 }, { "epoch": 2.913797928911279, "grad_norm": 0.2635956450162497, "learning_rate": 2.2702636581783399e-07, "loss": 0.4605, "step": 10411 }, { "epoch": 2.914077805765463, "grad_norm": 0.2600994568333365, "learning_rate": 2.2556042124107223e-07, "loss": 0.4368, "step": 10412 }, { "epoch": 2.9143576826196473, "grad_norm": 0.2564418348360325, "learning_rate": 2.2409921422333335e-07, "loss": 0.4497, "step": 10413 }, { "epoch": 2.9146375594738316, "grad_norm": 0.249296973255, "learning_rate": 2.226427449036894e-07, "loss": 0.4578, "step": 10414 }, { "epoch": 2.9149174363280155, "grad_norm": 0.2624082979505958, "learning_rate": 2.2119101342076841e-07, "loss": 0.4557, "step": 10415 }, { "epoch": 2.9151973131821998, "grad_norm": 0.2482312610438998, "learning_rate": 2.1974401991274873e-07, "loss": 0.4452, "step": 10416 }, { "epoch": 2.915477190036384, "grad_norm": 0.28259425383700054, "learning_rate": 2.1830176451735905e-07, "loss": 0.4542, "step": 10417 }, { "epoch": 2.915757066890568, "grad_norm": 0.25328344799056274, "learning_rate": 2.1686424737187295e-07, "loss": 0.4447, "step": 10418 }, { "epoch": 2.916036943744752, "grad_norm": 0.2536325406702005, "learning_rate": 2.1543146861311426e-07, "loss": 0.4425, "step": 10419 }, { "epoch": 2.9163168205989365, "grad_norm": 0.24890910384382822, "learning_rate": 2.1400342837745168e-07, "loss": 0.4497, "step": 10420 }, { "epoch": 2.916596697453121, "grad_norm": 0.2544036580834295, "learning_rate": 2.125801268008043e-07, "loss": 0.4449, "step": 10421 }, { "epoch": 2.9168765743073046, "grad_norm": 0.25742946778671977, "learning_rate": 2.1116156401865815e-07, "loss": 0.4379, "step": 10422 }, { "epoch": 2.917156451161489, "grad_norm": 0.24188272688832368, "learning_rate": 2.0974774016601638e-07, "loss": 0.4396, "step": 10423 }, { "epoch": 2.917436328015673, "grad_norm": 0.25419956448380476, "learning_rate": 2.0833865537744912e-07, "loss": 0.4445, "step": 10424 }, { "epoch": 2.9177162048698575, "grad_norm": 0.2541023006339829, "learning_rate": 2.0693430978708795e-07, "loss": 0.4506, "step": 10425 }, { "epoch": 2.9179960817240413, "grad_norm": 0.24882416696091192, "learning_rate": 2.0553470352858152e-07, "loss": 0.4491, "step": 10426 }, { "epoch": 2.9182759585782256, "grad_norm": 0.2613166798429044, "learning_rate": 2.0413983673515659e-07, "loss": 0.4515, "step": 10427 }, { "epoch": 2.91855583543241, "grad_norm": 0.2532369966568343, "learning_rate": 2.027497095395736e-07, "loss": 0.4486, "step": 10428 }, { "epoch": 2.9188357122865938, "grad_norm": 0.26685547084956296, "learning_rate": 2.0136432207414347e-07, "loss": 0.4506, "step": 10429 }, { "epoch": 2.919115589140778, "grad_norm": 0.2614000751207966, "learning_rate": 1.9998367447073285e-07, "loss": 0.4389, "step": 10430 }, { "epoch": 2.9193954659949624, "grad_norm": 0.26655927508353255, "learning_rate": 1.9860776686075332e-07, "loss": 0.4532, "step": 10431 }, { "epoch": 2.919675342849146, "grad_norm": 0.2578419302241896, "learning_rate": 1.9723659937515572e-07, "loss": 0.4331, "step": 10432 }, { "epoch": 2.9199552197033305, "grad_norm": 0.26151256479973034, "learning_rate": 1.958701721444578e-07, "loss": 0.4619, "step": 10433 }, { "epoch": 2.920235096557515, "grad_norm": 0.2533984994980884, "learning_rate": 1.9450848529871114e-07, "loss": 0.4559, "step": 10434 }, { "epoch": 2.9205149734116986, "grad_norm": 0.26181913630805637, "learning_rate": 1.9315153896752868e-07, "loss": 0.4638, "step": 10435 }, { "epoch": 2.920794850265883, "grad_norm": 0.25441984983057697, "learning_rate": 1.9179933328005718e-07, "loss": 0.4444, "step": 10436 }, { "epoch": 2.921074727120067, "grad_norm": 0.2568916127723896, "learning_rate": 1.9045186836500472e-07, "loss": 0.4368, "step": 10437 }, { "epoch": 2.921354603974251, "grad_norm": 0.2611152004511161, "learning_rate": 1.8910914435062433e-07, "loss": 0.4619, "step": 10438 }, { "epoch": 2.9216344808284354, "grad_norm": 0.25755190571414255, "learning_rate": 1.8777116136471372e-07, "loss": 0.4443, "step": 10439 }, { "epoch": 2.9219143576826196, "grad_norm": 0.25147781885391063, "learning_rate": 1.8643791953462664e-07, "loss": 0.4446, "step": 10440 }, { "epoch": 2.922194234536804, "grad_norm": 0.24820029427422324, "learning_rate": 1.8510941898726153e-07, "loss": 0.4537, "step": 10441 }, { "epoch": 2.9224741113909882, "grad_norm": 0.2638186979540325, "learning_rate": 1.837856598490617e-07, "loss": 0.4554, "step": 10442 }, { "epoch": 2.922753988245172, "grad_norm": 0.25462418346204635, "learning_rate": 1.8246664224603193e-07, "loss": 0.4428, "step": 10443 }, { "epoch": 2.9230338650993564, "grad_norm": 0.2594138620877634, "learning_rate": 1.8115236630370513e-07, "loss": 0.4358, "step": 10444 }, { "epoch": 2.9233137419535407, "grad_norm": 0.25762916254734597, "learning_rate": 1.7984283214718122e-07, "loss": 0.4703, "step": 10445 }, { "epoch": 2.9235936188077245, "grad_norm": 0.24532429258654168, "learning_rate": 1.7853803990110495e-07, "loss": 0.462, "step": 10446 }, { "epoch": 2.923873495661909, "grad_norm": 0.2562022449504887, "learning_rate": 1.7723798968966032e-07, "loss": 0.4544, "step": 10447 }, { "epoch": 2.924153372516093, "grad_norm": 0.25893836124805875, "learning_rate": 1.7594268163659278e-07, "loss": 0.4451, "step": 10448 }, { "epoch": 2.924433249370277, "grad_norm": 0.24266450193199873, "learning_rate": 1.7465211586519258e-07, "loss": 0.4507, "step": 10449 }, { "epoch": 2.9247131262244612, "grad_norm": 0.2524898468017339, "learning_rate": 1.7336629249828372e-07, "loss": 0.4432, "step": 10450 }, { "epoch": 2.9249930030786455, "grad_norm": 0.25327670454061246, "learning_rate": 1.7208521165826829e-07, "loss": 0.4703, "step": 10451 }, { "epoch": 2.9252728799328294, "grad_norm": 0.2652988133563846, "learning_rate": 1.7080887346707098e-07, "loss": 0.4556, "step": 10452 }, { "epoch": 2.9255527567870137, "grad_norm": 0.25212927409896774, "learning_rate": 1.6953727804617237e-07, "loss": 0.4334, "step": 10453 }, { "epoch": 2.925832633641198, "grad_norm": 0.257078013870885, "learning_rate": 1.6827042551660898e-07, "loss": 0.4199, "step": 10454 }, { "epoch": 2.926112510495382, "grad_norm": 0.26032378169304704, "learning_rate": 1.670083159989566e-07, "loss": 0.4716, "step": 10455 }, { "epoch": 2.926392387349566, "grad_norm": 0.24862097369804104, "learning_rate": 1.6575094961335248e-07, "loss": 0.4319, "step": 10456 }, { "epoch": 2.9266722642037504, "grad_norm": 0.24885533344996946, "learning_rate": 1.644983264794564e-07, "loss": 0.4413, "step": 10457 }, { "epoch": 2.9269521410579347, "grad_norm": 0.261188700334077, "learning_rate": 1.632504467165119e-07, "loss": 0.4465, "step": 10458 }, { "epoch": 2.9272320179121185, "grad_norm": 0.24969608494229537, "learning_rate": 1.620073104432851e-07, "loss": 0.4313, "step": 10459 }, { "epoch": 2.927511894766303, "grad_norm": 0.24858008443318744, "learning_rate": 1.6076891777809245e-07, "loss": 0.4744, "step": 10460 }, { "epoch": 2.927791771620487, "grad_norm": 0.25462668422505136, "learning_rate": 1.5953526883881188e-07, "loss": 0.4702, "step": 10461 }, { "epoch": 2.9280716484746714, "grad_norm": 0.2523815260372512, "learning_rate": 1.5830636374286056e-07, "loss": 0.4711, "step": 10462 }, { "epoch": 2.9283515253288552, "grad_norm": 0.25767637274775135, "learning_rate": 1.5708220260721163e-07, "loss": 0.4455, "step": 10463 }, { "epoch": 2.9286314021830395, "grad_norm": 0.2574606985716124, "learning_rate": 1.5586278554837187e-07, "loss": 0.4387, "step": 10464 }, { "epoch": 2.928911279037224, "grad_norm": 0.2585395013878928, "learning_rate": 1.546481126824151e-07, "loss": 0.4426, "step": 10465 }, { "epoch": 2.9291911558914077, "grad_norm": 0.2567128032532917, "learning_rate": 1.534381841249488e-07, "loss": 0.4581, "step": 10466 }, { "epoch": 2.929471032745592, "grad_norm": 0.2551522452384083, "learning_rate": 1.5223299999113094e-07, "loss": 0.4508, "step": 10467 }, { "epoch": 2.9297509095997762, "grad_norm": 0.26152786331414646, "learning_rate": 1.5103256039568635e-07, "loss": 0.4462, "step": 10468 }, { "epoch": 2.93003078645396, "grad_norm": 0.2531210010499005, "learning_rate": 1.4983686545285701e-07, "loss": 0.4444, "step": 10469 }, { "epoch": 2.9303106633081444, "grad_norm": 0.24830215591602056, "learning_rate": 1.4864591527646298e-07, "loss": 0.4222, "step": 10470 }, { "epoch": 2.9305905401623287, "grad_norm": 0.2636931535798214, "learning_rate": 1.4745970997985248e-07, "loss": 0.456, "step": 10471 }, { "epoch": 2.9308704170165125, "grad_norm": 0.2524364263885123, "learning_rate": 1.4627824967592963e-07, "loss": 0.4496, "step": 10472 }, { "epoch": 2.931150293870697, "grad_norm": 0.2616100064256884, "learning_rate": 1.451015344771489e-07, "loss": 0.4612, "step": 10473 }, { "epoch": 2.931430170724881, "grad_norm": 0.25763637866034367, "learning_rate": 1.439295644955041e-07, "loss": 0.448, "step": 10474 }, { "epoch": 2.931710047579065, "grad_norm": 0.2584985683151651, "learning_rate": 1.4276233984255593e-07, "loss": 0.4615, "step": 10475 }, { "epoch": 2.9319899244332492, "grad_norm": 0.2511241667910256, "learning_rate": 1.415998606293878e-07, "loss": 0.4303, "step": 10476 }, { "epoch": 2.9322698012874335, "grad_norm": 0.2545468157726461, "learning_rate": 1.404421269666556e-07, "loss": 0.4323, "step": 10477 }, { "epoch": 2.932549678141618, "grad_norm": 0.24978718053471582, "learning_rate": 1.3928913896454897e-07, "loss": 0.4596, "step": 10478 }, { "epoch": 2.932829554995802, "grad_norm": 0.24891041253653481, "learning_rate": 1.381408967328135e-07, "loss": 0.4408, "step": 10479 }, { "epoch": 2.933109431849986, "grad_norm": 0.25436583588653244, "learning_rate": 1.3699740038073394e-07, "loss": 0.4401, "step": 10480 }, { "epoch": 2.9333893087041703, "grad_norm": 0.25555326435865844, "learning_rate": 1.3585865001715104e-07, "loss": 0.4463, "step": 10481 }, { "epoch": 2.9336691855583545, "grad_norm": 0.25808759275433096, "learning_rate": 1.347246457504503e-07, "loss": 0.4591, "step": 10482 }, { "epoch": 2.9339490624125384, "grad_norm": 0.26026257449568424, "learning_rate": 1.3359538768856762e-07, "loss": 0.4479, "step": 10483 }, { "epoch": 2.9342289392667227, "grad_norm": 0.262707843378836, "learning_rate": 1.3247087593898922e-07, "loss": 0.451, "step": 10484 }, { "epoch": 2.934508816120907, "grad_norm": 0.2564618605525408, "learning_rate": 1.3135111060874617e-07, "loss": 0.4358, "step": 10485 }, { "epoch": 2.934788692975091, "grad_norm": 0.2568705199636224, "learning_rate": 1.3023609180441431e-07, "loss": 0.4473, "step": 10486 }, { "epoch": 2.935068569829275, "grad_norm": 0.25888006691203863, "learning_rate": 1.2912581963212543e-07, "loss": 0.4572, "step": 10487 }, { "epoch": 2.9353484466834594, "grad_norm": 0.26257361864420226, "learning_rate": 1.2802029419755613e-07, "loss": 0.4479, "step": 10488 }, { "epoch": 2.9356283235376432, "grad_norm": 0.25256274125923245, "learning_rate": 1.269195156059333e-07, "loss": 0.4529, "step": 10489 }, { "epoch": 2.9359082003918275, "grad_norm": 0.26555130805791666, "learning_rate": 1.2582348396202316e-07, "loss": 0.4576, "step": 10490 }, { "epoch": 2.936188077246012, "grad_norm": 0.2657930704720208, "learning_rate": 1.247321993701478e-07, "loss": 0.459, "step": 10491 }, { "epoch": 2.9364679541001957, "grad_norm": 0.25168542573681696, "learning_rate": 1.236456619341797e-07, "loss": 0.4326, "step": 10492 }, { "epoch": 2.93674783095438, "grad_norm": 0.2560893374026327, "learning_rate": 1.225638717575306e-07, "loss": 0.4643, "step": 10493 }, { "epoch": 2.9370277078085643, "grad_norm": 0.24755384682714268, "learning_rate": 1.214868289431792e-07, "loss": 0.4479, "step": 10494 }, { "epoch": 2.9373075846627485, "grad_norm": 0.26603348984649644, "learning_rate": 1.2041453359362132e-07, "loss": 0.4439, "step": 10495 }, { "epoch": 2.9375874615169324, "grad_norm": 0.2509154217666826, "learning_rate": 1.1934698581093086e-07, "loss": 0.4408, "step": 10496 }, { "epoch": 2.9378673383711167, "grad_norm": 0.25680508562855797, "learning_rate": 1.1828418569670985e-07, "loss": 0.4351, "step": 10497 }, { "epoch": 2.938147215225301, "grad_norm": 0.2585395777558349, "learning_rate": 1.1722613335212185e-07, "loss": 0.4556, "step": 10498 }, { "epoch": 2.9384270920794853, "grad_norm": 0.24513710160148572, "learning_rate": 1.1617282887787518e-07, "loss": 0.4251, "step": 10499 }, { "epoch": 2.938706968933669, "grad_norm": 0.2549706151869714, "learning_rate": 1.1512427237421741e-07, "loss": 0.4419, "step": 10500 }, { "epoch": 2.9389868457878534, "grad_norm": 0.2394454558323718, "learning_rate": 1.1408046394095206e-07, "loss": 0.4508, "step": 10501 }, { "epoch": 2.9392667226420377, "grad_norm": 0.24841273548598636, "learning_rate": 1.1304140367742744e-07, "loss": 0.4425, "step": 10502 }, { "epoch": 2.9395465994962215, "grad_norm": 0.26824065946500614, "learning_rate": 1.1200709168254774e-07, "loss": 0.4657, "step": 10503 }, { "epoch": 2.939826476350406, "grad_norm": 0.25734686141639923, "learning_rate": 1.1097752805475647e-07, "loss": 0.4425, "step": 10504 }, { "epoch": 2.94010635320459, "grad_norm": 0.25906967232550493, "learning_rate": 1.0995271289204745e-07, "loss": 0.4325, "step": 10505 }, { "epoch": 2.940386230058774, "grad_norm": 0.25985462367295037, "learning_rate": 1.0893264629197042e-07, "loss": 0.4374, "step": 10506 }, { "epoch": 2.9406661069129583, "grad_norm": 0.2424408673662371, "learning_rate": 1.0791732835160328e-07, "loss": 0.4225, "step": 10507 }, { "epoch": 2.9409459837671426, "grad_norm": 0.2572039680041926, "learning_rate": 1.0690675916759097e-07, "loss": 0.4438, "step": 10508 }, { "epoch": 2.9412258606213264, "grad_norm": 0.25194635813849986, "learning_rate": 1.0590093883611762e-07, "loss": 0.4151, "step": 10509 }, { "epoch": 2.9415057374755107, "grad_norm": 0.26942250750658214, "learning_rate": 1.0489986745292335e-07, "loss": 0.4737, "step": 10510 }, { "epoch": 2.941785614329695, "grad_norm": 0.249986867792104, "learning_rate": 1.0390354511328749e-07, "loss": 0.454, "step": 10511 }, { "epoch": 2.942065491183879, "grad_norm": 0.24610171411008774, "learning_rate": 1.0291197191203971e-07, "loss": 0.4396, "step": 10512 }, { "epoch": 2.942345368038063, "grad_norm": 0.255544816937748, "learning_rate": 1.0192514794356012e-07, "loss": 0.4401, "step": 10513 }, { "epoch": 2.9426252448922474, "grad_norm": 0.2455925774087127, "learning_rate": 1.0094307330177355e-07, "loss": 0.4536, "step": 10514 }, { "epoch": 2.9429051217464317, "grad_norm": 0.2583339543631677, "learning_rate": 9.99657480801497e-08, "loss": 0.4508, "step": 10515 }, { "epoch": 2.943184998600616, "grad_norm": 0.25668429968223977, "learning_rate": 9.899317237172523e-08, "loss": 0.419, "step": 10516 }, { "epoch": 2.9434648754548, "grad_norm": 0.24562189192693365, "learning_rate": 9.80253462690539e-08, "loss": 0.4555, "step": 10517 }, { "epoch": 2.943744752308984, "grad_norm": 0.2622676135147595, "learning_rate": 9.706226986426203e-08, "loss": 0.4386, "step": 10518 }, { "epoch": 2.9440246291631684, "grad_norm": 0.26528886042279054, "learning_rate": 9.610394324902073e-08, "loss": 0.4399, "step": 10519 }, { "epoch": 2.9443045060173523, "grad_norm": 0.2687492946457659, "learning_rate": 9.515036651453479e-08, "loss": 0.4565, "step": 10520 }, { "epoch": 2.9445843828715366, "grad_norm": 0.25029425954088713, "learning_rate": 9.420153975157053e-08, "loss": 0.45, "step": 10521 }, { "epoch": 2.944864259725721, "grad_norm": 0.2530798242372889, "learning_rate": 9.325746305043348e-08, "loss": 0.4337, "step": 10522 }, { "epoch": 2.9451441365799047, "grad_norm": 0.25875219638515645, "learning_rate": 9.231813650099064e-08, "loss": 0.4519, "step": 10523 }, { "epoch": 2.945424013434089, "grad_norm": 0.25682815207388776, "learning_rate": 9.138356019264271e-08, "loss": 0.4389, "step": 10524 }, { "epoch": 2.9457038902882733, "grad_norm": 0.25104282666120115, "learning_rate": 9.045373421433523e-08, "loss": 0.425, "step": 10525 }, { "epoch": 2.945983767142457, "grad_norm": 0.26342494863030874, "learning_rate": 8.952865865458626e-08, "loss": 0.4703, "step": 10526 }, { "epoch": 2.9462636439966414, "grad_norm": 0.26385994849672095, "learning_rate": 8.860833360143095e-08, "loss": 0.4356, "step": 10527 }, { "epoch": 2.9465435208508257, "grad_norm": 0.251010870200779, "learning_rate": 8.769275914247143e-08, "loss": 0.4662, "step": 10528 }, { "epoch": 2.9468233977050096, "grad_norm": 0.2471674470410353, "learning_rate": 8.678193536484914e-08, "loss": 0.4394, "step": 10529 }, { "epoch": 2.947103274559194, "grad_norm": 0.24808625130165926, "learning_rate": 8.58758623552669e-08, "loss": 0.4527, "step": 10530 }, { "epoch": 2.947383151413378, "grad_norm": 0.2553979940589703, "learning_rate": 8.497454019995022e-08, "loss": 0.4581, "step": 10531 }, { "epoch": 2.9476630282675624, "grad_norm": 0.27036146065247973, "learning_rate": 8.407796898470266e-08, "loss": 0.4543, "step": 10532 }, { "epoch": 2.9479429051217463, "grad_norm": 0.26083548862095113, "learning_rate": 8.318614879485043e-08, "loss": 0.4528, "step": 10533 }, { "epoch": 2.9482227819759306, "grad_norm": 0.25369304134129755, "learning_rate": 8.229907971528116e-08, "loss": 0.4301, "step": 10534 }, { "epoch": 2.948502658830115, "grad_norm": 0.2587089753348896, "learning_rate": 8.14167618304218e-08, "loss": 0.4547, "step": 10535 }, { "epoch": 2.948782535684299, "grad_norm": 0.2561463390198463, "learning_rate": 8.053919522425513e-08, "loss": 0.4417, "step": 10536 }, { "epoch": 2.949062412538483, "grad_norm": 0.2804159977991239, "learning_rate": 7.966637998031434e-08, "loss": 0.4435, "step": 10537 }, { "epoch": 2.9493422893926673, "grad_norm": 0.2636994221235027, "learning_rate": 7.879831618166633e-08, "loss": 0.4542, "step": 10538 }, { "epoch": 2.9496221662468516, "grad_norm": 0.25934639410112037, "learning_rate": 7.793500391093944e-08, "loss": 0.445, "step": 10539 }, { "epoch": 2.9499020431010354, "grad_norm": 0.28139162564843395, "learning_rate": 7.707644325029572e-08, "loss": 0.4317, "step": 10540 }, { "epoch": 2.9501819199552197, "grad_norm": 0.26420461175765253, "learning_rate": 7.622263428146426e-08, "loss": 0.4356, "step": 10541 }, { "epoch": 2.950461796809404, "grad_norm": 0.25441472194653697, "learning_rate": 7.537357708570225e-08, "loss": 0.446, "step": 10542 }, { "epoch": 2.950741673663588, "grad_norm": 0.2671800826870824, "learning_rate": 7.452927174383396e-08, "loss": 0.4549, "step": 10543 }, { "epoch": 2.951021550517772, "grad_norm": 0.25564475592140923, "learning_rate": 7.368971833620619e-08, "loss": 0.4551, "step": 10544 }, { "epoch": 2.9513014273719564, "grad_norm": 0.25828170287220853, "learning_rate": 7.285491694273838e-08, "loss": 0.445, "step": 10545 }, { "epoch": 2.9515813042261403, "grad_norm": 0.24080567738555247, "learning_rate": 7.202486764288918e-08, "loss": 0.4379, "step": 10546 }, { "epoch": 2.9518611810803246, "grad_norm": 0.25363021744294506, "learning_rate": 7.119957051565651e-08, "loss": 0.4627, "step": 10547 }, { "epoch": 2.952141057934509, "grad_norm": 0.25326867966327166, "learning_rate": 7.03790256395942e-08, "loss": 0.4262, "step": 10548 }, { "epoch": 2.9524209347886927, "grad_norm": 0.26917960408980013, "learning_rate": 6.956323309280089e-08, "loss": 0.4603, "step": 10549 }, { "epoch": 2.952700811642877, "grad_norm": 0.26027908392323174, "learning_rate": 6.875219295293111e-08, "loss": 0.461, "step": 10550 }, { "epoch": 2.9529806884970613, "grad_norm": 0.2591632379456325, "learning_rate": 6.794590529717315e-08, "loss": 0.4536, "step": 10551 }, { "epoch": 2.9532605653512456, "grad_norm": 0.2510652354734244, "learning_rate": 6.714437020227115e-08, "loss": 0.4235, "step": 10552 }, { "epoch": 2.95354044220543, "grad_norm": 0.2526340838303442, "learning_rate": 6.634758774451966e-08, "loss": 0.4352, "step": 10553 }, { "epoch": 2.9538203190596137, "grad_norm": 0.2575515578872242, "learning_rate": 6.555555799974689e-08, "loss": 0.4536, "step": 10554 }, { "epoch": 2.954100195913798, "grad_norm": 0.2603208898237062, "learning_rate": 6.476828104335364e-08, "loss": 0.4133, "step": 10555 }, { "epoch": 2.9543800727679823, "grad_norm": 0.24904628086688163, "learning_rate": 6.398575695026332e-08, "loss": 0.431, "step": 10556 }, { "epoch": 2.954659949622166, "grad_norm": 0.26696013383377293, "learning_rate": 6.320798579495524e-08, "loss": 0.4594, "step": 10557 }, { "epoch": 2.9549398264763505, "grad_norm": 0.25246866520967143, "learning_rate": 6.243496765146461e-08, "loss": 0.4408, "step": 10558 }, { "epoch": 2.9552197033305347, "grad_norm": 0.254437865474292, "learning_rate": 6.166670259336594e-08, "loss": 0.4652, "step": 10559 }, { "epoch": 2.9554995801847186, "grad_norm": 0.26299613136944583, "learning_rate": 6.090319069377848e-08, "loss": 0.4597, "step": 10560 }, { "epoch": 2.955779457038903, "grad_norm": 0.2615353362609834, "learning_rate": 6.0144432025383e-08, "loss": 0.4629, "step": 10561 }, { "epoch": 2.956059333893087, "grad_norm": 0.2512308203604363, "learning_rate": 5.939042666038841e-08, "loss": 0.4444, "step": 10562 }, { "epoch": 2.956339210747271, "grad_norm": 0.2377900486143696, "learning_rate": 5.864117467057062e-08, "loss": 0.4592, "step": 10563 }, { "epoch": 2.9566190876014553, "grad_norm": 0.25875069490141756, "learning_rate": 5.789667612723371e-08, "loss": 0.4335, "step": 10564 }, { "epoch": 2.9568989644556396, "grad_norm": 0.24683365444532673, "learning_rate": 5.7156931101248755e-08, "loss": 0.4351, "step": 10565 }, { "epoch": 2.9571788413098234, "grad_norm": 0.26012406806987626, "learning_rate": 5.642193966302056e-08, "loss": 0.4473, "step": 10566 }, { "epoch": 2.9574587181640077, "grad_norm": 0.2542922961412427, "learning_rate": 5.569170188250983e-08, "loss": 0.4543, "step": 10567 }, { "epoch": 2.957738595018192, "grad_norm": 0.25715451992754895, "learning_rate": 5.496621782921097e-08, "loss": 0.443, "step": 10568 }, { "epoch": 2.9580184718723763, "grad_norm": 0.2590939919758361, "learning_rate": 5.4245487572190946e-08, "loss": 0.4282, "step": 10569 }, { "epoch": 2.95829834872656, "grad_norm": 0.24454462290233103, "learning_rate": 5.35295111800338e-08, "loss": 0.4258, "step": 10570 }, { "epoch": 2.9585782255807445, "grad_norm": 0.24034675492651944, "learning_rate": 5.281828872089611e-08, "loss": 0.4318, "step": 10571 }, { "epoch": 2.9588581024349287, "grad_norm": 0.2668304845642728, "learning_rate": 5.2111820262473745e-08, "loss": 0.4531, "step": 10572 }, { "epoch": 2.959137979289113, "grad_norm": 0.24610859132094123, "learning_rate": 5.141010587200179e-08, "loss": 0.4518, "step": 10573 }, { "epoch": 2.959417856143297, "grad_norm": 0.2525784737926144, "learning_rate": 5.071314561627682e-08, "loss": 0.4499, "step": 10574 }, { "epoch": 2.959697732997481, "grad_norm": 0.2589959315475041, "learning_rate": 5.002093956162912e-08, "loss": 0.4473, "step": 10575 }, { "epoch": 2.9599776098516655, "grad_norm": 0.24563499364177488, "learning_rate": 4.933348777395042e-08, "loss": 0.4311, "step": 10576 }, { "epoch": 2.9602574867058493, "grad_norm": 0.24229642325126785, "learning_rate": 4.865079031866615e-08, "loss": 0.4356, "step": 10577 }, { "epoch": 2.9605373635600336, "grad_norm": 0.2569134940403133, "learning_rate": 4.797284726075768e-08, "loss": 0.4439, "step": 10578 }, { "epoch": 2.960817240414218, "grad_norm": 0.2544678366133651, "learning_rate": 4.729965866475117e-08, "loss": 0.4644, "step": 10579 }, { "epoch": 2.9610971172684017, "grad_norm": 0.25061094843996773, "learning_rate": 4.663122459472869e-08, "loss": 0.4305, "step": 10580 }, { "epoch": 2.961376994122586, "grad_norm": 0.25322228356666415, "learning_rate": 4.5967545114306007e-08, "loss": 0.4518, "step": 10581 }, { "epoch": 2.9616568709767703, "grad_norm": 0.24853741746919858, "learning_rate": 4.530862028664928e-08, "loss": 0.4444, "step": 10582 }, { "epoch": 2.961936747830954, "grad_norm": 0.25579650896961803, "learning_rate": 4.465445017448056e-08, "loss": 0.4376, "step": 10583 }, { "epoch": 2.9622166246851385, "grad_norm": 0.25032094678732375, "learning_rate": 4.400503484006113e-08, "loss": 0.4392, "step": 10584 }, { "epoch": 2.9624965015393228, "grad_norm": 0.25834259594052383, "learning_rate": 4.336037434520823e-08, "loss": 0.456, "step": 10585 }, { "epoch": 2.9627763783935066, "grad_norm": 0.247699967081848, "learning_rate": 4.272046875127278e-08, "loss": 0.4352, "step": 10586 }, { "epoch": 2.963056255247691, "grad_norm": 0.255500336642797, "learning_rate": 4.208531811916716e-08, "loss": 0.4507, "step": 10587 }, { "epoch": 2.963336132101875, "grad_norm": 0.2481995662102386, "learning_rate": 4.1454922509337466e-08, "loss": 0.4505, "step": 10588 }, { "epoch": 2.9636160089560595, "grad_norm": 0.25844158185485583, "learning_rate": 4.082928198179681e-08, "loss": 0.4587, "step": 10589 }, { "epoch": 2.9638958858102438, "grad_norm": 0.25711496564190917, "learning_rate": 4.020839659609199e-08, "loss": 0.4555, "step": 10590 }, { "epoch": 2.9641757626644276, "grad_norm": 0.2588625423650436, "learning_rate": 3.959226641130909e-08, "loss": 0.4533, "step": 10591 }, { "epoch": 2.964455639518612, "grad_norm": 0.24528164675911024, "learning_rate": 3.8980891486101176e-08, "loss": 0.4379, "step": 10592 }, { "epoch": 2.964735516372796, "grad_norm": 0.25861177214476944, "learning_rate": 3.837427187866061e-08, "loss": 0.4488, "step": 10593 }, { "epoch": 2.96501539322698, "grad_norm": 0.2520859346888829, "learning_rate": 3.777240764671342e-08, "loss": 0.4513, "step": 10594 }, { "epoch": 2.9652952700811643, "grad_norm": 0.2604468848180648, "learning_rate": 3.7175298847558216e-08, "loss": 0.4585, "step": 10595 }, { "epoch": 2.9655751469353486, "grad_norm": 0.2582001394915078, "learning_rate": 3.6582945538027324e-08, "loss": 0.4321, "step": 10596 }, { "epoch": 2.9658550237895325, "grad_norm": 0.25229843018884607, "learning_rate": 3.599534777449232e-08, "loss": 0.463, "step": 10597 }, { "epoch": 2.9661349006437168, "grad_norm": 0.25236312467740846, "learning_rate": 3.5412505612886225e-08, "loss": 0.4519, "step": 10598 }, { "epoch": 2.966414777497901, "grad_norm": 0.25724033603089713, "learning_rate": 3.4834419108681346e-08, "loss": 0.4533, "step": 10599 }, { "epoch": 2.966694654352085, "grad_norm": 0.2581127353937177, "learning_rate": 3.426108831691144e-08, "loss": 0.4595, "step": 10600 }, { "epoch": 2.966974531206269, "grad_norm": 0.24611693632209142, "learning_rate": 3.369251329213285e-08, "loss": 0.415, "step": 10601 }, { "epoch": 2.9672544080604535, "grad_norm": 0.25310950920344844, "learning_rate": 3.312869408846897e-08, "loss": 0.429, "step": 10602 }, { "epoch": 2.9675342849146373, "grad_norm": 0.24968979714091358, "learning_rate": 3.2569630759582415e-08, "loss": 0.4566, "step": 10603 }, { "epoch": 2.9678141617688216, "grad_norm": 0.2609776394190351, "learning_rate": 3.201532335868618e-08, "loss": 0.4458, "step": 10604 }, { "epoch": 2.968094038623006, "grad_norm": 0.25969592415796383, "learning_rate": 3.146577193854361e-08, "loss": 0.4498, "step": 10605 }, { "epoch": 2.9683739154771898, "grad_norm": 0.24716778897711522, "learning_rate": 3.092097655145176e-08, "loss": 0.4332, "step": 10606 }, { "epoch": 2.968653792331374, "grad_norm": 0.27328902605297223, "learning_rate": 3.038093724927471e-08, "loss": 0.4511, "step": 10607 }, { "epoch": 2.9689336691855583, "grad_norm": 0.2507518753167543, "learning_rate": 2.984565408341022e-08, "loss": 0.4494, "step": 10608 }, { "epoch": 2.9692135460397426, "grad_norm": 0.2609750291364599, "learning_rate": 2.9315127104800887e-08, "loss": 0.4491, "step": 10609 }, { "epoch": 2.969493422893927, "grad_norm": 0.25655339861321874, "learning_rate": 2.878935636395075e-08, "loss": 0.4316, "step": 10610 }, { "epoch": 2.9697732997481108, "grad_norm": 0.2530639883196548, "learning_rate": 2.8268341910903108e-08, "loss": 0.4525, "step": 10611 }, { "epoch": 2.970053176602295, "grad_norm": 0.23225610378688546, "learning_rate": 2.7752083795240525e-08, "loss": 0.4282, "step": 10612 }, { "epoch": 2.9703330534564794, "grad_norm": 0.2543375083380285, "learning_rate": 2.7240582066107022e-08, "loss": 0.4498, "step": 10613 }, { "epoch": 2.970612930310663, "grad_norm": 0.2527960226970521, "learning_rate": 2.6733836772185884e-08, "loss": 0.462, "step": 10614 }, { "epoch": 2.9708928071648475, "grad_norm": 0.2554175886404961, "learning_rate": 2.623184796170519e-08, "loss": 0.4319, "step": 10615 }, { "epoch": 2.971172684019032, "grad_norm": 0.25718644400633023, "learning_rate": 2.57346156824545e-08, "loss": 0.4517, "step": 10616 }, { "epoch": 2.9714525608732156, "grad_norm": 0.302200558832404, "learning_rate": 2.5242139981751513e-08, "loss": 0.4469, "step": 10617 }, { "epoch": 2.9717324377274, "grad_norm": 0.2549185842864372, "learning_rate": 2.4754420906475396e-08, "loss": 0.4441, "step": 10618 }, { "epoch": 2.972012314581584, "grad_norm": 0.2552989797038095, "learning_rate": 2.4271458503044576e-08, "loss": 0.4555, "step": 10619 }, { "epoch": 2.972292191435768, "grad_norm": 0.25867881848070834, "learning_rate": 2.3793252817427836e-08, "loss": 0.4586, "step": 10620 }, { "epoch": 2.9725720682899524, "grad_norm": 0.25767245445537734, "learning_rate": 2.3319803895144322e-08, "loss": 0.437, "step": 10621 }, { "epoch": 2.9728519451441366, "grad_norm": 0.261765852972024, "learning_rate": 2.2851111781257983e-08, "loss": 0.4512, "step": 10622 }, { "epoch": 2.9731318219983205, "grad_norm": 0.25522924545223363, "learning_rate": 2.238717652037203e-08, "loss": 0.449, "step": 10623 }, { "epoch": 2.973411698852505, "grad_norm": 0.24927821271923611, "learning_rate": 2.1927998156651142e-08, "loss": 0.4506, "step": 10624 }, { "epoch": 2.973691575706689, "grad_norm": 0.25831566762208635, "learning_rate": 2.1473576733793686e-08, "loss": 0.4281, "step": 10625 }, { "epoch": 2.9739714525608734, "grad_norm": 0.25375287107557304, "learning_rate": 2.1023912295059512e-08, "loss": 0.4466, "step": 10626 }, { "epoch": 2.974251329415057, "grad_norm": 0.25946073380754525, "learning_rate": 2.0579004883236608e-08, "loss": 0.4414, "step": 10627 }, { "epoch": 2.9745312062692415, "grad_norm": 0.25171033719930647, "learning_rate": 2.0138854540685538e-08, "loss": 0.435, "step": 10628 }, { "epoch": 2.974811083123426, "grad_norm": 0.26115332869155145, "learning_rate": 1.9703461309295013e-08, "loss": 0.4663, "step": 10629 }, { "epoch": 2.97509095997761, "grad_norm": 0.25790911409843703, "learning_rate": 1.927282523049856e-08, "loss": 0.4663, "step": 10630 }, { "epoch": 2.975370836831794, "grad_norm": 0.2607999674754798, "learning_rate": 1.884694634529116e-08, "loss": 0.4595, "step": 10631 }, { "epoch": 2.975650713685978, "grad_norm": 0.2534597997717571, "learning_rate": 1.842582469420706e-08, "loss": 0.4221, "step": 10632 }, { "epoch": 2.9759305905401625, "grad_norm": 0.2503832993380337, "learning_rate": 1.8009460317330862e-08, "loss": 0.4242, "step": 10633 }, { "epoch": 2.9762104673943464, "grad_norm": 0.25792042031621715, "learning_rate": 1.7597853254291972e-08, "loss": 0.4446, "step": 10634 }, { "epoch": 2.9764903442485307, "grad_norm": 0.24700530197851792, "learning_rate": 1.7191003544259064e-08, "loss": 0.4664, "step": 10635 }, { "epoch": 2.976770221102715, "grad_norm": 0.26387028208771174, "learning_rate": 1.6788911225967817e-08, "loss": 0.438, "step": 10636 }, { "epoch": 2.977050097956899, "grad_norm": 0.2465718699073755, "learning_rate": 1.639157633768762e-08, "loss": 0.4598, "step": 10637 }, { "epoch": 2.977329974811083, "grad_norm": 0.2632178783866457, "learning_rate": 1.5998998917227116e-08, "loss": 0.4506, "step": 10638 }, { "epoch": 2.9776098516652674, "grad_norm": 0.25198103145232476, "learning_rate": 1.5611179001967513e-08, "loss": 0.4497, "step": 10639 }, { "epoch": 2.977889728519451, "grad_norm": 0.2615169372245782, "learning_rate": 1.5228116628807078e-08, "loss": 0.4634, "step": 10640 }, { "epoch": 2.9781696053736355, "grad_norm": 0.24759420280801528, "learning_rate": 1.484981183421108e-08, "loss": 0.4492, "step": 10641 }, { "epoch": 2.97844948222782, "grad_norm": 0.2575222446333656, "learning_rate": 1.447626465419516e-08, "loss": 0.4414, "step": 10642 }, { "epoch": 2.9787293590820036, "grad_norm": 0.2578216050855136, "learning_rate": 1.4107475124297553e-08, "loss": 0.4615, "step": 10643 }, { "epoch": 2.979009235936188, "grad_norm": 0.24764862867426704, "learning_rate": 1.3743443279634617e-08, "loss": 0.4396, "step": 10644 }, { "epoch": 2.9792891127903722, "grad_norm": 0.2790608325780094, "learning_rate": 1.3384169154850856e-08, "loss": 0.4493, "step": 10645 }, { "epoch": 2.9795689896445565, "grad_norm": 0.24885669381579884, "learning_rate": 1.3029652784135592e-08, "loss": 0.4471, "step": 10646 }, { "epoch": 2.979848866498741, "grad_norm": 0.2516985946364248, "learning_rate": 1.2679894201239606e-08, "loss": 0.4282, "step": 10647 }, { "epoch": 2.9801287433529247, "grad_norm": 0.2546918838471997, "learning_rate": 1.2334893439447382e-08, "loss": 0.4247, "step": 10648 }, { "epoch": 2.980408620207109, "grad_norm": 0.26154533642566097, "learning_rate": 1.1994650531604868e-08, "loss": 0.4514, "step": 10649 }, { "epoch": 2.9806884970612932, "grad_norm": 0.2541071458160839, "learning_rate": 1.1659165510086167e-08, "loss": 0.4453, "step": 10650 }, { "epoch": 2.980968373915477, "grad_norm": 0.2557609039926244, "learning_rate": 1.1328438406826847e-08, "loss": 0.4518, "step": 10651 }, { "epoch": 2.9812482507696614, "grad_norm": 0.2603397966746705, "learning_rate": 1.100246925331283e-08, "loss": 0.4518, "step": 10652 }, { "epoch": 2.9815281276238457, "grad_norm": 0.25378766274204506, "learning_rate": 1.0681258080558198e-08, "loss": 0.4632, "step": 10653 }, { "epoch": 2.9818080044780295, "grad_norm": 0.2577461103752013, "learning_rate": 1.0364804919144044e-08, "loss": 0.4321, "step": 10654 }, { "epoch": 2.982087881332214, "grad_norm": 0.2548663096401936, "learning_rate": 1.0053109799190719e-08, "loss": 0.4583, "step": 10655 }, { "epoch": 2.982367758186398, "grad_norm": 0.2678503092268285, "learning_rate": 9.74617275035783e-09, "loss": 0.4446, "step": 10656 }, { "epoch": 2.982647635040582, "grad_norm": 0.25644673755161296, "learning_rate": 9.443993801866447e-09, "loss": 0.4487, "step": 10657 }, { "epoch": 2.9829275118947662, "grad_norm": 0.26481506867740406, "learning_rate": 9.146572982476897e-09, "loss": 0.4555, "step": 10658 }, { "epoch": 2.9832073887489505, "grad_norm": 0.26361043389181155, "learning_rate": 8.85391032049987e-09, "loss": 0.4423, "step": 10659 }, { "epoch": 2.9834872656031344, "grad_norm": 0.25615383736953834, "learning_rate": 8.566005843790858e-09, "loss": 0.4583, "step": 10660 }, { "epoch": 2.9837671424573187, "grad_norm": 0.24828969986997024, "learning_rate": 8.282859579744617e-09, "loss": 0.4262, "step": 10661 }, { "epoch": 2.984047019311503, "grad_norm": 0.25552999391652786, "learning_rate": 8.004471555322913e-09, "loss": 0.4646, "step": 10662 }, { "epoch": 2.9843268961656872, "grad_norm": 0.2636652872115676, "learning_rate": 7.730841797010113e-09, "loss": 0.4618, "step": 10663 }, { "epoch": 2.984606773019871, "grad_norm": 0.2583926307618832, "learning_rate": 7.461970330863156e-09, "loss": 0.4385, "step": 10664 }, { "epoch": 2.9848866498740554, "grad_norm": 0.2518994518625086, "learning_rate": 7.197857182467127e-09, "loss": 0.438, "step": 10665 }, { "epoch": 2.9851665267282397, "grad_norm": 0.2549280585315688, "learning_rate": 6.938502376963029e-09, "loss": 0.431, "step": 10666 }, { "epoch": 2.985446403582424, "grad_norm": 0.26188810220058695, "learning_rate": 6.683905939031121e-09, "loss": 0.4571, "step": 10667 }, { "epoch": 2.985726280436608, "grad_norm": 0.2629607222117437, "learning_rate": 6.434067892907569e-09, "loss": 0.4575, "step": 10668 }, { "epoch": 2.986006157290792, "grad_norm": 0.2497045188475996, "learning_rate": 6.188988262373352e-09, "loss": 0.4428, "step": 10669 }, { "epoch": 2.9862860341449764, "grad_norm": 0.26736706499842067, "learning_rate": 5.948667070754255e-09, "loss": 0.4459, "step": 10670 }, { "epoch": 2.9865659109991602, "grad_norm": 0.24828775818061782, "learning_rate": 5.713104340926423e-09, "loss": 0.4284, "step": 10671 }, { "epoch": 2.9868457878533445, "grad_norm": 0.26353052160753476, "learning_rate": 5.482300095305259e-09, "loss": 0.4721, "step": 10672 }, { "epoch": 2.987125664707529, "grad_norm": 0.25205314276037316, "learning_rate": 5.256254355862078e-09, "loss": 0.4565, "step": 10673 }, { "epoch": 2.9874055415617127, "grad_norm": 0.2559405262948841, "learning_rate": 5.034967144113001e-09, "loss": 0.4575, "step": 10674 }, { "epoch": 2.987685418415897, "grad_norm": 0.24907589377603614, "learning_rate": 4.818438481118959e-09, "loss": 0.4411, "step": 10675 }, { "epoch": 2.9879652952700813, "grad_norm": 0.2549880059005624, "learning_rate": 4.606668387491242e-09, "loss": 0.4556, "step": 10676 }, { "epoch": 2.988245172124265, "grad_norm": 0.2570026338796731, "learning_rate": 4.399656883380398e-09, "loss": 0.4777, "step": 10677 }, { "epoch": 2.9885250489784494, "grad_norm": 0.24982827713037273, "learning_rate": 4.197403988492887e-09, "loss": 0.4447, "step": 10678 }, { "epoch": 2.9888049258326337, "grad_norm": 0.2588687368315132, "learning_rate": 3.999909722085527e-09, "loss": 0.4514, "step": 10679 }, { "epoch": 2.9890848026868175, "grad_norm": 0.2582412441405769, "learning_rate": 3.807174102948841e-09, "loss": 0.4494, "step": 10680 }, { "epoch": 2.989364679541002, "grad_norm": 0.2508054374346863, "learning_rate": 3.6191971494292652e-09, "loss": 0.4488, "step": 10681 }, { "epoch": 2.989644556395186, "grad_norm": 0.24647912747459155, "learning_rate": 3.435978879418045e-09, "loss": 0.4673, "step": 10682 }, { "epoch": 2.9899244332493704, "grad_norm": 0.2600041063000193, "learning_rate": 3.2575193103567826e-09, "loss": 0.4533, "step": 10683 }, { "epoch": 2.9902043101035547, "grad_norm": 0.25480134712200003, "learning_rate": 3.0838184592263396e-09, "loss": 0.4655, "step": 10684 }, { "epoch": 2.9904841869577385, "grad_norm": 0.25467713191116054, "learning_rate": 2.9148763425634886e-09, "loss": 0.4577, "step": 10685 }, { "epoch": 2.990764063811923, "grad_norm": 0.2425309705348517, "learning_rate": 2.750692976444258e-09, "loss": 0.4377, "step": 10686 }, { "epoch": 2.991043940666107, "grad_norm": 0.26213250626211815, "learning_rate": 2.5912683765061398e-09, "loss": 0.4705, "step": 10687 }, { "epoch": 2.991323817520291, "grad_norm": 0.2612357437354752, "learning_rate": 2.436602557909229e-09, "loss": 0.4615, "step": 10688 }, { "epoch": 2.9916036943744753, "grad_norm": 0.260821699009736, "learning_rate": 2.286695535386185e-09, "loss": 0.4519, "step": 10689 }, { "epoch": 2.9918835712286596, "grad_norm": 0.2515545516960629, "learning_rate": 2.1415473231978236e-09, "loss": 0.4532, "step": 10690 }, { "epoch": 2.9921634480828434, "grad_norm": 0.2640350546718974, "learning_rate": 2.001157935160869e-09, "loss": 0.4628, "step": 10691 }, { "epoch": 2.9924433249370277, "grad_norm": 0.2552477225519158, "learning_rate": 1.865527384642407e-09, "loss": 0.443, "step": 10692 }, { "epoch": 2.992723201791212, "grad_norm": 0.28604816511846115, "learning_rate": 1.7346556845432294e-09, "loss": 0.4417, "step": 10693 }, { "epoch": 2.993003078645396, "grad_norm": 0.2607651957497237, "learning_rate": 1.6085428473311404e-09, "loss": 0.4655, "step": 10694 }, { "epoch": 2.99328295549958, "grad_norm": 0.2537250574074743, "learning_rate": 1.4871888849965488e-09, "loss": 0.4519, "step": 10695 }, { "epoch": 2.9935628323537644, "grad_norm": 0.2496928430714104, "learning_rate": 1.3705938091024273e-09, "loss": 0.4401, "step": 10696 }, { "epoch": 2.9938427092079483, "grad_norm": 0.2458011649695888, "learning_rate": 1.2587576307343528e-09, "loss": 0.4475, "step": 10697 }, { "epoch": 2.9941225860621326, "grad_norm": 0.24453034008079477, "learning_rate": 1.1516803605504666e-09, "loss": 0.4365, "step": 10698 }, { "epoch": 2.994402462916317, "grad_norm": 0.26694280093260125, "learning_rate": 1.0493620087315136e-09, "loss": 0.4531, "step": 10699 }, { "epoch": 2.994682339770501, "grad_norm": 0.25832226564127514, "learning_rate": 9.51802585019701e-10, "loss": 0.4457, "step": 10700 }, { "epoch": 2.994962216624685, "grad_norm": 0.25240393231643893, "learning_rate": 8.590020987020442e-10, "loss": 0.4708, "step": 10701 }, { "epoch": 2.9952420934788693, "grad_norm": 0.253125299590793, "learning_rate": 7.709605586103674e-10, "loss": 0.4476, "step": 10702 }, { "epoch": 2.9955219703330536, "grad_norm": 0.2445135905145077, "learning_rate": 6.876779731213035e-10, "loss": 0.4462, "step": 10703 }, { "epoch": 2.995801847187238, "grad_norm": 0.25008011484227777, "learning_rate": 6.091543501673958e-10, "loss": 0.445, "step": 10704 }, { "epoch": 2.9960817240414217, "grad_norm": 0.25643300402049357, "learning_rate": 5.353896972204453e-10, "loss": 0.4406, "step": 10705 }, { "epoch": 2.996361600895606, "grad_norm": 0.24847707003519692, "learning_rate": 4.663840213026127e-10, "loss": 0.4677, "step": 10706 }, { "epoch": 2.9966414777497903, "grad_norm": 0.2520890475535469, "learning_rate": 4.0213732897531607e-10, "loss": 0.449, "step": 10707 }, { "epoch": 2.996921354603974, "grad_norm": 0.24197743329090915, "learning_rate": 3.4264962636143536e-10, "loss": 0.4382, "step": 10708 }, { "epoch": 2.9972012314581584, "grad_norm": 0.2663302724507473, "learning_rate": 2.8792091912310804e-10, "loss": 0.4549, "step": 10709 }, { "epoch": 2.9974811083123427, "grad_norm": 0.27210717025743686, "learning_rate": 2.379512124617289e-10, "loss": 0.4738, "step": 10710 }, { "epoch": 2.9977609851665266, "grad_norm": 0.25045671556430316, "learning_rate": 1.9274051114015478e-10, "loss": 0.4365, "step": 10711 }, { "epoch": 2.998040862020711, "grad_norm": 0.2669187587751103, "learning_rate": 1.522888194604999e-10, "loss": 0.4566, "step": 10712 }, { "epoch": 2.998320738874895, "grad_norm": 0.2600536100682567, "learning_rate": 1.16596141269687e-10, "loss": 0.4347, "step": 10713 }, { "epoch": 2.998600615729079, "grad_norm": 0.2541319276216827, "learning_rate": 8.566247997054966e-11, "loss": 0.4663, "step": 10714 }, { "epoch": 2.9988804925832633, "grad_norm": 0.2576446088985775, "learning_rate": 5.948783850517892e-11, "loss": 0.4636, "step": 10715 }, { "epoch": 2.9991603694374476, "grad_norm": 0.2536248818180751, "learning_rate": 3.807221936047434e-11, "loss": 0.435, "step": 10716 }, { "epoch": 2.9994402462916314, "grad_norm": 0.2545279038011797, "learning_rate": 2.1415624579246284e-11, "loss": 0.4368, "step": 10717 }, { "epoch": 2.9997201231458157, "grad_norm": 0.2604701422540892, "learning_rate": 9.51805574356257e-12, "loss": 0.4551, "step": 10718 }, { "epoch": 3.0, "grad_norm": 0.2550703778343758, "learning_rate": 2.3795139914017937e-12, "loss": 0.4401, "step": 10719 } ], "logging_steps": 1, "max_steps": 10719, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2518172089335808.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }