diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,28018 +2,70018 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.5598320503848845, + "epoch": 2.7987685418415897, "eval_steps": 500, - "global_step": 4000, + "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00013995801259622114, - "grad_norm": 1.6205339948497404, + "epoch": 0.000279876854184159, + "grad_norm": 1.4728044242366627, "learning_rate": 0.0, - "loss": 1.8649, + "loss": 1.8205, "step": 1 }, { - "epoch": 0.00027991602519244227, - "grad_norm": 1.8823543727463894, - "learning_rate": 2.3255813953488374e-07, - "loss": 1.8156, + "epoch": 0.000559753708368318, + "grad_norm": 1.505239345302424, + "learning_rate": 1.8656716417910447e-07, + "loss": 1.8335, "step": 2 }, { - "epoch": 0.0004198740377886634, - "grad_norm": 1.5734521895417912, - "learning_rate": 4.651162790697675e-07, - "loss": 1.8395, + "epoch": 0.0008396305625524769, + "grad_norm": 1.3433563758023555, + "learning_rate": 3.7313432835820895e-07, + "loss": 1.7795, "step": 3 }, { - "epoch": 0.0005598320503848845, - "grad_norm": 1.884881935408653, - "learning_rate": 6.976744186046511e-07, - "loss": 1.8379, + "epoch": 0.001119507416736636, + "grad_norm": 1.4232474811860303, + "learning_rate": 5.597014925373135e-07, + "loss": 1.7965, "step": 4 }, { - "epoch": 0.0006997900629811056, - "grad_norm": 1.570942303353113, - "learning_rate": 9.30232558139535e-07, - "loss": 1.7683, + "epoch": 0.001399384270920795, + "grad_norm": 1.4247553473189891, + "learning_rate": 7.462686567164179e-07, + "loss": 1.7917, "step": 5 }, { - "epoch": 0.0008397480755773268, - "grad_norm": 2.1232694447925367, - "learning_rate": 1.1627906976744186e-06, - "loss": 1.834, + "epoch": 0.0016792611251049538, + "grad_norm": 1.3711252767403777, + "learning_rate": 9.328358208955223e-07, + "loss": 1.8019, "step": 6 }, { - "epoch": 0.000979706088173548, - "grad_norm": 1.5460826364849427, - "learning_rate": 1.3953488372093023e-06, - "loss": 1.8431, + "epoch": 0.0019591379792891126, + "grad_norm": 1.4549056602853179, + "learning_rate": 1.119402985074627e-06, + "loss": 1.8298, "step": 7 }, { - "epoch": 0.001119664100769769, - "grad_norm": 1.5383810682727819, - "learning_rate": 1.627906976744186e-06, - "loss": 1.7739, + "epoch": 0.002239014833473272, + "grad_norm": 1.7659390136520288, + "learning_rate": 1.3059701492537314e-06, + "loss": 1.81, "step": 8 }, { - "epoch": 0.0012596221133659902, - "grad_norm": 2.321401184160743, - "learning_rate": 1.86046511627907e-06, - "loss": 1.7974, + "epoch": 0.0025188916876574307, + "grad_norm": 1.4049173237098618, + "learning_rate": 1.4925373134328358e-06, + "loss": 1.7558, "step": 9 }, { - "epoch": 0.0013995801259622112, - "grad_norm": 1.5959823871905185, - "learning_rate": 2.0930232558139536e-06, - "loss": 1.802, + "epoch": 0.00279876854184159, + "grad_norm": 1.8665887897015843, + "learning_rate": 1.6791044776119404e-06, + "loss": 1.809, "step": 10 }, { - "epoch": 0.0015395381385584325, - "grad_norm": 1.4312201376848954, - "learning_rate": 2.325581395348837e-06, - "loss": 1.8292, + "epoch": 0.0030786453960257487, + "grad_norm": 1.4414438014667166, + "learning_rate": 1.8656716417910446e-06, + "loss": 1.7982, "step": 11 }, { - "epoch": 0.0016794961511546536, - "grad_norm": 1.3848325251343343, - "learning_rate": 2.558139534883721e-06, - "loss": 1.7843, + "epoch": 0.0033585222502099076, + "grad_norm": 1.5505834107968952, + "learning_rate": 2.0522388059701493e-06, + "loss": 1.8397, "step": 12 }, { - "epoch": 0.0018194541637508747, - "grad_norm": 1.5003345129280181, - "learning_rate": 2.7906976744186046e-06, - "loss": 1.8619, + "epoch": 0.003638399104394067, + "grad_norm": 1.5140005067509932, + "learning_rate": 2.238805970149254e-06, + "loss": 1.7652, "step": 13 }, { - "epoch": 0.001959412176347096, - "grad_norm": 1.5397334321812373, - "learning_rate": 3.0232558139534885e-06, - "loss": 1.8203, + "epoch": 0.003918275958578225, + "grad_norm": 3.009122030724365, + "learning_rate": 2.4253731343283585e-06, + "loss": 1.7922, "step": 14 }, { - "epoch": 0.002099370188943317, - "grad_norm": 1.5024026903634549, - "learning_rate": 3.255813953488372e-06, - "loss": 1.7945, + "epoch": 0.0041981528127623844, + "grad_norm": 1.4566857198502647, + "learning_rate": 2.6119402985074627e-06, + "loss": 1.7847, "step": 15 }, { - "epoch": 0.002239328201539538, - "grad_norm": 1.4537582898628276, - "learning_rate": 3.488372093023256e-06, - "loss": 1.7954, + "epoch": 0.004478029666946544, + "grad_norm": 1.641801445984581, + "learning_rate": 2.7985074626865674e-06, + "loss": 1.7609, "step": 16 }, { - "epoch": 0.0023792862141357595, - "grad_norm": 1.4392959782369958, - "learning_rate": 3.72093023255814e-06, - "loss": 1.7539, + "epoch": 0.004757906521130702, + "grad_norm": 1.4761399904976473, + "learning_rate": 2.9850746268656716e-06, + "loss": 1.7963, "step": 17 }, { - "epoch": 0.0025192442267319803, - "grad_norm": 1.4318028272838452, - "learning_rate": 3.953488372093024e-06, - "loss": 1.7287, + "epoch": 0.005037783375314861, + "grad_norm": 1.5555108403101896, + "learning_rate": 3.171641791044776e-06, + "loss": 1.7737, "step": 18 }, { - "epoch": 0.0026592022393282016, - "grad_norm": 1.5567916289389137, - "learning_rate": 4.186046511627907e-06, - "loss": 1.7651, + "epoch": 0.005317660229499021, + "grad_norm": 1.493846433622173, + "learning_rate": 3.358208955223881e-06, + "loss": 1.7524, "step": 19 }, { - "epoch": 0.0027991602519244225, - "grad_norm": 2.133786571708112, - "learning_rate": 4.418604651162791e-06, - "loss": 1.7431, + "epoch": 0.00559753708368318, + "grad_norm": 1.4220754152692414, + "learning_rate": 3.544776119402985e-06, + "loss": 1.7791, "step": 20 }, { - "epoch": 0.0029391182645206438, - "grad_norm": 1.3887907218945514, - "learning_rate": 4.651162790697674e-06, - "loss": 1.769, + "epoch": 0.005877413937867338, + "grad_norm": 1.8947233265935908, + "learning_rate": 3.7313432835820893e-06, + "loss": 1.7816, "step": 21 }, { - "epoch": 0.003079076277116865, - "grad_norm": 1.4440046752202067, - "learning_rate": 4.883720930232559e-06, - "loss": 1.6852, + "epoch": 0.0061572907920514975, + "grad_norm": 1.407763579837366, + "learning_rate": 3.917910447761194e-06, + "loss": 1.7954, "step": 22 }, { - "epoch": 0.003219034289713086, - "grad_norm": 1.7560763608783, - "learning_rate": 5.116279069767442e-06, - "loss": 1.7515, + "epoch": 0.006437167646235657, + "grad_norm": 1.5237240703857127, + "learning_rate": 4.1044776119402985e-06, + "loss": 1.7398, "step": 23 }, { - "epoch": 0.0033589923023093072, - "grad_norm": 1.4866717560719882, - "learning_rate": 5.348837209302326e-06, - "loss": 1.7103, + "epoch": 0.006717044500419815, + "grad_norm": 1.4696094431787539, + "learning_rate": 4.291044776119403e-06, + "loss": 1.7687, "step": 24 }, { - "epoch": 0.0034989503149055285, - "grad_norm": 1.3191066550690622, - "learning_rate": 5.581395348837209e-06, - "loss": 1.6115, + "epoch": 0.006996921354603974, + "grad_norm": 1.4428552674089288, + "learning_rate": 4.477611940298508e-06, + "loss": 1.7289, "step": 25 }, { - "epoch": 0.0036389083275017494, - "grad_norm": 3.5279348783477205, - "learning_rate": 5.8139534883720935e-06, - "loss": 1.639, + "epoch": 0.007276798208788134, + "grad_norm": 2.662733330011018, + "learning_rate": 4.664179104477613e-06, + "loss": 1.6835, "step": 26 }, { - "epoch": 0.0037788663400979707, - "grad_norm": 1.4814448441008254, - "learning_rate": 6.046511627906977e-06, - "loss": 1.6249, + "epoch": 0.007556675062972292, + "grad_norm": 1.3563238971244345, + "learning_rate": 4.850746268656717e-06, + "loss": 1.6712, "step": 27 }, { - "epoch": 0.003918824352694192, - "grad_norm": 1.2456228376106218, - "learning_rate": 6.279069767441861e-06, - "loss": 1.5937, + "epoch": 0.00783655191715645, + "grad_norm": 1.2826269537602049, + "learning_rate": 5.037313432835821e-06, + "loss": 1.6464, "step": 28 }, { - "epoch": 0.004058782365290413, - "grad_norm": 1.2032083110394567, - "learning_rate": 6.511627906976744e-06, - "loss": 1.57, + "epoch": 0.00811642877134061, + "grad_norm": 1.2434477139354885, + "learning_rate": 5.2238805970149255e-06, + "loss": 1.6427, "step": 29 }, { - "epoch": 0.004198740377886634, - "grad_norm": 1.1873685890041599, - "learning_rate": 6.744186046511629e-06, - "loss": 1.5443, + "epoch": 0.008396305625524769, + "grad_norm": 1.2654431934709676, + "learning_rate": 5.41044776119403e-06, + "loss": 1.5934, "step": 30 }, { - "epoch": 0.0043386983904828555, - "grad_norm": 1.1206050573785102, - "learning_rate": 6.976744186046512e-06, - "loss": 1.4658, + "epoch": 0.008676182479708928, + "grad_norm": 2.014623704486581, + "learning_rate": 5.597014925373135e-06, + "loss": 1.5545, "step": 31 }, { - "epoch": 0.004478656403079076, - "grad_norm": 1.613173262015054, - "learning_rate": 7.209302325581396e-06, - "loss": 1.5061, + "epoch": 0.008956059333893087, + "grad_norm": 1.2003860288390893, + "learning_rate": 5.783582089552239e-06, + "loss": 1.6107, "step": 32 }, { - "epoch": 0.004618614415675297, - "grad_norm": 1.0997443478827273, - "learning_rate": 7.44186046511628e-06, - "loss": 1.4946, + "epoch": 0.009235936188077247, + "grad_norm": 1.1313653705450164, + "learning_rate": 5.970149253731343e-06, + "loss": 1.5306, "step": 33 }, { - "epoch": 0.004758572428271519, - "grad_norm": 1.0976998793927069, - "learning_rate": 7.674418604651164e-06, - "loss": 1.4857, + "epoch": 0.009515813042261404, + "grad_norm": 1.0408280246236337, + "learning_rate": 6.156716417910448e-06, + "loss": 1.5471, "step": 34 }, { - "epoch": 0.00489853044086774, - "grad_norm": 0.9524795998625615, - "learning_rate": 7.906976744186048e-06, - "loss": 1.4493, + "epoch": 0.009795689896445563, + "grad_norm": 1.144505928801738, + "learning_rate": 6.343283582089552e-06, + "loss": 1.4912, "step": 35 }, { - "epoch": 0.005038488453463961, - "grad_norm": 0.9642671767042398, - "learning_rate": 8.139534883720931e-06, - "loss": 1.4039, + "epoch": 0.010075566750629723, + "grad_norm": 1.0617565147825843, + "learning_rate": 6.529850746268657e-06, + "loss": 1.5086, "step": 36 }, { - "epoch": 0.005178446466060182, - "grad_norm": 0.8572170901816704, - "learning_rate": 8.372093023255815e-06, - "loss": 1.3529, + "epoch": 0.010355443604813882, + "grad_norm": 1.0091951750456902, + "learning_rate": 6.716417910447762e-06, + "loss": 1.4814, "step": 37 }, { - "epoch": 0.005318404478656403, - "grad_norm": 0.8008007221794986, - "learning_rate": 8.604651162790698e-06, - "loss": 1.3514, + "epoch": 0.010635320458998041, + "grad_norm": 0.9363987444801333, + "learning_rate": 6.902985074626867e-06, + "loss": 1.4334, "step": 38 }, { - "epoch": 0.005458362491252624, - "grad_norm": 1.484907272279964, - "learning_rate": 8.837209302325582e-06, - "loss": 1.3637, + "epoch": 0.0109151973131822, + "grad_norm": 0.8681716751168916, + "learning_rate": 7.08955223880597e-06, + "loss": 1.4477, "step": 39 }, { - "epoch": 0.005598320503848845, - "grad_norm": 1.3633541730503271, - "learning_rate": 9.069767441860467e-06, - "loss": 1.3263, + "epoch": 0.01119507416736636, + "grad_norm": 0.8320202206333529, + "learning_rate": 7.276119402985075e-06, + "loss": 1.4034, "step": 40 }, { - "epoch": 0.005738278516445067, - "grad_norm": 0.6081019616299234, - "learning_rate": 9.302325581395349e-06, - "loss": 1.333, + "epoch": 0.011474951021550517, + "grad_norm": 0.821194741981735, + "learning_rate": 7.4626865671641785e-06, + "loss": 1.3807, "step": 41 }, { - "epoch": 0.0058782365290412876, - "grad_norm": 0.6853488905437116, - "learning_rate": 9.534883720930234e-06, - "loss": 1.2903, + "epoch": 0.011754827875734676, + "grad_norm": 0.7123537368766032, + "learning_rate": 7.649253731343284e-06, + "loss": 1.343, "step": 42 }, { - "epoch": 0.006018194541637508, - "grad_norm": 0.8466578955626367, - "learning_rate": 9.767441860465117e-06, - "loss": 1.3079, + "epoch": 0.012034704729918836, + "grad_norm": 0.6977173373063026, + "learning_rate": 7.835820895522389e-06, + "loss": 1.3257, "step": 43 }, { - "epoch": 0.00615815255423373, - "grad_norm": 0.567927473680037, - "learning_rate": 1e-05, - "loss": 1.2735, + "epoch": 0.012314581584102995, + "grad_norm": 0.8881966783800591, + "learning_rate": 8.022388059701493e-06, + "loss": 1.3192, "step": 44 }, { - "epoch": 0.006298110566829951, - "grad_norm": 0.6150202452547671, - "learning_rate": 1.0232558139534884e-05, - "loss": 1.2115, + "epoch": 0.012594458438287154, + "grad_norm": 0.7281807244161581, + "learning_rate": 8.208955223880597e-06, + "loss": 1.2733, "step": 45 }, { - "epoch": 0.006438068579426172, - "grad_norm": 0.4989582290513981, - "learning_rate": 1.0465116279069768e-05, - "loss": 1.2315, + "epoch": 0.012874335292471313, + "grad_norm": 0.5786930321019962, + "learning_rate": 8.395522388059703e-06, + "loss": 1.2658, "step": 46 }, { - "epoch": 0.006578026592022394, - "grad_norm": 0.671517817392481, - "learning_rate": 1.0697674418604651e-05, - "loss": 1.211, + "epoch": 0.013154212146655471, + "grad_norm": 0.5312407865275912, + "learning_rate": 8.582089552238805e-06, + "loss": 1.2432, "step": 47 }, { - "epoch": 0.0067179846046186145, - "grad_norm": 0.4743826379981072, - "learning_rate": 1.0930232558139537e-05, - "loss": 1.2323, + "epoch": 0.01343408900083963, + "grad_norm": 0.5018260207663885, + "learning_rate": 8.768656716417911e-06, + "loss": 1.2261, "step": 48 }, { - "epoch": 0.006857942617214835, - "grad_norm": 0.6130428027193264, - "learning_rate": 1.1162790697674418e-05, - "loss": 1.1871, + "epoch": 0.01371396585502379, + "grad_norm": 0.5587813650021505, + "learning_rate": 8.955223880597016e-06, + "loss": 1.2251, "step": 49 }, { - "epoch": 0.006997900629811057, - "grad_norm": 0.5556426649020297, - "learning_rate": 1.1395348837209304e-05, - "loss": 1.1869, + "epoch": 0.013993842709207949, + "grad_norm": 0.6161639441738711, + "learning_rate": 9.14179104477612e-06, + "loss": 1.2459, "step": 50 }, { - "epoch": 0.007137858642407278, - "grad_norm": 1.896863162731097, - "learning_rate": 1.1627906976744187e-05, - "loss": 1.1683, + "epoch": 0.014273719563392108, + "grad_norm": 0.8289046689676719, + "learning_rate": 9.328358208955226e-06, + "loss": 1.222, "step": 51 }, { - "epoch": 0.007277816655003499, - "grad_norm": 0.8412820077594921, - "learning_rate": 1.186046511627907e-05, - "loss": 1.1453, + "epoch": 0.014553596417576267, + "grad_norm": 0.47193853390904894, + "learning_rate": 9.514925373134328e-06, + "loss": 1.202, "step": 52 }, { - "epoch": 0.00741777466759972, - "grad_norm": 0.5453872617764336, - "learning_rate": 1.2093023255813954e-05, - "loss": 1.1449, + "epoch": 0.014833473271760425, + "grad_norm": 0.39196836071066965, + "learning_rate": 9.701492537313434e-06, + "loss": 1.2004, "step": 53 }, { - "epoch": 0.007557732680195941, - "grad_norm": 0.5447374444716608, - "learning_rate": 1.2325581395348838e-05, - "loss": 1.0974, + "epoch": 0.015113350125944584, + "grad_norm": 0.3947951825723403, + "learning_rate": 9.888059701492537e-06, + "loss": 1.1622, "step": 54 }, { - "epoch": 0.007697690692792162, - "grad_norm": 0.4102706328599184, - "learning_rate": 1.2558139534883723e-05, - "loss": 1.1077, + "epoch": 0.015393226980128743, + "grad_norm": 0.41206724699457215, + "learning_rate": 1.0074626865671643e-05, + "loss": 1.1594, "step": 55 }, { - "epoch": 0.007837648705388384, - "grad_norm": 3.5443873887390946, - "learning_rate": 1.2790697674418606e-05, - "loss": 1.1119, + "epoch": 0.0156731038343129, + "grad_norm": 0.4376944946677843, + "learning_rate": 1.0261194029850747e-05, + "loss": 1.1592, "step": 56 }, { - "epoch": 0.007977606717984604, - "grad_norm": 0.46404403603707844, - "learning_rate": 1.3023255813953488e-05, - "loss": 1.074, + "epoch": 0.01595298068849706, + "grad_norm": 0.3668202168603127, + "learning_rate": 1.0447761194029851e-05, + "loss": 1.1709, "step": 57 }, { - "epoch": 0.008117564730580826, - "grad_norm": 0.39045250619813876, - "learning_rate": 1.3255813953488372e-05, - "loss": 1.1102, + "epoch": 0.01623285754268122, + "grad_norm": 0.4159298936193091, + "learning_rate": 1.0634328358208955e-05, + "loss": 1.1241, "step": 58 }, { - "epoch": 0.008257522743177047, - "grad_norm": 1.343556937733491, - "learning_rate": 1.3488372093023258e-05, - "loss": 1.1127, + "epoch": 0.01651273439686538, + "grad_norm": 0.35125578222565534, + "learning_rate": 1.082089552238806e-05, + "loss": 1.1084, "step": 59 }, { - "epoch": 0.008397480755773267, - "grad_norm": 0.7610889883379984, - "learning_rate": 1.372093023255814e-05, - "loss": 1.0093, + "epoch": 0.016792611251049538, + "grad_norm": 0.5341734892876742, + "learning_rate": 1.1007462686567165e-05, + "loss": 1.1317, "step": 60 }, { - "epoch": 0.00853743876836949, - "grad_norm": 0.4556554186064198, - "learning_rate": 1.3953488372093024e-05, - "loss": 1.0192, + "epoch": 0.017072488105233697, + "grad_norm": 0.834819597810914, + "learning_rate": 1.119402985074627e-05, + "loss": 1.1106, "step": 61 }, { - "epoch": 0.008677396780965711, - "grad_norm": 0.501444946545551, - "learning_rate": 1.4186046511627907e-05, - "loss": 1.0403, + "epoch": 0.017352364959417856, + "grad_norm": 0.3210562794260083, + "learning_rate": 1.1380597014925374e-05, + "loss": 1.0916, "step": 62 }, { - "epoch": 0.008817354793561931, - "grad_norm": 0.35988075895590804, - "learning_rate": 1.4418604651162792e-05, - "loss": 1.1192, + "epoch": 0.017632241813602016, + "grad_norm": 0.380813227684211, + "learning_rate": 1.1567164179104478e-05, + "loss": 1.1158, "step": 63 }, { - "epoch": 0.008957312806158153, - "grad_norm": 0.6921457564720671, - "learning_rate": 1.4651162790697676e-05, - "loss": 1.056, + "epoch": 0.017912118667786175, + "grad_norm": 0.3239524649011721, + "learning_rate": 1.1753731343283584e-05, + "loss": 1.1236, "step": 64 }, { - "epoch": 0.009097270818754374, - "grad_norm": 0.37626208734508315, - "learning_rate": 1.488372093023256e-05, - "loss": 1.0427, + "epoch": 0.018191995521970334, + "grad_norm": 0.9504123736544604, + "learning_rate": 1.1940298507462686e-05, + "loss": 1.0827, "step": 65 }, { - "epoch": 0.009237228831350594, - "grad_norm": 0.4227916388896408, - "learning_rate": 1.5116279069767441e-05, - "loss": 1.0649, + "epoch": 0.018471872376154493, + "grad_norm": 0.2923251684511702, + "learning_rate": 1.2126865671641792e-05, + "loss": 1.0691, "step": 66 }, { - "epoch": 0.009377186843946816, - "grad_norm": 0.43865399337074507, - "learning_rate": 1.5348837209302328e-05, - "loss": 1.0342, + "epoch": 0.018751749230338653, + "grad_norm": 0.31695938515870953, + "learning_rate": 1.2313432835820896e-05, + "loss": 1.0576, "step": 67 }, { - "epoch": 0.009517144856543038, - "grad_norm": 1.1110606201155102, - "learning_rate": 1.558139534883721e-05, - "loss": 1.079, + "epoch": 0.01903162608452281, + "grad_norm": 0.37524185307654057, + "learning_rate": 1.25e-05, + "loss": 1.0269, "step": 68 }, { - "epoch": 0.009657102869139258, - "grad_norm": 0.3429950483555357, - "learning_rate": 1.5813953488372095e-05, - "loss": 1.0354, + "epoch": 0.019311502938706968, + "grad_norm": 0.43494475956498324, + "learning_rate": 1.2686567164179105e-05, + "loss": 1.0618, "step": 69 }, { - "epoch": 0.00979706088173548, - "grad_norm": 0.500911837092983, - "learning_rate": 1.6046511627906977e-05, - "loss": 1.0117, + "epoch": 0.019591379792891127, + "grad_norm": 0.3634493682472731, + "learning_rate": 1.287313432835821e-05, + "loss": 1.0541, "step": 70 }, { - "epoch": 0.009937018894331701, - "grad_norm": 0.5054192697417574, - "learning_rate": 1.6279069767441862e-05, - "loss": 1.0435, + "epoch": 0.019871256647075286, + "grad_norm": 0.3831070498006833, + "learning_rate": 1.3059701492537313e-05, + "loss": 1.0448, "step": 71 }, { - "epoch": 0.010076976906927921, - "grad_norm": 0.4024563344236819, - "learning_rate": 1.6511627906976744e-05, - "loss": 1.045, + "epoch": 0.020151133501259445, + "grad_norm": 0.33594175735888865, + "learning_rate": 1.3246268656716417e-05, + "loss": 1.0254, "step": 72 }, { - "epoch": 0.010216934919524143, - "grad_norm": 0.4790998578083277, - "learning_rate": 1.674418604651163e-05, - "loss": 1.0253, + "epoch": 0.020431010355443605, + "grad_norm": 0.31128046999844944, + "learning_rate": 1.3432835820895523e-05, + "loss": 1.027, "step": 73 }, { - "epoch": 0.010356892932120365, - "grad_norm": 0.45731467968480566, - "learning_rate": 1.697674418604651e-05, - "loss": 1.0088, + "epoch": 0.020710887209627764, + "grad_norm": 0.29518135623840586, + "learning_rate": 1.3619402985074628e-05, + "loss": 1.0395, "step": 74 }, { - "epoch": 0.010496850944716585, - "grad_norm": 0.4617808389918637, - "learning_rate": 1.7209302325581396e-05, - "loss": 0.9946, + "epoch": 0.020990764063811923, + "grad_norm": 0.405888150308363, + "learning_rate": 1.3805970149253733e-05, + "loss": 1.0591, "step": 75 }, { - "epoch": 0.010636808957312806, - "grad_norm": 0.4274246905900338, - "learning_rate": 1.744186046511628e-05, - "loss": 1.0078, + "epoch": 0.021270640917996082, + "grad_norm": 0.33804780440415055, + "learning_rate": 1.3992537313432836e-05, + "loss": 1.0109, "step": 76 }, { - "epoch": 0.010776766969909026, - "grad_norm": 0.4859617233595146, - "learning_rate": 1.7674418604651163e-05, - "loss": 1.0292, + "epoch": 0.02155051777218024, + "grad_norm": 0.44351887804070345, + "learning_rate": 1.417910447761194e-05, + "loss": 0.9729, "step": 77 }, { - "epoch": 0.010916724982505248, - "grad_norm": 0.5837878294485921, - "learning_rate": 1.7906976744186045e-05, - "loss": 1.0338, + "epoch": 0.0218303946263644, + "grad_norm": 0.3591134895473745, + "learning_rate": 1.4365671641791046e-05, + "loss": 1.0168, "step": 78 }, { - "epoch": 0.01105668299510147, - "grad_norm": 1.7173283109289441, - "learning_rate": 1.8139534883720934e-05, - "loss": 1.0126, + "epoch": 0.02211027148054856, + "grad_norm": 0.46503349012770845, + "learning_rate": 1.455223880597015e-05, + "loss": 1.0353, "step": 79 }, { - "epoch": 0.01119664100769769, - "grad_norm": 0.5256113477743695, - "learning_rate": 1.8372093023255815e-05, - "loss": 0.9907, + "epoch": 0.02239014833473272, + "grad_norm": 0.29491222542801054, + "learning_rate": 1.4738805970149256e-05, + "loss": 1.0137, "step": 80 }, { - "epoch": 0.011336599020293912, - "grad_norm": 0.4483569419385721, - "learning_rate": 1.8604651162790697e-05, - "loss": 0.9643, + "epoch": 0.022670025188916875, + "grad_norm": 0.3312727144725203, + "learning_rate": 1.4925373134328357e-05, + "loss": 1.0049, "step": 81 }, { - "epoch": 0.011476557032890133, - "grad_norm": 0.4539441530526643, - "learning_rate": 1.8837209302325582e-05, - "loss": 1.0462, + "epoch": 0.022949902043101034, + "grad_norm": 0.4164987168137994, + "learning_rate": 1.5111940298507463e-05, + "loss": 0.9981, "step": 82 }, { - "epoch": 0.011616515045486353, - "grad_norm": 0.33426240052710565, - "learning_rate": 1.9069767441860468e-05, - "loss": 0.954, + "epoch": 0.023229778897285194, + "grad_norm": 0.31812835092237385, + "learning_rate": 1.529850746268657e-05, + "loss": 0.9707, "step": 83 }, { - "epoch": 0.011756473058082575, - "grad_norm": 0.3396314689877589, - "learning_rate": 1.930232558139535e-05, - "loss": 0.9857, + "epoch": 0.023509655751469353, + "grad_norm": 0.38858425420869547, + "learning_rate": 1.5485074626865673e-05, + "loss": 1.01, "step": 84 }, { - "epoch": 0.011896431070678797, - "grad_norm": 0.395155279481088, - "learning_rate": 1.9534883720930235e-05, - "loss": 1.004, + "epoch": 0.023789532605653512, + "grad_norm": 0.29944476492402083, + "learning_rate": 1.5671641791044777e-05, + "loss": 1.0048, "step": 85 }, { - "epoch": 0.012036389083275017, - "grad_norm": 0.38928902258671755, - "learning_rate": 1.9767441860465116e-05, - "loss": 0.9253, + "epoch": 0.02406940945983767, + "grad_norm": 0.40597927338494716, + "learning_rate": 1.585820895522388e-05, + "loss": 0.9852, "step": 86 }, { - "epoch": 0.012176347095871239, - "grad_norm": 0.3964184151155477, - "learning_rate": 2e-05, - "loss": 0.9562, + "epoch": 0.02434928631402183, + "grad_norm": 0.5099191281965668, + "learning_rate": 1.6044776119402986e-05, + "loss": 0.9473, "step": 87 }, { - "epoch": 0.01231630510846746, - "grad_norm": 0.41507841516147836, - "learning_rate": 2.0232558139534883e-05, - "loss": 0.9857, + "epoch": 0.02462916316820599, + "grad_norm": 0.3478753122305574, + "learning_rate": 1.623134328358209e-05, + "loss": 0.9482, "step": 88 }, { - "epoch": 0.01245626312106368, - "grad_norm": 0.3303740429955767, - "learning_rate": 2.046511627906977e-05, - "loss": 0.96, + "epoch": 0.02490904002239015, + "grad_norm": 0.2991341678437568, + "learning_rate": 1.6417910447761194e-05, + "loss": 0.98, "step": 89 }, { - "epoch": 0.012596221133659902, - "grad_norm": 0.35729090571512173, - "learning_rate": 2.069767441860465e-05, - "loss": 0.9453, + "epoch": 0.02518891687657431, + "grad_norm": 1.2717574046483888, + "learning_rate": 1.66044776119403e-05, + "loss": 0.9714, "step": 90 }, { - "epoch": 0.012736179146256124, - "grad_norm": 0.34652306094213037, - "learning_rate": 2.0930232558139536e-05, - "loss": 0.9478, + "epoch": 0.025468793730758468, + "grad_norm": 0.36168485005523193, + "learning_rate": 1.6791044776119406e-05, + "loss": 0.9864, "step": 91 }, { - "epoch": 0.012876137158852344, - "grad_norm": 0.3781323029481633, - "learning_rate": 2.116279069767442e-05, - "loss": 0.9232, + "epoch": 0.025748670584942627, + "grad_norm": 0.2553235723041327, + "learning_rate": 1.6977611940298507e-05, + "loss": 0.9544, "step": 92 }, { - "epoch": 0.013016095171448566, - "grad_norm": 0.3615380548631367, - "learning_rate": 2.1395348837209303e-05, - "loss": 0.903, + "epoch": 0.026028547439126783, + "grad_norm": 0.38968799546815525, + "learning_rate": 1.716417910447761e-05, + "loss": 0.9673, "step": 93 }, { - "epoch": 0.013156053184044787, - "grad_norm": 0.4096150963666029, - "learning_rate": 2.1627906976744184e-05, - "loss": 0.9599, + "epoch": 0.026308424293310942, + "grad_norm": 0.29040443572150015, + "learning_rate": 1.735074626865672e-05, + "loss": 0.9604, "step": 94 }, { - "epoch": 0.013296011196641007, - "grad_norm": 0.38313805223400516, - "learning_rate": 2.1860465116279073e-05, - "loss": 0.9512, + "epoch": 0.0265883011474951, + "grad_norm": 0.33718051088371714, + "learning_rate": 1.7537313432835823e-05, + "loss": 0.9513, "step": 95 }, { - "epoch": 0.013435969209237229, - "grad_norm": 0.4249487976331709, - "learning_rate": 2.2093023255813955e-05, - "loss": 0.888, + "epoch": 0.02686817800167926, + "grad_norm": 0.25695938572493227, + "learning_rate": 1.7723880597014927e-05, + "loss": 0.9611, "step": 96 }, { - "epoch": 0.01357592722183345, - "grad_norm": 0.3958713012909362, - "learning_rate": 2.2325581395348837e-05, - "loss": 0.9366, + "epoch": 0.02714805485586342, + "grad_norm": 0.37451233625994973, + "learning_rate": 1.791044776119403e-05, + "loss": 0.9304, "step": 97 }, { - "epoch": 0.01371588523442967, - "grad_norm": 0.3666415905158996, - "learning_rate": 2.2558139534883722e-05, - "loss": 0.9161, + "epoch": 0.02742793171004758, + "grad_norm": 0.2770675190804006, + "learning_rate": 1.8097014925373135e-05, + "loss": 0.9458, "step": 98 }, { - "epoch": 0.013855843247025892, - "grad_norm": 0.4821438335731706, - "learning_rate": 2.2790697674418607e-05, - "loss": 0.9352, + "epoch": 0.027707808564231738, + "grad_norm": 0.6986110230661327, + "learning_rate": 1.828358208955224e-05, + "loss": 0.9232, "step": 99 }, { - "epoch": 0.013995801259622114, - "grad_norm": 0.3878615433598532, - "learning_rate": 2.302325581395349e-05, - "loss": 0.9557, + "epoch": 0.027987685418415897, + "grad_norm": 0.5685133471176088, + "learning_rate": 1.8470149253731344e-05, + "loss": 0.9216, "step": 100 }, { - "epoch": 0.014135759272218334, - "grad_norm": 5.323533163270388, - "learning_rate": 2.3255813953488374e-05, - "loss": 0.9419, + "epoch": 0.028267562272600057, + "grad_norm": 0.3056276167497264, + "learning_rate": 1.865671641791045e-05, + "loss": 0.9487, "step": 101 }, { - "epoch": 0.014275717284814556, - "grad_norm": 0.6042685685685957, - "learning_rate": 2.3488372093023256e-05, - "loss": 0.9249, + "epoch": 0.028547439126784216, + "grad_norm": 0.690496142181496, + "learning_rate": 1.8843283582089552e-05, + "loss": 0.9603, "step": 102 }, { - "epoch": 0.014415675297410778, - "grad_norm": 0.4137056327381694, - "learning_rate": 2.372093023255814e-05, - "loss": 0.9179, + "epoch": 0.028827315980968375, + "grad_norm": 0.2512661906485543, + "learning_rate": 1.9029850746268656e-05, + "loss": 0.912, "step": 103 }, { - "epoch": 0.014555633310006998, - "grad_norm": 0.3772048144992787, - "learning_rate": 2.3953488372093026e-05, - "loss": 0.9144, + "epoch": 0.029107192835152534, + "grad_norm": 0.2962493395206657, + "learning_rate": 1.921641791044776e-05, + "loss": 0.9254, "step": 104 }, { - "epoch": 0.01469559132260322, - "grad_norm": 0.4175730821083398, - "learning_rate": 2.4186046511627908e-05, - "loss": 0.9227, + "epoch": 0.02938706968933669, + "grad_norm": 0.3267774778813336, + "learning_rate": 1.9402985074626868e-05, + "loss": 0.9106, "step": 105 }, { - "epoch": 0.01483554933519944, - "grad_norm": 0.40061188528050645, - "learning_rate": 2.441860465116279e-05, - "loss": 0.9278, + "epoch": 0.02966694654352085, + "grad_norm": 0.30294232804776927, + "learning_rate": 1.9589552238805972e-05, + "loss": 0.9074, "step": 106 }, { - "epoch": 0.014975507347795661, - "grad_norm": 0.3978558565443655, - "learning_rate": 2.4651162790697675e-05, - "loss": 0.8924, + "epoch": 0.02994682339770501, + "grad_norm": 0.3343955678765164, + "learning_rate": 1.9776119402985073e-05, + "loss": 0.9306, "step": 107 }, { - "epoch": 0.015115465360391883, - "grad_norm": 0.8747382632018756, - "learning_rate": 2.488372093023256e-05, - "loss": 0.9015, + "epoch": 0.030226700251889168, + "grad_norm": 0.33550956325604053, + "learning_rate": 1.996268656716418e-05, + "loss": 0.8916, "step": 108 }, { - "epoch": 0.015255423372988103, - "grad_norm": 0.39737512145413006, - "learning_rate": 2.5116279069767445e-05, - "loss": 0.9201, + "epoch": 0.030506577106073327, + "grad_norm": 0.2899729584402835, + "learning_rate": 2.0149253731343285e-05, + "loss": 0.8908, "step": 109 }, { - "epoch": 0.015395381385584325, - "grad_norm": 0.41650482599802, - "learning_rate": 2.5348837209302327e-05, - "loss": 0.8626, + "epoch": 0.030786453960257486, + "grad_norm": 0.47310364998373344, + "learning_rate": 2.033582089552239e-05, + "loss": 0.9328, "step": 110 }, { - "epoch": 0.015535339398180546, - "grad_norm": 0.42011360836146605, - "learning_rate": 2.5581395348837212e-05, - "loss": 0.8862, + "epoch": 0.031066330814441646, + "grad_norm": 0.28797628991236673, + "learning_rate": 2.0522388059701493e-05, + "loss": 0.9037, "step": 111 }, { - "epoch": 0.015675297410776768, - "grad_norm": 0.4680549878413964, - "learning_rate": 2.5813953488372094e-05, - "loss": 0.9105, + "epoch": 0.0313462076686258, + "grad_norm": 0.33945200140239673, + "learning_rate": 2.0708955223880598e-05, + "loss": 0.9206, "step": 112 }, { - "epoch": 0.01581525542337299, - "grad_norm": 0.49968103218957055, - "learning_rate": 2.6046511627906976e-05, - "loss": 0.8913, + "epoch": 0.03162608452280996, + "grad_norm": 0.5558734834690924, + "learning_rate": 2.0895522388059702e-05, + "loss": 0.9297, "step": 113 }, { - "epoch": 0.015955213435969208, - "grad_norm": 0.49556276645971464, - "learning_rate": 2.627906976744186e-05, - "loss": 0.9266, + "epoch": 0.03190596137699412, + "grad_norm": 0.29382478566663683, + "learning_rate": 2.1082089552238806e-05, + "loss": 0.8795, "step": 114 }, { - "epoch": 0.01609517144856543, - "grad_norm": 0.40655693565880857, - "learning_rate": 2.6511627906976743e-05, - "loss": 0.8941, + "epoch": 0.03218583823117828, + "grad_norm": 0.3519871926142322, + "learning_rate": 2.126865671641791e-05, + "loss": 0.9089, "step": 115 }, { - "epoch": 0.01623512946116165, - "grad_norm": 0.39000307235707055, - "learning_rate": 2.674418604651163e-05, - "loss": 0.8655, + "epoch": 0.03246571508536244, + "grad_norm": 0.31201842451781076, + "learning_rate": 2.1455223880597018e-05, + "loss": 0.8517, "step": 116 }, { - "epoch": 0.016375087473757873, - "grad_norm": 0.4264245544215563, - "learning_rate": 2.6976744186046517e-05, - "loss": 0.837, + "epoch": 0.0327455919395466, + "grad_norm": 0.6019828532548496, + "learning_rate": 2.164179104477612e-05, + "loss": 0.8786, "step": 117 }, { - "epoch": 0.016515045486354095, - "grad_norm": 0.42674207968546857, - "learning_rate": 2.72093023255814e-05, - "loss": 0.8974, + "epoch": 0.03302546879373076, + "grad_norm": 0.5405213522708816, + "learning_rate": 2.1828358208955223e-05, + "loss": 0.8566, "step": 118 }, { - "epoch": 0.016655003498950313, - "grad_norm": 0.46006604179365407, - "learning_rate": 2.744186046511628e-05, - "loss": 0.9413, + "epoch": 0.033305345647914916, + "grad_norm": 0.4757647550403344, + "learning_rate": 2.201492537313433e-05, + "loss": 0.8996, "step": 119 }, { - "epoch": 0.016794961511546535, - "grad_norm": 0.41235716420572693, - "learning_rate": 2.7674418604651166e-05, - "loss": 0.8682, + "epoch": 0.033585222502099076, + "grad_norm": 0.3453825177094335, + "learning_rate": 2.2201492537313435e-05, + "loss": 0.8582, "step": 120 }, { - "epoch": 0.016934919524142757, - "grad_norm": 0.41352483122836786, - "learning_rate": 2.7906976744186048e-05, - "loss": 0.8926, + "epoch": 0.033865099356283235, + "grad_norm": 0.2944792995014774, + "learning_rate": 2.238805970149254e-05, + "loss": 0.8602, "step": 121 }, { - "epoch": 0.01707487753673898, - "grad_norm": 0.4135656272062978, - "learning_rate": 2.813953488372093e-05, - "loss": 0.8664, + "epoch": 0.034144976210467394, + "grad_norm": 0.37379657884593837, + "learning_rate": 2.2574626865671643e-05, + "loss": 0.8562, "step": 122 }, { - "epoch": 0.0172148355493352, - "grad_norm": 0.40087350807045763, - "learning_rate": 2.8372093023255815e-05, - "loss": 0.8399, + "epoch": 0.03442485306465155, + "grad_norm": 0.382340008793532, + "learning_rate": 2.2761194029850747e-05, + "loss": 0.9138, "step": 123 }, { - "epoch": 0.017354793561931422, - "grad_norm": 0.4308300834648659, - "learning_rate": 2.8604651162790696e-05, - "loss": 0.9084, + "epoch": 0.03470472991883571, + "grad_norm": 0.5434059277538503, + "learning_rate": 2.294776119402985e-05, + "loss": 0.8562, "step": 124 }, { - "epoch": 0.01749475157452764, - "grad_norm": 0.48776022790516643, - "learning_rate": 2.8837209302325585e-05, - "loss": 0.911, + "epoch": 0.03498460677301987, + "grad_norm": 0.34085582829165434, + "learning_rate": 2.3134328358208956e-05, + "loss": 0.8917, "step": 125 }, { - "epoch": 0.017634709587123862, - "grad_norm": 0.4111163096942089, - "learning_rate": 2.9069767441860467e-05, - "loss": 0.8848, + "epoch": 0.03526448362720403, + "grad_norm": 0.36563512573885215, + "learning_rate": 2.332089552238806e-05, + "loss": 0.8787, "step": 126 }, { - "epoch": 0.017774667599720084, - "grad_norm": 0.4381509329328657, - "learning_rate": 2.9302325581395352e-05, - "loss": 0.8876, + "epoch": 0.03554436048138819, + "grad_norm": 0.30805995301934036, + "learning_rate": 2.3507462686567168e-05, + "loss": 0.8695, "step": 127 }, { - "epoch": 0.017914625612316305, - "grad_norm": 0.4459710226095744, - "learning_rate": 2.9534883720930234e-05, - "loss": 0.914, + "epoch": 0.03582423733557235, + "grad_norm": 0.32064094869892495, + "learning_rate": 2.369402985074627e-05, + "loss": 0.8869, "step": 128 }, { - "epoch": 0.018054583624912527, - "grad_norm": 0.7227835408304066, - "learning_rate": 2.976744186046512e-05, - "loss": 0.8587, + "epoch": 0.03610411418975651, + "grad_norm": 0.35001466763801176, + "learning_rate": 2.3880597014925373e-05, + "loss": 0.8405, "step": 129 }, { - "epoch": 0.01819454163750875, - "grad_norm": 0.43926461194413513, - "learning_rate": 3e-05, - "loss": 0.8779, + "epoch": 0.03638399104394067, + "grad_norm": 0.3051929273057518, + "learning_rate": 2.406716417910448e-05, + "loss": 0.8695, "step": 130 }, { - "epoch": 0.018334499650104967, - "grad_norm": 0.44123229572799033, - "learning_rate": 3.0232558139534883e-05, - "loss": 0.8371, + "epoch": 0.03666386789812483, + "grad_norm": 0.31303531514779737, + "learning_rate": 2.4253731343283584e-05, + "loss": 0.8231, "step": 131 }, { - "epoch": 0.01847445766270119, - "grad_norm": 0.5044627843039927, - "learning_rate": 3.0465116279069768e-05, - "loss": 0.8745, + "epoch": 0.036943744752308987, + "grad_norm": 0.3327959941927146, + "learning_rate": 2.444029850746269e-05, + "loss": 0.8222, "step": 132 }, { - "epoch": 0.01861441567529741, - "grad_norm": 0.4802286671204574, - "learning_rate": 3.0697674418604656e-05, - "loss": 0.8163, + "epoch": 0.037223621606493146, + "grad_norm": 0.31649436249699664, + "learning_rate": 2.4626865671641793e-05, + "loss": 0.8646, "step": 133 }, { - "epoch": 0.018754373687893632, - "grad_norm": 0.4753543186959038, - "learning_rate": 3.093023255813954e-05, - "loss": 0.8499, + "epoch": 0.037503498460677305, + "grad_norm": 0.31248535586119264, + "learning_rate": 2.4813432835820897e-05, + "loss": 0.8859, "step": 134 }, { - "epoch": 0.018894331700489854, - "grad_norm": 0.4957303847669158, - "learning_rate": 3.116279069767442e-05, - "loss": 0.8485, + "epoch": 0.037783375314861464, + "grad_norm": 0.31221012646478513, + "learning_rate": 2.5e-05, + "loss": 0.8687, "step": 135 }, { - "epoch": 0.019034289713086076, - "grad_norm": 0.4550945161127889, - "learning_rate": 3.13953488372093e-05, - "loss": 0.7769, + "epoch": 0.03806325216904562, + "grad_norm": 0.31379766380092944, + "learning_rate": 2.5186567164179102e-05, + "loss": 0.8406, "step": 136 }, { - "epoch": 0.019174247725682294, - "grad_norm": 0.6087163747209936, - "learning_rate": 3.162790697674419e-05, - "loss": 0.8363, + "epoch": 0.038343129023229776, + "grad_norm": 0.3342098492012485, + "learning_rate": 2.537313432835821e-05, + "loss": 0.8505, "step": 137 }, { - "epoch": 0.019314205738278516, - "grad_norm": 0.500988900697958, - "learning_rate": 3.186046511627907e-05, - "loss": 0.8602, + "epoch": 0.038623005877413935, + "grad_norm": 0.30891061812009696, + "learning_rate": 2.5559701492537314e-05, + "loss": 0.8608, "step": 138 }, { - "epoch": 0.019454163750874737, - "grad_norm": 0.5444272083147511, - "learning_rate": 3.2093023255813954e-05, - "loss": 0.8407, + "epoch": 0.038902882731598094, + "grad_norm": 0.31111148239775727, + "learning_rate": 2.574626865671642e-05, + "loss": 0.8647, "step": 139 }, { - "epoch": 0.01959412176347096, - "grad_norm": 0.5954883412818768, - "learning_rate": 3.2325581395348836e-05, - "loss": 0.8497, + "epoch": 0.039182759585782254, + "grad_norm": 0.3235359516250046, + "learning_rate": 2.5932835820895522e-05, + "loss": 0.8217, "step": 140 }, { - "epoch": 0.01973407977606718, - "grad_norm": 0.5375949124547408, - "learning_rate": 3.2558139534883724e-05, - "loss": 0.8361, + "epoch": 0.03946263643996641, + "grad_norm": 0.33378846630471376, + "learning_rate": 2.6119402985074626e-05, + "loss": 0.846, "step": 141 }, { - "epoch": 0.019874037788663403, - "grad_norm": 0.5552189636185006, - "learning_rate": 3.2790697674418606e-05, - "loss": 0.8342, + "epoch": 0.03974251329415057, + "grad_norm": 0.3432784883296147, + "learning_rate": 2.6305970149253734e-05, + "loss": 0.8393, "step": 142 }, { - "epoch": 0.02001399580125962, - "grad_norm": 0.5148019418462507, - "learning_rate": 3.302325581395349e-05, - "loss": 0.8261, + "epoch": 0.04002239014833473, + "grad_norm": 0.35622120890161457, + "learning_rate": 2.6492537313432835e-05, + "loss": 0.8608, "step": 143 }, { - "epoch": 0.020153953813855843, - "grad_norm": 0.5700734210055411, - "learning_rate": 3.3255813953488377e-05, - "loss": 0.8323, + "epoch": 0.04030226700251889, + "grad_norm": 0.35471829729039506, + "learning_rate": 2.6679104477611942e-05, + "loss": 0.8326, "step": 144 }, { - "epoch": 0.020293911826452064, - "grad_norm": 0.48554968620911604, - "learning_rate": 3.348837209302326e-05, - "loss": 0.8401, + "epoch": 0.04058214385670305, + "grad_norm": 0.33749459736584697, + "learning_rate": 2.6865671641791047e-05, + "loss": 0.8368, "step": 145 }, { - "epoch": 0.020433869839048286, - "grad_norm": 0.46690708567053424, - "learning_rate": 3.372093023255814e-05, - "loss": 0.832, + "epoch": 0.04086202071088721, + "grad_norm": 0.32296179654799884, + "learning_rate": 2.7052238805970147e-05, + "loss": 0.8323, "step": 146 }, { - "epoch": 0.020573827851644508, - "grad_norm": 0.4830729701490898, - "learning_rate": 3.395348837209302e-05, - "loss": 0.8425, + "epoch": 0.04114189756507137, + "grad_norm": 0.3151895401016503, + "learning_rate": 2.7238805970149255e-05, + "loss": 0.8555, "step": 147 }, { - "epoch": 0.02071378586424073, - "grad_norm": 0.48191860044948637, - "learning_rate": 3.4186046511627904e-05, - "loss": 0.8274, + "epoch": 0.04142177441925553, + "grad_norm": 0.29806035262199587, + "learning_rate": 2.742537313432836e-05, + "loss": 0.8048, "step": 148 }, { - "epoch": 0.020853743876836948, - "grad_norm": 0.5142113223620266, - "learning_rate": 3.441860465116279e-05, - "loss": 0.8972, + "epoch": 0.04170165127343969, + "grad_norm": 0.3434423472592504, + "learning_rate": 2.7611940298507467e-05, + "loss": 0.782, "step": 149 }, { - "epoch": 0.02099370188943317, - "grad_norm": 0.4790668173486759, - "learning_rate": 3.465116279069768e-05, - "loss": 0.8315, + "epoch": 0.041981528127623846, + "grad_norm": 0.34313430189812544, + "learning_rate": 2.7798507462686568e-05, + "loss": 0.827, "step": 150 }, { - "epoch": 0.02113365990202939, - "grad_norm": 0.46716122441432334, - "learning_rate": 3.488372093023256e-05, - "loss": 0.8027, + "epoch": 0.042261404981808005, + "grad_norm": 0.3600176344186182, + "learning_rate": 2.7985074626865672e-05, + "loss": 0.8383, "step": 151 }, { - "epoch": 0.021273617914625613, - "grad_norm": 0.5138958015599178, - "learning_rate": 3.5116279069767445e-05, - "loss": 0.7866, + "epoch": 0.042541281835992165, + "grad_norm": 0.350304616705117, + "learning_rate": 2.817164179104478e-05, + "loss": 0.8405, "step": 152 }, { - "epoch": 0.021413575927221835, - "grad_norm": 0.5846032834190681, - "learning_rate": 3.5348837209302326e-05, - "loss": 0.7656, + "epoch": 0.042821158690176324, + "grad_norm": 0.35965814336077234, + "learning_rate": 2.835820895522388e-05, + "loss": 0.8402, "step": 153 }, { - "epoch": 0.021553533939818053, - "grad_norm": 0.4931365055890448, - "learning_rate": 3.558139534883721e-05, - "loss": 0.7875, + "epoch": 0.04310103554436048, + "grad_norm": 0.32598483337211687, + "learning_rate": 2.8544776119402988e-05, + "loss": 0.831, "step": 154 }, { - "epoch": 0.021693491952414275, - "grad_norm": 0.49665243607020065, - "learning_rate": 3.581395348837209e-05, - "loss": 0.8285, + "epoch": 0.04338091239854464, + "grad_norm": 0.3423837739076617, + "learning_rate": 2.8731343283582092e-05, + "loss": 0.7902, "step": 155 }, { - "epoch": 0.021833449965010496, - "grad_norm": 0.4851409756713546, - "learning_rate": 3.604651162790698e-05, - "loss": 0.8395, + "epoch": 0.0436607892527288, + "grad_norm": 0.3380164512912703, + "learning_rate": 2.8917910447761193e-05, + "loss": 0.7833, "step": 156 }, { - "epoch": 0.021973407977606718, - "grad_norm": 0.4915230656047339, - "learning_rate": 3.627906976744187e-05, - "loss": 0.8381, + "epoch": 0.04394066610691296, + "grad_norm": 0.3380512948518286, + "learning_rate": 2.91044776119403e-05, + "loss": 0.8125, "step": 157 }, { - "epoch": 0.02211336599020294, - "grad_norm": 0.4892154916147055, - "learning_rate": 3.651162790697675e-05, - "loss": 0.8372, + "epoch": 0.04422054296109712, + "grad_norm": 0.3587187062888246, + "learning_rate": 2.92910447761194e-05, + "loss": 0.8129, "step": 158 }, { - "epoch": 0.02225332400279916, - "grad_norm": 0.48541705485781506, - "learning_rate": 3.674418604651163e-05, - "loss": 0.8512, + "epoch": 0.04450041981528128, + "grad_norm": 0.3237088416973387, + "learning_rate": 2.9477611940298512e-05, + "loss": 0.8083, "step": 159 }, { - "epoch": 0.02239328201539538, - "grad_norm": 0.490142048396627, - "learning_rate": 3.697674418604651e-05, - "loss": 0.8185, + "epoch": 0.04478029666946544, + "grad_norm": 0.45938766048445673, + "learning_rate": 2.9664179104477613e-05, + "loss": 0.8071, "step": 160 }, { - "epoch": 0.0225332400279916, - "grad_norm": 0.5674374097426846, - "learning_rate": 3.7209302325581394e-05, - "loss": 0.8101, + "epoch": 0.04506017352364959, + "grad_norm": 0.34486561942202365, + "learning_rate": 2.9850746268656714e-05, + "loss": 0.8334, "step": 161 }, { - "epoch": 0.022673198040587823, - "grad_norm": 0.617554041890815, - "learning_rate": 3.7441860465116276e-05, - "loss": 0.8273, + "epoch": 0.04534005037783375, + "grad_norm": 0.3453695395177396, + "learning_rate": 3.003731343283582e-05, + "loss": 0.8213, "step": 162 }, { - "epoch": 0.022813156053184045, - "grad_norm": 0.5446295352802258, - "learning_rate": 3.7674418604651165e-05, - "loss": 0.867, + "epoch": 0.04561992723201791, + "grad_norm": 0.36176570579211076, + "learning_rate": 3.0223880597014926e-05, + "loss": 0.8032, "step": 163 }, { - "epoch": 0.022953114065780267, - "grad_norm": 0.5370506605400496, - "learning_rate": 3.790697674418605e-05, - "loss": 0.7857, + "epoch": 0.04589980408620207, + "grad_norm": 0.4033961981787797, + "learning_rate": 3.0410447761194033e-05, + "loss": 0.806, "step": 164 }, { - "epoch": 0.02309307207837649, - "grad_norm": 0.5011901313523426, - "learning_rate": 3.8139534883720935e-05, - "loss": 0.7731, + "epoch": 0.04617968094038623, + "grad_norm": 0.38152797111667225, + "learning_rate": 3.059701492537314e-05, + "loss": 0.8116, "step": 165 }, { - "epoch": 0.023233030090972707, - "grad_norm": 0.5329852231835671, - "learning_rate": 3.837209302325582e-05, - "loss": 0.7844, + "epoch": 0.04645955779457039, + "grad_norm": 0.3867278932508252, + "learning_rate": 3.078358208955224e-05, + "loss": 0.8079, "step": 166 }, { - "epoch": 0.02337298810356893, - "grad_norm": 0.5409808244274257, - "learning_rate": 3.86046511627907e-05, - "loss": 0.8565, + "epoch": 0.046739434648754546, + "grad_norm": 0.3662832007508961, + "learning_rate": 3.0970149253731346e-05, + "loss": 0.8502, "step": 167 }, { - "epoch": 0.02351294611616515, - "grad_norm": 0.4698618846754603, - "learning_rate": 3.883720930232558e-05, - "loss": 0.8027, + "epoch": 0.047019311502938706, + "grad_norm": 0.39805167295405547, + "learning_rate": 3.115671641791045e-05, + "loss": 0.7612, "step": 168 }, { - "epoch": 0.023652904128761372, - "grad_norm": 0.5265079885633662, - "learning_rate": 3.906976744186047e-05, - "loss": 0.7934, + "epoch": 0.047299188357122865, + "grad_norm": 0.4207759157399436, + "learning_rate": 3.1343283582089554e-05, + "loss": 0.78, "step": 169 }, { - "epoch": 0.023792862141357594, - "grad_norm": 0.5066786460553115, - "learning_rate": 3.930232558139535e-05, - "loss": 0.8472, + "epoch": 0.047579065211307024, + "grad_norm": 0.4428754011821389, + "learning_rate": 3.1529850746268655e-05, + "loss": 0.78, "step": 170 }, { - "epoch": 0.023932820153953815, - "grad_norm": 0.4895093560469156, - "learning_rate": 3.953488372093023e-05, - "loss": 0.8038, + "epoch": 0.04785894206549118, + "grad_norm": 0.35968432621077273, + "learning_rate": 3.171641791044776e-05, + "loss": 0.7916, "step": 171 }, { - "epoch": 0.024072778166550034, - "grad_norm": 0.4722527785641036, - "learning_rate": 3.9767441860465115e-05, - "loss": 0.7926, + "epoch": 0.04813881891967534, + "grad_norm": 0.7517748076336829, + "learning_rate": 3.190298507462687e-05, + "loss": 0.7901, "step": 172 }, { - "epoch": 0.024212736179146255, - "grad_norm": 0.5273651829904968, - "learning_rate": 4e-05, - "loss": 0.8107, + "epoch": 0.0484186957738595, + "grad_norm": 0.4179752380360452, + "learning_rate": 3.208955223880597e-05, + "loss": 0.7783, "step": 173 }, { - "epoch": 0.024352694191742477, - "grad_norm": 0.5381629057966655, - "learning_rate": 4.0232558139534885e-05, - "loss": 0.7478, + "epoch": 0.04869857262804366, + "grad_norm": 0.3651217198371012, + "learning_rate": 3.227611940298508e-05, + "loss": 0.7923, "step": 174 }, { - "epoch": 0.0244926522043387, - "grad_norm": 0.5109213734104476, - "learning_rate": 4.046511627906977e-05, - "loss": 0.7436, + "epoch": 0.04897844948222782, + "grad_norm": 0.42966943123987805, + "learning_rate": 3.246268656716418e-05, + "loss": 0.7719, "step": 175 }, { - "epoch": 0.02463261021693492, - "grad_norm": 0.5144280803181732, - "learning_rate": 4.0697674418604655e-05, - "loss": 0.8024, + "epoch": 0.04925832633641198, + "grad_norm": 0.3829077114727494, + "learning_rate": 3.264925373134329e-05, + "loss": 0.8053, "step": 176 }, { - "epoch": 0.024772568229531142, - "grad_norm": 0.5119285784380162, - "learning_rate": 4.093023255813954e-05, - "loss": 0.8339, + "epoch": 0.04953820319059614, + "grad_norm": 0.4143034582254228, + "learning_rate": 3.283582089552239e-05, + "loss": 0.8113, "step": 177 }, { - "epoch": 0.02491252624212736, - "grad_norm": 0.5343750927263191, - "learning_rate": 4.116279069767442e-05, - "loss": 0.7744, + "epoch": 0.0498180800447803, + "grad_norm": 0.38297119283356057, + "learning_rate": 3.302238805970149e-05, + "loss": 0.8041, "step": 178 }, { - "epoch": 0.025052484254723582, - "grad_norm": 0.534909362676503, - "learning_rate": 4.13953488372093e-05, - "loss": 0.7942, + "epoch": 0.05009795689896446, + "grad_norm": 0.3961083935491084, + "learning_rate": 3.32089552238806e-05, + "loss": 0.8226, "step": 179 }, { - "epoch": 0.025192442267319804, - "grad_norm": 0.5393369737565247, - "learning_rate": 4.162790697674418e-05, - "loss": 0.7677, + "epoch": 0.05037783375314862, + "grad_norm": 0.3604601053896823, + "learning_rate": 3.3395522388059704e-05, + "loss": 0.7905, "step": 180 }, { - "epoch": 0.025332400279916026, - "grad_norm": 0.4985423936875566, - "learning_rate": 4.186046511627907e-05, - "loss": 0.8088, + "epoch": 0.050657710607332776, + "grad_norm": 0.4452392053944784, + "learning_rate": 3.358208955223881e-05, + "loss": 0.7781, "step": 181 }, { - "epoch": 0.025472358292512248, - "grad_norm": 0.5167443499358275, - "learning_rate": 4.209302325581396e-05, - "loss": 0.7999, + "epoch": 0.050937587461516935, + "grad_norm": 0.36090384291409017, + "learning_rate": 3.376865671641791e-05, + "loss": 0.8145, "step": 182 }, { - "epoch": 0.025612316305108466, - "grad_norm": 0.5170409826954023, - "learning_rate": 4.232558139534884e-05, - "loss": 0.7601, + "epoch": 0.051217464315701094, + "grad_norm": 0.41637973003542017, + "learning_rate": 3.395522388059701e-05, + "loss": 0.7799, "step": 183 }, { - "epoch": 0.025752274317704688, - "grad_norm": 0.5219678512296154, - "learning_rate": 4.2558139534883724e-05, - "loss": 0.8032, + "epoch": 0.051497341169885254, + "grad_norm": 0.4175389089983213, + "learning_rate": 3.414179104477612e-05, + "loss": 0.7815, "step": 184 }, { - "epoch": 0.02589223233030091, - "grad_norm": 0.5652005338899992, - "learning_rate": 4.2790697674418605e-05, - "loss": 0.8096, + "epoch": 0.051777218024069406, + "grad_norm": 0.36591396795760794, + "learning_rate": 3.432835820895522e-05, + "loss": 0.7616, "step": 185 }, { - "epoch": 0.02603219034289713, - "grad_norm": 0.5199050303840674, - "learning_rate": 4.302325581395349e-05, - "loss": 0.7953, + "epoch": 0.052057094878253565, + "grad_norm": 0.39343179849771737, + "learning_rate": 3.451492537313433e-05, + "loss": 0.7979, "step": 186 }, { - "epoch": 0.026172148355493353, - "grad_norm": 0.5389317228680723, - "learning_rate": 4.325581395348837e-05, - "loss": 0.7972, + "epoch": 0.052336971732437725, + "grad_norm": 0.4168663914343234, + "learning_rate": 3.470149253731344e-05, + "loss": 0.8179, "step": 187 }, { - "epoch": 0.026312106368089574, - "grad_norm": 0.5496576941977979, - "learning_rate": 4.348837209302326e-05, - "loss": 0.7948, + "epoch": 0.052616848586621884, + "grad_norm": 0.42519235894982943, + "learning_rate": 3.488805970149254e-05, + "loss": 0.7615, "step": 188 }, { - "epoch": 0.026452064380685793, - "grad_norm": 0.4991575270339626, - "learning_rate": 4.3720930232558146e-05, - "loss": 0.7805, + "epoch": 0.05289672544080604, + "grad_norm": 0.3808260408144091, + "learning_rate": 3.5074626865671645e-05, + "loss": 0.7641, "step": 189 }, { - "epoch": 0.026592022393282014, - "grad_norm": 0.5848638237399637, - "learning_rate": 4.395348837209303e-05, - "loss": 0.8075, + "epoch": 0.0531766022949902, + "grad_norm": 0.41167014625126297, + "learning_rate": 3.5261194029850746e-05, + "loss": 0.7857, "step": 190 }, { - "epoch": 0.026731980405878236, - "grad_norm": 0.5521242621533461, - "learning_rate": 4.418604651162791e-05, - "loss": 0.8142, + "epoch": 0.05345647914917436, + "grad_norm": 0.38876907205109745, + "learning_rate": 3.5447761194029854e-05, + "loss": 0.7546, "step": 191 }, { - "epoch": 0.026871938418474458, - "grad_norm": 0.5435127966309655, - "learning_rate": 4.441860465116279e-05, - "loss": 0.7825, + "epoch": 0.05373635600335852, + "grad_norm": 0.41160971741402497, + "learning_rate": 3.5634328358208955e-05, + "loss": 0.7756, "step": 192 }, { - "epoch": 0.02701189643107068, - "grad_norm": 0.5470711522807221, - "learning_rate": 4.465116279069767e-05, - "loss": 0.7823, + "epoch": 0.05401623285754268, + "grad_norm": 0.3923391334923292, + "learning_rate": 3.582089552238806e-05, + "loss": 0.7451, "step": 193 }, { - "epoch": 0.0271518544436669, - "grad_norm": 0.5527490475528084, - "learning_rate": 4.488372093023256e-05, - "loss": 0.7346, + "epoch": 0.05429610971172684, + "grad_norm": 0.4339472355223145, + "learning_rate": 3.600746268656717e-05, + "loss": 0.7458, "step": 194 }, { - "epoch": 0.02729181245626312, - "grad_norm": 0.5440674129311732, - "learning_rate": 4.5116279069767444e-05, - "loss": 0.7794, + "epoch": 0.054575986565911, + "grad_norm": 0.4220123704322767, + "learning_rate": 3.619402985074627e-05, + "loss": 0.787, "step": 195 }, { - "epoch": 0.02743177046885934, - "grad_norm": 0.5470033960615739, - "learning_rate": 4.5348837209302326e-05, - "loss": 0.7778, + "epoch": 0.05485586342009516, + "grad_norm": 0.3945992666601785, + "learning_rate": 3.638059701492538e-05, + "loss": 0.7695, "step": 196 }, { - "epoch": 0.027571728481455563, - "grad_norm": 0.5297715369487911, - "learning_rate": 4.5581395348837214e-05, - "loss": 0.7817, + "epoch": 0.05513574027427932, + "grad_norm": 0.45369858609918495, + "learning_rate": 3.656716417910448e-05, + "loss": 0.7653, "step": 197 }, { - "epoch": 0.027711686494051785, - "grad_norm": 0.5904204687588617, - "learning_rate": 4.5813953488372096e-05, - "loss": 0.741, + "epoch": 0.055415617128463476, + "grad_norm": 0.4177283408518827, + "learning_rate": 3.675373134328358e-05, + "loss": 0.7447, "step": 198 }, { - "epoch": 0.027851644506648007, - "grad_norm": 0.5344813639779119, - "learning_rate": 4.604651162790698e-05, - "loss": 0.7695, + "epoch": 0.055695493982647636, + "grad_norm": 0.39914383848741325, + "learning_rate": 3.694029850746269e-05, + "loss": 0.7538, "step": 199 }, { - "epoch": 0.02799160251924423, - "grad_norm": 0.5332713262025963, - "learning_rate": 4.627906976744186e-05, - "loss": 0.7648, + "epoch": 0.055975370836831795, + "grad_norm": 0.4246872629249871, + "learning_rate": 3.7126865671641795e-05, + "loss": 0.7802, "step": 200 }, { - "epoch": 0.028131560531840447, - "grad_norm": 0.5503858696857928, - "learning_rate": 4.651162790697675e-05, - "loss": 0.7835, + "epoch": 0.056255247691015954, + "grad_norm": 0.42907384825838263, + "learning_rate": 3.73134328358209e-05, + "loss": 0.7674, "step": 201 }, { - "epoch": 0.02827151854443667, - "grad_norm": 0.5358241040816709, - "learning_rate": 4.674418604651163e-05, - "loss": 0.8045, + "epoch": 0.05653512454520011, + "grad_norm": 0.4542528371439862, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.7694, "step": 202 }, { - "epoch": 0.02841147655703289, - "grad_norm": 0.5486918473034686, - "learning_rate": 4.697674418604651e-05, - "loss": 0.7938, + "epoch": 0.05681500139938427, + "grad_norm": 0.43788777933290146, + "learning_rate": 3.7686567164179104e-05, + "loss": 0.7658, "step": 203 }, { - "epoch": 0.028551434569629112, - "grad_norm": 0.5435329407519798, - "learning_rate": 4.7209302325581394e-05, - "loss": 0.8122, + "epoch": 0.05709487825356843, + "grad_norm": 0.47259008763699245, + "learning_rate": 3.787313432835821e-05, + "loss": 0.7811, "step": 204 }, { - "epoch": 0.028691392582225334, - "grad_norm": 0.5353685402018145, - "learning_rate": 4.744186046511628e-05, - "loss": 0.7831, + "epoch": 0.05737475510775259, + "grad_norm": 0.4279490303137887, + "learning_rate": 3.805970149253731e-05, + "loss": 0.7597, "step": 205 }, { - "epoch": 0.028831350594821555, - "grad_norm": 0.5707995107234539, - "learning_rate": 4.7674418604651164e-05, - "loss": 0.7409, + "epoch": 0.05765463196193675, + "grad_norm": 0.40970401648855564, + "learning_rate": 3.824626865671642e-05, + "loss": 0.7598, "step": 206 }, { - "epoch": 0.028971308607417774, - "grad_norm": 0.5604394624011331, - "learning_rate": 4.790697674418605e-05, - "loss": 0.7517, + "epoch": 0.05793450881612091, + "grad_norm": 0.4299596238876096, + "learning_rate": 3.843283582089552e-05, + "loss": 0.7486, "step": 207 }, { - "epoch": 0.029111266620013995, - "grad_norm": 0.5515428854045646, - "learning_rate": 4.8139534883720934e-05, - "loss": 0.7812, + "epoch": 0.05821438567030507, + "grad_norm": 0.46847839915315687, + "learning_rate": 3.861940298507463e-05, + "loss": 0.8066, "step": 208 }, { - "epoch": 0.029251224632610217, - "grad_norm": 0.4865993670607851, - "learning_rate": 4.8372093023255816e-05, - "loss": 0.7767, + "epoch": 0.05849426252448923, + "grad_norm": 0.43393799277461775, + "learning_rate": 3.8805970149253736e-05, + "loss": 0.7618, "step": 209 }, { - "epoch": 0.02939118264520644, - "grad_norm": 0.5336012601695919, - "learning_rate": 4.86046511627907e-05, - "loss": 0.7463, + "epoch": 0.05877413937867338, + "grad_norm": 0.4453971304180821, + "learning_rate": 3.899253731343284e-05, + "loss": 0.7554, "step": 210 }, { - "epoch": 0.02953114065780266, - "grad_norm": 0.5759753656135316, - "learning_rate": 4.883720930232558e-05, - "loss": 0.7899, + "epoch": 0.05905401623285754, + "grad_norm": 0.45614809752187924, + "learning_rate": 3.9179104477611945e-05, + "loss": 0.7437, "step": 211 }, { - "epoch": 0.02967109867039888, - "grad_norm": 0.6037700025251694, - "learning_rate": 4.906976744186046e-05, - "loss": 0.7493, + "epoch": 0.0593338930870417, + "grad_norm": 0.4422390427833751, + "learning_rate": 3.9365671641791046e-05, + "loss": 0.7568, "step": 212 }, { - "epoch": 0.0298110566829951, - "grad_norm": 0.5451464405404893, - "learning_rate": 4.930232558139535e-05, - "loss": 0.7501, + "epoch": 0.05961376994122586, + "grad_norm": 0.38638591200368005, + "learning_rate": 3.9552238805970146e-05, + "loss": 0.7648, "step": 213 }, { - "epoch": 0.029951014695591322, - "grad_norm": 0.5084395290000155, - "learning_rate": 4.953488372093024e-05, - "loss": 0.8, + "epoch": 0.05989364679541002, + "grad_norm": 0.4097213277018535, + "learning_rate": 3.9738805970149254e-05, + "loss": 0.7356, "step": 214 }, { - "epoch": 0.030090972708187544, - "grad_norm": 0.5375968031425802, - "learning_rate": 4.976744186046512e-05, - "loss": 0.7644, + "epoch": 0.06017352364959418, + "grad_norm": 0.453249742519671, + "learning_rate": 3.992537313432836e-05, + "loss": 0.7158, "step": 215 }, { - "epoch": 0.030230930720783766, - "grad_norm": 0.5232124591965054, - "learning_rate": 5e-05, - "loss": 0.7661, + "epoch": 0.060453400503778336, + "grad_norm": 0.4845923097087401, + "learning_rate": 4.011194029850747e-05, + "loss": 0.7282, "step": 216 }, { - "epoch": 0.030370888733379987, - "grad_norm": 0.53168165514151, - "learning_rate": 4.999999743112317e-05, - "loss": 0.7469, + "epoch": 0.060733277357962495, + "grad_norm": 0.4425865451015721, + "learning_rate": 4.029850746268657e-05, + "loss": 0.764, "step": 217 }, { - "epoch": 0.030510846745976206, - "grad_norm": 0.5682427706170695, - "learning_rate": 4.9999989724493205e-05, - "loss": 0.723, + "epoch": 0.061013154212146654, + "grad_norm": 0.459102974798196, + "learning_rate": 4.048507462686567e-05, + "loss": 0.7314, "step": 218 }, { - "epoch": 0.030650804758572427, - "grad_norm": 0.5709911717094754, - "learning_rate": 4.99999768801117e-05, - "loss": 0.8116, + "epoch": 0.061293031066330814, + "grad_norm": 0.4595061468832727, + "learning_rate": 4.067164179104478e-05, + "loss": 0.7579, "step": 219 }, { - "epoch": 0.03079076277116865, - "grad_norm": 0.5259086859276223, - "learning_rate": 4.999995889798127e-05, - "loss": 0.7802, + "epoch": 0.06157290792051497, + "grad_norm": 0.4823737095308922, + "learning_rate": 4.085820895522388e-05, + "loss": 0.7162, "step": 220 }, { - "epoch": 0.03093072078376487, - "grad_norm": 0.5855912789813319, - "learning_rate": 4.999993577810563e-05, - "loss": 0.7523, + "epoch": 0.06185278477469913, + "grad_norm": 0.4261734637156516, + "learning_rate": 4.104477611940299e-05, + "loss": 0.7227, "step": 221 }, { - "epoch": 0.031070678796361093, - "grad_norm": 0.5703052221422155, - "learning_rate": 4.999990752048953e-05, - "loss": 0.7753, + "epoch": 0.06213266162888329, + "grad_norm": 0.4350348027694342, + "learning_rate": 4.1231343283582094e-05, + "loss": 0.7365, "step": 222 }, { - "epoch": 0.031210636808957314, - "grad_norm": 0.5108767700185519, - "learning_rate": 4.999987412513878e-05, - "loss": 0.7634, + "epoch": 0.06241253848306745, + "grad_norm": 0.4832999574991559, + "learning_rate": 4.1417910447761195e-05, + "loss": 0.7522, "step": 223 }, { - "epoch": 0.031350594821553536, - "grad_norm": 0.5527712966540788, - "learning_rate": 4.999983559206023e-05, - "loss": 0.7776, + "epoch": 0.0626924153372516, + "grad_norm": 0.5057681114795001, + "learning_rate": 4.16044776119403e-05, + "loss": 0.7586, "step": 224 }, { - "epoch": 0.031490552834149754, - "grad_norm": 0.5490103596581906, - "learning_rate": 4.999979192126181e-05, - "loss": 0.8043, + "epoch": 0.06297229219143577, + "grad_norm": 0.44285792537328683, + "learning_rate": 4.1791044776119404e-05, + "loss": 0.7792, "step": 225 }, { - "epoch": 0.03163051084674598, - "grad_norm": 0.5298825659589008, - "learning_rate": 4.9999743112752485e-05, - "loss": 0.7873, + "epoch": 0.06325216904561992, + "grad_norm": 0.45762174110305115, + "learning_rate": 4.197761194029851e-05, + "loss": 0.7793, "step": 226 }, { - "epoch": 0.0317704688593422, - "grad_norm": 0.5312004706999939, - "learning_rate": 4.9999689166542295e-05, - "loss": 0.7243, + "epoch": 0.06353204589980409, + "grad_norm": 0.4468144859741723, + "learning_rate": 4.216417910447761e-05, + "loss": 0.7242, "step": 227 }, { - "epoch": 0.031910426871938416, - "grad_norm": 0.5282314986009607, - "learning_rate": 4.9999630082642325e-05, - "loss": 0.7747, + "epoch": 0.06381192275398824, + "grad_norm": 0.4370998673209247, + "learning_rate": 4.235074626865671e-05, + "loss": 0.7179, "step": 228 }, { - "epoch": 0.03205038488453464, - "grad_norm": 0.5605656711144292, - "learning_rate": 4.999956586106472e-05, - "loss": 0.8079, + "epoch": 0.0640917996081724, + "grad_norm": 0.44335959563588795, + "learning_rate": 4.253731343283582e-05, + "loss": 0.7376, "step": 229 }, { - "epoch": 0.03219034289713086, - "grad_norm": 0.5457865205370973, - "learning_rate": 4.999949650182266e-05, - "loss": 0.7413, + "epoch": 0.06437167646235656, + "grad_norm": 0.4273577942277025, + "learning_rate": 4.272388059701493e-05, + "loss": 0.7578, "step": 230 }, { - "epoch": 0.032330300909727085, - "grad_norm": 0.5870565643730205, - "learning_rate": 4.999942200493043e-05, - "loss": 0.696, + "epoch": 0.06465155331654072, + "grad_norm": 0.40467771476605674, + "learning_rate": 4.2910447761194036e-05, + "loss": 0.7661, "step": 231 }, { - "epoch": 0.0324702589223233, - "grad_norm": 0.5334906454266579, - "learning_rate": 4.9999342370403316e-05, - "loss": 0.7259, + "epoch": 0.06493143017072488, + "grad_norm": 0.45246247401021156, + "learning_rate": 4.3097014925373137e-05, + "loss": 0.7611, "step": 232 }, { - "epoch": 0.03261021693491952, - "grad_norm": 0.5740266159557987, - "learning_rate": 4.999925759825768e-05, - "loss": 0.7221, + "epoch": 0.06521130702490904, + "grad_norm": 0.45481303561804026, + "learning_rate": 4.328358208955224e-05, + "loss": 0.7308, "step": 233 }, { - "epoch": 0.032750174947515746, - "grad_norm": 0.613668330354887, - "learning_rate": 4.999916768851096e-05, - "loss": 0.7756, + "epoch": 0.0654911838790932, + "grad_norm": 0.42823127005374406, + "learning_rate": 4.3470149253731345e-05, + "loss": 0.7122, "step": 234 }, { - "epoch": 0.032890132960111965, - "grad_norm": 0.5903235415127636, - "learning_rate": 4.999907264118163e-05, - "loss": 0.7386, + "epoch": 0.06577106073327736, + "grad_norm": 0.42606389284139023, + "learning_rate": 4.3656716417910446e-05, + "loss": 0.7253, "step": 235 }, { - "epoch": 0.03303009097270819, - "grad_norm": 0.534428463431133, - "learning_rate": 4.9998972456289226e-05, - "loss": 0.7299, + "epoch": 0.06605093758746151, + "grad_norm": 0.40394923201625044, + "learning_rate": 4.384328358208955e-05, + "loss": 0.7386, "step": 236 }, { - "epoch": 0.03317004898530441, - "grad_norm": 0.5238837539032761, - "learning_rate": 4.999886713385432e-05, - "loss": 0.7432, + "epoch": 0.06633081444164568, + "grad_norm": 0.4219423013130042, + "learning_rate": 4.402985074626866e-05, + "loss": 0.7524, "step": 237 }, { - "epoch": 0.033310006997900626, - "grad_norm": 0.5371791485677779, - "learning_rate": 4.999875667389858e-05, - "loss": 0.7918, + "epoch": 0.06661069129582983, + "grad_norm": 0.46896382361198863, + "learning_rate": 4.421641791044777e-05, + "loss": 0.7176, "step": 238 }, { - "epoch": 0.03344996501049685, - "grad_norm": 0.5835373561208981, - "learning_rate": 4.999864107644469e-05, - "loss": 0.7534, + "epoch": 0.066890568150014, + "grad_norm": 0.4467940315104462, + "learning_rate": 4.440298507462687e-05, + "loss": 0.7209, "step": 239 }, { - "epoch": 0.03358992302309307, - "grad_norm": 0.552016602574312, - "learning_rate": 4.999852034151641e-05, - "loss": 0.6981, + "epoch": 0.06717044500419815, + "grad_norm": 0.42497709063428196, + "learning_rate": 4.458955223880597e-05, + "loss": 0.6983, "step": 240 }, { - "epoch": 0.033729881035689295, - "grad_norm": 0.5154017381634809, - "learning_rate": 4.999839446913855e-05, - "loss": 0.6855, + "epoch": 0.06745032185838232, + "grad_norm": 0.44655477362737434, + "learning_rate": 4.477611940298508e-05, + "loss": 0.7065, "step": 241 }, { - "epoch": 0.03386983904828551, - "grad_norm": 0.567904246402228, - "learning_rate": 4.999826345933699e-05, - "loss": 0.7559, + "epoch": 0.06773019871256647, + "grad_norm": 0.4677011150337956, + "learning_rate": 4.496268656716418e-05, + "loss": 0.722, "step": 242 }, { - "epoch": 0.03400979706088174, - "grad_norm": 0.5334580095149974, - "learning_rate": 4.999812731213864e-05, - "loss": 0.7427, + "epoch": 0.06801007556675064, + "grad_norm": 0.4226232080324054, + "learning_rate": 4.5149253731343286e-05, + "loss": 0.7436, "step": 243 }, { - "epoch": 0.03414975507347796, - "grad_norm": 0.5705684901629905, - "learning_rate": 4.9997986027571485e-05, - "loss": 0.7097, + "epoch": 0.06828995242093479, + "grad_norm": 0.48006747711595266, + "learning_rate": 4.5335820895522394e-05, + "loss": 0.7589, "step": 244 }, { - "epoch": 0.034289713086074175, - "grad_norm": 0.5308751498286732, - "learning_rate": 4.9997839605664564e-05, - "loss": 0.7885, + "epoch": 0.06856982927511895, + "grad_norm": 0.43447772456651546, + "learning_rate": 4.5522388059701495e-05, + "loss": 0.7341, "step": 245 }, { - "epoch": 0.0344296710986704, - "grad_norm": 0.5655649609764111, - "learning_rate": 4.999768804644796e-05, - "loss": 0.7829, + "epoch": 0.0688497061293031, + "grad_norm": 0.4672355481768206, + "learning_rate": 4.57089552238806e-05, + "loss": 0.7264, "step": 246 }, { - "epoch": 0.03456962911126662, - "grad_norm": 0.5251341000071817, - "learning_rate": 4.999753134995283e-05, - "loss": 0.7406, + "epoch": 0.06912958298348726, + "grad_norm": 0.44525158274162946, + "learning_rate": 4.58955223880597e-05, + "loss": 0.7511, "step": 247 }, { - "epoch": 0.034709587123862844, - "grad_norm": 0.5176591391474799, - "learning_rate": 4.999736951621137e-05, - "loss": 0.7466, + "epoch": 0.06940945983767143, + "grad_norm": 0.4213977009056596, + "learning_rate": 4.608208955223881e-05, + "loss": 0.7391, "step": 248 }, { - "epoch": 0.03484954513645906, - "grad_norm": 0.510003241623571, - "learning_rate": 4.999720254525684e-05, - "loss": 0.7625, + "epoch": 0.06968933669185558, + "grad_norm": 0.4297280843902765, + "learning_rate": 4.626865671641791e-05, + "loss": 0.7304, "step": 249 }, { - "epoch": 0.03498950314905528, - "grad_norm": 0.5472293188653408, - "learning_rate": 4.999703043712355e-05, - "loss": 0.7486, + "epoch": 0.06996921354603974, + "grad_norm": 0.49130561822426916, + "learning_rate": 4.645522388059701e-05, + "loss": 0.7591, "step": 250 }, { - "epoch": 0.035129461161651505, - "grad_norm": 0.513397158699985, - "learning_rate": 4.9996853191846885e-05, - "loss": 0.7676, + "epoch": 0.0702490904002239, + "grad_norm": 0.4588404235287188, + "learning_rate": 4.664179104477612e-05, + "loss": 0.7224, "step": 251 }, { - "epoch": 0.035269419174247724, - "grad_norm": 0.5227735996669705, - "learning_rate": 4.999667080946324e-05, - "loss": 0.7421, + "epoch": 0.07052896725440806, + "grad_norm": 0.4170892489330508, + "learning_rate": 4.682835820895523e-05, + "loss": 0.7187, "step": 252 }, { - "epoch": 0.03540937718684395, - "grad_norm": 0.5706154309444754, - "learning_rate": 4.999648329001013e-05, - "loss": 0.7591, + "epoch": 0.07080884410859221, + "grad_norm": 0.48174211630070884, + "learning_rate": 4.7014925373134335e-05, + "loss": 0.7211, "step": 253 }, { - "epoch": 0.03554933519944017, - "grad_norm": 0.5258959198504635, - "learning_rate": 4.999629063352608e-05, - "loss": 0.7478, + "epoch": 0.07108872096277638, + "grad_norm": 0.44419470582928033, + "learning_rate": 4.7201492537313436e-05, + "loss": 0.7363, "step": 254 }, { - "epoch": 0.03568929321203639, - "grad_norm": 0.5487756790231113, - "learning_rate": 4.999609284005068e-05, - "loss": 0.7605, + "epoch": 0.07136859781696053, + "grad_norm": 0.4661471299819716, + "learning_rate": 4.738805970149254e-05, + "loss": 0.7318, "step": 255 }, { - "epoch": 0.03582925122463261, - "grad_norm": 0.5536825461384619, - "learning_rate": 4.999588990962458e-05, - "loss": 0.743, + "epoch": 0.0716484746711447, + "grad_norm": 0.44815383924055374, + "learning_rate": 4.7574626865671644e-05, + "loss": 0.7028, "step": 256 }, { - "epoch": 0.03596920923722883, - "grad_norm": 0.5272600123907261, - "learning_rate": 4.9995681842289476e-05, - "loss": 0.6928, + "epoch": 0.07192835152532885, + "grad_norm": 0.4347681423244314, + "learning_rate": 4.7761194029850745e-05, + "loss": 0.738, "step": 257 }, { - "epoch": 0.036109167249825054, - "grad_norm": 0.5932574229764146, - "learning_rate": 4.999546863808815e-05, - "loss": 0.7398, + "epoch": 0.07220822837951302, + "grad_norm": 0.42103800902759775, + "learning_rate": 4.794776119402985e-05, + "loss": 0.7197, "step": 258 }, { - "epoch": 0.03624912526242127, - "grad_norm": 0.5318314385969402, - "learning_rate": 4.999525029706439e-05, - "loss": 0.7392, + "epoch": 0.07248810523369717, + "grad_norm": 0.43609163124254846, + "learning_rate": 4.813432835820896e-05, + "loss": 0.7097, "step": 259 }, { - "epoch": 0.0363890832750175, - "grad_norm": 0.548901725278328, - "learning_rate": 4.999502681926309e-05, - "loss": 0.7501, + "epoch": 0.07276798208788134, + "grad_norm": 0.4513244060976077, + "learning_rate": 4.832089552238806e-05, + "loss": 0.6991, "step": 260 }, { - "epoch": 0.036529041287613716, - "grad_norm": 0.5288753436227192, - "learning_rate": 4.9994798204730166e-05, - "loss": 0.6955, + "epoch": 0.07304785894206549, + "grad_norm": 0.46046386444527604, + "learning_rate": 4.850746268656717e-05, + "loss": 0.7256, "step": 261 }, { - "epoch": 0.036668999300209934, - "grad_norm": 0.5694503048772611, - "learning_rate": 4.99945644535126e-05, - "loss": 0.723, + "epoch": 0.07332773579624965, + "grad_norm": 0.4561647252356896, + "learning_rate": 4.869402985074627e-05, + "loss": 0.7315, "step": 262 }, { - "epoch": 0.03680895731280616, - "grad_norm": 0.5665031858281824, - "learning_rate": 4.999432556565843e-05, - "loss": 0.6754, + "epoch": 0.0736076126504338, + "grad_norm": 0.4884637427080716, + "learning_rate": 4.888059701492538e-05, + "loss": 0.7403, "step": 263 }, { - "epoch": 0.03694891532540238, - "grad_norm": 0.5647585870166252, - "learning_rate": 4.999408154121676e-05, - "loss": 0.7434, + "epoch": 0.07388748950461797, + "grad_norm": 0.7652121557299206, + "learning_rate": 4.906716417910448e-05, + "loss": 0.7185, "step": 264 }, { - "epoch": 0.0370888733379986, - "grad_norm": 0.5198420793446112, - "learning_rate": 4.9993832380237735e-05, - "loss": 0.7398, + "epoch": 0.07416736635880213, + "grad_norm": 0.45244064682449026, + "learning_rate": 4.9253731343283586e-05, + "loss": 0.7288, "step": 265 }, { - "epoch": 0.03722883135059482, - "grad_norm": 0.5250695619803318, - "learning_rate": 4.999357808277255e-05, - "loss": 0.7355, + "epoch": 0.07444724321298629, + "grad_norm": 0.45889296478066427, + "learning_rate": 4.944029850746269e-05, + "loss": 0.7176, "step": 266 }, { - "epoch": 0.03736878936319104, - "grad_norm": 0.582781926595669, - "learning_rate": 4.999331864887347e-05, - "loss": 0.794, + "epoch": 0.07472712006717044, + "grad_norm": 0.45826546985668176, + "learning_rate": 4.9626865671641794e-05, + "loss": 0.7159, "step": 267 }, { - "epoch": 0.037508747375787264, - "grad_norm": 0.5564844235496481, - "learning_rate": 4.9993054078593824e-05, - "loss": 0.7205, + "epoch": 0.07500699692135461, + "grad_norm": 0.46389858384982025, + "learning_rate": 4.98134328358209e-05, + "loss": 0.7046, "step": 268 }, { - "epoch": 0.03764870538838348, - "grad_norm": 0.5165771944533392, - "learning_rate": 4.9992784371987966e-05, - "loss": 0.723, + "epoch": 0.07528687377553876, + "grad_norm": 0.4528735717410825, + "learning_rate": 5e-05, + "loss": 0.7375, "step": 269 }, { - "epoch": 0.03778866340097971, - "grad_norm": 0.528823368400942, - "learning_rate": 4.999250952911133e-05, - "loss": 0.752, + "epoch": 0.07556675062972293, + "grad_norm": 0.43357562424446217, + "learning_rate": 5.018656716417911e-05, + "loss": 0.7291, "step": 270 }, { - "epoch": 0.037928621413575926, - "grad_norm": 0.5429179446888814, - "learning_rate": 4.999222955002041e-05, - "loss": 0.723, + "epoch": 0.07584662748390708, + "grad_norm": 0.47087363708784613, + "learning_rate": 5.0373134328358204e-05, + "loss": 0.7161, "step": 271 }, { - "epoch": 0.03806857942617215, - "grad_norm": 0.5587199303859207, - "learning_rate": 4.9991944434772734e-05, - "loss": 0.7417, + "epoch": 0.07612650433809123, + "grad_norm": 0.5002887404008286, + "learning_rate": 5.055970149253731e-05, + "loss": 0.7027, "step": 272 }, { - "epoch": 0.03820853743876837, - "grad_norm": 0.5640839472568651, - "learning_rate": 4.999165418342689e-05, - "loss": 0.7231, + "epoch": 0.0764063811922754, + "grad_norm": 0.44100264957876795, + "learning_rate": 5.074626865671642e-05, + "loss": 0.7028, "step": 273 }, { - "epoch": 0.03834849545136459, - "grad_norm": 0.5679111247364553, - "learning_rate": 4.9991358796042535e-05, - "loss": 0.7469, + "epoch": 0.07668625804645955, + "grad_norm": 0.45791609628530144, + "learning_rate": 5.0932835820895534e-05, + "loss": 0.7079, "step": 274 }, { - "epoch": 0.03848845346396081, - "grad_norm": 0.5333389832026874, - "learning_rate": 4.999105827268038e-05, - "loss": 0.7593, + "epoch": 0.07696613490064372, + "grad_norm": 0.42461907033295915, + "learning_rate": 5.111940298507463e-05, + "loss": 0.7408, "step": 275 }, { - "epoch": 0.03862841147655703, - "grad_norm": 0.576446977047569, - "learning_rate": 4.999075261340218e-05, - "loss": 0.7442, + "epoch": 0.07724601175482787, + "grad_norm": 0.474800866810756, + "learning_rate": 5.1305970149253735e-05, + "loss": 0.7328, "step": 276 }, { - "epoch": 0.03876836948915326, - "grad_norm": 0.5832905654806, - "learning_rate": 4.9990441818270745e-05, - "loss": 0.7631, + "epoch": 0.07752588860901204, + "grad_norm": 0.4349633463201631, + "learning_rate": 5.149253731343284e-05, + "loss": 0.6997, "step": 277 }, { - "epoch": 0.038908327501749475, - "grad_norm": 0.5606985999591908, - "learning_rate": 4.9990125887349956e-05, - "loss": 0.7525, + "epoch": 0.07780576546319619, + "grad_norm": 0.4301040535810014, + "learning_rate": 5.167910447761194e-05, + "loss": 0.7072, "step": 278 }, { - "epoch": 0.03904828551434569, - "grad_norm": 0.5397877770051253, - "learning_rate": 4.9989804820704735e-05, - "loss": 0.677, + "epoch": 0.07808564231738035, + "grad_norm": 0.450034297489181, + "learning_rate": 5.1865671641791044e-05, + "loss": 0.7213, "step": 279 }, { - "epoch": 0.03918824352694192, - "grad_norm": 0.5715063431844626, - "learning_rate": 4.998947861840106e-05, - "loss": 0.7539, + "epoch": 0.07836551917156451, + "grad_norm": 0.44624329150212216, + "learning_rate": 5.205223880597015e-05, + "loss": 0.7426, "step": 280 }, { - "epoch": 0.039328201539538137, - "grad_norm": 0.542818827148529, - "learning_rate": 4.998914728050598e-05, - "loss": 0.7505, + "epoch": 0.07864539602574867, + "grad_norm": 0.4343366431937922, + "learning_rate": 5.223880597014925e-05, + "loss": 0.6915, "step": 281 }, { - "epoch": 0.03946815955213436, - "grad_norm": 0.5465408569180555, - "learning_rate": 4.9988810807087584e-05, - "loss": 0.7119, + "epoch": 0.07892527287993283, + "grad_norm": 0.45204300206030235, + "learning_rate": 5.242537313432836e-05, + "loss": 0.7232, "step": 282 }, { - "epoch": 0.03960811756473058, - "grad_norm": 0.5093766924471657, - "learning_rate": 4.998846919821502e-05, - "loss": 0.7062, + "epoch": 0.07920514973411699, + "grad_norm": 0.4548205588152112, + "learning_rate": 5.261194029850747e-05, + "loss": 0.7138, "step": 283 }, { - "epoch": 0.039748075577326805, - "grad_norm": 0.5138282632852486, - "learning_rate": 4.998812245395849e-05, - "loss": 0.7453, + "epoch": 0.07948502658830114, + "grad_norm": 0.45435492373302583, + "learning_rate": 5.2798507462686576e-05, + "loss": 0.6934, "step": 284 }, { - "epoch": 0.03988803358992302, - "grad_norm": 0.49293443848284324, - "learning_rate": 4.998777057438926e-05, - "loss": 0.7277, + "epoch": 0.07976490344248531, + "grad_norm": 0.4656914294998773, + "learning_rate": 5.298507462686567e-05, + "loss": 0.7082, "step": 285 }, { - "epoch": 0.04002799160251924, - "grad_norm": 0.5162138996089223, - "learning_rate": 4.9987413559579636e-05, - "loss": 0.7804, + "epoch": 0.08004478029666946, + "grad_norm": 0.47456640793620797, + "learning_rate": 5.317164179104478e-05, + "loss": 0.7129, "step": 286 }, { - "epoch": 0.04016794961511547, - "grad_norm": 0.5357676179971859, - "learning_rate": 4.998705140960299e-05, - "loss": 0.7399, + "epoch": 0.08032465715085363, + "grad_norm": 0.4318842240940535, + "learning_rate": 5.3358208955223885e-05, + "loss": 0.6785, "step": 287 }, { - "epoch": 0.040307907627711685, - "grad_norm": 0.518163900018162, - "learning_rate": 4.998668412453374e-05, - "loss": 0.6914, + "epoch": 0.08060453400503778, + "grad_norm": 0.44584560135570095, + "learning_rate": 5.3544776119402986e-05, + "loss": 0.7086, "step": 288 }, { - "epoch": 0.04044786564030791, - "grad_norm": 0.5334432804565435, - "learning_rate": 4.9986311704447395e-05, - "loss": 0.7186, + "epoch": 0.08088441085922195, + "grad_norm": 0.47857178698148334, + "learning_rate": 5.373134328358209e-05, + "loss": 0.7083, "step": 289 }, { - "epoch": 0.04058782365290413, - "grad_norm": 0.5466256273934306, - "learning_rate": 4.9985934149420466e-05, - "loss": 0.7361, + "epoch": 0.0811642877134061, + "grad_norm": 0.4625177089964261, + "learning_rate": 5.39179104477612e-05, + "loss": 0.7112, "step": 290 }, { - "epoch": 0.04072778166550035, - "grad_norm": 0.5299994895086962, - "learning_rate": 4.998555145953054e-05, - "loss": 0.7223, + "epoch": 0.08144416456759027, + "grad_norm": 0.4313745318502042, + "learning_rate": 5.4104477611940295e-05, + "loss": 0.712, "step": 291 }, { - "epoch": 0.04086773967809657, - "grad_norm": 0.5350513478260928, - "learning_rate": 4.998516363485629e-05, - "loss": 0.725, + "epoch": 0.08172404142177442, + "grad_norm": 0.47216163058190774, + "learning_rate": 5.42910447761194e-05, + "loss": 0.7034, "step": 292 }, { - "epoch": 0.04100769769069279, - "grad_norm": 0.47179695277188055, - "learning_rate": 4.99847706754774e-05, - "loss": 0.7441, + "epoch": 0.08200391827595858, + "grad_norm": 0.47780319900261636, + "learning_rate": 5.447761194029851e-05, + "loss": 0.6932, "step": 293 }, { - "epoch": 0.041147655703289016, - "grad_norm": 0.53626273737559, - "learning_rate": 4.998437258147462e-05, - "loss": 0.7643, + "epoch": 0.08228379513014274, + "grad_norm": 0.4137823178775821, + "learning_rate": 5.466417910447762e-05, + "loss": 0.7149, "step": 294 }, { - "epoch": 0.041287613715885234, - "grad_norm": 0.5542158039437199, - "learning_rate": 4.9983969352929786e-05, - "loss": 0.6996, + "epoch": 0.0825636719843269, + "grad_norm": 0.437078138593924, + "learning_rate": 5.485074626865672e-05, + "loss": 0.7171, "step": 295 }, { - "epoch": 0.04142757172848146, - "grad_norm": 0.513139127485963, - "learning_rate": 4.9983560989925736e-05, - "loss": 0.7176, + "epoch": 0.08284354883851106, + "grad_norm": 0.4238041082775488, + "learning_rate": 5.5037313432835826e-05, + "loss": 0.7202, "step": 296 }, { - "epoch": 0.04156752974107768, - "grad_norm": 0.5464627329044695, - "learning_rate": 4.9983147492546414e-05, - "loss": 0.6829, + "epoch": 0.08312342569269521, + "grad_norm": 0.4794628101196632, + "learning_rate": 5.5223880597014934e-05, + "loss": 0.6946, "step": 297 }, { - "epoch": 0.041707487753673896, - "grad_norm": 0.4970498064260649, - "learning_rate": 4.9982728860876794e-05, - "loss": 0.6962, + "epoch": 0.08340330254687937, + "grad_norm": 0.4423732648514037, + "learning_rate": 5.541044776119403e-05, + "loss": 0.7228, "step": 298 }, { - "epoch": 0.04184744576627012, - "grad_norm": 0.5892603998956238, - "learning_rate": 4.998230509500291e-05, - "loss": 0.7526, + "epoch": 0.08368317940106353, + "grad_norm": 0.44145759084944025, + "learning_rate": 5.5597014925373135e-05, + "loss": 0.7087, "step": 299 }, { - "epoch": 0.04198740377886634, - "grad_norm": 0.5501443462930937, - "learning_rate": 4.9981876195011844e-05, - "loss": 0.7133, + "epoch": 0.08396305625524769, + "grad_norm": 0.40934388363608293, + "learning_rate": 5.578358208955224e-05, + "loss": 0.6979, "step": 300 }, { - "epoch": 0.042127361791462564, - "grad_norm": 0.5498252845378556, - "learning_rate": 4.998144216099174e-05, - "loss": 0.7257, + "epoch": 0.08424293310943184, + "grad_norm": 0.4234180639188906, + "learning_rate": 5.5970149253731344e-05, + "loss": 0.7229, "step": 301 }, { - "epoch": 0.04226731980405878, - "grad_norm": 0.5513802243572777, - "learning_rate": 4.99810029930318e-05, - "loss": 0.7403, + "epoch": 0.08452280996361601, + "grad_norm": 0.4472825373799119, + "learning_rate": 5.615671641791045e-05, + "loss": 0.7135, "step": 302 }, { - "epoch": 0.042407277816655, - "grad_norm": 0.5188265748765649, - "learning_rate": 4.998055869122228e-05, - "loss": 0.7302, + "epoch": 0.08480268681780016, + "grad_norm": 0.45272708650047383, + "learning_rate": 5.634328358208956e-05, + "loss": 0.6826, "step": 303 }, { - "epoch": 0.042547235829251226, - "grad_norm": 0.5540179540190183, - "learning_rate": 4.998010925565448e-05, - "loss": 0.7517, + "epoch": 0.08508256367198433, + "grad_norm": 0.4494694600775036, + "learning_rate": 5.652985074626867e-05, + "loss": 0.7313, "step": 304 }, { - "epoch": 0.042687193841847444, - "grad_norm": 0.5281059249590544, - "learning_rate": 4.9979654686420775e-05, - "loss": 0.7317, + "epoch": 0.08536244052616848, + "grad_norm": 0.7339141540328916, + "learning_rate": 5.671641791044776e-05, + "loss": 0.6984, "step": 305 }, { - "epoch": 0.04282715185444367, - "grad_norm": 0.5359394569524711, - "learning_rate": 4.997919498361457e-05, - "loss": 0.7344, + "epoch": 0.08564231738035265, + "grad_norm": 0.45248907297003077, + "learning_rate": 5.690298507462687e-05, + "loss": 0.7477, "step": 306 }, { - "epoch": 0.04296710986703989, - "grad_norm": 0.5074397919192178, - "learning_rate": 4.9978730147330355e-05, - "loss": 0.7245, + "epoch": 0.0859221942345368, + "grad_norm": 0.4482803119122466, + "learning_rate": 5.7089552238805976e-05, + "loss": 0.7186, "step": 307 }, { - "epoch": 0.043107067879636106, - "grad_norm": 0.49809746671209315, - "learning_rate": 4.997826017766364e-05, - "loss": 0.7181, + "epoch": 0.08620207108872097, + "grad_norm": 0.4355901897442642, + "learning_rate": 5.727611940298508e-05, + "loss": 0.7257, "step": 308 }, { - "epoch": 0.04324702589223233, - "grad_norm": 0.5593359787877635, - "learning_rate": 4.997778507471102e-05, - "loss": 0.6895, + "epoch": 0.08648194794290512, + "grad_norm": 0.4161768534777681, + "learning_rate": 5.7462686567164184e-05, + "loss": 0.7166, "step": 309 }, { - "epoch": 0.04338698390482855, - "grad_norm": 0.5259196612830013, - "learning_rate": 4.997730483857014e-05, - "loss": 0.7075, + "epoch": 0.08676182479708928, + "grad_norm": 0.5497629735001676, + "learning_rate": 5.764925373134329e-05, + "loss": 0.7034, "step": 310 }, { - "epoch": 0.043526941917424775, - "grad_norm": 0.5158600291227199, - "learning_rate": 4.997681946933967e-05, - "loss": 0.7071, + "epoch": 0.08704170165127344, + "grad_norm": 0.43493036515863065, + "learning_rate": 5.7835820895522386e-05, + "loss": 0.7041, "step": 311 }, { - "epoch": 0.04366689993002099, - "grad_norm": 0.534021017035884, - "learning_rate": 4.997632896711939e-05, - "loss": 0.6599, + "epoch": 0.0873215785054576, + "grad_norm": 0.4087448488869078, + "learning_rate": 5.8022388059701494e-05, + "loss": 0.7112, "step": 312 }, { - "epoch": 0.04380685794261722, - "grad_norm": 0.5669098102151614, - "learning_rate": 4.997583333201008e-05, - "loss": 0.6998, + "epoch": 0.08760145535964176, + "grad_norm": 0.44234991171225274, + "learning_rate": 5.82089552238806e-05, + "loss": 0.7053, "step": 313 }, { - "epoch": 0.043946815955213436, - "grad_norm": 0.5277846431661678, - "learning_rate": 4.99753325641136e-05, - "loss": 0.7232, + "epoch": 0.08788133221382592, + "grad_norm": 0.5291674247547381, + "learning_rate": 5.839552238805971e-05, + "loss": 0.7405, "step": 314 }, { - "epoch": 0.044086773967809655, - "grad_norm": 0.5834444960242834, - "learning_rate": 4.997482666353287e-05, - "loss": 0.7281, + "epoch": 0.08816120906801007, + "grad_norm": 0.4607497702179738, + "learning_rate": 5.85820895522388e-05, + "loss": 0.7009, "step": 315 }, { - "epoch": 0.04422673198040588, - "grad_norm": 0.5050930819325137, - "learning_rate": 4.9974315630371855e-05, - "loss": 0.7042, + "epoch": 0.08844108592219424, + "grad_norm": 0.4023643317276832, + "learning_rate": 5.876865671641791e-05, + "loss": 0.6958, "step": 316 }, { - "epoch": 0.0443666899930021, - "grad_norm": 0.5077463810437512, - "learning_rate": 4.997379946473557e-05, - "loss": 0.7043, + "epoch": 0.08872096277637839, + "grad_norm": 0.4536954164659193, + "learning_rate": 5.8955223880597025e-05, + "loss": 0.7297, "step": 317 }, { - "epoch": 0.04450664800559832, - "grad_norm": 0.552209585789916, - "learning_rate": 4.9973278166730106e-05, - "loss": 0.7197, + "epoch": 0.08900083963056256, + "grad_norm": 0.4161966261264851, + "learning_rate": 5.914179104477612e-05, + "loss": 0.6986, "step": 318 }, { - "epoch": 0.04464660601819454, - "grad_norm": 0.6653079515801595, - "learning_rate": 4.99727517364626e-05, - "loss": 0.7359, + "epoch": 0.08928071648474671, + "grad_norm": 0.4289977366688839, + "learning_rate": 5.9328358208955226e-05, + "loss": 0.6719, "step": 319 }, { - "epoch": 0.04478656403079076, - "grad_norm": 0.5255861840491078, - "learning_rate": 4.9972220174041205e-05, - "loss": 0.6736, + "epoch": 0.08956059333893088, + "grad_norm": 0.4200723677019632, + "learning_rate": 5.9514925373134334e-05, + "loss": 0.7129, "step": 320 }, { - "epoch": 0.044926522043386985, - "grad_norm": 0.5400381711080283, - "learning_rate": 4.99716834795752e-05, - "loss": 0.7683, + "epoch": 0.08984047019311503, + "grad_norm": 0.42635737483866826, + "learning_rate": 5.970149253731343e-05, + "loss": 0.723, "step": 321 }, { - "epoch": 0.0450664800559832, - "grad_norm": 0.5297848029054303, - "learning_rate": 4.997114165317486e-05, - "loss": 0.7252, + "epoch": 0.09012034704729918, + "grad_norm": 0.4293620901269125, + "learning_rate": 5.9888059701492536e-05, + "loss": 0.6997, "step": 322 }, { - "epoch": 0.04520643806857943, - "grad_norm": 0.5344509508263574, - "learning_rate": 4.997059469495155e-05, - "loss": 0.6991, + "epoch": 0.09040022390148335, + "grad_norm": 0.4050924506501903, + "learning_rate": 6.007462686567164e-05, + "loss": 0.7089, "step": 323 }, { - "epoch": 0.04534639608117565, - "grad_norm": 0.5037923428143048, - "learning_rate": 4.997004260501766e-05, - "loss": 0.7393, + "epoch": 0.0906801007556675, + "grad_norm": 0.43906366645519723, + "learning_rate": 6.026119402985075e-05, + "loss": 0.7261, "step": 324 }, { - "epoch": 0.04548635409377187, - "grad_norm": 0.5338819883439406, - "learning_rate": 4.996948538348666e-05, - "loss": 0.711, + "epoch": 0.09095997760985167, + "grad_norm": 0.4486632214065769, + "learning_rate": 6.044776119402985e-05, + "loss": 0.7127, "step": 325 }, { - "epoch": 0.04562631210636809, - "grad_norm": 0.542939872517023, - "learning_rate": 4.996892303047306e-05, - "loss": 0.7, + "epoch": 0.09123985446403582, + "grad_norm": 0.42554371690002163, + "learning_rate": 6.063432835820896e-05, + "loss": 0.723, "step": 326 }, { - "epoch": 0.04576627011896431, - "grad_norm": 0.5211829437872917, - "learning_rate": 4.996835554609244e-05, - "loss": 0.7185, + "epoch": 0.09151973131821999, + "grad_norm": 0.4208462259883393, + "learning_rate": 6.082089552238807e-05, + "loss": 0.7123, "step": 327 }, { - "epoch": 0.045906228131560534, - "grad_norm": 0.6756691366488846, - "learning_rate": 4.996778293046141e-05, - "loss": 0.7205, + "epoch": 0.09179960817240414, + "grad_norm": 0.4649806874138297, + "learning_rate": 6.100746268656716e-05, + "loss": 0.7142, "step": 328 }, { - "epoch": 0.04604618614415675, - "grad_norm": 0.5016052820529759, - "learning_rate": 4.996720518369764e-05, - "loss": 0.6912, + "epoch": 0.0920794850265883, + "grad_norm": 0.43028372538351295, + "learning_rate": 6.119402985074628e-05, + "loss": 0.689, "step": 329 }, { - "epoch": 0.04618614415675298, - "grad_norm": 0.5201008063930296, - "learning_rate": 4.996662230591989e-05, - "loss": 0.7341, + "epoch": 0.09235936188077246, + "grad_norm": 0.4558325684400419, + "learning_rate": 6.138059701492538e-05, + "loss": 0.6912, "step": 330 }, { - "epoch": 0.046326102169349195, - "grad_norm": 0.567491502617686, - "learning_rate": 4.996603429724793e-05, - "loss": 0.7365, + "epoch": 0.09263923873495662, + "grad_norm": 0.4381458483310585, + "learning_rate": 6.156716417910448e-05, + "loss": 0.6814, "step": 331 }, { - "epoch": 0.046466060181945414, - "grad_norm": 0.5449583987974782, - "learning_rate": 4.996544115780261e-05, - "loss": 0.7075, + "epoch": 0.09291911558914077, + "grad_norm": 0.406646618290278, + "learning_rate": 6.175373134328358e-05, + "loss": 0.6712, "step": 332 }, { - "epoch": 0.04660601819454164, - "grad_norm": 0.5492024616709544, - "learning_rate": 4.996484288770582e-05, - "loss": 0.7896, + "epoch": 0.09319899244332494, + "grad_norm": 0.4980160165116508, + "learning_rate": 6.194029850746269e-05, + "loss": 0.7163, "step": 333 }, { - "epoch": 0.04674597620713786, - "grad_norm": 0.5076286169172769, - "learning_rate": 4.996423948708051e-05, - "loss": 0.7394, + "epoch": 0.09347886929750909, + "grad_norm": 0.4574980195335612, + "learning_rate": 6.21268656716418e-05, + "loss": 0.6838, "step": 334 }, { - "epoch": 0.04688593421973408, - "grad_norm": 0.550162273407423, - "learning_rate": 4.996363095605069e-05, - "loss": 0.7049, + "epoch": 0.09375874615169326, + "grad_norm": 0.44414868273725466, + "learning_rate": 6.23134328358209e-05, + "loss": 0.682, "step": 335 }, { - "epoch": 0.0470258922323303, - "grad_norm": 0.6075217637750909, - "learning_rate": 4.9963017294741407e-05, - "loss": 0.656, + "epoch": 0.09403862300587741, + "grad_norm": 0.4248528405896676, + "learning_rate": 6.25e-05, + "loss": 0.6904, "step": 336 }, { - "epoch": 0.04716585024492652, - "grad_norm": 0.5456579494737005, - "learning_rate": 4.99623985032788e-05, - "loss": 0.6872, + "epoch": 0.09431849986006158, + "grad_norm": 0.4919839023335616, + "learning_rate": 6.268656716417911e-05, + "loss": 0.6897, "step": 337 }, { - "epoch": 0.047305808257522744, - "grad_norm": 0.661719363460849, - "learning_rate": 4.996177458179001e-05, - "loss": 0.7, + "epoch": 0.09459837671424573, + "grad_norm": 1.3161816435263172, + "learning_rate": 6.28731343283582e-05, + "loss": 0.6755, "step": 338 }, { - "epoch": 0.04744576627011896, - "grad_norm": 0.6073637233523712, - "learning_rate": 4.996114553040328e-05, - "loss": 0.7034, + "epoch": 0.0948782535684299, + "grad_norm": 0.513774926789864, + "learning_rate": 6.305970149253731e-05, + "loss": 0.6984, "step": 339 }, { - "epoch": 0.04758572428271519, - "grad_norm": 0.627821201647191, - "learning_rate": 4.996051134924786e-05, - "loss": 0.6996, + "epoch": 0.09515813042261405, + "grad_norm": 0.47769900312879693, + "learning_rate": 6.324626865671642e-05, + "loss": 0.6736, "step": 340 }, { - "epoch": 0.047725682295311406, - "grad_norm": 0.5557264017341775, - "learning_rate": 4.99598720384541e-05, - "loss": 0.7071, + "epoch": 0.09543800727679821, + "grad_norm": 0.5872335086531077, + "learning_rate": 6.343283582089553e-05, + "loss": 0.6924, "step": 341 }, { - "epoch": 0.04786564030790763, - "grad_norm": 0.5980687216959104, - "learning_rate": 4.995922759815339e-05, - "loss": 0.7101, + "epoch": 0.09571788413098237, + "grad_norm": 0.6685685565941328, + "learning_rate": 6.361940298507463e-05, + "loss": 0.7139, "step": 342 }, { - "epoch": 0.04800559832050385, - "grad_norm": 0.6227145204853264, - "learning_rate": 4.995857802847816e-05, - "loss": 0.7248, + "epoch": 0.09599776098516653, + "grad_norm": 0.644943432147974, + "learning_rate": 6.380597014925374e-05, + "loss": 0.6985, "step": 343 }, { - "epoch": 0.04814555633310007, - "grad_norm": 0.9190740737396743, - "learning_rate": 4.9957923329561907e-05, - "loss": 0.709, + "epoch": 0.09627763783935069, + "grad_norm": 0.8158341804464563, + "learning_rate": 6.399253731343285e-05, + "loss": 0.6909, "step": 344 }, { - "epoch": 0.04828551434569629, - "grad_norm": 0.5382237071595833, - "learning_rate": 4.9957263501539174e-05, - "loss": 0.6756, + "epoch": 0.09655751469353484, + "grad_norm": 0.442615963515534, + "learning_rate": 6.417910447761194e-05, + "loss": 0.6766, "step": 345 }, { - "epoch": 0.04842547235829251, - "grad_norm": 0.7390793234122391, - "learning_rate": 4.9956598544545566e-05, - "loss": 0.7184, + "epoch": 0.096837391547719, + "grad_norm": 0.4410297475354073, + "learning_rate": 6.436567164179105e-05, + "loss": 0.6973, "step": 346 }, { - "epoch": 0.048565430370888736, - "grad_norm": 0.5896661276064007, - "learning_rate": 4.9955928458717723e-05, - "loss": 0.7022, + "epoch": 0.09711726840190316, + "grad_norm": 0.7799315559914135, + "learning_rate": 6.455223880597016e-05, + "loss": 0.6911, "step": 347 }, { - "epoch": 0.048705388383484954, - "grad_norm": 0.5284578098126113, - "learning_rate": 4.9955253244193375e-05, - "loss": 0.7238, + "epoch": 0.09739714525608732, + "grad_norm": 0.8056737357080603, + "learning_rate": 6.473880597014925e-05, + "loss": 0.6887, "step": 348 }, { - "epoch": 0.04884534639608117, - "grad_norm": 0.5539013371779543, - "learning_rate": 4.9954572901111286e-05, - "loss": 0.693, + "epoch": 0.09767702211027147, + "grad_norm": 4.298578143710853, + "learning_rate": 6.492537313432836e-05, + "loss": 0.6869, "step": 349 }, { - "epoch": 0.0489853044086774, - "grad_norm": 0.7749019171598228, - "learning_rate": 4.9953887429611256e-05, - "loss": 0.6873, + "epoch": 0.09795689896445564, + "grad_norm": 0.8435981133020648, + "learning_rate": 6.511194029850747e-05, + "loss": 0.723, "step": 350 }, { - "epoch": 0.049125262421273616, - "grad_norm": 0.5416373773112516, - "learning_rate": 4.995319682983418e-05, - "loss": 0.7418, + "epoch": 0.0982367758186398, + "grad_norm": 0.7371117969719428, + "learning_rate": 6.529850746268657e-05, + "loss": 0.6868, "step": 351 }, { - "epoch": 0.04926522043386984, - "grad_norm": 0.5962345555842343, - "learning_rate": 4.995250110192195e-05, - "loss": 0.7048, + "epoch": 0.09851665267282396, + "grad_norm": 0.6164397349080035, + "learning_rate": 6.548507462686567e-05, + "loss": 0.6882, "step": 352 }, { - "epoch": 0.04940517844646606, - "grad_norm": 0.6171440633433287, - "learning_rate": 4.995180024601758e-05, - "loss": 0.719, + "epoch": 0.09879652952700811, + "grad_norm": 0.5317239294299322, + "learning_rate": 6.567164179104478e-05, + "loss": 0.6873, "step": 353 }, { - "epoch": 0.049545136459062285, - "grad_norm": 0.5613359206707593, - "learning_rate": 4.995109426226508e-05, - "loss": 0.7374, + "epoch": 0.09907640638119228, + "grad_norm": 0.8761702871109263, + "learning_rate": 6.585820895522388e-05, + "loss": 0.6833, "step": 354 }, { - "epoch": 0.0496850944716585, - "grad_norm": 0.5557877380798822, - "learning_rate": 4.995038315080954e-05, - "loss": 0.7149, + "epoch": 0.09935628323537643, + "grad_norm": 0.4582754633466804, + "learning_rate": 6.604477611940298e-05, + "loss": 0.7004, "step": 355 }, { - "epoch": 0.04982505248425472, - "grad_norm": 0.5681067506858839, - "learning_rate": 4.994966691179711e-05, - "loss": 0.74, + "epoch": 0.0996361600895606, + "grad_norm": 0.447018180737809, + "learning_rate": 6.62313432835821e-05, + "loss": 0.7102, "step": 356 }, { - "epoch": 0.049965010496850946, - "grad_norm": 0.5671390578619896, - "learning_rate": 4.994894554537498e-05, - "loss": 0.743, + "epoch": 0.09991603694374475, + "grad_norm": 0.6708485391162204, + "learning_rate": 6.64179104477612e-05, + "loss": 0.7077, "step": 357 }, { - "epoch": 0.050104968509447165, - "grad_norm": 0.5040150906914191, - "learning_rate": 4.9948219051691394e-05, - "loss": 0.7411, + "epoch": 0.10019591379792891, + "grad_norm": 0.7193088718027209, + "learning_rate": 6.66044776119403e-05, + "loss": 0.7, "step": 358 }, { - "epoch": 0.05024492652204339, - "grad_norm": 0.5000433753838786, - "learning_rate": 4.994748743089566e-05, - "loss": 0.709, + "epoch": 0.10047579065211307, + "grad_norm": 0.5130518893188671, + "learning_rate": 6.679104477611941e-05, + "loss": 0.7128, "step": 359 }, { - "epoch": 0.05038488453463961, - "grad_norm": 0.5669390690482634, - "learning_rate": 4.9946750683138134e-05, - "loss": 0.7157, + "epoch": 0.10075566750629723, + "grad_norm": 0.4730820378689476, + "learning_rate": 6.697761194029852e-05, + "loss": 0.668, "step": 360 }, { - "epoch": 0.050524842547235826, - "grad_norm": 0.5348574517172033, - "learning_rate": 4.994600880857022e-05, - "loss": 0.6982, + "epoch": 0.10103554436048139, + "grad_norm": 0.5984318381359485, + "learning_rate": 6.716417910447762e-05, + "loss": 0.7275, "step": 361 }, { - "epoch": 0.05066480055983205, - "grad_norm": 0.5519610052382385, - "learning_rate": 4.9945261807344376e-05, - "loss": 0.701, + "epoch": 0.10131542121466555, + "grad_norm": 1.3919482365808233, + "learning_rate": 6.735074626865672e-05, + "loss": 0.6905, "step": 362 }, { - "epoch": 0.05080475857242827, - "grad_norm": 0.5267779469247078, - "learning_rate": 4.994450967961413e-05, - "loss": 0.7164, + "epoch": 0.1015952980688497, + "grad_norm": 0.6069392215504623, + "learning_rate": 6.753731343283583e-05, + "loss": 0.6927, "step": 363 }, { - "epoch": 0.050944716585024495, - "grad_norm": 0.5168765165006579, - "learning_rate": 4.994375242553405e-05, - "loss": 0.7321, + "epoch": 0.10187517492303387, + "grad_norm": 0.49002026414282235, + "learning_rate": 6.772388059701493e-05, + "loss": 0.7032, "step": 364 }, { - "epoch": 0.05108467459762071, - "grad_norm": 0.6298046212170411, - "learning_rate": 4.994299004525975e-05, - "loss": 0.6847, + "epoch": 0.10215505177721802, + "grad_norm": 0.45719536473500955, + "learning_rate": 6.791044776119403e-05, + "loss": 0.6808, "step": 365 }, { - "epoch": 0.05122463261021693, - "grad_norm": 0.557578502569354, - "learning_rate": 4.994222253894791e-05, - "loss": 0.7152, + "epoch": 0.10243492863140219, + "grad_norm": 0.48445489023836774, + "learning_rate": 6.809701492537313e-05, + "loss": 0.6811, "step": 366 }, { - "epoch": 0.05136459062281316, - "grad_norm": 0.5335254526570578, - "learning_rate": 4.994144990675627e-05, - "loss": 0.7247, + "epoch": 0.10271480548558634, + "grad_norm": 0.466557292066106, + "learning_rate": 6.828358208955224e-05, + "loss": 0.6586, "step": 367 }, { - "epoch": 0.051504548635409375, - "grad_norm": 0.5400373413925982, - "learning_rate": 4.99406721488436e-05, - "loss": 0.6837, + "epoch": 0.10299468233977051, + "grad_norm": 0.4717880558758552, + "learning_rate": 6.847014925373134e-05, + "loss": 0.672, "step": 368 }, { - "epoch": 0.0516445066480056, - "grad_norm": 0.4935375590306368, - "learning_rate": 4.993988926536975e-05, - "loss": 0.6861, + "epoch": 0.10327455919395466, + "grad_norm": 0.4877821397302723, + "learning_rate": 6.865671641791044e-05, + "loss": 0.6826, "step": 369 }, { - "epoch": 0.05178446466060182, - "grad_norm": 0.6275855125475005, - "learning_rate": 4.993910125649561e-05, - "loss": 0.6829, + "epoch": 0.10355443604813881, + "grad_norm": 0.55713406216083, + "learning_rate": 6.884328358208955e-05, + "loss": 0.6796, "step": 370 }, { - "epoch": 0.051924422673198044, - "grad_norm": 1.019947219371485, - "learning_rate": 4.993830812238311e-05, - "loss": 0.7568, + "epoch": 0.10383431290232298, + "grad_norm": 0.47581625927326593, + "learning_rate": 6.902985074626866e-05, + "loss": 0.6714, "step": 371 }, { - "epoch": 0.05206438068579426, - "grad_norm": 0.5311851634325208, - "learning_rate": 4.9937509863195256e-05, - "loss": 0.6854, + "epoch": 0.10411418975650713, + "grad_norm": 0.43257191406789336, + "learning_rate": 6.921641791044777e-05, + "loss": 0.6863, "step": 372 }, { - "epoch": 0.05220433869839048, - "grad_norm": 0.5202083034030205, - "learning_rate": 4.993670647909611e-05, - "loss": 0.7297, + "epoch": 0.1043940666106913, + "grad_norm": 0.4437395414763435, + "learning_rate": 6.940298507462687e-05, + "loss": 0.6635, "step": 373 }, { - "epoch": 0.052344296710986706, - "grad_norm": 0.5639019664915923, - "learning_rate": 4.9935897970250745e-05, - "loss": 0.7471, + "epoch": 0.10467394346487545, + "grad_norm": 0.7068875446240742, + "learning_rate": 6.958955223880598e-05, + "loss": 0.6806, "step": 374 }, { - "epoch": 0.052484254723582924, - "grad_norm": 0.5482982154303914, - "learning_rate": 4.993508433682535e-05, - "loss": 0.6802, + "epoch": 0.10495382031905962, + "grad_norm": 0.44911225867373566, + "learning_rate": 6.977611940298508e-05, + "loss": 0.6571, "step": 375 }, { - "epoch": 0.05262421273617915, - "grad_norm": 0.576536365667398, - "learning_rate": 4.993426557898711e-05, - "loss": 0.6996, + "epoch": 0.10523369717324377, + "grad_norm": 0.46333664540903235, + "learning_rate": 6.996268656716418e-05, + "loss": 0.6974, "step": 376 }, { - "epoch": 0.05276417074877537, - "grad_norm": 0.4997211624617034, - "learning_rate": 4.993344169690431e-05, - "loss": 0.6907, + "epoch": 0.10551357402742793, + "grad_norm": 0.5054230893447995, + "learning_rate": 7.014925373134329e-05, + "loss": 0.6998, "step": 377 }, { - "epoch": 0.052904128761371585, - "grad_norm": 0.4948774132141246, - "learning_rate": 4.993261269074625e-05, - "loss": 0.6869, + "epoch": 0.10579345088161209, + "grad_norm": 0.4490793398584244, + "learning_rate": 7.033582089552238e-05, + "loss": 0.7085, "step": 378 }, { - "epoch": 0.05304408677396781, - "grad_norm": 0.49620741460648987, - "learning_rate": 4.9931778560683304e-05, - "loss": 0.7045, + "epoch": 0.10607332773579625, + "grad_norm": 0.4290140309326081, + "learning_rate": 7.052238805970149e-05, + "loss": 0.7181, "step": 379 }, { - "epoch": 0.05318404478656403, - "grad_norm": 0.5687949523084871, - "learning_rate": 4.99309393068869e-05, - "loss": 0.6876, + "epoch": 0.1063532045899804, + "grad_norm": 0.45362497264391827, + "learning_rate": 7.07089552238806e-05, + "loss": 0.691, "step": 380 }, { - "epoch": 0.053324002799160254, - "grad_norm": 1.3449476384226542, - "learning_rate": 4.9930094929529506e-05, - "loss": 0.6932, + "epoch": 0.10663308144416457, + "grad_norm": 0.42874926552817305, + "learning_rate": 7.089552238805971e-05, + "loss": 0.6493, "step": 381 }, { - "epoch": 0.05346396081175647, - "grad_norm": 0.5421720343729313, - "learning_rate": 4.992924542878465e-05, - "loss": 0.6636, + "epoch": 0.10691295829834872, + "grad_norm": 0.47237696937944235, + "learning_rate": 7.10820895522388e-05, + "loss": 0.6987, "step": 382 }, { - "epoch": 0.0536039188243527, - "grad_norm": 0.5574373007750801, - "learning_rate": 4.9928390804826916e-05, - "loss": 0.7171, + "epoch": 0.10719283515253289, + "grad_norm": 0.38855172325103726, + "learning_rate": 7.126865671641791e-05, + "loss": 0.6977, "step": 383 }, { - "epoch": 0.053743876836948916, - "grad_norm": 0.5165807349158404, - "learning_rate": 4.992753105783194e-05, - "loss": 0.6979, + "epoch": 0.10747271200671704, + "grad_norm": 0.44911430898712273, + "learning_rate": 7.145522388059702e-05, + "loss": 0.6957, "step": 384 }, { - "epoch": 0.053883834849545134, - "grad_norm": 0.5207307553346301, - "learning_rate": 4.99266661879764e-05, - "loss": 0.6768, + "epoch": 0.10775258886090121, + "grad_norm": 0.4280200749942071, + "learning_rate": 7.164179104477612e-05, + "loss": 0.681, "step": 385 }, { - "epoch": 0.05402379286214136, - "grad_norm": 0.7774982969038746, - "learning_rate": 4.9925796195438044e-05, - "loss": 0.6667, + "epoch": 0.10803246571508536, + "grad_norm": 0.41670607217895367, + "learning_rate": 7.182835820895523e-05, + "loss": 0.6779, "step": 386 }, { - "epoch": 0.05416375087473758, - "grad_norm": 0.7031427692671837, - "learning_rate": 4.992492108039566e-05, - "loss": 0.7051, + "epoch": 0.10831234256926953, + "grad_norm": 0.4409124040164888, + "learning_rate": 7.201492537313434e-05, + "loss": 0.6851, "step": 387 }, { - "epoch": 0.0543037088873338, - "grad_norm": 0.5217628043633614, - "learning_rate": 4.99240408430291e-05, - "loss": 0.6427, + "epoch": 0.10859221942345368, + "grad_norm": 0.3960561625846798, + "learning_rate": 7.220149253731343e-05, + "loss": 0.7148, "step": 388 }, { - "epoch": 0.05444366689993002, - "grad_norm": 0.8333810612085015, - "learning_rate": 4.992315548351925e-05, - "loss": 0.7245, + "epoch": 0.10887209627763784, + "grad_norm": 0.41019388499923165, + "learning_rate": 7.238805970149254e-05, + "loss": 0.6924, "step": 389 }, { - "epoch": 0.05458362491252624, - "grad_norm": 0.629384361310055, - "learning_rate": 4.992226500204808e-05, - "loss": 0.6932, + "epoch": 0.109151973131822, + "grad_norm": 0.4144681082109547, + "learning_rate": 7.257462686567165e-05, + "loss": 0.6862, "step": 390 }, { - "epoch": 0.054723582925122465, - "grad_norm": 1.215348456810311, - "learning_rate": 4.992136939879856e-05, - "loss": 0.6962, + "epoch": 0.10943184998600616, + "grad_norm": 1.6774478100169978, + "learning_rate": 7.276119402985076e-05, + "loss": 0.6802, "step": 391 }, { - "epoch": 0.05486354093771868, - "grad_norm": 0.5807472413206434, - "learning_rate": 4.992046867395478e-05, - "loss": 0.7176, + "epoch": 0.10971172684019032, + "grad_norm": 0.3987937834141996, + "learning_rate": 7.294776119402985e-05, + "loss": 0.6487, "step": 392 }, { - "epoch": 0.05500349895031491, - "grad_norm": 0.5439919445778956, - "learning_rate": 4.9919562827701824e-05, - "loss": 0.716, + "epoch": 0.10999160369437448, + "grad_norm": 0.4296763054463155, + "learning_rate": 7.313432835820896e-05, + "loss": 0.682, "step": 393 }, { - "epoch": 0.055143456962911126, - "grad_norm": 0.5602331962806683, - "learning_rate": 4.9918651860225864e-05, - "loss": 0.6776, + "epoch": 0.11027148054855863, + "grad_norm": 0.4249814109394962, + "learning_rate": 7.332089552238807e-05, + "loss": 0.641, "step": 394 }, { - "epoch": 0.055283414975507345, - "grad_norm": 0.522684609371295, - "learning_rate": 4.9917735771714114e-05, - "loss": 0.6646, + "epoch": 0.11055135740274279, + "grad_norm": 0.5429804546291697, + "learning_rate": 7.350746268656716e-05, + "loss": 0.6468, "step": 395 }, { - "epoch": 0.05542337298810357, - "grad_norm": 0.6622073701270422, - "learning_rate": 4.991681456235483e-05, - "loss": 0.6714, + "epoch": 0.11083123425692695, + "grad_norm": 0.4093190306279177, + "learning_rate": 7.369402985074627e-05, + "loss": 0.6743, "step": 396 }, { - "epoch": 0.05556333100069979, - "grad_norm": 0.6427418148993167, - "learning_rate": 4.991588823233735e-05, - "loss": 0.7056, + "epoch": 0.1111111111111111, + "grad_norm": 0.7033065138757928, + "learning_rate": 7.388059701492537e-05, + "loss": 0.6988, "step": 397 }, { - "epoch": 0.05570328901329601, - "grad_norm": 0.5725098170561623, - "learning_rate": 4.991495678185202e-05, - "loss": 0.6672, + "epoch": 0.11139098796529527, + "grad_norm": 0.5570947590054259, + "learning_rate": 7.406716417910447e-05, + "loss": 0.7029, "step": 398 }, { - "epoch": 0.05584324702589223, - "grad_norm": 0.5585281717216761, - "learning_rate": 4.991402021109027e-05, - "loss": 0.7064, + "epoch": 0.11167086481947942, + "grad_norm": 0.40859828053308395, + "learning_rate": 7.425373134328359e-05, + "loss": 0.668, "step": 399 }, { - "epoch": 0.05598320503848846, - "grad_norm": 1.7745368005124815, - "learning_rate": 4.991307852024458e-05, - "loss": 0.7246, + "epoch": 0.11195074167366359, + "grad_norm": 0.37920101805001655, + "learning_rate": 7.44402985074627e-05, + "loss": 0.6638, "step": 400 }, { - "epoch": 0.056123163051084675, - "grad_norm": 0.5282756777124027, - "learning_rate": 4.991213170950848e-05, - "loss": 0.6971, + "epoch": 0.11223061852784774, + "grad_norm": 0.4606413966283373, + "learning_rate": 7.46268656716418e-05, + "loss": 0.6705, "step": 401 }, { - "epoch": 0.05626312106368089, - "grad_norm": 0.7218574920206103, - "learning_rate": 4.9911179779076544e-05, - "loss": 0.7173, + "epoch": 0.11251049538203191, + "grad_norm": 0.4117466938030516, + "learning_rate": 7.48134328358209e-05, + "loss": 0.6503, "step": 402 }, { - "epoch": 0.05640307907627712, - "grad_norm": 0.5451854604938866, - "learning_rate": 4.99102227291444e-05, - "loss": 0.7146, + "epoch": 0.11279037223621606, + "grad_norm": 0.41733242064764725, + "learning_rate": 7.500000000000001e-05, + "loss": 0.6972, "step": 403 }, { - "epoch": 0.05654303708887334, - "grad_norm": 0.5389963653898296, - "learning_rate": 4.990926055990873e-05, - "loss": 0.6745, + "epoch": 0.11307024909040023, + "grad_norm": 0.44048880975860893, + "learning_rate": 7.518656716417911e-05, + "loss": 0.6473, "step": 404 }, { - "epoch": 0.05668299510146956, - "grad_norm": 0.5101195028542534, - "learning_rate": 4.9908293271567286e-05, - "loss": 0.7158, + "epoch": 0.11335012594458438, + "grad_norm": 0.4093052594431773, + "learning_rate": 7.537313432835821e-05, + "loss": 0.6748, "step": 405 }, { - "epoch": 0.05682295311406578, - "grad_norm": 0.5324266228237025, - "learning_rate": 4.990732086431884e-05, - "loss": 0.6766, + "epoch": 0.11363000279876855, + "grad_norm": 0.422883608137739, + "learning_rate": 7.555970149253732e-05, + "loss": 0.6962, "step": 406 }, { - "epoch": 0.056962911126662, - "grad_norm": 0.5056404230280876, - "learning_rate": 4.990634333836324e-05, - "loss": 0.6859, + "epoch": 0.1139098796529527, + "grad_norm": 0.4622367714089799, + "learning_rate": 7.574626865671642e-05, + "loss": 0.6444, "step": 407 }, { - "epoch": 0.057102869139258224, - "grad_norm": 0.553015466117812, - "learning_rate": 4.990536069390136e-05, - "loss": 0.7476, + "epoch": 0.11418975650713686, + "grad_norm": 0.4179728359996031, + "learning_rate": 7.593283582089553e-05, + "loss": 0.6751, "step": 408 }, { - "epoch": 0.05724282715185444, - "grad_norm": 0.6468042464648912, - "learning_rate": 4.9904372931135167e-05, - "loss": 0.6926, + "epoch": 0.11446963336132102, + "grad_norm": 0.4081264989680072, + "learning_rate": 7.611940298507463e-05, + "loss": 0.6379, "step": 409 }, { - "epoch": 0.05738278516445067, - "grad_norm": 0.5169803890591222, - "learning_rate": 4.990338005026764e-05, - "loss": 0.6948, + "epoch": 0.11474951021550518, + "grad_norm": 0.39085981520536767, + "learning_rate": 7.630597014925373e-05, + "loss": 0.6688, "step": 410 }, { - "epoch": 0.057522743177046885, - "grad_norm": 0.7826976195286862, - "learning_rate": 4.990238205150284e-05, - "loss": 0.693, + "epoch": 0.11502938706968933, + "grad_norm": 0.43031742568193565, + "learning_rate": 7.649253731343284e-05, + "loss": 0.6894, "step": 411 }, { - "epoch": 0.05766270118964311, - "grad_norm": 0.5898380237885452, - "learning_rate": 4.990137893504585e-05, - "loss": 0.7025, + "epoch": 0.1153092639238735, + "grad_norm": 0.4389085433902972, + "learning_rate": 7.667910447761193e-05, + "loss": 0.6433, "step": 412 }, { - "epoch": 0.05780265920223933, - "grad_norm": 0.6044914970349515, - "learning_rate": 4.990037070110283e-05, - "loss": 0.6642, + "epoch": 0.11558914077805765, + "grad_norm": 0.42220390576147654, + "learning_rate": 7.686567164179104e-05, + "loss": 0.6718, "step": 413 }, { - "epoch": 0.05794261721483555, - "grad_norm": 0.6647502597764803, - "learning_rate": 4.989935734988098e-05, - "loss": 0.7141, + "epoch": 0.11586901763224182, + "grad_norm": 0.41024362930644254, + "learning_rate": 7.705223880597015e-05, + "loss": 0.6668, "step": 414 }, { - "epoch": 0.05808257522743177, - "grad_norm": 0.578032490799264, - "learning_rate": 4.989833888158856e-05, - "loss": 0.7525, + "epoch": 0.11614889448642597, + "grad_norm": 0.41757579668923, + "learning_rate": 7.723880597014926e-05, + "loss": 0.6848, "step": 415 }, { - "epoch": 0.05822253324002799, - "grad_norm": 0.5446056431076586, - "learning_rate": 4.989731529643486e-05, - "loss": 0.7152, + "epoch": 0.11642877134061014, + "grad_norm": 0.4231612312524277, + "learning_rate": 7.742537313432837e-05, + "loss": 0.6994, "step": 416 }, { - "epoch": 0.058362491252624216, - "grad_norm": 0.5520058248905833, - "learning_rate": 4.9896286594630255e-05, - "loss": 0.7104, + "epoch": 0.11670864819479429, + "grad_norm": 0.39796462051489095, + "learning_rate": 7.761194029850747e-05, + "loss": 0.6587, "step": 417 }, { - "epoch": 0.058502449265220434, - "grad_norm": 0.5358722455002061, - "learning_rate": 4.989525277638614e-05, - "loss": 0.6877, + "epoch": 0.11698852504897846, + "grad_norm": 0.445462589657519, + "learning_rate": 7.779850746268658e-05, + "loss": 0.655, "step": 418 }, { - "epoch": 0.05864240727781665, - "grad_norm": 0.5211574683086443, - "learning_rate": 4.989421384191499e-05, - "loss": 0.7235, + "epoch": 0.11726840190316261, + "grad_norm": 0.45089633882777275, + "learning_rate": 7.798507462686567e-05, + "loss": 0.6638, "step": 419 }, { - "epoch": 0.05878236529041288, - "grad_norm": 0.5620604980498491, - "learning_rate": 4.98931697914303e-05, - "loss": 0.6579, + "epoch": 0.11754827875734676, + "grad_norm": 0.42457883121608503, + "learning_rate": 7.817164179104478e-05, + "loss": 0.6377, "step": 420 }, { - "epoch": 0.058922323303009096, - "grad_norm": 0.5591196420226805, - "learning_rate": 4.989212062514664e-05, - "loss": 0.6848, + "epoch": 0.11782815561153093, + "grad_norm": 0.43788212692058714, + "learning_rate": 7.835820895522389e-05, + "loss": 0.6706, "step": 421 }, { - "epoch": 0.05906228131560532, - "grad_norm": 0.593542823435001, - "learning_rate": 4.989106634327963e-05, - "loss": 0.6769, + "epoch": 0.11810803246571508, + "grad_norm": 0.408158060527127, + "learning_rate": 7.854477611940298e-05, + "loss": 0.6743, "step": 422 }, { - "epoch": 0.05920223932820154, - "grad_norm": 0.4993385226927883, - "learning_rate": 4.989000694604593e-05, - "loss": 0.7045, + "epoch": 0.11838790931989925, + "grad_norm": 0.40421527642634947, + "learning_rate": 7.873134328358209e-05, + "loss": 0.6906, "step": 423 }, { - "epoch": 0.05934219734079776, - "grad_norm": 0.8610343379419788, - "learning_rate": 4.9888942433663255e-05, - "loss": 0.6875, + "epoch": 0.1186677861740834, + "grad_norm": 0.5528953780977632, + "learning_rate": 7.89179104477612e-05, + "loss": 0.6959, "step": 424 }, { - "epoch": 0.05948215535339398, - "grad_norm": 0.48976014126417133, - "learning_rate": 4.988787280635038e-05, - "loss": 0.7148, + "epoch": 0.11894766302826756, + "grad_norm": 0.403342469392748, + "learning_rate": 7.910447761194029e-05, + "loss": 0.6871, "step": 425 }, { - "epoch": 0.0596221133659902, - "grad_norm": 0.4952300036598977, - "learning_rate": 4.988679806432712e-05, - "loss": 0.6947, + "epoch": 0.11922753988245172, + "grad_norm": 0.41377648915254395, + "learning_rate": 7.92910447761194e-05, + "loss": 0.6691, "step": 426 }, { - "epoch": 0.059762071378586426, - "grad_norm": 0.46474336456699533, - "learning_rate": 4.9885718207814335e-05, - "loss": 0.6794, + "epoch": 0.11950741673663588, + "grad_norm": 0.39062244201193275, + "learning_rate": 7.947761194029851e-05, + "loss": 0.6646, "step": 427 }, { - "epoch": 0.059902029391182644, - "grad_norm": 0.5147006168979844, - "learning_rate": 4.988463323703397e-05, - "loss": 0.6711, + "epoch": 0.11978729359082003, + "grad_norm": 0.38607174947535255, + "learning_rate": 7.966417910447762e-05, + "loss": 0.6848, "step": 428 }, { - "epoch": 0.06004198740377887, - "grad_norm": 0.637107834123775, - "learning_rate": 4.988354315220898e-05, - "loss": 0.6715, + "epoch": 0.1200671704450042, + "grad_norm": 0.4270778478137736, + "learning_rate": 7.985074626865672e-05, + "loss": 0.6699, "step": 429 }, { - "epoch": 0.06018194541637509, - "grad_norm": 0.5511914486791841, - "learning_rate": 4.988244795356339e-05, - "loss": 0.649, + "epoch": 0.12034704729918835, + "grad_norm": 0.4313322992886548, + "learning_rate": 8.003731343283583e-05, + "loss": 0.6771, "step": 430 }, { - "epoch": 0.060321903428971306, - "grad_norm": 0.4830610595446907, - "learning_rate": 4.9881347641322277e-05, - "loss": 0.6591, + "epoch": 0.12062692415337252, + "grad_norm": 0.40835846468892506, + "learning_rate": 8.022388059701494e-05, + "loss": 0.667, "step": 431 }, { - "epoch": 0.06046186144156753, - "grad_norm": 0.5705178740705275, - "learning_rate": 4.988024221571177e-05, - "loss": 0.6686, + "epoch": 0.12090680100755667, + "grad_norm": 0.42088882100934216, + "learning_rate": 8.041044776119403e-05, + "loss": 0.6475, "step": 432 }, { - "epoch": 0.06060181945416375, - "grad_norm": 0.7215515180250442, - "learning_rate": 4.987913167695904e-05, - "loss": 0.7433, + "epoch": 0.12118667786174084, + "grad_norm": 0.6464518775485, + "learning_rate": 8.059701492537314e-05, + "loss": 0.6888, "step": 433 }, { - "epoch": 0.060741777466759975, - "grad_norm": 0.7060924726627863, - "learning_rate": 4.9878016025292305e-05, - "loss": 0.6936, + "epoch": 0.12146655471592499, + "grad_norm": 0.4125284299211265, + "learning_rate": 8.078358208955225e-05, + "loss": 0.6227, "step": 434 }, { - "epoch": 0.06088173547935619, - "grad_norm": 0.5381947781056515, - "learning_rate": 4.987689526094087e-05, - "loss": 0.6751, + "epoch": 0.12174643157010916, + "grad_norm": 0.47989278107702377, + "learning_rate": 8.097014925373134e-05, + "loss": 0.6936, "step": 435 }, { - "epoch": 0.06102169349195241, - "grad_norm": 0.4984642961815355, - "learning_rate": 4.987576938413504e-05, - "loss": 0.6551, + "epoch": 0.12202630842429331, + "grad_norm": 0.4261772840242414, + "learning_rate": 8.115671641791045e-05, + "loss": 0.676, "step": 436 }, { - "epoch": 0.061161651504548636, - "grad_norm": 0.6995103632612955, - "learning_rate": 4.98746383951062e-05, - "loss": 0.6852, + "epoch": 0.12230618527847748, + "grad_norm": 0.4066754280472226, + "learning_rate": 8.134328358208956e-05, + "loss": 0.6876, "step": 437 }, { - "epoch": 0.061301609517144855, - "grad_norm": 0.5321506444103643, - "learning_rate": 4.9873502294086785e-05, - "loss": 0.7047, + "epoch": 0.12258606213266163, + "grad_norm": 0.43891426861460775, + "learning_rate": 8.152985074626866e-05, + "loss": 0.6616, "step": 438 }, { - "epoch": 0.06144156752974108, - "grad_norm": 0.5373668549791867, - "learning_rate": 4.987236108131026e-05, - "loss": 0.6476, + "epoch": 0.1228659389868458, + "grad_norm": 0.3676904720946436, + "learning_rate": 8.171641791044776e-05, + "loss": 0.6609, "step": 439 }, { - "epoch": 0.0615815255423373, - "grad_norm": 0.5531880070215052, - "learning_rate": 4.9871214757011176e-05, - "loss": 0.6695, + "epoch": 0.12314581584102995, + "grad_norm": 0.3892126784524761, + "learning_rate": 8.190298507462687e-05, + "loss": 0.6846, "step": 440 }, { - "epoch": 0.06172148355493352, - "grad_norm": 0.5377302222338248, - "learning_rate": 4.9870063321425105e-05, - "loss": 0.6522, + "epoch": 0.12342569269521411, + "grad_norm": 0.38049231210876644, + "learning_rate": 8.208955223880597e-05, + "loss": 0.6585, "step": 441 }, { - "epoch": 0.06186144156752974, - "grad_norm": 0.4885102130170266, - "learning_rate": 4.986890677478867e-05, - "loss": 0.6693, + "epoch": 0.12370556954939826, + "grad_norm": 0.37520534882657697, + "learning_rate": 8.227611940298508e-05, + "loss": 0.6606, "step": 442 }, { - "epoch": 0.06200139958012596, - "grad_norm": 0.5111003799643065, - "learning_rate": 4.986774511733957e-05, - "loss": 0.6578, + "epoch": 0.12398544640358243, + "grad_norm": 0.39513205718163796, + "learning_rate": 8.246268656716419e-05, + "loss": 0.6729, "step": 443 }, { - "epoch": 0.062141357592722185, - "grad_norm": 0.45757845171169315, - "learning_rate": 4.986657834931653e-05, - "loss": 0.701, + "epoch": 0.12426532325776658, + "grad_norm": 0.42998721974753285, + "learning_rate": 8.26492537313433e-05, + "loss": 0.6704, "step": 444 }, { - "epoch": 0.0622813156053184, - "grad_norm": 0.6187591152576597, - "learning_rate": 4.986540647095933e-05, - "loss": 0.671, + "epoch": 0.12454520011195074, + "grad_norm": 0.408931714944995, + "learning_rate": 8.283582089552239e-05, + "loss": 0.6596, "step": 445 }, { - "epoch": 0.06242127361791463, - "grad_norm": 0.5054481649682342, - "learning_rate": 4.9864229482508804e-05, - "loss": 0.7186, + "epoch": 0.1248250769661349, + "grad_norm": 0.43411264954711126, + "learning_rate": 8.30223880597015e-05, + "loss": 0.6563, "step": 446 }, { - "epoch": 0.06256123163051085, - "grad_norm": 0.5370871184063281, - "learning_rate": 4.9863047384206835e-05, - "loss": 0.6889, + "epoch": 0.12510495382031905, + "grad_norm": 0.40133533031153473, + "learning_rate": 8.32089552238806e-05, + "loss": 0.6767, "step": 447 }, { - "epoch": 0.06270118964310707, - "grad_norm": 0.5046075184933217, - "learning_rate": 4.986186017629636e-05, - "loss": 0.6968, + "epoch": 0.1253848306745032, + "grad_norm": 0.40872477830181203, + "learning_rate": 8.339552238805971e-05, + "loss": 0.6775, "step": 448 }, { - "epoch": 0.06284114765570328, - "grad_norm": 0.5017707016018389, - "learning_rate": 4.986066785902136e-05, - "loss": 0.7472, + "epoch": 0.12566470752868739, + "grad_norm": 0.3925395199407519, + "learning_rate": 8.358208955223881e-05, + "loss": 0.6504, "step": 449 }, { - "epoch": 0.06298110566829951, - "grad_norm": 0.488992352862207, - "learning_rate": 4.985947043262686e-05, - "loss": 0.6916, + "epoch": 0.12594458438287154, + "grad_norm": 0.39825635877986537, + "learning_rate": 8.376865671641791e-05, + "loss": 0.6556, "step": 450 }, { - "epoch": 0.06312106368089573, - "grad_norm": 0.5153973631708659, - "learning_rate": 4.9858267897358956e-05, - "loss": 0.7105, + "epoch": 0.1262244612370557, + "grad_norm": 0.4107531606961135, + "learning_rate": 8.395522388059702e-05, + "loss": 0.6698, "step": 451 }, { - "epoch": 0.06326102169349196, - "grad_norm": 0.5351052897634555, - "learning_rate": 4.985706025346477e-05, - "loss": 0.7257, + "epoch": 0.12650433809123984, + "grad_norm": 0.41710025849144117, + "learning_rate": 8.414179104477612e-05, + "loss": 0.6702, "step": 452 }, { - "epoch": 0.06340097970608817, - "grad_norm": 0.5382494705510661, - "learning_rate": 4.98558475011925e-05, - "loss": 0.6587, + "epoch": 0.12678421494542402, + "grad_norm": 0.3833524739876693, + "learning_rate": 8.432835820895522e-05, + "loss": 0.6424, "step": 453 }, { - "epoch": 0.0635409377186844, - "grad_norm": 0.5164087894936398, - "learning_rate": 4.985462964079137e-05, - "loss": 0.6732, + "epoch": 0.12706409179960818, + "grad_norm": 0.37471559013898337, + "learning_rate": 8.451492537313433e-05, + "loss": 0.684, "step": 454 }, { - "epoch": 0.06368089573128062, - "grad_norm": 0.5018878284351228, - "learning_rate": 4.985340667251166e-05, - "loss": 0.6436, + "epoch": 0.12734396865379233, + "grad_norm": 0.41349519361632053, + "learning_rate": 8.470149253731343e-05, + "loss": 0.7108, "step": 455 }, { - "epoch": 0.06382085374387683, - "grad_norm": 0.6387432517988891, - "learning_rate": 4.9852178596604705e-05, - "loss": 0.6874, + "epoch": 0.12762384550797648, + "grad_norm": 0.38043562798558783, + "learning_rate": 8.488805970149253e-05, + "loss": 0.6782, "step": 456 }, { - "epoch": 0.06396081175647306, - "grad_norm": 0.525361815670854, - "learning_rate": 4.985094541332288e-05, - "loss": 0.6899, + "epoch": 0.12790372236216066, + "grad_norm": 0.3630384514666315, + "learning_rate": 8.507462686567164e-05, + "loss": 0.6387, "step": 457 }, { - "epoch": 0.06410076976906928, - "grad_norm": 0.5252701657432063, - "learning_rate": 4.984970712291963e-05, - "loss": 0.6788, + "epoch": 0.1281835992163448, + "grad_norm": 0.39964441377131393, + "learning_rate": 8.526119402985075e-05, + "loss": 0.654, "step": 458 }, { - "epoch": 0.0642407277816655, - "grad_norm": 0.5268887795524961, - "learning_rate": 4.984846372564943e-05, - "loss": 0.6915, + "epoch": 0.12846347607052896, + "grad_norm": 0.4171728266265153, + "learning_rate": 8.544776119402986e-05, + "loss": 0.6777, "step": 459 }, { - "epoch": 0.06438068579426172, - "grad_norm": 0.5762015280618955, - "learning_rate": 4.9847215221767815e-05, - "loss": 0.7023, + "epoch": 0.12874335292471312, + "grad_norm": 0.40790236781406547, + "learning_rate": 8.563432835820896e-05, + "loss": 0.6853, "step": 460 }, { - "epoch": 0.06452064380685794, - "grad_norm": 0.5228367157993173, - "learning_rate": 4.984596161153136e-05, - "loss": 0.7088, + "epoch": 0.1290232297788973, + "grad_norm": 0.4110683142830724, + "learning_rate": 8.582089552238807e-05, + "loss": 0.6642, "step": 461 }, { - "epoch": 0.06466060181945417, - "grad_norm": 0.5157276830814348, - "learning_rate": 4.984470289519769e-05, - "loss": 0.7164, + "epoch": 0.12930310663308145, + "grad_norm": 0.43782266552108556, + "learning_rate": 8.600746268656717e-05, + "loss": 0.6753, "step": 462 }, { - "epoch": 0.06480055983205038, - "grad_norm": 0.5405166337013064, - "learning_rate": 4.9843439073025486e-05, - "loss": 0.7111, + "epoch": 0.1295829834872656, + "grad_norm": 0.3972085783903419, + "learning_rate": 8.619402985074627e-05, + "loss": 0.681, "step": 463 }, { - "epoch": 0.0649405178446466, - "grad_norm": 0.5812948989732744, - "learning_rate": 4.984217014527449e-05, - "loss": 0.7043, + "epoch": 0.12986286034144975, + "grad_norm": 0.44630647399233214, + "learning_rate": 8.638059701492538e-05, + "loss": 0.668, "step": 464 }, { - "epoch": 0.06508047585724283, - "grad_norm": 0.49548874056024383, - "learning_rate": 4.984089611220547e-05, - "loss": 0.6428, + "epoch": 0.13014273719563393, + "grad_norm": 0.3731598467477508, + "learning_rate": 8.656716417910447e-05, + "loss": 0.638, "step": 465 }, { - "epoch": 0.06522043386983904, - "grad_norm": 0.5197419283605628, - "learning_rate": 4.9839616974080246e-05, - "loss": 0.7102, + "epoch": 0.1304226140498181, + "grad_norm": 0.6582886918597636, + "learning_rate": 8.675373134328358e-05, + "loss": 0.6697, "step": 466 }, { - "epoch": 0.06536039188243527, - "grad_norm": 0.6462028524249613, - "learning_rate": 4.9838332731161694e-05, - "loss": 0.664, + "epoch": 0.13070249090400224, + "grad_norm": 0.6873071146539492, + "learning_rate": 8.694029850746269e-05, + "loss": 0.6664, "step": 467 }, { - "epoch": 0.06550034989503149, - "grad_norm": 0.4883003485107369, - "learning_rate": 4.9837043383713753e-05, - "loss": 0.6494, + "epoch": 0.1309823677581864, + "grad_norm": 0.3831571859149306, + "learning_rate": 8.71268656716418e-05, + "loss": 0.6593, "step": 468 }, { - "epoch": 0.06564030790762772, - "grad_norm": 0.510996545798241, - "learning_rate": 4.983574893200139e-05, - "loss": 0.6762, + "epoch": 0.13126224461237054, + "grad_norm": 0.35676734914188696, + "learning_rate": 8.731343283582089e-05, + "loss": 0.6584, "step": 469 }, { - "epoch": 0.06578026592022393, - "grad_norm": 0.5132612521238953, - "learning_rate": 4.9834449376290625e-05, - "loss": 0.6646, + "epoch": 0.13154212146655472, + "grad_norm": 0.37186744217198814, + "learning_rate": 8.75e-05, + "loss": 0.6843, "step": 470 }, { - "epoch": 0.06592022393282015, - "grad_norm": 0.5184693525295171, - "learning_rate": 4.983314471684853e-05, - "loss": 0.6709, + "epoch": 0.13182199832073888, + "grad_norm": 0.390404389872634, + "learning_rate": 8.76865671641791e-05, + "loss": 0.6483, "step": 471 }, { - "epoch": 0.06606018194541638, - "grad_norm": 0.48530849719789637, - "learning_rate": 4.9831834953943236e-05, - "loss": 0.6798, + "epoch": 0.13210187517492303, + "grad_norm": 0.3738856376207189, + "learning_rate": 8.787313432835821e-05, + "loss": 0.6551, "step": 472 }, { - "epoch": 0.06620013995801259, - "grad_norm": 0.5169999057943723, - "learning_rate": 4.9830520087843894e-05, - "loss": 0.6949, + "epoch": 0.13238175202910718, + "grad_norm": 0.48004224555935937, + "learning_rate": 8.805970149253732e-05, + "loss": 0.6493, "step": 473 }, { - "epoch": 0.06634009797060882, - "grad_norm": 0.563401180326856, - "learning_rate": 4.982920011882074e-05, - "loss": 0.7226, + "epoch": 0.13266162888329136, + "grad_norm": 0.3736785443142544, + "learning_rate": 8.824626865671643e-05, + "loss": 0.6643, "step": 474 }, { - "epoch": 0.06648005598320504, - "grad_norm": 0.5359095901003381, - "learning_rate": 4.982787504714503e-05, - "loss": 0.6834, + "epoch": 0.1329415057374755, + "grad_norm": 0.36882550460560304, + "learning_rate": 8.843283582089554e-05, + "loss": 0.6903, "step": 475 }, { - "epoch": 0.06662001399580125, - "grad_norm": 0.7237453631577152, - "learning_rate": 4.982654487308908e-05, - "loss": 0.6794, + "epoch": 0.13322138259165967, + "grad_norm": 0.40272658663853805, + "learning_rate": 8.861940298507463e-05, + "loss": 0.6539, "step": 476 }, { - "epoch": 0.06675997200839748, - "grad_norm": 0.5209505752877471, - "learning_rate": 4.982520959692626e-05, - "loss": 0.6533, + "epoch": 0.13350125944584382, + "grad_norm": 0.35544347325052605, + "learning_rate": 8.880597014925374e-05, + "loss": 0.6493, "step": 477 }, { - "epoch": 0.0668999300209937, - "grad_norm": 0.5103023397815947, - "learning_rate": 4.982386921893098e-05, - "loss": 0.6633, + "epoch": 0.133781136300028, + "grad_norm": 0.3614647777559769, + "learning_rate": 8.899253731343285e-05, + "loss": 0.6763, "step": 478 }, { - "epoch": 0.06703988803358993, - "grad_norm": 0.5060683439354244, - "learning_rate": 4.98225237393787e-05, - "loss": 0.6369, + "epoch": 0.13406101315421215, + "grad_norm": 0.36971957161130825, + "learning_rate": 8.917910447761194e-05, + "loss": 0.6904, "step": 479 }, { - "epoch": 0.06717984604618614, - "grad_norm": 0.5296294502024173, - "learning_rate": 4.9821173158545936e-05, - "loss": 0.6394, + "epoch": 0.1343408900083963, + "grad_norm": 0.38740279571359765, + "learning_rate": 8.936567164179105e-05, + "loss": 0.6316, "step": 480 }, { - "epoch": 0.06731980405878236, - "grad_norm": 0.5333731273318062, - "learning_rate": 4.981981747671024e-05, - "loss": 0.6792, + "epoch": 0.13462076686258045, + "grad_norm": 0.3746089452612046, + "learning_rate": 8.955223880597016e-05, + "loss": 0.6775, "step": 481 }, { - "epoch": 0.06745976207137859, - "grad_norm": 0.5350960162399097, - "learning_rate": 4.981845669415022e-05, - "loss": 0.6378, + "epoch": 0.13490064371676463, + "grad_norm": 0.4096573183041176, + "learning_rate": 8.973880597014925e-05, + "loss": 0.6331, "step": 482 }, { - "epoch": 0.0675997200839748, - "grad_norm": 0.4966051310703327, - "learning_rate": 4.9817090811145524e-05, - "loss": 0.6664, + "epoch": 0.1351805205709488, + "grad_norm": 0.39578042975326794, + "learning_rate": 8.992537313432836e-05, + "loss": 0.6527, "step": 483 }, { - "epoch": 0.06773967809657103, - "grad_norm": 0.5117571057914638, - "learning_rate": 4.9815719827976864e-05, - "loss": 0.674, + "epoch": 0.13546039742513294, + "grad_norm": 0.3750647028289509, + "learning_rate": 9.011194029850746e-05, + "loss": 0.6282, "step": 484 }, { - "epoch": 0.06787963610916725, - "grad_norm": 0.5325656306807487, - "learning_rate": 4.9814343744925984e-05, - "loss": 0.7459, + "epoch": 0.1357402742793171, + "grad_norm": 0.3787020948866338, + "learning_rate": 9.029850746268657e-05, + "loss": 0.6571, "step": 485 }, { - "epoch": 0.06801959412176348, - "grad_norm": 0.5363735435396061, - "learning_rate": 4.981296256227569e-05, - "loss": 0.6356, + "epoch": 0.13602015113350127, + "grad_norm": 0.4113950094446887, + "learning_rate": 9.048507462686568e-05, + "loss": 0.6331, "step": 486 }, { - "epoch": 0.06815955213435969, - "grad_norm": 0.5880213444397324, - "learning_rate": 4.981157628030984e-05, - "loss": 0.7152, + "epoch": 0.13630002798768542, + "grad_norm": 1.0715952785604606, + "learning_rate": 9.067164179104479e-05, + "loss": 0.6272, "step": 487 }, { - "epoch": 0.06829951014695591, - "grad_norm": 0.5622620490135608, - "learning_rate": 4.9810184899313294e-05, - "loss": 0.6904, + "epoch": 0.13657990484186958, + "grad_norm": 0.3983710454128697, + "learning_rate": 9.08582089552239e-05, + "loss": 0.6616, "step": 488 }, { - "epoch": 0.06843946815955214, - "grad_norm": 0.5387407294746895, - "learning_rate": 4.980878841957203e-05, - "loss": 0.6417, + "epoch": 0.13685978169605373, + "grad_norm": 0.4495974542727547, + "learning_rate": 9.104477611940299e-05, + "loss": 0.6623, "step": 489 }, { - "epoch": 0.06857942617214835, - "grad_norm": 0.5810694092783647, - "learning_rate": 4.9807386841373014e-05, - "loss": 0.7077, + "epoch": 0.1371396585502379, + "grad_norm": 0.36281685504496297, + "learning_rate": 9.12313432835821e-05, + "loss": 0.6636, "step": 490 }, { - "epoch": 0.06871938418474458, - "grad_norm": 0.5468672188592382, - "learning_rate": 4.9805980165004304e-05, - "loss": 0.6831, + "epoch": 0.13741953540442206, + "grad_norm": 0.353930752331264, + "learning_rate": 9.14179104477612e-05, + "loss": 0.6545, "step": 491 }, { - "epoch": 0.0688593421973408, - "grad_norm": 0.5009452260143993, - "learning_rate": 4.9804568390754974e-05, - "loss": 0.6618, + "epoch": 0.1376994122586062, + "grad_norm": 0.37500739942877725, + "learning_rate": 9.16044776119403e-05, + "loss": 0.6569, "step": 492 }, { - "epoch": 0.06899930020993703, - "grad_norm": 0.5198589285355217, - "learning_rate": 4.980315151891516e-05, - "loss": 0.7322, + "epoch": 0.13797928911279037, + "grad_norm": 0.3820874791181939, + "learning_rate": 9.17910447761194e-05, + "loss": 0.6458, "step": 493 }, { - "epoch": 0.06913925822253324, - "grad_norm": 0.5255753639976765, - "learning_rate": 4.980172954977605e-05, - "loss": 0.667, + "epoch": 0.13825916596697452, + "grad_norm": 0.400717183706268, + "learning_rate": 9.197761194029851e-05, + "loss": 0.6473, "step": 494 }, { - "epoch": 0.06927921623512946, - "grad_norm": 0.514064283357064, - "learning_rate": 4.980030248362987e-05, - "loss": 0.7107, + "epoch": 0.1385390428211587, + "grad_norm": 0.4015144031719116, + "learning_rate": 9.216417910447762e-05, + "loss": 0.6177, "step": 495 }, { - "epoch": 0.06941917424772569, - "grad_norm": 0.5532838372693374, - "learning_rate": 4.9798870320769886e-05, - "loss": 0.6811, + "epoch": 0.13881891967534285, + "grad_norm": 0.7088085383546942, + "learning_rate": 9.235074626865672e-05, + "loss": 0.6584, "step": 496 }, { - "epoch": 0.0695591322603219, - "grad_norm": 0.602307520111878, - "learning_rate": 4.9797433061490434e-05, - "loss": 0.655, + "epoch": 0.139098796529527, + "grad_norm": 1.0178752109753382, + "learning_rate": 9.253731343283582e-05, + "loss": 0.6813, "step": 497 }, { - "epoch": 0.06969909027291812, - "grad_norm": 1.1081876267570516, - "learning_rate": 4.979599070608688e-05, - "loss": 0.7107, + "epoch": 0.13937867338371115, + "grad_norm": 0.5102057156876113, + "learning_rate": 9.272388059701493e-05, + "loss": 0.6592, "step": 498 }, { - "epoch": 0.06983904828551435, - "grad_norm": 0.529722398927933, - "learning_rate": 4.979454325485565e-05, - "loss": 0.7186, + "epoch": 0.13965855023789533, + "grad_norm": 2.7074502386451083, + "learning_rate": 9.291044776119402e-05, + "loss": 0.6909, "step": 499 }, { - "epoch": 0.06997900629811056, - "grad_norm": 0.688076194733635, - "learning_rate": 4.97930907080942e-05, - "loss": 0.6966, + "epoch": 0.1399384270920795, + "grad_norm": 0.5944929804172767, + "learning_rate": 9.309701492537313e-05, + "loss": 0.6575, "step": 500 }, { - "epoch": 0.07011896431070679, - "grad_norm": 0.5241126214650118, - "learning_rate": 4.979163306610105e-05, - "loss": 0.6648, + "epoch": 0.14021830394626364, + "grad_norm": 1.8487977049802586, + "learning_rate": 9.328358208955224e-05, + "loss": 0.6701, "step": 501 }, { - "epoch": 0.07025892232330301, - "grad_norm": 0.6545221839203675, - "learning_rate": 4.9790170329175754e-05, - "loss": 0.6634, + "epoch": 0.1404981808004478, + "grad_norm": 1.1173366320876923, + "learning_rate": 9.347014925373135e-05, + "loss": 0.6922, "step": 502 }, { - "epoch": 0.07039888033589924, - "grad_norm": 0.5292755961917179, - "learning_rate": 4.978870249761893e-05, - "loss": 0.6554, + "epoch": 0.14077805765463197, + "grad_norm": 0.4780147783751096, + "learning_rate": 9.365671641791045e-05, + "loss": 0.6613, "step": 503 }, { - "epoch": 0.07053883834849545, - "grad_norm": 0.5071567724521334, - "learning_rate": 4.978722957173222e-05, - "loss": 0.6741, + "epoch": 0.14105793450881612, + "grad_norm": 0.35430385583757923, + "learning_rate": 9.384328358208956e-05, + "loss": 0.675, "step": 504 }, { - "epoch": 0.07067879636109167, - "grad_norm": 0.6987987933626845, - "learning_rate": 4.9785751551818336e-05, - "loss": 0.6845, + "epoch": 0.14133781136300028, + "grad_norm": 0.48386478637752267, + "learning_rate": 9.402985074626867e-05, + "loss": 0.6585, "step": 505 }, { - "epoch": 0.0708187543736879, - "grad_norm": 0.6505466788008574, - "learning_rate": 4.9784268438181016e-05, - "loss": 0.6632, + "epoch": 0.14161768821718443, + "grad_norm": 0.38156271567770533, + "learning_rate": 9.421641791044776e-05, + "loss": 0.6599, "step": 506 }, { - "epoch": 0.07095871238628411, - "grad_norm": 0.5780869067311343, - "learning_rate": 4.978278023112506e-05, - "loss": 0.7101, + "epoch": 0.1418975650713686, + "grad_norm": 0.3788471218505237, + "learning_rate": 9.440298507462687e-05, + "loss": 0.6478, "step": 507 }, { - "epoch": 0.07109867039888033, - "grad_norm": 0.5668906286656329, - "learning_rate": 4.978128693095632e-05, - "loss": 0.6727, + "epoch": 0.14217744192555276, + "grad_norm": 0.35070799994627283, + "learning_rate": 9.458955223880598e-05, + "loss": 0.6221, "step": 508 }, { - "epoch": 0.07123862841147656, - "grad_norm": 0.5631385798045004, - "learning_rate": 4.977978853798166e-05, - "loss": 0.6936, + "epoch": 0.1424573187797369, + "grad_norm": 0.3801612235838529, + "learning_rate": 9.477611940298507e-05, + "loss": 0.6298, "step": 509 }, { - "epoch": 0.07137858642407278, - "grad_norm": 0.563836937443064, - "learning_rate": 4.977828505250903e-05, - "loss": 0.6646, + "epoch": 0.14273719563392107, + "grad_norm": 0.375699707803378, + "learning_rate": 9.496268656716418e-05, + "loss": 0.6601, "step": 510 }, { - "epoch": 0.071518544436669, - "grad_norm": 0.49771256088584315, - "learning_rate": 4.977677647484741e-05, - "loss": 0.6543, + "epoch": 0.14301707248810525, + "grad_norm": 0.3453134958214971, + "learning_rate": 9.514925373134329e-05, + "loss": 0.6312, "step": 511 }, { - "epoch": 0.07165850244926522, - "grad_norm": 0.813746974710708, - "learning_rate": 4.977526280530684e-05, - "loss": 0.6579, + "epoch": 0.1432969493422894, + "grad_norm": 0.5285573505383262, + "learning_rate": 9.533582089552238e-05, + "loss": 0.648, "step": 512 }, { - "epoch": 0.07179846046186145, - "grad_norm": 0.6705561238466075, - "learning_rate": 4.977374404419837e-05, - "loss": 0.6773, + "epoch": 0.14357682619647355, + "grad_norm": 0.36405062708691716, + "learning_rate": 9.552238805970149e-05, + "loss": 0.648, "step": 513 }, { - "epoch": 0.07193841847445766, - "grad_norm": 0.6410722179382549, - "learning_rate": 4.977222019183414e-05, - "loss": 0.7036, + "epoch": 0.1438567030506577, + "grad_norm": 0.3693638605210676, + "learning_rate": 9.57089552238806e-05, + "loss": 0.6903, "step": 514 }, { - "epoch": 0.07207837648705388, - "grad_norm": 0.49081412125581536, - "learning_rate": 4.977069124852731e-05, - "loss": 0.6944, + "epoch": 0.14413657990484188, + "grad_norm": 0.40528808147040785, + "learning_rate": 9.58955223880597e-05, + "loss": 0.6474, "step": 515 }, { - "epoch": 0.07221833449965011, - "grad_norm": 4.072584572211915, - "learning_rate": 4.976915721459209e-05, - "loss": 0.6678, + "epoch": 0.14441645675902604, + "grad_norm": 0.4271064032840139, + "learning_rate": 9.608208955223881e-05, + "loss": 0.6617, "step": 516 }, { - "epoch": 0.07235829251224632, - "grad_norm": 0.5026288646761555, - "learning_rate": 4.9767618090343745e-05, - "loss": 0.6564, + "epoch": 0.1446963336132102, + "grad_norm": 0.3704408892050665, + "learning_rate": 9.626865671641792e-05, + "loss": 0.6527, "step": 517 }, { - "epoch": 0.07249825052484254, - "grad_norm": 0.5165366931023583, - "learning_rate": 4.976607387609858e-05, - "loss": 0.6766, + "epoch": 0.14497621046739434, + "grad_norm": 0.36416815625544374, + "learning_rate": 9.645522388059703e-05, + "loss": 0.6775, "step": 518 }, { - "epoch": 0.07263820853743877, - "grad_norm": 0.5086677754272171, - "learning_rate": 4.976452457217394e-05, - "loss": 0.6468, + "epoch": 0.1452560873215785, + "grad_norm": 0.37214211322290497, + "learning_rate": 9.664179104477612e-05, + "loss": 0.6693, "step": 519 }, { - "epoch": 0.072778166550035, - "grad_norm": 0.5430265340871115, - "learning_rate": 4.976297017888824e-05, - "loss": 0.6543, + "epoch": 0.14553596417576267, + "grad_norm": 0.34813096434431123, + "learning_rate": 9.682835820895523e-05, + "loss": 0.6438, "step": 520 }, { - "epoch": 0.0729181245626312, - "grad_norm": 0.5436880201922443, - "learning_rate": 4.976141069656091e-05, - "loss": 0.6712, + "epoch": 0.14581584102994682, + "grad_norm": 0.37853947367790136, + "learning_rate": 9.701492537313434e-05, + "loss": 0.6482, "step": 521 }, { - "epoch": 0.07305808257522743, - "grad_norm": 0.5178999941373105, - "learning_rate": 4.975984612551243e-05, - "loss": 0.6676, + "epoch": 0.14609571788413098, + "grad_norm": 0.36937575889952384, + "learning_rate": 9.720149253731343e-05, + "loss": 0.672, "step": 522 }, { - "epoch": 0.07319804058782366, - "grad_norm": 0.5814901447061906, - "learning_rate": 4.975827646606436e-05, - "loss": 0.6959, + "epoch": 0.14637559473831513, + "grad_norm": 0.3584928313519803, + "learning_rate": 9.738805970149254e-05, + "loss": 0.6744, "step": 523 }, { - "epoch": 0.07333799860041987, - "grad_norm": 0.5577926798263833, - "learning_rate": 4.975670171853926e-05, - "loss": 0.6691, + "epoch": 0.1466554715924993, + "grad_norm": 0.34468056884846726, + "learning_rate": 9.757462686567165e-05, + "loss": 0.6451, "step": 524 }, { - "epoch": 0.0734779566130161, - "grad_norm": 0.5368192741447849, - "learning_rate": 4.975512188326077e-05, - "loss": 0.7067, + "epoch": 0.14693534844668346, + "grad_norm": 0.3467550680805756, + "learning_rate": 9.776119402985075e-05, + "loss": 0.6327, "step": 525 }, { - "epoch": 0.07361791462561232, - "grad_norm": 0.8965061390425338, - "learning_rate": 4.9753536960553545e-05, - "loss": 0.6601, + "epoch": 0.1472152253008676, + "grad_norm": 0.393307955817238, + "learning_rate": 9.794776119402985e-05, + "loss": 0.6529, "step": 526 }, { - "epoch": 0.07375787263820854, - "grad_norm": 0.8372680909100517, - "learning_rate": 4.975194695074333e-05, - "loss": 0.7079, + "epoch": 0.14749510215505177, + "grad_norm": 0.3706696868767734, + "learning_rate": 9.813432835820896e-05, + "loss": 0.6944, "step": 527 }, { - "epoch": 0.07389783065080475, - "grad_norm": 0.610185754465145, - "learning_rate": 4.9750351854156864e-05, - "loss": 0.6345, + "epoch": 0.14777497900923595, + "grad_norm": 0.35440153514955575, + "learning_rate": 9.832089552238806e-05, + "loss": 0.6627, "step": 528 }, { - "epoch": 0.07403778866340098, - "grad_norm": 0.5906986023369948, - "learning_rate": 4.9748751671121964e-05, - "loss": 0.6875, + "epoch": 0.1480548558634201, + "grad_norm": 0.35011677077474185, + "learning_rate": 9.850746268656717e-05, + "loss": 0.6446, "step": 529 }, { - "epoch": 0.0741777466759972, - "grad_norm": 0.5382974215866725, - "learning_rate": 4.9747146401967484e-05, - "loss": 0.6763, + "epoch": 0.14833473271760425, + "grad_norm": 0.353375329903762, + "learning_rate": 9.869402985074628e-05, + "loss": 0.6804, "step": 530 }, { - "epoch": 0.07431770468859342, - "grad_norm": 0.5118350667595736, - "learning_rate": 4.9745536047023324e-05, - "loss": 0.693, + "epoch": 0.1486146095717884, + "grad_norm": 0.35084665576786195, + "learning_rate": 9.888059701492539e-05, + "loss": 0.662, "step": 531 }, { - "epoch": 0.07445766270118964, - "grad_norm": 0.5342169754955468, - "learning_rate": 4.974392060662042e-05, - "loss": 0.6564, + "epoch": 0.14889448642597258, + "grad_norm": 0.3447708979608197, + "learning_rate": 9.906716417910448e-05, + "loss": 0.6669, "step": 532 }, { - "epoch": 0.07459762071378587, - "grad_norm": 0.494630398936876, - "learning_rate": 4.9742300081090774e-05, - "loss": 0.651, + "epoch": 0.14917436328015674, + "grad_norm": 0.35527684031521845, + "learning_rate": 9.925373134328359e-05, + "loss": 0.6684, "step": 533 }, { - "epoch": 0.07473757872638208, - "grad_norm": 0.5020465590373978, - "learning_rate": 4.974067447076742e-05, - "loss": 0.6924, + "epoch": 0.1494542401343409, + "grad_norm": 0.3610149287886694, + "learning_rate": 9.94402985074627e-05, + "loss": 0.6629, "step": 534 }, { - "epoch": 0.0748775367389783, - "grad_norm": 0.4949817767414362, - "learning_rate": 4.973904377598443e-05, - "loss": 0.6757, + "epoch": 0.14973411698852504, + "grad_norm": 0.32069706799707914, + "learning_rate": 9.96268656716418e-05, + "loss": 0.6494, "step": 535 }, { - "epoch": 0.07501749475157453, - "grad_norm": 0.503752121503341, - "learning_rate": 4.973740799707692e-05, - "loss": 0.6316, + "epoch": 0.15001399384270922, + "grad_norm": 0.3528379810923722, + "learning_rate": 9.98134328358209e-05, + "loss": 0.6755, "step": 536 }, { - "epoch": 0.07515745276417075, - "grad_norm": 0.5134153985614953, - "learning_rate": 4.973576713438108e-05, - "loss": 0.6768, + "epoch": 0.15029387069689337, + "grad_norm": 0.352761540563393, + "learning_rate": 0.0001, + "loss": 0.6744, "step": 537 }, { - "epoch": 0.07529741077676697, - "grad_norm": 0.5293943650054048, - "learning_rate": 4.973412118823412e-05, - "loss": 0.6915, + "epoch": 0.15057374755107752, + "grad_norm": 0.3192361239700303, + "learning_rate": 9.999999762048602e-05, + "loss": 0.6382, "step": 538 }, { - "epoch": 0.07543736878936319, - "grad_norm": 0.5248241753853723, - "learning_rate": 4.973247015897428e-05, - "loss": 0.6878, + "epoch": 0.15085362440526168, + "grad_norm": 0.35496003453929414, + "learning_rate": 9.999999048194425e-05, + "loss": 0.6568, "step": 539 }, { - "epoch": 0.07557732680195942, - "grad_norm": 0.536619020426942, - "learning_rate": 4.973081404694088e-05, - "loss": 0.6913, + "epoch": 0.15113350125944586, + "grad_norm": 0.35732688372914906, + "learning_rate": 9.999997858437541e-05, + "loss": 0.6835, "step": 540 }, { - "epoch": 0.07571728481455563, - "grad_norm": 0.4893689470456731, - "learning_rate": 4.972915285247426e-05, - "loss": 0.6593, + "epoch": 0.15141337811363, + "grad_norm": 0.3459422957498091, + "learning_rate": 9.999996192778065e-05, + "loss": 0.671, "step": 541 }, { - "epoch": 0.07585724282715185, - "grad_norm": 0.5456811185536261, - "learning_rate": 4.9727486575915823e-05, - "loss": 0.6797, + "epoch": 0.15169325496781416, + "grad_norm": 0.3629900969756224, + "learning_rate": 9.999994051216151e-05, + "loss": 0.6527, "step": 542 }, { - "epoch": 0.07599720083974808, - "grad_norm": 0.547854073554072, - "learning_rate": 4.9725815217607994e-05, - "loss": 0.6729, + "epoch": 0.15197313182199831, + "grad_norm": 0.3359649863697183, + "learning_rate": 9.999991433752003e-05, + "loss": 0.6276, "step": 543 }, { - "epoch": 0.0761371588523443, - "grad_norm": 0.5192786810194786, - "learning_rate": 4.972413877789426e-05, - "loss": 0.6241, + "epoch": 0.15225300867618247, + "grad_norm": 0.33179698726550116, + "learning_rate": 9.999988340385873e-05, + "loss": 0.6717, "step": 544 }, { - "epoch": 0.07627711686494051, - "grad_norm": 0.47716140290292464, - "learning_rate": 4.972245725711914e-05, - "loss": 0.662, + "epoch": 0.15253288553036665, + "grad_norm": 0.3387592133630175, + "learning_rate": 9.999984771118054e-05, + "loss": 0.6677, "step": 545 }, { - "epoch": 0.07641707487753674, - "grad_norm": 0.5273241129689946, - "learning_rate": 4.972077065562821e-05, - "loss": 0.6603, + "epoch": 0.1528127623845508, + "grad_norm": 0.3949036193320457, + "learning_rate": 9.999980725948886e-05, + "loss": 0.6305, "step": 546 }, { - "epoch": 0.07655703289013296, - "grad_norm": 0.5220860206770274, - "learning_rate": 4.971907897376809e-05, - "loss": 0.6862, + "epoch": 0.15309263923873495, + "grad_norm": 0.3296071177268487, + "learning_rate": 9.999976204878753e-05, + "loss": 0.6535, "step": 547 }, { - "epoch": 0.07669699090272918, - "grad_norm": 0.4954377301463657, - "learning_rate": 4.971738221188643e-05, - "loss": 0.6357, + "epoch": 0.1533725160929191, + "grad_norm": 0.3035639208800312, + "learning_rate": 9.999971207908087e-05, + "loss": 0.6459, "step": 548 }, { - "epoch": 0.0768369489153254, - "grad_norm": 0.5031579615121048, - "learning_rate": 4.9715680370331926e-05, - "loss": 0.7242, + "epoch": 0.15365239294710328, + "grad_norm": 0.34793917616897585, + "learning_rate": 9.999965735037364e-05, + "loss": 0.6555, "step": 549 }, { - "epoch": 0.07697690692792163, - "grad_norm": 0.4883815277737791, - "learning_rate": 4.9713973449454335e-05, - "loss": 0.6764, + "epoch": 0.15393226980128744, + "grad_norm": 0.33941641898613595, + "learning_rate": 9.999959786267103e-05, + "loss": 0.6534, "step": 550 }, { - "epoch": 0.07711686494051785, - "grad_norm": 0.5324244397343785, - "learning_rate": 4.971226144960443e-05, - "loss": 0.7038, + "epoch": 0.1542121466554716, + "grad_norm": 0.35110231378849793, + "learning_rate": 9.99995336159787e-05, + "loss": 0.6612, "step": 551 }, { - "epoch": 0.07725682295311406, - "grad_norm": 0.5072266809492816, - "learning_rate": 4.971054437113406e-05, - "loss": 0.6778, + "epoch": 0.15449202350965574, + "grad_norm": 0.3272549474936474, + "learning_rate": 9.999946461030279e-05, + "loss": 0.6331, "step": 552 }, { - "epoch": 0.07739678096571029, - "grad_norm": 0.48810600146086575, - "learning_rate": 4.97088222143961e-05, - "loss": 0.6719, + "epoch": 0.15477190036383992, + "grad_norm": 0.3343686163686602, + "learning_rate": 9.999939084564985e-05, + "loss": 0.6498, "step": 553 }, { - "epoch": 0.07753673897830651, - "grad_norm": 0.5236886736407549, - "learning_rate": 4.970709497974447e-05, - "loss": 0.6472, + "epoch": 0.15505177721802407, + "grad_norm": 0.3449979088086382, + "learning_rate": 9.999931232202689e-05, + "loss": 0.6539, "step": 554 }, { - "epoch": 0.07767669699090272, - "grad_norm": 0.521109484700154, - "learning_rate": 4.9705362667534126e-05, - "loss": 0.637, + "epoch": 0.15533165407220823, + "grad_norm": 0.3447312307894874, + "learning_rate": 9.999922903944139e-05, + "loss": 0.6201, "step": 555 }, { - "epoch": 0.07781665500349895, - "grad_norm": 0.4825990181972682, - "learning_rate": 4.970362527812109e-05, - "loss": 0.6906, + "epoch": 0.15561153092639238, + "grad_norm": 0.31637501434858883, + "learning_rate": 9.99991409979013e-05, + "loss": 0.6073, "step": 556 }, { - "epoch": 0.07795661301609517, - "grad_norm": 0.5261802281784332, - "learning_rate": 4.970188281186241e-05, - "loss": 0.6585, + "epoch": 0.15589140778057656, + "grad_norm": 0.3316495181989054, + "learning_rate": 9.999904819741499e-05, + "loss": 0.6347, "step": 557 }, { - "epoch": 0.07809657102869139, - "grad_norm": 0.4913754461237493, - "learning_rate": 4.970013526911617e-05, - "loss": 0.6868, + "epoch": 0.1561712846347607, + "grad_norm": 0.3407282822121997, + "learning_rate": 9.999895063799127e-05, + "loss": 0.6241, "step": 558 }, { - "epoch": 0.07823652904128761, - "grad_norm": 0.5026456765855343, - "learning_rate": 4.969838265024151e-05, - "loss": 0.7251, + "epoch": 0.15645116148894486, + "grad_norm": 0.3418343751138157, + "learning_rate": 9.999884831963946e-05, + "loss": 0.6738, "step": 559 }, { - "epoch": 0.07837648705388384, - "grad_norm": 0.500311336929775, - "learning_rate": 4.969662495559862e-05, - "loss": 0.6589, + "epoch": 0.15673103834312901, + "grad_norm": 0.35359280098762447, + "learning_rate": 9.999874124236927e-05, + "loss": 0.6493, "step": 560 }, { - "epoch": 0.07851644506648006, - "grad_norm": 0.5085930979997164, - "learning_rate": 4.969486218554871e-05, - "loss": 0.6596, + "epoch": 0.1570109151973132, + "grad_norm": 0.3444280324334195, + "learning_rate": 9.99986294061909e-05, + "loss": 0.639, "step": 561 }, { - "epoch": 0.07865640307907627, - "grad_norm": 0.5160633817299983, - "learning_rate": 4.9693094340454055e-05, - "loss": 0.6414, + "epoch": 0.15729079205149735, + "grad_norm": 0.33392447648348894, + "learning_rate": 9.999851281111501e-05, + "loss": 0.6583, "step": 562 }, { - "epoch": 0.0787963610916725, - "grad_norm": 0.5037304772878324, - "learning_rate": 4.969132142067797e-05, - "loss": 0.6766, + "epoch": 0.1575706689056815, + "grad_norm": 0.339319192915043, + "learning_rate": 9.999839145715269e-05, + "loss": 0.6578, "step": 563 }, { - "epoch": 0.07893631910426872, - "grad_norm": 0.4848388430973453, - "learning_rate": 4.96895434265848e-05, - "loss": 0.6928, + "epoch": 0.15785054575986565, + "grad_norm": 0.33436449611237556, + "learning_rate": 9.999826534431546e-05, + "loss": 0.6577, "step": 564 }, { - "epoch": 0.07907627711686493, - "grad_norm": 0.5085119763538183, - "learning_rate": 4.968776035853996e-05, - "loss": 0.6998, + "epoch": 0.15813042261404983, + "grad_norm": 0.31870643898655826, + "learning_rate": 9.999813447261536e-05, + "loss": 0.6507, "step": 565 }, { - "epoch": 0.07921623512946116, - "grad_norm": 0.9757272802585409, - "learning_rate": 4.968597221690986e-05, - "loss": 0.6367, + "epoch": 0.15841029946823398, + "grad_norm": 0.31539297776414027, + "learning_rate": 9.999799884206484e-05, + "loss": 0.625, "step": 566 }, { - "epoch": 0.07935619314205739, - "grad_norm": 0.49870651337329763, - "learning_rate": 4.9684179002062e-05, - "loss": 0.6814, + "epoch": 0.15869017632241814, + "grad_norm": 0.33819593568049605, + "learning_rate": 9.999785845267681e-05, + "loss": 0.6573, "step": 567 }, { - "epoch": 0.07949615115465361, - "grad_norm": 0.5044807130296783, - "learning_rate": 4.9682380714364897e-05, - "loss": 0.6108, + "epoch": 0.1589700531766023, + "grad_norm": 0.32485623049274076, + "learning_rate": 9.999771330446462e-05, + "loss": 0.6514, "step": 568 }, { - "epoch": 0.07963610916724982, - "grad_norm": 0.708447636643108, - "learning_rate": 4.968057735418812e-05, - "loss": 0.6621, + "epoch": 0.15924993003078644, + "grad_norm": 0.3308253870901051, + "learning_rate": 9.99975633974421e-05, + "loss": 0.644, "step": 569 }, { - "epoch": 0.07977606717984605, - "grad_norm": 0.5176890020141557, - "learning_rate": 4.967876892190227e-05, - "loss": 0.6712, + "epoch": 0.15952980688497062, + "grad_norm": 0.3262387922383822, + "learning_rate": 9.99974087316235e-05, + "loss": 0.6539, "step": 570 }, { - "epoch": 0.07991602519244227, - "grad_norm": 0.5165402375548048, - "learning_rate": 4.967695541787901e-05, - "loss": 0.6516, + "epoch": 0.15980968373915477, + "grad_norm": 0.3249032849983034, + "learning_rate": 9.999724930702356e-05, + "loss": 0.641, "step": 571 }, { - "epoch": 0.08005598320503848, - "grad_norm": 0.5023727855771402, - "learning_rate": 4.967513684249103e-05, - "loss": 0.6761, + "epoch": 0.16008956059333893, + "grad_norm": 0.3546281299107742, + "learning_rate": 9.999708512365744e-05, + "loss": 0.6301, "step": 572 }, { - "epoch": 0.08019594121763471, - "grad_norm": 0.5273373598923502, - "learning_rate": 4.967331319611206e-05, - "loss": 0.6513, + "epoch": 0.16036943744752308, + "grad_norm": 0.349394735113186, + "learning_rate": 9.999691618154077e-05, + "loss": 0.6697, "step": 573 }, { - "epoch": 0.08033589923023093, - "grad_norm": 0.5038764795022782, - "learning_rate": 4.967148447911688e-05, - "loss": 0.6332, + "epoch": 0.16064931430170726, + "grad_norm": 0.3388422869594151, + "learning_rate": 9.999674248068964e-05, + "loss": 0.6493, "step": 574 }, { - "epoch": 0.08047585724282715, - "grad_norm": 0.5050569409944193, - "learning_rate": 4.966965069188132e-05, - "loss": 0.6617, + "epoch": 0.1609291911558914, + "grad_norm": 0.3366125852937771, + "learning_rate": 9.999656402112059e-05, + "loss": 0.6218, "step": 575 }, { - "epoch": 0.08061581525542337, - "grad_norm": 0.48919260228995454, - "learning_rate": 4.9667811834782224e-05, - "loss": 0.6654, + "epoch": 0.16120906801007556, + "grad_norm": 0.33380398174771947, + "learning_rate": 9.999638080285058e-05, + "loss": 0.6166, "step": 576 }, { - "epoch": 0.0807557732680196, - "grad_norm": 0.5353958441988039, - "learning_rate": 4.9665967908197506e-05, - "loss": 0.6592, + "epoch": 0.16148894486425971, + "grad_norm": 0.36646577585598783, + "learning_rate": 9.999619282589705e-05, + "loss": 0.6541, "step": 577 }, { - "epoch": 0.08089573128061582, - "grad_norm": 0.5361921590438847, - "learning_rate": 4.966411891250612e-05, - "loss": 0.674, + "epoch": 0.1617688217184439, + "grad_norm": 0.3370800941263289, + "learning_rate": 9.999600009027792e-05, + "loss": 0.6578, "step": 578 }, { - "epoch": 0.08103568929321203, - "grad_norm": 0.5342727473087102, - "learning_rate": 4.9662264848088034e-05, - "loss": 0.6885, + "epoch": 0.16204869857262805, + "grad_norm": 0.31956453412198904, + "learning_rate": 9.999580259601151e-05, + "loss": 0.6209, "step": 579 }, { - "epoch": 0.08117564730580826, - "grad_norm": 0.539903295130481, - "learning_rate": 4.96604057153243e-05, - "loss": 0.6662, + "epoch": 0.1623285754268122, + "grad_norm": 0.3361863837783066, + "learning_rate": 9.999560034311663e-05, + "loss": 0.6306, "step": 580 }, { - "epoch": 0.08131560531840448, - "grad_norm": 0.49180697521465716, - "learning_rate": 4.965854151459697e-05, - "loss": 0.6423, + "epoch": 0.16260845228099635, + "grad_norm": 0.3276060075852739, + "learning_rate": 9.999539333161251e-05, + "loss": 0.6532, "step": 581 }, { - "epoch": 0.0814555633310007, - "grad_norm": 0.5088216974014643, - "learning_rate": 4.965667224628916e-05, - "loss": 0.696, + "epoch": 0.16288832913518053, + "grad_norm": 0.30105154207683, + "learning_rate": 9.999518156151888e-05, + "loss": 0.6619, "step": 582 }, { - "epoch": 0.08159552134359692, - "grad_norm": 0.49968640535207737, - "learning_rate": 4.965479791078502e-05, - "loss": 0.6543, + "epoch": 0.16316820598936468, + "grad_norm": 0.3222246686403399, + "learning_rate": 9.999496503285589e-05, + "loss": 0.6415, "step": 583 }, { - "epoch": 0.08173547935619314, - "grad_norm": 0.4859083259348156, - "learning_rate": 4.965291850846976e-05, - "loss": 0.6729, + "epoch": 0.16344808284354884, + "grad_norm": 0.3478766083008775, + "learning_rate": 9.999474374564415e-05, + "loss": 0.6603, "step": 584 }, { - "epoch": 0.08187543736878937, - "grad_norm": 0.4993128882253819, - "learning_rate": 4.96510340397296e-05, - "loss": 0.6521, + "epoch": 0.163727959697733, + "grad_norm": 0.3236462403946272, + "learning_rate": 9.99945176999047e-05, + "loss": 0.6671, "step": 585 }, { - "epoch": 0.08201539538138558, - "grad_norm": 0.5193117598814236, - "learning_rate": 4.964914450495183e-05, - "loss": 0.6525, + "epoch": 0.16400783655191717, + "grad_norm": 0.3140274802138082, + "learning_rate": 9.999428689565909e-05, + "loss": 0.6423, "step": 586 }, { - "epoch": 0.0821553533939818, - "grad_norm": 0.5719370954745235, - "learning_rate": 4.964724990452476e-05, - "loss": 0.6622, + "epoch": 0.16428771340610132, + "grad_norm": 0.3500979849784527, + "learning_rate": 9.999405133292925e-05, + "loss": 0.6618, "step": 587 }, { - "epoch": 0.08229531140657803, - "grad_norm": 0.4700969122107685, - "learning_rate": 4.964535023883776e-05, - "loss": 0.6954, + "epoch": 0.16456759026028547, + "grad_norm": 0.31359317793341923, + "learning_rate": 9.999381101173764e-05, + "loss": 0.6325, "step": 588 }, { - "epoch": 0.08243526941917424, - "grad_norm": 0.484560721731152, - "learning_rate": 4.964344550828122e-05, - "loss": 0.6869, + "epoch": 0.16484746711446963, + "grad_norm": 0.3355690039308301, + "learning_rate": 9.999356593210709e-05, + "loss": 0.6836, "step": 589 }, { - "epoch": 0.08257522743177047, - "grad_norm": 0.4925497467039491, - "learning_rate": 4.964153571324658e-05, - "loss": 0.6675, + "epoch": 0.1651273439686538, + "grad_norm": 0.33351754121946403, + "learning_rate": 9.999331609406098e-05, + "loss": 0.6632, "step": 590 }, { - "epoch": 0.08271518544436669, - "grad_norm": 0.48466585548673363, - "learning_rate": 4.9639620854126326e-05, - "loss": 0.6593, + "epoch": 0.16540722082283796, + "grad_norm": 0.32338398053521783, + "learning_rate": 9.999306149762304e-05, + "loss": 0.6462, "step": 591 }, { - "epoch": 0.08285514345696292, - "grad_norm": 0.48477008272792094, - "learning_rate": 4.963770093131399e-05, - "loss": 0.7009, + "epoch": 0.1656870976770221, + "grad_norm": 0.33566937706531347, + "learning_rate": 9.999280214281754e-05, + "loss": 0.6659, "step": 592 }, { - "epoch": 0.08299510146955913, - "grad_norm": 0.5346681837659809, - "learning_rate": 4.963577594520412e-05, - "loss": 0.6611, + "epoch": 0.16596697453120626, + "grad_norm": 0.3183972843362386, + "learning_rate": 9.999253802966914e-05, + "loss": 0.649, "step": 593 }, { - "epoch": 0.08313505948215535, - "grad_norm": 0.5094553222628161, - "learning_rate": 4.963384589619233e-05, - "loss": 0.6488, + "epoch": 0.16624685138539042, + "grad_norm": 0.3402072648711224, + "learning_rate": 9.999226915820298e-05, + "loss": 0.6821, "step": 594 }, { - "epoch": 0.08327501749475158, - "grad_norm": 0.5908935657202756, - "learning_rate": 4.9631910784675265e-05, - "loss": 0.6724, + "epoch": 0.1665267282395746, + "grad_norm": 0.31877277389056163, + "learning_rate": 9.999199552844469e-05, + "loss": 0.6205, "step": 595 }, { - "epoch": 0.08341497550734779, - "grad_norm": 0.515639184329749, - "learning_rate": 4.96299706110506e-05, - "loss": 0.6859, + "epoch": 0.16680660509375875, + "grad_norm": 0.33758778123086086, + "learning_rate": 9.999171714042026e-05, + "loss": 0.6621, "step": 596 }, { - "epoch": 0.08355493351994402, - "grad_norm": 0.55185217811717, - "learning_rate": 4.962802537571707e-05, - "loss": 0.6623, + "epoch": 0.1670864819479429, + "grad_norm": 0.3223090328122372, + "learning_rate": 9.999143399415622e-05, + "loss": 0.6125, "step": 597 }, { - "epoch": 0.08369489153254024, - "grad_norm": 0.49915022546087007, - "learning_rate": 4.962607507907444e-05, - "loss": 0.6648, + "epoch": 0.16736635880212705, + "grad_norm": 0.31000927984882737, + "learning_rate": 9.99911460896795e-05, + "loss": 0.6385, "step": 598 }, { - "epoch": 0.08383484954513645, - "grad_norm": 0.5284448258033697, - "learning_rate": 4.962411972152352e-05, - "loss": 0.6342, + "epoch": 0.16764623565631123, + "grad_norm": 0.33246563079295793, + "learning_rate": 9.999085342701753e-05, + "loss": 0.6626, "step": 599 }, { - "epoch": 0.08397480755773268, - "grad_norm": 0.4774184931571651, - "learning_rate": 4.9622159303466144e-05, - "loss": 0.686, + "epoch": 0.16792611251049538, + "grad_norm": 0.312302187510902, + "learning_rate": 9.999055600619814e-05, + "loss": 0.6684, "step": 600 }, { - "epoch": 0.0841147655703289, - "grad_norm": 0.49606716653441585, - "learning_rate": 4.962019382530521e-05, - "loss": 0.6664, + "epoch": 0.16820598936467954, + "grad_norm": 0.34065399079592823, + "learning_rate": 9.999025382724965e-05, + "loss": 0.6076, "step": 601 }, { - "epoch": 0.08425472358292513, - "grad_norm": 0.5155895908215095, - "learning_rate": 4.9618223287444624e-05, - "loss": 0.693, + "epoch": 0.1684858662188637, + "grad_norm": 0.32782160106703323, + "learning_rate": 9.998994689020082e-05, + "loss": 0.6354, "step": 602 }, { - "epoch": 0.08439468159552134, - "grad_norm": 0.48698005510262393, - "learning_rate": 4.9616247690289375e-05, - "loss": 0.6824, + "epoch": 0.16876574307304787, + "grad_norm": 0.30407057283641703, + "learning_rate": 9.998963519508087e-05, + "loss": 0.6353, "step": 603 }, { - "epoch": 0.08453463960811756, - "grad_norm": 0.5096390231519464, - "learning_rate": 4.9614267034245454e-05, - "loss": 0.6709, + "epoch": 0.16904561992723202, + "grad_norm": 0.3318246992586628, + "learning_rate": 9.998931874191945e-05, + "loss": 0.6626, "step": 604 }, { - "epoch": 0.08467459762071379, - "grad_norm": 0.47879312610079056, - "learning_rate": 4.961228131971991e-05, - "loss": 0.6191, + "epoch": 0.16932549678141617, + "grad_norm": 0.3255311682463083, + "learning_rate": 9.998899753074669e-05, + "loss": 0.634, "step": 605 }, { - "epoch": 0.08481455563331, - "grad_norm": 0.5036694948790285, - "learning_rate": 4.9610290547120835e-05, - "loss": 0.6677, + "epoch": 0.16960537363560033, + "grad_norm": 0.3159736551954797, + "learning_rate": 9.998867156159318e-05, + "loss": 0.6412, "step": 606 }, { - "epoch": 0.08495451364590623, - "grad_norm": 0.5387911945134125, - "learning_rate": 4.960829471685734e-05, - "loss": 0.7108, + "epoch": 0.1698852504897845, + "grad_norm": 0.3151525995521645, + "learning_rate": 9.998834083448991e-05, + "loss": 0.6233, "step": 607 }, { - "epoch": 0.08509447165850245, - "grad_norm": 0.48418619588152695, - "learning_rate": 4.9606293829339595e-05, - "loss": 0.6581, + "epoch": 0.17016512734396866, + "grad_norm": 0.31240621793504697, + "learning_rate": 9.998800534946839e-05, + "loss": 0.6199, "step": 608 }, { - "epoch": 0.08523442967109868, - "grad_norm": 0.46835555910688126, - "learning_rate": 4.9604287884978803e-05, - "loss": 0.6285, + "epoch": 0.1704450041981528, + "grad_norm": 0.32750340408237494, + "learning_rate": 9.998766510656056e-05, + "loss": 0.6482, "step": 609 }, { - "epoch": 0.08537438768369489, - "grad_norm": 0.49536583738424655, - "learning_rate": 4.9602276884187206e-05, - "loss": 0.6862, + "epoch": 0.17072488105233696, + "grad_norm": 0.3243616902352178, + "learning_rate": 9.998732010579876e-05, + "loss": 0.6088, "step": 610 }, { - "epoch": 0.08551434569629111, - "grad_norm": 0.5219805753845016, - "learning_rate": 4.9600260827378074e-05, - "loss": 0.7262, + "epoch": 0.17100475790652114, + "grad_norm": 0.32607558121529656, + "learning_rate": 9.998697034721587e-05, + "loss": 0.6368, "step": 611 }, { - "epoch": 0.08565430370888734, - "grad_norm": 0.5105322442141146, - "learning_rate": 4.959823971496574e-05, - "loss": 0.6818, + "epoch": 0.1712846347607053, + "grad_norm": 0.31635826408054585, + "learning_rate": 9.998661583084516e-05, + "loss": 0.6464, "step": 612 }, { - "epoch": 0.08579426172148355, - "grad_norm": 0.5306990676448388, - "learning_rate": 4.9596213547365566e-05, - "loss": 0.7141, + "epoch": 0.17156451161488945, + "grad_norm": 0.32383391526072597, + "learning_rate": 9.998625655672037e-05, + "loss": 0.6498, "step": 613 }, { - "epoch": 0.08593421973407978, - "grad_norm": 0.518971911880668, - "learning_rate": 4.959418232499394e-05, - "loss": 0.6447, + "epoch": 0.1718443884690736, + "grad_norm": 0.32236318872224334, + "learning_rate": 9.998589252487571e-05, + "loss": 0.6295, "step": 614 }, { - "epoch": 0.086074177746676, - "grad_norm": 0.48723426006398907, - "learning_rate": 4.959214604826831e-05, - "loss": 0.6691, + "epoch": 0.17212426532325778, + "grad_norm": 0.32684039910383994, + "learning_rate": 9.998552373534582e-05, + "loss": 0.6603, "step": 615 }, { - "epoch": 0.08621413575927221, - "grad_norm": 0.47908534192693764, - "learning_rate": 4.9590104717607135e-05, - "loss": 0.7039, + "epoch": 0.17240414217744193, + "grad_norm": 0.3140580905751736, + "learning_rate": 9.998515018816579e-05, + "loss": 0.6284, "step": 616 }, { - "epoch": 0.08635409377186844, - "grad_norm": 0.47477383891868286, - "learning_rate": 4.958805833342994e-05, - "loss": 0.6664, + "epoch": 0.17268401903162608, + "grad_norm": 0.3341019894639354, + "learning_rate": 9.99847718833712e-05, + "loss": 0.657, "step": 617 }, { - "epoch": 0.08649405178446466, - "grad_norm": 0.4984833292893388, - "learning_rate": 4.958600689615728e-05, - "loss": 0.6829, + "epoch": 0.17296389588581024, + "grad_norm": 0.32375888251616164, + "learning_rate": 9.998438882099805e-05, + "loss": 0.6398, "step": 618 }, { - "epoch": 0.08663400979706089, - "grad_norm": 0.49546276658635413, - "learning_rate": 4.958395040621073e-05, - "loss": 0.6482, + "epoch": 0.1732437727399944, + "grad_norm": 0.3200189127041982, + "learning_rate": 9.998400100108279e-05, + "loss": 0.6495, "step": 619 }, { - "epoch": 0.0867739678096571, - "grad_norm": 0.51460494631631, - "learning_rate": 4.958188886401295e-05, - "loss": 0.684, + "epoch": 0.17352364959417857, + "grad_norm": 0.33045751781447524, + "learning_rate": 9.998360842366232e-05, + "loss": 0.6394, "step": 620 }, { - "epoch": 0.08691392582225332, - "grad_norm": 0.5030849606038066, - "learning_rate": 4.9579822269987574e-05, - "loss": 0.6708, + "epoch": 0.17380352644836272, + "grad_norm": 0.3109262315892561, + "learning_rate": 9.998321108877405e-05, + "loss": 0.6312, "step": 621 }, { - "epoch": 0.08705388383484955, - "grad_norm": 0.50101210673068, - "learning_rate": 4.957775062455933e-05, - "loss": 0.6639, + "epoch": 0.17408340330254687, + "grad_norm": 0.31570657112812556, + "learning_rate": 9.998280899645574e-05, + "loss": 0.6467, "step": 622 }, { - "epoch": 0.08719384184744576, - "grad_norm": 0.46852010673845496, - "learning_rate": 4.9575673928153957e-05, - "loss": 0.6603, + "epoch": 0.17436328015673103, + "grad_norm": 0.3071127908154649, + "learning_rate": 9.998240214674572e-05, + "loss": 0.6412, "step": 623 }, { - "epoch": 0.08733379986004199, - "grad_norm": 0.48823643441327075, - "learning_rate": 4.957359218119824e-05, - "loss": 0.6719, + "epoch": 0.1746431570109152, + "grad_norm": 0.3142523728280045, + "learning_rate": 9.998199053968267e-05, + "loss": 0.6312, "step": 624 }, { - "epoch": 0.08747375787263821, - "grad_norm": 0.4891031572821987, - "learning_rate": 4.957150538411999e-05, - "loss": 0.6656, + "epoch": 0.17492303386509936, + "grad_norm": 0.31095019347566755, + "learning_rate": 9.99815741753058e-05, + "loss": 0.6012, "step": 625 }, { - "epoch": 0.08761371588523444, - "grad_norm": 0.563701350648365, - "learning_rate": 4.956941353734807e-05, - "loss": 0.6663, + "epoch": 0.1752029107192835, + "grad_norm": 0.3186015632457505, + "learning_rate": 9.998115305365471e-05, + "loss": 0.6178, "step": 626 }, { - "epoch": 0.08775367389783065, - "grad_norm": 0.48687644197380436, - "learning_rate": 4.956731664131238e-05, - "loss": 0.6913, + "epoch": 0.17548278757346766, + "grad_norm": 0.3391631997299979, + "learning_rate": 9.998072717476951e-05, + "loss": 0.6538, "step": 627 }, { - "epoch": 0.08789363191042687, - "grad_norm": 0.4907175129397562, - "learning_rate": 4.956521469644384e-05, - "loss": 0.706, + "epoch": 0.17576266442765184, + "grad_norm": 0.3342822626637587, + "learning_rate": 9.998029653869071e-05, + "loss": 0.6293, "step": 628 }, { - "epoch": 0.0880335899230231, - "grad_norm": 0.5313788653762662, - "learning_rate": 4.9563107703174436e-05, - "loss": 0.6644, + "epoch": 0.176042541281836, + "grad_norm": 0.3015306664313235, + "learning_rate": 9.997986114545932e-05, + "loss": 0.6458, "step": 629 }, { - "epoch": 0.08817354793561931, - "grad_norm": 0.5033134935760109, - "learning_rate": 4.956099566193717e-05, - "loss": 0.6654, + "epoch": 0.17632241813602015, + "grad_norm": 0.31440909073843937, + "learning_rate": 9.997942099511676e-05, + "loss": 0.6206, "step": 630 }, { - "epoch": 0.08831350594821553, - "grad_norm": 0.4589870741107357, - "learning_rate": 4.955887857316609e-05, - "loss": 0.6417, + "epoch": 0.1766022949902043, + "grad_norm": 0.32982370779234377, + "learning_rate": 9.997897608770495e-05, + "loss": 0.6538, "step": 631 }, { - "epoch": 0.08845346396081176, - "grad_norm": 0.48309495215754206, - "learning_rate": 4.955675643729628e-05, - "loss": 0.665, + "epoch": 0.17688217184438848, + "grad_norm": 0.3124634522078974, + "learning_rate": 9.997852642326622e-05, + "loss": 0.6376, "step": 632 }, { - "epoch": 0.08859342197340797, - "grad_norm": 0.5259807553857797, - "learning_rate": 4.955462925476385e-05, - "loss": 0.6945, + "epoch": 0.17716204869857263, + "grad_norm": 0.31262914391944435, + "learning_rate": 9.997807200184335e-05, + "loss": 0.6232, "step": 633 }, { - "epoch": 0.0887333799860042, - "grad_norm": 0.5163974444977445, - "learning_rate": 4.9552497026005974e-05, - "loss": 0.6972, + "epoch": 0.17744192555275679, + "grad_norm": 0.3175264108164007, + "learning_rate": 9.997761282347963e-05, + "loss": 0.6646, "step": 634 }, { - "epoch": 0.08887333799860042, - "grad_norm": 0.4973389672383331, - "learning_rate": 4.955035975146084e-05, - "loss": 0.6551, + "epoch": 0.17772180240694094, + "grad_norm": 0.30590158489987684, + "learning_rate": 9.997714888821874e-05, + "loss": 0.6159, "step": 635 }, { - "epoch": 0.08901329601119665, - "grad_norm": 0.47160543121321047, - "learning_rate": 4.9548217431567665e-05, - "loss": 0.6499, + "epoch": 0.17800167926112512, + "grad_norm": 0.3317089000758993, + "learning_rate": 9.997668019610486e-05, + "loss": 0.6697, "step": 636 }, { - "epoch": 0.08915325402379286, - "grad_norm": 0.4981939411665721, - "learning_rate": 4.954607006676675e-05, - "loss": 0.6541, + "epoch": 0.17828155611530927, + "grad_norm": 0.3101179298204563, + "learning_rate": 9.997620674718257e-05, + "loss": 0.6339, "step": 637 }, { - "epoch": 0.08929321203638908, - "grad_norm": 0.4495878467816103, - "learning_rate": 4.954391765749936e-05, - "loss": 0.6113, + "epoch": 0.17856143296949342, + "grad_norm": 0.303044616721775, + "learning_rate": 9.997572854149696e-05, + "loss": 0.6395, "step": 638 }, { - "epoch": 0.08943317004898531, - "grad_norm": 0.47812065178812635, - "learning_rate": 4.954176020420788e-05, - "loss": 0.6269, + "epoch": 0.17884130982367757, + "grad_norm": 0.3110256568397774, + "learning_rate": 9.997524557909352e-05, + "loss": 0.5983, "step": 639 }, { - "epoch": 0.08957312806158152, - "grad_norm": 0.5080513968505054, - "learning_rate": 4.953959770733565e-05, - "loss": 0.7068, + "epoch": 0.17912118667786175, + "grad_norm": 0.3149989233307906, + "learning_rate": 9.997475786001826e-05, + "loss": 0.6286, "step": 640 }, { - "epoch": 0.08971308607417774, - "grad_norm": 0.5272266307181286, - "learning_rate": 4.95374301673271e-05, - "loss": 0.6986, + "epoch": 0.1794010635320459, + "grad_norm": 0.2845203322534443, + "learning_rate": 9.997426538431755e-05, + "loss": 0.6055, "step": 641 }, { - "epoch": 0.08985304408677397, - "grad_norm": 0.5013094523061675, - "learning_rate": 4.953525758462769e-05, - "loss": 0.6807, + "epoch": 0.17968094038623006, + "grad_norm": 0.3350427459911618, + "learning_rate": 9.997376815203829e-05, + "loss": 0.6226, "step": 642 }, { - "epoch": 0.0899930020993702, - "grad_norm": 0.6486797331723868, - "learning_rate": 4.95330799596839e-05, - "loss": 0.6414, + "epoch": 0.1799608172404142, + "grad_norm": 0.3148746819181946, + "learning_rate": 9.997326616322782e-05, + "loss": 0.6547, "step": 643 }, { - "epoch": 0.0901329601119664, - "grad_norm": 0.4872602306676202, - "learning_rate": 4.953089729294326e-05, - "loss": 0.6689, + "epoch": 0.18024069409459836, + "grad_norm": 0.3028224259698831, + "learning_rate": 9.997275941793389e-05, + "loss": 0.6193, "step": 644 }, { - "epoch": 0.09027291812456263, - "grad_norm": 0.5040432352418639, - "learning_rate": 4.952870958485432e-05, - "loss": 0.6865, + "epoch": 0.18052057094878254, + "grad_norm": 0.29673426752395654, + "learning_rate": 9.997224791620476e-05, + "loss": 0.62, "step": 645 }, { - "epoch": 0.09041287613715886, - "grad_norm": 0.46173609966688006, - "learning_rate": 4.952651683586668e-05, - "loss": 0.6481, + "epoch": 0.1808004478029667, + "grad_norm": 0.3058890686267457, + "learning_rate": 9.99717316580891e-05, + "loss": 0.658, "step": 646 }, { - "epoch": 0.09055283414975507, - "grad_norm": 0.45658386339702023, - "learning_rate": 4.952431904643097e-05, - "loss": 0.6539, + "epoch": 0.18108032465715085, + "grad_norm": 0.40700293516024183, + "learning_rate": 9.997121064363606e-05, + "loss": 0.6653, "step": 647 }, { - "epoch": 0.0906927921623513, - "grad_norm": 0.503432394082156, - "learning_rate": 4.952211621699887e-05, - "loss": 0.7104, + "epoch": 0.181360201511335, + "grad_norm": 0.323877426832963, + "learning_rate": 9.99706848728952e-05, + "loss": 0.6556, "step": 648 }, { - "epoch": 0.09083275017494752, - "grad_norm": 0.46517000786275325, - "learning_rate": 4.951990834802307e-05, - "loss": 0.6525, + "epoch": 0.18164007836551918, + "grad_norm": 0.29657675077306456, + "learning_rate": 9.997015434591659e-05, + "loss": 0.6384, "step": 649 }, { - "epoch": 0.09097270818754374, - "grad_norm": 0.5062356868953571, - "learning_rate": 4.951769543995731e-05, - "loss": 0.6823, + "epoch": 0.18191995521970333, + "grad_norm": 0.30619817479643974, + "learning_rate": 9.996961906275073e-05, + "loss": 0.62, "step": 650 }, { - "epoch": 0.09111266620013996, - "grad_norm": 0.5119419441222778, - "learning_rate": 4.951547749325638e-05, - "loss": 0.6635, + "epoch": 0.18219983207388749, + "grad_norm": 0.32270553389437934, + "learning_rate": 9.996907902344856e-05, + "loss": 0.6491, "step": 651 }, { - "epoch": 0.09125262421273618, - "grad_norm": 0.46695225104802485, - "learning_rate": 4.951325450837607e-05, - "loss": 0.6971, + "epoch": 0.18247970892807164, + "grad_norm": 0.2969028118634236, + "learning_rate": 9.996853422806146e-05, + "loss": 0.6528, "step": 652 }, { - "epoch": 0.0913925822253324, - "grad_norm": 0.49434009422947434, - "learning_rate": 4.951102648577324e-05, - "loss": 0.6914, + "epoch": 0.18275958578225582, + "grad_norm": 0.29891300543350263, + "learning_rate": 9.996798467664132e-05, + "loss": 0.6387, "step": 653 }, { - "epoch": 0.09153254023792862, - "grad_norm": 0.4604670405526644, - "learning_rate": 4.950879342590577e-05, - "loss": 0.6519, + "epoch": 0.18303946263643997, + "grad_norm": 0.3134949112446973, + "learning_rate": 9.996743036924042e-05, + "loss": 0.6351, "step": 654 }, { - "epoch": 0.09167249825052484, - "grad_norm": 0.5184837228643522, - "learning_rate": 4.9506555329232574e-05, - "loss": 0.6886, + "epoch": 0.18331933949062412, + "grad_norm": 0.3264595354016295, + "learning_rate": 9.996687130591153e-05, + "loss": 0.6308, "step": 655 }, { - "epoch": 0.09181245626312107, - "grad_norm": 0.48694633832428874, - "learning_rate": 4.9504312196213596e-05, - "loss": 0.6636, + "epoch": 0.18359921634480827, + "grad_norm": 0.2963404807242696, + "learning_rate": 9.996630748670787e-05, + "loss": 0.6485, "step": 656 }, { - "epoch": 0.09195241427571728, - "grad_norm": 0.45470193445575985, - "learning_rate": 4.9502064027309836e-05, - "loss": 0.6402, + "epoch": 0.18387909319899245, + "grad_norm": 0.3209518691253152, + "learning_rate": 9.99657389116831e-05, + "loss": 0.6568, "step": 657 }, { - "epoch": 0.0920923722883135, - "grad_norm": 0.49263294738528823, - "learning_rate": 4.9499810822983314e-05, - "loss": 0.6513, + "epoch": 0.1841589700531766, + "grad_norm": 0.320733443890241, + "learning_rate": 9.996516558089133e-05, + "loss": 0.6192, "step": 658 }, { - "epoch": 0.09223233030090973, - "grad_norm": 0.48137872453923874, - "learning_rate": 4.949755258369707e-05, - "loss": 0.6387, + "epoch": 0.18443884690736076, + "grad_norm": 0.3004612938137131, + "learning_rate": 9.996458749438712e-05, + "loss": 0.6372, "step": 659 }, { - "epoch": 0.09237228831350595, - "grad_norm": 0.5244416908958357, - "learning_rate": 4.949528930991522e-05, - "loss": 0.6634, + "epoch": 0.1847187237615449, + "grad_norm": 0.3240748172972207, + "learning_rate": 9.996400465222551e-05, + "loss": 0.6367, "step": 660 }, { - "epoch": 0.09251224632610217, - "grad_norm": 0.4835025657565747, - "learning_rate": 4.949302100210287e-05, - "loss": 0.6582, + "epoch": 0.1849986006157291, + "grad_norm": 0.30365473088989753, + "learning_rate": 9.996341705446197e-05, + "loss": 0.655, "step": 661 }, { - "epoch": 0.09265220433869839, - "grad_norm": 0.4601647504352115, - "learning_rate": 4.9490747660726186e-05, - "loss": 0.633, + "epoch": 0.18527847746991324, + "grad_norm": 0.3101674467288344, + "learning_rate": 9.996282470115245e-05, + "loss": 0.6254, "step": 662 }, { - "epoch": 0.09279216235129462, - "grad_norm": 0.4807820429695223, - "learning_rate": 4.948846928625236e-05, - "loss": 0.6484, + "epoch": 0.1855583543240974, + "grad_norm": 0.30271824363428407, + "learning_rate": 9.996222759235329e-05, + "loss": 0.6263, "step": 663 }, { - "epoch": 0.09293212036389083, - "grad_norm": 0.4621826268890677, - "learning_rate": 4.948618587914963e-05, - "loss": 0.6201, + "epoch": 0.18583823117828155, + "grad_norm": 0.3079318380313509, + "learning_rate": 9.996162572812135e-05, + "loss": 0.6221, "step": 664 }, { - "epoch": 0.09307207837648705, - "grad_norm": 0.5355555893122882, - "learning_rate": 4.9483897439887256e-05, - "loss": 0.6939, + "epoch": 0.18611810803246573, + "grad_norm": 0.3051785871844845, + "learning_rate": 9.99610191085139e-05, + "loss": 0.6227, "step": 665 }, { - "epoch": 0.09321203638908328, - "grad_norm": 0.48631181937028944, - "learning_rate": 4.948160396893553e-05, - "loss": 0.6463, + "epoch": 0.18639798488664988, + "grad_norm": 0.3247709985735292, + "learning_rate": 9.99604077335887e-05, + "loss": 0.6214, "step": 666 }, { - "epoch": 0.0933519944016795, - "grad_norm": 0.4699689810513121, - "learning_rate": 4.947930546676579e-05, - "loss": 0.6417, + "epoch": 0.18667786174083403, + "grad_norm": 0.3223222446657553, + "learning_rate": 9.995979160340392e-05, + "loss": 0.6688, "step": 667 }, { - "epoch": 0.09349195241427571, - "grad_norm": 0.5070622795052616, - "learning_rate": 4.94770019338504e-05, - "loss": 0.6536, + "epoch": 0.18695773859501819, + "grad_norm": 0.30940214214876993, + "learning_rate": 9.995917071801821e-05, + "loss": 0.6162, "step": 668 }, { - "epoch": 0.09363191042687194, - "grad_norm": 0.5074909040261282, - "learning_rate": 4.947469337066275e-05, - "loss": 0.6313, + "epoch": 0.18723761544920234, + "grad_norm": 0.3153905402430541, + "learning_rate": 9.995854507749068e-05, + "loss": 0.6316, "step": 669 }, { - "epoch": 0.09377186843946816, - "grad_norm": 0.5145354321514217, - "learning_rate": 4.9472379777677287e-05, - "loss": 0.6679, + "epoch": 0.18751749230338652, + "grad_norm": 0.31896302875409144, + "learning_rate": 9.995791468188083e-05, + "loss": 0.6546, "step": 670 }, { - "epoch": 0.09391182645206438, - "grad_norm": 0.45833527492215737, - "learning_rate": 4.947006115536947e-05, - "loss": 0.6604, + "epoch": 0.18779736915757067, + "grad_norm": 0.3020089911619597, + "learning_rate": 9.995727953124874e-05, + "loss": 0.6266, "step": 671 }, { - "epoch": 0.0940517844646606, - "grad_norm": 0.45998883685850256, - "learning_rate": 4.9467737504215805e-05, - "loss": 0.6484, + "epoch": 0.18807724601175482, + "grad_norm": 0.2863324313594203, + "learning_rate": 9.99566396256548e-05, + "loss": 0.6282, "step": 672 }, { - "epoch": 0.09419174247725683, - "grad_norm": 0.46701832176416125, - "learning_rate": 4.946540882469381e-05, - "loss": 0.6622, + "epoch": 0.18835712286593898, + "grad_norm": 0.3251047115450784, + "learning_rate": 9.995599496515995e-05, + "loss": 0.6637, "step": 673 }, { - "epoch": 0.09433170048985304, - "grad_norm": 0.4937012715997403, - "learning_rate": 4.946307511728208e-05, - "loss": 0.6423, + "epoch": 0.18863699972012316, + "grad_norm": 0.31740822333464325, + "learning_rate": 9.995534554982553e-05, + "loss": 0.5948, "step": 674 }, { - "epoch": 0.09447165850244926, - "grad_norm": 0.45944259724419273, - "learning_rate": 4.9460736382460195e-05, - "loss": 0.6263, + "epoch": 0.1889168765743073, + "grad_norm": 0.2995959785820535, + "learning_rate": 9.995469137971337e-05, + "loss": 0.5992, "step": 675 }, { - "epoch": 0.09461161651504549, - "grad_norm": 0.4953211118671407, - "learning_rate": 4.945839262070879e-05, - "loss": 0.6401, + "epoch": 0.18919675342849146, + "grad_norm": 0.31641331477201434, + "learning_rate": 9.99540324548857e-05, + "loss": 0.6299, "step": 676 }, { - "epoch": 0.09475157452764171, - "grad_norm": 0.4954445380856775, - "learning_rate": 4.9456043832509544e-05, - "loss": 0.6766, + "epoch": 0.1894766302826756, + "grad_norm": 0.31194497903046237, + "learning_rate": 9.995336877540527e-05, + "loss": 0.6163, "step": 677 }, { - "epoch": 0.09489153254023792, - "grad_norm": 0.4756639472671179, - "learning_rate": 4.9453690018345144e-05, - "loss": 0.6516, + "epoch": 0.1897565071368598, + "grad_norm": 0.2901853783690715, + "learning_rate": 9.995270034133525e-05, + "loss": 0.5985, "step": 678 }, { - "epoch": 0.09503149055283415, - "grad_norm": 0.5216854708766936, - "learning_rate": 4.9451331178699324e-05, - "loss": 0.6233, + "epoch": 0.19003638399104394, + "grad_norm": 0.312865923561726, + "learning_rate": 9.995202715273925e-05, + "loss": 0.6312, "step": 679 }, { - "epoch": 0.09517144856543037, - "grad_norm": 0.5149096513895491, - "learning_rate": 4.944896731405686e-05, - "loss": 0.6561, + "epoch": 0.1903162608452281, + "grad_norm": 0.3149652925861172, + "learning_rate": 9.995134920968135e-05, + "loss": 0.6212, "step": 680 }, { - "epoch": 0.09531140657802659, - "grad_norm": 0.4884876631431996, - "learning_rate": 4.944659842490354e-05, - "loss": 0.6594, + "epoch": 0.19059613769941225, + "grad_norm": 0.29455550139493053, + "learning_rate": 9.995066651222606e-05, + "loss": 0.6105, "step": 681 }, { - "epoch": 0.09545136459062281, - "grad_norm": 0.4960871887261993, - "learning_rate": 4.944422451172619e-05, - "loss": 0.6391, + "epoch": 0.19087601455359643, + "grad_norm": 0.31123731050816483, + "learning_rate": 9.994997906043837e-05, + "loss": 0.6152, "step": 682 }, { - "epoch": 0.09559132260321904, - "grad_norm": 0.49440117471489403, - "learning_rate": 4.944184557501269e-05, - "loss": 0.6714, + "epoch": 0.19115589140778058, + "grad_norm": 0.3322634981253788, + "learning_rate": 9.994928685438373e-05, + "loss": 0.6159, "step": 683 }, { - "epoch": 0.09573128061581526, - "grad_norm": 0.47570363710472247, - "learning_rate": 4.943946161525192e-05, - "loss": 0.6786, + "epoch": 0.19143576826196473, + "grad_norm": 0.3275257284931996, + "learning_rate": 9.994858989412801e-05, + "loss": 0.6117, "step": 684 }, { - "epoch": 0.09587123862841147, - "grad_norm": 0.48841469758633105, - "learning_rate": 4.9437072632933814e-05, - "loss": 0.6932, + "epoch": 0.1917156451161489, + "grad_norm": 0.3040485430620487, + "learning_rate": 9.994788817973753e-05, + "loss": 0.6015, "step": 685 }, { - "epoch": 0.0960111966410077, - "grad_norm": 0.47367349893999716, - "learning_rate": 4.943467862854934e-05, - "loss": 0.6331, + "epoch": 0.19199552197033307, + "grad_norm": 0.3095529932531528, + "learning_rate": 9.99471817112791e-05, + "loss": 0.6451, "step": 686 }, { - "epoch": 0.09615115465360392, - "grad_norm": 0.4966192406884947, - "learning_rate": 4.943227960259048e-05, - "loss": 0.6632, + "epoch": 0.19227539882451722, + "grad_norm": 0.29265260205457316, + "learning_rate": 9.994647048881998e-05, + "loss": 0.6398, "step": 687 }, { - "epoch": 0.09629111266620013, - "grad_norm": 0.46145851197097154, - "learning_rate": 4.942987555555026e-05, - "loss": 0.6428, + "epoch": 0.19255527567870137, + "grad_norm": 0.290284836365612, + "learning_rate": 9.994575451242782e-05, + "loss": 0.6342, "step": 688 }, { - "epoch": 0.09643107067879636, - "grad_norm": 0.47559863610819186, - "learning_rate": 4.942746648792274e-05, - "loss": 0.6499, + "epoch": 0.19283515253288552, + "grad_norm": 0.2977989328108598, + "learning_rate": 9.994503378217079e-05, + "loss": 0.6454, "step": 689 }, { - "epoch": 0.09657102869139259, - "grad_norm": 0.46991178970104935, - "learning_rate": 4.9425052400203e-05, - "loss": 0.6134, + "epoch": 0.19311502938706968, + "grad_norm": 0.31642739862391606, + "learning_rate": 9.99443082981175e-05, + "loss": 0.6274, "step": 690 }, { - "epoch": 0.0967109867039888, - "grad_norm": 0.486056912045251, - "learning_rate": 4.942263329288716e-05, - "loss": 0.6594, + "epoch": 0.19339490624125386, + "grad_norm": 0.2866466996125448, + "learning_rate": 9.994357806033699e-05, + "loss": 0.6053, "step": 691 }, { - "epoch": 0.09685094471658502, - "grad_norm": 0.5045620121297718, - "learning_rate": 4.942020916647238e-05, - "loss": 0.6606, + "epoch": 0.193674783095438, + "grad_norm": 0.30120527019938376, + "learning_rate": 9.994284306889876e-05, + "loss": 0.6059, "step": 692 }, { - "epoch": 0.09699090272918125, - "grad_norm": 0.49357726954668424, - "learning_rate": 4.941778002145684e-05, - "loss": 0.6436, + "epoch": 0.19395465994962216, + "grad_norm": 0.2988298937768542, + "learning_rate": 9.994210332387277e-05, + "loss": 0.6346, "step": 693 }, { - "epoch": 0.09713086074177747, - "grad_norm": 0.44767321778450064, - "learning_rate": 4.941534585833975e-05, - "loss": 0.6662, + "epoch": 0.1942345368038063, + "grad_norm": 0.32095988302603706, + "learning_rate": 9.994135882532943e-05, + "loss": 0.6202, "step": 694 }, { - "epoch": 0.09727081875437368, - "grad_norm": 0.49515786663501604, - "learning_rate": 4.941290667762136e-05, - "loss": 0.6441, + "epoch": 0.1945144136579905, + "grad_norm": 0.2980296981986961, + "learning_rate": 9.994060957333962e-05, + "loss": 0.6183, "step": 695 }, { - "epoch": 0.09741077676696991, - "grad_norm": 0.5193186895726914, - "learning_rate": 4.9410462479802945e-05, - "loss": 0.6667, + "epoch": 0.19479429051217464, + "grad_norm": 0.3184631025074255, + "learning_rate": 9.993985556797463e-05, + "loss": 0.6459, "step": 696 }, { - "epoch": 0.09755073477956613, - "grad_norm": 0.48819228314517343, - "learning_rate": 4.94080132653868e-05, - "loss": 0.6525, + "epoch": 0.1950741673663588, + "grad_norm": 0.28480537458562444, + "learning_rate": 9.993909680930622e-05, + "loss": 0.6469, "step": 697 }, { - "epoch": 0.09769069279216235, - "grad_norm": 0.47504662151869315, - "learning_rate": 4.940555903487628e-05, - "loss": 0.6507, + "epoch": 0.19535404422054295, + "grad_norm": 0.29814808673431314, + "learning_rate": 9.993833329740663e-05, + "loss": 0.6171, "step": 698 }, { - "epoch": 0.09783065080475857, - "grad_norm": 0.5087429079412467, - "learning_rate": 4.9403099788775754e-05, - "loss": 0.6885, + "epoch": 0.19563392107472713, + "grad_norm": 0.3234399945777799, + "learning_rate": 9.993756503234855e-05, + "loss": 0.6165, "step": 699 }, { - "epoch": 0.0979706088173548, - "grad_norm": 0.4991373858117139, - "learning_rate": 4.940063552759061e-05, - "loss": 0.6614, + "epoch": 0.19591379792891128, + "grad_norm": 0.30129220271900264, + "learning_rate": 9.993679201420505e-05, + "loss": 0.6121, "step": 700 }, { - "epoch": 0.09811056682995102, - "grad_norm": 0.5030115759681447, - "learning_rate": 4.939816625182729e-05, - "loss": 0.6415, + "epoch": 0.19619367478309543, + "grad_norm": 0.3064131778971802, + "learning_rate": 9.993601424304975e-05, + "loss": 0.6359, "step": 701 }, { - "epoch": 0.09825052484254723, - "grad_norm": 0.47371776821243294, - "learning_rate": 4.939569196199325e-05, - "loss": 0.6391, + "epoch": 0.1964735516372796, + "grad_norm": 0.30456250610209257, + "learning_rate": 9.993523171895664e-05, + "loss": 0.6157, "step": 702 }, { - "epoch": 0.09839048285514346, - "grad_norm": 0.4881159343567594, - "learning_rate": 4.9393212658596976e-05, - "loss": 0.6244, + "epoch": 0.19675342849146377, + "grad_norm": 0.3058162494981979, + "learning_rate": 9.993444444200026e-05, + "loss": 0.6158, "step": 703 }, { - "epoch": 0.09853044086773968, - "grad_norm": 0.49870488221888004, - "learning_rate": 4.9390728342148006e-05, - "loss": 0.6616, + "epoch": 0.19703330534564792, + "grad_norm": 0.31695347305058746, + "learning_rate": 9.993365241225548e-05, + "loss": 0.631, "step": 704 }, { - "epoch": 0.0986703988803359, - "grad_norm": 0.4768129759031237, - "learning_rate": 4.938823901315688e-05, - "loss": 0.6559, + "epoch": 0.19731318219983207, + "grad_norm": 0.2804550533874116, + "learning_rate": 9.993285562979773e-05, + "loss": 0.6495, "step": 705 }, { - "epoch": 0.09881035689293212, - "grad_norm": 0.4729780422275052, - "learning_rate": 4.938574467213518e-05, - "loss": 0.6256, + "epoch": 0.19759305905401622, + "grad_norm": 0.29588930540308134, + "learning_rate": 9.993205409470283e-05, + "loss": 0.6583, "step": 706 }, { - "epoch": 0.09895031490552834, - "grad_norm": 0.49227905209471745, - "learning_rate": 4.9383245319595514e-05, - "loss": 0.6564, + "epoch": 0.1978729359082004, + "grad_norm": 0.2938872096547923, + "learning_rate": 9.993124780704707e-05, + "loss": 0.6035, "step": 707 }, { - "epoch": 0.09909027291812457, - "grad_norm": 0.49477666423952565, - "learning_rate": 4.9380740956051545e-05, - "loss": 0.6031, + "epoch": 0.19815281276238456, + "grad_norm": 0.3026828687774274, + "learning_rate": 9.993043676690721e-05, + "loss": 0.6521, "step": 708 }, { - "epoch": 0.09923023093072078, - "grad_norm": 0.4711224186000299, - "learning_rate": 4.9378231582017926e-05, - "loss": 0.6302, + "epoch": 0.1984326896165687, + "grad_norm": 0.29562997826441406, + "learning_rate": 9.99296209743604e-05, + "loss": 0.6247, "step": 709 }, { - "epoch": 0.099370188943317, - "grad_norm": 0.49742927700008693, - "learning_rate": 4.9375717198010366e-05, - "loss": 0.6732, + "epoch": 0.19871256647075286, + "grad_norm": 0.2915499622213924, + "learning_rate": 9.992880042948435e-05, + "loss": 0.6107, "step": 710 }, { - "epoch": 0.09951014695591323, - "grad_norm": 0.4750405851316029, - "learning_rate": 4.937319780454559e-05, - "loss": 0.6852, + "epoch": 0.19899244332493704, + "grad_norm": 0.3001574440988939, + "learning_rate": 9.992797513235713e-05, + "loss": 0.6422, "step": 711 }, { - "epoch": 0.09965010496850944, - "grad_norm": 0.5319507911786844, - "learning_rate": 4.937067340214137e-05, - "loss": 0.6426, + "epoch": 0.1992723201791212, + "grad_norm": 0.30837916317033914, + "learning_rate": 9.992714508305727e-05, + "loss": 0.6172, "step": 712 }, { - "epoch": 0.09979006298110567, - "grad_norm": 0.5222169180449785, - "learning_rate": 4.936814399131648e-05, - "loss": 0.6738, + "epoch": 0.19955219703330535, + "grad_norm": 0.29558819715261386, + "learning_rate": 9.99263102816638e-05, + "loss": 0.6081, "step": 713 }, { - "epoch": 0.09993002099370189, - "grad_norm": 0.5105935227729471, - "learning_rate": 4.936560957259077e-05, - "loss": 0.6668, + "epoch": 0.1998320738874895, + "grad_norm": 0.3047160383997923, + "learning_rate": 9.992547072825618e-05, + "loss": 0.611, "step": 714 }, { - "epoch": 0.1000699790062981, - "grad_norm": 0.5163958109604428, - "learning_rate": 4.9363070146485044e-05, - "loss": 0.6986, + "epoch": 0.20011195074167365, + "grad_norm": 0.30541138661385725, + "learning_rate": 9.99246264229143e-05, + "loss": 0.5902, "step": 715 }, { - "epoch": 0.10020993701889433, - "grad_norm": 0.5021401060702906, - "learning_rate": 4.936052571352122e-05, - "loss": 0.6468, + "epoch": 0.20039182759585783, + "grad_norm": 0.28570029418490417, + "learning_rate": 9.992377736571853e-05, + "loss": 0.6181, "step": 716 }, { - "epoch": 0.10034989503149055, - "grad_norm": 0.4685192631602669, - "learning_rate": 4.9357976274222185e-05, - "loss": 0.6609, + "epoch": 0.20067170445004198, + "grad_norm": 0.29682223835244825, + "learning_rate": 9.992292355674971e-05, + "loss": 0.6033, "step": 717 }, { - "epoch": 0.10048985304408678, - "grad_norm": 0.48726170019620224, - "learning_rate": 4.935542182911188e-05, - "loss": 0.6761, + "epoch": 0.20095158130422613, + "grad_norm": 0.2979443560789934, + "learning_rate": 9.992206499608907e-05, + "loss": 0.6266, "step": 718 }, { - "epoch": 0.10062981105668299, - "grad_norm": 0.48175436628421253, - "learning_rate": 4.935286237871527e-05, - "loss": 0.645, + "epoch": 0.2012314581584103, + "grad_norm": 0.30588550823118615, + "learning_rate": 9.992120168381834e-05, + "loss": 0.6287, "step": 719 }, { - "epoch": 0.10076976906927922, - "grad_norm": 0.49385381693887187, - "learning_rate": 4.935029792355834e-05, - "loss": 0.6243, + "epoch": 0.20151133501259447, + "grad_norm": 0.4100123853154365, + "learning_rate": 9.992033362001969e-05, + "loss": 0.6247, "step": 720 }, { - "epoch": 0.10090972708187544, - "grad_norm": 0.4743721480397386, - "learning_rate": 4.934772846416812e-05, - "loss": 0.6999, + "epoch": 0.20179121186677862, + "grad_norm": 0.29072001475354536, + "learning_rate": 9.991946080477574e-05, + "loss": 0.6464, "step": 721 }, { - "epoch": 0.10104968509447165, - "grad_norm": 0.49107710855800335, - "learning_rate": 4.934515400107266e-05, - "loss": 0.6637, + "epoch": 0.20207108872096277, + "grad_norm": 0.30962293495077287, + "learning_rate": 9.991858323816958e-05, + "loss": 0.6335, "step": 722 }, { - "epoch": 0.10118964310706788, - "grad_norm": 0.49159205634657455, - "learning_rate": 4.934257453480103e-05, - "loss": 0.6218, + "epoch": 0.20235096557514692, + "grad_norm": 0.3053900143595763, + "learning_rate": 9.991770092028473e-05, + "loss": 0.6345, "step": 723 }, { - "epoch": 0.1013296011196641, - "grad_norm": 0.49141594694855456, - "learning_rate": 4.933999006588335e-05, - "loss": 0.674, + "epoch": 0.2026308424293311, + "grad_norm": 0.31353933176985543, + "learning_rate": 9.991681385120515e-05, + "loss": 0.6249, "step": 724 }, { - "epoch": 0.10146955913226033, - "grad_norm": 0.44737735842488807, - "learning_rate": 4.933740059485075e-05, - "loss": 0.6446, + "epoch": 0.20291071928351526, + "grad_norm": 0.3068086339106254, + "learning_rate": 9.99159220310153e-05, + "loss": 0.6147, "step": 725 }, { - "epoch": 0.10160951714485654, - "grad_norm": 0.4856033404998713, - "learning_rate": 4.9334806122235376e-05, - "loss": 0.6557, + "epoch": 0.2031905961376994, + "grad_norm": 0.3074442656071545, + "learning_rate": 9.991502545980005e-05, + "loss": 0.6119, "step": 726 }, { - "epoch": 0.10174947515745277, - "grad_norm": 0.48253432462131046, - "learning_rate": 4.933220664857044e-05, - "loss": 0.6452, + "epoch": 0.20347047299188356, + "grad_norm": 0.28818327843923686, + "learning_rate": 9.991412413764474e-05, + "loss": 0.6103, "step": 727 }, { - "epoch": 0.10188943317004899, - "grad_norm": 0.5065000807014411, - "learning_rate": 4.9329602174390153e-05, - "loss": 0.6936, + "epoch": 0.20375034984606774, + "grad_norm": 0.2978307288549914, + "learning_rate": 9.991321806463516e-05, + "loss": 0.6073, "step": 728 }, { - "epoch": 0.1020293911826452, - "grad_norm": 0.46004243943931644, - "learning_rate": 4.932699270022976e-05, - "loss": 0.6666, + "epoch": 0.2040302267002519, + "grad_norm": 0.3018786599383204, + "learning_rate": 9.991230724085754e-05, + "loss": 0.6129, "step": 729 }, { - "epoch": 0.10216934919524143, - "grad_norm": 0.4729412035182845, - "learning_rate": 4.932437822662553e-05, - "loss": 0.619, + "epoch": 0.20431010355443605, + "grad_norm": 0.29415963739630785, + "learning_rate": 9.991139166639859e-05, + "loss": 0.6364, "step": 730 }, { - "epoch": 0.10230930720783765, - "grad_norm": 0.4829984181448435, - "learning_rate": 4.932175875411478e-05, - "loss": 0.6387, + "epoch": 0.2045899804086202, + "grad_norm": 0.2946680094302844, + "learning_rate": 9.991047134134541e-05, + "loss": 0.61, "step": 731 }, { - "epoch": 0.10244926522043386, - "grad_norm": 0.4826696857482811, - "learning_rate": 4.931913428323581e-05, - "loss": 0.6433, + "epoch": 0.20486985726280438, + "grad_norm": 0.3015152834518143, + "learning_rate": 9.990954626578567e-05, + "loss": 0.6138, "step": 732 }, { - "epoch": 0.10258922323303009, - "grad_norm": 0.45648188028698533, - "learning_rate": 4.931650481452801e-05, - "loss": 0.6035, + "epoch": 0.20514973411698853, + "grad_norm": 0.30186507932408085, + "learning_rate": 9.990861643980735e-05, + "loss": 0.6355, "step": 733 }, { - "epoch": 0.10272918124562631, - "grad_norm": 0.4691725557215748, - "learning_rate": 4.931387034853173e-05, - "loss": 0.6362, + "epoch": 0.20542961097117268, + "grad_norm": 0.31699518043113645, + "learning_rate": 9.990768186349902e-05, + "loss": 0.6152, "step": 734 }, { - "epoch": 0.10286913925822254, - "grad_norm": 0.49511735363849857, - "learning_rate": 4.93112308857884e-05, - "loss": 0.6128, + "epoch": 0.20570948782535683, + "grad_norm": 0.3063390693369908, + "learning_rate": 9.990674253694957e-05, + "loss": 0.648, "step": 735 }, { - "epoch": 0.10300909727081875, - "grad_norm": 0.5067311529916844, - "learning_rate": 4.9308586426840454e-05, - "loss": 0.6581, + "epoch": 0.20598936467954101, + "grad_norm": 0.29032196511694675, + "learning_rate": 9.990579846024844e-05, + "loss": 0.607, "step": 736 }, { - "epoch": 0.10314905528341498, - "grad_norm": 0.49461941413672095, - "learning_rate": 4.9305936972231346e-05, - "loss": 0.6545, + "epoch": 0.20626924153372517, + "grad_norm": 0.30391551611435835, + "learning_rate": 9.990484963348547e-05, + "loss": 0.6027, "step": 737 }, { - "epoch": 0.1032890132960112, - "grad_norm": 0.46083627671042743, - "learning_rate": 4.9303282522505565e-05, - "loss": 0.6384, + "epoch": 0.20654911838790932, + "grad_norm": 0.3021305427216307, + "learning_rate": 9.990389605675098e-05, + "loss": 0.6201, "step": 738 }, { - "epoch": 0.10342897130860741, - "grad_norm": 0.48817979810198836, - "learning_rate": 4.930062307820865e-05, - "loss": 0.6392, + "epoch": 0.20682899524209347, + "grad_norm": 0.3072286928000085, + "learning_rate": 9.990293773013575e-05, + "loss": 0.6285, "step": 739 }, { - "epoch": 0.10356892932120364, - "grad_norm": 0.4835520562427775, - "learning_rate": 4.9297958639887116e-05, - "loss": 0.6333, + "epoch": 0.20710887209627762, + "grad_norm": 0.29643099870704986, + "learning_rate": 9.990197465373095e-05, + "loss": 0.631, "step": 740 }, { - "epoch": 0.10370888733379986, - "grad_norm": 0.5167572191592301, - "learning_rate": 4.929528920808854e-05, - "loss": 0.635, + "epoch": 0.2073887489504618, + "grad_norm": 0.2892199981073291, + "learning_rate": 9.990100682762828e-05, + "loss": 0.6204, "step": 741 }, { - "epoch": 0.10384884534639609, - "grad_norm": 0.4595745343183344, - "learning_rate": 4.9292614783361536e-05, - "loss": 0.6376, + "epoch": 0.20766862580464596, + "grad_norm": 0.29927602855446483, + "learning_rate": 9.990003425191985e-05, + "loss": 0.6067, "step": 742 }, { - "epoch": 0.1039888033589923, - "grad_norm": 0.43859471943688627, - "learning_rate": 4.92899353662557e-05, - "loss": 0.6585, + "epoch": 0.2079485026588301, + "grad_norm": 0.3013840368630654, + "learning_rate": 9.989905692669823e-05, + "loss": 0.6478, "step": 743 }, { - "epoch": 0.10412876137158852, - "grad_norm": 0.4953753280512198, - "learning_rate": 4.928725095732169e-05, - "loss": 0.6396, + "epoch": 0.20822837951301426, + "grad_norm": 0.278203984020985, + "learning_rate": 9.989807485205645e-05, + "loss": 0.6369, "step": 744 }, { - "epoch": 0.10426871938418475, - "grad_norm": 0.4880410414717604, - "learning_rate": 4.928456155711117e-05, - "loss": 0.6403, + "epoch": 0.20850825636719844, + "grad_norm": 0.30116207295894537, + "learning_rate": 9.989708802808797e-05, + "loss": 0.6307, "step": 745 }, { - "epoch": 0.10440867739678096, - "grad_norm": 0.4737693275569935, - "learning_rate": 4.928186716617686e-05, - "loss": 0.6301, + "epoch": 0.2087881332213826, + "grad_norm": 0.3139662061574362, + "learning_rate": 9.989609645488671e-05, + "loss": 0.6237, "step": 746 }, { - "epoch": 0.10454863540937719, - "grad_norm": 0.5034731514446417, - "learning_rate": 4.927916778507248e-05, - "loss": 0.6729, + "epoch": 0.20906801007556675, + "grad_norm": 0.29459551907956566, + "learning_rate": 9.989510013254708e-05, + "loss": 0.6299, "step": 747 }, { - "epoch": 0.10468859342197341, - "grad_norm": 0.46500169965537136, - "learning_rate": 4.9276463414352757e-05, - "loss": 0.6296, + "epoch": 0.2093478869297509, + "grad_norm": 0.2877917475806658, + "learning_rate": 9.98940990611639e-05, + "loss": 0.6289, "step": 748 }, { - "epoch": 0.10482855143456962, - "grad_norm": 0.48081470356503164, - "learning_rate": 4.9273754054573496e-05, - "loss": 0.614, + "epoch": 0.20962776378393508, + "grad_norm": 0.3148595914832477, + "learning_rate": 9.989309324083241e-05, + "loss": 0.6381, "step": 749 }, { - "epoch": 0.10496850944716585, - "grad_norm": 0.4506016223155311, - "learning_rate": 4.927103970629148e-05, - "loss": 0.6313, + "epoch": 0.20990764063811923, + "grad_norm": 0.29523633254911474, + "learning_rate": 9.98920826716484e-05, + "loss": 0.6236, "step": 750 }, { - "epoch": 0.10510846745976207, - "grad_norm": 0.4806095102495363, - "learning_rate": 4.926832037006453e-05, - "loss": 0.6862, + "epoch": 0.21018751749230338, + "grad_norm": 0.2982350541251637, + "learning_rate": 9.989106735370803e-05, + "loss": 0.6046, "step": 751 }, { - "epoch": 0.1052484254723583, - "grad_norm": 0.5264224078411822, - "learning_rate": 4.926559604645152e-05, - "loss": 0.6387, + "epoch": 0.21046739434648754, + "grad_norm": 0.28857266833371326, + "learning_rate": 9.989004728710795e-05, + "loss": 0.6033, "step": 752 }, { - "epoch": 0.10538838348495451, - "grad_norm": 0.4993673425121446, - "learning_rate": 4.9262866736012304e-05, - "loss": 0.6761, + "epoch": 0.21074727120067172, + "grad_norm": 0.2994922713392514, + "learning_rate": 9.988902247194525e-05, + "loss": 0.6359, "step": 753 }, { - "epoch": 0.10552834149755073, - "grad_norm": 0.5130179673023382, - "learning_rate": 4.926013243930779e-05, - "loss": 0.6582, + "epoch": 0.21102714805485587, + "grad_norm": 0.29393640928691533, + "learning_rate": 9.988799290831746e-05, + "loss": 0.617, "step": 754 }, { - "epoch": 0.10566829951014696, - "grad_norm": 0.5161547254751858, - "learning_rate": 4.925739315689991e-05, - "loss": 0.65, + "epoch": 0.21130702490904002, + "grad_norm": 0.2786124339094459, + "learning_rate": 9.988695859632257e-05, + "loss": 0.6103, "step": 755 }, { - "epoch": 0.10580825752274317, - "grad_norm": 3.5932005324517964, - "learning_rate": 4.925464888935162e-05, - "loss": 0.6722, + "epoch": 0.21158690176322417, + "grad_norm": 0.26576693865200707, + "learning_rate": 9.988591953605906e-05, + "loss": 0.6107, "step": 756 }, { - "epoch": 0.1059482155353394, - "grad_norm": 0.47238634638015287, - "learning_rate": 4.925189963722687e-05, - "loss": 0.6734, + "epoch": 0.21186677861740835, + "grad_norm": 0.2846284299244608, + "learning_rate": 9.98848757276258e-05, + "loss": 0.6568, "step": 757 }, { - "epoch": 0.10608817354793562, - "grad_norm": 0.4958610236105621, - "learning_rate": 4.924914540109068e-05, - "loss": 0.705, + "epoch": 0.2121466554715925, + "grad_norm": 0.30853876057537716, + "learning_rate": 9.988382717112213e-05, + "loss": 0.5932, "step": 758 }, { - "epoch": 0.10622813156053185, - "grad_norm": 0.47904033540016167, - "learning_rate": 4.924638618150906e-05, - "loss": 0.6611, + "epoch": 0.21242653232577666, + "grad_norm": 0.2849933204804723, + "learning_rate": 9.988277386664788e-05, + "loss": 0.6012, "step": 759 }, { - "epoch": 0.10636808957312806, - "grad_norm": 0.4595734849503931, - "learning_rate": 4.924362197904908e-05, - "loss": 0.637, + "epoch": 0.2127064091799608, + "grad_norm": 0.2845166335515254, + "learning_rate": 9.98817158143033e-05, + "loss": 0.6323, "step": 760 }, { - "epoch": 0.10650804758572428, - "grad_norm": 0.4706049650368603, - "learning_rate": 4.924085279427879e-05, - "loss": 0.607, + "epoch": 0.212986286034145, + "grad_norm": 0.28949031191183927, + "learning_rate": 9.988065301418907e-05, + "loss": 0.6286, "step": 761 }, { - "epoch": 0.10664800559832051, - "grad_norm": 0.4829263701881818, - "learning_rate": 4.923807862776728e-05, - "loss": 0.6311, + "epoch": 0.21326616288832914, + "grad_norm": 0.2996863267327104, + "learning_rate": 9.987958546640639e-05, + "loss": 0.6261, "step": 762 }, { - "epoch": 0.10678796361091672, - "grad_norm": 0.4658603465917324, - "learning_rate": 4.92352994800847e-05, - "loss": 0.6575, + "epoch": 0.2135460397425133, + "grad_norm": 0.2895413025607892, + "learning_rate": 9.987851317105683e-05, + "loss": 0.6012, "step": 763 }, { - "epoch": 0.10692792162351294, - "grad_norm": 0.48975156023608535, - "learning_rate": 4.9232515351802166e-05, - "loss": 0.6754, + "epoch": 0.21382591659669745, + "grad_norm": 0.2787834235743242, + "learning_rate": 9.987743612824248e-05, + "loss": 0.6412, "step": 764 }, { - "epoch": 0.10706787963610917, - "grad_norm": 0.4574370958416068, - "learning_rate": 4.922972624349185e-05, - "loss": 0.6556, + "epoch": 0.2141057934508816, + "grad_norm": 0.285344899631201, + "learning_rate": 9.987635433806582e-05, + "loss": 0.6048, "step": 765 }, { - "epoch": 0.1072078376487054, - "grad_norm": 0.47320816538424315, - "learning_rate": 4.922693215572695e-05, - "loss": 0.6808, + "epoch": 0.21438567030506578, + "grad_norm": 0.29709825924990785, + "learning_rate": 9.987526780062986e-05, + "loss": 0.6306, "step": 766 }, { - "epoch": 0.1073477956613016, - "grad_norm": 0.4938979904546163, - "learning_rate": 4.9224133089081675e-05, - "loss": 0.6396, + "epoch": 0.21466554715924993, + "grad_norm": 0.2955171277517626, + "learning_rate": 9.987417651603799e-05, + "loss": 0.6299, "step": 767 }, { - "epoch": 0.10748775367389783, - "grad_norm": 0.4815436050811902, - "learning_rate": 4.922132904413126e-05, - "loss": 0.6885, + "epoch": 0.21494542401343408, + "grad_norm": 0.2801877576532585, + "learning_rate": 9.987308048439408e-05, + "loss": 0.598, "step": 768 }, { - "epoch": 0.10762771168649406, - "grad_norm": 0.4526917995255154, - "learning_rate": 4.921852002145196e-05, - "loss": 0.6613, + "epoch": 0.21522530086761824, + "grad_norm": 0.27618254097083866, + "learning_rate": 9.987197970580244e-05, + "loss": 0.6148, "step": 769 }, { - "epoch": 0.10776766969909027, - "grad_norm": 0.48451528868058824, - "learning_rate": 4.921570602162108e-05, - "loss": 0.6273, + "epoch": 0.21550517772180242, + "grad_norm": 0.2977612642300702, + "learning_rate": 9.987087418036788e-05, + "loss": 0.62, "step": 770 }, { - "epoch": 0.1079076277116865, - "grad_norm": 0.44910249964176424, - "learning_rate": 4.921288704521689e-05, - "loss": 0.6445, + "epoch": 0.21578505457598657, + "grad_norm": 0.28309302062153524, + "learning_rate": 9.986976390819559e-05, + "loss": 0.6046, "step": 771 }, { - "epoch": 0.10804758572428272, - "grad_norm": 0.4803448598027863, - "learning_rate": 4.9210063092818755e-05, - "loss": 0.6369, + "epoch": 0.21606493143017072, + "grad_norm": 0.2892016544151395, + "learning_rate": 9.986864888939126e-05, + "loss": 0.624, "step": 772 }, { - "epoch": 0.10818754373687893, - "grad_norm": 0.500963530964286, - "learning_rate": 4.9207234165007e-05, - "loss": 0.6572, + "epoch": 0.21634480828435487, + "grad_norm": 0.29115742297952063, + "learning_rate": 9.986752912406101e-05, + "loss": 0.6199, "step": 773 }, { - "epoch": 0.10832750174947516, - "grad_norm": 0.4929298522075444, - "learning_rate": 4.920440026236301e-05, - "loss": 0.6346, + "epoch": 0.21662468513853905, + "grad_norm": 0.3076197675401591, + "learning_rate": 9.986640461231144e-05, + "loss": 0.6066, "step": 774 }, { - "epoch": 0.10846745976207138, - "grad_norm": 0.45662320216713365, - "learning_rate": 4.920156138546917e-05, - "loss": 0.6593, + "epoch": 0.2169045619927232, + "grad_norm": 0.2987549026154578, + "learning_rate": 9.986527535424957e-05, + "loss": 0.6214, "step": 775 }, { - "epoch": 0.1086074177746676, - "grad_norm": 0.46973861985195237, - "learning_rate": 4.919871753490891e-05, - "loss": 0.6906, + "epoch": 0.21718443884690736, + "grad_norm": 0.30218128492263907, + "learning_rate": 9.986414134998286e-05, + "loss": 0.6294, "step": 776 }, { - "epoch": 0.10874737578726382, - "grad_norm": 0.47814740177046505, - "learning_rate": 4.919586871126667e-05, - "loss": 0.646, + "epoch": 0.2174643157010915, + "grad_norm": 0.2747490065906423, + "learning_rate": 9.986300259961927e-05, + "loss": 0.6338, "step": 777 }, { - "epoch": 0.10888733379986004, - "grad_norm": 0.5173386479220408, - "learning_rate": 4.91930149151279e-05, - "loss": 0.6629, + "epoch": 0.2177441925552757, + "grad_norm": 0.2819702498657955, + "learning_rate": 9.98618591032672e-05, + "loss": 0.5994, "step": 778 }, { - "epoch": 0.10902729181245627, - "grad_norm": 0.47384818148671254, - "learning_rate": 4.9190156147079094e-05, - "loss": 0.6458, + "epoch": 0.21802406940945984, + "grad_norm": 0.28472568974607454, + "learning_rate": 9.986071086103545e-05, + "loss": 0.6065, "step": 779 }, { - "epoch": 0.10916724982505248, - "grad_norm": 0.4998121952757577, - "learning_rate": 4.918729240770775e-05, - "loss": 0.6627, + "epoch": 0.218303946263644, + "grad_norm": 0.25512175714572793, + "learning_rate": 9.985955787303335e-05, + "loss": 0.6252, "step": 780 }, { - "epoch": 0.1093072078376487, - "grad_norm": 0.49480732704046404, - "learning_rate": 4.918442369760241e-05, - "loss": 0.6503, + "epoch": 0.21858382311782815, + "grad_norm": 0.2833178477240765, + "learning_rate": 9.985840013937062e-05, + "loss": 0.609, "step": 781 }, { - "epoch": 0.10944716585024493, - "grad_norm": 0.48089249137852297, - "learning_rate": 4.9181550017352615e-05, - "loss": 0.6334, + "epoch": 0.21886369997201233, + "grad_norm": 0.29001540439460133, + "learning_rate": 9.985723766015744e-05, + "loss": 0.6513, "step": 782 }, { - "epoch": 0.10958712386284115, - "grad_norm": 0.46605303630427286, - "learning_rate": 4.917867136754893e-05, - "loss": 0.598, + "epoch": 0.21914357682619648, + "grad_norm": 0.28256802894257993, + "learning_rate": 9.98560704355045e-05, + "loss": 0.6078, "step": 783 }, { - "epoch": 0.10972708187543737, - "grad_norm": 0.4766867229123813, - "learning_rate": 4.9175787748782955e-05, - "loss": 0.6478, + "epoch": 0.21942345368038063, + "grad_norm": 0.31307453923452017, + "learning_rate": 9.985489846552286e-05, + "loss": 0.633, "step": 784 }, { - "epoch": 0.10986703988803359, - "grad_norm": 0.47706432841684887, - "learning_rate": 4.9172899161647295e-05, - "loss": 0.6579, + "epoch": 0.21970333053456478, + "grad_norm": 0.28850734424795094, + "learning_rate": 9.985372175032408e-05, + "loss": 0.6214, "step": 785 }, { - "epoch": 0.11000699790062982, - "grad_norm": 0.47350025007351243, - "learning_rate": 4.9170005606735594e-05, - "loss": 0.622, + "epoch": 0.21998320738874896, + "grad_norm": 0.2791079028943534, + "learning_rate": 9.985254029002015e-05, + "loss": 0.6264, "step": 786 }, { - "epoch": 0.11014695591322603, - "grad_norm": 0.4737951373989515, - "learning_rate": 4.9167107084642496e-05, - "loss": 0.6186, + "epoch": 0.22026308424293312, + "grad_norm": 0.28860212120936385, + "learning_rate": 9.985135408472355e-05, + "loss": 0.6122, "step": 787 }, { - "epoch": 0.11028691392582225, - "grad_norm": 0.4973982793226407, - "learning_rate": 4.916420359596368e-05, - "loss": 0.6041, + "epoch": 0.22054296109711727, + "grad_norm": 0.27794402322042944, + "learning_rate": 9.985016313454715e-05, + "loss": 0.621, "step": 788 }, { - "epoch": 0.11042687193841848, - "grad_norm": 0.45566949261190215, - "learning_rate": 4.916129514129585e-05, - "loss": 0.6196, + "epoch": 0.22082283795130142, + "grad_norm": 0.26834532886232865, + "learning_rate": 9.984896743960432e-05, + "loss": 0.6267, "step": 789 }, { - "epoch": 0.11056682995101469, - "grad_norm": 0.47407807015226433, - "learning_rate": 4.915838172123671e-05, - "loss": 0.5968, + "epoch": 0.22110271480548557, + "grad_norm": 0.29474895680880275, + "learning_rate": 9.984776700000886e-05, + "loss": 0.6023, "step": 790 }, { - "epoch": 0.11070678796361091, - "grad_norm": 0.4508773406494618, - "learning_rate": 4.915546333638501e-05, - "loss": 0.6558, + "epoch": 0.22138259165966975, + "grad_norm": 0.29769900879551847, + "learning_rate": 9.984656181587506e-05, + "loss": 0.6205, "step": 791 }, { - "epoch": 0.11084674597620714, - "grad_norm": 0.47903719726456023, - "learning_rate": 4.915253998734051e-05, - "loss": 0.6281, + "epoch": 0.2216624685138539, + "grad_norm": 0.274009754957161, + "learning_rate": 9.984535188731759e-05, + "loss": 0.5994, "step": 792 }, { - "epoch": 0.11098670398880336, - "grad_norm": 0.4864004849243157, - "learning_rate": 4.914961167470396e-05, - "loss": 0.6632, + "epoch": 0.22194234536803806, + "grad_norm": 0.2988779618403234, + "learning_rate": 9.984413721445162e-05, + "loss": 0.6426, "step": 793 }, { - "epoch": 0.11112666200139958, - "grad_norm": 0.4828968005003782, - "learning_rate": 4.9146678399077196e-05, - "loss": 0.6641, + "epoch": 0.2222222222222222, + "grad_norm": 0.29594649319588723, + "learning_rate": 9.984291779739278e-05, + "loss": 0.615, "step": 794 }, { - "epoch": 0.1112666200139958, - "grad_norm": 0.5082783883297906, - "learning_rate": 4.9143740161063015e-05, - "loss": 0.6881, + "epoch": 0.2225020990764064, + "grad_norm": 0.30013434264676025, + "learning_rate": 9.984169363625715e-05, + "loss": 0.6371, "step": 795 }, { - "epoch": 0.11140657802659203, - "grad_norm": 0.4811155277843086, - "learning_rate": 4.914079696126526e-05, - "loss": 0.6542, + "epoch": 0.22278197593059054, + "grad_norm": 0.2879459082697263, + "learning_rate": 9.98404647311612e-05, + "loss": 0.6141, "step": 796 }, { - "epoch": 0.11154653603918824, - "grad_norm": 0.4697422705172665, - "learning_rate": 4.913784880028878e-05, - "loss": 0.6398, + "epoch": 0.2230618527847747, + "grad_norm": 0.2810009862527073, + "learning_rate": 9.983923108222191e-05, + "loss": 0.6065, "step": 797 }, { - "epoch": 0.11168649405178446, - "grad_norm": 0.48609960976538413, - "learning_rate": 4.9134895678739456e-05, - "loss": 0.6361, + "epoch": 0.22334172963895885, + "grad_norm": 0.29299520183344274, + "learning_rate": 9.983799268955672e-05, + "loss": 0.6099, "step": 798 }, { - "epoch": 0.11182645206438069, - "grad_norm": 0.4505917244706054, - "learning_rate": 4.9131937597224185e-05, - "loss": 0.6531, + "epoch": 0.22362160649314303, + "grad_norm": 0.2816039311381955, + "learning_rate": 9.983674955328349e-05, + "loss": 0.5978, "step": 799 }, { - "epoch": 0.11196641007697691, - "grad_norm": 0.4609463571323594, - "learning_rate": 4.912897455635089e-05, - "loss": 0.6091, + "epoch": 0.22390148334732718, + "grad_norm": 0.2809159135916206, + "learning_rate": 9.983550167352054e-05, + "loss": 0.6027, "step": 800 }, { - "epoch": 0.11210636808957312, - "grad_norm": 0.4633046052805601, - "learning_rate": 4.91260065567285e-05, - "loss": 0.6326, + "epoch": 0.22418136020151133, + "grad_norm": 0.288265729397608, + "learning_rate": 9.983424905038666e-05, + "loss": 0.6064, "step": 801 }, { - "epoch": 0.11224632610216935, - "grad_norm": 0.48176393499918024, - "learning_rate": 4.912303359896697e-05, - "loss": 0.6256, + "epoch": 0.22446123705569548, + "grad_norm": 0.2741336820446325, + "learning_rate": 9.983299168400105e-05, + "loss": 0.6153, "step": 802 }, { - "epoch": 0.11238628411476558, - "grad_norm": 0.44238158654462206, - "learning_rate": 4.912005568367727e-05, - "loss": 0.617, + "epoch": 0.22474111390987966, + "grad_norm": 0.29156434791130137, + "learning_rate": 9.983172957448339e-05, + "loss": 0.6191, "step": 803 }, { - "epoch": 0.11252624212736179, - "grad_norm": 0.46784130274686425, - "learning_rate": 4.91170728114714e-05, - "loss": 0.6194, + "epoch": 0.22502099076406382, + "grad_norm": 0.27991759137428984, + "learning_rate": 9.983046272195384e-05, + "loss": 0.6391, "step": 804 }, { - "epoch": 0.11266620013995801, - "grad_norm": 0.4813594149903505, - "learning_rate": 4.9114084982962356e-05, - "loss": 0.6809, + "epoch": 0.22530086761824797, + "grad_norm": 0.29192928869818263, + "learning_rate": 9.982919112653293e-05, + "loss": 0.6171, "step": 805 }, { - "epoch": 0.11280615815255424, - "grad_norm": 0.4746068016576439, - "learning_rate": 4.911109219876417e-05, - "loss": 0.6482, + "epoch": 0.22558074447243212, + "grad_norm": 0.2893461284592979, + "learning_rate": 9.982791478834173e-05, + "loss": 0.6075, "step": 806 }, { - "epoch": 0.11294611616515045, - "grad_norm": 0.4837726952509842, - "learning_rate": 4.9108094459491916e-05, - "loss": 0.593, + "epoch": 0.2258606213266163, + "grad_norm": 0.27061523394207493, + "learning_rate": 9.982663370750172e-05, + "loss": 0.6058, "step": 807 }, { - "epoch": 0.11308607417774667, - "grad_norm": 0.4939387206527619, - "learning_rate": 4.910509176576162e-05, - "loss": 0.6372, + "epoch": 0.22614049818080045, + "grad_norm": 0.29574318337062955, + "learning_rate": 9.982534788413482e-05, + "loss": 0.6146, "step": 808 }, { - "epoch": 0.1132260321903429, - "grad_norm": 0.49309033390988954, - "learning_rate": 4.910208411819039e-05, - "loss": 0.6212, + "epoch": 0.2264203750349846, + "grad_norm": 0.26800247836730356, + "learning_rate": 9.982405731836342e-05, + "loss": 0.6038, "step": 809 }, { - "epoch": 0.11336599020293912, - "grad_norm": 0.48931195891148005, - "learning_rate": 4.909907151739633e-05, - "loss": 0.6668, + "epoch": 0.22670025188916876, + "grad_norm": 0.2803212952525355, + "learning_rate": 9.982276201031034e-05, + "loss": 0.5969, "step": 810 }, { - "epoch": 0.11350594821553533, - "grad_norm": 0.47504035399989575, - "learning_rate": 4.909605396399856e-05, - "loss": 0.66, + "epoch": 0.22698012874335294, + "grad_norm": 0.2966399380284488, + "learning_rate": 9.98214619600989e-05, + "loss": 0.5894, "step": 811 }, { - "epoch": 0.11364590622813156, - "grad_norm": 0.475509126808091, - "learning_rate": 4.90930314586172e-05, - "loss": 0.6693, + "epoch": 0.2272600055975371, + "grad_norm": 0.2686952657009844, + "learning_rate": 9.982015716785282e-05, + "loss": 0.6235, "step": 812 }, { - "epoch": 0.11378586424072779, - "grad_norm": 0.46395502226780827, - "learning_rate": 4.909000400187341e-05, - "loss": 0.6028, + "epoch": 0.22753988245172124, + "grad_norm": 0.2939487483296079, + "learning_rate": 9.98188476336963e-05, + "loss": 0.6202, "step": 813 }, { - "epoch": 0.113925822253324, - "grad_norm": 0.48820830409318683, - "learning_rate": 4.908697159438937e-05, - "loss": 0.62, + "epoch": 0.2278197593059054, + "grad_norm": 0.2722218703101461, + "learning_rate": 9.981753335775398e-05, + "loss": 0.6021, "step": 814 }, { - "epoch": 0.11406578026592022, - "grad_norm": 0.4909040802882437, - "learning_rate": 4.908393423678829e-05, - "loss": 0.6261, + "epoch": 0.22809963616008955, + "grad_norm": 0.2839592936475026, + "learning_rate": 9.981621434015094e-05, + "loss": 0.6084, "step": 815 }, { - "epoch": 0.11420573827851645, - "grad_norm": 0.4853877606631391, - "learning_rate": 4.908089192969434e-05, - "loss": 0.6574, + "epoch": 0.22837951301427373, + "grad_norm": 0.28133850499751295, + "learning_rate": 9.981489058101275e-05, + "loss": 0.6152, "step": 816 }, { - "epoch": 0.11434569629111267, - "grad_norm": 0.4604083047219368, - "learning_rate": 4.907784467373277e-05, - "loss": 0.6091, + "epoch": 0.22865938986845788, + "grad_norm": 0.2867889951147302, + "learning_rate": 9.981356208046537e-05, + "loss": 0.6417, "step": 817 }, { - "epoch": 0.11448565430370888, - "grad_norm": 0.4533069892751884, - "learning_rate": 4.9074792469529815e-05, - "loss": 0.6021, + "epoch": 0.22893926672264203, + "grad_norm": 0.274845590590577, + "learning_rate": 9.98122288386353e-05, + "loss": 0.6004, "step": 818 }, { - "epoch": 0.11462561231630511, - "grad_norm": 0.4520978783038299, - "learning_rate": 4.907173531771273e-05, - "loss": 0.6427, + "epoch": 0.22921914357682618, + "grad_norm": 0.29619409262200774, + "learning_rate": 9.981089085564939e-05, + "loss": 0.602, "step": 819 }, { - "epoch": 0.11476557032890133, - "grad_norm": 0.45052658755884484, - "learning_rate": 4.9068673218909796e-05, - "loss": 0.6294, + "epoch": 0.22949902043101036, + "grad_norm": 0.28827763748484586, + "learning_rate": 9.9809548131635e-05, + "loss": 0.6079, "step": 820 }, { - "epoch": 0.11490552834149755, - "grad_norm": 0.4793207729603878, - "learning_rate": 4.90656061737503e-05, - "loss": 0.6575, + "epoch": 0.22977889728519452, + "grad_norm": 0.29019480587941326, + "learning_rate": 9.980820066671994e-05, + "loss": 0.6014, "step": 821 }, { - "epoch": 0.11504548635409377, - "grad_norm": 0.4713321633366388, - "learning_rate": 4.906253418286456e-05, - "loss": 0.6507, + "epoch": 0.23005877413937867, + "grad_norm": 0.27407811837477164, + "learning_rate": 9.980684846103247e-05, + "loss": 0.5921, "step": 822 }, { - "epoch": 0.11518544436669, - "grad_norm": 0.4933569292067938, - "learning_rate": 4.90594572468839e-05, - "loss": 0.6084, + "epoch": 0.23033865099356282, + "grad_norm": 0.28569684349982677, + "learning_rate": 9.980549151470129e-05, + "loss": 0.6188, "step": 823 }, { - "epoch": 0.11532540237928622, - "grad_norm": 0.4727488359524651, - "learning_rate": 4.9056375366440654e-05, - "loss": 0.6047, + "epoch": 0.230618527847747, + "grad_norm": 0.29290777334920826, + "learning_rate": 9.980412982785555e-05, + "loss": 0.6273, "step": 824 }, { - "epoch": 0.11546536039188243, - "grad_norm": 0.4790836053521045, - "learning_rate": 4.9053288542168185e-05, - "loss": 0.6496, + "epoch": 0.23089840470193115, + "grad_norm": 0.28439951018793513, + "learning_rate": 9.980276340062484e-05, + "loss": 0.6133, "step": 825 }, { - "epoch": 0.11560531840447866, - "grad_norm": 0.4832677203184707, - "learning_rate": 4.905019677470086e-05, - "loss": 0.6247, + "epoch": 0.2311782815561153, + "grad_norm": 0.2881461107716947, + "learning_rate": 9.980139223313925e-05, + "loss": 0.6233, "step": 826 }, { - "epoch": 0.11574527641707488, - "grad_norm": 0.48906934804664143, - "learning_rate": 4.9047100064674076e-05, - "loss": 0.6419, + "epoch": 0.23145815841029946, + "grad_norm": 0.29024146744075585, + "learning_rate": 9.980001632552927e-05, + "loss": 0.5906, "step": 827 }, { - "epoch": 0.1158852344296711, - "grad_norm": 0.44450056169083824, - "learning_rate": 4.904399841272423e-05, - "loss": 0.6284, + "epoch": 0.23173803526448364, + "grad_norm": 0.2789044611831552, + "learning_rate": 9.979863567792586e-05, + "loss": 0.6049, "step": 828 }, { - "epoch": 0.11602519244226732, - "grad_norm": 0.4736815158989831, - "learning_rate": 4.9040891819488766e-05, - "loss": 0.6254, + "epoch": 0.2320179121186678, + "grad_norm": 0.2882599052261539, + "learning_rate": 9.979725029046043e-05, + "loss": 0.6007, "step": 829 }, { - "epoch": 0.11616515045486354, - "grad_norm": 0.44945213217864644, - "learning_rate": 4.903778028560609e-05, - "loss": 0.6441, + "epoch": 0.23229778897285194, + "grad_norm": 0.27197359513956976, + "learning_rate": 9.979586016326485e-05, + "loss": 0.5905, "step": 830 }, { - "epoch": 0.11630510846745976, - "grad_norm": 0.5007544417322715, - "learning_rate": 4.903466381171568e-05, - "loss": 0.6898, + "epoch": 0.2325776658270361, + "grad_norm": 0.26372064319553057, + "learning_rate": 9.979446529647142e-05, + "loss": 0.6102, "step": 831 }, { - "epoch": 0.11644506648005598, - "grad_norm": 0.5281054716405869, - "learning_rate": 4.9031542398457974e-05, - "loss": 0.6338, + "epoch": 0.23285754268122028, + "grad_norm": 0.2848276573029936, + "learning_rate": 9.979306569021291e-05, + "loss": 0.6252, "step": 832 }, { - "epoch": 0.1165850244926522, - "grad_norm": 0.484478263124759, - "learning_rate": 4.902841604647448e-05, - "loss": 0.6164, + "epoch": 0.23313741953540443, + "grad_norm": 0.2974637344184027, + "learning_rate": 9.979166134462256e-05, + "loss": 0.6212, "step": 833 }, { - "epoch": 0.11672498250524843, - "grad_norm": 0.44994143276964366, - "learning_rate": 4.902528475640768e-05, - "loss": 0.622, + "epoch": 0.23341729638958858, + "grad_norm": 0.2768186580125523, + "learning_rate": 9.9790252259834e-05, + "loss": 0.6105, "step": 834 }, { - "epoch": 0.11686494051784464, - "grad_norm": 0.4865019036883684, - "learning_rate": 4.902214852890109e-05, - "loss": 0.6106, + "epoch": 0.23369717324377273, + "grad_norm": 0.2862169736585286, + "learning_rate": 9.978883843598134e-05, + "loss": 0.6201, "step": 835 }, { - "epoch": 0.11700489853044087, - "grad_norm": 0.4846126418090665, - "learning_rate": 4.9019007364599246e-05, - "loss": 0.6271, + "epoch": 0.2339770500979569, + "grad_norm": 0.2790379481170973, + "learning_rate": 9.97874198731992e-05, + "loss": 0.6285, "step": 836 }, { - "epoch": 0.11714485654303709, - "grad_norm": 0.48440494490884967, - "learning_rate": 4.901586126414768e-05, - "loss": 0.6221, + "epoch": 0.23425692695214106, + "grad_norm": 0.27596925432949154, + "learning_rate": 9.978599657162255e-05, + "loss": 0.6091, "step": 837 }, { - "epoch": 0.1172848145556333, - "grad_norm": 0.5014410717293758, - "learning_rate": 4.9012710228192946e-05, - "loss": 0.6326, + "epoch": 0.23453680380632522, + "grad_norm": 0.27854083765069304, + "learning_rate": 9.978456853138689e-05, + "loss": 0.5935, "step": 838 }, { - "epoch": 0.11742477256822953, - "grad_norm": 0.463246332033912, - "learning_rate": 4.9009554257382616e-05, - "loss": 0.5973, + "epoch": 0.23481668066050937, + "grad_norm": 0.2788150865363137, + "learning_rate": 9.978313575262814e-05, + "loss": 0.6339, "step": 839 }, { - "epoch": 0.11756473058082575, - "grad_norm": 0.5021414470675243, - "learning_rate": 4.900639335236527e-05, - "loss": 0.6163, + "epoch": 0.23509655751469352, + "grad_norm": 0.2986905483488892, + "learning_rate": 9.978169823548264e-05, + "loss": 0.6049, "step": 840 }, { - "epoch": 0.11770468859342198, - "grad_norm": 0.4751809205646141, - "learning_rate": 4.900322751379052e-05, - "loss": 0.5995, + "epoch": 0.2353764343688777, + "grad_norm": 0.26777121733565923, + "learning_rate": 9.978025598008725e-05, + "loss": 0.5833, "step": 841 }, { - "epoch": 0.11784464660601819, - "grad_norm": 0.495594879328081, - "learning_rate": 4.900005674230896e-05, - "loss": 0.6666, + "epoch": 0.23565631122306185, + "grad_norm": 0.26613159796298175, + "learning_rate": 9.977880898657923e-05, + "loss": 0.5819, "step": 842 }, { - "epoch": 0.11798460461861442, - "grad_norm": 0.46838396983160246, - "learning_rate": 4.899688103857223e-05, - "loss": 0.6391, + "epoch": 0.235936188077246, + "grad_norm": 0.27348517758743046, + "learning_rate": 9.977735725509632e-05, + "loss": 0.6056, "step": 843 }, { - "epoch": 0.11812456263121064, - "grad_norm": 0.48479222227164487, - "learning_rate": 4.899370040323295e-05, - "loss": 0.6425, + "epoch": 0.23621606493143016, + "grad_norm": 0.2675358231011222, + "learning_rate": 9.977590078577667e-05, + "loss": 0.6196, "step": 844 }, { - "epoch": 0.11826452064380685, - "grad_norm": 0.4758054425265797, - "learning_rate": 4.899051483694481e-05, - "loss": 0.6722, + "epoch": 0.23649594178561434, + "grad_norm": 0.3033349991064293, + "learning_rate": 9.977443957875894e-05, + "loss": 0.627, "step": 845 }, { - "epoch": 0.11840447865640308, - "grad_norm": 0.4517669680918226, - "learning_rate": 4.898732434036244e-05, - "loss": 0.6417, + "epoch": 0.2367758186397985, + "grad_norm": 0.28645441659428106, + "learning_rate": 9.977297363418217e-05, + "loss": 0.6184, "step": 846 }, { - "epoch": 0.1185444366689993, - "grad_norm": 0.4637034408072003, - "learning_rate": 4.898412891414153e-05, - "loss": 0.6472, + "epoch": 0.23705569549398264, + "grad_norm": 0.27961588977671503, + "learning_rate": 9.977150295218591e-05, + "loss": 0.6278, "step": 847 }, { - "epoch": 0.11868439468159551, - "grad_norm": 0.4483579373594681, - "learning_rate": 4.8980928558938774e-05, - "loss": 0.6631, + "epoch": 0.2373355723481668, + "grad_norm": 0.27714345397000484, + "learning_rate": 9.977002753291016e-05, + "loss": 0.6039, "step": 848 }, { - "epoch": 0.11882435269419174, - "grad_norm": 0.4607581614793959, - "learning_rate": 4.897772327541188e-05, - "loss": 0.6564, + "epoch": 0.23761544920235098, + "grad_norm": 0.28921622716812045, + "learning_rate": 9.976854737649532e-05, + "loss": 0.6138, "step": 849 }, { - "epoch": 0.11896431070678797, - "grad_norm": 0.4627218264459414, - "learning_rate": 4.8974513064219564e-05, - "loss": 0.6421, + "epoch": 0.23789532605653513, + "grad_norm": 0.28740298178395807, + "learning_rate": 9.97670624830823e-05, + "loss": 0.6326, "step": 850 }, { - "epoch": 0.11910426871938419, - "grad_norm": 0.4381385195746151, - "learning_rate": 4.897129792602156e-05, - "loss": 0.6443, + "epoch": 0.23817520291071928, + "grad_norm": 0.3028247639351463, + "learning_rate": 9.976557285281239e-05, + "loss": 0.6345, "step": 851 }, { - "epoch": 0.1192442267319804, - "grad_norm": 0.44805468338464166, - "learning_rate": 4.8968077861478606e-05, - "loss": 0.6289, + "epoch": 0.23845507976490343, + "grad_norm": 0.29368597273038405, + "learning_rate": 9.976407848582742e-05, + "loss": 0.6202, "step": 852 }, { - "epoch": 0.11938418474457663, - "grad_norm": 0.4419209751144886, - "learning_rate": 4.896485287125246e-05, - "loss": 0.6148, + "epoch": 0.2387349566190876, + "grad_norm": 0.2960219096193092, + "learning_rate": 9.97625793822696e-05, + "loss": 0.5859, "step": 853 }, { - "epoch": 0.11952414275717285, - "grad_norm": 0.46951206043414845, - "learning_rate": 4.896162295600589e-05, - "loss": 0.6552, + "epoch": 0.23901483347327176, + "grad_norm": 0.28193812478404506, + "learning_rate": 9.976107554228163e-05, + "loss": 0.5885, "step": 854 }, { - "epoch": 0.11966410076976906, - "grad_norm": 0.6796790512869556, - "learning_rate": 4.8958388116402685e-05, - "loss": 0.6547, + "epoch": 0.23929471032745592, + "grad_norm": 0.2603374063558071, + "learning_rate": 9.975956696600662e-05, + "loss": 0.5936, "step": 855 }, { - "epoch": 0.11980405878236529, - "grad_norm": 0.4900439088338446, - "learning_rate": 4.8955148353107625e-05, - "loss": 0.6618, + "epoch": 0.23957458718164007, + "grad_norm": 0.2724833017435636, + "learning_rate": 9.975805365358818e-05, + "loss": 0.6019, "step": 856 }, { - "epoch": 0.11994401679496151, - "grad_norm": 0.5670961555372047, - "learning_rate": 4.8951903666786514e-05, - "loss": 0.6234, + "epoch": 0.23985446403582425, + "grad_norm": 0.28411268929988087, + "learning_rate": 9.975653560517035e-05, + "loss": 0.6093, "step": 857 }, { - "epoch": 0.12008397480755774, - "grad_norm": 0.4709993547132971, - "learning_rate": 4.894865405810618e-05, - "loss": 0.6528, + "epoch": 0.2401343408900084, + "grad_norm": 0.27073318243611616, + "learning_rate": 9.97550128208976e-05, + "loss": 0.618, "step": 858 }, { - "epoch": 0.12022393282015395, - "grad_norm": 0.5029700711110836, - "learning_rate": 4.8945399527734436e-05, - "loss": 0.6508, + "epoch": 0.24041421774419255, + "grad_norm": 0.27563149813291027, + "learning_rate": 9.975348530091488e-05, + "loss": 0.5827, "step": 859 }, { - "epoch": 0.12036389083275018, - "grad_norm": 0.471348795948391, - "learning_rate": 4.8942140076340135e-05, - "loss": 0.6303, + "epoch": 0.2406940945983767, + "grad_norm": 0.2730665314327667, + "learning_rate": 9.97519530453676e-05, + "loss": 0.5936, "step": 860 }, { - "epoch": 0.1205038488453464, - "grad_norm": 0.47522173933299294, - "learning_rate": 4.893887570459312e-05, - "loss": 0.6503, + "epoch": 0.2409739714525609, + "grad_norm": 0.2891190589294283, + "learning_rate": 9.975041605440157e-05, + "loss": 0.5869, "step": 861 }, { - "epoch": 0.12064380685794261, - "grad_norm": 0.44742680670952745, - "learning_rate": 4.893560641316425e-05, - "loss": 0.6306, + "epoch": 0.24125384830674504, + "grad_norm": 0.2784758939476977, + "learning_rate": 9.974887432816309e-05, + "loss": 0.5875, "step": 862 }, { - "epoch": 0.12078376487053884, - "grad_norm": 0.46472651593001396, - "learning_rate": 4.89323322027254e-05, - "loss": 0.6322, + "epoch": 0.2415337251609292, + "grad_norm": 0.2738695742355149, + "learning_rate": 9.974732786679892e-05, + "loss": 0.569, "step": 863 }, { - "epoch": 0.12092372288313506, - "grad_norm": 0.4573234609819833, - "learning_rate": 4.8929053073949456e-05, - "loss": 0.6014, + "epoch": 0.24181360201511334, + "grad_norm": 0.3014195796535495, + "learning_rate": 9.974577667045622e-05, + "loss": 0.617, "step": 864 }, { - "epoch": 0.12106368089573127, - "grad_norm": 0.5281892171792254, - "learning_rate": 4.892576902751031e-05, - "loss": 0.666, + "epoch": 0.2420934788692975, + "grad_norm": 0.28435696969419094, + "learning_rate": 9.974422073928266e-05, + "loss": 0.6171, "step": 865 }, { - "epoch": 0.1212036389083275, - "grad_norm": 0.46580727241797515, - "learning_rate": 4.8922480064082864e-05, - "loss": 0.634, + "epoch": 0.24237335572348168, + "grad_norm": 0.27668635302453437, + "learning_rate": 9.974266007342632e-05, + "loss": 0.6019, "step": 866 }, { - "epoch": 0.12134359692092372, - "grad_norm": 0.46889330922237543, - "learning_rate": 4.8919186184343046e-05, - "loss": 0.6171, + "epoch": 0.24265323257766583, + "grad_norm": 0.29211185935471046, + "learning_rate": 9.974109467303576e-05, + "loss": 0.649, "step": 867 }, { - "epoch": 0.12148355493351995, - "grad_norm": 0.4593728936842765, - "learning_rate": 4.891588738896776e-05, - "loss": 0.569, + "epoch": 0.24293310943184998, + "grad_norm": 0.3056363778877445, + "learning_rate": 9.973952453825997e-05, + "loss": 0.6185, "step": 868 }, { - "epoch": 0.12162351294611616, - "grad_norm": 0.4554124468260883, - "learning_rate": 4.891258367863497e-05, - "loss": 0.6426, + "epoch": 0.24321298628603413, + "grad_norm": 0.27136141444147716, + "learning_rate": 9.973794966924839e-05, + "loss": 0.6409, "step": 869 }, { - "epoch": 0.12176347095871239, - "grad_norm": 0.4613451131787414, - "learning_rate": 4.890927505402359e-05, - "loss": 0.6738, + "epoch": 0.2434928631402183, + "grad_norm": 0.27814182446754976, + "learning_rate": 9.973637006615094e-05, + "loss": 0.5939, "step": 870 }, { - "epoch": 0.12190342897130861, - "grad_norm": 0.4775869179312642, - "learning_rate": 4.8905961515813604e-05, - "loss": 0.6403, + "epoch": 0.24377273999440247, + "grad_norm": 0.27448637013656907, + "learning_rate": 9.973478572911792e-05, + "loss": 0.6398, "step": 871 }, { - "epoch": 0.12204338698390482, - "grad_norm": 0.4836931023229791, - "learning_rate": 4.890264306468596e-05, - "loss": 0.6472, + "epoch": 0.24405261684858662, + "grad_norm": 0.27623380550060017, + "learning_rate": 9.973319665830016e-05, + "loss": 0.6351, "step": 872 }, { - "epoch": 0.12218334499650105, - "grad_norm": 0.4661703058075851, - "learning_rate": 4.8899319701322646e-05, - "loss": 0.6656, + "epoch": 0.24433249370277077, + "grad_norm": 0.2726881086669371, + "learning_rate": 9.973160285384891e-05, + "loss": 0.5702, "step": 873 }, { - "epoch": 0.12232330300909727, - "grad_norm": 0.46602771617049493, - "learning_rate": 4.889599142640663e-05, - "loss": 0.6471, + "epoch": 0.24461237055695495, + "grad_norm": 0.27001277974661575, + "learning_rate": 9.973000431591587e-05, + "loss": 0.6309, "step": 874 }, { - "epoch": 0.1224632610216935, - "grad_norm": 0.46986425878775284, - "learning_rate": 4.889265824062193e-05, - "loss": 0.6538, + "epoch": 0.2448922474111391, + "grad_norm": 0.285476081481007, + "learning_rate": 9.972840104465318e-05, + "loss": 0.5909, "step": 875 }, { - "epoch": 0.12260321903428971, - "grad_norm": 0.4350487912771349, - "learning_rate": 4.888932014465352e-05, - "loss": 0.5965, + "epoch": 0.24517212426532325, + "grad_norm": 0.29280323111517875, + "learning_rate": 9.972679304021344e-05, + "loss": 0.6203, "step": 876 }, { - "epoch": 0.12274317704688593, - "grad_norm": 0.45434867025136694, - "learning_rate": 4.888597713918743e-05, - "loss": 0.6091, + "epoch": 0.2454520011195074, + "grad_norm": 0.29271860848920134, + "learning_rate": 9.972518030274971e-05, + "loss": 0.6014, "step": 877 }, { - "epoch": 0.12288313505948216, - "grad_norm": 0.45288960332540396, - "learning_rate": 4.888262922491069e-05, - "loss": 0.6633, + "epoch": 0.2457318779736916, + "grad_norm": 0.2901211225693263, + "learning_rate": 9.97235628324155e-05, + "loss": 0.6282, "step": 878 }, { - "epoch": 0.12302309307207837, - "grad_norm": 0.46432887825126745, - "learning_rate": 4.887927640251132e-05, - "loss": 0.6597, + "epoch": 0.24601175482787574, + "grad_norm": 0.28248728430486114, + "learning_rate": 9.972194062936473e-05, + "loss": 0.6157, "step": 879 }, { - "epoch": 0.1231630510846746, - "grad_norm": 0.45520528507797114, - "learning_rate": 4.887591867267836e-05, - "loss": 0.6403, + "epoch": 0.2462916316820599, + "grad_norm": 0.2874781275011946, + "learning_rate": 9.972031369375182e-05, + "loss": 0.5917, "step": 880 }, { - "epoch": 0.12330300909727082, - "grad_norm": 0.4575189583703599, - "learning_rate": 4.887255603610185e-05, - "loss": 0.6531, + "epoch": 0.24657150853624404, + "grad_norm": 0.2911616785918501, + "learning_rate": 9.971868202573162e-05, + "loss": 0.6075, "step": 881 }, { - "epoch": 0.12344296710986705, - "grad_norm": 0.46480538006139166, - "learning_rate": 4.8869188493472854e-05, - "loss": 0.5969, + "epoch": 0.24685138539042822, + "grad_norm": 0.279464811855995, + "learning_rate": 9.971704562545945e-05, + "loss": 0.6248, "step": 882 }, { - "epoch": 0.12358292512246326, - "grad_norm": 0.46565682699578237, - "learning_rate": 4.886581604548344e-05, - "loss": 0.6334, + "epoch": 0.24713126224461238, + "grad_norm": 0.29786012151021235, + "learning_rate": 9.971540449309104e-05, + "loss": 0.6231, "step": 883 }, { - "epoch": 0.12372288313505948, - "grad_norm": 0.4615157170447177, - "learning_rate": 4.8862438692826675e-05, - "loss": 0.6317, + "epoch": 0.24741113909879653, + "grad_norm": 0.28285506113295183, + "learning_rate": 9.971375862878262e-05, + "loss": 0.6162, "step": 884 }, { - "epoch": 0.12386284114765571, - "grad_norm": 0.45719602979674634, - "learning_rate": 4.885905643619664e-05, - "loss": 0.6615, + "epoch": 0.24769101595298068, + "grad_norm": 0.2850500642631151, + "learning_rate": 9.971210803269081e-05, + "loss": 0.5769, "step": 885 }, { - "epoch": 0.12400279916025192, - "grad_norm": 0.4666726722691908, - "learning_rate": 4.885566927628842e-05, - "loss": 0.6192, + "epoch": 0.24797089280716486, + "grad_norm": 0.27139297175942056, + "learning_rate": 9.971045270497273e-05, + "loss": 0.5813, "step": 886 }, { - "epoch": 0.12414275717284814, - "grad_norm": 0.49622217670592067, - "learning_rate": 4.8852277213798106e-05, - "loss": 0.6263, + "epoch": 0.248250769661349, + "grad_norm": 0.26038124190330875, + "learning_rate": 9.970879264578595e-05, + "loss": 0.6071, "step": 887 }, { - "epoch": 0.12428271518544437, - "grad_norm": 0.47891535770456606, - "learning_rate": 4.8848880249422815e-05, - "loss": 0.6435, + "epoch": 0.24853064651553317, + "grad_norm": 0.2778961647917277, + "learning_rate": 9.970712785528846e-05, + "loss": 0.5925, "step": 888 }, { - "epoch": 0.12442267319804058, - "grad_norm": 0.478301172680431, - "learning_rate": 4.884547838386065e-05, - "loss": 0.671, + "epoch": 0.24881052336971732, + "grad_norm": 0.28236769242362414, + "learning_rate": 9.970545833363871e-05, + "loss": 0.6289, "step": 889 }, { - "epoch": 0.1245626312106368, - "grad_norm": 0.44791777443248404, - "learning_rate": 4.884207161781074e-05, - "loss": 0.5783, + "epoch": 0.24909040022390147, + "grad_norm": 0.2939545197445072, + "learning_rate": 9.970378408099562e-05, + "loss": 0.6006, "step": 890 }, { - "epoch": 0.12470258922323303, - "grad_norm": 0.4476426364893327, - "learning_rate": 4.883865995197319e-05, - "loss": 0.6164, + "epoch": 0.24937027707808565, + "grad_norm": 0.27056136323662533, + "learning_rate": 9.970210509751854e-05, + "loss": 0.6116, "step": 891 }, { - "epoch": 0.12484254723582926, - "grad_norm": 0.47381113823924553, - "learning_rate": 4.8835243387049144e-05, - "loss": 0.6427, + "epoch": 0.2496501539322698, + "grad_norm": 0.2647281072320447, + "learning_rate": 9.970042138336728e-05, + "loss": 0.6102, "step": 892 }, { - "epoch": 0.12498250524842547, - "grad_norm": 0.47937356676709797, - "learning_rate": 4.8831821923740745e-05, - "loss": 0.6783, + "epoch": 0.24993003078645395, + "grad_norm": 0.2761621321826647, + "learning_rate": 9.969873293870209e-05, + "loss": 0.5794, "step": 893 }, { - "epoch": 0.1251224632610217, - "grad_norm": 0.4634097903429328, - "learning_rate": 4.882839556275113e-05, - "loss": 0.6131, + "epoch": 0.2502099076406381, + "grad_norm": 0.28508553823962185, + "learning_rate": 9.969703976368368e-05, + "loss": 0.6215, "step": 894 }, { - "epoch": 0.12526242127361792, - "grad_norm": 0.4867484756619001, - "learning_rate": 4.8824964304784446e-05, - "loss": 0.649, + "epoch": 0.2504897844948223, + "grad_norm": 0.2924319229186995, + "learning_rate": 9.969534185847322e-05, + "loss": 0.6241, "step": 895 }, { - "epoch": 0.12540237928621414, - "grad_norm": 0.450881363852382, - "learning_rate": 4.882152815054587e-05, - "loss": 0.6561, + "epoch": 0.2507696613490064, + "grad_norm": 0.26778201393431406, + "learning_rate": 9.96936392232323e-05, + "loss": 0.624, "step": 896 }, { - "epoch": 0.12554233729881037, - "grad_norm": 0.46114414970423057, - "learning_rate": 4.881808710074155e-05, - "loss": 0.6104, + "epoch": 0.2510495382031906, + "grad_norm": 0.29427879854368805, + "learning_rate": 9.969193185812298e-05, + "loss": 0.6199, "step": 897 }, { - "epoch": 0.12568229531140657, - "grad_norm": 0.45063006067640693, - "learning_rate": 4.881464115607865e-05, - "loss": 0.6445, + "epoch": 0.25132941505737477, + "grad_norm": 0.2948563585971674, + "learning_rate": 9.969021976330777e-05, + "loss": 0.5879, "step": 898 }, { - "epoch": 0.1258222533240028, - "grad_norm": 0.47221223029390763, - "learning_rate": 4.8811190317265376e-05, - "loss": 0.6246, + "epoch": 0.2516092919115589, + "grad_norm": 0.28148667221036944, + "learning_rate": 9.968850293894964e-05, + "loss": 0.5579, "step": 899 }, { - "epoch": 0.12596221133659902, - "grad_norm": 0.4781011312710143, - "learning_rate": 4.880773458501089e-05, - "loss": 0.6222, + "epoch": 0.2518891687657431, + "grad_norm": 0.25900686462303496, + "learning_rate": 9.968678138521198e-05, + "loss": 0.6123, "step": 900 }, { - "epoch": 0.12610216934919524, - "grad_norm": 0.45326509799123327, - "learning_rate": 4.8804273960025376e-05, - "loss": 0.651, + "epoch": 0.25216904561992726, + "grad_norm": 0.2626827498089003, + "learning_rate": 9.968505510225866e-05, + "loss": 0.5784, "step": 901 }, { - "epoch": 0.12624212736179147, - "grad_norm": 0.482160143883641, - "learning_rate": 4.880080844302004e-05, - "loss": 0.6286, + "epoch": 0.2524489224741114, + "grad_norm": 0.28274277452405416, + "learning_rate": 9.968332409025398e-05, + "loss": 0.595, "step": 902 }, { - "epoch": 0.1263820853743877, - "grad_norm": 0.4630226354527779, - "learning_rate": 4.879733803470707e-05, - "loss": 0.636, + "epoch": 0.25272879932829556, + "grad_norm": 0.271630482547548, + "learning_rate": 9.968158834936272e-05, + "loss": 0.6056, "step": 903 }, { - "epoch": 0.12652204338698392, - "grad_norm": 0.4731355871687041, - "learning_rate": 4.8793862735799676e-05, - "loss": 0.6418, + "epoch": 0.2530086761824797, + "grad_norm": 0.3051229110087096, + "learning_rate": 9.967984787975007e-05, + "loss": 0.6362, "step": 904 }, { - "epoch": 0.12666200139958012, - "grad_norm": 0.467195164605955, - "learning_rate": 4.879038254701207e-05, - "loss": 0.6306, + "epoch": 0.25328855303666387, + "grad_norm": 0.28806620249122716, + "learning_rate": 9.967810268158167e-05, + "loss": 0.5967, "step": 905 }, { - "epoch": 0.12680195941217634, - "grad_norm": 0.441351930375645, - "learning_rate": 4.878689746905946e-05, - "loss": 0.5974, + "epoch": 0.25356842989084805, + "grad_norm": 0.27491245903789907, + "learning_rate": 9.967635275502368e-05, + "loss": 0.6202, "step": 906 }, { - "epoch": 0.12694191742477257, - "grad_norm": 0.4714053662846153, - "learning_rate": 4.878340750265807e-05, - "loss": 0.6506, + "epoch": 0.25384830674503217, + "grad_norm": 0.28456405223038556, + "learning_rate": 9.96745981002426e-05, + "loss": 0.6083, "step": 907 }, { - "epoch": 0.1270818754373688, - "grad_norm": 0.4459074638142588, - "learning_rate": 4.877991264852512e-05, - "loss": 0.6474, + "epoch": 0.25412818359921635, + "grad_norm": 0.26460147110990734, + "learning_rate": 9.967283871740548e-05, + "loss": 0.5917, "step": 908 }, { - "epoch": 0.12722183344996502, - "grad_norm": 0.469617227274652, - "learning_rate": 4.877641290737884e-05, - "loss": 0.6656, + "epoch": 0.25440806045340053, + "grad_norm": 0.26918516291180145, + "learning_rate": 9.967107460667978e-05, + "loss": 0.5497, "step": 909 }, { - "epoch": 0.12736179146256124, - "grad_norm": 0.4557573307622375, - "learning_rate": 4.8772908279938464e-05, - "loss": 0.6908, + "epoch": 0.25468793730758466, + "grad_norm": 0.2840242578443607, + "learning_rate": 9.966930576823338e-05, + "loss": 0.6493, "step": 910 }, { - "epoch": 0.12750174947515747, - "grad_norm": 0.46053886117091863, - "learning_rate": 4.8769398766924226e-05, - "loss": 0.6619, + "epoch": 0.25496781416176884, + "grad_norm": 0.283115742278163, + "learning_rate": 9.966753220223465e-05, + "loss": 0.6049, "step": 911 }, { - "epoch": 0.12764170748775366, - "grad_norm": 0.5409321614433314, - "learning_rate": 4.876588436905736e-05, - "loss": 0.6306, + "epoch": 0.25524769101595296, + "grad_norm": 0.2714744996140369, + "learning_rate": 9.966575390885244e-05, + "loss": 0.5958, "step": 912 }, { - "epoch": 0.1277816655003499, - "grad_norm": 0.4525734524473738, - "learning_rate": 4.8762365087060117e-05, - "loss": 0.6163, + "epoch": 0.25552756787013714, + "grad_norm": 0.26846164422872715, + "learning_rate": 9.966397088825594e-05, + "loss": 0.6164, "step": 913 }, { - "epoch": 0.12792162351294611, - "grad_norm": 0.44910287840912927, - "learning_rate": 4.8758840921655744e-05, - "loss": 0.5945, + "epoch": 0.2558074447243213, + "grad_norm": 0.27508859334968633, + "learning_rate": 9.96621831406149e-05, + "loss": 0.5992, "step": 914 }, { - "epoch": 0.12806158152554234, - "grad_norm": 0.4451813661618601, - "learning_rate": 4.8755311873568505e-05, - "loss": 0.613, + "epoch": 0.25608732157850544, + "grad_norm": 0.27333482422132627, + "learning_rate": 9.966039066609949e-05, + "loss": 0.601, "step": 915 }, { - "epoch": 0.12820153953813856, - "grad_norm": 0.46559156418594866, - "learning_rate": 4.8751777943523634e-05, - "loss": 0.6414, + "epoch": 0.2563671984326896, + "grad_norm": 0.29591881112266905, + "learning_rate": 9.96585934648803e-05, + "loss": 0.6307, "step": 916 }, { - "epoch": 0.1283414975507348, - "grad_norm": 0.4947913830806307, - "learning_rate": 4.874823913224741e-05, - "loss": 0.6461, + "epoch": 0.25664707528687375, + "grad_norm": 0.2599796159340984, + "learning_rate": 9.965679153712836e-05, + "loss": 0.6164, "step": 917 }, { - "epoch": 0.128481455563331, - "grad_norm": 0.4786066687436742, - "learning_rate": 4.874469544046707e-05, - "loss": 0.6474, + "epoch": 0.25692695214105793, + "grad_norm": 0.26053285881351435, + "learning_rate": 9.965498488301522e-05, + "loss": 0.5906, "step": 918 }, { - "epoch": 0.1286214135759272, - "grad_norm": 0.4603973225093796, - "learning_rate": 4.8741146868910906e-05, - "loss": 0.66, + "epoch": 0.2572068289952421, + "grad_norm": 0.2759319240086795, + "learning_rate": 9.965317350271284e-05, + "loss": 0.5892, "step": 919 }, { - "epoch": 0.12876137158852344, - "grad_norm": 0.48095107605482856, - "learning_rate": 4.8737593418308156e-05, - "loss": 0.6534, + "epoch": 0.25748670584942623, + "grad_norm": 0.27698261367548277, + "learning_rate": 9.965135739639359e-05, + "loss": 0.5982, "step": 920 }, { - "epoch": 0.12890132960111966, - "grad_norm": 0.4623235801737886, - "learning_rate": 4.8734035089389115e-05, - "loss": 0.628, + "epoch": 0.2577665827036104, + "grad_norm": 0.3357178697038624, + "learning_rate": 9.964953656423038e-05, + "loss": 0.6202, "step": 921 }, { - "epoch": 0.1290412876137159, - "grad_norm": 0.4930237562486678, - "learning_rate": 4.873047188288505e-05, - "loss": 0.6409, + "epoch": 0.2580464595577946, + "grad_norm": 0.28472242803358416, + "learning_rate": 9.964771100639646e-05, + "loss": 0.5902, "step": 922 }, { - "epoch": 0.1291812456263121, - "grad_norm": 0.46495991811324616, - "learning_rate": 4.8726903799528234e-05, - "loss": 0.6293, + "epoch": 0.2583263364119787, + "grad_norm": 0.2918879034888202, + "learning_rate": 9.964588072306563e-05, + "loss": 0.6003, "step": 923 }, { - "epoch": 0.12932120363890834, - "grad_norm": 0.46304983740233435, - "learning_rate": 4.872333084005194e-05, - "loss": 0.6472, + "epoch": 0.2586062132661629, + "grad_norm": 0.2595496788501369, + "learning_rate": 9.964404571441208e-05, + "loss": 0.6095, "step": 924 }, { - "epoch": 0.12946116165150454, - "grad_norm": 0.44884446203631584, - "learning_rate": 4.871975300519045e-05, - "loss": 0.6371, + "epoch": 0.258886090120347, + "grad_norm": 0.28799642407440884, + "learning_rate": 9.964220598061049e-05, + "loss": 0.5924, "step": 925 }, { - "epoch": 0.12960111966410076, - "grad_norm": 0.4675721819496966, - "learning_rate": 4.8716170295679053e-05, - "loss": 0.6653, + "epoch": 0.2591659669745312, + "grad_norm": 0.28278716744834675, + "learning_rate": 9.964036152183593e-05, + "loss": 0.589, "step": 926 }, { - "epoch": 0.129741077676697, - "grad_norm": 0.47573888293206407, - "learning_rate": 4.8712582712254016e-05, - "loss": 0.6267, + "epoch": 0.2594458438287154, + "grad_norm": 0.2777978462846363, + "learning_rate": 9.963851233826397e-05, + "loss": 0.5989, "step": 927 }, { - "epoch": 0.1298810356892932, - "grad_norm": 0.46375806207869347, - "learning_rate": 4.870899025565264e-05, - "loss": 0.6467, + "epoch": 0.2597257206828995, + "grad_norm": 0.28105828021526974, + "learning_rate": 9.963665843007064e-05, + "loss": 0.6118, "step": 928 }, { - "epoch": 0.13002099370188944, - "grad_norm": 0.457474154948321, - "learning_rate": 4.8705392926613205e-05, - "loss": 0.6156, + "epoch": 0.2600055975370837, + "grad_norm": 0.27873172937419916, + "learning_rate": 9.963479979743237e-05, + "loss": 0.6148, "step": 929 }, { - "epoch": 0.13016095171448566, - "grad_norm": 0.44866595177001034, - "learning_rate": 4.870179072587499e-05, - "loss": 0.5962, + "epoch": 0.26028547439126787, + "grad_norm": 0.28443215714757414, + "learning_rate": 9.963293644052609e-05, + "loss": 0.6184, "step": 930 }, { - "epoch": 0.1303009097270819, - "grad_norm": 0.4703083744568931, - "learning_rate": 4.86981836541783e-05, - "loss": 0.6309, + "epoch": 0.260565351245452, + "grad_norm": 0.2828574419909584, + "learning_rate": 9.963106835952912e-05, + "loss": 0.6126, "step": 931 }, { - "epoch": 0.13044086773967808, - "grad_norm": 0.6018616664400933, - "learning_rate": 4.869457171226441e-05, - "loss": 0.642, + "epoch": 0.2608452280996362, + "grad_norm": 0.28167140161768456, + "learning_rate": 9.96291955546193e-05, + "loss": 0.6351, "step": 932 }, { - "epoch": 0.1305808257522743, - "grad_norm": 0.46944918935149577, - "learning_rate": 4.869095490087562e-05, - "loss": 0.6273, + "epoch": 0.2611251049538203, + "grad_norm": 0.25233320469051, + "learning_rate": 9.962731802597484e-05, + "loss": 0.5965, "step": 933 }, { - "epoch": 0.13072078376487054, - "grad_norm": 0.44148958617555917, - "learning_rate": 4.868733322075522e-05, - "loss": 0.6378, + "epoch": 0.2614049818080045, + "grad_norm": 0.27303090462669166, + "learning_rate": 9.96254357737745e-05, + "loss": 0.6062, "step": 934 }, { - "epoch": 0.13086074177746676, - "grad_norm": 0.4395583432649483, - "learning_rate": 4.86837066726475e-05, - "loss": 0.6202, + "epoch": 0.26168485866218866, + "grad_norm": 0.25705929063520483, + "learning_rate": 9.96235487981974e-05, + "loss": 0.5998, "step": 935 }, { - "epoch": 0.13100069979006299, - "grad_norm": 0.44852952705339616, - "learning_rate": 4.868007525729775e-05, - "loss": 0.6416, + "epoch": 0.2619647355163728, + "grad_norm": 0.2640967425519281, + "learning_rate": 9.962165709942313e-05, + "loss": 0.5946, "step": 936 }, { - "epoch": 0.1311406578026592, - "grad_norm": 0.4487713642870549, - "learning_rate": 4.8676438975452274e-05, - "loss": 0.6263, + "epoch": 0.26224461237055696, + "grad_norm": 0.2735664946761113, + "learning_rate": 9.961976067763179e-05, + "loss": 0.6025, "step": 937 }, { - "epoch": 0.13128061581525544, - "grad_norm": 0.4648656677758751, - "learning_rate": 4.8672797827858355e-05, - "loss": 0.6164, + "epoch": 0.2625244892247411, + "grad_norm": 0.27131297193419807, + "learning_rate": 9.961785953300385e-05, + "loss": 0.6231, "step": 938 }, { - "epoch": 0.13142057382785163, - "grad_norm": 0.4608329526156638, - "learning_rate": 4.866915181526428e-05, - "loss": 0.6692, + "epoch": 0.26280436607892527, + "grad_norm": 0.2684145277146189, + "learning_rate": 9.961595366572025e-05, + "loss": 0.6134, "step": 939 }, { - "epoch": 0.13156053184044786, - "grad_norm": 0.4654710571009042, - "learning_rate": 4.866550093841936e-05, - "loss": 0.6222, + "epoch": 0.26308424293310945, + "grad_norm": 0.2514536858988313, + "learning_rate": 9.961404307596243e-05, + "loss": 0.5714, "step": 940 }, { - "epoch": 0.13170048985304408, - "grad_norm": 0.4603807269880903, - "learning_rate": 4.866184519807387e-05, - "loss": 0.6165, + "epoch": 0.26336411978729357, + "grad_norm": 0.26661765756611233, + "learning_rate": 9.96121277639122e-05, + "loss": 0.629, "step": 941 }, { - "epoch": 0.1318404478656403, - "grad_norm": 0.47496860741108315, - "learning_rate": 4.865818459497911e-05, - "loss": 0.6267, + "epoch": 0.26364399664147775, + "grad_norm": 0.28014701288949345, + "learning_rate": 9.961020772975189e-05, + "loss": 0.6151, "step": 942 }, { - "epoch": 0.13198040587823653, - "grad_norm": 0.46532589078199804, - "learning_rate": 4.8654519129887364e-05, - "loss": 0.6293, + "epoch": 0.26392387349566193, + "grad_norm": 0.2754571173200175, + "learning_rate": 9.960828297366425e-05, + "loss": 0.597, "step": 943 }, { - "epoch": 0.13212036389083276, - "grad_norm": 0.4590484934275629, - "learning_rate": 4.865084880355193e-05, - "loss": 0.6231, + "epoch": 0.26420375034984606, + "grad_norm": 0.2730088822424412, + "learning_rate": 9.960635349583245e-05, + "loss": 0.6072, "step": 944 }, { - "epoch": 0.13226032190342898, - "grad_norm": 0.47530046553555577, - "learning_rate": 4.86471736167271e-05, - "loss": 0.6133, + "epoch": 0.26448362720403024, + "grad_norm": 0.2703578819692016, + "learning_rate": 9.960441929644017e-05, + "loss": 0.6144, "step": 945 }, { - "epoch": 0.13240027991602518, - "grad_norm": 0.4527393981074044, - "learning_rate": 4.864349357016815e-05, - "loss": 0.6124, + "epoch": 0.26476350405821436, + "grad_norm": 0.27573868394189366, + "learning_rate": 9.960248037567149e-05, + "loss": 0.6014, "step": 946 }, { - "epoch": 0.1325402379286214, - "grad_norm": 0.46113362427703974, - "learning_rate": 4.863980866463138e-05, - "loss": 0.6456, + "epoch": 0.26504338091239854, + "grad_norm": 0.2908649459980856, + "learning_rate": 9.960053673371097e-05, + "loss": 0.5792, "step": 947 }, { - "epoch": 0.13268019594121763, - "grad_norm": 0.509871579156769, - "learning_rate": 4.8636118900874064e-05, - "loss": 0.6229, + "epoch": 0.2653232577665827, + "grad_norm": 0.28223122241673676, + "learning_rate": 9.959858837074361e-05, + "loss": 0.5889, "step": 948 }, { - "epoch": 0.13282015395381386, - "grad_norm": 0.4784635752003754, - "learning_rate": 4.86324242796545e-05, - "loss": 0.6596, + "epoch": 0.26560313462076685, + "grad_norm": 0.26432974167954687, + "learning_rate": 9.959663528695482e-05, + "loss": 0.5978, "step": 949 }, { - "epoch": 0.13296011196641008, - "grad_norm": 0.4414714405275805, - "learning_rate": 4.862872480173195e-05, - "loss": 0.6459, + "epoch": 0.265883011474951, + "grad_norm": 0.2723457851349199, + "learning_rate": 9.959467748253055e-05, + "loss": 0.6224, "step": 950 }, { - "epoch": 0.1331000699790063, - "grad_norm": 0.4587139434548765, - "learning_rate": 4.862502046786671e-05, - "loss": 0.6126, + "epoch": 0.2661628883291352, + "grad_norm": 0.27948914118114576, + "learning_rate": 9.95927149576571e-05, + "loss": 0.6059, "step": 951 }, { - "epoch": 0.1332400279916025, - "grad_norm": 0.42461073662577176, - "learning_rate": 4.8621311278820056e-05, - "loss": 0.6353, + "epoch": 0.26644276518331933, + "grad_norm": 0.2626681250038233, + "learning_rate": 9.959074771252131e-05, + "loss": 0.6011, "step": 952 }, { - "epoch": 0.13337998600419873, - "grad_norm": 0.4324208829491985, - "learning_rate": 4.861759723535426e-05, - "loss": 0.6499, + "epoch": 0.2667226420375035, + "grad_norm": 0.2762883435064009, + "learning_rate": 9.958877574731037e-05, + "loss": 0.621, "step": 953 }, { - "epoch": 0.13351994401679496, - "grad_norm": 0.4640550519654056, - "learning_rate": 4.86138783382326e-05, - "loss": 0.5896, + "epoch": 0.26700251889168763, + "grad_norm": 0.26163482978376007, + "learning_rate": 9.9586799062212e-05, + "loss": 0.6056, "step": 954 }, { - "epoch": 0.13365990202939118, - "grad_norm": 0.4638965920279924, - "learning_rate": 4.8610154588219345e-05, - "loss": 0.6201, + "epoch": 0.2672823957458718, + "grad_norm": 0.27237691070952597, + "learning_rate": 9.958481765741434e-05, + "loss": 0.6044, "step": 955 }, { - "epoch": 0.1337998600419874, - "grad_norm": 0.4695822317863163, - "learning_rate": 4.860642598607976e-05, - "loss": 0.6751, + "epoch": 0.267562272600056, + "grad_norm": 0.26363886412093257, + "learning_rate": 9.958283153310599e-05, + "loss": 0.5942, "step": 956 }, { - "epoch": 0.13393981805458363, - "grad_norm": 0.4600752660391693, - "learning_rate": 4.860269253258012e-05, - "loss": 0.6654, + "epoch": 0.2678421494542401, + "grad_norm": 0.27270700436482376, + "learning_rate": 9.958084068947598e-05, + "loss": 0.5887, "step": 957 }, { - "epoch": 0.13407977606717986, - "grad_norm": 0.47652178074145596, - "learning_rate": 4.859895422848767e-05, - "loss": 0.659, + "epoch": 0.2681220263084243, + "grad_norm": 0.2765738289591348, + "learning_rate": 9.95788451267138e-05, + "loss": 0.5823, "step": 958 }, { - "epoch": 0.13421973407977605, - "grad_norm": 0.445979113412429, - "learning_rate": 4.859521107457069e-05, - "loss": 0.632, + "epoch": 0.2684019031626085, + "grad_norm": 0.2662914222098519, + "learning_rate": 9.957684484500938e-05, + "loss": 0.5842, "step": 959 }, { - "epoch": 0.13435969209237228, - "grad_norm": 0.45688889669250304, - "learning_rate": 4.859146307159842e-05, - "loss": 0.5705, + "epoch": 0.2686817800167926, + "grad_norm": 0.2693829553518625, + "learning_rate": 9.957483984455313e-05, + "loss": 0.612, "step": 960 }, { - "epoch": 0.1344996501049685, - "grad_norm": 0.4567888614063869, - "learning_rate": 4.858771022034112e-05, - "loss": 0.6607, + "epoch": 0.2689616568709768, + "grad_norm": 0.28198638950057897, + "learning_rate": 9.957283012553587e-05, + "loss": 0.6308, "step": 961 }, { - "epoch": 0.13463960811756473, - "grad_norm": 0.4495345209098684, - "learning_rate": 4.858395252157004e-05, - "loss": 0.6415, + "epoch": 0.2692415337251609, + "grad_norm": 0.2805583044437407, + "learning_rate": 9.95708156881489e-05, + "loss": 0.617, "step": 962 }, { - "epoch": 0.13477956613016095, - "grad_norm": 0.48532112670125027, - "learning_rate": 4.858018997605742e-05, - "loss": 0.6088, + "epoch": 0.2695214105793451, + "grad_norm": 0.28291665878839123, + "learning_rate": 9.956879653258394e-05, + "loss": 0.5906, "step": 963 }, { - "epoch": 0.13491952414275718, - "grad_norm": 0.440987511045589, - "learning_rate": 4.8576422584576514e-05, - "loss": 0.6129, + "epoch": 0.26980128743352927, + "grad_norm": 0.28059897948775586, + "learning_rate": 9.956677265903318e-05, + "loss": 0.5817, "step": 964 }, { - "epoch": 0.1350594821553534, - "grad_norm": 0.4669491385905085, - "learning_rate": 4.8572650347901544e-05, - "loss": 0.6191, + "epoch": 0.2700811642877134, + "grad_norm": 0.2622792783790028, + "learning_rate": 9.956474406768925e-05, + "loss": 0.6236, "step": 965 }, { - "epoch": 0.1351994401679496, - "grad_norm": 0.4665408837691419, - "learning_rate": 4.856887326680774e-05, - "loss": 0.6317, + "epoch": 0.2703610411418976, + "grad_norm": 0.2716935989110165, + "learning_rate": 9.956271075874526e-05, + "loss": 0.5948, "step": 966 }, { - "epoch": 0.13533939818054583, - "grad_norm": 0.48893208349101197, - "learning_rate": 4.856509134207136e-05, - "loss": 0.595, + "epoch": 0.2706409179960817, + "grad_norm": 0.27814557340457063, + "learning_rate": 9.95606727323947e-05, + "loss": 0.5819, "step": 967 }, { - "epoch": 0.13547935619314205, - "grad_norm": 0.4438150829055967, - "learning_rate": 4.856130457446959e-05, - "loss": 0.6065, + "epoch": 0.2709207948502659, + "grad_norm": 0.25713661611856053, + "learning_rate": 9.955862998883157e-05, + "loss": 0.6038, "step": 968 }, { - "epoch": 0.13561931420573828, - "grad_norm": 0.5115425422953542, - "learning_rate": 4.8557512964780674e-05, - "loss": 0.6448, + "epoch": 0.27120067170445006, + "grad_norm": 0.35186999778057515, + "learning_rate": 9.95565825282503e-05, + "loss": 0.5926, "step": 969 }, { - "epoch": 0.1357592722183345, - "grad_norm": 0.44504395647634876, - "learning_rate": 4.855371651378382e-05, - "loss": 0.6164, + "epoch": 0.2714805485586342, + "grad_norm": 0.267435250594246, + "learning_rate": 9.955453035084576e-05, + "loss": 0.5834, "step": 970 }, { - "epoch": 0.13589923023093073, - "grad_norm": 0.5076347594918873, - "learning_rate": 4.854991522225923e-05, - "loss": 0.6207, + "epoch": 0.27176042541281836, + "grad_norm": 0.2813180143133331, + "learning_rate": 9.95524734568133e-05, + "loss": 0.6113, "step": 971 }, { - "epoch": 0.13603918824352695, - "grad_norm": 0.4602778279540183, - "learning_rate": 4.854610909098812e-05, - "loss": 0.5923, + "epoch": 0.27204030226700254, + "grad_norm": 0.27599248130038134, + "learning_rate": 9.955041184634867e-05, + "loss": 0.6339, "step": 972 }, { - "epoch": 0.13617914625612315, - "grad_norm": 0.49106399964431596, - "learning_rate": 4.8542298120752684e-05, - "loss": 0.5914, + "epoch": 0.27232017912118667, + "grad_norm": 0.2759096761375916, + "learning_rate": 9.95483455196481e-05, + "loss": 0.5978, "step": 973 }, { - "epoch": 0.13631910426871938, - "grad_norm": 0.44686877889055854, - "learning_rate": 4.85384823123361e-05, - "loss": 0.6163, + "epoch": 0.27260005597537085, + "grad_norm": 0.2748515788564237, + "learning_rate": 9.954627447690828e-05, + "loss": 0.6157, "step": 974 }, { - "epoch": 0.1364590622813156, - "grad_norm": 0.46546574820842096, - "learning_rate": 4.8534661666522584e-05, - "loss": 0.6625, + "epoch": 0.27287993282955497, + "grad_norm": 0.27312379599353603, + "learning_rate": 9.954419871832632e-05, + "loss": 0.6021, "step": 975 }, { - "epoch": 0.13659902029391183, - "grad_norm": 0.6072066700290619, - "learning_rate": 4.8530836184097297e-05, - "loss": 0.6048, + "epoch": 0.27315980968373915, + "grad_norm": 0.26254830363154275, + "learning_rate": 9.95421182440998e-05, + "loss": 0.5949, "step": 976 }, { - "epoch": 0.13673897830650805, - "grad_norm": 0.49583731751388566, - "learning_rate": 4.852700586584642e-05, - "loss": 0.6445, + "epoch": 0.27343968653792333, + "grad_norm": 0.26614398085798086, + "learning_rate": 9.954003305442673e-05, + "loss": 0.5834, "step": 977 }, { - "epoch": 0.13687893631910428, - "grad_norm": 0.4600581543242378, - "learning_rate": 4.852317071255712e-05, - "loss": 0.6276, + "epoch": 0.27371956339210746, + "grad_norm": 0.25638339807793464, + "learning_rate": 9.95379431495056e-05, + "loss": 0.5787, "step": 978 }, { - "epoch": 0.1370188943317005, - "grad_norm": 0.47143173605603533, - "learning_rate": 4.851933072501756e-05, - "loss": 0.6707, + "epoch": 0.27399944024629164, + "grad_norm": 0.2936388532502744, + "learning_rate": 9.953584852953529e-05, + "loss": 0.5972, "step": 979 }, { - "epoch": 0.1371588523442967, - "grad_norm": 0.43226562343994834, - "learning_rate": 4.85154859040169e-05, - "loss": 0.5993, + "epoch": 0.2742793171004758, + "grad_norm": 0.2742154692474115, + "learning_rate": 9.953374919471522e-05, + "loss": 0.5862, "step": 980 }, { - "epoch": 0.13729881035689293, - "grad_norm": 0.5032598563086278, - "learning_rate": 4.8511636250345294e-05, - "loss": 0.6388, + "epoch": 0.27455919395465994, + "grad_norm": 0.28776549392919626, + "learning_rate": 9.953164514524513e-05, + "loss": 0.6145, "step": 981 }, { - "epoch": 0.13743876836948915, - "grad_norm": 0.6566397034726049, - "learning_rate": 4.850778176479387e-05, - "loss": 0.626, + "epoch": 0.2748390708088441, + "grad_norm": 0.26552937227956486, + "learning_rate": 9.952953638132536e-05, + "loss": 0.6087, "step": 982 }, { - "epoch": 0.13757872638208538, - "grad_norm": 0.45320474044463904, - "learning_rate": 4.850392244815478e-05, - "loss": 0.6222, + "epoch": 0.27511894766302825, + "grad_norm": 0.2786682092441462, + "learning_rate": 9.95274229031566e-05, + "loss": 0.6083, "step": 983 }, { - "epoch": 0.1377186843946816, - "grad_norm": 0.4637289824351019, - "learning_rate": 4.8500058301221144e-05, - "loss": 0.6357, + "epoch": 0.2753988245172124, + "grad_norm": 0.271474680333771, + "learning_rate": 9.952530471094e-05, + "loss": 0.6017, "step": 984 }, { - "epoch": 0.13785864240727783, - "grad_norm": 0.4502718394375495, - "learning_rate": 4.849618932478708e-05, - "loss": 0.6161, + "epoch": 0.2756787013713966, + "grad_norm": 0.28260129028292885, + "learning_rate": 9.952318180487717e-05, + "loss": 0.5846, "step": 985 }, { - "epoch": 0.13799860041987405, - "grad_norm": 0.45481795684352794, - "learning_rate": 4.849231551964771e-05, - "loss": 0.6251, + "epoch": 0.27595857822558073, + "grad_norm": 0.27022931757510327, + "learning_rate": 9.95210541851702e-05, + "loss": 0.6158, "step": 986 }, { - "epoch": 0.13813855843247025, - "grad_norm": 0.505134985018315, - "learning_rate": 4.8488436886599144e-05, - "loss": 0.6358, + "epoch": 0.2762384550797649, + "grad_norm": 0.2680005624160554, + "learning_rate": 9.951892185202154e-05, + "loss": 0.6172, "step": 987 }, { - "epoch": 0.13827851644506647, - "grad_norm": 0.48619324349330834, - "learning_rate": 4.8484553426438464e-05, - "loss": 0.6115, + "epoch": 0.27651833193394904, + "grad_norm": 0.27830258224366494, + "learning_rate": 9.95167848056342e-05, + "loss": 0.5998, "step": 988 }, { - "epoch": 0.1384184744576627, - "grad_norm": 0.4650935755290328, - "learning_rate": 4.8480665139963774e-05, - "loss": 0.6004, + "epoch": 0.2767982087881332, + "grad_norm": 0.2792144180130348, + "learning_rate": 9.951464304621156e-05, + "loss": 0.6098, "step": 989 }, { - "epoch": 0.13855843247025892, - "grad_norm": 0.44207287204019446, - "learning_rate": 4.847677202797415e-05, - "loss": 0.597, + "epoch": 0.2770780856423174, + "grad_norm": 0.2725138960107155, + "learning_rate": 9.95124965739575e-05, + "loss": 0.6086, "step": 990 }, { - "epoch": 0.13869839048285515, - "grad_norm": 0.4629268299609161, - "learning_rate": 4.8472874091269674e-05, - "loss": 0.6414, + "epoch": 0.2773579624965015, + "grad_norm": 0.2627344099137454, + "learning_rate": 9.951034538907628e-05, + "loss": 0.5989, "step": 991 }, { - "epoch": 0.13883834849545137, - "grad_norm": 0.46784298534877244, - "learning_rate": 4.84689713306514e-05, - "loss": 0.6263, + "epoch": 0.2776378393506857, + "grad_norm": 0.27102831176084596, + "learning_rate": 9.950818949177268e-05, + "loss": 0.5889, "step": 992 }, { - "epoch": 0.13897830650804757, - "grad_norm": 0.448599541259752, - "learning_rate": 4.8465063746921395e-05, - "loss": 0.6395, + "epoch": 0.2779177162048699, + "grad_norm": 0.26694858006916766, + "learning_rate": 9.950602888225189e-05, + "loss": 0.5933, "step": 993 }, { - "epoch": 0.1391182645206438, - "grad_norm": 0.4582611197304845, - "learning_rate": 4.8461151340882706e-05, - "loss": 0.6533, + "epoch": 0.278197593059054, + "grad_norm": 0.2625987723781404, + "learning_rate": 9.950386356071957e-05, + "loss": 0.5788, "step": 994 }, { - "epoch": 0.13925822253324002, - "grad_norm": 0.49137392446019373, - "learning_rate": 4.845723411333936e-05, - "loss": 0.6327, + "epoch": 0.2784774699132382, + "grad_norm": 0.26360059118488244, + "learning_rate": 9.950169352738181e-05, + "loss": 0.5985, "step": 995 }, { - "epoch": 0.13939818054583625, - "grad_norm": 0.4651834345736373, - "learning_rate": 4.84533120650964e-05, - "loss": 0.6444, + "epoch": 0.2787573467674223, + "grad_norm": 0.2615450758854652, + "learning_rate": 9.949951878244515e-05, + "loss": 0.5909, "step": 996 }, { - "epoch": 0.13953813855843247, - "grad_norm": 0.43613025151896734, - "learning_rate": 4.844938519695984e-05, - "loss": 0.6036, + "epoch": 0.2790372236216065, + "grad_norm": 0.27196421232001006, + "learning_rate": 9.949733932611658e-05, + "loss": 0.622, "step": 997 }, { - "epoch": 0.1396780965710287, - "grad_norm": 0.4547252123073849, - "learning_rate": 4.84454535097367e-05, - "loss": 0.6715, + "epoch": 0.27931710047579067, + "grad_norm": 0.2910453173297014, + "learning_rate": 9.949515515860354e-05, + "loss": 0.6075, "step": 998 }, { - "epoch": 0.13981805458362492, - "grad_norm": 0.4411221787105127, - "learning_rate": 4.8441517004234975e-05, - "loss": 0.6207, + "epoch": 0.2795969773299748, + "grad_norm": 0.26061247085466427, + "learning_rate": 9.949296628011394e-05, + "loss": 0.5884, "step": 999 }, { - "epoch": 0.13995801259622112, - "grad_norm": 0.4652981257021806, - "learning_rate": 4.8437575681263656e-05, - "loss": 0.628, + "epoch": 0.279876854184159, + "grad_norm": 0.27444733214256306, + "learning_rate": 9.949077269085612e-05, + "loss": 0.603, "step": 1000 }, { - "epoch": 0.14009797060881735, - "grad_norm": 0.4506290084182641, - "learning_rate": 4.843362954163273e-05, - "loss": 0.5924, + "epoch": 0.28015673103834315, + "grad_norm": 0.27391616009521086, + "learning_rate": 9.948857439103882e-05, + "loss": 0.5959, "step": 1001 }, { - "epoch": 0.14023792862141357, - "grad_norm": 0.4476783073678505, - "learning_rate": 4.842967858615316e-05, - "loss": 0.6682, + "epoch": 0.2804366078925273, + "grad_norm": 0.2760842761307169, + "learning_rate": 9.948637138087133e-05, + "loss": 0.5807, "step": 1002 }, { - "epoch": 0.1403778866340098, - "grad_norm": 0.4357549411702438, - "learning_rate": 4.842572281563691e-05, - "loss": 0.6332, + "epoch": 0.28071648474671146, + "grad_norm": 0.2669814240346824, + "learning_rate": 9.948416366056332e-05, + "loss": 0.5718, "step": 1003 }, { - "epoch": 0.14051784464660602, - "grad_norm": 0.4883894311666662, - "learning_rate": 4.842176223089694e-05, - "loss": 0.642, + "epoch": 0.2809963616008956, + "grad_norm": 0.28233653487539345, + "learning_rate": 9.948195123032491e-05, + "loss": 0.65, "step": 1004 }, { - "epoch": 0.14065780265920225, - "grad_norm": 0.4432669792413136, - "learning_rate": 4.8417796832747186e-05, - "loss": 0.6049, + "epoch": 0.28127623845507976, + "grad_norm": 0.27260826487029294, + "learning_rate": 9.947973409036669e-05, + "loss": 0.5911, "step": 1005 }, { - "epoch": 0.14079776067179847, - "grad_norm": 0.46556766571285496, - "learning_rate": 4.841382662200257e-05, - "loss": 0.646, + "epoch": 0.28155611530926394, + "grad_norm": 0.28098283552885095, + "learning_rate": 9.947751224089968e-05, + "loss": 0.5897, "step": 1006 }, { - "epoch": 0.14093771868439467, - "grad_norm": 0.5157114366864044, - "learning_rate": 4.8409851599479015e-05, - "loss": 0.657, + "epoch": 0.28183599216344807, + "grad_norm": 0.2676544920210222, + "learning_rate": 9.947528568213536e-05, + "loss": 0.5909, "step": 1007 }, { - "epoch": 0.1410776766969909, - "grad_norm": 0.4366811539636766, - "learning_rate": 4.8405871765993433e-05, - "loss": 0.6326, + "epoch": 0.28211586901763225, + "grad_norm": 0.2587664698367932, + "learning_rate": 9.947305441428565e-05, + "loss": 0.6, "step": 1008 }, { - "epoch": 0.14121763470958712, - "grad_norm": 0.43049502553637836, - "learning_rate": 4.8401887122363714e-05, - "loss": 0.616, + "epoch": 0.28239574587181643, + "grad_norm": 0.26104593545132293, + "learning_rate": 9.947081843756293e-05, + "loss": 0.5701, "step": 1009 }, { - "epoch": 0.14135759272218335, - "grad_norm": 0.48197017641080875, - "learning_rate": 4.839789766940875e-05, - "loss": 0.6432, + "epoch": 0.28267562272600055, + "grad_norm": 0.2777624470132468, + "learning_rate": 9.946857775218003e-05, + "loss": 0.5734, "step": 1010 }, { - "epoch": 0.14149755073477957, - "grad_norm": 0.45845061514071844, - "learning_rate": 4.839390340794841e-05, - "loss": 0.6262, + "epoch": 0.28295549958018473, + "grad_norm": 0.271606897904836, + "learning_rate": 9.94663323583502e-05, + "loss": 0.6035, "step": 1011 }, { - "epoch": 0.1416375087473758, - "grad_norm": 0.457307052580857, - "learning_rate": 4.838990433880355e-05, - "loss": 0.6317, + "epoch": 0.28323537643436886, + "grad_norm": 0.2663552253008882, + "learning_rate": 9.946408225628719e-05, + "loss": 0.5923, "step": 1012 }, { - "epoch": 0.14177746675997202, - "grad_norm": 0.44363047689877544, - "learning_rate": 4.838590046279602e-05, - "loss": 0.599, + "epoch": 0.28351525328855304, + "grad_norm": 0.2627440179998988, + "learning_rate": 9.946182744620512e-05, + "loss": 0.5849, "step": 1013 }, { - "epoch": 0.14191742477256822, - "grad_norm": 0.45048773354087446, - "learning_rate": 4.838189178074867e-05, - "loss": 0.626, + "epoch": 0.2837951301427372, + "grad_norm": 0.2662012385006806, + "learning_rate": 9.945956792831863e-05, + "loss": 0.5875, "step": 1014 }, { - "epoch": 0.14205738278516444, - "grad_norm": 0.44588282768255694, - "learning_rate": 4.8377878293485305e-05, - "loss": 0.5985, + "epoch": 0.28407500699692134, + "grad_norm": 0.28504881278858585, + "learning_rate": 9.94573037028428e-05, + "loss": 0.5744, "step": 1015 }, { - "epoch": 0.14219734079776067, - "grad_norm": 0.4579523725156681, - "learning_rate": 4.8373860001830755e-05, - "loss": 0.5844, + "epoch": 0.2843548838511055, + "grad_norm": 0.288989312859365, + "learning_rate": 9.945503476999311e-05, + "loss": 0.6071, "step": 1016 }, { - "epoch": 0.1423372988103569, - "grad_norm": 0.46436658695739996, - "learning_rate": 4.8369836906610816e-05, - "loss": 0.5891, + "epoch": 0.28463476070528965, + "grad_norm": 0.2645334711064779, + "learning_rate": 9.945276112998553e-05, + "loss": 0.6271, "step": 1017 }, { - "epoch": 0.14247725682295312, - "grad_norm": 0.46569223528579046, - "learning_rate": 4.836580900865227e-05, - "loss": 0.6164, + "epoch": 0.2849146375594738, + "grad_norm": 0.2588427238423153, + "learning_rate": 9.945048278303645e-05, + "loss": 0.6045, "step": 1018 }, { - "epoch": 0.14261721483554934, - "grad_norm": 0.4543042451718399, - "learning_rate": 4.836177630878289e-05, - "loss": 0.588, + "epoch": 0.285194514413658, + "grad_norm": 0.27436590182121146, + "learning_rate": 9.944819972936277e-05, + "loss": 0.6195, "step": 1019 }, { - "epoch": 0.14275717284814557, - "grad_norm": 0.44767680240836294, - "learning_rate": 4.8357738807831446e-05, - "loss": 0.6551, + "epoch": 0.28547439126784213, + "grad_norm": 0.275966442208794, + "learning_rate": 9.944591196918175e-05, + "loss": 0.5998, "step": 1020 }, { - "epoch": 0.14289713086074177, - "grad_norm": 0.44934339880943197, - "learning_rate": 4.835369650662767e-05, - "loss": 0.6167, + "epoch": 0.2857542681220263, + "grad_norm": 0.2794753908066549, + "learning_rate": 9.944361950271115e-05, + "loss": 0.6101, "step": 1021 }, { - "epoch": 0.143037088873338, - "grad_norm": 0.45729799528169507, - "learning_rate": 4.834964940600231e-05, - "loss": 0.5913, + "epoch": 0.2860341449762105, + "grad_norm": 0.27313870946188806, + "learning_rate": 9.944132233016916e-05, + "loss": 0.6022, "step": 1022 }, { - "epoch": 0.14317704688593422, - "grad_norm": 0.46783307870591107, - "learning_rate": 4.8345597506787075e-05, - "loss": 0.6267, + "epoch": 0.2863140218303946, + "grad_norm": 0.26652648188303374, + "learning_rate": 9.943902045177445e-05, + "loss": 0.5968, "step": 1023 }, { - "epoch": 0.14331700489853044, - "grad_norm": 0.4863803445979481, - "learning_rate": 4.8341540809814686e-05, - "loss": 0.62, + "epoch": 0.2865938986845788, + "grad_norm": 0.2794085723982734, + "learning_rate": 9.943671386774611e-05, + "loss": 0.5927, "step": 1024 }, { - "epoch": 0.14345696291112667, - "grad_norm": 0.48368481434141886, - "learning_rate": 4.8337479315918825e-05, - "loss": 0.6458, + "epoch": 0.2868737755387629, + "grad_norm": 0.2796160365142537, + "learning_rate": 9.943440257830366e-05, + "loss": 0.6057, "step": 1025 }, { - "epoch": 0.1435969209237229, - "grad_norm": 0.44240686647941085, - "learning_rate": 4.833341302593417e-05, - "loss": 0.5951, + "epoch": 0.2871536523929471, + "grad_norm": 0.26291805107032756, + "learning_rate": 9.943208658366711e-05, + "loss": 0.5876, "step": 1026 }, { - "epoch": 0.14373687893631912, - "grad_norm": 0.46334653267689374, - "learning_rate": 4.832934194069639e-05, - "loss": 0.6634, + "epoch": 0.2874335292471313, + "grad_norm": 0.26187642395698535, + "learning_rate": 9.942976588405689e-05, + "loss": 0.5436, "step": 1027 }, { - "epoch": 0.14387683694891532, - "grad_norm": 0.4566293728675077, - "learning_rate": 4.832526606104213e-05, - "loss": 0.6574, + "epoch": 0.2877134061013154, + "grad_norm": 0.2744183024872371, + "learning_rate": 9.942744047969388e-05, + "loss": 0.6115, "step": 1028 }, { - "epoch": 0.14401679496151154, - "grad_norm": 0.4482024873976331, - "learning_rate": 4.832118538780902e-05, - "loss": 0.5956, + "epoch": 0.2879932829554996, + "grad_norm": 0.264826759070424, + "learning_rate": 9.942511037079942e-05, + "loss": 0.6198, "step": 1029 }, { - "epoch": 0.14415675297410777, - "grad_norm": 0.45091056986953776, - "learning_rate": 4.8317099921835697e-05, - "loss": 0.638, + "epoch": 0.28827315980968377, + "grad_norm": 0.27401931788791184, + "learning_rate": 9.942277555759529e-05, + "loss": 0.6001, "step": 1030 }, { - "epoch": 0.144296710986704, - "grad_norm": 0.4546329207907057, - "learning_rate": 4.8313009663961746e-05, - "loss": 0.6386, + "epoch": 0.2885530366638679, + "grad_norm": 0.26733469295639417, + "learning_rate": 9.942043604030372e-05, + "loss": 0.5619, "step": 1031 }, { - "epoch": 0.14443666899930022, - "grad_norm": 0.4279917091758518, - "learning_rate": 4.830891461502777e-05, - "loss": 0.6225, + "epoch": 0.28883291351805207, + "grad_norm": 0.26524110787728267, + "learning_rate": 9.941809181914738e-05, + "loss": 0.5811, "step": 1032 }, { - "epoch": 0.14457662701189644, - "grad_norm": 0.4653479806373598, - "learning_rate": 4.8304814775875326e-05, - "loss": 0.6273, + "epoch": 0.2891127903722362, + "grad_norm": 0.25400137970600717, + "learning_rate": 9.941574289434941e-05, + "loss": 0.5917, "step": 1033 }, { - "epoch": 0.14471658502449264, - "grad_norm": 0.44000485919639887, - "learning_rate": 4.8300710147346996e-05, - "loss": 0.6049, + "epoch": 0.2893926672264204, + "grad_norm": 0.263302696284142, + "learning_rate": 9.941338926613337e-05, + "loss": 0.6041, "step": 1034 }, { - "epoch": 0.14485654303708886, - "grad_norm": 0.4481538097684572, - "learning_rate": 4.829660073028631e-05, - "loss": 0.6526, + "epoch": 0.28967254408060455, + "grad_norm": 0.2555759343619727, + "learning_rate": 9.941103093472329e-05, + "loss": 0.5863, "step": 1035 }, { - "epoch": 0.1449965010496851, - "grad_norm": 0.4392800745551569, - "learning_rate": 4.829248652553779e-05, - "loss": 0.6282, + "epoch": 0.2899524209347887, + "grad_norm": 0.2728911085684263, + "learning_rate": 9.940866790034363e-05, + "loss": 0.5894, "step": 1036 }, { - "epoch": 0.14513645906228131, - "grad_norm": 0.43714289037219956, - "learning_rate": 4.8288367533946964e-05, - "loss": 0.6162, + "epoch": 0.29023229778897286, + "grad_norm": 0.2586285672017633, + "learning_rate": 9.940630016321928e-05, + "loss": 0.5899, "step": 1037 }, { - "epoch": 0.14527641707487754, - "grad_norm": 0.47850026452151156, - "learning_rate": 4.828424375636031e-05, - "loss": 0.653, + "epoch": 0.290512174643157, + "grad_norm": 0.2596358831445758, + "learning_rate": 9.940392772357565e-05, + "loss": 0.6286, "step": 1038 }, { - "epoch": 0.14541637508747376, - "grad_norm": 0.4387192439390933, - "learning_rate": 4.828011519362531e-05, - "loss": 0.6186, + "epoch": 0.29079205149734116, + "grad_norm": 0.2561486933508156, + "learning_rate": 9.940155058163851e-05, + "loss": 0.5794, "step": 1039 }, { - "epoch": 0.14555633310007, - "grad_norm": 0.438096448983694, - "learning_rate": 4.827598184659043e-05, - "loss": 0.5983, + "epoch": 0.29107192835152534, + "grad_norm": 0.27378265122860135, + "learning_rate": 9.939916873763415e-05, + "loss": 0.6152, "step": 1040 }, { - "epoch": 0.1456962911126662, - "grad_norm": 0.4545201024028468, - "learning_rate": 4.827184371610511e-05, - "loss": 0.6423, + "epoch": 0.29135180520570947, + "grad_norm": 0.26343237493237315, + "learning_rate": 9.939678219178925e-05, + "loss": 0.5963, "step": 1041 }, { - "epoch": 0.1458362491252624, - "grad_norm": 0.45563857615067394, - "learning_rate": 4.826770080301978e-05, - "loss": 0.5828, + "epoch": 0.29163168205989365, + "grad_norm": 0.2692789610845327, + "learning_rate": 9.939439094433098e-05, + "loss": 0.5756, "step": 1042 }, { - "epoch": 0.14597620713785864, - "grad_norm": 0.45222712800980513, - "learning_rate": 4.826355310818585e-05, - "loss": 0.5934, + "epoch": 0.29191155891407783, + "grad_norm": 0.265489619392448, + "learning_rate": 9.939199499548692e-05, + "loss": 0.6048, "step": 1043 }, { - "epoch": 0.14611616515045486, - "grad_norm": 0.4831310051395056, - "learning_rate": 4.825940063245572e-05, - "loss": 0.6841, + "epoch": 0.29219143576826195, + "grad_norm": 0.26159473965570745, + "learning_rate": 9.938959434548513e-05, + "loss": 0.5992, "step": 1044 }, { - "epoch": 0.1462561231630511, - "grad_norm": 0.46283974537294126, - "learning_rate": 4.8255243376682744e-05, - "loss": 0.6435, + "epoch": 0.29247131262244613, + "grad_norm": 0.2651332960627244, + "learning_rate": 9.938718899455413e-05, + "loss": 0.5668, "step": 1045 }, { - "epoch": 0.1463960811756473, - "grad_norm": 0.4735013090808921, - "learning_rate": 4.825108134172131e-05, - "loss": 0.6406, + "epoch": 0.29275118947663026, + "grad_norm": 0.2713938373931137, + "learning_rate": 9.938477894292281e-05, + "loss": 0.579, "step": 1046 }, { - "epoch": 0.14653603918824354, - "grad_norm": 0.4506208707883696, - "learning_rate": 4.824691452842675e-05, - "loss": 0.6372, + "epoch": 0.29303106633081444, + "grad_norm": 0.2629954882749189, + "learning_rate": 9.938236419082061e-05, + "loss": 0.5852, "step": 1047 }, { - "epoch": 0.14667599720083974, - "grad_norm": 0.4364931698009048, - "learning_rate": 4.824274293765536e-05, - "loss": 0.6026, + "epoch": 0.2933109431849986, + "grad_norm": 0.2748138533245085, + "learning_rate": 9.937994473847733e-05, + "loss": 0.6048, "step": 1048 }, { - "epoch": 0.14681595521343596, - "grad_norm": 0.44459989160312974, - "learning_rate": 4.823856657026448e-05, - "loss": 0.6316, + "epoch": 0.29359082003918274, + "grad_norm": 0.2687295022545514, + "learning_rate": 9.937752058612328e-05, + "loss": 0.608, "step": 1049 }, { - "epoch": 0.1469559132260322, - "grad_norm": 0.44933496657307287, - "learning_rate": 4.8234385427112385e-05, - "loss": 0.5813, + "epoch": 0.2938706968933669, + "grad_norm": 0.26658527628367473, + "learning_rate": 9.937509173398918e-05, + "loss": 0.5851, "step": 1050 }, { - "epoch": 0.1470958712386284, - "grad_norm": 0.4594564123055747, - "learning_rate": 4.8230199509058326e-05, - "loss": 0.6263, + "epoch": 0.2941505737475511, + "grad_norm": 0.2728718605298641, + "learning_rate": 9.93726581823062e-05, + "loss": 0.5877, "step": 1051 }, { - "epoch": 0.14723582925122464, - "grad_norm": 0.4597297128739321, - "learning_rate": 4.822600881696256e-05, - "loss": 0.6166, + "epoch": 0.2944304506017352, + "grad_norm": 0.26644686401704976, + "learning_rate": 9.9370219931306e-05, + "loss": 0.5696, "step": 1052 }, { - "epoch": 0.14737578726382086, - "grad_norm": 0.4797057798768006, - "learning_rate": 4.822181335168634e-05, - "loss": 0.6791, + "epoch": 0.2947103274559194, + "grad_norm": 0.25271495199119975, + "learning_rate": 9.936777698122061e-05, + "loss": 0.591, "step": 1053 }, { - "epoch": 0.1475157452764171, - "grad_norm": 0.463031471453022, - "learning_rate": 4.821761311409184e-05, - "loss": 0.6442, + "epoch": 0.29499020431010353, + "grad_norm": 0.2598427046722628, + "learning_rate": 9.93653293322826e-05, + "loss": 0.6115, "step": 1054 }, { - "epoch": 0.14765570328901328, - "grad_norm": 0.46690698533496683, - "learning_rate": 4.821340810504228e-05, - "loss": 0.6209, + "epoch": 0.2952700811642877, + "grad_norm": 0.2725678140201863, + "learning_rate": 9.93628769847249e-05, + "loss": 0.5837, "step": 1055 }, { - "epoch": 0.1477956613016095, - "grad_norm": 0.47419444368207214, - "learning_rate": 4.8209198325401815e-05, - "loss": 0.6504, + "epoch": 0.2955499580184719, + "grad_norm": 0.2704809174511505, + "learning_rate": 9.936041993878093e-05, + "loss": 0.6075, "step": 1056 }, { - "epoch": 0.14793561931420574, - "grad_norm": 0.4959825016958297, - "learning_rate": 4.8204983776035605e-05, - "loss": 0.615, + "epoch": 0.295829834872656, + "grad_norm": 0.2693829026217594, + "learning_rate": 9.935795819468459e-05, + "loss": 0.5817, "step": 1057 }, { - "epoch": 0.14807557732680196, - "grad_norm": 0.4481720138656539, - "learning_rate": 4.8200764457809784e-05, - "loss": 0.6137, + "epoch": 0.2961097117268402, + "grad_norm": 0.2632695991665687, + "learning_rate": 9.935549175267013e-05, + "loss": 0.588, "step": 1058 }, { - "epoch": 0.14821553533939819, - "grad_norm": 0.45342457377099943, - "learning_rate": 4.819654037159146e-05, - "loss": 0.6605, + "epoch": 0.2963895885810244, + "grad_norm": 0.2666915384441575, + "learning_rate": 9.935302061297236e-05, + "loss": 0.5951, "step": 1059 }, { - "epoch": 0.1483554933519944, - "grad_norm": 0.4383482872844384, - "learning_rate": 4.8192311518248726e-05, - "loss": 0.6383, + "epoch": 0.2966694654352085, + "grad_norm": 0.2528790162576626, + "learning_rate": 9.935054477582646e-05, + "loss": 0.6159, "step": 1060 }, { - "epoch": 0.14849545136459064, - "grad_norm": 0.4558068303448877, - "learning_rate": 4.818807789865065e-05, - "loss": 0.6221, + "epoch": 0.2969493422893927, + "grad_norm": 0.258695386401773, + "learning_rate": 9.934806424146809e-05, + "loss": 0.6317, "step": 1061 }, { - "epoch": 0.14863540937718683, - "grad_norm": 0.44912539719700584, - "learning_rate": 4.818383951366729e-05, - "loss": 0.6413, + "epoch": 0.2972292191435768, + "grad_norm": 0.25882616695355243, + "learning_rate": 9.934557901013333e-05, + "loss": 0.5475, "step": 1062 }, { - "epoch": 0.14877536738978306, - "grad_norm": 0.4355408755527561, - "learning_rate": 4.817959636416969e-05, - "loss": 0.6451, + "epoch": 0.297509095997761, + "grad_norm": 0.2547630767859152, + "learning_rate": 9.934308908205875e-05, + "loss": 0.59, "step": 1063 }, { - "epoch": 0.14891532540237928, - "grad_norm": 0.46358234313926816, - "learning_rate": 4.8175348451029836e-05, - "loss": 0.6249, + "epoch": 0.29778897285194517, + "grad_norm": 0.2581350059717819, + "learning_rate": 9.934059445748134e-05, + "loss": 0.582, "step": 1064 }, { - "epoch": 0.1490552834149755, - "grad_norm": 0.47591686796466215, - "learning_rate": 4.817109577512073e-05, - "loss": 0.6829, + "epoch": 0.2980688497061293, + "grad_norm": 0.27509269212921794, + "learning_rate": 9.93380951366385e-05, + "loss": 0.5981, "step": 1065 }, { - "epoch": 0.14919524142757173, - "grad_norm": 0.4231210119222125, - "learning_rate": 4.8166838337316334e-05, - "loss": 0.5962, + "epoch": 0.29834872656031347, + "grad_norm": 0.2604372488330954, + "learning_rate": 9.933559111976818e-05, + "loss": 0.5569, "step": 1066 }, { - "epoch": 0.14933519944016796, - "grad_norm": 0.47903022082147834, - "learning_rate": 4.81625761384916e-05, - "loss": 0.6666, + "epoch": 0.2986286034144976, + "grad_norm": 0.2645858515338992, + "learning_rate": 9.933308240710868e-05, + "loss": 0.5852, "step": 1067 }, { - "epoch": 0.14947515745276416, - "grad_norm": 0.4356774379772111, - "learning_rate": 4.8158309179522454e-05, - "loss": 0.584, + "epoch": 0.2989084802686818, + "grad_norm": 0.2727455621721883, + "learning_rate": 9.933056899889878e-05, + "loss": 0.5923, "step": 1068 }, { - "epoch": 0.14961511546536038, - "grad_norm": 0.43493168432918317, - "learning_rate": 4.8154037461285796e-05, - "loss": 0.6003, + "epoch": 0.29918835712286596, + "grad_norm": 0.2639919791582596, + "learning_rate": 9.932805089537771e-05, + "loss": 0.6059, "step": 1069 }, { - "epoch": 0.1497550734779566, - "grad_norm": 0.4152139582969938, - "learning_rate": 4.8149760984659506e-05, - "loss": 0.6396, + "epoch": 0.2994682339770501, + "grad_norm": 0.2689253534280279, + "learning_rate": 9.932552809678515e-05, + "loss": 0.5812, "step": 1070 }, { - "epoch": 0.14989503149055283, - "grad_norm": 0.4578712882918065, - "learning_rate": 4.814547975052245e-05, - "loss": 0.6249, + "epoch": 0.29974811083123426, + "grad_norm": 0.2729777989503698, + "learning_rate": 9.93230006033612e-05, + "loss": 0.6067, "step": 1071 }, { - "epoch": 0.15003498950314906, - "grad_norm": 0.46527522340987304, - "learning_rate": 4.814119375975447e-05, - "loss": 0.6691, + "epoch": 0.30002798768541844, + "grad_norm": 0.2705257986314921, + "learning_rate": 9.932046841534646e-05, + "loss": 0.5968, "step": 1072 }, { - "epoch": 0.15017494751574528, - "grad_norm": 0.4551938793084905, - "learning_rate": 4.813690301323636e-05, - "loss": 0.6347, + "epoch": 0.30030786453960256, + "grad_norm": 0.2640730489370255, + "learning_rate": 9.931793153298192e-05, + "loss": 0.5716, "step": 1073 }, { - "epoch": 0.1503149055283415, - "grad_norm": 0.472894226480286, - "learning_rate": 4.813260751184992e-05, - "loss": 0.6558, + "epoch": 0.30058774139378674, + "grad_norm": 0.2610573382932847, + "learning_rate": 9.931538995650907e-05, + "loss": 0.5639, "step": 1074 }, { - "epoch": 0.1504548635409377, - "grad_norm": 0.45206474588868106, - "learning_rate": 4.812830725647793e-05, - "loss": 0.5915, + "epoch": 0.30086761824797087, + "grad_norm": 0.2692689996451498, + "learning_rate": 9.931284368616978e-05, + "loss": 0.5806, "step": 1075 }, { - "epoch": 0.15059482155353393, - "grad_norm": 0.46083474871884716, - "learning_rate": 4.8124002248004126e-05, - "loss": 0.6336, + "epoch": 0.30114749510215505, + "grad_norm": 0.2815554921303491, + "learning_rate": 9.931029272220644e-05, + "loss": 0.5798, "step": 1076 }, { - "epoch": 0.15073477956613016, - "grad_norm": 0.44095545335189645, - "learning_rate": 4.811969248731323e-05, - "loss": 0.6307, + "epoch": 0.30142737195633923, + "grad_norm": 0.2655026281350125, + "learning_rate": 9.930773706486185e-05, + "loss": 0.5986, "step": 1077 }, { - "epoch": 0.15087473757872638, - "grad_norm": 0.46628362437250337, - "learning_rate": 4.8115377975290955e-05, - "loss": 0.6267, + "epoch": 0.30170724881052335, + "grad_norm": 0.27030316189420567, + "learning_rate": 9.930517671437923e-05, + "loss": 0.5976, "step": 1078 }, { - "epoch": 0.1510146955913226, - "grad_norm": 0.4570685734330465, - "learning_rate": 4.811105871282395e-05, - "loss": 0.6891, + "epoch": 0.30198712566470753, + "grad_norm": 0.2590977730284289, + "learning_rate": 9.930261167100229e-05, + "loss": 0.5955, "step": 1079 }, { - "epoch": 0.15115465360391883, - "grad_norm": 0.4494515219252805, - "learning_rate": 4.81067347007999e-05, - "loss": 0.6147, + "epoch": 0.3022670025188917, + "grad_norm": 0.2620915114177364, + "learning_rate": 9.930004193497519e-05, + "loss": 0.6212, "step": 1080 }, { - "epoch": 0.15129461161651506, - "grad_norm": 0.44047992997082713, - "learning_rate": 4.810240594010742e-05, - "loss": 0.6443, + "epoch": 0.30254687937307584, + "grad_norm": 0.27777303725376995, + "learning_rate": 9.929746750654249e-05, + "loss": 0.6132, "step": 1081 }, { - "epoch": 0.15143456962911125, - "grad_norm": 0.4423964257933292, - "learning_rate": 4.80980724316361e-05, - "loss": 0.6316, + "epoch": 0.30282675622726, + "grad_norm": 0.2499508096326113, + "learning_rate": 9.929488838594925e-05, + "loss": 0.5995, "step": 1082 }, { - "epoch": 0.15157452764170748, - "grad_norm": 0.43454276433469047, - "learning_rate": 4.809373417627654e-05, - "loss": 0.6244, + "epoch": 0.30310663308144414, + "grad_norm": 0.2504456280815404, + "learning_rate": 9.929230457344093e-05, + "loss": 0.5854, "step": 1083 }, { - "epoch": 0.1517144856543037, - "grad_norm": 0.4576456453029119, - "learning_rate": 4.8089391174920275e-05, - "loss": 0.6158, + "epoch": 0.3033865099356283, + "grad_norm": 0.2756607289113451, + "learning_rate": 9.928971606926347e-05, + "loss": 0.5941, "step": 1084 }, { - "epoch": 0.15185444366689993, - "grad_norm": 0.4376862180207936, - "learning_rate": 4.808504342845986e-05, - "loss": 0.6287, + "epoch": 0.3036663867898125, + "grad_norm": 0.26535612580493123, + "learning_rate": 9.928712287366326e-05, + "loss": 0.6104, "step": 1085 }, { - "epoch": 0.15199440167949615, - "grad_norm": 0.4272520839128296, - "learning_rate": 4.808069093778879e-05, - "loss": 0.5709, + "epoch": 0.30394626364399663, + "grad_norm": 0.24719193247256463, + "learning_rate": 9.928452498688711e-05, + "loss": 0.5803, "step": 1086 }, { - "epoch": 0.15213435969209238, - "grad_norm": 0.4562369557196528, - "learning_rate": 4.807633370380155e-05, - "loss": 0.651, + "epoch": 0.3042261404981808, + "grad_norm": 0.26520607157065035, + "learning_rate": 9.928192240918227e-05, + "loss": 0.5923, "step": 1087 }, { - "epoch": 0.1522743177046886, - "grad_norm": 0.4304596270726819, - "learning_rate": 4.807197172739357e-05, - "loss": 0.6338, + "epoch": 0.30450601735236493, + "grad_norm": 0.2664832595562186, + "learning_rate": 9.927931514079648e-05, + "loss": 0.5693, "step": 1088 }, { - "epoch": 0.1524142757172848, - "grad_norm": 0.4427035165037056, - "learning_rate": 4.806760500946132e-05, - "loss": 0.6196, + "epoch": 0.3047858942065491, + "grad_norm": 0.2653592619200589, + "learning_rate": 9.927670318197789e-05, + "loss": 0.5971, "step": 1089 }, { - "epoch": 0.15255423372988103, - "grad_norm": 0.4603650971833003, - "learning_rate": 4.806323355090218e-05, - "loss": 0.6526, + "epoch": 0.3050657710607333, + "grad_norm": 0.2698349805193774, + "learning_rate": 9.92740865329751e-05, + "loss": 0.5755, "step": 1090 }, { - "epoch": 0.15269419174247725, - "grad_norm": 0.4348487042293239, - "learning_rate": 4.805885735261454e-05, - "loss": 0.6375, + "epoch": 0.3053456479149174, + "grad_norm": 0.2657086659526615, + "learning_rate": 9.92714651940372e-05, + "loss": 0.5801, "step": 1091 }, { - "epoch": 0.15283414975507348, - "grad_norm": 0.4515390501287558, - "learning_rate": 4.805447641549774e-05, - "loss": 0.5708, + "epoch": 0.3056255247691016, + "grad_norm": 0.2713662771623878, + "learning_rate": 9.926883916541364e-05, + "loss": 0.5963, "step": 1092 }, { - "epoch": 0.1529741077676697, - "grad_norm": 0.4715753314020402, - "learning_rate": 4.805009074045213e-05, - "loss": 0.6477, + "epoch": 0.3059054016232858, + "grad_norm": 0.27085680359250447, + "learning_rate": 9.92662084473544e-05, + "loss": 0.5884, "step": 1093 }, { - "epoch": 0.15311406578026593, - "grad_norm": 0.4434658029205517, - "learning_rate": 4.8045700328378986e-05, - "loss": 0.6013, + "epoch": 0.3061852784774699, + "grad_norm": 0.2738147342608081, + "learning_rate": 9.926357304010987e-05, + "loss": 0.5809, "step": 1094 }, { - "epoch": 0.15325402379286215, - "grad_norm": 0.44663741205752033, - "learning_rate": 4.804130518018058e-05, - "loss": 0.6081, + "epoch": 0.3064651553316541, + "grad_norm": 0.2635383752148601, + "learning_rate": 9.926093294393087e-05, + "loss": 0.5855, "step": 1095 }, { - "epoch": 0.15339398180545835, - "grad_norm": 0.41670655291144576, - "learning_rate": 4.803690529676019e-05, - "loss": 0.6378, + "epoch": 0.3067450321858382, + "grad_norm": 0.2584319865589908, + "learning_rate": 9.925828815906871e-05, + "loss": 0.5692, "step": 1096 }, { - "epoch": 0.15353393981805458, - "grad_norm": 0.49686928110177025, - "learning_rate": 4.803250067902202e-05, - "loss": 0.6445, + "epoch": 0.3070249090400224, + "grad_norm": 0.24917565418840854, + "learning_rate": 9.925563868577511e-05, + "loss": 0.5938, "step": 1097 }, { - "epoch": 0.1536738978306508, - "grad_norm": 0.4685195748194762, - "learning_rate": 4.802809132787125e-05, - "loss": 0.6066, + "epoch": 0.30730478589420657, + "grad_norm": 0.27764442234697434, + "learning_rate": 9.925298452430226e-05, + "loss": 0.594, "step": 1098 }, { - "epoch": 0.15381385584324703, - "grad_norm": 0.46286655961754153, - "learning_rate": 4.802367724421407e-05, - "loss": 0.6144, + "epoch": 0.3075846627483907, + "grad_norm": 0.26175790946980354, + "learning_rate": 9.925032567490275e-05, + "loss": 0.5942, "step": 1099 }, { - "epoch": 0.15395381385584325, - "grad_norm": 0.43665548007096733, - "learning_rate": 4.8019258428957605e-05, - "loss": 0.6404, + "epoch": 0.30786453960257487, + "grad_norm": 0.2641376921332692, + "learning_rate": 9.92476621378297e-05, + "loss": 0.5859, "step": 1100 }, { - "epoch": 0.15409377186843948, - "grad_norm": 0.4700705212693585, - "learning_rate": 4.8014834883009966e-05, - "loss": 0.6297, + "epoch": 0.30814441645675905, + "grad_norm": 0.2624741488450959, + "learning_rate": 9.924499391333659e-05, + "loss": 0.6135, "step": 1101 }, { - "epoch": 0.1542337298810357, - "grad_norm": 0.46140020925769776, - "learning_rate": 4.8010406607280244e-05, - "loss": 0.63, + "epoch": 0.3084242933109432, + "grad_norm": 0.258648592673151, + "learning_rate": 9.924232100167741e-05, + "loss": 0.5821, "step": 1102 }, { - "epoch": 0.1543736878936319, - "grad_norm": 0.432596752729328, - "learning_rate": 4.800597360267849e-05, - "loss": 0.6014, + "epoch": 0.30870417016512736, + "grad_norm": 0.2520646311408594, + "learning_rate": 9.923964340310654e-05, + "loss": 0.5903, "step": 1103 }, { - "epoch": 0.15451364590622813, - "grad_norm": 0.4087158619129061, - "learning_rate": 4.800153587011573e-05, - "loss": 0.6101, + "epoch": 0.3089840470193115, + "grad_norm": 0.2508943082834356, + "learning_rate": 9.923696111787884e-05, + "loss": 0.5536, "step": 1104 }, { - "epoch": 0.15465360391882435, - "grad_norm": 0.4306421416237517, - "learning_rate": 4.799709341050396e-05, - "loss": 0.6088, + "epoch": 0.30926392387349566, + "grad_norm": 0.25590191362612047, + "learning_rate": 9.923427414624964e-05, + "loss": 0.6122, "step": 1105 }, { - "epoch": 0.15479356193142058, - "grad_norm": 0.439174570947343, - "learning_rate": 4.799264622475616e-05, - "loss": 0.6271, + "epoch": 0.30954380072767984, + "grad_norm": 0.2620523692519131, + "learning_rate": 9.923158248847466e-05, + "loss": 0.5788, "step": 1106 }, { - "epoch": 0.1549335199440168, - "grad_norm": 0.45230787848804416, - "learning_rate": 4.7988194313786275e-05, - "loss": 0.6314, + "epoch": 0.30982367758186397, + "grad_norm": 0.26371485722936605, + "learning_rate": 9.922888614481012e-05, + "loss": 0.5943, "step": 1107 }, { - "epoch": 0.15507347795661303, - "grad_norm": 0.4452846746911816, - "learning_rate": 4.79837376785092e-05, - "loss": 0.6189, + "epoch": 0.31010355443604815, + "grad_norm": 0.26383852093572285, + "learning_rate": 9.922618511551263e-05, + "loss": 0.5494, "step": 1108 }, { - "epoch": 0.15521343596920922, - "grad_norm": 0.43395577576303523, - "learning_rate": 4.7979276319840824e-05, - "loss": 0.5867, + "epoch": 0.31038343129023227, + "grad_norm": 0.2601015265137906, + "learning_rate": 9.922347940083928e-05, + "loss": 0.5963, "step": 1109 }, { - "epoch": 0.15535339398180545, - "grad_norm": 0.4198939865361689, - "learning_rate": 4.797481023869801e-05, - "loss": 0.6033, + "epoch": 0.31066330814441645, + "grad_norm": 0.2584865584377887, + "learning_rate": 9.922076900104762e-05, + "loss": 0.598, "step": 1110 }, { - "epoch": 0.15549335199440167, - "grad_norm": 0.4115113980248625, - "learning_rate": 4.797033943599859e-05, - "loss": 0.5792, + "epoch": 0.31094318499860063, + "grad_norm": 0.2566279120727724, + "learning_rate": 9.921805391639561e-05, + "loss": 0.5857, "step": 1111 }, { - "epoch": 0.1556333100069979, - "grad_norm": 0.42305720865767826, - "learning_rate": 4.796586391266134e-05, - "loss": 0.5721, + "epoch": 0.31122306185278475, + "grad_norm": 0.2517603449460284, + "learning_rate": 9.921533414714168e-05, + "loss": 0.6075, "step": 1112 }, { - "epoch": 0.15577326801959412, - "grad_norm": 0.4594490270870182, - "learning_rate": 4.796138366960603e-05, - "loss": 0.5923, + "epoch": 0.31150293870696893, + "grad_norm": 0.24381384656835792, + "learning_rate": 9.921260969354471e-05, + "loss": 0.6012, "step": 1113 }, { - "epoch": 0.15591322603219035, - "grad_norm": 0.4660997680749236, - "learning_rate": 4.7956898707753405e-05, - "loss": 0.6168, + "epoch": 0.3117828155611531, + "grad_norm": 0.25356308624131946, + "learning_rate": 9.9209880555864e-05, + "loss": 0.5975, "step": 1114 }, { - "epoch": 0.15605318404478657, - "grad_norm": 0.4300722165746083, - "learning_rate": 4.795240902802517e-05, - "loss": 0.5975, + "epoch": 0.31206269241533724, + "grad_norm": 0.24198658009302337, + "learning_rate": 9.92071467343593e-05, + "loss": 0.572, "step": 1115 }, { - "epoch": 0.15619314205738277, - "grad_norm": 0.4593249810088033, - "learning_rate": 4.794791463134399e-05, - "loss": 0.598, + "epoch": 0.3123425692695214, + "grad_norm": 0.25933353238025053, + "learning_rate": 9.920440822929085e-05, + "loss": 0.5839, "step": 1116 }, { - "epoch": 0.156333100069979, - "grad_norm": 0.4615991027920945, - "learning_rate": 4.79434155186335e-05, - "loss": 0.6267, + "epoch": 0.31262244612370554, + "grad_norm": 0.26002323769885366, + "learning_rate": 9.920166504091927e-05, + "loss": 0.5932, "step": 1117 }, { - "epoch": 0.15647305808257522, - "grad_norm": 0.441601529655813, - "learning_rate": 4.7938911690818347e-05, - "loss": 0.6607, + "epoch": 0.3129023229778897, + "grad_norm": 0.2572780922199821, + "learning_rate": 9.919891716950566e-05, + "loss": 0.5634, "step": 1118 }, { - "epoch": 0.15661301609517145, - "grad_norm": 0.45355246625005374, - "learning_rate": 4.793440314882408e-05, - "loss": 0.6071, + "epoch": 0.3131821998320739, + "grad_norm": 0.2594440372689817, + "learning_rate": 9.91961646153116e-05, + "loss": 0.5845, "step": 1119 }, { - "epoch": 0.15675297410776767, - "grad_norm": 0.44036948902223627, - "learning_rate": 4.792988989357727e-05, - "loss": 0.6154, + "epoch": 0.31346207668625803, + "grad_norm": 0.24926425960120063, + "learning_rate": 9.919340737859906e-05, + "loss": 0.5778, "step": 1120 }, { - "epoch": 0.1568929321203639, - "grad_norm": 0.4734397090177979, - "learning_rate": 4.7925371926005435e-05, - "loss": 0.6102, + "epoch": 0.3137419535404422, + "grad_norm": 0.2611456735182473, + "learning_rate": 9.919064545963046e-05, + "loss": 0.5766, "step": 1121 }, { - "epoch": 0.15703289013296012, - "grad_norm": 0.43617965348321686, - "learning_rate": 4.792084924703705e-05, - "loss": 0.6104, + "epoch": 0.3140218303946264, + "grad_norm": 0.282742563833002, + "learning_rate": 9.91878788586687e-05, + "loss": 0.6143, "step": 1122 }, { - "epoch": 0.15717284814555632, - "grad_norm": 0.4478534547540619, - "learning_rate": 4.791632185760158e-05, - "loss": 0.6235, + "epoch": 0.3143017072488105, + "grad_norm": 0.2665321164338926, + "learning_rate": 9.918510757597708e-05, + "loss": 0.5967, "step": 1123 }, { - "epoch": 0.15731280615815255, - "grad_norm": 0.44766677539513156, - "learning_rate": 4.791178975862945e-05, - "loss": 0.6439, + "epoch": 0.3145815841029947, + "grad_norm": 0.25822441174138605, + "learning_rate": 9.91823316118194e-05, + "loss": 0.5771, "step": 1124 }, { - "epoch": 0.15745276417074877, - "grad_norm": 0.44554264209996225, - "learning_rate": 4.790725295105205e-05, - "loss": 0.5959, + "epoch": 0.3148614609571788, + "grad_norm": 0.265687585828365, + "learning_rate": 9.917955096645987e-05, + "loss": 0.5731, "step": 1125 }, { - "epoch": 0.157592722183345, - "grad_norm": 0.4296700348089923, - "learning_rate": 4.790271143580174e-05, - "loss": 0.6539, + "epoch": 0.315141337811363, + "grad_norm": 0.2531331848624826, + "learning_rate": 9.917676564016315e-05, + "loss": 0.5721, "step": 1126 }, { - "epoch": 0.15773268019594122, - "grad_norm": 0.4412703085902547, - "learning_rate": 4.789816521381185e-05, - "loss": 0.6375, + "epoch": 0.3154212146655472, + "grad_norm": 0.26311383941298366, + "learning_rate": 9.917397563319434e-05, + "loss": 0.5839, "step": 1127 }, { - "epoch": 0.15787263820853745, - "grad_norm": 0.4373118306763795, - "learning_rate": 4.7893614286016684e-05, - "loss": 0.6112, + "epoch": 0.3157010915197313, + "grad_norm": 0.2546061798405397, + "learning_rate": 9.917118094581903e-05, + "loss": 0.5765, "step": 1128 }, { - "epoch": 0.15801259622113367, - "grad_norm": 0.44070716127461335, - "learning_rate": 4.7889058653351485e-05, - "loss": 0.6195, + "epoch": 0.3159809683739155, + "grad_norm": 0.255912334062152, + "learning_rate": 9.916838157830319e-05, + "loss": 0.5878, "step": 1129 }, { - "epoch": 0.15815255423372987, - "grad_norm": 0.4052645175970106, - "learning_rate": 4.788449831675248e-05, - "loss": 0.6118, + "epoch": 0.31626084522809966, + "grad_norm": 0.25087761510058526, + "learning_rate": 9.916557753091326e-05, + "loss": 0.6024, "step": 1130 }, { - "epoch": 0.1582925122463261, - "grad_norm": 0.4205527476901149, - "learning_rate": 4.7879933277156884e-05, - "loss": 0.6098, + "epoch": 0.3165407220822838, + "grad_norm": 0.2638276003803527, + "learning_rate": 9.916276880391614e-05, + "loss": 0.5871, "step": 1131 }, { - "epoch": 0.15843247025892232, - "grad_norm": 0.4232443251461348, - "learning_rate": 4.787536353550285e-05, - "loss": 0.5962, + "epoch": 0.31682059893646797, + "grad_norm": 0.24961641220572936, + "learning_rate": 9.915995539757917e-05, + "loss": 0.5902, "step": 1132 }, { - "epoch": 0.15857242827151855, - "grad_norm": 0.4555075573119837, - "learning_rate": 4.787078909272951e-05, - "loss": 0.6099, + "epoch": 0.3171004757906521, + "grad_norm": 0.2572607629430118, + "learning_rate": 9.915713731217014e-05, + "loss": 0.5973, "step": 1133 }, { - "epoch": 0.15871238628411477, - "grad_norm": 0.45597750842746004, - "learning_rate": 4.786620994977695e-05, - "loss": 0.6403, + "epoch": 0.31738035264483627, + "grad_norm": 0.25485521505291603, + "learning_rate": 9.915431454795725e-05, + "loss": 0.5671, "step": 1134 }, { - "epoch": 0.158852344296711, - "grad_norm": 0.4523579064101052, - "learning_rate": 4.7861626107586236e-05, - "loss": 0.6276, + "epoch": 0.31766022949902045, + "grad_norm": 0.24656253410737883, + "learning_rate": 9.915148710520921e-05, + "loss": 0.5757, "step": 1135 }, { - "epoch": 0.15899230230930722, - "grad_norm": 0.4496756804618233, - "learning_rate": 4.785703756709939e-05, - "loss": 0.625, + "epoch": 0.3179401063532046, + "grad_norm": 0.2712707220725371, + "learning_rate": 9.91486549841951e-05, + "loss": 0.6036, "step": 1136 }, { - "epoch": 0.15913226032190342, - "grad_norm": 0.44479315166484706, - "learning_rate": 4.78524443292594e-05, - "loss": 0.6219, + "epoch": 0.31821998320738876, + "grad_norm": 0.27167567171785995, + "learning_rate": 9.91458181851845e-05, + "loss": 0.5816, "step": 1137 }, { - "epoch": 0.15927221833449964, - "grad_norm": 0.423002589052915, - "learning_rate": 4.784784639501024e-05, - "loss": 0.6142, + "epoch": 0.3184998600615729, + "grad_norm": 0.2613591558250261, + "learning_rate": 9.914297670844742e-05, + "loss": 0.5973, "step": 1138 }, { - "epoch": 0.15941217634709587, - "grad_norm": 0.45783730451738547, - "learning_rate": 4.7843243765296816e-05, - "loss": 0.6327, + "epoch": 0.31877973691575706, + "grad_norm": 0.256824803592396, + "learning_rate": 9.914013055425431e-05, + "loss": 0.5816, "step": 1139 }, { - "epoch": 0.1595521343596921, - "grad_norm": 0.41727555581040476, - "learning_rate": 4.783863644106502e-05, - "loss": 0.6314, + "epoch": 0.31905961376994124, + "grad_norm": 0.2652728041759387, + "learning_rate": 9.913727972287606e-05, + "loss": 0.598, "step": 1140 }, { - "epoch": 0.15969209237228832, - "grad_norm": 0.4433704880299786, - "learning_rate": 4.7834024423261715e-05, - "loss": 0.6297, + "epoch": 0.31933949062412537, + "grad_norm": 0.2555407037193432, + "learning_rate": 9.913442421458404e-05, + "loss": 0.6037, "step": 1141 }, { - "epoch": 0.15983205038488454, - "grad_norm": 0.44605075439465414, - "learning_rate": 4.78294077128347e-05, - "loss": 0.6052, + "epoch": 0.31961936747830955, + "grad_norm": 0.2601481703232158, + "learning_rate": 9.913156402965001e-05, + "loss": 0.5651, "step": 1142 }, { - "epoch": 0.15997200839748077, - "grad_norm": 0.4252564366376628, - "learning_rate": 4.7824786310732754e-05, - "loss": 0.5952, + "epoch": 0.3198992443324937, + "grad_norm": 0.2509235089023841, + "learning_rate": 9.912869916834622e-05, + "loss": 0.575, "step": 1143 }, { - "epoch": 0.16011196641007697, - "grad_norm": 0.4487603412437455, - "learning_rate": 4.782016021790564e-05, - "loss": 0.5993, + "epoch": 0.32017912118667785, + "grad_norm": 0.2664948588632181, + "learning_rate": 9.912582963094533e-05, + "loss": 0.6087, "step": 1144 }, { - "epoch": 0.1602519244226732, - "grad_norm": 0.4335684662398312, - "learning_rate": 4.781552943530405e-05, - "loss": 0.6119, + "epoch": 0.32045899804086203, + "grad_norm": 0.25430703442836183, + "learning_rate": 9.91229554177205e-05, + "loss": 0.579, "step": 1145 }, { - "epoch": 0.16039188243526942, - "grad_norm": 0.48534201807046695, - "learning_rate": 4.781089396387968e-05, - "loss": 0.6566, + "epoch": 0.32073887489504616, + "grad_norm": 0.2761772671368232, + "learning_rate": 9.912007652894526e-05, + "loss": 0.6283, "step": 1146 }, { - "epoch": 0.16053184044786564, - "grad_norm": 0.4657809557886318, - "learning_rate": 4.780625380458513e-05, - "loss": 0.6188, + "epoch": 0.32101875174923034, + "grad_norm": 0.26450694721226353, + "learning_rate": 9.911719296489366e-05, + "loss": 0.575, "step": 1147 }, { - "epoch": 0.16067179846046187, - "grad_norm": 0.463002737360727, - "learning_rate": 4.7801608958374034e-05, - "loss": 0.6281, + "epoch": 0.3212986286034145, + "grad_norm": 0.2675400274143289, + "learning_rate": 9.911430472584013e-05, + "loss": 0.5817, "step": 1148 }, { - "epoch": 0.1608117564730581, - "grad_norm": 0.4327965386696676, - "learning_rate": 4.779695942620094e-05, - "loss": 0.5676, + "epoch": 0.32157850545759864, + "grad_norm": 0.2647524034579435, + "learning_rate": 9.911141181205958e-05, + "loss": 0.6011, "step": 1149 }, { - "epoch": 0.1609517144856543, - "grad_norm": 0.43456914658498524, - "learning_rate": 4.779230520902138e-05, - "loss": 0.6018, + "epoch": 0.3218583823117828, + "grad_norm": 0.2505623645611872, + "learning_rate": 9.910851422382739e-05, + "loss": 0.5626, "step": 1150 }, { - "epoch": 0.16109167249825052, - "grad_norm": 0.4340562614707508, - "learning_rate": 4.778764630779183e-05, - "loss": 0.5634, + "epoch": 0.322138259165967, + "grad_norm": 0.2663609883611856, + "learning_rate": 9.91056119614193e-05, + "loss": 0.5983, "step": 1151 }, { - "epoch": 0.16123163051084674, - "grad_norm": 0.5384914426514081, - "learning_rate": 4.778298272346976e-05, - "loss": 0.6159, + "epoch": 0.3224181360201511, + "grad_norm": 0.24503997090574697, + "learning_rate": 9.910270502511159e-05, + "loss": 0.5809, "step": 1152 }, { - "epoch": 0.16137158852344297, - "grad_norm": 0.45988369030455006, - "learning_rate": 4.7778314457013565e-05, - "loss": 0.6306, + "epoch": 0.3226980128743353, + "grad_norm": 0.2790500311043266, + "learning_rate": 9.909979341518093e-05, + "loss": 0.6015, "step": 1153 }, { - "epoch": 0.1615115465360392, - "grad_norm": 0.4650532211327251, - "learning_rate": 4.777364150938263e-05, - "loss": 0.6192, + "epoch": 0.32297788972851943, + "grad_norm": 0.2705731083416747, + "learning_rate": 9.909687713190445e-05, + "loss": 0.5702, "step": 1154 }, { - "epoch": 0.16165150454863542, - "grad_norm": 0.4253518192932048, - "learning_rate": 4.77689638815373e-05, - "loss": 0.6373, + "epoch": 0.3232577665827036, + "grad_norm": 0.25550553982629376, + "learning_rate": 9.909395617555973e-05, + "loss": 0.5716, "step": 1155 }, { - "epoch": 0.16179146256123164, - "grad_norm": 0.45004968895984576, - "learning_rate": 4.776428157443886e-05, - "loss": 0.6173, + "epoch": 0.3235376434368878, + "grad_norm": 0.2685885166862554, + "learning_rate": 9.909103054642478e-05, + "loss": 0.6136, "step": 1156 }, { - "epoch": 0.16193142057382784, - "grad_norm": 0.4555578881747446, - "learning_rate": 4.775959458904958e-05, - "loss": 0.6011, + "epoch": 0.3238175202910719, + "grad_norm": 0.2620236933847693, + "learning_rate": 9.908810024477807e-05, + "loss": 0.5949, "step": 1157 }, { - "epoch": 0.16207137858642406, - "grad_norm": 0.4034724262013599, - "learning_rate": 4.775490292633269e-05, - "loss": 0.5945, + "epoch": 0.3240973971452561, + "grad_norm": 0.26623920075723845, + "learning_rate": 9.908516527089848e-05, + "loss": 0.5799, "step": 1158 }, { - "epoch": 0.1622113365990203, - "grad_norm": 0.44601195862385407, - "learning_rate": 4.7750206587252366e-05, - "loss": 0.5964, + "epoch": 0.3243772739994402, + "grad_norm": 0.2822439756971142, + "learning_rate": 9.908222562506542e-05, + "loss": 0.602, "step": 1159 }, { - "epoch": 0.16235129461161651, - "grad_norm": 0.45007693700753115, - "learning_rate": 4.7745505572773754e-05, - "loss": 0.6187, + "epoch": 0.3246571508536244, + "grad_norm": 0.2637678258803881, + "learning_rate": 9.907928130755862e-05, + "loss": 0.5605, "step": 1160 }, { - "epoch": 0.16249125262421274, - "grad_norm": 0.45228216299402807, - "learning_rate": 4.774079988386296e-05, - "loss": 0.6235, + "epoch": 0.3249370277078086, + "grad_norm": 0.261430455404208, + "learning_rate": 9.907633231865838e-05, + "loss": 0.5925, "step": 1161 }, { - "epoch": 0.16263121063680896, - "grad_norm": 0.43817158113391474, - "learning_rate": 4.773608952148706e-05, - "loss": 0.6181, + "epoch": 0.3252169045619927, + "grad_norm": 0.2583250833297409, + "learning_rate": 9.907337865864534e-05, + "loss": 0.5585, "step": 1162 }, { - "epoch": 0.1627711686494052, - "grad_norm": 0.4218728284381608, - "learning_rate": 4.7731374486614056e-05, - "loss": 0.6472, + "epoch": 0.3254967814161769, + "grad_norm": 0.2567489982772435, + "learning_rate": 9.907042032780067e-05, + "loss": 0.5715, "step": 1163 }, { - "epoch": 0.1629111266620014, - "grad_norm": 0.4385041088709967, - "learning_rate": 4.772665478021296e-05, - "loss": 0.6263, + "epoch": 0.32577665827036106, + "grad_norm": 0.25405686607101835, + "learning_rate": 9.906745732640592e-05, + "loss": 0.5685, "step": 1164 }, { - "epoch": 0.1630510846745976, - "grad_norm": 0.4575475392945415, - "learning_rate": 4.7721930403253714e-05, - "loss": 0.6136, + "epoch": 0.3260565351245452, + "grad_norm": 0.3087236259561368, + "learning_rate": 9.906448965474312e-05, + "loss": 0.6123, "step": 1165 }, { - "epoch": 0.16319104268719384, - "grad_norm": 0.4457426524641399, - "learning_rate": 4.771720135670722e-05, - "loss": 0.623, + "epoch": 0.32633641197872937, + "grad_norm": 0.28014190343761486, + "learning_rate": 9.906151731309472e-05, + "loss": 0.5644, "step": 1166 }, { - "epoch": 0.16333100069979006, - "grad_norm": 0.4257770146896383, - "learning_rate": 4.7712467641545354e-05, - "loss": 0.6371, + "epoch": 0.3266162888329135, + "grad_norm": 0.27121399211710767, + "learning_rate": 9.905854030174364e-05, + "loss": 0.5987, "step": 1167 }, { - "epoch": 0.1634709587123863, - "grad_norm": 0.5052851757471238, - "learning_rate": 4.770772925874093e-05, - "loss": 0.6249, + "epoch": 0.3268961656870977, + "grad_norm": 0.26693600160204783, + "learning_rate": 9.905555862097324e-05, + "loss": 0.5981, "step": 1168 }, { - "epoch": 0.1636109167249825, - "grad_norm": 0.47522529944132186, - "learning_rate": 4.7702986209267745e-05, - "loss": 0.6378, + "epoch": 0.32717604254128185, + "grad_norm": 0.25613297806326796, + "learning_rate": 9.905257227106733e-05, + "loss": 0.5908, "step": 1169 }, { - "epoch": 0.16375087473757874, - "grad_norm": 0.42207340820026673, - "learning_rate": 4.7698238494100536e-05, - "loss": 0.6271, + "epoch": 0.327455919395466, + "grad_norm": 0.26904417275188486, + "learning_rate": 9.904958125231012e-05, + "loss": 0.5788, "step": 1170 }, { - "epoch": 0.16389083275017494, - "grad_norm": 0.42129003632456846, - "learning_rate": 4.7693486114215015e-05, - "loss": 0.6135, + "epoch": 0.32773579624965016, + "grad_norm": 0.2541131649139711, + "learning_rate": 9.904658556498631e-05, + "loss": 0.5922, "step": 1171 }, { - "epoch": 0.16403079076277116, - "grad_norm": 0.4929479783722899, - "learning_rate": 4.768872907058783e-05, - "loss": 0.6043, + "epoch": 0.32801567310383434, + "grad_norm": 0.29144181026230515, + "learning_rate": 9.904358520938104e-05, + "loss": 0.6014, "step": 1172 }, { - "epoch": 0.1641707487753674, - "grad_norm": 0.4810208017615139, - "learning_rate": 4.7683967364196624e-05, - "loss": 0.65, + "epoch": 0.32829554995801846, + "grad_norm": 0.2633395039432903, + "learning_rate": 9.904058018577987e-05, + "loss": 0.5986, "step": 1173 }, { - "epoch": 0.1643107067879636, - "grad_norm": 0.47080599343079316, - "learning_rate": 4.767920099601996e-05, - "loss": 0.6215, + "epoch": 0.32857542681220264, + "grad_norm": 0.269393619976543, + "learning_rate": 9.903757049446884e-05, + "loss": 0.6017, "step": 1174 }, { - "epoch": 0.16445066480055984, - "grad_norm": 0.4354882117508445, - "learning_rate": 4.767442996703737e-05, - "loss": 0.6066, + "epoch": 0.32885530366638677, + "grad_norm": 0.2564490743444378, + "learning_rate": 9.90345561357344e-05, + "loss": 0.5759, "step": 1175 }, { - "epoch": 0.16459062281315606, - "grad_norm": 0.4284983370361917, - "learning_rate": 4.766965427822936e-05, - "loss": 0.5834, + "epoch": 0.32913518052057095, + "grad_norm": 0.2486777003142995, + "learning_rate": 9.903153710986346e-05, + "loss": 0.5706, "step": 1176 }, { - "epoch": 0.1647305808257523, - "grad_norm": 0.45013293062633963, - "learning_rate": 4.7664873930577383e-05, - "loss": 0.6747, + "epoch": 0.3294150573747551, + "grad_norm": 0.262014400862415, + "learning_rate": 9.902851341714337e-05, + "loss": 0.5938, "step": 1177 }, { - "epoch": 0.16487053883834848, - "grad_norm": 0.4616970172832583, - "learning_rate": 4.766008892506384e-05, - "loss": 0.6331, + "epoch": 0.32969493422893925, + "grad_norm": 0.26309639336992313, + "learning_rate": 9.902548505786193e-05, + "loss": 0.5717, "step": 1178 }, { - "epoch": 0.1650104968509447, - "grad_norm": 0.43031916863208525, - "learning_rate": 4.765529926267211e-05, - "loss": 0.6218, + "epoch": 0.32997481108312343, + "grad_norm": 0.2526464138072752, + "learning_rate": 9.902245203230738e-05, + "loss": 0.5937, "step": 1179 }, { - "epoch": 0.16515045486354094, - "grad_norm": 0.4663955970462797, - "learning_rate": 4.765050494438651e-05, - "loss": 0.6508, + "epoch": 0.3302546879373076, + "grad_norm": 0.28039963690433434, + "learning_rate": 9.901941434076841e-05, + "loss": 0.6125, "step": 1180 }, { - "epoch": 0.16529041287613716, - "grad_norm": 0.439471188683419, - "learning_rate": 4.7645705971192315e-05, - "loss": 0.6199, + "epoch": 0.33053456479149174, + "grad_norm": 0.24737630654081313, + "learning_rate": 9.901637198353415e-05, + "loss": 0.5755, "step": 1181 }, { - "epoch": 0.16543037088873339, - "grad_norm": 0.45371569435156506, - "learning_rate": 4.764090234407577e-05, - "loss": 0.6266, + "epoch": 0.3308144416456759, + "grad_norm": 0.2532820655849239, + "learning_rate": 9.901332496089417e-05, + "loss": 0.5821, "step": 1182 }, { - "epoch": 0.1655703289013296, - "grad_norm": 0.43706679884940447, - "learning_rate": 4.7636094064024076e-05, - "loss": 0.6358, + "epoch": 0.33109431849986004, + "grad_norm": 0.2511166115916497, + "learning_rate": 9.901027327313848e-05, + "loss": 0.5641, "step": 1183 }, { - "epoch": 0.16571028691392584, - "grad_norm": 0.4383648085023705, - "learning_rate": 4.763128113202537e-05, - "loss": 0.6363, + "epoch": 0.3313741953540442, + "grad_norm": 0.2622700043966974, + "learning_rate": 9.900721692055755e-05, + "loss": 0.5829, "step": 1184 }, { - "epoch": 0.16585024492652203, - "grad_norm": 0.4332543518196199, - "learning_rate": 4.762646354906877e-05, - "loss": 0.6284, + "epoch": 0.3316540722082284, + "grad_norm": 0.26469762684272724, + "learning_rate": 9.900415590344227e-05, + "loss": 0.5592, "step": 1185 }, { - "epoch": 0.16599020293911826, - "grad_norm": 0.42760152759621617, - "learning_rate": 4.7621641316144325e-05, - "loss": 0.6212, + "epoch": 0.3319339490624125, + "grad_norm": 0.2685833366185621, + "learning_rate": 9.900109022208403e-05, + "loss": 0.58, "step": 1186 }, { - "epoch": 0.16613016095171448, - "grad_norm": 0.4530135732031799, - "learning_rate": 4.761681443424306e-05, - "loss": 0.664, + "epoch": 0.3322138259165967, + "grad_norm": 0.2565920420694011, + "learning_rate": 9.899801987677457e-05, + "loss": 0.5737, "step": 1187 }, { - "epoch": 0.1662701189643107, - "grad_norm": 0.4459844470321281, - "learning_rate": 4.7611982904356954e-05, - "loss": 0.6384, + "epoch": 0.33249370277078083, + "grad_norm": 0.25752184536440154, + "learning_rate": 9.899494486780616e-05, + "loss": 0.5877, "step": 1188 }, { - "epoch": 0.16641007697690693, - "grad_norm": 0.41924312442399453, - "learning_rate": 4.7607146727478935e-05, - "loss": 0.5901, + "epoch": 0.332773579624965, + "grad_norm": 0.24842346723141795, + "learning_rate": 9.899186519547147e-05, + "loss": 0.5683, "step": 1189 }, { - "epoch": 0.16655003498950316, - "grad_norm": 0.43860632090683305, - "learning_rate": 4.760230590460287e-05, - "loss": 0.6021, + "epoch": 0.3330534564791492, + "grad_norm": 0.252341889344035, + "learning_rate": 9.898878086006364e-05, + "loss": 0.5845, "step": 1190 }, { - "epoch": 0.16668999300209936, - "grad_norm": 0.45006765458458414, - "learning_rate": 4.759746043672362e-05, - "loss": 0.6365, + "epoch": 0.3333333333333333, + "grad_norm": 0.2596260336526809, + "learning_rate": 9.898569186187622e-05, + "loss": 0.5799, "step": 1191 }, { - "epoch": 0.16682995101469558, - "grad_norm": 0.4440700448858348, - "learning_rate": 4.7592610324836955e-05, - "loss": 0.6349, + "epoch": 0.3336132101875175, + "grad_norm": 0.26315295565046337, + "learning_rate": 9.898259820120325e-05, + "loss": 0.5988, "step": 1192 }, { - "epoch": 0.1669699090272918, - "grad_norm": 0.42964762641344484, - "learning_rate": 4.758775556993964e-05, - "loss": 0.573, + "epoch": 0.3338930870417017, + "grad_norm": 0.25577823881308154, + "learning_rate": 9.897949987833915e-05, + "loss": 0.5811, "step": 1193 }, { - "epoch": 0.16710986703988803, - "grad_norm": 0.4348088254948326, - "learning_rate": 4.758289617302937e-05, - "loss": 0.6094, + "epoch": 0.3341729638958858, + "grad_norm": 0.2616508614494166, + "learning_rate": 9.897639689357883e-05, + "loss": 0.5875, "step": 1194 }, { - "epoch": 0.16724982505248426, - "grad_norm": 0.42297107816490986, - "learning_rate": 4.7578032135104796e-05, - "loss": 0.6102, + "epoch": 0.33445284075007, + "grad_norm": 0.24829264184735783, + "learning_rate": 9.897328924721765e-05, + "loss": 0.566, "step": 1195 }, { - "epoch": 0.16738978306508048, - "grad_norm": 0.4494883284181831, - "learning_rate": 4.7573163457165534e-05, - "loss": 0.6071, + "epoch": 0.3347327176042541, + "grad_norm": 0.24936120561320013, + "learning_rate": 9.897017693955139e-05, + "loss": 0.5766, "step": 1196 }, { - "epoch": 0.1675297410776767, - "grad_norm": 0.4339048883611629, - "learning_rate": 4.7568290140212145e-05, - "loss": 0.6213, + "epoch": 0.3350125944584383, + "grad_norm": 0.25117922631543743, + "learning_rate": 9.896705997087626e-05, + "loss": 0.6102, "step": 1197 }, { - "epoch": 0.1676696990902729, - "grad_norm": 0.48344849855571825, - "learning_rate": 4.7563412185246145e-05, - "loss": 0.6433, + "epoch": 0.33529247131262246, + "grad_norm": 0.2616185379504625, + "learning_rate": 9.896393834148898e-05, + "loss": 0.6022, "step": 1198 }, { - "epoch": 0.16780965710286913, - "grad_norm": 0.4488129613842796, - "learning_rate": 4.7558529593269996e-05, - "loss": 0.654, + "epoch": 0.3355723481668066, + "grad_norm": 0.25387793292589556, + "learning_rate": 9.896081205168662e-05, + "loss": 0.6053, "step": 1199 }, { - "epoch": 0.16794961511546536, - "grad_norm": 0.43481250974353425, - "learning_rate": 4.755364236528713e-05, - "loss": 0.6316, + "epoch": 0.33585222502099077, + "grad_norm": 0.26318212341326885, + "learning_rate": 9.895768110176678e-05, + "loss": 0.5864, "step": 1200 }, { - "epoch": 0.16808957312806158, - "grad_norm": 0.41793689356043273, - "learning_rate": 4.754875050230192e-05, - "loss": 0.5783, + "epoch": 0.33613210187517495, + "grad_norm": 0.24747602735444504, + "learning_rate": 9.895454549202745e-05, + "loss": 0.5945, "step": 1201 }, { - "epoch": 0.1682295311406578, - "grad_norm": 0.43753416598422584, - "learning_rate": 4.754385400531969e-05, - "loss": 0.5949, + "epoch": 0.3364119787293591, + "grad_norm": 0.24962327359025266, + "learning_rate": 9.895140522276707e-05, + "loss": 0.5929, "step": 1202 }, { - "epoch": 0.16836948915325403, - "grad_norm": 0.44668014693071806, - "learning_rate": 4.753895287534673e-05, - "loss": 0.5863, + "epoch": 0.33669185558354325, + "grad_norm": 0.25896810155293915, + "learning_rate": 9.894826029428454e-05, + "loss": 0.6084, "step": 1203 }, { - "epoch": 0.16850944716585026, - "grad_norm": 0.42805637156559095, - "learning_rate": 4.753404711339026e-05, - "loss": 0.6357, + "epoch": 0.3369717324377274, + "grad_norm": 0.25672321740795007, + "learning_rate": 9.894511070687919e-05, + "loss": 0.5897, "step": 1204 }, { - "epoch": 0.16864940517844645, - "grad_norm": 0.41847675435581816, - "learning_rate": 4.752913672045846e-05, - "loss": 0.6138, + "epoch": 0.33725160929191156, + "grad_norm": 0.2518349813338347, + "learning_rate": 9.894195646085083e-05, + "loss": 0.576, "step": 1205 }, { - "epoch": 0.16878936319104268, - "grad_norm": 0.4553821804652506, - "learning_rate": 4.752422169756048e-05, - "loss": 0.6179, + "epoch": 0.33753148614609574, + "grad_norm": 0.2643446814910832, + "learning_rate": 9.893879755649965e-05, + "loss": 0.5666, "step": 1206 }, { - "epoch": 0.1689293212036389, - "grad_norm": 0.45084883922329716, - "learning_rate": 4.75193020457064e-05, - "loss": 0.6321, + "epoch": 0.33781136300027986, + "grad_norm": 0.26047434907347133, + "learning_rate": 9.893563399412634e-05, + "loss": 0.6044, "step": 1207 }, { - "epoch": 0.16906927921623513, - "grad_norm": 0.42851103889071873, - "learning_rate": 4.751437776590726e-05, - "loss": 0.639, + "epoch": 0.33809123985446404, + "grad_norm": 0.2549770752064979, + "learning_rate": 9.893246577403197e-05, + "loss": 0.546, "step": 1208 }, { - "epoch": 0.16920923722883136, - "grad_norm": 0.4555925682379364, - "learning_rate": 4.7509448859175043e-05, - "loss": 0.6185, + "epoch": 0.33837111670864817, + "grad_norm": 0.2639226655722739, + "learning_rate": 9.892929289651813e-05, + "loss": 0.6111, "step": 1209 }, { - "epoch": 0.16934919524142758, - "grad_norm": 0.45021258994097924, - "learning_rate": 4.75045153265227e-05, - "loss": 0.6086, + "epoch": 0.33865099356283235, + "grad_norm": 0.25276014704600214, + "learning_rate": 9.892611536188681e-05, + "loss": 0.5766, "step": 1210 }, { - "epoch": 0.1694891532540238, - "grad_norm": 0.4369626572960402, - "learning_rate": 4.749957716896412e-05, - "loss": 0.6409, + "epoch": 0.3389308704170165, + "grad_norm": 0.25611846388371784, + "learning_rate": 9.892293317044043e-05, + "loss": 0.587, "step": 1211 }, { - "epoch": 0.16962911126662, - "grad_norm": 0.4320366971573474, - "learning_rate": 4.749463438751413e-05, - "loss": 0.5844, + "epoch": 0.33921074727120065, + "grad_norm": 0.2562736816159841, + "learning_rate": 9.891974632248192e-05, + "loss": 0.606, "step": 1212 }, { - "epoch": 0.16976906927921623, - "grad_norm": 0.4163870413689256, - "learning_rate": 4.7489686983188535e-05, - "loss": 0.5803, + "epoch": 0.33949062412538483, + "grad_norm": 0.25325327589508856, + "learning_rate": 9.891655481831453e-05, + "loss": 0.5876, "step": 1213 }, { - "epoch": 0.16990902729181245, - "grad_norm": 0.44949676404016337, - "learning_rate": 4.748473495700408e-05, - "loss": 0.6234, + "epoch": 0.339770500979569, + "grad_norm": 0.2511111558833725, + "learning_rate": 9.89133586582421e-05, + "loss": 0.5886, "step": 1214 }, { - "epoch": 0.17004898530440868, - "grad_norm": 0.44171736970520764, - "learning_rate": 4.747977830997845e-05, - "loss": 0.593, + "epoch": 0.34005037783375314, + "grad_norm": 0.25908982198478825, + "learning_rate": 9.891015784256881e-05, + "loss": 0.5984, "step": 1215 }, { - "epoch": 0.1701889433170049, - "grad_norm": 0.44772610657802425, - "learning_rate": 4.747481704313028e-05, - "loss": 0.5924, + "epoch": 0.3403302546879373, + "grad_norm": 0.2648801626570116, + "learning_rate": 9.890695237159931e-05, + "loss": 0.5756, "step": 1216 }, { - "epoch": 0.17032890132960113, - "grad_norm": 0.4549687449320509, - "learning_rate": 4.7469851157479177e-05, - "loss": 0.6332, + "epoch": 0.34061013154212144, + "grad_norm": 0.25204186393477696, + "learning_rate": 9.890374224563872e-05, + "loss": 0.597, "step": 1217 }, { - "epoch": 0.17046885934219735, - "grad_norm": 0.4682088646897931, - "learning_rate": 4.746488065404567e-05, - "loss": 0.6189, + "epoch": 0.3408900083963056, + "grad_norm": 0.2537507218509391, + "learning_rate": 9.890052746499256e-05, + "loss": 0.5812, "step": 1218 }, { - "epoch": 0.17060881735479355, - "grad_norm": 0.44383085207451206, - "learning_rate": 4.7459905533851246e-05, - "loss": 0.6176, + "epoch": 0.3411698852504898, + "grad_norm": 0.2613338341516281, + "learning_rate": 9.889730802996683e-05, + "loss": 0.5852, "step": 1219 }, { - "epoch": 0.17074877536738978, - "grad_norm": 0.4282311331266807, - "learning_rate": 4.745492579791835e-05, - "loss": 0.5548, + "epoch": 0.3414497621046739, + "grad_norm": 0.2513548166389704, + "learning_rate": 9.889408394086796e-05, + "loss": 0.5968, "step": 1220 }, { - "epoch": 0.170888733379986, - "grad_norm": 0.4542898636825727, - "learning_rate": 4.744994144727036e-05, - "loss": 0.6161, + "epoch": 0.3417296389588581, + "grad_norm": 0.24822866983712577, + "learning_rate": 9.889085519800282e-05, + "loss": 0.573, "step": 1221 }, { - "epoch": 0.17102869139258223, - "grad_norm": 0.4667149443909015, - "learning_rate": 4.7444952482931626e-05, - "loss": 0.6133, + "epoch": 0.3420095158130423, + "grad_norm": 0.24594567150746716, + "learning_rate": 9.888762180167871e-05, + "loss": 0.5551, "step": 1222 }, { - "epoch": 0.17116864940517845, - "grad_norm": 0.4512113910763386, - "learning_rate": 4.743995890592742e-05, - "loss": 0.629, + "epoch": 0.3422893926672264, + "grad_norm": 0.2616874323983542, + "learning_rate": 9.888438375220339e-05, + "loss": 0.5845, "step": 1223 }, { - "epoch": 0.17130860741777468, - "grad_norm": 0.42697256281999074, - "learning_rate": 4.743496071728396e-05, - "loss": 0.61, + "epoch": 0.3425692695214106, + "grad_norm": 0.25449712886631054, + "learning_rate": 9.888114104988506e-05, + "loss": 0.5544, "step": 1224 }, { - "epoch": 0.17144856543037088, - "grad_norm": 0.4510644382875255, - "learning_rate": 4.7429957918028444e-05, - "loss": 0.6247, + "epoch": 0.3428491463755947, + "grad_norm": 0.2651541726904754, + "learning_rate": 9.887789369503237e-05, + "loss": 0.5912, "step": 1225 }, { - "epoch": 0.1715885234429671, - "grad_norm": 0.4432778439526579, - "learning_rate": 4.7424950509188995e-05, - "loss": 0.6192, + "epoch": 0.3431290232297789, + "grad_norm": 0.24951492881588697, + "learning_rate": 9.887464168795439e-05, + "loss": 0.5737, "step": 1226 }, { - "epoch": 0.17172848145556333, - "grad_norm": 0.4215901680439938, - "learning_rate": 4.741993849179468e-05, - "loss": 0.5841, + "epoch": 0.3434089000839631, + "grad_norm": 0.26051669703885516, + "learning_rate": 9.887138502896067e-05, + "loss": 0.5922, "step": 1227 }, { - "epoch": 0.17186843946815955, - "grad_norm": 0.43500291765639604, - "learning_rate": 4.7414921866875524e-05, - "loss": 0.6216, + "epoch": 0.3436887769381472, + "grad_norm": 0.25618708803578677, + "learning_rate": 9.886812371836116e-05, + "loss": 0.5673, "step": 1228 }, { - "epoch": 0.17200839748075578, - "grad_norm": 0.43425438707929986, - "learning_rate": 4.740990063546249e-05, - "loss": 0.6358, + "epoch": 0.3439686537923314, + "grad_norm": 0.24094067673672262, + "learning_rate": 9.886485775646629e-05, + "loss": 0.5947, "step": 1229 }, { - "epoch": 0.172148355493352, - "grad_norm": 0.45544131433595525, - "learning_rate": 4.7404874798587494e-05, - "loss": 0.636, + "epoch": 0.34424853064651556, + "grad_norm": 0.25019065804154744, + "learning_rate": 9.886158714358691e-05, + "loss": 0.5641, "step": 1230 }, { - "epoch": 0.17228831350594823, - "grad_norm": 0.4359451447095212, - "learning_rate": 4.7399844357283397e-05, - "loss": 0.6124, + "epoch": 0.3445284075006997, + "grad_norm": 0.2559694149536015, + "learning_rate": 9.88583118800343e-05, + "loss": 0.612, "step": 1231 }, { - "epoch": 0.17242827151854442, - "grad_norm": 0.4435735452706431, - "learning_rate": 4.739480931258401e-05, - "loss": 0.5854, + "epoch": 0.34480828435488386, + "grad_norm": 0.2946784708107338, + "learning_rate": 9.885503196612022e-05, + "loss": 0.6035, "step": 1232 }, { - "epoch": 0.17256822953114065, - "grad_norm": 0.4550094611824221, - "learning_rate": 4.738976966552407e-05, - "loss": 0.649, + "epoch": 0.345088161209068, + "grad_norm": 0.26396022206734865, + "learning_rate": 9.885174740215687e-05, + "loss": 0.5713, "step": 1233 }, { - "epoch": 0.17270818754373687, - "grad_norm": 0.4417929722405249, - "learning_rate": 4.738472541713931e-05, - "loss": 0.6156, + "epoch": 0.34536803806325217, + "grad_norm": 0.26166485957468827, + "learning_rate": 9.884845818845685e-05, + "loss": 0.5764, "step": 1234 }, { - "epoch": 0.1728481455563331, - "grad_norm": 0.46054175505288897, - "learning_rate": 4.737967656846633e-05, - "loss": 0.5911, + "epoch": 0.34564791491743635, + "grad_norm": 0.24931389332771994, + "learning_rate": 9.884516432533324e-05, + "loss": 0.5852, "step": 1235 }, { - "epoch": 0.17298810356892932, - "grad_norm": 0.44199250556635683, - "learning_rate": 4.737462312054275e-05, - "loss": 0.6378, + "epoch": 0.3459277917716205, + "grad_norm": 0.2532975570578218, + "learning_rate": 9.884186581309954e-05, + "loss": 0.5678, "step": 1236 }, { - "epoch": 0.17312806158152555, - "grad_norm": 0.4365770107069145, - "learning_rate": 4.736956507440709e-05, - "loss": 0.6321, + "epoch": 0.34620766862580465, + "grad_norm": 0.2569653422063483, + "learning_rate": 9.883856265206972e-05, + "loss": 0.5856, "step": 1237 }, { - "epoch": 0.17326801959412177, - "grad_norm": 0.4569326418275404, - "learning_rate": 4.7364502431098844e-05, - "loss": 0.6128, + "epoch": 0.3464875454799888, + "grad_norm": 0.262440592618675, + "learning_rate": 9.883525484255817e-05, + "loss": 0.6175, "step": 1238 }, { - "epoch": 0.17340797760671797, - "grad_norm": 0.4331079335375279, - "learning_rate": 4.7359435191658425e-05, - "loss": 0.6375, + "epoch": 0.34676742233417296, + "grad_norm": 0.2530545077274828, + "learning_rate": 9.883194238487974e-05, + "loss": 0.5919, "step": 1239 }, { - "epoch": 0.1735479356193142, - "grad_norm": 0.45213254965556654, - "learning_rate": 4.7354363357127204e-05, - "loss": 0.6024, + "epoch": 0.34704729918835714, + "grad_norm": 0.2519144804995559, + "learning_rate": 9.882862527934968e-05, + "loss": 0.6003, "step": 1240 }, { - "epoch": 0.17368789363191042, - "grad_norm": 0.4301062314752303, - "learning_rate": 4.7349286928547494e-05, - "loss": 0.6164, + "epoch": 0.34732717604254126, + "grad_norm": 0.25535977220109574, + "learning_rate": 9.882530352628375e-05, + "loss": 0.5827, "step": 1241 }, { - "epoch": 0.17382785164450665, - "grad_norm": 0.43058324357912753, - "learning_rate": 4.7344205906962555e-05, - "loss": 0.6005, + "epoch": 0.34760705289672544, + "grad_norm": 0.2485332607677699, + "learning_rate": 9.88219771259981e-05, + "loss": 0.5708, "step": 1242 }, { - "epoch": 0.17396780965710287, - "grad_norm": 0.4180674272631518, - "learning_rate": 4.7339120293416594e-05, - "loss": 0.6088, + "epoch": 0.3478869297509096, + "grad_norm": 0.24740278514232453, + "learning_rate": 9.881864607880934e-05, + "loss": 0.6051, "step": 1243 }, { - "epoch": 0.1741077676696991, - "grad_norm": 0.4546933788993979, - "learning_rate": 4.733403008895474e-05, - "loss": 0.6279, + "epoch": 0.34816680660509375, + "grad_norm": 0.2605790936490167, + "learning_rate": 9.881531038503454e-05, + "loss": 0.5822, "step": 1244 }, { - "epoch": 0.17424772568229532, - "grad_norm": 0.4477652947027751, - "learning_rate": 4.73289352946231e-05, - "loss": 0.6189, + "epoch": 0.34844668345927793, + "grad_norm": 0.2556716841241045, + "learning_rate": 9.881197004499114e-05, + "loss": 0.5855, "step": 1245 }, { - "epoch": 0.17438768369489152, - "grad_norm": 0.4240511875731597, - "learning_rate": 4.732383591146869e-05, - "loss": 0.6028, + "epoch": 0.34872656031346205, + "grad_norm": 0.2514842683099283, + "learning_rate": 9.880862505899714e-05, + "loss": 0.5886, "step": 1246 }, { - "epoch": 0.17452764170748775, - "grad_norm": 0.4292894608181786, - "learning_rate": 4.73187319405395e-05, - "loss": 0.6109, + "epoch": 0.34900643716764623, + "grad_norm": 0.24987138476583054, + "learning_rate": 9.880527542737085e-05, + "loss": 0.5709, "step": 1247 }, { - "epoch": 0.17466759972008397, - "grad_norm": 0.4268503231804948, - "learning_rate": 4.7313623382884435e-05, - "loss": 0.6014, + "epoch": 0.3492863140218304, + "grad_norm": 0.2574864063751009, + "learning_rate": 9.880192115043115e-05, + "loss": 0.6387, "step": 1248 }, { - "epoch": 0.1748075577326802, - "grad_norm": 0.44496488332160694, - "learning_rate": 4.730851023955337e-05, - "loss": 0.611, + "epoch": 0.34956619087601454, + "grad_norm": 0.24946089622378548, + "learning_rate": 9.879856222849728e-05, + "loss": 0.5708, "step": 1249 }, { - "epoch": 0.17494751574527642, - "grad_norm": 0.43212438694423877, - "learning_rate": 4.730339251159709e-05, - "loss": 0.5553, + "epoch": 0.3498460677301987, + "grad_norm": 0.24532522184225758, + "learning_rate": 9.879519866188896e-05, + "loss": 0.5662, "step": 1250 }, { - "epoch": 0.17508747375787265, - "grad_norm": 0.45661821214759873, - "learning_rate": 4.729827020006735e-05, - "loss": 0.6071, + "epoch": 0.3501259445843829, + "grad_norm": 0.2465495625404283, + "learning_rate": 9.879183045092628e-05, + "loss": 0.5767, "step": 1251 }, { - "epoch": 0.17522743177046887, - "grad_norm": 0.43901674416520003, - "learning_rate": 4.7293143306016836e-05, - "loss": 0.5777, + "epoch": 0.350405821438567, + "grad_norm": 0.25297900433606335, + "learning_rate": 9.87884575959299e-05, + "loss": 0.5788, "step": 1252 }, { - "epoch": 0.17536738978306507, - "grad_norm": 0.4544033001494578, - "learning_rate": 4.728801183049918e-05, - "loss": 0.639, + "epoch": 0.3506856982927512, + "grad_norm": 0.2462356056449759, + "learning_rate": 9.87850800972208e-05, + "loss": 0.5928, "step": 1253 }, { - "epoch": 0.1755073477956613, - "grad_norm": 0.4520331595590865, - "learning_rate": 4.728287577456894e-05, - "loss": 0.6089, + "epoch": 0.3509655751469353, + "grad_norm": 0.24973311591582598, + "learning_rate": 9.878169795512049e-05, + "loss": 0.5776, "step": 1254 }, { - "epoch": 0.17564730580825752, - "grad_norm": 0.44402099958528857, - "learning_rate": 4.7277735139281645e-05, - "loss": 0.6056, + "epoch": 0.3512454520011195, + "grad_norm": 0.23920281263452609, + "learning_rate": 9.877831116995084e-05, + "loss": 0.5573, "step": 1255 }, { - "epoch": 0.17578726382085375, - "grad_norm": 0.4599500342986939, - "learning_rate": 4.7272589925693735e-05, - "loss": 0.6, + "epoch": 0.3515253288553037, + "grad_norm": 0.25725393191303575, + "learning_rate": 9.877491974203426e-05, + "loss": 0.5836, "step": 1256 }, { - "epoch": 0.17592722183344997, - "grad_norm": 0.41917281344225943, - "learning_rate": 4.7267440134862604e-05, - "loss": 0.6204, + "epoch": 0.3518052057094878, + "grad_norm": 0.25232408274091755, + "learning_rate": 9.877152367169349e-05, + "loss": 0.5762, "step": 1257 }, { - "epoch": 0.1760671798460462, - "grad_norm": 0.4365380087308256, - "learning_rate": 4.72622857678466e-05, - "loss": 0.6364, + "epoch": 0.352085082563672, + "grad_norm": 0.24134526587445634, + "learning_rate": 9.87681229592518e-05, + "loss": 0.5497, "step": 1258 }, { - "epoch": 0.17620713785864242, - "grad_norm": 0.4302123239208661, - "learning_rate": 4.725712682570498e-05, - "loss": 0.6077, + "epoch": 0.3523649594178561, + "grad_norm": 0.23982988589107124, + "learning_rate": 9.876471760503288e-05, + "loss": 0.554, "step": 1259 }, { - "epoch": 0.17634709587123862, - "grad_norm": 0.4388230610863713, - "learning_rate": 4.725196330949797e-05, - "loss": 0.5846, + "epoch": 0.3526448362720403, + "grad_norm": 0.23890864878221263, + "learning_rate": 9.876130760936085e-05, + "loss": 0.5627, "step": 1260 }, { - "epoch": 0.17648705388383484, - "grad_norm": 0.43045983689708245, - "learning_rate": 4.724679522028672e-05, - "loss": 0.5945, + "epoch": 0.3529247131262245, + "grad_norm": 0.2497327225548309, + "learning_rate": 9.875789297256027e-05, + "loss": 0.5605, "step": 1261 }, { - "epoch": 0.17662701189643107, - "grad_norm": 0.45979393311365935, - "learning_rate": 4.7241622559133325e-05, - "loss": 0.679, + "epoch": 0.3532045899804086, + "grad_norm": 0.25367700851397396, + "learning_rate": 9.875447369495613e-05, + "loss": 0.5674, "step": 1262 }, { - "epoch": 0.1767669699090273, - "grad_norm": 0.4383956739878393, - "learning_rate": 4.723644532710082e-05, - "loss": 0.6391, + "epoch": 0.3534844668345928, + "grad_norm": 0.25314163600853484, + "learning_rate": 9.875104977687391e-05, + "loss": 0.5738, "step": 1263 }, { - "epoch": 0.17690692792162352, - "grad_norm": 0.4173574514314508, - "learning_rate": 4.723126352525318e-05, - "loss": 0.5922, + "epoch": 0.35376434368877696, + "grad_norm": 0.2659056574428081, + "learning_rate": 9.874762121863947e-05, + "loss": 0.6137, "step": 1264 }, { - "epoch": 0.17704688593421974, - "grad_norm": 0.4480684757490661, - "learning_rate": 4.722607715465532e-05, - "loss": 0.607, + "epoch": 0.3540442205429611, + "grad_norm": 0.24822887643168823, + "learning_rate": 9.874418802057917e-05, + "loss": 0.551, "step": 1265 }, { - "epoch": 0.17718684394681594, - "grad_norm": 0.40606023013599496, - "learning_rate": 4.722088621637309e-05, - "loss": 0.5901, + "epoch": 0.35432409739714527, + "grad_norm": 0.25592040095777707, + "learning_rate": 9.874075018301976e-05, + "loss": 0.5654, "step": 1266 }, { - "epoch": 0.17732680195941217, - "grad_norm": 0.5273306368158803, - "learning_rate": 4.7215690711473275e-05, - "loss": 0.6502, + "epoch": 0.3546039742513294, + "grad_norm": 0.24619356115307459, + "learning_rate": 9.873730770628847e-05, + "loss": 0.5854, "step": 1267 }, { - "epoch": 0.1774667599720084, - "grad_norm": 0.9204038885794614, - "learning_rate": 4.7210490641023615e-05, - "loss": 0.6287, + "epoch": 0.35488385110551357, + "grad_norm": 0.2556041539363497, + "learning_rate": 9.873386059071294e-05, + "loss": 0.602, "step": 1268 }, { - "epoch": 0.17760671798460462, - "grad_norm": 0.40746942485511245, - "learning_rate": 4.7205286006092764e-05, - "loss": 0.6017, + "epoch": 0.35516372795969775, + "grad_norm": 0.24159093702159906, + "learning_rate": 9.873040883662131e-05, + "loss": 0.591, "step": 1269 }, { - "epoch": 0.17774667599720084, - "grad_norm": 0.41902531090541245, - "learning_rate": 4.720007680775034e-05, - "loss": 0.5875, + "epoch": 0.3554436048138819, + "grad_norm": 0.2557836704157818, + "learning_rate": 9.872695244434207e-05, + "loss": 0.6033, "step": 1270 }, { - "epoch": 0.17788663400979707, - "grad_norm": 0.4117792800186485, - "learning_rate": 4.719486304706687e-05, - "loss": 0.6485, + "epoch": 0.35572348166806605, + "grad_norm": 0.26359043655843445, + "learning_rate": 9.872349141420423e-05, + "loss": 0.6023, "step": 1271 }, { - "epoch": 0.1780265920223933, - "grad_norm": 0.4470634029440558, - "learning_rate": 4.718964472511386e-05, - "loss": 0.6527, + "epoch": 0.35600335852225024, + "grad_norm": 0.25715716587656756, + "learning_rate": 9.872002574653722e-05, + "loss": 0.5904, "step": 1272 }, { - "epoch": 0.1781665500349895, - "grad_norm": 0.43032660391655064, - "learning_rate": 4.71844218429637e-05, - "loss": 0.6161, + "epoch": 0.35628323537643436, + "grad_norm": 0.2674975391039773, + "learning_rate": 9.871655544167087e-05, + "loss": 0.5724, "step": 1273 }, { - "epoch": 0.17830650804758572, - "grad_norm": 0.44832712777708833, - "learning_rate": 4.7179194401689764e-05, - "loss": 0.6199, + "epoch": 0.35656311223061854, + "grad_norm": 0.23916191244118243, + "learning_rate": 9.871308049993551e-05, + "loss": 0.5614, "step": 1274 }, { - "epoch": 0.17844646606018194, - "grad_norm": 0.42568085262950894, - "learning_rate": 4.7173962402366334e-05, - "loss": 0.6131, + "epoch": 0.35684298908480266, + "grad_norm": 0.2546830151355151, + "learning_rate": 9.870960092166188e-05, + "loss": 0.5699, "step": 1275 }, { - "epoch": 0.17858642407277817, - "grad_norm": 0.43106804328266973, - "learning_rate": 4.716872584606865e-05, - "loss": 0.6086, + "epoch": 0.35712286593898684, + "grad_norm": 0.25398189363013207, + "learning_rate": 9.870611670718118e-05, + "loss": 0.5658, "step": 1276 }, { - "epoch": 0.1787263820853744, - "grad_norm": 0.4339145485054148, - "learning_rate": 4.716348473387286e-05, - "loss": 0.581, + "epoch": 0.357402742793171, + "grad_norm": 0.2399760785879194, + "learning_rate": 9.870262785682502e-05, + "loss": 0.606, "step": 1277 }, { - "epoch": 0.17886634009797062, - "grad_norm": 0.559402125506372, - "learning_rate": 4.715823906685609e-05, - "loss": 0.5788, + "epoch": 0.35768261964735515, + "grad_norm": 0.24778415767989007, + "learning_rate": 9.869913437092549e-05, + "loss": 0.5952, "step": 1278 }, { - "epoch": 0.17900629811056684, - "grad_norm": 0.4466695949938704, - "learning_rate": 4.715298884609636e-05, - "loss": 0.6169, + "epoch": 0.35796249650153933, + "grad_norm": 0.24623543250412283, + "learning_rate": 9.869563624981507e-05, + "loss": 0.5615, "step": 1279 }, { - "epoch": 0.17914625612316304, - "grad_norm": 0.42599041437598534, - "learning_rate": 4.7147734072672644e-05, - "loss": 0.5892, + "epoch": 0.3582423733557235, + "grad_norm": 0.25371512789372525, + "learning_rate": 9.869213349382676e-05, + "loss": 0.5877, "step": 1280 }, { - "epoch": 0.17928621413575926, - "grad_norm": 0.4193811493005937, - "learning_rate": 4.7142474747664856e-05, - "loss": 0.5716, + "epoch": 0.35852225020990763, + "grad_norm": 0.24454272947446215, + "learning_rate": 9.868862610329391e-05, + "loss": 0.5897, "step": 1281 }, { - "epoch": 0.1794261721483555, - "grad_norm": 0.4283159486252786, - "learning_rate": 4.7137210872153844e-05, - "loss": 0.5828, + "epoch": 0.3588021270640918, + "grad_norm": 0.26926391171765396, + "learning_rate": 9.868511407855039e-05, + "loss": 0.5979, "step": 1282 }, { - "epoch": 0.17956613016095171, - "grad_norm": 0.5396297802303109, - "learning_rate": 4.713194244722138e-05, - "loss": 0.6024, + "epoch": 0.35908200391827594, + "grad_norm": 0.25281226346086155, + "learning_rate": 9.868159741993046e-05, + "loss": 0.5683, "step": 1283 }, { - "epoch": 0.17970608817354794, - "grad_norm": 0.45852485695580586, - "learning_rate": 4.712666947395018e-05, - "loss": 0.5972, + "epoch": 0.3593618807724601, + "grad_norm": 0.25819032163170924, + "learning_rate": 9.867807612776884e-05, + "loss": 0.5667, "step": 1284 }, { - "epoch": 0.17984604618614417, - "grad_norm": 0.4385616625675766, - "learning_rate": 4.71213919534239e-05, - "loss": 0.6379, + "epoch": 0.3596417576266443, + "grad_norm": 0.2574286159352014, + "learning_rate": 9.867455020240069e-05, + "loss": 0.5614, "step": 1285 }, { - "epoch": 0.1799860041987404, - "grad_norm": 0.45421354404722714, - "learning_rate": 4.711610988672712e-05, - "loss": 0.6314, + "epoch": 0.3599216344808284, + "grad_norm": 0.24387447608482338, + "learning_rate": 9.867101964416159e-05, + "loss": 0.5875, "step": 1286 }, { - "epoch": 0.1801259622113366, - "grad_norm": 0.4174346560548907, - "learning_rate": 4.711082327494536e-05, - "loss": 0.582, + "epoch": 0.3602015113350126, + "grad_norm": 0.2569257456738481, + "learning_rate": 9.866748445338761e-05, + "loss": 0.589, "step": 1287 }, { - "epoch": 0.1802659202239328, - "grad_norm": 0.46268786301319254, - "learning_rate": 4.7105532119165066e-05, - "loss": 0.6187, + "epoch": 0.3604813881891967, + "grad_norm": 0.2363017877964043, + "learning_rate": 9.866394463041522e-05, + "loss": 0.5765, "step": 1288 }, { - "epoch": 0.18040587823652904, - "grad_norm": 0.4225894844217519, - "learning_rate": 4.710023642047364e-05, - "loss": 0.6044, + "epoch": 0.3607612650433809, + "grad_norm": 0.26516592444385784, + "learning_rate": 9.866040017558135e-05, + "loss": 0.5848, "step": 1289 }, { - "epoch": 0.18054583624912526, - "grad_norm": 0.4212476755455102, - "learning_rate": 4.709493617995938e-05, - "loss": 0.6139, + "epoch": 0.3610411418975651, + "grad_norm": 0.2551001849367737, + "learning_rate": 9.865685108922333e-05, + "loss": 0.584, "step": 1290 }, { - "epoch": 0.1806857942617215, - "grad_norm": 0.43172618509391675, - "learning_rate": 4.7089631398711556e-05, - "loss": 0.6195, + "epoch": 0.3613210187517492, + "grad_norm": 0.2794469256119939, + "learning_rate": 9.8653297371679e-05, + "loss": 0.599, "step": 1291 }, { - "epoch": 0.1808257522743177, - "grad_norm": 0.47068060331043937, - "learning_rate": 4.7084322077820345e-05, - "loss": 0.6475, + "epoch": 0.3616008956059334, + "grad_norm": 0.2703219263382521, + "learning_rate": 9.864973902328661e-05, + "loss": 0.5768, "step": 1292 }, { - "epoch": 0.18096571028691394, - "grad_norm": 0.42995345865450285, - "learning_rate": 4.707900821837686e-05, - "loss": 0.6263, + "epoch": 0.3618807724601176, + "grad_norm": 0.24803881599949243, + "learning_rate": 9.864617604438482e-05, + "loss": 0.5591, "step": 1293 }, { - "epoch": 0.18110566829951014, - "grad_norm": 0.4574009601171825, - "learning_rate": 4.707368982147318e-05, - "loss": 0.6676, + "epoch": 0.3621606493143017, + "grad_norm": 0.2477846881637209, + "learning_rate": 9.864260843531276e-05, + "loss": 0.5647, "step": 1294 }, { - "epoch": 0.18124562631210636, - "grad_norm": 0.44635706573473055, - "learning_rate": 4.7068366888202264e-05, - "loss": 0.6532, + "epoch": 0.3624405261684859, + "grad_norm": 0.26701383956206354, + "learning_rate": 9.863903619641002e-05, + "loss": 0.5692, "step": 1295 }, { - "epoch": 0.1813855843247026, - "grad_norm": 0.41536175859929064, - "learning_rate": 4.7063039419658035e-05, - "loss": 0.6001, + "epoch": 0.36272040302267, + "grad_norm": 0.25059678055974044, + "learning_rate": 9.863545932801656e-05, + "loss": 0.5871, "step": 1296 }, { - "epoch": 0.1815255423372988, - "grad_norm": 0.431878399826314, - "learning_rate": 4.705770741693535e-05, - "loss": 0.5995, + "epoch": 0.3630002798768542, + "grad_norm": 0.2456024730485762, + "learning_rate": 9.863187783047289e-05, + "loss": 0.5649, "step": 1297 }, { - "epoch": 0.18166550034989504, - "grad_norm": 0.43247283157231065, - "learning_rate": 4.7052370881129976e-05, - "loss": 0.6269, + "epoch": 0.36328015673103836, + "grad_norm": 0.23771467036633712, + "learning_rate": 9.862829170411985e-05, + "loss": 0.5785, "step": 1298 }, { - "epoch": 0.18180545836249126, - "grad_norm": 0.44705916198880513, - "learning_rate": 4.704702981333864e-05, - "loss": 0.5862, + "epoch": 0.3635600335852225, + "grad_norm": 0.25459202670443437, + "learning_rate": 9.862470094929879e-05, + "loss": 0.5877, "step": 1299 }, { - "epoch": 0.1819454163750875, - "grad_norm": 0.4435892818551313, - "learning_rate": 4.704168421465896e-05, - "loss": 0.6206, + "epoch": 0.36383991043940667, + "grad_norm": 0.24400139112438918, + "learning_rate": 9.862110556635148e-05, + "loss": 0.5769, "step": 1300 }, { - "epoch": 0.18208537438768368, - "grad_norm": 0.8694903989330446, - "learning_rate": 4.7036334086189555e-05, - "loss": 0.6592, + "epoch": 0.36411978729359085, + "grad_norm": 0.25462037753744304, + "learning_rate": 9.861750555562012e-05, + "loss": 0.5617, "step": 1301 }, { - "epoch": 0.1822253324002799, - "grad_norm": 0.4457381902676728, - "learning_rate": 4.703097942902989e-05, - "loss": 0.5962, + "epoch": 0.36439966414777497, + "grad_norm": 0.2437847338961068, + "learning_rate": 9.861390091744737e-05, + "loss": 0.5592, "step": 1302 }, { - "epoch": 0.18236529041287614, - "grad_norm": 0.4105292740202644, - "learning_rate": 4.702562024428042e-05, - "loss": 0.6314, + "epoch": 0.36467954100195915, + "grad_norm": 0.25360373464902747, + "learning_rate": 9.861029165217633e-05, + "loss": 0.5719, "step": 1303 }, { - "epoch": 0.18250524842547236, - "grad_norm": 0.43538468918998957, - "learning_rate": 4.70202565330425e-05, - "loss": 0.6348, + "epoch": 0.3649594178561433, + "grad_norm": 0.2522267922475495, + "learning_rate": 9.860667776015052e-05, + "loss": 0.6024, "step": 1304 }, { - "epoch": 0.18264520643806859, - "grad_norm": 0.42334962592608394, - "learning_rate": 4.701488829641845e-05, - "loss": 0.6271, + "epoch": 0.36523929471032746, + "grad_norm": 0.24090443131476025, + "learning_rate": 9.860305924171392e-05, + "loss": 0.5789, "step": 1305 }, { - "epoch": 0.1827851644506648, - "grad_norm": 0.44222681529868485, - "learning_rate": 4.700951553551148e-05, - "loss": 0.6139, + "epoch": 0.36551917156451164, + "grad_norm": 0.24234901539669954, + "learning_rate": 9.859943609721092e-05, + "loss": 0.5498, "step": 1306 }, { - "epoch": 0.182925122463261, - "grad_norm": 0.41298228492856454, - "learning_rate": 4.700413825142574e-05, - "loss": 0.6059, + "epoch": 0.36579904841869576, + "grad_norm": 0.2601574128209228, + "learning_rate": 9.85958083269864e-05, + "loss": 0.5848, "step": 1307 }, { - "epoch": 0.18306508047585723, - "grad_norm": 0.48053391897594716, - "learning_rate": 4.6998756445266336e-05, - "loss": 0.6252, + "epoch": 0.36607892527287994, + "grad_norm": 0.24408752147616794, + "learning_rate": 9.859217593138564e-05, + "loss": 0.5473, "step": 1308 }, { - "epoch": 0.18320503848845346, - "grad_norm": 0.422350382910677, - "learning_rate": 4.6993370118139264e-05, - "loss": 0.594, + "epoch": 0.36635880212706406, + "grad_norm": 0.25108140346626096, + "learning_rate": 9.858853891075437e-05, + "loss": 0.5678, "step": 1309 }, { - "epoch": 0.18334499650104968, - "grad_norm": 0.45501043482560677, - "learning_rate": 4.698797927115148e-05, - "loss": 0.6039, + "epoch": 0.36663867898124824, + "grad_norm": 0.26693222797979294, + "learning_rate": 9.858489726543878e-05, + "loss": 0.5867, "step": 1310 }, { - "epoch": 0.1834849545136459, - "grad_norm": 0.42755574101023547, - "learning_rate": 4.698258390541086e-05, - "loss": 0.635, + "epoch": 0.3669185558354324, + "grad_norm": 0.252675663643795, + "learning_rate": 9.858125099578547e-05, + "loss": 0.61, "step": 1311 }, { - "epoch": 0.18362491252624213, - "grad_norm": 0.4212712876216914, - "learning_rate": 4.69771840220262e-05, - "loss": 0.6187, + "epoch": 0.36719843268961655, + "grad_norm": 0.25287964475016456, + "learning_rate": 9.85776001021415e-05, + "loss": 0.5606, "step": 1312 }, { - "epoch": 0.18376487053883836, - "grad_norm": 0.4474515144815738, - "learning_rate": 4.697177962210722e-05, - "loss": 0.6044, + "epoch": 0.36747830954380073, + "grad_norm": 0.23667971209316457, + "learning_rate": 9.857394458485436e-05, + "loss": 0.5495, "step": 1313 }, { - "epoch": 0.18390482855143456, - "grad_norm": 0.4574401172788082, - "learning_rate": 4.696637070676462e-05, - "loss": 0.6465, + "epoch": 0.3677581863979849, + "grad_norm": 0.2577794731055881, + "learning_rate": 9.8570284444272e-05, + "loss": 0.5967, "step": 1314 }, { - "epoch": 0.18404478656403078, - "grad_norm": 0.47635542166003236, - "learning_rate": 4.696095727710994e-05, - "loss": 0.5871, + "epoch": 0.36803806325216903, + "grad_norm": 0.25147450268312077, + "learning_rate": 9.856661968074277e-05, + "loss": 0.5661, "step": 1315 }, { - "epoch": 0.184184744576627, - "grad_norm": 0.44962202431159326, - "learning_rate": 4.6955539334255716e-05, - "loss": 0.6232, + "epoch": 0.3683179401063532, + "grad_norm": 0.2499365765595922, + "learning_rate": 9.856295029461548e-05, + "loss": 0.5827, "step": 1316 }, { - "epoch": 0.18432470258922323, - "grad_norm": 0.45431685769602653, - "learning_rate": 4.6950116879315385e-05, - "loss": 0.621, + "epoch": 0.36859781696053734, + "grad_norm": 0.2654215393452005, + "learning_rate": 9.855927628623943e-05, + "loss": 0.592, "step": 1317 }, { - "epoch": 0.18446466060181946, - "grad_norm": 0.4388675789360797, - "learning_rate": 4.6944689913403326e-05, - "loss": 0.6183, + "epoch": 0.3688776938147215, + "grad_norm": 0.25165760068861626, + "learning_rate": 9.855559765596426e-05, + "loss": 0.5634, "step": 1318 }, { - "epoch": 0.18460461861441568, - "grad_norm": 0.4523470765103244, - "learning_rate": 4.693925843763483e-05, - "loss": 0.6212, + "epoch": 0.3691575706689057, + "grad_norm": 0.2343431409837341, + "learning_rate": 9.855191440414013e-05, + "loss": 0.5605, "step": 1319 }, { - "epoch": 0.1847445766270119, - "grad_norm": 0.4807021100408991, - "learning_rate": 4.693382245312612e-05, - "loss": 0.6097, + "epoch": 0.3694374475230898, + "grad_norm": 0.23468194509285104, + "learning_rate": 9.854822653111761e-05, + "loss": 0.5734, "step": 1320 }, { - "epoch": 0.1848845346396081, - "grad_norm": 0.4306193212134266, - "learning_rate": 4.6928381960994336e-05, - "loss": 0.6438, + "epoch": 0.369717324377274, + "grad_norm": 0.2528275233327272, + "learning_rate": 9.854453403724773e-05, + "loss": 0.5879, "step": 1321 }, { - "epoch": 0.18502449265220433, - "grad_norm": 0.447094803519498, - "learning_rate": 4.692293696235758e-05, - "loss": 0.6263, + "epoch": 0.3699972012314582, + "grad_norm": 0.24929497405022225, + "learning_rate": 9.854083692288192e-05, + "loss": 0.5964, "step": 1322 }, { - "epoch": 0.18516445066480056, - "grad_norm": 0.4200693798341761, - "learning_rate": 4.6917487458334824e-05, - "loss": 0.5863, + "epoch": 0.3702770780856423, + "grad_norm": 0.24606277157925519, + "learning_rate": 9.853713518837209e-05, + "loss": 0.538, "step": 1323 }, { - "epoch": 0.18530440867739678, - "grad_norm": 0.4459700079709483, - "learning_rate": 4.691203345004602e-05, - "loss": 0.6113, + "epoch": 0.3705569549398265, + "grad_norm": 0.24479708249049445, + "learning_rate": 9.853342883407055e-05, + "loss": 0.5761, "step": 1324 }, { - "epoch": 0.185444366689993, - "grad_norm": 0.42812241397789785, - "learning_rate": 4.6906574938612e-05, - "loss": 0.613, + "epoch": 0.3708368317940106, + "grad_norm": 0.25155137261806565, + "learning_rate": 9.852971786033009e-05, + "loss": 0.5751, "step": 1325 }, { - "epoch": 0.18558432470258923, - "grad_norm": 0.44482736985800875, - "learning_rate": 4.690111192515457e-05, - "loss": 0.6087, + "epoch": 0.3711167086481948, + "grad_norm": 0.24372667494850433, + "learning_rate": 9.852600226750393e-05, + "loss": 0.5766, "step": 1326 }, { - "epoch": 0.18572428271518546, - "grad_norm": 0.43543296488997046, - "learning_rate": 4.6895644410796416e-05, - "loss": 0.6055, + "epoch": 0.371396585502379, + "grad_norm": 0.24230425494337146, + "learning_rate": 9.852228205594571e-05, + "loss": 0.5795, "step": 1327 }, { - "epoch": 0.18586424072778165, - "grad_norm": 0.44518530430540526, - "learning_rate": 4.689017239666117e-05, - "loss": 0.6094, + "epoch": 0.3716764623565631, + "grad_norm": 0.237209943944504, + "learning_rate": 9.851855722600952e-05, + "loss": 0.5977, "step": 1328 }, { - "epoch": 0.18600419874037788, - "grad_norm": 0.41902567935143953, - "learning_rate": 4.688469588387339e-05, - "loss": 0.5978, + "epoch": 0.3719563392107473, + "grad_norm": 0.26817736687336596, + "learning_rate": 9.85148277780499e-05, + "loss": 0.5706, "step": 1329 }, { - "epoch": 0.1861441567529741, - "grad_norm": 0.423883271911657, - "learning_rate": 4.6879214873558565e-05, - "loss": 0.5982, + "epoch": 0.37223621606493146, + "grad_norm": 0.2496500975473135, + "learning_rate": 9.851109371242182e-05, + "loss": 0.5852, "step": 1330 }, { - "epoch": 0.18628411476557033, - "grad_norm": 0.44582071516941296, - "learning_rate": 4.6873729366843075e-05, - "loss": 0.562, + "epoch": 0.3725160929191156, + "grad_norm": 0.25342103629148044, + "learning_rate": 9.850735502948069e-05, + "loss": 0.5772, "step": 1331 }, { - "epoch": 0.18642407277816656, - "grad_norm": 0.4660473204875247, - "learning_rate": 4.686823936485426e-05, - "loss": 0.6367, + "epoch": 0.37279596977329976, + "grad_norm": 0.24207736564553414, + "learning_rate": 9.850361172958234e-05, + "loss": 0.5803, "step": 1332 }, { - "epoch": 0.18656403079076278, - "grad_norm": 0.4769376469582944, - "learning_rate": 4.6862744868720374e-05, - "loss": 0.6652, + "epoch": 0.3730758466274839, + "grad_norm": 0.24826864519216052, + "learning_rate": 9.84998638130831e-05, + "loss": 0.5626, "step": 1333 }, { - "epoch": 0.186703988803359, - "grad_norm": 0.4618831962024523, - "learning_rate": 4.6857245879570585e-05, - "loss": 0.6317, + "epoch": 0.37335572348166807, + "grad_norm": 0.2549209969409992, + "learning_rate": 9.849611128033967e-05, + "loss": 0.5617, "step": 1334 }, { - "epoch": 0.1868439468159552, - "grad_norm": 0.4550475574225298, - "learning_rate": 4.685174239853499e-05, - "loss": 0.5892, + "epoch": 0.37363560033585225, + "grad_norm": 0.2548161261358772, + "learning_rate": 9.84923541317092e-05, + "loss": 0.5645, "step": 1335 }, { - "epoch": 0.18698390482855143, - "grad_norm": 0.43735879865032035, - "learning_rate": 4.684623442674463e-05, - "loss": 0.5915, + "epoch": 0.37391547719003637, + "grad_norm": 0.25086816394183653, + "learning_rate": 9.848859236754935e-05, + "loss": 0.5668, "step": 1336 }, { - "epoch": 0.18712386284114765, - "grad_norm": 0.42856554004367964, - "learning_rate": 4.684072196533142e-05, - "loss": 0.6211, + "epoch": 0.37419535404422055, + "grad_norm": 0.2524264803100986, + "learning_rate": 9.848482598821813e-05, + "loss": 0.6069, "step": 1337 }, { - "epoch": 0.18726382085374388, - "grad_norm": 0.4359596291931712, - "learning_rate": 4.6835205015428246e-05, - "loss": 0.5957, + "epoch": 0.3744752308984047, + "grad_norm": 0.25162801199296647, + "learning_rate": 9.848105499407403e-05, + "loss": 0.5633, "step": 1338 }, { - "epoch": 0.1874037788663401, - "grad_norm": 0.44381328220368255, - "learning_rate": 4.682968357816889e-05, - "loss": 0.634, + "epoch": 0.37475510775258886, + "grad_norm": 0.2527560152710071, + "learning_rate": 9.847727938547599e-05, + "loss": 0.5695, "step": 1339 }, { - "epoch": 0.18754373687893633, - "grad_norm": 0.45368741213258845, - "learning_rate": 4.682415765468807e-05, - "loss": 0.6242, + "epoch": 0.37503498460677304, + "grad_norm": 0.25737762181379004, + "learning_rate": 9.847349916278335e-05, + "loss": 0.5845, "step": 1340 }, { - "epoch": 0.18768369489153253, - "grad_norm": 0.433054739833902, - "learning_rate": 4.681862724612141e-05, - "loss": 0.6155, + "epoch": 0.37531486146095716, + "grad_norm": 0.23523597902347113, + "learning_rate": 9.846971432635593e-05, + "loss": 0.5688, "step": 1341 }, { - "epoch": 0.18782365290412875, - "grad_norm": 0.4455643661885221, - "learning_rate": 4.681309235360546e-05, - "loss": 0.6061, + "epoch": 0.37559473831514134, + "grad_norm": 0.24654863143444625, + "learning_rate": 9.846592487655398e-05, + "loss": 0.6078, "step": 1342 }, { - "epoch": 0.18796361091672498, - "grad_norm": 0.4348159376755847, - "learning_rate": 4.6807552978277725e-05, - "loss": 0.6016, + "epoch": 0.3758746151693255, + "grad_norm": 0.25188389597355215, + "learning_rate": 9.846213081373816e-05, + "loss": 0.583, "step": 1343 }, { - "epoch": 0.1881035689293212, - "grad_norm": 0.43841673430612965, - "learning_rate": 4.6802009121276566e-05, - "loss": 0.6139, + "epoch": 0.37615449202350965, + "grad_norm": 0.24664471800837676, + "learning_rate": 9.845833213826962e-05, + "loss": 0.5919, "step": 1344 }, { - "epoch": 0.18824352694191743, - "grad_norm": 0.43548395332890844, - "learning_rate": 4.679646078374133e-05, - "loss": 0.6335, + "epoch": 0.3764343688776938, + "grad_norm": 0.255510689169472, + "learning_rate": 9.84545288505099e-05, + "loss": 0.5674, "step": 1345 }, { - "epoch": 0.18838348495451365, - "grad_norm": 0.4311071382664937, - "learning_rate": 4.679090796681225e-05, - "loss": 0.6493, + "epoch": 0.37671424573187795, + "grad_norm": 0.23922536995893914, + "learning_rate": 9.8450720950821e-05, + "loss": 0.5674, "step": 1346 }, { - "epoch": 0.18852344296710988, - "grad_norm": 0.43011170445061725, - "learning_rate": 4.6785350671630467e-05, - "loss": 0.5733, + "epoch": 0.37699412258606213, + "grad_norm": 0.23943968149671538, + "learning_rate": 9.844690843956534e-05, + "loss": 0.5587, "step": 1347 }, { - "epoch": 0.18866340097970608, - "grad_norm": 0.44356466777661, - "learning_rate": 4.6779788899338095e-05, - "loss": 0.5936, + "epoch": 0.3772739994402463, + "grad_norm": 0.2404224740926476, + "learning_rate": 9.844309131710585e-05, + "loss": 0.5493, "step": 1348 }, { - "epoch": 0.1888033589923023, - "grad_norm": 0.4408469771886245, - "learning_rate": 4.6774222651078106e-05, - "loss": 0.6, + "epoch": 0.37755387629443043, + "grad_norm": 0.2461865431891018, + "learning_rate": 9.843926958380581e-05, + "loss": 0.563, "step": 1349 }, { - "epoch": 0.18894331700489853, - "grad_norm": 0.4174867184800617, - "learning_rate": 4.6768651927994434e-05, - "loss": 0.5785, + "epoch": 0.3778337531486146, + "grad_norm": 0.24682489667591118, + "learning_rate": 9.843544324002895e-05, + "loss": 0.5964, "step": 1350 }, { - "epoch": 0.18908327501749475, - "grad_norm": 0.4202539862345758, - "learning_rate": 4.6763076731231916e-05, - "loss": 0.6061, + "epoch": 0.3781136300027988, + "grad_norm": 0.2534555642684441, + "learning_rate": 9.843161228613951e-05, + "loss": 0.5945, "step": 1351 }, { - "epoch": 0.18922323303009098, - "grad_norm": 0.4362329726223066, - "learning_rate": 4.675749706193631e-05, - "loss": 0.6105, + "epoch": 0.3783935068569829, + "grad_norm": 0.24827491742347144, + "learning_rate": 9.842777672250212e-05, + "loss": 0.5593, "step": 1352 }, { - "epoch": 0.1893631910426872, - "grad_norm": 0.4463015177283665, - "learning_rate": 4.67519129212543e-05, - "loss": 0.6196, + "epoch": 0.3786733837111671, + "grad_norm": 0.2434939810912754, + "learning_rate": 9.842393654948181e-05, + "loss": 0.5761, "step": 1353 }, { - "epoch": 0.18950314905528343, - "grad_norm": 0.4213305303975094, - "learning_rate": 4.674632431033348e-05, - "loss": 0.5771, + "epoch": 0.3789532605653512, + "grad_norm": 0.25475309432094806, + "learning_rate": 9.842009176744413e-05, + "loss": 0.5862, "step": 1354 }, { - "epoch": 0.18964310706787962, - "grad_norm": 0.4523660797014365, - "learning_rate": 4.674073123032236e-05, - "loss": 0.5614, + "epoch": 0.3792331374195354, + "grad_norm": 0.24077707514403346, + "learning_rate": 9.841624237675499e-05, + "loss": 0.5659, "step": 1355 }, { - "epoch": 0.18978306508047585, - "grad_norm": 0.42558959589979956, - "learning_rate": 4.673513368237039e-05, - "loss": 0.5987, + "epoch": 0.3795130142737196, + "grad_norm": 0.2438247277705193, + "learning_rate": 9.841238837778084e-05, + "loss": 0.5434, "step": 1356 }, { - "epoch": 0.18992302309307207, - "grad_norm": 0.4566223530786513, - "learning_rate": 4.6729531667627905e-05, - "loss": 0.6081, + "epoch": 0.3797928911279037, + "grad_norm": 0.24783230207209245, + "learning_rate": 9.840852977088844e-05, + "loss": 0.572, "step": 1357 }, { - "epoch": 0.1900629811056683, - "grad_norm": 0.4324304893039991, - "learning_rate": 4.672392518724619e-05, - "loss": 0.6132, + "epoch": 0.3800727679820879, + "grad_norm": 0.2542560008900286, + "learning_rate": 9.840466655644509e-05, + "loss": 0.5706, "step": 1358 }, { - "epoch": 0.19020293911826452, - "grad_norm": 0.43913060355532496, - "learning_rate": 4.671831424237743e-05, - "loss": 0.5989, + "epoch": 0.380352644836272, + "grad_norm": 0.2587275663743225, + "learning_rate": 9.840079873481847e-05, + "loss": 0.5581, "step": 1359 }, { - "epoch": 0.19034289713086075, - "grad_norm": 0.44320622872500787, - "learning_rate": 4.671269883417473e-05, - "loss": 0.5998, + "epoch": 0.3806325216904562, + "grad_norm": 0.24411845202564678, + "learning_rate": 9.839692630637676e-05, + "loss": 0.5738, "step": 1360 }, { - "epoch": 0.19048285514345698, - "grad_norm": 0.4298692464735462, - "learning_rate": 4.670707896379211e-05, - "loss": 0.555, + "epoch": 0.3809123985446404, + "grad_norm": 0.2657088118008263, + "learning_rate": 9.839304927148849e-05, + "loss": 0.5952, "step": 1361 }, { - "epoch": 0.19062281315605317, - "grad_norm": 0.4212199124955075, - "learning_rate": 4.670145463238451e-05, - "loss": 0.6177, + "epoch": 0.3811922753988245, + "grad_norm": 0.24120355667171373, + "learning_rate": 9.838916763052273e-05, + "loss": 0.5716, "step": 1362 }, { - "epoch": 0.1907627711686494, - "grad_norm": 0.4448003823408276, - "learning_rate": 4.669582584110779e-05, - "loss": 0.5932, + "epoch": 0.3814721522530087, + "grad_norm": 0.25988001922025383, + "learning_rate": 9.838528138384888e-05, + "loss": 0.583, "step": 1363 }, { - "epoch": 0.19090272918124562, - "grad_norm": 0.44697385174479454, - "learning_rate": 4.6690192591118734e-05, - "loss": 0.6026, + "epoch": 0.38175202910719286, + "grad_norm": 0.251565534572992, + "learning_rate": 9.83813905318369e-05, + "loss": 0.5501, "step": 1364 }, { - "epoch": 0.19104268719384185, - "grad_norm": 0.4671259243593673, - "learning_rate": 4.668455488357502e-05, - "loss": 0.6156, + "epoch": 0.382031905961377, + "grad_norm": 0.2526421398627077, + "learning_rate": 9.837749507485706e-05, + "loss": 0.5936, "step": 1365 }, { - "epoch": 0.19118264520643807, - "grad_norm": 0.43396652865433255, - "learning_rate": 4.6678912719635246e-05, - "loss": 0.5764, + "epoch": 0.38231178281556116, + "grad_norm": 0.24433714214079807, + "learning_rate": 9.837359501328017e-05, + "loss": 0.5472, "step": 1366 }, { - "epoch": 0.1913226032190343, - "grad_norm": 0.4375416377619642, - "learning_rate": 4.667326610045895e-05, - "loss": 0.6031, + "epoch": 0.3825916596697453, + "grad_norm": 0.2536070233357884, + "learning_rate": 9.836969034747743e-05, + "loss": 0.5869, "step": 1367 }, { - "epoch": 0.19146256123163052, - "grad_norm": 0.441530782470694, - "learning_rate": 4.6667615027206564e-05, - "loss": 0.5773, + "epoch": 0.38287153652392947, + "grad_norm": 0.2380215963557464, + "learning_rate": 9.836578107782049e-05, + "loss": 0.5823, "step": 1368 }, { - "epoch": 0.19160251924422672, - "grad_norm": 0.43470376998767046, - "learning_rate": 4.6661959501039446e-05, - "loss": 0.5991, + "epoch": 0.38315141337811365, + "grad_norm": 0.24701908840173167, + "learning_rate": 9.836186720468144e-05, + "loss": 0.6023, "step": 1369 }, { - "epoch": 0.19174247725682295, - "grad_norm": 0.4389060530511099, - "learning_rate": 4.665629952311985e-05, - "loss": 0.5692, + "epoch": 0.3834312902322978, + "grad_norm": 0.25839264139407375, + "learning_rate": 9.83579487284328e-05, + "loss": 0.5816, "step": 1370 }, { - "epoch": 0.19188243526941917, - "grad_norm": 0.4752733107460391, - "learning_rate": 4.665063509461097e-05, - "loss": 0.6525, + "epoch": 0.38371116708648195, + "grad_norm": 0.24147497736430357, + "learning_rate": 9.835402564944752e-05, + "loss": 0.5957, "step": 1371 }, { - "epoch": 0.1920223932820154, - "grad_norm": 0.43452630074557697, - "learning_rate": 4.66449662166769e-05, - "loss": 0.5925, + "epoch": 0.38399104394066613, + "grad_norm": 0.2479587500985481, + "learning_rate": 9.835009796809902e-05, + "loss": 0.5601, "step": 1372 }, { - "epoch": 0.19216235129461162, - "grad_norm": 0.4472660808670533, - "learning_rate": 4.663929289048266e-05, - "loss": 0.6175, + "epoch": 0.38427092079485026, + "grad_norm": 0.24172569401654465, + "learning_rate": 9.834616568476114e-05, + "loss": 0.5612, "step": 1373 }, { - "epoch": 0.19230230930720785, - "grad_norm": 0.4151649360316345, - "learning_rate": 4.6633615117194165e-05, - "loss": 0.6048, + "epoch": 0.38455079764903444, + "grad_norm": 0.24368403167640812, + "learning_rate": 9.834222879980815e-05, + "loss": 0.5533, "step": 1374 }, { - "epoch": 0.19244226731980407, - "grad_norm": 0.41991921626461726, - "learning_rate": 4.6627932897978254e-05, - "loss": 0.6141, + "epoch": 0.38483067450321856, + "grad_norm": 0.2411430338474899, + "learning_rate": 9.833828731361476e-05, + "loss": 0.5579, "step": 1375 }, { - "epoch": 0.19258222533240027, - "grad_norm": 0.44865262717314497, - "learning_rate": 4.6622246234002686e-05, - "loss": 0.6108, + "epoch": 0.38511055135740274, + "grad_norm": 0.24266943199089647, + "learning_rate": 9.833434122655612e-05, + "loss": 0.5784, "step": 1376 }, { - "epoch": 0.1927221833449965, - "grad_norm": 0.42390019369736476, - "learning_rate": 4.6616555126436134e-05, - "loss": 0.6178, + "epoch": 0.3853904282115869, + "grad_norm": 0.23968345541464048, + "learning_rate": 9.833039053900783e-05, + "loss": 0.5715, "step": 1377 }, { - "epoch": 0.19286214135759272, - "grad_norm": 0.47111888994325224, - "learning_rate": 4.6610859576448176e-05, - "loss": 0.6351, + "epoch": 0.38567030506577105, + "grad_norm": 0.25497451001939403, + "learning_rate": 9.832643525134593e-05, + "loss": 0.5743, "step": 1378 }, { - "epoch": 0.19300209937018895, - "grad_norm": 0.4694892453227565, - "learning_rate": 4.660515958520929e-05, - "loss": 0.6313, + "epoch": 0.3859501819199552, + "grad_norm": 0.23794379443832098, + "learning_rate": 9.832247536394687e-05, + "loss": 0.5688, "step": 1379 }, { - "epoch": 0.19314205738278517, - "grad_norm": 0.43419016166335284, - "learning_rate": 4.65994551538909e-05, - "loss": 0.592, + "epoch": 0.38623005877413935, + "grad_norm": 0.2407260271800774, + "learning_rate": 9.831851087718755e-05, + "loss": 0.5494, "step": 1380 }, { - "epoch": 0.1932820153953814, - "grad_norm": 0.4133745853690666, - "learning_rate": 4.659374628366532e-05, - "loss": 0.572, + "epoch": 0.38650993562832353, + "grad_norm": 0.23980211745135735, + "learning_rate": 9.831454179144532e-05, + "loss": 0.5525, "step": 1381 }, { - "epoch": 0.1934219734079776, - "grad_norm": 0.4786575561548947, - "learning_rate": 4.658803297570577e-05, - "loss": 0.6007, + "epoch": 0.3867898124825077, + "grad_norm": 0.2449405522761386, + "learning_rate": 9.831056810709796e-05, + "loss": 0.5892, "step": 1382 }, { - "epoch": 0.19356193142057382, - "grad_norm": 0.4524476179381755, - "learning_rate": 4.658231523118641e-05, - "loss": 0.5966, + "epoch": 0.38706968933669184, + "grad_norm": 0.24433135015995563, + "learning_rate": 9.83065898245237e-05, + "loss": 0.5563, "step": 1383 }, { - "epoch": 0.19370188943317004, - "grad_norm": 0.4349438663500916, - "learning_rate": 4.6576593051282286e-05, - "loss": 0.5862, + "epoch": 0.387349566190876, + "grad_norm": 0.24674073855883902, + "learning_rate": 9.830260694410116e-05, + "loss": 0.5523, "step": 1384 }, { - "epoch": 0.19384184744576627, - "grad_norm": 0.45493985969941036, - "learning_rate": 4.657086643716936e-05, - "loss": 0.6482, + "epoch": 0.3876294430450602, + "grad_norm": 0.2407361444313013, + "learning_rate": 9.829861946620946e-05, + "loss": 0.5786, "step": 1385 }, { - "epoch": 0.1939818054583625, - "grad_norm": 0.42740591145801066, - "learning_rate": 4.6565135390024515e-05, - "loss": 0.5906, + "epoch": 0.3879093198992443, + "grad_norm": 0.24374340567894476, + "learning_rate": 9.829462739122814e-05, + "loss": 0.5794, "step": 1386 }, { - "epoch": 0.19412176347095872, - "grad_norm": 0.47535147700181984, - "learning_rate": 4.6559399911025545e-05, - "loss": 0.5951, + "epoch": 0.3881891967534285, + "grad_norm": 0.23433303313796344, + "learning_rate": 9.829063071953714e-05, + "loss": 0.5705, "step": 1387 }, { - "epoch": 0.19426172148355494, - "grad_norm": 0.4333924774938176, - "learning_rate": 4.655366000135114e-05, - "loss": 0.6083, + "epoch": 0.3884690736076126, + "grad_norm": 0.24789014314786506, + "learning_rate": 9.828662945151688e-05, + "loss": 0.5572, "step": 1388 }, { - "epoch": 0.19440167949615114, - "grad_norm": 0.4197374995007798, - "learning_rate": 4.6547915662180905e-05, - "loss": 0.5758, + "epoch": 0.3887489504617968, + "grad_norm": 0.25171929518719044, + "learning_rate": 9.828262358754821e-05, + "loss": 0.5976, "step": 1389 }, { - "epoch": 0.19454163750874737, - "grad_norm": 0.4209565338579636, - "learning_rate": 4.6542166894695366e-05, - "loss": 0.6219, + "epoch": 0.389028827315981, + "grad_norm": 0.24431104445326382, + "learning_rate": 9.82786131280124e-05, + "loss": 0.5732, "step": 1390 }, { - "epoch": 0.1946815955213436, - "grad_norm": 0.4533900200145802, - "learning_rate": 4.653641370007596e-05, - "loss": 0.6284, + "epoch": 0.3893087041701651, + "grad_norm": 0.2479963265314493, + "learning_rate": 9.827459807329116e-05, + "loss": 0.5941, "step": 1391 }, { - "epoch": 0.19482155353393982, - "grad_norm": 0.43123676441236164, - "learning_rate": 4.653065607950502e-05, - "loss": 0.6172, + "epoch": 0.3895885810243493, + "grad_norm": 0.2342558038183622, + "learning_rate": 9.827057842376665e-05, + "loss": 0.5763, "step": 1392 }, { - "epoch": 0.19496151154653604, - "grad_norm": 0.43240025310515345, - "learning_rate": 4.652489403416579e-05, - "loss": 0.6231, + "epoch": 0.38986845787853347, + "grad_norm": 0.24997810891787892, + "learning_rate": 9.82665541798215e-05, + "loss": 0.5798, "step": 1393 }, { - "epoch": 0.19510146955913227, - "grad_norm": 0.4105873841245408, - "learning_rate": 4.651912756524244e-05, - "loss": 0.6351, + "epoch": 0.3901483347327176, + "grad_norm": 0.2588555058785034, + "learning_rate": 9.826252534183869e-05, + "loss": 0.5922, "step": 1394 }, { - "epoch": 0.1952414275717285, - "grad_norm": 0.4342716468025768, - "learning_rate": 4.651335667392003e-05, - "loss": 0.5944, + "epoch": 0.3904282115869018, + "grad_norm": 0.2566940966721624, + "learning_rate": 9.825849191020169e-05, + "loss": 0.5731, "step": 1395 }, { - "epoch": 0.1953813855843247, - "grad_norm": 0.42929266034842917, - "learning_rate": 4.6507581361384537e-05, - "loss": 0.5949, + "epoch": 0.3907080884410859, + "grad_norm": 0.24224772705378775, + "learning_rate": 9.825445388529443e-05, + "loss": 0.5753, "step": 1396 }, { - "epoch": 0.19552134359692092, - "grad_norm": 0.43375421894645294, - "learning_rate": 4.650180162882285e-05, - "loss": 0.5674, + "epoch": 0.3909879652952701, + "grad_norm": 0.23802043040677046, + "learning_rate": 9.825041126750123e-05, + "loss": 0.5689, "step": 1397 }, { - "epoch": 0.19566130160951714, - "grad_norm": 0.4605858134606264, - "learning_rate": 4.649601747742277e-05, - "loss": 0.6276, + "epoch": 0.39126784214945426, + "grad_norm": 0.24010833324273848, + "learning_rate": 9.82463640572069e-05, + "loss": 0.5932, "step": 1398 }, { - "epoch": 0.19580125962211337, - "grad_norm": 0.4096793441616307, - "learning_rate": 4.649022890837298e-05, - "loss": 0.6066, + "epoch": 0.3915477190036384, + "grad_norm": 0.23942869227945068, + "learning_rate": 9.82423122547966e-05, + "loss": 0.578, "step": 1399 }, { - "epoch": 0.1959412176347096, - "grad_norm": 0.43325226542495887, - "learning_rate": 4.6484435922863105e-05, - "loss": 0.578, + "epoch": 0.39182759585782256, + "grad_norm": 0.23922324829498465, + "learning_rate": 9.823825586065604e-05, + "loss": 0.567, "step": 1400 }, { - "epoch": 0.19608117564730582, - "grad_norm": 0.45203303336392237, - "learning_rate": 4.6478638522083654e-05, - "loss": 0.6247, + "epoch": 0.39210747271200674, + "grad_norm": 0.24430202942016468, + "learning_rate": 9.823419487517129e-05, + "loss": 0.5757, "step": 1401 }, { - "epoch": 0.19622113365990204, - "grad_norm": 0.43768863421738247, - "learning_rate": 4.6472836707226065e-05, - "loss": 0.6089, + "epoch": 0.39238734956619087, + "grad_norm": 0.2518377452208979, + "learning_rate": 9.823012929872888e-05, + "loss": 0.572, "step": 1402 }, { - "epoch": 0.19636109167249824, - "grad_norm": 0.4561097031131965, - "learning_rate": 4.646703047948264e-05, - "loss": 0.6228, + "epoch": 0.39266722642037505, + "grad_norm": 0.22723461063326472, + "learning_rate": 9.822605913171576e-05, + "loss": 0.5793, "step": 1403 }, { - "epoch": 0.19650104968509446, - "grad_norm": 0.430780639976112, - "learning_rate": 4.6461219840046654e-05, - "loss": 0.5712, + "epoch": 0.3929471032745592, + "grad_norm": 0.23563441315632314, + "learning_rate": 9.822198437451932e-05, + "loss": 0.5593, "step": 1404 }, { - "epoch": 0.1966410076976907, - "grad_norm": 0.4421729743171197, - "learning_rate": 4.645540479011223e-05, - "loss": 0.608, + "epoch": 0.39322698012874335, + "grad_norm": 0.24457109666750093, + "learning_rate": 9.821790502752745e-05, + "loss": 0.5923, "step": 1405 }, { - "epoch": 0.19678096571028691, - "grad_norm": 0.4527004953532786, - "learning_rate": 4.644958533087443e-05, - "loss": 0.5794, + "epoch": 0.39350685698292753, + "grad_norm": 0.24189164632769405, + "learning_rate": 9.821382109112836e-05, + "loss": 0.5565, "step": 1406 }, { - "epoch": 0.19692092372288314, - "grad_norm": 0.43655870981476536, - "learning_rate": 4.64437614635292e-05, - "loss": 0.6072, + "epoch": 0.39378673383711166, + "grad_norm": 0.24660376801379894, + "learning_rate": 9.82097325657108e-05, + "loss": 0.569, "step": 1407 }, { - "epoch": 0.19706088173547937, - "grad_norm": 0.46922192783691635, - "learning_rate": 4.643793318927342e-05, - "loss": 0.6129, + "epoch": 0.39406661069129584, + "grad_norm": 0.24932022315992264, + "learning_rate": 9.820563945166393e-05, + "loss": 0.5935, "step": 1408 }, { - "epoch": 0.1972008397480756, - "grad_norm": 0.4096022513008127, - "learning_rate": 4.6432100509304843e-05, - "loss": 0.6139, + "epoch": 0.39434648754547996, + "grad_norm": 0.2371698308262719, + "learning_rate": 9.82015417493773e-05, + "loss": 0.5674, "step": 1409 }, { - "epoch": 0.1973407977606718, - "grad_norm": 0.44858888050025963, - "learning_rate": 4.642626342482215e-05, - "loss": 0.6494, + "epoch": 0.39462636439966414, + "grad_norm": 0.24530065971135384, + "learning_rate": 9.819743945924095e-05, + "loss": 0.6034, "step": 1410 }, { - "epoch": 0.197480755773268, - "grad_norm": 0.42709765753910495, - "learning_rate": 4.642042193702493e-05, - "loss": 0.6144, + "epoch": 0.3949062412538483, + "grad_norm": 0.25000585371231543, + "learning_rate": 9.819333258164534e-05, + "loss": 0.5706, "step": 1411 }, { - "epoch": 0.19762071378586424, - "grad_norm": 0.43179502942515346, - "learning_rate": 4.6414576047113655e-05, - "loss": 0.6533, + "epoch": 0.39518611810803245, + "grad_norm": 0.25145226169082646, + "learning_rate": 9.818922111698135e-05, + "loss": 0.61, "step": 1412 }, { - "epoch": 0.19776067179846046, - "grad_norm": 0.4271262128439598, - "learning_rate": 4.640872575628973e-05, - "loss": 0.5787, + "epoch": 0.3954659949622166, + "grad_norm": 0.24653399767282239, + "learning_rate": 9.818510506564032e-05, + "loss": 0.5415, "step": 1413 }, { - "epoch": 0.1979006298110567, - "grad_norm": 0.44047825759880344, - "learning_rate": 4.640287106575543e-05, - "loss": 0.5908, + "epoch": 0.3957458718164008, + "grad_norm": 0.24944047338705888, + "learning_rate": 9.818098442801403e-05, + "loss": 0.5844, "step": 1414 }, { - "epoch": 0.1980405878236529, - "grad_norm": 0.42112850121439344, - "learning_rate": 4.639701197671397e-05, - "loss": 0.6233, + "epoch": 0.39602574867058493, + "grad_norm": 0.23122977585694265, + "learning_rate": 9.817685920449465e-05, + "loss": 0.5971, "step": 1415 }, { - "epoch": 0.19818054583624914, - "grad_norm": 0.4372841966734705, - "learning_rate": 4.639114849036944e-05, - "loss": 0.6486, + "epoch": 0.3963056255247691, + "grad_norm": 0.24685199164459454, + "learning_rate": 9.817272939547487e-05, + "loss": 0.5672, "step": 1416 }, { - "epoch": 0.19832050384884534, - "grad_norm": 0.4296545377209088, - "learning_rate": 4.638528060792685e-05, - "loss": 0.5862, + "epoch": 0.39658550237895324, + "grad_norm": 0.24948841772290037, + "learning_rate": 9.816859500134772e-05, + "loss": 0.5692, "step": 1417 }, { - "epoch": 0.19846046186144156, - "grad_norm": 0.4242826054839983, - "learning_rate": 4.637940833059211e-05, - "loss": 0.6224, + "epoch": 0.3968653792331374, + "grad_norm": 0.24779947022891766, + "learning_rate": 9.816445602250676e-05, + "loss": 0.5672, "step": 1418 }, { - "epoch": 0.1986004198740378, - "grad_norm": 0.4386897733085964, - "learning_rate": 4.637353165957203e-05, - "loss": 0.6062, + "epoch": 0.3971452560873216, + "grad_norm": 0.25052807070496963, + "learning_rate": 9.816031245934592e-05, + "loss": 0.5853, "step": 1419 }, { - "epoch": 0.198740377886634, - "grad_norm": 0.43253628217173173, - "learning_rate": 4.636765059607434e-05, - "loss": 0.5796, + "epoch": 0.3974251329415057, + "grad_norm": 0.25698634743083887, + "learning_rate": 9.815616431225956e-05, + "loss": 0.5765, "step": 1420 }, { - "epoch": 0.19888033589923024, - "grad_norm": 0.42456136906021713, - "learning_rate": 4.6361765141307645e-05, - "loss": 0.6257, + "epoch": 0.3977050097956899, + "grad_norm": 0.2598518341256616, + "learning_rate": 9.815201158164254e-05, + "loss": 0.5561, "step": 1421 }, { - "epoch": 0.19902029391182646, - "grad_norm": 0.4546276363960964, - "learning_rate": 4.635587529648146e-05, - "loss": 0.6193, + "epoch": 0.3979848866498741, + "grad_norm": 0.2527933730835377, + "learning_rate": 9.81478542678901e-05, + "loss": 0.5832, "step": 1422 }, { - "epoch": 0.19916025192442266, - "grad_norm": 0.4511658476734362, - "learning_rate": 4.634998106280622e-05, - "loss": 0.5936, + "epoch": 0.3982647635040582, + "grad_norm": 0.2330086346185573, + "learning_rate": 9.814369237139795e-05, + "loss": 0.5699, "step": 1423 }, { - "epoch": 0.19930020993701889, - "grad_norm": 0.44597648248378585, - "learning_rate": 4.634408244149324e-05, - "loss": 0.6051, + "epoch": 0.3985446403582424, + "grad_norm": 0.2416250707522738, + "learning_rate": 9.813952589256221e-05, + "loss": 0.5535, "step": 1424 }, { - "epoch": 0.1994401679496151, - "grad_norm": 0.43808391202287855, - "learning_rate": 4.6338179433754756e-05, - "loss": 0.5995, + "epoch": 0.3988245172124265, + "grad_norm": 0.25028962040266256, + "learning_rate": 9.813535483177945e-05, + "loss": 0.5727, "step": 1425 }, { - "epoch": 0.19958012596221134, - "grad_norm": 0.48296291796144863, - "learning_rate": 4.6332272040803895e-05, - "loss": 0.5904, + "epoch": 0.3991043940666107, + "grad_norm": 0.2580212153823351, + "learning_rate": 9.813117918944667e-05, + "loss": 0.5769, "step": 1426 }, { - "epoch": 0.19972008397480756, - "grad_norm": 0.43753233592691976, - "learning_rate": 4.632636026385468e-05, - "loss": 0.5993, + "epoch": 0.39938427092079487, + "grad_norm": 0.2395087042527386, + "learning_rate": 9.812699896596132e-05, + "loss": 0.5715, "step": 1427 }, { - "epoch": 0.19986004198740379, - "grad_norm": 0.441993440207615, - "learning_rate": 4.632044410412204e-05, - "loss": 0.5981, + "epoch": 0.399664147774979, + "grad_norm": 0.2370166418531977, + "learning_rate": 9.812281416172127e-05, + "loss": 0.5871, "step": 1428 }, { - "epoch": 0.2, - "grad_norm": 0.44046702891678347, - "learning_rate": 4.631452356282182e-05, - "loss": 0.5711, + "epoch": 0.3999440246291632, + "grad_norm": 0.2544828790207878, + "learning_rate": 9.811862477712484e-05, + "loss": 0.5876, "step": 1429 }, { - "epoch": 0.2001399580125962, - "grad_norm": 0.42955814782218116, - "learning_rate": 4.630859864117073e-05, - "loss": 0.5847, + "epoch": 0.4002239014833473, + "grad_norm": 0.24156333772037894, + "learning_rate": 9.811443081257075e-05, + "loss": 0.544, "step": 1430 }, { - "epoch": 0.20027991602519243, - "grad_norm": 0.43651504606811536, - "learning_rate": 4.630266934038642e-05, - "loss": 0.6069, + "epoch": 0.4005037783375315, + "grad_norm": 0.2534180250079355, + "learning_rate": 9.811023226845822e-05, + "loss": 0.5503, "step": 1431 }, { - "epoch": 0.20041987403778866, - "grad_norm": 0.428395840664284, - "learning_rate": 4.629673566168741e-05, - "loss": 0.5963, + "epoch": 0.40078365519171566, + "grad_norm": 0.2396171926593597, + "learning_rate": 9.810602914518685e-05, + "loss": 0.5644, "step": 1432 }, { - "epoch": 0.20055983205038488, - "grad_norm": 0.4138324728369602, - "learning_rate": 4.629079760629313e-05, - "loss": 0.5647, + "epoch": 0.4010635320458998, + "grad_norm": 0.2580740191270764, + "learning_rate": 9.810182144315669e-05, + "loss": 0.6111, "step": 1433 }, { - "epoch": 0.2006997900629811, - "grad_norm": 0.4454769448792182, - "learning_rate": 4.628485517542392e-05, - "loss": 0.5915, + "epoch": 0.40134340890008396, + "grad_norm": 0.25392432270498255, + "learning_rate": 9.809760916276826e-05, + "loss": 0.5737, "step": 1434 }, { - "epoch": 0.20083974807557733, - "grad_norm": 0.43842498265287877, - "learning_rate": 4.627890837030101e-05, - "loss": 0.6333, + "epoch": 0.40162328575426814, + "grad_norm": 0.25951610318742846, + "learning_rate": 9.809339230442247e-05, + "loss": 0.5735, "step": 1435 }, { - "epoch": 0.20097970608817356, - "grad_norm": 0.42848973296459963, - "learning_rate": 4.627295719214653e-05, - "loss": 0.5877, + "epoch": 0.40190316260845227, + "grad_norm": 0.2505532584485498, + "learning_rate": 9.808917086852067e-05, + "loss": 0.5649, "step": 1436 }, { - "epoch": 0.20111966410076976, - "grad_norm": 0.44101122405778176, - "learning_rate": 4.6267001642183496e-05, - "loss": 0.6049, + "epoch": 0.40218303946263645, + "grad_norm": 0.24601010187993994, + "learning_rate": 9.808494485546467e-05, + "loss": 0.5756, "step": 1437 }, { - "epoch": 0.20125962211336598, - "grad_norm": 0.552921270157986, - "learning_rate": 4.6261041721635834e-05, - "loss": 0.6243, + "epoch": 0.4024629163168206, + "grad_norm": 0.26329883872874177, + "learning_rate": 9.808071426565671e-05, + "loss": 0.5954, "step": 1438 }, { - "epoch": 0.2013995801259622, - "grad_norm": 0.4518649937814017, - "learning_rate": 4.625507743172838e-05, - "loss": 0.6189, + "epoch": 0.40274279317100475, + "grad_norm": 0.2425089899801096, + "learning_rate": 9.807647909949944e-05, + "loss": 0.5687, "step": 1439 }, { - "epoch": 0.20153953813855843, - "grad_norm": 0.4231937860204061, - "learning_rate": 4.6249108773686846e-05, - "loss": 0.5851, + "epoch": 0.40302267002518893, + "grad_norm": 0.2554382173259353, + "learning_rate": 9.807223935739598e-05, + "loss": 0.5923, "step": 1440 }, { - "epoch": 0.20167949615115466, - "grad_norm": 0.42182226251472993, - "learning_rate": 4.6243135748737864e-05, - "loss": 0.6401, + "epoch": 0.40330254687937306, + "grad_norm": 0.23716757627054405, + "learning_rate": 9.806799503974988e-05, + "loss": 0.5502, "step": 1441 }, { - "epoch": 0.20181945416375088, - "grad_norm": 0.48057969413105356, - "learning_rate": 4.623715835810893e-05, - "loss": 0.6243, + "epoch": 0.40358242373355724, + "grad_norm": 0.22925249816015852, + "learning_rate": 9.806374614696512e-05, + "loss": 0.5585, "step": 1442 }, { - "epoch": 0.2019594121763471, - "grad_norm": 0.4437078265678712, - "learning_rate": 4.6231176603028484e-05, - "loss": 0.6328, + "epoch": 0.4038623005877414, + "grad_norm": 0.2402121585867316, + "learning_rate": 9.805949267944609e-05, + "loss": 0.5753, "step": 1443 }, { - "epoch": 0.2020993701889433, - "grad_norm": 0.4507520837571798, - "learning_rate": 4.6225190484725824e-05, - "loss": 0.5904, + "epoch": 0.40414217744192554, + "grad_norm": 0.24862370331193331, + "learning_rate": 9.805523463759764e-05, + "loss": 0.5682, "step": 1444 }, { - "epoch": 0.20223932820153953, - "grad_norm": 0.43096343316315816, - "learning_rate": 4.6219200004431154e-05, - "loss": 0.6205, + "epoch": 0.4044220542961097, + "grad_norm": 0.26093849074195435, + "learning_rate": 9.805097202182506e-05, + "loss": 0.5719, "step": 1445 }, { - "epoch": 0.20237928621413576, - "grad_norm": 0.44215631216303225, - "learning_rate": 4.6213205163375586e-05, - "loss": 0.6171, + "epoch": 0.40470193115029385, + "grad_norm": 0.2450551233086624, + "learning_rate": 9.804670483253407e-05, + "loss": 0.5622, "step": 1446 }, { - "epoch": 0.20251924422673198, - "grad_norm": 0.4601057485158553, - "learning_rate": 4.620720596279112e-05, - "loss": 0.6355, + "epoch": 0.404981808004478, + "grad_norm": 0.2462324177887218, + "learning_rate": 9.804243307013083e-05, + "loss": 0.5678, "step": 1447 }, { - "epoch": 0.2026592022393282, - "grad_norm": 0.42229237803528424, - "learning_rate": 4.620120240391065e-05, - "loss": 0.5762, + "epoch": 0.4052616848586622, + "grad_norm": 0.24844948626045485, + "learning_rate": 9.80381567350219e-05, + "loss": 0.571, "step": 1448 }, { - "epoch": 0.20279916025192443, - "grad_norm": 0.44247463233153156, - "learning_rate": 4.619519448796797e-05, - "loss": 0.6178, + "epoch": 0.40554156171284633, + "grad_norm": 0.2618704536066427, + "learning_rate": 9.803387582761435e-05, + "loss": 0.5695, "step": 1449 }, { - "epoch": 0.20293911826452066, - "grad_norm": 0.44572237683301125, - "learning_rate": 4.6189182216197766e-05, - "loss": 0.5782, + "epoch": 0.4058214385670305, + "grad_norm": 0.2437642348316598, + "learning_rate": 9.80295903483156e-05, + "loss": 0.5867, "step": 1450 }, { - "epoch": 0.20307907627711685, - "grad_norm": 0.44819354829925007, - "learning_rate": 4.618316558983562e-05, - "loss": 0.5575, + "epoch": 0.4061013154212147, + "grad_norm": 0.24824252189949325, + "learning_rate": 9.802530029753354e-05, + "loss": 0.5529, "step": 1451 }, { - "epoch": 0.20321903428971308, - "grad_norm": 0.462621341218452, - "learning_rate": 4.617714461011802e-05, - "loss": 0.6263, + "epoch": 0.4063811922753988, + "grad_norm": 0.2517819351187458, + "learning_rate": 9.802100567567654e-05, + "loss": 0.5894, "step": 1452 }, { - "epoch": 0.2033589923023093, - "grad_norm": 0.4191206562398054, - "learning_rate": 4.6171119278282315e-05, - "loss": 0.5823, + "epoch": 0.406661069129583, + "grad_norm": 0.25033833944703193, + "learning_rate": 9.801670648315333e-05, + "loss": 0.585, "step": 1453 }, { - "epoch": 0.20349895031490553, - "grad_norm": 0.4234985010775575, - "learning_rate": 4.6165089595566795e-05, - "loss": 0.5935, + "epoch": 0.4069409459837671, + "grad_norm": 0.23891449311006835, + "learning_rate": 9.801240272037313e-05, + "loss": 0.5842, "step": 1454 }, { - "epoch": 0.20363890832750176, - "grad_norm": 0.4138224710610466, - "learning_rate": 4.6159055563210604e-05, - "loss": 0.5753, + "epoch": 0.4072208228379513, + "grad_norm": 0.26300638380055125, + "learning_rate": 9.800809438774556e-05, + "loss": 0.5767, "step": 1455 }, { - "epoch": 0.20377886634009798, - "grad_norm": 0.4657076630993664, - "learning_rate": 4.6153017182453814e-05, - "loss": 0.6038, + "epoch": 0.4075006996921355, + "grad_norm": 0.2476856874567442, + "learning_rate": 9.80037814856807e-05, + "loss": 0.5632, "step": 1456 }, { - "epoch": 0.20391882435269418, - "grad_norm": 0.4271574918448891, - "learning_rate": 4.6146974454537374e-05, - "loss": 0.613, + "epoch": 0.4077805765463196, + "grad_norm": 0.2456684512036152, + "learning_rate": 9.799946401458904e-05, + "loss": 0.5783, "step": 1457 }, { - "epoch": 0.2040587823652904, - "grad_norm": 0.43052348959054654, - "learning_rate": 4.61409273807031e-05, - "loss": 0.578, + "epoch": 0.4080604534005038, + "grad_norm": 0.2442164088958097, + "learning_rate": 9.799514197488153e-05, + "loss": 0.5891, "step": 1458 }, { - "epoch": 0.20419874037788663, - "grad_norm": 0.446875776248068, - "learning_rate": 4.613487596219376e-05, - "loss": 0.6203, + "epoch": 0.4083403302546879, + "grad_norm": 0.24458087286921504, + "learning_rate": 9.799081536696954e-05, + "loss": 0.5514, "step": 1459 }, { - "epoch": 0.20433869839048285, - "grad_norm": 0.44280885463456926, - "learning_rate": 4.6128820200252954e-05, - "loss": 0.6178, + "epoch": 0.4086202071088721, + "grad_norm": 0.24478380799919897, + "learning_rate": 9.79864841912649e-05, + "loss": 0.5592, "step": 1460 }, { - "epoch": 0.20447865640307908, - "grad_norm": 0.4627088307800896, - "learning_rate": 4.612276009612522e-05, - "loss": 0.6083, + "epoch": 0.40890008396305627, + "grad_norm": 0.26026549588069137, + "learning_rate": 9.798214844817983e-05, + "loss": 0.5553, "step": 1461 }, { - "epoch": 0.2046186144156753, - "grad_norm": 0.40566282511231716, - "learning_rate": 4.611669565105596e-05, - "loss": 0.5897, + "epoch": 0.4091799608172404, + "grad_norm": 0.24356732058277175, + "learning_rate": 9.7977808138127e-05, + "loss": 0.5615, "step": 1462 }, { - "epoch": 0.20475857242827153, - "grad_norm": 0.4392817676918071, - "learning_rate": 4.6110626866291485e-05, - "loss": 0.6058, + "epoch": 0.4094598376714246, + "grad_norm": 0.24658345703383888, + "learning_rate": 9.797346326151955e-05, + "loss": 0.5853, "step": 1463 }, { - "epoch": 0.20489853044086773, - "grad_norm": 0.42409351023845426, - "learning_rate": 4.6104553743078996e-05, - "loss": 0.5942, + "epoch": 0.40973971452560876, + "grad_norm": 0.2449348078252221, + "learning_rate": 9.7969113818771e-05, + "loss": 0.5593, "step": 1464 }, { - "epoch": 0.20503848845346395, - "grad_norm": 0.43598866958142785, - "learning_rate": 4.609847628266657e-05, - "loss": 0.6271, + "epoch": 0.4100195913797929, + "grad_norm": 0.24113495521809883, + "learning_rate": 9.796475981029536e-05, + "loss": 0.5563, "step": 1465 }, { - "epoch": 0.20517844646606018, - "grad_norm": 0.4347982090854984, - "learning_rate": 4.60923944863032e-05, - "loss": 0.611, + "epoch": 0.41029946823397706, + "grad_norm": 0.25061954422907295, + "learning_rate": 9.796040123650702e-05, + "loss": 0.5724, "step": 1466 }, { - "epoch": 0.2053184044786564, - "grad_norm": 0.44333205254509317, - "learning_rate": 4.608630835523875e-05, - "loss": 0.6337, + "epoch": 0.4105793450881612, + "grad_norm": 0.2495813782898103, + "learning_rate": 9.795603809782086e-05, + "loss": 0.6059, "step": 1467 }, { - "epoch": 0.20545836249125263, - "grad_norm": 0.43208298348400775, - "learning_rate": 4.608021789072398e-05, - "loss": 0.5602, + "epoch": 0.41085922194234537, + "grad_norm": 0.2463598012700213, + "learning_rate": 9.795167039465216e-05, + "loss": 0.5734, "step": 1468 }, { - "epoch": 0.20559832050384885, - "grad_norm": 0.450291453219847, - "learning_rate": 4.607412309401054e-05, - "loss": 0.6653, + "epoch": 0.41113909879652955, + "grad_norm": 0.2378664592520467, + "learning_rate": 9.794729812741661e-05, + "loss": 0.5719, "step": 1469 }, { - "epoch": 0.20573827851644508, - "grad_norm": 0.4218568966937232, - "learning_rate": 4.606802396635098e-05, - "loss": 0.6039, + "epoch": 0.41141897565071367, + "grad_norm": 0.2533971400126434, + "learning_rate": 9.79429212965304e-05, + "loss": 0.5542, "step": 1470 }, { - "epoch": 0.20587823652904128, - "grad_norm": 0.4372151368221289, - "learning_rate": 4.6061920508998735e-05, - "loss": 0.6251, + "epoch": 0.41169885250489785, + "grad_norm": 0.25207117761398184, + "learning_rate": 9.79385399024101e-05, + "loss": 0.5379, "step": 1471 }, { - "epoch": 0.2060181945416375, - "grad_norm": 0.42362258866936114, - "learning_rate": 4.6055812723208114e-05, - "loss": 0.5752, + "epoch": 0.41197872935908203, + "grad_norm": 0.25312302669436926, + "learning_rate": 9.793415394547274e-05, + "loss": 0.5677, "step": 1472 }, { - "epoch": 0.20615815255423373, - "grad_norm": 0.4281637412896991, - "learning_rate": 4.604970061023434e-05, - "loss": 0.5909, + "epoch": 0.41225860621326615, + "grad_norm": 0.25635865763607046, + "learning_rate": 9.792976342613577e-05, + "loss": 0.5526, "step": 1473 }, { - "epoch": 0.20629811056682995, - "grad_norm": 0.4473289913366391, - "learning_rate": 4.604358417133351e-05, - "loss": 0.5825, + "epoch": 0.41253848306745033, + "grad_norm": 0.24001395354032368, + "learning_rate": 9.792536834481711e-05, + "loss": 0.5882, "step": 1474 }, { - "epoch": 0.20643806857942618, - "grad_norm": 0.44127895646180343, - "learning_rate": 4.6037463407762616e-05, - "loss": 0.591, + "epoch": 0.41281835992163446, + "grad_norm": 0.23611612710272417, + "learning_rate": 9.792096870193506e-05, + "loss": 0.542, "step": 1475 }, { - "epoch": 0.2065780265920224, - "grad_norm": 0.4250252005670499, - "learning_rate": 4.6031338320779534e-05, - "loss": 0.61, + "epoch": 0.41309823677581864, + "grad_norm": 0.2426571369583708, + "learning_rate": 9.791656449790838e-05, + "loss": 0.5707, "step": 1476 }, { - "epoch": 0.20671798460461863, - "grad_norm": 0.4195465649774795, - "learning_rate": 4.602520891164304e-05, - "loss": 0.6044, + "epoch": 0.4133781136300028, + "grad_norm": 0.24876165754893334, + "learning_rate": 9.791215573315628e-05, + "loss": 0.5789, "step": 1477 }, { - "epoch": 0.20685794261721482, - "grad_norm": 0.4546098361873912, - "learning_rate": 4.601907518161277e-05, - "loss": 0.6216, + "epoch": 0.41365799048418694, + "grad_norm": 0.25134751542763856, + "learning_rate": 9.790774240809837e-05, + "loss": 0.5694, "step": 1478 }, { - "epoch": 0.20699790062981105, - "grad_norm": 0.4475804565840221, - "learning_rate": 4.601293713194929e-05, - "loss": 0.6793, + "epoch": 0.4139378673383711, + "grad_norm": 0.24209618480153364, + "learning_rate": 9.790332452315471e-05, + "loss": 0.597, "step": 1479 }, { - "epoch": 0.20713785864240727, - "grad_norm": 0.43358956969279566, - "learning_rate": 4.600679476391402e-05, - "loss": 0.5491, + "epoch": 0.41421774419255525, + "grad_norm": 0.24343182478483422, + "learning_rate": 9.789890207874584e-05, + "loss": 0.5827, "step": 1480 }, { - "epoch": 0.2072778166550035, - "grad_norm": 0.42850643221967644, - "learning_rate": 4.600064807876929e-05, - "loss": 0.6122, + "epoch": 0.41449762104673943, + "grad_norm": 0.25417129479575934, + "learning_rate": 9.789447507529263e-05, + "loss": 0.5697, "step": 1481 }, { - "epoch": 0.20741777466759972, - "grad_norm": 0.4238910915601293, - "learning_rate": 4.599449707777829e-05, - "loss": 0.5886, + "epoch": 0.4147774979009236, + "grad_norm": 0.23407711090966488, + "learning_rate": 9.78900435132165e-05, + "loss": 0.5802, "step": 1482 }, { - "epoch": 0.20755773268019595, - "grad_norm": 0.44358369925735036, - "learning_rate": 4.5988341762205125e-05, - "loss": 0.6162, + "epoch": 0.41505737475510773, + "grad_norm": 0.24349506605245003, + "learning_rate": 9.788560739293921e-05, + "loss": 0.5787, "step": 1483 }, { - "epoch": 0.20769769069279218, - "grad_norm": 0.40620154391670654, - "learning_rate": 4.5982182133314765e-05, - "loss": 0.5666, + "epoch": 0.4153372516092919, + "grad_norm": 0.2439399587049185, + "learning_rate": 9.788116671488301e-05, + "loss": 0.5823, "step": 1484 }, { - "epoch": 0.20783764870538837, - "grad_norm": 0.4474470089623319, - "learning_rate": 4.5976018192373086e-05, - "loss": 0.6601, + "epoch": 0.4156171284634761, + "grad_norm": 0.23938444715747859, + "learning_rate": 9.787672147947055e-05, + "loss": 0.5869, "step": 1485 }, { - "epoch": 0.2079776067179846, - "grad_norm": 0.41753387085884, - "learning_rate": 4.5969849940646834e-05, - "loss": 0.6043, + "epoch": 0.4158970053176602, + "grad_norm": 0.23789190270077706, + "learning_rate": 9.787227168712496e-05, + "loss": 0.5518, "step": 1486 }, { - "epoch": 0.20811756473058082, - "grad_norm": 0.42752671322476543, - "learning_rate": 4.596367737940366e-05, - "loss": 0.6443, + "epoch": 0.4161768821718444, + "grad_norm": 0.2511551205740477, + "learning_rate": 9.786781733826975e-05, + "loss": 0.5698, "step": 1487 }, { - "epoch": 0.20825752274317705, - "grad_norm": 0.42699115737832166, - "learning_rate": 4.595750050991207e-05, - "loss": 0.6025, + "epoch": 0.4164567590260285, + "grad_norm": 0.24037779375713417, + "learning_rate": 9.786335843332888e-05, + "loss": 0.5774, "step": 1488 }, { - "epoch": 0.20839748075577327, - "grad_norm": 0.45076801294244706, - "learning_rate": 4.595131933344148e-05, - "loss": 0.6386, + "epoch": 0.4167366358802127, + "grad_norm": 0.2463066275549332, + "learning_rate": 9.785889497272677e-05, + "loss": 0.5691, "step": 1489 }, { - "epoch": 0.2085374387683695, - "grad_norm": 0.4322528381202846, - "learning_rate": 4.594513385126218e-05, - "loss": 0.5979, + "epoch": 0.4170165127343969, + "grad_norm": 0.24876426837688265, + "learning_rate": 9.785442695688826e-05, + "loss": 0.5734, "step": 1490 }, { - "epoch": 0.20867739678096572, - "grad_norm": 0.4400100496903223, - "learning_rate": 4.593894406464537e-05, - "loss": 0.6226, + "epoch": 0.417296389588581, + "grad_norm": 0.2607685450251353, + "learning_rate": 9.784995438623861e-05, + "loss": 0.5504, "step": 1491 }, { - "epoch": 0.20881735479356192, - "grad_norm": 0.4424527757233975, - "learning_rate": 4.593274997486309e-05, - "loss": 0.594, + "epoch": 0.4175762664427652, + "grad_norm": 0.2560398653950305, + "learning_rate": 9.78454772612035e-05, + "loss": 0.5578, "step": 1492 }, { - "epoch": 0.20895731280615815, - "grad_norm": 0.42378140749175675, - "learning_rate": 4.592655158318829e-05, - "loss": 0.6122, + "epoch": 0.41785614329694937, + "grad_norm": 0.2570477421034459, + "learning_rate": 9.784099558220909e-05, + "loss": 0.5777, "step": 1493 }, { - "epoch": 0.20909727081875437, - "grad_norm": 0.43529413412957524, - "learning_rate": 4.592034889089482e-05, - "loss": 0.6246, + "epoch": 0.4181360201511335, + "grad_norm": 0.24709753942346674, + "learning_rate": 9.783650934968196e-05, + "loss": 0.5692, "step": 1494 }, { - "epoch": 0.2092372288313506, - "grad_norm": 0.4336529824699541, - "learning_rate": 4.591414189925739e-05, - "loss": 0.5778, + "epoch": 0.41841589700531767, + "grad_norm": 0.2418074136805273, + "learning_rate": 9.783201856404907e-05, + "loss": 0.5849, "step": 1495 }, { - "epoch": 0.20937718684394682, - "grad_norm": 0.4397235048236114, - "learning_rate": 4.5907930609551584e-05, - "loss": 0.6468, + "epoch": 0.4186957738595018, + "grad_norm": 0.24516351210637674, + "learning_rate": 9.782752322573789e-05, + "loss": 0.579, "step": 1496 }, { - "epoch": 0.20951714485654305, - "grad_norm": 0.41913091394972535, - "learning_rate": 4.59017150230539e-05, - "loss": 0.6343, + "epoch": 0.418975650713686, + "grad_norm": 0.2317373143926596, + "learning_rate": 9.782302333517628e-05, + "loss": 0.5748, "step": 1497 }, { - "epoch": 0.20965710286913924, - "grad_norm": 0.4402774808830381, - "learning_rate": 4.58954951410417e-05, - "loss": 0.6032, + "epoch": 0.41925552756787016, + "grad_norm": 0.23625423054299133, + "learning_rate": 9.781851889279255e-05, + "loss": 0.5609, "step": 1498 }, { - "epoch": 0.20979706088173547, - "grad_norm": 0.40993950169452836, - "learning_rate": 4.588927096479323e-05, - "loss": 0.5885, + "epoch": 0.4195354044220543, + "grad_norm": 0.24961648317309634, + "learning_rate": 9.781400989901541e-05, + "loss": 0.5644, "step": 1499 }, { - "epoch": 0.2099370188943317, - "grad_norm": 0.4319112358311373, - "learning_rate": 4.5883042495587637e-05, - "loss": 0.6243, + "epoch": 0.41981528127623846, + "grad_norm": 0.25003382796083357, + "learning_rate": 9.780949635427406e-05, + "loss": 0.5789, "step": 1500 }, { - "epoch": 0.21007697690692792, - "grad_norm": 0.4312366729366661, - "learning_rate": 4.587680973470491e-05, - "loss": 0.6154, + "epoch": 0.42009515813042264, + "grad_norm": 0.23860510714669342, + "learning_rate": 9.780497825899807e-05, + "loss": 0.5395, "step": 1501 }, { - "epoch": 0.21021693491952415, - "grad_norm": 0.4262355331023565, - "learning_rate": 4.587057268342597e-05, - "loss": 0.5766, + "epoch": 0.42037503498460677, + "grad_norm": 0.23884887456756274, + "learning_rate": 9.78004556136175e-05, + "loss": 0.5553, "step": 1502 }, { - "epoch": 0.21035689293212037, - "grad_norm": 0.4142051911565897, - "learning_rate": 4.586433134303257e-05, - "loss": 0.5607, + "epoch": 0.42065491183879095, + "grad_norm": 0.24634040348102276, + "learning_rate": 9.779592841856282e-05, + "loss": 0.5735, "step": 1503 }, { - "epoch": 0.2104968509447166, - "grad_norm": 0.4286460156115378, - "learning_rate": 4.5858085714807384e-05, - "loss": 0.6047, + "epoch": 0.42093478869297507, + "grad_norm": 0.235482109411473, + "learning_rate": 9.77913966742649e-05, + "loss": 0.5867, "step": 1504 }, { - "epoch": 0.2106368089573128, - "grad_norm": 0.42524352191943315, - "learning_rate": 4.585183580003395e-05, - "loss": 0.6386, + "epoch": 0.42121466554715925, + "grad_norm": 0.24696965456900874, + "learning_rate": 9.77868603811551e-05, + "loss": 0.5865, "step": 1505 }, { - "epoch": 0.21077676696990902, - "grad_norm": 0.4287194392081206, - "learning_rate": 4.584558159999668e-05, - "loss": 0.5917, + "epoch": 0.42149454240134343, + "grad_norm": 0.25493276759446354, + "learning_rate": 9.778231953966519e-05, + "loss": 0.5481, "step": 1506 }, { - "epoch": 0.21091672498250524, - "grad_norm": 0.43676644200274517, - "learning_rate": 4.583932311598089e-05, - "loss": 0.6053, + "epoch": 0.42177441925552756, + "grad_norm": 0.2622234052682419, + "learning_rate": 9.777777415022736e-05, + "loss": 0.5786, "step": 1507 }, { - "epoch": 0.21105668299510147, - "grad_norm": 0.4555410229607422, - "learning_rate": 4.583306034927275e-05, - "loss": 0.5908, + "epoch": 0.42205429610971174, + "grad_norm": 0.2357278357950739, + "learning_rate": 9.777322421327424e-05, + "loss": 0.555, "step": 1508 }, { - "epoch": 0.2111966410076977, - "grad_norm": 0.45902732656697165, - "learning_rate": 4.582679330115933e-05, - "loss": 0.6039, + "epoch": 0.42233417296389586, + "grad_norm": 0.25881963337215835, + "learning_rate": 9.776866972923891e-05, + "loss": 0.5609, "step": 1509 }, { - "epoch": 0.21133659902029392, - "grad_norm": 0.4025365324445434, - "learning_rate": 4.582052197292856e-05, - "loss": 0.581, + "epoch": 0.42261404981808004, + "grad_norm": 0.25257102830171657, + "learning_rate": 9.776411069855485e-05, + "loss": 0.5502, "step": 1510 }, { - "epoch": 0.21147655703289014, - "grad_norm": 0.3996651665725778, - "learning_rate": 4.581424636586929e-05, - "loss": 0.5978, + "epoch": 0.4228939266722642, + "grad_norm": 0.24171241739880903, + "learning_rate": 9.7759547121656e-05, + "loss": 0.5907, "step": 1511 }, { - "epoch": 0.21161651504548634, - "grad_norm": 0.4015353636671318, - "learning_rate": 4.580796648127118e-05, - "loss": 0.5862, + "epoch": 0.42317380352644834, + "grad_norm": 0.2390417244023811, + "learning_rate": 9.775497899897672e-05, + "loss": 0.5541, "step": 1512 }, { - "epoch": 0.21175647305808257, - "grad_norm": 0.44031192059103713, - "learning_rate": 4.580168232042484e-05, - "loss": 0.6566, + "epoch": 0.4234536803806325, + "grad_norm": 0.24817087359921536, + "learning_rate": 9.77504063309518e-05, + "loss": 0.5823, "step": 1513 }, { - "epoch": 0.2118964310706788, - "grad_norm": 0.42413167616716185, - "learning_rate": 4.579539388462173e-05, - "loss": 0.6368, + "epoch": 0.4237335572348167, + "grad_norm": 0.29322666574088346, + "learning_rate": 9.77458291180165e-05, + "loss": 0.5645, "step": 1514 }, { - "epoch": 0.21203638908327502, - "grad_norm": 0.4357570843764536, - "learning_rate": 4.578910117515416e-05, - "loss": 0.5574, + "epoch": 0.42401343408900083, + "grad_norm": 0.23208189125798345, + "learning_rate": 9.774124736060644e-05, + "loss": 0.582, "step": 1515 }, { - "epoch": 0.21217634709587124, - "grad_norm": 0.4443836991084177, - "learning_rate": 4.578280419331538e-05, - "loss": 0.6129, + "epoch": 0.424293310943185, + "grad_norm": 0.23723817455860452, + "learning_rate": 9.773666105915776e-05, + "loss": 0.5581, "step": 1516 }, { - "epoch": 0.21231630510846747, - "grad_norm": 0.413705821328803, - "learning_rate": 4.5776502940399454e-05, - "loss": 0.5594, + "epoch": 0.42457318779736913, + "grad_norm": 0.25083464625496504, + "learning_rate": 9.773207021410693e-05, + "loss": 0.5738, "step": 1517 }, { - "epoch": 0.2124562631210637, - "grad_norm": 0.4167212326575854, - "learning_rate": 4.5770197417701365e-05, - "loss": 0.6189, + "epoch": 0.4248530646515533, + "grad_norm": 0.23059374288413967, + "learning_rate": 9.772747482589096e-05, + "loss": 0.5561, "step": 1518 }, { - "epoch": 0.2125962211336599, - "grad_norm": 0.4143995752113595, - "learning_rate": 4.576388762651697e-05, - "loss": 0.611, + "epoch": 0.4251329415057375, + "grad_norm": 0.25229853454428236, + "learning_rate": 9.772287489494723e-05, + "loss": 0.5642, "step": 1519 }, { - "epoch": 0.21273617914625612, - "grad_norm": 0.42271548484517474, - "learning_rate": 4.575757356814299e-05, - "loss": 0.6259, + "epoch": 0.4254128183599216, + "grad_norm": 0.23799928620613445, + "learning_rate": 9.771827042171353e-05, + "loss": 0.5618, "step": 1520 }, { - "epoch": 0.21287613715885234, - "grad_norm": 0.44271943318208246, - "learning_rate": 4.5751255243877015e-05, - "loss": 0.6315, + "epoch": 0.4256926952141058, + "grad_norm": 0.2425613119222005, + "learning_rate": 9.771366140662816e-05, + "loss": 0.5904, "step": 1521 }, { - "epoch": 0.21301609517144857, - "grad_norm": 0.4701146597325822, - "learning_rate": 4.574493265501755e-05, - "loss": 0.5963, + "epoch": 0.42597257206829, + "grad_norm": 0.25360883243210447, + "learning_rate": 9.770904785012978e-05, + "loss": 0.6064, "step": 1522 }, { - "epoch": 0.2131560531840448, - "grad_norm": 0.4347056827455416, - "learning_rate": 4.573860580286392e-05, - "loss": 0.5977, + "epoch": 0.4262524489224741, + "grad_norm": 0.23903796352532092, + "learning_rate": 9.770442975265752e-05, + "loss": 0.5807, "step": 1523 }, { - "epoch": 0.21329601119664102, - "grad_norm": 0.42401546255892897, - "learning_rate": 4.573227468871639e-05, - "loss": 0.6176, + "epoch": 0.4265323257766583, + "grad_norm": 0.2367605837276456, + "learning_rate": 9.769980711465094e-05, + "loss": 0.5687, "step": 1524 }, { - "epoch": 0.21343596920923724, - "grad_norm": 0.4263159567462123, - "learning_rate": 4.572593931387604e-05, - "loss": 0.5695, + "epoch": 0.4268122026308424, + "grad_norm": 0.2589316099254001, + "learning_rate": 9.769517993655003e-05, + "loss": 0.5608, "step": 1525 }, { - "epoch": 0.21357592722183344, - "grad_norm": 0.4121278643431697, - "learning_rate": 4.571959967964488e-05, - "loss": 0.5902, + "epoch": 0.4270920794850266, + "grad_norm": 0.2443355361513885, + "learning_rate": 9.76905482187952e-05, + "loss": 0.5851, "step": 1526 }, { - "epoch": 0.21371588523442966, - "grad_norm": 0.4158048949697275, - "learning_rate": 4.571325578732575e-05, - "loss": 0.626, + "epoch": 0.42737195633921077, + "grad_norm": 0.25885465164488697, + "learning_rate": 9.768591196182729e-05, + "loss": 0.5841, "step": 1527 }, { - "epoch": 0.2138558432470259, - "grad_norm": 0.4183332266945186, - "learning_rate": 4.5706907638222385e-05, - "loss": 0.6295, + "epoch": 0.4276518331933949, + "grad_norm": 0.25052779405674647, + "learning_rate": 9.768127116608758e-05, + "loss": 0.5751, "step": 1528 }, { - "epoch": 0.21399580125962211, - "grad_norm": 0.42044986223624303, - "learning_rate": 4.57005552336394e-05, - "loss": 0.5776, + "epoch": 0.4279317100475791, + "grad_norm": 0.23999292142611786, + "learning_rate": 9.767662583201779e-05, + "loss": 0.5597, "step": 1529 }, { - "epoch": 0.21413575927221834, - "grad_norm": 0.4237626863099945, - "learning_rate": 4.569419857488228e-05, - "loss": 0.602, + "epoch": 0.4282115869017632, + "grad_norm": 0.2296914558111579, + "learning_rate": 9.767197596006008e-05, + "loss": 0.5628, "step": 1530 }, { - "epoch": 0.21427571728481457, - "grad_norm": 0.4235915955493986, - "learning_rate": 4.568783766325738e-05, - "loss": 0.6088, + "epoch": 0.4284914637559474, + "grad_norm": 0.2460827362281314, + "learning_rate": 9.7667321550657e-05, + "loss": 0.5662, "step": 1531 }, { - "epoch": 0.2144156752974108, - "grad_norm": 0.4429373953075251, - "learning_rate": 4.568147250007193e-05, - "loss": 0.6244, + "epoch": 0.42877134061013156, + "grad_norm": 0.2480536926331666, + "learning_rate": 9.766266260425159e-05, + "loss": 0.5888, "step": 1532 }, { - "epoch": 0.214555633310007, - "grad_norm": 0.4312825642404986, - "learning_rate": 4.567510308663404e-05, - "loss": 0.5844, + "epoch": 0.4290512174643157, + "grad_norm": 0.24603486571963212, + "learning_rate": 9.765799912128725e-05, + "loss": 0.6004, "step": 1533 }, { - "epoch": 0.2146955913226032, - "grad_norm": 0.447768930946618, - "learning_rate": 4.5668729424252686e-05, - "loss": 0.6446, + "epoch": 0.42933109431849986, + "grad_norm": 0.2332324033244973, + "learning_rate": 9.765333110220792e-05, + "loss": 0.5453, "step": 1534 }, { - "epoch": 0.21483554933519944, - "grad_norm": 0.4031120444622804, - "learning_rate": 4.5662351514237725e-05, - "loss": 0.6134, + "epoch": 0.42961097117268404, + "grad_norm": 0.25109569714469726, + "learning_rate": 9.764865854745784e-05, + "loss": 0.5367, "step": 1535 }, { - "epoch": 0.21497550734779566, - "grad_norm": 0.4102236640467674, - "learning_rate": 4.5655969357899874e-05, - "loss": 0.5515, + "epoch": 0.42989084802686817, + "grad_norm": 0.2407370658163797, + "learning_rate": 9.764398145748176e-05, + "loss": 0.5739, "step": 1536 }, { - "epoch": 0.2151154653603919, - "grad_norm": 0.4184306996067782, - "learning_rate": 4.564958295655074e-05, - "loss": 0.5886, + "epoch": 0.43017072488105235, + "grad_norm": 0.2452166003329777, + "learning_rate": 9.763929983272486e-05, + "loss": 0.5526, "step": 1537 }, { - "epoch": 0.21525542337298811, - "grad_norm": 0.4061735300983383, - "learning_rate": 4.564319231150278e-05, - "loss": 0.5966, + "epoch": 0.43045060173523647, + "grad_norm": 0.23973167204459642, + "learning_rate": 9.763461367363276e-05, + "loss": 0.5625, "step": 1538 }, { - "epoch": 0.2153953813855843, - "grad_norm": 0.4337440887551534, - "learning_rate": 4.563679742406935e-05, - "loss": 0.5803, + "epoch": 0.43073047858942065, + "grad_norm": 0.24240553263222253, + "learning_rate": 9.762992298065144e-05, + "loss": 0.5596, "step": 1539 }, { - "epoch": 0.21553533939818054, - "grad_norm": 0.4509938114765241, - "learning_rate": 4.5630398295564656e-05, - "loss": 0.6328, + "epoch": 0.43101035544360483, + "grad_norm": 0.24681000971969366, + "learning_rate": 9.762522775422741e-05, + "loss": 0.5452, "step": 1540 }, { - "epoch": 0.21567529741077676, - "grad_norm": 0.4152270913030905, - "learning_rate": 4.562399492730379e-05, - "loss": 0.5656, + "epoch": 0.43129023229778896, + "grad_norm": 0.2557476843343845, + "learning_rate": 9.762052799480755e-05, + "loss": 0.58, "step": 1541 }, { - "epoch": 0.215815255423373, - "grad_norm": 0.42027118148073633, - "learning_rate": 4.561758732060271e-05, - "loss": 0.6141, + "epoch": 0.43157010915197314, + "grad_norm": 0.23828427931263624, + "learning_rate": 9.761582370283915e-05, + "loss": 0.5738, "step": 1542 }, { - "epoch": 0.2159552134359692, - "grad_norm": 0.4187821883367385, - "learning_rate": 4.561117547677824e-05, - "loss": 0.5893, + "epoch": 0.4318499860061573, + "grad_norm": 0.256106565070577, + "learning_rate": 9.761111487877001e-05, + "loss": 0.5881, "step": 1543 }, { - "epoch": 0.21609517144856544, - "grad_norm": 0.45872625216573915, - "learning_rate": 4.5604759397148076e-05, - "loss": 0.6301, + "epoch": 0.43212986286034144, + "grad_norm": 0.2464782551768715, + "learning_rate": 9.760640152304833e-05, + "loss": 0.5608, "step": 1544 }, { - "epoch": 0.21623512946116166, - "grad_norm": 0.46161111705747715, - "learning_rate": 4.559833908303079e-05, - "loss": 0.6104, + "epoch": 0.4324097397145256, + "grad_norm": 0.23747551380698148, + "learning_rate": 9.76016836361227e-05, + "loss": 0.5713, "step": 1545 }, { - "epoch": 0.21637508747375786, - "grad_norm": 0.41864006503435036, - "learning_rate": 4.559191453574582e-05, - "loss": 0.5886, + "epoch": 0.43268961656870975, + "grad_norm": 0.2548872684408437, + "learning_rate": 9.759696121844215e-05, + "loss": 0.5618, "step": 1546 }, { - "epoch": 0.21651504548635409, - "grad_norm": 0.4526739247776462, - "learning_rate": 4.5585485756613486e-05, - "loss": 0.6058, + "epoch": 0.4329694934228939, + "grad_norm": 0.23686173170856198, + "learning_rate": 9.759223427045622e-05, + "loss": 0.5847, "step": 1547 }, { - "epoch": 0.2166550034989503, - "grad_norm": 0.43967142639314527, - "learning_rate": 4.5579052746954955e-05, - "loss": 0.5684, + "epoch": 0.4332493702770781, + "grad_norm": 0.2324906952052999, + "learning_rate": 9.758750279261478e-05, + "loss": 0.5596, "step": 1548 }, { - "epoch": 0.21679496151154654, - "grad_norm": 0.6571015727605314, - "learning_rate": 4.557261550809228e-05, - "loss": 0.6116, + "epoch": 0.43352924713126223, + "grad_norm": 0.22842513283373136, + "learning_rate": 9.75827667853682e-05, + "loss": 0.5743, "step": 1549 }, { - "epoch": 0.21693491952414276, - "grad_norm": 0.42734729410443334, - "learning_rate": 4.5566174041348374e-05, - "loss": 0.5922, + "epoch": 0.4338091239854464, + "grad_norm": 0.23168661869478185, + "learning_rate": 9.757802624916723e-05, + "loss": 0.5697, "step": 1550 }, { - "epoch": 0.21707487753673899, - "grad_norm": 0.45611337371107136, - "learning_rate": 4.555972834804704e-05, - "loss": 0.6523, + "epoch": 0.4340890008396306, + "grad_norm": 0.25140947684336423, + "learning_rate": 9.757328118446309e-05, + "loss": 0.5819, "step": 1551 }, { - "epoch": 0.2172148355493352, - "grad_norm": 0.40089163017994156, - "learning_rate": 4.5553278429512914e-05, - "loss": 0.5866, + "epoch": 0.4343688776938147, + "grad_norm": 0.2417463533284766, + "learning_rate": 9.756853159170742e-05, + "loss": 0.5475, "step": 1552 }, { - "epoch": 0.2173547935619314, - "grad_norm": 0.40519806476960724, - "learning_rate": 4.554682428707153e-05, - "loss": 0.6494, + "epoch": 0.4346487545479989, + "grad_norm": 0.22934434928297026, + "learning_rate": 9.75637774713523e-05, + "loss": 0.5475, "step": 1553 }, { - "epoch": 0.21749475157452763, - "grad_norm": 0.41428429253275884, - "learning_rate": 4.5540365922049275e-05, - "loss": 0.6035, + "epoch": 0.434928631402183, + "grad_norm": 0.24110459703507403, + "learning_rate": 9.755901882385021e-05, + "loss": 0.5679, "step": 1554 }, { - "epoch": 0.21763470958712386, - "grad_norm": 0.4216731233907631, - "learning_rate": 4.553390333577342e-05, - "loss": 0.5913, + "epoch": 0.4352085082563672, + "grad_norm": 0.2435000187314083, + "learning_rate": 9.75542556496541e-05, + "loss": 0.5409, "step": 1555 }, { - "epoch": 0.21777466759972008, - "grad_norm": 0.4773910626390572, - "learning_rate": 4.552743652957208e-05, - "loss": 0.5793, + "epoch": 0.4354883851105514, + "grad_norm": 0.2512030616213865, + "learning_rate": 9.75494879492173e-05, + "loss": 0.5712, "step": 1556 }, { - "epoch": 0.2179146256123163, - "grad_norm": 0.4386315899864147, - "learning_rate": 4.5520965504774246e-05, - "loss": 0.598, + "epoch": 0.4357682619647355, + "grad_norm": 0.2490874474172369, + "learning_rate": 9.754471572299363e-05, + "loss": 0.5814, "step": 1557 }, { - "epoch": 0.21805458362491253, - "grad_norm": 0.41469985672115917, - "learning_rate": 4.551449026270979e-05, - "loss": 0.5831, + "epoch": 0.4360481388189197, + "grad_norm": 0.2516458467604343, + "learning_rate": 9.753993897143731e-05, + "loss": 0.5688, "step": 1558 }, { - "epoch": 0.21819454163750876, - "grad_norm": 0.41848055045754795, - "learning_rate": 4.5508010804709434e-05, - "loss": 0.5948, + "epoch": 0.4363280156731038, + "grad_norm": 0.23361860635570805, + "learning_rate": 9.753515769500299e-05, + "loss": 0.5689, "step": 1559 }, { - "epoch": 0.21833449965010496, - "grad_norm": 0.4093401936521381, - "learning_rate": 4.550152713210478e-05, - "loss": 0.6166, + "epoch": 0.436607892527288, + "grad_norm": 0.2587815211957352, + "learning_rate": 9.753037189414575e-05, + "loss": 0.582, "step": 1560 }, { - "epoch": 0.21847445766270118, - "grad_norm": 0.39080382196363417, - "learning_rate": 4.5495039246228274e-05, - "loss": 0.5615, + "epoch": 0.43688776938147217, + "grad_norm": 0.24267621433328138, + "learning_rate": 9.75255815693211e-05, + "loss": 0.5866, "step": 1561 }, { - "epoch": 0.2186144156752974, - "grad_norm": 0.4548223020948238, - "learning_rate": 4.548854714841326e-05, - "loss": 0.6275, + "epoch": 0.4371676462356563, + "grad_norm": 0.24249945491986472, + "learning_rate": 9.752078672098502e-05, + "loss": 0.561, "step": 1562 }, { - "epoch": 0.21875437368789363, - "grad_norm": 0.4423455420604275, - "learning_rate": 4.548205083999392e-05, - "loss": 0.6453, + "epoch": 0.4374475230898405, + "grad_norm": 0.2475570815863539, + "learning_rate": 9.751598734959384e-05, + "loss": 0.5814, "step": 1563 }, { - "epoch": 0.21889433170048986, - "grad_norm": 0.43733043542862093, - "learning_rate": 4.547555032230531e-05, - "loss": 0.624, + "epoch": 0.43772739994402465, + "grad_norm": 0.231386974038659, + "learning_rate": 9.75111834556044e-05, + "loss": 0.5391, "step": 1564 }, { - "epoch": 0.21903428971308608, - "grad_norm": 0.4353839094146027, - "learning_rate": 4.546904559668335e-05, - "loss": 0.6306, + "epoch": 0.4380072767982088, + "grad_norm": 0.24466180538359716, + "learning_rate": 9.750637503947391e-05, + "loss": 0.5837, "step": 1565 }, { - "epoch": 0.2191742477256823, - "grad_norm": 0.43066663949560247, - "learning_rate": 4.546253666446484e-05, - "loss": 0.5603, + "epoch": 0.43828715365239296, + "grad_norm": 0.2417070291325543, + "learning_rate": 9.750156210166006e-05, + "loss": 0.5797, "step": 1566 }, { - "epoch": 0.2193142057382785, - "grad_norm": 0.43310274897745943, - "learning_rate": 4.545602352698742e-05, - "loss": 0.6276, + "epoch": 0.4385670305065771, + "grad_norm": 0.23817213169576182, + "learning_rate": 9.749674464262094e-05, + "loss": 0.5782, "step": 1567 }, { - "epoch": 0.21945416375087473, - "grad_norm": 0.4406898711500198, - "learning_rate": 4.544950618558961e-05, - "loss": 0.6111, + "epoch": 0.43884690736076126, + "grad_norm": 0.26182755912460104, + "learning_rate": 9.749192266281508e-05, + "loss": 0.5454, "step": 1568 }, { - "epoch": 0.21959412176347096, - "grad_norm": 0.44678818353637145, - "learning_rate": 4.544298464161079e-05, - "loss": 0.6159, + "epoch": 0.43912678421494544, + "grad_norm": 0.23464678913914763, + "learning_rate": 9.748709616270144e-05, + "loss": 0.5851, "step": 1569 }, { - "epoch": 0.21973407977606718, - "grad_norm": 0.43974972225616304, - "learning_rate": 4.54364588963912e-05, - "loss": 0.6103, + "epoch": 0.43940666106912957, + "grad_norm": 0.24434762824944903, + "learning_rate": 9.748226514273941e-05, + "loss": 0.5386, "step": 1570 }, { - "epoch": 0.2198740377886634, - "grad_norm": 0.4450084247496375, - "learning_rate": 4.542992895127195e-05, - "loss": 0.6351, + "epoch": 0.43968653792331375, + "grad_norm": 0.24697878019438874, + "learning_rate": 9.747742960338881e-05, + "loss": 0.5731, "step": 1571 }, { - "epoch": 0.22001399580125963, - "grad_norm": 0.4301181992679436, - "learning_rate": 4.5423394807595005e-05, - "loss": 0.5929, + "epoch": 0.4399664147774979, + "grad_norm": 0.23124415337553447, + "learning_rate": 9.747258954510989e-05, + "loss": 0.5824, "step": 1572 }, { - "epoch": 0.22015395381385583, - "grad_norm": 0.4354118703683763, - "learning_rate": 4.541685646670321e-05, - "loss": 0.6417, + "epoch": 0.44024629163168205, + "grad_norm": 0.23813567944378347, + "learning_rate": 9.746774496836332e-05, + "loss": 0.5638, "step": 1573 }, { - "epoch": 0.22029391182645205, - "grad_norm": 0.42020542688000095, - "learning_rate": 4.5410313929940244e-05, - "loss": 0.5609, + "epoch": 0.44052616848586623, + "grad_norm": 0.24249100484216313, + "learning_rate": 9.746289587361021e-05, + "loss": 0.5721, "step": 1574 }, { - "epoch": 0.22043386983904828, - "grad_norm": 0.405854594592669, - "learning_rate": 4.5403767198650683e-05, - "loss": 0.6229, + "epoch": 0.44080604534005036, + "grad_norm": 0.25464163267322554, + "learning_rate": 9.745804226131211e-05, + "loss": 0.5585, "step": 1575 }, { - "epoch": 0.2205738278516445, - "grad_norm": 0.4325219516484385, - "learning_rate": 4.5397216274179934e-05, - "loss": 0.5922, + "epoch": 0.44108592219423454, + "grad_norm": 0.24342188484477878, + "learning_rate": 9.745318413193099e-05, + "loss": 0.5801, "step": 1576 }, { - "epoch": 0.22071378586424073, - "grad_norm": 0.42884858732377196, - "learning_rate": 4.539066115787427e-05, - "loss": 0.5937, + "epoch": 0.4413657990484187, + "grad_norm": 0.2563628080106205, + "learning_rate": 9.744832148592923e-05, + "loss": 0.5895, "step": 1577 }, { - "epoch": 0.22085374387683696, - "grad_norm": 0.4397303655395074, - "learning_rate": 4.5384101851080864e-05, - "loss": 0.6208, + "epoch": 0.44164567590260284, + "grad_norm": 0.23789494903848302, + "learning_rate": 9.744345432376967e-05, + "loss": 0.5859, "step": 1578 }, { - "epoch": 0.22099370188943318, - "grad_norm": 0.4196787008923647, - "learning_rate": 4.537753835514769e-05, - "loss": 0.5936, + "epoch": 0.441925552756787, + "grad_norm": 0.2462756686319238, + "learning_rate": 9.743858264591559e-05, + "loss": 0.5451, "step": 1579 }, { - "epoch": 0.22113365990202938, - "grad_norm": 0.4450498558445989, - "learning_rate": 4.537097067142363e-05, - "loss": 0.5795, + "epoch": 0.44220542961097115, + "grad_norm": 0.23791103680326336, + "learning_rate": 9.743370645283066e-05, + "loss": 0.5452, "step": 1580 }, { - "epoch": 0.2212736179146256, - "grad_norm": 0.4533941983871524, - "learning_rate": 4.5364398801258396e-05, - "loss": 0.5856, + "epoch": 0.4424853064651553, + "grad_norm": 0.2279743487899946, + "learning_rate": 9.742882574497898e-05, + "loss": 0.5452, "step": 1581 }, { - "epoch": 0.22141357592722183, - "grad_norm": 0.4417713842234416, - "learning_rate": 4.5357822746002586e-05, - "loss": 0.6313, + "epoch": 0.4427651833193395, + "grad_norm": 0.22940390173471523, + "learning_rate": 9.742394052282513e-05, + "loss": 0.574, "step": 1582 }, { - "epoch": 0.22155353393981805, - "grad_norm": 0.4463677159437907, - "learning_rate": 4.535124250700764e-05, - "loss": 0.625, + "epoch": 0.44304506017352363, + "grad_norm": 0.23052987503067043, + "learning_rate": 9.741905078683407e-05, + "loss": 0.5448, "step": 1583 }, { - "epoch": 0.22169349195241428, - "grad_norm": 0.40975804674998356, - "learning_rate": 4.534465808562587e-05, - "loss": 0.5485, + "epoch": 0.4433249370277078, + "grad_norm": 0.24075917632009064, + "learning_rate": 9.741415653747123e-05, + "loss": 0.5501, "step": 1584 }, { - "epoch": 0.2218334499650105, - "grad_norm": 0.4543432488989666, - "learning_rate": 4.533806948321044e-05, - "loss": 0.6165, + "epoch": 0.443604813881892, + "grad_norm": 0.23917340606953735, + "learning_rate": 9.740925777520242e-05, + "loss": 0.5458, "step": 1585 }, { - "epoch": 0.22197340797760673, - "grad_norm": 0.44872454774080367, - "learning_rate": 4.5331476701115366e-05, - "loss": 0.6361, + "epoch": 0.4438846907360761, + "grad_norm": 0.23908385812899932, + "learning_rate": 9.740435450049392e-05, + "loss": 0.5795, "step": 1586 }, { - "epoch": 0.22211336599020293, - "grad_norm": 0.44712318439982773, - "learning_rate": 4.532487974069554e-05, - "loss": 0.5872, + "epoch": 0.4441645675902603, + "grad_norm": 0.2401723385481669, + "learning_rate": 9.739944671381243e-05, + "loss": 0.5602, "step": 1587 }, { - "epoch": 0.22225332400279915, - "grad_norm": 0.4396204586538607, - "learning_rate": 4.53182786033067e-05, - "loss": 0.6105, + "epoch": 0.4444444444444444, + "grad_norm": 0.23661393948970766, + "learning_rate": 9.739453441562507e-05, + "loss": 0.5552, "step": 1588 }, { - "epoch": 0.22239328201539538, - "grad_norm": 0.4455977172934511, - "learning_rate": 4.531167329030545e-05, - "loss": 0.624, + "epoch": 0.4447243212986286, + "grad_norm": 0.2345794145401572, + "learning_rate": 9.73896176063994e-05, + "loss": 0.5464, "step": 1589 }, { - "epoch": 0.2225332400279916, - "grad_norm": 0.4556263334519999, - "learning_rate": 4.530506380304925e-05, - "loss": 0.6265, + "epoch": 0.4450041981528128, + "grad_norm": 0.23041977175787082, + "learning_rate": 9.73846962866034e-05, + "loss": 0.5612, "step": 1590 }, { - "epoch": 0.22267319804058783, - "grad_norm": 0.42577140585892975, - "learning_rate": 4.529845014289642e-05, - "loss": 0.6105, + "epoch": 0.4452840750069969, + "grad_norm": 0.2530760752289133, + "learning_rate": 9.737977045670548e-05, + "loss": 0.5611, "step": 1591 }, { - "epoch": 0.22281315605318405, - "grad_norm": 0.4180730424078042, - "learning_rate": 4.529183231120612e-05, - "loss": 0.5895, + "epoch": 0.4455639518611811, + "grad_norm": 0.24151200181727236, + "learning_rate": 9.737484011717448e-05, + "loss": 0.5486, "step": 1592 }, { - "epoch": 0.22295311406578028, - "grad_norm": 0.40699329023671954, - "learning_rate": 4.528521030933839e-05, - "loss": 0.6161, + "epoch": 0.44584382871536526, + "grad_norm": 0.24737202608433992, + "learning_rate": 9.73699052684797e-05, + "loss": 0.5466, "step": 1593 }, { - "epoch": 0.22309307207837648, - "grad_norm": 0.4222068544403408, - "learning_rate": 4.5278584138654116e-05, - "loss": 0.5605, + "epoch": 0.4461237055695494, + "grad_norm": 0.22787072108928844, + "learning_rate": 9.736496591109081e-05, + "loss": 0.5498, "step": 1594 }, { - "epoch": 0.2232330300909727, - "grad_norm": 0.4095845718488545, - "learning_rate": 4.527195380051505e-05, - "loss": 0.6219, + "epoch": 0.44640358242373357, + "grad_norm": 0.23838951228491187, + "learning_rate": 9.736002204547795e-05, + "loss": 0.5587, "step": 1595 }, { - "epoch": 0.22337298810356893, - "grad_norm": 0.4314280489339947, - "learning_rate": 4.526531929628379e-05, - "loss": 0.5701, + "epoch": 0.4466834592779177, + "grad_norm": 0.23089190730961634, + "learning_rate": 9.73550736721117e-05, + "loss": 0.5546, "step": 1596 }, { - "epoch": 0.22351294611616515, - "grad_norm": 0.4074259435374443, - "learning_rate": 4.525868062732379e-05, - "loss": 0.5631, + "epoch": 0.4469633361321019, + "grad_norm": 0.22923545709162893, + "learning_rate": 9.735012079146302e-05, + "loss": 0.5705, "step": 1597 }, { - "epoch": 0.22365290412876138, - "grad_norm": 0.4532813167302565, - "learning_rate": 4.5252037794999375e-05, - "loss": 0.589, + "epoch": 0.44724321298628605, + "grad_norm": 0.23597042843706925, + "learning_rate": 9.734516340400335e-05, + "loss": 0.5516, "step": 1598 }, { - "epoch": 0.2237928621413576, - "grad_norm": 0.4513647564836139, - "learning_rate": 4.52453908006757e-05, - "loss": 0.5948, + "epoch": 0.4475230898404702, + "grad_norm": 0.24282398200242442, + "learning_rate": 9.73402015102045e-05, + "loss": 0.5961, "step": 1599 }, { - "epoch": 0.22393282015395383, - "grad_norm": 0.41293390054677387, - "learning_rate": 4.52387396457188e-05, - "loss": 0.585, + "epoch": 0.44780296669465436, + "grad_norm": 0.23585467598423718, + "learning_rate": 9.73352351105388e-05, + "loss": 0.5841, "step": 1600 }, { - "epoch": 0.22407277816655002, - "grad_norm": 0.43716711655654145, - "learning_rate": 4.523208433149555e-05, - "loss": 0.5717, + "epoch": 0.44808284354883854, + "grad_norm": 0.2630123217166499, + "learning_rate": 9.733026420547892e-05, + "loss": 0.6106, "step": 1601 }, { - "epoch": 0.22421273617914625, - "grad_norm": 0.4258916507285024, - "learning_rate": 4.522542485937369e-05, - "loss": 0.6024, + "epoch": 0.44836272040302266, + "grad_norm": 0.24183326676579633, + "learning_rate": 9.732528879549801e-05, + "loss": 0.5877, "step": 1602 }, { - "epoch": 0.22435269419174247, - "grad_norm": 0.42215745803677884, - "learning_rate": 4.52187612307218e-05, - "loss": 0.564, + "epoch": 0.44864259725720684, + "grad_norm": 0.24433069568397509, + "learning_rate": 9.73203088810696e-05, + "loss": 0.5749, "step": 1603 }, { - "epoch": 0.2244926522043387, - "grad_norm": 0.4429228384575338, - "learning_rate": 4.521209344690933e-05, - "loss": 0.6278, + "epoch": 0.44892247411139097, + "grad_norm": 0.23445527046913978, + "learning_rate": 9.731532446266772e-05, + "loss": 0.5982, "step": 1604 }, { - "epoch": 0.22463261021693492, - "grad_norm": 0.4784015488798018, - "learning_rate": 4.5205421509306576e-05, - "loss": 0.6018, + "epoch": 0.44920235096557515, + "grad_norm": 0.23672023092324654, + "learning_rate": 9.731033554076678e-05, + "loss": 0.5656, "step": 1605 }, { - "epoch": 0.22477256822953115, - "grad_norm": 0.43024752872126554, - "learning_rate": 4.519874541928469e-05, - "loss": 0.6077, + "epoch": 0.44948222781975933, + "grad_norm": 0.25124651998982117, + "learning_rate": 9.730534211584161e-05, + "loss": 0.5578, "step": 1606 }, { - "epoch": 0.22491252624212738, - "grad_norm": 0.41398612904638815, - "learning_rate": 4.519206517821567e-05, - "loss": 0.5996, + "epoch": 0.44976210467394345, + "grad_norm": 0.24062654391573005, + "learning_rate": 9.73003441883675e-05, + "loss": 0.5706, "step": 1607 }, { - "epoch": 0.22505248425472357, - "grad_norm": 0.4141848810116437, - "learning_rate": 4.5185380787472384e-05, - "loss": 0.6349, + "epoch": 0.45004198152812763, + "grad_norm": 0.23863690695406042, + "learning_rate": 9.729534175882016e-05, + "loss": 0.5803, "step": 1608 }, { - "epoch": 0.2251924422673198, - "grad_norm": 0.43155153008345165, - "learning_rate": 4.5178692248428536e-05, - "loss": 0.5882, + "epoch": 0.45032185838231176, + "grad_norm": 0.2364801765216035, + "learning_rate": 9.729033482767572e-05, + "loss": 0.5432, "step": 1609 }, { - "epoch": 0.22533240027991602, - "grad_norm": 0.43080194308434416, - "learning_rate": 4.517199956245869e-05, - "loss": 0.6111, + "epoch": 0.45060173523649594, + "grad_norm": 0.24493619611199438, + "learning_rate": 9.728532339541074e-05, + "loss": 0.541, "step": 1610 }, { - "epoch": 0.22547235829251225, - "grad_norm": 0.42258541276252165, - "learning_rate": 4.516530273093825e-05, - "loss": 0.5848, + "epoch": 0.4508816120906801, + "grad_norm": 0.23030588426816914, + "learning_rate": 9.728030746250221e-05, + "loss": 0.5613, "step": 1611 }, { - "epoch": 0.22561231630510847, - "grad_norm": 0.43019795244061954, - "learning_rate": 4.5158601755243505e-05, - "loss": 0.5885, + "epoch": 0.45116148894486424, + "grad_norm": 0.2435388946534101, + "learning_rate": 9.727528702942755e-05, + "loss": 0.574, "step": 1612 }, { - "epoch": 0.2257522743177047, - "grad_norm": 0.4034883685193691, - "learning_rate": 4.5151896636751556e-05, - "loss": 0.6009, + "epoch": 0.4514413657990484, + "grad_norm": 0.2335591545712549, + "learning_rate": 9.727026209666461e-05, + "loss": 0.5432, "step": 1613 }, { - "epoch": 0.2258922323303009, - "grad_norm": 0.41958257656866, - "learning_rate": 4.514518737684038e-05, - "loss": 0.5881, + "epoch": 0.4517212426532326, + "grad_norm": 0.25186953366070236, + "learning_rate": 9.726523266469167e-05, + "loss": 0.568, "step": 1614 }, { - "epoch": 0.22603219034289712, - "grad_norm": 0.4074178901403192, - "learning_rate": 4.513847397688879e-05, - "loss": 0.5606, + "epoch": 0.4520011195074167, + "grad_norm": 0.2452529418604345, + "learning_rate": 9.726019873398742e-05, + "loss": 0.5742, "step": 1615 }, { - "epoch": 0.22617214835549335, - "grad_norm": 0.44325729803840913, - "learning_rate": 4.513175643827647e-05, - "loss": 0.6336, + "epoch": 0.4522809963616009, + "grad_norm": 0.24267589547772744, + "learning_rate": 9.725516030503101e-05, + "loss": 0.5635, "step": 1616 }, { - "epoch": 0.22631210636808957, - "grad_norm": 0.43312998668445646, - "learning_rate": 4.5125034762383936e-05, - "loss": 0.607, + "epoch": 0.45256087321578503, + "grad_norm": 0.24073431048985094, + "learning_rate": 9.7250117378302e-05, + "loss": 0.5799, "step": 1617 }, { - "epoch": 0.2264520643806858, - "grad_norm": 0.40174739442199564, - "learning_rate": 4.511830895059255e-05, - "loss": 0.5623, + "epoch": 0.4528407500699692, + "grad_norm": 0.23325718270101103, + "learning_rate": 9.724506995428036e-05, + "loss": 0.5849, "step": 1618 }, { - "epoch": 0.22659202239328202, - "grad_norm": 0.43059705945767945, - "learning_rate": 4.511157900428456e-05, - "loss": 0.5922, + "epoch": 0.4531206269241534, + "grad_norm": 0.2314095553527315, + "learning_rate": 9.724001803344652e-05, + "loss": 0.5392, "step": 1619 }, { - "epoch": 0.22673198040587825, - "grad_norm": 0.42985175056584884, - "learning_rate": 4.5104844924843016e-05, - "loss": 0.5719, + "epoch": 0.4534005037783375, + "grad_norm": 0.24920588938072338, + "learning_rate": 9.723496161628132e-05, + "loss": 0.5367, "step": 1620 }, { - "epoch": 0.22687193841847444, - "grad_norm": 0.4503501491245042, - "learning_rate": 4.5098106713651846e-05, - "loss": 0.5888, + "epoch": 0.4536803806325217, + "grad_norm": 0.22744504084522982, + "learning_rate": 9.722990070326604e-05, + "loss": 0.557, "step": 1621 }, { - "epoch": 0.22701189643107067, - "grad_norm": 0.4306003466493788, - "learning_rate": 4.509136437209582e-05, - "loss": 0.5624, + "epoch": 0.4539602574867059, + "grad_norm": 0.25589444054133037, + "learning_rate": 9.722483529488238e-05, + "loss": 0.5929, "step": 1622 }, { - "epoch": 0.2271518544436669, - "grad_norm": 0.413588915370572, - "learning_rate": 4.508461790156056e-05, - "loss": 0.5822, + "epoch": 0.45424013434089, + "grad_norm": 0.23229518165002788, + "learning_rate": 9.721976539161245e-05, + "loss": 0.5634, "step": 1623 }, { - "epoch": 0.22729181245626312, - "grad_norm": 0.43438032996872095, - "learning_rate": 4.5077867303432546e-05, - "loss": 0.6452, + "epoch": 0.4545200111950742, + "grad_norm": 0.23435892514032364, + "learning_rate": 9.721469099393883e-05, + "loss": 0.5753, "step": 1624 }, { - "epoch": 0.22743177046885935, - "grad_norm": 0.4364680489398804, - "learning_rate": 4.5071112579099074e-05, - "loss": 0.6027, + "epoch": 0.4547998880492583, + "grad_norm": 0.22410564597322272, + "learning_rate": 9.720961210234449e-05, + "loss": 0.5382, "step": 1625 }, { - "epoch": 0.22757172848145557, - "grad_norm": 0.44709606835984195, - "learning_rate": 4.5064353729948315e-05, - "loss": 0.6131, + "epoch": 0.4550797649034425, + "grad_norm": 0.2368563135184631, + "learning_rate": 9.720452871731285e-05, + "loss": 0.582, "step": 1626 }, { - "epoch": 0.2277116864940518, - "grad_norm": 0.43519671900411855, - "learning_rate": 4.505759075736929e-05, - "loss": 0.591, + "epoch": 0.45535964175762667, + "grad_norm": 0.24090942265379905, + "learning_rate": 9.719944083932773e-05, + "loss": 0.5496, "step": 1627 }, { - "epoch": 0.227851644506648, - "grad_norm": 0.41612778582560767, - "learning_rate": 4.505082366275184e-05, - "loss": 0.5877, + "epoch": 0.4556395186118108, + "grad_norm": 0.24205985416292858, + "learning_rate": 9.719434846887343e-05, + "loss": 0.5972, "step": 1628 }, { - "epoch": 0.22799160251924422, - "grad_norm": 0.4218216656661162, - "learning_rate": 4.504405244748669e-05, - "loss": 0.5534, + "epoch": 0.45591939546599497, + "grad_norm": 0.24637174261436595, + "learning_rate": 9.718925160643461e-05, + "loss": 0.5547, "step": 1629 }, { - "epoch": 0.22813156053184044, - "grad_norm": 0.42964453812050457, - "learning_rate": 4.503727711296538e-05, - "loss": 0.6389, + "epoch": 0.4561992723201791, + "grad_norm": 0.24242927759220934, + "learning_rate": 9.718415025249644e-05, + "loss": 0.5608, "step": 1630 }, { - "epoch": 0.22827151854443667, - "grad_norm": 0.42585594110280106, - "learning_rate": 4.503049766058033e-05, - "loss": 0.6084, + "epoch": 0.4564791491743633, + "grad_norm": 0.2398370017922551, + "learning_rate": 9.71790444075444e-05, + "loss": 0.5941, "step": 1631 }, { - "epoch": 0.2284114765570329, - "grad_norm": 0.423186467723224, - "learning_rate": 4.5023714091724756e-05, - "loss": 0.6029, + "epoch": 0.45675902602854745, + "grad_norm": 0.22464648132157194, + "learning_rate": 9.717393407206453e-05, + "loss": 0.5785, "step": 1632 }, { - "epoch": 0.22855143456962912, - "grad_norm": 0.43009182540969443, - "learning_rate": 4.5016926407792774e-05, - "loss": 0.6404, + "epoch": 0.4570389028827316, + "grad_norm": 0.2359371075076253, + "learning_rate": 9.71688192465432e-05, + "loss": 0.562, "step": 1633 }, { - "epoch": 0.22869139258222534, - "grad_norm": 0.4291224518654508, - "learning_rate": 4.501013461017931e-05, - "loss": 0.6167, + "epoch": 0.45731877973691576, + "grad_norm": 0.23216743239512033, + "learning_rate": 9.716369993146725e-05, + "loss": 0.5873, "step": 1634 }, { - "epoch": 0.22883135059482154, - "grad_norm": 0.41098068932209736, - "learning_rate": 4.500333870028016e-05, - "loss": 0.581, + "epoch": 0.45759865659109994, + "grad_norm": 0.24372612182360875, + "learning_rate": 9.715857612732397e-05, + "loss": 0.6053, "step": 1635 }, { - "epoch": 0.22897130860741777, - "grad_norm": 0.4297903813744973, - "learning_rate": 4.499653867949194e-05, - "loss": 0.5923, + "epoch": 0.45787853344528406, + "grad_norm": 0.2424657932214619, + "learning_rate": 9.715344783460098e-05, + "loss": 0.5714, "step": 1636 }, { - "epoch": 0.229111266620014, - "grad_norm": 0.4194573186329626, - "learning_rate": 4.4989734549212125e-05, - "loss": 0.5893, + "epoch": 0.45815841029946824, + "grad_norm": 0.23375270597332154, + "learning_rate": 9.714831505378646e-05, + "loss": 0.5747, "step": 1637 }, { - "epoch": 0.22925122463261022, - "grad_norm": 0.44155487858058556, - "learning_rate": 4.498292631083904e-05, - "loss": 0.5734, + "epoch": 0.45843828715365237, + "grad_norm": 0.24199967270319583, + "learning_rate": 9.714317778536891e-05, + "loss": 0.5743, "step": 1638 }, { - "epoch": 0.22939118264520644, - "grad_norm": 0.4463968226022601, - "learning_rate": 4.4976113965771835e-05, - "loss": 0.6054, + "epoch": 0.45871816400783655, + "grad_norm": 0.2531179895334508, + "learning_rate": 9.713803602983731e-05, + "loss": 0.568, "step": 1639 }, { - "epoch": 0.22953114065780267, - "grad_norm": 0.43124302519709157, - "learning_rate": 4.496929751541054e-05, - "loss": 0.5788, + "epoch": 0.45899804086202073, + "grad_norm": 0.25284719666112554, + "learning_rate": 9.713288978768107e-05, + "loss": 0.5677, "step": 1640 }, { - "epoch": 0.2296710986703989, - "grad_norm": 0.4206213171563187, - "learning_rate": 4.4962476961155976e-05, - "loss": 0.5675, + "epoch": 0.45927791771620485, + "grad_norm": 0.245645975031102, + "learning_rate": 9.712773905938999e-05, + "loss": 0.5799, "step": 1641 }, { - "epoch": 0.2298110566829951, - "grad_norm": 0.4696499710097156, - "learning_rate": 4.495565230440985e-05, - "loss": 0.6295, + "epoch": 0.45955779457038903, + "grad_norm": 0.23607686469748843, + "learning_rate": 9.712258384545432e-05, + "loss": 0.5681, "step": 1642 }, { - "epoch": 0.22995101469559132, - "grad_norm": 0.3979651141981672, - "learning_rate": 4.494882354657469e-05, - "loss": 0.584, + "epoch": 0.4598376714245732, + "grad_norm": 0.24529416278386937, + "learning_rate": 9.711742414636476e-05, + "loss": 0.5939, "step": 1643 }, { - "epoch": 0.23009097270818754, - "grad_norm": 0.5843917120508454, - "learning_rate": 4.4941990689053886e-05, - "loss": 0.5725, + "epoch": 0.46011754827875734, + "grad_norm": 0.24808453699518654, + "learning_rate": 9.711225996261238e-05, + "loss": 0.5815, "step": 1644 }, { - "epoch": 0.23023093072078377, - "grad_norm": 0.4077407210872767, - "learning_rate": 4.4935153733251656e-05, - "loss": 0.5744, + "epoch": 0.4603974251329415, + "grad_norm": 0.24072875374815905, + "learning_rate": 9.710709129468873e-05, + "loss": 0.5911, "step": 1645 }, { - "epoch": 0.23037088873338, - "grad_norm": 0.4392056225941937, - "learning_rate": 4.4928312680573064e-05, - "loss": 0.6168, + "epoch": 0.46067730198712564, + "grad_norm": 0.24379613392778396, + "learning_rate": 9.710191814308577e-05, + "loss": 0.5782, "step": 1646 }, { - "epoch": 0.23051084674597622, - "grad_norm": 0.4356793893175191, - "learning_rate": 4.492146753242401e-05, - "loss": 0.5972, + "epoch": 0.4609571788413098, + "grad_norm": 0.23454801611000467, + "learning_rate": 9.709674050829588e-05, + "loss": 0.5696, "step": 1647 }, { - "epoch": 0.23065080475857244, - "grad_norm": 0.42512045974302, - "learning_rate": 4.491461829021125e-05, - "loss": 0.6265, + "epoch": 0.461237055695494, + "grad_norm": 0.24632377157127103, + "learning_rate": 9.709155839081186e-05, + "loss": 0.575, "step": 1648 }, { - "epoch": 0.23079076277116864, - "grad_norm": 0.4378392920345575, - "learning_rate": 4.490776495534237e-05, - "loss": 0.593, + "epoch": 0.4615169325496781, + "grad_norm": 0.24485361784242987, + "learning_rate": 9.708637179112696e-05, + "loss": 0.5629, "step": 1649 }, { - "epoch": 0.23093072078376486, - "grad_norm": 11.092127253150068, - "learning_rate": 4.49009075292258e-05, - "loss": 0.5982, + "epoch": 0.4617968094038623, + "grad_norm": 0.2429849320197462, + "learning_rate": 9.708118070973483e-05, + "loss": 0.5582, "step": 1650 }, { - "epoch": 0.2310706787963611, - "grad_norm": 0.4211855457116042, - "learning_rate": 4.489404601327081e-05, - "loss": 0.5885, + "epoch": 0.46207668625804643, + "grad_norm": 0.23895538009941178, + "learning_rate": 9.707598514712956e-05, + "loss": 0.5516, "step": 1651 }, { - "epoch": 0.23121063680895731, - "grad_norm": 0.44452065502823107, - "learning_rate": 4.48871804088875e-05, - "loss": 0.6286, + "epoch": 0.4623565631122306, + "grad_norm": 0.2355035827858529, + "learning_rate": 9.707078510380569e-05, + "loss": 0.5502, "step": 1652 }, { - "epoch": 0.23135059482155354, - "grad_norm": 0.44745394315873227, - "learning_rate": 4.488031071748684e-05, - "loss": 0.6036, + "epoch": 0.4626364399664148, + "grad_norm": 0.25052010000277225, + "learning_rate": 9.706558058025815e-05, + "loss": 0.5735, "step": 1653 }, { - "epoch": 0.23149055283414977, - "grad_norm": 0.41971430239918867, - "learning_rate": 4.487343694048061e-05, - "loss": 0.5691, + "epoch": 0.4629163168205989, + "grad_norm": 0.2449698774305263, + "learning_rate": 9.70603715769823e-05, + "loss": 0.5363, "step": 1654 }, { - "epoch": 0.23163051084674596, - "grad_norm": 0.43793140979246065, - "learning_rate": 4.4866559079281447e-05, - "loss": 0.6025, + "epoch": 0.4631961936747831, + "grad_norm": 0.2362853939678543, + "learning_rate": 9.705515809447394e-05, + "loss": 0.5724, "step": 1655 }, { - "epoch": 0.2317704688593422, - "grad_norm": 0.45584544502664887, - "learning_rate": 4.485967713530281e-05, - "loss": 0.5985, + "epoch": 0.4634760705289673, + "grad_norm": 0.2568714356965974, + "learning_rate": 9.70499401332293e-05, + "loss": 0.575, "step": 1656 }, { - "epoch": 0.2319104268719384, - "grad_norm": 0.4393986821966947, - "learning_rate": 4.485279110995903e-05, - "loss": 0.6181, + "epoch": 0.4637559473831514, + "grad_norm": 0.24169809634696723, + "learning_rate": 9.704471769374501e-05, + "loss": 0.5684, "step": 1657 }, { - "epoch": 0.23205038488453464, - "grad_norm": 0.41398671250331304, - "learning_rate": 4.4845901004665234e-05, - "loss": 0.5659, + "epoch": 0.4640358242373356, + "grad_norm": 0.24687664486505012, + "learning_rate": 9.703949077651817e-05, + "loss": 0.5745, "step": 1658 }, { - "epoch": 0.23219034289713086, - "grad_norm": 0.40845641035568697, - "learning_rate": 4.483900682083742e-05, - "loss": 0.6004, + "epoch": 0.4643157010915197, + "grad_norm": 0.2444165408649831, + "learning_rate": 9.703425938204627e-05, + "loss": 0.5369, "step": 1659 }, { - "epoch": 0.2323303009097271, - "grad_norm": 0.41163784471475046, - "learning_rate": 4.4832108559892406e-05, - "loss": 0.5676, + "epoch": 0.4645955779457039, + "grad_norm": 0.2431557857220034, + "learning_rate": 9.702902351082723e-05, + "loss": 0.5499, "step": 1660 }, { - "epoch": 0.23247025892232331, - "grad_norm": 0.40520538312381466, - "learning_rate": 4.4825206223247855e-05, - "loss": 0.5709, + "epoch": 0.46487545479988807, + "grad_norm": 0.23202598566873428, + "learning_rate": 9.702378316335942e-05, + "loss": 0.5755, "step": 1661 }, { - "epoch": 0.2326102169349195, - "grad_norm": 0.4052889024628849, - "learning_rate": 4.481829981232227e-05, - "loss": 0.6205, + "epoch": 0.4651553316540722, + "grad_norm": 0.2288877686751276, + "learning_rate": 9.70185383401416e-05, + "loss": 0.5436, "step": 1662 }, { - "epoch": 0.23275017494751574, - "grad_norm": 0.44570039573163067, - "learning_rate": 4.481138932853499e-05, - "loss": 0.6339, + "epoch": 0.46543520850825637, + "grad_norm": 0.24748492346111, + "learning_rate": 9.701328904167298e-05, + "loss": 0.5757, "step": 1663 }, { - "epoch": 0.23289013296011196, - "grad_norm": 0.42218605234809253, - "learning_rate": 4.480447477330619e-05, - "loss": 0.582, + "epoch": 0.46571508536244055, + "grad_norm": 0.23227249736511413, + "learning_rate": 9.700803526845319e-05, + "loss": 0.5808, "step": 1664 }, { - "epoch": 0.2330300909727082, - "grad_norm": 0.4139552246868811, - "learning_rate": 4.479755614805688e-05, - "loss": 0.6185, + "epoch": 0.4659949622166247, + "grad_norm": 0.23359697484490272, + "learning_rate": 9.700277702098231e-05, + "loss": 0.5551, "step": 1665 }, { - "epoch": 0.2331700489853044, - "grad_norm": 0.438909444526491, - "learning_rate": 4.4790633454208904e-05, - "loss": 0.582, + "epoch": 0.46627483907080886, + "grad_norm": 0.24117964707666567, + "learning_rate": 9.699751429976079e-05, + "loss": 0.572, "step": 1666 }, { - "epoch": 0.23331000699790064, - "grad_norm": 0.41733390361062983, - "learning_rate": 4.478370669318494e-05, - "loss": 0.5954, + "epoch": 0.466554715924993, + "grad_norm": 0.24306895681163349, + "learning_rate": 9.699224710528955e-05, + "loss": 0.5722, "step": 1667 }, { - "epoch": 0.23344996501049686, - "grad_norm": 0.43169892360959883, - "learning_rate": 4.477677586640854e-05, - "loss": 0.6044, + "epoch": 0.46683459277917716, + "grad_norm": 0.23446319214186462, + "learning_rate": 9.698697543806994e-05, + "loss": 0.5729, "step": 1668 }, { - "epoch": 0.23358992302309306, - "grad_norm": 0.41695287451508345, - "learning_rate": 4.4769840975304014e-05, - "loss": 0.5781, + "epoch": 0.46711446963336134, + "grad_norm": 0.24165369931898403, + "learning_rate": 9.69816992986037e-05, + "loss": 0.5557, "step": 1669 }, { - "epoch": 0.23372988103568929, - "grad_norm": 0.4287731524988866, - "learning_rate": 4.476290202129658e-05, - "loss": 0.6271, + "epoch": 0.46739434648754546, + "grad_norm": 0.22916898589779047, + "learning_rate": 9.697641868739303e-05, + "loss": 0.5581, "step": 1670 }, { - "epoch": 0.2338698390482855, - "grad_norm": 0.41617003120095575, - "learning_rate": 4.4755959005812256e-05, - "loss": 0.5917, + "epoch": 0.46767422334172964, + "grad_norm": 0.24919921268999934, + "learning_rate": 9.697113360494052e-05, + "loss": 0.5784, "step": 1671 }, { - "epoch": 0.23400979706088174, - "grad_norm": 0.4317600967426944, - "learning_rate": 4.474901193027791e-05, - "loss": 0.6371, + "epoch": 0.4679541001959138, + "grad_norm": 0.23349556147225378, + "learning_rate": 9.696584405174925e-05, + "loss": 0.5531, "step": 1672 }, { - "epoch": 0.23414975507347796, - "grad_norm": 0.4245879897664653, - "learning_rate": 4.474206079612122e-05, - "loss": 0.6104, + "epoch": 0.46823397705009795, + "grad_norm": 0.23301988889837738, + "learning_rate": 9.696055002832263e-05, + "loss": 0.5698, "step": 1673 }, { - "epoch": 0.23428971308607419, - "grad_norm": 0.4493803376375899, - "learning_rate": 4.4735105604770735e-05, - "loss": 0.5892, + "epoch": 0.46851385390428213, + "grad_norm": 0.23371805922133018, + "learning_rate": 9.695525153516459e-05, + "loss": 0.5781, "step": 1674 }, { - "epoch": 0.2344296710986704, - "grad_norm": 0.4250874727480562, - "learning_rate": 4.4728146357655795e-05, - "loss": 0.5675, + "epoch": 0.46879373075846625, + "grad_norm": 0.2445795859274827, + "learning_rate": 9.694994857277942e-05, + "loss": 0.5807, "step": 1675 }, { - "epoch": 0.2345696291112666, - "grad_norm": 0.4374362501965964, - "learning_rate": 4.4721183056206614e-05, - "loss": 0.6075, + "epoch": 0.46907360761265043, + "grad_norm": 0.24647542564702576, + "learning_rate": 9.694464114167186e-05, + "loss": 0.5602, "step": 1676 }, { - "epoch": 0.23470958712386283, - "grad_norm": 0.41760478685320906, - "learning_rate": 4.4714215701854225e-05, - "loss": 0.5986, + "epoch": 0.4693534844668346, + "grad_norm": 0.24022018858423302, + "learning_rate": 9.693932924234708e-05, + "loss": 0.5654, "step": 1677 }, { - "epoch": 0.23484954513645906, - "grad_norm": 0.42350699224694727, - "learning_rate": 4.4707244296030464e-05, - "loss": 0.6486, + "epoch": 0.46963336132101874, + "grad_norm": 0.23281322118283435, + "learning_rate": 9.693401287531067e-05, + "loss": 0.5574, "step": 1678 }, { - "epoch": 0.23498950314905528, - "grad_norm": 0.4256916273457718, - "learning_rate": 4.4700268840168045e-05, - "loss": 0.6174, + "epoch": 0.4699132381752029, + "grad_norm": 0.23754798733117446, + "learning_rate": 9.692869204106866e-05, + "loss": 0.5647, "step": 1679 }, { - "epoch": 0.2351294611616515, - "grad_norm": 0.42531800370790246, - "learning_rate": 4.469328933570051e-05, - "loss": 0.551, + "epoch": 0.47019311502938704, + "grad_norm": 0.23969572637873995, + "learning_rate": 9.692336674012746e-05, + "loss": 0.5678, "step": 1680 }, { - "epoch": 0.23526941917424773, - "grad_norm": 0.4181940913192066, - "learning_rate": 4.468630578406218e-05, - "loss": 0.5856, + "epoch": 0.4704729918835712, + "grad_norm": 0.23776421914820323, + "learning_rate": 9.691803697299396e-05, + "loss": 0.5666, "step": 1681 }, { - "epoch": 0.23540937718684396, - "grad_norm": 0.4105263397513692, - "learning_rate": 4.467931818668827e-05, - "loss": 0.5633, + "epoch": 0.4707528687377554, + "grad_norm": 0.2245483928489407, + "learning_rate": 9.691270274017543e-05, + "loss": 0.541, "step": 1682 }, { - "epoch": 0.23554933519944016, - "grad_norm": 0.4081654944687357, - "learning_rate": 4.46723265450148e-05, - "loss": 0.5808, + "epoch": 0.47103274559193953, + "grad_norm": 0.23614537930723442, + "learning_rate": 9.690736404217959e-05, + "loss": 0.5552, "step": 1683 }, { - "epoch": 0.23568929321203638, - "grad_norm": 0.3907211987137891, - "learning_rate": 4.466533086047861e-05, - "loss": 0.5571, + "epoch": 0.4713126224461237, + "grad_norm": 0.23576106419016976, + "learning_rate": 9.69020208795146e-05, + "loss": 0.5689, "step": 1684 }, { - "epoch": 0.2358292512246326, - "grad_norm": 0.4250060420027131, - "learning_rate": 4.465833113451741e-05, - "loss": 0.6208, + "epoch": 0.4715924993003079, + "grad_norm": 0.2423673322680066, + "learning_rate": 9.6896673252689e-05, + "loss": 0.5606, "step": 1685 }, { - "epoch": 0.23596920923722883, - "grad_norm": 0.3937263336098636, - "learning_rate": 4.465132736856969e-05, - "loss": 0.5621, + "epoch": 0.471872376154492, + "grad_norm": 0.2343894728813448, + "learning_rate": 9.68913211622118e-05, + "loss": 0.5305, "step": 1686 }, { - "epoch": 0.23610916724982506, - "grad_norm": 0.41965579506262274, - "learning_rate": 4.464431956407481e-05, - "loss": 0.6031, + "epoch": 0.4721522530086762, + "grad_norm": 0.23548944963250124, + "learning_rate": 9.68859646085924e-05, + "loss": 0.567, "step": 1687 }, { - "epoch": 0.23624912526242128, - "grad_norm": 0.426646222987036, - "learning_rate": 4.463730772247293e-05, - "loss": 0.6033, + "epoch": 0.4724321298628603, + "grad_norm": 0.23966407867775644, + "learning_rate": 9.688060359234064e-05, + "loss": 0.5747, "step": 1688 }, { - "epoch": 0.2363890832750175, - "grad_norm": 0.4439770950702544, - "learning_rate": 4.463029184520507e-05, - "loss": 0.604, + "epoch": 0.4727120067170445, + "grad_norm": 0.24706733837968547, + "learning_rate": 9.687523811396679e-05, + "loss": 0.5548, "step": 1689 }, { - "epoch": 0.2365290412876137, - "grad_norm": 0.4761499260018663, - "learning_rate": 4.4623271933713065e-05, - "loss": 0.6307, + "epoch": 0.4729918835712287, + "grad_norm": 0.23784005195636673, + "learning_rate": 9.686986817398155e-05, + "loss": 0.5641, "step": 1690 }, { - "epoch": 0.23666899930020993, - "grad_norm": 0.4335225372190029, - "learning_rate": 4.4616247989439565e-05, - "loss": 0.576, + "epoch": 0.4732717604254128, + "grad_norm": 0.24228957192858178, + "learning_rate": 9.686449377289601e-05, + "loss": 0.5598, "step": 1691 }, { - "epoch": 0.23680895731280616, - "grad_norm": 0.43316503301129894, - "learning_rate": 4.4609220013828065e-05, - "loss": 0.6275, + "epoch": 0.473551637279597, + "grad_norm": 0.24173114213297364, + "learning_rate": 9.685911491122175e-05, + "loss": 0.5577, "step": 1692 }, { - "epoch": 0.23694891532540238, - "grad_norm": 0.4313464290288283, - "learning_rate": 4.46021880083229e-05, - "loss": 0.6067, + "epoch": 0.47383151413378116, + "grad_norm": 0.22701719607890072, + "learning_rate": 9.685373158947067e-05, + "loss": 0.5475, "step": 1693 }, { - "epoch": 0.2370888733379986, - "grad_norm": 0.41535036276436027, - "learning_rate": 4.45951519743692e-05, - "loss": 0.6192, + "epoch": 0.4741113909879653, + "grad_norm": 0.24187651396474402, + "learning_rate": 9.684834380815522e-05, + "loss": 0.5488, "step": 1694 }, { - "epoch": 0.23722883135059483, - "grad_norm": 0.4310228331540852, - "learning_rate": 4.4588111913412945e-05, - "loss": 0.5915, + "epoch": 0.47439126784214947, + "grad_norm": 0.2508222727693515, + "learning_rate": 9.684295156778815e-05, + "loss": 0.5827, "step": 1695 }, { - "epoch": 0.23736878936319103, - "grad_norm": 0.42874110588166797, - "learning_rate": 4.458106782690094e-05, - "loss": 0.5912, + "epoch": 0.4746711446963336, + "grad_norm": 0.22557444004295016, + "learning_rate": 9.683755486888277e-05, + "loss": 0.5873, "step": 1696 }, { - "epoch": 0.23750874737578725, - "grad_norm": 0.43089863664872624, - "learning_rate": 4.4574019716280824e-05, - "loss": 0.613, + "epoch": 0.47495102155051777, + "grad_norm": 0.2339936997134689, + "learning_rate": 9.683215371195267e-05, + "loss": 0.5967, "step": 1697 }, { - "epoch": 0.23764870538838348, - "grad_norm": 0.4181283245016506, - "learning_rate": 4.4566967583001046e-05, - "loss": 0.5918, + "epoch": 0.47523089840470195, + "grad_norm": 0.23302300536397813, + "learning_rate": 9.682674809751198e-05, + "loss": 0.5778, "step": 1698 }, { - "epoch": 0.2377886634009797, - "grad_norm": 0.4521121054953592, - "learning_rate": 4.4559911428510895e-05, - "loss": 0.6326, + "epoch": 0.4755107752588861, + "grad_norm": 0.2284584881896518, + "learning_rate": 9.682133802607519e-05, + "loss": 0.5612, "step": 1699 }, { - "epoch": 0.23792862141357593, - "grad_norm": 0.41538878241716964, - "learning_rate": 4.4552851254260484e-05, - "loss": 0.5994, + "epoch": 0.47579065211307026, + "grad_norm": 0.2482456132223044, + "learning_rate": 9.681592349815725e-05, + "loss": 0.5969, "step": 1700 }, { - "epoch": 0.23806857942617216, - "grad_norm": 0.4693907751456843, - "learning_rate": 4.454578706170075e-05, - "loss": 0.6444, + "epoch": 0.4760705289672544, + "grad_norm": 0.2371325439267351, + "learning_rate": 9.681050451427349e-05, + "loss": 0.5759, "step": 1701 }, { - "epoch": 0.23820853743876838, - "grad_norm": 0.47587489105004577, - "learning_rate": 4.453871885228345e-05, - "loss": 0.6053, + "epoch": 0.47635040582143856, + "grad_norm": 0.2276233032410326, + "learning_rate": 9.680508107493974e-05, + "loss": 0.5692, "step": 1702 }, { - "epoch": 0.23834849545136458, - "grad_norm": 0.44284115011605213, - "learning_rate": 4.4531646627461175e-05, - "loss": 0.6107, + "epoch": 0.47663028267562274, + "grad_norm": 0.23006587175018722, + "learning_rate": 9.679965318067214e-05, + "loss": 0.5646, "step": 1703 }, { - "epoch": 0.2384884534639608, - "grad_norm": 0.42954706530716213, - "learning_rate": 4.452457038868735e-05, - "loss": 0.6082, + "epoch": 0.47691015952980687, + "grad_norm": 0.23438779914728478, + "learning_rate": 9.679422083198738e-05, + "loss": 0.5521, "step": 1704 }, { - "epoch": 0.23862841147655703, - "grad_norm": 0.4558514723051049, - "learning_rate": 4.4517490137416196e-05, - "loss": 0.5926, + "epoch": 0.47719003638399105, + "grad_norm": 0.2376983080464345, + "learning_rate": 9.678878402940249e-05, + "loss": 0.5657, "step": 1705 }, { - "epoch": 0.23876836948915325, - "grad_norm": 0.41271838706018094, - "learning_rate": 4.451040587510279e-05, - "loss": 0.5552, + "epoch": 0.4774699132381752, + "grad_norm": 0.2533956785568868, + "learning_rate": 9.678334277343493e-05, + "loss": 0.5749, "step": 1706 }, { - "epoch": 0.23890832750174948, - "grad_norm": 0.4234721479869862, - "learning_rate": 4.450331760320302e-05, - "loss": 0.5907, + "epoch": 0.47774979009235935, + "grad_norm": 0.24240802881417522, + "learning_rate": 9.677789706460263e-05, + "loss": 0.5844, "step": 1707 }, { - "epoch": 0.2390482855143457, - "grad_norm": 0.4301944314412581, - "learning_rate": 4.449622532317359e-05, - "loss": 0.5637, + "epoch": 0.47802966694654353, + "grad_norm": 0.2368764108420469, + "learning_rate": 9.67724469034239e-05, + "loss": 0.5707, "step": 1708 }, { - "epoch": 0.23918824352694193, - "grad_norm": 0.3992534678208053, - "learning_rate": 4.448912903647203e-05, - "loss": 0.5746, + "epoch": 0.47830954380072765, + "grad_norm": 0.23976498497368334, + "learning_rate": 9.676699229041749e-05, + "loss": 0.5611, "step": 1709 }, { - "epoch": 0.23932820153953813, - "grad_norm": 0.4151340108937535, - "learning_rate": 4.448202874455673e-05, - "loss": 0.5953, + "epoch": 0.47858942065491183, + "grad_norm": 0.23873554876842037, + "learning_rate": 9.676153322610259e-05, + "loss": 0.5582, "step": 1710 }, { - "epoch": 0.23946815955213435, - "grad_norm": 0.42307801303730386, - "learning_rate": 4.447492444888682e-05, - "loss": 0.5985, + "epoch": 0.478869297509096, + "grad_norm": 0.24447888689261207, + "learning_rate": 9.675606971099878e-05, + "loss": 0.561, "step": 1711 }, { - "epoch": 0.23960811756473058, - "grad_norm": 0.4254157003906138, - "learning_rate": 4.446781615092235e-05, - "loss": 0.5706, + "epoch": 0.47914917436328014, + "grad_norm": 0.23776601617560683, + "learning_rate": 9.675060174562607e-05, + "loss": 0.5662, "step": 1712 }, { - "epoch": 0.2397480755773268, - "grad_norm": 0.4221420053361986, - "learning_rate": 4.446070385212414e-05, - "loss": 0.5865, + "epoch": 0.4794290512174643, + "grad_norm": 0.2287431865973953, + "learning_rate": 9.674512933050493e-05, + "loss": 0.5485, "step": 1713 }, { - "epoch": 0.23988803358992303, - "grad_norm": 0.44169866453949225, - "learning_rate": 4.445358755395382e-05, - "loss": 0.6131, + "epoch": 0.4797089280716485, + "grad_norm": 0.2460925836878855, + "learning_rate": 9.673965246615621e-05, + "loss": 0.5714, "step": 1714 }, { - "epoch": 0.24002799160251925, - "grad_norm": 0.41028257323034817, - "learning_rate": 4.444646725787387e-05, - "loss": 0.6391, + "epoch": 0.4799888049258326, + "grad_norm": 0.23900950341817997, + "learning_rate": 9.673417115310121e-05, + "loss": 0.5473, "step": 1715 }, { - "epoch": 0.24016794961511548, - "grad_norm": 0.42430000545950824, - "learning_rate": 4.4439342965347595e-05, - "loss": 0.5713, + "epoch": 0.4802686817800168, + "grad_norm": 0.22945766993352582, + "learning_rate": 9.672868539186166e-05, + "loss": 0.5541, "step": 1716 }, { - "epoch": 0.24030790762771168, - "grad_norm": 0.40781271006014086, - "learning_rate": 4.4432214677839095e-05, - "loss": 0.5508, + "epoch": 0.48054855863420093, + "grad_norm": 0.2474126864137905, + "learning_rate": 9.672319518295965e-05, + "loss": 0.5437, "step": 1717 }, { - "epoch": 0.2404478656403079, - "grad_norm": 0.4231812063581877, - "learning_rate": 4.442508239681331e-05, - "loss": 0.59, + "epoch": 0.4808284354883851, + "grad_norm": 0.2253507794048116, + "learning_rate": 9.67177005269178e-05, + "loss": 0.5754, "step": 1718 }, { - "epoch": 0.24058782365290413, - "grad_norm": 0.4175498825745329, - "learning_rate": 4.4417946123736e-05, - "loss": 0.6073, + "epoch": 0.4811083123425693, + "grad_norm": 0.2214830764551037, + "learning_rate": 9.671220142425905e-05, + "loss": 0.5437, "step": 1719 }, { - "epoch": 0.24072778166550035, - "grad_norm": 0.43499143607770346, - "learning_rate": 4.4410805860073736e-05, - "loss": 0.562, + "epoch": 0.4813881891967534, + "grad_norm": 0.24453365086702913, + "learning_rate": 9.670669787550682e-05, + "loss": 0.5771, "step": 1720 }, { - "epoch": 0.24086773967809658, - "grad_norm": 0.4405506003884184, - "learning_rate": 4.440366160729392e-05, - "loss": 0.5963, + "epoch": 0.4816680660509376, + "grad_norm": 0.22646871005462496, + "learning_rate": 9.670118988118493e-05, + "loss": 0.5701, "step": 1721 }, { - "epoch": 0.2410076976906928, - "grad_norm": 0.4048128591600598, - "learning_rate": 4.4396513366864765e-05, - "loss": 0.5533, + "epoch": 0.4819479429051218, + "grad_norm": 0.24667167448757019, + "learning_rate": 9.669567744181767e-05, + "loss": 0.5627, "step": 1722 }, { - "epoch": 0.24114765570328903, - "grad_norm": 0.41424145471376667, - "learning_rate": 4.4389361140255306e-05, - "loss": 0.5814, + "epoch": 0.4822278197593059, + "grad_norm": 0.2247237263507309, + "learning_rate": 9.669016055792967e-05, + "loss": 0.5553, "step": 1723 }, { - "epoch": 0.24128761371588522, - "grad_norm": 0.41036719244303405, - "learning_rate": 4.43822049289354e-05, - "loss": 0.57, + "epoch": 0.4825076966134901, + "grad_norm": 0.8916908358012535, + "learning_rate": 9.668463923004608e-05, + "loss": 0.5348, "step": 1724 }, { - "epoch": 0.24142757172848145, - "grad_norm": 0.41201535335271977, - "learning_rate": 4.4375044734375724e-05, - "loss": 0.5484, + "epoch": 0.4827875734676742, + "grad_norm": 0.2292032131128853, + "learning_rate": 9.667911345869239e-05, + "loss": 0.5717, "step": 1725 }, { - "epoch": 0.24156752974107767, - "grad_norm": 0.4111942850121887, - "learning_rate": 4.436788055804777e-05, - "loss": 0.5706, + "epoch": 0.4830674503218584, + "grad_norm": 0.23436419844039819, + "learning_rate": 9.667358324439455e-05, + "loss": 0.5577, "step": 1726 }, { - "epoch": 0.2417074877536739, - "grad_norm": 0.4519161404524503, - "learning_rate": 4.436071240142383e-05, - "loss": 0.6018, + "epoch": 0.48334732717604256, + "grad_norm": 0.23307361568857196, + "learning_rate": 9.666804858767894e-05, + "loss": 0.5396, "step": 1727 }, { - "epoch": 0.24184744576627012, - "grad_norm": 0.4403101160025735, - "learning_rate": 4.4353540265977064e-05, - "loss": 0.6072, + "epoch": 0.4836272040302267, + "grad_norm": 0.24883370720128642, + "learning_rate": 9.666250948907234e-05, + "loss": 0.5898, "step": 1728 }, { - "epoch": 0.24198740377886635, - "grad_norm": 0.49703818437945607, - "learning_rate": 4.4346364153181397e-05, - "loss": 0.6081, + "epoch": 0.48390708088441087, + "grad_norm": 0.2380820186350895, + "learning_rate": 9.665696594910196e-05, + "loss": 0.5791, "step": 1729 }, { - "epoch": 0.24212736179146255, - "grad_norm": 0.41243187874993914, - "learning_rate": 4.433918406451161e-05, - "loss": 0.5901, + "epoch": 0.484186957738595, + "grad_norm": 0.2421076443682589, + "learning_rate": 9.665141796829545e-05, + "loss": 0.5444, "step": 1730 }, { - "epoch": 0.24226731980405877, - "grad_norm": 0.40745642609215477, - "learning_rate": 4.433200000144326e-05, - "loss": 0.5789, + "epoch": 0.48446683459277917, + "grad_norm": 0.24762436331514348, + "learning_rate": 9.664586554718086e-05, + "loss": 0.5533, "step": 1731 }, { - "epoch": 0.242407277816655, - "grad_norm": 0.42520024238957543, - "learning_rate": 4.4324811965452764e-05, - "loss": 0.5929, + "epoch": 0.48474671144696335, + "grad_norm": 0.2496491363838159, + "learning_rate": 9.66403086862867e-05, + "loss": 0.5731, "step": 1732 }, { - "epoch": 0.24254723582925122, - "grad_norm": 0.4483762787233949, - "learning_rate": 4.431761995801733e-05, - "loss": 0.6412, + "epoch": 0.4850265883011475, + "grad_norm": 0.23970275567042595, + "learning_rate": 9.663474738614185e-05, + "loss": 0.5886, "step": 1733 }, { - "epoch": 0.24268719384184745, - "grad_norm": 0.42388738271936166, - "learning_rate": 4.431042398061499e-05, - "loss": 0.6215, + "epoch": 0.48530646515533166, + "grad_norm": 0.24925697169184896, + "learning_rate": 9.662918164727563e-05, + "loss": 0.5503, "step": 1734 }, { - "epoch": 0.24282715185444367, - "grad_norm": 0.42006944785214295, - "learning_rate": 4.430322403472459e-05, - "loss": 0.6243, + "epoch": 0.48558634200951584, + "grad_norm": 0.24197149218541705, + "learning_rate": 9.662361147021779e-05, + "loss": 0.5749, "step": 1735 }, { - "epoch": 0.2429671098670399, - "grad_norm": 0.41526878760328473, - "learning_rate": 4.429602012182579e-05, - "loss": 0.5868, + "epoch": 0.48586621886369996, + "grad_norm": 0.2248961182333366, + "learning_rate": 9.661803685549853e-05, + "loss": 0.5562, "step": 1736 }, { - "epoch": 0.2431070678796361, - "grad_norm": 0.40029320601405294, - "learning_rate": 4.428881224339907e-05, - "loss": 0.6238, + "epoch": 0.48614609571788414, + "grad_norm": 0.23254674182459417, + "learning_rate": 9.661245780364843e-05, + "loss": 0.5697, "step": 1737 }, { - "epoch": 0.24324702589223232, - "grad_norm": 0.424773544460618, - "learning_rate": 4.4281600400925725e-05, - "loss": 0.6287, + "epoch": 0.48642597257206827, + "grad_norm": 0.23923114242653176, + "learning_rate": 9.66068743151985e-05, + "loss": 0.5622, "step": 1738 }, { - "epoch": 0.24338698390482855, - "grad_norm": 0.43089065054686443, - "learning_rate": 4.4274384595887855e-05, - "loss": 0.579, + "epoch": 0.48670584942625245, + "grad_norm": 0.23552215352744707, + "learning_rate": 9.660128639068018e-05, + "loss": 0.5411, "step": 1739 }, { - "epoch": 0.24352694191742477, - "grad_norm": 0.39335625658540807, - "learning_rate": 4.426716482976838e-05, - "loss": 0.5945, + "epoch": 0.4869857262804366, + "grad_norm": 0.23970839749696465, + "learning_rate": 9.659569403062535e-05, + "loss": 0.5437, "step": 1740 }, { - "epoch": 0.243666899930021, - "grad_norm": 0.4321714579592816, - "learning_rate": 4.425994110405105e-05, - "loss": 0.6165, + "epoch": 0.48726560313462075, + "grad_norm": 0.2367322032905853, + "learning_rate": 9.659009723556627e-05, + "loss": 0.5724, "step": 1741 }, { - "epoch": 0.24380685794261722, - "grad_norm": 0.3868262436570827, - "learning_rate": 4.425271342022039e-05, - "loss": 0.6322, + "epoch": 0.48754547998880493, + "grad_norm": 0.22170711596683768, + "learning_rate": 9.658449600603568e-05, + "loss": 0.5712, "step": 1742 }, { - "epoch": 0.24394681595521345, - "grad_norm": 0.4194791382722549, - "learning_rate": 4.424548177976179e-05, - "loss": 0.6155, + "epoch": 0.4878253568429891, + "grad_norm": 0.24393327932601608, + "learning_rate": 9.657889034256666e-05, + "loss": 0.5631, "step": 1743 }, { - "epoch": 0.24408677396780964, - "grad_norm": 0.4118315161780323, - "learning_rate": 4.42382461841614e-05, - "loss": 0.6144, + "epoch": 0.48810523369717324, + "grad_norm": 0.2598732846882264, + "learning_rate": 9.65732802456928e-05, + "loss": 0.6077, "step": 1744 }, { - "epoch": 0.24422673198040587, - "grad_norm": 0.40215952380747805, - "learning_rate": 4.4231006634906224e-05, - "loss": 0.5487, + "epoch": 0.4883851105513574, + "grad_norm": 0.23872536309296974, + "learning_rate": 9.656766571594805e-05, + "loss": 0.5413, "step": 1745 }, { - "epoch": 0.2443666899930021, - "grad_norm": 0.42510463260619374, - "learning_rate": 4.422376313348405e-05, - "loss": 0.5674, + "epoch": 0.48866498740554154, + "grad_norm": 0.23088966250187232, + "learning_rate": 9.656204675386682e-05, + "loss": 0.5669, "step": 1746 }, { - "epoch": 0.24450664800559832, - "grad_norm": 0.4220078299124082, - "learning_rate": 4.4216515681383505e-05, - "loss": 0.6159, + "epoch": 0.4889448642597257, + "grad_norm": 0.228682225348453, + "learning_rate": 9.655642335998391e-05, + "loss": 0.563, "step": 1747 }, { - "epoch": 0.24464660601819455, - "grad_norm": 0.42050182431963373, - "learning_rate": 4.420926428009401e-05, - "loss": 0.6066, + "epoch": 0.4892247411139099, + "grad_norm": 0.24228207172083097, + "learning_rate": 9.655079553483457e-05, + "loss": 0.5695, "step": 1748 }, { - "epoch": 0.24478656403079077, - "grad_norm": 0.43689368971554526, - "learning_rate": 4.4202008931105795e-05, - "loss": 0.5568, + "epoch": 0.489504617968094, + "grad_norm": 0.23134302629555473, + "learning_rate": 9.654516327895445e-05, + "loss": 0.5505, "step": 1749 }, { - "epoch": 0.244926522043387, - "grad_norm": 0.43739145581183353, - "learning_rate": 4.4194749635909924e-05, - "loss": 0.5984, + "epoch": 0.4897844948222782, + "grad_norm": 0.24081654112806575, + "learning_rate": 9.653952659287963e-05, + "loss": 0.5868, "step": 1750 }, { - "epoch": 0.2450664800559832, - "grad_norm": 0.4595283770036913, - "learning_rate": 4.4187486395998235e-05, - "loss": 0.6204, + "epoch": 0.49006437167646233, + "grad_norm": 0.2301704112159972, + "learning_rate": 9.653388547714665e-05, + "loss": 0.5883, "step": 1751 }, { - "epoch": 0.24520643806857942, - "grad_norm": 0.4332223020604194, - "learning_rate": 4.4180219212863404e-05, - "loss": 0.6114, + "epoch": 0.4903442485306465, + "grad_norm": 0.23815098899205434, + "learning_rate": 9.652823993229239e-05, + "loss": 0.5508, "step": 1752 }, { - "epoch": 0.24534639608117564, - "grad_norm": 0.44490455320321953, - "learning_rate": 4.417294808799892e-05, - "loss": 0.5771, + "epoch": 0.4906241253848307, + "grad_norm": 0.2480390974116189, + "learning_rate": 9.65225899588542e-05, + "loss": 0.5623, "step": 1753 }, { - "epoch": 0.24548635409377187, - "grad_norm": 0.42354862515455083, - "learning_rate": 4.416567302289907e-05, - "loss": 0.6092, + "epoch": 0.4909040022390148, + "grad_norm": 0.23597688623387672, + "learning_rate": 9.651693555736986e-05, + "loss": 0.5768, "step": 1754 }, { - "epoch": 0.2456263121063681, - "grad_norm": 0.4252852477883581, - "learning_rate": 4.4158394019058944e-05, - "loss": 0.6034, + "epoch": 0.491183879093199, + "grad_norm": 0.23803132760044657, + "learning_rate": 9.651127672837757e-05, + "loss": 0.5698, "step": 1755 }, { - "epoch": 0.24576627011896432, - "grad_norm": 0.4376952545699014, - "learning_rate": 4.415111107797445e-05, - "loss": 0.623, + "epoch": 0.4914637559473832, + "grad_norm": 0.2252120497537188, + "learning_rate": 9.650561347241592e-05, + "loss": 0.5561, "step": 1756 }, { - "epoch": 0.24590622813156054, - "grad_norm": 0.41667144416831586, - "learning_rate": 4.414382420114233e-05, - "loss": 0.6042, + "epoch": 0.4917436328015673, + "grad_norm": 0.2338161916293681, + "learning_rate": 9.649994579002392e-05, + "loss": 0.5761, "step": 1757 }, { - "epoch": 0.24604618614415674, - "grad_norm": 0.42933664014516487, - "learning_rate": 4.413653339006008e-05, - "loss": 0.6022, + "epoch": 0.4920235096557515, + "grad_norm": 0.22758790036460252, + "learning_rate": 9.649427368174109e-05, + "loss": 0.5487, "step": 1758 }, { - "epoch": 0.24618614415675297, - "grad_norm": 0.4162921306132023, - "learning_rate": 4.4129238646226055e-05, - "loss": 0.5887, + "epoch": 0.4923033865099356, + "grad_norm": 0.23630527762162581, + "learning_rate": 9.648859714810725e-05, + "loss": 0.545, "step": 1759 }, { - "epoch": 0.2463261021693492, - "grad_norm": 0.4306173347777482, - "learning_rate": 4.4121939971139403e-05, - "loss": 0.5574, + "epoch": 0.4925832633641198, + "grad_norm": 0.24074390662221748, + "learning_rate": 9.648291618966273e-05, + "loss": 0.59, "step": 1760 }, { - "epoch": 0.24646606018194542, - "grad_norm": 0.43621303487831553, - "learning_rate": 4.411463736630006e-05, - "loss": 0.5876, + "epoch": 0.49286314021830396, + "grad_norm": 0.2322264115487179, + "learning_rate": 9.647723080694821e-05, + "loss": 0.5619, "step": 1761 }, { - "epoch": 0.24660601819454164, - "grad_norm": 0.419540410185526, - "learning_rate": 4.410733083320879e-05, - "loss": 0.599, + "epoch": 0.4931430170724881, + "grad_norm": 0.25287558279676103, + "learning_rate": 9.647154100050486e-05, + "loss": 0.5504, "step": 1762 }, { - "epoch": 0.24674597620713787, - "grad_norm": 0.4510539099176357, - "learning_rate": 4.4100020373367166e-05, - "loss": 0.6325, + "epoch": 0.49342289392667227, + "grad_norm": 0.2383217604267359, + "learning_rate": 9.646584677087422e-05, + "loss": 0.5319, "step": 1763 }, { - "epoch": 0.2468859342197341, - "grad_norm": 0.42372431177296327, - "learning_rate": 4.409270598827756e-05, - "loss": 0.5969, + "epoch": 0.49370277078085645, + "grad_norm": 0.23082191881659192, + "learning_rate": 9.646014811859829e-05, + "loss": 0.5466, "step": 1764 }, { - "epoch": 0.2470258922323303, - "grad_norm": 0.45611399870187513, - "learning_rate": 4.408538767944315e-05, - "loss": 0.6076, + "epoch": 0.4939826476350406, + "grad_norm": 0.23398471193474196, + "learning_rate": 9.645444504421944e-05, + "loss": 0.5571, "step": 1765 }, { - "epoch": 0.24716585024492652, - "grad_norm": 0.4401540888444283, - "learning_rate": 4.407806544836792e-05, - "loss": 0.6134, + "epoch": 0.49426252448922475, + "grad_norm": 0.23633229687218657, + "learning_rate": 9.644873754828052e-05, + "loss": 0.5561, "step": 1766 }, { - "epoch": 0.24730580825752274, - "grad_norm": 0.43386652619598914, - "learning_rate": 4.407073929655666e-05, - "loss": 0.6015, + "epoch": 0.4945424013434089, + "grad_norm": 0.25790489199568895, + "learning_rate": 9.644302563132475e-05, + "loss": 0.5523, "step": 1767 }, { - "epoch": 0.24744576627011897, - "grad_norm": 0.43265086343988496, - "learning_rate": 4.406340922551499e-05, - "loss": 0.6059, + "epoch": 0.49482227819759306, + "grad_norm": 0.23756523765843604, + "learning_rate": 9.64373092938958e-05, + "loss": 0.5772, "step": 1768 }, { - "epoch": 0.2475857242827152, - "grad_norm": 0.4200705255951236, - "learning_rate": 4.4056075236749285e-05, - "loss": 0.5596, + "epoch": 0.49510215505177724, + "grad_norm": 0.24904463596657922, + "learning_rate": 9.643158853653778e-05, + "loss": 0.5706, "step": 1769 }, { - "epoch": 0.24772568229531142, - "grad_norm": 0.4327675200614844, - "learning_rate": 4.404873733176678e-05, - "loss": 0.5673, + "epoch": 0.49538203190596136, + "grad_norm": 0.2302259622916314, + "learning_rate": 9.642586335979517e-05, + "loss": 0.5593, "step": 1770 }, { - "epoch": 0.24786564030790761, - "grad_norm": 0.41060345330483095, - "learning_rate": 4.4041395512075464e-05, - "loss": 0.5658, + "epoch": 0.49566190876014554, + "grad_norm": 0.23618995273954105, + "learning_rate": 9.64201337642129e-05, + "loss": 0.5385, "step": 1771 }, { - "epoch": 0.24800559832050384, - "grad_norm": 0.4177460191821631, - "learning_rate": 4.403404977918417e-05, - "loss": 0.5871, + "epoch": 0.4959417856143297, + "grad_norm": 0.25011648673412507, + "learning_rate": 9.641439975033631e-05, + "loss": 0.592, "step": 1772 }, { - "epoch": 0.24814555633310006, - "grad_norm": 0.3995644804936815, - "learning_rate": 4.402670013460252e-05, - "loss": 0.5767, + "epoch": 0.49622166246851385, + "grad_norm": 0.23046022743268094, + "learning_rate": 9.640866131871115e-05, + "loss": 0.5798, "step": 1773 }, { - "epoch": 0.2482855143456963, - "grad_norm": 0.40455718790823125, - "learning_rate": 4.401934657984094e-05, - "loss": 0.6232, + "epoch": 0.496501539322698, + "grad_norm": 0.24263488443094086, + "learning_rate": 9.640291846988367e-05, + "loss": 0.5824, "step": 1774 }, { - "epoch": 0.24842547235829252, - "grad_norm": 0.40147880141712233, - "learning_rate": 4.401198911641066e-05, - "loss": 0.587, + "epoch": 0.49678141617688215, + "grad_norm": 0.23480203611455353, + "learning_rate": 9.639717120440042e-05, + "loss": 0.5492, "step": 1775 }, { - "epoch": 0.24856543037088874, - "grad_norm": 0.40405116225388327, - "learning_rate": 4.400462774582371e-05, - "loss": 0.5708, + "epoch": 0.49706129303106633, + "grad_norm": 0.2324077757263914, + "learning_rate": 9.639141952280845e-05, + "loss": 0.5806, "step": 1776 }, { - "epoch": 0.24870538838348497, - "grad_norm": 0.4227092160628934, - "learning_rate": 4.399726246959293e-05, - "loss": 0.6278, + "epoch": 0.4973411698852505, + "grad_norm": 0.23674735406929293, + "learning_rate": 9.63856634256552e-05, + "loss": 0.5632, "step": 1777 }, { - "epoch": 0.24884534639608116, - "grad_norm": 0.40251616958969094, - "learning_rate": 4.3989893289231954e-05, - "loss": 0.6146, + "epoch": 0.49762104673943464, + "grad_norm": 0.22564044893242224, + "learning_rate": 9.637990291348853e-05, + "loss": 0.5419, "step": 1778 }, { - "epoch": 0.2489853044086774, - "grad_norm": 0.4113564198991073, - "learning_rate": 4.398252020625523e-05, - "loss": 0.5772, + "epoch": 0.4979009235936188, + "grad_norm": 0.23733544029512643, + "learning_rate": 9.637413798685675e-05, + "loss": 0.5387, "step": 1779 }, { - "epoch": 0.2491252624212736, - "grad_norm": 0.4218976966069186, - "learning_rate": 4.3975143222178e-05, - "loss": 0.5956, + "epoch": 0.49818080044780294, + "grad_norm": 0.23226211989695644, + "learning_rate": 9.636836864630856e-05, + "loss": 0.5401, "step": 1780 }, { - "epoch": 0.24926522043386984, - "grad_norm": 0.4153141525664469, - "learning_rate": 4.39677623385163e-05, - "loss": 0.5925, + "epoch": 0.4984606773019871, + "grad_norm": 0.25178990006315966, + "learning_rate": 9.63625948923931e-05, + "loss": 0.5543, "step": 1781 }, { - "epoch": 0.24940517844646606, - "grad_norm": 0.408421965231002, - "learning_rate": 4.3960377556787e-05, - "loss": 0.597, + "epoch": 0.4987405541561713, + "grad_norm": 0.24134903900795754, + "learning_rate": 9.635681672565989e-05, + "loss": 0.5811, "step": 1782 }, { - "epoch": 0.2495451364590623, - "grad_norm": 0.41009081087351246, - "learning_rate": 4.3952988878507714e-05, - "loss": 0.5997, + "epoch": 0.4990204310103554, + "grad_norm": 0.2464353452542341, + "learning_rate": 9.635103414665893e-05, + "loss": 0.561, "step": 1783 }, { - "epoch": 0.24968509447165851, - "grad_norm": 0.4126702795308356, - "learning_rate": 4.3945596305196925e-05, - "loss": 0.5847, + "epoch": 0.4993003078645396, + "grad_norm": 0.23746760502538525, + "learning_rate": 9.634524715594058e-05, + "loss": 0.5918, "step": 1784 }, { - "epoch": 0.2498250524842547, - "grad_norm": 0.40686925420136566, - "learning_rate": 4.393819983837385e-05, - "loss": 0.5748, + "epoch": 0.4995801847187238, + "grad_norm": 0.24264934578582315, + "learning_rate": 9.633945575405567e-05, + "loss": 0.5603, "step": 1785 }, { - "epoch": 0.24996501049685094, - "grad_norm": 0.42764000046837597, - "learning_rate": 4.393079947955856e-05, - "loss": 0.5689, + "epoch": 0.4998600615729079, + "grad_norm": 0.24157740893727817, + "learning_rate": 9.633365994155544e-05, + "loss": 0.5562, "step": 1786 }, { - "epoch": 0.2501049685094472, - "grad_norm": 0.43927412707903785, - "learning_rate": 4.39233952302719e-05, - "loss": 0.6304, + "epoch": 0.500139938427092, + "grad_norm": 0.23872271292004676, + "learning_rate": 9.632785971899151e-05, + "loss": 0.5607, "step": 1787 }, { - "epoch": 0.2502449265220434, - "grad_norm": 0.41444855744545284, - "learning_rate": 4.3915987092035505e-05, - "loss": 0.5849, + "epoch": 0.5004198152812762, + "grad_norm": 0.29377607225803737, + "learning_rate": 9.632205508691596e-05, + "loss": 0.5434, "step": 1788 }, { - "epoch": 0.2503848845346396, - "grad_norm": 0.42682475396958397, - "learning_rate": 4.3908575066371835e-05, - "loss": 0.5862, + "epoch": 0.5006996921354604, + "grad_norm": 0.22816170933325136, + "learning_rate": 9.631624604588129e-05, + "loss": 0.5696, "step": 1789 }, { - "epoch": 0.25052484254723584, - "grad_norm": 0.4771191183081023, - "learning_rate": 4.390115915480414e-05, - "loss": 0.6329, + "epoch": 0.5009795689896446, + "grad_norm": 0.22804984775922338, + "learning_rate": 9.631043259644039e-05, + "loss": 0.5553, "step": 1790 }, { - "epoch": 0.25066480055983204, - "grad_norm": 0.4308111583911404, - "learning_rate": 4.389373935885646e-05, - "loss": 0.6113, + "epoch": 0.5012594458438288, + "grad_norm": 0.23836259743233856, + "learning_rate": 9.63046147391466e-05, + "loss": 0.5881, "step": 1791 }, { - "epoch": 0.2508047585724283, - "grad_norm": 0.42487713253039067, - "learning_rate": 4.388631568005364e-05, - "loss": 0.6097, + "epoch": 0.5015393226980128, + "grad_norm": 0.2411643961682015, + "learning_rate": 9.629879247455365e-05, + "loss": 0.5503, "step": 1792 }, { - "epoch": 0.2509447165850245, - "grad_norm": 0.44680223218467024, - "learning_rate": 4.387888811992131e-05, - "loss": 0.5852, + "epoch": 0.501819199552197, + "grad_norm": 0.22687508674274784, + "learning_rate": 9.629296580321571e-05, + "loss": 0.5418, "step": 1793 }, { - "epoch": 0.25108467459762074, - "grad_norm": 0.43930277215742447, - "learning_rate": 4.387145667998591e-05, - "loss": 0.6279, + "epoch": 0.5020990764063812, + "grad_norm": 0.23509273299700292, + "learning_rate": 9.62871347256874e-05, + "loss": 0.5437, "step": 1794 }, { - "epoch": 0.25122463261021694, - "grad_norm": 0.45405868972289387, - "learning_rate": 4.38640213617747e-05, - "loss": 0.5879, + "epoch": 0.5023789532605654, + "grad_norm": 0.2430667135329794, + "learning_rate": 9.628129924252369e-05, + "loss": 0.5455, "step": 1795 }, { - "epoch": 0.25136459062281313, - "grad_norm": 0.41454960378404326, - "learning_rate": 4.385658216681569e-05, - "loss": 0.5724, + "epoch": 0.5026588301147495, + "grad_norm": 0.24672159908029315, + "learning_rate": 9.627545935427999e-05, + "loss": 0.5866, "step": 1796 }, { - "epoch": 0.2515045486354094, - "grad_norm": 0.4061553712342834, - "learning_rate": 4.384913909663772e-05, - "loss": 0.5411, + "epoch": 0.5029387069689337, + "grad_norm": 0.242629180649455, + "learning_rate": 9.62696150615122e-05, + "loss": 0.5632, "step": 1797 }, { - "epoch": 0.2516445066480056, - "grad_norm": 0.4276501719660472, - "learning_rate": 4.384169215277041e-05, - "loss": 0.557, + "epoch": 0.5032185838231178, + "grad_norm": 0.24438038191497316, + "learning_rate": 9.626376636477653e-05, + "loss": 0.6019, "step": 1798 }, { - "epoch": 0.25178446466060184, - "grad_norm": 0.386857810391419, - "learning_rate": 4.383424133674419e-05, - "loss": 0.5897, + "epoch": 0.503498460677302, + "grad_norm": 0.23661072125513372, + "learning_rate": 9.625791326462969e-05, + "loss": 0.5588, "step": 1799 }, { - "epoch": 0.25192442267319803, - "grad_norm": 0.4310189731603924, - "learning_rate": 4.382678665009028e-05, - "loss": 0.6095, + "epoch": 0.5037783375314862, + "grad_norm": 0.24995935229611654, + "learning_rate": 9.625205576162877e-05, + "loss": 0.5487, "step": 1800 }, { - "epoch": 0.2520643806857943, - "grad_norm": 0.39537072584058847, - "learning_rate": 4.381932809434068e-05, - "loss": 0.5557, + "epoch": 0.5040582143856703, + "grad_norm": 0.24343740056074598, + "learning_rate": 9.62461938563313e-05, + "loss": 0.5585, "step": 1801 }, { - "epoch": 0.2522043386983905, - "grad_norm": 0.3973617854406729, - "learning_rate": 4.3811865671028206e-05, - "loss": 0.5778, + "epoch": 0.5043380912398545, + "grad_norm": 0.24236120456339055, + "learning_rate": 9.624032754929522e-05, + "loss": 0.5816, "step": 1802 }, { - "epoch": 0.2523442967109867, - "grad_norm": 0.4326957921508078, - "learning_rate": 4.380439938168647e-05, - "loss": 0.5888, + "epoch": 0.5046179680940386, + "grad_norm": 0.25800761709487613, + "learning_rate": 9.623445684107886e-05, + "loss": 0.5985, "step": 1803 }, { - "epoch": 0.25248425472358293, - "grad_norm": 0.40126247732516246, - "learning_rate": 4.379692922784986e-05, - "loss": 0.5733, + "epoch": 0.5048978449482228, + "grad_norm": 0.22645047786414052, + "learning_rate": 9.622858173224103e-05, + "loss": 0.5352, "step": 1804 }, { - "epoch": 0.25262421273617913, - "grad_norm": 0.3962832983075247, - "learning_rate": 4.378945521105357e-05, - "loss": 0.5701, + "epoch": 0.5051777218024069, + "grad_norm": 0.24908389535441078, + "learning_rate": 9.622270222334092e-05, + "loss": 0.5509, "step": 1805 }, { - "epoch": 0.2527641707487754, - "grad_norm": 0.4292056775606421, - "learning_rate": 4.378197733283359e-05, - "loss": 0.6177, + "epoch": 0.5054575986565911, + "grad_norm": 0.24691042377307731, + "learning_rate": 9.621681831493814e-05, + "loss": 0.5672, "step": 1806 }, { - "epoch": 0.2529041287613716, - "grad_norm": 0.4598576468560811, - "learning_rate": 4.377449559472669e-05, - "loss": 0.5838, + "epoch": 0.5057374755107753, + "grad_norm": 0.22132863335088446, + "learning_rate": 9.621093000759271e-05, + "loss": 0.5806, "step": 1807 }, { - "epoch": 0.25304408677396784, - "grad_norm": 0.4943287880849202, - "learning_rate": 4.3767009998270464e-05, - "loss": 0.6519, + "epoch": 0.5060173523649594, + "grad_norm": 0.22962279406791766, + "learning_rate": 9.620503730186512e-05, + "loss": 0.5724, "step": 1808 }, { - "epoch": 0.25318404478656403, - "grad_norm": 0.41197329102085845, - "learning_rate": 4.375952054500326e-05, - "loss": 0.5685, + "epoch": 0.5062972292191436, + "grad_norm": 0.2249337051683322, + "learning_rate": 9.619914019831619e-05, + "loss": 0.5284, "step": 1809 }, { - "epoch": 0.25332400279916023, - "grad_norm": 0.4473419746095371, - "learning_rate": 4.375202723646424e-05, - "loss": 0.5987, + "epoch": 0.5065771060733277, + "grad_norm": 0.23783368590261306, + "learning_rate": 9.619323869750727e-05, + "loss": 0.5638, "step": 1810 }, { - "epoch": 0.2534639608117565, - "grad_norm": 0.42470540560047887, - "learning_rate": 4.374453007419336e-05, - "loss": 0.5944, + "epoch": 0.5068569829275119, + "grad_norm": 0.22446658791123203, + "learning_rate": 9.618733280000001e-05, + "loss": 0.5519, "step": 1811 }, { - "epoch": 0.2536039188243527, - "grad_norm": 0.4319391359458571, - "learning_rate": 4.373702905973135e-05, - "loss": 0.6269, + "epoch": 0.5071368597816961, + "grad_norm": 0.23534736176816506, + "learning_rate": 9.618142250635658e-05, + "loss": 0.5837, "step": 1812 }, { - "epoch": 0.25374387683694893, - "grad_norm": 0.4025733974145466, - "learning_rate": 4.3729524194619766e-05, - "loss": 0.5944, + "epoch": 0.5074167366358802, + "grad_norm": 0.22303012234251496, + "learning_rate": 9.617550781713949e-05, + "loss": 0.5414, "step": 1813 }, { - "epoch": 0.25388383484954513, - "grad_norm": 0.4420608064693045, - "learning_rate": 4.3722015480400916e-05, - "loss": 0.5987, + "epoch": 0.5076966134900643, + "grad_norm": 0.238460413121849, + "learning_rate": 9.616958873291173e-05, + "loss": 0.5758, "step": 1814 }, { - "epoch": 0.2540237928621414, - "grad_norm": 0.3993911334683989, - "learning_rate": 4.371450291861792e-05, - "loss": 0.5578, + "epoch": 0.5079764903442485, + "grad_norm": 0.2389949338953074, + "learning_rate": 9.616366525423666e-05, + "loss": 0.5766, "step": 1815 }, { - "epoch": 0.2541637508747376, - "grad_norm": 0.43011254515928227, - "learning_rate": 4.370698651081469e-05, - "loss": 0.6051, + "epoch": 0.5082563671984327, + "grad_norm": 0.22642327324417794, + "learning_rate": 9.61577373816781e-05, + "loss": 0.5595, "step": 1816 }, { - "epoch": 0.2543037088873338, - "grad_norm": 0.4109991007436434, - "learning_rate": 4.369946625853593e-05, - "loss": 0.5478, + "epoch": 0.5085362440526169, + "grad_norm": 0.23824066188804274, + "learning_rate": 9.615180511580026e-05, + "loss": 0.5638, "step": 1817 }, { - "epoch": 0.25444366689993003, - "grad_norm": 0.42138731559636133, - "learning_rate": 4.369194216332712e-05, - "loss": 0.5464, + "epoch": 0.5088161209068011, + "grad_norm": 0.23189431339771868, + "learning_rate": 9.614586845716777e-05, + "loss": 0.5662, "step": 1818 }, { - "epoch": 0.25458362491252623, - "grad_norm": 0.43137639859587146, - "learning_rate": 4.368441422673453e-05, - "loss": 0.6625, + "epoch": 0.5090959977609851, + "grad_norm": 0.23194498047736026, + "learning_rate": 9.613992740634572e-05, + "loss": 0.563, "step": 1819 }, { - "epoch": 0.2547235829251225, - "grad_norm": 0.42368932399104675, - "learning_rate": 4.367688245030523e-05, - "loss": 0.6049, + "epoch": 0.5093758746151693, + "grad_norm": 0.23745188110537496, + "learning_rate": 9.613398196389954e-05, + "loss": 0.5478, "step": 1820 }, { - "epoch": 0.2548635409377187, - "grad_norm": 0.40789716476560073, - "learning_rate": 4.36693468355871e-05, - "loss": 0.5915, + "epoch": 0.5096557514693535, + "grad_norm": 0.23459963343848525, + "learning_rate": 9.612803213039512e-05, + "loss": 0.5739, "step": 1821 }, { - "epoch": 0.25500349895031493, - "grad_norm": 0.4209915793540894, - "learning_rate": 4.366180738412876e-05, - "loss": 0.5932, + "epoch": 0.5099356283235377, + "grad_norm": 0.23694885368323856, + "learning_rate": 9.612207790639879e-05, + "loss": 0.54, "step": 1822 }, { - "epoch": 0.25514345696291113, - "grad_norm": 0.4046845978271354, - "learning_rate": 4.365426409747965e-05, - "loss": 0.5933, + "epoch": 0.5102155051777219, + "grad_norm": 0.24602605516224743, + "learning_rate": 9.611611929247726e-05, + "loss": 0.5688, "step": 1823 }, { - "epoch": 0.25528341497550733, - "grad_norm": 0.3975587329369015, - "learning_rate": 4.3646716977189996e-05, - "loss": 0.5731, + "epoch": 0.5104953820319059, + "grad_norm": 0.22630239119332318, + "learning_rate": 9.61101562891977e-05, + "loss": 0.541, "step": 1824 }, { - "epoch": 0.2554233729881036, - "grad_norm": 0.42825231948537235, - "learning_rate": 4.36391660248108e-05, - "loss": 0.575, + "epoch": 0.5107752588860901, + "grad_norm": 0.23519921347325817, + "learning_rate": 9.610418889712765e-05, + "loss": 0.5628, "step": 1825 }, { - "epoch": 0.2555633310006998, - "grad_norm": 0.4099650326475359, - "learning_rate": 4.3631611241893874e-05, - "loss": 0.612, + "epoch": 0.5110551357402743, + "grad_norm": 0.23070811933354385, + "learning_rate": 9.609821711683509e-05, + "loss": 0.5679, "step": 1826 }, { - "epoch": 0.25570328901329603, - "grad_norm": 0.4163478892519659, - "learning_rate": 4.362405262999178e-05, - "loss": 0.5941, + "epoch": 0.5113350125944585, + "grad_norm": 0.23724394099776144, + "learning_rate": 9.609224094888842e-05, + "loss": 0.548, "step": 1827 }, { - "epoch": 0.25584324702589223, - "grad_norm": 0.4192607935058611, - "learning_rate": 4.361649019065791e-05, - "loss": 0.5861, + "epoch": 0.5116148894486426, + "grad_norm": 0.24225479851705234, + "learning_rate": 9.608626039385648e-05, + "loss": 0.5702, "step": 1828 }, { - "epoch": 0.2559832050384884, - "grad_norm": 0.4056602965223505, - "learning_rate": 4.3608923925446424e-05, - "loss": 0.5517, + "epoch": 0.5118947663028267, + "grad_norm": 0.2461919864351795, + "learning_rate": 9.608027545230847e-05, + "loss": 0.5542, "step": 1829 }, { - "epoch": 0.2561231630510847, - "grad_norm": 0.4291339795549124, - "learning_rate": 4.360135383591224e-05, - "loss": 0.6211, + "epoch": 0.5121746431570109, + "grad_norm": 0.22350936533247612, + "learning_rate": 9.607428612481404e-05, + "loss": 0.5234, "step": 1830 }, { - "epoch": 0.2562631210636809, - "grad_norm": 0.4214799959182231, - "learning_rate": 4.3593779923611114e-05, - "loss": 0.6101, + "epoch": 0.5124545200111951, + "grad_norm": 0.22097363018732752, + "learning_rate": 9.606829241194327e-05, + "loss": 0.5296, "step": 1831 }, { - "epoch": 0.25640307907627713, - "grad_norm": 0.45878889417856267, - "learning_rate": 4.3586202190099555e-05, - "loss": 0.6222, + "epoch": 0.5127343968653792, + "grad_norm": 0.23235613494758678, + "learning_rate": 9.606229431426663e-05, + "loss": 0.5435, "step": 1832 }, { - "epoch": 0.2565430370888733, - "grad_norm": 0.401016178107381, - "learning_rate": 4.357862063693486e-05, - "loss": 0.5759, + "epoch": 0.5130142737195634, + "grad_norm": 0.2319305788291165, + "learning_rate": 9.605629183235506e-05, + "loss": 0.5514, "step": 1833 }, { - "epoch": 0.2566829951014696, - "grad_norm": 0.4096702585349008, - "learning_rate": 4.357103526567511e-05, - "loss": 0.6322, + "epoch": 0.5132941505737475, + "grad_norm": 0.2441169559939731, + "learning_rate": 9.605028496677983e-05, + "loss": 0.5691, "step": 1834 }, { - "epoch": 0.2568229531140658, - "grad_norm": 0.4133870618187997, - "learning_rate": 4.3563446077879194e-05, - "loss": 0.5974, + "epoch": 0.5135740274279317, + "grad_norm": 0.22513807184150386, + "learning_rate": 9.604427371811273e-05, + "loss": 0.5791, "step": 1835 }, { - "epoch": 0.256962911126662, - "grad_norm": 0.41053680469933324, - "learning_rate": 4.355585307510675e-05, - "loss": 0.5636, + "epoch": 0.5138539042821159, + "grad_norm": 0.2283649347192075, + "learning_rate": 9.603825808692587e-05, + "loss": 0.5439, "step": 1836 }, { - "epoch": 0.2571028691392582, - "grad_norm": 0.4286300619381813, - "learning_rate": 4.354825625891822e-05, - "loss": 0.5705, + "epoch": 0.5141337811363, + "grad_norm": 0.23833928056167464, + "learning_rate": 9.603223807379183e-05, + "loss": 0.558, "step": 1837 }, { - "epoch": 0.2572428271518544, - "grad_norm": 0.5243484730511359, - "learning_rate": 4.354065563087484e-05, - "loss": 0.5945, + "epoch": 0.5144136579904842, + "grad_norm": 0.2370905233182266, + "learning_rate": 9.602621367928362e-05, + "loss": 0.5729, "step": 1838 }, { - "epoch": 0.2573827851644507, - "grad_norm": 0.4165723735814597, - "learning_rate": 4.3533051192538596e-05, - "loss": 0.5785, + "epoch": 0.5146935348446684, + "grad_norm": 0.23328697868395074, + "learning_rate": 9.602018490397462e-05, + "loss": 0.5557, "step": 1839 }, { - "epoch": 0.2575227431770469, - "grad_norm": 0.4192843833101577, - "learning_rate": 4.3525442945472294e-05, - "loss": 0.5873, + "epoch": 0.5149734116988525, + "grad_norm": 0.23919119334639152, + "learning_rate": 9.601415174843866e-05, + "loss": 0.54, "step": 1840 }, { - "epoch": 0.25766270118964313, - "grad_norm": 0.42759680733088257, - "learning_rate": 4.3517830891239496e-05, - "loss": 0.6008, + "epoch": 0.5152532885530366, + "grad_norm": 0.23209531414825327, + "learning_rate": 9.600811421324999e-05, + "loss": 0.5385, "step": 1841 }, { - "epoch": 0.2578026592022393, - "grad_norm": 0.41416919735625124, - "learning_rate": 4.351021503140456e-05, - "loss": 0.6047, + "epoch": 0.5155331654072208, + "grad_norm": 0.23622064587622715, + "learning_rate": 9.600207229898325e-05, + "loss": 0.5378, "step": 1842 }, { - "epoch": 0.2579426172148355, - "grad_norm": 0.4183249907357826, - "learning_rate": 4.350259536753262e-05, - "loss": 0.5839, + "epoch": 0.515813042261405, + "grad_norm": 0.23316704314698783, + "learning_rate": 9.599602600621353e-05, + "loss": 0.5434, "step": 1843 }, { - "epoch": 0.2580825752274318, - "grad_norm": 0.41171514577228524, - "learning_rate": 4.34949719011896e-05, - "loss": 0.5826, + "epoch": 0.5160929191155892, + "grad_norm": 0.2365672488806659, + "learning_rate": 9.598997533551631e-05, + "loss": 0.5536, "step": 1844 }, { - "epoch": 0.258222533240028, - "grad_norm": 0.4240796706040656, - "learning_rate": 4.348734463394219e-05, - "loss": 0.5988, + "epoch": 0.5163727959697733, + "grad_norm": 0.24001992184581208, + "learning_rate": 9.598392028746748e-05, + "loss": 0.547, "step": 1845 }, { - "epoch": 0.2583624912526242, - "grad_norm": 0.4231976779667788, - "learning_rate": 4.3479713567357886e-05, - "loss": 0.5686, + "epoch": 0.5166526728239574, + "grad_norm": 0.2270027847107539, + "learning_rate": 9.597786086264338e-05, + "loss": 0.5757, "step": 1846 }, { - "epoch": 0.2585024492652204, - "grad_norm": 0.4027448525781189, - "learning_rate": 4.347207870300494e-05, - "loss": 0.5991, + "epoch": 0.5169325496781416, + "grad_norm": 0.24629004358677026, + "learning_rate": 9.597179706162076e-05, + "loss": 0.5712, "step": 1847 }, { - "epoch": 0.2586424072778167, - "grad_norm": 0.44912825496869785, - "learning_rate": 4.346444004245239e-05, - "loss": 0.5893, + "epoch": 0.5172124265323258, + "grad_norm": 0.22870468213578413, + "learning_rate": 9.596572888497677e-05, + "loss": 0.5403, "step": 1848 }, { - "epoch": 0.2587823652904129, - "grad_norm": 0.4267680067654748, - "learning_rate": 4.3456797587270066e-05, - "loss": 0.5951, + "epoch": 0.51749230338651, + "grad_norm": 0.22444490352606403, + "learning_rate": 9.595965633328897e-05, + "loss": 0.5862, "step": 1849 }, { - "epoch": 0.25892232330300907, - "grad_norm": 0.43586393714352484, - "learning_rate": 4.344915133902856e-05, - "loss": 0.5675, + "epoch": 0.517772180240694, + "grad_norm": 0.23245352945553727, + "learning_rate": 9.595357940713534e-05, + "loss": 0.582, "step": 1850 }, { - "epoch": 0.2590622813156053, - "grad_norm": 0.4559790753468547, - "learning_rate": 4.344150129929927e-05, - "loss": 0.6084, + "epoch": 0.5180520570948782, + "grad_norm": 0.23265760512745268, + "learning_rate": 9.594749810709432e-05, + "loss": 0.5443, "step": 1851 }, { - "epoch": 0.2592022393282015, - "grad_norm": 0.4196684079525518, - "learning_rate": 4.3433847469654344e-05, - "loss": 0.5434, + "epoch": 0.5183319339490624, + "grad_norm": 0.23765425762096956, + "learning_rate": 9.59414124337447e-05, + "loss": 0.5586, "step": 1852 }, { - "epoch": 0.2593421973407978, - "grad_norm": 0.43364642603356207, - "learning_rate": 4.342618985166672e-05, - "loss": 0.602, + "epoch": 0.5186118108032466, + "grad_norm": 0.22943262496098119, + "learning_rate": 9.593532238766574e-05, + "loss": 0.5547, "step": 1853 }, { - "epoch": 0.259482155353394, - "grad_norm": 0.43456208436691296, - "learning_rate": 4.341852844691012e-05, - "loss": 0.5678, + "epoch": 0.5188916876574308, + "grad_norm": 0.24586384683093845, + "learning_rate": 9.592922796943707e-05, + "loss": 0.5618, "step": 1854 }, { - "epoch": 0.2596221133659902, - "grad_norm": 0.4445616632907679, - "learning_rate": 4.341086325695905e-05, - "loss": 0.6011, + "epoch": 0.5191715645116148, + "grad_norm": 0.23118537262405697, + "learning_rate": 9.592312917963878e-05, + "loss": 0.5403, "step": 1855 }, { - "epoch": 0.2597620713785864, - "grad_norm": 0.4148631528766163, - "learning_rate": 4.340319428338877e-05, - "loss": 0.5984, + "epoch": 0.519451441365799, + "grad_norm": 0.23040203657112565, + "learning_rate": 9.591702601885135e-05, + "loss": 0.5599, "step": 1856 }, { - "epoch": 0.2599020293911826, - "grad_norm": 0.42267402439629853, - "learning_rate": 4.339552152777534e-05, - "loss": 0.6198, + "epoch": 0.5197313182199832, + "grad_norm": 0.23825571871577292, + "learning_rate": 9.59109184876557e-05, + "loss": 0.5767, "step": 1857 }, { - "epoch": 0.2600419874037789, - "grad_norm": 0.43701220197978513, - "learning_rate": 4.338784499169559e-05, - "loss": 0.5792, + "epoch": 0.5200111950741674, + "grad_norm": 0.22947366405695055, + "learning_rate": 9.59048065866331e-05, + "loss": 0.5417, "step": 1858 }, { - "epoch": 0.26018194541637507, - "grad_norm": 0.4498233901707297, - "learning_rate": 4.338016467672712e-05, - "loss": 0.5822, + "epoch": 0.5202910719283516, + "grad_norm": 0.23364051222262436, + "learning_rate": 9.589869031636533e-05, + "loss": 0.5335, "step": 1859 }, { - "epoch": 0.2603219034289713, - "grad_norm": 0.4302218516293081, - "learning_rate": 4.337248058444832e-05, - "loss": 0.6329, + "epoch": 0.5205709487825357, + "grad_norm": 0.24928639073870468, + "learning_rate": 9.589256967743453e-05, + "loss": 0.5496, "step": 1860 }, { - "epoch": 0.2604618614415675, - "grad_norm": 0.4149612252531319, - "learning_rate": 4.336479271643833e-05, - "loss": 0.5676, + "epoch": 0.5208508256367198, + "grad_norm": 0.223264011896636, + "learning_rate": 9.588644467042327e-05, + "loss": 0.5532, "step": 1861 }, { - "epoch": 0.2606018194541638, - "grad_norm": 0.4482912811055662, - "learning_rate": 4.335710107427711e-05, - "loss": 0.6405, + "epoch": 0.521130702490904, + "grad_norm": 0.24565336079633493, + "learning_rate": 9.58803152959145e-05, + "loss": 0.5634, "step": 1862 }, { - "epoch": 0.26074177746675997, - "grad_norm": 0.43212615840984736, - "learning_rate": 4.3349405659545365e-05, - "loss": 0.6291, + "epoch": 0.5214105793450882, + "grad_norm": 0.22199641199925949, + "learning_rate": 9.587418155449167e-05, + "loss": 0.5409, "step": 1863 }, { - "epoch": 0.26088173547935617, - "grad_norm": 0.43790968743419006, - "learning_rate": 4.334170647382457e-05, - "loss": 0.6161, + "epoch": 0.5216904561992723, + "grad_norm": 0.22608326732484155, + "learning_rate": 9.586804344673853e-05, + "loss": 0.5509, "step": 1864 }, { - "epoch": 0.2610216934919524, - "grad_norm": 0.4057107337252783, - "learning_rate": 4.333400351869699e-05, - "loss": 0.5897, + "epoch": 0.5219703330534565, + "grad_norm": 0.23813917903969617, + "learning_rate": 9.586190097323934e-05, + "loss": 0.5488, "step": 1865 }, { - "epoch": 0.2611616515045486, - "grad_norm": 0.4169972715195528, - "learning_rate": 4.332629679574566e-05, - "loss": 0.5807, + "epoch": 0.5222502099076406, + "grad_norm": 0.24733511129134678, + "learning_rate": 9.585575413457877e-05, + "loss": 0.6012, "step": 1866 }, { - "epoch": 0.2613016095171449, - "grad_norm": 0.4148009483085235, - "learning_rate": 4.3318586306554394e-05, - "loss": 0.5784, + "epoch": 0.5225300867618248, + "grad_norm": 0.24835802315135277, + "learning_rate": 9.584960293134184e-05, + "loss": 0.5847, "step": 1867 }, { - "epoch": 0.26144156752974107, - "grad_norm": 0.3916134068027622, - "learning_rate": 4.331087205270777e-05, - "loss": 0.6159, + "epoch": 0.522809963616009, + "grad_norm": 0.22261323703635638, + "learning_rate": 9.584344736411405e-05, + "loss": 0.5555, "step": 1868 }, { - "epoch": 0.2615815255423373, - "grad_norm": 0.413939159109484, - "learning_rate": 4.3303154035791164e-05, - "loss": 0.5965, + "epoch": 0.5230898404701931, + "grad_norm": 0.23134104657282056, + "learning_rate": 9.583728743348128e-05, + "loss": 0.5683, "step": 1869 }, { - "epoch": 0.2617214835549335, - "grad_norm": 0.3965210541189402, - "learning_rate": 4.329543225739068e-05, - "loss": 0.5793, + "epoch": 0.5233697173243773, + "grad_norm": 0.23069707962901834, + "learning_rate": 9.583112314002983e-05, + "loss": 0.5687, "step": 1870 }, { - "epoch": 0.2618614415675297, - "grad_norm": 0.42358611314483735, - "learning_rate": 4.328770671909323e-05, - "loss": 0.5817, + "epoch": 0.5236495941785614, + "grad_norm": 0.23107051966706807, + "learning_rate": 9.582495448434643e-05, + "loss": 0.538, "step": 1871 }, { - "epoch": 0.26200139958012597, - "grad_norm": 0.4039935315082308, - "learning_rate": 4.32799774224865e-05, - "loss": 0.5825, + "epoch": 0.5239294710327456, + "grad_norm": 0.23540799518395647, + "learning_rate": 9.581878146701821e-05, + "loss": 0.5768, "step": 1872 }, { - "epoch": 0.26214135759272217, - "grad_norm": 0.42958959652262796, - "learning_rate": 4.327224436915893e-05, - "loss": 0.5831, + "epoch": 0.5242093478869297, + "grad_norm": 0.2231158508992458, + "learning_rate": 9.581260408863272e-05, + "loss": 0.5605, "step": 1873 }, { - "epoch": 0.2622813156053184, - "grad_norm": 0.4520950932964586, - "learning_rate": 4.3264507560699746e-05, - "loss": 0.6003, + "epoch": 0.5244892247411139, + "grad_norm": 0.23665288817701655, + "learning_rate": 9.580642234977792e-05, + "loss": 0.5521, "step": 1874 }, { - "epoch": 0.2624212736179146, - "grad_norm": 0.4177363042656859, - "learning_rate": 4.3256766998698936e-05, - "loss": 0.6011, + "epoch": 0.5247691015952981, + "grad_norm": 0.22260080585557304, + "learning_rate": 9.580023625104223e-05, + "loss": 0.5609, "step": 1875 }, { - "epoch": 0.26256123163051087, - "grad_norm": 0.4416091225807165, - "learning_rate": 4.324902268474727e-05, - "loss": 0.6244, + "epoch": 0.5250489784494822, + "grad_norm": 0.2292353822993223, + "learning_rate": 9.579404579301441e-05, + "loss": 0.5579, "step": 1876 }, { - "epoch": 0.26270118964310707, - "grad_norm": 0.6065946464455126, - "learning_rate": 4.324127462043627e-05, - "loss": 0.6069, + "epoch": 0.5253288553036664, + "grad_norm": 0.23344828178509996, + "learning_rate": 9.578785097628367e-05, + "loss": 0.5791, "step": 1877 }, { - "epoch": 0.26284114765570327, - "grad_norm": 0.41146045832841216, - "learning_rate": 4.323352280735826e-05, - "loss": 0.6005, + "epoch": 0.5256087321578505, + "grad_norm": 0.23980135171832784, + "learning_rate": 9.578165180143965e-05, + "loss": 0.5871, "step": 1878 }, { - "epoch": 0.2629811056682995, - "grad_norm": 0.4003315872022978, - "learning_rate": 4.322576724710631e-05, - "loss": 0.575, + "epoch": 0.5258886090120347, + "grad_norm": 0.23034237676380445, + "learning_rate": 9.577544826907238e-05, + "loss": 0.5401, "step": 1879 }, { - "epoch": 0.2631210636808957, - "grad_norm": 0.40221521105567576, - "learning_rate": 4.3218007941274264e-05, - "loss": 0.5528, + "epoch": 0.5261684858662189, + "grad_norm": 0.2379282172099179, + "learning_rate": 9.576924037977233e-05, + "loss": 0.5398, "step": 1880 }, { - "epoch": 0.26326102169349197, - "grad_norm": 0.4161742129214681, - "learning_rate": 4.321024489145673e-05, - "loss": 0.6148, + "epoch": 0.5264483627204031, + "grad_norm": 0.23368080388160048, + "learning_rate": 9.576302813413036e-05, + "loss": 0.583, "step": 1881 }, { - "epoch": 0.26340097970608817, - "grad_norm": 0.4130366243572411, - "learning_rate": 4.3202478099249105e-05, - "loss": 0.6184, + "epoch": 0.5267282395745871, + "grad_norm": 0.23033677903394748, + "learning_rate": 9.575681153273776e-05, + "loss": 0.5751, "step": 1882 }, { - "epoch": 0.2635409377186844, - "grad_norm": 0.44069492817846667, - "learning_rate": 4.3194707566247537e-05, - "loss": 0.5936, + "epoch": 0.5270081164287713, + "grad_norm": 0.23594766017150454, + "learning_rate": 9.575059057618623e-05, + "loss": 0.5553, "step": 1883 }, { - "epoch": 0.2636808957312806, - "grad_norm": 0.43421883092849856, - "learning_rate": 4.318693329404896e-05, - "loss": 0.6115, + "epoch": 0.5272879932829555, + "grad_norm": 0.24174536855126785, + "learning_rate": 9.574436526506788e-05, + "loss": 0.5887, "step": 1884 }, { - "epoch": 0.2638208537438768, - "grad_norm": 0.41021601219211107, - "learning_rate": 4.317915528425106e-05, - "loss": 0.5749, + "epoch": 0.5275678701371397, + "grad_norm": 0.23461999323173222, + "learning_rate": 9.573813559997522e-05, + "loss": 0.5494, "step": 1885 }, { - "epoch": 0.26396081175647307, - "grad_norm": 0.42123839224718174, - "learning_rate": 4.317137353845229e-05, - "loss": 0.598, + "epoch": 0.5278477469913239, + "grad_norm": 0.23800563209230874, + "learning_rate": 9.573190158150122e-05, + "loss": 0.5688, "step": 1886 }, { - "epoch": 0.26410076976906927, - "grad_norm": 0.431025530329678, - "learning_rate": 4.316358805825188e-05, - "loss": 0.5969, + "epoch": 0.5281276238455079, + "grad_norm": 0.2198457840744784, + "learning_rate": 9.572566321023925e-05, + "loss": 0.5599, "step": 1887 }, { - "epoch": 0.2642407277816655, - "grad_norm": 0.40969971019500684, - "learning_rate": 4.3155798845249827e-05, - "loss": 0.5962, + "epoch": 0.5284075006996921, + "grad_norm": 0.2227507524362117, + "learning_rate": 9.571942048678306e-05, + "loss": 0.5601, "step": 1888 }, { - "epoch": 0.2643806857942617, - "grad_norm": 0.4155454566156464, - "learning_rate": 4.3148005901046905e-05, - "loss": 0.5936, + "epoch": 0.5286873775538763, + "grad_norm": 0.23017247054839587, + "learning_rate": 9.571317341172681e-05, + "loss": 0.5635, "step": 1889 }, { - "epoch": 0.26452064380685797, - "grad_norm": 0.4317150467528267, - "learning_rate": 4.3140209227244624e-05, - "loss": 0.6169, + "epoch": 0.5289672544080605, + "grad_norm": 0.23356722144675973, + "learning_rate": 9.570692198566515e-05, + "loss": 0.5363, "step": 1890 }, { - "epoch": 0.26466060181945417, - "grad_norm": 0.3970247011419925, - "learning_rate": 4.313240882544529e-05, - "loss": 0.5761, + "epoch": 0.5292471312622447, + "grad_norm": 0.22272007526605261, + "learning_rate": 9.570066620919307e-05, + "loss": 0.5772, "step": 1891 }, { - "epoch": 0.26480055983205036, - "grad_norm": 0.4414850369173905, - "learning_rate": 4.3124604697251963e-05, - "loss": 0.5991, + "epoch": 0.5295270081164287, + "grad_norm": 0.22992922392480677, + "learning_rate": 9.569440608290601e-05, + "loss": 0.5502, "step": 1892 }, { - "epoch": 0.2649405178446466, - "grad_norm": 0.4499059882632428, - "learning_rate": 4.311679684426847e-05, - "loss": 0.5507, + "epoch": 0.5298068849706129, + "grad_norm": 0.25133674677929696, + "learning_rate": 9.568814160739978e-05, + "loss": 0.5577, "step": 1893 }, { - "epoch": 0.2650804758572428, - "grad_norm": 0.4195714682577236, - "learning_rate": 4.310898526809941e-05, - "loss": 0.5908, + "epoch": 0.5300867618247971, + "grad_norm": 0.23222311214547842, + "learning_rate": 9.568187278327067e-05, + "loss": 0.5655, "step": 1894 }, { - "epoch": 0.26522043386983907, - "grad_norm": 0.44322553286670324, - "learning_rate": 4.3101169970350125e-05, - "loss": 0.5807, + "epoch": 0.5303666386789813, + "grad_norm": 0.23073644663937348, + "learning_rate": 9.567559961111534e-05, + "loss": 0.5331, "step": 1895 }, { - "epoch": 0.26536039188243526, - "grad_norm": 0.424382131322031, - "learning_rate": 4.309335095262676e-05, - "loss": 0.5867, + "epoch": 0.5306465155331654, + "grad_norm": 0.23651252528460873, + "learning_rate": 9.566932209153088e-05, + "loss": 0.5549, "step": 1896 }, { - "epoch": 0.2655003498950315, - "grad_norm": 0.43960102391817163, - "learning_rate": 4.308552821653618e-05, - "loss": 0.5984, + "epoch": 0.5309263923873496, + "grad_norm": 0.244109438111545, + "learning_rate": 9.566304022511477e-05, + "loss": 0.5634, "step": 1897 }, { - "epoch": 0.2656403079076277, - "grad_norm": 0.39684220956322386, - "learning_rate": 4.307770176368605e-05, - "loss": 0.5701, + "epoch": 0.5312062692415337, + "grad_norm": 0.23293967146364056, + "learning_rate": 9.565675401246494e-05, + "loss": 0.5852, "step": 1898 }, { - "epoch": 0.2657802659202239, - "grad_norm": 0.43672589760925556, - "learning_rate": 4.306987159568479e-05, - "loss": 0.5973, + "epoch": 0.5314861460957179, + "grad_norm": 0.23870681376181876, + "learning_rate": 9.565046345417969e-05, + "loss": 0.5733, "step": 1899 }, { - "epoch": 0.26592022393282017, - "grad_norm": 0.40233707019737064, - "learning_rate": 4.3062037714141565e-05, - "loss": 0.613, + "epoch": 0.531766022949902, + "grad_norm": 0.23187763560436186, + "learning_rate": 9.564416855085781e-05, + "loss": 0.5661, "step": 1900 }, { - "epoch": 0.26606018194541636, - "grad_norm": 0.4264494610214143, - "learning_rate": 4.3054200120666334e-05, - "loss": 0.606, + "epoch": 0.5320458998040862, + "grad_norm": 0.24612542749443875, + "learning_rate": 9.56378693030984e-05, + "loss": 0.5465, "step": 1901 }, { - "epoch": 0.2662001399580126, - "grad_norm": 0.4098467135771313, - "learning_rate": 4.304635881686978e-05, - "loss": 0.5857, + "epoch": 0.5323257766582704, + "grad_norm": 0.23013071753066777, + "learning_rate": 9.563156571150105e-05, + "loss": 0.5661, "step": 1902 }, { - "epoch": 0.2663400979706088, - "grad_norm": 0.4240466866788527, - "learning_rate": 4.303851380436339e-05, - "loss": 0.6153, + "epoch": 0.5326056535124545, + "grad_norm": 0.23472383974694802, + "learning_rate": 9.562525777666572e-05, + "loss": 0.5559, "step": 1903 }, { - "epoch": 0.266480055983205, - "grad_norm": 0.403958075146994, - "learning_rate": 4.303066508475939e-05, - "loss": 0.5663, + "epoch": 0.5328855303666387, + "grad_norm": 0.23324049855111026, + "learning_rate": 9.561894549919283e-05, + "loss": 0.5505, "step": 1904 }, { - "epoch": 0.26662001399580126, - "grad_norm": 0.4143420243493558, - "learning_rate": 4.302281265967076e-05, - "loss": 0.5963, + "epoch": 0.5331654072208228, + "grad_norm": 0.2406931330869733, + "learning_rate": 9.561262887968317e-05, + "loss": 0.5708, "step": 1905 }, { - "epoch": 0.26675997200839746, - "grad_norm": 0.4444719253392314, - "learning_rate": 4.301495653071126e-05, - "loss": 0.6231, + "epoch": 0.533445284075007, + "grad_norm": 0.21556770800085526, + "learning_rate": 9.560630791873797e-05, + "loss": 0.5675, "step": 1906 }, { - "epoch": 0.2668999300209937, - "grad_norm": 0.40995195237809584, - "learning_rate": 4.3007096699495406e-05, - "loss": 0.5821, + "epoch": 0.5337251609291912, + "grad_norm": 0.23629431823799524, + "learning_rate": 9.559998261695883e-05, + "loss": 0.5668, "step": 1907 }, { - "epoch": 0.2670398880335899, - "grad_norm": 0.405726468503472, - "learning_rate": 4.299923316763848e-05, - "loss": 0.604, + "epoch": 0.5340050377833753, + "grad_norm": 0.23188856043931416, + "learning_rate": 9.559365297494784e-05, + "loss": 0.552, "step": 1908 }, { - "epoch": 0.26717984604618616, - "grad_norm": 0.4078344765445211, - "learning_rate": 4.2991365936756505e-05, - "loss": 0.576, + "epoch": 0.5342849146375594, + "grad_norm": 0.2309807204022842, + "learning_rate": 9.558731899330745e-05, + "loss": 0.5657, "step": 1909 }, { - "epoch": 0.26731980405878236, - "grad_norm": 0.4227354700436783, - "learning_rate": 4.2983495008466276e-05, - "loss": 0.6146, + "epoch": 0.5345647914917436, + "grad_norm": 0.22570161807706715, + "learning_rate": 9.558098067264052e-05, + "loss": 0.5946, "step": 1910 }, { - "epoch": 0.26745976207137856, - "grad_norm": 0.4030353788382993, - "learning_rate": 4.2975620384385364e-05, - "loss": 0.5752, + "epoch": 0.5348446683459278, + "grad_norm": 0.2361099600158028, + "learning_rate": 9.557463801355032e-05, + "loss": 0.5611, "step": 1911 }, { - "epoch": 0.2675997200839748, - "grad_norm": 0.4012686664661831, - "learning_rate": 4.296774206613207e-05, - "loss": 0.5892, + "epoch": 0.535124545200112, + "grad_norm": 0.2348866086463897, + "learning_rate": 9.556829101664057e-05, + "loss": 0.5482, "step": 1912 }, { - "epoch": 0.267739678096571, - "grad_norm": 0.41404486946782215, - "learning_rate": 4.2959860055325474e-05, - "loss": 0.5702, + "epoch": 0.5354044220542961, + "grad_norm": 0.22647267846564423, + "learning_rate": 9.55619396825154e-05, + "loss": 0.545, "step": 1913 }, { - "epoch": 0.26787963610916726, - "grad_norm": 0.41190311027197035, - "learning_rate": 4.295197435358541e-05, - "loss": 0.5803, + "epoch": 0.5356842989084802, + "grad_norm": 0.23909560172612637, + "learning_rate": 9.555558401177926e-05, + "loss": 0.5503, "step": 1914 }, { - "epoch": 0.26801959412176346, - "grad_norm": 0.4245596260036112, - "learning_rate": 4.294408496253246e-05, - "loss": 0.6054, + "epoch": 0.5359641757626644, + "grad_norm": 0.23514676112645405, + "learning_rate": 9.554922400503718e-05, + "loss": 0.5464, "step": 1915 }, { - "epoch": 0.2681595521343597, - "grad_norm": 0.4261060945315179, - "learning_rate": 4.293619188378798e-05, - "loss": 0.5609, + "epoch": 0.5362440526168486, + "grad_norm": 0.24219390194935297, + "learning_rate": 9.554285966289445e-05, + "loss": 0.581, "step": 1916 }, { - "epoch": 0.2682995101469559, - "grad_norm": 0.42776522346359913, - "learning_rate": 4.292829511897409e-05, - "loss": 0.5822, + "epoch": 0.5365239294710328, + "grad_norm": 0.26667302882813204, + "learning_rate": 9.553649098595682e-05, + "loss": 0.5611, "step": 1917 }, { - "epoch": 0.2684394681595521, - "grad_norm": 0.41648930810429086, - "learning_rate": 4.292039466971364e-05, - "loss": 0.5746, + "epoch": 0.536803806325217, + "grad_norm": 0.25536556951793493, + "learning_rate": 9.553011797483052e-05, + "loss": 0.5867, "step": 1918 }, { - "epoch": 0.26857942617214836, - "grad_norm": 0.3887697670352693, - "learning_rate": 4.2912490537630255e-05, - "loss": 0.587, + "epoch": 0.537083683179401, + "grad_norm": 0.24412883483923561, + "learning_rate": 9.55237406301221e-05, + "loss": 0.5593, "step": 1919 }, { - "epoch": 0.26871938418474456, - "grad_norm": 0.44349778409152874, - "learning_rate": 4.2904582724348316e-05, - "loss": 0.6146, + "epoch": 0.5373635600335852, + "grad_norm": 0.2426078288284984, + "learning_rate": 9.551735895243857e-05, + "loss": 0.555, "step": 1920 }, { - "epoch": 0.2688593421973408, - "grad_norm": 0.4179386506999582, - "learning_rate": 4.2896671231492966e-05, - "loss": 0.6399, + "epoch": 0.5376434368877694, + "grad_norm": 0.23475525081658452, + "learning_rate": 9.551097294238734e-05, + "loss": 0.5845, "step": 1921 }, { - "epoch": 0.268999300209937, - "grad_norm": 0.465029201453624, - "learning_rate": 4.288875606069008e-05, - "loss": 0.6134, + "epoch": 0.5379233137419536, + "grad_norm": 0.24906252543249288, + "learning_rate": 9.550458260057622e-05, + "loss": 0.553, "step": 1922 }, { - "epoch": 0.26913925822253326, - "grad_norm": 0.4425974682516534, - "learning_rate": 4.2880837213566326e-05, - "loss": 0.6037, + "epoch": 0.5382031905961377, + "grad_norm": 0.23275761033329403, + "learning_rate": 9.549818792761347e-05, + "loss": 0.5779, "step": 1923 }, { - "epoch": 0.26927921623512946, - "grad_norm": 0.42747672129154957, - "learning_rate": 4.2872914691749086e-05, - "loss": 0.6102, + "epoch": 0.5384830674503218, + "grad_norm": 0.2348949256678806, + "learning_rate": 9.549178892410772e-05, + "loss": 0.5668, "step": 1924 }, { - "epoch": 0.26941917424772566, - "grad_norm": 0.4492107075459924, - "learning_rate": 4.286498849686654e-05, - "loss": 0.591, + "epoch": 0.538762944304506, + "grad_norm": 0.24253072449017118, + "learning_rate": 9.548538559066804e-05, + "loss": 0.5672, "step": 1925 }, { - "epoch": 0.2695591322603219, - "grad_norm": 0.4016863287653028, - "learning_rate": 4.2857058630547594e-05, - "loss": 0.5677, + "epoch": 0.5390428211586902, + "grad_norm": 0.24269874600361666, + "learning_rate": 9.54789779279039e-05, + "loss": 0.5849, "step": 1926 }, { - "epoch": 0.2696990902729181, - "grad_norm": 0.4143898674573289, - "learning_rate": 4.2849125094421905e-05, - "loss": 0.5673, + "epoch": 0.5393226980128744, + "grad_norm": 0.22165250504144346, + "learning_rate": 9.547256593642517e-05, + "loss": 0.547, "step": 1927 }, { - "epoch": 0.26983904828551436, - "grad_norm": 0.39235286773607386, - "learning_rate": 4.284118789011991e-05, - "loss": 0.5805, + "epoch": 0.5396025748670585, + "grad_norm": 0.23225886142882915, + "learning_rate": 9.546614961684217e-05, + "loss": 0.583, "step": 1928 }, { - "epoch": 0.26997900629811056, - "grad_norm": 0.39515207584942524, - "learning_rate": 4.2833247019272775e-05, - "loss": 0.5898, + "epoch": 0.5398824517212426, + "grad_norm": 0.24075858244204984, + "learning_rate": 9.545972896976561e-05, + "loss": 0.5821, "step": 1929 }, { - "epoch": 0.2701189643107068, - "grad_norm": 0.4201779430133444, - "learning_rate": 4.2825302483512444e-05, - "loss": 0.6402, + "epoch": 0.5401623285754268, + "grad_norm": 0.2289433754905644, + "learning_rate": 9.545330399580659e-05, + "loss": 0.5443, "step": 1930 }, { - "epoch": 0.270258922323303, - "grad_norm": 0.41576770047699163, - "learning_rate": 4.281735428447158e-05, - "loss": 0.5649, + "epoch": 0.540442205429611, + "grad_norm": 0.23766814224236468, + "learning_rate": 9.544687469557666e-05, + "loss": 0.5623, "step": 1931 }, { - "epoch": 0.2703988803358992, - "grad_norm": 0.41462546934983685, - "learning_rate": 4.2809402423783624e-05, - "loss": 0.5938, + "epoch": 0.5407220822837951, + "grad_norm": 0.2356527757625123, + "learning_rate": 9.544044106968777e-05, + "loss": 0.5587, "step": 1932 }, { - "epoch": 0.27053883834849546, - "grad_norm": 0.4096839436305104, - "learning_rate": 4.280144690308277e-05, - "loss": 0.5786, + "epoch": 0.5410019591379793, + "grad_norm": 0.23257587339724806, + "learning_rate": 9.543400311875225e-05, + "loss": 0.562, "step": 1933 }, { - "epoch": 0.27067879636109166, - "grad_norm": 0.41756540359737393, - "learning_rate": 4.279348772400395e-05, - "loss": 0.5666, + "epoch": 0.5412818359921634, + "grad_norm": 0.23234528561284273, + "learning_rate": 9.542756084338289e-05, + "loss": 0.5363, "step": 1934 }, { - "epoch": 0.2708187543736879, - "grad_norm": 0.39443631101683757, - "learning_rate": 4.278552488818288e-05, - "loss": 0.5919, + "epoch": 0.5415617128463476, + "grad_norm": 0.22934883021742244, + "learning_rate": 9.542111424419286e-05, + "loss": 0.569, "step": 1935 }, { - "epoch": 0.2709587123862841, - "grad_norm": 0.438020775582902, - "learning_rate": 4.277755839725598e-05, - "loss": 0.5852, + "epoch": 0.5418415897005318, + "grad_norm": 0.22697340158798782, + "learning_rate": 9.541466332179576e-05, + "loss": 0.5651, "step": 1936 }, { - "epoch": 0.27109867039888036, - "grad_norm": 0.4043207874267435, - "learning_rate": 4.2769588252860434e-05, - "loss": 0.5628, + "epoch": 0.5421214665547159, + "grad_norm": 0.23855856743322248, + "learning_rate": 9.540820807680557e-05, + "loss": 0.542, "step": 1937 }, { - "epoch": 0.27123862841147656, - "grad_norm": 0.4455988825173519, - "learning_rate": 4.276161445663423e-05, - "loss": 0.6069, + "epoch": 0.5424013434089001, + "grad_norm": 0.23493788787767833, + "learning_rate": 9.540174850983673e-05, + "loss": 0.542, "step": 1938 }, { - "epoch": 0.27137858642407275, - "grad_norm": 0.41404820018714156, - "learning_rate": 4.275363701021602e-05, - "loss": 0.5833, + "epoch": 0.5426812202630843, + "grad_norm": 0.2350779825278462, + "learning_rate": 9.539528462150405e-05, + "loss": 0.5648, "step": 1939 }, { - "epoch": 0.271518544436669, - "grad_norm": 0.4207829197564249, - "learning_rate": 4.2745655915245266e-05, - "loss": 0.5698, + "epoch": 0.5429610971172684, + "grad_norm": 0.2391293156386913, + "learning_rate": 9.538881641242276e-05, + "loss": 0.5532, "step": 1940 }, { - "epoch": 0.2716585024492652, - "grad_norm": 0.4300264097184269, - "learning_rate": 4.273767117336217e-05, - "loss": 0.6043, + "epoch": 0.5432409739714525, + "grad_norm": 0.23997894628745364, + "learning_rate": 9.538234388320855e-05, + "loss": 0.5667, "step": 1941 }, { - "epoch": 0.27179846046186146, - "grad_norm": 0.4230423530894155, - "learning_rate": 4.272968278620768e-05, - "loss": 0.5954, + "epoch": 0.5435208508256367, + "grad_norm": 0.23574334521382298, + "learning_rate": 9.537586703447743e-05, + "loss": 0.576, "step": 1942 }, { - "epoch": 0.27193841847445765, - "grad_norm": 0.4104705230949498, - "learning_rate": 4.272169075542348e-05, - "loss": 0.6132, + "epoch": 0.5438007276798209, + "grad_norm": 0.2385474638070604, + "learning_rate": 9.536938586684587e-05, + "loss": 0.5424, "step": 1943 }, { - "epoch": 0.2720783764870539, - "grad_norm": 0.4412681694744553, - "learning_rate": 4.2713695082652015e-05, - "loss": 0.634, + "epoch": 0.5440806045340051, + "grad_norm": 0.24772515180100296, + "learning_rate": 9.536290038093078e-05, + "loss": 0.5532, "step": 1944 }, { - "epoch": 0.2722183344996501, - "grad_norm": 0.41281029518549284, - "learning_rate": 4.270569576953648e-05, - "loss": 0.5803, + "epoch": 0.5443604813881892, + "grad_norm": 0.23021456892542566, + "learning_rate": 9.535641057734945e-05, + "loss": 0.5579, "step": 1945 }, { - "epoch": 0.2723582925122463, - "grad_norm": 0.4326754906394364, - "learning_rate": 4.269769281772082e-05, - "loss": 0.5982, + "epoch": 0.5446403582423733, + "grad_norm": 0.2311219424121253, + "learning_rate": 9.534991645671958e-05, + "loss": 0.5678, "step": 1946 }, { - "epoch": 0.27249825052484256, - "grad_norm": 0.4143197407799747, - "learning_rate": 4.2689686228849716e-05, - "loss": 0.6203, + "epoch": 0.5449202350965575, + "grad_norm": 0.22398028919231158, + "learning_rate": 9.534341801965926e-05, + "loss": 0.5305, "step": 1947 }, { - "epoch": 0.27263820853743875, - "grad_norm": 0.44499388653333827, - "learning_rate": 4.26816760045686e-05, - "loss": 0.5921, + "epoch": 0.5452001119507417, + "grad_norm": 0.23108011550849794, + "learning_rate": 9.533691526678705e-05, + "loss": 0.5838, "step": 1948 }, { - "epoch": 0.272778166550035, - "grad_norm": 0.4198979954091138, - "learning_rate": 4.267366214652366e-05, - "loss": 0.5864, + "epoch": 0.5454799888049259, + "grad_norm": 0.23265157506126793, + "learning_rate": 9.533040819872185e-05, + "loss": 0.5656, "step": 1949 }, { - "epoch": 0.2729181245626312, - "grad_norm": 0.43526258690475156, - "learning_rate": 4.266564465636183e-05, - "loss": 0.5948, + "epoch": 0.5457598656591099, + "grad_norm": 0.288365000981492, + "learning_rate": 9.532389681608305e-05, + "loss": 0.5606, "step": 1950 }, { - "epoch": 0.27305808257522746, - "grad_norm": 0.3940644065133908, - "learning_rate": 4.265762353573077e-05, - "loss": 0.585, + "epoch": 0.5460397425132941, + "grad_norm": 0.2321060095008162, + "learning_rate": 9.531738111949036e-05, + "loss": 0.5795, "step": 1951 }, { - "epoch": 0.27319804058782365, - "grad_norm": 0.40794104308496737, - "learning_rate": 4.264959878627891e-05, - "loss": 0.6, + "epoch": 0.5463196193674783, + "grad_norm": 0.22758287819714584, + "learning_rate": 9.531086110956398e-05, + "loss": 0.5372, "step": 1952 }, { - "epoch": 0.27333799860041985, - "grad_norm": 0.3823510808207485, - "learning_rate": 4.264157040965543e-05, - "loss": 0.547, + "epoch": 0.5465994962216625, + "grad_norm": 0.23224673201452703, + "learning_rate": 9.530433678692447e-05, + "loss": 0.5572, "step": 1953 }, { - "epoch": 0.2734779566130161, - "grad_norm": 0.42277154396243044, - "learning_rate": 4.263353840751022e-05, - "loss": 0.5996, + "epoch": 0.5468793730758467, + "grad_norm": 0.21852267247765625, + "learning_rate": 9.529780815219284e-05, + "loss": 0.535, "step": 1954 }, { - "epoch": 0.2736179146256123, - "grad_norm": 0.39495247008059065, - "learning_rate": 4.2625502781493955e-05, - "loss": 0.5769, + "epoch": 0.5471592499300307, + "grad_norm": 0.2297540027147699, + "learning_rate": 9.529127520599046e-05, + "loss": 0.5504, "step": 1955 }, { - "epoch": 0.27375787263820855, - "grad_norm": 0.43172939634218205, - "learning_rate": 4.261746353325804e-05, - "loss": 0.5667, + "epoch": 0.5474391267842149, + "grad_norm": 0.23915485872593667, + "learning_rate": 9.528473794893917e-05, + "loss": 0.5707, "step": 1956 }, { - "epoch": 0.27389783065080475, - "grad_norm": 0.42748966931166393, - "learning_rate": 4.26094206644546e-05, - "loss": 0.619, + "epoch": 0.5477190036383991, + "grad_norm": 0.22453867541566563, + "learning_rate": 9.527819638166117e-05, + "loss": 0.5759, "step": 1957 }, { - "epoch": 0.274037788663401, - "grad_norm": 0.41520591372518717, - "learning_rate": 4.260137417673655e-05, - "loss": 0.5495, + "epoch": 0.5479988804925833, + "grad_norm": 0.22199196523457324, + "learning_rate": 9.527165050477909e-05, + "loss": 0.537, "step": 1958 }, { - "epoch": 0.2741777466759972, - "grad_norm": 0.41675727817849284, - "learning_rate": 4.259332407175751e-05, - "loss": 0.5678, + "epoch": 0.5482787573467675, + "grad_norm": 0.24859979390155498, + "learning_rate": 9.526510031891598e-05, + "loss": 0.5718, "step": 1959 }, { - "epoch": 0.2743177046885934, - "grad_norm": 0.42389412857665876, - "learning_rate": 4.258527035117187e-05, - "loss": 0.5776, + "epoch": 0.5485586342009516, + "grad_norm": 0.228265159824563, + "learning_rate": 9.525854582469528e-05, + "loss": 0.5482, "step": 1960 }, { - "epoch": 0.27445766270118965, - "grad_norm": 0.42650038389451433, - "learning_rate": 4.257721301663474e-05, - "loss": 0.5926, + "epoch": 0.5488385110551357, + "grad_norm": 0.22451337588017242, + "learning_rate": 9.525198702274087e-05, + "loss": 0.5296, "step": 1961 }, { - "epoch": 0.27459762071378585, - "grad_norm": 0.4547693632893839, - "learning_rate": 4.2569152069801994e-05, - "loss": 0.6109, + "epoch": 0.5491183879093199, + "grad_norm": 0.2314849500943932, + "learning_rate": 9.524542391367699e-05, + "loss": 0.5706, "step": 1962 }, { - "epoch": 0.2747375787263821, - "grad_norm": 0.4096367527079948, - "learning_rate": 4.256108751233023e-05, - "loss": 0.5742, + "epoch": 0.5493982647635041, + "grad_norm": 0.22769313524812834, + "learning_rate": 9.523885649812833e-05, + "loss": 0.5715, "step": 1963 }, { - "epoch": 0.2748775367389783, - "grad_norm": 0.43852841060550257, - "learning_rate": 4.255301934587679e-05, - "loss": 0.6161, + "epoch": 0.5496781416176882, + "grad_norm": 0.23231869619896636, + "learning_rate": 9.523228477672001e-05, + "loss": 0.5658, "step": 1964 }, { - "epoch": 0.27501749475157455, - "grad_norm": 0.4108034529541987, - "learning_rate": 4.254494757209979e-05, - "loss": 0.5909, + "epoch": 0.5499580184718724, + "grad_norm": 0.24055911201147767, + "learning_rate": 9.52257087500775e-05, + "loss": 0.5708, "step": 1965 }, { - "epoch": 0.27515745276417075, - "grad_norm": 0.4179747177285123, - "learning_rate": 4.2536872192658036e-05, - "loss": 0.5902, + "epoch": 0.5502378953260565, + "grad_norm": 0.22845147101402621, + "learning_rate": 9.521912841882672e-05, + "loss": 0.5656, "step": 1966 }, { - "epoch": 0.27529741077676695, - "grad_norm": 0.4075584545924878, - "learning_rate": 4.2528793209211106e-05, - "loss": 0.573, + "epoch": 0.5505177721802407, + "grad_norm": 0.23328848827384172, + "learning_rate": 9.521254378359398e-05, + "loss": 0.5321, "step": 1967 }, { - "epoch": 0.2754373687893632, - "grad_norm": 0.4201389140014726, - "learning_rate": 4.252071062341933e-05, - "loss": 0.5944, + "epoch": 0.5507976490344249, + "grad_norm": 0.22460346332344272, + "learning_rate": 9.520595484500602e-05, + "loss": 0.546, "step": 1968 }, { - "epoch": 0.2755773268019594, - "grad_norm": 0.4253197821835051, - "learning_rate": 4.251262443694374e-05, - "loss": 0.569, + "epoch": 0.551077525888609, + "grad_norm": 0.23776827874011325, + "learning_rate": 9.519936160368998e-05, + "loss": 0.5699, "step": 1969 }, { - "epoch": 0.27571728481455565, - "grad_norm": 0.41281050840051553, - "learning_rate": 4.2504534651446134e-05, - "loss": 0.5845, + "epoch": 0.5513574027427932, + "grad_norm": 0.23351372187371927, + "learning_rate": 9.519276406027339e-05, + "loss": 0.5493, "step": 1970 }, { - "epoch": 0.27585724282715185, - "grad_norm": 0.4053164541842358, - "learning_rate": 4.2496441268589046e-05, - "loss": 0.6001, + "epoch": 0.5516372795969773, + "grad_norm": 0.2304708018654687, + "learning_rate": 9.518616221538424e-05, + "loss": 0.5427, "step": 1971 }, { - "epoch": 0.2759972008397481, - "grad_norm": 0.4381447506546874, - "learning_rate": 4.2488344290035755e-05, - "loss": 0.607, + "epoch": 0.5519171564511615, + "grad_norm": 0.23232900384938365, + "learning_rate": 9.517955606965086e-05, + "loss": 0.5744, "step": 1972 }, { - "epoch": 0.2761371588523443, - "grad_norm": 0.4183122839304423, - "learning_rate": 4.248024371745027e-05, - "loss": 0.5905, + "epoch": 0.5521970333053456, + "grad_norm": 0.22698205075703637, + "learning_rate": 9.517294562370205e-05, + "loss": 0.542, "step": 1973 }, { - "epoch": 0.2762771168649405, - "grad_norm": 0.4284345637823913, - "learning_rate": 4.2472139552497334e-05, - "loss": 0.6224, + "epoch": 0.5524769101595298, + "grad_norm": 0.22764760603881273, + "learning_rate": 9.516633087816699e-05, + "loss": 0.574, "step": 1974 }, { - "epoch": 0.27641707487753675, - "grad_norm": 0.43624222428113946, - "learning_rate": 4.246403179684245e-05, - "loss": 0.5874, + "epoch": 0.552756787013714, + "grad_norm": 0.22869155706946398, + "learning_rate": 9.515971183367527e-05, + "loss": 0.5568, "step": 1975 }, { - "epoch": 0.27655703289013295, - "grad_norm": 0.4238838475394068, - "learning_rate": 4.245592045215182e-05, - "loss": 0.5939, + "epoch": 0.5530366638678981, + "grad_norm": 0.23575186347623067, + "learning_rate": 9.515308849085691e-05, + "loss": 0.5566, "step": 1976 }, { - "epoch": 0.2766969909027292, - "grad_norm": 0.41691347825833375, - "learning_rate": 4.244780552009243e-05, - "loss": 0.6258, + "epoch": 0.5533165407220823, + "grad_norm": 0.24052513468678355, + "learning_rate": 9.514646085034232e-05, + "loss": 0.5533, "step": 1977 }, { - "epoch": 0.2768369489153254, - "grad_norm": 0.4083689917945832, - "learning_rate": 4.2439687002331974e-05, - "loss": 0.5565, + "epoch": 0.5535964175762664, + "grad_norm": 0.23553457119270182, + "learning_rate": 9.513982891276232e-05, + "loss": 0.5643, "step": 1978 }, { - "epoch": 0.27697690692792165, - "grad_norm": 0.4285760791242101, - "learning_rate": 4.2431564900538884e-05, - "loss": 0.5957, + "epoch": 0.5538762944304506, + "grad_norm": 0.2278446877532151, + "learning_rate": 9.513319267874812e-05, + "loss": 0.5753, "step": 1979 }, { - "epoch": 0.27711686494051785, - "grad_norm": 0.41197446910489155, - "learning_rate": 4.242343921638234e-05, - "loss": 0.601, + "epoch": 0.5541561712846348, + "grad_norm": 0.2233103849978068, + "learning_rate": 9.512655214893138e-05, + "loss": 0.5532, "step": 1980 }, { - "epoch": 0.27725682295311405, - "grad_norm": 0.38975348870242715, - "learning_rate": 4.241530995153225e-05, - "loss": 0.562, + "epoch": 0.554436048138819, + "grad_norm": 0.2366373117514305, + "learning_rate": 9.511990732394416e-05, + "loss": 0.5749, "step": 1981 }, { - "epoch": 0.2773967809657103, - "grad_norm": 0.4368612257662016, - "learning_rate": 4.2407177107659256e-05, - "loss": 0.6061, + "epoch": 0.554715924993003, + "grad_norm": 0.24265896290156705, + "learning_rate": 9.51132582044189e-05, + "loss": 0.5791, "step": 1982 }, { - "epoch": 0.2775367389783065, - "grad_norm": 0.40941708192964865, - "learning_rate": 4.2399040686434756e-05, - "loss": 0.5659, + "epoch": 0.5549958018471872, + "grad_norm": 0.2220396722562633, + "learning_rate": 9.510660479098847e-05, + "loss": 0.5254, "step": 1983 }, { - "epoch": 0.27767669699090275, - "grad_norm": 0.4233950461539689, - "learning_rate": 4.239090068953086e-05, - "loss": 0.5981, + "epoch": 0.5552756787013714, + "grad_norm": 0.23307690627014327, + "learning_rate": 9.509994708428615e-05, + "loss": 0.5479, "step": 1984 }, { - "epoch": 0.27781665500349895, - "grad_norm": 0.42236280001104815, - "learning_rate": 4.23827571186204e-05, - "loss": 0.5886, + "epoch": 0.5555555555555556, + "grad_norm": 0.23876319960416012, + "learning_rate": 9.509328508494563e-05, + "loss": 0.5527, "step": 1985 }, { - "epoch": 0.27795661301609514, - "grad_norm": 0.4144640899811079, - "learning_rate": 4.237460997537699e-05, - "loss": 0.5779, + "epoch": 0.5558354324097398, + "grad_norm": 0.23872847969233363, + "learning_rate": 9.5086618793601e-05, + "loss": 0.5706, "step": 1986 }, { - "epoch": 0.2780965710286914, - "grad_norm": 0.4625927000805653, - "learning_rate": 4.2366459261474933e-05, - "loss": 0.5679, + "epoch": 0.5561153092639238, + "grad_norm": 0.22964560955999136, + "learning_rate": 9.507994821088675e-05, + "loss": 0.5516, "step": 1987 }, { - "epoch": 0.2782365290412876, - "grad_norm": 0.4076605883733819, - "learning_rate": 4.235830497858929e-05, - "loss": 0.5655, + "epoch": 0.556395186118108, + "grad_norm": 0.2300913379025995, + "learning_rate": 9.50732733374378e-05, + "loss": 0.5738, "step": 1988 }, { - "epoch": 0.27837648705388385, - "grad_norm": 0.42845289807533865, - "learning_rate": 4.235014712839586e-05, - "loss": 0.6282, + "epoch": 0.5566750629722922, + "grad_norm": 0.22915071668113138, + "learning_rate": 9.506659417388945e-05, + "loss": 0.5657, "step": 1989 }, { - "epoch": 0.27851644506648005, - "grad_norm": 0.3954911530156325, - "learning_rate": 4.2341985712571144e-05, - "loss": 0.5594, + "epoch": 0.5569549398264764, + "grad_norm": 0.23357691835678449, + "learning_rate": 9.505991072087747e-05, + "loss": 0.5728, "step": 1990 }, { - "epoch": 0.2786564030790763, - "grad_norm": 0.42832437836826653, - "learning_rate": 4.233382073279241e-05, - "loss": 0.592, + "epoch": 0.5572348166806605, + "grad_norm": 0.23183903513631868, + "learning_rate": 9.505322297903794e-05, + "loss": 0.5484, "step": 1991 }, { - "epoch": 0.2787963610916725, - "grad_norm": 0.3915035514180305, - "learning_rate": 4.232565219073763e-05, - "loss": 0.5767, + "epoch": 0.5575146935348446, + "grad_norm": 0.2349802066578527, + "learning_rate": 9.504653094900744e-05, + "loss": 0.5647, "step": 1992 }, { - "epoch": 0.2789363191042687, - "grad_norm": 0.4091633397512807, - "learning_rate": 4.231748008808554e-05, - "loss": 0.5995, + "epoch": 0.5577945703890288, + "grad_norm": 0.2371316694083046, + "learning_rate": 9.503983463142292e-05, + "loss": 0.5898, "step": 1993 }, { - "epoch": 0.27907627711686495, - "grad_norm": 0.43083489824001814, - "learning_rate": 4.230930442651557e-05, - "loss": 0.6254, + "epoch": 0.558074447243213, + "grad_norm": 0.2273397184762064, + "learning_rate": 9.50331340269217e-05, + "loss": 0.5588, "step": 1994 }, { - "epoch": 0.27921623512946114, - "grad_norm": 0.4215052561977625, - "learning_rate": 4.230112520770792e-05, - "loss": 0.5946, + "epoch": 0.5583543240973972, + "grad_norm": 0.23348817571906577, + "learning_rate": 9.502642913614161e-05, + "loss": 0.5592, "step": 1995 }, { - "epoch": 0.2793561931420574, - "grad_norm": 0.41624126906364184, - "learning_rate": 4.22929424333435e-05, - "loss": 0.5912, + "epoch": 0.5586342009515813, + "grad_norm": 0.2152881868863065, + "learning_rate": 9.501971995972078e-05, + "loss": 0.562, "step": 1996 }, { - "epoch": 0.2794961511546536, - "grad_norm": 0.4084391778034195, - "learning_rate": 4.228475610510394e-05, - "loss": 0.5643, + "epoch": 0.5589140778057654, + "grad_norm": 0.21872152794288896, + "learning_rate": 9.501300649829781e-05, + "loss": 0.5823, "step": 1997 }, { - "epoch": 0.27963610916724985, - "grad_norm": 0.4151631289731341, - "learning_rate": 4.227656622467162e-05, - "loss": 0.5762, + "epoch": 0.5591939546599496, + "grad_norm": 0.2386995145124459, + "learning_rate": 9.500628875251168e-05, + "loss": 0.5693, "step": 1998 }, { - "epoch": 0.27977606717984604, - "grad_norm": 0.4207154793240298, - "learning_rate": 4.226837279372965e-05, - "loss": 0.5836, + "epoch": 0.5594738315141338, + "grad_norm": 0.22862033139971408, + "learning_rate": 9.499956672300178e-05, + "loss": 0.5944, "step": 1999 }, { - "epoch": 0.27991602519244224, - "grad_norm": 0.44259939289210365, - "learning_rate": 4.226017581396186e-05, - "loss": 0.6026, + "epoch": 0.559753708368318, + "grad_norm": 0.22519690744331325, + "learning_rate": 9.499284041040797e-05, + "loss": 0.5784, "step": 2000 }, { - "epoch": 0.2800559832050385, - "grad_norm": 0.40656254137179043, - "learning_rate": 4.2251975287052804e-05, - "loss": 0.5739, + "epoch": 0.5600335852225021, + "grad_norm": 0.2210358036618059, + "learning_rate": 9.49861098153704e-05, + "loss": 0.5369, "step": 2001 }, { - "epoch": 0.2801959412176347, - "grad_norm": 0.41605805412685426, - "learning_rate": 4.224377121468778e-05, - "loss": 0.6018, + "epoch": 0.5603134620766863, + "grad_norm": 0.22415424156405234, + "learning_rate": 9.497937493852971e-05, + "loss": 0.5639, "step": 2002 }, { - "epoch": 0.28033589923023094, - "grad_norm": 0.4181616140874938, - "learning_rate": 4.223556359855282e-05, - "loss": 0.5429, + "epoch": 0.5605933389308704, + "grad_norm": 0.23794546338939157, + "learning_rate": 9.497263578052695e-05, + "loss": 0.5712, "step": 2003 }, { - "epoch": 0.28047585724282714, - "grad_norm": 0.4414984932849065, - "learning_rate": 4.222735244033464e-05, - "loss": 0.6, + "epoch": 0.5608732157850546, + "grad_norm": 0.2183833771180705, + "learning_rate": 9.496589234200355e-05, + "loss": 0.5306, "step": 2004 }, { - "epoch": 0.2806158152554234, - "grad_norm": 0.40209497833786334, - "learning_rate": 4.221913774172076e-05, - "loss": 0.5793, + "epoch": 0.5611530926392387, + "grad_norm": 0.22839249865463046, + "learning_rate": 9.495914462360134e-05, + "loss": 0.5573, "step": 2005 }, { - "epoch": 0.2807557732680196, - "grad_norm": 0.40555504520183505, - "learning_rate": 4.221091950439935e-05, - "loss": 0.5522, + "epoch": 0.5614329694934229, + "grad_norm": 0.23122609821488663, + "learning_rate": 9.495239262596258e-05, + "loss": 0.5494, "step": 2006 }, { - "epoch": 0.2808957312806158, - "grad_norm": 0.4425009809651693, - "learning_rate": 4.220269773005935e-05, - "loss": 0.6562, + "epoch": 0.5617128463476071, + "grad_norm": 0.22952413883185982, + "learning_rate": 9.494563634972994e-05, + "loss": 0.5246, "step": 2007 }, { - "epoch": 0.28103568929321204, - "grad_norm": 0.4154023509957506, - "learning_rate": 4.219447242039043e-05, - "loss": 0.6121, + "epoch": 0.5619927232017912, + "grad_norm": 0.2227247431464232, + "learning_rate": 9.493887579554647e-05, + "loss": 0.5555, "step": 2008 }, { - "epoch": 0.28117564730580824, - "grad_norm": 0.4069493962165521, - "learning_rate": 4.2186243577082954e-05, - "loss": 0.582, + "epoch": 0.5622726000559753, + "grad_norm": 0.22670809691968316, + "learning_rate": 9.493211096405564e-05, + "loss": 0.5216, "step": 2009 }, { - "epoch": 0.2813156053184045, - "grad_norm": 0.42547539237875576, - "learning_rate": 4.217801120182805e-05, - "loss": 0.5737, + "epoch": 0.5625524769101595, + "grad_norm": 0.23020903542611063, + "learning_rate": 9.492534185590134e-05, + "loss": 0.5717, "step": 2010 }, { - "epoch": 0.2814555633310007, - "grad_norm": 0.4120143925863747, - "learning_rate": 4.2169775296317545e-05, - "loss": 0.5984, + "epoch": 0.5628323537643437, + "grad_norm": 0.22910534158633578, + "learning_rate": 9.491856847172786e-05, + "loss": 0.551, "step": 2011 }, { - "epoch": 0.28159552134359694, - "grad_norm": 0.4091520558950441, - "learning_rate": 4.216153586224401e-05, - "loss": 0.5533, + "epoch": 0.5631122306185279, + "grad_norm": 0.24505507442272167, + "learning_rate": 9.491179081217989e-05, + "loss": 0.5409, "step": 2012 }, { - "epoch": 0.28173547935619314, - "grad_norm": 0.40387339261079624, - "learning_rate": 4.2153292901300715e-05, - "loss": 0.6078, + "epoch": 0.563392107472712, + "grad_norm": 0.22108833619567092, + "learning_rate": 9.490500887790255e-05, + "loss": 0.5427, "step": 2013 }, { - "epoch": 0.28187543736878934, - "grad_norm": 0.40058528148893674, - "learning_rate": 4.214504641518169e-05, - "loss": 0.552, + "epoch": 0.5636719843268961, + "grad_norm": 0.23810167959392928, + "learning_rate": 9.48982226695413e-05, + "loss": 0.571, "step": 2014 }, { - "epoch": 0.2820153953813856, - "grad_norm": 0.40857595852803164, - "learning_rate": 4.213679640558167e-05, - "loss": 0.5744, + "epoch": 0.5639518611810803, + "grad_norm": 0.23758128510274953, + "learning_rate": 9.48914321877421e-05, + "loss": 0.5501, "step": 2015 }, { - "epoch": 0.2821553533939818, - "grad_norm": 0.3978718427859697, - "learning_rate": 4.212854287419611e-05, - "loss": 0.5916, + "epoch": 0.5642317380352645, + "grad_norm": 0.24894816413938858, + "learning_rate": 9.488463743315126e-05, + "loss": 0.5701, "step": 2016 }, { - "epoch": 0.28229531140657804, - "grad_norm": 0.3821785406082015, - "learning_rate": 4.212028582272119e-05, - "loss": 0.555, + "epoch": 0.5645116148894487, + "grad_norm": 0.23635924382790377, + "learning_rate": 9.487783840641551e-05, + "loss": 0.5672, "step": 2017 }, { - "epoch": 0.28243526941917424, - "grad_norm": 0.4069697450232385, - "learning_rate": 4.2112025252853823e-05, - "loss": 0.564, + "epoch": 0.5647914917436329, + "grad_norm": 0.222049319952158, + "learning_rate": 9.487103510818197e-05, + "loss": 0.559, "step": 2018 }, { - "epoch": 0.2825752274317705, - "grad_norm": 0.4023231577263976, - "learning_rate": 4.210376116629165e-05, - "loss": 0.5486, + "epoch": 0.5650713685978169, + "grad_norm": 0.23100028165490633, + "learning_rate": 9.486422753909819e-05, + "loss": 0.5255, "step": 2019 }, { - "epoch": 0.2827151854443667, - "grad_norm": 0.4071761170849277, - "learning_rate": 4.2095493564733005e-05, - "loss": 0.5769, + "epoch": 0.5653512454520011, + "grad_norm": 0.22600325418508446, + "learning_rate": 9.485741569981214e-05, + "loss": 0.563, "step": 2020 }, { - "epoch": 0.2828551434569629, - "grad_norm": 0.4303385757744145, - "learning_rate": 4.208722244987698e-05, - "loss": 0.5891, + "epoch": 0.5656311223061853, + "grad_norm": 0.2469821168912917, + "learning_rate": 9.485059959097213e-05, + "loss": 0.5394, "step": 2021 }, { - "epoch": 0.28299510146955914, - "grad_norm": 0.42291929930753197, - "learning_rate": 4.2078947823423364e-05, - "loss": 0.6011, + "epoch": 0.5659109991603695, + "grad_norm": 0.23842274970505892, + "learning_rate": 9.484377921322697e-05, + "loss": 0.5588, "step": 2022 }, { - "epoch": 0.28313505948215534, - "grad_norm": 0.4004019282036015, - "learning_rate": 4.2070669687072685e-05, - "loss": 0.5643, + "epoch": 0.5661908760145536, + "grad_norm": 0.2277464279718841, + "learning_rate": 9.483695456722579e-05, + "loss": 0.5651, "step": 2023 }, { - "epoch": 0.2832750174947516, - "grad_norm": 0.4294748353114633, - "learning_rate": 4.206238804252617e-05, - "loss": 0.6098, + "epoch": 0.5664707528687377, + "grad_norm": 0.2289879156338395, + "learning_rate": 9.483012565361819e-05, + "loss": 0.591, "step": 2024 }, { - "epoch": 0.2834149755073478, - "grad_norm": 0.4207040878829443, - "learning_rate": 4.205410289148579e-05, - "loss": 0.5825, + "epoch": 0.5667506297229219, + "grad_norm": 0.24315991038974277, + "learning_rate": 9.482329247305413e-05, + "loss": 0.5579, "step": 2025 }, { - "epoch": 0.28355493351994404, - "grad_norm": 0.42868887539679973, - "learning_rate": 4.204581423565424e-05, - "loss": 0.5703, + "epoch": 0.5670305065771061, + "grad_norm": 0.23791478263652036, + "learning_rate": 9.4816455026184e-05, + "loss": 0.5618, "step": 2026 }, { - "epoch": 0.28369489153254024, - "grad_norm": 0.41961324792796834, - "learning_rate": 4.2037522076734895e-05, - "loss": 0.6024, + "epoch": 0.5673103834312903, + "grad_norm": 0.22770686606928436, + "learning_rate": 9.48096133136586e-05, + "loss": 0.5658, "step": 2027 }, { - "epoch": 0.28383484954513644, - "grad_norm": 0.426062343917317, - "learning_rate": 4.20292264164319e-05, - "loss": 0.5451, + "epoch": 0.5675902602854744, + "grad_norm": 0.22992843543633568, + "learning_rate": 9.480276733612914e-05, + "loss": 0.5565, "step": 2028 }, { - "epoch": 0.2839748075577327, - "grad_norm": 0.5356851221725408, - "learning_rate": 4.202092725645009e-05, - "loss": 0.566, + "epoch": 0.5678701371396585, + "grad_norm": 0.2324655434035538, + "learning_rate": 9.479591709424717e-05, + "loss": 0.5427, "step": 2029 }, { - "epoch": 0.2841147655703289, - "grad_norm": 0.41705129999473584, - "learning_rate": 4.2012624598495026e-05, - "loss": 0.5515, + "epoch": 0.5681500139938427, + "grad_norm": 0.22765992848354477, + "learning_rate": 9.478906258866478e-05, + "loss": 0.5453, "step": 2030 }, { - "epoch": 0.28425472358292514, - "grad_norm": 0.4305240936670277, - "learning_rate": 4.2004318444272985e-05, - "loss": 0.6063, + "epoch": 0.5684298908480269, + "grad_norm": 0.23022522938379125, + "learning_rate": 9.478220382003431e-05, + "loss": 0.5764, "step": 2031 }, { - "epoch": 0.28439468159552134, - "grad_norm": 0.4109507589908985, - "learning_rate": 4.199600879549098e-05, - "loss": 0.5841, + "epoch": 0.568709767702211, + "grad_norm": 0.2408048445919462, + "learning_rate": 9.477534078900864e-05, + "loss": 0.5716, "step": 2032 }, { - "epoch": 0.2845346396081176, - "grad_norm": 0.4152517216058978, - "learning_rate": 4.198769565385671e-05, - "loss": 0.618, + "epoch": 0.5689896445563952, + "grad_norm": 0.2342047777837725, + "learning_rate": 9.476847349624097e-05, + "loss": 0.5481, "step": 2033 }, { - "epoch": 0.2846745976207138, - "grad_norm": 0.40875510353923156, - "learning_rate": 4.197937902107863e-05, - "loss": 0.6169, + "epoch": 0.5692695214105793, + "grad_norm": 0.22897053654830277, + "learning_rate": 9.47616019423849e-05, + "loss": 0.5698, "step": 2034 }, { - "epoch": 0.28481455563331, - "grad_norm": 0.4080934485529636, - "learning_rate": 4.197105889886587e-05, - "loss": 0.6191, + "epoch": 0.5695493982647635, + "grad_norm": 0.2295180393979441, + "learning_rate": 9.475472612809452e-05, + "loss": 0.547, "step": 2035 }, { - "epoch": 0.28495451364590624, - "grad_norm": 0.395534357123435, - "learning_rate": 4.1962735288928305e-05, - "loss": 0.572, + "epoch": 0.5698292751189477, + "grad_norm": 0.2332745513887627, + "learning_rate": 9.474784605402428e-05, + "loss": 0.5625, "step": 2036 }, { - "epoch": 0.28509447165850244, - "grad_norm": 0.4123561699274625, - "learning_rate": 4.1954408192976536e-05, - "loss": 0.6112, + "epoch": 0.5701091519731318, + "grad_norm": 0.23148919434798484, + "learning_rate": 9.4740961720829e-05, + "loss": 0.5834, "step": 2037 }, { - "epoch": 0.2852344296710987, - "grad_norm": 0.43088641312757203, - "learning_rate": 4.1946077612721854e-05, - "loss": 0.6146, + "epoch": 0.570389028827316, + "grad_norm": 0.2252512855750053, + "learning_rate": 9.473407312916393e-05, + "loss": 0.5468, "step": 2038 }, { - "epoch": 0.2853743876836949, - "grad_norm": 0.4073212886136425, - "learning_rate": 4.193774354987629e-05, - "loss": 0.5876, + "epoch": 0.5706689056815002, + "grad_norm": 0.22787780671938834, + "learning_rate": 9.472718027968474e-05, + "loss": 0.5723, "step": 2039 }, { - "epoch": 0.28551434569629114, - "grad_norm": 0.41104021326097206, - "learning_rate": 4.192940600615255e-05, - "loss": 0.5861, + "epoch": 0.5709487825356843, + "grad_norm": 0.23796730720112072, + "learning_rate": 9.472028317304748e-05, + "loss": 0.5626, "step": 2040 }, { - "epoch": 0.28565430370888734, - "grad_norm": 0.4266808368141709, - "learning_rate": 4.192106498326411e-05, - "loss": 0.5982, + "epoch": 0.5712286593898684, + "grad_norm": 0.23279731883326624, + "learning_rate": 9.471338180990868e-05, + "loss": 0.5663, "step": 2041 }, { - "epoch": 0.28579426172148353, - "grad_norm": 0.44040096596479616, - "learning_rate": 4.191272048292513e-05, - "loss": 0.5877, + "epoch": 0.5715085362440526, + "grad_norm": 0.22774061863041195, + "learning_rate": 9.470647619092514e-05, + "loss": 0.5417, "step": 2042 }, { - "epoch": 0.2859342197340798, - "grad_norm": 0.4156724474946737, - "learning_rate": 4.1904372506850484e-05, - "loss": 0.5899, + "epoch": 0.5717884130982368, + "grad_norm": 0.23274343346879015, + "learning_rate": 9.469956631675418e-05, + "loss": 0.5333, "step": 2043 }, { - "epoch": 0.286074177746676, - "grad_norm": 0.42353874241636685, - "learning_rate": 4.189602105675577e-05, - "loss": 0.5958, + "epoch": 0.572068289952421, + "grad_norm": 0.2214570256037127, + "learning_rate": 9.469265218805348e-05, + "loss": 0.5541, "step": 2044 }, { - "epoch": 0.28621413575927224, - "grad_norm": 0.40321593874563183, - "learning_rate": 4.18876661343573e-05, - "loss": 0.5898, + "epoch": 0.572348166806605, + "grad_norm": 0.2347970692270651, + "learning_rate": 9.468573380548112e-05, + "loss": 0.5399, "step": 2045 }, { - "epoch": 0.28635409377186843, - "grad_norm": 0.43950894922113853, - "learning_rate": 4.187930774137209e-05, - "loss": 0.578, + "epoch": 0.5726280436607892, + "grad_norm": 0.23428890664169302, + "learning_rate": 9.467881116969561e-05, + "loss": 0.5659, "step": 2046 }, { - "epoch": 0.2864940517844647, - "grad_norm": 0.4094852639707724, - "learning_rate": 4.187094587951786e-05, - "loss": 0.5808, + "epoch": 0.5729079205149734, + "grad_norm": 0.2284076394558981, + "learning_rate": 9.467188428135585e-05, + "loss": 0.5509, "step": 2047 }, { - "epoch": 0.2866340097970609, - "grad_norm": 0.4290190354250858, - "learning_rate": 4.1862580550513086e-05, - "loss": 0.5754, + "epoch": 0.5731877973691576, + "grad_norm": 0.22483696040078632, + "learning_rate": 9.466495314112114e-05, + "loss": 0.5367, "step": 2048 }, { - "epoch": 0.2867739678096571, - "grad_norm": 0.43386920098037757, - "learning_rate": 4.18542117560769e-05, - "loss": 0.6008, + "epoch": 0.5734676742233418, + "grad_norm": 0.2354170577344445, + "learning_rate": 9.465801774965118e-05, + "loss": 0.5399, "step": 2049 }, { - "epoch": 0.28691392582225334, - "grad_norm": 0.4112010436019356, - "learning_rate": 4.1845839497929204e-05, - "loss": 0.5878, + "epoch": 0.5737475510775258, + "grad_norm": 0.21579766700944755, + "learning_rate": 9.46510781076061e-05, + "loss": 0.5206, "step": 2050 }, { - "epoch": 0.28705388383484953, - "grad_norm": 0.4052022574202855, - "learning_rate": 4.183746377779055e-05, - "loss": 0.5733, + "epoch": 0.57402742793171, + "grad_norm": 0.2394430793632505, + "learning_rate": 9.464413421564642e-05, + "loss": 0.5456, "step": 2051 }, { - "epoch": 0.2871938418474458, - "grad_norm": 0.4325624910156896, - "learning_rate": 4.182908459738226e-05, - "loss": 0.5954, + "epoch": 0.5743073047858942, + "grad_norm": 0.227974969171813, + "learning_rate": 9.463718607443307e-05, + "loss": 0.5348, "step": 2052 }, { - "epoch": 0.287333799860042, - "grad_norm": 0.41379739337134963, - "learning_rate": 4.1820701958426325e-05, - "loss": 0.5487, + "epoch": 0.5745871816400784, + "grad_norm": 0.2319299252666734, + "learning_rate": 9.463023368462733e-05, + "loss": 0.5631, "step": 2053 }, { - "epoch": 0.28747375787263824, - "grad_norm": 0.41206409125142784, - "learning_rate": 4.1812315862645466e-05, - "loss": 0.5317, + "epoch": 0.5748670584942626, + "grad_norm": 0.22631513111151783, + "learning_rate": 9.462327704689098e-05, + "loss": 0.5554, "step": 2054 }, { - "epoch": 0.28761371588523443, - "grad_norm": 0.4410834744075679, - "learning_rate": 4.180392631176312e-05, - "loss": 0.6188, + "epoch": 0.5751469353484466, + "grad_norm": 0.23219123547444884, + "learning_rate": 9.461631616188616e-05, + "loss": 0.5574, "step": 2055 }, { - "epoch": 0.28775367389783063, - "grad_norm": 0.41721301374555403, - "learning_rate": 4.179553330750341e-05, - "loss": 0.5873, + "epoch": 0.5754268122026308, + "grad_norm": 0.22546526779744472, + "learning_rate": 9.460935103027538e-05, + "loss": 0.5432, "step": 2056 }, { - "epoch": 0.2878936319104269, - "grad_norm": 0.4140910091320955, - "learning_rate": 4.1787136851591194e-05, - "loss": 0.5913, + "epoch": 0.575706689056815, + "grad_norm": 0.22897797719256435, + "learning_rate": 9.460238165272159e-05, + "loss": 0.5477, "step": 2057 }, { - "epoch": 0.2880335899230231, - "grad_norm": 0.4136362029589039, - "learning_rate": 4.177873694575202e-05, - "loss": 0.6285, + "epoch": 0.5759865659109992, + "grad_norm": 0.227820803915676, + "learning_rate": 9.459540802988817e-05, + "loss": 0.531, "step": 2058 }, { - "epoch": 0.28817354793561933, - "grad_norm": 0.42507432331536726, - "learning_rate": 4.1770333591712164e-05, - "loss": 0.5854, + "epoch": 0.5762664427651834, + "grad_norm": 0.23181545499850942, + "learning_rate": 9.458843016243884e-05, + "loss": 0.5718, "step": 2059 }, { - "epoch": 0.28831350594821553, - "grad_norm": 0.43737552322389167, - "learning_rate": 4.176192679119859e-05, - "loss": 0.5922, + "epoch": 0.5765463196193675, + "grad_norm": 0.22065737580260858, + "learning_rate": 9.458144805103778e-05, + "loss": 0.5657, "step": 2060 }, { - "epoch": 0.28845346396081173, - "grad_norm": 0.3891108181335366, - "learning_rate": 4.175351654593899e-05, - "loss": 0.5284, + "epoch": 0.5768261964735516, + "grad_norm": 0.24229414516266962, + "learning_rate": 9.457446169634953e-05, + "loss": 0.5426, "step": 2061 }, { - "epoch": 0.288593421973408, - "grad_norm": 0.4969916385805558, - "learning_rate": 4.174510285766175e-05, - "loss": 0.5824, + "epoch": 0.5771060733277358, + "grad_norm": 0.23351271188728046, + "learning_rate": 9.456747109903907e-05, + "loss": 0.5701, "step": 2062 }, { - "epoch": 0.2887333799860042, - "grad_norm": 0.42340411002126843, - "learning_rate": 4.173668572809597e-05, - "loss": 0.5689, + "epoch": 0.57738595018192, + "grad_norm": 0.2275805490827519, + "learning_rate": 9.456047625977178e-05, + "loss": 0.544, "step": 2063 }, { - "epoch": 0.28887333799860043, - "grad_norm": 0.41505954996479894, - "learning_rate": 4.172826515897146e-05, - "loss": 0.5734, + "epoch": 0.5776658270361041, + "grad_norm": 0.23367033225052225, + "learning_rate": 9.455347717921341e-05, + "loss": 0.562, "step": 2064 }, { - "epoch": 0.28901329601119663, - "grad_norm": 0.3799206030872319, - "learning_rate": 4.1719841152018716e-05, - "loss": 0.5669, + "epoch": 0.5779457038902883, + "grad_norm": 0.23151158124130852, + "learning_rate": 9.454647385803012e-05, + "loss": 0.5403, "step": 2065 }, { - "epoch": 0.2891532540237929, - "grad_norm": 0.43628757271419605, - "learning_rate": 4.171141370896898e-05, - "loss": 0.6063, + "epoch": 0.5782255807444724, + "grad_norm": 0.23553604374552775, + "learning_rate": 9.453946629688855e-05, + "loss": 0.5518, "step": 2066 }, { - "epoch": 0.2892932120363891, - "grad_norm": 0.42076886960500287, - "learning_rate": 4.170298283155416e-05, - "loss": 0.5923, + "epoch": 0.5785054575986566, + "grad_norm": 0.22536131300420634, + "learning_rate": 9.453245449645563e-05, + "loss": 0.537, "step": 2067 }, { - "epoch": 0.2894331700489853, - "grad_norm": 0.4074347275662083, - "learning_rate": 4.169454852150689e-05, - "loss": 0.5872, + "epoch": 0.5787853344528407, + "grad_norm": 0.23704728389979268, + "learning_rate": 9.452543845739876e-05, + "loss": 0.5375, "step": 2068 }, { - "epoch": 0.28957312806158153, - "grad_norm": 0.4048049815445387, - "learning_rate": 4.168611078056051e-05, - "loss": 0.5549, + "epoch": 0.5790652113070249, + "grad_norm": 0.2343371150333796, + "learning_rate": 9.451841818038575e-05, + "loss": 0.5819, "step": 2069 }, { - "epoch": 0.28971308607417773, - "grad_norm": 0.411926260409551, - "learning_rate": 4.167766961044907e-05, - "loss": 0.5879, + "epoch": 0.5793450881612091, + "grad_norm": 0.24122881266900179, + "learning_rate": 9.451139366608477e-05, + "loss": 0.5602, "step": 2070 }, { - "epoch": 0.289853044086774, - "grad_norm": 0.4144949406829952, - "learning_rate": 4.166922501290729e-05, - "loss": 0.5718, + "epoch": 0.5796249650153932, + "grad_norm": 0.22664015045862007, + "learning_rate": 9.450436491516444e-05, + "loss": 0.5506, "step": 2071 }, { - "epoch": 0.2899930020993702, - "grad_norm": 0.43675024412528857, - "learning_rate": 4.1660776989670646e-05, - "loss": 0.5933, + "epoch": 0.5799048418695774, + "grad_norm": 0.22147445817593867, + "learning_rate": 9.449733192829373e-05, + "loss": 0.5587, "step": 2072 }, { - "epoch": 0.29013296011196643, - "grad_norm": 0.394293953998829, - "learning_rate": 4.165232554247528e-05, - "loss": 0.5345, + "epoch": 0.5801847187237615, + "grad_norm": 0.2271617764396628, + "learning_rate": 9.449029470614206e-05, + "loss": 0.5761, "step": 2073 }, { - "epoch": 0.29027291812456263, - "grad_norm": 0.4193571632306237, - "learning_rate": 4.164387067305805e-05, - "loss": 0.6182, + "epoch": 0.5804645955779457, + "grad_norm": 0.22847100595534037, + "learning_rate": 9.448325324937925e-05, + "loss": 0.5761, "step": 2074 }, { - "epoch": 0.2904128761371588, - "grad_norm": 0.6437864579317593, - "learning_rate": 4.163541238315653e-05, - "loss": 0.6224, + "epoch": 0.5807444724321299, + "grad_norm": 0.22553840101117412, + "learning_rate": 9.447620755867548e-05, + "loss": 0.5141, "step": 2075 }, { - "epoch": 0.2905528341497551, - "grad_norm": 0.4015710103834747, - "learning_rate": 4.162695067450897e-05, - "loss": 0.6, + "epoch": 0.581024349286314, + "grad_norm": 0.22621686515287362, + "learning_rate": 9.44691576347014e-05, + "loss": 0.5453, "step": 2076 }, { - "epoch": 0.2906927921623513, - "grad_norm": 0.42416882893328844, - "learning_rate": 4.161848554885434e-05, - "loss": 0.5964, + "epoch": 0.5813042261404981, + "grad_norm": 0.22331804582221645, + "learning_rate": 9.446210347812801e-05, + "loss": 0.5601, "step": 2077 }, { - "epoch": 0.29083275017494753, - "grad_norm": 0.39865445877876937, - "learning_rate": 4.161001700793231e-05, - "loss": 0.5379, + "epoch": 0.5815841029946823, + "grad_norm": 0.22232235898316122, + "learning_rate": 9.445504508962671e-05, + "loss": 0.5644, "step": 2078 }, { - "epoch": 0.2909727081875437, - "grad_norm": 0.428296814729791, - "learning_rate": 4.160154505348326e-05, - "loss": 0.565, + "epoch": 0.5818639798488665, + "grad_norm": 0.2336489789496591, + "learning_rate": 9.444798246986933e-05, + "loss": 0.5323, "step": 2079 }, { - "epoch": 0.29111266620014, - "grad_norm": 0.4500265708630457, - "learning_rate": 4.159306968724824e-05, - "loss": 0.6454, + "epoch": 0.5821438567030507, + "grad_norm": 0.2416382159368307, + "learning_rate": 9.44409156195281e-05, + "loss": 0.5601, "step": 2080 }, { - "epoch": 0.2912526242127362, - "grad_norm": 0.4113564994850615, - "learning_rate": 4.1584590910969055e-05, - "loss": 0.5792, + "epoch": 0.5824237335572349, + "grad_norm": 0.24598405587353261, + "learning_rate": 9.443384453927567e-05, + "loss": 0.5576, "step": 2081 }, { - "epoch": 0.2913925822253324, - "grad_norm": 0.4181766831204532, - "learning_rate": 4.157610872638815e-05, - "loss": 0.6005, + "epoch": 0.5827036104114189, + "grad_norm": 0.23820692785331665, + "learning_rate": 9.442676922978503e-05, + "loss": 0.5525, "step": 2082 }, { - "epoch": 0.29153254023792863, - "grad_norm": 0.42000429084184243, - "learning_rate": 4.156762313524873e-05, - "loss": 0.5565, + "epoch": 0.5829834872656031, + "grad_norm": 0.22371910029075298, + "learning_rate": 9.441968969172964e-05, + "loss": 0.5475, "step": 2083 }, { - "epoch": 0.2916724982505248, - "grad_norm": 0.4106324972814455, - "learning_rate": 4.155913413929463e-05, - "loss": 0.5678, + "epoch": 0.5832633641197873, + "grad_norm": 0.23579026492566837, + "learning_rate": 9.441260592578329e-05, + "loss": 0.5483, "step": 2084 }, { - "epoch": 0.2918124562631211, - "grad_norm": 0.3971576285170608, - "learning_rate": 4.155064174027047e-05, - "loss": 0.5972, + "epoch": 0.5835432409739715, + "grad_norm": 0.23040794838373302, + "learning_rate": 9.440551793262027e-05, + "loss": 0.5787, "step": 2085 }, { - "epoch": 0.2919524142757173, - "grad_norm": 0.4438093270564298, - "learning_rate": 4.154214593992149e-05, - "loss": 0.5909, + "epoch": 0.5838231178281557, + "grad_norm": 0.21907945983694874, + "learning_rate": 9.439842571291521e-05, + "loss": 0.5086, "step": 2086 }, { - "epoch": 0.29209237228831353, - "grad_norm": 0.4056320133949262, - "learning_rate": 4.1533646739993676e-05, - "loss": 0.5832, + "epoch": 0.5841029946823397, + "grad_norm": 0.2319913619380196, + "learning_rate": 9.439132926734313e-05, + "loss": 0.5672, "step": 2087 }, { - "epoch": 0.2922323303009097, - "grad_norm": 0.4129515425696569, - "learning_rate": 4.1525144142233705e-05, - "loss": 0.5995, + "epoch": 0.5843828715365239, + "grad_norm": 0.39337399651224053, + "learning_rate": 9.438422859657947e-05, + "loss": 0.5676, "step": 2088 }, { - "epoch": 0.2923722883135059, - "grad_norm": 0.4513903259561082, - "learning_rate": 4.151663814838893e-05, - "loss": 0.5527, + "epoch": 0.5846627483907081, + "grad_norm": 0.2183883009142669, + "learning_rate": 9.43771237013001e-05, + "loss": 0.5744, "step": 2089 }, { - "epoch": 0.2925122463261022, - "grad_norm": 0.3975679752001749, - "learning_rate": 4.150812876020744e-05, - "loss": 0.5579, + "epoch": 0.5849426252448923, + "grad_norm": 5.779487133580226, + "learning_rate": 9.437001458218127e-05, + "loss": 0.6276, "step": 2090 }, { - "epoch": 0.2926522043386984, - "grad_norm": 0.4079824226246549, - "learning_rate": 4.1499615979437986e-05, - "loss": 0.5583, + "epoch": 0.5852225020990764, + "grad_norm": 0.22071823141183178, + "learning_rate": 9.43629012398996e-05, + "loss": 0.5385, "step": 2091 }, { - "epoch": 0.2927921623512946, - "grad_norm": 0.4452632659983039, - "learning_rate": 4.1491099807830044e-05, - "loss": 0.5871, + "epoch": 0.5855023789532605, + "grad_norm": 0.22604144228491943, + "learning_rate": 9.435578367513215e-05, + "loss": 0.5399, "step": 2092 }, { - "epoch": 0.2929321203638908, - "grad_norm": 0.416969695477689, - "learning_rate": 4.148258024713376e-05, - "loss": 0.5686, + "epoch": 0.5857822558074447, + "grad_norm": 0.23461762169954076, + "learning_rate": 9.434866188855641e-05, + "loss": 0.5404, "step": 2093 }, { - "epoch": 0.2930720783764871, - "grad_norm": 0.39010342813066295, - "learning_rate": 4.147405729909999e-05, - "loss": 0.5805, + "epoch": 0.5860621326616289, + "grad_norm": 0.22303629652425513, + "learning_rate": 9.434153588085019e-05, + "loss": 0.5256, "step": 2094 }, { - "epoch": 0.2932120363890833, - "grad_norm": 0.4146408500392661, - "learning_rate": 4.146553096548031e-05, - "loss": 0.5939, + "epoch": 0.5863420095158131, + "grad_norm": 0.47065174666148146, + "learning_rate": 9.433440565269178e-05, + "loss": 0.5437, "step": 2095 }, { - "epoch": 0.2933519944016795, - "grad_norm": 0.4438792319022797, - "learning_rate": 4.145700124802693e-05, - "loss": 0.6048, + "epoch": 0.5866218863699972, + "grad_norm": 0.24230721619534515, + "learning_rate": 9.432727120475981e-05, + "loss": 0.5403, "step": 2096 }, { - "epoch": 0.2934919524142757, - "grad_norm": 0.652029593861103, - "learning_rate": 4.144846814849282e-05, - "loss": 0.6083, + "epoch": 0.5869017632241813, + "grad_norm": 0.25008885174940576, + "learning_rate": 9.432013253773337e-05, + "loss": 0.5488, "step": 2097 }, { - "epoch": 0.2936319104268719, - "grad_norm": 0.4075857320797679, - "learning_rate": 4.143993166863161e-05, - "loss": 0.5861, + "epoch": 0.5871816400783655, + "grad_norm": 0.2436018783912833, + "learning_rate": 9.43129896522919e-05, + "loss": 0.5437, "step": 2098 }, { - "epoch": 0.2937718684394682, - "grad_norm": 0.3986323846100174, - "learning_rate": 4.143139181019764e-05, - "loss": 0.5729, + "epoch": 0.5874615169325497, + "grad_norm": 0.2377397180240944, + "learning_rate": 9.430584254911527e-05, + "loss": 0.5373, "step": 2099 }, { - "epoch": 0.2939118264520644, - "grad_norm": 0.42024806604839915, - "learning_rate": 4.1422848574945924e-05, - "loss": 0.5662, + "epoch": 0.5877413937867338, + "grad_norm": 0.2554239950571331, + "learning_rate": 9.429869122888375e-05, + "loss": 0.5634, "step": 2100 }, { - "epoch": 0.2940517844646606, - "grad_norm": 0.4174607888005454, - "learning_rate": 4.141430196463221e-05, - "loss": 0.542, + "epoch": 0.588021270640918, + "grad_norm": 0.3419522755702989, + "learning_rate": 9.4291535692278e-05, + "loss": 0.5568, "step": 2101 }, { - "epoch": 0.2941917424772568, - "grad_norm": 0.4330991872091227, - "learning_rate": 4.140575198101288e-05, - "loss": 0.6237, + "epoch": 0.5883011474951022, + "grad_norm": 0.22386569521000957, + "learning_rate": 9.428437593997909e-05, + "loss": 0.5492, "step": 2102 }, { - "epoch": 0.294331700489853, - "grad_norm": 0.41437624963664066, - "learning_rate": 4.139719862584506e-05, - "loss": 0.5758, + "epoch": 0.5885810243492863, + "grad_norm": 0.23803678936879716, + "learning_rate": 9.42772119726685e-05, + "loss": 0.542, "step": 2103 }, { - "epoch": 0.2944716585024493, - "grad_norm": 0.41455194934526307, - "learning_rate": 4.138864190088655e-05, - "loss": 0.5494, + "epoch": 0.5888609012034705, + "grad_norm": 0.23133509004822347, + "learning_rate": 9.427004379102809e-05, + "loss": 0.5495, "step": 2104 }, { - "epoch": 0.29461161651504547, - "grad_norm": 0.3928031972149667, - "learning_rate": 4.1380081807895846e-05, - "loss": 0.615, + "epoch": 0.5891407780576546, + "grad_norm": 0.23135027895129517, + "learning_rate": 9.426287139574012e-05, + "loss": 0.5457, "step": 2105 }, { - "epoch": 0.2947515745276417, - "grad_norm": 0.4097807146687554, - "learning_rate": 4.137151834863213e-05, - "loss": 0.5563, + "epoch": 0.5894206549118388, + "grad_norm": 0.22859640564954456, + "learning_rate": 9.42556947874873e-05, + "loss": 0.5468, "step": 2106 }, { - "epoch": 0.2948915325402379, - "grad_norm": 0.4201887917529205, - "learning_rate": 4.1362951524855275e-05, - "loss": 0.5934, + "epoch": 0.589700531766023, + "grad_norm": 0.3210496124769348, + "learning_rate": 9.424851396695267e-05, + "loss": 0.5367, "step": 2107 }, { - "epoch": 0.2950314905528342, - "grad_norm": 0.408291117857594, - "learning_rate": 4.1354381338325864e-05, - "loss": 0.6003, + "epoch": 0.5899804086202071, + "grad_norm": 0.23479468230687703, + "learning_rate": 9.42413289348197e-05, + "loss": 0.5442, "step": 2108 }, { - "epoch": 0.29517144856543037, - "grad_norm": 0.43085354542414667, - "learning_rate": 4.1345807790805144e-05, - "loss": 0.5948, + "epoch": 0.5902602854743912, + "grad_norm": 0.3436130182221491, + "learning_rate": 9.423413969177229e-05, + "loss": 0.5487, "step": 2109 }, { - "epoch": 0.29531140657802657, - "grad_norm": 0.46223900630617937, - "learning_rate": 4.1337230884055073e-05, - "loss": 0.5575, + "epoch": 0.5905401623285754, + "grad_norm": 0.23276523609630337, + "learning_rate": 9.422694623849469e-05, + "loss": 0.5769, "step": 2110 }, { - "epoch": 0.2954513645906228, - "grad_norm": 0.42857422520776206, - "learning_rate": 4.132865061983829e-05, - "loss": 0.6218, + "epoch": 0.5908200391827596, + "grad_norm": 0.22712292607444542, + "learning_rate": 9.421974857567162e-05, + "loss": 0.5743, "step": 2111 }, { - "epoch": 0.295591322603219, - "grad_norm": 0.4328798834584895, - "learning_rate": 4.132006699991813e-05, - "loss": 0.572, + "epoch": 0.5910999160369438, + "grad_norm": 0.23743817390424976, + "learning_rate": 9.421254670398811e-05, + "loss": 0.5301, "step": 2112 }, { - "epoch": 0.2957312806158153, - "grad_norm": 0.4029310476167804, - "learning_rate": 4.131148002605861e-05, - "loss": 0.5762, + "epoch": 0.5913797928911279, + "grad_norm": 0.2481966408048911, + "learning_rate": 9.420534062412966e-05, + "loss": 0.5369, "step": 2113 }, { - "epoch": 0.29587123862841147, - "grad_norm": 0.4227536522427694, - "learning_rate": 4.130288970002444e-05, - "loss": 0.5716, + "epoch": 0.591659669745312, + "grad_norm": 0.2318159884307774, + "learning_rate": 9.419813033678215e-05, + "loss": 0.5313, "step": 2114 }, { - "epoch": 0.2960111966410077, - "grad_norm": 0.3992813234221805, - "learning_rate": 4.1294296023581015e-05, - "loss": 0.5803, + "epoch": 0.5919395465994962, + "grad_norm": 0.23293490976835007, + "learning_rate": 9.419091584263184e-05, + "loss": 0.5691, "step": 2115 }, { - "epoch": 0.2961511546536039, - "grad_norm": 0.40775499827762557, - "learning_rate": 4.128569899849443e-05, - "loss": 0.5813, + "epoch": 0.5922194234536804, + "grad_norm": 0.23663246086893724, + "learning_rate": 9.418369714236546e-05, + "loss": 0.574, "step": 2116 }, { - "epoch": 0.2962911126662001, - "grad_norm": 0.4187348195942707, - "learning_rate": 4.127709862653146e-05, - "loss": 0.5915, + "epoch": 0.5924993003078646, + "grad_norm": 0.2690452252940053, + "learning_rate": 9.417647423667002e-05, + "loss": 0.5422, "step": 2117 }, { - "epoch": 0.29643107067879637, - "grad_norm": 0.43072765488595965, - "learning_rate": 4.126849490945958e-05, - "loss": 0.5869, + "epoch": 0.5927791771620488, + "grad_norm": 0.23220328242643445, + "learning_rate": 9.416924712623305e-05, + "loss": 0.5551, "step": 2118 }, { - "epoch": 0.29657102869139257, - "grad_norm": 0.40631929550004103, - "learning_rate": 4.1259887849046906e-05, - "loss": 0.5783, + "epoch": 0.5930590540162328, + "grad_norm": 0.23303329094420952, + "learning_rate": 9.416201581174242e-05, + "loss": 0.5342, "step": 2119 }, { - "epoch": 0.2967109867039888, - "grad_norm": 0.4236924640290844, - "learning_rate": 4.1251277447062315e-05, - "loss": 0.64, + "epoch": 0.593338930870417, + "grad_norm": 0.232614172873898, + "learning_rate": 9.41547802938864e-05, + "loss": 0.5583, "step": 2120 }, { - "epoch": 0.296850944716585, - "grad_norm": 0.42013017681077297, - "learning_rate": 4.124266370527531e-05, - "loss": 0.6384, + "epoch": 0.5936188077246012, + "grad_norm": 0.23504105712801626, + "learning_rate": 9.414754057335369e-05, + "loss": 0.5421, "step": 2121 }, { - "epoch": 0.29699090272918127, - "grad_norm": 0.4065769677758428, - "learning_rate": 4.123404662545611e-05, - "loss": 0.6086, + "epoch": 0.5938986845787854, + "grad_norm": 0.22532357528964758, + "learning_rate": 9.414029665083335e-05, + "loss": 0.5757, "step": 2122 }, { - "epoch": 0.29713086074177747, - "grad_norm": 0.3886256061886866, - "learning_rate": 4.1225426209375605e-05, - "loss": 0.5219, + "epoch": 0.5941785614329695, + "grad_norm": 0.23439540126628672, + "learning_rate": 9.413304852701486e-05, + "loss": 0.5539, "step": 2123 }, { - "epoch": 0.29727081875437367, - "grad_norm": 0.40830396890274406, - "learning_rate": 4.121680245880539e-05, - "loss": 0.5487, + "epoch": 0.5944584382871536, + "grad_norm": 0.23902983294180502, + "learning_rate": 9.412579620258812e-05, + "loss": 0.5524, "step": 2124 }, { - "epoch": 0.2974107767669699, - "grad_norm": 0.40457862036093745, - "learning_rate": 4.120817537551773e-05, - "loss": 0.5797, + "epoch": 0.5947383151413378, + "grad_norm": 0.24190922233112297, + "learning_rate": 9.411853967824339e-05, + "loss": 0.5617, "step": 2125 }, { - "epoch": 0.2975507347795661, - "grad_norm": 0.414048107015411, - "learning_rate": 4.1199544961285574e-05, - "loss": 0.5827, + "epoch": 0.595018191995522, + "grad_norm": 0.23759151697581649, + "learning_rate": 9.411127895467135e-05, + "loss": 0.549, "step": 2126 }, { - "epoch": 0.29769069279216237, - "grad_norm": 0.4211166151095332, - "learning_rate": 4.119091121788256e-05, - "loss": 0.5869, + "epoch": 0.5952980688497062, + "grad_norm": 0.22968291636303048, + "learning_rate": 9.410401403256312e-05, + "loss": 0.5864, "step": 2127 }, { - "epoch": 0.29783065080475857, - "grad_norm": 0.3999830413220613, - "learning_rate": 4.1182274147083e-05, - "loss": 0.5538, + "epoch": 0.5955779457038903, + "grad_norm": 0.23413373281241773, + "learning_rate": 9.409674491261014e-05, + "loss": 0.5571, "step": 2128 }, { - "epoch": 0.2979706088173548, - "grad_norm": 0.43614503719146447, - "learning_rate": 4.1173633750661915e-05, - "loss": 0.6079, + "epoch": 0.5958578225580744, + "grad_norm": 0.22989266756331098, + "learning_rate": 9.408947159550428e-05, + "loss": 0.566, "step": 2129 }, { - "epoch": 0.298110566829951, - "grad_norm": 0.4284744155784332, - "learning_rate": 4.116499003039499e-05, - "loss": 0.5669, + "epoch": 0.5961376994122586, + "grad_norm": 0.25859836940334974, + "learning_rate": 9.408219408193783e-05, + "loss": 0.5415, "step": 2130 }, { - "epoch": 0.2982505248425472, - "grad_norm": 0.4178472294126034, - "learning_rate": 4.1156342988058606e-05, - "loss": 0.5466, + "epoch": 0.5964175762664428, + "grad_norm": 0.22716914480550707, + "learning_rate": 9.40749123726035e-05, + "loss": 0.5399, "step": 2131 }, { - "epoch": 0.29839048285514347, - "grad_norm": 0.4104945666216928, - "learning_rate": 4.114769262542979e-05, - "loss": 0.5566, + "epoch": 0.5966974531206269, + "grad_norm": 0.23777124759659524, + "learning_rate": 9.406762646819433e-05, + "loss": 0.5438, "step": 2132 }, { - "epoch": 0.29853044086773967, - "grad_norm": 0.5142114902119391, - "learning_rate": 4.113903894428632e-05, - "loss": 0.5706, + "epoch": 0.5969773299748111, + "grad_norm": 0.23100395831119283, + "learning_rate": 9.406033636940378e-05, + "loss": 0.5525, "step": 2133 }, { - "epoch": 0.2986703988803359, - "grad_norm": 0.42568394409367394, - "learning_rate": 4.113038194640658e-05, - "loss": 0.5772, + "epoch": 0.5972572068289952, + "grad_norm": 0.24430225003224879, + "learning_rate": 9.40530420769258e-05, + "loss": 0.5706, "step": 2134 }, { - "epoch": 0.2988103568929321, - "grad_norm": 0.4072689251705309, - "learning_rate": 4.112172163356969e-05, - "loss": 0.5839, + "epoch": 0.5975370836831794, + "grad_norm": 0.34331338348630325, + "learning_rate": 9.404574359145459e-05, + "loss": 0.5635, "step": 2135 }, { - "epoch": 0.2989503149055283, - "grad_norm": 0.43230154146342653, - "learning_rate": 4.1113058007555415e-05, - "loss": 0.5762, + "epoch": 0.5978169605373636, + "grad_norm": 0.2369012608363964, + "learning_rate": 9.403844091368486e-05, + "loss": 0.5631, "step": 2136 }, { - "epoch": 0.29909027291812457, - "grad_norm": 0.42221307246185413, - "learning_rate": 4.110439107014423e-05, - "loss": 0.5854, + "epoch": 0.5980968373915477, + "grad_norm": 0.23748420549451538, + "learning_rate": 9.403113404431167e-05, + "loss": 0.5614, "step": 2137 }, { - "epoch": 0.29923023093072076, - "grad_norm": 0.4115485110413119, - "learning_rate": 4.109572082311729e-05, - "loss": 0.6144, + "epoch": 0.5983767142457319, + "grad_norm": 0.23140857941781828, + "learning_rate": 9.40238229840305e-05, + "loss": 0.5961, "step": 2138 }, { - "epoch": 0.299370188943317, - "grad_norm": 0.4381710550273501, - "learning_rate": 4.10870472682564e-05, - "loss": 0.5764, + "epoch": 0.5986565910999161, + "grad_norm": 0.24126547630721637, + "learning_rate": 9.401650773353721e-05, + "loss": 0.5721, "step": 2139 }, { - "epoch": 0.2995101469559132, - "grad_norm": 0.40672792105491173, - "learning_rate": 4.107837040734407e-05, - "loss": 0.5623, + "epoch": 0.5989364679541002, + "grad_norm": 0.22664415941602947, + "learning_rate": 9.400918829352807e-05, + "loss": 0.5742, "step": 2140 }, { - "epoch": 0.29965010496850947, - "grad_norm": 0.4230148958590366, - "learning_rate": 4.1069690242163484e-05, - "loss": 0.6007, + "epoch": 0.5992163448082843, + "grad_norm": 0.22951037537087327, + "learning_rate": 9.400186466469978e-05, + "loss": 0.5562, "step": 2141 }, { - "epoch": 0.29979006298110566, - "grad_norm": 0.42487403889075154, - "learning_rate": 4.106100677449851e-05, - "loss": 0.5952, + "epoch": 0.5994962216624685, + "grad_norm": 0.22282961316159394, + "learning_rate": 9.399453684774937e-05, + "loss": 0.5396, "step": 2142 }, { - "epoch": 0.29993002099370186, - "grad_norm": 0.41800613830620925, - "learning_rate": 4.105232000613367e-05, - "loss": 0.5814, + "epoch": 0.5997760985166527, + "grad_norm": 0.23344191933282923, + "learning_rate": 9.398720484337431e-05, + "loss": 0.5694, "step": 2143 }, { - "epoch": 0.3000699790062981, - "grad_norm": 0.42812390915287574, - "learning_rate": 4.10436299388542e-05, - "loss": 0.6044, + "epoch": 0.6000559753708369, + "grad_norm": 0.226237346259715, + "learning_rate": 9.397986865227248e-05, + "loss": 0.5695, "step": 2144 }, { - "epoch": 0.3002099370188943, - "grad_norm": 0.42630144567604744, - "learning_rate": 4.103493657444599e-05, - "loss": 0.5458, + "epoch": 0.600335852225021, + "grad_norm": 0.23347717906049928, + "learning_rate": 9.397252827514214e-05, + "loss": 0.5515, "step": 2145 }, { - "epoch": 0.30034989503149057, - "grad_norm": 0.40026869359194683, - "learning_rate": 4.1026239914695617e-05, - "loss": 0.567, + "epoch": 0.6006157290792051, + "grad_norm": 0.23303486001101228, + "learning_rate": 9.396518371268192e-05, + "loss": 0.5504, "step": 2146 }, { - "epoch": 0.30048985304408676, - "grad_norm": 0.4045104989308898, - "learning_rate": 4.101753996139033e-05, - "loss": 0.5573, + "epoch": 0.6008956059333893, + "grad_norm": 0.22700010739843296, + "learning_rate": 9.395783496559094e-05, + "loss": 0.5554, "step": 2147 }, { - "epoch": 0.300629811056683, - "grad_norm": 0.39480816764383886, - "learning_rate": 4.100883671631806e-05, - "loss": 0.5554, + "epoch": 0.6011754827875735, + "grad_norm": 0.22397350220572565, + "learning_rate": 9.395048203456861e-05, + "loss": 0.5552, "step": 2148 }, { - "epoch": 0.3007697690692792, - "grad_norm": 0.4359485340891318, - "learning_rate": 4.100013018126742e-05, - "loss": 0.5306, + "epoch": 0.6014553596417577, + "grad_norm": 0.22672012483051324, + "learning_rate": 9.394312492031479e-05, + "loss": 0.544, "step": 2149 }, { - "epoch": 0.3009097270818754, - "grad_norm": 0.44274809745131777, - "learning_rate": 4.099142035802767e-05, - "loss": 0.6069, + "epoch": 0.6017352364959417, + "grad_norm": 0.24590612945582327, + "learning_rate": 9.393576362352977e-05, + "loss": 0.5519, "step": 2150 }, { - "epoch": 0.30104968509447166, - "grad_norm": 0.4010134732813477, - "learning_rate": 4.098270724838879e-05, - "loss": 0.5737, + "epoch": 0.6020151133501259, + "grad_norm": 0.2412160023677888, + "learning_rate": 9.392839814491416e-05, + "loss": 0.5577, "step": 2151 }, { - "epoch": 0.30118964310706786, - "grad_norm": 0.42583763567956523, - "learning_rate": 4.09739908541414e-05, - "loss": 0.5646, + "epoch": 0.6022949902043101, + "grad_norm": 0.23613718554981, + "learning_rate": 9.392102848516901e-05, + "loss": 0.5701, "step": 2152 }, { - "epoch": 0.3013296011196641, - "grad_norm": 0.43076744664262784, - "learning_rate": 4.096527117707681e-05, - "loss": 0.6089, + "epoch": 0.6025748670584943, + "grad_norm": 0.23046699755852104, + "learning_rate": 9.391365464499581e-05, + "loss": 0.5497, "step": 2153 }, { - "epoch": 0.3014695591322603, - "grad_norm": 0.41900293437499037, - "learning_rate": 4.0956548218987004e-05, - "loss": 0.5818, + "epoch": 0.6028547439126785, + "grad_norm": 0.22353637618584268, + "learning_rate": 9.390627662509637e-05, + "loss": 0.5656, "step": 2154 }, { - "epoch": 0.30160951714485656, - "grad_norm": 0.441326688768721, - "learning_rate": 4.094782198166464e-05, - "loss": 0.601, + "epoch": 0.6031346207668625, + "grad_norm": 0.22387346115727752, + "learning_rate": 9.389889442617295e-05, + "loss": 0.5546, "step": 2155 }, { - "epoch": 0.30174947515745276, - "grad_norm": 0.41905876085866123, - "learning_rate": 4.093909246690303e-05, - "loss": 0.5694, + "epoch": 0.6034144976210467, + "grad_norm": 0.2251342211026499, + "learning_rate": 9.389150804892819e-05, + "loss": 0.5858, "step": 2156 }, { - "epoch": 0.30188943317004896, - "grad_norm": 0.40710066709424864, - "learning_rate": 4.093035967649621e-05, - "loss": 0.5571, + "epoch": 0.6036943744752309, + "grad_norm": 0.22973229416437546, + "learning_rate": 9.388411749406512e-05, + "loss": 0.5415, "step": 2157 }, { - "epoch": 0.3020293911826452, - "grad_norm": 0.41211015638134546, - "learning_rate": 4.092162361223884e-05, - "loss": 0.6152, + "epoch": 0.6039742513294151, + "grad_norm": 0.23055150324750887, + "learning_rate": 9.387672276228719e-05, + "loss": 0.5374, "step": 2158 }, { - "epoch": 0.3021693491952414, - "grad_norm": 0.42106082661170496, - "learning_rate": 4.091288427592626e-05, - "loss": 0.5927, + "epoch": 0.6042541281835992, + "grad_norm": 0.23143025596635663, + "learning_rate": 9.386932385429822e-05, + "loss": 0.5583, "step": 2159 }, { - "epoch": 0.30230930720783766, - "grad_norm": 0.4141524195584368, - "learning_rate": 4.0904141669354504e-05, - "loss": 0.6316, + "epoch": 0.6045340050377834, + "grad_norm": 0.21611838843533412, + "learning_rate": 9.386192077080245e-05, + "loss": 0.5294, "step": 2160 }, { - "epoch": 0.30244926522043386, - "grad_norm": 0.44132339838745016, - "learning_rate": 4.0895395794320265e-05, - "loss": 0.5985, + "epoch": 0.6048138818919675, + "grad_norm": 0.2137986906121095, + "learning_rate": 9.385451351250452e-05, + "loss": 0.5515, "step": 2161 }, { - "epoch": 0.3025892232330301, - "grad_norm": 0.4276646446648299, - "learning_rate": 4.088664665262091e-05, - "loss": 0.6097, + "epoch": 0.6050937587461517, + "grad_norm": 0.23435303738485605, + "learning_rate": 9.384710208010945e-05, + "loss": 0.5687, "step": 2162 }, { - "epoch": 0.3027291812456263, - "grad_norm": 0.4150132159301798, - "learning_rate": 4.087789424605447e-05, - "loss": 0.5909, + "epoch": 0.6053736356003359, + "grad_norm": 0.2180667344257166, + "learning_rate": 9.383968647432265e-05, + "loss": 0.5324, "step": 2163 }, { - "epoch": 0.3028691392582225, - "grad_norm": 0.39307522840638937, - "learning_rate": 4.0869138576419665e-05, - "loss": 0.5914, + "epoch": 0.60565351245452, + "grad_norm": 0.2282908475437685, + "learning_rate": 9.383226669584995e-05, + "loss": 0.5349, "step": 2164 }, { - "epoch": 0.30300909727081876, - "grad_norm": 0.38594672832612004, - "learning_rate": 4.086037964551587e-05, - "loss": 0.5607, + "epoch": 0.6059333893087042, + "grad_norm": 0.22937195448161374, + "learning_rate": 9.382484274539758e-05, + "loss": 0.5628, "step": 2165 }, { - "epoch": 0.30314905528341496, - "grad_norm": 0.4044083531551891, - "learning_rate": 4.085161745514312e-05, - "loss": 0.5844, + "epoch": 0.6062132661628883, + "grad_norm": 0.23805023067234415, + "learning_rate": 9.381741462367215e-05, + "loss": 0.5644, "step": 2166 }, { - "epoch": 0.3032890132960112, - "grad_norm": 0.42910899298854893, - "learning_rate": 4.084285200710215e-05, - "loss": 0.5998, + "epoch": 0.6064931430170725, + "grad_norm": 0.23109045218425225, + "learning_rate": 9.380998233138068e-05, + "loss": 0.5277, "step": 2167 }, { - "epoch": 0.3034289713086074, - "grad_norm": 0.4246336566951562, - "learning_rate": 4.083408330319435e-05, - "loss": 0.5741, + "epoch": 0.6067730198712566, + "grad_norm": 0.23104619170007182, + "learning_rate": 9.380254586923056e-05, + "loss": 0.5651, "step": 2168 }, { - "epoch": 0.30356892932120366, - "grad_norm": 0.4257821385233403, - "learning_rate": 4.082531134522176e-05, - "loss": 0.6026, + "epoch": 0.6070528967254408, + "grad_norm": 0.2343794987860335, + "learning_rate": 9.379510523792961e-05, + "loss": 0.5873, "step": 2169 }, { - "epoch": 0.30370888733379986, - "grad_norm": 0.4191758702628161, - "learning_rate": 4.0816536134987124e-05, - "loss": 0.6057, + "epoch": 0.607332773579625, + "grad_norm": 0.2185645973439327, + "learning_rate": 9.378766043818601e-05, + "loss": 0.5522, "step": 2170 }, { - "epoch": 0.30384884534639606, - "grad_norm": 0.4038665214466415, - "learning_rate": 4.0807757674293834e-05, - "loss": 0.5794, + "epoch": 0.6076126504338091, + "grad_norm": 0.22412756839460132, + "learning_rate": 9.37802114707084e-05, + "loss": 0.5506, "step": 2171 }, { - "epoch": 0.3039888033589923, - "grad_norm": 0.38253864598092674, - "learning_rate": 4.079897596494594e-05, - "loss": 0.5572, + "epoch": 0.6078925272879933, + "grad_norm": 0.2144459810271091, + "learning_rate": 9.377275833620576e-05, + "loss": 0.5271, "step": 2172 }, { - "epoch": 0.3041287613715885, - "grad_norm": 0.40057781747525356, - "learning_rate": 4.0790191008748193e-05, - "loss": 0.5757, + "epoch": 0.6081724041421774, + "grad_norm": 0.2109310703601954, + "learning_rate": 9.376530103538748e-05, + "loss": 0.5454, "step": 2173 }, { - "epoch": 0.30426871938418476, - "grad_norm": 0.4425784348877834, - "learning_rate": 4.078140280750597e-05, - "loss": 0.5909, + "epoch": 0.6084522809963616, + "grad_norm": 0.22814283994446383, + "learning_rate": 9.375783956896333e-05, + "loss": 0.5517, "step": 2174 }, { - "epoch": 0.30440867739678096, - "grad_norm": 0.43807792095543074, - "learning_rate": 4.077261136302536e-05, - "loss": 0.5713, + "epoch": 0.6087321578505458, + "grad_norm": 0.21725478847051968, + "learning_rate": 9.375037393764355e-05, + "loss": 0.5472, "step": 2175 }, { - "epoch": 0.3045486354093772, - "grad_norm": 0.42494113081632956, - "learning_rate": 4.0763816677113064e-05, - "loss": 0.5599, + "epoch": 0.6090120347047299, + "grad_norm": 0.224529319802153, + "learning_rate": 9.374290414213867e-05, + "loss": 0.5483, "step": 2176 }, { - "epoch": 0.3046885934219734, - "grad_norm": 0.4147870888377848, - "learning_rate": 4.07550187515765e-05, - "loss": 0.5958, + "epoch": 0.609291911558914, + "grad_norm": 0.22342063582437635, + "learning_rate": 9.373543018315969e-05, + "loss": 0.547, "step": 2177 }, { - "epoch": 0.3048285514345696, - "grad_norm": 0.44106090618207494, - "learning_rate": 4.0746217588223724e-05, - "loss": 0.5719, + "epoch": 0.6095717884130982, + "grad_norm": 0.2358182591902062, + "learning_rate": 9.372795206141798e-05, + "loss": 0.5509, "step": 2178 }, { - "epoch": 0.30496850944716586, - "grad_norm": 0.43022335538791223, - "learning_rate": 4.073741318886347e-05, - "loss": 0.5666, + "epoch": 0.6098516652672824, + "grad_norm": 0.22835904537672128, + "learning_rate": 9.372046977762533e-05, + "loss": 0.5542, "step": 2179 }, { - "epoch": 0.30510846745976206, - "grad_norm": 0.422655488810057, - "learning_rate": 4.072860555530512e-05, - "loss": 0.5725, + "epoch": 0.6101315421214666, + "grad_norm": 0.2301374730540312, + "learning_rate": 9.37129833324939e-05, + "loss": 0.5374, "step": 2180 }, { - "epoch": 0.3052484254723583, - "grad_norm": 0.41560143611944184, - "learning_rate": 4.071979468935874e-05, - "loss": 0.5707, + "epoch": 0.6104114189756508, + "grad_norm": 0.21823760601817832, + "learning_rate": 9.370549272673623e-05, + "loss": 0.543, "step": 2181 }, { - "epoch": 0.3053883834849545, - "grad_norm": 0.40831478451563363, - "learning_rate": 4.071098059283505e-05, - "loss": 0.5724, + "epoch": 0.6106912958298348, + "grad_norm": 0.24705484430025587, + "learning_rate": 9.369799796106531e-05, + "loss": 0.5657, "step": 2182 }, { - "epoch": 0.30552834149755076, - "grad_norm": 0.45412422141258324, - "learning_rate": 4.070216326754544e-05, - "loss": 0.5914, + "epoch": 0.610971172684019, + "grad_norm": 0.23558866131221204, + "learning_rate": 9.36904990361945e-05, + "loss": 0.5522, "step": 2183 }, { - "epoch": 0.30566829951014696, - "grad_norm": 0.4238846330415573, - "learning_rate": 4.069334271530196e-05, - "loss": 0.5852, + "epoch": 0.6112510495382032, + "grad_norm": 0.2413614541787735, + "learning_rate": 9.368299595283751e-05, + "loss": 0.5585, "step": 2184 }, { - "epoch": 0.30580825752274315, - "grad_norm": 0.4168947592800387, - "learning_rate": 4.0684518937917315e-05, - "loss": 0.5668, + "epoch": 0.6115309263923874, + "grad_norm": 0.23405538131421783, + "learning_rate": 9.367548871170853e-05, + "loss": 0.5702, "step": 2185 }, { - "epoch": 0.3059482155353394, - "grad_norm": 0.3974079353132702, - "learning_rate": 4.0675691937204886e-05, - "loss": 0.5796, + "epoch": 0.6118108032465716, + "grad_norm": 0.22141123795458678, + "learning_rate": 9.366797731352209e-05, + "loss": 0.5319, "step": 2186 }, { - "epoch": 0.3060881735479356, - "grad_norm": 0.447707780183876, - "learning_rate": 4.0666861714978724e-05, - "loss": 0.6053, + "epoch": 0.6120906801007556, + "grad_norm": 0.2225962903572838, + "learning_rate": 9.366046175899311e-05, + "loss": 0.5688, "step": 2187 }, { - "epoch": 0.30622813156053186, - "grad_norm": 0.4241731286712751, - "learning_rate": 4.06580282730535e-05, - "loss": 0.5433, + "epoch": 0.6123705569549398, + "grad_norm": 0.22959480058064163, + "learning_rate": 9.365294204883696e-05, + "loss": 0.5749, "step": 2188 }, { - "epoch": 0.30636808957312806, - "grad_norm": 0.40634600846513996, - "learning_rate": 4.0649191613244596e-05, - "loss": 0.5778, + "epoch": 0.612650433809124, + "grad_norm": 0.23519338282340332, + "learning_rate": 9.364541818376934e-05, + "loss": 0.5711, "step": 2189 }, { - "epoch": 0.3065080475857243, - "grad_norm": 0.40317717922477264, - "learning_rate": 4.064035173736804e-05, - "loss": 0.5654, + "epoch": 0.6129303106633082, + "grad_norm": 0.24498715468104948, + "learning_rate": 9.36378901645064e-05, + "loss": 0.5305, "step": 2190 }, { - "epoch": 0.3066480055983205, - "grad_norm": 0.40439388741840737, - "learning_rate": 4.06315086472405e-05, - "loss": 0.554, + "epoch": 0.6132101875174923, + "grad_norm": 1.2655857992676953, + "learning_rate": 9.363035799176463e-05, + "loss": 0.5614, "step": 2191 }, { - "epoch": 0.3067879636109167, - "grad_norm": 0.40052999103675635, - "learning_rate": 4.062266234467933e-05, - "loss": 0.5633, + "epoch": 0.6134900643716764, + "grad_norm": 0.23654286776943562, + "learning_rate": 9.362282166626098e-05, + "loss": 0.5686, "step": 2192 }, { - "epoch": 0.30692792162351296, - "grad_norm": 0.39039005393866494, - "learning_rate": 4.061381283150254e-05, - "loss": 0.5712, + "epoch": 0.6137699412258606, + "grad_norm": 0.22742679400884647, + "learning_rate": 9.361528118871274e-05, + "loss": 0.5432, "step": 2193 }, { - "epoch": 0.30706787963610915, - "grad_norm": 0.4452309708879627, - "learning_rate": 4.0604960109528786e-05, - "loss": 0.5951, + "epoch": 0.6140498180800448, + "grad_norm": 0.24458561478471183, + "learning_rate": 9.360773655983763e-05, + "loss": 0.5544, "step": 2194 }, { - "epoch": 0.3072078376487054, - "grad_norm": 0.4316897315393044, - "learning_rate": 4.059610418057739e-05, - "loss": 0.5643, + "epoch": 0.614329694934229, + "grad_norm": 0.2421957545425036, + "learning_rate": 9.360018778035375e-05, + "loss": 0.5336, "step": 2195 }, { - "epoch": 0.3073477956613016, - "grad_norm": 0.43497901635487507, - "learning_rate": 4.058724504646834e-05, - "loss": 0.6104, + "epoch": 0.6146095717884131, + "grad_norm": 0.23387462121446417, + "learning_rate": 9.35926348509796e-05, + "loss": 0.5727, "step": 2196 }, { - "epoch": 0.30748775367389786, - "grad_norm": 0.3971370299202904, - "learning_rate": 4.057838270902228e-05, - "loss": 0.5678, + "epoch": 0.6148894486425972, + "grad_norm": 0.22734956188694808, + "learning_rate": 9.358507777243403e-05, + "loss": 0.537, "step": 2197 }, { - "epoch": 0.30762771168649405, - "grad_norm": 0.4316780175497349, - "learning_rate": 4.056951717006051e-05, - "loss": 0.6105, + "epoch": 0.6151693254967814, + "grad_norm": 0.223586012193236, + "learning_rate": 9.35775165454364e-05, + "loss": 0.5297, "step": 2198 }, { - "epoch": 0.30776766969909025, - "grad_norm": 0.39695048154674517, - "learning_rate": 4.056064843140498e-05, - "loss": 0.5726, + "epoch": 0.6154492023509656, + "grad_norm": 0.23523420171430215, + "learning_rate": 9.356995117070634e-05, + "loss": 0.5578, "step": 2199 }, { - "epoch": 0.3079076277116865, - "grad_norm": 0.41439706383436253, - "learning_rate": 4.0551776494878316e-05, - "loss": 0.568, + "epoch": 0.6157290792051497, + "grad_norm": 0.4100477560788732, + "learning_rate": 9.356238164896393e-05, + "loss": 0.5489, "step": 2200 }, { - "epoch": 0.3080475857242827, - "grad_norm": 0.4191940957424624, - "learning_rate": 4.054290136230379e-05, - "loss": 0.6245, + "epoch": 0.6160089560593339, + "grad_norm": 0.2462234975316756, + "learning_rate": 9.355480798092967e-05, + "loss": 0.5741, "step": 2201 }, { - "epoch": 0.30818754373687895, - "grad_norm": 0.4143832100728591, - "learning_rate": 4.053402303550533e-05, - "loss": 0.5795, + "epoch": 0.6162888329135181, + "grad_norm": 0.23506519804551732, + "learning_rate": 9.35472301673244e-05, + "loss": 0.5601, "step": 2202 }, { - "epoch": 0.30832750174947515, - "grad_norm": 0.3915343927508335, - "learning_rate": 4.052514151630752e-05, - "loss": 0.5543, + "epoch": 0.6165687097677022, + "grad_norm": 0.2526039051321268, + "learning_rate": 9.353964820886938e-05, + "loss": 0.5493, "step": 2203 }, { - "epoch": 0.3084674597620714, - "grad_norm": 0.41805518170359557, - "learning_rate": 4.0516256806535594e-05, - "loss": 0.5891, + "epoch": 0.6168485866218864, + "grad_norm": 0.238125342569292, + "learning_rate": 9.353206210628629e-05, + "loss": 0.5553, "step": 2204 }, { - "epoch": 0.3086074177746676, - "grad_norm": 0.41287829384767516, - "learning_rate": 4.050736890801547e-05, - "loss": 0.569, + "epoch": 0.6171284634760705, + "grad_norm": 0.2326993166713717, + "learning_rate": 9.352447186029714e-05, + "loss": 0.5836, "step": 2205 }, { - "epoch": 0.3087473757872638, - "grad_norm": 0.4029096483520546, - "learning_rate": 4.049847782257369e-05, - "loss": 0.5859, + "epoch": 0.6174083403302547, + "grad_norm": 0.23815229620152004, + "learning_rate": 9.35168774716244e-05, + "loss": 0.5503, "step": 2206 }, { - "epoch": 0.30888733379986005, - "grad_norm": 0.39417183897543207, - "learning_rate": 4.048958355203746e-05, - "loss": 0.5282, + "epoch": 0.6176882171844389, + "grad_norm": 0.23278379399657392, + "learning_rate": 9.350927894099093e-05, + "loss": 0.5447, "step": 2207 }, { - "epoch": 0.30902729181245625, - "grad_norm": 0.3952415992081539, - "learning_rate": 4.048068609823464e-05, - "loss": 0.5668, + "epoch": 0.617968094038623, + "grad_norm": 0.23129700841609557, + "learning_rate": 9.350167626911993e-05, + "loss": 0.5619, "step": 2208 }, { - "epoch": 0.3091672498250525, - "grad_norm": 0.4072747277650614, - "learning_rate": 4.047178546299376e-05, - "loss": 0.5977, + "epoch": 0.6182479708928071, + "grad_norm": 0.232947905522226, + "learning_rate": 9.349406945673502e-05, + "loss": 0.5465, "step": 2209 }, { - "epoch": 0.3093072078376487, - "grad_norm": 0.4390729532532709, - "learning_rate": 4.0462881648143977e-05, - "loss": 0.6085, + "epoch": 0.6185278477469913, + "grad_norm": 0.22936914223681212, + "learning_rate": 9.348645850456024e-05, + "loss": 0.5316, "step": 2210 }, { - "epoch": 0.30944716585024495, - "grad_norm": 0.41550618366447567, - "learning_rate": 4.045397465551513e-05, - "loss": 0.5501, + "epoch": 0.6188077246011755, + "grad_norm": 0.23364232892207495, + "learning_rate": 9.347884341332e-05, + "loss": 0.5464, "step": 2211 }, { - "epoch": 0.30958712386284115, - "grad_norm": 0.42949098915048817, - "learning_rate": 4.044506448693769e-05, - "loss": 0.5786, + "epoch": 0.6190876014553597, + "grad_norm": 0.24271945753437266, + "learning_rate": 9.34712241837391e-05, + "loss": 0.5371, "step": 2212 }, { - "epoch": 0.30972708187543735, - "grad_norm": 0.4180486476641866, - "learning_rate": 4.0436151144242776e-05, - "loss": 0.5759, + "epoch": 0.6193674783095438, + "grad_norm": 0.2201927376784854, + "learning_rate": 9.346360081654275e-05, + "loss": 0.5659, "step": 2213 }, { - "epoch": 0.3098670398880336, - "grad_norm": 0.4151740836794609, - "learning_rate": 4.042723462926219e-05, - "loss": 0.5858, + "epoch": 0.6196473551637279, + "grad_norm": 0.2364922733794689, + "learning_rate": 9.345597331245657e-05, + "loss": 0.5526, "step": 2214 }, { - "epoch": 0.3100069979006298, - "grad_norm": 0.41105102208793937, - "learning_rate": 4.041831494382835e-05, - "loss": 0.5285, + "epoch": 0.6199272320179121, + "grad_norm": 0.22070329983117307, + "learning_rate": 9.344834167220651e-05, + "loss": 0.5343, "step": 2215 }, { - "epoch": 0.31014695591322605, - "grad_norm": 0.40516018829246475, - "learning_rate": 4.040939208977435e-05, - "loss": 0.55, + "epoch": 0.6202071088720963, + "grad_norm": 0.2354897372974203, + "learning_rate": 9.344070589651897e-05, + "loss": 0.5697, "step": 2216 }, { - "epoch": 0.31028691392582225, - "grad_norm": 0.42729080222511606, - "learning_rate": 4.040046606893392e-05, - "loss": 0.5872, + "epoch": 0.6204869857262805, + "grad_norm": 0.22433146622437808, + "learning_rate": 9.343306598612071e-05, + "loss": 0.5523, "step": 2217 }, { - "epoch": 0.31042687193841845, - "grad_norm": 0.4226725585719995, - "learning_rate": 4.039153688314145e-05, - "loss": 0.5882, + "epoch": 0.6207668625804645, + "grad_norm": 0.22235688023135813, + "learning_rate": 9.342542194173896e-05, + "loss": 0.5618, "step": 2218 }, { - "epoch": 0.3105668299510147, - "grad_norm": 0.40702190282971207, - "learning_rate": 4.0382604534232e-05, - "loss": 0.5968, + "epoch": 0.6210467394346487, + "grad_norm": 0.22201231251278364, + "learning_rate": 9.341777376410122e-05, + "loss": 0.5366, "step": 2219 }, { - "epoch": 0.3107067879636109, - "grad_norm": 0.4266606910520138, - "learning_rate": 4.0373669024041226e-05, - "loss": 0.5822, + "epoch": 0.6213266162888329, + "grad_norm": 0.21543991405034435, + "learning_rate": 9.341012145393547e-05, + "loss": 0.5506, "step": 2220 }, { - "epoch": 0.31084674597620715, - "grad_norm": 0.4163690162383508, - "learning_rate": 4.0364730354405475e-05, - "loss": 0.5829, + "epoch": 0.6216064931430171, + "grad_norm": 0.22190770928380732, + "learning_rate": 9.340246501197005e-05, + "loss": 0.5809, "step": 2221 }, { - "epoch": 0.31098670398880335, - "grad_norm": 0.40577915115104807, - "learning_rate": 4.035578852716175e-05, - "loss": 0.5733, + "epoch": 0.6218863699972013, + "grad_norm": 0.22244013944186045, + "learning_rate": 9.339480443893374e-05, + "loss": 0.511, "step": 2222 }, { - "epoch": 0.3111266620013996, - "grad_norm": 0.4011948223027502, - "learning_rate": 4.034684354414767e-05, - "loss": 0.6159, + "epoch": 0.6221662468513854, + "grad_norm": 0.22395845487584806, + "learning_rate": 9.338713973555564e-05, + "loss": 0.5602, "step": 2223 }, { - "epoch": 0.3112666200139958, - "grad_norm": 0.40735551230540823, - "learning_rate": 4.0337895407201527e-05, - "loss": 0.5897, + "epoch": 0.6224461237055695, + "grad_norm": 0.23139619395019506, + "learning_rate": 9.33794709025653e-05, + "loss": 0.5741, "step": 2224 }, { - "epoch": 0.311406578026592, - "grad_norm": 0.4007832693673057, - "learning_rate": 4.0328944118162255e-05, - "loss": 0.5823, + "epoch": 0.6227260005597537, + "grad_norm": 0.23752388942609212, + "learning_rate": 9.337179794069264e-05, + "loss": 0.5286, "step": 2225 }, { - "epoch": 0.31154653603918825, - "grad_norm": 0.4220826406609178, - "learning_rate": 4.0319989678869426e-05, - "loss": 0.603, + "epoch": 0.6230058774139379, + "grad_norm": 0.2274071498186889, + "learning_rate": 9.336412085066798e-05, + "loss": 0.5483, "step": 2226 }, { - "epoch": 0.31168649405178445, - "grad_norm": 0.42993833632780304, - "learning_rate": 4.031103209116328e-05, - "loss": 0.5959, + "epoch": 0.623285754268122, + "grad_norm": 0.22486488581133954, + "learning_rate": 9.335643963322203e-05, + "loss": 0.5243, "step": 2227 }, { - "epoch": 0.3118264520643807, - "grad_norm": 0.405630507124848, - "learning_rate": 4.030207135688468e-05, - "loss": 0.5846, + "epoch": 0.6235656311223062, + "grad_norm": 0.2487354662580747, + "learning_rate": 9.334875428908588e-05, + "loss": 0.5429, "step": 2228 }, { - "epoch": 0.3119664100769769, - "grad_norm": 0.40554860006169224, - "learning_rate": 4.029310747787516e-05, - "loss": 0.5809, + "epoch": 0.6238455079764903, + "grad_norm": 0.22803775898340814, + "learning_rate": 9.334106481899102e-05, + "loss": 0.5576, "step": 2229 }, { - "epoch": 0.31210636808957315, - "grad_norm": 0.38272639115415097, - "learning_rate": 4.028414045597688e-05, - "loss": 0.5554, + "epoch": 0.6241253848306745, + "grad_norm": 0.22072757617052552, + "learning_rate": 9.333337122366937e-05, + "loss": 0.5417, "step": 2230 }, { - "epoch": 0.31224632610216935, - "grad_norm": 0.4133830894332048, - "learning_rate": 4.027517029303266e-05, - "loss": 0.5844, + "epoch": 0.6244052616848587, + "grad_norm": 0.234083176200024, + "learning_rate": 9.332567350385317e-05, + "loss": 0.5316, "step": 2231 }, { - "epoch": 0.31238628411476554, - "grad_norm": 0.40469368564073166, - "learning_rate": 4.0266196990885955e-05, - "loss": 0.5692, + "epoch": 0.6246851385390428, + "grad_norm": 0.2325929584790608, + "learning_rate": 9.331797166027514e-05, + "loss": 0.5368, "step": 2232 }, { - "epoch": 0.3125262421273618, - "grad_norm": 0.43419090564140506, - "learning_rate": 4.025722055138087e-05, - "loss": 0.5766, + "epoch": 0.624965015393227, + "grad_norm": 0.22818549979562075, + "learning_rate": 9.331026569366832e-05, + "loss": 0.5373, "step": 2233 }, { - "epoch": 0.312666200139958, - "grad_norm": 0.4192206941532246, - "learning_rate": 4.0248240976362154e-05, - "loss": 0.5916, + "epoch": 0.6252448922474111, + "grad_norm": 0.22432409845951193, + "learning_rate": 9.330255560476616e-05, + "loss": 0.5415, "step": 2234 }, { - "epoch": 0.31280615815255425, - "grad_norm": 0.40494171533150775, - "learning_rate": 4.02392582676752e-05, - "loss": 0.5405, + "epoch": 0.6255247691015953, + "grad_norm": 0.23425028225855624, + "learning_rate": 9.329484139430252e-05, + "loss": 0.5431, "step": 2235 }, { - "epoch": 0.31294611616515045, - "grad_norm": 0.41642501232810825, - "learning_rate": 4.023027242716606e-05, - "loss": 0.5658, + "epoch": 0.6258046459557794, + "grad_norm": 0.22186903257274523, + "learning_rate": 9.328712306301163e-05, + "loss": 0.5281, "step": 2236 }, { - "epoch": 0.3130860741777467, - "grad_norm": 0.4179541672168489, - "learning_rate": 4.022128345668139e-05, - "loss": 0.5747, + "epoch": 0.6260845228099636, + "grad_norm": 0.25764056170581956, + "learning_rate": 9.327940061162817e-05, + "loss": 0.5481, "step": 2237 }, { - "epoch": 0.3132260321903429, - "grad_norm": 0.42437584151566893, - "learning_rate": 4.021229135806853e-05, - "loss": 0.5839, + "epoch": 0.6263643996641478, + "grad_norm": 0.2219924366960302, + "learning_rate": 9.32716740408871e-05, + "loss": 0.5398, "step": 2238 }, { - "epoch": 0.3133659902029391, - "grad_norm": 0.39708478174893375, - "learning_rate": 4.020329613317545e-05, - "loss": 0.5491, + "epoch": 0.626644276518332, + "grad_norm": 0.24346931514182896, + "learning_rate": 9.326394335152391e-05, + "loss": 0.5654, "step": 2239 }, { - "epoch": 0.31350594821553535, - "grad_norm": 0.40589223534794583, - "learning_rate": 4.0194297783850755e-05, - "loss": 0.5767, + "epoch": 0.6269241533725161, + "grad_norm": 0.22636605160474477, + "learning_rate": 9.325620854427435e-05, + "loss": 0.5421, "step": 2240 }, { - "epoch": 0.31364590622813154, - "grad_norm": 0.4186147684647929, - "learning_rate": 4.018529631194369e-05, - "loss": 0.5754, + "epoch": 0.6272040302267002, + "grad_norm": 0.22340805320347967, + "learning_rate": 9.324846961987466e-05, + "loss": 0.5583, "step": 2241 }, { - "epoch": 0.3137858642407278, - "grad_norm": 0.4066652101804317, - "learning_rate": 4.017629171930416e-05, - "loss": 0.5655, + "epoch": 0.6274839070808844, + "grad_norm": 0.21464910424219602, + "learning_rate": 9.324072657906142e-05, + "loss": 0.5471, "step": 2242 }, { - "epoch": 0.313925822253324, - "grad_norm": 0.4383552206858084, - "learning_rate": 4.01672840077827e-05, - "loss": 0.5829, + "epoch": 0.6277637839350686, + "grad_norm": 0.22745971241035404, + "learning_rate": 9.323297942257163e-05, + "loss": 0.5451, "step": 2243 }, { - "epoch": 0.31406578026592025, - "grad_norm": 0.422916994665018, - "learning_rate": 4.0158273179230475e-05, - "loss": 0.6183, + "epoch": 0.6280436607892528, + "grad_norm": 0.23149705136886, + "learning_rate": 9.322522815114265e-05, + "loss": 0.5142, "step": 2244 }, { - "epoch": 0.31420573827851644, - "grad_norm": 0.4255816299563645, - "learning_rate": 4.0149259235499317e-05, - "loss": 0.6063, + "epoch": 0.6283235376434368, + "grad_norm": 0.22838987642013736, + "learning_rate": 9.321747276551227e-05, + "loss": 0.5394, "step": 2245 }, { - "epoch": 0.31434569629111264, - "grad_norm": 0.43561902805107455, - "learning_rate": 4.014024217844167e-05, - "loss": 0.572, + "epoch": 0.628603414497621, + "grad_norm": 0.23142203663897262, + "learning_rate": 9.320971326641863e-05, + "loss": 0.5328, "step": 2246 }, { - "epoch": 0.3144856543037089, - "grad_norm": 0.4257236548174317, - "learning_rate": 4.013122200991064e-05, - "loss": 0.5752, + "epoch": 0.6288832913518052, + "grad_norm": 0.23225764513487954, + "learning_rate": 9.32019496546003e-05, + "loss": 0.5651, "step": 2247 }, { - "epoch": 0.3146256123163051, - "grad_norm": 0.38763550151633236, - "learning_rate": 4.012219873175995e-05, - "loss": 0.5677, + "epoch": 0.6291631682059894, + "grad_norm": 0.2320281458733709, + "learning_rate": 9.319418193079622e-05, + "loss": 0.5751, "step": 2248 }, { - "epoch": 0.31476557032890135, - "grad_norm": 0.4175429066296847, - "learning_rate": 4.0113172345843983e-05, - "loss": 0.5595, + "epoch": 0.6294430450601736, + "grad_norm": 0.24492992978803949, + "learning_rate": 9.318641009574573e-05, + "loss": 0.5466, "step": 2249 }, { - "epoch": 0.31490552834149754, - "grad_norm": 0.4480069652429578, - "learning_rate": 4.010414285401777e-05, - "loss": 0.5681, + "epoch": 0.6297229219143576, + "grad_norm": 0.22022199272503803, + "learning_rate": 9.317863415018856e-05, + "loss": 0.556, "step": 2250 }, { - "epoch": 0.3150454863540938, - "grad_norm": 0.40107954134135065, - "learning_rate": 4.009511025813694e-05, - "loss": 0.5706, + "epoch": 0.6300027987685418, + "grad_norm": 0.2304301736703019, + "learning_rate": 9.317085409486481e-05, + "loss": 0.571, "step": 2251 }, { - "epoch": 0.31518544436669, - "grad_norm": 0.40774375327862394, - "learning_rate": 4.008607456005778e-05, - "loss": 0.5684, + "epoch": 0.630282675622726, + "grad_norm": 0.22509218496281094, + "learning_rate": 9.316306993051501e-05, + "loss": 0.5465, "step": 2252 }, { - "epoch": 0.3153254023792862, - "grad_norm": 0.4374841181743415, - "learning_rate": 4.007703576163724e-05, - "loss": 0.5685, + "epoch": 0.6305625524769102, + "grad_norm": 0.22252812690646834, + "learning_rate": 9.315528165788006e-05, + "loss": 0.5411, "step": 2253 }, { - "epoch": 0.31546536039188244, - "grad_norm": 0.41257242312452364, - "learning_rate": 4.006799386473287e-05, - "loss": 0.591, + "epoch": 0.6308424293310944, + "grad_norm": 0.2328350894152955, + "learning_rate": 9.314748927770125e-05, + "loss": 0.5604, "step": 2254 }, { - "epoch": 0.31560531840447864, - "grad_norm": 0.4114995646091396, - "learning_rate": 4.005894887120287e-05, - "loss": 0.5519, + "epoch": 0.6311223061852784, + "grad_norm": 0.2213710380114181, + "learning_rate": 9.313969279072025e-05, + "loss": 0.5377, "step": 2255 }, { - "epoch": 0.3157452764170749, - "grad_norm": 0.4076070427270775, - "learning_rate": 4.0049900782906086e-05, - "loss": 0.5844, + "epoch": 0.6314021830394626, + "grad_norm": 0.22193669309360173, + "learning_rate": 9.313189219767915e-05, + "loss": 0.5125, "step": 2256 }, { - "epoch": 0.3158852344296711, - "grad_norm": 0.4132478127299326, - "learning_rate": 4.004084960170199e-05, - "loss": 0.5655, + "epoch": 0.6316820598936468, + "grad_norm": 0.22549734138875063, + "learning_rate": 9.31240874993204e-05, + "loss": 0.5564, "step": 2257 }, { - "epoch": 0.31602519244226734, - "grad_norm": 0.4076919229201329, - "learning_rate": 4.0031795329450685e-05, - "loss": 0.5885, + "epoch": 0.631961936747831, + "grad_norm": 0.2259199231381941, + "learning_rate": 9.311627869638686e-05, + "loss": 0.5523, "step": 2258 }, { - "epoch": 0.31616515045486354, - "grad_norm": 0.421676106935922, - "learning_rate": 4.002273796801292e-05, - "loss": 0.6067, + "epoch": 0.6322418136020151, + "grad_norm": 0.229056475520519, + "learning_rate": 9.31084657896218e-05, + "loss": 0.5336, "step": 2259 }, { - "epoch": 0.31630510846745974, - "grad_norm": 0.3902189908536509, - "learning_rate": 4.001367751925008e-05, - "loss": 0.5666, + "epoch": 0.6325216904561993, + "grad_norm": 0.23347803243129697, + "learning_rate": 9.310064877976883e-05, + "loss": 0.5255, "step": 2260 }, { - "epoch": 0.316445066480056, - "grad_norm": 0.42674732993318637, - "learning_rate": 4.000461398502418e-05, - "loss": 0.5944, + "epoch": 0.6328015673103834, + "grad_norm": 0.22553841163286872, + "learning_rate": 9.309282766757197e-05, + "loss": 0.5473, "step": 2261 }, { - "epoch": 0.3165850244926522, - "grad_norm": 0.40075738490963975, - "learning_rate": 3.9995547367197845e-05, - "loss": 0.565, + "epoch": 0.6330814441645676, + "grad_norm": 0.2259053835740961, + "learning_rate": 9.308500245377567e-05, + "loss": 0.5612, "step": 2262 }, { - "epoch": 0.31672498250524844, - "grad_norm": 0.42762891854787655, - "learning_rate": 3.998647766763438e-05, - "loss": 0.6214, + "epoch": 0.6333613210187518, + "grad_norm": 0.22324138836960267, + "learning_rate": 9.307717313912472e-05, + "loss": 0.5289, "step": 2263 }, { - "epoch": 0.31686494051784464, - "grad_norm": 0.3961782540593822, - "learning_rate": 3.9977404888197704e-05, - "loss": 0.5454, + "epoch": 0.6336411978729359, + "grad_norm": 0.235700850033682, + "learning_rate": 9.306933972436431e-05, + "loss": 0.5303, "step": 2264 }, { - "epoch": 0.3170048985304409, - "grad_norm": 0.3923939088232202, - "learning_rate": 3.996832903075235e-05, - "loss": 0.5883, + "epoch": 0.6339210747271201, + "grad_norm": 0.22274464641205288, + "learning_rate": 9.306150221024004e-05, + "loss": 0.5521, "step": 2265 }, { - "epoch": 0.3171448565430371, - "grad_norm": 0.42198649975518954, - "learning_rate": 3.99592500971635e-05, - "loss": 0.5809, + "epoch": 0.6342009515813042, + "grad_norm": 0.23536595808718414, + "learning_rate": 9.305366059749788e-05, + "loss": 0.5501, "step": 2266 }, { - "epoch": 0.3172848145556333, - "grad_norm": 0.4083907533890683, - "learning_rate": 3.995016808929698e-05, - "loss": 0.5423, + "epoch": 0.6344808284354884, + "grad_norm": 0.23187842569592845, + "learning_rate": 9.304581488688422e-05, + "loss": 0.5611, "step": 2267 }, { - "epoch": 0.31742477256822954, - "grad_norm": 0.39942363696045713, - "learning_rate": 3.9941083009019223e-05, - "loss": 0.561, + "epoch": 0.6347607052896725, + "grad_norm": 0.22053474331324538, + "learning_rate": 9.30379650791458e-05, + "loss": 0.5529, "step": 2268 }, { - "epoch": 0.31756473058082574, - "grad_norm": 0.4262274605313951, - "learning_rate": 3.993199485819731e-05, - "loss": 0.5875, + "epoch": 0.6350405821438567, + "grad_norm": 0.22749795353557378, + "learning_rate": 9.303011117502978e-05, + "loss": 0.5507, "step": 2269 }, { - "epoch": 0.317704688593422, - "grad_norm": 0.38915227058740126, - "learning_rate": 3.992290363869895e-05, - "loss": 0.5531, + "epoch": 0.6353204589980409, + "grad_norm": 0.2208736218934104, + "learning_rate": 9.302225317528368e-05, + "loss": 0.5401, "step": 2270 }, { - "epoch": 0.3178446466060182, - "grad_norm": 0.43560718886498834, - "learning_rate": 3.9913809352392474e-05, - "loss": 0.5923, + "epoch": 0.635600335852225, + "grad_norm": 0.23811281327022943, + "learning_rate": 9.301439108065546e-05, + "loss": 0.5554, "step": 2271 }, { - "epoch": 0.31798460461861444, - "grad_norm": 0.44620165771992476, - "learning_rate": 3.990471200114685e-05, - "loss": 0.608, + "epoch": 0.6358802127064092, + "grad_norm": 0.2200082176528252, + "learning_rate": 9.300652489189342e-05, + "loss": 0.546, "step": 2272 }, { - "epoch": 0.31812456263121064, - "grad_norm": 0.4359041969176294, - "learning_rate": 3.9895611586831685e-05, - "loss": 0.5733, + "epoch": 0.6361600895605933, + "grad_norm": 0.22694066876811875, + "learning_rate": 9.299865460974624e-05, + "loss": 0.5597, "step": 2273 }, { - "epoch": 0.31826452064380684, - "grad_norm": 0.42976996875212653, - "learning_rate": 3.98865081113172e-05, - "loss": 0.5701, + "epoch": 0.6364399664147775, + "grad_norm": 0.23316692349991483, + "learning_rate": 9.299078023496307e-05, + "loss": 0.56, "step": 2274 }, { - "epoch": 0.3184044786564031, - "grad_norm": 0.40393224396318145, - "learning_rate": 3.987740157647426e-05, - "loss": 0.5764, + "epoch": 0.6367198432689617, + "grad_norm": 0.22515063137419997, + "learning_rate": 9.298290176829338e-05, + "loss": 0.5593, "step": 2275 }, { - "epoch": 0.3185444366689993, - "grad_norm": 0.42936298439237425, - "learning_rate": 3.986829198417433e-05, - "loss": 0.5922, + "epoch": 0.6369997201231458, + "grad_norm": 0.23614366525635452, + "learning_rate": 9.297501921048703e-05, + "loss": 0.5536, "step": 2276 }, { - "epoch": 0.31868439468159554, - "grad_norm": 0.40635602349462724, - "learning_rate": 3.985917933628955e-05, - "loss": 0.5492, + "epoch": 0.6372795969773299, + "grad_norm": 0.22514963570539365, + "learning_rate": 9.29671325622943e-05, + "loss": 0.5357, "step": 2277 }, { - "epoch": 0.31882435269419174, - "grad_norm": 0.38817161800821404, - "learning_rate": 3.9850063634692635e-05, - "loss": 0.6025, + "epoch": 0.6375594738315141, + "grad_norm": 0.22528905629020501, + "learning_rate": 9.295924182446584e-05, + "loss": 0.5465, "step": 2278 }, { - "epoch": 0.318964310706788, - "grad_norm": 0.40934279574178284, - "learning_rate": 3.984094488125698e-05, - "loss": 0.5779, + "epoch": 0.6378393506856983, + "grad_norm": 0.23085147748649962, + "learning_rate": 9.29513469977527e-05, + "loss": 0.5374, "step": 2279 }, { - "epoch": 0.3191042687193842, - "grad_norm": 0.4265395827777272, - "learning_rate": 3.983182307785657e-05, - "loss": 0.5947, + "epoch": 0.6381192275398825, + "grad_norm": 0.2356490016621578, + "learning_rate": 9.294344808290632e-05, + "loss": 0.5674, "step": 2280 }, { - "epoch": 0.3192442267319804, - "grad_norm": 0.4131217561851261, - "learning_rate": 3.982269822636602e-05, - "loss": 0.5659, + "epoch": 0.6383991043940667, + "grad_norm": 0.2289003216162852, + "learning_rate": 9.29355450806785e-05, + "loss": 0.538, "step": 2281 }, { - "epoch": 0.31938418474457664, - "grad_norm": 0.40502407520544415, - "learning_rate": 3.981357032866058e-05, - "loss": 0.618, + "epoch": 0.6386789812482507, + "grad_norm": 0.22565129640786685, + "learning_rate": 9.292763799182147e-05, + "loss": 0.5413, "step": 2282 }, { - "epoch": 0.31952414275717284, - "grad_norm": 0.4162474667828162, - "learning_rate": 3.980443938661614e-05, - "loss": 0.5778, + "epoch": 0.6389588581024349, + "grad_norm": 0.22970949286117814, + "learning_rate": 9.291972681708782e-05, + "loss": 0.5548, "step": 2283 }, { - "epoch": 0.3196641007697691, - "grad_norm": 0.41057477420812005, - "learning_rate": 3.9795305402109195e-05, - "loss": 0.5432, + "epoch": 0.6392387349566191, + "grad_norm": 0.22374611908346378, + "learning_rate": 9.291181155723056e-05, + "loss": 0.5677, "step": 2284 }, { - "epoch": 0.3198040587823653, - "grad_norm": 0.4199988291247849, - "learning_rate": 3.9786168377016866e-05, - "loss": 0.5615, + "epoch": 0.6395186118108033, + "grad_norm": 0.23480068531041298, + "learning_rate": 9.290389221300308e-05, + "loss": 0.5414, "step": 2285 }, { - "epoch": 0.31994401679496154, - "grad_norm": 0.4154620895424027, - "learning_rate": 3.977702831321692e-05, - "loss": 0.5774, + "epoch": 0.6397984886649875, + "grad_norm": 0.22994014727408524, + "learning_rate": 9.28959687851591e-05, + "loss": 0.5589, "step": 2286 }, { - "epoch": 0.32008397480755774, - "grad_norm": 0.42692144319966024, - "learning_rate": 3.976788521258771e-05, - "loss": 0.6076, + "epoch": 0.6400783655191715, + "grad_norm": 0.22794352806305146, + "learning_rate": 9.288804127445279e-05, + "loss": 0.5533, "step": 2287 }, { - "epoch": 0.32022393282015393, - "grad_norm": 0.41668448669509417, - "learning_rate": 3.975873907700825e-05, - "loss": 0.5973, + "epoch": 0.6403582423733557, + "grad_norm": 0.2157477812026282, + "learning_rate": 9.288010968163872e-05, + "loss": 0.5497, "step": 2288 }, { - "epoch": 0.3203638908327502, - "grad_norm": 0.4268759954887578, - "learning_rate": 3.974958990835816e-05, - "loss": 0.581, + "epoch": 0.6406381192275399, + "grad_norm": 0.2501669333990063, + "learning_rate": 9.287217400747182e-05, + "loss": 0.5685, "step": 2289 }, { - "epoch": 0.3205038488453464, - "grad_norm": 0.3952528234342034, - "learning_rate": 3.974043770851769e-05, - "loss": 0.5613, + "epoch": 0.6409179960817241, + "grad_norm": 0.22656067225401694, + "learning_rate": 9.286423425270738e-05, + "loss": 0.5191, "step": 2290 }, { - "epoch": 0.32064380685794264, - "grad_norm": 0.44836986260917555, - "learning_rate": 3.9731282479367706e-05, - "loss": 0.6248, + "epoch": 0.6411978729359082, + "grad_norm": 0.22767658426171816, + "learning_rate": 9.285629041810115e-05, + "loss": 0.5397, "step": 2291 }, { - "epoch": 0.32078376487053883, - "grad_norm": 0.4256932127612938, - "learning_rate": 3.9722124222789705e-05, - "loss": 0.6071, + "epoch": 0.6414777497900923, + "grad_norm": 0.22180886853440163, + "learning_rate": 9.284834250440921e-05, + "loss": 0.5553, "step": 2292 }, { - "epoch": 0.32092372288313503, - "grad_norm": 0.44510004429106764, - "learning_rate": 3.97129629406658e-05, - "loss": 0.5797, + "epoch": 0.6417576266442765, + "grad_norm": 0.22099414569596135, + "learning_rate": 9.284039051238804e-05, + "loss": 0.5544, "step": 2293 }, { - "epoch": 0.3210636808957313, - "grad_norm": 0.4302627905158645, - "learning_rate": 3.970379863487872e-05, - "loss": 0.5665, + "epoch": 0.6420375034984607, + "grad_norm": 0.232736470506679, + "learning_rate": 9.283243444279452e-05, + "loss": 0.5643, "step": 2294 }, { - "epoch": 0.3212036389083275, - "grad_norm": 0.41235485841130853, - "learning_rate": 3.969463130731183e-05, - "loss": 0.5634, + "epoch": 0.6423173803526449, + "grad_norm": 0.22869140915855235, + "learning_rate": 9.282447429638592e-05, + "loss": 0.5624, "step": 2295 }, { - "epoch": 0.32134359692092374, - "grad_norm": 0.41894260330344535, - "learning_rate": 3.9685460959849105e-05, - "loss": 0.5867, + "epoch": 0.642597257206829, + "grad_norm": 0.22348008058789776, + "learning_rate": 9.281651007391987e-05, + "loss": 0.5559, "step": 2296 }, { - "epoch": 0.32148355493351993, - "grad_norm": 0.41203034907703107, - "learning_rate": 3.967628759437516e-05, - "loss": 0.59, + "epoch": 0.6428771340610131, + "grad_norm": 0.21958367303055723, + "learning_rate": 9.280854177615444e-05, + "loss": 0.5493, "step": 2297 }, { - "epoch": 0.3216235129461162, - "grad_norm": 0.4281005271912537, - "learning_rate": 3.96671112127752e-05, - "loss": 0.6006, + "epoch": 0.6431570109151973, + "grad_norm": 0.21565247233331253, + "learning_rate": 9.280056940384805e-05, + "loss": 0.5536, "step": 2298 }, { - "epoch": 0.3217634709587124, - "grad_norm": 0.4075127794042514, - "learning_rate": 3.965793181693506e-05, - "loss": 0.565, + "epoch": 0.6434368877693815, + "grad_norm": 0.22416765670334324, + "learning_rate": 9.27925929577595e-05, + "loss": 0.5486, "step": 2299 }, { - "epoch": 0.3219034289713086, - "grad_norm": 0.39763128617859633, - "learning_rate": 3.96487494087412e-05, - "loss": 0.5437, + "epoch": 0.6437167646235656, + "grad_norm": 0.22088911741615, + "learning_rate": 9.2784612438648e-05, + "loss": 0.5471, "step": 2300 }, { - "epoch": 0.32204338698390483, - "grad_norm": 0.4552074769557467, - "learning_rate": 3.9639563990080716e-05, - "loss": 0.6201, + "epoch": 0.6439966414777498, + "grad_norm": 0.23209657090685998, + "learning_rate": 9.277662784727315e-05, + "loss": 0.5321, "step": 2301 }, { - "epoch": 0.32218334499650103, - "grad_norm": 0.40413041195679866, - "learning_rate": 3.9630375562841295e-05, - "loss": 0.5526, + "epoch": 0.644276518331934, + "grad_norm": 0.23182387131170437, + "learning_rate": 9.27686391843949e-05, + "loss": 0.5606, "step": 2302 }, { - "epoch": 0.3223233030090973, - "grad_norm": 0.40283666703685167, - "learning_rate": 3.9621184128911234e-05, - "loss": 0.579, + "epoch": 0.6445563951861181, + "grad_norm": 0.2339164369879079, + "learning_rate": 9.276064645077365e-05, + "loss": 0.5506, "step": 2303 }, { - "epoch": 0.3224632610216935, - "grad_norm": 0.3980665357469519, - "learning_rate": 3.96119896901795e-05, - "loss": 0.5626, + "epoch": 0.6448362720403022, + "grad_norm": 0.21714544722292511, + "learning_rate": 9.275264964717012e-05, + "loss": 0.5415, "step": 2304 }, { - "epoch": 0.32260321903428973, - "grad_norm": 0.42655702692861863, - "learning_rate": 3.9602792248535606e-05, - "loss": 0.6075, + "epoch": 0.6451161488944864, + "grad_norm": 0.2201203799089658, + "learning_rate": 9.274464877434548e-05, + "loss": 0.5601, "step": 2305 }, { - "epoch": 0.32274317704688593, - "grad_norm": 0.4826045217103083, - "learning_rate": 3.959359180586975e-05, - "loss": 0.5799, + "epoch": 0.6453960257486706, + "grad_norm": 0.21866346921146546, + "learning_rate": 9.273664383306125e-05, + "loss": 0.5384, "step": 2306 }, { - "epoch": 0.32288313505948213, - "grad_norm": 0.43008580043844, - "learning_rate": 3.95843883640727e-05, - "loss": 0.5787, + "epoch": 0.6456759026028548, + "grad_norm": 0.22511168679753563, + "learning_rate": 9.272863482407931e-05, + "loss": 0.5593, "step": 2307 }, { - "epoch": 0.3230230930720784, - "grad_norm": 0.42351832550150303, - "learning_rate": 3.957518192503587e-05, - "loss": 0.5565, + "epoch": 0.6459557794570389, + "grad_norm": 0.22442764655366385, + "learning_rate": 9.272062174816202e-05, + "loss": 0.5548, "step": 2308 }, { - "epoch": 0.3231630510846746, - "grad_norm": 0.40482672190396396, - "learning_rate": 3.956597249065126e-05, - "loss": 0.5533, + "epoch": 0.646235656311223, + "grad_norm": 0.22902751071678393, + "learning_rate": 9.271260460607202e-05, + "loss": 0.5474, "step": 2309 }, { - "epoch": 0.32330300909727083, - "grad_norm": 0.4107029056895161, - "learning_rate": 3.95567600628115e-05, - "loss": 0.5756, + "epoch": 0.6465155331654072, + "grad_norm": 0.22939826127270602, + "learning_rate": 9.270458339857244e-05, + "loss": 0.5443, "step": 2310 }, { - "epoch": 0.32344296710986703, - "grad_norm": 0.43221123666830596, - "learning_rate": 3.954754464340987e-05, - "loss": 0.6074, + "epoch": 0.6467954100195914, + "grad_norm": 0.2303813527730354, + "learning_rate": 9.269655812642668e-05, + "loss": 0.5661, "step": 2311 }, { - "epoch": 0.3235829251224633, - "grad_norm": 0.42051407755166026, - "learning_rate": 3.9538326234340194e-05, - "loss": 0.5948, + "epoch": 0.6470752868737756, + "grad_norm": 0.2260651119592067, + "learning_rate": 9.268852879039863e-05, + "loss": 0.5618, "step": 2312 }, { - "epoch": 0.3237228831350595, - "grad_norm": 0.38578825068740574, - "learning_rate": 3.9529104837496974e-05, - "loss": 0.5692, + "epoch": 0.6473551637279596, + "grad_norm": 0.242050014211222, + "learning_rate": 9.26804953912525e-05, + "loss": 0.5577, "step": 2313 }, { - "epoch": 0.3238628411476557, - "grad_norm": 0.42139866225485473, - "learning_rate": 3.951988045477529e-05, - "loss": 0.5892, + "epoch": 0.6476350405821438, + "grad_norm": 0.22424313698253537, + "learning_rate": 9.267245792975294e-05, + "loss": 0.5442, "step": 2314 }, { - "epoch": 0.32400279916025193, - "grad_norm": 0.43110899316398094, - "learning_rate": 3.9510653088070846e-05, - "loss": 0.5698, + "epoch": 0.647914917436328, + "grad_norm": 0.24264178315852836, + "learning_rate": 9.266441640666496e-05, + "loss": 0.587, "step": 2315 }, { - "epoch": 0.32414275717284813, - "grad_norm": 0.4426212316446372, - "learning_rate": 3.9501422739279956e-05, - "loss": 0.5727, + "epoch": 0.6481947942905122, + "grad_norm": 0.230444323299661, + "learning_rate": 9.265637082275393e-05, + "loss": 0.5519, "step": 2316 }, { - "epoch": 0.3242827151854444, - "grad_norm": 0.42108819792246993, - "learning_rate": 3.9492189410299566e-05, - "loss": 0.554, + "epoch": 0.6484746711446964, + "grad_norm": 0.23137614429545186, + "learning_rate": 9.264832117878566e-05, + "loss": 0.5306, "step": 2317 }, { - "epoch": 0.3244226731980406, - "grad_norm": 0.46665094478938685, - "learning_rate": 3.94829531030272e-05, - "loss": 0.6323, + "epoch": 0.6487545479988804, + "grad_norm": 0.21618124130299693, + "learning_rate": 9.26402674755263e-05, + "loss": 0.5386, "step": 2318 }, { - "epoch": 0.32456263121063683, - "grad_norm": 0.4138605211792902, - "learning_rate": 3.9473713819361015e-05, - "loss": 0.5446, + "epoch": 0.6490344248530646, + "grad_norm": 0.22619207063263208, + "learning_rate": 9.263220971374243e-05, + "loss": 0.5485, "step": 2319 }, { - "epoch": 0.32470258922323303, - "grad_norm": 0.43505576166032045, - "learning_rate": 3.946447156119979e-05, - "loss": 0.5664, + "epoch": 0.6493143017072488, + "grad_norm": 0.22718565387746364, + "learning_rate": 9.262414789420097e-05, + "loss": 0.5456, "step": 2320 }, { - "epoch": 0.3248425472358292, - "grad_norm": 0.41878878141867854, - "learning_rate": 3.945522633044289e-05, - "loss": 0.608, + "epoch": 0.649594178561433, + "grad_norm": 0.22771105387052398, + "learning_rate": 9.261608201766925e-05, + "loss": 0.5229, "step": 2321 }, { - "epoch": 0.3249825052484255, - "grad_norm": 0.40004594511350894, - "learning_rate": 3.9445978128990326e-05, - "loss": 0.5454, + "epoch": 0.6498740554156172, + "grad_norm": 0.2291366099851266, + "learning_rate": 9.260801208491498e-05, + "loss": 0.5604, "step": 2322 }, { - "epoch": 0.3251224632610217, - "grad_norm": 0.42281186251192865, - "learning_rate": 3.943672695874267e-05, - "loss": 0.567, + "epoch": 0.6501539322698013, + "grad_norm": 0.2289666255387694, + "learning_rate": 9.25999380967063e-05, + "loss": 0.5468, "step": 2323 }, { - "epoch": 0.32526242127361793, - "grad_norm": 0.4068793191045584, - "learning_rate": 3.942747282160114e-05, - "loss": 0.5424, + "epoch": 0.6504338091239854, + "grad_norm": 0.23417748564258375, + "learning_rate": 9.259186005381164e-05, + "loss": 0.5475, "step": 2324 }, { - "epoch": 0.3254023792862141, - "grad_norm": 0.42411112127273465, - "learning_rate": 3.9418215719467565e-05, - "loss": 0.567, + "epoch": 0.6507136859781696, + "grad_norm": 0.24046818943969445, + "learning_rate": 9.258377795699992e-05, + "loss": 0.5756, "step": 2325 }, { - "epoch": 0.3255423372988104, - "grad_norm": 0.4111794082883435, - "learning_rate": 3.9408955654244364e-05, - "loss": 0.5682, + "epoch": 0.6509935628323538, + "grad_norm": 0.22598414155699229, + "learning_rate": 9.257569180704038e-05, + "loss": 0.548, "step": 2326 }, { - "epoch": 0.3256822953114066, - "grad_norm": 0.39707493084776485, - "learning_rate": 3.939969262783457e-05, - "loss": 0.5541, + "epoch": 0.651273439686538, + "grad_norm": 0.2341541064036925, + "learning_rate": 9.256760160470264e-05, + "loss": 0.5362, "step": 2327 }, { - "epoch": 0.3258222533240028, - "grad_norm": 0.3941781607373903, - "learning_rate": 3.939042664214184e-05, - "loss": 0.5511, + "epoch": 0.6515533165407221, + "grad_norm": 0.23515841156435047, + "learning_rate": 9.255950735075678e-05, + "loss": 0.5825, "step": 2328 }, { - "epoch": 0.32596221133659903, - "grad_norm": 0.4150783846079092, - "learning_rate": 3.9381157699070424e-05, - "loss": 0.6085, + "epoch": 0.6518331933949062, + "grad_norm": 0.23173010860238322, + "learning_rate": 9.255140904597317e-05, + "loss": 0.526, "step": 2329 }, { - "epoch": 0.3261021693491952, - "grad_norm": 0.392787000567577, - "learning_rate": 3.937188580052518e-05, - "loss": 0.5834, + "epoch": 0.6521130702490904, + "grad_norm": 0.2297433185972963, + "learning_rate": 9.254330669112264e-05, + "loss": 0.5349, "step": 2330 }, { - "epoch": 0.3262421273617915, - "grad_norm": 0.41765704626894373, - "learning_rate": 3.9362610948411585e-05, - "loss": 0.5497, + "epoch": 0.6523929471032746, + "grad_norm": 0.23215511229319558, + "learning_rate": 9.253520028697636e-05, + "loss": 0.5418, "step": 2331 }, { - "epoch": 0.3263820853743877, - "grad_norm": 0.4175224418697374, - "learning_rate": 3.9353333144635706e-05, - "loss": 0.5597, + "epoch": 0.6526728239574587, + "grad_norm": 0.23788094793990464, + "learning_rate": 9.252708983430592e-05, + "loss": 0.5445, "step": 2332 }, { - "epoch": 0.32652204338698393, - "grad_norm": 0.4243451821154263, - "learning_rate": 3.934405239110423e-05, - "loss": 0.5884, + "epoch": 0.6529527008116429, + "grad_norm": 0.22767547559622842, + "learning_rate": 9.251897533388325e-05, + "loss": 0.5445, "step": 2333 }, { - "epoch": 0.3266620013995801, - "grad_norm": 0.4267515430222131, - "learning_rate": 3.9334768689724456e-05, - "loss": 0.606, + "epoch": 0.653232577665827, + "grad_norm": 0.23724286683345358, + "learning_rate": 9.251085678648072e-05, + "loss": 0.5714, "step": 2334 }, { - "epoch": 0.3268019594121763, - "grad_norm": 0.4332693190274577, - "learning_rate": 3.932548204240426e-05, - "loss": 0.5995, + "epoch": 0.6535124545200112, + "grad_norm": 0.2297351591977234, + "learning_rate": 9.250273419287103e-05, + "loss": 0.5533, "step": 2335 }, { - "epoch": 0.3269419174247726, - "grad_norm": 0.4121506429858514, - "learning_rate": 3.931619245105216e-05, - "loss": 0.5787, + "epoch": 0.6537923313741953, + "grad_norm": 0.23837270156352452, + "learning_rate": 9.249460755382733e-05, + "loss": 0.5516, "step": 2336 }, { - "epoch": 0.3270818754373688, - "grad_norm": 0.4180451871893415, - "learning_rate": 3.9306899917577245e-05, - "loss": 0.5546, + "epoch": 0.6540722082283795, + "grad_norm": 0.2275643138395253, + "learning_rate": 9.248647687012308e-05, + "loss": 0.5642, "step": 2337 }, { - "epoch": 0.327221833449965, - "grad_norm": 0.40942456121846177, - "learning_rate": 3.9297604443889234e-05, - "loss": 0.6098, + "epoch": 0.6543520850825637, + "grad_norm": 0.2089555358270792, + "learning_rate": 9.247834214253219e-05, + "loss": 0.5237, "step": 2338 }, { - "epoch": 0.3273617914625612, - "grad_norm": 0.4067754001916415, - "learning_rate": 3.928830603189844e-05, - "loss": 0.5756, + "epoch": 0.6546319619367479, + "grad_norm": 0.22375962164740998, + "learning_rate": 9.247020337182893e-05, + "loss": 0.5583, "step": 2339 }, { - "epoch": 0.3275017494751575, - "grad_norm": 0.4140918426830124, - "learning_rate": 3.9279004683515783e-05, - "loss": 0.5641, + "epoch": 0.654911838790932, + "grad_norm": 0.23154662092022185, + "learning_rate": 9.246206055878793e-05, + "loss": 0.5636, "step": 2340 }, { - "epoch": 0.3276417074877537, - "grad_norm": 0.4212820521655152, - "learning_rate": 3.926970040065278e-05, - "loss": 0.604, + "epoch": 0.6551917156451161, + "grad_norm": 0.22140482953493013, + "learning_rate": 9.245391370418423e-05, + "loss": 0.5278, "step": 2341 }, { - "epoch": 0.3277816655003499, - "grad_norm": 0.40003067107763557, - "learning_rate": 3.9260393185221564e-05, - "loss": 0.565, + "epoch": 0.6554715924993003, + "grad_norm": 0.22372665401886213, + "learning_rate": 9.244576280879329e-05, + "loss": 0.554, "step": 2342 }, { - "epoch": 0.3279216235129461, - "grad_norm": 0.39827204127063837, - "learning_rate": 3.925108303913485e-05, - "loss": 0.5799, + "epoch": 0.6557514693534845, + "grad_norm": 0.22183815887232886, + "learning_rate": 9.243760787339086e-05, + "loss": 0.5415, "step": 2343 }, { - "epoch": 0.3280615815255423, - "grad_norm": 0.4233494400434675, - "learning_rate": 3.9241769964305976e-05, - "loss": 0.6037, + "epoch": 0.6560313462076687, + "grad_norm": 0.21311967326441986, + "learning_rate": 9.242944889875318e-05, + "loss": 0.549, "step": 2344 }, { - "epoch": 0.3282015395381386, - "grad_norm": 0.4118719992278687, - "learning_rate": 3.9232453962648864e-05, - "loss": 0.6098, + "epoch": 0.6563112230618527, + "grad_norm": 0.2336176164793105, + "learning_rate": 9.24212858856568e-05, + "loss": 0.5576, "step": 2345 }, { - "epoch": 0.3283414975507348, - "grad_norm": 0.4016000541614996, - "learning_rate": 3.9223135036078064e-05, - "loss": 0.5734, + "epoch": 0.6565910999160369, + "grad_norm": 0.2296357453534462, + "learning_rate": 9.241311883487866e-05, + "loss": 0.5655, "step": 2346 }, { - "epoch": 0.328481455563331, - "grad_norm": 0.4237468615429023, - "learning_rate": 3.921381318650869e-05, - "loss": 0.6088, + "epoch": 0.6568709767702211, + "grad_norm": 0.22485073089543192, + "learning_rate": 9.240494774719614e-05, + "loss": 0.5584, "step": 2347 }, { - "epoch": 0.3286214135759272, - "grad_norm": 0.46652462106861353, - "learning_rate": 3.920448841585649e-05, - "loss": 0.5814, + "epoch": 0.6571508536244053, + "grad_norm": 0.2409800791342636, + "learning_rate": 9.239677262338697e-05, + "loss": 0.5374, "step": 2348 }, { - "epoch": 0.3287613715885234, - "grad_norm": 0.39685764815363, - "learning_rate": 3.9195160726037805e-05, - "loss": 0.5646, + "epoch": 0.6574307304785895, + "grad_norm": 0.23198325497364455, + "learning_rate": 9.238859346422925e-05, + "loss": 0.5615, "step": 2349 }, { - "epoch": 0.3289013296011197, - "grad_norm": 0.4229891713627349, - "learning_rate": 3.918583011896955e-05, - "loss": 0.5773, + "epoch": 0.6577106073327735, + "grad_norm": 0.22246276638503817, + "learning_rate": 9.238041027050148e-05, + "loss": 0.5382, "step": 2350 }, { - "epoch": 0.32904128761371587, - "grad_norm": 0.3879674012353026, - "learning_rate": 3.9176496596569265e-05, - "loss": 0.5397, + "epoch": 0.6579904841869577, + "grad_norm": 0.22748624442445645, + "learning_rate": 9.237222304298254e-05, + "loss": 0.5218, "step": 2351 }, { - "epoch": 0.3291812456263121, - "grad_norm": 0.3901223408311448, - "learning_rate": 3.91671601607551e-05, - "loss": 0.5736, + "epoch": 0.6582703610411419, + "grad_norm": 0.21900229802920876, + "learning_rate": 9.236403178245168e-05, + "loss": 0.5618, "step": 2352 }, { - "epoch": 0.3293212036389083, - "grad_norm": 0.4002239555751096, - "learning_rate": 3.915782081344578e-05, - "loss": 0.5967, + "epoch": 0.6585502378953261, + "grad_norm": 0.2240368013808672, + "learning_rate": 9.235583648968858e-05, + "loss": 0.5389, "step": 2353 }, { - "epoch": 0.3294611616515046, - "grad_norm": 0.41288801391640517, - "learning_rate": 3.914847855656062e-05, - "loss": 0.5715, + "epoch": 0.6588301147495103, + "grad_norm": 0.23200122166044623, + "learning_rate": 9.234763716547326e-05, + "loss": 0.5713, "step": 2354 }, { - "epoch": 0.3296011196641008, - "grad_norm": 0.4184700560414015, - "learning_rate": 3.913913339201956e-05, - "loss": 0.5507, + "epoch": 0.6591099916036943, + "grad_norm": 0.22746746470463916, + "learning_rate": 9.233943381058611e-05, + "loss": 0.5516, "step": 2355 }, { - "epoch": 0.32974107767669697, - "grad_norm": 0.3894927148762295, - "learning_rate": 3.9129785321743125e-05, - "loss": 0.5705, + "epoch": 0.6593898684578785, + "grad_norm": 0.2338898749230664, + "learning_rate": 9.233122642580796e-05, + "loss": 0.5253, "step": 2356 }, { - "epoch": 0.3298810356892932, - "grad_norm": 0.3847000287394412, - "learning_rate": 3.912043434765245e-05, - "loss": 0.5484, + "epoch": 0.6596697453120627, + "grad_norm": 0.2273430072844073, + "learning_rate": 9.232301501191997e-05, + "loss": 0.5359, "step": 2357 }, { - "epoch": 0.3300209937018894, - "grad_norm": 0.4403932420001484, - "learning_rate": 3.911108047166924e-05, - "loss": 0.6139, + "epoch": 0.6599496221662469, + "grad_norm": 0.22364010827333952, + "learning_rate": 9.231479956970375e-05, + "loss": 0.5656, "step": 2358 }, { - "epoch": 0.3301609517144857, - "grad_norm": 0.4352977938001518, - "learning_rate": 3.9101723695715805e-05, - "loss": 0.6006, + "epoch": 0.660229499020431, + "grad_norm": 0.23121257126929268, + "learning_rate": 9.230658009994122e-05, + "loss": 0.5461, "step": 2359 }, { - "epoch": 0.33030090972708187, - "grad_norm": 0.40492957509367133, - "learning_rate": 3.909236402171508e-05, - "loss": 0.6026, + "epoch": 0.6605093758746152, + "grad_norm": 0.2199409482019848, + "learning_rate": 9.22983566034147e-05, + "loss": 0.5342, "step": 2360 }, { - "epoch": 0.3304408677396781, - "grad_norm": 0.3867521703349589, - "learning_rate": 3.908300145159055e-05, - "loss": 0.5449, + "epoch": 0.6607892527287993, + "grad_norm": 0.2170295285108138, + "learning_rate": 9.229012908090693e-05, + "loss": 0.528, "step": 2361 }, { - "epoch": 0.3305808257522743, - "grad_norm": 0.4404641851361547, - "learning_rate": 3.907363598726635e-05, - "loss": 0.5905, + "epoch": 0.6610691295829835, + "grad_norm": 0.22830110850500412, + "learning_rate": 9.2281897533201e-05, + "loss": 0.5527, "step": 2362 }, { - "epoch": 0.3307207837648705, - "grad_norm": 0.41840972294550066, - "learning_rate": 3.906426763066714e-05, - "loss": 0.5632, + "epoch": 0.6613490064371677, + "grad_norm": 0.21411374168762964, + "learning_rate": 9.227366196108041e-05, + "loss": 0.5205, "step": 2363 }, { - "epoch": 0.33086074177746677, - "grad_norm": 0.4162282322770041, - "learning_rate": 3.905489638371823e-05, - "loss": 0.5913, + "epoch": 0.6616288832913518, + "grad_norm": 0.2410665757103906, + "learning_rate": 9.226542236532899e-05, + "loss": 0.5619, "step": 2364 }, { - "epoch": 0.33100069979006297, - "grad_norm": 0.3955450449429818, - "learning_rate": 3.9045522248345504e-05, - "loss": 0.5324, + "epoch": 0.661908760145536, + "grad_norm": 0.2366806580469498, + "learning_rate": 9.225717874673102e-05, + "loss": 0.5524, "step": 2365 }, { - "epoch": 0.3311406578026592, - "grad_norm": 0.49091811650049194, - "learning_rate": 3.903614522647545e-05, - "loss": 0.5769, + "epoch": 0.6621886369997201, + "grad_norm": 0.22475522821299487, + "learning_rate": 9.224893110607114e-05, + "loss": 0.5433, "step": 2366 }, { - "epoch": 0.3312806158152554, - "grad_norm": 0.4173793487196645, - "learning_rate": 3.902676532003514e-05, - "loss": 0.5886, + "epoch": 0.6624685138539043, + "grad_norm": 0.22504798697278638, + "learning_rate": 9.224067944413432e-05, + "loss": 0.5479, "step": 2367 }, { - "epoch": 0.33142057382785167, - "grad_norm": 0.3932967546945319, - "learning_rate": 3.901738253095222e-05, - "loss": 0.5621, + "epoch": 0.6627483907080884, + "grad_norm": 0.22805381693945523, + "learning_rate": 9.223242376170602e-05, + "loss": 0.5496, "step": 2368 }, { - "epoch": 0.33156053184044787, - "grad_norm": 0.43569710224836344, - "learning_rate": 3.900799686115498e-05, - "loss": 0.5741, + "epoch": 0.6630282675622726, + "grad_norm": 0.2384274415138598, + "learning_rate": 9.222416405957197e-05, + "loss": 0.5268, "step": 2369 }, { - "epoch": 0.33170048985304407, - "grad_norm": 0.4115145852184928, - "learning_rate": 3.899860831257224e-05, - "loss": 0.5259, + "epoch": 0.6633081444164568, + "grad_norm": 0.21116076140816192, + "learning_rate": 9.221590033851835e-05, + "loss": 0.5301, "step": 2370 }, { - "epoch": 0.3318404478656403, - "grad_norm": 0.4381698932866813, - "learning_rate": 3.898921688713346e-05, - "loss": 0.6024, + "epoch": 0.6635880212706409, + "grad_norm": 0.2269853204511418, + "learning_rate": 9.220763259933172e-05, + "loss": 0.5401, "step": 2371 }, { - "epoch": 0.3319804058782365, - "grad_norm": 0.4208901188098798, - "learning_rate": 3.897982258676867e-05, - "loss": 0.5527, + "epoch": 0.663867898124825, + "grad_norm": 0.2230449755853807, + "learning_rate": 9.219936084279898e-05, + "loss": 0.5667, "step": 2372 }, { - "epoch": 0.33212036389083277, - "grad_norm": 0.41912261407751483, - "learning_rate": 3.8970425413408495e-05, - "loss": 0.5822, + "epoch": 0.6641477749790092, + "grad_norm": 0.22065782443329063, + "learning_rate": 9.219108506970746e-05, + "loss": 0.5564, "step": 2373 }, { - "epoch": 0.33226032190342897, - "grad_norm": 0.4040927772770925, - "learning_rate": 3.896102536898415e-05, - "loss": 0.5491, + "epoch": 0.6644276518331934, + "grad_norm": 0.2201001629076314, + "learning_rate": 9.218280528084485e-05, + "loss": 0.5446, "step": 2374 }, { - "epoch": 0.33240027991602517, - "grad_norm": 0.4173965549079611, - "learning_rate": 3.8951622455427425e-05, - "loss": 0.6025, + "epoch": 0.6647075286873776, + "grad_norm": 0.22674262537396084, + "learning_rate": 9.217452147699921e-05, + "loss": 0.5508, "step": 2375 }, { - "epoch": 0.3325402379286214, - "grad_norm": 0.404116401312251, - "learning_rate": 3.894221667467074e-05, - "loss": 0.553, + "epoch": 0.6649874055415617, + "grad_norm": 0.22583712914169812, + "learning_rate": 9.216623365895903e-05, + "loss": 0.5615, "step": 2376 }, { - "epoch": 0.3326801959412176, - "grad_norm": 0.39814070442365174, - "learning_rate": 3.893280802864706e-05, - "loss": 0.5538, + "epoch": 0.6652672823957458, + "grad_norm": 0.22867332059589804, + "learning_rate": 9.215794182751312e-05, + "loss": 0.5584, "step": 2377 }, { - "epoch": 0.33282015395381387, - "grad_norm": 0.4097199303736863, - "learning_rate": 3.892339651928996e-05, - "loss": 0.5562, + "epoch": 0.66554715924993, + "grad_norm": 0.22269331756145164, + "learning_rate": 9.21496459834507e-05, + "loss": 0.5347, "step": 2378 }, { - "epoch": 0.33296011196641007, - "grad_norm": 0.4251433986281386, - "learning_rate": 3.89139821485336e-05, - "loss": 0.5886, + "epoch": 0.6658270361041142, + "grad_norm": 0.22174083315356938, + "learning_rate": 9.21413461275614e-05, + "loss": 0.5514, "step": 2379 }, { - "epoch": 0.3331000699790063, - "grad_norm": 0.3965548232917148, - "learning_rate": 3.8904564918312737e-05, - "loss": 0.5536, + "epoch": 0.6661069129582984, + "grad_norm": 0.22052978174508833, + "learning_rate": 9.213304226063517e-05, + "loss": 0.5451, "step": 2380 }, { - "epoch": 0.3332400279916025, - "grad_norm": 0.4012944065952801, - "learning_rate": 3.889514483056269e-05, - "loss": 0.5742, + "epoch": 0.6663867898124826, + "grad_norm": 0.21348865687005658, + "learning_rate": 9.212473438346241e-05, + "loss": 0.5117, "step": 2381 }, { - "epoch": 0.3333799860041987, - "grad_norm": 0.4201060112427097, - "learning_rate": 3.8885721887219406e-05, - "loss": 0.5691, + "epoch": 0.6666666666666666, + "grad_norm": 0.23190312932544782, + "learning_rate": 9.211642249683385e-05, + "loss": 0.5329, "step": 2382 }, { - "epoch": 0.33351994401679497, - "grad_norm": 0.42817185933177176, - "learning_rate": 3.887629609021938e-05, - "loss": 0.5678, + "epoch": 0.6669465435208508, + "grad_norm": 0.22492833839773166, + "learning_rate": 9.210810660154062e-05, + "loss": 0.5315, "step": 2383 }, { - "epoch": 0.33365990202939116, - "grad_norm": 0.43405350104277596, - "learning_rate": 3.88668674414997e-05, - "loss": 0.6216, + "epoch": 0.667226420375035, + "grad_norm": 0.2251329828911091, + "learning_rate": 9.209978669837424e-05, + "loss": 0.5361, "step": 2384 }, { - "epoch": 0.3337998600419874, - "grad_norm": 0.42315727231869327, - "learning_rate": 3.8857435942998074e-05, - "loss": 0.575, + "epoch": 0.6675062972292192, + "grad_norm": 0.2205152237600707, + "learning_rate": 9.20914627881266e-05, + "loss": 0.5353, "step": 2385 }, { - "epoch": 0.3339398180545836, - "grad_norm": 0.40648841814651454, - "learning_rate": 3.884800159665276e-05, - "loss": 0.5715, + "epoch": 0.6677861740834033, + "grad_norm": 0.2264829193519305, + "learning_rate": 9.208313487158996e-05, + "loss": 0.5364, "step": 2386 }, { - "epoch": 0.33407977606717987, - "grad_norm": 0.426901177821104, - "learning_rate": 3.883856440440261e-05, - "loss": 0.5788, + "epoch": 0.6680660509375874, + "grad_norm": 0.22027286795041154, + "learning_rate": 9.2074802949557e-05, + "loss": 0.515, "step": 2387 }, { - "epoch": 0.33421973407977607, - "grad_norm": 0.4264982996567364, - "learning_rate": 3.882912436818705e-05, - "loss": 0.5849, + "epoch": 0.6683459277917716, + "grad_norm": 0.22548829241160073, + "learning_rate": 9.206646702282073e-05, + "loss": 0.5241, "step": 2388 }, { - "epoch": 0.33435969209237226, - "grad_norm": 0.41759550845666904, - "learning_rate": 3.881968148994613e-05, - "loss": 0.5524, + "epoch": 0.6686258046459558, + "grad_norm": 0.2270820347736705, + "learning_rate": 9.205812709217459e-05, + "loss": 0.5353, "step": 2389 }, { - "epoch": 0.3344996501049685, - "grad_norm": 0.3922959061793526, - "learning_rate": 3.881023577162044e-05, - "loss": 0.556, + "epoch": 0.66890568150014, + "grad_norm": 0.23210343238307118, + "learning_rate": 9.204978315841237e-05, + "loss": 0.5452, "step": 2390 }, { - "epoch": 0.3346396081175647, - "grad_norm": 0.3988789451222829, - "learning_rate": 3.880078721515117e-05, - "loss": 0.5743, + "epoch": 0.6691855583543241, + "grad_norm": 0.23331168699302035, + "learning_rate": 9.204143522232826e-05, + "loss": 0.5415, "step": 2391 }, { - "epoch": 0.33477956613016097, - "grad_norm": 0.4206132245414296, - "learning_rate": 3.87913358224801e-05, - "loss": 0.5688, + "epoch": 0.6694654352085082, + "grad_norm": 0.23412565351809025, + "learning_rate": 9.20330832847168e-05, + "loss": 0.5211, "step": 2392 }, { - "epoch": 0.33491952414275716, - "grad_norm": 0.400794205539379, - "learning_rate": 3.878188159554959e-05, - "loss": 0.6014, + "epoch": 0.6697453120626924, + "grad_norm": 0.22973650679730895, + "learning_rate": 9.202472734637298e-05, + "loss": 0.532, "step": 2393 }, { - "epoch": 0.3350594821553534, - "grad_norm": 0.4098374598496262, - "learning_rate": 3.8772424536302564e-05, - "loss": 0.6093, + "epoch": 0.6700251889168766, + "grad_norm": 0.24068726671626947, + "learning_rate": 9.201636740809205e-05, + "loss": 0.5871, "step": 2394 }, { - "epoch": 0.3351994401679496, - "grad_norm": 0.4250977633595455, - "learning_rate": 3.876296464668257e-05, - "loss": 0.606, + "epoch": 0.6703050657710607, + "grad_norm": 0.2215762036776492, + "learning_rate": 9.200800347066977e-05, + "loss": 0.5461, "step": 2395 }, { - "epoch": 0.3353393981805458, - "grad_norm": 0.4220441415552252, - "learning_rate": 3.875350192863368e-05, - "loss": 0.5897, + "epoch": 0.6705849426252449, + "grad_norm": 0.2275544008034563, + "learning_rate": 9.199963553490221e-05, + "loss": 0.5465, "step": 2396 }, { - "epoch": 0.33547935619314206, - "grad_norm": 0.41667093576332226, - "learning_rate": 3.87440363841006e-05, - "loss": 0.633, + "epoch": 0.670864819479429, + "grad_norm": 0.21223357979497362, + "learning_rate": 9.199126360158582e-05, + "loss": 0.5535, "step": 2397 }, { - "epoch": 0.33561931420573826, - "grad_norm": 0.4157738741324718, - "learning_rate": 3.873456801502859e-05, - "loss": 0.5703, + "epoch": 0.6711446963336132, + "grad_norm": 0.23540388723962063, + "learning_rate": 9.198288767151747e-05, + "loss": 0.5853, "step": 2398 }, { - "epoch": 0.3357592722183345, - "grad_norm": 0.4258564142322055, - "learning_rate": 3.872509682336351e-05, - "loss": 0.5919, + "epoch": 0.6714245731877974, + "grad_norm": 0.22455122087448684, + "learning_rate": 9.197450774549437e-05, + "loss": 0.5368, "step": 2399 }, { - "epoch": 0.3358992302309307, - "grad_norm": 0.42470627438795794, - "learning_rate": 3.871562281105175e-05, - "loss": 0.5662, + "epoch": 0.6717044500419815, + "grad_norm": 0.234263871491455, + "learning_rate": 9.196612382431412e-05, + "loss": 0.5527, "step": 2400 }, { - "epoch": 0.33603918824352697, - "grad_norm": 0.4186178764102384, - "learning_rate": 3.8706145980040344e-05, - "loss": 0.581, + "epoch": 0.6719843268961657, + "grad_norm": 0.21848303921515058, + "learning_rate": 9.195773590877473e-05, + "loss": 0.5511, "step": 2401 }, { - "epoch": 0.33617914625612316, - "grad_norm": 0.3987870833859228, - "learning_rate": 3.8696666332276875e-05, - "loss": 0.585, + "epoch": 0.6722642037503499, + "grad_norm": 0.24333069219894635, + "learning_rate": 9.194934399967453e-05, + "loss": 0.5436, "step": 2402 }, { - "epoch": 0.33631910426871936, - "grad_norm": 0.40669896712395964, - "learning_rate": 3.86871838697095e-05, - "loss": 0.5793, + "epoch": 0.672544080604534, + "grad_norm": 0.22364954864683115, + "learning_rate": 9.194094809781231e-05, + "loss": 0.5638, "step": 2403 }, { - "epoch": 0.3364590622813156, - "grad_norm": 0.4324505891171469, - "learning_rate": 3.867769859428697e-05, - "loss": 0.5895, + "epoch": 0.6728239574587181, + "grad_norm": 0.22776773701161734, + "learning_rate": 9.193254820398716e-05, + "loss": 0.5702, "step": 2404 }, { - "epoch": 0.3365990202939118, - "grad_norm": 0.4089670782754292, - "learning_rate": 3.866821050795859e-05, - "loss": 0.614, + "epoch": 0.6731038343129023, + "grad_norm": 0.2361587046478995, + "learning_rate": 9.19241443189986e-05, + "loss": 0.5393, "step": 2405 }, { - "epoch": 0.33673897830650806, - "grad_norm": 0.5537632601572994, - "learning_rate": 3.865871961267427e-05, - "loss": 0.5969, + "epoch": 0.6733837111670865, + "grad_norm": 0.21742465711363043, + "learning_rate": 9.191573644364653e-05, + "loss": 0.5233, "step": 2406 }, { - "epoch": 0.33687893631910426, - "grad_norm": 0.4149474055553234, - "learning_rate": 3.864922591038448e-05, - "loss": 0.5521, + "epoch": 0.6736635880212707, + "grad_norm": 0.21674312156195077, + "learning_rate": 9.190732457873119e-05, + "loss": 0.5694, "step": 2407 }, { - "epoch": 0.3370188943317005, - "grad_norm": 0.426552813649065, - "learning_rate": 3.863972940304028e-05, - "loss": 0.6032, + "epoch": 0.6739434648754548, + "grad_norm": 0.2313842918322785, + "learning_rate": 9.189890872505325e-05, + "loss": 0.5525, "step": 2408 }, { - "epoch": 0.3371588523442967, - "grad_norm": 0.41223595995661894, - "learning_rate": 3.863023009259329e-05, - "loss": 0.5785, + "epoch": 0.6742233417296389, + "grad_norm": 0.2203117668983562, + "learning_rate": 9.189048888341372e-05, + "loss": 0.5261, "step": 2409 }, { - "epoch": 0.3372988103568929, - "grad_norm": 0.4024674567365589, - "learning_rate": 3.8620727980995716e-05, - "loss": 0.5589, + "epoch": 0.6745032185838231, + "grad_norm": 0.22571547951266857, + "learning_rate": 9.188206505461401e-05, + "loss": 0.5293, "step": 2410 }, { - "epoch": 0.33743876836948916, - "grad_norm": 0.41476951241547977, - "learning_rate": 3.861122307020034e-05, - "loss": 0.5648, + "epoch": 0.6747830954380073, + "grad_norm": 0.22703033150078905, + "learning_rate": 9.187363723945591e-05, + "loss": 0.5309, "step": 2411 }, { - "epoch": 0.33757872638208536, - "grad_norm": 0.4024211020152443, - "learning_rate": 3.860171536216052e-05, - "loss": 0.5569, + "epoch": 0.6750629722921915, + "grad_norm": 0.23310980729189318, + "learning_rate": 9.186520543874157e-05, + "loss": 0.542, "step": 2412 }, { - "epoch": 0.3377186843946816, - "grad_norm": 0.41785686679485745, - "learning_rate": 3.8592204858830175e-05, - "loss": 0.6093, + "epoch": 0.6753428491463755, + "grad_norm": 0.22296474878704078, + "learning_rate": 9.185676965327356e-05, + "loss": 0.5458, "step": 2413 }, { - "epoch": 0.3378586424072778, - "grad_norm": 0.41460218899468515, - "learning_rate": 3.858269156216383e-05, - "loss": 0.5795, + "epoch": 0.6756227260005597, + "grad_norm": 0.2304910791578109, + "learning_rate": 9.184832988385478e-05, + "loss": 0.5719, "step": 2414 }, { - "epoch": 0.33799860041987406, - "grad_norm": 0.40446354182657146, - "learning_rate": 3.8573175474116545e-05, - "loss": 0.549, + "epoch": 0.6759026028547439, + "grad_norm": 0.22024656460652256, + "learning_rate": 9.183988613128853e-05, + "loss": 0.5375, "step": 2415 }, { - "epoch": 0.33813855843247026, - "grad_norm": 0.4037463725904796, - "learning_rate": 3.856365659664399e-05, - "loss": 0.5356, + "epoch": 0.6761824797089281, + "grad_norm": 0.22196935076754826, + "learning_rate": 9.183143839637851e-05, + "loss": 0.5274, "step": 2416 }, { - "epoch": 0.33827851644506646, - "grad_norm": 0.4182076163889934, - "learning_rate": 3.855413493170237e-05, - "loss": 0.6045, + "epoch": 0.6764623565631123, + "grad_norm": 0.22362608921611213, + "learning_rate": 9.182298667992876e-05, + "loss": 0.532, "step": 2417 }, { - "epoch": 0.3384184744576627, - "grad_norm": 0.42186938322484663, - "learning_rate": 3.85446104812485e-05, - "loss": 0.6155, + "epoch": 0.6767422334172963, + "grad_norm": 0.2372497581911805, + "learning_rate": 9.181453098274373e-05, + "loss": 0.536, "step": 2418 }, { - "epoch": 0.3385584324702589, - "grad_norm": 0.41470003229003616, - "learning_rate": 3.853508324723976e-05, - "loss": 0.5701, + "epoch": 0.6770221102714805, + "grad_norm": 0.22115460519012234, + "learning_rate": 9.180607130562825e-05, + "loss": 0.5245, "step": 2419 }, { - "epoch": 0.33869839048285516, - "grad_norm": 0.41867300233582005, - "learning_rate": 3.852555323163406e-05, - "loss": 0.5762, + "epoch": 0.6773019871256647, + "grad_norm": 0.22168503602089848, + "learning_rate": 9.17976076493875e-05, + "loss": 0.5236, "step": 2420 }, { - "epoch": 0.33883834849545136, - "grad_norm": 0.43083623709965385, - "learning_rate": 3.851602043638994e-05, - "loss": 0.5928, + "epoch": 0.6775818639798489, + "grad_norm": 0.23592879638708428, + "learning_rate": 9.178914001482704e-05, + "loss": 0.5702, "step": 2421 }, { - "epoch": 0.3389783065080476, - "grad_norm": 0.4112941630964681, - "learning_rate": 3.850648486346649e-05, - "loss": 0.5671, + "epoch": 0.677861740834033, + "grad_norm": 0.22994135134672122, + "learning_rate": 9.178066840275286e-05, + "loss": 0.5621, "step": 2422 }, { - "epoch": 0.3391182645206438, - "grad_norm": 0.4081030227054361, - "learning_rate": 3.849694651482335e-05, - "loss": 0.5662, + "epoch": 0.6781416176882172, + "grad_norm": 0.2252303515878882, + "learning_rate": 9.177219281397127e-05, + "loss": 0.5861, "step": 2423 }, { - "epoch": 0.33925822253324, - "grad_norm": 0.41990746427748654, - "learning_rate": 3.848740539242075e-05, - "loss": 0.6186, + "epoch": 0.6784214945424013, + "grad_norm": 0.21099309134814076, + "learning_rate": 9.176371324928899e-05, + "loss": 0.5287, "step": 2424 }, { - "epoch": 0.33939818054583626, - "grad_norm": 0.41509855881507357, - "learning_rate": 3.84778614982195e-05, - "loss": 0.5777, + "epoch": 0.6787013713965855, + "grad_norm": 0.21214908924130665, + "learning_rate": 9.175522970951311e-05, + "loss": 0.5476, "step": 2425 }, { - "epoch": 0.33953813855843246, - "grad_norm": 0.41529612645730785, - "learning_rate": 3.8468314834180954e-05, - "loss": 0.5834, + "epoch": 0.6789812482507697, + "grad_norm": 0.20941477744330658, + "learning_rate": 9.174674219545108e-05, + "loss": 0.5493, "step": 2426 }, { - "epoch": 0.3396780965710287, - "grad_norm": 0.4612886169509395, - "learning_rate": 3.845876540226706e-05, - "loss": 0.5898, + "epoch": 0.6792611251049538, + "grad_norm": 0.22236387650826203, + "learning_rate": 9.173825070791078e-05, + "loss": 0.5412, "step": 2427 }, { - "epoch": 0.3398180545836249, - "grad_norm": 0.40004450008966375, - "learning_rate": 3.844921320444031e-05, - "loss": 0.5677, + "epoch": 0.679541001959138, + "grad_norm": 0.2227943379605569, + "learning_rate": 9.17297552477004e-05, + "loss": 0.5347, "step": 2428 }, { - "epoch": 0.33995801259622116, - "grad_norm": 0.42665920823328024, - "learning_rate": 3.843965824266379e-05, - "loss": 0.5643, + "epoch": 0.6798208788133221, + "grad_norm": 0.233040637554221, + "learning_rate": 9.172125581562857e-05, + "loss": 0.5218, "step": 2429 }, { - "epoch": 0.34009797060881736, - "grad_norm": 0.4124069827888389, - "learning_rate": 3.843010051890114e-05, - "loss": 0.6075, + "epoch": 0.6801007556675063, + "grad_norm": 0.2155492753595792, + "learning_rate": 9.171275241250426e-05, + "loss": 0.5401, "step": 2430 }, { - "epoch": 0.34023792862141355, - "grad_norm": 0.41303867930204274, - "learning_rate": 3.842054003511656e-05, - "loss": 0.5811, + "epoch": 0.6803806325216905, + "grad_norm": 0.22573562829812002, + "learning_rate": 9.170424503913681e-05, + "loss": 0.5192, "step": 2431 }, { - "epoch": 0.3403778866340098, - "grad_norm": 0.4196697445658453, - "learning_rate": 3.841097679327483e-05, - "loss": 0.5525, + "epoch": 0.6806605093758746, + "grad_norm": 0.22200806427149247, + "learning_rate": 9.1695733696336e-05, + "loss": 0.532, "step": 2432 }, { - "epoch": 0.340517844646606, - "grad_norm": 0.4360442351288732, - "learning_rate": 3.840141079534131e-05, - "loss": 0.6244, + "epoch": 0.6809403862300588, + "grad_norm": 0.2491249646322403, + "learning_rate": 9.16872183849119e-05, + "loss": 0.5203, "step": 2433 }, { - "epoch": 0.34065780265920226, - "grad_norm": 0.39392921636528555, - "learning_rate": 3.839184204328188e-05, - "loss": 0.5494, + "epoch": 0.6812202630842429, + "grad_norm": 0.23987168728778543, + "learning_rate": 9.167869910567501e-05, + "loss": 0.5492, "step": 2434 }, { - "epoch": 0.34079776067179846, - "grad_norm": 0.39649367381567424, - "learning_rate": 3.8382270539063035e-05, - "loss": 0.5741, + "epoch": 0.6815001399384271, + "grad_norm": 0.22391714684958228, + "learning_rate": 9.167017585943623e-05, + "loss": 0.5354, "step": 2435 }, { - "epoch": 0.3409377186843947, - "grad_norm": 0.4170815607651056, - "learning_rate": 3.8372696284651814e-05, - "loss": 0.5759, + "epoch": 0.6817800167926112, + "grad_norm": 0.2350819672659733, + "learning_rate": 9.16616486470068e-05, + "loss": 0.539, "step": 2436 }, { - "epoch": 0.3410776766969909, - "grad_norm": 0.3997155811593266, - "learning_rate": 3.8363119282015816e-05, - "loss": 0.5702, + "epoch": 0.6820598936467954, + "grad_norm": 0.24275550843007201, + "learning_rate": 9.165311746919831e-05, + "loss": 0.5498, "step": 2437 }, { - "epoch": 0.3412176347095871, - "grad_norm": 0.4282009363533291, - "learning_rate": 3.835353953312322e-05, - "loss": 0.5891, + "epoch": 0.6823397705009796, + "grad_norm": 0.21304284629654555, + "learning_rate": 9.164458232682278e-05, + "loss": 0.542, "step": 2438 }, { - "epoch": 0.34135759272218336, - "grad_norm": 0.41921744383670845, - "learning_rate": 3.834395703994276e-05, - "loss": 0.5803, + "epoch": 0.6826196473551638, + "grad_norm": 0.21884367177244404, + "learning_rate": 9.163604322069261e-05, + "loss": 0.5601, "step": 2439 }, { - "epoch": 0.34149755073477955, - "grad_norm": 0.40474108379603385, - "learning_rate": 3.833437180444373e-05, - "loss": 0.6013, + "epoch": 0.6828995242093479, + "grad_norm": 0.22410444834241158, + "learning_rate": 9.162750015162054e-05, + "loss": 0.5368, "step": 2440 }, { - "epoch": 0.3416375087473758, - "grad_norm": 0.42511536225444024, - "learning_rate": 3.8324783828596e-05, - "loss": 0.5705, + "epoch": 0.683179401063532, + "grad_norm": 0.2271307266049798, + "learning_rate": 9.161895312041971e-05, + "loss": 0.5675, "step": 2441 }, { - "epoch": 0.341777466759972, - "grad_norm": 0.3949585768953575, - "learning_rate": 3.8315193114369996e-05, - "loss": 0.5617, + "epoch": 0.6834592779177162, + "grad_norm": 0.22202818191834683, + "learning_rate": 9.161040212790362e-05, + "loss": 0.5421, "step": 2442 }, { - "epoch": 0.34191742477256826, - "grad_norm": 0.39765016272736614, - "learning_rate": 3.830559966373671e-05, - "loss": 0.5164, + "epoch": 0.6837391547719004, + "grad_norm": 0.22530864987073587, + "learning_rate": 9.160184717488616e-05, + "loss": 0.5494, "step": 2443 }, { - "epoch": 0.34205738278516445, - "grad_norm": 0.3878551915452198, - "learning_rate": 3.829600347866768e-05, - "loss": 0.5661, + "epoch": 0.6840190316260846, + "grad_norm": 0.2260749470138487, + "learning_rate": 9.159328826218161e-05, + "loss": 0.5501, "step": 2444 }, { - "epoch": 0.34219734079776065, - "grad_norm": 0.4346559633685042, - "learning_rate": 3.828640456113504e-05, - "loss": 0.6013, + "epoch": 0.6842989084802686, + "grad_norm": 0.24187502091301433, + "learning_rate": 9.158472539060458e-05, + "loss": 0.5322, "step": 2445 }, { - "epoch": 0.3423372988103569, - "grad_norm": 0.4218764947636453, - "learning_rate": 3.827680291311143e-05, - "loss": 0.5684, + "epoch": 0.6845787853344528, + "grad_norm": 0.23046316147105173, + "learning_rate": 9.157615856097015e-05, + "loss": 0.5389, "step": 2446 }, { - "epoch": 0.3424772568229531, - "grad_norm": 0.40065712577833346, - "learning_rate": 3.8267198536570123e-05, - "loss": 0.5313, + "epoch": 0.684858662188637, + "grad_norm": 0.227924703026395, + "learning_rate": 9.156758777409364e-05, + "loss": 0.5541, "step": 2447 }, { - "epoch": 0.34261721483554936, - "grad_norm": 0.4050494830959936, - "learning_rate": 3.8257591433484906e-05, - "loss": 0.5632, + "epoch": 0.6851385390428212, + "grad_norm": 0.21384638121163238, + "learning_rate": 9.155901303079089e-05, + "loss": 0.5227, "step": 2448 }, { - "epoch": 0.34275717284814555, - "grad_norm": 0.41579240423995417, - "learning_rate": 3.824798160583012e-05, - "loss": 0.5719, + "epoch": 0.6854184158970054, + "grad_norm": 0.2258287798205943, + "learning_rate": 9.1550434331878e-05, + "loss": 0.5399, "step": 2449 }, { - "epoch": 0.34289713086074175, - "grad_norm": 0.41342697420725416, - "learning_rate": 3.823836905558071e-05, - "loss": 0.5825, + "epoch": 0.6856982927511894, + "grad_norm": 0.2289440302608055, + "learning_rate": 9.15418516781715e-05, + "loss": 0.5713, "step": 2450 }, { - "epoch": 0.343037088873338, - "grad_norm": 0.4242841636188481, - "learning_rate": 3.8228753784712126e-05, - "loss": 0.5917, + "epoch": 0.6859781696053736, + "grad_norm": 0.21971883054547414, + "learning_rate": 9.153326507048834e-05, + "loss": 0.5455, "step": 2451 }, { - "epoch": 0.3431770468859342, - "grad_norm": 0.3909799623795335, - "learning_rate": 3.8219135795200417e-05, - "loss": 0.5294, + "epoch": 0.6862580464595578, + "grad_norm": 0.2111801448115332, + "learning_rate": 9.152467450964574e-05, + "loss": 0.5295, "step": 2452 }, { - "epoch": 0.34331700489853045, - "grad_norm": 0.4191631316056565, - "learning_rate": 3.8209515089022175e-05, - "loss": 0.6145, + "epoch": 0.686537923313742, + "grad_norm": 0.2245461783271496, + "learning_rate": 9.15160799964614e-05, + "loss": 0.5478, "step": 2453 }, { - "epoch": 0.34345696291112665, - "grad_norm": 0.41444745056939064, - "learning_rate": 3.819989166815455e-05, - "loss": 0.5448, + "epoch": 0.6868178001679262, + "grad_norm": 0.22719775795417232, + "learning_rate": 9.15074815317533e-05, + "loss": 0.5364, "step": 2454 }, { - "epoch": 0.3435969209237229, - "grad_norm": 0.41319072983975424, - "learning_rate": 3.8190265534575256e-05, - "loss": 0.5493, + "epoch": 0.6870976770221102, + "grad_norm": 0.22141104365177822, + "learning_rate": 9.149887911633989e-05, + "loss": 0.508, "step": 2455 }, { - "epoch": 0.3437368789363191, - "grad_norm": 0.4144210912873605, - "learning_rate": 3.818063669026256e-05, - "loss": 0.5699, + "epoch": 0.6873775538762944, + "grad_norm": 0.22725076881125048, + "learning_rate": 9.149027275103993e-05, + "loss": 0.5376, "step": 2456 }, { - "epoch": 0.3438768369489153, - "grad_norm": 0.43371658028513654, - "learning_rate": 3.817100513719529e-05, - "loss": 0.5994, + "epoch": 0.6876574307304786, + "grad_norm": 0.21874077073861875, + "learning_rate": 9.14816624366726e-05, + "loss": 0.5518, "step": 2457 }, { - "epoch": 0.34401679496151155, - "grad_norm": 0.39358381227775163, - "learning_rate": 3.8161370877352825e-05, - "loss": 0.5772, + "epoch": 0.6879373075846628, + "grad_norm": 0.22733593578986663, + "learning_rate": 9.14730481740574e-05, + "loss": 0.5476, "step": 2458 }, { - "epoch": 0.34415675297410775, - "grad_norm": 0.4524806967713819, - "learning_rate": 3.815173391271511e-05, - "loss": 0.5801, + "epoch": 0.6882171844388469, + "grad_norm": 0.22468116668619806, + "learning_rate": 9.146442996401428e-05, + "loss": 0.5526, "step": 2459 }, { - "epoch": 0.344296710986704, - "grad_norm": 0.42248629141745564, - "learning_rate": 3.814209424526262e-05, - "loss": 0.5344, + "epoch": 0.6884970612930311, + "grad_norm": 0.22269294666763426, + "learning_rate": 9.145580780736349e-05, + "loss": 0.5117, "step": 2460 }, { - "epoch": 0.3444366689993002, - "grad_norm": 0.4222729879954454, - "learning_rate": 3.813245187697643e-05, - "loss": 0.6305, + "epoch": 0.6887769381472152, + "grad_norm": 0.23002433824113408, + "learning_rate": 9.144718170492572e-05, + "loss": 0.5444, "step": 2461 }, { - "epoch": 0.34457662701189645, - "grad_norm": 0.40274756440667137, - "learning_rate": 3.812280680983812e-05, - "loss": 0.5587, + "epoch": 0.6890568150013994, + "grad_norm": 0.2314978853312327, + "learning_rate": 9.143855165752199e-05, + "loss": 0.5357, "step": 2462 }, { - "epoch": 0.34471658502449265, - "grad_norm": 0.4041971062889942, - "learning_rate": 3.8113159045829864e-05, - "loss": 0.5837, + "epoch": 0.6893366918555835, + "grad_norm": 0.21741171624768588, + "learning_rate": 9.142991766597374e-05, + "loss": 0.5301, "step": 2463 }, { - "epoch": 0.34485654303708885, - "grad_norm": 0.437567215932465, - "learning_rate": 3.8103508586934365e-05, - "loss": 0.6047, + "epoch": 0.6896165687097677, + "grad_norm": 0.23379671587908718, + "learning_rate": 9.142127973110273e-05, + "loss": 0.5439, "step": 2464 }, { - "epoch": 0.3449965010496851, - "grad_norm": 0.4015334832237547, - "learning_rate": 3.8093855435134914e-05, - "loss": 0.5504, + "epoch": 0.6898964455639519, + "grad_norm": 0.2277896226934478, + "learning_rate": 9.141263785373111e-05, + "loss": 0.5667, "step": 2465 }, { - "epoch": 0.3451364590622813, - "grad_norm": 0.42379890127600006, - "learning_rate": 3.8084199592415305e-05, - "loss": 0.5723, + "epoch": 0.690176322418136, + "grad_norm": 0.2376465386077255, + "learning_rate": 9.140399203468146e-05, + "loss": 0.5599, "step": 2466 }, { - "epoch": 0.34527641707487755, - "grad_norm": 0.41715521655535315, - "learning_rate": 3.8074541060759925e-05, - "loss": 0.5857, + "epoch": 0.6904561992723202, + "grad_norm": 0.2383134382595478, + "learning_rate": 9.139534227477667e-05, + "loss": 0.5625, "step": 2467 }, { - "epoch": 0.34541637508747375, - "grad_norm": 0.43326783152483606, - "learning_rate": 3.806487984215369e-05, - "loss": 0.5603, + "epoch": 0.6907360761265043, + "grad_norm": 0.2195144626812075, + "learning_rate": 9.138668857484003e-05, + "loss": 0.5384, "step": 2468 }, { - "epoch": 0.34555633310007, - "grad_norm": 0.403241790805183, - "learning_rate": 3.8055215938582086e-05, - "loss": 0.5727, + "epoch": 0.6910159529806885, + "grad_norm": 0.217829592785949, + "learning_rate": 9.137803093569519e-05, + "loss": 0.5543, "step": 2469 }, { - "epoch": 0.3456962911126662, - "grad_norm": 0.4061061612551589, - "learning_rate": 3.804554935203115e-05, - "loss": 0.5846, + "epoch": 0.6912958298348727, + "grad_norm": 0.23624278846074326, + "learning_rate": 9.136936935816623e-05, + "loss": 0.5423, "step": 2470 }, { - "epoch": 0.3458362491252624, - "grad_norm": 0.40214409369497894, - "learning_rate": 3.803588008448745e-05, - "loss": 0.5655, + "epoch": 0.6915757066890568, + "grad_norm": 0.2186568174685527, + "learning_rate": 9.136070384307754e-05, + "loss": 0.544, "step": 2471 }, { - "epoch": 0.34597620713785865, - "grad_norm": 0.4213923914769129, - "learning_rate": 3.802620813793814e-05, - "loss": 0.5632, + "epoch": 0.691855583543241, + "grad_norm": 0.2175408840074656, + "learning_rate": 9.135203439125389e-05, + "loss": 0.5399, "step": 2472 }, { - "epoch": 0.34611616515045485, - "grad_norm": 0.41515367784081497, - "learning_rate": 3.801653351437087e-05, - "loss": 0.562, + "epoch": 0.6921354603974251, + "grad_norm": 0.22644551440211397, + "learning_rate": 9.134336100352046e-05, + "loss": 0.5688, "step": 2473 }, { - "epoch": 0.3462561231630511, - "grad_norm": 0.424690045157795, - "learning_rate": 3.8006856215773895e-05, - "loss": 0.5889, + "epoch": 0.6924153372516093, + "grad_norm": 0.21841015771728386, + "learning_rate": 9.13346836807028e-05, + "loss": 0.5554, "step": 2474 }, { - "epoch": 0.3463960811756473, - "grad_norm": 0.43537881056181427, - "learning_rate": 3.799717624413599e-05, - "loss": 0.6304, + "epoch": 0.6926952141057935, + "grad_norm": 0.2198257691403368, + "learning_rate": 9.132600242362681e-05, + "loss": 0.5388, "step": 2475 }, { - "epoch": 0.34653603918824355, - "grad_norm": 0.40770798716655837, - "learning_rate": 3.798749360144651e-05, - "loss": 0.5743, + "epoch": 0.6929750909599776, + "grad_norm": 0.22003184324424363, + "learning_rate": 9.131731723311877e-05, + "loss": 0.5337, "step": 2476 }, { - "epoch": 0.34667599720083975, - "grad_norm": 0.42860038780017473, - "learning_rate": 3.7977808289695306e-05, - "loss": 0.6029, + "epoch": 0.6932549678141617, + "grad_norm": 0.22790725907279172, + "learning_rate": 9.130862811000536e-05, + "loss": 0.5426, "step": 2477 }, { - "epoch": 0.34681595521343594, - "grad_norm": 0.41741937935524637, - "learning_rate": 3.796812031087281e-05, - "loss": 0.5745, + "epoch": 0.6935348446683459, + "grad_norm": 0.2216557770186376, + "learning_rate": 9.12999350551136e-05, + "loss": 0.5322, "step": 2478 }, { - "epoch": 0.3469559132260322, - "grad_norm": 0.4169469655781413, - "learning_rate": 3.7958429666970024e-05, - "loss": 0.5932, + "epoch": 0.6938147215225301, + "grad_norm": 0.2387747292363135, + "learning_rate": 9.129123806927088e-05, + "loss": 0.5353, "step": 2479 }, { - "epoch": 0.3470958712386284, - "grad_norm": 0.41794717504089257, - "learning_rate": 3.7948736359978455e-05, - "loss": 0.5957, + "epoch": 0.6940945983767143, + "grad_norm": 0.23886840537708795, + "learning_rate": 9.128253715330506e-05, + "loss": 0.5813, "step": 2480 }, { - "epoch": 0.34723582925122465, - "grad_norm": 0.42237578550439164, - "learning_rate": 3.793904039189018e-05, - "loss": 0.5809, + "epoch": 0.6943744752308985, + "grad_norm": 0.21426295665075762, + "learning_rate": 9.127383230804421e-05, + "loss": 0.5224, "step": 2481 }, { - "epoch": 0.34737578726382085, - "grad_norm": 0.42457946983643247, - "learning_rate": 3.7929341764697816e-05, - "loss": 0.5741, + "epoch": 0.6946543520850825, + "grad_norm": 0.22804583074574128, + "learning_rate": 9.12651235343169e-05, + "loss": 0.5586, "step": 2482 }, { - "epoch": 0.3475157452764171, - "grad_norm": 0.40686808588079204, - "learning_rate": 3.791964048039454e-05, - "loss": 0.577, + "epoch": 0.6949342289392667, + "grad_norm": 0.22531355170150924, + "learning_rate": 9.125641083295206e-05, + "loss": 0.519, "step": 2483 }, { - "epoch": 0.3476557032890133, - "grad_norm": 0.3917392399092806, - "learning_rate": 3.790993654097405e-05, - "loss": 0.5389, + "epoch": 0.6952141057934509, + "grad_norm": 0.22369900699564935, + "learning_rate": 9.124769420477892e-05, + "loss": 0.5345, "step": 2484 }, { - "epoch": 0.3477956613016095, - "grad_norm": 0.41842414036596237, - "learning_rate": 3.790022994843061e-05, - "loss": 0.6111, + "epoch": 0.6954939826476351, + "grad_norm": 0.21667888869213964, + "learning_rate": 9.123897365062717e-05, + "loss": 0.5139, "step": 2485 }, { - "epoch": 0.34793561931420575, - "grad_norm": 0.40264506155704255, - "learning_rate": 3.789052070475903e-05, - "loss": 0.5691, + "epoch": 0.6957738595018192, + "grad_norm": 0.22037004183119002, + "learning_rate": 9.123024917132683e-05, + "loss": 0.5619, "step": 2486 }, { - "epoch": 0.34807557732680194, - "grad_norm": 0.41841559375670284, - "learning_rate": 3.788080881195465e-05, - "loss": 0.5796, + "epoch": 0.6960537363560033, + "grad_norm": 0.23529833658505683, + "learning_rate": 9.12215207677083e-05, + "loss": 0.5538, "step": 2487 }, { - "epoch": 0.3482155353393982, - "grad_norm": 0.42238316240978235, - "learning_rate": 3.787109427201337e-05, - "loss": 0.5721, + "epoch": 0.6963336132101875, + "grad_norm": 0.22814892384509067, + "learning_rate": 9.121278844060232e-05, + "loss": 0.5442, "step": 2488 }, { - "epoch": 0.3483554933519944, - "grad_norm": 0.4261343625599289, - "learning_rate": 3.7861377086931615e-05, - "loss": 0.5762, + "epoch": 0.6966134900643717, + "grad_norm": 0.210286542902692, + "learning_rate": 9.12040521908401e-05, + "loss": 0.5436, "step": 2489 }, { - "epoch": 0.34849545136459065, - "grad_norm": 0.40050526138599146, - "learning_rate": 3.785165725870638e-05, - "loss": 0.5771, + "epoch": 0.6968933669185559, + "grad_norm": 0.22496438800070656, + "learning_rate": 9.119531201925311e-05, + "loss": 0.5618, "step": 2490 }, { - "epoch": 0.34863540937718684, - "grad_norm": 0.4011183639320898, - "learning_rate": 3.7841934789335164e-05, - "loss": 0.5722, + "epoch": 0.69717324377274, + "grad_norm": 0.2301749121128698, + "learning_rate": 9.118656792667328e-05, + "loss": 0.5211, "step": 2491 }, { - "epoch": 0.34877536738978304, - "grad_norm": 0.41515525438995227, - "learning_rate": 3.783220968081606e-05, - "loss": 0.5863, + "epoch": 0.6974531206269241, + "grad_norm": 0.22979032730530666, + "learning_rate": 9.117781991393283e-05, + "loss": 0.5385, "step": 2492 }, { - "epoch": 0.3489153254023793, - "grad_norm": 0.3930742764120805, - "learning_rate": 3.782248193514766e-05, - "loss": 0.531, + "epoch": 0.6977329974811083, + "grad_norm": 0.2263644099605242, + "learning_rate": 9.116906798186448e-05, + "loss": 0.5756, "step": 2493 }, { - "epoch": 0.3490552834149755, - "grad_norm": 0.41871895607441606, - "learning_rate": 3.7812751554329116e-05, - "loss": 0.5925, + "epoch": 0.6980128743352925, + "grad_norm": 0.2369073042879937, + "learning_rate": 9.116031213130117e-05, + "loss": 0.5721, "step": 2494 }, { - "epoch": 0.34919524142757175, - "grad_norm": 0.39386200492237877, - "learning_rate": 3.780301854036013e-05, - "loss": 0.6283, + "epoch": 0.6982927511894766, + "grad_norm": 0.22793661358143325, + "learning_rate": 9.115155236307631e-05, + "loss": 0.5267, "step": 2495 }, { - "epoch": 0.34933519944016794, - "grad_norm": 0.4562215128945137, - "learning_rate": 3.7793282895240926e-05, - "loss": 0.6435, + "epoch": 0.6985726280436608, + "grad_norm": 0.22566745591681991, + "learning_rate": 9.114278867802366e-05, + "loss": 0.5371, "step": 2496 }, { - "epoch": 0.3494751574527642, - "grad_norm": 0.40380935634400195, - "learning_rate": 3.778354462097229e-05, - "loss": 0.5874, + "epoch": 0.6988525048978449, + "grad_norm": 0.22738247065862394, + "learning_rate": 9.113402107697737e-05, + "loss": 0.5425, "step": 2497 }, { - "epoch": 0.3496151154653604, - "grad_norm": 0.37718467161463654, - "learning_rate": 3.7773803719555514e-05, - "loss": 0.5451, + "epoch": 0.6991323817520291, + "grad_norm": 0.23734515453707278, + "learning_rate": 9.112524956077193e-05, + "loss": 0.5499, "step": 2498 }, { - "epoch": 0.3497550734779566, - "grad_norm": 0.49862732423279105, - "learning_rate": 3.776406019299247e-05, - "loss": 0.566, + "epoch": 0.6994122586062133, + "grad_norm": 0.2544658923189295, + "learning_rate": 9.11164741302422e-05, + "loss": 0.5436, "step": 2499 }, { - "epoch": 0.34989503149055284, - "grad_norm": 0.3890315425799742, - "learning_rate": 3.7754314043285556e-05, - "loss": 0.541, + "epoch": 0.6996921354603974, + "grad_norm": 0.23105511764419184, + "learning_rate": 9.110769478622346e-05, + "loss": 0.5528, "step": 2500 }, { - "epoch": 0.35003498950314904, - "grad_norm": 0.4020611603166375, - "learning_rate": 3.774456527243768e-05, - "loss": 0.5703, + "epoch": 0.6999720123145816, + "grad_norm": 0.22154709356239966, + "learning_rate": 9.109891152955132e-05, + "loss": 0.5569, "step": 2501 }, { - "epoch": 0.3501749475157453, - "grad_norm": 0.4024001534789896, - "learning_rate": 3.7734813882452334e-05, - "loss": 0.5596, + "epoch": 0.7002518891687658, + "grad_norm": 0.23139612263439707, + "learning_rate": 9.109012436106178e-05, + "loss": 0.5536, "step": 2502 }, { - "epoch": 0.3503149055283415, - "grad_norm": 0.41123367006288974, - "learning_rate": 3.772505987533352e-05, - "loss": 0.5635, + "epoch": 0.7005317660229499, + "grad_norm": 0.22860454581068818, + "learning_rate": 9.108133328159121e-05, + "loss": 0.5557, "step": 2503 }, { - "epoch": 0.35045486354093774, - "grad_norm": 0.42428174810444297, - "learning_rate": 3.771530325308579e-05, - "loss": 0.5814, + "epoch": 0.700811642877134, + "grad_norm": 0.2291406331101458, + "learning_rate": 9.107253829197633e-05, + "loss": 0.5602, "step": 2504 }, { - "epoch": 0.35059482155353394, - "grad_norm": 0.4038080676069653, - "learning_rate": 3.770554401771423e-05, - "loss": 0.5727, + "epoch": 0.7010915197313182, + "grad_norm": 0.22668470166488117, + "learning_rate": 9.106373939305426e-05, + "loss": 0.5402, "step": 2505 }, { - "epoch": 0.35073477956613014, - "grad_norm": 0.3839599878758453, - "learning_rate": 3.769578217122446e-05, - "loss": 0.5834, + "epoch": 0.7013713965855024, + "grad_norm": 0.22880385201909617, + "learning_rate": 9.105493658566251e-05, + "loss": 0.5356, "step": 2506 }, { - "epoch": 0.3508747375787264, - "grad_norm": 0.4158023938010911, - "learning_rate": 3.768601771562262e-05, - "loss": 0.5712, + "epoch": 0.7016512734396866, + "grad_norm": 0.23568857832719925, + "learning_rate": 9.104612987063891e-05, + "loss": 0.5296, "step": 2507 }, { - "epoch": 0.3510146955913226, - "grad_norm": 0.3943326004175278, - "learning_rate": 3.767625065291544e-05, - "loss": 0.568, + "epoch": 0.7019311502938707, + "grad_norm": 0.22098103092045723, + "learning_rate": 9.103731924882169e-05, + "loss": 0.5414, "step": 2508 }, { - "epoch": 0.35115465360391884, - "grad_norm": 0.3964702492091683, - "learning_rate": 3.766648098511012e-05, - "loss": 0.576, + "epoch": 0.7022110271480548, + "grad_norm": 0.22701922252440868, + "learning_rate": 9.102850472104944e-05, + "loss": 0.5541, "step": 2509 }, { - "epoch": 0.35129461161651504, - "grad_norm": 0.3921536448421572, - "learning_rate": 3.765670871421445e-05, - "loss": 0.5209, + "epoch": 0.702490904002239, + "grad_norm": 0.22767011589671496, + "learning_rate": 9.101968628816118e-05, + "loss": 0.5267, "step": 2510 }, { - "epoch": 0.3514345696291113, - "grad_norm": 0.4351764473354882, - "learning_rate": 3.764693384223671e-05, - "loss": 0.5926, + "epoch": 0.7027707808564232, + "grad_norm": 0.2333123777940145, + "learning_rate": 9.10108639509962e-05, + "loss": 0.5327, "step": 2511 }, { - "epoch": 0.3515745276417075, - "grad_norm": 0.40588260989157027, - "learning_rate": 3.763715637118575e-05, - "loss": 0.548, + "epoch": 0.7030506577106074, + "grad_norm": 0.2252300859420707, + "learning_rate": 9.100203771039424e-05, + "loss": 0.5393, "step": 2512 }, { - "epoch": 0.3517144856543037, - "grad_norm": 0.4170639037160172, - "learning_rate": 3.762737630307093e-05, - "loss": 0.5875, + "epoch": 0.7033305345647914, + "grad_norm": 0.22824894530208628, + "learning_rate": 9.099320756719537e-05, + "loss": 0.5271, "step": 2513 }, { - "epoch": 0.35185444366689994, - "grad_norm": 0.4115129851905205, - "learning_rate": 3.761759363990215e-05, - "loss": 0.5462, + "epoch": 0.7036104114189756, + "grad_norm": 0.23138598533223126, + "learning_rate": 9.098437352224006e-05, + "loss": 0.5532, "step": 2514 }, { - "epoch": 0.35199440167949614, - "grad_norm": 0.4147105457172198, - "learning_rate": 3.7607808383689856e-05, - "loss": 0.5463, + "epoch": 0.7038902882731598, + "grad_norm": 0.21853894319361244, + "learning_rate": 9.097553557636916e-05, + "loss": 0.5312, "step": 2515 }, { - "epoch": 0.3521343596920924, - "grad_norm": 0.4071302253511469, - "learning_rate": 3.7598020536445017e-05, - "loss": 0.5445, + "epoch": 0.704170165127344, + "grad_norm": 0.23278694561533653, + "learning_rate": 9.096669373042382e-05, + "loss": 0.563, "step": 2516 }, { - "epoch": 0.3522743177046886, - "grad_norm": 0.39539443956776693, - "learning_rate": 3.758823010017913e-05, - "loss": 0.5455, + "epoch": 0.7044500419815282, + "grad_norm": 0.23425782484952629, + "learning_rate": 9.095784798524566e-05, + "loss": 0.5296, "step": 2517 }, { - "epoch": 0.35241427571728484, - "grad_norm": 0.39791097766830325, - "learning_rate": 3.7578437076904236e-05, - "loss": 0.5258, + "epoch": 0.7047299188357122, + "grad_norm": 0.22751597950416957, + "learning_rate": 9.094899834167658e-05, + "loss": 0.533, "step": 2518 }, { - "epoch": 0.35255423372988104, - "grad_norm": 0.42035164161239397, - "learning_rate": 3.75686414686329e-05, - "loss": 0.5639, + "epoch": 0.7050097956898964, + "grad_norm": 0.23366548542267773, + "learning_rate": 9.094014480055895e-05, + "loss": 0.5286, "step": 2519 }, { - "epoch": 0.35269419174247724, - "grad_norm": 0.3830433028113876, - "learning_rate": 3.7558843277378206e-05, - "loss": 0.536, + "epoch": 0.7052896725440806, + "grad_norm": 0.22636866726907906, + "learning_rate": 9.09312873627354e-05, + "loss": 0.5603, "step": 2520 }, { - "epoch": 0.3528341497550735, - "grad_norm": 0.3933038656978647, - "learning_rate": 3.75490425051538e-05, - "loss": 0.5452, + "epoch": 0.7055695493982648, + "grad_norm": 0.22411400509153592, + "learning_rate": 9.092242602904903e-05, + "loss": 0.5358, "step": 2521 }, { - "epoch": 0.3529741077676697, - "grad_norm": 0.41406652639293745, - "learning_rate": 3.753923915397383e-05, - "loss": 0.5667, + "epoch": 0.705849426252449, + "grad_norm": 0.22564708212854936, + "learning_rate": 9.091356080034322e-05, + "loss": 0.5046, "step": 2522 }, { - "epoch": 0.35311406578026594, - "grad_norm": 0.4201955355989494, - "learning_rate": 3.752943322585297e-05, - "loss": 0.5417, + "epoch": 0.7061293031066331, + "grad_norm": 0.2254724697285052, + "learning_rate": 9.09046916774618e-05, + "loss": 0.5477, "step": 2523 }, { - "epoch": 0.35325402379286214, - "grad_norm": 0.41614348716949034, - "learning_rate": 3.751962472280647e-05, - "loss": 0.5817, + "epoch": 0.7064091799608172, + "grad_norm": 0.2238374914670721, + "learning_rate": 9.089581866124894e-05, + "loss": 0.5622, "step": 2524 }, { - "epoch": 0.35339398180545833, - "grad_norm": 0.40621639691384537, - "learning_rate": 3.750981364685005e-05, - "loss": 0.5815, + "epoch": 0.7066890568150014, + "grad_norm": 0.21942806561676945, + "learning_rate": 9.088694175254916e-05, + "loss": 0.5398, "step": 2525 }, { - "epoch": 0.3535339398180546, - "grad_norm": 0.4007740627071781, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.5635, + "epoch": 0.7069689336691856, + "grad_norm": 0.21567431447044527, + "learning_rate": 9.087806095220739e-05, + "loss": 0.5413, "step": 2526 }, { - "epoch": 0.3536738978306508, - "grad_norm": 0.4088088272074455, - "learning_rate": 3.749018378427312e-05, - "loss": 0.599, + "epoch": 0.7072488105233697, + "grad_norm": 0.22112822696207846, + "learning_rate": 9.086917626106889e-05, + "loss": 0.5362, "step": 2527 }, { - "epoch": 0.35381385584324704, - "grad_norm": 0.4035751382941894, - "learning_rate": 3.7480365001686746e-05, - "loss": 0.601, + "epoch": 0.7075286873775539, + "grad_norm": 0.2230119498609584, + "learning_rate": 9.086028767997932e-05, + "loss": 0.5163, "step": 2528 }, { - "epoch": 0.35395381385584324, - "grad_norm": 0.41808007576821743, - "learning_rate": 3.747054365425872e-05, - "loss": 0.5625, + "epoch": 0.707808564231738, + "grad_norm": 0.22128627734084555, + "learning_rate": 9.08513952097847e-05, + "loss": 0.5543, "step": 2529 }, { - "epoch": 0.3540937718684395, - "grad_norm": 0.4142769655137648, - "learning_rate": 3.7460719744007446e-05, - "loss": 0.5321, + "epoch": 0.7080884410859222, + "grad_norm": 0.2248930989689319, + "learning_rate": 9.08424988513314e-05, + "loss": 0.5584, "step": 2530 }, { - "epoch": 0.3542337298810357, - "grad_norm": 0.4258627449069856, - "learning_rate": 3.745089327295184e-05, - "loss": 0.5915, + "epoch": 0.7083683179401064, + "grad_norm": 0.22292807302428705, + "learning_rate": 9.083359860546622e-05, + "loss": 0.5555, "step": 2531 }, { - "epoch": 0.3543736878936319, - "grad_norm": 0.3979599134535474, - "learning_rate": 3.744106424311133e-05, - "loss": 0.528, + "epoch": 0.7086481947942905, + "grad_norm": 0.2266481146482922, + "learning_rate": 9.082469447303627e-05, + "loss": 0.5508, "step": 2532 }, { - "epoch": 0.35451364590622814, - "grad_norm": 0.40828188091566914, - "learning_rate": 3.7431232656505885e-05, - "loss": 0.584, + "epoch": 0.7089280716484747, + "grad_norm": 0.2236124543070458, + "learning_rate": 9.081578645488904e-05, + "loss": 0.563, "step": 2533 }, { - "epoch": 0.35465360391882433, - "grad_norm": 0.41378663443999314, - "learning_rate": 3.7421398515155994e-05, - "loss": 0.5735, + "epoch": 0.7092079485026588, + "grad_norm": 0.21623042480224322, + "learning_rate": 9.08068745518724e-05, + "loss": 0.5323, "step": 2534 }, { - "epoch": 0.3547935619314206, - "grad_norm": 0.40350752325849487, - "learning_rate": 3.741156182108268e-05, - "loss": 0.5856, + "epoch": 0.709487825356843, + "grad_norm": 0.23072175564283595, + "learning_rate": 9.079795876483462e-05, + "loss": 0.5409, "step": 2535 }, { - "epoch": 0.3549335199440168, - "grad_norm": 0.42065994177021454, - "learning_rate": 3.740172257630747e-05, - "loss": 0.5996, + "epoch": 0.7097677022110271, + "grad_norm": 0.22102417472322713, + "learning_rate": 9.078903909462428e-05, + "loss": 0.5394, "step": 2536 }, { - "epoch": 0.35507347795661304, - "grad_norm": 0.40860097335167056, - "learning_rate": 3.739188078285244e-05, - "loss": 0.6183, + "epoch": 0.7100475790652113, + "grad_norm": 0.21099745557956595, + "learning_rate": 9.078011554209038e-05, + "loss": 0.5345, "step": 2537 }, { - "epoch": 0.35521343596920923, - "grad_norm": 0.4122420177259316, - "learning_rate": 3.738203644274018e-05, - "loss": 0.5549, + "epoch": 0.7103274559193955, + "grad_norm": 0.22400036434514708, + "learning_rate": 9.077118810808225e-05, + "loss": 0.5302, "step": 2538 }, { - "epoch": 0.35535339398180543, - "grad_norm": 0.4181580931065648, - "learning_rate": 3.7372189557993794e-05, - "loss": 0.5945, + "epoch": 0.7106073327735796, + "grad_norm": 0.2095690846545887, + "learning_rate": 9.07622567934496e-05, + "loss": 0.545, "step": 2539 }, { - "epoch": 0.3554933519944017, - "grad_norm": 0.41214229514921485, - "learning_rate": 3.7362340130636925e-05, - "loss": 0.5964, + "epoch": 0.7108872096277637, + "grad_norm": 0.2325850449801102, + "learning_rate": 9.075332159904255e-05, + "loss": 0.5381, "step": 2540 }, { - "epoch": 0.3556333100069979, - "grad_norm": 0.5379104247408596, - "learning_rate": 3.735248816269372e-05, - "loss": 0.5855, + "epoch": 0.7111670864819479, + "grad_norm": 0.22466041408969864, + "learning_rate": 9.074438252571153e-05, + "loss": 0.5594, "step": 2541 }, { - "epoch": 0.35577326801959414, - "grad_norm": 0.4221656282811352, - "learning_rate": 3.734263365618886e-05, - "loss": 0.611, + "epoch": 0.7114469633361321, + "grad_norm": 0.22859958906579383, + "learning_rate": 9.073543957430738e-05, + "loss": 0.5506, "step": 2542 }, { - "epoch": 0.35591322603219033, - "grad_norm": 0.4014247264225883, - "learning_rate": 3.7332776613147564e-05, - "loss": 0.5706, + "epoch": 0.7117268401903163, + "grad_norm": 0.23946774219802966, + "learning_rate": 9.072649274568129e-05, + "loss": 0.5468, "step": 2543 }, { - "epoch": 0.3560531840447866, - "grad_norm": 0.42213996299337353, - "learning_rate": 3.732291703559553e-05, - "loss": 0.5995, + "epoch": 0.7120067170445005, + "grad_norm": 0.22027467570847611, + "learning_rate": 9.071754204068482e-05, + "loss": 0.5259, "step": 2544 }, { - "epoch": 0.3561931420573828, - "grad_norm": 0.41540133704593746, - "learning_rate": 3.731305492555901e-05, - "loss": 0.5574, + "epoch": 0.7122865938986845, + "grad_norm": 0.22514093412641412, + "learning_rate": 9.070858746016992e-05, + "loss": 0.5397, "step": 2545 }, { - "epoch": 0.356333100069979, - "grad_norm": 0.4104571121982501, - "learning_rate": 3.7303190285064776e-05, - "loss": 0.5627, + "epoch": 0.7125664707528687, + "grad_norm": 0.21277314961836397, + "learning_rate": 9.069962900498888e-05, + "loss": 0.5316, "step": 2546 }, { - "epoch": 0.35647305808257523, - "grad_norm": 0.3942675491175812, - "learning_rate": 3.72933231161401e-05, - "loss": 0.558, + "epoch": 0.7128463476070529, + "grad_norm": 0.22209882294945468, + "learning_rate": 9.069066667599434e-05, + "loss": 0.5532, "step": 2547 }, { - "epoch": 0.35661301609517143, - "grad_norm": 0.39670035734631537, - "learning_rate": 3.7283453420812786e-05, - "loss": 0.5475, + "epoch": 0.7131262244612371, + "grad_norm": 0.20645453081914583, + "learning_rate": 9.068170047403942e-05, + "loss": 0.5144, "step": 2548 }, { - "epoch": 0.3567529741077677, - "grad_norm": 0.42150223375815987, - "learning_rate": 3.727358120111117e-05, - "loss": 0.5721, + "epoch": 0.7134061013154213, + "grad_norm": 0.22658946845370995, + "learning_rate": 9.067273039997744e-05, + "loss": 0.5497, "step": 2549 }, { - "epoch": 0.3568929321203639, - "grad_norm": 0.4008262706510209, - "learning_rate": 3.726370645906407e-05, - "loss": 0.5561, + "epoch": 0.7136859781696053, + "grad_norm": 0.23185946591160245, + "learning_rate": 9.066375645466222e-05, + "loss": 0.5608, "step": 2550 }, { - "epoch": 0.35703289013296013, - "grad_norm": 0.44040584835645996, - "learning_rate": 3.7253829196700876e-05, - "loss": 0.5846, + "epoch": 0.7139658550237895, + "grad_norm": 0.2201375694693047, + "learning_rate": 9.065477863894792e-05, + "loss": 0.5324, "step": 2551 }, { - "epoch": 0.35717284814555633, - "grad_norm": 0.4126914987733967, - "learning_rate": 3.7243949416051435e-05, - "loss": 0.5469, + "epoch": 0.7142457318779737, + "grad_norm": 0.22507634091058842, + "learning_rate": 9.064579695368902e-05, + "loss": 0.5497, "step": 2552 }, { - "epoch": 0.35731280615815253, - "grad_norm": 0.39603138357670026, - "learning_rate": 3.723406711914617e-05, - "loss": 0.5942, + "epoch": 0.7145256087321579, + "grad_norm": 0.22208298299269505, + "learning_rate": 9.063681139974041e-05, + "loss": 0.5323, "step": 2553 }, { - "epoch": 0.3574527641707488, - "grad_norm": 0.3852892928635738, - "learning_rate": 3.7224182308015975e-05, - "loss": 0.5928, + "epoch": 0.714805485586342, + "grad_norm": 0.23861968693150126, + "learning_rate": 9.062782197795736e-05, + "loss": 0.5679, "step": 2554 }, { - "epoch": 0.357592722183345, - "grad_norm": 0.41121367387986824, - "learning_rate": 3.7214294984692285e-05, - "loss": 0.5853, + "epoch": 0.7150853624405261, + "grad_norm": 0.23821003166266916, + "learning_rate": 9.061882868919546e-05, + "loss": 0.5309, "step": 2555 }, { - "epoch": 0.35773268019594123, - "grad_norm": 0.4165573669555882, - "learning_rate": 3.7204405151207036e-05, - "loss": 0.5897, + "epoch": 0.7153652392947103, + "grad_norm": 0.2245213474819366, + "learning_rate": 9.060983153431073e-05, + "loss": 0.5348, "step": 2556 }, { - "epoch": 0.35787263820853743, - "grad_norm": 0.45676174939260117, - "learning_rate": 3.71945128095927e-05, - "loss": 0.5853, + "epoch": 0.7156451161488945, + "grad_norm": 0.2285669613279405, + "learning_rate": 9.060083051415949e-05, + "loss": 0.545, "step": 2557 }, { - "epoch": 0.3580125962211337, - "grad_norm": 0.39957833133810244, - "learning_rate": 3.718461796188225e-05, - "loss": 0.5287, + "epoch": 0.7159249930030787, + "grad_norm": 0.22544893736292657, + "learning_rate": 9.059182562959848e-05, + "loss": 0.5533, "step": 2558 }, { - "epoch": 0.3581525542337299, - "grad_norm": 0.4145657616919604, - "learning_rate": 3.717472061010918e-05, - "loss": 0.593, + "epoch": 0.7162048698572628, + "grad_norm": 0.22679876936910812, + "learning_rate": 9.058281688148477e-05, + "loss": 0.5406, "step": 2559 }, { - "epoch": 0.3582925122463261, - "grad_norm": 0.4080507721332056, - "learning_rate": 3.71648207563075e-05, - "loss": 0.5634, + "epoch": 0.716484746711447, + "grad_norm": 0.21540258409652902, + "learning_rate": 9.057380427067584e-05, + "loss": 0.5388, "step": 2560 }, { - "epoch": 0.35843247025892233, - "grad_norm": 0.3912915057782903, - "learning_rate": 3.715491840251172e-05, - "loss": 0.5949, + "epoch": 0.7167646235656311, + "grad_norm": 0.2318314054357572, + "learning_rate": 9.056478779802952e-05, + "loss": 0.5612, "step": 2561 }, { - "epoch": 0.35857242827151853, - "grad_norm": 0.41837049449187164, - "learning_rate": 3.714501355075688e-05, - "loss": 0.572, + "epoch": 0.7170445004198153, + "grad_norm": 0.2251120346846295, + "learning_rate": 9.0555767464404e-05, + "loss": 0.5511, "step": 2562 }, { - "epoch": 0.3587123862841148, - "grad_norm": 0.40328972090381965, - "learning_rate": 3.713510620307852e-05, - "loss": 0.5856, + "epoch": 0.7173243772739994, + "grad_norm": 0.209993428669391, + "learning_rate": 9.054674327065781e-05, + "loss": 0.535, "step": 2563 }, { - "epoch": 0.358852344296711, - "grad_norm": 0.431299030659745, - "learning_rate": 3.712519636151272e-05, - "loss": 0.5952, + "epoch": 0.7176042541281836, + "grad_norm": 0.22113928151644965, + "learning_rate": 9.05377152176499e-05, + "loss": 0.5305, "step": 2564 }, { - "epoch": 0.35899230230930723, - "grad_norm": 0.3985200361199881, - "learning_rate": 3.711528402809603e-05, - "loss": 0.5533, + "epoch": 0.7178841309823678, + "grad_norm": 0.22341179413776524, + "learning_rate": 9.052868330623959e-05, + "loss": 0.5304, "step": 2565 }, { - "epoch": 0.35913226032190343, - "grad_norm": 0.4192753590584814, - "learning_rate": 3.710536920486555e-05, - "loss": 0.5667, + "epoch": 0.7181640078365519, + "grad_norm": 0.22135061017833846, + "learning_rate": 9.05196475372865e-05, + "loss": 0.5456, "step": 2566 }, { - "epoch": 0.3592722183344996, - "grad_norm": 0.42240052564081954, - "learning_rate": 3.709545189385887e-05, - "loss": 0.5708, + "epoch": 0.7184438846907361, + "grad_norm": 0.22129377660549585, + "learning_rate": 9.051060791165069e-05, + "loss": 0.5289, "step": 2567 }, { - "epoch": 0.3594121763470959, - "grad_norm": 0.4236767334123938, - "learning_rate": 3.708553209711409e-05, - "loss": 0.5485, + "epoch": 0.7187237615449202, + "grad_norm": 0.22048892915569251, + "learning_rate": 9.050156443019253e-05, + "loss": 0.5438, "step": 2568 }, { - "epoch": 0.3595521343596921, - "grad_norm": 0.40080357675210276, - "learning_rate": 3.707560981666986e-05, - "loss": 0.557, + "epoch": 0.7190036383991044, + "grad_norm": 0.21299945876885298, + "learning_rate": 9.049251709377282e-05, + "loss": 0.5369, "step": 2569 }, { - "epoch": 0.35969209237228833, - "grad_norm": 0.410487921988623, - "learning_rate": 3.706568505456527e-05, - "loss": 0.5547, + "epoch": 0.7192835152532886, + "grad_norm": 0.21964049638600658, + "learning_rate": 9.048346590325264e-05, + "loss": 0.5372, "step": 2570 }, { - "epoch": 0.3598320503848845, - "grad_norm": 0.41368891130760344, - "learning_rate": 3.705575781283999e-05, - "loss": 0.5673, + "epoch": 0.7195633921074727, + "grad_norm": 0.23338691731135155, + "learning_rate": 9.047441085949354e-05, + "loss": 0.5362, "step": 2571 }, { - "epoch": 0.3599720083974808, - "grad_norm": 0.40193894214258574, - "learning_rate": 3.704582809353415e-05, - "loss": 0.5925, + "epoch": 0.7198432689616568, + "grad_norm": 0.22879152170084813, + "learning_rate": 9.046535196335735e-05, + "loss": 0.5599, "step": 2572 }, { - "epoch": 0.360111966410077, - "grad_norm": 0.407102772883237, - "learning_rate": 3.703589589868841e-05, - "loss": 0.5658, + "epoch": 0.720123145815841, + "grad_norm": 0.2402491397964616, + "learning_rate": 9.04562892157063e-05, + "loss": 0.5649, "step": 2573 }, { - "epoch": 0.3602519244226732, - "grad_norm": 0.4100475297715309, - "learning_rate": 3.702596123034395e-05, - "loss": 0.5946, + "epoch": 0.7204030226700252, + "grad_norm": 0.2351157095883497, + "learning_rate": 9.044722261740301e-05, + "loss": 0.5583, "step": 2574 }, { - "epoch": 0.36039188243526943, - "grad_norm": 0.4005884577498317, - "learning_rate": 3.701602409054243e-05, - "loss": 0.5293, + "epoch": 0.7206828995242094, + "grad_norm": 0.22494632283570237, + "learning_rate": 9.043815216931043e-05, + "loss": 0.5319, "step": 2575 }, { - "epoch": 0.3605318404478656, - "grad_norm": 0.37159137413391713, - "learning_rate": 3.700608448132604e-05, - "loss": 0.5973, + "epoch": 0.7209627763783935, + "grad_norm": 0.22353460388964774, + "learning_rate": 9.042907787229189e-05, + "loss": 0.5442, "step": 2576 }, { - "epoch": 0.3606717984604619, - "grad_norm": 0.41775069760426675, - "learning_rate": 3.699614240473748e-05, - "loss": 0.5731, + "epoch": 0.7212426532325776, + "grad_norm": 0.23178700738408642, + "learning_rate": 9.041999972721109e-05, + "loss": 0.5395, "step": 2577 }, { - "epoch": 0.3608117564730581, - "grad_norm": 0.4536333746914714, - "learning_rate": 3.6986197862819934e-05, - "loss": 0.5814, + "epoch": 0.7215225300867618, + "grad_norm": 0.223273726747122, + "learning_rate": 9.04109177349321e-05, + "loss": 0.5477, "step": 2578 }, { - "epoch": 0.36095171448565433, - "grad_norm": 0.415665084054717, - "learning_rate": 3.69762508576171e-05, - "loss": 0.5813, + "epoch": 0.721802406940946, + "grad_norm": 0.2238439973920132, + "learning_rate": 9.040183189631932e-05, + "loss": 0.5485, "step": 2579 }, { - "epoch": 0.3610916724982505, - "grad_norm": 0.4111649076881108, - "learning_rate": 3.6966301391173206e-05, - "loss": 0.5687, + "epoch": 0.7220822837951302, + "grad_norm": 0.23062693859751235, + "learning_rate": 9.039274221223759e-05, + "loss": 0.5439, "step": 2580 }, { - "epoch": 0.3612316305108467, - "grad_norm": 0.45348364261588203, - "learning_rate": 3.695634946553296e-05, - "loss": 0.5789, + "epoch": 0.7223621606493144, + "grad_norm": 0.22042520477106617, + "learning_rate": 9.038364868355204e-05, + "loss": 0.5461, "step": 2581 }, { - "epoch": 0.361371588523443, - "grad_norm": 0.46524048585079214, - "learning_rate": 3.694639508274158e-05, - "loss": 0.605, + "epoch": 0.7226420375034984, + "grad_norm": 0.23343794742484003, + "learning_rate": 9.037455131112819e-05, + "loss": 0.5579, "step": 2582 }, { - "epoch": 0.3615115465360392, - "grad_norm": 0.43254745661664457, - "learning_rate": 3.69364382448448e-05, - "loss": 0.5723, + "epoch": 0.7229219143576826, + "grad_norm": 0.22016660472763663, + "learning_rate": 9.036545009583198e-05, + "loss": 0.5594, "step": 2583 }, { - "epoch": 0.3616515045486354, - "grad_norm": 0.5926681683204299, - "learning_rate": 3.692647895388884e-05, - "loss": 0.5657, + "epoch": 0.7232017912118668, + "grad_norm": 0.22215114217283774, + "learning_rate": 9.035634503852961e-05, + "loss": 0.5556, "step": 2584 }, { - "epoch": 0.3617914625612316, - "grad_norm": 0.3984247442681412, - "learning_rate": 3.691651721192046e-05, - "loss": 0.5375, + "epoch": 0.723481668066051, + "grad_norm": 0.22430675789441773, + "learning_rate": 9.034723614008773e-05, + "loss": 0.5464, "step": 2585 }, { - "epoch": 0.3619314205738279, - "grad_norm": 0.4256089941995705, - "learning_rate": 3.6906553020986876e-05, - "loss": 0.5663, + "epoch": 0.7237615449202351, + "grad_norm": 0.2264205841303816, + "learning_rate": 9.033812340137334e-05, + "loss": 0.5555, "step": 2586 }, { - "epoch": 0.3620713785864241, - "grad_norm": 0.3955060934018287, - "learning_rate": 3.6896586383135835e-05, - "loss": 0.5367, + "epoch": 0.7240414217744192, + "grad_norm": 0.2165219165080543, + "learning_rate": 9.032900682325378e-05, + "loss": 0.5178, "step": 2587 }, { - "epoch": 0.3622113365990203, - "grad_norm": 0.3983798844884697, - "learning_rate": 3.688661730041559e-05, - "loss": 0.5757, + "epoch": 0.7243212986286034, + "grad_norm": 0.21021652285032288, + "learning_rate": 9.031988640659679e-05, + "loss": 0.5117, "step": 2588 }, { - "epoch": 0.3623512946116165, - "grad_norm": 0.40920669971645085, - "learning_rate": 3.6876645774874876e-05, - "loss": 0.5445, + "epoch": 0.7246011754827876, + "grad_norm": 0.23006724448066837, + "learning_rate": 9.031076215227043e-05, + "loss": 0.5438, "step": 2589 }, { - "epoch": 0.3624912526242127, - "grad_norm": 0.43900374382517116, - "learning_rate": 3.6866671808562945e-05, - "loss": 0.5702, + "epoch": 0.7248810523369718, + "grad_norm": 0.2372244939552078, + "learning_rate": 9.030163406114316e-05, + "loss": 0.5438, "step": 2590 }, { - "epoch": 0.362631210636809, - "grad_norm": 0.4009385370546209, - "learning_rate": 3.685669540352957e-05, - "loss": 0.5873, + "epoch": 0.7251609291911559, + "grad_norm": 0.22886659116273975, + "learning_rate": 9.029250213408382e-05, + "loss": 0.5098, "step": 2591 }, { - "epoch": 0.3627711686494052, - "grad_norm": 0.4010104073772178, - "learning_rate": 3.6846716561824965e-05, - "loss": 0.5693, + "epoch": 0.72544080604534, + "grad_norm": 0.23270221849125097, + "learning_rate": 9.028336637196158e-05, + "loss": 0.5314, "step": 2592 }, { - "epoch": 0.3629111266620014, - "grad_norm": 0.3942578614306398, - "learning_rate": 3.6836735285499924e-05, - "loss": 0.5459, + "epoch": 0.7257206828995242, + "grad_norm": 0.22839365533135098, + "learning_rate": 9.027422677564596e-05, + "loss": 0.546, "step": 2593 }, { - "epoch": 0.3630510846745976, - "grad_norm": 0.4052354355699724, - "learning_rate": 3.682675157660567e-05, - "loss": 0.5753, + "epoch": 0.7260005597537084, + "grad_norm": 0.22766488812730518, + "learning_rate": 9.02650833460069e-05, + "loss": 0.5595, "step": 2594 }, { - "epoch": 0.3631910426871938, - "grad_norm": 0.4111971028442228, - "learning_rate": 3.681676543719396e-05, - "loss": 0.6152, + "epoch": 0.7262804366078925, + "grad_norm": 0.21650086692890955, + "learning_rate": 9.025593608391467e-05, + "loss": 0.5303, "step": 2595 }, { - "epoch": 0.3633310006997901, - "grad_norm": 0.38664588990396315, - "learning_rate": 3.680677686931707e-05, - "loss": 0.5354, + "epoch": 0.7265603134620767, + "grad_norm": 0.21363366533674547, + "learning_rate": 9.024678499023991e-05, + "loss": 0.5426, "step": 2596 }, { - "epoch": 0.36347095871238627, - "grad_norm": 0.4207284242435013, - "learning_rate": 3.679678587502773e-05, - "loss": 0.576, + "epoch": 0.7268401903162608, + "grad_norm": 0.22551173213075484, + "learning_rate": 9.023763006585364e-05, + "loss": 0.5421, "step": 2597 }, { - "epoch": 0.3636109167249825, - "grad_norm": 0.40595710076845204, - "learning_rate": 3.67867924563792e-05, - "loss": 0.5885, + "epoch": 0.727120067170445, + "grad_norm": 0.22686860980709742, + "learning_rate": 9.022847131162721e-05, + "loss": 0.5607, "step": 2598 }, { - "epoch": 0.3637508747375787, - "grad_norm": 0.41445075167878537, - "learning_rate": 3.6776796615425224e-05, - "loss": 0.5732, + "epoch": 0.7273999440246292, + "grad_norm": 0.22405581529218108, + "learning_rate": 9.021930872843236e-05, + "loss": 0.5463, "step": 2599 }, { - "epoch": 0.363890832750175, - "grad_norm": 0.38888704179883676, - "learning_rate": 3.6766798354220054e-05, - "loss": 0.5658, + "epoch": 0.7276798208788133, + "grad_norm": 0.22396964443628806, + "learning_rate": 9.02101423171412e-05, + "loss": 0.5237, "step": 2600 }, { - "epoch": 0.3640307907627712, - "grad_norm": 0.40093975960461736, - "learning_rate": 3.675679767481842e-05, - "loss": 0.5597, + "epoch": 0.7279596977329975, + "grad_norm": 0.21505223093740625, + "learning_rate": 9.020097207862617e-05, + "loss": 0.5264, "step": 2601 }, { - "epoch": 0.36417074877536737, - "grad_norm": 0.4321442568384402, - "learning_rate": 3.674679457927559e-05, - "loss": 0.5451, + "epoch": 0.7282395745871817, + "grad_norm": 0.21557819647463233, + "learning_rate": 9.019179801376013e-05, + "loss": 0.5175, "step": 2602 }, { - "epoch": 0.3643107067879636, - "grad_norm": 0.41193216461597326, - "learning_rate": 3.673678906964727e-05, - "loss": 0.5562, + "epoch": 0.7285194514413658, + "grad_norm": 0.21356209983230737, + "learning_rate": 9.018262012341627e-05, + "loss": 0.531, "step": 2603 }, { - "epoch": 0.3644506648005598, - "grad_norm": 0.3982633594790959, - "learning_rate": 3.672678114798972e-05, - "loss": 0.5463, + "epoch": 0.7287993282955499, + "grad_norm": 0.221118666679804, + "learning_rate": 9.017343840846812e-05, + "loss": 0.5431, "step": 2604 }, { - "epoch": 0.3645906228131561, - "grad_norm": 0.42438474880866467, - "learning_rate": 3.671677081635966e-05, - "loss": 0.5809, + "epoch": 0.7290792051497341, + "grad_norm": 0.21379233115425508, + "learning_rate": 9.016425286978961e-05, + "loss": 0.5434, "step": 2605 }, { - "epoch": 0.36473058082575227, - "grad_norm": 0.4217628754836848, - "learning_rate": 3.67067580768143e-05, - "loss": 0.553, + "epoch": 0.7293590820039183, + "grad_norm": 0.21863786166249938, + "learning_rate": 9.015506350825504e-05, + "loss": 0.533, "step": 2606 }, { - "epoch": 0.36487053883834847, - "grad_norm": 0.42309673305357437, - "learning_rate": 3.669674293141139e-05, - "loss": 0.6209, + "epoch": 0.7296389588581025, + "grad_norm": 0.2121735124197766, + "learning_rate": 9.014587032473906e-05, + "loss": 0.5264, "step": 2607 }, { - "epoch": 0.3650104968509447, - "grad_norm": 0.3920738753776142, - "learning_rate": 3.6686725382209114e-05, - "loss": 0.5617, + "epoch": 0.7299188357122866, + "grad_norm": 0.22337384706295352, + "learning_rate": 9.013667332011665e-05, + "loss": 0.5407, "step": 2608 }, { - "epoch": 0.3651504548635409, - "grad_norm": 0.39484584407664886, - "learning_rate": 3.66767054312662e-05, - "loss": 0.5167, + "epoch": 0.7301987125664707, + "grad_norm": 0.22611911023687936, + "learning_rate": 9.012747249526324e-05, + "loss": 0.548, "step": 2609 }, { - "epoch": 0.36529041287613717, - "grad_norm": 0.4255886790117686, - "learning_rate": 3.6666683080641846e-05, - "loss": 0.6193, + "epoch": 0.7304785894206549, + "grad_norm": 0.2208382972947634, + "learning_rate": 9.011826785105451e-05, + "loss": 0.5284, "step": 2610 }, { - "epoch": 0.36543037088873337, - "grad_norm": 0.40579442495363904, - "learning_rate": 3.665665833239574e-05, - "loss": 0.5342, + "epoch": 0.7307584662748391, + "grad_norm": 0.22919286676851466, + "learning_rate": 9.010905938836661e-05, + "loss": 0.5226, "step": 2611 }, { - "epoch": 0.3655703289013296, - "grad_norm": 0.4233421685048626, - "learning_rate": 3.664663118858808e-05, - "loss": 0.5465, + "epoch": 0.7310383431290233, + "grad_norm": 0.22167131435437362, + "learning_rate": 9.009984710807595e-05, + "loss": 0.5017, "step": 2612 }, { - "epoch": 0.3657102869139258, - "grad_norm": 0.4174294680767461, - "learning_rate": 3.6636601651279524e-05, - "loss": 0.5849, + "epoch": 0.7313182199832073, + "grad_norm": 0.2212876818656353, + "learning_rate": 9.009063101105943e-05, + "loss": 0.5542, "step": 2613 }, { - "epoch": 0.365850244926522, - "grad_norm": 0.44476352179830164, - "learning_rate": 3.662656972253127e-05, - "loss": 0.5717, + "epoch": 0.7315980968373915, + "grad_norm": 0.22541444570079514, + "learning_rate": 9.00814110981942e-05, + "loss": 0.5411, "step": 2614 }, { - "epoch": 0.36599020293911827, - "grad_norm": 0.3948209367210007, - "learning_rate": 3.6616535404404964e-05, - "loss": 0.5529, + "epoch": 0.7318779736915757, + "grad_norm": 0.2129620860116927, + "learning_rate": 9.007218737035782e-05, + "loss": 0.5332, "step": 2615 }, { - "epoch": 0.36613016095171447, - "grad_norm": 0.40738682961697953, - "learning_rate": 3.660649869896276e-05, - "loss": 0.5268, + "epoch": 0.7321578505457599, + "grad_norm": 0.2216253089851693, + "learning_rate": 9.006295982842825e-05, + "loss": 0.5379, "step": 2616 }, { - "epoch": 0.3662701189643107, - "grad_norm": 0.40020906641099585, - "learning_rate": 3.659645960826732e-05, - "loss": 0.5567, + "epoch": 0.7324377273999441, + "grad_norm": 0.2109998430273148, + "learning_rate": 9.00537284732837e-05, + "loss": 0.5285, "step": 2617 }, { - "epoch": 0.3664100769769069, - "grad_norm": 0.4237523890631533, - "learning_rate": 3.658641813438176e-05, - "loss": 0.573, + "epoch": 0.7327176042541281, + "grad_norm": 0.2249144197432733, + "learning_rate": 9.004449330580288e-05, + "loss": 0.5397, "step": 2618 }, { - "epoch": 0.36655003498950317, - "grad_norm": 0.4325798007082038, - "learning_rate": 3.657637427936972e-05, - "loss": 0.5791, + "epoch": 0.7329974811083123, + "grad_norm": 0.21344031391081858, + "learning_rate": 9.003525432686477e-05, + "loss": 0.5551, "step": 2619 }, { - "epoch": 0.36668999300209937, - "grad_norm": 0.43907042323555606, - "learning_rate": 3.65663280452953e-05, - "loss": 0.5786, + "epoch": 0.7332773579624965, + "grad_norm": 0.2289174917729721, + "learning_rate": 9.002601153734874e-05, + "loss": 0.5628, "step": 2620 }, { - "epoch": 0.36682995101469557, - "grad_norm": 0.4323098724184291, - "learning_rate": 3.6556279434223116e-05, - "loss": 0.6016, + "epoch": 0.7335572348166807, + "grad_norm": 0.20781833809778474, + "learning_rate": 9.001676493813452e-05, + "loss": 0.5309, "step": 2621 }, { - "epoch": 0.3669699090272918, - "grad_norm": 0.4265584212865507, - "learning_rate": 3.654622844821825e-05, - "loss": 0.623, + "epoch": 0.7338371116708649, + "grad_norm": 0.2160990710039784, + "learning_rate": 9.000751453010223e-05, + "loss": 0.5297, "step": 2622 }, { - "epoch": 0.367109867039888, - "grad_norm": 0.4179019694136672, - "learning_rate": 3.6536175089346285e-05, - "loss": 0.5468, + "epoch": 0.734116988525049, + "grad_norm": 0.22871188542141155, + "learning_rate": 8.99982603141323e-05, + "loss": 0.586, "step": 2623 }, { - "epoch": 0.36724982505248427, - "grad_norm": 0.41889421083309525, - "learning_rate": 3.6526119359673284e-05, - "loss": 0.5574, + "epoch": 0.7343968653792331, + "grad_norm": 0.21264166113703792, + "learning_rate": 8.998900229110557e-05, + "loss": 0.5459, "step": 2624 }, { - "epoch": 0.36738978306508047, - "grad_norm": 0.41710292728469345, - "learning_rate": 3.651606126126581e-05, - "loss": 0.5781, + "epoch": 0.7346767422334173, + "grad_norm": 0.2244892589900409, + "learning_rate": 8.997974046190323e-05, + "loss": 0.5345, "step": 2625 }, { - "epoch": 0.3675297410776767, - "grad_norm": 0.3897312626800565, - "learning_rate": 3.65060007961909e-05, - "loss": 0.5193, + "epoch": 0.7349566190876015, + "grad_norm": 0.2252025077322312, + "learning_rate": 8.99704748274068e-05, + "loss": 0.542, "step": 2626 }, { - "epoch": 0.3676696990902729, - "grad_norm": 0.42386091892910743, - "learning_rate": 3.649593796651608e-05, - "loss": 0.5917, + "epoch": 0.7352364959417856, + "grad_norm": 0.21434324820570827, + "learning_rate": 8.996120538849822e-05, + "loss": 0.5178, "step": 2627 }, { - "epoch": 0.3678096571028691, - "grad_norm": 0.414524194950392, - "learning_rate": 3.648587277430936e-05, - "loss": 0.5758, + "epoch": 0.7355163727959698, + "grad_norm": 0.22143531292288385, + "learning_rate": 8.995193214605973e-05, + "loss": 0.5215, "step": 2628 }, { - "epoch": 0.36794961511546537, - "grad_norm": 0.4038428167881955, - "learning_rate": 3.647580522163925e-05, - "loss": 0.5621, + "epoch": 0.7357962496501539, + "grad_norm": 0.21683386039801417, + "learning_rate": 8.994265510097397e-05, + "loss": 0.5642, "step": 2629 }, { - "epoch": 0.36808957312806156, - "grad_norm": 0.4006767303991816, - "learning_rate": 3.646573531057473e-05, - "loss": 0.5586, + "epoch": 0.7360761265043381, + "grad_norm": 0.22279042121537643, + "learning_rate": 8.993337425412396e-05, + "loss": 0.5344, "step": 2630 }, { - "epoch": 0.3682295311406578, - "grad_norm": 0.40655169612049585, - "learning_rate": 3.645566304318526e-05, - "loss": 0.6016, + "epoch": 0.7363560033585222, + "grad_norm": 0.2242316556716447, + "learning_rate": 8.992408960639303e-05, + "loss": 0.5341, "step": 2631 }, { - "epoch": 0.368369489153254, - "grad_norm": 0.434790153457983, - "learning_rate": 3.644558842154081e-05, - "loss": 0.5481, + "epoch": 0.7366358802127064, + "grad_norm": 0.2111941057458277, + "learning_rate": 8.991480115866489e-05, + "loss": 0.5463, "step": 2632 }, { - "epoch": 0.36850944716585027, - "grad_norm": 0.4099645764830532, - "learning_rate": 3.6435511447711804e-05, - "loss": 0.5738, + "epoch": 0.7369157570668906, + "grad_norm": 0.22429499663794777, + "learning_rate": 8.990550891182365e-05, + "loss": 0.5402, "step": 2633 }, { - "epoch": 0.36864940517844647, - "grad_norm": 0.41274450825271575, - "learning_rate": 3.642543212376916e-05, - "loss": 0.5991, + "epoch": 0.7371956339210747, + "grad_norm": 0.22019295218467896, + "learning_rate": 8.989621286675373e-05, + "loss": 0.549, "step": 2634 }, { - "epoch": 0.36878936319104266, - "grad_norm": 0.4032135476124307, - "learning_rate": 3.6415350451784294e-05, - "loss": 0.5499, + "epoch": 0.7374755107752589, + "grad_norm": 0.227911797879774, + "learning_rate": 8.988691302433993e-05, + "loss": 0.5627, "step": 2635 }, { - "epoch": 0.3689293212036389, - "grad_norm": 0.4188775277615029, - "learning_rate": 3.6405266433829075e-05, - "loss": 0.5654, + "epoch": 0.737755387629443, + "grad_norm": 0.21941965086394474, + "learning_rate": 8.987760938546744e-05, + "loss": 0.5588, "step": 2636 }, { - "epoch": 0.3690692792162351, - "grad_norm": 0.5178966570845596, - "learning_rate": 3.6395180071975885e-05, - "loss": 0.5396, + "epoch": 0.7380352644836272, + "grad_norm": 0.2179316528240126, + "learning_rate": 8.986830195102176e-05, + "loss": 0.5446, "step": 2637 }, { - "epoch": 0.36920923722883137, - "grad_norm": 0.40182828296097955, - "learning_rate": 3.638509136829758e-05, - "loss": 0.5652, + "epoch": 0.7383151413378114, + "grad_norm": 0.2145935169967857, + "learning_rate": 8.98589907218888e-05, + "loss": 0.5333, "step": 2638 }, { - "epoch": 0.36934919524142756, - "grad_norm": 0.390078684434463, - "learning_rate": 3.637500032486747e-05, - "loss": 0.557, + "epoch": 0.7385950181919955, + "grad_norm": 0.2257690582727668, + "learning_rate": 8.984967569895477e-05, + "loss": 0.5314, "step": 2639 }, { - "epoch": 0.3694891532540238, - "grad_norm": 0.3931686164476163, - "learning_rate": 3.636490694375938e-05, - "loss": 0.5841, + "epoch": 0.7388748950461796, + "grad_norm": 0.2303994125554735, + "learning_rate": 8.98403568831063e-05, + "loss": 0.5333, "step": 2640 }, { - "epoch": 0.36962911126662, - "grad_norm": 0.39382269721567603, - "learning_rate": 3.63548112270476e-05, - "loss": 0.5575, + "epoch": 0.7391547719003638, + "grad_norm": 0.22440134450016616, + "learning_rate": 8.983103427523039e-05, + "loss": 0.5599, "step": 2641 }, { - "epoch": 0.3697690692792162, - "grad_norm": 0.42173997864130025, - "learning_rate": 3.63447131768069e-05, - "loss": 0.6018, + "epoch": 0.739434648754548, + "grad_norm": 0.2203078419742426, + "learning_rate": 8.982170787621432e-05, + "loss": 0.5592, "step": 2642 }, { - "epoch": 0.36990902729181246, - "grad_norm": 0.43303379767097616, - "learning_rate": 3.6334612795112534e-05, - "loss": 0.5894, + "epoch": 0.7397145256087322, + "grad_norm": 0.21943756803723738, + "learning_rate": 8.981237768694582e-05, + "loss": 0.5209, "step": 2643 }, { - "epoch": 0.37004898530440866, - "grad_norm": 0.40551708557859245, - "learning_rate": 3.632451008404024e-05, - "loss": 0.5822, + "epoch": 0.7399944024629164, + "grad_norm": 0.2251406932636578, + "learning_rate": 8.980304370831292e-05, + "loss": 0.5369, "step": 2644 }, { - "epoch": 0.3701889433170049, - "grad_norm": 0.4199307056582574, - "learning_rate": 3.631440504566621e-05, - "loss": 0.5359, + "epoch": 0.7402742793171004, + "grad_norm": 0.24029662759994772, + "learning_rate": 8.979370594120402e-05, + "loss": 0.5635, "step": 2645 }, { - "epoch": 0.3703289013296011, - "grad_norm": 0.3856025757451338, - "learning_rate": 3.6304297682067144e-05, - "loss": 0.5279, + "epoch": 0.7405541561712846, + "grad_norm": 0.23198592666987902, + "learning_rate": 8.978436438650795e-05, + "loss": 0.5518, "step": 2646 }, { - "epoch": 0.37046885934219737, - "grad_norm": 0.39790868545989994, - "learning_rate": 3.6294187995320214e-05, - "loss": 0.5685, + "epoch": 0.7408340330254688, + "grad_norm": 0.2265286213827318, + "learning_rate": 8.97750190451138e-05, + "loss": 0.5569, "step": 2647 }, { - "epoch": 0.37060881735479356, - "grad_norm": 0.41011662651634145, - "learning_rate": 3.628407598750305e-05, - "loss": 0.5732, + "epoch": 0.741113909879653, + "grad_norm": 0.21371175891317676, + "learning_rate": 8.976566991791107e-05, + "loss": 0.5119, "step": 2648 }, { - "epoch": 0.37074877536738976, - "grad_norm": 0.41740394404930137, - "learning_rate": 3.627396166069377e-05, - "loss": 0.5922, + "epoch": 0.7413937867338372, + "grad_norm": 0.2135139892642674, + "learning_rate": 8.975631700578962e-05, + "loss": 0.5331, "step": 2649 }, { - "epoch": 0.370888733379986, - "grad_norm": 0.41661946277247025, - "learning_rate": 3.626384501697099e-05, - "loss": 0.5501, + "epoch": 0.7416736635880212, + "grad_norm": 0.21976528912048277, + "learning_rate": 8.974696030963965e-05, + "loss": 0.517, "step": 2650 }, { - "epoch": 0.3710286913925822, - "grad_norm": 0.42202366075884673, - "learning_rate": 3.625372605841376e-05, - "loss": 0.5659, + "epoch": 0.7419535404422054, + "grad_norm": 0.2223069175926191, + "learning_rate": 8.973759983035177e-05, + "loss": 0.5392, "step": 2651 }, { - "epoch": 0.37116864940517846, - "grad_norm": 0.39038696179315746, - "learning_rate": 3.624360478710165e-05, - "loss": 0.5729, + "epoch": 0.7422334172963896, + "grad_norm": 0.22494355617310022, + "learning_rate": 8.972823556881689e-05, + "loss": 0.5556, "step": 2652 }, { - "epoch": 0.37130860741777466, - "grad_norm": 0.41764113003828224, - "learning_rate": 3.623348120511466e-05, - "loss": 0.5663, + "epoch": 0.7425132941505738, + "grad_norm": 0.21628539978386174, + "learning_rate": 8.971886752592631e-05, + "loss": 0.5233, "step": 2653 }, { - "epoch": 0.3714485654303709, - "grad_norm": 0.39913226890826564, - "learning_rate": 3.622335531453331e-05, - "loss": 0.5645, + "epoch": 0.742793171004758, + "grad_norm": 0.21273840238289846, + "learning_rate": 8.970949570257169e-05, + "loss": 0.5502, "step": 2654 }, { - "epoch": 0.3715885234429671, - "grad_norm": 0.3939385454277571, - "learning_rate": 3.621322711743858e-05, - "loss": 0.5631, + "epoch": 0.743073047858942, + "grad_norm": 0.22284434464455785, + "learning_rate": 8.970012009964503e-05, + "loss": 0.521, "step": 2655 }, { - "epoch": 0.3717284814555633, - "grad_norm": 0.41308594557194117, - "learning_rate": 3.6203096615911884e-05, - "loss": 0.6052, + "epoch": 0.7433529247131262, + "grad_norm": 0.22036668898365644, + "learning_rate": 8.969074071803875e-05, + "loss": 0.5516, "step": 2656 }, { - "epoch": 0.37186843946815956, - "grad_norm": 0.4298728275791088, - "learning_rate": 3.619296381203517e-05, - "loss": 0.5396, + "epoch": 0.7436328015673104, + "grad_norm": 0.2237368242432521, + "learning_rate": 8.968135755864553e-05, + "loss": 0.5124, "step": 2657 }, { - "epoch": 0.37200839748075576, - "grad_norm": 0.4544709711994261, - "learning_rate": 3.6182828707890816e-05, - "loss": 0.5973, + "epoch": 0.7439126784214946, + "grad_norm": 0.24465980933910203, + "learning_rate": 8.967197062235848e-05, + "loss": 0.5416, "step": 2658 }, { - "epoch": 0.372148355493352, - "grad_norm": 0.417274676645833, - "learning_rate": 3.61726913055617e-05, - "loss": 0.58, + "epoch": 0.7441925552756787, + "grad_norm": 0.2103241232334168, + "learning_rate": 8.966257991007108e-05, + "loss": 0.542, "step": 2659 }, { - "epoch": 0.3722883135059482, - "grad_norm": 0.4027341706183872, - "learning_rate": 3.6162551607131164e-05, - "loss": 0.5793, + "epoch": 0.7444724321298629, + "grad_norm": 0.2309919340598145, + "learning_rate": 8.965318542267711e-05, + "loss": 0.5556, "step": 2660 }, { - "epoch": 0.37242827151854446, - "grad_norm": 0.41668849630230104, - "learning_rate": 3.615240961468301e-05, - "loss": 0.5573, + "epoch": 0.744752308984047, + "grad_norm": 0.22292085304964945, + "learning_rate": 8.964378716107076e-05, + "loss": 0.5627, "step": 2661 }, { - "epoch": 0.37256822953114066, - "grad_norm": 0.40880781367892177, - "learning_rate": 3.614226533030152e-05, - "loss": 0.5821, + "epoch": 0.7450321858382312, + "grad_norm": 0.22509967643412987, + "learning_rate": 8.963438512614655e-05, + "loss": 0.5494, "step": 2662 }, { - "epoch": 0.37270818754373686, - "grad_norm": 0.4154234374761176, - "learning_rate": 3.6132118756071456e-05, - "loss": 0.5932, + "epoch": 0.7453120626924153, + "grad_norm": 0.2167942969204872, + "learning_rate": 8.96249793187994e-05, + "loss": 0.5438, "step": 2663 }, { - "epoch": 0.3728481455563331, - "grad_norm": 0.3839166628649522, - "learning_rate": 3.6121969894078024e-05, - "loss": 0.5498, + "epoch": 0.7455919395465995, + "grad_norm": 0.2240355642128715, + "learning_rate": 8.961556973992452e-05, + "loss": 0.5702, "step": 2664 }, { - "epoch": 0.3729881035689293, - "grad_norm": 0.42314063405669433, - "learning_rate": 3.611181874640694e-05, - "loss": 0.571, + "epoch": 0.7458718164007837, + "grad_norm": 0.22188055447651847, + "learning_rate": 8.960615639041755e-05, + "loss": 0.5208, "step": 2665 }, { - "epoch": 0.37312806158152556, - "grad_norm": 0.3944581050181605, - "learning_rate": 3.610166531514436e-05, - "loss": 0.5382, + "epoch": 0.7461516932549678, + "grad_norm": 0.22901006620538905, + "learning_rate": 8.959673927117444e-05, + "loss": 0.5806, "step": 2666 }, { - "epoch": 0.37326801959412176, - "grad_norm": 0.39463547519248415, - "learning_rate": 3.60915096023769e-05, - "loss": 0.5544, + "epoch": 0.746431570109152, + "grad_norm": 0.21738752651697146, + "learning_rate": 8.958731838309153e-05, + "loss": 0.5207, "step": 2667 }, { - "epoch": 0.373407977606718, - "grad_norm": 0.41268049741997787, - "learning_rate": 3.6081351610191696e-05, - "loss": 0.5427, + "epoch": 0.7467114469633361, + "grad_norm": 0.2181564292843853, + "learning_rate": 8.957789372706548e-05, + "loss": 0.5306, "step": 2668 }, { - "epoch": 0.3735479356193142, - "grad_norm": 0.4095949213776676, - "learning_rate": 3.607119134067629e-05, - "loss": 0.5564, + "epoch": 0.7469913238175203, + "grad_norm": 0.22843602708917043, + "learning_rate": 8.956846530399338e-05, + "loss": 0.5407, "step": 2669 }, { - "epoch": 0.3736878936319104, - "grad_norm": 0.39819288148024584, - "learning_rate": 3.606102879591874e-05, - "loss": 0.5584, + "epoch": 0.7472712006717045, + "grad_norm": 0.2139752967512492, + "learning_rate": 8.955903311477259e-05, + "loss": 0.533, "step": 2670 }, { - "epoch": 0.37382785164450666, - "grad_norm": 0.4133173629042419, - "learning_rate": 3.605086397800753e-05, - "loss": 0.5563, + "epoch": 0.7475510775258886, + "grad_norm": 0.23982720290324436, + "learning_rate": 8.95495971603009e-05, + "loss": 0.55, "step": 2671 }, { - "epoch": 0.37396780965710286, - "grad_norm": 0.39980433778091934, - "learning_rate": 3.604069688903165e-05, - "loss": 0.5536, + "epoch": 0.7478309543800727, + "grad_norm": 0.21307316566105344, + "learning_rate": 8.954015744147639e-05, + "loss": 0.5208, "step": 2672 }, { - "epoch": 0.3741077676696991, - "grad_norm": 0.4143096692418758, - "learning_rate": 3.603052753108053e-05, - "loss": 0.6135, + "epoch": 0.7481108312342569, + "grad_norm": 0.21710414532432137, + "learning_rate": 8.953071395919757e-05, + "loss": 0.5229, "step": 2673 }, { - "epoch": 0.3742477256822953, - "grad_norm": 0.4220107842655034, - "learning_rate": 3.602035590624409e-05, - "loss": 0.5875, + "epoch": 0.7483907080884411, + "grad_norm": 0.22802418489130677, + "learning_rate": 8.952126671436327e-05, + "loss": 0.5352, "step": 2674 }, { - "epoch": 0.37438768369489156, - "grad_norm": 0.41274829656377915, - "learning_rate": 3.6010182016612695e-05, - "loss": 0.553, + "epoch": 0.7486705849426253, + "grad_norm": 0.22741647869103276, + "learning_rate": 8.95118157078727e-05, + "loss": 0.5348, "step": 2675 }, { - "epoch": 0.37452764170748776, - "grad_norm": 0.4077340731853976, - "learning_rate": 3.600000586427718e-05, - "loss": 0.5626, + "epoch": 0.7489504617968094, + "grad_norm": 0.23040467034208675, + "learning_rate": 8.950236094062537e-05, + "loss": 0.518, "step": 2676 }, { - "epoch": 0.37466759972008395, - "grad_norm": 0.432520643568383, - "learning_rate": 3.598982745132885e-05, - "loss": 0.5764, + "epoch": 0.7492303386509935, + "grad_norm": 0.2241049617264922, + "learning_rate": 8.949290241352124e-05, + "loss": 0.524, "step": 2677 }, { - "epoch": 0.3748075577326802, - "grad_norm": 0.3972351037179022, - "learning_rate": 3.597964677985946e-05, - "loss": 0.5439, + "epoch": 0.7495102155051777, + "grad_norm": 0.22660931524184508, + "learning_rate": 8.948344012746053e-05, + "loss": 0.5555, "step": 2678 }, { - "epoch": 0.3749475157452764, - "grad_norm": 0.421002231272715, - "learning_rate": 3.596946385196126e-05, - "loss": 0.5546, + "epoch": 0.7497900923593619, + "grad_norm": 0.21537979934095933, + "learning_rate": 8.947397408334391e-05, + "loss": 0.5045, "step": 2679 }, { - "epoch": 0.37508747375787266, - "grad_norm": 0.41810855014069603, - "learning_rate": 3.5959278669726935e-05, - "loss": 0.5862, + "epoch": 0.7500699692135461, + "grad_norm": 0.21305412259855636, + "learning_rate": 8.946450428207233e-05, + "loss": 0.5181, "step": 2680 }, { - "epoch": 0.37522743177046886, - "grad_norm": 0.3799508514194255, - "learning_rate": 3.594909123524965e-05, - "loss": 0.5434, + "epoch": 0.7503498460677303, + "grad_norm": 0.22846564876789416, + "learning_rate": 8.945503072454714e-05, + "loss": 0.5426, "step": 2681 }, { - "epoch": 0.37536738978306505, - "grad_norm": 0.4035591025821781, - "learning_rate": 3.593890155062302e-05, - "loss": 0.5798, + "epoch": 0.7506297229219143, + "grad_norm": 0.21719840487731035, + "learning_rate": 8.944555341167004e-05, + "loss": 0.5142, "step": 2682 }, { - "epoch": 0.3755073477956613, - "grad_norm": 0.4114244071207664, - "learning_rate": 3.592870961794113e-05, - "loss": 0.6205, + "epoch": 0.7509095997760985, + "grad_norm": 0.20698249269282035, + "learning_rate": 8.94360723443431e-05, + "loss": 0.5607, "step": 2683 }, { - "epoch": 0.3756473058082575, - "grad_norm": 0.4076569672449696, - "learning_rate": 3.5918515439298526e-05, - "loss": 0.5798, + "epoch": 0.7511894766302827, + "grad_norm": 0.22016718001568167, + "learning_rate": 8.942658752346871e-05, + "loss": 0.5707, "step": 2684 }, { - "epoch": 0.37578726382085376, - "grad_norm": 0.4051977237739006, - "learning_rate": 3.5908319016790214e-05, - "loss": 0.5707, + "epoch": 0.7514693534844669, + "grad_norm": 0.21775658131362371, + "learning_rate": 8.941709894994966e-05, + "loss": 0.536, "step": 2685 }, { - "epoch": 0.37592722183344995, - "grad_norm": 0.41477449458013194, - "learning_rate": 3.589812035251167e-05, - "loss": 0.58, + "epoch": 0.751749230338651, + "grad_norm": 0.22609157050532247, + "learning_rate": 8.940760662468907e-05, + "loss": 0.5763, "step": 2686 }, { - "epoch": 0.3760671798460462, - "grad_norm": 0.4265813533603491, - "learning_rate": 3.588791944855881e-05, - "loss": 0.5891, + "epoch": 0.7520291071928351, + "grad_norm": 0.22980289403211504, + "learning_rate": 8.939811054859042e-05, + "loss": 0.5479, "step": 2687 }, { - "epoch": 0.3762071378586424, - "grad_norm": 0.4009173028671792, - "learning_rate": 3.587771630702803e-05, - "loss": 0.5559, + "epoch": 0.7523089840470193, + "grad_norm": 0.22606307563622416, + "learning_rate": 8.938861072255755e-05, + "loss": 0.5421, "step": 2688 }, { - "epoch": 0.3763470958712386, - "grad_norm": 0.41516714213673056, - "learning_rate": 3.586751093001618e-05, - "loss": 0.5518, + "epoch": 0.7525888609012035, + "grad_norm": 0.21508725728349393, + "learning_rate": 8.937910714749468e-05, + "loss": 0.5133, "step": 2689 }, { - "epoch": 0.37648705388383485, - "grad_norm": 0.4246576996594534, - "learning_rate": 3.5857303319620566e-05, - "loss": 0.5804, + "epoch": 0.7528687377553877, + "grad_norm": 0.22178141643697674, + "learning_rate": 8.936959982430634e-05, + "loss": 0.5314, "step": 2690 }, { - "epoch": 0.37662701189643105, - "grad_norm": 0.3882381660222785, - "learning_rate": 3.5847093477938956e-05, - "loss": 0.5588, + "epoch": 0.7531486146095718, + "grad_norm": 0.22299246748814808, + "learning_rate": 8.936008875389746e-05, + "loss": 0.5476, "step": 2691 }, { - "epoch": 0.3767669699090273, - "grad_norm": 0.41694303450281095, - "learning_rate": 3.583688140706958e-05, - "loss": 0.5694, + "epoch": 0.7534284914637559, + "grad_norm": 0.20794294107003272, + "learning_rate": 8.935057393717329e-05, + "loss": 0.5537, "step": 2692 }, { - "epoch": 0.3769069279216235, - "grad_norm": 0.39378160604472995, - "learning_rate": 3.5826667109111115e-05, - "loss": 0.5595, + "epoch": 0.7537083683179401, + "grad_norm": 0.21841321736125574, + "learning_rate": 8.934105537503947e-05, + "loss": 0.5323, "step": 2693 }, { - "epoch": 0.37704688593421976, - "grad_norm": 0.38592197692564867, - "learning_rate": 3.581645058616271e-05, - "loss": 0.5418, + "epoch": 0.7539882451721243, + "grad_norm": 0.23010265456635823, + "learning_rate": 8.933153306840199e-05, + "loss": 0.557, "step": 2694 }, { - "epoch": 0.37718684394681595, - "grad_norm": 0.3930395842122637, - "learning_rate": 3.580623184032396e-05, - "loss": 0.5424, + "epoch": 0.7542681220263084, + "grad_norm": 0.21571026554168998, + "learning_rate": 8.932200701816716e-05, + "loss": 0.5475, "step": 2695 }, { - "epoch": 0.37732680195941215, - "grad_norm": 0.4176452454515421, - "learning_rate": 3.579601087369492e-05, - "loss": 0.5433, + "epoch": 0.7545479988804926, + "grad_norm": 0.20343509482874472, + "learning_rate": 8.931247722524169e-05, + "loss": 0.5128, "step": 2696 }, { - "epoch": 0.3774667599720084, - "grad_norm": 0.4189236803610346, - "learning_rate": 3.5785787688376104e-05, - "loss": 0.5822, + "epoch": 0.7548278757346767, + "grad_norm": 0.21885553947566588, + "learning_rate": 8.930294369053265e-05, + "loss": 0.5391, "step": 2697 }, { - "epoch": 0.3776067179846046, - "grad_norm": 0.38962623396875146, - "learning_rate": 3.577556228646849e-05, - "loss": 0.5357, + "epoch": 0.7551077525888609, + "grad_norm": 0.21544555079315492, + "learning_rate": 8.929340641494743e-05, + "loss": 0.5403, "step": 2698 }, { - "epoch": 0.37774667599720085, - "grad_norm": 0.4020721519778404, - "learning_rate": 3.576533467007349e-05, - "loss": 0.5658, + "epoch": 0.755387629443045, + "grad_norm": 0.21953091916737594, + "learning_rate": 8.92838653993938e-05, + "loss": 0.5456, "step": 2699 }, { - "epoch": 0.37788663400979705, - "grad_norm": 0.43467388992700556, - "learning_rate": 3.5755104841292974e-05, - "loss": 0.6269, + "epoch": 0.7556675062972292, + "grad_norm": 0.20638901705605822, + "learning_rate": 8.927432064477985e-05, + "loss": 0.5377, "step": 2700 }, { - "epoch": 0.3780265920223933, - "grad_norm": 0.41515218431176615, - "learning_rate": 3.5744872802229296e-05, - "loss": 0.5777, + "epoch": 0.7559473831514134, + "grad_norm": 0.22355038181969095, + "learning_rate": 8.92647721520141e-05, + "loss": 0.5483, "step": 2701 }, { - "epoch": 0.3781665500349895, - "grad_norm": 0.4299136904928923, - "learning_rate": 3.5734638554985236e-05, - "loss": 0.6001, + "epoch": 0.7562272600055976, + "grad_norm": 0.22500535830489837, + "learning_rate": 8.925521992200536e-05, + "loss": 0.5274, "step": 2702 }, { - "epoch": 0.3783065080475857, - "grad_norm": 0.4051757493692295, - "learning_rate": 3.5724402101664023e-05, - "loss": 0.5337, + "epoch": 0.7565071368597817, + "grad_norm": 0.2253729760858358, + "learning_rate": 8.924566395566279e-05, + "loss": 0.5396, "step": 2703 }, { - "epoch": 0.37844646606018195, - "grad_norm": 0.41621036812349377, - "learning_rate": 3.571416344436938e-05, - "loss": 0.5754, + "epoch": 0.7567870137139658, + "grad_norm": 0.2122121189260114, + "learning_rate": 8.923610425389599e-05, + "loss": 0.5469, "step": 2704 }, { - "epoch": 0.37858642407277815, - "grad_norm": 0.4041482546847394, - "learning_rate": 3.5703922585205416e-05, - "loss": 0.5556, + "epoch": 0.75706689056815, + "grad_norm": 0.22644094592631228, + "learning_rate": 8.92265408176148e-05, + "loss": 0.5317, "step": 2705 }, { - "epoch": 0.3787263820853744, - "grad_norm": 0.4120614594717955, - "learning_rate": 3.569367952627677e-05, - "loss": 0.5881, + "epoch": 0.7573467674223342, + "grad_norm": 0.22333687597978505, + "learning_rate": 8.92169736477295e-05, + "loss": 0.5554, "step": 2706 }, { - "epoch": 0.3788663400979706, - "grad_norm": 0.4225642230493687, - "learning_rate": 3.5683434269688485e-05, - "loss": 0.5639, + "epoch": 0.7576266442765184, + "grad_norm": 0.2080552531340251, + "learning_rate": 8.920740274515072e-05, + "loss": 0.5019, "step": 2707 }, { - "epoch": 0.37900629811056685, - "grad_norm": 0.42743372284510195, - "learning_rate": 3.567318681754605e-05, - "loss": 0.5866, + "epoch": 0.7579065211307024, + "grad_norm": 0.23186829630086137, + "learning_rate": 8.919782811078938e-05, + "loss": 0.5198, "step": 2708 }, { - "epoch": 0.37914625612316305, - "grad_norm": 0.39443077799329296, - "learning_rate": 3.566293717195543e-05, - "loss": 0.5282, + "epoch": 0.7581863979848866, + "grad_norm": 0.21678582425035686, + "learning_rate": 8.918824974555682e-05, + "loss": 0.5361, "step": 2709 }, { - "epoch": 0.37928621413575925, - "grad_norm": 0.3985257910214282, - "learning_rate": 3.565268533502303e-05, - "loss": 0.5773, + "epoch": 0.7584662748390708, + "grad_norm": 0.22451487293338224, + "learning_rate": 8.917866765036473e-05, + "loss": 0.5262, "step": 2710 }, { - "epoch": 0.3794261721483555, - "grad_norm": 0.4048944059124916, - "learning_rate": 3.5642431308855705e-05, - "loss": 0.5366, + "epoch": 0.758746151693255, + "grad_norm": 0.220116295849953, + "learning_rate": 8.916908182612511e-05, + "loss": 0.541, "step": 2711 }, { - "epoch": 0.3795661301609517, - "grad_norm": 0.39796926870963256, - "learning_rate": 3.563217509556076e-05, - "loss": 0.5375, + "epoch": 0.7590260285474392, + "grad_norm": 0.21177038061510053, + "learning_rate": 8.915949227375037e-05, + "loss": 0.5211, "step": 2712 }, { - "epoch": 0.37970608817354795, - "grad_norm": 0.42405605140429314, - "learning_rate": 3.562191669724597e-05, - "loss": 0.5735, + "epoch": 0.7593059054016232, + "grad_norm": 0.21529028775339493, + "learning_rate": 8.914989899415323e-05, + "loss": 0.5371, "step": 2713 }, { - "epoch": 0.37984604618614415, - "grad_norm": 0.415293963968241, - "learning_rate": 3.56116561160195e-05, - "loss": 0.5616, + "epoch": 0.7595857822558074, + "grad_norm": 0.23356289952438744, + "learning_rate": 8.91403019882468e-05, + "loss": 0.6028, "step": 2714 }, { - "epoch": 0.3799860041987404, - "grad_norm": 0.40013621152671924, - "learning_rate": 3.5601393353990046e-05, - "loss": 0.5454, + "epoch": 0.7598656591099916, + "grad_norm": 0.21624377736402656, + "learning_rate": 8.913070125694452e-05, + "loss": 0.5405, "step": 2715 }, { - "epoch": 0.3801259622113366, - "grad_norm": 0.4248537718600079, - "learning_rate": 3.5591128413266686e-05, - "loss": 0.5732, + "epoch": 0.7601455359641758, + "grad_norm": 0.22031622012182514, + "learning_rate": 8.912109680116016e-05, + "loss": 0.5367, "step": 2716 }, { - "epoch": 0.3802659202239328, - "grad_norm": 0.4520252264272862, - "learning_rate": 3.558086129595898e-05, - "loss": 0.53, + "epoch": 0.76042541281836, + "grad_norm": 0.21890336861203946, + "learning_rate": 8.911148862180794e-05, + "loss": 0.5365, "step": 2717 }, { - "epoch": 0.38040587823652905, - "grad_norm": 0.43210620967696467, - "learning_rate": 3.557059200417691e-05, - "loss": 0.572, + "epoch": 0.760705289672544, + "grad_norm": 0.22556741663629698, + "learning_rate": 8.910187671980233e-05, + "loss": 0.5598, "step": 2718 }, { - "epoch": 0.38054583624912525, - "grad_norm": 0.4125830527654036, - "learning_rate": 3.556032054003093e-05, - "loss": 0.5677, + "epoch": 0.7609851665267282, + "grad_norm": 0.2170309750725635, + "learning_rate": 8.909226109605822e-05, + "loss": 0.5131, "step": 2719 }, { - "epoch": 0.3806857942617215, - "grad_norm": 0.4121722034558122, - "learning_rate": 3.555004690563193e-05, - "loss": 0.557, + "epoch": 0.7612650433809124, + "grad_norm": 0.2235836195817433, + "learning_rate": 8.908264175149081e-05, + "loss": 0.5374, "step": 2720 }, { - "epoch": 0.3808257522743177, - "grad_norm": 0.4349037334371718, - "learning_rate": 3.553977110309125e-05, - "loss": 0.6072, + "epoch": 0.7615449202350966, + "grad_norm": 0.20347233313344, + "learning_rate": 8.907301868701567e-05, + "loss": 0.5171, "step": 2721 }, { - "epoch": 0.38096571028691395, - "grad_norm": 0.4145990063820183, - "learning_rate": 3.552949313452067e-05, - "loss": 0.5716, + "epoch": 0.7618247970892807, + "grad_norm": 0.21826991398296136, + "learning_rate": 8.906339190354875e-05, + "loss": 0.5435, "step": 2722 }, { - "epoch": 0.38110566829951015, - "grad_norm": 0.4199300937861851, - "learning_rate": 3.5519213002032404e-05, - "loss": 0.55, + "epoch": 0.7621046739434649, + "grad_norm": 0.2190244617422154, + "learning_rate": 8.905376140200635e-05, + "loss": 0.5426, "step": 2723 }, { - "epoch": 0.38124562631210634, - "grad_norm": 0.4125752251581557, - "learning_rate": 3.550893070773914e-05, - "loss": 0.5758, + "epoch": 0.762384550797649, + "grad_norm": 0.21375749928212628, + "learning_rate": 8.904412718330504e-05, + "loss": 0.5213, "step": 2724 }, { - "epoch": 0.3813855843247026, - "grad_norm": 0.42548401480941495, - "learning_rate": 3.5498646253753986e-05, - "loss": 0.5751, + "epoch": 0.7626644276518332, + "grad_norm": 0.23205902236105794, + "learning_rate": 8.903448924836188e-05, + "loss": 0.5342, "step": 2725 }, { - "epoch": 0.3815255423372988, - "grad_norm": 0.42522116248828645, - "learning_rate": 3.54883596421905e-05, - "loss": 0.5816, + "epoch": 0.7629443045060174, + "grad_norm": 0.21547996683498147, + "learning_rate": 8.902484759809416e-05, + "loss": 0.519, "step": 2726 }, { - "epoch": 0.38166550034989505, - "grad_norm": 0.41791254635783553, - "learning_rate": 3.5478070875162694e-05, - "loss": 0.5298, + "epoch": 0.7632241813602015, + "grad_norm": 0.22224134483386554, + "learning_rate": 8.901520223341961e-05, + "loss": 0.5205, "step": 2727 }, { - "epoch": 0.38180545836249125, - "grad_norm": 0.4058287630837772, - "learning_rate": 3.5467779954785e-05, - "loss": 0.5567, + "epoch": 0.7635040582143857, + "grad_norm": 0.2202861538829083, + "learning_rate": 8.900555315525629e-05, + "loss": 0.5359, "step": 2728 }, { - "epoch": 0.3819454163750875, - "grad_norm": 0.42582677147713893, - "learning_rate": 3.545748688317232e-05, - "loss": 0.6058, + "epoch": 0.7637839350685698, + "grad_norm": 0.22905506573800075, + "learning_rate": 8.899590036452257e-05, + "loss": 0.5444, "step": 2729 }, { - "epoch": 0.3820853743876837, - "grad_norm": 0.4103609286165152, - "learning_rate": 3.544719166243998e-05, - "loss": 0.5774, + "epoch": 0.764063811922754, + "grad_norm": 0.22835262069963058, + "learning_rate": 8.898624386213725e-05, + "loss": 0.5464, "step": 2730 }, { - "epoch": 0.3822253324002799, - "grad_norm": 0.401572700005355, - "learning_rate": 3.543689429470375e-05, - "loss": 0.5411, + "epoch": 0.7643436887769381, + "grad_norm": 0.2163484683591616, + "learning_rate": 8.89765836490194e-05, + "loss": 0.5265, "step": 2731 }, { - "epoch": 0.38236529041287615, - "grad_norm": 0.3929209916840099, - "learning_rate": 3.5426594782079846e-05, - "loss": 0.5373, + "epoch": 0.7646235656311223, + "grad_norm": 0.21830680694234028, + "learning_rate": 8.896691972608849e-05, + "loss": 0.5472, "step": 2732 }, { - "epoch": 0.38250524842547234, - "grad_norm": 0.40599385466099075, - "learning_rate": 3.541629312668492e-05, - "loss": 0.6015, + "epoch": 0.7649034424853065, + "grad_norm": 0.21113697085923283, + "learning_rate": 8.895725209426436e-05, + "loss": 0.532, "step": 2733 }, { - "epoch": 0.3826452064380686, - "grad_norm": 0.41006832838571494, - "learning_rate": 3.540598933063607e-05, - "loss": 0.5509, + "epoch": 0.7651833193394906, + "grad_norm": 0.22708482386870013, + "learning_rate": 8.894758075446718e-05, + "loss": 0.5669, "step": 2734 }, { - "epoch": 0.3827851644506648, - "grad_norm": 0.4107953458690507, - "learning_rate": 3.5395683396050825e-05, - "loss": 0.5818, + "epoch": 0.7654631961936748, + "grad_norm": 0.2254664501566954, + "learning_rate": 8.893790570761746e-05, + "loss": 0.5324, "step": 2735 }, { - "epoch": 0.38292512246326105, - "grad_norm": 0.4164444405879751, - "learning_rate": 3.5385375325047166e-05, - "loss": 0.5664, + "epoch": 0.7657430730478589, + "grad_norm": 0.22068391267207088, + "learning_rate": 8.892822695463607e-05, + "loss": 0.559, "step": 2736 }, { - "epoch": 0.38306508047585724, - "grad_norm": 0.39131447269722003, - "learning_rate": 3.53750651197435e-05, - "loss": 0.5853, + "epoch": 0.7660229499020431, + "grad_norm": 0.2168443428728145, + "learning_rate": 8.891854449644426e-05, + "loss": 0.5379, "step": 2737 }, { - "epoch": 0.38320503848845344, - "grad_norm": 0.4251859387890631, - "learning_rate": 3.5364752782258684e-05, - "loss": 0.6136, + "epoch": 0.7663028267562273, + "grad_norm": 0.21181212036692257, + "learning_rate": 8.890885833396361e-05, + "loss": 0.551, "step": 2738 }, { - "epoch": 0.3833449965010497, - "grad_norm": 0.4074457295926625, - "learning_rate": 3.535443831471201e-05, - "loss": 0.5696, + "epoch": 0.7665827036104114, + "grad_norm": 0.2194815162367123, + "learning_rate": 8.889916846811602e-05, + "loss": 0.5625, "step": 2739 }, { - "epoch": 0.3834849545136459, - "grad_norm": 0.4275456468988683, - "learning_rate": 3.534412171922319e-05, - "loss": 0.5782, + "epoch": 0.7668625804645955, + "grad_norm": 0.22642300385422914, + "learning_rate": 8.888947489982382e-05, + "loss": 0.5429, "step": 2740 }, { - "epoch": 0.38362491252624215, - "grad_norm": 0.3958129275901648, - "learning_rate": 3.533380299791241e-05, - "loss": 0.6016, + "epoch": 0.7671424573187797, + "grad_norm": 0.21777346578717538, + "learning_rate": 8.887977763000963e-05, + "loss": 0.5217, "step": 2741 }, { - "epoch": 0.38376487053883834, - "grad_norm": 0.41339333649591314, - "learning_rate": 3.5323482152900254e-05, - "loss": 0.5741, + "epoch": 0.7674223341729639, + "grad_norm": 0.21351032496663272, + "learning_rate": 8.887007665959643e-05, + "loss": 0.5345, "step": 2742 }, { - "epoch": 0.3839048285514346, - "grad_norm": 0.4018942596879924, - "learning_rate": 3.5313159186307784e-05, - "loss": 0.5567, + "epoch": 0.7677022110271481, + "grad_norm": 0.22136583939301327, + "learning_rate": 8.88603719895076e-05, + "loss": 0.5195, "step": 2743 }, { - "epoch": 0.3840447865640308, - "grad_norm": 0.40959426159798856, - "learning_rate": 3.530283410025645e-05, - "loss": 0.5545, + "epoch": 0.7679820878813323, + "grad_norm": 0.21902060567153697, + "learning_rate": 8.885066362066679e-05, + "loss": 0.5438, "step": 2744 }, { - "epoch": 0.384184744576627, - "grad_norm": 0.39955019012640813, - "learning_rate": 3.529250689686817e-05, - "loss": 0.5491, + "epoch": 0.7682619647355163, + "grad_norm": 0.22396010992489948, + "learning_rate": 8.884095155399808e-05, + "loss": 0.5335, "step": 2745 }, { - "epoch": 0.38432470258922324, - "grad_norm": 0.41725417707805595, - "learning_rate": 3.5282177578265296e-05, - "loss": 0.5569, + "epoch": 0.7685418415897005, + "grad_norm": 0.2248353337599658, + "learning_rate": 8.883123579042587e-05, + "loss": 0.512, "step": 2746 }, { - "epoch": 0.38446466060181944, - "grad_norm": 0.4045943366257942, - "learning_rate": 3.527184614657059e-05, - "loss": 0.5303, + "epoch": 0.7688217184438847, + "grad_norm": 0.22778594293934562, + "learning_rate": 8.88215163308749e-05, + "loss": 0.5367, "step": 2747 }, { - "epoch": 0.3846046186144157, - "grad_norm": 0.39867086864097645, - "learning_rate": 3.526151260390729e-05, - "loss": 0.5567, + "epoch": 0.7691015952980689, + "grad_norm": 0.22070845746206372, + "learning_rate": 8.881179317627027e-05, + "loss": 0.5526, "step": 2748 }, { - "epoch": 0.3847445766270119, - "grad_norm": 0.3971092164877937, - "learning_rate": 3.525117695239903e-05, - "loss": 0.5517, + "epoch": 0.769381472152253, + "grad_norm": 0.22597830242137348, + "learning_rate": 8.880206632753744e-05, + "loss": 0.5422, "step": 2749 }, { - "epoch": 0.38488453463960814, - "grad_norm": 0.40368782181512425, - "learning_rate": 3.5240839194169885e-05, - "loss": 0.554, + "epoch": 0.7696613490064371, + "grad_norm": 0.21202209406816486, + "learning_rate": 8.879233578560222e-05, + "loss": 0.5309, "step": 2750 }, { - "epoch": 0.38502449265220434, - "grad_norm": 0.4012824600908186, - "learning_rate": 3.523049933134439e-05, - "loss": 0.5622, + "epoch": 0.7699412258606213, + "grad_norm": 0.2339424440918282, + "learning_rate": 8.878260155139078e-05, + "loss": 0.5406, "step": 2751 }, { - "epoch": 0.38516445066480054, - "grad_norm": 0.40807955297557885, - "learning_rate": 3.522015736604747e-05, - "loss": 0.5757, + "epoch": 0.7702211027148055, + "grad_norm": 0.2199231232761973, + "learning_rate": 8.877286362582959e-05, + "loss": 0.5382, "step": 2752 }, { - "epoch": 0.3853044086773968, - "grad_norm": 0.401439172166039, - "learning_rate": 3.5209813300404516e-05, - "loss": 0.5523, + "epoch": 0.7705009795689897, + "grad_norm": 0.23479978285061998, + "learning_rate": 8.876312200984556e-05, + "loss": 0.5593, "step": 2753 }, { - "epoch": 0.385444366689993, - "grad_norm": 0.4188699833926191, - "learning_rate": 3.519946713654134e-05, - "loss": 0.585, + "epoch": 0.7707808564231738, + "grad_norm": 0.22459337014686442, + "learning_rate": 8.875337670436587e-05, + "loss": 0.5594, "step": 2754 }, { - "epoch": 0.38558432470258924, - "grad_norm": 0.4123520096515134, - "learning_rate": 3.518911887658418e-05, - "loss": 0.5681, + "epoch": 0.7710607332773579, + "grad_norm": 0.22300395102172538, + "learning_rate": 8.87436277103181e-05, + "loss": 0.5329, "step": 2755 }, { - "epoch": 0.38572428271518544, - "grad_norm": 0.41652405577544405, - "learning_rate": 3.5178768522659697e-05, - "loss": 0.571, + "epoch": 0.7713406101315421, + "grad_norm": 0.2150771771958537, + "learning_rate": 8.873387502863015e-05, + "loss": 0.5249, "step": 2756 }, { - "epoch": 0.3858642407277817, - "grad_norm": 0.39453566372212684, - "learning_rate": 3.516841607689501e-05, - "loss": 0.5368, + "epoch": 0.7716204869857263, + "grad_norm": 0.22949069847397122, + "learning_rate": 8.872411866023031e-05, + "loss": 0.5563, "step": 2757 }, { - "epoch": 0.3860041987403779, - "grad_norm": 0.41756153998856244, - "learning_rate": 3.5158061541417644e-05, - "loss": 0.5799, + "epoch": 0.7719003638399105, + "grad_norm": 0.20382007322682827, + "learning_rate": 8.871435860604717e-05, + "loss": 0.5228, "step": 2758 }, { - "epoch": 0.3861441567529741, - "grad_norm": 0.41467425735936986, - "learning_rate": 3.514770491835556e-05, - "loss": 0.5743, + "epoch": 0.7721802406940946, + "grad_norm": 0.21711045845545285, + "learning_rate": 8.87045948670097e-05, + "loss": 0.5237, "step": 2759 }, { - "epoch": 0.38628411476557034, - "grad_norm": 0.3947895347030933, - "learning_rate": 3.5137346209837165e-05, - "loss": 0.5059, + "epoch": 0.7724601175482787, + "grad_norm": 0.22083055732760784, + "learning_rate": 8.869482744404724e-05, + "loss": 0.5343, "step": 2760 }, { - "epoch": 0.38642407277816654, - "grad_norm": 0.4139447535493024, - "learning_rate": 3.5126985417991254e-05, - "loss": 0.5196, + "epoch": 0.7727399944024629, + "grad_norm": 0.22910669410226342, + "learning_rate": 8.868505633808946e-05, + "loss": 0.5212, "step": 2761 }, { - "epoch": 0.3865640307907628, - "grad_norm": 0.4002485124686976, - "learning_rate": 3.5116622544947085e-05, - "loss": 0.5704, + "epoch": 0.7730198712566471, + "grad_norm": 0.2149562405455874, + "learning_rate": 8.867528155006633e-05, + "loss": 0.535, "step": 2762 }, { - "epoch": 0.386703988803359, - "grad_norm": 0.41066912276683826, - "learning_rate": 3.5106257592834343e-05, - "loss": 0.5808, + "epoch": 0.7732997481108312, + "grad_norm": 0.20511754519125563, + "learning_rate": 8.866550308090828e-05, + "loss": 0.5079, "step": 2763 }, { - "epoch": 0.3868439468159552, - "grad_norm": 0.40281746677069036, - "learning_rate": 3.509589056378312e-05, - "loss": 0.5779, + "epoch": 0.7735796249650154, + "grad_norm": 0.23127912590994432, + "learning_rate": 8.8655720931546e-05, + "loss": 0.5455, "step": 2764 }, { - "epoch": 0.38698390482855144, - "grad_norm": 0.4010030729597531, - "learning_rate": 3.5085521459923954e-05, - "loss": 0.5476, + "epoch": 0.7738595018191996, + "grad_norm": 0.2267108511790525, + "learning_rate": 8.864593510291055e-05, + "loss": 0.5465, "step": 2765 }, { - "epoch": 0.38712386284114764, - "grad_norm": 0.4116926245763738, - "learning_rate": 3.507515028338779e-05, - "loss": 0.5448, + "epoch": 0.7741393786733837, + "grad_norm": 0.2254690183852538, + "learning_rate": 8.863614559593339e-05, + "loss": 0.5399, "step": 2766 }, { - "epoch": 0.3872638208537439, - "grad_norm": 0.4048451003049423, - "learning_rate": 3.5064777036306015e-05, - "loss": 0.5119, + "epoch": 0.7744192555275679, + "grad_norm": 0.22791125506573726, + "learning_rate": 8.862635241154625e-05, + "loss": 0.5502, "step": 2767 }, { - "epoch": 0.3874037788663401, - "grad_norm": 0.41709617305694274, - "learning_rate": 3.505440172081044e-05, - "loss": 0.5769, + "epoch": 0.774699132381752, + "grad_norm": 0.21637335970823557, + "learning_rate": 8.861655555068127e-05, + "loss": 0.5618, "step": 2768 }, { - "epoch": 0.38754373687893634, - "grad_norm": 0.41584372992034574, - "learning_rate": 3.5044024339033297e-05, - "loss": 0.5302, + "epoch": 0.7749790092359362, + "grad_norm": 0.21816634264789905, + "learning_rate": 8.860675501427091e-05, + "loss": 0.5314, "step": 2769 }, { - "epoch": 0.38768369489153254, - "grad_norm": 0.4065168099113291, - "learning_rate": 3.503364489310723e-05, - "loss": 0.602, + "epoch": 0.7752588860901204, + "grad_norm": 0.22643928807573033, + "learning_rate": 8.859695080324801e-05, + "loss": 0.5249, "step": 2770 }, { - "epoch": 0.38782365290412874, - "grad_norm": 0.4163553923214921, - "learning_rate": 3.502326338516534e-05, - "loss": 0.5694, + "epoch": 0.7755387629443045, + "grad_norm": 0.22942199525901047, + "learning_rate": 8.858714291854573e-05, + "loss": 0.5652, "step": 2771 }, { - "epoch": 0.387963610916725, - "grad_norm": 0.40029772682589904, - "learning_rate": 3.501287981734113e-05, - "loss": 0.5593, + "epoch": 0.7758186397984886, + "grad_norm": 0.2169522453773073, + "learning_rate": 8.857733136109758e-05, + "loss": 0.5333, "step": 2772 }, { - "epoch": 0.3881035689293212, - "grad_norm": 0.4078208586874547, - "learning_rate": 3.5002494191768516e-05, - "loss": 0.5501, + "epoch": 0.7760985166526728, + "grad_norm": 0.2246009564360003, + "learning_rate": 8.856751613183745e-05, + "loss": 0.5364, "step": 2773 }, { - "epoch": 0.38824352694191744, - "grad_norm": 0.392227014886232, - "learning_rate": 3.499210651058185e-05, - "loss": 0.5752, + "epoch": 0.776378393506857, + "grad_norm": 0.23165635218522246, + "learning_rate": 8.855769723169954e-05, + "loss": 0.576, "step": 2774 }, { - "epoch": 0.38838348495451364, - "grad_norm": 0.41190554114378175, - "learning_rate": 3.498171677591593e-05, - "loss": 0.5359, + "epoch": 0.7766582703610412, + "grad_norm": 0.22344252442325427, + "learning_rate": 8.854787466161842e-05, + "loss": 0.5607, "step": 2775 }, { - "epoch": 0.3885234429671099, - "grad_norm": 0.4137674619685686, - "learning_rate": 3.497132498990592e-05, - "loss": 0.5648, + "epoch": 0.7769381472152252, + "grad_norm": 0.22061510174243557, + "learning_rate": 8.853804842252903e-05, + "loss": 0.5355, "step": 2776 }, { - "epoch": 0.3886634009797061, - "grad_norm": 0.4063767289121146, - "learning_rate": 3.496093115468745e-05, - "loss": 0.5903, + "epoch": 0.7772180240694094, + "grad_norm": 0.21645191801589111, + "learning_rate": 8.852821851536661e-05, + "loss": 0.5365, "step": 2777 }, { - "epoch": 0.3888033589923023, - "grad_norm": 0.40730098042246266, - "learning_rate": 3.495053527239656e-05, - "loss": 0.5907, + "epoch": 0.7774979009235936, + "grad_norm": 0.2175101734293596, + "learning_rate": 8.851838494106678e-05, + "loss": 0.5434, "step": 2778 }, { - "epoch": 0.38894331700489854, - "grad_norm": 0.3939529427168923, - "learning_rate": 3.494013734516971e-05, - "loss": 0.5748, + "epoch": 0.7777777777777778, + "grad_norm": 0.21406000462718366, + "learning_rate": 8.850854770056554e-05, + "loss": 0.5381, "step": 2779 }, { - "epoch": 0.38908327501749473, - "grad_norm": 0.40700996952853286, - "learning_rate": 3.492973737514378e-05, - "loss": 0.57, + "epoch": 0.778057654631962, + "grad_norm": 0.22331999551569442, + "learning_rate": 8.849870679479915e-05, + "loss": 0.5199, "step": 2780 }, { - "epoch": 0.389223233030091, - "grad_norm": 0.4127250869631396, - "learning_rate": 3.491933536445606e-05, - "loss": 0.576, + "epoch": 0.7783375314861462, + "grad_norm": 0.21943748138385877, + "learning_rate": 8.84888622247043e-05, + "loss": 0.5575, "step": 2781 }, { - "epoch": 0.3893631910426872, - "grad_norm": 0.42198868253989263, - "learning_rate": 3.490893131524429e-05, - "loss": 0.5982, + "epoch": 0.7786174083403302, + "grad_norm": 0.20529141525084196, + "learning_rate": 8.8479013991218e-05, + "loss": 0.5208, "step": 2782 }, { - "epoch": 0.38950314905528344, - "grad_norm": 0.3655454661119806, - "learning_rate": 3.489852522964658e-05, - "loss": 0.565, + "epoch": 0.7788972851945144, + "grad_norm": 0.2262937696827364, + "learning_rate": 8.846916209527763e-05, + "loss": 0.559, "step": 2783 }, { - "epoch": 0.38964310706787963, - "grad_norm": 0.4144948208886313, - "learning_rate": 3.488811710980151e-05, - "loss": 0.5674, + "epoch": 0.7791771620486986, + "grad_norm": 0.22596654737586358, + "learning_rate": 8.845930653782086e-05, + "loss": 0.5414, "step": 2784 }, { - "epoch": 0.38978306508047583, - "grad_norm": 0.4074829627997144, - "learning_rate": 3.487770695784805e-05, - "loss": 0.5748, + "epoch": 0.7794570389028828, + "grad_norm": 0.2274190341430264, + "learning_rate": 8.844944731978577e-05, + "loss": 0.5374, "step": 2785 }, { - "epoch": 0.3899230230930721, - "grad_norm": 0.41858532067834603, - "learning_rate": 3.486729477592558e-05, - "loss": 0.5814, + "epoch": 0.7797369157570669, + "grad_norm": 0.22649843895076985, + "learning_rate": 8.843958444211075e-05, + "loss": 0.5258, "step": 2786 }, { - "epoch": 0.3900629811056683, - "grad_norm": 0.41786381819649815, - "learning_rate": 3.485688056617391e-05, - "loss": 0.5818, + "epoch": 0.780016792611251, + "grad_norm": 0.22725924280520562, + "learning_rate": 8.84297179057346e-05, + "loss": 0.5388, "step": 2787 }, { - "epoch": 0.39020293911826454, - "grad_norm": 0.43164286265234475, - "learning_rate": 3.484646433073328e-05, - "loss": 0.6056, + "epoch": 0.7802966694654352, + "grad_norm": 0.22689835726130753, + "learning_rate": 8.841984771159636e-05, + "loss": 0.5034, "step": 2788 }, { - "epoch": 0.39034289713086073, - "grad_norm": 0.41022430212941835, - "learning_rate": 3.483604607174432e-05, - "loss": 0.586, + "epoch": 0.7805765463196194, + "grad_norm": 0.23329504821929226, + "learning_rate": 8.840997386063553e-05, + "loss": 0.5269, "step": 2789 }, { - "epoch": 0.390482855143457, - "grad_norm": 0.4232828423199565, - "learning_rate": 3.4825625791348096e-05, - "loss": 0.5569, + "epoch": 0.7808564231738035, + "grad_norm": 0.216598053395184, + "learning_rate": 8.840009635379187e-05, + "loss": 0.5629, "step": 2790 }, { - "epoch": 0.3906228131560532, - "grad_norm": 0.400556356726097, - "learning_rate": 3.481520349168607e-05, - "loss": 0.5853, + "epoch": 0.7811363000279877, + "grad_norm": 0.22091397007640448, + "learning_rate": 8.839021519200557e-05, + "loss": 0.5364, "step": 2791 }, { - "epoch": 0.3907627711686494, - "grad_norm": 0.4163389824429049, - "learning_rate": 3.480477917490014e-05, - "loss": 0.5423, + "epoch": 0.7814161768821718, + "grad_norm": 0.22454445877781262, + "learning_rate": 8.838033037621708e-05, + "loss": 0.5398, "step": 2792 }, { - "epoch": 0.39090272918124563, - "grad_norm": 0.40119034319759783, - "learning_rate": 3.479435284313261e-05, - "loss": 0.5618, + "epoch": 0.781696053736356, + "grad_norm": 0.22364940410539516, + "learning_rate": 8.837044190736726e-05, + "loss": 0.5298, "step": 2793 }, { - "epoch": 0.39104268719384183, - "grad_norm": 0.41230633715814125, - "learning_rate": 3.4783924498526184e-05, - "loss": 0.566, + "epoch": 0.7819759305905402, + "grad_norm": 0.20652769204510682, + "learning_rate": 8.836054978639732e-05, + "loss": 0.5264, "step": 2794 }, { - "epoch": 0.3911826452064381, - "grad_norm": 0.38406879859249193, - "learning_rate": 3.4773494143224e-05, - "loss": 0.5728, + "epoch": 0.7822558074447243, + "grad_norm": 0.21276771356881036, + "learning_rate": 8.835065401424877e-05, + "loss": 0.515, "step": 2795 }, { - "epoch": 0.3913226032190343, - "grad_norm": 0.42250079129339874, - "learning_rate": 3.476306177936961e-05, - "loss": 0.5937, + "epoch": 0.7825356842989085, + "grad_norm": 0.21361040126559733, + "learning_rate": 8.83407545918635e-05, + "loss": 0.535, "step": 2796 }, { - "epoch": 0.39146256123163053, - "grad_norm": 0.39662665146228837, - "learning_rate": 3.475262740910696e-05, - "loss": 0.5644, + "epoch": 0.7828155611530926, + "grad_norm": 0.23286320904578853, + "learning_rate": 8.833085152018375e-05, + "loss": 0.5716, "step": 2797 }, { - "epoch": 0.39160251924422673, - "grad_norm": 0.42760877117863655, - "learning_rate": 3.474219103458043e-05, - "loss": 0.5762, + "epoch": 0.7830954380072768, + "grad_norm": 0.22105089026834285, + "learning_rate": 8.83209448001521e-05, + "loss": 0.5376, "step": 2798 }, { - "epoch": 0.39174247725682293, - "grad_norm": 0.3990049510043462, - "learning_rate": 3.4731752657934794e-05, - "loss": 0.5234, + "epoch": 0.783375314861461, + "grad_norm": 0.2117441316678202, + "learning_rate": 8.83110344327115e-05, + "loss": 0.492, "step": 2799 }, { - "epoch": 0.3918824352694192, - "grad_norm": 0.3870544184519613, - "learning_rate": 3.4721312281315236e-05, - "loss": 0.5921, + "epoch": 0.7836551917156451, + "grad_norm": 0.22413333754161954, + "learning_rate": 8.830112041880516e-05, + "loss": 0.5259, "step": 2800 }, { - "epoch": 0.3920223932820154, - "grad_norm": 0.3907379222640409, - "learning_rate": 3.471086990686737e-05, - "loss": 0.5402, + "epoch": 0.7839350685698293, + "grad_norm": 0.22303533904875358, + "learning_rate": 8.829120275937675e-05, + "loss": 0.5289, "step": 2801 }, { - "epoch": 0.39216235129461163, - "grad_norm": 0.42863016339615406, - "learning_rate": 3.470042553673721e-05, - "loss": 0.6075, + "epoch": 0.7842149454240135, + "grad_norm": 0.23224013663200205, + "learning_rate": 8.828128145537024e-05, + "loss": 0.5378, "step": 2802 }, { - "epoch": 0.39230230930720783, - "grad_norm": 0.40157662221583645, - "learning_rate": 3.468997917307118e-05, - "loss": 0.5723, + "epoch": 0.7844948222781976, + "grad_norm": 0.21037489494798792, + "learning_rate": 8.827135650772994e-05, + "loss": 0.5339, "step": 2803 }, { - "epoch": 0.3924422673198041, - "grad_norm": 0.42138157521984687, - "learning_rate": 3.46795308180161e-05, - "loss": 0.5605, + "epoch": 0.7847746991323817, + "grad_norm": 0.22897771454277885, + "learning_rate": 8.826142791740049e-05, + "loss": 0.5531, "step": 2804 }, { - "epoch": 0.3925822253324003, - "grad_norm": 0.39033795656224785, - "learning_rate": 3.466908047371923e-05, - "loss": 0.5814, + "epoch": 0.7850545759865659, + "grad_norm": 0.219861900135347, + "learning_rate": 8.825149568532691e-05, + "loss": 0.5298, "step": 2805 }, { - "epoch": 0.3927221833449965, - "grad_norm": 0.3904474581637845, - "learning_rate": 3.465862814232822e-05, - "loss": 0.5662, + "epoch": 0.7853344528407501, + "grad_norm": 0.23432080612454642, + "learning_rate": 8.824155981245457e-05, + "loss": 0.5352, "step": 2806 }, { - "epoch": 0.39286214135759273, - "grad_norm": 0.4019162642970993, - "learning_rate": 3.464817382599112e-05, - "loss": 0.5509, + "epoch": 0.7856143296949343, + "grad_norm": 0.21523092077944095, + "learning_rate": 8.823162029972917e-05, + "loss": 0.5283, "step": 2807 }, { - "epoch": 0.39300209937018893, - "grad_norm": 0.4060877755013805, - "learning_rate": 3.4637717526856406e-05, - "loss": 0.5642, + "epoch": 0.7858942065491183, + "grad_norm": 0.2201121770858865, + "learning_rate": 8.822167714809673e-05, + "loss": 0.5473, "step": 2808 }, { - "epoch": 0.3931420573827852, - "grad_norm": 0.41153563140737226, - "learning_rate": 3.462725924707295e-05, - "loss": 0.574, + "epoch": 0.7861740834033025, + "grad_norm": 0.22845228302948312, + "learning_rate": 8.821173035850368e-05, + "loss": 0.5447, "step": 2809 }, { - "epoch": 0.3932820153953814, - "grad_norm": 0.4177210107228342, - "learning_rate": 3.461679898879004e-05, - "loss": 0.6133, + "epoch": 0.7864539602574867, + "grad_norm": 0.2177072634790322, + "learning_rate": 8.820177993189675e-05, + "loss": 0.5308, "step": 2810 }, { - "epoch": 0.39342197340797763, - "grad_norm": 0.3996252120493831, - "learning_rate": 3.460633675415736e-05, - "loss": 0.5635, + "epoch": 0.7867338371116709, + "grad_norm": 0.21641334292617864, + "learning_rate": 8.819182586922302e-05, + "loss": 0.5322, "step": 2811 }, { - "epoch": 0.39356193142057383, - "grad_norm": 0.417067202150554, - "learning_rate": 3.459587254532502e-05, - "loss": 0.548, + "epoch": 0.7870137139658551, + "grad_norm": 0.221026995060422, + "learning_rate": 8.818186817142993e-05, + "loss": 0.5378, "step": 2812 }, { - "epoch": 0.39370188943317, - "grad_norm": 0.4855752987156052, - "learning_rate": 3.458540636444349e-05, - "loss": 0.5833, + "epoch": 0.7872935908200391, + "grad_norm": 0.22261066108655456, + "learning_rate": 8.817190683946526e-05, + "loss": 0.5218, "step": 2813 }, { - "epoch": 0.3938418474457663, - "grad_norm": 0.40124325073974404, - "learning_rate": 3.457493821366369e-05, - "loss": 0.5403, + "epoch": 0.7875734676742233, + "grad_norm": 0.2180297967634272, + "learning_rate": 8.816194187427711e-05, + "loss": 0.5211, "step": 2814 }, { - "epoch": 0.3939818054583625, - "grad_norm": 0.41052139871938925, - "learning_rate": 3.456446809513695e-05, - "loss": 0.5639, + "epoch": 0.7878533445284075, + "grad_norm": 0.2219325714430237, + "learning_rate": 8.815197327681399e-05, + "loss": 0.5303, "step": 2815 }, { - "epoch": 0.39412176347095873, - "grad_norm": 0.429168200503203, - "learning_rate": 3.455399601101497e-05, - "loss": 0.6069, + "epoch": 0.7881332213825917, + "grad_norm": 0.21901054380779303, + "learning_rate": 8.814200104802469e-05, + "loss": 0.5541, "step": 2816 }, { - "epoch": 0.3942617214835549, - "grad_norm": 0.4130675301409534, - "learning_rate": 3.4543521963449857e-05, - "loss": 0.5628, + "epoch": 0.7884130982367759, + "grad_norm": 0.2182971480723001, + "learning_rate": 8.81320251888584e-05, + "loss": 0.5184, "step": 2817 }, { - "epoch": 0.3944016794961512, - "grad_norm": 0.4028085056325733, - "learning_rate": 3.4533045954594164e-05, - "loss": 0.5806, + "epoch": 0.7886929750909599, + "grad_norm": 0.2234946616304098, + "learning_rate": 8.812204570026458e-05, + "loss": 0.5167, "step": 2818 }, { - "epoch": 0.3945416375087474, - "grad_norm": 0.40759739919737564, - "learning_rate": 3.452256798660079e-05, - "loss": 0.6007, + "epoch": 0.7889728519451441, + "grad_norm": 0.22267377011421308, + "learning_rate": 8.811206258319313e-05, + "loss": 0.5507, "step": 2819 }, { - "epoch": 0.3946815955213436, - "grad_norm": 0.41359320045974896, - "learning_rate": 3.4512088061623075e-05, - "loss": 0.5912, + "epoch": 0.7892527287993283, + "grad_norm": 0.2247193866649521, + "learning_rate": 8.810207583859423e-05, + "loss": 0.5305, "step": 2820 }, { - "epoch": 0.39482155353393983, - "grad_norm": 0.3896223345474862, - "learning_rate": 3.450160618181476e-05, - "loss": 0.5583, + "epoch": 0.7895326056535125, + "grad_norm": 0.2233578217040272, + "learning_rate": 8.809208546741842e-05, + "loss": 0.551, "step": 2821 }, { - "epoch": 0.394961511546536, - "grad_norm": 0.41092585101647605, - "learning_rate": 3.449112234932996e-05, - "loss": 0.5571, + "epoch": 0.7898124825076966, + "grad_norm": 0.21971966516217226, + "learning_rate": 8.808209147061659e-05, + "loss": 0.564, "step": 2822 }, { - "epoch": 0.3951014695591323, - "grad_norm": 0.42614487405443774, - "learning_rate": 3.4480636566323215e-05, - "loss": 0.6322, + "epoch": 0.7900923593618808, + "grad_norm": 0.25814484841942725, + "learning_rate": 8.807209384913999e-05, + "loss": 0.5492, "step": 2823 }, { - "epoch": 0.3952414275717285, - "grad_norm": 0.41152848618890253, - "learning_rate": 3.447014883494946e-05, - "loss": 0.5636, + "epoch": 0.7903722362160649, + "grad_norm": 0.22185428438583155, + "learning_rate": 8.806209260394018e-05, + "loss": 0.5468, "step": 2824 }, { - "epoch": 0.39538138558432473, - "grad_norm": 0.4043580579983999, - "learning_rate": 3.445965915736403e-05, - "loss": 0.5338, + "epoch": 0.7906521130702491, + "grad_norm": 0.2233594052708055, + "learning_rate": 8.80520877359691e-05, + "loss": 0.5625, "step": 2825 }, { - "epoch": 0.3955213435969209, - "grad_norm": 0.3944340553638311, - "learning_rate": 3.444916753572266e-05, - "loss": 0.5345, + "epoch": 0.7909319899244333, + "grad_norm": 0.2256990651820279, + "learning_rate": 8.8042079246179e-05, + "loss": 0.5407, "step": 2826 }, { - "epoch": 0.3956613016095171, - "grad_norm": 0.4032299392463221, - "learning_rate": 3.44386739721815e-05, - "loss": 0.5645, + "epoch": 0.7912118667786174, + "grad_norm": 0.22760471565206933, + "learning_rate": 8.803206713552252e-05, + "loss": 0.5411, "step": 2827 }, { - "epoch": 0.3958012596221134, - "grad_norm": 0.43128038862284984, - "learning_rate": 3.442817846889705e-05, - "loss": 0.5793, + "epoch": 0.7914917436328016, + "grad_norm": 0.21692049590750304, + "learning_rate": 8.802205140495259e-05, + "loss": 0.5204, "step": 2828 }, { - "epoch": 0.3959412176347096, - "grad_norm": 0.3904629770048097, - "learning_rate": 3.4417681028026276e-05, - "loss": 0.5594, + "epoch": 0.7917716204869857, + "grad_norm": 0.21457081307008663, + "learning_rate": 8.801203205542252e-05, + "loss": 0.5508, "step": 2829 }, { - "epoch": 0.3960811756473058, - "grad_norm": 0.4032671372019056, - "learning_rate": 3.4407181651726495e-05, - "loss": 0.6305, + "epoch": 0.7920514973411699, + "grad_norm": 0.22345148575294949, + "learning_rate": 8.800200908788598e-05, + "loss": 0.5435, "step": 2830 }, { - "epoch": 0.396221133659902, - "grad_norm": 0.41716969232369355, - "learning_rate": 3.439668034215543e-05, - "loss": 0.5661, + "epoch": 0.792331374195354, + "grad_norm": 0.2101856819329527, + "learning_rate": 8.799198250329692e-05, + "loss": 0.5258, "step": 2831 }, { - "epoch": 0.3963610916724983, - "grad_norm": 0.3785072143764436, - "learning_rate": 3.4386177101471216e-05, - "loss": 0.5378, + "epoch": 0.7926112510495382, + "grad_norm": 0.21800264042889614, + "learning_rate": 8.798195230260973e-05, + "loss": 0.5197, "step": 2832 }, { - "epoch": 0.3965010496850945, - "grad_norm": 0.4062464075615544, - "learning_rate": 3.437567193183237e-05, - "loss": 0.5613, + "epoch": 0.7928911279037224, + "grad_norm": 0.21075015346930503, + "learning_rate": 8.797191848677904e-05, + "loss": 0.5282, "step": 2833 }, { - "epoch": 0.3966410076976907, - "grad_norm": 0.41547338740317635, - "learning_rate": 3.436516483539781e-05, - "loss": 0.5652, + "epoch": 0.7931710047579065, + "grad_norm": 0.24180470598921924, + "learning_rate": 8.79618810567599e-05, + "loss": 0.5311, "step": 2834 }, { - "epoch": 0.3967809657102869, - "grad_norm": 0.4212387411967819, - "learning_rate": 3.4354655814326845e-05, - "loss": 0.5759, + "epoch": 0.7934508816120907, + "grad_norm": 0.21314976378368844, + "learning_rate": 8.795184001350767e-05, + "loss": 0.5351, "step": 2835 }, { - "epoch": 0.3969209237228831, - "grad_norm": 0.39412673101104495, - "learning_rate": 3.434414487077921e-05, - "loss": 0.5455, + "epoch": 0.7937307584662748, + "grad_norm": 0.2262732327054876, + "learning_rate": 8.794179535797806e-05, + "loss": 0.5557, "step": 2836 }, { - "epoch": 0.3970608817354794, - "grad_norm": 0.40728215607499646, - "learning_rate": 3.433363200691498e-05, - "loss": 0.5677, + "epoch": 0.794010635320459, + "grad_norm": 0.2109717137303336, + "learning_rate": 8.793174709112714e-05, + "loss": 0.5205, "step": 2837 }, { - "epoch": 0.3972008397480756, - "grad_norm": 0.4180649072512069, - "learning_rate": 3.432311722489467e-05, - "loss": 0.5845, + "epoch": 0.7942905121746432, + "grad_norm": 0.22044485605307892, + "learning_rate": 8.792169521391132e-05, + "loss": 0.5456, "step": 2838 }, { - "epoch": 0.39734079776067177, - "grad_norm": 0.4237453415621929, - "learning_rate": 3.431260052687919e-05, - "loss": 0.5543, + "epoch": 0.7945703890288273, + "grad_norm": 0.23439227561476458, + "learning_rate": 8.791163972728729e-05, + "loss": 0.5322, "step": 2839 }, { - "epoch": 0.397480755773268, - "grad_norm": 0.47750072518183245, - "learning_rate": 3.430208191502979e-05, - "loss": 0.5799, + "epoch": 0.7948502658830114, + "grad_norm": 0.22338269897225688, + "learning_rate": 8.790158063221219e-05, + "loss": 0.5499, "step": 2840 }, { - "epoch": 0.3976207137858642, - "grad_norm": 0.40826704545334747, - "learning_rate": 3.4291561391508185e-05, - "loss": 0.5524, + "epoch": 0.7951301427371956, + "grad_norm": 0.2232145969653595, + "learning_rate": 8.789151792964342e-05, + "loss": 0.5331, "step": 2841 }, { - "epoch": 0.3977606717984605, - "grad_norm": 0.4353573374399257, - "learning_rate": 3.428103895847644e-05, - "loss": 0.5489, + "epoch": 0.7954100195913798, + "grad_norm": 0.2185630854730285, + "learning_rate": 8.788145162053877e-05, + "loss": 0.53, "step": 2842 }, { - "epoch": 0.39790062981105667, - "grad_norm": 0.4164859565724006, - "learning_rate": 3.427051461809702e-05, - "loss": 0.5694, + "epoch": 0.795689896445564, + "grad_norm": 0.20638335673994054, + "learning_rate": 8.787138170585635e-05, + "loss": 0.5578, "step": 2843 }, { - "epoch": 0.3980405878236529, - "grad_norm": 0.4285455810589633, - "learning_rate": 3.425998837253278e-05, - "loss": 0.5879, + "epoch": 0.7959697732997482, + "grad_norm": 0.21157362620188566, + "learning_rate": 8.786130818655462e-05, + "loss": 0.523, "step": 2844 }, { - "epoch": 0.3981805458362491, - "grad_norm": 0.3797736872864129, - "learning_rate": 3.4249460223946975e-05, - "loss": 0.555, + "epoch": 0.7962496501539322, + "grad_norm": 0.2146520898061125, + "learning_rate": 8.785123106359238e-05, + "loss": 0.5405, "step": 2845 }, { - "epoch": 0.3983205038488453, - "grad_norm": 0.39764697478634364, - "learning_rate": 3.4238930174503245e-05, - "loss": 0.5683, + "epoch": 0.7965295270081164, + "grad_norm": 0.21887109159244958, + "learning_rate": 8.78411503379288e-05, + "loss": 0.5465, "step": 2846 }, { - "epoch": 0.3984604618614416, - "grad_norm": 0.40768853681588346, - "learning_rate": 3.422839822636562e-05, - "loss": 0.5822, + "epoch": 0.7968094038623006, + "grad_norm": 0.21665261854698148, + "learning_rate": 8.783106601052333e-05, + "loss": 0.545, "step": 2847 }, { - "epoch": 0.39860041987403777, - "grad_norm": 0.40077863607268926, - "learning_rate": 3.421786438169852e-05, - "loss": 0.5218, + "epoch": 0.7970892807164848, + "grad_norm": 0.21475423505975183, + "learning_rate": 8.782097808233584e-05, + "loss": 0.5137, "step": 2848 }, { - "epoch": 0.398740377886634, - "grad_norm": 0.43178105768051894, - "learning_rate": 3.420732864266677e-05, - "loss": 0.5917, + "epoch": 0.797369157570669, + "grad_norm": 0.21469560760181244, + "learning_rate": 8.781088655432648e-05, + "loss": 0.518, "step": 2849 }, { - "epoch": 0.3988803358992302, - "grad_norm": 0.4012531476036567, - "learning_rate": 3.4196791011435554e-05, - "loss": 0.5575, + "epoch": 0.797649034424853, + "grad_norm": 0.21697308410113145, + "learning_rate": 8.780079142745576e-05, + "loss": 0.5152, "step": 2850 }, { - "epoch": 0.3990202939118265, - "grad_norm": 0.42052995566229917, - "learning_rate": 3.418625149017047e-05, - "loss": 0.5898, + "epoch": 0.7979289112790372, + "grad_norm": 0.22059316431053475, + "learning_rate": 8.779069270268456e-05, + "loss": 0.5509, "step": 2851 }, { - "epoch": 0.39916025192442267, - "grad_norm": 0.48024947250916206, - "learning_rate": 3.4175710081037505e-05, - "loss": 0.5438, + "epoch": 0.7982087881332214, + "grad_norm": 0.21439874759153993, + "learning_rate": 8.778059038097409e-05, + "loss": 0.5217, "step": 2852 }, { - "epoch": 0.39930020993701887, - "grad_norm": 0.3857911120539825, - "learning_rate": 3.4165166786203015e-05, - "loss": 0.5641, + "epoch": 0.7984886649874056, + "grad_norm": 0.24195536709810261, + "learning_rate": 8.777048446328588e-05, + "loss": 0.4948, "step": 2853 }, { - "epoch": 0.3994401679496151, - "grad_norm": 0.3948093390858907, - "learning_rate": 3.415462160783375e-05, - "loss": 0.5655, + "epoch": 0.7987685418415897, + "grad_norm": 0.22542568973369742, + "learning_rate": 8.77603749505818e-05, + "loss": 0.5508, "step": 2854 }, { - "epoch": 0.3995801259622113, - "grad_norm": 0.4003575934592238, - "learning_rate": 3.4144074548096866e-05, - "loss": 0.6106, + "epoch": 0.7990484186957738, + "grad_norm": 0.2340567086340302, + "learning_rate": 8.775026184382411e-05, + "loss": 0.5454, "step": 2855 }, { - "epoch": 0.39972008397480757, - "grad_norm": 0.4321737583237654, - "learning_rate": 3.413352560915988e-05, - "loss": 0.5514, + "epoch": 0.799328295549958, + "grad_norm": 0.2578561350080951, + "learning_rate": 8.774014514397537e-05, + "loss": 0.5566, "step": 2856 }, { - "epoch": 0.39986004198740377, - "grad_norm": 0.45894659025440354, - "learning_rate": 3.412297479319071e-05, - "loss": 0.5669, + "epoch": 0.7996081724041422, + "grad_norm": 0.22763271026186724, + "learning_rate": 8.773002485199849e-05, + "loss": 0.5322, "step": 2857 }, { - "epoch": 0.4, - "grad_norm": 0.42987083801881937, - "learning_rate": 3.411242210235765e-05, - "loss": 0.5974, + "epoch": 0.7998880492583264, + "grad_norm": 0.22724074095648678, + "learning_rate": 8.771990096885672e-05, + "loss": 0.5228, "step": 2858 }, { - "epoch": 0.4001399580125962, - "grad_norm": 0.40207997706206416, - "learning_rate": 3.4101867538829394e-05, - "loss": 0.5286, + "epoch": 0.8001679261125105, + "grad_norm": 0.23037448798055957, + "learning_rate": 8.770977349551366e-05, + "loss": 0.5394, "step": 2859 }, { - "epoch": 0.4002799160251924, - "grad_norm": 0.40235651191545074, - "learning_rate": 3.4091311104775e-05, - "loss": 0.5427, + "epoch": 0.8004478029666946, + "grad_norm": 0.21513282819816093, + "learning_rate": 8.769964243293326e-05, + "loss": 0.5252, "step": 2860 }, { - "epoch": 0.40041987403778867, - "grad_norm": 0.40231984058011133, - "learning_rate": 3.4080752802363936e-05, - "loss": 0.5341, + "epoch": 0.8007276798208788, + "grad_norm": 0.2191177699463853, + "learning_rate": 8.768950778207978e-05, + "loss": 0.5447, "step": 2861 }, { - "epoch": 0.40055983205038487, - "grad_norm": 0.4240007925517186, - "learning_rate": 3.4070192633766025e-05, - "loss": 0.5571, + "epoch": 0.801007556675063, + "grad_norm": 0.20963860250391247, + "learning_rate": 8.767936954391787e-05, + "loss": 0.5324, "step": 2862 }, { - "epoch": 0.4006997900629811, - "grad_norm": 0.3961994599738629, - "learning_rate": 3.4059630601151504e-05, - "loss": 0.5585, + "epoch": 0.8012874335292471, + "grad_norm": 0.22403841844799766, + "learning_rate": 8.766922771941246e-05, + "loss": 0.5572, "step": 2863 }, { - "epoch": 0.4008397480755773, - "grad_norm": 0.3908273444014209, - "learning_rate": 3.404906670669097e-05, - "loss": 0.5444, + "epoch": 0.8015673103834313, + "grad_norm": 0.23077398062211876, + "learning_rate": 8.765908230952888e-05, + "loss": 0.5569, "step": 2864 }, { - "epoch": 0.40097970608817357, - "grad_norm": 0.4011478963975767, - "learning_rate": 3.403850095255542e-05, - "loss": 0.601, + "epoch": 0.8018471872376155, + "grad_norm": 0.23387319310509821, + "learning_rate": 8.764893331523278e-05, + "loss": 0.5166, "step": 2865 }, { - "epoch": 0.40111966410076977, - "grad_norm": 0.4492421654680456, - "learning_rate": 3.402793334091621e-05, - "loss": 0.5953, + "epoch": 0.8021270640917996, + "grad_norm": 0.2221398047613915, + "learning_rate": 8.763878073749012e-05, + "loss": 0.5355, "step": 2866 }, { - "epoch": 0.40125962211336597, - "grad_norm": 0.4207454261749868, - "learning_rate": 3.40173638739451e-05, - "loss": 0.5931, + "epoch": 0.8024069409459837, + "grad_norm": 0.21527982155363673, + "learning_rate": 8.762862457726723e-05, + "loss": 0.5259, "step": 2867 }, { - "epoch": 0.4013995801259622, - "grad_norm": 0.40623369999208175, - "learning_rate": 3.400679255381421e-05, - "loss": 0.5528, + "epoch": 0.8026868178001679, + "grad_norm": 0.2273342193569453, + "learning_rate": 8.761846483553083e-05, + "loss": 0.5218, "step": 2868 }, { - "epoch": 0.4015395381385584, - "grad_norm": 0.4169914185414076, - "learning_rate": 3.399621938269606e-05, - "loss": 0.5823, + "epoch": 0.8029666946543521, + "grad_norm": 0.22705463249266675, + "learning_rate": 8.760830151324786e-05, + "loss": 0.5466, "step": 2869 }, { - "epoch": 0.40167949615115467, - "grad_norm": 0.42169370060384803, - "learning_rate": 3.3985644362763566e-05, - "loss": 0.5565, + "epoch": 0.8032465715085363, + "grad_norm": 0.21478700010646237, + "learning_rate": 8.75981346113857e-05, + "loss": 0.525, "step": 2870 }, { - "epoch": 0.40181945416375087, - "grad_norm": 0.38157604657299227, - "learning_rate": 3.3975067496189965e-05, - "loss": 0.525, + "epoch": 0.8035264483627204, + "grad_norm": 0.2303179163497238, + "learning_rate": 8.758796413091204e-05, + "loss": 0.5581, "step": 2871 }, { - "epoch": 0.4019594121763471, - "grad_norm": 0.42241067538345, - "learning_rate": 3.396448878514894e-05, - "loss": 0.577, + "epoch": 0.8038063252169045, + "grad_norm": 0.21606750681499284, + "learning_rate": 8.757779007279493e-05, + "loss": 0.5031, "step": 2872 }, { - "epoch": 0.4020993701889433, - "grad_norm": 0.4164649305478365, - "learning_rate": 3.395390823181451e-05, - "loss": 0.5334, + "epoch": 0.8040862020710887, + "grad_norm": 0.2218226313286871, + "learning_rate": 8.756761243800273e-05, + "loss": 0.5111, "step": 2873 }, { - "epoch": 0.4022393282015395, - "grad_norm": 0.4224179515123039, - "learning_rate": 3.3943325838361084e-05, - "loss": 0.6014, + "epoch": 0.8043660789252729, + "grad_norm": 0.2285726570299832, + "learning_rate": 8.755743122750415e-05, + "loss": 0.5335, "step": 2874 }, { - "epoch": 0.40237928621413577, - "grad_norm": 0.4243031111453756, - "learning_rate": 3.393274160696346e-05, - "loss": 0.5759, + "epoch": 0.8046459557794571, + "grad_norm": 0.22297193493897877, + "learning_rate": 8.754724644226824e-05, + "loss": 0.518, "step": 2875 }, { - "epoch": 0.40251924422673196, - "grad_norm": 0.4214607029417847, - "learning_rate": 3.392215553979679e-05, - "loss": 0.6041, + "epoch": 0.8049258326336411, + "grad_norm": 0.21257692593885053, + "learning_rate": 8.753705808326438e-05, + "loss": 0.5354, "step": 2876 }, { - "epoch": 0.4026592022393282, - "grad_norm": 0.39897243417459854, - "learning_rate": 3.391156763903665e-05, - "loss": 0.5425, + "epoch": 0.8052057094878253, + "grad_norm": 0.22159427074143495, + "learning_rate": 8.752686615146234e-05, + "loss": 0.5499, "step": 2877 }, { - "epoch": 0.4027991602519244, - "grad_norm": 0.39128320703090635, - "learning_rate": 3.390097790685892e-05, - "loss": 0.579, + "epoch": 0.8054855863420095, + "grad_norm": 0.20808489492074214, + "learning_rate": 8.751667064783218e-05, + "loss": 0.5011, "step": 2878 }, { - "epoch": 0.40293911826452067, - "grad_norm": 0.40615873009003534, - "learning_rate": 3.389038634543993e-05, - "loss": 0.5828, + "epoch": 0.8057654631961937, + "grad_norm": 0.2321803174350652, + "learning_rate": 8.75064715733443e-05, + "loss": 0.5172, "step": 2879 }, { - "epoch": 0.40307907627711687, - "grad_norm": 0.41878420947323897, - "learning_rate": 3.387979295695632e-05, - "loss": 0.5828, + "epoch": 0.8060453400503779, + "grad_norm": 0.21486242311207676, + "learning_rate": 8.749626892896944e-05, + "loss": 0.5331, "step": 2880 }, { - "epoch": 0.40321903428971306, - "grad_norm": 0.383331359104639, - "learning_rate": 3.386919774358518e-05, - "loss": 0.5384, + "epoch": 0.806325216904562, + "grad_norm": 0.2618784125152297, + "learning_rate": 8.748606271567876e-05, + "loss": 0.5367, "step": 2881 }, { - "epoch": 0.4033589923023093, - "grad_norm": 0.41913779228819376, - "learning_rate": 3.38586007075039e-05, - "loss": 0.5591, + "epoch": 0.8066050937587461, + "grad_norm": 0.22166037267218722, + "learning_rate": 8.747585293444363e-05, + "loss": 0.5285, "step": 2882 }, { - "epoch": 0.4034989503149055, - "grad_norm": 0.38348801468846716, - "learning_rate": 3.3848001850890296e-05, - "loss": 0.5393, + "epoch": 0.8068849706129303, + "grad_norm": 0.223680190345337, + "learning_rate": 8.746563958623584e-05, + "loss": 0.5709, "step": 2883 }, { - "epoch": 0.40363890832750177, - "grad_norm": 0.4113992124017339, - "learning_rate": 3.383740117592254e-05, - "loss": 0.5567, + "epoch": 0.8071648474671145, + "grad_norm": 0.23106289340452013, + "learning_rate": 8.745542267202749e-05, + "loss": 0.5546, "step": 2884 }, { - "epoch": 0.40377886634009796, - "grad_norm": 0.40383694106615, - "learning_rate": 3.382679868477917e-05, - "loss": 0.5686, + "epoch": 0.8074447243212987, + "grad_norm": 0.2257407444251882, + "learning_rate": 8.744520219279106e-05, + "loss": 0.5279, "step": 2885 }, { - "epoch": 0.4039188243526942, - "grad_norm": 0.4275501335912839, - "learning_rate": 3.381619437963911e-05, - "loss": 0.5709, + "epoch": 0.8077246011754828, + "grad_norm": 0.22253837697732912, + "learning_rate": 8.743497814949932e-05, + "loss": 0.5435, "step": 2886 }, { - "epoch": 0.4040587823652904, - "grad_norm": 0.410136210120104, - "learning_rate": 3.3805588262681655e-05, - "loss": 0.5578, + "epoch": 0.8080044780296669, + "grad_norm": 0.22287026196738555, + "learning_rate": 8.742475054312542e-05, + "loss": 0.5546, "step": 2887 }, { - "epoch": 0.4041987403778866, - "grad_norm": 0.39945476818253844, - "learning_rate": 3.379498033608646e-05, - "loss": 0.5576, + "epoch": 0.8082843548838511, + "grad_norm": 0.22115599643611797, + "learning_rate": 8.74145193746428e-05, + "loss": 0.5405, "step": 2888 }, { - "epoch": 0.40433869839048286, - "grad_norm": 0.41503488018923496, - "learning_rate": 3.378437060203357e-05, - "loss": 0.5621, + "epoch": 0.8085642317380353, + "grad_norm": 0.21972420074671384, + "learning_rate": 8.740428464502529e-05, + "loss": 0.5306, "step": 2889 }, { - "epoch": 0.40447865640307906, - "grad_norm": 0.4205244822133629, - "learning_rate": 3.3773759062703396e-05, - "loss": 0.571, + "epoch": 0.8088441085922194, + "grad_norm": 0.2181093091866074, + "learning_rate": 8.739404635524704e-05, + "loss": 0.5124, "step": 2890 }, { - "epoch": 0.4046186144156753, - "grad_norm": 0.40721392794208344, - "learning_rate": 3.376314572027671e-05, - "loss": 0.5539, + "epoch": 0.8091239854464036, + "grad_norm": 0.22584053887315925, + "learning_rate": 8.738380450628252e-05, + "loss": 0.5315, "step": 2891 }, { - "epoch": 0.4047585724282715, - "grad_norm": 0.4061261621378902, - "learning_rate": 3.375253057693466e-05, - "loss": 0.5502, + "epoch": 0.8094038623005877, + "grad_norm": 0.22101277900065378, + "learning_rate": 8.737355909910656e-05, + "loss": 0.5343, "step": 2892 }, { - "epoch": 0.40489853044086777, - "grad_norm": 0.4040934767631151, - "learning_rate": 3.374191363485877e-05, - "loss": 0.5502, + "epoch": 0.8096837391547719, + "grad_norm": 0.21226432343971377, + "learning_rate": 8.736331013469432e-05, + "loss": 0.5179, "step": 2893 }, { - "epoch": 0.40503848845346396, - "grad_norm": 0.4140086272358693, - "learning_rate": 3.373129489623093e-05, - "loss": 0.5752, + "epoch": 0.809963616008956, + "grad_norm": 0.2281987608223546, + "learning_rate": 8.735305761402132e-05, + "loss": 0.5395, "step": 2894 }, { - "epoch": 0.40517844646606016, - "grad_norm": 0.4285410515445642, - "learning_rate": 3.3720674363233396e-05, - "loss": 0.5641, + "epoch": 0.8102434928631402, + "grad_norm": 0.2330515088733716, + "learning_rate": 8.734280153806338e-05, + "loss": 0.5472, "step": 2895 }, { - "epoch": 0.4053184044786564, - "grad_norm": 0.3886401656745267, - "learning_rate": 3.37100520380488e-05, - "loss": 0.5721, + "epoch": 0.8105233697173244, + "grad_norm": 0.2303513278523418, + "learning_rate": 8.73325419077967e-05, + "loss": 0.5476, "step": 2896 }, { - "epoch": 0.4054583624912526, - "grad_norm": 0.4252355420954435, - "learning_rate": 3.369942792286013e-05, - "loss": 0.5494, + "epoch": 0.8108032465715085, + "grad_norm": 0.22998588585301685, + "learning_rate": 8.732227872419778e-05, + "loss": 0.5233, "step": 2897 }, { - "epoch": 0.40559832050384886, - "grad_norm": 0.44644708491027574, - "learning_rate": 3.368880201985076e-05, - "loss": 0.5722, + "epoch": 0.8110831234256927, + "grad_norm": 0.2196595321225993, + "learning_rate": 8.731201198824347e-05, + "loss": 0.5485, "step": 2898 }, { - "epoch": 0.40573827851644506, - "grad_norm": 0.42435542539228505, - "learning_rate": 3.367817433120441e-05, - "loss": 0.5744, + "epoch": 0.8113630002798768, + "grad_norm": 0.21344379675029646, + "learning_rate": 8.730174170091099e-05, + "loss": 0.5185, "step": 2899 }, { - "epoch": 0.4058782365290413, - "grad_norm": 0.41641484378908183, - "learning_rate": 3.366754485910518e-05, - "loss": 0.5874, + "epoch": 0.811642877134061, + "grad_norm": 0.21515576014505106, + "learning_rate": 8.729146786317786e-05, + "loss": 0.5471, "step": 2900 }, { - "epoch": 0.4060181945416375, - "grad_norm": 0.40783494684210103, - "learning_rate": 3.365691360573754e-05, - "loss": 0.5561, + "epoch": 0.8119227539882452, + "grad_norm": 0.22280304383786542, + "learning_rate": 8.728119047602192e-05, + "loss": 0.5501, "step": 2901 }, { - "epoch": 0.4061581525542337, - "grad_norm": 0.3940864417785125, - "learning_rate": 3.3646280573286314e-05, - "loss": 0.5437, + "epoch": 0.8122026308424294, + "grad_norm": 0.21889943027225472, + "learning_rate": 8.727090954042142e-05, + "loss": 0.5373, "step": 2902 }, { - "epoch": 0.40629811056682996, - "grad_norm": 0.41032563545142275, - "learning_rate": 3.363564576393671e-05, - "loss": 0.5679, + "epoch": 0.8124825076966135, + "grad_norm": 0.21020812287582277, + "learning_rate": 8.726062505735489e-05, + "loss": 0.5387, "step": 2903 }, { - "epoch": 0.40643806857942616, - "grad_norm": 0.43293587153461693, - "learning_rate": 3.3625009179874265e-05, - "loss": 0.6005, + "epoch": 0.8127623845507976, + "grad_norm": 0.2175228949728657, + "learning_rate": 8.725033702780121e-05, + "loss": 0.4943, "step": 2904 }, { - "epoch": 0.4065780265920224, - "grad_norm": 0.41913595404532833, - "learning_rate": 3.361437082328493e-05, - "loss": 0.5841, + "epoch": 0.8130422614049818, + "grad_norm": 0.22803427588451333, + "learning_rate": 8.72400454527396e-05, + "loss": 0.5579, "step": 2905 }, { - "epoch": 0.4067179846046186, - "grad_norm": 0.39955188014787174, - "learning_rate": 3.360373069635498e-05, - "loss": 0.5762, + "epoch": 0.813322138259166, + "grad_norm": 0.22717917846053645, + "learning_rate": 8.722975033314962e-05, + "loss": 0.5445, "step": 2906 }, { - "epoch": 0.40685794261721486, - "grad_norm": 0.40532054476042145, - "learning_rate": 3.359308880127108e-05, - "loss": 0.5811, + "epoch": 0.8136020151133502, + "grad_norm": 0.21615324377825143, + "learning_rate": 8.721945167001116e-05, + "loss": 0.5425, "step": 2907 }, { - "epoch": 0.40699790062981106, - "grad_norm": 0.3950198242019259, - "learning_rate": 3.358244514022025e-05, - "loss": 0.5795, + "epoch": 0.8138818919675342, + "grad_norm": 0.21621814365605796, + "learning_rate": 8.720914946430446e-05, + "loss": 0.504, "step": 2908 }, { - "epoch": 0.40713785864240726, - "grad_norm": 0.4211880514353567, - "learning_rate": 3.357179971538985e-05, - "loss": 0.5606, + "epoch": 0.8141617688217184, + "grad_norm": 0.21309418385195072, + "learning_rate": 8.71988437170101e-05, + "loss": 0.5451, "step": 2909 }, { - "epoch": 0.4072778166550035, - "grad_norm": 0.4142064942386179, - "learning_rate": 3.3561152528967646e-05, - "loss": 0.5802, + "epoch": 0.8144416456759026, + "grad_norm": 0.22841599010793856, + "learning_rate": 8.718853442910896e-05, + "loss": 0.5242, "step": 2910 }, { - "epoch": 0.4074177746675997, - "grad_norm": 0.41844753492725145, - "learning_rate": 3.355050358314172e-05, - "loss": 0.5274, + "epoch": 0.8147215225300868, + "grad_norm": 0.2168662958041549, + "learning_rate": 8.71782216015823e-05, + "loss": 0.5267, "step": 2911 }, { - "epoch": 0.40755773268019596, - "grad_norm": 0.41589536675167615, - "learning_rate": 3.353985288010056e-05, - "loss": 0.5938, + "epoch": 0.815001399384271, + "grad_norm": 0.23129303357103495, + "learning_rate": 8.71679052354117e-05, + "loss": 0.5458, "step": 2912 }, { - "epoch": 0.40769769069279216, - "grad_norm": 0.39984810028580087, - "learning_rate": 3.352920042203298e-05, - "loss": 0.58, + "epoch": 0.815281276238455, + "grad_norm": 0.22375846753309966, + "learning_rate": 8.715758533157907e-05, + "loss": 0.5418, "step": 2913 }, { - "epoch": 0.40783764870538836, - "grad_norm": 0.4140807749556365, - "learning_rate": 3.3518546211128166e-05, - "loss": 0.5491, + "epoch": 0.8155611530926392, + "grad_norm": 0.21948924723541993, + "learning_rate": 8.714726189106668e-05, + "loss": 0.5321, "step": 2914 }, { - "epoch": 0.4079776067179846, - "grad_norm": 0.4328655244745967, - "learning_rate": 3.350789024957568e-05, - "loss": 0.5972, + "epoch": 0.8158410299468234, + "grad_norm": 0.21981769355357852, + "learning_rate": 8.713693491485711e-05, + "loss": 0.5161, "step": 2915 }, { - "epoch": 0.4081175647305808, - "grad_norm": 0.3996263118365605, - "learning_rate": 3.349723253956542e-05, - "loss": 0.5629, + "epoch": 0.8161209068010076, + "grad_norm": 0.2213284784587669, + "learning_rate": 8.712660440393328e-05, + "loss": 0.5443, "step": 2916 }, { - "epoch": 0.40825752274317706, - "grad_norm": 0.4138825890216495, - "learning_rate": 3.348657308328766e-05, - "loss": 0.5461, + "epoch": 0.8164007836551918, + "grad_norm": 0.2221214987354716, + "learning_rate": 8.711627035927847e-05, + "loss": 0.5322, "step": 2917 }, { - "epoch": 0.40839748075577326, - "grad_norm": 0.45384712853313164, - "learning_rate": 3.3475911882933015e-05, - "loss": 0.5456, + "epoch": 0.8166806605093758, + "grad_norm": 0.2183012593576887, + "learning_rate": 8.710593278187626e-05, + "loss": 0.5099, "step": 2918 }, { - "epoch": 0.4085374387683695, - "grad_norm": 0.4085437808720945, - "learning_rate": 3.346524894069248e-05, - "loss": 0.5453, + "epoch": 0.81696053736356, + "grad_norm": 0.2229608818399287, + "learning_rate": 8.709559167271061e-05, + "loss": 0.5373, "step": 2919 }, { - "epoch": 0.4086773967809657, - "grad_norm": 0.4038494007288198, - "learning_rate": 3.3454584258757404e-05, - "loss": 0.5623, + "epoch": 0.8172404142177442, + "grad_norm": 0.22985599509228125, + "learning_rate": 8.708524703276578e-05, + "loss": 0.56, "step": 2920 }, { - "epoch": 0.4088173547935619, - "grad_norm": 0.4566167974871496, - "learning_rate": 3.344391783931947e-05, - "loss": 0.5907, + "epoch": 0.8175202910719284, + "grad_norm": 0.20196807128492697, + "learning_rate": 8.707489886302638e-05, + "loss": 0.5317, "step": 2921 }, { - "epoch": 0.40895731280615816, - "grad_norm": 0.3969364122046131, - "learning_rate": 3.343324968457076e-05, - "loss": 0.5131, + "epoch": 0.8178001679261125, + "grad_norm": 0.2076045300988094, + "learning_rate": 8.706454716447736e-05, + "loss": 0.5487, "step": 2922 }, { - "epoch": 0.40909727081875435, - "grad_norm": 0.4203451679837855, - "learning_rate": 3.342257979670365e-05, - "loss": 0.5644, + "epoch": 0.8180800447802967, + "grad_norm": 0.2086327737778983, + "learning_rate": 8.705419193810398e-05, + "loss": 0.5003, "step": 2923 }, { - "epoch": 0.4092372288313506, - "grad_norm": 0.40925859811350057, - "learning_rate": 3.341190817791094e-05, - "loss": 0.5498, + "epoch": 0.8183599216344808, + "grad_norm": 0.21564421210403176, + "learning_rate": 8.704383318489189e-05, + "loss": 0.5165, "step": 2924 }, { - "epoch": 0.4093771868439468, - "grad_norm": 0.4083989313014574, - "learning_rate": 3.3401234830385756e-05, - "loss": 0.5854, + "epoch": 0.818639798488665, + "grad_norm": 0.21993509376897255, + "learning_rate": 8.7033470905827e-05, + "loss": 0.5482, "step": 2925 }, { - "epoch": 0.40951714485654306, - "grad_norm": 0.39780928336254934, - "learning_rate": 3.3390559756321566e-05, - "loss": 0.5659, + "epoch": 0.8189196753428492, + "grad_norm": 0.22198759878988114, + "learning_rate": 8.702310510189563e-05, + "loss": 0.5562, "step": 2926 }, { - "epoch": 0.40965710286913926, - "grad_norm": 0.4109873033834379, - "learning_rate": 3.337988295791221e-05, - "loss": 0.5422, + "epoch": 0.8191995521970333, + "grad_norm": 0.22435205545335035, + "learning_rate": 8.701273577408441e-05, + "loss": 0.5525, "step": 2927 }, { - "epoch": 0.40979706088173545, - "grad_norm": 0.3952927193405086, - "learning_rate": 3.3369204437351886e-05, - "loss": 0.5592, + "epoch": 0.8194794290512175, + "grad_norm": 0.2127898962581027, + "learning_rate": 8.700236292338026e-05, + "loss": 0.5326, "step": 2928 }, { - "epoch": 0.4099370188943317, - "grad_norm": 0.40670959006044405, - "learning_rate": 3.335852419683513e-05, - "loss": 0.5711, + "epoch": 0.8197593059054016, + "grad_norm": 0.22537423412478533, + "learning_rate": 8.699198655077053e-05, + "loss": 0.5474, "step": 2929 }, { - "epoch": 0.4100769769069279, - "grad_norm": 0.3974994882518793, - "learning_rate": 3.3347842238556836e-05, - "loss": 0.5315, + "epoch": 0.8200391827595858, + "grad_norm": 0.21328517915897816, + "learning_rate": 8.698160665724278e-05, + "loss": 0.553, "step": 2930 }, { - "epoch": 0.41021693491952416, - "grad_norm": 0.44234521625168705, - "learning_rate": 3.3337158564712267e-05, - "loss": 0.5552, + "epoch": 0.8203190596137699, + "grad_norm": 0.22641606256105556, + "learning_rate": 8.697122324378502e-05, + "loss": 0.5399, "step": 2931 }, { - "epoch": 0.41035689293212035, - "grad_norm": 0.40022379185542395, - "learning_rate": 3.332647317749702e-05, - "loss": 0.5745, + "epoch": 0.8205989364679541, + "grad_norm": 0.21823819490837804, + "learning_rate": 8.696083631138553e-05, + "loss": 0.5256, "step": 2932 }, { - "epoch": 0.4104968509447166, - "grad_norm": 0.418577329208144, - "learning_rate": 3.3315786079107055e-05, - "loss": 0.5947, + "epoch": 0.8208788133221383, + "grad_norm": 0.21836411962141716, + "learning_rate": 8.695044586103296e-05, + "loss": 0.5137, "step": 2933 }, { - "epoch": 0.4106368089573128, - "grad_norm": 0.40576821316747064, - "learning_rate": 3.3305097271738665e-05, - "loss": 0.6025, + "epoch": 0.8211586901763224, + "grad_norm": 0.2255160147723362, + "learning_rate": 8.694005189371627e-05, + "loss": 0.5363, "step": 2934 }, { - "epoch": 0.410776766969909, - "grad_norm": 0.4066641994376824, - "learning_rate": 3.329440675758853e-05, - "loss": 0.5475, + "epoch": 0.8214385670305066, + "grad_norm": 0.21757007853250637, + "learning_rate": 8.692965441042477e-05, + "loss": 0.5559, "step": 2935 }, { - "epoch": 0.41091672498250525, - "grad_norm": 0.4150792589177556, - "learning_rate": 3.3283714538853636e-05, - "loss": 0.5866, + "epoch": 0.8217184438846907, + "grad_norm": 0.2185395628240894, + "learning_rate": 8.691925341214808e-05, + "loss": 0.5106, "step": 2936 }, { - "epoch": 0.41105668299510145, - "grad_norm": 0.397074447582423, - "learning_rate": 3.327302061773136e-05, - "loss": 0.5799, + "epoch": 0.8219983207388749, + "grad_norm": 0.2104799722351941, + "learning_rate": 8.690884889987619e-05, + "loss": 0.5412, "step": 2937 }, { - "epoch": 0.4111966410076977, - "grad_norm": 0.40754727507905664, - "learning_rate": 3.3262324996419405e-05, - "loss": 0.5469, + "epoch": 0.8222781975930591, + "grad_norm": 0.22737278191374266, + "learning_rate": 8.689844087459941e-05, + "loss": 0.5421, "step": 2938 }, { - "epoch": 0.4113365990202939, - "grad_norm": 0.40164275820649376, - "learning_rate": 3.325162767711583e-05, - "loss": 0.5232, + "epoch": 0.8225580744472432, + "grad_norm": 0.21825281303841354, + "learning_rate": 8.688802933730838e-05, + "loss": 0.5204, "step": 2939 }, { - "epoch": 0.41147655703289016, - "grad_norm": 0.4080075519925706, - "learning_rate": 3.324092866201904e-05, - "loss": 0.5661, + "epoch": 0.8228379513014273, + "grad_norm": 0.22366010628868704, + "learning_rate": 8.687761428899406e-05, + "loss": 0.5297, "step": 2940 }, { - "epoch": 0.41161651504548635, - "grad_norm": 0.40149245258055244, - "learning_rate": 3.3230227953327796e-05, - "loss": 0.5474, + "epoch": 0.8231178281556115, + "grad_norm": 0.2211469477225885, + "learning_rate": 8.686719573064776e-05, + "loss": 0.5452, "step": 2941 }, { - "epoch": 0.41175647305808255, - "grad_norm": 0.4158994899194992, - "learning_rate": 3.321952555324121e-05, - "loss": 0.5258, + "epoch": 0.8233977050097957, + "grad_norm": 0.22025487840882585, + "learning_rate": 8.685677366326115e-05, + "loss": 0.5747, "step": 2942 }, { - "epoch": 0.4118964310706788, - "grad_norm": 0.4019593770294339, - "learning_rate": 3.320882146395871e-05, - "loss": 0.5762, + "epoch": 0.8236775818639799, + "grad_norm": 0.2156221179343058, + "learning_rate": 8.68463480878262e-05, + "loss": 0.5387, "step": 2943 }, { - "epoch": 0.412036389083275, - "grad_norm": 0.4249496622474027, - "learning_rate": 3.3198115687680115e-05, - "loss": 0.5462, + "epoch": 0.8239574587181641, + "grad_norm": 0.2180794735528772, + "learning_rate": 8.683591900533519e-05, + "loss": 0.5366, "step": 2944 }, { - "epoch": 0.41217634709587125, - "grad_norm": 0.43225282993592656, - "learning_rate": 3.318740822660556e-05, - "loss": 0.5446, + "epoch": 0.8242373355723481, + "grad_norm": 0.22272214971857127, + "learning_rate": 8.68254864167808e-05, + "loss": 0.533, "step": 2945 }, { - "epoch": 0.41231630510846745, - "grad_norm": 0.41726887193732093, - "learning_rate": 3.3176699082935545e-05, - "loss": 0.5493, + "epoch": 0.8245172124265323, + "grad_norm": 0.21944632741313155, + "learning_rate": 8.681505032315602e-05, + "loss": 0.5588, "step": 2946 }, { - "epoch": 0.4124562631210637, - "grad_norm": 0.39903806845546885, - "learning_rate": 3.31659882588709e-05, - "loss": 0.5888, + "epoch": 0.8247970892807165, + "grad_norm": 0.2230834224586324, + "learning_rate": 8.680461072545411e-05, + "loss": 0.5553, "step": 2947 }, { - "epoch": 0.4125962211336599, - "grad_norm": 0.4023782994195669, - "learning_rate": 3.315527575661282e-05, - "loss": 0.5684, + "epoch": 0.8250769661349007, + "grad_norm": 0.21119652852437684, + "learning_rate": 8.67941676246688e-05, + "loss": 0.5556, "step": 2948 }, { - "epoch": 0.4127361791462561, - "grad_norm": 0.3936024186034419, - "learning_rate": 3.314456157836281e-05, - "loss": 0.5117, + "epoch": 0.8253568429890848, + "grad_norm": 0.21568690682339875, + "learning_rate": 8.678372102179397e-05, + "loss": 0.5507, "step": 2949 }, { - "epoch": 0.41287613715885235, - "grad_norm": 0.3952468427687398, - "learning_rate": 3.313384572632277e-05, - "loss": 0.5547, + "epoch": 0.8256367198432689, + "grad_norm": 0.22047401512478768, + "learning_rate": 8.677327091782401e-05, + "loss": 0.5194, "step": 2950 }, { - "epoch": 0.41301609517144855, - "grad_norm": 0.4190384583627527, - "learning_rate": 3.31231282026949e-05, - "loss": 0.5805, + "epoch": 0.8259165966974531, + "grad_norm": 0.2203767571422379, + "learning_rate": 8.676281731375353e-05, + "loss": 0.5107, "step": 2951 }, { - "epoch": 0.4131560531840448, - "grad_norm": 0.39658407316177025, - "learning_rate": 3.3112409009681766e-05, - "loss": 0.5406, + "epoch": 0.8261964735516373, + "grad_norm": 0.21433731401494893, + "learning_rate": 8.675236021057754e-05, + "loss": 0.5209, "step": 2952 }, { - "epoch": 0.413296011196641, - "grad_norm": 0.4076292942631321, - "learning_rate": 3.310168814948627e-05, - "loss": 0.588, + "epoch": 0.8264763504058215, + "grad_norm": 0.21481760769970837, + "learning_rate": 8.674189960929132e-05, + "loss": 0.5301, "step": 2953 }, { - "epoch": 0.41343596920923725, - "grad_norm": 0.41540765697070914, - "learning_rate": 3.3090965624311654e-05, - "loss": 0.5594, + "epoch": 0.8267562272600056, + "grad_norm": 0.21487242001534035, + "learning_rate": 8.673143551089054e-05, + "loss": 0.5322, "step": 2954 }, { - "epoch": 0.41357592722183345, - "grad_norm": 0.4035372057640521, - "learning_rate": 3.3080241436361506e-05, - "loss": 0.5636, + "epoch": 0.8270361041141897, + "grad_norm": 0.22973971830681553, + "learning_rate": 8.672096791637115e-05, + "loss": 0.5298, "step": 2955 }, { - "epoch": 0.41371588523442965, - "grad_norm": 0.41459880243511804, - "learning_rate": 3.3069515587839754e-05, - "loss": 0.5532, + "epoch": 0.8273159809683739, + "grad_norm": 0.22644056273751867, + "learning_rate": 8.67104968267295e-05, + "loss": 0.5325, "step": 2956 }, { - "epoch": 0.4138558432470259, - "grad_norm": 0.39031658646005546, - "learning_rate": 3.305878808095068e-05, - "loss": 0.5939, + "epoch": 0.8275958578225581, + "grad_norm": 0.21955045194299588, + "learning_rate": 8.670002224296221e-05, + "loss": 0.5431, "step": 2957 }, { - "epoch": 0.4139958012596221, - "grad_norm": 0.4182734311842286, - "learning_rate": 3.304805891789888e-05, - "loss": 0.5872, + "epoch": 0.8278757346767422, + "grad_norm": 0.23781902841644872, + "learning_rate": 8.668954416606625e-05, + "loss": 0.5121, "step": 2958 }, { - "epoch": 0.41413575927221835, - "grad_norm": 0.4222389070033302, - "learning_rate": 3.303732810088931e-05, - "loss": 0.6106, + "epoch": 0.8281556115309264, + "grad_norm": 0.22609994299163855, + "learning_rate": 8.667906259703895e-05, + "loss": 0.5512, "step": 2959 }, { - "epoch": 0.41427571728481455, - "grad_norm": 0.39648114038614457, - "learning_rate": 3.302659563212727e-05, - "loss": 0.5457, + "epoch": 0.8284354883851105, + "grad_norm": 0.21714338783466877, + "learning_rate": 8.666857753687793e-05, + "loss": 0.526, "step": 2960 }, { - "epoch": 0.4144156752974108, - "grad_norm": 0.42978337071116823, - "learning_rate": 3.301586151381839e-05, - "loss": 0.5644, + "epoch": 0.8287153652392947, + "grad_norm": 0.2169107567494965, + "learning_rate": 8.665808898658119e-05, + "loss": 0.5501, "step": 2961 }, { - "epoch": 0.414555633310007, - "grad_norm": 0.4005463311390214, - "learning_rate": 3.300512574816863e-05, - "loss": 0.5676, + "epoch": 0.8289952420934789, + "grad_norm": 0.225957160407284, + "learning_rate": 8.664759694714701e-05, + "loss": 0.5392, "step": 2962 }, { - "epoch": 0.4146955913226032, - "grad_norm": 0.39670767820561714, - "learning_rate": 3.2994388337384306e-05, - "loss": 0.5683, + "epoch": 0.829275118947663, + "grad_norm": 0.21152421887093917, + "learning_rate": 8.663710141957405e-05, + "loss": 0.5422, "step": 2963 }, { - "epoch": 0.41483554933519945, - "grad_norm": 0.414169510604101, - "learning_rate": 3.298364928367207e-05, - "loss": 0.5685, + "epoch": 0.8295549958018472, + "grad_norm": 0.2235772061757988, + "learning_rate": 8.662660240486127e-05, + "loss": 0.5147, "step": 2964 }, { - "epoch": 0.41497550734779565, - "grad_norm": 0.39829585786394445, - "learning_rate": 3.2972908589238896e-05, - "loss": 0.5627, + "epoch": 0.8298348726560314, + "grad_norm": 0.2605627305586067, + "learning_rate": 8.661609990400796e-05, + "loss": 0.5369, "step": 2965 }, { - "epoch": 0.4151154653603919, - "grad_norm": 0.40155559541312513, - "learning_rate": 3.2962166256292113e-05, - "loss": 0.5805, + "epoch": 0.8301147495102155, + "grad_norm": 0.2181372860032632, + "learning_rate": 8.660559391801377e-05, + "loss": 0.5435, "step": 2966 }, { - "epoch": 0.4152554233729881, - "grad_norm": 0.4105341813553997, - "learning_rate": 3.295142228703938e-05, - "loss": 0.6003, + "epoch": 0.8303946263643996, + "grad_norm": 0.21369808087418557, + "learning_rate": 8.659508444787867e-05, + "loss": 0.5299, "step": 2967 }, { - "epoch": 0.41539538138558435, - "grad_norm": 0.4006968222058459, - "learning_rate": 3.2940676683688677e-05, - "loss": 0.546, + "epoch": 0.8306745032185838, + "grad_norm": 0.22535358055775553, + "learning_rate": 8.658457149460295e-05, + "loss": 0.5104, "step": 2968 }, { - "epoch": 0.41553533939818055, - "grad_norm": 0.39978495742611814, - "learning_rate": 3.292992944844836e-05, - "loss": 0.5892, + "epoch": 0.830954380072768, + "grad_norm": 0.21689791959655993, + "learning_rate": 8.657405505918723e-05, + "loss": 0.5261, "step": 2969 }, { - "epoch": 0.41567529741077675, - "grad_norm": 0.4016166800083618, - "learning_rate": 3.291918058352706e-05, - "loss": 0.5708, + "epoch": 0.8312342569269522, + "grad_norm": 0.2186281600265367, + "learning_rate": 8.65635351426325e-05, + "loss": 0.5095, "step": 2970 }, { - "epoch": 0.415815255423373, - "grad_norm": 0.40924317132383004, - "learning_rate": 3.290843009113382e-05, - "loss": 0.5291, + "epoch": 0.8315141337811363, + "grad_norm": 0.21441682748376117, + "learning_rate": 8.655301174593999e-05, + "loss": 0.5299, "step": 2971 }, { - "epoch": 0.4159552134359692, - "grad_norm": 0.41496924484348124, - "learning_rate": 3.2897677973477936e-05, - "loss": 0.5638, + "epoch": 0.8317940106353204, + "grad_norm": 0.21736371444094799, + "learning_rate": 8.654248487011139e-05, + "loss": 0.5112, "step": 2972 }, { - "epoch": 0.41609517144856545, - "grad_norm": 0.4170073162428262, - "learning_rate": 3.288692423276911e-05, - "loss": 0.5659, + "epoch": 0.8320738874895046, + "grad_norm": 0.21915618735838882, + "learning_rate": 8.653195451614862e-05, + "loss": 0.5353, "step": 2973 }, { - "epoch": 0.41623512946116165, - "grad_norm": 0.4393981644887219, - "learning_rate": 3.2876168871217325e-05, - "loss": 0.5674, + "epoch": 0.8323537643436888, + "grad_norm": 0.22543908758416845, + "learning_rate": 8.652142068505398e-05, + "loss": 0.5525, "step": 2974 }, { - "epoch": 0.4163750874737579, - "grad_norm": 0.39928933014260387, - "learning_rate": 3.2865411891032916e-05, - "loss": 0.5589, + "epoch": 0.832633641197873, + "grad_norm": 0.23538984408900665, + "learning_rate": 8.651088337783007e-05, + "loss": 0.5394, "step": 2975 }, { - "epoch": 0.4165150454863541, - "grad_norm": 0.6076938066952381, - "learning_rate": 3.2854653294426566e-05, - "loss": 0.5781, + "epoch": 0.832913518052057, + "grad_norm": 0.2186494700659604, + "learning_rate": 8.650034259547983e-05, + "loss": 0.5284, "step": 2976 }, { - "epoch": 0.4166550034989503, - "grad_norm": 0.4206690009626964, - "learning_rate": 3.284389308360927e-05, - "loss": 0.5945, + "epoch": 0.8331933949062412, + "grad_norm": 0.22811388308632696, + "learning_rate": 8.648979833900656e-05, + "loss": 0.5362, "step": 2977 }, { - "epoch": 0.41679496151154655, - "grad_norm": 0.38925483204393885, - "learning_rate": 3.2833131260792345e-05, - "loss": 0.527, + "epoch": 0.8334732717604254, + "grad_norm": 0.21611044864898632, + "learning_rate": 8.647925060941386e-05, + "loss": 0.5234, "step": 2978 }, { - "epoch": 0.41693491952414274, - "grad_norm": 0.43019105225997517, - "learning_rate": 3.282236782818747e-05, - "loss": 0.5975, + "epoch": 0.8337531486146096, + "grad_norm": 0.2087566925854075, + "learning_rate": 8.646869940770567e-05, + "loss": 0.5158, "step": 2979 }, { - "epoch": 0.417074877536739, - "grad_norm": 0.4151005702647614, - "learning_rate": 3.2811602788006645e-05, - "loss": 0.5512, + "epoch": 0.8340330254687938, + "grad_norm": 0.22616478661119407, + "learning_rate": 8.645814473488627e-05, + "loss": 0.514, "step": 2980 }, { - "epoch": 0.4172148355493352, - "grad_norm": 0.43155499812179654, - "learning_rate": 3.280083614246218e-05, - "loss": 0.5582, + "epoch": 0.834312902322978, + "grad_norm": 0.2155972000331264, + "learning_rate": 8.644758659196023e-05, + "loss": 0.5197, "step": 2981 }, { - "epoch": 0.41735479356193145, - "grad_norm": 0.4147678068443396, - "learning_rate": 3.279006789376674e-05, - "loss": 0.5355, + "epoch": 0.834592779177162, + "grad_norm": 0.2142662820181751, + "learning_rate": 8.643702497993251e-05, + "loss": 0.5418, "step": 2982 }, { - "epoch": 0.41749475157452764, - "grad_norm": 0.40986730591235127, - "learning_rate": 3.2779298044133304e-05, - "loss": 0.5521, + "epoch": 0.8348726560313462, + "grad_norm": 0.22435896598531738, + "learning_rate": 8.642645989980836e-05, + "loss": 0.5373, "step": 2983 }, { - "epoch": 0.41763470958712384, - "grad_norm": 0.41750246578218936, - "learning_rate": 3.276852659577519e-05, - "loss": 0.5642, + "epoch": 0.8351525328855304, + "grad_norm": 0.20422818906927728, + "learning_rate": 8.641589135259336e-05, + "loss": 0.5166, "step": 2984 }, { - "epoch": 0.4177746675997201, - "grad_norm": 0.4008235425339085, - "learning_rate": 3.275775355090603e-05, - "loss": 0.5385, + "epoch": 0.8354324097397146, + "grad_norm": 0.2178930729884611, + "learning_rate": 8.640531933929344e-05, + "loss": 0.5199, "step": 2985 }, { - "epoch": 0.4179146256123163, - "grad_norm": 0.42115213204670543, - "learning_rate": 3.274697891173982e-05, - "loss": 0.597, + "epoch": 0.8357122865938987, + "grad_norm": 0.20772686523058428, + "learning_rate": 8.639474386091485e-05, + "loss": 0.5138, "step": 2986 }, { - "epoch": 0.41805458362491255, - "grad_norm": 0.40847799222276204, - "learning_rate": 3.273620268049083e-05, - "loss": 0.5372, + "epoch": 0.8359921634480828, + "grad_norm": 0.21310742868775848, + "learning_rate": 8.638416491846417e-05, + "loss": 0.5158, "step": 2987 }, { - "epoch": 0.41819454163750874, - "grad_norm": 0.39981522543632353, - "learning_rate": 3.272542485937369e-05, - "loss": 0.5812, + "epoch": 0.836272040302267, + "grad_norm": 0.21822248030969937, + "learning_rate": 8.63735825129483e-05, + "loss": 0.5274, "step": 2988 }, { - "epoch": 0.418334499650105, - "grad_norm": 0.4281898310803181, - "learning_rate": 3.271464545060336e-05, - "loss": 0.6052, + "epoch": 0.8365519171564512, + "grad_norm": 0.21565497743306078, + "learning_rate": 8.636299664537449e-05, + "loss": 0.5521, "step": 2989 }, { - "epoch": 0.4184744576627012, - "grad_norm": 0.3960705407504793, - "learning_rate": 3.2703864456395106e-05, - "loss": 0.5535, + "epoch": 0.8368317940106353, + "grad_norm": 0.22967598513625365, + "learning_rate": 8.63524073167503e-05, + "loss": 0.5351, "step": 2990 }, { - "epoch": 0.4186144156752974, - "grad_norm": 0.40782715021367766, - "learning_rate": 3.2693081878964546e-05, - "loss": 0.5444, + "epoch": 0.8371116708648195, + "grad_norm": 0.22272323520405976, + "learning_rate": 8.634181452808366e-05, + "loss": 0.5286, "step": 2991 }, { - "epoch": 0.41875437368789364, - "grad_norm": 0.4108892545513627, - "learning_rate": 3.2682297720527596e-05, - "loss": 0.588, + "epoch": 0.8373915477190036, + "grad_norm": 0.2122248361469332, + "learning_rate": 8.633121828038275e-05, + "loss": 0.5204, "step": 2992 }, { - "epoch": 0.41889433170048984, - "grad_norm": 0.4052095905789736, - "learning_rate": 3.267151198330053e-05, - "loss": 0.6009, + "epoch": 0.8376714245731878, + "grad_norm": 0.2224232128088561, + "learning_rate": 8.632061857465614e-05, + "loss": 0.5373, "step": 2993 }, { - "epoch": 0.4190342897130861, - "grad_norm": 0.3957928902994827, - "learning_rate": 3.2660724669499906e-05, - "loss": 0.5503, + "epoch": 0.837951301427372, + "grad_norm": 0.22770490944769647, + "learning_rate": 8.631001541191275e-05, + "loss": 0.5305, "step": 2994 }, { - "epoch": 0.4191742477256823, - "grad_norm": 0.3945291064646862, - "learning_rate": 3.264993578134263e-05, - "loss": 0.5456, + "epoch": 0.8382311782815561, + "grad_norm": 0.22115756576959772, + "learning_rate": 8.629940879316175e-05, + "loss": 0.5416, "step": 2995 }, { - "epoch": 0.4193142057382785, - "grad_norm": 0.38055971820367784, - "learning_rate": 3.263914532104593e-05, - "loss": 0.575, + "epoch": 0.8385110551357403, + "grad_norm": 0.2137085524719575, + "learning_rate": 8.628879871941271e-05, + "loss": 0.5207, "step": 2996 }, { - "epoch": 0.41945416375087474, - "grad_norm": 0.3990796286153784, - "learning_rate": 3.2628353290827365e-05, - "loss": 0.5653, + "epoch": 0.8387909319899244, + "grad_norm": 0.2647897668434767, + "learning_rate": 8.62781851916755e-05, + "loss": 0.5319, "step": 2997 }, { - "epoch": 0.41959412176347094, - "grad_norm": 0.39699323320776897, - "learning_rate": 3.2617559692904784e-05, - "loss": 0.5567, + "epoch": 0.8390708088441086, + "grad_norm": 0.22114974994858042, + "learning_rate": 8.62675682109603e-05, + "loss": 0.5624, "step": 2998 }, { - "epoch": 0.4197340797760672, - "grad_norm": 0.42070498470477863, - "learning_rate": 3.260676452949641e-05, - "loss": 0.5643, + "epoch": 0.8393506856982927, + "grad_norm": 0.21597740903387966, + "learning_rate": 8.625694777827768e-05, + "loss": 0.5377, "step": 2999 }, { - "epoch": 0.4198740377886634, - "grad_norm": 0.41895847299595507, - "learning_rate": 3.259596780282074e-05, - "loss": 0.5866, + "epoch": 0.8396305625524769, + "grad_norm": 0.23160460862914972, + "learning_rate": 8.624632389463847e-05, + "loss": 0.5217, "step": 3000 }, { - "epoch": 0.42001399580125964, - "grad_norm": 0.4054149661332881, - "learning_rate": 3.2585169515096615e-05, - "loss": 0.5221, + "epoch": 0.8399104394066611, + "grad_norm": 0.22198800038480565, + "learning_rate": 8.623569656105386e-05, + "loss": 0.5398, "step": 3001 }, { - "epoch": 0.42015395381385584, - "grad_norm": 0.41159580867468976, - "learning_rate": 3.257436966854319e-05, - "loss": 0.5409, + "epoch": 0.8401903162608453, + "grad_norm": 0.22880709948052833, + "learning_rate": 8.622506577853538e-05, + "loss": 0.5477, "step": 3002 }, { - "epoch": 0.42029391182645204, - "grad_norm": 0.41565485170065436, - "learning_rate": 3.256356826537994e-05, - "loss": 0.572, + "epoch": 0.8404701931150294, + "grad_norm": 0.22341643822669172, + "learning_rate": 8.621443154809484e-05, + "loss": 0.5247, "step": 3003 }, { - "epoch": 0.4204338698390483, - "grad_norm": 0.4011710239277276, - "learning_rate": 3.255276530782667e-05, - "loss": 0.5399, + "epoch": 0.8407500699692135, + "grad_norm": 0.21597317363433163, + "learning_rate": 8.620379387074445e-05, + "loss": 0.5344, "step": 3004 }, { - "epoch": 0.4205738278516445, - "grad_norm": 0.45007473746050475, - "learning_rate": 3.25419607981035e-05, - "loss": 0.5283, + "epoch": 0.8410299468233977, + "grad_norm": 0.2192461822601934, + "learning_rate": 8.619315274749669e-05, + "loss": 0.5304, "step": 3005 }, { - "epoch": 0.42071378586424074, - "grad_norm": 0.4258234246567913, - "learning_rate": 3.253115473843086e-05, - "loss": 0.6111, + "epoch": 0.8413098236775819, + "grad_norm": 0.21837947530897744, + "learning_rate": 8.61825081793644e-05, + "loss": 0.5271, "step": 3006 }, { - "epoch": 0.42085374387683694, - "grad_norm": 0.41531516468042334, - "learning_rate": 3.252034713102951e-05, - "loss": 0.5644, + "epoch": 0.8415897005317661, + "grad_norm": 0.23056929005798127, + "learning_rate": 8.617186016736073e-05, + "loss": 0.5309, "step": 3007 }, { - "epoch": 0.4209937018894332, - "grad_norm": 0.41220387089241467, - "learning_rate": 3.250953797812051e-05, - "loss": 0.5798, + "epoch": 0.8418695773859501, + "grad_norm": 0.48552992745090096, + "learning_rate": 8.616120871249915e-05, + "loss": 0.534, "step": 3008 }, { - "epoch": 0.4211336599020294, - "grad_norm": 0.4165684547155104, - "learning_rate": 3.249872728192527e-05, - "loss": 0.578, + "epoch": 0.8421494542401343, + "grad_norm": 0.22014967001203464, + "learning_rate": 8.615055381579351e-05, + "loss": 0.5079, "step": 3009 }, { - "epoch": 0.4212736179146256, - "grad_norm": 0.41641177346916974, - "learning_rate": 3.248791504466548e-05, - "loss": 0.5847, + "epoch": 0.8424293310943185, + "grad_norm": 0.22228896792519673, + "learning_rate": 8.613989547825791e-05, + "loss": 0.554, "step": 3010 }, { - "epoch": 0.42141357592722184, - "grad_norm": 0.4230423352548566, - "learning_rate": 3.2477101268563184e-05, - "loss": 0.5578, + "epoch": 0.8427092079485027, + "grad_norm": 0.3271222337534686, + "learning_rate": 8.612923370090681e-05, + "loss": 0.5256, "step": 3011 }, { - "epoch": 0.42155353393981804, - "grad_norm": 0.40182190163643416, - "learning_rate": 3.24662859558407e-05, - "loss": 0.5322, + "epoch": 0.8429890848026869, + "grad_norm": 0.23895832466196784, + "learning_rate": 8.611856848475506e-05, + "loss": 0.54, "step": 3012 }, { - "epoch": 0.4216934919524143, - "grad_norm": 0.43511787431355525, - "learning_rate": 3.245546910872071e-05, - "loss": 0.5473, + "epoch": 0.8432689616568709, + "grad_norm": 0.23813344642390777, + "learning_rate": 8.610789983081773e-05, + "loss": 0.5428, "step": 3013 }, { - "epoch": 0.4218334499650105, - "grad_norm": 0.41338327961202165, - "learning_rate": 3.244465072942615e-05, - "loss": 0.5883, + "epoch": 0.8435488385110551, + "grad_norm": 0.2150596947662532, + "learning_rate": 8.609722774011027e-05, + "loss": 0.5245, "step": 3014 }, { - "epoch": 0.42197340797760674, - "grad_norm": 0.39971647364371277, - "learning_rate": 3.2433830820180346e-05, - "loss": 0.5384, + "epoch": 0.8438287153652393, + "grad_norm": 0.4205050738415862, + "learning_rate": 8.608655221364848e-05, + "loss": 0.5225, "step": 3015 }, { - "epoch": 0.42211336599020294, - "grad_norm": 0.4417586008166198, - "learning_rate": 3.2423009383206876e-05, - "loss": 0.5662, + "epoch": 0.8441085922194235, + "grad_norm": 0.48182958198991427, + "learning_rate": 8.607587325244845e-05, + "loss": 0.5287, "step": 3016 }, { - "epoch": 0.42225332400279914, - "grad_norm": 0.41967998090343933, - "learning_rate": 3.241218642072966e-05, - "loss": 0.5456, + "epoch": 0.8443884690736077, + "grad_norm": 0.2357786403355586, + "learning_rate": 8.606519085752661e-05, + "loss": 0.5474, "step": 3017 }, { - "epoch": 0.4223932820153954, - "grad_norm": 0.41986891673954385, - "learning_rate": 3.240136193497293e-05, - "loss": 0.5633, + "epoch": 0.8446683459277917, + "grad_norm": 0.2325231976984887, + "learning_rate": 8.605450502989974e-05, + "loss": 0.5393, "step": 3018 }, { - "epoch": 0.4225332400279916, - "grad_norm": 0.4038319504416983, - "learning_rate": 3.239053592816122e-05, - "loss": 0.5405, + "epoch": 0.8449482227819759, + "grad_norm": 0.30687287538085123, + "learning_rate": 8.604381577058486e-05, + "loss": 0.5103, "step": 3019 }, { - "epoch": 0.42267319804058784, - "grad_norm": 0.42072486493920963, - "learning_rate": 3.2379708402519394e-05, - "loss": 0.5397, + "epoch": 0.8452280996361601, + "grad_norm": 0.28608036056032426, + "learning_rate": 8.603312308059944e-05, + "loss": 0.544, "step": 3020 }, { - "epoch": 0.42281315605318404, - "grad_norm": 0.3963667588002371, - "learning_rate": 3.2368879360272606e-05, - "loss": 0.5437, + "epoch": 0.8455079764903443, + "grad_norm": 0.7097432176114093, + "learning_rate": 8.602242696096121e-05, + "loss": 0.5325, "step": 3021 }, { - "epoch": 0.4229531140657803, - "grad_norm": 0.43813839588807524, - "learning_rate": 3.235804880364635e-05, - "loss": 0.6218, + "epoch": 0.8457878533445284, + "grad_norm": 0.3829433514503948, + "learning_rate": 8.60117274126882e-05, + "loss": 0.5548, "step": 3022 }, { - "epoch": 0.4230930720783765, - "grad_norm": 0.420130728007189, - "learning_rate": 3.23472167348664e-05, - "loss": 0.5528, + "epoch": 0.8460677301987126, + "grad_norm": 0.20884200146313728, + "learning_rate": 8.600102443679882e-05, + "loss": 0.5269, "step": 3023 }, { - "epoch": 0.4232330300909727, - "grad_norm": 0.3931327455825961, - "learning_rate": 3.233638315615887e-05, - "loss": 0.5493, + "epoch": 0.8463476070528967, + "grad_norm": 0.5019778171462909, + "learning_rate": 8.599031803431179e-05, + "loss": 0.5346, "step": 3024 }, { - "epoch": 0.42337298810356894, - "grad_norm": 0.4231921022929699, - "learning_rate": 3.232554806975016e-05, - "loss": 0.5567, + "epoch": 0.8466274839070809, + "grad_norm": 0.21645085459579996, + "learning_rate": 8.597960820624615e-05, + "loss": 0.5216, "step": 3025 }, { - "epoch": 0.42351294611616513, - "grad_norm": 0.4136408960013234, - "learning_rate": 3.231471147786699e-05, - "loss": 0.6057, + "epoch": 0.846907360761265, + "grad_norm": 0.22013350170302393, + "learning_rate": 8.596889495362126e-05, + "loss": 0.5371, "step": 3026 }, { - "epoch": 0.4236529041287614, - "grad_norm": 0.3990285743665062, - "learning_rate": 3.23038733827364e-05, - "loss": 0.5888, + "epoch": 0.8471872376154492, + "grad_norm": 0.23428537071912092, + "learning_rate": 8.595817827745681e-05, + "loss": 0.5389, "step": 3027 }, { - "epoch": 0.4237928621413576, - "grad_norm": 0.38749988478653646, - "learning_rate": 3.2293033786585716e-05, - "loss": 0.5386, + "epoch": 0.8474671144696334, + "grad_norm": 0.26038829635626154, + "learning_rate": 8.594745817877283e-05, + "loss": 0.5339, "step": 3028 }, { - "epoch": 0.42393282015395384, - "grad_norm": 0.4001033239780827, - "learning_rate": 3.228219269164259e-05, - "loss": 0.5741, + "epoch": 0.8477469913238175, + "grad_norm": 0.2526970486204993, + "learning_rate": 8.593673465858964e-05, + "loss": 0.5369, "step": 3029 }, { - "epoch": 0.42407277816655004, - "grad_norm": 0.38245836943637973, - "learning_rate": 3.2271350100134975e-05, - "loss": 0.5658, + "epoch": 0.8480268681780017, + "grad_norm": 0.22912139308680848, + "learning_rate": 8.592600771792796e-05, + "loss": 0.5573, "step": 3030 }, { - "epoch": 0.42421273617914623, - "grad_norm": 0.3936968710553453, - "learning_rate": 3.226050601429115e-05, - "loss": 0.572, + "epoch": 0.8483067450321858, + "grad_norm": 0.23083891019087724, + "learning_rate": 8.591527735780874e-05, + "loss": 0.5307, "step": 3031 }, { - "epoch": 0.4243526941917425, - "grad_norm": 0.3902441652129766, - "learning_rate": 3.224966043633966e-05, - "loss": 0.5199, + "epoch": 0.84858662188637, + "grad_norm": 0.2287937314794176, + "learning_rate": 8.590454357925333e-05, + "loss": 0.5329, "step": 3032 }, { - "epoch": 0.4244926522043387, - "grad_norm": 0.43154246579160466, - "learning_rate": 3.223881336850939e-05, - "loss": 0.5621, + "epoch": 0.8488664987405542, + "grad_norm": 0.2311557611551994, + "learning_rate": 8.589380638328335e-05, + "loss": 0.5061, "step": 3033 }, { - "epoch": 0.42463261021693494, - "grad_norm": 0.41498447902123786, - "learning_rate": 3.222796481302953e-05, - "loss": 0.5886, + "epoch": 0.8491463755947383, + "grad_norm": 0.24368922031902182, + "learning_rate": 8.58830657709208e-05, + "loss": 0.5359, "step": 3034 }, { - "epoch": 0.42477256822953113, - "grad_norm": 0.38695499906221675, - "learning_rate": 3.221711477212956e-05, - "loss": 0.5449, + "epoch": 0.8494262524489224, + "grad_norm": 0.2190276350247355, + "learning_rate": 8.587232174318795e-05, + "loss": 0.5562, "step": 3035 }, { - "epoch": 0.4249125262421274, - "grad_norm": 0.3974805369088236, - "learning_rate": 3.2206263248039276e-05, - "loss": 0.5602, + "epoch": 0.8497061293031066, + "grad_norm": 0.23802916448732422, + "learning_rate": 8.586157430110747e-05, + "loss": 0.5116, "step": 3036 }, { - "epoch": 0.4250524842547236, - "grad_norm": 0.40115361129111843, - "learning_rate": 3.2195410242988776e-05, - "loss": 0.5675, + "epoch": 0.8499860061572908, + "grad_norm": 0.2316176129573859, + "learning_rate": 8.585082344570224e-05, + "loss": 0.5207, "step": 3037 }, { - "epoch": 0.4251924422673198, - "grad_norm": 0.3992286694324687, - "learning_rate": 3.2184555759208465e-05, - "loss": 0.5365, + "epoch": 0.850265883011475, + "grad_norm": 0.24220759817972742, + "learning_rate": 8.584006917799559e-05, + "loss": 0.5282, "step": 3038 }, { - "epoch": 0.42533240027991603, - "grad_norm": 0.39187949837957153, - "learning_rate": 3.217369979892905e-05, - "loss": 0.5514, + "epoch": 0.8505457598656591, + "grad_norm": 0.20917558157479316, + "learning_rate": 8.582931149901108e-05, + "loss": 0.5239, "step": 3039 }, { - "epoch": 0.42547235829251223, - "grad_norm": 0.42850832602682615, - "learning_rate": 3.216284236438154e-05, - "loss": 0.5432, + "epoch": 0.8508256367198432, + "grad_norm": 0.21993803371135584, + "learning_rate": 8.581855040977265e-05, + "loss": 0.5234, "step": 3040 }, { - "epoch": 0.4256123163051085, - "grad_norm": 0.43436314625968364, - "learning_rate": 3.215198345779723e-05, - "loss": 0.586, + "epoch": 0.8511055135740274, + "grad_norm": 0.21482793953146043, + "learning_rate": 8.580778591130455e-05, + "loss": 0.5371, "step": 3041 }, { - "epoch": 0.4257522743177047, - "grad_norm": 0.4099577503194474, - "learning_rate": 3.214112308140777e-05, - "loss": 0.5915, + "epoch": 0.8513853904282116, + "grad_norm": 0.2300206860974878, + "learning_rate": 8.579701800463133e-05, + "loss": 0.5537, "step": 3042 }, { - "epoch": 0.42589223233030093, - "grad_norm": 0.42233874886106193, - "learning_rate": 3.213026123744506e-05, - "loss": 0.6066, + "epoch": 0.8516652672823958, + "grad_norm": 0.23620665970415572, + "learning_rate": 8.578624669077792e-05, + "loss": 0.5639, "step": 3043 }, { - "epoch": 0.42603219034289713, - "grad_norm": 0.3978279257361451, - "learning_rate": 3.211939792814131e-05, - "loss": 0.5882, + "epoch": 0.85194514413658, + "grad_norm": 0.23789290559836274, + "learning_rate": 8.577547197076951e-05, + "loss": 0.5328, "step": 3044 }, { - "epoch": 0.42617214835549333, - "grad_norm": 0.40970969740070096, - "learning_rate": 3.210853315572906e-05, - "loss": 0.5647, + "epoch": 0.852225020990764, + "grad_norm": 0.23353656471451908, + "learning_rate": 8.576469384563167e-05, + "loss": 0.5403, "step": 3045 }, { - "epoch": 0.4263121063680896, - "grad_norm": 0.3976277940502661, - "learning_rate": 3.20976669224411e-05, - "loss": 0.5873, + "epoch": 0.8525048978449482, + "grad_norm": 0.23339557126696017, + "learning_rate": 8.575391231639023e-05, + "loss": 0.5285, "step": 3046 }, { - "epoch": 0.4264520643806858, - "grad_norm": 0.400367465814101, - "learning_rate": 3.208679923051059e-05, - "loss": 0.5595, + "epoch": 0.8527847746991324, + "grad_norm": 0.2192162077558338, + "learning_rate": 8.57431273840714e-05, + "loss": 0.5331, "step": 3047 }, { - "epoch": 0.42659202239328203, - "grad_norm": 0.40911631062240816, - "learning_rate": 3.207593008217092e-05, - "loss": 0.566, + "epoch": 0.8530646515533166, + "grad_norm": 0.23200636869438518, + "learning_rate": 8.573233904970171e-05, + "loss": 0.5543, "step": 3048 }, { - "epoch": 0.42673198040587823, - "grad_norm": 0.4245921605186627, - "learning_rate": 3.206505947965583e-05, - "loss": 0.5643, + "epoch": 0.8533445284075007, + "grad_norm": 0.228854877015622, + "learning_rate": 8.5721547314308e-05, + "loss": 0.5556, "step": 3049 }, { - "epoch": 0.4268719384184745, - "grad_norm": 0.42320217692212986, - "learning_rate": 3.205418742519933e-05, - "loss": 0.5512, + "epoch": 0.8536244052616848, + "grad_norm": 0.23432131992320163, + "learning_rate": 8.57107521789174e-05, + "loss": 0.5397, "step": 3050 }, { - "epoch": 0.4270118964310707, - "grad_norm": 0.4050992683553409, - "learning_rate": 3.2043313921035743e-05, - "loss": 0.5712, + "epoch": 0.853904282115869, + "grad_norm": 0.21593071073555029, + "learning_rate": 8.569995364455743e-05, + "loss": 0.5167, "step": 3051 }, { - "epoch": 0.4271518544436669, - "grad_norm": 0.39118318821429826, - "learning_rate": 3.203243896939968e-05, - "loss": 0.5839, + "epoch": 0.8541841589700532, + "grad_norm": 0.2334519803327315, + "learning_rate": 8.568915171225589e-05, + "loss": 0.5336, "step": 3052 }, { - "epoch": 0.42729181245626313, - "grad_norm": 0.4116673535552461, - "learning_rate": 3.202156257252606e-05, - "loss": 0.5599, + "epoch": 0.8544640358242374, + "grad_norm": 0.2393187471142604, + "learning_rate": 8.56783463830409e-05, + "loss": 0.5642, "step": 3053 }, { - "epoch": 0.42743177046885933, - "grad_norm": 0.42864138348780956, - "learning_rate": 3.201068473265007e-05, - "loss": 0.5877, + "epoch": 0.8547439126784215, + "grad_norm": 0.23015236105141004, + "learning_rate": 8.566753765794096e-05, + "loss": 0.5441, "step": 3054 }, { - "epoch": 0.4275717284814556, - "grad_norm": 0.4084556626969551, - "learning_rate": 3.1999805452007245e-05, - "loss": 0.578, + "epoch": 0.8550237895326056, + "grad_norm": 0.40683635224973136, + "learning_rate": 8.56567255379848e-05, + "loss": 0.5403, "step": 3055 }, { - "epoch": 0.4277116864940518, - "grad_norm": 0.40349943769579744, - "learning_rate": 3.1988924732833384e-05, - "loss": 0.562, + "epoch": 0.8553036663867898, + "grad_norm": 0.21313781378496474, + "learning_rate": 8.564591002420155e-05, + "loss": 0.5052, "step": 3056 }, { - "epoch": 0.42785164450664803, - "grad_norm": 0.41607503622672326, - "learning_rate": 3.197804257736456e-05, - "loss": 0.5766, + "epoch": 0.855583543240974, + "grad_norm": 0.21644306822669965, + "learning_rate": 8.563509111762064e-05, + "loss": 0.5316, "step": 3057 }, { - "epoch": 0.42799160251924423, - "grad_norm": 0.390556372929388, - "learning_rate": 3.19671589878372e-05, - "loss": 0.5343, + "epoch": 0.8558634200951581, + "grad_norm": 0.222839599469616, + "learning_rate": 8.562426881927181e-05, + "loss": 0.5362, "step": 3058 }, { - "epoch": 0.4281315605318404, - "grad_norm": 0.40189243879585396, - "learning_rate": 3.195627396648796e-05, - "loss": 0.5923, + "epoch": 0.8561432969493423, + "grad_norm": 0.22677337413030232, + "learning_rate": 8.561344313018513e-05, + "loss": 0.5377, "step": 3059 }, { - "epoch": 0.4282715185444367, - "grad_norm": 0.383550649614106, - "learning_rate": 3.1945387515553846e-05, - "loss": 0.5356, + "epoch": 0.8564231738035264, + "grad_norm": 0.22181193529032409, + "learning_rate": 8.560261405139098e-05, + "loss": 0.5332, "step": 3060 }, { - "epoch": 0.4284114765570329, - "grad_norm": 0.39467142307894415, - "learning_rate": 3.193449963727213e-05, - "loss": 0.5402, + "epoch": 0.8567030506577106, + "grad_norm": 0.21684088220810097, + "learning_rate": 8.559178158392012e-05, + "loss": 0.5087, "step": 3061 }, { - "epoch": 0.42855143456962913, - "grad_norm": 0.3980289276258243, - "learning_rate": 3.192361033388037e-05, - "loss": 0.5769, + "epoch": 0.8569829275118948, + "grad_norm": 0.22488537452868168, + "learning_rate": 8.558094572880357e-05, + "loss": 0.5456, "step": 3062 }, { - "epoch": 0.42869139258222533, - "grad_norm": 0.40844869461585276, - "learning_rate": 3.191271960761645e-05, - "loss": 0.5635, + "epoch": 0.8572628043660789, + "grad_norm": 0.2160019014092832, + "learning_rate": 8.557010648707268e-05, + "loss": 0.5228, "step": 3063 }, { - "epoch": 0.4288313505948216, - "grad_norm": 0.4137330574117752, - "learning_rate": 3.19018274607185e-05, - "loss": 0.5866, + "epoch": 0.8575426812202631, + "grad_norm": 0.21151813117300422, + "learning_rate": 8.555926385975914e-05, + "loss": 0.5368, "step": 3064 }, { - "epoch": 0.4289713086074178, - "grad_norm": 0.4089150211795348, - "learning_rate": 3.1890933895424976e-05, - "loss": 0.5783, + "epoch": 0.8578225580744473, + "grad_norm": 0.22124207366336962, + "learning_rate": 8.554841784789499e-05, + "loss": 0.5284, "step": 3065 }, { - "epoch": 0.429111266620014, - "grad_norm": 0.40728615720674294, - "learning_rate": 3.188003891397463e-05, - "loss": 0.6048, + "epoch": 0.8581024349286314, + "grad_norm": 0.22389008378654957, + "learning_rate": 8.553756845251251e-05, + "loss": 0.5456, "step": 3066 }, { - "epoch": 0.42925122463261023, - "grad_norm": 0.3988606193766422, - "learning_rate": 3.186914251860648e-05, - "loss": 0.5234, + "epoch": 0.8583823117828155, + "grad_norm": 0.22294828670104347, + "learning_rate": 8.552671567464436e-05, + "loss": 0.5342, "step": 3067 }, { - "epoch": 0.4293911826452064, - "grad_norm": 0.4081704250933805, - "learning_rate": 3.185824471155983e-05, - "loss": 0.5593, + "epoch": 0.8586621886369997, + "grad_norm": 0.22699959701765185, + "learning_rate": 8.551585951532355e-05, + "loss": 0.5535, "step": 3068 }, { - "epoch": 0.4295311406578027, - "grad_norm": 0.42949202285429056, - "learning_rate": 3.184734549507431e-05, - "loss": 0.5385, + "epoch": 0.8589420654911839, + "grad_norm": 0.21951338821931762, + "learning_rate": 8.550499997558335e-05, + "loss": 0.5352, "step": 3069 }, { - "epoch": 0.4296710986703989, - "grad_norm": 0.39800337635965866, - "learning_rate": 3.183644487138982e-05, - "loss": 0.5213, + "epoch": 0.8592219423453681, + "grad_norm": 0.2207427299372135, + "learning_rate": 8.549413705645737e-05, + "loss": 0.5292, "step": 3070 }, { - "epoch": 0.4298110566829951, - "grad_norm": 0.3968235944285963, - "learning_rate": 3.182554284274654e-05, - "loss": 0.5508, + "epoch": 0.8595018191995522, + "grad_norm": 0.2158791998334058, + "learning_rate": 8.548327075897955e-05, + "loss": 0.52, "step": 3071 }, { - "epoch": 0.4299510146955913, - "grad_norm": 0.41991412353150687, - "learning_rate": 3.181463941138495e-05, - "loss": 0.5829, + "epoch": 0.8597816960537363, + "grad_norm": 0.22515172406642706, + "learning_rate": 8.547240108418417e-05, + "loss": 0.5263, "step": 3072 }, { - "epoch": 0.4300909727081875, - "grad_norm": 0.42186424178865006, - "learning_rate": 3.180373457954581e-05, - "loss": 0.5444, + "epoch": 0.8600615729079205, + "grad_norm": 0.21368624688447932, + "learning_rate": 8.54615280331058e-05, + "loss": 0.5593, "step": 3073 }, { - "epoch": 0.4302309307207838, - "grad_norm": 0.409043031506044, - "learning_rate": 3.17928283494702e-05, - "loss": 0.5473, + "epoch": 0.8603414497621047, + "grad_norm": 0.22572367960252235, + "learning_rate": 8.545065160677935e-05, + "loss": 0.551, "step": 3074 }, { - "epoch": 0.43037088873338, - "grad_norm": 0.4090988482060365, - "learning_rate": 3.178192072339942e-05, - "loss": 0.5293, + "epoch": 0.8606213266162889, + "grad_norm": 0.2228799855804906, + "learning_rate": 8.543977180624003e-05, + "loss": 0.5275, "step": 3075 }, { - "epoch": 0.43051084674597623, - "grad_norm": 0.40615205351515493, - "learning_rate": 3.177101170357513e-05, - "loss": 0.5869, + "epoch": 0.8609012034704729, + "grad_norm": 0.22250102641620206, + "learning_rate": 8.54288886325234e-05, + "loss": 0.537, "step": 3076 }, { - "epoch": 0.4306508047585724, - "grad_norm": 0.39773350024022835, - "learning_rate": 3.176010129223923e-05, - "loss": 0.5456, + "epoch": 0.8611810803246571, + "grad_norm": 0.22202228022932616, + "learning_rate": 8.54180020866653e-05, + "loss": 0.5346, "step": 3077 }, { - "epoch": 0.4307907627711686, - "grad_norm": 0.4199551126566616, - "learning_rate": 3.174918949163392e-05, - "loss": 0.5642, + "epoch": 0.8614609571788413, + "grad_norm": 0.22936964314002728, + "learning_rate": 8.540711216970197e-05, + "loss": 0.5206, "step": 3078 }, { - "epoch": 0.4309307207837649, - "grad_norm": 0.43088576609462415, - "learning_rate": 3.17382763040017e-05, - "loss": 0.5563, + "epoch": 0.8617408340330255, + "grad_norm": 0.3770062819332392, + "learning_rate": 8.539621888266985e-05, + "loss": 0.5517, "step": 3079 }, { - "epoch": 0.4310706787963611, - "grad_norm": 0.40278017058920423, - "learning_rate": 3.172736173158532e-05, - "loss": 0.5279, + "epoch": 0.8620207108872097, + "grad_norm": 0.2228015196743566, + "learning_rate": 8.538532222660583e-05, + "loss": 0.522, "step": 3080 }, { - "epoch": 0.4312106368089573, - "grad_norm": 0.4423703616752564, - "learning_rate": 3.171644577662785e-05, - "loss": 0.5701, + "epoch": 0.8623005877413937, + "grad_norm": 0.2228220545406724, + "learning_rate": 8.537442220254703e-05, + "loss": 0.5256, "step": 3081 }, { - "epoch": 0.4313505948215535, - "grad_norm": 0.4240618723379521, - "learning_rate": 3.1705528441372626e-05, - "loss": 0.5876, + "epoch": 0.8625804645955779, + "grad_norm": 0.23350733934227055, + "learning_rate": 8.53635188115309e-05, + "loss": 0.5485, "step": 3082 }, { - "epoch": 0.4314905528341498, - "grad_norm": 0.39848697262264293, - "learning_rate": 3.169460972806327e-05, - "loss": 0.5549, + "epoch": 0.8628603414497621, + "grad_norm": 0.21846066077770462, + "learning_rate": 8.53526120545953e-05, + "loss": 0.5276, "step": 3083 }, { - "epoch": 0.431630510846746, - "grad_norm": 0.40754000265622903, - "learning_rate": 3.1683689638943684e-05, - "loss": 0.5772, + "epoch": 0.8631402183039463, + "grad_norm": 0.2335962307244182, + "learning_rate": 8.534170193277827e-05, + "loss": 0.5548, "step": 3084 }, { - "epoch": 0.43177046885934217, - "grad_norm": 0.4018754339809231, - "learning_rate": 3.167276817625806e-05, - "loss": 0.5783, + "epoch": 0.8634200951581305, + "grad_norm": 0.2201364789143078, + "learning_rate": 8.533078844711826e-05, + "loss": 0.5426, "step": 3085 }, { - "epoch": 0.4319104268719384, - "grad_norm": 0.430233233796668, - "learning_rate": 3.166184534225087e-05, - "loss": 0.5775, + "epoch": 0.8636999720123146, + "grad_norm": 0.23065771245439703, + "learning_rate": 8.531987159865405e-05, + "loss": 0.5716, "step": 3086 }, { - "epoch": 0.4320503848845346, - "grad_norm": 0.4210970260038791, - "learning_rate": 3.165092113916688e-05, - "loss": 0.5593, + "epoch": 0.8639798488664987, + "grad_norm": 0.23289686034673565, + "learning_rate": 8.530895138842467e-05, + "loss": 0.5299, "step": 3087 }, { - "epoch": 0.4321903428971309, - "grad_norm": 0.4162185793360081, - "learning_rate": 3.163999556925111e-05, - "loss": 0.5587, + "epoch": 0.8642597257206829, + "grad_norm": 0.21787168916983998, + "learning_rate": 8.529802781746956e-05, + "loss": 0.5342, "step": 3088 }, { - "epoch": 0.43233030090972707, - "grad_norm": 0.38464238839448883, - "learning_rate": 3.162906863474887e-05, - "loss": 0.5527, + "epoch": 0.8645396025748671, + "grad_norm": 0.22481660268884418, + "learning_rate": 8.528710088682839e-05, + "loss": 0.5174, "step": 3089 }, { - "epoch": 0.4324702589223233, - "grad_norm": 0.3899859914956373, - "learning_rate": 3.161814033790577e-05, - "loss": 0.5734, + "epoch": 0.8648194794290512, + "grad_norm": 0.22367814560169486, + "learning_rate": 8.52761705975412e-05, + "loss": 0.5474, "step": 3090 }, { - "epoch": 0.4326102169349195, - "grad_norm": 0.40900462690982553, - "learning_rate": 3.160721068096768e-05, - "loss": 0.5399, + "epoch": 0.8650993562832354, + "grad_norm": 0.2179802466288477, + "learning_rate": 8.526523695064836e-05, + "loss": 0.4985, "step": 3091 }, { - "epoch": 0.4327501749475157, - "grad_norm": 0.4220096792046019, - "learning_rate": 3.159627966618075e-05, - "loss": 0.5727, + "epoch": 0.8653792331374195, + "grad_norm": 0.22584878578261564, + "learning_rate": 8.525429994719052e-05, + "loss": 0.5362, "step": 3092 }, { - "epoch": 0.432890132960112, - "grad_norm": 0.4233571391991352, - "learning_rate": 3.158534729579142e-05, - "loss": 0.5949, + "epoch": 0.8656591099916037, + "grad_norm": 0.21681602528527902, + "learning_rate": 8.524335958820868e-05, + "loss": 0.5399, "step": 3093 }, { - "epoch": 0.43303009097270817, - "grad_norm": 0.40740179328267645, - "learning_rate": 3.157441357204641e-05, - "loss": 0.5645, + "epoch": 0.8659389868457879, + "grad_norm": 0.21004233979125894, + "learning_rate": 8.523241587474416e-05, + "loss": 0.5279, "step": 3094 }, { - "epoch": 0.4331700489853044, - "grad_norm": 0.38846491608664135, - "learning_rate": 3.15634784971927e-05, - "loss": 0.5352, + "epoch": 0.866218863699972, + "grad_norm": 0.22216773769256115, + "learning_rate": 8.522146880783855e-05, + "loss": 0.5254, "step": 3095 }, { - "epoch": 0.4333100069979006, - "grad_norm": 0.40243615629816265, - "learning_rate": 3.1552542073477555e-05, - "loss": 0.5672, + "epoch": 0.8664987405541562, + "grad_norm": 0.22621255953137892, + "learning_rate": 8.521051838853385e-05, + "loss": 0.5328, "step": 3096 }, { - "epoch": 0.4334499650104969, - "grad_norm": 0.38997133690973523, - "learning_rate": 3.154160430314854e-05, - "loss": 0.5756, + "epoch": 0.8667786174083403, + "grad_norm": 0.213235618976495, + "learning_rate": 8.519956461787228e-05, + "loss": 0.5247, "step": 3097 }, { - "epoch": 0.43358992302309307, - "grad_norm": 0.3815517517819305, - "learning_rate": 3.1530665188453464e-05, - "loss": 0.5596, + "epoch": 0.8670584942625245, + "grad_norm": 0.21174058618586977, + "learning_rate": 8.518860749689648e-05, + "loss": 0.5372, "step": 3098 }, { - "epoch": 0.43372988103568927, - "grad_norm": 0.39845289899145564, - "learning_rate": 3.1519724731640424e-05, - "loss": 0.5914, + "epoch": 0.8673383711167086, + "grad_norm": 0.2329901448660573, + "learning_rate": 8.51776470266493e-05, + "loss": 0.507, "step": 3099 }, { - "epoch": 0.4338698390482855, - "grad_norm": 0.39018815516325334, - "learning_rate": 3.15087829349578e-05, - "loss": 0.5351, + "epoch": 0.8676182479708928, + "grad_norm": 0.2345445756356732, + "learning_rate": 8.516668320817399e-05, + "loss": 0.5254, "step": 3100 }, { - "epoch": 0.4340097970608817, - "grad_norm": 0.4129017336286664, - "learning_rate": 3.149783980065425e-05, - "loss": 0.562, + "epoch": 0.867898124825077, + "grad_norm": 0.22585171397064097, + "learning_rate": 8.51557160425141e-05, + "loss": 0.5053, "step": 3101 }, { - "epoch": 0.43414975507347797, - "grad_norm": 0.42547377124636576, - "learning_rate": 3.1486895330978685e-05, - "loss": 0.5924, + "epoch": 0.8681780016792612, + "grad_norm": 0.22080060758473458, + "learning_rate": 8.514474553071345e-05, + "loss": 0.5214, "step": 3102 }, { - "epoch": 0.43428971308607417, - "grad_norm": 0.41352999220449604, - "learning_rate": 3.147594952818031e-05, - "loss": 0.5417, + "epoch": 0.8684578785334452, + "grad_norm": 0.2248032890887987, + "learning_rate": 8.513377167381626e-05, + "loss": 0.5314, "step": 3103 }, { - "epoch": 0.4344296710986704, - "grad_norm": 0.42347168990756195, - "learning_rate": 3.14650023945086e-05, - "loss": 0.5479, + "epoch": 0.8687377553876294, + "grad_norm": 0.2192324263951842, + "learning_rate": 8.512279447286703e-05, + "loss": 0.5145, "step": 3104 }, { - "epoch": 0.4345696291112666, - "grad_norm": 0.389428898967484, - "learning_rate": 3.1454053932213304e-05, - "loss": 0.5552, + "epoch": 0.8690176322418136, + "grad_norm": 0.2257624455841242, + "learning_rate": 8.511181392891055e-05, + "loss": 0.5195, "step": 3105 }, { - "epoch": 0.4347095871238628, - "grad_norm": 0.38385485279343906, - "learning_rate": 3.144310414354444e-05, - "loss": 0.5197, + "epoch": 0.8692975090959978, + "grad_norm": 0.2328728079571696, + "learning_rate": 8.510083004299199e-05, + "loss": 0.5448, "step": 3106 }, { - "epoch": 0.43484954513645907, - "grad_norm": 0.41030767770871096, - "learning_rate": 3.1432153030752295e-05, - "loss": 0.5597, + "epoch": 0.869577385950182, + "grad_norm": 0.2203283253375442, + "learning_rate": 8.508984281615675e-05, + "loss": 0.5536, "step": 3107 }, { - "epoch": 0.43498950314905527, - "grad_norm": 0.41349967870711807, - "learning_rate": 3.142120059608744e-05, - "loss": 0.5663, + "epoch": 0.869857262804366, + "grad_norm": 0.22063659465343083, + "learning_rate": 8.507885224945065e-05, + "loss": 0.5381, "step": 3108 }, { - "epoch": 0.4351294611616515, - "grad_norm": 0.4082370728237636, - "learning_rate": 3.141024684180071e-05, - "loss": 0.5275, + "epoch": 0.8701371396585502, + "grad_norm": 0.22939783313111242, + "learning_rate": 8.506785834391975e-05, + "loss": 0.5565, "step": 3109 }, { - "epoch": 0.4352694191742477, - "grad_norm": 0.4122249486824059, - "learning_rate": 3.139929177014322e-05, - "loss": 0.5451, + "epoch": 0.8704170165127344, + "grad_norm": 0.2294001337165578, + "learning_rate": 8.505686110061046e-05, + "loss": 0.5214, "step": 3110 }, { - "epoch": 0.43540937718684397, - "grad_norm": 0.41854855746910635, - "learning_rate": 3.138833538336633e-05, - "loss": 0.5418, + "epoch": 0.8706968933669186, + "grad_norm": 0.21851962678707973, + "learning_rate": 8.504586052056951e-05, + "loss": 0.5474, "step": 3111 }, { - "epoch": 0.43554933519944017, - "grad_norm": 0.4259659771695767, - "learning_rate": 3.137737768372171e-05, - "loss": 0.5931, + "epoch": 0.8709767702211028, + "grad_norm": 0.21350852258826278, + "learning_rate": 8.503485660484396e-05, + "loss": 0.5049, "step": 3112 }, { - "epoch": 0.43568929321203637, - "grad_norm": 0.43545087247200576, - "learning_rate": 3.1366418673461254e-05, - "loss": 0.5899, + "epoch": 0.8712566470752868, + "grad_norm": 0.22786109805847043, + "learning_rate": 8.502384935448112e-05, + "loss": 0.5214, "step": 3113 }, { - "epoch": 0.4358292512246326, - "grad_norm": 0.4096432845087902, - "learning_rate": 3.135545835483718e-05, - "loss": 0.5686, + "epoch": 0.871536523929471, + "grad_norm": 0.2236176254734842, + "learning_rate": 8.50128387705287e-05, + "loss": 0.5342, "step": 3114 }, { - "epoch": 0.4359692092372288, - "grad_norm": 0.40360816791162424, - "learning_rate": 3.134449673010192e-05, - "loss": 0.5471, + "epoch": 0.8718164007836552, + "grad_norm": 0.21937941632307223, + "learning_rate": 8.500182485403471e-05, + "loss": 0.5551, "step": 3115 }, { - "epoch": 0.43610916724982507, - "grad_norm": 0.4293932793165949, - "learning_rate": 3.1333533801508204e-05, - "loss": 0.5721, + "epoch": 0.8720962776378394, + "grad_norm": 0.22777612967246738, + "learning_rate": 8.499080760604742e-05, + "loss": 0.5145, "step": 3116 }, { - "epoch": 0.43624912526242127, - "grad_norm": 0.3963350874406529, - "learning_rate": 3.132256957130904e-05, - "loss": 0.5685, + "epoch": 0.8723761544920235, + "grad_norm": 0.20649570660870228, + "learning_rate": 8.497978702761547e-05, + "loss": 0.526, "step": 3117 }, { - "epoch": 0.4363890832750175, - "grad_norm": 0.3871923444763041, - "learning_rate": 3.131160404175767e-05, - "loss": 0.5544, + "epoch": 0.8726560313462076, + "grad_norm": 0.21192410040800452, + "learning_rate": 8.496876311978784e-05, + "loss": 0.5276, "step": 3118 }, { - "epoch": 0.4365290412876137, - "grad_norm": 0.41730033342791306, - "learning_rate": 3.130063721510763e-05, - "loss": 0.5594, + "epoch": 0.8729359082003918, + "grad_norm": 0.24903919474732164, + "learning_rate": 8.495773588361374e-05, + "loss": 0.5246, "step": 3119 }, { - "epoch": 0.4366689993002099, - "grad_norm": 0.43319861497508905, - "learning_rate": 3.1289669093612714e-05, - "loss": 0.5896, + "epoch": 0.873215785054576, + "grad_norm": 0.22174705900135905, + "learning_rate": 8.494670532014277e-05, + "loss": 0.5367, "step": 3120 }, { - "epoch": 0.43680895731280617, - "grad_norm": 0.3896513313595881, - "learning_rate": 3.127869967952698e-05, - "loss": 0.5799, + "epoch": 0.8734956619087602, + "grad_norm": 0.22209745002607398, + "learning_rate": 8.493567143042485e-05, + "loss": 0.5531, "step": 3121 }, { - "epoch": 0.43694891532540237, - "grad_norm": 0.4391301390474321, - "learning_rate": 3.126772897510476e-05, - "loss": 0.582, + "epoch": 0.8737755387629443, + "grad_norm": 0.21573048363141414, + "learning_rate": 8.492463421551016e-05, + "loss": 0.5532, "step": 3122 }, { - "epoch": 0.4370888733379986, - "grad_norm": 0.3951829444712206, - "learning_rate": 3.125675698260065e-05, - "loss": 0.5577, + "epoch": 0.8740554156171285, + "grad_norm": 0.2186833501586502, + "learning_rate": 8.491359367644922e-05, + "loss": 0.5235, "step": 3123 }, { - "epoch": 0.4372288313505948, - "grad_norm": 0.4088921127679525, - "learning_rate": 3.124578370426947e-05, - "loss": 0.5471, + "epoch": 0.8743352924713126, + "grad_norm": 0.2233053486143271, + "learning_rate": 8.490254981429291e-05, + "loss": 0.5531, "step": 3124 }, { - "epoch": 0.43736878936319107, - "grad_norm": 0.40507594886972204, - "learning_rate": 3.1234809142366376e-05, - "loss": 0.5787, + "epoch": 0.8746151693254968, + "grad_norm": 0.21989657534322912, + "learning_rate": 8.489150263009236e-05, + "loss": 0.5343, "step": 3125 }, { - "epoch": 0.43750874737578727, - "grad_norm": 0.48880201961534253, - "learning_rate": 3.1223833299146756e-05, - "loss": 0.5665, + "epoch": 0.874895046179681, + "grad_norm": 0.22215578956680457, + "learning_rate": 8.488045212489906e-05, + "loss": 0.5026, "step": 3126 }, { - "epoch": 0.43764870538838346, - "grad_norm": 0.40104306636405085, - "learning_rate": 3.1212856176866226e-05, - "loss": 0.5403, + "epoch": 0.8751749230338651, + "grad_norm": 0.2144000586652832, + "learning_rate": 8.486939829976481e-05, + "loss": 0.5499, "step": 3127 }, { - "epoch": 0.4377886634009797, - "grad_norm": 0.38729222817598974, - "learning_rate": 3.120187777778073e-05, - "loss": 0.5298, + "epoch": 0.8754547998880493, + "grad_norm": 0.21964329639737248, + "learning_rate": 8.485834115574171e-05, + "loss": 0.5309, "step": 3128 }, { - "epoch": 0.4379286214135759, - "grad_norm": 0.38974703851837783, - "learning_rate": 3.11908981041464e-05, - "loss": 0.5612, + "epoch": 0.8757346767422334, + "grad_norm": 0.2250524115638172, + "learning_rate": 8.48472806938822e-05, + "loss": 0.5273, "step": 3129 }, { - "epoch": 0.43806857942617217, - "grad_norm": 0.4100334816926811, - "learning_rate": 3.117991715821972e-05, - "loss": 0.5903, + "epoch": 0.8760145535964176, + "grad_norm": 0.21934176725743307, + "learning_rate": 8.4836216915239e-05, + "loss": 0.5471, "step": 3130 }, { - "epoch": 0.43820853743876836, - "grad_norm": 0.4156843103680473, - "learning_rate": 3.116893494225734e-05, - "loss": 0.5886, + "epoch": 0.8762944304506017, + "grad_norm": 0.21883613562376256, + "learning_rate": 8.482514982086517e-05, + "loss": 0.5424, "step": 3131 }, { - "epoch": 0.4383484954513646, - "grad_norm": 0.39607057982434785, - "learning_rate": 3.115795145851625e-05, - "loss": 0.5621, + "epoch": 0.8765743073047859, + "grad_norm": 0.21740762627582735, + "learning_rate": 8.48140794118141e-05, + "loss": 0.5239, "step": 3132 }, { - "epoch": 0.4384884534639608, - "grad_norm": 0.4036233265018997, - "learning_rate": 3.114696670925365e-05, - "loss": 0.5538, + "epoch": 0.8768541841589701, + "grad_norm": 0.2181406659999808, + "learning_rate": 8.480300568913945e-05, + "loss": 0.5207, "step": 3133 }, { - "epoch": 0.438628411476557, - "grad_norm": 0.4101277411308792, - "learning_rate": 3.113598069672702e-05, - "loss": 0.5961, + "epoch": 0.8771340610131542, + "grad_norm": 0.22269041831994688, + "learning_rate": 8.479192865389525e-05, + "loss": 0.5611, "step": 3134 }, { - "epoch": 0.43876836948915326, - "grad_norm": 0.399711153788506, - "learning_rate": 3.11249934231941e-05, - "loss": 0.5594, + "epoch": 0.8774139378673383, + "grad_norm": 0.22622911694068062, + "learning_rate": 8.47808483071358e-05, + "loss": 0.5355, "step": 3135 }, { - "epoch": 0.43890832750174946, - "grad_norm": 0.3867721840790253, - "learning_rate": 3.111400489091288e-05, - "loss": 0.5122, + "epoch": 0.8776938147215225, + "grad_norm": 0.22244603082827713, + "learning_rate": 8.476976464991573e-05, + "loss": 0.5024, "step": 3136 }, { - "epoch": 0.4390482855143457, - "grad_norm": 0.4029896269340335, - "learning_rate": 3.110301510214162e-05, - "loss": 0.5724, + "epoch": 0.8779736915757067, + "grad_norm": 0.2321149946809836, + "learning_rate": 8.475867768329002e-05, + "loss": 0.5107, "step": 3137 }, { - "epoch": 0.4391882435269419, - "grad_norm": 0.4323192969018167, - "learning_rate": 3.1092024059138836e-05, - "loss": 0.5901, + "epoch": 0.8782535684298909, + "grad_norm": 0.24475291898145704, + "learning_rate": 8.47475874083139e-05, + "loss": 0.533, "step": 3138 }, { - "epoch": 0.43932820153953817, - "grad_norm": 0.4030250752398442, - "learning_rate": 3.108103176416329e-05, - "loss": 0.5369, + "epoch": 0.878533445284075, + "grad_norm": 0.22701519899541858, + "learning_rate": 8.473649382604296e-05, + "loss": 0.5515, "step": 3139 }, { - "epoch": 0.43946815955213436, - "grad_norm": 0.40699746634257905, - "learning_rate": 3.1070038219474026e-05, - "loss": 0.5236, + "epoch": 0.8788133221382591, + "grad_norm": 0.2187927170120886, + "learning_rate": 8.472539693753311e-05, + "loss": 0.5406, "step": 3140 }, { - "epoch": 0.43960811756473056, - "grad_norm": 0.4054193975355776, - "learning_rate": 3.105904342733032e-05, - "loss": 0.555, + "epoch": 0.8790931989924433, + "grad_norm": 0.217235931346614, + "learning_rate": 8.471429674384052e-05, + "loss": 0.5321, "step": 3141 }, { - "epoch": 0.4397480755773268, - "grad_norm": 0.43325267645820853, - "learning_rate": 3.104804738999169e-05, - "loss": 0.5704, + "epoch": 0.8793730758466275, + "grad_norm": 0.21597888070041155, + "learning_rate": 8.470319324602176e-05, + "loss": 0.5554, "step": 3142 }, { - "epoch": 0.439888033589923, - "grad_norm": 0.41581039947645454, - "learning_rate": 3.103705010971797e-05, - "loss": 0.5733, + "epoch": 0.8796529527008117, + "grad_norm": 0.21855940183015699, + "learning_rate": 8.469208644513363e-05, + "loss": 0.533, "step": 3143 }, { - "epoch": 0.44002799160251926, - "grad_norm": 0.397475655616127, - "learning_rate": 3.1026051588769204e-05, - "loss": 0.5681, + "epoch": 0.8799328295549959, + "grad_norm": 0.22623110729627563, + "learning_rate": 8.46809763422333e-05, + "loss": 0.5367, "step": 3144 }, { - "epoch": 0.44016794961511546, - "grad_norm": 0.4037137237750616, - "learning_rate": 3.101505182940568e-05, - "loss": 0.5641, + "epoch": 0.8802127064091799, + "grad_norm": 0.22743398846355872, + "learning_rate": 8.466986293837822e-05, + "loss": 0.5538, "step": 3145 }, { - "epoch": 0.44030790762771166, - "grad_norm": 0.395909015440236, - "learning_rate": 3.1004050833887985e-05, - "loss": 0.5453, + "epoch": 0.8804925832633641, + "grad_norm": 0.21509057495178288, + "learning_rate": 8.465874623462619e-05, + "loss": 0.5249, "step": 3146 }, { - "epoch": 0.4404478656403079, - "grad_norm": 0.40746541718886475, - "learning_rate": 3.099304860447692e-05, - "loss": 0.5576, + "epoch": 0.8807724601175483, + "grad_norm": 0.21457145431495728, + "learning_rate": 8.46476262320353e-05, + "loss": 0.5452, "step": 3147 }, { - "epoch": 0.4405878236529041, - "grad_norm": 0.40891529518862035, - "learning_rate": 3.098204514343356e-05, - "loss": 0.5744, + "epoch": 0.8810523369717325, + "grad_norm": 0.21222960094378254, + "learning_rate": 8.463650293166393e-05, + "loss": 0.5192, "step": 3148 }, { - "epoch": 0.44072778166550036, - "grad_norm": 0.42039074958765454, - "learning_rate": 3.097104045301922e-05, - "loss": 0.5945, + "epoch": 0.8813322138259166, + "grad_norm": 0.20859749016140702, + "learning_rate": 8.462537633457084e-05, + "loss": 0.5341, "step": 3149 }, { - "epoch": 0.44086773967809656, - "grad_norm": 0.4233543661398848, - "learning_rate": 3.096003453549549e-05, - "loss": 0.5174, + "epoch": 0.8816120906801007, + "grad_norm": 0.21203505951889612, + "learning_rate": 8.461424644181503e-05, + "loss": 0.5472, "step": 3150 }, { - "epoch": 0.4410076976906928, - "grad_norm": 0.41103834231055475, - "learning_rate": 3.0949027393124185e-05, - "loss": 0.5859, + "epoch": 0.8818919675342849, + "grad_norm": 0.22782042811565067, + "learning_rate": 8.460311325445589e-05, + "loss": 0.533, "step": 3151 }, { - "epoch": 0.441147655703289, - "grad_norm": 0.4071396114989415, - "learning_rate": 3.093801902816739e-05, - "loss": 0.5513, + "epoch": 0.8821718443884691, + "grad_norm": 0.21469946099009463, + "learning_rate": 8.459197677355306e-05, + "loss": 0.5261, "step": 3152 }, { - "epoch": 0.4412876137158852, - "grad_norm": 0.5084306343751018, - "learning_rate": 3.092700944288744e-05, - "loss": 0.5871, + "epoch": 0.8824517212426533, + "grad_norm": 0.20998221489633284, + "learning_rate": 8.458083700016653e-05, + "loss": 0.4949, "step": 3153 }, { - "epoch": 0.44142757172848146, - "grad_norm": 0.41487834345004226, - "learning_rate": 3.091599863954691e-05, - "loss": 0.5789, + "epoch": 0.8827315980968374, + "grad_norm": 0.22004558444916747, + "learning_rate": 8.456969393535655e-05, + "loss": 0.5338, "step": 3154 }, { - "epoch": 0.44156752974107766, - "grad_norm": 0.38353879873996594, - "learning_rate": 3.090498662040863e-05, - "loss": 0.5864, + "epoch": 0.8830114749510215, + "grad_norm": 0.22344979424863592, + "learning_rate": 8.455854758018376e-05, + "loss": 0.555, "step": 3155 }, { - "epoch": 0.4417074877536739, - "grad_norm": 0.4037917238728566, - "learning_rate": 3.0893973387735687e-05, - "loss": 0.5649, + "epoch": 0.8832913518052057, + "grad_norm": 0.23319072215414186, + "learning_rate": 8.454739793570909e-05, + "loss": 0.5402, "step": 3156 }, { - "epoch": 0.4418474457662701, - "grad_norm": 0.4017882825846787, - "learning_rate": 3.0882958943791405e-05, - "loss": 0.5314, + "epoch": 0.8835712286593899, + "grad_norm": 0.21611772173232785, + "learning_rate": 8.453624500299373e-05, + "loss": 0.5281, "step": 3157 }, { - "epoch": 0.44198740377886636, - "grad_norm": 0.413358406501944, - "learning_rate": 3.087194329083937e-05, - "loss": 0.5433, + "epoch": 0.883851105513574, + "grad_norm": 0.2432682966502491, + "learning_rate": 8.452508878309923e-05, + "loss": 0.5538, "step": 3158 }, { - "epoch": 0.44212736179146256, - "grad_norm": 0.38344575892281724, - "learning_rate": 3.0860926431143415e-05, - "loss": 0.5612, + "epoch": 0.8841309823677582, + "grad_norm": 0.21309772960948314, + "learning_rate": 8.451392927708747e-05, + "loss": 0.5405, "step": 3159 }, { - "epoch": 0.44226731980405876, - "grad_norm": 0.4087942341185106, - "learning_rate": 3.0849908366967605e-05, - "loss": 0.5256, + "epoch": 0.8844108592219423, + "grad_norm": 0.2324534497746261, + "learning_rate": 8.450276648602061e-05, + "loss": 0.5485, "step": 3160 }, { - "epoch": 0.442407277816655, - "grad_norm": 0.4055705356412522, - "learning_rate": 3.083888910057627e-05, - "loss": 0.5661, + "epoch": 0.8846907360761265, + "grad_norm": 0.22615165140933946, + "learning_rate": 8.44916004109611e-05, + "loss": 0.542, "step": 3161 }, { - "epoch": 0.4425472358292512, - "grad_norm": 0.3826502993612589, - "learning_rate": 3.082786863423399e-05, - "loss": 0.5183, + "epoch": 0.8849706129303107, + "grad_norm": 0.22573627988782002, + "learning_rate": 8.448043105297178e-05, + "loss": 0.5553, "step": 3162 }, { - "epoch": 0.44268719384184746, - "grad_norm": 0.4043315813890686, - "learning_rate": 3.0816846970205556e-05, - "loss": 0.5805, + "epoch": 0.8852504897844948, + "grad_norm": 0.21403239299002255, + "learning_rate": 8.446925841311572e-05, + "loss": 0.5219, "step": 3163 }, { - "epoch": 0.44282715185444366, - "grad_norm": 0.6100429100368088, - "learning_rate": 3.0805824110756064e-05, - "loss": 0.5527, + "epoch": 0.885530366638679, + "grad_norm": 0.22349300654120277, + "learning_rate": 8.445808249245634e-05, + "loss": 0.5244, "step": 3164 }, { - "epoch": 0.4429671098670399, - "grad_norm": 0.39290972200856517, - "learning_rate": 3.079480005815081e-05, - "loss": 0.5529, + "epoch": 0.8858102434928632, + "grad_norm": 0.220638465210544, + "learning_rate": 8.444690329205742e-05, + "loss": 0.5189, "step": 3165 }, { - "epoch": 0.4431070678796361, - "grad_norm": 0.38604414946838106, - "learning_rate": 3.078377481465534e-05, - "loss": 0.5216, + "epoch": 0.8860901203470473, + "grad_norm": 0.2092624701038376, + "learning_rate": 8.443572081298294e-05, + "loss": 0.5236, "step": 3166 }, { - "epoch": 0.4432470258922323, - "grad_norm": 0.4231758177347116, - "learning_rate": 3.0772748382535463e-05, - "loss": 0.5315, + "epoch": 0.8863699972012314, + "grad_norm": 0.21465614741067168, + "learning_rate": 8.442453505629726e-05, + "loss": 0.5191, "step": 3167 }, { - "epoch": 0.44338698390482856, - "grad_norm": 0.3991789110660417, - "learning_rate": 3.076172076405722e-05, - "loss": 0.5488, + "epoch": 0.8866498740554156, + "grad_norm": 0.21519642727409988, + "learning_rate": 8.441334602306509e-05, + "loss": 0.5123, "step": 3168 }, { - "epoch": 0.44352694191742476, - "grad_norm": 0.5750580981426986, - "learning_rate": 3.07506919614869e-05, - "loss": 0.5191, + "epoch": 0.8869297509095998, + "grad_norm": 0.21478883558439016, + "learning_rate": 8.440215371435137e-05, + "loss": 0.5275, "step": 3169 }, { - "epoch": 0.443666899930021, - "grad_norm": 0.41221569096533744, - "learning_rate": 3.073966197709103e-05, - "loss": 0.5505, + "epoch": 0.887209627763784, + "grad_norm": 0.21644613627124493, + "learning_rate": 8.439095813122143e-05, + "loss": 0.5313, "step": 3170 }, { - "epoch": 0.4438068579426172, - "grad_norm": 0.4397217342239035, - "learning_rate": 3.072863081313639e-05, - "loss": 0.594, + "epoch": 0.887489504617968, + "grad_norm": 0.2291596989569196, + "learning_rate": 8.43797592747408e-05, + "loss": 0.564, "step": 3171 }, { - "epoch": 0.44394681595521346, - "grad_norm": 0.4109178364737107, - "learning_rate": 3.071759847188998e-05, - "loss": 0.5438, + "epoch": 0.8877693814721522, + "grad_norm": 0.21334073065507236, + "learning_rate": 8.436855714597546e-05, + "loss": 0.534, "step": 3172 }, { - "epoch": 0.44408677396780966, - "grad_norm": 0.4037573317008806, - "learning_rate": 3.070656495561909e-05, - "loss": 0.56, + "epoch": 0.8880492583263364, + "grad_norm": 0.22089687449675843, + "learning_rate": 8.435735174599165e-05, + "loss": 0.5205, "step": 3173 }, { - "epoch": 0.44422673198040585, - "grad_norm": 0.4029097868342135, - "learning_rate": 3.069553026659119e-05, - "loss": 0.5552, + "epoch": 0.8883291351805206, + "grad_norm": 0.23584516821934443, + "learning_rate": 8.434614307585582e-05, + "loss": 0.5656, "step": 3174 }, { - "epoch": 0.4443666899930021, - "grad_norm": 0.4100219354350298, - "learning_rate": 3.068449440707404e-05, - "loss": 0.5533, + "epoch": 0.8886090120347048, + "grad_norm": 0.226205757196865, + "learning_rate": 8.433493113663489e-05, + "loss": 0.5481, "step": 3175 }, { - "epoch": 0.4445066480055983, - "grad_norm": 0.3892720780012183, - "learning_rate": 3.067345737933561e-05, - "loss": 0.5478, + "epoch": 0.8888888888888888, + "grad_norm": 0.21360041855627168, + "learning_rate": 8.432371592939599e-05, + "loss": 0.5363, "step": 3176 }, { - "epoch": 0.44464660601819456, - "grad_norm": 0.389146645661204, - "learning_rate": 3.0662419185644115e-05, - "loss": 0.5506, + "epoch": 0.889168765743073, + "grad_norm": 0.21904486669173082, + "learning_rate": 8.43124974552066e-05, + "loss": 0.5203, "step": 3177 }, { - "epoch": 0.44478656403079075, - "grad_norm": 0.3873299876663967, - "learning_rate": 3.065137982826802e-05, - "loss": 0.5245, + "epoch": 0.8894486425972572, + "grad_norm": 0.22179102451077717, + "learning_rate": 8.43012757151345e-05, + "loss": 0.5451, "step": 3178 }, { - "epoch": 0.444926522043387, - "grad_norm": 0.38222672454752543, - "learning_rate": 3.064033930947604e-05, - "loss": 0.5451, + "epoch": 0.8897285194514414, + "grad_norm": 0.22856237475782754, + "learning_rate": 8.429005071024778e-05, + "loss": 0.5414, "step": 3179 }, { - "epoch": 0.4450664800559832, - "grad_norm": 0.39505544780509594, - "learning_rate": 3.06292976315371e-05, - "loss": 0.5617, + "epoch": 0.8900083963056256, + "grad_norm": 0.2158331073274858, + "learning_rate": 8.427882244161482e-05, + "loss": 0.5316, "step": 3180 }, { - "epoch": 0.4452064380685794, - "grad_norm": 0.410933368734618, - "learning_rate": 3.0618254796720375e-05, - "loss": 0.5598, + "epoch": 0.8902882731598096, + "grad_norm": 0.21357794510832812, + "learning_rate": 8.426759091030437e-05, + "loss": 0.5152, "step": 3181 }, { - "epoch": 0.44534639608117566, - "grad_norm": 0.4357975405512465, - "learning_rate": 3.060721080729529e-05, - "loss": 0.5558, + "epoch": 0.8905681500139938, + "grad_norm": 0.22180266892326359, + "learning_rate": 8.425635611738543e-05, + "loss": 0.5469, "step": 3182 }, { - "epoch": 0.44548635409377185, - "grad_norm": 0.43046775321062414, - "learning_rate": 3.059616566553149e-05, - "loss": 0.5637, + "epoch": 0.890848026868178, + "grad_norm": 0.2087533268175315, + "learning_rate": 8.424511806392736e-05, + "loss": 0.5245, "step": 3183 }, { - "epoch": 0.4456263121063681, - "grad_norm": 0.3985416977600822, - "learning_rate": 3.058511937369886e-05, - "loss": 0.525, + "epoch": 0.8911279037223622, + "grad_norm": 0.2199988164234642, + "learning_rate": 8.423387675099977e-05, + "loss": 0.5301, "step": 3184 }, { - "epoch": 0.4457662701189643, - "grad_norm": 0.40340048691929675, - "learning_rate": 3.057407193406753e-05, - "loss": 0.5249, + "epoch": 0.8914077805765463, + "grad_norm": 0.21437648175825566, + "learning_rate": 8.422263217967263e-05, + "loss": 0.5351, "step": 3185 }, { - "epoch": 0.44590622813156056, - "grad_norm": 0.4135936572888444, - "learning_rate": 3.056302334890786e-05, - "loss": 0.5549, + "epoch": 0.8916876574307305, + "grad_norm": 0.2198696949107264, + "learning_rate": 8.421138435101618e-05, + "loss": 0.5118, "step": 3186 }, { - "epoch": 0.44604618614415675, - "grad_norm": 0.3920888452917507, - "learning_rate": 3.0551973620490456e-05, - "loss": 0.5491, + "epoch": 0.8919675342849146, + "grad_norm": 0.22454727769491456, + "learning_rate": 8.420013326610105e-05, + "loss": 0.5471, "step": 3187 }, { - "epoch": 0.44618614415675295, - "grad_norm": 0.4035469272259035, - "learning_rate": 3.0540922751086135e-05, - "loss": 0.5465, + "epoch": 0.8922474111390988, + "grad_norm": 0.21990546237502193, + "learning_rate": 8.418887892599809e-05, + "loss": 0.5488, "step": 3188 }, { - "epoch": 0.4463261021693492, - "grad_norm": 0.4216381953710183, - "learning_rate": 3.052987074296596e-05, - "loss": 0.5535, + "epoch": 0.892527287993283, + "grad_norm": 0.21714014362804088, + "learning_rate": 8.417762133177848e-05, + "loss": 0.5081, "step": 3189 }, { - "epoch": 0.4464660601819454, - "grad_norm": 0.41233431611024285, - "learning_rate": 3.051881759840124e-05, - "loss": 0.5498, + "epoch": 0.8928071648474671, + "grad_norm": 0.2231419036615365, + "learning_rate": 8.416636048451376e-05, + "loss": 0.5307, "step": 3190 }, { - "epoch": 0.44660601819454165, - "grad_norm": 0.4114941998250367, - "learning_rate": 3.0507763319663517e-05, - "loss": 0.5718, + "epoch": 0.8930870417016513, + "grad_norm": 0.22930027763459426, + "learning_rate": 8.415509638527572e-05, + "loss": 0.5383, "step": 3191 }, { - "epoch": 0.44674597620713785, - "grad_norm": 0.407680668986072, - "learning_rate": 3.0496707909024542e-05, - "loss": 0.5298, + "epoch": 0.8933669185558354, + "grad_norm": 0.2246271030278197, + "learning_rate": 8.414382903513649e-05, + "loss": 0.5494, "step": 3192 }, { - "epoch": 0.4468859342197341, - "grad_norm": 0.3977959198257157, - "learning_rate": 3.0485651368756323e-05, - "loss": 0.549, + "epoch": 0.8936467954100196, + "grad_norm": 0.22405343698134375, + "learning_rate": 8.413255843516851e-05, + "loss": 0.5178, "step": 3193 }, { - "epoch": 0.4470258922323303, - "grad_norm": 0.3998186038348927, - "learning_rate": 3.0474593701131084e-05, - "loss": 0.5743, + "epoch": 0.8939266722642037, + "grad_norm": 0.24221654128655928, + "learning_rate": 8.412128458644449e-05, + "loss": 0.5563, "step": 3194 }, { - "epoch": 0.4471658502449265, - "grad_norm": 0.4154710015319719, - "learning_rate": 3.0463534908421298e-05, - "loss": 0.5515, + "epoch": 0.8942065491183879, + "grad_norm": 0.21815434040286807, + "learning_rate": 8.411000749003753e-05, + "loss": 0.5336, "step": 3195 }, { - "epoch": 0.44730580825752275, - "grad_norm": 0.3943039960321348, - "learning_rate": 3.0452474992899643e-05, - "loss": 0.5391, + "epoch": 0.8944864259725721, + "grad_norm": 0.22157664448096417, + "learning_rate": 8.409872714702093e-05, + "loss": 0.5423, "step": 3196 }, { - "epoch": 0.44744576627011895, - "grad_norm": 0.4199054832560681, - "learning_rate": 3.044141395683906e-05, - "loss": 0.5741, + "epoch": 0.8947663028267562, + "grad_norm": 0.2105901061172276, + "learning_rate": 8.408744355846842e-05, + "loss": 0.5443, "step": 3197 }, { - "epoch": 0.4475857242827152, - "grad_norm": 0.41750106544298576, - "learning_rate": 3.0430351802512698e-05, - "loss": 0.6249, + "epoch": 0.8950461796809404, + "grad_norm": 0.244545495591243, + "learning_rate": 8.407615672545396e-05, + "loss": 0.5522, "step": 3198 }, { - "epoch": 0.4477256822953114, - "grad_norm": 0.3946839624490567, - "learning_rate": 3.041928853219394e-05, - "loss": 0.5692, + "epoch": 0.8953260565351245, + "grad_norm": 0.2081948552672021, + "learning_rate": 8.406486664905183e-05, + "loss": 0.5295, "step": 3199 }, { - "epoch": 0.44786564030790765, - "grad_norm": 0.4224068149128873, - "learning_rate": 3.0408224148156407e-05, - "loss": 0.5772, + "epoch": 0.8956059333893087, + "grad_norm": 0.22325001321795235, + "learning_rate": 8.405357333033659e-05, + "loss": 0.5158, "step": 3200 }, { - "epoch": 0.44800559832050385, - "grad_norm": 0.44042046806838825, - "learning_rate": 3.039715865267393e-05, - "loss": 0.6051, + "epoch": 0.8958858102434929, + "grad_norm": 0.2158764769238209, + "learning_rate": 8.404227677038322e-05, + "loss": 0.5282, "step": 3201 }, { - "epoch": 0.44814555633310005, - "grad_norm": 0.7011787045122906, - "learning_rate": 3.0386092048020593e-05, - "loss": 0.6074, + "epoch": 0.8961656870976771, + "grad_norm": 0.22807617422393783, + "learning_rate": 8.403097697026687e-05, + "loss": 0.551, "step": 3202 }, { - "epoch": 0.4482855143456963, - "grad_norm": 0.41427835190534584, - "learning_rate": 3.037502433647068e-05, - "loss": 0.5624, + "epoch": 0.8964455639518611, + "grad_norm": 0.20899930240384806, + "learning_rate": 8.401967393106309e-05, + "loss": 0.5334, "step": 3203 }, { - "epoch": 0.4484254723582925, - "grad_norm": 0.4053190934113154, - "learning_rate": 3.0363955520298742e-05, - "loss": 0.5951, + "epoch": 0.8967254408060453, + "grad_norm": 0.2182254048702635, + "learning_rate": 8.40083676538477e-05, + "loss": 0.5129, "step": 3204 }, { - "epoch": 0.44856543037088875, - "grad_norm": 0.43048899640751676, - "learning_rate": 3.0352885601779512e-05, - "loss": 0.5652, + "epoch": 0.8970053176602295, + "grad_norm": 0.2195985871550385, + "learning_rate": 8.399705813969685e-05, + "loss": 0.534, "step": 3205 }, { - "epoch": 0.44870538838348495, - "grad_norm": 0.3964539941995037, - "learning_rate": 3.0341814583187978e-05, - "loss": 0.5595, + "epoch": 0.8972851945144137, + "grad_norm": 0.21838456840893322, + "learning_rate": 8.398574538968697e-05, + "loss": 0.5077, "step": 3206 }, { - "epoch": 0.4488453463960812, - "grad_norm": 0.4009111440660274, - "learning_rate": 3.033074246679935e-05, - "loss": 0.5861, + "epoch": 0.8975650713685979, + "grad_norm": 0.20531380979348746, + "learning_rate": 8.397442940489481e-05, + "loss": 0.5403, "step": 3207 }, { - "epoch": 0.4489853044086774, - "grad_norm": 0.38864197775544784, - "learning_rate": 3.0319669254889055e-05, - "loss": 0.598, + "epoch": 0.8978449482227819, + "grad_norm": 0.2249946706098934, + "learning_rate": 8.396311018639746e-05, + "loss": 0.5483, "step": 3208 }, { - "epoch": 0.4491252624212736, - "grad_norm": 0.4292785996534306, - "learning_rate": 3.0308594949732755e-05, - "loss": 0.5901, + "epoch": 0.8981248250769661, + "grad_norm": 0.21855501304245706, + "learning_rate": 8.395178773527229e-05, + "loss": 0.5377, "step": 3209 }, { - "epoch": 0.44926522043386985, - "grad_norm": 0.37775134402140526, - "learning_rate": 3.029751955360633e-05, - "loss": 0.5327, + "epoch": 0.8984047019311503, + "grad_norm": 0.20427182425970294, + "learning_rate": 8.394046205259693e-05, + "loss": 0.5149, "step": 3210 }, { - "epoch": 0.44940517844646605, - "grad_norm": 0.37253895115039465, - "learning_rate": 3.0286443068785885e-05, - "loss": 0.5649, + "epoch": 0.8986845787853345, + "grad_norm": 0.2193552303123338, + "learning_rate": 8.39291331394494e-05, + "loss": 0.517, "step": 3211 }, { - "epoch": 0.4495451364590623, - "grad_norm": 0.43373031007480495, - "learning_rate": 3.027536549754775e-05, - "loss": 0.541, + "epoch": 0.8989644556395187, + "grad_norm": 0.21587873171075797, + "learning_rate": 8.391780099690799e-05, + "loss": 0.4998, "step": 3212 }, { - "epoch": 0.4496850944716585, - "grad_norm": 0.42039610931604904, - "learning_rate": 3.0264286842168477e-05, - "loss": 0.5549, + "epoch": 0.8992443324937027, + "grad_norm": 0.21433001058656947, + "learning_rate": 8.390646562605129e-05, + "loss": 0.5226, "step": 3213 }, { - "epoch": 0.44982505248425475, - "grad_norm": 0.40625318217496825, - "learning_rate": 3.025320710492484e-05, - "loss": 0.568, + "epoch": 0.8995242093478869, + "grad_norm": 0.22970641247922163, + "learning_rate": 8.389512702795823e-05, + "loss": 0.5353, "step": 3214 }, { - "epoch": 0.44996501049685095, - "grad_norm": 0.4025993136298079, - "learning_rate": 3.0242126288093846e-05, - "loss": 0.5824, + "epoch": 0.8998040862020711, + "grad_norm": 0.2182246871696593, + "learning_rate": 8.388378520370799e-05, + "loss": 0.5576, "step": 3215 }, { - "epoch": 0.45010496850944715, - "grad_norm": 0.3984904672668101, - "learning_rate": 3.0231044393952712e-05, - "loss": 0.5615, + "epoch": 0.9000839630562553, + "grad_norm": 0.2228115798851557, + "learning_rate": 8.387244015438012e-05, + "loss": 0.5402, "step": 3216 }, { - "epoch": 0.4502449265220434, - "grad_norm": 0.4137524109943405, - "learning_rate": 3.0219961424778877e-05, - "loss": 0.5588, + "epoch": 0.9003638399104394, + "grad_norm": 0.21320783404344404, + "learning_rate": 8.386109188105442e-05, + "loss": 0.5249, "step": 3217 }, { - "epoch": 0.4503848845346396, - "grad_norm": 0.39240078172637105, - "learning_rate": 3.020887738285001e-05, - "loss": 0.521, + "epoch": 0.9006437167646235, + "grad_norm": 0.23125313491309887, + "learning_rate": 8.384974038481105e-05, + "loss": 0.5465, "step": 3218 }, { - "epoch": 0.45052484254723585, - "grad_norm": 0.41138657372667015, - "learning_rate": 3.0197792270443982e-05, - "loss": 0.5471, + "epoch": 0.9009235936188077, + "grad_norm": 0.2159942191919595, + "learning_rate": 8.383838566673046e-05, + "loss": 0.5355, "step": 3219 }, { - "epoch": 0.45066480055983205, - "grad_norm": 0.4192515121968027, - "learning_rate": 3.0186706089838913e-05, - "loss": 0.527, + "epoch": 0.9012034704729919, + "grad_norm": 0.2151240015535236, + "learning_rate": 8.382702772789337e-05, + "loss": 0.5443, "step": 3220 }, { - "epoch": 0.4508047585724283, - "grad_norm": 0.4089670890583155, - "learning_rate": 3.017561884331311e-05, - "loss": 0.5726, + "epoch": 0.901483347327176, + "grad_norm": 0.20777649848856372, + "learning_rate": 8.381566656938085e-05, + "loss": 0.5121, "step": 3221 }, { - "epoch": 0.4509447165850245, - "grad_norm": 0.40191329030749956, - "learning_rate": 3.0164530533145123e-05, - "loss": 0.5444, + "epoch": 0.9017632241813602, + "grad_norm": 0.21754586222299052, + "learning_rate": 8.380430219227427e-05, + "loss": 0.5195, "step": 3222 }, { - "epoch": 0.4510846745976207, - "grad_norm": 0.41118884176015386, - "learning_rate": 3.0153441161613704e-05, - "loss": 0.5541, + "epoch": 0.9020431010355444, + "grad_norm": 0.21248672937134955, + "learning_rate": 8.379293459765526e-05, + "loss": 0.5351, "step": 3223 }, { - "epoch": 0.45122463261021695, - "grad_norm": 0.4309170413845665, - "learning_rate": 3.0142350730997837e-05, - "loss": 0.5794, + "epoch": 0.9023229778897285, + "grad_norm": 0.2206266391186216, + "learning_rate": 8.378156378660584e-05, + "loss": 0.5223, "step": 3224 }, { - "epoch": 0.45136459062281314, - "grad_norm": 0.41219057005882026, - "learning_rate": 3.0131259243576726e-05, - "loss": 0.5601, + "epoch": 0.9026028547439127, + "grad_norm": 0.21535885831813834, + "learning_rate": 8.377018976020824e-05, + "loss": 0.5227, "step": 3225 }, { - "epoch": 0.4515045486354094, - "grad_norm": 0.3941521672183907, - "learning_rate": 3.012016670162977e-05, - "loss": 0.521, + "epoch": 0.9028827315980968, + "grad_norm": 0.22093169414907043, + "learning_rate": 8.375881251954512e-05, + "loss": 0.5511, "step": 3226 }, { - "epoch": 0.4516445066480056, - "grad_norm": 0.4198018097191238, - "learning_rate": 3.0109073107436608e-05, - "loss": 0.5573, + "epoch": 0.903162608452281, + "grad_norm": 0.21838882264546794, + "learning_rate": 8.374743206569931e-05, + "loss": 0.5245, "step": 3227 }, { - "epoch": 0.4517844646606018, - "grad_norm": 0.4135610840050145, - "learning_rate": 3.0097978463277076e-05, - "loss": 0.5718, + "epoch": 0.9034424853064652, + "grad_norm": 0.23592055142362406, + "learning_rate": 8.373604839975403e-05, + "loss": 0.5511, "step": 3228 }, { - "epoch": 0.45192442267319805, - "grad_norm": 0.405778139396707, - "learning_rate": 3.0086882771431258e-05, - "loss": 0.5468, + "epoch": 0.9037223621606493, + "grad_norm": 0.2188882784893106, + "learning_rate": 8.372466152279278e-05, + "loss": 0.5267, "step": 3229 }, { - "epoch": 0.45206438068579424, - "grad_norm": 0.4401596995437583, - "learning_rate": 3.0075786034179405e-05, - "loss": 0.5915, + "epoch": 0.9040022390148335, + "grad_norm": 0.22086254230761396, + "learning_rate": 8.371327143589935e-05, + "loss": 0.5235, "step": 3230 }, { - "epoch": 0.4522043386983905, - "grad_norm": 0.4270515847507665, - "learning_rate": 3.0064688253802026e-05, - "loss": 0.5704, + "epoch": 0.9042821158690176, + "grad_norm": 0.21454242664136042, + "learning_rate": 8.370187814015788e-05, + "loss": 0.538, "step": 3231 }, { - "epoch": 0.4523442967109867, - "grad_norm": 0.3969545038717031, - "learning_rate": 3.0053589432579827e-05, - "loss": 0.539, + "epoch": 0.9045619927232018, + "grad_norm": 0.23239524589251495, + "learning_rate": 8.369048163665281e-05, + "loss": 0.5507, "step": 3232 }, { - "epoch": 0.45248425472358295, - "grad_norm": 0.4231180537782753, - "learning_rate": 3.004248957279372e-05, - "loss": 0.5993, + "epoch": 0.904841869577386, + "grad_norm": 0.22496197247981775, + "learning_rate": 8.367908192646882e-05, + "loss": 0.5093, "step": 3233 }, { - "epoch": 0.45262421273617914, - "grad_norm": 0.3821272498454476, - "learning_rate": 3.0031388676724836e-05, - "loss": 0.5444, + "epoch": 0.9051217464315701, + "grad_norm": 0.2266084138636589, + "learning_rate": 8.366767901069097e-05, + "loss": 0.5493, "step": 3234 }, { - "epoch": 0.45276417074877534, - "grad_norm": 0.38147859259770744, - "learning_rate": 3.0020286746654525e-05, - "loss": 0.5893, + "epoch": 0.9054016232857542, + "grad_norm": 0.223294142829303, + "learning_rate": 8.365627289040457e-05, + "loss": 0.5297, "step": 3235 }, { - "epoch": 0.4529041287613716, - "grad_norm": 0.40206232247150925, - "learning_rate": 3.0009183784864365e-05, - "loss": 0.5567, + "epoch": 0.9056815001399384, + "grad_norm": 0.2463666179530782, + "learning_rate": 8.36448635666953e-05, + "loss": 0.5465, "step": 3236 }, { - "epoch": 0.4530440867739678, - "grad_norm": 0.3712974814446054, - "learning_rate": 2.9998079793636098e-05, - "loss": 0.5148, + "epoch": 0.9059613769941226, + "grad_norm": 0.21553561426283782, + "learning_rate": 8.363345104064907e-05, + "loss": 0.505, "step": 3237 }, { - "epoch": 0.45318404478656404, - "grad_norm": 0.4047911445642038, - "learning_rate": 2.998697477525173e-05, - "loss": 0.5543, + "epoch": 0.9062412538483068, + "grad_norm": 0.22666488857798106, + "learning_rate": 8.362203531335216e-05, + "loss": 0.5507, "step": 3238 }, { - "epoch": 0.45332400279916024, - "grad_norm": 0.41083909886481995, - "learning_rate": 2.997586873199344e-05, - "loss": 0.5411, + "epoch": 0.9065211307024909, + "grad_norm": 0.21703062964799782, + "learning_rate": 8.361061638589109e-05, + "loss": 0.5511, "step": 3239 }, { - "epoch": 0.4534639608117565, - "grad_norm": 0.3979787826364129, - "learning_rate": 2.996476166614364e-05, - "loss": 0.526, + "epoch": 0.906801007556675, + "grad_norm": 0.21897725160879003, + "learning_rate": 8.359919425935275e-05, + "loss": 0.5498, "step": 3240 }, { - "epoch": 0.4536039188243527, - "grad_norm": 0.39694745383011404, - "learning_rate": 2.9953653579984942e-05, - "loss": 0.5523, + "epoch": 0.9070808844108592, + "grad_norm": 0.22635000840894065, + "learning_rate": 8.35877689348243e-05, + "loss": 0.5341, "step": 3241 }, { - "epoch": 0.4537438768369489, - "grad_norm": 0.3902000936154921, - "learning_rate": 2.9942544475800172e-05, - "loss": 0.5502, + "epoch": 0.9073607612650434, + "grad_norm": 0.21723297443205186, + "learning_rate": 8.35763404133932e-05, + "loss": 0.5077, "step": 3242 }, { - "epoch": 0.45388383484954514, - "grad_norm": 0.42203355743945187, - "learning_rate": 2.9931434355872367e-05, - "loss": 0.5609, + "epoch": 0.9076406381192276, + "grad_norm": 0.22043675691302084, + "learning_rate": 8.356490869614722e-05, + "loss": 0.5272, "step": 3243 }, { - "epoch": 0.45402379286214134, - "grad_norm": 0.4454869866225551, - "learning_rate": 2.992032322248476e-05, - "loss": 0.6, + "epoch": 0.9079205149734118, + "grad_norm": 0.21856138350677917, + "learning_rate": 8.355347378417444e-05, + "loss": 0.523, "step": 3244 }, { - "epoch": 0.4541637508747376, - "grad_norm": 0.405554964175632, - "learning_rate": 2.9909211077920805e-05, - "loss": 0.572, + "epoch": 0.9082003918275958, + "grad_norm": 0.21218178468442178, + "learning_rate": 8.354203567856325e-05, + "loss": 0.5375, "step": 3245 }, { - "epoch": 0.4543037088873338, - "grad_norm": 0.41415547431542904, - "learning_rate": 2.989809792446417e-05, - "loss": 0.5578, + "epoch": 0.90848026868178, + "grad_norm": 0.2034089891436105, + "learning_rate": 8.353059438040234e-05, + "loss": 0.5424, "step": 3246 }, { - "epoch": 0.45444366689993004, - "grad_norm": 0.41647413135178146, - "learning_rate": 2.9886983764398707e-05, - "loss": 0.5691, + "epoch": 0.9087601455359642, + "grad_norm": 0.21769263669876449, + "learning_rate": 8.351914989078067e-05, + "loss": 0.5218, "step": 3247 }, { - "epoch": 0.45458362491252624, - "grad_norm": 0.3958733230625823, - "learning_rate": 2.9875868600008496e-05, - "loss": 0.5708, + "epoch": 0.9090400223901484, + "grad_norm": 0.22274418924076697, + "learning_rate": 8.350770221078756e-05, + "loss": 0.5144, "step": 3248 }, { - "epoch": 0.45472358292512244, - "grad_norm": 0.39688745966500016, - "learning_rate": 2.986475243357782e-05, - "loss": 0.5497, + "epoch": 0.9093198992443325, + "grad_norm": 0.22355166839093885, + "learning_rate": 8.34962513415126e-05, + "loss": 0.53, "step": 3249 }, { - "epoch": 0.4548635409377187, - "grad_norm": 0.3859490510740407, - "learning_rate": 2.985363526739115e-05, - "loss": 0.5227, + "epoch": 0.9095997760985166, + "grad_norm": 0.2112130469842736, + "learning_rate": 8.348479728404568e-05, + "loss": 0.5327, "step": 3250 }, { - "epoch": 0.4550034989503149, - "grad_norm": 0.3934619917655379, - "learning_rate": 2.9842517103733192e-05, - "loss": 0.5662, + "epoch": 0.9098796529527008, + "grad_norm": 0.2184856519406378, + "learning_rate": 8.347334003947702e-05, + "loss": 0.5393, "step": 3251 }, { - "epoch": 0.45514345696291114, - "grad_norm": 0.5131902290403141, - "learning_rate": 2.9831397944888833e-05, - "loss": 0.5677, + "epoch": 0.910159529806885, + "grad_norm": 0.23537860474721548, + "learning_rate": 8.346187960889713e-05, + "loss": 0.5296, "step": 3252 }, { - "epoch": 0.45528341497550734, - "grad_norm": 0.42208973839755937, - "learning_rate": 2.9820277793143177e-05, - "loss": 0.5347, + "epoch": 0.9104394066610692, + "grad_norm": 0.2251462655166089, + "learning_rate": 8.345041599339679e-05, + "loss": 0.5313, "step": 3253 }, { - "epoch": 0.4554233729881036, - "grad_norm": 0.4047958403557277, - "learning_rate": 2.9809156650781528e-05, - "loss": 0.5533, + "epoch": 0.9107192835152533, + "grad_norm": 0.21360222223163164, + "learning_rate": 8.343894919406715e-05, + "loss": 0.5324, "step": 3254 }, { - "epoch": 0.4555633310006998, - "grad_norm": 0.41534042335678106, - "learning_rate": 2.9798034520089385e-05, - "loss": 0.6129, + "epoch": 0.9109991603694374, + "grad_norm": 0.21448874261285444, + "learning_rate": 8.34274792119996e-05, + "loss": 0.5341, "step": 3255 }, { - "epoch": 0.455703289013296, - "grad_norm": 0.405760676923056, - "learning_rate": 2.978691140335247e-05, - "loss": 0.575, + "epoch": 0.9112790372236216, + "grad_norm": 0.22939842431716173, + "learning_rate": 8.341600604828587e-05, + "loss": 0.5194, "step": 3256 }, { - "epoch": 0.45584324702589224, - "grad_norm": 0.4049361175064025, - "learning_rate": 2.9775787302856683e-05, - "loss": 0.5287, + "epoch": 0.9115589140778058, + "grad_norm": 0.22299870746661596, + "learning_rate": 8.340452970401797e-05, + "loss": 0.525, "step": 3257 }, { - "epoch": 0.45598320503848844, - "grad_norm": 0.4212785218180886, - "learning_rate": 2.9764662220888157e-05, - "loss": 0.5649, + "epoch": 0.9118387909319899, + "grad_norm": 0.2153913886879192, + "learning_rate": 8.339305018028825e-05, + "loss": 0.5038, "step": 3258 }, { - "epoch": 0.4561231630510847, - "grad_norm": 0.4014033439696915, - "learning_rate": 2.9753536159733196e-05, - "loss": 0.5457, + "epoch": 0.9121186677861741, + "grad_norm": 0.22649404750533841, + "learning_rate": 8.338156747818932e-05, + "loss": 0.5396, "step": 3259 }, { - "epoch": 0.4562631210636809, - "grad_norm": 0.38748328739856236, - "learning_rate": 2.974240912167833e-05, - "loss": 0.5409, + "epoch": 0.9123985446403582, + "grad_norm": 0.22601307943292215, + "learning_rate": 8.33700815988141e-05, + "loss": 0.55, "step": 3260 }, { - "epoch": 0.45640307907627714, - "grad_norm": 0.40938166793163544, - "learning_rate": 2.9731281109010256e-05, - "loss": 0.6002, + "epoch": 0.9126784214945424, + "grad_norm": 0.2249755063093647, + "learning_rate": 8.335859254325586e-05, + "loss": 0.5343, "step": 3261 }, { - "epoch": 0.45654303708887334, - "grad_norm": 0.40068504604922145, - "learning_rate": 2.9720152124015916e-05, - "loss": 0.5767, + "epoch": 0.9129582983487265, + "grad_norm": 0.2187030142320576, + "learning_rate": 8.334710031260809e-05, + "loss": 0.5097, "step": 3262 }, { - "epoch": 0.45668299510146954, - "grad_norm": 0.39286585333437835, - "learning_rate": 2.9709022168982426e-05, - "loss": 0.5823, + "epoch": 0.9132381752029107, + "grad_norm": 0.21936405476969345, + "learning_rate": 8.333560490796466e-05, + "loss": 0.5478, "step": 3263 }, { - "epoch": 0.4568229531140658, - "grad_norm": 0.39448349470052235, - "learning_rate": 2.9697891246197097e-05, - "loss": 0.5702, + "epoch": 0.9135180520570949, + "grad_norm": 0.2296443547918883, + "learning_rate": 8.332410633041969e-05, + "loss": 0.5337, "step": 3264 }, { - "epoch": 0.456962911126662, - "grad_norm": 0.42230524457915075, - "learning_rate": 2.9686759357947446e-05, - "loss": 0.5695, + "epoch": 0.9137979289112791, + "grad_norm": 0.2235279234121592, + "learning_rate": 8.331260458106763e-05, + "loss": 0.5303, "step": 3265 }, { - "epoch": 0.45710286913925824, - "grad_norm": 0.3829307935804848, - "learning_rate": 2.9675626506521205e-05, - "loss": 0.5486, + "epoch": 0.9140778057654632, + "grad_norm": 0.20620001354942905, + "learning_rate": 8.330109966100321e-05, + "loss": 0.5382, "step": 3266 }, { - "epoch": 0.45724282715185444, - "grad_norm": 0.4002606373918937, - "learning_rate": 2.966449269420627e-05, - "loss": 0.5953, + "epoch": 0.9143576826196473, + "grad_norm": 0.24224294750708605, + "learning_rate": 8.328959157132149e-05, + "loss": 0.5307, "step": 3267 }, { - "epoch": 0.4573827851644507, - "grad_norm": 0.4081380916674823, - "learning_rate": 2.9653357923290753e-05, - "loss": 0.5605, + "epoch": 0.9146375594738315, + "grad_norm": 0.22520339984118107, + "learning_rate": 8.327808031311781e-05, + "loss": 0.5374, "step": 3268 }, { - "epoch": 0.4575227431770469, - "grad_norm": 0.4173988076758946, - "learning_rate": 2.9642222196062973e-05, - "loss": 0.5941, + "epoch": 0.9149174363280157, + "grad_norm": 0.21908728497768143, + "learning_rate": 8.326656588748783e-05, + "loss": 0.511, "step": 3269 }, { - "epoch": 0.4576627011896431, - "grad_norm": 0.41804559043749046, - "learning_rate": 2.9631085514811423e-05, - "loss": 0.5979, + "epoch": 0.9151973131821999, + "grad_norm": 0.22056425570357524, + "learning_rate": 8.325504829552748e-05, + "loss": 0.538, "step": 3270 }, { - "epoch": 0.45780265920223934, - "grad_norm": 0.3906986098038151, - "learning_rate": 2.9619947881824818e-05, - "loss": 0.5567, + "epoch": 0.915477190036384, + "grad_norm": 0.2169779119488914, + "learning_rate": 8.3243527538333e-05, + "loss": 0.5476, "step": 3271 }, { - "epoch": 0.45794261721483553, - "grad_norm": 0.4110120727152376, - "learning_rate": 2.9608809299392043e-05, - "loss": 0.5732, + "epoch": 0.9157570668905681, + "grad_norm": 0.22693785290832416, + "learning_rate": 8.323200361700099e-05, + "loss": 0.5532, "step": 3272 }, { - "epoch": 0.4580825752274318, - "grad_norm": 0.40920966877207876, - "learning_rate": 2.9597669769802196e-05, - "loss": 0.5673, + "epoch": 0.9160369437447523, + "grad_norm": 0.21859137257035782, + "learning_rate": 8.322047653262828e-05, + "loss": 0.5396, "step": 3273 }, { - "epoch": 0.458222533240028, - "grad_norm": 0.3959304691526394, - "learning_rate": 2.958652929534456e-05, - "loss": 0.5719, + "epoch": 0.9163168205989365, + "grad_norm": 0.22368091938009044, + "learning_rate": 8.3208946286312e-05, + "loss": 0.5238, "step": 3274 }, { - "epoch": 0.45836249125262424, - "grad_norm": 0.3844580981772225, - "learning_rate": 2.9575387878308615e-05, - "loss": 0.5441, + "epoch": 0.9165966974531207, + "grad_norm": 0.2234209802865883, + "learning_rate": 8.319741287914963e-05, + "loss": 0.5248, "step": 3275 }, { - "epoch": 0.45850244926522044, - "grad_norm": 0.4425235109241173, - "learning_rate": 2.9564245520984047e-05, - "loss": 0.5896, + "epoch": 0.9168765743073047, + "grad_norm": 0.22535030226742628, + "learning_rate": 8.318587631223893e-05, + "loss": 0.5239, "step": 3276 }, { - "epoch": 0.45864240727781663, - "grad_norm": 0.4206583711187626, - "learning_rate": 2.9553102225660706e-05, - "loss": 0.5718, + "epoch": 0.9171564511614889, + "grad_norm": 0.21695281932030427, + "learning_rate": 8.317433658667792e-05, + "loss": 0.5468, "step": 3277 }, { - "epoch": 0.4587823652904129, - "grad_norm": 0.43363119264281536, - "learning_rate": 2.954195799462866e-05, - "loss": 0.5584, + "epoch": 0.9174363280156731, + "grad_norm": 0.22308396551305662, + "learning_rate": 8.316279370356502e-05, + "loss": 0.5379, "step": 3278 }, { - "epoch": 0.4589223233030091, - "grad_norm": 0.40918971785473857, - "learning_rate": 2.9530812830178162e-05, - "loss": 0.5386, + "epoch": 0.9177162048698573, + "grad_norm": 0.20666742442512492, + "learning_rate": 8.315124766399884e-05, + "loss": 0.5089, "step": 3279 }, { - "epoch": 0.45906228131560534, - "grad_norm": 0.4195378947004359, - "learning_rate": 2.951966673459965e-05, - "loss": 0.5862, + "epoch": 0.9179960817240415, + "grad_norm": 0.21970520284983006, + "learning_rate": 8.313969846907834e-05, + "loss": 0.5111, "step": 3280 }, { - "epoch": 0.45920223932820153, - "grad_norm": 0.3984648051790019, - "learning_rate": 2.9508519710183772e-05, - "loss": 0.5745, + "epoch": 0.9182759585782255, + "grad_norm": 0.21344841959442235, + "learning_rate": 8.31281461199028e-05, + "loss": 0.4997, "step": 3281 }, { - "epoch": 0.4593421973407978, - "grad_norm": 0.42412243772475006, - "learning_rate": 2.9497371759221347e-05, - "loss": 0.5765, + "epoch": 0.9185558354324097, + "grad_norm": 0.21641038559178405, + "learning_rate": 8.311659061757177e-05, + "loss": 0.5144, "step": 3282 }, { - "epoch": 0.459482155353394, - "grad_norm": 0.4155137965851478, - "learning_rate": 2.9486222884003393e-05, - "loss": 0.5742, + "epoch": 0.9188357122865939, + "grad_norm": 0.22139600923036631, + "learning_rate": 8.310503196318511e-05, + "loss": 0.533, "step": 3283 }, { - "epoch": 0.4596221133659902, - "grad_norm": 0.3929372634486545, - "learning_rate": 2.9475073086821115e-05, - "loss": 0.5467, + "epoch": 0.9191155891407781, + "grad_norm": 0.2184096652559976, + "learning_rate": 8.309347015784298e-05, + "loss": 0.4991, "step": 3284 }, { - "epoch": 0.45976207137858643, - "grad_norm": 0.4188647769421151, - "learning_rate": 2.9463922369965917e-05, - "loss": 0.6122, + "epoch": 0.9193954659949622, + "grad_norm": 0.22664252510152488, + "learning_rate": 8.308190520264583e-05, + "loss": 0.5185, "step": 3285 }, { - "epoch": 0.45990202939118263, - "grad_norm": 0.39936115798519434, - "learning_rate": 2.9452770735729374e-05, - "loss": 0.5592, + "epoch": 0.9196753428491464, + "grad_norm": 0.23589148483698946, + "learning_rate": 8.307033709869443e-05, + "loss": 0.5313, "step": 3286 }, { - "epoch": 0.4600419874037789, - "grad_norm": 0.41793644764176796, - "learning_rate": 2.944161818640327e-05, - "loss": 0.5679, + "epoch": 0.9199552197033305, + "grad_norm": 0.23207782520666115, + "learning_rate": 8.305876584708982e-05, + "loss": 0.5357, "step": 3287 }, { - "epoch": 0.4601819454163751, - "grad_norm": 0.4053297174029381, - "learning_rate": 2.943046472427956e-05, - "loss": 0.5731, + "epoch": 0.9202350965575147, + "grad_norm": 0.23192084926906445, + "learning_rate": 8.30471914489334e-05, + "loss": 0.5614, "step": 3288 }, { - "epoch": 0.46032190342897134, - "grad_norm": 0.4026483342781206, - "learning_rate": 2.9419310351650392e-05, - "loss": 0.5884, + "epoch": 0.9205149734116989, + "grad_norm": 0.21986567938578405, + "learning_rate": 8.30356139053268e-05, + "loss": 0.532, "step": 3289 }, { - "epoch": 0.46046186144156753, - "grad_norm": 0.4014937430945868, - "learning_rate": 2.9408155070808113e-05, - "loss": 0.5753, + "epoch": 0.920794850265883, + "grad_norm": 0.213230683261702, + "learning_rate": 8.302403321737195e-05, + "loss": 0.526, "step": 3290 }, { - "epoch": 0.46060181945416373, - "grad_norm": 0.4168370671409344, - "learning_rate": 2.9396998884045235e-05, - "loss": 0.563, + "epoch": 0.9210747271200672, + "grad_norm": 0.22119328715545494, + "learning_rate": 8.301244938617116e-05, + "loss": 0.537, "step": 3291 }, { - "epoch": 0.46074177746676, - "grad_norm": 0.4015569692729826, - "learning_rate": 2.9385841793654474e-05, - "loss": 0.603, + "epoch": 0.9213546039742513, + "grad_norm": 0.21757587067757342, + "learning_rate": 8.300086241282694e-05, + "loss": 0.5421, "step": 3292 }, { - "epoch": 0.4608817354793562, - "grad_norm": 0.3918827502679464, - "learning_rate": 2.937468380192872e-05, - "loss": 0.5587, + "epoch": 0.9216344808284355, + "grad_norm": 0.2146811201840199, + "learning_rate": 8.298927229844218e-05, + "loss": 0.5029, "step": 3293 }, { - "epoch": 0.46102169349195243, - "grad_norm": 0.4129995494719521, - "learning_rate": 2.936352491116106e-05, - "loss": 0.5554, + "epoch": 0.9219143576826196, + "grad_norm": 0.22371432424042492, + "learning_rate": 8.297767904412002e-05, + "loss": 0.5317, "step": 3294 }, { - "epoch": 0.46116165150454863, - "grad_norm": 0.4281727152857462, - "learning_rate": 2.9352365123644755e-05, - "loss": 0.5632, + "epoch": 0.9221942345368038, + "grad_norm": 0.20763289845884048, + "learning_rate": 8.296608265096388e-05, + "loss": 0.5373, "step": 3295 }, { - "epoch": 0.4613016095171449, - "grad_norm": 0.4078100385278002, - "learning_rate": 2.9341204441673266e-05, - "loss": 0.5717, + "epoch": 0.922474111390988, + "grad_norm": 0.21430336874827588, + "learning_rate": 8.295448312007756e-05, + "loss": 0.5215, "step": 3296 }, { - "epoch": 0.4614415675297411, - "grad_norm": 0.4670973011247201, - "learning_rate": 2.93300428675402e-05, - "loss": 0.5701, + "epoch": 0.9227539882451721, + "grad_norm": 0.21709980732777526, + "learning_rate": 8.294288045256511e-05, + "loss": 0.5519, "step": 3297 }, { - "epoch": 0.4615815255423373, - "grad_norm": 0.4149076463346234, - "learning_rate": 2.93188804035394e-05, - "loss": 0.5477, + "epoch": 0.9230338650993563, + "grad_norm": 0.2103398948722985, + "learning_rate": 8.293127464953083e-05, + "loss": 0.5196, "step": 3298 }, { - "epoch": 0.46172148355493353, - "grad_norm": 0.41094417493173746, - "learning_rate": 2.9307717051964862e-05, - "loss": 0.5499, + "epoch": 0.9233137419535404, + "grad_norm": 0.2183885312635107, + "learning_rate": 8.291966571207943e-05, + "loss": 0.5359, "step": 3299 }, { - "epoch": 0.46186144156752973, - "grad_norm": 0.40933405766552283, - "learning_rate": 2.929655281511075e-05, - "loss": 0.5389, + "epoch": 0.9235936188077246, + "grad_norm": 0.22417649082857644, + "learning_rate": 8.29080536413158e-05, + "loss": 0.5508, "step": 3300 }, { - "epoch": 0.462001399580126, - "grad_norm": 0.4235858324194054, - "learning_rate": 2.9285387695271444e-05, - "loss": 0.5568, + "epoch": 0.9238734956619088, + "grad_norm": 0.2242180021228506, + "learning_rate": 8.28964384383452e-05, + "loss": 0.5165, "step": 3301 }, { - "epoch": 0.4621413575927222, - "grad_norm": 0.3997529823349059, - "learning_rate": 2.9274221694741484e-05, - "loss": 0.5384, + "epoch": 0.9241533725160929, + "grad_norm": 0.22026698067633488, + "learning_rate": 8.288482010427319e-05, + "loss": 0.5367, "step": 3302 }, { - "epoch": 0.4622813156053184, - "grad_norm": 0.42004597023741636, - "learning_rate": 2.92630548158156e-05, - "loss": 0.5613, + "epoch": 0.924433249370277, + "grad_norm": 0.22307198854070948, + "learning_rate": 8.287319864020558e-05, + "loss": 0.5269, "step": 3303 }, { - "epoch": 0.46242127361791463, - "grad_norm": 0.4076618497301079, - "learning_rate": 2.925188706078869e-05, - "loss": 0.533, + "epoch": 0.9247131262244612, + "grad_norm": 0.2161676425004044, + "learning_rate": 8.286157404724853e-05, + "loss": 0.5049, "step": 3304 }, { - "epoch": 0.4625612316305108, - "grad_norm": 0.4131076996508653, - "learning_rate": 2.9240718431955855e-05, - "loss": 0.5809, + "epoch": 0.9249930030786454, + "grad_norm": 0.2178264056156661, + "learning_rate": 8.284994632650847e-05, + "loss": 0.5336, "step": 3305 }, { - "epoch": 0.4627011896431071, - "grad_norm": 0.4246693787392879, - "learning_rate": 2.9229548931612348e-05, - "loss": 0.553, + "epoch": 0.9252728799328296, + "grad_norm": 0.2220693587557796, + "learning_rate": 8.283831547909213e-05, + "loss": 0.5383, "step": 3306 }, { - "epoch": 0.4628411476557033, - "grad_norm": 0.4081853198228349, - "learning_rate": 2.9218378562053623e-05, - "loss": 0.5269, + "epoch": 0.9255527567870138, + "grad_norm": 0.22821604070015108, + "learning_rate": 8.282668150610655e-05, + "loss": 0.5213, "step": 3307 }, { - "epoch": 0.46298110566829953, - "grad_norm": 0.4185513766982846, - "learning_rate": 2.9207207325575304e-05, - "loss": 0.5391, + "epoch": 0.9258326336411978, + "grad_norm": 0.20891709009526233, + "learning_rate": 8.281504440865905e-05, + "loss": 0.553, "step": 3308 }, { - "epoch": 0.46312106368089573, - "grad_norm": 0.40971729273974855, - "learning_rate": 2.9196035224473196e-05, - "loss": 0.5562, + "epoch": 0.926112510495382, + "grad_norm": 0.2522342969506194, + "learning_rate": 8.280340418785726e-05, + "loss": 0.5359, "step": 3309 }, { - "epoch": 0.4632610216934919, - "grad_norm": 0.39899049100540745, - "learning_rate": 2.918486226104327e-05, - "loss": 0.5574, + "epoch": 0.9263923873495662, + "grad_norm": 0.21913336319177137, + "learning_rate": 8.279176084480909e-05, + "loss": 0.5231, "step": 3310 }, { - "epoch": 0.4634009797060882, - "grad_norm": 0.4210144546249132, - "learning_rate": 2.917368843758168e-05, - "loss": 0.5593, + "epoch": 0.9266722642037504, + "grad_norm": 0.22496493748652746, + "learning_rate": 8.278011438062276e-05, + "loss": 0.5362, "step": 3311 }, { - "epoch": 0.4635409377186844, - "grad_norm": 0.43313267085828555, - "learning_rate": 2.916251375638478e-05, - "loss": 0.5827, + "epoch": 0.9269521410579346, + "grad_norm": 0.2135751325818056, + "learning_rate": 8.27684647964068e-05, + "loss": 0.511, "step": 3312 }, { - "epoch": 0.46368089573128063, - "grad_norm": 0.4159179322901382, - "learning_rate": 2.9151338219749065e-05, - "loss": 0.555, + "epoch": 0.9272320179121186, + "grad_norm": 0.22436336250045402, + "learning_rate": 8.275681209327002e-05, + "loss": 0.5526, "step": 3313 }, { - "epoch": 0.4638208537438768, - "grad_norm": 0.4383620819549179, - "learning_rate": 2.9140161829971223e-05, - "loss": 0.5638, + "epoch": 0.9275118947663028, + "grad_norm": 0.2999163446230905, + "learning_rate": 8.274515627232153e-05, + "loss": 0.5456, "step": 3314 }, { - "epoch": 0.4639608117564731, - "grad_norm": 0.4176966506673776, - "learning_rate": 2.9128984589348114e-05, - "loss": 0.5802, + "epoch": 0.927791771620487, + "grad_norm": 0.2322438897760758, + "learning_rate": 8.273349733467076e-05, + "loss": 0.5244, "step": 3315 }, { - "epoch": 0.4641007697690693, - "grad_norm": 0.3985420143299249, - "learning_rate": 2.9117806500176774e-05, - "loss": 0.5614, + "epoch": 0.9280716484746712, + "grad_norm": 0.22875002476722534, + "learning_rate": 8.272183528142737e-05, + "loss": 0.5394, "step": 3316 }, { - "epoch": 0.4642407277816655, - "grad_norm": 0.40635759034414026, - "learning_rate": 2.910662756475443e-05, - "loss": 0.5502, + "epoch": 0.9283515253288553, + "grad_norm": 0.23141460413703294, + "learning_rate": 8.271017011370136e-05, + "loss": 0.525, "step": 3317 }, { - "epoch": 0.4643806857942617, - "grad_norm": 0.3920809381768575, - "learning_rate": 2.9095447785378443e-05, - "loss": 0.5187, + "epoch": 0.9286314021830394, + "grad_norm": 0.23110037584971857, + "learning_rate": 8.269850183260309e-05, + "loss": 0.5216, "step": 3318 }, { - "epoch": 0.4645206438068579, - "grad_norm": 0.4065824247557009, - "learning_rate": 2.9084267164346386e-05, - "loss": 0.5285, + "epoch": 0.9289112790372236, + "grad_norm": 0.24909584556341074, + "learning_rate": 8.26868304392431e-05, + "loss": 0.5143, "step": 3319 }, { - "epoch": 0.4646606018194542, - "grad_norm": 0.47569552376008784, - "learning_rate": 2.9073085703955987e-05, - "loss": 0.5592, + "epoch": 0.9291911558914078, + "grad_norm": 0.22769543995760647, + "learning_rate": 8.26751559347323e-05, + "loss": 0.5303, "step": 3320 }, { - "epoch": 0.4648005598320504, - "grad_norm": 0.41348964126371485, - "learning_rate": 2.9061903406505154e-05, - "loss": 0.573, + "epoch": 0.929471032745592, + "grad_norm": 0.2190293330582465, + "learning_rate": 8.266347832018185e-05, + "loss": 0.5116, "step": 3321 }, { - "epoch": 0.46494051784464663, - "grad_norm": 0.3990216785800373, - "learning_rate": 2.9050720274291943e-05, - "loss": 0.5686, + "epoch": 0.9297509095997761, + "grad_norm": 0.2154438364528873, + "learning_rate": 8.265179759670326e-05, + "loss": 0.525, "step": 3322 }, { - "epoch": 0.4650804758572428, - "grad_norm": 0.3834656155781017, - "learning_rate": 2.903953630961463e-05, - "loss": 0.5297, + "epoch": 0.9300307864539603, + "grad_norm": 0.21866564123388582, + "learning_rate": 8.26401137654083e-05, + "loss": 0.5515, "step": 3323 }, { - "epoch": 0.465220433869839, - "grad_norm": 0.4046993834562908, - "learning_rate": 2.9028351514771606e-05, - "loss": 0.5515, + "epoch": 0.9303106633081444, + "grad_norm": 0.22965351157119618, + "learning_rate": 8.262842682740905e-05, + "loss": 0.5275, "step": 3324 }, { - "epoch": 0.4653603918824353, - "grad_norm": 0.43593159937116366, - "learning_rate": 2.901716589206147e-05, - "loss": 0.5706, + "epoch": 0.9305905401623286, + "grad_norm": 0.23206410004529923, + "learning_rate": 8.261673678381786e-05, + "loss": 0.5497, "step": 3325 }, { - "epoch": 0.4655003498950315, - "grad_norm": 0.41907649826033294, - "learning_rate": 2.9005979443782993e-05, - "loss": 0.5738, + "epoch": 0.9308704170165127, + "grad_norm": 0.22020100188994954, + "learning_rate": 8.260504363574741e-05, + "loss": 0.5329, "step": 3326 }, { - "epoch": 0.4656403079076277, - "grad_norm": 0.39650396383746195, - "learning_rate": 2.899479217223509e-05, - "loss": 0.5678, + "epoch": 0.9311502938706969, + "grad_norm": 0.2280782425820405, + "learning_rate": 8.259334738431066e-05, + "loss": 0.5187, "step": 3327 }, { - "epoch": 0.4657802659202239, - "grad_norm": 0.41237846145466506, - "learning_rate": 2.898360407971687e-05, - "loss": 0.5743, + "epoch": 0.9314301707248811, + "grad_norm": 0.21920488734262566, + "learning_rate": 8.258164803062088e-05, + "loss": 0.5287, "step": 3328 }, { - "epoch": 0.4659202239328202, - "grad_norm": 0.39685046167702703, - "learning_rate": 2.8972415168527584e-05, - "loss": 0.5439, + "epoch": 0.9317100475790652, + "grad_norm": 0.22547720610684008, + "learning_rate": 8.256994557579156e-05, + "loss": 0.5385, "step": 3329 }, { - "epoch": 0.4660601819454164, - "grad_norm": 0.4050270937895372, - "learning_rate": 2.896122544096668e-05, - "loss": 0.5541, + "epoch": 0.9319899244332494, + "grad_norm": 0.22394556448498107, + "learning_rate": 8.255824002093662e-05, + "loss": 0.53, "step": 3330 }, { - "epoch": 0.46620013995801257, - "grad_norm": 0.40567639713038367, - "learning_rate": 2.895003489933375e-05, - "loss": 0.5712, + "epoch": 0.9322698012874335, + "grad_norm": 0.22206432199514495, + "learning_rate": 8.254653136717016e-05, + "loss": 0.537, "step": 3331 }, { - "epoch": 0.4663400979706088, - "grad_norm": 0.4174104058437715, - "learning_rate": 2.8938843545928573e-05, - "loss": 0.5634, + "epoch": 0.9325496781416177, + "grad_norm": 0.21538828155939793, + "learning_rate": 8.253481961560665e-05, + "loss": 0.5571, "step": 3332 }, { - "epoch": 0.466480055983205, - "grad_norm": 0.4174890988710686, - "learning_rate": 2.892765138305108e-05, - "loss": 0.5633, + "epoch": 0.9328295549958019, + "grad_norm": 0.2206053910702642, + "learning_rate": 8.252310476736077e-05, + "loss": 0.5253, "step": 3333 }, { - "epoch": 0.4666200139958013, - "grad_norm": 0.4085166170083092, - "learning_rate": 2.8916458413001375e-05, - "loss": 0.5661, + "epoch": 0.933109431849986, + "grad_norm": 0.23229006057926363, + "learning_rate": 8.251138682354758e-05, + "loss": 0.533, "step": 3334 }, { - "epoch": 0.4667599720083975, - "grad_norm": 0.416230735105726, - "learning_rate": 2.8905264638079732e-05, - "loss": 0.5736, + "epoch": 0.9333893087041701, + "grad_norm": 0.2354313195974534, + "learning_rate": 8.249966578528241e-05, + "loss": 0.5326, "step": 3335 }, { - "epoch": 0.4668999300209937, - "grad_norm": 0.3905749725084316, - "learning_rate": 2.8894070060586576e-05, - "loss": 0.5483, + "epoch": 0.9336691855583543, + "grad_norm": 0.23758558910894242, + "learning_rate": 8.248794165368085e-05, + "loss": 0.5222, "step": 3336 }, { - "epoch": 0.4670398880335899, - "grad_norm": 0.40890374703665905, - "learning_rate": 2.888287468282252e-05, - "loss": 0.5615, + "epoch": 0.9339490624125385, + "grad_norm": 0.21839564498117695, + "learning_rate": 8.247621442985883e-05, + "loss": 0.5472, "step": 3337 }, { - "epoch": 0.4671798460461861, - "grad_norm": 0.41635405487950694, - "learning_rate": 2.8871678507088312e-05, - "loss": 0.5467, + "epoch": 0.9342289392667227, + "grad_norm": 0.21572044108209534, + "learning_rate": 8.246448411493252e-05, + "loss": 0.5424, "step": 3338 }, { - "epoch": 0.4673198040587824, - "grad_norm": 0.4336846869668269, - "learning_rate": 2.886048153568489e-05, - "loss": 0.5511, + "epoch": 0.9345088161209067, + "grad_norm": 0.22118750044316762, + "learning_rate": 8.245275071001846e-05, + "loss": 0.5251, "step": 3339 }, { - "epoch": 0.46745976207137857, - "grad_norm": 0.38470189006529587, - "learning_rate": 2.8849283770913337e-05, - "loss": 0.5488, + "epoch": 0.9347886929750909, + "grad_norm": 0.21007158618082628, + "learning_rate": 8.244101421623341e-05, + "loss": 0.5389, "step": 3340 }, { - "epoch": 0.4675997200839748, - "grad_norm": 0.43922504036128623, - "learning_rate": 2.8838085215074923e-05, - "loss": 0.592, + "epoch": 0.9350685698292751, + "grad_norm": 0.21516354443097485, + "learning_rate": 8.242927463469448e-05, + "loss": 0.4985, "step": 3341 }, { - "epoch": 0.467739678096571, - "grad_norm": 0.4046341572147428, - "learning_rate": 2.8826885870471043e-05, - "loss": 0.5618, + "epoch": 0.9353484466834593, + "grad_norm": 0.21503581987178988, + "learning_rate": 8.241753196651902e-05, + "loss": 0.5335, "step": 3342 }, { - "epoch": 0.4678796361091673, - "grad_norm": 0.38781336644907494, - "learning_rate": 2.8815685739403298e-05, - "loss": 0.556, + "epoch": 0.9356283235376435, + "grad_norm": 0.2214884310011914, + "learning_rate": 8.240578621282474e-05, + "loss": 0.54, "step": 3343 }, { - "epoch": 0.46801959412176347, - "grad_norm": 0.4120764675677611, - "learning_rate": 2.8804484824173417e-05, - "loss": 0.5407, + "epoch": 0.9359082003918276, + "grad_norm": 0.22379935880219792, + "learning_rate": 8.239403737472958e-05, + "loss": 0.5275, "step": 3344 }, { - "epoch": 0.46815955213435967, - "grad_norm": 0.4309704301588916, - "learning_rate": 2.8793283127083292e-05, - "loss": 0.5491, + "epoch": 0.9361880772460117, + "grad_norm": 0.22171699044684767, + "learning_rate": 8.238228545335183e-05, + "loss": 0.521, "step": 3345 }, { - "epoch": 0.4682995101469559, - "grad_norm": 0.3937722091804054, - "learning_rate": 2.8782080650435006e-05, - "loss": 0.5799, + "epoch": 0.9364679541001959, + "grad_norm": 0.21222872526636102, + "learning_rate": 8.237053044981001e-05, + "loss": 0.5022, "step": 3346 }, { - "epoch": 0.4684394681595521, - "grad_norm": 0.39196249510935055, - "learning_rate": 2.8770877396530766e-05, - "loss": 0.5386, + "epoch": 0.9367478309543801, + "grad_norm": 0.22115768080723117, + "learning_rate": 8.235877236522298e-05, + "loss": 0.5197, "step": 3347 }, { - "epoch": 0.46857942617214837, - "grad_norm": 0.410878891101837, - "learning_rate": 2.875967336767296e-05, - "loss": 0.603, + "epoch": 0.9370277078085643, + "grad_norm": 0.22310515684936527, + "learning_rate": 8.234701120070989e-05, + "loss": 0.546, "step": 3348 }, { - "epoch": 0.46871938418474457, - "grad_norm": 0.4020037749774897, - "learning_rate": 2.8748468566164134e-05, - "loss": 0.5499, + "epoch": 0.9373075846627484, + "grad_norm": 0.23072090710835966, + "learning_rate": 8.233524695739017e-05, + "loss": 0.5553, "step": 3349 }, { - "epoch": 0.4688593421973408, - "grad_norm": 0.45651213620060477, - "learning_rate": 2.8737262994306985e-05, - "loss": 0.6113, + "epoch": 0.9375874615169325, + "grad_norm": 0.22580325936311144, + "learning_rate": 8.232347963638354e-05, + "loss": 0.5366, "step": 3350 }, { - "epoch": 0.468999300209937, - "grad_norm": 0.4187097393109256, - "learning_rate": 2.872605665440436e-05, - "loss": 0.548, + "epoch": 0.9378673383711167, + "grad_norm": 0.2168037341491656, + "learning_rate": 8.231170923881005e-05, + "loss": 0.5305, "step": 3351 }, { - "epoch": 0.4691392582225332, - "grad_norm": 0.4096624177728241, - "learning_rate": 2.8714849548759293e-05, - "loss": 0.5629, + "epoch": 0.9381472152253009, + "grad_norm": 0.21691770135249946, + "learning_rate": 8.229993576578995e-05, + "loss": 0.5061, "step": 3352 }, { - "epoch": 0.46927921623512947, - "grad_norm": 0.4106487034595635, - "learning_rate": 2.8703641679674954e-05, - "loss": 0.5743, + "epoch": 0.938427092079485, + "grad_norm": 0.21486304767912126, + "learning_rate": 8.22881592184439e-05, + "loss": 0.5179, "step": 3353 }, { - "epoch": 0.46941917424772567, - "grad_norm": 0.4160938815791059, - "learning_rate": 2.869243304945467e-05, - "loss": 0.5366, + "epoch": 0.9387069689336692, + "grad_norm": 0.210964042393187, + "learning_rate": 8.227637959789279e-05, + "loss": 0.5191, "step": 3354 }, { - "epoch": 0.4695591322603219, - "grad_norm": 0.401353123051922, - "learning_rate": 2.8681223660401935e-05, - "loss": 0.5601, + "epoch": 0.9389868457878533, + "grad_norm": 0.21109165296573837, + "learning_rate": 8.22645969052578e-05, + "loss": 0.5474, "step": 3355 }, { - "epoch": 0.4696990902729181, - "grad_norm": 0.3960216537704636, - "learning_rate": 2.8670013514820375e-05, - "loss": 0.5472, + "epoch": 0.9392667226420375, + "grad_norm": 0.21391586189902217, + "learning_rate": 8.225281114166043e-05, + "loss": 0.549, "step": 3356 }, { - "epoch": 0.46983904828551437, - "grad_norm": 0.40124295326684595, - "learning_rate": 2.8658802615013807e-05, - "loss": 0.5582, + "epoch": 0.9395465994962217, + "grad_norm": 0.23001874093667968, + "learning_rate": 8.224102230822242e-05, + "loss": 0.524, "step": 3357 }, { - "epoch": 0.46997900629811057, - "grad_norm": 0.4061763668246899, - "learning_rate": 2.8647590963286175e-05, - "loss": 0.5582, + "epoch": 0.9398264763504058, + "grad_norm": 0.21120555198380334, + "learning_rate": 8.222923040606588e-05, + "loss": 0.5254, "step": 3358 }, { - "epoch": 0.47011896431070677, - "grad_norm": 0.42540877415275535, - "learning_rate": 2.8636378561941592e-05, - "loss": 0.5555, + "epoch": 0.94010635320459, + "grad_norm": 0.21607266231750683, + "learning_rate": 8.221743543631313e-05, + "loss": 0.5424, "step": 3359 }, { - "epoch": 0.470258922323303, - "grad_norm": 0.40556363836960674, - "learning_rate": 2.862516541328431e-05, - "loss": 0.5677, + "epoch": 0.9403862300587741, + "grad_norm": 0.2167694700672283, + "learning_rate": 8.220563740008687e-05, + "loss": 0.5304, "step": 3360 }, { - "epoch": 0.4703988803358992, - "grad_norm": 0.4163599612619921, - "learning_rate": 2.8613951519618765e-05, - "loss": 0.5648, + "epoch": 0.9406661069129583, + "grad_norm": 0.20829066435264945, + "learning_rate": 8.219383629850998e-05, + "loss": 0.5074, "step": 3361 }, { - "epoch": 0.47053883834849547, - "grad_norm": 0.4088068121264862, - "learning_rate": 2.8602736883249503e-05, - "loss": 0.5592, + "epoch": 0.9409459837671424, + "grad_norm": 0.21946557578426373, + "learning_rate": 8.218203213270576e-05, + "loss": 0.525, "step": 3362 }, { - "epoch": 0.47067879636109167, - "grad_norm": 0.37314677325016854, - "learning_rate": 2.859152150648126e-05, - "loss": 0.5145, + "epoch": 0.9412258606213266, + "grad_norm": 0.21422867572085813, + "learning_rate": 8.21702249037977e-05, + "loss": 0.5343, "step": 3363 }, { - "epoch": 0.4708187543736879, - "grad_norm": 0.7930198911401396, - "learning_rate": 2.8580305391618912e-05, - "loss": 0.5536, + "epoch": 0.9415057374755108, + "grad_norm": 0.21225627725671894, + "learning_rate": 8.215841461290963e-05, + "loss": 0.5166, "step": 3364 }, { - "epoch": 0.4709587123862841, - "grad_norm": 0.41677939251494683, - "learning_rate": 2.8569088540967476e-05, - "loss": 0.5369, + "epoch": 0.941785614329695, + "grad_norm": 0.2125501539626726, + "learning_rate": 8.214660126116566e-05, + "loss": 0.5356, "step": 3365 }, { - "epoch": 0.4710986703988803, - "grad_norm": 0.3922638579854847, - "learning_rate": 2.8557870956832132e-05, - "loss": 0.5565, + "epoch": 0.9420654911838791, + "grad_norm": 0.22675920099238844, + "learning_rate": 8.213478484969017e-05, + "loss": 0.5314, "step": 3366 }, { - "epoch": 0.47123862841147657, - "grad_norm": 0.4290159510473203, - "learning_rate": 2.8546652641518208e-05, - "loss": 0.5655, + "epoch": 0.9423453680380632, + "grad_norm": 0.23108570578589452, + "learning_rate": 8.21229653796079e-05, + "loss": 0.5298, "step": 3367 }, { - "epoch": 0.47137858642407277, - "grad_norm": 0.41113045647723195, - "learning_rate": 2.8535433597331173e-05, - "loss": 0.5651, + "epoch": 0.9426252448922474, + "grad_norm": 0.2218603161067393, + "learning_rate": 8.211114285204378e-05, + "loss": 0.5314, "step": 3368 }, { - "epoch": 0.471518544436669, - "grad_norm": 0.4241008004580075, - "learning_rate": 2.8524213826576667e-05, - "loss": 0.5634, + "epoch": 0.9429051217464316, + "grad_norm": 0.23463134342368347, + "learning_rate": 8.209931726812312e-05, + "loss": 0.5491, "step": 3369 }, { - "epoch": 0.4716585024492652, - "grad_norm": 0.4461069068987371, - "learning_rate": 2.851299333156046e-05, - "loss": 0.5641, + "epoch": 0.9431849986006158, + "grad_norm": 0.22482437164367458, + "learning_rate": 8.208748862897147e-05, + "loss": 0.5119, "step": 3370 }, { - "epoch": 0.47179846046186147, - "grad_norm": 0.4284897736677648, - "learning_rate": 2.8501772114588476e-05, - "loss": 0.5711, + "epoch": 0.9434648754547998, + "grad_norm": 0.2181993821710289, + "learning_rate": 8.20756569357147e-05, + "loss": 0.4967, "step": 3371 }, { - "epoch": 0.47193841847445767, - "grad_norm": 0.3880573082608547, - "learning_rate": 2.8490550177966797e-05, - "loss": 0.4844, + "epoch": 0.943744752308984, + "grad_norm": 0.21704410621928005, + "learning_rate": 8.206382218947895e-05, + "loss": 0.5282, "step": 3372 }, { - "epoch": 0.47207837648705386, - "grad_norm": 0.4343172019973716, - "learning_rate": 2.8479327524001636e-05, - "loss": 0.5544, + "epoch": 0.9440246291631682, + "grad_norm": 0.21578762209142244, + "learning_rate": 8.205198439139066e-05, + "loss": 0.5426, "step": 3373 }, { - "epoch": 0.4722183344996501, - "grad_norm": 0.40211668252102023, - "learning_rate": 2.8468104154999366e-05, - "loss": 0.5577, + "epoch": 0.9443045060173524, + "grad_norm": 0.22317736739177998, + "learning_rate": 8.204014354257654e-05, + "loss": 0.5132, "step": 3374 }, { - "epoch": 0.4723582925122463, - "grad_norm": 0.4165983505211132, - "learning_rate": 2.845688007326651e-05, - "loss": 0.5462, + "epoch": 0.9445843828715366, + "grad_norm": 0.22459777955028615, + "learning_rate": 8.202829964416364e-05, + "loss": 0.5611, "step": 3375 }, { - "epoch": 0.47249825052484257, - "grad_norm": 0.41318809239332804, - "learning_rate": 2.8445655281109718e-05, - "loss": 0.5821, + "epoch": 0.9448642597257206, + "grad_norm": 0.20569382551072954, + "learning_rate": 8.201645269727925e-05, + "loss": 0.5087, "step": 3376 }, { - "epoch": 0.47263820853743876, - "grad_norm": 0.43834923618793703, - "learning_rate": 2.8434429780835807e-05, - "loss": 0.5682, + "epoch": 0.9451441365799048, + "grad_norm": 0.22181050657136128, + "learning_rate": 8.200460270305097e-05, + "loss": 0.5086, "step": 3377 }, { - "epoch": 0.472778166550035, - "grad_norm": 0.4069711220875038, - "learning_rate": 2.8423203574751727e-05, - "loss": 0.5339, + "epoch": 0.945424013434089, + "grad_norm": 0.22011878276120816, + "learning_rate": 8.199274966260669e-05, + "loss": 0.5208, "step": 3378 }, { - "epoch": 0.4729181245626312, - "grad_norm": 0.39860818609724685, - "learning_rate": 2.8411976665164585e-05, - "loss": 0.5379, + "epoch": 0.9457038902882732, + "grad_norm": 0.22850994647719924, + "learning_rate": 8.198089357707458e-05, + "loss": 0.5281, "step": 3379 }, { - "epoch": 0.4730580825752274, - "grad_norm": 0.41823989656114025, - "learning_rate": 2.840074905438161e-05, - "loss": 0.5801, + "epoch": 0.9459837671424574, + "grad_norm": 0.21889111596981828, + "learning_rate": 8.196903444758312e-05, + "loss": 0.5356, "step": 3380 }, { - "epoch": 0.47319804058782367, - "grad_norm": 0.41218220409199874, - "learning_rate": 2.8389520744710196e-05, - "loss": 0.5221, + "epoch": 0.9462636439966414, + "grad_norm": 0.2158418041627815, + "learning_rate": 8.195717227526109e-05, + "loss": 0.5237, "step": 3381 }, { - "epoch": 0.47333799860041986, - "grad_norm": 0.42461569878650546, - "learning_rate": 2.8378291738457887e-05, - "loss": 0.5785, + "epoch": 0.9465435208508256, + "grad_norm": 0.2089107590248314, + "learning_rate": 8.19453070612375e-05, + "loss": 0.5309, "step": 3382 }, { - "epoch": 0.4734779566130161, - "grad_norm": 0.4076091943362167, - "learning_rate": 2.8367062037932342e-05, - "loss": 0.5415, + "epoch": 0.9468233977050098, + "grad_norm": 0.2286941673056431, + "learning_rate": 8.19334388066417e-05, + "loss": 0.5195, "step": 3383 }, { - "epoch": 0.4736179146256123, - "grad_norm": 0.41452613569881464, - "learning_rate": 2.8355831645441388e-05, - "loss": 0.5649, + "epoch": 0.947103274559194, + "grad_norm": 0.2141772255391924, + "learning_rate": 8.192156751260332e-05, + "loss": 0.5284, "step": 3384 }, { - "epoch": 0.4737578726382085, - "grad_norm": 0.4037756702805257, - "learning_rate": 2.834460056329298e-05, - "loss": 0.5523, + "epoch": 0.9473831514133781, + "grad_norm": 0.21727007644459415, + "learning_rate": 8.190969318025228e-05, + "loss": 0.5296, "step": 3385 }, { - "epoch": 0.47389783065080476, - "grad_norm": 0.4011452181351811, - "learning_rate": 2.8333368793795224e-05, - "loss": 0.5246, + "epoch": 0.9476630282675623, + "grad_norm": 0.21485065824015953, + "learning_rate": 8.189781581071879e-05, + "loss": 0.5279, "step": 3386 }, { - "epoch": 0.47403778866340096, - "grad_norm": 0.413887567092737, - "learning_rate": 2.8322136339256356e-05, - "loss": 0.5281, + "epoch": 0.9479429051217464, + "grad_norm": 0.2200669410310803, + "learning_rate": 8.188593540513334e-05, + "loss": 0.5366, "step": 3387 }, { - "epoch": 0.4741777466759972, - "grad_norm": 0.42257963112995583, - "learning_rate": 2.8310903201984763e-05, - "loss": 0.5612, + "epoch": 0.9482227819759306, + "grad_norm": 0.2109932805242757, + "learning_rate": 8.18740519646267e-05, + "loss": 0.5347, "step": 3388 }, { - "epoch": 0.4743177046885934, - "grad_norm": 0.6050510544766738, - "learning_rate": 2.829966938428897e-05, - "loss": 0.5807, + "epoch": 0.9485026588301148, + "grad_norm": 0.2213002976546276, + "learning_rate": 8.186216549032995e-05, + "loss": 0.5469, "step": 3389 }, { - "epoch": 0.47445766270118966, - "grad_norm": 0.42337956289277995, - "learning_rate": 2.8288434888477627e-05, - "loss": 0.5648, + "epoch": 0.9487825356842989, + "grad_norm": 0.21103488566562068, + "learning_rate": 8.185027598337446e-05, + "loss": 0.5183, "step": 3390 }, { - "epoch": 0.47459762071378586, - "grad_norm": 0.39066953133834564, - "learning_rate": 2.827719971685956e-05, - "loss": 0.5896, + "epoch": 0.9490624125384831, + "grad_norm": 0.23950365831087736, + "learning_rate": 8.183838344489187e-05, + "loss": 0.5366, "step": 3391 }, { - "epoch": 0.47473757872638206, - "grad_norm": 0.40333322646376846, - "learning_rate": 2.8265963871743696e-05, - "loss": 0.5761, + "epoch": 0.9493422893926672, + "grad_norm": 0.21496148762292563, + "learning_rate": 8.182648787601414e-05, + "loss": 0.531, "step": 3392 }, { - "epoch": 0.4748775367389783, - "grad_norm": 0.41387027644417956, - "learning_rate": 2.825472735543912e-05, - "loss": 0.5925, + "epoch": 0.9496221662468514, + "grad_norm": 0.2239322942326302, + "learning_rate": 8.181458927787347e-05, + "loss": 0.5209, "step": 3393 }, { - "epoch": 0.4750174947515745, - "grad_norm": 0.3928152148704509, - "learning_rate": 2.8243490170255043e-05, - "loss": 0.5828, + "epoch": 0.9499020431010355, + "grad_norm": 0.23274600817797758, + "learning_rate": 8.180268765160237e-05, + "loss": 0.5488, "step": 3394 }, { - "epoch": 0.47515745276417076, - "grad_norm": 0.3929042322405297, - "learning_rate": 2.8232252318500834e-05, - "loss": 0.5831, + "epoch": 0.9501819199552197, + "grad_norm": 0.2188450011394307, + "learning_rate": 8.179078299833367e-05, + "loss": 0.5333, "step": 3395 }, { - "epoch": 0.47529741077676696, - "grad_norm": 0.4673310747874466, - "learning_rate": 2.8221013802485975e-05, - "loss": 0.5564, + "epoch": 0.9504617968094039, + "grad_norm": 0.22476830987697433, + "learning_rate": 8.177887531920045e-05, + "loss": 0.5284, "step": 3396 }, { - "epoch": 0.4754373687893632, - "grad_norm": 0.3888740851313229, - "learning_rate": 2.8209774624520097e-05, - "loss": 0.5575, + "epoch": 0.950741673663588, + "grad_norm": 0.21649046045356746, + "learning_rate": 8.17669646153361e-05, + "loss": 0.5277, "step": 3397 }, { - "epoch": 0.4755773268019594, - "grad_norm": 0.40361151709091403, - "learning_rate": 2.8198534786912965e-05, - "loss": 0.5496, + "epoch": 0.9510215505177722, + "grad_norm": 0.22288881244129846, + "learning_rate": 8.175505088787426e-05, + "loss": 0.5371, "step": 3398 }, { - "epoch": 0.4757172848145556, - "grad_norm": 0.4106448187857887, - "learning_rate": 2.818729429197448e-05, - "loss": 0.5607, + "epoch": 0.9513014273719563, + "grad_norm": 0.22731584658365314, + "learning_rate": 8.174313413794892e-05, + "loss": 0.5338, "step": 3399 }, { - "epoch": 0.47585724282715186, - "grad_norm": 0.42784648142356363, - "learning_rate": 2.8176053142014687e-05, - "loss": 0.6264, + "epoch": 0.9515813042261405, + "grad_norm": 0.22274506340840244, + "learning_rate": 8.173121436669428e-05, + "loss": 0.5086, "step": 3400 }, { - "epoch": 0.47599720083974806, - "grad_norm": 0.40344827404813044, - "learning_rate": 2.8164811339343732e-05, - "loss": 0.5812, + "epoch": 0.9518611810803247, + "grad_norm": 0.22497675125736963, + "learning_rate": 8.171929157524491e-05, + "loss": 0.5259, "step": 3401 }, { - "epoch": 0.4761371588523443, - "grad_norm": 0.41503031802135976, - "learning_rate": 2.815356888627195e-05, - "loss": 0.5635, + "epoch": 0.9521410579345088, + "grad_norm": 0.22830048832712135, + "learning_rate": 8.170736576473563e-05, + "loss": 0.512, "step": 3402 }, { - "epoch": 0.4762771168649405, - "grad_norm": 0.3854795531096736, - "learning_rate": 2.814232578510975e-05, - "loss": 0.5635, + "epoch": 0.9524209347886929, + "grad_norm": 0.25452996809266615, + "learning_rate": 8.169543693630151e-05, + "loss": 0.5203, "step": 3403 }, { - "epoch": 0.47641707487753676, - "grad_norm": 0.4009076755410169, - "learning_rate": 2.8131082038167735e-05, - "loss": 0.5731, + "epoch": 0.9527008116428771, + "grad_norm": 0.26772054627038366, + "learning_rate": 8.168350509107795e-05, + "loss": 0.5571, "step": 3404 }, { - "epoch": 0.47655703289013296, - "grad_norm": 0.38835090703799413, - "learning_rate": 2.8119837647756574e-05, - "loss": 0.565, + "epoch": 0.9529806884970613, + "grad_norm": 0.22907334090385764, + "learning_rate": 8.167157023020066e-05, + "loss": 0.5319, "step": 3405 }, { - "epoch": 0.47669699090272916, - "grad_norm": 0.4286357705154638, - "learning_rate": 2.8108592616187133e-05, - "loss": 0.549, + "epoch": 0.9532605653512455, + "grad_norm": 0.47850244409919623, + "learning_rate": 8.16596323548056e-05, + "loss": 0.5498, "step": 3406 }, { - "epoch": 0.4768369489153254, - "grad_norm": 0.4275083145446453, - "learning_rate": 2.8097346945770364e-05, - "loss": 0.5719, + "epoch": 0.9535404422054297, + "grad_norm": 0.26977769050935135, + "learning_rate": 8.164769146602899e-05, + "loss": 0.5467, "step": 3407 }, { - "epoch": 0.4769769069279216, - "grad_norm": 0.3940008020310687, - "learning_rate": 2.808610063881737e-05, - "loss": 0.5208, + "epoch": 0.9538203190596137, + "grad_norm": 0.47410180986287737, + "learning_rate": 8.16357475650074e-05, + "loss": 0.5786, "step": 3408 }, { - "epoch": 0.47711686494051786, - "grad_norm": 0.4229335142753006, - "learning_rate": 2.807485369763938e-05, - "loss": 0.5763, + "epoch": 0.9541001959137979, + "grad_norm": 1.8304529902198994, + "learning_rate": 8.162380065287766e-05, + "loss": 0.5325, "step": 3409 }, { - "epoch": 0.47725682295311406, - "grad_norm": 0.4161121773460535, - "learning_rate": 2.8063606124547765e-05, - "loss": 0.5443, + "epoch": 0.9543800727679821, + "grad_norm": 0.2377845171142009, + "learning_rate": 8.161185073077686e-05, + "loss": 0.5227, "step": 3410 }, { - "epoch": 0.4773967809657103, - "grad_norm": 0.40765919854648125, - "learning_rate": 2.8052357921854e-05, - "loss": 0.5702, + "epoch": 0.9546599496221663, + "grad_norm": 0.3081705322827635, + "learning_rate": 8.159989779984242e-05, + "loss": 0.5485, "step": 3411 }, { - "epoch": 0.4775367389783065, - "grad_norm": 0.4163390293585378, - "learning_rate": 2.804110909186971e-05, - "loss": 0.5728, + "epoch": 0.9549398264763505, + "grad_norm": 0.2184041246946621, + "learning_rate": 8.158794186121202e-05, + "loss": 0.5656, "step": 3412 }, { - "epoch": 0.4776766969909027, - "grad_norm": 0.4377093314827132, - "learning_rate": 2.8029859636906654e-05, - "loss": 0.5785, + "epoch": 0.9552197033305345, + "grad_norm": 0.22391711746891552, + "learning_rate": 8.157598291602362e-05, + "loss": 0.5335, "step": 3413 }, { - "epoch": 0.47781665500349896, - "grad_norm": 0.3997496345348747, - "learning_rate": 2.8018609559276686e-05, - "loss": 0.5872, + "epoch": 0.9554995801847187, + "grad_norm": 0.2144146398918876, + "learning_rate": 8.15640209654155e-05, + "loss": 0.5428, "step": 3414 }, { - "epoch": 0.47795661301609516, - "grad_norm": 0.41218479804115327, - "learning_rate": 2.800735886129184e-05, - "loss": 0.5606, + "epoch": 0.9557794570389029, + "grad_norm": 0.22302761311139452, + "learning_rate": 8.15520560105262e-05, + "loss": 0.5397, "step": 3415 }, { - "epoch": 0.4780965710286914, - "grad_norm": 0.4227372321145177, - "learning_rate": 2.7996107545264223e-05, - "loss": 0.5708, + "epoch": 0.9560593338930871, + "grad_norm": 0.22297966431799787, + "learning_rate": 8.154008805249454e-05, + "loss": 0.5183, "step": 3416 }, { - "epoch": 0.4782365290412876, - "grad_norm": 0.42665670114635346, - "learning_rate": 2.7984855613506107e-05, - "loss": 0.5704, + "epoch": 0.9563392107472712, + "grad_norm": 0.2161617150362051, + "learning_rate": 8.152811709245967e-05, + "loss": 0.5467, "step": 3417 }, { - "epoch": 0.47837648705388386, - "grad_norm": 0.4216359313020928, - "learning_rate": 2.7973603068329884e-05, - "loss": 0.5378, + "epoch": 0.9566190876014553, + "grad_norm": 0.23247263670525822, + "learning_rate": 8.151614313156095e-05, + "loss": 0.5368, "step": 3418 }, { - "epoch": 0.47851644506648006, - "grad_norm": 0.4320525421405627, - "learning_rate": 2.796234991204805e-05, - "loss": 0.5626, + "epoch": 0.9568989644556395, + "grad_norm": 0.21350355080837546, + "learning_rate": 8.150416617093808e-05, + "loss": 0.5536, "step": 3419 }, { - "epoch": 0.47865640307907625, - "grad_norm": 0.4087593290796549, - "learning_rate": 2.795109614697326e-05, - "loss": 0.5472, + "epoch": 0.9571788413098237, + "grad_norm": 0.22149224523024807, + "learning_rate": 8.149218621173105e-05, + "loss": 0.5314, "step": 3420 }, { - "epoch": 0.4787963610916725, - "grad_norm": 0.4311719285838966, - "learning_rate": 2.793984177541827e-05, - "loss": 0.5585, + "epoch": 0.9574587181640078, + "grad_norm": 0.22045822843111815, + "learning_rate": 8.14802032550801e-05, + "loss": 0.5318, "step": 3421 }, { - "epoch": 0.4789363191042687, - "grad_norm": 0.4101348935742422, - "learning_rate": 2.792858679969596e-05, - "loss": 0.5539, + "epoch": 0.957738595018192, + "grad_norm": 0.21141350617451612, + "learning_rate": 8.14682173021258e-05, + "loss": 0.5213, "step": 3422 }, { - "epoch": 0.47907627711686496, - "grad_norm": 0.4240688742729598, - "learning_rate": 2.7917331222119346e-05, - "loss": 0.5853, + "epoch": 0.9580184718723762, + "grad_norm": 0.23370290276535358, + "learning_rate": 8.145622835400895e-05, + "loss": 0.5603, "step": 3423 }, { - "epoch": 0.47921623512946115, - "grad_norm": 0.4192530280413205, - "learning_rate": 2.790607504500157e-05, - "loss": 0.5476, + "epoch": 0.9582983487265603, + "grad_norm": 0.22957506378932596, + "learning_rate": 8.144423641187069e-05, + "loss": 0.5256, "step": 3424 }, { - "epoch": 0.4793561931420574, - "grad_norm": 0.39900981412049086, - "learning_rate": 2.7894818270655882e-05, - "loss": 0.5582, + "epoch": 0.9585782255807445, + "grad_norm": 0.23654506009966345, + "learning_rate": 8.14322414768524e-05, + "loss": 0.5463, "step": 3425 }, { - "epoch": 0.4794961511546536, - "grad_norm": 0.3965711350604459, - "learning_rate": 2.7883560901395667e-05, - "loss": 0.5419, + "epoch": 0.9588581024349286, + "grad_norm": 0.235485318675011, + "learning_rate": 8.142024355009577e-05, + "loss": 0.5059, "step": 3426 }, { - "epoch": 0.4796361091672498, - "grad_norm": 0.4312456683665289, - "learning_rate": 2.7872302939534433e-05, - "loss": 0.5536, + "epoch": 0.9591379792891128, + "grad_norm": 0.23097179799511522, + "learning_rate": 8.140824263274279e-05, + "loss": 0.5356, "step": 3427 }, { - "epoch": 0.47977606717984606, - "grad_norm": 0.40864635902503055, - "learning_rate": 2.78610443873858e-05, - "loss": 0.5812, + "epoch": 0.959417856143297, + "grad_norm": 0.2302862749501254, + "learning_rate": 8.139623872593567e-05, + "loss": 0.5787, "step": 3428 }, { - "epoch": 0.47991602519244225, - "grad_norm": 0.40901705450143366, - "learning_rate": 2.7849785247263515e-05, - "loss": 0.5284, + "epoch": 0.9596977329974811, + "grad_norm": 0.2193323203769243, + "learning_rate": 8.138423183081699e-05, + "loss": 0.5417, "step": 3429 }, { - "epoch": 0.4800559832050385, - "grad_norm": 0.40430272789500593, - "learning_rate": 2.7838525521481444e-05, - "loss": 0.5587, + "epoch": 0.9599776098516652, + "grad_norm": 0.2277662806104916, + "learning_rate": 8.137222194852955e-05, + "loss": 0.5642, "step": 3430 }, { - "epoch": 0.4801959412176347, - "grad_norm": 0.3934946456153737, - "learning_rate": 2.7827265212353587e-05, - "loss": 0.5509, + "epoch": 0.9602574867058494, + "grad_norm": 0.2141847213081554, + "learning_rate": 8.136020908021647e-05, + "loss": 0.5221, "step": 3431 }, { - "epoch": 0.48033589923023096, - "grad_norm": 0.42366137833048706, - "learning_rate": 2.781600432219404e-05, - "loss": 0.5478, + "epoch": 0.9605373635600336, + "grad_norm": 0.21106804184196115, + "learning_rate": 8.134819322702114e-05, + "loss": 0.5198, "step": 3432 }, { - "epoch": 0.48047585724282715, - "grad_norm": 0.4067675335773988, - "learning_rate": 2.7804742853317027e-05, - "loss": 0.5348, + "epoch": 0.9608172404142178, + "grad_norm": 0.23110787874906272, + "learning_rate": 8.133617439008723e-05, + "loss": 0.4946, "step": 3433 }, { - "epoch": 0.48061581525542335, - "grad_norm": 0.3990940892590606, - "learning_rate": 2.7793480808036898e-05, - "loss": 0.5449, + "epoch": 0.9610971172684019, + "grad_norm": 0.21659770119960287, + "learning_rate": 8.13241525705587e-05, + "loss": 0.5259, "step": 3434 }, { - "epoch": 0.4807557732680196, - "grad_norm": 0.40172847447577514, - "learning_rate": 2.7782218188668118e-05, - "loss": 0.5898, + "epoch": 0.961376994122586, + "grad_norm": 0.22003405504635687, + "learning_rate": 8.13121277695798e-05, + "loss": 0.5285, "step": 3435 }, { - "epoch": 0.4808957312806158, - "grad_norm": 0.3955704115554125, - "learning_rate": 2.7770954997525277e-05, - "loss": 0.5357, + "epoch": 0.9616568709767702, + "grad_norm": 0.22217668462938064, + "learning_rate": 8.130009998829504e-05, + "loss": 0.5224, "step": 3436 }, { - "epoch": 0.48103568929321205, - "grad_norm": 0.37493116627514156, - "learning_rate": 2.7759691236923064e-05, - "loss": 0.5197, + "epoch": 0.9619367478309544, + "grad_norm": 0.23345429987225355, + "learning_rate": 8.128806922784927e-05, + "loss": 0.5642, "step": 3437 }, { - "epoch": 0.48117564730580825, - "grad_norm": 0.40865836884433765, - "learning_rate": 2.7748426909176307e-05, - "loss": 0.5567, + "epoch": 0.9622166246851386, + "grad_norm": 0.22556320906052635, + "learning_rate": 8.127603548938754e-05, + "loss": 0.5116, "step": 3438 }, { - "epoch": 0.4813156053184045, - "grad_norm": 0.41498192510745757, - "learning_rate": 2.7737162016599927e-05, - "loss": 0.595, + "epoch": 0.9624965015393226, + "grad_norm": 0.2256630583184537, + "learning_rate": 8.126399877405524e-05, + "loss": 0.5292, "step": 3439 }, { - "epoch": 0.4814555633310007, - "grad_norm": 0.4299760976868325, - "learning_rate": 2.7725896561508983e-05, - "loss": 0.5465, + "epoch": 0.9627763783935068, + "grad_norm": 0.22957429736417528, + "learning_rate": 8.125195908299804e-05, + "loss": 0.5326, "step": 3440 }, { - "epoch": 0.4815955213435969, - "grad_norm": 0.40031579978939574, - "learning_rate": 2.7714630546218635e-05, - "loss": 0.5965, + "epoch": 0.963056255247691, + "grad_norm": 0.21724747228624117, + "learning_rate": 8.123991641736189e-05, + "loss": 0.5214, "step": 3441 }, { - "epoch": 0.48173547935619315, - "grad_norm": 0.40096718317347985, - "learning_rate": 2.770336397304417e-05, - "loss": 0.5321, + "epoch": 0.9633361321018752, + "grad_norm": 0.21776413520686494, + "learning_rate": 8.1227870778293e-05, + "loss": 0.5398, "step": 3442 }, { - "epoch": 0.48187543736878935, - "grad_norm": 0.3981130281220963, - "learning_rate": 2.769209684430098e-05, - "loss": 0.5558, + "epoch": 0.9636160089560594, + "grad_norm": 0.22292545990751855, + "learning_rate": 8.121582216693791e-05, + "loss": 0.5281, "step": 3443 }, { - "epoch": 0.4820153953813856, - "grad_norm": 0.45577292418268894, - "learning_rate": 2.7680829162304567e-05, - "loss": 0.5586, + "epoch": 0.9638958858102435, + "grad_norm": 0.22405172895322287, + "learning_rate": 8.120377058444336e-05, + "loss": 0.5454, "step": 3444 }, { - "epoch": 0.4821553533939818, - "grad_norm": 0.4112624187601042, - "learning_rate": 2.7669560929370564e-05, - "loss": 0.5457, + "epoch": 0.9641757626644276, + "grad_norm": 0.22737436469087888, + "learning_rate": 8.11917160319565e-05, + "loss": 0.5463, "step": 3445 }, { - "epoch": 0.48229531140657805, - "grad_norm": 0.39264802624439415, - "learning_rate": 2.76582921478147e-05, - "loss": 0.547, + "epoch": 0.9644556395186118, + "grad_norm": 0.23344539867249173, + "learning_rate": 8.117965851062463e-05, + "loss": 0.5177, "step": 3446 }, { - "epoch": 0.48243526941917425, - "grad_norm": 0.4141847607595186, - "learning_rate": 2.7647022819952835e-05, - "loss": 0.5274, + "epoch": 0.964735516372796, + "grad_norm": 0.2101934078395959, + "learning_rate": 8.11675980215954e-05, + "loss": 0.5472, "step": 3447 }, { - "epoch": 0.48257522743177045, - "grad_norm": 0.40886257454682706, - "learning_rate": 2.763575294810091e-05, - "loss": 0.5305, + "epoch": 0.9650153932269802, + "grad_norm": 0.21299782200792122, + "learning_rate": 8.115553456601676e-05, + "loss": 0.5107, "step": 3448 }, { - "epoch": 0.4827151854443667, - "grad_norm": 0.39545619852603475, - "learning_rate": 2.7624482534575026e-05, - "loss": 0.5839, + "epoch": 0.9652952700811643, + "grad_norm": 0.20530871191472141, + "learning_rate": 8.114346814503689e-05, + "loss": 0.5159, "step": 3449 }, { - "epoch": 0.4828551434569629, - "grad_norm": 0.40937623131741985, - "learning_rate": 2.761321158169134e-05, - "loss": 0.5638, + "epoch": 0.9655751469353484, + "grad_norm": 0.21161623004748173, + "learning_rate": 8.11313987598043e-05, + "loss": 0.5179, "step": 3450 }, { - "epoch": 0.48299510146955915, - "grad_norm": 0.407186107707272, - "learning_rate": 2.7601940091766164e-05, - "loss": 0.5158, + "epoch": 0.9658550237895326, + "grad_norm": 0.21483478174616114, + "learning_rate": 8.111932641146775e-05, + "loss": 0.5222, "step": 3451 }, { - "epoch": 0.48313505948215535, - "grad_norm": 0.4076092863747084, - "learning_rate": 2.7590668067115895e-05, - "loss": 0.5874, + "epoch": 0.9661349006437168, + "grad_norm": 0.255745208170339, + "learning_rate": 8.110725110117629e-05, + "loss": 0.5141, "step": 3452 }, { - "epoch": 0.4832750174947516, - "grad_norm": 0.4221563990293702, - "learning_rate": 2.757939551005706e-05, - "loss": 0.5484, + "epoch": 0.966414777497901, + "grad_norm": 0.22620658357394077, + "learning_rate": 8.109517283007926e-05, + "loss": 0.5472, "step": 3453 }, { - "epoch": 0.4834149755073478, - "grad_norm": 0.38895523634768725, - "learning_rate": 2.7568122422906273e-05, - "loss": 0.5166, + "epoch": 0.9666946543520851, + "grad_norm": 0.22280728342450096, + "learning_rate": 8.108309159932629e-05, + "loss": 0.5164, "step": 3454 }, { - "epoch": 0.483554933519944, - "grad_norm": 0.4268904833505735, - "learning_rate": 2.755684880798026e-05, - "loss": 0.5862, + "epoch": 0.9669745312062692, + "grad_norm": 0.2117522196193627, + "learning_rate": 8.107100741006724e-05, + "loss": 0.5041, "step": 3455 }, { - "epoch": 0.48369489153254025, - "grad_norm": 0.41707456856060987, - "learning_rate": 2.754557466759589e-05, - "loss": 0.5704, + "epoch": 0.9672544080604534, + "grad_norm": 0.22689956471804157, + "learning_rate": 8.105892026345232e-05, + "loss": 0.5265, "step": 3456 }, { - "epoch": 0.48383484954513645, - "grad_norm": 0.41401351680807885, - "learning_rate": 2.7534300004070086e-05, - "loss": 0.5452, + "epoch": 0.9675342849146376, + "grad_norm": 0.22073307051907443, + "learning_rate": 8.104683016063199e-05, + "loss": 0.5392, "step": 3457 }, { - "epoch": 0.4839748075577327, - "grad_norm": 0.4000475828565154, - "learning_rate": 2.7523024819719922e-05, - "loss": 0.5862, + "epoch": 0.9678141617688217, + "grad_norm": 0.22294195192815078, + "learning_rate": 8.1034737102757e-05, + "loss": 0.5536, "step": 3458 }, { - "epoch": 0.4841147655703289, - "grad_norm": 0.4126946869503196, - "learning_rate": 2.7511749116862558e-05, - "loss": 0.5499, + "epoch": 0.9680940386230059, + "grad_norm": 0.22542057272774796, + "learning_rate": 8.102264109097834e-05, + "loss": 0.5474, "step": 3459 }, { - "epoch": 0.4842547235829251, - "grad_norm": 0.4046742301519851, - "learning_rate": 2.7500472897815265e-05, - "loss": 0.5295, + "epoch": 0.96837391547719, + "grad_norm": 0.222310929724708, + "learning_rate": 8.101054212644734e-05, + "loss": 0.5301, "step": 3460 }, { - "epoch": 0.48439468159552135, - "grad_norm": 0.42125194708107067, - "learning_rate": 2.748919616489542e-05, - "loss": 0.5356, + "epoch": 0.9686537923313742, + "grad_norm": 0.2220964572142937, + "learning_rate": 8.09984402103156e-05, + "loss": 0.5349, "step": 3461 }, { - "epoch": 0.48453463960811755, - "grad_norm": 0.4040649587533329, - "learning_rate": 2.7477918920420504e-05, - "loss": 0.553, + "epoch": 0.9689336691855583, + "grad_norm": 0.2604818174403925, + "learning_rate": 8.098633534373495e-05, + "loss": 0.5201, "step": 3462 }, { - "epoch": 0.4846745976207138, - "grad_norm": 0.3886454789211703, - "learning_rate": 2.7466641166708113e-05, - "loss": 0.5567, + "epoch": 0.9692135460397425, + "grad_norm": 0.2720256034646371, + "learning_rate": 8.097422752785757e-05, + "loss": 0.5205, "step": 3463 }, { - "epoch": 0.48481455563331, - "grad_norm": 0.4996582332055803, - "learning_rate": 2.7455362906075932e-05, - "loss": 0.5722, + "epoch": 0.9694934228939267, + "grad_norm": 0.21645967778039324, + "learning_rate": 8.096211676383587e-05, + "loss": 0.519, "step": 3464 }, { - "epoch": 0.48495451364590625, - "grad_norm": 0.423979275094494, - "learning_rate": 2.7444084140841765e-05, - "loss": 0.5837, + "epoch": 0.9697732997481109, + "grad_norm": 0.22463050221626324, + "learning_rate": 8.095000305282256e-05, + "loss": 0.5471, "step": 3465 }, { - "epoch": 0.48509447165850245, - "grad_norm": 0.39551615527180584, - "learning_rate": 2.74328048733235e-05, - "loss": 0.5754, + "epoch": 0.970053176602295, + "grad_norm": 0.21294014795145857, + "learning_rate": 8.093788639597066e-05, + "loss": 0.5184, "step": 3466 }, { - "epoch": 0.48523442967109864, - "grad_norm": 0.45978728132052005, - "learning_rate": 2.7421525105839152e-05, - "loss": 0.5367, + "epoch": 0.9703330534564791, + "grad_norm": 0.22764952208155148, + "learning_rate": 8.09257667944334e-05, + "loss": 0.5385, "step": 3467 }, { - "epoch": 0.4853743876836949, - "grad_norm": 0.4193876985062947, - "learning_rate": 2.741024484070682e-05, - "loss": 0.5576, + "epoch": 0.9706129303106633, + "grad_norm": 0.22290847883624781, + "learning_rate": 8.091364424936434e-05, + "loss": 0.5316, "step": 3468 }, { - "epoch": 0.4855143456962911, - "grad_norm": 0.397308979018639, - "learning_rate": 2.739896408024473e-05, - "loss": 0.5577, + "epoch": 0.9708928071648475, + "grad_norm": 0.22385138132278512, + "learning_rate": 8.090151876191732e-05, + "loss": 0.5138, "step": 3469 }, { - "epoch": 0.48565430370888735, - "grad_norm": 0.4127845230541042, - "learning_rate": 2.7387682826771173e-05, - "loss": 0.5876, + "epoch": 0.9711726840190317, + "grad_norm": 0.22536306681068463, + "learning_rate": 8.088939033324646e-05, + "loss": 0.5202, "step": 3470 }, { - "epoch": 0.48579426172148354, - "grad_norm": 0.40489978417434586, - "learning_rate": 2.7376401082604564e-05, - "loss": 0.5622, + "epoch": 0.9714525608732157, + "grad_norm": 0.2179320847304912, + "learning_rate": 8.087725896450613e-05, + "loss": 0.5404, "step": 3471 }, { - "epoch": 0.4859342197340798, - "grad_norm": 0.3839599583629985, - "learning_rate": 2.736511885006343e-05, - "loss": 0.5432, + "epoch": 0.9717324377273999, + "grad_norm": 0.23475091902179873, + "learning_rate": 8.086512465685102e-05, + "loss": 0.5253, "step": 3472 }, { - "epoch": 0.486074177746676, - "grad_norm": 0.39225392267678383, - "learning_rate": 2.7353836131466372e-05, - "loss": 0.5639, + "epoch": 0.9720123145815841, + "grad_norm": 0.21569738041156963, + "learning_rate": 8.085298741143605e-05, + "loss": 0.533, "step": 3473 }, { - "epoch": 0.4862141357592722, - "grad_norm": 0.4051784046488118, - "learning_rate": 2.7342552929132104e-05, - "loss": 0.5664, + "epoch": 0.9722921914357683, + "grad_norm": 0.21294782426391667, + "learning_rate": 8.084084722941648e-05, + "loss": 0.4959, "step": 3474 }, { - "epoch": 0.48635409377186845, - "grad_norm": 0.40996524593878775, - "learning_rate": 2.7331269245379443e-05, - "loss": 0.5601, + "epoch": 0.9725720682899525, + "grad_norm": 0.2590805337167687, + "learning_rate": 8.08287041119478e-05, + "loss": 0.5243, "step": 3475 }, { - "epoch": 0.48649405178446464, - "grad_norm": 0.42173107295905043, - "learning_rate": 2.7319985082527292e-05, - "loss": 0.5497, + "epoch": 0.9728519451441365, + "grad_norm": 0.22947616671633794, + "learning_rate": 8.081655806018582e-05, + "loss": 0.5192, "step": 3476 }, { - "epoch": 0.4866340097970609, - "grad_norm": 0.40466464089704407, - "learning_rate": 2.7308700442894675e-05, - "loss": 0.5397, + "epoch": 0.9731318219983207, + "grad_norm": 0.21757589819921253, + "learning_rate": 8.080440907528659e-05, + "loss": 0.5185, "step": 3477 }, { - "epoch": 0.4867739678096571, - "grad_norm": 0.3942022841659981, - "learning_rate": 2.7297415328800692e-05, - "loss": 0.5313, + "epoch": 0.9734116988525049, + "grad_norm": 0.27752038602754175, + "learning_rate": 8.079225715840646e-05, + "loss": 0.5388, "step": 3478 }, { - "epoch": 0.48691392582225335, - "grad_norm": 0.40743336481779613, - "learning_rate": 2.728612974256454e-05, - "loss": 0.5313, + "epoch": 0.9736915757066891, + "grad_norm": 0.21265504628185963, + "learning_rate": 8.078010231070206e-05, + "loss": 0.5654, "step": 3479 }, { - "epoch": 0.48705388383484954, - "grad_norm": 0.4332085486342721, - "learning_rate": 2.7274843686505536e-05, - "loss": 0.5455, + "epoch": 0.9739714525608733, + "grad_norm": 0.2565727224751588, + "learning_rate": 8.07679445333303e-05, + "loss": 0.5093, "step": 3480 }, { - "epoch": 0.48719384184744574, - "grad_norm": 0.4094748981538972, - "learning_rate": 2.726355716294307e-05, - "loss": 0.5727, + "epoch": 0.9742513294150573, + "grad_norm": 0.2266504434173381, + "learning_rate": 8.075578382744835e-05, + "loss": 0.5269, "step": 3481 }, { - "epoch": 0.487333799860042, - "grad_norm": 0.4111671844080047, - "learning_rate": 2.7252270174196643e-05, - "loss": 0.5635, + "epoch": 0.9745312062692415, + "grad_norm": 0.2530018381690689, + "learning_rate": 8.074362019421369e-05, + "loss": 0.5493, "step": 3482 }, { - "epoch": 0.4874737578726382, - "grad_norm": 0.39174881532897554, - "learning_rate": 2.724098272258584e-05, - "loss": 0.5414, + "epoch": 0.9748110831234257, + "grad_norm": 0.22279415055269902, + "learning_rate": 8.073145363478404e-05, + "loss": 0.528, "step": 3483 }, { - "epoch": 0.48761371588523444, - "grad_norm": 0.3907484104141741, - "learning_rate": 2.7229694810430344e-05, - "loss": 0.5766, + "epoch": 0.9750909599776099, + "grad_norm": 0.2203374998023382, + "learning_rate": 8.071928415031744e-05, + "loss": 0.5315, "step": 3484 }, { - "epoch": 0.48775367389783064, - "grad_norm": 0.4010543272236602, - "learning_rate": 2.7218406440049954e-05, - "loss": 0.5371, + "epoch": 0.975370836831794, + "grad_norm": 0.2329537787485757, + "learning_rate": 8.070711174197216e-05, + "loss": 0.5163, "step": 3485 }, { - "epoch": 0.4878936319104269, - "grad_norm": 0.4346615757792164, - "learning_rate": 2.720711761376452e-05, - "loss": 0.5729, + "epoch": 0.9756507136859782, + "grad_norm": 0.23049755696239946, + "learning_rate": 8.069493641090682e-05, + "loss": 0.5287, "step": 3486 }, { - "epoch": 0.4880335899230231, - "grad_norm": 0.4341695351875495, - "learning_rate": 2.719582833389403e-05, - "loss": 0.6109, + "epoch": 0.9759305905401623, + "grad_norm": 0.326471449083994, + "learning_rate": 8.068275815828025e-05, + "loss": 0.5114, "step": 3487 }, { - "epoch": 0.4881735479356193, - "grad_norm": 0.43134633918438864, - "learning_rate": 2.7184538602758536e-05, - "loss": 0.5904, + "epoch": 0.9762104673943465, + "grad_norm": 0.2111422990739202, + "learning_rate": 8.067057698525158e-05, + "loss": 0.503, "step": 3488 }, { - "epoch": 0.48831350594821554, - "grad_norm": 0.40924934907989946, - "learning_rate": 2.71732484226782e-05, - "loss": 0.5533, + "epoch": 0.9764903442485307, + "grad_norm": 0.37376760279297455, + "learning_rate": 8.065839289298022e-05, + "loss": 0.5139, "step": 3489 }, { - "epoch": 0.48845346396081174, - "grad_norm": 0.4117613614479626, - "learning_rate": 2.7161957795973246e-05, - "loss": 0.519, + "epoch": 0.9767702211027148, + "grad_norm": 0.25463993604496876, + "learning_rate": 8.064620588262588e-05, + "loss": 0.5351, "step": 3490 }, { - "epoch": 0.488593421973408, - "grad_norm": 0.3876515998640682, - "learning_rate": 2.7150666724964035e-05, - "loss": 0.5543, + "epoch": 0.977050097956899, + "grad_norm": 9.762766576942438, + "learning_rate": 8.06340159553485e-05, + "loss": 0.5325, "step": 3491 }, { - "epoch": 0.4887333799860042, - "grad_norm": 0.407828217550299, - "learning_rate": 2.7139375211970996e-05, - "loss": 0.5515, + "epoch": 0.9773299748110831, + "grad_norm": 0.21411387277434157, + "learning_rate": 8.062182311230832e-05, + "loss": 0.5146, "step": 3492 }, { - "epoch": 0.48887333799860044, - "grad_norm": 0.40672013393542944, - "learning_rate": 2.712808325931464e-05, - "loss": 0.5445, + "epoch": 0.9776098516652673, + "grad_norm": 0.2554389080701559, + "learning_rate": 8.06096273546659e-05, + "loss": 0.5352, "step": 3493 }, { - "epoch": 0.48901329601119664, - "grad_norm": 0.40868169613662847, - "learning_rate": 2.7116790869315582e-05, - "loss": 0.5712, + "epoch": 0.9778897285194514, + "grad_norm": 0.22578356573796832, + "learning_rate": 8.0597428683582e-05, + "loss": 0.5443, "step": 3494 }, { - "epoch": 0.48915325402379284, - "grad_norm": 0.4092633353198152, - "learning_rate": 2.710549804429452e-05, - "loss": 0.5515, + "epoch": 0.9781696053736356, + "grad_norm": 0.2209885227506747, + "learning_rate": 8.058522710021772e-05, + "loss": 0.5419, "step": 3495 }, { - "epoch": 0.4892932120363891, - "grad_norm": 0.4366896965056272, - "learning_rate": 2.7094204786572254e-05, - "loss": 0.5883, + "epoch": 0.9784494822278198, + "grad_norm": 0.23048565206375848, + "learning_rate": 8.05730226057344e-05, + "loss": 0.5101, "step": 3496 }, { - "epoch": 0.4894331700489853, - "grad_norm": 0.41669884030755355, - "learning_rate": 2.7082911098469648e-05, - "loss": 0.5386, + "epoch": 0.9787293590820039, + "grad_norm": 0.21212395218447444, + "learning_rate": 8.056081520129368e-05, + "loss": 0.5107, "step": 3497 }, { - "epoch": 0.48957312806158154, - "grad_norm": 0.39572509135352624, - "learning_rate": 2.7071616982307684e-05, - "loss": 0.5563, + "epoch": 0.979009235936188, + "grad_norm": 0.31711299756514666, + "learning_rate": 8.054860488805746e-05, + "loss": 0.5404, "step": 3498 }, { - "epoch": 0.48971308607417774, - "grad_norm": 0.41695121258573753, - "learning_rate": 2.706032244040741e-05, - "loss": 0.5796, + "epoch": 0.9792891127903722, + "grad_norm": 0.23485744603348208, + "learning_rate": 8.053639166718794e-05, + "loss": 0.5352, "step": 3499 }, { - "epoch": 0.489853044086774, - "grad_norm": 0.4236891067324582, - "learning_rate": 2.704902747508996e-05, - "loss": 0.5799, + "epoch": 0.9795689896445564, + "grad_norm": 0.29408204964329543, + "learning_rate": 8.052417553984755e-05, + "loss": 0.5427, "step": 3500 }, { - "epoch": 0.4899930020993702, - "grad_norm": 0.3977818931519091, - "learning_rate": 2.7037732088676582e-05, - "loss": 0.6042, + "epoch": 0.9798488664987406, + "grad_norm": 0.22809336985401973, + "learning_rate": 8.051195650719906e-05, + "loss": 0.5285, "step": 3501 }, { - "epoch": 0.4901329601119664, - "grad_norm": 0.4233529044212103, - "learning_rate": 2.7026436283488583e-05, - "loss": 0.5673, + "epoch": 0.9801287433529247, + "grad_norm": 0.22648004082745615, + "learning_rate": 8.049973457040547e-05, + "loss": 0.5103, "step": 3502 }, { - "epoch": 0.49027291812456264, - "grad_norm": 0.4082847133569342, - "learning_rate": 2.7015140061847365e-05, - "loss": 0.513, + "epoch": 0.9804086202071088, + "grad_norm": 0.22843520715879986, + "learning_rate": 8.048750973063008e-05, + "loss": 0.5002, "step": 3503 }, { - "epoch": 0.49041287613715884, - "grad_norm": 0.4200304747170798, - "learning_rate": 2.7003843426074416e-05, - "loss": 0.5791, + "epoch": 0.980688497061293, + "grad_norm": 0.29856449350201997, + "learning_rate": 8.047528198903643e-05, + "loss": 0.5038, "step": 3504 }, { - "epoch": 0.4905528341497551, - "grad_norm": 0.39533501738229987, - "learning_rate": 2.6992546378491318e-05, - "loss": 0.5704, + "epoch": 0.9809683739154772, + "grad_norm": 0.23217715481736995, + "learning_rate": 8.046305134678839e-05, + "loss": 0.5437, "step": 3505 }, { - "epoch": 0.4906927921623513, - "grad_norm": 0.41764209923224177, - "learning_rate": 2.698124892141971e-05, - "loss": 0.5457, + "epoch": 0.9812482507696614, + "grad_norm": 0.25092258580232923, + "learning_rate": 8.045081780505007e-05, + "loss": 0.5252, "step": 3506 }, { - "epoch": 0.49083275017494754, - "grad_norm": 0.4064990179760341, - "learning_rate": 2.6969951057181358e-05, - "loss": 0.5574, + "epoch": 0.9815281276238456, + "grad_norm": 0.24238971814228671, + "learning_rate": 8.043858136498587e-05, + "loss": 0.4997, "step": 3507 }, { - "epoch": 0.49097270818754374, - "grad_norm": 0.420505722395169, - "learning_rate": 2.695865278809807e-05, - "loss": 0.5699, + "epoch": 0.9818080044780296, + "grad_norm": 0.2157044047072059, + "learning_rate": 8.042634202776048e-05, + "loss": 0.5261, "step": 3508 }, { - "epoch": 0.49111266620013994, - "grad_norm": 0.40016382436365205, - "learning_rate": 2.6947354116491763e-05, - "loss": 0.5567, + "epoch": 0.9820878813322138, + "grad_norm": 0.23811875278582165, + "learning_rate": 8.04140997945388e-05, + "loss": 0.5324, "step": 3509 }, { - "epoch": 0.4912526242127362, - "grad_norm": 0.4021262284998323, - "learning_rate": 2.693605504468443e-05, - "loss": 0.5676, + "epoch": 0.982367758186398, + "grad_norm": 0.22645701004467556, + "learning_rate": 8.040185466648608e-05, + "loss": 0.5334, "step": 3510 }, { - "epoch": 0.4913925822253324, - "grad_norm": 0.39530073122269704, - "learning_rate": 2.692475557499813e-05, - "loss": 0.5328, + "epoch": 0.9826476350405822, + "grad_norm": 0.21380010388065268, + "learning_rate": 8.038960664476782e-05, + "loss": 0.5134, "step": 3511 }, { - "epoch": 0.49153254023792864, - "grad_norm": 0.397282341609004, - "learning_rate": 2.6913455709755042e-05, - "loss": 0.5567, + "epoch": 0.9829275118947663, + "grad_norm": 0.20867270737824553, + "learning_rate": 8.037735573054979e-05, + "loss": 0.5184, "step": 3512 }, { - "epoch": 0.49167249825052484, - "grad_norm": 0.41008312840461486, - "learning_rate": 2.6902155451277377e-05, - "loss": 0.5393, + "epoch": 0.9832073887489504, + "grad_norm": 0.6408505850508476, + "learning_rate": 8.036510192499803e-05, + "loss": 0.5233, "step": 3513 }, { - "epoch": 0.4918124562631211, - "grad_norm": 0.4096259675286922, - "learning_rate": 2.6890854801887478e-05, - "loss": 0.5951, + "epoch": 0.9834872656031346, + "grad_norm": 0.2604155985197762, + "learning_rate": 8.035284522927888e-05, + "loss": 0.5459, "step": 3514 }, { - "epoch": 0.4919524142757173, - "grad_norm": 0.3919817250852191, - "learning_rate": 2.6879553763907726e-05, - "loss": 0.536, + "epoch": 0.9837671424573188, + "grad_norm": 0.2143759329653001, + "learning_rate": 8.034058564455892e-05, + "loss": 0.5126, "step": 3515 }, { - "epoch": 0.4920923722883135, - "grad_norm": 0.39561344260712744, - "learning_rate": 2.686825233966061e-05, - "loss": 0.5536, + "epoch": 0.984047019311503, + "grad_norm": 0.2254522149470925, + "learning_rate": 8.032832317200506e-05, + "loss": 0.5244, "step": 3516 }, { - "epoch": 0.49223233030090974, - "grad_norm": 0.409797831934292, - "learning_rate": 2.685695053146868e-05, - "loss": 0.5395, + "epoch": 0.9843268961656871, + "grad_norm": 0.22296227768383173, + "learning_rate": 8.031605781278442e-05, + "loss": 0.5425, "step": 3517 }, { - "epoch": 0.49237228831350593, - "grad_norm": 0.3998242277512286, - "learning_rate": 2.684564834165457e-05, - "loss": 0.5404, + "epoch": 0.9846067730198712, + "grad_norm": 0.23808888295866004, + "learning_rate": 8.030378956806442e-05, + "loss": 0.5121, "step": 3518 }, { - "epoch": 0.4925122463261022, - "grad_norm": 0.4267398317265081, - "learning_rate": 2.6834345772541002e-05, - "loss": 0.5711, + "epoch": 0.9848866498740554, + "grad_norm": 0.21628251679951602, + "learning_rate": 8.029151843901277e-05, + "loss": 0.5105, "step": 3519 }, { - "epoch": 0.4926522043386984, - "grad_norm": 0.42572746496869157, - "learning_rate": 2.6823042826450774e-05, - "loss": 0.5921, + "epoch": 0.9851665267282396, + "grad_norm": 0.22557051976823203, + "learning_rate": 8.027924442679744e-05, + "loss": 0.5168, "step": 3520 }, { - "epoch": 0.49279216235129464, - "grad_norm": 0.400101008335866, - "learning_rate": 2.681173950570674e-05, - "loss": 0.5325, + "epoch": 0.9854464035824237, + "grad_norm": 0.8047584846343594, + "learning_rate": 8.026696753258666e-05, + "loss": 0.5362, "step": 3521 }, { - "epoch": 0.49293212036389084, - "grad_norm": 0.4029901094408229, - "learning_rate": 2.6800435812631854e-05, - "loss": 0.5678, + "epoch": 0.9857262804366079, + "grad_norm": 0.2257322789750837, + "learning_rate": 8.025468775754899e-05, + "loss": 0.5411, "step": 3522 }, { - "epoch": 0.49307207837648703, - "grad_norm": 0.41541374559781874, - "learning_rate": 2.678913174954914e-05, - "loss": 0.5285, + "epoch": 0.9860061572907921, + "grad_norm": 0.21725479117049543, + "learning_rate": 8.024240510285321e-05, + "loss": 0.5273, "step": 3523 }, { - "epoch": 0.4932120363890833, - "grad_norm": 0.4370900148410592, - "learning_rate": 2.6777827318781697e-05, - "loss": 0.5666, + "epoch": 0.9862860341449762, + "grad_norm": 0.2194162271385188, + "learning_rate": 8.023011956966837e-05, + "loss": 0.5359, "step": 3524 }, { - "epoch": 0.4933519944016795, - "grad_norm": 0.42959019276244964, - "learning_rate": 2.6766522522652704e-05, - "loss": 0.5402, + "epoch": 0.9865659109991604, + "grad_norm": 0.25876227964758386, + "learning_rate": 8.021783115916384e-05, + "loss": 0.535, "step": 3525 }, { - "epoch": 0.49349195241427574, - "grad_norm": 0.40725453736472333, - "learning_rate": 2.6755217363485406e-05, - "loss": 0.5171, + "epoch": 0.9868457878533445, + "grad_norm": 0.2191099552855977, + "learning_rate": 8.020553987250921e-05, + "loss": 0.4944, "step": 3526 }, { - "epoch": 0.49363191042687193, - "grad_norm": 0.40719701171899414, - "learning_rate": 2.674391184360313e-05, - "loss": 0.5283, + "epoch": 0.9871256647075287, + "grad_norm": 0.217320249188783, + "learning_rate": 8.019324571087441e-05, + "loss": 0.5326, "step": 3527 }, { - "epoch": 0.4937718684394682, - "grad_norm": 0.40279902099076637, - "learning_rate": 2.6732605965329283e-05, - "loss": 0.5477, + "epoch": 0.9874055415617129, + "grad_norm": 0.2204292530846392, + "learning_rate": 8.018094867542956e-05, + "loss": 0.5304, "step": 3528 }, { - "epoch": 0.4939118264520644, - "grad_norm": 0.3802988661016701, - "learning_rate": 2.6721299730987324e-05, - "loss": 0.548, + "epoch": 0.987685418415897, + "grad_norm": 0.48734436501989903, + "learning_rate": 8.016864876734514e-05, + "loss": 0.5339, "step": 3529 }, { - "epoch": 0.4940517844646606, - "grad_norm": 0.42211573921030193, - "learning_rate": 2.670999314290081e-05, - "loss": 0.5365, + "epoch": 0.9879652952700811, + "grad_norm": 0.22022547951211005, + "learning_rate": 8.015634598779185e-05, + "loss": 0.5736, "step": 3530 }, { - "epoch": 0.49419174247725683, - "grad_norm": 0.4001947861590466, - "learning_rate": 2.6698686203393354e-05, - "loss": 0.5495, + "epoch": 0.9882451721242653, + "grad_norm": 0.7229887197064508, + "learning_rate": 8.014404033794065e-05, + "loss": 0.5232, "step": 3531 }, { - "epoch": 0.49433170048985303, - "grad_norm": 0.4075041830130491, - "learning_rate": 2.6687378914788645e-05, - "loss": 0.5598, + "epoch": 0.9885250489784495, + "grad_norm": 0.3084444243431006, + "learning_rate": 8.013173181896283e-05, + "loss": 0.5266, "step": 3532 }, { - "epoch": 0.4944716585024493, - "grad_norm": 0.42826625295627585, - "learning_rate": 2.6676071279410448e-05, - "loss": 0.546, + "epoch": 0.9888049258326337, + "grad_norm": 0.22077699051425648, + "learning_rate": 8.01194204320299e-05, + "loss": 0.509, "step": 3533 }, { - "epoch": 0.4946116165150455, - "grad_norm": 0.4325314823046203, - "learning_rate": 2.6664763299582602e-05, - "loss": 0.5549, + "epoch": 0.9890848026868178, + "grad_norm": 0.21499207519422178, + "learning_rate": 8.010710617831367e-05, + "loss": 0.5092, "step": 3534 }, { - "epoch": 0.4947515745276417, - "grad_norm": 0.4257334322531565, - "learning_rate": 2.6653454977629e-05, - "loss": 0.5714, + "epoch": 0.9893646795410019, + "grad_norm": 0.21870865777949547, + "learning_rate": 8.009478905898624e-05, + "loss": 0.5157, "step": 3535 }, { - "epoch": 0.49489153254023793, - "grad_norm": 0.41145085533484926, - "learning_rate": 2.6642146315873622e-05, - "loss": 0.5637, + "epoch": 0.9896445563951861, + "grad_norm": 0.23674757663998383, + "learning_rate": 8.008246907521995e-05, + "loss": 0.5036, "step": 3536 }, { - "epoch": 0.49503149055283413, - "grad_norm": 0.41951469265452185, - "learning_rate": 2.6630837316640523e-05, - "loss": 0.5531, + "epoch": 0.9899244332493703, + "grad_norm": 0.22855947816976926, + "learning_rate": 8.00701462281874e-05, + "loss": 0.5349, "step": 3537 }, { - "epoch": 0.4951714485654304, - "grad_norm": 0.42251053392716215, - "learning_rate": 2.6619527982253794e-05, - "loss": 0.5631, + "epoch": 0.9902043101035545, + "grad_norm": 0.2087842790298733, + "learning_rate": 8.00578205190615e-05, + "loss": 0.491, "step": 3538 }, { - "epoch": 0.4953114065780266, - "grad_norm": 0.4071751122281306, - "learning_rate": 2.6608218315037648e-05, - "loss": 0.5791, + "epoch": 0.9904841869577385, + "grad_norm": 0.21314197387860112, + "learning_rate": 8.004549194901542e-05, + "loss": 0.5285, "step": 3539 }, { - "epoch": 0.49545136459062283, - "grad_norm": 0.41230607754128246, - "learning_rate": 2.659690831731631e-05, - "loss": 0.533, + "epoch": 0.9907640638119227, + "grad_norm": 0.2447348765486558, + "learning_rate": 8.00331605192226e-05, + "loss": 0.5288, "step": 3540 }, { - "epoch": 0.49559132260321903, - "grad_norm": 0.4049581094697219, - "learning_rate": 2.6585597991414114e-05, - "loss": 0.5314, + "epoch": 0.9910439406661069, + "grad_norm": 0.4706807986021696, + "learning_rate": 8.002082623085675e-05, + "loss": 0.5015, "step": 3541 }, { - "epoch": 0.49573128061581523, - "grad_norm": 0.38934941480909324, - "learning_rate": 2.6574287339655447e-05, - "loss": 0.5359, + "epoch": 0.9913238175202911, + "grad_norm": 0.22781388811872166, + "learning_rate": 8.000848908509187e-05, + "loss": 0.5325, "step": 3542 }, { - "epoch": 0.4958712386284115, - "grad_norm": 0.4183426624654952, - "learning_rate": 2.656297636436475e-05, - "loss": 0.5885, + "epoch": 0.9916036943744753, + "grad_norm": 0.2190282990322059, + "learning_rate": 7.999614908310218e-05, + "loss": 0.5385, "step": 3543 }, { - "epoch": 0.4960111966410077, - "grad_norm": 0.4426159573559465, - "learning_rate": 2.6551665067866556e-05, - "loss": 0.5809, + "epoch": 0.9918835712286594, + "grad_norm": 0.22792660575310014, + "learning_rate": 7.998380622606224e-05, + "loss": 0.5231, "step": 3544 }, { - "epoch": 0.49615115465360393, - "grad_norm": 0.5149102513551677, - "learning_rate": 2.6540353452485443e-05, - "loss": 0.5692, + "epoch": 0.9921634480828435, + "grad_norm": 0.2266927437256646, + "learning_rate": 7.997146051514685e-05, + "loss": 0.5361, "step": 3545 }, { - "epoch": 0.49629111266620013, - "grad_norm": 0.4109744615781001, - "learning_rate": 2.652904152054607e-05, - "loss": 0.5789, + "epoch": 0.9924433249370277, + "grad_norm": 0.30535923620371125, + "learning_rate": 7.995911195153105e-05, + "loss": 0.5236, "step": 3546 }, { - "epoch": 0.4964310706787964, - "grad_norm": 0.44177673319774247, - "learning_rate": 2.651772927437315e-05, - "loss": 0.5912, + "epoch": 0.9927232017912119, + "grad_norm": 0.23941019401490524, + "learning_rate": 7.994676053639024e-05, + "loss": 0.5475, "step": 3547 }, { - "epoch": 0.4965710286913926, - "grad_norm": 0.4052859032621841, - "learning_rate": 2.6506416716291465e-05, - "loss": 0.5616, + "epoch": 0.993003078645396, + "grad_norm": 0.22715410710121234, + "learning_rate": 7.993440627089996e-05, + "loss": 0.4979, "step": 3548 }, { - "epoch": 0.4967109867039888, - "grad_norm": 0.393563528563957, - "learning_rate": 2.649510384862586e-05, - "loss": 0.5379, + "epoch": 0.9932829554995802, + "grad_norm": 0.3993189271581588, + "learning_rate": 7.992204915623615e-05, + "loss": 0.518, "step": 3549 }, { - "epoch": 0.49685094471658503, - "grad_norm": 0.41866504482839206, - "learning_rate": 2.6483790673701242e-05, - "loss": 0.5486, + "epoch": 0.9935628323537643, + "grad_norm": 0.2214579350273138, + "learning_rate": 7.990968919357498e-05, + "loss": 0.5389, "step": 3550 }, { - "epoch": 0.4969909027291812, - "grad_norm": 0.41154455431337716, - "learning_rate": 2.6472477193842583e-05, - "loss": 0.5647, + "epoch": 0.9938427092079485, + "grad_norm": 0.22771130491834968, + "learning_rate": 7.989732638409282e-05, + "loss": 0.5167, "step": 3551 }, { - "epoch": 0.4971308607417775, - "grad_norm": 0.39736456239087703, - "learning_rate": 2.6461163411374923e-05, - "loss": 0.5901, + "epoch": 0.9941225860621327, + "grad_norm": 0.22670484308075003, + "learning_rate": 7.98849607289664e-05, + "loss": 0.5346, "step": 3552 }, { - "epoch": 0.4972708187543737, - "grad_norm": 0.4060568204824789, - "learning_rate": 2.6449849328623355e-05, - "loss": 0.5435, + "epoch": 0.9944024629163168, + "grad_norm": 0.2112443650005292, + "learning_rate": 7.987259222937272e-05, + "loss": 0.5308, "step": 3553 }, { - "epoch": 0.49741077676696993, - "grad_norm": 0.4115697803723554, - "learning_rate": 2.6438534947913047e-05, - "loss": 0.5655, + "epoch": 0.994682339770501, + "grad_norm": 0.27579278071876434, + "learning_rate": 7.986022088648896e-05, + "loss": 0.5533, "step": 3554 }, { - "epoch": 0.49755073477956613, - "grad_norm": 0.3956194134720466, - "learning_rate": 2.6427220271569203e-05, - "loss": 0.5394, + "epoch": 0.9949622166246851, + "grad_norm": 0.20925224837009185, + "learning_rate": 7.984784670149267e-05, + "loss": 0.4939, "step": 3555 }, { - "epoch": 0.4976906927921623, - "grad_norm": 0.38712507902856763, - "learning_rate": 2.6415905301917114e-05, - "loss": 0.5345, + "epoch": 0.9952420934788693, + "grad_norm": 0.2269621090205349, + "learning_rate": 7.983546967556165e-05, + "loss": 0.5224, "step": 3556 }, { - "epoch": 0.4978306508047586, - "grad_norm": 0.9891507537143042, - "learning_rate": 2.6404590041282116e-05, - "loss": 0.5452, + "epoch": 0.9955219703330535, + "grad_norm": 0.22863293485275069, + "learning_rate": 7.982308980987389e-05, + "loss": 0.5326, "step": 3557 }, { - "epoch": 0.4979706088173548, - "grad_norm": 0.40961536120801306, - "learning_rate": 2.6393274491989617e-05, - "loss": 0.5193, + "epoch": 0.9958018471872376, + "grad_norm": 0.22493675349205328, + "learning_rate": 7.981070710560777e-05, + "loss": 0.5298, "step": 3558 }, { - "epoch": 0.49811056682995103, - "grad_norm": 0.43943809296308844, - "learning_rate": 2.6381958656365073e-05, - "loss": 0.5351, + "epoch": 0.9960817240414218, + "grad_norm": 0.23209458196468175, + "learning_rate": 7.979832156394185e-05, + "loss": 0.5164, "step": 3559 }, { - "epoch": 0.4982505248425472, - "grad_norm": 0.4108272260762031, - "learning_rate": 2.6370642536734004e-05, - "loss": 0.5348, + "epoch": 0.9963616008956059, + "grad_norm": 0.2128829501010205, + "learning_rate": 7.978593318605502e-05, + "loss": 0.5206, "step": 3560 }, { - "epoch": 0.4983904828551435, - "grad_norm": 0.4119616541998687, - "learning_rate": 2.6359326135421986e-05, - "loss": 0.5255, + "epoch": 0.9966414777497901, + "grad_norm": 0.219681850578271, + "learning_rate": 7.977354197312638e-05, + "loss": 0.5278, "step": 3561 }, { - "epoch": 0.4985304408677397, - "grad_norm": 0.4495829632664617, - "learning_rate": 2.6348009454754653e-05, - "loss": 0.5828, + "epoch": 0.9969213546039742, + "grad_norm": 0.22295759058538542, + "learning_rate": 7.976114792633536e-05, + "loss": 0.5179, "step": 3562 }, { - "epoch": 0.4986703988803359, - "grad_norm": 0.41137247203721966, - "learning_rate": 2.6336692497057696e-05, - "loss": 0.5541, + "epoch": 0.9972012314581584, + "grad_norm": 0.2113438981839933, + "learning_rate": 7.974875104686163e-05, + "loss": 0.4972, "step": 3563 }, { - "epoch": 0.4988103568929321, - "grad_norm": 0.4367447061362204, - "learning_rate": 2.632537526465687e-05, - "loss": 0.5993, + "epoch": 0.9974811083123426, + "grad_norm": 0.2138013327802012, + "learning_rate": 7.973635133588513e-05, + "loss": 0.521, "step": 3564 }, { - "epoch": 0.4989503149055283, - "grad_norm": 0.4326908998762443, - "learning_rate": 2.6314057759877985e-05, - "loss": 0.5765, + "epoch": 0.9977609851665268, + "grad_norm": 0.22029413818449997, + "learning_rate": 7.972394879458605e-05, + "loss": 0.546, "step": 3565 }, { - "epoch": 0.4990902729181246, - "grad_norm": 0.4192288623903318, - "learning_rate": 2.6302739985046898e-05, - "loss": 0.5376, + "epoch": 0.9980408620207109, + "grad_norm": 0.22049509263757874, + "learning_rate": 7.971154342414489e-05, + "loss": 0.5145, "step": 3566 }, { - "epoch": 0.4992302309307208, - "grad_norm": 0.4248836776012947, - "learning_rate": 2.629142194248952e-05, - "loss": 0.5938, + "epoch": 0.998320738874895, + "grad_norm": 0.21986872037743224, + "learning_rate": 7.96991352257424e-05, + "loss": 0.5127, "step": 3567 }, { - "epoch": 0.49937018894331703, - "grad_norm": 0.4047565365438917, - "learning_rate": 2.6280103634531833e-05, - "loss": 0.5811, + "epoch": 0.9986006157290792, + "grad_norm": 0.21480916211150758, + "learning_rate": 7.968672420055958e-05, + "loss": 0.5257, "step": 3568 }, { - "epoch": 0.4995101469559132, - "grad_norm": 0.4144316245441231, - "learning_rate": 2.626878506349986e-05, - "loss": 0.5902, + "epoch": 0.9988804925832634, + "grad_norm": 0.23421886641376274, + "learning_rate": 7.967431034977775e-05, + "loss": 0.5385, "step": 3569 }, { - "epoch": 0.4996501049685094, - "grad_norm": 0.4178965245669895, - "learning_rate": 2.625746623171968e-05, - "loss": 0.525, + "epoch": 0.9991603694374476, + "grad_norm": 0.23289558241292804, + "learning_rate": 7.966189367457844e-05, + "loss": 0.5485, "step": 3570 }, { - "epoch": 0.4997900629811057, - "grad_norm": 0.4159969957434166, - "learning_rate": 2.624614714151743e-05, - "loss": 0.5576, + "epoch": 0.9994402462916316, + "grad_norm": 0.2140992292192523, + "learning_rate": 7.96494741761435e-05, + "loss": 0.5122, "step": 3571 }, { - "epoch": 0.4999300209937019, - "grad_norm": 0.40173942508061367, - "learning_rate": 2.6234827795219297e-05, - "loss": 0.5358, + "epoch": 0.9997201231458158, + "grad_norm": 0.22440790715798145, + "learning_rate": 7.9637051855655e-05, + "loss": 0.5258, "step": 3572 }, { - "epoch": 0.5000699790062981, - "grad_norm": 0.41640033764604484, - "learning_rate": 2.622350819515153e-05, - "loss": 0.5571, + "epoch": 1.0, + "grad_norm": 0.26865370894700985, + "learning_rate": 7.962462671429532e-05, + "loss": 0.5524, "step": 3573 }, { - "epoch": 0.5002099370188944, - "grad_norm": 0.40378307872853386, - "learning_rate": 2.621218834364041e-05, - "loss": 0.5472, + "epoch": 1.000279876854184, + "grad_norm": 0.2151522824258909, + "learning_rate": 7.961219875324709e-05, + "loss": 0.5202, "step": 3574 }, { - "epoch": 0.5003498950314905, - "grad_norm": 0.398494511998799, - "learning_rate": 2.620086824301229e-05, - "loss": 0.5331, + "epoch": 1.0005597537083684, + "grad_norm": 0.2145821490209512, + "learning_rate": 7.959976797369322e-05, + "loss": 0.505, "step": 3575 }, { - "epoch": 0.5004898530440868, - "grad_norm": 0.40959256585724063, - "learning_rate": 2.6189547895593562e-05, - "loss": 0.5496, + "epoch": 1.0008396305625524, + "grad_norm": 0.2397604540898404, + "learning_rate": 7.958733437681685e-05, + "loss": 0.505, "step": 3576 }, { - "epoch": 0.500629811056683, - "grad_norm": 0.41352440579598176, - "learning_rate": 2.6178227303710673e-05, - "loss": 0.5697, + "epoch": 1.0011195074167367, + "grad_norm": 0.21141014949735462, + "learning_rate": 7.957489796380143e-05, + "loss": 0.4845, "step": 3577 }, { - "epoch": 0.5007697690692792, - "grad_norm": 0.408240609494089, - "learning_rate": 2.616690646969011e-05, - "loss": 0.56, + "epoch": 1.0013993842709208, + "grad_norm": 0.2579292044721452, + "learning_rate": 7.956245873583068e-05, + "loss": 0.5226, "step": 3578 }, { - "epoch": 0.5009097270818754, - "grad_norm": 0.3832170219371199, - "learning_rate": 2.6155585395858435e-05, - "loss": 0.523, + "epoch": 1.0016792611251049, + "grad_norm": 0.2132179301411428, + "learning_rate": 7.955001669408854e-05, + "loss": 0.5024, "step": 3579 }, { - "epoch": 0.5010496850944717, - "grad_norm": 0.405736469986696, - "learning_rate": 2.6144264084542224e-05, - "loss": 0.5743, + "epoch": 1.0019591379792891, + "grad_norm": 0.21816584767506073, + "learning_rate": 7.95375718397593e-05, + "loss": 0.5057, "step": 3580 }, { - "epoch": 0.5011896431070679, - "grad_norm": 0.4036128469748438, - "learning_rate": 2.6132942538068146e-05, - "loss": 0.5807, + "epoch": 1.0022390148334732, + "grad_norm": 0.21752783780863907, + "learning_rate": 7.952512417402743e-05, + "loss": 0.475, "step": 3581 }, { - "epoch": 0.5013296011196641, - "grad_norm": 0.40173202764431026, - "learning_rate": 2.6121620758762877e-05, - "loss": 0.5703, + "epoch": 1.0025188916876575, + "grad_norm": 0.22963190478577222, + "learning_rate": 7.95126736980777e-05, + "loss": 0.489, "step": 3582 }, { - "epoch": 0.5014695591322603, - "grad_norm": 0.42011031965776624, - "learning_rate": 2.6110298748953153e-05, - "loss": 0.5602, + "epoch": 1.0027987685418416, + "grad_norm": 0.23025272572685854, + "learning_rate": 7.95002204130952e-05, + "loss": 0.5092, "step": 3583 }, { - "epoch": 0.5016095171448566, - "grad_norm": 0.39388389881683084, - "learning_rate": 2.6098976510965788e-05, - "loss": 0.5268, + "epoch": 1.0030786453960256, + "grad_norm": 0.2260023492559017, + "learning_rate": 7.94877643202652e-05, + "loss": 0.501, "step": 3584 }, { - "epoch": 0.5017494751574527, - "grad_norm": 0.5531371102734028, - "learning_rate": 2.6087654047127587e-05, - "loss": 0.5644, + "epoch": 1.00335852225021, + "grad_norm": 0.255758955202744, + "learning_rate": 7.947530542077326e-05, + "loss": 0.5082, "step": 3585 }, { - "epoch": 0.501889433170049, - "grad_norm": 0.40679492045140125, - "learning_rate": 2.6076331359765448e-05, - "loss": 0.5047, + "epoch": 1.003638399104394, + "grad_norm": 0.2229700476877229, + "learning_rate": 7.946284371580526e-05, + "loss": 0.5073, "step": 3586 }, { - "epoch": 0.5020293911826452, - "grad_norm": 0.4165461319341358, - "learning_rate": 2.6065008451206296e-05, - "loss": 0.5296, + "epoch": 1.0039182759585783, + "grad_norm": 0.21423101759819524, + "learning_rate": 7.945037920654733e-05, + "loss": 0.504, "step": 3587 }, { - "epoch": 0.5021693491952415, - "grad_norm": 0.39190580835869326, - "learning_rate": 2.60536853237771e-05, - "loss": 0.5346, + "epoch": 1.0041981528127624, + "grad_norm": 0.22091810849257734, + "learning_rate": 7.943791189418579e-05, + "loss": 0.5041, "step": 3588 }, { - "epoch": 0.5023093072078376, - "grad_norm": 0.3991014002270092, - "learning_rate": 2.6042361979804874e-05, - "loss": 0.5006, + "epoch": 1.0044780296669464, + "grad_norm": 0.22291839525112908, + "learning_rate": 7.942544177990734e-05, + "loss": 0.5065, "step": 3589 }, { - "epoch": 0.5024492652204339, - "grad_norm": 0.4371615498576483, - "learning_rate": 2.6031038421616683e-05, - "loss": 0.5738, + "epoch": 1.0047579065211307, + "grad_norm": 0.21788969754061052, + "learning_rate": 7.941296886489888e-05, + "loss": 0.5031, "step": 3590 }, { - "epoch": 0.5025892232330301, - "grad_norm": 0.42427696712386304, - "learning_rate": 2.6019714651539646e-05, - "loss": 0.5857, + "epoch": 1.0050377833753148, + "grad_norm": 0.22905497358669408, + "learning_rate": 7.940049315034755e-05, + "loss": 0.5205, "step": 3591 }, { - "epoch": 0.5027291812456263, - "grad_norm": 0.4155090445862282, - "learning_rate": 2.600839067190089e-05, - "loss": 0.584, + "epoch": 1.005317660229499, + "grad_norm": 0.22599453526951566, + "learning_rate": 7.938801463744084e-05, + "loss": 0.5014, "step": 3592 }, { - "epoch": 0.5028691392582225, - "grad_norm": 0.4233864118860884, - "learning_rate": 2.5997066485027626e-05, - "loss": 0.5393, + "epoch": 1.0055975370836832, + "grad_norm": 0.2308528471667276, + "learning_rate": 7.937553332736646e-05, + "loss": 0.5017, "step": 3593 }, { - "epoch": 0.5030090972708188, - "grad_norm": 0.4286876156223743, - "learning_rate": 2.5985742093247078e-05, - "loss": 0.5787, + "epoch": 1.0058774139378674, + "grad_norm": 0.2263486323970854, + "learning_rate": 7.936304922131238e-05, + "loss": 0.494, "step": 3594 }, { - "epoch": 0.503149055283415, - "grad_norm": 0.4103875069797372, - "learning_rate": 2.5974417498886532e-05, - "loss": 0.5617, + "epoch": 1.0061572907920515, + "grad_norm": 0.2257292243070338, + "learning_rate": 7.93505623204668e-05, + "loss": 0.5268, "step": 3595 }, { - "epoch": 0.5032890132960112, - "grad_norm": 0.43666777401439383, - "learning_rate": 2.59630927042733e-05, - "loss": 0.6215, + "epoch": 1.0064371676462356, + "grad_norm": 0.22837865013465125, + "learning_rate": 7.93380726260183e-05, + "loss": 0.49, "step": 3596 }, { - "epoch": 0.5034289713086074, - "grad_norm": 0.3967739646837927, - "learning_rate": 2.5951767711734753e-05, - "loss": 0.5521, + "epoch": 1.0067170445004199, + "grad_norm": 0.22308161880337282, + "learning_rate": 7.932558013915562e-05, + "loss": 0.5322, "step": 3597 }, { - "epoch": 0.5035689293212037, - "grad_norm": 0.4134700097976131, - "learning_rate": 2.594044252359828e-05, - "loss": 0.5564, + "epoch": 1.006996921354604, + "grad_norm": 0.21222487192372674, + "learning_rate": 7.931308486106782e-05, + "loss": 0.4966, "step": 3598 }, { - "epoch": 0.5037088873337998, - "grad_norm": 0.4193374425121163, - "learning_rate": 2.592911714219132e-05, - "loss": 0.5436, + "epoch": 1.0072767982087882, + "grad_norm": 0.21715491769174602, + "learning_rate": 7.930058679294418e-05, + "loss": 0.4965, "step": 3599 }, { - "epoch": 0.5038488453463961, - "grad_norm": 0.41979302117803585, - "learning_rate": 2.591779156984137e-05, - "loss": 0.5439, + "epoch": 1.0075566750629723, + "grad_norm": 0.21062504848809954, + "learning_rate": 7.92880859359743e-05, + "loss": 0.4712, "step": 3600 }, { - "epoch": 0.5039888033589923, - "grad_norm": 0.40958102976844213, - "learning_rate": 2.590646580887593e-05, - "loss": 0.5436, + "epoch": 1.0078365519171564, + "grad_norm": 0.2254344063296754, + "learning_rate": 7.9275582291348e-05, + "loss": 0.501, "step": 3601 }, { - "epoch": 0.5041287613715886, - "grad_norm": 0.4201739272976801, - "learning_rate": 2.589513986162258e-05, - "loss": 0.5466, + "epoch": 1.0081164287713407, + "grad_norm": 0.2193290328015753, + "learning_rate": 7.926307586025539e-05, + "loss": 0.5079, "step": 3602 }, { - "epoch": 0.5042687193841847, - "grad_norm": 0.4258535420468519, - "learning_rate": 2.5883813730408894e-05, - "loss": 0.58, + "epoch": 1.0083963056255247, + "grad_norm": 0.2209881490859096, + "learning_rate": 7.925056664388683e-05, + "loss": 0.5096, "step": 3603 }, { - "epoch": 0.504408677396781, - "grad_norm": 0.41534287545104986, - "learning_rate": 2.587248741756253e-05, - "loss": 0.576, + "epoch": 1.008676182479709, + "grad_norm": 0.23068930444761246, + "learning_rate": 7.9238054643433e-05, + "loss": 0.5264, "step": 3604 }, { - "epoch": 0.5045486354093772, - "grad_norm": 0.42287342650084503, - "learning_rate": 2.5861160925411138e-05, - "loss": 0.5832, + "epoch": 1.008956059333893, + "grad_norm": 0.21715947474168884, + "learning_rate": 7.922553986008472e-05, + "loss": 0.4934, "step": 3605 }, { - "epoch": 0.5046885934219734, - "grad_norm": 0.4501273841496384, - "learning_rate": 2.5849834256282447e-05, - "loss": 0.6017, + "epoch": 1.0092359361880772, + "grad_norm": 0.22774236254353156, + "learning_rate": 7.921302229503323e-05, + "loss": 0.5219, "step": 3606 }, { - "epoch": 0.5048285514345696, - "grad_norm": 0.4274714205267487, - "learning_rate": 2.5838507412504187e-05, - "loss": 0.5484, + "epoch": 1.0095158130422615, + "grad_norm": 0.2434574119410726, + "learning_rate": 7.92005019494699e-05, + "loss": 0.5219, "step": 3607 }, { - "epoch": 0.5049685094471659, - "grad_norm": 0.3888114387545627, - "learning_rate": 2.5827180396404156e-05, - "loss": 0.5101, + "epoch": 1.0097956898964455, + "grad_norm": 0.21282961651133284, + "learning_rate": 7.918797882458649e-05, + "loss": 0.5087, "step": 3608 }, { - "epoch": 0.5051084674597621, - "grad_norm": 0.43140565124104524, - "learning_rate": 2.5815853210310152e-05, - "loss": 0.5742, + "epoch": 1.0100755667506298, + "grad_norm": 0.2298090155797842, + "learning_rate": 7.917545292157489e-05, + "loss": 0.5081, "step": 3609 }, { - "epoch": 0.5052484254723583, - "grad_norm": 0.41280804310487207, - "learning_rate": 2.580452585655004e-05, - "loss": 0.5187, + "epoch": 1.0103554436048139, + "grad_norm": 0.22865017098908946, + "learning_rate": 7.916292424162735e-05, + "loss": 0.506, "step": 3610 }, { - "epoch": 0.5053883834849545, - "grad_norm": 0.4010119649911929, - "learning_rate": 2.5793198337451696e-05, - "loss": 0.5268, + "epoch": 1.010635320458998, + "grad_norm": 0.22999448688734112, + "learning_rate": 7.915039278593637e-05, + "loss": 0.5068, "step": 3611 }, { - "epoch": 0.5055283414975508, - "grad_norm": 0.43498896699155226, - "learning_rate": 2.5781870655343045e-05, - "loss": 0.5975, + "epoch": 1.0109151973131822, + "grad_norm": 0.2301902343868322, + "learning_rate": 7.913785855569466e-05, + "loss": 0.51, "step": 3612 }, { - "epoch": 0.5056682995101469, - "grad_norm": 0.3871046921614993, - "learning_rate": 2.5770542812552047e-05, - "loss": 0.5777, + "epoch": 1.0111950741673663, + "grad_norm": 0.22166767646491686, + "learning_rate": 7.91253215520953e-05, + "loss": 0.4878, "step": 3613 }, { - "epoch": 0.5058082575227432, - "grad_norm": 0.3836731432807253, - "learning_rate": 2.5759214811406678e-05, - "loss": 0.5591, + "epoch": 1.0114749510215506, + "grad_norm": 0.22027238357501053, + "learning_rate": 7.911278177633151e-05, + "loss": 0.4995, "step": 3614 }, { - "epoch": 0.5059482155353394, - "grad_norm": 0.4186045435948251, - "learning_rate": 2.5747886654234967e-05, - "loss": 0.5929, + "epoch": 1.0117548278757347, + "grad_norm": 0.23521438900435754, + "learning_rate": 7.910023922959686e-05, + "loss": 0.5192, "step": 3615 }, { - "epoch": 0.5060881735479357, - "grad_norm": 0.3989221255230432, - "learning_rate": 2.5736558343364953e-05, - "loss": 0.5484, + "epoch": 1.0120347047299187, + "grad_norm": 0.23505219062362195, + "learning_rate": 7.908769391308517e-05, + "loss": 0.5199, "step": 3616 }, { - "epoch": 0.5062281315605318, - "grad_norm": 0.3802059534422492, - "learning_rate": 2.5725229881124734e-05, - "loss": 0.5194, + "epoch": 1.012314581584103, + "grad_norm": 0.22162025503854033, + "learning_rate": 7.907514582799047e-05, + "loss": 0.4905, "step": 3617 }, { - "epoch": 0.5063680895731281, - "grad_norm": 0.40770746004051617, - "learning_rate": 2.5713901269842404e-05, - "loss": 0.5294, + "epoch": 1.012594458438287, + "grad_norm": 0.22840127255661585, + "learning_rate": 7.906259497550712e-05, + "loss": 0.5159, "step": 3618 }, { - "epoch": 0.5065080475857243, - "grad_norm": 0.4179059282812127, - "learning_rate": 2.5702572511846134e-05, - "loss": 0.56, + "epoch": 1.0128743352924714, + "grad_norm": 0.21422106628265694, + "learning_rate": 7.905004135682971e-05, + "loss": 0.5029, "step": 3619 }, { - "epoch": 0.5066480055983205, - "grad_norm": 0.402518977593649, - "learning_rate": 2.569124360946407e-05, - "loss": 0.5518, + "epoch": 1.0131542121466555, + "grad_norm": 0.21321876589631053, + "learning_rate": 7.903748497315312e-05, + "loss": 0.5045, "step": 3620 }, { - "epoch": 0.5067879636109167, - "grad_norm": 0.40070397027204235, - "learning_rate": 2.5679914565024443e-05, - "loss": 0.5229, + "epoch": 1.0134340890008395, + "grad_norm": 0.2271247535376571, + "learning_rate": 7.902492582567244e-05, + "loss": 0.4885, "step": 3621 }, { - "epoch": 0.506927921623513, - "grad_norm": 0.41261781341428777, - "learning_rate": 2.5668585380855475e-05, - "loss": 0.5716, + "epoch": 1.0137139658550238, + "grad_norm": 0.22542713081875076, + "learning_rate": 7.901236391558309e-05, + "loss": 0.517, "step": 3622 }, { - "epoch": 0.5070678796361092, - "grad_norm": 0.4086792886609616, - "learning_rate": 2.5657256059285417e-05, - "loss": 0.5469, + "epoch": 1.013993842709208, + "grad_norm": 0.21738520054120622, + "learning_rate": 7.899979924408069e-05, + "loss": 0.4939, "step": 3623 }, { - "epoch": 0.5072078376487054, - "grad_norm": 0.41885939317076915, - "learning_rate": 2.564592660264258e-05, - "loss": 0.5896, + "epoch": 1.0142737195633922, + "grad_norm": 0.22862862115632895, + "learning_rate": 7.898723181236116e-05, + "loss": 0.5143, "step": 3624 }, { - "epoch": 0.5073477956613016, - "grad_norm": 0.3962896649868784, - "learning_rate": 2.563459701325526e-05, - "loss": 0.5581, + "epoch": 1.0145535964175763, + "grad_norm": 0.22251220885729586, + "learning_rate": 7.897466162162071e-05, + "loss": 0.4944, "step": 3625 }, { - "epoch": 0.5074877536738979, - "grad_norm": 0.39540667903653226, - "learning_rate": 2.5623267293451826e-05, - "loss": 0.5078, + "epoch": 1.0148334732717603, + "grad_norm": 0.21495788914755032, + "learning_rate": 7.896208867305572e-05, + "loss": 0.4935, "step": 3626 }, { - "epoch": 0.507627711686494, - "grad_norm": 0.42585105817986957, - "learning_rate": 2.5611937445560634e-05, - "loss": 0.5645, + "epoch": 1.0151133501259446, + "grad_norm": 0.2223636056894036, + "learning_rate": 7.894951296786292e-05, + "loss": 0.5013, "step": 3627 }, { - "epoch": 0.5077676696990903, - "grad_norm": 0.38842241786452814, - "learning_rate": 2.5600607471910088e-05, - "loss": 0.574, + "epoch": 1.0153932269801287, + "grad_norm": 0.21313442084895964, + "learning_rate": 7.89369345072393e-05, + "loss": 0.5075, "step": 3628 }, { - "epoch": 0.5079076277116865, - "grad_norm": 0.4605380093032065, - "learning_rate": 2.5589277374828613e-05, - "loss": 0.5743, + "epoch": 1.015673103834313, + "grad_norm": 0.21419141410352954, + "learning_rate": 7.892435329238204e-05, + "loss": 0.5078, "step": 3629 }, { - "epoch": 0.5080475857242828, - "grad_norm": 0.4103790649353433, - "learning_rate": 2.5577947156644655e-05, - "loss": 0.5652, + "epoch": 1.015952980688497, + "grad_norm": 0.21199822486315878, + "learning_rate": 7.891176932448864e-05, + "loss": 0.5208, "step": 3630 }, { - "epoch": 0.5081875437368789, - "grad_norm": 0.4037333605021007, - "learning_rate": 2.55666168196867e-05, - "loss": 0.5374, + "epoch": 1.0162328575426811, + "grad_norm": 0.2178367464258803, + "learning_rate": 7.889918260475685e-05, + "loss": 0.4889, "step": 3631 }, { - "epoch": 0.5083275017494752, - "grad_norm": 0.40487097389779486, - "learning_rate": 2.5555286366283237e-05, - "loss": 0.5631, + "epoch": 1.0165127343968654, + "grad_norm": 0.21567705751207655, + "learning_rate": 7.88865931343847e-05, + "loss": 0.4906, "step": 3632 }, { - "epoch": 0.5084674597620714, - "grad_norm": 0.4082664258736793, - "learning_rate": 2.5543955798762798e-05, - "loss": 0.5598, + "epoch": 1.0167926112510495, + "grad_norm": 0.2217669672523888, + "learning_rate": 7.887400091457043e-05, + "loss": 0.5106, "step": 3633 }, { - "epoch": 0.5086074177746676, - "grad_norm": 0.41864775722779846, - "learning_rate": 2.553262511945391e-05, - "loss": 0.5544, + "epoch": 1.0170724881052338, + "grad_norm": 0.20893848571473597, + "learning_rate": 7.886140594651259e-05, + "loss": 0.4839, "step": 3634 }, { - "epoch": 0.5087473757872638, - "grad_norm": 0.41778038341235235, - "learning_rate": 2.5521294330685165e-05, - "loss": 0.545, + "epoch": 1.0173523649594178, + "grad_norm": 0.21881956483322057, + "learning_rate": 7.884880823140998e-05, + "loss": 0.4953, "step": 3635 }, { - "epoch": 0.5088873337998601, - "grad_norm": 0.3965960636107479, - "learning_rate": 2.550996343478514e-05, - "loss": 0.5629, + "epoch": 1.0176322418136021, + "grad_norm": 0.22237682379708062, + "learning_rate": 7.883620777046167e-05, + "loss": 0.4864, "step": 3636 }, { - "epoch": 0.5090272918124563, - "grad_norm": 0.39741586166804516, - "learning_rate": 2.5498632434082452e-05, - "loss": 0.5261, + "epoch": 1.0179121186677862, + "grad_norm": 0.2293932877642561, + "learning_rate": 7.882360456486696e-05, + "loss": 0.5126, "step": 3637 }, { - "epoch": 0.5091672498250525, - "grad_norm": 0.4036662033874021, - "learning_rate": 2.5487301330905745e-05, - "loss": 0.5913, + "epoch": 1.0181919955219703, + "grad_norm": 0.22595454310736232, + "learning_rate": 7.881099861582542e-05, + "loss": 0.5028, "step": 3638 }, { - "epoch": 0.5093072078376487, - "grad_norm": 0.41863388691988607, - "learning_rate": 2.5475970127583666e-05, - "loss": 0.5442, + "epoch": 1.0184718723761546, + "grad_norm": 0.21979301425980904, + "learning_rate": 7.879838992453691e-05, + "loss": 0.5221, "step": 3639 }, { - "epoch": 0.509447165850245, - "grad_norm": 0.4135621919846456, - "learning_rate": 2.5464638826444904e-05, - "loss": 0.5563, + "epoch": 1.0187517492303386, + "grad_norm": 0.22012884916710354, + "learning_rate": 7.878577849220154e-05, + "loss": 0.4979, "step": 3640 }, { - "epoch": 0.5095871238628411, - "grad_norm": 0.4351748088714143, - "learning_rate": 2.545330742981814e-05, - "loss": 0.5551, + "epoch": 1.019031626084523, + "grad_norm": 0.2172238980519223, + "learning_rate": 7.877316432001965e-05, + "loss": 0.5153, "step": 3641 }, { - "epoch": 0.5097270818754374, - "grad_norm": 0.4234047595420709, - "learning_rate": 2.544197594003211e-05, - "loss": 0.5876, + "epoch": 1.019311502938707, + "grad_norm": 0.2420916094594506, + "learning_rate": 7.876054740919188e-05, + "loss": 0.5226, "step": 3642 }, { - "epoch": 0.5098670398880336, - "grad_norm": 0.4191593238313807, - "learning_rate": 2.5430644359415535e-05, - "loss": 0.5505, + "epoch": 1.019591379792891, + "grad_norm": 0.220317374341056, + "learning_rate": 7.874792776091911e-05, + "loss": 0.4988, "step": 3643 }, { - "epoch": 0.5100069979006299, - "grad_norm": 0.4163770493254072, - "learning_rate": 2.5419312690297176e-05, - "loss": 0.5234, + "epoch": 1.0198712566470753, + "grad_norm": 0.22291240894867526, + "learning_rate": 7.87353053764025e-05, + "loss": 0.5225, "step": 3644 }, { - "epoch": 0.510146955913226, - "grad_norm": 0.4326672484948109, - "learning_rate": 2.54079809350058e-05, - "loss": 0.5565, + "epoch": 1.0201511335012594, + "grad_norm": 0.21868923910777843, + "learning_rate": 7.872268025684342e-05, + "loss": 0.5158, "step": 3645 }, { - "epoch": 0.5102869139258223, - "grad_norm": 0.4293727352297737, - "learning_rate": 2.5396649095870202e-05, - "loss": 0.5651, + "epoch": 1.0204310103554437, + "grad_norm": 0.23874857903317756, + "learning_rate": 7.871005240344356e-05, + "loss": 0.5116, "step": 3646 }, { - "epoch": 0.5104268719384185, - "grad_norm": 0.3928814112239087, - "learning_rate": 2.5385317175219193e-05, - "loss": 0.5223, + "epoch": 1.0207108872096278, + "grad_norm": 0.23305321859946382, + "learning_rate": 7.869742181740484e-05, + "loss": 0.5079, "step": 3647 }, { - "epoch": 0.5105668299510147, - "grad_norm": 0.39498197465504736, - "learning_rate": 2.5373985175381594e-05, - "loss": 0.546, + "epoch": 1.0209907640638118, + "grad_norm": 0.22456830528366797, + "learning_rate": 7.868478849992945e-05, + "loss": 0.5181, "step": 3648 }, { - "epoch": 0.5107067879636109, - "grad_norm": 0.3934270157464578, - "learning_rate": 2.5362653098686245e-05, - "loss": 0.532, + "epoch": 1.0212706409179961, + "grad_norm": 0.2201775470465584, + "learning_rate": 7.867215245221983e-05, + "loss": 0.4907, "step": 3649 }, { - "epoch": 0.5108467459762072, - "grad_norm": 0.41869564796385766, - "learning_rate": 2.5351320947462005e-05, - "loss": 0.5768, + "epoch": 1.0215505177721802, + "grad_norm": 0.2226429327885552, + "learning_rate": 7.86595136754787e-05, + "loss": 0.4961, "step": 3650 }, { - "epoch": 0.5109867039888033, - "grad_norm": 0.42028182516820867, - "learning_rate": 2.533998872403775e-05, - "loss": 0.5859, + "epoch": 1.0218303946263645, + "grad_norm": 0.23263872512360653, + "learning_rate": 7.864687217090901e-05, + "loss": 0.5159, "step": 3651 }, { - "epoch": 0.5111266620013996, - "grad_norm": 0.39465753139334825, - "learning_rate": 2.532865643074236e-05, - "loss": 0.5363, + "epoch": 1.0221102714805486, + "grad_norm": 0.22524051741579265, + "learning_rate": 7.8634227939714e-05, + "loss": 0.5273, "step": 3652 }, { - "epoch": 0.5112666200139958, - "grad_norm": 0.3962177186218334, - "learning_rate": 2.531732406990474e-05, - "loss": 0.5417, + "epoch": 1.0223901483347326, + "grad_norm": 0.22234250746513734, + "learning_rate": 7.862158098309715e-05, + "loss": 0.4821, "step": 3653 }, { - "epoch": 0.5114065780265921, - "grad_norm": 0.4175265234416395, - "learning_rate": 2.5305991643853806e-05, - "loss": 0.542, + "epoch": 1.022670025188917, + "grad_norm": 0.22423880484357808, + "learning_rate": 7.860893130226219e-05, + "loss": 0.5169, "step": 3654 }, { - "epoch": 0.5115465360391882, - "grad_norm": 0.4149054282303681, - "learning_rate": 2.5294659154918475e-05, - "loss": 0.5876, + "epoch": 1.022949902043101, + "grad_norm": 0.22416996618762852, + "learning_rate": 7.859627889841314e-05, + "loss": 0.4965, "step": 3655 }, { - "epoch": 0.5116864940517845, - "grad_norm": 0.433229070178671, - "learning_rate": 2.528332660542771e-05, - "loss": 0.5516, + "epoch": 1.0232297788972853, + "grad_norm": 0.22406438174150115, + "learning_rate": 7.858362377275426e-05, + "loss": 0.5221, "step": 3656 }, { - "epoch": 0.5118264520643807, - "grad_norm": 0.4482742289905204, - "learning_rate": 2.527199399771044e-05, - "loss": 0.5785, + "epoch": 1.0235096557514693, + "grad_norm": 0.2318239704342264, + "learning_rate": 7.857096592649007e-05, + "loss": 0.5206, "step": 3657 }, { - "epoch": 0.5119664100769769, - "grad_norm": 0.41488401148674847, - "learning_rate": 2.526066133409566e-05, - "loss": 0.5107, + "epoch": 1.0237895326056534, + "grad_norm": 0.21836130956054062, + "learning_rate": 7.855830536082536e-05, + "loss": 0.5063, "step": 3658 }, { - "epoch": 0.5121063680895731, - "grad_norm": 0.3865352913695365, - "learning_rate": 2.5249328616912316e-05, - "loss": 0.4911, + "epoch": 1.0240694094598377, + "grad_norm": 0.2219583339403444, + "learning_rate": 7.854564207696514e-05, + "loss": 0.4789, "step": 3659 }, { - "epoch": 0.5122463261021694, - "grad_norm": 0.47638789508637014, - "learning_rate": 2.523799584848942e-05, - "loss": 0.5472, + "epoch": 1.0243492863140218, + "grad_norm": 0.2370270364275783, + "learning_rate": 7.853297607611474e-05, + "loss": 0.4862, "step": 3660 }, { - "epoch": 0.5123862841147656, - "grad_norm": 0.4067671688157654, - "learning_rate": 2.5226663031155954e-05, - "loss": 0.532, + "epoch": 1.024629163168206, + "grad_norm": 0.22431161646064307, + "learning_rate": 7.852030735947972e-05, + "loss": 0.5118, "step": 3661 }, { - "epoch": 0.5125262421273618, - "grad_norm": 0.36222693303435777, - "learning_rate": 2.5215330167240947e-05, - "loss": 0.5085, + "epoch": 1.0249090400223901, + "grad_norm": 0.2283212493299846, + "learning_rate": 7.850763592826587e-05, + "loss": 0.5303, "step": 3662 }, { - "epoch": 0.512666200139958, - "grad_norm": 0.403199238488262, - "learning_rate": 2.52039972590734e-05, - "loss": 0.5497, + "epoch": 1.0251889168765742, + "grad_norm": 0.23409502376978944, + "learning_rate": 7.849496178367928e-05, + "loss": 0.494, "step": 3663 }, { - "epoch": 0.5128061581525543, - "grad_norm": 0.4052356626415982, - "learning_rate": 2.5192664308982344e-05, - "loss": 0.5224, + "epoch": 1.0254687937307585, + "grad_norm": 0.2147483425223085, + "learning_rate": 7.848228492692626e-05, + "loss": 0.5265, "step": 3664 }, { - "epoch": 0.5129461161651504, - "grad_norm": 0.40489338303806344, - "learning_rate": 2.5181331319296825e-05, - "loss": 0.5621, + "epoch": 1.0257486705849426, + "grad_norm": 0.22011900065948645, + "learning_rate": 7.846960535921344e-05, + "loss": 0.4961, "step": 3665 }, { - "epoch": 0.5130860741777467, - "grad_norm": 0.40195392432260063, - "learning_rate": 2.516999829234587e-05, - "loss": 0.5221, + "epoch": 1.0260285474391269, + "grad_norm": 0.21620581271904024, + "learning_rate": 7.845692308174763e-05, + "loss": 0.5238, "step": 3666 }, { - "epoch": 0.5132260321903429, - "grad_norm": 0.44319445941617425, - "learning_rate": 2.515866523045855e-05, - "loss": 0.5607, + "epoch": 1.026308424293311, + "grad_norm": 0.20659401458152649, + "learning_rate": 7.844423809573598e-05, + "loss": 0.5136, "step": 3667 }, { - "epoch": 0.5133659902029392, - "grad_norm": 0.42549718785093565, - "learning_rate": 2.514733213596391e-05, - "loss": 0.5678, + "epoch": 1.026588301147495, + "grad_norm": 0.21696794715860793, + "learning_rate": 7.84315504023858e-05, + "loss": 0.5125, "step": 3668 }, { - "epoch": 0.5135059482155353, - "grad_norm": 0.4012452520954793, - "learning_rate": 2.5135999011191018e-05, - "loss": 0.574, + "epoch": 1.0268681780016793, + "grad_norm": 0.226905351752765, + "learning_rate": 7.841886000290475e-05, + "loss": 0.5131, "step": 3669 }, { - "epoch": 0.5136459062281316, - "grad_norm": 0.3988377623943865, - "learning_rate": 2.5124665858468954e-05, - "loss": 0.5638, + "epoch": 1.0271480548558634, + "grad_norm": 0.22998961957295685, + "learning_rate": 7.840616689850068e-05, + "loss": 0.5046, "step": 3670 }, { - "epoch": 0.5137858642407278, - "grad_norm": 0.4068508250249883, - "learning_rate": 2.5113332680126795e-05, - "loss": 0.531, + "epoch": 1.0274279317100476, + "grad_norm": 0.21251326918552876, + "learning_rate": 7.839347109038177e-05, + "loss": 0.493, "step": 3671 }, { - "epoch": 0.513925822253324, - "grad_norm": 0.40607709238953865, - "learning_rate": 2.510199947849361e-05, - "loss": 0.5555, + "epoch": 1.0277078085642317, + "grad_norm": 0.22252036125757305, + "learning_rate": 7.83807725797564e-05, + "loss": 0.4958, "step": 3672 }, { - "epoch": 0.5140657802659202, - "grad_norm": 0.40363285844425767, - "learning_rate": 2.50906662558985e-05, - "loss": 0.549, + "epoch": 1.0279876854184158, + "grad_norm": 0.21811964113001786, + "learning_rate": 7.836807136783319e-05, + "loss": 0.5041, "step": 3673 }, { - "epoch": 0.5142057382785165, - "grad_norm": 0.42540315547468743, - "learning_rate": 2.507933301467056e-05, - "loss": 0.562, + "epoch": 1.0282675622726, + "grad_norm": 0.23204598074564745, + "learning_rate": 7.835536745582107e-05, + "loss": 0.5074, "step": 3674 }, { - "epoch": 0.5143456962911127, - "grad_norm": 0.41197452211967045, - "learning_rate": 2.5067999757138884e-05, - "loss": 0.5572, + "epoch": 1.0285474391267841, + "grad_norm": 0.22050068047631413, + "learning_rate": 7.83426608449292e-05, + "loss": 0.5009, "step": 3675 }, { - "epoch": 0.5144856543037089, - "grad_norm": 0.42328084101201563, - "learning_rate": 2.505666648563256e-05, - "loss": 0.5651, + "epoch": 1.0288273159809684, + "grad_norm": 0.22693963333949582, + "learning_rate": 7.832995153636701e-05, + "loss": 0.5001, "step": 3676 }, { - "epoch": 0.5146256123163051, - "grad_norm": 0.39617586204555455, - "learning_rate": 2.5045333202480698e-05, - "loss": 0.5505, + "epoch": 1.0291071928351525, + "grad_norm": 0.22729119103749862, + "learning_rate": 7.831723953134418e-05, + "loss": 0.4972, "step": 3677 }, { - "epoch": 0.5147655703289014, - "grad_norm": 0.40450493806111026, - "learning_rate": 2.5033999910012414e-05, - "loss": 0.5387, + "epoch": 1.0293870696893368, + "grad_norm": 0.21855378715453477, + "learning_rate": 7.830452483107063e-05, + "loss": 0.5291, "step": 3678 }, { - "epoch": 0.5149055283414975, - "grad_norm": 0.41125264241739257, - "learning_rate": 2.502266661055679e-05, - "loss": 0.5338, + "epoch": 1.0296669465435209, + "grad_norm": 0.23079382860819328, + "learning_rate": 7.829180743675657e-05, + "loss": 0.5158, "step": 3679 }, { - "epoch": 0.5150454863540938, - "grad_norm": 0.41606733051241485, - "learning_rate": 2.5011333306442953e-05, - "loss": 0.5472, + "epoch": 1.029946823397705, + "grad_norm": 0.21558792962330395, + "learning_rate": 7.827908734961245e-05, + "loss": 0.518, "step": 3680 }, { - "epoch": 0.51518544436669, - "grad_norm": 0.3940959299812521, - "learning_rate": 2.5e-05, - "loss": 0.5264, + "epoch": 1.0302267002518892, + "grad_norm": 0.21739769261590977, + "learning_rate": 7.826636457084897e-05, + "loss": 0.4882, "step": 3681 }, { - "epoch": 0.5153254023792863, - "grad_norm": 0.4160510111032202, - "learning_rate": 2.498866669355706e-05, - "loss": 0.5365, + "epoch": 1.0305065771060733, + "grad_norm": 0.22069486679964603, + "learning_rate": 7.825363910167708e-05, + "loss": 0.5054, "step": 3682 }, { - "epoch": 0.5154653603918824, - "grad_norm": 0.40780249685180014, - "learning_rate": 2.497733338944321e-05, - "loss": 0.5137, + "epoch": 1.0307864539602576, + "grad_norm": 0.2370634533060172, + "learning_rate": 7.8240910943308e-05, + "loss": 0.4993, "step": 3683 }, { - "epoch": 0.5156053184044787, - "grad_norm": 0.41930837922293446, - "learning_rate": 2.4966000089987596e-05, - "loss": 0.549, + "epoch": 1.0310663308144417, + "grad_norm": 0.2360609192064416, + "learning_rate": 7.822818009695322e-05, + "loss": 0.4821, "step": 3684 }, { - "epoch": 0.5157452764170749, - "grad_norm": 0.41175787454056617, - "learning_rate": 2.4954666797519305e-05, - "loss": 0.5445, + "epoch": 1.0313462076686257, + "grad_norm": 0.21923797582612406, + "learning_rate": 7.821544656382445e-05, + "loss": 0.5123, "step": 3685 }, { - "epoch": 0.515885234429671, - "grad_norm": 0.40112997035094405, - "learning_rate": 2.4943333514367448e-05, - "loss": 0.5323, + "epoch": 1.03162608452281, + "grad_norm": 0.21959227309143034, + "learning_rate": 7.820271034513369e-05, + "loss": 0.4917, "step": 3686 }, { - "epoch": 0.5160251924422673, - "grad_norm": 0.41601050967799824, - "learning_rate": 2.4932000242861122e-05, - "loss": 0.5705, + "epoch": 1.031905961376994, + "grad_norm": 0.22578074050452607, + "learning_rate": 7.818997144209317e-05, + "loss": 0.4999, "step": 3687 }, { - "epoch": 0.5161651504548636, - "grad_norm": 0.3938594765036972, - "learning_rate": 2.4920666985329443e-05, - "loss": 0.5147, + "epoch": 1.0321858382311784, + "grad_norm": 0.22418214779760068, + "learning_rate": 7.81772298559154e-05, + "loss": 0.5035, "step": 3688 }, { - "epoch": 0.5163051084674598, - "grad_norm": 0.4187589160579126, - "learning_rate": 2.4909333744101503e-05, - "loss": 0.5461, + "epoch": 1.0324657150853624, + "grad_norm": 0.2181856244345083, + "learning_rate": 7.81644855878131e-05, + "loss": 0.518, "step": 3689 }, { - "epoch": 0.516445066480056, - "grad_norm": 0.4447365782109597, - "learning_rate": 2.48980005215064e-05, - "loss": 0.5388, + "epoch": 1.0327455919395465, + "grad_norm": 0.21754485220945913, + "learning_rate": 7.815173863899932e-05, + "loss": 0.469, "step": 3690 }, { - "epoch": 0.5165850244926522, - "grad_norm": 0.40572901279081225, - "learning_rate": 2.4886667319873214e-05, - "loss": 0.5746, + "epoch": 1.0330254687937308, + "grad_norm": 0.23659125740358572, + "learning_rate": 7.813898901068727e-05, + "loss": 0.5033, "step": 3691 }, { - "epoch": 0.5167249825052485, - "grad_norm": 0.3937401384845004, - "learning_rate": 2.4875334141531052e-05, - "loss": 0.5514, + "epoch": 1.0333053456479149, + "grad_norm": 0.22440086793698985, + "learning_rate": 7.812623670409052e-05, + "loss": 0.5136, "step": 3692 }, { - "epoch": 0.5168649405178446, - "grad_norm": 0.4251082426934148, - "learning_rate": 2.486400098880899e-05, - "loss": 0.5531, + "epoch": 1.0335852225020992, + "grad_norm": 0.22152283188669944, + "learning_rate": 7.811348172042282e-05, + "loss": 0.4964, "step": 3693 }, { - "epoch": 0.5170048985304408, - "grad_norm": 0.4233026623262488, - "learning_rate": 2.4852667864036093e-05, - "loss": 0.5698, + "epoch": 1.0338650993562832, + "grad_norm": 0.22666937105652316, + "learning_rate": 7.81007240608982e-05, + "loss": 0.4898, "step": 3694 }, { - "epoch": 0.5171448565430371, - "grad_norm": 0.4184619207216555, - "learning_rate": 2.484133476954146e-05, - "loss": 0.5445, + "epoch": 1.0341449762104673, + "grad_norm": 0.22737577897722452, + "learning_rate": 7.80879637267309e-05, + "loss": 0.4793, "step": 3695 }, { - "epoch": 0.5172848145556334, - "grad_norm": 0.38758737521657194, - "learning_rate": 2.4830001707654134e-05, - "loss": 0.5383, + "epoch": 1.0344248530646516, + "grad_norm": 0.22454308849167792, + "learning_rate": 7.807520071913553e-05, + "loss": 0.5043, "step": 3696 }, { - "epoch": 0.5174247725682295, - "grad_norm": 0.41121949288648046, - "learning_rate": 2.4818668680703187e-05, - "loss": 0.5548, + "epoch": 1.0347047299188357, + "grad_norm": 0.22583644746256398, + "learning_rate": 7.806243503932681e-05, + "loss": 0.5248, "step": 3697 }, { - "epoch": 0.5175647305808257, - "grad_norm": 0.3912926343783076, - "learning_rate": 2.4807335691017662e-05, - "loss": 0.5993, + "epoch": 1.03498460677302, + "grad_norm": 0.24001174737920145, + "learning_rate": 7.804966668851984e-05, + "loss": 0.4972, "step": 3698 }, { - "epoch": 0.517704688593422, - "grad_norm": 0.4352916580631968, - "learning_rate": 2.479600274092661e-05, - "loss": 0.5947, + "epoch": 1.035264483627204, + "grad_norm": 0.23204276896869927, + "learning_rate": 7.803689566792989e-05, + "loss": 0.5061, "step": 3699 }, { - "epoch": 0.5178446466060181, - "grad_norm": 0.4117712323458744, - "learning_rate": 2.4784669832759065e-05, - "loss": 0.566, + "epoch": 1.035544360481388, + "grad_norm": 0.22768586153412737, + "learning_rate": 7.80241219787725e-05, + "loss": 0.5019, "step": 3700 }, { - "epoch": 0.5179846046186144, - "grad_norm": 0.48543517712737916, - "learning_rate": 2.4773336968844045e-05, - "loss": 0.5217, + "epoch": 1.0358242373355724, + "grad_norm": 0.2278122899786938, + "learning_rate": 7.801134562226351e-05, + "loss": 0.5106, "step": 3701 }, { - "epoch": 0.5181245626312107, - "grad_norm": 0.4201945605915251, - "learning_rate": 2.4762004151510584e-05, - "loss": 0.5817, + "epoch": 1.0361041141897565, + "grad_norm": 0.23306330492410876, + "learning_rate": 7.799856659961896e-05, + "loss": 0.5611, "step": 3702 }, { - "epoch": 0.5182645206438069, - "grad_norm": 0.40028199720572183, - "learning_rate": 2.475067138308769e-05, - "loss": 0.5305, + "epoch": 1.0363839910439407, + "grad_norm": 0.2279186512689613, + "learning_rate": 7.798578491205517e-05, + "loss": 0.5034, "step": 3703 }, { - "epoch": 0.518404478656403, - "grad_norm": 0.45130189575312646, - "learning_rate": 2.4739338665904356e-05, - "loss": 0.5762, + "epoch": 1.0366638678981248, + "grad_norm": 0.2176282765030379, + "learning_rate": 7.79730005607887e-05, + "loss": 0.4993, "step": 3704 }, { - "epoch": 0.5185444366689993, - "grad_norm": 0.40520084504522474, - "learning_rate": 2.4728006002289555e-05, - "loss": 0.5618, + "epoch": 1.0369437447523089, + "grad_norm": 0.2389784110027637, + "learning_rate": 7.796021354703638e-05, + "loss": 0.4983, "step": 3705 }, { - "epoch": 0.5186843946815956, - "grad_norm": 0.39762527724883795, - "learning_rate": 2.4716673394572297e-05, - "loss": 0.5519, + "epoch": 1.0372236216064932, + "grad_norm": 0.227742898410014, + "learning_rate": 7.79474238720153e-05, + "loss": 0.5365, "step": 3706 }, { - "epoch": 0.5188243526941917, - "grad_norm": 0.4303843474015456, - "learning_rate": 2.4705340845081527e-05, - "loss": 0.56, + "epoch": 1.0375034984606772, + "grad_norm": 0.22046182855204585, + "learning_rate": 7.793463153694277e-05, + "loss": 0.503, "step": 3707 }, { - "epoch": 0.518964310706788, - "grad_norm": 0.4169723723648604, - "learning_rate": 2.4694008356146207e-05, - "loss": 0.5436, + "epoch": 1.0377833753148615, + "grad_norm": 0.22869558767230014, + "learning_rate": 7.792183654303638e-05, + "loss": 0.5084, "step": 3708 }, { - "epoch": 0.5191042687193842, - "grad_norm": 0.40055909799700246, - "learning_rate": 2.4682675930095263e-05, - "loss": 0.536, + "epoch": 1.0380632521690456, + "grad_norm": 0.21523346168863788, + "learning_rate": 7.790903889151393e-05, + "loss": 0.4982, "step": 3709 }, { - "epoch": 0.5192442267319805, - "grad_norm": 0.4109688655223356, - "learning_rate": 2.4671343569257647e-05, - "loss": 0.5324, + "epoch": 1.0383431290232297, + "grad_norm": 0.215141081448551, + "learning_rate": 7.789623858359356e-05, + "loss": 0.4773, "step": 3710 }, { - "epoch": 0.5193841847445766, - "grad_norm": 0.4149175387680195, - "learning_rate": 2.4660011275962258e-05, - "loss": 0.5388, + "epoch": 1.038623005877414, + "grad_norm": 0.23749807648684113, + "learning_rate": 7.788343562049359e-05, + "loss": 0.5113, "step": 3711 }, { - "epoch": 0.5195241427571728, - "grad_norm": 0.4264139741717757, - "learning_rate": 2.4648679052537994e-05, - "loss": 0.5625, + "epoch": 1.038902882731598, + "grad_norm": 0.2263293463190765, + "learning_rate": 7.78706300034326e-05, + "loss": 0.5105, "step": 3712 }, { - "epoch": 0.5196641007697691, - "grad_norm": 0.4289357178860241, - "learning_rate": 2.4637346901313757e-05, - "loss": 0.5689, + "epoch": 1.0391827595857823, + "grad_norm": 0.22836076680374096, + "learning_rate": 7.785782173362945e-05, + "loss": 0.5284, "step": 3713 }, { - "epoch": 0.5198040587823652, - "grad_norm": 0.42933380182165265, - "learning_rate": 2.4626014824618415e-05, - "loss": 0.5751, + "epoch": 1.0394626364399664, + "grad_norm": 0.2210892686929637, + "learning_rate": 7.784501081230323e-05, + "loss": 0.5027, "step": 3714 }, { - "epoch": 0.5199440167949615, - "grad_norm": 0.41237458627558815, - "learning_rate": 2.461468282478082e-05, - "loss": 0.5708, + "epoch": 1.0397425132941507, + "grad_norm": 0.22263999621691236, + "learning_rate": 7.78321972406733e-05, + "loss": 0.4852, "step": 3715 }, { - "epoch": 0.5200839748075577, - "grad_norm": 0.40783837110167326, - "learning_rate": 2.46033509041298e-05, - "loss": 0.5163, + "epoch": 1.0400223901483348, + "grad_norm": 0.22576600416532278, + "learning_rate": 7.781938101995927e-05, + "loss": 0.4973, "step": 3716 }, { - "epoch": 0.520223932820154, - "grad_norm": 0.3934394614065878, - "learning_rate": 2.4592019064994207e-05, - "loss": 0.5123, + "epoch": 1.0403022670025188, + "grad_norm": 0.2190389765571684, + "learning_rate": 7.780656215138097e-05, + "loss": 0.5181, "step": 3717 }, { - "epoch": 0.5203638908327501, - "grad_norm": 0.42393320677683094, - "learning_rate": 2.4580687309702836e-05, - "loss": 0.5459, + "epoch": 1.0405821438567031, + "grad_norm": 0.22358476178617778, + "learning_rate": 7.779374063615851e-05, + "loss": 0.4923, "step": 3718 }, { - "epoch": 0.5205038488453464, - "grad_norm": 0.4201737718371511, - "learning_rate": 2.456935564058447e-05, - "loss": 0.5484, + "epoch": 1.0408620207108872, + "grad_norm": 0.21674036560877505, + "learning_rate": 7.778091647551228e-05, + "loss": 0.4751, "step": 3719 }, { - "epoch": 0.5206438068579426, - "grad_norm": 0.4037513584821948, - "learning_rate": 2.45580240599679e-05, - "loss": 0.539, + "epoch": 1.0411418975650715, + "grad_norm": 0.22126498915078868, + "learning_rate": 7.776808967066285e-05, + "loss": 0.5283, "step": 3720 }, { - "epoch": 0.5207837648705388, - "grad_norm": 0.40384281587615306, - "learning_rate": 2.4546692570181863e-05, - "loss": 0.5594, + "epoch": 1.0414217744192555, + "grad_norm": 0.2765440597539088, + "learning_rate": 7.775526022283113e-05, + "loss": 0.4908, "step": 3721 }, { - "epoch": 0.520923722883135, - "grad_norm": 0.39646345117822984, - "learning_rate": 2.453536117355511e-05, - "loss": 0.5344, + "epoch": 1.0417016512734396, + "grad_norm": 0.23209777214489224, + "learning_rate": 7.774242813323817e-05, + "loss": 0.5457, "step": 3722 }, { - "epoch": 0.5210636808957313, - "grad_norm": 0.402351444419845, - "learning_rate": 2.4524029872416333e-05, - "loss": 0.548, + "epoch": 1.041981528127624, + "grad_norm": 0.23080917925574035, + "learning_rate": 7.772959340310541e-05, + "loss": 0.5095, "step": 3723 }, { - "epoch": 0.5212036389083275, - "grad_norm": 0.4310017935995615, - "learning_rate": 2.451269866909426e-05, - "loss": 0.5654, + "epoch": 1.042261404981808, + "grad_norm": 0.25309199876030164, + "learning_rate": 7.771675603365441e-05, + "loss": 0.5181, "step": 3724 }, { - "epoch": 0.5213435969209237, - "grad_norm": 0.39852961133599507, - "learning_rate": 2.4501367565917554e-05, - "loss": 0.5248, + "epoch": 1.0425412818359923, + "grad_norm": 0.22190312983812366, + "learning_rate": 7.770391602610706e-05, + "loss": 0.5207, "step": 3725 }, { - "epoch": 0.5214835549335199, - "grad_norm": 0.3978940832017032, - "learning_rate": 2.4490036565214873e-05, - "loss": 0.5423, + "epoch": 1.0428211586901763, + "grad_norm": 0.22668013820155763, + "learning_rate": 7.769107338168548e-05, + "loss": 0.4917, "step": 3726 }, { - "epoch": 0.5216235129461162, - "grad_norm": 0.4032221188907521, - "learning_rate": 2.447870566931484e-05, - "loss": 0.5625, + "epoch": 1.0431010355443604, + "grad_norm": 0.25886643025629247, + "learning_rate": 7.767822810161203e-05, + "loss": 0.5176, "step": 3727 }, { - "epoch": 0.5217634709587123, - "grad_norm": 0.40124015142391184, - "learning_rate": 2.4467374880546095e-05, - "loss": 0.5173, + "epoch": 1.0433809123985447, + "grad_norm": 0.2273589243644261, + "learning_rate": 7.766538018710934e-05, + "loss": 0.4813, "step": 3728 }, { - "epoch": 0.5219034289713086, - "grad_norm": 0.419761697749084, - "learning_rate": 2.4456044201237215e-05, - "loss": 0.577, + "epoch": 1.0436607892527288, + "grad_norm": 0.2288098441950441, + "learning_rate": 7.765252963940026e-05, + "loss": 0.5261, "step": 3729 }, { - "epoch": 0.5220433869839048, - "grad_norm": 0.4068938084974087, - "learning_rate": 2.4444713633716765e-05, - "loss": 0.5089, + "epoch": 1.043940666106913, + "grad_norm": 0.23432135225594952, + "learning_rate": 7.763967645970796e-05, + "loss": 0.5026, "step": 3730 }, { - "epoch": 0.5221833449965011, - "grad_norm": 0.4372637791239717, - "learning_rate": 2.4433383180313303e-05, - "loss": 0.6125, + "epoch": 1.0442205429610971, + "grad_norm": 0.2257468875164591, + "learning_rate": 7.762682064925578e-05, + "loss": 0.5091, "step": 3731 }, { - "epoch": 0.5223233030090972, - "grad_norm": 0.4211367854727307, - "learning_rate": 2.4422052843355347e-05, - "loss": 0.5837, + "epoch": 1.0445004198152812, + "grad_norm": 0.2258328663868932, + "learning_rate": 7.761396220926733e-05, + "loss": 0.5069, "step": 3732 }, { - "epoch": 0.5224632610216935, - "grad_norm": 0.4138228399759103, - "learning_rate": 2.4410722625171396e-05, - "loss": 0.5904, + "epoch": 1.0447802966694655, + "grad_norm": 0.23853146649917603, + "learning_rate": 7.76011011409665e-05, + "loss": 0.5042, "step": 3733 }, { - "epoch": 0.5226032190342897, - "grad_norm": 0.43035321865290377, - "learning_rate": 2.4399392528089915e-05, - "loss": 0.5543, + "epoch": 1.0450601735236495, + "grad_norm": 0.22491834872133007, + "learning_rate": 7.758823744557744e-05, + "loss": 0.5223, "step": 3734 }, { - "epoch": 0.5227431770468859, - "grad_norm": 0.393376380647825, - "learning_rate": 2.4388062554439372e-05, - "loss": 0.5625, + "epoch": 1.0453400503778338, + "grad_norm": 0.22982063950382134, + "learning_rate": 7.757537112432448e-05, + "loss": 0.5022, "step": 3735 }, { - "epoch": 0.5228831350594821, - "grad_norm": 0.4057572933128106, - "learning_rate": 2.4376732706548183e-05, - "loss": 0.5386, + "epoch": 1.045619927232018, + "grad_norm": 0.22254638081289047, + "learning_rate": 7.756250217843226e-05, + "loss": 0.498, "step": 3736 }, { - "epoch": 0.5230230930720784, - "grad_norm": 0.4127940511172469, - "learning_rate": 2.4365402986744738e-05, - "loss": 0.5424, + "epoch": 1.045899804086202, + "grad_norm": 0.2267195513303436, + "learning_rate": 7.754963060912565e-05, + "loss": 0.5126, "step": 3737 }, { - "epoch": 0.5231630510846746, - "grad_norm": 0.43512898556014096, - "learning_rate": 2.4354073397357427e-05, - "loss": 0.5886, + "epoch": 1.0461796809403863, + "grad_norm": 0.2286198041088637, + "learning_rate": 7.753675641762981e-05, + "loss": 0.4757, "step": 3738 }, { - "epoch": 0.5233030090972708, - "grad_norm": 0.429632898236204, - "learning_rate": 2.434274394071459e-05, - "loss": 0.5897, + "epoch": 1.0464595577945703, + "grad_norm": 0.22037508345309462, + "learning_rate": 7.752387960517004e-05, + "loss": 0.4988, "step": 3739 }, { - "epoch": 0.523442967109867, - "grad_norm": 0.5024683991705309, - "learning_rate": 2.4331414619144537e-05, - "loss": 0.5389, + "epoch": 1.0467394346487546, + "grad_norm": 0.21687297637714298, + "learning_rate": 7.751100017297204e-05, + "loss": 0.4794, "step": 3740 }, { - "epoch": 0.5235829251224633, - "grad_norm": 0.426305756774051, - "learning_rate": 2.432008543497556e-05, - "loss": 0.5319, + "epoch": 1.0470193115029387, + "grad_norm": 0.22023636752093112, + "learning_rate": 7.749811812226161e-05, + "loss": 0.5087, "step": 3741 }, { - "epoch": 0.5237228831350594, - "grad_norm": 0.4067282442774484, - "learning_rate": 2.430875639053593e-05, - "loss": 0.5364, + "epoch": 1.0472991883571228, + "grad_norm": 0.22566517206426936, + "learning_rate": 7.748523345426493e-05, + "loss": 0.5206, "step": 3742 }, { - "epoch": 0.5238628411476557, - "grad_norm": 0.40523378137952, - "learning_rate": 2.4297427488153872e-05, - "loss": 0.5512, + "epoch": 1.047579065211307, + "grad_norm": 0.2291302872555876, + "learning_rate": 7.747234617020835e-05, + "loss": 0.505, "step": 3743 }, { - "epoch": 0.5240027991602519, - "grad_norm": 0.42309697388292367, - "learning_rate": 2.42860987301576e-05, - "loss": 0.5832, + "epoch": 1.0478589420654911, + "grad_norm": 0.21768976187721323, + "learning_rate": 7.745945627131848e-05, + "loss": 0.493, "step": 3744 }, { - "epoch": 0.5241427571728482, - "grad_norm": 0.39749349792214256, - "learning_rate": 2.427477011887527e-05, - "loss": 0.5453, + "epoch": 1.0481388189196754, + "grad_norm": 0.21776900587766715, + "learning_rate": 7.744656375882218e-05, + "loss": 0.486, "step": 3745 }, { - "epoch": 0.5242827151854443, - "grad_norm": 0.4128499134441993, - "learning_rate": 2.4263441656635053e-05, - "loss": 0.5614, + "epoch": 1.0484186957738595, + "grad_norm": 0.2167297075444961, + "learning_rate": 7.743366863394658e-05, + "loss": 0.5071, "step": 3746 }, { - "epoch": 0.5244226731980406, - "grad_norm": 0.4307471936718753, - "learning_rate": 2.4252113345765046e-05, - "loss": 0.5426, + "epoch": 1.0486985726280436, + "grad_norm": 0.22121549586597575, + "learning_rate": 7.742077089791904e-05, + "loss": 0.5113, "step": 3747 }, { - "epoch": 0.5245626312106368, - "grad_norm": 0.40120347811779566, - "learning_rate": 2.4240785188593325e-05, - "loss": 0.5562, + "epoch": 1.0489784494822278, + "grad_norm": 0.21653569757474664, + "learning_rate": 7.740787055196718e-05, + "loss": 0.4833, "step": 3748 }, { - "epoch": 0.524702589223233, - "grad_norm": 0.40709801793493633, - "learning_rate": 2.4229457187447956e-05, - "loss": 0.5326, + "epoch": 1.049258326336412, + "grad_norm": 0.22610627454716303, + "learning_rate": 7.739496759731888e-05, + "loss": 0.5147, "step": 3749 }, { - "epoch": 0.5248425472358292, - "grad_norm": 0.38783041950630354, - "learning_rate": 2.4218129344656958e-05, - "loss": 0.5801, + "epoch": 1.0495382031905962, + "grad_norm": 0.22583420842560206, + "learning_rate": 7.738206203520222e-05, + "loss": 0.5204, "step": 3750 }, { - "epoch": 0.5249825052484255, - "grad_norm": 0.41702390350925966, - "learning_rate": 2.4206801662548314e-05, - "loss": 0.543, + "epoch": 1.0498180800447803, + "grad_norm": 0.23854311733296116, + "learning_rate": 7.736915386684556e-05, + "loss": 0.5225, "step": 3751 }, { - "epoch": 0.5251224632610217, - "grad_norm": 0.40842859770281464, - "learning_rate": 2.419547414344997e-05, - "loss": 0.5719, + "epoch": 1.0500979568989646, + "grad_norm": 0.2067873549877098, + "learning_rate": 7.735624309347753e-05, + "loss": 0.5012, "step": 3752 }, { - "epoch": 0.5252624212736179, - "grad_norm": 0.40966706938584196, - "learning_rate": 2.418414678968985e-05, - "loss": 0.5679, + "epoch": 1.0503778337531486, + "grad_norm": 0.21602368238643985, + "learning_rate": 7.734332971632696e-05, + "loss": 0.4904, "step": 3753 }, { - "epoch": 0.5254023792862141, - "grad_norm": 0.4217418453013296, - "learning_rate": 2.4172819603595853e-05, - "loss": 0.5721, + "epoch": 1.0506577106073327, + "grad_norm": 0.2278415591193141, + "learning_rate": 7.733041373662297e-05, + "loss": 0.5086, "step": 3754 }, { - "epoch": 0.5255423372988104, - "grad_norm": 0.41200690319133665, - "learning_rate": 2.4161492587495812e-05, - "loss": 0.5681, + "epoch": 1.050937587461517, + "grad_norm": 0.2174762455042572, + "learning_rate": 7.73174951555949e-05, + "loss": 0.5061, "step": 3755 }, { - "epoch": 0.5256822953114065, - "grad_norm": 0.4307870892864373, - "learning_rate": 2.4150165743717556e-05, - "loss": 0.5991, + "epoch": 1.051217464315701, + "grad_norm": 0.2187822877203153, + "learning_rate": 7.730457397447235e-05, + "loss": 0.496, "step": 3756 }, { - "epoch": 0.5258222533240028, - "grad_norm": 0.3964187463617023, - "learning_rate": 2.4138839074588868e-05, - "loss": 0.5381, + "epoch": 1.0514973411698854, + "grad_norm": 0.213077764698822, + "learning_rate": 7.729165019448517e-05, + "loss": 0.4848, "step": 3757 }, { - "epoch": 0.525962211336599, - "grad_norm": 0.41691171273978006, - "learning_rate": 2.4127512582437485e-05, - "loss": 0.5336, + "epoch": 1.0517772180240694, + "grad_norm": 0.23034650377157573, + "learning_rate": 7.727872381686343e-05, + "loss": 0.4874, "step": 3758 }, { - "epoch": 0.5261021693491953, - "grad_norm": 0.4243161504581818, - "learning_rate": 2.4116186269591105e-05, - "loss": 0.5466, + "epoch": 1.0520570948782535, + "grad_norm": 0.2162098522324769, + "learning_rate": 7.72657948428375e-05, + "loss": 0.5007, "step": 3759 }, { - "epoch": 0.5262421273617914, - "grad_norm": 0.41746283237341214, - "learning_rate": 2.410486013837743e-05, - "loss": 0.5292, + "epoch": 1.0523369717324378, + "grad_norm": 0.22197862336329033, + "learning_rate": 7.725286327363797e-05, + "loss": 0.4821, "step": 3760 }, { - "epoch": 0.5263820853743877, - "grad_norm": 0.4258425078942338, - "learning_rate": 2.4093534191124072e-05, - "loss": 0.597, + "epoch": 1.0526168485866219, + "grad_norm": 0.23258034245390838, + "learning_rate": 7.723992911049565e-05, + "loss": 0.513, "step": 3761 }, { - "epoch": 0.5265220433869839, - "grad_norm": 0.4082388057932214, - "learning_rate": 2.408220843015864e-05, - "loss": 0.5604, + "epoch": 1.0528967254408061, + "grad_norm": 0.21320431812644003, + "learning_rate": 7.722699235464163e-05, + "loss": 0.4911, "step": 3762 }, { - "epoch": 0.5266620013995801, - "grad_norm": 0.40439792503437516, - "learning_rate": 2.4070882857808678e-05, - "loss": 0.5599, + "epoch": 1.0531766022949902, + "grad_norm": 0.2231566023024273, + "learning_rate": 7.721405300730723e-05, + "loss": 0.4801, "step": 3763 }, { - "epoch": 0.5268019594121763, - "grad_norm": 0.403671819972436, - "learning_rate": 2.4059557476401726e-05, - "loss": 0.5701, + "epoch": 1.0534564791491743, + "grad_norm": 0.21994583286566283, + "learning_rate": 7.720111106972402e-05, + "loss": 0.486, "step": 3764 }, { - "epoch": 0.5269419174247726, - "grad_norm": 0.4115634599388499, - "learning_rate": 2.4048232288265253e-05, - "loss": 0.5613, + "epoch": 1.0537363560033586, + "grad_norm": 0.2355246280315124, + "learning_rate": 7.718816654312386e-05, + "loss": 0.517, "step": 3765 }, { - "epoch": 0.5270818754373688, - "grad_norm": 0.4196728696799674, - "learning_rate": 2.40369072957267e-05, - "loss": 0.5456, + "epoch": 1.0540162328575426, + "grad_norm": 0.23056238675580698, + "learning_rate": 7.71752194287388e-05, + "loss": 0.5317, "step": 3766 }, { - "epoch": 0.527221833449965, - "grad_norm": 0.45632616155283706, - "learning_rate": 2.4025582501113474e-05, - "loss": 0.5986, + "epoch": 1.054296109711727, + "grad_norm": 0.22877985408701595, + "learning_rate": 7.716226972780112e-05, + "loss": 0.4998, "step": 3767 }, { - "epoch": 0.5273617914625612, - "grad_norm": 0.41907920921841135, - "learning_rate": 2.4014257906752928e-05, - "loss": 0.5691, + "epoch": 1.054575986565911, + "grad_norm": 0.21833274013072324, + "learning_rate": 7.714931744154342e-05, + "loss": 0.5015, "step": 3768 }, { - "epoch": 0.5275017494751575, - "grad_norm": 0.4593046177587895, - "learning_rate": 2.4002933514972383e-05, - "loss": 0.5491, + "epoch": 1.054855863420095, + "grad_norm": 0.2121862112484023, + "learning_rate": 7.713636257119848e-05, + "loss": 0.4948, "step": 3769 }, { - "epoch": 0.5276417074877536, - "grad_norm": 0.4141383704701082, - "learning_rate": 2.399160932809911e-05, - "loss": 0.5405, + "epoch": 1.0551357402742794, + "grad_norm": 0.23148855835565083, + "learning_rate": 7.712340511799934e-05, + "loss": 0.5171, "step": 3770 }, { - "epoch": 0.5277816655003499, - "grad_norm": 0.40874131996221996, - "learning_rate": 2.3980285348460363e-05, - "loss": 0.5549, + "epoch": 1.0554156171284634, + "grad_norm": 0.21933393639811025, + "learning_rate": 7.711044508317935e-05, + "loss": 0.4776, "step": 3771 }, { - "epoch": 0.5279216235129461, - "grad_norm": 0.42334424503896645, - "learning_rate": 2.3968961578383323e-05, - "loss": 0.5596, + "epoch": 1.0556954939826477, + "grad_norm": 0.22031419590569426, + "learning_rate": 7.709748246797201e-05, + "loss": 0.5263, "step": 3772 }, { - "epoch": 0.5280615815255424, - "grad_norm": 0.40408381518564374, - "learning_rate": 2.395763802019513e-05, - "loss": 0.5541, + "epoch": 1.0559753708368318, + "grad_norm": 0.22683983817719575, + "learning_rate": 7.708451727361113e-05, + "loss": 0.4929, "step": 3773 }, { - "epoch": 0.5282015395381385, - "grad_norm": 0.3892924948143325, - "learning_rate": 2.3946314676222905e-05, - "loss": 0.5561, + "epoch": 1.0562552476910159, + "grad_norm": 0.23286618883029137, + "learning_rate": 7.707154950133073e-05, + "loss": 0.4773, "step": 3774 }, { - "epoch": 0.5283414975507348, - "grad_norm": 0.38424570502320887, - "learning_rate": 2.393499154879371e-05, - "loss": 0.5513, + "epoch": 1.0565351245452002, + "grad_norm": 0.22956515759896887, + "learning_rate": 7.70585791523651e-05, + "loss": 0.5414, "step": 3775 }, { - "epoch": 0.528481455563331, - "grad_norm": 0.40542835424755946, - "learning_rate": 2.3923668640234558e-05, - "loss": 0.5511, + "epoch": 1.0568150013993842, + "grad_norm": 0.22520125737178678, + "learning_rate": 7.704560622794875e-05, + "loss": 0.5009, "step": 3776 }, { - "epoch": 0.5286214135759272, - "grad_norm": 0.43305443591622345, - "learning_rate": 2.3912345952872416e-05, - "loss": 0.5534, + "epoch": 1.0570948782535685, + "grad_norm": 0.21589269098650588, + "learning_rate": 7.703263072931648e-05, + "loss": 0.4783, "step": 3777 }, { - "epoch": 0.5287613715885234, - "grad_norm": 0.39079289404660855, - "learning_rate": 2.3901023489034218e-05, - "loss": 0.5693, + "epoch": 1.0573747551077526, + "grad_norm": 0.2365087413936952, + "learning_rate": 7.701965265770326e-05, + "loss": 0.5391, "step": 3778 }, { - "epoch": 0.5289013296011197, - "grad_norm": 0.4993326382660523, - "learning_rate": 2.388970125104685e-05, - "loss": 0.5665, + "epoch": 1.0576546319619367, + "grad_norm": 0.21983360387197257, + "learning_rate": 7.70066720143444e-05, + "loss": 0.4941, "step": 3779 }, { - "epoch": 0.5290412876137159, - "grad_norm": 0.4013031423647386, - "learning_rate": 2.3878379241237136e-05, - "loss": 0.5059, + "epoch": 1.057934508816121, + "grad_norm": 0.22353050709079425, + "learning_rate": 7.699368880047538e-05, + "loss": 0.5269, "step": 3780 }, { - "epoch": 0.5291812456263121, - "grad_norm": 0.4122753651945142, - "learning_rate": 2.3867057461931857e-05, - "loss": 0.5643, + "epoch": 1.058214385670305, + "grad_norm": 0.22967064761977415, + "learning_rate": 7.698070301733193e-05, + "loss": 0.507, "step": 3781 }, { - "epoch": 0.5293212036389083, - "grad_norm": 0.3958098061908628, - "learning_rate": 2.3855735915457778e-05, - "loss": 0.5783, + "epoch": 1.0584942625244893, + "grad_norm": 0.22850020289574346, + "learning_rate": 7.69677146661501e-05, + "loss": 0.5163, "step": 3782 }, { - "epoch": 0.5294611616515046, - "grad_norm": 0.40430666249800307, - "learning_rate": 2.384441460414158e-05, - "loss": 0.532, + "epoch": 1.0587741393786734, + "grad_norm": 0.2374433412315277, + "learning_rate": 7.695472374816606e-05, + "loss": 0.5289, "step": 3783 }, { - "epoch": 0.5296011196641007, - "grad_norm": 0.43159094392859726, - "learning_rate": 2.383309353030989e-05, - "loss": 0.5648, + "epoch": 1.0590540162328574, + "grad_norm": 0.250525413138875, + "learning_rate": 7.694173026461634e-05, + "loss": 0.4979, "step": 3784 }, { - "epoch": 0.529741077676697, - "grad_norm": 0.4278430956102745, - "learning_rate": 2.3821772696289336e-05, - "loss": 0.5525, + "epoch": 1.0593338930870417, + "grad_norm": 0.22186768349601863, + "learning_rate": 7.692873421673765e-05, + "loss": 0.5138, "step": 3785 }, { - "epoch": 0.5298810356892932, - "grad_norm": 0.42297623639530246, - "learning_rate": 2.3810452104406444e-05, - "loss": 0.5557, + "epoch": 1.0596137699412258, + "grad_norm": 0.2342367806949055, + "learning_rate": 7.691573560576696e-05, + "loss": 0.4844, "step": 3786 }, { - "epoch": 0.5300209937018895, - "grad_norm": 0.4194124443087706, - "learning_rate": 2.3799131756987716e-05, - "loss": 0.5805, + "epoch": 1.05989364679541, + "grad_norm": 0.2220425010818297, + "learning_rate": 7.690273443294151e-05, + "loss": 0.4974, "step": 3787 }, { - "epoch": 0.5301609517144856, - "grad_norm": 0.42223996989148205, - "learning_rate": 2.378781165635959e-05, - "loss": 0.553, + "epoch": 1.0601735236495942, + "grad_norm": 0.21519697584239855, + "learning_rate": 7.688973069949871e-05, + "loss": 0.4885, "step": 3788 }, { - "epoch": 0.5303009097270819, - "grad_norm": 0.4051480666509419, - "learning_rate": 2.3776491804848474e-05, - "loss": 0.5439, + "epoch": 1.0604534005037782, + "grad_norm": 0.2187205362803618, + "learning_rate": 7.687672440667633e-05, + "loss": 0.4987, "step": 3789 }, { - "epoch": 0.5304408677396781, - "grad_norm": 0.6913758982209272, - "learning_rate": 2.3765172204780705e-05, - "loss": 0.5037, + "epoch": 1.0607332773579625, + "grad_norm": 0.21744890897097585, + "learning_rate": 7.686371555571224e-05, + "loss": 0.488, "step": 3790 }, { - "epoch": 0.5305808257522743, - "grad_norm": 0.39883852712442486, - "learning_rate": 2.375385285848257e-05, - "loss": 0.5373, + "epoch": 1.0610131542121466, + "grad_norm": 0.21004707710627715, + "learning_rate": 7.685070414784468e-05, + "loss": 0.4889, "step": 3791 }, { - "epoch": 0.5307207837648705, - "grad_norm": 0.41473538961504164, - "learning_rate": 2.3742533768280322e-05, - "loss": 0.558, + "epoch": 1.0612930310663309, + "grad_norm": 0.21830103524657823, + "learning_rate": 7.683769018431208e-05, + "loss": 0.5386, "step": 3792 }, { - "epoch": 0.5308607417774668, - "grad_norm": 0.4338240458587132, - "learning_rate": 2.3731214936500147e-05, - "loss": 0.5703, + "epoch": 1.061572907920515, + "grad_norm": 0.22674849870313224, + "learning_rate": 7.68246736663531e-05, + "loss": 0.5069, "step": 3793 }, { - "epoch": 0.531000699790063, - "grad_norm": 0.44632810155747676, - "learning_rate": 2.3719896365468176e-05, - "loss": 0.5428, + "epoch": 1.061852784774699, + "grad_norm": 0.2143074131367963, + "learning_rate": 7.681165459520666e-05, + "loss": 0.4915, "step": 3794 }, { - "epoch": 0.5311406578026592, - "grad_norm": 0.4693366131147214, - "learning_rate": 2.370857805751048e-05, - "loss": 0.5601, + "epoch": 1.0621326616288833, + "grad_norm": 0.22081785671530887, + "learning_rate": 7.679863297211195e-05, + "loss": 0.5092, "step": 3795 }, { - "epoch": 0.5312806158152554, - "grad_norm": 0.4328684886571571, - "learning_rate": 2.3697260014953108e-05, - "loss": 0.5984, + "epoch": 1.0624125384830674, + "grad_norm": 0.20953164470217545, + "learning_rate": 7.678560879830832e-05, + "loss": 0.5172, "step": 3796 }, { - "epoch": 0.5314205738278517, - "grad_norm": 0.4150511950700261, - "learning_rate": 2.3685942240122017e-05, - "loss": 0.5578, + "epoch": 1.0626924153372517, + "grad_norm": 0.23091093486011183, + "learning_rate": 7.677258207503547e-05, + "loss": 0.505, "step": 3797 }, { - "epoch": 0.5315605318404478, - "grad_norm": 0.4140437625288529, - "learning_rate": 2.3674624735343133e-05, - "loss": 0.57, + "epoch": 1.0629722921914357, + "grad_norm": 0.23316138255867908, + "learning_rate": 7.675955280353328e-05, + "loss": 0.5305, "step": 3798 }, { - "epoch": 0.5317004898530441, - "grad_norm": 0.4075001120603276, - "learning_rate": 2.3663307502942306e-05, - "loss": 0.5392, + "epoch": 1.06325216904562, + "grad_norm": 0.22396546236292414, + "learning_rate": 7.674652098504186e-05, + "loss": 0.495, "step": 3799 }, { - "epoch": 0.5318404478656403, - "grad_norm": 0.4132932952092581, - "learning_rate": 2.3651990545245356e-05, - "loss": 0.5826, + "epoch": 1.063532045899804, + "grad_norm": 0.2245788406883771, + "learning_rate": 7.673348662080161e-05, + "loss": 0.5167, "step": 3800 }, { - "epoch": 0.5319804058782366, - "grad_norm": 0.4260946689124058, - "learning_rate": 2.3640673864578023e-05, - "loss": 0.5291, + "epoch": 1.0638119227539882, + "grad_norm": 0.2205397897839526, + "learning_rate": 7.672044971205314e-05, + "loss": 0.507, "step": 3801 }, { - "epoch": 0.5321203638908327, - "grad_norm": 0.42352605916443814, - "learning_rate": 2.3629357463265995e-05, - "loss": 0.5619, + "epoch": 1.0640917996081725, + "grad_norm": 0.2227383971878762, + "learning_rate": 7.670741026003731e-05, + "loss": 0.5235, "step": 3802 }, { - "epoch": 0.532260321903429, - "grad_norm": 0.4071157003238009, - "learning_rate": 2.361804134363493e-05, - "loss": 0.5693, + "epoch": 1.0643716764623565, + "grad_norm": 0.22539336501337817, + "learning_rate": 7.669436826599522e-05, + "loss": 0.5347, "step": 3803 }, { - "epoch": 0.5324002799160252, - "grad_norm": 0.40527112461883336, - "learning_rate": 2.360672550801039e-05, - "loss": 0.5573, + "epoch": 1.0646515533165408, + "grad_norm": 0.22719825863243975, + "learning_rate": 7.668132373116822e-05, + "loss": 0.5049, "step": 3804 }, { - "epoch": 0.5325402379286214, - "grad_norm": 0.4185227436712144, - "learning_rate": 2.359540995871789e-05, - "loss": 0.5519, + "epoch": 1.064931430170725, + "grad_norm": 0.2214197606529923, + "learning_rate": 7.66682766567979e-05, + "loss": 0.4873, "step": 3805 }, { - "epoch": 0.5326801959412176, - "grad_norm": 0.40474881222817827, - "learning_rate": 2.3584094698082888e-05, - "loss": 0.5416, + "epoch": 1.065211307024909, + "grad_norm": 0.21954336514362585, + "learning_rate": 7.665522704412607e-05, + "loss": 0.4912, "step": 3806 }, { - "epoch": 0.5328201539538139, - "grad_norm": 0.4035189283812099, - "learning_rate": 2.35727797284308e-05, - "loss": 0.5386, + "epoch": 1.0654911838790933, + "grad_norm": 0.22102130479639134, + "learning_rate": 7.66421748943948e-05, + "loss": 0.5294, "step": 3807 }, { - "epoch": 0.53296011196641, - "grad_norm": 0.4126483303377044, - "learning_rate": 2.3561465052086962e-05, - "loss": 0.5513, + "epoch": 1.0657710607332773, + "grad_norm": 0.21446008680300668, + "learning_rate": 7.662912020884643e-05, + "loss": 0.5019, "step": 3808 }, { - "epoch": 0.5331000699790063, - "grad_norm": 0.41908332941234616, - "learning_rate": 2.3550150671376644e-05, - "loss": 0.598, + "epoch": 1.0660509375874616, + "grad_norm": 0.2206328164969449, + "learning_rate": 7.661606298872349e-05, + "loss": 0.5047, "step": 3809 }, { - "epoch": 0.5332400279916025, - "grad_norm": 0.42507750506666686, - "learning_rate": 2.353883658862508e-05, - "loss": 0.5289, + "epoch": 1.0663308144416457, + "grad_norm": 0.21756584651981553, + "learning_rate": 7.660300323526878e-05, + "loss": 0.4964, "step": 3810 }, { - "epoch": 0.5333799860041988, - "grad_norm": 0.3908992343447489, - "learning_rate": 2.3527522806157422e-05, - "loss": 0.5955, + "epoch": 1.0666106912958297, + "grad_norm": 0.22430413786534004, + "learning_rate": 7.658994094972533e-05, + "loss": 0.5041, "step": 3811 }, { - "epoch": 0.5335199440167949, - "grad_norm": 0.4088150381618844, - "learning_rate": 2.351620932629877e-05, - "loss": 0.5434, + "epoch": 1.066890568150014, + "grad_norm": 0.21934530109163514, + "learning_rate": 7.657687613333642e-05, + "loss": 0.526, "step": 3812 }, { - "epoch": 0.5336599020293912, - "grad_norm": 0.4313685833974606, - "learning_rate": 2.3504896151374144e-05, - "loss": 0.5751, + "epoch": 1.067170445004198, + "grad_norm": 0.22283384020942965, + "learning_rate": 7.656380878734555e-05, + "loss": 0.5078, "step": 3813 }, { - "epoch": 0.5337998600419874, - "grad_norm": 0.4274490027569291, - "learning_rate": 2.349358328370854e-05, - "loss": 0.5459, + "epoch": 1.0674503218583824, + "grad_norm": 0.21966713305042523, + "learning_rate": 7.65507389129965e-05, + "loss": 0.4956, "step": 3814 }, { - "epoch": 0.5339398180545836, - "grad_norm": 0.4224342493673025, - "learning_rate": 2.3482270725626856e-05, - "loss": 0.5372, + "epoch": 1.0677301987125665, + "grad_norm": 0.2144153612144557, + "learning_rate": 7.653766651153326e-05, + "loss": 0.5026, "step": 3815 }, { - "epoch": 0.5340797760671798, - "grad_norm": 0.508038847256669, - "learning_rate": 2.3470958479453938e-05, - "loss": 0.5545, + "epoch": 1.0680100755667505, + "grad_norm": 0.22565636445137668, + "learning_rate": 7.652459158420007e-05, + "loss": 0.4879, "step": 3816 }, { - "epoch": 0.5342197340797761, - "grad_norm": 0.40285477548710635, - "learning_rate": 2.345964654751456e-05, - "loss": 0.5348, + "epoch": 1.0682899524209348, + "grad_norm": 0.22303918321379382, + "learning_rate": 7.65115141322414e-05, + "loss": 0.4946, "step": 3817 }, { - "epoch": 0.5343596920923723, - "grad_norm": 0.4204468078785021, - "learning_rate": 2.3448334932133446e-05, - "loss": 0.5873, + "epoch": 1.068569829275119, + "grad_norm": 0.23063572648720812, + "learning_rate": 7.649843415690198e-05, + "loss": 0.5176, "step": 3818 }, { - "epoch": 0.5344996501049685, - "grad_norm": 0.3926665806611564, - "learning_rate": 2.3437023635635254e-05, - "loss": 0.5856, + "epoch": 1.0688497061293032, + "grad_norm": 0.21981819761370128, + "learning_rate": 7.648535165942677e-05, + "loss": 0.4891, "step": 3819 }, { - "epoch": 0.5346396081175647, - "grad_norm": 0.4281667973965022, - "learning_rate": 2.342571266034456e-05, - "loss": 0.5829, + "epoch": 1.0691295829834873, + "grad_norm": 0.24644240232076658, + "learning_rate": 7.647226664106095e-05, + "loss": 0.5078, "step": 3820 }, { - "epoch": 0.534779566130161, - "grad_norm": 0.4235798408466391, - "learning_rate": 2.3414402008585888e-05, - "loss": 0.5345, + "epoch": 1.0694094598376713, + "grad_norm": 0.23528066256126917, + "learning_rate": 7.645917910304998e-05, + "loss": 0.5336, "step": 3821 }, { - "epoch": 0.5349195241427571, - "grad_norm": 0.41560783094264714, - "learning_rate": 2.34030916826837e-05, - "loss": 0.5745, + "epoch": 1.0696893366918556, + "grad_norm": 0.2397239247242819, + "learning_rate": 7.644608904663955e-05, + "loss": 0.5175, "step": 3822 }, { - "epoch": 0.5350594821553534, - "grad_norm": 0.4120864111860111, - "learning_rate": 2.3391781684962368e-05, - "loss": 0.5304, + "epoch": 1.0699692135460397, + "grad_norm": 0.232170783854431, + "learning_rate": 7.643299647307554e-05, + "loss": 0.5035, "step": 3823 }, { - "epoch": 0.5351994401679496, - "grad_norm": 0.3998366908738693, - "learning_rate": 2.3380472017746202e-05, - "loss": 0.5579, + "epoch": 1.070249090400224, + "grad_norm": 0.21433696654298434, + "learning_rate": 7.641990138360412e-05, + "loss": 0.5099, "step": 3824 }, { - "epoch": 0.5353393981805459, - "grad_norm": 0.4078921865892698, - "learning_rate": 2.3369162683359486e-05, - "loss": 0.5532, + "epoch": 1.070528967254408, + "grad_norm": 0.21173823050745963, + "learning_rate": 7.640680377947173e-05, + "loss": 0.496, "step": 3825 }, { - "epoch": 0.535479356193142, - "grad_norm": 0.42102589317173017, - "learning_rate": 2.3357853684126384e-05, - "loss": 0.5367, + "epoch": 1.0708088441085921, + "grad_norm": 0.2290236926495319, + "learning_rate": 7.639370366192496e-05, + "loss": 0.4894, "step": 3826 }, { - "epoch": 0.5356193142057383, - "grad_norm": 0.4303168029475006, - "learning_rate": 2.3346545022371015e-05, - "loss": 0.5275, + "epoch": 1.0710887209627764, + "grad_norm": 0.22753587593982447, + "learning_rate": 7.638060103221072e-05, + "loss": 0.4933, "step": 3827 }, { - "epoch": 0.5357592722183345, - "grad_norm": 0.4084962687065381, - "learning_rate": 2.3335236700417404e-05, - "loss": 0.5643, + "epoch": 1.0713685978169605, + "grad_norm": 0.2258659794897729, + "learning_rate": 7.636749589157608e-05, + "loss": 0.5112, "step": 3828 }, { - "epoch": 0.5358992302309307, - "grad_norm": 0.42704269523521693, - "learning_rate": 2.3323928720589555e-05, - "loss": 0.5145, + "epoch": 1.0716484746711448, + "grad_norm": 0.23164992793209913, + "learning_rate": 7.635438824126843e-05, + "loss": 0.5043, "step": 3829 }, { - "epoch": 0.5360391882435269, - "grad_norm": 0.4251581534304365, - "learning_rate": 2.331262108521136e-05, - "loss": 0.5768, + "epoch": 1.0719283515253288, + "grad_norm": 0.21650578338652135, + "learning_rate": 7.634127808253537e-05, + "loss": 0.5256, "step": 3830 }, { - "epoch": 0.5361791462561232, - "grad_norm": 0.4296437560622346, - "learning_rate": 2.3301313796606652e-05, - "loss": 0.5804, + "epoch": 1.072208228379513, + "grad_norm": 0.22089266761764356, + "learning_rate": 7.632816541662471e-05, + "loss": 0.5065, "step": 3831 }, { - "epoch": 0.5363191042687194, - "grad_norm": 0.4160558480550044, - "learning_rate": 2.3290006857099194e-05, - "loss": 0.5618, + "epoch": 1.0724881052336972, + "grad_norm": 0.21431430064386633, + "learning_rate": 7.631505024478452e-05, + "loss": 0.4938, "step": 3832 }, { - "epoch": 0.5364590622813156, - "grad_norm": 0.43852307849271205, - "learning_rate": 2.327870026901268e-05, - "loss": 0.582, + "epoch": 1.0727679820878813, + "grad_norm": 0.20856808669043467, + "learning_rate": 7.630193256826313e-05, + "loss": 0.4727, "step": 3833 }, { - "epoch": 0.5365990202939118, - "grad_norm": 0.42358328346140833, - "learning_rate": 2.326739403467073e-05, - "loss": 0.5265, + "epoch": 1.0730478589420656, + "grad_norm": 0.22296633001784025, + "learning_rate": 7.628881238830907e-05, + "loss": 0.524, "step": 3834 }, { - "epoch": 0.5367389783065081, - "grad_norm": 0.4344046385915696, - "learning_rate": 2.3256088156396868e-05, - "loss": 0.5855, + "epoch": 1.0733277357962496, + "grad_norm": 0.2240612228926075, + "learning_rate": 7.627568970617113e-05, + "loss": 0.4838, "step": 3835 }, { - "epoch": 0.5368789363191042, - "grad_norm": 0.4318461945295688, - "learning_rate": 2.3244782636514596e-05, - "loss": 0.5665, + "epoch": 1.073607612650434, + "grad_norm": 0.21351479114079525, + "learning_rate": 7.626256452309836e-05, + "loss": 0.4966, "step": 3836 }, { - "epoch": 0.5370188943317005, - "grad_norm": 0.4129881442304272, - "learning_rate": 2.3233477477347305e-05, - "loss": 0.5313, + "epoch": 1.073887489504618, + "grad_norm": 0.22141062969527397, + "learning_rate": 7.624943684033998e-05, + "loss": 0.4952, "step": 3837 }, { - "epoch": 0.5371588523442967, - "grad_norm": 0.4277048919308099, - "learning_rate": 2.3222172681218302e-05, - "loss": 0.5632, + "epoch": 1.074167366358802, + "grad_norm": 0.22088480272505118, + "learning_rate": 7.623630665914551e-05, + "loss": 0.4856, "step": 3838 }, { - "epoch": 0.537298810356893, - "grad_norm": 0.4200455327121287, - "learning_rate": 2.3210868250450865e-05, - "loss": 0.5406, + "epoch": 1.0744472432129863, + "grad_norm": 0.22057886913820374, + "learning_rate": 7.622317398076468e-05, + "loss": 0.5046, "step": 3839 }, { - "epoch": 0.5374387683694891, - "grad_norm": 0.4205148102378079, - "learning_rate": 2.3199564187368156e-05, - "loss": 0.5609, + "epoch": 1.0747271200671704, + "grad_norm": 0.22254489674410552, + "learning_rate": 7.621003880644748e-05, + "loss": 0.5045, "step": 3840 }, { - "epoch": 0.5375787263820854, - "grad_norm": 0.41158756784707023, - "learning_rate": 2.3188260494293273e-05, - "loss": 0.5872, + "epoch": 1.0750069969213547, + "grad_norm": 0.22828366945400475, + "learning_rate": 7.619690113744412e-05, + "loss": 0.4912, "step": 3841 }, { - "epoch": 0.5377186843946816, - "grad_norm": 0.43038344004576323, - "learning_rate": 2.3176957173549235e-05, - "loss": 0.5768, + "epoch": 1.0752868737755388, + "grad_norm": 0.2373877195829569, + "learning_rate": 7.618376097500504e-05, + "loss": 0.4948, "step": 3842 }, { - "epoch": 0.5378586424072778, - "grad_norm": 0.4358694072281102, - "learning_rate": 2.3165654227459004e-05, - "loss": 0.5357, + "epoch": 1.0755667506297228, + "grad_norm": 0.21642635871544502, + "learning_rate": 7.617061832038095e-05, + "loss": 0.5043, "step": 3843 }, { - "epoch": 0.537998600419874, - "grad_norm": 0.4343491080187233, - "learning_rate": 2.3154351658345437e-05, - "loss": 0.5608, + "epoch": 1.0758466274839071, + "grad_norm": 0.20746003075145658, + "learning_rate": 7.615747317482274e-05, + "loss": 0.4975, "step": 3844 }, { - "epoch": 0.5381385584324703, - "grad_norm": 0.42585193830336604, - "learning_rate": 2.3143049468531334e-05, - "loss": 0.6127, + "epoch": 1.0761265043380912, + "grad_norm": 0.22922300715140198, + "learning_rate": 7.61443255395816e-05, + "loss": 0.5002, "step": 3845 }, { - "epoch": 0.5382785164450665, - "grad_norm": 0.4168594356306299, - "learning_rate": 2.3131747660339394e-05, - "loss": 0.5421, + "epoch": 1.0764063811922755, + "grad_norm": 0.22262266059975339, + "learning_rate": 7.613117541590892e-05, + "loss": 0.47, "step": 3846 }, { - "epoch": 0.5384184744576627, - "grad_norm": 0.4143482797388666, - "learning_rate": 2.3120446236092276e-05, - "loss": 0.5634, + "epoch": 1.0766862580464596, + "grad_norm": 0.21922009104427154, + "learning_rate": 7.611802280505634e-05, + "loss": 0.4931, "step": 3847 }, { - "epoch": 0.5385584324702589, - "grad_norm": 0.42347488623228974, - "learning_rate": 2.310914519811253e-05, - "loss": 0.553, + "epoch": 1.0769661349006436, + "grad_norm": 0.24920565933246105, + "learning_rate": 7.610486770827573e-05, + "loss": 0.5226, "step": 3848 }, { - "epoch": 0.5386983904828552, - "grad_norm": 0.4232797867273413, - "learning_rate": 2.309784454872262e-05, - "loss": 0.597, + "epoch": 1.077246011754828, + "grad_norm": 0.22925813151376365, + "learning_rate": 7.609171012681919e-05, + "loss": 0.5073, "step": 3849 }, { - "epoch": 0.5388383484954513, - "grad_norm": 0.4226580404199005, - "learning_rate": 2.3086544290244967e-05, - "loss": 0.5418, + "epoch": 1.077525888609012, + "grad_norm": 0.22185591528917906, + "learning_rate": 7.607855006193908e-05, + "loss": 0.5166, "step": 3850 }, { - "epoch": 0.5389783065080476, - "grad_norm": 0.43647901356290086, - "learning_rate": 2.3075244425001874e-05, - "loss": 0.6088, + "epoch": 1.0778057654631963, + "grad_norm": 0.23176119898527425, + "learning_rate": 7.6065387514888e-05, + "loss": 0.5118, "step": 3851 }, { - "epoch": 0.5391182645206438, - "grad_norm": 0.4110315073763923, - "learning_rate": 2.3063944955315584e-05, - "loss": 0.54, + "epoch": 1.0780856423173804, + "grad_norm": 0.230844068591777, + "learning_rate": 7.605222248691872e-05, + "loss": 0.4976, "step": 3852 }, { - "epoch": 0.5392582225332401, - "grad_norm": 0.370641365809909, - "learning_rate": 2.3052645883508242e-05, - "loss": 0.5216, + "epoch": 1.0783655191715644, + "grad_norm": 0.22967617877081778, + "learning_rate": 7.603905497928434e-05, + "loss": 0.5118, "step": 3853 }, { - "epoch": 0.5393981805458362, - "grad_norm": 0.4035453298163194, - "learning_rate": 2.3041347211901935e-05, - "loss": 0.559, + "epoch": 1.0786453960257487, + "grad_norm": 0.22235229326238526, + "learning_rate": 7.602588499323812e-05, + "loss": 0.5327, "step": 3854 }, { - "epoch": 0.5395381385584325, - "grad_norm": 0.426639073461407, - "learning_rate": 2.303004894281865e-05, - "loss": 0.5409, + "epoch": 1.0789252728799328, + "grad_norm": 0.2222409391099783, + "learning_rate": 7.601271253003361e-05, + "loss": 0.4979, "step": 3855 }, { - "epoch": 0.5396780965710287, - "grad_norm": 0.42512057290523336, - "learning_rate": 2.3018751078580287e-05, - "loss": 0.607, + "epoch": 1.079205149734117, + "grad_norm": 0.23195003249182133, + "learning_rate": 7.599953759092455e-05, + "loss": 0.4961, "step": 3856 }, { - "epoch": 0.5398180545836249, - "grad_norm": 0.42427900709447586, - "learning_rate": 2.300745362150869e-05, - "loss": 0.5736, + "epoch": 1.0794850265883011, + "grad_norm": 0.22716270571121247, + "learning_rate": 7.598636017716496e-05, + "loss": 0.4911, "step": 3857 }, { - "epoch": 0.5399580125962211, - "grad_norm": 0.4308088797406061, - "learning_rate": 2.299615657392559e-05, - "loss": 0.5737, + "epoch": 1.0797649034424852, + "grad_norm": 0.21778475956550405, + "learning_rate": 7.597318029000906e-05, + "loss": 0.4969, "step": 3858 }, { - "epoch": 0.5400979706088174, - "grad_norm": 0.39893806446075136, - "learning_rate": 2.2984859938152644e-05, - "loss": 0.5167, + "epoch": 1.0800447802966695, + "grad_norm": 0.22821702121996498, + "learning_rate": 7.595999793071131e-05, + "loss": 0.506, "step": 3859 }, { - "epoch": 0.5402379286214136, - "grad_norm": 0.4227311792800813, - "learning_rate": 2.297356371651142e-05, - "loss": 0.5559, + "epoch": 1.0803246571508536, + "grad_norm": 0.23175362531939797, + "learning_rate": 7.594681310052645e-05, + "loss": 0.484, "step": 3860 }, { - "epoch": 0.5403778866340098, - "grad_norm": 0.4307602196718643, - "learning_rate": 2.296226791132342e-05, - "loss": 0.567, + "epoch": 1.0806045340050379, + "grad_norm": 0.22531728623967945, + "learning_rate": 7.593362580070937e-05, + "loss": 0.5081, "step": 3861 }, { - "epoch": 0.540517844646606, - "grad_norm": 0.4048870602226458, - "learning_rate": 2.2950972524910045e-05, - "loss": 0.5474, + "epoch": 1.080884410859222, + "grad_norm": 0.21878111139174958, + "learning_rate": 7.592043603251529e-05, + "loss": 0.4851, "step": 3862 }, { - "epoch": 0.5406578026592023, - "grad_norm": 0.4130105755887349, - "learning_rate": 2.2939677559592605e-05, - "loss": 0.5438, + "epoch": 1.081164287713406, + "grad_norm": 0.21420800413672003, + "learning_rate": 7.59072437971996e-05, + "loss": 0.4936, "step": 3863 }, { - "epoch": 0.5407977606717984, - "grad_norm": 0.4388089801883774, - "learning_rate": 2.2928383017692322e-05, - "loss": 0.562, + "epoch": 1.0814441645675903, + "grad_norm": 0.21895571432038985, + "learning_rate": 7.589404909601793e-05, + "loss": 0.518, "step": 3864 }, { - "epoch": 0.5409377186843947, - "grad_norm": 0.4591220588088252, - "learning_rate": 2.2917088901530358e-05, - "loss": 0.5396, + "epoch": 1.0817240414217744, + "grad_norm": 0.22844836395438742, + "learning_rate": 7.588085193022618e-05, + "loss": 0.4873, "step": 3865 }, { - "epoch": 0.5410776766969909, - "grad_norm": 0.42951530189135795, - "learning_rate": 2.290579521342776e-05, - "loss": 0.5726, + "epoch": 1.0820039182759587, + "grad_norm": 0.2459045119791399, + "learning_rate": 7.586765230108046e-05, + "loss": 0.5021, "step": 3866 }, { - "epoch": 0.5412176347095872, - "grad_norm": 0.40253890454723895, - "learning_rate": 2.2894501955705477e-05, - "loss": 0.5234, + "epoch": 1.0822837951301427, + "grad_norm": 0.23175907115140723, + "learning_rate": 7.585445020983711e-05, + "loss": 0.4952, "step": 3867 }, { - "epoch": 0.5413575927221833, - "grad_norm": 0.42549136435980095, - "learning_rate": 2.288320913068442e-05, - "loss": 0.5505, + "epoch": 1.0825636719843268, + "grad_norm": 0.23330686736631892, + "learning_rate": 7.584124565775272e-05, + "loss": 0.4911, "step": 3868 }, { - "epoch": 0.5414975507347796, - "grad_norm": 0.4060031912669076, - "learning_rate": 2.2871916740685366e-05, - "loss": 0.5863, + "epoch": 1.082843548838511, + "grad_norm": 0.22702907790495536, + "learning_rate": 7.582803864608411e-05, + "loss": 0.5375, "step": 3869 }, { - "epoch": 0.5416375087473758, - "grad_norm": 0.4242326527971933, - "learning_rate": 2.2860624788029013e-05, - "loss": 0.5408, + "epoch": 1.0831234256926952, + "grad_norm": 0.22027379206298642, + "learning_rate": 7.581482917608832e-05, + "loss": 0.5259, "step": 3870 }, { - "epoch": 0.541777466759972, - "grad_norm": 0.43284739833626334, - "learning_rate": 2.2849333275035964e-05, - "loss": 0.5781, + "epoch": 1.0834033025468794, + "grad_norm": 0.22687339401866688, + "learning_rate": 7.580161724902263e-05, + "loss": 0.5174, "step": 3871 }, { - "epoch": 0.5419174247725682, - "grad_norm": 0.38138784670752485, - "learning_rate": 2.283804220402676e-05, - "loss": 0.5374, + "epoch": 1.0836831794010635, + "grad_norm": 0.22648503128740832, + "learning_rate": 7.578840286614459e-05, + "loss": 0.5211, "step": 3872 }, { - "epoch": 0.5420573827851645, - "grad_norm": 0.42596112184398915, - "learning_rate": 2.2826751577321813e-05, - "loss": 0.5407, + "epoch": 1.0839630562552478, + "grad_norm": 0.22312352399527574, + "learning_rate": 7.577518602871192e-05, + "loss": 0.4809, "step": 3873 }, { - "epoch": 0.5421973407977607, - "grad_norm": 0.4016705521461287, - "learning_rate": 2.2815461397241466e-05, - "loss": 0.543, + "epoch": 1.0842429331094319, + "grad_norm": 0.22815285561006848, + "learning_rate": 7.576196673798262e-05, + "loss": 0.5314, "step": 3874 }, { - "epoch": 0.5423372988103569, - "grad_norm": 0.41007081598495276, - "learning_rate": 2.2804171666105976e-05, - "loss": 0.5606, + "epoch": 1.084522809963616, + "grad_norm": 0.20861969161496066, + "learning_rate": 7.57487449952149e-05, + "loss": 0.4906, "step": 3875 }, { - "epoch": 0.5424772568229531, - "grad_norm": 0.41478676365527245, - "learning_rate": 2.2792882386235485e-05, - "loss": 0.5264, + "epoch": 1.0848026868178002, + "grad_norm": 0.22163694118804153, + "learning_rate": 7.573552080166722e-05, + "loss": 0.5199, "step": 3876 }, { - "epoch": 0.5426172148355494, - "grad_norm": 0.4120034933406958, - "learning_rate": 2.2781593559950052e-05, - "loss": 0.5392, + "epoch": 1.0850825636719843, + "grad_norm": 0.2249256201028353, + "learning_rate": 7.572229415859827e-05, + "loss": 0.5099, "step": 3877 }, { - "epoch": 0.5427571728481455, - "grad_norm": 0.4154088448683998, - "learning_rate": 2.277030518956965e-05, - "loss": 0.5667, + "epoch": 1.0853624405261684, + "grad_norm": 0.2378261645205978, + "learning_rate": 7.570906506726697e-05, + "loss": 0.5399, "step": 3878 }, { - "epoch": 0.5428971308607418, - "grad_norm": 0.4099026076541546, - "learning_rate": 2.2759017277414166e-05, - "loss": 0.5248, + "epoch": 1.0856423173803527, + "grad_norm": 0.21914759134732284, + "learning_rate": 7.569583352893245e-05, + "loss": 0.5009, "step": 3879 }, { - "epoch": 0.543037088873338, - "grad_norm": 0.44769525609475164, - "learning_rate": 2.2747729825803366e-05, - "loss": 0.5772, + "epoch": 1.0859221942345367, + "grad_norm": 0.22379024983302978, + "learning_rate": 7.568259954485411e-05, + "loss": 0.4887, "step": 3880 }, { - "epoch": 0.5431770468859343, - "grad_norm": 0.4131464054022151, - "learning_rate": 2.273644283705694e-05, - "loss": 0.5797, + "epoch": 1.086202071088721, + "grad_norm": 0.22892909741472098, + "learning_rate": 7.566936311629158e-05, + "loss": 0.5098, "step": 3881 }, { - "epoch": 0.5433170048985304, - "grad_norm": 0.4261856744618942, - "learning_rate": 2.2725156313494466e-05, - "loss": 0.549, + "epoch": 1.086481947942905, + "grad_norm": 0.2201131590342588, + "learning_rate": 7.565612424450471e-05, + "loss": 0.5016, "step": 3882 }, { - "epoch": 0.5434569629111267, - "grad_norm": 0.4207414434352504, - "learning_rate": 2.271387025743546e-05, - "loss": 0.5738, + "epoch": 1.0867618247970894, + "grad_norm": 0.22667326283362565, + "learning_rate": 7.564288293075357e-05, + "loss": 0.4997, "step": 3883 }, { - "epoch": 0.5435969209237229, - "grad_norm": 0.4087475690432694, - "learning_rate": 2.2702584671199317e-05, - "loss": 0.5674, + "epoch": 1.0870417016512735, + "grad_norm": 0.22168560884901403, + "learning_rate": 7.562963917629847e-05, + "loss": 0.4901, "step": 3884 }, { - "epoch": 0.5437368789363191, - "grad_norm": 0.40838344825260353, - "learning_rate": 2.2691299557105328e-05, - "loss": 0.5503, + "epoch": 1.0873215785054575, + "grad_norm": 0.22631729313244486, + "learning_rate": 7.561639298239997e-05, + "loss": 0.4758, "step": 3885 }, { - "epoch": 0.5438768369489153, - "grad_norm": 0.42246887497450447, - "learning_rate": 2.268001491747271e-05, - "loss": 0.5276, + "epoch": 1.0876014553596418, + "grad_norm": 0.2338524440250818, + "learning_rate": 7.560314435031885e-05, + "loss": 0.5054, "step": 3886 }, { - "epoch": 0.5440167949615116, - "grad_norm": 0.4325429890817475, - "learning_rate": 2.266873075462056e-05, - "loss": 0.5538, + "epoch": 1.0878813322138259, + "grad_norm": 0.22119235290818326, + "learning_rate": 7.558989328131613e-05, + "loss": 0.474, "step": 3887 }, { - "epoch": 0.5441567529741078, - "grad_norm": 0.420910542262214, - "learning_rate": 2.2657447070867902e-05, - "loss": 0.5475, + "epoch": 1.0881612090680102, + "grad_norm": 0.22800219538548638, + "learning_rate": 7.557663977665304e-05, + "loss": 0.5154, "step": 3888 }, { - "epoch": 0.544296710986704, - "grad_norm": 0.422876814725259, - "learning_rate": 2.264616386853363e-05, - "loss": 0.5477, + "epoch": 1.0884410859221942, + "grad_norm": 0.2146893923232453, + "learning_rate": 7.556338383759105e-05, + "loss": 0.4989, "step": 3889 }, { - "epoch": 0.5444366689993002, - "grad_norm": 0.4143598457765582, - "learning_rate": 2.2634881149936575e-05, - "loss": 0.5419, + "epoch": 1.0887209627763783, + "grad_norm": 0.21957207199801138, + "learning_rate": 7.555012546539188e-05, + "loss": 0.4885, "step": 3890 }, { - "epoch": 0.5445766270118965, - "grad_norm": 0.4105061536286951, - "learning_rate": 2.2623598917395438e-05, - "loss": 0.5475, + "epoch": 1.0890008396305626, + "grad_norm": 0.2283523860823396, + "learning_rate": 7.553686466131747e-05, + "loss": 0.512, "step": 3891 }, { - "epoch": 0.5447165850244926, - "grad_norm": 0.41659655042186283, - "learning_rate": 2.261231717322883e-05, - "loss": 0.5753, + "epoch": 1.0892807164847467, + "grad_norm": 0.23672055375470924, + "learning_rate": 7.552360142662999e-05, + "loss": 0.5125, "step": 3892 }, { - "epoch": 0.5448565430370889, - "grad_norm": 0.4530820570399967, - "learning_rate": 2.2601035919755274e-05, - "loss": 0.5281, + "epoch": 1.089560593338931, + "grad_norm": 0.22939385152277814, + "learning_rate": 7.551033576259183e-05, + "loss": 0.5431, "step": 3893 }, { - "epoch": 0.5449965010496851, - "grad_norm": 0.39850319238936455, - "learning_rate": 2.258975515929318e-05, - "loss": 0.5278, + "epoch": 1.089840470193115, + "grad_norm": 0.2285399202759489, + "learning_rate": 7.549706767046565e-05, + "loss": 0.4925, "step": 3894 }, { - "epoch": 0.5451364590622814, - "grad_norm": 0.3978091253457419, - "learning_rate": 2.2578474894160857e-05, - "loss": 0.5453, + "epoch": 1.090120347047299, + "grad_norm": 0.22353177443499367, + "learning_rate": 7.548379715151428e-05, + "loss": 0.4951, "step": 3895 }, { - "epoch": 0.5452764170748775, - "grad_norm": 0.4231460383028299, - "learning_rate": 2.2567195126676507e-05, - "loss": 0.5963, + "epoch": 1.0904002239014834, + "grad_norm": 0.22202912430917907, + "learning_rate": 7.547052420700085e-05, + "loss": 0.4806, "step": 3896 }, { - "epoch": 0.5454163750874738, - "grad_norm": 0.42063829224368665, - "learning_rate": 2.2555915859158244e-05, - "loss": 0.5614, + "epoch": 1.0906801007556675, + "grad_norm": 0.21661626552023552, + "learning_rate": 7.545724883818865e-05, + "loss": 0.5037, "step": 3897 }, { - "epoch": 0.54555633310007, - "grad_norm": 0.4083469272821722, - "learning_rate": 2.2544637093924074e-05, - "loss": 0.5389, + "epoch": 1.0909599776098517, + "grad_norm": 0.22988693042275354, + "learning_rate": 7.544397104634128e-05, + "loss": 0.504, "step": 3898 }, { - "epoch": 0.5456962911126662, - "grad_norm": 0.4217541606558565, - "learning_rate": 2.2533358833291896e-05, - "loss": 0.5544, + "epoch": 1.0912398544640358, + "grad_norm": 0.22080517506414357, + "learning_rate": 7.543069083272249e-05, + "loss": 0.5186, "step": 3899 }, { - "epoch": 0.5458362491252624, - "grad_norm": 0.4303357717984438, - "learning_rate": 2.2522081079579498e-05, - "loss": 0.5524, + "epoch": 1.0915197313182199, + "grad_norm": 0.22531274355080877, + "learning_rate": 7.541740819859632e-05, + "loss": 0.5107, "step": 3900 }, { - "epoch": 0.5459762071378587, - "grad_norm": 0.41929973760406747, - "learning_rate": 2.251080383510459e-05, - "loss": 0.5652, + "epoch": 1.0917996081724042, + "grad_norm": 0.21995059903334982, + "learning_rate": 7.5404123145227e-05, + "loss": 0.5162, "step": 3901 }, { - "epoch": 0.5461161651504549, - "grad_norm": 0.4126809759165879, - "learning_rate": 2.2499527102184744e-05, - "loss": 0.5917, + "epoch": 1.0920794850265882, + "grad_norm": 0.2237659393370415, + "learning_rate": 7.539083567387904e-05, + "loss": 0.5005, "step": 3902 }, { - "epoch": 0.546256123163051, - "grad_norm": 0.39993684724127465, - "learning_rate": 2.2488250883137445e-05, - "loss": 0.532, + "epoch": 1.0923593618807725, + "grad_norm": 0.22888919607289115, + "learning_rate": 7.53775457858171e-05, + "loss": 0.502, "step": 3903 }, { - "epoch": 0.5463960811756473, - "grad_norm": 0.4070252666557728, - "learning_rate": 2.247697518028008e-05, - "loss": 0.5381, + "epoch": 1.0926392387349566, + "grad_norm": 0.22955372167578642, + "learning_rate": 7.536425348230617e-05, + "loss": 0.4971, "step": 3904 }, { - "epoch": 0.5465360391882436, - "grad_norm": 0.4203749044451171, - "learning_rate": 2.2465699995929916e-05, - "loss": 0.5445, + "epoch": 1.0929191155891407, + "grad_norm": 0.22301297637817336, + "learning_rate": 7.535095876461138e-05, + "loss": 0.5007, "step": 3905 }, { - "epoch": 0.5466759972008397, - "grad_norm": 0.4313492162077952, - "learning_rate": 2.2454425332404122e-05, - "loss": 0.55, + "epoch": 1.093198992443325, + "grad_norm": 0.23809298142797114, + "learning_rate": 7.533766163399816e-05, + "loss": 0.5244, "step": 3906 }, { - "epoch": 0.546815955213436, - "grad_norm": 0.39695375283438905, - "learning_rate": 2.2443151192019735e-05, - "loss": 0.5418, + "epoch": 1.093478869297509, + "grad_norm": 0.2190281078249623, + "learning_rate": 7.532436209173213e-05, + "loss": 0.492, "step": 3907 }, { - "epoch": 0.5469559132260322, - "grad_norm": 0.4051789619978703, - "learning_rate": 2.2431877577093737e-05, - "loss": 0.5059, + "epoch": 1.0937587461516933, + "grad_norm": 0.2267505963590063, + "learning_rate": 7.531106013907911e-05, + "loss": 0.4972, "step": 3908 }, { - "epoch": 0.5470958712386285, - "grad_norm": 0.4218855232884939, - "learning_rate": 2.2420604489942946e-05, - "loss": 0.5617, + "epoch": 1.0940386230058774, + "grad_norm": 0.23097391565857056, + "learning_rate": 7.529775577730525e-05, + "loss": 0.5227, "step": 3909 }, { - "epoch": 0.5472358292512246, - "grad_norm": 0.3989929337699537, - "learning_rate": 2.2409331932884108e-05, - "loss": 0.5244, + "epoch": 1.0943184998600617, + "grad_norm": 0.22143711436518954, + "learning_rate": 7.528444900767682e-05, + "loss": 0.4953, "step": 3910 }, { - "epoch": 0.5473757872638209, - "grad_norm": 0.4010087179047754, - "learning_rate": 2.2398059908233842e-05, - "loss": 0.5371, + "epoch": 1.0945983767142458, + "grad_norm": 0.2144417384807327, + "learning_rate": 7.527113983146038e-05, + "loss": 0.5174, "step": 3911 }, { - "epoch": 0.5475157452764171, - "grad_norm": 0.4466643997788068, - "learning_rate": 2.238678841830867e-05, - "loss": 0.5934, + "epoch": 1.0948782535684298, + "grad_norm": 0.22592666799398026, + "learning_rate": 7.525782824992271e-05, + "loss": 0.488, "step": 3912 }, { - "epoch": 0.5476557032890133, - "grad_norm": 0.39728611887637755, - "learning_rate": 2.237551746542499e-05, - "loss": 0.5587, + "epoch": 1.0951581304226141, + "grad_norm": 0.2494883368003387, + "learning_rate": 7.52445142643308e-05, + "loss": 0.5015, "step": 3913 }, { - "epoch": 0.5477956613016095, - "grad_norm": 0.41809901084229356, - "learning_rate": 2.236424705189909e-05, - "loss": 0.5865, + "epoch": 1.0954380072767982, + "grad_norm": 0.22054061266688652, + "learning_rate": 7.52311978759519e-05, + "loss": 0.49, "step": 3914 }, { - "epoch": 0.5479356193142058, - "grad_norm": 0.40468351305300143, - "learning_rate": 2.2352977180047175e-05, - "loss": 0.5213, + "epoch": 1.0957178841309823, + "grad_norm": 0.23958747852787587, + "learning_rate": 7.521787908605349e-05, + "loss": 0.5218, "step": 3915 }, { - "epoch": 0.548075577326802, - "grad_norm": 0.4113028432405159, - "learning_rate": 2.2341707852185305e-05, - "loss": 0.5488, + "epoch": 1.0959977609851665, + "grad_norm": 0.2157341944758859, + "learning_rate": 7.520455789590319e-05, + "loss": 0.5011, "step": 3916 }, { - "epoch": 0.5482155353393982, - "grad_norm": 0.4448481069088446, - "learning_rate": 2.2330439070629448e-05, - "loss": 0.5665, + "epoch": 1.0962776378393506, + "grad_norm": 0.22119871039538472, + "learning_rate": 7.519123430676899e-05, + "loss": 0.5135, "step": 3917 }, { - "epoch": 0.5483554933519944, - "grad_norm": 0.4360732301218836, - "learning_rate": 2.2319170837695435e-05, - "loss": 0.5586, + "epoch": 1.096557514693535, + "grad_norm": 0.23261646419392415, + "learning_rate": 7.5177908319919e-05, + "loss": 0.5237, "step": 3918 }, { - "epoch": 0.5484954513645907, - "grad_norm": 0.4194014787432002, - "learning_rate": 2.2307903155699027e-05, - "loss": 0.5265, + "epoch": 1.096837391547719, + "grad_norm": 0.21912662190164525, + "learning_rate": 7.516457993662161e-05, + "loss": 0.4969, "step": 3919 }, { - "epoch": 0.5486354093771868, - "grad_norm": 0.4085819437326063, - "learning_rate": 2.2296636026955835e-05, - "loss": 0.5485, + "epoch": 1.0971172684019033, + "grad_norm": 0.21119565994755876, + "learning_rate": 7.51512491581454e-05, + "loss": 0.4891, "step": 3920 }, { - "epoch": 0.548775367389783, - "grad_norm": 0.40406354838300534, - "learning_rate": 2.2285369453781364e-05, - "loss": 0.5409, + "epoch": 1.0973971452560873, + "grad_norm": 0.21202423716618202, + "learning_rate": 7.513791598575923e-05, + "loss": 0.4862, "step": 3921 }, { - "epoch": 0.5489153254023793, - "grad_norm": 0.41085941144789107, - "learning_rate": 2.2274103438491022e-05, - "loss": 0.5228, + "epoch": 1.0976770221102714, + "grad_norm": 0.22509411180443678, + "learning_rate": 7.512458042073214e-05, + "loss": 0.4971, "step": 3922 }, { - "epoch": 0.5490552834149756, - "grad_norm": 0.44664374072567387, - "learning_rate": 2.2262837983400082e-05, - "loss": 0.5663, + "epoch": 1.0979568989644557, + "grad_norm": 0.21635296175845792, + "learning_rate": 7.511124246433342e-05, + "loss": 0.5044, "step": 3923 }, { - "epoch": 0.5491952414275717, - "grad_norm": 0.3848765417302251, - "learning_rate": 2.2251573090823706e-05, - "loss": 0.5645, + "epoch": 1.0982367758186398, + "grad_norm": 0.21496274544460234, + "learning_rate": 7.509790211783261e-05, + "loss": 0.4733, "step": 3924 }, { - "epoch": 0.549335199440168, - "grad_norm": 0.40234337327172925, - "learning_rate": 2.2240308763076935e-05, - "loss": 0.573, + "epoch": 1.098516652672824, + "grad_norm": 0.22632612773336558, + "learning_rate": 7.508455938249942e-05, + "loss": 0.5074, "step": 3925 }, { - "epoch": 0.5494751574527642, - "grad_norm": 0.4124065219528931, - "learning_rate": 2.222904500247473e-05, - "loss": 0.5579, + "epoch": 1.0987965295270081, + "grad_norm": 0.227020819243741, + "learning_rate": 7.50712142596038e-05, + "loss": 0.5057, "step": 3926 }, { - "epoch": 0.5496151154653603, - "grad_norm": 0.4085451573297374, - "learning_rate": 2.2217781811331885e-05, - "loss": 0.5534, + "epoch": 1.0990764063811922, + "grad_norm": 0.2231419819317831, + "learning_rate": 7.5057866750416e-05, + "loss": 0.5048, "step": 3927 }, { - "epoch": 0.5497550734779566, - "grad_norm": 0.413762414731123, - "learning_rate": 2.22065191919631e-05, - "loss": 0.5493, + "epoch": 1.0993562832353765, + "grad_norm": 0.22439510327942658, + "learning_rate": 7.50445168562064e-05, + "loss": 0.4935, "step": 3928 }, { - "epoch": 0.5498950314905529, - "grad_norm": 0.4148155757825788, - "learning_rate": 2.2195257146682975e-05, - "loss": 0.5439, + "epoch": 1.0996361600895606, + "grad_norm": 0.23708832947391364, + "learning_rate": 7.503116457824568e-05, + "loss": 0.4896, "step": 3929 }, { - "epoch": 0.5500349895031491, - "grad_norm": 0.44638831807610185, - "learning_rate": 2.218399567780597e-05, - "loss": 0.5899, + "epoch": 1.0999160369437448, + "grad_norm": 0.22151595670376018, + "learning_rate": 7.50178099178047e-05, + "loss": 0.5061, "step": 3930 }, { - "epoch": 0.5501749475157452, - "grad_norm": 0.41295788493334096, - "learning_rate": 2.217273478764642e-05, - "loss": 0.5524, + "epoch": 1.100195913797929, + "grad_norm": 0.5290360487146467, + "learning_rate": 7.500445287615456e-05, + "loss": 0.5242, "step": 3931 }, { - "epoch": 0.5503149055283415, - "grad_norm": 0.402717034759649, - "learning_rate": 2.216147447851855e-05, - "loss": 0.5534, + "epoch": 1.100475790652113, + "grad_norm": 0.21946455042509816, + "learning_rate": 7.499109345456662e-05, + "loss": 0.493, "step": 3932 }, { - "epoch": 0.5504548635409378, - "grad_norm": 0.40126403258397014, - "learning_rate": 2.2150214752736488e-05, - "loss": 0.5227, + "epoch": 1.1007556675062973, + "grad_norm": 0.24430199441378006, + "learning_rate": 7.497773165431241e-05, + "loss": 0.5281, "step": 3933 }, { - "epoch": 0.5505948215535339, - "grad_norm": 0.4081017029720488, - "learning_rate": 2.2138955612614207e-05, - "loss": 0.5316, + "epoch": 1.1010355443604813, + "grad_norm": 0.2158165078305361, + "learning_rate": 7.496436747666372e-05, + "loss": 0.5065, "step": 3934 }, { - "epoch": 0.5507347795661302, - "grad_norm": 0.3952776691289986, - "learning_rate": 2.2127697060465576e-05, - "loss": 0.54, + "epoch": 1.1013154212146656, + "grad_norm": 0.2170809312137706, + "learning_rate": 7.495100092289256e-05, + "loss": 0.5104, "step": 3935 }, { - "epoch": 0.5508747375787264, - "grad_norm": 0.4086382501258365, - "learning_rate": 2.211643909860433e-05, - "loss": 0.5376, + "epoch": 1.1015952980688497, + "grad_norm": 0.2157092122895801, + "learning_rate": 7.493763199427117e-05, + "loss": 0.4946, "step": 3936 }, { - "epoch": 0.5510146955913227, - "grad_norm": 0.44788307946149386, - "learning_rate": 2.210518172934412e-05, - "loss": 0.5916, + "epoch": 1.1018751749230338, + "grad_norm": 0.22601878455235838, + "learning_rate": 7.4924260692072e-05, + "loss": 0.5061, "step": 3937 }, { - "epoch": 0.5511546536039188, - "grad_norm": 0.4092566815599357, - "learning_rate": 2.2093924954998438e-05, - "loss": 0.5282, + "epoch": 1.102155051777218, + "grad_norm": 0.2200046203941348, + "learning_rate": 7.491088701756775e-05, + "loss": 0.5304, "step": 3938 }, { - "epoch": 0.551294611616515, - "grad_norm": 0.43673368860544104, - "learning_rate": 2.2082668777880653e-05, - "loss": 0.5458, + "epoch": 1.1024349286314021, + "grad_norm": 0.22308895038824725, + "learning_rate": 7.489751097203133e-05, + "loss": 0.504, "step": 3939 }, { - "epoch": 0.5514345696291113, - "grad_norm": 0.406082695925654, - "learning_rate": 2.2071413200304043e-05, - "loss": 0.5375, + "epoch": 1.1027148054855864, + "grad_norm": 0.22743067215840185, + "learning_rate": 7.488413255673588e-05, + "loss": 0.4983, "step": 3940 }, { - "epoch": 0.5515745276417074, - "grad_norm": 0.3883615387736086, - "learning_rate": 2.206015822458174e-05, - "loss": 0.5221, + "epoch": 1.1029946823397705, + "grad_norm": 0.22978696573372845, + "learning_rate": 7.487075177295477e-05, + "loss": 0.5119, "step": 3941 }, { - "epoch": 0.5517144856543037, - "grad_norm": 0.4315662304861121, - "learning_rate": 2.2048903853026745e-05, - "loss": 0.5598, + "epoch": 1.1032745591939546, + "grad_norm": 0.21442187635270676, + "learning_rate": 7.485736862196157e-05, + "loss": 0.493, "step": 3942 }, { - "epoch": 0.5518544436669, - "grad_norm": 0.40829821883297823, - "learning_rate": 2.203765008795195e-05, - "loss": 0.5876, + "epoch": 1.1035544360481389, + "grad_norm": 0.2391998819954397, + "learning_rate": 7.484398310503014e-05, + "loss": 0.4936, "step": 3943 }, { - "epoch": 0.5519944016794962, - "grad_norm": 0.4263164757098248, - "learning_rate": 2.2026396931670125e-05, - "loss": 0.5506, + "epoch": 1.103834312902323, + "grad_norm": 0.22774254590835027, + "learning_rate": 7.483059522343448e-05, + "loss": 0.5015, "step": 3944 }, { - "epoch": 0.5521343596920923, - "grad_norm": 0.4109132538637934, - "learning_rate": 2.2015144386493896e-05, - "loss": 0.5435, + "epoch": 1.1041141897565072, + "grad_norm": 0.22042592966709393, + "learning_rate": 7.481720497844885e-05, + "loss": 0.4996, "step": 3945 }, { - "epoch": 0.5522743177046886, - "grad_norm": 0.40656446743742547, - "learning_rate": 2.2003892454735786e-05, - "loss": 0.5169, + "epoch": 1.1043940666106913, + "grad_norm": 0.5397382889686548, + "learning_rate": 7.480381237134777e-05, + "loss": 0.4879, "step": 3946 }, { - "epoch": 0.5524142757172849, - "grad_norm": 0.4174968254449107, - "learning_rate": 2.1992641138708166e-05, - "loss": 0.5506, + "epoch": 1.1046739434648754, + "grad_norm": 0.22660537818506746, + "learning_rate": 7.479041740340595e-05, + "loss": 0.5128, "step": 3947 }, { - "epoch": 0.552554233729881, - "grad_norm": 0.41392476596527283, - "learning_rate": 2.1981390440723316e-05, - "loss": 0.5705, + "epoch": 1.1049538203190596, + "grad_norm": 0.2259600732655642, + "learning_rate": 7.477702007589832e-05, + "loss": 0.4873, "step": 3948 }, { - "epoch": 0.5526941917424772, - "grad_norm": 0.40768401049876407, - "learning_rate": 2.197014036309336e-05, - "loss": 0.5546, + "epoch": 1.1052336971732437, + "grad_norm": 0.22081623754235322, + "learning_rate": 7.476362039010005e-05, + "loss": 0.5058, "step": 3949 }, { - "epoch": 0.5528341497550735, - "grad_norm": 0.43659985592955725, - "learning_rate": 2.1958890908130288e-05, - "loss": 0.5448, + "epoch": 1.105513574027428, + "grad_norm": 0.23403480837563395, + "learning_rate": 7.475021834728654e-05, + "loss": 0.4883, "step": 3950 }, { - "epoch": 0.5529741077676698, - "grad_norm": 0.4191971534215795, - "learning_rate": 2.1947642078146004e-05, - "loss": 0.5442, + "epoch": 1.105793450881612, + "grad_norm": 0.23872258169546243, + "learning_rate": 7.47368139487334e-05, + "loss": 0.5118, "step": 3951 }, { - "epoch": 0.5531140657802659, - "grad_norm": 0.43109773584435357, - "learning_rate": 2.193639387545224e-05, - "loss": 0.5506, + "epoch": 1.1060733277357961, + "grad_norm": 0.23105921460824574, + "learning_rate": 7.472340719571645e-05, + "loss": 0.5104, "step": 3952 }, { - "epoch": 0.5532540237928621, - "grad_norm": 0.42420382449551325, - "learning_rate": 2.1925146302360625e-05, - "loss": 0.5578, + "epoch": 1.1063532045899804, + "grad_norm": 0.2245049549252369, + "learning_rate": 7.470999808951176e-05, + "loss": 0.4996, "step": 3953 }, { - "epoch": 0.5533939818054584, - "grad_norm": 0.426621743894938, - "learning_rate": 2.1913899361182632e-05, - "loss": 0.5453, + "epoch": 1.1066330814441645, + "grad_norm": 0.2373984025689569, + "learning_rate": 7.469658663139563e-05, + "loss": 0.514, "step": 3954 }, { - "epoch": 0.5535339398180545, - "grad_norm": 0.42952974584048115, - "learning_rate": 2.1902653054229642e-05, - "loss": 0.5514, + "epoch": 1.1069129582983488, + "grad_norm": 0.21771894540135592, + "learning_rate": 7.468317282264456e-05, + "loss": 0.4899, "step": 3955 }, { - "epoch": 0.5536738978306508, - "grad_norm": 0.42155187844422864, - "learning_rate": 2.189140738381288e-05, - "loss": 0.5622, + "epoch": 1.1071928351525329, + "grad_norm": 0.21674073993322088, + "learning_rate": 7.466975666453528e-05, + "loss": 0.5294, "step": 3956 }, { - "epoch": 0.553813855843247, - "grad_norm": 0.4260436696061489, - "learning_rate": 2.1880162352243425e-05, - "loss": 0.5845, + "epoch": 1.1074727120067172, + "grad_norm": 0.22217151016513056, + "learning_rate": 7.465633815834473e-05, + "loss": 0.5052, "step": 3957 }, { - "epoch": 0.5539538138558433, - "grad_norm": 0.4048895896225377, - "learning_rate": 2.1868917961832274e-05, - "loss": 0.5593, + "epoch": 1.1077525888609012, + "grad_norm": 0.229801954450481, + "learning_rate": 7.464291730535016e-05, + "loss": 0.5037, "step": 3958 }, { - "epoch": 0.5540937718684394, - "grad_norm": 0.4088135684372084, - "learning_rate": 2.1857674214890254e-05, - "loss": 0.517, + "epoch": 1.1080324657150853, + "grad_norm": 0.22920707926710376, + "learning_rate": 7.46294941068289e-05, + "loss": 0.4939, "step": 3959 }, { - "epoch": 0.5542337298810357, - "grad_norm": 0.40953844739681516, - "learning_rate": 2.1846431113728064e-05, - "loss": 0.57, + "epoch": 1.1083123425692696, + "grad_norm": 0.21907021312456276, + "learning_rate": 7.461606856405862e-05, + "loss": 0.515, "step": 3960 }, { - "epoch": 0.554373687893632, - "grad_norm": 0.41049014674652273, - "learning_rate": 2.1835188660656267e-05, - "loss": 0.582, + "epoch": 1.1085922194234537, + "grad_norm": 0.23781660925955275, + "learning_rate": 7.460264067831715e-05, + "loss": 0.5023, "step": 3961 }, { - "epoch": 0.5545136459062281, - "grad_norm": 0.44214177894488643, - "learning_rate": 2.1823946857985323e-05, - "loss": 0.5558, + "epoch": 1.108872096277638, + "grad_norm": 0.20439890808463512, + "learning_rate": 7.458921045088258e-05, + "loss": 0.5104, "step": 3962 }, { - "epoch": 0.5546536039188243, - "grad_norm": 0.40688749353479037, - "learning_rate": 2.1812705708025526e-05, - "loss": 0.5647, + "epoch": 1.109151973131822, + "grad_norm": 0.2240262499431877, + "learning_rate": 7.457577788303318e-05, + "loss": 0.5143, "step": 3963 }, { - "epoch": 0.5547935619314206, - "grad_norm": 0.43619186505830493, - "learning_rate": 2.1801465213087044e-05, - "loss": 0.5819, + "epoch": 1.109431849986006, + "grad_norm": 0.23717555660734368, + "learning_rate": 7.456234297604749e-05, + "loss": 0.5095, "step": 3964 }, { - "epoch": 0.5549335199440167, - "grad_norm": 0.3873905618002296, - "learning_rate": 2.179022537547991e-05, - "loss": 0.5015, + "epoch": 1.1097117268401904, + "grad_norm": 0.2242753396687637, + "learning_rate": 7.454890573120424e-05, + "loss": 0.518, "step": 3965 }, { - "epoch": 0.555073477956613, - "grad_norm": 0.40366380009710195, - "learning_rate": 2.1778986197514034e-05, - "loss": 0.5364, + "epoch": 1.1099916036943744, + "grad_norm": 0.237628458375431, + "learning_rate": 7.453546614978239e-05, + "loss": 0.5184, "step": 3966 }, { - "epoch": 0.5552134359692092, - "grad_norm": 0.41225036663129866, - "learning_rate": 2.1767747681499176e-05, - "loss": 0.5395, + "epoch": 1.1102714805485587, + "grad_norm": 0.2232154358193692, + "learning_rate": 7.452202423306116e-05, + "loss": 0.5263, "step": 3967 }, { - "epoch": 0.5553533939818055, - "grad_norm": 0.41221185236407193, - "learning_rate": 2.1756509829744956e-05, - "loss": 0.5375, + "epoch": 1.1105513574027428, + "grad_norm": 0.21611304074653276, + "learning_rate": 7.450857998231995e-05, + "loss": 0.5069, "step": 3968 }, { - "epoch": 0.5554933519944016, - "grad_norm": 0.41637063641888467, - "learning_rate": 2.1745272644560885e-05, - "loss": 0.5695, + "epoch": 1.1108312342569269, + "grad_norm": 0.22557895807258563, + "learning_rate": 7.449513339883836e-05, + "loss": 0.5014, "step": 3969 }, { - "epoch": 0.5556333100069979, - "grad_norm": 0.4246669886827112, - "learning_rate": 2.173403612825631e-05, - "loss": 0.5226, + "epoch": 1.1111111111111112, + "grad_norm": 0.205553625116001, + "learning_rate": 7.448168448389627e-05, + "loss": 0.4678, "step": 3970 }, { - "epoch": 0.5557732680195941, - "grad_norm": 0.4399810838843672, - "learning_rate": 2.172280028314045e-05, - "loss": 0.5637, + "epoch": 1.1113909879652952, + "grad_norm": 0.22628077462935112, + "learning_rate": 7.446823323877375e-05, + "loss": 0.533, "step": 3971 }, { - "epoch": 0.5559132260321903, - "grad_norm": 0.42264632884515463, - "learning_rate": 2.1711565111522372e-05, - "loss": 0.5715, + "epoch": 1.1116708648194795, + "grad_norm": 0.22317889780015895, + "learning_rate": 7.445477966475108e-05, + "loss": 0.4728, "step": 3972 }, { - "epoch": 0.5560531840447865, - "grad_norm": 0.40102465338699234, - "learning_rate": 2.170033061571104e-05, - "loss": 0.5468, + "epoch": 1.1119507416736636, + "grad_norm": 0.2235224351114337, + "learning_rate": 7.444132376310881e-05, + "loss": 0.505, "step": 3973 }, { - "epoch": 0.5561931420573828, - "grad_norm": 0.4330504312787022, - "learning_rate": 2.1689096798015247e-05, - "loss": 0.5544, + "epoch": 1.1122306185278477, + "grad_norm": 0.21922304196356124, + "learning_rate": 7.442786553512764e-05, + "loss": 0.4965, "step": 3974 }, { - "epoch": 0.556333100069979, - "grad_norm": 0.41663293988065675, - "learning_rate": 2.167786366074365e-05, - "loss": 0.5855, + "epoch": 1.112510495382032, + "grad_norm": 0.2244625761338939, + "learning_rate": 7.441440498208858e-05, + "loss": 0.4955, "step": 3975 }, { - "epoch": 0.5564730580825752, - "grad_norm": 0.40801980733280413, - "learning_rate": 2.1666631206204786e-05, - "loss": 0.5431, + "epoch": 1.112790372236216, + "grad_norm": 0.2277128080284073, + "learning_rate": 7.440094210527277e-05, + "loss": 0.5432, "step": 3976 }, { - "epoch": 0.5566130160951714, - "grad_norm": 0.42663243125510747, - "learning_rate": 2.1655399436707026e-05, - "loss": 0.5574, + "epoch": 1.1130702490904003, + "grad_norm": 0.22637258287783846, + "learning_rate": 7.438747690596165e-05, + "loss": 0.5216, "step": 3977 }, { - "epoch": 0.5567529741077677, - "grad_norm": 0.4605067091274325, - "learning_rate": 2.164416835455862e-05, - "loss": 0.5592, + "epoch": 1.1133501259445844, + "grad_norm": 0.2286180614189198, + "learning_rate": 7.437400938543682e-05, + "loss": 0.4783, "step": 3978 }, { - "epoch": 0.5568929321203638, - "grad_norm": 0.42331401492783005, - "learning_rate": 2.1632937962067657e-05, - "loss": 0.5798, + "epoch": 1.1136300027987684, + "grad_norm": 0.23422347281845565, + "learning_rate": 7.436053954498015e-05, + "loss": 0.5181, "step": 3979 }, { - "epoch": 0.5570328901329601, - "grad_norm": 0.5978199091551641, - "learning_rate": 2.1621708261542116e-05, - "loss": 0.5474, + "epoch": 1.1139098796529527, + "grad_norm": 0.221375051592428, + "learning_rate": 7.434706738587368e-05, + "loss": 0.5028, "step": 3980 }, { - "epoch": 0.5571728481455563, - "grad_norm": 0.39934848914020754, - "learning_rate": 2.161047925528981e-05, - "loss": 0.5362, + "epoch": 1.1141897565071368, + "grad_norm": 0.23079758129720093, + "learning_rate": 7.43335929093997e-05, + "loss": 0.4971, "step": 3981 }, { - "epoch": 0.5573128061581526, - "grad_norm": 0.41963116860550964, - "learning_rate": 2.1599250945618402e-05, - "loss": 0.5506, + "epoch": 1.114469633361321, + "grad_norm": 0.22848630298020803, + "learning_rate": 7.432011611684073e-05, + "loss": 0.5079, "step": 3982 }, { - "epoch": 0.5574527641707487, - "grad_norm": 0.41948040388705365, - "learning_rate": 2.158802333483542e-05, - "loss": 0.5723, + "epoch": 1.1147495102155052, + "grad_norm": 0.22222877729702842, + "learning_rate": 7.430663700947948e-05, + "loss": 0.4861, "step": 3983 }, { - "epoch": 0.557592722183345, - "grad_norm": 0.4188462923542639, - "learning_rate": 2.157679642524828e-05, - "loss": 0.5542, + "epoch": 1.1150293870696892, + "grad_norm": 0.22471377154495475, + "learning_rate": 7.429315558859895e-05, + "loss": 0.4925, "step": 3984 }, { - "epoch": 0.5577326801959412, - "grad_norm": 0.4356609128042065, - "learning_rate": 2.15655702191642e-05, - "loss": 0.5568, + "epoch": 1.1153092639238735, + "grad_norm": 0.2243152332371436, + "learning_rate": 7.427967185548224e-05, + "loss": 0.4953, "step": 3985 }, { - "epoch": 0.5578726382085374, - "grad_norm": 0.4295054521048054, - "learning_rate": 2.1554344718890284e-05, - "loss": 0.6145, + "epoch": 1.1155891407780576, + "grad_norm": 0.2489701283125392, + "learning_rate": 7.426618581141279e-05, + "loss": 0.5047, "step": 3986 }, { - "epoch": 0.5580125962211336, - "grad_norm": 0.41247657463875265, - "learning_rate": 2.1543119926733495e-05, - "loss": 0.5561, + "epoch": 1.1158690176322419, + "grad_norm": 0.22901357998009123, + "learning_rate": 7.42526974576742e-05, + "loss": 0.5202, "step": 3987 }, { - "epoch": 0.5581525542337299, - "grad_norm": 0.41532771461538054, - "learning_rate": 2.1531895845000637e-05, - "loss": 0.5414, + "epoch": 1.116148894486426, + "grad_norm": 0.229652633254522, + "learning_rate": 7.423920679555028e-05, + "loss": 0.5333, "step": 3988 }, { - "epoch": 0.5582925122463261, - "grad_norm": 0.40582165725579067, - "learning_rate": 2.1520672475998373e-05, - "loss": 0.5509, + "epoch": 1.11642877134061, + "grad_norm": 0.2264897650671679, + "learning_rate": 7.422571382632509e-05, + "loss": 0.5195, "step": 3989 }, { - "epoch": 0.5584324702589223, - "grad_norm": 0.4060285622816394, - "learning_rate": 2.1509449822033205e-05, - "loss": 0.5484, + "epoch": 1.1167086481947943, + "grad_norm": 0.22776081041661542, + "learning_rate": 7.42122185512829e-05, + "loss": 0.5038, "step": 3990 }, { - "epoch": 0.5585724282715185, - "grad_norm": 0.4029440998090135, - "learning_rate": 2.1498227885411526e-05, - "loss": 0.585, + "epoch": 1.1169885250489784, + "grad_norm": 0.22137987701782322, + "learning_rate": 7.41987209717082e-05, + "loss": 0.4995, "step": 3991 }, { - "epoch": 0.5587123862841148, - "grad_norm": 0.4104149112850885, - "learning_rate": 2.148700666843955e-05, - "loss": 0.52, + "epoch": 1.1172684019031627, + "grad_norm": 0.2411846928455049, + "learning_rate": 7.418522108888568e-05, + "loss": 0.5231, "step": 3992 }, { - "epoch": 0.5588523442967109, - "grad_norm": 0.4171536565223565, - "learning_rate": 2.1475786173423335e-05, - "loss": 0.5623, + "epoch": 1.1175482787573467, + "grad_norm": 0.21324438743416152, + "learning_rate": 7.417171890410029e-05, + "loss": 0.4775, "step": 3993 }, { - "epoch": 0.5589923023093072, - "grad_norm": 0.3980539899980082, - "learning_rate": 2.146456640266883e-05, - "loss": 0.5812, + "epoch": 1.117828155611531, + "grad_norm": 0.2268611907829221, + "learning_rate": 7.415821441863716e-05, + "loss": 0.5278, "step": 3994 }, { - "epoch": 0.5591322603219034, - "grad_norm": 0.41514890565668344, - "learning_rate": 2.14533473584818e-05, - "loss": 0.5826, + "epoch": 1.118108032465715, + "grad_norm": 0.22666263789361527, + "learning_rate": 7.414470763378166e-05, + "loss": 0.5116, "step": 3995 }, { - "epoch": 0.5592722183344997, - "grad_norm": 0.4166753133049829, - "learning_rate": 2.1442129043167874e-05, - "loss": 0.5543, + "epoch": 1.1183879093198992, + "grad_norm": 0.22154158502340937, + "learning_rate": 7.413119855081938e-05, + "loss": 0.4956, "step": 3996 }, { - "epoch": 0.5594121763470958, - "grad_norm": 0.4114882654130403, - "learning_rate": 2.1430911459032526e-05, - "loss": 0.5741, + "epoch": 1.1186677861740835, + "grad_norm": 0.26600970427729226, + "learning_rate": 7.411768717103612e-05, + "loss": 0.4974, "step": 3997 }, { - "epoch": 0.5595521343596921, - "grad_norm": 0.4017370253857547, - "learning_rate": 2.1419694608381094e-05, - "loss": 0.5963, + "epoch": 1.1189476630282675, + "grad_norm": 0.2308800817397494, + "learning_rate": 7.41041734957179e-05, + "loss": 0.5007, "step": 3998 }, { - "epoch": 0.5596920923722883, - "grad_norm": 0.42955031856189274, - "learning_rate": 2.1408478493518742e-05, - "loss": 0.5896, + "epoch": 1.1192275398824516, + "grad_norm": 0.22563471246040492, + "learning_rate": 7.409065752615094e-05, + "loss": 0.4882, "step": 3999 }, { - "epoch": 0.5598320503848845, - "grad_norm": 0.40378298742736296, - "learning_rate": 2.1397263116750503e-05, - "loss": 0.5657, + "epoch": 1.119507416736636, + "grad_norm": 0.2249642920007204, + "learning_rate": 7.40771392636217e-05, + "loss": 0.5041, "step": 4000 + }, + { + "epoch": 1.11978729359082, + "grad_norm": 0.2219281303629957, + "learning_rate": 7.406361870941688e-05, + "loss": 0.4986, + "step": 4001 + }, + { + "epoch": 1.1200671704450043, + "grad_norm": 0.2352913759985641, + "learning_rate": 7.405009586482336e-05, + "loss": 0.5098, + "step": 4002 + }, + { + "epoch": 1.1203470472991883, + "grad_norm": 0.22423843025035858, + "learning_rate": 7.403657073112826e-05, + "loss": 0.4886, + "step": 4003 + }, + { + "epoch": 1.1206269241533726, + "grad_norm": 0.24015195876825654, + "learning_rate": 7.402304330961892e-05, + "loss": 0.5146, + "step": 4004 + }, + { + "epoch": 1.1209068010075567, + "grad_norm": 0.22195974971523932, + "learning_rate": 7.400951360158284e-05, + "loss": 0.4792, + "step": 4005 + }, + { + "epoch": 1.1211866778617408, + "grad_norm": 0.2124027004858279, + "learning_rate": 7.399598160830785e-05, + "loss": 0.4949, + "step": 4006 + }, + { + "epoch": 1.121466554715925, + "grad_norm": 0.22096425979108092, + "learning_rate": 7.398244733108188e-05, + "loss": 0.5049, + "step": 4007 + }, + { + "epoch": 1.1217464315701091, + "grad_norm": 0.23352774774966856, + "learning_rate": 7.396891077119314e-05, + "loss": 0.5336, + "step": 4008 + }, + { + "epoch": 1.1220263084242934, + "grad_norm": 0.22693250166211001, + "learning_rate": 7.395537192993006e-05, + "loss": 0.5384, + "step": 4009 + }, + { + "epoch": 1.1223061852784775, + "grad_norm": 0.22410185812759292, + "learning_rate": 7.394183080858128e-05, + "loss": 0.4995, + "step": 4010 + }, + { + "epoch": 1.1225860621326615, + "grad_norm": 0.2159306200814348, + "learning_rate": 7.392828740843565e-05, + "loss": 0.5065, + "step": 4011 + }, + { + "epoch": 1.1228659389868458, + "grad_norm": 0.2212926685164199, + "learning_rate": 7.391474173078222e-05, + "loss": 0.4915, + "step": 4012 + }, + { + "epoch": 1.12314581584103, + "grad_norm": 0.21902616955118648, + "learning_rate": 7.39011937769103e-05, + "loss": 0.491, + "step": 4013 + }, + { + "epoch": 1.1234256926952142, + "grad_norm": 0.21190653907480703, + "learning_rate": 7.388764354810935e-05, + "loss": 0.4762, + "step": 4014 + }, + { + "epoch": 1.1237055695493983, + "grad_norm": 0.21731547278169128, + "learning_rate": 7.387409104566915e-05, + "loss": 0.4676, + "step": 4015 + }, + { + "epoch": 1.1239854464035823, + "grad_norm": 0.22096594136238323, + "learning_rate": 7.386053627087959e-05, + "loss": 0.5045, + "step": 4016 + }, + { + "epoch": 1.1242653232577666, + "grad_norm": 0.223290153323746, + "learning_rate": 7.384697922503081e-05, + "loss": 0.5084, + "step": 4017 + }, + { + "epoch": 1.1245452001119507, + "grad_norm": 0.21834334265777824, + "learning_rate": 7.383341990941321e-05, + "loss": 0.5055, + "step": 4018 + }, + { + "epoch": 1.124825076966135, + "grad_norm": 0.23158304511179106, + "learning_rate": 7.381985832531738e-05, + "loss": 0.4942, + "step": 4019 + }, + { + "epoch": 1.125104953820319, + "grad_norm": 0.2181884728536942, + "learning_rate": 7.380629447403408e-05, + "loss": 0.4916, + "step": 4020 + }, + { + "epoch": 1.1253848306745031, + "grad_norm": 0.24216185502464424, + "learning_rate": 7.379272835685436e-05, + "loss": 0.5089, + "step": 4021 + }, + { + "epoch": 1.1256647075286874, + "grad_norm": 0.22842620885262918, + "learning_rate": 7.377915997506945e-05, + "loss": 0.525, + "step": 4022 + }, + { + "epoch": 1.1259445843828715, + "grad_norm": 0.2131697616716636, + "learning_rate": 7.376558932997077e-05, + "loss": 0.5101, + "step": 4023 + }, + { + "epoch": 1.1262244612370558, + "grad_norm": 0.21574241056556848, + "learning_rate": 7.375201642285e-05, + "loss": 0.5233, + "step": 4024 + }, + { + "epoch": 1.1265043380912398, + "grad_norm": 0.21973087229331548, + "learning_rate": 7.373844125499902e-05, + "loss": 0.5032, + "step": 4025 + }, + { + "epoch": 1.126784214945424, + "grad_norm": 0.2316693277548038, + "learning_rate": 7.372486382770988e-05, + "loss": 0.4948, + "step": 4026 + }, + { + "epoch": 1.1270640917996082, + "grad_norm": 0.21643730587161744, + "learning_rate": 7.371128414227495e-05, + "loss": 0.5045, + "step": 4027 + }, + { + "epoch": 1.1273439686537923, + "grad_norm": 0.2253032763134499, + "learning_rate": 7.369770219998671e-05, + "loss": 0.5151, + "step": 4028 + }, + { + "epoch": 1.1276238455079766, + "grad_norm": 0.21610329540544126, + "learning_rate": 7.368411800213792e-05, + "loss": 0.4973, + "step": 4029 + }, + { + "epoch": 1.1279037223621606, + "grad_norm": 0.22582337560928176, + "learning_rate": 7.367053155002153e-05, + "loss": 0.5107, + "step": 4030 + }, + { + "epoch": 1.128183599216345, + "grad_norm": 0.22260830561784425, + "learning_rate": 7.365694284493067e-05, + "loss": 0.4937, + "step": 4031 + }, + { + "epoch": 1.128463476070529, + "grad_norm": 0.22579513338201246, + "learning_rate": 7.364335188815879e-05, + "loss": 0.5165, + "step": 4032 + }, + { + "epoch": 1.128743352924713, + "grad_norm": 0.2196064816768498, + "learning_rate": 7.362975868099942e-05, + "loss": 0.5031, + "step": 4033 + }, + { + "epoch": 1.1290232297788974, + "grad_norm": 0.22283353084824703, + "learning_rate": 7.361616322474639e-05, + "loss": 0.5189, + "step": 4034 + }, + { + "epoch": 1.1293031066330814, + "grad_norm": 0.22830486174505005, + "learning_rate": 7.360256552069373e-05, + "loss": 0.5042, + "step": 4035 + }, + { + "epoch": 1.1295829834872655, + "grad_norm": 0.22881786221022365, + "learning_rate": 7.358896557013566e-05, + "loss": 0.5124, + "step": 4036 + }, + { + "epoch": 1.1298628603414498, + "grad_norm": 0.22450305533083373, + "learning_rate": 7.357536337436666e-05, + "loss": 0.5157, + "step": 4037 + }, + { + "epoch": 1.1301427371956339, + "grad_norm": 0.2215223008891001, + "learning_rate": 7.356175893468137e-05, + "loss": 0.5167, + "step": 4038 + }, + { + "epoch": 1.1304226140498181, + "grad_norm": 0.2210892939529426, + "learning_rate": 7.354815225237468e-05, + "loss": 0.4938, + "step": 4039 + }, + { + "epoch": 1.1307024909040022, + "grad_norm": 0.2287015944620728, + "learning_rate": 7.353454332874168e-05, + "loss": 0.5051, + "step": 4040 + }, + { + "epoch": 1.1309823677581865, + "grad_norm": 0.22765714439851215, + "learning_rate": 7.352093216507767e-05, + "loss": 0.4849, + "step": 4041 + }, + { + "epoch": 1.1312622446123706, + "grad_norm": 0.23468073629206568, + "learning_rate": 7.350731876267819e-05, + "loss": 0.5182, + "step": 4042 + }, + { + "epoch": 1.1315421214665546, + "grad_norm": 0.2278784184839403, + "learning_rate": 7.349370312283892e-05, + "loss": 0.4889, + "step": 4043 + }, + { + "epoch": 1.131821998320739, + "grad_norm": 0.2302163249529019, + "learning_rate": 7.348008524685586e-05, + "loss": 0.5035, + "step": 4044 + }, + { + "epoch": 1.132101875174923, + "grad_norm": 0.22271831637056483, + "learning_rate": 7.346646513602513e-05, + "loss": 0.4826, + "step": 4045 + }, + { + "epoch": 1.132381752029107, + "grad_norm": 0.22812934226579695, + "learning_rate": 7.345284279164312e-05, + "loss": 0.4708, + "step": 4046 + }, + { + "epoch": 1.1326616288832914, + "grad_norm": 0.22337073647858013, + "learning_rate": 7.34392182150064e-05, + "loss": 0.4843, + "step": 4047 + }, + { + "epoch": 1.1329415057374754, + "grad_norm": 0.23428618132006931, + "learning_rate": 7.342559140741178e-05, + "loss": 0.5195, + "step": 4048 + }, + { + "epoch": 1.1332213825916597, + "grad_norm": 0.21658888481202418, + "learning_rate": 7.341196237015625e-05, + "loss": 0.5165, + "step": 4049 + }, + { + "epoch": 1.1335012594458438, + "grad_norm": 0.22357993130256928, + "learning_rate": 7.339833110453705e-05, + "loss": 0.4835, + "step": 4050 + }, + { + "epoch": 1.133781136300028, + "grad_norm": 0.21458289644518913, + "learning_rate": 7.338469761185159e-05, + "loss": 0.4936, + "step": 4051 + }, + { + "epoch": 1.1340610131542121, + "grad_norm": 0.22384757186563023, + "learning_rate": 7.337106189339751e-05, + "loss": 0.4866, + "step": 4052 + }, + { + "epoch": 1.1343408900083962, + "grad_norm": 0.23985823753627802, + "learning_rate": 7.335742395047269e-05, + "loss": 0.5006, + "step": 4053 + }, + { + "epoch": 1.1346207668625805, + "grad_norm": 0.22311980934561043, + "learning_rate": 7.334378378437519e-05, + "loss": 0.4997, + "step": 4054 + }, + { + "epoch": 1.1349006437167646, + "grad_norm": 0.2164550009077947, + "learning_rate": 7.333014139640327e-05, + "loss": 0.5055, + "step": 4055 + }, + { + "epoch": 1.1351805205709489, + "grad_norm": 0.22534961429285863, + "learning_rate": 7.331649678785546e-05, + "loss": 0.4853, + "step": 4056 + }, + { + "epoch": 1.135460397425133, + "grad_norm": 0.23185781552040344, + "learning_rate": 7.33028499600304e-05, + "loss": 0.4937, + "step": 4057 + }, + { + "epoch": 1.135740274279317, + "grad_norm": 0.2213397453761991, + "learning_rate": 7.328920091422706e-05, + "loss": 0.5128, + "step": 4058 + }, + { + "epoch": 1.1360201511335013, + "grad_norm": 0.2208586969694549, + "learning_rate": 7.327554965174454e-05, + "loss": 0.5164, + "step": 4059 + }, + { + "epoch": 1.1363000279876854, + "grad_norm": 0.23022256529255308, + "learning_rate": 7.326189617388218e-05, + "loss": 0.5255, + "step": 4060 + }, + { + "epoch": 1.1365799048418697, + "grad_norm": 0.21733781380197187, + "learning_rate": 7.32482404819395e-05, + "loss": 0.498, + "step": 4061 + }, + { + "epoch": 1.1368597816960537, + "grad_norm": 0.21849333909572238, + "learning_rate": 7.32345825772163e-05, + "loss": 0.5279, + "step": 4062 + }, + { + "epoch": 1.1371396585502378, + "grad_norm": 0.22513243789664358, + "learning_rate": 7.32209224610125e-05, + "loss": 0.5093, + "step": 4063 + }, + { + "epoch": 1.137419535404422, + "grad_norm": 0.2163399051652371, + "learning_rate": 7.320726013462833e-05, + "loss": 0.4955, + "step": 4064 + }, + { + "epoch": 1.1376994122586062, + "grad_norm": 0.21823966611068824, + "learning_rate": 7.319359559936414e-05, + "loss": 0.4826, + "step": 4065 + }, + { + "epoch": 1.1379792891127904, + "grad_norm": 0.22781418531552539, + "learning_rate": 7.317992885652055e-05, + "loss": 0.4866, + "step": 4066 + }, + { + "epoch": 1.1382591659669745, + "grad_norm": 0.2273204215532043, + "learning_rate": 7.316625990739833e-05, + "loss": 0.4924, + "step": 4067 + }, + { + "epoch": 1.1385390428211588, + "grad_norm": 0.22467194139826965, + "learning_rate": 7.315258875329855e-05, + "loss": 0.4875, + "step": 4068 + }, + { + "epoch": 1.1388189196753429, + "grad_norm": 0.2892321884351586, + "learning_rate": 7.313891539552241e-05, + "loss": 0.5072, + "step": 4069 + }, + { + "epoch": 1.139098796529527, + "grad_norm": 0.22352433560311688, + "learning_rate": 7.312523983537135e-05, + "loss": 0.4962, + "step": 4070 + }, + { + "epoch": 1.1393786733837112, + "grad_norm": 0.23113575822496088, + "learning_rate": 7.311156207414702e-05, + "loss": 0.5091, + "step": 4071 + }, + { + "epoch": 1.1396585502378953, + "grad_norm": 0.22610375100898752, + "learning_rate": 7.309788211315126e-05, + "loss": 0.5098, + "step": 4072 + }, + { + "epoch": 1.1399384270920794, + "grad_norm": 0.22518720418436866, + "learning_rate": 7.308419995368616e-05, + "loss": 0.5146, + "step": 4073 + }, + { + "epoch": 1.1402183039462637, + "grad_norm": 0.23821428509493398, + "learning_rate": 7.307051559705399e-05, + "loss": 0.5393, + "step": 4074 + }, + { + "epoch": 1.1404981808004477, + "grad_norm": 0.22244560903461974, + "learning_rate": 7.305682904455723e-05, + "loss": 0.4794, + "step": 4075 + }, + { + "epoch": 1.140778057654632, + "grad_norm": 0.21767576164824462, + "learning_rate": 7.304314029749859e-05, + "loss": 0.4936, + "step": 4076 + }, + { + "epoch": 1.141057934508816, + "grad_norm": 0.21992091203741246, + "learning_rate": 7.302944935718095e-05, + "loss": 0.4904, + "step": 4077 + }, + { + "epoch": 1.1413378113630004, + "grad_norm": 0.2188229441835584, + "learning_rate": 7.301575622490742e-05, + "loss": 0.4983, + "step": 4078 + }, + { + "epoch": 1.1416176882171845, + "grad_norm": 0.22609618421285535, + "learning_rate": 7.300206090198134e-05, + "loss": 0.5382, + "step": 4079 + }, + { + "epoch": 1.1418975650713685, + "grad_norm": 0.2218307408425978, + "learning_rate": 7.298836338970622e-05, + "loss": 0.482, + "step": 4080 + }, + { + "epoch": 1.1421774419255528, + "grad_norm": 0.22681541463674998, + "learning_rate": 7.297466368938581e-05, + "loss": 0.5243, + "step": 4081 + }, + { + "epoch": 1.1424573187797369, + "grad_norm": 0.22496500993361587, + "learning_rate": 7.296096180232406e-05, + "loss": 0.5002, + "step": 4082 + }, + { + "epoch": 1.142737195633921, + "grad_norm": 0.23006353150031264, + "learning_rate": 7.29472577298251e-05, + "loss": 0.5081, + "step": 4083 + }, + { + "epoch": 1.1430170724881052, + "grad_norm": 0.21456278073237026, + "learning_rate": 7.293355147319331e-05, + "loss": 0.4965, + "step": 4084 + }, + { + "epoch": 1.1432969493422893, + "grad_norm": 0.2204601380611181, + "learning_rate": 7.291984303373326e-05, + "loss": 0.5226, + "step": 4085 + }, + { + "epoch": 1.1435768261964736, + "grad_norm": 0.22113479429311303, + "learning_rate": 7.290613241274972e-05, + "loss": 0.5146, + "step": 4086 + }, + { + "epoch": 1.1438567030506577, + "grad_norm": 0.22229384157109938, + "learning_rate": 7.289241961154766e-05, + "loss": 0.4957, + "step": 4087 + }, + { + "epoch": 1.144136579904842, + "grad_norm": 0.2193742084806028, + "learning_rate": 7.287870463143232e-05, + "loss": 0.4763, + "step": 4088 + }, + { + "epoch": 1.144416456759026, + "grad_norm": 0.22541904725437054, + "learning_rate": 7.286498747370904e-05, + "loss": 0.5091, + "step": 4089 + }, + { + "epoch": 1.14469633361321, + "grad_norm": 0.22786417978835122, + "learning_rate": 7.285126813968346e-05, + "loss": 0.5159, + "step": 4090 + }, + { + "epoch": 1.1449762104673944, + "grad_norm": 0.21919232932500993, + "learning_rate": 7.28375466306614e-05, + "loss": 0.5137, + "step": 4091 + }, + { + "epoch": 1.1452560873215785, + "grad_norm": 0.22803902956656727, + "learning_rate": 7.282382294794884e-05, + "loss": 0.5113, + "step": 4092 + }, + { + "epoch": 1.1455359641757628, + "grad_norm": 0.23313279194119976, + "learning_rate": 7.281009709285207e-05, + "loss": 0.5349, + "step": 4093 + }, + { + "epoch": 1.1458158410299468, + "grad_norm": 0.235238941798849, + "learning_rate": 7.279636906667747e-05, + "loss": 0.5125, + "step": 4094 + }, + { + "epoch": 1.146095717884131, + "grad_norm": 0.23183299771869897, + "learning_rate": 7.278263887073172e-05, + "loss": 0.5029, + "step": 4095 + }, + { + "epoch": 1.1463755947383152, + "grad_norm": 0.22936431756748155, + "learning_rate": 7.276890650632163e-05, + "loss": 0.4942, + "step": 4096 + }, + { + "epoch": 1.1466554715924993, + "grad_norm": 0.22496097541144508, + "learning_rate": 7.275517197475429e-05, + "loss": 0.519, + "step": 4097 + }, + { + "epoch": 1.1469353484466835, + "grad_norm": 0.22325961646468948, + "learning_rate": 7.274143527733695e-05, + "loss": 0.4933, + "step": 4098 + }, + { + "epoch": 1.1472152253008676, + "grad_norm": 0.22062466364741165, + "learning_rate": 7.272769641537705e-05, + "loss": 0.5147, + "step": 4099 + }, + { + "epoch": 1.1474951021550517, + "grad_norm": 0.21330436108753204, + "learning_rate": 7.27139553901823e-05, + "loss": 0.5201, + "step": 4100 + }, + { + "epoch": 1.147774979009236, + "grad_norm": 0.24501083098398252, + "learning_rate": 7.270021220306056e-05, + "loss": 0.4954, + "step": 4101 + }, + { + "epoch": 1.14805485586342, + "grad_norm": 0.22131722594544098, + "learning_rate": 7.268646685531991e-05, + "loss": 0.5157, + "step": 4102 + }, + { + "epoch": 1.1483347327176043, + "grad_norm": 0.22862286024502293, + "learning_rate": 7.267271934826865e-05, + "loss": 0.491, + "step": 4103 + }, + { + "epoch": 1.1486146095717884, + "grad_norm": 0.2225709660910424, + "learning_rate": 7.265896968321527e-05, + "loss": 0.5188, + "step": 4104 + }, + { + "epoch": 1.1488944864259727, + "grad_norm": 0.24685751888948226, + "learning_rate": 7.264521786146847e-05, + "loss": 0.493, + "step": 4105 + }, + { + "epoch": 1.1491743632801568, + "grad_norm": 0.22542089142703023, + "learning_rate": 7.263146388433717e-05, + "loss": 0.5055, + "step": 4106 + }, + { + "epoch": 1.1494542401343408, + "grad_norm": 0.23744703131267442, + "learning_rate": 7.261770775313046e-05, + "loss": 0.5044, + "step": 4107 + }, + { + "epoch": 1.1497341169885251, + "grad_norm": 0.21399538877733398, + "learning_rate": 7.260394946915767e-05, + "loss": 0.4867, + "step": 4108 + }, + { + "epoch": 1.1500139938427092, + "grad_norm": 0.21812370534388678, + "learning_rate": 7.259018903372832e-05, + "loss": 0.4928, + "step": 4109 + }, + { + "epoch": 1.1502938706968933, + "grad_norm": 0.22271726533539943, + "learning_rate": 7.257642644815213e-05, + "loss": 0.4996, + "step": 4110 + }, + { + "epoch": 1.1505737475510776, + "grad_norm": 0.22232448280925565, + "learning_rate": 7.256266171373905e-05, + "loss": 0.5301, + "step": 4111 + }, + { + "epoch": 1.1508536244052616, + "grad_norm": 0.21768880214177133, + "learning_rate": 7.254889483179918e-05, + "loss": 0.5005, + "step": 4112 + }, + { + "epoch": 1.151133501259446, + "grad_norm": 0.23532171554645165, + "learning_rate": 7.253512580364288e-05, + "loss": 0.5096, + "step": 4113 + }, + { + "epoch": 1.15141337811363, + "grad_norm": 0.2235180972449941, + "learning_rate": 7.25213546305807e-05, + "loss": 0.4732, + "step": 4114 + }, + { + "epoch": 1.1516932549678143, + "grad_norm": 0.22504699821899796, + "learning_rate": 7.250758131392336e-05, + "loss": 0.4927, + "step": 4115 + }, + { + "epoch": 1.1519731318219983, + "grad_norm": 0.2310752269631708, + "learning_rate": 7.249380585498185e-05, + "loss": 0.4986, + "step": 4116 + }, + { + "epoch": 1.1522530086761824, + "grad_norm": 0.21324067512538059, + "learning_rate": 7.248002825506731e-05, + "loss": 0.4998, + "step": 4117 + }, + { + "epoch": 1.1525328855303667, + "grad_norm": 0.25504846748980925, + "learning_rate": 7.24662485154911e-05, + "loss": 0.5074, + "step": 4118 + }, + { + "epoch": 1.1528127623845508, + "grad_norm": 0.2207465599901764, + "learning_rate": 7.245246663756477e-05, + "loss": 0.5229, + "step": 4119 + }, + { + "epoch": 1.1530926392387348, + "grad_norm": 0.22748257967342314, + "learning_rate": 7.243868262260011e-05, + "loss": 0.5194, + "step": 4120 + }, + { + "epoch": 1.1533725160929191, + "grad_norm": 0.23415114877001775, + "learning_rate": 7.242489647190907e-05, + "loss": 0.523, + "step": 4121 + }, + { + "epoch": 1.1536523929471032, + "grad_norm": 0.22463898013038625, + "learning_rate": 7.241110818680384e-05, + "loss": 0.539, + "step": 4122 + }, + { + "epoch": 1.1539322698012875, + "grad_norm": 0.22484010984193606, + "learning_rate": 7.239731776859679e-05, + "loss": 0.5062, + "step": 4123 + }, + { + "epoch": 1.1542121466554716, + "grad_norm": 0.22342956279143927, + "learning_rate": 7.238352521860049e-05, + "loss": 0.5065, + "step": 4124 + }, + { + "epoch": 1.1544920235096559, + "grad_norm": 0.23159403676848503, + "learning_rate": 7.236973053812774e-05, + "loss": 0.4985, + "step": 4125 + }, + { + "epoch": 1.15477190036384, + "grad_norm": 0.22508726822985886, + "learning_rate": 7.235593372849149e-05, + "loss": 0.5014, + "step": 4126 + }, + { + "epoch": 1.155051777218024, + "grad_norm": 0.24508678785258117, + "learning_rate": 7.234213479100498e-05, + "loss": 0.5241, + "step": 4127 + }, + { + "epoch": 1.1553316540722083, + "grad_norm": 0.22596941266435985, + "learning_rate": 7.232833372698157e-05, + "loss": 0.5093, + "step": 4128 + }, + { + "epoch": 1.1556115309263923, + "grad_norm": 0.21903393064508364, + "learning_rate": 7.231453053773486e-05, + "loss": 0.5034, + "step": 4129 + }, + { + "epoch": 1.1558914077805766, + "grad_norm": 0.22118997116070338, + "learning_rate": 7.230072522457864e-05, + "loss": 0.5053, + "step": 4130 + }, + { + "epoch": 1.1561712846347607, + "grad_norm": 0.2404046933927392, + "learning_rate": 7.228691778882693e-05, + "loss": 0.5045, + "step": 4131 + }, + { + "epoch": 1.1564511614889448, + "grad_norm": 0.22552848637334438, + "learning_rate": 7.227310823179388e-05, + "loss": 0.5168, + "step": 4132 + }, + { + "epoch": 1.156731038343129, + "grad_norm": 0.22503819110084003, + "learning_rate": 7.225929655479393e-05, + "loss": 0.4902, + "step": 4133 + }, + { + "epoch": 1.1570109151973131, + "grad_norm": 0.21986557301349818, + "learning_rate": 7.224548275914169e-05, + "loss": 0.4883, + "step": 4134 + }, + { + "epoch": 1.1572907920514974, + "grad_norm": 0.22799502855172393, + "learning_rate": 7.223166684615194e-05, + "loss": 0.5064, + "step": 4135 + }, + { + "epoch": 1.1575706689056815, + "grad_norm": 0.22248076047565904, + "learning_rate": 7.22178488171397e-05, + "loss": 0.4931, + "step": 4136 + }, + { + "epoch": 1.1578505457598656, + "grad_norm": 0.2238793104988906, + "learning_rate": 7.220402867342015e-05, + "loss": 0.5208, + "step": 4137 + }, + { + "epoch": 1.1581304226140499, + "grad_norm": 0.2252932526520466, + "learning_rate": 7.219020641630875e-05, + "loss": 0.5022, + "step": 4138 + }, + { + "epoch": 1.158410299468234, + "grad_norm": 0.2286714548895435, + "learning_rate": 7.217638204712107e-05, + "loss": 0.509, + "step": 4139 + }, + { + "epoch": 1.1586901763224182, + "grad_norm": 0.23009376358579406, + "learning_rate": 7.216255556717295e-05, + "loss": 0.5062, + "step": 4140 + }, + { + "epoch": 1.1589700531766023, + "grad_norm": 0.23301354568137048, + "learning_rate": 7.214872697778037e-05, + "loss": 0.4901, + "step": 4141 + }, + { + "epoch": 1.1592499300307864, + "grad_norm": 0.22337006995086173, + "learning_rate": 7.213489628025956e-05, + "loss": 0.5082, + "step": 4142 + }, + { + "epoch": 1.1595298068849706, + "grad_norm": 0.23391547958969938, + "learning_rate": 7.212106347592694e-05, + "loss": 0.5038, + "step": 4143 + }, + { + "epoch": 1.1598096837391547, + "grad_norm": 0.24235555652130966, + "learning_rate": 7.21072285660991e-05, + "loss": 0.5189, + "step": 4144 + }, + { + "epoch": 1.160089560593339, + "grad_norm": 0.2241935902477743, + "learning_rate": 7.209339155209289e-05, + "loss": 0.5013, + "step": 4145 + }, + { + "epoch": 1.160369437447523, + "grad_norm": 0.2275127344469746, + "learning_rate": 7.20795524352253e-05, + "loss": 0.4829, + "step": 4146 + }, + { + "epoch": 1.1606493143017071, + "grad_norm": 0.22879314084355956, + "learning_rate": 7.206571121681356e-05, + "loss": 0.5209, + "step": 4147 + }, + { + "epoch": 1.1609291911558914, + "grad_norm": 0.21831346078803268, + "learning_rate": 7.205186789817506e-05, + "loss": 0.5183, + "step": 4148 + }, + { + "epoch": 1.1612090680100755, + "grad_norm": 0.23003191649847718, + "learning_rate": 7.203802248062743e-05, + "loss": 0.5195, + "step": 4149 + }, + { + "epoch": 1.1614889448642598, + "grad_norm": 0.2265791854241328, + "learning_rate": 7.20241749654885e-05, + "loss": 0.5007, + "step": 4150 + }, + { + "epoch": 1.1617688217184439, + "grad_norm": 0.22236826227261736, + "learning_rate": 7.201032535407626e-05, + "loss": 0.4808, + "step": 4151 + }, + { + "epoch": 1.1620486985726282, + "grad_norm": 0.22475326847282717, + "learning_rate": 7.199647364770894e-05, + "loss": 0.4987, + "step": 4152 + }, + { + "epoch": 1.1623285754268122, + "grad_norm": 0.2304447791042639, + "learning_rate": 7.198261984770493e-05, + "loss": 0.5281, + "step": 4153 + }, + { + "epoch": 1.1626084522809963, + "grad_norm": 0.22080745978818953, + "learning_rate": 7.196876395538288e-05, + "loss": 0.5264, + "step": 4154 + }, + { + "epoch": 1.1628883291351806, + "grad_norm": 0.21563963829000318, + "learning_rate": 7.195490597206155e-05, + "loss": 0.5037, + "step": 4155 + }, + { + "epoch": 1.1631682059893647, + "grad_norm": 0.22494081118857215, + "learning_rate": 7.194104589906e-05, + "loss": 0.5261, + "step": 4156 + }, + { + "epoch": 1.1634480828435487, + "grad_norm": 0.20843236305617785, + "learning_rate": 7.192718373769744e-05, + "loss": 0.5171, + "step": 4157 + }, + { + "epoch": 1.163727959697733, + "grad_norm": 0.21074589056143336, + "learning_rate": 7.191331948929323e-05, + "loss": 0.4952, + "step": 4158 + }, + { + "epoch": 1.164007836551917, + "grad_norm": 0.2378016825363355, + "learning_rate": 7.189945315516702e-05, + "loss": 0.5065, + "step": 4159 + }, + { + "epoch": 1.1642877134061014, + "grad_norm": 0.2231089660276172, + "learning_rate": 7.18855847366386e-05, + "loss": 0.5114, + "step": 4160 + }, + { + "epoch": 1.1645675902602854, + "grad_norm": 0.21688883427276945, + "learning_rate": 7.187171423502796e-05, + "loss": 0.5037, + "step": 4161 + }, + { + "epoch": 1.1648474671144697, + "grad_norm": 0.21732412053294106, + "learning_rate": 7.185784165165534e-05, + "loss": 0.518, + "step": 4162 + }, + { + "epoch": 1.1651273439686538, + "grad_norm": 0.21816360877095778, + "learning_rate": 7.18439669878411e-05, + "loss": 0.5123, + "step": 4163 + }, + { + "epoch": 1.1654072208228379, + "grad_norm": 0.22840278810585588, + "learning_rate": 7.183009024490586e-05, + "loss": 0.4988, + "step": 4164 + }, + { + "epoch": 1.1656870976770222, + "grad_norm": 0.22580954724899185, + "learning_rate": 7.181621142417041e-05, + "loss": 0.4803, + "step": 4165 + }, + { + "epoch": 1.1659669745312062, + "grad_norm": 0.21973971602955505, + "learning_rate": 7.180233052695576e-05, + "loss": 0.4917, + "step": 4166 + }, + { + "epoch": 1.1662468513853903, + "grad_norm": 0.2141544294295572, + "learning_rate": 7.178844755458306e-05, + "loss": 0.502, + "step": 4167 + }, + { + "epoch": 1.1665267282395746, + "grad_norm": 0.20975885297654492, + "learning_rate": 7.177456250837375e-05, + "loss": 0.4885, + "step": 4168 + }, + { + "epoch": 1.1668066050937587, + "grad_norm": 0.22178780169939083, + "learning_rate": 7.176067538964938e-05, + "loss": 0.5012, + "step": 4169 + }, + { + "epoch": 1.167086481947943, + "grad_norm": 0.21691390524881485, + "learning_rate": 7.174678619973176e-05, + "loss": 0.4961, + "step": 4170 + }, + { + "epoch": 1.167366358802127, + "grad_norm": 0.2150898334825614, + "learning_rate": 7.173289493994284e-05, + "loss": 0.5132, + "step": 4171 + }, + { + "epoch": 1.1676462356563113, + "grad_norm": 0.24284800144882926, + "learning_rate": 7.171900161160483e-05, + "loss": 0.5265, + "step": 4172 + }, + { + "epoch": 1.1679261125104954, + "grad_norm": 0.23582397104039957, + "learning_rate": 7.170510621604008e-05, + "loss": 0.5147, + "step": 4173 + }, + { + "epoch": 1.1682059893646795, + "grad_norm": 0.2149756660438223, + "learning_rate": 7.169120875457117e-05, + "loss": 0.4853, + "step": 4174 + }, + { + "epoch": 1.1684858662188637, + "grad_norm": 0.2256078156739452, + "learning_rate": 7.167730922852087e-05, + "loss": 0.5093, + "step": 4175 + }, + { + "epoch": 1.1687657430730478, + "grad_norm": 0.21261342003842387, + "learning_rate": 7.166340763921215e-05, + "loss": 0.4893, + "step": 4176 + }, + { + "epoch": 1.169045619927232, + "grad_norm": 0.22808664499214856, + "learning_rate": 7.164950398796816e-05, + "loss": 0.511, + "step": 4177 + }, + { + "epoch": 1.1693254967814162, + "grad_norm": 0.2141617495811512, + "learning_rate": 7.163559827611227e-05, + "loss": 0.4953, + "step": 4178 + }, + { + "epoch": 1.1696053736356002, + "grad_norm": 0.2253734698665144, + "learning_rate": 7.162169050496803e-05, + "loss": 0.4878, + "step": 4179 + }, + { + "epoch": 1.1698852504897845, + "grad_norm": 0.22424223754322692, + "learning_rate": 7.160778067585917e-05, + "loss": 0.5277, + "step": 4180 + }, + { + "epoch": 1.1701651273439686, + "grad_norm": 0.230645854225705, + "learning_rate": 7.159386879010967e-05, + "loss": 0.5069, + "step": 4181 + }, + { + "epoch": 1.170445004198153, + "grad_norm": 0.24414442942752934, + "learning_rate": 7.157995484904362e-05, + "loss": 0.5309, + "step": 4182 + }, + { + "epoch": 1.170724881052337, + "grad_norm": 0.2246046772363703, + "learning_rate": 7.156603885398542e-05, + "loss": 0.4954, + "step": 4183 + }, + { + "epoch": 1.171004757906521, + "grad_norm": 0.21183764566664356, + "learning_rate": 7.155212080625955e-05, + "loss": 0.5322, + "step": 4184 + }, + { + "epoch": 1.1712846347607053, + "grad_norm": 0.2370294973889655, + "learning_rate": 7.153820070719077e-05, + "loss": 0.5157, + "step": 4185 + }, + { + "epoch": 1.1715645116148894, + "grad_norm": 0.22460134322724573, + "learning_rate": 7.1524278558104e-05, + "loss": 0.5063, + "step": 4186 + }, + { + "epoch": 1.1718443884690737, + "grad_norm": 0.22926016536982338, + "learning_rate": 7.151035436032434e-05, + "loss": 0.5261, + "step": 4187 + }, + { + "epoch": 1.1721242653232578, + "grad_norm": 0.23804642643452714, + "learning_rate": 7.149642811517712e-05, + "loss": 0.527, + "step": 4188 + }, + { + "epoch": 1.172404142177442, + "grad_norm": 0.21963346274539094, + "learning_rate": 7.148249982398783e-05, + "loss": 0.516, + "step": 4189 + }, + { + "epoch": 1.1726840190316261, + "grad_norm": 0.23088774948417135, + "learning_rate": 7.146856948808217e-05, + "loss": 0.5006, + "step": 4190 + }, + { + "epoch": 1.1729638958858102, + "grad_norm": 0.21627511010611178, + "learning_rate": 7.145463710878607e-05, + "loss": 0.4886, + "step": 4191 + }, + { + "epoch": 1.1732437727399945, + "grad_norm": 0.22502464689520862, + "learning_rate": 7.14407026874256e-05, + "loss": 0.5054, + "step": 4192 + }, + { + "epoch": 1.1735236495941785, + "grad_norm": 0.2138403320112918, + "learning_rate": 7.142676622532702e-05, + "loss": 0.5093, + "step": 4193 + }, + { + "epoch": 1.1738035264483626, + "grad_norm": 0.21659948998091277, + "learning_rate": 7.141282772381687e-05, + "loss": 0.4971, + "step": 4194 + }, + { + "epoch": 1.174083403302547, + "grad_norm": 0.2263863822165275, + "learning_rate": 7.139888718422177e-05, + "loss": 0.5011, + "step": 4195 + }, + { + "epoch": 1.174363280156731, + "grad_norm": 0.22543225536791175, + "learning_rate": 7.138494460786864e-05, + "loss": 0.4875, + "step": 4196 + }, + { + "epoch": 1.1746431570109153, + "grad_norm": 0.23198190772915978, + "learning_rate": 7.137099999608449e-05, + "loss": 0.534, + "step": 4197 + }, + { + "epoch": 1.1749230338650993, + "grad_norm": 0.22694380435663544, + "learning_rate": 7.13570533501966e-05, + "loss": 0.5159, + "step": 4198 + }, + { + "epoch": 1.1752029107192836, + "grad_norm": 0.22794374417325902, + "learning_rate": 7.134310467153243e-05, + "loss": 0.5135, + "step": 4199 + }, + { + "epoch": 1.1754827875734677, + "grad_norm": 0.22704528252422798, + "learning_rate": 7.132915396141959e-05, + "loss": 0.5075, + "step": 4200 + }, + { + "epoch": 1.1757626644276518, + "grad_norm": 0.2188067658947318, + "learning_rate": 7.131520122118594e-05, + "loss": 0.5219, + "step": 4201 + }, + { + "epoch": 1.176042541281836, + "grad_norm": 0.22755473228578177, + "learning_rate": 7.130124645215952e-05, + "loss": 0.5111, + "step": 4202 + }, + { + "epoch": 1.1763224181360201, + "grad_norm": 0.21692041672620438, + "learning_rate": 7.128728965566853e-05, + "loss": 0.5159, + "step": 4203 + }, + { + "epoch": 1.1766022949902042, + "grad_norm": 0.2301526320218446, + "learning_rate": 7.12733308330414e-05, + "loss": 0.5236, + "step": 4204 + }, + { + "epoch": 1.1768821718443885, + "grad_norm": 0.225977373758813, + "learning_rate": 7.125936998560676e-05, + "loss": 0.4887, + "step": 4205 + }, + { + "epoch": 1.1771620486985725, + "grad_norm": 0.22064171515702388, + "learning_rate": 7.124540711469336e-05, + "loss": 0.5013, + "step": 4206 + }, + { + "epoch": 1.1774419255527568, + "grad_norm": 0.21741836774697432, + "learning_rate": 7.123144222163021e-05, + "loss": 0.5069, + "step": 4207 + }, + { + "epoch": 1.177721802406941, + "grad_norm": 0.24009228236769486, + "learning_rate": 7.121747530774652e-05, + "loss": 0.5293, + "step": 4208 + }, + { + "epoch": 1.1780016792611252, + "grad_norm": 0.22672478401477372, + "learning_rate": 7.120350637437165e-05, + "loss": 0.494, + "step": 4209 + }, + { + "epoch": 1.1782815561153093, + "grad_norm": 0.21899359915851813, + "learning_rate": 7.118953542283518e-05, + "loss": 0.5135, + "step": 4210 + }, + { + "epoch": 1.1785614329694933, + "grad_norm": 0.22205936151888048, + "learning_rate": 7.117556245446685e-05, + "loss": 0.5003, + "step": 4211 + }, + { + "epoch": 1.1788413098236776, + "grad_norm": 0.22632344881796398, + "learning_rate": 7.116158747059664e-05, + "loss": 0.5193, + "step": 4212 + }, + { + "epoch": 1.1791211866778617, + "grad_norm": 0.2198089447085492, + "learning_rate": 7.11476104725547e-05, + "loss": 0.4851, + "step": 4213 + }, + { + "epoch": 1.179401063532046, + "grad_norm": 0.21986722770148254, + "learning_rate": 7.113363146167138e-05, + "loss": 0.4945, + "step": 4214 + }, + { + "epoch": 1.17968094038623, + "grad_norm": 0.22086848798747755, + "learning_rate": 7.111965043927715e-05, + "loss": 0.4991, + "step": 4215 + }, + { + "epoch": 1.1799608172404141, + "grad_norm": 0.21428595212648865, + "learning_rate": 7.11056674067028e-05, + "loss": 0.5025, + "step": 4216 + }, + { + "epoch": 1.1802406940945984, + "grad_norm": 0.23039218794510508, + "learning_rate": 7.109168236527919e-05, + "loss": 0.4839, + "step": 4217 + }, + { + "epoch": 1.1805205709487825, + "grad_norm": 0.2227231307963435, + "learning_rate": 7.107769531633745e-05, + "loss": 0.5128, + "step": 4218 + }, + { + "epoch": 1.1808004478029668, + "grad_norm": 0.22568155647611768, + "learning_rate": 7.106370626120887e-05, + "loss": 0.5136, + "step": 4219 + }, + { + "epoch": 1.1810803246571508, + "grad_norm": 0.2310292125877394, + "learning_rate": 7.104971520122495e-05, + "loss": 0.4963, + "step": 4220 + }, + { + "epoch": 1.181360201511335, + "grad_norm": 0.21907795572412855, + "learning_rate": 7.103572213771734e-05, + "loss": 0.4986, + "step": 4221 + }, + { + "epoch": 1.1816400783655192, + "grad_norm": 0.2307946285519689, + "learning_rate": 7.102172707201793e-05, + "loss": 0.4923, + "step": 4222 + }, + { + "epoch": 1.1819199552197033, + "grad_norm": 0.2202551400353409, + "learning_rate": 7.100773000545879e-05, + "loss": 0.498, + "step": 4223 + }, + { + "epoch": 1.1821998320738876, + "grad_norm": 0.226422990684998, + "learning_rate": 7.099373093937213e-05, + "loss": 0.5214, + "step": 4224 + }, + { + "epoch": 1.1824797089280716, + "grad_norm": 0.2181716687563738, + "learning_rate": 7.09797298750904e-05, + "loss": 0.5208, + "step": 4225 + }, + { + "epoch": 1.182759585782256, + "grad_norm": 0.22933086486553092, + "learning_rate": 7.096572681394625e-05, + "loss": 0.5244, + "step": 4226 + }, + { + "epoch": 1.18303946263644, + "grad_norm": 0.22881353509664235, + "learning_rate": 7.095172175727247e-05, + "loss": 0.5068, + "step": 4227 + }, + { + "epoch": 1.183319339490624, + "grad_norm": 0.22088408171918741, + "learning_rate": 7.093771470640211e-05, + "loss": 0.5028, + "step": 4228 + }, + { + "epoch": 1.1835992163448084, + "grad_norm": 0.22761263480559474, + "learning_rate": 7.092370566266834e-05, + "loss": 0.5163, + "step": 4229 + }, + { + "epoch": 1.1838790931989924, + "grad_norm": 0.2255300599919595, + "learning_rate": 7.090969462740454e-05, + "loss": 0.5177, + "step": 4230 + }, + { + "epoch": 1.1841589700531765, + "grad_norm": 0.22184946442958645, + "learning_rate": 7.089568160194431e-05, + "loss": 0.4953, + "step": 4231 + }, + { + "epoch": 1.1844388469073608, + "grad_norm": 0.22584014466341526, + "learning_rate": 7.088166658762143e-05, + "loss": 0.5073, + "step": 4232 + }, + { + "epoch": 1.1847187237615449, + "grad_norm": 0.2180202061909379, + "learning_rate": 7.086764958576982e-05, + "loss": 0.4949, + "step": 4233 + }, + { + "epoch": 1.1849986006157291, + "grad_norm": 0.22643632655484283, + "learning_rate": 7.085363059772364e-05, + "loss": 0.5312, + "step": 4234 + }, + { + "epoch": 1.1852784774699132, + "grad_norm": 0.21482765251570712, + "learning_rate": 7.083960962481721e-05, + "loss": 0.4775, + "step": 4235 + }, + { + "epoch": 1.1855583543240975, + "grad_norm": 0.22397023021744042, + "learning_rate": 7.082558666838508e-05, + "loss": 0.4901, + "step": 4236 + }, + { + "epoch": 1.1858382311782816, + "grad_norm": 0.2187701538506456, + "learning_rate": 7.081156172976197e-05, + "loss": 0.4916, + "step": 4237 + }, + { + "epoch": 1.1861181080324656, + "grad_norm": 0.22276752537598846, + "learning_rate": 7.079753481028275e-05, + "loss": 0.5131, + "step": 4238 + }, + { + "epoch": 1.18639798488665, + "grad_norm": 0.2262761299765684, + "learning_rate": 7.078350591128253e-05, + "loss": 0.498, + "step": 4239 + }, + { + "epoch": 1.186677861740834, + "grad_norm": 0.21780706068190353, + "learning_rate": 7.076947503409659e-05, + "loss": 0.5116, + "step": 4240 + }, + { + "epoch": 1.186957738595018, + "grad_norm": 0.23506301305915292, + "learning_rate": 7.07554421800604e-05, + "loss": 0.5365, + "step": 4241 + }, + { + "epoch": 1.1872376154492024, + "grad_norm": 0.2266700959774239, + "learning_rate": 7.07414073505096e-05, + "loss": 0.507, + "step": 4242 + }, + { + "epoch": 1.1875174923033864, + "grad_norm": 0.22107972963006747, + "learning_rate": 7.072737054678003e-05, + "loss": 0.5124, + "step": 4243 + }, + { + "epoch": 1.1877973691575707, + "grad_norm": 0.21329136604897145, + "learning_rate": 7.071333177020774e-05, + "loss": 0.4866, + "step": 4244 + }, + { + "epoch": 1.1880772460117548, + "grad_norm": 0.22066376242840038, + "learning_rate": 7.069929102212892e-05, + "loss": 0.5257, + "step": 4245 + }, + { + "epoch": 1.188357122865939, + "grad_norm": 0.22224000689293583, + "learning_rate": 7.068524830388e-05, + "loss": 0.4757, + "step": 4246 + }, + { + "epoch": 1.1886369997201232, + "grad_norm": 0.23408366010403492, + "learning_rate": 7.067120361679758e-05, + "loss": 0.5311, + "step": 4247 + }, + { + "epoch": 1.1889168765743072, + "grad_norm": 0.23003287864002572, + "learning_rate": 7.065715696221843e-05, + "loss": 0.5086, + "step": 4248 + }, + { + "epoch": 1.1891967534284915, + "grad_norm": 0.2195987273406075, + "learning_rate": 7.064310834147951e-05, + "loss": 0.4967, + "step": 4249 + }, + { + "epoch": 1.1894766302826756, + "grad_norm": 0.22065475281740798, + "learning_rate": 7.0629057755918e-05, + "loss": 0.4955, + "step": 4250 + }, + { + "epoch": 1.1897565071368599, + "grad_norm": 0.2170458307794216, + "learning_rate": 7.06150052068712e-05, + "loss": 0.5069, + "step": 4251 + }, + { + "epoch": 1.190036383991044, + "grad_norm": 0.23148532456129006, + "learning_rate": 7.060095069567668e-05, + "loss": 0.5116, + "step": 4252 + }, + { + "epoch": 1.190316260845228, + "grad_norm": 0.23364056097506877, + "learning_rate": 7.058689422367212e-05, + "loss": 0.5063, + "step": 4253 + }, + { + "epoch": 1.1905961376994123, + "grad_norm": 0.23397608285019136, + "learning_rate": 7.057283579219548e-05, + "loss": 0.5086, + "step": 4254 + }, + { + "epoch": 1.1908760145535964, + "grad_norm": 0.21945407843328624, + "learning_rate": 7.05587754025848e-05, + "loss": 0.4787, + "step": 4255 + }, + { + "epoch": 1.1911558914077807, + "grad_norm": 0.22055980090323987, + "learning_rate": 7.054471305617837e-05, + "loss": 0.4912, + "step": 4256 + }, + { + "epoch": 1.1914357682619647, + "grad_norm": 0.23223658919043302, + "learning_rate": 7.053064875431465e-05, + "loss": 0.5028, + "step": 4257 + }, + { + "epoch": 1.1917156451161488, + "grad_norm": 0.22158566582115585, + "learning_rate": 7.051658249833228e-05, + "loss": 0.5224, + "step": 4258 + }, + { + "epoch": 1.191995521970333, + "grad_norm": 0.231116800212288, + "learning_rate": 7.050251428957013e-05, + "loss": 0.5074, + "step": 4259 + }, + { + "epoch": 1.1922753988245172, + "grad_norm": 0.2209692838660835, + "learning_rate": 7.048844412936719e-05, + "loss": 0.4882, + "step": 4260 + }, + { + "epoch": 1.1925552756787015, + "grad_norm": 0.21651888268196262, + "learning_rate": 7.047437201906265e-05, + "loss": 0.4616, + "step": 4261 + }, + { + "epoch": 1.1928351525328855, + "grad_norm": 0.23096357615514918, + "learning_rate": 7.046029795999592e-05, + "loss": 0.504, + "step": 4262 + }, + { + "epoch": 1.1931150293870696, + "grad_norm": 0.22201221291485465, + "learning_rate": 7.044622195350658e-05, + "loss": 0.515, + "step": 4263 + }, + { + "epoch": 1.1933949062412539, + "grad_norm": 0.229046761714028, + "learning_rate": 7.04321440009344e-05, + "loss": 0.4885, + "step": 4264 + }, + { + "epoch": 1.193674783095438, + "grad_norm": 0.22760846530361756, + "learning_rate": 7.041806410361933e-05, + "loss": 0.4941, + "step": 4265 + }, + { + "epoch": 1.1939546599496222, + "grad_norm": 0.22472571842244649, + "learning_rate": 7.040398226290148e-05, + "loss": 0.4957, + "step": 4266 + }, + { + "epoch": 1.1942345368038063, + "grad_norm": 0.22740631147937912, + "learning_rate": 7.038989848012116e-05, + "loss": 0.498, + "step": 4267 + }, + { + "epoch": 1.1945144136579904, + "grad_norm": 0.222479304520405, + "learning_rate": 7.037581275661891e-05, + "loss": 0.5029, + "step": 4268 + }, + { + "epoch": 1.1947942905121747, + "grad_norm": 0.21828111993437252, + "learning_rate": 7.036172509373539e-05, + "loss": 0.5176, + "step": 4269 + }, + { + "epoch": 1.1950741673663587, + "grad_norm": 0.2244098147858522, + "learning_rate": 7.034763549281149e-05, + "loss": 0.4921, + "step": 4270 + }, + { + "epoch": 1.195354044220543, + "grad_norm": 0.23001952760016786, + "learning_rate": 7.033354395518823e-05, + "loss": 0.4849, + "step": 4271 + }, + { + "epoch": 1.195633921074727, + "grad_norm": 0.2191270609470566, + "learning_rate": 7.031945048220689e-05, + "loss": 0.4908, + "step": 4272 + }, + { + "epoch": 1.1959137979289114, + "grad_norm": 0.2197867383422929, + "learning_rate": 7.030535507520889e-05, + "loss": 0.5046, + "step": 4273 + }, + { + "epoch": 1.1961936747830955, + "grad_norm": 0.2198926648283125, + "learning_rate": 7.02912577355358e-05, + "loss": 0.4663, + "step": 4274 + }, + { + "epoch": 1.1964735516372795, + "grad_norm": 0.22876495991098986, + "learning_rate": 7.027715846452947e-05, + "loss": 0.5068, + "step": 4275 + }, + { + "epoch": 1.1967534284914638, + "grad_norm": 0.2186226872262562, + "learning_rate": 7.026305726353184e-05, + "loss": 0.5076, + "step": 4276 + }, + { + "epoch": 1.197033305345648, + "grad_norm": 0.23654969360520095, + "learning_rate": 7.024895413388508e-05, + "loss": 0.5576, + "step": 4277 + }, + { + "epoch": 1.197313182199832, + "grad_norm": 0.23619070584067375, + "learning_rate": 7.023484907693153e-05, + "loss": 0.4998, + "step": 4278 + }, + { + "epoch": 1.1975930590540163, + "grad_norm": 0.2286984082526474, + "learning_rate": 7.02207420940137e-05, + "loss": 0.4986, + "step": 4279 + }, + { + "epoch": 1.1978729359082003, + "grad_norm": 0.23121220371959625, + "learning_rate": 7.020663318647433e-05, + "loss": 0.52, + "step": 4280 + }, + { + "epoch": 1.1981528127623846, + "grad_norm": 0.22056528872688555, + "learning_rate": 7.019252235565632e-05, + "loss": 0.5063, + "step": 4281 + }, + { + "epoch": 1.1984326896165687, + "grad_norm": 0.21282310607009033, + "learning_rate": 7.017840960290272e-05, + "loss": 0.4889, + "step": 4282 + }, + { + "epoch": 1.198712566470753, + "grad_norm": 0.2169820340377364, + "learning_rate": 7.01642949295568e-05, + "loss": 0.5072, + "step": 4283 + }, + { + "epoch": 1.198992443324937, + "grad_norm": 0.22417296859401595, + "learning_rate": 7.015017833696199e-05, + "loss": 0.4898, + "step": 4284 + }, + { + "epoch": 1.199272320179121, + "grad_norm": 0.2292495309509333, + "learning_rate": 7.013605982646195e-05, + "loss": 0.5063, + "step": 4285 + }, + { + "epoch": 1.1995521970333054, + "grad_norm": 0.2256952971670304, + "learning_rate": 7.012193939940045e-05, + "loss": 0.4951, + "step": 4286 + }, + { + "epoch": 1.1998320738874895, + "grad_norm": 0.22250011293368643, + "learning_rate": 7.01078170571215e-05, + "loss": 0.4935, + "step": 4287 + }, + { + "epoch": 1.2001119507416735, + "grad_norm": 0.22529443222396162, + "learning_rate": 7.009369280096926e-05, + "loss": 0.4904, + "step": 4288 + }, + { + "epoch": 1.2003918275958578, + "grad_norm": 0.21903130349810393, + "learning_rate": 7.007956663228809e-05, + "loss": 0.4954, + "step": 4289 + }, + { + "epoch": 1.200671704450042, + "grad_norm": 0.2225428572951667, + "learning_rate": 7.006543855242254e-05, + "loss": 0.5008, + "step": 4290 + }, + { + "epoch": 1.2009515813042262, + "grad_norm": 0.2218727051149631, + "learning_rate": 7.005130856271731e-05, + "loss": 0.4875, + "step": 4291 + }, + { + "epoch": 1.2012314581584103, + "grad_norm": 0.21450334615811675, + "learning_rate": 7.003717666451732e-05, + "loss": 0.4835, + "step": 4292 + }, + { + "epoch": 1.2015113350125946, + "grad_norm": 0.22975132976395687, + "learning_rate": 7.002304285916762e-05, + "loss": 0.4843, + "step": 4293 + }, + { + "epoch": 1.2017912118667786, + "grad_norm": 0.23056674838049043, + "learning_rate": 7.000890714801351e-05, + "loss": 0.4847, + "step": 4294 + }, + { + "epoch": 1.2020710887209627, + "grad_norm": 0.22610304577022208, + "learning_rate": 6.999476953240042e-05, + "loss": 0.5117, + "step": 4295 + }, + { + "epoch": 1.202350965575147, + "grad_norm": 0.22731879058846147, + "learning_rate": 6.998063001367397e-05, + "loss": 0.5054, + "step": 4296 + }, + { + "epoch": 1.202630842429331, + "grad_norm": 0.22615544087622722, + "learning_rate": 6.996648859317995e-05, + "loss": 0.4931, + "step": 4297 + }, + { + "epoch": 1.2029107192835153, + "grad_norm": 0.22247817324041005, + "learning_rate": 6.99523452722644e-05, + "loss": 0.4965, + "step": 4298 + }, + { + "epoch": 1.2031905961376994, + "grad_norm": 0.2175063462583486, + "learning_rate": 6.993820005227343e-05, + "loss": 0.5088, + "step": 4299 + }, + { + "epoch": 1.2034704729918835, + "grad_norm": 0.22253078911614768, + "learning_rate": 6.992405293455346e-05, + "loss": 0.5166, + "step": 4300 + }, + { + "epoch": 1.2037503498460678, + "grad_norm": 0.2184481026106758, + "learning_rate": 6.990990392045095e-05, + "loss": 0.5075, + "step": 4301 + }, + { + "epoch": 1.2040302267002518, + "grad_norm": 0.23101561958891997, + "learning_rate": 6.989575301131264e-05, + "loss": 0.5193, + "step": 4302 + }, + { + "epoch": 1.2043101035544361, + "grad_norm": 0.21959358271302212, + "learning_rate": 6.988160020848543e-05, + "loss": 0.5274, + "step": 4303 + }, + { + "epoch": 1.2045899804086202, + "grad_norm": 0.23230253016100438, + "learning_rate": 6.98674455133164e-05, + "loss": 0.5068, + "step": 4304 + }, + { + "epoch": 1.2048698572628043, + "grad_norm": 0.2245698445284393, + "learning_rate": 6.985328892715275e-05, + "loss": 0.479, + "step": 4305 + }, + { + "epoch": 1.2051497341169886, + "grad_norm": 0.2204339776275824, + "learning_rate": 6.983913045134197e-05, + "loss": 0.4945, + "step": 4306 + }, + { + "epoch": 1.2054296109711726, + "grad_norm": 0.21126067215074626, + "learning_rate": 6.982497008723164e-05, + "loss": 0.508, + "step": 4307 + }, + { + "epoch": 1.205709487825357, + "grad_norm": 0.21710117348999522, + "learning_rate": 6.981080783616958e-05, + "loss": 0.5073, + "step": 4308 + }, + { + "epoch": 1.205989364679541, + "grad_norm": 0.22228130817799446, + "learning_rate": 6.979664369950371e-05, + "loss": 0.5185, + "step": 4309 + }, + { + "epoch": 1.2062692415337253, + "grad_norm": 0.23298061886059024, + "learning_rate": 6.978247767858224e-05, + "loss": 0.5362, + "step": 4310 + }, + { + "epoch": 1.2065491183879093, + "grad_norm": 0.2213775602242786, + "learning_rate": 6.976830977475346e-05, + "loss": 0.4994, + "step": 4311 + }, + { + "epoch": 1.2068289952420934, + "grad_norm": 0.22193225682948606, + "learning_rate": 6.97541399893659e-05, + "loss": 0.5179, + "step": 4312 + }, + { + "epoch": 1.2071088720962777, + "grad_norm": 0.2147931682359676, + "learning_rate": 6.973996832376823e-05, + "loss": 0.4959, + "step": 4313 + }, + { + "epoch": 1.2073887489504618, + "grad_norm": 0.22157983408144044, + "learning_rate": 6.972579477930933e-05, + "loss": 0.487, + "step": 4314 + }, + { + "epoch": 1.2076686258046458, + "grad_norm": 0.221426402302768, + "learning_rate": 6.971161935733823e-05, + "loss": 0.4954, + "step": 4315 + }, + { + "epoch": 1.2079485026588301, + "grad_norm": 0.22797227652978325, + "learning_rate": 6.969744205920419e-05, + "loss": 0.4935, + "step": 4316 + }, + { + "epoch": 1.2082283795130142, + "grad_norm": 0.22617728962444442, + "learning_rate": 6.968326288625658e-05, + "loss": 0.5155, + "step": 4317 + }, + { + "epoch": 1.2085082563671985, + "grad_norm": 0.26518826871724366, + "learning_rate": 6.966908183984497e-05, + "loss": 0.5305, + "step": 4318 + }, + { + "epoch": 1.2087881332213826, + "grad_norm": 0.2257023193535358, + "learning_rate": 6.965489892131917e-05, + "loss": 0.4971, + "step": 4319 + }, + { + "epoch": 1.2090680100755669, + "grad_norm": 0.22311422674947592, + "learning_rate": 6.964071413202907e-05, + "loss": 0.492, + "step": 4320 + }, + { + "epoch": 1.209347886929751, + "grad_norm": 0.2276132950462771, + "learning_rate": 6.962652747332481e-05, + "loss": 0.504, + "step": 4321 + }, + { + "epoch": 1.209627763783935, + "grad_norm": 0.23757199110528004, + "learning_rate": 6.961233894655669e-05, + "loss": 0.508, + "step": 4322 + }, + { + "epoch": 1.2099076406381193, + "grad_norm": 0.22065205456398929, + "learning_rate": 6.959814855307516e-05, + "loss": 0.5089, + "step": 4323 + }, + { + "epoch": 1.2101875174923034, + "grad_norm": 0.22791538874212347, + "learning_rate": 6.958395629423088e-05, + "loss": 0.5095, + "step": 4324 + }, + { + "epoch": 1.2104673943464874, + "grad_norm": 0.22710981503104447, + "learning_rate": 6.95697621713747e-05, + "loss": 0.4876, + "step": 4325 + }, + { + "epoch": 1.2107472712006717, + "grad_norm": 0.22701901738159178, + "learning_rate": 6.955556618585758e-05, + "loss": 0.4939, + "step": 4326 + }, + { + "epoch": 1.2110271480548558, + "grad_norm": 0.2134448349238976, + "learning_rate": 6.954136833903073e-05, + "loss": 0.5135, + "step": 4327 + }, + { + "epoch": 1.21130702490904, + "grad_norm": 0.2279914956284989, + "learning_rate": 6.952716863224551e-05, + "loss": 0.5045, + "step": 4328 + }, + { + "epoch": 1.2115869017632241, + "grad_norm": 0.22369873232593462, + "learning_rate": 6.951296706685344e-05, + "loss": 0.5349, + "step": 4329 + }, + { + "epoch": 1.2118667786174084, + "grad_norm": 0.22437327522413142, + "learning_rate": 6.949876364420624e-05, + "loss": 0.499, + "step": 4330 + }, + { + "epoch": 1.2121466554715925, + "grad_norm": 0.2354358076812393, + "learning_rate": 6.948455836565582e-05, + "loss": 0.5251, + "step": 4331 + }, + { + "epoch": 1.2124265323257766, + "grad_norm": 0.23018675432170763, + "learning_rate": 6.947035123255421e-05, + "loss": 0.5113, + "step": 4332 + }, + { + "epoch": 1.2127064091799609, + "grad_norm": 0.22635467212494143, + "learning_rate": 6.945614224625368e-05, + "loss": 0.4983, + "step": 4333 + }, + { + "epoch": 1.212986286034145, + "grad_norm": 0.2296508815181712, + "learning_rate": 6.944193140810664e-05, + "loss": 0.5045, + "step": 4334 + }, + { + "epoch": 1.2132661628883292, + "grad_norm": 0.21446795403491542, + "learning_rate": 6.94277187194657e-05, + "loss": 0.4984, + "step": 4335 + }, + { + "epoch": 1.2135460397425133, + "grad_norm": 0.22026288884944412, + "learning_rate": 6.941350418168359e-05, + "loss": 0.5024, + "step": 4336 + }, + { + "epoch": 1.2138259165966974, + "grad_norm": 0.21403543573421865, + "learning_rate": 6.93992877961133e-05, + "loss": 0.4955, + "step": 4337 + }, + { + "epoch": 1.2141057934508817, + "grad_norm": 0.22403749573915716, + "learning_rate": 6.938506956410795e-05, + "loss": 0.4981, + "step": 4338 + }, + { + "epoch": 1.2143856703050657, + "grad_norm": 0.22715026917631595, + "learning_rate": 6.937084948702081e-05, + "loss": 0.5212, + "step": 4339 + }, + { + "epoch": 1.21466554715925, + "grad_norm": 0.22249576289313944, + "learning_rate": 6.93566275662054e-05, + "loss": 0.5029, + "step": 4340 + }, + { + "epoch": 1.214945424013434, + "grad_norm": 0.22576346777078676, + "learning_rate": 6.934240380301532e-05, + "loss": 0.5128, + "step": 4341 + }, + { + "epoch": 1.2152253008676182, + "grad_norm": 0.2326453627873477, + "learning_rate": 6.932817819880442e-05, + "loss": 0.5163, + "step": 4342 + }, + { + "epoch": 1.2155051777218024, + "grad_norm": 0.22232628361222592, + "learning_rate": 6.931395075492671e-05, + "loss": 0.4873, + "step": 4343 + }, + { + "epoch": 1.2157850545759865, + "grad_norm": 0.21402567488069954, + "learning_rate": 6.929972147273636e-05, + "loss": 0.5165, + "step": 4344 + }, + { + "epoch": 1.2160649314301708, + "grad_norm": 0.21582883929277186, + "learning_rate": 6.928549035358772e-05, + "loss": 0.4906, + "step": 4345 + }, + { + "epoch": 1.2163448082843549, + "grad_norm": 0.23829116546080992, + "learning_rate": 6.92712573988353e-05, + "loss": 0.4942, + "step": 4346 + }, + { + "epoch": 1.2166246851385392, + "grad_norm": 0.21699945188045142, + "learning_rate": 6.925702260983381e-05, + "loss": 0.5, + "step": 4347 + }, + { + "epoch": 1.2169045619927232, + "grad_norm": 0.22263458258539465, + "learning_rate": 6.924278598793814e-05, + "loss": 0.4832, + "step": 4348 + }, + { + "epoch": 1.2171844388469073, + "grad_norm": 0.22228713628757352, + "learning_rate": 6.922854753450333e-05, + "loss": 0.5084, + "step": 4349 + }, + { + "epoch": 1.2174643157010916, + "grad_norm": 0.23887434738098665, + "learning_rate": 6.92143072508846e-05, + "loss": 0.5052, + "step": 4350 + }, + { + "epoch": 1.2177441925552757, + "grad_norm": 0.23523705013624385, + "learning_rate": 6.920006513843736e-05, + "loss": 0.5279, + "step": 4351 + }, + { + "epoch": 1.2180240694094597, + "grad_norm": 0.22399348524879345, + "learning_rate": 6.918582119851716e-05, + "loss": 0.5155, + "step": 4352 + }, + { + "epoch": 1.218303946263644, + "grad_norm": 0.22735951210011596, + "learning_rate": 6.917157543247976e-05, + "loss": 0.4958, + "step": 4353 + }, + { + "epoch": 1.218583823117828, + "grad_norm": 0.22455010629801977, + "learning_rate": 6.915732784168109e-05, + "loss": 0.5138, + "step": 4354 + }, + { + "epoch": 1.2188636999720124, + "grad_norm": 0.22523964003086352, + "learning_rate": 6.914307842747723e-05, + "loss": 0.479, + "step": 4355 + }, + { + "epoch": 1.2191435768261965, + "grad_norm": 0.22385931106122062, + "learning_rate": 6.912882719122445e-05, + "loss": 0.5198, + "step": 4356 + }, + { + "epoch": 1.2194234536803807, + "grad_norm": 0.22893654538163064, + "learning_rate": 6.911457413427917e-05, + "loss": 0.4957, + "step": 4357 + }, + { + "epoch": 1.2197033305345648, + "grad_norm": 0.23360101326660163, + "learning_rate": 6.910031925799805e-05, + "loss": 0.5094, + "step": 4358 + }, + { + "epoch": 1.2199832073887489, + "grad_norm": 0.22439865759101557, + "learning_rate": 6.908606256373785e-05, + "loss": 0.4961, + "step": 4359 + }, + { + "epoch": 1.2202630842429332, + "grad_norm": 0.2130920644271705, + "learning_rate": 6.907180405285552e-05, + "loss": 0.4832, + "step": 4360 + }, + { + "epoch": 1.2205429610971172, + "grad_norm": 0.21962313250065543, + "learning_rate": 6.905754372670822e-05, + "loss": 0.4953, + "step": 4361 + }, + { + "epoch": 1.2208228379513013, + "grad_norm": 0.23117775885750053, + "learning_rate": 6.904328158665323e-05, + "loss": 0.4906, + "step": 4362 + }, + { + "epoch": 1.2211027148054856, + "grad_norm": 0.22927440489987105, + "learning_rate": 6.902901763404805e-05, + "loss": 0.482, + "step": 4363 + }, + { + "epoch": 1.2213825916596697, + "grad_norm": 0.22566949847086684, + "learning_rate": 6.901475187025032e-05, + "loss": 0.4796, + "step": 4364 + }, + { + "epoch": 1.221662468513854, + "grad_norm": 0.23315687664279067, + "learning_rate": 6.900048429661785e-05, + "loss": 0.5197, + "step": 4365 + }, + { + "epoch": 1.221942345368038, + "grad_norm": 0.23302202277015088, + "learning_rate": 6.898621491450867e-05, + "loss": 0.4977, + "step": 4366 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 0.22656513575201978, + "learning_rate": 6.89719437252809e-05, + "loss": 0.4817, + "step": 4367 + }, + { + "epoch": 1.2225020990764064, + "grad_norm": 0.23097869734734172, + "learning_rate": 6.895767073029293e-05, + "loss": 0.489, + "step": 4368 + }, + { + "epoch": 1.2227819759305905, + "grad_norm": 0.2190357020412073, + "learning_rate": 6.894339593090324e-05, + "loss": 0.5145, + "step": 4369 + }, + { + "epoch": 1.2230618527847748, + "grad_norm": 0.2312220645059619, + "learning_rate": 6.892911932847053e-05, + "loss": 0.5304, + "step": 4370 + }, + { + "epoch": 1.2233417296389588, + "grad_norm": 0.22147256958326017, + "learning_rate": 6.891484092435364e-05, + "loss": 0.5085, + "step": 4371 + }, + { + "epoch": 1.223621606493143, + "grad_norm": 0.2178564902303058, + "learning_rate": 6.89005607199116e-05, + "loss": 0.5104, + "step": 4372 + }, + { + "epoch": 1.2239014833473272, + "grad_norm": 0.2373240342136605, + "learning_rate": 6.888627871650362e-05, + "loss": 0.5133, + "step": 4373 + }, + { + "epoch": 1.2241813602015112, + "grad_norm": 0.22315431750372944, + "learning_rate": 6.887199491548906e-05, + "loss": 0.5342, + "step": 4374 + }, + { + "epoch": 1.2244612370556955, + "grad_norm": 0.22512542970153293, + "learning_rate": 6.885770931822745e-05, + "loss": 0.4847, + "step": 4375 + }, + { + "epoch": 1.2247411139098796, + "grad_norm": 0.23573484698243163, + "learning_rate": 6.884342192607853e-05, + "loss": 0.5104, + "step": 4376 + }, + { + "epoch": 1.225020990764064, + "grad_norm": 0.22956595334186586, + "learning_rate": 6.882913274040214e-05, + "loss": 0.5141, + "step": 4377 + }, + { + "epoch": 1.225300867618248, + "grad_norm": 0.23092574398464455, + "learning_rate": 6.881484176255837e-05, + "loss": 0.5041, + "step": 4378 + }, + { + "epoch": 1.225580744472432, + "grad_norm": 0.23105765714019483, + "learning_rate": 6.880054899390744e-05, + "loss": 0.4865, + "step": 4379 + }, + { + "epoch": 1.2258606213266163, + "grad_norm": 0.22009969745888194, + "learning_rate": 6.878625443580973e-05, + "loss": 0.4787, + "step": 4380 + }, + { + "epoch": 1.2261404981808004, + "grad_norm": 0.22907396351623255, + "learning_rate": 6.877195808962579e-05, + "loss": 0.4963, + "step": 4381 + }, + { + "epoch": 1.2264203750349847, + "grad_norm": 0.2260090364840226, + "learning_rate": 6.87576599567164e-05, + "loss": 0.516, + "step": 4382 + }, + { + "epoch": 1.2267002518891688, + "grad_norm": 0.23052936858197257, + "learning_rate": 6.874336003844241e-05, + "loss": 0.5109, + "step": 4383 + }, + { + "epoch": 1.226980128743353, + "grad_norm": 0.22102925274263935, + "learning_rate": 6.872905833616493e-05, + "loss": 0.5198, + "step": 4384 + }, + { + "epoch": 1.2272600055975371, + "grad_norm": 0.22367362492602536, + "learning_rate": 6.871475485124518e-05, + "loss": 0.514, + "step": 4385 + }, + { + "epoch": 1.2275398824517212, + "grad_norm": 0.23236912743197236, + "learning_rate": 6.870044958504461e-05, + "loss": 0.5114, + "step": 4386 + }, + { + "epoch": 1.2278197593059055, + "grad_norm": 0.22858973761660165, + "learning_rate": 6.868614253892478e-05, + "loss": 0.4928, + "step": 4387 + }, + { + "epoch": 1.2280996361600895, + "grad_norm": 0.23860342464787931, + "learning_rate": 6.867183371424744e-05, + "loss": 0.5112, + "step": 4388 + }, + { + "epoch": 1.2283795130142736, + "grad_norm": 0.22155547850155127, + "learning_rate": 6.86575231123745e-05, + "loss": 0.512, + "step": 4389 + }, + { + "epoch": 1.228659389868458, + "grad_norm": 0.22476076748312143, + "learning_rate": 6.864321073466809e-05, + "loss": 0.5067, + "step": 4390 + }, + { + "epoch": 1.228939266722642, + "grad_norm": 0.23491372461400523, + "learning_rate": 6.862889658249044e-05, + "loss": 0.5079, + "step": 4391 + }, + { + "epoch": 1.2292191435768263, + "grad_norm": 0.22948361156194794, + "learning_rate": 6.861458065720399e-05, + "loss": 0.4997, + "step": 4392 + }, + { + "epoch": 1.2294990204310103, + "grad_norm": 0.23391799870148752, + "learning_rate": 6.860026296017132e-05, + "loss": 0.5264, + "step": 4393 + }, + { + "epoch": 1.2297788972851946, + "grad_norm": 0.22897848051188208, + "learning_rate": 6.858594349275522e-05, + "loss": 0.5161, + "step": 4394 + }, + { + "epoch": 1.2300587741393787, + "grad_norm": 0.24500574082564155, + "learning_rate": 6.85716222563186e-05, + "loss": 0.5172, + "step": 4395 + }, + { + "epoch": 1.2303386509935628, + "grad_norm": 0.2269097252300049, + "learning_rate": 6.855729925222462e-05, + "loss": 0.5149, + "step": 4396 + }, + { + "epoch": 1.230618527847747, + "grad_norm": 0.2307053073066549, + "learning_rate": 6.854297448183647e-05, + "loss": 0.4934, + "step": 4397 + }, + { + "epoch": 1.2308984047019311, + "grad_norm": 0.2339722588104781, + "learning_rate": 6.852864794651765e-05, + "loss": 0.517, + "step": 4398 + }, + { + "epoch": 1.2311782815561152, + "grad_norm": 0.22895011510671143, + "learning_rate": 6.851431964763174e-05, + "loss": 0.5015, + "step": 4399 + }, + { + "epoch": 1.2314581584102995, + "grad_norm": 0.2246427196761417, + "learning_rate": 6.849998958654252e-05, + "loss": 0.526, + "step": 4400 + }, + { + "epoch": 1.2317380352644836, + "grad_norm": 0.22561048513210016, + "learning_rate": 6.848565776461394e-05, + "loss": 0.5124, + "step": 4401 + }, + { + "epoch": 1.2320179121186678, + "grad_norm": 0.21326768138178004, + "learning_rate": 6.847132418321012e-05, + "loss": 0.4889, + "step": 4402 + }, + { + "epoch": 1.232297788972852, + "grad_norm": 0.22435921932213576, + "learning_rate": 6.845698884369529e-05, + "loss": 0.5111, + "step": 4403 + }, + { + "epoch": 1.2325776658270362, + "grad_norm": 0.2322836935784332, + "learning_rate": 6.844265174743396e-05, + "loss": 0.5049, + "step": 4404 + }, + { + "epoch": 1.2328575426812203, + "grad_norm": 0.2274950055276708, + "learning_rate": 6.842831289579071e-05, + "loss": 0.5006, + "step": 4405 + }, + { + "epoch": 1.2331374195354043, + "grad_norm": 0.22198212014618518, + "learning_rate": 6.841397229013032e-05, + "loss": 0.4784, + "step": 4406 + }, + { + "epoch": 1.2334172963895886, + "grad_norm": 0.2590475668793141, + "learning_rate": 6.839962993181775e-05, + "loss": 0.5087, + "step": 4407 + }, + { + "epoch": 1.2336971732437727, + "grad_norm": 0.22235082191551603, + "learning_rate": 6.83852858222181e-05, + "loss": 0.5102, + "step": 4408 + }, + { + "epoch": 1.233977050097957, + "grad_norm": 0.22191258055780216, + "learning_rate": 6.837093996269665e-05, + "loss": 0.4893, + "step": 4409 + }, + { + "epoch": 1.234256926952141, + "grad_norm": 0.22963042675137446, + "learning_rate": 6.835659235461884e-05, + "loss": 0.4991, + "step": 4410 + }, + { + "epoch": 1.2345368038063251, + "grad_norm": 0.2321101229570174, + "learning_rate": 6.83422429993503e-05, + "loss": 0.5069, + "step": 4411 + }, + { + "epoch": 1.2348166806605094, + "grad_norm": 0.2198828652135261, + "learning_rate": 6.832789189825681e-05, + "loss": 0.4802, + "step": 4412 + }, + { + "epoch": 1.2350965575146935, + "grad_norm": 0.2301212143679133, + "learning_rate": 6.831353905270434e-05, + "loss": 0.5047, + "step": 4413 + }, + { + "epoch": 1.2353764343688778, + "grad_norm": 0.228381496483737, + "learning_rate": 6.829918446405894e-05, + "loss": 0.5093, + "step": 4414 + }, + { + "epoch": 1.2356563112230619, + "grad_norm": 0.23159916571391817, + "learning_rate": 6.828482813368692e-05, + "loss": 0.5193, + "step": 4415 + }, + { + "epoch": 1.235936188077246, + "grad_norm": 0.22818500716053816, + "learning_rate": 6.827047006295473e-05, + "loss": 0.5011, + "step": 4416 + }, + { + "epoch": 1.2362160649314302, + "grad_norm": 0.2314663773681912, + "learning_rate": 6.825611025322898e-05, + "loss": 0.5018, + "step": 4417 + }, + { + "epoch": 1.2364959417856143, + "grad_norm": 0.2173769415599354, + "learning_rate": 6.824174870587643e-05, + "loss": 0.4738, + "step": 4418 + }, + { + "epoch": 1.2367758186397986, + "grad_norm": 0.23004061754008617, + "learning_rate": 6.822738542226402e-05, + "loss": 0.5145, + "step": 4419 + }, + { + "epoch": 1.2370556954939826, + "grad_norm": 0.2144677896012713, + "learning_rate": 6.821302040375886e-05, + "loss": 0.5236, + "step": 4420 + }, + { + "epoch": 1.2373355723481667, + "grad_norm": 0.22778656766316274, + "learning_rate": 6.819865365172824e-05, + "loss": 0.5095, + "step": 4421 + }, + { + "epoch": 1.237615449202351, + "grad_norm": 0.22309038235696235, + "learning_rate": 6.818428516753959e-05, + "loss": 0.4989, + "step": 4422 + }, + { + "epoch": 1.237895326056535, + "grad_norm": 0.2215635131690984, + "learning_rate": 6.816991495256047e-05, + "loss": 0.4813, + "step": 4423 + }, + { + "epoch": 1.2381752029107194, + "grad_norm": 0.2230856565953154, + "learning_rate": 6.81555430081587e-05, + "loss": 0.5037, + "step": 4424 + }, + { + "epoch": 1.2384550797649034, + "grad_norm": 0.23180567720413758, + "learning_rate": 6.814116933570217e-05, + "loss": 0.5368, + "step": 4425 + }, + { + "epoch": 1.2387349566190875, + "grad_norm": 0.21018030181583042, + "learning_rate": 6.812679393655898e-05, + "loss": 0.492, + "step": 4426 + }, + { + "epoch": 1.2390148334732718, + "grad_norm": 0.21309149281973794, + "learning_rate": 6.811241681209741e-05, + "loss": 0.53, + "step": 4427 + }, + { + "epoch": 1.2392947103274559, + "grad_norm": 0.2260166789213048, + "learning_rate": 6.809803796368588e-05, + "loss": 0.5033, + "step": 4428 + }, + { + "epoch": 1.2395745871816402, + "grad_norm": 0.23952326573956984, + "learning_rate": 6.808365739269294e-05, + "loss": 0.5007, + "step": 4429 + }, + { + "epoch": 1.2398544640358242, + "grad_norm": 0.21874113451870314, + "learning_rate": 6.806927510048738e-05, + "loss": 0.4997, + "step": 4430 + }, + { + "epoch": 1.2401343408900085, + "grad_norm": 0.21426388745862024, + "learning_rate": 6.805489108843813e-05, + "loss": 0.5068, + "step": 4431 + }, + { + "epoch": 1.2404142177441926, + "grad_norm": 0.2215734126920041, + "learning_rate": 6.80405053579142e-05, + "loss": 0.5175, + "step": 4432 + }, + { + "epoch": 1.2406940945983767, + "grad_norm": 0.23137841249112412, + "learning_rate": 6.802611791028489e-05, + "loss": 0.5073, + "step": 4433 + }, + { + "epoch": 1.240973971452561, + "grad_norm": 0.2482104985235173, + "learning_rate": 6.801172874691959e-05, + "loss": 0.5158, + "step": 4434 + }, + { + "epoch": 1.241253848306745, + "grad_norm": 0.2167572132339733, + "learning_rate": 6.799733786918785e-05, + "loss": 0.504, + "step": 4435 + }, + { + "epoch": 1.241533725160929, + "grad_norm": 0.21886396347628806, + "learning_rate": 6.798294527845943e-05, + "loss": 0.4887, + "step": 4436 + }, + { + "epoch": 1.2418136020151134, + "grad_norm": 0.22523846948020232, + "learning_rate": 6.79685509761042e-05, + "loss": 0.4918, + "step": 4437 + }, + { + "epoch": 1.2420934788692974, + "grad_norm": 0.22338648580035944, + "learning_rate": 6.795415496349224e-05, + "loss": 0.4976, + "step": 4438 + }, + { + "epoch": 1.2423733557234817, + "grad_norm": 0.21679938045963373, + "learning_rate": 6.793975724199377e-05, + "loss": 0.4948, + "step": 4439 + }, + { + "epoch": 1.2426532325776658, + "grad_norm": 0.24280504081104756, + "learning_rate": 6.792535781297917e-05, + "loss": 0.4916, + "step": 4440 + }, + { + "epoch": 1.24293310943185, + "grad_norm": 0.23459037292940543, + "learning_rate": 6.791095667781897e-05, + "loss": 0.4941, + "step": 4441 + }, + { + "epoch": 1.2432129862860342, + "grad_norm": 0.22356252534979063, + "learning_rate": 6.78965538378839e-05, + "loss": 0.5066, + "step": 4442 + }, + { + "epoch": 1.2434928631402182, + "grad_norm": 0.22891790303476722, + "learning_rate": 6.78821492945448e-05, + "loss": 0.51, + "step": 4443 + }, + { + "epoch": 1.2437727399944025, + "grad_norm": 0.2173379812467162, + "learning_rate": 6.786774304917272e-05, + "loss": 0.4871, + "step": 4444 + }, + { + "epoch": 1.2440526168485866, + "grad_norm": 0.2212811210753908, + "learning_rate": 6.785333510313886e-05, + "loss": 0.5069, + "step": 4445 + }, + { + "epoch": 1.2443324937027707, + "grad_norm": 0.21885501957202622, + "learning_rate": 6.783892545781456e-05, + "loss": 0.5033, + "step": 4446 + }, + { + "epoch": 1.244612370556955, + "grad_norm": 0.22307423176941266, + "learning_rate": 6.782451411457137e-05, + "loss": 0.4953, + "step": 4447 + }, + { + "epoch": 1.244892247411139, + "grad_norm": 0.23209156643527004, + "learning_rate": 6.781010107478094e-05, + "loss": 0.5105, + "step": 4448 + }, + { + "epoch": 1.2451721242653233, + "grad_norm": 0.23180375537378448, + "learning_rate": 6.779568633981514e-05, + "loss": 0.5034, + "step": 4449 + }, + { + "epoch": 1.2454520011195074, + "grad_norm": 0.22218934128045695, + "learning_rate": 6.778126991104594e-05, + "loss": 0.5122, + "step": 4450 + }, + { + "epoch": 1.2457318779736917, + "grad_norm": 0.225292661771242, + "learning_rate": 6.776685178984551e-05, + "loss": 0.5086, + "step": 4451 + }, + { + "epoch": 1.2460117548278757, + "grad_norm": 0.23538128851183887, + "learning_rate": 6.775243197758619e-05, + "loss": 0.4958, + "step": 4452 + }, + { + "epoch": 1.2462916316820598, + "grad_norm": 0.2236835830138261, + "learning_rate": 6.773801047564045e-05, + "loss": 0.4944, + "step": 4453 + }, + { + "epoch": 1.246571508536244, + "grad_norm": 0.22707190607036884, + "learning_rate": 6.772358728538095e-05, + "loss": 0.4943, + "step": 4454 + }, + { + "epoch": 1.2468513853904282, + "grad_norm": 0.2363436807243141, + "learning_rate": 6.770916240818048e-05, + "loss": 0.5219, + "step": 4455 + }, + { + "epoch": 1.2471312622446125, + "grad_norm": 0.22731878144830908, + "learning_rate": 6.769473584541203e-05, + "loss": 0.5069, + "step": 4456 + }, + { + "epoch": 1.2474111390987965, + "grad_norm": 0.22436996842966697, + "learning_rate": 6.768030759844872e-05, + "loss": 0.4823, + "step": 4457 + }, + { + "epoch": 1.2476910159529806, + "grad_norm": 0.2205881881487971, + "learning_rate": 6.766587766866386e-05, + "loss": 0.4802, + "step": 4458 + }, + { + "epoch": 1.2479708928071649, + "grad_norm": 0.22828588267098623, + "learning_rate": 6.765144605743084e-05, + "loss": 0.5087, + "step": 4459 + }, + { + "epoch": 1.248250769661349, + "grad_norm": 0.2328653388828949, + "learning_rate": 6.763701276612333e-05, + "loss": 0.5154, + "step": 4460 + }, + { + "epoch": 1.2485306465155332, + "grad_norm": 0.23316643588287533, + "learning_rate": 6.762257779611505e-05, + "loss": 0.5068, + "step": 4461 + }, + { + "epoch": 1.2488105233697173, + "grad_norm": 0.22243507890188366, + "learning_rate": 6.760814114877995e-05, + "loss": 0.4869, + "step": 4462 + }, + { + "epoch": 1.2490904002239014, + "grad_norm": 0.22383762587784456, + "learning_rate": 6.759370282549213e-05, + "loss": 0.4923, + "step": 4463 + }, + { + "epoch": 1.2493702770780857, + "grad_norm": 0.21883428619120182, + "learning_rate": 6.757926282762583e-05, + "loss": 0.5009, + "step": 4464 + }, + { + "epoch": 1.2496501539322697, + "grad_norm": 0.2225919049797821, + "learning_rate": 6.756482115655545e-05, + "loss": 0.5179, + "step": 4465 + }, + { + "epoch": 1.249930030786454, + "grad_norm": 0.2600433388240312, + "learning_rate": 6.755037781365557e-05, + "loss": 0.5224, + "step": 4466 + }, + { + "epoch": 1.250209907640638, + "grad_norm": 0.2270385776196222, + "learning_rate": 6.75359328003009e-05, + "loss": 0.4987, + "step": 4467 + }, + { + "epoch": 1.2504897844948224, + "grad_norm": 0.22286683184484726, + "learning_rate": 6.752148611786633e-05, + "loss": 0.4952, + "step": 4468 + }, + { + "epoch": 1.2507696613490065, + "grad_norm": 0.23560808843530517, + "learning_rate": 6.750703776772691e-05, + "loss": 0.5111, + "step": 4469 + }, + { + "epoch": 1.2510495382031905, + "grad_norm": 0.21047351981030982, + "learning_rate": 6.749258775125783e-05, + "loss": 0.51, + "step": 4470 + }, + { + "epoch": 1.2513294150573748, + "grad_norm": 0.22618105344721426, + "learning_rate": 6.747813606983446e-05, + "loss": 0.5043, + "step": 4471 + }, + { + "epoch": 1.251609291911559, + "grad_norm": 0.22894601947232676, + "learning_rate": 6.74636827248323e-05, + "loss": 0.5096, + "step": 4472 + }, + { + "epoch": 1.251889168765743, + "grad_norm": 0.2336288045912679, + "learning_rate": 6.744922771762705e-05, + "loss": 0.5251, + "step": 4473 + }, + { + "epoch": 1.2521690456199273, + "grad_norm": 0.2187782199417927, + "learning_rate": 6.743477104959455e-05, + "loss": 0.5028, + "step": 4474 + }, + { + "epoch": 1.2524489224741113, + "grad_norm": 0.21555620580526136, + "learning_rate": 6.742031272211078e-05, + "loss": 0.4703, + "step": 4475 + }, + { + "epoch": 1.2527287993282956, + "grad_norm": 0.21785022135296023, + "learning_rate": 6.74058527365519e-05, + "loss": 0.4953, + "step": 4476 + }, + { + "epoch": 1.2530086761824797, + "grad_norm": 0.22264608063454083, + "learning_rate": 6.73913910942942e-05, + "loss": 0.5011, + "step": 4477 + }, + { + "epoch": 1.253288553036664, + "grad_norm": 0.22298368111761738, + "learning_rate": 6.737692779671417e-05, + "loss": 0.4983, + "step": 4478 + }, + { + "epoch": 1.253568429890848, + "grad_norm": 0.21456946013095476, + "learning_rate": 6.736246284518843e-05, + "loss": 0.5151, + "step": 4479 + }, + { + "epoch": 1.2538483067450321, + "grad_norm": 0.21229148838518647, + "learning_rate": 6.734799624109376e-05, + "loss": 0.4978, + "step": 4480 + }, + { + "epoch": 1.2541281835992164, + "grad_norm": 0.22282362281730558, + "learning_rate": 6.733352798580708e-05, + "loss": 0.4751, + "step": 4481 + }, + { + "epoch": 1.2544080604534005, + "grad_norm": 0.2292642849466078, + "learning_rate": 6.731905808070551e-05, + "loss": 0.497, + "step": 4482 + }, + { + "epoch": 1.2546879373075845, + "grad_norm": 0.21463230053034607, + "learning_rate": 6.73045865271663e-05, + "loss": 0.5232, + "step": 4483 + }, + { + "epoch": 1.2549678141617688, + "grad_norm": 0.2272707335182323, + "learning_rate": 6.729011332656685e-05, + "loss": 0.5037, + "step": 4484 + }, + { + "epoch": 1.255247691015953, + "grad_norm": 0.2315495126259172, + "learning_rate": 6.727563848028478e-05, + "loss": 0.5226, + "step": 4485 + }, + { + "epoch": 1.2555275678701372, + "grad_norm": 0.22264526390434827, + "learning_rate": 6.726116198969773e-05, + "loss": 0.4986, + "step": 4486 + }, + { + "epoch": 1.2558074447243213, + "grad_norm": 0.21724436342094056, + "learning_rate": 6.724668385618362e-05, + "loss": 0.4771, + "step": 4487 + }, + { + "epoch": 1.2560873215785056, + "grad_norm": 0.22695688304661193, + "learning_rate": 6.72322040811205e-05, + "loss": 0.4797, + "step": 4488 + }, + { + "epoch": 1.2563671984326896, + "grad_norm": 0.223153852616965, + "learning_rate": 6.721772266588653e-05, + "loss": 0.493, + "step": 4489 + }, + { + "epoch": 1.2566470752868737, + "grad_norm": 0.2287880239038558, + "learning_rate": 6.72032396118601e-05, + "loss": 0.5151, + "step": 4490 + }, + { + "epoch": 1.256926952141058, + "grad_norm": 0.22493308899385148, + "learning_rate": 6.718875492041968e-05, + "loss": 0.4996, + "step": 4491 + }, + { + "epoch": 1.257206828995242, + "grad_norm": 0.23090239774892557, + "learning_rate": 6.717426859294395e-05, + "loss": 0.5154, + "step": 4492 + }, + { + "epoch": 1.2574867058494261, + "grad_norm": 0.2772282550858744, + "learning_rate": 6.715978063081174e-05, + "loss": 0.5094, + "step": 4493 + }, + { + "epoch": 1.2577665827036104, + "grad_norm": 0.23126063717392392, + "learning_rate": 6.7145291035402e-05, + "loss": 0.4935, + "step": 4494 + }, + { + "epoch": 1.2580464595577947, + "grad_norm": 0.23362686207403668, + "learning_rate": 6.713079980809385e-05, + "loss": 0.509, + "step": 4495 + }, + { + "epoch": 1.2583263364119788, + "grad_norm": 0.23498337073517991, + "learning_rate": 6.71163069502666e-05, + "loss": 0.5124, + "step": 4496 + }, + { + "epoch": 1.2586062132661628, + "grad_norm": 0.21308792444705082, + "learning_rate": 6.710181246329965e-05, + "loss": 0.4849, + "step": 4497 + }, + { + "epoch": 1.2588860901203471, + "grad_norm": 0.2247191896077761, + "learning_rate": 6.708731634857263e-05, + "loss": 0.5158, + "step": 4498 + }, + { + "epoch": 1.2591659669745312, + "grad_norm": 0.21883638501941233, + "learning_rate": 6.707281860746529e-05, + "loss": 0.4787, + "step": 4499 + }, + { + "epoch": 1.2594458438287153, + "grad_norm": 0.2268980946026509, + "learning_rate": 6.705831924135749e-05, + "loss": 0.5036, + "step": 4500 + }, + { + "epoch": 1.2597257206828996, + "grad_norm": 0.23025472531366095, + "learning_rate": 6.704381825162934e-05, + "loss": 0.4776, + "step": 4501 + }, + { + "epoch": 1.2600055975370836, + "grad_norm": 0.2262865710994982, + "learning_rate": 6.702931563966101e-05, + "loss": 0.4971, + "step": 4502 + }, + { + "epoch": 1.260285474391268, + "grad_norm": 0.2288895381282692, + "learning_rate": 6.701481140683291e-05, + "loss": 0.5048, + "step": 4503 + }, + { + "epoch": 1.260565351245452, + "grad_norm": 0.23004911632596733, + "learning_rate": 6.700030555452552e-05, + "loss": 0.5033, + "step": 4504 + }, + { + "epoch": 1.2608452280996363, + "grad_norm": 0.22495432634208204, + "learning_rate": 6.698579808411954e-05, + "loss": 0.5096, + "step": 4505 + }, + { + "epoch": 1.2611251049538204, + "grad_norm": 0.23058737312728403, + "learning_rate": 6.69712889969958e-05, + "loss": 0.5085, + "step": 4506 + }, + { + "epoch": 1.2614049818080044, + "grad_norm": 0.22932014941072562, + "learning_rate": 6.695677829453524e-05, + "loss": 0.5079, + "step": 4507 + }, + { + "epoch": 1.2616848586621887, + "grad_norm": 0.21917317206185066, + "learning_rate": 6.694226597811906e-05, + "loss": 0.5295, + "step": 4508 + }, + { + "epoch": 1.2619647355163728, + "grad_norm": 0.2212541162421701, + "learning_rate": 6.692775204912852e-05, + "loss": 0.4812, + "step": 4509 + }, + { + "epoch": 1.2622446123705569, + "grad_norm": 0.2227090718976145, + "learning_rate": 6.691323650894504e-05, + "loss": 0.4831, + "step": 4510 + }, + { + "epoch": 1.2625244892247411, + "grad_norm": 0.2303567506652895, + "learning_rate": 6.689871935895026e-05, + "loss": 0.5286, + "step": 4511 + }, + { + "epoch": 1.2628043660789252, + "grad_norm": 0.23590189093245803, + "learning_rate": 6.688420060052593e-05, + "loss": 0.5132, + "step": 4512 + }, + { + "epoch": 1.2630842429331095, + "grad_norm": 0.22756965231482607, + "learning_rate": 6.686968023505393e-05, + "loss": 0.4943, + "step": 4513 + }, + { + "epoch": 1.2633641197872936, + "grad_norm": 0.2261027838051452, + "learning_rate": 6.68551582639163e-05, + "loss": 0.5154, + "step": 4514 + }, + { + "epoch": 1.2636439966414779, + "grad_norm": 0.22471759104126507, + "learning_rate": 6.684063468849527e-05, + "loss": 0.4975, + "step": 4515 + }, + { + "epoch": 1.263923873495662, + "grad_norm": 0.22240388619876472, + "learning_rate": 6.682610951017323e-05, + "loss": 0.4935, + "step": 4516 + }, + { + "epoch": 1.264203750349846, + "grad_norm": 0.22414050150156797, + "learning_rate": 6.681158273033265e-05, + "loss": 0.5043, + "step": 4517 + }, + { + "epoch": 1.2644836272040303, + "grad_norm": 0.21799138634781873, + "learning_rate": 6.679705435035622e-05, + "loss": 0.5051, + "step": 4518 + }, + { + "epoch": 1.2647635040582144, + "grad_norm": 0.24016586750733343, + "learning_rate": 6.678252437162677e-05, + "loss": 0.4907, + "step": 4519 + }, + { + "epoch": 1.2650433809123984, + "grad_norm": 0.22503684929739645, + "learning_rate": 6.676799279552723e-05, + "loss": 0.5082, + "step": 4520 + }, + { + "epoch": 1.2653232577665827, + "grad_norm": 0.2268392453680348, + "learning_rate": 6.675345962344078e-05, + "loss": 0.4803, + "step": 4521 + }, + { + "epoch": 1.2656031346207668, + "grad_norm": 0.22869329063653454, + "learning_rate": 6.673892485675066e-05, + "loss": 0.4835, + "step": 4522 + }, + { + "epoch": 1.265883011474951, + "grad_norm": 0.22282360543506255, + "learning_rate": 6.67243884968403e-05, + "loss": 0.49, + "step": 4523 + }, + { + "epoch": 1.2661628883291351, + "grad_norm": 0.2207826807449118, + "learning_rate": 6.670985054509326e-05, + "loss": 0.4784, + "step": 4524 + }, + { + "epoch": 1.2664427651833194, + "grad_norm": 0.22161549480639797, + "learning_rate": 6.66953110028933e-05, + "loss": 0.5048, + "step": 4525 + }, + { + "epoch": 1.2667226420375035, + "grad_norm": 0.2237115226263842, + "learning_rate": 6.66807698716243e-05, + "loss": 0.4922, + "step": 4526 + }, + { + "epoch": 1.2670025188916876, + "grad_norm": 0.2307389487534476, + "learning_rate": 6.66662271526703e-05, + "loss": 0.4964, + "step": 4527 + }, + { + "epoch": 1.2672823957458719, + "grad_norm": 0.2276555245827964, + "learning_rate": 6.665168284741545e-05, + "loss": 0.4903, + "step": 4528 + }, + { + "epoch": 1.267562272600056, + "grad_norm": 0.2272755431780669, + "learning_rate": 6.663713695724412e-05, + "loss": 0.4973, + "step": 4529 + }, + { + "epoch": 1.26784214945424, + "grad_norm": 0.23393735854339515, + "learning_rate": 6.66225894835408e-05, + "loss": 0.487, + "step": 4530 + }, + { + "epoch": 1.2681220263084243, + "grad_norm": 0.22483424192193738, + "learning_rate": 6.660804042769008e-05, + "loss": 0.5087, + "step": 4531 + }, + { + "epoch": 1.2684019031626086, + "grad_norm": 0.231993026857539, + "learning_rate": 6.659348979107679e-05, + "loss": 0.4966, + "step": 4532 + }, + { + "epoch": 1.2686817800167927, + "grad_norm": 0.225707753150604, + "learning_rate": 6.657893757508583e-05, + "loss": 0.5229, + "step": 4533 + }, + { + "epoch": 1.2689616568709767, + "grad_norm": 0.22251313029927405, + "learning_rate": 6.656438378110234e-05, + "loss": 0.4667, + "step": 4534 + }, + { + "epoch": 1.269241533725161, + "grad_norm": 0.22719061758486747, + "learning_rate": 6.654982841051151e-05, + "loss": 0.4864, + "step": 4535 + }, + { + "epoch": 1.269521410579345, + "grad_norm": 0.22339650100648326, + "learning_rate": 6.653527146469877e-05, + "loss": 0.5132, + "step": 4536 + }, + { + "epoch": 1.2698012874335292, + "grad_norm": 0.22253956314267231, + "learning_rate": 6.652071294504963e-05, + "loss": 0.4971, + "step": 4537 + }, + { + "epoch": 1.2700811642877134, + "grad_norm": 0.2275811964542699, + "learning_rate": 6.650615285294977e-05, + "loss": 0.4942, + "step": 4538 + }, + { + "epoch": 1.2703610411418975, + "grad_norm": 0.22768182303817025, + "learning_rate": 6.649159118978506e-05, + "loss": 0.4759, + "step": 4539 + }, + { + "epoch": 1.2706409179960816, + "grad_norm": 0.2283229731914254, + "learning_rate": 6.647702795694146e-05, + "loss": 0.5152, + "step": 4540 + }, + { + "epoch": 1.2709207948502659, + "grad_norm": 0.2964667401302082, + "learning_rate": 6.64624631558051e-05, + "loss": 0.4978, + "step": 4541 + }, + { + "epoch": 1.2712006717044502, + "grad_norm": 0.22264768078734123, + "learning_rate": 6.64478967877623e-05, + "loss": 0.4911, + "step": 4542 + }, + { + "epoch": 1.2714805485586342, + "grad_norm": 0.2262480614267082, + "learning_rate": 6.643332885419949e-05, + "loss": 0.5077, + "step": 4543 + }, + { + "epoch": 1.2717604254128183, + "grad_norm": 0.22698807994529827, + "learning_rate": 6.641875935650324e-05, + "loss": 0.5027, + "step": 4544 + }, + { + "epoch": 1.2720403022670026, + "grad_norm": 0.22790119660781558, + "learning_rate": 6.640418829606026e-05, + "loss": 0.5058, + "step": 4545 + }, + { + "epoch": 1.2723201791211867, + "grad_norm": 0.23398749484193088, + "learning_rate": 6.638961567425747e-05, + "loss": 0.5208, + "step": 4546 + }, + { + "epoch": 1.2726000559753707, + "grad_norm": 0.2129361266639578, + "learning_rate": 6.637504149248191e-05, + "loss": 0.5143, + "step": 4547 + }, + { + "epoch": 1.272879932829555, + "grad_norm": 0.2217325763753521, + "learning_rate": 6.636046575212072e-05, + "loss": 0.4922, + "step": 4548 + }, + { + "epoch": 1.273159809683739, + "grad_norm": 0.21700462244893803, + "learning_rate": 6.634588845456123e-05, + "loss": 0.4788, + "step": 4549 + }, + { + "epoch": 1.2734396865379234, + "grad_norm": 0.24376781045059606, + "learning_rate": 6.633130960119092e-05, + "loss": 0.4911, + "step": 4550 + }, + { + "epoch": 1.2737195633921075, + "grad_norm": 0.23127889698525972, + "learning_rate": 6.631672919339743e-05, + "loss": 0.5012, + "step": 4551 + }, + { + "epoch": 1.2739994402462917, + "grad_norm": 0.22248802603051918, + "learning_rate": 6.630214723256853e-05, + "loss": 0.4832, + "step": 4552 + }, + { + "epoch": 1.2742793171004758, + "grad_norm": 0.22005460542819927, + "learning_rate": 6.628756372009213e-05, + "loss": 0.4931, + "step": 4553 + }, + { + "epoch": 1.2745591939546599, + "grad_norm": 0.2314092087083626, + "learning_rate": 6.627297865735629e-05, + "loss": 0.4915, + "step": 4554 + }, + { + "epoch": 1.2748390708088442, + "grad_norm": 0.2247683211760867, + "learning_rate": 6.625839204574925e-05, + "loss": 0.4816, + "step": 4555 + }, + { + "epoch": 1.2751189476630282, + "grad_norm": 0.21776217439031326, + "learning_rate": 6.624380388665934e-05, + "loss": 0.5035, + "step": 4556 + }, + { + "epoch": 1.2753988245172123, + "grad_norm": 0.2209502526007058, + "learning_rate": 6.622921418147509e-05, + "loss": 0.497, + "step": 4557 + }, + { + "epoch": 1.2756787013713966, + "grad_norm": 0.23819498739874898, + "learning_rate": 6.621462293158514e-05, + "loss": 0.5042, + "step": 4558 + }, + { + "epoch": 1.2759585782255807, + "grad_norm": 0.23453751221019697, + "learning_rate": 6.620003013837832e-05, + "loss": 0.5094, + "step": 4559 + }, + { + "epoch": 1.276238455079765, + "grad_norm": 0.23350259985026242, + "learning_rate": 6.618543580324355e-05, + "loss": 0.4976, + "step": 4560 + }, + { + "epoch": 1.276518331933949, + "grad_norm": 0.21956856279332757, + "learning_rate": 6.617083992756994e-05, + "loss": 0.4983, + "step": 4561 + }, + { + "epoch": 1.2767982087881333, + "grad_norm": 0.21809874334246912, + "learning_rate": 6.615624251274676e-05, + "loss": 0.508, + "step": 4562 + }, + { + "epoch": 1.2770780856423174, + "grad_norm": 0.2255808899958128, + "learning_rate": 6.614164356016335e-05, + "loss": 0.5093, + "step": 4563 + }, + { + "epoch": 1.2773579624965015, + "grad_norm": 0.21786145442604976, + "learning_rate": 6.612704307120928e-05, + "loss": 0.4797, + "step": 4564 + }, + { + "epoch": 1.2776378393506858, + "grad_norm": 0.22242215099322205, + "learning_rate": 6.611244104727422e-05, + "loss": 0.493, + "step": 4565 + }, + { + "epoch": 1.2779177162048698, + "grad_norm": 0.23106558933286547, + "learning_rate": 6.609783748974802e-05, + "loss": 0.4773, + "step": 4566 + }, + { + "epoch": 1.278197593059054, + "grad_norm": 0.21779580434800108, + "learning_rate": 6.608323240002061e-05, + "loss": 0.4943, + "step": 4567 + }, + { + "epoch": 1.2784774699132382, + "grad_norm": 0.22340662539705994, + "learning_rate": 6.606862577948214e-05, + "loss": 0.5164, + "step": 4568 + }, + { + "epoch": 1.2787573467674223, + "grad_norm": 0.215103577951189, + "learning_rate": 6.60540176295229e-05, + "loss": 0.5019, + "step": 4569 + }, + { + "epoch": 1.2790372236216065, + "grad_norm": 0.2253343365947351, + "learning_rate": 6.603940795153325e-05, + "loss": 0.5093, + "step": 4570 + }, + { + "epoch": 1.2793171004757906, + "grad_norm": 0.22132564801684748, + "learning_rate": 6.602479674690378e-05, + "loss": 0.5178, + "step": 4571 + }, + { + "epoch": 1.279596977329975, + "grad_norm": 0.22016093350012908, + "learning_rate": 6.60101840170252e-05, + "loss": 0.4912, + "step": 4572 + }, + { + "epoch": 1.279876854184159, + "grad_norm": 0.2145943732816903, + "learning_rate": 6.599556976328833e-05, + "loss": 0.4993, + "step": 4573 + }, + { + "epoch": 1.280156731038343, + "grad_norm": 0.23687774823603283, + "learning_rate": 6.598095398708417e-05, + "loss": 0.4875, + "step": 4574 + }, + { + "epoch": 1.2804366078925273, + "grad_norm": 0.21423686934006037, + "learning_rate": 6.596633668980388e-05, + "loss": 0.502, + "step": 4575 + }, + { + "epoch": 1.2807164847467114, + "grad_norm": 0.2123447415833241, + "learning_rate": 6.595171787283871e-05, + "loss": 0.4921, + "step": 4576 + }, + { + "epoch": 1.2809963616008955, + "grad_norm": 0.2224877807022716, + "learning_rate": 6.593709753758013e-05, + "loss": 0.5066, + "step": 4577 + }, + { + "epoch": 1.2812762384550798, + "grad_norm": 0.23941024051589668, + "learning_rate": 6.592247568541967e-05, + "loss": 0.503, + "step": 4578 + }, + { + "epoch": 1.281556115309264, + "grad_norm": 0.2304541410031591, + "learning_rate": 6.590785231774907e-05, + "loss": 0.5054, + "step": 4579 + }, + { + "epoch": 1.2818359921634481, + "grad_norm": 0.23324628828292474, + "learning_rate": 6.589322743596018e-05, + "loss": 0.5123, + "step": 4580 + }, + { + "epoch": 1.2821158690176322, + "grad_norm": 0.21602298932667466, + "learning_rate": 6.587860104144499e-05, + "loss": 0.4896, + "step": 4581 + }, + { + "epoch": 1.2823957458718165, + "grad_norm": 0.22259542764176052, + "learning_rate": 6.586397313559568e-05, + "loss": 0.5054, + "step": 4582 + }, + { + "epoch": 1.2826756227260006, + "grad_norm": 0.22714730682310053, + "learning_rate": 6.584934371980453e-05, + "loss": 0.5146, + "step": 4583 + }, + { + "epoch": 1.2829554995801846, + "grad_norm": 0.222548577851698, + "learning_rate": 6.583471279546398e-05, + "loss": 0.5052, + "step": 4584 + }, + { + "epoch": 1.283235376434369, + "grad_norm": 0.21497085516350536, + "learning_rate": 6.582008036396658e-05, + "loss": 0.4959, + "step": 4585 + }, + { + "epoch": 1.283515253288553, + "grad_norm": 0.22752922366939657, + "learning_rate": 6.580544642670509e-05, + "loss": 0.4985, + "step": 4586 + }, + { + "epoch": 1.2837951301427373, + "grad_norm": 0.21605540825063918, + "learning_rate": 6.579081098507236e-05, + "loss": 0.4903, + "step": 4587 + }, + { + "epoch": 1.2840750069969213, + "grad_norm": 0.21362644909682582, + "learning_rate": 6.57761740404614e-05, + "loss": 0.5166, + "step": 4588 + }, + { + "epoch": 1.2843548838511056, + "grad_norm": 0.23245160827101932, + "learning_rate": 6.576153559426537e-05, + "loss": 0.5227, + "step": 4589 + }, + { + "epoch": 1.2846347607052897, + "grad_norm": 0.22673442602767166, + "learning_rate": 6.574689564787756e-05, + "loss": 0.5083, + "step": 4590 + }, + { + "epoch": 1.2849146375594738, + "grad_norm": 0.2201733292431062, + "learning_rate": 6.57322542026914e-05, + "loss": 0.5009, + "step": 4591 + }, + { + "epoch": 1.285194514413658, + "grad_norm": 0.2218832645586175, + "learning_rate": 6.571761126010049e-05, + "loss": 0.5012, + "step": 4592 + }, + { + "epoch": 1.2854743912678421, + "grad_norm": 0.22575243646899995, + "learning_rate": 6.570296682149854e-05, + "loss": 0.5004, + "step": 4593 + }, + { + "epoch": 1.2857542681220262, + "grad_norm": 0.22551316113104805, + "learning_rate": 6.568832088827941e-05, + "loss": 0.5005, + "step": 4594 + }, + { + "epoch": 1.2860341449762105, + "grad_norm": 0.2238456767561351, + "learning_rate": 6.567367346183713e-05, + "loss": 0.512, + "step": 4595 + }, + { + "epoch": 1.2863140218303946, + "grad_norm": 0.2269691834643914, + "learning_rate": 6.565902454356583e-05, + "loss": 0.5303, + "step": 4596 + }, + { + "epoch": 1.2865938986845789, + "grad_norm": 0.22855335149517753, + "learning_rate": 6.564437413485981e-05, + "loss": 0.5176, + "step": 4597 + }, + { + "epoch": 1.286873775538763, + "grad_norm": 0.21678142607897863, + "learning_rate": 6.56297222371135e-05, + "loss": 0.5206, + "step": 4598 + }, + { + "epoch": 1.2871536523929472, + "grad_norm": 0.22658488233250337, + "learning_rate": 6.561506885172149e-05, + "loss": 0.4901, + "step": 4599 + }, + { + "epoch": 1.2874335292471313, + "grad_norm": 0.22615414039556447, + "learning_rate": 6.560041398007847e-05, + "loss": 0.4823, + "step": 4600 + }, + { + "epoch": 1.2877134061013153, + "grad_norm": 0.2213057307234734, + "learning_rate": 6.558575762357933e-05, + "loss": 0.5024, + "step": 4601 + }, + { + "epoch": 1.2879932829554996, + "grad_norm": 0.2174894780415531, + "learning_rate": 6.557109978361904e-05, + "loss": 0.4979, + "step": 4602 + }, + { + "epoch": 1.2882731598096837, + "grad_norm": 0.21131699835689913, + "learning_rate": 6.555644046159277e-05, + "loss": 0.4867, + "step": 4603 + }, + { + "epoch": 1.2885530366638678, + "grad_norm": 0.22554621064216013, + "learning_rate": 6.554177965889578e-05, + "loss": 0.5123, + "step": 4604 + }, + { + "epoch": 1.288832913518052, + "grad_norm": 0.2176737111156412, + "learning_rate": 6.552711737692351e-05, + "loss": 0.5085, + "step": 4605 + }, + { + "epoch": 1.2891127903722361, + "grad_norm": 0.2155271472547195, + "learning_rate": 6.551245361707152e-05, + "loss": 0.4895, + "step": 4606 + }, + { + "epoch": 1.2893926672264204, + "grad_norm": 0.21550969305176285, + "learning_rate": 6.54977883807355e-05, + "loss": 0.471, + "step": 4607 + }, + { + "epoch": 1.2896725440806045, + "grad_norm": 0.22382560786400904, + "learning_rate": 6.548312166931131e-05, + "loss": 0.4848, + "step": 4608 + }, + { + "epoch": 1.2899524209347888, + "grad_norm": 0.2287736043856429, + "learning_rate": 6.546845348419494e-05, + "loss": 0.5113, + "step": 4609 + }, + { + "epoch": 1.2902322977889729, + "grad_norm": 0.22632328103882504, + "learning_rate": 6.545378382678252e-05, + "loss": 0.487, + "step": 4610 + }, + { + "epoch": 1.290512174643157, + "grad_norm": 0.224941686173709, + "learning_rate": 6.54391126984703e-05, + "loss": 0.5142, + "step": 4611 + }, + { + "epoch": 1.2907920514973412, + "grad_norm": 0.2271752358038953, + "learning_rate": 6.542444010065468e-05, + "loss": 0.514, + "step": 4612 + }, + { + "epoch": 1.2910719283515253, + "grad_norm": 0.22085345432306197, + "learning_rate": 6.540976603473223e-05, + "loss": 0.5007, + "step": 4613 + }, + { + "epoch": 1.2913518052057094, + "grad_norm": 0.22708975928677427, + "learning_rate": 6.539509050209961e-05, + "loss": 0.4772, + "step": 4614 + }, + { + "epoch": 1.2916316820598936, + "grad_norm": 0.21885416416242665, + "learning_rate": 6.538041350415368e-05, + "loss": 0.492, + "step": 4615 + }, + { + "epoch": 1.291911558914078, + "grad_norm": 0.23595705137744344, + "learning_rate": 6.536573504229135e-05, + "loss": 0.504, + "step": 4616 + }, + { + "epoch": 1.292191435768262, + "grad_norm": 0.2222855519629023, + "learning_rate": 6.53510551179098e-05, + "loss": 0.501, + "step": 4617 + }, + { + "epoch": 1.292471312622446, + "grad_norm": 0.2202684290052884, + "learning_rate": 6.53363737324062e-05, + "loss": 0.5159, + "step": 4618 + }, + { + "epoch": 1.2927511894766304, + "grad_norm": 0.22828953849570324, + "learning_rate": 6.532169088717797e-05, + "loss": 0.4997, + "step": 4619 + }, + { + "epoch": 1.2930310663308144, + "grad_norm": 0.22001787503268458, + "learning_rate": 6.530700658362263e-05, + "loss": 0.5038, + "step": 4620 + }, + { + "epoch": 1.2933109431849985, + "grad_norm": 0.22919760384205226, + "learning_rate": 6.529232082313783e-05, + "loss": 0.4968, + "step": 4621 + }, + { + "epoch": 1.2935908200391828, + "grad_norm": 0.21539147881567156, + "learning_rate": 6.527763360712138e-05, + "loss": 0.4912, + "step": 4622 + }, + { + "epoch": 1.2938706968933669, + "grad_norm": 0.22598274573206428, + "learning_rate": 6.52629449369712e-05, + "loss": 0.5142, + "step": 4623 + }, + { + "epoch": 1.2941505737475512, + "grad_norm": 0.23171482903840254, + "learning_rate": 6.52482548140854e-05, + "loss": 0.5022, + "step": 4624 + }, + { + "epoch": 1.2944304506017352, + "grad_norm": 0.22221397820711014, + "learning_rate": 6.523356323986216e-05, + "loss": 0.5027, + "step": 4625 + }, + { + "epoch": 1.2947103274559195, + "grad_norm": 0.23262272297796627, + "learning_rate": 6.521887021569984e-05, + "loss": 0.5042, + "step": 4626 + }, + { + "epoch": 1.2949902043101036, + "grad_norm": 0.2229541251763377, + "learning_rate": 6.520417574299695e-05, + "loss": 0.4987, + "step": 4627 + }, + { + "epoch": 1.2952700811642877, + "grad_norm": 0.21670560844131473, + "learning_rate": 6.518947982315209e-05, + "loss": 0.4978, + "step": 4628 + }, + { + "epoch": 1.295549958018472, + "grad_norm": 0.23215184350732523, + "learning_rate": 6.517478245756403e-05, + "loss": 0.5115, + "step": 4629 + }, + { + "epoch": 1.295829834872656, + "grad_norm": 0.21806720642178692, + "learning_rate": 6.51600836476317e-05, + "loss": 0.5142, + "step": 4630 + }, + { + "epoch": 1.29610971172684, + "grad_norm": 0.22185631840174178, + "learning_rate": 6.514538339475411e-05, + "loss": 0.5102, + "step": 4631 + }, + { + "epoch": 1.2963895885810244, + "grad_norm": 0.21006387393433928, + "learning_rate": 6.513068170033047e-05, + "loss": 0.5116, + "step": 4632 + }, + { + "epoch": 1.2966694654352084, + "grad_norm": 0.22376905117718077, + "learning_rate": 6.511597856576006e-05, + "loss": 0.5129, + "step": 4633 + }, + { + "epoch": 1.2969493422893927, + "grad_norm": 0.23116756610449993, + "learning_rate": 6.510127399244234e-05, + "loss": 0.5064, + "step": 4634 + }, + { + "epoch": 1.2972292191435768, + "grad_norm": 0.21770023154106802, + "learning_rate": 6.508656798177692e-05, + "loss": 0.4937, + "step": 4635 + }, + { + "epoch": 1.297509095997761, + "grad_norm": 0.21374486310641394, + "learning_rate": 6.507186053516351e-05, + "loss": 0.4903, + "step": 4636 + }, + { + "epoch": 1.2977889728519452, + "grad_norm": 0.217338272468009, + "learning_rate": 6.5057151654002e-05, + "loss": 0.5117, + "step": 4637 + }, + { + "epoch": 1.2980688497061292, + "grad_norm": 0.21938217567391707, + "learning_rate": 6.504244133969235e-05, + "loss": 0.4978, + "step": 4638 + }, + { + "epoch": 1.2983487265603135, + "grad_norm": 0.23609496333007188, + "learning_rate": 6.502772959363472e-05, + "loss": 0.4873, + "step": 4639 + }, + { + "epoch": 1.2986286034144976, + "grad_norm": 0.2303756434279393, + "learning_rate": 6.501301641722937e-05, + "loss": 0.5106, + "step": 4640 + }, + { + "epoch": 1.2989084802686817, + "grad_norm": 0.2312830221919868, + "learning_rate": 6.499830181187672e-05, + "loss": 0.4806, + "step": 4641 + }, + { + "epoch": 1.299188357122866, + "grad_norm": 0.2324375801687925, + "learning_rate": 6.498358577897731e-05, + "loss": 0.4837, + "step": 4642 + }, + { + "epoch": 1.29946823397705, + "grad_norm": 0.23304549317929413, + "learning_rate": 6.49688683199318e-05, + "loss": 0.5126, + "step": 4643 + }, + { + "epoch": 1.2997481108312343, + "grad_norm": 0.2259910405244759, + "learning_rate": 6.495414943614105e-05, + "loss": 0.487, + "step": 4644 + }, + { + "epoch": 1.3000279876854184, + "grad_norm": 0.23609529250810166, + "learning_rate": 6.493942912900597e-05, + "loss": 0.5076, + "step": 4645 + }, + { + "epoch": 1.3003078645396027, + "grad_norm": 0.23387623580651815, + "learning_rate": 6.492470739992767e-05, + "loss": 0.4848, + "step": 4646 + }, + { + "epoch": 1.3005877413937867, + "grad_norm": 0.22397861859626766, + "learning_rate": 6.490998425030735e-05, + "loss": 0.4939, + "step": 4647 + }, + { + "epoch": 1.3008676182479708, + "grad_norm": 0.22400745922601117, + "learning_rate": 6.489525968154642e-05, + "loss": 0.5165, + "step": 4648 + }, + { + "epoch": 1.301147495102155, + "grad_norm": 0.23050838486974656, + "learning_rate": 6.48805336950463e-05, + "loss": 0.5036, + "step": 4649 + }, + { + "epoch": 1.3014273719563392, + "grad_norm": 0.22045488080662087, + "learning_rate": 6.486580629220868e-05, + "loss": 0.4918, + "step": 4650 + }, + { + "epoch": 1.3017072488105232, + "grad_norm": 0.2269785178860283, + "learning_rate": 6.485107747443528e-05, + "loss": 0.5034, + "step": 4651 + }, + { + "epoch": 1.3019871256647075, + "grad_norm": 0.2278074174643306, + "learning_rate": 6.483634724312802e-05, + "loss": 0.4936, + "step": 4652 + }, + { + "epoch": 1.3022670025188918, + "grad_norm": 0.22577671659450127, + "learning_rate": 6.482161559968894e-05, + "loss": 0.4945, + "step": 4653 + }, + { + "epoch": 1.302546879373076, + "grad_norm": 0.22740257340647596, + "learning_rate": 6.480688254552016e-05, + "loss": 0.5235, + "step": 4654 + }, + { + "epoch": 1.30282675622726, + "grad_norm": 0.23350044835590317, + "learning_rate": 6.479214808202403e-05, + "loss": 0.4841, + "step": 4655 + }, + { + "epoch": 1.3031066330814443, + "grad_norm": 0.22544348184644067, + "learning_rate": 6.477741221060295e-05, + "loss": 0.481, + "step": 4656 + }, + { + "epoch": 1.3033865099356283, + "grad_norm": 0.2269730372463868, + "learning_rate": 6.476267493265953e-05, + "loss": 0.4958, + "step": 4657 + }, + { + "epoch": 1.3036663867898124, + "grad_norm": 0.23736714312852122, + "learning_rate": 6.474793624959643e-05, + "loss": 0.5077, + "step": 4658 + }, + { + "epoch": 1.3039462636439967, + "grad_norm": 0.22662145697682098, + "learning_rate": 6.473319616281652e-05, + "loss": 0.5225, + "step": 4659 + }, + { + "epoch": 1.3042261404981808, + "grad_norm": 0.21929342074079694, + "learning_rate": 6.471845467372273e-05, + "loss": 0.4701, + "step": 4660 + }, + { + "epoch": 1.3045060173523648, + "grad_norm": 0.2265212872451589, + "learning_rate": 6.47037117837182e-05, + "loss": 0.4989, + "step": 4661 + }, + { + "epoch": 1.3047858942065491, + "grad_norm": 0.21358014649033347, + "learning_rate": 6.468896749420614e-05, + "loss": 0.5159, + "step": 4662 + }, + { + "epoch": 1.3050657710607334, + "grad_norm": 0.22053449437667336, + "learning_rate": 6.467422180658994e-05, + "loss": 0.5198, + "step": 4663 + }, + { + "epoch": 1.3053456479149175, + "grad_norm": 0.21856246184017916, + "learning_rate": 6.46594747222731e-05, + "loss": 0.4871, + "step": 4664 + }, + { + "epoch": 1.3056255247691015, + "grad_norm": 0.22481669624283085, + "learning_rate": 6.464472624265923e-05, + "loss": 0.4761, + "step": 4665 + }, + { + "epoch": 1.3059054016232858, + "grad_norm": 0.2375178895669889, + "learning_rate": 6.462997636915215e-05, + "loss": 0.4875, + "step": 4666 + }, + { + "epoch": 1.30618527847747, + "grad_norm": 0.21897222128053973, + "learning_rate": 6.461522510315571e-05, + "loss": 0.5067, + "step": 4667 + }, + { + "epoch": 1.306465155331654, + "grad_norm": 0.22033388668398798, + "learning_rate": 6.460047244607397e-05, + "loss": 0.4813, + "step": 4668 + }, + { + "epoch": 1.3067450321858383, + "grad_norm": 0.2199675990516879, + "learning_rate": 6.458571839931109e-05, + "loss": 0.5002, + "step": 4669 + }, + { + "epoch": 1.3070249090400223, + "grad_norm": 0.22006752103480154, + "learning_rate": 6.457096296427137e-05, + "loss": 0.5089, + "step": 4670 + }, + { + "epoch": 1.3073047858942066, + "grad_norm": 0.2201068651261749, + "learning_rate": 6.455620614235924e-05, + "loss": 0.4823, + "step": 4671 + }, + { + "epoch": 1.3075846627483907, + "grad_norm": 0.22125638999287564, + "learning_rate": 6.454144793497926e-05, + "loss": 0.4937, + "step": 4672 + }, + { + "epoch": 1.307864539602575, + "grad_norm": 0.23477307884019413, + "learning_rate": 6.452668834353611e-05, + "loss": 0.4984, + "step": 4673 + }, + { + "epoch": 1.308144416456759, + "grad_norm": 0.21975294977691442, + "learning_rate": 6.451192736943466e-05, + "loss": 0.4793, + "step": 4674 + }, + { + "epoch": 1.3084242933109431, + "grad_norm": 0.23699559958971625, + "learning_rate": 6.449716501407984e-05, + "loss": 0.494, + "step": 4675 + }, + { + "epoch": 1.3087041701651274, + "grad_norm": 0.2258106266233653, + "learning_rate": 6.448240127887671e-05, + "loss": 0.4966, + "step": 4676 + }, + { + "epoch": 1.3089840470193115, + "grad_norm": 0.22730883819000816, + "learning_rate": 6.446763616523055e-05, + "loss": 0.5106, + "step": 4677 + }, + { + "epoch": 1.3092639238734955, + "grad_norm": 0.2163831582075994, + "learning_rate": 6.445286967454666e-05, + "loss": 0.4877, + "step": 4678 + }, + { + "epoch": 1.3095438007276798, + "grad_norm": 0.2330624101798806, + "learning_rate": 6.443810180823056e-05, + "loss": 0.5108, + "step": 4679 + }, + { + "epoch": 1.309823677581864, + "grad_norm": 0.21603565802881391, + "learning_rate": 6.442333256768784e-05, + "loss": 0.5047, + "step": 4680 + }, + { + "epoch": 1.3101035544360482, + "grad_norm": 0.21938758607667536, + "learning_rate": 6.440856195432426e-05, + "loss": 0.5042, + "step": 4681 + }, + { + "epoch": 1.3103834312902323, + "grad_norm": 0.23806358505288744, + "learning_rate": 6.439378996954568e-05, + "loss": 0.492, + "step": 4682 + }, + { + "epoch": 1.3106633081444166, + "grad_norm": 0.22318586284509329, + "learning_rate": 6.437901661475811e-05, + "loss": 0.4839, + "step": 4683 + }, + { + "epoch": 1.3109431849986006, + "grad_norm": 0.21871111013503194, + "learning_rate": 6.436424189136772e-05, + "loss": 0.4931, + "step": 4684 + }, + { + "epoch": 1.3112230618527847, + "grad_norm": 0.21499676471656837, + "learning_rate": 6.434946580078072e-05, + "loss": 0.498, + "step": 4685 + }, + { + "epoch": 1.311502938706969, + "grad_norm": 0.21720831314150404, + "learning_rate": 6.433468834440356e-05, + "loss": 0.4927, + "step": 4686 + }, + { + "epoch": 1.311782815561153, + "grad_norm": 0.23401604712471621, + "learning_rate": 6.431990952364271e-05, + "loss": 0.513, + "step": 4687 + }, + { + "epoch": 1.3120626924153371, + "grad_norm": 0.21885905440437273, + "learning_rate": 6.430512933990486e-05, + "loss": 0.492, + "step": 4688 + }, + { + "epoch": 1.3123425692695214, + "grad_norm": 0.22498399331645105, + "learning_rate": 6.429034779459679e-05, + "loss": 0.4972, + "step": 4689 + }, + { + "epoch": 1.3126224461237055, + "grad_norm": 0.21826256190213422, + "learning_rate": 6.427556488912543e-05, + "loss": 0.4971, + "step": 4690 + }, + { + "epoch": 1.3129023229778898, + "grad_norm": 0.22789436404685254, + "learning_rate": 6.42607806248978e-05, + "loss": 0.5085, + "step": 4691 + }, + { + "epoch": 1.3131821998320738, + "grad_norm": 0.2219310789830236, + "learning_rate": 6.42459950033211e-05, + "loss": 0.4894, + "step": 4692 + }, + { + "epoch": 1.3134620766862581, + "grad_norm": 0.22507886237445038, + "learning_rate": 6.423120802580263e-05, + "loss": 0.4928, + "step": 4693 + }, + { + "epoch": 1.3137419535404422, + "grad_norm": 0.24038631090046275, + "learning_rate": 6.42164196937498e-05, + "loss": 0.5156, + "step": 4694 + }, + { + "epoch": 1.3140218303946263, + "grad_norm": 0.22047131400075695, + "learning_rate": 6.42016300085702e-05, + "loss": 0.4992, + "step": 4695 + }, + { + "epoch": 1.3143017072488106, + "grad_norm": 0.22230506787803977, + "learning_rate": 6.41868389716715e-05, + "loss": 0.497, + "step": 4696 + }, + { + "epoch": 1.3145815841029946, + "grad_norm": 0.22722650947558032, + "learning_rate": 6.417204658446152e-05, + "loss": 0.4982, + "step": 4697 + }, + { + "epoch": 1.3148614609571787, + "grad_norm": 0.2254129475987711, + "learning_rate": 6.415725284834822e-05, + "loss": 0.4927, + "step": 4698 + }, + { + "epoch": 1.315141337811363, + "grad_norm": 0.22211096551732776, + "learning_rate": 6.414245776473964e-05, + "loss": 0.4829, + "step": 4699 + }, + { + "epoch": 1.3154212146655473, + "grad_norm": 0.22199031809195682, + "learning_rate": 6.412766133504405e-05, + "loss": 0.5126, + "step": 4700 + }, + { + "epoch": 1.3157010915197314, + "grad_norm": 0.22396834599543242, + "learning_rate": 6.411286356066975e-05, + "loss": 0.4927, + "step": 4701 + }, + { + "epoch": 1.3159809683739154, + "grad_norm": 0.22881830847296752, + "learning_rate": 6.409806444302518e-05, + "loss": 0.4868, + "step": 4702 + }, + { + "epoch": 1.3162608452280997, + "grad_norm": 0.23468255489824635, + "learning_rate": 6.408326398351895e-05, + "loss": 0.5051, + "step": 4703 + }, + { + "epoch": 1.3165407220822838, + "grad_norm": 0.23152761908826994, + "learning_rate": 6.406846218355977e-05, + "loss": 0.511, + "step": 4704 + }, + { + "epoch": 1.3168205989364679, + "grad_norm": 0.22458082062715418, + "learning_rate": 6.40536590445565e-05, + "loss": 0.4861, + "step": 4705 + }, + { + "epoch": 1.3171004757906521, + "grad_norm": 0.21977348364975618, + "learning_rate": 6.403885456791807e-05, + "loss": 0.4952, + "step": 4706 + }, + { + "epoch": 1.3173803526448362, + "grad_norm": 0.2143295074476737, + "learning_rate": 6.402404875505362e-05, + "loss": 0.4889, + "step": 4707 + }, + { + "epoch": 1.3176602294990205, + "grad_norm": 0.22485304045464563, + "learning_rate": 6.400924160737234e-05, + "loss": 0.513, + "step": 4708 + }, + { + "epoch": 1.3179401063532046, + "grad_norm": 0.21490462207915342, + "learning_rate": 6.399443312628363e-05, + "loss": 0.4884, + "step": 4709 + }, + { + "epoch": 1.3182199832073889, + "grad_norm": 0.22718250602188456, + "learning_rate": 6.397962331319692e-05, + "loss": 0.4947, + "step": 4710 + }, + { + "epoch": 1.318499860061573, + "grad_norm": 0.22543975476627806, + "learning_rate": 6.396481216952186e-05, + "loss": 0.4947, + "step": 4711 + }, + { + "epoch": 1.318779736915757, + "grad_norm": 0.2195984700756812, + "learning_rate": 6.394999969666815e-05, + "loss": 0.4878, + "step": 4712 + }, + { + "epoch": 1.3190596137699413, + "grad_norm": 0.23930971128941994, + "learning_rate": 6.393518589604567e-05, + "loss": 0.5046, + "step": 4713 + }, + { + "epoch": 1.3193394906241254, + "grad_norm": 0.2330286582031531, + "learning_rate": 6.392037076906439e-05, + "loss": 0.4952, + "step": 4714 + }, + { + "epoch": 1.3196193674783094, + "grad_norm": 0.212829444351127, + "learning_rate": 6.390555431713442e-05, + "loss": 0.4782, + "step": 4715 + }, + { + "epoch": 1.3198992443324937, + "grad_norm": 0.22048334769377406, + "learning_rate": 6.389073654166602e-05, + "loss": 0.4889, + "step": 4716 + }, + { + "epoch": 1.3201791211866778, + "grad_norm": 0.2328567892686063, + "learning_rate": 6.387591744406951e-05, + "loss": 0.5119, + "step": 4717 + }, + { + "epoch": 1.320458998040862, + "grad_norm": 0.22954351194606487, + "learning_rate": 6.386109702575545e-05, + "loss": 0.5118, + "step": 4718 + }, + { + "epoch": 1.3207388748950462, + "grad_norm": 0.22259220419694478, + "learning_rate": 6.38462752881344e-05, + "loss": 0.4906, + "step": 4719 + }, + { + "epoch": 1.3210187517492304, + "grad_norm": 0.2267080847553509, + "learning_rate": 6.383145223261712e-05, + "loss": 0.5052, + "step": 4720 + }, + { + "epoch": 1.3212986286034145, + "grad_norm": 0.21041299621266424, + "learning_rate": 6.381662786061448e-05, + "loss": 0.493, + "step": 4721 + }, + { + "epoch": 1.3215785054575986, + "grad_norm": 0.22450646041364145, + "learning_rate": 6.380180217353746e-05, + "loss": 0.5041, + "step": 4722 + }, + { + "epoch": 1.3218583823117829, + "grad_norm": 0.22175856286452167, + "learning_rate": 6.378697517279718e-05, + "loss": 0.4701, + "step": 4723 + }, + { + "epoch": 1.322138259165967, + "grad_norm": 0.22275708747147227, + "learning_rate": 6.377214685980489e-05, + "loss": 0.4969, + "step": 4724 + }, + { + "epoch": 1.322418136020151, + "grad_norm": 0.22013441784850707, + "learning_rate": 6.375731723597194e-05, + "loss": 0.5121, + "step": 4725 + }, + { + "epoch": 1.3226980128743353, + "grad_norm": 0.21238260380550128, + "learning_rate": 6.374248630270985e-05, + "loss": 0.4964, + "step": 4726 + }, + { + "epoch": 1.3229778897285194, + "grad_norm": 0.23561985632728638, + "learning_rate": 6.372765406143023e-05, + "loss": 0.5025, + "step": 4727 + }, + { + "epoch": 1.3232577665827037, + "grad_norm": 0.2242488728215307, + "learning_rate": 6.37128205135448e-05, + "loss": 0.5204, + "step": 4728 + }, + { + "epoch": 1.3235376434368877, + "grad_norm": 0.21401293399237717, + "learning_rate": 6.369798566046545e-05, + "loss": 0.504, + "step": 4729 + }, + { + "epoch": 1.323817520291072, + "grad_norm": 0.22034017856381255, + "learning_rate": 6.368314950360415e-05, + "loss": 0.4982, + "step": 4730 + }, + { + "epoch": 1.324097397145256, + "grad_norm": 0.21056206469299288, + "learning_rate": 6.366831204437302e-05, + "loss": 0.4761, + "step": 4731 + }, + { + "epoch": 1.3243772739994402, + "grad_norm": 0.2329875674472275, + "learning_rate": 6.36534732841843e-05, + "loss": 0.5098, + "step": 4732 + }, + { + "epoch": 1.3246571508536245, + "grad_norm": 0.23177788945168468, + "learning_rate": 6.363863322445036e-05, + "loss": 0.4898, + "step": 4733 + }, + { + "epoch": 1.3249370277078085, + "grad_norm": 0.22351103686194437, + "learning_rate": 6.362379186658365e-05, + "loss": 0.4962, + "step": 4734 + }, + { + "epoch": 1.3252169045619926, + "grad_norm": 0.2280168836124819, + "learning_rate": 6.360894921199682e-05, + "loss": 0.5199, + "step": 4735 + }, + { + "epoch": 1.3254967814161769, + "grad_norm": 0.23481066826371386, + "learning_rate": 6.359410526210258e-05, + "loss": 0.486, + "step": 4736 + }, + { + "epoch": 1.3257766582703612, + "grad_norm": 0.22151126008727215, + "learning_rate": 6.357926001831381e-05, + "loss": 0.496, + "step": 4737 + }, + { + "epoch": 1.3260565351245452, + "grad_norm": 0.22161530996309398, + "learning_rate": 6.356441348204348e-05, + "loss": 0.4819, + "step": 4738 + }, + { + "epoch": 1.3263364119787293, + "grad_norm": 0.22943625558146605, + "learning_rate": 6.354956565470466e-05, + "loss": 0.4931, + "step": 4739 + }, + { + "epoch": 1.3266162888329136, + "grad_norm": 0.22540649565140475, + "learning_rate": 6.353471653771059e-05, + "loss": 0.4903, + "step": 4740 + }, + { + "epoch": 1.3268961656870977, + "grad_norm": 0.22683525578616123, + "learning_rate": 6.351986613247465e-05, + "loss": 0.4995, + "step": 4741 + }, + { + "epoch": 1.3271760425412817, + "grad_norm": 0.2182989044801757, + "learning_rate": 6.350501444041026e-05, + "loss": 0.5032, + "step": 4742 + }, + { + "epoch": 1.327455919395466, + "grad_norm": 0.22302126704546218, + "learning_rate": 6.349016146293103e-05, + "loss": 0.4981, + "step": 4743 + }, + { + "epoch": 1.32773579624965, + "grad_norm": 0.23159155905157916, + "learning_rate": 6.34753072014507e-05, + "loss": 0.4965, + "step": 4744 + }, + { + "epoch": 1.3280156731038344, + "grad_norm": 0.22139141956605743, + "learning_rate": 6.346045165738309e-05, + "loss": 0.4998, + "step": 4745 + }, + { + "epoch": 1.3282955499580185, + "grad_norm": 0.23315805768894807, + "learning_rate": 6.344559483214215e-05, + "loss": 0.5043, + "step": 4746 + }, + { + "epoch": 1.3285754268122028, + "grad_norm": 0.22841483405260213, + "learning_rate": 6.343073672714199e-05, + "loss": 0.4905, + "step": 4747 + }, + { + "epoch": 1.3288553036663868, + "grad_norm": 0.22947480262023023, + "learning_rate": 6.341587734379676e-05, + "loss": 0.4946, + "step": 4748 + }, + { + "epoch": 1.329135180520571, + "grad_norm": 0.22656550982068765, + "learning_rate": 6.340101668352083e-05, + "loss": 0.4789, + "step": 4749 + }, + { + "epoch": 1.3294150573747552, + "grad_norm": 0.23220779940363576, + "learning_rate": 6.338615474772862e-05, + "loss": 0.4944, + "step": 4750 + }, + { + "epoch": 1.3296949342289393, + "grad_norm": 0.2273617717651779, + "learning_rate": 6.337129153783471e-05, + "loss": 0.5034, + "step": 4751 + }, + { + "epoch": 1.3299748110831233, + "grad_norm": 0.21960534139981308, + "learning_rate": 6.335642705525378e-05, + "loss": 0.4999, + "step": 4752 + }, + { + "epoch": 1.3302546879373076, + "grad_norm": 0.2360726375869372, + "learning_rate": 6.334156130140068e-05, + "loss": 0.5004, + "step": 4753 + }, + { + "epoch": 1.3305345647914917, + "grad_norm": 0.21337455412485573, + "learning_rate": 6.332669427769028e-05, + "loss": 0.501, + "step": 4754 + }, + { + "epoch": 1.330814441645676, + "grad_norm": 0.2257426587297062, + "learning_rate": 6.331182598553766e-05, + "loss": 0.5086, + "step": 4755 + }, + { + "epoch": 1.33109431849986, + "grad_norm": 0.23008188954974348, + "learning_rate": 6.3296956426358e-05, + "loss": 0.4809, + "step": 4756 + }, + { + "epoch": 1.3313741953540443, + "grad_norm": 0.23182473779450402, + "learning_rate": 6.32820856015666e-05, + "loss": 0.4832, + "step": 4757 + }, + { + "epoch": 1.3316540722082284, + "grad_norm": 0.22953960976525958, + "learning_rate": 6.326721351257884e-05, + "loss": 0.5066, + "step": 4758 + }, + { + "epoch": 1.3319339490624125, + "grad_norm": 0.21807919472092635, + "learning_rate": 6.325234016081027e-05, + "loss": 0.4981, + "step": 4759 + }, + { + "epoch": 1.3322138259165968, + "grad_norm": 0.2295915976641455, + "learning_rate": 6.323746554767653e-05, + "loss": 0.5026, + "step": 4760 + }, + { + "epoch": 1.3324937027707808, + "grad_norm": 0.21817698600915825, + "learning_rate": 6.322258967459344e-05, + "loss": 0.4999, + "step": 4761 + }, + { + "epoch": 1.332773579624965, + "grad_norm": 0.217287414692134, + "learning_rate": 6.320771254297684e-05, + "loss": 0.4983, + "step": 4762 + }, + { + "epoch": 1.3330534564791492, + "grad_norm": 0.21927009183186255, + "learning_rate": 6.319283415424279e-05, + "loss": 0.4766, + "step": 4763 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.2213969872620352, + "learning_rate": 6.31779545098074e-05, + "loss": 0.4948, + "step": 4764 + }, + { + "epoch": 1.3336132101875176, + "grad_norm": 0.21969073392087798, + "learning_rate": 6.316307361108692e-05, + "loss": 0.504, + "step": 4765 + }, + { + "epoch": 1.3338930870417016, + "grad_norm": 0.22088737612147463, + "learning_rate": 6.314819145949772e-05, + "loss": 0.4627, + "step": 4766 + }, + { + "epoch": 1.334172963895886, + "grad_norm": 0.2153264695661567, + "learning_rate": 6.31333080564563e-05, + "loss": 0.5108, + "step": 4767 + }, + { + "epoch": 1.33445284075007, + "grad_norm": 0.21888635991664576, + "learning_rate": 6.311842340337925e-05, + "loss": 0.4821, + "step": 4768 + }, + { + "epoch": 1.334732717604254, + "grad_norm": 0.22741872715040778, + "learning_rate": 6.310353750168333e-05, + "loss": 0.4938, + "step": 4769 + }, + { + "epoch": 1.3350125944584383, + "grad_norm": 0.23285899827112347, + "learning_rate": 6.30886503527854e-05, + "loss": 0.4915, + "step": 4770 + }, + { + "epoch": 1.3352924713126224, + "grad_norm": 0.21963809122637543, + "learning_rate": 6.307376195810238e-05, + "loss": 0.5014, + "step": 4771 + }, + { + "epoch": 1.3355723481668065, + "grad_norm": 0.2275057665413845, + "learning_rate": 6.305887231905138e-05, + "loss": 0.5351, + "step": 4772 + }, + { + "epoch": 1.3358522250209908, + "grad_norm": 0.21937968106171388, + "learning_rate": 6.30439814370496e-05, + "loss": 0.4837, + "step": 4773 + }, + { + "epoch": 1.336132101875175, + "grad_norm": 0.23340339542763372, + "learning_rate": 6.302908931351438e-05, + "loss": 0.5266, + "step": 4774 + }, + { + "epoch": 1.3364119787293591, + "grad_norm": 0.21622934229164417, + "learning_rate": 6.301419594986314e-05, + "loss": 0.4817, + "step": 4775 + }, + { + "epoch": 1.3366918555835432, + "grad_norm": 0.2306834778997054, + "learning_rate": 6.299930134751344e-05, + "loss": 0.504, + "step": 4776 + }, + { + "epoch": 1.3369717324377275, + "grad_norm": 0.2384546455505702, + "learning_rate": 6.298440550788296e-05, + "loss": 0.5239, + "step": 4777 + }, + { + "epoch": 1.3372516092919116, + "grad_norm": 0.22504121494751755, + "learning_rate": 6.29695084323895e-05, + "loss": 0.4876, + "step": 4778 + }, + { + "epoch": 1.3375314861460956, + "grad_norm": 0.2216634018412408, + "learning_rate": 6.295461012245097e-05, + "loss": 0.5133, + "step": 4779 + }, + { + "epoch": 1.33781136300028, + "grad_norm": 0.2207626799054461, + "learning_rate": 6.29397105794854e-05, + "loss": 0.4951, + "step": 4780 + }, + { + "epoch": 1.338091239854464, + "grad_norm": 0.2322294653571303, + "learning_rate": 6.292480980491094e-05, + "loss": 0.5001, + "step": 4781 + }, + { + "epoch": 1.338371116708648, + "grad_norm": 0.22187636224284968, + "learning_rate": 6.290990780014582e-05, + "loss": 0.4907, + "step": 4782 + }, + { + "epoch": 1.3386509935628323, + "grad_norm": 0.22467144852771262, + "learning_rate": 6.289500456660848e-05, + "loss": 0.495, + "step": 4783 + }, + { + "epoch": 1.3389308704170166, + "grad_norm": 0.23455136872692678, + "learning_rate": 6.288010010571738e-05, + "loss": 0.5156, + "step": 4784 + }, + { + "epoch": 1.3392107472712007, + "grad_norm": 0.2189676584244879, + "learning_rate": 6.286519441889114e-05, + "loss": 0.5094, + "step": 4785 + }, + { + "epoch": 1.3394906241253848, + "grad_norm": 0.22244557228116685, + "learning_rate": 6.285028750754849e-05, + "loss": 0.4909, + "step": 4786 + }, + { + "epoch": 1.339770500979569, + "grad_norm": 0.2153146699586078, + "learning_rate": 6.283537937310828e-05, + "loss": 0.4981, + "step": 4787 + }, + { + "epoch": 1.3400503778337531, + "grad_norm": 0.21528893540817695, + "learning_rate": 6.282047001698948e-05, + "loss": 0.496, + "step": 4788 + }, + { + "epoch": 1.3403302546879372, + "grad_norm": 0.22388445323361997, + "learning_rate": 6.280555944061118e-05, + "loss": 0.4991, + "step": 4789 + }, + { + "epoch": 1.3406101315421215, + "grad_norm": 0.22370320505369892, + "learning_rate": 6.279064764539256e-05, + "loss": 0.5196, + "step": 4790 + }, + { + "epoch": 1.3408900083963056, + "grad_norm": 0.2259520871203192, + "learning_rate": 6.277573463275293e-05, + "loss": 0.5197, + "step": 4791 + }, + { + "epoch": 1.3411698852504899, + "grad_norm": 0.22594160224656487, + "learning_rate": 6.276082040411174e-05, + "loss": 0.4793, + "step": 4792 + }, + { + "epoch": 1.341449762104674, + "grad_norm": 0.239789985501656, + "learning_rate": 6.274590496088852e-05, + "loss": 0.5342, + "step": 4793 + }, + { + "epoch": 1.3417296389588582, + "grad_norm": 0.23199647815813662, + "learning_rate": 6.27309883045029e-05, + "loss": 0.5066, + "step": 4794 + }, + { + "epoch": 1.3420095158130423, + "grad_norm": 0.21526405986152142, + "learning_rate": 6.271607043637472e-05, + "loss": 0.4823, + "step": 4795 + }, + { + "epoch": 1.3422893926672264, + "grad_norm": 0.21958703219627543, + "learning_rate": 6.270115135792383e-05, + "loss": 0.5053, + "step": 4796 + }, + { + "epoch": 1.3425692695214106, + "grad_norm": 0.22609464409833172, + "learning_rate": 6.268623107057023e-05, + "loss": 0.5056, + "step": 4797 + }, + { + "epoch": 1.3428491463755947, + "grad_norm": 0.22194077400812287, + "learning_rate": 6.267130957573408e-05, + "loss": 0.5087, + "step": 4798 + }, + { + "epoch": 1.3431290232297788, + "grad_norm": 0.23204024727785122, + "learning_rate": 6.265638687483558e-05, + "loss": 0.5043, + "step": 4799 + }, + { + "epoch": 1.343408900083963, + "grad_norm": 0.22938061058411732, + "learning_rate": 6.264146296929509e-05, + "loss": 0.4891, + "step": 4800 + }, + { + "epoch": 1.3436887769381471, + "grad_norm": 0.22274245548079766, + "learning_rate": 6.26265378605331e-05, + "loss": 0.4812, + "step": 4801 + }, + { + "epoch": 1.3439686537923314, + "grad_norm": 0.2171688869158793, + "learning_rate": 6.261161154997016e-05, + "loss": 0.4798, + "step": 4802 + }, + { + "epoch": 1.3442485306465155, + "grad_norm": 0.21930534844838145, + "learning_rate": 6.259668403902695e-05, + "loss": 0.4972, + "step": 4803 + }, + { + "epoch": 1.3445284075006998, + "grad_norm": 0.23115945927184667, + "learning_rate": 6.258175532912431e-05, + "loss": 0.4867, + "step": 4804 + }, + { + "epoch": 1.3448082843548839, + "grad_norm": 0.2233713975022183, + "learning_rate": 6.256682542168318e-05, + "loss": 0.4997, + "step": 4805 + }, + { + "epoch": 1.345088161209068, + "grad_norm": 0.236053453803603, + "learning_rate": 6.255189431812455e-05, + "loss": 0.4804, + "step": 4806 + }, + { + "epoch": 1.3453680380632522, + "grad_norm": 0.22318309010050524, + "learning_rate": 6.253696201986961e-05, + "loss": 0.4918, + "step": 4807 + }, + { + "epoch": 1.3456479149174363, + "grad_norm": 0.22434174613416846, + "learning_rate": 6.25220285283396e-05, + "loss": 0.5007, + "step": 4808 + }, + { + "epoch": 1.3459277917716204, + "grad_norm": 0.230362751286978, + "learning_rate": 6.25070938449559e-05, + "loss": 0.5054, + "step": 4809 + }, + { + "epoch": 1.3462076686258047, + "grad_norm": 0.2198229960126399, + "learning_rate": 6.249215797114e-05, + "loss": 0.4772, + "step": 4810 + }, + { + "epoch": 1.3464875454799887, + "grad_norm": 0.23163298555086242, + "learning_rate": 6.247722090831354e-05, + "loss": 0.5157, + "step": 4811 + }, + { + "epoch": 1.346767422334173, + "grad_norm": 0.22279990270725775, + "learning_rate": 6.246228265789819e-05, + "loss": 0.4687, + "step": 4812 + }, + { + "epoch": 1.347047299188357, + "grad_norm": 0.2361531773223951, + "learning_rate": 6.24473432213158e-05, + "loss": 0.5136, + "step": 4813 + }, + { + "epoch": 1.3473271760425414, + "grad_norm": 0.2287702209214274, + "learning_rate": 6.243240259998831e-05, + "loss": 0.5033, + "step": 4814 + }, + { + "epoch": 1.3476070528967254, + "grad_norm": 0.2356348596820923, + "learning_rate": 6.24174607953378e-05, + "loss": 0.5074, + "step": 4815 + }, + { + "epoch": 1.3478869297509095, + "grad_norm": 0.23262247919309248, + "learning_rate": 6.240251780878641e-05, + "loss": 0.5238, + "step": 4816 + }, + { + "epoch": 1.3481668066050938, + "grad_norm": 0.2173593030771436, + "learning_rate": 6.238757364175644e-05, + "loss": 0.4791, + "step": 4817 + }, + { + "epoch": 1.3484466834592779, + "grad_norm": 0.23331170999229453, + "learning_rate": 6.237262829567027e-05, + "loss": 0.4954, + "step": 4818 + }, + { + "epoch": 1.348726560313462, + "grad_norm": 0.23005691137462347, + "learning_rate": 6.235768177195042e-05, + "loss": 0.4864, + "step": 4819 + }, + { + "epoch": 1.3490064371676462, + "grad_norm": 0.2191333581429469, + "learning_rate": 6.23427340720195e-05, + "loss": 0.4958, + "step": 4820 + }, + { + "epoch": 1.3492863140218305, + "grad_norm": 0.22999061796024522, + "learning_rate": 6.232778519730023e-05, + "loss": 0.5156, + "step": 4821 + }, + { + "epoch": 1.3495661908760146, + "grad_norm": 0.2301668625919981, + "learning_rate": 6.231283514921547e-05, + "loss": 0.5048, + "step": 4822 + }, + { + "epoch": 1.3498460677301987, + "grad_norm": 0.22473564312347116, + "learning_rate": 6.229788392918819e-05, + "loss": 0.4871, + "step": 4823 + }, + { + "epoch": 1.350125944584383, + "grad_norm": 0.2259976525067703, + "learning_rate": 6.228293153864142e-05, + "loss": 0.502, + "step": 4824 + }, + { + "epoch": 1.350405821438567, + "grad_norm": 0.23636246438309857, + "learning_rate": 6.226797797899835e-05, + "loss": 0.5243, + "step": 4825 + }, + { + "epoch": 1.350685698292751, + "grad_norm": 0.21997183945333693, + "learning_rate": 6.225302325168227e-05, + "loss": 0.5145, + "step": 4826 + }, + { + "epoch": 1.3509655751469354, + "grad_norm": 0.22055693597634543, + "learning_rate": 6.223806735811657e-05, + "loss": 0.4995, + "step": 4827 + }, + { + "epoch": 1.3512454520011195, + "grad_norm": 0.22283424242343022, + "learning_rate": 6.222311029972477e-05, + "loss": 0.4863, + "step": 4828 + }, + { + "epoch": 1.3515253288553037, + "grad_norm": 0.2227311932162068, + "learning_rate": 6.22081520779305e-05, + "loss": 0.4889, + "step": 4829 + }, + { + "epoch": 1.3518052057094878, + "grad_norm": 0.21546280503278523, + "learning_rate": 6.219319269415747e-05, + "loss": 0.5107, + "step": 4830 + }, + { + "epoch": 1.352085082563672, + "grad_norm": 0.23060313961191806, + "learning_rate": 6.217823214982955e-05, + "loss": 0.5085, + "step": 4831 + }, + { + "epoch": 1.3523649594178562, + "grad_norm": 0.23194577730240898, + "learning_rate": 6.216327044637067e-05, + "loss": 0.488, + "step": 4832 + }, + { + "epoch": 1.3526448362720402, + "grad_norm": 0.23049515631372122, + "learning_rate": 6.214830758520491e-05, + "loss": 0.4915, + "step": 4833 + }, + { + "epoch": 1.3529247131262245, + "grad_norm": 0.22403765225918498, + "learning_rate": 6.213334356775642e-05, + "loss": 0.4963, + "step": 4834 + }, + { + "epoch": 1.3532045899804086, + "grad_norm": 0.22720845592377828, + "learning_rate": 6.21183783954495e-05, + "loss": 0.5111, + "step": 4835 + }, + { + "epoch": 1.3534844668345927, + "grad_norm": 0.22105383560152483, + "learning_rate": 6.210341206970855e-05, + "loss": 0.4922, + "step": 4836 + }, + { + "epoch": 1.353764343688777, + "grad_norm": 0.22698275434901025, + "learning_rate": 6.208844459195806e-05, + "loss": 0.4778, + "step": 4837 + }, + { + "epoch": 1.354044220542961, + "grad_norm": 0.2324373874536657, + "learning_rate": 6.207347596362265e-05, + "loss": 0.508, + "step": 4838 + }, + { + "epoch": 1.3543240973971453, + "grad_norm": 0.23648326306698714, + "learning_rate": 6.205850618612703e-05, + "loss": 0.5025, + "step": 4839 + }, + { + "epoch": 1.3546039742513294, + "grad_norm": 0.22326814690064614, + "learning_rate": 6.204353526089606e-05, + "loss": 0.4822, + "step": 4840 + }, + { + "epoch": 1.3548838511055137, + "grad_norm": 0.2183293592342329, + "learning_rate": 6.202856318935463e-05, + "loss": 0.4829, + "step": 4841 + }, + { + "epoch": 1.3551637279596978, + "grad_norm": 0.22675519081505494, + "learning_rate": 6.201358997292786e-05, + "loss": 0.5089, + "step": 4842 + }, + { + "epoch": 1.3554436048138818, + "grad_norm": 0.2284883687367303, + "learning_rate": 6.199861561304086e-05, + "loss": 0.4875, + "step": 4843 + }, + { + "epoch": 1.355723481668066, + "grad_norm": 0.2319921494654131, + "learning_rate": 6.19836401111189e-05, + "loss": 0.515, + "step": 4844 + }, + { + "epoch": 1.3560033585222502, + "grad_norm": 0.2315120662356948, + "learning_rate": 6.196866346858738e-05, + "loss": 0.4998, + "step": 4845 + }, + { + "epoch": 1.3562832353764342, + "grad_norm": 0.22493496369229218, + "learning_rate": 6.195368568687176e-05, + "loss": 0.5216, + "step": 4846 + }, + { + "epoch": 1.3565631122306185, + "grad_norm": 0.2268476164734343, + "learning_rate": 6.193870676739765e-05, + "loss": 0.4953, + "step": 4847 + }, + { + "epoch": 1.3568429890848026, + "grad_norm": 0.2299974124752463, + "learning_rate": 6.192372671159075e-05, + "loss": 0.5088, + "step": 4848 + }, + { + "epoch": 1.357122865938987, + "grad_norm": 0.23282050525171238, + "learning_rate": 6.190874552087686e-05, + "loss": 0.5126, + "step": 4849 + }, + { + "epoch": 1.357402742793171, + "grad_norm": 0.23778468068019334, + "learning_rate": 6.189376319668192e-05, + "loss": 0.5093, + "step": 4850 + }, + { + "epoch": 1.3576826196473553, + "grad_norm": 0.2324677545913148, + "learning_rate": 6.187877974043192e-05, + "loss": 0.5056, + "step": 4851 + }, + { + "epoch": 1.3579624965015393, + "grad_norm": 0.23153802153864753, + "learning_rate": 6.186379515355304e-05, + "loss": 0.5152, + "step": 4852 + }, + { + "epoch": 1.3582423733557234, + "grad_norm": 0.21973354509480217, + "learning_rate": 6.184880943747148e-05, + "loss": 0.502, + "step": 4853 + }, + { + "epoch": 1.3585222502099077, + "grad_norm": 0.2369756551082989, + "learning_rate": 6.183382259361362e-05, + "loss": 0.4752, + "step": 4854 + }, + { + "epoch": 1.3588021270640918, + "grad_norm": 0.23540156776186763, + "learning_rate": 6.181883462340588e-05, + "loss": 0.5003, + "step": 4855 + }, + { + "epoch": 1.3590820039182758, + "grad_norm": 0.2202761181726255, + "learning_rate": 6.180384552827487e-05, + "loss": 0.511, + "step": 4856 + }, + { + "epoch": 1.3593618807724601, + "grad_norm": 0.21997182463050327, + "learning_rate": 6.178885530964722e-05, + "loss": 0.5014, + "step": 4857 + }, + { + "epoch": 1.3596417576266444, + "grad_norm": 0.2307998778689992, + "learning_rate": 6.177386396894973e-05, + "loss": 0.5087, + "step": 4858 + }, + { + "epoch": 1.3599216344808285, + "grad_norm": 0.22173137184083913, + "learning_rate": 6.175887150760927e-05, + "loss": 0.4698, + "step": 4859 + }, + { + "epoch": 1.3602015113350125, + "grad_norm": 0.23534290672162, + "learning_rate": 6.174387792705286e-05, + "loss": 0.4982, + "step": 4860 + }, + { + "epoch": 1.3604813881891968, + "grad_norm": 0.2229138478331696, + "learning_rate": 6.172888322870754e-05, + "loss": 0.4962, + "step": 4861 + }, + { + "epoch": 1.360761265043381, + "grad_norm": 0.2274025469077541, + "learning_rate": 6.171388741400057e-05, + "loss": 0.4896, + "step": 4862 + }, + { + "epoch": 1.361041141897565, + "grad_norm": 0.22381917227607304, + "learning_rate": 6.169889048435924e-05, + "loss": 0.489, + "step": 4863 + }, + { + "epoch": 1.3613210187517493, + "grad_norm": 0.22810534932635093, + "learning_rate": 6.168389244121096e-05, + "loss": 0.5079, + "step": 4864 + }, + { + "epoch": 1.3616008956059333, + "grad_norm": 0.2128500277491175, + "learning_rate": 6.166889328598326e-05, + "loss": 0.5037, + "step": 4865 + }, + { + "epoch": 1.3618807724601176, + "grad_norm": 0.22321074914783084, + "learning_rate": 6.165389302010377e-05, + "loss": 0.5147, + "step": 4866 + }, + { + "epoch": 1.3621606493143017, + "grad_norm": 0.2401386710456637, + "learning_rate": 6.163889164500022e-05, + "loss": 0.5111, + "step": 4867 + }, + { + "epoch": 1.362440526168486, + "grad_norm": 0.2391316111387079, + "learning_rate": 6.162388916210044e-05, + "loss": 0.5158, + "step": 4868 + }, + { + "epoch": 1.36272040302267, + "grad_norm": 0.3199516716609756, + "learning_rate": 6.160888557283239e-05, + "loss": 0.5238, + "step": 4869 + }, + { + "epoch": 1.3630002798768541, + "grad_norm": 0.21661828950826603, + "learning_rate": 6.159388087862412e-05, + "loss": 0.4873, + "step": 4870 + }, + { + "epoch": 1.3632801567310384, + "grad_norm": 0.22277479866780228, + "learning_rate": 6.157887508090375e-05, + "loss": 0.4735, + "step": 4871 + }, + { + "epoch": 1.3635600335852225, + "grad_norm": 0.22983980627900288, + "learning_rate": 6.15638681810996e-05, + "loss": 0.5243, + "step": 4872 + }, + { + "epoch": 1.3638399104394066, + "grad_norm": 0.2265029377674558, + "learning_rate": 6.154886018063999e-05, + "loss": 0.5044, + "step": 4873 + }, + { + "epoch": 1.3641197872935908, + "grad_norm": 0.22217929053543983, + "learning_rate": 6.15338510809534e-05, + "loss": 0.4971, + "step": 4874 + }, + { + "epoch": 1.364399664147775, + "grad_norm": 0.22261889453846326, + "learning_rate": 6.151884088346842e-05, + "loss": 0.4827, + "step": 4875 + }, + { + "epoch": 1.3646795410019592, + "grad_norm": 0.21438655916459587, + "learning_rate": 6.15038295896137e-05, + "loss": 0.4868, + "step": 4876 + }, + { + "epoch": 1.3649594178561433, + "grad_norm": 0.22544405100383874, + "learning_rate": 6.148881720081806e-05, + "loss": 0.4943, + "step": 4877 + }, + { + "epoch": 1.3652392947103276, + "grad_norm": 0.22266497608271735, + "learning_rate": 6.147380371851036e-05, + "loss": 0.4814, + "step": 4878 + }, + { + "epoch": 1.3655191715645116, + "grad_norm": 0.223885701707682, + "learning_rate": 6.14587891441196e-05, + "loss": 0.5012, + "step": 4879 + }, + { + "epoch": 1.3657990484186957, + "grad_norm": 0.23269224202408606, + "learning_rate": 6.144377347907487e-05, + "loss": 0.5239, + "step": 4880 + }, + { + "epoch": 1.36607892527288, + "grad_norm": 0.22609485478475636, + "learning_rate": 6.14287567248054e-05, + "loss": 0.5014, + "step": 4881 + }, + { + "epoch": 1.366358802127064, + "grad_norm": 0.23157140715010618, + "learning_rate": 6.141373888274044e-05, + "loss": 0.5326, + "step": 4882 + }, + { + "epoch": 1.3666386789812481, + "grad_norm": 0.22651333469638466, + "learning_rate": 6.139871995430944e-05, + "loss": 0.5387, + "step": 4883 + }, + { + "epoch": 1.3669185558354324, + "grad_norm": 0.22906579691133122, + "learning_rate": 6.138369994094188e-05, + "loss": 0.4838, + "step": 4884 + }, + { + "epoch": 1.3671984326896165, + "grad_norm": 0.21872294356138144, + "learning_rate": 6.13686788440674e-05, + "loss": 0.501, + "step": 4885 + }, + { + "epoch": 1.3674783095438008, + "grad_norm": 0.2438449946521116, + "learning_rate": 6.13536566651157e-05, + "loss": 0.4875, + "step": 4886 + }, + { + "epoch": 1.3677581863979849, + "grad_norm": 0.2308407569999151, + "learning_rate": 6.13386334055166e-05, + "loss": 0.4991, + "step": 4887 + }, + { + "epoch": 1.3680380632521691, + "grad_norm": 0.22926425337022524, + "learning_rate": 6.132360906670002e-05, + "loss": 0.4856, + "step": 4888 + }, + { + "epoch": 1.3683179401063532, + "grad_norm": 0.2212095014222958, + "learning_rate": 6.1308583650096e-05, + "loss": 0.5108, + "step": 4889 + }, + { + "epoch": 1.3685978169605373, + "grad_norm": 0.22955795622145608, + "learning_rate": 6.129355715713468e-05, + "loss": 0.476, + "step": 4890 + }, + { + "epoch": 1.3688776938147216, + "grad_norm": 0.21778846604224314, + "learning_rate": 6.127852958924624e-05, + "loss": 0.4957, + "step": 4891 + }, + { + "epoch": 1.3691575706689056, + "grad_norm": 0.224328153450806, + "learning_rate": 6.126350094786105e-05, + "loss": 0.5033, + "step": 4892 + }, + { + "epoch": 1.3694374475230897, + "grad_norm": 0.22334866258573038, + "learning_rate": 6.124847123440953e-05, + "loss": 0.4965, + "step": 4893 + }, + { + "epoch": 1.369717324377274, + "grad_norm": 0.23176984056081099, + "learning_rate": 6.123344045032223e-05, + "loss": 0.4969, + "step": 4894 + }, + { + "epoch": 1.3699972012314583, + "grad_norm": 0.2186322897741303, + "learning_rate": 6.121840859702977e-05, + "loss": 0.4829, + "step": 4895 + }, + { + "epoch": 1.3702770780856424, + "grad_norm": 0.23066094336528212, + "learning_rate": 6.12033756759629e-05, + "loss": 0.4914, + "step": 4896 + }, + { + "epoch": 1.3705569549398264, + "grad_norm": 0.23044860403211298, + "learning_rate": 6.118834168855248e-05, + "loss": 0.482, + "step": 4897 + }, + { + "epoch": 1.3708368317940107, + "grad_norm": 0.223957109973142, + "learning_rate": 6.117330663622942e-05, + "loss": 0.4826, + "step": 4898 + }, + { + "epoch": 1.3711167086481948, + "grad_norm": 0.21772359136514263, + "learning_rate": 6.115827052042479e-05, + "loss": 0.489, + "step": 4899 + }, + { + "epoch": 1.3713965855023789, + "grad_norm": 0.22640568563848076, + "learning_rate": 6.114323334256972e-05, + "loss": 0.4999, + "step": 4900 + }, + { + "epoch": 1.3716764623565632, + "grad_norm": 0.21726592509754597, + "learning_rate": 6.112819510409547e-05, + "loss": 0.4839, + "step": 4901 + }, + { + "epoch": 1.3719563392107472, + "grad_norm": 0.23134599121488977, + "learning_rate": 6.111315580643337e-05, + "loss": 0.5139, + "step": 4902 + }, + { + "epoch": 1.3722362160649315, + "grad_norm": 0.224669563980622, + "learning_rate": 6.109811545101488e-05, + "loss": 0.5005, + "step": 4903 + }, + { + "epoch": 1.3725160929191156, + "grad_norm": 0.2360258757364829, + "learning_rate": 6.108307403927156e-05, + "loss": 0.4819, + "step": 4904 + }, + { + "epoch": 1.3727959697732999, + "grad_norm": 0.49855816430372935, + "learning_rate": 6.106803157263504e-05, + "loss": 0.5051, + "step": 4905 + }, + { + "epoch": 1.373075846627484, + "grad_norm": 0.21973509941982283, + "learning_rate": 6.105298805253708e-05, + "loss": 0.5112, + "step": 4906 + }, + { + "epoch": 1.373355723481668, + "grad_norm": 0.22033958319892305, + "learning_rate": 6.103794348040954e-05, + "loss": 0.4948, + "step": 4907 + }, + { + "epoch": 1.3736356003358523, + "grad_norm": 0.2249123039487554, + "learning_rate": 6.102289785768436e-05, + "loss": 0.493, + "step": 4908 + }, + { + "epoch": 1.3739154771900364, + "grad_norm": 0.217968458008853, + "learning_rate": 6.1007851185793575e-05, + "loss": 0.4968, + "step": 4909 + }, + { + "epoch": 1.3741953540442204, + "grad_norm": 0.2332396971806336, + "learning_rate": 6.099280346616938e-05, + "loss": 0.5023, + "step": 4910 + }, + { + "epoch": 1.3744752308984047, + "grad_norm": 0.22387058472821672, + "learning_rate": 6.0977754700243986e-05, + "loss": 0.5086, + "step": 4911 + }, + { + "epoch": 1.3747551077525888, + "grad_norm": 0.2270411436043115, + "learning_rate": 6.096270488944975e-05, + "loss": 0.4935, + "step": 4912 + }, + { + "epoch": 1.375034984606773, + "grad_norm": 0.22162581729918035, + "learning_rate": 6.0947654035219135e-05, + "loss": 0.4822, + "step": 4913 + }, + { + "epoch": 1.3753148614609572, + "grad_norm": 0.2235969551155392, + "learning_rate": 6.0932602138984675e-05, + "loss": 0.4709, + "step": 4914 + }, + { + "epoch": 1.3755947383151415, + "grad_norm": 0.24945799445412362, + "learning_rate": 6.091754920217903e-05, + "loss": 0.5103, + "step": 4915 + }, + { + "epoch": 1.3758746151693255, + "grad_norm": 0.2531319547808986, + "learning_rate": 6.090249522623493e-05, + "loss": 0.5231, + "step": 4916 + }, + { + "epoch": 1.3761544920235096, + "grad_norm": 0.21215161998682955, + "learning_rate": 6.0887440212585244e-05, + "loss": 0.4839, + "step": 4917 + }, + { + "epoch": 1.3764343688776939, + "grad_norm": 0.22563609490813957, + "learning_rate": 6.0872384162662886e-05, + "loss": 0.5046, + "step": 4918 + }, + { + "epoch": 1.376714245731878, + "grad_norm": 0.22422489178315794, + "learning_rate": 6.085732707790095e-05, + "loss": 0.4959, + "step": 4919 + }, + { + "epoch": 1.376994122586062, + "grad_norm": 0.22083064508632638, + "learning_rate": 6.0842268959732515e-05, + "loss": 0.482, + "step": 4920 + }, + { + "epoch": 1.3772739994402463, + "grad_norm": 0.22922582638465872, + "learning_rate": 6.082720980959087e-05, + "loss": 0.494, + "step": 4921 + }, + { + "epoch": 1.3775538762944304, + "grad_norm": 0.2251895411465758, + "learning_rate": 6.081214962890933e-05, + "loss": 0.4815, + "step": 4922 + }, + { + "epoch": 1.3778337531486147, + "grad_norm": 0.21360756897421826, + "learning_rate": 6.079708841912133e-05, + "loss": 0.508, + "step": 4923 + }, + { + "epoch": 1.3781136300027987, + "grad_norm": 0.22467858319617595, + "learning_rate": 6.0782026181660425e-05, + "loss": 0.507, + "step": 4924 + }, + { + "epoch": 1.378393506856983, + "grad_norm": 0.22496655839864346, + "learning_rate": 6.0766962917960226e-05, + "loss": 0.5045, + "step": 4925 + }, + { + "epoch": 1.378673383711167, + "grad_norm": 0.2202228953655769, + "learning_rate": 6.075189862945446e-05, + "loss": 0.5023, + "step": 4926 + }, + { + "epoch": 1.3789532605653512, + "grad_norm": 0.2359178000522644, + "learning_rate": 6.073683331757696e-05, + "loss": 0.5114, + "step": 4927 + }, + { + "epoch": 1.3792331374195355, + "grad_norm": 0.21847078746225165, + "learning_rate": 6.072176698376168e-05, + "loss": 0.5042, + "step": 4928 + }, + { + "epoch": 1.3795130142737195, + "grad_norm": 0.22474509016118469, + "learning_rate": 6.070669962944261e-05, + "loss": 0.4797, + "step": 4929 + }, + { + "epoch": 1.3797928911279036, + "grad_norm": 0.21644842941771736, + "learning_rate": 6.069163125605387e-05, + "loss": 0.4917, + "step": 4930 + }, + { + "epoch": 1.380072767982088, + "grad_norm": 0.2220378729056806, + "learning_rate": 6.067656186502968e-05, + "loss": 0.4936, + "step": 4931 + }, + { + "epoch": 1.380352644836272, + "grad_norm": 0.22442868456390033, + "learning_rate": 6.0661491457804365e-05, + "loss": 0.5211, + "step": 4932 + }, + { + "epoch": 1.3806325216904562, + "grad_norm": 0.2341319335577658, + "learning_rate": 6.064642003581231e-05, + "loss": 0.4931, + "step": 4933 + }, + { + "epoch": 1.3809123985446403, + "grad_norm": 0.22816881579358053, + "learning_rate": 6.0631347600488055e-05, + "loss": 0.507, + "step": 4934 + }, + { + "epoch": 1.3811922753988246, + "grad_norm": 0.21892281487675827, + "learning_rate": 6.061627415326616e-05, + "loss": 0.4975, + "step": 4935 + }, + { + "epoch": 1.3814721522530087, + "grad_norm": 0.22137019103946362, + "learning_rate": 6.0601199695581365e-05, + "loss": 0.5011, + "step": 4936 + }, + { + "epoch": 1.3817520291071927, + "grad_norm": 0.21767486976382813, + "learning_rate": 6.058612422886847e-05, + "loss": 0.5143, + "step": 4937 + }, + { + "epoch": 1.382031905961377, + "grad_norm": 0.2219460822095807, + "learning_rate": 6.057104775456233e-05, + "loss": 0.4911, + "step": 4938 + }, + { + "epoch": 1.382311782815561, + "grad_norm": 0.22738339474408734, + "learning_rate": 6.055597027409794e-05, + "loss": 0.5077, + "step": 4939 + }, + { + "epoch": 1.3825916596697452, + "grad_norm": 0.22200654926719496, + "learning_rate": 6.054089178891039e-05, + "loss": 0.4902, + "step": 4940 + }, + { + "epoch": 1.3828715365239295, + "grad_norm": 0.22430847631014658, + "learning_rate": 6.052581230043487e-05, + "loss": 0.4761, + "step": 4941 + }, + { + "epoch": 1.3831514133781138, + "grad_norm": 0.2198314918943528, + "learning_rate": 6.0510731810106635e-05, + "loss": 0.5191, + "step": 4942 + }, + { + "epoch": 1.3834312902322978, + "grad_norm": 0.23671993116774223, + "learning_rate": 6.049565031936106e-05, + "loss": 0.4956, + "step": 4943 + }, + { + "epoch": 1.383711167086482, + "grad_norm": 0.2217175544356953, + "learning_rate": 6.048056782963362e-05, + "loss": 0.4867, + "step": 4944 + }, + { + "epoch": 1.3839910439406662, + "grad_norm": 0.23732346326989096, + "learning_rate": 6.046548434235986e-05, + "loss": 0.5274, + "step": 4945 + }, + { + "epoch": 1.3842709207948503, + "grad_norm": 0.22172463375245813, + "learning_rate": 6.045039985897546e-05, + "loss": 0.4908, + "step": 4946 + }, + { + "epoch": 1.3845507976490343, + "grad_norm": 0.22860434100991603, + "learning_rate": 6.043531438091614e-05, + "loss": 0.5121, + "step": 4947 + }, + { + "epoch": 1.3848306745032186, + "grad_norm": 0.23924233802678888, + "learning_rate": 6.0420227909617754e-05, + "loss": 0.5166, + "step": 4948 + }, + { + "epoch": 1.3851105513574027, + "grad_norm": 0.22391349971425006, + "learning_rate": 6.040514044651625e-05, + "loss": 0.5166, + "step": 4949 + }, + { + "epoch": 1.385390428211587, + "grad_norm": 0.21978485436046194, + "learning_rate": 6.039005199304765e-05, + "loss": 0.506, + "step": 4950 + }, + { + "epoch": 1.385670305065771, + "grad_norm": 0.23175563724704498, + "learning_rate": 6.037496255064808e-05, + "loss": 0.5066, + "step": 4951 + }, + { + "epoch": 1.3859501819199553, + "grad_norm": 0.23372162575135808, + "learning_rate": 6.035987212075378e-05, + "loss": 0.4748, + "step": 4952 + }, + { + "epoch": 1.3862300587741394, + "grad_norm": 0.2352190843452797, + "learning_rate": 6.034478070480104e-05, + "loss": 0.5189, + "step": 4953 + }, + { + "epoch": 1.3865099356283235, + "grad_norm": 0.22783618571883854, + "learning_rate": 6.032968830422628e-05, + "loss": 0.5101, + "step": 4954 + }, + { + "epoch": 1.3867898124825078, + "grad_norm": 0.22408681222800939, + "learning_rate": 6.0314594920466025e-05, + "loss": 0.4963, + "step": 4955 + }, + { + "epoch": 1.3870696893366918, + "grad_norm": 0.23597349527187633, + "learning_rate": 6.0299500554956845e-05, + "loss": 0.513, + "step": 4956 + }, + { + "epoch": 1.387349566190876, + "grad_norm": 0.2447697911650183, + "learning_rate": 6.028440520913544e-05, + "loss": 0.5048, + "step": 4957 + }, + { + "epoch": 1.3876294430450602, + "grad_norm": 0.2227897136060564, + "learning_rate": 6.026930888443858e-05, + "loss": 0.4852, + "step": 4958 + }, + { + "epoch": 1.3879093198992443, + "grad_norm": 0.2322352415827976, + "learning_rate": 6.0254211582303154e-05, + "loss": 0.5065, + "step": 4959 + }, + { + "epoch": 1.3881891967534286, + "grad_norm": 0.21090238770420058, + "learning_rate": 6.0239113304166136e-05, + "loss": 0.4699, + "step": 4960 + }, + { + "epoch": 1.3884690736076126, + "grad_norm": 0.2226884079788672, + "learning_rate": 6.022401405146457e-05, + "loss": 0.4933, + "step": 4961 + }, + { + "epoch": 1.388748950461797, + "grad_norm": 0.23210330842943613, + "learning_rate": 6.0208913825635636e-05, + "loss": 0.4949, + "step": 4962 + }, + { + "epoch": 1.389028827315981, + "grad_norm": 0.21929677966562106, + "learning_rate": 6.019381262811656e-05, + "loss": 0.5, + "step": 4963 + }, + { + "epoch": 1.389308704170165, + "grad_norm": 0.2301597174744963, + "learning_rate": 6.017871046034471e-05, + "loss": 0.5067, + "step": 4964 + }, + { + "epoch": 1.3895885810243493, + "grad_norm": 0.22117009999570827, + "learning_rate": 6.01636073237575e-05, + "loss": 0.5129, + "step": 4965 + }, + { + "epoch": 1.3898684578785334, + "grad_norm": 0.21357227768456047, + "learning_rate": 6.014850321979244e-05, + "loss": 0.4745, + "step": 4966 + }, + { + "epoch": 1.3901483347327175, + "grad_norm": 0.2301716139823612, + "learning_rate": 6.013339814988718e-05, + "loss": 0.5154, + "step": 4967 + }, + { + "epoch": 1.3904282115869018, + "grad_norm": 0.22082139725228092, + "learning_rate": 6.0118292115479405e-05, + "loss": 0.4911, + "step": 4968 + }, + { + "epoch": 1.3907080884410858, + "grad_norm": 0.22858146113004257, + "learning_rate": 6.010318511800691e-05, + "loss": 0.489, + "step": 4969 + }, + { + "epoch": 1.3909879652952701, + "grad_norm": 0.21903256391982898, + "learning_rate": 6.0088077158907605e-05, + "loss": 0.4786, + "step": 4970 + }, + { + "epoch": 1.3912678421494542, + "grad_norm": 0.22212827669015747, + "learning_rate": 6.007296823961947e-05, + "loss": 0.4959, + "step": 4971 + }, + { + "epoch": 1.3915477190036385, + "grad_norm": 0.22735127002345915, + "learning_rate": 6.005785836158059e-05, + "loss": 0.5067, + "step": 4972 + }, + { + "epoch": 1.3918275958578226, + "grad_norm": 0.22553672459643198, + "learning_rate": 6.004274752622913e-05, + "loss": 0.5012, + "step": 4973 + }, + { + "epoch": 1.3921074727120066, + "grad_norm": 0.2315360169513721, + "learning_rate": 6.0027635735003316e-05, + "loss": 0.5126, + "step": 4974 + }, + { + "epoch": 1.392387349566191, + "grad_norm": 0.224431775744776, + "learning_rate": 6.001252298934154e-05, + "loss": 0.4965, + "step": 4975 + }, + { + "epoch": 1.392667226420375, + "grad_norm": 0.2181392716101542, + "learning_rate": 5.9997409290682206e-05, + "loss": 0.4878, + "step": 4976 + }, + { + "epoch": 1.392947103274559, + "grad_norm": 0.23699397041647086, + "learning_rate": 5.9982294640463865e-05, + "loss": 0.5045, + "step": 4977 + }, + { + "epoch": 1.3932269801287434, + "grad_norm": 0.22519207347435027, + "learning_rate": 5.996717904012514e-05, + "loss": 0.4924, + "step": 4978 + }, + { + "epoch": 1.3935068569829276, + "grad_norm": 0.23364550992987365, + "learning_rate": 5.995206249110472e-05, + "loss": 0.5084, + "step": 4979 + }, + { + "epoch": 1.3937867338371117, + "grad_norm": 0.2128467269144305, + "learning_rate": 5.993694499484143e-05, + "loss": 0.4656, + "step": 4980 + }, + { + "epoch": 1.3940666106912958, + "grad_norm": 0.2186018293630736, + "learning_rate": 5.992182655277415e-05, + "loss": 0.4947, + "step": 4981 + }, + { + "epoch": 1.39434648754548, + "grad_norm": 0.2105537455267023, + "learning_rate": 5.990670716634188e-05, + "loss": 0.4693, + "step": 4982 + }, + { + "epoch": 1.3946263643996641, + "grad_norm": 0.22416727318435012, + "learning_rate": 5.989158683698366e-05, + "loss": 0.5024, + "step": 4983 + }, + { + "epoch": 1.3949062412538482, + "grad_norm": 0.2234911347900639, + "learning_rate": 5.987646556613868e-05, + "loss": 0.4814, + "step": 4984 + }, + { + "epoch": 1.3951861181080325, + "grad_norm": 0.22608029505877594, + "learning_rate": 5.986134335524617e-05, + "loss": 0.4851, + "step": 4985 + }, + { + "epoch": 1.3954659949622166, + "grad_norm": 0.21696160680906468, + "learning_rate": 5.9846220205745486e-05, + "loss": 0.4886, + "step": 4986 + }, + { + "epoch": 1.3957458718164009, + "grad_norm": 0.22680582852428668, + "learning_rate": 5.9831096119076034e-05, + "loss": 0.486, + "step": 4987 + }, + { + "epoch": 1.396025748670585, + "grad_norm": 0.2123468287750178, + "learning_rate": 5.981597109667737e-05, + "loss": 0.5121, + "step": 4988 + }, + { + "epoch": 1.3963056255247692, + "grad_norm": 0.2379904396157953, + "learning_rate": 5.980084513998908e-05, + "loss": 0.5206, + "step": 4989 + }, + { + "epoch": 1.3965855023789533, + "grad_norm": 0.22597013044968492, + "learning_rate": 5.978571825045086e-05, + "loss": 0.492, + "step": 4990 + }, + { + "epoch": 1.3968653792331374, + "grad_norm": 0.23010187260971798, + "learning_rate": 5.9770590429502516e-05, + "loss": 0.5024, + "step": 4991 + }, + { + "epoch": 1.3971452560873217, + "grad_norm": 0.21997489110166854, + "learning_rate": 5.975546167858389e-05, + "loss": 0.4809, + "step": 4992 + }, + { + "epoch": 1.3974251329415057, + "grad_norm": 0.21762511882155733, + "learning_rate": 5.974033199913496e-05, + "loss": 0.5147, + "step": 4993 + }, + { + "epoch": 1.3977050097956898, + "grad_norm": 0.2124238924762238, + "learning_rate": 5.9725201392595785e-05, + "loss": 0.4521, + "step": 4994 + }, + { + "epoch": 1.397984886649874, + "grad_norm": 0.22673216640657248, + "learning_rate": 5.971006986040649e-05, + "loss": 0.4995, + "step": 4995 + }, + { + "epoch": 1.3982647635040582, + "grad_norm": 0.21788438437220065, + "learning_rate": 5.9694937404007304e-05, + "loss": 0.506, + "step": 4996 + }, + { + "epoch": 1.3985446403582424, + "grad_norm": 0.22551416929690996, + "learning_rate": 5.967980402483856e-05, + "loss": 0.5019, + "step": 4997 + }, + { + "epoch": 1.3988245172124265, + "grad_norm": 0.28356064717799917, + "learning_rate": 5.966466972434065e-05, + "loss": 0.515, + "step": 4998 + }, + { + "epoch": 1.3991043940666108, + "grad_norm": 0.2473033755647614, + "learning_rate": 5.9649534503954075e-05, + "loss": 0.5146, + "step": 4999 + }, + { + "epoch": 1.3993842709207949, + "grad_norm": 0.23527890407280222, + "learning_rate": 5.96343983651194e-05, + "loss": 0.5275, + "step": 5000 + }, + { + "epoch": 1.399664147774979, + "grad_norm": 0.23767422093574717, + "learning_rate": 5.9619261309277296e-05, + "loss": 0.4832, + "step": 5001 + }, + { + "epoch": 1.3999440246291632, + "grad_norm": 0.2196809284577659, + "learning_rate": 5.960412333786851e-05, + "loss": 0.4906, + "step": 5002 + }, + { + "epoch": 1.4002239014833473, + "grad_norm": 0.22609996239196942, + "learning_rate": 5.9588984452333894e-05, + "loss": 0.5003, + "step": 5003 + }, + { + "epoch": 1.4005037783375314, + "grad_norm": 0.22196006951410982, + "learning_rate": 5.9573844654114366e-05, + "loss": 0.511, + "step": 5004 + }, + { + "epoch": 1.4007836551917157, + "grad_norm": 0.2299174173773503, + "learning_rate": 5.955870394465094e-05, + "loss": 0.4924, + "step": 5005 + }, + { + "epoch": 1.4010635320458997, + "grad_norm": 0.223593199641648, + "learning_rate": 5.954356232538474e-05, + "loss": 0.493, + "step": 5006 + }, + { + "epoch": 1.401343408900084, + "grad_norm": 0.2234269638659027, + "learning_rate": 5.952841979775692e-05, + "loss": 0.5018, + "step": 5007 + }, + { + "epoch": 1.401623285754268, + "grad_norm": 0.23371931602661175, + "learning_rate": 5.9513276363208784e-05, + "loss": 0.5214, + "step": 5008 + }, + { + "epoch": 1.4019031626084524, + "grad_norm": 0.22229756297983255, + "learning_rate": 5.9498132023181665e-05, + "loss": 0.4972, + "step": 5009 + }, + { + "epoch": 1.4021830394626364, + "grad_norm": 0.21886655329394156, + "learning_rate": 5.948298677911705e-05, + "loss": 0.486, + "step": 5010 + }, + { + "epoch": 1.4024629163168205, + "grad_norm": 0.22710976531252083, + "learning_rate": 5.946784063245642e-05, + "loss": 0.4989, + "step": 5011 + }, + { + "epoch": 1.4027427931710048, + "grad_norm": 0.23140190148043827, + "learning_rate": 5.945269358464143e-05, + "loss": 0.5041, + "step": 5012 + }, + { + "epoch": 1.4030226700251889, + "grad_norm": 0.21961096827576404, + "learning_rate": 5.943754563711376e-05, + "loss": 0.4905, + "step": 5013 + }, + { + "epoch": 1.403302546879373, + "grad_norm": 0.22983839026146266, + "learning_rate": 5.942239679131521e-05, + "loss": 0.496, + "step": 5014 + }, + { + "epoch": 1.4035824237335572, + "grad_norm": 0.22969809349749876, + "learning_rate": 5.9407247048687675e-05, + "loss": 0.5194, + "step": 5015 + }, + { + "epoch": 1.4038623005877415, + "grad_norm": 0.22027628783970996, + "learning_rate": 5.93920964106731e-05, + "loss": 0.4902, + "step": 5016 + }, + { + "epoch": 1.4041421774419256, + "grad_norm": 0.2214674881750312, + "learning_rate": 5.9376944878713536e-05, + "loss": 0.4973, + "step": 5017 + }, + { + "epoch": 1.4044220542961097, + "grad_norm": 0.21576432232474238, + "learning_rate": 5.9361792454251094e-05, + "loss": 0.4857, + "step": 5018 + }, + { + "epoch": 1.404701931150294, + "grad_norm": 0.22104374925337936, + "learning_rate": 5.934663913872803e-05, + "loss": 0.484, + "step": 5019 + }, + { + "epoch": 1.404981808004478, + "grad_norm": 0.22502827769670583, + "learning_rate": 5.933148493358661e-05, + "loss": 0.5049, + "step": 5020 + }, + { + "epoch": 1.405261684858662, + "grad_norm": 0.23289360228746306, + "learning_rate": 5.9316329840269226e-05, + "loss": 0.4844, + "step": 5021 + }, + { + "epoch": 1.4055415617128464, + "grad_norm": 0.21772145567277384, + "learning_rate": 5.930117386021835e-05, + "loss": 0.5249, + "step": 5022 + }, + { + "epoch": 1.4058214385670305, + "grad_norm": 0.2177600665874925, + "learning_rate": 5.9286016994876536e-05, + "loss": 0.4923, + "step": 5023 + }, + { + "epoch": 1.4061013154212147, + "grad_norm": 0.23106005486503775, + "learning_rate": 5.9270859245686436e-05, + "loss": 0.5098, + "step": 5024 + }, + { + "epoch": 1.4063811922753988, + "grad_norm": 0.21462041939366736, + "learning_rate": 5.925570061409077e-05, + "loss": 0.4775, + "step": 5025 + }, + { + "epoch": 1.406661069129583, + "grad_norm": 0.22339497204583186, + "learning_rate": 5.924054110153233e-05, + "loss": 0.507, + "step": 5026 + }, + { + "epoch": 1.4069409459837672, + "grad_norm": 0.22864504868173752, + "learning_rate": 5.922538070945402e-05, + "loss": 0.481, + "step": 5027 + }, + { + "epoch": 1.4072208228379512, + "grad_norm": 0.22863576947528313, + "learning_rate": 5.921021943929882e-05, + "loss": 0.4889, + "step": 5028 + }, + { + "epoch": 1.4075006996921355, + "grad_norm": 0.23413466753896148, + "learning_rate": 5.919505729250977e-05, + "loss": 0.5141, + "step": 5029 + }, + { + "epoch": 1.4077805765463196, + "grad_norm": 0.2209986247340032, + "learning_rate": 5.917989427053e-05, + "loss": 0.4662, + "step": 5030 + }, + { + "epoch": 1.4080604534005037, + "grad_norm": 0.23618778371542407, + "learning_rate": 5.916473037480278e-05, + "loss": 0.5019, + "step": 5031 + }, + { + "epoch": 1.408340330254688, + "grad_norm": 0.22161486533350255, + "learning_rate": 5.9149565606771386e-05, + "loss": 0.491, + "step": 5032 + }, + { + "epoch": 1.408620207108872, + "grad_norm": 0.23550043411825708, + "learning_rate": 5.913439996787922e-05, + "loss": 0.5119, + "step": 5033 + }, + { + "epoch": 1.4089000839630563, + "grad_norm": 0.2290820612203507, + "learning_rate": 5.9119233459569745e-05, + "loss": 0.4965, + "step": 5034 + }, + { + "epoch": 1.4091799608172404, + "grad_norm": 0.2273188319385308, + "learning_rate": 5.910406608328652e-05, + "loss": 0.4791, + "step": 5035 + }, + { + "epoch": 1.4094598376714247, + "grad_norm": 0.2315924812143491, + "learning_rate": 5.90888978404732e-05, + "loss": 0.4968, + "step": 5036 + }, + { + "epoch": 1.4097397145256088, + "grad_norm": 0.22312571758129626, + "learning_rate": 5.907372873257351e-05, + "loss": 0.5036, + "step": 5037 + }, + { + "epoch": 1.4100195913797928, + "grad_norm": 0.23115034375508464, + "learning_rate": 5.905855876103122e-05, + "loss": 0.4971, + "step": 5038 + }, + { + "epoch": 1.4102994682339771, + "grad_norm": 0.23239591964968542, + "learning_rate": 5.9043387927290236e-05, + "loss": 0.4688, + "step": 5039 + }, + { + "epoch": 1.4105793450881612, + "grad_norm": 0.23394890424832476, + "learning_rate": 5.902821623279453e-05, + "loss": 0.4916, + "step": 5040 + }, + { + "epoch": 1.4108592219423453, + "grad_norm": 0.22836562675520436, + "learning_rate": 5.901304367898815e-05, + "loss": 0.4986, + "step": 5041 + }, + { + "epoch": 1.4111390987965295, + "grad_norm": 0.2570095442819792, + "learning_rate": 5.8997870267315234e-05, + "loss": 0.5019, + "step": 5042 + }, + { + "epoch": 1.4114189756507136, + "grad_norm": 0.22527101983838463, + "learning_rate": 5.898269599921998e-05, + "loss": 0.4874, + "step": 5043 + }, + { + "epoch": 1.411698852504898, + "grad_norm": 0.2286594264095889, + "learning_rate": 5.896752087614671e-05, + "loss": 0.5181, + "step": 5044 + }, + { + "epoch": 1.411978729359082, + "grad_norm": 0.22783639873794284, + "learning_rate": 5.895234489953977e-05, + "loss": 0.4997, + "step": 5045 + }, + { + "epoch": 1.4122586062132663, + "grad_norm": 0.22781834355265154, + "learning_rate": 5.893716807084364e-05, + "loss": 0.4912, + "step": 5046 + }, + { + "epoch": 1.4125384830674503, + "grad_norm": 0.23013603717321154, + "learning_rate": 5.8921990391502845e-05, + "loss": 0.5124, + "step": 5047 + }, + { + "epoch": 1.4128183599216344, + "grad_norm": 0.2254745207543635, + "learning_rate": 5.8906811862962005e-05, + "loss": 0.5052, + "step": 5048 + }, + { + "epoch": 1.4130982367758187, + "grad_norm": 0.22267036138148985, + "learning_rate": 5.889163248666583e-05, + "loss": 0.505, + "step": 5049 + }, + { + "epoch": 1.4133781136300028, + "grad_norm": 0.2241121222711816, + "learning_rate": 5.88764522640591e-05, + "loss": 0.5208, + "step": 5050 + }, + { + "epoch": 1.4136579904841868, + "grad_norm": 0.2221941014441487, + "learning_rate": 5.8861271196586676e-05, + "loss": 0.4722, + "step": 5051 + }, + { + "epoch": 1.4139378673383711, + "grad_norm": 0.2280223143118141, + "learning_rate": 5.8846089285693495e-05, + "loss": 0.5033, + "step": 5052 + }, + { + "epoch": 1.4142177441925552, + "grad_norm": 0.2253822856763662, + "learning_rate": 5.8830906532824594e-05, + "loss": 0.5158, + "step": 5053 + }, + { + "epoch": 1.4144976210467395, + "grad_norm": 0.2191644446815118, + "learning_rate": 5.881572293942506e-05, + "loss": 0.4695, + "step": 5054 + }, + { + "epoch": 1.4147774979009236, + "grad_norm": 0.21817385434715142, + "learning_rate": 5.8800538506940096e-05, + "loss": 0.4846, + "step": 5055 + }, + { + "epoch": 1.4150573747551078, + "grad_norm": 0.2238080059259505, + "learning_rate": 5.878535323681492e-05, + "loss": 0.4902, + "step": 5056 + }, + { + "epoch": 1.415337251609292, + "grad_norm": 0.2217783404490955, + "learning_rate": 5.8770167130494924e-05, + "loss": 0.5072, + "step": 5057 + }, + { + "epoch": 1.415617128463476, + "grad_norm": 0.21282959711668764, + "learning_rate": 5.875498018942551e-05, + "loss": 0.4763, + "step": 5058 + }, + { + "epoch": 1.4158970053176603, + "grad_norm": 0.22241242725168442, + "learning_rate": 5.873979241505218e-05, + "loss": 0.4931, + "step": 5059 + }, + { + "epoch": 1.4161768821718443, + "grad_norm": 0.22465901428241183, + "learning_rate": 5.8724603808820525e-05, + "loss": 0.4983, + "step": 5060 + }, + { + "epoch": 1.4164567590260284, + "grad_norm": 0.23378838641919514, + "learning_rate": 5.870941437217618e-05, + "loss": 0.53, + "step": 5061 + }, + { + "epoch": 1.4167366358802127, + "grad_norm": 0.2206834054188844, + "learning_rate": 5.869422410656492e-05, + "loss": 0.4919, + "step": 5062 + }, + { + "epoch": 1.417016512734397, + "grad_norm": 0.21967802320622712, + "learning_rate": 5.867903301343253e-05, + "loss": 0.4706, + "step": 5063 + }, + { + "epoch": 1.417296389588581, + "grad_norm": 0.23081947704695172, + "learning_rate": 5.866384109422494e-05, + "loss": 0.487, + "step": 5064 + }, + { + "epoch": 1.4175762664427651, + "grad_norm": 0.22330659531424463, + "learning_rate": 5.864864835038807e-05, + "loss": 0.5036, + "step": 5065 + }, + { + "epoch": 1.4178561432969494, + "grad_norm": 0.21905444711381003, + "learning_rate": 5.8633454783368034e-05, + "loss": 0.5101, + "step": 5066 + }, + { + "epoch": 1.4181360201511335, + "grad_norm": 0.22362906053773424, + "learning_rate": 5.8618260394610923e-05, + "loss": 0.5005, + "step": 5067 + }, + { + "epoch": 1.4184158970053176, + "grad_norm": 0.22619063934025008, + "learning_rate": 5.8603065185562976e-05, + "loss": 0.5098, + "step": 5068 + }, + { + "epoch": 1.4186957738595019, + "grad_norm": 0.22980843456710603, + "learning_rate": 5.858786915767047e-05, + "loss": 0.4836, + "step": 5069 + }, + { + "epoch": 1.418975650713686, + "grad_norm": 0.22520190570306362, + "learning_rate": 5.8572672312379773e-05, + "loss": 0.4856, + "step": 5070 + }, + { + "epoch": 1.4192555275678702, + "grad_norm": 0.23043142938009362, + "learning_rate": 5.855747465113731e-05, + "loss": 0.5054, + "step": 5071 + }, + { + "epoch": 1.4195354044220543, + "grad_norm": 0.22230338189102747, + "learning_rate": 5.8542276175389635e-05, + "loss": 0.5231, + "step": 5072 + }, + { + "epoch": 1.4198152812762386, + "grad_norm": 0.23568286790250229, + "learning_rate": 5.852707688658334e-05, + "loss": 0.5045, + "step": 5073 + }, + { + "epoch": 1.4200951581304226, + "grad_norm": 0.22574382109006813, + "learning_rate": 5.851187678616508e-05, + "loss": 0.5107, + "step": 5074 + }, + { + "epoch": 1.4203750349846067, + "grad_norm": 0.23709819701337603, + "learning_rate": 5.849667587558162e-05, + "loss": 0.4989, + "step": 5075 + }, + { + "epoch": 1.420654911838791, + "grad_norm": 0.2286566361517191, + "learning_rate": 5.84814741562798e-05, + "loss": 0.5202, + "step": 5076 + }, + { + "epoch": 1.420934788692975, + "grad_norm": 0.2204586319662191, + "learning_rate": 5.8466271629706526e-05, + "loss": 0.5218, + "step": 5077 + }, + { + "epoch": 1.4212146655471591, + "grad_norm": 0.2149152439134319, + "learning_rate": 5.8451068297308774e-05, + "loss": 0.4962, + "step": 5078 + }, + { + "epoch": 1.4214945424013434, + "grad_norm": 0.22085380067325006, + "learning_rate": 5.843586416053362e-05, + "loss": 0.4947, + "step": 5079 + }, + { + "epoch": 1.4217744192555275, + "grad_norm": 0.2174060073356206, + "learning_rate": 5.842065922082818e-05, + "loss": 0.4787, + "step": 5080 + }, + { + "epoch": 1.4220542961097118, + "grad_norm": 0.2255742449065406, + "learning_rate": 5.8405453479639684e-05, + "loss": 0.4813, + "step": 5081 + }, + { + "epoch": 1.4223341729638959, + "grad_norm": 0.2227538236941638, + "learning_rate": 5.839024693841543e-05, + "loss": 0.4914, + "step": 5082 + }, + { + "epoch": 1.4226140498180802, + "grad_norm": 0.2082428477566117, + "learning_rate": 5.8375039598602774e-05, + "loss": 0.5206, + "step": 5083 + }, + { + "epoch": 1.4228939266722642, + "grad_norm": 0.22473516709512023, + "learning_rate": 5.835983146164916e-05, + "loss": 0.4884, + "step": 5084 + }, + { + "epoch": 1.4231738035264483, + "grad_norm": 0.23382157834934864, + "learning_rate": 5.8344622529002105e-05, + "loss": 0.4924, + "step": 5085 + }, + { + "epoch": 1.4234536803806326, + "grad_norm": 0.2308118760235207, + "learning_rate": 5.8329412802109203e-05, + "loss": 0.4941, + "step": 5086 + }, + { + "epoch": 1.4237335572348166, + "grad_norm": 0.2344467641014014, + "learning_rate": 5.831420228241814e-05, + "loss": 0.5103, + "step": 5087 + }, + { + "epoch": 1.4240134340890007, + "grad_norm": 0.2300614074466648, + "learning_rate": 5.829899097137664e-05, + "loss": 0.5031, + "step": 5088 + }, + { + "epoch": 1.424293310943185, + "grad_norm": 0.22412137344537142, + "learning_rate": 5.828377887043255e-05, + "loss": 0.4917, + "step": 5089 + }, + { + "epoch": 1.424573187797369, + "grad_norm": 0.2457724772272532, + "learning_rate": 5.826856598103374e-05, + "loss": 0.5309, + "step": 5090 + }, + { + "epoch": 1.4248530646515534, + "grad_norm": 0.22946844825061005, + "learning_rate": 5.8253352304628185e-05, + "loss": 0.4976, + "step": 5091 + }, + { + "epoch": 1.4251329415057374, + "grad_norm": 0.22740190593370493, + "learning_rate": 5.823813784266394e-05, + "loss": 0.4988, + "step": 5092 + }, + { + "epoch": 1.4254128183599217, + "grad_norm": 0.21465493384224607, + "learning_rate": 5.822292259658914e-05, + "loss": 0.4791, + "step": 5093 + }, + { + "epoch": 1.4256926952141058, + "grad_norm": 0.22384317574679752, + "learning_rate": 5.820770656785195e-05, + "loss": 0.479, + "step": 5094 + }, + { + "epoch": 1.4259725720682899, + "grad_norm": 0.2330975977727562, + "learning_rate": 5.819248975790066e-05, + "loss": 0.5108, + "step": 5095 + }, + { + "epoch": 1.4262524489224742, + "grad_norm": 0.22474847282534643, + "learning_rate": 5.81772721681836e-05, + "loss": 0.4877, + "step": 5096 + }, + { + "epoch": 1.4265323257766582, + "grad_norm": 0.2266302760241128, + "learning_rate": 5.816205380014921e-05, + "loss": 0.4988, + "step": 5097 + }, + { + "epoch": 1.4268122026308423, + "grad_norm": 0.2272754251670803, + "learning_rate": 5.814683465524596e-05, + "loss": 0.4779, + "step": 5098 + }, + { + "epoch": 1.4270920794850266, + "grad_norm": 0.2257802828793489, + "learning_rate": 5.813161473492245e-05, + "loss": 0.498, + "step": 5099 + }, + { + "epoch": 1.4273719563392109, + "grad_norm": 0.22761805180770237, + "learning_rate": 5.811639404062727e-05, + "loss": 0.4931, + "step": 5100 + }, + { + "epoch": 1.427651833193395, + "grad_norm": 0.2215397250599594, + "learning_rate": 5.8101172573809184e-05, + "loss": 0.5294, + "step": 5101 + }, + { + "epoch": 1.427931710047579, + "grad_norm": 0.22818676960933698, + "learning_rate": 5.808595033591694e-05, + "loss": 0.4994, + "step": 5102 + }, + { + "epoch": 1.4282115869017633, + "grad_norm": 0.21807604927218227, + "learning_rate": 5.8070727328399424e-05, + "loss": 0.4853, + "step": 5103 + }, + { + "epoch": 1.4284914637559474, + "grad_norm": 0.227821285830176, + "learning_rate": 5.805550355270557e-05, + "loss": 0.4844, + "step": 5104 + }, + { + "epoch": 1.4287713406101314, + "grad_norm": 0.2330054532629367, + "learning_rate": 5.804027901028437e-05, + "loss": 0.4992, + "step": 5105 + }, + { + "epoch": 1.4290512174643157, + "grad_norm": 0.22922692229530123, + "learning_rate": 5.802505370258492e-05, + "loss": 0.4881, + "step": 5106 + }, + { + "epoch": 1.4293310943184998, + "grad_norm": 0.21999471584967822, + "learning_rate": 5.8009827631056366e-05, + "loss": 0.5016, + "step": 5107 + }, + { + "epoch": 1.429610971172684, + "grad_norm": 0.22227379808463635, + "learning_rate": 5.799460079714793e-05, + "loss": 0.4926, + "step": 5108 + }, + { + "epoch": 1.4298908480268682, + "grad_norm": 0.2160585080824326, + "learning_rate": 5.7979373202308916e-05, + "loss": 0.4996, + "step": 5109 + }, + { + "epoch": 1.4301707248810525, + "grad_norm": 0.22022489562667785, + "learning_rate": 5.79641448479887e-05, + "loss": 0.5135, + "step": 5110 + }, + { + "epoch": 1.4304506017352365, + "grad_norm": 0.21799375721861491, + "learning_rate": 5.794891573563671e-05, + "loss": 0.5084, + "step": 5111 + }, + { + "epoch": 1.4307304785894206, + "grad_norm": 0.21586980033273376, + "learning_rate": 5.793368586670248e-05, + "loss": 0.4667, + "step": 5112 + }, + { + "epoch": 1.4310103554436049, + "grad_norm": 0.21824290616845962, + "learning_rate": 5.791845524263559e-05, + "loss": 0.5119, + "step": 5113 + }, + { + "epoch": 1.431290232297789, + "grad_norm": 0.23236991119315753, + "learning_rate": 5.790322386488569e-05, + "loss": 0.4889, + "step": 5114 + }, + { + "epoch": 1.431570109151973, + "grad_norm": 0.2175335504781246, + "learning_rate": 5.788799173490252e-05, + "loss": 0.5001, + "step": 5115 + }, + { + "epoch": 1.4318499860061573, + "grad_norm": 0.2195394483250366, + "learning_rate": 5.787275885413589e-05, + "loss": 0.4928, + "step": 5116 + }, + { + "epoch": 1.4321298628603414, + "grad_norm": 0.22265226840877025, + "learning_rate": 5.7857525224035655e-05, + "loss": 0.4874, + "step": 5117 + }, + { + "epoch": 1.4324097397145257, + "grad_norm": 0.22134877298355587, + "learning_rate": 5.7842290846051784e-05, + "loss": 0.4928, + "step": 5118 + }, + { + "epoch": 1.4326896165687097, + "grad_norm": 0.2347206470548645, + "learning_rate": 5.782705572163427e-05, + "loss": 0.5051, + "step": 5119 + }, + { + "epoch": 1.432969493422894, + "grad_norm": 0.40032604481325523, + "learning_rate": 5.781181985223322e-05, + "loss": 0.4919, + "step": 5120 + }, + { + "epoch": 1.433249370277078, + "grad_norm": 0.23219106263107278, + "learning_rate": 5.7796583239298776e-05, + "loss": 0.4809, + "step": 5121 + }, + { + "epoch": 1.4335292471312622, + "grad_norm": 0.23372318208069728, + "learning_rate": 5.7781345884281165e-05, + "loss": 0.5083, + "step": 5122 + }, + { + "epoch": 1.4338091239854465, + "grad_norm": 0.2205676570277828, + "learning_rate": 5.776610778863072e-05, + "loss": 0.4899, + "step": 5123 + }, + { + "epoch": 1.4340890008396305, + "grad_norm": 0.23020409363981567, + "learning_rate": 5.775086895379778e-05, + "loss": 0.4706, + "step": 5124 + }, + { + "epoch": 1.4343688776938146, + "grad_norm": 0.23267482291393507, + "learning_rate": 5.7735629381232795e-05, + "loss": 0.5059, + "step": 5125 + }, + { + "epoch": 1.434648754547999, + "grad_norm": 0.2328435937625657, + "learning_rate": 5.772038907238627e-05, + "loss": 0.5225, + "step": 5126 + }, + { + "epoch": 1.434928631402183, + "grad_norm": 0.22604008472819107, + "learning_rate": 5.770514802870879e-05, + "loss": 0.4991, + "step": 5127 + }, + { + "epoch": 1.4352085082563673, + "grad_norm": 0.21336922426089364, + "learning_rate": 5.7689906251651016e-05, + "loss": 0.474, + "step": 5128 + }, + { + "epoch": 1.4354883851105513, + "grad_norm": 0.23368535426931242, + "learning_rate": 5.767466374266366e-05, + "loss": 0.4804, + "step": 5129 + }, + { + "epoch": 1.4357682619647356, + "grad_norm": 0.23429336490785493, + "learning_rate": 5.7659420503197514e-05, + "loss": 0.4892, + "step": 5130 + }, + { + "epoch": 1.4360481388189197, + "grad_norm": 0.22699847341582347, + "learning_rate": 5.764417653470343e-05, + "loss": 0.496, + "step": 5131 + }, + { + "epoch": 1.4363280156731038, + "grad_norm": 0.2176473038796731, + "learning_rate": 5.762893183863235e-05, + "loss": 0.4854, + "step": 5132 + }, + { + "epoch": 1.436607892527288, + "grad_norm": 0.22785538450447065, + "learning_rate": 5.7613686416435273e-05, + "loss": 0.5282, + "step": 5133 + }, + { + "epoch": 1.4368877693814721, + "grad_norm": 0.22131021495121425, + "learning_rate": 5.7598440269563245e-05, + "loss": 0.4966, + "step": 5134 + }, + { + "epoch": 1.4371676462356562, + "grad_norm": 0.23120351661154448, + "learning_rate": 5.758319339946744e-05, + "loss": 0.5099, + "step": 5135 + }, + { + "epoch": 1.4374475230898405, + "grad_norm": 0.23538010852021032, + "learning_rate": 5.7567945807599035e-05, + "loss": 0.4718, + "step": 5136 + }, + { + "epoch": 1.4377273999440248, + "grad_norm": 0.22925175083253585, + "learning_rate": 5.7552697495409304e-05, + "loss": 0.4831, + "step": 5137 + }, + { + "epoch": 1.4380072767982088, + "grad_norm": 0.22124896147976106, + "learning_rate": 5.753744846434961e-05, + "loss": 0.4856, + "step": 5138 + }, + { + "epoch": 1.438287153652393, + "grad_norm": 0.21536526251900118, + "learning_rate": 5.752219871587134e-05, + "loss": 0.4927, + "step": 5139 + }, + { + "epoch": 1.4385670305065772, + "grad_norm": 0.2332442620797546, + "learning_rate": 5.7506948251426e-05, + "loss": 0.4887, + "step": 5140 + }, + { + "epoch": 1.4388469073607613, + "grad_norm": 0.21917960264428496, + "learning_rate": 5.7491697072465114e-05, + "loss": 0.5107, + "step": 5141 + }, + { + "epoch": 1.4391267842149453, + "grad_norm": 0.2332156472110391, + "learning_rate": 5.74764451804403e-05, + "loss": 0.5142, + "step": 5142 + }, + { + "epoch": 1.4394066610691296, + "grad_norm": 0.21790915008402828, + "learning_rate": 5.7461192576803256e-05, + "loss": 0.4878, + "step": 5143 + }, + { + "epoch": 1.4396865379233137, + "grad_norm": 0.22261157577003818, + "learning_rate": 5.7445939263005734e-05, + "loss": 0.4902, + "step": 5144 + }, + { + "epoch": 1.439966414777498, + "grad_norm": 0.21779936210905748, + "learning_rate": 5.743068524049954e-05, + "loss": 0.5062, + "step": 5145 + }, + { + "epoch": 1.440246291631682, + "grad_norm": 0.22274628015121947, + "learning_rate": 5.7415430510736555e-05, + "loss": 0.5088, + "step": 5146 + }, + { + "epoch": 1.4405261684858663, + "grad_norm": 0.22570440775155368, + "learning_rate": 5.740017507516876e-05, + "loss": 0.4709, + "step": 5147 + }, + { + "epoch": 1.4408060453400504, + "grad_norm": 0.21426655450686172, + "learning_rate": 5.738491893524816e-05, + "loss": 0.5096, + "step": 5148 + }, + { + "epoch": 1.4410859221942345, + "grad_norm": 0.21895351701900131, + "learning_rate": 5.736966209242682e-05, + "loss": 0.5018, + "step": 5149 + }, + { + "epoch": 1.4413657990484188, + "grad_norm": 0.21923286726146438, + "learning_rate": 5.735440454815694e-05, + "loss": 0.4973, + "step": 5150 + }, + { + "epoch": 1.4416456759026028, + "grad_norm": 0.21408259235103064, + "learning_rate": 5.73391463038907e-05, + "loss": 0.4773, + "step": 5151 + }, + { + "epoch": 1.441925552756787, + "grad_norm": 0.21924257695252328, + "learning_rate": 5.732388736108042e-05, + "loss": 0.5044, + "step": 5152 + }, + { + "epoch": 1.4422054296109712, + "grad_norm": 0.22754723786755976, + "learning_rate": 5.730862772117844e-05, + "loss": 0.4802, + "step": 5153 + }, + { + "epoch": 1.4424853064651553, + "grad_norm": 0.22664994828210788, + "learning_rate": 5.7293367385637186e-05, + "loss": 0.4974, + "step": 5154 + }, + { + "epoch": 1.4427651833193396, + "grad_norm": 0.22815602094981982, + "learning_rate": 5.7278106355909136e-05, + "loss": 0.4773, + "step": 5155 + }, + { + "epoch": 1.4430450601735236, + "grad_norm": 0.22836029867656918, + "learning_rate": 5.726284463344686e-05, + "loss": 0.4879, + "step": 5156 + }, + { + "epoch": 1.443324937027708, + "grad_norm": 0.21959018773379563, + "learning_rate": 5.7247582219702946e-05, + "loss": 0.4524, + "step": 5157 + }, + { + "epoch": 1.443604813881892, + "grad_norm": 0.24105814344774842, + "learning_rate": 5.723231911613012e-05, + "loss": 0.5236, + "step": 5158 + }, + { + "epoch": 1.443884690736076, + "grad_norm": 0.2173295053647425, + "learning_rate": 5.72170553241811e-05, + "loss": 0.4619, + "step": 5159 + }, + { + "epoch": 1.4441645675902604, + "grad_norm": 0.2348393430708412, + "learning_rate": 5.720179084530871e-05, + "loss": 0.5036, + "step": 5160 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.226209054313454, + "learning_rate": 5.718652568096585e-05, + "loss": 0.4984, + "step": 5161 + }, + { + "epoch": 1.4447243212986285, + "grad_norm": 0.23282268334346848, + "learning_rate": 5.717125983260545e-05, + "loss": 0.5118, + "step": 5162 + }, + { + "epoch": 1.4450041981528128, + "grad_norm": 0.2203782380160922, + "learning_rate": 5.715599330168052e-05, + "loss": 0.4869, + "step": 5163 + }, + { + "epoch": 1.4452840750069968, + "grad_norm": 0.21882686743677007, + "learning_rate": 5.714072608964415e-05, + "loss": 0.4922, + "step": 5164 + }, + { + "epoch": 1.4455639518611811, + "grad_norm": 0.2278236064292984, + "learning_rate": 5.7125458197949464e-05, + "loss": 0.5061, + "step": 5165 + }, + { + "epoch": 1.4458438287153652, + "grad_norm": 0.22791485905826125, + "learning_rate": 5.711018962804968e-05, + "loss": 0.4962, + "step": 5166 + }, + { + "epoch": 1.4461237055695495, + "grad_norm": 0.2251294062949208, + "learning_rate": 5.7094920381398075e-05, + "loss": 0.4799, + "step": 5167 + }, + { + "epoch": 1.4464035824237336, + "grad_norm": 0.22602341455673444, + "learning_rate": 5.7079650459447975e-05, + "loss": 0.5078, + "step": 5168 + }, + { + "epoch": 1.4466834592779176, + "grad_norm": 0.21981970982390575, + "learning_rate": 5.706437986365278e-05, + "loss": 0.4822, + "step": 5169 + }, + { + "epoch": 1.446963336132102, + "grad_norm": 0.22519556170862506, + "learning_rate": 5.704910859546595e-05, + "loss": 0.4918, + "step": 5170 + }, + { + "epoch": 1.447243212986286, + "grad_norm": 0.21841411859899734, + "learning_rate": 5.703383665634101e-05, + "loss": 0.4779, + "step": 5171 + }, + { + "epoch": 1.44752308984047, + "grad_norm": 0.221552263703885, + "learning_rate": 5.701856404773159e-05, + "loss": 0.5063, + "step": 5172 + }, + { + "epoch": 1.4478029666946544, + "grad_norm": 0.2194652347724914, + "learning_rate": 5.70032907710913e-05, + "loss": 0.4858, + "step": 5173 + }, + { + "epoch": 1.4480828435488386, + "grad_norm": 0.21714844506779132, + "learning_rate": 5.698801682787387e-05, + "loss": 0.4994, + "step": 5174 + }, + { + "epoch": 1.4483627204030227, + "grad_norm": 0.2239726565976733, + "learning_rate": 5.697274221953309e-05, + "loss": 0.4913, + "step": 5175 + }, + { + "epoch": 1.4486425972572068, + "grad_norm": 0.2184591734505271, + "learning_rate": 5.695746694752281e-05, + "loss": 0.518, + "step": 5176 + }, + { + "epoch": 1.448922474111391, + "grad_norm": 0.21352170695384015, + "learning_rate": 5.694219101329692e-05, + "loss": 0.4755, + "step": 5177 + }, + { + "epoch": 1.4492023509655751, + "grad_norm": 0.2298478154080551, + "learning_rate": 5.692691441830941e-05, + "loss": 0.5125, + "step": 5178 + }, + { + "epoch": 1.4494822278197592, + "grad_norm": 0.22984436924526433, + "learning_rate": 5.691163716401431e-05, + "loss": 0.5096, + "step": 5179 + }, + { + "epoch": 1.4497621046739435, + "grad_norm": 0.2154386140166561, + "learning_rate": 5.6896359251865695e-05, + "loss": 0.4629, + "step": 5180 + }, + { + "epoch": 1.4500419815281276, + "grad_norm": 0.22045415166240703, + "learning_rate": 5.688108068331778e-05, + "loss": 0.4747, + "step": 5181 + }, + { + "epoch": 1.4503218583823116, + "grad_norm": 0.22369895885338856, + "learning_rate": 5.686580145982473e-05, + "loss": 0.5064, + "step": 5182 + }, + { + "epoch": 1.450601735236496, + "grad_norm": 0.23857189418531322, + "learning_rate": 5.685052158284087e-05, + "loss": 0.5091, + "step": 5183 + }, + { + "epoch": 1.4508816120906802, + "grad_norm": 0.23972159953548952, + "learning_rate": 5.683524105382052e-05, + "loss": 0.4993, + "step": 5184 + }, + { + "epoch": 1.4511614889448643, + "grad_norm": 0.22379421206513053, + "learning_rate": 5.6819959874218106e-05, + "loss": 0.4961, + "step": 5185 + }, + { + "epoch": 1.4514413657990484, + "grad_norm": 0.23106690502050894, + "learning_rate": 5.680467804548809e-05, + "loss": 0.5074, + "step": 5186 + }, + { + "epoch": 1.4517212426532327, + "grad_norm": 0.21852188404279954, + "learning_rate": 5.678939556908501e-05, + "loss": 0.4707, + "step": 5187 + }, + { + "epoch": 1.4520011195074167, + "grad_norm": 0.2298226129085423, + "learning_rate": 5.6774112446463465e-05, + "loss": 0.4899, + "step": 5188 + }, + { + "epoch": 1.4522809963616008, + "grad_norm": 0.2368934336936178, + "learning_rate": 5.675882867907809e-05, + "loss": 0.5274, + "step": 5189 + }, + { + "epoch": 1.452560873215785, + "grad_norm": 0.23342324032605585, + "learning_rate": 5.674354426838364e-05, + "loss": 0.501, + "step": 5190 + }, + { + "epoch": 1.4528407500699692, + "grad_norm": 0.22532627937468913, + "learning_rate": 5.672825921583487e-05, + "loss": 0.482, + "step": 5191 + }, + { + "epoch": 1.4531206269241534, + "grad_norm": 0.21818767199489672, + "learning_rate": 5.67129735228866e-05, + "loss": 0.4931, + "step": 5192 + }, + { + "epoch": 1.4534005037783375, + "grad_norm": 0.22983193225464882, + "learning_rate": 5.6697687190993775e-05, + "loss": 0.5101, + "step": 5193 + }, + { + "epoch": 1.4536803806325218, + "grad_norm": 0.22830254759515695, + "learning_rate": 5.668240022161132e-05, + "loss": 0.5066, + "step": 5194 + }, + { + "epoch": 1.4539602574867059, + "grad_norm": 0.22800427004652327, + "learning_rate": 5.666711261619428e-05, + "loss": 0.5008, + "step": 5195 + }, + { + "epoch": 1.45424013434089, + "grad_norm": 0.23164596534267978, + "learning_rate": 5.665182437619773e-05, + "loss": 0.5002, + "step": 5196 + }, + { + "epoch": 1.4545200111950742, + "grad_norm": 0.22864191762300512, + "learning_rate": 5.6636535503076796e-05, + "loss": 0.4838, + "step": 5197 + }, + { + "epoch": 1.4547998880492583, + "grad_norm": 0.22163210383934065, + "learning_rate": 5.662124599828671e-05, + "loss": 0.5134, + "step": 5198 + }, + { + "epoch": 1.4550797649034424, + "grad_norm": 0.21822141625786304, + "learning_rate": 5.6605955863282736e-05, + "loss": 0.4745, + "step": 5199 + }, + { + "epoch": 1.4553596417576267, + "grad_norm": 0.21976683123079507, + "learning_rate": 5.659066509952018e-05, + "loss": 0.4938, + "step": 5200 + }, + { + "epoch": 1.4556395186118107, + "grad_norm": 0.23161957364357091, + "learning_rate": 5.657537370845441e-05, + "loss": 0.4852, + "step": 5201 + }, + { + "epoch": 1.455919395465995, + "grad_norm": 0.23122405168374782, + "learning_rate": 5.656008169154091e-05, + "loss": 0.5011, + "step": 5202 + }, + { + "epoch": 1.456199272320179, + "grad_norm": 0.22373361029429523, + "learning_rate": 5.654478905023516e-05, + "loss": 0.49, + "step": 5203 + }, + { + "epoch": 1.4564791491743634, + "grad_norm": 0.21456645930140103, + "learning_rate": 5.6529495785992725e-05, + "loss": 0.5002, + "step": 5204 + }, + { + "epoch": 1.4567590260285475, + "grad_norm": 0.22801884562895566, + "learning_rate": 5.651420190026922e-05, + "loss": 0.4992, + "step": 5205 + }, + { + "epoch": 1.4570389028827315, + "grad_norm": 0.22717453576260827, + "learning_rate": 5.649890739452033e-05, + "loss": 0.4776, + "step": 5206 + }, + { + "epoch": 1.4573187797369158, + "grad_norm": 0.22945912550546757, + "learning_rate": 5.6483612270201804e-05, + "loss": 0.5201, + "step": 5207 + }, + { + "epoch": 1.4575986565910999, + "grad_norm": 0.23501778884197766, + "learning_rate": 5.646831652876945e-05, + "loss": 0.525, + "step": 5208 + }, + { + "epoch": 1.457878533445284, + "grad_norm": 0.2190781689178128, + "learning_rate": 5.645302017167908e-05, + "loss": 0.5224, + "step": 5209 + }, + { + "epoch": 1.4581584102994682, + "grad_norm": 0.22367722761054348, + "learning_rate": 5.643772320038665e-05, + "loss": 0.4816, + "step": 5210 + }, + { + "epoch": 1.4584382871536523, + "grad_norm": 0.21706226354460845, + "learning_rate": 5.642242561634812e-05, + "loss": 0.4874, + "step": 5211 + }, + { + "epoch": 1.4587181640078366, + "grad_norm": 0.2327980171449258, + "learning_rate": 5.6407127421019534e-05, + "loss": 0.5045, + "step": 5212 + }, + { + "epoch": 1.4589980408620207, + "grad_norm": 0.22404302537377715, + "learning_rate": 5.639182861585697e-05, + "loss": 0.4817, + "step": 5213 + }, + { + "epoch": 1.459277917716205, + "grad_norm": 0.2262302122989276, + "learning_rate": 5.6376529202316554e-05, + "loss": 0.4716, + "step": 5214 + }, + { + "epoch": 1.459557794570389, + "grad_norm": 0.2272961937678117, + "learning_rate": 5.636122918185455e-05, + "loss": 0.5025, + "step": 5215 + }, + { + "epoch": 1.459837671424573, + "grad_norm": 0.21677487509548762, + "learning_rate": 5.634592855592717e-05, + "loss": 0.4994, + "step": 5216 + }, + { + "epoch": 1.4601175482787574, + "grad_norm": 0.2159696268066842, + "learning_rate": 5.633062732599078e-05, + "loss": 0.4568, + "step": 5217 + }, + { + "epoch": 1.4603974251329415, + "grad_norm": 0.20843525204322297, + "learning_rate": 5.6315325493501745e-05, + "loss": 0.4718, + "step": 5218 + }, + { + "epoch": 1.4606773019871255, + "grad_norm": 0.21621720695856078, + "learning_rate": 5.630002305991647e-05, + "loss": 0.4907, + "step": 5219 + }, + { + "epoch": 1.4609571788413098, + "grad_norm": 0.22486264201399042, + "learning_rate": 5.6284720026691494e-05, + "loss": 0.4825, + "step": 5220 + }, + { + "epoch": 1.4612370556954941, + "grad_norm": 0.2138664838463663, + "learning_rate": 5.626941639528334e-05, + "loss": 0.4965, + "step": 5221 + }, + { + "epoch": 1.4615169325496782, + "grad_norm": 0.22227984300138978, + "learning_rate": 5.6254112167148623e-05, + "loss": 0.5035, + "step": 5222 + }, + { + "epoch": 1.4617968094038623, + "grad_norm": 0.23560786340253842, + "learning_rate": 5.6238807343743995e-05, + "loss": 0.495, + "step": 5223 + }, + { + "epoch": 1.4620766862580465, + "grad_norm": 0.2222486262089791, + "learning_rate": 5.62235019265262e-05, + "loss": 0.496, + "step": 5224 + }, + { + "epoch": 1.4623565631122306, + "grad_norm": 0.2153877918829292, + "learning_rate": 5.620819591695201e-05, + "loss": 0.4799, + "step": 5225 + }, + { + "epoch": 1.4626364399664147, + "grad_norm": 0.2313760021948021, + "learning_rate": 5.619288931647827e-05, + "loss": 0.5008, + "step": 5226 + }, + { + "epoch": 1.462916316820599, + "grad_norm": 0.21184068885958354, + "learning_rate": 5.6177582126561853e-05, + "loss": 0.4822, + "step": 5227 + }, + { + "epoch": 1.463196193674783, + "grad_norm": 0.24409516586058003, + "learning_rate": 5.616227434865972e-05, + "loss": 0.5304, + "step": 5228 + }, + { + "epoch": 1.4634760705289673, + "grad_norm": 0.2290601368355482, + "learning_rate": 5.614696598422885e-05, + "loss": 0.489, + "step": 5229 + }, + { + "epoch": 1.4637559473831514, + "grad_norm": 0.232763262317614, + "learning_rate": 5.613165703472632e-05, + "loss": 0.4942, + "step": 5230 + }, + { + "epoch": 1.4640358242373357, + "grad_norm": 0.22710153940008881, + "learning_rate": 5.611634750160924e-05, + "loss": 0.4905, + "step": 5231 + }, + { + "epoch": 1.4643157010915198, + "grad_norm": 0.22411737403719634, + "learning_rate": 5.610103738633477e-05, + "loss": 0.5022, + "step": 5232 + }, + { + "epoch": 1.4645955779457038, + "grad_norm": 0.25103886274908743, + "learning_rate": 5.6085726690360165e-05, + "loss": 0.5184, + "step": 5233 + }, + { + "epoch": 1.4648754547998881, + "grad_norm": 0.2393366002243448, + "learning_rate": 5.607041541514268e-05, + "loss": 0.5032, + "step": 5234 + }, + { + "epoch": 1.4651553316540722, + "grad_norm": 0.22221812011519437, + "learning_rate": 5.6055103562139656e-05, + "loss": 0.4867, + "step": 5235 + }, + { + "epoch": 1.4654352085082563, + "grad_norm": 0.23326981284420606, + "learning_rate": 5.6039791132808505e-05, + "loss": 0.5069, + "step": 5236 + }, + { + "epoch": 1.4657150853624406, + "grad_norm": 0.22326394555032164, + "learning_rate": 5.602447812860664e-05, + "loss": 0.5059, + "step": 5237 + }, + { + "epoch": 1.4659949622166246, + "grad_norm": 0.23551628104814457, + "learning_rate": 5.6009164550991565e-05, + "loss": 0.5015, + "step": 5238 + }, + { + "epoch": 1.466274839070809, + "grad_norm": 0.21941415535650327, + "learning_rate": 5.5993850401420856e-05, + "loss": 0.4906, + "step": 5239 + }, + { + "epoch": 1.466554715924993, + "grad_norm": 0.21585708426300135, + "learning_rate": 5.59785356813521e-05, + "loss": 0.493, + "step": 5240 + }, + { + "epoch": 1.4668345927791773, + "grad_norm": 0.22399307811497346, + "learning_rate": 5.5963220392242975e-05, + "loss": 0.491, + "step": 5241 + }, + { + "epoch": 1.4671144696333613, + "grad_norm": 0.22221753004680211, + "learning_rate": 5.59479045355512e-05, + "loss": 0.495, + "step": 5242 + }, + { + "epoch": 1.4673943464875454, + "grad_norm": 0.2322004302720725, + "learning_rate": 5.593258811273454e-05, + "loss": 0.4957, + "step": 5243 + }, + { + "epoch": 1.4676742233417297, + "grad_norm": 0.22743787157192655, + "learning_rate": 5.5917271125250824e-05, + "loss": 0.4968, + "step": 5244 + }, + { + "epoch": 1.4679541001959138, + "grad_norm": 0.2234875833928903, + "learning_rate": 5.5901953574557945e-05, + "loss": 0.5145, + "step": 5245 + }, + { + "epoch": 1.4682339770500978, + "grad_norm": 0.2337471811956906, + "learning_rate": 5.5886635462113804e-05, + "loss": 0.4813, + "step": 5246 + }, + { + "epoch": 1.4685138539042821, + "grad_norm": 0.22750366413705767, + "learning_rate": 5.58713167893764e-05, + "loss": 0.4777, + "step": 5247 + }, + { + "epoch": 1.4687937307584662, + "grad_norm": 0.22660182890774772, + "learning_rate": 5.58559975578038e-05, + "loss": 0.501, + "step": 5248 + }, + { + "epoch": 1.4690736076126505, + "grad_norm": 0.23145870816757763, + "learning_rate": 5.584067776885404e-05, + "loss": 0.4788, + "step": 5249 + }, + { + "epoch": 1.4693534844668346, + "grad_norm": 0.2207247354311303, + "learning_rate": 5.582535742398533e-05, + "loss": 0.5002, + "step": 5250 + }, + { + "epoch": 1.4696333613210188, + "grad_norm": 0.22446853877397394, + "learning_rate": 5.581003652465583e-05, + "loss": 0.4932, + "step": 5251 + }, + { + "epoch": 1.469913238175203, + "grad_norm": 0.22604299120318153, + "learning_rate": 5.57947150723238e-05, + "loss": 0.4886, + "step": 5252 + }, + { + "epoch": 1.470193115029387, + "grad_norm": 0.22934992109241698, + "learning_rate": 5.577939306844755e-05, + "loss": 0.4954, + "step": 5253 + }, + { + "epoch": 1.4704729918835713, + "grad_norm": 0.228733971565801, + "learning_rate": 5.5764070514485435e-05, + "loss": 0.4826, + "step": 5254 + }, + { + "epoch": 1.4707528687377553, + "grad_norm": 0.21659572130593793, + "learning_rate": 5.5748747411895865e-05, + "loss": 0.4975, + "step": 5255 + }, + { + "epoch": 1.4710327455919394, + "grad_norm": 0.2212187681174885, + "learning_rate": 5.573342376213728e-05, + "loss": 0.488, + "step": 5256 + }, + { + "epoch": 1.4713126224461237, + "grad_norm": 0.233789400573396, + "learning_rate": 5.571809956666822e-05, + "loss": 0.4907, + "step": 5257 + }, + { + "epoch": 1.471592499300308, + "grad_norm": 0.22737192042317877, + "learning_rate": 5.570277482694725e-05, + "loss": 0.4825, + "step": 5258 + }, + { + "epoch": 1.471872376154492, + "grad_norm": 0.22242946443921727, + "learning_rate": 5.568744954443297e-05, + "loss": 0.4824, + "step": 5259 + }, + { + "epoch": 1.4721522530086761, + "grad_norm": 0.23064635878022757, + "learning_rate": 5.567212372058407e-05, + "loss": 0.5146, + "step": 5260 + }, + { + "epoch": 1.4724321298628604, + "grad_norm": 0.2330896871546213, + "learning_rate": 5.565679735685925e-05, + "loss": 0.5143, + "step": 5261 + }, + { + "epoch": 1.4727120067170445, + "grad_norm": 0.2307403627780489, + "learning_rate": 5.56414704547173e-05, + "loss": 0.5101, + "step": 5262 + }, + { + "epoch": 1.4729918835712286, + "grad_norm": 0.22428272350933046, + "learning_rate": 5.562614301561704e-05, + "loss": 0.4815, + "step": 5263 + }, + { + "epoch": 1.4732717604254129, + "grad_norm": 0.23103900170190556, + "learning_rate": 5.561081504101733e-05, + "loss": 0.4893, + "step": 5264 + }, + { + "epoch": 1.473551637279597, + "grad_norm": 0.22658422565410644, + "learning_rate": 5.559548653237711e-05, + "loss": 0.495, + "step": 5265 + }, + { + "epoch": 1.4738315141337812, + "grad_norm": 0.2315015655369644, + "learning_rate": 5.558015749115533e-05, + "loss": 0.4957, + "step": 5266 + }, + { + "epoch": 1.4741113909879653, + "grad_norm": 0.22864661559358873, + "learning_rate": 5.556482791881105e-05, + "loss": 0.4829, + "step": 5267 + }, + { + "epoch": 1.4743912678421496, + "grad_norm": 0.23110443055954935, + "learning_rate": 5.554949781680333e-05, + "loss": 0.491, + "step": 5268 + }, + { + "epoch": 1.4746711446963336, + "grad_norm": 0.22929566401500065, + "learning_rate": 5.55341671865913e-05, + "loss": 0.4865, + "step": 5269 + }, + { + "epoch": 1.4749510215505177, + "grad_norm": 0.22677701838405592, + "learning_rate": 5.5518836029634145e-05, + "loss": 0.5081, + "step": 5270 + }, + { + "epoch": 1.475230898404702, + "grad_norm": 0.22090661176482015, + "learning_rate": 5.550350434739109e-05, + "loss": 0.5021, + "step": 5271 + }, + { + "epoch": 1.475510775258886, + "grad_norm": 0.23453787157715336, + "learning_rate": 5.548817214132143e-05, + "loss": 0.4788, + "step": 5272 + }, + { + "epoch": 1.4757906521130701, + "grad_norm": 0.22712003870974973, + "learning_rate": 5.547283941288445e-05, + "loss": 0.4986, + "step": 5273 + }, + { + "epoch": 1.4760705289672544, + "grad_norm": 0.2204667160719935, + "learning_rate": 5.545750616353955e-05, + "loss": 0.4953, + "step": 5274 + }, + { + "epoch": 1.4763504058214385, + "grad_norm": 0.2143925115875876, + "learning_rate": 5.544217239474615e-05, + "loss": 0.4911, + "step": 5275 + }, + { + "epoch": 1.4766302826756228, + "grad_norm": 0.22585843148824863, + "learning_rate": 5.542683810796374e-05, + "loss": 0.4965, + "step": 5276 + }, + { + "epoch": 1.4769101595298069, + "grad_norm": 0.22279290043328198, + "learning_rate": 5.541150330465186e-05, + "loss": 0.5046, + "step": 5277 + }, + { + "epoch": 1.4771900363839912, + "grad_norm": 0.21904534514230872, + "learning_rate": 5.539616798627005e-05, + "loss": 0.4913, + "step": 5278 + }, + { + "epoch": 1.4774699132381752, + "grad_norm": 0.2275017475919835, + "learning_rate": 5.538083215427796e-05, + "loss": 0.5126, + "step": 5279 + }, + { + "epoch": 1.4777497900923593, + "grad_norm": 0.2273895244217181, + "learning_rate": 5.536549581013525e-05, + "loss": 0.4924, + "step": 5280 + }, + { + "epoch": 1.4780296669465436, + "grad_norm": 0.22152081037512972, + "learning_rate": 5.5350158955301657e-05, + "loss": 0.4695, + "step": 5281 + }, + { + "epoch": 1.4783095438007277, + "grad_norm": 0.22460405259049548, + "learning_rate": 5.533482159123693e-05, + "loss": 0.4724, + "step": 5282 + }, + { + "epoch": 1.4785894206549117, + "grad_norm": 0.22040711529727752, + "learning_rate": 5.531948371940089e-05, + "loss": 0.4899, + "step": 5283 + }, + { + "epoch": 1.478869297509096, + "grad_norm": 0.23397617899090678, + "learning_rate": 5.530414534125341e-05, + "loss": 0.4925, + "step": 5284 + }, + { + "epoch": 1.47914917436328, + "grad_norm": 0.22775382648696416, + "learning_rate": 5.5288806458254414e-05, + "loss": 0.4994, + "step": 5285 + }, + { + "epoch": 1.4794290512174644, + "grad_norm": 0.23539696449203906, + "learning_rate": 5.527346707186386e-05, + "loss": 0.4811, + "step": 5286 + }, + { + "epoch": 1.4797089280716484, + "grad_norm": 0.22867356620474527, + "learning_rate": 5.5258127183541766e-05, + "loss": 0.4762, + "step": 5287 + }, + { + "epoch": 1.4799888049258327, + "grad_norm": 0.21167606898677446, + "learning_rate": 5.524278679474817e-05, + "loss": 0.4723, + "step": 5288 + }, + { + "epoch": 1.4802686817800168, + "grad_norm": 0.22849129149195513, + "learning_rate": 5.52274459069432e-05, + "loss": 0.504, + "step": 5289 + }, + { + "epoch": 1.4805485586342009, + "grad_norm": 0.22557473301195613, + "learning_rate": 5.5212104521587016e-05, + "loss": 0.4877, + "step": 5290 + }, + { + "epoch": 1.4808284354883852, + "grad_norm": 0.21591100269906305, + "learning_rate": 5.5196762640139786e-05, + "loss": 0.4901, + "step": 5291 + }, + { + "epoch": 1.4811083123425692, + "grad_norm": 0.2168375814509893, + "learning_rate": 5.518142026406178e-05, + "loss": 0.4995, + "step": 5292 + }, + { + "epoch": 1.4813881891967533, + "grad_norm": 0.2308367184534482, + "learning_rate": 5.5166077394813296e-05, + "loss": 0.4981, + "step": 5293 + }, + { + "epoch": 1.4816680660509376, + "grad_norm": 0.23033343237219403, + "learning_rate": 5.515073403385468e-05, + "loss": 0.4775, + "step": 5294 + }, + { + "epoch": 1.4819479429051219, + "grad_norm": 0.22540094381086165, + "learning_rate": 5.5135390182646304e-05, + "loss": 0.4959, + "step": 5295 + }, + { + "epoch": 1.482227819759306, + "grad_norm": 0.22041950776340755, + "learning_rate": 5.512004584264864e-05, + "loss": 0.49, + "step": 5296 + }, + { + "epoch": 1.48250769661349, + "grad_norm": 0.22465911313300704, + "learning_rate": 5.5104701015322125e-05, + "loss": 0.4834, + "step": 5297 + }, + { + "epoch": 1.4827875734676743, + "grad_norm": 0.2237692653976678, + "learning_rate": 5.508935570212732e-05, + "loss": 0.4779, + "step": 5298 + }, + { + "epoch": 1.4830674503218584, + "grad_norm": 0.2075798796732379, + "learning_rate": 5.507400990452479e-05, + "loss": 0.466, + "step": 5299 + }, + { + "epoch": 1.4833473271760425, + "grad_norm": 0.22768642302095388, + "learning_rate": 5.505866362397516e-05, + "loss": 0.4867, + "step": 5300 + }, + { + "epoch": 1.4836272040302267, + "grad_norm": 0.22653079807440119, + "learning_rate": 5.504331686193907e-05, + "loss": 0.4902, + "step": 5301 + }, + { + "epoch": 1.4839070808844108, + "grad_norm": 0.22136596493574517, + "learning_rate": 5.502796961987728e-05, + "loss": 0.4952, + "step": 5302 + }, + { + "epoch": 1.4841869577385949, + "grad_norm": 0.22922252527003512, + "learning_rate": 5.501262189925053e-05, + "loss": 0.5016, + "step": 5303 + }, + { + "epoch": 1.4844668345927792, + "grad_norm": 0.22846620736430626, + "learning_rate": 5.4997273701519615e-05, + "loss": 0.4862, + "step": 5304 + }, + { + "epoch": 1.4847467114469635, + "grad_norm": 0.22791579215918112, + "learning_rate": 5.4981925028145385e-05, + "loss": 0.4836, + "step": 5305 + }, + { + "epoch": 1.4850265883011475, + "grad_norm": 0.2326806099711271, + "learning_rate": 5.4966575880588755e-05, + "loss": 0.4942, + "step": 5306 + }, + { + "epoch": 1.4853064651553316, + "grad_norm": 0.23153575433444576, + "learning_rate": 5.495122626031065e-05, + "loss": 0.4946, + "step": 5307 + }, + { + "epoch": 1.485586342009516, + "grad_norm": 0.22958469706081977, + "learning_rate": 5.493587616877207e-05, + "loss": 0.4935, + "step": 5308 + }, + { + "epoch": 1.4858662188637, + "grad_norm": 0.2405831372653318, + "learning_rate": 5.492052560743402e-05, + "loss": 0.4871, + "step": 5309 + }, + { + "epoch": 1.486146095717884, + "grad_norm": 0.22904421275674433, + "learning_rate": 5.490517457775758e-05, + "loss": 0.4831, + "step": 5310 + }, + { + "epoch": 1.4864259725720683, + "grad_norm": 0.2251918346907918, + "learning_rate": 5.4889823081203884e-05, + "loss": 0.4882, + "step": 5311 + }, + { + "epoch": 1.4867058494262524, + "grad_norm": 0.2270591895873666, + "learning_rate": 5.4874471119234096e-05, + "loss": 0.4862, + "step": 5312 + }, + { + "epoch": 1.4869857262804367, + "grad_norm": 0.2118106516721711, + "learning_rate": 5.485911869330942e-05, + "loss": 0.4878, + "step": 5313 + }, + { + "epoch": 1.4872656031346208, + "grad_norm": 0.22146512440368507, + "learning_rate": 5.48437658048911e-05, + "loss": 0.4688, + "step": 5314 + }, + { + "epoch": 1.487545479988805, + "grad_norm": 0.22131754103121587, + "learning_rate": 5.482841245544044e-05, + "loss": 0.5199, + "step": 5315 + }, + { + "epoch": 1.487825356842989, + "grad_norm": 0.22080796039787157, + "learning_rate": 5.481305864641878e-05, + "loss": 0.4681, + "step": 5316 + }, + { + "epoch": 1.4881052336971732, + "grad_norm": 0.22997572194429822, + "learning_rate": 5.479770437928752e-05, + "loss": 0.5235, + "step": 5317 + }, + { + "epoch": 1.4883851105513575, + "grad_norm": 0.23238507853581836, + "learning_rate": 5.478234965550805e-05, + "loss": 0.4784, + "step": 5318 + }, + { + "epoch": 1.4886649874055415, + "grad_norm": 0.2176568295354096, + "learning_rate": 5.4766994476541864e-05, + "loss": 0.4892, + "step": 5319 + }, + { + "epoch": 1.4889448642597256, + "grad_norm": 0.21519620095808345, + "learning_rate": 5.4751638843850485e-05, + "loss": 0.4837, + "step": 5320 + }, + { + "epoch": 1.48922474111391, + "grad_norm": 0.26714408771298775, + "learning_rate": 5.4736282758895466e-05, + "loss": 0.4881, + "step": 5321 + }, + { + "epoch": 1.489504617968094, + "grad_norm": 0.23504972996274257, + "learning_rate": 5.472092622313839e-05, + "loss": 0.4959, + "step": 5322 + }, + { + "epoch": 1.4897844948222783, + "grad_norm": 0.22993518344546857, + "learning_rate": 5.470556923804092e-05, + "loss": 0.4993, + "step": 5323 + }, + { + "epoch": 1.4900643716764623, + "grad_norm": 0.2222276901233184, + "learning_rate": 5.4690211805064725e-05, + "loss": 0.4903, + "step": 5324 + }, + { + "epoch": 1.4903442485306466, + "grad_norm": 0.21738014706958292, + "learning_rate": 5.4674853925671566e-05, + "loss": 0.516, + "step": 5325 + }, + { + "epoch": 1.4906241253848307, + "grad_norm": 0.22262884554108853, + "learning_rate": 5.46594956013232e-05, + "loss": 0.4913, + "step": 5326 + }, + { + "epoch": 1.4909040022390148, + "grad_norm": 0.23295215853361276, + "learning_rate": 5.4644136833481395e-05, + "loss": 0.5027, + "step": 5327 + }, + { + "epoch": 1.491183879093199, + "grad_norm": 0.24375183887012075, + "learning_rate": 5.462877762360808e-05, + "loss": 0.5255, + "step": 5328 + }, + { + "epoch": 1.4914637559473831, + "grad_norm": 0.2142324763075613, + "learning_rate": 5.4613417973165106e-05, + "loss": 0.4851, + "step": 5329 + }, + { + "epoch": 1.4917436328015672, + "grad_norm": 0.2227648490124219, + "learning_rate": 5.459805788361443e-05, + "loss": 0.4771, + "step": 5330 + }, + { + "epoch": 1.4920235096557515, + "grad_norm": 0.2234673346516884, + "learning_rate": 5.4582697356418034e-05, + "loss": 0.5016, + "step": 5331 + }, + { + "epoch": 1.4923033865099355, + "grad_norm": 0.23108008144566453, + "learning_rate": 5.4567336393037925e-05, + "loss": 0.4808, + "step": 5332 + }, + { + "epoch": 1.4925832633641198, + "grad_norm": 0.2394730006065942, + "learning_rate": 5.455197499493621e-05, + "loss": 0.5208, + "step": 5333 + }, + { + "epoch": 1.492863140218304, + "grad_norm": 0.22679622242621722, + "learning_rate": 5.453661316357495e-05, + "loss": 0.4827, + "step": 5334 + }, + { + "epoch": 1.4931430170724882, + "grad_norm": 0.2249228663322211, + "learning_rate": 5.452125090041631e-05, + "loss": 0.5112, + "step": 5335 + }, + { + "epoch": 1.4934228939266723, + "grad_norm": 0.22804097193667003, + "learning_rate": 5.4505888206922475e-05, + "loss": 0.518, + "step": 5336 + }, + { + "epoch": 1.4937027707808563, + "grad_norm": 0.23804668663717438, + "learning_rate": 5.449052508455568e-05, + "loss": 0.5165, + "step": 5337 + }, + { + "epoch": 1.4939826476350406, + "grad_norm": 0.22062720030244787, + "learning_rate": 5.44751615347782e-05, + "loss": 0.5085, + "step": 5338 + }, + { + "epoch": 1.4942625244892247, + "grad_norm": 0.21912947508418557, + "learning_rate": 5.4459797559052325e-05, + "loss": 0.4873, + "step": 5339 + }, + { + "epoch": 1.4945424013434088, + "grad_norm": 0.22000486459954285, + "learning_rate": 5.4444433158840436e-05, + "loss": 0.4703, + "step": 5340 + }, + { + "epoch": 1.494822278197593, + "grad_norm": 0.22053919747764167, + "learning_rate": 5.4429068335604906e-05, + "loss": 0.4967, + "step": 5341 + }, + { + "epoch": 1.4951021550517773, + "grad_norm": 0.23365904599562048, + "learning_rate": 5.441370309080818e-05, + "loss": 0.4904, + "step": 5342 + }, + { + "epoch": 1.4953820319059614, + "grad_norm": 0.22269707032354097, + "learning_rate": 5.4398337425912715e-05, + "loss": 0.5094, + "step": 5343 + }, + { + "epoch": 1.4956619087601455, + "grad_norm": 0.2329760203926926, + "learning_rate": 5.438297134238104e-05, + "loss": 0.4969, + "step": 5344 + }, + { + "epoch": 1.4959417856143298, + "grad_norm": 0.22746772287351746, + "learning_rate": 5.436760484167569e-05, + "loss": 0.4893, + "step": 5345 + }, + { + "epoch": 1.4962216624685138, + "grad_norm": 0.23883262642649386, + "learning_rate": 5.435223792525928e-05, + "loss": 0.5169, + "step": 5346 + }, + { + "epoch": 1.496501539322698, + "grad_norm": 0.2338676465082267, + "learning_rate": 5.433687059459441e-05, + "loss": 0.5161, + "step": 5347 + }, + { + "epoch": 1.4967814161768822, + "grad_norm": 0.2363460086159002, + "learning_rate": 5.432150285114378e-05, + "loss": 0.5047, + "step": 5348 + }, + { + "epoch": 1.4970612930310663, + "grad_norm": 0.2178005445609828, + "learning_rate": 5.430613469637009e-05, + "loss": 0.4908, + "step": 5349 + }, + { + "epoch": 1.4973411698852506, + "grad_norm": 0.2277203144839674, + "learning_rate": 5.429076613173609e-05, + "loss": 0.4884, + "step": 5350 + }, + { + "epoch": 1.4976210467394346, + "grad_norm": 0.225794560934591, + "learning_rate": 5.427539715870457e-05, + "loss": 0.477, + "step": 5351 + }, + { + "epoch": 1.497900923593619, + "grad_norm": 0.2243749753222779, + "learning_rate": 5.4260027778738354e-05, + "loss": 0.5022, + "step": 5352 + }, + { + "epoch": 1.498180800447803, + "grad_norm": 0.2334257872575213, + "learning_rate": 5.42446579933003e-05, + "loss": 0.509, + "step": 5353 + }, + { + "epoch": 1.498460677301987, + "grad_norm": 0.23904002604805577, + "learning_rate": 5.422928780385333e-05, + "loss": 0.4877, + "step": 5354 + }, + { + "epoch": 1.4987405541561714, + "grad_norm": 0.22873236866181154, + "learning_rate": 5.4213917211860375e-05, + "loss": 0.4899, + "step": 5355 + }, + { + "epoch": 1.4990204310103554, + "grad_norm": 0.21349431724529108, + "learning_rate": 5.419854621878443e-05, + "loss": 0.5047, + "step": 5356 + }, + { + "epoch": 1.4993003078645395, + "grad_norm": 0.21486505820150203, + "learning_rate": 5.41831748260885e-05, + "loss": 0.4784, + "step": 5357 + }, + { + "epoch": 1.4995801847187238, + "grad_norm": 0.231161793878974, + "learning_rate": 5.416780303523565e-05, + "loss": 0.496, + "step": 5358 + }, + { + "epoch": 1.4998600615729079, + "grad_norm": 0.22658153518783808, + "learning_rate": 5.415243084768897e-05, + "loss": 0.493, + "step": 5359 + }, + { + "epoch": 1.500139938427092, + "grad_norm": 0.2319266323750032, + "learning_rate": 5.413705826491161e-05, + "loss": 0.5139, + "step": 5360 + }, + { + "epoch": 1.5004198152812762, + "grad_norm": 0.2339520560162951, + "learning_rate": 5.412168528836672e-05, + "loss": 0.4844, + "step": 5361 + }, + { + "epoch": 1.5006996921354605, + "grad_norm": 0.2310069006519829, + "learning_rate": 5.410631191951752e-05, + "loss": 0.5026, + "step": 5362 + }, + { + "epoch": 1.5009795689896446, + "grad_norm": 0.21884529678923517, + "learning_rate": 5.409093815982724e-05, + "loss": 0.4705, + "step": 5363 + }, + { + "epoch": 1.5012594458438286, + "grad_norm": 0.22179252483945588, + "learning_rate": 5.407556401075919e-05, + "loss": 0.486, + "step": 5364 + }, + { + "epoch": 1.501539322698013, + "grad_norm": 0.22721510546534038, + "learning_rate": 5.4060189473776676e-05, + "loss": 0.49, + "step": 5365 + }, + { + "epoch": 1.501819199552197, + "grad_norm": 0.22306617178167543, + "learning_rate": 5.404481455034305e-05, + "loss": 0.4755, + "step": 5366 + }, + { + "epoch": 1.502099076406381, + "grad_norm": 0.22766584094233727, + "learning_rate": 5.402943924192172e-05, + "loss": 0.4863, + "step": 5367 + }, + { + "epoch": 1.5023789532605654, + "grad_norm": 0.22418659756892878, + "learning_rate": 5.40140635499761e-05, + "loss": 0.495, + "step": 5368 + }, + { + "epoch": 1.5026588301147497, + "grad_norm": 0.21751904435545283, + "learning_rate": 5.3998687475969666e-05, + "loss": 0.4814, + "step": 5369 + }, + { + "epoch": 1.5029387069689337, + "grad_norm": 0.22492589981571987, + "learning_rate": 5.398331102136591e-05, + "loss": 0.4821, + "step": 5370 + }, + { + "epoch": 1.5032185838231178, + "grad_norm": 0.2155570134958645, + "learning_rate": 5.39679341876284e-05, + "loss": 0.485, + "step": 5371 + }, + { + "epoch": 1.503498460677302, + "grad_norm": 0.22664020079766536, + "learning_rate": 5.395255697622068e-05, + "loss": 0.4983, + "step": 5372 + }, + { + "epoch": 1.5037783375314862, + "grad_norm": 0.22532655513223326, + "learning_rate": 5.393717938860638e-05, + "loss": 0.4534, + "step": 5373 + }, + { + "epoch": 1.5040582143856702, + "grad_norm": 0.22792151749075482, + "learning_rate": 5.392180142624914e-05, + "loss": 0.4911, + "step": 5374 + }, + { + "epoch": 1.5043380912398545, + "grad_norm": 0.24385146635223562, + "learning_rate": 5.390642309061264e-05, + "loss": 0.5051, + "step": 5375 + }, + { + "epoch": 1.5046179680940386, + "grad_norm": 0.21833874786159185, + "learning_rate": 5.3891044383160615e-05, + "loss": 0.4925, + "step": 5376 + }, + { + "epoch": 1.5048978449482227, + "grad_norm": 0.22136799527665596, + "learning_rate": 5.38756653053568e-05, + "loss": 0.491, + "step": 5377 + }, + { + "epoch": 1.505177721802407, + "grad_norm": 0.22621569826533633, + "learning_rate": 5.3860285858665e-05, + "loss": 0.5002, + "step": 5378 + }, + { + "epoch": 1.5054575986565912, + "grad_norm": 0.23668566739683558, + "learning_rate": 5.384490604454903e-05, + "loss": 0.4896, + "step": 5379 + }, + { + "epoch": 1.5057374755107753, + "grad_norm": 0.23314549281402172, + "learning_rate": 5.382952586447274e-05, + "loss": 0.4927, + "step": 5380 + }, + { + "epoch": 1.5060173523649594, + "grad_norm": 0.23345373624989663, + "learning_rate": 5.3814145319900045e-05, + "loss": 0.5174, + "step": 5381 + }, + { + "epoch": 1.5062972292191437, + "grad_norm": 0.2240940396842753, + "learning_rate": 5.379876441229486e-05, + "loss": 0.5021, + "step": 5382 + }, + { + "epoch": 1.5065771060733277, + "grad_norm": 0.22182790957879425, + "learning_rate": 5.378338314312115e-05, + "loss": 0.5035, + "step": 5383 + }, + { + "epoch": 1.5068569829275118, + "grad_norm": 0.23044292401060051, + "learning_rate": 5.3768001513842915e-05, + "loss": 0.4842, + "step": 5384 + }, + { + "epoch": 1.507136859781696, + "grad_norm": 0.22103487297910235, + "learning_rate": 5.375261952592418e-05, + "loss": 0.521, + "step": 5385 + }, + { + "epoch": 1.5074167366358802, + "grad_norm": 0.23267000511855776, + "learning_rate": 5.373723718082904e-05, + "loss": 0.4874, + "step": 5386 + }, + { + "epoch": 1.5076966134900642, + "grad_norm": 0.22475563162231108, + "learning_rate": 5.372185448002155e-05, + "loss": 0.5027, + "step": 5387 + }, + { + "epoch": 1.5079764903442485, + "grad_norm": 0.22201077261229946, + "learning_rate": 5.3706471424965875e-05, + "loss": 0.4739, + "step": 5388 + }, + { + "epoch": 1.5082563671984328, + "grad_norm": 0.23888457805830648, + "learning_rate": 5.369108801712618e-05, + "loss": 0.4969, + "step": 5389 + }, + { + "epoch": 1.5085362440526169, + "grad_norm": 0.2329132526064181, + "learning_rate": 5.3675704257966665e-05, + "loss": 0.4988, + "step": 5390 + }, + { + "epoch": 1.508816120906801, + "grad_norm": 0.22335102429751483, + "learning_rate": 5.366032014895155e-05, + "loss": 0.4832, + "step": 5391 + }, + { + "epoch": 1.5090959977609852, + "grad_norm": 0.22084691242722718, + "learning_rate": 5.3644935691545116e-05, + "loss": 0.4899, + "step": 5392 + }, + { + "epoch": 1.5093758746151693, + "grad_norm": 0.35612480869485336, + "learning_rate": 5.3629550887211666e-05, + "loss": 0.5011, + "step": 5393 + }, + { + "epoch": 1.5096557514693534, + "grad_norm": 0.222333798556437, + "learning_rate": 5.361416573741554e-05, + "loss": 0.4796, + "step": 5394 + }, + { + "epoch": 1.5099356283235377, + "grad_norm": 0.228078232568696, + "learning_rate": 5.359878024362108e-05, + "loss": 0.5145, + "step": 5395 + }, + { + "epoch": 1.510215505177722, + "grad_norm": 0.23042506286294107, + "learning_rate": 5.3583394407292706e-05, + "loss": 0.523, + "step": 5396 + }, + { + "epoch": 1.5104953820319058, + "grad_norm": 0.22279054198944678, + "learning_rate": 5.356800822989486e-05, + "loss": 0.5128, + "step": 5397 + }, + { + "epoch": 1.51077525888609, + "grad_norm": 0.21799199011198456, + "learning_rate": 5.355262171289198e-05, + "loss": 0.4674, + "step": 5398 + }, + { + "epoch": 1.5110551357402744, + "grad_norm": 0.2271852323952012, + "learning_rate": 5.3537234857748584e-05, + "loss": 0.4992, + "step": 5399 + }, + { + "epoch": 1.5113350125944585, + "grad_norm": 0.2358545110627064, + "learning_rate": 5.3521847665929194e-05, + "loss": 0.5086, + "step": 5400 + }, + { + "epoch": 1.5116148894486425, + "grad_norm": 0.22501120662928606, + "learning_rate": 5.3506460138898364e-05, + "loss": 0.4967, + "step": 5401 + }, + { + "epoch": 1.5118947663028268, + "grad_norm": 0.22456639835775383, + "learning_rate": 5.3491072278120704e-05, + "loss": 0.4861, + "step": 5402 + }, + { + "epoch": 1.512174643157011, + "grad_norm": 0.2187884148915267, + "learning_rate": 5.347568408506082e-05, + "loss": 0.5045, + "step": 5403 + }, + { + "epoch": 1.512454520011195, + "grad_norm": 0.22347995490945863, + "learning_rate": 5.346029556118338e-05, + "loss": 0.5031, + "step": 5404 + }, + { + "epoch": 1.5127343968653792, + "grad_norm": 0.22946628984463002, + "learning_rate": 5.344490670795308e-05, + "loss": 0.5148, + "step": 5405 + }, + { + "epoch": 1.5130142737195635, + "grad_norm": 0.23206586802458026, + "learning_rate": 5.342951752683464e-05, + "loss": 0.4893, + "step": 5406 + }, + { + "epoch": 1.5132941505737474, + "grad_norm": 0.22901279293918392, + "learning_rate": 5.3414128019292785e-05, + "loss": 0.5041, + "step": 5407 + }, + { + "epoch": 1.5135740274279317, + "grad_norm": 0.2311481037821431, + "learning_rate": 5.339873818679232e-05, + "loss": 0.5154, + "step": 5408 + }, + { + "epoch": 1.513853904282116, + "grad_norm": 0.2242858288544814, + "learning_rate": 5.3383348030798056e-05, + "loss": 0.4854, + "step": 5409 + }, + { + "epoch": 1.5141337811363, + "grad_norm": 0.23104327232500133, + "learning_rate": 5.336795755277483e-05, + "loss": 0.477, + "step": 5410 + }, + { + "epoch": 1.514413657990484, + "grad_norm": 0.22600539690273602, + "learning_rate": 5.335256675418752e-05, + "loss": 0.5054, + "step": 5411 + }, + { + "epoch": 1.5146935348446684, + "grad_norm": 0.228359395355165, + "learning_rate": 5.3337175636501024e-05, + "loss": 0.4993, + "step": 5412 + }, + { + "epoch": 1.5149734116988525, + "grad_norm": 0.22401380622820097, + "learning_rate": 5.332178420118028e-05, + "loss": 0.4838, + "step": 5413 + }, + { + "epoch": 1.5152532885530365, + "grad_norm": 0.23032159907474142, + "learning_rate": 5.3306392449690266e-05, + "loss": 0.5123, + "step": 5414 + }, + { + "epoch": 1.5155331654072208, + "grad_norm": 0.22346671937052898, + "learning_rate": 5.329100038349597e-05, + "loss": 0.4803, + "step": 5415 + }, + { + "epoch": 1.5158130422614051, + "grad_norm": 0.2287473712593248, + "learning_rate": 5.327560800406241e-05, + "loss": 0.517, + "step": 5416 + }, + { + "epoch": 1.5160929191155892, + "grad_norm": 0.2240763556185364, + "learning_rate": 5.3260215312854644e-05, + "loss": 0.4953, + "step": 5417 + }, + { + "epoch": 1.5163727959697733, + "grad_norm": 0.22193937162416424, + "learning_rate": 5.3244822311337764e-05, + "loss": 0.5142, + "step": 5418 + }, + { + "epoch": 1.5166526728239575, + "grad_norm": 0.2159287036872767, + "learning_rate": 5.322942900097688e-05, + "loss": 0.4774, + "step": 5419 + }, + { + "epoch": 1.5169325496781416, + "grad_norm": 0.2355219929023816, + "learning_rate": 5.3214035383237135e-05, + "loss": 0.5015, + "step": 5420 + }, + { + "epoch": 1.5172124265323257, + "grad_norm": 0.22157497736140797, + "learning_rate": 5.319864145958371e-05, + "loss": 0.4953, + "step": 5421 + }, + { + "epoch": 1.51749230338651, + "grad_norm": 0.22879521258183022, + "learning_rate": 5.318324723148179e-05, + "loss": 0.5156, + "step": 5422 + }, + { + "epoch": 1.517772180240694, + "grad_norm": 0.22218590136515592, + "learning_rate": 5.3167852700396614e-05, + "loss": 0.5034, + "step": 5423 + }, + { + "epoch": 1.5180520570948781, + "grad_norm": 0.22744342293880013, + "learning_rate": 5.3152457867793446e-05, + "loss": 0.5053, + "step": 5424 + }, + { + "epoch": 1.5183319339490624, + "grad_norm": 0.22860124369276388, + "learning_rate": 5.313706273513758e-05, + "loss": 0.506, + "step": 5425 + }, + { + "epoch": 1.5186118108032467, + "grad_norm": 0.21901078600220478, + "learning_rate": 5.312166730389434e-05, + "loss": 0.4745, + "step": 5426 + }, + { + "epoch": 1.5188916876574308, + "grad_norm": 0.222248326215959, + "learning_rate": 5.310627157552904e-05, + "loss": 0.5012, + "step": 5427 + }, + { + "epoch": 1.5191715645116148, + "grad_norm": 0.2327393237346538, + "learning_rate": 5.309087555150708e-05, + "loss": 0.4937, + "step": 5428 + }, + { + "epoch": 1.5194514413657991, + "grad_norm": 0.22886242942321186, + "learning_rate": 5.307547923329386e-05, + "loss": 0.487, + "step": 5429 + }, + { + "epoch": 1.5197313182199832, + "grad_norm": 0.24026441635054493, + "learning_rate": 5.306008262235479e-05, + "loss": 0.5019, + "step": 5430 + }, + { + "epoch": 1.5200111950741673, + "grad_norm": 0.23357057655379512, + "learning_rate": 5.304468572015535e-05, + "loss": 0.5054, + "step": 5431 + }, + { + "epoch": 1.5202910719283516, + "grad_norm": 0.23349083419089312, + "learning_rate": 5.302928852816102e-05, + "loss": 0.5036, + "step": 5432 + }, + { + "epoch": 1.5205709487825358, + "grad_norm": 0.22110916014226956, + "learning_rate": 5.30138910478373e-05, + "loss": 0.4749, + "step": 5433 + }, + { + "epoch": 1.5208508256367197, + "grad_norm": 0.22426314741174758, + "learning_rate": 5.299849328064976e-05, + "loss": 0.5053, + "step": 5434 + }, + { + "epoch": 1.521130702490904, + "grad_norm": 0.22431996803172644, + "learning_rate": 5.2983095228063964e-05, + "loss": 0.485, + "step": 5435 + }, + { + "epoch": 1.5214105793450883, + "grad_norm": 0.22836923342568963, + "learning_rate": 5.296769689154547e-05, + "loss": 0.4857, + "step": 5436 + }, + { + "epoch": 1.5216904561992723, + "grad_norm": 0.36549331515179856, + "learning_rate": 5.295229827255993e-05, + "loss": 0.5039, + "step": 5437 + }, + { + "epoch": 1.5219703330534564, + "grad_norm": 0.2096493734106698, + "learning_rate": 5.293689937257299e-05, + "loss": 0.4847, + "step": 5438 + }, + { + "epoch": 1.5222502099076407, + "grad_norm": 0.22195267794046467, + "learning_rate": 5.292150019305033e-05, + "loss": 0.4957, + "step": 5439 + }, + { + "epoch": 1.5225300867618248, + "grad_norm": 0.22974226173599646, + "learning_rate": 5.290610073545764e-05, + "loss": 0.4731, + "step": 5440 + }, + { + "epoch": 1.5228099636160088, + "grad_norm": 0.22637494741228384, + "learning_rate": 5.289070100126066e-05, + "loss": 0.4881, + "step": 5441 + }, + { + "epoch": 1.5230898404701931, + "grad_norm": 0.2284943980225406, + "learning_rate": 5.2875300991925114e-05, + "loss": 0.4811, + "step": 5442 + }, + { + "epoch": 1.5233697173243774, + "grad_norm": 0.22132666260049025, + "learning_rate": 5.2859900708916844e-05, + "loss": 0.4863, + "step": 5443 + }, + { + "epoch": 1.5236495941785613, + "grad_norm": 0.23433782492196434, + "learning_rate": 5.2844500153701615e-05, + "loss": 0.4985, + "step": 5444 + }, + { + "epoch": 1.5239294710327456, + "grad_norm": 0.23414573490537904, + "learning_rate": 5.2829099327745266e-05, + "loss": 0.5004, + "step": 5445 + }, + { + "epoch": 1.5242093478869299, + "grad_norm": 0.2250347831621673, + "learning_rate": 5.281369823251366e-05, + "loss": 0.4866, + "step": 5446 + }, + { + "epoch": 1.524489224741114, + "grad_norm": 0.22436169247133989, + "learning_rate": 5.279829686947269e-05, + "loss": 0.477, + "step": 5447 + }, + { + "epoch": 1.524769101595298, + "grad_norm": 0.22536271246100767, + "learning_rate": 5.278289524008825e-05, + "loss": 0.497, + "step": 5448 + }, + { + "epoch": 1.5250489784494823, + "grad_norm": 0.23488341377345673, + "learning_rate": 5.276749334582628e-05, + "loss": 0.5029, + "step": 5449 + }, + { + "epoch": 1.5253288553036664, + "grad_norm": 0.2315441593858389, + "learning_rate": 5.275209118815273e-05, + "loss": 0.5065, + "step": 5450 + }, + { + "epoch": 1.5256087321578504, + "grad_norm": 0.25428653237842314, + "learning_rate": 5.273668876853361e-05, + "loss": 0.5263, + "step": 5451 + }, + { + "epoch": 1.5258886090120347, + "grad_norm": 0.22391293143001556, + "learning_rate": 5.272128608843494e-05, + "loss": 0.4808, + "step": 5452 + }, + { + "epoch": 1.526168485866219, + "grad_norm": 0.2337345910607096, + "learning_rate": 5.270588314932273e-05, + "loss": 0.5286, + "step": 5453 + }, + { + "epoch": 1.526448362720403, + "grad_norm": 0.23036909516695217, + "learning_rate": 5.2690479952663054e-05, + "loss": 0.4915, + "step": 5454 + }, + { + "epoch": 1.5267282395745871, + "grad_norm": 0.21697624969892057, + "learning_rate": 5.267507649992197e-05, + "loss": 0.4823, + "step": 5455 + }, + { + "epoch": 1.5270081164287714, + "grad_norm": 0.23912664018691718, + "learning_rate": 5.2659672792565615e-05, + "loss": 0.5188, + "step": 5456 + }, + { + "epoch": 1.5272879932829555, + "grad_norm": 0.23041920103790217, + "learning_rate": 5.2644268832060114e-05, + "loss": 0.4853, + "step": 5457 + }, + { + "epoch": 1.5275678701371396, + "grad_norm": 0.21799483053785548, + "learning_rate": 5.2628864619871635e-05, + "loss": 0.5059, + "step": 5458 + }, + { + "epoch": 1.5278477469913239, + "grad_norm": 0.21065053181165194, + "learning_rate": 5.261346015746633e-05, + "loss": 0.4882, + "step": 5459 + }, + { + "epoch": 1.528127623845508, + "grad_norm": 0.21809702296056538, + "learning_rate": 5.259805544631043e-05, + "loss": 0.4825, + "step": 5460 + }, + { + "epoch": 1.528407500699692, + "grad_norm": 0.21134778412815755, + "learning_rate": 5.258265048787018e-05, + "loss": 0.5025, + "step": 5461 + }, + { + "epoch": 1.5286873775538763, + "grad_norm": 0.2302931779222548, + "learning_rate": 5.25672452836118e-05, + "loss": 0.5098, + "step": 5462 + }, + { + "epoch": 1.5289672544080606, + "grad_norm": 0.23252348128385064, + "learning_rate": 5.255183983500157e-05, + "loss": 0.5031, + "step": 5463 + }, + { + "epoch": 1.5292471312622447, + "grad_norm": 0.21668314929212756, + "learning_rate": 5.2536434143505806e-05, + "loss": 0.4751, + "step": 5464 + }, + { + "epoch": 1.5295270081164287, + "grad_norm": 0.2207455463779076, + "learning_rate": 5.2521028210590806e-05, + "loss": 0.5001, + "step": 5465 + }, + { + "epoch": 1.529806884970613, + "grad_norm": 0.2293560980736755, + "learning_rate": 5.2505622037722945e-05, + "loss": 0.4932, + "step": 5466 + }, + { + "epoch": 1.530086761824797, + "grad_norm": 0.21491334500688608, + "learning_rate": 5.249021562636857e-05, + "loss": 0.4877, + "step": 5467 + }, + { + "epoch": 1.5303666386789812, + "grad_norm": 0.22356099799464063, + "learning_rate": 5.247480897799406e-05, + "loss": 0.4879, + "step": 5468 + }, + { + "epoch": 1.5306465155331654, + "grad_norm": 0.22047742493084346, + "learning_rate": 5.245940209406587e-05, + "loss": 0.4982, + "step": 5469 + }, + { + "epoch": 1.5309263923873497, + "grad_norm": 0.2243889938386529, + "learning_rate": 5.24439949760504e-05, + "loss": 0.4901, + "step": 5470 + }, + { + "epoch": 1.5312062692415336, + "grad_norm": 0.22565115953567208, + "learning_rate": 5.242858762541414e-05, + "loss": 0.4746, + "step": 5471 + }, + { + "epoch": 1.5314861460957179, + "grad_norm": 0.22541263446564486, + "learning_rate": 5.241318004362353e-05, + "loss": 0.4903, + "step": 5472 + }, + { + "epoch": 1.5317660229499022, + "grad_norm": 0.23603589753057705, + "learning_rate": 5.2397772232145105e-05, + "loss": 0.4955, + "step": 5473 + }, + { + "epoch": 1.5320458998040862, + "grad_norm": 0.2302453543942871, + "learning_rate": 5.238236419244537e-05, + "loss": 0.4914, + "step": 5474 + }, + { + "epoch": 1.5323257766582703, + "grad_norm": 0.24355197085429647, + "learning_rate": 5.236695592599088e-05, + "loss": 0.5246, + "step": 5475 + }, + { + "epoch": 1.5326056535124546, + "grad_norm": 0.22854580307770703, + "learning_rate": 5.235154743424818e-05, + "loss": 0.4868, + "step": 5476 + }, + { + "epoch": 1.5328855303666387, + "grad_norm": 0.2228751796284566, + "learning_rate": 5.23361387186839e-05, + "loss": 0.4765, + "step": 5477 + }, + { + "epoch": 1.5331654072208227, + "grad_norm": 0.2259556807149954, + "learning_rate": 5.2320729780764635e-05, + "loss": 0.4854, + "step": 5478 + }, + { + "epoch": 1.533445284075007, + "grad_norm": 0.22914257075987968, + "learning_rate": 5.2305320621957e-05, + "loss": 0.4947, + "step": 5479 + }, + { + "epoch": 1.5337251609291913, + "grad_norm": 0.22139845150999246, + "learning_rate": 5.2289911243727665e-05, + "loss": 0.4677, + "step": 5480 + }, + { + "epoch": 1.5340050377833752, + "grad_norm": 0.21705017782435324, + "learning_rate": 5.22745016475433e-05, + "loss": 0.4904, + "step": 5481 + }, + { + "epoch": 1.5342849146375594, + "grad_norm": 0.22065078716060177, + "learning_rate": 5.2259091834870575e-05, + "loss": 0.4838, + "step": 5482 + }, + { + "epoch": 1.5345647914917437, + "grad_norm": 0.21302127219688868, + "learning_rate": 5.2243681807176236e-05, + "loss": 0.5126, + "step": 5483 + }, + { + "epoch": 1.5348446683459278, + "grad_norm": 0.2245992878314648, + "learning_rate": 5.222827156592701e-05, + "loss": 0.4855, + "step": 5484 + }, + { + "epoch": 1.5351245452001119, + "grad_norm": 0.2237374298522733, + "learning_rate": 5.221286111258963e-05, + "loss": 0.4964, + "step": 5485 + }, + { + "epoch": 1.5354044220542962, + "grad_norm": 0.23570909450106375, + "learning_rate": 5.219745044863091e-05, + "loss": 0.5162, + "step": 5486 + }, + { + "epoch": 1.5356842989084802, + "grad_norm": 0.22737616594999852, + "learning_rate": 5.2182039575517616e-05, + "loss": 0.4874, + "step": 5487 + }, + { + "epoch": 1.5359641757626643, + "grad_norm": 0.223409371172766, + "learning_rate": 5.2166628494716585e-05, + "loss": 0.5021, + "step": 5488 + }, + { + "epoch": 1.5362440526168486, + "grad_norm": 0.22576016070750649, + "learning_rate": 5.215121720769465e-05, + "loss": 0.5042, + "step": 5489 + }, + { + "epoch": 1.536523929471033, + "grad_norm": 0.22072691297681046, + "learning_rate": 5.213580571591864e-05, + "loss": 0.4864, + "step": 5490 + }, + { + "epoch": 1.536803806325217, + "grad_norm": 0.2305464803155683, + "learning_rate": 5.2120394020855456e-05, + "loss": 0.5024, + "step": 5491 + }, + { + "epoch": 1.537083683179401, + "grad_norm": 0.22406606473296256, + "learning_rate": 5.2104982123971967e-05, + "loss": 0.5135, + "step": 5492 + }, + { + "epoch": 1.5373635600335853, + "grad_norm": 0.22949407945698136, + "learning_rate": 5.208957002673511e-05, + "loss": 0.5018, + "step": 5493 + }, + { + "epoch": 1.5376434368877694, + "grad_norm": 0.22522436493783196, + "learning_rate": 5.2074157730611805e-05, + "loss": 0.5089, + "step": 5494 + }, + { + "epoch": 1.5379233137419535, + "grad_norm": 0.21835002861091665, + "learning_rate": 5.2058745237069004e-05, + "loss": 0.5025, + "step": 5495 + }, + { + "epoch": 1.5382031905961377, + "grad_norm": 0.22598496581301414, + "learning_rate": 5.204333254757369e-05, + "loss": 0.4954, + "step": 5496 + }, + { + "epoch": 1.5384830674503218, + "grad_norm": 0.21953127951438886, + "learning_rate": 5.202791966359284e-05, + "loss": 0.4867, + "step": 5497 + }, + { + "epoch": 1.5387629443045059, + "grad_norm": 0.2321845195378221, + "learning_rate": 5.201250658659347e-05, + "loss": 0.4951, + "step": 5498 + }, + { + "epoch": 1.5390428211586902, + "grad_norm": 0.22580315013759258, + "learning_rate": 5.199709331804258e-05, + "loss": 0.482, + "step": 5499 + }, + { + "epoch": 1.5393226980128745, + "grad_norm": 0.2106715632460807, + "learning_rate": 5.198167985940723e-05, + "loss": 0.4826, + "step": 5500 + }, + { + "epoch": 1.5396025748670585, + "grad_norm": 0.22257357891325769, + "learning_rate": 5.196626621215449e-05, + "loss": 0.4784, + "step": 5501 + }, + { + "epoch": 1.5398824517212426, + "grad_norm": 0.22051229132963152, + "learning_rate": 5.195085237775141e-05, + "loss": 0.4776, + "step": 5502 + }, + { + "epoch": 1.540162328575427, + "grad_norm": 0.22602824434366847, + "learning_rate": 5.193543835766513e-05, + "loss": 0.508, + "step": 5503 + }, + { + "epoch": 1.540442205429611, + "grad_norm": 0.2326471364228337, + "learning_rate": 5.192002415336273e-05, + "loss": 0.5088, + "step": 5504 + }, + { + "epoch": 1.540722082283795, + "grad_norm": 0.2157668784817698, + "learning_rate": 5.1904609766311374e-05, + "loss": 0.4859, + "step": 5505 + }, + { + "epoch": 1.5410019591379793, + "grad_norm": 0.21311169923914217, + "learning_rate": 5.1889195197978194e-05, + "loss": 0.4836, + "step": 5506 + }, + { + "epoch": 1.5412818359921634, + "grad_norm": 0.2280535170130329, + "learning_rate": 5.1873780449830355e-05, + "loss": 0.5156, + "step": 5507 + }, + { + "epoch": 1.5415617128463475, + "grad_norm": 0.22595258533730095, + "learning_rate": 5.185836552333504e-05, + "loss": 0.4869, + "step": 5508 + }, + { + "epoch": 1.5418415897005318, + "grad_norm": 0.2311339774670807, + "learning_rate": 5.1842950419959445e-05, + "loss": 0.4919, + "step": 5509 + }, + { + "epoch": 1.542121466554716, + "grad_norm": 0.22176105493539253, + "learning_rate": 5.1827535141170814e-05, + "loss": 0.4886, + "step": 5510 + }, + { + "epoch": 1.5424013434089001, + "grad_norm": 0.20993472889014483, + "learning_rate": 5.1812119688436345e-05, + "loss": 0.4817, + "step": 5511 + }, + { + "epoch": 1.5426812202630842, + "grad_norm": 0.23066979961305653, + "learning_rate": 5.179670406322332e-05, + "loss": 0.5261, + "step": 5512 + }, + { + "epoch": 1.5429610971172685, + "grad_norm": 0.22212220602819874, + "learning_rate": 5.1781288266998994e-05, + "loss": 0.4723, + "step": 5513 + }, + { + "epoch": 1.5432409739714525, + "grad_norm": 0.22180880486084367, + "learning_rate": 5.176587230123067e-05, + "loss": 0.4986, + "step": 5514 + }, + { + "epoch": 1.5435208508256366, + "grad_norm": 0.22771426437422765, + "learning_rate": 5.175045616738561e-05, + "loss": 0.4985, + "step": 5515 + }, + { + "epoch": 1.543800727679821, + "grad_norm": 0.22737842481544127, + "learning_rate": 5.173503986693118e-05, + "loss": 0.4884, + "step": 5516 + }, + { + "epoch": 1.5440806045340052, + "grad_norm": 0.22372893906589034, + "learning_rate": 5.171962340133466e-05, + "loss": 0.4792, + "step": 5517 + }, + { + "epoch": 1.544360481388189, + "grad_norm": 0.22725234823286433, + "learning_rate": 5.170420677206343e-05, + "loss": 0.4921, + "step": 5518 + }, + { + "epoch": 1.5446403582423733, + "grad_norm": 0.2272673329241304, + "learning_rate": 5.168878998058485e-05, + "loss": 0.5053, + "step": 5519 + }, + { + "epoch": 1.5449202350965576, + "grad_norm": 0.2175128874622248, + "learning_rate": 5.167337302836628e-05, + "loss": 0.4734, + "step": 5520 + }, + { + "epoch": 1.5452001119507417, + "grad_norm": 0.22463279961009588, + "learning_rate": 5.165795591687513e-05, + "loss": 0.496, + "step": 5521 + }, + { + "epoch": 1.5454799888049258, + "grad_norm": 0.21911222931517413, + "learning_rate": 5.164253864757882e-05, + "loss": 0.4978, + "step": 5522 + }, + { + "epoch": 1.54575986565911, + "grad_norm": 0.2144687118973654, + "learning_rate": 5.162712122194475e-05, + "loss": 0.4864, + "step": 5523 + }, + { + "epoch": 1.5460397425132941, + "grad_norm": 0.2234349525817024, + "learning_rate": 5.161170364144038e-05, + "loss": 0.4913, + "step": 5524 + }, + { + "epoch": 1.5463196193674782, + "grad_norm": 0.23038865018825, + "learning_rate": 5.159628590753317e-05, + "loss": 0.479, + "step": 5525 + }, + { + "epoch": 1.5465994962216625, + "grad_norm": 0.22704121635477, + "learning_rate": 5.1580868021690554e-05, + "loss": 0.4737, + "step": 5526 + }, + { + "epoch": 1.5468793730758468, + "grad_norm": 0.22058861548784583, + "learning_rate": 5.1565449985380045e-05, + "loss": 0.4794, + "step": 5527 + }, + { + "epoch": 1.5471592499300306, + "grad_norm": 0.22361203755141656, + "learning_rate": 5.155003180006911e-05, + "loss": 0.505, + "step": 5528 + }, + { + "epoch": 1.547439126784215, + "grad_norm": 0.2246715982507394, + "learning_rate": 5.153461346722529e-05, + "loss": 0.4919, + "step": 5529 + }, + { + "epoch": 1.5477190036383992, + "grad_norm": 0.22097702675372954, + "learning_rate": 5.151919498831611e-05, + "loss": 0.4857, + "step": 5530 + }, + { + "epoch": 1.5479988804925833, + "grad_norm": 0.22288398323463426, + "learning_rate": 5.1503776364809095e-05, + "loss": 0.5018, + "step": 5531 + }, + { + "epoch": 1.5482787573467673, + "grad_norm": 0.22599295507683007, + "learning_rate": 5.1488357598171796e-05, + "loss": 0.4731, + "step": 5532 + }, + { + "epoch": 1.5485586342009516, + "grad_norm": 0.22634596383355085, + "learning_rate": 5.147293868987181e-05, + "loss": 0.4803, + "step": 5533 + }, + { + "epoch": 1.5488385110551357, + "grad_norm": 0.23343780091190894, + "learning_rate": 5.145751964137669e-05, + "loss": 0.4933, + "step": 5534 + }, + { + "epoch": 1.5491183879093198, + "grad_norm": 0.23179343716192186, + "learning_rate": 5.144210045415402e-05, + "loss": 0.5037, + "step": 5535 + }, + { + "epoch": 1.549398264763504, + "grad_norm": 0.22337254319140473, + "learning_rate": 5.142668112967143e-05, + "loss": 0.4718, + "step": 5536 + }, + { + "epoch": 1.5496781416176884, + "grad_norm": 0.22856473071054442, + "learning_rate": 5.141126166939652e-05, + "loss": 0.4972, + "step": 5537 + }, + { + "epoch": 1.5499580184718724, + "grad_norm": 0.22682415196387343, + "learning_rate": 5.139584207479694e-05, + "loss": 0.4788, + "step": 5538 + }, + { + "epoch": 1.5502378953260565, + "grad_norm": 0.2342114551926887, + "learning_rate": 5.138042234734034e-05, + "loss": 0.4926, + "step": 5539 + }, + { + "epoch": 1.5505177721802408, + "grad_norm": 0.22073298235366773, + "learning_rate": 5.136500248849436e-05, + "loss": 0.4827, + "step": 5540 + }, + { + "epoch": 1.5507976490344249, + "grad_norm": 0.2308918394389832, + "learning_rate": 5.1349582499726675e-05, + "loss": 0.4794, + "step": 5541 + }, + { + "epoch": 1.551077525888609, + "grad_norm": 0.2373222903561083, + "learning_rate": 5.133416238250499e-05, + "loss": 0.5264, + "step": 5542 + }, + { + "epoch": 1.5513574027427932, + "grad_norm": 0.23196388741003965, + "learning_rate": 5.131874213829698e-05, + "loss": 0.5137, + "step": 5543 + }, + { + "epoch": 1.5516372795969773, + "grad_norm": 0.23835911241143926, + "learning_rate": 5.1303321768570345e-05, + "loss": 0.544, + "step": 5544 + }, + { + "epoch": 1.5519171564511614, + "grad_norm": 0.22224209128980993, + "learning_rate": 5.128790127479281e-05, + "loss": 0.4761, + "step": 5545 + }, + { + "epoch": 1.5521970333053456, + "grad_norm": 0.2266650114888923, + "learning_rate": 5.127248065843211e-05, + "loss": 0.4986, + "step": 5546 + }, + { + "epoch": 1.55247691015953, + "grad_norm": 0.21559488524631035, + "learning_rate": 5.1257059920955995e-05, + "loss": 0.4744, + "step": 5547 + }, + { + "epoch": 1.552756787013714, + "grad_norm": 0.20869256256699395, + "learning_rate": 5.124163906383223e-05, + "loss": 0.4897, + "step": 5548 + }, + { + "epoch": 1.553036663867898, + "grad_norm": 0.22554354805913607, + "learning_rate": 5.122621808852853e-05, + "loss": 0.5028, + "step": 5549 + }, + { + "epoch": 1.5533165407220824, + "grad_norm": 0.22404022736807438, + "learning_rate": 5.121079699651273e-05, + "loss": 0.476, + "step": 5550 + }, + { + "epoch": 1.5535964175762664, + "grad_norm": 0.21840184697867143, + "learning_rate": 5.119537578925259e-05, + "loss": 0.4786, + "step": 5551 + }, + { + "epoch": 1.5538762944304505, + "grad_norm": 0.21642551836098425, + "learning_rate": 5.1179954468215915e-05, + "loss": 0.4923, + "step": 5552 + }, + { + "epoch": 1.5541561712846348, + "grad_norm": 0.22562525130432082, + "learning_rate": 5.116453303487052e-05, + "loss": 0.4876, + "step": 5553 + }, + { + "epoch": 1.554436048138819, + "grad_norm": 0.22369043937979324, + "learning_rate": 5.11491114906842e-05, + "loss": 0.4734, + "step": 5554 + }, + { + "epoch": 1.554715924993003, + "grad_norm": 0.21331217522552084, + "learning_rate": 5.113368983712481e-05, + "loss": 0.5092, + "step": 5555 + }, + { + "epoch": 1.5549958018471872, + "grad_norm": 0.2285230341412732, + "learning_rate": 5.111826807566019e-05, + "loss": 0.5007, + "step": 5556 + }, + { + "epoch": 1.5552756787013715, + "grad_norm": 0.22319739454818105, + "learning_rate": 5.1102846207758195e-05, + "loss": 0.4733, + "step": 5557 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 0.23825240682033183, + "learning_rate": 5.108742423488667e-05, + "loss": 0.5241, + "step": 5558 + }, + { + "epoch": 1.5558354324097396, + "grad_norm": 0.2300284898939921, + "learning_rate": 5.10720021585135e-05, + "loss": 0.4836, + "step": 5559 + }, + { + "epoch": 1.556115309263924, + "grad_norm": 0.22652834007336548, + "learning_rate": 5.1056579980106564e-05, + "loss": 0.5129, + "step": 5560 + }, + { + "epoch": 1.556395186118108, + "grad_norm": 0.23259689587392177, + "learning_rate": 5.104115770113377e-05, + "loss": 0.5042, + "step": 5561 + }, + { + "epoch": 1.556675062972292, + "grad_norm": 0.22243522953532718, + "learning_rate": 5.1025735323063e-05, + "loss": 0.4757, + "step": 5562 + }, + { + "epoch": 1.5569549398264764, + "grad_norm": 0.23530880200349663, + "learning_rate": 5.101031284736214e-05, + "loss": 0.5087, + "step": 5563 + }, + { + "epoch": 1.5572348166806607, + "grad_norm": 0.22853810652850762, + "learning_rate": 5.0994890275499155e-05, + "loss": 0.4821, + "step": 5564 + }, + { + "epoch": 1.5575146935348445, + "grad_norm": 0.22454368425628163, + "learning_rate": 5.097946760894195e-05, + "loss": 0.4842, + "step": 5565 + }, + { + "epoch": 1.5577945703890288, + "grad_norm": 0.22903907096900009, + "learning_rate": 5.096404484915849e-05, + "loss": 0.5012, + "step": 5566 + }, + { + "epoch": 1.558074447243213, + "grad_norm": 0.22171177976385112, + "learning_rate": 5.094862199761669e-05, + "loss": 0.4762, + "step": 5567 + }, + { + "epoch": 1.5583543240973972, + "grad_norm": 0.22613511012567147, + "learning_rate": 5.0933199055784505e-05, + "loss": 0.4895, + "step": 5568 + }, + { + "epoch": 1.5586342009515812, + "grad_norm": 0.23439319379854245, + "learning_rate": 5.0917776025129926e-05, + "loss": 0.5054, + "step": 5569 + }, + { + "epoch": 1.5589140778057655, + "grad_norm": 0.2342005317269749, + "learning_rate": 5.090235290712092e-05, + "loss": 0.4969, + "step": 5570 + }, + { + "epoch": 1.5591939546599496, + "grad_norm": 0.24274956040005172, + "learning_rate": 5.088692970322545e-05, + "loss": 0.4997, + "step": 5571 + }, + { + "epoch": 1.5594738315141337, + "grad_norm": 0.2242674755747388, + "learning_rate": 5.08715064149115e-05, + "loss": 0.5054, + "step": 5572 + }, + { + "epoch": 1.559753708368318, + "grad_norm": 0.23541270133664366, + "learning_rate": 5.085608304364708e-05, + "loss": 0.4977, + "step": 5573 + }, + { + "epoch": 1.5600335852225022, + "grad_norm": 0.2312343919974206, + "learning_rate": 5.084065959090022e-05, + "loss": 0.4944, + "step": 5574 + }, + { + "epoch": 1.5603134620766863, + "grad_norm": 0.2248216189596048, + "learning_rate": 5.0825236058138906e-05, + "loss": 0.4895, + "step": 5575 + }, + { + "epoch": 1.5605933389308704, + "grad_norm": 0.2359319249211282, + "learning_rate": 5.080981244683115e-05, + "loss": 0.484, + "step": 5576 + }, + { + "epoch": 1.5608732157850547, + "grad_norm": 0.2314013255489555, + "learning_rate": 5.0794388758445e-05, + "loss": 0.4982, + "step": 5577 + }, + { + "epoch": 1.5611530926392387, + "grad_norm": 0.22130249344904962, + "learning_rate": 5.077896499444847e-05, + "loss": 0.4944, + "step": 5578 + }, + { + "epoch": 1.5614329694934228, + "grad_norm": 0.22318504258286237, + "learning_rate": 5.0763541156309646e-05, + "loss": 0.4903, + "step": 5579 + }, + { + "epoch": 1.561712846347607, + "grad_norm": 0.2253565705016098, + "learning_rate": 5.074811724549652e-05, + "loss": 0.4949, + "step": 5580 + }, + { + "epoch": 1.5619927232017912, + "grad_norm": 0.22285492366295562, + "learning_rate": 5.0732693263477185e-05, + "loss": 0.4894, + "step": 5581 + }, + { + "epoch": 1.5622726000559752, + "grad_norm": 0.2297401192687475, + "learning_rate": 5.0717269211719685e-05, + "loss": 0.4832, + "step": 5582 + }, + { + "epoch": 1.5625524769101595, + "grad_norm": 0.25651403582689786, + "learning_rate": 5.0701845091692116e-05, + "loss": 0.466, + "step": 5583 + }, + { + "epoch": 1.5628323537643438, + "grad_norm": 0.23471078254796965, + "learning_rate": 5.0686420904862534e-05, + "loss": 0.4863, + "step": 5584 + }, + { + "epoch": 1.5631122306185279, + "grad_norm": 0.22602221563616173, + "learning_rate": 5.0670996652699024e-05, + "loss": 0.4859, + "step": 5585 + }, + { + "epoch": 1.563392107472712, + "grad_norm": 0.2170110679437644, + "learning_rate": 5.065557233666968e-05, + "loss": 0.4755, + "step": 5586 + }, + { + "epoch": 1.5636719843268962, + "grad_norm": 0.22292328836669514, + "learning_rate": 5.064014795824258e-05, + "loss": 0.4978, + "step": 5587 + }, + { + "epoch": 1.5639518611810803, + "grad_norm": 0.22283976419155824, + "learning_rate": 5.0624723518885864e-05, + "loss": 0.503, + "step": 5588 + }, + { + "epoch": 1.5642317380352644, + "grad_norm": 0.22697066784965056, + "learning_rate": 5.0609299020067594e-05, + "loss": 0.4915, + "step": 5589 + }, + { + "epoch": 1.5645116148894487, + "grad_norm": 0.2237210696005833, + "learning_rate": 5.05938744632559e-05, + "loss": 0.488, + "step": 5590 + }, + { + "epoch": 1.564791491743633, + "grad_norm": 0.23033565315694643, + "learning_rate": 5.057844984991891e-05, + "loss": 0.4818, + "step": 5591 + }, + { + "epoch": 1.5650713685978168, + "grad_norm": 0.22594536289051512, + "learning_rate": 5.0563025181524736e-05, + "loss": 0.4924, + "step": 5592 + }, + { + "epoch": 1.565351245452001, + "grad_norm": 0.22902664035682394, + "learning_rate": 5.054760045954152e-05, + "loss": 0.4825, + "step": 5593 + }, + { + "epoch": 1.5656311223061854, + "grad_norm": 0.2284375583849929, + "learning_rate": 5.053217568543738e-05, + "loss": 0.4993, + "step": 5594 + }, + { + "epoch": 1.5659109991603695, + "grad_norm": 0.21817010761159086, + "learning_rate": 5.051675086068045e-05, + "loss": 0.4869, + "step": 5595 + }, + { + "epoch": 1.5661908760145535, + "grad_norm": 0.21837069183033944, + "learning_rate": 5.05013259867389e-05, + "loss": 0.4762, + "step": 5596 + }, + { + "epoch": 1.5664707528687378, + "grad_norm": 0.2292995169660129, + "learning_rate": 5.048590106508086e-05, + "loss": 0.471, + "step": 5597 + }, + { + "epoch": 1.566750629722922, + "grad_norm": 0.23333384974605587, + "learning_rate": 5.047047609717448e-05, + "loss": 0.5175, + "step": 5598 + }, + { + "epoch": 1.567030506577106, + "grad_norm": 0.22607909993090125, + "learning_rate": 5.0455051084487915e-05, + "loss": 0.4879, + "step": 5599 + }, + { + "epoch": 1.5673103834312903, + "grad_norm": 0.2188263315194909, + "learning_rate": 5.043962602848934e-05, + "loss": 0.4783, + "step": 5600 + }, + { + "epoch": 1.5675902602854745, + "grad_norm": 0.22301232445952954, + "learning_rate": 5.0424200930646914e-05, + "loss": 0.503, + "step": 5601 + }, + { + "epoch": 1.5678701371396584, + "grad_norm": 0.23886610604380776, + "learning_rate": 5.040877579242881e-05, + "loss": 0.4975, + "step": 5602 + }, + { + "epoch": 1.5681500139938427, + "grad_norm": 0.2350143648331525, + "learning_rate": 5.039335061530319e-05, + "loss": 0.5022, + "step": 5603 + }, + { + "epoch": 1.568429890848027, + "grad_norm": 0.22150415708990884, + "learning_rate": 5.037792540073823e-05, + "loss": 0.4857, + "step": 5604 + }, + { + "epoch": 1.568709767702211, + "grad_norm": 0.2303358839947542, + "learning_rate": 5.036250015020214e-05, + "loss": 0.5091, + "step": 5605 + }, + { + "epoch": 1.5689896445563951, + "grad_norm": 0.2217424316390478, + "learning_rate": 5.034707486516307e-05, + "loss": 0.4799, + "step": 5606 + }, + { + "epoch": 1.5692695214105794, + "grad_norm": 0.23553254094918738, + "learning_rate": 5.033164954708922e-05, + "loss": 0.509, + "step": 5607 + }, + { + "epoch": 1.5695493982647635, + "grad_norm": 0.2235691534728348, + "learning_rate": 5.031622419744879e-05, + "loss": 0.4867, + "step": 5608 + }, + { + "epoch": 1.5698292751189475, + "grad_norm": 0.22532176345221014, + "learning_rate": 5.030079881770996e-05, + "loss": 0.4909, + "step": 5609 + }, + { + "epoch": 1.5701091519731318, + "grad_norm": 0.2236565549519103, + "learning_rate": 5.028537340934092e-05, + "loss": 0.4938, + "step": 5610 + }, + { + "epoch": 1.5703890288273161, + "grad_norm": 0.22500947527017523, + "learning_rate": 5.0269947973809886e-05, + "loss": 0.4907, + "step": 5611 + }, + { + "epoch": 1.5706689056815002, + "grad_norm": 0.2323955181170479, + "learning_rate": 5.0254522512585056e-05, + "loss": 0.4987, + "step": 5612 + }, + { + "epoch": 1.5709487825356843, + "grad_norm": 0.22234777205149137, + "learning_rate": 5.023909702713463e-05, + "loss": 0.4779, + "step": 5613 + }, + { + "epoch": 1.5712286593898686, + "grad_norm": 0.21985912973673657, + "learning_rate": 5.0223671518926806e-05, + "loss": 0.4798, + "step": 5614 + }, + { + "epoch": 1.5715085362440526, + "grad_norm": 0.23034250758950078, + "learning_rate": 5.020824598942981e-05, + "loss": 0.491, + "step": 5615 + }, + { + "epoch": 1.5717884130982367, + "grad_norm": 0.2228880479318296, + "learning_rate": 5.019282044011184e-05, + "loss": 0.5168, + "step": 5616 + }, + { + "epoch": 1.572068289952421, + "grad_norm": 0.22690103597598169, + "learning_rate": 5.017739487244112e-05, + "loss": 0.4984, + "step": 5617 + }, + { + "epoch": 1.572348166806605, + "grad_norm": 0.21771862196970726, + "learning_rate": 5.016196928788586e-05, + "loss": 0.4724, + "step": 5618 + }, + { + "epoch": 1.5726280436607891, + "grad_norm": 0.2314285834647458, + "learning_rate": 5.014654368791426e-05, + "loss": 0.4915, + "step": 5619 + }, + { + "epoch": 1.5729079205149734, + "grad_norm": 0.2325948459261361, + "learning_rate": 5.0131118073994556e-05, + "loss": 0.4928, + "step": 5620 + }, + { + "epoch": 1.5731877973691577, + "grad_norm": 0.222898035682407, + "learning_rate": 5.0115692447594954e-05, + "loss": 0.4787, + "step": 5621 + }, + { + "epoch": 1.5734676742233418, + "grad_norm": 0.2275370877876276, + "learning_rate": 5.010026681018368e-05, + "loss": 0.4814, + "step": 5622 + }, + { + "epoch": 1.5737475510775258, + "grad_norm": 0.24306375118915882, + "learning_rate": 5.0084841163228966e-05, + "loss": 0.506, + "step": 5623 + }, + { + "epoch": 1.5740274279317101, + "grad_norm": 0.2367990653165563, + "learning_rate": 5.006941550819901e-05, + "loss": 0.493, + "step": 5624 + }, + { + "epoch": 1.5743073047858942, + "grad_norm": 0.2255838024038827, + "learning_rate": 5.005398984656205e-05, + "loss": 0.492, + "step": 5625 + }, + { + "epoch": 1.5745871816400783, + "grad_norm": 0.2290702472706961, + "learning_rate": 5.003856417978632e-05, + "loss": 0.4938, + "step": 5626 + }, + { + "epoch": 1.5748670584942626, + "grad_norm": 0.239322202470668, + "learning_rate": 5.0023138509340016e-05, + "loss": 0.5062, + "step": 5627 + }, + { + "epoch": 1.5751469353484466, + "grad_norm": 0.2258696469173803, + "learning_rate": 5.000771283669138e-05, + "loss": 0.4786, + "step": 5628 + }, + { + "epoch": 1.5754268122026307, + "grad_norm": 0.21587624527192376, + "learning_rate": 4.999228716330864e-05, + "loss": 0.4804, + "step": 5629 + }, + { + "epoch": 1.575706689056815, + "grad_norm": 0.22861948012999467, + "learning_rate": 4.997686149066e-05, + "loss": 0.5041, + "step": 5630 + }, + { + "epoch": 1.5759865659109993, + "grad_norm": 0.22952288917208172, + "learning_rate": 4.9961435820213704e-05, + "loss": 0.4678, + "step": 5631 + }, + { + "epoch": 1.5762664427651834, + "grad_norm": 0.22081528456431063, + "learning_rate": 4.994601015343796e-05, + "loss": 0.5185, + "step": 5632 + }, + { + "epoch": 1.5765463196193674, + "grad_norm": 0.23351140051001967, + "learning_rate": 4.9930584491801e-05, + "loss": 0.5161, + "step": 5633 + }, + { + "epoch": 1.5768261964735517, + "grad_norm": 0.2243394134223082, + "learning_rate": 4.9915158836771046e-05, + "loss": 0.4846, + "step": 5634 + }, + { + "epoch": 1.5771060733277358, + "grad_norm": 0.2217145560085866, + "learning_rate": 4.9899733189816326e-05, + "loss": 0.4935, + "step": 5635 + }, + { + "epoch": 1.5773859501819198, + "grad_norm": 0.22724535119791522, + "learning_rate": 4.988430755240506e-05, + "loss": 0.4795, + "step": 5636 + }, + { + "epoch": 1.5776658270361041, + "grad_norm": 0.21932074450067263, + "learning_rate": 4.986888192600546e-05, + "loss": 0.498, + "step": 5637 + }, + { + "epoch": 1.5779457038902884, + "grad_norm": 0.21841368179356946, + "learning_rate": 4.985345631208576e-05, + "loss": 0.4711, + "step": 5638 + }, + { + "epoch": 1.5782255807444723, + "grad_norm": 0.230609290062492, + "learning_rate": 4.983803071211416e-05, + "loss": 0.4965, + "step": 5639 + }, + { + "epoch": 1.5785054575986566, + "grad_norm": 0.2415922466704433, + "learning_rate": 4.982260512755889e-05, + "loss": 0.4885, + "step": 5640 + }, + { + "epoch": 1.5787853344528409, + "grad_norm": 0.2354073525679611, + "learning_rate": 4.980717955988817e-05, + "loss": 0.5004, + "step": 5641 + }, + { + "epoch": 1.579065211307025, + "grad_norm": 0.2303289745490306, + "learning_rate": 4.97917540105702e-05, + "loss": 0.5003, + "step": 5642 + }, + { + "epoch": 1.579345088161209, + "grad_norm": 0.2230092275624761, + "learning_rate": 4.977632848107319e-05, + "loss": 0.4791, + "step": 5643 + }, + { + "epoch": 1.5796249650153933, + "grad_norm": 0.23070455125285247, + "learning_rate": 4.9760902972865376e-05, + "loss": 0.496, + "step": 5644 + }, + { + "epoch": 1.5799048418695774, + "grad_norm": 0.23893053439705275, + "learning_rate": 4.974547748741494e-05, + "loss": 0.4724, + "step": 5645 + }, + { + "epoch": 1.5801847187237614, + "grad_norm": 0.22555313781105923, + "learning_rate": 4.973005202619011e-05, + "loss": 0.4678, + "step": 5646 + }, + { + "epoch": 1.5804645955779457, + "grad_norm": 0.21855551936057607, + "learning_rate": 4.9714626590659104e-05, + "loss": 0.4702, + "step": 5647 + }, + { + "epoch": 1.58074447243213, + "grad_norm": 0.23351187271333862, + "learning_rate": 4.9699201182290065e-05, + "loss": 0.4795, + "step": 5648 + }, + { + "epoch": 1.5810243492863139, + "grad_norm": 0.22358200316100899, + "learning_rate": 4.968377580255123e-05, + "loss": 0.4816, + "step": 5649 + }, + { + "epoch": 1.5813042261404981, + "grad_norm": 0.23157501059580052, + "learning_rate": 4.966835045291079e-05, + "loss": 0.4999, + "step": 5650 + }, + { + "epoch": 1.5815841029946824, + "grad_norm": 0.2144969406219859, + "learning_rate": 4.965292513483694e-05, + "loss": 0.4652, + "step": 5651 + }, + { + "epoch": 1.5818639798488665, + "grad_norm": 0.23900376296684603, + "learning_rate": 4.963749984979787e-05, + "loss": 0.5058, + "step": 5652 + }, + { + "epoch": 1.5821438567030506, + "grad_norm": 0.23008662543329655, + "learning_rate": 4.962207459926177e-05, + "loss": 0.4895, + "step": 5653 + }, + { + "epoch": 1.5824237335572349, + "grad_norm": 0.22020178253816597, + "learning_rate": 4.9606649384696826e-05, + "loss": 0.4952, + "step": 5654 + }, + { + "epoch": 1.582703610411419, + "grad_norm": 0.22277943987091794, + "learning_rate": 4.959122420757121e-05, + "loss": 0.4903, + "step": 5655 + }, + { + "epoch": 1.582983487265603, + "grad_norm": 0.23096417077931158, + "learning_rate": 4.95757990693531e-05, + "loss": 0.5208, + "step": 5656 + }, + { + "epoch": 1.5832633641197873, + "grad_norm": 0.22222705436929513, + "learning_rate": 4.956037397151067e-05, + "loss": 0.48, + "step": 5657 + }, + { + "epoch": 1.5835432409739716, + "grad_norm": 0.23613640831850014, + "learning_rate": 4.95449489155121e-05, + "loss": 0.4863, + "step": 5658 + }, + { + "epoch": 1.5838231178281557, + "grad_norm": 0.23204011606346303, + "learning_rate": 4.9529523902825534e-05, + "loss": 0.4981, + "step": 5659 + }, + { + "epoch": 1.5841029946823397, + "grad_norm": 0.22282931745119186, + "learning_rate": 4.951409893491914e-05, + "loss": 0.4811, + "step": 5660 + }, + { + "epoch": 1.584382871536524, + "grad_norm": 0.21492766007267308, + "learning_rate": 4.94986740132611e-05, + "loss": 0.4848, + "step": 5661 + }, + { + "epoch": 1.584662748390708, + "grad_norm": 0.2311786884387169, + "learning_rate": 4.948324913931954e-05, + "loss": 0.4805, + "step": 5662 + }, + { + "epoch": 1.5849426252448922, + "grad_norm": 0.21940614258724447, + "learning_rate": 4.946782431456262e-05, + "loss": 0.4871, + "step": 5663 + }, + { + "epoch": 1.5852225020990764, + "grad_norm": 0.23041075544554102, + "learning_rate": 4.9452399540458484e-05, + "loss": 0.482, + "step": 5664 + }, + { + "epoch": 1.5855023789532605, + "grad_norm": 0.2210787333418233, + "learning_rate": 4.943697481847528e-05, + "loss": 0.4974, + "step": 5665 + }, + { + "epoch": 1.5857822558074446, + "grad_norm": 0.2230033247855133, + "learning_rate": 4.942155015008111e-05, + "loss": 0.4939, + "step": 5666 + }, + { + "epoch": 1.5860621326616289, + "grad_norm": 0.22711898224752045, + "learning_rate": 4.940612553674411e-05, + "loss": 0.4866, + "step": 5667 + }, + { + "epoch": 1.5863420095158132, + "grad_norm": 0.22210416401114763, + "learning_rate": 4.9390700979932425e-05, + "loss": 0.4793, + "step": 5668 + }, + { + "epoch": 1.5866218863699972, + "grad_norm": 0.23036242768367657, + "learning_rate": 4.937527648111416e-05, + "loss": 0.4666, + "step": 5669 + }, + { + "epoch": 1.5869017632241813, + "grad_norm": 0.23307610569854123, + "learning_rate": 4.935985204175743e-05, + "loss": 0.5148, + "step": 5670 + }, + { + "epoch": 1.5871816400783656, + "grad_norm": 0.22876382302810352, + "learning_rate": 4.934442766333034e-05, + "loss": 0.5002, + "step": 5671 + }, + { + "epoch": 1.5874615169325497, + "grad_norm": 0.2300745790895094, + "learning_rate": 4.932900334730099e-05, + "loss": 0.4718, + "step": 5672 + }, + { + "epoch": 1.5877413937867337, + "grad_norm": 0.23143121589506777, + "learning_rate": 4.931357909513748e-05, + "loss": 0.4986, + "step": 5673 + }, + { + "epoch": 1.588021270640918, + "grad_norm": 0.2426854556143326, + "learning_rate": 4.9298154908307896e-05, + "loss": 0.4778, + "step": 5674 + }, + { + "epoch": 1.5883011474951023, + "grad_norm": 0.22520928699315454, + "learning_rate": 4.928273078828032e-05, + "loss": 0.4875, + "step": 5675 + }, + { + "epoch": 1.5885810243492862, + "grad_norm": 0.23469119149336215, + "learning_rate": 4.926730673652283e-05, + "loss": 0.4993, + "step": 5676 + }, + { + "epoch": 1.5888609012034705, + "grad_norm": 0.23477085799090816, + "learning_rate": 4.9251882754503494e-05, + "loss": 0.4927, + "step": 5677 + }, + { + "epoch": 1.5891407780576547, + "grad_norm": 0.2277194693858088, + "learning_rate": 4.9236458843690366e-05, + "loss": 0.4788, + "step": 5678 + }, + { + "epoch": 1.5894206549118388, + "grad_norm": 0.2315005439338584, + "learning_rate": 4.922103500555152e-05, + "loss": 0.4772, + "step": 5679 + }, + { + "epoch": 1.5897005317660229, + "grad_norm": 0.2148963207070413, + "learning_rate": 4.9205611241555005e-05, + "loss": 0.4727, + "step": 5680 + }, + { + "epoch": 1.5899804086202072, + "grad_norm": 0.21648755482076593, + "learning_rate": 4.9190187553168845e-05, + "loss": 0.5016, + "step": 5681 + }, + { + "epoch": 1.5902602854743912, + "grad_norm": 0.2650990713965363, + "learning_rate": 4.91747639418611e-05, + "loss": 0.5108, + "step": 5682 + }, + { + "epoch": 1.5905401623285753, + "grad_norm": 0.22934040499718267, + "learning_rate": 4.91593404090998e-05, + "loss": 0.5056, + "step": 5683 + }, + { + "epoch": 1.5908200391827596, + "grad_norm": 0.23879577773825494, + "learning_rate": 4.914391695635292e-05, + "loss": 0.5016, + "step": 5684 + }, + { + "epoch": 1.591099916036944, + "grad_norm": 0.22643280906868846, + "learning_rate": 4.912849358508851e-05, + "loss": 0.481, + "step": 5685 + }, + { + "epoch": 1.5913797928911277, + "grad_norm": 0.21949304829353772, + "learning_rate": 4.9113070296774575e-05, + "loss": 0.5085, + "step": 5686 + }, + { + "epoch": 1.591659669745312, + "grad_norm": 0.23354786840345054, + "learning_rate": 4.90976470928791e-05, + "loss": 0.4891, + "step": 5687 + }, + { + "epoch": 1.5919395465994963, + "grad_norm": 0.22745535886127166, + "learning_rate": 4.9082223974870086e-05, + "loss": 0.4946, + "step": 5688 + }, + { + "epoch": 1.5922194234536804, + "grad_norm": 0.20991743426911497, + "learning_rate": 4.90668009442155e-05, + "loss": 0.4869, + "step": 5689 + }, + { + "epoch": 1.5924993003078645, + "grad_norm": 0.21985904149845484, + "learning_rate": 4.905137800238333e-05, + "loss": 0.49, + "step": 5690 + }, + { + "epoch": 1.5927791771620488, + "grad_norm": 0.22658574255004135, + "learning_rate": 4.903595515084153e-05, + "loss": 0.494, + "step": 5691 + }, + { + "epoch": 1.5930590540162328, + "grad_norm": 0.2203899452351098, + "learning_rate": 4.902053239105805e-05, + "loss": 0.4939, + "step": 5692 + }, + { + "epoch": 1.593338930870417, + "grad_norm": 0.23498014264086428, + "learning_rate": 4.900510972450086e-05, + "loss": 0.5185, + "step": 5693 + }, + { + "epoch": 1.5936188077246012, + "grad_norm": 0.23073398037069082, + "learning_rate": 4.898968715263787e-05, + "loss": 0.4849, + "step": 5694 + }, + { + "epoch": 1.5938986845787855, + "grad_norm": 0.24388999910783205, + "learning_rate": 4.897426467693702e-05, + "loss": 0.4741, + "step": 5695 + }, + { + "epoch": 1.5941785614329695, + "grad_norm": 0.22466535931596224, + "learning_rate": 4.895884229886624e-05, + "loss": 0.493, + "step": 5696 + }, + { + "epoch": 1.5944584382871536, + "grad_norm": 0.2319889488478196, + "learning_rate": 4.8943420019893434e-05, + "loss": 0.5071, + "step": 5697 + }, + { + "epoch": 1.594738315141338, + "grad_norm": 0.2197193456644569, + "learning_rate": 4.89279978414865e-05, + "loss": 0.4796, + "step": 5698 + }, + { + "epoch": 1.595018191995522, + "grad_norm": 0.24180193937634586, + "learning_rate": 4.8912575765113336e-05, + "loss": 0.4856, + "step": 5699 + }, + { + "epoch": 1.595298068849706, + "grad_norm": 0.22368090012983122, + "learning_rate": 4.889715379224181e-05, + "loss": 0.5004, + "step": 5700 + }, + { + "epoch": 1.5955779457038903, + "grad_norm": 0.22759770021300402, + "learning_rate": 4.888173192433982e-05, + "loss": 0.4882, + "step": 5701 + }, + { + "epoch": 1.5958578225580744, + "grad_norm": 0.22742258388402647, + "learning_rate": 4.8866310162875204e-05, + "loss": 0.4998, + "step": 5702 + }, + { + "epoch": 1.5961376994122585, + "grad_norm": 0.22141383827545275, + "learning_rate": 4.885088850931582e-05, + "loss": 0.4694, + "step": 5703 + }, + { + "epoch": 1.5964175762664428, + "grad_norm": 0.2248426734194824, + "learning_rate": 4.8835466965129503e-05, + "loss": 0.5102, + "step": 5704 + }, + { + "epoch": 1.596697453120627, + "grad_norm": 0.22648678235381703, + "learning_rate": 4.8820045531784096e-05, + "loss": 0.4878, + "step": 5705 + }, + { + "epoch": 1.5969773299748111, + "grad_norm": 0.23415672028477244, + "learning_rate": 4.8804624210747424e-05, + "loss": 0.4917, + "step": 5706 + }, + { + "epoch": 1.5972572068289952, + "grad_norm": 0.21833155102939192, + "learning_rate": 4.8789203003487274e-05, + "loss": 0.4946, + "step": 5707 + }, + { + "epoch": 1.5975370836831795, + "grad_norm": 0.22172592701075167, + "learning_rate": 4.877378191147147e-05, + "loss": 0.4761, + "step": 5708 + }, + { + "epoch": 1.5978169605373636, + "grad_norm": 0.22807897163810353, + "learning_rate": 4.875836093616779e-05, + "loss": 0.4774, + "step": 5709 + }, + { + "epoch": 1.5980968373915476, + "grad_norm": 0.23259623595043447, + "learning_rate": 4.874294007904401e-05, + "loss": 0.5043, + "step": 5710 + }, + { + "epoch": 1.598376714245732, + "grad_norm": 0.2324499737480055, + "learning_rate": 4.8727519341567895e-05, + "loss": 0.4906, + "step": 5711 + }, + { + "epoch": 1.5986565910999162, + "grad_norm": 0.2400088947626389, + "learning_rate": 4.871209872520719e-05, + "loss": 0.4934, + "step": 5712 + }, + { + "epoch": 1.5989364679541, + "grad_norm": 0.22025664699223768, + "learning_rate": 4.8696678231429666e-05, + "loss": 0.4722, + "step": 5713 + }, + { + "epoch": 1.5992163448082843, + "grad_norm": 0.23673944026346538, + "learning_rate": 4.868125786170303e-05, + "loss": 0.4969, + "step": 5714 + }, + { + "epoch": 1.5994962216624686, + "grad_norm": 0.23613047153648795, + "learning_rate": 4.866583761749501e-05, + "loss": 0.4936, + "step": 5715 + }, + { + "epoch": 1.5997760985166527, + "grad_norm": 0.21196338207099613, + "learning_rate": 4.8650417500273316e-05, + "loss": 0.4937, + "step": 5716 + }, + { + "epoch": 1.6000559753708368, + "grad_norm": 0.22503696481116564, + "learning_rate": 4.8634997511505636e-05, + "loss": 0.491, + "step": 5717 + }, + { + "epoch": 1.600335852225021, + "grad_norm": 0.22896987780471204, + "learning_rate": 4.861957765265966e-05, + "loss": 0.4764, + "step": 5718 + }, + { + "epoch": 1.6006157290792051, + "grad_norm": 0.22550125568022933, + "learning_rate": 4.8604157925203064e-05, + "loss": 0.4887, + "step": 5719 + }, + { + "epoch": 1.6008956059333892, + "grad_norm": 0.22001401426355968, + "learning_rate": 4.858873833060349e-05, + "loss": 0.513, + "step": 5720 + }, + { + "epoch": 1.6011754827875735, + "grad_norm": 0.23222746846310632, + "learning_rate": 4.857331887032859e-05, + "loss": 0.5125, + "step": 5721 + }, + { + "epoch": 1.6014553596417578, + "grad_norm": 0.22786792983349033, + "learning_rate": 4.8557899545846e-05, + "loss": 0.4646, + "step": 5722 + }, + { + "epoch": 1.6017352364959416, + "grad_norm": 0.21720613549163814, + "learning_rate": 4.854248035862333e-05, + "loss": 0.479, + "step": 5723 + }, + { + "epoch": 1.602015113350126, + "grad_norm": 0.22555211669472716, + "learning_rate": 4.852706131012821e-05, + "loss": 0.5052, + "step": 5724 + }, + { + "epoch": 1.6022949902043102, + "grad_norm": 0.22159626115305234, + "learning_rate": 4.851164240182821e-05, + "loss": 0.5021, + "step": 5725 + }, + { + "epoch": 1.6025748670584943, + "grad_norm": 0.22149375881581837, + "learning_rate": 4.8496223635190916e-05, + "loss": 0.4923, + "step": 5726 + }, + { + "epoch": 1.6028547439126783, + "grad_norm": 0.21561536936147985, + "learning_rate": 4.8480805011683903e-05, + "loss": 0.4842, + "step": 5727 + }, + { + "epoch": 1.6031346207668626, + "grad_norm": 0.22233172955508182, + "learning_rate": 4.846538653277472e-05, + "loss": 0.4722, + "step": 5728 + }, + { + "epoch": 1.6034144976210467, + "grad_norm": 0.223820077918928, + "learning_rate": 4.8449968199930903e-05, + "loss": 0.4892, + "step": 5729 + }, + { + "epoch": 1.6036943744752308, + "grad_norm": 0.22504862664574823, + "learning_rate": 4.8434550014619974e-05, + "loss": 0.4922, + "step": 5730 + }, + { + "epoch": 1.603974251329415, + "grad_norm": 0.23114975781002625, + "learning_rate": 4.841913197830946e-05, + "loss": 0.4864, + "step": 5731 + }, + { + "epoch": 1.6042541281835994, + "grad_norm": 0.21962614629382674, + "learning_rate": 4.840371409246684e-05, + "loss": 0.4935, + "step": 5732 + }, + { + "epoch": 1.6045340050377834, + "grad_norm": 0.22062467171930908, + "learning_rate": 4.838829635855962e-05, + "loss": 0.4768, + "step": 5733 + }, + { + "epoch": 1.6048138818919675, + "grad_norm": 0.22731014871243183, + "learning_rate": 4.8372878778055245e-05, + "loss": 0.4709, + "step": 5734 + }, + { + "epoch": 1.6050937587461518, + "grad_norm": 0.23312215764311575, + "learning_rate": 4.835746135242118e-05, + "loss": 0.5032, + "step": 5735 + }, + { + "epoch": 1.6053736356003359, + "grad_norm": 0.23382333183840684, + "learning_rate": 4.834204408312487e-05, + "loss": 0.4783, + "step": 5736 + }, + { + "epoch": 1.60565351245452, + "grad_norm": 0.22949331194837824, + "learning_rate": 4.832662697163373e-05, + "loss": 0.4918, + "step": 5737 + }, + { + "epoch": 1.6059333893087042, + "grad_norm": 0.23931647766632208, + "learning_rate": 4.8311210019415174e-05, + "loss": 0.4775, + "step": 5738 + }, + { + "epoch": 1.6062132661628883, + "grad_norm": 0.22694283907923388, + "learning_rate": 4.829579322793659e-05, + "loss": 0.5003, + "step": 5739 + }, + { + "epoch": 1.6064931430170724, + "grad_norm": 0.24169102480458646, + "learning_rate": 4.8280376598665364e-05, + "loss": 0.5015, + "step": 5740 + }, + { + "epoch": 1.6067730198712566, + "grad_norm": 0.23385063482340737, + "learning_rate": 4.8264960133068846e-05, + "loss": 0.4996, + "step": 5741 + }, + { + "epoch": 1.607052896725441, + "grad_norm": 0.2365497882143094, + "learning_rate": 4.82495438326144e-05, + "loss": 0.5006, + "step": 5742 + }, + { + "epoch": 1.607332773579625, + "grad_norm": 0.22155722089844201, + "learning_rate": 4.823412769876935e-05, + "loss": 0.4537, + "step": 5743 + }, + { + "epoch": 1.607612650433809, + "grad_norm": 0.22160297539346716, + "learning_rate": 4.821871173300101e-05, + "loss": 0.5077, + "step": 5744 + }, + { + "epoch": 1.6078925272879934, + "grad_norm": 0.21528485247190282, + "learning_rate": 4.820329593677669e-05, + "loss": 0.4781, + "step": 5745 + }, + { + "epoch": 1.6081724041421774, + "grad_norm": 0.22039165194741256, + "learning_rate": 4.818788031156367e-05, + "loss": 0.4704, + "step": 5746 + }, + { + "epoch": 1.6084522809963615, + "grad_norm": 0.22621626635198547, + "learning_rate": 4.81724648588292e-05, + "loss": 0.5048, + "step": 5747 + }, + { + "epoch": 1.6087321578505458, + "grad_norm": 0.23215933840603425, + "learning_rate": 4.815704958004056e-05, + "loss": 0.514, + "step": 5748 + }, + { + "epoch": 1.6090120347047299, + "grad_norm": 0.23004859929167618, + "learning_rate": 4.814163447666498e-05, + "loss": 0.498, + "step": 5749 + }, + { + "epoch": 1.609291911558914, + "grad_norm": 0.22818970963295204, + "learning_rate": 4.812621955016966e-05, + "loss": 0.4717, + "step": 5750 + }, + { + "epoch": 1.6095717884130982, + "grad_norm": 0.22221224321953403, + "learning_rate": 4.811080480202181e-05, + "loss": 0.4497, + "step": 5751 + }, + { + "epoch": 1.6098516652672825, + "grad_norm": 0.2164486914325114, + "learning_rate": 4.8095390233688624e-05, + "loss": 0.4698, + "step": 5752 + }, + { + "epoch": 1.6101315421214666, + "grad_norm": 0.22699179329191232, + "learning_rate": 4.807997584663726e-05, + "loss": 0.5135, + "step": 5753 + }, + { + "epoch": 1.6104114189756507, + "grad_norm": 0.22011899742096697, + "learning_rate": 4.806456164233487e-05, + "loss": 0.4858, + "step": 5754 + }, + { + "epoch": 1.610691295829835, + "grad_norm": 0.22914681138089035, + "learning_rate": 4.8049147622248586e-05, + "loss": 0.4927, + "step": 5755 + }, + { + "epoch": 1.610971172684019, + "grad_norm": 0.225856117082813, + "learning_rate": 4.8033733787845535e-05, + "loss": 0.5069, + "step": 5756 + }, + { + "epoch": 1.611251049538203, + "grad_norm": 0.22067990693800643, + "learning_rate": 4.801832014059279e-05, + "loss": 0.4802, + "step": 5757 + }, + { + "epoch": 1.6115309263923874, + "grad_norm": 0.23779024106537164, + "learning_rate": 4.8002906681957444e-05, + "loss": 0.5095, + "step": 5758 + }, + { + "epoch": 1.6118108032465717, + "grad_norm": 0.22280889650205968, + "learning_rate": 4.798749341340656e-05, + "loss": 0.466, + "step": 5759 + }, + { + "epoch": 1.6120906801007555, + "grad_norm": 0.22707467688410218, + "learning_rate": 4.797208033640718e-05, + "loss": 0.4826, + "step": 5760 + }, + { + "epoch": 1.6123705569549398, + "grad_norm": 0.22367707952061988, + "learning_rate": 4.7956667452426315e-05, + "loss": 0.4943, + "step": 5761 + }, + { + "epoch": 1.612650433809124, + "grad_norm": 0.2274460927084838, + "learning_rate": 4.7941254762931e-05, + "loss": 0.5029, + "step": 5762 + }, + { + "epoch": 1.6129303106633082, + "grad_norm": 0.22289882798046468, + "learning_rate": 4.7925842269388206e-05, + "loss": 0.4829, + "step": 5763 + }, + { + "epoch": 1.6132101875174922, + "grad_norm": 0.2271771812863479, + "learning_rate": 4.791042997326489e-05, + "loss": 0.4935, + "step": 5764 + }, + { + "epoch": 1.6134900643716765, + "grad_norm": 0.2267766613971914, + "learning_rate": 4.789501787602804e-05, + "loss": 0.4851, + "step": 5765 + }, + { + "epoch": 1.6137699412258606, + "grad_norm": 0.23638489851748645, + "learning_rate": 4.787960597914456e-05, + "loss": 0.51, + "step": 5766 + }, + { + "epoch": 1.6140498180800447, + "grad_norm": 0.2271291952099077, + "learning_rate": 4.786419428408137e-05, + "loss": 0.5048, + "step": 5767 + }, + { + "epoch": 1.614329694934229, + "grad_norm": 0.22745023606135853, + "learning_rate": 4.784878279230536e-05, + "loss": 0.4881, + "step": 5768 + }, + { + "epoch": 1.6146095717884132, + "grad_norm": 0.2263025599007156, + "learning_rate": 4.783337150528341e-05, + "loss": 0.4861, + "step": 5769 + }, + { + "epoch": 1.614889448642597, + "grad_norm": 0.2294863080810387, + "learning_rate": 4.7817960424482375e-05, + "loss": 0.4778, + "step": 5770 + }, + { + "epoch": 1.6151693254967814, + "grad_norm": 0.23131908954082656, + "learning_rate": 4.780254955136909e-05, + "loss": 0.4879, + "step": 5771 + }, + { + "epoch": 1.6154492023509657, + "grad_norm": 0.22520059969785933, + "learning_rate": 4.778713888741036e-05, + "loss": 0.4877, + "step": 5772 + }, + { + "epoch": 1.6157290792051497, + "grad_norm": 0.2260490977803011, + "learning_rate": 4.7771728434073e-05, + "loss": 0.5019, + "step": 5773 + }, + { + "epoch": 1.6160089560593338, + "grad_norm": 0.2231625875053975, + "learning_rate": 4.775631819282378e-05, + "loss": 0.4636, + "step": 5774 + }, + { + "epoch": 1.616288832913518, + "grad_norm": 0.22510355582497066, + "learning_rate": 4.774090816512944e-05, + "loss": 0.491, + "step": 5775 + }, + { + "epoch": 1.6165687097677022, + "grad_norm": 0.24549719427424876, + "learning_rate": 4.7725498352456735e-05, + "loss": 0.5195, + "step": 5776 + }, + { + "epoch": 1.6168485866218862, + "grad_norm": 0.22356303132998295, + "learning_rate": 4.771008875627236e-05, + "loss": 0.4998, + "step": 5777 + }, + { + "epoch": 1.6171284634760705, + "grad_norm": 0.22229706812601246, + "learning_rate": 4.7694679378043014e-05, + "loss": 0.4688, + "step": 5778 + }, + { + "epoch": 1.6174083403302548, + "grad_norm": 0.22785819550643896, + "learning_rate": 4.7679270219235384e-05, + "loss": 0.5035, + "step": 5779 + }, + { + "epoch": 1.617688217184439, + "grad_norm": 0.231961879890167, + "learning_rate": 4.766386128131611e-05, + "loss": 0.4939, + "step": 5780 + }, + { + "epoch": 1.617968094038623, + "grad_norm": 0.23494886687096408, + "learning_rate": 4.764845256575183e-05, + "loss": 0.4753, + "step": 5781 + }, + { + "epoch": 1.6182479708928073, + "grad_norm": 0.22048812903283166, + "learning_rate": 4.7633044074009134e-05, + "loss": 0.4864, + "step": 5782 + }, + { + "epoch": 1.6185278477469913, + "grad_norm": 0.23580651142556416, + "learning_rate": 4.7617635807554644e-05, + "loss": 0.5018, + "step": 5783 + }, + { + "epoch": 1.6188077246011754, + "grad_norm": 0.23905429525768718, + "learning_rate": 4.7602227767854906e-05, + "loss": 0.5257, + "step": 5784 + }, + { + "epoch": 1.6190876014553597, + "grad_norm": 0.2134645462747951, + "learning_rate": 4.758681995637648e-05, + "loss": 0.4698, + "step": 5785 + }, + { + "epoch": 1.6193674783095438, + "grad_norm": 0.21912439150211308, + "learning_rate": 4.757141237458587e-05, + "loss": 0.4953, + "step": 5786 + }, + { + "epoch": 1.6196473551637278, + "grad_norm": 0.23041845944682493, + "learning_rate": 4.75560050239496e-05, + "loss": 0.486, + "step": 5787 + }, + { + "epoch": 1.619927232017912, + "grad_norm": 0.22738879643277715, + "learning_rate": 4.7540597905934136e-05, + "loss": 0.502, + "step": 5788 + }, + { + "epoch": 1.6202071088720964, + "grad_norm": 0.21373301400501682, + "learning_rate": 4.7525191022005935e-05, + "loss": 0.5089, + "step": 5789 + }, + { + "epoch": 1.6204869857262805, + "grad_norm": 0.2326314009368342, + "learning_rate": 4.7509784373631444e-05, + "loss": 0.5047, + "step": 5790 + }, + { + "epoch": 1.6207668625804645, + "grad_norm": 0.22847212324678506, + "learning_rate": 4.749437796227707e-05, + "loss": 0.4866, + "step": 5791 + }, + { + "epoch": 1.6210467394346488, + "grad_norm": 0.22142696297588046, + "learning_rate": 4.747897178940921e-05, + "loss": 0.4793, + "step": 5792 + }, + { + "epoch": 1.621326616288833, + "grad_norm": 0.23040322172517957, + "learning_rate": 4.746356585649422e-05, + "loss": 0.4813, + "step": 5793 + }, + { + "epoch": 1.621606493143017, + "grad_norm": 0.2280228144904133, + "learning_rate": 4.744816016499845e-05, + "loss": 0.4785, + "step": 5794 + }, + { + "epoch": 1.6218863699972013, + "grad_norm": 0.22175047501904213, + "learning_rate": 4.7432754716388224e-05, + "loss": 0.4742, + "step": 5795 + }, + { + "epoch": 1.6221662468513856, + "grad_norm": 0.21681999451189307, + "learning_rate": 4.741734951212984e-05, + "loss": 0.469, + "step": 5796 + }, + { + "epoch": 1.6224461237055694, + "grad_norm": 0.2291453645582424, + "learning_rate": 4.740194455368957e-05, + "loss": 0.4807, + "step": 5797 + }, + { + "epoch": 1.6227260005597537, + "grad_norm": 0.21659639889454546, + "learning_rate": 4.738653984253368e-05, + "loss": 0.4974, + "step": 5798 + }, + { + "epoch": 1.623005877413938, + "grad_norm": 0.22857678859931882, + "learning_rate": 4.737113538012838e-05, + "loss": 0.4767, + "step": 5799 + }, + { + "epoch": 1.623285754268122, + "grad_norm": 0.22695758737965813, + "learning_rate": 4.735573116793989e-05, + "loss": 0.4908, + "step": 5800 + }, + { + "epoch": 1.6235656311223061, + "grad_norm": 0.2241580374931068, + "learning_rate": 4.734032720743439e-05, + "loss": 0.4954, + "step": 5801 + }, + { + "epoch": 1.6238455079764904, + "grad_norm": 0.2295402350886931, + "learning_rate": 4.732492350007804e-05, + "loss": 0.5076, + "step": 5802 + }, + { + "epoch": 1.6241253848306745, + "grad_norm": 0.225986721024452, + "learning_rate": 4.7309520047336964e-05, + "loss": 0.4669, + "step": 5803 + }, + { + "epoch": 1.6244052616848585, + "grad_norm": 0.23057794100971507, + "learning_rate": 4.729411685067728e-05, + "loss": 0.4836, + "step": 5804 + }, + { + "epoch": 1.6246851385390428, + "grad_norm": 0.22842679919809245, + "learning_rate": 4.727871391156507e-05, + "loss": 0.4877, + "step": 5805 + }, + { + "epoch": 1.6249650153932271, + "grad_norm": 0.2239617920477314, + "learning_rate": 4.726331123146638e-05, + "loss": 0.4781, + "step": 5806 + }, + { + "epoch": 1.625244892247411, + "grad_norm": 0.2322547555875918, + "learning_rate": 4.724790881184727e-05, + "loss": 0.493, + "step": 5807 + }, + { + "epoch": 1.6255247691015953, + "grad_norm": 0.22273928750984806, + "learning_rate": 4.723250665417374e-05, + "loss": 0.4685, + "step": 5808 + }, + { + "epoch": 1.6258046459557796, + "grad_norm": 0.22855695969211148, + "learning_rate": 4.721710475991177e-05, + "loss": 0.4709, + "step": 5809 + }, + { + "epoch": 1.6260845228099636, + "grad_norm": 0.23504133375235953, + "learning_rate": 4.720170313052734e-05, + "loss": 0.4944, + "step": 5810 + }, + { + "epoch": 1.6263643996641477, + "grad_norm": 0.2199186142727277, + "learning_rate": 4.718630176748636e-05, + "loss": 0.4761, + "step": 5811 + }, + { + "epoch": 1.626644276518332, + "grad_norm": 0.2275762801104239, + "learning_rate": 4.717090067225475e-05, + "loss": 0.4682, + "step": 5812 + }, + { + "epoch": 1.626924153372516, + "grad_norm": 0.2275213474136106, + "learning_rate": 4.7155499846298404e-05, + "loss": 0.4917, + "step": 5813 + }, + { + "epoch": 1.6272040302267001, + "grad_norm": 0.22296716415371437, + "learning_rate": 4.7140099291083174e-05, + "loss": 0.4915, + "step": 5814 + }, + { + "epoch": 1.6274839070808844, + "grad_norm": 0.22721460994923723, + "learning_rate": 4.712469900807489e-05, + "loss": 0.4967, + "step": 5815 + }, + { + "epoch": 1.6277637839350687, + "grad_norm": 0.2322878810055175, + "learning_rate": 4.710929899873936e-05, + "loss": 0.4911, + "step": 5816 + }, + { + "epoch": 1.6280436607892528, + "grad_norm": 0.2282839435903056, + "learning_rate": 4.709389926454237e-05, + "loss": 0.4971, + "step": 5817 + }, + { + "epoch": 1.6283235376434368, + "grad_norm": 0.2225925443350299, + "learning_rate": 4.7078499806949685e-05, + "loss": 0.4995, + "step": 5818 + }, + { + "epoch": 1.6286034144976211, + "grad_norm": 0.22744894693790926, + "learning_rate": 4.706310062742702e-05, + "loss": 0.4741, + "step": 5819 + }, + { + "epoch": 1.6288832913518052, + "grad_norm": 0.22262399579480868, + "learning_rate": 4.704770172744008e-05, + "loss": 0.4994, + "step": 5820 + }, + { + "epoch": 1.6291631682059893, + "grad_norm": 0.23047125974994434, + "learning_rate": 4.703230310845454e-05, + "loss": 0.5055, + "step": 5821 + }, + { + "epoch": 1.6294430450601736, + "grad_norm": 0.2308635977951529, + "learning_rate": 4.7016904771936054e-05, + "loss": 0.5175, + "step": 5822 + }, + { + "epoch": 1.6297229219143576, + "grad_norm": 0.23588138138650422, + "learning_rate": 4.700150671935024e-05, + "loss": 0.4896, + "step": 5823 + }, + { + "epoch": 1.6300027987685417, + "grad_norm": 0.2315473408984742, + "learning_rate": 4.6986108952162695e-05, + "loss": 0.496, + "step": 5824 + }, + { + "epoch": 1.630282675622726, + "grad_norm": 0.22932480749325715, + "learning_rate": 4.697071147183899e-05, + "loss": 0.5087, + "step": 5825 + }, + { + "epoch": 1.6305625524769103, + "grad_norm": 0.2321368033950167, + "learning_rate": 4.695531427984466e-05, + "loss": 0.4858, + "step": 5826 + }, + { + "epoch": 1.6308424293310944, + "grad_norm": 0.2220572018861899, + "learning_rate": 4.693991737764521e-05, + "loss": 0.466, + "step": 5827 + }, + { + "epoch": 1.6311223061852784, + "grad_norm": 0.22144207840823874, + "learning_rate": 4.692452076670617e-05, + "loss": 0.4942, + "step": 5828 + }, + { + "epoch": 1.6314021830394627, + "grad_norm": 0.2241728442411565, + "learning_rate": 4.690912444849294e-05, + "loss": 0.5083, + "step": 5829 + }, + { + "epoch": 1.6316820598936468, + "grad_norm": 0.2238142220763365, + "learning_rate": 4.6893728424470976e-05, + "loss": 0.4913, + "step": 5830 + }, + { + "epoch": 1.6319619367478309, + "grad_norm": 0.21963971541187846, + "learning_rate": 4.6878332696105685e-05, + "loss": 0.5038, + "step": 5831 + }, + { + "epoch": 1.6322418136020151, + "grad_norm": 0.22034137768128578, + "learning_rate": 4.6862937264862435e-05, + "loss": 0.4751, + "step": 5832 + }, + { + "epoch": 1.6325216904561994, + "grad_norm": 0.23384429528325093, + "learning_rate": 4.6847542132206566e-05, + "loss": 0.4861, + "step": 5833 + }, + { + "epoch": 1.6328015673103833, + "grad_norm": 0.23162851689482852, + "learning_rate": 4.683214729960339e-05, + "loss": 0.4917, + "step": 5834 + }, + { + "epoch": 1.6330814441645676, + "grad_norm": 0.2205854299944986, + "learning_rate": 4.681675276851822e-05, + "loss": 0.4724, + "step": 5835 + }, + { + "epoch": 1.6333613210187519, + "grad_norm": 0.22251589089958843, + "learning_rate": 4.6801358540416304e-05, + "loss": 0.455, + "step": 5836 + }, + { + "epoch": 1.633641197872936, + "grad_norm": 0.23482068124776187, + "learning_rate": 4.678596461676288e-05, + "loss": 0.4969, + "step": 5837 + }, + { + "epoch": 1.63392107472712, + "grad_norm": 0.23365477138352836, + "learning_rate": 4.677057099902313e-05, + "loss": 0.5035, + "step": 5838 + }, + { + "epoch": 1.6342009515813043, + "grad_norm": 0.2335303523267849, + "learning_rate": 4.675517768866224e-05, + "loss": 0.4858, + "step": 5839 + }, + { + "epoch": 1.6344808284354884, + "grad_norm": 0.23195225972423889, + "learning_rate": 4.673978468714537e-05, + "loss": 0.4971, + "step": 5840 + }, + { + "epoch": 1.6347607052896724, + "grad_norm": 0.21817598108023795, + "learning_rate": 4.6724391995937604e-05, + "loss": 0.4643, + "step": 5841 + }, + { + "epoch": 1.6350405821438567, + "grad_norm": 0.21250469154648938, + "learning_rate": 4.670899961650405e-05, + "loss": 0.4755, + "step": 5842 + }, + { + "epoch": 1.635320458998041, + "grad_norm": 0.23772420948105108, + "learning_rate": 4.6693607550309746e-05, + "loss": 0.496, + "step": 5843 + }, + { + "epoch": 1.6356003358522249, + "grad_norm": 0.23044886190912658, + "learning_rate": 4.667821579881973e-05, + "loss": 0.4909, + "step": 5844 + }, + { + "epoch": 1.6358802127064092, + "grad_norm": 0.2286914092546674, + "learning_rate": 4.666282436349898e-05, + "loss": 0.4789, + "step": 5845 + }, + { + "epoch": 1.6361600895605934, + "grad_norm": 0.233776104459152, + "learning_rate": 4.664743324581251e-05, + "loss": 0.463, + "step": 5846 + }, + { + "epoch": 1.6364399664147775, + "grad_norm": 0.2197032189289928, + "learning_rate": 4.66320424472252e-05, + "loss": 0.5025, + "step": 5847 + }, + { + "epoch": 1.6367198432689616, + "grad_norm": 0.22844563758750308, + "learning_rate": 4.661665196920197e-05, + "loss": 0.486, + "step": 5848 + }, + { + "epoch": 1.6369997201231459, + "grad_norm": 0.23659771077719677, + "learning_rate": 4.66012618132077e-05, + "loss": 0.5113, + "step": 5849 + }, + { + "epoch": 1.63727959697733, + "grad_norm": 0.21783685094359637, + "learning_rate": 4.658587198070723e-05, + "loss": 0.5006, + "step": 5850 + }, + { + "epoch": 1.637559473831514, + "grad_norm": 0.22521610038427864, + "learning_rate": 4.657048247316538e-05, + "loss": 0.4972, + "step": 5851 + }, + { + "epoch": 1.6378393506856983, + "grad_norm": 0.23846270943962297, + "learning_rate": 4.655509329204692e-05, + "loss": 0.5103, + "step": 5852 + }, + { + "epoch": 1.6381192275398826, + "grad_norm": 0.23309715924226423, + "learning_rate": 4.653970443881662e-05, + "loss": 0.5049, + "step": 5853 + }, + { + "epoch": 1.6383991043940667, + "grad_norm": 0.22791721167712203, + "learning_rate": 4.6524315914939184e-05, + "loss": 0.4753, + "step": 5854 + }, + { + "epoch": 1.6386789812482507, + "grad_norm": 0.21530879095648967, + "learning_rate": 4.6508927721879315e-05, + "loss": 0.4819, + "step": 5855 + }, + { + "epoch": 1.638958858102435, + "grad_norm": 0.21904974706164682, + "learning_rate": 4.649353986110165e-05, + "loss": 0.4931, + "step": 5856 + }, + { + "epoch": 1.639238734956619, + "grad_norm": 0.21927640189226147, + "learning_rate": 4.6478152334070825e-05, + "loss": 0.4786, + "step": 5857 + }, + { + "epoch": 1.6395186118108032, + "grad_norm": 0.22423225093334115, + "learning_rate": 4.646276514225143e-05, + "loss": 0.4932, + "step": 5858 + }, + { + "epoch": 1.6397984886649875, + "grad_norm": 0.23389077452977902, + "learning_rate": 4.644737828710803e-05, + "loss": 0.4889, + "step": 5859 + }, + { + "epoch": 1.6400783655191715, + "grad_norm": 0.22798625696329272, + "learning_rate": 4.643199177010515e-05, + "loss": 0.5063, + "step": 5860 + }, + { + "epoch": 1.6403582423733556, + "grad_norm": 0.22614433623546817, + "learning_rate": 4.64166055927073e-05, + "loss": 0.4863, + "step": 5861 + }, + { + "epoch": 1.6406381192275399, + "grad_norm": 0.24011361772060963, + "learning_rate": 4.640121975637892e-05, + "loss": 0.4931, + "step": 5862 + }, + { + "epoch": 1.6409179960817242, + "grad_norm": 0.22126510942477237, + "learning_rate": 4.638583426258447e-05, + "loss": 0.4858, + "step": 5863 + }, + { + "epoch": 1.6411978729359082, + "grad_norm": 0.2303787783755845, + "learning_rate": 4.637044911278835e-05, + "loss": 0.4924, + "step": 5864 + }, + { + "epoch": 1.6414777497900923, + "grad_norm": 0.2291733480745904, + "learning_rate": 4.6355064308454896e-05, + "loss": 0.4915, + "step": 5865 + }, + { + "epoch": 1.6417576266442766, + "grad_norm": 0.22438927776433693, + "learning_rate": 4.633967985104847e-05, + "loss": 0.4928, + "step": 5866 + }, + { + "epoch": 1.6420375034984607, + "grad_norm": 0.22222514556871162, + "learning_rate": 4.632429574203337e-05, + "loss": 0.4942, + "step": 5867 + }, + { + "epoch": 1.6423173803526447, + "grad_norm": 0.23048259448382782, + "learning_rate": 4.6308911982873827e-05, + "loss": 0.5152, + "step": 5868 + }, + { + "epoch": 1.642597257206829, + "grad_norm": 0.23404076649464106, + "learning_rate": 4.629352857503413e-05, + "loss": 0.4885, + "step": 5869 + }, + { + "epoch": 1.642877134061013, + "grad_norm": 0.22963516782425958, + "learning_rate": 4.627814551997845e-05, + "loss": 0.4869, + "step": 5870 + }, + { + "epoch": 1.6431570109151972, + "grad_norm": 0.23071543276779807, + "learning_rate": 4.626276281917098e-05, + "loss": 0.4776, + "step": 5871 + }, + { + "epoch": 1.6434368877693815, + "grad_norm": 0.22203563859459977, + "learning_rate": 4.624738047407582e-05, + "loss": 0.4821, + "step": 5872 + }, + { + "epoch": 1.6437167646235658, + "grad_norm": 0.24155444106265953, + "learning_rate": 4.6231998486157096e-05, + "loss": 0.5049, + "step": 5873 + }, + { + "epoch": 1.6439966414777498, + "grad_norm": 0.22196095420075007, + "learning_rate": 4.621661685687886e-05, + "loss": 0.47, + "step": 5874 + }, + { + "epoch": 1.644276518331934, + "grad_norm": 0.21753029932178544, + "learning_rate": 4.6201235587705154e-05, + "loss": 0.4843, + "step": 5875 + }, + { + "epoch": 1.6445563951861182, + "grad_norm": 0.2201254291133512, + "learning_rate": 4.6185854680099974e-05, + "loss": 0.4876, + "step": 5876 + }, + { + "epoch": 1.6448362720403022, + "grad_norm": 0.23444558673827312, + "learning_rate": 4.617047413552727e-05, + "loss": 0.5071, + "step": 5877 + }, + { + "epoch": 1.6451161488944863, + "grad_norm": 0.23655398524443977, + "learning_rate": 4.6155093955450985e-05, + "loss": 0.4718, + "step": 5878 + }, + { + "epoch": 1.6453960257486706, + "grad_norm": 0.22630376536241545, + "learning_rate": 4.6139714141335e-05, + "loss": 0.4784, + "step": 5879 + }, + { + "epoch": 1.645675902602855, + "grad_norm": 0.22067053937881898, + "learning_rate": 4.61243346946432e-05, + "loss": 0.4799, + "step": 5880 + }, + { + "epoch": 1.6459557794570387, + "grad_norm": 0.23505338311486845, + "learning_rate": 4.610895561683938e-05, + "loss": 0.4818, + "step": 5881 + }, + { + "epoch": 1.646235656311223, + "grad_norm": 0.22452848634660116, + "learning_rate": 4.6093576909387376e-05, + "loss": 0.475, + "step": 5882 + }, + { + "epoch": 1.6465155331654073, + "grad_norm": 0.23065555689457484, + "learning_rate": 4.607819857375088e-05, + "loss": 0.5123, + "step": 5883 + }, + { + "epoch": 1.6467954100195914, + "grad_norm": 0.219445416008085, + "learning_rate": 4.606282061139364e-05, + "loss": 0.4816, + "step": 5884 + }, + { + "epoch": 1.6470752868737755, + "grad_norm": 0.22709438830454265, + "learning_rate": 4.604744302377933e-05, + "loss": 0.5004, + "step": 5885 + }, + { + "epoch": 1.6473551637279598, + "grad_norm": 0.2355941425384972, + "learning_rate": 4.6032065812371614e-05, + "loss": 0.4682, + "step": 5886 + }, + { + "epoch": 1.6476350405821438, + "grad_norm": 0.22510368357791385, + "learning_rate": 4.6016688978634095e-05, + "loss": 0.506, + "step": 5887 + }, + { + "epoch": 1.647914917436328, + "grad_norm": 0.2258353823731744, + "learning_rate": 4.600131252403035e-05, + "loss": 0.4832, + "step": 5888 + }, + { + "epoch": 1.6481947942905122, + "grad_norm": 0.22288426387673418, + "learning_rate": 4.598593645002392e-05, + "loss": 0.481, + "step": 5889 + }, + { + "epoch": 1.6484746711446965, + "grad_norm": 0.22088939892550016, + "learning_rate": 4.597056075807829e-05, + "loss": 0.4951, + "step": 5890 + }, + { + "epoch": 1.6487545479988803, + "grad_norm": 0.23297039859727472, + "learning_rate": 4.5955185449656956e-05, + "loss": 0.4725, + "step": 5891 + }, + { + "epoch": 1.6490344248530646, + "grad_norm": 0.22609904076997075, + "learning_rate": 4.5939810526223336e-05, + "loss": 0.5165, + "step": 5892 + }, + { + "epoch": 1.649314301707249, + "grad_norm": 0.21915577383921972, + "learning_rate": 4.5924435989240813e-05, + "loss": 0.5098, + "step": 5893 + }, + { + "epoch": 1.649594178561433, + "grad_norm": 0.2169439502194135, + "learning_rate": 4.5909061840172764e-05, + "loss": 0.4764, + "step": 5894 + }, + { + "epoch": 1.649874055415617, + "grad_norm": 0.22813028526847892, + "learning_rate": 4.5893688080482494e-05, + "loss": 0.4878, + "step": 5895 + }, + { + "epoch": 1.6501539322698013, + "grad_norm": 0.22952508110132797, + "learning_rate": 4.587831471163328e-05, + "loss": 0.4893, + "step": 5896 + }, + { + "epoch": 1.6504338091239854, + "grad_norm": 0.21842203635432386, + "learning_rate": 4.586294173508839e-05, + "loss": 0.4738, + "step": 5897 + }, + { + "epoch": 1.6507136859781695, + "grad_norm": 0.22715534977074694, + "learning_rate": 4.5847569152311025e-05, + "loss": 0.4734, + "step": 5898 + }, + { + "epoch": 1.6509935628323538, + "grad_norm": 0.23395063346934178, + "learning_rate": 4.5832196964764354e-05, + "loss": 0.475, + "step": 5899 + }, + { + "epoch": 1.651273439686538, + "grad_norm": 0.22177215733046468, + "learning_rate": 4.5816825173911524e-05, + "loss": 0.4698, + "step": 5900 + }, + { + "epoch": 1.6515533165407221, + "grad_norm": 0.2355590874464105, + "learning_rate": 4.580145378121559e-05, + "loss": 0.4928, + "step": 5901 + }, + { + "epoch": 1.6518331933949062, + "grad_norm": 0.23261071282838638, + "learning_rate": 4.578608278813964e-05, + "loss": 0.5261, + "step": 5902 + }, + { + "epoch": 1.6521130702490905, + "grad_norm": 0.21952656709338766, + "learning_rate": 4.577071219614668e-05, + "loss": 0.4746, + "step": 5903 + }, + { + "epoch": 1.6523929471032746, + "grad_norm": 0.2324125419181912, + "learning_rate": 4.5755342006699706e-05, + "loss": 0.5151, + "step": 5904 + }, + { + "epoch": 1.6526728239574586, + "grad_norm": 0.22493144406925525, + "learning_rate": 4.5739972221261664e-05, + "loss": 0.4883, + "step": 5905 + }, + { + "epoch": 1.652952700811643, + "grad_norm": 0.23110646357607306, + "learning_rate": 4.572460284129544e-05, + "loss": 0.4691, + "step": 5906 + }, + { + "epoch": 1.653232577665827, + "grad_norm": 0.22352867732678683, + "learning_rate": 4.5709233868263926e-05, + "loss": 0.4825, + "step": 5907 + }, + { + "epoch": 1.653512454520011, + "grad_norm": 0.22669544003950048, + "learning_rate": 4.569386530362992e-05, + "loss": 0.4984, + "step": 5908 + }, + { + "epoch": 1.6537923313741953, + "grad_norm": 0.21539919627219203, + "learning_rate": 4.567849714885623e-05, + "loss": 0.4809, + "step": 5909 + }, + { + "epoch": 1.6540722082283796, + "grad_norm": 0.22892231206163438, + "learning_rate": 4.5663129405405594e-05, + "loss": 0.4964, + "step": 5910 + }, + { + "epoch": 1.6543520850825637, + "grad_norm": 0.22588310995304536, + "learning_rate": 4.5647762074740733e-05, + "loss": 0.4896, + "step": 5911 + }, + { + "epoch": 1.6546319619367478, + "grad_norm": 0.22702448418150784, + "learning_rate": 4.563239515832432e-05, + "loss": 0.4736, + "step": 5912 + }, + { + "epoch": 1.654911838790932, + "grad_norm": 0.21587541396299906, + "learning_rate": 4.561702865761897e-05, + "loss": 0.4635, + "step": 5913 + }, + { + "epoch": 1.6551917156451161, + "grad_norm": 0.21924899188902525, + "learning_rate": 4.560166257408728e-05, + "loss": 0.4737, + "step": 5914 + }, + { + "epoch": 1.6554715924993002, + "grad_norm": 0.224600179807053, + "learning_rate": 4.558629690919182e-05, + "loss": 0.4944, + "step": 5915 + }, + { + "epoch": 1.6557514693534845, + "grad_norm": 0.23126227922062179, + "learning_rate": 4.5570931664395086e-05, + "loss": 0.4833, + "step": 5916 + }, + { + "epoch": 1.6560313462076688, + "grad_norm": 0.24263733339694077, + "learning_rate": 4.555556684115956e-05, + "loss": 0.4934, + "step": 5917 + }, + { + "epoch": 1.6563112230618526, + "grad_norm": 0.22776045189029373, + "learning_rate": 4.5540202440947694e-05, + "loss": 0.4959, + "step": 5918 + }, + { + "epoch": 1.656591099916037, + "grad_norm": 0.23660381641151323, + "learning_rate": 4.5524838465221834e-05, + "loss": 0.5021, + "step": 5919 + }, + { + "epoch": 1.6568709767702212, + "grad_norm": 0.23757745635885563, + "learning_rate": 4.550947491544433e-05, + "loss": 0.4886, + "step": 5920 + }, + { + "epoch": 1.6571508536244053, + "grad_norm": 0.2308209439062421, + "learning_rate": 4.5494111793077544e-05, + "loss": 0.5043, + "step": 5921 + }, + { + "epoch": 1.6574307304785894, + "grad_norm": 0.23364151807682357, + "learning_rate": 4.5478749099583715e-05, + "loss": 0.4823, + "step": 5922 + }, + { + "epoch": 1.6577106073327736, + "grad_norm": 0.214234033539558, + "learning_rate": 4.546338683642507e-05, + "loss": 0.4854, + "step": 5923 + }, + { + "epoch": 1.6579904841869577, + "grad_norm": 0.23334496890303244, + "learning_rate": 4.544802500506381e-05, + "loss": 0.5016, + "step": 5924 + }, + { + "epoch": 1.6582703610411418, + "grad_norm": 0.2211220584306918, + "learning_rate": 4.543266360696208e-05, + "loss": 0.4759, + "step": 5925 + }, + { + "epoch": 1.658550237895326, + "grad_norm": 0.22684648721566092, + "learning_rate": 4.5417302643581985e-05, + "loss": 0.4795, + "step": 5926 + }, + { + "epoch": 1.6588301147495104, + "grad_norm": 0.2245276472669048, + "learning_rate": 4.5401942116385584e-05, + "loss": 0.4801, + "step": 5927 + }, + { + "epoch": 1.6591099916036942, + "grad_norm": 0.22804224758993163, + "learning_rate": 4.5386582026834906e-05, + "loss": 0.4978, + "step": 5928 + }, + { + "epoch": 1.6593898684578785, + "grad_norm": 0.22697568082009562, + "learning_rate": 4.5371222376391935e-05, + "loss": 0.4914, + "step": 5929 + }, + { + "epoch": 1.6596697453120628, + "grad_norm": 0.23634017328640117, + "learning_rate": 4.5355863166518616e-05, + "loss": 0.475, + "step": 5930 + }, + { + "epoch": 1.6599496221662469, + "grad_norm": 0.22538460152394849, + "learning_rate": 4.534050439867682e-05, + "loss": 0.4762, + "step": 5931 + }, + { + "epoch": 1.660229499020431, + "grad_norm": 0.21779274669211907, + "learning_rate": 4.532514607432843e-05, + "loss": 0.4813, + "step": 5932 + }, + { + "epoch": 1.6605093758746152, + "grad_norm": 0.22294375761213864, + "learning_rate": 4.5309788194935266e-05, + "loss": 0.469, + "step": 5933 + }, + { + "epoch": 1.6607892527287993, + "grad_norm": 0.21893675036246654, + "learning_rate": 4.5294430761959086e-05, + "loss": 0.4788, + "step": 5934 + }, + { + "epoch": 1.6610691295829834, + "grad_norm": 0.2334628749230677, + "learning_rate": 4.527907377686161e-05, + "loss": 0.4925, + "step": 5935 + }, + { + "epoch": 1.6613490064371677, + "grad_norm": 0.23360420114826788, + "learning_rate": 4.5263717241104566e-05, + "loss": 0.4958, + "step": 5936 + }, + { + "epoch": 1.661628883291352, + "grad_norm": 0.2263676043324893, + "learning_rate": 4.5248361156149526e-05, + "loss": 0.4867, + "step": 5937 + }, + { + "epoch": 1.661908760145536, + "grad_norm": 0.2330537673372661, + "learning_rate": 4.523300552345814e-05, + "loss": 0.4953, + "step": 5938 + }, + { + "epoch": 1.66218863699972, + "grad_norm": 0.23512582798314416, + "learning_rate": 4.521765034449197e-05, + "loss": 0.4913, + "step": 5939 + }, + { + "epoch": 1.6624685138539044, + "grad_norm": 0.22191100656784873, + "learning_rate": 4.5202295620712505e-05, + "loss": 0.4736, + "step": 5940 + }, + { + "epoch": 1.6627483907080884, + "grad_norm": 0.22857678048063068, + "learning_rate": 4.518694135358123e-05, + "loss": 0.4974, + "step": 5941 + }, + { + "epoch": 1.6630282675622725, + "grad_norm": 0.23791749099365347, + "learning_rate": 4.517158754455957e-05, + "loss": 0.4997, + "step": 5942 + }, + { + "epoch": 1.6633081444164568, + "grad_norm": 0.22313267681411098, + "learning_rate": 4.5156234195108916e-05, + "loss": 0.4862, + "step": 5943 + }, + { + "epoch": 1.6635880212706409, + "grad_norm": 0.2280697590093085, + "learning_rate": 4.5140881306690594e-05, + "loss": 0.4988, + "step": 5944 + }, + { + "epoch": 1.663867898124825, + "grad_norm": 0.22302134670852036, + "learning_rate": 4.5125528880765916e-05, + "loss": 0.5072, + "step": 5945 + }, + { + "epoch": 1.6641477749790092, + "grad_norm": 0.23689990892063556, + "learning_rate": 4.511017691879613e-05, + "loss": 0.4952, + "step": 5946 + }, + { + "epoch": 1.6644276518331935, + "grad_norm": 0.2377504636023883, + "learning_rate": 4.509482542224243e-05, + "loss": 0.4788, + "step": 5947 + }, + { + "epoch": 1.6647075286873776, + "grad_norm": 0.2242851228251371, + "learning_rate": 4.507947439256599e-05, + "loss": 0.4843, + "step": 5948 + }, + { + "epoch": 1.6649874055415617, + "grad_norm": 0.23313302147950857, + "learning_rate": 4.506412383122794e-05, + "loss": 0.4704, + "step": 5949 + }, + { + "epoch": 1.665267282395746, + "grad_norm": 0.23475206775431098, + "learning_rate": 4.504877373968935e-05, + "loss": 0.489, + "step": 5950 + }, + { + "epoch": 1.66554715924993, + "grad_norm": 0.24143362398417503, + "learning_rate": 4.503342411941124e-05, + "loss": 0.5068, + "step": 5951 + }, + { + "epoch": 1.665827036104114, + "grad_norm": 0.21870969193252662, + "learning_rate": 4.5018074971854606e-05, + "loss": 0.4911, + "step": 5952 + }, + { + "epoch": 1.6661069129582984, + "grad_norm": 0.2267434263891294, + "learning_rate": 4.5002726298480383e-05, + "loss": 0.499, + "step": 5953 + }, + { + "epoch": 1.6663867898124827, + "grad_norm": 0.21980272024638106, + "learning_rate": 4.4987378100749475e-05, + "loss": 0.4954, + "step": 5954 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.21489239501568574, + "learning_rate": 4.497203038012272e-05, + "loss": 0.4776, + "step": 5955 + }, + { + "epoch": 1.6669465435208508, + "grad_norm": 0.22328585712202512, + "learning_rate": 4.495668313806093e-05, + "loss": 0.4715, + "step": 5956 + }, + { + "epoch": 1.667226420375035, + "grad_norm": 0.22063427316487816, + "learning_rate": 4.494133637602487e-05, + "loss": 0.4998, + "step": 5957 + }, + { + "epoch": 1.6675062972292192, + "grad_norm": 0.2247994619535898, + "learning_rate": 4.492599009547523e-05, + "loss": 0.4851, + "step": 5958 + }, + { + "epoch": 1.6677861740834032, + "grad_norm": 0.23070369528670504, + "learning_rate": 4.491064429787269e-05, + "loss": 0.4945, + "step": 5959 + }, + { + "epoch": 1.6680660509375875, + "grad_norm": 0.22756132306061724, + "learning_rate": 4.4895298984677886e-05, + "loss": 0.4875, + "step": 5960 + }, + { + "epoch": 1.6683459277917716, + "grad_norm": 0.21784003112082723, + "learning_rate": 4.487995415735138e-05, + "loss": 0.4794, + "step": 5961 + }, + { + "epoch": 1.6686258046459557, + "grad_norm": 0.2203660024385122, + "learning_rate": 4.48646098173537e-05, + "loss": 0.4654, + "step": 5962 + }, + { + "epoch": 1.66890568150014, + "grad_norm": 0.22296256835570522, + "learning_rate": 4.484926596614533e-05, + "loss": 0.474, + "step": 5963 + }, + { + "epoch": 1.6691855583543243, + "grad_norm": 0.224512619034478, + "learning_rate": 4.483392260518671e-05, + "loss": 0.5039, + "step": 5964 + }, + { + "epoch": 1.669465435208508, + "grad_norm": 0.2293958680434901, + "learning_rate": 4.4818579735938234e-05, + "loss": 0.5024, + "step": 5965 + }, + { + "epoch": 1.6697453120626924, + "grad_norm": 0.2156340768395178, + "learning_rate": 4.480323735986022e-05, + "loss": 0.4879, + "step": 5966 + }, + { + "epoch": 1.6700251889168767, + "grad_norm": 0.2200493844929716, + "learning_rate": 4.4787895478412996e-05, + "loss": 0.5048, + "step": 5967 + }, + { + "epoch": 1.6703050657710607, + "grad_norm": 0.2283927289000887, + "learning_rate": 4.4772554093056794e-05, + "loss": 0.4835, + "step": 5968 + }, + { + "epoch": 1.6705849426252448, + "grad_norm": 0.22184732269432453, + "learning_rate": 4.4757213205251826e-05, + "loss": 0.4973, + "step": 5969 + }, + { + "epoch": 1.670864819479429, + "grad_norm": 0.2323183695512758, + "learning_rate": 4.474187281645824e-05, + "loss": 0.4947, + "step": 5970 + }, + { + "epoch": 1.6711446963336132, + "grad_norm": 0.22148138120751784, + "learning_rate": 4.472653292813614e-05, + "loss": 0.4751, + "step": 5971 + }, + { + "epoch": 1.6714245731877972, + "grad_norm": 0.2307676613015895, + "learning_rate": 4.4711193541745584e-05, + "loss": 0.4809, + "step": 5972 + }, + { + "epoch": 1.6717044500419815, + "grad_norm": 0.2151710300959927, + "learning_rate": 4.46958546587466e-05, + "loss": 0.4666, + "step": 5973 + }, + { + "epoch": 1.6719843268961658, + "grad_norm": 0.23775857612586254, + "learning_rate": 4.4680516280599136e-05, + "loss": 0.5043, + "step": 5974 + }, + { + "epoch": 1.67226420375035, + "grad_norm": 0.23148145914124751, + "learning_rate": 4.46651784087631e-05, + "loss": 0.5176, + "step": 5975 + }, + { + "epoch": 1.672544080604534, + "grad_norm": 0.23257994507830823, + "learning_rate": 4.464984104469837e-05, + "loss": 0.5227, + "step": 5976 + }, + { + "epoch": 1.6728239574587183, + "grad_norm": 0.23169417162029926, + "learning_rate": 4.4634504189864765e-05, + "loss": 0.4867, + "step": 5977 + }, + { + "epoch": 1.6731038343129023, + "grad_norm": 0.2202409752353094, + "learning_rate": 4.4619167845722056e-05, + "loss": 0.4904, + "step": 5978 + }, + { + "epoch": 1.6733837111670864, + "grad_norm": 0.2324491677944803, + "learning_rate": 4.4603832013729966e-05, + "loss": 0.507, + "step": 5979 + }, + { + "epoch": 1.6736635880212707, + "grad_norm": 0.22796481642585026, + "learning_rate": 4.4588496695348155e-05, + "loss": 0.4838, + "step": 5980 + }, + { + "epoch": 1.6739434648754548, + "grad_norm": 0.22547180308225825, + "learning_rate": 4.4573161892036264e-05, + "loss": 0.4839, + "step": 5981 + }, + { + "epoch": 1.6742233417296388, + "grad_norm": 0.22324709385121597, + "learning_rate": 4.455782760525386e-05, + "loss": 0.4676, + "step": 5982 + }, + { + "epoch": 1.6745032185838231, + "grad_norm": 0.21827408215289565, + "learning_rate": 4.454249383646046e-05, + "loss": 0.4838, + "step": 5983 + }, + { + "epoch": 1.6747830954380074, + "grad_norm": 0.2178667398543891, + "learning_rate": 4.452716058711557e-05, + "loss": 0.4762, + "step": 5984 + }, + { + "epoch": 1.6750629722921915, + "grad_norm": 0.21918654796609172, + "learning_rate": 4.451182785867859e-05, + "loss": 0.5008, + "step": 5985 + }, + { + "epoch": 1.6753428491463755, + "grad_norm": 0.22649588054297143, + "learning_rate": 4.4496495652608904e-05, + "loss": 0.4837, + "step": 5986 + }, + { + "epoch": 1.6756227260005598, + "grad_norm": 0.23231101716757543, + "learning_rate": 4.4481163970365846e-05, + "loss": 0.4907, + "step": 5987 + }, + { + "epoch": 1.675902602854744, + "grad_norm": 0.22660506943019057, + "learning_rate": 4.446583281340869e-05, + "loss": 0.4705, + "step": 5988 + }, + { + "epoch": 1.676182479708928, + "grad_norm": 0.24184240414493793, + "learning_rate": 4.445050218319667e-05, + "loss": 0.4793, + "step": 5989 + }, + { + "epoch": 1.6764623565631123, + "grad_norm": 0.22491414842907215, + "learning_rate": 4.443517208118895e-05, + "loss": 0.4876, + "step": 5990 + }, + { + "epoch": 1.6767422334172963, + "grad_norm": 0.23170841467206285, + "learning_rate": 4.441984250884468e-05, + "loss": 0.4831, + "step": 5991 + }, + { + "epoch": 1.6770221102714804, + "grad_norm": 0.22880432470515008, + "learning_rate": 4.440451346762292e-05, + "loss": 0.4973, + "step": 5992 + }, + { + "epoch": 1.6773019871256647, + "grad_norm": 0.228564804999199, + "learning_rate": 4.438918495898269e-05, + "loss": 0.4643, + "step": 5993 + }, + { + "epoch": 1.677581863979849, + "grad_norm": 0.2332438590298918, + "learning_rate": 4.4373856984382984e-05, + "loss": 0.5096, + "step": 5994 + }, + { + "epoch": 1.677861740834033, + "grad_norm": 0.2249740894865392, + "learning_rate": 4.435852954528271e-05, + "loss": 0.5285, + "step": 5995 + }, + { + "epoch": 1.6781416176882171, + "grad_norm": 0.23589221551388054, + "learning_rate": 4.434320264314076e-05, + "loss": 0.4954, + "step": 5996 + }, + { + "epoch": 1.6784214945424014, + "grad_norm": 0.22070208814414577, + "learning_rate": 4.432787627941594e-05, + "loss": 0.4601, + "step": 5997 + }, + { + "epoch": 1.6787013713965855, + "grad_norm": 0.22806956980514126, + "learning_rate": 4.431255045556704e-05, + "loss": 0.4991, + "step": 5998 + }, + { + "epoch": 1.6789812482507696, + "grad_norm": 0.22335521181772075, + "learning_rate": 4.429722517305276e-05, + "loss": 0.4932, + "step": 5999 + }, + { + "epoch": 1.6792611251049538, + "grad_norm": 0.2201566645866044, + "learning_rate": 4.428190043333178e-05, + "loss": 0.4876, + "step": 6000 + }, + { + "epoch": 1.6795410019591381, + "grad_norm": 0.21884538054303299, + "learning_rate": 4.426657623786272e-05, + "loss": 0.4914, + "step": 6001 + }, + { + "epoch": 1.679820878813322, + "grad_norm": 0.23234277992764904, + "learning_rate": 4.4251252588104153e-05, + "loss": 0.5001, + "step": 6002 + }, + { + "epoch": 1.6801007556675063, + "grad_norm": 0.2218292395366639, + "learning_rate": 4.4235929485514576e-05, + "loss": 0.4644, + "step": 6003 + }, + { + "epoch": 1.6803806325216906, + "grad_norm": 0.21673290973954457, + "learning_rate": 4.4220606931552454e-05, + "loss": 0.4808, + "step": 6004 + }, + { + "epoch": 1.6806605093758746, + "grad_norm": 0.2374249951356412, + "learning_rate": 4.420528492767621e-05, + "loss": 0.4766, + "step": 6005 + }, + { + "epoch": 1.6809403862300587, + "grad_norm": 0.2150067457956711, + "learning_rate": 4.418996347534418e-05, + "loss": 0.4516, + "step": 6006 + }, + { + "epoch": 1.681220263084243, + "grad_norm": 0.22367424033055425, + "learning_rate": 4.4174642576014677e-05, + "loss": 0.5006, + "step": 6007 + }, + { + "epoch": 1.681500139938427, + "grad_norm": 0.2277806770007238, + "learning_rate": 4.4159322231145956e-05, + "loss": 0.4803, + "step": 6008 + }, + { + "epoch": 1.6817800167926111, + "grad_norm": 0.2209137338224397, + "learning_rate": 4.414400244219623e-05, + "loss": 0.4828, + "step": 6009 + }, + { + "epoch": 1.6820598936467954, + "grad_norm": 0.2269520107756094, + "learning_rate": 4.412868321062361e-05, + "loss": 0.4935, + "step": 6010 + }, + { + "epoch": 1.6823397705009797, + "grad_norm": 0.21970491392431596, + "learning_rate": 4.4113364537886215e-05, + "loss": 0.4775, + "step": 6011 + }, + { + "epoch": 1.6826196473551638, + "grad_norm": 0.23179244760445664, + "learning_rate": 4.409804642544208e-05, + "loss": 0.4977, + "step": 6012 + }, + { + "epoch": 1.6828995242093479, + "grad_norm": 0.23282758041738078, + "learning_rate": 4.408272887474919e-05, + "loss": 0.5092, + "step": 6013 + }, + { + "epoch": 1.6831794010635321, + "grad_norm": 0.22119181773137908, + "learning_rate": 4.406741188726547e-05, + "loss": 0.4692, + "step": 6014 + }, + { + "epoch": 1.6834592779177162, + "grad_norm": 0.23504555619711215, + "learning_rate": 4.405209546444881e-05, + "loss": 0.4934, + "step": 6015 + }, + { + "epoch": 1.6837391547719003, + "grad_norm": 0.2258657833541981, + "learning_rate": 4.403677960775704e-05, + "loss": 0.4939, + "step": 6016 + }, + { + "epoch": 1.6840190316260846, + "grad_norm": 0.22764896633116333, + "learning_rate": 4.402146431864791e-05, + "loss": 0.4862, + "step": 6017 + }, + { + "epoch": 1.6842989084802686, + "grad_norm": 0.2221145229758645, + "learning_rate": 4.4006149598579155e-05, + "loss": 0.509, + "step": 6018 + }, + { + "epoch": 1.6845787853344527, + "grad_norm": 0.22262471785163387, + "learning_rate": 4.399083544900845e-05, + "loss": 0.4824, + "step": 6019 + }, + { + "epoch": 1.684858662188637, + "grad_norm": 0.22314588114794257, + "learning_rate": 4.3975521871393374e-05, + "loss": 0.4692, + "step": 6020 + }, + { + "epoch": 1.6851385390428213, + "grad_norm": 0.23523248038800929, + "learning_rate": 4.396020886719151e-05, + "loss": 0.5041, + "step": 6021 + }, + { + "epoch": 1.6854184158970054, + "grad_norm": 0.23084253004983865, + "learning_rate": 4.394489643786034e-05, + "loss": 0.4845, + "step": 6022 + }, + { + "epoch": 1.6856982927511894, + "grad_norm": 0.23477951351420004, + "learning_rate": 4.392958458485733e-05, + "loss": 0.5015, + "step": 6023 + }, + { + "epoch": 1.6859781696053737, + "grad_norm": 0.23127872384997272, + "learning_rate": 4.391427330963984e-05, + "loss": 0.4925, + "step": 6024 + }, + { + "epoch": 1.6862580464595578, + "grad_norm": 0.22680649351589788, + "learning_rate": 4.389896261366523e-05, + "loss": 0.4778, + "step": 6025 + }, + { + "epoch": 1.6865379233137419, + "grad_norm": 0.2325083981656526, + "learning_rate": 4.388365249839077e-05, + "loss": 0.489, + "step": 6026 + }, + { + "epoch": 1.6868178001679262, + "grad_norm": 0.2327805304002526, + "learning_rate": 4.386834296527371e-05, + "loss": 0.4944, + "step": 6027 + }, + { + "epoch": 1.6870976770221102, + "grad_norm": 0.2239983250870412, + "learning_rate": 4.385303401577118e-05, + "loss": 0.4778, + "step": 6028 + }, + { + "epoch": 1.6873775538762943, + "grad_norm": 0.2321190798666152, + "learning_rate": 4.3837725651340314e-05, + "loss": 0.4714, + "step": 6029 + }, + { + "epoch": 1.6876574307304786, + "grad_norm": 0.2512752993266075, + "learning_rate": 4.3822417873438165e-05, + "loss": 0.4904, + "step": 6030 + }, + { + "epoch": 1.6879373075846629, + "grad_norm": 0.2334635714913292, + "learning_rate": 4.3807110683521744e-05, + "loss": 0.4754, + "step": 6031 + }, + { + "epoch": 1.688217184438847, + "grad_norm": 0.2374893210946379, + "learning_rate": 4.3791804083047997e-05, + "loss": 0.5237, + "step": 6032 + }, + { + "epoch": 1.688497061293031, + "grad_norm": 0.2308533756499172, + "learning_rate": 4.377649807347381e-05, + "loss": 0.4938, + "step": 6033 + }, + { + "epoch": 1.6887769381472153, + "grad_norm": 0.23803728355173645, + "learning_rate": 4.376119265625602e-05, + "loss": 0.4918, + "step": 6034 + }, + { + "epoch": 1.6890568150013994, + "grad_norm": 0.23214402677120147, + "learning_rate": 4.3745887832851395e-05, + "loss": 0.4882, + "step": 6035 + }, + { + "epoch": 1.6893366918555834, + "grad_norm": 0.2145404758363729, + "learning_rate": 4.373058360471668e-05, + "loss": 0.4714, + "step": 6036 + }, + { + "epoch": 1.6896165687097677, + "grad_norm": 0.22197510657771885, + "learning_rate": 4.371527997330852e-05, + "loss": 0.4564, + "step": 6037 + }, + { + "epoch": 1.689896445563952, + "grad_norm": 0.2342687179885491, + "learning_rate": 4.3699976940083534e-05, + "loss": 0.487, + "step": 6038 + }, + { + "epoch": 1.6901763224181359, + "grad_norm": 0.22799044299008367, + "learning_rate": 4.3684674506498266e-05, + "loss": 0.4946, + "step": 6039 + }, + { + "epoch": 1.6904561992723202, + "grad_norm": 0.23406817373569913, + "learning_rate": 4.366937267400922e-05, + "loss": 0.5055, + "step": 6040 + }, + { + "epoch": 1.6907360761265045, + "grad_norm": 0.22671184495415245, + "learning_rate": 4.365407144407282e-05, + "loss": 0.4862, + "step": 6041 + }, + { + "epoch": 1.6910159529806885, + "grad_norm": 0.23380234311764664, + "learning_rate": 4.3638770818145455e-05, + "loss": 0.457, + "step": 6042 + }, + { + "epoch": 1.6912958298348726, + "grad_norm": 0.23079266529705078, + "learning_rate": 4.3623470797683444e-05, + "loss": 0.472, + "step": 6043 + }, + { + "epoch": 1.6915757066890569, + "grad_norm": 0.23830170302766132, + "learning_rate": 4.360817138414305e-05, + "loss": 0.4936, + "step": 6044 + }, + { + "epoch": 1.691855583543241, + "grad_norm": 0.23149585393763558, + "learning_rate": 4.359287257898049e-05, + "loss": 0.4891, + "step": 6045 + }, + { + "epoch": 1.692135460397425, + "grad_norm": 0.22228461204854483, + "learning_rate": 4.35775743836519e-05, + "loss": 0.4601, + "step": 6046 + }, + { + "epoch": 1.6924153372516093, + "grad_norm": 0.23799397030513383, + "learning_rate": 4.356227679961337e-05, + "loss": 0.4945, + "step": 6047 + }, + { + "epoch": 1.6926952141057936, + "grad_norm": 0.2295661357899793, + "learning_rate": 4.354697982832094e-05, + "loss": 0.4995, + "step": 6048 + }, + { + "epoch": 1.6929750909599774, + "grad_norm": 0.22878351557382878, + "learning_rate": 4.353168347123058e-05, + "loss": 0.5071, + "step": 6049 + }, + { + "epoch": 1.6932549678141617, + "grad_norm": 0.22099703818909397, + "learning_rate": 4.351638772979821e-05, + "loss": 0.4929, + "step": 6050 + }, + { + "epoch": 1.693534844668346, + "grad_norm": 0.2322131541717201, + "learning_rate": 4.350109260547968e-05, + "loss": 0.484, + "step": 6051 + }, + { + "epoch": 1.69381472152253, + "grad_norm": 0.23344038059601066, + "learning_rate": 4.348579809973078e-05, + "loss": 0.4944, + "step": 6052 + }, + { + "epoch": 1.6940945983767142, + "grad_norm": 0.23085834900844993, + "learning_rate": 4.3470504214007286e-05, + "loss": 0.489, + "step": 6053 + }, + { + "epoch": 1.6943744752308985, + "grad_norm": 0.21827723584864406, + "learning_rate": 4.345521094976485e-05, + "loss": 0.4846, + "step": 6054 + }, + { + "epoch": 1.6946543520850825, + "grad_norm": 0.22716920261273174, + "learning_rate": 4.3439918308459096e-05, + "loss": 0.4849, + "step": 6055 + }, + { + "epoch": 1.6949342289392666, + "grad_norm": 0.2341787702407808, + "learning_rate": 4.342462629154559e-05, + "loss": 0.4872, + "step": 6056 + }, + { + "epoch": 1.6952141057934509, + "grad_norm": 0.23229886711449935, + "learning_rate": 4.3409334900479836e-05, + "loss": 0.4619, + "step": 6057 + }, + { + "epoch": 1.6954939826476352, + "grad_norm": 0.22132737650463888, + "learning_rate": 4.3394044136717276e-05, + "loss": 0.4665, + "step": 6058 + }, + { + "epoch": 1.6957738595018192, + "grad_norm": 0.22553467244797953, + "learning_rate": 4.337875400171329e-05, + "loss": 0.4635, + "step": 6059 + }, + { + "epoch": 1.6960537363560033, + "grad_norm": 0.2385463024188196, + "learning_rate": 4.336346449692321e-05, + "loss": 0.4988, + "step": 6060 + }, + { + "epoch": 1.6963336132101876, + "grad_norm": 0.2254335937544675, + "learning_rate": 4.3348175623802284e-05, + "loss": 0.4875, + "step": 6061 + }, + { + "epoch": 1.6966134900643717, + "grad_norm": 0.2278016706264, + "learning_rate": 4.333288738380573e-05, + "loss": 0.513, + "step": 6062 + }, + { + "epoch": 1.6968933669185557, + "grad_norm": 0.22408095344803847, + "learning_rate": 4.33175997783887e-05, + "loss": 0.4716, + "step": 6063 + }, + { + "epoch": 1.69717324377274, + "grad_norm": 0.2408144700089221, + "learning_rate": 4.330231280900625e-05, + "loss": 0.4994, + "step": 6064 + }, + { + "epoch": 1.697453120626924, + "grad_norm": 0.22873174835259108, + "learning_rate": 4.328702647711342e-05, + "loss": 0.4892, + "step": 6065 + }, + { + "epoch": 1.6977329974811082, + "grad_norm": 0.23249504156013415, + "learning_rate": 4.327174078416516e-05, + "loss": 0.484, + "step": 6066 + }, + { + "epoch": 1.6980128743352925, + "grad_norm": 0.23536992910481908, + "learning_rate": 4.3256455731616385e-05, + "loss": 0.4891, + "step": 6067 + }, + { + "epoch": 1.6982927511894768, + "grad_norm": 0.23425136163775556, + "learning_rate": 4.324117132092193e-05, + "loss": 0.475, + "step": 6068 + }, + { + "epoch": 1.6985726280436608, + "grad_norm": 0.23819526558478157, + "learning_rate": 4.3225887553536546e-05, + "loss": 0.4904, + "step": 6069 + }, + { + "epoch": 1.698852504897845, + "grad_norm": 0.22210495860003826, + "learning_rate": 4.3210604430914995e-05, + "loss": 0.4859, + "step": 6070 + }, + { + "epoch": 1.6991323817520292, + "grad_norm": 0.24111902482093175, + "learning_rate": 4.319532195451192e-05, + "loss": 0.4911, + "step": 6071 + }, + { + "epoch": 1.6994122586062133, + "grad_norm": 0.218296239987164, + "learning_rate": 4.3180040125781905e-05, + "loss": 0.4821, + "step": 6072 + }, + { + "epoch": 1.6996921354603973, + "grad_norm": 0.23173927481366371, + "learning_rate": 4.316475894617949e-05, + "loss": 0.51, + "step": 6073 + }, + { + "epoch": 1.6999720123145816, + "grad_norm": 0.22818910508063336, + "learning_rate": 4.314947841715914e-05, + "loss": 0.4996, + "step": 6074 + }, + { + "epoch": 1.700251889168766, + "grad_norm": 0.23734091010495434, + "learning_rate": 4.313419854017528e-05, + "loss": 0.4831, + "step": 6075 + }, + { + "epoch": 1.7005317660229498, + "grad_norm": 0.22723183806207628, + "learning_rate": 4.311891931668223e-05, + "loss": 0.495, + "step": 6076 + }, + { + "epoch": 1.700811642877134, + "grad_norm": 0.2325060112545814, + "learning_rate": 4.31036407481343e-05, + "loss": 0.4872, + "step": 6077 + }, + { + "epoch": 1.7010915197313183, + "grad_norm": 0.22319014948993274, + "learning_rate": 4.308836283598571e-05, + "loss": 0.4972, + "step": 6078 + }, + { + "epoch": 1.7013713965855024, + "grad_norm": 0.2286954356567191, + "learning_rate": 4.3073085581690605e-05, + "loss": 0.4978, + "step": 6079 + }, + { + "epoch": 1.7016512734396865, + "grad_norm": 0.22910917223706653, + "learning_rate": 4.305780898670308e-05, + "loss": 0.4995, + "step": 6080 + }, + { + "epoch": 1.7019311502938708, + "grad_norm": 0.21654689465526492, + "learning_rate": 4.304253305247722e-05, + "loss": 0.4879, + "step": 6081 + }, + { + "epoch": 1.7022110271480548, + "grad_norm": 0.2318852139598937, + "learning_rate": 4.302725778046693e-05, + "loss": 0.5043, + "step": 6082 + }, + { + "epoch": 1.702490904002239, + "grad_norm": 0.23702870671028914, + "learning_rate": 4.301198317212615e-05, + "loss": 0.5109, + "step": 6083 + }, + { + "epoch": 1.7027707808564232, + "grad_norm": 0.22319624244566316, + "learning_rate": 4.299670922890873e-05, + "loss": 0.505, + "step": 6084 + }, + { + "epoch": 1.7030506577106075, + "grad_norm": 0.22340990152011508, + "learning_rate": 4.298143595226843e-05, + "loss": 0.4995, + "step": 6085 + }, + { + "epoch": 1.7033305345647913, + "grad_norm": 0.21696548119035303, + "learning_rate": 4.2966163343658994e-05, + "loss": 0.4749, + "step": 6086 + }, + { + "epoch": 1.7036104114189756, + "grad_norm": 0.2234274616324956, + "learning_rate": 4.2950891404534056e-05, + "loss": 0.4828, + "step": 6087 + }, + { + "epoch": 1.70389028827316, + "grad_norm": 0.22168470891317238, + "learning_rate": 4.293562013634723e-05, + "loss": 0.4793, + "step": 6088 + }, + { + "epoch": 1.704170165127344, + "grad_norm": 0.2289638613127193, + "learning_rate": 4.292034954055204e-05, + "loss": 0.5193, + "step": 6089 + }, + { + "epoch": 1.704450041981528, + "grad_norm": 0.2312629677688086, + "learning_rate": 4.290507961860194e-05, + "loss": 0.4982, + "step": 6090 + }, + { + "epoch": 1.7047299188357123, + "grad_norm": 0.23012149422157516, + "learning_rate": 4.2889810371950325e-05, + "loss": 0.4896, + "step": 6091 + }, + { + "epoch": 1.7050097956898964, + "grad_norm": 0.22197492836476815, + "learning_rate": 4.287454180205055e-05, + "loss": 0.4881, + "step": 6092 + }, + { + "epoch": 1.7052896725440805, + "grad_norm": 0.23131945807116241, + "learning_rate": 4.285927391035587e-05, + "loss": 0.4764, + "step": 6093 + }, + { + "epoch": 1.7055695493982648, + "grad_norm": 0.22619551209841873, + "learning_rate": 4.284400669831949e-05, + "loss": 0.5032, + "step": 6094 + }, + { + "epoch": 1.705849426252449, + "grad_norm": 0.21874781523908743, + "learning_rate": 4.282874016739456e-05, + "loss": 0.4633, + "step": 6095 + }, + { + "epoch": 1.7061293031066331, + "grad_norm": 0.23408565220552005, + "learning_rate": 4.281347431903416e-05, + "loss": 0.5201, + "step": 6096 + }, + { + "epoch": 1.7064091799608172, + "grad_norm": 0.21388880137663957, + "learning_rate": 4.2798209154691294e-05, + "loss": 0.4691, + "step": 6097 + }, + { + "epoch": 1.7066890568150015, + "grad_norm": 0.21823192167589522, + "learning_rate": 4.2782944675818905e-05, + "loss": 0.486, + "step": 6098 + }, + { + "epoch": 1.7069689336691856, + "grad_norm": 0.23560885770576326, + "learning_rate": 4.276768088386991e-05, + "loss": 0.4867, + "step": 6099 + }, + { + "epoch": 1.7072488105233696, + "grad_norm": 0.23159807051987655, + "learning_rate": 4.275241778029707e-05, + "loss": 0.4874, + "step": 6100 + }, + { + "epoch": 1.707528687377554, + "grad_norm": 0.2291395922136475, + "learning_rate": 4.2737155366553174e-05, + "loss": 0.4702, + "step": 6101 + }, + { + "epoch": 1.707808564231738, + "grad_norm": 0.2333931478112489, + "learning_rate": 4.272189364409088e-05, + "loss": 0.4955, + "step": 6102 + }, + { + "epoch": 1.708088441085922, + "grad_norm": 0.23706638507750027, + "learning_rate": 4.270663261436284e-05, + "loss": 0.5035, + "step": 6103 + }, + { + "epoch": 1.7083683179401064, + "grad_norm": 0.22135302301079363, + "learning_rate": 4.269137227882157e-05, + "loss": 0.4602, + "step": 6104 + }, + { + "epoch": 1.7086481947942906, + "grad_norm": 0.23384398860035133, + "learning_rate": 4.2676112638919584e-05, + "loss": 0.4814, + "step": 6105 + }, + { + "epoch": 1.7089280716484747, + "grad_norm": 0.23066932047048283, + "learning_rate": 4.26608536961093e-05, + "loss": 0.5053, + "step": 6106 + }, + { + "epoch": 1.7092079485026588, + "grad_norm": 0.22072806062248457, + "learning_rate": 4.2645595451843075e-05, + "loss": 0.4765, + "step": 6107 + }, + { + "epoch": 1.709487825356843, + "grad_norm": 0.22973694101920933, + "learning_rate": 4.263033790757319e-05, + "loss": 0.4999, + "step": 6108 + }, + { + "epoch": 1.7097677022110271, + "grad_norm": 0.23426867632197704, + "learning_rate": 4.261508106475186e-05, + "loss": 0.4965, + "step": 6109 + }, + { + "epoch": 1.7100475790652112, + "grad_norm": 0.21082326494989032, + "learning_rate": 4.2599824924831254e-05, + "loss": 0.4932, + "step": 6110 + }, + { + "epoch": 1.7103274559193955, + "grad_norm": 0.2260451150003131, + "learning_rate": 4.258456948926345e-05, + "loss": 0.4795, + "step": 6111 + }, + { + "epoch": 1.7106073327735796, + "grad_norm": 0.2302765994078192, + "learning_rate": 4.256931475950048e-05, + "loss": 0.4866, + "step": 6112 + }, + { + "epoch": 1.7108872096277636, + "grad_norm": 0.2247863162171495, + "learning_rate": 4.2554060736994284e-05, + "loss": 0.4895, + "step": 6113 + }, + { + "epoch": 1.711167086481948, + "grad_norm": 0.23224152782161891, + "learning_rate": 4.2538807423196755e-05, + "loss": 0.4795, + "step": 6114 + }, + { + "epoch": 1.7114469633361322, + "grad_norm": 0.22876991671248526, + "learning_rate": 4.25235548195597e-05, + "loss": 0.4911, + "step": 6115 + }, + { + "epoch": 1.7117268401903163, + "grad_norm": 0.23545289575333217, + "learning_rate": 4.250830292753489e-05, + "loss": 0.5082, + "step": 6116 + }, + { + "epoch": 1.7120067170445004, + "grad_norm": 0.22156199507874028, + "learning_rate": 4.249305174857403e-05, + "loss": 0.4847, + "step": 6117 + }, + { + "epoch": 1.7122865938986847, + "grad_norm": 0.22339088924553077, + "learning_rate": 4.247780128412868e-05, + "loss": 0.4956, + "step": 6118 + }, + { + "epoch": 1.7125664707528687, + "grad_norm": 0.2310339784394574, + "learning_rate": 4.246255153565042e-05, + "loss": 0.4957, + "step": 6119 + }, + { + "epoch": 1.7128463476070528, + "grad_norm": 0.2232197000183747, + "learning_rate": 4.244730250459072e-05, + "loss": 0.4688, + "step": 6120 + }, + { + "epoch": 1.713126224461237, + "grad_norm": 0.22006292518528192, + "learning_rate": 4.2432054192400983e-05, + "loss": 0.4819, + "step": 6121 + }, + { + "epoch": 1.7134061013154214, + "grad_norm": 0.23648859392172267, + "learning_rate": 4.241680660053258e-05, + "loss": 0.5008, + "step": 6122 + }, + { + "epoch": 1.7136859781696052, + "grad_norm": 0.21708059395993587, + "learning_rate": 4.240155973043676e-05, + "loss": 0.4826, + "step": 6123 + }, + { + "epoch": 1.7139658550237895, + "grad_norm": 0.2336780630673386, + "learning_rate": 4.238631358356474e-05, + "loss": 0.4981, + "step": 6124 + }, + { + "epoch": 1.7142457318779738, + "grad_norm": 0.22097470300748698, + "learning_rate": 4.2371068161367655e-05, + "loss": 0.4634, + "step": 6125 + }, + { + "epoch": 1.7145256087321579, + "grad_norm": 0.21234749760716135, + "learning_rate": 4.235582346529658e-05, + "loss": 0.4549, + "step": 6126 + }, + { + "epoch": 1.714805485586342, + "grad_norm": 0.22530451777881053, + "learning_rate": 4.2340579496802505e-05, + "loss": 0.4986, + "step": 6127 + }, + { + "epoch": 1.7150853624405262, + "grad_norm": 0.23549957729924606, + "learning_rate": 4.232533625733635e-05, + "loss": 0.497, + "step": 6128 + }, + { + "epoch": 1.7153652392947103, + "grad_norm": 0.22971748808940273, + "learning_rate": 4.2310093748348995e-05, + "loss": 0.4939, + "step": 6129 + }, + { + "epoch": 1.7156451161488944, + "grad_norm": 0.23289807795750575, + "learning_rate": 4.229485197129122e-05, + "loss": 0.4977, + "step": 6130 + }, + { + "epoch": 1.7159249930030787, + "grad_norm": 0.24009905850185356, + "learning_rate": 4.227961092761374e-05, + "loss": 0.4764, + "step": 6131 + }, + { + "epoch": 1.716204869857263, + "grad_norm": 0.2652582748926836, + "learning_rate": 4.226437061876721e-05, + "loss": 0.4748, + "step": 6132 + }, + { + "epoch": 1.716484746711447, + "grad_norm": 0.22182048827203818, + "learning_rate": 4.2249131046202225e-05, + "loss": 0.4977, + "step": 6133 + }, + { + "epoch": 1.716764623565631, + "grad_norm": 0.22451475379748234, + "learning_rate": 4.2233892211369286e-05, + "loss": 0.5034, + "step": 6134 + }, + { + "epoch": 1.7170445004198154, + "grad_norm": 0.23047466351686094, + "learning_rate": 4.2218654115718846e-05, + "loss": 0.5037, + "step": 6135 + }, + { + "epoch": 1.7173243772739994, + "grad_norm": 0.2320646993137256, + "learning_rate": 4.2203416760701256e-05, + "loss": 0.4801, + "step": 6136 + }, + { + "epoch": 1.7176042541281835, + "grad_norm": 0.22834671959408195, + "learning_rate": 4.218818014776681e-05, + "loss": 0.496, + "step": 6137 + }, + { + "epoch": 1.7178841309823678, + "grad_norm": 0.23205588739717026, + "learning_rate": 4.2172944278365755e-05, + "loss": 0.4876, + "step": 6138 + }, + { + "epoch": 1.7181640078365519, + "grad_norm": 0.2301065946049179, + "learning_rate": 4.2157709153948234e-05, + "loss": 0.4884, + "step": 6139 + }, + { + "epoch": 1.718443884690736, + "grad_norm": 0.22658124411305183, + "learning_rate": 4.214247477596435e-05, + "loss": 0.4641, + "step": 6140 + }, + { + "epoch": 1.7187237615449202, + "grad_norm": 0.22846579796177366, + "learning_rate": 4.212724114586412e-05, + "loss": 0.4899, + "step": 6141 + }, + { + "epoch": 1.7190036383991045, + "grad_norm": 0.22292216756622865, + "learning_rate": 4.211200826509749e-05, + "loss": 0.4755, + "step": 6142 + }, + { + "epoch": 1.7192835152532886, + "grad_norm": 0.23448196005844724, + "learning_rate": 4.209677613511432e-05, + "loss": 0.5053, + "step": 6143 + }, + { + "epoch": 1.7195633921074727, + "grad_norm": 0.22431288444980207, + "learning_rate": 4.208154475736442e-05, + "loss": 0.4776, + "step": 6144 + }, + { + "epoch": 1.719843268961657, + "grad_norm": 0.2178878211374519, + "learning_rate": 4.2066314133297526e-05, + "loss": 0.4595, + "step": 6145 + }, + { + "epoch": 1.720123145815841, + "grad_norm": 0.22213929533746019, + "learning_rate": 4.20510842643633e-05, + "loss": 0.4704, + "step": 6146 + }, + { + "epoch": 1.720403022670025, + "grad_norm": 0.23103599253361262, + "learning_rate": 4.203585515201131e-05, + "loss": 0.4715, + "step": 6147 + }, + { + "epoch": 1.7206828995242094, + "grad_norm": 0.218019831707151, + "learning_rate": 4.2020626797691096e-05, + "loss": 0.4908, + "step": 6148 + }, + { + "epoch": 1.7209627763783935, + "grad_norm": 0.2293186538753741, + "learning_rate": 4.200539920285208e-05, + "loss": 0.4889, + "step": 6149 + }, + { + "epoch": 1.7212426532325775, + "grad_norm": 0.22837270173547924, + "learning_rate": 4.199017236894364e-05, + "loss": 0.4829, + "step": 6150 + }, + { + "epoch": 1.7215225300867618, + "grad_norm": 0.22800302529739508, + "learning_rate": 4.1974946297415077e-05, + "loss": 0.4794, + "step": 6151 + }, + { + "epoch": 1.721802406940946, + "grad_norm": 0.24263392801028452, + "learning_rate": 4.1959720989715626e-05, + "loss": 0.5092, + "step": 6152 + }, + { + "epoch": 1.7220822837951302, + "grad_norm": 0.21515296149303667, + "learning_rate": 4.194449644729444e-05, + "loss": 0.4974, + "step": 6153 + }, + { + "epoch": 1.7223621606493142, + "grad_norm": 0.23012271944667229, + "learning_rate": 4.1929272671600594e-05, + "loss": 0.491, + "step": 6154 + }, + { + "epoch": 1.7226420375034985, + "grad_norm": 0.2463645417102324, + "learning_rate": 4.191404966408308e-05, + "loss": 0.4851, + "step": 6155 + }, + { + "epoch": 1.7229219143576826, + "grad_norm": 0.2269257728365019, + "learning_rate": 4.1898827426190834e-05, + "loss": 0.4902, + "step": 6156 + }, + { + "epoch": 1.7232017912118667, + "grad_norm": 0.27498288725529796, + "learning_rate": 4.188360595937274e-05, + "loss": 0.4782, + "step": 6157 + }, + { + "epoch": 1.723481668066051, + "grad_norm": 0.23911156220462287, + "learning_rate": 4.186838526507757e-05, + "loss": 0.5124, + "step": 6158 + }, + { + "epoch": 1.7237615449202353, + "grad_norm": 0.2312944792273134, + "learning_rate": 4.185316534475404e-05, + "loss": 0.4978, + "step": 6159 + }, + { + "epoch": 1.724041421774419, + "grad_norm": 0.22707976307773028, + "learning_rate": 4.18379461998508e-05, + "loss": 0.4673, + "step": 6160 + }, + { + "epoch": 1.7243212986286034, + "grad_norm": 0.239679140818221, + "learning_rate": 4.18227278318164e-05, + "loss": 0.4783, + "step": 6161 + }, + { + "epoch": 1.7246011754827877, + "grad_norm": 0.23343532877153514, + "learning_rate": 4.180751024209935e-05, + "loss": 0.4955, + "step": 6162 + }, + { + "epoch": 1.7248810523369718, + "grad_norm": 0.2304604774398203, + "learning_rate": 4.179229343214806e-05, + "loss": 0.4941, + "step": 6163 + }, + { + "epoch": 1.7251609291911558, + "grad_norm": 0.22100401338558318, + "learning_rate": 4.177707740341087e-05, + "loss": 0.483, + "step": 6164 + }, + { + "epoch": 1.7254408060453401, + "grad_norm": 0.23943947980530428, + "learning_rate": 4.1761862157336065e-05, + "loss": 0.4971, + "step": 6165 + }, + { + "epoch": 1.7257206828995242, + "grad_norm": 0.23364708009853402, + "learning_rate": 4.174664769537182e-05, + "loss": 0.4994, + "step": 6166 + }, + { + "epoch": 1.7260005597537083, + "grad_norm": 0.2177972882653534, + "learning_rate": 4.1731434018966265e-05, + "loss": 0.4832, + "step": 6167 + }, + { + "epoch": 1.7262804366078925, + "grad_norm": 0.22638760788070802, + "learning_rate": 4.1716221129567455e-05, + "loss": 0.4819, + "step": 6168 + }, + { + "epoch": 1.7265603134620768, + "grad_norm": 0.23315084038473147, + "learning_rate": 4.170100902862335e-05, + "loss": 0.4755, + "step": 6169 + }, + { + "epoch": 1.7268401903162607, + "grad_norm": 0.2168653007820914, + "learning_rate": 4.168579771758186e-05, + "loss": 0.4905, + "step": 6170 + }, + { + "epoch": 1.727120067170445, + "grad_norm": 0.4397010930184802, + "learning_rate": 4.1670587197890795e-05, + "loss": 0.4947, + "step": 6171 + }, + { + "epoch": 1.7273999440246293, + "grad_norm": 0.23897181759928504, + "learning_rate": 4.165537747099791e-05, + "loss": 0.4993, + "step": 6172 + }, + { + "epoch": 1.7276798208788133, + "grad_norm": 0.2436988300206224, + "learning_rate": 4.164016853835085e-05, + "loss": 0.4878, + "step": 6173 + }, + { + "epoch": 1.7279596977329974, + "grad_norm": 0.234216966427448, + "learning_rate": 4.162496040139724e-05, + "loss": 0.501, + "step": 6174 + }, + { + "epoch": 1.7282395745871817, + "grad_norm": 0.23058344419520013, + "learning_rate": 4.160975306158458e-05, + "loss": 0.4741, + "step": 6175 + }, + { + "epoch": 1.7285194514413658, + "grad_norm": 0.226064221841301, + "learning_rate": 4.159454652036032e-05, + "loss": 0.4812, + "step": 6176 + }, + { + "epoch": 1.7287993282955498, + "grad_norm": 0.23533501416134883, + "learning_rate": 4.157934077917183e-05, + "loss": 0.4985, + "step": 6177 + }, + { + "epoch": 1.7290792051497341, + "grad_norm": 0.2248949857682273, + "learning_rate": 4.15641358394664e-05, + "loss": 0.4921, + "step": 6178 + }, + { + "epoch": 1.7293590820039184, + "grad_norm": 0.21832457514267647, + "learning_rate": 4.154893170269124e-05, + "loss": 0.4687, + "step": 6179 + }, + { + "epoch": 1.7296389588581025, + "grad_norm": 0.22864792597259992, + "learning_rate": 4.153372837029349e-05, + "loss": 0.465, + "step": 6180 + }, + { + "epoch": 1.7299188357122866, + "grad_norm": 0.2189754608542842, + "learning_rate": 4.1518525843720216e-05, + "loss": 0.4822, + "step": 6181 + }, + { + "epoch": 1.7301987125664708, + "grad_norm": 0.2270261070504502, + "learning_rate": 4.150332412441839e-05, + "loss": 0.47, + "step": 6182 + }, + { + "epoch": 1.730478589420655, + "grad_norm": 0.2343048346316914, + "learning_rate": 4.148812321383494e-05, + "loss": 0.4763, + "step": 6183 + }, + { + "epoch": 1.730758466274839, + "grad_norm": 0.2293296292049538, + "learning_rate": 4.147292311341667e-05, + "loss": 0.498, + "step": 6184 + }, + { + "epoch": 1.7310383431290233, + "grad_norm": 0.23190044498144893, + "learning_rate": 4.145772382461036e-05, + "loss": 0.4924, + "step": 6185 + }, + { + "epoch": 1.7313182199832073, + "grad_norm": 0.22096446277113688, + "learning_rate": 4.144252534886268e-05, + "loss": 0.5141, + "step": 6186 + }, + { + "epoch": 1.7315980968373914, + "grad_norm": 0.23537546826112543, + "learning_rate": 4.142732768762023e-05, + "loss": 0.4994, + "step": 6187 + }, + { + "epoch": 1.7318779736915757, + "grad_norm": 0.22247615205398874, + "learning_rate": 4.1412130842329534e-05, + "loss": 0.479, + "step": 6188 + }, + { + "epoch": 1.73215785054576, + "grad_norm": 0.2318827393974484, + "learning_rate": 4.139693481443702e-05, + "loss": 0.4861, + "step": 6189 + }, + { + "epoch": 1.732437727399944, + "grad_norm": 0.22755283443826474, + "learning_rate": 4.1381739605389095e-05, + "loss": 0.4839, + "step": 6190 + }, + { + "epoch": 1.7327176042541281, + "grad_norm": 0.22817109923223067, + "learning_rate": 4.136654521663198e-05, + "loss": 0.4777, + "step": 6191 + }, + { + "epoch": 1.7329974811083124, + "grad_norm": 0.22596647868744635, + "learning_rate": 4.1351351649611933e-05, + "loss": 0.4612, + "step": 6192 + }, + { + "epoch": 1.7332773579624965, + "grad_norm": 0.23397970580814761, + "learning_rate": 4.1336158905775086e-05, + "loss": 0.4844, + "step": 6193 + }, + { + "epoch": 1.7335572348166806, + "grad_norm": 0.2355951420758951, + "learning_rate": 4.1320966986567476e-05, + "loss": 0.4744, + "step": 6194 + }, + { + "epoch": 1.7338371116708649, + "grad_norm": 0.24338253587553524, + "learning_rate": 4.13057758934351e-05, + "loss": 0.4749, + "step": 6195 + }, + { + "epoch": 1.7341169885250491, + "grad_norm": 0.21927606053474624, + "learning_rate": 4.129058562782382e-05, + "loss": 0.4829, + "step": 6196 + }, + { + "epoch": 1.734396865379233, + "grad_norm": 0.22479603677132748, + "learning_rate": 4.127539619117948e-05, + "loss": 0.483, + "step": 6197 + }, + { + "epoch": 1.7346767422334173, + "grad_norm": 0.23048192785742735, + "learning_rate": 4.126020758494782e-05, + "loss": 0.4874, + "step": 6198 + }, + { + "epoch": 1.7349566190876016, + "grad_norm": 0.22266574526724875, + "learning_rate": 4.1245019810574495e-05, + "loss": 0.4798, + "step": 6199 + }, + { + "epoch": 1.7352364959417856, + "grad_norm": 0.22664210498131207, + "learning_rate": 4.122983286950508e-05, + "loss": 0.4851, + "step": 6200 + }, + { + "epoch": 1.7355163727959697, + "grad_norm": 0.2330936722013338, + "learning_rate": 4.121464676318509e-05, + "loss": 0.469, + "step": 6201 + }, + { + "epoch": 1.735796249650154, + "grad_norm": 0.23141386256000987, + "learning_rate": 4.119946149305992e-05, + "loss": 0.4854, + "step": 6202 + }, + { + "epoch": 1.736076126504338, + "grad_norm": 0.22741908377211895, + "learning_rate": 4.118427706057494e-05, + "loss": 0.5071, + "step": 6203 + }, + { + "epoch": 1.7363560033585221, + "grad_norm": 0.23079084357341084, + "learning_rate": 4.1169093467175404e-05, + "loss": 0.5085, + "step": 6204 + }, + { + "epoch": 1.7366358802127064, + "grad_norm": 0.221600111079701, + "learning_rate": 4.1153910714306496e-05, + "loss": 0.5207, + "step": 6205 + }, + { + "epoch": 1.7369157570668907, + "grad_norm": 0.23615707134305802, + "learning_rate": 4.113872880341332e-05, + "loss": 0.5139, + "step": 6206 + }, + { + "epoch": 1.7371956339210746, + "grad_norm": 0.22398368347694772, + "learning_rate": 4.11235477359409e-05, + "loss": 0.4764, + "step": 6207 + }, + { + "epoch": 1.7374755107752589, + "grad_norm": 0.21892919892376989, + "learning_rate": 4.110836751333418e-05, + "loss": 0.4723, + "step": 6208 + }, + { + "epoch": 1.7377553876294431, + "grad_norm": 0.22126669964411433, + "learning_rate": 4.109318813703801e-05, + "loss": 0.4675, + "step": 6209 + }, + { + "epoch": 1.7380352644836272, + "grad_norm": 0.2249764201476906, + "learning_rate": 4.107800960849717e-05, + "loss": 0.482, + "step": 6210 + }, + { + "epoch": 1.7383151413378113, + "grad_norm": 0.22660197424580764, + "learning_rate": 4.106283192915638e-05, + "loss": 0.4635, + "step": 6211 + }, + { + "epoch": 1.7385950181919956, + "grad_norm": 0.24296870443709023, + "learning_rate": 4.104765510046024e-05, + "loss": 0.4906, + "step": 6212 + }, + { + "epoch": 1.7388748950461796, + "grad_norm": 0.2310097726134897, + "learning_rate": 4.1032479123853306e-05, + "loss": 0.4988, + "step": 6213 + }, + { + "epoch": 1.7391547719003637, + "grad_norm": 0.22724682084757694, + "learning_rate": 4.1017304000780025e-05, + "loss": 0.4881, + "step": 6214 + }, + { + "epoch": 1.739434648754548, + "grad_norm": 0.22766719539469013, + "learning_rate": 4.100212973268478e-05, + "loss": 0.4852, + "step": 6215 + }, + { + "epoch": 1.7397145256087323, + "grad_norm": 0.23270993851446942, + "learning_rate": 4.098695632101186e-05, + "loss": 0.494, + "step": 6216 + }, + { + "epoch": 1.7399944024629164, + "grad_norm": 0.2257314465143826, + "learning_rate": 4.097178376720548e-05, + "loss": 0.5083, + "step": 6217 + }, + { + "epoch": 1.7402742793171004, + "grad_norm": 0.21487172598219728, + "learning_rate": 4.0956612072709775e-05, + "loss": 0.4795, + "step": 6218 + }, + { + "epoch": 1.7405541561712847, + "grad_norm": 0.22280803848247047, + "learning_rate": 4.0941441238968784e-05, + "loss": 0.4952, + "step": 6219 + }, + { + "epoch": 1.7408340330254688, + "grad_norm": 0.2288712931118916, + "learning_rate": 4.0926271267426505e-05, + "loss": 0.481, + "step": 6220 + }, + { + "epoch": 1.7411139098796529, + "grad_norm": 0.2317947148548979, + "learning_rate": 4.091110215952679e-05, + "loss": 0.484, + "step": 6221 + }, + { + "epoch": 1.7413937867338372, + "grad_norm": 0.24196046213019462, + "learning_rate": 4.0895933916713475e-05, + "loss": 0.4967, + "step": 6222 + }, + { + "epoch": 1.7416736635880212, + "grad_norm": 0.22802728951721227, + "learning_rate": 4.088076654043026e-05, + "loss": 0.4833, + "step": 6223 + }, + { + "epoch": 1.7419535404422053, + "grad_norm": 0.23532017702606559, + "learning_rate": 4.086560003212079e-05, + "loss": 0.4566, + "step": 6224 + }, + { + "epoch": 1.7422334172963896, + "grad_norm": 0.22664788392552132, + "learning_rate": 4.085043439322861e-05, + "loss": 0.4931, + "step": 6225 + }, + { + "epoch": 1.7425132941505739, + "grad_norm": 0.22804682386682001, + "learning_rate": 4.0835269625197235e-05, + "loss": 0.4951, + "step": 6226 + }, + { + "epoch": 1.742793171004758, + "grad_norm": 0.22851141767746166, + "learning_rate": 4.0820105729470005e-05, + "loss": 0.4711, + "step": 6227 + }, + { + "epoch": 1.743073047858942, + "grad_norm": 0.23419407066537856, + "learning_rate": 4.0804942707490254e-05, + "loss": 0.4985, + "step": 6228 + }, + { + "epoch": 1.7433529247131263, + "grad_norm": 0.2281211997665903, + "learning_rate": 4.0789780560701204e-05, + "loss": 0.4958, + "step": 6229 + }, + { + "epoch": 1.7436328015673104, + "grad_norm": 0.23278838235072313, + "learning_rate": 4.077461929054599e-05, + "loss": 0.4876, + "step": 6230 + }, + { + "epoch": 1.7439126784214944, + "grad_norm": 0.23339218741007328, + "learning_rate": 4.075945889846768e-05, + "loss": 0.4867, + "step": 6231 + }, + { + "epoch": 1.7441925552756787, + "grad_norm": 0.23597406995369719, + "learning_rate": 4.074429938590924e-05, + "loss": 0.4854, + "step": 6232 + }, + { + "epoch": 1.744472432129863, + "grad_norm": 0.2181253638617583, + "learning_rate": 4.072914075431357e-05, + "loss": 0.479, + "step": 6233 + }, + { + "epoch": 1.7447523089840469, + "grad_norm": 0.2375729240684907, + "learning_rate": 4.071398300512347e-05, + "loss": 0.4863, + "step": 6234 + }, + { + "epoch": 1.7450321858382312, + "grad_norm": 0.21824965754073894, + "learning_rate": 4.069882613978167e-05, + "loss": 0.4745, + "step": 6235 + }, + { + "epoch": 1.7453120626924155, + "grad_norm": 0.2423222093731873, + "learning_rate": 4.068367015973078e-05, + "loss": 0.511, + "step": 6236 + }, + { + "epoch": 1.7455919395465995, + "grad_norm": 0.2225530815769493, + "learning_rate": 4.066851506641341e-05, + "loss": 0.466, + "step": 6237 + }, + { + "epoch": 1.7458718164007836, + "grad_norm": 0.23503900746030318, + "learning_rate": 4.0653360861271974e-05, + "loss": 0.4776, + "step": 6238 + }, + { + "epoch": 1.7461516932549679, + "grad_norm": 0.23179912266962155, + "learning_rate": 4.06382075457489e-05, + "loss": 0.4961, + "step": 6239 + }, + { + "epoch": 1.746431570109152, + "grad_norm": 0.23010916446882304, + "learning_rate": 4.062305512128647e-05, + "loss": 0.5108, + "step": 6240 + }, + { + "epoch": 1.746711446963336, + "grad_norm": 0.22742092032785105, + "learning_rate": 4.0607903589326896e-05, + "loss": 0.4692, + "step": 6241 + }, + { + "epoch": 1.7469913238175203, + "grad_norm": 0.22283313750520264, + "learning_rate": 4.0592752951312316e-05, + "loss": 0.4917, + "step": 6242 + }, + { + "epoch": 1.7472712006717046, + "grad_norm": 0.22105652456579566, + "learning_rate": 4.0577603208684785e-05, + "loss": 0.4947, + "step": 6243 + }, + { + "epoch": 1.7475510775258885, + "grad_norm": 0.2306414346923748, + "learning_rate": 4.056245436288626e-05, + "loss": 0.4713, + "step": 6244 + }, + { + "epoch": 1.7478309543800727, + "grad_norm": 0.22984458746847602, + "learning_rate": 4.05473064153586e-05, + "loss": 0.4944, + "step": 6245 + }, + { + "epoch": 1.748110831234257, + "grad_norm": 0.2213908462787107, + "learning_rate": 4.05321593675436e-05, + "loss": 0.4637, + "step": 6246 + }, + { + "epoch": 1.748390708088441, + "grad_norm": 0.23214944822098388, + "learning_rate": 4.051701322088298e-05, + "loss": 0.486, + "step": 6247 + }, + { + "epoch": 1.7486705849426252, + "grad_norm": 0.22587110510421393, + "learning_rate": 4.050186797681834e-05, + "loss": 0.5056, + "step": 6248 + }, + { + "epoch": 1.7489504617968095, + "grad_norm": 0.23345726596750907, + "learning_rate": 4.0486723636791234e-05, + "loss": 0.4893, + "step": 6249 + }, + { + "epoch": 1.7492303386509935, + "grad_norm": 0.23430229132783856, + "learning_rate": 4.047158020224309e-05, + "loss": 0.4704, + "step": 6250 + }, + { + "epoch": 1.7495102155051776, + "grad_norm": 0.22962546937050268, + "learning_rate": 4.045643767461528e-05, + "loss": 0.4866, + "step": 6251 + }, + { + "epoch": 1.749790092359362, + "grad_norm": 0.2256801577148114, + "learning_rate": 4.044129605534907e-05, + "loss": 0.5009, + "step": 6252 + }, + { + "epoch": 1.7500699692135462, + "grad_norm": 0.24114583256973654, + "learning_rate": 4.042615534588565e-05, + "loss": 0.5052, + "step": 6253 + }, + { + "epoch": 1.7503498460677303, + "grad_norm": 0.22799986588431403, + "learning_rate": 4.041101554766612e-05, + "loss": 0.4999, + "step": 6254 + }, + { + "epoch": 1.7506297229219143, + "grad_norm": 0.23193963651872831, + "learning_rate": 4.0395876662131494e-05, + "loss": 0.5025, + "step": 6255 + }, + { + "epoch": 1.7509095997760986, + "grad_norm": 0.24000487006075624, + "learning_rate": 4.0380738690722715e-05, + "loss": 0.4988, + "step": 6256 + }, + { + "epoch": 1.7511894766302827, + "grad_norm": 0.22289020086343436, + "learning_rate": 4.0365601634880604e-05, + "loss": 0.4807, + "step": 6257 + }, + { + "epoch": 1.7514693534844668, + "grad_norm": 0.22062471093548774, + "learning_rate": 4.035046549604593e-05, + "loss": 0.504, + "step": 6258 + }, + { + "epoch": 1.751749230338651, + "grad_norm": 0.24988358237523547, + "learning_rate": 4.0335330275659346e-05, + "loss": 0.4961, + "step": 6259 + }, + { + "epoch": 1.7520291071928351, + "grad_norm": 0.23267036115777692, + "learning_rate": 4.032019597516144e-05, + "loss": 0.4985, + "step": 6260 + }, + { + "epoch": 1.7523089840470192, + "grad_norm": 0.22988664272037695, + "learning_rate": 4.0305062595992694e-05, + "loss": 0.4991, + "step": 6261 + }, + { + "epoch": 1.7525888609012035, + "grad_norm": 0.22123590529816817, + "learning_rate": 4.028993013959353e-05, + "loss": 0.4859, + "step": 6262 + }, + { + "epoch": 1.7528687377553878, + "grad_norm": 0.2226616011786503, + "learning_rate": 4.027479860740424e-05, + "loss": 0.4736, + "step": 6263 + }, + { + "epoch": 1.7531486146095718, + "grad_norm": 0.23406933748112344, + "learning_rate": 4.025966800086506e-05, + "loss": 0.5066, + "step": 6264 + }, + { + "epoch": 1.753428491463756, + "grad_norm": 0.23290244909657634, + "learning_rate": 4.024453832141613e-05, + "loss": 0.4798, + "step": 6265 + }, + { + "epoch": 1.7537083683179402, + "grad_norm": 0.23299853930618206, + "learning_rate": 4.022940957049751e-05, + "loss": 0.487, + "step": 6266 + }, + { + "epoch": 1.7539882451721243, + "grad_norm": 0.22798329834562744, + "learning_rate": 4.021428174954915e-05, + "loss": 0.4771, + "step": 6267 + }, + { + "epoch": 1.7542681220263083, + "grad_norm": 0.23048191503476212, + "learning_rate": 4.0199154860010936e-05, + "loss": 0.4972, + "step": 6268 + }, + { + "epoch": 1.7545479988804926, + "grad_norm": 0.2178656870948323, + "learning_rate": 4.018402890332264e-05, + "loss": 0.4957, + "step": 6269 + }, + { + "epoch": 1.7548278757346767, + "grad_norm": 0.2270165658527656, + "learning_rate": 4.016890388092398e-05, + "loss": 0.4823, + "step": 6270 + }, + { + "epoch": 1.7551077525888608, + "grad_norm": 0.23299633722136034, + "learning_rate": 4.0153779794254525e-05, + "loss": 0.4797, + "step": 6271 + }, + { + "epoch": 1.755387629443045, + "grad_norm": 0.23142542256353943, + "learning_rate": 4.013865664475384e-05, + "loss": 0.4818, + "step": 6272 + }, + { + "epoch": 1.7556675062972293, + "grad_norm": 0.22913844147841553, + "learning_rate": 4.012353443386133e-05, + "loss": 0.4926, + "step": 6273 + }, + { + "epoch": 1.7559473831514134, + "grad_norm": 0.22416087593041847, + "learning_rate": 4.010841316301634e-05, + "loss": 0.4894, + "step": 6274 + }, + { + "epoch": 1.7562272600055975, + "grad_norm": 0.21741814918138835, + "learning_rate": 4.009329283365813e-05, + "loss": 0.494, + "step": 6275 + }, + { + "epoch": 1.7565071368597818, + "grad_norm": 0.22517899957162932, + "learning_rate": 4.007817344722585e-05, + "loss": 0.5039, + "step": 6276 + }, + { + "epoch": 1.7567870137139658, + "grad_norm": 0.23003578969254598, + "learning_rate": 4.0063055005158576e-05, + "loss": 0.5039, + "step": 6277 + }, + { + "epoch": 1.75706689056815, + "grad_norm": 0.24009596113519552, + "learning_rate": 4.004793750889528e-05, + "loss": 0.495, + "step": 6278 + }, + { + "epoch": 1.7573467674223342, + "grad_norm": 0.23205304974684965, + "learning_rate": 4.0032820959874875e-05, + "loss": 0.4815, + "step": 6279 + }, + { + "epoch": 1.7576266442765185, + "grad_norm": 0.22990380313040454, + "learning_rate": 4.0017705359536153e-05, + "loss": 0.4962, + "step": 6280 + }, + { + "epoch": 1.7579065211307023, + "grad_norm": 0.23529770235912076, + "learning_rate": 4.000259070931781e-05, + "loss": 0.5154, + "step": 6281 + }, + { + "epoch": 1.7581863979848866, + "grad_norm": 0.22976403488340347, + "learning_rate": 3.998747701065849e-05, + "loss": 0.4849, + "step": 6282 + }, + { + "epoch": 1.758466274839071, + "grad_norm": 0.2218847105560266, + "learning_rate": 3.9972364264996696e-05, + "loss": 0.4548, + "step": 6283 + }, + { + "epoch": 1.758746151693255, + "grad_norm": 0.22186692728920557, + "learning_rate": 3.99572524737709e-05, + "loss": 0.4967, + "step": 6284 + }, + { + "epoch": 1.759026028547439, + "grad_norm": 0.22254185623061357, + "learning_rate": 3.994214163841942e-05, + "loss": 0.4958, + "step": 6285 + }, + { + "epoch": 1.7593059054016233, + "grad_norm": 0.22156417669810374, + "learning_rate": 3.992703176038054e-05, + "loss": 0.4816, + "step": 6286 + }, + { + "epoch": 1.7595857822558074, + "grad_norm": 0.23901536957158068, + "learning_rate": 3.991192284109241e-05, + "loss": 0.4871, + "step": 6287 + }, + { + "epoch": 1.7598656591099915, + "grad_norm": 0.22301450682162732, + "learning_rate": 3.989681488199309e-05, + "loss": 0.4591, + "step": 6288 + }, + { + "epoch": 1.7601455359641758, + "grad_norm": 0.22377759256850285, + "learning_rate": 3.9881707884520613e-05, + "loss": 0.4844, + "step": 6289 + }, + { + "epoch": 1.76042541281836, + "grad_norm": 0.24620730002455676, + "learning_rate": 3.986660185011283e-05, + "loss": 0.5027, + "step": 6290 + }, + { + "epoch": 1.760705289672544, + "grad_norm": 0.2261648256000998, + "learning_rate": 3.985149678020756e-05, + "loss": 0.499, + "step": 6291 + }, + { + "epoch": 1.7609851665267282, + "grad_norm": 0.23200553036089605, + "learning_rate": 3.983639267624251e-05, + "loss": 0.4994, + "step": 6292 + }, + { + "epoch": 1.7612650433809125, + "grad_norm": 0.22256074334168915, + "learning_rate": 3.9821289539655297e-05, + "loss": 0.5009, + "step": 6293 + }, + { + "epoch": 1.7615449202350966, + "grad_norm": 0.21606778749430056, + "learning_rate": 3.9806187371883435e-05, + "loss": 0.4703, + "step": 6294 + }, + { + "epoch": 1.7618247970892806, + "grad_norm": 0.23302426125720777, + "learning_rate": 3.979108617436437e-05, + "loss": 0.4838, + "step": 6295 + }, + { + "epoch": 1.762104673943465, + "grad_norm": 0.2325499111792386, + "learning_rate": 3.977598594853543e-05, + "loss": 0.4997, + "step": 6296 + }, + { + "epoch": 1.762384550797649, + "grad_norm": 0.230697920236471, + "learning_rate": 3.976088669583387e-05, + "loss": 0.4824, + "step": 6297 + }, + { + "epoch": 1.762664427651833, + "grad_norm": 0.22450802207118495, + "learning_rate": 3.974578841769686e-05, + "loss": 0.4695, + "step": 6298 + }, + { + "epoch": 1.7629443045060174, + "grad_norm": 0.22781091871879922, + "learning_rate": 3.973069111556144e-05, + "loss": 0.4842, + "step": 6299 + }, + { + "epoch": 1.7632241813602016, + "grad_norm": 0.2304046252653234, + "learning_rate": 3.9715594790864586e-05, + "loss": 0.4881, + "step": 6300 + }, + { + "epoch": 1.7635040582143857, + "grad_norm": 0.23292913610813235, + "learning_rate": 3.970049944504317e-05, + "loss": 0.4732, + "step": 6301 + }, + { + "epoch": 1.7637839350685698, + "grad_norm": 0.23933804793398222, + "learning_rate": 3.9685405079533986e-05, + "loss": 0.4829, + "step": 6302 + }, + { + "epoch": 1.764063811922754, + "grad_norm": 0.23299746176743352, + "learning_rate": 3.967031169577373e-05, + "loss": 0.4874, + "step": 6303 + }, + { + "epoch": 1.7643436887769381, + "grad_norm": 0.2342529534679421, + "learning_rate": 3.9655219295198976e-05, + "loss": 0.4867, + "step": 6304 + }, + { + "epoch": 1.7646235656311222, + "grad_norm": 0.22562566201708564, + "learning_rate": 3.964012787924623e-05, + "loss": 0.4785, + "step": 6305 + }, + { + "epoch": 1.7649034424853065, + "grad_norm": 0.23661630561981475, + "learning_rate": 3.962503744935192e-05, + "loss": 0.4892, + "step": 6306 + }, + { + "epoch": 1.7651833193394906, + "grad_norm": 0.22968855464150276, + "learning_rate": 3.960994800695236e-05, + "loss": 0.4864, + "step": 6307 + }, + { + "epoch": 1.7654631961936746, + "grad_norm": 0.23394090987024926, + "learning_rate": 3.959485955348376e-05, + "loss": 0.4874, + "step": 6308 + }, + { + "epoch": 1.765743073047859, + "grad_norm": 0.24092999284959826, + "learning_rate": 3.957977209038226e-05, + "loss": 0.5025, + "step": 6309 + }, + { + "epoch": 1.7660229499020432, + "grad_norm": 0.2333841671522603, + "learning_rate": 3.9564685619083875e-05, + "loss": 0.4828, + "step": 6310 + }, + { + "epoch": 1.7663028267562273, + "grad_norm": 0.24408625874166232, + "learning_rate": 3.954960014102455e-05, + "loss": 0.5011, + "step": 6311 + }, + { + "epoch": 1.7665827036104114, + "grad_norm": 0.2218998319541268, + "learning_rate": 3.953451565764014e-05, + "loss": 0.4815, + "step": 6312 + }, + { + "epoch": 1.7668625804645957, + "grad_norm": 0.233599747588889, + "learning_rate": 3.951943217036639e-05, + "loss": 0.4731, + "step": 6313 + }, + { + "epoch": 1.7671424573187797, + "grad_norm": 0.22581810206686742, + "learning_rate": 3.9504349680638944e-05, + "loss": 0.4801, + "step": 6314 + }, + { + "epoch": 1.7674223341729638, + "grad_norm": 0.22230328041234768, + "learning_rate": 3.948926818989338e-05, + "loss": 0.4719, + "step": 6315 + }, + { + "epoch": 1.767702211027148, + "grad_norm": 0.23914765385746733, + "learning_rate": 3.9474187699565155e-05, + "loss": 0.4966, + "step": 6316 + }, + { + "epoch": 1.7679820878813324, + "grad_norm": 0.22668827409996398, + "learning_rate": 3.945910821108963e-05, + "loss": 0.4913, + "step": 6317 + }, + { + "epoch": 1.7682619647355162, + "grad_norm": 0.23524978952363726, + "learning_rate": 3.944402972590209e-05, + "loss": 0.495, + "step": 6318 + }, + { + "epoch": 1.7685418415897005, + "grad_norm": 0.22704137986481232, + "learning_rate": 3.9428952245437703e-05, + "loss": 0.4779, + "step": 6319 + }, + { + "epoch": 1.7688217184438848, + "grad_norm": 0.22448072357965906, + "learning_rate": 3.9413875771131555e-05, + "loss": 0.4914, + "step": 6320 + }, + { + "epoch": 1.7691015952980689, + "grad_norm": 0.2243162231529199, + "learning_rate": 3.939880030441864e-05, + "loss": 0.4734, + "step": 6321 + }, + { + "epoch": 1.769381472152253, + "grad_norm": 0.22234487866670052, + "learning_rate": 3.9383725846733845e-05, + "loss": 0.4872, + "step": 6322 + }, + { + "epoch": 1.7696613490064372, + "grad_norm": 0.23020609811624973, + "learning_rate": 3.9368652399511956e-05, + "loss": 0.5028, + "step": 6323 + }, + { + "epoch": 1.7699412258606213, + "grad_norm": 0.21793451134665903, + "learning_rate": 3.935357996418769e-05, + "loss": 0.4961, + "step": 6324 + }, + { + "epoch": 1.7702211027148054, + "grad_norm": 0.24087508446996647, + "learning_rate": 3.9338508542195654e-05, + "loss": 0.5188, + "step": 6325 + }, + { + "epoch": 1.7705009795689897, + "grad_norm": 0.22854684206317447, + "learning_rate": 3.932343813497033e-05, + "loss": 0.4797, + "step": 6326 + }, + { + "epoch": 1.770780856423174, + "grad_norm": 0.2196907509152965, + "learning_rate": 3.930836874394615e-05, + "loss": 0.4937, + "step": 6327 + }, + { + "epoch": 1.7710607332773578, + "grad_norm": 0.23307236479312057, + "learning_rate": 3.9293300370557404e-05, + "loss": 0.5012, + "step": 6328 + }, + { + "epoch": 1.771340610131542, + "grad_norm": 0.22377438635370053, + "learning_rate": 3.927823301623833e-05, + "loss": 0.4956, + "step": 6329 + }, + { + "epoch": 1.7716204869857264, + "grad_norm": 0.2374136571358909, + "learning_rate": 3.926316668242304e-05, + "loss": 0.5015, + "step": 6330 + }, + { + "epoch": 1.7719003638399105, + "grad_norm": 0.23691055635127978, + "learning_rate": 3.924810137054555e-05, + "loss": 0.4839, + "step": 6331 + }, + { + "epoch": 1.7721802406940945, + "grad_norm": 0.22235079629447116, + "learning_rate": 3.923303708203979e-05, + "loss": 0.4761, + "step": 6332 + }, + { + "epoch": 1.7724601175482788, + "grad_norm": 0.23762699681876637, + "learning_rate": 3.9217973818339593e-05, + "loss": 0.4972, + "step": 6333 + }, + { + "epoch": 1.7727399944024629, + "grad_norm": 0.22485042309032557, + "learning_rate": 3.920291158087869e-05, + "loss": 0.4842, + "step": 6334 + }, + { + "epoch": 1.773019871256647, + "grad_norm": 0.2247140994121346, + "learning_rate": 3.918785037109069e-05, + "loss": 0.4886, + "step": 6335 + }, + { + "epoch": 1.7732997481108312, + "grad_norm": 0.21513677179734114, + "learning_rate": 3.9172790190409156e-05, + "loss": 0.4937, + "step": 6336 + }, + { + "epoch": 1.7735796249650155, + "grad_norm": 0.24485179374287883, + "learning_rate": 3.91577310402675e-05, + "loss": 0.4939, + "step": 6337 + }, + { + "epoch": 1.7738595018191996, + "grad_norm": 0.22785343433801122, + "learning_rate": 3.914267292209908e-05, + "loss": 0.4643, + "step": 6338 + }, + { + "epoch": 1.7741393786733837, + "grad_norm": 0.2245338842317454, + "learning_rate": 3.9127615837337126e-05, + "loss": 0.4948, + "step": 6339 + }, + { + "epoch": 1.774419255527568, + "grad_norm": 0.2285664646257787, + "learning_rate": 3.911255978741477e-05, + "loss": 0.5124, + "step": 6340 + }, + { + "epoch": 1.774699132381752, + "grad_norm": 0.2245933140485806, + "learning_rate": 3.909750477376508e-05, + "loss": 0.4879, + "step": 6341 + }, + { + "epoch": 1.774979009235936, + "grad_norm": 0.2208241866180384, + "learning_rate": 3.908245079782098e-05, + "loss": 0.4977, + "step": 6342 + }, + { + "epoch": 1.7752588860901204, + "grad_norm": 0.23605904644450681, + "learning_rate": 3.906739786101533e-05, + "loss": 0.4851, + "step": 6343 + }, + { + "epoch": 1.7755387629443045, + "grad_norm": 0.22801536193033944, + "learning_rate": 3.9052345964780876e-05, + "loss": 0.474, + "step": 6344 + }, + { + "epoch": 1.7758186397984885, + "grad_norm": 0.22982947570701895, + "learning_rate": 3.9037295110550254e-05, + "loss": 0.4867, + "step": 6345 + }, + { + "epoch": 1.7760985166526728, + "grad_norm": 0.21889992668874073, + "learning_rate": 3.902224529975602e-05, + "loss": 0.479, + "step": 6346 + }, + { + "epoch": 1.7763783935068571, + "grad_norm": 0.2359937738567436, + "learning_rate": 3.900719653383063e-05, + "loss": 0.4913, + "step": 6347 + }, + { + "epoch": 1.7766582703610412, + "grad_norm": 0.22221531659324922, + "learning_rate": 3.899214881420642e-05, + "loss": 0.4932, + "step": 6348 + }, + { + "epoch": 1.7769381472152252, + "grad_norm": 0.23793621342928006, + "learning_rate": 3.8977102142315645e-05, + "loss": 0.4904, + "step": 6349 + }, + { + "epoch": 1.7772180240694095, + "grad_norm": 0.2258191464006303, + "learning_rate": 3.8962056519590465e-05, + "loss": 0.4933, + "step": 6350 + }, + { + "epoch": 1.7774979009235936, + "grad_norm": 0.2244462131808889, + "learning_rate": 3.894701194746291e-05, + "loss": 0.4742, + "step": 6351 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.22935811664036207, + "learning_rate": 3.8931968427364976e-05, + "loss": 0.4815, + "step": 6352 + }, + { + "epoch": 1.778057654631962, + "grad_norm": 0.2283719653872888, + "learning_rate": 3.891692596072846e-05, + "loss": 0.4958, + "step": 6353 + }, + { + "epoch": 1.7783375314861463, + "grad_norm": 0.22735346225222564, + "learning_rate": 3.890188454898514e-05, + "loss": 0.4862, + "step": 6354 + }, + { + "epoch": 1.77861740834033, + "grad_norm": 0.22510619282237004, + "learning_rate": 3.8886844193566655e-05, + "loss": 0.5052, + "step": 6355 + }, + { + "epoch": 1.7788972851945144, + "grad_norm": 0.23266100370870244, + "learning_rate": 3.887180489590456e-05, + "loss": 0.5101, + "step": 6356 + }, + { + "epoch": 1.7791771620486987, + "grad_norm": 0.2366196477148252, + "learning_rate": 3.885676665743029e-05, + "loss": 0.4814, + "step": 6357 + }, + { + "epoch": 1.7794570389028828, + "grad_norm": 0.23246604857315453, + "learning_rate": 3.8841729479575225e-05, + "loss": 0.4884, + "step": 6358 + }, + { + "epoch": 1.7797369157570668, + "grad_norm": 0.22894957258348297, + "learning_rate": 3.882669336377059e-05, + "loss": 0.4946, + "step": 6359 + }, + { + "epoch": 1.7800167926112511, + "grad_norm": 0.22782382461275164, + "learning_rate": 3.8811658311447535e-05, + "loss": 0.4813, + "step": 6360 + }, + { + "epoch": 1.7802966694654352, + "grad_norm": 0.2235324323506944, + "learning_rate": 3.87966243240371e-05, + "loss": 0.4987, + "step": 6361 + }, + { + "epoch": 1.7805765463196193, + "grad_norm": 0.22887829375861352, + "learning_rate": 3.878159140297024e-05, + "loss": 0.5041, + "step": 6362 + }, + { + "epoch": 1.7808564231738035, + "grad_norm": 0.22269130801298512, + "learning_rate": 3.8766559549677786e-05, + "loss": 0.4696, + "step": 6363 + }, + { + "epoch": 1.7811363000279878, + "grad_norm": 0.21900743631481634, + "learning_rate": 3.8751528765590485e-05, + "loss": 0.4924, + "step": 6364 + }, + { + "epoch": 1.7814161768821717, + "grad_norm": 0.2344240137873727, + "learning_rate": 3.8736499052138965e-05, + "loss": 0.4932, + "step": 6365 + }, + { + "epoch": 1.781696053736356, + "grad_norm": 0.2247852256750384, + "learning_rate": 3.872147041075378e-05, + "loss": 0.4954, + "step": 6366 + }, + { + "epoch": 1.7819759305905403, + "grad_norm": 0.23149465846396255, + "learning_rate": 3.870644284286534e-05, + "loss": 0.4864, + "step": 6367 + }, + { + "epoch": 1.7822558074447243, + "grad_norm": 0.2251673707119352, + "learning_rate": 3.869141634990399e-05, + "loss": 0.4927, + "step": 6368 + }, + { + "epoch": 1.7825356842989084, + "grad_norm": 0.2380626169847424, + "learning_rate": 3.8676390933299974e-05, + "loss": 0.482, + "step": 6369 + }, + { + "epoch": 1.7828155611530927, + "grad_norm": 0.22734488945781423, + "learning_rate": 3.86613665944834e-05, + "loss": 0.4882, + "step": 6370 + }, + { + "epoch": 1.7830954380072768, + "grad_norm": 0.23057205016926202, + "learning_rate": 3.864634333488433e-05, + "loss": 0.476, + "step": 6371 + }, + { + "epoch": 1.7833753148614608, + "grad_norm": 0.23982531779577837, + "learning_rate": 3.863132115593263e-05, + "loss": 0.4823, + "step": 6372 + }, + { + "epoch": 1.7836551917156451, + "grad_norm": 0.22970950953957034, + "learning_rate": 3.8616300059058144e-05, + "loss": 0.4971, + "step": 6373 + }, + { + "epoch": 1.7839350685698294, + "grad_norm": 0.2258159796651948, + "learning_rate": 3.860128004569059e-05, + "loss": 0.4829, + "step": 6374 + }, + { + "epoch": 1.7842149454240135, + "grad_norm": 0.23586354154872846, + "learning_rate": 3.8586261117259575e-05, + "loss": 0.4871, + "step": 6375 + }, + { + "epoch": 1.7844948222781976, + "grad_norm": 0.23842329177186955, + "learning_rate": 3.857124327519463e-05, + "loss": 0.5011, + "step": 6376 + }, + { + "epoch": 1.7847746991323818, + "grad_norm": 0.23156115393268714, + "learning_rate": 3.8556226520925134e-05, + "loss": 0.4647, + "step": 6377 + }, + { + "epoch": 1.785054575986566, + "grad_norm": 0.2309254906804709, + "learning_rate": 3.854121085588041e-05, + "loss": 0.4879, + "step": 6378 + }, + { + "epoch": 1.78533445284075, + "grad_norm": 0.23286436381940664, + "learning_rate": 3.8526196281489655e-05, + "loss": 0.4879, + "step": 6379 + }, + { + "epoch": 1.7856143296949343, + "grad_norm": 0.23320900715097417, + "learning_rate": 3.851118279918195e-05, + "loss": 0.4818, + "step": 6380 + }, + { + "epoch": 1.7858942065491183, + "grad_norm": 0.23225467578973877, + "learning_rate": 3.84961704103863e-05, + "loss": 0.4769, + "step": 6381 + }, + { + "epoch": 1.7861740834033024, + "grad_norm": 0.22808060063133884, + "learning_rate": 3.8481159116531596e-05, + "loss": 0.4898, + "step": 6382 + }, + { + "epoch": 1.7864539602574867, + "grad_norm": 0.2282644479443838, + "learning_rate": 3.846614891904661e-05, + "loss": 0.4759, + "step": 6383 + }, + { + "epoch": 1.786733837111671, + "grad_norm": 0.23846114335204405, + "learning_rate": 3.8451139819360024e-05, + "loss": 0.4891, + "step": 6384 + }, + { + "epoch": 1.787013713965855, + "grad_norm": 0.22896905378144364, + "learning_rate": 3.8436131818900416e-05, + "loss": 0.4646, + "step": 6385 + }, + { + "epoch": 1.7872935908200391, + "grad_norm": 0.22096322501271812, + "learning_rate": 3.842112491909624e-05, + "loss": 0.4616, + "step": 6386 + }, + { + "epoch": 1.7875734676742234, + "grad_norm": 0.2360268565352658, + "learning_rate": 3.8406119121375895e-05, + "loss": 0.4927, + "step": 6387 + }, + { + "epoch": 1.7878533445284075, + "grad_norm": 0.2276378931920508, + "learning_rate": 3.839111442716761e-05, + "loss": 0.4745, + "step": 6388 + }, + { + "epoch": 1.7881332213825916, + "grad_norm": 0.225107392841617, + "learning_rate": 3.837611083789958e-05, + "loss": 0.4779, + "step": 6389 + }, + { + "epoch": 1.7884130982367759, + "grad_norm": 0.22067837867254925, + "learning_rate": 3.8361108354999805e-05, + "loss": 0.4652, + "step": 6390 + }, + { + "epoch": 1.78869297509096, + "grad_norm": 0.22840495579109682, + "learning_rate": 3.834610697989625e-05, + "loss": 0.4784, + "step": 6391 + }, + { + "epoch": 1.788972851945144, + "grad_norm": 0.23273210232128885, + "learning_rate": 3.833110671401675e-05, + "loss": 0.4944, + "step": 6392 + }, + { + "epoch": 1.7892527287993283, + "grad_norm": 0.2373862840506717, + "learning_rate": 3.831610755878905e-05, + "loss": 0.512, + "step": 6393 + }, + { + "epoch": 1.7895326056535126, + "grad_norm": 0.23747946030365935, + "learning_rate": 3.830110951564077e-05, + "loss": 0.4902, + "step": 6394 + }, + { + "epoch": 1.7898124825076966, + "grad_norm": 0.23891899288686125, + "learning_rate": 3.828611258599944e-05, + "loss": 0.4834, + "step": 6395 + }, + { + "epoch": 1.7900923593618807, + "grad_norm": 0.23151568879370174, + "learning_rate": 3.8271116771292465e-05, + "loss": 0.4863, + "step": 6396 + }, + { + "epoch": 1.790372236216065, + "grad_norm": 0.2345709172082861, + "learning_rate": 3.825612207294716e-05, + "loss": 0.4816, + "step": 6397 + }, + { + "epoch": 1.790652113070249, + "grad_norm": 0.24314278190424068, + "learning_rate": 3.824112849239074e-05, + "loss": 0.4779, + "step": 6398 + }, + { + "epoch": 1.7909319899244331, + "grad_norm": 0.24296716811599, + "learning_rate": 3.822613603105028e-05, + "loss": 0.4901, + "step": 6399 + }, + { + "epoch": 1.7912118667786174, + "grad_norm": 0.2340895854999377, + "learning_rate": 3.821114469035279e-05, + "loss": 0.4665, + "step": 6400 + }, + { + "epoch": 1.7914917436328017, + "grad_norm": 0.22546872352764571, + "learning_rate": 3.819615447172514e-05, + "loss": 0.4745, + "step": 6401 + }, + { + "epoch": 1.7917716204869856, + "grad_norm": 0.23117676908656137, + "learning_rate": 3.818116537659412e-05, + "loss": 0.4933, + "step": 6402 + }, + { + "epoch": 1.7920514973411699, + "grad_norm": 0.223562285981355, + "learning_rate": 3.816617740638638e-05, + "loss": 0.4813, + "step": 6403 + }, + { + "epoch": 1.7923313741953542, + "grad_norm": 0.22966780341359835, + "learning_rate": 3.815119056252851e-05, + "loss": 0.5076, + "step": 6404 + }, + { + "epoch": 1.7926112510495382, + "grad_norm": 0.22153350027548668, + "learning_rate": 3.813620484644696e-05, + "loss": 0.464, + "step": 6405 + }, + { + "epoch": 1.7928911279037223, + "grad_norm": 0.2291257171279058, + "learning_rate": 3.812122025956807e-05, + "loss": 0.4692, + "step": 6406 + }, + { + "epoch": 1.7931710047579066, + "grad_norm": 0.2446160171924654, + "learning_rate": 3.810623680331811e-05, + "loss": 0.494, + "step": 6407 + }, + { + "epoch": 1.7934508816120907, + "grad_norm": 0.22334207412154658, + "learning_rate": 3.8091254479123156e-05, + "loss": 0.5052, + "step": 6408 + }, + { + "epoch": 1.7937307584662747, + "grad_norm": 0.2287809965363887, + "learning_rate": 3.8076273288409256e-05, + "loss": 0.4681, + "step": 6409 + }, + { + "epoch": 1.794010635320459, + "grad_norm": 0.23459820544496301, + "learning_rate": 3.806129323260236e-05, + "loss": 0.5016, + "step": 6410 + }, + { + "epoch": 1.7942905121746433, + "grad_norm": 0.23928913174996086, + "learning_rate": 3.8046314313128253e-05, + "loss": 0.4891, + "step": 6411 + }, + { + "epoch": 1.7945703890288272, + "grad_norm": 0.23311269055296494, + "learning_rate": 3.8031336531412636e-05, + "loss": 0.4928, + "step": 6412 + }, + { + "epoch": 1.7948502658830114, + "grad_norm": 0.23535177840249255, + "learning_rate": 3.801635988888111e-05, + "loss": 0.4765, + "step": 6413 + }, + { + "epoch": 1.7951301427371957, + "grad_norm": 0.2239102737937147, + "learning_rate": 3.8001384386959156e-05, + "loss": 0.488, + "step": 6414 + }, + { + "epoch": 1.7954100195913798, + "grad_norm": 0.23349315077746952, + "learning_rate": 3.798641002707215e-05, + "loss": 0.4838, + "step": 6415 + }, + { + "epoch": 1.7956898964455639, + "grad_norm": 0.23442839874047128, + "learning_rate": 3.7971436810645366e-05, + "loss": 0.4855, + "step": 6416 + }, + { + "epoch": 1.7959697732997482, + "grad_norm": 0.23515838770678582, + "learning_rate": 3.795646473910396e-05, + "loss": 0.4978, + "step": 6417 + }, + { + "epoch": 1.7962496501539322, + "grad_norm": 0.23190432868173524, + "learning_rate": 3.794149381387297e-05, + "loss": 0.4781, + "step": 6418 + }, + { + "epoch": 1.7965295270081163, + "grad_norm": 0.23091824593707724, + "learning_rate": 3.7926524036377364e-05, + "loss": 0.4752, + "step": 6419 + }, + { + "epoch": 1.7968094038623006, + "grad_norm": 0.23437018190683176, + "learning_rate": 3.791155540804194e-05, + "loss": 0.5035, + "step": 6420 + }, + { + "epoch": 1.7970892807164849, + "grad_norm": 0.21984239262466293, + "learning_rate": 3.789658793029145e-05, + "loss": 0.4884, + "step": 6421 + }, + { + "epoch": 1.797369157570669, + "grad_norm": 0.22807071931345446, + "learning_rate": 3.78816216045505e-05, + "loss": 0.4783, + "step": 6422 + }, + { + "epoch": 1.797649034424853, + "grad_norm": 0.22784355353303584, + "learning_rate": 3.786665643224358e-05, + "loss": 0.4728, + "step": 6423 + }, + { + "epoch": 1.7979289112790373, + "grad_norm": 0.2347225972423178, + "learning_rate": 3.785169241479509e-05, + "loss": 0.5039, + "step": 6424 + }, + { + "epoch": 1.7982087881332214, + "grad_norm": 0.22206278320218145, + "learning_rate": 3.783672955362935e-05, + "loss": 0.5007, + "step": 6425 + }, + { + "epoch": 1.7984886649874054, + "grad_norm": 0.2351551878025639, + "learning_rate": 3.7821767850170466e-05, + "loss": 0.4912, + "step": 6426 + }, + { + "epoch": 1.7987685418415897, + "grad_norm": 0.23269033290744898, + "learning_rate": 3.780680730584253e-05, + "loss": 0.4968, + "step": 6427 + }, + { + "epoch": 1.7990484186957738, + "grad_norm": 0.22914788941688, + "learning_rate": 3.779184792206951e-05, + "loss": 0.475, + "step": 6428 + }, + { + "epoch": 1.7993282955499579, + "grad_norm": 0.22543778209049295, + "learning_rate": 3.777688970027524e-05, + "loss": 0.4863, + "step": 6429 + }, + { + "epoch": 1.7996081724041422, + "grad_norm": 0.22460144271306612, + "learning_rate": 3.776193264188344e-05, + "loss": 0.4947, + "step": 6430 + }, + { + "epoch": 1.7998880492583265, + "grad_norm": 0.21557219381407622, + "learning_rate": 3.774697674831775e-05, + "loss": 0.4765, + "step": 6431 + }, + { + "epoch": 1.8001679261125105, + "grad_norm": 0.23275502909831608, + "learning_rate": 3.7732022021001664e-05, + "loss": 0.4858, + "step": 6432 + }, + { + "epoch": 1.8004478029666946, + "grad_norm": 0.23199607288966145, + "learning_rate": 3.7717068461358596e-05, + "loss": 0.4903, + "step": 6433 + }, + { + "epoch": 1.800727679820879, + "grad_norm": 0.2278125739350418, + "learning_rate": 3.770211607081183e-05, + "loss": 0.4872, + "step": 6434 + }, + { + "epoch": 1.801007556675063, + "grad_norm": 0.2379301071524677, + "learning_rate": 3.7687164850784526e-05, + "loss": 0.479, + "step": 6435 + }, + { + "epoch": 1.801287433529247, + "grad_norm": 0.22751805524157373, + "learning_rate": 3.767221480269978e-05, + "loss": 0.4873, + "step": 6436 + }, + { + "epoch": 1.8015673103834313, + "grad_norm": 0.2309160753830912, + "learning_rate": 3.765726592798052e-05, + "loss": 0.4618, + "step": 6437 + }, + { + "epoch": 1.8018471872376156, + "grad_norm": 0.21758202907819693, + "learning_rate": 3.764231822804958e-05, + "loss": 0.4732, + "step": 6438 + }, + { + "epoch": 1.8021270640917995, + "grad_norm": 0.23736044965375436, + "learning_rate": 3.762737170432973e-05, + "loss": 0.4818, + "step": 6439 + }, + { + "epoch": 1.8024069409459837, + "grad_norm": 0.23311279739565727, + "learning_rate": 3.761242635824356e-05, + "loss": 0.494, + "step": 6440 + }, + { + "epoch": 1.802686817800168, + "grad_norm": 0.22807656196516024, + "learning_rate": 3.759748219121359e-05, + "loss": 0.4803, + "step": 6441 + }, + { + "epoch": 1.802966694654352, + "grad_norm": 0.2321034621612971, + "learning_rate": 3.75825392046622e-05, + "loss": 0.4779, + "step": 6442 + }, + { + "epoch": 1.8032465715085362, + "grad_norm": 0.22768239233460297, + "learning_rate": 3.7567597400011703e-05, + "loss": 0.4735, + "step": 6443 + }, + { + "epoch": 1.8035264483627205, + "grad_norm": 0.22936595037518426, + "learning_rate": 3.7552656778684216e-05, + "loss": 0.4825, + "step": 6444 + }, + { + "epoch": 1.8038063252169045, + "grad_norm": 0.2237806501924362, + "learning_rate": 3.753771734210183e-05, + "loss": 0.4796, + "step": 6445 + }, + { + "epoch": 1.8040862020710886, + "grad_norm": 0.23211481534268055, + "learning_rate": 3.752277909168648e-05, + "loss": 0.4845, + "step": 6446 + }, + { + "epoch": 1.804366078925273, + "grad_norm": 0.23266260150977672, + "learning_rate": 3.750784202886001e-05, + "loss": 0.4851, + "step": 6447 + }, + { + "epoch": 1.8046459557794572, + "grad_norm": 0.23755488035578567, + "learning_rate": 3.7492906155044114e-05, + "loss": 0.5005, + "step": 6448 + }, + { + "epoch": 1.804925832633641, + "grad_norm": 0.2287752327500001, + "learning_rate": 3.747797147166042e-05, + "loss": 0.4707, + "step": 6449 + }, + { + "epoch": 1.8052057094878253, + "grad_norm": 0.22820070453364244, + "learning_rate": 3.746303798013041e-05, + "loss": 0.4666, + "step": 6450 + }, + { + "epoch": 1.8054855863420096, + "grad_norm": 0.22947505886552355, + "learning_rate": 3.744810568187545e-05, + "loss": 0.489, + "step": 6451 + }, + { + "epoch": 1.8057654631961937, + "grad_norm": 0.22844334265879734, + "learning_rate": 3.7433174578316835e-05, + "loss": 0.5023, + "step": 6452 + }, + { + "epoch": 1.8060453400503778, + "grad_norm": 0.21741136174269188, + "learning_rate": 3.741824467087569e-05, + "loss": 0.475, + "step": 6453 + }, + { + "epoch": 1.806325216904562, + "grad_norm": 0.2148390706314293, + "learning_rate": 3.7403315960973064e-05, + "loss": 0.505, + "step": 6454 + }, + { + "epoch": 1.8066050937587461, + "grad_norm": 0.2288312399873347, + "learning_rate": 3.7388388450029855e-05, + "loss": 0.4684, + "step": 6455 + }, + { + "epoch": 1.8068849706129302, + "grad_norm": 0.2285322793435309, + "learning_rate": 3.737346213946691e-05, + "loss": 0.4773, + "step": 6456 + }, + { + "epoch": 1.8071648474671145, + "grad_norm": 0.2239650476414393, + "learning_rate": 3.7358537030704896e-05, + "loss": 0.4818, + "step": 6457 + }, + { + "epoch": 1.8074447243212988, + "grad_norm": 0.22043383339681774, + "learning_rate": 3.734361312516442e-05, + "loss": 0.508, + "step": 6458 + }, + { + "epoch": 1.8077246011754828, + "grad_norm": 0.2242485484748912, + "learning_rate": 3.732869042426592e-05, + "loss": 0.4914, + "step": 6459 + }, + { + "epoch": 1.808004478029667, + "grad_norm": 0.24177245495128274, + "learning_rate": 3.731376892942976e-05, + "loss": 0.4846, + "step": 6460 + }, + { + "epoch": 1.8082843548838512, + "grad_norm": 0.22739999611614095, + "learning_rate": 3.729884864207618e-05, + "loss": 0.477, + "step": 6461 + }, + { + "epoch": 1.8085642317380353, + "grad_norm": 0.23692348971431781, + "learning_rate": 3.7283929563625294e-05, + "loss": 0.4876, + "step": 6462 + }, + { + "epoch": 1.8088441085922193, + "grad_norm": 0.22754627198776253, + "learning_rate": 3.72690116954971e-05, + "loss": 0.4812, + "step": 6463 + }, + { + "epoch": 1.8091239854464036, + "grad_norm": 0.2282740894772259, + "learning_rate": 3.725409503911151e-05, + "loss": 0.4742, + "step": 6464 + }, + { + "epoch": 1.8094038623005877, + "grad_norm": 0.22680885610375315, + "learning_rate": 3.723917959588828e-05, + "loss": 0.4716, + "step": 6465 + }, + { + "epoch": 1.8096837391547718, + "grad_norm": 0.2397221881680731, + "learning_rate": 3.722426536724708e-05, + "loss": 0.4512, + "step": 6466 + }, + { + "epoch": 1.809963616008956, + "grad_norm": 0.2357184203711619, + "learning_rate": 3.720935235460745e-05, + "loss": 0.5143, + "step": 6467 + }, + { + "epoch": 1.8102434928631403, + "grad_norm": 0.23959450410895225, + "learning_rate": 3.719444055938883e-05, + "loss": 0.4687, + "step": 6468 + }, + { + "epoch": 1.8105233697173244, + "grad_norm": 0.23308362789130577, + "learning_rate": 3.717952998301052e-05, + "loss": 0.4992, + "step": 6469 + }, + { + "epoch": 1.8108032465715085, + "grad_norm": 0.231560784337764, + "learning_rate": 3.716462062689172e-05, + "loss": 0.4846, + "step": 6470 + }, + { + "epoch": 1.8110831234256928, + "grad_norm": 0.22361674901817202, + "learning_rate": 3.714971249245152e-05, + "loss": 0.4763, + "step": 6471 + }, + { + "epoch": 1.8113630002798768, + "grad_norm": 0.2192466550776247, + "learning_rate": 3.713480558110887e-05, + "loss": 0.4574, + "step": 6472 + }, + { + "epoch": 1.811642877134061, + "grad_norm": 0.2245220027892542, + "learning_rate": 3.711989989428263e-05, + "loss": 0.4688, + "step": 6473 + }, + { + "epoch": 1.8119227539882452, + "grad_norm": 0.22460989978525742, + "learning_rate": 3.710499543339152e-05, + "loss": 0.5207, + "step": 6474 + }, + { + "epoch": 1.8122026308424295, + "grad_norm": 0.22365795712389236, + "learning_rate": 3.709009219985417e-05, + "loss": 0.4746, + "step": 6475 + }, + { + "epoch": 1.8124825076966133, + "grad_norm": 0.22742510462896132, + "learning_rate": 3.707519019508907e-05, + "loss": 0.4616, + "step": 6476 + }, + { + "epoch": 1.8127623845507976, + "grad_norm": 0.23270651754693447, + "learning_rate": 3.706028942051461e-05, + "loss": 0.4919, + "step": 6477 + }, + { + "epoch": 1.813042261404982, + "grad_norm": 0.22924717727397215, + "learning_rate": 3.704538987754903e-05, + "loss": 0.4571, + "step": 6478 + }, + { + "epoch": 1.813322138259166, + "grad_norm": 0.2324567980807715, + "learning_rate": 3.7030491567610506e-05, + "loss": 0.4908, + "step": 6479 + }, + { + "epoch": 1.81360201511335, + "grad_norm": 0.22630809404807603, + "learning_rate": 3.701559449211705e-05, + "loss": 0.4715, + "step": 6480 + }, + { + "epoch": 1.8138818919675344, + "grad_norm": 0.2325789468179387, + "learning_rate": 3.700069865248658e-05, + "loss": 0.4995, + "step": 6481 + }, + { + "epoch": 1.8141617688217184, + "grad_norm": 0.22868875421283755, + "learning_rate": 3.698580405013688e-05, + "loss": 0.4681, + "step": 6482 + }, + { + "epoch": 1.8144416456759025, + "grad_norm": 0.2228385106509269, + "learning_rate": 3.697091068648564e-05, + "loss": 0.4979, + "step": 6483 + }, + { + "epoch": 1.8147215225300868, + "grad_norm": 0.23306944761670445, + "learning_rate": 3.6956018562950415e-05, + "loss": 0.4873, + "step": 6484 + }, + { + "epoch": 1.815001399384271, + "grad_norm": 0.24804230272369826, + "learning_rate": 3.694112768094864e-05, + "loss": 0.4971, + "step": 6485 + }, + { + "epoch": 1.815281276238455, + "grad_norm": 0.2140979449228239, + "learning_rate": 3.692623804189764e-05, + "loss": 0.4893, + "step": 6486 + }, + { + "epoch": 1.8155611530926392, + "grad_norm": 0.2312693550145618, + "learning_rate": 3.691134964721462e-05, + "loss": 0.4692, + "step": 6487 + }, + { + "epoch": 1.8158410299468235, + "grad_norm": 0.22042986305597487, + "learning_rate": 3.689646249831668e-05, + "loss": 0.4956, + "step": 6488 + }, + { + "epoch": 1.8161209068010076, + "grad_norm": 0.23717865677569938, + "learning_rate": 3.688157659662076e-05, + "loss": 0.5061, + "step": 6489 + }, + { + "epoch": 1.8164007836551916, + "grad_norm": 0.22830538337239978, + "learning_rate": 3.686669194354371e-05, + "loss": 0.4825, + "step": 6490 + }, + { + "epoch": 1.816680660509376, + "grad_norm": 0.23548809670770163, + "learning_rate": 3.6851808540502295e-05, + "loss": 0.4824, + "step": 6491 + }, + { + "epoch": 1.81696053736356, + "grad_norm": 0.23247411953207245, + "learning_rate": 3.6836926388913095e-05, + "loss": 0.4702, + "step": 6492 + }, + { + "epoch": 1.817240414217744, + "grad_norm": 0.2322560620840003, + "learning_rate": 3.682204549019261e-05, + "loss": 0.4958, + "step": 6493 + }, + { + "epoch": 1.8175202910719284, + "grad_norm": 0.2367174302539643, + "learning_rate": 3.680716584575721e-05, + "loss": 0.4842, + "step": 6494 + }, + { + "epoch": 1.8178001679261127, + "grad_norm": 0.2262330883899346, + "learning_rate": 3.679228745702315e-05, + "loss": 0.4709, + "step": 6495 + }, + { + "epoch": 1.8180800447802967, + "grad_norm": 0.23941925398908787, + "learning_rate": 3.677741032540656e-05, + "loss": 0.4847, + "step": 6496 + }, + { + "epoch": 1.8183599216344808, + "grad_norm": 0.23766584439123845, + "learning_rate": 3.6762534452323473e-05, + "loss": 0.4977, + "step": 6497 + }, + { + "epoch": 1.818639798488665, + "grad_norm": 0.22628950665675004, + "learning_rate": 3.674765983918975e-05, + "loss": 0.4898, + "step": 6498 + }, + { + "epoch": 1.8189196753428492, + "grad_norm": 0.2325501537253925, + "learning_rate": 3.673278648742118e-05, + "loss": 0.5121, + "step": 6499 + }, + { + "epoch": 1.8191995521970332, + "grad_norm": 0.21975349812296702, + "learning_rate": 3.671791439843343e-05, + "loss": 0.4696, + "step": 6500 + }, + { + "epoch": 1.8194794290512175, + "grad_norm": 0.22476305581145867, + "learning_rate": 3.6703043573642006e-05, + "loss": 0.4782, + "step": 6501 + }, + { + "epoch": 1.8197593059054016, + "grad_norm": 0.2303965136921295, + "learning_rate": 3.668817401446234e-05, + "loss": 0.5142, + "step": 6502 + }, + { + "epoch": 1.8200391827595856, + "grad_norm": 0.21528946747237063, + "learning_rate": 3.667330572230974e-05, + "loss": 0.4836, + "step": 6503 + }, + { + "epoch": 1.82031905961377, + "grad_norm": 0.2272320800398011, + "learning_rate": 3.665843869859934e-05, + "loss": 0.4773, + "step": 6504 + }, + { + "epoch": 1.8205989364679542, + "grad_norm": 0.22549971262125146, + "learning_rate": 3.664357294474622e-05, + "loss": 0.4868, + "step": 6505 + }, + { + "epoch": 1.8208788133221383, + "grad_norm": 0.2268554758779282, + "learning_rate": 3.6628708462165305e-05, + "loss": 0.5041, + "step": 6506 + }, + { + "epoch": 1.8211586901763224, + "grad_norm": 0.234173341629569, + "learning_rate": 3.6613845252271384e-05, + "loss": 0.4934, + "step": 6507 + }, + { + "epoch": 1.8214385670305067, + "grad_norm": 0.22910008037893956, + "learning_rate": 3.659898331647918e-05, + "loss": 0.4912, + "step": 6508 + }, + { + "epoch": 1.8217184438846907, + "grad_norm": 0.2292380469118307, + "learning_rate": 3.658412265620325e-05, + "loss": 0.505, + "step": 6509 + }, + { + "epoch": 1.8219983207388748, + "grad_norm": 0.2117386117147549, + "learning_rate": 3.656926327285803e-05, + "loss": 0.484, + "step": 6510 + }, + { + "epoch": 1.822278197593059, + "grad_norm": 0.22782567886548166, + "learning_rate": 3.655440516785785e-05, + "loss": 0.4701, + "step": 6511 + }, + { + "epoch": 1.8225580744472432, + "grad_norm": 0.23506440586366945, + "learning_rate": 3.6539548342616915e-05, + "loss": 0.4761, + "step": 6512 + }, + { + "epoch": 1.8228379513014272, + "grad_norm": 0.22249264011253417, + "learning_rate": 3.65246927985493e-05, + "loss": 0.4787, + "step": 6513 + }, + { + "epoch": 1.8231178281556115, + "grad_norm": 0.23584814428475087, + "learning_rate": 3.650983853706896e-05, + "loss": 0.4827, + "step": 6514 + }, + { + "epoch": 1.8233977050097958, + "grad_norm": 0.23505181554847834, + "learning_rate": 3.6494985559589756e-05, + "loss": 0.4771, + "step": 6515 + }, + { + "epoch": 1.8236775818639799, + "grad_norm": 0.22564307430272826, + "learning_rate": 3.648013386752538e-05, + "loss": 0.4863, + "step": 6516 + }, + { + "epoch": 1.823957458718164, + "grad_norm": 0.22307986252388057, + "learning_rate": 3.6465283462289425e-05, + "loss": 0.4731, + "step": 6517 + }, + { + "epoch": 1.8242373355723482, + "grad_norm": 0.23434808973815136, + "learning_rate": 3.6450434345295365e-05, + "loss": 0.4793, + "step": 6518 + }, + { + "epoch": 1.8245172124265323, + "grad_norm": 0.23478699045619666, + "learning_rate": 3.643558651795654e-05, + "loss": 0.4981, + "step": 6519 + }, + { + "epoch": 1.8247970892807164, + "grad_norm": 0.2247815529834582, + "learning_rate": 3.6420739981686204e-05, + "loss": 0.4651, + "step": 6520 + }, + { + "epoch": 1.8250769661349007, + "grad_norm": 0.22969336588368408, + "learning_rate": 3.6405894737897414e-05, + "loss": 0.4951, + "step": 6521 + }, + { + "epoch": 1.825356842989085, + "grad_norm": 0.22075444109330236, + "learning_rate": 3.639105078800319e-05, + "loss": 0.4958, + "step": 6522 + }, + { + "epoch": 1.8256367198432688, + "grad_norm": 0.224078144205714, + "learning_rate": 3.637620813341636e-05, + "loss": 0.4938, + "step": 6523 + }, + { + "epoch": 1.825916596697453, + "grad_norm": 0.2291087961429806, + "learning_rate": 3.6361366775549656e-05, + "loss": 0.4964, + "step": 6524 + }, + { + "epoch": 1.8261964735516374, + "grad_norm": 0.2433034533933429, + "learning_rate": 3.6346526715815705e-05, + "loss": 0.4832, + "step": 6525 + }, + { + "epoch": 1.8264763504058215, + "grad_norm": 0.23054066820239014, + "learning_rate": 3.633168795562698e-05, + "loss": 0.4901, + "step": 6526 + }, + { + "epoch": 1.8267562272600055, + "grad_norm": 0.22596352161533168, + "learning_rate": 3.631685049639586e-05, + "loss": 0.491, + "step": 6527 + }, + { + "epoch": 1.8270361041141898, + "grad_norm": 0.23062114972409564, + "learning_rate": 3.630201433953456e-05, + "loss": 0.4836, + "step": 6528 + }, + { + "epoch": 1.8273159809683739, + "grad_norm": 0.23658848890530146, + "learning_rate": 3.6287179486455206e-05, + "loss": 0.4775, + "step": 6529 + }, + { + "epoch": 1.827595857822558, + "grad_norm": 0.22364569244115756, + "learning_rate": 3.6272345938569777e-05, + "loss": 0.4859, + "step": 6530 + }, + { + "epoch": 1.8278757346767422, + "grad_norm": 0.22561869333344586, + "learning_rate": 3.6257513697290145e-05, + "loss": 0.4676, + "step": 6531 + }, + { + "epoch": 1.8281556115309265, + "grad_norm": 0.23534361999312964, + "learning_rate": 3.624268276402806e-05, + "loss": 0.4816, + "step": 6532 + }, + { + "epoch": 1.8284354883851104, + "grad_norm": 0.22658793404892805, + "learning_rate": 3.622785314019513e-05, + "loss": 0.4779, + "step": 6533 + }, + { + "epoch": 1.8287153652392947, + "grad_norm": 0.23492950159751907, + "learning_rate": 3.621302482720284e-05, + "loss": 0.4906, + "step": 6534 + }, + { + "epoch": 1.828995242093479, + "grad_norm": 0.23279797475181524, + "learning_rate": 3.619819782646256e-05, + "loss": 0.4961, + "step": 6535 + }, + { + "epoch": 1.829275118947663, + "grad_norm": 0.2425314395695167, + "learning_rate": 3.6183372139385546e-05, + "loss": 0.4855, + "step": 6536 + }, + { + "epoch": 1.829554995801847, + "grad_norm": 0.22813066719609001, + "learning_rate": 3.61685477673829e-05, + "loss": 0.4988, + "step": 6537 + }, + { + "epoch": 1.8298348726560314, + "grad_norm": 0.24460833182073274, + "learning_rate": 3.615372471186562e-05, + "loss": 0.4935, + "step": 6538 + }, + { + "epoch": 1.8301147495102155, + "grad_norm": 0.225970454542716, + "learning_rate": 3.613890297424457e-05, + "loss": 0.4664, + "step": 6539 + }, + { + "epoch": 1.8303946263643995, + "grad_norm": 0.22757166180289132, + "learning_rate": 3.612408255593049e-05, + "loss": 0.4829, + "step": 6540 + }, + { + "epoch": 1.8306745032185838, + "grad_norm": 0.2276845648399905, + "learning_rate": 3.6109263458333995e-05, + "loss": 0.4789, + "step": 6541 + }, + { + "epoch": 1.8309543800727681, + "grad_norm": 0.23151724478412966, + "learning_rate": 3.609444568286559e-05, + "loss": 0.47, + "step": 6542 + }, + { + "epoch": 1.8312342569269522, + "grad_norm": 0.23053407200540227, + "learning_rate": 3.607962923093563e-05, + "loss": 0.5052, + "step": 6543 + }, + { + "epoch": 1.8315141337811363, + "grad_norm": 0.2377910574957702, + "learning_rate": 3.606481410395435e-05, + "loss": 0.4953, + "step": 6544 + }, + { + "epoch": 1.8317940106353205, + "grad_norm": 0.23321664208838241, + "learning_rate": 3.605000030333185e-05, + "loss": 0.5014, + "step": 6545 + }, + { + "epoch": 1.8320738874895046, + "grad_norm": 0.23429185721320184, + "learning_rate": 3.6035187830478144e-05, + "loss": 0.5034, + "step": 6546 + }, + { + "epoch": 1.8323537643436887, + "grad_norm": 0.230345886930467, + "learning_rate": 3.602037668680308e-05, + "loss": 0.4694, + "step": 6547 + }, + { + "epoch": 1.832633641197873, + "grad_norm": 0.21593717678789537, + "learning_rate": 3.6005566873716376e-05, + "loss": 0.4658, + "step": 6548 + }, + { + "epoch": 1.832913518052057, + "grad_norm": 0.21838213114269892, + "learning_rate": 3.5990758392627655e-05, + "loss": 0.481, + "step": 6549 + }, + { + "epoch": 1.8331933949062411, + "grad_norm": 0.2223844140768618, + "learning_rate": 3.597595124494639e-05, + "loss": 0.4811, + "step": 6550 + }, + { + "epoch": 1.8334732717604254, + "grad_norm": 0.2237573796585568, + "learning_rate": 3.596114543208194e-05, + "loss": 0.4745, + "step": 6551 + }, + { + "epoch": 1.8337531486146097, + "grad_norm": 0.22706248415344202, + "learning_rate": 3.594634095544353e-05, + "loss": 0.4772, + "step": 6552 + }, + { + "epoch": 1.8340330254687938, + "grad_norm": 0.2399243951861105, + "learning_rate": 3.593153781644024e-05, + "loss": 0.4908, + "step": 6553 + }, + { + "epoch": 1.8343129023229778, + "grad_norm": 0.22794553449691923, + "learning_rate": 3.5916736016481065e-05, + "loss": 0.4951, + "step": 6554 + }, + { + "epoch": 1.8345927791771621, + "grad_norm": 0.21512098560738155, + "learning_rate": 3.5901935556974834e-05, + "loss": 0.474, + "step": 6555 + }, + { + "epoch": 1.8348726560313462, + "grad_norm": 0.2297676345335937, + "learning_rate": 3.588713643933027e-05, + "loss": 0.4825, + "step": 6556 + }, + { + "epoch": 1.8351525328855303, + "grad_norm": 0.22045759188560637, + "learning_rate": 3.587233866495596e-05, + "loss": 0.4857, + "step": 6557 + }, + { + "epoch": 1.8354324097397146, + "grad_norm": 0.23160230731971757, + "learning_rate": 3.5857542235260354e-05, + "loss": 0.4833, + "step": 6558 + }, + { + "epoch": 1.8357122865938988, + "grad_norm": 0.23279517202259958, + "learning_rate": 3.584274715165179e-05, + "loss": 0.518, + "step": 6559 + }, + { + "epoch": 1.8359921634480827, + "grad_norm": 0.22913301949792936, + "learning_rate": 3.5827953415538495e-05, + "loss": 0.493, + "step": 6560 + }, + { + "epoch": 1.836272040302267, + "grad_norm": 0.22332197541170207, + "learning_rate": 3.581316102832851e-05, + "loss": 0.4582, + "step": 6561 + }, + { + "epoch": 1.8365519171564513, + "grad_norm": 0.21537955298112685, + "learning_rate": 3.579836999142981e-05, + "loss": 0.479, + "step": 6562 + }, + { + "epoch": 1.8368317940106353, + "grad_norm": 0.22208188083115105, + "learning_rate": 3.578358030625021e-05, + "loss": 0.4949, + "step": 6563 + }, + { + "epoch": 1.8371116708648194, + "grad_norm": 0.221294918604475, + "learning_rate": 3.576879197419738e-05, + "loss": 0.4923, + "step": 6564 + }, + { + "epoch": 1.8373915477190037, + "grad_norm": 0.23670146131103706, + "learning_rate": 3.57540049966789e-05, + "loss": 0.4874, + "step": 6565 + }, + { + "epoch": 1.8376714245731878, + "grad_norm": 0.22769863797499706, + "learning_rate": 3.5739219375102195e-05, + "loss": 0.4438, + "step": 6566 + }, + { + "epoch": 1.8379513014273718, + "grad_norm": 0.23301064843160793, + "learning_rate": 3.5724435110874575e-05, + "loss": 0.4984, + "step": 6567 + }, + { + "epoch": 1.8382311782815561, + "grad_norm": 0.2361435753693997, + "learning_rate": 3.570965220540321e-05, + "loss": 0.4968, + "step": 6568 + }, + { + "epoch": 1.8385110551357404, + "grad_norm": 0.22412440380364346, + "learning_rate": 3.5694870660095155e-05, + "loss": 0.4815, + "step": 6569 + }, + { + "epoch": 1.8387909319899243, + "grad_norm": 0.23031003962032542, + "learning_rate": 3.568009047635732e-05, + "loss": 0.4977, + "step": 6570 + }, + { + "epoch": 1.8390708088441086, + "grad_norm": 0.2265965665561193, + "learning_rate": 3.5665311655596477e-05, + "loss": 0.4907, + "step": 6571 + }, + { + "epoch": 1.8393506856982929, + "grad_norm": 0.21941080840045463, + "learning_rate": 3.5650534199219296e-05, + "loss": 0.4874, + "step": 6572 + }, + { + "epoch": 1.839630562552477, + "grad_norm": 0.2307372989342663, + "learning_rate": 3.563575810863231e-05, + "loss": 0.5001, + "step": 6573 + }, + { + "epoch": 1.839910439406661, + "grad_norm": 0.23681930184012304, + "learning_rate": 3.562098338524189e-05, + "loss": 0.4887, + "step": 6574 + }, + { + "epoch": 1.8401903162608453, + "grad_norm": 0.22566788746641836, + "learning_rate": 3.5606210030454336e-05, + "loss": 0.4873, + "step": 6575 + }, + { + "epoch": 1.8404701931150294, + "grad_norm": 0.23045563489193585, + "learning_rate": 3.5591438045675754e-05, + "loss": 0.4746, + "step": 6576 + }, + { + "epoch": 1.8407500699692134, + "grad_norm": 0.22970062919591314, + "learning_rate": 3.557666743231216e-05, + "loss": 0.4868, + "step": 6577 + }, + { + "epoch": 1.8410299468233977, + "grad_norm": 0.232551428834001, + "learning_rate": 3.556189819176945e-05, + "loss": 0.5005, + "step": 6578 + }, + { + "epoch": 1.841309823677582, + "grad_norm": 0.2512464483416976, + "learning_rate": 3.554713032545334e-05, + "loss": 0.5052, + "step": 6579 + }, + { + "epoch": 1.841589700531766, + "grad_norm": 0.2561037995866109, + "learning_rate": 3.5532363834769466e-05, + "loss": 0.4866, + "step": 6580 + }, + { + "epoch": 1.8418695773859501, + "grad_norm": 0.23349099698411724, + "learning_rate": 3.551759872112329e-05, + "loss": 0.4706, + "step": 6581 + }, + { + "epoch": 1.8421494542401344, + "grad_norm": 0.2400292340362861, + "learning_rate": 3.550283498592018e-05, + "loss": 0.4975, + "step": 6582 + }, + { + "epoch": 1.8424293310943185, + "grad_norm": 0.23739387080033075, + "learning_rate": 3.548807263056535e-05, + "loss": 0.472, + "step": 6583 + }, + { + "epoch": 1.8427092079485026, + "grad_norm": 0.23018052737349742, + "learning_rate": 3.547331165646389e-05, + "loss": 0.4953, + "step": 6584 + }, + { + "epoch": 1.8429890848026869, + "grad_norm": 0.23040655492611717, + "learning_rate": 3.545855206502076e-05, + "loss": 0.4842, + "step": 6585 + }, + { + "epoch": 1.843268961656871, + "grad_norm": 0.22808817307408025, + "learning_rate": 3.544379385764077e-05, + "loss": 0.4702, + "step": 6586 + }, + { + "epoch": 1.843548838511055, + "grad_norm": 0.22607561763707232, + "learning_rate": 3.542903703572863e-05, + "loss": 0.4764, + "step": 6587 + }, + { + "epoch": 1.8438287153652393, + "grad_norm": 0.22690591780656705, + "learning_rate": 3.541428160068893e-05, + "loss": 0.4811, + "step": 6588 + }, + { + "epoch": 1.8441085922194236, + "grad_norm": 0.22569890558528055, + "learning_rate": 3.539952755392605e-05, + "loss": 0.4887, + "step": 6589 + }, + { + "epoch": 1.8443884690736077, + "grad_norm": 0.22791151144576147, + "learning_rate": 3.538477489684431e-05, + "loss": 0.4849, + "step": 6590 + }, + { + "epoch": 1.8446683459277917, + "grad_norm": 0.2309855937697562, + "learning_rate": 3.537002363084788e-05, + "loss": 0.4751, + "step": 6591 + }, + { + "epoch": 1.844948222781976, + "grad_norm": 0.22107399622057003, + "learning_rate": 3.535527375734078e-05, + "loss": 0.4722, + "step": 6592 + }, + { + "epoch": 1.84522809963616, + "grad_norm": 0.22591956207176844, + "learning_rate": 3.534052527772692e-05, + "loss": 0.4705, + "step": 6593 + }, + { + "epoch": 1.8455079764903441, + "grad_norm": 0.22394642942919243, + "learning_rate": 3.5325778193410066e-05, + "loss": 0.4834, + "step": 6594 + }, + { + "epoch": 1.8457878533445284, + "grad_norm": 0.23104025724382923, + "learning_rate": 3.5311032505793875e-05, + "loss": 0.4872, + "step": 6595 + }, + { + "epoch": 1.8460677301987127, + "grad_norm": 0.23423258362383212, + "learning_rate": 3.5296288216281816e-05, + "loss": 0.4748, + "step": 6596 + }, + { + "epoch": 1.8463476070528966, + "grad_norm": 0.2291913005070103, + "learning_rate": 3.528154532627728e-05, + "loss": 0.4748, + "step": 6597 + }, + { + "epoch": 1.8466274839070809, + "grad_norm": 0.22803791237575421, + "learning_rate": 3.5266803837183507e-05, + "loss": 0.4768, + "step": 6598 + }, + { + "epoch": 1.8469073607612652, + "grad_norm": 0.23021079120937982, + "learning_rate": 3.525206375040358e-05, + "loss": 0.4951, + "step": 6599 + }, + { + "epoch": 1.8471872376154492, + "grad_norm": 0.23460469932875438, + "learning_rate": 3.5237325067340485e-05, + "loss": 0.4734, + "step": 6600 + }, + { + "epoch": 1.8474671144696333, + "grad_norm": 0.21942483725409273, + "learning_rate": 3.5222587789397046e-05, + "loss": 0.4691, + "step": 6601 + }, + { + "epoch": 1.8477469913238176, + "grad_norm": 0.2435986519977707, + "learning_rate": 3.520785191797598e-05, + "loss": 0.5043, + "step": 6602 + }, + { + "epoch": 1.8480268681780017, + "grad_norm": 0.22883687635578676, + "learning_rate": 3.519311745447985e-05, + "loss": 0.501, + "step": 6603 + }, + { + "epoch": 1.8483067450321857, + "grad_norm": 0.23179054577381408, + "learning_rate": 3.517838440031107e-05, + "loss": 0.4677, + "step": 6604 + }, + { + "epoch": 1.84858662188637, + "grad_norm": 0.21903980354418212, + "learning_rate": 3.516365275687198e-05, + "loss": 0.4802, + "step": 6605 + }, + { + "epoch": 1.8488664987405543, + "grad_norm": 0.22525928956838956, + "learning_rate": 3.514892252556474e-05, + "loss": 0.5016, + "step": 6606 + }, + { + "epoch": 1.8491463755947382, + "grad_norm": 0.23713793510283132, + "learning_rate": 3.513419370779134e-05, + "loss": 0.5043, + "step": 6607 + }, + { + "epoch": 1.8494262524489224, + "grad_norm": 0.2277633125686585, + "learning_rate": 3.511946630495371e-05, + "loss": 0.4862, + "step": 6608 + }, + { + "epoch": 1.8497061293031067, + "grad_norm": 0.22653383162702354, + "learning_rate": 3.51047403184536e-05, + "loss": 0.4469, + "step": 6609 + }, + { + "epoch": 1.8499860061572908, + "grad_norm": 0.22371585729474508, + "learning_rate": 3.509001574969265e-05, + "loss": 0.4788, + "step": 6610 + }, + { + "epoch": 1.8502658830114749, + "grad_norm": 0.23568903915808184, + "learning_rate": 3.507529260007234e-05, + "loss": 0.473, + "step": 6611 + }, + { + "epoch": 1.8505457598656592, + "grad_norm": 0.23328271347663027, + "learning_rate": 3.506057087099404e-05, + "loss": 0.5062, + "step": 6612 + }, + { + "epoch": 1.8508256367198432, + "grad_norm": 0.22975755415061422, + "learning_rate": 3.504585056385896e-05, + "loss": 0.4913, + "step": 6613 + }, + { + "epoch": 1.8511055135740273, + "grad_norm": 0.23899740895978278, + "learning_rate": 3.50311316800682e-05, + "loss": 0.4964, + "step": 6614 + }, + { + "epoch": 1.8513853904282116, + "grad_norm": 0.2324594531990676, + "learning_rate": 3.501641422102271e-05, + "loss": 0.4926, + "step": 6615 + }, + { + "epoch": 1.8516652672823959, + "grad_norm": 0.2240877633476286, + "learning_rate": 3.500169818812329e-05, + "loss": 0.4939, + "step": 6616 + }, + { + "epoch": 1.85194514413658, + "grad_norm": 0.21802692671464374, + "learning_rate": 3.498698358277064e-05, + "loss": 0.4685, + "step": 6617 + }, + { + "epoch": 1.852225020990764, + "grad_norm": 0.23417188850486598, + "learning_rate": 3.497227040636529e-05, + "loss": 0.4927, + "step": 6618 + }, + { + "epoch": 1.8525048978449483, + "grad_norm": 0.22578949964344666, + "learning_rate": 3.4957558660307655e-05, + "loss": 0.4668, + "step": 6619 + }, + { + "epoch": 1.8527847746991324, + "grad_norm": 0.2337330880335226, + "learning_rate": 3.494284834599801e-05, + "loss": 0.4906, + "step": 6620 + }, + { + "epoch": 1.8530646515533165, + "grad_norm": 0.21849890336454536, + "learning_rate": 3.4928139464836484e-05, + "loss": 0.4718, + "step": 6621 + }, + { + "epoch": 1.8533445284075007, + "grad_norm": 0.22638760452896164, + "learning_rate": 3.491343201822307e-05, + "loss": 0.4633, + "step": 6622 + }, + { + "epoch": 1.8536244052616848, + "grad_norm": 0.23730653988761322, + "learning_rate": 3.489872600755765e-05, + "loss": 0.4808, + "step": 6623 + }, + { + "epoch": 1.8539042821158689, + "grad_norm": 0.23330392054852062, + "learning_rate": 3.488402143423997e-05, + "loss": 0.4954, + "step": 6624 + }, + { + "epoch": 1.8541841589700532, + "grad_norm": 0.2352002684184638, + "learning_rate": 3.486931829966956e-05, + "loss": 0.4723, + "step": 6625 + }, + { + "epoch": 1.8544640358242375, + "grad_norm": 0.2262474444702206, + "learning_rate": 3.48546166052459e-05, + "loss": 0.4855, + "step": 6626 + }, + { + "epoch": 1.8547439126784215, + "grad_norm": 0.23792312337110524, + "learning_rate": 3.483991635236832e-05, + "loss": 0.4909, + "step": 6627 + }, + { + "epoch": 1.8550237895326056, + "grad_norm": 0.23675326339490255, + "learning_rate": 3.4825217542435976e-05, + "loss": 0.4925, + "step": 6628 + }, + { + "epoch": 1.85530366638679, + "grad_norm": 0.2322328205565557, + "learning_rate": 3.4810520176847926e-05, + "loss": 0.478, + "step": 6629 + }, + { + "epoch": 1.855583543240974, + "grad_norm": 0.23451994931984804, + "learning_rate": 3.4795824257003066e-05, + "loss": 0.505, + "step": 6630 + }, + { + "epoch": 1.855863420095158, + "grad_norm": 0.23738871132729128, + "learning_rate": 3.4781129784300173e-05, + "loss": 0.4692, + "step": 6631 + }, + { + "epoch": 1.8561432969493423, + "grad_norm": 0.23858813090463596, + "learning_rate": 3.4766436760137855e-05, + "loss": 0.5056, + "step": 6632 + }, + { + "epoch": 1.8564231738035264, + "grad_norm": 0.23120069797033554, + "learning_rate": 3.4751745185914616e-05, + "loss": 0.4864, + "step": 6633 + }, + { + "epoch": 1.8567030506577105, + "grad_norm": 0.2311163610037621, + "learning_rate": 3.47370550630288e-05, + "loss": 0.4959, + "step": 6634 + }, + { + "epoch": 1.8569829275118948, + "grad_norm": 0.23299874761747652, + "learning_rate": 3.472236639287863e-05, + "loss": 0.4871, + "step": 6635 + }, + { + "epoch": 1.857262804366079, + "grad_norm": 0.23315580319021786, + "learning_rate": 3.470767917686218e-05, + "loss": 0.4946, + "step": 6636 + }, + { + "epoch": 1.8575426812202631, + "grad_norm": 0.21697351826870254, + "learning_rate": 3.4692993416377384e-05, + "loss": 0.4817, + "step": 6637 + }, + { + "epoch": 1.8578225580744472, + "grad_norm": 0.23157225537121584, + "learning_rate": 3.4678309112822045e-05, + "loss": 0.5037, + "step": 6638 + }, + { + "epoch": 1.8581024349286315, + "grad_norm": 0.21691501081625855, + "learning_rate": 3.4663626267593804e-05, + "loss": 0.482, + "step": 6639 + }, + { + "epoch": 1.8583823117828155, + "grad_norm": 0.2255015824527897, + "learning_rate": 3.464894488209022e-05, + "loss": 0.4784, + "step": 6640 + }, + { + "epoch": 1.8586621886369996, + "grad_norm": 0.2270068270032285, + "learning_rate": 3.463426495770865e-05, + "loss": 0.4746, + "step": 6641 + }, + { + "epoch": 1.858942065491184, + "grad_norm": 0.23644966889917676, + "learning_rate": 3.4619586495846357e-05, + "loss": 0.4982, + "step": 6642 + }, + { + "epoch": 1.8592219423453682, + "grad_norm": 0.23171323603454722, + "learning_rate": 3.460490949790041e-05, + "loss": 0.4941, + "step": 6643 + }, + { + "epoch": 1.859501819199552, + "grad_norm": 0.2429913281245455, + "learning_rate": 3.45902339652678e-05, + "loss": 0.4936, + "step": 6644 + }, + { + "epoch": 1.8597816960537363, + "grad_norm": 0.23158854033323945, + "learning_rate": 3.457555989934533e-05, + "loss": 0.4722, + "step": 6645 + }, + { + "epoch": 1.8600615729079206, + "grad_norm": 0.2372162625718152, + "learning_rate": 3.456088730152972e-05, + "loss": 0.4812, + "step": 6646 + }, + { + "epoch": 1.8603414497621047, + "grad_norm": 0.2380636609178275, + "learning_rate": 3.454621617321749e-05, + "loss": 0.4957, + "step": 6647 + }, + { + "epoch": 1.8606213266162888, + "grad_norm": 0.23527489381309008, + "learning_rate": 3.4531546515805056e-05, + "loss": 0.4883, + "step": 6648 + }, + { + "epoch": 1.860901203470473, + "grad_norm": 0.22678876118962954, + "learning_rate": 3.4516878330688684e-05, + "loss": 0.4744, + "step": 6649 + }, + { + "epoch": 1.8611810803246571, + "grad_norm": 0.23155560539218384, + "learning_rate": 3.45022116192645e-05, + "loss": 0.4848, + "step": 6650 + }, + { + "epoch": 1.8614609571788412, + "grad_norm": 0.22284067438286498, + "learning_rate": 3.4487546382928495e-05, + "loss": 0.477, + "step": 6651 + }, + { + "epoch": 1.8617408340330255, + "grad_norm": 0.23531975822926823, + "learning_rate": 3.44728826230765e-05, + "loss": 0.4859, + "step": 6652 + }, + { + "epoch": 1.8620207108872098, + "grad_norm": 0.22356410580624714, + "learning_rate": 3.445822034110422e-05, + "loss": 0.4721, + "step": 6653 + }, + { + "epoch": 1.8623005877413936, + "grad_norm": 0.23753243385983955, + "learning_rate": 3.444355953840724e-05, + "loss": 0.4741, + "step": 6654 + }, + { + "epoch": 1.862580464595578, + "grad_norm": 0.22271276532637385, + "learning_rate": 3.442890021638097e-05, + "loss": 0.4554, + "step": 6655 + }, + { + "epoch": 1.8628603414497622, + "grad_norm": 0.23500052714654035, + "learning_rate": 3.441424237642068e-05, + "loss": 0.4737, + "step": 6656 + }, + { + "epoch": 1.8631402183039463, + "grad_norm": 0.2211684202951456, + "learning_rate": 3.4399586019921534e-05, + "loss": 0.494, + "step": 6657 + }, + { + "epoch": 1.8634200951581303, + "grad_norm": 0.228229708890135, + "learning_rate": 3.438493114827852e-05, + "loss": 0.5004, + "step": 6658 + }, + { + "epoch": 1.8636999720123146, + "grad_norm": 0.2304615792709606, + "learning_rate": 3.437027776288651e-05, + "loss": 0.4975, + "step": 6659 + }, + { + "epoch": 1.8639798488664987, + "grad_norm": 0.22220549585512975, + "learning_rate": 3.4355625865140216e-05, + "loss": 0.4695, + "step": 6660 + }, + { + "epoch": 1.8642597257206828, + "grad_norm": 0.22582991796289023, + "learning_rate": 3.4340975456434194e-05, + "loss": 0.4763, + "step": 6661 + }, + { + "epoch": 1.864539602574867, + "grad_norm": 0.2339641001908726, + "learning_rate": 3.432632653816289e-05, + "loss": 0.5006, + "step": 6662 + }, + { + "epoch": 1.8648194794290514, + "grad_norm": 0.24080089659130607, + "learning_rate": 3.43116791117206e-05, + "loss": 0.4743, + "step": 6663 + }, + { + "epoch": 1.8650993562832354, + "grad_norm": 0.22493271396098588, + "learning_rate": 3.429703317850147e-05, + "loss": 0.4905, + "step": 6664 + }, + { + "epoch": 1.8653792331374195, + "grad_norm": 0.22651811462105334, + "learning_rate": 3.428238873989952e-05, + "loss": 0.4594, + "step": 6665 + }, + { + "epoch": 1.8656591099916038, + "grad_norm": 0.22980238210861795, + "learning_rate": 3.4267745797308603e-05, + "loss": 0.4772, + "step": 6666 + }, + { + "epoch": 1.8659389868457879, + "grad_norm": 0.22222766889007314, + "learning_rate": 3.4253104352122455e-05, + "loss": 0.4574, + "step": 6667 + }, + { + "epoch": 1.866218863699972, + "grad_norm": 0.22346612379582007, + "learning_rate": 3.423846440573464e-05, + "loss": 0.4651, + "step": 6668 + }, + { + "epoch": 1.8664987405541562, + "grad_norm": 0.2391847394314958, + "learning_rate": 3.4223825959538605e-05, + "loss": 0.509, + "step": 6669 + }, + { + "epoch": 1.8667786174083403, + "grad_norm": 0.2378791997329002, + "learning_rate": 3.420918901492765e-05, + "loss": 0.5103, + "step": 6670 + }, + { + "epoch": 1.8670584942625243, + "grad_norm": 0.2291997970568722, + "learning_rate": 3.4194553573294915e-05, + "loss": 0.5246, + "step": 6671 + }, + { + "epoch": 1.8673383711167086, + "grad_norm": 0.23050602087609529, + "learning_rate": 3.417991963603343e-05, + "loss": 0.4878, + "step": 6672 + }, + { + "epoch": 1.867618247970893, + "grad_norm": 0.22618728410437525, + "learning_rate": 3.416528720453604e-05, + "loss": 0.458, + "step": 6673 + }, + { + "epoch": 1.867898124825077, + "grad_norm": 0.22957725079065297, + "learning_rate": 3.415065628019547e-05, + "loss": 0.486, + "step": 6674 + }, + { + "epoch": 1.868178001679261, + "grad_norm": 0.21686842573351836, + "learning_rate": 3.4136026864404317e-05, + "loss": 0.474, + "step": 6675 + }, + { + "epoch": 1.8684578785334454, + "grad_norm": 0.22803966934711845, + "learning_rate": 3.412139895855501e-05, + "loss": 0.4867, + "step": 6676 + }, + { + "epoch": 1.8687377553876294, + "grad_norm": 0.22594656416803122, + "learning_rate": 3.4106772564039836e-05, + "loss": 0.5091, + "step": 6677 + }, + { + "epoch": 1.8690176322418135, + "grad_norm": 0.22327474274885217, + "learning_rate": 3.4092147682250955e-05, + "loss": 0.508, + "step": 6678 + }, + { + "epoch": 1.8692975090959978, + "grad_norm": 0.23355716789072498, + "learning_rate": 3.407752431458036e-05, + "loss": 0.4902, + "step": 6679 + }, + { + "epoch": 1.869577385950182, + "grad_norm": 0.2274599028974708, + "learning_rate": 3.4062902462419885e-05, + "loss": 0.5023, + "step": 6680 + }, + { + "epoch": 1.869857262804366, + "grad_norm": 0.23691963820836076, + "learning_rate": 3.404828212716129e-05, + "loss": 0.4974, + "step": 6681 + }, + { + "epoch": 1.8701371396585502, + "grad_norm": 0.22293792749485655, + "learning_rate": 3.403366331019613e-05, + "loss": 0.4766, + "step": 6682 + }, + { + "epoch": 1.8704170165127345, + "grad_norm": 0.2243697088365475, + "learning_rate": 3.4019046012915836e-05, + "loss": 0.4759, + "step": 6683 + }, + { + "epoch": 1.8706968933669186, + "grad_norm": 0.23707864223862304, + "learning_rate": 3.400443023671169e-05, + "loss": 0.5076, + "step": 6684 + }, + { + "epoch": 1.8709767702211026, + "grad_norm": 0.22529443944272834, + "learning_rate": 3.398981598297482e-05, + "loss": 0.4754, + "step": 6685 + }, + { + "epoch": 1.871256647075287, + "grad_norm": 0.23318736667270665, + "learning_rate": 3.397520325309623e-05, + "loss": 0.5036, + "step": 6686 + }, + { + "epoch": 1.871536523929471, + "grad_norm": 0.22390308841712703, + "learning_rate": 3.3960592048466764e-05, + "loss": 0.4811, + "step": 6687 + }, + { + "epoch": 1.871816400783655, + "grad_norm": 0.22970308023106925, + "learning_rate": 3.394598237047713e-05, + "loss": 0.4783, + "step": 6688 + }, + { + "epoch": 1.8720962776378394, + "grad_norm": 0.22221704198782882, + "learning_rate": 3.3931374220517866e-05, + "loss": 0.4755, + "step": 6689 + }, + { + "epoch": 1.8723761544920237, + "grad_norm": 0.22796392224910958, + "learning_rate": 3.3916767599979407e-05, + "loss": 0.4845, + "step": 6690 + }, + { + "epoch": 1.8726560313462075, + "grad_norm": 0.22382994048146973, + "learning_rate": 3.3902162510252e-05, + "loss": 0.5051, + "step": 6691 + }, + { + "epoch": 1.8729359082003918, + "grad_norm": 0.22864055488099297, + "learning_rate": 3.388755895272578e-05, + "loss": 0.4845, + "step": 6692 + }, + { + "epoch": 1.873215785054576, + "grad_norm": 0.23888721778466862, + "learning_rate": 3.387295692879072e-05, + "loss": 0.496, + "step": 6693 + }, + { + "epoch": 1.8734956619087602, + "grad_norm": 0.2272996783429827, + "learning_rate": 3.3858356439836644e-05, + "loss": 0.473, + "step": 6694 + }, + { + "epoch": 1.8737755387629442, + "grad_norm": 0.23718106520445464, + "learning_rate": 3.384375748725325e-05, + "loss": 0.479, + "step": 6695 + }, + { + "epoch": 1.8740554156171285, + "grad_norm": 0.2319471396994759, + "learning_rate": 3.382916007243007e-05, + "loss": 0.472, + "step": 6696 + }, + { + "epoch": 1.8743352924713126, + "grad_norm": 0.22802839646117284, + "learning_rate": 3.381456419675645e-05, + "loss": 0.4745, + "step": 6697 + }, + { + "epoch": 1.8746151693254967, + "grad_norm": 0.23381792574372254, + "learning_rate": 3.37999698616217e-05, + "loss": 0.4878, + "step": 6698 + }, + { + "epoch": 1.874895046179681, + "grad_norm": 0.23649436863047152, + "learning_rate": 3.3785377068414865e-05, + "loss": 0.4807, + "step": 6699 + }, + { + "epoch": 1.8751749230338652, + "grad_norm": 0.23406065361013736, + "learning_rate": 3.377078581852492e-05, + "loss": 0.4652, + "step": 6700 + }, + { + "epoch": 1.8754547998880493, + "grad_norm": 0.25478096416382523, + "learning_rate": 3.375619611334067e-05, + "loss": 0.467, + "step": 6701 + }, + { + "epoch": 1.8757346767422334, + "grad_norm": 0.22884520788962845, + "learning_rate": 3.3741607954250765e-05, + "loss": 0.4725, + "step": 6702 + }, + { + "epoch": 1.8760145535964177, + "grad_norm": 0.2500879542893693, + "learning_rate": 3.3727021342643714e-05, + "loss": 0.4955, + "step": 6703 + }, + { + "epoch": 1.8762944304506017, + "grad_norm": 0.2315372897410905, + "learning_rate": 3.371243627990788e-05, + "loss": 0.4754, + "step": 6704 + }, + { + "epoch": 1.8765743073047858, + "grad_norm": 0.2343821662968564, + "learning_rate": 3.3697852767431484e-05, + "loss": 0.4614, + "step": 6705 + }, + { + "epoch": 1.87685418415897, + "grad_norm": 0.22417983360116148, + "learning_rate": 3.3683270806602575e-05, + "loss": 0.4779, + "step": 6706 + }, + { + "epoch": 1.8771340610131542, + "grad_norm": 0.2269968776273528, + "learning_rate": 3.366869039880909e-05, + "loss": 0.494, + "step": 6707 + }, + { + "epoch": 1.8774139378673382, + "grad_norm": 0.22291882165936222, + "learning_rate": 3.365411154543878e-05, + "loss": 0.4575, + "step": 6708 + }, + { + "epoch": 1.8776938147215225, + "grad_norm": 0.22856643072056357, + "learning_rate": 3.36395342478793e-05, + "loss": 0.4682, + "step": 6709 + }, + { + "epoch": 1.8779736915757068, + "grad_norm": 0.22809095891160383, + "learning_rate": 3.36249585075181e-05, + "loss": 0.477, + "step": 6710 + }, + { + "epoch": 1.8782535684298909, + "grad_norm": 0.22944389585931602, + "learning_rate": 3.3610384325742514e-05, + "loss": 0.4827, + "step": 6711 + }, + { + "epoch": 1.878533445284075, + "grad_norm": 0.23926445809624353, + "learning_rate": 3.3595811703939726e-05, + "loss": 0.483, + "step": 6712 + }, + { + "epoch": 1.8788133221382592, + "grad_norm": 0.2270824439574971, + "learning_rate": 3.3581240643496763e-05, + "loss": 0.4928, + "step": 6713 + }, + { + "epoch": 1.8790931989924433, + "grad_norm": 0.23341885127196485, + "learning_rate": 3.356667114580052e-05, + "loss": 0.4832, + "step": 6714 + }, + { + "epoch": 1.8793730758466274, + "grad_norm": 0.2264185400000116, + "learning_rate": 3.355210321223769e-05, + "loss": 0.4941, + "step": 6715 + }, + { + "epoch": 1.8796529527008117, + "grad_norm": 0.22818613255724515, + "learning_rate": 3.353753684419489e-05, + "loss": 0.4886, + "step": 6716 + }, + { + "epoch": 1.879932829554996, + "grad_norm": 0.25188605457591406, + "learning_rate": 3.3522972043058555e-05, + "loss": 0.4732, + "step": 6717 + }, + { + "epoch": 1.8802127064091798, + "grad_norm": 0.22689699208048122, + "learning_rate": 3.350840881021496e-05, + "loss": 0.4806, + "step": 6718 + }, + { + "epoch": 1.880492583263364, + "grad_norm": 0.22009302848574322, + "learning_rate": 3.349384714705024e-05, + "loss": 0.509, + "step": 6719 + }, + { + "epoch": 1.8807724601175484, + "grad_norm": 0.21810958134175087, + "learning_rate": 3.347928705495039e-05, + "loss": 0.4671, + "step": 6720 + }, + { + "epoch": 1.8810523369717325, + "grad_norm": 0.22203103242697145, + "learning_rate": 3.346472853530125e-05, + "loss": 0.4761, + "step": 6721 + }, + { + "epoch": 1.8813322138259165, + "grad_norm": 0.23112227639094682, + "learning_rate": 3.34501715894885e-05, + "loss": 0.4864, + "step": 6722 + }, + { + "epoch": 1.8816120906801008, + "grad_norm": 0.22943863932519792, + "learning_rate": 3.3435616218897674e-05, + "loss": 0.5088, + "step": 6723 + }, + { + "epoch": 1.881891967534285, + "grad_norm": 0.22710685521934543, + "learning_rate": 3.342106242491417e-05, + "loss": 0.478, + "step": 6724 + }, + { + "epoch": 1.882171844388469, + "grad_norm": 0.230110155385769, + "learning_rate": 3.3406510208923224e-05, + "loss": 0.4833, + "step": 6725 + }, + { + "epoch": 1.8824517212426533, + "grad_norm": 0.22411641904965765, + "learning_rate": 3.339195957230993e-05, + "loss": 0.4576, + "step": 6726 + }, + { + "epoch": 1.8827315980968375, + "grad_norm": 0.22616843299935008, + "learning_rate": 3.337741051645922e-05, + "loss": 0.4855, + "step": 6727 + }, + { + "epoch": 1.8830114749510214, + "grad_norm": 0.23439793114301566, + "learning_rate": 3.3362863042755876e-05, + "loss": 0.47, + "step": 6728 + }, + { + "epoch": 1.8832913518052057, + "grad_norm": 0.23099348044755166, + "learning_rate": 3.334831715258454e-05, + "loss": 0.4729, + "step": 6729 + }, + { + "epoch": 1.88357122865939, + "grad_norm": 0.247098590957284, + "learning_rate": 3.33337728473297e-05, + "loss": 0.4891, + "step": 6730 + }, + { + "epoch": 1.883851105513574, + "grad_norm": 0.23603307439394292, + "learning_rate": 3.331923012837569e-05, + "loss": 0.4804, + "step": 6731 + }, + { + "epoch": 1.8841309823677581, + "grad_norm": 0.2241205392850302, + "learning_rate": 3.33046889971067e-05, + "loss": 0.4798, + "step": 6732 + }, + { + "epoch": 1.8844108592219424, + "grad_norm": 0.23273793641263346, + "learning_rate": 3.329014945490675e-05, + "loss": 0.4578, + "step": 6733 + }, + { + "epoch": 1.8846907360761265, + "grad_norm": 0.23846937477481026, + "learning_rate": 3.327561150315973e-05, + "loss": 0.4626, + "step": 6734 + }, + { + "epoch": 1.8849706129303105, + "grad_norm": 0.22400149776889866, + "learning_rate": 3.326107514324936e-05, + "loss": 0.4957, + "step": 6735 + }, + { + "epoch": 1.8852504897844948, + "grad_norm": 0.24377813442499874, + "learning_rate": 3.3246540376559234e-05, + "loss": 0.4978, + "step": 6736 + }, + { + "epoch": 1.8855303666386791, + "grad_norm": 0.22339977902939695, + "learning_rate": 3.323200720447277e-05, + "loss": 0.5022, + "step": 6737 + }, + { + "epoch": 1.8858102434928632, + "grad_norm": 0.2412162155364134, + "learning_rate": 3.3217475628373245e-05, + "loss": 0.5013, + "step": 6738 + }, + { + "epoch": 1.8860901203470473, + "grad_norm": 0.23513602950112825, + "learning_rate": 3.320294564964379e-05, + "loss": 0.4944, + "step": 6739 + }, + { + "epoch": 1.8863699972012316, + "grad_norm": 0.2298080315654412, + "learning_rate": 3.318841726966736e-05, + "loss": 0.4745, + "step": 6740 + }, + { + "epoch": 1.8866498740554156, + "grad_norm": 0.23436143477223603, + "learning_rate": 3.3173890489826784e-05, + "loss": 0.4821, + "step": 6741 + }, + { + "epoch": 1.8869297509095997, + "grad_norm": 0.23349787346008202, + "learning_rate": 3.315936531150473e-05, + "loss": 0.4906, + "step": 6742 + }, + { + "epoch": 1.887209627763784, + "grad_norm": 0.23196108339389324, + "learning_rate": 3.314484173608371e-05, + "loss": 0.487, + "step": 6743 + }, + { + "epoch": 1.887489504617968, + "grad_norm": 0.22526643770602886, + "learning_rate": 3.313031976494609e-05, + "loss": 0.4957, + "step": 6744 + }, + { + "epoch": 1.8877693814721521, + "grad_norm": 0.22093999000554704, + "learning_rate": 3.3115799399474077e-05, + "loss": 0.4776, + "step": 6745 + }, + { + "epoch": 1.8880492583263364, + "grad_norm": 0.2289801069621866, + "learning_rate": 3.310128064104974e-05, + "loss": 0.4782, + "step": 6746 + }, + { + "epoch": 1.8883291351805207, + "grad_norm": 0.22843801143379772, + "learning_rate": 3.308676349105495e-05, + "loss": 0.4753, + "step": 6747 + }, + { + "epoch": 1.8886090120347048, + "grad_norm": 0.23811988591323568, + "learning_rate": 3.3072247950871496e-05, + "loss": 0.4804, + "step": 6748 + }, + { + "epoch": 1.8888888888888888, + "grad_norm": 0.23532957675361993, + "learning_rate": 3.305773402188095e-05, + "loss": 0.4921, + "step": 6749 + }, + { + "epoch": 1.8891687657430731, + "grad_norm": 0.22624397351583414, + "learning_rate": 3.3043221705464766e-05, + "loss": 0.4809, + "step": 6750 + }, + { + "epoch": 1.8894486425972572, + "grad_norm": 0.23696380018089494, + "learning_rate": 3.302871100300423e-05, + "loss": 0.4797, + "step": 6751 + }, + { + "epoch": 1.8897285194514413, + "grad_norm": 0.23025909142493592, + "learning_rate": 3.301420191588048e-05, + "loss": 0.4917, + "step": 6752 + }, + { + "epoch": 1.8900083963056256, + "grad_norm": 0.2222611228566544, + "learning_rate": 3.2999694445474494e-05, + "loss": 0.4805, + "step": 6753 + }, + { + "epoch": 1.8902882731598096, + "grad_norm": 0.22800570578446014, + "learning_rate": 3.298518859316711e-05, + "loss": 0.5008, + "step": 6754 + }, + { + "epoch": 1.8905681500139937, + "grad_norm": 0.23659529528159529, + "learning_rate": 3.297068436033899e-05, + "loss": 0.471, + "step": 6755 + }, + { + "epoch": 1.890848026868178, + "grad_norm": 0.23670723026194176, + "learning_rate": 3.2956181748370674e-05, + "loss": 0.4957, + "step": 6756 + }, + { + "epoch": 1.8911279037223623, + "grad_norm": 0.23089372161918936, + "learning_rate": 3.294168075864252e-05, + "loss": 0.4883, + "step": 6757 + }, + { + "epoch": 1.8914077805765463, + "grad_norm": 0.22239354153578902, + "learning_rate": 3.292718139253473e-05, + "loss": 0.4789, + "step": 6758 + }, + { + "epoch": 1.8916876574307304, + "grad_norm": 0.2256668861696989, + "learning_rate": 3.291268365142738e-05, + "loss": 0.4982, + "step": 6759 + }, + { + "epoch": 1.8919675342849147, + "grad_norm": 0.2429305644567666, + "learning_rate": 3.289818753670035e-05, + "loss": 0.4695, + "step": 6760 + }, + { + "epoch": 1.8922474111390988, + "grad_norm": 0.23982032082301433, + "learning_rate": 3.288369304973342e-05, + "loss": 0.497, + "step": 6761 + }, + { + "epoch": 1.8925272879932828, + "grad_norm": 0.2236843177274853, + "learning_rate": 3.2869200191906157e-05, + "loss": 0.4893, + "step": 6762 + }, + { + "epoch": 1.8928071648474671, + "grad_norm": 0.2307508653621394, + "learning_rate": 3.2854708964598014e-05, + "loss": 0.4804, + "step": 6763 + }, + { + "epoch": 1.8930870417016514, + "grad_norm": 0.23444924896112015, + "learning_rate": 3.2840219369188265e-05, + "loss": 0.4881, + "step": 6764 + }, + { + "epoch": 1.8933669185558353, + "grad_norm": 0.22349152559402358, + "learning_rate": 3.282573140705604e-05, + "loss": 0.4651, + "step": 6765 + }, + { + "epoch": 1.8936467954100196, + "grad_norm": 0.23378393901685646, + "learning_rate": 3.2811245079580316e-05, + "loss": 0.4947, + "step": 6766 + }, + { + "epoch": 1.8939266722642039, + "grad_norm": 0.23377842235723953, + "learning_rate": 3.27967603881399e-05, + "loss": 0.4805, + "step": 6767 + }, + { + "epoch": 1.894206549118388, + "grad_norm": 0.23044283625762985, + "learning_rate": 3.278227733411348e-05, + "loss": 0.4677, + "step": 6768 + }, + { + "epoch": 1.894486425972572, + "grad_norm": 0.22821637898356942, + "learning_rate": 3.276779591887952e-05, + "loss": 0.489, + "step": 6769 + }, + { + "epoch": 1.8947663028267563, + "grad_norm": 0.2349851898158919, + "learning_rate": 3.27533161438164e-05, + "loss": 0.5062, + "step": 6770 + }, + { + "epoch": 1.8950461796809404, + "grad_norm": 0.22807032935111787, + "learning_rate": 3.2738838010302295e-05, + "loss": 0.4549, + "step": 6771 + }, + { + "epoch": 1.8953260565351244, + "grad_norm": 0.23021950686549847, + "learning_rate": 3.2724361519715254e-05, + "loss": 0.4869, + "step": 6772 + }, + { + "epoch": 1.8956059333893087, + "grad_norm": 0.2257923102895986, + "learning_rate": 3.270988667343315e-05, + "loss": 0.4766, + "step": 6773 + }, + { + "epoch": 1.895885810243493, + "grad_norm": 0.22582074092110432, + "learning_rate": 3.269541347283371e-05, + "loss": 0.475, + "step": 6774 + }, + { + "epoch": 1.896165687097677, + "grad_norm": 0.2542424124741826, + "learning_rate": 3.268094191929451e-05, + "loss": 0.4913, + "step": 6775 + }, + { + "epoch": 1.8964455639518611, + "grad_norm": 0.2244689958047732, + "learning_rate": 3.266647201419294e-05, + "loss": 0.4724, + "step": 6776 + }, + { + "epoch": 1.8967254408060454, + "grad_norm": 0.2344822286633166, + "learning_rate": 3.265200375890626e-05, + "loss": 0.4826, + "step": 6777 + }, + { + "epoch": 1.8970053176602295, + "grad_norm": 0.22813453503741773, + "learning_rate": 3.263753715481158e-05, + "loss": 0.4631, + "step": 6778 + }, + { + "epoch": 1.8972851945144136, + "grad_norm": 0.23215944271930963, + "learning_rate": 3.262307220328583e-05, + "loss": 0.4744, + "step": 6779 + }, + { + "epoch": 1.8975650713685979, + "grad_norm": 0.2419283337502692, + "learning_rate": 3.26086089057058e-05, + "loss": 0.4807, + "step": 6780 + }, + { + "epoch": 1.897844948222782, + "grad_norm": 0.23628743917134987, + "learning_rate": 3.2594147263448106e-05, + "loss": 0.4974, + "step": 6781 + }, + { + "epoch": 1.898124825076966, + "grad_norm": 0.2259527845255626, + "learning_rate": 3.257968727788922e-05, + "loss": 0.4708, + "step": 6782 + }, + { + "epoch": 1.8984047019311503, + "grad_norm": 0.23373185042166159, + "learning_rate": 3.256522895040545e-05, + "loss": 0.513, + "step": 6783 + }, + { + "epoch": 1.8986845787853346, + "grad_norm": 0.22550441473117894, + "learning_rate": 3.2550772282372945e-05, + "loss": 0.4756, + "step": 6784 + }, + { + "epoch": 1.8989644556395187, + "grad_norm": 0.23326936591686565, + "learning_rate": 3.253631727516771e-05, + "loss": 0.4633, + "step": 6785 + }, + { + "epoch": 1.8992443324937027, + "grad_norm": 0.22180939238271521, + "learning_rate": 3.2521863930165555e-05, + "loss": 0.4943, + "step": 6786 + }, + { + "epoch": 1.899524209347887, + "grad_norm": 0.22088803819986813, + "learning_rate": 3.2507412248742195e-05, + "loss": 0.4722, + "step": 6787 + }, + { + "epoch": 1.899804086202071, + "grad_norm": 0.22825648960038597, + "learning_rate": 3.2492962232273115e-05, + "loss": 0.4788, + "step": 6788 + }, + { + "epoch": 1.9000839630562552, + "grad_norm": 0.2349773301452994, + "learning_rate": 3.247851388213369e-05, + "loss": 0.4838, + "step": 6789 + }, + { + "epoch": 1.9003638399104394, + "grad_norm": 0.22767266404411718, + "learning_rate": 3.246406719969912e-05, + "loss": 0.4626, + "step": 6790 + }, + { + "epoch": 1.9006437167646235, + "grad_norm": 0.2253672913354678, + "learning_rate": 3.2449622186344445e-05, + "loss": 0.4648, + "step": 6791 + }, + { + "epoch": 1.9009235936188076, + "grad_norm": 0.23378096511454644, + "learning_rate": 3.243517884344456e-05, + "loss": 0.4808, + "step": 6792 + }, + { + "epoch": 1.9012034704729919, + "grad_norm": 0.23490868833061007, + "learning_rate": 3.242073717237418e-05, + "loss": 0.4832, + "step": 6793 + }, + { + "epoch": 1.9014833473271762, + "grad_norm": 0.22865554902914265, + "learning_rate": 3.240629717450788e-05, + "loss": 0.4704, + "step": 6794 + }, + { + "epoch": 1.9017632241813602, + "grad_norm": 0.22979670192876384, + "learning_rate": 3.239185885122005e-05, + "loss": 0.4763, + "step": 6795 + }, + { + "epoch": 1.9020431010355443, + "grad_norm": 0.23161283702216043, + "learning_rate": 3.2377422203884963e-05, + "loss": 0.4904, + "step": 6796 + }, + { + "epoch": 1.9023229778897286, + "grad_norm": 0.22735673238208506, + "learning_rate": 3.236298723387669e-05, + "loss": 0.4802, + "step": 6797 + }, + { + "epoch": 1.9026028547439127, + "grad_norm": 0.2236196980638249, + "learning_rate": 3.234855394256917e-05, + "loss": 0.4883, + "step": 6798 + }, + { + "epoch": 1.9028827315980967, + "grad_norm": 0.24825711839058326, + "learning_rate": 3.233412233133616e-05, + "loss": 0.4953, + "step": 6799 + }, + { + "epoch": 1.903162608452281, + "grad_norm": 0.21784708167937517, + "learning_rate": 3.231969240155127e-05, + "loss": 0.477, + "step": 6800 + }, + { + "epoch": 1.9034424853064653, + "grad_norm": 0.22480391886199635, + "learning_rate": 3.2305264154587964e-05, + "loss": 0.4774, + "step": 6801 + }, + { + "epoch": 1.9037223621606492, + "grad_norm": 0.24005887709323095, + "learning_rate": 3.2290837591819515e-05, + "loss": 0.473, + "step": 6802 + }, + { + "epoch": 1.9040022390148335, + "grad_norm": 0.23900000138365213, + "learning_rate": 3.227641271461906e-05, + "loss": 0.4758, + "step": 6803 + }, + { + "epoch": 1.9042821158690177, + "grad_norm": 0.23707660125217198, + "learning_rate": 3.226198952435956e-05, + "loss": 0.4903, + "step": 6804 + }, + { + "epoch": 1.9045619927232018, + "grad_norm": 0.23458362369244137, + "learning_rate": 3.224756802241383e-05, + "loss": 0.5005, + "step": 6805 + }, + { + "epoch": 1.9048418695773859, + "grad_norm": 0.22772437266238116, + "learning_rate": 3.2233148210154505e-05, + "loss": 0.48, + "step": 6806 + }, + { + "epoch": 1.9051217464315702, + "grad_norm": 0.23658507405210563, + "learning_rate": 3.2218730088954085e-05, + "loss": 0.5001, + "step": 6807 + }, + { + "epoch": 1.9054016232857542, + "grad_norm": 0.2347934669458113, + "learning_rate": 3.220431366018488e-05, + "loss": 0.4981, + "step": 6808 + }, + { + "epoch": 1.9056815001399383, + "grad_norm": 0.2386236334617226, + "learning_rate": 3.218989892521907e-05, + "loss": 0.4966, + "step": 6809 + }, + { + "epoch": 1.9059613769941226, + "grad_norm": 0.22405168964152639, + "learning_rate": 3.217548588542864e-05, + "loss": 0.5002, + "step": 6810 + }, + { + "epoch": 1.906241253848307, + "grad_norm": 0.22443528202264984, + "learning_rate": 3.2161074542185446e-05, + "loss": 0.4716, + "step": 6811 + }, + { + "epoch": 1.9065211307024907, + "grad_norm": 0.22918651917991598, + "learning_rate": 3.214666489686115e-05, + "loss": 0.4711, + "step": 6812 + }, + { + "epoch": 1.906801007556675, + "grad_norm": 0.23550169298562623, + "learning_rate": 3.213225695082729e-05, + "loss": 0.4957, + "step": 6813 + }, + { + "epoch": 1.9070808844108593, + "grad_norm": 0.23245829516667219, + "learning_rate": 3.211785070545522e-05, + "loss": 0.4958, + "step": 6814 + }, + { + "epoch": 1.9073607612650434, + "grad_norm": 0.23398829095356824, + "learning_rate": 3.2103446162116123e-05, + "loss": 0.4737, + "step": 6815 + }, + { + "epoch": 1.9076406381192275, + "grad_norm": 0.23008193940614188, + "learning_rate": 3.208904332218104e-05, + "loss": 0.484, + "step": 6816 + }, + { + "epoch": 1.9079205149734118, + "grad_norm": 0.2321203631771179, + "learning_rate": 3.2074642187020844e-05, + "loss": 0.4659, + "step": 6817 + }, + { + "epoch": 1.9082003918275958, + "grad_norm": 0.2378090521192645, + "learning_rate": 3.2060242758006224e-05, + "loss": 0.4779, + "step": 6818 + }, + { + "epoch": 1.90848026868178, + "grad_norm": 0.2375734569737336, + "learning_rate": 3.204584503650775e-05, + "loss": 0.5021, + "step": 6819 + }, + { + "epoch": 1.9087601455359642, + "grad_norm": 0.22816905535331417, + "learning_rate": 3.2031449023895796e-05, + "loss": 0.4792, + "step": 6820 + }, + { + "epoch": 1.9090400223901485, + "grad_norm": 0.2330459331434565, + "learning_rate": 3.2017054721540574e-05, + "loss": 0.4663, + "step": 6821 + }, + { + "epoch": 1.9093198992443325, + "grad_norm": 0.2336438924530243, + "learning_rate": 3.200266213081216e-05, + "loss": 0.4634, + "step": 6822 + }, + { + "epoch": 1.9095997760985166, + "grad_norm": 0.23412554809101752, + "learning_rate": 3.1988271253080435e-05, + "loss": 0.5008, + "step": 6823 + }, + { + "epoch": 1.909879652952701, + "grad_norm": 0.22469914491950652, + "learning_rate": 3.1973882089715134e-05, + "loss": 0.4814, + "step": 6824 + }, + { + "epoch": 1.910159529806885, + "grad_norm": 0.21854263756921014, + "learning_rate": 3.195949464208582e-05, + "loss": 0.5001, + "step": 6825 + }, + { + "epoch": 1.910439406661069, + "grad_norm": 0.22998329956911545, + "learning_rate": 3.1945108911561905e-05, + "loss": 0.4831, + "step": 6826 + }, + { + "epoch": 1.9107192835152533, + "grad_norm": 0.22654095187481313, + "learning_rate": 3.193072489951263e-05, + "loss": 0.4668, + "step": 6827 + }, + { + "epoch": 1.9109991603694374, + "grad_norm": 0.2325386858114916, + "learning_rate": 3.191634260730707e-05, + "loss": 0.4807, + "step": 6828 + }, + { + "epoch": 1.9112790372236215, + "grad_norm": 0.22987321747102826, + "learning_rate": 3.1901962036314135e-05, + "loss": 0.4721, + "step": 6829 + }, + { + "epoch": 1.9115589140778058, + "grad_norm": 0.23193940874677382, + "learning_rate": 3.1887583187902594e-05, + "loss": 0.5256, + "step": 6830 + }, + { + "epoch": 1.91183879093199, + "grad_norm": 0.2274389175956893, + "learning_rate": 3.187320606344102e-05, + "loss": 0.4872, + "step": 6831 + }, + { + "epoch": 1.9121186677861741, + "grad_norm": 0.23002855024204658, + "learning_rate": 3.185883066429784e-05, + "loss": 0.5087, + "step": 6832 + }, + { + "epoch": 1.9123985446403582, + "grad_norm": 0.23766752700212365, + "learning_rate": 3.1844456991841316e-05, + "loss": 0.499, + "step": 6833 + }, + { + "epoch": 1.9126784214945425, + "grad_norm": 0.2369432673308841, + "learning_rate": 3.1830085047439535e-05, + "loss": 0.4971, + "step": 6834 + }, + { + "epoch": 1.9129582983487265, + "grad_norm": 0.2291129532696114, + "learning_rate": 3.181571483246043e-05, + "loss": 0.4531, + "step": 6835 + }, + { + "epoch": 1.9132381752029106, + "grad_norm": 0.23528789356582233, + "learning_rate": 3.180134634827177e-05, + "loss": 0.4949, + "step": 6836 + }, + { + "epoch": 1.913518052057095, + "grad_norm": 0.23423299731789948, + "learning_rate": 3.178697959624114e-05, + "loss": 0.4633, + "step": 6837 + }, + { + "epoch": 1.9137979289112792, + "grad_norm": 0.227982563824072, + "learning_rate": 3.1772614577735986e-05, + "loss": 0.4606, + "step": 6838 + }, + { + "epoch": 1.914077805765463, + "grad_norm": 0.24287594115101296, + "learning_rate": 3.175825129412359e-05, + "loss": 0.4902, + "step": 6839 + }, + { + "epoch": 1.9143576826196473, + "grad_norm": 0.23235641962549547, + "learning_rate": 3.174388974677103e-05, + "loss": 0.4824, + "step": 6840 + }, + { + "epoch": 1.9146375594738316, + "grad_norm": 0.21938849222606618, + "learning_rate": 3.172952993704529e-05, + "loss": 0.4674, + "step": 6841 + }, + { + "epoch": 1.9149174363280157, + "grad_norm": 0.2331902976043227, + "learning_rate": 3.1715171866313096e-05, + "loss": 0.4907, + "step": 6842 + }, + { + "epoch": 1.9151973131821998, + "grad_norm": 0.2292911227099199, + "learning_rate": 3.1700815535941084e-05, + "loss": 0.4916, + "step": 6843 + }, + { + "epoch": 1.915477190036384, + "grad_norm": 0.23688346430017465, + "learning_rate": 3.1686460947295695e-05, + "loss": 0.498, + "step": 6844 + }, + { + "epoch": 1.9157570668905681, + "grad_norm": 0.2282263419118602, + "learning_rate": 3.167210810174319e-05, + "loss": 0.4707, + "step": 6845 + }, + { + "epoch": 1.9160369437447522, + "grad_norm": 0.22748798163523531, + "learning_rate": 3.1657757000649704e-05, + "loss": 0.4669, + "step": 6846 + }, + { + "epoch": 1.9163168205989365, + "grad_norm": 0.23148091960667105, + "learning_rate": 3.1643407645381164e-05, + "loss": 0.5006, + "step": 6847 + }, + { + "epoch": 1.9165966974531208, + "grad_norm": 0.22499136668384834, + "learning_rate": 3.162906003730337e-05, + "loss": 0.4706, + "step": 6848 + }, + { + "epoch": 1.9168765743073046, + "grad_norm": 0.2268158645626959, + "learning_rate": 3.1614714177781915e-05, + "loss": 0.4729, + "step": 6849 + }, + { + "epoch": 1.917156451161489, + "grad_norm": 0.22077174740142377, + "learning_rate": 3.1600370068182264e-05, + "loss": 0.4623, + "step": 6850 + }, + { + "epoch": 1.9174363280156732, + "grad_norm": 0.22790442444950615, + "learning_rate": 3.158602770986968e-05, + "loss": 0.4837, + "step": 6851 + }, + { + "epoch": 1.9177162048698573, + "grad_norm": 0.23374442369929613, + "learning_rate": 3.15716871042093e-05, + "loss": 0.4879, + "step": 6852 + }, + { + "epoch": 1.9179960817240413, + "grad_norm": 0.23013580297805894, + "learning_rate": 3.155734825256604e-05, + "loss": 0.4994, + "step": 6853 + }, + { + "epoch": 1.9182759585782256, + "grad_norm": 0.2266661411073759, + "learning_rate": 3.154301115630471e-05, + "loss": 0.4692, + "step": 6854 + }, + { + "epoch": 1.9185558354324097, + "grad_norm": 0.23331203969907446, + "learning_rate": 3.15286758167899e-05, + "loss": 0.4765, + "step": 6855 + }, + { + "epoch": 1.9188357122865938, + "grad_norm": 0.2262449587177227, + "learning_rate": 3.1514342235386064e-05, + "loss": 0.4788, + "step": 6856 + }, + { + "epoch": 1.919115589140778, + "grad_norm": 0.24121557769879817, + "learning_rate": 3.1500010413457484e-05, + "loss": 0.4841, + "step": 6857 + }, + { + "epoch": 1.9193954659949624, + "grad_norm": 0.2366969780444885, + "learning_rate": 3.148568035236826e-05, + "loss": 0.4866, + "step": 6858 + }, + { + "epoch": 1.9196753428491464, + "grad_norm": 0.23380953792861275, + "learning_rate": 3.147135205348237e-05, + "loss": 0.4957, + "step": 6859 + }, + { + "epoch": 1.9199552197033305, + "grad_norm": 0.2227770637262004, + "learning_rate": 3.145702551816354e-05, + "loss": 0.4794, + "step": 6860 + }, + { + "epoch": 1.9202350965575148, + "grad_norm": 0.22126815045739895, + "learning_rate": 3.1442700747775414e-05, + "loss": 0.4806, + "step": 6861 + }, + { + "epoch": 1.9205149734116989, + "grad_norm": 0.21848077923144377, + "learning_rate": 3.1428377743681405e-05, + "loss": 0.4625, + "step": 6862 + }, + { + "epoch": 1.920794850265883, + "grad_norm": 0.23327747417855133, + "learning_rate": 3.1414056507244794e-05, + "loss": 0.4868, + "step": 6863 + }, + { + "epoch": 1.9210747271200672, + "grad_norm": 0.22302391172410224, + "learning_rate": 3.139973703982869e-05, + "loss": 0.4625, + "step": 6864 + }, + { + "epoch": 1.9213546039742513, + "grad_norm": 0.23149430994358766, + "learning_rate": 3.138541934279603e-05, + "loss": 0.464, + "step": 6865 + }, + { + "epoch": 1.9216344808284354, + "grad_norm": 0.21861418479211353, + "learning_rate": 3.137110341750957e-05, + "loss": 0.4839, + "step": 6866 + }, + { + "epoch": 1.9219143576826196, + "grad_norm": 0.22460244112099875, + "learning_rate": 3.135678926533192e-05, + "loss": 0.4669, + "step": 6867 + }, + { + "epoch": 1.922194234536804, + "grad_norm": 0.22979776423988815, + "learning_rate": 3.13424768876255e-05, + "loss": 0.4842, + "step": 6868 + }, + { + "epoch": 1.922474111390988, + "grad_norm": 0.227849370538838, + "learning_rate": 3.132816628575257e-05, + "loss": 0.486, + "step": 6869 + }, + { + "epoch": 1.922753988245172, + "grad_norm": 0.2352057090343283, + "learning_rate": 3.131385746107523e-05, + "loss": 0.4568, + "step": 6870 + }, + { + "epoch": 1.9230338650993564, + "grad_norm": 0.24022913515036032, + "learning_rate": 3.1299550414955394e-05, + "loss": 0.475, + "step": 6871 + }, + { + "epoch": 1.9233137419535404, + "grad_norm": 0.23218267938665885, + "learning_rate": 3.1285245148754815e-05, + "loss": 0.4434, + "step": 6872 + }, + { + "epoch": 1.9235936188077245, + "grad_norm": 0.23746534284199786, + "learning_rate": 3.127094166383509e-05, + "loss": 0.4746, + "step": 6873 + }, + { + "epoch": 1.9238734956619088, + "grad_norm": 0.2312195414137495, + "learning_rate": 3.12566399615576e-05, + "loss": 0.494, + "step": 6874 + }, + { + "epoch": 1.9241533725160929, + "grad_norm": 0.22627180660925544, + "learning_rate": 3.1242340043283614e-05, + "loss": 0.4756, + "step": 6875 + }, + { + "epoch": 1.924433249370277, + "grad_norm": 0.23208926301994978, + "learning_rate": 3.1228041910374205e-05, + "loss": 0.5087, + "step": 6876 + }, + { + "epoch": 1.9247131262244612, + "grad_norm": 0.22887270094402065, + "learning_rate": 3.12137455641903e-05, + "loss": 0.4687, + "step": 6877 + }, + { + "epoch": 1.9249930030786455, + "grad_norm": 0.22519232721995053, + "learning_rate": 3.1199451006092584e-05, + "loss": 0.4807, + "step": 6878 + }, + { + "epoch": 1.9252728799328296, + "grad_norm": 0.23411256382615975, + "learning_rate": 3.1185158237441644e-05, + "loss": 0.4714, + "step": 6879 + }, + { + "epoch": 1.9255527567870137, + "grad_norm": 0.219722626424481, + "learning_rate": 3.117086725959787e-05, + "loss": 0.4781, + "step": 6880 + }, + { + "epoch": 1.925832633641198, + "grad_norm": 0.23152207410977735, + "learning_rate": 3.1156578073921486e-05, + "loss": 0.484, + "step": 6881 + }, + { + "epoch": 1.926112510495382, + "grad_norm": 0.22507155641696094, + "learning_rate": 3.114229068177256e-05, + "loss": 0.4631, + "step": 6882 + }, + { + "epoch": 1.926392387349566, + "grad_norm": 0.22872350848907855, + "learning_rate": 3.1128005084510955e-05, + "loss": 0.4747, + "step": 6883 + }, + { + "epoch": 1.9266722642037504, + "grad_norm": 0.2398090353730163, + "learning_rate": 3.111372128349639e-05, + "loss": 0.4873, + "step": 6884 + }, + { + "epoch": 1.9269521410579347, + "grad_norm": 0.23516847024261786, + "learning_rate": 3.109943928008841e-05, + "loss": 0.4985, + "step": 6885 + }, + { + "epoch": 1.9272320179121185, + "grad_norm": 0.2400545689646885, + "learning_rate": 3.108515907564638e-05, + "loss": 0.4731, + "step": 6886 + }, + { + "epoch": 1.9275118947663028, + "grad_norm": 0.23056596783776126, + "learning_rate": 3.107088067152948e-05, + "loss": 0.4816, + "step": 6887 + }, + { + "epoch": 1.927791771620487, + "grad_norm": 0.23462545850016983, + "learning_rate": 3.105660406909677e-05, + "loss": 0.486, + "step": 6888 + }, + { + "epoch": 1.9280716484746712, + "grad_norm": 0.24154000300239664, + "learning_rate": 3.104232926970708e-05, + "loss": 0.4858, + "step": 6889 + }, + { + "epoch": 1.9283515253288552, + "grad_norm": 0.22110620665817354, + "learning_rate": 3.1028056274719105e-05, + "loss": 0.4443, + "step": 6890 + }, + { + "epoch": 1.9286314021830395, + "grad_norm": 0.2352114639897142, + "learning_rate": 3.101378508549135e-05, + "loss": 0.508, + "step": 6891 + }, + { + "epoch": 1.9289112790372236, + "grad_norm": 0.225769653804193, + "learning_rate": 3.0999515703382144e-05, + "loss": 0.4903, + "step": 6892 + }, + { + "epoch": 1.9291911558914077, + "grad_norm": 0.23538240263250854, + "learning_rate": 3.098524812974969e-05, + "loss": 0.4891, + "step": 6893 + }, + { + "epoch": 1.929471032745592, + "grad_norm": 0.23268110961277907, + "learning_rate": 3.097098236595195e-05, + "loss": 0.4785, + "step": 6894 + }, + { + "epoch": 1.9297509095997762, + "grad_norm": 0.24068440132113061, + "learning_rate": 3.095671841334678e-05, + "loss": 0.5166, + "step": 6895 + }, + { + "epoch": 1.9300307864539603, + "grad_norm": 0.22853175972526274, + "learning_rate": 3.0942456273291796e-05, + "loss": 0.4891, + "step": 6896 + }, + { + "epoch": 1.9303106633081444, + "grad_norm": 0.23087725893848762, + "learning_rate": 3.092819594714449e-05, + "loss": 0.4868, + "step": 6897 + }, + { + "epoch": 1.9305905401623287, + "grad_norm": 0.23132547733018058, + "learning_rate": 3.0913937436262175e-05, + "loss": 0.4874, + "step": 6898 + }, + { + "epoch": 1.9308704170165127, + "grad_norm": 0.23381523443609475, + "learning_rate": 3.0899680742001956e-05, + "loss": 0.4879, + "step": 6899 + }, + { + "epoch": 1.9311502938706968, + "grad_norm": 0.22696264033813296, + "learning_rate": 3.088542586572083e-05, + "loss": 0.4754, + "step": 6900 + }, + { + "epoch": 1.931430170724881, + "grad_norm": 0.22362342375231267, + "learning_rate": 3.087117280877557e-05, + "loss": 0.4697, + "step": 6901 + }, + { + "epoch": 1.9317100475790652, + "grad_norm": 0.23054527726771223, + "learning_rate": 3.085692157252279e-05, + "loss": 0.4659, + "step": 6902 + }, + { + "epoch": 1.9319899244332492, + "grad_norm": 0.23376377890307531, + "learning_rate": 3.084267215831893e-05, + "loss": 0.4749, + "step": 6903 + }, + { + "epoch": 1.9322698012874335, + "grad_norm": 0.23148748010655892, + "learning_rate": 3.082842456752025e-05, + "loss": 0.4968, + "step": 6904 + }, + { + "epoch": 1.9325496781416178, + "grad_norm": 0.23592315242289705, + "learning_rate": 3.081417880148285e-05, + "loss": 0.4683, + "step": 6905 + }, + { + "epoch": 1.932829554995802, + "grad_norm": 0.24650908244682188, + "learning_rate": 3.0799934861562655e-05, + "loss": 0.4693, + "step": 6906 + }, + { + "epoch": 1.933109431849986, + "grad_norm": 0.22955108605469735, + "learning_rate": 3.0785692749115414e-05, + "loss": 0.4858, + "step": 6907 + }, + { + "epoch": 1.9333893087041703, + "grad_norm": 0.2236911996856329, + "learning_rate": 3.0771452465496684e-05, + "loss": 0.4779, + "step": 6908 + }, + { + "epoch": 1.9336691855583543, + "grad_norm": 0.24161723810605487, + "learning_rate": 3.075721401206187e-05, + "loss": 0.4721, + "step": 6909 + }, + { + "epoch": 1.9339490624125384, + "grad_norm": 0.2303975187687021, + "learning_rate": 3.074297739016618e-05, + "loss": 0.4758, + "step": 6910 + }, + { + "epoch": 1.9342289392667227, + "grad_norm": 0.23451034142286228, + "learning_rate": 3.0728742601164705e-05, + "loss": 0.5024, + "step": 6911 + }, + { + "epoch": 1.9345088161209067, + "grad_norm": 0.23476269244260678, + "learning_rate": 3.0714509646412296e-05, + "loss": 0.4575, + "step": 6912 + }, + { + "epoch": 1.9347886929750908, + "grad_norm": 0.2394929049749627, + "learning_rate": 3.070027852726366e-05, + "loss": 0.4853, + "step": 6913 + }, + { + "epoch": 1.935068569829275, + "grad_norm": 0.2427409412793654, + "learning_rate": 3.068604924507331e-05, + "loss": 0.47, + "step": 6914 + }, + { + "epoch": 1.9353484466834594, + "grad_norm": 0.23343392326369555, + "learning_rate": 3.0671821801195594e-05, + "loss": 0.4833, + "step": 6915 + }, + { + "epoch": 1.9356283235376435, + "grad_norm": 0.22446464120246062, + "learning_rate": 3.0657596196984686e-05, + "loss": 0.4544, + "step": 6916 + }, + { + "epoch": 1.9359082003918275, + "grad_norm": 0.2302097824648424, + "learning_rate": 3.064337243379462e-05, + "loss": 0.5017, + "step": 6917 + }, + { + "epoch": 1.9361880772460118, + "grad_norm": 0.23476317780853365, + "learning_rate": 3.062915051297919e-05, + "loss": 0.493, + "step": 6918 + }, + { + "epoch": 1.936467954100196, + "grad_norm": 0.23094801660810896, + "learning_rate": 3.061493043589206e-05, + "loss": 0.4744, + "step": 6919 + }, + { + "epoch": 1.93674783095438, + "grad_norm": 0.24366671092119307, + "learning_rate": 3.06007122038867e-05, + "loss": 0.4844, + "step": 6920 + }, + { + "epoch": 1.9370277078085643, + "grad_norm": 0.22928569606518756, + "learning_rate": 3.0586495818316405e-05, + "loss": 0.4557, + "step": 6921 + }, + { + "epoch": 1.9373075846627485, + "grad_norm": 0.2357603007546275, + "learning_rate": 3.0572281280534324e-05, + "loss": 0.4751, + "step": 6922 + }, + { + "epoch": 1.9375874615169324, + "grad_norm": 0.22876167350947205, + "learning_rate": 3.055806859189336e-05, + "loss": 0.4853, + "step": 6923 + }, + { + "epoch": 1.9378673383711167, + "grad_norm": 0.22487240855797255, + "learning_rate": 3.054385775374632e-05, + "loss": 0.4663, + "step": 6924 + }, + { + "epoch": 1.938147215225301, + "grad_norm": 0.2332209486511338, + "learning_rate": 3.05296487674458e-05, + "loss": 0.4957, + "step": 6925 + }, + { + "epoch": 1.938427092079485, + "grad_norm": 0.22952610403124868, + "learning_rate": 3.0515441634344195e-05, + "loss": 0.4868, + "step": 6926 + }, + { + "epoch": 1.9387069689336691, + "grad_norm": 0.22732984211747553, + "learning_rate": 3.0501236355793754e-05, + "loss": 0.469, + "step": 6927 + }, + { + "epoch": 1.9389868457878534, + "grad_norm": 0.23296474093773015, + "learning_rate": 3.0487032933146564e-05, + "loss": 0.4933, + "step": 6928 + }, + { + "epoch": 1.9392667226420375, + "grad_norm": 0.2396372070201162, + "learning_rate": 3.0472831367754494e-05, + "loss": 0.4876, + "step": 6929 + }, + { + "epoch": 1.9395465994962215, + "grad_norm": 0.23398515721260013, + "learning_rate": 3.0458631660969273e-05, + "loss": 0.4757, + "step": 6930 + }, + { + "epoch": 1.9398264763504058, + "grad_norm": 0.23799658394799308, + "learning_rate": 3.044443381414244e-05, + "loss": 0.4932, + "step": 6931 + }, + { + "epoch": 1.9401063532045901, + "grad_norm": 0.23882297071006706, + "learning_rate": 3.043023782862533e-05, + "loss": 0.4845, + "step": 6932 + }, + { + "epoch": 1.940386230058774, + "grad_norm": 0.2289941761074013, + "learning_rate": 3.0416043705769125e-05, + "loss": 0.469, + "step": 6933 + }, + { + "epoch": 1.9406661069129583, + "grad_norm": 0.2280878658820877, + "learning_rate": 3.0401851446924846e-05, + "loss": 0.4738, + "step": 6934 + }, + { + "epoch": 1.9409459837671426, + "grad_norm": 0.23462376522494938, + "learning_rate": 3.0387661053443324e-05, + "loss": 0.5245, + "step": 6935 + }, + { + "epoch": 1.9412258606213266, + "grad_norm": 0.2226797572636581, + "learning_rate": 3.0373472526675197e-05, + "loss": 0.4704, + "step": 6936 + }, + { + "epoch": 1.9415057374755107, + "grad_norm": 0.22545363218823844, + "learning_rate": 3.035928586797094e-05, + "loss": 0.4903, + "step": 6937 + }, + { + "epoch": 1.941785614329695, + "grad_norm": 0.22948329099575007, + "learning_rate": 3.0345101078680848e-05, + "loss": 0.4961, + "step": 6938 + }, + { + "epoch": 1.942065491183879, + "grad_norm": 0.23123306372187905, + "learning_rate": 3.0330918160155035e-05, + "loss": 0.4795, + "step": 6939 + }, + { + "epoch": 1.9423453680380631, + "grad_norm": 0.2334816833466265, + "learning_rate": 3.0316737113743442e-05, + "loss": 0.4702, + "step": 6940 + }, + { + "epoch": 1.9426252448922474, + "grad_norm": 0.23592211266861282, + "learning_rate": 3.0302557940795828e-05, + "loss": 0.4814, + "step": 6941 + }, + { + "epoch": 1.9429051217464317, + "grad_norm": 0.21555884914805812, + "learning_rate": 3.0288380642661774e-05, + "loss": 0.4812, + "step": 6942 + }, + { + "epoch": 1.9431849986006158, + "grad_norm": 0.22766833160207603, + "learning_rate": 3.0274205220690686e-05, + "loss": 0.4642, + "step": 6943 + }, + { + "epoch": 1.9434648754547998, + "grad_norm": 0.24708929888125925, + "learning_rate": 3.0260031676231772e-05, + "loss": 0.4925, + "step": 6944 + }, + { + "epoch": 1.9437447523089841, + "grad_norm": 0.233628491339286, + "learning_rate": 3.0245860010634104e-05, + "loss": 0.4918, + "step": 6945 + }, + { + "epoch": 1.9440246291631682, + "grad_norm": 0.22982334012920114, + "learning_rate": 3.0231690225246535e-05, + "loss": 0.4871, + "step": 6946 + }, + { + "epoch": 1.9443045060173523, + "grad_norm": 0.2237908785868204, + "learning_rate": 3.0217522321417758e-05, + "loss": 0.458, + "step": 6947 + }, + { + "epoch": 1.9445843828715366, + "grad_norm": 0.24540427596737752, + "learning_rate": 3.0203356300496277e-05, + "loss": 0.4849, + "step": 6948 + }, + { + "epoch": 1.9448642597257206, + "grad_norm": 0.22946116864867663, + "learning_rate": 3.018919216383045e-05, + "loss": 0.488, + "step": 6949 + }, + { + "epoch": 1.9451441365799047, + "grad_norm": 0.25076988348664525, + "learning_rate": 3.017502991276836e-05, + "loss": 0.5008, + "step": 6950 + }, + { + "epoch": 1.945424013434089, + "grad_norm": 0.2281895669330235, + "learning_rate": 3.016086954865804e-05, + "loss": 0.4904, + "step": 6951 + }, + { + "epoch": 1.9457038902882733, + "grad_norm": 0.23592514096754377, + "learning_rate": 3.0146711072847257e-05, + "loss": 0.4791, + "step": 6952 + }, + { + "epoch": 1.9459837671424574, + "grad_norm": 0.2332401196359121, + "learning_rate": 3.0132554486683628e-05, + "loss": 0.4945, + "step": 6953 + }, + { + "epoch": 1.9462636439966414, + "grad_norm": 0.2407452785702372, + "learning_rate": 3.011839979151458e-05, + "loss": 0.4773, + "step": 6954 + }, + { + "epoch": 1.9465435208508257, + "grad_norm": 0.2325901569668764, + "learning_rate": 3.0104246988687368e-05, + "loss": 0.4964, + "step": 6955 + }, + { + "epoch": 1.9468233977050098, + "grad_norm": 0.23238008213955866, + "learning_rate": 3.009009607954907e-05, + "loss": 0.4803, + "step": 6956 + }, + { + "epoch": 1.9471032745591939, + "grad_norm": 0.2382190319205473, + "learning_rate": 3.0075947065446563e-05, + "loss": 0.4959, + "step": 6957 + }, + { + "epoch": 1.9473831514133781, + "grad_norm": 0.2419764363830295, + "learning_rate": 3.0061799947726565e-05, + "loss": 0.5032, + "step": 6958 + }, + { + "epoch": 1.9476630282675624, + "grad_norm": 0.2297780721980634, + "learning_rate": 3.004765472773562e-05, + "loss": 0.4674, + "step": 6959 + }, + { + "epoch": 1.9479429051217463, + "grad_norm": 0.2514690104952155, + "learning_rate": 3.0033511406820058e-05, + "loss": 0.468, + "step": 6960 + }, + { + "epoch": 1.9482227819759306, + "grad_norm": 0.2294590882490544, + "learning_rate": 3.001936998632604e-05, + "loss": 0.4799, + "step": 6961 + }, + { + "epoch": 1.9485026588301149, + "grad_norm": 0.23477045519017806, + "learning_rate": 3.000523046759959e-05, + "loss": 0.4708, + "step": 6962 + }, + { + "epoch": 1.948782535684299, + "grad_norm": 0.227380390921727, + "learning_rate": 2.999109285198649e-05, + "loss": 0.4658, + "step": 6963 + }, + { + "epoch": 1.949062412538483, + "grad_norm": 0.22534009942285327, + "learning_rate": 2.9976957140832374e-05, + "loss": 0.4906, + "step": 6964 + }, + { + "epoch": 1.9493422893926673, + "grad_norm": 0.23131293568262534, + "learning_rate": 2.9962823335482693e-05, + "loss": 0.4915, + "step": 6965 + }, + { + "epoch": 1.9496221662468514, + "grad_norm": 0.24348116094107597, + "learning_rate": 2.994869143728269e-05, + "loss": 0.467, + "step": 6966 + }, + { + "epoch": 1.9499020431010354, + "grad_norm": 0.2356463252888148, + "learning_rate": 2.993456144757748e-05, + "loss": 0.4937, + "step": 6967 + }, + { + "epoch": 1.9501819199552197, + "grad_norm": 0.22808636286042844, + "learning_rate": 2.992043336771192e-05, + "loss": 0.4691, + "step": 6968 + }, + { + "epoch": 1.950461796809404, + "grad_norm": 0.22755773681479166, + "learning_rate": 2.9906307199030758e-05, + "loss": 0.4666, + "step": 6969 + }, + { + "epoch": 1.9507416736635879, + "grad_norm": 0.2535203021836645, + "learning_rate": 2.9892182942878522e-05, + "loss": 0.5067, + "step": 6970 + }, + { + "epoch": 1.9510215505177722, + "grad_norm": 0.23077107892665602, + "learning_rate": 2.9878060600599565e-05, + "loss": 0.4912, + "step": 6971 + }, + { + "epoch": 1.9513014273719564, + "grad_norm": 0.23179166256489142, + "learning_rate": 2.9863940173538074e-05, + "loss": 0.4941, + "step": 6972 + }, + { + "epoch": 1.9515813042261405, + "grad_norm": 0.2295845434567394, + "learning_rate": 2.984982166303802e-05, + "loss": 0.5013, + "step": 6973 + }, + { + "epoch": 1.9518611810803246, + "grad_norm": 0.23301915929344238, + "learning_rate": 2.983570507044322e-05, + "loss": 0.4806, + "step": 6974 + }, + { + "epoch": 1.9521410579345089, + "grad_norm": 0.2364962286582459, + "learning_rate": 2.9821590397097298e-05, + "loss": 0.49, + "step": 6975 + }, + { + "epoch": 1.952420934788693, + "grad_norm": 0.22415602367856208, + "learning_rate": 2.9807477644343695e-05, + "loss": 0.4744, + "step": 6976 + }, + { + "epoch": 1.952700811642877, + "grad_norm": 0.23720920427411543, + "learning_rate": 2.979336681352567e-05, + "loss": 0.4687, + "step": 6977 + }, + { + "epoch": 1.9529806884970613, + "grad_norm": 0.227201964135838, + "learning_rate": 2.9779257905986302e-05, + "loss": 0.4894, + "step": 6978 + }, + { + "epoch": 1.9532605653512456, + "grad_norm": 0.22895255908481088, + "learning_rate": 2.976515092306848e-05, + "loss": 0.4809, + "step": 6979 + }, + { + "epoch": 1.9535404422054297, + "grad_norm": 0.2285392463168476, + "learning_rate": 2.9751045866114922e-05, + "loss": 0.507, + "step": 6980 + }, + { + "epoch": 1.9538203190596137, + "grad_norm": 0.22626235418131851, + "learning_rate": 2.9736942736468166e-05, + "loss": 0.5094, + "step": 6981 + }, + { + "epoch": 1.954100195913798, + "grad_norm": 0.2335110819416272, + "learning_rate": 2.9722841535470524e-05, + "loss": 0.4817, + "step": 6982 + }, + { + "epoch": 1.954380072767982, + "grad_norm": 0.24596529967737227, + "learning_rate": 2.9708742264464185e-05, + "loss": 0.4868, + "step": 6983 + }, + { + "epoch": 1.9546599496221662, + "grad_norm": 0.24755451917883628, + "learning_rate": 2.9694644924791116e-05, + "loss": 0.4876, + "step": 6984 + }, + { + "epoch": 1.9549398264763505, + "grad_norm": 0.2394044335536084, + "learning_rate": 2.9680549517793106e-05, + "loss": 0.4937, + "step": 6985 + }, + { + "epoch": 1.9552197033305345, + "grad_norm": 0.23541114527914012, + "learning_rate": 2.9666456044811774e-05, + "loss": 0.4839, + "step": 6986 + }, + { + "epoch": 1.9554995801847186, + "grad_norm": 0.23128141273377667, + "learning_rate": 2.965236450718853e-05, + "loss": 0.4928, + "step": 6987 + }, + { + "epoch": 1.9557794570389029, + "grad_norm": 0.23090437271847997, + "learning_rate": 2.963827490626462e-05, + "loss": 0.4798, + "step": 6988 + }, + { + "epoch": 1.9560593338930872, + "grad_norm": 0.22273393555307908, + "learning_rate": 2.9624187243381095e-05, + "loss": 0.4607, + "step": 6989 + }, + { + "epoch": 1.9563392107472712, + "grad_norm": 0.2304498043001874, + "learning_rate": 2.961010151987884e-05, + "loss": 0.4937, + "step": 6990 + }, + { + "epoch": 1.9566190876014553, + "grad_norm": 0.2330779304346798, + "learning_rate": 2.9596017737098536e-05, + "loss": 0.4659, + "step": 6991 + }, + { + "epoch": 1.9568989644556396, + "grad_norm": 0.25328248738358466, + "learning_rate": 2.9581935896380685e-05, + "loss": 0.4923, + "step": 6992 + }, + { + "epoch": 1.9571788413098237, + "grad_norm": 0.2325139361543725, + "learning_rate": 2.9567855999065596e-05, + "loss": 0.4781, + "step": 6993 + }, + { + "epoch": 1.9574587181640077, + "grad_norm": 0.22713006650382206, + "learning_rate": 2.955377804649342e-05, + "loss": 0.4727, + "step": 6994 + }, + { + "epoch": 1.957738595018192, + "grad_norm": 0.23001038600887078, + "learning_rate": 2.9539702040004085e-05, + "loss": 0.4837, + "step": 6995 + }, + { + "epoch": 1.9580184718723763, + "grad_norm": 0.23198450068624474, + "learning_rate": 2.9525627980937355e-05, + "loss": 0.4872, + "step": 6996 + }, + { + "epoch": 1.9582983487265602, + "grad_norm": 0.2348553910644492, + "learning_rate": 2.9511555870632824e-05, + "loss": 0.4777, + "step": 6997 + }, + { + "epoch": 1.9585782255807445, + "grad_norm": 0.22722001530923186, + "learning_rate": 2.9497485710429873e-05, + "loss": 0.4715, + "step": 6998 + }, + { + "epoch": 1.9588581024349287, + "grad_norm": 0.23336732850772657, + "learning_rate": 2.948341750166771e-05, + "loss": 0.4931, + "step": 6999 + }, + { + "epoch": 1.9591379792891128, + "grad_norm": 0.232762538670184, + "learning_rate": 2.946935124568535e-05, + "loss": 0.4728, + "step": 7000 + }, + { + "epoch": 1.9594178561432969, + "grad_norm": 0.2315457543303341, + "learning_rate": 2.9455286943821638e-05, + "loss": 0.461, + "step": 7001 + }, + { + "epoch": 1.9596977329974812, + "grad_norm": 0.23351845968891322, + "learning_rate": 2.94412245974152e-05, + "loss": 0.4728, + "step": 7002 + }, + { + "epoch": 1.9599776098516652, + "grad_norm": 0.23551139242126456, + "learning_rate": 2.942716420780452e-05, + "loss": 0.4704, + "step": 7003 + }, + { + "epoch": 1.9602574867058493, + "grad_norm": 0.23262927457816346, + "learning_rate": 2.9413105776327877e-05, + "loss": 0.504, + "step": 7004 + }, + { + "epoch": 1.9605373635600336, + "grad_norm": 0.22498542500355775, + "learning_rate": 2.9399049304323334e-05, + "loss": 0.4709, + "step": 7005 + }, + { + "epoch": 1.960817240414218, + "grad_norm": 0.2364436633012764, + "learning_rate": 2.938499479312882e-05, + "loss": 0.4597, + "step": 7006 + }, + { + "epoch": 1.9610971172684017, + "grad_norm": 0.23580077097171936, + "learning_rate": 2.9370942244082022e-05, + "loss": 0.4692, + "step": 7007 + }, + { + "epoch": 1.961376994122586, + "grad_norm": 0.2196275596296198, + "learning_rate": 2.9356891658520502e-05, + "loss": 0.4782, + "step": 7008 + }, + { + "epoch": 1.9616568709767703, + "grad_norm": 0.22992860368248752, + "learning_rate": 2.9342843037781587e-05, + "loss": 0.4698, + "step": 7009 + }, + { + "epoch": 1.9619367478309544, + "grad_norm": 0.22885797763020418, + "learning_rate": 2.9328796383202427e-05, + "loss": 0.4898, + "step": 7010 + }, + { + "epoch": 1.9622166246851385, + "grad_norm": 0.24773494403951035, + "learning_rate": 2.9314751696120003e-05, + "loss": 0.4944, + "step": 7011 + }, + { + "epoch": 1.9624965015393228, + "grad_norm": 0.2370585113625114, + "learning_rate": 2.9300708977871095e-05, + "loss": 0.4881, + "step": 7012 + }, + { + "epoch": 1.9627763783935068, + "grad_norm": 0.24828605566473014, + "learning_rate": 2.9286668229792274e-05, + "loss": 0.4751, + "step": 7013 + }, + { + "epoch": 1.963056255247691, + "grad_norm": 0.23103862341489811, + "learning_rate": 2.927262945321998e-05, + "loss": 0.4528, + "step": 7014 + }, + { + "epoch": 1.9633361321018752, + "grad_norm": 0.23100437274171104, + "learning_rate": 2.9258592649490413e-05, + "loss": 0.4619, + "step": 7015 + }, + { + "epoch": 1.9636160089560595, + "grad_norm": 0.2229633172860549, + "learning_rate": 2.9244557819939606e-05, + "loss": 0.4739, + "step": 7016 + }, + { + "epoch": 1.9638958858102435, + "grad_norm": 0.2327869635447997, + "learning_rate": 2.9230524965903406e-05, + "loss": 0.4764, + "step": 7017 + }, + { + "epoch": 1.9641757626644276, + "grad_norm": 0.24366798841238196, + "learning_rate": 2.9216494088717463e-05, + "loss": 0.5019, + "step": 7018 + }, + { + "epoch": 1.964455639518612, + "grad_norm": 0.23207685832803884, + "learning_rate": 2.920246518971724e-05, + "loss": 0.4697, + "step": 7019 + }, + { + "epoch": 1.964735516372796, + "grad_norm": 0.23765465654318918, + "learning_rate": 2.9188438270238032e-05, + "loss": 0.4789, + "step": 7020 + }, + { + "epoch": 1.96501539322698, + "grad_norm": 0.23208967128417798, + "learning_rate": 2.9174413331614915e-05, + "loss": 0.4751, + "step": 7021 + }, + { + "epoch": 1.9652952700811643, + "grad_norm": 0.2294397803522859, + "learning_rate": 2.916039037518281e-05, + "loss": 0.5058, + "step": 7022 + }, + { + "epoch": 1.9655751469353484, + "grad_norm": 0.22858794716313705, + "learning_rate": 2.9146369402276395e-05, + "loss": 0.4738, + "step": 7023 + }, + { + "epoch": 1.9658550237895325, + "grad_norm": 0.2273453226076543, + "learning_rate": 2.913235041423022e-05, + "loss": 0.481, + "step": 7024 + }, + { + "epoch": 1.9661349006437168, + "grad_norm": 0.22862180788730038, + "learning_rate": 2.9118333412378586e-05, + "loss": 0.4565, + "step": 7025 + }, + { + "epoch": 1.966414777497901, + "grad_norm": 0.23703629882129632, + "learning_rate": 2.9104318398055684e-05, + "loss": 0.4687, + "step": 7026 + }, + { + "epoch": 1.9666946543520851, + "grad_norm": 0.21975098352834288, + "learning_rate": 2.9090305372595457e-05, + "loss": 0.4765, + "step": 7027 + }, + { + "epoch": 1.9669745312062692, + "grad_norm": 0.22575644568142314, + "learning_rate": 2.9076294337331666e-05, + "loss": 0.4931, + "step": 7028 + }, + { + "epoch": 1.9672544080604535, + "grad_norm": 0.22853639099213402, + "learning_rate": 2.906228529359789e-05, + "loss": 0.4727, + "step": 7029 + }, + { + "epoch": 1.9675342849146376, + "grad_norm": 0.2315150628974756, + "learning_rate": 2.9048278242727524e-05, + "loss": 0.4725, + "step": 7030 + }, + { + "epoch": 1.9678141617688216, + "grad_norm": 0.2341757688214872, + "learning_rate": 2.9034273186053755e-05, + "loss": 0.4701, + "step": 7031 + }, + { + "epoch": 1.968094038623006, + "grad_norm": 0.23047135269148536, + "learning_rate": 2.902027012490961e-05, + "loss": 0.4831, + "step": 7032 + }, + { + "epoch": 1.96837391547719, + "grad_norm": 0.2222554430448998, + "learning_rate": 2.900626906062789e-05, + "loss": 0.487, + "step": 7033 + }, + { + "epoch": 1.968653792331374, + "grad_norm": 0.22936272671328484, + "learning_rate": 2.8992269994541233e-05, + "loss": 0.4701, + "step": 7034 + }, + { + "epoch": 1.9689336691855583, + "grad_norm": 0.23650867361925126, + "learning_rate": 2.897827292798207e-05, + "loss": 0.4626, + "step": 7035 + }, + { + "epoch": 1.9692135460397426, + "grad_norm": 0.23024960992005195, + "learning_rate": 2.8964277862282664e-05, + "loss": 0.4765, + "step": 7036 + }, + { + "epoch": 1.9694934228939267, + "grad_norm": 0.2217600833787514, + "learning_rate": 2.8950284798775064e-05, + "loss": 0.4772, + "step": 7037 + }, + { + "epoch": 1.9697732997481108, + "grad_norm": 0.23007353371239403, + "learning_rate": 2.8936293738791132e-05, + "loss": 0.4622, + "step": 7038 + }, + { + "epoch": 1.970053176602295, + "grad_norm": 0.22376176935042869, + "learning_rate": 2.892230468366256e-05, + "loss": 0.4852, + "step": 7039 + }, + { + "epoch": 1.9703330534564791, + "grad_norm": 0.23936365390929332, + "learning_rate": 2.8908317634720845e-05, + "loss": 0.4987, + "step": 7040 + }, + { + "epoch": 1.9706129303106632, + "grad_norm": 0.22464425664828297, + "learning_rate": 2.889433259329724e-05, + "loss": 0.4837, + "step": 7041 + }, + { + "epoch": 1.9708928071648475, + "grad_norm": 0.23670794125702688, + "learning_rate": 2.888034956072285e-05, + "loss": 0.4919, + "step": 7042 + }, + { + "epoch": 1.9711726840190318, + "grad_norm": 0.22692560651586183, + "learning_rate": 2.8866368538328636e-05, + "loss": 0.4967, + "step": 7043 + }, + { + "epoch": 1.9714525608732156, + "grad_norm": 0.22788833578279027, + "learning_rate": 2.885238952744529e-05, + "loss": 0.4787, + "step": 7044 + }, + { + "epoch": 1.9717324377274, + "grad_norm": 0.2433319210610914, + "learning_rate": 2.883841252940335e-05, + "loss": 0.5063, + "step": 7045 + }, + { + "epoch": 1.9720123145815842, + "grad_norm": 0.22761454388568558, + "learning_rate": 2.8824437545533144e-05, + "loss": 0.475, + "step": 7046 + }, + { + "epoch": 1.9722921914357683, + "grad_norm": 0.2307141235514982, + "learning_rate": 2.881046457716483e-05, + "loss": 0.4896, + "step": 7047 + }, + { + "epoch": 1.9725720682899524, + "grad_norm": 0.23221708734677798, + "learning_rate": 2.8796493625628356e-05, + "loss": 0.4752, + "step": 7048 + }, + { + "epoch": 1.9728519451441366, + "grad_norm": 0.23345716338046027, + "learning_rate": 2.878252469225349e-05, + "loss": 0.4998, + "step": 7049 + }, + { + "epoch": 1.9731318219983207, + "grad_norm": 0.25912862470150677, + "learning_rate": 2.8768557778369793e-05, + "loss": 0.5138, + "step": 7050 + }, + { + "epoch": 1.9734116988525048, + "grad_norm": 0.2384157264648219, + "learning_rate": 2.875459288530665e-05, + "loss": 0.4774, + "step": 7051 + }, + { + "epoch": 1.973691575706689, + "grad_norm": 0.22621812786139864, + "learning_rate": 2.8740630014393254e-05, + "loss": 0.4582, + "step": 7052 + }, + { + "epoch": 1.9739714525608734, + "grad_norm": 0.23510762856967313, + "learning_rate": 2.8726669166958592e-05, + "loss": 0.5015, + "step": 7053 + }, + { + "epoch": 1.9742513294150572, + "grad_norm": 0.23103924219529073, + "learning_rate": 2.871271034433146e-05, + "loss": 0.4715, + "step": 7054 + }, + { + "epoch": 1.9745312062692415, + "grad_norm": 0.23620001333983057, + "learning_rate": 2.869875354784048e-05, + "loss": 0.486, + "step": 7055 + }, + { + "epoch": 1.9748110831234258, + "grad_norm": 0.234569119182929, + "learning_rate": 2.868479877881406e-05, + "loss": 0.4825, + "step": 7056 + }, + { + "epoch": 1.9750909599776099, + "grad_norm": 0.23410974720603886, + "learning_rate": 2.8670846038580412e-05, + "loss": 0.472, + "step": 7057 + }, + { + "epoch": 1.975370836831794, + "grad_norm": 0.22408059353002907, + "learning_rate": 2.8656895328467603e-05, + "loss": 0.465, + "step": 7058 + }, + { + "epoch": 1.9756507136859782, + "grad_norm": 0.22850538612943877, + "learning_rate": 2.8642946649803425e-05, + "loss": 0.4883, + "step": 7059 + }, + { + "epoch": 1.9759305905401623, + "grad_norm": 0.2429574859448099, + "learning_rate": 2.8629000003915518e-05, + "loss": 0.4938, + "step": 7060 + }, + { + "epoch": 1.9762104673943464, + "grad_norm": 0.23163296805534375, + "learning_rate": 2.8615055392131372e-05, + "loss": 0.461, + "step": 7061 + }, + { + "epoch": 1.9764903442485307, + "grad_norm": 0.2355781056223335, + "learning_rate": 2.8601112815778223e-05, + "loss": 0.4767, + "step": 7062 + }, + { + "epoch": 1.976770221102715, + "grad_norm": 0.22809593095417793, + "learning_rate": 2.858717227618314e-05, + "loss": 0.4838, + "step": 7063 + }, + { + "epoch": 1.977050097956899, + "grad_norm": 0.23390999068278936, + "learning_rate": 2.8573233774672975e-05, + "loss": 0.5037, + "step": 7064 + }, + { + "epoch": 1.977329974811083, + "grad_norm": 0.23022170186616017, + "learning_rate": 2.8559297312574417e-05, + "loss": 0.47, + "step": 7065 + }, + { + "epoch": 1.9776098516652674, + "grad_norm": 0.2244910351811928, + "learning_rate": 2.8545362891213944e-05, + "loss": 0.4753, + "step": 7066 + }, + { + "epoch": 1.9778897285194514, + "grad_norm": 0.23452644856574384, + "learning_rate": 2.8531430511917834e-05, + "loss": 0.4863, + "step": 7067 + }, + { + "epoch": 1.9781696053736355, + "grad_norm": 0.23093788826410583, + "learning_rate": 2.8517500176012192e-05, + "loss": 0.5043, + "step": 7068 + }, + { + "epoch": 1.9784494822278198, + "grad_norm": 0.22957059730720628, + "learning_rate": 2.85035718848229e-05, + "loss": 0.4709, + "step": 7069 + }, + { + "epoch": 1.9787293590820039, + "grad_norm": 0.22844945758409066, + "learning_rate": 2.8489645639675672e-05, + "loss": 0.4629, + "step": 7070 + }, + { + "epoch": 1.979009235936188, + "grad_norm": 0.24689877723442868, + "learning_rate": 2.8475721441896008e-05, + "loss": 0.5032, + "step": 7071 + }, + { + "epoch": 1.9792891127903722, + "grad_norm": 0.23283754577445992, + "learning_rate": 2.8461799292809234e-05, + "loss": 0.4743, + "step": 7072 + }, + { + "epoch": 1.9795689896445565, + "grad_norm": 0.23100739927796426, + "learning_rate": 2.8447879193740445e-05, + "loss": 0.4904, + "step": 7073 + }, + { + "epoch": 1.9798488664987406, + "grad_norm": 0.23567442996764618, + "learning_rate": 2.8433961146014588e-05, + "loss": 0.4799, + "step": 7074 + }, + { + "epoch": 1.9801287433529247, + "grad_norm": 0.22812684774489528, + "learning_rate": 2.8420045150956374e-05, + "loss": 0.4875, + "step": 7075 + }, + { + "epoch": 1.980408620207109, + "grad_norm": 0.23086532968492773, + "learning_rate": 2.840613120989037e-05, + "loss": 0.4796, + "step": 7076 + }, + { + "epoch": 1.980688497061293, + "grad_norm": 0.23034140314456664, + "learning_rate": 2.8392219324140835e-05, + "loss": 0.479, + "step": 7077 + }, + { + "epoch": 1.980968373915477, + "grad_norm": 0.2313665761715524, + "learning_rate": 2.8378309495031984e-05, + "loss": 0.4889, + "step": 7078 + }, + { + "epoch": 1.9812482507696614, + "grad_norm": 0.22063695647756695, + "learning_rate": 2.8364401723887735e-05, + "loss": 0.4625, + "step": 7079 + }, + { + "epoch": 1.9815281276238457, + "grad_norm": 0.2521820924444219, + "learning_rate": 2.8350496012031847e-05, + "loss": 0.506, + "step": 7080 + }, + { + "epoch": 1.9818080044780295, + "grad_norm": 0.2285491258933469, + "learning_rate": 2.833659236078786e-05, + "loss": 0.4606, + "step": 7081 + }, + { + "epoch": 1.9820878813322138, + "grad_norm": 0.23882669110070884, + "learning_rate": 2.832269077147913e-05, + "loss": 0.5018, + "step": 7082 + }, + { + "epoch": 1.982367758186398, + "grad_norm": 0.22985496995197235, + "learning_rate": 2.830879124542884e-05, + "loss": 0.4748, + "step": 7083 + }, + { + "epoch": 1.9826476350405822, + "grad_norm": 0.23402777728462829, + "learning_rate": 2.829489378395993e-05, + "loss": 0.4837, + "step": 7084 + }, + { + "epoch": 1.9829275118947662, + "grad_norm": 0.24467558647557744, + "learning_rate": 2.8280998388395185e-05, + "loss": 0.5006, + "step": 7085 + }, + { + "epoch": 1.9832073887489505, + "grad_norm": 0.22804699437165032, + "learning_rate": 2.826710506005717e-05, + "loss": 0.4617, + "step": 7086 + }, + { + "epoch": 1.9834872656031346, + "grad_norm": 0.2296227757594068, + "learning_rate": 2.8253213800268256e-05, + "loss": 0.4551, + "step": 7087 + }, + { + "epoch": 1.9837671424573187, + "grad_norm": 0.240150003055722, + "learning_rate": 2.8239324610350625e-05, + "loss": 0.4761, + "step": 7088 + }, + { + "epoch": 1.984047019311503, + "grad_norm": 0.23114381456106, + "learning_rate": 2.822543749162626e-05, + "loss": 0.4639, + "step": 7089 + }, + { + "epoch": 1.9843268961656872, + "grad_norm": 0.24736307218798204, + "learning_rate": 2.8211552445416946e-05, + "loss": 0.4985, + "step": 7090 + }, + { + "epoch": 1.984606773019871, + "grad_norm": 0.22653634632324202, + "learning_rate": 2.8197669473044257e-05, + "loss": 0.4674, + "step": 7091 + }, + { + "epoch": 1.9848866498740554, + "grad_norm": 0.2417910132935196, + "learning_rate": 2.8183788575829596e-05, + "loss": 0.4839, + "step": 7092 + }, + { + "epoch": 1.9851665267282397, + "grad_norm": 0.22664370137669457, + "learning_rate": 2.816990975509415e-05, + "loss": 0.4805, + "step": 7093 + }, + { + "epoch": 1.9854464035824237, + "grad_norm": 0.22736144605270978, + "learning_rate": 2.815603301215891e-05, + "loss": 0.4806, + "step": 7094 + }, + { + "epoch": 1.9857262804366078, + "grad_norm": 0.2369043453440284, + "learning_rate": 2.8142158348344673e-05, + "loss": 0.4734, + "step": 7095 + }, + { + "epoch": 1.986006157290792, + "grad_norm": 0.2442381808375003, + "learning_rate": 2.812828576497204e-05, + "loss": 0.4869, + "step": 7096 + }, + { + "epoch": 1.9862860341449762, + "grad_norm": 0.2382920120174186, + "learning_rate": 2.8114415263361416e-05, + "loss": 0.4851, + "step": 7097 + }, + { + "epoch": 1.9865659109991602, + "grad_norm": 0.23822704343586873, + "learning_rate": 2.8100546844832988e-05, + "loss": 0.4821, + "step": 7098 + }, + { + "epoch": 1.9868457878533445, + "grad_norm": 0.22946593045084673, + "learning_rate": 2.8086680510706774e-05, + "loss": 0.5088, + "step": 7099 + }, + { + "epoch": 1.9871256647075288, + "grad_norm": 0.2314498711980787, + "learning_rate": 2.807281626230257e-05, + "loss": 0.4606, + "step": 7100 + }, + { + "epoch": 1.987405541561713, + "grad_norm": 0.2249556893406552, + "learning_rate": 2.8058954100939992e-05, + "loss": 0.4815, + "step": 7101 + }, + { + "epoch": 1.987685418415897, + "grad_norm": 0.23023146470575578, + "learning_rate": 2.8045094027938447e-05, + "loss": 0.5015, + "step": 7102 + }, + { + "epoch": 1.9879652952700813, + "grad_norm": 0.22364453842125873, + "learning_rate": 2.8031236044617137e-05, + "loss": 0.4721, + "step": 7103 + }, + { + "epoch": 1.9882451721242653, + "grad_norm": 0.23806221721710097, + "learning_rate": 2.801738015229507e-05, + "loss": 0.4975, + "step": 7104 + }, + { + "epoch": 1.9885250489784494, + "grad_norm": 0.2243512238062485, + "learning_rate": 2.8003526352291077e-05, + "loss": 0.4773, + "step": 7105 + }, + { + "epoch": 1.9888049258326337, + "grad_norm": 0.23878315834031513, + "learning_rate": 2.7989674645923747e-05, + "loss": 0.4997, + "step": 7106 + }, + { + "epoch": 1.9890848026868178, + "grad_norm": 0.22136624159924373, + "learning_rate": 2.797582503451151e-05, + "loss": 0.4785, + "step": 7107 + }, + { + "epoch": 1.9893646795410018, + "grad_norm": 0.22851727181910395, + "learning_rate": 2.7961977519372575e-05, + "loss": 0.4656, + "step": 7108 + }, + { + "epoch": 1.9896445563951861, + "grad_norm": 0.22676215976761419, + "learning_rate": 2.7948132101824946e-05, + "loss": 0.4689, + "step": 7109 + }, + { + "epoch": 1.9899244332493704, + "grad_norm": 0.22903461841804382, + "learning_rate": 2.7934288783186458e-05, + "loss": 0.496, + "step": 7110 + }, + { + "epoch": 1.9902043101035545, + "grad_norm": 0.23399417725757138, + "learning_rate": 2.7920447564774704e-05, + "loss": 0.4984, + "step": 7111 + }, + { + "epoch": 1.9904841869577385, + "grad_norm": 0.23468405703801676, + "learning_rate": 2.7906608447907113e-05, + "loss": 0.4789, + "step": 7112 + }, + { + "epoch": 1.9907640638119228, + "grad_norm": 0.22461133254682347, + "learning_rate": 2.78927714339009e-05, + "loss": 0.4832, + "step": 7113 + }, + { + "epoch": 1.991043940666107, + "grad_norm": 0.2361043309281983, + "learning_rate": 2.7878936524073074e-05, + "loss": 0.4804, + "step": 7114 + }, + { + "epoch": 1.991323817520291, + "grad_norm": 0.24284029579036037, + "learning_rate": 2.786510371974045e-05, + "loss": 0.4745, + "step": 7115 + }, + { + "epoch": 1.9916036943744753, + "grad_norm": 0.22604323427116058, + "learning_rate": 2.7851273022219644e-05, + "loss": 0.4926, + "step": 7116 + }, + { + "epoch": 1.9918835712286596, + "grad_norm": 0.22322252295804929, + "learning_rate": 2.7837444432827066e-05, + "loss": 0.478, + "step": 7117 + }, + { + "epoch": 1.9921634480828434, + "grad_norm": 0.23382189009195833, + "learning_rate": 2.7823617952878932e-05, + "loss": 0.4776, + "step": 7118 + }, + { + "epoch": 1.9924433249370277, + "grad_norm": 0.22790903388172398, + "learning_rate": 2.7809793583691258e-05, + "loss": 0.4936, + "step": 7119 + }, + { + "epoch": 1.992723201791212, + "grad_norm": 0.23520628706779156, + "learning_rate": 2.779597132657985e-05, + "loss": 0.5012, + "step": 7120 + }, + { + "epoch": 1.993003078645396, + "grad_norm": 0.2262907996432626, + "learning_rate": 2.7782151182860318e-05, + "loss": 0.4829, + "step": 7121 + }, + { + "epoch": 1.9932829554995801, + "grad_norm": 0.2193334470655366, + "learning_rate": 2.7768333153848075e-05, + "loss": 0.4855, + "step": 7122 + }, + { + "epoch": 1.9935628323537644, + "grad_norm": 0.2197528998059351, + "learning_rate": 2.7754517240858325e-05, + "loss": 0.4629, + "step": 7123 + }, + { + "epoch": 1.9938427092079485, + "grad_norm": 0.22762622975934896, + "learning_rate": 2.7740703445206072e-05, + "loss": 0.4816, + "step": 7124 + }, + { + "epoch": 1.9941225860621326, + "grad_norm": 0.22736377592299664, + "learning_rate": 2.7726891768206132e-05, + "loss": 0.491, + "step": 7125 + }, + { + "epoch": 1.9944024629163168, + "grad_norm": 0.2921883336014387, + "learning_rate": 2.771308221117309e-05, + "loss": 0.4808, + "step": 7126 + }, + { + "epoch": 1.9946823397705011, + "grad_norm": 0.2321603110306611, + "learning_rate": 2.7699274775421363e-05, + "loss": 0.4787, + "step": 7127 + }, + { + "epoch": 1.994962216624685, + "grad_norm": 0.2341763652512763, + "learning_rate": 2.7685469462265144e-05, + "loss": 0.5024, + "step": 7128 + }, + { + "epoch": 1.9952420934788693, + "grad_norm": 0.2262431486301722, + "learning_rate": 2.7671666273018433e-05, + "loss": 0.4699, + "step": 7129 + }, + { + "epoch": 1.9955219703330536, + "grad_norm": 0.2292033135830934, + "learning_rate": 2.7657865208995025e-05, + "loss": 0.4998, + "step": 7130 + }, + { + "epoch": 1.9958018471872376, + "grad_norm": 0.22982800104503978, + "learning_rate": 2.7644066271508506e-05, + "loss": 0.4826, + "step": 7131 + }, + { + "epoch": 1.9960817240414217, + "grad_norm": 0.23537974193570985, + "learning_rate": 2.763026946187228e-05, + "loss": 0.4996, + "step": 7132 + }, + { + "epoch": 1.996361600895606, + "grad_norm": 0.2198193987375018, + "learning_rate": 2.7616474781399526e-05, + "loss": 0.4559, + "step": 7133 + }, + { + "epoch": 1.99664147774979, + "grad_norm": 0.22833082201651922, + "learning_rate": 2.7602682231403228e-05, + "loss": 0.4969, + "step": 7134 + }, + { + "epoch": 1.9969213546039741, + "grad_norm": 0.23158821295374815, + "learning_rate": 2.758889181319617e-05, + "loss": 0.5089, + "step": 7135 + }, + { + "epoch": 1.9972012314581584, + "grad_norm": 0.2372044565687655, + "learning_rate": 2.7575103528090935e-05, + "loss": 0.4648, + "step": 7136 + }, + { + "epoch": 1.9974811083123427, + "grad_norm": 0.2340136785012341, + "learning_rate": 2.7561317377399897e-05, + "loss": 0.4687, + "step": 7137 + }, + { + "epoch": 1.9977609851665268, + "grad_norm": 0.23109783645478796, + "learning_rate": 2.7547533362435234e-05, + "loss": 0.5156, + "step": 7138 + }, + { + "epoch": 1.9980408620207109, + "grad_norm": 0.2159904106190594, + "learning_rate": 2.7533751484508907e-05, + "loss": 0.4507, + "step": 7139 + }, + { + "epoch": 1.9983207388748951, + "grad_norm": 0.2411622800166489, + "learning_rate": 2.751997174493269e-05, + "loss": 0.483, + "step": 7140 + }, + { + "epoch": 1.9986006157290792, + "grad_norm": 0.2316522269934337, + "learning_rate": 2.750619414501815e-05, + "loss": 0.4801, + "step": 7141 + }, + { + "epoch": 1.9988804925832633, + "grad_norm": 0.24186152949997194, + "learning_rate": 2.7492418686076644e-05, + "loss": 0.4845, + "step": 7142 + }, + { + "epoch": 1.9991603694374476, + "grad_norm": 0.23094396636398243, + "learning_rate": 2.747864536941932e-05, + "loss": 0.4713, + "step": 7143 + }, + { + "epoch": 1.9994402462916316, + "grad_norm": 0.23730511172168406, + "learning_rate": 2.746487419635714e-05, + "loss": 0.4823, + "step": 7144 + }, + { + "epoch": 1.9997201231458157, + "grad_norm": 0.2377301352358611, + "learning_rate": 2.745110516820084e-05, + "loss": 0.4704, + "step": 7145 + }, + { + "epoch": 2.0, + "grad_norm": 0.2342240748180366, + "learning_rate": 2.743733828626097e-05, + "loss": 0.4657, + "step": 7146 + }, + { + "epoch": 2.0002798768541843, + "grad_norm": 0.23536629569011525, + "learning_rate": 2.742357355184788e-05, + "loss": 0.4528, + "step": 7147 + }, + { + "epoch": 2.000559753708368, + "grad_norm": 0.24180568723549165, + "learning_rate": 2.7409810966271687e-05, + "loss": 0.4775, + "step": 7148 + }, + { + "epoch": 2.0008396305625524, + "grad_norm": 0.2266540329366163, + "learning_rate": 2.7396050530842338e-05, + "loss": 0.4431, + "step": 7149 + }, + { + "epoch": 2.0011195074167367, + "grad_norm": 0.2318668980184379, + "learning_rate": 2.7382292246869547e-05, + "loss": 0.4576, + "step": 7150 + }, + { + "epoch": 2.001399384270921, + "grad_norm": 0.23099092697837148, + "learning_rate": 2.7368536115662846e-05, + "loss": 0.4608, + "step": 7151 + }, + { + "epoch": 2.001679261125105, + "grad_norm": 0.2289346802474108, + "learning_rate": 2.7354782138531536e-05, + "loss": 0.4628, + "step": 7152 + }, + { + "epoch": 2.001959137979289, + "grad_norm": 0.23500463684622683, + "learning_rate": 2.7341030316784742e-05, + "loss": 0.4529, + "step": 7153 + }, + { + "epoch": 2.0022390148334734, + "grad_norm": 0.23324874847693192, + "learning_rate": 2.732728065173136e-05, + "loss": 0.4544, + "step": 7154 + }, + { + "epoch": 2.0025188916876573, + "grad_norm": 0.24384206284288545, + "learning_rate": 2.7313533144680104e-05, + "loss": 0.468, + "step": 7155 + }, + { + "epoch": 2.0027987685418416, + "grad_norm": 0.24620629471749755, + "learning_rate": 2.7299787796939456e-05, + "loss": 0.4621, + "step": 7156 + }, + { + "epoch": 2.003078645396026, + "grad_norm": 0.2491240336762495, + "learning_rate": 2.7286044609817718e-05, + "loss": 0.4462, + "step": 7157 + }, + { + "epoch": 2.0033585222502097, + "grad_norm": 0.23485142680080093, + "learning_rate": 2.727230358462296e-05, + "loss": 0.4477, + "step": 7158 + }, + { + "epoch": 2.003638399104394, + "grad_norm": 0.26540711606896583, + "learning_rate": 2.725856472266307e-05, + "loss": 0.4732, + "step": 7159 + }, + { + "epoch": 2.0039182759585783, + "grad_norm": 0.2951729471292445, + "learning_rate": 2.7244828025245716e-05, + "loss": 0.4654, + "step": 7160 + }, + { + "epoch": 2.0041981528127626, + "grad_norm": 0.24255185428714476, + "learning_rate": 2.7231093493678373e-05, + "loss": 0.4441, + "step": 7161 + }, + { + "epoch": 2.0044780296669464, + "grad_norm": 0.257612487412905, + "learning_rate": 2.721736112926829e-05, + "loss": 0.4576, + "step": 7162 + }, + { + "epoch": 2.0047579065211307, + "grad_norm": 0.3496534649361711, + "learning_rate": 2.720363093332253e-05, + "loss": 0.4636, + "step": 7163 + }, + { + "epoch": 2.005037783375315, + "grad_norm": 0.281179917706595, + "learning_rate": 2.718990290714794e-05, + "loss": 0.4383, + "step": 7164 + }, + { + "epoch": 2.005317660229499, + "grad_norm": 0.28473189327032133, + "learning_rate": 2.7176177052051153e-05, + "loss": 0.4545, + "step": 7165 + }, + { + "epoch": 2.005597537083683, + "grad_norm": 0.27743126173059895, + "learning_rate": 2.7162453369338614e-05, + "loss": 0.4391, + "step": 7166 + }, + { + "epoch": 2.0058774139378674, + "grad_norm": 0.2429334979008607, + "learning_rate": 2.7148731860316546e-05, + "loss": 0.4569, + "step": 7167 + }, + { + "epoch": 2.0061572907920513, + "grad_norm": 0.2475676642145208, + "learning_rate": 2.7135012526290972e-05, + "loss": 0.4769, + "step": 7168 + }, + { + "epoch": 2.0064371676462356, + "grad_norm": 0.2565373673679602, + "learning_rate": 2.7121295368567702e-05, + "loss": 0.4845, + "step": 7169 + }, + { + "epoch": 2.00671704450042, + "grad_norm": 0.24954284048023126, + "learning_rate": 2.7107580388452335e-05, + "loss": 0.4566, + "step": 7170 + }, + { + "epoch": 2.006996921354604, + "grad_norm": 0.25129636868291216, + "learning_rate": 2.7093867587250288e-05, + "loss": 0.448, + "step": 7171 + }, + { + "epoch": 2.007276798208788, + "grad_norm": 0.2369952002730348, + "learning_rate": 2.7080156966266745e-05, + "loss": 0.4562, + "step": 7172 + }, + { + "epoch": 2.0075566750629723, + "grad_norm": 0.25261368158797665, + "learning_rate": 2.7066448526806697e-05, + "loss": 0.4723, + "step": 7173 + }, + { + "epoch": 2.0078365519171566, + "grad_norm": 0.23919265204567164, + "learning_rate": 2.7052742270174902e-05, + "loss": 0.4399, + "step": 7174 + }, + { + "epoch": 2.0081164287713404, + "grad_norm": 0.2402531935253443, + "learning_rate": 2.703903819767595e-05, + "loss": 0.4544, + "step": 7175 + }, + { + "epoch": 2.0083963056255247, + "grad_norm": 0.24190698892130647, + "learning_rate": 2.702533631061419e-05, + "loss": 0.4616, + "step": 7176 + }, + { + "epoch": 2.008676182479709, + "grad_norm": 0.2584528292180475, + "learning_rate": 2.701163661029379e-05, + "loss": 0.4507, + "step": 7177 + }, + { + "epoch": 2.008956059333893, + "grad_norm": 0.243630883588506, + "learning_rate": 2.6997939098018678e-05, + "loss": 0.4453, + "step": 7178 + }, + { + "epoch": 2.009235936188077, + "grad_norm": 0.24893016839360596, + "learning_rate": 2.698424377509259e-05, + "loss": 0.4522, + "step": 7179 + }, + { + "epoch": 2.0095158130422615, + "grad_norm": 0.24682927072592956, + "learning_rate": 2.697055064281907e-05, + "loss": 0.4692, + "step": 7180 + }, + { + "epoch": 2.0097956898964457, + "grad_norm": 0.2440123746660851, + "learning_rate": 2.6956859702501426e-05, + "loss": 0.4402, + "step": 7181 + }, + { + "epoch": 2.0100755667506296, + "grad_norm": 0.27723488664785007, + "learning_rate": 2.6943170955442774e-05, + "loss": 0.4522, + "step": 7182 + }, + { + "epoch": 2.010355443604814, + "grad_norm": 0.23837291179707326, + "learning_rate": 2.6929484402946014e-05, + "loss": 0.4645, + "step": 7183 + }, + { + "epoch": 2.010635320458998, + "grad_norm": 0.2267454845076299, + "learning_rate": 2.6915800046313848e-05, + "loss": 0.4598, + "step": 7184 + }, + { + "epoch": 2.010915197313182, + "grad_norm": 0.23354531967932965, + "learning_rate": 2.6902117886848755e-05, + "loss": 0.4606, + "step": 7185 + }, + { + "epoch": 2.0111950741673663, + "grad_norm": 0.23231625117562205, + "learning_rate": 2.6888437925853005e-05, + "loss": 0.443, + "step": 7186 + }, + { + "epoch": 2.0114749510215506, + "grad_norm": 0.2409498444756786, + "learning_rate": 2.6874760164628666e-05, + "loss": 0.4631, + "step": 7187 + }, + { + "epoch": 2.011754827875735, + "grad_norm": 0.247697741758576, + "learning_rate": 2.6861084604477604e-05, + "loss": 0.4608, + "step": 7188 + }, + { + "epoch": 2.0120347047299187, + "grad_norm": 0.23719228635563858, + "learning_rate": 2.684741124670146e-05, + "loss": 0.4608, + "step": 7189 + }, + { + "epoch": 2.012314581584103, + "grad_norm": 0.24805011363637294, + "learning_rate": 2.6833740092601673e-05, + "loss": 0.4532, + "step": 7190 + }, + { + "epoch": 2.0125944584382873, + "grad_norm": 0.24152105620775793, + "learning_rate": 2.6820071143479468e-05, + "loss": 0.4497, + "step": 7191 + }, + { + "epoch": 2.012874335292471, + "grad_norm": 0.23978609357587777, + "learning_rate": 2.680640440063587e-05, + "loss": 0.4353, + "step": 7192 + }, + { + "epoch": 2.0131542121466555, + "grad_norm": 0.23870573736222256, + "learning_rate": 2.679273986537168e-05, + "loss": 0.4674, + "step": 7193 + }, + { + "epoch": 2.0134340890008398, + "grad_norm": 0.2458402418123988, + "learning_rate": 2.67790775389875e-05, + "loss": 0.4521, + "step": 7194 + }, + { + "epoch": 2.0137139658550236, + "grad_norm": 0.23847230539227382, + "learning_rate": 2.676541742278372e-05, + "loss": 0.4531, + "step": 7195 + }, + { + "epoch": 2.013993842709208, + "grad_norm": 0.23306581560973855, + "learning_rate": 2.675175951806051e-05, + "loss": 0.4339, + "step": 7196 + }, + { + "epoch": 2.014273719563392, + "grad_norm": 0.25087566501309366, + "learning_rate": 2.6738103826117843e-05, + "loss": 0.4546, + "step": 7197 + }, + { + "epoch": 2.0145535964175765, + "grad_norm": 0.24016871065332204, + "learning_rate": 2.6724450348255477e-05, + "loss": 0.4685, + "step": 7198 + }, + { + "epoch": 2.0148334732717603, + "grad_norm": 0.23767573246241336, + "learning_rate": 2.6710799085772954e-05, + "loss": 0.4372, + "step": 7199 + }, + { + "epoch": 2.0151133501259446, + "grad_norm": 0.24510462731356397, + "learning_rate": 2.6697150039969603e-05, + "loss": 0.4467, + "step": 7200 + }, + { + "epoch": 2.015393226980129, + "grad_norm": 0.2546874855617003, + "learning_rate": 2.6683503212144563e-05, + "loss": 0.4463, + "step": 7201 + }, + { + "epoch": 2.0156731038343128, + "grad_norm": 0.23618363043472076, + "learning_rate": 2.666985860359673e-05, + "loss": 0.4515, + "step": 7202 + }, + { + "epoch": 2.015952980688497, + "grad_norm": 0.2528679248266655, + "learning_rate": 2.6656216215624818e-05, + "loss": 0.455, + "step": 7203 + }, + { + "epoch": 2.0162328575426813, + "grad_norm": 0.2460945714151176, + "learning_rate": 2.6642576049527313e-05, + "loss": 0.4576, + "step": 7204 + }, + { + "epoch": 2.016512734396865, + "grad_norm": 0.24045379737398392, + "learning_rate": 2.6628938106602497e-05, + "loss": 0.4547, + "step": 7205 + }, + { + "epoch": 2.0167926112510495, + "grad_norm": 0.2431107303219301, + "learning_rate": 2.6615302388148428e-05, + "loss": 0.4724, + "step": 7206 + }, + { + "epoch": 2.0170724881052338, + "grad_norm": 0.2569350048873161, + "learning_rate": 2.6601668895462973e-05, + "loss": 0.4745, + "step": 7207 + }, + { + "epoch": 2.017352364959418, + "grad_norm": 0.2500396671786939, + "learning_rate": 2.658803762984376e-05, + "loss": 0.4739, + "step": 7208 + }, + { + "epoch": 2.017632241813602, + "grad_norm": 0.2399155743260989, + "learning_rate": 2.6574408592588234e-05, + "loss": 0.4272, + "step": 7209 + }, + { + "epoch": 2.017912118667786, + "grad_norm": 0.25007912665628107, + "learning_rate": 2.656078178499361e-05, + "loss": 0.4611, + "step": 7210 + }, + { + "epoch": 2.0181919955219705, + "grad_norm": 0.23622471689458255, + "learning_rate": 2.65471572083569e-05, + "loss": 0.4584, + "step": 7211 + }, + { + "epoch": 2.0184718723761543, + "grad_norm": 0.2359639700080925, + "learning_rate": 2.6533534863974886e-05, + "loss": 0.4767, + "step": 7212 + }, + { + "epoch": 2.0187517492303386, + "grad_norm": 0.2418095161208412, + "learning_rate": 2.6519914753144158e-05, + "loss": 0.4571, + "step": 7213 + }, + { + "epoch": 2.019031626084523, + "grad_norm": 0.25041641986788166, + "learning_rate": 2.6506296877161092e-05, + "loss": 0.4662, + "step": 7214 + }, + { + "epoch": 2.0193115029387068, + "grad_norm": 0.23799631436914484, + "learning_rate": 2.6492681237321836e-05, + "loss": 0.4542, + "step": 7215 + }, + { + "epoch": 2.019591379792891, + "grad_norm": 0.2357176967359638, + "learning_rate": 2.647906783492234e-05, + "loss": 0.4636, + "step": 7216 + }, + { + "epoch": 2.0198712566470753, + "grad_norm": 0.2361599119201025, + "learning_rate": 2.6465456671258333e-05, + "loss": 0.4482, + "step": 7217 + }, + { + "epoch": 2.0201511335012596, + "grad_norm": 0.24568280575614648, + "learning_rate": 2.645184774762533e-05, + "loss": 0.4613, + "step": 7218 + }, + { + "epoch": 2.0204310103554435, + "grad_norm": 0.23437213348064267, + "learning_rate": 2.6438241065318637e-05, + "loss": 0.4443, + "step": 7219 + }, + { + "epoch": 2.0207108872096278, + "grad_norm": 0.23810477968970578, + "learning_rate": 2.6424636625633337e-05, + "loss": 0.444, + "step": 7220 + }, + { + "epoch": 2.020990764063812, + "grad_norm": 0.24182078537158005, + "learning_rate": 2.6411034429864347e-05, + "loss": 0.4608, + "step": 7221 + }, + { + "epoch": 2.021270640917996, + "grad_norm": 0.5722824858401844, + "learning_rate": 2.6397434479306294e-05, + "loss": 0.4622, + "step": 7222 + }, + { + "epoch": 2.02155051777218, + "grad_norm": 0.24109465311373007, + "learning_rate": 2.638383677525363e-05, + "loss": 0.4527, + "step": 7223 + }, + { + "epoch": 2.0218303946263645, + "grad_norm": 0.23539187624819047, + "learning_rate": 2.63702413190006e-05, + "loss": 0.4535, + "step": 7224 + }, + { + "epoch": 2.022110271480549, + "grad_norm": 0.23084401150477463, + "learning_rate": 2.635664811184123e-05, + "loss": 0.4645, + "step": 7225 + }, + { + "epoch": 2.0223901483347326, + "grad_norm": 0.24562055324409324, + "learning_rate": 2.6343057155069328e-05, + "loss": 0.4832, + "step": 7226 + }, + { + "epoch": 2.022670025188917, + "grad_norm": 0.2333027891352052, + "learning_rate": 2.632946844997849e-05, + "loss": 0.4489, + "step": 7227 + }, + { + "epoch": 2.022949902043101, + "grad_norm": 0.2432113946363506, + "learning_rate": 2.6315881997862086e-05, + "loss": 0.4549, + "step": 7228 + }, + { + "epoch": 2.023229778897285, + "grad_norm": 0.23477990070633872, + "learning_rate": 2.6302297800013297e-05, + "loss": 0.4668, + "step": 7229 + }, + { + "epoch": 2.0235096557514693, + "grad_norm": 0.2329525268170207, + "learning_rate": 2.6288715857725067e-05, + "loss": 0.463, + "step": 7230 + }, + { + "epoch": 2.0237895326056536, + "grad_norm": 0.23645176432539472, + "learning_rate": 2.6275136172290127e-05, + "loss": 0.4549, + "step": 7231 + }, + { + "epoch": 2.0240694094598375, + "grad_norm": 0.2256120580521212, + "learning_rate": 2.626155874500101e-05, + "loss": 0.4532, + "step": 7232 + }, + { + "epoch": 2.024349286314022, + "grad_norm": 0.24494509486609295, + "learning_rate": 2.6247983577150016e-05, + "loss": 0.4415, + "step": 7233 + }, + { + "epoch": 2.024629163168206, + "grad_norm": 0.25558526117028796, + "learning_rate": 2.6234410670029243e-05, + "loss": 0.4493, + "step": 7234 + }, + { + "epoch": 2.0249090400223904, + "grad_norm": 0.25373525330697044, + "learning_rate": 2.622084002493056e-05, + "loss": 0.4666, + "step": 7235 + }, + { + "epoch": 2.025188916876574, + "grad_norm": 0.24092647473756687, + "learning_rate": 2.6207271643145635e-05, + "loss": 0.463, + "step": 7236 + }, + { + "epoch": 2.0254687937307585, + "grad_norm": 0.2398350079466824, + "learning_rate": 2.619370552596592e-05, + "loss": 0.4568, + "step": 7237 + }, + { + "epoch": 2.025748670584943, + "grad_norm": 0.2402813065093519, + "learning_rate": 2.6180141674682612e-05, + "loss": 0.454, + "step": 7238 + }, + { + "epoch": 2.0260285474391266, + "grad_norm": 0.24515891879724147, + "learning_rate": 2.616658009058679e-05, + "loss": 0.4659, + "step": 7239 + }, + { + "epoch": 2.026308424293311, + "grad_norm": 0.25237161543010617, + "learning_rate": 2.61530207749692e-05, + "loss": 0.4639, + "step": 7240 + }, + { + "epoch": 2.026588301147495, + "grad_norm": 0.2428784920326867, + "learning_rate": 2.613946372912044e-05, + "loss": 0.4443, + "step": 7241 + }, + { + "epoch": 2.026868178001679, + "grad_norm": 0.24933667845887866, + "learning_rate": 2.6125908954330868e-05, + "loss": 0.4591, + "step": 7242 + }, + { + "epoch": 2.0271480548558634, + "grad_norm": 0.2343228016709955, + "learning_rate": 2.611235645189065e-05, + "loss": 0.4512, + "step": 7243 + }, + { + "epoch": 2.0274279317100476, + "grad_norm": 0.2387259647127503, + "learning_rate": 2.6098806223089723e-05, + "loss": 0.4852, + "step": 7244 + }, + { + "epoch": 2.027707808564232, + "grad_norm": 0.23711731697612165, + "learning_rate": 2.6085258269217795e-05, + "loss": 0.4327, + "step": 7245 + }, + { + "epoch": 2.027987685418416, + "grad_norm": 0.23941383218364568, + "learning_rate": 2.6071712591564367e-05, + "loss": 0.4418, + "step": 7246 + }, + { + "epoch": 2.0282675622726, + "grad_norm": 0.24611736084243424, + "learning_rate": 2.6058169191418725e-05, + "loss": 0.4651, + "step": 7247 + }, + { + "epoch": 2.0285474391267844, + "grad_norm": 0.2393564650461905, + "learning_rate": 2.6044628070069945e-05, + "loss": 0.4428, + "step": 7248 + }, + { + "epoch": 2.028827315980968, + "grad_norm": 0.24443034810960743, + "learning_rate": 2.603108922880687e-05, + "loss": 0.439, + "step": 7249 + }, + { + "epoch": 2.0291071928351525, + "grad_norm": 0.24455955218091963, + "learning_rate": 2.6017552668918143e-05, + "loss": 0.4614, + "step": 7250 + }, + { + "epoch": 2.029387069689337, + "grad_norm": 0.23920551494992115, + "learning_rate": 2.6004018391692175e-05, + "loss": 0.4391, + "step": 7251 + }, + { + "epoch": 2.0296669465435206, + "grad_norm": 0.23962490949893656, + "learning_rate": 2.599048639841717e-05, + "loss": 0.4405, + "step": 7252 + }, + { + "epoch": 2.029946823397705, + "grad_norm": 0.2461391762251693, + "learning_rate": 2.59769566903811e-05, + "loss": 0.4657, + "step": 7253 + }, + { + "epoch": 2.0302267002518892, + "grad_norm": 0.24859487862695334, + "learning_rate": 2.5963429268871743e-05, + "loss": 0.4527, + "step": 7254 + }, + { + "epoch": 2.0305065771060735, + "grad_norm": 0.23903524348802838, + "learning_rate": 2.5949904135176624e-05, + "loss": 0.4395, + "step": 7255 + }, + { + "epoch": 2.0307864539602574, + "grad_norm": 0.2380227664798421, + "learning_rate": 2.5936381290583112e-05, + "loss": 0.4546, + "step": 7256 + }, + { + "epoch": 2.0310663308144417, + "grad_norm": 0.2474716956010892, + "learning_rate": 2.5922860736378314e-05, + "loss": 0.4415, + "step": 7257 + }, + { + "epoch": 2.031346207668626, + "grad_norm": 0.23970092978536683, + "learning_rate": 2.5909342473849087e-05, + "loss": 0.4496, + "step": 7258 + }, + { + "epoch": 2.03162608452281, + "grad_norm": 0.2493147804591437, + "learning_rate": 2.5895826504282127e-05, + "loss": 0.4514, + "step": 7259 + }, + { + "epoch": 2.031905961376994, + "grad_norm": 0.26862492448438524, + "learning_rate": 2.5882312828963895e-05, + "loss": 0.4567, + "step": 7260 + }, + { + "epoch": 2.0321858382311784, + "grad_norm": 0.2475580247703966, + "learning_rate": 2.5868801449180625e-05, + "loss": 0.4424, + "step": 7261 + }, + { + "epoch": 2.0324657150853622, + "grad_norm": 0.24809876655430108, + "learning_rate": 2.585529236621834e-05, + "loss": 0.4612, + "step": 7262 + }, + { + "epoch": 2.0327455919395465, + "grad_norm": 0.24451421788854538, + "learning_rate": 2.584178558136285e-05, + "loss": 0.4426, + "step": 7263 + }, + { + "epoch": 2.033025468793731, + "grad_norm": 0.23848747788817679, + "learning_rate": 2.582828109589972e-05, + "loss": 0.4627, + "step": 7264 + }, + { + "epoch": 2.033305345647915, + "grad_norm": 0.2349102396252516, + "learning_rate": 2.581477891111433e-05, + "loss": 0.4589, + "step": 7265 + }, + { + "epoch": 2.033585222502099, + "grad_norm": 0.24949051830562546, + "learning_rate": 2.580127902829182e-05, + "loss": 0.4594, + "step": 7266 + }, + { + "epoch": 2.0338650993562832, + "grad_norm": 0.24670318721825205, + "learning_rate": 2.5787781448717112e-05, + "loss": 0.4484, + "step": 7267 + }, + { + "epoch": 2.0341449762104675, + "grad_norm": 0.25083849647804124, + "learning_rate": 2.577428617367492e-05, + "loss": 0.469, + "step": 7268 + }, + { + "epoch": 2.0344248530646514, + "grad_norm": 0.24612589767460558, + "learning_rate": 2.5760793204449735e-05, + "loss": 0.4629, + "step": 7269 + }, + { + "epoch": 2.0347047299188357, + "grad_norm": 0.24557294540946742, + "learning_rate": 2.5747302542325813e-05, + "loss": 0.4792, + "step": 7270 + }, + { + "epoch": 2.03498460677302, + "grad_norm": 0.25051018400826347, + "learning_rate": 2.5733814188587213e-05, + "loss": 0.4522, + "step": 7271 + }, + { + "epoch": 2.0352644836272042, + "grad_norm": 0.24763663644100886, + "learning_rate": 2.5720328144517748e-05, + "loss": 0.4527, + "step": 7272 + }, + { + "epoch": 2.035544360481388, + "grad_norm": 0.25407321940786803, + "learning_rate": 2.570684441140105e-05, + "loss": 0.4803, + "step": 7273 + }, + { + "epoch": 2.0358242373355724, + "grad_norm": 0.2562832117551291, + "learning_rate": 2.5693362990520498e-05, + "loss": 0.4822, + "step": 7274 + }, + { + "epoch": 2.0361041141897567, + "grad_norm": 0.24653770990649573, + "learning_rate": 2.5679883883159283e-05, + "loss": 0.4829, + "step": 7275 + }, + { + "epoch": 2.0363839910439405, + "grad_norm": 0.25026215764599047, + "learning_rate": 2.566640709060032e-05, + "loss": 0.4713, + "step": 7276 + }, + { + "epoch": 2.036663867898125, + "grad_norm": 0.23154260517396444, + "learning_rate": 2.5652932614126345e-05, + "loss": 0.4283, + "step": 7277 + }, + { + "epoch": 2.036943744752309, + "grad_norm": 0.23802200814652452, + "learning_rate": 2.563946045501987e-05, + "loss": 0.4712, + "step": 7278 + }, + { + "epoch": 2.037223621606493, + "grad_norm": 0.24291863177250364, + "learning_rate": 2.5625990614563184e-05, + "loss": 0.4654, + "step": 7279 + }, + { + "epoch": 2.0375034984606772, + "grad_norm": 0.24164486995545642, + "learning_rate": 2.5612523094038355e-05, + "loss": 0.4478, + "step": 7280 + }, + { + "epoch": 2.0377833753148615, + "grad_norm": 0.25548731208347847, + "learning_rate": 2.559905789472723e-05, + "loss": 0.4592, + "step": 7281 + }, + { + "epoch": 2.038063252169046, + "grad_norm": 0.25126792292383143, + "learning_rate": 2.558559501791143e-05, + "loss": 0.4479, + "step": 7282 + }, + { + "epoch": 2.0383431290232297, + "grad_norm": 0.25616064956120344, + "learning_rate": 2.5572134464872364e-05, + "loss": 0.4389, + "step": 7283 + }, + { + "epoch": 2.038623005877414, + "grad_norm": 0.2495229580549435, + "learning_rate": 2.555867623689121e-05, + "loss": 0.451, + "step": 7284 + }, + { + "epoch": 2.0389028827315983, + "grad_norm": 0.24825812008906087, + "learning_rate": 2.554522033524893e-05, + "loss": 0.4718, + "step": 7285 + }, + { + "epoch": 2.039182759585782, + "grad_norm": 0.22993352633384567, + "learning_rate": 2.5531766761226272e-05, + "loss": 0.4377, + "step": 7286 + }, + { + "epoch": 2.0394626364399664, + "grad_norm": 0.23348585502524938, + "learning_rate": 2.5518315516103748e-05, + "loss": 0.4505, + "step": 7287 + }, + { + "epoch": 2.0397425132941507, + "grad_norm": 0.2505855142977987, + "learning_rate": 2.5504866601161652e-05, + "loss": 0.4748, + "step": 7288 + }, + { + "epoch": 2.0400223901483345, + "grad_norm": 0.24675514469301155, + "learning_rate": 2.5491420017680047e-05, + "loss": 0.4576, + "step": 7289 + }, + { + "epoch": 2.040302267002519, + "grad_norm": 0.24383144668329754, + "learning_rate": 2.5477975766938824e-05, + "loss": 0.4498, + "step": 7290 + }, + { + "epoch": 2.040582143856703, + "grad_norm": 0.24710370847883345, + "learning_rate": 2.546453385021759e-05, + "loss": 0.465, + "step": 7291 + }, + { + "epoch": 2.0408620207108874, + "grad_norm": 0.2500316025046973, + "learning_rate": 2.545109426879576e-05, + "loss": 0.4503, + "step": 7292 + }, + { + "epoch": 2.0411418975650713, + "grad_norm": 0.23595454209093394, + "learning_rate": 2.543765702395253e-05, + "loss": 0.4468, + "step": 7293 + }, + { + "epoch": 2.0414217744192555, + "grad_norm": 0.2506616203082804, + "learning_rate": 2.5424222116966844e-05, + "loss": 0.4563, + "step": 7294 + }, + { + "epoch": 2.04170165127344, + "grad_norm": 0.24512988782659414, + "learning_rate": 2.5410789549117447e-05, + "loss": 0.4497, + "step": 7295 + }, + { + "epoch": 2.0419815281276237, + "grad_norm": 0.2442620989671407, + "learning_rate": 2.539735932168287e-05, + "loss": 0.4463, + "step": 7296 + }, + { + "epoch": 2.042261404981808, + "grad_norm": 0.2362726934455694, + "learning_rate": 2.5383931435941394e-05, + "loss": 0.4595, + "step": 7297 + }, + { + "epoch": 2.0425412818359923, + "grad_norm": 0.2446448559543068, + "learning_rate": 2.5370505893171104e-05, + "loss": 0.4667, + "step": 7298 + }, + { + "epoch": 2.042821158690176, + "grad_norm": 0.2424844078232981, + "learning_rate": 2.5357082694649852e-05, + "loss": 0.4636, + "step": 7299 + }, + { + "epoch": 2.0431010355443604, + "grad_norm": 0.24831067498702217, + "learning_rate": 2.5343661841655263e-05, + "loss": 0.4685, + "step": 7300 + }, + { + "epoch": 2.0433809123985447, + "grad_norm": 0.2473059494485125, + "learning_rate": 2.5330243335464737e-05, + "loss": 0.4638, + "step": 7301 + }, + { + "epoch": 2.043660789252729, + "grad_norm": 0.24366057860506135, + "learning_rate": 2.5316827177355464e-05, + "loss": 0.4491, + "step": 7302 + }, + { + "epoch": 2.043940666106913, + "grad_norm": 0.2504327054990302, + "learning_rate": 2.530341336860439e-05, + "loss": 0.4546, + "step": 7303 + }, + { + "epoch": 2.044220542961097, + "grad_norm": 0.24348921913908614, + "learning_rate": 2.5290001910488257e-05, + "loss": 0.4725, + "step": 7304 + }, + { + "epoch": 2.0445004198152814, + "grad_norm": 0.2495963566684416, + "learning_rate": 2.5276592804283573e-05, + "loss": 0.4689, + "step": 7305 + }, + { + "epoch": 2.0447802966694653, + "grad_norm": 0.2546161261898743, + "learning_rate": 2.526318605126663e-05, + "loss": 0.4632, + "step": 7306 + }, + { + "epoch": 2.0450601735236495, + "grad_norm": 0.24076784172964433, + "learning_rate": 2.5249781652713457e-05, + "loss": 0.4594, + "step": 7307 + }, + { + "epoch": 2.045340050377834, + "grad_norm": 0.24357188961456142, + "learning_rate": 2.523637960989994e-05, + "loss": 0.4579, + "step": 7308 + }, + { + "epoch": 2.0456199272320177, + "grad_norm": 0.23903357537864758, + "learning_rate": 2.5222979924101675e-05, + "loss": 0.4742, + "step": 7309 + }, + { + "epoch": 2.045899804086202, + "grad_norm": 0.24550663594381486, + "learning_rate": 2.520958259659405e-05, + "loss": 0.4608, + "step": 7310 + }, + { + "epoch": 2.0461796809403863, + "grad_norm": 0.2583528489198306, + "learning_rate": 2.5196187628652247e-05, + "loss": 0.448, + "step": 7311 + }, + { + "epoch": 2.0464595577945706, + "grad_norm": 0.24482717544201815, + "learning_rate": 2.5182795021551163e-05, + "loss": 0.4677, + "step": 7312 + }, + { + "epoch": 2.0467394346487544, + "grad_norm": 0.25048101638936504, + "learning_rate": 2.5169404776565553e-05, + "loss": 0.465, + "step": 7313 + }, + { + "epoch": 2.0470193115029387, + "grad_norm": 0.24573404057412682, + "learning_rate": 2.5156016894969887e-05, + "loss": 0.4634, + "step": 7314 + }, + { + "epoch": 2.047299188357123, + "grad_norm": 0.25471144676227236, + "learning_rate": 2.5142631378038438e-05, + "loss": 0.4767, + "step": 7315 + }, + { + "epoch": 2.047579065211307, + "grad_norm": 0.24112860647988485, + "learning_rate": 2.5129248227045248e-05, + "loss": 0.457, + "step": 7316 + }, + { + "epoch": 2.047858942065491, + "grad_norm": 0.24418834987738441, + "learning_rate": 2.5115867443264136e-05, + "loss": 0.4351, + "step": 7317 + }, + { + "epoch": 2.0481388189196754, + "grad_norm": 0.2370929090253794, + "learning_rate": 2.510248902796869e-05, + "loss": 0.456, + "step": 7318 + }, + { + "epoch": 2.0484186957738597, + "grad_norm": 0.2379488471734615, + "learning_rate": 2.5089112982432268e-05, + "loss": 0.4582, + "step": 7319 + }, + { + "epoch": 2.0486985726280436, + "grad_norm": 0.24573479537604365, + "learning_rate": 2.5075739307928014e-05, + "loss": 0.4739, + "step": 7320 + }, + { + "epoch": 2.048978449482228, + "grad_norm": 0.25121647950701465, + "learning_rate": 2.5062368005728855e-05, + "loss": 0.4604, + "step": 7321 + }, + { + "epoch": 2.049258326336412, + "grad_norm": 0.246351919919773, + "learning_rate": 2.504899907710746e-05, + "loss": 0.4605, + "step": 7322 + }, + { + "epoch": 2.049538203190596, + "grad_norm": 0.24631304878976065, + "learning_rate": 2.5035632523336293e-05, + "loss": 0.4559, + "step": 7323 + }, + { + "epoch": 2.0498180800447803, + "grad_norm": 0.24343820253286605, + "learning_rate": 2.502226834568758e-05, + "loss": 0.4606, + "step": 7324 + }, + { + "epoch": 2.0500979568989646, + "grad_norm": 0.2654916928538729, + "learning_rate": 2.5008906545433375e-05, + "loss": 0.4518, + "step": 7325 + }, + { + "epoch": 2.0503778337531484, + "grad_norm": 0.24908078981837442, + "learning_rate": 2.4995547123845426e-05, + "loss": 0.4553, + "step": 7326 + }, + { + "epoch": 2.0506577106073327, + "grad_norm": 0.2484741919983697, + "learning_rate": 2.4982190082195293e-05, + "loss": 0.4649, + "step": 7327 + }, + { + "epoch": 2.050937587461517, + "grad_norm": 0.2534526358928263, + "learning_rate": 2.4968835421754316e-05, + "loss": 0.4807, + "step": 7328 + }, + { + "epoch": 2.0512174643157013, + "grad_norm": 0.24992970447352875, + "learning_rate": 2.4955483143793613e-05, + "loss": 0.4661, + "step": 7329 + }, + { + "epoch": 2.051497341169885, + "grad_norm": 0.2326036789805862, + "learning_rate": 2.494213324958402e-05, + "loss": 0.4676, + "step": 7330 + }, + { + "epoch": 2.0517772180240694, + "grad_norm": 0.24315149972019612, + "learning_rate": 2.4928785740396215e-05, + "loss": 0.4444, + "step": 7331 + }, + { + "epoch": 2.0520570948782537, + "grad_norm": 0.2363183917864702, + "learning_rate": 2.4915440617500613e-05, + "loss": 0.4741, + "step": 7332 + }, + { + "epoch": 2.0523369717324376, + "grad_norm": 0.24517539840395006, + "learning_rate": 2.4902097882167415e-05, + "loss": 0.4461, + "step": 7333 + }, + { + "epoch": 2.052616848586622, + "grad_norm": 0.240754716123818, + "learning_rate": 2.488875753566659e-05, + "loss": 0.4702, + "step": 7334 + }, + { + "epoch": 2.052896725440806, + "grad_norm": 0.24546696196290843, + "learning_rate": 2.4875419579267873e-05, + "loss": 0.4443, + "step": 7335 + }, + { + "epoch": 2.05317660229499, + "grad_norm": 0.24602789936627345, + "learning_rate": 2.486208401424079e-05, + "loss": 0.458, + "step": 7336 + }, + { + "epoch": 2.0534564791491743, + "grad_norm": 0.2549588748124507, + "learning_rate": 2.4848750841854616e-05, + "loss": 0.4526, + "step": 7337 + }, + { + "epoch": 2.0537363560033586, + "grad_norm": 0.24879893937875075, + "learning_rate": 2.4835420063378418e-05, + "loss": 0.479, + "step": 7338 + }, + { + "epoch": 2.054016232857543, + "grad_norm": 0.24536255209201727, + "learning_rate": 2.4822091680081018e-05, + "loss": 0.4428, + "step": 7339 + }, + { + "epoch": 2.0542961097117267, + "grad_norm": 0.2400429868181301, + "learning_rate": 2.480876569323103e-05, + "loss": 0.4697, + "step": 7340 + }, + { + "epoch": 2.054575986565911, + "grad_norm": 0.24794095141216924, + "learning_rate": 2.47954421040968e-05, + "loss": 0.4669, + "step": 7341 + }, + { + "epoch": 2.0548558634200953, + "grad_norm": 0.24993591672310664, + "learning_rate": 2.4782120913946523e-05, + "loss": 0.4416, + "step": 7342 + }, + { + "epoch": 2.055135740274279, + "grad_norm": 0.24633001747357577, + "learning_rate": 2.4768802124048085e-05, + "loss": 0.4632, + "step": 7343 + }, + { + "epoch": 2.0554156171284634, + "grad_norm": 0.2444112442902117, + "learning_rate": 2.4755485735669188e-05, + "loss": 0.4499, + "step": 7344 + }, + { + "epoch": 2.0556954939826477, + "grad_norm": 0.24607674096120183, + "learning_rate": 2.4742171750077286e-05, + "loss": 0.4681, + "step": 7345 + }, + { + "epoch": 2.0559753708368316, + "grad_norm": 0.2650090843559069, + "learning_rate": 2.4728860168539618e-05, + "loss": 0.4784, + "step": 7346 + }, + { + "epoch": 2.056255247691016, + "grad_norm": 0.24861802624393378, + "learning_rate": 2.47155509923232e-05, + "loss": 0.4575, + "step": 7347 + }, + { + "epoch": 2.0565351245452, + "grad_norm": 0.24588781256107936, + "learning_rate": 2.4702244222694776e-05, + "loss": 0.4609, + "step": 7348 + }, + { + "epoch": 2.0568150013993844, + "grad_norm": 0.24159597154506285, + "learning_rate": 2.46889398609209e-05, + "loss": 0.437, + "step": 7349 + }, + { + "epoch": 2.0570948782535683, + "grad_norm": 0.2471740579027645, + "learning_rate": 2.4675637908267903e-05, + "loss": 0.4278, + "step": 7350 + }, + { + "epoch": 2.0573747551077526, + "grad_norm": 0.2409864273656536, + "learning_rate": 2.466233836600186e-05, + "loss": 0.4491, + "step": 7351 + }, + { + "epoch": 2.057654631961937, + "grad_norm": 0.24855932017320295, + "learning_rate": 2.4649041235388633e-05, + "loss": 0.4622, + "step": 7352 + }, + { + "epoch": 2.0579345088161207, + "grad_norm": 0.24838985966907967, + "learning_rate": 2.4635746517693853e-05, + "loss": 0.482, + "step": 7353 + }, + { + "epoch": 2.058214385670305, + "grad_norm": 0.24954842637061003, + "learning_rate": 2.4622454214182917e-05, + "loss": 0.4653, + "step": 7354 + }, + { + "epoch": 2.0584942625244893, + "grad_norm": 0.23712606161658334, + "learning_rate": 2.4609164326120986e-05, + "loss": 0.4565, + "step": 7355 + }, + { + "epoch": 2.0587741393786736, + "grad_norm": 0.2423155389090088, + "learning_rate": 2.4595876854773016e-05, + "loss": 0.4532, + "step": 7356 + }, + { + "epoch": 2.0590540162328574, + "grad_norm": 0.2375487504413494, + "learning_rate": 2.4582591801403705e-05, + "loss": 0.4621, + "step": 7357 + }, + { + "epoch": 2.0593338930870417, + "grad_norm": 0.24704218164447447, + "learning_rate": 2.4569309167277528e-05, + "loss": 0.4706, + "step": 7358 + }, + { + "epoch": 2.059613769941226, + "grad_norm": 0.26403400576395336, + "learning_rate": 2.4556028953658722e-05, + "loss": 0.4841, + "step": 7359 + }, + { + "epoch": 2.05989364679541, + "grad_norm": 0.24067560034869967, + "learning_rate": 2.454275116181134e-05, + "loss": 0.4509, + "step": 7360 + }, + { + "epoch": 2.060173523649594, + "grad_norm": 0.2390827251944361, + "learning_rate": 2.452947579299915e-05, + "loss": 0.4659, + "step": 7361 + }, + { + "epoch": 2.0604534005037785, + "grad_norm": 0.2404377544195593, + "learning_rate": 2.4516202848485713e-05, + "loss": 0.4399, + "step": 7362 + }, + { + "epoch": 2.0607332773579623, + "grad_norm": 0.24907212792594013, + "learning_rate": 2.4502932329534356e-05, + "loss": 0.4732, + "step": 7363 + }, + { + "epoch": 2.0610131542121466, + "grad_norm": 0.23203863169907962, + "learning_rate": 2.4489664237408165e-05, + "loss": 0.4568, + "step": 7364 + }, + { + "epoch": 2.061293031066331, + "grad_norm": 0.2501366444762416, + "learning_rate": 2.4476398573370035e-05, + "loss": 0.4648, + "step": 7365 + }, + { + "epoch": 2.061572907920515, + "grad_norm": 0.2525304994937337, + "learning_rate": 2.446313533868255e-05, + "loss": 0.4629, + "step": 7366 + }, + { + "epoch": 2.061852784774699, + "grad_norm": 0.23931456218056962, + "learning_rate": 2.444987453460814e-05, + "loss": 0.4478, + "step": 7367 + }, + { + "epoch": 2.0621326616288833, + "grad_norm": 0.24774046523785284, + "learning_rate": 2.4436616162408975e-05, + "loss": 0.4497, + "step": 7368 + }, + { + "epoch": 2.0624125384830676, + "grad_norm": 0.24952988350952923, + "learning_rate": 2.442336022334699e-05, + "loss": 0.4616, + "step": 7369 + }, + { + "epoch": 2.0626924153372515, + "grad_norm": 0.23974477546908268, + "learning_rate": 2.4410106718683896e-05, + "loss": 0.4442, + "step": 7370 + }, + { + "epoch": 2.0629722921914357, + "grad_norm": 0.2497246097093398, + "learning_rate": 2.4396855649681166e-05, + "loss": 0.4425, + "step": 7371 + }, + { + "epoch": 2.06325216904562, + "grad_norm": 0.24174250745367293, + "learning_rate": 2.4383607017600048e-05, + "loss": 0.4454, + "step": 7372 + }, + { + "epoch": 2.063532045899804, + "grad_norm": 0.2537840961291248, + "learning_rate": 2.437036082370155e-05, + "loss": 0.4565, + "step": 7373 + }, + { + "epoch": 2.063811922753988, + "grad_norm": 0.25333088689501954, + "learning_rate": 2.4357117069246455e-05, + "loss": 0.4557, + "step": 7374 + }, + { + "epoch": 2.0640917996081725, + "grad_norm": 0.24056535254910136, + "learning_rate": 2.434387575549531e-05, + "loss": 0.4525, + "step": 7375 + }, + { + "epoch": 2.0643716764623568, + "grad_norm": 0.24208215006306735, + "learning_rate": 2.4330636883708412e-05, + "loss": 0.4392, + "step": 7376 + }, + { + "epoch": 2.0646515533165406, + "grad_norm": 0.2620641537026158, + "learning_rate": 2.4317400455145882e-05, + "loss": 0.4637, + "step": 7377 + }, + { + "epoch": 2.064931430170725, + "grad_norm": 0.23863057542189706, + "learning_rate": 2.430416647106756e-05, + "loss": 0.4502, + "step": 7378 + }, + { + "epoch": 2.065211307024909, + "grad_norm": 0.24783973345151425, + "learning_rate": 2.4290934932733045e-05, + "loss": 0.4775, + "step": 7379 + }, + { + "epoch": 2.065491183879093, + "grad_norm": 0.24599111696734668, + "learning_rate": 2.4277705841401737e-05, + "loss": 0.4692, + "step": 7380 + }, + { + "epoch": 2.0657710607332773, + "grad_norm": 0.24491120001851938, + "learning_rate": 2.4264479198332785e-05, + "loss": 0.4358, + "step": 7381 + }, + { + "epoch": 2.0660509375874616, + "grad_norm": 0.24505219777758316, + "learning_rate": 2.4251255004785106e-05, + "loss": 0.4551, + "step": 7382 + }, + { + "epoch": 2.0663308144416455, + "grad_norm": 0.23700138605356436, + "learning_rate": 2.4238033262017405e-05, + "loss": 0.4586, + "step": 7383 + }, + { + "epoch": 2.0666106912958297, + "grad_norm": 0.2461152083478605, + "learning_rate": 2.42248139712881e-05, + "loss": 0.4578, + "step": 7384 + }, + { + "epoch": 2.066890568150014, + "grad_norm": 0.24478452390468058, + "learning_rate": 2.421159713385543e-05, + "loss": 0.4479, + "step": 7385 + }, + { + "epoch": 2.0671704450041983, + "grad_norm": 0.24983949345962733, + "learning_rate": 2.4198382750977384e-05, + "loss": 0.4407, + "step": 7386 + }, + { + "epoch": 2.067450321858382, + "grad_norm": 0.2441473963548969, + "learning_rate": 2.41851708239117e-05, + "loss": 0.4665, + "step": 7387 + }, + { + "epoch": 2.0677301987125665, + "grad_norm": 0.23834976405597819, + "learning_rate": 2.417196135391591e-05, + "loss": 0.4532, + "step": 7388 + }, + { + "epoch": 2.0680100755667508, + "grad_norm": 0.3029073235707665, + "learning_rate": 2.41587543422473e-05, + "loss": 0.4705, + "step": 7389 + }, + { + "epoch": 2.0682899524209346, + "grad_norm": 0.26545078973583597, + "learning_rate": 2.4145549790162906e-05, + "loss": 0.473, + "step": 7390 + }, + { + "epoch": 2.068569829275119, + "grad_norm": 0.23377478818315944, + "learning_rate": 2.413234769891956e-05, + "loss": 0.4174, + "step": 7391 + }, + { + "epoch": 2.068849706129303, + "grad_norm": 0.24501899558071738, + "learning_rate": 2.411914806977384e-05, + "loss": 0.455, + "step": 7392 + }, + { + "epoch": 2.0691295829834875, + "grad_norm": 0.2509900879447519, + "learning_rate": 2.410595090398207e-05, + "loss": 0.4631, + "step": 7393 + }, + { + "epoch": 2.0694094598376713, + "grad_norm": 0.24425155333154205, + "learning_rate": 2.409275620280041e-05, + "loss": 0.4459, + "step": 7394 + }, + { + "epoch": 2.0696893366918556, + "grad_norm": 0.24231387445788757, + "learning_rate": 2.4079563967484713e-05, + "loss": 0.444, + "step": 7395 + }, + { + "epoch": 2.06996921354604, + "grad_norm": 0.24994496182304815, + "learning_rate": 2.4066374199290626e-05, + "loss": 0.4568, + "step": 7396 + }, + { + "epoch": 2.0702490904002238, + "grad_norm": 0.23904145426295392, + "learning_rate": 2.4053186899473557e-05, + "loss": 0.4761, + "step": 7397 + }, + { + "epoch": 2.070528967254408, + "grad_norm": 0.2544856406150435, + "learning_rate": 2.4040002069288687e-05, + "loss": 0.4529, + "step": 7398 + }, + { + "epoch": 2.0708088441085923, + "grad_norm": 0.238774996920386, + "learning_rate": 2.4026819709990945e-05, + "loss": 0.4433, + "step": 7399 + }, + { + "epoch": 2.071088720962776, + "grad_norm": 0.24918531664750443, + "learning_rate": 2.4013639822835042e-05, + "loss": 0.4653, + "step": 7400 + }, + { + "epoch": 2.0713685978169605, + "grad_norm": 0.24042479136433584, + "learning_rate": 2.4000462409075447e-05, + "loss": 0.4439, + "step": 7401 + }, + { + "epoch": 2.0716484746711448, + "grad_norm": 0.24644526224999894, + "learning_rate": 2.3987287469966413e-05, + "loss": 0.4543, + "step": 7402 + }, + { + "epoch": 2.071928351525329, + "grad_norm": 0.24734302322857166, + "learning_rate": 2.3974115006761894e-05, + "loss": 0.4411, + "step": 7403 + }, + { + "epoch": 2.072208228379513, + "grad_norm": 0.24448473335472756, + "learning_rate": 2.396094502071568e-05, + "loss": 0.4591, + "step": 7404 + }, + { + "epoch": 2.072488105233697, + "grad_norm": 0.2465905535816699, + "learning_rate": 2.3947777513081292e-05, + "loss": 0.4421, + "step": 7405 + }, + { + "epoch": 2.0727679820878815, + "grad_norm": 0.24346649080667676, + "learning_rate": 2.3934612485112024e-05, + "loss": 0.4449, + "step": 7406 + }, + { + "epoch": 2.0730478589420653, + "grad_norm": 0.24604002050814763, + "learning_rate": 2.3921449938060924e-05, + "loss": 0.4467, + "step": 7407 + }, + { + "epoch": 2.0733277357962496, + "grad_norm": 0.24562926902681928, + "learning_rate": 2.3908289873180823e-05, + "loss": 0.4599, + "step": 7408 + }, + { + "epoch": 2.073607612650434, + "grad_norm": 0.24589573249544378, + "learning_rate": 2.389513229172429e-05, + "loss": 0.4539, + "step": 7409 + }, + { + "epoch": 2.0738874895046178, + "grad_norm": 0.24678813561369586, + "learning_rate": 2.3881977194943677e-05, + "loss": 0.4579, + "step": 7410 + }, + { + "epoch": 2.074167366358802, + "grad_norm": 0.2662776923644153, + "learning_rate": 2.386882458409108e-05, + "loss": 0.4671, + "step": 7411 + }, + { + "epoch": 2.0744472432129863, + "grad_norm": 0.25978459638648005, + "learning_rate": 2.3855674460418404e-05, + "loss": 0.4596, + "step": 7412 + }, + { + "epoch": 2.0747271200671706, + "grad_norm": 0.23503045811023623, + "learning_rate": 2.384252682517726e-05, + "loss": 0.4432, + "step": 7413 + }, + { + "epoch": 2.0750069969213545, + "grad_norm": 0.23601056429318246, + "learning_rate": 2.3829381679619058e-05, + "loss": 0.4394, + "step": 7414 + }, + { + "epoch": 2.0752868737755388, + "grad_norm": 0.24795182253458156, + "learning_rate": 2.3816239024994957e-05, + "loss": 0.4549, + "step": 7415 + }, + { + "epoch": 2.075566750629723, + "grad_norm": 0.2425400511452179, + "learning_rate": 2.3803098862555877e-05, + "loss": 0.439, + "step": 7416 + }, + { + "epoch": 2.075846627483907, + "grad_norm": 0.2573212425961692, + "learning_rate": 2.378996119355251e-05, + "loss": 0.4557, + "step": 7417 + }, + { + "epoch": 2.076126504338091, + "grad_norm": 0.25253090866307665, + "learning_rate": 2.3776826019235315e-05, + "loss": 0.4568, + "step": 7418 + }, + { + "epoch": 2.0764063811922755, + "grad_norm": 0.25066486315121606, + "learning_rate": 2.3763693340854493e-05, + "loss": 0.4416, + "step": 7419 + }, + { + "epoch": 2.0766862580464593, + "grad_norm": 0.24113915131918504, + "learning_rate": 2.3750563159660044e-05, + "loss": 0.4516, + "step": 7420 + }, + { + "epoch": 2.0769661349006436, + "grad_norm": 0.24712480325430675, + "learning_rate": 2.3737435476901663e-05, + "loss": 0.4485, + "step": 7421 + }, + { + "epoch": 2.077246011754828, + "grad_norm": 0.2312072937319624, + "learning_rate": 2.372431029382888e-05, + "loss": 0.4344, + "step": 7422 + }, + { + "epoch": 2.077525888609012, + "grad_norm": 0.24605372198296094, + "learning_rate": 2.3711187611690944e-05, + "loss": 0.4625, + "step": 7423 + }, + { + "epoch": 2.077805765463196, + "grad_norm": 0.23906096090817333, + "learning_rate": 2.3698067431736887e-05, + "loss": 0.4799, + "step": 7424 + }, + { + "epoch": 2.0780856423173804, + "grad_norm": 0.2489263103624333, + "learning_rate": 2.3684949755215492e-05, + "loss": 0.4669, + "step": 7425 + }, + { + "epoch": 2.0783655191715646, + "grad_norm": 0.25040341531072063, + "learning_rate": 2.3671834583375313e-05, + "loss": 0.4677, + "step": 7426 + }, + { + "epoch": 2.0786453960257485, + "grad_norm": 0.2490478099157188, + "learning_rate": 2.365872191746465e-05, + "loss": 0.4458, + "step": 7427 + }, + { + "epoch": 2.078925272879933, + "grad_norm": 0.2433664703471242, + "learning_rate": 2.364561175873156e-05, + "loss": 0.4501, + "step": 7428 + }, + { + "epoch": 2.079205149734117, + "grad_norm": 0.24597492205580676, + "learning_rate": 2.363250410842392e-05, + "loss": 0.4705, + "step": 7429 + }, + { + "epoch": 2.0794850265883014, + "grad_norm": 0.24294513003802817, + "learning_rate": 2.3619398967789292e-05, + "loss": 0.4438, + "step": 7430 + }, + { + "epoch": 2.079764903442485, + "grad_norm": 0.24488962710679907, + "learning_rate": 2.3606296338075034e-05, + "loss": 0.4556, + "step": 7431 + }, + { + "epoch": 2.0800447802966695, + "grad_norm": 0.257853787991794, + "learning_rate": 2.359319622052827e-05, + "loss": 0.457, + "step": 7432 + }, + { + "epoch": 2.080324657150854, + "grad_norm": 0.2494678467193366, + "learning_rate": 2.3580098616395863e-05, + "loss": 0.4782, + "step": 7433 + }, + { + "epoch": 2.0806045340050376, + "grad_norm": 0.2369808762035699, + "learning_rate": 2.3567003526924463e-05, + "loss": 0.4293, + "step": 7434 + }, + { + "epoch": 2.080884410859222, + "grad_norm": 0.23811384961661042, + "learning_rate": 2.355391095336046e-05, + "loss": 0.4509, + "step": 7435 + }, + { + "epoch": 2.0811642877134062, + "grad_norm": 0.24552576073462928, + "learning_rate": 2.3540820896950016e-05, + "loss": 0.4626, + "step": 7436 + }, + { + "epoch": 2.08144416456759, + "grad_norm": 0.24413988388687807, + "learning_rate": 2.3527733358939046e-05, + "loss": 0.4546, + "step": 7437 + }, + { + "epoch": 2.0817240414217744, + "grad_norm": 0.2485981838183438, + "learning_rate": 2.3514648340573257e-05, + "loss": 0.4599, + "step": 7438 + }, + { + "epoch": 2.0820039182759587, + "grad_norm": 0.24454659192468253, + "learning_rate": 2.350156584309804e-05, + "loss": 0.444, + "step": 7439 + }, + { + "epoch": 2.082283795130143, + "grad_norm": 0.24074442743098742, + "learning_rate": 2.348848586775862e-05, + "loss": 0.4703, + "step": 7440 + }, + { + "epoch": 2.082563671984327, + "grad_norm": 0.23775973111168042, + "learning_rate": 2.3475408415799953e-05, + "loss": 0.4436, + "step": 7441 + }, + { + "epoch": 2.082843548838511, + "grad_norm": 0.23971493006095523, + "learning_rate": 2.3462333488466758e-05, + "loss": 0.4513, + "step": 7442 + }, + { + "epoch": 2.0831234256926954, + "grad_norm": 0.24551777555783094, + "learning_rate": 2.344926108700352e-05, + "loss": 0.4517, + "step": 7443 + }, + { + "epoch": 2.083403302546879, + "grad_norm": 0.24608179058927987, + "learning_rate": 2.343619121265447e-05, + "loss": 0.4543, + "step": 7444 + }, + { + "epoch": 2.0836831794010635, + "grad_norm": 0.23219516309417146, + "learning_rate": 2.342312386666359e-05, + "loss": 0.4364, + "step": 7445 + }, + { + "epoch": 2.083963056255248, + "grad_norm": 0.24001757191204898, + "learning_rate": 2.3410059050274674e-05, + "loss": 0.448, + "step": 7446 + }, + { + "epoch": 2.0842429331094317, + "grad_norm": 0.24052130703717217, + "learning_rate": 2.339699676473122e-05, + "loss": 0.4356, + "step": 7447 + }, + { + "epoch": 2.084522809963616, + "grad_norm": 0.2576403626158046, + "learning_rate": 2.338393701127651e-05, + "loss": 0.4737, + "step": 7448 + }, + { + "epoch": 2.0848026868178002, + "grad_norm": 0.24973816696897705, + "learning_rate": 2.3370879791153566e-05, + "loss": 0.4641, + "step": 7449 + }, + { + "epoch": 2.0850825636719845, + "grad_norm": 0.24707987841698237, + "learning_rate": 2.335782510560519e-05, + "loss": 0.4485, + "step": 7450 + }, + { + "epoch": 2.0853624405261684, + "grad_norm": 0.24198995221156094, + "learning_rate": 2.3344772955873934e-05, + "loss": 0.4763, + "step": 7451 + }, + { + "epoch": 2.0856423173803527, + "grad_norm": 0.24573034712654576, + "learning_rate": 2.3331723343202106e-05, + "loss": 0.4598, + "step": 7452 + }, + { + "epoch": 2.085922194234537, + "grad_norm": 0.24392825108681687, + "learning_rate": 2.331867626883178e-05, + "loss": 0.4611, + "step": 7453 + }, + { + "epoch": 2.086202071088721, + "grad_norm": 0.25078868089134737, + "learning_rate": 2.330563173400478e-05, + "loss": 0.4664, + "step": 7454 + }, + { + "epoch": 2.086481947942905, + "grad_norm": 0.25552493357833944, + "learning_rate": 2.3292589739962695e-05, + "loss": 0.463, + "step": 7455 + }, + { + "epoch": 2.0867618247970894, + "grad_norm": 0.25152576521590647, + "learning_rate": 2.327955028794688e-05, + "loss": 0.4657, + "step": 7456 + }, + { + "epoch": 2.0870417016512732, + "grad_norm": 0.2540216750184329, + "learning_rate": 2.3266513379198413e-05, + "loss": 0.4431, + "step": 7457 + }, + { + "epoch": 2.0873215785054575, + "grad_norm": 0.257161622211932, + "learning_rate": 2.3253479014958164e-05, + "loss": 0.4471, + "step": 7458 + }, + { + "epoch": 2.087601455359642, + "grad_norm": 0.24570160762022308, + "learning_rate": 2.3240447196466748e-05, + "loss": 0.4429, + "step": 7459 + }, + { + "epoch": 2.087881332213826, + "grad_norm": 0.24817088308505086, + "learning_rate": 2.322741792496455e-05, + "loss": 0.471, + "step": 7460 + }, + { + "epoch": 2.08816120906801, + "grad_norm": 0.24166357490471427, + "learning_rate": 2.32143912016917e-05, + "loss": 0.4488, + "step": 7461 + }, + { + "epoch": 2.0884410859221942, + "grad_norm": 0.24797840483804628, + "learning_rate": 2.3201367027888083e-05, + "loss": 0.4661, + "step": 7462 + }, + { + "epoch": 2.0887209627763785, + "grad_norm": 0.24327516494509666, + "learning_rate": 2.3188345404793338e-05, + "loss": 0.45, + "step": 7463 + }, + { + "epoch": 2.0890008396305624, + "grad_norm": 0.2494607268291219, + "learning_rate": 2.31753263336469e-05, + "loss": 0.4706, + "step": 7464 + }, + { + "epoch": 2.0892807164847467, + "grad_norm": 0.2413127471468335, + "learning_rate": 2.3162309815687922e-05, + "loss": 0.4474, + "step": 7465 + }, + { + "epoch": 2.089560593338931, + "grad_norm": 0.23146437649500243, + "learning_rate": 2.3149295852155313e-05, + "loss": 0.452, + "step": 7466 + }, + { + "epoch": 2.0898404701931153, + "grad_norm": 0.24190359388748187, + "learning_rate": 2.3136284444287755e-05, + "loss": 0.455, + "step": 7467 + }, + { + "epoch": 2.090120347047299, + "grad_norm": 0.2480340305458742, + "learning_rate": 2.312327559332368e-05, + "loss": 0.4677, + "step": 7468 + }, + { + "epoch": 2.0904002239014834, + "grad_norm": 0.25900478336119287, + "learning_rate": 2.3110269300501275e-05, + "loss": 0.4716, + "step": 7469 + }, + { + "epoch": 2.0906801007556677, + "grad_norm": 0.25055030068392614, + "learning_rate": 2.309726556705849e-05, + "loss": 0.4553, + "step": 7470 + }, + { + "epoch": 2.0909599776098515, + "grad_norm": 0.24567747066069287, + "learning_rate": 2.308426439423303e-05, + "loss": 0.4482, + "step": 7471 + }, + { + "epoch": 2.091239854464036, + "grad_norm": 0.24812686237349435, + "learning_rate": 2.3071265783262345e-05, + "loss": 0.4717, + "step": 7472 + }, + { + "epoch": 2.09151973131822, + "grad_norm": 0.2549949899850669, + "learning_rate": 2.305826973538366e-05, + "loss": 0.4695, + "step": 7473 + }, + { + "epoch": 2.091799608172404, + "grad_norm": 0.2504635907404072, + "learning_rate": 2.304527625183396e-05, + "loss": 0.4495, + "step": 7474 + }, + { + "epoch": 2.0920794850265882, + "grad_norm": 0.23500629964294462, + "learning_rate": 2.303228533384993e-05, + "loss": 0.4415, + "step": 7475 + }, + { + "epoch": 2.0923593618807725, + "grad_norm": 0.24579359852312388, + "learning_rate": 2.3019296982668083e-05, + "loss": 0.4419, + "step": 7476 + }, + { + "epoch": 2.092639238734957, + "grad_norm": 0.24262114918616326, + "learning_rate": 2.3006311199524645e-05, + "loss": 0.4435, + "step": 7477 + }, + { + "epoch": 2.0929191155891407, + "grad_norm": 0.24015906914268179, + "learning_rate": 2.2993327985655614e-05, + "loss": 0.4368, + "step": 7478 + }, + { + "epoch": 2.093198992443325, + "grad_norm": 0.25274943579405135, + "learning_rate": 2.2980347342296747e-05, + "loss": 0.4523, + "step": 7479 + }, + { + "epoch": 2.0934788692975093, + "grad_norm": 0.24605545727398748, + "learning_rate": 2.2967369270683525e-05, + "loss": 0.446, + "step": 7480 + }, + { + "epoch": 2.093758746151693, + "grad_norm": 0.24493701274465995, + "learning_rate": 2.2954393772051245e-05, + "loss": 0.4429, + "step": 7481 + }, + { + "epoch": 2.0940386230058774, + "grad_norm": 0.23577548000071086, + "learning_rate": 2.2941420847634905e-05, + "loss": 0.4612, + "step": 7482 + }, + { + "epoch": 2.0943184998600617, + "grad_norm": 0.25489438452229934, + "learning_rate": 2.2928450498669274e-05, + "loss": 0.4528, + "step": 7483 + }, + { + "epoch": 2.0945983767142455, + "grad_norm": 0.24634515710529292, + "learning_rate": 2.291548272638887e-05, + "loss": 0.4512, + "step": 7484 + }, + { + "epoch": 2.09487825356843, + "grad_norm": 0.2542990576902803, + "learning_rate": 2.290251753202799e-05, + "loss": 0.4846, + "step": 7485 + }, + { + "epoch": 2.095158130422614, + "grad_norm": 0.2551101021992683, + "learning_rate": 2.2889554916820655e-05, + "loss": 0.4598, + "step": 7486 + }, + { + "epoch": 2.0954380072767984, + "grad_norm": 0.25317851487491305, + "learning_rate": 2.287659488200065e-05, + "loss": 0.4619, + "step": 7487 + }, + { + "epoch": 2.0957178841309823, + "grad_norm": 0.24711890584810983, + "learning_rate": 2.2863637428801536e-05, + "loss": 0.4702, + "step": 7488 + }, + { + "epoch": 2.0959977609851665, + "grad_norm": 0.2435421357837326, + "learning_rate": 2.2850682558456593e-05, + "loss": 0.4518, + "step": 7489 + }, + { + "epoch": 2.096277637839351, + "grad_norm": 0.2517223552482673, + "learning_rate": 2.2837730272198888e-05, + "loss": 0.4485, + "step": 7490 + }, + { + "epoch": 2.0965575146935347, + "grad_norm": 0.25108703790852543, + "learning_rate": 2.2824780571261213e-05, + "loss": 0.4534, + "step": 7491 + }, + { + "epoch": 2.096837391547719, + "grad_norm": 0.24242185222208118, + "learning_rate": 2.2811833456876152e-05, + "loss": 0.4414, + "step": 7492 + }, + { + "epoch": 2.0971172684019033, + "grad_norm": 0.23892286033892943, + "learning_rate": 2.2798888930275984e-05, + "loss": 0.4454, + "step": 7493 + }, + { + "epoch": 2.097397145256087, + "grad_norm": 0.24753543768992284, + "learning_rate": 2.2785946992692796e-05, + "loss": 0.4627, + "step": 7494 + }, + { + "epoch": 2.0976770221102714, + "grad_norm": 0.24247874550019946, + "learning_rate": 2.2773007645358403e-05, + "loss": 0.4569, + "step": 7495 + }, + { + "epoch": 2.0979568989644557, + "grad_norm": 0.2450691802857712, + "learning_rate": 2.2760070889504382e-05, + "loss": 0.4325, + "step": 7496 + }, + { + "epoch": 2.09823677581864, + "grad_norm": 0.24571952118794047, + "learning_rate": 2.2747136726362038e-05, + "loss": 0.4554, + "step": 7497 + }, + { + "epoch": 2.098516652672824, + "grad_norm": 0.23875697882298036, + "learning_rate": 2.2734205157162498e-05, + "loss": 0.4283, + "step": 7498 + }, + { + "epoch": 2.098796529527008, + "grad_norm": 0.2533906134623975, + "learning_rate": 2.272127618313657e-05, + "loss": 0.465, + "step": 7499 + }, + { + "epoch": 2.0990764063811924, + "grad_norm": 0.251507126168585, + "learning_rate": 2.2708349805514846e-05, + "loss": 0.4604, + "step": 7500 + }, + { + "epoch": 2.0993562832353763, + "grad_norm": 0.24871299306195427, + "learning_rate": 2.269542602552766e-05, + "loss": 0.4489, + "step": 7501 + }, + { + "epoch": 2.0996361600895606, + "grad_norm": 0.2577185184620966, + "learning_rate": 2.268250484440511e-05, + "loss": 0.4775, + "step": 7502 + }, + { + "epoch": 2.099916036943745, + "grad_norm": 0.2560814309266284, + "learning_rate": 2.2669586263377033e-05, + "loss": 0.4412, + "step": 7503 + }, + { + "epoch": 2.100195913797929, + "grad_norm": 0.2350102493647487, + "learning_rate": 2.2656670283673042e-05, + "loss": 0.4512, + "step": 7504 + }, + { + "epoch": 2.100475790652113, + "grad_norm": 0.25754304347631335, + "learning_rate": 2.2643756906522478e-05, + "loss": 0.4509, + "step": 7505 + }, + { + "epoch": 2.1007556675062973, + "grad_norm": 0.2497629763548119, + "learning_rate": 2.2630846133154436e-05, + "loss": 0.4402, + "step": 7506 + }, + { + "epoch": 2.1010355443604816, + "grad_norm": 0.24545977648616682, + "learning_rate": 2.2617937964797785e-05, + "loss": 0.4396, + "step": 7507 + }, + { + "epoch": 2.1013154212146654, + "grad_norm": 0.25390296199369455, + "learning_rate": 2.260503240268112e-05, + "loss": 0.4612, + "step": 7508 + }, + { + "epoch": 2.1015952980688497, + "grad_norm": 0.2360792832276678, + "learning_rate": 2.259212944803281e-05, + "loss": 0.4438, + "step": 7509 + }, + { + "epoch": 2.101875174923034, + "grad_norm": 0.23824582001171501, + "learning_rate": 2.2579229102080973e-05, + "loss": 0.4824, + "step": 7510 + }, + { + "epoch": 2.102155051777218, + "grad_norm": 0.23849784399502194, + "learning_rate": 2.2566331366053446e-05, + "loss": 0.4501, + "step": 7511 + }, + { + "epoch": 2.102434928631402, + "grad_norm": 0.2564225630725051, + "learning_rate": 2.255343624117785e-05, + "loss": 0.4534, + "step": 7512 + }, + { + "epoch": 2.1027148054855864, + "grad_norm": 0.2575461076175124, + "learning_rate": 2.2540543728681556e-05, + "loss": 0.4621, + "step": 7513 + }, + { + "epoch": 2.1029946823397707, + "grad_norm": 0.25905110465218073, + "learning_rate": 2.2527653829791662e-05, + "loss": 0.4589, + "step": 7514 + }, + { + "epoch": 2.1032745591939546, + "grad_norm": 0.2561865667351127, + "learning_rate": 2.251476654573507e-05, + "loss": 0.4399, + "step": 7515 + }, + { + "epoch": 2.103554436048139, + "grad_norm": 0.24058587130721956, + "learning_rate": 2.2501881877738383e-05, + "loss": 0.4261, + "step": 7516 + }, + { + "epoch": 2.103834312902323, + "grad_norm": 0.2515336882108558, + "learning_rate": 2.2488999827027972e-05, + "loss": 0.4466, + "step": 7517 + }, + { + "epoch": 2.104114189756507, + "grad_norm": 0.2451665174890162, + "learning_rate": 2.2476120394829952e-05, + "loss": 0.4719, + "step": 7518 + }, + { + "epoch": 2.1043940666106913, + "grad_norm": 0.25910932881308824, + "learning_rate": 2.24632435823702e-05, + "loss": 0.4741, + "step": 7519 + }, + { + "epoch": 2.1046739434648756, + "grad_norm": 0.24546199751436237, + "learning_rate": 2.2450369390874344e-05, + "loss": 0.4778, + "step": 7520 + }, + { + "epoch": 2.1049538203190594, + "grad_norm": 0.2474355420866834, + "learning_rate": 2.2437497821567744e-05, + "loss": 0.4541, + "step": 7521 + }, + { + "epoch": 2.1052336971732437, + "grad_norm": 0.24332045321952375, + "learning_rate": 2.2424628875675524e-05, + "loss": 0.4688, + "step": 7522 + }, + { + "epoch": 2.105513574027428, + "grad_norm": 0.23802478038156527, + "learning_rate": 2.241176255442257e-05, + "loss": 0.4574, + "step": 7523 + }, + { + "epoch": 2.1057934508816123, + "grad_norm": 0.25560288102315903, + "learning_rate": 2.2398898859033494e-05, + "loss": 0.4644, + "step": 7524 + }, + { + "epoch": 2.106073327735796, + "grad_norm": 0.2490834736430194, + "learning_rate": 2.2386037790732673e-05, + "loss": 0.4479, + "step": 7525 + }, + { + "epoch": 2.1063532045899804, + "grad_norm": 0.24455409618143778, + "learning_rate": 2.2373179350744235e-05, + "loss": 0.488, + "step": 7526 + }, + { + "epoch": 2.1066330814441647, + "grad_norm": 0.25081577247405235, + "learning_rate": 2.2360323540292044e-05, + "loss": 0.4606, + "step": 7527 + }, + { + "epoch": 2.1069129582983486, + "grad_norm": 0.23562210124486416, + "learning_rate": 2.2347470360599754e-05, + "loss": 0.4474, + "step": 7528 + }, + { + "epoch": 2.107192835152533, + "grad_norm": 0.2548395480913713, + "learning_rate": 2.233461981289069e-05, + "loss": 0.46, + "step": 7529 + }, + { + "epoch": 2.107472712006717, + "grad_norm": 0.2379713472145678, + "learning_rate": 2.2321771898388e-05, + "loss": 0.4555, + "step": 7530 + }, + { + "epoch": 2.107752588860901, + "grad_norm": 0.251286433144628, + "learning_rate": 2.2308926618314553e-05, + "loss": 0.4684, + "step": 7531 + }, + { + "epoch": 2.1080324657150853, + "grad_norm": 0.23615427735493252, + "learning_rate": 2.2296083973892945e-05, + "loss": 0.4599, + "step": 7532 + }, + { + "epoch": 2.1083123425692696, + "grad_norm": 0.24847602036792382, + "learning_rate": 2.2283243966345596e-05, + "loss": 0.4361, + "step": 7533 + }, + { + "epoch": 2.108592219423454, + "grad_norm": 0.24301117802883626, + "learning_rate": 2.22704065968946e-05, + "loss": 0.4253, + "step": 7534 + }, + { + "epoch": 2.1088720962776377, + "grad_norm": 0.23710501731491193, + "learning_rate": 2.2257571866761824e-05, + "loss": 0.43, + "step": 7535 + }, + { + "epoch": 2.109151973131822, + "grad_norm": 0.2529835935009079, + "learning_rate": 2.224473977716888e-05, + "loss": 0.4465, + "step": 7536 + }, + { + "epoch": 2.1094318499860063, + "grad_norm": 0.2408870811268891, + "learning_rate": 2.2231910329337147e-05, + "loss": 0.455, + "step": 7537 + }, + { + "epoch": 2.10971172684019, + "grad_norm": 0.24820848610356108, + "learning_rate": 2.2219083524487726e-05, + "loss": 0.466, + "step": 7538 + }, + { + "epoch": 2.1099916036943744, + "grad_norm": 0.2512888152568631, + "learning_rate": 2.220625936384149e-05, + "loss": 0.4473, + "step": 7539 + }, + { + "epoch": 2.1102714805485587, + "grad_norm": 0.24460559721548294, + "learning_rate": 2.219343784861904e-05, + "loss": 0.4505, + "step": 7540 + }, + { + "epoch": 2.1105513574027426, + "grad_norm": 0.24237395765297018, + "learning_rate": 2.2180618980040747e-05, + "loss": 0.4444, + "step": 7541 + }, + { + "epoch": 2.110831234256927, + "grad_norm": 0.2472666300963442, + "learning_rate": 2.2167802759326704e-05, + "loss": 0.4739, + "step": 7542 + }, + { + "epoch": 2.111111111111111, + "grad_norm": 0.24105355979943674, + "learning_rate": 2.2154989187696772e-05, + "loss": 0.4328, + "step": 7543 + }, + { + "epoch": 2.1113909879652955, + "grad_norm": 0.24678765407012285, + "learning_rate": 2.2142178266370557e-05, + "loss": 0.4398, + "step": 7544 + }, + { + "epoch": 2.1116708648194793, + "grad_norm": 0.2444408885095439, + "learning_rate": 2.2129369996567406e-05, + "loss": 0.4418, + "step": 7545 + }, + { + "epoch": 2.1119507416736636, + "grad_norm": 0.24095764617311172, + "learning_rate": 2.2116564379506437e-05, + "loss": 0.4487, + "step": 7546 + }, + { + "epoch": 2.112230618527848, + "grad_norm": 0.23834019563772968, + "learning_rate": 2.2103761416406466e-05, + "loss": 0.475, + "step": 7547 + }, + { + "epoch": 2.1125104953820317, + "grad_norm": 0.24684141566440712, + "learning_rate": 2.2090961108486092e-05, + "loss": 0.4794, + "step": 7548 + }, + { + "epoch": 2.112790372236216, + "grad_norm": 0.24724331112303274, + "learning_rate": 2.207816345696364e-05, + "loss": 0.4581, + "step": 7549 + }, + { + "epoch": 2.1130702490904003, + "grad_norm": 0.2377554401642799, + "learning_rate": 2.206536846305724e-05, + "loss": 0.4588, + "step": 7550 + }, + { + "epoch": 2.113350125944584, + "grad_norm": 0.2551818636759096, + "learning_rate": 2.2052576127984704e-05, + "loss": 0.4605, + "step": 7551 + }, + { + "epoch": 2.1136300027987684, + "grad_norm": 0.2472219607296317, + "learning_rate": 2.203978645296362e-05, + "loss": 0.4645, + "step": 7552 + }, + { + "epoch": 2.1139098796529527, + "grad_norm": 0.25375763961112763, + "learning_rate": 2.2026999439211305e-05, + "loss": 0.4618, + "step": 7553 + }, + { + "epoch": 2.114189756507137, + "grad_norm": 0.24460683253482174, + "learning_rate": 2.201421508794484e-05, + "loss": 0.4429, + "step": 7554 + }, + { + "epoch": 2.114469633361321, + "grad_norm": 0.24165091989308077, + "learning_rate": 2.200143340038105e-05, + "loss": 0.4587, + "step": 7555 + }, + { + "epoch": 2.114749510215505, + "grad_norm": 0.24218243076459725, + "learning_rate": 2.19886543777365e-05, + "loss": 0.4675, + "step": 7556 + }, + { + "epoch": 2.1150293870696895, + "grad_norm": 0.2437905324988868, + "learning_rate": 2.1975878021227507e-05, + "loss": 0.4573, + "step": 7557 + }, + { + "epoch": 2.1153092639238733, + "grad_norm": 0.2422809195703653, + "learning_rate": 2.1963104332070127e-05, + "loss": 0.4544, + "step": 7558 + }, + { + "epoch": 2.1155891407780576, + "grad_norm": 0.24630979034974235, + "learning_rate": 2.195033331148017e-05, + "loss": 0.4797, + "step": 7559 + }, + { + "epoch": 2.115869017632242, + "grad_norm": 0.240337674393747, + "learning_rate": 2.193756496067319e-05, + "loss": 0.449, + "step": 7560 + }, + { + "epoch": 2.116148894486426, + "grad_norm": 0.25419261804892673, + "learning_rate": 2.192479928086448e-05, + "loss": 0.4546, + "step": 7561 + }, + { + "epoch": 2.11642877134061, + "grad_norm": 0.2573381459024405, + "learning_rate": 2.19120362732691e-05, + "loss": 0.4913, + "step": 7562 + }, + { + "epoch": 2.1167086481947943, + "grad_norm": 0.23587256095619225, + "learning_rate": 2.189927593910182e-05, + "loss": 0.4383, + "step": 7563 + }, + { + "epoch": 2.1169885250489786, + "grad_norm": 0.24591806879134256, + "learning_rate": 2.18865182795772e-05, + "loss": 0.4572, + "step": 7564 + }, + { + "epoch": 2.1172684019031625, + "grad_norm": 0.236748623891741, + "learning_rate": 2.1873763295909492e-05, + "loss": 0.4569, + "step": 7565 + }, + { + "epoch": 2.1175482787573467, + "grad_norm": 0.2551511152984237, + "learning_rate": 2.186101098931272e-05, + "loss": 0.4723, + "step": 7566 + }, + { + "epoch": 2.117828155611531, + "grad_norm": 0.25639745673483555, + "learning_rate": 2.1848261361000687e-05, + "loss": 0.4627, + "step": 7567 + }, + { + "epoch": 2.118108032465715, + "grad_norm": 0.2575518876408739, + "learning_rate": 2.1835514412186896e-05, + "loss": 0.4501, + "step": 7568 + }, + { + "epoch": 2.118387909319899, + "grad_norm": 0.23815492828339324, + "learning_rate": 2.1822770144084616e-05, + "loss": 0.4431, + "step": 7569 + }, + { + "epoch": 2.1186677861740835, + "grad_norm": 0.2586557287359828, + "learning_rate": 2.1810028557906832e-05, + "loss": 0.4785, + "step": 7570 + }, + { + "epoch": 2.1189476630282678, + "grad_norm": 0.2584600684594632, + "learning_rate": 2.1797289654866314e-05, + "loss": 0.4646, + "step": 7571 + }, + { + "epoch": 2.1192275398824516, + "grad_norm": 0.2473268074022004, + "learning_rate": 2.1784553436175553e-05, + "loss": 0.4476, + "step": 7572 + }, + { + "epoch": 2.119507416736636, + "grad_norm": 0.23380845612844886, + "learning_rate": 2.1771819903046785e-05, + "loss": 0.4431, + "step": 7573 + }, + { + "epoch": 2.11978729359082, + "grad_norm": 0.24317569820214538, + "learning_rate": 2.1759089056692006e-05, + "loss": 0.441, + "step": 7574 + }, + { + "epoch": 2.120067170445004, + "grad_norm": 0.25405334047753797, + "learning_rate": 2.1746360898322933e-05, + "loss": 0.472, + "step": 7575 + }, + { + "epoch": 2.1203470472991883, + "grad_norm": 0.2529813566209135, + "learning_rate": 2.1733635429151046e-05, + "loss": 0.4727, + "step": 7576 + }, + { + "epoch": 2.1206269241533726, + "grad_norm": 0.24345463428834613, + "learning_rate": 2.1720912650387554e-05, + "loss": 0.4596, + "step": 7577 + }, + { + "epoch": 2.1209068010075565, + "grad_norm": 0.24367450650630482, + "learning_rate": 2.170819256324343e-05, + "loss": 0.4416, + "step": 7578 + }, + { + "epoch": 2.1211866778617408, + "grad_norm": 0.2400383852178851, + "learning_rate": 2.1695475168929375e-05, + "loss": 0.4754, + "step": 7579 + }, + { + "epoch": 2.121466554715925, + "grad_norm": 0.24937719815034812, + "learning_rate": 2.1682760468655834e-05, + "loss": 0.4489, + "step": 7580 + }, + { + "epoch": 2.1217464315701093, + "grad_norm": 0.24596825255221352, + "learning_rate": 2.1670048463632996e-05, + "loss": 0.452, + "step": 7581 + }, + { + "epoch": 2.122026308424293, + "grad_norm": 0.2452061773321738, + "learning_rate": 2.1657339155070828e-05, + "loss": 0.4769, + "step": 7582 + }, + { + "epoch": 2.1223061852784775, + "grad_norm": 0.23904809431617988, + "learning_rate": 2.1644632544178967e-05, + "loss": 0.4764, + "step": 7583 + }, + { + "epoch": 2.1225860621326618, + "grad_norm": 0.24015059340537998, + "learning_rate": 2.1631928632166827e-05, + "loss": 0.4484, + "step": 7584 + }, + { + "epoch": 2.1228659389868456, + "grad_norm": 0.2516526249697741, + "learning_rate": 2.1619227420243615e-05, + "loss": 0.4481, + "step": 7585 + }, + { + "epoch": 2.12314581584103, + "grad_norm": 0.24541177461344438, + "learning_rate": 2.160652890961823e-05, + "loss": 0.4397, + "step": 7586 + }, + { + "epoch": 2.123425692695214, + "grad_norm": 0.2371911292223706, + "learning_rate": 2.159383310149931e-05, + "loss": 0.4617, + "step": 7587 + }, + { + "epoch": 2.123705569549398, + "grad_norm": 0.2514411025705835, + "learning_rate": 2.1581139997095258e-05, + "loss": 0.451, + "step": 7588 + }, + { + "epoch": 2.1239854464035823, + "grad_norm": 0.24660085157883696, + "learning_rate": 2.1568449597614205e-05, + "loss": 0.4729, + "step": 7589 + }, + { + "epoch": 2.1242653232577666, + "grad_norm": 0.2565046085253248, + "learning_rate": 2.1555761904264034e-05, + "loss": 0.4712, + "step": 7590 + }, + { + "epoch": 2.124545200111951, + "grad_norm": 0.2530203823346398, + "learning_rate": 2.154307691825237e-05, + "loss": 0.4677, + "step": 7591 + }, + { + "epoch": 2.1248250769661348, + "grad_norm": 0.24950382280245662, + "learning_rate": 2.1530394640786567e-05, + "loss": 0.4531, + "step": 7592 + }, + { + "epoch": 2.125104953820319, + "grad_norm": 0.2485366336476582, + "learning_rate": 2.1517715073073742e-05, + "loss": 0.4705, + "step": 7593 + }, + { + "epoch": 2.1253848306745033, + "grad_norm": 0.24778048156237242, + "learning_rate": 2.1505038216320735e-05, + "loss": 0.4515, + "step": 7594 + }, + { + "epoch": 2.125664707528687, + "grad_norm": 0.24125265596005324, + "learning_rate": 2.149236407173414e-05, + "loss": 0.4502, + "step": 7595 + }, + { + "epoch": 2.1259445843828715, + "grad_norm": 0.25854875672337907, + "learning_rate": 2.1479692640520292e-05, + "loss": 0.4565, + "step": 7596 + }, + { + "epoch": 2.1262244612370558, + "grad_norm": 0.2510271068420979, + "learning_rate": 2.146702392388526e-05, + "loss": 0.4707, + "step": 7597 + }, + { + "epoch": 2.12650433809124, + "grad_norm": 0.24128873224968087, + "learning_rate": 2.1454357923034864e-05, + "loss": 0.4463, + "step": 7598 + }, + { + "epoch": 2.126784214945424, + "grad_norm": 0.24663224433083245, + "learning_rate": 2.1441694639174652e-05, + "loss": 0.4687, + "step": 7599 + }, + { + "epoch": 2.127064091799608, + "grad_norm": 0.24317158373744283, + "learning_rate": 2.142903407350995e-05, + "loss": 0.4481, + "step": 7600 + }, + { + "epoch": 2.1273439686537925, + "grad_norm": 0.261255558763693, + "learning_rate": 2.1416376227245744e-05, + "loss": 0.4714, + "step": 7601 + }, + { + "epoch": 2.1276238455079763, + "grad_norm": 0.24449693848440052, + "learning_rate": 2.1403721101586864e-05, + "loss": 0.4335, + "step": 7602 + }, + { + "epoch": 2.1279037223621606, + "grad_norm": 0.24366650669769435, + "learning_rate": 2.1391068697737815e-05, + "loss": 0.449, + "step": 7603 + }, + { + "epoch": 2.128183599216345, + "grad_norm": 0.24012998115601292, + "learning_rate": 2.137841901690286e-05, + "loss": 0.4626, + "step": 7604 + }, + { + "epoch": 2.1284634760705288, + "grad_norm": 0.2835087041441265, + "learning_rate": 2.1365772060286006e-05, + "loss": 0.4836, + "step": 7605 + }, + { + "epoch": 2.128743352924713, + "grad_norm": 0.24088128251814953, + "learning_rate": 2.1353127829090995e-05, + "loss": 0.442, + "step": 7606 + }, + { + "epoch": 2.1290232297788974, + "grad_norm": 0.2573840347580425, + "learning_rate": 2.134048632452131e-05, + "loss": 0.4833, + "step": 7607 + }, + { + "epoch": 2.1293031066330816, + "grad_norm": 0.25199802270150407, + "learning_rate": 2.132784754778018e-05, + "loss": 0.475, + "step": 7608 + }, + { + "epoch": 2.1295829834872655, + "grad_norm": 0.24247900055381982, + "learning_rate": 2.1315211500070558e-05, + "loss": 0.4534, + "step": 7609 + }, + { + "epoch": 2.12986286034145, + "grad_norm": 0.24406454232964037, + "learning_rate": 2.1302578182595172e-05, + "loss": 0.4585, + "step": 7610 + }, + { + "epoch": 2.130142737195634, + "grad_norm": 0.2504265524046617, + "learning_rate": 2.1289947596556454e-05, + "loss": 0.4444, + "step": 7611 + }, + { + "epoch": 2.130422614049818, + "grad_norm": 0.24761477417319153, + "learning_rate": 2.1277319743156593e-05, + "loss": 0.48, + "step": 7612 + }, + { + "epoch": 2.130702490904002, + "grad_norm": 0.24011045291782035, + "learning_rate": 2.126469462359752e-05, + "loss": 0.451, + "step": 7613 + }, + { + "epoch": 2.1309823677581865, + "grad_norm": 0.2485102356825258, + "learning_rate": 2.1252072239080893e-05, + "loss": 0.4717, + "step": 7614 + }, + { + "epoch": 2.1312622446123703, + "grad_norm": 0.2408935994630069, + "learning_rate": 2.1239452590808124e-05, + "loss": 0.4539, + "step": 7615 + }, + { + "epoch": 2.1315421214665546, + "grad_norm": 0.24679220856227196, + "learning_rate": 2.122683567998035e-05, + "loss": 0.4525, + "step": 7616 + }, + { + "epoch": 2.131821998320739, + "grad_norm": 0.2454251676747661, + "learning_rate": 2.1214221507798466e-05, + "loss": 0.4586, + "step": 7617 + }, + { + "epoch": 2.1321018751749232, + "grad_norm": 0.24173225633486115, + "learning_rate": 2.1201610075463085e-05, + "loss": 0.4496, + "step": 7618 + }, + { + "epoch": 2.132381752029107, + "grad_norm": 0.24472665196638912, + "learning_rate": 2.1189001384174578e-05, + "loss": 0.4493, + "step": 7619 + }, + { + "epoch": 2.1326616288832914, + "grad_norm": 0.24959590534553086, + "learning_rate": 2.1176395435133052e-05, + "loss": 0.4457, + "step": 7620 + }, + { + "epoch": 2.1329415057374757, + "grad_norm": 0.24445113584781797, + "learning_rate": 2.1163792229538336e-05, + "loss": 0.4539, + "step": 7621 + }, + { + "epoch": 2.1332213825916595, + "grad_norm": 0.24851884857757958, + "learning_rate": 2.1151191768590016e-05, + "loss": 0.4394, + "step": 7622 + }, + { + "epoch": 2.133501259445844, + "grad_norm": 0.2384034792363101, + "learning_rate": 2.1138594053487415e-05, + "loss": 0.4575, + "step": 7623 + }, + { + "epoch": 2.133781136300028, + "grad_norm": 0.24618774382705233, + "learning_rate": 2.1125999085429583e-05, + "loss": 0.4573, + "step": 7624 + }, + { + "epoch": 2.134061013154212, + "grad_norm": 0.24962501465608256, + "learning_rate": 2.1113406865615322e-05, + "loss": 0.4587, + "step": 7625 + }, + { + "epoch": 2.134340890008396, + "grad_norm": 0.2398732566660393, + "learning_rate": 2.1100817395243157e-05, + "loss": 0.455, + "step": 7626 + }, + { + "epoch": 2.1346207668625805, + "grad_norm": 0.25842324905438246, + "learning_rate": 2.1088230675511372e-05, + "loss": 0.4477, + "step": 7627 + }, + { + "epoch": 2.134900643716765, + "grad_norm": 0.251392968254428, + "learning_rate": 2.1075646707617973e-05, + "loss": 0.4733, + "step": 7628 + }, + { + "epoch": 2.1351805205709486, + "grad_norm": 0.2505656721002468, + "learning_rate": 2.1063065492760715e-05, + "loss": 0.4636, + "step": 7629 + }, + { + "epoch": 2.135460397425133, + "grad_norm": 0.2572929962069552, + "learning_rate": 2.105048703213708e-05, + "loss": 0.4581, + "step": 7630 + }, + { + "epoch": 2.1357402742793172, + "grad_norm": 0.2417165861116719, + "learning_rate": 2.1037911326944286e-05, + "loss": 0.4631, + "step": 7631 + }, + { + "epoch": 2.136020151133501, + "grad_norm": 0.24921891767128523, + "learning_rate": 2.1025338378379312e-05, + "loss": 0.4637, + "step": 7632 + }, + { + "epoch": 2.1363000279876854, + "grad_norm": 0.2407116134780612, + "learning_rate": 2.1012768187638844e-05, + "loss": 0.4546, + "step": 7633 + }, + { + "epoch": 2.1365799048418697, + "grad_norm": 0.24763876381341868, + "learning_rate": 2.100020075591932e-05, + "loss": 0.4369, + "step": 7634 + }, + { + "epoch": 2.136859781696054, + "grad_norm": 0.2539968119489963, + "learning_rate": 2.0987636084416924e-05, + "loss": 0.4434, + "step": 7635 + }, + { + "epoch": 2.137139658550238, + "grad_norm": 0.25589564146611865, + "learning_rate": 2.0975074174327564e-05, + "loss": 0.4596, + "step": 7636 + }, + { + "epoch": 2.137419535404422, + "grad_norm": 0.2376089711663209, + "learning_rate": 2.096251502684689e-05, + "loss": 0.4436, + "step": 7637 + }, + { + "epoch": 2.1376994122586064, + "grad_norm": 0.2540651550718165, + "learning_rate": 2.0949958643170294e-05, + "loss": 0.4765, + "step": 7638 + }, + { + "epoch": 2.1379792891127902, + "grad_norm": 0.25008346334171755, + "learning_rate": 2.093740502449289e-05, + "loss": 0.4611, + "step": 7639 + }, + { + "epoch": 2.1382591659669745, + "grad_norm": 0.24815997334452597, + "learning_rate": 2.092485417200954e-05, + "loss": 0.443, + "step": 7640 + }, + { + "epoch": 2.138539042821159, + "grad_norm": 0.24897749979619618, + "learning_rate": 2.0912306086914846e-05, + "loss": 0.4397, + "step": 7641 + }, + { + "epoch": 2.1388189196753427, + "grad_norm": 0.24983510933255013, + "learning_rate": 2.0899760770403144e-05, + "loss": 0.4769, + "step": 7642 + }, + { + "epoch": 2.139098796529527, + "grad_norm": 0.24567791617937942, + "learning_rate": 2.088721822366849e-05, + "loss": 0.4653, + "step": 7643 + }, + { + "epoch": 2.1393786733837112, + "grad_norm": 0.2511893988591538, + "learning_rate": 2.0874678447904712e-05, + "loss": 0.4655, + "step": 7644 + }, + { + "epoch": 2.1396585502378955, + "grad_norm": 0.25340979163159116, + "learning_rate": 2.086214144430534e-05, + "loss": 0.4454, + "step": 7645 + }, + { + "epoch": 2.1399384270920794, + "grad_norm": 0.24200109811789502, + "learning_rate": 2.0849607214063647e-05, + "loss": 0.4398, + "step": 7646 + }, + { + "epoch": 2.1402183039462637, + "grad_norm": 0.24770574144742674, + "learning_rate": 2.083707575837266e-05, + "loss": 0.4451, + "step": 7647 + }, + { + "epoch": 2.140498180800448, + "grad_norm": 0.2507817072966352, + "learning_rate": 2.0824547078425126e-05, + "loss": 0.4472, + "step": 7648 + }, + { + "epoch": 2.140778057654632, + "grad_norm": 0.23937088937477222, + "learning_rate": 2.081202117541353e-05, + "loss": 0.4271, + "step": 7649 + }, + { + "epoch": 2.141057934508816, + "grad_norm": 0.24435448910173604, + "learning_rate": 2.0799498050530097e-05, + "loss": 0.4521, + "step": 7650 + }, + { + "epoch": 2.1413378113630004, + "grad_norm": 0.24234436062097528, + "learning_rate": 2.0786977704966782e-05, + "loss": 0.4566, + "step": 7651 + }, + { + "epoch": 2.1416176882171842, + "grad_norm": 0.24159361754388, + "learning_rate": 2.077446013991528e-05, + "loss": 0.4554, + "step": 7652 + }, + { + "epoch": 2.1418975650713685, + "grad_norm": 0.24298997738763822, + "learning_rate": 2.0761945356567025e-05, + "loss": 0.4579, + "step": 7653 + }, + { + "epoch": 2.142177441925553, + "grad_norm": 0.23575064778162888, + "learning_rate": 2.0749433356113168e-05, + "loss": 0.4305, + "step": 7654 + }, + { + "epoch": 2.142457318779737, + "grad_norm": 0.2526297090096628, + "learning_rate": 2.0736924139744624e-05, + "loss": 0.4445, + "step": 7655 + }, + { + "epoch": 2.142737195633921, + "grad_norm": 0.2467617189365528, + "learning_rate": 2.0724417708652017e-05, + "loss": 0.4405, + "step": 7656 + }, + { + "epoch": 2.1430170724881052, + "grad_norm": 0.24657196476447835, + "learning_rate": 2.071191406402572e-05, + "loss": 0.4621, + "step": 7657 + }, + { + "epoch": 2.1432969493422895, + "grad_norm": 0.2413834403646487, + "learning_rate": 2.0699413207055834e-05, + "loss": 0.4569, + "step": 7658 + }, + { + "epoch": 2.1435768261964734, + "grad_norm": 0.25549227235015254, + "learning_rate": 2.0686915138932195e-05, + "loss": 0.4604, + "step": 7659 + }, + { + "epoch": 2.1438567030506577, + "grad_norm": 0.24141759699102472, + "learning_rate": 2.0674419860844384e-05, + "loss": 0.4619, + "step": 7660 + }, + { + "epoch": 2.144136579904842, + "grad_norm": 0.24408268820201984, + "learning_rate": 2.066192737398171e-05, + "loss": 0.4408, + "step": 7661 + }, + { + "epoch": 2.144416456759026, + "grad_norm": 0.24501183545718927, + "learning_rate": 2.0649437679533202e-05, + "loss": 0.4641, + "step": 7662 + }, + { + "epoch": 2.14469633361321, + "grad_norm": 0.2444438500568767, + "learning_rate": 2.0636950778687647e-05, + "loss": 0.4631, + "step": 7663 + }, + { + "epoch": 2.1449762104673944, + "grad_norm": 0.23872278645009623, + "learning_rate": 2.0624466672633552e-05, + "loss": 0.4451, + "step": 7664 + }, + { + "epoch": 2.1452560873215787, + "grad_norm": 0.24839893809469274, + "learning_rate": 2.0611985362559166e-05, + "loss": 0.474, + "step": 7665 + }, + { + "epoch": 2.1455359641757625, + "grad_norm": 0.23810533617093482, + "learning_rate": 2.0599506849652456e-05, + "loss": 0.4435, + "step": 7666 + }, + { + "epoch": 2.145815841029947, + "grad_norm": 0.24728729741769404, + "learning_rate": 2.058703113510114e-05, + "loss": 0.4569, + "step": 7667 + }, + { + "epoch": 2.146095717884131, + "grad_norm": 0.24522991057602664, + "learning_rate": 2.0574558220092665e-05, + "loss": 0.4512, + "step": 7668 + }, + { + "epoch": 2.146375594738315, + "grad_norm": 0.2452430298428381, + "learning_rate": 2.0562088105814213e-05, + "loss": 0.4384, + "step": 7669 + }, + { + "epoch": 2.1466554715924993, + "grad_norm": 0.25861421447453514, + "learning_rate": 2.054962079345269e-05, + "loss": 0.4789, + "step": 7670 + }, + { + "epoch": 2.1469353484466835, + "grad_norm": 0.24613330428350227, + "learning_rate": 2.0537156284194743e-05, + "loss": 0.4484, + "step": 7671 + }, + { + "epoch": 2.147215225300868, + "grad_norm": 0.24725153809854933, + "learning_rate": 2.052469457922675e-05, + "loss": 0.4309, + "step": 7672 + }, + { + "epoch": 2.1474951021550517, + "grad_norm": 0.24838110219764029, + "learning_rate": 2.0512235679734825e-05, + "loss": 0.445, + "step": 7673 + }, + { + "epoch": 2.147774979009236, + "grad_norm": 0.25241695717372814, + "learning_rate": 2.0499779586904815e-05, + "loss": 0.458, + "step": 7674 + }, + { + "epoch": 2.1480548558634203, + "grad_norm": 0.2365323819303878, + "learning_rate": 2.04873263019223e-05, + "loss": 0.4746, + "step": 7675 + }, + { + "epoch": 2.148334732717604, + "grad_norm": 0.25602552930195377, + "learning_rate": 2.047487582597258e-05, + "loss": 0.4699, + "step": 7676 + }, + { + "epoch": 2.1486146095717884, + "grad_norm": 0.24166625285944415, + "learning_rate": 2.046242816024071e-05, + "loss": 0.4457, + "step": 7677 + }, + { + "epoch": 2.1488944864259727, + "grad_norm": 0.25305605131513187, + "learning_rate": 2.0449983305911457e-05, + "loss": 0.4777, + "step": 7678 + }, + { + "epoch": 2.1491743632801565, + "grad_norm": 0.2460520677445608, + "learning_rate": 2.043754126416933e-05, + "loss": 0.4438, + "step": 7679 + }, + { + "epoch": 2.149454240134341, + "grad_norm": 0.2519080224748392, + "learning_rate": 2.042510203619858e-05, + "loss": 0.4483, + "step": 7680 + }, + { + "epoch": 2.149734116988525, + "grad_norm": 0.24991720339072024, + "learning_rate": 2.0412665623183162e-05, + "loss": 0.4575, + "step": 7681 + }, + { + "epoch": 2.1500139938427094, + "grad_norm": 0.24773907482820717, + "learning_rate": 2.04002320263068e-05, + "loss": 0.4661, + "step": 7682 + }, + { + "epoch": 2.1502938706968933, + "grad_norm": 0.2581008504331832, + "learning_rate": 2.038780124675292e-05, + "loss": 0.4512, + "step": 7683 + }, + { + "epoch": 2.1505737475510776, + "grad_norm": 0.2592193497806851, + "learning_rate": 2.0375373285704685e-05, + "loss": 0.4794, + "step": 7684 + }, + { + "epoch": 2.150853624405262, + "grad_norm": 0.24498764169014103, + "learning_rate": 2.036294814434501e-05, + "loss": 0.4489, + "step": 7685 + }, + { + "epoch": 2.1511335012594457, + "grad_norm": 0.24997505633995695, + "learning_rate": 2.0350525823856516e-05, + "loss": 0.4485, + "step": 7686 + }, + { + "epoch": 2.15141337811363, + "grad_norm": 0.24728319568293508, + "learning_rate": 2.033810632542157e-05, + "loss": 0.4411, + "step": 7687 + }, + { + "epoch": 2.1516932549678143, + "grad_norm": 0.23170605418281978, + "learning_rate": 2.0325689650222268e-05, + "loss": 0.4514, + "step": 7688 + }, + { + "epoch": 2.151973131821998, + "grad_norm": 0.2410678794367316, + "learning_rate": 2.0313275799440435e-05, + "loss": 0.4482, + "step": 7689 + }, + { + "epoch": 2.1522530086761824, + "grad_norm": 0.2439369810038682, + "learning_rate": 2.030086477425762e-05, + "loss": 0.4494, + "step": 7690 + }, + { + "epoch": 2.1525328855303667, + "grad_norm": 0.24753795944884796, + "learning_rate": 2.028845657585513e-05, + "loss": 0.4581, + "step": 7691 + }, + { + "epoch": 2.152812762384551, + "grad_norm": 0.24646798280703414, + "learning_rate": 2.0276051205413967e-05, + "loss": 0.447, + "step": 7692 + }, + { + "epoch": 2.153092639238735, + "grad_norm": 0.24227431911441139, + "learning_rate": 2.0263648664114886e-05, + "loss": 0.4586, + "step": 7693 + }, + { + "epoch": 2.153372516092919, + "grad_norm": 0.23730019959175033, + "learning_rate": 2.0251248953138374e-05, + "loss": 0.434, + "step": 7694 + }, + { + "epoch": 2.1536523929471034, + "grad_norm": 0.2549009708513573, + "learning_rate": 2.023885207366464e-05, + "loss": 0.4504, + "step": 7695 + }, + { + "epoch": 2.1539322698012873, + "grad_norm": 0.2433620312879842, + "learning_rate": 2.0226458026873616e-05, + "loss": 0.4591, + "step": 7696 + }, + { + "epoch": 2.1542121466554716, + "grad_norm": 0.24673402406570474, + "learning_rate": 2.0214066813944988e-05, + "loss": 0.4336, + "step": 7697 + }, + { + "epoch": 2.154492023509656, + "grad_norm": 0.25424975467360705, + "learning_rate": 2.0201678436058148e-05, + "loss": 0.4394, + "step": 7698 + }, + { + "epoch": 2.1547719003638397, + "grad_norm": 0.24143025981613622, + "learning_rate": 2.0189292894392238e-05, + "loss": 0.45, + "step": 7699 + }, + { + "epoch": 2.155051777218024, + "grad_norm": 0.254837983121012, + "learning_rate": 2.0176910190126114e-05, + "loss": 0.4801, + "step": 7700 + }, + { + "epoch": 2.1553316540722083, + "grad_norm": 0.25871164567727056, + "learning_rate": 2.0164530324438368e-05, + "loss": 0.464, + "step": 7701 + }, + { + "epoch": 2.1556115309263926, + "grad_norm": 0.2584209085151839, + "learning_rate": 2.0152153298507324e-05, + "loss": 0.464, + "step": 7702 + }, + { + "epoch": 2.1558914077805764, + "grad_norm": 0.25135649506036073, + "learning_rate": 2.0139779113511044e-05, + "loss": 0.4612, + "step": 7703 + }, + { + "epoch": 2.1561712846347607, + "grad_norm": 0.24974697379436328, + "learning_rate": 2.0127407770627298e-05, + "loss": 0.4583, + "step": 7704 + }, + { + "epoch": 2.156451161488945, + "grad_norm": 0.2485539660436353, + "learning_rate": 2.01150392710336e-05, + "loss": 0.4574, + "step": 7705 + }, + { + "epoch": 2.156731038343129, + "grad_norm": 0.2536019454279282, + "learning_rate": 2.0102673615907193e-05, + "loss": 0.46, + "step": 7706 + }, + { + "epoch": 2.157010915197313, + "grad_norm": 0.24308067268202438, + "learning_rate": 2.009031080642504e-05, + "loss": 0.4472, + "step": 7707 + }, + { + "epoch": 2.1572907920514974, + "grad_norm": 0.26105563418332145, + "learning_rate": 2.0077950843763847e-05, + "loss": 0.4723, + "step": 7708 + }, + { + "epoch": 2.1575706689056817, + "grad_norm": 0.25978300743181865, + "learning_rate": 2.0065593729100046e-05, + "loss": 0.4697, + "step": 7709 + }, + { + "epoch": 2.1578505457598656, + "grad_norm": 0.25504147037350067, + "learning_rate": 2.0053239463609785e-05, + "loss": 0.448, + "step": 7710 + }, + { + "epoch": 2.15813042261405, + "grad_norm": 0.2461420145940867, + "learning_rate": 2.0040888048468954e-05, + "loss": 0.4568, + "step": 7711 + }, + { + "epoch": 2.158410299468234, + "grad_norm": 0.2401419650973817, + "learning_rate": 2.002853948485317e-05, + "loss": 0.4349, + "step": 7712 + }, + { + "epoch": 2.158690176322418, + "grad_norm": 0.2513144966848644, + "learning_rate": 2.0016193773937776e-05, + "loss": 0.4682, + "step": 7713 + }, + { + "epoch": 2.1589700531766023, + "grad_norm": 0.2536880790577134, + "learning_rate": 2.000385091689783e-05, + "loss": 0.4559, + "step": 7714 + }, + { + "epoch": 2.1592499300307866, + "grad_norm": 0.25917030427131443, + "learning_rate": 1.999151091490815e-05, + "loss": 0.4528, + "step": 7715 + }, + { + "epoch": 2.1595298068849704, + "grad_norm": 0.2501556664053448, + "learning_rate": 1.997917376914326e-05, + "loss": 0.4461, + "step": 7716 + }, + { + "epoch": 2.1598096837391547, + "grad_norm": 0.24738787444597712, + "learning_rate": 1.9966839480777415e-05, + "loss": 0.467, + "step": 7717 + }, + { + "epoch": 2.160089560593339, + "grad_norm": 0.23323438252697498, + "learning_rate": 1.9954508050984592e-05, + "loss": 0.4322, + "step": 7718 + }, + { + "epoch": 2.160369437447523, + "grad_norm": 0.2458908563658238, + "learning_rate": 1.9942179480938517e-05, + "loss": 0.4654, + "step": 7719 + }, + { + "epoch": 2.160649314301707, + "grad_norm": 0.2584866831872372, + "learning_rate": 1.992985377181262e-05, + "loss": 0.4725, + "step": 7720 + }, + { + "epoch": 2.1609291911558914, + "grad_norm": 0.243870700648934, + "learning_rate": 1.991753092478007e-05, + "loss": 0.4391, + "step": 7721 + }, + { + "epoch": 2.1612090680100757, + "grad_norm": 0.2420300015222653, + "learning_rate": 1.9905210941013765e-05, + "loss": 0.4289, + "step": 7722 + }, + { + "epoch": 2.1614889448642596, + "grad_norm": 0.2511378627198556, + "learning_rate": 1.989289382168633e-05, + "loss": 0.4623, + "step": 7723 + }, + { + "epoch": 2.161768821718444, + "grad_norm": 0.24696892614899602, + "learning_rate": 1.988057956797011e-05, + "loss": 0.4542, + "step": 7724 + }, + { + "epoch": 2.162048698572628, + "grad_norm": 0.24678778236656013, + "learning_rate": 1.9868268181037185e-05, + "loss": 0.4601, + "step": 7725 + }, + { + "epoch": 2.162328575426812, + "grad_norm": 0.2512369548355241, + "learning_rate": 1.9855959662059365e-05, + "loss": 0.4589, + "step": 7726 + }, + { + "epoch": 2.1626084522809963, + "grad_norm": 0.25344596384048185, + "learning_rate": 1.9843654012208173e-05, + "loss": 0.4461, + "step": 7727 + }, + { + "epoch": 2.1628883291351806, + "grad_norm": 0.23938430816339948, + "learning_rate": 1.9831351232654872e-05, + "loss": 0.47, + "step": 7728 + }, + { + "epoch": 2.163168205989365, + "grad_norm": 0.2445065327742629, + "learning_rate": 1.9819051324570443e-05, + "loss": 0.4535, + "step": 7729 + }, + { + "epoch": 2.1634480828435487, + "grad_norm": 0.24813418604265364, + "learning_rate": 1.9806754289125605e-05, + "loss": 0.4763, + "step": 7730 + }, + { + "epoch": 2.163727959697733, + "grad_norm": 0.2515889228984909, + "learning_rate": 1.9794460127490794e-05, + "loss": 0.4625, + "step": 7731 + }, + { + "epoch": 2.1640078365519173, + "grad_norm": 0.243327037487538, + "learning_rate": 1.978216884083618e-05, + "loss": 0.446, + "step": 7732 + }, + { + "epoch": 2.164287713406101, + "grad_norm": 0.24996788209547616, + "learning_rate": 1.976988043033164e-05, + "loss": 0.4591, + "step": 7733 + }, + { + "epoch": 2.1645675902602854, + "grad_norm": 0.2592706923058631, + "learning_rate": 1.9757594897146807e-05, + "loss": 0.48, + "step": 7734 + }, + { + "epoch": 2.1648474671144697, + "grad_norm": 0.26093538918194764, + "learning_rate": 1.974531224245102e-05, + "loss": 0.4649, + "step": 7735 + }, + { + "epoch": 2.1651273439686536, + "grad_norm": 0.25467004494827217, + "learning_rate": 1.9733032467413343e-05, + "loss": 0.4467, + "step": 7736 + }, + { + "epoch": 2.165407220822838, + "grad_norm": 0.24955975582786427, + "learning_rate": 1.972075557320258e-05, + "loss": 0.4568, + "step": 7737 + }, + { + "epoch": 2.165687097677022, + "grad_norm": 0.2391669143405891, + "learning_rate": 1.9708481560987245e-05, + "loss": 0.448, + "step": 7738 + }, + { + "epoch": 2.1659669745312065, + "grad_norm": 0.25376949522374376, + "learning_rate": 1.9696210431935595e-05, + "loss": 0.4708, + "step": 7739 + }, + { + "epoch": 2.1662468513853903, + "grad_norm": 0.25206443679063284, + "learning_rate": 1.9683942187215597e-05, + "loss": 0.48, + "step": 7740 + }, + { + "epoch": 2.1665267282395746, + "grad_norm": 0.24860030256079377, + "learning_rate": 1.967167682799495e-05, + "loss": 0.4839, + "step": 7741 + }, + { + "epoch": 2.166806605093759, + "grad_norm": 0.2585097787688693, + "learning_rate": 1.965941435544108e-05, + "loss": 0.4772, + "step": 7742 + }, + { + "epoch": 2.1670864819479427, + "grad_norm": 0.2354632217802597, + "learning_rate": 1.964715477072113e-05, + "loss": 0.4414, + "step": 7743 + }, + { + "epoch": 2.167366358802127, + "grad_norm": 0.23677024749952494, + "learning_rate": 1.9634898075001967e-05, + "loss": 0.4629, + "step": 7744 + }, + { + "epoch": 2.1676462356563113, + "grad_norm": 0.24593218395377459, + "learning_rate": 1.962264426945023e-05, + "loss": 0.4526, + "step": 7745 + }, + { + "epoch": 2.1679261125104956, + "grad_norm": 0.24647489372050965, + "learning_rate": 1.96103933552322e-05, + "loss": 0.4613, + "step": 7746 + }, + { + "epoch": 2.1682059893646795, + "grad_norm": 0.2577261946690843, + "learning_rate": 1.959814533351394e-05, + "loss": 0.4628, + "step": 7747 + }, + { + "epoch": 2.1684858662188637, + "grad_norm": 0.24898198594961482, + "learning_rate": 1.9585900205461223e-05, + "loss": 0.4615, + "step": 7748 + }, + { + "epoch": 2.168765743073048, + "grad_norm": 0.244537865743805, + "learning_rate": 1.9573657972239546e-05, + "loss": 0.4633, + "step": 7749 + }, + { + "epoch": 2.169045619927232, + "grad_norm": 0.26205331576804664, + "learning_rate": 1.956141863501414e-05, + "loss": 0.4577, + "step": 7750 + }, + { + "epoch": 2.169325496781416, + "grad_norm": 0.25195861776390166, + "learning_rate": 1.954918219494994e-05, + "loss": 0.4781, + "step": 7751 + }, + { + "epoch": 2.1696053736356005, + "grad_norm": 0.24194083733508762, + "learning_rate": 1.9536948653211623e-05, + "loss": 0.4428, + "step": 7752 + }, + { + "epoch": 2.1698852504897843, + "grad_norm": 0.2594792382723162, + "learning_rate": 1.9524718010963583e-05, + "loss": 0.4777, + "step": 7753 + }, + { + "epoch": 2.1701651273439686, + "grad_norm": 0.25071856407895177, + "learning_rate": 1.9512490269369944e-05, + "loss": 0.4666, + "step": 7754 + }, + { + "epoch": 2.170445004198153, + "grad_norm": 0.23606933457195933, + "learning_rate": 1.9500265429594543e-05, + "loss": 0.4551, + "step": 7755 + }, + { + "epoch": 2.1707248810523367, + "grad_norm": 0.2462499962828051, + "learning_rate": 1.948804349280095e-05, + "loss": 0.4512, + "step": 7756 + }, + { + "epoch": 2.171004757906521, + "grad_norm": 0.24669402234344157, + "learning_rate": 1.9475824460152458e-05, + "loss": 0.4508, + "step": 7757 + }, + { + "epoch": 2.1712846347607053, + "grad_norm": 0.24447062976228986, + "learning_rate": 1.946360833281208e-05, + "loss": 0.4598, + "step": 7758 + }, + { + "epoch": 2.1715645116148896, + "grad_norm": 0.24508799052581162, + "learning_rate": 1.945139511194255e-05, + "loss": 0.4523, + "step": 7759 + }, + { + "epoch": 2.1718443884690735, + "grad_norm": 0.2494302055038184, + "learning_rate": 1.9439184798706334e-05, + "loss": 0.4466, + "step": 7760 + }, + { + "epoch": 2.1721242653232578, + "grad_norm": 0.24283933159113955, + "learning_rate": 1.9426977394265593e-05, + "loss": 0.4418, + "step": 7761 + }, + { + "epoch": 2.172404142177442, + "grad_norm": 0.23621243570705355, + "learning_rate": 1.9414772899782276e-05, + "loss": 0.4496, + "step": 7762 + }, + { + "epoch": 2.172684019031626, + "grad_norm": 0.2372523608522219, + "learning_rate": 1.940257131641801e-05, + "loss": 0.4512, + "step": 7763 + }, + { + "epoch": 2.17296389588581, + "grad_norm": 0.24828497828490664, + "learning_rate": 1.939037264533412e-05, + "loss": 0.4538, + "step": 7764 + }, + { + "epoch": 2.1732437727399945, + "grad_norm": 0.25129769055341117, + "learning_rate": 1.937817688769169e-05, + "loss": 0.4795, + "step": 7765 + }, + { + "epoch": 2.1735236495941788, + "grad_norm": 0.24392408891951042, + "learning_rate": 1.9365984044651525e-05, + "loss": 0.4293, + "step": 7766 + }, + { + "epoch": 2.1738035264483626, + "grad_norm": 0.25498994034625544, + "learning_rate": 1.935379411737414e-05, + "loss": 0.4562, + "step": 7767 + }, + { + "epoch": 2.174083403302547, + "grad_norm": 0.2446279437951083, + "learning_rate": 1.9341607107019794e-05, + "loss": 0.4364, + "step": 7768 + }, + { + "epoch": 2.174363280156731, + "grad_norm": 0.2493656735787167, + "learning_rate": 1.9329423014748437e-05, + "loss": 0.455, + "step": 7769 + }, + { + "epoch": 2.174643157010915, + "grad_norm": 0.2557171108570862, + "learning_rate": 1.9317241841719768e-05, + "loss": 0.4582, + "step": 7770 + }, + { + "epoch": 2.1749230338650993, + "grad_norm": 0.24847114191914865, + "learning_rate": 1.930506358909319e-05, + "loss": 0.4464, + "step": 7771 + }, + { + "epoch": 2.1752029107192836, + "grad_norm": 0.24827474418241086, + "learning_rate": 1.9292888258027842e-05, + "loss": 0.4604, + "step": 7772 + }, + { + "epoch": 2.1754827875734675, + "grad_norm": 0.2562796116528402, + "learning_rate": 1.928071584968258e-05, + "loss": 0.4394, + "step": 7773 + }, + { + "epoch": 2.1757626644276518, + "grad_norm": 0.25578007725600227, + "learning_rate": 1.9268546365215978e-05, + "loss": 0.4388, + "step": 7774 + }, + { + "epoch": 2.176042541281836, + "grad_norm": 0.24884757763501225, + "learning_rate": 1.925637980578633e-05, + "loss": 0.4467, + "step": 7775 + }, + { + "epoch": 2.1763224181360203, + "grad_norm": 0.2606704877742673, + "learning_rate": 1.924421617255166e-05, + "loss": 0.478, + "step": 7776 + }, + { + "epoch": 2.176602294990204, + "grad_norm": 0.25730842774347135, + "learning_rate": 1.9232055466669714e-05, + "loss": 0.4363, + "step": 7777 + }, + { + "epoch": 2.1768821718443885, + "grad_norm": 0.24536639692149034, + "learning_rate": 1.9219897689297944e-05, + "loss": 0.464, + "step": 7778 + }, + { + "epoch": 2.1771620486985728, + "grad_norm": 0.2635693184531069, + "learning_rate": 1.920774284159353e-05, + "loss": 0.4578, + "step": 7779 + }, + { + "epoch": 2.1774419255527566, + "grad_norm": 0.2508380517486447, + "learning_rate": 1.9195590924713403e-05, + "loss": 0.4511, + "step": 7780 + }, + { + "epoch": 2.177721802406941, + "grad_norm": 0.24561353911377817, + "learning_rate": 1.918344193981419e-05, + "loss": 0.4449, + "step": 7781 + }, + { + "epoch": 2.178001679261125, + "grad_norm": 0.2515938483646392, + "learning_rate": 1.9171295888052205e-05, + "loss": 0.4856, + "step": 7782 + }, + { + "epoch": 2.1782815561153095, + "grad_norm": 0.2554487519001131, + "learning_rate": 1.9159152770583528e-05, + "loss": 0.4558, + "step": 7783 + }, + { + "epoch": 2.1785614329694933, + "grad_norm": 0.24599832289776138, + "learning_rate": 1.9147012588563955e-05, + "loss": 0.4538, + "step": 7784 + }, + { + "epoch": 2.1788413098236776, + "grad_norm": 0.24827797552563804, + "learning_rate": 1.9134875343149e-05, + "loss": 0.4643, + "step": 7785 + }, + { + "epoch": 2.179121186677862, + "grad_norm": 0.2423392026682831, + "learning_rate": 1.912274103549388e-05, + "loss": 0.4634, + "step": 7786 + }, + { + "epoch": 2.1794010635320458, + "grad_norm": 0.2530638548225227, + "learning_rate": 1.911060966675355e-05, + "loss": 0.4401, + "step": 7787 + }, + { + "epoch": 2.17968094038623, + "grad_norm": 0.24564953024856784, + "learning_rate": 1.9098481238082684e-05, + "loss": 0.4578, + "step": 7788 + }, + { + "epoch": 2.1799608172404144, + "grad_norm": 0.2596550701569648, + "learning_rate": 1.908635575063567e-05, + "loss": 0.4594, + "step": 7789 + }, + { + "epoch": 2.180240694094598, + "grad_norm": 0.2498892016284524, + "learning_rate": 1.9074233205566617e-05, + "loss": 0.4528, + "step": 7790 + }, + { + "epoch": 2.1805205709487825, + "grad_norm": 0.2464999319142464, + "learning_rate": 1.906211360402936e-05, + "loss": 0.4565, + "step": 7791 + }, + { + "epoch": 2.180800447802967, + "grad_norm": 0.24425654605345726, + "learning_rate": 1.9049996947177444e-05, + "loss": 0.4693, + "step": 7792 + }, + { + "epoch": 2.1810803246571506, + "grad_norm": 0.24228558200186168, + "learning_rate": 1.9037883236164146e-05, + "loss": 0.4416, + "step": 7793 + }, + { + "epoch": 2.181360201511335, + "grad_norm": 0.24514792986010997, + "learning_rate": 1.9025772472142443e-05, + "loss": 0.4393, + "step": 7794 + }, + { + "epoch": 2.181640078365519, + "grad_norm": 0.25220357421385875, + "learning_rate": 1.9013664656265063e-05, + "loss": 0.457, + "step": 7795 + }, + { + "epoch": 2.1819199552197035, + "grad_norm": 0.2540426741164844, + "learning_rate": 1.9001559789684404e-05, + "loss": 0.4655, + "step": 7796 + }, + { + "epoch": 2.1821998320738873, + "grad_norm": 0.25053459425933816, + "learning_rate": 1.8989457873552652e-05, + "loss": 0.4565, + "step": 7797 + }, + { + "epoch": 2.1824797089280716, + "grad_norm": 0.24788181896230466, + "learning_rate": 1.897735890902166e-05, + "loss": 0.468, + "step": 7798 + }, + { + "epoch": 2.182759585782256, + "grad_norm": 0.24620568822923727, + "learning_rate": 1.8965262897243023e-05, + "loss": 0.445, + "step": 7799 + }, + { + "epoch": 2.1830394626364398, + "grad_norm": 0.24546099937554955, + "learning_rate": 1.895316983936802e-05, + "loss": 0.4381, + "step": 7800 + }, + { + "epoch": 2.183319339490624, + "grad_norm": 0.25539167735130275, + "learning_rate": 1.894107973654769e-05, + "loss": 0.4499, + "step": 7801 + }, + { + "epoch": 2.1835992163448084, + "grad_norm": 0.251713145144343, + "learning_rate": 1.8928992589932772e-05, + "loss": 0.4373, + "step": 7802 + }, + { + "epoch": 2.1838790931989926, + "grad_norm": 0.25427802966282137, + "learning_rate": 1.8916908400673733e-05, + "loss": 0.473, + "step": 7803 + }, + { + "epoch": 2.1841589700531765, + "grad_norm": 0.2452132412022567, + "learning_rate": 1.8904827169920748e-05, + "loss": 0.4378, + "step": 7804 + }, + { + "epoch": 2.184438846907361, + "grad_norm": 0.24816561372083384, + "learning_rate": 1.889274889882372e-05, + "loss": 0.4458, + "step": 7805 + }, + { + "epoch": 2.184718723761545, + "grad_norm": 0.2667112882195227, + "learning_rate": 1.888067358853226e-05, + "loss": 0.4531, + "step": 7806 + }, + { + "epoch": 2.184998600615729, + "grad_norm": 0.2402304873279501, + "learning_rate": 1.886860124019571e-05, + "loss": 0.4479, + "step": 7807 + }, + { + "epoch": 2.185278477469913, + "grad_norm": 0.25721229276055896, + "learning_rate": 1.8856531854963123e-05, + "loss": 0.4583, + "step": 7808 + }, + { + "epoch": 2.1855583543240975, + "grad_norm": 0.25904591171456187, + "learning_rate": 1.8844465433983256e-05, + "loss": 0.4666, + "step": 7809 + }, + { + "epoch": 2.1858382311782814, + "grad_norm": 0.24044364905075324, + "learning_rate": 1.8832401978404612e-05, + "loss": 0.4395, + "step": 7810 + }, + { + "epoch": 2.1861181080324656, + "grad_norm": 0.2542181852723932, + "learning_rate": 1.882034148937539e-05, + "loss": 0.4665, + "step": 7811 + }, + { + "epoch": 2.18639798488665, + "grad_norm": 0.24055634571479526, + "learning_rate": 1.8808283968043528e-05, + "loss": 0.4708, + "step": 7812 + }, + { + "epoch": 2.1866778617408342, + "grad_norm": 0.2478938980240328, + "learning_rate": 1.8796229415556628e-05, + "loss": 0.4628, + "step": 7813 + }, + { + "epoch": 2.186957738595018, + "grad_norm": 0.2559188829558101, + "learning_rate": 1.87841778330621e-05, + "loss": 0.4548, + "step": 7814 + }, + { + "epoch": 2.1872376154492024, + "grad_norm": 0.2384135511042799, + "learning_rate": 1.8772129221706997e-05, + "loss": 0.4675, + "step": 7815 + }, + { + "epoch": 2.1875174923033867, + "grad_norm": 0.24523900409859284, + "learning_rate": 1.876008358263811e-05, + "loss": 0.4465, + "step": 7816 + }, + { + "epoch": 2.1877973691575705, + "grad_norm": 0.2452968286397749, + "learning_rate": 1.874804091700196e-05, + "loss": 0.452, + "step": 7817 + }, + { + "epoch": 2.188077246011755, + "grad_norm": 0.2545186025574785, + "learning_rate": 1.8736001225944783e-05, + "loss": 0.4547, + "step": 7818 + }, + { + "epoch": 2.188357122865939, + "grad_norm": 0.24134995755574007, + "learning_rate": 1.872396451061249e-05, + "loss": 0.4534, + "step": 7819 + }, + { + "epoch": 2.1886369997201234, + "grad_norm": 0.23698879149519445, + "learning_rate": 1.871193077215076e-05, + "loss": 0.4475, + "step": 7820 + }, + { + "epoch": 2.1889168765743072, + "grad_norm": 0.24258602822455302, + "learning_rate": 1.8699900011704972e-05, + "loss": 0.4307, + "step": 7821 + }, + { + "epoch": 2.1891967534284915, + "grad_norm": 0.24370852116856703, + "learning_rate": 1.8687872230420224e-05, + "loss": 0.4689, + "step": 7822 + }, + { + "epoch": 2.189476630282676, + "grad_norm": 0.24094433935092024, + "learning_rate": 1.8675847429441317e-05, + "loss": 0.4524, + "step": 7823 + }, + { + "epoch": 2.1897565071368597, + "grad_norm": 0.2393943275153635, + "learning_rate": 1.866382560991279e-05, + "loss": 0.4646, + "step": 7824 + }, + { + "epoch": 2.190036383991044, + "grad_norm": 0.24600322975573322, + "learning_rate": 1.865180677297888e-05, + "loss": 0.4417, + "step": 7825 + }, + { + "epoch": 2.1903162608452282, + "grad_norm": 0.23653467949928408, + "learning_rate": 1.8639790919783547e-05, + "loss": 0.4421, + "step": 7826 + }, + { + "epoch": 2.190596137699412, + "grad_norm": 0.24844045629034311, + "learning_rate": 1.8627778051470467e-05, + "loss": 0.4658, + "step": 7827 + }, + { + "epoch": 2.1908760145535964, + "grad_norm": 0.25891714351155704, + "learning_rate": 1.861576816918303e-05, + "loss": 0.4512, + "step": 7828 + }, + { + "epoch": 2.1911558914077807, + "grad_norm": 0.24755295731004745, + "learning_rate": 1.8603761274064347e-05, + "loss": 0.4445, + "step": 7829 + }, + { + "epoch": 2.1914357682619645, + "grad_norm": 0.24871185991226788, + "learning_rate": 1.859175736725724e-05, + "loss": 0.4607, + "step": 7830 + }, + { + "epoch": 2.191715645116149, + "grad_norm": 0.2536417126447717, + "learning_rate": 1.8579756449904227e-05, + "loss": 0.456, + "step": 7831 + }, + { + "epoch": 2.191995521970333, + "grad_norm": 0.2533466720717258, + "learning_rate": 1.8567758523147606e-05, + "loss": 0.487, + "step": 7832 + }, + { + "epoch": 2.1922753988245174, + "grad_norm": 0.2722920814139466, + "learning_rate": 1.8555763588129316e-05, + "loss": 0.4738, + "step": 7833 + }, + { + "epoch": 2.1925552756787012, + "grad_norm": 0.24499078426528714, + "learning_rate": 1.8543771645991047e-05, + "loss": 0.4573, + "step": 7834 + }, + { + "epoch": 2.1928351525328855, + "grad_norm": 0.24114442727471078, + "learning_rate": 1.85317826978742e-05, + "loss": 0.4557, + "step": 7835 + }, + { + "epoch": 2.19311502938707, + "grad_norm": 0.2479406770568865, + "learning_rate": 1.851979674491991e-05, + "loss": 0.45, + "step": 7836 + }, + { + "epoch": 2.1933949062412537, + "grad_norm": 0.24512816090760622, + "learning_rate": 1.8507813788268967e-05, + "loss": 0.467, + "step": 7837 + }, + { + "epoch": 2.193674783095438, + "grad_norm": 0.2488999454409054, + "learning_rate": 1.8495833829061937e-05, + "loss": 0.4659, + "step": 7838 + }, + { + "epoch": 2.1939546599496222, + "grad_norm": 0.2525505742883965, + "learning_rate": 1.848385686843907e-05, + "loss": 0.45, + "step": 7839 + }, + { + "epoch": 2.1942345368038065, + "grad_norm": 0.25118705308969275, + "learning_rate": 1.8471882907540355e-05, + "loss": 0.4492, + "step": 7840 + }, + { + "epoch": 2.1945144136579904, + "grad_norm": 0.24164285045356698, + "learning_rate": 1.8459911947505464e-05, + "loss": 0.4776, + "step": 7841 + }, + { + "epoch": 2.1947942905121747, + "grad_norm": 0.24733942385268706, + "learning_rate": 1.8447943989473814e-05, + "loss": 0.4739, + "step": 7842 + }, + { + "epoch": 2.195074167366359, + "grad_norm": 0.25025012948468583, + "learning_rate": 1.843597903458451e-05, + "loss": 0.4424, + "step": 7843 + }, + { + "epoch": 2.195354044220543, + "grad_norm": 0.24455113988511687, + "learning_rate": 1.8424017083976393e-05, + "loss": 0.467, + "step": 7844 + }, + { + "epoch": 2.195633921074727, + "grad_norm": 0.24870464909046316, + "learning_rate": 1.8412058138788003e-05, + "loss": 0.4566, + "step": 7845 + }, + { + "epoch": 2.1959137979289114, + "grad_norm": 0.25326316139271077, + "learning_rate": 1.8400102200157598e-05, + "loss": 0.44, + "step": 7846 + }, + { + "epoch": 2.1961936747830952, + "grad_norm": 0.24756143910448022, + "learning_rate": 1.8388149269223153e-05, + "loss": 0.4629, + "step": 7847 + }, + { + "epoch": 2.1964735516372795, + "grad_norm": 0.2494077946205501, + "learning_rate": 1.837619934712234e-05, + "loss": 0.4901, + "step": 7848 + }, + { + "epoch": 2.196753428491464, + "grad_norm": 0.2571123514241369, + "learning_rate": 1.836425243499259e-05, + "loss": 0.4811, + "step": 7849 + }, + { + "epoch": 2.197033305345648, + "grad_norm": 0.2504517733448195, + "learning_rate": 1.8352308533971002e-05, + "loss": 0.4354, + "step": 7850 + }, + { + "epoch": 2.197313182199832, + "grad_norm": 0.26024904980336894, + "learning_rate": 1.83403676451944e-05, + "loss": 0.4561, + "step": 7851 + }, + { + "epoch": 2.1975930590540163, + "grad_norm": 0.24541373901313274, + "learning_rate": 1.8328429769799323e-05, + "loss": 0.4544, + "step": 7852 + }, + { + "epoch": 2.1978729359082005, + "grad_norm": 0.24228993703254692, + "learning_rate": 1.8316494908922033e-05, + "loss": 0.4323, + "step": 7853 + }, + { + "epoch": 2.1981528127623844, + "grad_norm": 0.2511516559277119, + "learning_rate": 1.8304563063698504e-05, + "loss": 0.4675, + "step": 7854 + }, + { + "epoch": 2.1984326896165687, + "grad_norm": 0.25999360726883414, + "learning_rate": 1.829263423526439e-05, + "loss": 0.4498, + "step": 7855 + }, + { + "epoch": 2.198712566470753, + "grad_norm": 0.2439316770122664, + "learning_rate": 1.8280708424755095e-05, + "loss": 0.4481, + "step": 7856 + }, + { + "epoch": 2.1989924433249373, + "grad_norm": 0.24385697889986588, + "learning_rate": 1.826878563330573e-05, + "loss": 0.4461, + "step": 7857 + }, + { + "epoch": 2.199272320179121, + "grad_norm": 0.25325081441565767, + "learning_rate": 1.8256865862051105e-05, + "loss": 0.4598, + "step": 7858 + }, + { + "epoch": 2.1995521970333054, + "grad_norm": 0.25207055701347525, + "learning_rate": 1.8244949112125752e-05, + "loss": 0.4571, + "step": 7859 + }, + { + "epoch": 2.1998320738874897, + "grad_norm": 0.23878169220599021, + "learning_rate": 1.823303538466392e-05, + "loss": 0.4497, + "step": 7860 + }, + { + "epoch": 2.2001119507416735, + "grad_norm": 0.24768614949321813, + "learning_rate": 1.8221124680799563e-05, + "loss": 0.462, + "step": 7861 + }, + { + "epoch": 2.200391827595858, + "grad_norm": 0.2528266987490449, + "learning_rate": 1.8209217001666346e-05, + "loss": 0.4702, + "step": 7862 + }, + { + "epoch": 2.200671704450042, + "grad_norm": 0.24943924240071652, + "learning_rate": 1.8197312348397644e-05, + "loss": 0.4637, + "step": 7863 + }, + { + "epoch": 2.200951581304226, + "grad_norm": 0.25086026249232646, + "learning_rate": 1.8185410722126556e-05, + "loss": 0.4616, + "step": 7864 + }, + { + "epoch": 2.2012314581584103, + "grad_norm": 0.23726761680461356, + "learning_rate": 1.8173512123985868e-05, + "loss": 0.4357, + "step": 7865 + }, + { + "epoch": 2.2015113350125946, + "grad_norm": 0.24414251569135859, + "learning_rate": 1.8161616555108124e-05, + "loss": 0.4646, + "step": 7866 + }, + { + "epoch": 2.2017912118667784, + "grad_norm": 0.24469801905230784, + "learning_rate": 1.814972401662554e-05, + "loss": 0.4502, + "step": 7867 + }, + { + "epoch": 2.2020710887209627, + "grad_norm": 0.24314284411565362, + "learning_rate": 1.813783450967005e-05, + "loss": 0.4474, + "step": 7868 + }, + { + "epoch": 2.202350965575147, + "grad_norm": 0.2493523090663002, + "learning_rate": 1.8125948035373302e-05, + "loss": 0.4518, + "step": 7869 + }, + { + "epoch": 2.2026308424293313, + "grad_norm": 0.2501987440792787, + "learning_rate": 1.8114064594866663e-05, + "loss": 0.4492, + "step": 7870 + }, + { + "epoch": 2.202910719283515, + "grad_norm": 0.24608784215228605, + "learning_rate": 1.810218418928121e-05, + "loss": 0.449, + "step": 7871 + }, + { + "epoch": 2.2031905961376994, + "grad_norm": 0.2545219012417861, + "learning_rate": 1.8090306819747727e-05, + "loss": 0.4557, + "step": 7872 + }, + { + "epoch": 2.2034704729918837, + "grad_norm": 0.2498453330510054, + "learning_rate": 1.807843248739669e-05, + "loss": 0.4664, + "step": 7873 + }, + { + "epoch": 2.2037503498460675, + "grad_norm": 0.24678614300565516, + "learning_rate": 1.8066561193358317e-05, + "loss": 0.4325, + "step": 7874 + }, + { + "epoch": 2.204030226700252, + "grad_norm": 0.2479581754897145, + "learning_rate": 1.805469293876252e-05, + "loss": 0.4506, + "step": 7875 + }, + { + "epoch": 2.204310103554436, + "grad_norm": 0.2526399472491942, + "learning_rate": 1.804282772473893e-05, + "loss": 0.459, + "step": 7876 + }, + { + "epoch": 2.2045899804086204, + "grad_norm": 0.2556463232245629, + "learning_rate": 1.8030965552416885e-05, + "loss": 0.4599, + "step": 7877 + }, + { + "epoch": 2.2048698572628043, + "grad_norm": 0.2348266903089208, + "learning_rate": 1.8019106422925436e-05, + "loss": 0.4287, + "step": 7878 + }, + { + "epoch": 2.2051497341169886, + "grad_norm": 0.24295159596583496, + "learning_rate": 1.8007250337393334e-05, + "loss": 0.4525, + "step": 7879 + }, + { + "epoch": 2.205429610971173, + "grad_norm": 0.249595405375553, + "learning_rate": 1.7995397296949052e-05, + "loss": 0.4455, + "step": 7880 + }, + { + "epoch": 2.2057094878253567, + "grad_norm": 0.24467598585723746, + "learning_rate": 1.798354730272077e-05, + "loss": 0.4649, + "step": 7881 + }, + { + "epoch": 2.205989364679541, + "grad_norm": 0.24678481309084288, + "learning_rate": 1.7971700355836376e-05, + "loss": 0.4455, + "step": 7882 + }, + { + "epoch": 2.2062692415337253, + "grad_norm": 0.25572344637615263, + "learning_rate": 1.7959856457423453e-05, + "loss": 0.4407, + "step": 7883 + }, + { + "epoch": 2.206549118387909, + "grad_norm": 0.25686611477700355, + "learning_rate": 1.7948015608609343e-05, + "loss": 0.457, + "step": 7884 + }, + { + "epoch": 2.2068289952420934, + "grad_norm": 0.2357672420576398, + "learning_rate": 1.793617781052105e-05, + "loss": 0.4332, + "step": 7885 + }, + { + "epoch": 2.2071088720962777, + "grad_norm": 0.25389890172651997, + "learning_rate": 1.7924343064285293e-05, + "loss": 0.461, + "step": 7886 + }, + { + "epoch": 2.207388748950462, + "grad_norm": 0.2430272573712817, + "learning_rate": 1.7912511371028524e-05, + "loss": 0.4497, + "step": 7887 + }, + { + "epoch": 2.207668625804646, + "grad_norm": 0.24875172492486222, + "learning_rate": 1.7900682731876877e-05, + "loss": 0.4535, + "step": 7888 + }, + { + "epoch": 2.20794850265883, + "grad_norm": 0.24448566149127557, + "learning_rate": 1.788885714795622e-05, + "loss": 0.4411, + "step": 7889 + }, + { + "epoch": 2.2082283795130144, + "grad_norm": 0.25529872514782653, + "learning_rate": 1.7877034620392126e-05, + "loss": 0.468, + "step": 7890 + }, + { + "epoch": 2.2085082563671983, + "grad_norm": 0.2426010058489337, + "learning_rate": 1.786521515030984e-05, + "loss": 0.4449, + "step": 7891 + }, + { + "epoch": 2.2087881332213826, + "grad_norm": 0.25397649146267437, + "learning_rate": 1.785339873883436e-05, + "loss": 0.4752, + "step": 7892 + }, + { + "epoch": 2.209068010075567, + "grad_norm": 0.2485246127152016, + "learning_rate": 1.784158538709039e-05, + "loss": 0.4601, + "step": 7893 + }, + { + "epoch": 2.2093478869297507, + "grad_norm": 0.25436142306490184, + "learning_rate": 1.782977509620231e-05, + "loss": 0.4622, + "step": 7894 + }, + { + "epoch": 2.209627763783935, + "grad_norm": 0.24936267035405102, + "learning_rate": 1.7817967867294254e-05, + "loss": 0.4475, + "step": 7895 + }, + { + "epoch": 2.2099076406381193, + "grad_norm": 0.2520182646580836, + "learning_rate": 1.7806163701490025e-05, + "loss": 0.4486, + "step": 7896 + }, + { + "epoch": 2.2101875174923036, + "grad_norm": 0.24339898686184286, + "learning_rate": 1.7794362599913155e-05, + "loss": 0.4472, + "step": 7897 + }, + { + "epoch": 2.2104673943464874, + "grad_norm": 0.24708836780938634, + "learning_rate": 1.7782564563686884e-05, + "loss": 0.4584, + "step": 7898 + }, + { + "epoch": 2.2107472712006717, + "grad_norm": 0.24613254514568578, + "learning_rate": 1.7770769593934144e-05, + "loss": 0.4588, + "step": 7899 + }, + { + "epoch": 2.211027148054856, + "grad_norm": 0.2345340256289829, + "learning_rate": 1.775897769177758e-05, + "loss": 0.4554, + "step": 7900 + }, + { + "epoch": 2.21130702490904, + "grad_norm": 0.2553194001094716, + "learning_rate": 1.7747188858339576e-05, + "loss": 0.4535, + "step": 7901 + }, + { + "epoch": 2.211586901763224, + "grad_norm": 0.24910823911685434, + "learning_rate": 1.7735403094742198e-05, + "loss": 0.4518, + "step": 7902 + }, + { + "epoch": 2.2118667786174084, + "grad_norm": 0.24519084778159617, + "learning_rate": 1.7723620402107207e-05, + "loss": 0.4409, + "step": 7903 + }, + { + "epoch": 2.2121466554715923, + "grad_norm": 0.25162372860192306, + "learning_rate": 1.7711840781556092e-05, + "loss": 0.4574, + "step": 7904 + }, + { + "epoch": 2.2124265323257766, + "grad_norm": 0.2557966513214806, + "learning_rate": 1.7700064234210046e-05, + "loss": 0.4605, + "step": 7905 + }, + { + "epoch": 2.212706409179961, + "grad_norm": 0.25660172741415943, + "learning_rate": 1.7688290761189967e-05, + "loss": 0.4504, + "step": 7906 + }, + { + "epoch": 2.212986286034145, + "grad_norm": 0.24517902037664688, + "learning_rate": 1.7676520363616456e-05, + "loss": 0.4448, + "step": 7907 + }, + { + "epoch": 2.213266162888329, + "grad_norm": 0.25686493409749006, + "learning_rate": 1.7664753042609845e-05, + "loss": 0.4542, + "step": 7908 + }, + { + "epoch": 2.2135460397425133, + "grad_norm": 0.24705122241299396, + "learning_rate": 1.7652988799290127e-05, + "loss": 0.4547, + "step": 7909 + }, + { + "epoch": 2.2138259165966976, + "grad_norm": 0.252047790992777, + "learning_rate": 1.7641227634777035e-05, + "loss": 0.4483, + "step": 7910 + }, + { + "epoch": 2.2141057934508814, + "grad_norm": 0.24962222001761322, + "learning_rate": 1.762946955019001e-05, + "loss": 0.4395, + "step": 7911 + }, + { + "epoch": 2.2143856703050657, + "grad_norm": 0.25361686004361983, + "learning_rate": 1.761771454664819e-05, + "loss": 0.4661, + "step": 7912 + }, + { + "epoch": 2.21466554715925, + "grad_norm": 0.25256660077060694, + "learning_rate": 1.7605962625270428e-05, + "loss": 0.4549, + "step": 7913 + }, + { + "epoch": 2.2149454240134343, + "grad_norm": 0.2499270156603375, + "learning_rate": 1.7594213787175274e-05, + "loss": 0.4414, + "step": 7914 + }, + { + "epoch": 2.215225300867618, + "grad_norm": 0.2603825740085603, + "learning_rate": 1.7582468033480992e-05, + "loss": 0.4639, + "step": 7915 + }, + { + "epoch": 2.2155051777218024, + "grad_norm": 0.2451397470222861, + "learning_rate": 1.7570725365305547e-05, + "loss": 0.485, + "step": 7916 + }, + { + "epoch": 2.2157850545759867, + "grad_norm": 0.2529732044457226, + "learning_rate": 1.755898578376659e-05, + "loss": 0.4659, + "step": 7917 + }, + { + "epoch": 2.2160649314301706, + "grad_norm": 0.24203003086204153, + "learning_rate": 1.7547249289981548e-05, + "loss": 0.444, + "step": 7918 + }, + { + "epoch": 2.216344808284355, + "grad_norm": 0.25175761527493534, + "learning_rate": 1.7535515885067484e-05, + "loss": 0.4529, + "step": 7919 + }, + { + "epoch": 2.216624685138539, + "grad_norm": 0.24528838001752543, + "learning_rate": 1.7523785570141182e-05, + "loss": 0.4618, + "step": 7920 + }, + { + "epoch": 2.216904561992723, + "grad_norm": 0.24149357114131356, + "learning_rate": 1.7512058346319148e-05, + "loss": 0.4216, + "step": 7921 + }, + { + "epoch": 2.2171844388469073, + "grad_norm": 0.25314218786784454, + "learning_rate": 1.750033421471759e-05, + "loss": 0.4701, + "step": 7922 + }, + { + "epoch": 2.2174643157010916, + "grad_norm": 0.24908668695012096, + "learning_rate": 1.7488613176452412e-05, + "loss": 0.4356, + "step": 7923 + }, + { + "epoch": 2.217744192555276, + "grad_norm": 0.25202973088004654, + "learning_rate": 1.747689523263923e-05, + "loss": 0.4551, + "step": 7924 + }, + { + "epoch": 2.2180240694094597, + "grad_norm": 0.2521952848999994, + "learning_rate": 1.746518038439336e-05, + "loss": 0.4592, + "step": 7925 + }, + { + "epoch": 2.218303946263644, + "grad_norm": 0.25766929948130207, + "learning_rate": 1.745346863282985e-05, + "loss": 0.4449, + "step": 7926 + }, + { + "epoch": 2.2185838231178283, + "grad_norm": 0.24359965870129346, + "learning_rate": 1.7441759979063392e-05, + "loss": 0.4495, + "step": 7927 + }, + { + "epoch": 2.218863699972012, + "grad_norm": 0.24798138730762323, + "learning_rate": 1.7430054424208447e-05, + "loss": 0.4347, + "step": 7928 + }, + { + "epoch": 2.2191435768261965, + "grad_norm": 0.25300592307291464, + "learning_rate": 1.7418351969379154e-05, + "loss": 0.4432, + "step": 7929 + }, + { + "epoch": 2.2194234536803807, + "grad_norm": 0.2487162651416905, + "learning_rate": 1.7406652615689356e-05, + "loss": 0.4599, + "step": 7930 + }, + { + "epoch": 2.2197033305345646, + "grad_norm": 0.24073338574486924, + "learning_rate": 1.7394956364252602e-05, + "loss": 0.448, + "step": 7931 + }, + { + "epoch": 2.219983207388749, + "grad_norm": 0.2403837745791843, + "learning_rate": 1.7383263216182157e-05, + "loss": 0.4338, + "step": 7932 + }, + { + "epoch": 2.220263084242933, + "grad_norm": 0.24989577766616916, + "learning_rate": 1.737157317259097e-05, + "loss": 0.443, + "step": 7933 + }, + { + "epoch": 2.2205429610971175, + "grad_norm": 0.2515323230954262, + "learning_rate": 1.7359886234591695e-05, + "loss": 0.4535, + "step": 7934 + }, + { + "epoch": 2.2208228379513013, + "grad_norm": 0.25878382581198806, + "learning_rate": 1.7348202403296737e-05, + "loss": 0.4466, + "step": 7935 + }, + { + "epoch": 2.2211027148054856, + "grad_norm": 0.24917245373517516, + "learning_rate": 1.7336521679818147e-05, + "loss": 0.4341, + "step": 7936 + }, + { + "epoch": 2.22138259165967, + "grad_norm": 0.2480932593442455, + "learning_rate": 1.7324844065267708e-05, + "loss": 0.4458, + "step": 7937 + }, + { + "epoch": 2.2216624685138537, + "grad_norm": 0.2512838503594705, + "learning_rate": 1.7313169560756903e-05, + "loss": 0.4549, + "step": 7938 + }, + { + "epoch": 2.221942345368038, + "grad_norm": 0.2632030347489567, + "learning_rate": 1.7301498167396914e-05, + "loss": 0.4466, + "step": 7939 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.2501334246477985, + "learning_rate": 1.7289829886298624e-05, + "loss": 0.4669, + "step": 7940 + }, + { + "epoch": 2.222502099076406, + "grad_norm": 0.24813646405405118, + "learning_rate": 1.7278164718572644e-05, + "loss": 0.4735, + "step": 7941 + }, + { + "epoch": 2.2227819759305905, + "grad_norm": 0.2555327423460743, + "learning_rate": 1.7266502665329252e-05, + "loss": 0.4369, + "step": 7942 + }, + { + "epoch": 2.2230618527847748, + "grad_norm": 0.24518656085447507, + "learning_rate": 1.7254843727678467e-05, + "loss": 0.4618, + "step": 7943 + }, + { + "epoch": 2.223341729638959, + "grad_norm": 0.24291977999803366, + "learning_rate": 1.7243187906729995e-05, + "loss": 0.4498, + "step": 7944 + }, + { + "epoch": 2.223621606493143, + "grad_norm": 0.24063118891127847, + "learning_rate": 1.723153520359322e-05, + "loss": 0.4485, + "step": 7945 + }, + { + "epoch": 2.223901483347327, + "grad_norm": 0.2679634417254974, + "learning_rate": 1.7219885619377264e-05, + "loss": 0.4889, + "step": 7946 + }, + { + "epoch": 2.2241813602015115, + "grad_norm": 0.25885212572291527, + "learning_rate": 1.7208239155190943e-05, + "loss": 0.4565, + "step": 7947 + }, + { + "epoch": 2.2244612370556953, + "grad_norm": 0.24100553328119104, + "learning_rate": 1.719659581214277e-05, + "loss": 0.4461, + "step": 7948 + }, + { + "epoch": 2.2247411139098796, + "grad_norm": 0.24928149919491513, + "learning_rate": 1.7184955591340974e-05, + "loss": 0.4442, + "step": 7949 + }, + { + "epoch": 2.225020990764064, + "grad_norm": 0.2515400428146291, + "learning_rate": 1.717331849389347e-05, + "loss": 0.4576, + "step": 7950 + }, + { + "epoch": 2.225300867618248, + "grad_norm": 0.2515434570758564, + "learning_rate": 1.7161684520907883e-05, + "loss": 0.4772, + "step": 7951 + }, + { + "epoch": 2.225580744472432, + "grad_norm": 0.238449405053394, + "learning_rate": 1.7150053673491528e-05, + "loss": 0.4562, + "step": 7952 + }, + { + "epoch": 2.2258606213266163, + "grad_norm": 0.2549100803745451, + "learning_rate": 1.713842595275147e-05, + "loss": 0.4369, + "step": 7953 + }, + { + "epoch": 2.2261404981808006, + "grad_norm": 0.2503589637496714, + "learning_rate": 1.7126801359794418e-05, + "loss": 0.4304, + "step": 7954 + }, + { + "epoch": 2.2264203750349845, + "grad_norm": 0.2512816021218268, + "learning_rate": 1.7115179895726823e-05, + "loss": 0.4428, + "step": 7955 + }, + { + "epoch": 2.2267002518891688, + "grad_norm": 0.24388927147551884, + "learning_rate": 1.71035615616548e-05, + "loss": 0.4592, + "step": 7956 + }, + { + "epoch": 2.226980128743353, + "grad_norm": 0.24372933422977305, + "learning_rate": 1.7091946358684213e-05, + "loss": 0.4756, + "step": 7957 + }, + { + "epoch": 2.227260005597537, + "grad_norm": 0.23988976309214124, + "learning_rate": 1.708033428792058e-05, + "loss": 0.4266, + "step": 7958 + }, + { + "epoch": 2.227539882451721, + "grad_norm": 0.2481820869753129, + "learning_rate": 1.7068725350469162e-05, + "loss": 0.4264, + "step": 7959 + }, + { + "epoch": 2.2278197593059055, + "grad_norm": 0.24135649240636686, + "learning_rate": 1.7057119547434895e-05, + "loss": 0.4614, + "step": 7960 + }, + { + "epoch": 2.2280996361600893, + "grad_norm": 0.252011329197858, + "learning_rate": 1.704551687992243e-05, + "loss": 0.4441, + "step": 7961 + }, + { + "epoch": 2.2283795130142736, + "grad_norm": 0.25738068801468555, + "learning_rate": 1.7033917349036127e-05, + "loss": 0.4659, + "step": 7962 + }, + { + "epoch": 2.228659389868458, + "grad_norm": 0.2515975015760517, + "learning_rate": 1.702232095588001e-05, + "loss": 0.4446, + "step": 7963 + }, + { + "epoch": 2.228939266722642, + "grad_norm": 0.2518176234187117, + "learning_rate": 1.7010727701557837e-05, + "loss": 0.4284, + "step": 7964 + }, + { + "epoch": 2.229219143576826, + "grad_norm": 0.24727712793208703, + "learning_rate": 1.6999137587173074e-05, + "loss": 0.4463, + "step": 7965 + }, + { + "epoch": 2.2294990204310103, + "grad_norm": 0.24965373613298716, + "learning_rate": 1.6987550613828862e-05, + "loss": 0.4352, + "step": 7966 + }, + { + "epoch": 2.2297788972851946, + "grad_norm": 0.24686778507922055, + "learning_rate": 1.697596678262806e-05, + "loss": 0.4646, + "step": 7967 + }, + { + "epoch": 2.2300587741393785, + "grad_norm": 0.24507759996160586, + "learning_rate": 1.696438609467323e-05, + "loss": 0.4511, + "step": 7968 + }, + { + "epoch": 2.2303386509935628, + "grad_norm": 0.27052804940673875, + "learning_rate": 1.69528085510666e-05, + "loss": 0.4774, + "step": 7969 + }, + { + "epoch": 2.230618527847747, + "grad_norm": 0.2668370135729792, + "learning_rate": 1.6941234152910168e-05, + "loss": 0.485, + "step": 7970 + }, + { + "epoch": 2.2308984047019313, + "grad_norm": 0.2523499356202924, + "learning_rate": 1.692966290130557e-05, + "loss": 0.4419, + "step": 7971 + }, + { + "epoch": 2.231178281556115, + "grad_norm": 0.2437389307602732, + "learning_rate": 1.6918094797354174e-05, + "loss": 0.4521, + "step": 7972 + }, + { + "epoch": 2.2314581584102995, + "grad_norm": 0.24168020682706032, + "learning_rate": 1.6906529842157027e-05, + "loss": 0.4488, + "step": 7973 + }, + { + "epoch": 2.2317380352644838, + "grad_norm": 0.25162232662424494, + "learning_rate": 1.689496803681489e-05, + "loss": 0.4706, + "step": 7974 + }, + { + "epoch": 2.2320179121186676, + "grad_norm": 0.26197643712733076, + "learning_rate": 1.6883409382428233e-05, + "loss": 0.4635, + "step": 7975 + }, + { + "epoch": 2.232297788972852, + "grad_norm": 0.2470103189286477, + "learning_rate": 1.68718538800972e-05, + "loss": 0.4524, + "step": 7976 + }, + { + "epoch": 2.232577665827036, + "grad_norm": 0.2480387357654966, + "learning_rate": 1.6860301530921662e-05, + "loss": 0.4536, + "step": 7977 + }, + { + "epoch": 2.23285754268122, + "grad_norm": 0.2339010891610054, + "learning_rate": 1.684875233600117e-05, + "loss": 0.4511, + "step": 7978 + }, + { + "epoch": 2.2331374195354043, + "grad_norm": 0.24500286682754885, + "learning_rate": 1.6837206296434987e-05, + "loss": 0.4559, + "step": 7979 + }, + { + "epoch": 2.2334172963895886, + "grad_norm": 0.2430729908238167, + "learning_rate": 1.682566341332209e-05, + "loss": 0.4493, + "step": 7980 + }, + { + "epoch": 2.233697173243773, + "grad_norm": 0.25333897406913003, + "learning_rate": 1.6814123687761095e-05, + "loss": 0.4609, + "step": 7981 + }, + { + "epoch": 2.2339770500979568, + "grad_norm": 0.24631010567382958, + "learning_rate": 1.6802587120850387e-05, + "loss": 0.4402, + "step": 7982 + }, + { + "epoch": 2.234256926952141, + "grad_norm": 0.24177119739089156, + "learning_rate": 1.679105371368802e-05, + "loss": 0.4709, + "step": 7983 + }, + { + "epoch": 2.2345368038063254, + "grad_norm": 0.2404616845514908, + "learning_rate": 1.677952346737175e-05, + "loss": 0.4462, + "step": 7984 + }, + { + "epoch": 2.234816680660509, + "grad_norm": 0.24421904816616632, + "learning_rate": 1.6767996382999024e-05, + "loss": 0.4415, + "step": 7985 + }, + { + "epoch": 2.2350965575146935, + "grad_norm": 0.25773948255866413, + "learning_rate": 1.6756472461666988e-05, + "loss": 0.456, + "step": 7986 + }, + { + "epoch": 2.235376434368878, + "grad_norm": 0.24322252207362016, + "learning_rate": 1.6744951704472527e-05, + "loss": 0.4695, + "step": 7987 + }, + { + "epoch": 2.235656311223062, + "grad_norm": 0.2429963628251191, + "learning_rate": 1.673343411251218e-05, + "loss": 0.4522, + "step": 7988 + }, + { + "epoch": 2.235936188077246, + "grad_norm": 0.24708892921844902, + "learning_rate": 1.6721919686882194e-05, + "loss": 0.4661, + "step": 7989 + }, + { + "epoch": 2.23621606493143, + "grad_norm": 0.2441905285899125, + "learning_rate": 1.6710408428678513e-05, + "loss": 0.4668, + "step": 7990 + }, + { + "epoch": 2.2364959417856145, + "grad_norm": 0.2502825217382606, + "learning_rate": 1.66989003389968e-05, + "loss": 0.4496, + "step": 7991 + }, + { + "epoch": 2.2367758186397984, + "grad_norm": 0.24243456426849705, + "learning_rate": 1.6687395418932384e-05, + "loss": 0.4278, + "step": 7992 + }, + { + "epoch": 2.2370556954939826, + "grad_norm": 0.2369164369311766, + "learning_rate": 1.6675893669580322e-05, + "loss": 0.4585, + "step": 7993 + }, + { + "epoch": 2.237335572348167, + "grad_norm": 0.24799242223856602, + "learning_rate": 1.666439509203535e-05, + "loss": 0.4608, + "step": 7994 + }, + { + "epoch": 2.237615449202351, + "grad_norm": 0.25866548823886354, + "learning_rate": 1.6652899687391914e-05, + "loss": 0.467, + "step": 7995 + }, + { + "epoch": 2.237895326056535, + "grad_norm": 0.24710441655216528, + "learning_rate": 1.6641407456744152e-05, + "loss": 0.4422, + "step": 7996 + }, + { + "epoch": 2.2381752029107194, + "grad_norm": 0.24306993015065884, + "learning_rate": 1.6629918401185894e-05, + "loss": 0.4571, + "step": 7997 + }, + { + "epoch": 2.238455079764903, + "grad_norm": 0.2523588605274083, + "learning_rate": 1.66184325218107e-05, + "loss": 0.4607, + "step": 7998 + }, + { + "epoch": 2.2387349566190875, + "grad_norm": 0.2507205117190013, + "learning_rate": 1.660694981971177e-05, + "loss": 0.4637, + "step": 7999 + }, + { + "epoch": 2.239014833473272, + "grad_norm": 0.25074998498597595, + "learning_rate": 1.6595470295982045e-05, + "loss": 0.4503, + "step": 8000 + }, + { + "epoch": 2.239294710327456, + "grad_norm": 0.25097648213155677, + "learning_rate": 1.6583993951714154e-05, + "loss": 0.4365, + "step": 8001 + }, + { + "epoch": 2.23957458718164, + "grad_norm": 0.24297808111841446, + "learning_rate": 1.657252078800042e-05, + "loss": 0.4497, + "step": 8002 + }, + { + "epoch": 2.239854464035824, + "grad_norm": 0.2440741001324849, + "learning_rate": 1.6561050805932875e-05, + "loss": 0.4513, + "step": 8003 + }, + { + "epoch": 2.2401343408900085, + "grad_norm": 0.2487975890820272, + "learning_rate": 1.654958400660321e-05, + "loss": 0.4442, + "step": 8004 + }, + { + "epoch": 2.2404142177441924, + "grad_norm": 0.24342845114817827, + "learning_rate": 1.653812039110288e-05, + "loss": 0.4495, + "step": 8005 + }, + { + "epoch": 2.2406940945983767, + "grad_norm": 0.25650397910269307, + "learning_rate": 1.652665996052298e-05, + "loss": 0.4409, + "step": 8006 + }, + { + "epoch": 2.240973971452561, + "grad_norm": 0.23901972894006557, + "learning_rate": 1.651520271595432e-05, + "loss": 0.4528, + "step": 8007 + }, + { + "epoch": 2.2412538483067452, + "grad_norm": 0.252973107022516, + "learning_rate": 1.6503748658487405e-05, + "loss": 0.4617, + "step": 8008 + }, + { + "epoch": 2.241533725160929, + "grad_norm": 0.2470775167754655, + "learning_rate": 1.6492297789212445e-05, + "loss": 0.4576, + "step": 8009 + }, + { + "epoch": 2.2418136020151134, + "grad_norm": 0.25849123200894225, + "learning_rate": 1.6480850109219335e-05, + "loss": 0.4601, + "step": 8010 + }, + { + "epoch": 2.2420934788692977, + "grad_norm": 0.2849183280028203, + "learning_rate": 1.646940561959767e-05, + "loss": 0.471, + "step": 8011 + }, + { + "epoch": 2.2423733557234815, + "grad_norm": 0.24331633282152842, + "learning_rate": 1.6457964321436754e-05, + "loss": 0.4767, + "step": 8012 + }, + { + "epoch": 2.242653232577666, + "grad_norm": 0.2363492627788017, + "learning_rate": 1.6446526215825564e-05, + "loss": 0.4554, + "step": 8013 + }, + { + "epoch": 2.24293310943185, + "grad_norm": 0.25239999785216144, + "learning_rate": 1.643509130385279e-05, + "loss": 0.4637, + "step": 8014 + }, + { + "epoch": 2.243212986286034, + "grad_norm": 0.2490404161533813, + "learning_rate": 1.642365958660681e-05, + "loss": 0.4799, + "step": 8015 + }, + { + "epoch": 2.2434928631402182, + "grad_norm": 0.24581997293193475, + "learning_rate": 1.6412231065175726e-05, + "loss": 0.4605, + "step": 8016 + }, + { + "epoch": 2.2437727399944025, + "grad_norm": 0.25046046242059733, + "learning_rate": 1.6400805740647267e-05, + "loss": 0.4634, + "step": 8017 + }, + { + "epoch": 2.244052616848587, + "grad_norm": 0.24579928775716894, + "learning_rate": 1.638938361410893e-05, + "loss": 0.4558, + "step": 8018 + }, + { + "epoch": 2.2443324937027707, + "grad_norm": 0.25121405006280206, + "learning_rate": 1.6377964686647868e-05, + "loss": 0.4525, + "step": 8019 + }, + { + "epoch": 2.244612370556955, + "grad_norm": 0.24297240413396543, + "learning_rate": 1.6366548959350947e-05, + "loss": 0.4435, + "step": 8020 + }, + { + "epoch": 2.2448922474111392, + "grad_norm": 0.23754474483492782, + "learning_rate": 1.635513643330471e-05, + "loss": 0.434, + "step": 8021 + }, + { + "epoch": 2.245172124265323, + "grad_norm": 0.250770465819617, + "learning_rate": 1.6343727109595426e-05, + "loss": 0.4693, + "step": 8022 + }, + { + "epoch": 2.2454520011195074, + "grad_norm": 0.24653655580384282, + "learning_rate": 1.6332320989309042e-05, + "loss": 0.4512, + "step": 8023 + }, + { + "epoch": 2.2457318779736917, + "grad_norm": 0.2512710812882383, + "learning_rate": 1.632091807353119e-05, + "loss": 0.4624, + "step": 8024 + }, + { + "epoch": 2.246011754827876, + "grad_norm": 0.23761548726103143, + "learning_rate": 1.6309518363347203e-05, + "loss": 0.4534, + "step": 8025 + }, + { + "epoch": 2.24629163168206, + "grad_norm": 0.2485012937243386, + "learning_rate": 1.6298121859842115e-05, + "loss": 0.4503, + "step": 8026 + }, + { + "epoch": 2.246571508536244, + "grad_norm": 0.2639350958290308, + "learning_rate": 1.6286728564100657e-05, + "loss": 0.4568, + "step": 8027 + }, + { + "epoch": 2.2468513853904284, + "grad_norm": 0.24878318674886682, + "learning_rate": 1.6275338477207243e-05, + "loss": 0.4537, + "step": 8028 + }, + { + "epoch": 2.2471312622446122, + "grad_norm": 0.26098167252836724, + "learning_rate": 1.6263951600245986e-05, + "loss": 0.4636, + "step": 8029 + }, + { + "epoch": 2.2474111390987965, + "grad_norm": 0.2728680058475497, + "learning_rate": 1.62525679343007e-05, + "loss": 0.478, + "step": 8030 + }, + { + "epoch": 2.247691015952981, + "grad_norm": 0.2509449203301831, + "learning_rate": 1.624118748045489e-05, + "loss": 0.4641, + "step": 8031 + }, + { + "epoch": 2.2479708928071647, + "grad_norm": 0.25270506991418473, + "learning_rate": 1.622981023979175e-05, + "loss": 0.4422, + "step": 8032 + }, + { + "epoch": 2.248250769661349, + "grad_norm": 0.2546047036652015, + "learning_rate": 1.621843621339417e-05, + "loss": 0.461, + "step": 8033 + }, + { + "epoch": 2.2485306465155332, + "grad_norm": 0.25526419135366757, + "learning_rate": 1.6207065402344747e-05, + "loss": 0.4491, + "step": 8034 + }, + { + "epoch": 2.248810523369717, + "grad_norm": 0.2558771163863261, + "learning_rate": 1.6195697807725763e-05, + "loss": 0.4576, + "step": 8035 + }, + { + "epoch": 2.2490904002239014, + "grad_norm": 0.24170528357334292, + "learning_rate": 1.618433343061917e-05, + "loss": 0.4578, + "step": 8036 + }, + { + "epoch": 2.2493702770780857, + "grad_norm": 0.23538827050402764, + "learning_rate": 1.6172972272106647e-05, + "loss": 0.4464, + "step": 8037 + }, + { + "epoch": 2.24965015393227, + "grad_norm": 0.24439090793335866, + "learning_rate": 1.616161433326954e-05, + "loss": 0.4413, + "step": 8038 + }, + { + "epoch": 2.249930030786454, + "grad_norm": 0.25081941860473644, + "learning_rate": 1.6150259615188938e-05, + "loss": 0.4673, + "step": 8039 + }, + { + "epoch": 2.250209907640638, + "grad_norm": 0.2453433926538471, + "learning_rate": 1.613890811894557e-05, + "loss": 0.443, + "step": 8040 + }, + { + "epoch": 2.2504897844948224, + "grad_norm": 0.24975312010648723, + "learning_rate": 1.612755984561989e-05, + "loss": 0.4477, + "step": 8041 + }, + { + "epoch": 2.2507696613490062, + "grad_norm": 0.24148933691877925, + "learning_rate": 1.6116214796292013e-05, + "loss": 0.4478, + "step": 8042 + }, + { + "epoch": 2.2510495382031905, + "grad_norm": 0.23798042129055016, + "learning_rate": 1.610487297204178e-05, + "loss": 0.4603, + "step": 8043 + }, + { + "epoch": 2.251329415057375, + "grad_norm": 0.24366092254186447, + "learning_rate": 1.609353437394871e-05, + "loss": 0.4364, + "step": 8044 + }, + { + "epoch": 2.251609291911559, + "grad_norm": 0.25532438746680247, + "learning_rate": 1.608219900309202e-05, + "loss": 0.4796, + "step": 8045 + }, + { + "epoch": 2.251889168765743, + "grad_norm": 0.24529362408593128, + "learning_rate": 1.6070866860550603e-05, + "loss": 0.4475, + "step": 8046 + }, + { + "epoch": 2.2521690456199273, + "grad_norm": 0.2555494009784305, + "learning_rate": 1.605953794740308e-05, + "loss": 0.4607, + "step": 8047 + }, + { + "epoch": 2.2524489224741115, + "grad_norm": 0.2412023951279725, + "learning_rate": 1.604821226472773e-05, + "loss": 0.446, + "step": 8048 + }, + { + "epoch": 2.2527287993282954, + "grad_norm": 0.26411489823517675, + "learning_rate": 1.6036889813602534e-05, + "loss": 0.46, + "step": 8049 + }, + { + "epoch": 2.2530086761824797, + "grad_norm": 0.2577320791532774, + "learning_rate": 1.602557059510518e-05, + "loss": 0.4484, + "step": 8050 + }, + { + "epoch": 2.253288553036664, + "grad_norm": 0.24845629287382937, + "learning_rate": 1.6014254610313033e-05, + "loss": 0.4705, + "step": 8051 + }, + { + "epoch": 2.253568429890848, + "grad_norm": 0.2515467965214697, + "learning_rate": 1.6002941860303163e-05, + "loss": 0.4559, + "step": 8052 + }, + { + "epoch": 2.253848306745032, + "grad_norm": 0.24220978874171253, + "learning_rate": 1.5991632346152318e-05, + "loss": 0.4698, + "step": 8053 + }, + { + "epoch": 2.2541281835992164, + "grad_norm": 0.2540236744257015, + "learning_rate": 1.5980326068936936e-05, + "loss": 0.4638, + "step": 8054 + }, + { + "epoch": 2.2544080604534007, + "grad_norm": 0.25123858182802145, + "learning_rate": 1.5969023029733156e-05, + "loss": 0.46, + "step": 8055 + }, + { + "epoch": 2.2546879373075845, + "grad_norm": 0.24846569597307558, + "learning_rate": 1.5957723229616796e-05, + "loss": 0.4567, + "step": 8056 + }, + { + "epoch": 2.254967814161769, + "grad_norm": 0.2385083401685797, + "learning_rate": 1.5946426669663406e-05, + "loss": 0.4413, + "step": 8057 + }, + { + "epoch": 2.255247691015953, + "grad_norm": 0.24742727656734848, + "learning_rate": 1.593513335094819e-05, + "loss": 0.4812, + "step": 8058 + }, + { + "epoch": 2.255527567870137, + "grad_norm": 0.2475928660820711, + "learning_rate": 1.5923843274546047e-05, + "loss": 0.4632, + "step": 8059 + }, + { + "epoch": 2.2558074447243213, + "grad_norm": 0.2466775051598817, + "learning_rate": 1.5912556441531577e-05, + "loss": 0.4579, + "step": 8060 + }, + { + "epoch": 2.2560873215785056, + "grad_norm": 0.2482971041756975, + "learning_rate": 1.590127285297906e-05, + "loss": 0.4539, + "step": 8061 + }, + { + "epoch": 2.25636719843269, + "grad_norm": 0.23303363154287318, + "learning_rate": 1.588999250996248e-05, + "loss": 0.4172, + "step": 8062 + }, + { + "epoch": 2.2566470752868737, + "grad_norm": 0.2475263519986645, + "learning_rate": 1.587871541355551e-05, + "loss": 0.4588, + "step": 8063 + }, + { + "epoch": 2.256926952141058, + "grad_norm": 0.2513488351825603, + "learning_rate": 1.5867441564831502e-05, + "loss": 0.4494, + "step": 8064 + }, + { + "epoch": 2.2572068289952423, + "grad_norm": 0.2568746568874127, + "learning_rate": 1.5856170964863508e-05, + "loss": 0.4623, + "step": 8065 + }, + { + "epoch": 2.257486705849426, + "grad_norm": 0.23994801380931918, + "learning_rate": 1.5844903614724278e-05, + "loss": 0.4447, + "step": 8066 + }, + { + "epoch": 2.2577665827036104, + "grad_norm": 0.25522426947200266, + "learning_rate": 1.5833639515486237e-05, + "loss": 0.4664, + "step": 8067 + }, + { + "epoch": 2.2580464595577947, + "grad_norm": 0.2651894912794593, + "learning_rate": 1.582237866822151e-05, + "loss": 0.4814, + "step": 8068 + }, + { + "epoch": 2.2583263364119786, + "grad_norm": 0.25243363676802255, + "learning_rate": 1.5811121074001917e-05, + "loss": 0.4619, + "step": 8069 + }, + { + "epoch": 2.258606213266163, + "grad_norm": 0.2389919454384051, + "learning_rate": 1.579986673389895e-05, + "loss": 0.4481, + "step": 8070 + }, + { + "epoch": 2.258886090120347, + "grad_norm": 0.2479257488901504, + "learning_rate": 1.5788615648983828e-05, + "loss": 0.45, + "step": 8071 + }, + { + "epoch": 2.259165966974531, + "grad_norm": 0.24832827728218682, + "learning_rate": 1.57773678203274e-05, + "loss": 0.4545, + "step": 8072 + }, + { + "epoch": 2.2594458438287153, + "grad_norm": 0.2477948436432771, + "learning_rate": 1.5766123249000244e-05, + "loss": 0.4498, + "step": 8073 + }, + { + "epoch": 2.2597257206828996, + "grad_norm": 0.2481975889054319, + "learning_rate": 1.5754881936072653e-05, + "loss": 0.447, + "step": 8074 + }, + { + "epoch": 2.260005597537084, + "grad_norm": 0.25287797136419315, + "learning_rate": 1.5743643882614566e-05, + "loss": 0.4431, + "step": 8075 + }, + { + "epoch": 2.2602854743912677, + "grad_norm": 0.24869014300598982, + "learning_rate": 1.5732409089695632e-05, + "loss": 0.4577, + "step": 8076 + }, + { + "epoch": 2.260565351245452, + "grad_norm": 0.25910950830768154, + "learning_rate": 1.572117755838518e-05, + "loss": 0.4671, + "step": 8077 + }, + { + "epoch": 2.2608452280996363, + "grad_norm": 0.2538591097969899, + "learning_rate": 1.5709949289752235e-05, + "loss": 0.4632, + "step": 8078 + }, + { + "epoch": 2.26112510495382, + "grad_norm": 0.24107156956953155, + "learning_rate": 1.5698724284865506e-05, + "loss": 0.4495, + "step": 8079 + }, + { + "epoch": 2.2614049818080044, + "grad_norm": 0.24782583848826115, + "learning_rate": 1.5687502544793405e-05, + "loss": 0.4534, + "step": 8080 + }, + { + "epoch": 2.2616848586621887, + "grad_norm": 0.23074484549061458, + "learning_rate": 1.5676284070604015e-05, + "loss": 0.4603, + "step": 8081 + }, + { + "epoch": 2.261964735516373, + "grad_norm": 0.2513337097130585, + "learning_rate": 1.5665068863365117e-05, + "loss": 0.4482, + "step": 8082 + }, + { + "epoch": 2.262244612370557, + "grad_norm": 0.23376538153171827, + "learning_rate": 1.5653856924144183e-05, + "loss": 0.4313, + "step": 8083 + }, + { + "epoch": 2.262524489224741, + "grad_norm": 0.2507810713962941, + "learning_rate": 1.564264825400837e-05, + "loss": 0.463, + "step": 8084 + }, + { + "epoch": 2.2628043660789254, + "grad_norm": 0.26057856417098435, + "learning_rate": 1.563144285402453e-05, + "loss": 0.4611, + "step": 8085 + }, + { + "epoch": 2.2630842429331093, + "grad_norm": 0.25263231780762113, + "learning_rate": 1.5620240725259194e-05, + "loss": 0.4786, + "step": 8086 + }, + { + "epoch": 2.2633641197872936, + "grad_norm": 0.24399915657311877, + "learning_rate": 1.560904186877859e-05, + "loss": 0.4609, + "step": 8087 + }, + { + "epoch": 2.263643996641478, + "grad_norm": 0.2567266375720405, + "learning_rate": 1.559784628564863e-05, + "loss": 0.4648, + "step": 8088 + }, + { + "epoch": 2.2639238734956617, + "grad_norm": 0.2533129948185237, + "learning_rate": 1.5586653976934936e-05, + "loss": 0.453, + "step": 8089 + }, + { + "epoch": 2.264203750349846, + "grad_norm": 0.2662838390784181, + "learning_rate": 1.557546494370274e-05, + "loss": 0.4684, + "step": 8090 + }, + { + "epoch": 2.2644836272040303, + "grad_norm": 0.2563249729476431, + "learning_rate": 1.5564279187017077e-05, + "loss": 0.4596, + "step": 8091 + }, + { + "epoch": 2.264763504058214, + "grad_norm": 0.2551216698026058, + "learning_rate": 1.55530967079426e-05, + "loss": 0.4609, + "step": 8092 + }, + { + "epoch": 2.2650433809123984, + "grad_norm": 0.23783019435087047, + "learning_rate": 1.554191750754365e-05, + "loss": 0.4539, + "step": 8093 + }, + { + "epoch": 2.2653232577665827, + "grad_norm": 0.2557040638757712, + "learning_rate": 1.553074158688429e-05, + "loss": 0.4551, + "step": 8094 + }, + { + "epoch": 2.265603134620767, + "grad_norm": 0.25439045422952744, + "learning_rate": 1.5519568947028228e-05, + "loss": 0.4659, + "step": 8095 + }, + { + "epoch": 2.265883011474951, + "grad_norm": 0.25520332065671886, + "learning_rate": 1.55083995890389e-05, + "loss": 0.4741, + "step": 8096 + }, + { + "epoch": 2.266162888329135, + "grad_norm": 0.2520445436560445, + "learning_rate": 1.5497233513979404e-05, + "loss": 0.4663, + "step": 8097 + }, + { + "epoch": 2.2664427651833194, + "grad_norm": 0.2537060451873474, + "learning_rate": 1.5486070722912532e-05, + "loss": 0.4618, + "step": 8098 + }, + { + "epoch": 2.2667226420375037, + "grad_norm": 0.2494349200310384, + "learning_rate": 1.5474911216900774e-05, + "loss": 0.4598, + "step": 8099 + }, + { + "epoch": 2.2670025188916876, + "grad_norm": 0.23177861020387283, + "learning_rate": 1.546375499700628e-05, + "loss": 0.4366, + "step": 8100 + }, + { + "epoch": 2.267282395745872, + "grad_norm": 0.24954003694144591, + "learning_rate": 1.5452602064290923e-05, + "loss": 0.4439, + "step": 8101 + }, + { + "epoch": 2.267562272600056, + "grad_norm": 0.2543620032513836, + "learning_rate": 1.5441452419816237e-05, + "loss": 0.4585, + "step": 8102 + }, + { + "epoch": 2.26784214945424, + "grad_norm": 0.23552521125261952, + "learning_rate": 1.543030606464345e-05, + "loss": 0.4432, + "step": 8103 + }, + { + "epoch": 2.2681220263084243, + "grad_norm": 0.24986342113588852, + "learning_rate": 1.5419162999833485e-05, + "loss": 0.4577, + "step": 8104 + }, + { + "epoch": 2.2684019031626086, + "grad_norm": 0.25637804753175986, + "learning_rate": 1.5408023226446945e-05, + "loss": 0.4722, + "step": 8105 + }, + { + "epoch": 2.2686817800167924, + "grad_norm": 0.2500963453039843, + "learning_rate": 1.539688674554411e-05, + "loss": 0.4427, + "step": 8106 + }, + { + "epoch": 2.2689616568709767, + "grad_norm": 0.25095577334372066, + "learning_rate": 1.5385753558184967e-05, + "loss": 0.4657, + "step": 8107 + }, + { + "epoch": 2.269241533725161, + "grad_norm": 0.24959553726593925, + "learning_rate": 1.537462366542917e-05, + "loss": 0.4416, + "step": 8108 + }, + { + "epoch": 2.269521410579345, + "grad_norm": 0.24021352227065768, + "learning_rate": 1.5363497068336075e-05, + "loss": 0.4682, + "step": 8109 + }, + { + "epoch": 2.269801287433529, + "grad_norm": 0.25741380873199216, + "learning_rate": 1.5352373767964717e-05, + "loss": 0.474, + "step": 8110 + }, + { + "epoch": 2.2700811642877134, + "grad_norm": 0.2447863931933491, + "learning_rate": 1.534125376537382e-05, + "loss": 0.4537, + "step": 8111 + }, + { + "epoch": 2.2703610411418977, + "grad_norm": 0.2551213740238796, + "learning_rate": 1.5330137061621784e-05, + "loss": 0.464, + "step": 8112 + }, + { + "epoch": 2.2706409179960816, + "grad_norm": 0.24024301283981186, + "learning_rate": 1.5319023657766708e-05, + "loss": 0.4509, + "step": 8113 + }, + { + "epoch": 2.270920794850266, + "grad_norm": 0.24078770185235696, + "learning_rate": 1.5307913554866376e-05, + "loss": 0.4659, + "step": 8114 + }, + { + "epoch": 2.27120067170445, + "grad_norm": 0.23763705871991425, + "learning_rate": 1.529680675397825e-05, + "loss": 0.4259, + "step": 8115 + }, + { + "epoch": 2.271480548558634, + "grad_norm": 0.24525825175218002, + "learning_rate": 1.5285703256159473e-05, + "loss": 0.439, + "step": 8116 + }, + { + "epoch": 2.2717604254128183, + "grad_norm": 0.24981404349348715, + "learning_rate": 1.5274603062466897e-05, + "loss": 0.4719, + "step": 8117 + }, + { + "epoch": 2.2720403022670026, + "grad_norm": 0.25704553808778363, + "learning_rate": 1.5263506173957037e-05, + "loss": 0.455, + "step": 8118 + }, + { + "epoch": 2.272320179121187, + "grad_norm": 0.24196506660176514, + "learning_rate": 1.5252412591686105e-05, + "loss": 0.4423, + "step": 8119 + }, + { + "epoch": 2.2726000559753707, + "grad_norm": 0.2547124939604785, + "learning_rate": 1.5241322316709989e-05, + "loss": 0.4688, + "step": 8120 + }, + { + "epoch": 2.272879932829555, + "grad_norm": 0.24573521875961346, + "learning_rate": 1.5230235350084271e-05, + "loss": 0.4246, + "step": 8121 + }, + { + "epoch": 2.2731598096837393, + "grad_norm": 0.25083222391365206, + "learning_rate": 1.5219151692864214e-05, + "loss": 0.4541, + "step": 8122 + }, + { + "epoch": 2.273439686537923, + "grad_norm": 0.26043181528542003, + "learning_rate": 1.5208071346104764e-05, + "loss": 0.4671, + "step": 8123 + }, + { + "epoch": 2.2737195633921075, + "grad_norm": 0.2525542715754832, + "learning_rate": 1.5196994310860562e-05, + "loss": 0.4483, + "step": 8124 + }, + { + "epoch": 2.2739994402462917, + "grad_norm": 0.24809264440721193, + "learning_rate": 1.5185920588185914e-05, + "loss": 0.4512, + "step": 8125 + }, + { + "epoch": 2.2742793171004756, + "grad_norm": 0.24484303724741177, + "learning_rate": 1.5174850179134837e-05, + "loss": 0.4337, + "step": 8126 + }, + { + "epoch": 2.27455919395466, + "grad_norm": 0.2655569281435212, + "learning_rate": 1.5163783084761012e-05, + "loss": 0.4684, + "step": 8127 + }, + { + "epoch": 2.274839070808844, + "grad_norm": 0.24443331183909747, + "learning_rate": 1.5152719306117812e-05, + "loss": 0.4613, + "step": 8128 + }, + { + "epoch": 2.275118947663028, + "grad_norm": 0.2500321468947835, + "learning_rate": 1.5141658844258289e-05, + "loss": 0.4351, + "step": 8129 + }, + { + "epoch": 2.2753988245172123, + "grad_norm": 0.26528907915259103, + "learning_rate": 1.5130601700235192e-05, + "loss": 0.4756, + "step": 8130 + }, + { + "epoch": 2.2756787013713966, + "grad_norm": 0.25078409080690917, + "learning_rate": 1.5119547875100943e-05, + "loss": 0.4623, + "step": 8131 + }, + { + "epoch": 2.275958578225581, + "grad_norm": 0.24248842102632745, + "learning_rate": 1.5108497369907648e-05, + "loss": 0.4516, + "step": 8132 + }, + { + "epoch": 2.2762384550797647, + "grad_norm": 0.24784659006939802, + "learning_rate": 1.5097450185707107e-05, + "loss": 0.4694, + "step": 8133 + }, + { + "epoch": 2.276518331933949, + "grad_norm": 0.2434622522765377, + "learning_rate": 1.5086406323550789e-05, + "loss": 0.4574, + "step": 8134 + }, + { + "epoch": 2.2767982087881333, + "grad_norm": 0.2538724860310962, + "learning_rate": 1.507536578448986e-05, + "loss": 0.4615, + "step": 8135 + }, + { + "epoch": 2.2770780856423176, + "grad_norm": 0.24416755593258385, + "learning_rate": 1.5064328569575165e-05, + "loss": 0.4455, + "step": 8136 + }, + { + "epoch": 2.2773579624965015, + "grad_norm": 0.24009991950767856, + "learning_rate": 1.5053294679857226e-05, + "loss": 0.4503, + "step": 8137 + }, + { + "epoch": 2.2776378393506858, + "grad_norm": 0.25349782407749055, + "learning_rate": 1.5042264116386267e-05, + "loss": 0.4421, + "step": 8138 + }, + { + "epoch": 2.27791771620487, + "grad_norm": 0.23701714061786106, + "learning_rate": 1.5031236880212174e-05, + "loss": 0.4424, + "step": 8139 + }, + { + "epoch": 2.278197593059054, + "grad_norm": 0.24938668142473927, + "learning_rate": 1.5020212972384528e-05, + "loss": 0.4648, + "step": 8140 + }, + { + "epoch": 2.278477469913238, + "grad_norm": 0.2581491945048349, + "learning_rate": 1.5009192393952588e-05, + "loss": 0.4625, + "step": 8141 + }, + { + "epoch": 2.2787573467674225, + "grad_norm": 0.251193013726438, + "learning_rate": 1.4998175145965305e-05, + "loss": 0.4671, + "step": 8142 + }, + { + "epoch": 2.2790372236216063, + "grad_norm": 0.2462144802011149, + "learning_rate": 1.4987161229471298e-05, + "loss": 0.463, + "step": 8143 + }, + { + "epoch": 2.2793171004757906, + "grad_norm": 0.2561687695043128, + "learning_rate": 1.4976150645518888e-05, + "loss": 0.4647, + "step": 8144 + }, + { + "epoch": 2.279596977329975, + "grad_norm": 0.2582874286202819, + "learning_rate": 1.4965143395156057e-05, + "loss": 0.4721, + "step": 8145 + }, + { + "epoch": 2.2798768541841588, + "grad_norm": 0.24647383016131252, + "learning_rate": 1.495413947943049e-05, + "loss": 0.4436, + "step": 8146 + }, + { + "epoch": 2.280156731038343, + "grad_norm": 0.2552866334765592, + "learning_rate": 1.4943138899389548e-05, + "loss": 0.4653, + "step": 8147 + }, + { + "epoch": 2.2804366078925273, + "grad_norm": 0.24920880928984912, + "learning_rate": 1.4932141656080262e-05, + "loss": 0.4562, + "step": 8148 + }, + { + "epoch": 2.2807164847467116, + "grad_norm": 0.2474984339952306, + "learning_rate": 1.4921147750549364e-05, + "loss": 0.4509, + "step": 8149 + }, + { + "epoch": 2.2809963616008955, + "grad_norm": 0.2596381026767097, + "learning_rate": 1.491015718384326e-05, + "loss": 0.4744, + "step": 8150 + }, + { + "epoch": 2.2812762384550798, + "grad_norm": 0.2594766583217914, + "learning_rate": 1.489916995700803e-05, + "loss": 0.4444, + "step": 8151 + }, + { + "epoch": 2.281556115309264, + "grad_norm": 0.26437821310395326, + "learning_rate": 1.4888186071089455e-05, + "loss": 0.447, + "step": 8152 + }, + { + "epoch": 2.281835992163448, + "grad_norm": 0.25639370080130053, + "learning_rate": 1.4877205527132982e-05, + "loss": 0.4531, + "step": 8153 + }, + { + "epoch": 2.282115869017632, + "grad_norm": 0.24847524370433224, + "learning_rate": 1.4866228326183745e-05, + "loss": 0.4738, + "step": 8154 + }, + { + "epoch": 2.2823957458718165, + "grad_norm": 0.23987257178653953, + "learning_rate": 1.4855254469286562e-05, + "loss": 0.4529, + "step": 8155 + }, + { + "epoch": 2.2826756227260008, + "grad_norm": 0.250830051068186, + "learning_rate": 1.4844283957485926e-05, + "loss": 0.4646, + "step": 8156 + }, + { + "epoch": 2.2829554995801846, + "grad_norm": 0.2535638382021271, + "learning_rate": 1.4833316791826024e-05, + "loss": 0.4383, + "step": 8157 + }, + { + "epoch": 2.283235376434369, + "grad_norm": 0.24069102606738224, + "learning_rate": 1.4822352973350712e-05, + "loss": 0.4295, + "step": 8158 + }, + { + "epoch": 2.283515253288553, + "grad_norm": 0.2499143944118685, + "learning_rate": 1.4811392503103539e-05, + "loss": 0.4617, + "step": 8159 + }, + { + "epoch": 2.283795130142737, + "grad_norm": 0.2563370698706162, + "learning_rate": 1.480043538212772e-05, + "loss": 0.4634, + "step": 8160 + }, + { + "epoch": 2.2840750069969213, + "grad_norm": 0.24612872482873813, + "learning_rate": 1.478948161146616e-05, + "loss": 0.4433, + "step": 8161 + }, + { + "epoch": 2.2843548838511056, + "grad_norm": 0.25350081895250187, + "learning_rate": 1.477853119216145e-05, + "loss": 0.4628, + "step": 8162 + }, + { + "epoch": 2.2846347607052895, + "grad_norm": 0.25489830309456557, + "learning_rate": 1.4767584125255856e-05, + "loss": 0.4705, + "step": 8163 + }, + { + "epoch": 2.2849146375594738, + "grad_norm": 0.253489068099149, + "learning_rate": 1.4756640411791328e-05, + "loss": 0.4623, + "step": 8164 + }, + { + "epoch": 2.285194514413658, + "grad_norm": 0.25731998172642323, + "learning_rate": 1.474570005280949e-05, + "loss": 0.4594, + "step": 8165 + }, + { + "epoch": 2.285474391267842, + "grad_norm": 0.249878739953495, + "learning_rate": 1.4734763049351652e-05, + "loss": 0.4528, + "step": 8166 + }, + { + "epoch": 2.285754268122026, + "grad_norm": 0.23429922575446413, + "learning_rate": 1.4723829402458812e-05, + "loss": 0.4538, + "step": 8167 + }, + { + "epoch": 2.2860341449762105, + "grad_norm": 0.24121347435384824, + "learning_rate": 1.471289911317163e-05, + "loss": 0.457, + "step": 8168 + }, + { + "epoch": 2.286314021830395, + "grad_norm": 0.24431296330442784, + "learning_rate": 1.470197218253046e-05, + "loss": 0.4508, + "step": 8169 + }, + { + "epoch": 2.2865938986845786, + "grad_norm": 0.2449363370232026, + "learning_rate": 1.4691048611575337e-05, + "loss": 0.4355, + "step": 8170 + }, + { + "epoch": 2.286873775538763, + "grad_norm": 0.24530705472179876, + "learning_rate": 1.4680128401345966e-05, + "loss": 0.442, + "step": 8171 + }, + { + "epoch": 2.287153652392947, + "grad_norm": 0.2564336937749055, + "learning_rate": 1.466921155288175e-05, + "loss": 0.4565, + "step": 8172 + }, + { + "epoch": 2.2874335292471315, + "grad_norm": 0.2365470389322341, + "learning_rate": 1.4658298067221749e-05, + "loss": 0.4211, + "step": 8173 + }, + { + "epoch": 2.2877134061013153, + "grad_norm": 0.24465723777855133, + "learning_rate": 1.4647387945404722e-05, + "loss": 0.4555, + "step": 8174 + }, + { + "epoch": 2.2879932829554996, + "grad_norm": 0.26635016025713437, + "learning_rate": 1.4636481188469097e-05, + "loss": 0.4776, + "step": 8175 + }, + { + "epoch": 2.288273159809684, + "grad_norm": 0.25383134788551887, + "learning_rate": 1.4625577797452988e-05, + "loss": 0.4383, + "step": 8176 + }, + { + "epoch": 2.288553036663868, + "grad_norm": 0.2532454461626958, + "learning_rate": 1.4614677773394181e-05, + "loss": 0.4453, + "step": 8177 + }, + { + "epoch": 2.288832913518052, + "grad_norm": 0.25733062906476273, + "learning_rate": 1.460378111733015e-05, + "loss": 0.4542, + "step": 8178 + }, + { + "epoch": 2.2891127903722364, + "grad_norm": 0.2509878337226054, + "learning_rate": 1.4592887830298046e-05, + "loss": 0.4678, + "step": 8179 + }, + { + "epoch": 2.28939266722642, + "grad_norm": 0.2565161084024464, + "learning_rate": 1.4581997913334699e-05, + "loss": 0.4613, + "step": 8180 + }, + { + "epoch": 2.2896725440806045, + "grad_norm": 0.24786064781544728, + "learning_rate": 1.4571111367476615e-05, + "loss": 0.4506, + "step": 8181 + }, + { + "epoch": 2.289952420934789, + "grad_norm": 0.25825938258403075, + "learning_rate": 1.4560228193759977e-05, + "loss": 0.4372, + "step": 8182 + }, + { + "epoch": 2.2902322977889726, + "grad_norm": 0.2569675973629223, + "learning_rate": 1.4549348393220657e-05, + "loss": 0.4535, + "step": 8183 + }, + { + "epoch": 2.290512174643157, + "grad_norm": 0.251514946659093, + "learning_rate": 1.4538471966894202e-05, + "loss": 0.471, + "step": 8184 + }, + { + "epoch": 2.290792051497341, + "grad_norm": 0.2488805215828181, + "learning_rate": 1.4527598915815832e-05, + "loss": 0.448, + "step": 8185 + }, + { + "epoch": 2.2910719283515255, + "grad_norm": 0.2552764388029711, + "learning_rate": 1.4516729241020449e-05, + "loss": 0.4355, + "step": 8186 + }, + { + "epoch": 2.2913518052057094, + "grad_norm": 0.26067368005432057, + "learning_rate": 1.4505862943542642e-05, + "loss": 0.4534, + "step": 8187 + }, + { + "epoch": 2.2916316820598936, + "grad_norm": 0.2580691067943451, + "learning_rate": 1.4495000024416666e-05, + "loss": 0.4703, + "step": 8188 + }, + { + "epoch": 2.291911558914078, + "grad_norm": 0.25961930704971553, + "learning_rate": 1.4484140484676462e-05, + "loss": 0.4463, + "step": 8189 + }, + { + "epoch": 2.292191435768262, + "grad_norm": 0.2489274522281798, + "learning_rate": 1.4473284325355647e-05, + "loss": 0.4411, + "step": 8190 + }, + { + "epoch": 2.292471312622446, + "grad_norm": 0.2391449177632102, + "learning_rate": 1.446243154748751e-05, + "loss": 0.4584, + "step": 8191 + }, + { + "epoch": 2.2927511894766304, + "grad_norm": 0.26327118042471787, + "learning_rate": 1.4451582152105032e-05, + "loss": 0.4606, + "step": 8192 + }, + { + "epoch": 2.2930310663308147, + "grad_norm": 0.24558232982989367, + "learning_rate": 1.4440736140240862e-05, + "loss": 0.4501, + "step": 8193 + }, + { + "epoch": 2.2933109431849985, + "grad_norm": 0.2641606051268955, + "learning_rate": 1.4429893512927328e-05, + "loss": 0.4615, + "step": 8194 + }, + { + "epoch": 2.293590820039183, + "grad_norm": 0.26140842053268204, + "learning_rate": 1.4419054271196441e-05, + "loss": 0.4689, + "step": 8195 + }, + { + "epoch": 2.293870696893367, + "grad_norm": 0.2623345051788462, + "learning_rate": 1.4408218416079883e-05, + "loss": 0.456, + "step": 8196 + }, + { + "epoch": 2.294150573747551, + "grad_norm": 0.2606823728849341, + "learning_rate": 1.439738594860902e-05, + "loss": 0.4547, + "step": 8197 + }, + { + "epoch": 2.2944304506017352, + "grad_norm": 0.24816251909493267, + "learning_rate": 1.4386556869814888e-05, + "loss": 0.4766, + "step": 8198 + }, + { + "epoch": 2.2947103274559195, + "grad_norm": 0.25449515930559313, + "learning_rate": 1.4375731180728208e-05, + "loss": 0.4631, + "step": 8199 + }, + { + "epoch": 2.2949902043101034, + "grad_norm": 0.2418384559191895, + "learning_rate": 1.4364908882379373e-05, + "loss": 0.469, + "step": 8200 + }, + { + "epoch": 2.2952700811642877, + "grad_norm": 0.25366734954018105, + "learning_rate": 1.4354089975798457e-05, + "loss": 0.4555, + "step": 8201 + }, + { + "epoch": 2.295549958018472, + "grad_norm": 0.24591943345385542, + "learning_rate": 1.434327446201521e-05, + "loss": 0.4393, + "step": 8202 + }, + { + "epoch": 2.295829834872656, + "grad_norm": 0.24941628216726364, + "learning_rate": 1.4332462342059055e-05, + "loss": 0.4611, + "step": 8203 + }, + { + "epoch": 2.29610971172684, + "grad_norm": 0.25261349961534235, + "learning_rate": 1.4321653616959097e-05, + "loss": 0.436, + "step": 8204 + }, + { + "epoch": 2.2963895885810244, + "grad_norm": 0.26218510261393874, + "learning_rate": 1.4310848287744122e-05, + "loss": 0.4636, + "step": 8205 + }, + { + "epoch": 2.2966694654352087, + "grad_norm": 0.2565325292751425, + "learning_rate": 1.4300046355442581e-05, + "loss": 0.4766, + "step": 8206 + }, + { + "epoch": 2.2969493422893925, + "grad_norm": 0.2592283565934206, + "learning_rate": 1.4289247821082613e-05, + "loss": 0.4566, + "step": 8207 + }, + { + "epoch": 2.297229219143577, + "grad_norm": 0.26156076257911903, + "learning_rate": 1.4278452685692023e-05, + "loss": 0.4737, + "step": 8208 + }, + { + "epoch": 2.297509095997761, + "grad_norm": 0.253839003034228, + "learning_rate": 1.4267660950298301e-05, + "loss": 0.4742, + "step": 8209 + }, + { + "epoch": 2.2977889728519454, + "grad_norm": 0.24817123267220337, + "learning_rate": 1.4256872615928607e-05, + "loss": 0.4531, + "step": 8210 + }, + { + "epoch": 2.2980688497061292, + "grad_norm": 0.24552989846882364, + "learning_rate": 1.4246087683609789e-05, + "loss": 0.4432, + "step": 8211 + }, + { + "epoch": 2.2983487265603135, + "grad_norm": 0.24874254309051902, + "learning_rate": 1.4235306154368355e-05, + "loss": 0.466, + "step": 8212 + }, + { + "epoch": 2.298628603414498, + "grad_norm": 0.24071033384046536, + "learning_rate": 1.42245280292305e-05, + "loss": 0.4353, + "step": 8213 + }, + { + "epoch": 2.2989084802686817, + "grad_norm": 0.23276705113483642, + "learning_rate": 1.4213753309222089e-05, + "loss": 0.4398, + "step": 8214 + }, + { + "epoch": 2.299188357122866, + "grad_norm": 0.2523788869113829, + "learning_rate": 1.420298199536867e-05, + "loss": 0.4476, + "step": 8215 + }, + { + "epoch": 2.2994682339770502, + "grad_norm": 0.23698737418413152, + "learning_rate": 1.419221408869546e-05, + "loss": 0.4337, + "step": 8216 + }, + { + "epoch": 2.299748110831234, + "grad_norm": 0.2581606112338792, + "learning_rate": 1.4181449590227359e-05, + "loss": 0.4657, + "step": 8217 + }, + { + "epoch": 2.3000279876854184, + "grad_norm": 0.2575486533079859, + "learning_rate": 1.4170688500988933e-05, + "loss": 0.4554, + "step": 8218 + }, + { + "epoch": 2.3003078645396027, + "grad_norm": 0.24648739397368136, + "learning_rate": 1.415993082200443e-05, + "loss": 0.4321, + "step": 8219 + }, + { + "epoch": 2.3005877413937865, + "grad_norm": 0.2550029914456197, + "learning_rate": 1.4149176554297772e-05, + "loss": 0.4637, + "step": 8220 + }, + { + "epoch": 2.300867618247971, + "grad_norm": 0.2343995984996954, + "learning_rate": 1.4138425698892555e-05, + "loss": 0.414, + "step": 8221 + }, + { + "epoch": 2.301147495102155, + "grad_norm": 0.24341070300162065, + "learning_rate": 1.4127678256812055e-05, + "loss": 0.4367, + "step": 8222 + }, + { + "epoch": 2.3014273719563394, + "grad_norm": 0.24704913384199684, + "learning_rate": 1.4116934229079215e-05, + "loss": 0.4527, + "step": 8223 + }, + { + "epoch": 2.3017072488105232, + "grad_norm": 0.23806248083878626, + "learning_rate": 1.4106193616716662e-05, + "loss": 0.4478, + "step": 8224 + }, + { + "epoch": 2.3019871256647075, + "grad_norm": 0.2553295484170634, + "learning_rate": 1.4095456420746688e-05, + "loss": 0.4541, + "step": 8225 + }, + { + "epoch": 2.302267002518892, + "grad_norm": 0.25511032208447576, + "learning_rate": 1.4084722642191267e-05, + "loss": 0.4418, + "step": 8226 + }, + { + "epoch": 2.3025468793730757, + "grad_norm": 0.26250324891176086, + "learning_rate": 1.4073992282072052e-05, + "loss": 0.4686, + "step": 8227 + }, + { + "epoch": 2.30282675622726, + "grad_norm": 0.2505089484194936, + "learning_rate": 1.4063265341410359e-05, + "loss": 0.439, + "step": 8228 + }, + { + "epoch": 2.3031066330814443, + "grad_norm": 0.2492830985896294, + "learning_rate": 1.405254182122719e-05, + "loss": 0.4368, + "step": 8229 + }, + { + "epoch": 2.3033865099356285, + "grad_norm": 0.2378450618701384, + "learning_rate": 1.4041821722543203e-05, + "loss": 0.4474, + "step": 8230 + }, + { + "epoch": 2.3036663867898124, + "grad_norm": 0.2588730283123059, + "learning_rate": 1.4031105046378756e-05, + "loss": 0.4239, + "step": 8231 + }, + { + "epoch": 2.3039462636439967, + "grad_norm": 0.2555618257380051, + "learning_rate": 1.4020391793753863e-05, + "loss": 0.4599, + "step": 8232 + }, + { + "epoch": 2.304226140498181, + "grad_norm": 0.25581666827708954, + "learning_rate": 1.4009681965688198e-05, + "loss": 0.4412, + "step": 8233 + }, + { + "epoch": 2.304506017352365, + "grad_norm": 0.2560991316176909, + "learning_rate": 1.3998975563201183e-05, + "loss": 0.4447, + "step": 8234 + }, + { + "epoch": 2.304785894206549, + "grad_norm": 0.2463972513662189, + "learning_rate": 1.3988272587311807e-05, + "loss": 0.4533, + "step": 8235 + }, + { + "epoch": 2.3050657710607334, + "grad_norm": 0.24653116877321052, + "learning_rate": 1.3977573039038804e-05, + "loss": 0.4473, + "step": 8236 + }, + { + "epoch": 2.3053456479149173, + "grad_norm": 0.26062490064044275, + "learning_rate": 1.3966876919400562e-05, + "loss": 0.4437, + "step": 8237 + }, + { + "epoch": 2.3056255247691015, + "grad_norm": 0.24942429844838426, + "learning_rate": 1.3956184229415148e-05, + "loss": 0.4625, + "step": 8238 + }, + { + "epoch": 2.305905401623286, + "grad_norm": 0.2532496257644462, + "learning_rate": 1.3945494970100286e-05, + "loss": 0.4569, + "step": 8239 + }, + { + "epoch": 2.3061852784774697, + "grad_norm": 0.25576705083040635, + "learning_rate": 1.3934809142473399e-05, + "loss": 0.4577, + "step": 8240 + }, + { + "epoch": 2.306465155331654, + "grad_norm": 0.2497333718141048, + "learning_rate": 1.3924126747551557e-05, + "loss": 0.4423, + "step": 8241 + }, + { + "epoch": 2.3067450321858383, + "grad_norm": 0.25013346789155255, + "learning_rate": 1.391344778635153e-05, + "loss": 0.4641, + "step": 8242 + }, + { + "epoch": 2.3070249090400226, + "grad_norm": 0.2519152201595828, + "learning_rate": 1.390277225988974e-05, + "loss": 0.4635, + "step": 8243 + }, + { + "epoch": 2.3073047858942064, + "grad_norm": 0.2656688470446249, + "learning_rate": 1.3892100169182292e-05, + "loss": 0.4524, + "step": 8244 + }, + { + "epoch": 2.3075846627483907, + "grad_norm": 0.24355235271055817, + "learning_rate": 1.3881431515244958e-05, + "loss": 0.4342, + "step": 8245 + }, + { + "epoch": 2.307864539602575, + "grad_norm": 0.26436980827927553, + "learning_rate": 1.3870766299093191e-05, + "loss": 0.4747, + "step": 8246 + }, + { + "epoch": 2.3081444164567593, + "grad_norm": 0.25831009264726085, + "learning_rate": 1.3860104521742106e-05, + "loss": 0.4618, + "step": 8247 + }, + { + "epoch": 2.308424293310943, + "grad_norm": 0.2672249877325695, + "learning_rate": 1.3849446184206506e-05, + "loss": 0.4448, + "step": 8248 + }, + { + "epoch": 2.3087041701651274, + "grad_norm": 0.25312586834946443, + "learning_rate": 1.3838791287500852e-05, + "loss": 0.4581, + "step": 8249 + }, + { + "epoch": 2.3089840470193117, + "grad_norm": 0.24347548711792347, + "learning_rate": 1.3828139832639281e-05, + "loss": 0.444, + "step": 8250 + }, + { + "epoch": 2.3092639238734955, + "grad_norm": 0.24634867984844497, + "learning_rate": 1.3817491820635592e-05, + "loss": 0.4416, + "step": 8251 + }, + { + "epoch": 2.30954380072768, + "grad_norm": 0.25697191736106406, + "learning_rate": 1.3806847252503313e-05, + "loss": 0.4618, + "step": 8252 + }, + { + "epoch": 2.309823677581864, + "grad_norm": 0.2713855878912528, + "learning_rate": 1.3796206129255557e-05, + "loss": 0.46, + "step": 8253 + }, + { + "epoch": 2.310103554436048, + "grad_norm": 0.24101979738734647, + "learning_rate": 1.3785568451905167e-05, + "loss": 0.4295, + "step": 8254 + }, + { + "epoch": 2.3103834312902323, + "grad_norm": 0.24317281038151736, + "learning_rate": 1.3774934221464642e-05, + "loss": 0.4498, + "step": 8255 + }, + { + "epoch": 2.3106633081444166, + "grad_norm": 0.25011964806713044, + "learning_rate": 1.3764303438946152e-05, + "loss": 0.4438, + "step": 8256 + }, + { + "epoch": 2.3109431849986004, + "grad_norm": 0.2450960944441535, + "learning_rate": 1.3753676105361545e-05, + "loss": 0.4497, + "step": 8257 + }, + { + "epoch": 2.3112230618527847, + "grad_norm": 0.25587246810033215, + "learning_rate": 1.374305222172233e-05, + "loss": 0.4559, + "step": 8258 + }, + { + "epoch": 2.311502938706969, + "grad_norm": 0.25257742847074705, + "learning_rate": 1.3732431789039702e-05, + "loss": 0.4559, + "step": 8259 + }, + { + "epoch": 2.3117828155611533, + "grad_norm": 0.24499552379034534, + "learning_rate": 1.3721814808324519e-05, + "loss": 0.4539, + "step": 8260 + }, + { + "epoch": 2.312062692415337, + "grad_norm": 0.2519249772197435, + "learning_rate": 1.3711201280587305e-05, + "loss": 0.4666, + "step": 8261 + }, + { + "epoch": 2.3123425692695214, + "grad_norm": 0.2454133835186868, + "learning_rate": 1.3700591206838264e-05, + "loss": 0.4671, + "step": 8262 + }, + { + "epoch": 2.3126224461237057, + "grad_norm": 0.24724259151477296, + "learning_rate": 1.3689984588087268e-05, + "loss": 0.4649, + "step": 8263 + }, + { + "epoch": 2.3129023229778896, + "grad_norm": 0.2526043702303986, + "learning_rate": 1.3679381425343867e-05, + "loss": 0.4318, + "step": 8264 + }, + { + "epoch": 2.313182199832074, + "grad_norm": 0.2639671356249918, + "learning_rate": 1.3668781719617269e-05, + "loss": 0.4431, + "step": 8265 + }, + { + "epoch": 2.313462076686258, + "grad_norm": 0.2459965316812249, + "learning_rate": 1.3658185471916363e-05, + "loss": 0.4574, + "step": 8266 + }, + { + "epoch": 2.3137419535404424, + "grad_norm": 0.258995108849743, + "learning_rate": 1.3647592683249706e-05, + "loss": 0.4735, + "step": 8267 + }, + { + "epoch": 2.3140218303946263, + "grad_norm": 0.25323242675205754, + "learning_rate": 1.3637003354625505e-05, + "loss": 0.4518, + "step": 8268 + }, + { + "epoch": 2.3143017072488106, + "grad_norm": 0.2569839965095759, + "learning_rate": 1.3626417487051695e-05, + "loss": 0.4636, + "step": 8269 + }, + { + "epoch": 2.314581584102995, + "grad_norm": 0.2583915219613413, + "learning_rate": 1.3615835081535844e-05, + "loss": 0.4772, + "step": 8270 + }, + { + "epoch": 2.3148614609571787, + "grad_norm": 0.26519040963695417, + "learning_rate": 1.3605256139085166e-05, + "loss": 0.4751, + "step": 8271 + }, + { + "epoch": 2.315141337811363, + "grad_norm": 0.2544051272367222, + "learning_rate": 1.359468066070657e-05, + "loss": 0.4478, + "step": 8272 + }, + { + "epoch": 2.3154212146655473, + "grad_norm": 0.25904166475846885, + "learning_rate": 1.3584108647406651e-05, + "loss": 0.4367, + "step": 8273 + }, + { + "epoch": 2.315701091519731, + "grad_norm": 0.24203488420889496, + "learning_rate": 1.357354010019165e-05, + "loss": 0.452, + "step": 8274 + }, + { + "epoch": 2.3159809683739154, + "grad_norm": 0.24408034041809595, + "learning_rate": 1.3562975020067498e-05, + "loss": 0.476, + "step": 8275 + }, + { + "epoch": 2.3162608452280997, + "grad_norm": 0.25098600027808327, + "learning_rate": 1.3552413408039772e-05, + "loss": 0.4441, + "step": 8276 + }, + { + "epoch": 2.3165407220822836, + "grad_norm": 0.25957133328367177, + "learning_rate": 1.354185526511374e-05, + "loss": 0.4705, + "step": 8277 + }, + { + "epoch": 2.316820598936468, + "grad_norm": 0.25482623346727046, + "learning_rate": 1.3531300592294333e-05, + "loss": 0.459, + "step": 8278 + }, + { + "epoch": 2.317100475790652, + "grad_norm": 0.2455638731764033, + "learning_rate": 1.352074939058614e-05, + "loss": 0.4434, + "step": 8279 + }, + { + "epoch": 2.3173803526448364, + "grad_norm": 0.27113276648181606, + "learning_rate": 1.3510201660993449e-05, + "loss": 0.4702, + "step": 8280 + }, + { + "epoch": 2.3176602294990203, + "grad_norm": 0.2583368965508004, + "learning_rate": 1.3499657404520182e-05, + "loss": 0.4539, + "step": 8281 + }, + { + "epoch": 2.3179401063532046, + "grad_norm": 0.2412400044041814, + "learning_rate": 1.348911662216995e-05, + "loss": 0.4405, + "step": 8282 + }, + { + "epoch": 2.318219983207389, + "grad_norm": 0.24332594539723115, + "learning_rate": 1.3478579314946044e-05, + "loss": 0.4318, + "step": 8283 + }, + { + "epoch": 2.3184998600615727, + "grad_norm": 0.24523850364563363, + "learning_rate": 1.3468045483851393e-05, + "loss": 0.4529, + "step": 8284 + }, + { + "epoch": 2.318779736915757, + "grad_norm": 0.2502813446703391, + "learning_rate": 1.345751512988861e-05, + "loss": 0.4599, + "step": 8285 + }, + { + "epoch": 2.3190596137699413, + "grad_norm": 0.24642028360544369, + "learning_rate": 1.344698825406e-05, + "loss": 0.454, + "step": 8286 + }, + { + "epoch": 2.3193394906241256, + "grad_norm": 0.23968596721138646, + "learning_rate": 1.3436464857367514e-05, + "loss": 0.4592, + "step": 8287 + }, + { + "epoch": 2.3196193674783094, + "grad_norm": 0.2483871896176604, + "learning_rate": 1.342594494081278e-05, + "loss": 0.47, + "step": 8288 + }, + { + "epoch": 2.3198992443324937, + "grad_norm": 0.2542722786790647, + "learning_rate": 1.341542850539706e-05, + "loss": 0.4811, + "step": 8289 + }, + { + "epoch": 2.320179121186678, + "grad_norm": 0.25567761198216693, + "learning_rate": 1.3404915552121339e-05, + "loss": 0.4599, + "step": 8290 + }, + { + "epoch": 2.320458998040862, + "grad_norm": 0.2403457902353109, + "learning_rate": 1.3394406081986233e-05, + "loss": 0.4513, + "step": 8291 + }, + { + "epoch": 2.320738874895046, + "grad_norm": 0.25775077918557937, + "learning_rate": 1.3383900095992047e-05, + "loss": 0.4445, + "step": 8292 + }, + { + "epoch": 2.3210187517492304, + "grad_norm": 0.2458236879822526, + "learning_rate": 1.3373397595138749e-05, + "loss": 0.4582, + "step": 8293 + }, + { + "epoch": 2.3212986286034143, + "grad_norm": 0.249500610212342, + "learning_rate": 1.3362898580425964e-05, + "loss": 0.4599, + "step": 8294 + }, + { + "epoch": 2.3215785054575986, + "grad_norm": 0.25463565599713955, + "learning_rate": 1.3352403052853002e-05, + "loss": 0.4559, + "step": 8295 + }, + { + "epoch": 2.321858382311783, + "grad_norm": 0.2525468242326315, + "learning_rate": 1.3341911013418823e-05, + "loss": 0.4341, + "step": 8296 + }, + { + "epoch": 2.322138259165967, + "grad_norm": 0.24353159669023183, + "learning_rate": 1.3331422463122078e-05, + "loss": 0.456, + "step": 8297 + }, + { + "epoch": 2.322418136020151, + "grad_norm": 0.25635587644285407, + "learning_rate": 1.332093740296107e-05, + "loss": 0.4445, + "step": 8298 + }, + { + "epoch": 2.3226980128743353, + "grad_norm": 0.25652748059284, + "learning_rate": 1.3310455833933767e-05, + "loss": 0.4607, + "step": 8299 + }, + { + "epoch": 2.3229778897285196, + "grad_norm": 0.2435045692065939, + "learning_rate": 1.3299977757037813e-05, + "loss": 0.4768, + "step": 8300 + }, + { + "epoch": 2.3232577665827034, + "grad_norm": 0.2645763871535758, + "learning_rate": 1.3289503173270519e-05, + "loss": 0.453, + "step": 8301 + }, + { + "epoch": 2.3235376434368877, + "grad_norm": 0.2594497147849001, + "learning_rate": 1.3279032083628856e-05, + "loss": 0.4698, + "step": 8302 + }, + { + "epoch": 2.323817520291072, + "grad_norm": 0.2510893759452567, + "learning_rate": 1.3268564489109463e-05, + "loss": 0.4556, + "step": 8303 + }, + { + "epoch": 2.3240973971452563, + "grad_norm": 0.2690127713973988, + "learning_rate": 1.3258100390708678e-05, + "loss": 0.4804, + "step": 8304 + }, + { + "epoch": 2.32437727399944, + "grad_norm": 0.25566558146444035, + "learning_rate": 1.3247639789422455e-05, + "loss": 0.4621, + "step": 8305 + }, + { + "epoch": 2.3246571508536245, + "grad_norm": 0.2535564702423962, + "learning_rate": 1.3237182686246468e-05, + "loss": 0.4535, + "step": 8306 + }, + { + "epoch": 2.3249370277078087, + "grad_norm": 0.29939560314990055, + "learning_rate": 1.3226729082175998e-05, + "loss": 0.4631, + "step": 8307 + }, + { + "epoch": 2.3252169045619926, + "grad_norm": 0.24921423058478057, + "learning_rate": 1.3216278978206037e-05, + "loss": 0.437, + "step": 8308 + }, + { + "epoch": 2.325496781416177, + "grad_norm": 0.2505100630181456, + "learning_rate": 1.3205832375331228e-05, + "loss": 0.472, + "step": 8309 + }, + { + "epoch": 2.325776658270361, + "grad_norm": 0.26485177491182993, + "learning_rate": 1.3195389274545888e-05, + "loss": 0.4449, + "step": 8310 + }, + { + "epoch": 2.326056535124545, + "grad_norm": 0.25579492366756157, + "learning_rate": 1.3184949676844e-05, + "loss": 0.4575, + "step": 8311 + }, + { + "epoch": 2.3263364119787293, + "grad_norm": 0.25150989588368383, + "learning_rate": 1.3174513583219206e-05, + "loss": 0.4295, + "step": 8312 + }, + { + "epoch": 2.3266162888329136, + "grad_norm": 0.25062198093585214, + "learning_rate": 1.3164080994664819e-05, + "loss": 0.4744, + "step": 8313 + }, + { + "epoch": 2.3268961656870975, + "grad_norm": 0.25611396668986003, + "learning_rate": 1.3153651912173825e-05, + "loss": 0.45, + "step": 8314 + }, + { + "epoch": 2.3271760425412817, + "grad_norm": 0.25084896562845693, + "learning_rate": 1.3143226336738862e-05, + "loss": 0.4595, + "step": 8315 + }, + { + "epoch": 2.327455919395466, + "grad_norm": 0.25015225652760464, + "learning_rate": 1.313280426935225e-05, + "loss": 0.4619, + "step": 8316 + }, + { + "epoch": 2.3277357962496503, + "grad_norm": 0.26072051902000226, + "learning_rate": 1.3122385711005963e-05, + "loss": 0.4649, + "step": 8317 + }, + { + "epoch": 2.328015673103834, + "grad_norm": 0.2620674602640789, + "learning_rate": 1.3111970662691641e-05, + "loss": 0.4542, + "step": 8318 + }, + { + "epoch": 2.3282955499580185, + "grad_norm": 0.2462533774998526, + "learning_rate": 1.3101559125400603e-05, + "loss": 0.4607, + "step": 8319 + }, + { + "epoch": 2.3285754268122028, + "grad_norm": 0.2486600260013821, + "learning_rate": 1.3091151100123806e-05, + "loss": 0.4387, + "step": 8320 + }, + { + "epoch": 2.3288553036663866, + "grad_norm": 0.2578339282929137, + "learning_rate": 1.3080746587851917e-05, + "loss": 0.4611, + "step": 8321 + }, + { + "epoch": 2.329135180520571, + "grad_norm": 0.24276440019133075, + "learning_rate": 1.3070345589575234e-05, + "loss": 0.4372, + "step": 8322 + }, + { + "epoch": 2.329415057374755, + "grad_norm": 0.26731122717672473, + "learning_rate": 1.3059948106283725e-05, + "loss": 0.4706, + "step": 8323 + }, + { + "epoch": 2.3296949342289395, + "grad_norm": 0.250612444759731, + "learning_rate": 1.3049554138967051e-05, + "loss": 0.4648, + "step": 8324 + }, + { + "epoch": 2.3299748110831233, + "grad_norm": 0.24418046783448943, + "learning_rate": 1.303916368861448e-05, + "loss": 0.4243, + "step": 8325 + }, + { + "epoch": 2.3302546879373076, + "grad_norm": 0.25864633555230676, + "learning_rate": 1.3028776756215e-05, + "loss": 0.4628, + "step": 8326 + }, + { + "epoch": 2.330534564791492, + "grad_norm": 0.2596393502184638, + "learning_rate": 1.3018393342757235e-05, + "loss": 0.4467, + "step": 8327 + }, + { + "epoch": 2.3308144416456757, + "grad_norm": 0.2502298990120087, + "learning_rate": 1.3008013449229494e-05, + "loss": 0.4613, + "step": 8328 + }, + { + "epoch": 2.33109431849986, + "grad_norm": 0.25085265852572536, + "learning_rate": 1.299763707661974e-05, + "loss": 0.4682, + "step": 8329 + }, + { + "epoch": 2.3313741953540443, + "grad_norm": 0.2420344453522528, + "learning_rate": 1.2987264225915601e-05, + "loss": 0.4516, + "step": 8330 + }, + { + "epoch": 2.331654072208228, + "grad_norm": 0.25060684699525815, + "learning_rate": 1.297689489810437e-05, + "loss": 0.4456, + "step": 8331 + }, + { + "epoch": 2.3319339490624125, + "grad_norm": 0.24789521553675894, + "learning_rate": 1.2966529094173002e-05, + "loss": 0.4354, + "step": 8332 + }, + { + "epoch": 2.3322138259165968, + "grad_norm": 0.2553686594992788, + "learning_rate": 1.2956166815108128e-05, + "loss": 0.4716, + "step": 8333 + }, + { + "epoch": 2.3324937027707806, + "grad_norm": 0.2520626030690028, + "learning_rate": 1.2945808061896025e-05, + "loss": 0.4497, + "step": 8334 + }, + { + "epoch": 2.332773579624965, + "grad_norm": 0.26246718718579354, + "learning_rate": 1.2935452835522654e-05, + "loss": 0.4798, + "step": 8335 + }, + { + "epoch": 2.333053456479149, + "grad_norm": 0.25996697497970495, + "learning_rate": 1.2925101136973627e-05, + "loss": 0.4548, + "step": 8336 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.252207790482802, + "learning_rate": 1.2914752967234211e-05, + "loss": 0.4464, + "step": 8337 + }, + { + "epoch": 2.3336132101875173, + "grad_norm": 0.2571888889923139, + "learning_rate": 1.290440832728938e-05, + "loss": 0.4579, + "step": 8338 + }, + { + "epoch": 2.3338930870417016, + "grad_norm": 0.2462320302594575, + "learning_rate": 1.2894067218123729e-05, + "loss": 0.4408, + "step": 8339 + }, + { + "epoch": 2.334172963895886, + "grad_norm": 0.24476831206771804, + "learning_rate": 1.2883729640721531e-05, + "loss": 0.437, + "step": 8340 + }, + { + "epoch": 2.33445284075007, + "grad_norm": 0.24245968798802456, + "learning_rate": 1.2873395596066718e-05, + "loss": 0.417, + "step": 8341 + }, + { + "epoch": 2.334732717604254, + "grad_norm": 0.2493309105517221, + "learning_rate": 1.2863065085142906e-05, + "loss": 0.4459, + "step": 8342 + }, + { + "epoch": 2.3350125944584383, + "grad_norm": 0.24558585019024368, + "learning_rate": 1.2852738108933332e-05, + "loss": 0.4524, + "step": 8343 + }, + { + "epoch": 2.3352924713126226, + "grad_norm": 0.261617174287669, + "learning_rate": 1.2842414668420938e-05, + "loss": 0.4634, + "step": 8344 + }, + { + "epoch": 2.3355723481668065, + "grad_norm": 0.2596256719318115, + "learning_rate": 1.2832094764588316e-05, + "loss": 0.4547, + "step": 8345 + }, + { + "epoch": 2.3358522250209908, + "grad_norm": 0.24890030706086635, + "learning_rate": 1.2821778398417716e-05, + "loss": 0.453, + "step": 8346 + }, + { + "epoch": 2.336132101875175, + "grad_norm": 0.2506008504759721, + "learning_rate": 1.2811465570891057e-05, + "loss": 0.4562, + "step": 8347 + }, + { + "epoch": 2.336411978729359, + "grad_norm": 0.24276476872341263, + "learning_rate": 1.2801156282989918e-05, + "loss": 0.4459, + "step": 8348 + }, + { + "epoch": 2.336691855583543, + "grad_norm": 0.24679338909226162, + "learning_rate": 1.2790850535695548e-05, + "loss": 0.4465, + "step": 8349 + }, + { + "epoch": 2.3369717324377275, + "grad_norm": 0.25632954802168545, + "learning_rate": 1.2780548329988851e-05, + "loss": 0.4708, + "step": 8350 + }, + { + "epoch": 2.3372516092919113, + "grad_norm": 0.24926302589325672, + "learning_rate": 1.2770249666850399e-05, + "loss": 0.4419, + "step": 8351 + }, + { + "epoch": 2.3375314861460956, + "grad_norm": 0.245843961577653, + "learning_rate": 1.2759954547260417e-05, + "loss": 0.4566, + "step": 8352 + }, + { + "epoch": 2.33781136300028, + "grad_norm": 0.26314821525666565, + "learning_rate": 1.2749662972198806e-05, + "loss": 0.4622, + "step": 8353 + }, + { + "epoch": 2.338091239854464, + "grad_norm": 0.2606144564266123, + "learning_rate": 1.273937494264511e-05, + "loss": 0.4645, + "step": 8354 + }, + { + "epoch": 2.338371116708648, + "grad_norm": 0.27126219153303943, + "learning_rate": 1.272909045957858e-05, + "loss": 0.4775, + "step": 8355 + }, + { + "epoch": 2.3386509935628323, + "grad_norm": 0.25245097134514854, + "learning_rate": 1.2718809523978077e-05, + "loss": 0.4537, + "step": 8356 + }, + { + "epoch": 2.3389308704170166, + "grad_norm": 0.24209596472168504, + "learning_rate": 1.2708532136822155e-05, + "loss": 0.4348, + "step": 8357 + }, + { + "epoch": 2.3392107472712005, + "grad_norm": 0.25556493560390914, + "learning_rate": 1.2698258299089011e-05, + "loss": 0.4608, + "step": 8358 + }, + { + "epoch": 2.3394906241253848, + "grad_norm": 0.248939787297999, + "learning_rate": 1.2687988011756524e-05, + "loss": 0.437, + "step": 8359 + }, + { + "epoch": 2.339770500979569, + "grad_norm": 0.24750924242357977, + "learning_rate": 1.267772127580224e-05, + "loss": 0.4555, + "step": 8360 + }, + { + "epoch": 2.3400503778337534, + "grad_norm": 0.25532401963847123, + "learning_rate": 1.2667458092203316e-05, + "loss": 0.4495, + "step": 8361 + }, + { + "epoch": 2.340330254687937, + "grad_norm": 0.24882237368089657, + "learning_rate": 1.2657198461936632e-05, + "loss": 0.4673, + "step": 8362 + }, + { + "epoch": 2.3406101315421215, + "grad_norm": 0.25017244077714124, + "learning_rate": 1.2646942385978695e-05, + "loss": 0.4652, + "step": 8363 + }, + { + "epoch": 2.340890008396306, + "grad_norm": 0.25600251884801806, + "learning_rate": 1.263668986530569e-05, + "loss": 0.4641, + "step": 8364 + }, + { + "epoch": 2.3411698852504896, + "grad_norm": 0.24819184545591527, + "learning_rate": 1.2626440900893461e-05, + "loss": 0.4309, + "step": 8365 + }, + { + "epoch": 2.341449762104674, + "grad_norm": 0.2592169963027563, + "learning_rate": 1.2616195493717503e-05, + "loss": 0.4646, + "step": 8366 + }, + { + "epoch": 2.341729638958858, + "grad_norm": 0.2401897302540128, + "learning_rate": 1.2605953644752983e-05, + "loss": 0.4501, + "step": 8367 + }, + { + "epoch": 2.342009515813042, + "grad_norm": 0.25976734294746356, + "learning_rate": 1.2595715354974725e-05, + "loss": 0.4697, + "step": 8368 + }, + { + "epoch": 2.3422893926672264, + "grad_norm": 0.2562485280560772, + "learning_rate": 1.2585480625357215e-05, + "loss": 0.4522, + "step": 8369 + }, + { + "epoch": 2.3425692695214106, + "grad_norm": 0.2535452702032081, + "learning_rate": 1.25752494568746e-05, + "loss": 0.4643, + "step": 8370 + }, + { + "epoch": 2.3428491463755945, + "grad_norm": 0.26249151307478963, + "learning_rate": 1.2565021850500692e-05, + "loss": 0.4508, + "step": 8371 + }, + { + "epoch": 2.343129023229779, + "grad_norm": 0.24245600725469135, + "learning_rate": 1.255479780720894e-05, + "loss": 0.4602, + "step": 8372 + }, + { + "epoch": 2.343408900083963, + "grad_norm": 0.24935167034834935, + "learning_rate": 1.2544577327972506e-05, + "loss": 0.4255, + "step": 8373 + }, + { + "epoch": 2.3436887769381474, + "grad_norm": 0.24987231561353682, + "learning_rate": 1.2534360413764169e-05, + "loss": 0.4634, + "step": 8374 + }, + { + "epoch": 2.343968653792331, + "grad_norm": 0.24784216651321836, + "learning_rate": 1.252414706555638e-05, + "loss": 0.4402, + "step": 8375 + }, + { + "epoch": 2.3442485306465155, + "grad_norm": 0.2522270909559883, + "learning_rate": 1.2513937284321247e-05, + "loss": 0.4517, + "step": 8376 + }, + { + "epoch": 2.3445284075007, + "grad_norm": 0.2552643636788191, + "learning_rate": 1.2503731071030545e-05, + "loss": 0.4512, + "step": 8377 + }, + { + "epoch": 2.344808284354884, + "grad_norm": 0.23577579148392627, + "learning_rate": 1.249352842665572e-05, + "loss": 0.463, + "step": 8378 + }, + { + "epoch": 2.345088161209068, + "grad_norm": 0.2478778888263256, + "learning_rate": 1.2483329352167845e-05, + "loss": 0.4248, + "step": 8379 + }, + { + "epoch": 2.3453680380632522, + "grad_norm": 0.24702159194094486, + "learning_rate": 1.2473133848537672e-05, + "loss": 0.443, + "step": 8380 + }, + { + "epoch": 2.3456479149174365, + "grad_norm": 0.2653285232474737, + "learning_rate": 1.2462941916735632e-05, + "loss": 0.462, + "step": 8381 + }, + { + "epoch": 2.3459277917716204, + "grad_norm": 0.2617016511643717, + "learning_rate": 1.2452753557731784e-05, + "loss": 0.4475, + "step": 8382 + }, + { + "epoch": 2.3462076686258047, + "grad_norm": 0.25530482866239995, + "learning_rate": 1.2442568772495872e-05, + "loss": 0.4545, + "step": 8383 + }, + { + "epoch": 2.346487545479989, + "grad_norm": 0.24709930348089, + "learning_rate": 1.2432387561997288e-05, + "loss": 0.4354, + "step": 8384 + }, + { + "epoch": 2.346767422334173, + "grad_norm": 0.25460629853692296, + "learning_rate": 1.242220992720508e-05, + "loss": 0.4477, + "step": 8385 + }, + { + "epoch": 2.347047299188357, + "grad_norm": 0.2443884228766934, + "learning_rate": 1.2412035869087967e-05, + "loss": 0.4487, + "step": 8386 + }, + { + "epoch": 2.3473271760425414, + "grad_norm": 0.25595280642141804, + "learning_rate": 1.2401865388614315e-05, + "loss": 0.4485, + "step": 8387 + }, + { + "epoch": 2.347607052896725, + "grad_norm": 0.2487615002853761, + "learning_rate": 1.2391698486752162e-05, + "loss": 0.4558, + "step": 8388 + }, + { + "epoch": 2.3478869297509095, + "grad_norm": 0.24954185753813174, + "learning_rate": 1.2381535164469183e-05, + "loss": 0.4618, + "step": 8389 + }, + { + "epoch": 2.348166806605094, + "grad_norm": 0.253669681545023, + "learning_rate": 1.2371375422732761e-05, + "loss": 0.4451, + "step": 8390 + }, + { + "epoch": 2.348446683459278, + "grad_norm": 0.262076025150562, + "learning_rate": 1.2361219262509883e-05, + "loss": 0.4623, + "step": 8391 + }, + { + "epoch": 2.348726560313462, + "grad_norm": 0.2611064528389205, + "learning_rate": 1.2351066684767226e-05, + "loss": 0.4491, + "step": 8392 + }, + { + "epoch": 2.3490064371676462, + "grad_norm": 0.2517744268132719, + "learning_rate": 1.234091769047111e-05, + "loss": 0.4273, + "step": 8393 + }, + { + "epoch": 2.3492863140218305, + "grad_norm": 0.256443946686235, + "learning_rate": 1.2330772280587532e-05, + "loss": 0.4418, + "step": 8394 + }, + { + "epoch": 2.3495661908760144, + "grad_norm": 0.24454576138412137, + "learning_rate": 1.2320630456082133e-05, + "loss": 0.4282, + "step": 8395 + }, + { + "epoch": 2.3498460677301987, + "grad_norm": 0.2563834790872159, + "learning_rate": 1.2310492217920227e-05, + "loss": 0.4357, + "step": 8396 + }, + { + "epoch": 2.350125944584383, + "grad_norm": 0.2600202692628765, + "learning_rate": 1.2300357567066756e-05, + "loss": 0.4734, + "step": 8397 + }, + { + "epoch": 2.3504058214385672, + "grad_norm": 0.24637218071644576, + "learning_rate": 1.2290226504486351e-05, + "loss": 0.4459, + "step": 8398 + }, + { + "epoch": 2.350685698292751, + "grad_norm": 0.24790095006311919, + "learning_rate": 1.22800990311433e-05, + "loss": 0.4442, + "step": 8399 + }, + { + "epoch": 2.3509655751469354, + "grad_norm": 0.25465728953280475, + "learning_rate": 1.2269975148001534e-05, + "loss": 0.4527, + "step": 8400 + }, + { + "epoch": 2.3512454520011197, + "grad_norm": 0.2532217279249197, + "learning_rate": 1.225985485602465e-05, + "loss": 0.4621, + "step": 8401 + }, + { + "epoch": 2.3515253288553035, + "grad_norm": 0.2606280595395842, + "learning_rate": 1.2249738156175906e-05, + "loss": 0.4529, + "step": 8402 + }, + { + "epoch": 2.351805205709488, + "grad_norm": 0.25089789120110084, + "learning_rate": 1.2239625049418213e-05, + "loss": 0.4555, + "step": 8403 + }, + { + "epoch": 2.352085082563672, + "grad_norm": 0.2633404388003439, + "learning_rate": 1.2229515536714143e-05, + "loss": 0.464, + "step": 8404 + }, + { + "epoch": 2.352364959417856, + "grad_norm": 0.24919943604379635, + "learning_rate": 1.2219409619025923e-05, + "loss": 0.4154, + "step": 8405 + }, + { + "epoch": 2.3526448362720402, + "grad_norm": 0.24462315372296126, + "learning_rate": 1.2209307297315432e-05, + "loss": 0.4423, + "step": 8406 + }, + { + "epoch": 2.3529247131262245, + "grad_norm": 0.25343039410110674, + "learning_rate": 1.2199208572544235e-05, + "loss": 0.4749, + "step": 8407 + }, + { + "epoch": 2.3532045899804084, + "grad_norm": 0.24891937821030463, + "learning_rate": 1.2189113445673528e-05, + "loss": 0.4557, + "step": 8408 + }, + { + "epoch": 2.3534844668345927, + "grad_norm": 0.2492162822411207, + "learning_rate": 1.2179021917664169e-05, + "loss": 0.4544, + "step": 8409 + }, + { + "epoch": 2.353764343688777, + "grad_norm": 0.2537192931013115, + "learning_rate": 1.2168933989476667e-05, + "loss": 0.4774, + "step": 8410 + }, + { + "epoch": 2.3540442205429613, + "grad_norm": 0.2506170605975324, + "learning_rate": 1.2158849662071203e-05, + "loss": 0.4484, + "step": 8411 + }, + { + "epoch": 2.354324097397145, + "grad_norm": 0.25102479492205515, + "learning_rate": 1.2148768936407612e-05, + "loss": 0.4526, + "step": 8412 + }, + { + "epoch": 2.3546039742513294, + "grad_norm": 0.26854370336913724, + "learning_rate": 1.213869181344538e-05, + "loss": 0.4581, + "step": 8413 + }, + { + "epoch": 2.3548838511055137, + "grad_norm": 0.2509218105622706, + "learning_rate": 1.2128618294143667e-05, + "loss": 0.4535, + "step": 8414 + }, + { + "epoch": 2.355163727959698, + "grad_norm": 0.25942931229597765, + "learning_rate": 1.2118548379461247e-05, + "loss": 0.4636, + "step": 8415 + }, + { + "epoch": 2.355443604813882, + "grad_norm": 0.24517258144625204, + "learning_rate": 1.2108482070356596e-05, + "loss": 0.4555, + "step": 8416 + }, + { + "epoch": 2.355723481668066, + "grad_norm": 0.24999274596383914, + "learning_rate": 1.2098419367787833e-05, + "loss": 0.4525, + "step": 8417 + }, + { + "epoch": 2.3560033585222504, + "grad_norm": 0.241417344554353, + "learning_rate": 1.2088360272712728e-05, + "loss": 0.4468, + "step": 8418 + }, + { + "epoch": 2.3562832353764342, + "grad_norm": 0.25576661641780774, + "learning_rate": 1.2078304786088707e-05, + "loss": 0.472, + "step": 8419 + }, + { + "epoch": 2.3565631122306185, + "grad_norm": 0.24005259286460717, + "learning_rate": 1.2068252908872867e-05, + "loss": 0.4407, + "step": 8420 + }, + { + "epoch": 2.356842989084803, + "grad_norm": 0.24663659363954651, + "learning_rate": 1.2058204642021948e-05, + "loss": 0.4564, + "step": 8421 + }, + { + "epoch": 2.3571228659389867, + "grad_norm": 0.2643555755912351, + "learning_rate": 1.2048159986492347e-05, + "loss": 0.4572, + "step": 8422 + }, + { + "epoch": 2.357402742793171, + "grad_norm": 0.24345084309527462, + "learning_rate": 1.2038118943240118e-05, + "loss": 0.4549, + "step": 8423 + }, + { + "epoch": 2.3576826196473553, + "grad_norm": 0.258733728353715, + "learning_rate": 1.2028081513220962e-05, + "loss": 0.4549, + "step": 8424 + }, + { + "epoch": 2.357962496501539, + "grad_norm": 0.2509791103448085, + "learning_rate": 1.2018047697390279e-05, + "loss": 0.4731, + "step": 8425 + }, + { + "epoch": 2.3582423733557234, + "grad_norm": 0.24755504442418122, + "learning_rate": 1.2008017496703072e-05, + "loss": 0.4544, + "step": 8426 + }, + { + "epoch": 2.3585222502099077, + "grad_norm": 0.2569324588508519, + "learning_rate": 1.1997990912114026e-05, + "loss": 0.4566, + "step": 8427 + }, + { + "epoch": 2.358802127064092, + "grad_norm": 0.24303490972506264, + "learning_rate": 1.1987967944577477e-05, + "loss": 0.4445, + "step": 8428 + }, + { + "epoch": 2.359082003918276, + "grad_norm": 0.2527798835191069, + "learning_rate": 1.1977948595047417e-05, + "loss": 0.4385, + "step": 8429 + }, + { + "epoch": 2.35936188077246, + "grad_norm": 0.25286962790773965, + "learning_rate": 1.1967932864477488e-05, + "loss": 0.4457, + "step": 8430 + }, + { + "epoch": 2.3596417576266444, + "grad_norm": 0.2490643236869597, + "learning_rate": 1.1957920753820994e-05, + "loss": 0.4457, + "step": 8431 + }, + { + "epoch": 2.3599216344808283, + "grad_norm": 0.25069382241206445, + "learning_rate": 1.1947912264030914e-05, + "loss": 0.4784, + "step": 8432 + }, + { + "epoch": 2.3602015113350125, + "grad_norm": 0.26386931472411046, + "learning_rate": 1.1937907396059833e-05, + "loss": 0.454, + "step": 8433 + }, + { + "epoch": 2.360481388189197, + "grad_norm": 0.2561981374840693, + "learning_rate": 1.1927906150860025e-05, + "loss": 0.4629, + "step": 8434 + }, + { + "epoch": 2.360761265043381, + "grad_norm": 0.26558819123351496, + "learning_rate": 1.191790852938342e-05, + "loss": 0.4646, + "step": 8435 + }, + { + "epoch": 2.361041141897565, + "grad_norm": 0.25531702647885207, + "learning_rate": 1.19079145325816e-05, + "loss": 0.4591, + "step": 8436 + }, + { + "epoch": 2.3613210187517493, + "grad_norm": 0.27255461611800547, + "learning_rate": 1.1897924161405788e-05, + "loss": 0.461, + "step": 8437 + }, + { + "epoch": 2.3616008956059336, + "grad_norm": 0.24334621995825967, + "learning_rate": 1.1887937416806888e-05, + "loss": 0.4479, + "step": 8438 + }, + { + "epoch": 2.3618807724601174, + "grad_norm": 0.2537850517740165, + "learning_rate": 1.1877954299735434e-05, + "loss": 0.4443, + "step": 8439 + }, + { + "epoch": 2.3621606493143017, + "grad_norm": 0.2495366582398718, + "learning_rate": 1.186797481114163e-05, + "loss": 0.4293, + "step": 8440 + }, + { + "epoch": 2.362440526168486, + "grad_norm": 0.24886917301015574, + "learning_rate": 1.1857998951975308e-05, + "loss": 0.4593, + "step": 8441 + }, + { + "epoch": 2.36272040302267, + "grad_norm": 0.25535409313463214, + "learning_rate": 1.1848026723186012e-05, + "loss": 0.4564, + "step": 8442 + }, + { + "epoch": 2.363000279876854, + "grad_norm": 0.24791219708171475, + "learning_rate": 1.1838058125722889e-05, + "loss": 0.4333, + "step": 8443 + }, + { + "epoch": 2.3632801567310384, + "grad_norm": 0.24544745511039154, + "learning_rate": 1.1828093160534753e-05, + "loss": 0.4615, + "step": 8444 + }, + { + "epoch": 2.3635600335852223, + "grad_norm": 0.24488233523327232, + "learning_rate": 1.1818131828570073e-05, + "loss": 0.4547, + "step": 8445 + }, + { + "epoch": 2.3638399104394066, + "grad_norm": 0.24834077034366778, + "learning_rate": 1.1808174130776978e-05, + "loss": 0.4404, + "step": 8446 + }, + { + "epoch": 2.364119787293591, + "grad_norm": 0.2515092195111415, + "learning_rate": 1.179822006810325e-05, + "loss": 0.447, + "step": 8447 + }, + { + "epoch": 2.364399664147775, + "grad_norm": 0.24456693293954418, + "learning_rate": 1.1788269641496314e-05, + "loss": 0.4382, + "step": 8448 + }, + { + "epoch": 2.364679541001959, + "grad_norm": 0.24135189123791034, + "learning_rate": 1.1778322851903262e-05, + "loss": 0.4228, + "step": 8449 + }, + { + "epoch": 2.3649594178561433, + "grad_norm": 0.24672020178430434, + "learning_rate": 1.1768379700270837e-05, + "loss": 0.4439, + "step": 8450 + }, + { + "epoch": 2.3652392947103276, + "grad_norm": 0.24364002490914613, + "learning_rate": 1.1758440187545444e-05, + "loss": 0.4511, + "step": 8451 + }, + { + "epoch": 2.365519171564512, + "grad_norm": 0.25583148991624116, + "learning_rate": 1.1748504314673103e-05, + "loss": 0.4371, + "step": 8452 + }, + { + "epoch": 2.3657990484186957, + "grad_norm": 0.25055055778103663, + "learning_rate": 1.173857208259953e-05, + "loss": 0.4433, + "step": 8453 + }, + { + "epoch": 2.36607892527288, + "grad_norm": 0.25223392882482665, + "learning_rate": 1.1728643492270086e-05, + "loss": 0.4564, + "step": 8454 + }, + { + "epoch": 2.3663588021270643, + "grad_norm": 0.2552693560851051, + "learning_rate": 1.1718718544629775e-05, + "loss": 0.4689, + "step": 8455 + }, + { + "epoch": 2.366638678981248, + "grad_norm": 0.2514387784740974, + "learning_rate": 1.170879724062326e-05, + "loss": 0.4516, + "step": 8456 + }, + { + "epoch": 2.3669185558354324, + "grad_norm": 0.25300993156690305, + "learning_rate": 1.1698879581194855e-05, + "loss": 0.4668, + "step": 8457 + }, + { + "epoch": 2.3671984326896167, + "grad_norm": 0.24725806328930555, + "learning_rate": 1.1688965567288518e-05, + "loss": 0.4547, + "step": 8458 + }, + { + "epoch": 2.3674783095438006, + "grad_norm": 0.2640539988900624, + "learning_rate": 1.1679055199847893e-05, + "loss": 0.4723, + "step": 8459 + }, + { + "epoch": 2.367758186397985, + "grad_norm": 0.2553325697968964, + "learning_rate": 1.1669148479816244e-05, + "loss": 0.432, + "step": 8460 + }, + { + "epoch": 2.368038063252169, + "grad_norm": 0.25233999820344744, + "learning_rate": 1.1659245408136498e-05, + "loss": 0.4672, + "step": 8461 + }, + { + "epoch": 2.368317940106353, + "grad_norm": 0.24428085281649775, + "learning_rate": 1.1649345985751237e-05, + "loss": 0.4499, + "step": 8462 + }, + { + "epoch": 2.3685978169605373, + "grad_norm": 0.25982128241925356, + "learning_rate": 1.1639450213602687e-05, + "loss": 0.4585, + "step": 8463 + }, + { + "epoch": 2.3688776938147216, + "grad_norm": 0.2442033543668397, + "learning_rate": 1.1629558092632736e-05, + "loss": 0.4625, + "step": 8464 + }, + { + "epoch": 2.369157570668906, + "grad_norm": 0.24608761580131538, + "learning_rate": 1.1619669623782925e-05, + "loss": 0.4485, + "step": 8465 + }, + { + "epoch": 2.3694374475230897, + "grad_norm": 0.25627952642747487, + "learning_rate": 1.1609784807994445e-05, + "loss": 0.4437, + "step": 8466 + }, + { + "epoch": 2.369717324377274, + "grad_norm": 0.26470736320009125, + "learning_rate": 1.1599903646208127e-05, + "loss": 0.4681, + "step": 8467 + }, + { + "epoch": 2.3699972012314583, + "grad_norm": 0.2504622206793435, + "learning_rate": 1.159002613936448e-05, + "loss": 0.4427, + "step": 8468 + }, + { + "epoch": 2.370277078085642, + "grad_norm": 0.24825332132246183, + "learning_rate": 1.1580152288403656e-05, + "loss": 0.4409, + "step": 8469 + }, + { + "epoch": 2.3705569549398264, + "grad_norm": 0.25954707635115476, + "learning_rate": 1.1570282094265428e-05, + "loss": 0.4522, + "step": 8470 + }, + { + "epoch": 2.3708368317940107, + "grad_norm": 0.25321343728879336, + "learning_rate": 1.156041555788926e-05, + "loss": 0.451, + "step": 8471 + }, + { + "epoch": 2.371116708648195, + "grad_norm": 0.24750246691792838, + "learning_rate": 1.1550552680214255e-05, + "loss": 0.4601, + "step": 8472 + }, + { + "epoch": 2.371396585502379, + "grad_norm": 0.2580413676391593, + "learning_rate": 1.1540693462179164e-05, + "loss": 0.4579, + "step": 8473 + }, + { + "epoch": 2.371676462356563, + "grad_norm": 0.243592156305927, + "learning_rate": 1.1530837904722397e-05, + "loss": 0.4402, + "step": 8474 + }, + { + "epoch": 2.3719563392107474, + "grad_norm": 0.25033382298234824, + "learning_rate": 1.1520986008782014e-05, + "loss": 0.4385, + "step": 8475 + }, + { + "epoch": 2.3722362160649313, + "grad_norm": 0.27509721948040766, + "learning_rate": 1.1511137775295704e-05, + "loss": 0.4441, + "step": 8476 + }, + { + "epoch": 2.3725160929191156, + "grad_norm": 0.22598811889755624, + "learning_rate": 1.1501293205200859e-05, + "loss": 0.4515, + "step": 8477 + }, + { + "epoch": 2.3727959697733, + "grad_norm": 0.2557994410155763, + "learning_rate": 1.1491452299434474e-05, + "loss": 0.4377, + "step": 8478 + }, + { + "epoch": 2.3730758466274837, + "grad_norm": 0.25167272119375567, + "learning_rate": 1.1481615058933215e-05, + "loss": 0.4489, + "step": 8479 + }, + { + "epoch": 2.373355723481668, + "grad_norm": 0.25403894749544725, + "learning_rate": 1.1471781484633393e-05, + "loss": 0.4554, + "step": 8480 + }, + { + "epoch": 2.3736356003358523, + "grad_norm": 0.25835384953502943, + "learning_rate": 1.146195157747097e-05, + "loss": 0.4611, + "step": 8481 + }, + { + "epoch": 2.373915477190036, + "grad_norm": 0.24090620200456578, + "learning_rate": 1.1452125338381575e-05, + "loss": 0.4265, + "step": 8482 + }, + { + "epoch": 2.3741953540442204, + "grad_norm": 0.26330330751365594, + "learning_rate": 1.1442302768300462e-05, + "loss": 0.455, + "step": 8483 + }, + { + "epoch": 2.3744752308984047, + "grad_norm": 0.2647296160726092, + "learning_rate": 1.1432483868162553e-05, + "loss": 0.4403, + "step": 8484 + }, + { + "epoch": 2.374755107752589, + "grad_norm": 0.2562155822778771, + "learning_rate": 1.1422668638902418e-05, + "loss": 0.4604, + "step": 8485 + }, + { + "epoch": 2.375034984606773, + "grad_norm": 0.24498676345082143, + "learning_rate": 1.141285708145427e-05, + "loss": 0.4414, + "step": 8486 + }, + { + "epoch": 2.375314861460957, + "grad_norm": 0.25020471503214947, + "learning_rate": 1.1403049196752003e-05, + "loss": 0.4504, + "step": 8487 + }, + { + "epoch": 2.3755947383151415, + "grad_norm": 0.2571397649585966, + "learning_rate": 1.1393244985729102e-05, + "loss": 0.4608, + "step": 8488 + }, + { + "epoch": 2.3758746151693257, + "grad_norm": 0.24323730976911384, + "learning_rate": 1.1383444449318753e-05, + "loss": 0.4497, + "step": 8489 + }, + { + "epoch": 2.3761544920235096, + "grad_norm": 0.25109532925424516, + "learning_rate": 1.1373647588453773e-05, + "loss": 0.4685, + "step": 8490 + }, + { + "epoch": 2.376434368877694, + "grad_norm": 0.2479280759369505, + "learning_rate": 1.1363854404066638e-05, + "loss": 0.4619, + "step": 8491 + }, + { + "epoch": 2.376714245731878, + "grad_norm": 0.25209496643912516, + "learning_rate": 1.1354064897089462e-05, + "loss": 0.4549, + "step": 8492 + }, + { + "epoch": 2.376994122586062, + "grad_norm": 0.2538103598208296, + "learning_rate": 1.1344279068454011e-05, + "loss": 0.4564, + "step": 8493 + }, + { + "epoch": 2.3772739994402463, + "grad_norm": 0.2567942540639802, + "learning_rate": 1.1334496919091731e-05, + "loss": 0.4469, + "step": 8494 + }, + { + "epoch": 2.3775538762944306, + "grad_norm": 0.2616791211581202, + "learning_rate": 1.1324718449933669e-05, + "loss": 0.4646, + "step": 8495 + }, + { + "epoch": 2.3778337531486144, + "grad_norm": 0.24270226011231513, + "learning_rate": 1.131494366191056e-05, + "loss": 0.453, + "step": 8496 + }, + { + "epoch": 2.3781136300027987, + "grad_norm": 0.25307643984953343, + "learning_rate": 1.1305172555952758e-05, + "loss": 0.4402, + "step": 8497 + }, + { + "epoch": 2.378393506856983, + "grad_norm": 0.23995819948642394, + "learning_rate": 1.1295405132990294e-05, + "loss": 0.4228, + "step": 8498 + }, + { + "epoch": 2.378673383711167, + "grad_norm": 0.25915800116593424, + "learning_rate": 1.128564139395284e-05, + "loss": 0.4732, + "step": 8499 + }, + { + "epoch": 2.378953260565351, + "grad_norm": 0.25760410570267217, + "learning_rate": 1.12758813397697e-05, + "loss": 0.4447, + "step": 8500 + }, + { + "epoch": 2.3792331374195355, + "grad_norm": 0.24910130559213378, + "learning_rate": 1.1266124971369851e-05, + "loss": 0.4556, + "step": 8501 + }, + { + "epoch": 2.3795130142737198, + "grad_norm": 0.2554496544371673, + "learning_rate": 1.1256372289681905e-05, + "loss": 0.4579, + "step": 8502 + }, + { + "epoch": 2.3797928911279036, + "grad_norm": 0.25992853781633357, + "learning_rate": 1.1246623295634134e-05, + "loss": 0.4482, + "step": 8503 + }, + { + "epoch": 2.380072767982088, + "grad_norm": 0.2515653705583453, + "learning_rate": 1.1236877990154449e-05, + "loss": 0.4457, + "step": 8504 + }, + { + "epoch": 2.380352644836272, + "grad_norm": 0.2588612060050965, + "learning_rate": 1.1227136374170428e-05, + "loss": 0.4693, + "step": 8505 + }, + { + "epoch": 2.380632521690456, + "grad_norm": 0.23450885032738195, + "learning_rate": 1.121739844860925e-05, + "loss": 0.4372, + "step": 8506 + }, + { + "epoch": 2.3809123985446403, + "grad_norm": 0.24992041320964856, + "learning_rate": 1.12076642143978e-05, + "loss": 0.467, + "step": 8507 + }, + { + "epoch": 2.3811922753988246, + "grad_norm": 0.2460138063010371, + "learning_rate": 1.119793367246258e-05, + "loss": 0.4452, + "step": 8508 + }, + { + "epoch": 2.381472152253009, + "grad_norm": 0.2610456256884649, + "learning_rate": 1.1188206823729752e-05, + "loss": 0.473, + "step": 8509 + }, + { + "epoch": 2.3817520291071927, + "grad_norm": 0.24559419442246114, + "learning_rate": 1.1178483669125112e-05, + "loss": 0.443, + "step": 8510 + }, + { + "epoch": 2.382031905961377, + "grad_norm": 0.2508224584655094, + "learning_rate": 1.1168764209574134e-05, + "loss": 0.4487, + "step": 8511 + }, + { + "epoch": 2.3823117828155613, + "grad_norm": 0.2620645803352295, + "learning_rate": 1.1159048446001918e-05, + "loss": 0.4419, + "step": 8512 + }, + { + "epoch": 2.382591659669745, + "grad_norm": 0.24188700477460595, + "learning_rate": 1.1149336379333208e-05, + "loss": 0.4522, + "step": 8513 + }, + { + "epoch": 2.3828715365239295, + "grad_norm": 0.241568253265115, + "learning_rate": 1.113962801049241e-05, + "loss": 0.4411, + "step": 8514 + }, + { + "epoch": 2.3831514133781138, + "grad_norm": 0.24872667384802113, + "learning_rate": 1.112992334040357e-05, + "loss": 0.4552, + "step": 8515 + }, + { + "epoch": 2.3834312902322976, + "grad_norm": 0.24840115162390053, + "learning_rate": 1.1120222369990379e-05, + "loss": 0.4572, + "step": 8516 + }, + { + "epoch": 2.383711167086482, + "grad_norm": 0.2478572673421125, + "learning_rate": 1.1110525100176183e-05, + "loss": 0.4243, + "step": 8517 + }, + { + "epoch": 2.383991043940666, + "grad_norm": 0.23767018995649516, + "learning_rate": 1.1100831531883982e-05, + "loss": 0.4451, + "step": 8518 + }, + { + "epoch": 2.38427092079485, + "grad_norm": 0.2535901641769806, + "learning_rate": 1.1091141666036403e-05, + "loss": 0.4487, + "step": 8519 + }, + { + "epoch": 2.3845507976490343, + "grad_norm": 0.26581437337015335, + "learning_rate": 1.1081455503555743e-05, + "loss": 0.4511, + "step": 8520 + }, + { + "epoch": 2.3848306745032186, + "grad_norm": 0.26287891550412334, + "learning_rate": 1.1071773045363931e-05, + "loss": 0.4701, + "step": 8521 + }, + { + "epoch": 2.385110551357403, + "grad_norm": 0.25444013359480133, + "learning_rate": 1.1062094292382547e-05, + "loss": 0.4386, + "step": 8522 + }, + { + "epoch": 2.3853904282115868, + "grad_norm": 0.2505144992278886, + "learning_rate": 1.1052419245532842e-05, + "loss": 0.4767, + "step": 8523 + }, + { + "epoch": 2.385670305065771, + "grad_norm": 0.2472367432507622, + "learning_rate": 1.1042747905735651e-05, + "loss": 0.4735, + "step": 8524 + }, + { + "epoch": 2.3859501819199553, + "grad_norm": 0.25266064924184417, + "learning_rate": 1.1033080273911522e-05, + "loss": 0.4655, + "step": 8525 + }, + { + "epoch": 2.386230058774139, + "grad_norm": 0.2578619442893693, + "learning_rate": 1.1023416350980625e-05, + "loss": 0.4431, + "step": 8526 + }, + { + "epoch": 2.3865099356283235, + "grad_norm": 0.2545384617196527, + "learning_rate": 1.101375613786278e-05, + "loss": 0.4405, + "step": 8527 + }, + { + "epoch": 2.3867898124825078, + "grad_norm": 0.2574613754642643, + "learning_rate": 1.1004099635477427e-05, + "loss": 0.4486, + "step": 8528 + }, + { + "epoch": 2.387069689336692, + "grad_norm": 0.24314583711923138, + "learning_rate": 1.0994446844743717e-05, + "loss": 0.4427, + "step": 8529 + }, + { + "epoch": 2.387349566190876, + "grad_norm": 0.2620409814504985, + "learning_rate": 1.0984797766580384e-05, + "loss": 0.4463, + "step": 8530 + }, + { + "epoch": 2.38762944304506, + "grad_norm": 0.25234223114246107, + "learning_rate": 1.0975152401905842e-05, + "loss": 0.4484, + "step": 8531 + }, + { + "epoch": 2.3879093198992445, + "grad_norm": 0.250087034848043, + "learning_rate": 1.0965510751638131e-05, + "loss": 0.4757, + "step": 8532 + }, + { + "epoch": 2.3881891967534283, + "grad_norm": 0.25321507095995915, + "learning_rate": 1.0955872816694963e-05, + "loss": 0.4505, + "step": 8533 + }, + { + "epoch": 2.3884690736076126, + "grad_norm": 0.2605523473618677, + "learning_rate": 1.094623859799367e-05, + "loss": 0.4498, + "step": 8534 + }, + { + "epoch": 2.388748950461797, + "grad_norm": 0.2541970227069143, + "learning_rate": 1.0936608096451245e-05, + "loss": 0.4425, + "step": 8535 + }, + { + "epoch": 2.3890288273159808, + "grad_norm": 0.25825358231260204, + "learning_rate": 1.0926981312984324e-05, + "loss": 0.4499, + "step": 8536 + }, + { + "epoch": 2.389308704170165, + "grad_norm": 0.25421668578967516, + "learning_rate": 1.0917358248509196e-05, + "loss": 0.4433, + "step": 8537 + }, + { + "epoch": 2.3895885810243493, + "grad_norm": 0.2537661699338842, + "learning_rate": 1.0907738903941789e-05, + "loss": 0.4414, + "step": 8538 + }, + { + "epoch": 2.3898684578785336, + "grad_norm": 0.2580294505365846, + "learning_rate": 1.089812328019767e-05, + "loss": 0.4607, + "step": 8539 + }, + { + "epoch": 2.3901483347327175, + "grad_norm": 0.2511456533382572, + "learning_rate": 1.0888511378192062e-05, + "loss": 0.4563, + "step": 8540 + }, + { + "epoch": 2.3904282115869018, + "grad_norm": 0.2549293949705993, + "learning_rate": 1.0878903198839846e-05, + "loss": 0.4511, + "step": 8541 + }, + { + "epoch": 2.390708088441086, + "grad_norm": 0.24967874919311853, + "learning_rate": 1.0869298743055512e-05, + "loss": 0.45, + "step": 8542 + }, + { + "epoch": 2.39098796529527, + "grad_norm": 0.2643741079619027, + "learning_rate": 1.0859698011753222e-05, + "loss": 0.4368, + "step": 8543 + }, + { + "epoch": 2.391267842149454, + "grad_norm": 0.2370939773182887, + "learning_rate": 1.0850101005846786e-05, + "loss": 0.4397, + "step": 8544 + }, + { + "epoch": 2.3915477190036385, + "grad_norm": 0.24725258826462226, + "learning_rate": 1.0840507726249632e-05, + "loss": 0.4419, + "step": 8545 + }, + { + "epoch": 2.391827595857823, + "grad_norm": 0.2551646833782642, + "learning_rate": 1.0830918173874887e-05, + "loss": 0.4516, + "step": 8546 + }, + { + "epoch": 2.3921074727120066, + "grad_norm": 0.25736879017287306, + "learning_rate": 1.0821332349635272e-05, + "loss": 0.435, + "step": 8547 + }, + { + "epoch": 2.392387349566191, + "grad_norm": 0.25750594326208964, + "learning_rate": 1.0811750254443177e-05, + "loss": 0.4558, + "step": 8548 + }, + { + "epoch": 2.392667226420375, + "grad_norm": 0.2516396755730355, + "learning_rate": 1.0802171889210621e-05, + "loss": 0.4559, + "step": 8549 + }, + { + "epoch": 2.392947103274559, + "grad_norm": 0.2537997014701762, + "learning_rate": 1.079259725484929e-05, + "loss": 0.4582, + "step": 8550 + }, + { + "epoch": 2.3932269801287434, + "grad_norm": 0.24586619868940723, + "learning_rate": 1.0783026352270497e-05, + "loss": 0.4608, + "step": 8551 + }, + { + "epoch": 2.3935068569829276, + "grad_norm": 0.23958963468288366, + "learning_rate": 1.0773459182385203e-05, + "loss": 0.4416, + "step": 8552 + }, + { + "epoch": 2.3937867338371115, + "grad_norm": 0.25757865687385034, + "learning_rate": 1.0763895746104025e-05, + "loss": 0.4466, + "step": 8553 + }, + { + "epoch": 2.394066610691296, + "grad_norm": 0.24553407549061387, + "learning_rate": 1.075433604433721e-05, + "loss": 0.4508, + "step": 8554 + }, + { + "epoch": 2.39434648754548, + "grad_norm": 0.24820752981238575, + "learning_rate": 1.0744780077994654e-05, + "loss": 0.46, + "step": 8555 + }, + { + "epoch": 2.394626364399664, + "grad_norm": 0.2597759636128657, + "learning_rate": 1.0735227847985907e-05, + "loss": 0.4711, + "step": 8556 + }, + { + "epoch": 2.394906241253848, + "grad_norm": 0.2565794358175612, + "learning_rate": 1.0725679355220147e-05, + "loss": 0.4553, + "step": 8557 + }, + { + "epoch": 2.3951861181080325, + "grad_norm": 0.25660870939499814, + "learning_rate": 1.071613460060621e-05, + "loss": 0.4579, + "step": 8558 + }, + { + "epoch": 2.395465994962217, + "grad_norm": 0.2589972903164503, + "learning_rate": 1.0706593585052588e-05, + "loss": 0.4319, + "step": 8559 + }, + { + "epoch": 2.3957458718164006, + "grad_norm": 0.25647126213705046, + "learning_rate": 1.0697056309467363e-05, + "loss": 0.4353, + "step": 8560 + }, + { + "epoch": 2.396025748670585, + "grad_norm": 0.25211816040062407, + "learning_rate": 1.0687522774758319e-05, + "loss": 0.4738, + "step": 8561 + }, + { + "epoch": 2.3963056255247692, + "grad_norm": 0.269195193074081, + "learning_rate": 1.0677992981832847e-05, + "loss": 0.4631, + "step": 8562 + }, + { + "epoch": 2.396585502378953, + "grad_norm": 0.2572591613253173, + "learning_rate": 1.0668466931598026e-05, + "loss": 0.435, + "step": 8563 + }, + { + "epoch": 2.3968653792331374, + "grad_norm": 0.2433479360365131, + "learning_rate": 1.0658944624960537e-05, + "loss": 0.4382, + "step": 8564 + }, + { + "epoch": 2.3971452560873217, + "grad_norm": 0.25964090246581323, + "learning_rate": 1.0649426062826717e-05, + "loss": 0.4688, + "step": 8565 + }, + { + "epoch": 2.397425132941506, + "grad_norm": 0.25815098741393544, + "learning_rate": 1.0639911246102551e-05, + "loss": 0.456, + "step": 8566 + }, + { + "epoch": 2.39770500979569, + "grad_norm": 0.25282898037390106, + "learning_rate": 1.0630400175693667e-05, + "loss": 0.4703, + "step": 8567 + }, + { + "epoch": 2.397984886649874, + "grad_norm": 0.25190562989901727, + "learning_rate": 1.0620892852505326e-05, + "loss": 0.4586, + "step": 8568 + }, + { + "epoch": 2.3982647635040584, + "grad_norm": 0.2611461007462448, + "learning_rate": 1.0611389277442452e-05, + "loss": 0.4848, + "step": 8569 + }, + { + "epoch": 2.398544640358242, + "grad_norm": 0.24982906183339657, + "learning_rate": 1.0601889451409586e-05, + "loss": 0.4606, + "step": 8570 + }, + { + "epoch": 2.3988245172124265, + "grad_norm": 0.26870082819238894, + "learning_rate": 1.0592393375310932e-05, + "loss": 0.4582, + "step": 8571 + }, + { + "epoch": 2.399104394066611, + "grad_norm": 0.2591418969042139, + "learning_rate": 1.0582901050050343e-05, + "loss": 0.4661, + "step": 8572 + }, + { + "epoch": 2.3993842709207946, + "grad_norm": 0.2562995108371808, + "learning_rate": 1.0573412476531286e-05, + "loss": 0.4311, + "step": 8573 + }, + { + "epoch": 2.399664147774979, + "grad_norm": 0.2357820928071976, + "learning_rate": 1.0563927655656903e-05, + "loss": 0.4284, + "step": 8574 + }, + { + "epoch": 2.3999440246291632, + "grad_norm": 0.2585138815343089, + "learning_rate": 1.0554446588329958e-05, + "loss": 0.4669, + "step": 8575 + }, + { + "epoch": 2.400223901483347, + "grad_norm": 0.24608526786812152, + "learning_rate": 1.0544969275452865e-05, + "loss": 0.4571, + "step": 8576 + }, + { + "epoch": 2.4005037783375314, + "grad_norm": 0.25021791058956777, + "learning_rate": 1.0535495717927697e-05, + "loss": 0.4499, + "step": 8577 + }, + { + "epoch": 2.4007836551917157, + "grad_norm": 0.2394709493644098, + "learning_rate": 1.0526025916656119e-05, + "loss": 0.4286, + "step": 8578 + }, + { + "epoch": 2.4010635320459, + "grad_norm": 0.24850933716896373, + "learning_rate": 1.0516559872539472e-05, + "loss": 0.4709, + "step": 8579 + }, + { + "epoch": 2.401343408900084, + "grad_norm": 0.2473285040876372, + "learning_rate": 1.050709758647877e-05, + "loss": 0.4409, + "step": 8580 + }, + { + "epoch": 2.401623285754268, + "grad_norm": 0.24585197917796422, + "learning_rate": 1.049763905937463e-05, + "loss": 0.4441, + "step": 8581 + }, + { + "epoch": 2.4019031626084524, + "grad_norm": 0.2561980962896979, + "learning_rate": 1.0488184292127312e-05, + "loss": 0.4559, + "step": 8582 + }, + { + "epoch": 2.4021830394626367, + "grad_norm": 0.2628419010408199, + "learning_rate": 1.0478733285636732e-05, + "loss": 0.459, + "step": 8583 + }, + { + "epoch": 2.4024629163168205, + "grad_norm": 0.26645929706731025, + "learning_rate": 1.0469286040802434e-05, + "loss": 0.4646, + "step": 8584 + }, + { + "epoch": 2.402742793171005, + "grad_norm": 0.2498350546475695, + "learning_rate": 1.045984255852362e-05, + "loss": 0.4455, + "step": 8585 + }, + { + "epoch": 2.403022670025189, + "grad_norm": 0.24867557897568118, + "learning_rate": 1.0450402839699125e-05, + "loss": 0.4613, + "step": 8586 + }, + { + "epoch": 2.403302546879373, + "grad_norm": 0.2589311019287912, + "learning_rate": 1.044096688522742e-05, + "loss": 0.4583, + "step": 8587 + }, + { + "epoch": 2.4035824237335572, + "grad_norm": 0.2594301193961265, + "learning_rate": 1.0431534696006629e-05, + "loss": 0.4581, + "step": 8588 + }, + { + "epoch": 2.4038623005877415, + "grad_norm": 0.2444535036771609, + "learning_rate": 1.0422106272934518e-05, + "loss": 0.4344, + "step": 8589 + }, + { + "epoch": 2.4041421774419254, + "grad_norm": 0.24864364982483306, + "learning_rate": 1.0412681616908482e-05, + "loss": 0.4372, + "step": 8590 + }, + { + "epoch": 2.4044220542961097, + "grad_norm": 0.25733610925231926, + "learning_rate": 1.0403260728825564e-05, + "loss": 0.4641, + "step": 8591 + }, + { + "epoch": 2.404701931150294, + "grad_norm": 0.24866074269218585, + "learning_rate": 1.0393843609582454e-05, + "loss": 0.4525, + "step": 8592 + }, + { + "epoch": 2.404981808004478, + "grad_norm": 0.25090480950398686, + "learning_rate": 1.0384430260075483e-05, + "loss": 0.4426, + "step": 8593 + }, + { + "epoch": 2.405261684858662, + "grad_norm": 0.2527307782120318, + "learning_rate": 1.037502068120061e-05, + "loss": 0.4462, + "step": 8594 + }, + { + "epoch": 2.4055415617128464, + "grad_norm": 0.26634405280805074, + "learning_rate": 1.0365614873853462e-05, + "loss": 0.4614, + "step": 8595 + }, + { + "epoch": 2.4058214385670307, + "grad_norm": 0.2571775265876603, + "learning_rate": 1.0356212838929263e-05, + "loss": 0.4472, + "step": 8596 + }, + { + "epoch": 2.4061013154212145, + "grad_norm": 0.27237001904567404, + "learning_rate": 1.03468145773229e-05, + "loss": 0.4605, + "step": 8597 + }, + { + "epoch": 2.406381192275399, + "grad_norm": 0.2484780604021942, + "learning_rate": 1.0337420089928934e-05, + "loss": 0.4328, + "step": 8598 + }, + { + "epoch": 2.406661069129583, + "grad_norm": 0.2584202081029937, + "learning_rate": 1.0328029377641524e-05, + "loss": 0.4377, + "step": 8599 + }, + { + "epoch": 2.406940945983767, + "grad_norm": 0.2535064978309816, + "learning_rate": 1.031864244135448e-05, + "loss": 0.4631, + "step": 8600 + }, + { + "epoch": 2.4072208228379512, + "grad_norm": 0.26025377701465413, + "learning_rate": 1.0309259281961264e-05, + "loss": 0.4417, + "step": 8601 + }, + { + "epoch": 2.4075006996921355, + "grad_norm": 0.2616665535860985, + "learning_rate": 1.0299879900354964e-05, + "loss": 0.469, + "step": 8602 + }, + { + "epoch": 2.40778057654632, + "grad_norm": 0.2413895721759028, + "learning_rate": 1.0290504297428316e-05, + "loss": 0.4332, + "step": 8603 + }, + { + "epoch": 2.4080604534005037, + "grad_norm": 0.26131763707584116, + "learning_rate": 1.0281132474073697e-05, + "loss": 0.4587, + "step": 8604 + }, + { + "epoch": 2.408340330254688, + "grad_norm": 0.2553180213830489, + "learning_rate": 1.0271764431183117e-05, + "loss": 0.4513, + "step": 8605 + }, + { + "epoch": 2.4086202071088723, + "grad_norm": 0.2531893195043381, + "learning_rate": 1.0262400169648235e-05, + "loss": 0.4548, + "step": 8606 + }, + { + "epoch": 2.408900083963056, + "grad_norm": 0.2637153486653331, + "learning_rate": 1.025303969036035e-05, + "loss": 0.4609, + "step": 8607 + }, + { + "epoch": 2.4091799608172404, + "grad_norm": 0.24966077178018864, + "learning_rate": 1.0243682994210396e-05, + "loss": 0.4582, + "step": 8608 + }, + { + "epoch": 2.4094598376714247, + "grad_norm": 0.2477961428780549, + "learning_rate": 1.0234330082088944e-05, + "loss": 0.4368, + "step": 8609 + }, + { + "epoch": 2.4097397145256085, + "grad_norm": 0.26031003578224476, + "learning_rate": 1.0224980954886215e-05, + "loss": 0.4524, + "step": 8610 + }, + { + "epoch": 2.410019591379793, + "grad_norm": 0.24885377610396178, + "learning_rate": 1.0215635613492059e-05, + "loss": 0.4661, + "step": 8611 + }, + { + "epoch": 2.410299468233977, + "grad_norm": 0.2544164546472397, + "learning_rate": 1.0206294058795973e-05, + "loss": 0.4611, + "step": 8612 + }, + { + "epoch": 2.410579345088161, + "grad_norm": 0.2605012999203762, + "learning_rate": 1.019695629168711e-05, + "loss": 0.4638, + "step": 8613 + }, + { + "epoch": 2.4108592219423453, + "grad_norm": 0.2485367521380268, + "learning_rate": 1.0187622313054196e-05, + "loss": 0.4499, + "step": 8614 + }, + { + "epoch": 2.4111390987965295, + "grad_norm": 0.25561012536182637, + "learning_rate": 1.0178292123785688e-05, + "loss": 0.454, + "step": 8615 + }, + { + "epoch": 2.411418975650714, + "grad_norm": 0.25548857242052186, + "learning_rate": 1.0168965724769624e-05, + "loss": 0.4544, + "step": 8616 + }, + { + "epoch": 2.4116988525048977, + "grad_norm": 0.25797691362024977, + "learning_rate": 1.0159643116893697e-05, + "loss": 0.4391, + "step": 8617 + }, + { + "epoch": 2.411978729359082, + "grad_norm": 0.25412226742638816, + "learning_rate": 1.015032430104524e-05, + "loss": 0.4566, + "step": 8618 + }, + { + "epoch": 2.4122586062132663, + "grad_norm": 0.25735285136315833, + "learning_rate": 1.0141009278111218e-05, + "loss": 0.4634, + "step": 8619 + }, + { + "epoch": 2.4125384830674506, + "grad_norm": 0.25326124853396187, + "learning_rate": 1.0131698048978245e-05, + "loss": 0.4612, + "step": 8620 + }, + { + "epoch": 2.4128183599216344, + "grad_norm": 0.251539458502865, + "learning_rate": 1.0122390614532567e-05, + "loss": 0.4508, + "step": 8621 + }, + { + "epoch": 2.4130982367758187, + "grad_norm": 0.24558310542459344, + "learning_rate": 1.0113086975660069e-05, + "loss": 0.4455, + "step": 8622 + }, + { + "epoch": 2.413378113630003, + "grad_norm": 0.24438323246842192, + "learning_rate": 1.0103787133246278e-05, + "loss": 0.4411, + "step": 8623 + }, + { + "epoch": 2.413657990484187, + "grad_norm": 0.2652900454859552, + "learning_rate": 1.009449108817636e-05, + "loss": 0.4565, + "step": 8624 + }, + { + "epoch": 2.413937867338371, + "grad_norm": 0.24680876324047746, + "learning_rate": 1.0085198841335113e-05, + "loss": 0.4718, + "step": 8625 + }, + { + "epoch": 2.4142177441925554, + "grad_norm": 0.25450331110947566, + "learning_rate": 1.0075910393606985e-05, + "loss": 0.4459, + "step": 8626 + }, + { + "epoch": 2.4144976210467393, + "grad_norm": 0.24516058193926904, + "learning_rate": 1.0066625745876057e-05, + "loss": 0.4678, + "step": 8627 + }, + { + "epoch": 2.4147774979009236, + "grad_norm": 0.256412199736452, + "learning_rate": 1.0057344899026033e-05, + "loss": 0.4535, + "step": 8628 + }, + { + "epoch": 2.415057374755108, + "grad_norm": 0.24002494060523508, + "learning_rate": 1.0048067853940285e-05, + "loss": 0.4331, + "step": 8629 + }, + { + "epoch": 2.4153372516092917, + "grad_norm": 0.247358564282145, + "learning_rate": 1.00387946115018e-05, + "loss": 0.4527, + "step": 8630 + }, + { + "epoch": 2.415617128463476, + "grad_norm": 0.2591784977096219, + "learning_rate": 1.0029525172593207e-05, + "loss": 0.4442, + "step": 8631 + }, + { + "epoch": 2.4158970053176603, + "grad_norm": 0.24240558272495144, + "learning_rate": 1.0020259538096783e-05, + "loss": 0.4423, + "step": 8632 + }, + { + "epoch": 2.4161768821718446, + "grad_norm": 0.25622432374280196, + "learning_rate": 1.0010997708894431e-05, + "loss": 0.454, + "step": 8633 + }, + { + "epoch": 2.4164567590260284, + "grad_norm": 0.2632338277656419, + "learning_rate": 1.0001739685867707e-05, + "loss": 0.4441, + "step": 8634 + }, + { + "epoch": 2.4167366358802127, + "grad_norm": 0.25270250977478576, + "learning_rate": 9.992485469897784e-06, + "loss": 0.4423, + "step": 8635 + }, + { + "epoch": 2.417016512734397, + "grad_norm": 0.24681380974500228, + "learning_rate": 9.983235061865488e-06, + "loss": 0.47, + "step": 8636 + }, + { + "epoch": 2.417296389588581, + "grad_norm": 0.2686663215069251, + "learning_rate": 9.973988462651273e-06, + "loss": 0.4626, + "step": 8637 + }, + { + "epoch": 2.417576266442765, + "grad_norm": 0.26299024284108885, + "learning_rate": 9.96474567313524e-06, + "loss": 0.4431, + "step": 8638 + }, + { + "epoch": 2.4178561432969494, + "grad_norm": 0.26772873928657787, + "learning_rate": 9.955506694197125e-06, + "loss": 0.4559, + "step": 8639 + }, + { + "epoch": 2.4181360201511337, + "grad_norm": 0.2477671241569365, + "learning_rate": 9.9462715267163e-06, + "loss": 0.4361, + "step": 8640 + }, + { + "epoch": 2.4184158970053176, + "grad_norm": 0.24284858624233002, + "learning_rate": 9.937040171571766e-06, + "loss": 0.4486, + "step": 8641 + }, + { + "epoch": 2.418695773859502, + "grad_norm": 0.25814165198388794, + "learning_rate": 9.927812629642175e-06, + "loss": 0.4504, + "step": 8642 + }, + { + "epoch": 2.418975650713686, + "grad_norm": 0.23605886613592167, + "learning_rate": 9.918588901805803e-06, + "loss": 0.4407, + "step": 8643 + }, + { + "epoch": 2.41925552756787, + "grad_norm": 0.2521803210979653, + "learning_rate": 9.909368988940576e-06, + "loss": 0.4465, + "step": 8644 + }, + { + "epoch": 2.4195354044220543, + "grad_norm": 0.2530730669884669, + "learning_rate": 9.900152891924048e-06, + "loss": 0.4451, + "step": 8645 + }, + { + "epoch": 2.4198152812762386, + "grad_norm": 0.2580819773635043, + "learning_rate": 9.890940611633414e-06, + "loss": 0.4462, + "step": 8646 + }, + { + "epoch": 2.4200951581304224, + "grad_norm": 0.24238307584133761, + "learning_rate": 9.881732148945506e-06, + "loss": 0.4525, + "step": 8647 + }, + { + "epoch": 2.4203750349846067, + "grad_norm": 0.25469538168173506, + "learning_rate": 9.872527504736779e-06, + "loss": 0.4343, + "step": 8648 + }, + { + "epoch": 2.420654911838791, + "grad_norm": 0.25592123097193537, + "learning_rate": 9.86332667988335e-06, + "loss": 0.4282, + "step": 8649 + }, + { + "epoch": 2.420934788692975, + "grad_norm": 0.2541152179969242, + "learning_rate": 9.854129675260954e-06, + "loss": 0.4392, + "step": 8650 + }, + { + "epoch": 2.421214665547159, + "grad_norm": 0.2519173852407417, + "learning_rate": 9.84493649174496e-06, + "loss": 0.4615, + "step": 8651 + }, + { + "epoch": 2.4214945424013434, + "grad_norm": 0.2509635352782236, + "learning_rate": 9.835747130210394e-06, + "loss": 0.4422, + "step": 8652 + }, + { + "epoch": 2.4217744192555277, + "grad_norm": 0.25094145497835696, + "learning_rate": 9.826561591531891e-06, + "loss": 0.4614, + "step": 8653 + }, + { + "epoch": 2.4220542961097116, + "grad_norm": 0.25343795423147647, + "learning_rate": 9.81737987658375e-06, + "loss": 0.4655, + "step": 8654 + }, + { + "epoch": 2.422334172963896, + "grad_norm": 0.26416564626482847, + "learning_rate": 9.808201986239873e-06, + "loss": 0.4617, + "step": 8655 + }, + { + "epoch": 2.42261404981808, + "grad_norm": 0.25689984577385283, + "learning_rate": 9.799027921373832e-06, + "loss": 0.4528, + "step": 8656 + }, + { + "epoch": 2.4228939266722644, + "grad_norm": 0.29975297082457075, + "learning_rate": 9.789857682858816e-06, + "loss": 0.4642, + "step": 8657 + }, + { + "epoch": 2.4231738035264483, + "grad_norm": 0.2606575568398438, + "learning_rate": 9.780691271567655e-06, + "loss": 0.4683, + "step": 8658 + }, + { + "epoch": 2.4234536803806326, + "grad_norm": 0.25885281887690115, + "learning_rate": 9.771528688372805e-06, + "loss": 0.4433, + "step": 8659 + }, + { + "epoch": 2.423733557234817, + "grad_norm": 0.2516278579206306, + "learning_rate": 9.762369934146371e-06, + "loss": 0.4694, + "step": 8660 + }, + { + "epoch": 2.4240134340890007, + "grad_norm": 0.24606825146254277, + "learning_rate": 9.753215009760092e-06, + "loss": 0.4527, + "step": 8661 + }, + { + "epoch": 2.424293310943185, + "grad_norm": 0.24880312290263892, + "learning_rate": 9.744063916085334e-06, + "loss": 0.4358, + "step": 8662 + }, + { + "epoch": 2.4245731877973693, + "grad_norm": 0.2521487586611111, + "learning_rate": 9.734916653993103e-06, + "loss": 0.4598, + "step": 8663 + }, + { + "epoch": 2.424853064651553, + "grad_norm": 0.24923895019671002, + "learning_rate": 9.725773224354046e-06, + "loss": 0.4237, + "step": 8664 + }, + { + "epoch": 2.4251329415057374, + "grad_norm": 0.25683108938031485, + "learning_rate": 9.716633628038435e-06, + "loss": 0.4481, + "step": 8665 + }, + { + "epoch": 2.4254128183599217, + "grad_norm": 0.2594524193860435, + "learning_rate": 9.707497865916181e-06, + "loss": 0.4476, + "step": 8666 + }, + { + "epoch": 2.4256926952141056, + "grad_norm": 0.24778126512126722, + "learning_rate": 9.698365938856834e-06, + "loss": 0.4378, + "step": 8667 + }, + { + "epoch": 2.42597257206829, + "grad_norm": 0.26174957377692537, + "learning_rate": 9.689237847729576e-06, + "loss": 0.4322, + "step": 8668 + }, + { + "epoch": 2.426252448922474, + "grad_norm": 0.2464064176147182, + "learning_rate": 9.680113593403222e-06, + "loss": 0.4684, + "step": 8669 + }, + { + "epoch": 2.4265323257766584, + "grad_norm": 0.25190262983201533, + "learning_rate": 9.670993176746223e-06, + "loss": 0.4698, + "step": 8670 + }, + { + "epoch": 2.4268122026308423, + "grad_norm": 0.2457078652813174, + "learning_rate": 9.661876598626669e-06, + "loss": 0.4503, + "step": 8671 + }, + { + "epoch": 2.4270920794850266, + "grad_norm": 0.24796744225271272, + "learning_rate": 9.652763859912279e-06, + "loss": 0.4283, + "step": 8672 + }, + { + "epoch": 2.427371956339211, + "grad_norm": 0.25001875025402515, + "learning_rate": 9.643654961470405e-06, + "loss": 0.4471, + "step": 8673 + }, + { + "epoch": 2.4276518331933947, + "grad_norm": 0.26094082727578394, + "learning_rate": 9.634549904168038e-06, + "loss": 0.4556, + "step": 8674 + }, + { + "epoch": 2.427931710047579, + "grad_norm": 0.2527766814496001, + "learning_rate": 9.625448688871808e-06, + "loss": 0.4396, + "step": 8675 + }, + { + "epoch": 2.4282115869017633, + "grad_norm": 0.2533891833120829, + "learning_rate": 9.61635131644797e-06, + "loss": 0.4496, + "step": 8676 + }, + { + "epoch": 2.4284914637559476, + "grad_norm": 0.24953538158933503, + "learning_rate": 9.60725778776242e-06, + "loss": 0.4521, + "step": 8677 + }, + { + "epoch": 2.4287713406101314, + "grad_norm": 0.25194688014968925, + "learning_rate": 9.598168103680676e-06, + "loss": 0.4422, + "step": 8678 + }, + { + "epoch": 2.4290512174643157, + "grad_norm": 0.2593759850548933, + "learning_rate": 9.58908226506791e-06, + "loss": 0.47, + "step": 8679 + }, + { + "epoch": 2.4293310943185, + "grad_norm": 0.2516560451199984, + "learning_rate": 9.580000272788914e-06, + "loss": 0.4493, + "step": 8680 + }, + { + "epoch": 2.429610971172684, + "grad_norm": 0.25406709751230233, + "learning_rate": 9.570922127708115e-06, + "loss": 0.4541, + "step": 8681 + }, + { + "epoch": 2.429890848026868, + "grad_norm": 0.2570269532750313, + "learning_rate": 9.56184783068958e-06, + "loss": 0.4477, + "step": 8682 + }, + { + "epoch": 2.4301707248810525, + "grad_norm": 0.24971566492724123, + "learning_rate": 9.552777382597e-06, + "loss": 0.4532, + "step": 8683 + }, + { + "epoch": 2.4304506017352363, + "grad_norm": 0.25302483303820306, + "learning_rate": 9.543710784293709e-06, + "loss": 0.4446, + "step": 8684 + }, + { + "epoch": 2.4307304785894206, + "grad_norm": 0.23827549478306503, + "learning_rate": 9.534648036642662e-06, + "loss": 0.445, + "step": 8685 + }, + { + "epoch": 2.431010355443605, + "grad_norm": 0.2533356794578736, + "learning_rate": 9.525589140506475e-06, + "loss": 0.463, + "step": 8686 + }, + { + "epoch": 2.4312902322977887, + "grad_norm": 0.2534237929116753, + "learning_rate": 9.516534096747365e-06, + "loss": 0.46, + "step": 8687 + }, + { + "epoch": 2.431570109151973, + "grad_norm": 0.26224123572476243, + "learning_rate": 9.507482906227193e-06, + "loss": 0.4528, + "step": 8688 + }, + { + "epoch": 2.4318499860061573, + "grad_norm": 0.25835046245255744, + "learning_rate": 9.498435569807474e-06, + "loss": 0.4575, + "step": 8689 + }, + { + "epoch": 2.4321298628603416, + "grad_norm": 0.2528107537029733, + "learning_rate": 9.48939208834932e-06, + "loss": 0.4557, + "step": 8690 + }, + { + "epoch": 2.4324097397145255, + "grad_norm": 0.24686083665992026, + "learning_rate": 9.480352462713505e-06, + "loss": 0.4417, + "step": 8691 + }, + { + "epoch": 2.4326896165687097, + "grad_norm": 0.244128112584507, + "learning_rate": 9.471316693760418e-06, + "loss": 0.4298, + "step": 8692 + }, + { + "epoch": 2.432969493422894, + "grad_norm": 0.2642604604375863, + "learning_rate": 9.462284782350095e-06, + "loss": 0.4629, + "step": 8693 + }, + { + "epoch": 2.4332493702770783, + "grad_norm": 0.23697579728429405, + "learning_rate": 9.453256729342198e-06, + "loss": 0.4507, + "step": 8694 + }, + { + "epoch": 2.433529247131262, + "grad_norm": 0.2598769680279463, + "learning_rate": 9.44423253559602e-06, + "loss": 0.443, + "step": 8695 + }, + { + "epoch": 2.4338091239854465, + "grad_norm": 0.2519461630230364, + "learning_rate": 9.435212201970488e-06, + "loss": 0.4316, + "step": 8696 + }, + { + "epoch": 2.4340890008396308, + "grad_norm": 0.2570446768156951, + "learning_rate": 9.426195729324161e-06, + "loss": 0.4467, + "step": 8697 + }, + { + "epoch": 2.4343688776938146, + "grad_norm": 0.2511001572796495, + "learning_rate": 9.417183118515238e-06, + "loss": 0.4277, + "step": 8698 + }, + { + "epoch": 2.434648754547999, + "grad_norm": 0.2534741851012963, + "learning_rate": 9.408174370401546e-06, + "loss": 0.4632, + "step": 8699 + }, + { + "epoch": 2.434928631402183, + "grad_norm": 0.24495205335664716, + "learning_rate": 9.399169485840531e-06, + "loss": 0.4512, + "step": 8700 + }, + { + "epoch": 2.435208508256367, + "grad_norm": 0.2588232626877536, + "learning_rate": 9.390168465689291e-06, + "loss": 0.4572, + "step": 8701 + }, + { + "epoch": 2.4354883851105513, + "grad_norm": 0.2595244918965313, + "learning_rate": 9.381171310804549e-06, + "loss": 0.4507, + "step": 8702 + }, + { + "epoch": 2.4357682619647356, + "grad_norm": 0.2509568386919133, + "learning_rate": 9.372178022042655e-06, + "loss": 0.4537, + "step": 8703 + }, + { + "epoch": 2.4360481388189195, + "grad_norm": 0.2574851073663655, + "learning_rate": 9.363188600259592e-06, + "loss": 0.4629, + "step": 8704 + }, + { + "epoch": 2.4363280156731038, + "grad_norm": 0.24143585573403095, + "learning_rate": 9.354203046310989e-06, + "loss": 0.4416, + "step": 8705 + }, + { + "epoch": 2.436607892527288, + "grad_norm": 0.2567156832788759, + "learning_rate": 9.345221361052092e-06, + "loss": 0.4557, + "step": 8706 + }, + { + "epoch": 2.4368877693814723, + "grad_norm": 0.2684706355733321, + "learning_rate": 9.33624354533778e-06, + "loss": 0.4394, + "step": 8707 + }, + { + "epoch": 2.437167646235656, + "grad_norm": 0.2571773773922458, + "learning_rate": 9.327269600022564e-06, + "loss": 0.4574, + "step": 8708 + }, + { + "epoch": 2.4374475230898405, + "grad_norm": 0.25389521166220824, + "learning_rate": 9.3182995259606e-06, + "loss": 0.4646, + "step": 8709 + }, + { + "epoch": 2.4377273999440248, + "grad_norm": 0.2573298915143599, + "learning_rate": 9.309333324005653e-06, + "loss": 0.4627, + "step": 8710 + }, + { + "epoch": 2.4380072767982086, + "grad_norm": 0.25888647008282245, + "learning_rate": 9.300370995011137e-06, + "loss": 0.4628, + "step": 8711 + }, + { + "epoch": 2.438287153652393, + "grad_norm": 0.24873750669651037, + "learning_rate": 9.291412539830091e-06, + "loss": 0.4358, + "step": 8712 + }, + { + "epoch": 2.438567030506577, + "grad_norm": 0.2460586067257239, + "learning_rate": 9.282457959315183e-06, + "loss": 0.4391, + "step": 8713 + }, + { + "epoch": 2.4388469073607615, + "grad_norm": 0.24809386972930658, + "learning_rate": 9.27350725431872e-06, + "loss": 0.4659, + "step": 8714 + }, + { + "epoch": 2.4391267842149453, + "grad_norm": 0.24405633863970483, + "learning_rate": 9.264560425692632e-06, + "loss": 0.4277, + "step": 8715 + }, + { + "epoch": 2.4394066610691296, + "grad_norm": 0.2438372099492113, + "learning_rate": 9.25561747428848e-06, + "loss": 0.4404, + "step": 8716 + }, + { + "epoch": 2.439686537923314, + "grad_norm": 0.25901714955768335, + "learning_rate": 9.246678400957464e-06, + "loss": 0.4703, + "step": 8717 + }, + { + "epoch": 2.4399664147774978, + "grad_norm": 0.26139742145792844, + "learning_rate": 9.23774320655041e-06, + "loss": 0.4708, + "step": 8718 + }, + { + "epoch": 2.440246291631682, + "grad_norm": 0.25447992886901827, + "learning_rate": 9.228811891917771e-06, + "loss": 0.4432, + "step": 8719 + }, + { + "epoch": 2.4405261684858663, + "grad_norm": 0.24308488526054634, + "learning_rate": 9.219884457909634e-06, + "loss": 0.45, + "step": 8720 + }, + { + "epoch": 2.44080604534005, + "grad_norm": 0.25362680347028754, + "learning_rate": 9.210960905375726e-06, + "loss": 0.4535, + "step": 8721 + }, + { + "epoch": 2.4410859221942345, + "grad_norm": 0.2549043073450669, + "learning_rate": 9.202041235165387e-06, + "loss": 0.4497, + "step": 8722 + }, + { + "epoch": 2.4413657990484188, + "grad_norm": 0.24929151091884463, + "learning_rate": 9.193125448127599e-06, + "loss": 0.4331, + "step": 8723 + }, + { + "epoch": 2.4416456759026026, + "grad_norm": 0.2555091617525663, + "learning_rate": 9.18421354511097e-06, + "loss": 0.4637, + "step": 8724 + }, + { + "epoch": 2.441925552756787, + "grad_norm": 0.2459175146329819, + "learning_rate": 9.175305526963746e-06, + "loss": 0.4709, + "step": 8725 + }, + { + "epoch": 2.442205429610971, + "grad_norm": 0.257298654064916, + "learning_rate": 9.166401394533786e-06, + "loss": 0.4504, + "step": 8726 + }, + { + "epoch": 2.4424853064651555, + "grad_norm": 0.25975736680860273, + "learning_rate": 9.157501148668602e-06, + "loss": 0.4594, + "step": 8727 + }, + { + "epoch": 2.4427651833193393, + "grad_norm": 0.26699151334811305, + "learning_rate": 9.148604790215316e-06, + "loss": 0.4449, + "step": 8728 + }, + { + "epoch": 2.4430450601735236, + "grad_norm": 0.26310916710123866, + "learning_rate": 9.139712320020694e-06, + "loss": 0.4613, + "step": 8729 + }, + { + "epoch": 2.443324937027708, + "grad_norm": 0.24990486843781354, + "learning_rate": 9.130823738931122e-06, + "loss": 0.4572, + "step": 8730 + }, + { + "epoch": 2.443604813881892, + "grad_norm": 0.2454856718093436, + "learning_rate": 9.121939047792621e-06, + "loss": 0.4421, + "step": 8731 + }, + { + "epoch": 2.443884690736076, + "grad_norm": 0.258350340714871, + "learning_rate": 9.113058247450846e-06, + "loss": 0.4629, + "step": 8732 + }, + { + "epoch": 2.4441645675902604, + "grad_norm": 0.25948440717570664, + "learning_rate": 9.104181338751072e-06, + "loss": 0.4498, + "step": 8733 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.2462499351337228, + "learning_rate": 9.095308322538204e-06, + "loss": 0.4396, + "step": 8734 + }, + { + "epoch": 2.4447243212986285, + "grad_norm": 0.2569682798802448, + "learning_rate": 9.086439199656787e-06, + "loss": 0.445, + "step": 8735 + }, + { + "epoch": 2.445004198152813, + "grad_norm": 0.246647949016701, + "learning_rate": 9.077573970950991e-06, + "loss": 0.4683, + "step": 8736 + }, + { + "epoch": 2.445284075006997, + "grad_norm": 0.24789787281425485, + "learning_rate": 9.068712637264604e-06, + "loss": 0.4457, + "step": 8737 + }, + { + "epoch": 2.445563951861181, + "grad_norm": 0.2681755619265382, + "learning_rate": 9.059855199441065e-06, + "loss": 0.4724, + "step": 8738 + }, + { + "epoch": 2.445843828715365, + "grad_norm": 0.24597980816150247, + "learning_rate": 9.051001658323415e-06, + "loss": 0.4506, + "step": 8739 + }, + { + "epoch": 2.4461237055695495, + "grad_norm": 0.24518445891027368, + "learning_rate": 9.042152014754357e-06, + "loss": 0.4432, + "step": 8740 + }, + { + "epoch": 2.4464035824237333, + "grad_norm": 0.2608204879290255, + "learning_rate": 9.033306269576186e-06, + "loss": 0.4687, + "step": 8741 + }, + { + "epoch": 2.4466834592779176, + "grad_norm": 0.2512059862644084, + "learning_rate": 9.024464423630863e-06, + "loss": 0.4539, + "step": 8742 + }, + { + "epoch": 2.446963336132102, + "grad_norm": 0.2598669445578634, + "learning_rate": 9.015626477759942e-06, + "loss": 0.4392, + "step": 8743 + }, + { + "epoch": 2.447243212986286, + "grad_norm": 0.2611220893656895, + "learning_rate": 9.00679243280464e-06, + "loss": 0.443, + "step": 8744 + }, + { + "epoch": 2.44752308984047, + "grad_norm": 0.25262976592223696, + "learning_rate": 8.997962289605778e-06, + "loss": 0.4345, + "step": 8745 + }, + { + "epoch": 2.4478029666946544, + "grad_norm": 0.2571089164905739, + "learning_rate": 8.989136049003816e-06, + "loss": 0.4532, + "step": 8746 + }, + { + "epoch": 2.4480828435488386, + "grad_norm": 0.24915094209957508, + "learning_rate": 8.980313711838833e-06, + "loss": 0.469, + "step": 8747 + }, + { + "epoch": 2.4483627204030225, + "grad_norm": 0.2449521949390417, + "learning_rate": 8.971495278950559e-06, + "loss": 0.4449, + "step": 8748 + }, + { + "epoch": 2.448642597257207, + "grad_norm": 0.25833922557453676, + "learning_rate": 8.96268075117832e-06, + "loss": 0.4414, + "step": 8749 + }, + { + "epoch": 2.448922474111391, + "grad_norm": 0.2561921132027639, + "learning_rate": 8.953870129361103e-06, + "loss": 0.4751, + "step": 8750 + }, + { + "epoch": 2.4492023509655754, + "grad_norm": 0.25243468817494663, + "learning_rate": 8.945063414337502e-06, + "loss": 0.4535, + "step": 8751 + }, + { + "epoch": 2.449482227819759, + "grad_norm": 0.24407529942373457, + "learning_rate": 8.936260606945746e-06, + "loss": 0.4455, + "step": 8752 + }, + { + "epoch": 2.4497621046739435, + "grad_norm": 0.254439271242042, + "learning_rate": 8.927461708023682e-06, + "loss": 0.45, + "step": 8753 + }, + { + "epoch": 2.450041981528128, + "grad_norm": 0.2499518236860164, + "learning_rate": 8.918666718408807e-06, + "loss": 0.4276, + "step": 8754 + }, + { + "epoch": 2.4503218583823116, + "grad_norm": 0.24972048345014827, + "learning_rate": 8.90987563893823e-06, + "loss": 0.4638, + "step": 8755 + }, + { + "epoch": 2.450601735236496, + "grad_norm": 0.2592715456981266, + "learning_rate": 8.901088470448688e-06, + "loss": 0.4634, + "step": 8756 + }, + { + "epoch": 2.4508816120906802, + "grad_norm": 0.26717180906496124, + "learning_rate": 8.892305213776537e-06, + "loss": 0.4551, + "step": 8757 + }, + { + "epoch": 2.451161488944864, + "grad_norm": 0.2727554500837517, + "learning_rate": 8.883525869757808e-06, + "loss": 0.4698, + "step": 8758 + }, + { + "epoch": 2.4514413657990484, + "grad_norm": 0.25320575306537013, + "learning_rate": 8.874750439228086e-06, + "loss": 0.4599, + "step": 8759 + }, + { + "epoch": 2.4517212426532327, + "grad_norm": 0.26015165359909925, + "learning_rate": 8.865978923022639e-06, + "loss": 0.4373, + "step": 8760 + }, + { + "epoch": 2.4520011195074165, + "grad_norm": 0.24973055709950392, + "learning_rate": 8.857211321976345e-06, + "loss": 0.4371, + "step": 8761 + }, + { + "epoch": 2.452280996361601, + "grad_norm": 0.2567892230943019, + "learning_rate": 8.8484476369237e-06, + "loss": 0.4508, + "step": 8762 + }, + { + "epoch": 2.452560873215785, + "grad_norm": 0.2651888022491084, + "learning_rate": 8.839687868698843e-06, + "loss": 0.4512, + "step": 8763 + }, + { + "epoch": 2.4528407500699694, + "grad_norm": 0.24439360381708025, + "learning_rate": 8.830932018135534e-06, + "loss": 0.4353, + "step": 8764 + }, + { + "epoch": 2.4531206269241532, + "grad_norm": 0.26569808054625527, + "learning_rate": 8.82218008606716e-06, + "loss": 0.4664, + "step": 8765 + }, + { + "epoch": 2.4534005037783375, + "grad_norm": 0.2604596365649492, + "learning_rate": 8.813432073326733e-06, + "loss": 0.4555, + "step": 8766 + }, + { + "epoch": 2.453680380632522, + "grad_norm": 0.25717993946272316, + "learning_rate": 8.804687980746895e-06, + "loss": 0.4516, + "step": 8767 + }, + { + "epoch": 2.453960257486706, + "grad_norm": 0.26238604410316096, + "learning_rate": 8.795947809159916e-06, + "loss": 0.4604, + "step": 8768 + }, + { + "epoch": 2.45424013434089, + "grad_norm": 0.26518424198451557, + "learning_rate": 8.787211559397685e-06, + "loss": 0.4413, + "step": 8769 + }, + { + "epoch": 2.4545200111950742, + "grad_norm": 0.2633362977252646, + "learning_rate": 8.778479232291725e-06, + "loss": 0.4407, + "step": 8770 + }, + { + "epoch": 2.4547998880492585, + "grad_norm": 0.2596837173525926, + "learning_rate": 8.769750828673185e-06, + "loss": 0.4413, + "step": 8771 + }, + { + "epoch": 2.4550797649034424, + "grad_norm": 0.24849747158814303, + "learning_rate": 8.761026349372836e-06, + "loss": 0.4538, + "step": 8772 + }, + { + "epoch": 2.4553596417576267, + "grad_norm": 0.26329439229113977, + "learning_rate": 8.752305795221083e-06, + "loss": 0.4447, + "step": 8773 + }, + { + "epoch": 2.455639518611811, + "grad_norm": 0.25238534245377653, + "learning_rate": 8.74358916704795e-06, + "loss": 0.435, + "step": 8774 + }, + { + "epoch": 2.455919395465995, + "grad_norm": 0.24898705016741032, + "learning_rate": 8.734876465683084e-06, + "loss": 0.4338, + "step": 8775 + }, + { + "epoch": 2.456199272320179, + "grad_norm": 0.24785772107154505, + "learning_rate": 8.726167691955795e-06, + "loss": 0.4729, + "step": 8776 + }, + { + "epoch": 2.4564791491743634, + "grad_norm": 0.239248536315287, + "learning_rate": 8.717462846694958e-06, + "loss": 0.4089, + "step": 8777 + }, + { + "epoch": 2.4567590260285472, + "grad_norm": 0.2429698015847293, + "learning_rate": 8.708761930729108e-06, + "loss": 0.4564, + "step": 8778 + }, + { + "epoch": 2.4570389028827315, + "grad_norm": 0.2546477530163254, + "learning_rate": 8.700064944886416e-06, + "loss": 0.4613, + "step": 8779 + }, + { + "epoch": 2.457318779736916, + "grad_norm": 0.25185237329456067, + "learning_rate": 8.691371889994653e-06, + "loss": 0.4555, + "step": 8780 + }, + { + "epoch": 2.4575986565911, + "grad_norm": 0.2600163293195142, + "learning_rate": 8.68268276688124e-06, + "loss": 0.4516, + "step": 8781 + }, + { + "epoch": 2.457878533445284, + "grad_norm": 0.2522208989063548, + "learning_rate": 8.673997576373205e-06, + "loss": 0.483, + "step": 8782 + }, + { + "epoch": 2.4581584102994682, + "grad_norm": 0.25219010364377603, + "learning_rate": 8.665316319297211e-06, + "loss": 0.4744, + "step": 8783 + }, + { + "epoch": 2.4584382871536525, + "grad_norm": 0.2460263169031998, + "learning_rate": 8.65663899647955e-06, + "loss": 0.4654, + "step": 8784 + }, + { + "epoch": 2.4587181640078364, + "grad_norm": 0.2475874976944104, + "learning_rate": 8.647965608746128e-06, + "loss": 0.4435, + "step": 8785 + }, + { + "epoch": 2.4589980408620207, + "grad_norm": 0.26112941218263963, + "learning_rate": 8.639296156922482e-06, + "loss": 0.447, + "step": 8786 + }, + { + "epoch": 2.459277917716205, + "grad_norm": 0.2381450065309774, + "learning_rate": 8.630630641833782e-06, + "loss": 0.4314, + "step": 8787 + }, + { + "epoch": 2.4595577945703893, + "grad_norm": 0.25006214540187555, + "learning_rate": 8.621969064304813e-06, + "loss": 0.4659, + "step": 8788 + }, + { + "epoch": 2.459837671424573, + "grad_norm": 0.24784708367652952, + "learning_rate": 8.613311425159986e-06, + "loss": 0.4465, + "step": 8789 + }, + { + "epoch": 2.4601175482787574, + "grad_norm": 0.23901346003043714, + "learning_rate": 8.604657725223348e-06, + "loss": 0.4502, + "step": 8790 + }, + { + "epoch": 2.4603974251329417, + "grad_norm": 0.2563017090038037, + "learning_rate": 8.596007965318553e-06, + "loss": 0.4454, + "step": 8791 + }, + { + "epoch": 2.4606773019871255, + "grad_norm": 0.2534308207864483, + "learning_rate": 8.58736214626888e-06, + "loss": 0.4471, + "step": 8792 + }, + { + "epoch": 2.46095717884131, + "grad_norm": 0.24716832102749217, + "learning_rate": 8.578720268897278e-06, + "loss": 0.4682, + "step": 8793 + }, + { + "epoch": 2.461237055695494, + "grad_norm": 0.24589647734389258, + "learning_rate": 8.570082334026269e-06, + "loss": 0.4238, + "step": 8794 + }, + { + "epoch": 2.461516932549678, + "grad_norm": 0.2706887481862387, + "learning_rate": 8.561448342478006e-06, + "loss": 0.4727, + "step": 8795 + }, + { + "epoch": 2.4617968094038623, + "grad_norm": 0.25009743700279913, + "learning_rate": 8.552818295074283e-06, + "loss": 0.4653, + "step": 8796 + }, + { + "epoch": 2.4620766862580465, + "grad_norm": 0.2609228214806486, + "learning_rate": 8.544192192636514e-06, + "loss": 0.4579, + "step": 8797 + }, + { + "epoch": 2.4623565631122304, + "grad_norm": 0.2552038023734495, + "learning_rate": 8.535570035985735e-06, + "loss": 0.4499, + "step": 8798 + }, + { + "epoch": 2.4626364399664147, + "grad_norm": 0.2623068259763304, + "learning_rate": 8.526951825942609e-06, + "loss": 0.4722, + "step": 8799 + }, + { + "epoch": 2.462916316820599, + "grad_norm": 0.2345347434331712, + "learning_rate": 8.518337563327416e-06, + "loss": 0.4388, + "step": 8800 + }, + { + "epoch": 2.4631961936747833, + "grad_norm": 0.33316634183098015, + "learning_rate": 8.509727248960081e-06, + "loss": 0.4658, + "step": 8801 + }, + { + "epoch": 2.463476070528967, + "grad_norm": 0.2614180155249143, + "learning_rate": 8.501120883660124e-06, + "loss": 0.4473, + "step": 8802 + }, + { + "epoch": 2.4637559473831514, + "grad_norm": 0.2578616030305097, + "learning_rate": 8.492518468246713e-06, + "loss": 0.4632, + "step": 8803 + }, + { + "epoch": 2.4640358242373357, + "grad_norm": 0.25364120687260067, + "learning_rate": 8.483920003538626e-06, + "loss": 0.4464, + "step": 8804 + }, + { + "epoch": 2.4643157010915195, + "grad_norm": 0.2511528232877162, + "learning_rate": 8.47532549035427e-06, + "loss": 0.4442, + "step": 8805 + }, + { + "epoch": 2.464595577945704, + "grad_norm": 0.26722745107266116, + "learning_rate": 8.466734929511672e-06, + "loss": 0.4448, + "step": 8806 + }, + { + "epoch": 2.464875454799888, + "grad_norm": 0.24661331952045684, + "learning_rate": 8.458148321828495e-06, + "loss": 0.4411, + "step": 8807 + }, + { + "epoch": 2.4651553316540724, + "grad_norm": 0.24234777529026694, + "learning_rate": 8.449565668122012e-06, + "loss": 0.4449, + "step": 8808 + }, + { + "epoch": 2.4654352085082563, + "grad_norm": 0.25100135931081274, + "learning_rate": 8.440986969209114e-06, + "loss": 0.4568, + "step": 8809 + }, + { + "epoch": 2.4657150853624406, + "grad_norm": 0.2616029794356942, + "learning_rate": 8.43241222590635e-06, + "loss": 0.4488, + "step": 8810 + }, + { + "epoch": 2.465994962216625, + "grad_norm": 0.2516585904165877, + "learning_rate": 8.423841439029856e-06, + "loss": 0.4507, + "step": 8811 + }, + { + "epoch": 2.4662748390708087, + "grad_norm": 0.250785187040853, + "learning_rate": 8.415274609395419e-06, + "loss": 0.4453, + "step": 8812 + }, + { + "epoch": 2.466554715924993, + "grad_norm": 0.26842108922718766, + "learning_rate": 8.406711737818407e-06, + "loss": 0.4515, + "step": 8813 + }, + { + "epoch": 2.4668345927791773, + "grad_norm": 0.25759536406008543, + "learning_rate": 8.398152825113853e-06, + "loss": 0.4349, + "step": 8814 + }, + { + "epoch": 2.467114469633361, + "grad_norm": 0.24877343150791728, + "learning_rate": 8.389597872096395e-06, + "loss": 0.4577, + "step": 8815 + }, + { + "epoch": 2.4673943464875454, + "grad_norm": 0.24690516069503435, + "learning_rate": 8.381046879580306e-06, + "loss": 0.4472, + "step": 8816 + }, + { + "epoch": 2.4676742233417297, + "grad_norm": 0.2498296130339311, + "learning_rate": 8.372499848379473e-06, + "loss": 0.4423, + "step": 8817 + }, + { + "epoch": 2.467954100195914, + "grad_norm": 0.23970722909371703, + "learning_rate": 8.363956779307397e-06, + "loss": 0.4387, + "step": 8818 + }, + { + "epoch": 2.468233977050098, + "grad_norm": 0.25215230521910237, + "learning_rate": 8.355417673177224e-06, + "loss": 0.4764, + "step": 8819 + }, + { + "epoch": 2.468513853904282, + "grad_norm": 0.2627276042100404, + "learning_rate": 8.346882530801703e-06, + "loss": 0.4791, + "step": 8820 + }, + { + "epoch": 2.4687937307584664, + "grad_norm": 0.2634063154876318, + "learning_rate": 8.338351352993223e-06, + "loss": 0.4523, + "step": 8821 + }, + { + "epoch": 2.4690736076126503, + "grad_norm": 0.260379878904615, + "learning_rate": 8.329824140563774e-06, + "loss": 0.4537, + "step": 8822 + }, + { + "epoch": 2.4693534844668346, + "grad_norm": 0.2474845610484414, + "learning_rate": 8.32130089432499e-06, + "loss": 0.4466, + "step": 8823 + }, + { + "epoch": 2.469633361321019, + "grad_norm": 0.24575569815410245, + "learning_rate": 8.312781615088117e-06, + "loss": 0.4678, + "step": 8824 + }, + { + "epoch": 2.469913238175203, + "grad_norm": 0.2501725858848952, + "learning_rate": 8.304266303664022e-06, + "loss": 0.4318, + "step": 8825 + }, + { + "epoch": 2.470193115029387, + "grad_norm": 0.26158386112949367, + "learning_rate": 8.295754960863183e-06, + "loss": 0.4641, + "step": 8826 + }, + { + "epoch": 2.4704729918835713, + "grad_norm": 0.2512532674692248, + "learning_rate": 8.287247587495745e-06, + "loss": 0.4666, + "step": 8827 + }, + { + "epoch": 2.4707528687377556, + "grad_norm": 0.25758225105575505, + "learning_rate": 8.278744184371428e-06, + "loss": 0.4466, + "step": 8828 + }, + { + "epoch": 2.4710327455919394, + "grad_norm": 0.26442196017313674, + "learning_rate": 8.270244752299594e-06, + "loss": 0.4453, + "step": 8829 + }, + { + "epoch": 2.4713126224461237, + "grad_norm": 0.24282226088915165, + "learning_rate": 8.261749292089232e-06, + "loss": 0.4483, + "step": 8830 + }, + { + "epoch": 2.471592499300308, + "grad_norm": 0.2653192403347194, + "learning_rate": 8.253257804548926e-06, + "loss": 0.4606, + "step": 8831 + }, + { + "epoch": 2.471872376154492, + "grad_norm": 0.256316459436771, + "learning_rate": 8.244770290486902e-06, + "loss": 0.4357, + "step": 8832 + }, + { + "epoch": 2.472152253008676, + "grad_norm": 0.257732831978499, + "learning_rate": 8.23628675071102e-06, + "loss": 0.4578, + "step": 8833 + }, + { + "epoch": 2.4724321298628604, + "grad_norm": 0.25750455150037393, + "learning_rate": 8.227807186028735e-06, + "loss": 0.4754, + "step": 8834 + }, + { + "epoch": 2.4727120067170443, + "grad_norm": 0.2585203961533809, + "learning_rate": 8.219331597247149e-06, + "loss": 0.4446, + "step": 8835 + }, + { + "epoch": 2.4729918835712286, + "grad_norm": 0.2502622173079045, + "learning_rate": 8.210859985172965e-06, + "loss": 0.4393, + "step": 8836 + }, + { + "epoch": 2.473271760425413, + "grad_norm": 0.26794254234560555, + "learning_rate": 8.202392350612515e-06, + "loss": 0.4653, + "step": 8837 + }, + { + "epoch": 2.473551637279597, + "grad_norm": 0.253063023944246, + "learning_rate": 8.193928694371762e-06, + "loss": 0.4692, + "step": 8838 + }, + { + "epoch": 2.473831514133781, + "grad_norm": 0.2631953633339251, + "learning_rate": 8.18546901725627e-06, + "loss": 0.4533, + "step": 8839 + }, + { + "epoch": 2.4741113909879653, + "grad_norm": 0.25273374524819214, + "learning_rate": 8.177013320071242e-06, + "loss": 0.4284, + "step": 8840 + }, + { + "epoch": 2.4743912678421496, + "grad_norm": 0.26540190913298567, + "learning_rate": 8.1685616036215e-06, + "loss": 0.4653, + "step": 8841 + }, + { + "epoch": 2.4746711446963334, + "grad_norm": 0.25256252488623404, + "learning_rate": 8.160113868711472e-06, + "loss": 0.4344, + "step": 8842 + }, + { + "epoch": 2.4749510215505177, + "grad_norm": 0.2422732339902129, + "learning_rate": 8.151670116145232e-06, + "loss": 0.4262, + "step": 8843 + }, + { + "epoch": 2.475230898404702, + "grad_norm": 0.2469530278705053, + "learning_rate": 8.143230346726437e-06, + "loss": 0.4265, + "step": 8844 + }, + { + "epoch": 2.4755107752588863, + "grad_norm": 0.23441539423895605, + "learning_rate": 8.13479456125842e-06, + "loss": 0.4525, + "step": 8845 + }, + { + "epoch": 2.47579065211307, + "grad_norm": 0.2569000369849948, + "learning_rate": 8.126362760544087e-06, + "loss": 0.465, + "step": 8846 + }, + { + "epoch": 2.4760705289672544, + "grad_norm": 0.25036897978886113, + "learning_rate": 8.11793494538599e-06, + "loss": 0.4373, + "step": 8847 + }, + { + "epoch": 2.4763504058214387, + "grad_norm": 0.2534382996776073, + "learning_rate": 8.109511116586293e-06, + "loss": 0.4353, + "step": 8848 + }, + { + "epoch": 2.4766302826756226, + "grad_norm": 0.2625875150888653, + "learning_rate": 8.101091274946766e-06, + "loss": 0.4589, + "step": 8849 + }, + { + "epoch": 2.476910159529807, + "grad_norm": 0.2458607655275804, + "learning_rate": 8.092675421268826e-06, + "loss": 0.4419, + "step": 8850 + }, + { + "epoch": 2.477190036383991, + "grad_norm": 0.2695265276415108, + "learning_rate": 8.084263556353488e-06, + "loss": 0.4528, + "step": 8851 + }, + { + "epoch": 2.477469913238175, + "grad_norm": 0.23958898865776768, + "learning_rate": 8.075855681001415e-06, + "loss": 0.453, + "step": 8852 + }, + { + "epoch": 2.4777497900923593, + "grad_norm": 0.25722658792905134, + "learning_rate": 8.067451796012859e-06, + "loss": 0.4461, + "step": 8853 + }, + { + "epoch": 2.4780296669465436, + "grad_norm": 0.2505958536295343, + "learning_rate": 8.059051902187714e-06, + "loss": 0.4564, + "step": 8854 + }, + { + "epoch": 2.4783095438007274, + "grad_norm": 0.25713175258741855, + "learning_rate": 8.05065600032548e-06, + "loss": 0.4703, + "step": 8855 + }, + { + "epoch": 2.4785894206549117, + "grad_norm": 0.24969849742796021, + "learning_rate": 8.042264091225293e-06, + "loss": 0.4369, + "step": 8856 + }, + { + "epoch": 2.478869297509096, + "grad_norm": 0.25187749023349426, + "learning_rate": 8.033876175685894e-06, + "loss": 0.4327, + "step": 8857 + }, + { + "epoch": 2.4791491743632803, + "grad_norm": 0.25141787198038745, + "learning_rate": 8.025492254505646e-06, + "loss": 0.4401, + "step": 8858 + }, + { + "epoch": 2.479429051217464, + "grad_norm": 0.24336124216872376, + "learning_rate": 8.017112328482546e-06, + "loss": 0.4411, + "step": 8859 + }, + { + "epoch": 2.4797089280716484, + "grad_norm": 0.25966246947911736, + "learning_rate": 8.008736398414185e-06, + "loss": 0.4462, + "step": 8860 + }, + { + "epoch": 2.4799888049258327, + "grad_norm": 0.2529278877262405, + "learning_rate": 8.000364465097792e-06, + "loss": 0.4425, + "step": 8861 + }, + { + "epoch": 2.480268681780017, + "grad_norm": 0.2538931757905225, + "learning_rate": 7.991996529330226e-06, + "loss": 0.4541, + "step": 8862 + }, + { + "epoch": 2.480548558634201, + "grad_norm": 0.25134289965651335, + "learning_rate": 7.983632591907947e-06, + "loss": 0.4533, + "step": 8863 + }, + { + "epoch": 2.480828435488385, + "grad_norm": 0.24866939522478199, + "learning_rate": 7.975272653627031e-06, + "loss": 0.4388, + "step": 8864 + }, + { + "epoch": 2.4811083123425695, + "grad_norm": 0.25517906403239415, + "learning_rate": 7.96691671528319e-06, + "loss": 0.4493, + "step": 8865 + }, + { + "epoch": 2.4813881891967533, + "grad_norm": 0.2606694042685941, + "learning_rate": 7.95856477767174e-06, + "loss": 0.468, + "step": 8866 + }, + { + "epoch": 2.4816680660509376, + "grad_norm": 0.2692510674753085, + "learning_rate": 7.950216841587638e-06, + "loss": 0.4843, + "step": 8867 + }, + { + "epoch": 2.481947942905122, + "grad_norm": 0.2632445380264223, + "learning_rate": 7.941872907825421e-06, + "loss": 0.4639, + "step": 8868 + }, + { + "epoch": 2.4822278197593057, + "grad_norm": 0.257729898588726, + "learning_rate": 7.933532977179287e-06, + "loss": 0.4548, + "step": 8869 + }, + { + "epoch": 2.48250769661349, + "grad_norm": 0.24741173124978624, + "learning_rate": 7.925197050443023e-06, + "loss": 0.4403, + "step": 8870 + }, + { + "epoch": 2.4827875734676743, + "grad_norm": 0.2729225867355483, + "learning_rate": 7.916865128410056e-06, + "loss": 0.4707, + "step": 8871 + }, + { + "epoch": 2.483067450321858, + "grad_norm": 0.2538403651257615, + "learning_rate": 7.908537211873423e-06, + "loss": 0.4738, + "step": 8872 + }, + { + "epoch": 2.4833473271760425, + "grad_norm": 0.25179095236947746, + "learning_rate": 7.900213301625774e-06, + "loss": 0.4427, + "step": 8873 + }, + { + "epoch": 2.4836272040302267, + "grad_norm": 0.2511032981585161, + "learning_rate": 7.891893398459393e-06, + "loss": 0.4643, + "step": 8874 + }, + { + "epoch": 2.483907080884411, + "grad_norm": 0.24316285899510243, + "learning_rate": 7.883577503166162e-06, + "loss": 0.4184, + "step": 8875 + }, + { + "epoch": 2.484186957738595, + "grad_norm": 0.23892801848790826, + "learning_rate": 7.875265616537603e-06, + "loss": 0.4482, + "step": 8876 + }, + { + "epoch": 2.484466834592779, + "grad_norm": 0.2586827045175894, + "learning_rate": 7.866957739364838e-06, + "loss": 0.4413, + "step": 8877 + }, + { + "epoch": 2.4847467114469635, + "grad_norm": 0.24991802724800657, + "learning_rate": 7.858653872438604e-06, + "loss": 0.4474, + "step": 8878 + }, + { + "epoch": 2.4850265883011473, + "grad_norm": 0.25370606353769065, + "learning_rate": 7.850354016549299e-06, + "loss": 0.4686, + "step": 8879 + }, + { + "epoch": 2.4853064651553316, + "grad_norm": 0.2708748813220551, + "learning_rate": 7.842058172486888e-06, + "loss": 0.473, + "step": 8880 + }, + { + "epoch": 2.485586342009516, + "grad_norm": 0.27692138171144687, + "learning_rate": 7.833766341040977e-06, + "loss": 0.4571, + "step": 8881 + }, + { + "epoch": 2.4858662188637, + "grad_norm": 0.2793897596750318, + "learning_rate": 7.825478523000784e-06, + "loss": 0.4483, + "step": 8882 + }, + { + "epoch": 2.486146095717884, + "grad_norm": 0.25167269286679217, + "learning_rate": 7.817194719155157e-06, + "loss": 0.4409, + "step": 8883 + }, + { + "epoch": 2.4864259725720683, + "grad_norm": 0.25515552979710937, + "learning_rate": 7.808914930292543e-06, + "loss": 0.4396, + "step": 8884 + }, + { + "epoch": 2.4867058494262526, + "grad_norm": 0.2551114566159558, + "learning_rate": 7.800639157201034e-06, + "loss": 0.4694, + "step": 8885 + }, + { + "epoch": 2.4869857262804365, + "grad_norm": 0.26029356290704797, + "learning_rate": 7.792367400668298e-06, + "loss": 0.4627, + "step": 8886 + }, + { + "epoch": 2.4872656031346208, + "grad_norm": 0.23504242524148974, + "learning_rate": 7.784099661481659e-06, + "loss": 0.4478, + "step": 8887 + }, + { + "epoch": 2.487545479988805, + "grad_norm": 0.2512748274880192, + "learning_rate": 7.775835940428045e-06, + "loss": 0.4537, + "step": 8888 + }, + { + "epoch": 2.487825356842989, + "grad_norm": 0.25171981143190264, + "learning_rate": 7.767576238294e-06, + "loss": 0.4581, + "step": 8889 + }, + { + "epoch": 2.488105233697173, + "grad_norm": 0.24622432633520117, + "learning_rate": 7.759320555865685e-06, + "loss": 0.4513, + "step": 8890 + }, + { + "epoch": 2.4883851105513575, + "grad_norm": 0.25616479919867263, + "learning_rate": 7.75106889392888e-06, + "loss": 0.4391, + "step": 8891 + }, + { + "epoch": 2.4886649874055413, + "grad_norm": 0.2627191482362859, + "learning_rate": 7.74282125326899e-06, + "loss": 0.4676, + "step": 8892 + }, + { + "epoch": 2.4889448642597256, + "grad_norm": 0.2510764610320255, + "learning_rate": 7.734577634671026e-06, + "loss": 0.4428, + "step": 8893 + }, + { + "epoch": 2.48922474111391, + "grad_norm": 0.2563019188998492, + "learning_rate": 7.726338038919617e-06, + "loss": 0.4775, + "step": 8894 + }, + { + "epoch": 2.489504617968094, + "grad_norm": 0.24968828858127015, + "learning_rate": 7.718102466799016e-06, + "loss": 0.436, + "step": 8895 + }, + { + "epoch": 2.489784494822278, + "grad_norm": 0.24668294561710194, + "learning_rate": 7.709870919093076e-06, + "loss": 0.4428, + "step": 8896 + }, + { + "epoch": 2.4900643716764623, + "grad_norm": 0.2642183359750441, + "learning_rate": 7.701643396585307e-06, + "loss": 0.4412, + "step": 8897 + }, + { + "epoch": 2.4903442485306466, + "grad_norm": 0.25214599009723276, + "learning_rate": 7.69341990005879e-06, + "loss": 0.4476, + "step": 8898 + }, + { + "epoch": 2.490624125384831, + "grad_norm": 0.25425374085772656, + "learning_rate": 7.685200430296246e-06, + "loss": 0.4562, + "step": 8899 + }, + { + "epoch": 2.4909040022390148, + "grad_norm": 0.2585353507804545, + "learning_rate": 7.676984988080016e-06, + "loss": 0.459, + "step": 8900 + }, + { + "epoch": 2.491183879093199, + "grad_norm": 0.2489273363132629, + "learning_rate": 7.66877357419204e-06, + "loss": 0.4469, + "step": 8901 + }, + { + "epoch": 2.4914637559473833, + "grad_norm": 0.2540000855545819, + "learning_rate": 7.660566189413892e-06, + "loss": 0.4511, + "step": 8902 + }, + { + "epoch": 2.491743632801567, + "grad_norm": 0.25985644349776127, + "learning_rate": 7.652362834526766e-06, + "loss": 0.449, + "step": 8903 + }, + { + "epoch": 2.4920235096557515, + "grad_norm": 0.2562089695994192, + "learning_rate": 7.64416351031143e-06, + "loss": 0.4439, + "step": 8904 + }, + { + "epoch": 2.4923033865099358, + "grad_norm": 0.25278132260236597, + "learning_rate": 7.635968217548322e-06, + "loss": 0.4659, + "step": 8905 + }, + { + "epoch": 2.4925832633641196, + "grad_norm": 0.25888564877155157, + "learning_rate": 7.627776957017474e-06, + "loss": 0.4399, + "step": 8906 + }, + { + "epoch": 2.492863140218304, + "grad_norm": 0.24493965083645694, + "learning_rate": 7.619589729498527e-06, + "loss": 0.4377, + "step": 8907 + }, + { + "epoch": 2.493143017072488, + "grad_norm": 0.24969443975352626, + "learning_rate": 7.6114065357707544e-06, + "loss": 0.4644, + "step": 8908 + }, + { + "epoch": 2.493422893926672, + "grad_norm": 0.2565209300903901, + "learning_rate": 7.603227376613031e-06, + "loss": 0.4552, + "step": 8909 + }, + { + "epoch": 2.4937027707808563, + "grad_norm": 0.2452124420124257, + "learning_rate": 7.595052252803858e-06, + "loss": 0.4517, + "step": 8910 + }, + { + "epoch": 2.4939826476350406, + "grad_norm": 0.255949702565928, + "learning_rate": 7.5868811651213454e-06, + "loss": 0.4557, + "step": 8911 + }, + { + "epoch": 2.494262524489225, + "grad_norm": 0.259286446567972, + "learning_rate": 7.578714114343227e-06, + "loss": 0.4734, + "step": 8912 + }, + { + "epoch": 2.4945424013434088, + "grad_norm": 0.2640374675122462, + "learning_rate": 7.570551101246831e-06, + "loss": 0.4644, + "step": 8913 + }, + { + "epoch": 2.494822278197593, + "grad_norm": 0.25127260420103387, + "learning_rate": 7.5623921266091355e-06, + "loss": 0.4435, + "step": 8914 + }, + { + "epoch": 2.4951021550517773, + "grad_norm": 0.25389631309855887, + "learning_rate": 7.55423719120672e-06, + "loss": 0.4564, + "step": 8915 + }, + { + "epoch": 2.495382031905961, + "grad_norm": 0.24305461929854702, + "learning_rate": 7.5460862958157575e-06, + "loss": 0.44, + "step": 8916 + }, + { + "epoch": 2.4956619087601455, + "grad_norm": 0.2551714121846865, + "learning_rate": 7.537939441212072e-06, + "loss": 0.4585, + "step": 8917 + }, + { + "epoch": 2.4959417856143298, + "grad_norm": 0.2734545899115857, + "learning_rate": 7.5297966281710705e-06, + "loss": 0.4838, + "step": 8918 + }, + { + "epoch": 2.496221662468514, + "grad_norm": 0.24950143295608296, + "learning_rate": 7.521657857467801e-06, + "loss": 0.4266, + "step": 8919 + }, + { + "epoch": 2.496501539322698, + "grad_norm": 0.25819665620444043, + "learning_rate": 7.513523129876915e-06, + "loss": 0.4411, + "step": 8920 + }, + { + "epoch": 2.496781416176882, + "grad_norm": 0.2598365440070756, + "learning_rate": 7.505392446172688e-06, + "loss": 0.4554, + "step": 8921 + }, + { + "epoch": 2.4970612930310665, + "grad_norm": 0.23971359460619432, + "learning_rate": 7.497265807128978e-06, + "loss": 0.4393, + "step": 8922 + }, + { + "epoch": 2.4973411698852503, + "grad_norm": 0.2568520714516961, + "learning_rate": 7.489143213519301e-06, + "loss": 0.4371, + "step": 8923 + }, + { + "epoch": 2.4976210467394346, + "grad_norm": 0.25148870661721523, + "learning_rate": 7.481024666116765e-06, + "loss": 0.4459, + "step": 8924 + }, + { + "epoch": 2.497900923593619, + "grad_norm": 0.2588802187966045, + "learning_rate": 7.472910165694102e-06, + "loss": 0.4425, + "step": 8925 + }, + { + "epoch": 2.4981808004478028, + "grad_norm": 0.2519353375045778, + "learning_rate": 7.464799713023657e-06, + "loss": 0.4417, + "step": 8926 + }, + { + "epoch": 2.498460677301987, + "grad_norm": 0.2550387065917063, + "learning_rate": 7.456693308877377e-06, + "loss": 0.4535, + "step": 8927 + }, + { + "epoch": 2.4987405541561714, + "grad_norm": 0.25619742864510653, + "learning_rate": 7.448590954026846e-06, + "loss": 0.4384, + "step": 8928 + }, + { + "epoch": 2.499020431010355, + "grad_norm": 0.2577727926770196, + "learning_rate": 7.440492649243242e-06, + "loss": 0.4519, + "step": 8929 + }, + { + "epoch": 2.4993003078645395, + "grad_norm": 0.24415656171714847, + "learning_rate": 7.432398395297357e-06, + "loss": 0.4394, + "step": 8930 + }, + { + "epoch": 2.499580184718724, + "grad_norm": 0.24783691948030032, + "learning_rate": 7.4243081929596336e-06, + "loss": 0.4396, + "step": 8931 + }, + { + "epoch": 2.499860061572908, + "grad_norm": 0.2509008364025493, + "learning_rate": 7.416222043000082e-06, + "loss": 0.4505, + "step": 8932 + }, + { + "epoch": 2.500139938427092, + "grad_norm": 0.25113708543384494, + "learning_rate": 7.408139946188358e-06, + "loss": 0.4582, + "step": 8933 + }, + { + "epoch": 2.500419815281276, + "grad_norm": 0.25948635498933226, + "learning_rate": 7.400061903293709e-06, + "loss": 0.4673, + "step": 8934 + }, + { + "epoch": 2.5006996921354605, + "grad_norm": 0.24315429208856337, + "learning_rate": 7.391987915085013e-06, + "loss": 0.4348, + "step": 8935 + }, + { + "epoch": 2.500979568989645, + "grad_norm": 0.26204781206030575, + "learning_rate": 7.383917982330757e-06, + "loss": 0.434, + "step": 8936 + }, + { + "epoch": 2.5012594458438286, + "grad_norm": 0.2588636451443479, + "learning_rate": 7.375852105799036e-06, + "loss": 0.4425, + "step": 8937 + }, + { + "epoch": 2.501539322698013, + "grad_norm": 0.24993444792058758, + "learning_rate": 7.367790286257576e-06, + "loss": 0.4478, + "step": 8938 + }, + { + "epoch": 2.5018191995521972, + "grad_norm": 0.25553144415491774, + "learning_rate": 7.3597325244737056e-06, + "loss": 0.472, + "step": 8939 + }, + { + "epoch": 2.502099076406381, + "grad_norm": 0.25597088030086723, + "learning_rate": 7.351678821214353e-06, + "loss": 0.4211, + "step": 8940 + }, + { + "epoch": 2.5023789532605654, + "grad_norm": 0.2630377552359938, + "learning_rate": 7.343629177246081e-06, + "loss": 0.453, + "step": 8941 + }, + { + "epoch": 2.5026588301147497, + "grad_norm": 0.25873977474103477, + "learning_rate": 7.335583593335055e-06, + "loss": 0.4524, + "step": 8942 + }, + { + "epoch": 2.5029387069689335, + "grad_norm": 0.251957955374892, + "learning_rate": 7.327542070247068e-06, + "loss": 0.4671, + "step": 8943 + }, + { + "epoch": 2.503218583823118, + "grad_norm": 0.2432185898006891, + "learning_rate": 7.319504608747507e-06, + "loss": 0.4365, + "step": 8944 + }, + { + "epoch": 2.503498460677302, + "grad_norm": 0.24733585421860688, + "learning_rate": 7.3114712096013894e-06, + "loss": 0.465, + "step": 8945 + }, + { + "epoch": 2.503778337531486, + "grad_norm": 0.2489244920393296, + "learning_rate": 7.303441873573335e-06, + "loss": 0.4554, + "step": 8946 + }, + { + "epoch": 2.5040582143856702, + "grad_norm": 0.2621419028480469, + "learning_rate": 7.295416601427585e-06, + "loss": 0.4565, + "step": 8947 + }, + { + "epoch": 2.5043380912398545, + "grad_norm": 0.25392129715622236, + "learning_rate": 7.287395393927971e-06, + "loss": 0.4525, + "step": 8948 + }, + { + "epoch": 2.5046179680940384, + "grad_norm": 0.2597443364769656, + "learning_rate": 7.279378251837982e-06, + "loss": 0.4546, + "step": 8949 + }, + { + "epoch": 2.5048978449482227, + "grad_norm": 0.25569931522305633, + "learning_rate": 7.271365175920686e-06, + "loss": 0.4467, + "step": 8950 + }, + { + "epoch": 2.505177721802407, + "grad_norm": 0.2483428630146001, + "learning_rate": 7.263356166938762e-06, + "loss": 0.448, + "step": 8951 + }, + { + "epoch": 2.5054575986565912, + "grad_norm": 0.24620526913419488, + "learning_rate": 7.255351225654527e-06, + "loss": 0.4504, + "step": 8952 + }, + { + "epoch": 2.5057374755107755, + "grad_norm": 0.24532576029083908, + "learning_rate": 7.2473503528298794e-06, + "loss": 0.4471, + "step": 8953 + }, + { + "epoch": 2.5060173523649594, + "grad_norm": 0.24277139236472012, + "learning_rate": 7.239353549226363e-06, + "loss": 0.4472, + "step": 8954 + }, + { + "epoch": 2.5062972292191437, + "grad_norm": 0.2507319047245604, + "learning_rate": 7.231360815605104e-06, + "loss": 0.4522, + "step": 8955 + }, + { + "epoch": 2.506577106073328, + "grad_norm": 0.24821010930428444, + "learning_rate": 7.223372152726865e-06, + "loss": 0.4553, + "step": 8956 + }, + { + "epoch": 2.506856982927512, + "grad_norm": 0.2615670280351122, + "learning_rate": 7.215387561352016e-06, + "loss": 0.4612, + "step": 8957 + }, + { + "epoch": 2.507136859781696, + "grad_norm": 0.2600748753959852, + "learning_rate": 7.207407042240516e-06, + "loss": 0.445, + "step": 8958 + }, + { + "epoch": 2.5074167366358804, + "grad_norm": 0.25896581280794956, + "learning_rate": 7.199430596151968e-06, + "loss": 0.456, + "step": 8959 + }, + { + "epoch": 2.5076966134900642, + "grad_norm": 0.2558477947203808, + "learning_rate": 7.19145822384557e-06, + "loss": 0.4573, + "step": 8960 + }, + { + "epoch": 2.5079764903442485, + "grad_norm": 0.2502157967721201, + "learning_rate": 7.1834899260801395e-06, + "loss": 0.4478, + "step": 8961 + }, + { + "epoch": 2.508256367198433, + "grad_norm": 0.25875228888298124, + "learning_rate": 7.1755257036141e-06, + "loss": 0.454, + "step": 8962 + }, + { + "epoch": 2.5085362440526167, + "grad_norm": 0.25368430084325355, + "learning_rate": 7.167565557205497e-06, + "loss": 0.4575, + "step": 8963 + }, + { + "epoch": 2.508816120906801, + "grad_norm": 0.2564756941022119, + "learning_rate": 7.159609487611979e-06, + "loss": 0.467, + "step": 8964 + }, + { + "epoch": 2.5090959977609852, + "grad_norm": 0.24892436207172702, + "learning_rate": 7.151657495590802e-06, + "loss": 0.4814, + "step": 8965 + }, + { + "epoch": 2.509375874615169, + "grad_norm": 0.26435105508762835, + "learning_rate": 7.14370958189885e-06, + "loss": 0.4582, + "step": 8966 + }, + { + "epoch": 2.5096557514693534, + "grad_norm": 0.2517061912480958, + "learning_rate": 7.135765747292617e-06, + "loss": 0.4379, + "step": 8967 + }, + { + "epoch": 2.5099356283235377, + "grad_norm": 0.2589595047484895, + "learning_rate": 7.127825992528187e-06, + "loss": 0.4438, + "step": 8968 + }, + { + "epoch": 2.510215505177722, + "grad_norm": 0.26025832593462245, + "learning_rate": 7.119890318361277e-06, + "loss": 0.4434, + "step": 8969 + }, + { + "epoch": 2.510495382031906, + "grad_norm": 0.2558621471436027, + "learning_rate": 7.111958725547208e-06, + "loss": 0.4391, + "step": 8970 + }, + { + "epoch": 2.51077525888609, + "grad_norm": 0.2659495080127039, + "learning_rate": 7.104031214840906e-06, + "loss": 0.4462, + "step": 8971 + }, + { + "epoch": 2.5110551357402744, + "grad_norm": 0.2503816090494375, + "learning_rate": 7.0961077869969305e-06, + "loss": 0.4292, + "step": 8972 + }, + { + "epoch": 2.5113350125944587, + "grad_norm": 0.2534700631485543, + "learning_rate": 7.088188442769428e-06, + "loss": 0.4501, + "step": 8973 + }, + { + "epoch": 2.5116148894486425, + "grad_norm": 0.25580737668576414, + "learning_rate": 7.0802731829121724e-06, + "loss": 0.4741, + "step": 8974 + }, + { + "epoch": 2.511894766302827, + "grad_norm": 0.2628449581556826, + "learning_rate": 7.072362008178546e-06, + "loss": 0.4694, + "step": 8975 + }, + { + "epoch": 2.512174643157011, + "grad_norm": 0.24390541906259172, + "learning_rate": 7.064454919321517e-06, + "loss": 0.4471, + "step": 8976 + }, + { + "epoch": 2.512454520011195, + "grad_norm": 0.2504828534106917, + "learning_rate": 7.056551917093707e-06, + "loss": 0.4565, + "step": 8977 + }, + { + "epoch": 2.5127343968653792, + "grad_norm": 0.24321842590206041, + "learning_rate": 7.048653002247319e-06, + "loss": 0.4345, + "step": 8978 + }, + { + "epoch": 2.5130142737195635, + "grad_norm": 0.26925868771948464, + "learning_rate": 7.040758175534179e-06, + "loss": 0.452, + "step": 8979 + }, + { + "epoch": 2.5132941505737474, + "grad_norm": 0.25258720681110736, + "learning_rate": 7.03286743770572e-06, + "loss": 0.4676, + "step": 8980 + }, + { + "epoch": 2.5135740274279317, + "grad_norm": 0.2515970074169978, + "learning_rate": 7.024980789512991e-06, + "loss": 0.4414, + "step": 8981 + }, + { + "epoch": 2.513853904282116, + "grad_norm": 0.2513935049927517, + "learning_rate": 7.0170982317066315e-06, + "loss": 0.4375, + "step": 8982 + }, + { + "epoch": 2.5141337811363, + "grad_norm": 0.25409520958708476, + "learning_rate": 7.009219765036934e-06, + "loss": 0.4478, + "step": 8983 + }, + { + "epoch": 2.514413657990484, + "grad_norm": 0.27062936308402946, + "learning_rate": 7.001345390253761e-06, + "loss": 0.4617, + "step": 8984 + }, + { + "epoch": 2.5146935348446684, + "grad_norm": 0.25502899641806187, + "learning_rate": 6.993475108106601e-06, + "loss": 0.4565, + "step": 8985 + }, + { + "epoch": 2.5149734116988522, + "grad_norm": 0.25329809166927825, + "learning_rate": 6.98560891934455e-06, + "loss": 0.4476, + "step": 8986 + }, + { + "epoch": 2.5152532885530365, + "grad_norm": 0.2646511731091114, + "learning_rate": 6.977746824716319e-06, + "loss": 0.4733, + "step": 8987 + }, + { + "epoch": 2.515533165407221, + "grad_norm": 0.2586772269443656, + "learning_rate": 6.969888824970228e-06, + "loss": 0.4556, + "step": 8988 + }, + { + "epoch": 2.515813042261405, + "grad_norm": 0.25663256481890245, + "learning_rate": 6.962034920854199e-06, + "loss": 0.4562, + "step": 8989 + }, + { + "epoch": 2.5160929191155894, + "grad_norm": 0.2451363489483608, + "learning_rate": 6.95418511311578e-06, + "loss": 0.4521, + "step": 8990 + }, + { + "epoch": 2.5163727959697733, + "grad_norm": 0.25364956233507796, + "learning_rate": 6.946339402502116e-06, + "loss": 0.462, + "step": 8991 + }, + { + "epoch": 2.5166526728239575, + "grad_norm": 0.24462490587022592, + "learning_rate": 6.938497789759968e-06, + "loss": 0.4572, + "step": 8992 + }, + { + "epoch": 2.516932549678142, + "grad_norm": 0.2639739919137141, + "learning_rate": 6.930660275635709e-06, + "loss": 0.4592, + "step": 8993 + }, + { + "epoch": 2.5172124265323257, + "grad_norm": 0.25208749476232456, + "learning_rate": 6.922826860875303e-06, + "loss": 0.4645, + "step": 8994 + }, + { + "epoch": 2.51749230338651, + "grad_norm": 0.24924361587793836, + "learning_rate": 6.91499754622435e-06, + "loss": 0.4265, + "step": 8995 + }, + { + "epoch": 2.5177721802406943, + "grad_norm": 0.248666852870826, + "learning_rate": 6.907172332428041e-06, + "loss": 0.464, + "step": 8996 + }, + { + "epoch": 2.518052057094878, + "grad_norm": 0.2530985619314788, + "learning_rate": 6.899351220231193e-06, + "loss": 0.4464, + "step": 8997 + }, + { + "epoch": 2.5183319339490624, + "grad_norm": 0.25352600897579103, + "learning_rate": 6.89153421037822e-06, + "loss": 0.4524, + "step": 8998 + }, + { + "epoch": 2.5186118108032467, + "grad_norm": 0.25109304395332893, + "learning_rate": 6.883721303613139e-06, + "loss": 0.4395, + "step": 8999 + }, + { + "epoch": 2.5188916876574305, + "grad_norm": 0.2581392575479511, + "learning_rate": 6.875912500679605e-06, + "loss": 0.4562, + "step": 9000 + }, + { + "epoch": 2.519171564511615, + "grad_norm": 0.26440397696851237, + "learning_rate": 6.868107802320861e-06, + "loss": 0.4523, + "step": 9001 + }, + { + "epoch": 2.519451441365799, + "grad_norm": 0.25347825803240537, + "learning_rate": 6.860307209279759e-06, + "loss": 0.4585, + "step": 9002 + }, + { + "epoch": 2.519731318219983, + "grad_norm": 0.24683638479026232, + "learning_rate": 6.852510722298761e-06, + "loss": 0.4479, + "step": 9003 + }, + { + "epoch": 2.5200111950741673, + "grad_norm": 0.24739050528434142, + "learning_rate": 6.844718342119944e-06, + "loss": 0.4384, + "step": 9004 + }, + { + "epoch": 2.5202910719283516, + "grad_norm": 0.2691543647274035, + "learning_rate": 6.836930069484987e-06, + "loss": 0.4619, + "step": 9005 + }, + { + "epoch": 2.520570948782536, + "grad_norm": 0.25099324436026205, + "learning_rate": 6.829145905135187e-06, + "loss": 0.4429, + "step": 9006 + }, + { + "epoch": 2.5208508256367197, + "grad_norm": 0.24724463802634064, + "learning_rate": 6.821365849811445e-06, + "loss": 0.4272, + "step": 9007 + }, + { + "epoch": 2.521130702490904, + "grad_norm": 0.2572366796115523, + "learning_rate": 6.813589904254269e-06, + "loss": 0.4924, + "step": 9008 + }, + { + "epoch": 2.5214105793450883, + "grad_norm": 0.25816372621467104, + "learning_rate": 6.805818069203779e-06, + "loss": 0.4453, + "step": 9009 + }, + { + "epoch": 2.5216904561992726, + "grad_norm": 0.25110778666278544, + "learning_rate": 6.798050345399704e-06, + "loss": 0.4646, + "step": 9010 + }, + { + "epoch": 2.5219703330534564, + "grad_norm": 0.2580243655826121, + "learning_rate": 6.790286733581386e-06, + "loss": 0.4595, + "step": 9011 + }, + { + "epoch": 2.5222502099076407, + "grad_norm": 0.2579702369872616, + "learning_rate": 6.782527234487751e-06, + "loss": 0.4497, + "step": 9012 + }, + { + "epoch": 2.522530086761825, + "grad_norm": 0.2551010773248085, + "learning_rate": 6.7747718488573645e-06, + "loss": 0.4612, + "step": 9013 + }, + { + "epoch": 2.522809963616009, + "grad_norm": 0.25772777460612917, + "learning_rate": 6.7670205774283905e-06, + "loss": 0.4461, + "step": 9014 + }, + { + "epoch": 2.523089840470193, + "grad_norm": 0.26076216237241695, + "learning_rate": 6.759273420938594e-06, + "loss": 0.4769, + "step": 9015 + }, + { + "epoch": 2.5233697173243774, + "grad_norm": 0.2502091012128725, + "learning_rate": 6.751530380125353e-06, + "loss": 0.4271, + "step": 9016 + }, + { + "epoch": 2.5236495941785613, + "grad_norm": 0.26811058278756883, + "learning_rate": 6.743791455725651e-06, + "loss": 0.4563, + "step": 9017 + }, + { + "epoch": 2.5239294710327456, + "grad_norm": 0.2586330387799208, + "learning_rate": 6.736056648476102e-06, + "loss": 0.4579, + "step": 9018 + }, + { + "epoch": 2.52420934788693, + "grad_norm": 0.2645844473897462, + "learning_rate": 6.728325959112891e-06, + "loss": 0.4703, + "step": 9019 + }, + { + "epoch": 2.5244892247411137, + "grad_norm": 0.26234738979855504, + "learning_rate": 6.72059938837184e-06, + "loss": 0.4759, + "step": 9020 + }, + { + "epoch": 2.524769101595298, + "grad_norm": 0.2531272337988228, + "learning_rate": 6.712876936988366e-06, + "loss": 0.4573, + "step": 9021 + }, + { + "epoch": 2.5250489784494823, + "grad_norm": 0.25923149226462266, + "learning_rate": 6.705158605697487e-06, + "loss": 0.4449, + "step": 9022 + }, + { + "epoch": 2.525328855303666, + "grad_norm": 0.2527412087067922, + "learning_rate": 6.697444395233849e-06, + "loss": 0.4411, + "step": 9023 + }, + { + "epoch": 2.5256087321578504, + "grad_norm": 0.24458877134965698, + "learning_rate": 6.689734306331691e-06, + "loss": 0.4696, + "step": 9024 + }, + { + "epoch": 2.5258886090120347, + "grad_norm": 0.28629513349931024, + "learning_rate": 6.682028339724866e-06, + "loss": 0.457, + "step": 9025 + }, + { + "epoch": 2.526168485866219, + "grad_norm": 0.24219808104391424, + "learning_rate": 6.6743264961468266e-06, + "loss": 0.4486, + "step": 9026 + }, + { + "epoch": 2.5264483627204033, + "grad_norm": 0.24907028126722813, + "learning_rate": 6.666628776330641e-06, + "loss": 0.4515, + "step": 9027 + }, + { + "epoch": 2.526728239574587, + "grad_norm": 0.2428588276750298, + "learning_rate": 6.658935181008985e-06, + "loss": 0.4444, + "step": 9028 + }, + { + "epoch": 2.5270081164287714, + "grad_norm": 0.2598518384155076, + "learning_rate": 6.65124571091415e-06, + "loss": 0.4639, + "step": 9029 + }, + { + "epoch": 2.5272879932829557, + "grad_norm": 0.26021567833303383, + "learning_rate": 6.643560366777995e-06, + "loss": 0.4608, + "step": 9030 + }, + { + "epoch": 2.5275678701371396, + "grad_norm": 0.26185111067548805, + "learning_rate": 6.635879149332036e-06, + "loss": 0.4412, + "step": 9031 + }, + { + "epoch": 2.527847746991324, + "grad_norm": 0.2627699129277269, + "learning_rate": 6.62820205930737e-06, + "loss": 0.4632, + "step": 9032 + }, + { + "epoch": 2.528127623845508, + "grad_norm": 0.2608584967089512, + "learning_rate": 6.620529097434714e-06, + "loss": 0.4631, + "step": 9033 + }, + { + "epoch": 2.528407500699692, + "grad_norm": 0.251428636092459, + "learning_rate": 6.612860264444359e-06, + "loss": 0.4492, + "step": 9034 + }, + { + "epoch": 2.5286873775538763, + "grad_norm": 0.24945361347811598, + "learning_rate": 6.605195561066269e-06, + "loss": 0.4562, + "step": 9035 + }, + { + "epoch": 2.5289672544080606, + "grad_norm": 0.24857318034272893, + "learning_rate": 6.597534988029946e-06, + "loss": 0.4576, + "step": 9036 + }, + { + "epoch": 2.5292471312622444, + "grad_norm": 0.25722373000155074, + "learning_rate": 6.589878546064543e-06, + "loss": 0.4617, + "step": 9037 + }, + { + "epoch": 2.5295270081164287, + "grad_norm": 0.25686729673985775, + "learning_rate": 6.5822262358987916e-06, + "loss": 0.4741, + "step": 9038 + }, + { + "epoch": 2.529806884970613, + "grad_norm": 0.2560864645379675, + "learning_rate": 6.574578058261055e-06, + "loss": 0.4418, + "step": 9039 + }, + { + "epoch": 2.530086761824797, + "grad_norm": 0.256450121733098, + "learning_rate": 6.566934013879283e-06, + "loss": 0.4464, + "step": 9040 + }, + { + "epoch": 2.530366638678981, + "grad_norm": 0.24927342458415117, + "learning_rate": 6.55929410348104e-06, + "loss": 0.4594, + "step": 9041 + }, + { + "epoch": 2.5306465155331654, + "grad_norm": 0.24086033624755418, + "learning_rate": 6.551658327793503e-06, + "loss": 0.4611, + "step": 9042 + }, + { + "epoch": 2.5309263923873497, + "grad_norm": 0.23626263901940917, + "learning_rate": 6.544026687543442e-06, + "loss": 0.4513, + "step": 9043 + }, + { + "epoch": 2.5312062692415336, + "grad_norm": 0.25190955097541173, + "learning_rate": 6.536399183457248e-06, + "loss": 0.4551, + "step": 9044 + }, + { + "epoch": 2.531486146095718, + "grad_norm": 0.41540484331269534, + "learning_rate": 6.528775816260901e-06, + "loss": 0.4493, + "step": 9045 + }, + { + "epoch": 2.531766022949902, + "grad_norm": 0.24828989706009602, + "learning_rate": 6.521156586680011e-06, + "loss": 0.445, + "step": 9046 + }, + { + "epoch": 2.5320458998040865, + "grad_norm": 0.2537136199076485, + "learning_rate": 6.5135414954397785e-06, + "loss": 0.463, + "step": 9047 + }, + { + "epoch": 2.5323257766582703, + "grad_norm": 0.2461613470711474, + "learning_rate": 6.505930543264999e-06, + "loss": 0.4803, + "step": 9048 + }, + { + "epoch": 2.5326056535124546, + "grad_norm": 0.24533467246800178, + "learning_rate": 6.498323730880096e-06, + "loss": 0.4678, + "step": 9049 + }, + { + "epoch": 2.532885530366639, + "grad_norm": 0.25797351546042757, + "learning_rate": 6.490721059009086e-06, + "loss": 0.4339, + "step": 9050 + }, + { + "epoch": 2.5331654072208227, + "grad_norm": 0.25157856160358383, + "learning_rate": 6.483122528375591e-06, + "loss": 0.4545, + "step": 9051 + }, + { + "epoch": 2.533445284075007, + "grad_norm": 0.2465713257227587, + "learning_rate": 6.4755281397028576e-06, + "loss": 0.455, + "step": 9052 + }, + { + "epoch": 2.5337251609291913, + "grad_norm": 0.25310491688299097, + "learning_rate": 6.4679378937137215e-06, + "loss": 0.4426, + "step": 9053 + }, + { + "epoch": 2.534005037783375, + "grad_norm": 0.25791684141482535, + "learning_rate": 6.46035179113062e-06, + "loss": 0.4408, + "step": 9054 + }, + { + "epoch": 2.5342849146375594, + "grad_norm": 0.2572030751913601, + "learning_rate": 6.452769832675609e-06, + "loss": 0.4476, + "step": 9055 + }, + { + "epoch": 2.5345647914917437, + "grad_norm": 0.266356508072427, + "learning_rate": 6.445192019070334e-06, + "loss": 0.4558, + "step": 9056 + }, + { + "epoch": 2.5348446683459276, + "grad_norm": 0.257384068028928, + "learning_rate": 6.437618351036068e-06, + "loss": 0.4591, + "step": 9057 + }, + { + "epoch": 2.535124545200112, + "grad_norm": 0.2546444328612967, + "learning_rate": 6.430048829293667e-06, + "loss": 0.4528, + "step": 9058 + }, + { + "epoch": 2.535404422054296, + "grad_norm": 0.26424748863084735, + "learning_rate": 6.422483454563605e-06, + "loss": 0.4522, + "step": 9059 + }, + { + "epoch": 2.53568429890848, + "grad_norm": 0.25642044074867987, + "learning_rate": 6.414922227565962e-06, + "loss": 0.4463, + "step": 9060 + }, + { + "epoch": 2.5359641757626643, + "grad_norm": 0.2410920603981759, + "learning_rate": 6.407365149020411e-06, + "loss": 0.4479, + "step": 9061 + }, + { + "epoch": 2.5362440526168486, + "grad_norm": 0.25678693014949794, + "learning_rate": 6.399812219646251e-06, + "loss": 0.4389, + "step": 9062 + }, + { + "epoch": 2.536523929471033, + "grad_norm": 0.2709547706867928, + "learning_rate": 6.392263440162366e-06, + "loss": 0.4538, + "step": 9063 + }, + { + "epoch": 2.536803806325217, + "grad_norm": 0.25724482144428323, + "learning_rate": 6.384718811287255e-06, + "loss": 0.4581, + "step": 9064 + }, + { + "epoch": 2.537083683179401, + "grad_norm": 0.2630105013815421, + "learning_rate": 6.37717833373902e-06, + "loss": 0.4371, + "step": 9065 + }, + { + "epoch": 2.5373635600335853, + "grad_norm": 0.2642686115741411, + "learning_rate": 6.369642008235377e-06, + "loss": 0.4673, + "step": 9066 + }, + { + "epoch": 2.5376434368877696, + "grad_norm": 0.25008826463627004, + "learning_rate": 6.3621098354936235e-06, + "loss": 0.4477, + "step": 9067 + }, + { + "epoch": 2.5379233137419535, + "grad_norm": 0.24809789646075484, + "learning_rate": 6.354581816230676e-06, + "loss": 0.4407, + "step": 9068 + }, + { + "epoch": 2.5382031905961377, + "grad_norm": 0.25287845157192435, + "learning_rate": 6.347057951163049e-06, + "loss": 0.4293, + "step": 9069 + }, + { + "epoch": 2.538483067450322, + "grad_norm": 0.25872036606644555, + "learning_rate": 6.3395382410068925e-06, + "loss": 0.4389, + "step": 9070 + }, + { + "epoch": 2.538762944304506, + "grad_norm": 0.24919407583183825, + "learning_rate": 6.332022686477928e-06, + "loss": 0.4447, + "step": 9071 + }, + { + "epoch": 2.53904282115869, + "grad_norm": 0.24639849359031837, + "learning_rate": 6.324511288291479e-06, + "loss": 0.4392, + "step": 9072 + }, + { + "epoch": 2.5393226980128745, + "grad_norm": 0.24467253580160808, + "learning_rate": 6.3170040471625005e-06, + "loss": 0.4274, + "step": 9073 + }, + { + "epoch": 2.5396025748670583, + "grad_norm": 0.2514500342020152, + "learning_rate": 6.30950096380552e-06, + "loss": 0.4507, + "step": 9074 + }, + { + "epoch": 2.5398824517212426, + "grad_norm": 0.24603248756069154, + "learning_rate": 6.302002038934696e-06, + "loss": 0.4429, + "step": 9075 + }, + { + "epoch": 2.540162328575427, + "grad_norm": 0.2629686044634512, + "learning_rate": 6.294507273263772e-06, + "loss": 0.4683, + "step": 9076 + }, + { + "epoch": 2.5404422054296107, + "grad_norm": 0.25872850504593214, + "learning_rate": 6.287016667506113e-06, + "loss": 0.4483, + "step": 9077 + }, + { + "epoch": 2.540722082283795, + "grad_norm": 0.2480412433276257, + "learning_rate": 6.279530222374674e-06, + "loss": 0.4386, + "step": 9078 + }, + { + "epoch": 2.5410019591379793, + "grad_norm": 0.2531260430664589, + "learning_rate": 6.27204793858202e-06, + "loss": 0.461, + "step": 9079 + }, + { + "epoch": 2.541281835992163, + "grad_norm": 0.25795413308401616, + "learning_rate": 6.264569816840321e-06, + "loss": 0.468, + "step": 9080 + }, + { + "epoch": 2.5415617128463475, + "grad_norm": 0.24996489529895616, + "learning_rate": 6.257095857861345e-06, + "loss": 0.4521, + "step": 9081 + }, + { + "epoch": 2.5418415897005318, + "grad_norm": 0.2552172480352286, + "learning_rate": 6.249626062356467e-06, + "loss": 0.4369, + "step": 9082 + }, + { + "epoch": 2.542121466554716, + "grad_norm": 0.2561180333740874, + "learning_rate": 6.24216043103667e-06, + "loss": 0.4309, + "step": 9083 + }, + { + "epoch": 2.5424013434089003, + "grad_norm": 0.2620053313294719, + "learning_rate": 6.234698964612545e-06, + "loss": 0.4716, + "step": 9084 + }, + { + "epoch": 2.542681220263084, + "grad_norm": 0.24833583400042106, + "learning_rate": 6.227241663794259e-06, + "loss": 0.4415, + "step": 9085 + }, + { + "epoch": 2.5429610971172685, + "grad_norm": 0.24157989185972797, + "learning_rate": 6.219788529291603e-06, + "loss": 0.4447, + "step": 9086 + }, + { + "epoch": 2.5432409739714528, + "grad_norm": 0.24469414025311992, + "learning_rate": 6.21233956181399e-06, + "loss": 0.4455, + "step": 9087 + }, + { + "epoch": 2.5435208508256366, + "grad_norm": 0.25049764277045034, + "learning_rate": 6.204894762070407e-06, + "loss": 0.4402, + "step": 9088 + }, + { + "epoch": 2.543800727679821, + "grad_norm": 0.2486790610579519, + "learning_rate": 6.197454130769448e-06, + "loss": 0.4447, + "step": 9089 + }, + { + "epoch": 2.544080604534005, + "grad_norm": 0.24619858352040716, + "learning_rate": 6.19001766861933e-06, + "loss": 0.4435, + "step": 9090 + }, + { + "epoch": 2.544360481388189, + "grad_norm": 0.2558167745797563, + "learning_rate": 6.1825853763278485e-06, + "loss": 0.4402, + "step": 9091 + }, + { + "epoch": 2.5446403582423733, + "grad_norm": 0.2353500169975788, + "learning_rate": 6.175157254602415e-06, + "loss": 0.4262, + "step": 9092 + }, + { + "epoch": 2.5449202350965576, + "grad_norm": 0.25616219547313984, + "learning_rate": 6.167733304150048e-06, + "loss": 0.4783, + "step": 9093 + }, + { + "epoch": 2.5452001119507415, + "grad_norm": 0.2621886960057489, + "learning_rate": 6.160313525677353e-06, + "loss": 0.4453, + "step": 9094 + }, + { + "epoch": 2.5454799888049258, + "grad_norm": 0.252862558908742, + "learning_rate": 6.15289791989056e-06, + "loss": 0.4584, + "step": 9095 + }, + { + "epoch": 2.54575986565911, + "grad_norm": 0.25631498872333514, + "learning_rate": 6.1454864874954834e-06, + "loss": 0.4514, + "step": 9096 + }, + { + "epoch": 2.546039742513294, + "grad_norm": 0.24840661224398433, + "learning_rate": 6.138079229197552e-06, + "loss": 0.4478, + "step": 9097 + }, + { + "epoch": 2.546319619367478, + "grad_norm": 0.24389373928599684, + "learning_rate": 6.130676145701786e-06, + "loss": 0.4381, + "step": 9098 + }, + { + "epoch": 2.5465994962216625, + "grad_norm": 0.257340560366041, + "learning_rate": 6.123277237712821e-06, + "loss": 0.456, + "step": 9099 + }, + { + "epoch": 2.5468793730758468, + "grad_norm": 0.2562883816613838, + "learning_rate": 6.115882505934889e-06, + "loss": 0.4354, + "step": 9100 + }, + { + "epoch": 2.5471592499300306, + "grad_norm": 0.25208727499306416, + "learning_rate": 6.108491951071821e-06, + "loss": 0.4423, + "step": 9101 + }, + { + "epoch": 2.547439126784215, + "grad_norm": 0.2568919940162497, + "learning_rate": 6.101105573827065e-06, + "loss": 0.4696, + "step": 9102 + }, + { + "epoch": 2.547719003638399, + "grad_norm": 0.2568095046552021, + "learning_rate": 6.093723374903632e-06, + "loss": 0.4231, + "step": 9103 + }, + { + "epoch": 2.5479988804925835, + "grad_norm": 0.2482858904137207, + "learning_rate": 6.086345355004197e-06, + "loss": 0.4422, + "step": 9104 + }, + { + "epoch": 2.5482787573467673, + "grad_norm": 0.25164730562085175, + "learning_rate": 6.078971514830989e-06, + "loss": 0.4762, + "step": 9105 + }, + { + "epoch": 2.5485586342009516, + "grad_norm": 0.25451172396853644, + "learning_rate": 6.071601855085857e-06, + "loss": 0.4449, + "step": 9106 + }, + { + "epoch": 2.548838511055136, + "grad_norm": 0.24662730540699523, + "learning_rate": 6.064236376470245e-06, + "loss": 0.4433, + "step": 9107 + }, + { + "epoch": 2.5491183879093198, + "grad_norm": 0.26930731644611416, + "learning_rate": 6.056875079685209e-06, + "loss": 0.4539, + "step": 9108 + }, + { + "epoch": 2.549398264763504, + "grad_norm": 0.24951810190856688, + "learning_rate": 6.049517965431401e-06, + "loss": 0.4364, + "step": 9109 + }, + { + "epoch": 2.5496781416176884, + "grad_norm": 0.2588401379405034, + "learning_rate": 6.04216503440907e-06, + "loss": 0.4617, + "step": 9110 + }, + { + "epoch": 2.549958018471872, + "grad_norm": 0.2481260845789059, + "learning_rate": 6.034816287318073e-06, + "loss": 0.4413, + "step": 9111 + }, + { + "epoch": 2.5502378953260565, + "grad_norm": 0.24731364381426707, + "learning_rate": 6.027471724857875e-06, + "loss": 0.4459, + "step": 9112 + }, + { + "epoch": 2.550517772180241, + "grad_norm": 0.258436832246282, + "learning_rate": 6.0201313477275256e-06, + "loss": 0.4552, + "step": 9113 + }, + { + "epoch": 2.5507976490344246, + "grad_norm": 0.23769209221220836, + "learning_rate": 6.012795156625695e-06, + "loss": 0.4323, + "step": 9114 + }, + { + "epoch": 2.551077525888609, + "grad_norm": 0.2583512286228744, + "learning_rate": 6.005463152250645e-06, + "loss": 0.4468, + "step": 9115 + }, + { + "epoch": 2.551357402742793, + "grad_norm": 0.25152119278179613, + "learning_rate": 5.998135335300231e-06, + "loss": 0.4318, + "step": 9116 + }, + { + "epoch": 2.551637279596977, + "grad_norm": 0.250313446831586, + "learning_rate": 5.990811706471927e-06, + "loss": 0.4546, + "step": 9117 + }, + { + "epoch": 2.5519171564511614, + "grad_norm": 0.2534164940441637, + "learning_rate": 5.983492266462798e-06, + "loss": 0.4237, + "step": 9118 + }, + { + "epoch": 2.5521970333053456, + "grad_norm": 0.2558220303793233, + "learning_rate": 5.976177015969514e-06, + "loss": 0.4629, + "step": 9119 + }, + { + "epoch": 2.55247691015953, + "grad_norm": 0.2676523221666284, + "learning_rate": 5.968865955688352e-06, + "loss": 0.4678, + "step": 9120 + }, + { + "epoch": 2.5527567870137142, + "grad_norm": 0.26340062183579227, + "learning_rate": 5.9615590863151495e-06, + "loss": 0.482, + "step": 9121 + }, + { + "epoch": 2.553036663867898, + "grad_norm": 0.2613714215157316, + "learning_rate": 5.9542564085454165e-06, + "loss": 0.4677, + "step": 9122 + }, + { + "epoch": 2.5533165407220824, + "grad_norm": 0.25983017898091737, + "learning_rate": 5.946957923074209e-06, + "loss": 0.4613, + "step": 9123 + }, + { + "epoch": 2.5535964175762667, + "grad_norm": 0.24451686698187075, + "learning_rate": 5.939663630596209e-06, + "loss": 0.4591, + "step": 9124 + }, + { + "epoch": 2.5538762944304505, + "grad_norm": 0.24427666017394079, + "learning_rate": 5.932373531805685e-06, + "loss": 0.4518, + "step": 9125 + }, + { + "epoch": 2.554156171284635, + "grad_norm": 0.2456198460591965, + "learning_rate": 5.925087627396508e-06, + "loss": 0.4607, + "step": 9126 + }, + { + "epoch": 2.554436048138819, + "grad_norm": 0.25358946251252196, + "learning_rate": 5.917805918062169e-06, + "loss": 0.4435, + "step": 9127 + }, + { + "epoch": 2.554715924993003, + "grad_norm": 0.2478587311658339, + "learning_rate": 5.91052840449573e-06, + "loss": 0.4436, + "step": 9128 + }, + { + "epoch": 2.554995801847187, + "grad_norm": 0.2526430282536233, + "learning_rate": 5.903255087389881e-06, + "loss": 0.4382, + "step": 9129 + }, + { + "epoch": 2.5552756787013715, + "grad_norm": 0.2599604379789864, + "learning_rate": 5.89598596743689e-06, + "loss": 0.4713, + "step": 9130 + }, + { + "epoch": 2.5555555555555554, + "grad_norm": 0.26059258912979244, + "learning_rate": 5.888721045328644e-06, + "loss": 0.4607, + "step": 9131 + }, + { + "epoch": 2.5558354324097396, + "grad_norm": 0.256164164169617, + "learning_rate": 5.8814603217566155e-06, + "loss": 0.4543, + "step": 9132 + }, + { + "epoch": 2.556115309263924, + "grad_norm": 0.2495455717137147, + "learning_rate": 5.874203797411887e-06, + "loss": 0.4571, + "step": 9133 + }, + { + "epoch": 2.556395186118108, + "grad_norm": 0.25740927083003284, + "learning_rate": 5.866951472985143e-06, + "loss": 0.4425, + "step": 9134 + }, + { + "epoch": 2.556675062972292, + "grad_norm": 0.2542573316512893, + "learning_rate": 5.859703349166662e-06, + "loss": 0.4586, + "step": 9135 + }, + { + "epoch": 2.5569549398264764, + "grad_norm": 0.262186312086353, + "learning_rate": 5.852459426646317e-06, + "loss": 0.4867, + "step": 9136 + }, + { + "epoch": 2.5572348166806607, + "grad_norm": 0.26371603263245813, + "learning_rate": 5.845219706113597e-06, + "loss": 0.4626, + "step": 9137 + }, + { + "epoch": 2.5575146935348445, + "grad_norm": 0.2509764711358416, + "learning_rate": 5.837984188257583e-06, + "loss": 0.4447, + "step": 9138 + }, + { + "epoch": 2.557794570389029, + "grad_norm": 0.2579467960049227, + "learning_rate": 5.830752873766948e-06, + "loss": 0.4457, + "step": 9139 + }, + { + "epoch": 2.558074447243213, + "grad_norm": 0.25286430913608743, + "learning_rate": 5.823525763329979e-06, + "loss": 0.4438, + "step": 9140 + }, + { + "epoch": 2.5583543240973974, + "grad_norm": 0.263430597262002, + "learning_rate": 5.816302857634553e-06, + "loss": 0.4566, + "step": 9141 + }, + { + "epoch": 2.5586342009515812, + "grad_norm": 0.25998784760305815, + "learning_rate": 5.809084157368155e-06, + "loss": 0.4824, + "step": 9142 + }, + { + "epoch": 2.5589140778057655, + "grad_norm": 0.25757352165151176, + "learning_rate": 5.801869663217857e-06, + "loss": 0.4561, + "step": 9143 + }, + { + "epoch": 2.55919395465995, + "grad_norm": 0.2611364733536436, + "learning_rate": 5.794659375870348e-06, + "loss": 0.4598, + "step": 9144 + }, + { + "epoch": 2.5594738315141337, + "grad_norm": 0.24673739305454123, + "learning_rate": 5.787453296011902e-06, + "loss": 0.4472, + "step": 9145 + }, + { + "epoch": 2.559753708368318, + "grad_norm": 0.2525542203161026, + "learning_rate": 5.7802514243283925e-06, + "loss": 0.4772, + "step": 9146 + }, + { + "epoch": 2.5600335852225022, + "grad_norm": 0.24610967119593202, + "learning_rate": 5.7730537615053095e-06, + "loss": 0.4534, + "step": 9147 + }, + { + "epoch": 2.560313462076686, + "grad_norm": 0.2508092311732347, + "learning_rate": 5.765860308227722e-06, + "loss": 0.4444, + "step": 9148 + }, + { + "epoch": 2.5605933389308704, + "grad_norm": 0.25882638519393514, + "learning_rate": 5.75867106518031e-06, + "loss": 0.4638, + "step": 9149 + }, + { + "epoch": 2.5608732157850547, + "grad_norm": 0.25450553546604476, + "learning_rate": 5.751486033047349e-06, + "loss": 0.4339, + "step": 9150 + }, + { + "epoch": 2.5611530926392385, + "grad_norm": 0.2669342479300081, + "learning_rate": 5.7443052125127125e-06, + "loss": 0.4525, + "step": 9151 + }, + { + "epoch": 2.561432969493423, + "grad_norm": 0.25777195645807727, + "learning_rate": 5.737128604259878e-06, + "loss": 0.4499, + "step": 9152 + }, + { + "epoch": 2.561712846347607, + "grad_norm": 0.2597046531032639, + "learning_rate": 5.729956208971915e-06, + "loss": 0.4729, + "step": 9153 + }, + { + "epoch": 2.561992723201791, + "grad_norm": 0.2614699484353166, + "learning_rate": 5.7227880273315045e-06, + "loss": 0.4684, + "step": 9154 + }, + { + "epoch": 2.5622726000559752, + "grad_norm": 0.2501052573574934, + "learning_rate": 5.715624060020908e-06, + "loss": 0.4442, + "step": 9155 + }, + { + "epoch": 2.5625524769101595, + "grad_norm": 0.2520769280854139, + "learning_rate": 5.708464307722006e-06, + "loss": 0.4622, + "step": 9156 + }, + { + "epoch": 2.562832353764344, + "grad_norm": 0.2547266031794907, + "learning_rate": 5.701308771116254e-06, + "loss": 0.4521, + "step": 9157 + }, + { + "epoch": 2.563112230618528, + "grad_norm": 0.2563651052950184, + "learning_rate": 5.694157450884735e-06, + "loss": 0.4519, + "step": 9158 + }, + { + "epoch": 2.563392107472712, + "grad_norm": 0.25744604735413434, + "learning_rate": 5.687010347708105e-06, + "loss": 0.4623, + "step": 9159 + }, + { + "epoch": 2.5636719843268962, + "grad_norm": 0.25466997886611975, + "learning_rate": 5.679867462266636e-06, + "loss": 0.4548, + "step": 9160 + }, + { + "epoch": 2.5639518611810805, + "grad_norm": 0.23892686124911805, + "learning_rate": 5.67272879524019e-06, + "loss": 0.435, + "step": 9161 + }, + { + "epoch": 2.5642317380352644, + "grad_norm": 0.25698434922165514, + "learning_rate": 5.665594347308229e-06, + "loss": 0.4459, + "step": 9162 + }, + { + "epoch": 2.5645116148894487, + "grad_norm": 0.257195515088587, + "learning_rate": 5.658464119149809e-06, + "loss": 0.4641, + "step": 9163 + }, + { + "epoch": 2.564791491743633, + "grad_norm": 0.25358151850238736, + "learning_rate": 5.651338111443594e-06, + "loss": 0.4464, + "step": 9164 + }, + { + "epoch": 2.565071368597817, + "grad_norm": 0.2572662942917605, + "learning_rate": 5.644216324867841e-06, + "loss": 0.4566, + "step": 9165 + }, + { + "epoch": 2.565351245452001, + "grad_norm": 0.2558001665540111, + "learning_rate": 5.637098760100407e-06, + "loss": 0.4704, + "step": 9166 + }, + { + "epoch": 2.5656311223061854, + "grad_norm": 0.24883429377142569, + "learning_rate": 5.62998541781874e-06, + "loss": 0.446, + "step": 9167 + }, + { + "epoch": 2.5659109991603692, + "grad_norm": 0.26564797320747585, + "learning_rate": 5.622876298699898e-06, + "loss": 0.4608, + "step": 9168 + }, + { + "epoch": 2.5661908760145535, + "grad_norm": 0.2411278117744789, + "learning_rate": 5.6157714034205334e-06, + "loss": 0.4393, + "step": 9169 + }, + { + "epoch": 2.566470752868738, + "grad_norm": 0.25815355494097664, + "learning_rate": 5.6086707326568845e-06, + "loss": 0.4269, + "step": 9170 + }, + { + "epoch": 2.5667506297229217, + "grad_norm": 0.2616000562617375, + "learning_rate": 5.6015742870847985e-06, + "loss": 0.4481, + "step": 9171 + }, + { + "epoch": 2.567030506577106, + "grad_norm": 0.255265647625433, + "learning_rate": 5.594482067379731e-06, + "loss": 0.4694, + "step": 9172 + }, + { + "epoch": 2.5673103834312903, + "grad_norm": 0.25239103218717907, + "learning_rate": 5.587394074216712e-06, + "loss": 0.4394, + "step": 9173 + }, + { + "epoch": 2.5675902602854745, + "grad_norm": 0.25854733542183617, + "learning_rate": 5.580310308270381e-06, + "loss": 0.4495, + "step": 9174 + }, + { + "epoch": 2.5678701371396584, + "grad_norm": 0.24697676977817049, + "learning_rate": 5.573230770214982e-06, + "loss": 0.4315, + "step": 9175 + }, + { + "epoch": 2.5681500139938427, + "grad_norm": 0.26384154704967105, + "learning_rate": 5.566155460724343e-06, + "loss": 0.4489, + "step": 9176 + }, + { + "epoch": 2.568429890848027, + "grad_norm": 0.24507750117012964, + "learning_rate": 5.559084380471896e-06, + "loss": 0.4517, + "step": 9177 + }, + { + "epoch": 2.5687097677022113, + "grad_norm": 0.27425588684557417, + "learning_rate": 5.552017530130676e-06, + "loss": 0.4512, + "step": 9178 + }, + { + "epoch": 2.568989644556395, + "grad_norm": 0.2488587339406811, + "learning_rate": 5.5449549103733065e-06, + "loss": 0.4371, + "step": 9179 + }, + { + "epoch": 2.5692695214105794, + "grad_norm": 0.25506453667985635, + "learning_rate": 5.537896521872005e-06, + "loss": 0.4365, + "step": 9180 + }, + { + "epoch": 2.5695493982647637, + "grad_norm": 0.2656730131529827, + "learning_rate": 5.530842365298605e-06, + "loss": 0.468, + "step": 9181 + }, + { + "epoch": 2.5698292751189475, + "grad_norm": 0.25973678949507073, + "learning_rate": 5.523792441324516e-06, + "loss": 0.457, + "step": 9182 + }, + { + "epoch": 2.570109151973132, + "grad_norm": 0.24539048819265274, + "learning_rate": 5.51674675062076e-06, + "loss": 0.4769, + "step": 9183 + }, + { + "epoch": 2.570389028827316, + "grad_norm": 0.2531422133017602, + "learning_rate": 5.509705293857947e-06, + "loss": 0.4574, + "step": 9184 + }, + { + "epoch": 2.5706689056815, + "grad_norm": 0.2548109395272116, + "learning_rate": 5.5026680717062855e-06, + "loss": 0.4543, + "step": 9185 + }, + { + "epoch": 2.5709487825356843, + "grad_norm": 0.25359074586043096, + "learning_rate": 5.495635084835582e-06, + "loss": 0.4467, + "step": 9186 + }, + { + "epoch": 2.5712286593898686, + "grad_norm": 0.2617761868678252, + "learning_rate": 5.488606333915236e-06, + "loss": 0.4542, + "step": 9187 + }, + { + "epoch": 2.5715085362440524, + "grad_norm": 0.25215950198858933, + "learning_rate": 5.481581819614262e-06, + "loss": 0.4571, + "step": 9188 + }, + { + "epoch": 2.5717884130982367, + "grad_norm": 0.2538088206469343, + "learning_rate": 5.474561542601242e-06, + "loss": 0.449, + "step": 9189 + }, + { + "epoch": 2.572068289952421, + "grad_norm": 0.2603239345951513, + "learning_rate": 5.46754550354438e-06, + "loss": 0.4461, + "step": 9190 + }, + { + "epoch": 2.572348166806605, + "grad_norm": 0.24946940193842998, + "learning_rate": 5.460533703111465e-06, + "loss": 0.4677, + "step": 9191 + }, + { + "epoch": 2.572628043660789, + "grad_norm": 0.2461630152636201, + "learning_rate": 5.453526141969878e-06, + "loss": 0.4672, + "step": 9192 + }, + { + "epoch": 2.5729079205149734, + "grad_norm": 0.25407980345989045, + "learning_rate": 5.4465228207866095e-06, + "loss": 0.4551, + "step": 9193 + }, + { + "epoch": 2.5731877973691577, + "grad_norm": 0.256288476843359, + "learning_rate": 5.439523740228236e-06, + "loss": 0.4406, + "step": 9194 + }, + { + "epoch": 2.573467674223342, + "grad_norm": 0.25172429485079223, + "learning_rate": 5.432528900960931e-06, + "loss": 0.4613, + "step": 9195 + }, + { + "epoch": 2.573747551077526, + "grad_norm": 0.2459655324435995, + "learning_rate": 5.4255383036504736e-06, + "loss": 0.4335, + "step": 9196 + }, + { + "epoch": 2.57402742793171, + "grad_norm": 0.2463592720999968, + "learning_rate": 5.41855194896223e-06, + "loss": 0.455, + "step": 9197 + }, + { + "epoch": 2.5743073047858944, + "grad_norm": 0.2599890636185018, + "learning_rate": 5.411569837561164e-06, + "loss": 0.4407, + "step": 9198 + }, + { + "epoch": 2.5745871816400783, + "grad_norm": 0.2697605036552731, + "learning_rate": 5.404591970111838e-06, + "loss": 0.4578, + "step": 9199 + }, + { + "epoch": 2.5748670584942626, + "grad_norm": 0.25532802799904475, + "learning_rate": 5.39761834727841e-06, + "loss": 0.4456, + "step": 9200 + }, + { + "epoch": 2.575146935348447, + "grad_norm": 0.2604318981372324, + "learning_rate": 5.390648969724632e-06, + "loss": 0.4699, + "step": 9201 + }, + { + "epoch": 2.5754268122026307, + "grad_norm": 0.24311926826298216, + "learning_rate": 5.383683838113856e-06, + "loss": 0.4582, + "step": 9202 + }, + { + "epoch": 2.575706689056815, + "grad_norm": 0.25901859166320634, + "learning_rate": 5.376722953109026e-06, + "loss": 0.4402, + "step": 9203 + }, + { + "epoch": 2.5759865659109993, + "grad_norm": 0.23953969446327295, + "learning_rate": 5.369766315372676e-06, + "loss": 0.4295, + "step": 9204 + }, + { + "epoch": 2.576266442765183, + "grad_norm": 0.25651576602281145, + "learning_rate": 5.3628139255669566e-06, + "loss": 0.4586, + "step": 9205 + }, + { + "epoch": 2.5765463196193674, + "grad_norm": 0.24919727706103317, + "learning_rate": 5.3558657843535865e-06, + "loss": 0.458, + "step": 9206 + }, + { + "epoch": 2.5768261964735517, + "grad_norm": 0.25696382072887175, + "learning_rate": 5.348921892393904e-06, + "loss": 0.4213, + "step": 9207 + }, + { + "epoch": 2.5771060733277356, + "grad_norm": 0.2553158735417908, + "learning_rate": 5.3419822503488256e-06, + "loss": 0.4495, + "step": 9208 + }, + { + "epoch": 2.57738595018192, + "grad_norm": 0.25377523123978984, + "learning_rate": 5.335046858878873e-06, + "loss": 0.454, + "step": 9209 + }, + { + "epoch": 2.577665827036104, + "grad_norm": 0.2457531256711823, + "learning_rate": 5.328115718644161e-06, + "loss": 0.4465, + "step": 9210 + }, + { + "epoch": 2.5779457038902884, + "grad_norm": 0.2612425526475498, + "learning_rate": 5.321188830304397e-06, + "loss": 0.4695, + "step": 9211 + }, + { + "epoch": 2.5782255807444723, + "grad_norm": 0.2553886435888375, + "learning_rate": 5.314266194518886e-06, + "loss": 0.4415, + "step": 9212 + }, + { + "epoch": 2.5785054575986566, + "grad_norm": 0.247513891804963, + "learning_rate": 5.307347811946534e-06, + "loss": 0.4416, + "step": 9213 + }, + { + "epoch": 2.578785334452841, + "grad_norm": 0.25266220607001677, + "learning_rate": 5.300433683245831e-06, + "loss": 0.4461, + "step": 9214 + }, + { + "epoch": 2.579065211307025, + "grad_norm": 0.24858735261916182, + "learning_rate": 5.293523809074874e-06, + "loss": 0.4606, + "step": 9215 + }, + { + "epoch": 2.579345088161209, + "grad_norm": 0.2558186712920539, + "learning_rate": 5.286618190091341e-06, + "loss": 0.464, + "step": 9216 + }, + { + "epoch": 2.5796249650153933, + "grad_norm": 0.25630614765529586, + "learning_rate": 5.279716826952513e-06, + "loss": 0.4565, + "step": 9217 + }, + { + "epoch": 2.5799048418695776, + "grad_norm": 0.2488157396102064, + "learning_rate": 5.272819720315275e-06, + "loss": 0.4445, + "step": 9218 + }, + { + "epoch": 2.5801847187237614, + "grad_norm": 0.254845405717992, + "learning_rate": 5.265926870836085e-06, + "loss": 0.4567, + "step": 9219 + }, + { + "epoch": 2.5804645955779457, + "grad_norm": 0.2442042932718986, + "learning_rate": 5.259038279171014e-06, + "loss": 0.4496, + "step": 9220 + }, + { + "epoch": 2.58074447243213, + "grad_norm": 0.2583695399185578, + "learning_rate": 5.252153945975724e-06, + "loss": 0.4357, + "step": 9221 + }, + { + "epoch": 2.581024349286314, + "grad_norm": 0.2599667365110196, + "learning_rate": 5.245273871905471e-06, + "loss": 0.4552, + "step": 9222 + }, + { + "epoch": 2.581304226140498, + "grad_norm": 0.24578533495555563, + "learning_rate": 5.2383980576150956e-06, + "loss": 0.4342, + "step": 9223 + }, + { + "epoch": 2.5815841029946824, + "grad_norm": 0.25983770635532155, + "learning_rate": 5.231526503759054e-06, + "loss": 0.4541, + "step": 9224 + }, + { + "epoch": 2.5818639798488663, + "grad_norm": 0.25978791332708273, + "learning_rate": 5.224659210991373e-06, + "loss": 0.4513, + "step": 9225 + }, + { + "epoch": 2.5821438567030506, + "grad_norm": 0.25163242302701827, + "learning_rate": 5.21779617996569e-06, + "loss": 0.4405, + "step": 9226 + }, + { + "epoch": 2.582423733557235, + "grad_norm": 0.2543188791677801, + "learning_rate": 5.210937411335237e-06, + "loss": 0.4565, + "step": 9227 + }, + { + "epoch": 2.5827036104114187, + "grad_norm": 0.25620720936735775, + "learning_rate": 5.204082905752822e-06, + "loss": 0.4559, + "step": 9228 + }, + { + "epoch": 2.582983487265603, + "grad_norm": 0.2550947738601276, + "learning_rate": 5.1972326638708765e-06, + "loss": 0.4364, + "step": 9229 + }, + { + "epoch": 2.5832633641197873, + "grad_norm": 0.24627397104248913, + "learning_rate": 5.190386686341403e-06, + "loss": 0.4428, + "step": 9230 + }, + { + "epoch": 2.5835432409739716, + "grad_norm": 0.2590679580610083, + "learning_rate": 5.183544973816001e-06, + "loss": 0.4621, + "step": 9231 + }, + { + "epoch": 2.583823117828156, + "grad_norm": 0.25292247781032845, + "learning_rate": 5.176707526945879e-06, + "loss": 0.4679, + "step": 9232 + }, + { + "epoch": 2.5841029946823397, + "grad_norm": 0.27356158880275805, + "learning_rate": 5.16987434638182e-06, + "loss": 0.4841, + "step": 9233 + }, + { + "epoch": 2.584382871536524, + "grad_norm": 0.2490183167988874, + "learning_rate": 5.163045432774211e-06, + "loss": 0.4343, + "step": 9234 + }, + { + "epoch": 2.5846627483907083, + "grad_norm": 0.24613135330540556, + "learning_rate": 5.156220786773042e-06, + "loss": 0.4238, + "step": 9235 + }, + { + "epoch": 2.584942625244892, + "grad_norm": 0.25588783561535067, + "learning_rate": 5.149400409027871e-06, + "loss": 0.4785, + "step": 9236 + }, + { + "epoch": 2.5852225020990764, + "grad_norm": 0.24552521028453203, + "learning_rate": 5.142584300187875e-06, + "loss": 0.4395, + "step": 9237 + }, + { + "epoch": 2.5855023789532607, + "grad_norm": 0.2543110945501354, + "learning_rate": 5.135772460901816e-06, + "loss": 0.4766, + "step": 9238 + }, + { + "epoch": 2.5857822558074446, + "grad_norm": 0.2527108255469426, + "learning_rate": 5.128964891818039e-06, + "loss": 0.4556, + "step": 9239 + }, + { + "epoch": 2.586062132661629, + "grad_norm": 0.24839543549521476, + "learning_rate": 5.122161593584507e-06, + "loss": 0.4545, + "step": 9240 + }, + { + "epoch": 2.586342009515813, + "grad_norm": 0.2593936213171141, + "learning_rate": 5.115362566848747e-06, + "loss": 0.4625, + "step": 9241 + }, + { + "epoch": 2.586621886369997, + "grad_norm": 0.2517068631168117, + "learning_rate": 5.108567812257908e-06, + "loss": 0.4543, + "step": 9242 + }, + { + "epoch": 2.5869017632241813, + "grad_norm": 0.2539529424820145, + "learning_rate": 5.101777330458707e-06, + "loss": 0.4539, + "step": 9243 + }, + { + "epoch": 2.5871816400783656, + "grad_norm": 0.28103238452968893, + "learning_rate": 5.09499112209747e-06, + "loss": 0.4523, + "step": 9244 + }, + { + "epoch": 2.5874615169325494, + "grad_norm": 0.25606370897440894, + "learning_rate": 5.0882091878201145e-06, + "loss": 0.4526, + "step": 9245 + }, + { + "epoch": 2.5877413937867337, + "grad_norm": 0.24275143396759824, + "learning_rate": 5.081431528272146e-06, + "loss": 0.4337, + "step": 9246 + }, + { + "epoch": 2.588021270640918, + "grad_norm": 0.24786400825264993, + "learning_rate": 5.0746581440986655e-06, + "loss": 0.4371, + "step": 9247 + }, + { + "epoch": 2.5883011474951023, + "grad_norm": 0.2569107452959841, + "learning_rate": 5.0678890359443676e-06, + "loss": 0.4462, + "step": 9248 + }, + { + "epoch": 2.588581024349286, + "grad_norm": 0.24840227265534442, + "learning_rate": 5.061124204453544e-06, + "loss": 0.4416, + "step": 9249 + }, + { + "epoch": 2.5888609012034705, + "grad_norm": 0.2475647497707761, + "learning_rate": 5.054363650270072e-06, + "loss": 0.4493, + "step": 9250 + }, + { + "epoch": 2.5891407780576547, + "grad_norm": 0.2611274030792405, + "learning_rate": 5.0476073740374215e-06, + "loss": 0.452, + "step": 9251 + }, + { + "epoch": 2.589420654911839, + "grad_norm": 0.25050214858935405, + "learning_rate": 5.040855376398662e-06, + "loss": 0.4479, + "step": 9252 + }, + { + "epoch": 2.589700531766023, + "grad_norm": 0.2508115086874871, + "learning_rate": 5.034107657996456e-06, + "loss": 0.4429, + "step": 9253 + }, + { + "epoch": 2.589980408620207, + "grad_norm": 0.25708179010064847, + "learning_rate": 5.027364219473052e-06, + "loss": 0.4506, + "step": 9254 + }, + { + "epoch": 2.5902602854743915, + "grad_norm": 0.25173750004236733, + "learning_rate": 5.020625061470291e-06, + "loss": 0.4486, + "step": 9255 + }, + { + "epoch": 2.5905401623285753, + "grad_norm": 0.2512137293591381, + "learning_rate": 5.013890184629616e-06, + "loss": 0.4252, + "step": 9256 + }, + { + "epoch": 2.5908200391827596, + "grad_norm": 0.2543750373430273, + "learning_rate": 5.007159589592047e-06, + "loss": 0.4574, + "step": 9257 + }, + { + "epoch": 2.591099916036944, + "grad_norm": 0.2472024703921494, + "learning_rate": 5.000433276998218e-06, + "loss": 0.4349, + "step": 9258 + }, + { + "epoch": 2.5913797928911277, + "grad_norm": 0.2409312346089113, + "learning_rate": 4.9937112474883365e-06, + "loss": 0.4545, + "step": 9259 + }, + { + "epoch": 2.591659669745312, + "grad_norm": 0.26704917029047187, + "learning_rate": 4.986993501702209e-06, + "loss": 0.4465, + "step": 9260 + }, + { + "epoch": 2.5919395465994963, + "grad_norm": 0.2525748147837304, + "learning_rate": 4.980280040279229e-06, + "loss": 0.4486, + "step": 9261 + }, + { + "epoch": 2.59221942345368, + "grad_norm": 0.23942735513286054, + "learning_rate": 4.973570863858401e-06, + "loss": 0.4473, + "step": 9262 + }, + { + "epoch": 2.5924993003078645, + "grad_norm": 0.27030908353186944, + "learning_rate": 4.9668659730783e-06, + "loss": 0.4768, + "step": 9263 + }, + { + "epoch": 2.5927791771620488, + "grad_norm": 0.2512822128836338, + "learning_rate": 4.960165368577096e-06, + "loss": 0.426, + "step": 9264 + }, + { + "epoch": 2.5930590540162326, + "grad_norm": 0.2603896442018894, + "learning_rate": 4.953469050992565e-06, + "loss": 0.4469, + "step": 9265 + }, + { + "epoch": 2.593338930870417, + "grad_norm": 0.25378765872640163, + "learning_rate": 4.9467770209620675e-06, + "loss": 0.4566, + "step": 9266 + }, + { + "epoch": 2.593618807724601, + "grad_norm": 0.25966225841536006, + "learning_rate": 4.9400892791225454e-06, + "loss": 0.476, + "step": 9267 + }, + { + "epoch": 2.5938986845787855, + "grad_norm": 0.25115292088678226, + "learning_rate": 4.933405826110549e-06, + "loss": 0.4391, + "step": 9268 + }, + { + "epoch": 2.5941785614329698, + "grad_norm": 0.25405678616581745, + "learning_rate": 4.926726662562209e-06, + "loss": 0.47, + "step": 9269 + }, + { + "epoch": 2.5944584382871536, + "grad_norm": 0.2480627202841323, + "learning_rate": 4.920051789113256e-06, + "loss": 0.4476, + "step": 9270 + }, + { + "epoch": 2.594738315141338, + "grad_norm": 0.24902788193748368, + "learning_rate": 4.913381206399003e-06, + "loss": 0.459, + "step": 9271 + }, + { + "epoch": 2.595018191995522, + "grad_norm": 0.2526288474786995, + "learning_rate": 4.906714915054367e-06, + "loss": 0.4462, + "step": 9272 + }, + { + "epoch": 2.595298068849706, + "grad_norm": 0.24683415975415668, + "learning_rate": 4.900052915713843e-06, + "loss": 0.4354, + "step": 9273 + }, + { + "epoch": 2.5955779457038903, + "grad_norm": 0.26488285631448083, + "learning_rate": 4.893395209011531e-06, + "loss": 0.4635, + "step": 9274 + }, + { + "epoch": 2.5958578225580746, + "grad_norm": 0.25325409943157845, + "learning_rate": 4.886741795581101e-06, + "loss": 0.4658, + "step": 9275 + }, + { + "epoch": 2.5961376994122585, + "grad_norm": 0.26043278499621375, + "learning_rate": 4.880092676055848e-06, + "loss": 0.4406, + "step": 9276 + }, + { + "epoch": 2.5964175762664428, + "grad_norm": 0.269277412353307, + "learning_rate": 4.873447851068619e-06, + "loss": 0.5047, + "step": 9277 + }, + { + "epoch": 2.596697453120627, + "grad_norm": 0.2620382040003155, + "learning_rate": 4.866807321251888e-06, + "loss": 0.466, + "step": 9278 + }, + { + "epoch": 2.596977329974811, + "grad_norm": 0.25525522928605204, + "learning_rate": 4.860171087237697e-06, + "loss": 0.4333, + "step": 9279 + }, + { + "epoch": 2.597257206828995, + "grad_norm": 0.24995066162807658, + "learning_rate": 4.853539149657688e-06, + "loss": 0.432, + "step": 9280 + }, + { + "epoch": 2.5975370836831795, + "grad_norm": 0.2574171365789426, + "learning_rate": 4.846911509143082e-06, + "loss": 0.4573, + "step": 9281 + }, + { + "epoch": 2.5978169605373633, + "grad_norm": 0.26075742452234985, + "learning_rate": 4.840288166324724e-06, + "loss": 0.4543, + "step": 9282 + }, + { + "epoch": 2.5980968373915476, + "grad_norm": 0.2569715760319337, + "learning_rate": 4.833669121833023e-06, + "loss": 0.4408, + "step": 9283 + }, + { + "epoch": 2.598376714245732, + "grad_norm": 0.2484605217426453, + "learning_rate": 4.827054376297963e-06, + "loss": 0.4331, + "step": 9284 + }, + { + "epoch": 2.598656591099916, + "grad_norm": 0.24905568287189572, + "learning_rate": 4.820443930349156e-06, + "loss": 0.4476, + "step": 9285 + }, + { + "epoch": 2.5989364679541, + "grad_norm": 0.2590631511587284, + "learning_rate": 4.813837784615782e-06, + "loss": 0.4304, + "step": 9286 + }, + { + "epoch": 2.5992163448082843, + "grad_norm": 0.25008885615691817, + "learning_rate": 4.807235939726617e-06, + "loss": 0.4406, + "step": 9287 + }, + { + "epoch": 2.5994962216624686, + "grad_norm": 0.267542945204892, + "learning_rate": 4.800638396310036e-06, + "loss": 0.4563, + "step": 9288 + }, + { + "epoch": 2.599776098516653, + "grad_norm": 0.25736597982979836, + "learning_rate": 4.794045154993993e-06, + "loss": 0.4474, + "step": 9289 + }, + { + "epoch": 2.6000559753708368, + "grad_norm": 0.24746449651076893, + "learning_rate": 4.787456216406028e-06, + "loss": 0.4475, + "step": 9290 + }, + { + "epoch": 2.600335852225021, + "grad_norm": 0.2453563596198923, + "learning_rate": 4.780871581173291e-06, + "loss": 0.4207, + "step": 9291 + }, + { + "epoch": 2.6006157290792054, + "grad_norm": 0.26032544087459286, + "learning_rate": 4.774291249922508e-06, + "loss": 0.4709, + "step": 9292 + }, + { + "epoch": 2.600895605933389, + "grad_norm": 0.2580696455857522, + "learning_rate": 4.767715223279995e-06, + "loss": 0.453, + "step": 9293 + }, + { + "epoch": 2.6011754827875735, + "grad_norm": 0.25871962509925484, + "learning_rate": 4.761143501871667e-06, + "loss": 0.4555, + "step": 9294 + }, + { + "epoch": 2.601455359641758, + "grad_norm": 0.2749257480568536, + "learning_rate": 4.75457608632302e-06, + "loss": 0.4566, + "step": 9295 + }, + { + "epoch": 2.6017352364959416, + "grad_norm": 0.2532048996850684, + "learning_rate": 4.748012977259147e-06, + "loss": 0.4384, + "step": 9296 + }, + { + "epoch": 2.602015113350126, + "grad_norm": 0.26328460933205455, + "learning_rate": 4.741454175304727e-06, + "loss": 0.4614, + "step": 9297 + }, + { + "epoch": 2.60229499020431, + "grad_norm": 0.2664634685783237, + "learning_rate": 4.734899681084021e-06, + "loss": 0.4457, + "step": 9298 + }, + { + "epoch": 2.602574867058494, + "grad_norm": 0.25823670038817764, + "learning_rate": 4.728349495220908e-06, + "loss": 0.4432, + "step": 9299 + }, + { + "epoch": 2.6028547439126783, + "grad_norm": 0.2440733968119489, + "learning_rate": 4.721803618338832e-06, + "loss": 0.4337, + "step": 9300 + }, + { + "epoch": 2.6031346207668626, + "grad_norm": 0.26225762731850727, + "learning_rate": 4.7152620510608424e-06, + "loss": 0.4775, + "step": 9301 + }, + { + "epoch": 2.6034144976210465, + "grad_norm": 0.24616384891613666, + "learning_rate": 4.708724794009545e-06, + "loss": 0.4524, + "step": 9302 + }, + { + "epoch": 2.6036943744752308, + "grad_norm": 0.24668665097430656, + "learning_rate": 4.702191847807169e-06, + "loss": 0.445, + "step": 9303 + }, + { + "epoch": 2.603974251329415, + "grad_norm": 0.2597920558729576, + "learning_rate": 4.695663213075535e-06, + "loss": 0.4601, + "step": 9304 + }, + { + "epoch": 2.6042541281835994, + "grad_norm": 0.24607487003505218, + "learning_rate": 4.689138890436029e-06, + "loss": 0.4604, + "step": 9305 + }, + { + "epoch": 2.6045340050377837, + "grad_norm": 0.2600636397934303, + "learning_rate": 4.6826188805096484e-06, + "loss": 0.4569, + "step": 9306 + }, + { + "epoch": 2.6048138818919675, + "grad_norm": 0.2459774794523608, + "learning_rate": 4.676103183916963e-06, + "loss": 0.4312, + "step": 9307 + }, + { + "epoch": 2.605093758746152, + "grad_norm": 0.2469395153022534, + "learning_rate": 4.669591801278151e-06, + "loss": 0.4445, + "step": 9308 + }, + { + "epoch": 2.605373635600336, + "grad_norm": 0.2523058020611295, + "learning_rate": 4.6630847332129575e-06, + "loss": 0.4723, + "step": 9309 + }, + { + "epoch": 2.60565351245452, + "grad_norm": 0.24619581804301602, + "learning_rate": 4.656581980340741e-06, + "loss": 0.4586, + "step": 9310 + }, + { + "epoch": 2.605933389308704, + "grad_norm": 0.25924161343200974, + "learning_rate": 4.650083543280431e-06, + "loss": 0.4717, + "step": 9311 + }, + { + "epoch": 2.6062132661628885, + "grad_norm": 0.2531297057253927, + "learning_rate": 4.643589422650552e-06, + "loss": 0.4374, + "step": 9312 + }, + { + "epoch": 2.6064931430170724, + "grad_norm": 0.24542663509039195, + "learning_rate": 4.637099619069213e-06, + "loss": 0.4564, + "step": 9313 + }, + { + "epoch": 2.6067730198712566, + "grad_norm": 0.2557134244304275, + "learning_rate": 4.630614133154132e-06, + "loss": 0.4558, + "step": 9314 + }, + { + "epoch": 2.607052896725441, + "grad_norm": 0.2658720097253333, + "learning_rate": 4.6241329655225875e-06, + "loss": 0.4734, + "step": 9315 + }, + { + "epoch": 2.607332773579625, + "grad_norm": 0.2607839304856215, + "learning_rate": 4.617656116791458e-06, + "loss": 0.4418, + "step": 9316 + }, + { + "epoch": 2.607612650433809, + "grad_norm": 0.2542006757984473, + "learning_rate": 4.611183587577228e-06, + "loss": 0.4478, + "step": 9317 + }, + { + "epoch": 2.6078925272879934, + "grad_norm": 0.26790537167162126, + "learning_rate": 4.6047153784959495e-06, + "loss": 0.4781, + "step": 9318 + }, + { + "epoch": 2.608172404142177, + "grad_norm": 0.25588698574798024, + "learning_rate": 4.59825149016328e-06, + "loss": 0.4895, + "step": 9319 + }, + { + "epoch": 2.6084522809963615, + "grad_norm": 0.2552989315535823, + "learning_rate": 4.591791923194438e-06, + "loss": 0.4451, + "step": 9320 + }, + { + "epoch": 2.608732157850546, + "grad_norm": 0.2441500900857033, + "learning_rate": 4.5853366782042555e-06, + "loss": 0.4334, + "step": 9321 + }, + { + "epoch": 2.6090120347047296, + "grad_norm": 0.26821549973815967, + "learning_rate": 4.57888575580715e-06, + "loss": 0.4662, + "step": 9322 + }, + { + "epoch": 2.609291911558914, + "grad_norm": 0.26360346162674064, + "learning_rate": 4.57243915661712e-06, + "loss": 0.4493, + "step": 9323 + }, + { + "epoch": 2.6095717884130982, + "grad_norm": 0.25924151998896916, + "learning_rate": 4.565996881247758e-06, + "loss": 0.4556, + "step": 9324 + }, + { + "epoch": 2.6098516652672825, + "grad_norm": 0.2604518073930235, + "learning_rate": 4.559558930312241e-06, + "loss": 0.4478, + "step": 9325 + }, + { + "epoch": 2.610131542121467, + "grad_norm": 0.2562775273881299, + "learning_rate": 4.553125304423339e-06, + "loss": 0.449, + "step": 9326 + }, + { + "epoch": 2.6104114189756507, + "grad_norm": 0.26455514338810987, + "learning_rate": 4.546696004193413e-06, + "loss": 0.4771, + "step": 9327 + }, + { + "epoch": 2.610691295829835, + "grad_norm": 0.24071992091921762, + "learning_rate": 4.5402710302344e-06, + "loss": 0.4382, + "step": 9328 + }, + { + "epoch": 2.6109711726840192, + "grad_norm": 0.2560770546418357, + "learning_rate": 4.533850383157834e-06, + "loss": 0.4555, + "step": 9329 + }, + { + "epoch": 2.611251049538203, + "grad_norm": 0.2367427791260587, + "learning_rate": 4.527434063574843e-06, + "loss": 0.4269, + "step": 9330 + }, + { + "epoch": 2.6115309263923874, + "grad_norm": 0.25413566039375374, + "learning_rate": 4.5210220720961205e-06, + "loss": 0.4639, + "step": 9331 + }, + { + "epoch": 2.6118108032465717, + "grad_norm": 0.25336488873316837, + "learning_rate": 4.51461440933198e-06, + "loss": 0.454, + "step": 9332 + }, + { + "epoch": 2.6120906801007555, + "grad_norm": 0.2655120275885839, + "learning_rate": 4.508211075892288e-06, + "loss": 0.457, + "step": 9333 + }, + { + "epoch": 2.61237055695494, + "grad_norm": 0.24853946314736047, + "learning_rate": 4.5018120723865354e-06, + "loss": 0.4475, + "step": 9334 + }, + { + "epoch": 2.612650433809124, + "grad_norm": 0.26015719641746526, + "learning_rate": 4.495417399423779e-06, + "loss": 0.4608, + "step": 9335 + }, + { + "epoch": 2.612930310663308, + "grad_norm": 0.2481190133426938, + "learning_rate": 4.489027057612666e-06, + "loss": 0.4272, + "step": 9336 + }, + { + "epoch": 2.6132101875174922, + "grad_norm": 0.2599020825492872, + "learning_rate": 4.482641047561437e-06, + "loss": 0.4533, + "step": 9337 + }, + { + "epoch": 2.6134900643716765, + "grad_norm": 0.26310922664294667, + "learning_rate": 4.476259369877906e-06, + "loss": 0.4599, + "step": 9338 + }, + { + "epoch": 2.6137699412258604, + "grad_norm": 0.28213603933508197, + "learning_rate": 4.469882025169481e-06, + "loss": 0.4593, + "step": 9339 + }, + { + "epoch": 2.6140498180800447, + "grad_norm": 0.26416135955336856, + "learning_rate": 4.463509014043177e-06, + "loss": 0.4543, + "step": 9340 + }, + { + "epoch": 2.614329694934229, + "grad_norm": 0.25295242555747477, + "learning_rate": 4.45714033710557e-06, + "loss": 0.4582, + "step": 9341 + }, + { + "epoch": 2.6146095717884132, + "grad_norm": 0.25592679760765735, + "learning_rate": 4.450775994962836e-06, + "loss": 0.4866, + "step": 9342 + }, + { + "epoch": 2.614889448642597, + "grad_norm": 0.25470082594219184, + "learning_rate": 4.44441598822074e-06, + "loss": 0.4537, + "step": 9343 + }, + { + "epoch": 2.6151693254967814, + "grad_norm": 0.29443900741901563, + "learning_rate": 4.438060317484627e-06, + "loss": 0.4697, + "step": 9344 + }, + { + "epoch": 2.6154492023509657, + "grad_norm": 0.2631036528520912, + "learning_rate": 4.431708983359434e-06, + "loss": 0.4307, + "step": 9345 + }, + { + "epoch": 2.61572907920515, + "grad_norm": 0.2598629462089242, + "learning_rate": 4.425361986449689e-06, + "loss": 0.4627, + "step": 9346 + }, + { + "epoch": 2.616008956059334, + "grad_norm": 0.25912034443154314, + "learning_rate": 4.419019327359497e-06, + "loss": 0.4711, + "step": 9347 + }, + { + "epoch": 2.616288832913518, + "grad_norm": 0.24671391772179715, + "learning_rate": 4.41268100669256e-06, + "loss": 0.4573, + "step": 9348 + }, + { + "epoch": 2.6165687097677024, + "grad_norm": 0.24826633076239904, + "learning_rate": 4.40634702505216e-06, + "loss": 0.4457, + "step": 9349 + }, + { + "epoch": 2.6168485866218862, + "grad_norm": 0.2684082894019948, + "learning_rate": 4.400017383041161e-06, + "loss": 0.4365, + "step": 9350 + }, + { + "epoch": 2.6171284634760705, + "grad_norm": 0.2622832928493746, + "learning_rate": 4.393692081262035e-06, + "loss": 0.4539, + "step": 9351 + }, + { + "epoch": 2.617408340330255, + "grad_norm": 0.2531070945819376, + "learning_rate": 4.38737112031683e-06, + "loss": 0.4257, + "step": 9352 + }, + { + "epoch": 2.6176882171844387, + "grad_norm": 0.2721155574974308, + "learning_rate": 4.381054500807175e-06, + "loss": 0.4669, + "step": 9353 + }, + { + "epoch": 2.617968094038623, + "grad_norm": 0.24878218136538488, + "learning_rate": 4.3747422233342775e-06, + "loss": 0.4577, + "step": 9354 + }, + { + "epoch": 2.6182479708928073, + "grad_norm": 0.2548314632370883, + "learning_rate": 4.368434288498968e-06, + "loss": 0.4639, + "step": 9355 + }, + { + "epoch": 2.618527847746991, + "grad_norm": 0.2529519792388573, + "learning_rate": 4.362130696901617e-06, + "loss": 0.4363, + "step": 9356 + }, + { + "epoch": 2.6188077246011754, + "grad_norm": 0.2534106158408726, + "learning_rate": 4.355831449142206e-06, + "loss": 0.4521, + "step": 9357 + }, + { + "epoch": 2.6190876014553597, + "grad_norm": 0.2536426424989659, + "learning_rate": 4.349536545820309e-06, + "loss": 0.432, + "step": 9358 + }, + { + "epoch": 2.6193674783095435, + "grad_norm": 0.2535331169579017, + "learning_rate": 4.343245987535072e-06, + "loss": 0.441, + "step": 9359 + }, + { + "epoch": 2.619647355163728, + "grad_norm": 0.2500811714147653, + "learning_rate": 4.336959774885241e-06, + "loss": 0.4656, + "step": 9360 + }, + { + "epoch": 2.619927232017912, + "grad_norm": 0.25858977177423315, + "learning_rate": 4.330677908469133e-06, + "loss": 0.4546, + "step": 9361 + }, + { + "epoch": 2.6202071088720964, + "grad_norm": 0.2518512386049054, + "learning_rate": 4.324400388884664e-06, + "loss": 0.4336, + "step": 9362 + }, + { + "epoch": 2.6204869857262807, + "grad_norm": 0.2699716072155538, + "learning_rate": 4.318127216729334e-06, + "loss": 0.4531, + "step": 9363 + }, + { + "epoch": 2.6207668625804645, + "grad_norm": 0.2541266763600309, + "learning_rate": 4.311858392600226e-06, + "loss": 0.4339, + "step": 9364 + }, + { + "epoch": 2.621046739434649, + "grad_norm": 0.2634890024901075, + "learning_rate": 4.3055939170940086e-06, + "loss": 0.4505, + "step": 9365 + }, + { + "epoch": 2.621326616288833, + "grad_norm": 0.24990794728809038, + "learning_rate": 4.2993337908069366e-06, + "loss": 0.4287, + "step": 9366 + }, + { + "epoch": 2.621606493143017, + "grad_norm": 0.2480944203989238, + "learning_rate": 4.2930780143348555e-06, + "loss": 0.4563, + "step": 9367 + }, + { + "epoch": 2.6218863699972013, + "grad_norm": 0.24803952381587008, + "learning_rate": 4.286826588273185e-06, + "loss": 0.4277, + "step": 9368 + }, + { + "epoch": 2.6221662468513856, + "grad_norm": 0.24203231261637667, + "learning_rate": 4.280579513216954e-06, + "loss": 0.4485, + "step": 9369 + }, + { + "epoch": 2.6224461237055694, + "grad_norm": 0.2479442825992165, + "learning_rate": 4.274336789760752e-06, + "loss": 0.4365, + "step": 9370 + }, + { + "epoch": 2.6227260005597537, + "grad_norm": 0.24976414773135683, + "learning_rate": 4.268098418498773e-06, + "loss": 0.4684, + "step": 9371 + }, + { + "epoch": 2.623005877413938, + "grad_norm": 0.25003686869830055, + "learning_rate": 4.2618644000247785e-06, + "loss": 0.4199, + "step": 9372 + }, + { + "epoch": 2.623285754268122, + "grad_norm": 0.26451613692079345, + "learning_rate": 4.255634734932146e-06, + "loss": 0.4522, + "step": 9373 + }, + { + "epoch": 2.623565631122306, + "grad_norm": 0.2509294379357824, + "learning_rate": 4.249409423813788e-06, + "loss": 0.4445, + "step": 9374 + }, + { + "epoch": 2.6238455079764904, + "grad_norm": 0.2892647018192773, + "learning_rate": 4.243188467262255e-06, + "loss": 0.468, + "step": 9375 + }, + { + "epoch": 2.6241253848306743, + "grad_norm": 0.24355127012649377, + "learning_rate": 4.236971865869655e-06, + "loss": 0.4221, + "step": 9376 + }, + { + "epoch": 2.6244052616848585, + "grad_norm": 0.24109406623426521, + "learning_rate": 4.2307596202276815e-06, + "loss": 0.4566, + "step": 9377 + }, + { + "epoch": 2.624685138539043, + "grad_norm": 0.2470566193498977, + "learning_rate": 4.224551730927628e-06, + "loss": 0.4417, + "step": 9378 + }, + { + "epoch": 2.624965015393227, + "grad_norm": 0.256879649992719, + "learning_rate": 4.218348198560368e-06, + "loss": 0.4509, + "step": 9379 + }, + { + "epoch": 2.625244892247411, + "grad_norm": 0.254632909125894, + "learning_rate": 4.212149023716344e-06, + "loss": 0.4429, + "step": 9380 + }, + { + "epoch": 2.6255247691015953, + "grad_norm": 0.24720789018540543, + "learning_rate": 4.205954206985607e-06, + "loss": 0.4557, + "step": 9381 + }, + { + "epoch": 2.6258046459557796, + "grad_norm": 0.25333054501266145, + "learning_rate": 4.19976374895778e-06, + "loss": 0.4447, + "step": 9382 + }, + { + "epoch": 2.626084522809964, + "grad_norm": 0.2588455562400075, + "learning_rate": 4.19357765022208e-06, + "loss": 0.4652, + "step": 9383 + }, + { + "epoch": 2.6263643996641477, + "grad_norm": 0.2607027020838425, + "learning_rate": 4.187395911367292e-06, + "loss": 0.4599, + "step": 9384 + }, + { + "epoch": 2.626644276518332, + "grad_norm": 0.24573620452859748, + "learning_rate": 4.181218532981796e-06, + "loss": 0.4457, + "step": 9385 + }, + { + "epoch": 2.6269241533725163, + "grad_norm": 0.24968986893656317, + "learning_rate": 4.175045515653575e-06, + "loss": 0.4487, + "step": 9386 + }, + { + "epoch": 2.6272040302267, + "grad_norm": 0.2563148180069053, + "learning_rate": 4.168876859970172e-06, + "loss": 0.4607, + "step": 9387 + }, + { + "epoch": 2.6274839070808844, + "grad_norm": 0.25600605587513764, + "learning_rate": 4.162712566518723e-06, + "loss": 0.4439, + "step": 9388 + }, + { + "epoch": 2.6277637839350687, + "grad_norm": 0.25005785222597743, + "learning_rate": 4.156552635885946e-06, + "loss": 0.4615, + "step": 9389 + }, + { + "epoch": 2.6280436607892526, + "grad_norm": 0.24510843450387793, + "learning_rate": 4.1503970686581514e-06, + "loss": 0.4413, + "step": 9390 + }, + { + "epoch": 2.628323537643437, + "grad_norm": 0.24194767758289132, + "learning_rate": 4.144245865421237e-06, + "loss": 0.4362, + "step": 9391 + }, + { + "epoch": 2.628603414497621, + "grad_norm": 0.2490759801194332, + "learning_rate": 4.138099026760656e-06, + "loss": 0.4495, + "step": 9392 + }, + { + "epoch": 2.628883291351805, + "grad_norm": 0.253358245566839, + "learning_rate": 4.131956553261479e-06, + "loss": 0.4523, + "step": 9393 + }, + { + "epoch": 2.6291631682059893, + "grad_norm": 0.2630190315744542, + "learning_rate": 4.1258184455083505e-06, + "loss": 0.4435, + "step": 9394 + }, + { + "epoch": 2.6294430450601736, + "grad_norm": 0.25470117843539053, + "learning_rate": 4.119684704085502e-06, + "loss": 0.4464, + "step": 9395 + }, + { + "epoch": 2.6297229219143574, + "grad_norm": 0.25859111668674567, + "learning_rate": 4.1135553295767455e-06, + "loss": 0.4557, + "step": 9396 + }, + { + "epoch": 2.6300027987685417, + "grad_norm": 0.25524386836612356, + "learning_rate": 4.107430322565469e-06, + "loss": 0.4599, + "step": 9397 + }, + { + "epoch": 2.630282675622726, + "grad_norm": 0.24574519551467336, + "learning_rate": 4.101309683634669e-06, + "loss": 0.4564, + "step": 9398 + }, + { + "epoch": 2.6305625524769103, + "grad_norm": 0.2574978646230604, + "learning_rate": 4.095193413366899e-06, + "loss": 0.4527, + "step": 9399 + }, + { + "epoch": 2.6308424293310946, + "grad_norm": 0.25000210745410584, + "learning_rate": 4.089081512344317e-06, + "loss": 0.4376, + "step": 9400 + }, + { + "epoch": 2.6311223061852784, + "grad_norm": 0.2605679393117334, + "learning_rate": 4.082973981148653e-06, + "loss": 0.4421, + "step": 9401 + }, + { + "epoch": 2.6314021830394627, + "grad_norm": 0.26409217588005957, + "learning_rate": 4.0768708203612175e-06, + "loss": 0.4371, + "step": 9402 + }, + { + "epoch": 2.631682059893647, + "grad_norm": 0.25377680711251804, + "learning_rate": 4.07077203056293e-06, + "loss": 0.4514, + "step": 9403 + }, + { + "epoch": 2.631961936747831, + "grad_norm": 0.25210873312266174, + "learning_rate": 4.064677612334267e-06, + "loss": 0.4387, + "step": 9404 + }, + { + "epoch": 2.632241813602015, + "grad_norm": 0.24494119559621805, + "learning_rate": 4.058587566255306e-06, + "loss": 0.4297, + "step": 9405 + }, + { + "epoch": 2.6325216904561994, + "grad_norm": 0.2596515985559775, + "learning_rate": 4.052501892905691e-06, + "loss": 0.4564, + "step": 9406 + }, + { + "epoch": 2.6328015673103833, + "grad_norm": 0.2518905835826524, + "learning_rate": 4.046420592864664e-06, + "loss": 0.4349, + "step": 9407 + }, + { + "epoch": 2.6330814441645676, + "grad_norm": 0.2467819090355505, + "learning_rate": 4.040343666711044e-06, + "loss": 0.4525, + "step": 9408 + }, + { + "epoch": 2.633361321018752, + "grad_norm": 0.24948075480582393, + "learning_rate": 4.034271115023247e-06, + "loss": 0.4267, + "step": 9409 + }, + { + "epoch": 2.6336411978729357, + "grad_norm": 0.2489089465903646, + "learning_rate": 4.02820293837925e-06, + "loss": 0.4311, + "step": 9410 + }, + { + "epoch": 2.63392107472712, + "grad_norm": 0.256823975308967, + "learning_rate": 4.022139137356623e-06, + "loss": 0.4477, + "step": 9411 + }, + { + "epoch": 2.6342009515813043, + "grad_norm": 0.24609712398915673, + "learning_rate": 4.0160797125325335e-06, + "loss": 0.4619, + "step": 9412 + }, + { + "epoch": 2.634480828435488, + "grad_norm": 0.2593112314554741, + "learning_rate": 4.010024664483708e-06, + "loss": 0.4427, + "step": 9413 + }, + { + "epoch": 2.6347607052896724, + "grad_norm": 0.2650976512280279, + "learning_rate": 4.00397399378648e-06, + "loss": 0.458, + "step": 9414 + }, + { + "epoch": 2.6350405821438567, + "grad_norm": 0.2606263463407475, + "learning_rate": 3.997927701016757e-06, + "loss": 0.4542, + "step": 9415 + }, + { + "epoch": 2.635320458998041, + "grad_norm": 0.26155105720954686, + "learning_rate": 3.991885786750021e-06, + "loss": 0.4437, + "step": 9416 + }, + { + "epoch": 2.635600335852225, + "grad_norm": 0.27000033674043966, + "learning_rate": 3.985848251561347e-06, + "loss": 0.4659, + "step": 9417 + }, + { + "epoch": 2.635880212706409, + "grad_norm": 0.27621755838983997, + "learning_rate": 3.979815096025391e-06, + "loss": 0.4488, + "step": 9418 + }, + { + "epoch": 2.6361600895605934, + "grad_norm": 0.26000645509963716, + "learning_rate": 3.973786320716394e-06, + "loss": 0.4533, + "step": 9419 + }, + { + "epoch": 2.6364399664147777, + "grad_norm": 0.25805612731916294, + "learning_rate": 3.967761926208163e-06, + "loss": 0.4493, + "step": 9420 + }, + { + "epoch": 2.6367198432689616, + "grad_norm": 0.26245896850743844, + "learning_rate": 3.961741913074135e-06, + "loss": 0.4411, + "step": 9421 + }, + { + "epoch": 2.636999720123146, + "grad_norm": 0.25100152576965584, + "learning_rate": 3.955726281887273e-06, + "loss": 0.4511, + "step": 9422 + }, + { + "epoch": 2.63727959697733, + "grad_norm": 0.2565874408931755, + "learning_rate": 3.9497150332201585e-06, + "loss": 0.4672, + "step": 9423 + }, + { + "epoch": 2.637559473831514, + "grad_norm": 0.2656034989017934, + "learning_rate": 3.943708167644944e-06, + "loss": 0.4567, + "step": 9424 + }, + { + "epoch": 2.6378393506856983, + "grad_norm": 0.2550238839321128, + "learning_rate": 3.937705685733362e-06, + "loss": 0.4569, + "step": 9425 + }, + { + "epoch": 2.6381192275398826, + "grad_norm": 0.2489611089489336, + "learning_rate": 3.931707588056738e-06, + "loss": 0.4626, + "step": 9426 + }, + { + "epoch": 2.6383991043940664, + "grad_norm": 0.24179225105831964, + "learning_rate": 3.925713875185977e-06, + "loss": 0.4363, + "step": 9427 + }, + { + "epoch": 2.6386789812482507, + "grad_norm": 0.25738028516432154, + "learning_rate": 3.919724547691556e-06, + "loss": 0.455, + "step": 9428 + }, + { + "epoch": 2.638958858102435, + "grad_norm": 0.3421980163502586, + "learning_rate": 3.91373960614354e-06, + "loss": 0.4578, + "step": 9429 + }, + { + "epoch": 2.639238734956619, + "grad_norm": 0.25132835113339924, + "learning_rate": 3.907759051111581e-06, + "loss": 0.4484, + "step": 9430 + }, + { + "epoch": 2.639518611810803, + "grad_norm": 0.2622300126748502, + "learning_rate": 3.901782883164918e-06, + "loss": 0.4753, + "step": 9431 + }, + { + "epoch": 2.6397984886649875, + "grad_norm": 0.25253521288343556, + "learning_rate": 3.89581110287236e-06, + "loss": 0.4446, + "step": 9432 + }, + { + "epoch": 2.6400783655191713, + "grad_norm": 0.2578958518456159, + "learning_rate": 3.88984371080231e-06, + "loss": 0.4706, + "step": 9433 + }, + { + "epoch": 2.6403582423733556, + "grad_norm": 0.25334138612729395, + "learning_rate": 3.883880707522747e-06, + "loss": 0.4506, + "step": 9434 + }, + { + "epoch": 2.64063811922754, + "grad_norm": 0.24506347974857887, + "learning_rate": 3.8779220936012275e-06, + "loss": 0.4417, + "step": 9435 + }, + { + "epoch": 2.640917996081724, + "grad_norm": 0.25878707958534775, + "learning_rate": 3.871967869604898e-06, + "loss": 0.4465, + "step": 9436 + }, + { + "epoch": 2.6411978729359085, + "grad_norm": 0.25511636798625403, + "learning_rate": 3.866018036100477e-06, + "loss": 0.4254, + "step": 9437 + }, + { + "epoch": 2.6414777497900923, + "grad_norm": 0.27087832571071385, + "learning_rate": 3.86007259365429e-06, + "loss": 0.4726, + "step": 9438 + }, + { + "epoch": 2.6417576266442766, + "grad_norm": 0.2639956803014047, + "learning_rate": 3.854131542832218e-06, + "loss": 0.4569, + "step": 9439 + }, + { + "epoch": 2.642037503498461, + "grad_norm": 0.2608292468835911, + "learning_rate": 3.848194884199735e-06, + "loss": 0.4278, + "step": 9440 + }, + { + "epoch": 2.6423173803526447, + "grad_norm": 0.257109106994948, + "learning_rate": 3.8422626183218956e-06, + "loss": 0.461, + "step": 9441 + }, + { + "epoch": 2.642597257206829, + "grad_norm": 0.25760729945150973, + "learning_rate": 3.836334745763337e-06, + "loss": 0.4414, + "step": 9442 + }, + { + "epoch": 2.6428771340610133, + "grad_norm": 0.25075381348627446, + "learning_rate": 3.8304112670882796e-06, + "loss": 0.4348, + "step": 9443 + }, + { + "epoch": 2.643157010915197, + "grad_norm": 0.26297703591974236, + "learning_rate": 3.824492182860517e-06, + "loss": 0.4559, + "step": 9444 + }, + { + "epoch": 2.6434368877693815, + "grad_norm": 0.24954680414939545, + "learning_rate": 3.818577493643444e-06, + "loss": 0.4449, + "step": 9445 + }, + { + "epoch": 2.6437167646235658, + "grad_norm": 0.2530958409819108, + "learning_rate": 3.812667200000003e-06, + "loss": 0.4522, + "step": 9446 + }, + { + "epoch": 2.6439966414777496, + "grad_norm": 0.26183666165205954, + "learning_rate": 3.8067613024927506e-06, + "loss": 0.4612, + "step": 9447 + }, + { + "epoch": 2.644276518331934, + "grad_norm": 0.2541657204587236, + "learning_rate": 3.8008598016838094e-06, + "loss": 0.4394, + "step": 9448 + }, + { + "epoch": 2.644556395186118, + "grad_norm": 0.2620238517426079, + "learning_rate": 3.7949626981348964e-06, + "loss": 0.4571, + "step": 9449 + }, + { + "epoch": 2.644836272040302, + "grad_norm": 0.24271353893657108, + "learning_rate": 3.789069992407296e-06, + "loss": 0.4488, + "step": 9450 + }, + { + "epoch": 2.6451161488944863, + "grad_norm": 0.2525646148455708, + "learning_rate": 3.783181685061876e-06, + "loss": 0.4551, + "step": 9451 + }, + { + "epoch": 2.6453960257486706, + "grad_norm": 0.24362772636513352, + "learning_rate": 3.7772977766590943e-06, + "loss": 0.4566, + "step": 9452 + }, + { + "epoch": 2.645675902602855, + "grad_norm": 0.24968313358726013, + "learning_rate": 3.7714182677589804e-06, + "loss": 0.4522, + "step": 9453 + }, + { + "epoch": 2.6459557794570387, + "grad_norm": 0.26749174667974374, + "learning_rate": 3.7655431589211422e-06, + "loss": 0.4662, + "step": 9454 + }, + { + "epoch": 2.646235656311223, + "grad_norm": 0.25207039781310786, + "learning_rate": 3.7596724507047932e-06, + "loss": 0.4511, + "step": 9455 + }, + { + "epoch": 2.6465155331654073, + "grad_norm": 0.26139214837358166, + "learning_rate": 3.753806143668703e-06, + "loss": 0.4604, + "step": 9456 + }, + { + "epoch": 2.6467954100195916, + "grad_norm": 0.25471814006145055, + "learning_rate": 3.7479442383712316e-06, + "loss": 0.4343, + "step": 9457 + }, + { + "epoch": 2.6470752868737755, + "grad_norm": 0.26040601131187685, + "learning_rate": 3.7420867353703147e-06, + "loss": 0.4602, + "step": 9458 + }, + { + "epoch": 2.6473551637279598, + "grad_norm": 0.24031830715623834, + "learning_rate": 3.736233635223474e-06, + "loss": 0.4458, + "step": 9459 + }, + { + "epoch": 2.647635040582144, + "grad_norm": 0.24991718967960128, + "learning_rate": 3.7303849384878076e-06, + "loss": 0.4533, + "step": 9460 + }, + { + "epoch": 2.647914917436328, + "grad_norm": 0.25381014077048275, + "learning_rate": 3.724540645720004e-06, + "loss": 0.4533, + "step": 9461 + }, + { + "epoch": 2.648194794290512, + "grad_norm": 0.24513905116283924, + "learning_rate": 3.7187007574763232e-06, + "loss": 0.4207, + "step": 9462 + }, + { + "epoch": 2.6484746711446965, + "grad_norm": 0.2622613581031185, + "learning_rate": 3.712865274312616e-06, + "loss": 0.4519, + "step": 9463 + }, + { + "epoch": 2.6487545479988803, + "grad_norm": 0.25305479769519595, + "learning_rate": 3.7070341967842926e-06, + "loss": 0.4307, + "step": 9464 + }, + { + "epoch": 2.6490344248530646, + "grad_norm": 0.2369397572733977, + "learning_rate": 3.701207525446365e-06, + "loss": 0.4281, + "step": 9465 + }, + { + "epoch": 2.649314301707249, + "grad_norm": 0.25102184052885873, + "learning_rate": 3.695385260853418e-06, + "loss": 0.4733, + "step": 9466 + }, + { + "epoch": 2.6495941785614328, + "grad_norm": 0.2892932955279966, + "learning_rate": 3.689567403559624e-06, + "loss": 0.4563, + "step": 9467 + }, + { + "epoch": 2.649874055415617, + "grad_norm": 0.2542357110750897, + "learning_rate": 3.6837539541187238e-06, + "loss": 0.4327, + "step": 9468 + }, + { + "epoch": 2.6501539322698013, + "grad_norm": 0.2576517441763403, + "learning_rate": 3.677944913084047e-06, + "loss": 0.4507, + "step": 9469 + }, + { + "epoch": 2.650433809123985, + "grad_norm": 0.2642765121906414, + "learning_rate": 3.6721402810085015e-06, + "loss": 0.4581, + "step": 9470 + }, + { + "epoch": 2.6507136859781695, + "grad_norm": 0.2603716715661713, + "learning_rate": 3.6663400584445616e-06, + "loss": 0.4487, + "step": 9471 + }, + { + "epoch": 2.6509935628323538, + "grad_norm": 0.24098686381718468, + "learning_rate": 3.660544245944325e-06, + "loss": 0.4379, + "step": 9472 + }, + { + "epoch": 2.651273439686538, + "grad_norm": 0.2604086414853814, + "learning_rate": 3.654752844059417e-06, + "loss": 0.4596, + "step": 9473 + }, + { + "epoch": 2.6515533165407223, + "grad_norm": 0.2701012382621206, + "learning_rate": 3.6489658533410797e-06, + "loss": 0.4603, + "step": 9474 + }, + { + "epoch": 2.651833193394906, + "grad_norm": 0.255398830252583, + "learning_rate": 3.643183274340112e-06, + "loss": 0.4407, + "step": 9475 + }, + { + "epoch": 2.6521130702490905, + "grad_norm": 0.25049589227035957, + "learning_rate": 3.6374051076069115e-06, + "loss": 0.4466, + "step": 9476 + }, + { + "epoch": 2.652392947103275, + "grad_norm": 0.25494597161481597, + "learning_rate": 3.631631353691439e-06, + "loss": 0.4388, + "step": 9477 + }, + { + "epoch": 2.6526728239574586, + "grad_norm": 0.25292281522898435, + "learning_rate": 3.62586201314325e-06, + "loss": 0.4354, + "step": 9478 + }, + { + "epoch": 2.652952700811643, + "grad_norm": 0.2521383821512615, + "learning_rate": 3.6200970865114704e-06, + "loss": 0.4563, + "step": 9479 + }, + { + "epoch": 2.653232577665827, + "grad_norm": 0.25445549286093044, + "learning_rate": 3.614336574344812e-06, + "loss": 0.4499, + "step": 9480 + }, + { + "epoch": 2.653512454520011, + "grad_norm": 0.2571486008365219, + "learning_rate": 3.608580477191559e-06, + "loss": 0.4608, + "step": 9481 + }, + { + "epoch": 2.6537923313741953, + "grad_norm": 0.2515101810049176, + "learning_rate": 3.6028287955995943e-06, + "loss": 0.4131, + "step": 9482 + }, + { + "epoch": 2.6540722082283796, + "grad_norm": 0.2600451628950232, + "learning_rate": 3.597081530116342e-06, + "loss": 0.4628, + "step": 9483 + }, + { + "epoch": 2.6543520850825635, + "grad_norm": 0.24947153800241603, + "learning_rate": 3.5913386812888474e-06, + "loss": 0.4456, + "step": 9484 + }, + { + "epoch": 2.6546319619367478, + "grad_norm": 0.25041000642750527, + "learning_rate": 3.585600249663712e-06, + "loss": 0.4572, + "step": 9485 + }, + { + "epoch": 2.654911838790932, + "grad_norm": 0.2531926139912886, + "learning_rate": 3.5798662357871217e-06, + "loss": 0.4379, + "step": 9486 + }, + { + "epoch": 2.655191715645116, + "grad_norm": 0.27303601220831636, + "learning_rate": 3.574136640204845e-06, + "loss": 0.4742, + "step": 9487 + }, + { + "epoch": 2.6554715924993, + "grad_norm": 0.25279944241663, + "learning_rate": 3.568411463462229e-06, + "loss": 0.4476, + "step": 9488 + }, + { + "epoch": 2.6557514693534845, + "grad_norm": 0.25596048011253975, + "learning_rate": 3.5626907061041937e-06, + "loss": 0.4591, + "step": 9489 + }, + { + "epoch": 2.656031346207669, + "grad_norm": 0.264374812286928, + "learning_rate": 3.556974368675253e-06, + "loss": 0.467, + "step": 9490 + }, + { + "epoch": 2.6563112230618526, + "grad_norm": 0.2633281828279678, + "learning_rate": 3.5512624517194893e-06, + "loss": 0.4556, + "step": 9491 + }, + { + "epoch": 2.656591099916037, + "grad_norm": 0.24839076237085658, + "learning_rate": 3.545554955780567e-06, + "loss": 0.4624, + "step": 9492 + }, + { + "epoch": 2.656870976770221, + "grad_norm": 0.2552202991168326, + "learning_rate": 3.5398518814017177e-06, + "loss": 0.436, + "step": 9493 + }, + { + "epoch": 2.6571508536244055, + "grad_norm": 0.25713779284626853, + "learning_rate": 3.5341532291257806e-06, + "loss": 0.462, + "step": 9494 + }, + { + "epoch": 2.6574307304785894, + "grad_norm": 0.2566118638588749, + "learning_rate": 3.5284589994951435e-06, + "loss": 0.4532, + "step": 9495 + }, + { + "epoch": 2.6577106073327736, + "grad_norm": 0.2529663695003584, + "learning_rate": 3.522769193051789e-06, + "loss": 0.4342, + "step": 9496 + }, + { + "epoch": 2.657990484186958, + "grad_norm": 0.24959068090001982, + "learning_rate": 3.517083810337274e-06, + "loss": 0.4337, + "step": 9497 + }, + { + "epoch": 2.658270361041142, + "grad_norm": 0.25888889878029636, + "learning_rate": 3.5114028518927476e-06, + "loss": 0.4459, + "step": 9498 + }, + { + "epoch": 2.658550237895326, + "grad_norm": 0.251809556835883, + "learning_rate": 3.505726318258912e-06, + "loss": 0.4438, + "step": 9499 + }, + { + "epoch": 2.6588301147495104, + "grad_norm": 0.23883640860702854, + "learning_rate": 3.500054209976078e-06, + "loss": 0.4742, + "step": 9500 + }, + { + "epoch": 2.659109991603694, + "grad_norm": 0.2535837618499653, + "learning_rate": 3.4943865275841036e-06, + "loss": 0.4264, + "step": 9501 + }, + { + "epoch": 2.6593898684578785, + "grad_norm": 0.24931965791720442, + "learning_rate": 3.4887232716224515e-06, + "loss": 0.4559, + "step": 9502 + }, + { + "epoch": 2.659669745312063, + "grad_norm": 0.2566775946151293, + "learning_rate": 3.4830644426301516e-06, + "loss": 0.4653, + "step": 9503 + }, + { + "epoch": 2.6599496221662466, + "grad_norm": 0.26540747561173816, + "learning_rate": 3.477410041145818e-06, + "loss": 0.452, + "step": 9504 + }, + { + "epoch": 2.660229499020431, + "grad_norm": 0.23606934915419622, + "learning_rate": 3.47176006770763e-06, + "loss": 0.4565, + "step": 9505 + }, + { + "epoch": 2.6605093758746152, + "grad_norm": 0.2501107069181252, + "learning_rate": 3.4661145228533587e-06, + "loss": 0.4552, + "step": 9506 + }, + { + "epoch": 2.660789252728799, + "grad_norm": 0.268483862398106, + "learning_rate": 3.4604734071203625e-06, + "loss": 0.457, + "step": 9507 + }, + { + "epoch": 2.6610691295829834, + "grad_norm": 0.24398110463413233, + "learning_rate": 3.4548367210455514e-06, + "loss": 0.4344, + "step": 9508 + }, + { + "epoch": 2.6613490064371677, + "grad_norm": 0.25772212739615136, + "learning_rate": 3.4492044651654344e-06, + "loss": 0.47, + "step": 9509 + }, + { + "epoch": 2.661628883291352, + "grad_norm": 0.2649890920852127, + "learning_rate": 3.443576640016094e-06, + "loss": 0.4575, + "step": 9510 + }, + { + "epoch": 2.6619087601455362, + "grad_norm": 0.246274944821003, + "learning_rate": 3.437953246133191e-06, + "loss": 0.4419, + "step": 9511 + }, + { + "epoch": 2.66218863699972, + "grad_norm": 0.25328972822913165, + "learning_rate": 3.4323342840519523e-06, + "loss": 0.4425, + "step": 9512 + }, + { + "epoch": 2.6624685138539044, + "grad_norm": 0.25838070294669013, + "learning_rate": 3.426719754307206e-06, + "loss": 0.4321, + "step": 9513 + }, + { + "epoch": 2.6627483907080887, + "grad_norm": 0.2529082247004486, + "learning_rate": 3.421109657433341e-06, + "loss": 0.428, + "step": 9514 + }, + { + "epoch": 2.6630282675622725, + "grad_norm": 0.25964210499745244, + "learning_rate": 3.4155039939643306e-06, + "loss": 0.4622, + "step": 9515 + }, + { + "epoch": 2.663308144416457, + "grad_norm": 0.25382490703939575, + "learning_rate": 3.409902764433726e-06, + "loss": 0.4486, + "step": 9516 + }, + { + "epoch": 2.663588021270641, + "grad_norm": 0.2592529548773974, + "learning_rate": 3.4043059693746546e-06, + "loss": 0.4728, + "step": 9517 + }, + { + "epoch": 2.663867898124825, + "grad_norm": 0.25246685281677483, + "learning_rate": 3.3987136093198255e-06, + "loss": 0.4477, + "step": 9518 + }, + { + "epoch": 2.6641477749790092, + "grad_norm": 0.2621064650997339, + "learning_rate": 3.3931256848015123e-06, + "loss": 0.4512, + "step": 9519 + }, + { + "epoch": 2.6644276518331935, + "grad_norm": 0.25466340259682224, + "learning_rate": 3.3875421963515842e-06, + "loss": 0.4609, + "step": 9520 + }, + { + "epoch": 2.6647075286873774, + "grad_norm": 0.26012689481029794, + "learning_rate": 3.381963144501482e-06, + "loss": 0.4337, + "step": 9521 + }, + { + "epoch": 2.6649874055415617, + "grad_norm": 0.24874050743360282, + "learning_rate": 3.376388529782215e-06, + "loss": 0.4477, + "step": 9522 + }, + { + "epoch": 2.665267282395746, + "grad_norm": 0.2606179998971198, + "learning_rate": 3.370818352724381e-06, + "loss": 0.4227, + "step": 9523 + }, + { + "epoch": 2.66554715924993, + "grad_norm": 0.2579114898590253, + "learning_rate": 3.3652526138581665e-06, + "loss": 0.4414, + "step": 9524 + }, + { + "epoch": 2.665827036104114, + "grad_norm": 0.25710483426366676, + "learning_rate": 3.3596913137133033e-06, + "loss": 0.4643, + "step": 9525 + }, + { + "epoch": 2.6661069129582984, + "grad_norm": 0.2469217533571249, + "learning_rate": 3.35413445281913e-06, + "loss": 0.4401, + "step": 9526 + }, + { + "epoch": 2.6663867898124827, + "grad_norm": 0.2670611971362608, + "learning_rate": 3.3485820317045503e-06, + "loss": 0.4793, + "step": 9527 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.26109629252961764, + "learning_rate": 3.3430340508980418e-06, + "loss": 0.4336, + "step": 9528 + }, + { + "epoch": 2.666946543520851, + "grad_norm": 0.2542831554154449, + "learning_rate": 3.3374905109276712e-06, + "loss": 0.446, + "step": 9529 + }, + { + "epoch": 2.667226420375035, + "grad_norm": 0.25660788217170455, + "learning_rate": 3.331951412321066e-06, + "loss": 0.4537, + "step": 9530 + }, + { + "epoch": 2.6675062972292194, + "grad_norm": 0.258753500608575, + "learning_rate": 3.3264167556054493e-06, + "loss": 0.4629, + "step": 9531 + }, + { + "epoch": 2.6677861740834032, + "grad_norm": 0.24303736973262738, + "learning_rate": 3.3208865413076106e-06, + "loss": 0.4452, + "step": 9532 + }, + { + "epoch": 2.6680660509375875, + "grad_norm": 0.2546674566555844, + "learning_rate": 3.315360769953918e-06, + "loss": 0.4519, + "step": 9533 + }, + { + "epoch": 2.668345927791772, + "grad_norm": 0.2548655228739014, + "learning_rate": 3.3098394420703226e-06, + "loss": 0.4639, + "step": 9534 + }, + { + "epoch": 2.6686258046459557, + "grad_norm": 0.2595714172104206, + "learning_rate": 3.304322558182338e-06, + "loss": 0.4634, + "step": 9535 + }, + { + "epoch": 2.66890568150014, + "grad_norm": 0.2591402840263963, + "learning_rate": 3.2988101188150767e-06, + "loss": 0.4301, + "step": 9536 + }, + { + "epoch": 2.6691855583543243, + "grad_norm": 0.259532454648795, + "learning_rate": 3.2933021244932027e-06, + "loss": 0.4635, + "step": 9537 + }, + { + "epoch": 2.669465435208508, + "grad_norm": 0.24871495624519288, + "learning_rate": 3.287798575740969e-06, + "loss": 0.4395, + "step": 9538 + }, + { + "epoch": 2.6697453120626924, + "grad_norm": 0.256465366145869, + "learning_rate": 3.2822994730822177e-06, + "loss": 0.4419, + "step": 9539 + }, + { + "epoch": 2.6700251889168767, + "grad_norm": 0.2581741144493468, + "learning_rate": 3.276804817040352e-06, + "loss": 0.4438, + "step": 9540 + }, + { + "epoch": 2.6703050657710605, + "grad_norm": 0.2608276603552817, + "learning_rate": 3.2713146081383484e-06, + "loss": 0.4527, + "step": 9541 + }, + { + "epoch": 2.670584942625245, + "grad_norm": 0.25669486283378345, + "learning_rate": 3.265828846898783e-06, + "loss": 0.4629, + "step": 9542 + }, + { + "epoch": 2.670864819479429, + "grad_norm": 0.2553481115471904, + "learning_rate": 3.2603475338437828e-06, + "loss": 0.4516, + "step": 9543 + }, + { + "epoch": 2.671144696333613, + "grad_norm": 0.2568392811345736, + "learning_rate": 3.2548706694950693e-06, + "loss": 0.4594, + "step": 9544 + }, + { + "epoch": 2.6714245731877972, + "grad_norm": 0.25504560907747265, + "learning_rate": 3.249398254373931e-06, + "loss": 0.4539, + "step": 9545 + }, + { + "epoch": 2.6717044500419815, + "grad_norm": 0.2583480188591699, + "learning_rate": 3.2439302890012346e-06, + "loss": 0.4332, + "step": 9546 + }, + { + "epoch": 2.671984326896166, + "grad_norm": 0.2462940740601506, + "learning_rate": 3.2384667738974196e-06, + "loss": 0.4526, + "step": 9547 + }, + { + "epoch": 2.67226420375035, + "grad_norm": 0.25364777033394637, + "learning_rate": 3.233007709582514e-06, + "loss": 0.4453, + "step": 9548 + }, + { + "epoch": 2.672544080604534, + "grad_norm": 0.25792848970974264, + "learning_rate": 3.227553096576108e-06, + "loss": 0.448, + "step": 9549 + }, + { + "epoch": 2.6728239574587183, + "grad_norm": 0.25744147779501747, + "learning_rate": 3.222102935397381e-06, + "loss": 0.4671, + "step": 9550 + }, + { + "epoch": 2.6731038343129025, + "grad_norm": 0.24928936236121033, + "learning_rate": 3.2166572265650787e-06, + "loss": 0.4351, + "step": 9551 + }, + { + "epoch": 2.6733837111670864, + "grad_norm": 0.25632576413837144, + "learning_rate": 3.2112159705975254e-06, + "loss": 0.4449, + "step": 9552 + }, + { + "epoch": 2.6736635880212707, + "grad_norm": 0.2506734509361175, + "learning_rate": 3.205779168012624e-06, + "loss": 0.4504, + "step": 9553 + }, + { + "epoch": 2.673943464875455, + "grad_norm": 0.2502185843859141, + "learning_rate": 3.200346819327865e-06, + "loss": 0.442, + "step": 9554 + }, + { + "epoch": 2.674223341729639, + "grad_norm": 0.24697086837287455, + "learning_rate": 3.194918925060281e-06, + "loss": 0.4464, + "step": 9555 + }, + { + "epoch": 2.674503218583823, + "grad_norm": 0.24180270927745726, + "learning_rate": 3.1894954857265126e-06, + "loss": 0.4651, + "step": 9556 + }, + { + "epoch": 2.6747830954380074, + "grad_norm": 0.24510244175229476, + "learning_rate": 3.1840765018427642e-06, + "loss": 0.4463, + "step": 9557 + }, + { + "epoch": 2.6750629722921913, + "grad_norm": 0.2627364790670975, + "learning_rate": 3.1786619739248123e-06, + "loss": 0.456, + "step": 9558 + }, + { + "epoch": 2.6753428491463755, + "grad_norm": 0.23953547585015808, + "learning_rate": 3.1732519024880227e-06, + "loss": 0.45, + "step": 9559 + }, + { + "epoch": 2.67562272600056, + "grad_norm": 0.24941273014969406, + "learning_rate": 3.1678462880473324e-06, + "loss": 0.4421, + "step": 9560 + }, + { + "epoch": 2.6759026028547437, + "grad_norm": 0.24889410295356765, + "learning_rate": 3.1624451311172475e-06, + "loss": 0.4548, + "step": 9561 + }, + { + "epoch": 2.676182479708928, + "grad_norm": 0.24757414595847224, + "learning_rate": 3.1570484322118454e-06, + "loss": 0.4619, + "step": 9562 + }, + { + "epoch": 2.6764623565631123, + "grad_norm": 0.255381341343547, + "learning_rate": 3.151656191844793e-06, + "loss": 0.4509, + "step": 9563 + }, + { + "epoch": 2.676742233417296, + "grad_norm": 0.2545565791122728, + "learning_rate": 3.1462684105293293e-06, + "loss": 0.44, + "step": 9564 + }, + { + "epoch": 2.6770221102714804, + "grad_norm": 0.2601252350876901, + "learning_rate": 3.1408850887782615e-06, + "loss": 0.4648, + "step": 9565 + }, + { + "epoch": 2.6773019871256647, + "grad_norm": 0.2541944562033961, + "learning_rate": 3.135506227103985e-06, + "loss": 0.4658, + "step": 9566 + }, + { + "epoch": 2.677581863979849, + "grad_norm": 0.25952499281932945, + "learning_rate": 3.130131826018451e-06, + "loss": 0.4576, + "step": 9567 + }, + { + "epoch": 2.6778617408340333, + "grad_norm": 0.24352318330194347, + "learning_rate": 3.1247618860332064e-06, + "loss": 0.4568, + "step": 9568 + }, + { + "epoch": 2.678141617688217, + "grad_norm": 0.25483798914707423, + "learning_rate": 3.1193964076593583e-06, + "loss": 0.4597, + "step": 9569 + }, + { + "epoch": 2.6784214945424014, + "grad_norm": 0.2664555666188415, + "learning_rate": 3.114035391407605e-06, + "loss": 0.4431, + "step": 9570 + }, + { + "epoch": 2.6787013713965857, + "grad_norm": 0.2558920128272072, + "learning_rate": 3.1086788377882037e-06, + "loss": 0.4584, + "step": 9571 + }, + { + "epoch": 2.6789812482507696, + "grad_norm": 0.2564401741229892, + "learning_rate": 3.103326747311008e-06, + "loss": 0.4421, + "step": 9572 + }, + { + "epoch": 2.679261125104954, + "grad_norm": 0.261257903348952, + "learning_rate": 3.0979791204854116e-06, + "loss": 0.4442, + "step": 9573 + }, + { + "epoch": 2.679541001959138, + "grad_norm": 0.24728596928066218, + "learning_rate": 3.0926359578204168e-06, + "loss": 0.4207, + "step": 9574 + }, + { + "epoch": 2.679820878813322, + "grad_norm": 0.25384082637236177, + "learning_rate": 3.0872972598245785e-06, + "loss": 0.4727, + "step": 9575 + }, + { + "epoch": 2.6801007556675063, + "grad_norm": 0.25155267496967193, + "learning_rate": 3.0819630270060518e-06, + "loss": 0.4465, + "step": 9576 + }, + { + "epoch": 2.6803806325216906, + "grad_norm": 0.2514218639113108, + "learning_rate": 3.0766332598725413e-06, + "loss": 0.4471, + "step": 9577 + }, + { + "epoch": 2.6806605093758744, + "grad_norm": 0.24556715410688237, + "learning_rate": 3.0713079589313466e-06, + "loss": 0.456, + "step": 9578 + }, + { + "epoch": 2.6809403862300587, + "grad_norm": 0.23776124231133908, + "learning_rate": 3.0659871246893292e-06, + "loss": 0.4488, + "step": 9579 + }, + { + "epoch": 2.681220263084243, + "grad_norm": 0.25766891290935423, + "learning_rate": 3.0606707576529225e-06, + "loss": 0.4234, + "step": 9580 + }, + { + "epoch": 2.681500139938427, + "grad_norm": 0.24452010593592108, + "learning_rate": 3.0553588583281444e-06, + "loss": 0.4293, + "step": 9581 + }, + { + "epoch": 2.681780016792611, + "grad_norm": 0.2598302592284333, + "learning_rate": 3.0500514272205906e-06, + "loss": 0.4497, + "step": 9582 + }, + { + "epoch": 2.6820598936467954, + "grad_norm": 0.25562772663169386, + "learning_rate": 3.044748464835423e-06, + "loss": 0.4617, + "step": 9583 + }, + { + "epoch": 2.6823397705009797, + "grad_norm": 0.25476205270862523, + "learning_rate": 3.039449971677377e-06, + "loss": 0.4478, + "step": 9584 + }, + { + "epoch": 2.682619647355164, + "grad_norm": 0.2538151461762847, + "learning_rate": 3.0341559482507666e-06, + "loss": 0.4395, + "step": 9585 + }, + { + "epoch": 2.682899524209348, + "grad_norm": 0.26844670171940377, + "learning_rate": 3.0288663950594766e-06, + "loss": 0.4312, + "step": 9586 + }, + { + "epoch": 2.683179401063532, + "grad_norm": 0.25131678810625624, + "learning_rate": 3.0235813126069822e-06, + "loss": 0.4441, + "step": 9587 + }, + { + "epoch": 2.6834592779177164, + "grad_norm": 0.265383114183847, + "learning_rate": 3.0183007013963092e-06, + "loss": 0.4467, + "step": 9588 + }, + { + "epoch": 2.6837391547719003, + "grad_norm": 0.2598004507884646, + "learning_rate": 3.0130245619300666e-06, + "loss": 0.4436, + "step": 9589 + }, + { + "epoch": 2.6840190316260846, + "grad_norm": 0.25780086768921223, + "learning_rate": 3.0077528947104585e-06, + "loss": 0.4396, + "step": 9590 + }, + { + "epoch": 2.684298908480269, + "grad_norm": 0.25261973652628045, + "learning_rate": 3.002485700239227e-06, + "loss": 0.4565, + "step": 9591 + }, + { + "epoch": 2.6845787853344527, + "grad_norm": 0.25393925572197334, + "learning_rate": 2.997222979017711e-06, + "loss": 0.4396, + "step": 9592 + }, + { + "epoch": 2.684858662188637, + "grad_norm": 0.2381658079414295, + "learning_rate": 2.9919647315468093e-06, + "loss": 0.4691, + "step": 9593 + }, + { + "epoch": 2.6851385390428213, + "grad_norm": 0.2715883406636102, + "learning_rate": 2.986710958327027e-06, + "loss": 0.4633, + "step": 9594 + }, + { + "epoch": 2.685418415897005, + "grad_norm": 0.255247208575035, + "learning_rate": 2.9814616598584087e-06, + "loss": 0.4324, + "step": 9595 + }, + { + "epoch": 2.6856982927511894, + "grad_norm": 0.29036514800454216, + "learning_rate": 2.9762168366405886e-06, + "loss": 0.4661, + "step": 9596 + }, + { + "epoch": 2.6859781696053737, + "grad_norm": 0.2518865790086803, + "learning_rate": 2.970976489172772e-06, + "loss": 0.4788, + "step": 9597 + }, + { + "epoch": 2.6862580464595576, + "grad_norm": 0.24712504303747004, + "learning_rate": 2.965740617953733e-06, + "loss": 0.4477, + "step": 9598 + }, + { + "epoch": 2.686537923313742, + "grad_norm": 0.2607184257165128, + "learning_rate": 2.960509223481828e-06, + "loss": 0.4553, + "step": 9599 + }, + { + "epoch": 2.686817800167926, + "grad_norm": 0.25130344370407304, + "learning_rate": 2.955282306254986e-06, + "loss": 0.4459, + "step": 9600 + }, + { + "epoch": 2.68709767702211, + "grad_norm": 0.24468795689000825, + "learning_rate": 2.9500598667707045e-06, + "loss": 0.4376, + "step": 9601 + }, + { + "epoch": 2.6873775538762943, + "grad_norm": 0.240846706524123, + "learning_rate": 2.9448419055260634e-06, + "loss": 0.4311, + "step": 9602 + }, + { + "epoch": 2.6876574307304786, + "grad_norm": 0.2506620901909148, + "learning_rate": 2.939628423017704e-06, + "loss": 0.4398, + "step": 9603 + }, + { + "epoch": 2.687937307584663, + "grad_norm": 0.2546535852948708, + "learning_rate": 2.934419419741852e-06, + "loss": 0.4481, + "step": 9604 + }, + { + "epoch": 2.688217184438847, + "grad_norm": 0.25635196813437233, + "learning_rate": 2.9292148961943044e-06, + "loss": 0.4631, + "step": 9605 + }, + { + "epoch": 2.688497061293031, + "grad_norm": 0.26961047587511755, + "learning_rate": 2.9240148528704326e-06, + "loss": 0.4471, + "step": 9606 + }, + { + "epoch": 2.6887769381472153, + "grad_norm": 0.2502754243244984, + "learning_rate": 2.9188192902651736e-06, + "loss": 0.4387, + "step": 9607 + }, + { + "epoch": 2.6890568150013996, + "grad_norm": 0.2557472176361832, + "learning_rate": 2.9136282088730536e-06, + "loss": 0.4576, + "step": 9608 + }, + { + "epoch": 2.6893366918555834, + "grad_norm": 0.24776733623736844, + "learning_rate": 2.9084416091881506e-06, + "loss": 0.4406, + "step": 9609 + }, + { + "epoch": 2.6896165687097677, + "grad_norm": 0.25696230575270806, + "learning_rate": 2.9032594917041244e-06, + "loss": 0.4502, + "step": 9610 + }, + { + "epoch": 2.689896445563952, + "grad_norm": 0.25842164096611264, + "learning_rate": 2.898081856914231e-06, + "loss": 0.4441, + "step": 9611 + }, + { + "epoch": 2.690176322418136, + "grad_norm": 0.2621535345165615, + "learning_rate": 2.89290870531127e-06, + "loss": 0.4611, + "step": 9612 + }, + { + "epoch": 2.69045619927232, + "grad_norm": 0.2474173482149627, + "learning_rate": 2.887740037387626e-06, + "loss": 0.4523, + "step": 9613 + }, + { + "epoch": 2.6907360761265045, + "grad_norm": 0.2490375360888736, + "learning_rate": 2.88257585363525e-06, + "loss": 0.4476, + "step": 9614 + }, + { + "epoch": 2.6910159529806883, + "grad_norm": 0.2529025213225243, + "learning_rate": 2.877416154545681e-06, + "loss": 0.4385, + "step": 9615 + }, + { + "epoch": 2.6912958298348726, + "grad_norm": 0.2511885081465574, + "learning_rate": 2.872260940610022e-06, + "loss": 0.4278, + "step": 9616 + }, + { + "epoch": 2.691575706689057, + "grad_norm": 0.2510840460673405, + "learning_rate": 2.8671102123189408e-06, + "loss": 0.4429, + "step": 9617 + }, + { + "epoch": 2.6918555835432407, + "grad_norm": 0.2603164784694938, + "learning_rate": 2.86196397016269e-06, + "loss": 0.4453, + "step": 9618 + }, + { + "epoch": 2.692135460397425, + "grad_norm": 0.2583463118208237, + "learning_rate": 2.856822214631094e-06, + "loss": 0.4746, + "step": 9619 + }, + { + "epoch": 2.6924153372516093, + "grad_norm": 0.2554763338118534, + "learning_rate": 2.8516849462135454e-06, + "loss": 0.4507, + "step": 9620 + }, + { + "epoch": 2.6926952141057936, + "grad_norm": 0.2585290063608683, + "learning_rate": 2.8465521653990135e-06, + "loss": 0.4483, + "step": 9621 + }, + { + "epoch": 2.6929750909599774, + "grad_norm": 0.2623714587562958, + "learning_rate": 2.8414238726760414e-06, + "loss": 0.447, + "step": 9622 + }, + { + "epoch": 2.6932549678141617, + "grad_norm": 0.2659255301786653, + "learning_rate": 2.8363000685327436e-06, + "loss": 0.466, + "step": 9623 + }, + { + "epoch": 2.693534844668346, + "grad_norm": 0.2609359558585346, + "learning_rate": 2.8311807534568024e-06, + "loss": 0.4496, + "step": 9624 + }, + { + "epoch": 2.6938147215225303, + "grad_norm": 0.26097482483463635, + "learning_rate": 2.826065927935473e-06, + "loss": 0.4396, + "step": 9625 + }, + { + "epoch": 2.694094598376714, + "grad_norm": 0.25537061492759044, + "learning_rate": 2.8209555924556042e-06, + "loss": 0.4691, + "step": 9626 + }, + { + "epoch": 2.6943744752308985, + "grad_norm": 0.25011120329684866, + "learning_rate": 2.8158497475035792e-06, + "loss": 0.4659, + "step": 9627 + }, + { + "epoch": 2.6946543520850827, + "grad_norm": 0.26208656472724784, + "learning_rate": 2.810748393565388e-06, + "loss": 0.4572, + "step": 9628 + }, + { + "epoch": 2.6949342289392666, + "grad_norm": 0.2606565840292971, + "learning_rate": 2.8056515311265795e-06, + "loss": 0.4587, + "step": 9629 + }, + { + "epoch": 2.695214105793451, + "grad_norm": 0.2589436096839342, + "learning_rate": 2.8005591606722727e-06, + "loss": 0.4438, + "step": 9630 + }, + { + "epoch": 2.695493982647635, + "grad_norm": 0.25800776062159353, + "learning_rate": 2.7954712826871622e-06, + "loss": 0.4357, + "step": 9631 + }, + { + "epoch": 2.695773859501819, + "grad_norm": 0.2608140540071773, + "learning_rate": 2.7903878976555163e-06, + "loss": 0.45, + "step": 9632 + }, + { + "epoch": 2.6960537363560033, + "grad_norm": 0.2650190509760145, + "learning_rate": 2.785309006061176e-06, + "loss": 0.4384, + "step": 9633 + }, + { + "epoch": 2.6963336132101876, + "grad_norm": 0.24931993941455413, + "learning_rate": 2.780234608387555e-06, + "loss": 0.446, + "step": 9634 + }, + { + "epoch": 2.6966134900643715, + "grad_norm": 0.2658399973734569, + "learning_rate": 2.7751647051176278e-06, + "loss": 0.4471, + "step": 9635 + }, + { + "epoch": 2.6968933669185557, + "grad_norm": 0.24952100274742145, + "learning_rate": 2.7700992967339633e-06, + "loss": 0.4729, + "step": 9636 + }, + { + "epoch": 2.69717324377274, + "grad_norm": 0.26158161361345517, + "learning_rate": 2.7650383837186823e-06, + "loss": 0.4642, + "step": 9637 + }, + { + "epoch": 2.697453120626924, + "grad_norm": 0.2631291321973273, + "learning_rate": 2.759981966553482e-06, + "loss": 0.4264, + "step": 9638 + }, + { + "epoch": 2.697732997481108, + "grad_norm": 0.2575367709015392, + "learning_rate": 2.754930045719645e-06, + "loss": 0.4597, + "step": 9639 + }, + { + "epoch": 2.6980128743352925, + "grad_norm": 0.2564659384915359, + "learning_rate": 2.7498826216980022e-06, + "loss": 0.4723, + "step": 9640 + }, + { + "epoch": 2.6982927511894768, + "grad_norm": 0.2662945491752931, + "learning_rate": 2.744839694968987e-06, + "loss": 0.4575, + "step": 9641 + }, + { + "epoch": 2.698572628043661, + "grad_norm": 0.2607535522450551, + "learning_rate": 2.739801266012576e-06, + "loss": 0.4441, + "step": 9642 + }, + { + "epoch": 2.698852504897845, + "grad_norm": 0.2568796561308157, + "learning_rate": 2.734767335308336e-06, + "loss": 0.4569, + "step": 9643 + }, + { + "epoch": 2.699132381752029, + "grad_norm": 0.2604173575784273, + "learning_rate": 2.7297379033353944e-06, + "loss": 0.4486, + "step": 9644 + }, + { + "epoch": 2.6994122586062135, + "grad_norm": 0.2546225387213733, + "learning_rate": 2.7247129705724572e-06, + "loss": 0.4546, + "step": 9645 + }, + { + "epoch": 2.6996921354603973, + "grad_norm": 0.24754408215495408, + "learning_rate": 2.7196925374977978e-06, + "loss": 0.4539, + "step": 9646 + }, + { + "epoch": 2.6999720123145816, + "grad_norm": 0.26432126577422416, + "learning_rate": 2.7146766045892722e-06, + "loss": 0.448, + "step": 9647 + }, + { + "epoch": 2.700251889168766, + "grad_norm": 0.25476615361813054, + "learning_rate": 2.709665172324288e-06, + "loss": 0.4516, + "step": 9648 + }, + { + "epoch": 2.7005317660229498, + "grad_norm": 0.26414146297212104, + "learning_rate": 2.7046582411798473e-06, + "loss": 0.4379, + "step": 9649 + }, + { + "epoch": 2.700811642877134, + "grad_norm": 0.2524114589093038, + "learning_rate": 2.6996558116325067e-06, + "loss": 0.4514, + "step": 9650 + }, + { + "epoch": 2.7010915197313183, + "grad_norm": 0.25452540330895845, + "learning_rate": 2.6946578841583968e-06, + "loss": 0.4602, + "step": 9651 + }, + { + "epoch": 2.701371396585502, + "grad_norm": 0.256703708724523, + "learning_rate": 2.689664459233232e-06, + "loss": 0.4305, + "step": 9652 + }, + { + "epoch": 2.7016512734396865, + "grad_norm": 0.26223148502940047, + "learning_rate": 2.684675537332287e-06, + "loss": 0.4298, + "step": 9653 + }, + { + "epoch": 2.7019311502938708, + "grad_norm": 0.262019513621444, + "learning_rate": 2.6796911189304053e-06, + "loss": 0.4517, + "step": 9654 + }, + { + "epoch": 2.7022110271480546, + "grad_norm": 0.2604320882640914, + "learning_rate": 2.6747112045020063e-06, + "loss": 0.4731, + "step": 9655 + }, + { + "epoch": 2.702490904002239, + "grad_norm": 0.2451978939009998, + "learning_rate": 2.669735794521089e-06, + "loss": 0.4373, + "step": 9656 + }, + { + "epoch": 2.702770780856423, + "grad_norm": 0.25363039131620124, + "learning_rate": 2.6647648894612076e-06, + "loss": 0.4396, + "step": 9657 + }, + { + "epoch": 2.7030506577106075, + "grad_norm": 0.263893339157257, + "learning_rate": 2.659798489795495e-06, + "loss": 0.4463, + "step": 9658 + }, + { + "epoch": 2.7033305345647913, + "grad_norm": 0.27918997221012776, + "learning_rate": 2.654836595996668e-06, + "loss": 0.4727, + "step": 9659 + }, + { + "epoch": 2.7036104114189756, + "grad_norm": 0.25622222685787027, + "learning_rate": 2.6498792085369873e-06, + "loss": 0.4431, + "step": 9660 + }, + { + "epoch": 2.70389028827316, + "grad_norm": 0.25115136845374564, + "learning_rate": 2.6449263278883086e-06, + "loss": 0.4451, + "step": 9661 + }, + { + "epoch": 2.704170165127344, + "grad_norm": 0.25241869160083336, + "learning_rate": 2.6399779545220493e-06, + "loss": 0.4358, + "step": 9662 + }, + { + "epoch": 2.704450041981528, + "grad_norm": 0.25012657718988424, + "learning_rate": 2.635034088909194e-06, + "loss": 0.4412, + "step": 9663 + }, + { + "epoch": 2.7047299188357123, + "grad_norm": 0.2529009342703803, + "learning_rate": 2.630094731520311e-06, + "loss": 0.4449, + "step": 9664 + }, + { + "epoch": 2.7050097956898966, + "grad_norm": 0.2529681464102911, + "learning_rate": 2.6251598828255177e-06, + "loss": 0.4213, + "step": 9665 + }, + { + "epoch": 2.7052896725440805, + "grad_norm": 0.24697254757555365, + "learning_rate": 2.620229543294528e-06, + "loss": 0.443, + "step": 9666 + }, + { + "epoch": 2.7055695493982648, + "grad_norm": 0.2564555043163987, + "learning_rate": 2.615303713396611e-06, + "loss": 0.4615, + "step": 9667 + }, + { + "epoch": 2.705849426252449, + "grad_norm": 0.2393032225739067, + "learning_rate": 2.6103823936006078e-06, + "loss": 0.4458, + "step": 9668 + }, + { + "epoch": 2.706129303106633, + "grad_norm": 0.254402736502425, + "learning_rate": 2.605465584374933e-06, + "loss": 0.4527, + "step": 9669 + }, + { + "epoch": 2.706409179960817, + "grad_norm": 0.25882846930758674, + "learning_rate": 2.6005532861875736e-06, + "loss": 0.473, + "step": 9670 + }, + { + "epoch": 2.7066890568150015, + "grad_norm": 0.26023853810341335, + "learning_rate": 2.5956454995060774e-06, + "loss": 0.4664, + "step": 9671 + }, + { + "epoch": 2.7069689336691853, + "grad_norm": 0.25544238437269734, + "learning_rate": 2.590742224797582e-06, + "loss": 0.4568, + "step": 9672 + }, + { + "epoch": 2.7072488105233696, + "grad_norm": 0.2591892632742536, + "learning_rate": 2.5858434625287753e-06, + "loss": 0.4673, + "step": 9673 + }, + { + "epoch": 2.707528687377554, + "grad_norm": 0.2537890934138441, + "learning_rate": 2.5809492131659285e-06, + "loss": 0.4512, + "step": 9674 + }, + { + "epoch": 2.7078085642317378, + "grad_norm": 0.2601607784858347, + "learning_rate": 2.5760594771748747e-06, + "loss": 0.4539, + "step": 9675 + }, + { + "epoch": 2.708088441085922, + "grad_norm": 0.2675710384101143, + "learning_rate": 2.571174255021025e-06, + "loss": 0.4671, + "step": 9676 + }, + { + "epoch": 2.7083683179401064, + "grad_norm": 0.253588464582648, + "learning_rate": 2.5662935471693573e-06, + "loss": 0.4419, + "step": 9677 + }, + { + "epoch": 2.7086481947942906, + "grad_norm": 0.25297096701091965, + "learning_rate": 2.5614173540844223e-06, + "loss": 0.4465, + "step": 9678 + }, + { + "epoch": 2.708928071648475, + "grad_norm": 0.25726382234904843, + "learning_rate": 2.556545676230332e-06, + "loss": 0.4463, + "step": 9679 + }, + { + "epoch": 2.709207948502659, + "grad_norm": 0.27082897903045283, + "learning_rate": 2.551678514070782e-06, + "loss": 0.4916, + "step": 9680 + }, + { + "epoch": 2.709487825356843, + "grad_norm": 0.24652697571702764, + "learning_rate": 2.5468158680690246e-06, + "loss": 0.4405, + "step": 9681 + }, + { + "epoch": 2.7097677022110274, + "grad_norm": 0.24925217968179736, + "learning_rate": 2.5419577386879002e-06, + "loss": 0.4427, + "step": 9682 + }, + { + "epoch": 2.710047579065211, + "grad_norm": 0.26476711391996977, + "learning_rate": 2.537104126389794e-06, + "loss": 0.4589, + "step": 9683 + }, + { + "epoch": 2.7103274559193955, + "grad_norm": 0.25447263864085196, + "learning_rate": 2.532255031636688e-06, + "loss": 0.4353, + "step": 9684 + }, + { + "epoch": 2.71060733277358, + "grad_norm": 0.2563979710447523, + "learning_rate": 2.5274104548901166e-06, + "loss": 0.4615, + "step": 9685 + }, + { + "epoch": 2.7108872096277636, + "grad_norm": 0.25158898126609197, + "learning_rate": 2.52257039661119e-06, + "loss": 0.4488, + "step": 9686 + }, + { + "epoch": 2.711167086481948, + "grad_norm": 0.2584423608435234, + "learning_rate": 2.51773485726059e-06, + "loss": 0.464, + "step": 9687 + }, + { + "epoch": 2.711446963336132, + "grad_norm": 0.2603578049857069, + "learning_rate": 2.512903837298558e-06, + "loss": 0.4399, + "step": 9688 + }, + { + "epoch": 2.711726840190316, + "grad_norm": 0.26123083105238193, + "learning_rate": 2.508077337184922e-06, + "loss": 0.4648, + "step": 9689 + }, + { + "epoch": 2.7120067170445004, + "grad_norm": 0.2469999078727203, + "learning_rate": 2.503255357379064e-06, + "loss": 0.4414, + "step": 9690 + }, + { + "epoch": 2.7122865938986847, + "grad_norm": 0.25414093190683257, + "learning_rate": 2.4984378983399504e-06, + "loss": 0.4331, + "step": 9691 + }, + { + "epoch": 2.7125664707528685, + "grad_norm": 0.24754182443592454, + "learning_rate": 2.4936249605261032e-06, + "loss": 0.4328, + "step": 9692 + }, + { + "epoch": 2.712846347607053, + "grad_norm": 0.2720956891822757, + "learning_rate": 2.4888165443956225e-06, + "loss": 0.4463, + "step": 9693 + }, + { + "epoch": 2.713126224461237, + "grad_norm": 0.2625198655714074, + "learning_rate": 2.484012650406176e-06, + "loss": 0.4444, + "step": 9694 + }, + { + "epoch": 2.7134061013154214, + "grad_norm": 0.250325071522633, + "learning_rate": 2.4792132790150026e-06, + "loss": 0.4561, + "step": 9695 + }, + { + "epoch": 2.713685978169605, + "grad_norm": 0.25871354229605087, + "learning_rate": 2.4744184306789042e-06, + "loss": 0.4501, + "step": 9696 + }, + { + "epoch": 2.7139658550237895, + "grad_norm": 0.2663409146154799, + "learning_rate": 2.469628105854266e-06, + "loss": 0.4665, + "step": 9697 + }, + { + "epoch": 2.714245731877974, + "grad_norm": 0.2572069481207055, + "learning_rate": 2.464842304997023e-06, + "loss": 0.4605, + "step": 9698 + }, + { + "epoch": 2.714525608732158, + "grad_norm": 0.25399706969156943, + "learning_rate": 2.460061028562699e-06, + "loss": 0.4532, + "step": 9699 + }, + { + "epoch": 2.714805485586342, + "grad_norm": 0.2574344431098343, + "learning_rate": 2.4552842770063757e-06, + "loss": 0.452, + "step": 9700 + }, + { + "epoch": 2.7150853624405262, + "grad_norm": 0.2578065467749837, + "learning_rate": 2.4505120507827105e-06, + "loss": 0.4564, + "step": 9701 + }, + { + "epoch": 2.7153652392947105, + "grad_norm": 0.2515542162601119, + "learning_rate": 2.445744350345919e-06, + "loss": 0.4532, + "step": 9702 + }, + { + "epoch": 2.7156451161488944, + "grad_norm": 0.26410474275459034, + "learning_rate": 2.440981176149798e-06, + "loss": 0.4465, + "step": 9703 + }, + { + "epoch": 2.7159249930030787, + "grad_norm": 0.25209619717054277, + "learning_rate": 2.436222528647708e-06, + "loss": 0.4451, + "step": 9704 + }, + { + "epoch": 2.716204869857263, + "grad_norm": 0.2510633138055131, + "learning_rate": 2.431468408292581e-06, + "loss": 0.4477, + "step": 9705 + }, + { + "epoch": 2.716484746711447, + "grad_norm": 0.27023137077398746, + "learning_rate": 2.4267188155369157e-06, + "loss": 0.4689, + "step": 9706 + }, + { + "epoch": 2.716764623565631, + "grad_norm": 0.25424533967816504, + "learning_rate": 2.4219737508327788e-06, + "loss": 0.4666, + "step": 9707 + }, + { + "epoch": 2.7170445004198154, + "grad_norm": 0.26711550608448836, + "learning_rate": 2.4172332146318145e-06, + "loss": 0.444, + "step": 9708 + }, + { + "epoch": 2.7173243772739992, + "grad_norm": 0.2612666601638906, + "learning_rate": 2.412497207385228e-06, + "loss": 0.4565, + "step": 9709 + }, + { + "epoch": 2.7176042541281835, + "grad_norm": 0.25180959549909626, + "learning_rate": 2.4077657295437872e-06, + "loss": 0.436, + "step": 9710 + }, + { + "epoch": 2.717884130982368, + "grad_norm": 0.2537135056863, + "learning_rate": 2.403038781557848e-06, + "loss": 0.4567, + "step": 9711 + }, + { + "epoch": 2.7181640078365517, + "grad_norm": 0.2666149702062973, + "learning_rate": 2.3983163638773175e-06, + "loss": 0.4445, + "step": 9712 + }, + { + "epoch": 2.718443884690736, + "grad_norm": 0.25902707631329347, + "learning_rate": 2.3935984769516807e-06, + "loss": 0.4209, + "step": 9713 + }, + { + "epoch": 2.7187237615449202, + "grad_norm": 0.2588325646202454, + "learning_rate": 2.3888851212299832e-06, + "loss": 0.4675, + "step": 9714 + }, + { + "epoch": 2.7190036383991045, + "grad_norm": 0.2562007232326913, + "learning_rate": 2.3841762971608506e-06, + "loss": 0.4555, + "step": 9715 + }, + { + "epoch": 2.719283515253289, + "grad_norm": 0.25269977295520657, + "learning_rate": 2.3794720051924677e-06, + "loss": 0.4507, + "step": 9716 + }, + { + "epoch": 2.7195633921074727, + "grad_norm": 0.2525499154762477, + "learning_rate": 2.3747722457725996e-06, + "loss": 0.471, + "step": 9717 + }, + { + "epoch": 2.719843268961657, + "grad_norm": 0.2579482568280547, + "learning_rate": 2.37007701934856e-06, + "loss": 0.4394, + "step": 9718 + }, + { + "epoch": 2.7201231458158412, + "grad_norm": 0.2508807784812661, + "learning_rate": 2.3653863263672527e-06, + "loss": 0.4592, + "step": 9719 + }, + { + "epoch": 2.720403022670025, + "grad_norm": 0.2530849878278066, + "learning_rate": 2.360700167275137e-06, + "loss": 0.4403, + "step": 9720 + }, + { + "epoch": 2.7206828995242094, + "grad_norm": 0.25064367633886986, + "learning_rate": 2.3560185425182458e-06, + "loss": 0.4546, + "step": 9721 + }, + { + "epoch": 2.7209627763783937, + "grad_norm": 0.24754252052662992, + "learning_rate": 2.351341452542177e-06, + "loss": 0.4347, + "step": 9722 + }, + { + "epoch": 2.7212426532325775, + "grad_norm": 0.24698393700365023, + "learning_rate": 2.3466688977920923e-06, + "loss": 0.4413, + "step": 9723 + }, + { + "epoch": 2.721522530086762, + "grad_norm": 0.25850155854762463, + "learning_rate": 2.342000878712741e-06, + "loss": 0.4613, + "step": 9724 + }, + { + "epoch": 2.721802406940946, + "grad_norm": 0.2603882550246586, + "learning_rate": 2.3373373957484233e-06, + "loss": 0.4463, + "step": 9725 + }, + { + "epoch": 2.72208228379513, + "grad_norm": 0.2619025457880354, + "learning_rate": 2.3326784493430066e-06, + "loss": 0.4585, + "step": 9726 + }, + { + "epoch": 2.7223621606493142, + "grad_norm": 0.2463781540172044, + "learning_rate": 2.3280240399399356e-06, + "loss": 0.4577, + "step": 9727 + }, + { + "epoch": 2.7226420375034985, + "grad_norm": 0.2491900824207974, + "learning_rate": 2.3233741679822173e-06, + "loss": 0.4396, + "step": 9728 + }, + { + "epoch": 2.7229219143576824, + "grad_norm": 0.2570978650724168, + "learning_rate": 2.318728833912437e-06, + "loss": 0.4268, + "step": 9729 + }, + { + "epoch": 2.7232017912118667, + "grad_norm": 0.27026489858051084, + "learning_rate": 2.314088038172729e-06, + "loss": 0.4504, + "step": 9730 + }, + { + "epoch": 2.723481668066051, + "grad_norm": 0.26183851864325597, + "learning_rate": 2.309451781204819e-06, + "loss": 0.4676, + "step": 9731 + }, + { + "epoch": 2.7237615449202353, + "grad_norm": 0.25103174510057086, + "learning_rate": 2.3048200634499752e-06, + "loss": 0.4604, + "step": 9732 + }, + { + "epoch": 2.724041421774419, + "grad_norm": 0.288239760592007, + "learning_rate": 2.3001928853490563e-06, + "loss": 0.443, + "step": 9733 + }, + { + "epoch": 2.7243212986286034, + "grad_norm": 0.24397708267580392, + "learning_rate": 2.2955702473424824e-06, + "loss": 0.4574, + "step": 9734 + }, + { + "epoch": 2.7246011754827877, + "grad_norm": 0.2504832668909398, + "learning_rate": 2.290952149870229e-06, + "loss": 0.4471, + "step": 9735 + }, + { + "epoch": 2.724881052336972, + "grad_norm": 0.2578583590442245, + "learning_rate": 2.2863385933718506e-06, + "loss": 0.4737, + "step": 9736 + }, + { + "epoch": 2.725160929191156, + "grad_norm": 0.25663532849515097, + "learning_rate": 2.281729578286479e-06, + "loss": 0.4547, + "step": 9737 + }, + { + "epoch": 2.72544080604534, + "grad_norm": 0.2548716365918792, + "learning_rate": 2.277125105052791e-06, + "loss": 0.4399, + "step": 9738 + }, + { + "epoch": 2.7257206828995244, + "grad_norm": 0.26531838813698677, + "learning_rate": 2.272525174109047e-06, + "loss": 0.4529, + "step": 9739 + }, + { + "epoch": 2.7260005597537083, + "grad_norm": 0.2634166447757077, + "learning_rate": 2.267929785893069e-06, + "loss": 0.4678, + "step": 9740 + }, + { + "epoch": 2.7262804366078925, + "grad_norm": 0.24376956925000517, + "learning_rate": 2.263338940842258e-06, + "loss": 0.4472, + "step": 9741 + }, + { + "epoch": 2.726560313462077, + "grad_norm": 0.2613972036128885, + "learning_rate": 2.2587526393935575e-06, + "loss": 0.4561, + "step": 9742 + }, + { + "epoch": 2.7268401903162607, + "grad_norm": 0.26207387859482995, + "learning_rate": 2.2541708819835083e-06, + "loss": 0.483, + "step": 9743 + }, + { + "epoch": 2.727120067170445, + "grad_norm": 0.2584530797788964, + "learning_rate": 2.2495936690481943e-06, + "loss": 0.4611, + "step": 9744 + }, + { + "epoch": 2.7273999440246293, + "grad_norm": 0.25829123389903674, + "learning_rate": 2.2450210010232895e-06, + "loss": 0.456, + "step": 9745 + }, + { + "epoch": 2.727679820878813, + "grad_norm": 0.25503319866970714, + "learning_rate": 2.240452878344007e-06, + "loss": 0.4476, + "step": 9746 + }, + { + "epoch": 2.7279596977329974, + "grad_norm": 0.25741869424005215, + "learning_rate": 2.23588930144516e-06, + "loss": 0.4738, + "step": 9747 + }, + { + "epoch": 2.7282395745871817, + "grad_norm": 0.2543053628985632, + "learning_rate": 2.2313302707611006e-06, + "loss": 0.4474, + "step": 9748 + }, + { + "epoch": 2.7285194514413655, + "grad_norm": 0.2771948450205726, + "learning_rate": 2.22677578672576e-06, + "loss": 0.4772, + "step": 9749 + }, + { + "epoch": 2.72879932829555, + "grad_norm": 0.2639708823854919, + "learning_rate": 2.222225849772641e-06, + "loss": 0.4595, + "step": 9750 + }, + { + "epoch": 2.729079205149734, + "grad_norm": 0.25593857277096793, + "learning_rate": 2.217680460334809e-06, + "loss": 0.4306, + "step": 9751 + }, + { + "epoch": 2.7293590820039184, + "grad_norm": 0.2561656048222062, + "learning_rate": 2.2131396188449005e-06, + "loss": 0.443, + "step": 9752 + }, + { + "epoch": 2.7296389588581027, + "grad_norm": 0.26033103658113366, + "learning_rate": 2.2086033257351035e-06, + "loss": 0.4528, + "step": 9753 + }, + { + "epoch": 2.7299188357122866, + "grad_norm": 0.2620716770154568, + "learning_rate": 2.2040715814371947e-06, + "loss": 0.438, + "step": 9754 + }, + { + "epoch": 2.730198712566471, + "grad_norm": 0.25569202205132774, + "learning_rate": 2.199544386382507e-06, + "loss": 0.4514, + "step": 9755 + }, + { + "epoch": 2.730478589420655, + "grad_norm": 0.2457195033787847, + "learning_rate": 2.1950217410019348e-06, + "loss": 0.4407, + "step": 9756 + }, + { + "epoch": 2.730758466274839, + "grad_norm": 0.25164064702847944, + "learning_rate": 2.1905036457259555e-06, + "loss": 0.4479, + "step": 9757 + }, + { + "epoch": 2.7310383431290233, + "grad_norm": 0.24407379992726314, + "learning_rate": 2.1859901009845974e-06, + "loss": 0.4382, + "step": 9758 + }, + { + "epoch": 2.7313182199832076, + "grad_norm": 0.24912827766097292, + "learning_rate": 2.181481107207467e-06, + "loss": 0.4634, + "step": 9759 + }, + { + "epoch": 2.7315980968373914, + "grad_norm": 0.2605340914466241, + "learning_rate": 2.1769766648237265e-06, + "loss": 0.4404, + "step": 9760 + }, + { + "epoch": 2.7318779736915757, + "grad_norm": 0.2580613226425842, + "learning_rate": 2.1724767742621167e-06, + "loss": 0.4762, + "step": 9761 + }, + { + "epoch": 2.73215785054576, + "grad_norm": 0.2603137926796265, + "learning_rate": 2.1679814359509333e-06, + "loss": 0.4495, + "step": 9762 + }, + { + "epoch": 2.732437727399944, + "grad_norm": 0.2524330504497244, + "learning_rate": 2.1634906503180565e-06, + "loss": 0.4324, + "step": 9763 + }, + { + "epoch": 2.732717604254128, + "grad_norm": 0.260142923673047, + "learning_rate": 2.159004417790911e-06, + "loss": 0.4768, + "step": 9764 + }, + { + "epoch": 2.7329974811083124, + "grad_norm": 0.26635278369881427, + "learning_rate": 2.154522738796505e-06, + "loss": 0.4342, + "step": 9765 + }, + { + "epoch": 2.7332773579624963, + "grad_norm": 0.2473458112234101, + "learning_rate": 2.1500456137614033e-06, + "loss": 0.4337, + "step": 9766 + }, + { + "epoch": 2.7335572348166806, + "grad_norm": 0.25070281196189426, + "learning_rate": 2.1455730431117427e-06, + "loss": 0.4589, + "step": 9767 + }, + { + "epoch": 2.733837111670865, + "grad_norm": 0.252400866560634, + "learning_rate": 2.141105027273227e-06, + "loss": 0.4426, + "step": 9768 + }, + { + "epoch": 2.734116988525049, + "grad_norm": 0.25260736603149075, + "learning_rate": 2.136641566671116e-06, + "loss": 0.451, + "step": 9769 + }, + { + "epoch": 2.734396865379233, + "grad_norm": 0.2515553157562186, + "learning_rate": 2.132182661730253e-06, + "loss": 0.435, + "step": 9770 + }, + { + "epoch": 2.7346767422334173, + "grad_norm": 0.26044560295493985, + "learning_rate": 2.1277283128750437e-06, + "loss": 0.4585, + "step": 9771 + }, + { + "epoch": 2.7349566190876016, + "grad_norm": 0.2534114378150915, + "learning_rate": 2.123278520529448e-06, + "loss": 0.4475, + "step": 9772 + }, + { + "epoch": 2.735236495941786, + "grad_norm": 0.2529724150240257, + "learning_rate": 2.1188332851169946e-06, + "loss": 0.465, + "step": 9773 + }, + { + "epoch": 2.7355163727959697, + "grad_norm": 0.2492456543986863, + "learning_rate": 2.1143926070607945e-06, + "loss": 0.4436, + "step": 9774 + }, + { + "epoch": 2.735796249650154, + "grad_norm": 0.2505347128345977, + "learning_rate": 2.1099564867835098e-06, + "loss": 0.4406, + "step": 9775 + }, + { + "epoch": 2.7360761265043383, + "grad_norm": 0.2562143674896613, + "learning_rate": 2.1055249247073638e-06, + "loss": 0.4617, + "step": 9776 + }, + { + "epoch": 2.736356003358522, + "grad_norm": 0.2445628376186289, + "learning_rate": 2.1010979212541692e-06, + "loss": 0.4317, + "step": 9777 + }, + { + "epoch": 2.7366358802127064, + "grad_norm": 0.24936755921346454, + "learning_rate": 2.0966754768452836e-06, + "loss": 0.4415, + "step": 9778 + }, + { + "epoch": 2.7369157570668907, + "grad_norm": 0.24447213129673334, + "learning_rate": 2.092257591901642e-06, + "loss": 0.4409, + "step": 9779 + }, + { + "epoch": 2.7371956339210746, + "grad_norm": 0.2503946767854177, + "learning_rate": 2.087844266843736e-06, + "loss": 0.4457, + "step": 9780 + }, + { + "epoch": 2.737475510775259, + "grad_norm": 0.25055245006537563, + "learning_rate": 2.0834355020916296e-06, + "loss": 0.4338, + "step": 9781 + }, + { + "epoch": 2.737755387629443, + "grad_norm": 0.26274715786940184, + "learning_rate": 2.0790312980649485e-06, + "loss": 0.4538, + "step": 9782 + }, + { + "epoch": 2.738035264483627, + "grad_norm": 0.24467616380968663, + "learning_rate": 2.0746316551828958e-06, + "loss": 0.4498, + "step": 9783 + }, + { + "epoch": 2.7383151413378113, + "grad_norm": 0.24718713979995247, + "learning_rate": 2.070236573864226e-06, + "loss": 0.4317, + "step": 9784 + }, + { + "epoch": 2.7385950181919956, + "grad_norm": 0.24938040432107095, + "learning_rate": 2.065846054527265e-06, + "loss": 0.4307, + "step": 9785 + }, + { + "epoch": 2.7388748950461794, + "grad_norm": 0.25620621588726555, + "learning_rate": 2.061460097589907e-06, + "loss": 0.4271, + "step": 9786 + }, + { + "epoch": 2.7391547719003637, + "grad_norm": 0.2443597292643001, + "learning_rate": 2.0570787034696117e-06, + "loss": 0.4573, + "step": 9787 + }, + { + "epoch": 2.739434648754548, + "grad_norm": 0.2431003346385221, + "learning_rate": 2.05270187258339e-06, + "loss": 0.4552, + "step": 9788 + }, + { + "epoch": 2.7397145256087323, + "grad_norm": 0.2622162635281076, + "learning_rate": 2.0483296053478596e-06, + "loss": 0.4545, + "step": 9789 + }, + { + "epoch": 2.7399944024629166, + "grad_norm": 0.24673910627636125, + "learning_rate": 2.043961902179148e-06, + "loss": 0.4311, + "step": 9790 + }, + { + "epoch": 2.7402742793171004, + "grad_norm": 0.26986549912269525, + "learning_rate": 2.0395987634929835e-06, + "loss": 0.4602, + "step": 9791 + }, + { + "epoch": 2.7405541561712847, + "grad_norm": 0.2584870948790133, + "learning_rate": 2.0352401897046514e-06, + "loss": 0.4574, + "step": 9792 + }, + { + "epoch": 2.740834033025469, + "grad_norm": 0.2717921986704552, + "learning_rate": 2.030886181229008e-06, + "loss": 0.4705, + "step": 9793 + }, + { + "epoch": 2.741113909879653, + "grad_norm": 0.2598638961512423, + "learning_rate": 2.026536738480467e-06, + "loss": 0.4484, + "step": 9794 + }, + { + "epoch": 2.741393786733837, + "grad_norm": 0.2676571347095541, + "learning_rate": 2.022191861873013e-06, + "loss": 0.4689, + "step": 9795 + }, + { + "epoch": 2.7416736635880214, + "grad_norm": 0.2555603117542369, + "learning_rate": 2.017851551820188e-06, + "loss": 0.4426, + "step": 9796 + }, + { + "epoch": 2.7419535404422053, + "grad_norm": 0.26338405397950065, + "learning_rate": 2.0135158087351116e-06, + "loss": 0.4632, + "step": 9797 + }, + { + "epoch": 2.7422334172963896, + "grad_norm": 0.24756470929017552, + "learning_rate": 2.0091846330304587e-06, + "loss": 0.4338, + "step": 9798 + }, + { + "epoch": 2.742513294150574, + "grad_norm": 0.24951686147478658, + "learning_rate": 2.004858025118472e-06, + "loss": 0.4353, + "step": 9799 + }, + { + "epoch": 2.7427931710047577, + "grad_norm": 0.26044379006654206, + "learning_rate": 2.0005359854109674e-06, + "loss": 0.443, + "step": 9800 + }, + { + "epoch": 2.743073047858942, + "grad_norm": 0.2541233637604626, + "learning_rate": 1.9962185143193146e-06, + "loss": 0.4613, + "step": 9801 + }, + { + "epoch": 2.7433529247131263, + "grad_norm": 0.2422587448045734, + "learning_rate": 1.9919056122544465e-06, + "loss": 0.4455, + "step": 9802 + }, + { + "epoch": 2.74363280156731, + "grad_norm": 0.26598242085573076, + "learning_rate": 1.9875972796268796e-06, + "loss": 0.4491, + "step": 9803 + }, + { + "epoch": 2.7439126784214944, + "grad_norm": 0.2367366865239579, + "learning_rate": 1.9832935168466737e-06, + "loss": 0.4535, + "step": 9804 + }, + { + "epoch": 2.7441925552756787, + "grad_norm": 0.25565944852140315, + "learning_rate": 1.978994324323463e-06, + "loss": 0.4411, + "step": 9805 + }, + { + "epoch": 2.744472432129863, + "grad_norm": 0.265656398995227, + "learning_rate": 1.9746997024664537e-06, + "loss": 0.4605, + "step": 9806 + }, + { + "epoch": 2.744752308984047, + "grad_norm": 0.2506083541144176, + "learning_rate": 1.9704096516844185e-06, + "loss": 0.4279, + "step": 9807 + }, + { + "epoch": 2.745032185838231, + "grad_norm": 0.2536719791358648, + "learning_rate": 1.966124172385664e-06, + "loss": 0.4332, + "step": 9808 + }, + { + "epoch": 2.7453120626924155, + "grad_norm": 0.2491911982119003, + "learning_rate": 1.9618432649781026e-06, + "loss": 0.4568, + "step": 9809 + }, + { + "epoch": 2.7455919395465997, + "grad_norm": 0.25381163034410015, + "learning_rate": 1.957566929869181e-06, + "loss": 0.4625, + "step": 9810 + }, + { + "epoch": 2.7458718164007836, + "grad_norm": 0.249212606963689, + "learning_rate": 1.953295167465935e-06, + "loss": 0.4503, + "step": 9811 + }, + { + "epoch": 2.746151693254968, + "grad_norm": 0.25293889988967516, + "learning_rate": 1.9490279781749444e-06, + "loss": 0.4497, + "step": 9812 + }, + { + "epoch": 2.746431570109152, + "grad_norm": 0.25946744619315143, + "learning_rate": 1.9447653624023677e-06, + "loss": 0.4376, + "step": 9813 + }, + { + "epoch": 2.746711446963336, + "grad_norm": 0.2522338245764953, + "learning_rate": 1.940507320553925e-06, + "loss": 0.4538, + "step": 9814 + }, + { + "epoch": 2.7469913238175203, + "grad_norm": 0.26342991229246765, + "learning_rate": 1.9362538530348916e-06, + "loss": 0.4576, + "step": 9815 + }, + { + "epoch": 2.7472712006717046, + "grad_norm": 0.2535452965318185, + "learning_rate": 1.9320049602501166e-06, + "loss": 0.4542, + "step": 9816 + }, + { + "epoch": 2.7475510775258885, + "grad_norm": 0.2533952199194951, + "learning_rate": 1.9277606426040206e-06, + "loss": 0.4594, + "step": 9817 + }, + { + "epoch": 2.7478309543800727, + "grad_norm": 0.2583910405217963, + "learning_rate": 1.9235209005005693e-06, + "loss": 0.4669, + "step": 9818 + }, + { + "epoch": 2.748110831234257, + "grad_norm": 0.2498732511385406, + "learning_rate": 1.919285734343307e-06, + "loss": 0.4412, + "step": 9819 + }, + { + "epoch": 2.748390708088441, + "grad_norm": 0.26116816402370263, + "learning_rate": 1.915055144535344e-06, + "loss": 0.4558, + "step": 9820 + }, + { + "epoch": 2.748670584942625, + "grad_norm": 0.25829591909850785, + "learning_rate": 1.910829131479347e-06, + "loss": 0.4623, + "step": 9821 + }, + { + "epoch": 2.7489504617968095, + "grad_norm": 0.24712886039603282, + "learning_rate": 1.9066076955775392e-06, + "loss": 0.4601, + "step": 9822 + }, + { + "epoch": 2.7492303386509933, + "grad_norm": 0.24310791930278508, + "learning_rate": 1.9023908372317434e-06, + "loss": 0.44, + "step": 9823 + }, + { + "epoch": 2.7495102155051776, + "grad_norm": 0.24835019957768184, + "learning_rate": 1.8981785568433052e-06, + "loss": 0.413, + "step": 9824 + }, + { + "epoch": 2.749790092359362, + "grad_norm": 0.24464392895275985, + "learning_rate": 1.8939708548131596e-06, + "loss": 0.4617, + "step": 9825 + }, + { + "epoch": 2.750069969213546, + "grad_norm": 0.2633395189470949, + "learning_rate": 1.8897677315417917e-06, + "loss": 0.4657, + "step": 9826 + }, + { + "epoch": 2.7503498460677305, + "grad_norm": 0.2503926669341791, + "learning_rate": 1.885569187429259e-06, + "loss": 0.4652, + "step": 9827 + }, + { + "epoch": 2.7506297229219143, + "grad_norm": 0.27174935999295685, + "learning_rate": 1.8813752228751813e-06, + "loss": 0.4748, + "step": 9828 + }, + { + "epoch": 2.7509095997760986, + "grad_norm": 0.25368284762597176, + "learning_rate": 1.877185838278739e-06, + "loss": 0.453, + "step": 9829 + }, + { + "epoch": 2.751189476630283, + "grad_norm": 0.2604977777841358, + "learning_rate": 1.8730010340386906e-06, + "loss": 0.4457, + "step": 9830 + }, + { + "epoch": 2.7514693534844668, + "grad_norm": 0.25199200486936535, + "learning_rate": 1.8688208105533345e-06, + "loss": 0.4571, + "step": 9831 + }, + { + "epoch": 2.751749230338651, + "grad_norm": 0.2564707300617125, + "learning_rate": 1.8646451682205634e-06, + "loss": 0.4473, + "step": 9832 + }, + { + "epoch": 2.7520291071928353, + "grad_norm": 0.2465814274437075, + "learning_rate": 1.8604741074377985e-06, + "loss": 0.441, + "step": 9833 + }, + { + "epoch": 2.752308984047019, + "grad_norm": 0.24819787350402822, + "learning_rate": 1.856307628602061e-06, + "loss": 0.434, + "step": 9834 + }, + { + "epoch": 2.7525888609012035, + "grad_norm": 0.2522458208535914, + "learning_rate": 1.8521457321099056e-06, + "loss": 0.4555, + "step": 9835 + }, + { + "epoch": 2.7528687377553878, + "grad_norm": 0.25325237798506217, + "learning_rate": 1.8479884183574657e-06, + "loss": 0.4486, + "step": 9836 + }, + { + "epoch": 2.7531486146095716, + "grad_norm": 0.2512687359912989, + "learning_rate": 1.8438356877404472e-06, + "loss": 0.4463, + "step": 9837 + }, + { + "epoch": 2.753428491463756, + "grad_norm": 0.24482383859662027, + "learning_rate": 1.8396875406541003e-06, + "loss": 0.4266, + "step": 9838 + }, + { + "epoch": 2.75370836831794, + "grad_norm": 0.255910446221892, + "learning_rate": 1.835543977493248e-06, + "loss": 0.4702, + "step": 9839 + }, + { + "epoch": 2.753988245172124, + "grad_norm": 0.24189453109675438, + "learning_rate": 1.8314049986522697e-06, + "loss": 0.4319, + "step": 9840 + }, + { + "epoch": 2.7542681220263083, + "grad_norm": 0.25312095976269955, + "learning_rate": 1.8272706045251386e-06, + "loss": 0.4498, + "step": 9841 + }, + { + "epoch": 2.7545479988804926, + "grad_norm": 0.24774475119856862, + "learning_rate": 1.8231407955053515e-06, + "loss": 0.4422, + "step": 9842 + }, + { + "epoch": 2.7548278757346765, + "grad_norm": 0.24530932973762928, + "learning_rate": 1.8190155719859937e-06, + "loss": 0.4435, + "step": 9843 + }, + { + "epoch": 2.7551077525888608, + "grad_norm": 0.2486010765037517, + "learning_rate": 1.8148949343596955e-06, + "loss": 0.4402, + "step": 9844 + }, + { + "epoch": 2.755387629443045, + "grad_norm": 0.25405471905500593, + "learning_rate": 1.8107788830186657e-06, + "loss": 0.4466, + "step": 9845 + }, + { + "epoch": 2.7556675062972293, + "grad_norm": 0.25319744713438, + "learning_rate": 1.8066674183546796e-06, + "loss": 0.466, + "step": 9846 + }, + { + "epoch": 2.7559473831514136, + "grad_norm": 0.24416571032093162, + "learning_rate": 1.802560540759063e-06, + "loss": 0.4308, + "step": 9847 + }, + { + "epoch": 2.7562272600055975, + "grad_norm": 0.25587903696865794, + "learning_rate": 1.7984582506227087e-06, + "loss": 0.4623, + "step": 9848 + }, + { + "epoch": 2.7565071368597818, + "grad_norm": 0.27282076814942396, + "learning_rate": 1.794360548336077e-06, + "loss": 0.4604, + "step": 9849 + }, + { + "epoch": 2.756787013713966, + "grad_norm": 0.24360921050416073, + "learning_rate": 1.7902674342891945e-06, + "loss": 0.4324, + "step": 9850 + }, + { + "epoch": 2.75706689056815, + "grad_norm": 0.2666268305331878, + "learning_rate": 1.7861789088716385e-06, + "loss": 0.442, + "step": 9851 + }, + { + "epoch": 2.757346767422334, + "grad_norm": 0.2556956994805931, + "learning_rate": 1.7820949724725644e-06, + "loss": 0.4492, + "step": 9852 + }, + { + "epoch": 2.7576266442765185, + "grad_norm": 0.2497913576433295, + "learning_rate": 1.7780156254806779e-06, + "loss": 0.4321, + "step": 9853 + }, + { + "epoch": 2.7579065211307023, + "grad_norm": 0.26680275607938037, + "learning_rate": 1.7739408682842519e-06, + "loss": 0.4459, + "step": 9854 + }, + { + "epoch": 2.7581863979848866, + "grad_norm": 0.24563229509684617, + "learning_rate": 1.7698707012711313e-06, + "loss": 0.4331, + "step": 9855 + }, + { + "epoch": 2.758466274839071, + "grad_norm": 0.2588336788891555, + "learning_rate": 1.7658051248287066e-06, + "loss": 0.4515, + "step": 9856 + }, + { + "epoch": 2.7587461516932548, + "grad_norm": 0.26412216104427755, + "learning_rate": 1.761744139343946e-06, + "loss": 0.4487, + "step": 9857 + }, + { + "epoch": 2.759026028547439, + "grad_norm": 0.26252899037835437, + "learning_rate": 1.7576877452033847e-06, + "loss": 0.4389, + "step": 9858 + }, + { + "epoch": 2.7593059054016233, + "grad_norm": 0.2512836814322738, + "learning_rate": 1.7536359427931083e-06, + "loss": 0.4403, + "step": 9859 + }, + { + "epoch": 2.759585782255807, + "grad_norm": 0.26114576787388766, + "learning_rate": 1.7495887324987636e-06, + "loss": 0.4584, + "step": 9860 + }, + { + "epoch": 2.7598656591099915, + "grad_norm": 0.24786863869500758, + "learning_rate": 1.7455461147055819e-06, + "loss": 0.4467, + "step": 9861 + }, + { + "epoch": 2.760145535964176, + "grad_norm": 0.25527130359432465, + "learning_rate": 1.7415080897983215e-06, + "loss": 0.4499, + "step": 9862 + }, + { + "epoch": 2.76042541281836, + "grad_norm": 0.25850781816512897, + "learning_rate": 1.737474658161331e-06, + "loss": 0.4488, + "step": 9863 + }, + { + "epoch": 2.760705289672544, + "grad_norm": 0.24088271779080977, + "learning_rate": 1.7334458201785143e-06, + "loss": 0.4212, + "step": 9864 + }, + { + "epoch": 2.760985166526728, + "grad_norm": 0.24949668871945238, + "learning_rate": 1.7294215762333422e-06, + "loss": 0.4284, + "step": 9865 + }, + { + "epoch": 2.7612650433809125, + "grad_norm": 0.25583877952988887, + "learning_rate": 1.7254019267088472e-06, + "loss": 0.4682, + "step": 9866 + }, + { + "epoch": 2.761544920235097, + "grad_norm": 0.2638343758900408, + "learning_rate": 1.7213868719876125e-06, + "loss": 0.4408, + "step": 9867 + }, + { + "epoch": 2.7618247970892806, + "grad_norm": 0.26082987065987406, + "learning_rate": 1.7173764124517987e-06, + "loss": 0.4546, + "step": 9868 + }, + { + "epoch": 2.762104673943465, + "grad_norm": 0.24988160273768262, + "learning_rate": 1.7133705484831231e-06, + "loss": 0.4305, + "step": 9869 + }, + { + "epoch": 2.762384550797649, + "grad_norm": 0.2515710888517263, + "learning_rate": 1.7093692804628635e-06, + "loss": 0.4361, + "step": 9870 + }, + { + "epoch": 2.762664427651833, + "grad_norm": 0.2552183979411106, + "learning_rate": 1.7053726087718712e-06, + "loss": 0.4653, + "step": 9871 + }, + { + "epoch": 2.7629443045060174, + "grad_norm": 0.2537436909675899, + "learning_rate": 1.701380533790542e-06, + "loss": 0.4345, + "step": 9872 + }, + { + "epoch": 2.7632241813602016, + "grad_norm": 0.2500433034108333, + "learning_rate": 1.6973930558988438e-06, + "loss": 0.4502, + "step": 9873 + }, + { + "epoch": 2.7635040582143855, + "grad_norm": 0.26169076283954357, + "learning_rate": 1.6934101754763122e-06, + "loss": 0.4419, + "step": 9874 + }, + { + "epoch": 2.76378393506857, + "grad_norm": 0.27014196100755844, + "learning_rate": 1.6894318929020381e-06, + "loss": 0.4479, + "step": 9875 + }, + { + "epoch": 2.764063811922754, + "grad_norm": 0.2574726657825003, + "learning_rate": 1.68545820855468e-06, + "loss": 0.4429, + "step": 9876 + }, + { + "epoch": 2.764343688776938, + "grad_norm": 0.2524880184970193, + "learning_rate": 1.6814891228124518e-06, + "loss": 0.441, + "step": 9877 + }, + { + "epoch": 2.764623565631122, + "grad_norm": 0.2441941462031394, + "learning_rate": 1.6775246360531348e-06, + "loss": 0.4461, + "step": 9878 + }, + { + "epoch": 2.7649034424853065, + "grad_norm": 0.25143709731337827, + "learning_rate": 1.6735647486540773e-06, + "loss": 0.4582, + "step": 9879 + }, + { + "epoch": 2.7651833193394904, + "grad_norm": 0.2600667779290014, + "learning_rate": 1.669609460992172e-06, + "loss": 0.4495, + "step": 9880 + }, + { + "epoch": 2.7654631961936746, + "grad_norm": 0.2598431401850228, + "learning_rate": 1.6656587734438845e-06, + "loss": 0.4418, + "step": 9881 + }, + { + "epoch": 2.765743073047859, + "grad_norm": 0.24950260170920652, + "learning_rate": 1.6617126863852528e-06, + "loss": 0.4377, + "step": 9882 + }, + { + "epoch": 2.7660229499020432, + "grad_norm": 0.2597516854380162, + "learning_rate": 1.65777120019186e-06, + "loss": 0.4539, + "step": 9883 + }, + { + "epoch": 2.7663028267562275, + "grad_norm": 0.2582783102350748, + "learning_rate": 1.653834315238867e-06, + "loss": 0.4445, + "step": 9884 + }, + { + "epoch": 2.7665827036104114, + "grad_norm": 0.2597813332993534, + "learning_rate": 1.6499020319009796e-06, + "loss": 0.4449, + "step": 9885 + }, + { + "epoch": 2.7668625804645957, + "grad_norm": 0.2589579891258187, + "learning_rate": 1.645974350552487e-06, + "loss": 0.4335, + "step": 9886 + }, + { + "epoch": 2.76714245731878, + "grad_norm": 0.25390228325926073, + "learning_rate": 1.6420512715672131e-06, + "loss": 0.4522, + "step": 9887 + }, + { + "epoch": 2.767422334172964, + "grad_norm": 0.24855966704301372, + "learning_rate": 1.6381327953185698e-06, + "loss": 0.4431, + "step": 9888 + }, + { + "epoch": 2.767702211027148, + "grad_norm": 0.26122318036499725, + "learning_rate": 1.6342189221795146e-06, + "loss": 0.4476, + "step": 9889 + }, + { + "epoch": 2.7679820878813324, + "grad_norm": 0.26490584141169976, + "learning_rate": 1.630309652522577e-06, + "loss": 0.4568, + "step": 9890 + }, + { + "epoch": 2.7682619647355162, + "grad_norm": 0.25900542075149086, + "learning_rate": 1.6264049867198316e-06, + "loss": 0.4567, + "step": 9891 + }, + { + "epoch": 2.7685418415897005, + "grad_norm": 0.26407639153594076, + "learning_rate": 1.6225049251429424e-06, + "loss": 0.4764, + "step": 9892 + }, + { + "epoch": 2.768821718443885, + "grad_norm": 0.25400032000388784, + "learning_rate": 1.6186094681631126e-06, + "loss": 0.4611, + "step": 9893 + }, + { + "epoch": 2.7691015952980687, + "grad_norm": 0.24900510360273606, + "learning_rate": 1.614718616151112e-06, + "loss": 0.4579, + "step": 9894 + }, + { + "epoch": 2.769381472152253, + "grad_norm": 0.2637590373757003, + "learning_rate": 1.6108323694772775e-06, + "loss": 0.4569, + "step": 9895 + }, + { + "epoch": 2.7696613490064372, + "grad_norm": 0.24799032492316023, + "learning_rate": 1.6069507285115027e-06, + "loss": 0.4384, + "step": 9896 + }, + { + "epoch": 2.769941225860621, + "grad_norm": 0.25441732706797976, + "learning_rate": 1.6030736936232472e-06, + "loss": 0.4416, + "step": 9897 + }, + { + "epoch": 2.7702211027148054, + "grad_norm": 0.24776083490307405, + "learning_rate": 1.599201265181527e-06, + "loss": 0.4272, + "step": 9898 + }, + { + "epoch": 2.7705009795689897, + "grad_norm": 0.26213435984723776, + "learning_rate": 1.5953334435549195e-06, + "loss": 0.444, + "step": 9899 + }, + { + "epoch": 2.770780856423174, + "grad_norm": 0.2607649999457887, + "learning_rate": 1.591470229111569e-06, + "loss": 0.4442, + "step": 9900 + }, + { + "epoch": 2.771060733277358, + "grad_norm": 0.26314542894095144, + "learning_rate": 1.5876116222191762e-06, + "loss": 0.4421, + "step": 9901 + }, + { + "epoch": 2.771340610131542, + "grad_norm": 0.2562781836764644, + "learning_rate": 1.5837576232450025e-06, + "loss": 0.4556, + "step": 9902 + }, + { + "epoch": 2.7716204869857264, + "grad_norm": 0.2552996940280876, + "learning_rate": 1.5799082325558822e-06, + "loss": 0.4571, + "step": 9903 + }, + { + "epoch": 2.7719003638399107, + "grad_norm": 0.24840947215054535, + "learning_rate": 1.5760634505182004e-06, + "loss": 0.4705, + "step": 9904 + }, + { + "epoch": 2.7721802406940945, + "grad_norm": 0.24769111202528704, + "learning_rate": 1.5722232774978974e-06, + "loss": 0.4342, + "step": 9905 + }, + { + "epoch": 2.772460117548279, + "grad_norm": 0.2466219726623981, + "learning_rate": 1.568387713860492e-06, + "loss": 0.4345, + "step": 9906 + }, + { + "epoch": 2.772739994402463, + "grad_norm": 0.26863136609639976, + "learning_rate": 1.5645567599710532e-06, + "loss": 0.4508, + "step": 9907 + }, + { + "epoch": 2.773019871256647, + "grad_norm": 0.26252638536666434, + "learning_rate": 1.5607304161942115e-06, + "loss": 0.4648, + "step": 9908 + }, + { + "epoch": 2.7732997481108312, + "grad_norm": 0.26615456002637095, + "learning_rate": 1.5569086828941593e-06, + "loss": 0.4443, + "step": 9909 + }, + { + "epoch": 2.7735796249650155, + "grad_norm": 0.25319575151533724, + "learning_rate": 1.5530915604346553e-06, + "loss": 0.4519, + "step": 9910 + }, + { + "epoch": 2.7738595018191994, + "grad_norm": 0.25393800338746597, + "learning_rate": 1.5492790491790143e-06, + "loss": 0.4525, + "step": 9911 + }, + { + "epoch": 2.7741393786733837, + "grad_norm": 0.25350606794488084, + "learning_rate": 1.5454711494901076e-06, + "loss": 0.4308, + "step": 9912 + }, + { + "epoch": 2.774419255527568, + "grad_norm": 0.2678909115242242, + "learning_rate": 1.5416678617303838e-06, + "loss": 0.4629, + "step": 9913 + }, + { + "epoch": 2.774699132381752, + "grad_norm": 0.2366863998554562, + "learning_rate": 1.5378691862618367e-06, + "loss": 0.4591, + "step": 9914 + }, + { + "epoch": 2.774979009235936, + "grad_norm": 0.25635978902418827, + "learning_rate": 1.5340751234460216e-06, + "loss": 0.4458, + "step": 9915 + }, + { + "epoch": 2.7752588860901204, + "grad_norm": 0.258296792425405, + "learning_rate": 1.5302856736440773e-06, + "loss": 0.4361, + "step": 9916 + }, + { + "epoch": 2.7755387629443042, + "grad_norm": 0.25931834446669616, + "learning_rate": 1.52650083721666e-06, + "loss": 0.479, + "step": 9917 + }, + { + "epoch": 2.7758186397984885, + "grad_norm": 0.26385446568239773, + "learning_rate": 1.522720614524026e-06, + "loss": 0.4348, + "step": 9918 + }, + { + "epoch": 2.776098516652673, + "grad_norm": 0.2622619028570416, + "learning_rate": 1.5189450059259759e-06, + "loss": 0.4665, + "step": 9919 + }, + { + "epoch": 2.776378393506857, + "grad_norm": 0.2530816873962765, + "learning_rate": 1.515174011781878e-06, + "loss": 0.4329, + "step": 9920 + }, + { + "epoch": 2.7766582703610414, + "grad_norm": 0.26841643057303033, + "learning_rate": 1.5114076324506565e-06, + "loss": 0.4672, + "step": 9921 + }, + { + "epoch": 2.7769381472152252, + "grad_norm": 0.2766572726103652, + "learning_rate": 1.5076458682907967e-06, + "loss": 0.4766, + "step": 9922 + }, + { + "epoch": 2.7772180240694095, + "grad_norm": 0.26976189067539585, + "learning_rate": 1.5038887196603458e-06, + "loss": 0.4587, + "step": 9923 + }, + { + "epoch": 2.777497900923594, + "grad_norm": 0.25998431386306364, + "learning_rate": 1.5001361869169117e-06, + "loss": 0.4425, + "step": 9924 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.2572445614318216, + "learning_rate": 1.496388270417659e-06, + "loss": 0.449, + "step": 9925 + }, + { + "epoch": 2.778057654631962, + "grad_norm": 0.25004335296725344, + "learning_rate": 1.492644970519319e-06, + "loss": 0.4467, + "step": 9926 + }, + { + "epoch": 2.7783375314861463, + "grad_norm": 0.2524736550997618, + "learning_rate": 1.4889062875781846e-06, + "loss": 0.4523, + "step": 9927 + }, + { + "epoch": 2.77861740834033, + "grad_norm": 0.24379796656378236, + "learning_rate": 1.4851722219500986e-06, + "loss": 0.4475, + "step": 9928 + }, + { + "epoch": 2.7788972851945144, + "grad_norm": 0.2637163149355571, + "learning_rate": 1.4814427739904824e-06, + "loss": 0.4594, + "step": 9929 + }, + { + "epoch": 2.7791771620486987, + "grad_norm": 0.26285694312144064, + "learning_rate": 1.477717944054291e-06, + "loss": 0.4508, + "step": 9930 + }, + { + "epoch": 2.7794570389028825, + "grad_norm": 0.2385400019552132, + "learning_rate": 1.4739977324960742e-06, + "loss": 0.441, + "step": 9931 + }, + { + "epoch": 2.779736915757067, + "grad_norm": 0.2488919146263261, + "learning_rate": 1.4702821396699095e-06, + "loss": 0.4215, + "step": 9932 + }, + { + "epoch": 2.780016792611251, + "grad_norm": 0.2553818793559371, + "learning_rate": 1.4665711659294535e-06, + "loss": 0.4433, + "step": 9933 + }, + { + "epoch": 2.780296669465435, + "grad_norm": 0.24543217828489985, + "learning_rate": 1.462864811627923e-06, + "loss": 0.442, + "step": 9934 + }, + { + "epoch": 2.7805765463196193, + "grad_norm": 0.26225398041640935, + "learning_rate": 1.4591630771180865e-06, + "loss": 0.446, + "step": 9935 + }, + { + "epoch": 2.7808564231738035, + "grad_norm": 0.2926901503423705, + "learning_rate": 1.4554659627522782e-06, + "loss": 0.4614, + "step": 9936 + }, + { + "epoch": 2.781136300027988, + "grad_norm": 0.24399404399959204, + "learning_rate": 1.4517734688823893e-06, + "loss": 0.4342, + "step": 9937 + }, + { + "epoch": 2.7814161768821717, + "grad_norm": 0.2533952208000935, + "learning_rate": 1.4480855958598715e-06, + "loss": 0.469, + "step": 9938 + }, + { + "epoch": 2.781696053736356, + "grad_norm": 0.27457777970489095, + "learning_rate": 1.4444023440357445e-06, + "loss": 0.4777, + "step": 9939 + }, + { + "epoch": 2.7819759305905403, + "grad_norm": 0.2606795452061868, + "learning_rate": 1.4407237137605833e-06, + "loss": 0.4416, + "step": 9940 + }, + { + "epoch": 2.7822558074447246, + "grad_norm": 0.2489414056239663, + "learning_rate": 1.4370497053845188e-06, + "loss": 0.4607, + "step": 9941 + }, + { + "epoch": 2.7825356842989084, + "grad_norm": 0.2560060466918113, + "learning_rate": 1.4333803192572437e-06, + "loss": 0.4427, + "step": 9942 + }, + { + "epoch": 2.7828155611530927, + "grad_norm": 0.2689146407676583, + "learning_rate": 1.4297155557280062e-06, + "loss": 0.4772, + "step": 9943 + }, + { + "epoch": 2.783095438007277, + "grad_norm": 0.25076953646542366, + "learning_rate": 1.4260554151456329e-06, + "loss": 0.446, + "step": 9944 + }, + { + "epoch": 2.783375314861461, + "grad_norm": 0.25888309966038153, + "learning_rate": 1.422399897858495e-06, + "loss": 0.4572, + "step": 9945 + }, + { + "epoch": 2.783655191715645, + "grad_norm": 0.25102148456232454, + "learning_rate": 1.4187490042145257e-06, + "loss": 0.4505, + "step": 9946 + }, + { + "epoch": 2.7839350685698294, + "grad_norm": 0.26082987979360156, + "learning_rate": 1.4151027345612188e-06, + "loss": 0.4677, + "step": 9947 + }, + { + "epoch": 2.7842149454240133, + "grad_norm": 0.2640450694000547, + "learning_rate": 1.4114610892456304e-06, + "loss": 0.4523, + "step": 9948 + }, + { + "epoch": 2.7844948222781976, + "grad_norm": 0.26821185864910446, + "learning_rate": 1.4078240686143663e-06, + "loss": 0.4528, + "step": 9949 + }, + { + "epoch": 2.784774699132382, + "grad_norm": 0.25741403841115945, + "learning_rate": 1.404191673013605e-06, + "loss": 0.4398, + "step": 9950 + }, + { + "epoch": 2.7850545759865657, + "grad_norm": 0.26085871079329714, + "learning_rate": 1.4005639027890871e-06, + "loss": 0.4636, + "step": 9951 + }, + { + "epoch": 2.78533445284075, + "grad_norm": 0.2514087586435767, + "learning_rate": 1.3969407582860972e-06, + "loss": 0.458, + "step": 9952 + }, + { + "epoch": 2.7856143296949343, + "grad_norm": 0.24396338601296708, + "learning_rate": 1.3933222398494927e-06, + "loss": 0.4347, + "step": 9953 + }, + { + "epoch": 2.785894206549118, + "grad_norm": 0.25646494800800057, + "learning_rate": 1.3897083478236761e-06, + "loss": 0.4556, + "step": 9954 + }, + { + "epoch": 2.7861740834033024, + "grad_norm": 0.26563384689169894, + "learning_rate": 1.3860990825526333e-06, + "loss": 0.4742, + "step": 9955 + }, + { + "epoch": 2.7864539602574867, + "grad_norm": 0.2572462566583641, + "learning_rate": 1.3824944443798838e-06, + "loss": 0.4494, + "step": 9956 + }, + { + "epoch": 2.786733837111671, + "grad_norm": 0.25435689945391043, + "learning_rate": 1.378894433648531e-06, + "loss": 0.4412, + "step": 9957 + }, + { + "epoch": 2.7870137139658553, + "grad_norm": 0.25698015099571514, + "learning_rate": 1.3752990507012176e-06, + "loss": 0.4607, + "step": 9958 + }, + { + "epoch": 2.787293590820039, + "grad_norm": 0.26095188195834784, + "learning_rate": 1.3717082958801586e-06, + "loss": 0.4411, + "step": 9959 + }, + { + "epoch": 2.7875734676742234, + "grad_norm": 0.26137156532226097, + "learning_rate": 1.3681221695271195e-06, + "loss": 0.4596, + "step": 9960 + }, + { + "epoch": 2.7878533445284077, + "grad_norm": 0.24339930838086868, + "learning_rate": 1.364540671983433e-06, + "loss": 0.4391, + "step": 9961 + }, + { + "epoch": 2.7881332213825916, + "grad_norm": 0.2645758288665304, + "learning_rate": 1.3609638035899875e-06, + "loss": 0.4299, + "step": 9962 + }, + { + "epoch": 2.788413098236776, + "grad_norm": 0.25811516683528624, + "learning_rate": 1.357391564687238e-06, + "loss": 0.4494, + "step": 9963 + }, + { + "epoch": 2.78869297509096, + "grad_norm": 0.25134969175188127, + "learning_rate": 1.35382395561518e-06, + "loss": 0.458, + "step": 9964 + }, + { + "epoch": 2.788972851945144, + "grad_norm": 0.2593673101983797, + "learning_rate": 1.3502609767133857e-06, + "loss": 0.4344, + "step": 9965 + }, + { + "epoch": 2.7892527287993283, + "grad_norm": 0.2519434370338427, + "learning_rate": 1.3467026283209894e-06, + "loss": 0.4559, + "step": 9966 + }, + { + "epoch": 2.7895326056535126, + "grad_norm": 0.2602830949061682, + "learning_rate": 1.3431489107766592e-06, + "loss": 0.4368, + "step": 9967 + }, + { + "epoch": 2.7898124825076964, + "grad_norm": 0.2527936043818087, + "learning_rate": 1.3395998244186582e-06, + "loss": 0.4555, + "step": 9968 + }, + { + "epoch": 2.7900923593618807, + "grad_norm": 0.25545981836469733, + "learning_rate": 1.3360553695847822e-06, + "loss": 0.4569, + "step": 9969 + }, + { + "epoch": 2.790372236216065, + "grad_norm": 0.25744697068202377, + "learning_rate": 1.332515546612395e-06, + "loss": 0.4633, + "step": 9970 + }, + { + "epoch": 2.790652113070249, + "grad_norm": 0.26171932687727784, + "learning_rate": 1.3289803558384162e-06, + "loss": 0.4534, + "step": 9971 + }, + { + "epoch": 2.790931989924433, + "grad_norm": 0.2512142623555132, + "learning_rate": 1.3254497975993264e-06, + "loss": 0.4434, + "step": 9972 + }, + { + "epoch": 2.7912118667786174, + "grad_norm": 0.2577117878871623, + "learning_rate": 1.3219238722311733e-06, + "loss": 0.4452, + "step": 9973 + }, + { + "epoch": 2.7914917436328017, + "grad_norm": 0.2590775441238437, + "learning_rate": 1.3184025800695499e-06, + "loss": 0.4718, + "step": 9974 + }, + { + "epoch": 2.7917716204869856, + "grad_norm": 0.26265291378524014, + "learning_rate": 1.3148859214496156e-06, + "loss": 0.4532, + "step": 9975 + }, + { + "epoch": 2.79205149734117, + "grad_norm": 0.2561238230788668, + "learning_rate": 1.3113738967060918e-06, + "loss": 0.4466, + "step": 9976 + }, + { + "epoch": 2.792331374195354, + "grad_norm": 0.25212904412063863, + "learning_rate": 1.3078665061732554e-06, + "loss": 0.4465, + "step": 9977 + }, + { + "epoch": 2.7926112510495384, + "grad_norm": 0.25628654442421034, + "learning_rate": 1.3043637501849282e-06, + "loss": 0.4539, + "step": 9978 + }, + { + "epoch": 2.7928911279037223, + "grad_norm": 0.25871583667058373, + "learning_rate": 1.3008656290745269e-06, + "loss": 0.445, + "step": 9979 + }, + { + "epoch": 2.7931710047579066, + "grad_norm": 0.2577730113815811, + "learning_rate": 1.2973721431749908e-06, + "loss": 0.4491, + "step": 9980 + }, + { + "epoch": 2.793450881612091, + "grad_norm": 0.26107229084098077, + "learning_rate": 1.2938832928188316e-06, + "loss": 0.4651, + "step": 9981 + }, + { + "epoch": 2.7937307584662747, + "grad_norm": 0.2612412242372199, + "learning_rate": 1.2903990783381226e-06, + "loss": 0.4614, + "step": 9982 + }, + { + "epoch": 2.794010635320459, + "grad_norm": 0.2576072472393539, + "learning_rate": 1.2869195000644985e-06, + "loss": 0.4436, + "step": 9983 + }, + { + "epoch": 2.7942905121746433, + "grad_norm": 0.2746996528066174, + "learning_rate": 1.2834445583291387e-06, + "loss": 0.4732, + "step": 9984 + }, + { + "epoch": 2.794570389028827, + "grad_norm": 0.2524876940103109, + "learning_rate": 1.2799742534627901e-06, + "loss": 0.442, + "step": 9985 + }, + { + "epoch": 2.7948502658830114, + "grad_norm": 0.25568136493755084, + "learning_rate": 1.276508585795766e-06, + "loss": 0.4442, + "step": 9986 + }, + { + "epoch": 2.7951301427371957, + "grad_norm": 0.2447715531517239, + "learning_rate": 1.2730475556579247e-06, + "loss": 0.4433, + "step": 9987 + }, + { + "epoch": 2.7954100195913796, + "grad_norm": 0.2488736236977702, + "learning_rate": 1.2695911633786971e-06, + "loss": 0.4359, + "step": 9988 + }, + { + "epoch": 2.795689896445564, + "grad_norm": 0.2591930365268937, + "learning_rate": 1.2661394092870537e-06, + "loss": 0.4729, + "step": 9989 + }, + { + "epoch": 2.795969773299748, + "grad_norm": 0.2573072489842773, + "learning_rate": 1.262692293711537e-06, + "loss": 0.4508, + "step": 9990 + }, + { + "epoch": 2.796249650153932, + "grad_norm": 0.25989222524716127, + "learning_rate": 1.2592498169802513e-06, + "loss": 0.4752, + "step": 9991 + }, + { + "epoch": 2.7965295270081163, + "grad_norm": 0.25270467365628874, + "learning_rate": 1.2558119794208456e-06, + "loss": 0.4464, + "step": 9992 + }, + { + "epoch": 2.7968094038623006, + "grad_norm": 0.25835423310990646, + "learning_rate": 1.2523787813605413e-06, + "loss": 0.4648, + "step": 9993 + }, + { + "epoch": 2.797089280716485, + "grad_norm": 0.2529859162387948, + "learning_rate": 1.2489502231261052e-06, + "loss": 0.4533, + "step": 9994 + }, + { + "epoch": 2.797369157570669, + "grad_norm": 0.23629321574318973, + "learning_rate": 1.2455263050438759e-06, + "loss": 0.4524, + "step": 9995 + }, + { + "epoch": 2.797649034424853, + "grad_norm": 0.25389427162784706, + "learning_rate": 1.2421070274397428e-06, + "loss": 0.4572, + "step": 9996 + }, + { + "epoch": 2.7979289112790373, + "grad_norm": 0.2840046132570813, + "learning_rate": 1.2386923906391512e-06, + "loss": 0.4537, + "step": 9997 + }, + { + "epoch": 2.7982087881332216, + "grad_norm": 0.2505066776398961, + "learning_rate": 1.2352823949671189e-06, + "loss": 0.4668, + "step": 9998 + }, + { + "epoch": 2.7984886649874054, + "grad_norm": 0.2509149628141864, + "learning_rate": 1.2318770407481973e-06, + "loss": 0.4523, + "step": 9999 + }, + { + "epoch": 2.7987685418415897, + "grad_norm": 0.2518685026080287, + "learning_rate": 1.2284763283065159e-06, + "loss": 0.4598, + "step": 10000 } ], "logging_steps": 1, - "max_steps": 7145, + "max_steps": 10719, "num_input_tokens_seen": 0, - "num_train_epochs": 1, + "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { @@ -28027,8 +70027,8 @@ "attributes": {} } }, - "total_flos": 700001933443072.0, - "train_batch_size": 1, + "total_flos": 2349444853596160.0, + "train_batch_size": 8, "trial_name": null, "trial_params": null }