{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9627, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010387451958034694, "grad_norm": 1.3366456031799316, "learning_rate": 9.9999997337695e-05, "loss": 2.4557, "step": 1 }, { "epoch": 0.00020774903916069389, "grad_norm": 0.7125677466392517, "learning_rate": 9.999998935078033e-05, "loss": 2.1366, "step": 2 }, { "epoch": 0.0003116235587410408, "grad_norm": 0.5620805621147156, "learning_rate": 9.999997603925677e-05, "loss": 1.9955, "step": 3 }, { "epoch": 0.00041549807832138777, "grad_norm": 0.44992074370384216, "learning_rate": 9.99999574031258e-05, "loss": 2.1548, "step": 4 }, { "epoch": 0.0005193725979017347, "grad_norm": 0.4193553626537323, "learning_rate": 9.999993344238936e-05, "loss": 2.0443, "step": 5 }, { "epoch": 0.0006232471174820816, "grad_norm": 0.4412941336631775, "learning_rate": 9.999990415705005e-05, "loss": 2.0479, "step": 6 }, { "epoch": 0.0007271216370624286, "grad_norm": 0.40010932087898254, "learning_rate": 9.999986954711094e-05, "loss": 2.0064, "step": 7 }, { "epoch": 0.0008309961566427755, "grad_norm": 0.4279223680496216, "learning_rate": 9.999982961257574e-05, "loss": 1.8892, "step": 8 }, { "epoch": 0.0009348706762231225, "grad_norm": 0.4031948149204254, "learning_rate": 9.999978435344872e-05, "loss": 1.9309, "step": 9 }, { "epoch": 0.0010387451958034693, "grad_norm": 0.34114745259284973, "learning_rate": 9.999973376973466e-05, "loss": 1.6737, "step": 10 }, { "epoch": 0.0011426197153838163, "grad_norm": 0.4245925545692444, "learning_rate": 9.999967786143895e-05, "loss": 1.8743, "step": 11 }, { "epoch": 0.0012464942349641633, "grad_norm": 0.4021143317222595, "learning_rate": 9.99996166285676e-05, "loss": 1.8595, "step": 12 }, { "epoch": 0.0013503687545445102, "grad_norm": 0.38615134358406067, "learning_rate": 9.999955007112706e-05, "loss": 1.8856, "step": 13 }, { "epoch": 0.0014542432741248572, "grad_norm": 0.3954271078109741, "learning_rate": 9.999947818912448e-05, "loss": 2.0519, "step": 14 }, { "epoch": 0.0015581177937052041, "grad_norm": 0.3857433795928955, "learning_rate": 9.999940098256747e-05, "loss": 1.8855, "step": 15 }, { "epoch": 0.001661992313285551, "grad_norm": 0.38353338837623596, "learning_rate": 9.999931845146424e-05, "loss": 2.0608, "step": 16 }, { "epoch": 0.001765866832865898, "grad_norm": 0.46258458495140076, "learning_rate": 9.999923059582363e-05, "loss": 2.2282, "step": 17 }, { "epoch": 0.001869741352446245, "grad_norm": 0.40921229124069214, "learning_rate": 9.999913741565496e-05, "loss": 2.0698, "step": 18 }, { "epoch": 0.0019736158720265917, "grad_norm": 0.39826393127441406, "learning_rate": 9.999903891096817e-05, "loss": 2.0232, "step": 19 }, { "epoch": 0.0020774903916069387, "grad_norm": 0.4504700005054474, "learning_rate": 9.999893508177373e-05, "loss": 1.8005, "step": 20 }, { "epoch": 0.0021813649111872857, "grad_norm": 0.35965996980667114, "learning_rate": 9.999882592808274e-05, "loss": 1.8358, "step": 21 }, { "epoch": 0.0022852394307676326, "grad_norm": 0.36941030621528625, "learning_rate": 9.999871144990677e-05, "loss": 1.8202, "step": 22 }, { "epoch": 0.0023891139503479796, "grad_norm": 0.3631148338317871, "learning_rate": 9.999859164725804e-05, "loss": 1.9011, "step": 23 }, { "epoch": 0.0024929884699283265, "grad_norm": 0.3719225525856018, "learning_rate": 9.999846652014931e-05, "loss": 1.8046, "step": 24 }, { "epoch": 0.0025968629895086735, "grad_norm": 0.3886321783065796, "learning_rate": 9.99983360685939e-05, "loss": 1.7041, "step": 25 }, { "epoch": 0.0027007375090890204, "grad_norm": 0.35539302229881287, "learning_rate": 9.999820029260569e-05, "loss": 1.8635, "step": 26 }, { "epoch": 0.0028046120286693674, "grad_norm": 0.40370672941207886, "learning_rate": 9.999805919219916e-05, "loss": 2.025, "step": 27 }, { "epoch": 0.0029084865482497143, "grad_norm": 0.36346563696861267, "learning_rate": 9.999791276738933e-05, "loss": 1.9266, "step": 28 }, { "epoch": 0.0030123610678300613, "grad_norm": 0.4094865322113037, "learning_rate": 9.999776101819177e-05, "loss": 1.923, "step": 29 }, { "epoch": 0.0031162355874104083, "grad_norm": 0.33642393350601196, "learning_rate": 9.999760394462268e-05, "loss": 1.876, "step": 30 }, { "epoch": 0.0032201101069907552, "grad_norm": 0.386692613363266, "learning_rate": 9.999744154669874e-05, "loss": 2.0935, "step": 31 }, { "epoch": 0.003323984626571102, "grad_norm": 0.32473698258399963, "learning_rate": 9.999727382443728e-05, "loss": 1.7944, "step": 32 }, { "epoch": 0.003427859146151449, "grad_norm": 0.3738991916179657, "learning_rate": 9.999710077785616e-05, "loss": 2.0774, "step": 33 }, { "epoch": 0.003531733665731796, "grad_norm": 0.3248012959957123, "learning_rate": 9.99969224069738e-05, "loss": 1.7769, "step": 34 }, { "epoch": 0.003635608185312143, "grad_norm": 0.3610652685165405, "learning_rate": 9.99967387118092e-05, "loss": 1.9222, "step": 35 }, { "epoch": 0.00373948270489249, "grad_norm": 0.3891034722328186, "learning_rate": 9.99965496923819e-05, "loss": 2.0162, "step": 36 }, { "epoch": 0.003843357224472837, "grad_norm": 0.37236684560775757, "learning_rate": 9.999635534871205e-05, "loss": 2.0374, "step": 37 }, { "epoch": 0.0039472317440531835, "grad_norm": 0.3498350977897644, "learning_rate": 9.999615568082036e-05, "loss": 1.7788, "step": 38 }, { "epoch": 0.0040511062636335304, "grad_norm": 0.41840648651123047, "learning_rate": 9.999595068872805e-05, "loss": 1.6986, "step": 39 }, { "epoch": 0.004154980783213877, "grad_norm": 0.3720596134662628, "learning_rate": 9.9995740372457e-05, "loss": 1.9754, "step": 40 }, { "epoch": 0.004258855302794224, "grad_norm": 0.32730332016944885, "learning_rate": 9.999552473202955e-05, "loss": 1.699, "step": 41 }, { "epoch": 0.004362729822374571, "grad_norm": 0.36480000615119934, "learning_rate": 9.999530376746873e-05, "loss": 2.1077, "step": 42 }, { "epoch": 0.004466604341954918, "grad_norm": 0.3458911180496216, "learning_rate": 9.999507747879802e-05, "loss": 1.9693, "step": 43 }, { "epoch": 0.004570478861535265, "grad_norm": 0.34066182374954224, "learning_rate": 9.999484586604154e-05, "loss": 1.9005, "step": 44 }, { "epoch": 0.004674353381115612, "grad_norm": 0.34163591265678406, "learning_rate": 9.999460892922394e-05, "loss": 1.8829, "step": 45 }, { "epoch": 0.004778227900695959, "grad_norm": 0.37044212222099304, "learning_rate": 9.999436666837048e-05, "loss": 1.9172, "step": 46 }, { "epoch": 0.004882102420276306, "grad_norm": 0.3332674205303192, "learning_rate": 9.999411908350692e-05, "loss": 1.8282, "step": 47 }, { "epoch": 0.004985976939856653, "grad_norm": 0.3461020886898041, "learning_rate": 9.999386617465966e-05, "loss": 1.8652, "step": 48 }, { "epoch": 0.005089851459437, "grad_norm": 0.31899985671043396, "learning_rate": 9.999360794185562e-05, "loss": 1.9479, "step": 49 }, { "epoch": 0.005193725979017347, "grad_norm": 0.3465663194656372, "learning_rate": 9.999334438512228e-05, "loss": 1.8856, "step": 50 }, { "epoch": 0.005297600498597694, "grad_norm": 0.4567152261734009, "learning_rate": 9.999307550448776e-05, "loss": 2.26, "step": 51 }, { "epoch": 0.005401475018178041, "grad_norm": 0.36369699239730835, "learning_rate": 9.999280129998063e-05, "loss": 1.9131, "step": 52 }, { "epoch": 0.005505349537758388, "grad_norm": 0.3090951442718506, "learning_rate": 9.999252177163013e-05, "loss": 1.7429, "step": 53 }, { "epoch": 0.005609224057338735, "grad_norm": 0.3246748745441437, "learning_rate": 9.999223691946602e-05, "loss": 1.8882, "step": 54 }, { "epoch": 0.005713098576919082, "grad_norm": 0.3664419949054718, "learning_rate": 9.999194674351864e-05, "loss": 2.2768, "step": 55 }, { "epoch": 0.005816973096499429, "grad_norm": 0.3436775505542755, "learning_rate": 9.999165124381887e-05, "loss": 1.7684, "step": 56 }, { "epoch": 0.005920847616079776, "grad_norm": 0.3558364510536194, "learning_rate": 9.99913504203982e-05, "loss": 1.8547, "step": 57 }, { "epoch": 0.006024722135660123, "grad_norm": 0.3412851393222809, "learning_rate": 9.999104427328865e-05, "loss": 1.9773, "step": 58 }, { "epoch": 0.0061285966552404696, "grad_norm": 0.3379881978034973, "learning_rate": 9.999073280252283e-05, "loss": 1.9732, "step": 59 }, { "epoch": 0.0062324711748208165, "grad_norm": 0.38533058762550354, "learning_rate": 9.999041600813393e-05, "loss": 1.9285, "step": 60 }, { "epoch": 0.0063363456944011635, "grad_norm": 0.3420720398426056, "learning_rate": 9.999009389015564e-05, "loss": 1.9619, "step": 61 }, { "epoch": 0.0064402202139815104, "grad_norm": 0.3268018662929535, "learning_rate": 9.99897664486223e-05, "loss": 2.0373, "step": 62 }, { "epoch": 0.006544094733561857, "grad_norm": 0.32013919949531555, "learning_rate": 9.998943368356877e-05, "loss": 1.8389, "step": 63 }, { "epoch": 0.006647969253142204, "grad_norm": 0.38457608222961426, "learning_rate": 9.998909559503048e-05, "loss": 2.0334, "step": 64 }, { "epoch": 0.006751843772722551, "grad_norm": 0.35168957710266113, "learning_rate": 9.998875218304345e-05, "loss": 2.0735, "step": 65 }, { "epoch": 0.006855718292302898, "grad_norm": 0.3252507746219635, "learning_rate": 9.998840344764422e-05, "loss": 1.7135, "step": 66 }, { "epoch": 0.006959592811883245, "grad_norm": 0.353354275226593, "learning_rate": 9.998804938886997e-05, "loss": 1.9234, "step": 67 }, { "epoch": 0.007063467331463592, "grad_norm": 0.3150123357772827, "learning_rate": 9.998769000675836e-05, "loss": 1.6909, "step": 68 }, { "epoch": 0.007167341851043939, "grad_norm": 0.33036699891090393, "learning_rate": 9.99873253013477e-05, "loss": 1.7841, "step": 69 }, { "epoch": 0.007271216370624286, "grad_norm": 0.3279257118701935, "learning_rate": 9.998695527267681e-05, "loss": 1.8755, "step": 70 }, { "epoch": 0.007375090890204633, "grad_norm": 0.3096674978733063, "learning_rate": 9.998657992078509e-05, "loss": 1.8373, "step": 71 }, { "epoch": 0.00747896540978498, "grad_norm": 0.3656545877456665, "learning_rate": 9.998619924571251e-05, "loss": 2.0858, "step": 72 }, { "epoch": 0.007582839929365327, "grad_norm": 0.31613457202911377, "learning_rate": 9.998581324749964e-05, "loss": 1.7906, "step": 73 }, { "epoch": 0.007686714448945674, "grad_norm": 0.3255898356437683, "learning_rate": 9.998542192618755e-05, "loss": 1.8037, "step": 74 }, { "epoch": 0.007790588968526021, "grad_norm": 0.33904996514320374, "learning_rate": 9.998502528181793e-05, "loss": 1.6548, "step": 75 }, { "epoch": 0.007894463488106367, "grad_norm": 0.3505510985851288, "learning_rate": 9.998462331443301e-05, "loss": 2.0898, "step": 76 }, { "epoch": 0.007998338007686715, "grad_norm": 0.3301371932029724, "learning_rate": 9.99842160240756e-05, "loss": 1.9709, "step": 77 }, { "epoch": 0.008102212527267061, "grad_norm": 0.3287624418735504, "learning_rate": 9.998380341078909e-05, "loss": 1.6921, "step": 78 }, { "epoch": 0.008206087046847409, "grad_norm": 0.3255762755870819, "learning_rate": 9.99833854746174e-05, "loss": 1.8009, "step": 79 }, { "epoch": 0.008309961566427755, "grad_norm": 0.3210470378398895, "learning_rate": 9.998296221560504e-05, "loss": 1.9141, "step": 80 }, { "epoch": 0.008413836086008103, "grad_norm": 0.3226737082004547, "learning_rate": 9.998253363379708e-05, "loss": 1.8365, "step": 81 }, { "epoch": 0.008517710605588449, "grad_norm": 0.3540826439857483, "learning_rate": 9.998209972923918e-05, "loss": 2.0948, "step": 82 }, { "epoch": 0.008621585125168797, "grad_norm": 0.3137127161026001, "learning_rate": 9.998166050197751e-05, "loss": 1.8404, "step": 83 }, { "epoch": 0.008725459644749143, "grad_norm": 0.3495839536190033, "learning_rate": 9.998121595205889e-05, "loss": 1.8201, "step": 84 }, { "epoch": 0.00882933416432949, "grad_norm": 0.3384789824485779, "learning_rate": 9.998076607953064e-05, "loss": 1.9134, "step": 85 }, { "epoch": 0.008933208683909837, "grad_norm": 0.3618725836277008, "learning_rate": 9.998031088444067e-05, "loss": 1.9558, "step": 86 }, { "epoch": 0.009037083203490184, "grad_norm": 0.370078444480896, "learning_rate": 9.997985036683744e-05, "loss": 2.045, "step": 87 }, { "epoch": 0.00914095772307053, "grad_norm": 0.32593002915382385, "learning_rate": 9.997938452677e-05, "loss": 1.7929, "step": 88 }, { "epoch": 0.009244832242650878, "grad_norm": 0.3358675241470337, "learning_rate": 9.997891336428797e-05, "loss": 1.8742, "step": 89 }, { "epoch": 0.009348706762231224, "grad_norm": 0.35875624418258667, "learning_rate": 9.997843687944152e-05, "loss": 1.9634, "step": 90 }, { "epoch": 0.009452581281811572, "grad_norm": 0.3237409293651581, "learning_rate": 9.997795507228139e-05, "loss": 1.8137, "step": 91 }, { "epoch": 0.009556455801391918, "grad_norm": 0.3336571455001831, "learning_rate": 9.997746794285887e-05, "loss": 1.9342, "step": 92 }, { "epoch": 0.009660330320972266, "grad_norm": 0.3358410894870758, "learning_rate": 9.997697549122586e-05, "loss": 1.8772, "step": 93 }, { "epoch": 0.009764204840552612, "grad_norm": 0.3093462884426117, "learning_rate": 9.997647771743482e-05, "loss": 1.8048, "step": 94 }, { "epoch": 0.00986807936013296, "grad_norm": 0.34004223346710205, "learning_rate": 9.997597462153871e-05, "loss": 1.8228, "step": 95 }, { "epoch": 0.009971953879713306, "grad_norm": 0.33898910880088806, "learning_rate": 9.997546620359114e-05, "loss": 1.8203, "step": 96 }, { "epoch": 0.010075828399293654, "grad_norm": 0.3222779929637909, "learning_rate": 9.997495246364624e-05, "loss": 1.7667, "step": 97 }, { "epoch": 0.010179702918874, "grad_norm": 0.3132547438144684, "learning_rate": 9.997443340175872e-05, "loss": 1.7176, "step": 98 }, { "epoch": 0.010283577438454348, "grad_norm": 0.30829909443855286, "learning_rate": 9.997390901798386e-05, "loss": 1.7196, "step": 99 }, { "epoch": 0.010387451958034694, "grad_norm": 0.3239877223968506, "learning_rate": 9.997337931237751e-05, "loss": 1.6993, "step": 100 }, { "epoch": 0.010491326477615042, "grad_norm": 0.34558477997779846, "learning_rate": 9.997284428499605e-05, "loss": 1.9082, "step": 101 }, { "epoch": 0.010595200997195388, "grad_norm": 0.34653279185295105, "learning_rate": 9.99723039358965e-05, "loss": 1.8484, "step": 102 }, { "epoch": 0.010699075516775736, "grad_norm": 0.4017353057861328, "learning_rate": 9.997175826513637e-05, "loss": 2.0352, "step": 103 }, { "epoch": 0.010802950036356082, "grad_norm": 0.3651774227619171, "learning_rate": 9.997120727277378e-05, "loss": 2.1032, "step": 104 }, { "epoch": 0.01090682455593643, "grad_norm": 0.32084140181541443, "learning_rate": 9.997065095886741e-05, "loss": 1.7161, "step": 105 }, { "epoch": 0.011010699075516776, "grad_norm": 0.3476540446281433, "learning_rate": 9.997008932347649e-05, "loss": 1.8586, "step": 106 }, { "epoch": 0.011114573595097123, "grad_norm": 0.3458161950111389, "learning_rate": 9.996952236666084e-05, "loss": 1.8134, "step": 107 }, { "epoch": 0.01121844811467747, "grad_norm": 0.3564072549343109, "learning_rate": 9.996895008848085e-05, "loss": 1.9237, "step": 108 }, { "epoch": 0.011322322634257817, "grad_norm": 0.33512404561042786, "learning_rate": 9.996837248899741e-05, "loss": 1.8897, "step": 109 }, { "epoch": 0.011426197153838163, "grad_norm": 0.36306363344192505, "learning_rate": 9.99677895682721e-05, "loss": 1.9856, "step": 110 }, { "epoch": 0.011530071673418511, "grad_norm": 0.32571837306022644, "learning_rate": 9.996720132636697e-05, "loss": 1.6574, "step": 111 }, { "epoch": 0.011633946192998857, "grad_norm": 0.3236237168312073, "learning_rate": 9.996660776334464e-05, "loss": 1.7604, "step": 112 }, { "epoch": 0.011737820712579203, "grad_norm": 0.34493985772132874, "learning_rate": 9.996600887926834e-05, "loss": 1.766, "step": 113 }, { "epoch": 0.011841695232159551, "grad_norm": 0.34191280603408813, "learning_rate": 9.996540467420186e-05, "loss": 1.7565, "step": 114 }, { "epoch": 0.011945569751739897, "grad_norm": 0.33236685395240784, "learning_rate": 9.996479514820952e-05, "loss": 1.8874, "step": 115 }, { "epoch": 0.012049444271320245, "grad_norm": 0.37998783588409424, "learning_rate": 9.996418030135622e-05, "loss": 2.0761, "step": 116 }, { "epoch": 0.012153318790900591, "grad_norm": 0.34933921694755554, "learning_rate": 9.996356013370747e-05, "loss": 2.0269, "step": 117 }, { "epoch": 0.012257193310480939, "grad_norm": 0.3566916286945343, "learning_rate": 9.99629346453293e-05, "loss": 1.7982, "step": 118 }, { "epoch": 0.012361067830061285, "grad_norm": 0.36033695936203003, "learning_rate": 9.996230383628831e-05, "loss": 1.9449, "step": 119 }, { "epoch": 0.012464942349641633, "grad_norm": 0.34493738412857056, "learning_rate": 9.996166770665167e-05, "loss": 1.9453, "step": 120 }, { "epoch": 0.012568816869221979, "grad_norm": 0.3142033815383911, "learning_rate": 9.996102625648715e-05, "loss": 1.7562, "step": 121 }, { "epoch": 0.012672691388802327, "grad_norm": 0.3889004588127136, "learning_rate": 9.996037948586305e-05, "loss": 1.9439, "step": 122 }, { "epoch": 0.012776565908382673, "grad_norm": 0.32579320669174194, "learning_rate": 9.995972739484822e-05, "loss": 1.7729, "step": 123 }, { "epoch": 0.012880440427963021, "grad_norm": 0.3976999521255493, "learning_rate": 9.995906998351215e-05, "loss": 2.246, "step": 124 }, { "epoch": 0.012984314947543367, "grad_norm": 0.38258862495422363, "learning_rate": 9.99584072519248e-05, "loss": 1.9098, "step": 125 }, { "epoch": 0.013088189467123715, "grad_norm": 0.3394373655319214, "learning_rate": 9.995773920015679e-05, "loss": 1.7569, "step": 126 }, { "epoch": 0.013192063986704061, "grad_norm": 0.3687582015991211, "learning_rate": 9.995706582827922e-05, "loss": 2.0098, "step": 127 }, { "epoch": 0.013295938506284409, "grad_norm": 0.36636659502983093, "learning_rate": 9.995638713636382e-05, "loss": 1.9661, "step": 128 }, { "epoch": 0.013399813025864755, "grad_norm": 0.33423036336898804, "learning_rate": 9.995570312448289e-05, "loss": 1.768, "step": 129 }, { "epoch": 0.013503687545445103, "grad_norm": 0.34405717253685, "learning_rate": 9.995501379270922e-05, "loss": 1.649, "step": 130 }, { "epoch": 0.013607562065025449, "grad_norm": 0.352104514837265, "learning_rate": 9.995431914111624e-05, "loss": 1.904, "step": 131 }, { "epoch": 0.013711436584605797, "grad_norm": 0.34278222918510437, "learning_rate": 9.995361916977795e-05, "loss": 1.9214, "step": 132 }, { "epoch": 0.013815311104186143, "grad_norm": 0.3315337300300598, "learning_rate": 9.995291387876886e-05, "loss": 1.9647, "step": 133 }, { "epoch": 0.01391918562376649, "grad_norm": 0.349161297082901, "learning_rate": 9.99522032681641e-05, "loss": 1.8797, "step": 134 }, { "epoch": 0.014023060143346837, "grad_norm": 0.32067403197288513, "learning_rate": 9.995148733803933e-05, "loss": 1.9065, "step": 135 }, { "epoch": 0.014126934662927184, "grad_norm": 0.336671382188797, "learning_rate": 9.995076608847078e-05, "loss": 1.7649, "step": 136 }, { "epoch": 0.01423080918250753, "grad_norm": 0.3314901292324066, "learning_rate": 9.99500395195353e-05, "loss": 1.7482, "step": 137 }, { "epoch": 0.014334683702087878, "grad_norm": 0.3493605852127075, "learning_rate": 9.994930763131022e-05, "loss": 2.0021, "step": 138 }, { "epoch": 0.014438558221668224, "grad_norm": 0.37969377636909485, "learning_rate": 9.99485704238735e-05, "loss": 1.9293, "step": 139 }, { "epoch": 0.014542432741248572, "grad_norm": 0.31969866156578064, "learning_rate": 9.994782789730363e-05, "loss": 1.789, "step": 140 }, { "epoch": 0.014646307260828918, "grad_norm": 0.3660503625869751, "learning_rate": 9.994708005167972e-05, "loss": 2.0092, "step": 141 }, { "epoch": 0.014750181780409266, "grad_norm": 0.346720427274704, "learning_rate": 9.994632688708138e-05, "loss": 1.9393, "step": 142 }, { "epoch": 0.014854056299989612, "grad_norm": 0.3383733630180359, "learning_rate": 9.994556840358882e-05, "loss": 1.7967, "step": 143 }, { "epoch": 0.01495793081956996, "grad_norm": 0.3345518708229065, "learning_rate": 9.994480460128282e-05, "loss": 1.8901, "step": 144 }, { "epoch": 0.015061805339150306, "grad_norm": 0.34996676445007324, "learning_rate": 9.99440354802447e-05, "loss": 1.8006, "step": 145 }, { "epoch": 0.015165679858730654, "grad_norm": 0.34575289487838745, "learning_rate": 9.994326104055639e-05, "loss": 1.9967, "step": 146 }, { "epoch": 0.015269554378311, "grad_norm": 0.39413705468177795, "learning_rate": 9.994248128230035e-05, "loss": 1.9811, "step": 147 }, { "epoch": 0.015373428897891348, "grad_norm": 0.3441784083843231, "learning_rate": 9.994169620555962e-05, "loss": 1.7302, "step": 148 }, { "epoch": 0.015477303417471694, "grad_norm": 0.32033270597457886, "learning_rate": 9.994090581041781e-05, "loss": 1.8822, "step": 149 }, { "epoch": 0.015581177937052042, "grad_norm": 0.37517550587654114, "learning_rate": 9.994011009695908e-05, "loss": 2.0916, "step": 150 }, { "epoch": 0.01568505245663239, "grad_norm": 0.3081076145172119, "learning_rate": 9.993930906526818e-05, "loss": 1.6987, "step": 151 }, { "epoch": 0.015788926976212734, "grad_norm": 0.31799814105033875, "learning_rate": 9.993850271543039e-05, "loss": 1.6714, "step": 152 }, { "epoch": 0.015892801495793082, "grad_norm": 0.3602600693702698, "learning_rate": 9.99376910475316e-05, "loss": 2.0209, "step": 153 }, { "epoch": 0.01599667601537343, "grad_norm": 0.31287044286727905, "learning_rate": 9.993687406165826e-05, "loss": 1.6677, "step": 154 }, { "epoch": 0.016100550534953777, "grad_norm": 0.34423595666885376, "learning_rate": 9.993605175789733e-05, "loss": 1.863, "step": 155 }, { "epoch": 0.016204425054534122, "grad_norm": 0.3550304174423218, "learning_rate": 9.993522413633643e-05, "loss": 1.9312, "step": 156 }, { "epoch": 0.01630829957411447, "grad_norm": 0.3672383725643158, "learning_rate": 9.993439119706364e-05, "loss": 2.0446, "step": 157 }, { "epoch": 0.016412174093694817, "grad_norm": 0.32233211398124695, "learning_rate": 9.993355294016771e-05, "loss": 1.7413, "step": 158 }, { "epoch": 0.016516048613275165, "grad_norm": 0.33033961057662964, "learning_rate": 9.993270936573788e-05, "loss": 1.8413, "step": 159 }, { "epoch": 0.01661992313285551, "grad_norm": 0.3530619442462921, "learning_rate": 9.9931860473864e-05, "loss": 1.9083, "step": 160 }, { "epoch": 0.016723797652435857, "grad_norm": 0.33920934796333313, "learning_rate": 9.993100626463646e-05, "loss": 1.8213, "step": 161 }, { "epoch": 0.016827672172016205, "grad_norm": 0.3833538889884949, "learning_rate": 9.993014673814624e-05, "loss": 2.1405, "step": 162 }, { "epoch": 0.016931546691596553, "grad_norm": 0.35124388337135315, "learning_rate": 9.992928189448484e-05, "loss": 1.8398, "step": 163 }, { "epoch": 0.017035421211176897, "grad_norm": 0.34892386198043823, "learning_rate": 9.992841173374441e-05, "loss": 1.8151, "step": 164 }, { "epoch": 0.017139295730757245, "grad_norm": 0.33861371874809265, "learning_rate": 9.992753625601756e-05, "loss": 1.7062, "step": 165 }, { "epoch": 0.017243170250337593, "grad_norm": 0.34012502431869507, "learning_rate": 9.992665546139757e-05, "loss": 1.6682, "step": 166 }, { "epoch": 0.017347044769917937, "grad_norm": 0.38989123702049255, "learning_rate": 9.992576934997819e-05, "loss": 1.9738, "step": 167 }, { "epoch": 0.017450919289498285, "grad_norm": 0.37011152505874634, "learning_rate": 9.992487792185383e-05, "loss": 1.9593, "step": 168 }, { "epoch": 0.017554793809078633, "grad_norm": 0.341496080160141, "learning_rate": 9.992398117711941e-05, "loss": 1.8838, "step": 169 }, { "epoch": 0.01765866832865898, "grad_norm": 0.3466763496398926, "learning_rate": 9.99230791158704e-05, "loss": 1.7701, "step": 170 }, { "epoch": 0.017762542848239325, "grad_norm": 0.36252373456954956, "learning_rate": 9.992217173820288e-05, "loss": 1.8656, "step": 171 }, { "epoch": 0.017866417367819673, "grad_norm": 0.3116392195224762, "learning_rate": 9.992125904421348e-05, "loss": 1.7344, "step": 172 }, { "epoch": 0.01797029188740002, "grad_norm": 0.3929993808269501, "learning_rate": 9.992034103399939e-05, "loss": 1.6855, "step": 173 }, { "epoch": 0.01807416640698037, "grad_norm": 0.3422747552394867, "learning_rate": 9.991941770765838e-05, "loss": 1.8554, "step": 174 }, { "epoch": 0.018178040926560713, "grad_norm": 0.3650684058666229, "learning_rate": 9.991848906528876e-05, "loss": 1.9407, "step": 175 }, { "epoch": 0.01828191544614106, "grad_norm": 0.35484546422958374, "learning_rate": 9.991755510698944e-05, "loss": 1.8553, "step": 176 }, { "epoch": 0.01838578996572141, "grad_norm": 0.3734646737575531, "learning_rate": 9.991661583285987e-05, "loss": 1.77, "step": 177 }, { "epoch": 0.018489664485301757, "grad_norm": 0.3550145924091339, "learning_rate": 9.991567124300009e-05, "loss": 1.8825, "step": 178 }, { "epoch": 0.0185935390048821, "grad_norm": 0.3555574119091034, "learning_rate": 9.991472133751067e-05, "loss": 1.9889, "step": 179 }, { "epoch": 0.01869741352446245, "grad_norm": 0.32128405570983887, "learning_rate": 9.991376611649279e-05, "loss": 1.683, "step": 180 }, { "epoch": 0.018801288044042797, "grad_norm": 0.3892490863800049, "learning_rate": 9.991280558004815e-05, "loss": 1.9933, "step": 181 }, { "epoch": 0.018905162563623144, "grad_norm": 0.32436904311180115, "learning_rate": 9.991183972827904e-05, "loss": 1.8897, "step": 182 }, { "epoch": 0.01900903708320349, "grad_norm": 0.3443160355091095, "learning_rate": 9.991086856128833e-05, "loss": 1.6934, "step": 183 }, { "epoch": 0.019112911602783837, "grad_norm": 0.3968258202075958, "learning_rate": 9.990989207917944e-05, "loss": 1.9867, "step": 184 }, { "epoch": 0.019216786122364184, "grad_norm": 0.3794998526573181, "learning_rate": 9.990891028205636e-05, "loss": 2.0397, "step": 185 }, { "epoch": 0.019320660641944532, "grad_norm": 0.3027797043323517, "learning_rate": 9.990792317002364e-05, "loss": 1.6816, "step": 186 }, { "epoch": 0.019424535161524877, "grad_norm": 0.3459644913673401, "learning_rate": 9.99069307431864e-05, "loss": 1.8124, "step": 187 }, { "epoch": 0.019528409681105224, "grad_norm": 0.3326679766178131, "learning_rate": 9.990593300165033e-05, "loss": 1.6452, "step": 188 }, { "epoch": 0.019632284200685572, "grad_norm": 0.3605174422264099, "learning_rate": 9.990492994552167e-05, "loss": 2.088, "step": 189 }, { "epoch": 0.01973615872026592, "grad_norm": 0.3342396914958954, "learning_rate": 9.990392157490724e-05, "loss": 1.9396, "step": 190 }, { "epoch": 0.019840033239846264, "grad_norm": 0.37349754571914673, "learning_rate": 9.990290788991443e-05, "loss": 1.9863, "step": 191 }, { "epoch": 0.019943907759426612, "grad_norm": 0.3778427541255951, "learning_rate": 9.99018888906512e-05, "loss": 1.902, "step": 192 }, { "epoch": 0.02004778227900696, "grad_norm": 0.3246352970600128, "learning_rate": 9.990086457722604e-05, "loss": 1.8484, "step": 193 }, { "epoch": 0.020151656798587308, "grad_norm": 0.37238767743110657, "learning_rate": 9.989983494974805e-05, "loss": 1.9543, "step": 194 }, { "epoch": 0.020255531318167652, "grad_norm": 0.37035781145095825, "learning_rate": 9.989880000832686e-05, "loss": 1.8695, "step": 195 }, { "epoch": 0.020359405837748, "grad_norm": 0.3354018032550812, "learning_rate": 9.989775975307272e-05, "loss": 1.7589, "step": 196 }, { "epoch": 0.020463280357328348, "grad_norm": 0.4081863462924957, "learning_rate": 9.989671418409636e-05, "loss": 2.1527, "step": 197 }, { "epoch": 0.020567154876908696, "grad_norm": 0.33487629890441895, "learning_rate": 9.989566330150914e-05, "loss": 1.7132, "step": 198 }, { "epoch": 0.02067102939648904, "grad_norm": 0.3445211350917816, "learning_rate": 9.989460710542301e-05, "loss": 1.8913, "step": 199 }, { "epoch": 0.020774903916069388, "grad_norm": 0.40155649185180664, "learning_rate": 9.98935455959504e-05, "loss": 2.191, "step": 200 }, { "epoch": 0.020878778435649736, "grad_norm": 0.35195931792259216, "learning_rate": 9.989247877320436e-05, "loss": 1.9199, "step": 201 }, { "epoch": 0.020982652955230083, "grad_norm": 0.34090521931648254, "learning_rate": 9.989140663729852e-05, "loss": 1.831, "step": 202 }, { "epoch": 0.021086527474810428, "grad_norm": 0.3321789801120758, "learning_rate": 9.989032918834704e-05, "loss": 1.8922, "step": 203 }, { "epoch": 0.021190401994390776, "grad_norm": 0.321913480758667, "learning_rate": 9.988924642646466e-05, "loss": 1.8271, "step": 204 }, { "epoch": 0.021294276513971123, "grad_norm": 0.39516186714172363, "learning_rate": 9.988815835176668e-05, "loss": 1.9391, "step": 205 }, { "epoch": 0.02139815103355147, "grad_norm": 0.35352519154548645, "learning_rate": 9.9887064964369e-05, "loss": 1.6495, "step": 206 }, { "epoch": 0.021502025553131816, "grad_norm": 0.3435503840446472, "learning_rate": 9.988596626438801e-05, "loss": 1.8272, "step": 207 }, { "epoch": 0.021605900072712163, "grad_norm": 0.35792356729507446, "learning_rate": 9.988486225194075e-05, "loss": 1.8506, "step": 208 }, { "epoch": 0.02170977459229251, "grad_norm": 0.32657817006111145, "learning_rate": 9.988375292714478e-05, "loss": 1.8219, "step": 209 }, { "epoch": 0.02181364911187286, "grad_norm": 0.35596343874931335, "learning_rate": 9.988263829011822e-05, "loss": 1.8984, "step": 210 }, { "epoch": 0.021917523631453203, "grad_norm": 0.35671266913414, "learning_rate": 9.988151834097979e-05, "loss": 1.8276, "step": 211 }, { "epoch": 0.02202139815103355, "grad_norm": 0.3640732765197754, "learning_rate": 9.988039307984874e-05, "loss": 1.8979, "step": 212 }, { "epoch": 0.0221252726706139, "grad_norm": 0.32779642939567566, "learning_rate": 9.987926250684491e-05, "loss": 1.8392, "step": 213 }, { "epoch": 0.022229147190194247, "grad_norm": 0.3471597731113434, "learning_rate": 9.98781266220887e-05, "loss": 1.8287, "step": 214 }, { "epoch": 0.02233302170977459, "grad_norm": 0.3553629219532013, "learning_rate": 9.987698542570107e-05, "loss": 1.8612, "step": 215 }, { "epoch": 0.02243689622935494, "grad_norm": 0.3513118028640747, "learning_rate": 9.987583891780355e-05, "loss": 1.9711, "step": 216 }, { "epoch": 0.022540770748935287, "grad_norm": 0.3648374378681183, "learning_rate": 9.987468709851823e-05, "loss": 1.7615, "step": 217 }, { "epoch": 0.022644645268515635, "grad_norm": 0.3236382007598877, "learning_rate": 9.987352996796777e-05, "loss": 1.694, "step": 218 }, { "epoch": 0.02274851978809598, "grad_norm": 0.3441942632198334, "learning_rate": 9.98723675262754e-05, "loss": 1.7834, "step": 219 }, { "epoch": 0.022852394307676327, "grad_norm": 0.3617098331451416, "learning_rate": 9.987119977356491e-05, "loss": 1.8788, "step": 220 }, { "epoch": 0.022956268827256675, "grad_norm": 0.3559406101703644, "learning_rate": 9.987002670996064e-05, "loss": 1.9663, "step": 221 }, { "epoch": 0.023060143346837023, "grad_norm": 0.3278948664665222, "learning_rate": 9.986884833558754e-05, "loss": 1.822, "step": 222 }, { "epoch": 0.023164017866417367, "grad_norm": 0.3330172896385193, "learning_rate": 9.986766465057108e-05, "loss": 1.8147, "step": 223 }, { "epoch": 0.023267892385997715, "grad_norm": 0.3398604094982147, "learning_rate": 9.986647565503731e-05, "loss": 1.7854, "step": 224 }, { "epoch": 0.023371766905578063, "grad_norm": 0.2998906970024109, "learning_rate": 9.986528134911288e-05, "loss": 1.6783, "step": 225 }, { "epoch": 0.023475641425158407, "grad_norm": 0.3525906801223755, "learning_rate": 9.986408173292492e-05, "loss": 1.9996, "step": 226 }, { "epoch": 0.023579515944738755, "grad_norm": 0.34057801961898804, "learning_rate": 9.986287680660123e-05, "loss": 1.8241, "step": 227 }, { "epoch": 0.023683390464319103, "grad_norm": 0.3602697551250458, "learning_rate": 9.98616665702701e-05, "loss": 1.9336, "step": 228 }, { "epoch": 0.02378726498389945, "grad_norm": 0.31595826148986816, "learning_rate": 9.986045102406042e-05, "loss": 1.7719, "step": 229 }, { "epoch": 0.023891139503479795, "grad_norm": 0.34162575006484985, "learning_rate": 9.985923016810163e-05, "loss": 1.8026, "step": 230 }, { "epoch": 0.023995014023060143, "grad_norm": 0.3397705852985382, "learning_rate": 9.985800400252374e-05, "loss": 1.9196, "step": 231 }, { "epoch": 0.02409888854264049, "grad_norm": 0.3344639539718628, "learning_rate": 9.985677252745733e-05, "loss": 1.8512, "step": 232 }, { "epoch": 0.024202763062220838, "grad_norm": 0.44500732421875, "learning_rate": 9.985553574303354e-05, "loss": 1.8336, "step": 233 }, { "epoch": 0.024306637581801183, "grad_norm": 0.3289027512073517, "learning_rate": 9.98542936493841e-05, "loss": 1.6927, "step": 234 }, { "epoch": 0.02441051210138153, "grad_norm": 0.3510255217552185, "learning_rate": 9.985304624664125e-05, "loss": 1.8582, "step": 235 }, { "epoch": 0.024514386620961878, "grad_norm": 0.3442867696285248, "learning_rate": 9.985179353493785e-05, "loss": 1.9107, "step": 236 }, { "epoch": 0.024618261140542226, "grad_norm": 0.3105999529361725, "learning_rate": 9.98505355144073e-05, "loss": 1.6351, "step": 237 }, { "epoch": 0.02472213566012257, "grad_norm": 0.3260045647621155, "learning_rate": 9.984927218518356e-05, "loss": 1.7564, "step": 238 }, { "epoch": 0.024826010179702918, "grad_norm": 0.32900920510292053, "learning_rate": 9.984800354740117e-05, "loss": 1.7372, "step": 239 }, { "epoch": 0.024929884699283266, "grad_norm": 0.3402916193008423, "learning_rate": 9.984672960119523e-05, "loss": 1.7131, "step": 240 }, { "epoch": 0.025033759218863614, "grad_norm": 0.35345762968063354, "learning_rate": 9.984545034670142e-05, "loss": 1.7636, "step": 241 }, { "epoch": 0.025137633738443958, "grad_norm": 0.3537079393863678, "learning_rate": 9.984416578405596e-05, "loss": 1.9847, "step": 242 }, { "epoch": 0.025241508258024306, "grad_norm": 0.3712044358253479, "learning_rate": 9.984287591339562e-05, "loss": 2.0124, "step": 243 }, { "epoch": 0.025345382777604654, "grad_norm": 0.3348478376865387, "learning_rate": 9.98415807348578e-05, "loss": 1.8527, "step": 244 }, { "epoch": 0.025449257297185002, "grad_norm": 0.3452380299568176, "learning_rate": 9.984028024858041e-05, "loss": 1.8608, "step": 245 }, { "epoch": 0.025553131816765346, "grad_norm": 0.38733747601509094, "learning_rate": 9.983897445470194e-05, "loss": 2.029, "step": 246 }, { "epoch": 0.025657006336345694, "grad_norm": 0.3828756809234619, "learning_rate": 9.983766335336144e-05, "loss": 1.9682, "step": 247 }, { "epoch": 0.025760880855926042, "grad_norm": 0.38744086027145386, "learning_rate": 9.983634694469855e-05, "loss": 1.6414, "step": 248 }, { "epoch": 0.02586475537550639, "grad_norm": 0.3316837251186371, "learning_rate": 9.983502522885347e-05, "loss": 1.7982, "step": 249 }, { "epoch": 0.025968629895086734, "grad_norm": 0.3396342694759369, "learning_rate": 9.983369820596691e-05, "loss": 1.8246, "step": 250 }, { "epoch": 0.026072504414667082, "grad_norm": 0.3051803708076477, "learning_rate": 9.98323658761802e-05, "loss": 1.733, "step": 251 }, { "epoch": 0.02617637893424743, "grad_norm": 0.31522175669670105, "learning_rate": 9.983102823963524e-05, "loss": 1.6995, "step": 252 }, { "epoch": 0.026280253453827777, "grad_norm": 0.33415964245796204, "learning_rate": 9.982968529647447e-05, "loss": 1.5215, "step": 253 }, { "epoch": 0.026384127973408122, "grad_norm": 0.35238194465637207, "learning_rate": 9.982833704684091e-05, "loss": 1.8707, "step": 254 }, { "epoch": 0.02648800249298847, "grad_norm": 0.3153392970561981, "learning_rate": 9.982698349087812e-05, "loss": 1.7123, "step": 255 }, { "epoch": 0.026591877012568817, "grad_norm": 0.3397294282913208, "learning_rate": 9.982562462873026e-05, "loss": 1.7335, "step": 256 }, { "epoch": 0.026695751532149165, "grad_norm": 0.3764454126358032, "learning_rate": 9.982426046054204e-05, "loss": 1.9431, "step": 257 }, { "epoch": 0.02679962605172951, "grad_norm": 0.3482568860054016, "learning_rate": 9.982289098645872e-05, "loss": 1.8024, "step": 258 }, { "epoch": 0.026903500571309857, "grad_norm": 0.3208499550819397, "learning_rate": 9.982151620662612e-05, "loss": 1.8129, "step": 259 }, { "epoch": 0.027007375090890205, "grad_norm": 0.3308181166648865, "learning_rate": 9.98201361211907e-05, "loss": 1.8653, "step": 260 }, { "epoch": 0.027111249610470553, "grad_norm": 0.3466727137565613, "learning_rate": 9.981875073029938e-05, "loss": 1.7716, "step": 261 }, { "epoch": 0.027215124130050897, "grad_norm": 0.33778107166290283, "learning_rate": 9.981736003409971e-05, "loss": 1.8481, "step": 262 }, { "epoch": 0.027318998649631245, "grad_norm": 0.367851197719574, "learning_rate": 9.981596403273978e-05, "loss": 2.0175, "step": 263 }, { "epoch": 0.027422873169211593, "grad_norm": 0.3659020662307739, "learning_rate": 9.981456272636826e-05, "loss": 1.6928, "step": 264 }, { "epoch": 0.02752674768879194, "grad_norm": 0.35648471117019653, "learning_rate": 9.981315611513438e-05, "loss": 1.9024, "step": 265 }, { "epoch": 0.027630622208372285, "grad_norm": 0.3503887355327606, "learning_rate": 9.981174419918795e-05, "loss": 1.8439, "step": 266 }, { "epoch": 0.027734496727952633, "grad_norm": 0.37520813941955566, "learning_rate": 9.981032697867929e-05, "loss": 2.0282, "step": 267 }, { "epoch": 0.02783837124753298, "grad_norm": 0.3300826847553253, "learning_rate": 9.980890445375934e-05, "loss": 1.7793, "step": 268 }, { "epoch": 0.02794224576711333, "grad_norm": 0.32417240738868713, "learning_rate": 9.980747662457961e-05, "loss": 1.8332, "step": 269 }, { "epoch": 0.028046120286693673, "grad_norm": 0.32837119698524475, "learning_rate": 9.980604349129211e-05, "loss": 1.6546, "step": 270 }, { "epoch": 0.02814999480627402, "grad_norm": 0.36148953437805176, "learning_rate": 9.980460505404949e-05, "loss": 1.9148, "step": 271 }, { "epoch": 0.02825386932585437, "grad_norm": 0.3625042736530304, "learning_rate": 9.980316131300493e-05, "loss": 1.8963, "step": 272 }, { "epoch": 0.028357743845434717, "grad_norm": 0.322068452835083, "learning_rate": 9.980171226831216e-05, "loss": 1.7182, "step": 273 }, { "epoch": 0.02846161836501506, "grad_norm": 0.3443426787853241, "learning_rate": 9.980025792012551e-05, "loss": 1.7612, "step": 274 }, { "epoch": 0.02856549288459541, "grad_norm": 0.3243624269962311, "learning_rate": 9.979879826859983e-05, "loss": 1.8107, "step": 275 }, { "epoch": 0.028669367404175757, "grad_norm": 0.34362664818763733, "learning_rate": 9.97973333138906e-05, "loss": 1.8116, "step": 276 }, { "epoch": 0.028773241923756104, "grad_norm": 0.3423418402671814, "learning_rate": 9.97958630561538e-05, "loss": 1.968, "step": 277 }, { "epoch": 0.02887711644333645, "grad_norm": 0.3265102207660675, "learning_rate": 9.9794387495546e-05, "loss": 1.7281, "step": 278 }, { "epoch": 0.028980990962916797, "grad_norm": 0.34078219532966614, "learning_rate": 9.979290663222434e-05, "loss": 1.8249, "step": 279 }, { "epoch": 0.029084865482497144, "grad_norm": 0.34047967195510864, "learning_rate": 9.979142046634653e-05, "loss": 1.8084, "step": 280 }, { "epoch": 0.02918874000207749, "grad_norm": 0.36841025948524475, "learning_rate": 9.978992899807084e-05, "loss": 1.9606, "step": 281 }, { "epoch": 0.029292614521657837, "grad_norm": 0.32618024945259094, "learning_rate": 9.978843222755607e-05, "loss": 1.8753, "step": 282 }, { "epoch": 0.029396489041238184, "grad_norm": 0.33189335465431213, "learning_rate": 9.978693015496165e-05, "loss": 1.948, "step": 283 }, { "epoch": 0.029500363560818532, "grad_norm": 0.32388558983802795, "learning_rate": 9.978542278044751e-05, "loss": 1.8796, "step": 284 }, { "epoch": 0.029604238080398877, "grad_norm": 0.34063830971717834, "learning_rate": 9.978391010417418e-05, "loss": 1.8221, "step": 285 }, { "epoch": 0.029708112599979224, "grad_norm": 0.32267820835113525, "learning_rate": 9.978239212630277e-05, "loss": 1.7903, "step": 286 }, { "epoch": 0.029811987119559572, "grad_norm": 0.35702529549598694, "learning_rate": 9.978086884699492e-05, "loss": 1.9183, "step": 287 }, { "epoch": 0.02991586163913992, "grad_norm": 0.36471986770629883, "learning_rate": 9.977934026641282e-05, "loss": 1.8877, "step": 288 }, { "epoch": 0.030019736158720264, "grad_norm": 0.3516945540904999, "learning_rate": 9.977780638471928e-05, "loss": 1.7437, "step": 289 }, { "epoch": 0.030123610678300612, "grad_norm": 0.37206095457077026, "learning_rate": 9.977626720207764e-05, "loss": 1.794, "step": 290 }, { "epoch": 0.03022748519788096, "grad_norm": 0.3756014108657837, "learning_rate": 9.977472271865182e-05, "loss": 1.8116, "step": 291 }, { "epoch": 0.030331359717461308, "grad_norm": 0.3230532705783844, "learning_rate": 9.977317293460631e-05, "loss": 1.7871, "step": 292 }, { "epoch": 0.030435234237041652, "grad_norm": 0.3606550693511963, "learning_rate": 9.97716178501061e-05, "loss": 1.8669, "step": 293 }, { "epoch": 0.030539108756622, "grad_norm": 0.34765157103538513, "learning_rate": 9.977005746531682e-05, "loss": 1.8061, "step": 294 }, { "epoch": 0.030642983276202348, "grad_norm": 0.3419150114059448, "learning_rate": 9.976849178040466e-05, "loss": 1.8189, "step": 295 }, { "epoch": 0.030746857795782696, "grad_norm": 0.33086076378822327, "learning_rate": 9.976692079553633e-05, "loss": 1.7308, "step": 296 }, { "epoch": 0.03085073231536304, "grad_norm": 0.3839011490345001, "learning_rate": 9.976534451087913e-05, "loss": 1.881, "step": 297 }, { "epoch": 0.030954606834943388, "grad_norm": 0.3362378478050232, "learning_rate": 9.976376292660091e-05, "loss": 2.0111, "step": 298 }, { "epoch": 0.031058481354523736, "grad_norm": 0.3804178535938263, "learning_rate": 9.976217604287013e-05, "loss": 2.1103, "step": 299 }, { "epoch": 0.031162355874104083, "grad_norm": 0.38724952936172485, "learning_rate": 9.976058385985575e-05, "loss": 1.8858, "step": 300 }, { "epoch": 0.03126623039368443, "grad_norm": 0.35871621966362, "learning_rate": 9.975898637772734e-05, "loss": 1.7076, "step": 301 }, { "epoch": 0.03137010491326478, "grad_norm": 0.32210198044776917, "learning_rate": 9.975738359665501e-05, "loss": 1.8058, "step": 302 }, { "epoch": 0.03147397943284512, "grad_norm": 0.34519657492637634, "learning_rate": 9.975577551680946e-05, "loss": 1.8827, "step": 303 }, { "epoch": 0.03157785395242547, "grad_norm": 0.3406042456626892, "learning_rate": 9.975416213836193e-05, "loss": 1.801, "step": 304 }, { "epoch": 0.031681728472005816, "grad_norm": 0.33171379566192627, "learning_rate": 9.975254346148422e-05, "loss": 1.7826, "step": 305 }, { "epoch": 0.031785602991586163, "grad_norm": 0.31799814105033875, "learning_rate": 9.975091948634871e-05, "loss": 1.7969, "step": 306 }, { "epoch": 0.03188947751116651, "grad_norm": 0.33427590131759644, "learning_rate": 9.974929021312836e-05, "loss": 1.9499, "step": 307 }, { "epoch": 0.03199335203074686, "grad_norm": 0.336112916469574, "learning_rate": 9.974765564199665e-05, "loss": 1.9003, "step": 308 }, { "epoch": 0.03209722655032721, "grad_norm": 0.33370164036750793, "learning_rate": 9.974601577312768e-05, "loss": 1.8738, "step": 309 }, { "epoch": 0.032201101069907555, "grad_norm": 0.3412269949913025, "learning_rate": 9.974437060669603e-05, "loss": 1.8817, "step": 310 }, { "epoch": 0.032304975589487896, "grad_norm": 0.34361353516578674, "learning_rate": 9.974272014287697e-05, "loss": 1.8391, "step": 311 }, { "epoch": 0.032408850109068243, "grad_norm": 0.3552245795726776, "learning_rate": 9.97410643818462e-05, "loss": 1.8881, "step": 312 }, { "epoch": 0.03251272462864859, "grad_norm": 0.3323882222175598, "learning_rate": 9.973940332378007e-05, "loss": 1.8975, "step": 313 }, { "epoch": 0.03261659914822894, "grad_norm": 0.34055736660957336, "learning_rate": 9.973773696885547e-05, "loss": 1.7162, "step": 314 }, { "epoch": 0.03272047366780929, "grad_norm": 0.3609575927257538, "learning_rate": 9.973606531724985e-05, "loss": 1.9902, "step": 315 }, { "epoch": 0.032824348187389635, "grad_norm": 0.3580215573310852, "learning_rate": 9.973438836914124e-05, "loss": 1.9183, "step": 316 }, { "epoch": 0.03292822270696998, "grad_norm": 0.34166282415390015, "learning_rate": 9.973270612470822e-05, "loss": 1.8884, "step": 317 }, { "epoch": 0.03303209722655033, "grad_norm": 0.3344467878341675, "learning_rate": 9.97310185841299e-05, "loss": 1.9219, "step": 318 }, { "epoch": 0.03313597174613067, "grad_norm": 0.3929234743118286, "learning_rate": 9.972932574758604e-05, "loss": 1.9071, "step": 319 }, { "epoch": 0.03323984626571102, "grad_norm": 0.3376573324203491, "learning_rate": 9.972762761525689e-05, "loss": 1.7418, "step": 320 }, { "epoch": 0.03334372078529137, "grad_norm": 0.33105456829071045, "learning_rate": 9.972592418732327e-05, "loss": 1.7941, "step": 321 }, { "epoch": 0.033447595304871715, "grad_norm": 0.3593199551105499, "learning_rate": 9.972421546396662e-05, "loss": 1.8983, "step": 322 }, { "epoch": 0.03355146982445206, "grad_norm": 0.37045425176620483, "learning_rate": 9.972250144536888e-05, "loss": 1.947, "step": 323 }, { "epoch": 0.03365534434403241, "grad_norm": 0.3378489911556244, "learning_rate": 9.972078213171259e-05, "loss": 1.7318, "step": 324 }, { "epoch": 0.03375921886361276, "grad_norm": 0.354125440120697, "learning_rate": 9.971905752318084e-05, "loss": 1.6396, "step": 325 }, { "epoch": 0.033863093383193106, "grad_norm": 0.36457526683807373, "learning_rate": 9.971732761995728e-05, "loss": 1.8063, "step": 326 }, { "epoch": 0.03396696790277345, "grad_norm": 0.33944860100746155, "learning_rate": 9.971559242222615e-05, "loss": 1.7624, "step": 327 }, { "epoch": 0.034070842422353795, "grad_norm": 0.327573299407959, "learning_rate": 9.971385193017221e-05, "loss": 1.7963, "step": 328 }, { "epoch": 0.03417471694193414, "grad_norm": 0.3168799877166748, "learning_rate": 9.971210614398084e-05, "loss": 1.7648, "step": 329 }, { "epoch": 0.03427859146151449, "grad_norm": 0.3881419003009796, "learning_rate": 9.971035506383792e-05, "loss": 1.8365, "step": 330 }, { "epoch": 0.03438246598109484, "grad_norm": 0.3255634903907776, "learning_rate": 9.970859868992995e-05, "loss": 1.9249, "step": 331 }, { "epoch": 0.034486340500675186, "grad_norm": 0.3285115361213684, "learning_rate": 9.970683702244395e-05, "loss": 1.881, "step": 332 }, { "epoch": 0.034590215020255534, "grad_norm": 0.34438276290893555, "learning_rate": 9.970507006156755e-05, "loss": 1.8085, "step": 333 }, { "epoch": 0.034694089539835875, "grad_norm": 0.347380667924881, "learning_rate": 9.970329780748888e-05, "loss": 1.8762, "step": 334 }, { "epoch": 0.03479796405941622, "grad_norm": 0.32174697518348694, "learning_rate": 9.970152026039672e-05, "loss": 1.7262, "step": 335 }, { "epoch": 0.03490183857899657, "grad_norm": 0.3503490686416626, "learning_rate": 9.969973742048032e-05, "loss": 1.7248, "step": 336 }, { "epoch": 0.03500571309857692, "grad_norm": 0.327619343996048, "learning_rate": 9.969794928792957e-05, "loss": 1.7031, "step": 337 }, { "epoch": 0.035109587618157266, "grad_norm": 0.4252755343914032, "learning_rate": 9.969615586293488e-05, "loss": 2.0297, "step": 338 }, { "epoch": 0.035213462137737614, "grad_norm": 0.36586689949035645, "learning_rate": 9.969435714568722e-05, "loss": 1.9272, "step": 339 }, { "epoch": 0.03531733665731796, "grad_norm": 0.3425196409225464, "learning_rate": 9.969255313637818e-05, "loss": 1.8647, "step": 340 }, { "epoch": 0.03542121117689831, "grad_norm": 0.3617844581604004, "learning_rate": 9.969074383519983e-05, "loss": 1.8622, "step": 341 }, { "epoch": 0.03552508569647865, "grad_norm": 0.33944642543792725, "learning_rate": 9.968892924234487e-05, "loss": 1.7988, "step": 342 }, { "epoch": 0.035628960216059, "grad_norm": 0.3269334137439728, "learning_rate": 9.968710935800652e-05, "loss": 1.8039, "step": 343 }, { "epoch": 0.035732834735639346, "grad_norm": 0.3931077718734741, "learning_rate": 9.968528418237862e-05, "loss": 2.0847, "step": 344 }, { "epoch": 0.035836709255219694, "grad_norm": 0.3752608299255371, "learning_rate": 9.96834537156555e-05, "loss": 2.0207, "step": 345 }, { "epoch": 0.03594058377480004, "grad_norm": 0.33372077345848083, "learning_rate": 9.96816179580321e-05, "loss": 1.9464, "step": 346 }, { "epoch": 0.03604445829438039, "grad_norm": 0.36559945344924927, "learning_rate": 9.967977690970393e-05, "loss": 1.9318, "step": 347 }, { "epoch": 0.03614833281396074, "grad_norm": 0.3152011036872864, "learning_rate": 9.967793057086706e-05, "loss": 1.8081, "step": 348 }, { "epoch": 0.036252207333541085, "grad_norm": 0.32508155703544617, "learning_rate": 9.967607894171804e-05, "loss": 1.7424, "step": 349 }, { "epoch": 0.036356081853121426, "grad_norm": 0.3422446846961975, "learning_rate": 9.967422202245413e-05, "loss": 1.9266, "step": 350 }, { "epoch": 0.036459956372701774, "grad_norm": 0.3278721570968628, "learning_rate": 9.967235981327304e-05, "loss": 1.8344, "step": 351 }, { "epoch": 0.03656383089228212, "grad_norm": 0.32580530643463135, "learning_rate": 9.967049231437309e-05, "loss": 1.7158, "step": 352 }, { "epoch": 0.03666770541186247, "grad_norm": 0.38664954900741577, "learning_rate": 9.966861952595316e-05, "loss": 1.9844, "step": 353 }, { "epoch": 0.03677157993144282, "grad_norm": 0.40230998396873474, "learning_rate": 9.966674144821266e-05, "loss": 2.0037, "step": 354 }, { "epoch": 0.036875454451023165, "grad_norm": 0.33027613162994385, "learning_rate": 9.966485808135165e-05, "loss": 1.7922, "step": 355 }, { "epoch": 0.03697932897060351, "grad_norm": 0.342254102230072, "learning_rate": 9.966296942557062e-05, "loss": 1.8094, "step": 356 }, { "epoch": 0.03708320349018386, "grad_norm": 0.3351558446884155, "learning_rate": 9.966107548107074e-05, "loss": 1.803, "step": 357 }, { "epoch": 0.0371870780097642, "grad_norm": 0.34574928879737854, "learning_rate": 9.96591762480537e-05, "loss": 1.9305, "step": 358 }, { "epoch": 0.03729095252934455, "grad_norm": 0.34656211733818054, "learning_rate": 9.965727172672174e-05, "loss": 1.781, "step": 359 }, { "epoch": 0.0373948270489249, "grad_norm": 0.35084661841392517, "learning_rate": 9.965536191727769e-05, "loss": 1.886, "step": 360 }, { "epoch": 0.037498701568505245, "grad_norm": 0.34373828768730164, "learning_rate": 9.965344681992491e-05, "loss": 1.8182, "step": 361 }, { "epoch": 0.03760257608808559, "grad_norm": 0.33524051308631897, "learning_rate": 9.965152643486738e-05, "loss": 1.8175, "step": 362 }, { "epoch": 0.03770645060766594, "grad_norm": 0.3301374912261963, "learning_rate": 9.964960076230955e-05, "loss": 1.7128, "step": 363 }, { "epoch": 0.03781032512724629, "grad_norm": 0.35539349913597107, "learning_rate": 9.964766980245653e-05, "loss": 1.8029, "step": 364 }, { "epoch": 0.037914199646826637, "grad_norm": 0.3248385190963745, "learning_rate": 9.964573355551394e-05, "loss": 1.6429, "step": 365 }, { "epoch": 0.03801807416640698, "grad_norm": 0.34955573081970215, "learning_rate": 9.964379202168799e-05, "loss": 1.9152, "step": 366 }, { "epoch": 0.038121948685987325, "grad_norm": 0.32366281747817993, "learning_rate": 9.964184520118542e-05, "loss": 1.7536, "step": 367 }, { "epoch": 0.03822582320556767, "grad_norm": 0.3177925646305084, "learning_rate": 9.963989309421356e-05, "loss": 1.7626, "step": 368 }, { "epoch": 0.03832969772514802, "grad_norm": 0.35649746656417847, "learning_rate": 9.96379357009803e-05, "loss": 1.822, "step": 369 }, { "epoch": 0.03843357224472837, "grad_norm": 0.3334449827671051, "learning_rate": 9.963597302169406e-05, "loss": 1.7122, "step": 370 }, { "epoch": 0.038537446764308717, "grad_norm": 0.35319963097572327, "learning_rate": 9.963400505656388e-05, "loss": 1.8249, "step": 371 }, { "epoch": 0.038641321283889064, "grad_norm": 0.34933820366859436, "learning_rate": 9.963203180579932e-05, "loss": 2.1094, "step": 372 }, { "epoch": 0.03874519580346941, "grad_norm": 0.35011550784111023, "learning_rate": 9.963005326961052e-05, "loss": 1.8247, "step": 373 }, { "epoch": 0.03884907032304975, "grad_norm": 0.4046299159526825, "learning_rate": 9.962806944820817e-05, "loss": 2.1071, "step": 374 }, { "epoch": 0.0389529448426301, "grad_norm": 0.3381158113479614, "learning_rate": 9.962608034180353e-05, "loss": 1.8242, "step": 375 }, { "epoch": 0.03905681936221045, "grad_norm": 0.35168975591659546, "learning_rate": 9.962408595060845e-05, "loss": 2.0001, "step": 376 }, { "epoch": 0.039160693881790797, "grad_norm": 0.35159188508987427, "learning_rate": 9.962208627483529e-05, "loss": 1.8534, "step": 377 }, { "epoch": 0.039264568401371144, "grad_norm": 0.31816786527633667, "learning_rate": 9.962008131469703e-05, "loss": 1.6624, "step": 378 }, { "epoch": 0.03936844292095149, "grad_norm": 0.33059608936309814, "learning_rate": 9.961807107040712e-05, "loss": 1.7612, "step": 379 }, { "epoch": 0.03947231744053184, "grad_norm": 0.33961811661720276, "learning_rate": 9.96160555421797e-05, "loss": 1.8432, "step": 380 }, { "epoch": 0.03957619196011219, "grad_norm": 0.3444601595401764, "learning_rate": 9.961403473022939e-05, "loss": 1.8842, "step": 381 }, { "epoch": 0.03968006647969253, "grad_norm": 0.3596336841583252, "learning_rate": 9.961200863477139e-05, "loss": 2.0091, "step": 382 }, { "epoch": 0.039783940999272877, "grad_norm": 0.35697799921035767, "learning_rate": 9.960997725602144e-05, "loss": 1.8124, "step": 383 }, { "epoch": 0.039887815518853224, "grad_norm": 0.33248475193977356, "learning_rate": 9.96079405941959e-05, "loss": 1.9146, "step": 384 }, { "epoch": 0.03999169003843357, "grad_norm": 0.3686772882938385, "learning_rate": 9.960589864951162e-05, "loss": 1.8714, "step": 385 }, { "epoch": 0.04009556455801392, "grad_norm": 0.36085087060928345, "learning_rate": 9.960385142218609e-05, "loss": 1.8268, "step": 386 }, { "epoch": 0.04019943907759427, "grad_norm": 0.32262122631073, "learning_rate": 9.960179891243731e-05, "loss": 1.6027, "step": 387 }, { "epoch": 0.040303313597174616, "grad_norm": 0.361555278301239, "learning_rate": 9.959974112048386e-05, "loss": 1.9221, "step": 388 }, { "epoch": 0.040407188116754963, "grad_norm": 0.3558778166770935, "learning_rate": 9.959767804654487e-05, "loss": 1.8234, "step": 389 }, { "epoch": 0.040511062636335304, "grad_norm": 0.3629186749458313, "learning_rate": 9.959560969084003e-05, "loss": 1.9483, "step": 390 }, { "epoch": 0.04061493715591565, "grad_norm": 0.37084469199180603, "learning_rate": 9.959353605358964e-05, "loss": 1.7024, "step": 391 }, { "epoch": 0.040718811675496, "grad_norm": 0.3552490770816803, "learning_rate": 9.95914571350145e-05, "loss": 1.884, "step": 392 }, { "epoch": 0.04082268619507635, "grad_norm": 0.34589436650276184, "learning_rate": 9.958937293533599e-05, "loss": 1.8548, "step": 393 }, { "epoch": 0.040926560714656696, "grad_norm": 0.3737829327583313, "learning_rate": 9.958728345477608e-05, "loss": 2.0568, "step": 394 }, { "epoch": 0.041030435234237043, "grad_norm": 0.35843873023986816, "learning_rate": 9.958518869355728e-05, "loss": 1.7908, "step": 395 }, { "epoch": 0.04113430975381739, "grad_norm": 0.6880629062652588, "learning_rate": 9.958308865190267e-05, "loss": 1.8225, "step": 396 }, { "epoch": 0.04123818427339773, "grad_norm": 0.3438432812690735, "learning_rate": 9.958098333003588e-05, "loss": 1.7719, "step": 397 }, { "epoch": 0.04134205879297808, "grad_norm": 0.3586975634098053, "learning_rate": 9.957887272818112e-05, "loss": 1.9513, "step": 398 }, { "epoch": 0.04144593331255843, "grad_norm": 0.3421470522880554, "learning_rate": 9.957675684656312e-05, "loss": 1.8509, "step": 399 }, { "epoch": 0.041549807832138776, "grad_norm": 0.3313000202178955, "learning_rate": 9.957463568540725e-05, "loss": 1.8173, "step": 400 }, { "epoch": 0.041653682351719123, "grad_norm": 0.3544430732727051, "learning_rate": 9.957250924493938e-05, "loss": 1.8059, "step": 401 }, { "epoch": 0.04175755687129947, "grad_norm": 0.36002522706985474, "learning_rate": 9.957037752538592e-05, "loss": 1.9456, "step": 402 }, { "epoch": 0.04186143139087982, "grad_norm": 0.3440776765346527, "learning_rate": 9.956824052697395e-05, "loss": 1.7831, "step": 403 }, { "epoch": 0.04196530591046017, "grad_norm": 0.36796486377716064, "learning_rate": 9.956609824993099e-05, "loss": 1.791, "step": 404 }, { "epoch": 0.04206918043004051, "grad_norm": 0.3968099355697632, "learning_rate": 9.956395069448523e-05, "loss": 1.8728, "step": 405 }, { "epoch": 0.042173054949620856, "grad_norm": 0.3467409610748291, "learning_rate": 9.95617978608653e-05, "loss": 1.8004, "step": 406 }, { "epoch": 0.042276929469201203, "grad_norm": 0.3945915102958679, "learning_rate": 9.955963974930052e-05, "loss": 1.7891, "step": 407 }, { "epoch": 0.04238080398878155, "grad_norm": 0.3563166856765747, "learning_rate": 9.955747636002068e-05, "loss": 1.9755, "step": 408 }, { "epoch": 0.0424846785083619, "grad_norm": 0.4015011191368103, "learning_rate": 9.955530769325616e-05, "loss": 1.6478, "step": 409 }, { "epoch": 0.04258855302794225, "grad_norm": 0.36491334438323975, "learning_rate": 9.955313374923791e-05, "loss": 1.9403, "step": 410 }, { "epoch": 0.042692427547522595, "grad_norm": 0.3891184628009796, "learning_rate": 9.955095452819747e-05, "loss": 2.0413, "step": 411 }, { "epoch": 0.04279630206710294, "grad_norm": 0.3426514267921448, "learning_rate": 9.954877003036687e-05, "loss": 1.6253, "step": 412 }, { "epoch": 0.042900176586683283, "grad_norm": 0.48841214179992676, "learning_rate": 9.954658025597876e-05, "loss": 2.1789, "step": 413 }, { "epoch": 0.04300405110626363, "grad_norm": 0.3180578351020813, "learning_rate": 9.954438520526635e-05, "loss": 1.4562, "step": 414 }, { "epoch": 0.04310792562584398, "grad_norm": 0.3487285077571869, "learning_rate": 9.954218487846334e-05, "loss": 1.7904, "step": 415 }, { "epoch": 0.04321180014542433, "grad_norm": 0.3434458374977112, "learning_rate": 9.953997927580411e-05, "loss": 1.8548, "step": 416 }, { "epoch": 0.043315674665004675, "grad_norm": 0.3450503647327423, "learning_rate": 9.953776839752351e-05, "loss": 1.7331, "step": 417 }, { "epoch": 0.04341954918458502, "grad_norm": 0.33946654200553894, "learning_rate": 9.953555224385698e-05, "loss": 1.883, "step": 418 }, { "epoch": 0.04352342370416537, "grad_norm": 0.37217575311660767, "learning_rate": 9.953333081504052e-05, "loss": 1.6892, "step": 419 }, { "epoch": 0.04362729822374572, "grad_norm": 0.3341975510120392, "learning_rate": 9.953110411131072e-05, "loss": 1.734, "step": 420 }, { "epoch": 0.04373117274332606, "grad_norm": 0.3347923159599304, "learning_rate": 9.952887213290469e-05, "loss": 1.8173, "step": 421 }, { "epoch": 0.04383504726290641, "grad_norm": 0.35578978061676025, "learning_rate": 9.952663488006011e-05, "loss": 1.9877, "step": 422 }, { "epoch": 0.043938921782486755, "grad_norm": 0.3420504331588745, "learning_rate": 9.952439235301522e-05, "loss": 1.8042, "step": 423 }, { "epoch": 0.0440427963020671, "grad_norm": 0.3436269760131836, "learning_rate": 9.952214455200887e-05, "loss": 1.7833, "step": 424 }, { "epoch": 0.04414667082164745, "grad_norm": 0.3772909641265869, "learning_rate": 9.951989147728043e-05, "loss": 1.9778, "step": 425 }, { "epoch": 0.0442505453412278, "grad_norm": 0.3602845370769501, "learning_rate": 9.951763312906979e-05, "loss": 1.8447, "step": 426 }, { "epoch": 0.044354419860808146, "grad_norm": 0.3500402271747589, "learning_rate": 9.951536950761748e-05, "loss": 1.6772, "step": 427 }, { "epoch": 0.044458294380388494, "grad_norm": 0.3483547866344452, "learning_rate": 9.951310061316455e-05, "loss": 1.8328, "step": 428 }, { "epoch": 0.044562168899968835, "grad_norm": 0.3545311391353607, "learning_rate": 9.951082644595264e-05, "loss": 1.8814, "step": 429 }, { "epoch": 0.04466604341954918, "grad_norm": 0.33376967906951904, "learning_rate": 9.95085470062239e-05, "loss": 1.8108, "step": 430 }, { "epoch": 0.04476991793912953, "grad_norm": 0.330287903547287, "learning_rate": 9.950626229422107e-05, "loss": 1.7423, "step": 431 }, { "epoch": 0.04487379245870988, "grad_norm": 0.36695408821105957, "learning_rate": 9.950397231018748e-05, "loss": 1.7873, "step": 432 }, { "epoch": 0.044977666978290226, "grad_norm": 0.3538840711116791, "learning_rate": 9.950167705436699e-05, "loss": 1.9949, "step": 433 }, { "epoch": 0.045081541497870574, "grad_norm": 0.31848835945129395, "learning_rate": 9.949937652700402e-05, "loss": 1.5736, "step": 434 }, { "epoch": 0.04518541601745092, "grad_norm": 0.3445279598236084, "learning_rate": 9.949707072834356e-05, "loss": 1.7662, "step": 435 }, { "epoch": 0.04528929053703127, "grad_norm": 0.37137672305107117, "learning_rate": 9.949475965863116e-05, "loss": 1.8209, "step": 436 }, { "epoch": 0.04539316505661161, "grad_norm": 0.43532824516296387, "learning_rate": 9.949244331811293e-05, "loss": 2.1643, "step": 437 }, { "epoch": 0.04549703957619196, "grad_norm": 0.35221028327941895, "learning_rate": 9.949012170703556e-05, "loss": 1.8557, "step": 438 }, { "epoch": 0.045600914095772306, "grad_norm": 0.34177568554878235, "learning_rate": 9.948779482564624e-05, "loss": 1.7119, "step": 439 }, { "epoch": 0.045704788615352654, "grad_norm": 0.3455469012260437, "learning_rate": 9.948546267419278e-05, "loss": 1.8411, "step": 440 }, { "epoch": 0.045808663134933, "grad_norm": 0.34957823157310486, "learning_rate": 9.948312525292358e-05, "loss": 1.7674, "step": 441 }, { "epoch": 0.04591253765451335, "grad_norm": 0.3494671583175659, "learning_rate": 9.948078256208751e-05, "loss": 1.9521, "step": 442 }, { "epoch": 0.0460164121740937, "grad_norm": 0.3724217116832733, "learning_rate": 9.947843460193407e-05, "loss": 1.9358, "step": 443 }, { "epoch": 0.046120286693674045, "grad_norm": 0.34524059295654297, "learning_rate": 9.947608137271328e-05, "loss": 1.8426, "step": 444 }, { "epoch": 0.046224161213254386, "grad_norm": 0.3344314396381378, "learning_rate": 9.947372287467576e-05, "loss": 1.7263, "step": 445 }, { "epoch": 0.046328035732834734, "grad_norm": 0.3640967309474945, "learning_rate": 9.947135910807265e-05, "loss": 1.9716, "step": 446 }, { "epoch": 0.04643191025241508, "grad_norm": 0.38016462326049805, "learning_rate": 9.946899007315569e-05, "loss": 1.7952, "step": 447 }, { "epoch": 0.04653578477199543, "grad_norm": 0.35454219579696655, "learning_rate": 9.946661577017717e-05, "loss": 1.8804, "step": 448 }, { "epoch": 0.04663965929157578, "grad_norm": 0.3755532503128052, "learning_rate": 9.946423619938992e-05, "loss": 2.0071, "step": 449 }, { "epoch": 0.046743533811156125, "grad_norm": 0.3422885537147522, "learning_rate": 9.946185136104735e-05, "loss": 1.6811, "step": 450 }, { "epoch": 0.04684740833073647, "grad_norm": 0.33953240513801575, "learning_rate": 9.945946125540343e-05, "loss": 1.7127, "step": 451 }, { "epoch": 0.046951282850316814, "grad_norm": 0.313754677772522, "learning_rate": 9.945706588271269e-05, "loss": 1.7116, "step": 452 }, { "epoch": 0.04705515736989716, "grad_norm": 0.3217809200286865, "learning_rate": 9.94546652432302e-05, "loss": 1.7475, "step": 453 }, { "epoch": 0.04715903188947751, "grad_norm": 0.35068783164024353, "learning_rate": 9.945225933721163e-05, "loss": 1.7588, "step": 454 }, { "epoch": 0.04726290640905786, "grad_norm": 0.32471030950546265, "learning_rate": 9.944984816491318e-05, "loss": 1.6391, "step": 455 }, { "epoch": 0.047366780928638205, "grad_norm": 0.3416842222213745, "learning_rate": 9.944743172659164e-05, "loss": 1.7689, "step": 456 }, { "epoch": 0.04747065544821855, "grad_norm": 0.3535037934780121, "learning_rate": 9.94450100225043e-05, "loss": 1.6911, "step": 457 }, { "epoch": 0.0475745299677989, "grad_norm": 0.3632698953151703, "learning_rate": 9.944258305290908e-05, "loss": 1.949, "step": 458 }, { "epoch": 0.04767840448737925, "grad_norm": 0.38424357771873474, "learning_rate": 9.944015081806445e-05, "loss": 1.9012, "step": 459 }, { "epoch": 0.04778227900695959, "grad_norm": 0.3942817747592926, "learning_rate": 9.94377133182294e-05, "loss": 2.0238, "step": 460 }, { "epoch": 0.04788615352653994, "grad_norm": 0.3337697684764862, "learning_rate": 9.943527055366351e-05, "loss": 1.7753, "step": 461 }, { "epoch": 0.047990028046120285, "grad_norm": 0.3492131233215332, "learning_rate": 9.94328225246269e-05, "loss": 1.7137, "step": 462 }, { "epoch": 0.04809390256570063, "grad_norm": 0.3351963758468628, "learning_rate": 9.943036923138032e-05, "loss": 1.7599, "step": 463 }, { "epoch": 0.04819777708528098, "grad_norm": 0.33879536390304565, "learning_rate": 9.942791067418496e-05, "loss": 1.7868, "step": 464 }, { "epoch": 0.04830165160486133, "grad_norm": 0.3504945933818817, "learning_rate": 9.942544685330267e-05, "loss": 1.93, "step": 465 }, { "epoch": 0.048405526124441677, "grad_norm": 0.3818790912628174, "learning_rate": 9.942297776899583e-05, "loss": 1.9025, "step": 466 }, { "epoch": 0.048509400644022024, "grad_norm": 0.346236914396286, "learning_rate": 9.942050342152736e-05, "loss": 1.7295, "step": 467 }, { "epoch": 0.048613275163602365, "grad_norm": 0.39538463950157166, "learning_rate": 9.941802381116078e-05, "loss": 1.5522, "step": 468 }, { "epoch": 0.04871714968318271, "grad_norm": 0.35648438334465027, "learning_rate": 9.941553893816014e-05, "loss": 1.7374, "step": 469 }, { "epoch": 0.04882102420276306, "grad_norm": 0.3466714024543762, "learning_rate": 9.941304880279005e-05, "loss": 1.7781, "step": 470 }, { "epoch": 0.04892489872234341, "grad_norm": 0.38975635170936584, "learning_rate": 9.941055340531571e-05, "loss": 1.9754, "step": 471 }, { "epoch": 0.049028773241923757, "grad_norm": 0.3584454655647278, "learning_rate": 9.940805274600285e-05, "loss": 1.8484, "step": 472 }, { "epoch": 0.049132647761504104, "grad_norm": 0.33715423941612244, "learning_rate": 9.940554682511775e-05, "loss": 1.9061, "step": 473 }, { "epoch": 0.04923652228108445, "grad_norm": 0.35068365931510925, "learning_rate": 9.940303564292731e-05, "loss": 1.759, "step": 474 }, { "epoch": 0.0493403968006648, "grad_norm": 0.37917405366897583, "learning_rate": 9.940051919969893e-05, "loss": 1.8638, "step": 475 }, { "epoch": 0.04944427132024514, "grad_norm": 0.38259661197662354, "learning_rate": 9.939799749570059e-05, "loss": 1.9465, "step": 476 }, { "epoch": 0.04954814583982549, "grad_norm": 0.334435373544693, "learning_rate": 9.939547053120084e-05, "loss": 1.8029, "step": 477 }, { "epoch": 0.049652020359405837, "grad_norm": 0.3447658121585846, "learning_rate": 9.939293830646878e-05, "loss": 1.6021, "step": 478 }, { "epoch": 0.049755894878986184, "grad_norm": 0.3410322964191437, "learning_rate": 9.939040082177407e-05, "loss": 1.9107, "step": 479 }, { "epoch": 0.04985976939856653, "grad_norm": 0.3771478831768036, "learning_rate": 9.938785807738693e-05, "loss": 2.0531, "step": 480 }, { "epoch": 0.04996364391814688, "grad_norm": 0.421427845954895, "learning_rate": 9.938531007357813e-05, "loss": 1.9953, "step": 481 }, { "epoch": 0.05006751843772723, "grad_norm": 0.347931444644928, "learning_rate": 9.938275681061903e-05, "loss": 1.75, "step": 482 }, { "epoch": 0.050171392957307576, "grad_norm": 0.38480523228645325, "learning_rate": 9.938019828878155e-05, "loss": 1.9639, "step": 483 }, { "epoch": 0.050275267476887917, "grad_norm": 0.3436914384365082, "learning_rate": 9.937763450833813e-05, "loss": 1.8305, "step": 484 }, { "epoch": 0.050379141996468264, "grad_norm": 0.3643799126148224, "learning_rate": 9.93750654695618e-05, "loss": 1.8667, "step": 485 }, { "epoch": 0.05048301651604861, "grad_norm": 0.3727877140045166, "learning_rate": 9.937249117272612e-05, "loss": 1.8097, "step": 486 }, { "epoch": 0.05058689103562896, "grad_norm": 0.34742361307144165, "learning_rate": 9.936991161810525e-05, "loss": 1.872, "step": 487 }, { "epoch": 0.05069076555520931, "grad_norm": 0.34192922711372375, "learning_rate": 9.93673268059739e-05, "loss": 1.777, "step": 488 }, { "epoch": 0.050794640074789656, "grad_norm": 0.34614014625549316, "learning_rate": 9.936473673660733e-05, "loss": 1.8364, "step": 489 }, { "epoch": 0.050898514594370003, "grad_norm": 0.3394782841205597, "learning_rate": 9.936214141028135e-05, "loss": 1.7858, "step": 490 }, { "epoch": 0.05100238911395035, "grad_norm": 0.3589742183685303, "learning_rate": 9.935954082727235e-05, "loss": 2.1136, "step": 491 }, { "epoch": 0.05110626363353069, "grad_norm": 0.3754216134548187, "learning_rate": 9.935693498785728e-05, "loss": 1.8873, "step": 492 }, { "epoch": 0.05121013815311104, "grad_norm": 0.3502977192401886, "learning_rate": 9.93543238923136e-05, "loss": 1.6858, "step": 493 }, { "epoch": 0.05131401267269139, "grad_norm": 0.3583030104637146, "learning_rate": 9.935170754091945e-05, "loss": 1.8528, "step": 494 }, { "epoch": 0.051417887192271736, "grad_norm": 0.3400440216064453, "learning_rate": 9.934908593395338e-05, "loss": 1.7657, "step": 495 }, { "epoch": 0.051521761711852083, "grad_norm": 0.31973132491111755, "learning_rate": 9.934645907169459e-05, "loss": 1.7152, "step": 496 }, { "epoch": 0.05162563623143243, "grad_norm": 0.33886149525642395, "learning_rate": 9.934382695442284e-05, "loss": 1.6483, "step": 497 }, { "epoch": 0.05172951075101278, "grad_norm": 0.3560135066509247, "learning_rate": 9.934118958241842e-05, "loss": 1.8188, "step": 498 }, { "epoch": 0.05183338527059313, "grad_norm": 0.32066768407821655, "learning_rate": 9.933854695596216e-05, "loss": 1.6657, "step": 499 }, { "epoch": 0.05193725979017347, "grad_norm": 0.3544224798679352, "learning_rate": 9.933589907533554e-05, "loss": 1.7714, "step": 500 }, { "epoch": 0.052041134309753816, "grad_norm": 0.3491422235965729, "learning_rate": 9.933324594082048e-05, "loss": 1.9087, "step": 501 }, { "epoch": 0.052145008829334163, "grad_norm": 0.32746124267578125, "learning_rate": 9.933058755269955e-05, "loss": 1.8386, "step": 502 }, { "epoch": 0.05224888334891451, "grad_norm": 0.32933396100997925, "learning_rate": 9.932792391125583e-05, "loss": 1.7782, "step": 503 }, { "epoch": 0.05235275786849486, "grad_norm": 0.35885295271873474, "learning_rate": 9.932525501677299e-05, "loss": 1.8822, "step": 504 }, { "epoch": 0.05245663238807521, "grad_norm": 0.3308489918708801, "learning_rate": 9.932258086953525e-05, "loss": 1.8777, "step": 505 }, { "epoch": 0.052560506907655555, "grad_norm": 0.32857421040534973, "learning_rate": 9.931990146982736e-05, "loss": 1.6693, "step": 506 }, { "epoch": 0.052664381427235896, "grad_norm": 0.37959519028663635, "learning_rate": 9.931721681793468e-05, "loss": 1.6766, "step": 507 }, { "epoch": 0.052768255946816243, "grad_norm": 0.39978745579719543, "learning_rate": 9.931452691414311e-05, "loss": 1.9316, "step": 508 }, { "epoch": 0.05287213046639659, "grad_norm": 0.36127495765686035, "learning_rate": 9.93118317587391e-05, "loss": 1.9157, "step": 509 }, { "epoch": 0.05297600498597694, "grad_norm": 0.3422827422618866, "learning_rate": 9.930913135200963e-05, "loss": 1.718, "step": 510 }, { "epoch": 0.05307987950555729, "grad_norm": 0.3710615932941437, "learning_rate": 9.930642569424231e-05, "loss": 1.7702, "step": 511 }, { "epoch": 0.053183754025137635, "grad_norm": 0.3597875237464905, "learning_rate": 9.930371478572526e-05, "loss": 1.6665, "step": 512 }, { "epoch": 0.05328762854471798, "grad_norm": 0.3505166172981262, "learning_rate": 9.930099862674716e-05, "loss": 1.7484, "step": 513 }, { "epoch": 0.05339150306429833, "grad_norm": 0.343287855386734, "learning_rate": 9.929827721759728e-05, "loss": 1.9197, "step": 514 }, { "epoch": 0.05349537758387867, "grad_norm": 0.35222482681274414, "learning_rate": 9.92955505585654e-05, "loss": 1.8467, "step": 515 }, { "epoch": 0.05359925210345902, "grad_norm": 0.39000675082206726, "learning_rate": 9.929281864994193e-05, "loss": 1.8709, "step": 516 }, { "epoch": 0.05370312662303937, "grad_norm": 0.3652053773403168, "learning_rate": 9.929008149201774e-05, "loss": 1.9486, "step": 517 }, { "epoch": 0.053807001142619715, "grad_norm": 0.3311000168323517, "learning_rate": 9.928733908508438e-05, "loss": 1.7749, "step": 518 }, { "epoch": 0.05391087566220006, "grad_norm": 0.3284410536289215, "learning_rate": 9.928459142943385e-05, "loss": 1.8424, "step": 519 }, { "epoch": 0.05401475018178041, "grad_norm": 0.3507472574710846, "learning_rate": 9.928183852535877e-05, "loss": 1.8719, "step": 520 }, { "epoch": 0.05411862470136076, "grad_norm": 0.35009628534317017, "learning_rate": 9.92790803731523e-05, "loss": 1.8434, "step": 521 }, { "epoch": 0.054222499220941106, "grad_norm": 0.39859047532081604, "learning_rate": 9.927631697310816e-05, "loss": 1.9473, "step": 522 }, { "epoch": 0.05432637374052145, "grad_norm": 0.369045615196228, "learning_rate": 9.927354832552063e-05, "loss": 1.9063, "step": 523 }, { "epoch": 0.054430248260101795, "grad_norm": 0.3442009389400482, "learning_rate": 9.927077443068455e-05, "loss": 1.7318, "step": 524 }, { "epoch": 0.05453412277968214, "grad_norm": 0.3606685996055603, "learning_rate": 9.926799528889534e-05, "loss": 1.8991, "step": 525 }, { "epoch": 0.05463799729926249, "grad_norm": 0.36547592282295227, "learning_rate": 9.926521090044891e-05, "loss": 2.1391, "step": 526 }, { "epoch": 0.05474187181884284, "grad_norm": 0.334673136472702, "learning_rate": 9.926242126564182e-05, "loss": 1.8072, "step": 527 }, { "epoch": 0.054845746338423186, "grad_norm": 0.3335951864719391, "learning_rate": 9.925962638477113e-05, "loss": 1.7599, "step": 528 }, { "epoch": 0.054949620858003534, "grad_norm": 0.33183959126472473, "learning_rate": 9.925682625813446e-05, "loss": 1.7878, "step": 529 }, { "epoch": 0.05505349537758388, "grad_norm": 0.33561477065086365, "learning_rate": 9.925402088603e-05, "loss": 1.7764, "step": 530 }, { "epoch": 0.05515736989716422, "grad_norm": 0.33029311895370483, "learning_rate": 9.925121026875654e-05, "loss": 1.7976, "step": 531 }, { "epoch": 0.05526124441674457, "grad_norm": 0.3405066430568695, "learning_rate": 9.924839440661334e-05, "loss": 1.8439, "step": 532 }, { "epoch": 0.05536511893632492, "grad_norm": 0.3600308299064636, "learning_rate": 9.92455732999003e-05, "loss": 1.9871, "step": 533 }, { "epoch": 0.055468993455905266, "grad_norm": 0.4009020924568176, "learning_rate": 9.924274694891782e-05, "loss": 1.7941, "step": 534 }, { "epoch": 0.055572867975485614, "grad_norm": 0.3483946919441223, "learning_rate": 9.92399153539669e-05, "loss": 1.7015, "step": 535 }, { "epoch": 0.05567674249506596, "grad_norm": 0.37152403593063354, "learning_rate": 9.923707851534909e-05, "loss": 1.791, "step": 536 }, { "epoch": 0.05578061701464631, "grad_norm": 0.3480570912361145, "learning_rate": 9.923423643336648e-05, "loss": 1.7576, "step": 537 }, { "epoch": 0.05588449153422666, "grad_norm": 0.38511136174201965, "learning_rate": 9.923138910832172e-05, "loss": 1.963, "step": 538 }, { "epoch": 0.055988366053807, "grad_norm": 0.31676754355430603, "learning_rate": 9.922853654051806e-05, "loss": 1.6796, "step": 539 }, { "epoch": 0.056092240573387346, "grad_norm": 0.325538694858551, "learning_rate": 9.922567873025924e-05, "loss": 1.7261, "step": 540 }, { "epoch": 0.056196115092967694, "grad_norm": 0.3761240839958191, "learning_rate": 9.922281567784961e-05, "loss": 1.825, "step": 541 }, { "epoch": 0.05629998961254804, "grad_norm": 0.33543258905410767, "learning_rate": 9.921994738359409e-05, "loss": 1.791, "step": 542 }, { "epoch": 0.05640386413212839, "grad_norm": 0.34951722621917725, "learning_rate": 9.921707384779807e-05, "loss": 1.8141, "step": 543 }, { "epoch": 0.05650773865170874, "grad_norm": 0.34788161516189575, "learning_rate": 9.92141950707676e-05, "loss": 1.7599, "step": 544 }, { "epoch": 0.056611613171289085, "grad_norm": 0.32082879543304443, "learning_rate": 9.921131105280925e-05, "loss": 1.5647, "step": 545 }, { "epoch": 0.05671548769086943, "grad_norm": 0.37312445044517517, "learning_rate": 9.920842179423013e-05, "loss": 1.9926, "step": 546 }, { "epoch": 0.056819362210449774, "grad_norm": 0.32151979207992554, "learning_rate": 9.920552729533792e-05, "loss": 1.6564, "step": 547 }, { "epoch": 0.05692323673003012, "grad_norm": 0.3291560709476471, "learning_rate": 9.92026275564409e-05, "loss": 1.8871, "step": 548 }, { "epoch": 0.05702711124961047, "grad_norm": 0.370978444814682, "learning_rate": 9.919972257784783e-05, "loss": 2.0246, "step": 549 }, { "epoch": 0.05713098576919082, "grad_norm": 0.3654458820819855, "learning_rate": 9.919681235986806e-05, "loss": 1.8523, "step": 550 }, { "epoch": 0.057234860288771165, "grad_norm": 0.3720274567604065, "learning_rate": 9.919389690281154e-05, "loss": 1.7427, "step": 551 }, { "epoch": 0.05733873480835151, "grad_norm": 0.3372805714607239, "learning_rate": 9.919097620698872e-05, "loss": 1.8491, "step": 552 }, { "epoch": 0.05744260932793186, "grad_norm": 0.3331336975097656, "learning_rate": 9.918805027271064e-05, "loss": 1.7309, "step": 553 }, { "epoch": 0.05754648384751221, "grad_norm": 0.3529910743236542, "learning_rate": 9.918511910028888e-05, "loss": 1.8213, "step": 554 }, { "epoch": 0.05765035836709255, "grad_norm": 0.3342648446559906, "learning_rate": 9.918218269003561e-05, "loss": 1.6017, "step": 555 }, { "epoch": 0.0577542328866729, "grad_norm": 0.41469427943229675, "learning_rate": 9.917924104226351e-05, "loss": 1.7434, "step": 556 }, { "epoch": 0.057858107406253245, "grad_norm": 0.3674336373806, "learning_rate": 9.917629415728584e-05, "loss": 1.8997, "step": 557 }, { "epoch": 0.05796198192583359, "grad_norm": 0.3625181317329407, "learning_rate": 9.917334203541645e-05, "loss": 1.8736, "step": 558 }, { "epoch": 0.05806585644541394, "grad_norm": 0.35162436962127686, "learning_rate": 9.917038467696968e-05, "loss": 1.8951, "step": 559 }, { "epoch": 0.05816973096499429, "grad_norm": 0.35170653462409973, "learning_rate": 9.916742208226051e-05, "loss": 1.9565, "step": 560 }, { "epoch": 0.058273605484574637, "grad_norm": 0.3626965284347534, "learning_rate": 9.91644542516044e-05, "loss": 1.9673, "step": 561 }, { "epoch": 0.05837748000415498, "grad_norm": 0.36217907071113586, "learning_rate": 9.91614811853174e-05, "loss": 2.0416, "step": 562 }, { "epoch": 0.058481354523735325, "grad_norm": 0.3251532018184662, "learning_rate": 9.915850288371616e-05, "loss": 1.8444, "step": 563 }, { "epoch": 0.05858522904331567, "grad_norm": 0.3693266212940216, "learning_rate": 9.915551934711777e-05, "loss": 1.7398, "step": 564 }, { "epoch": 0.05868910356289602, "grad_norm": 0.34907597303390503, "learning_rate": 9.915253057584003e-05, "loss": 1.5016, "step": 565 }, { "epoch": 0.05879297808247637, "grad_norm": 0.3413955867290497, "learning_rate": 9.914953657020118e-05, "loss": 1.7225, "step": 566 }, { "epoch": 0.058896852602056717, "grad_norm": 0.34406933188438416, "learning_rate": 9.914653733052006e-05, "loss": 1.7444, "step": 567 }, { "epoch": 0.059000727121637064, "grad_norm": 0.36242082715034485, "learning_rate": 9.914353285711607e-05, "loss": 1.8678, "step": 568 }, { "epoch": 0.05910460164121741, "grad_norm": 0.359131395816803, "learning_rate": 9.914052315030917e-05, "loss": 1.9416, "step": 569 }, { "epoch": 0.05920847616079775, "grad_norm": 0.35352620482444763, "learning_rate": 9.913750821041987e-05, "loss": 1.8087, "step": 570 }, { "epoch": 0.0593123506803781, "grad_norm": 0.3557063043117523, "learning_rate": 9.913448803776922e-05, "loss": 1.9329, "step": 571 }, { "epoch": 0.05941622519995845, "grad_norm": 0.3462260067462921, "learning_rate": 9.913146263267887e-05, "loss": 1.882, "step": 572 }, { "epoch": 0.059520099719538797, "grad_norm": 0.32889804244041443, "learning_rate": 9.912843199547099e-05, "loss": 1.6253, "step": 573 }, { "epoch": 0.059623974239119144, "grad_norm": 0.3543769419193268, "learning_rate": 9.912539612646832e-05, "loss": 1.7809, "step": 574 }, { "epoch": 0.05972784875869949, "grad_norm": 0.38579022884368896, "learning_rate": 9.912235502599414e-05, "loss": 2.0883, "step": 575 }, { "epoch": 0.05983172327827984, "grad_norm": 0.3288674056529999, "learning_rate": 9.911930869437233e-05, "loss": 1.7313, "step": 576 }, { "epoch": 0.05993559779786019, "grad_norm": 0.35373395681381226, "learning_rate": 9.911625713192729e-05, "loss": 1.906, "step": 577 }, { "epoch": 0.06003947231744053, "grad_norm": 0.33762502670288086, "learning_rate": 9.911320033898398e-05, "loss": 1.8296, "step": 578 }, { "epoch": 0.060143346837020877, "grad_norm": 0.3767317831516266, "learning_rate": 9.911013831586795e-05, "loss": 1.8199, "step": 579 }, { "epoch": 0.060247221356601224, "grad_norm": 0.38876909017562866, "learning_rate": 9.910707106290525e-05, "loss": 1.6965, "step": 580 }, { "epoch": 0.06035109587618157, "grad_norm": 0.34621986746788025, "learning_rate": 9.910399858042252e-05, "loss": 1.7974, "step": 581 }, { "epoch": 0.06045497039576192, "grad_norm": 0.3526759743690491, "learning_rate": 9.910092086874699e-05, "loss": 1.7587, "step": 582 }, { "epoch": 0.06055884491534227, "grad_norm": 0.3554968237876892, "learning_rate": 9.909783792820637e-05, "loss": 1.7185, "step": 583 }, { "epoch": 0.060662719434922616, "grad_norm": 0.34225958585739136, "learning_rate": 9.909474975912899e-05, "loss": 1.9277, "step": 584 }, { "epoch": 0.060766593954502963, "grad_norm": 0.35381531715393066, "learning_rate": 9.909165636184372e-05, "loss": 1.8624, "step": 585 }, { "epoch": 0.060870468474083304, "grad_norm": 0.3494366407394409, "learning_rate": 9.908855773667997e-05, "loss": 1.8565, "step": 586 }, { "epoch": 0.06097434299366365, "grad_norm": 0.358967661857605, "learning_rate": 9.908545388396775e-05, "loss": 1.8135, "step": 587 }, { "epoch": 0.061078217513244, "grad_norm": 0.32239392399787903, "learning_rate": 9.908234480403755e-05, "loss": 1.7439, "step": 588 }, { "epoch": 0.06118209203282435, "grad_norm": 0.33216288685798645, "learning_rate": 9.90792304972205e-05, "loss": 1.8637, "step": 589 }, { "epoch": 0.061285966552404696, "grad_norm": 0.3340390622615814, "learning_rate": 9.907611096384823e-05, "loss": 1.6959, "step": 590 }, { "epoch": 0.061389841071985043, "grad_norm": 0.3495895564556122, "learning_rate": 9.907298620425294e-05, "loss": 1.7969, "step": 591 }, { "epoch": 0.06149371559156539, "grad_norm": 0.365520715713501, "learning_rate": 9.90698562187674e-05, "loss": 1.8457, "step": 592 }, { "epoch": 0.06159759011114574, "grad_norm": 0.33886629343032837, "learning_rate": 9.906672100772496e-05, "loss": 1.6876, "step": 593 }, { "epoch": 0.06170146463072608, "grad_norm": 0.3432716131210327, "learning_rate": 9.906358057145945e-05, "loss": 1.7644, "step": 594 }, { "epoch": 0.06180533915030643, "grad_norm": 0.35947611927986145, "learning_rate": 9.906043491030532e-05, "loss": 1.9074, "step": 595 }, { "epoch": 0.061909213669886776, "grad_norm": 0.3474954664707184, "learning_rate": 9.905728402459755e-05, "loss": 1.834, "step": 596 }, { "epoch": 0.062013088189467123, "grad_norm": 0.32925835251808167, "learning_rate": 9.905412791467171e-05, "loss": 1.7292, "step": 597 }, { "epoch": 0.06211696270904747, "grad_norm": 0.37957310676574707, "learning_rate": 9.905096658086387e-05, "loss": 2.0142, "step": 598 }, { "epoch": 0.06222083722862782, "grad_norm": 0.3465014398097992, "learning_rate": 9.904780002351071e-05, "loss": 1.852, "step": 599 }, { "epoch": 0.06232471174820817, "grad_norm": 0.364199697971344, "learning_rate": 9.904462824294944e-05, "loss": 1.7035, "step": 600 }, { "epoch": 0.062428586267788515, "grad_norm": 0.33271247148513794, "learning_rate": 9.904145123951781e-05, "loss": 1.7046, "step": 601 }, { "epoch": 0.06253246078736886, "grad_norm": 0.38924211263656616, "learning_rate": 9.903826901355417e-05, "loss": 1.8107, "step": 602 }, { "epoch": 0.0626363353069492, "grad_norm": 0.39860212802886963, "learning_rate": 9.90350815653974e-05, "loss": 1.8613, "step": 603 }, { "epoch": 0.06274020982652956, "grad_norm": 0.335452139377594, "learning_rate": 9.903188889538692e-05, "loss": 1.6605, "step": 604 }, { "epoch": 0.0628440843461099, "grad_norm": 0.3518449664115906, "learning_rate": 9.902869100386273e-05, "loss": 1.8248, "step": 605 }, { "epoch": 0.06294795886569024, "grad_norm": 0.36134716868400574, "learning_rate": 9.90254878911654e-05, "loss": 1.8456, "step": 606 }, { "epoch": 0.0630518333852706, "grad_norm": 0.3638603687286377, "learning_rate": 9.902227955763602e-05, "loss": 1.6454, "step": 607 }, { "epoch": 0.06315570790485094, "grad_norm": 0.3790292739868164, "learning_rate": 9.901906600361625e-05, "loss": 1.8036, "step": 608 }, { "epoch": 0.06325958242443129, "grad_norm": 0.3539649546146393, "learning_rate": 9.901584722944833e-05, "loss": 1.8153, "step": 609 }, { "epoch": 0.06336345694401163, "grad_norm": 0.33538633584976196, "learning_rate": 9.9012623235475e-05, "loss": 1.7256, "step": 610 }, { "epoch": 0.06346733146359199, "grad_norm": 0.3604874610900879, "learning_rate": 9.900939402203962e-05, "loss": 2.0305, "step": 611 }, { "epoch": 0.06357120598317233, "grad_norm": 0.34128594398498535, "learning_rate": 9.900615958948606e-05, "loss": 1.7801, "step": 612 }, { "epoch": 0.06367508050275267, "grad_norm": 0.3321555554866791, "learning_rate": 9.900291993815877e-05, "loss": 1.6963, "step": 613 }, { "epoch": 0.06377895502233302, "grad_norm": 0.32713523507118225, "learning_rate": 9.899967506840273e-05, "loss": 1.8211, "step": 614 }, { "epoch": 0.06388282954191336, "grad_norm": 0.34873101115226746, "learning_rate": 9.899642498056353e-05, "loss": 1.8328, "step": 615 }, { "epoch": 0.06398670406149372, "grad_norm": 0.36176908016204834, "learning_rate": 9.899316967498724e-05, "loss": 1.9433, "step": 616 }, { "epoch": 0.06409057858107406, "grad_norm": 0.35845959186553955, "learning_rate": 9.898990915202055e-05, "loss": 1.8383, "step": 617 }, { "epoch": 0.06419445310065441, "grad_norm": 0.3352903425693512, "learning_rate": 9.898664341201067e-05, "loss": 1.8279, "step": 618 }, { "epoch": 0.06429832762023475, "grad_norm": 0.3244091272354126, "learning_rate": 9.898337245530536e-05, "loss": 1.6484, "step": 619 }, { "epoch": 0.06440220213981511, "grad_norm": 0.39862748980522156, "learning_rate": 9.8980096282253e-05, "loss": 1.9679, "step": 620 }, { "epoch": 0.06450607665939545, "grad_norm": 0.34560245275497437, "learning_rate": 9.897681489320242e-05, "loss": 1.8297, "step": 621 }, { "epoch": 0.06460995117897579, "grad_norm": 0.37943777441978455, "learning_rate": 9.897352828850309e-05, "loss": 1.9845, "step": 622 }, { "epoch": 0.06471382569855615, "grad_norm": 0.3497175872325897, "learning_rate": 9.897023646850502e-05, "loss": 1.7576, "step": 623 }, { "epoch": 0.06481770021813649, "grad_norm": 0.3344300091266632, "learning_rate": 9.896693943355874e-05, "loss": 1.7502, "step": 624 }, { "epoch": 0.06492157473771684, "grad_norm": 0.3299304246902466, "learning_rate": 9.896363718401536e-05, "loss": 1.5323, "step": 625 }, { "epoch": 0.06502544925729718, "grad_norm": 0.3126195967197418, "learning_rate": 9.896032972022656e-05, "loss": 1.5328, "step": 626 }, { "epoch": 0.06512932377687754, "grad_norm": 0.33860689401626587, "learning_rate": 9.895701704254455e-05, "loss": 1.7393, "step": 627 }, { "epoch": 0.06523319829645788, "grad_norm": 0.332501083612442, "learning_rate": 9.89536991513221e-05, "loss": 1.6363, "step": 628 }, { "epoch": 0.06533707281603822, "grad_norm": 0.32221221923828125, "learning_rate": 9.895037604691256e-05, "loss": 1.6374, "step": 629 }, { "epoch": 0.06544094733561857, "grad_norm": 0.381583034992218, "learning_rate": 9.894704772966977e-05, "loss": 1.773, "step": 630 }, { "epoch": 0.06554482185519891, "grad_norm": 0.36580127477645874, "learning_rate": 9.894371419994821e-05, "loss": 1.7417, "step": 631 }, { "epoch": 0.06564869637477927, "grad_norm": 0.3486615717411041, "learning_rate": 9.894037545810288e-05, "loss": 1.8542, "step": 632 }, { "epoch": 0.06575257089435961, "grad_norm": 0.3709062337875366, "learning_rate": 9.89370315044893e-05, "loss": 1.917, "step": 633 }, { "epoch": 0.06585644541393997, "grad_norm": 0.37561196088790894, "learning_rate": 9.893368233946359e-05, "loss": 1.9253, "step": 634 }, { "epoch": 0.0659603199335203, "grad_norm": 0.32539495825767517, "learning_rate": 9.89303279633824e-05, "loss": 1.6948, "step": 635 }, { "epoch": 0.06606419445310066, "grad_norm": 0.32918450236320496, "learning_rate": 9.892696837660295e-05, "loss": 1.8504, "step": 636 }, { "epoch": 0.066168068972681, "grad_norm": 0.32979822158813477, "learning_rate": 9.892360357948303e-05, "loss": 1.7728, "step": 637 }, { "epoch": 0.06627194349226134, "grad_norm": 0.39485815167427063, "learning_rate": 9.892023357238094e-05, "loss": 1.8216, "step": 638 }, { "epoch": 0.0663758180118417, "grad_norm": 0.3529796302318573, "learning_rate": 9.891685835565557e-05, "loss": 1.725, "step": 639 }, { "epoch": 0.06647969253142204, "grad_norm": 0.34411367774009705, "learning_rate": 9.891347792966635e-05, "loss": 1.7718, "step": 640 }, { "epoch": 0.06658356705100239, "grad_norm": 0.38490092754364014, "learning_rate": 9.891009229477326e-05, "loss": 1.9705, "step": 641 }, { "epoch": 0.06668744157058273, "grad_norm": 0.3569811284542084, "learning_rate": 9.890670145133687e-05, "loss": 2.0534, "step": 642 }, { "epoch": 0.06679131609016309, "grad_norm": 0.3439379334449768, "learning_rate": 9.890330539971825e-05, "loss": 1.9179, "step": 643 }, { "epoch": 0.06689519060974343, "grad_norm": 0.3299177587032318, "learning_rate": 9.889990414027907e-05, "loss": 1.8077, "step": 644 }, { "epoch": 0.06699906512932377, "grad_norm": 0.3420833647251129, "learning_rate": 9.889649767338153e-05, "loss": 1.6712, "step": 645 }, { "epoch": 0.06710293964890413, "grad_norm": 0.3195805549621582, "learning_rate": 9.88930859993884e-05, "loss": 1.629, "step": 646 }, { "epoch": 0.06720681416848447, "grad_norm": 0.3861030638217926, "learning_rate": 9.888966911866299e-05, "loss": 2.02, "step": 647 }, { "epoch": 0.06731068868806482, "grad_norm": 0.3432799279689789, "learning_rate": 9.888624703156918e-05, "loss": 1.6907, "step": 648 }, { "epoch": 0.06741456320764516, "grad_norm": 0.33698904514312744, "learning_rate": 9.888281973847138e-05, "loss": 1.6967, "step": 649 }, { "epoch": 0.06751843772722552, "grad_norm": 0.3321373462677002, "learning_rate": 9.887938723973458e-05, "loss": 1.7385, "step": 650 }, { "epoch": 0.06762231224680586, "grad_norm": 0.3467056155204773, "learning_rate": 9.887594953572432e-05, "loss": 1.6206, "step": 651 }, { "epoch": 0.06772618676638621, "grad_norm": 0.3018084764480591, "learning_rate": 9.887250662680667e-05, "loss": 1.6185, "step": 652 }, { "epoch": 0.06783006128596655, "grad_norm": 0.37331244349479675, "learning_rate": 9.886905851334829e-05, "loss": 1.8004, "step": 653 }, { "epoch": 0.0679339358055469, "grad_norm": 0.36824825406074524, "learning_rate": 9.886560519571637e-05, "loss": 1.7545, "step": 654 }, { "epoch": 0.06803781032512725, "grad_norm": 0.33622682094573975, "learning_rate": 9.886214667427867e-05, "loss": 1.7991, "step": 655 }, { "epoch": 0.06814168484470759, "grad_norm": 0.3627051115036011, "learning_rate": 9.885868294940349e-05, "loss": 1.7288, "step": 656 }, { "epoch": 0.06824555936428794, "grad_norm": 0.3434629440307617, "learning_rate": 9.885521402145967e-05, "loss": 1.7928, "step": 657 }, { "epoch": 0.06834943388386829, "grad_norm": 0.3145413100719452, "learning_rate": 9.885173989081664e-05, "loss": 1.6569, "step": 658 }, { "epoch": 0.06845330840344864, "grad_norm": 0.3603506088256836, "learning_rate": 9.88482605578444e-05, "loss": 1.9213, "step": 659 }, { "epoch": 0.06855718292302898, "grad_norm": 0.33825626969337463, "learning_rate": 9.884477602291343e-05, "loss": 1.6129, "step": 660 }, { "epoch": 0.06866105744260932, "grad_norm": 0.35486510396003723, "learning_rate": 9.88412862863948e-05, "loss": 1.8986, "step": 661 }, { "epoch": 0.06876493196218968, "grad_norm": 0.3892877399921417, "learning_rate": 9.883779134866016e-05, "loss": 2.0618, "step": 662 }, { "epoch": 0.06886880648177002, "grad_norm": 0.3327191472053528, "learning_rate": 9.883429121008169e-05, "loss": 1.6847, "step": 663 }, { "epoch": 0.06897268100135037, "grad_norm": 0.32258662581443787, "learning_rate": 9.883078587103215e-05, "loss": 1.7325, "step": 664 }, { "epoch": 0.06907655552093071, "grad_norm": 0.3689654767513275, "learning_rate": 9.882727533188481e-05, "loss": 2.0511, "step": 665 }, { "epoch": 0.06918043004051107, "grad_norm": 0.3166467547416687, "learning_rate": 9.882375959301349e-05, "loss": 1.7457, "step": 666 }, { "epoch": 0.06928430456009141, "grad_norm": 0.3648569583892822, "learning_rate": 9.882023865479263e-05, "loss": 1.9451, "step": 667 }, { "epoch": 0.06938817907967175, "grad_norm": 0.35240545868873596, "learning_rate": 9.881671251759717e-05, "loss": 1.9018, "step": 668 }, { "epoch": 0.0694920535992521, "grad_norm": 0.3445391356945038, "learning_rate": 9.881318118180258e-05, "loss": 1.9383, "step": 669 }, { "epoch": 0.06959592811883245, "grad_norm": 0.33014625310897827, "learning_rate": 9.880964464778499e-05, "loss": 1.6511, "step": 670 }, { "epoch": 0.0696998026384128, "grad_norm": 0.3588254451751709, "learning_rate": 9.880610291592096e-05, "loss": 1.7165, "step": 671 }, { "epoch": 0.06980367715799314, "grad_norm": 0.3672218322753906, "learning_rate": 9.880255598658767e-05, "loss": 1.6612, "step": 672 }, { "epoch": 0.0699075516775735, "grad_norm": 0.3593609035015106, "learning_rate": 9.879900386016284e-05, "loss": 1.8946, "step": 673 }, { "epoch": 0.07001142619715384, "grad_norm": 0.45219045877456665, "learning_rate": 9.879544653702476e-05, "loss": 1.8398, "step": 674 }, { "epoch": 0.07011530071673419, "grad_norm": 0.3960374891757965, "learning_rate": 9.879188401755222e-05, "loss": 1.928, "step": 675 }, { "epoch": 0.07021917523631453, "grad_norm": 0.38661623001098633, "learning_rate": 9.878831630212465e-05, "loss": 1.9605, "step": 676 }, { "epoch": 0.07032304975589487, "grad_norm": 0.34862831234931946, "learning_rate": 9.878474339112193e-05, "loss": 1.7933, "step": 677 }, { "epoch": 0.07042692427547523, "grad_norm": 0.3596080243587494, "learning_rate": 9.878116528492458e-05, "loss": 1.7487, "step": 678 }, { "epoch": 0.07053079879505557, "grad_norm": 0.3401545584201813, "learning_rate": 9.877758198391365e-05, "loss": 1.8613, "step": 679 }, { "epoch": 0.07063467331463592, "grad_norm": 0.3352885842323303, "learning_rate": 9.87739934884707e-05, "loss": 1.6381, "step": 680 }, { "epoch": 0.07073854783421626, "grad_norm": 0.36678406596183777, "learning_rate": 9.87703997989779e-05, "loss": 1.944, "step": 681 }, { "epoch": 0.07084242235379662, "grad_norm": 0.3366321623325348, "learning_rate": 9.876680091581796e-05, "loss": 1.7308, "step": 682 }, { "epoch": 0.07094629687337696, "grad_norm": 0.37314730882644653, "learning_rate": 9.876319683937412e-05, "loss": 1.8055, "step": 683 }, { "epoch": 0.0710501713929573, "grad_norm": 0.3612748384475708, "learning_rate": 9.875958757003017e-05, "loss": 1.7043, "step": 684 }, { "epoch": 0.07115404591253766, "grad_norm": 0.37414270639419556, "learning_rate": 9.875597310817049e-05, "loss": 1.8259, "step": 685 }, { "epoch": 0.071257920432118, "grad_norm": 0.3539772033691406, "learning_rate": 9.875235345417997e-05, "loss": 1.7541, "step": 686 }, { "epoch": 0.07136179495169835, "grad_norm": 0.35429245233535767, "learning_rate": 9.874872860844411e-05, "loss": 1.5966, "step": 687 }, { "epoch": 0.07146566947127869, "grad_norm": 0.32633036375045776, "learning_rate": 9.87450985713489e-05, "loss": 1.7182, "step": 688 }, { "epoch": 0.07156954399085905, "grad_norm": 0.316303014755249, "learning_rate": 9.874146334328092e-05, "loss": 1.553, "step": 689 }, { "epoch": 0.07167341851043939, "grad_norm": 0.39488279819488525, "learning_rate": 9.873782292462727e-05, "loss": 1.9359, "step": 690 }, { "epoch": 0.07177729303001974, "grad_norm": 0.3820980489253998, "learning_rate": 9.873417731577568e-05, "loss": 1.9939, "step": 691 }, { "epoch": 0.07188116754960008, "grad_norm": 0.4182794392108917, "learning_rate": 9.873052651711434e-05, "loss": 1.8192, "step": 692 }, { "epoch": 0.07198504206918042, "grad_norm": 0.3251241147518158, "learning_rate": 9.872687052903203e-05, "loss": 1.6569, "step": 693 }, { "epoch": 0.07208891658876078, "grad_norm": 0.33113572001457214, "learning_rate": 9.87232093519181e-05, "loss": 1.743, "step": 694 }, { "epoch": 0.07219279110834112, "grad_norm": 0.3508453369140625, "learning_rate": 9.871954298616242e-05, "loss": 1.7822, "step": 695 }, { "epoch": 0.07229666562792147, "grad_norm": 0.36456069350242615, "learning_rate": 9.871587143215545e-05, "loss": 1.7646, "step": 696 }, { "epoch": 0.07240054014750182, "grad_norm": 0.3450940251350403, "learning_rate": 9.871219469028816e-05, "loss": 1.798, "step": 697 }, { "epoch": 0.07250441466708217, "grad_norm": 0.3370673358440399, "learning_rate": 9.870851276095212e-05, "loss": 1.757, "step": 698 }, { "epoch": 0.07260828918666251, "grad_norm": 0.34746289253234863, "learning_rate": 9.87048256445394e-05, "loss": 1.6328, "step": 699 }, { "epoch": 0.07271216370624285, "grad_norm": 0.3589167296886444, "learning_rate": 9.870113334144267e-05, "loss": 1.7512, "step": 700 }, { "epoch": 0.07281603822582321, "grad_norm": 0.4009071886539459, "learning_rate": 9.869743585205511e-05, "loss": 1.8032, "step": 701 }, { "epoch": 0.07291991274540355, "grad_norm": 0.38780277967453003, "learning_rate": 9.869373317677049e-05, "loss": 1.8526, "step": 702 }, { "epoch": 0.0730237872649839, "grad_norm": 0.37040579319000244, "learning_rate": 9.869002531598312e-05, "loss": 1.8994, "step": 703 }, { "epoch": 0.07312766178456424, "grad_norm": 0.3585062325000763, "learning_rate": 9.868631227008785e-05, "loss": 1.853, "step": 704 }, { "epoch": 0.0732315363041446, "grad_norm": 0.3617033362388611, "learning_rate": 9.868259403948008e-05, "loss": 1.9527, "step": 705 }, { "epoch": 0.07333541082372494, "grad_norm": 0.35170087218284607, "learning_rate": 9.867887062455579e-05, "loss": 1.7766, "step": 706 }, { "epoch": 0.0734392853433053, "grad_norm": 0.34852275252342224, "learning_rate": 9.867514202571149e-05, "loss": 1.5639, "step": 707 }, { "epoch": 0.07354315986288563, "grad_norm": 0.3750884532928467, "learning_rate": 9.867140824334424e-05, "loss": 1.9035, "step": 708 }, { "epoch": 0.07364703438246598, "grad_norm": 0.37743544578552246, "learning_rate": 9.866766927785167e-05, "loss": 1.8442, "step": 709 }, { "epoch": 0.07375090890204633, "grad_norm": 0.3599265217781067, "learning_rate": 9.866392512963195e-05, "loss": 1.6519, "step": 710 }, { "epoch": 0.07385478342162667, "grad_norm": 0.35439547896385193, "learning_rate": 9.86601757990838e-05, "loss": 1.8225, "step": 711 }, { "epoch": 0.07395865794120703, "grad_norm": 0.35483318567276, "learning_rate": 9.865642128660647e-05, "loss": 1.778, "step": 712 }, { "epoch": 0.07406253246078737, "grad_norm": 0.39381521940231323, "learning_rate": 9.865266159259981e-05, "loss": 1.9915, "step": 713 }, { "epoch": 0.07416640698036772, "grad_norm": 0.4099736213684082, "learning_rate": 9.864889671746421e-05, "loss": 2.0117, "step": 714 }, { "epoch": 0.07427028149994806, "grad_norm": 0.3895813822746277, "learning_rate": 9.864512666160058e-05, "loss": 2.2284, "step": 715 }, { "epoch": 0.0743741560195284, "grad_norm": 0.3480944037437439, "learning_rate": 9.86413514254104e-05, "loss": 1.7284, "step": 716 }, { "epoch": 0.07447803053910876, "grad_norm": 0.3395494818687439, "learning_rate": 9.863757100929573e-05, "loss": 1.7128, "step": 717 }, { "epoch": 0.0745819050586891, "grad_norm": 0.4045158624649048, "learning_rate": 9.863378541365912e-05, "loss": 1.8116, "step": 718 }, { "epoch": 0.07468577957826945, "grad_norm": 0.34248921275138855, "learning_rate": 9.862999463890372e-05, "loss": 1.9717, "step": 719 }, { "epoch": 0.0747896540978498, "grad_norm": 0.33910906314849854, "learning_rate": 9.862619868543322e-05, "loss": 1.886, "step": 720 }, { "epoch": 0.07489352861743015, "grad_norm": 0.33199310302734375, "learning_rate": 9.862239755365186e-05, "loss": 1.7202, "step": 721 }, { "epoch": 0.07499740313701049, "grad_norm": 0.39172807335853577, "learning_rate": 9.861859124396444e-05, "loss": 1.9074, "step": 722 }, { "epoch": 0.07510127765659085, "grad_norm": 0.38816559314727783, "learning_rate": 9.86147797567763e-05, "loss": 1.963, "step": 723 }, { "epoch": 0.07520515217617119, "grad_norm": 0.39771151542663574, "learning_rate": 9.86109630924933e-05, "loss": 2.119, "step": 724 }, { "epoch": 0.07530902669575153, "grad_norm": 0.34102803468704224, "learning_rate": 9.860714125152191e-05, "loss": 1.8536, "step": 725 }, { "epoch": 0.07541290121533188, "grad_norm": 0.35469919443130493, "learning_rate": 9.860331423426914e-05, "loss": 1.7498, "step": 726 }, { "epoch": 0.07551677573491222, "grad_norm": 0.3550052046775818, "learning_rate": 9.859948204114253e-05, "loss": 1.9569, "step": 727 }, { "epoch": 0.07562065025449258, "grad_norm": 0.3915771245956421, "learning_rate": 9.859564467255015e-05, "loss": 2.0232, "step": 728 }, { "epoch": 0.07572452477407292, "grad_norm": 0.326684832572937, "learning_rate": 9.859180212890069e-05, "loss": 1.6597, "step": 729 }, { "epoch": 0.07582839929365327, "grad_norm": 0.33311307430267334, "learning_rate": 9.858795441060333e-05, "loss": 1.8534, "step": 730 }, { "epoch": 0.07593227381323361, "grad_norm": 0.3527379631996155, "learning_rate": 9.858410151806783e-05, "loss": 1.8047, "step": 731 }, { "epoch": 0.07603614833281395, "grad_norm": 0.3848365545272827, "learning_rate": 9.85802434517045e-05, "loss": 1.7222, "step": 732 }, { "epoch": 0.07614002285239431, "grad_norm": 0.3726034462451935, "learning_rate": 9.857638021192417e-05, "loss": 1.9158, "step": 733 }, { "epoch": 0.07624389737197465, "grad_norm": 0.4738229513168335, "learning_rate": 9.857251179913824e-05, "loss": 2.1136, "step": 734 }, { "epoch": 0.076347771891555, "grad_norm": 0.3687533438205719, "learning_rate": 9.856863821375872e-05, "loss": 1.802, "step": 735 }, { "epoch": 0.07645164641113535, "grad_norm": 0.35550445318222046, "learning_rate": 9.856475945619806e-05, "loss": 1.8454, "step": 736 }, { "epoch": 0.0765555209307157, "grad_norm": 0.3472285866737366, "learning_rate": 9.856087552686932e-05, "loss": 1.7785, "step": 737 }, { "epoch": 0.07665939545029604, "grad_norm": 0.3866637051105499, "learning_rate": 9.855698642618616e-05, "loss": 1.9353, "step": 738 }, { "epoch": 0.07676326996987638, "grad_norm": 0.3777254819869995, "learning_rate": 9.855309215456268e-05, "loss": 1.9781, "step": 739 }, { "epoch": 0.07686714448945674, "grad_norm": 0.413335919380188, "learning_rate": 9.854919271241363e-05, "loss": 1.917, "step": 740 }, { "epoch": 0.07697101900903708, "grad_norm": 0.3310924470424652, "learning_rate": 9.854528810015425e-05, "loss": 1.6168, "step": 741 }, { "epoch": 0.07707489352861743, "grad_norm": 0.34101495146751404, "learning_rate": 9.854137831820035e-05, "loss": 1.7935, "step": 742 }, { "epoch": 0.07717876804819777, "grad_norm": 0.34622523188591003, "learning_rate": 9.85374633669683e-05, "loss": 1.7695, "step": 743 }, { "epoch": 0.07728264256777813, "grad_norm": 0.39640340209007263, "learning_rate": 9.8533543246875e-05, "loss": 1.8163, "step": 744 }, { "epoch": 0.07738651708735847, "grad_norm": 0.35431715846061707, "learning_rate": 9.852961795833793e-05, "loss": 1.7706, "step": 745 }, { "epoch": 0.07749039160693882, "grad_norm": 0.37833094596862793, "learning_rate": 9.852568750177508e-05, "loss": 1.6099, "step": 746 }, { "epoch": 0.07759426612651917, "grad_norm": 0.3214152753353119, "learning_rate": 9.852175187760504e-05, "loss": 1.5333, "step": 747 }, { "epoch": 0.0776981406460995, "grad_norm": 0.3399621546268463, "learning_rate": 9.851781108624692e-05, "loss": 1.7676, "step": 748 }, { "epoch": 0.07780201516567986, "grad_norm": 0.3282534182071686, "learning_rate": 9.851386512812036e-05, "loss": 1.8172, "step": 749 }, { "epoch": 0.0779058896852602, "grad_norm": 0.37723109126091003, "learning_rate": 9.850991400364557e-05, "loss": 1.8644, "step": 750 }, { "epoch": 0.07800976420484056, "grad_norm": 0.36292698979377747, "learning_rate": 9.850595771324335e-05, "loss": 1.8879, "step": 751 }, { "epoch": 0.0781136387244209, "grad_norm": 0.3753519654273987, "learning_rate": 9.850199625733501e-05, "loss": 1.9628, "step": 752 }, { "epoch": 0.07821751324400125, "grad_norm": 0.33497151732444763, "learning_rate": 9.849802963634238e-05, "loss": 1.8383, "step": 753 }, { "epoch": 0.07832138776358159, "grad_norm": 0.3661734163761139, "learning_rate": 9.849405785068789e-05, "loss": 1.903, "step": 754 }, { "epoch": 0.07842526228316193, "grad_norm": 0.33177104592323303, "learning_rate": 9.84900809007945e-05, "loss": 1.7703, "step": 755 }, { "epoch": 0.07852913680274229, "grad_norm": 0.41701647639274597, "learning_rate": 9.848609878708575e-05, "loss": 2.017, "step": 756 }, { "epoch": 0.07863301132232263, "grad_norm": 0.3774707317352295, "learning_rate": 9.848211150998568e-05, "loss": 1.8528, "step": 757 }, { "epoch": 0.07873688584190298, "grad_norm": 0.3493809700012207, "learning_rate": 9.84781190699189e-05, "loss": 1.8787, "step": 758 }, { "epoch": 0.07884076036148333, "grad_norm": 0.4020559787750244, "learning_rate": 9.847412146731058e-05, "loss": 2.0583, "step": 759 }, { "epoch": 0.07894463488106368, "grad_norm": 0.37172991037368774, "learning_rate": 9.847011870258644e-05, "loss": 1.9294, "step": 760 }, { "epoch": 0.07904850940064402, "grad_norm": 0.3182731568813324, "learning_rate": 9.846611077617274e-05, "loss": 1.7817, "step": 761 }, { "epoch": 0.07915238392022438, "grad_norm": 0.4014347791671753, "learning_rate": 9.84620976884963e-05, "loss": 1.8936, "step": 762 }, { "epoch": 0.07925625843980472, "grad_norm": 0.317242294549942, "learning_rate": 9.845807943998448e-05, "loss": 1.6904, "step": 763 }, { "epoch": 0.07936013295938506, "grad_norm": 0.3695957064628601, "learning_rate": 9.845405603106518e-05, "loss": 1.9056, "step": 764 }, { "epoch": 0.07946400747896541, "grad_norm": 0.3249827027320862, "learning_rate": 9.845002746216687e-05, "loss": 1.8097, "step": 765 }, { "epoch": 0.07956788199854575, "grad_norm": 0.3424146771430969, "learning_rate": 9.844599373371855e-05, "loss": 1.6326, "step": 766 }, { "epoch": 0.07967175651812611, "grad_norm": 0.35552307963371277, "learning_rate": 9.84419548461498e-05, "loss": 1.9049, "step": 767 }, { "epoch": 0.07977563103770645, "grad_norm": 0.3346835970878601, "learning_rate": 9.843791079989071e-05, "loss": 1.7215, "step": 768 }, { "epoch": 0.0798795055572868, "grad_norm": 0.339160680770874, "learning_rate": 9.843386159537196e-05, "loss": 1.6931, "step": 769 }, { "epoch": 0.07998338007686714, "grad_norm": 0.3619834780693054, "learning_rate": 9.842980723302476e-05, "loss": 1.7751, "step": 770 }, { "epoch": 0.08008725459644749, "grad_norm": 0.37088942527770996, "learning_rate": 9.842574771328084e-05, "loss": 1.7259, "step": 771 }, { "epoch": 0.08019112911602784, "grad_norm": 0.3355197012424469, "learning_rate": 9.842168303657253e-05, "loss": 1.8242, "step": 772 }, { "epoch": 0.08029500363560818, "grad_norm": 0.37746527791023254, "learning_rate": 9.841761320333266e-05, "loss": 1.7493, "step": 773 }, { "epoch": 0.08039887815518854, "grad_norm": 0.3684033751487732, "learning_rate": 9.841353821399469e-05, "loss": 1.7955, "step": 774 }, { "epoch": 0.08050275267476888, "grad_norm": 0.3551725447177887, "learning_rate": 9.840945806899253e-05, "loss": 1.9765, "step": 775 }, { "epoch": 0.08060662719434923, "grad_norm": 0.32390061020851135, "learning_rate": 9.840537276876069e-05, "loss": 1.6581, "step": 776 }, { "epoch": 0.08071050171392957, "grad_norm": 0.3525629937648773, "learning_rate": 9.840128231373422e-05, "loss": 1.8436, "step": 777 }, { "epoch": 0.08081437623350993, "grad_norm": 0.3583666682243347, "learning_rate": 9.839718670434873e-05, "loss": 1.8948, "step": 778 }, { "epoch": 0.08091825075309027, "grad_norm": 0.3378392457962036, "learning_rate": 9.839308594104038e-05, "loss": 1.7955, "step": 779 }, { "epoch": 0.08102212527267061, "grad_norm": 0.3844630718231201, "learning_rate": 9.838898002424585e-05, "loss": 1.987, "step": 780 }, { "epoch": 0.08112599979225096, "grad_norm": 0.41022759675979614, "learning_rate": 9.83848689544024e-05, "loss": 1.959, "step": 781 }, { "epoch": 0.0812298743118313, "grad_norm": 0.3520353436470032, "learning_rate": 9.83807527319478e-05, "loss": 1.8185, "step": 782 }, { "epoch": 0.08133374883141166, "grad_norm": 0.36484450101852417, "learning_rate": 9.837663135732045e-05, "loss": 1.8079, "step": 783 }, { "epoch": 0.081437623350992, "grad_norm": 0.34820452332496643, "learning_rate": 9.83725048309592e-05, "loss": 1.7167, "step": 784 }, { "epoch": 0.08154149787057235, "grad_norm": 0.35253769159317017, "learning_rate": 9.83683731533035e-05, "loss": 1.8118, "step": 785 }, { "epoch": 0.0816453723901527, "grad_norm": 0.3355175256729126, "learning_rate": 9.836423632479335e-05, "loss": 1.7767, "step": 786 }, { "epoch": 0.08174924690973304, "grad_norm": 0.3633790612220764, "learning_rate": 9.836009434586929e-05, "loss": 1.9734, "step": 787 }, { "epoch": 0.08185312142931339, "grad_norm": 0.36171820759773254, "learning_rate": 9.835594721697241e-05, "loss": 1.8049, "step": 788 }, { "epoch": 0.08195699594889373, "grad_norm": 0.329885333776474, "learning_rate": 9.835179493854433e-05, "loss": 1.8358, "step": 789 }, { "epoch": 0.08206087046847409, "grad_norm": 0.33959150314331055, "learning_rate": 9.834763751102726e-05, "loss": 1.6816, "step": 790 }, { "epoch": 0.08216474498805443, "grad_norm": 0.3971327245235443, "learning_rate": 9.834347493486391e-05, "loss": 1.8448, "step": 791 }, { "epoch": 0.08226861950763478, "grad_norm": 0.3603891134262085, "learning_rate": 9.833930721049758e-05, "loss": 1.9219, "step": 792 }, { "epoch": 0.08237249402721512, "grad_norm": 0.3858663737773895, "learning_rate": 9.833513433837209e-05, "loss": 2.0203, "step": 793 }, { "epoch": 0.08247636854679546, "grad_norm": 0.36965909600257874, "learning_rate": 9.833095631893183e-05, "loss": 1.5018, "step": 794 }, { "epoch": 0.08258024306637582, "grad_norm": 0.3826972544193268, "learning_rate": 9.832677315262171e-05, "loss": 1.9229, "step": 795 }, { "epoch": 0.08268411758595616, "grad_norm": 0.3521627187728882, "learning_rate": 9.832258483988721e-05, "loss": 1.8676, "step": 796 }, { "epoch": 0.08278799210553651, "grad_norm": 0.38269051909446716, "learning_rate": 9.831839138117435e-05, "loss": 1.8478, "step": 797 }, { "epoch": 0.08289186662511686, "grad_norm": 0.33963099122047424, "learning_rate": 9.831419277692972e-05, "loss": 1.7087, "step": 798 }, { "epoch": 0.08299574114469721, "grad_norm": 0.3614671528339386, "learning_rate": 9.830998902760043e-05, "loss": 1.8661, "step": 799 }, { "epoch": 0.08309961566427755, "grad_norm": 0.3493926227092743, "learning_rate": 9.830578013363411e-05, "loss": 1.8653, "step": 800 }, { "epoch": 0.0832034901838579, "grad_norm": 0.3648037314414978, "learning_rate": 9.830156609547905e-05, "loss": 1.8252, "step": 801 }, { "epoch": 0.08330736470343825, "grad_norm": 0.35134628415107727, "learning_rate": 9.829734691358393e-05, "loss": 1.8055, "step": 802 }, { "epoch": 0.08341123922301859, "grad_norm": 0.3299328088760376, "learning_rate": 9.829312258839811e-05, "loss": 1.7022, "step": 803 }, { "epoch": 0.08351511374259894, "grad_norm": 0.39070454239845276, "learning_rate": 9.828889312037143e-05, "loss": 2.0613, "step": 804 }, { "epoch": 0.08361898826217928, "grad_norm": 0.3565296232700348, "learning_rate": 9.828465850995433e-05, "loss": 1.973, "step": 805 }, { "epoch": 0.08372286278175964, "grad_norm": 0.35673460364341736, "learning_rate": 9.82804187575977e-05, "loss": 1.8557, "step": 806 }, { "epoch": 0.08382673730133998, "grad_norm": 0.34928351640701294, "learning_rate": 9.827617386375307e-05, "loss": 1.9153, "step": 807 }, { "epoch": 0.08393061182092033, "grad_norm": 0.35845404863357544, "learning_rate": 9.827192382887251e-05, "loss": 1.8209, "step": 808 }, { "epoch": 0.08403448634050067, "grad_norm": 0.3234979510307312, "learning_rate": 9.82676686534086e-05, "loss": 1.6632, "step": 809 }, { "epoch": 0.08413836086008102, "grad_norm": 0.3533652722835541, "learning_rate": 9.826340833781447e-05, "loss": 1.7217, "step": 810 }, { "epoch": 0.08424223537966137, "grad_norm": 0.3984024226665497, "learning_rate": 9.825914288254383e-05, "loss": 1.9889, "step": 811 }, { "epoch": 0.08434610989924171, "grad_norm": 0.3283498287200928, "learning_rate": 9.82548722880509e-05, "loss": 1.7551, "step": 812 }, { "epoch": 0.08444998441882207, "grad_norm": 0.3530416190624237, "learning_rate": 9.825059655479047e-05, "loss": 1.5998, "step": 813 }, { "epoch": 0.08455385893840241, "grad_norm": 0.377096951007843, "learning_rate": 9.82463156832179e-05, "loss": 1.93, "step": 814 }, { "epoch": 0.08465773345798276, "grad_norm": 0.3375512361526489, "learning_rate": 9.824202967378903e-05, "loss": 1.7851, "step": 815 }, { "epoch": 0.0847616079775631, "grad_norm": 0.3516164720058441, "learning_rate": 9.82377385269603e-05, "loss": 1.7856, "step": 816 }, { "epoch": 0.08486548249714346, "grad_norm": 0.34516972303390503, "learning_rate": 9.823344224318869e-05, "loss": 1.7032, "step": 817 }, { "epoch": 0.0849693570167238, "grad_norm": 0.33883360028266907, "learning_rate": 9.822914082293171e-05, "loss": 1.8525, "step": 818 }, { "epoch": 0.08507323153630414, "grad_norm": 0.3980461657047272, "learning_rate": 9.822483426664744e-05, "loss": 1.8626, "step": 819 }, { "epoch": 0.0851771060558845, "grad_norm": 0.36254748702049255, "learning_rate": 9.822052257479448e-05, "loss": 1.7556, "step": 820 }, { "epoch": 0.08528098057546483, "grad_norm": 0.33397209644317627, "learning_rate": 9.821620574783202e-05, "loss": 1.713, "step": 821 }, { "epoch": 0.08538485509504519, "grad_norm": 0.3705761432647705, "learning_rate": 9.821188378621973e-05, "loss": 2.0036, "step": 822 }, { "epoch": 0.08548872961462553, "grad_norm": 0.40211713314056396, "learning_rate": 9.820755669041787e-05, "loss": 1.8456, "step": 823 }, { "epoch": 0.08559260413420589, "grad_norm": 0.3149651288986206, "learning_rate": 9.820322446088729e-05, "loss": 1.6277, "step": 824 }, { "epoch": 0.08569647865378623, "grad_norm": 0.36002975702285767, "learning_rate": 9.819888709808929e-05, "loss": 1.8758, "step": 825 }, { "epoch": 0.08580035317336657, "grad_norm": 0.40512141585350037, "learning_rate": 9.819454460248578e-05, "loss": 1.8693, "step": 826 }, { "epoch": 0.08590422769294692, "grad_norm": 0.33460405468940735, "learning_rate": 9.81901969745392e-05, "loss": 1.8883, "step": 827 }, { "epoch": 0.08600810221252726, "grad_norm": 0.3666040301322937, "learning_rate": 9.818584421471254e-05, "loss": 1.7379, "step": 828 }, { "epoch": 0.08611197673210762, "grad_norm": 0.3242300748825073, "learning_rate": 9.818148632346934e-05, "loss": 1.67, "step": 829 }, { "epoch": 0.08621585125168796, "grad_norm": 0.37964361906051636, "learning_rate": 9.817712330127367e-05, "loss": 1.9572, "step": 830 }, { "epoch": 0.08631972577126831, "grad_norm": 0.3320227265357971, "learning_rate": 9.817275514859017e-05, "loss": 1.7383, "step": 831 }, { "epoch": 0.08642360029084865, "grad_norm": 0.3565314710140228, "learning_rate": 9.8168381865884e-05, "loss": 1.9483, "step": 832 }, { "epoch": 0.08652747481042901, "grad_norm": 0.35897499322891235, "learning_rate": 9.81640034536209e-05, "loss": 1.7132, "step": 833 }, { "epoch": 0.08663134933000935, "grad_norm": 0.37124866247177124, "learning_rate": 9.815961991226711e-05, "loss": 2.0216, "step": 834 }, { "epoch": 0.08673522384958969, "grad_norm": 0.36309394240379333, "learning_rate": 9.815523124228949e-05, "loss": 1.7324, "step": 835 }, { "epoch": 0.08683909836917005, "grad_norm": 0.35258007049560547, "learning_rate": 9.815083744415534e-05, "loss": 1.7634, "step": 836 }, { "epoch": 0.08694297288875039, "grad_norm": 0.335984468460083, "learning_rate": 9.81464385183326e-05, "loss": 1.4646, "step": 837 }, { "epoch": 0.08704684740833074, "grad_norm": 0.31050655245780945, "learning_rate": 9.814203446528971e-05, "loss": 1.3127, "step": 838 }, { "epoch": 0.08715072192791108, "grad_norm": 0.3857136368751526, "learning_rate": 9.813762528549568e-05, "loss": 1.9647, "step": 839 }, { "epoch": 0.08725459644749144, "grad_norm": 0.35393643379211426, "learning_rate": 9.813321097942005e-05, "loss": 1.7981, "step": 840 }, { "epoch": 0.08735847096707178, "grad_norm": 0.3497065305709839, "learning_rate": 9.812879154753289e-05, "loss": 1.8632, "step": 841 }, { "epoch": 0.08746234548665212, "grad_norm": 0.34144020080566406, "learning_rate": 9.812436699030486e-05, "loss": 1.653, "step": 842 }, { "epoch": 0.08756622000623247, "grad_norm": 0.375434935092926, "learning_rate": 9.811993730820712e-05, "loss": 1.798, "step": 843 }, { "epoch": 0.08767009452581281, "grad_norm": 0.33210819959640503, "learning_rate": 9.811550250171142e-05, "loss": 1.7409, "step": 844 }, { "epoch": 0.08777396904539317, "grad_norm": 0.3796077072620392, "learning_rate": 9.811106257129001e-05, "loss": 1.8968, "step": 845 }, { "epoch": 0.08787784356497351, "grad_norm": 0.3507767915725708, "learning_rate": 9.810661751741573e-05, "loss": 1.7981, "step": 846 }, { "epoch": 0.08798171808455386, "grad_norm": 0.3411443829536438, "learning_rate": 9.810216734056192e-05, "loss": 1.5857, "step": 847 }, { "epoch": 0.0880855926041342, "grad_norm": 0.3522975444793701, "learning_rate": 9.809771204120248e-05, "loss": 1.7766, "step": 848 }, { "epoch": 0.08818946712371455, "grad_norm": 0.35262516140937805, "learning_rate": 9.809325161981191e-05, "loss": 1.6296, "step": 849 }, { "epoch": 0.0882933416432949, "grad_norm": 0.36089739203453064, "learning_rate": 9.808878607686518e-05, "loss": 1.8433, "step": 850 }, { "epoch": 0.08839721616287524, "grad_norm": 0.3271440863609314, "learning_rate": 9.808431541283783e-05, "loss": 1.8521, "step": 851 }, { "epoch": 0.0885010906824556, "grad_norm": 0.39304596185684204, "learning_rate": 9.807983962820597e-05, "loss": 2.0822, "step": 852 }, { "epoch": 0.08860496520203594, "grad_norm": 0.3730714023113251, "learning_rate": 9.807535872344622e-05, "loss": 1.8393, "step": 853 }, { "epoch": 0.08870883972161629, "grad_norm": 0.37659719586372375, "learning_rate": 9.807087269903577e-05, "loss": 1.921, "step": 854 }, { "epoch": 0.08881271424119663, "grad_norm": 0.3870750367641449, "learning_rate": 9.806638155545236e-05, "loss": 2.0497, "step": 855 }, { "epoch": 0.08891658876077699, "grad_norm": 0.3502962291240692, "learning_rate": 9.806188529317423e-05, "loss": 1.849, "step": 856 }, { "epoch": 0.08902046328035733, "grad_norm": 0.3573035001754761, "learning_rate": 9.805738391268021e-05, "loss": 1.6662, "step": 857 }, { "epoch": 0.08912433779993767, "grad_norm": 0.46759751439094543, "learning_rate": 9.805287741444968e-05, "loss": 1.5148, "step": 858 }, { "epoch": 0.08922821231951802, "grad_norm": 0.3513263463973999, "learning_rate": 9.804836579896253e-05, "loss": 1.9624, "step": 859 }, { "epoch": 0.08933208683909837, "grad_norm": 0.3362573981285095, "learning_rate": 9.80438490666992e-05, "loss": 1.7757, "step": 860 }, { "epoch": 0.08943596135867872, "grad_norm": 0.3206601142883301, "learning_rate": 9.803932721814071e-05, "loss": 1.692, "step": 861 }, { "epoch": 0.08953983587825906, "grad_norm": 0.3470083773136139, "learning_rate": 9.803480025376858e-05, "loss": 1.8445, "step": 862 }, { "epoch": 0.08964371039783942, "grad_norm": 0.3480392098426819, "learning_rate": 9.803026817406493e-05, "loss": 1.8338, "step": 863 }, { "epoch": 0.08974758491741976, "grad_norm": 0.3229011297225952, "learning_rate": 9.802573097951235e-05, "loss": 1.7572, "step": 864 }, { "epoch": 0.0898514594370001, "grad_norm": 0.3733616769313812, "learning_rate": 9.802118867059407e-05, "loss": 1.8358, "step": 865 }, { "epoch": 0.08995533395658045, "grad_norm": 0.35253623127937317, "learning_rate": 9.801664124779373e-05, "loss": 1.8347, "step": 866 }, { "epoch": 0.09005920847616079, "grad_norm": 0.3676695227622986, "learning_rate": 9.801208871159567e-05, "loss": 1.8158, "step": 867 }, { "epoch": 0.09016308299574115, "grad_norm": 0.3626701235771179, "learning_rate": 9.800753106248468e-05, "loss": 1.9947, "step": 868 }, { "epoch": 0.09026695751532149, "grad_norm": 0.3680635392665863, "learning_rate": 9.800296830094609e-05, "loss": 1.7856, "step": 869 }, { "epoch": 0.09037083203490184, "grad_norm": 0.34749433398246765, "learning_rate": 9.79984004274658e-05, "loss": 1.7665, "step": 870 }, { "epoch": 0.09047470655448218, "grad_norm": 0.3646162152290344, "learning_rate": 9.799382744253029e-05, "loss": 1.715, "step": 871 }, { "epoch": 0.09057858107406254, "grad_norm": 0.37328121066093445, "learning_rate": 9.79892493466265e-05, "loss": 1.7945, "step": 872 }, { "epoch": 0.09068245559364288, "grad_norm": 0.35278576612472534, "learning_rate": 9.7984666140242e-05, "loss": 1.7249, "step": 873 }, { "epoch": 0.09078633011322322, "grad_norm": 0.33178964257240295, "learning_rate": 9.798007782386485e-05, "loss": 1.6934, "step": 874 }, { "epoch": 0.09089020463280358, "grad_norm": 0.3599662780761719, "learning_rate": 9.797548439798368e-05, "loss": 1.9192, "step": 875 }, { "epoch": 0.09099407915238392, "grad_norm": 0.37787213921546936, "learning_rate": 9.797088586308762e-05, "loss": 1.9863, "step": 876 }, { "epoch": 0.09109795367196427, "grad_norm": 0.31273946166038513, "learning_rate": 9.796628221966642e-05, "loss": 1.6311, "step": 877 }, { "epoch": 0.09120182819154461, "grad_norm": 0.33196964859962463, "learning_rate": 9.796167346821032e-05, "loss": 1.6965, "step": 878 }, { "epoch": 0.09130570271112497, "grad_norm": 0.3330535590648651, "learning_rate": 9.79570596092101e-05, "loss": 1.745, "step": 879 }, { "epoch": 0.09140957723070531, "grad_norm": 0.3526756465435028, "learning_rate": 9.795244064315712e-05, "loss": 1.886, "step": 880 }, { "epoch": 0.09151345175028565, "grad_norm": 0.3859866261482239, "learning_rate": 9.794781657054324e-05, "loss": 1.8935, "step": 881 }, { "epoch": 0.091617326269866, "grad_norm": 0.3710609972476959, "learning_rate": 9.794318739186092e-05, "loss": 1.9279, "step": 882 }, { "epoch": 0.09172120078944634, "grad_norm": 0.37800589203834534, "learning_rate": 9.79385531076031e-05, "loss": 1.9028, "step": 883 }, { "epoch": 0.0918250753090267, "grad_norm": 0.36296647787094116, "learning_rate": 9.793391371826332e-05, "loss": 1.9369, "step": 884 }, { "epoch": 0.09192894982860704, "grad_norm": 0.3620465397834778, "learning_rate": 9.792926922433563e-05, "loss": 1.9072, "step": 885 }, { "epoch": 0.0920328243481874, "grad_norm": 0.35136178135871887, "learning_rate": 9.792461962631462e-05, "loss": 1.6753, "step": 886 }, { "epoch": 0.09213669886776774, "grad_norm": 0.33308306336402893, "learning_rate": 9.791996492469544e-05, "loss": 1.8044, "step": 887 }, { "epoch": 0.09224057338734809, "grad_norm": 0.36434537172317505, "learning_rate": 9.79153051199738e-05, "loss": 1.6905, "step": 888 }, { "epoch": 0.09234444790692843, "grad_norm": 0.3682979345321655, "learning_rate": 9.791064021264591e-05, "loss": 1.9002, "step": 889 }, { "epoch": 0.09244832242650877, "grad_norm": 0.39494451880455017, "learning_rate": 9.790597020320856e-05, "loss": 1.8316, "step": 890 }, { "epoch": 0.09255219694608913, "grad_norm": 0.3634776473045349, "learning_rate": 9.790129509215907e-05, "loss": 1.7874, "step": 891 }, { "epoch": 0.09265607146566947, "grad_norm": 0.49316370487213135, "learning_rate": 9.789661487999528e-05, "loss": 1.8666, "step": 892 }, { "epoch": 0.09275994598524982, "grad_norm": 0.4129713475704193, "learning_rate": 9.789192956721563e-05, "loss": 2.0314, "step": 893 }, { "epoch": 0.09286382050483016, "grad_norm": 0.33426743745803833, "learning_rate": 9.788723915431905e-05, "loss": 1.6938, "step": 894 }, { "epoch": 0.09296769502441052, "grad_norm": 0.3889116048812866, "learning_rate": 9.788254364180503e-05, "loss": 1.9863, "step": 895 }, { "epoch": 0.09307156954399086, "grad_norm": 0.37265413999557495, "learning_rate": 9.78778430301736e-05, "loss": 1.7899, "step": 896 }, { "epoch": 0.0931754440635712, "grad_norm": 0.4090532958507538, "learning_rate": 9.787313731992537e-05, "loss": 1.8823, "step": 897 }, { "epoch": 0.09327931858315155, "grad_norm": 0.3560778796672821, "learning_rate": 9.786842651156143e-05, "loss": 1.8849, "step": 898 }, { "epoch": 0.0933831931027319, "grad_norm": 0.43143633008003235, "learning_rate": 9.786371060558346e-05, "loss": 1.7419, "step": 899 }, { "epoch": 0.09348706762231225, "grad_norm": 0.4012468755245209, "learning_rate": 9.785898960249364e-05, "loss": 1.9191, "step": 900 }, { "epoch": 0.09359094214189259, "grad_norm": 0.38962993025779724, "learning_rate": 9.785426350279477e-05, "loss": 1.8023, "step": 901 }, { "epoch": 0.09369481666147295, "grad_norm": 0.38585248589515686, "learning_rate": 9.78495323069901e-05, "loss": 1.9383, "step": 902 }, { "epoch": 0.09379869118105329, "grad_norm": 0.347837895154953, "learning_rate": 9.784479601558348e-05, "loss": 1.8187, "step": 903 }, { "epoch": 0.09390256570063363, "grad_norm": 0.32775819301605225, "learning_rate": 9.78400546290793e-05, "loss": 1.5762, "step": 904 }, { "epoch": 0.09400644022021398, "grad_norm": 0.3671472668647766, "learning_rate": 9.783530814798245e-05, "loss": 1.8772, "step": 905 }, { "epoch": 0.09411031473979432, "grad_norm": 0.32809990644454956, "learning_rate": 9.783055657279843e-05, "loss": 1.7432, "step": 906 }, { "epoch": 0.09421418925937468, "grad_norm": 0.33797743916511536, "learning_rate": 9.782579990403321e-05, "loss": 1.7051, "step": 907 }, { "epoch": 0.09431806377895502, "grad_norm": 0.3659290373325348, "learning_rate": 9.782103814219337e-05, "loss": 1.7051, "step": 908 }, { "epoch": 0.09442193829853537, "grad_norm": 0.4114789068698883, "learning_rate": 9.781627128778598e-05, "loss": 2.0238, "step": 909 }, { "epoch": 0.09452581281811571, "grad_norm": 0.3247332274913788, "learning_rate": 9.781149934131868e-05, "loss": 1.6112, "step": 910 }, { "epoch": 0.09462968733769607, "grad_norm": 0.3638227880001068, "learning_rate": 9.780672230329964e-05, "loss": 1.7238, "step": 911 }, { "epoch": 0.09473356185727641, "grad_norm": 0.35563889145851135, "learning_rate": 9.780194017423758e-05, "loss": 1.8491, "step": 912 }, { "epoch": 0.09483743637685675, "grad_norm": 0.38154855370521545, "learning_rate": 9.779715295464178e-05, "loss": 1.666, "step": 913 }, { "epoch": 0.0949413108964371, "grad_norm": 0.3501157760620117, "learning_rate": 9.779236064502201e-05, "loss": 1.8345, "step": 914 }, { "epoch": 0.09504518541601745, "grad_norm": 0.3432106375694275, "learning_rate": 9.778756324588863e-05, "loss": 1.7373, "step": 915 }, { "epoch": 0.0951490599355978, "grad_norm": 0.39171236753463745, "learning_rate": 9.778276075775251e-05, "loss": 1.9764, "step": 916 }, { "epoch": 0.09525293445517814, "grad_norm": 0.35228273272514343, "learning_rate": 9.77779531811251e-05, "loss": 1.8864, "step": 917 }, { "epoch": 0.0953568089747585, "grad_norm": 0.3456995487213135, "learning_rate": 9.777314051651836e-05, "loss": 1.8929, "step": 918 }, { "epoch": 0.09546068349433884, "grad_norm": 0.39113515615463257, "learning_rate": 9.776832276444479e-05, "loss": 1.8455, "step": 919 }, { "epoch": 0.09556455801391918, "grad_norm": 0.35368141531944275, "learning_rate": 9.776349992541746e-05, "loss": 1.7881, "step": 920 }, { "epoch": 0.09566843253349953, "grad_norm": 0.3645842671394348, "learning_rate": 9.775867199994994e-05, "loss": 1.9618, "step": 921 }, { "epoch": 0.09577230705307987, "grad_norm": 0.4213750958442688, "learning_rate": 9.775383898855641e-05, "loss": 1.9835, "step": 922 }, { "epoch": 0.09587618157266023, "grad_norm": 0.3259928226470947, "learning_rate": 9.77490008917515e-05, "loss": 1.6711, "step": 923 }, { "epoch": 0.09598005609224057, "grad_norm": 0.33279410004615784, "learning_rate": 9.774415771005048e-05, "loss": 1.7854, "step": 924 }, { "epoch": 0.09608393061182093, "grad_norm": 0.3503129482269287, "learning_rate": 9.773930944396905e-05, "loss": 1.8105, "step": 925 }, { "epoch": 0.09618780513140127, "grad_norm": 0.3861035704612732, "learning_rate": 9.773445609402356e-05, "loss": 1.8127, "step": 926 }, { "epoch": 0.09629167965098162, "grad_norm": 0.4022195339202881, "learning_rate": 9.772959766073084e-05, "loss": 1.8921, "step": 927 }, { "epoch": 0.09639555417056196, "grad_norm": 0.34826424717903137, "learning_rate": 9.772473414460828e-05, "loss": 1.8411, "step": 928 }, { "epoch": 0.0964994286901423, "grad_norm": 0.3265725374221802, "learning_rate": 9.77198655461738e-05, "loss": 1.6894, "step": 929 }, { "epoch": 0.09660330320972266, "grad_norm": 0.3737447261810303, "learning_rate": 9.771499186594586e-05, "loss": 1.8592, "step": 930 }, { "epoch": 0.096707177729303, "grad_norm": 0.3337661623954773, "learning_rate": 9.771011310444348e-05, "loss": 1.6941, "step": 931 }, { "epoch": 0.09681105224888335, "grad_norm": 0.3704545199871063, "learning_rate": 9.770522926218622e-05, "loss": 1.7519, "step": 932 }, { "epoch": 0.0969149267684637, "grad_norm": 0.35930871963500977, "learning_rate": 9.770034033969413e-05, "loss": 1.9341, "step": 933 }, { "epoch": 0.09701880128804405, "grad_norm": 0.38769471645355225, "learning_rate": 9.769544633748789e-05, "loss": 1.8773, "step": 934 }, { "epoch": 0.09712267580762439, "grad_norm": 0.37353989481925964, "learning_rate": 9.769054725608867e-05, "loss": 1.9544, "step": 935 }, { "epoch": 0.09722655032720473, "grad_norm": 0.39026400446891785, "learning_rate": 9.768564309601813e-05, "loss": 2.0178, "step": 936 }, { "epoch": 0.09733042484678509, "grad_norm": 0.37478938698768616, "learning_rate": 9.768073385779859e-05, "loss": 1.6323, "step": 937 }, { "epoch": 0.09743429936636543, "grad_norm": 0.3543946146965027, "learning_rate": 9.76758195419528e-05, "loss": 1.8568, "step": 938 }, { "epoch": 0.09753817388594578, "grad_norm": 0.3243783712387085, "learning_rate": 9.767090014900413e-05, "loss": 1.6446, "step": 939 }, { "epoch": 0.09764204840552612, "grad_norm": 0.390176922082901, "learning_rate": 9.766597567947644e-05, "loss": 1.7432, "step": 940 }, { "epoch": 0.09774592292510648, "grad_norm": 0.40544116497039795, "learning_rate": 9.766104613389413e-05, "loss": 1.8066, "step": 941 }, { "epoch": 0.09784979744468682, "grad_norm": 0.4119703471660614, "learning_rate": 9.76561115127822e-05, "loss": 1.8682, "step": 942 }, { "epoch": 0.09795367196426717, "grad_norm": 0.41198158264160156, "learning_rate": 9.765117181666612e-05, "loss": 1.8556, "step": 943 }, { "epoch": 0.09805754648384751, "grad_norm": 0.3242831528186798, "learning_rate": 9.764622704607193e-05, "loss": 1.8249, "step": 944 }, { "epoch": 0.09816142100342785, "grad_norm": 0.336739718914032, "learning_rate": 9.764127720152623e-05, "loss": 1.7983, "step": 945 }, { "epoch": 0.09826529552300821, "grad_norm": 0.43454012274742126, "learning_rate": 9.763632228355611e-05, "loss": 1.8482, "step": 946 }, { "epoch": 0.09836917004258855, "grad_norm": 0.39079445600509644, "learning_rate": 9.763136229268924e-05, "loss": 1.9735, "step": 947 }, { "epoch": 0.0984730445621689, "grad_norm": 0.3446981906890869, "learning_rate": 9.762639722945384e-05, "loss": 1.8289, "step": 948 }, { "epoch": 0.09857691908174925, "grad_norm": 0.3308192193508148, "learning_rate": 9.762142709437863e-05, "loss": 1.6953, "step": 949 }, { "epoch": 0.0986807936013296, "grad_norm": 0.40982964634895325, "learning_rate": 9.76164518879929e-05, "loss": 1.9994, "step": 950 }, { "epoch": 0.09878466812090994, "grad_norm": 0.3556596040725708, "learning_rate": 9.761147161082644e-05, "loss": 1.7591, "step": 951 }, { "epoch": 0.09888854264049028, "grad_norm": 0.361565500497818, "learning_rate": 9.760648626340965e-05, "loss": 1.8883, "step": 952 }, { "epoch": 0.09899241716007064, "grad_norm": 0.36020350456237793, "learning_rate": 9.760149584627343e-05, "loss": 1.7859, "step": 953 }, { "epoch": 0.09909629167965098, "grad_norm": 0.3831625282764435, "learning_rate": 9.759650035994921e-05, "loss": 1.9948, "step": 954 }, { "epoch": 0.09920016619923133, "grad_norm": 0.34753647446632385, "learning_rate": 9.759149980496896e-05, "loss": 1.8239, "step": 955 }, { "epoch": 0.09930404071881167, "grad_norm": 0.3743743896484375, "learning_rate": 9.758649418186521e-05, "loss": 1.8389, "step": 956 }, { "epoch": 0.09940791523839203, "grad_norm": 0.38041260838508606, "learning_rate": 9.758148349117101e-05, "loss": 1.9304, "step": 957 }, { "epoch": 0.09951178975797237, "grad_norm": 0.4073218107223511, "learning_rate": 9.757646773341998e-05, "loss": 1.9184, "step": 958 }, { "epoch": 0.09961566427755271, "grad_norm": 0.3602345585823059, "learning_rate": 9.757144690914624e-05, "loss": 1.8935, "step": 959 }, { "epoch": 0.09971953879713306, "grad_norm": 0.3824581801891327, "learning_rate": 9.756642101888448e-05, "loss": 1.8969, "step": 960 }, { "epoch": 0.0998234133167134, "grad_norm": 0.3472011387348175, "learning_rate": 9.756139006316991e-05, "loss": 1.7702, "step": 961 }, { "epoch": 0.09992728783629376, "grad_norm": 0.3461735248565674, "learning_rate": 9.755635404253829e-05, "loss": 1.6524, "step": 962 }, { "epoch": 0.1000311623558741, "grad_norm": 0.34832313656806946, "learning_rate": 9.755131295752593e-05, "loss": 1.701, "step": 963 }, { "epoch": 0.10013503687545446, "grad_norm": 0.3486674726009369, "learning_rate": 9.754626680866964e-05, "loss": 1.8465, "step": 964 }, { "epoch": 0.1002389113950348, "grad_norm": 0.3982466161251068, "learning_rate": 9.754121559650681e-05, "loss": 2.0038, "step": 965 }, { "epoch": 0.10034278591461515, "grad_norm": 0.3699825406074524, "learning_rate": 9.753615932157536e-05, "loss": 2.1116, "step": 966 }, { "epoch": 0.10044666043419549, "grad_norm": 0.37160253524780273, "learning_rate": 9.753109798441375e-05, "loss": 1.9158, "step": 967 }, { "epoch": 0.10055053495377583, "grad_norm": 0.34480687975883484, "learning_rate": 9.752603158556095e-05, "loss": 1.7587, "step": 968 }, { "epoch": 0.10065440947335619, "grad_norm": 0.34491807222366333, "learning_rate": 9.752096012555652e-05, "loss": 1.8125, "step": 969 }, { "epoch": 0.10075828399293653, "grad_norm": 0.36737021803855896, "learning_rate": 9.751588360494048e-05, "loss": 1.9693, "step": 970 }, { "epoch": 0.10086215851251688, "grad_norm": 0.372712641954422, "learning_rate": 9.75108020242535e-05, "loss": 1.7953, "step": 971 }, { "epoch": 0.10096603303209722, "grad_norm": 0.3740496039390564, "learning_rate": 9.75057153840367e-05, "loss": 1.7753, "step": 972 }, { "epoch": 0.10106990755167758, "grad_norm": 0.46863672137260437, "learning_rate": 9.750062368483176e-05, "loss": 2.1136, "step": 973 }, { "epoch": 0.10117378207125792, "grad_norm": 0.3760620355606079, "learning_rate": 9.749552692718093e-05, "loss": 1.7655, "step": 974 }, { "epoch": 0.10127765659083826, "grad_norm": 0.35611656308174133, "learning_rate": 9.749042511162696e-05, "loss": 1.8326, "step": 975 }, { "epoch": 0.10138153111041862, "grad_norm": 0.33176377415657043, "learning_rate": 9.748531823871314e-05, "loss": 1.7562, "step": 976 }, { "epoch": 0.10148540562999896, "grad_norm": 0.33755457401275635, "learning_rate": 9.748020630898333e-05, "loss": 1.7919, "step": 977 }, { "epoch": 0.10158928014957931, "grad_norm": 0.40104183554649353, "learning_rate": 9.747508932298192e-05, "loss": 1.7349, "step": 978 }, { "epoch": 0.10169315466915965, "grad_norm": 0.33952096104621887, "learning_rate": 9.746996728125382e-05, "loss": 1.6836, "step": 979 }, { "epoch": 0.10179702918874001, "grad_norm": 0.3854829668998718, "learning_rate": 9.746484018434447e-05, "loss": 1.6913, "step": 980 }, { "epoch": 0.10190090370832035, "grad_norm": 0.44178688526153564, "learning_rate": 9.745970803279989e-05, "loss": 2.2579, "step": 981 }, { "epoch": 0.1020047782279007, "grad_norm": 0.35316595435142517, "learning_rate": 9.74545708271666e-05, "loss": 1.7616, "step": 982 }, { "epoch": 0.10210865274748104, "grad_norm": 0.36709627509117126, "learning_rate": 9.744942856799169e-05, "loss": 1.6341, "step": 983 }, { "epoch": 0.10221252726706138, "grad_norm": 0.4684566557407379, "learning_rate": 9.744428125582275e-05, "loss": 2.0114, "step": 984 }, { "epoch": 0.10231640178664174, "grad_norm": 0.34113651514053345, "learning_rate": 9.743912889120791e-05, "loss": 1.7651, "step": 985 }, { "epoch": 0.10242027630622208, "grad_norm": 0.4054688513278961, "learning_rate": 9.743397147469592e-05, "loss": 2.0589, "step": 986 }, { "epoch": 0.10252415082580243, "grad_norm": 0.36325010657310486, "learning_rate": 9.742880900683596e-05, "loss": 1.8003, "step": 987 }, { "epoch": 0.10262802534538278, "grad_norm": 0.3484811782836914, "learning_rate": 9.742364148817777e-05, "loss": 1.8546, "step": 988 }, { "epoch": 0.10273189986496313, "grad_norm": 0.3415622115135193, "learning_rate": 9.74184689192717e-05, "loss": 1.6446, "step": 989 }, { "epoch": 0.10283577438454347, "grad_norm": 0.377069354057312, "learning_rate": 9.741329130066859e-05, "loss": 2.0174, "step": 990 }, { "epoch": 0.10293964890412381, "grad_norm": 0.37598928809165955, "learning_rate": 9.740810863291976e-05, "loss": 1.9646, "step": 991 }, { "epoch": 0.10304352342370417, "grad_norm": 0.34742510318756104, "learning_rate": 9.740292091657718e-05, "loss": 1.7112, "step": 992 }, { "epoch": 0.10314739794328451, "grad_norm": 0.35279062390327454, "learning_rate": 9.739772815219327e-05, "loss": 1.736, "step": 993 }, { "epoch": 0.10325127246286486, "grad_norm": 0.33563071489334106, "learning_rate": 9.739253034032103e-05, "loss": 1.7482, "step": 994 }, { "epoch": 0.1033551469824452, "grad_norm": 0.4013693332672119, "learning_rate": 9.738732748151397e-05, "loss": 1.9931, "step": 995 }, { "epoch": 0.10345902150202556, "grad_norm": 0.3738352954387665, "learning_rate": 9.738211957632619e-05, "loss": 1.7324, "step": 996 }, { "epoch": 0.1035628960216059, "grad_norm": 0.34439367055892944, "learning_rate": 9.737690662531227e-05, "loss": 1.7277, "step": 997 }, { "epoch": 0.10366677054118625, "grad_norm": 0.34663471579551697, "learning_rate": 9.737168862902735e-05, "loss": 1.8904, "step": 998 }, { "epoch": 0.1037706450607666, "grad_norm": 0.3536369800567627, "learning_rate": 9.736646558802709e-05, "loss": 1.898, "step": 999 }, { "epoch": 0.10387451958034694, "grad_norm": 0.37141430377960205, "learning_rate": 9.736123750286772e-05, "loss": 1.9826, "step": 1000 }, { "epoch": 0.10397839409992729, "grad_norm": 0.33750221133232117, "learning_rate": 9.7356004374106e-05, "loss": 1.6245, "step": 1001 }, { "epoch": 0.10408226861950763, "grad_norm": 0.39704445004463196, "learning_rate": 9.735076620229919e-05, "loss": 1.9813, "step": 1002 }, { "epoch": 0.10418614313908799, "grad_norm": 0.3396252691745758, "learning_rate": 9.734552298800513e-05, "loss": 1.7119, "step": 1003 }, { "epoch": 0.10429001765866833, "grad_norm": 0.3506071865558624, "learning_rate": 9.734027473178219e-05, "loss": 1.8563, "step": 1004 }, { "epoch": 0.10439389217824868, "grad_norm": 0.3697708249092102, "learning_rate": 9.733502143418925e-05, "loss": 1.5552, "step": 1005 }, { "epoch": 0.10449776669782902, "grad_norm": 0.34835517406463623, "learning_rate": 9.732976309578577e-05, "loss": 1.6061, "step": 1006 }, { "epoch": 0.10460164121740936, "grad_norm": 0.4172792434692383, "learning_rate": 9.73244997171317e-05, "loss": 2.0223, "step": 1007 }, { "epoch": 0.10470551573698972, "grad_norm": 0.37893515825271606, "learning_rate": 9.731923129878754e-05, "loss": 1.8512, "step": 1008 }, { "epoch": 0.10480939025657006, "grad_norm": 0.3517685830593109, "learning_rate": 9.731395784131437e-05, "loss": 1.7601, "step": 1009 }, { "epoch": 0.10491326477615041, "grad_norm": 0.3533296585083008, "learning_rate": 9.730867934527374e-05, "loss": 1.7264, "step": 1010 }, { "epoch": 0.10501713929573075, "grad_norm": 0.3656322956085205, "learning_rate": 9.73033958112278e-05, "loss": 1.8542, "step": 1011 }, { "epoch": 0.10512101381531111, "grad_norm": 0.36052483320236206, "learning_rate": 9.729810723973917e-05, "loss": 1.7967, "step": 1012 }, { "epoch": 0.10522488833489145, "grad_norm": 0.3687455356121063, "learning_rate": 9.729281363137106e-05, "loss": 1.9559, "step": 1013 }, { "epoch": 0.10532876285447179, "grad_norm": 0.3520287275314331, "learning_rate": 9.72875149866872e-05, "loss": 1.8265, "step": 1014 }, { "epoch": 0.10543263737405215, "grad_norm": 0.36388278007507324, "learning_rate": 9.728221130625186e-05, "loss": 1.7892, "step": 1015 }, { "epoch": 0.10553651189363249, "grad_norm": 0.3430229127407074, "learning_rate": 9.727690259062982e-05, "loss": 1.7436, "step": 1016 }, { "epoch": 0.10564038641321284, "grad_norm": 0.40209057927131653, "learning_rate": 9.727158884038643e-05, "loss": 2.0679, "step": 1017 }, { "epoch": 0.10574426093279318, "grad_norm": 0.35472702980041504, "learning_rate": 9.726627005608756e-05, "loss": 1.8322, "step": 1018 }, { "epoch": 0.10584813545237354, "grad_norm": 0.3761141896247864, "learning_rate": 9.726094623829962e-05, "loss": 1.9986, "step": 1019 }, { "epoch": 0.10595200997195388, "grad_norm": 0.37328705191612244, "learning_rate": 9.725561738758956e-05, "loss": 1.8012, "step": 1020 }, { "epoch": 0.10605588449153423, "grad_norm": 0.3950223922729492, "learning_rate": 9.725028350452485e-05, "loss": 1.859, "step": 1021 }, { "epoch": 0.10615975901111457, "grad_norm": 0.37431395053863525, "learning_rate": 9.724494458967352e-05, "loss": 1.6539, "step": 1022 }, { "epoch": 0.10626363353069491, "grad_norm": 0.3411618173122406, "learning_rate": 9.723960064360412e-05, "loss": 1.6884, "step": 1023 }, { "epoch": 0.10636750805027527, "grad_norm": 0.3407668173313141, "learning_rate": 9.72342516668857e-05, "loss": 1.7554, "step": 1024 }, { "epoch": 0.10647138256985561, "grad_norm": 0.3708615005016327, "learning_rate": 9.722889766008796e-05, "loss": 1.7656, "step": 1025 }, { "epoch": 0.10657525708943597, "grad_norm": 0.3609970510005951, "learning_rate": 9.722353862378101e-05, "loss": 1.8226, "step": 1026 }, { "epoch": 0.1066791316090163, "grad_norm": 0.341370552778244, "learning_rate": 9.721817455853554e-05, "loss": 1.6735, "step": 1027 }, { "epoch": 0.10678300612859666, "grad_norm": 0.3461858630180359, "learning_rate": 9.72128054649228e-05, "loss": 1.6466, "step": 1028 }, { "epoch": 0.106886880648177, "grad_norm": 0.3534831404685974, "learning_rate": 9.720743134351457e-05, "loss": 1.8349, "step": 1029 }, { "epoch": 0.10699075516775734, "grad_norm": 0.33681240677833557, "learning_rate": 9.720205219488312e-05, "loss": 1.7535, "step": 1030 }, { "epoch": 0.1070946296873377, "grad_norm": 0.3691225051879883, "learning_rate": 9.71966680196013e-05, "loss": 1.7835, "step": 1031 }, { "epoch": 0.10719850420691804, "grad_norm": 0.3454303443431854, "learning_rate": 9.719127881824249e-05, "loss": 1.7698, "step": 1032 }, { "epoch": 0.10730237872649839, "grad_norm": 0.3353855311870575, "learning_rate": 9.718588459138059e-05, "loss": 1.8369, "step": 1033 }, { "epoch": 0.10740625324607873, "grad_norm": 0.3854605257511139, "learning_rate": 9.718048533959004e-05, "loss": 1.8615, "step": 1034 }, { "epoch": 0.10751012776565909, "grad_norm": 0.34844595193862915, "learning_rate": 9.717508106344582e-05, "loss": 1.9681, "step": 1035 }, { "epoch": 0.10761400228523943, "grad_norm": 0.3415117561817169, "learning_rate": 9.716967176352345e-05, "loss": 1.8015, "step": 1036 }, { "epoch": 0.10771787680481978, "grad_norm": 0.3829508125782013, "learning_rate": 9.716425744039898e-05, "loss": 1.9817, "step": 1037 }, { "epoch": 0.10782175132440013, "grad_norm": 0.3588506877422333, "learning_rate": 9.715883809464899e-05, "loss": 1.7871, "step": 1038 }, { "epoch": 0.10792562584398047, "grad_norm": 0.36895090341567993, "learning_rate": 9.715341372685059e-05, "loss": 1.9029, "step": 1039 }, { "epoch": 0.10802950036356082, "grad_norm": 0.4375429153442383, "learning_rate": 9.714798433758144e-05, "loss": 1.6931, "step": 1040 }, { "epoch": 0.10813337488314116, "grad_norm": 0.34080272912979126, "learning_rate": 9.714254992741971e-05, "loss": 1.7422, "step": 1041 }, { "epoch": 0.10823724940272152, "grad_norm": 0.35768958926200867, "learning_rate": 9.713711049694416e-05, "loss": 1.7005, "step": 1042 }, { "epoch": 0.10834112392230186, "grad_norm": 0.35327664017677307, "learning_rate": 9.713166604673403e-05, "loss": 1.7248, "step": 1043 }, { "epoch": 0.10844499844188221, "grad_norm": 0.33711856603622437, "learning_rate": 9.71262165773691e-05, "loss": 1.719, "step": 1044 }, { "epoch": 0.10854887296146255, "grad_norm": 0.35835373401641846, "learning_rate": 9.712076208942968e-05, "loss": 1.7452, "step": 1045 }, { "epoch": 0.1086527474810429, "grad_norm": 0.3395202159881592, "learning_rate": 9.711530258349668e-05, "loss": 1.7313, "step": 1046 }, { "epoch": 0.10875662200062325, "grad_norm": 0.3273974061012268, "learning_rate": 9.710983806015148e-05, "loss": 1.7674, "step": 1047 }, { "epoch": 0.10886049652020359, "grad_norm": 0.35871952772140503, "learning_rate": 9.710436851997598e-05, "loss": 1.964, "step": 1048 }, { "epoch": 0.10896437103978394, "grad_norm": 0.3521716296672821, "learning_rate": 9.709889396355265e-05, "loss": 1.8366, "step": 1049 }, { "epoch": 0.10906824555936429, "grad_norm": 0.37341251969337463, "learning_rate": 9.709341439146452e-05, "loss": 1.8713, "step": 1050 }, { "epoch": 0.10917212007894464, "grad_norm": 0.37906524538993835, "learning_rate": 9.708792980429508e-05, "loss": 1.988, "step": 1051 }, { "epoch": 0.10927599459852498, "grad_norm": 0.3761735260486603, "learning_rate": 9.708244020262842e-05, "loss": 1.9337, "step": 1052 }, { "epoch": 0.10937986911810534, "grad_norm": 0.33354657888412476, "learning_rate": 9.707694558704914e-05, "loss": 1.8026, "step": 1053 }, { "epoch": 0.10948374363768568, "grad_norm": 0.3411722779273987, "learning_rate": 9.707144595814237e-05, "loss": 1.6728, "step": 1054 }, { "epoch": 0.10958761815726602, "grad_norm": 0.3556554913520813, "learning_rate": 9.706594131649378e-05, "loss": 1.8825, "step": 1055 }, { "epoch": 0.10969149267684637, "grad_norm": 0.3727862536907196, "learning_rate": 9.706043166268957e-05, "loss": 1.8012, "step": 1056 }, { "epoch": 0.10979536719642671, "grad_norm": 0.3387303650379181, "learning_rate": 9.705491699731646e-05, "loss": 1.6768, "step": 1057 }, { "epoch": 0.10989924171600707, "grad_norm": 0.36352434754371643, "learning_rate": 9.704939732096175e-05, "loss": 1.8888, "step": 1058 }, { "epoch": 0.11000311623558741, "grad_norm": 0.3399582803249359, "learning_rate": 9.704387263421322e-05, "loss": 1.6474, "step": 1059 }, { "epoch": 0.11010699075516776, "grad_norm": 0.41029804944992065, "learning_rate": 9.70383429376592e-05, "loss": 1.9323, "step": 1060 }, { "epoch": 0.1102108652747481, "grad_norm": 0.40055909752845764, "learning_rate": 9.703280823188858e-05, "loss": 1.8642, "step": 1061 }, { "epoch": 0.11031473979432845, "grad_norm": 0.365138441324234, "learning_rate": 9.702726851749074e-05, "loss": 1.7669, "step": 1062 }, { "epoch": 0.1104186143139088, "grad_norm": 0.3488384187221527, "learning_rate": 9.702172379505565e-05, "loss": 1.6532, "step": 1063 }, { "epoch": 0.11052248883348914, "grad_norm": 0.34194380044937134, "learning_rate": 9.701617406517374e-05, "loss": 1.6235, "step": 1064 }, { "epoch": 0.1106263633530695, "grad_norm": 0.3830929696559906, "learning_rate": 9.701061932843605e-05, "loss": 1.9023, "step": 1065 }, { "epoch": 0.11073023787264984, "grad_norm": 0.3511880934238434, "learning_rate": 9.700505958543409e-05, "loss": 1.6955, "step": 1066 }, { "epoch": 0.11083411239223019, "grad_norm": 0.35780179500579834, "learning_rate": 9.699949483675994e-05, "loss": 1.7417, "step": 1067 }, { "epoch": 0.11093798691181053, "grad_norm": 0.3555978536605835, "learning_rate": 9.69939250830062e-05, "loss": 1.7226, "step": 1068 }, { "epoch": 0.11104186143139087, "grad_norm": 0.34007731080055237, "learning_rate": 9.698835032476598e-05, "loss": 1.7571, "step": 1069 }, { "epoch": 0.11114573595097123, "grad_norm": 0.34321436285972595, "learning_rate": 9.6982770562633e-05, "loss": 1.8685, "step": 1070 }, { "epoch": 0.11124961047055157, "grad_norm": 0.32458704710006714, "learning_rate": 9.697718579720144e-05, "loss": 1.6949, "step": 1071 }, { "epoch": 0.11135348499013192, "grad_norm": 0.37475547194480896, "learning_rate": 9.697159602906603e-05, "loss": 1.9906, "step": 1072 }, { "epoch": 0.11145735950971226, "grad_norm": 0.38911253213882446, "learning_rate": 9.696600125882202e-05, "loss": 1.9341, "step": 1073 }, { "epoch": 0.11156123402929262, "grad_norm": 0.3872954249382019, "learning_rate": 9.696040148706523e-05, "loss": 1.929, "step": 1074 }, { "epoch": 0.11166510854887296, "grad_norm": 0.3806956708431244, "learning_rate": 9.695479671439198e-05, "loss": 1.8981, "step": 1075 }, { "epoch": 0.11176898306845331, "grad_norm": 0.3513946235179901, "learning_rate": 9.694918694139915e-05, "loss": 1.689, "step": 1076 }, { "epoch": 0.11187285758803366, "grad_norm": 0.3258873522281647, "learning_rate": 9.694357216868413e-05, "loss": 1.6753, "step": 1077 }, { "epoch": 0.111976732107614, "grad_norm": 0.36498063802719116, "learning_rate": 9.693795239684484e-05, "loss": 1.6901, "step": 1078 }, { "epoch": 0.11208060662719435, "grad_norm": 0.34291768074035645, "learning_rate": 9.693232762647976e-05, "loss": 1.6372, "step": 1079 }, { "epoch": 0.11218448114677469, "grad_norm": 0.38301602005958557, "learning_rate": 9.692669785818786e-05, "loss": 1.8587, "step": 1080 }, { "epoch": 0.11228835566635505, "grad_norm": 0.3735436201095581, "learning_rate": 9.69210630925687e-05, "loss": 1.9017, "step": 1081 }, { "epoch": 0.11239223018593539, "grad_norm": 0.34948012232780457, "learning_rate": 9.69154233302223e-05, "loss": 1.7744, "step": 1082 }, { "epoch": 0.11249610470551574, "grad_norm": 0.30937257409095764, "learning_rate": 9.690977857174927e-05, "loss": 1.6596, "step": 1083 }, { "epoch": 0.11259997922509608, "grad_norm": 0.3430674970149994, "learning_rate": 9.690412881775076e-05, "loss": 1.8651, "step": 1084 }, { "epoch": 0.11270385374467642, "grad_norm": 0.4052720069885254, "learning_rate": 9.689847406882838e-05, "loss": 1.9878, "step": 1085 }, { "epoch": 0.11280772826425678, "grad_norm": 0.3367058038711548, "learning_rate": 9.689281432558432e-05, "loss": 1.9459, "step": 1086 }, { "epoch": 0.11291160278383712, "grad_norm": 0.32296720147132874, "learning_rate": 9.688714958862133e-05, "loss": 1.7604, "step": 1087 }, { "epoch": 0.11301547730341747, "grad_norm": 0.36404433846473694, "learning_rate": 9.688147985854264e-05, "loss": 1.8102, "step": 1088 }, { "epoch": 0.11311935182299782, "grad_norm": 0.4039054811000824, "learning_rate": 9.687580513595202e-05, "loss": 1.8611, "step": 1089 }, { "epoch": 0.11322322634257817, "grad_norm": 0.4218135476112366, "learning_rate": 9.687012542145382e-05, "loss": 1.962, "step": 1090 }, { "epoch": 0.11332710086215851, "grad_norm": 0.36868998408317566, "learning_rate": 9.686444071565285e-05, "loss": 1.7224, "step": 1091 }, { "epoch": 0.11343097538173887, "grad_norm": 0.3493179380893707, "learning_rate": 9.68587510191545e-05, "loss": 1.715, "step": 1092 }, { "epoch": 0.11353484990131921, "grad_norm": 0.3461728096008301, "learning_rate": 9.685305633256469e-05, "loss": 1.6688, "step": 1093 }, { "epoch": 0.11363872442089955, "grad_norm": 0.35678285360336304, "learning_rate": 9.684735665648983e-05, "loss": 1.851, "step": 1094 }, { "epoch": 0.1137425989404799, "grad_norm": 0.3611486852169037, "learning_rate": 9.684165199153692e-05, "loss": 1.8039, "step": 1095 }, { "epoch": 0.11384647346006024, "grad_norm": 0.3748013377189636, "learning_rate": 9.683594233831345e-05, "loss": 1.8317, "step": 1096 }, { "epoch": 0.1139503479796406, "grad_norm": 0.39046037197113037, "learning_rate": 9.683022769742746e-05, "loss": 1.9756, "step": 1097 }, { "epoch": 0.11405422249922094, "grad_norm": 0.32861414551734924, "learning_rate": 9.68245080694875e-05, "loss": 1.6867, "step": 1098 }, { "epoch": 0.1141580970188013, "grad_norm": 0.4004691541194916, "learning_rate": 9.68187834551027e-05, "loss": 1.9397, "step": 1099 }, { "epoch": 0.11426197153838163, "grad_norm": 0.33561286330223083, "learning_rate": 9.681305385488264e-05, "loss": 1.7001, "step": 1100 }, { "epoch": 0.11436584605796198, "grad_norm": 0.3517715036869049, "learning_rate": 9.680731926943752e-05, "loss": 1.774, "step": 1101 }, { "epoch": 0.11446972057754233, "grad_norm": 0.37641438841819763, "learning_rate": 9.680157969937799e-05, "loss": 1.7585, "step": 1102 }, { "epoch": 0.11457359509712267, "grad_norm": 0.36919966340065, "learning_rate": 9.67958351453153e-05, "loss": 1.8175, "step": 1103 }, { "epoch": 0.11467746961670303, "grad_norm": 0.3542039096355438, "learning_rate": 9.679008560786117e-05, "loss": 1.7941, "step": 1104 }, { "epoch": 0.11478134413628337, "grad_norm": 0.3409385681152344, "learning_rate": 9.678433108762791e-05, "loss": 1.7144, "step": 1105 }, { "epoch": 0.11488521865586372, "grad_norm": 0.3563219904899597, "learning_rate": 9.677857158522833e-05, "loss": 1.9284, "step": 1106 }, { "epoch": 0.11498909317544406, "grad_norm": 0.3560921251773834, "learning_rate": 9.677280710127575e-05, "loss": 1.5077, "step": 1107 }, { "epoch": 0.11509296769502442, "grad_norm": 0.43264439702033997, "learning_rate": 9.676703763638406e-05, "loss": 2.0279, "step": 1108 }, { "epoch": 0.11519684221460476, "grad_norm": 0.36117497086524963, "learning_rate": 9.676126319116766e-05, "loss": 1.9437, "step": 1109 }, { "epoch": 0.1153007167341851, "grad_norm": 0.3572199046611786, "learning_rate": 9.675548376624148e-05, "loss": 1.8091, "step": 1110 }, { "epoch": 0.11540459125376545, "grad_norm": 0.35779669880867004, "learning_rate": 9.674969936222099e-05, "loss": 1.7403, "step": 1111 }, { "epoch": 0.1155084657733458, "grad_norm": 0.36604002118110657, "learning_rate": 9.674390997972218e-05, "loss": 1.9467, "step": 1112 }, { "epoch": 0.11561234029292615, "grad_norm": 0.38619542121887207, "learning_rate": 9.673811561936157e-05, "loss": 1.8208, "step": 1113 }, { "epoch": 0.11571621481250649, "grad_norm": 0.33228105306625366, "learning_rate": 9.673231628175622e-05, "loss": 1.7336, "step": 1114 }, { "epoch": 0.11582008933208685, "grad_norm": 0.3648672103881836, "learning_rate": 9.672651196752371e-05, "loss": 1.9191, "step": 1115 }, { "epoch": 0.11592396385166719, "grad_norm": 0.33783960342407227, "learning_rate": 9.672070267728215e-05, "loss": 1.7849, "step": 1116 }, { "epoch": 0.11602783837124753, "grad_norm": 0.4384811818599701, "learning_rate": 9.67148884116502e-05, "loss": 2.076, "step": 1117 }, { "epoch": 0.11613171289082788, "grad_norm": 0.3607281446456909, "learning_rate": 9.670906917124703e-05, "loss": 1.7839, "step": 1118 }, { "epoch": 0.11623558741040822, "grad_norm": 0.3579871356487274, "learning_rate": 9.670324495669231e-05, "loss": 1.7778, "step": 1119 }, { "epoch": 0.11633946192998858, "grad_norm": 0.3565009832382202, "learning_rate": 9.669741576860634e-05, "loss": 1.8653, "step": 1120 }, { "epoch": 0.11644333644956892, "grad_norm": 0.36752045154571533, "learning_rate": 9.669158160760982e-05, "loss": 1.9041, "step": 1121 }, { "epoch": 0.11654721096914927, "grad_norm": 0.32525351643562317, "learning_rate": 9.668574247432407e-05, "loss": 1.5888, "step": 1122 }, { "epoch": 0.11665108548872961, "grad_norm": 0.3645164370536804, "learning_rate": 9.667989836937092e-05, "loss": 1.8802, "step": 1123 }, { "epoch": 0.11675496000830995, "grad_norm": 0.34458303451538086, "learning_rate": 9.66740492933727e-05, "loss": 1.8201, "step": 1124 }, { "epoch": 0.11685883452789031, "grad_norm": 0.33625590801239014, "learning_rate": 9.66681952469523e-05, "loss": 1.758, "step": 1125 }, { "epoch": 0.11696270904747065, "grad_norm": 0.3612918257713318, "learning_rate": 9.666233623073315e-05, "loss": 1.8567, "step": 1126 }, { "epoch": 0.117066583567051, "grad_norm": 0.3521936535835266, "learning_rate": 9.665647224533915e-05, "loss": 1.8514, "step": 1127 }, { "epoch": 0.11717045808663135, "grad_norm": 0.42195868492126465, "learning_rate": 9.665060329139479e-05, "loss": 1.9686, "step": 1128 }, { "epoch": 0.1172743326062117, "grad_norm": 0.35592785477638245, "learning_rate": 9.66447293695251e-05, "loss": 1.6642, "step": 1129 }, { "epoch": 0.11737820712579204, "grad_norm": 0.3389096260070801, "learning_rate": 9.663885048035555e-05, "loss": 1.6711, "step": 1130 }, { "epoch": 0.1174820816453724, "grad_norm": 0.3547491729259491, "learning_rate": 9.663296662451221e-05, "loss": 1.7615, "step": 1131 }, { "epoch": 0.11758595616495274, "grad_norm": 0.33912357687950134, "learning_rate": 9.66270778026217e-05, "loss": 1.6615, "step": 1132 }, { "epoch": 0.11768983068453308, "grad_norm": 0.37360331416130066, "learning_rate": 9.662118401531108e-05, "loss": 1.8592, "step": 1133 }, { "epoch": 0.11779370520411343, "grad_norm": 0.3422006666660309, "learning_rate": 9.661528526320804e-05, "loss": 1.8253, "step": 1134 }, { "epoch": 0.11789757972369377, "grad_norm": 0.36613208055496216, "learning_rate": 9.660938154694075e-05, "loss": 1.592, "step": 1135 }, { "epoch": 0.11800145424327413, "grad_norm": 0.38055816292762756, "learning_rate": 9.660347286713787e-05, "loss": 1.6502, "step": 1136 }, { "epoch": 0.11810532876285447, "grad_norm": 0.3511364459991455, "learning_rate": 9.659755922442866e-05, "loss": 1.6945, "step": 1137 }, { "epoch": 0.11820920328243482, "grad_norm": 0.3394559621810913, "learning_rate": 9.659164061944285e-05, "loss": 1.7614, "step": 1138 }, { "epoch": 0.11831307780201517, "grad_norm": 0.38711434602737427, "learning_rate": 9.658571705281076e-05, "loss": 1.8105, "step": 1139 }, { "epoch": 0.1184169523215955, "grad_norm": 0.3627341389656067, "learning_rate": 9.657978852516319e-05, "loss": 1.6527, "step": 1140 }, { "epoch": 0.11852082684117586, "grad_norm": 0.3539731800556183, "learning_rate": 9.657385503713146e-05, "loss": 1.8393, "step": 1141 }, { "epoch": 0.1186247013607562, "grad_norm": 0.3663812279701233, "learning_rate": 9.656791658934747e-05, "loss": 1.7833, "step": 1142 }, { "epoch": 0.11872857588033656, "grad_norm": 0.3631526231765747, "learning_rate": 9.656197318244359e-05, "loss": 1.9837, "step": 1143 }, { "epoch": 0.1188324503999169, "grad_norm": 0.382820725440979, "learning_rate": 9.655602481705279e-05, "loss": 1.8836, "step": 1144 }, { "epoch": 0.11893632491949725, "grad_norm": 0.33744722604751587, "learning_rate": 9.655007149380849e-05, "loss": 1.7552, "step": 1145 }, { "epoch": 0.11904019943907759, "grad_norm": 0.34012195467948914, "learning_rate": 9.654411321334468e-05, "loss": 1.6319, "step": 1146 }, { "epoch": 0.11914407395865795, "grad_norm": 0.34437379240989685, "learning_rate": 9.653814997629587e-05, "loss": 1.6788, "step": 1147 }, { "epoch": 0.11924794847823829, "grad_norm": 0.3765188753604889, "learning_rate": 9.653218178329709e-05, "loss": 1.8694, "step": 1148 }, { "epoch": 0.11935182299781863, "grad_norm": 0.341296911239624, "learning_rate": 9.652620863498393e-05, "loss": 1.7641, "step": 1149 }, { "epoch": 0.11945569751739898, "grad_norm": 0.37878480553627014, "learning_rate": 9.652023053199247e-05, "loss": 1.8416, "step": 1150 }, { "epoch": 0.11955957203697933, "grad_norm": 0.39128556847572327, "learning_rate": 9.651424747495931e-05, "loss": 1.9727, "step": 1151 }, { "epoch": 0.11966344655655968, "grad_norm": 0.32593607902526855, "learning_rate": 9.650825946452164e-05, "loss": 1.6429, "step": 1152 }, { "epoch": 0.11976732107614002, "grad_norm": 0.365972101688385, "learning_rate": 9.65022665013171e-05, "loss": 1.7789, "step": 1153 }, { "epoch": 0.11987119559572038, "grad_norm": 0.3774997889995575, "learning_rate": 9.64962685859839e-05, "loss": 1.8482, "step": 1154 }, { "epoch": 0.11997507011530072, "grad_norm": 0.36030101776123047, "learning_rate": 9.64902657191608e-05, "loss": 1.621, "step": 1155 }, { "epoch": 0.12007894463488106, "grad_norm": 0.40642428398132324, "learning_rate": 9.648425790148704e-05, "loss": 1.869, "step": 1156 }, { "epoch": 0.12018281915446141, "grad_norm": 0.374668151140213, "learning_rate": 9.64782451336024e-05, "loss": 1.8155, "step": 1157 }, { "epoch": 0.12028669367404175, "grad_norm": 0.33325710892677307, "learning_rate": 9.64722274161472e-05, "loss": 1.6419, "step": 1158 }, { "epoch": 0.12039056819362211, "grad_norm": 0.36537379026412964, "learning_rate": 9.646620474976226e-05, "loss": 1.8969, "step": 1159 }, { "epoch": 0.12049444271320245, "grad_norm": 0.3427730202674866, "learning_rate": 9.6460177135089e-05, "loss": 1.7814, "step": 1160 }, { "epoch": 0.1205983172327828, "grad_norm": 0.39888283610343933, "learning_rate": 9.645414457276925e-05, "loss": 1.8611, "step": 1161 }, { "epoch": 0.12070219175236314, "grad_norm": 0.35970908403396606, "learning_rate": 9.644810706344547e-05, "loss": 1.838, "step": 1162 }, { "epoch": 0.1208060662719435, "grad_norm": 0.35236963629722595, "learning_rate": 9.644206460776058e-05, "loss": 1.7295, "step": 1163 }, { "epoch": 0.12090994079152384, "grad_norm": 0.3539031445980072, "learning_rate": 9.643601720635809e-05, "loss": 1.8078, "step": 1164 }, { "epoch": 0.12101381531110418, "grad_norm": 0.3607688546180725, "learning_rate": 9.642996485988198e-05, "loss": 1.8958, "step": 1165 }, { "epoch": 0.12111768983068454, "grad_norm": 0.363936185836792, "learning_rate": 9.642390756897677e-05, "loss": 1.7418, "step": 1166 }, { "epoch": 0.12122156435026488, "grad_norm": 0.3548314571380615, "learning_rate": 9.641784533428754e-05, "loss": 1.6745, "step": 1167 }, { "epoch": 0.12132543886984523, "grad_norm": 0.43917304277420044, "learning_rate": 9.641177815645984e-05, "loss": 2.0477, "step": 1168 }, { "epoch": 0.12142931338942557, "grad_norm": 0.36484494805336, "learning_rate": 9.64057060361398e-05, "loss": 1.8097, "step": 1169 }, { "epoch": 0.12153318790900593, "grad_norm": 0.41941696405410767, "learning_rate": 9.639962897397404e-05, "loss": 1.921, "step": 1170 }, { "epoch": 0.12163706242858627, "grad_norm": 0.3769179582595825, "learning_rate": 9.639354697060974e-05, "loss": 1.9793, "step": 1171 }, { "epoch": 0.12174093694816661, "grad_norm": 0.3361814618110657, "learning_rate": 9.638746002669455e-05, "loss": 1.685, "step": 1172 }, { "epoch": 0.12184481146774696, "grad_norm": 0.3244837522506714, "learning_rate": 9.638136814287672e-05, "loss": 1.6935, "step": 1173 }, { "epoch": 0.1219486859873273, "grad_norm": 0.3598596751689911, "learning_rate": 9.637527131980496e-05, "loss": 1.6975, "step": 1174 }, { "epoch": 0.12205256050690766, "grad_norm": 0.396697461605072, "learning_rate": 9.636916955812856e-05, "loss": 1.982, "step": 1175 }, { "epoch": 0.122156435026488, "grad_norm": 0.37619683146476746, "learning_rate": 9.636306285849728e-05, "loss": 1.8531, "step": 1176 }, { "epoch": 0.12226030954606835, "grad_norm": 0.3777252733707428, "learning_rate": 9.635695122156145e-05, "loss": 1.9038, "step": 1177 }, { "epoch": 0.1223641840656487, "grad_norm": 0.39798396825790405, "learning_rate": 9.635083464797192e-05, "loss": 2.0153, "step": 1178 }, { "epoch": 0.12246805858522904, "grad_norm": 0.3506300449371338, "learning_rate": 9.634471313838005e-05, "loss": 1.743, "step": 1179 }, { "epoch": 0.12257193310480939, "grad_norm": 0.3415655493736267, "learning_rate": 9.633858669343774e-05, "loss": 1.7878, "step": 1180 }, { "epoch": 0.12267580762438973, "grad_norm": 0.39789021015167236, "learning_rate": 9.633245531379739e-05, "loss": 1.9123, "step": 1181 }, { "epoch": 0.12277968214397009, "grad_norm": 0.37617623805999756, "learning_rate": 9.632631900011197e-05, "loss": 1.8383, "step": 1182 }, { "epoch": 0.12288355666355043, "grad_norm": 0.37131214141845703, "learning_rate": 9.632017775303492e-05, "loss": 1.817, "step": 1183 }, { "epoch": 0.12298743118313078, "grad_norm": 0.4080236256122589, "learning_rate": 9.631403157322026e-05, "loss": 1.9845, "step": 1184 }, { "epoch": 0.12309130570271112, "grad_norm": 0.37740015983581543, "learning_rate": 9.630788046132251e-05, "loss": 1.904, "step": 1185 }, { "epoch": 0.12319518022229148, "grad_norm": 0.33329272270202637, "learning_rate": 9.63017244179967e-05, "loss": 1.7073, "step": 1186 }, { "epoch": 0.12329905474187182, "grad_norm": 0.3489798307418823, "learning_rate": 9.629556344389839e-05, "loss": 1.7045, "step": 1187 }, { "epoch": 0.12340292926145216, "grad_norm": 0.3395266532897949, "learning_rate": 9.628939753968371e-05, "loss": 1.7442, "step": 1188 }, { "epoch": 0.12350680378103251, "grad_norm": 0.3406863808631897, "learning_rate": 9.628322670600926e-05, "loss": 1.7656, "step": 1189 }, { "epoch": 0.12361067830061286, "grad_norm": 0.35596734285354614, "learning_rate": 9.627705094353219e-05, "loss": 2.0144, "step": 1190 }, { "epoch": 0.12371455282019321, "grad_norm": 0.3764468729496002, "learning_rate": 9.627087025291015e-05, "loss": 1.8333, "step": 1191 }, { "epoch": 0.12381842733977355, "grad_norm": 0.3464232087135315, "learning_rate": 9.626468463480138e-05, "loss": 1.5966, "step": 1192 }, { "epoch": 0.1239223018593539, "grad_norm": 0.35695046186447144, "learning_rate": 9.625849408986456e-05, "loss": 1.7578, "step": 1193 }, { "epoch": 0.12402617637893425, "grad_norm": 0.3387443721294403, "learning_rate": 9.625229861875897e-05, "loss": 1.6883, "step": 1194 }, { "epoch": 0.12413005089851459, "grad_norm": 0.3431154191493988, "learning_rate": 9.624609822214434e-05, "loss": 1.534, "step": 1195 }, { "epoch": 0.12423392541809494, "grad_norm": 0.3574540317058563, "learning_rate": 9.623989290068099e-05, "loss": 1.804, "step": 1196 }, { "epoch": 0.12433779993767528, "grad_norm": 0.35088303685188293, "learning_rate": 9.623368265502973e-05, "loss": 1.8282, "step": 1197 }, { "epoch": 0.12444167445725564, "grad_norm": 0.35268113017082214, "learning_rate": 9.62274674858519e-05, "loss": 1.748, "step": 1198 }, { "epoch": 0.12454554897683598, "grad_norm": 0.4166131019592285, "learning_rate": 9.622124739380936e-05, "loss": 1.9086, "step": 1199 }, { "epoch": 0.12464942349641633, "grad_norm": 0.38566648960113525, "learning_rate": 9.621502237956452e-05, "loss": 1.7812, "step": 1200 }, { "epoch": 0.12475329801599667, "grad_norm": 0.35631853342056274, "learning_rate": 9.62087924437803e-05, "loss": 1.6562, "step": 1201 }, { "epoch": 0.12485717253557703, "grad_norm": 0.34664246439933777, "learning_rate": 9.620255758712012e-05, "loss": 1.743, "step": 1202 }, { "epoch": 0.12496104705515737, "grad_norm": 0.4030589759349823, "learning_rate": 9.619631781024797e-05, "loss": 1.7807, "step": 1203 }, { "epoch": 0.12506492157473773, "grad_norm": 0.3608097732067108, "learning_rate": 9.619007311382828e-05, "loss": 1.8727, "step": 1204 }, { "epoch": 0.12516879609431805, "grad_norm": 0.3390495777130127, "learning_rate": 9.618382349852612e-05, "loss": 1.6474, "step": 1205 }, { "epoch": 0.1252726706138984, "grad_norm": 0.34509339928627014, "learning_rate": 9.617756896500701e-05, "loss": 1.7651, "step": 1206 }, { "epoch": 0.12537654513347876, "grad_norm": 0.37745997309684753, "learning_rate": 9.6171309513937e-05, "loss": 1.6898, "step": 1207 }, { "epoch": 0.12548041965305912, "grad_norm": 0.33759596943855286, "learning_rate": 9.616504514598267e-05, "loss": 1.5825, "step": 1208 }, { "epoch": 0.12558429417263944, "grad_norm": 0.3818475008010864, "learning_rate": 9.615877586181114e-05, "loss": 1.7711, "step": 1209 }, { "epoch": 0.1256881686922198, "grad_norm": 0.3622841536998749, "learning_rate": 9.615250166209003e-05, "loss": 1.8772, "step": 1210 }, { "epoch": 0.12579204321180015, "grad_norm": 0.38078466057777405, "learning_rate": 9.614622254748749e-05, "loss": 1.6718, "step": 1211 }, { "epoch": 0.12589591773138048, "grad_norm": 0.3571094572544098, "learning_rate": 9.613993851867221e-05, "loss": 1.7133, "step": 1212 }, { "epoch": 0.12599979225096083, "grad_norm": 0.3744667172431946, "learning_rate": 9.613364957631339e-05, "loss": 1.8524, "step": 1213 }, { "epoch": 0.1261036667705412, "grad_norm": 0.3752327561378479, "learning_rate": 9.612735572108073e-05, "loss": 1.7747, "step": 1214 }, { "epoch": 0.12620754129012154, "grad_norm": 0.3768329620361328, "learning_rate": 9.61210569536445e-05, "loss": 1.7503, "step": 1215 }, { "epoch": 0.12631141580970187, "grad_norm": 0.35730409622192383, "learning_rate": 9.611475327467547e-05, "loss": 1.8134, "step": 1216 }, { "epoch": 0.12641529032928223, "grad_norm": 0.38782814145088196, "learning_rate": 9.610844468484492e-05, "loss": 1.9043, "step": 1217 }, { "epoch": 0.12651916484886258, "grad_norm": 0.35042253136634827, "learning_rate": 9.610213118482468e-05, "loss": 1.6922, "step": 1218 }, { "epoch": 0.1266230393684429, "grad_norm": 0.3477722108364105, "learning_rate": 9.609581277528708e-05, "loss": 1.8249, "step": 1219 }, { "epoch": 0.12672691388802326, "grad_norm": 0.358598530292511, "learning_rate": 9.608948945690496e-05, "loss": 1.6571, "step": 1220 }, { "epoch": 0.12683078840760362, "grad_norm": 0.34587663412094116, "learning_rate": 9.608316123035172e-05, "loss": 1.8308, "step": 1221 }, { "epoch": 0.12693466292718397, "grad_norm": 0.3662189543247223, "learning_rate": 9.60768280963013e-05, "loss": 1.8263, "step": 1222 }, { "epoch": 0.1270385374467643, "grad_norm": 0.4229857325553894, "learning_rate": 9.60704900554281e-05, "loss": 1.8465, "step": 1223 }, { "epoch": 0.12714241196634465, "grad_norm": 0.3771613538265228, "learning_rate": 9.606414710840706e-05, "loss": 1.9523, "step": 1224 }, { "epoch": 0.127246286485925, "grad_norm": 0.37686896324157715, "learning_rate": 9.605779925591366e-05, "loss": 1.8436, "step": 1225 }, { "epoch": 0.12735016100550534, "grad_norm": 0.3408556580543518, "learning_rate": 9.605144649862391e-05, "loss": 1.8095, "step": 1226 }, { "epoch": 0.1274540355250857, "grad_norm": 0.3675098717212677, "learning_rate": 9.604508883721432e-05, "loss": 1.9452, "step": 1227 }, { "epoch": 0.12755791004466605, "grad_norm": 0.3680543899536133, "learning_rate": 9.603872627236194e-05, "loss": 1.8987, "step": 1228 }, { "epoch": 0.1276617845642464, "grad_norm": 0.3140665888786316, "learning_rate": 9.603235880474433e-05, "loss": 1.6107, "step": 1229 }, { "epoch": 0.12776565908382673, "grad_norm": 0.35767602920532227, "learning_rate": 9.602598643503957e-05, "loss": 1.5594, "step": 1230 }, { "epoch": 0.12786953360340708, "grad_norm": 0.40325549244880676, "learning_rate": 9.601960916392627e-05, "loss": 2.082, "step": 1231 }, { "epoch": 0.12797340812298744, "grad_norm": 0.34380728006362915, "learning_rate": 9.601322699208357e-05, "loss": 1.7435, "step": 1232 }, { "epoch": 0.1280772826425678, "grad_norm": 0.3566792607307434, "learning_rate": 9.60068399201911e-05, "loss": 1.7594, "step": 1233 }, { "epoch": 0.12818115716214812, "grad_norm": 0.34519535303115845, "learning_rate": 9.600044794892905e-05, "loss": 1.8354, "step": 1234 }, { "epoch": 0.12828503168172847, "grad_norm": 0.36046940088272095, "learning_rate": 9.599405107897811e-05, "loss": 1.7715, "step": 1235 }, { "epoch": 0.12838890620130883, "grad_norm": 0.33479416370391846, "learning_rate": 9.598764931101949e-05, "loss": 1.6034, "step": 1236 }, { "epoch": 0.12849278072088915, "grad_norm": 0.3153260052204132, "learning_rate": 9.598124264573495e-05, "loss": 1.6561, "step": 1237 }, { "epoch": 0.1285966552404695, "grad_norm": 0.3522169888019562, "learning_rate": 9.597483108380674e-05, "loss": 1.8149, "step": 1238 }, { "epoch": 0.12870052976004986, "grad_norm": 0.39202046394348145, "learning_rate": 9.596841462591761e-05, "loss": 1.8963, "step": 1239 }, { "epoch": 0.12880440427963022, "grad_norm": 0.3598194718360901, "learning_rate": 9.596199327275091e-05, "loss": 1.7161, "step": 1240 }, { "epoch": 0.12890827879921055, "grad_norm": 0.36012735962867737, "learning_rate": 9.595556702499045e-05, "loss": 1.86, "step": 1241 }, { "epoch": 0.1290121533187909, "grad_norm": 0.32633063197135925, "learning_rate": 9.594913588332055e-05, "loss": 1.5897, "step": 1242 }, { "epoch": 0.12911602783837126, "grad_norm": 0.35175660252571106, "learning_rate": 9.594269984842612e-05, "loss": 1.8549, "step": 1243 }, { "epoch": 0.12921990235795158, "grad_norm": 0.3716254234313965, "learning_rate": 9.59362589209925e-05, "loss": 1.8379, "step": 1244 }, { "epoch": 0.12932377687753194, "grad_norm": 0.3607979118824005, "learning_rate": 9.592981310170563e-05, "loss": 1.8592, "step": 1245 }, { "epoch": 0.1294276513971123, "grad_norm": 0.3418334424495697, "learning_rate": 9.592336239125193e-05, "loss": 1.7502, "step": 1246 }, { "epoch": 0.12953152591669265, "grad_norm": 0.3638349771499634, "learning_rate": 9.591690679031835e-05, "loss": 1.8981, "step": 1247 }, { "epoch": 0.12963540043627297, "grad_norm": 0.36420029401779175, "learning_rate": 9.591044629959236e-05, "loss": 1.913, "step": 1248 }, { "epoch": 0.12973927495585333, "grad_norm": 0.3521292805671692, "learning_rate": 9.590398091976195e-05, "loss": 1.6886, "step": 1249 }, { "epoch": 0.12984314947543368, "grad_norm": 0.3583468198776245, "learning_rate": 9.589751065151567e-05, "loss": 1.7328, "step": 1250 }, { "epoch": 0.129947023995014, "grad_norm": 0.3620645999908447, "learning_rate": 9.589103549554248e-05, "loss": 1.9107, "step": 1251 }, { "epoch": 0.13005089851459437, "grad_norm": 0.361101895570755, "learning_rate": 9.5884555452532e-05, "loss": 1.7693, "step": 1252 }, { "epoch": 0.13015477303417472, "grad_norm": 0.3591129183769226, "learning_rate": 9.587807052317426e-05, "loss": 1.696, "step": 1253 }, { "epoch": 0.13025864755375507, "grad_norm": 0.3596014678478241, "learning_rate": 9.587158070815988e-05, "loss": 1.9454, "step": 1254 }, { "epoch": 0.1303625220733354, "grad_norm": 0.35805267095565796, "learning_rate": 9.586508600817997e-05, "loss": 1.8159, "step": 1255 }, { "epoch": 0.13046639659291576, "grad_norm": 0.3794294595718384, "learning_rate": 9.585858642392616e-05, "loss": 1.7915, "step": 1256 }, { "epoch": 0.1305702711124961, "grad_norm": 0.350418359041214, "learning_rate": 9.585208195609059e-05, "loss": 1.754, "step": 1257 }, { "epoch": 0.13067414563207644, "grad_norm": 0.37485983967781067, "learning_rate": 9.584557260536597e-05, "loss": 1.8592, "step": 1258 }, { "epoch": 0.1307780201516568, "grad_norm": 0.38537418842315674, "learning_rate": 9.583905837244547e-05, "loss": 1.814, "step": 1259 }, { "epoch": 0.13088189467123715, "grad_norm": 0.3657982349395752, "learning_rate": 9.583253925802283e-05, "loss": 1.7968, "step": 1260 }, { "epoch": 0.1309857691908175, "grad_norm": 0.4190506935119629, "learning_rate": 9.582601526279225e-05, "loss": 2.0191, "step": 1261 }, { "epoch": 0.13108964371039783, "grad_norm": 0.3682914674282074, "learning_rate": 9.581948638744849e-05, "loss": 1.7923, "step": 1262 }, { "epoch": 0.13119351822997818, "grad_norm": 0.33630824089050293, "learning_rate": 9.581295263268684e-05, "loss": 1.6561, "step": 1263 }, { "epoch": 0.13129739274955854, "grad_norm": 0.37690606713294983, "learning_rate": 9.580641399920311e-05, "loss": 1.8836, "step": 1264 }, { "epoch": 0.1314012672691389, "grad_norm": 0.3465617895126343, "learning_rate": 9.579987048769356e-05, "loss": 1.7498, "step": 1265 }, { "epoch": 0.13150514178871922, "grad_norm": 0.3308243155479431, "learning_rate": 9.579332209885508e-05, "loss": 1.6476, "step": 1266 }, { "epoch": 0.13160901630829958, "grad_norm": 0.38376688957214355, "learning_rate": 9.5786768833385e-05, "loss": 1.7795, "step": 1267 }, { "epoch": 0.13171289082787993, "grad_norm": 0.35049548745155334, "learning_rate": 9.578021069198118e-05, "loss": 1.718, "step": 1268 }, { "epoch": 0.13181676534746026, "grad_norm": 0.34886178374290466, "learning_rate": 9.577364767534202e-05, "loss": 1.7214, "step": 1269 }, { "epoch": 0.1319206398670406, "grad_norm": 0.36722105741500854, "learning_rate": 9.576707978416644e-05, "loss": 1.6402, "step": 1270 }, { "epoch": 0.13202451438662097, "grad_norm": 0.4114188849925995, "learning_rate": 9.576050701915385e-05, "loss": 1.8282, "step": 1271 }, { "epoch": 0.13212838890620132, "grad_norm": 0.40800419449806213, "learning_rate": 9.575392938100422e-05, "loss": 2.0049, "step": 1272 }, { "epoch": 0.13223226342578165, "grad_norm": 0.3313359320163727, "learning_rate": 9.5747346870418e-05, "loss": 1.678, "step": 1273 }, { "epoch": 0.132336137945362, "grad_norm": 0.33527714014053345, "learning_rate": 9.574075948809618e-05, "loss": 1.7132, "step": 1274 }, { "epoch": 0.13244001246494236, "grad_norm": 0.37509602308273315, "learning_rate": 9.573416723474026e-05, "loss": 1.7306, "step": 1275 }, { "epoch": 0.13254388698452269, "grad_norm": 0.41604387760162354, "learning_rate": 9.572757011105227e-05, "loss": 1.8486, "step": 1276 }, { "epoch": 0.13264776150410304, "grad_norm": 0.3609203100204468, "learning_rate": 9.572096811773477e-05, "loss": 1.7497, "step": 1277 }, { "epoch": 0.1327516360236834, "grad_norm": 0.346780389547348, "learning_rate": 9.571436125549078e-05, "loss": 1.6892, "step": 1278 }, { "epoch": 0.13285551054326375, "grad_norm": 0.37450289726257324, "learning_rate": 9.57077495250239e-05, "loss": 1.77, "step": 1279 }, { "epoch": 0.13295938506284408, "grad_norm": 0.36386212706565857, "learning_rate": 9.570113292703826e-05, "loss": 1.8936, "step": 1280 }, { "epoch": 0.13306325958242443, "grad_norm": 0.381849080324173, "learning_rate": 9.569451146223843e-05, "loss": 1.7727, "step": 1281 }, { "epoch": 0.13316713410200479, "grad_norm": 0.3608989715576172, "learning_rate": 9.568788513132955e-05, "loss": 1.8763, "step": 1282 }, { "epoch": 0.1332710086215851, "grad_norm": 0.37927472591400146, "learning_rate": 9.568125393501728e-05, "loss": 1.864, "step": 1283 }, { "epoch": 0.13337488314116547, "grad_norm": 0.37271663546562195, "learning_rate": 9.567461787400781e-05, "loss": 1.7884, "step": 1284 }, { "epoch": 0.13347875766074582, "grad_norm": 0.34601160883903503, "learning_rate": 9.566797694900779e-05, "loss": 1.7951, "step": 1285 }, { "epoch": 0.13358263218032618, "grad_norm": 0.32820773124694824, "learning_rate": 9.566133116072447e-05, "loss": 1.602, "step": 1286 }, { "epoch": 0.1336865066999065, "grad_norm": 0.36603811383247375, "learning_rate": 9.565468050986556e-05, "loss": 1.7799, "step": 1287 }, { "epoch": 0.13379038121948686, "grad_norm": 0.33986133337020874, "learning_rate": 9.564802499713928e-05, "loss": 1.7055, "step": 1288 }, { "epoch": 0.13389425573906721, "grad_norm": 0.37927207350730896, "learning_rate": 9.564136462325442e-05, "loss": 1.8803, "step": 1289 }, { "epoch": 0.13399813025864754, "grad_norm": 0.3654598593711853, "learning_rate": 9.563469938892022e-05, "loss": 1.8929, "step": 1290 }, { "epoch": 0.1341020047782279, "grad_norm": 0.38735446333885193, "learning_rate": 9.562802929484651e-05, "loss": 1.6864, "step": 1291 }, { "epoch": 0.13420587929780825, "grad_norm": 0.3329310417175293, "learning_rate": 9.56213543417436e-05, "loss": 1.7724, "step": 1292 }, { "epoch": 0.1343097538173886, "grad_norm": 0.37568461894989014, "learning_rate": 9.561467453032231e-05, "loss": 1.794, "step": 1293 }, { "epoch": 0.13441362833696893, "grad_norm": 0.3253065049648285, "learning_rate": 9.560798986129399e-05, "loss": 1.7469, "step": 1294 }, { "epoch": 0.1345175028565493, "grad_norm": 0.32807695865631104, "learning_rate": 9.56013003353705e-05, "loss": 1.7181, "step": 1295 }, { "epoch": 0.13462137737612964, "grad_norm": 0.34811633825302124, "learning_rate": 9.559460595326424e-05, "loss": 1.6823, "step": 1296 }, { "epoch": 0.13472525189570997, "grad_norm": 0.3585864007472992, "learning_rate": 9.558790671568809e-05, "loss": 1.8669, "step": 1297 }, { "epoch": 0.13482912641529032, "grad_norm": 0.3390277624130249, "learning_rate": 9.558120262335546e-05, "loss": 1.6597, "step": 1298 }, { "epoch": 0.13493300093487068, "grad_norm": 0.3469536006450653, "learning_rate": 9.557449367698032e-05, "loss": 1.8582, "step": 1299 }, { "epoch": 0.13503687545445103, "grad_norm": 0.35477346181869507, "learning_rate": 9.55677798772771e-05, "loss": 1.8483, "step": 1300 }, { "epoch": 0.13514074997403136, "grad_norm": 0.3245786726474762, "learning_rate": 9.556106122496076e-05, "loss": 1.6194, "step": 1301 }, { "epoch": 0.13524462449361171, "grad_norm": 0.37269967794418335, "learning_rate": 9.555433772074678e-05, "loss": 1.8113, "step": 1302 }, { "epoch": 0.13534849901319207, "grad_norm": 0.41088709235191345, "learning_rate": 9.554760936535116e-05, "loss": 1.9416, "step": 1303 }, { "epoch": 0.13545237353277242, "grad_norm": 0.35153427720069885, "learning_rate": 9.554087615949046e-05, "loss": 1.6847, "step": 1304 }, { "epoch": 0.13555624805235275, "grad_norm": 0.38244035840034485, "learning_rate": 9.553413810388165e-05, "loss": 1.8235, "step": 1305 }, { "epoch": 0.1356601225719331, "grad_norm": 0.33529362082481384, "learning_rate": 9.552739519924235e-05, "loss": 1.6378, "step": 1306 }, { "epoch": 0.13576399709151346, "grad_norm": 0.335685133934021, "learning_rate": 9.552064744629054e-05, "loss": 1.8144, "step": 1307 }, { "epoch": 0.1358678716110938, "grad_norm": 0.33851975202560425, "learning_rate": 9.551389484574489e-05, "loss": 1.6885, "step": 1308 }, { "epoch": 0.13597174613067414, "grad_norm": 0.4007343649864197, "learning_rate": 9.550713739832444e-05, "loss": 2.0096, "step": 1309 }, { "epoch": 0.1360756206502545, "grad_norm": 0.3581026494503021, "learning_rate": 9.550037510474883e-05, "loss": 1.7002, "step": 1310 }, { "epoch": 0.13617949516983485, "grad_norm": 0.3608642518520355, "learning_rate": 9.549360796573821e-05, "loss": 1.7883, "step": 1311 }, { "epoch": 0.13628336968941518, "grad_norm": 0.39638829231262207, "learning_rate": 9.548683598201317e-05, "loss": 1.7193, "step": 1312 }, { "epoch": 0.13638724420899553, "grad_norm": 0.3727129399776459, "learning_rate": 9.548005915429495e-05, "loss": 1.8664, "step": 1313 }, { "epoch": 0.1364911187285759, "grad_norm": 0.34268417954444885, "learning_rate": 9.547327748330514e-05, "loss": 1.7827, "step": 1314 }, { "epoch": 0.13659499324815622, "grad_norm": 0.3418048918247223, "learning_rate": 9.5466490969766e-05, "loss": 1.7287, "step": 1315 }, { "epoch": 0.13669886776773657, "grad_norm": 0.3453529477119446, "learning_rate": 9.545969961440025e-05, "loss": 1.7565, "step": 1316 }, { "epoch": 0.13680274228731693, "grad_norm": 0.35973402857780457, "learning_rate": 9.545290341793107e-05, "loss": 1.8727, "step": 1317 }, { "epoch": 0.13690661680689728, "grad_norm": 0.3567400872707367, "learning_rate": 9.544610238108223e-05, "loss": 1.8201, "step": 1318 }, { "epoch": 0.1370104913264776, "grad_norm": 0.37093833088874817, "learning_rate": 9.543929650457796e-05, "loss": 1.8134, "step": 1319 }, { "epoch": 0.13711436584605796, "grad_norm": 0.3393130600452423, "learning_rate": 9.543248578914307e-05, "loss": 1.6867, "step": 1320 }, { "epoch": 0.13721824036563832, "grad_norm": 0.3978770673274994, "learning_rate": 9.542567023550283e-05, "loss": 1.9418, "step": 1321 }, { "epoch": 0.13732211488521864, "grad_norm": 0.33980122208595276, "learning_rate": 9.541884984438304e-05, "loss": 1.6986, "step": 1322 }, { "epoch": 0.137425989404799, "grad_norm": 0.3643237054347992, "learning_rate": 9.541202461651002e-05, "loss": 1.6953, "step": 1323 }, { "epoch": 0.13752986392437935, "grad_norm": 0.3416014313697815, "learning_rate": 9.54051945526106e-05, "loss": 1.6347, "step": 1324 }, { "epoch": 0.1376337384439597, "grad_norm": 0.42336559295654297, "learning_rate": 9.539835965341215e-05, "loss": 2.0321, "step": 1325 }, { "epoch": 0.13773761296354003, "grad_norm": 0.3578283488750458, "learning_rate": 9.539151991964253e-05, "loss": 1.7978, "step": 1326 }, { "epoch": 0.1378414874831204, "grad_norm": 0.39390215277671814, "learning_rate": 9.538467535203007e-05, "loss": 2.0113, "step": 1327 }, { "epoch": 0.13794536200270074, "grad_norm": 0.34902918338775635, "learning_rate": 9.537782595130371e-05, "loss": 1.7936, "step": 1328 }, { "epoch": 0.13804923652228107, "grad_norm": 0.3545171916484833, "learning_rate": 9.537097171819287e-05, "loss": 1.8352, "step": 1329 }, { "epoch": 0.13815311104186143, "grad_norm": 0.41688135266304016, "learning_rate": 9.536411265342742e-05, "loss": 1.9648, "step": 1330 }, { "epoch": 0.13825698556144178, "grad_norm": 0.34818556904792786, "learning_rate": 9.535724875773784e-05, "loss": 1.6804, "step": 1331 }, { "epoch": 0.13836086008102214, "grad_norm": 0.3562292754650116, "learning_rate": 9.535038003185507e-05, "loss": 1.697, "step": 1332 }, { "epoch": 0.13846473460060246, "grad_norm": 0.3660946786403656, "learning_rate": 9.534350647651056e-05, "loss": 1.8382, "step": 1333 }, { "epoch": 0.13856860912018282, "grad_norm": 0.38286975026130676, "learning_rate": 9.533662809243632e-05, "loss": 1.8386, "step": 1334 }, { "epoch": 0.13867248363976317, "grad_norm": 0.3726559281349182, "learning_rate": 9.532974488036481e-05, "loss": 1.701, "step": 1335 }, { "epoch": 0.1387763581593435, "grad_norm": 0.37573710083961487, "learning_rate": 9.532285684102907e-05, "loss": 1.9953, "step": 1336 }, { "epoch": 0.13888023267892385, "grad_norm": 0.36941513419151306, "learning_rate": 9.53159639751626e-05, "loss": 1.7335, "step": 1337 }, { "epoch": 0.1389841071985042, "grad_norm": 0.3815058767795563, "learning_rate": 9.530906628349945e-05, "loss": 1.929, "step": 1338 }, { "epoch": 0.13908798171808456, "grad_norm": 0.3447597026824951, "learning_rate": 9.530216376677417e-05, "loss": 1.7699, "step": 1339 }, { "epoch": 0.1391918562376649, "grad_norm": 0.3957836627960205, "learning_rate": 9.529525642572182e-05, "loss": 1.8501, "step": 1340 }, { "epoch": 0.13929573075724525, "grad_norm": 0.44103363156318665, "learning_rate": 9.528834426107795e-05, "loss": 1.9139, "step": 1341 }, { "epoch": 0.1393996052768256, "grad_norm": 0.3422475755214691, "learning_rate": 9.52814272735787e-05, "loss": 1.7613, "step": 1342 }, { "epoch": 0.13950347979640595, "grad_norm": 0.3645455837249756, "learning_rate": 9.527450546396067e-05, "loss": 1.7897, "step": 1343 }, { "epoch": 0.13960735431598628, "grad_norm": 0.3628111779689789, "learning_rate": 9.526757883296094e-05, "loss": 1.782, "step": 1344 }, { "epoch": 0.13971122883556664, "grad_norm": 0.3505362868309021, "learning_rate": 9.526064738131717e-05, "loss": 1.7047, "step": 1345 }, { "epoch": 0.139815103355147, "grad_norm": 0.36256182193756104, "learning_rate": 9.52537111097675e-05, "loss": 1.8108, "step": 1346 }, { "epoch": 0.13991897787472732, "grad_norm": 0.34468889236450195, "learning_rate": 9.524677001905059e-05, "loss": 1.7674, "step": 1347 }, { "epoch": 0.14002285239430767, "grad_norm": 0.36335834860801697, "learning_rate": 9.52398241099056e-05, "loss": 1.7181, "step": 1348 }, { "epoch": 0.14012672691388803, "grad_norm": 0.3589484393596649, "learning_rate": 9.523287338307224e-05, "loss": 1.8431, "step": 1349 }, { "epoch": 0.14023060143346838, "grad_norm": 0.3601551949977875, "learning_rate": 9.522591783929069e-05, "loss": 1.6761, "step": 1350 }, { "epoch": 0.1403344759530487, "grad_norm": 0.34505337476730347, "learning_rate": 9.521895747930168e-05, "loss": 1.7923, "step": 1351 }, { "epoch": 0.14043835047262906, "grad_norm": 0.3898836374282837, "learning_rate": 9.521199230384641e-05, "loss": 1.9424, "step": 1352 }, { "epoch": 0.14054222499220942, "grad_norm": 0.4122181534767151, "learning_rate": 9.520502231366661e-05, "loss": 1.7709, "step": 1353 }, { "epoch": 0.14064609951178975, "grad_norm": 0.36563369631767273, "learning_rate": 9.519804750950457e-05, "loss": 1.8535, "step": 1354 }, { "epoch": 0.1407499740313701, "grad_norm": 0.3425121009349823, "learning_rate": 9.519106789210301e-05, "loss": 1.6418, "step": 1355 }, { "epoch": 0.14085384855095046, "grad_norm": 0.36973652243614197, "learning_rate": 9.518408346220525e-05, "loss": 1.7506, "step": 1356 }, { "epoch": 0.1409577230705308, "grad_norm": 0.36737698316574097, "learning_rate": 9.517709422055502e-05, "loss": 1.756, "step": 1357 }, { "epoch": 0.14106159759011114, "grad_norm": 0.36824169754981995, "learning_rate": 9.517010016789667e-05, "loss": 1.7849, "step": 1358 }, { "epoch": 0.1411654721096915, "grad_norm": 0.44827938079833984, "learning_rate": 9.516310130497498e-05, "loss": 2.191, "step": 1359 }, { "epoch": 0.14126934662927185, "grad_norm": 0.33879658579826355, "learning_rate": 9.51560976325353e-05, "loss": 1.6468, "step": 1360 }, { "epoch": 0.14137322114885217, "grad_norm": 0.3727847933769226, "learning_rate": 9.514908915132346e-05, "loss": 1.5898, "step": 1361 }, { "epoch": 0.14147709566843253, "grad_norm": 0.34985223412513733, "learning_rate": 9.514207586208578e-05, "loss": 1.5243, "step": 1362 }, { "epoch": 0.14158097018801288, "grad_norm": 0.39218172430992126, "learning_rate": 9.513505776556915e-05, "loss": 1.8003, "step": 1363 }, { "epoch": 0.14168484470759324, "grad_norm": 0.3964877426624298, "learning_rate": 9.512803486252093e-05, "loss": 1.7633, "step": 1364 }, { "epoch": 0.14178871922717357, "grad_norm": 0.3854914903640747, "learning_rate": 9.512100715368902e-05, "loss": 1.8179, "step": 1365 }, { "epoch": 0.14189259374675392, "grad_norm": 0.35278111696243286, "learning_rate": 9.511397463982181e-05, "loss": 1.8105, "step": 1366 }, { "epoch": 0.14199646826633427, "grad_norm": 0.45402637124061584, "learning_rate": 9.510693732166819e-05, "loss": 1.9222, "step": 1367 }, { "epoch": 0.1421003427859146, "grad_norm": 0.3488807678222656, "learning_rate": 9.50998951999776e-05, "loss": 1.6662, "step": 1368 }, { "epoch": 0.14220421730549496, "grad_norm": 0.3388180136680603, "learning_rate": 9.509284827549996e-05, "loss": 1.6714, "step": 1369 }, { "epoch": 0.1423080918250753, "grad_norm": 0.36008358001708984, "learning_rate": 9.508579654898571e-05, "loss": 1.8606, "step": 1370 }, { "epoch": 0.14241196634465567, "grad_norm": 0.37350037693977356, "learning_rate": 9.507874002118582e-05, "loss": 1.8999, "step": 1371 }, { "epoch": 0.142515840864236, "grad_norm": 0.3648621737957001, "learning_rate": 9.507167869285174e-05, "loss": 1.9342, "step": 1372 }, { "epoch": 0.14261971538381635, "grad_norm": 0.3702144920825958, "learning_rate": 9.506461256473545e-05, "loss": 1.8062, "step": 1373 }, { "epoch": 0.1427235899033967, "grad_norm": 0.37488213181495667, "learning_rate": 9.505754163758946e-05, "loss": 1.8751, "step": 1374 }, { "epoch": 0.14282746442297706, "grad_norm": 0.37895387411117554, "learning_rate": 9.505046591216674e-05, "loss": 1.6707, "step": 1375 }, { "epoch": 0.14293133894255738, "grad_norm": 0.3528745174407959, "learning_rate": 9.504338538922078e-05, "loss": 1.8434, "step": 1376 }, { "epoch": 0.14303521346213774, "grad_norm": 0.3752725422382355, "learning_rate": 9.503630006950566e-05, "loss": 1.9281, "step": 1377 }, { "epoch": 0.1431390879817181, "grad_norm": 0.38882383704185486, "learning_rate": 9.502920995377587e-05, "loss": 1.6748, "step": 1378 }, { "epoch": 0.14324296250129842, "grad_norm": 0.3682365417480469, "learning_rate": 9.502211504278644e-05, "loss": 1.767, "step": 1379 }, { "epoch": 0.14334683702087878, "grad_norm": 0.339139848947525, "learning_rate": 9.501501533729297e-05, "loss": 1.5963, "step": 1380 }, { "epoch": 0.14345071154045913, "grad_norm": 0.3614344298839569, "learning_rate": 9.50079108380515e-05, "loss": 1.7736, "step": 1381 }, { "epoch": 0.14355458606003949, "grad_norm": 0.33216235041618347, "learning_rate": 9.500080154581859e-05, "loss": 1.8277, "step": 1382 }, { "epoch": 0.1436584605796198, "grad_norm": 0.3475283086299896, "learning_rate": 9.499368746135133e-05, "loss": 1.7244, "step": 1383 }, { "epoch": 0.14376233509920017, "grad_norm": 0.35839635133743286, "learning_rate": 9.498656858540731e-05, "loss": 1.8232, "step": 1384 }, { "epoch": 0.14386620961878052, "grad_norm": 0.37137824296951294, "learning_rate": 9.497944491874467e-05, "loss": 1.7515, "step": 1385 }, { "epoch": 0.14397008413836085, "grad_norm": 0.35471752285957336, "learning_rate": 9.4972316462122e-05, "loss": 1.8021, "step": 1386 }, { "epoch": 0.1440739586579412, "grad_norm": 0.3838224411010742, "learning_rate": 9.49651832162984e-05, "loss": 1.8813, "step": 1387 }, { "epoch": 0.14417783317752156, "grad_norm": 0.32974106073379517, "learning_rate": 9.495804518203355e-05, "loss": 1.7155, "step": 1388 }, { "epoch": 0.1442817076971019, "grad_norm": 0.35001546144485474, "learning_rate": 9.495090236008756e-05, "loss": 1.8771, "step": 1389 }, { "epoch": 0.14438558221668224, "grad_norm": 0.37329480051994324, "learning_rate": 9.49437547512211e-05, "loss": 1.894, "step": 1390 }, { "epoch": 0.1444894567362626, "grad_norm": 0.35633528232574463, "learning_rate": 9.493660235619535e-05, "loss": 1.7673, "step": 1391 }, { "epoch": 0.14459333125584295, "grad_norm": 0.37964895367622375, "learning_rate": 9.492944517577196e-05, "loss": 2.0935, "step": 1392 }, { "epoch": 0.14469720577542328, "grad_norm": 0.33758020401000977, "learning_rate": 9.492228321071311e-05, "loss": 1.6482, "step": 1393 }, { "epoch": 0.14480108029500363, "grad_norm": 0.39080965518951416, "learning_rate": 9.491511646178151e-05, "loss": 1.6859, "step": 1394 }, { "epoch": 0.14490495481458399, "grad_norm": 0.36754679679870605, "learning_rate": 9.490794492974038e-05, "loss": 1.6268, "step": 1395 }, { "epoch": 0.14500882933416434, "grad_norm": 0.33904823660850525, "learning_rate": 9.490076861535339e-05, "loss": 1.8217, "step": 1396 }, { "epoch": 0.14511270385374467, "grad_norm": 0.36078742146492004, "learning_rate": 9.489358751938477e-05, "loss": 1.7979, "step": 1397 }, { "epoch": 0.14521657837332502, "grad_norm": 0.37665989995002747, "learning_rate": 9.488640164259929e-05, "loss": 1.7041, "step": 1398 }, { "epoch": 0.14532045289290538, "grad_norm": 0.3702797591686249, "learning_rate": 9.487921098576216e-05, "loss": 1.8746, "step": 1399 }, { "epoch": 0.1454243274124857, "grad_norm": 0.3909272253513336, "learning_rate": 9.487201554963911e-05, "loss": 1.9115, "step": 1400 }, { "epoch": 0.14552820193206606, "grad_norm": 0.33725446462631226, "learning_rate": 9.486481533499642e-05, "loss": 1.6462, "step": 1401 }, { "epoch": 0.14563207645164641, "grad_norm": 0.3404362201690674, "learning_rate": 9.485761034260087e-05, "loss": 1.5963, "step": 1402 }, { "epoch": 0.14573595097122677, "grad_norm": 0.3509582281112671, "learning_rate": 9.485040057321972e-05, "loss": 1.7931, "step": 1403 }, { "epoch": 0.1458398254908071, "grad_norm": 0.3783257007598877, "learning_rate": 9.484318602762077e-05, "loss": 2.0324, "step": 1404 }, { "epoch": 0.14594370001038745, "grad_norm": 0.36144283413887024, "learning_rate": 9.483596670657228e-05, "loss": 1.5949, "step": 1405 }, { "epoch": 0.1460475745299678, "grad_norm": 0.33135420083999634, "learning_rate": 9.482874261084307e-05, "loss": 1.6486, "step": 1406 }, { "epoch": 0.14615144904954813, "grad_norm": 0.35105010867118835, "learning_rate": 9.482151374120244e-05, "loss": 1.7674, "step": 1407 }, { "epoch": 0.1462553235691285, "grad_norm": 0.3350829780101776, "learning_rate": 9.481428009842024e-05, "loss": 1.6668, "step": 1408 }, { "epoch": 0.14635919808870884, "grad_norm": 0.3917206823825836, "learning_rate": 9.480704168326676e-05, "loss": 1.7319, "step": 1409 }, { "epoch": 0.1464630726082892, "grad_norm": 0.35552677512168884, "learning_rate": 9.479979849651286e-05, "loss": 1.7501, "step": 1410 }, { "epoch": 0.14656694712786952, "grad_norm": 0.3432137072086334, "learning_rate": 9.479255053892986e-05, "loss": 1.7739, "step": 1411 }, { "epoch": 0.14667082164744988, "grad_norm": 0.37724825739860535, "learning_rate": 9.478529781128962e-05, "loss": 1.8712, "step": 1412 }, { "epoch": 0.14677469616703023, "grad_norm": 0.37483900785446167, "learning_rate": 9.477804031436451e-05, "loss": 1.7262, "step": 1413 }, { "epoch": 0.1468785706866106, "grad_norm": 0.35302111506462097, "learning_rate": 9.47707780489274e-05, "loss": 1.68, "step": 1414 }, { "epoch": 0.14698244520619091, "grad_norm": 0.3883351683616638, "learning_rate": 9.476351101575163e-05, "loss": 1.7192, "step": 1415 }, { "epoch": 0.14708631972577127, "grad_norm": 0.3823285400867462, "learning_rate": 9.475623921561112e-05, "loss": 1.8355, "step": 1416 }, { "epoch": 0.14719019424535162, "grad_norm": 0.3662102520465851, "learning_rate": 9.474896264928024e-05, "loss": 1.5081, "step": 1417 }, { "epoch": 0.14729406876493195, "grad_norm": 0.35046282410621643, "learning_rate": 9.47416813175339e-05, "loss": 1.7609, "step": 1418 }, { "epoch": 0.1473979432845123, "grad_norm": 0.37491992115974426, "learning_rate": 9.47343952211475e-05, "loss": 1.9254, "step": 1419 }, { "epoch": 0.14750181780409266, "grad_norm": 0.38457345962524414, "learning_rate": 9.472710436089693e-05, "loss": 1.8976, "step": 1420 }, { "epoch": 0.14760569232367302, "grad_norm": 0.39856263995170593, "learning_rate": 9.471980873755865e-05, "loss": 1.989, "step": 1421 }, { "epoch": 0.14770956684325334, "grad_norm": 0.3745686113834381, "learning_rate": 9.471250835190957e-05, "loss": 1.7134, "step": 1422 }, { "epoch": 0.1478134413628337, "grad_norm": 0.360574871301651, "learning_rate": 9.470520320472712e-05, "loss": 1.722, "step": 1423 }, { "epoch": 0.14791731588241405, "grad_norm": 0.35429859161376953, "learning_rate": 9.469789329678922e-05, "loss": 1.5907, "step": 1424 }, { "epoch": 0.14802119040199438, "grad_norm": 0.3642517030239105, "learning_rate": 9.469057862887436e-05, "loss": 1.7298, "step": 1425 }, { "epoch": 0.14812506492157473, "grad_norm": 0.4171449840068817, "learning_rate": 9.468325920176148e-05, "loss": 1.9391, "step": 1426 }, { "epoch": 0.1482289394411551, "grad_norm": 0.3378536105155945, "learning_rate": 9.467593501623003e-05, "loss": 1.7072, "step": 1427 }, { "epoch": 0.14833281396073544, "grad_norm": 0.37104731798171997, "learning_rate": 9.466860607305998e-05, "loss": 1.7929, "step": 1428 }, { "epoch": 0.14843668848031577, "grad_norm": 0.38163742423057556, "learning_rate": 9.466127237303182e-05, "loss": 1.8288, "step": 1429 }, { "epoch": 0.14854056299989613, "grad_norm": 0.3787825107574463, "learning_rate": 9.465393391692653e-05, "loss": 1.7313, "step": 1430 }, { "epoch": 0.14864443751947648, "grad_norm": 0.3504464626312256, "learning_rate": 9.464659070552558e-05, "loss": 1.7905, "step": 1431 }, { "epoch": 0.1487483120390568, "grad_norm": 0.3874281346797943, "learning_rate": 9.463924273961099e-05, "loss": 1.8856, "step": 1432 }, { "epoch": 0.14885218655863716, "grad_norm": 0.35603591799736023, "learning_rate": 9.463189001996525e-05, "loss": 1.7671, "step": 1433 }, { "epoch": 0.14895606107821752, "grad_norm": 0.37857985496520996, "learning_rate": 9.462453254737133e-05, "loss": 1.7791, "step": 1434 }, { "epoch": 0.14905993559779787, "grad_norm": 0.3580288290977478, "learning_rate": 9.461717032261282e-05, "loss": 1.6404, "step": 1435 }, { "epoch": 0.1491638101173782, "grad_norm": 0.38064590096473694, "learning_rate": 9.460980334647367e-05, "loss": 1.8078, "step": 1436 }, { "epoch": 0.14926768463695855, "grad_norm": 0.3878273069858551, "learning_rate": 9.460243161973846e-05, "loss": 1.6382, "step": 1437 }, { "epoch": 0.1493715591565389, "grad_norm": 0.3887486457824707, "learning_rate": 9.459505514319216e-05, "loss": 1.8884, "step": 1438 }, { "epoch": 0.14947543367611923, "grad_norm": 0.42063435912132263, "learning_rate": 9.458767391762038e-05, "loss": 1.9866, "step": 1439 }, { "epoch": 0.1495793081956996, "grad_norm": 0.39019012451171875, "learning_rate": 9.45802879438091e-05, "loss": 1.8562, "step": 1440 }, { "epoch": 0.14968318271527994, "grad_norm": 0.36793825030326843, "learning_rate": 9.457289722254489e-05, "loss": 1.8974, "step": 1441 }, { "epoch": 0.1497870572348603, "grad_norm": 0.37522581219673157, "learning_rate": 9.456550175461483e-05, "loss": 1.878, "step": 1442 }, { "epoch": 0.14989093175444063, "grad_norm": 0.3458879590034485, "learning_rate": 9.455810154080644e-05, "loss": 1.7597, "step": 1443 }, { "epoch": 0.14999480627402098, "grad_norm": 0.3603193759918213, "learning_rate": 9.45506965819078e-05, "loss": 1.7119, "step": 1444 }, { "epoch": 0.15009868079360134, "grad_norm": 0.35782983899116516, "learning_rate": 9.45432868787075e-05, "loss": 1.7925, "step": 1445 }, { "epoch": 0.1502025553131817, "grad_norm": 0.3702535331249237, "learning_rate": 9.453587243199458e-05, "loss": 1.8274, "step": 1446 }, { "epoch": 0.15030642983276202, "grad_norm": 0.37376201152801514, "learning_rate": 9.452845324255865e-05, "loss": 1.6782, "step": 1447 }, { "epoch": 0.15041030435234237, "grad_norm": 0.3699168264865875, "learning_rate": 9.452102931118979e-05, "loss": 1.8381, "step": 1448 }, { "epoch": 0.15051417887192273, "grad_norm": 0.36657705903053284, "learning_rate": 9.451360063867857e-05, "loss": 1.8949, "step": 1449 }, { "epoch": 0.15061805339150305, "grad_norm": 0.35056763887405396, "learning_rate": 9.450616722581611e-05, "loss": 1.7087, "step": 1450 }, { "epoch": 0.1507219279110834, "grad_norm": 0.33786895871162415, "learning_rate": 9.449872907339401e-05, "loss": 1.7222, "step": 1451 }, { "epoch": 0.15082580243066376, "grad_norm": 0.3312149941921234, "learning_rate": 9.449128618220436e-05, "loss": 1.7871, "step": 1452 }, { "epoch": 0.15092967695024412, "grad_norm": 0.3575022518634796, "learning_rate": 9.448383855303977e-05, "loss": 1.7017, "step": 1453 }, { "epoch": 0.15103355146982445, "grad_norm": 0.35011738538742065, "learning_rate": 9.447638618669336e-05, "loss": 1.9229, "step": 1454 }, { "epoch": 0.1511374259894048, "grad_norm": 0.37051519751548767, "learning_rate": 9.446892908395877e-05, "loss": 1.8912, "step": 1455 }, { "epoch": 0.15124130050898515, "grad_norm": 0.3656577169895172, "learning_rate": 9.446146724563009e-05, "loss": 1.8665, "step": 1456 }, { "epoch": 0.15134517502856548, "grad_norm": 0.34047558903694153, "learning_rate": 9.445400067250196e-05, "loss": 1.6188, "step": 1457 }, { "epoch": 0.15144904954814584, "grad_norm": 0.3526557385921478, "learning_rate": 9.44465293653695e-05, "loss": 1.7484, "step": 1458 }, { "epoch": 0.1515529240677262, "grad_norm": 0.3618306815624237, "learning_rate": 9.443905332502837e-05, "loss": 1.6957, "step": 1459 }, { "epoch": 0.15165679858730655, "grad_norm": 0.39453551173210144, "learning_rate": 9.443157255227472e-05, "loss": 1.866, "step": 1460 }, { "epoch": 0.15176067310688687, "grad_norm": 0.3841235637664795, "learning_rate": 9.442408704790516e-05, "loss": 1.8885, "step": 1461 }, { "epoch": 0.15186454762646723, "grad_norm": 0.3559393286705017, "learning_rate": 9.441659681271684e-05, "loss": 1.5994, "step": 1462 }, { "epoch": 0.15196842214604758, "grad_norm": 0.3648277521133423, "learning_rate": 9.440910184750743e-05, "loss": 1.741, "step": 1463 }, { "epoch": 0.1520722966656279, "grad_norm": 0.35485556721687317, "learning_rate": 9.440160215307508e-05, "loss": 1.7756, "step": 1464 }, { "epoch": 0.15217617118520826, "grad_norm": 0.35386818647384644, "learning_rate": 9.439409773021844e-05, "loss": 1.877, "step": 1465 }, { "epoch": 0.15228004570478862, "grad_norm": 0.3678518235683441, "learning_rate": 9.438658857973668e-05, "loss": 1.7585, "step": 1466 }, { "epoch": 0.15238392022436897, "grad_norm": 0.36395224928855896, "learning_rate": 9.437907470242946e-05, "loss": 1.7805, "step": 1467 }, { "epoch": 0.1524877947439493, "grad_norm": 0.3579014539718628, "learning_rate": 9.437155609909696e-05, "loss": 1.7271, "step": 1468 }, { "epoch": 0.15259166926352966, "grad_norm": 0.3654552102088928, "learning_rate": 9.436403277053984e-05, "loss": 1.858, "step": 1469 }, { "epoch": 0.15269554378311, "grad_norm": 0.3869788646697998, "learning_rate": 9.435650471755931e-05, "loss": 1.8507, "step": 1470 }, { "epoch": 0.15279941830269034, "grad_norm": 0.3339064121246338, "learning_rate": 9.4348971940957e-05, "loss": 1.7086, "step": 1471 }, { "epoch": 0.1529032928222707, "grad_norm": 0.3778363764286041, "learning_rate": 9.434143444153511e-05, "loss": 1.936, "step": 1472 }, { "epoch": 0.15300716734185105, "grad_norm": 0.4051001965999603, "learning_rate": 9.433389222009633e-05, "loss": 1.7736, "step": 1473 }, { "epoch": 0.1531110418614314, "grad_norm": 0.35207393765449524, "learning_rate": 9.432634527744386e-05, "loss": 1.7176, "step": 1474 }, { "epoch": 0.15321491638101173, "grad_norm": 0.3851469159126282, "learning_rate": 9.431879361438137e-05, "loss": 1.9448, "step": 1475 }, { "epoch": 0.15331879090059208, "grad_norm": 0.35818904638290405, "learning_rate": 9.431123723171305e-05, "loss": 1.7773, "step": 1476 }, { "epoch": 0.15342266542017244, "grad_norm": 0.37601613998413086, "learning_rate": 9.430367613024361e-05, "loss": 1.9316, "step": 1477 }, { "epoch": 0.15352653993975277, "grad_norm": 0.3351070284843445, "learning_rate": 9.429611031077825e-05, "loss": 1.5443, "step": 1478 }, { "epoch": 0.15363041445933312, "grad_norm": 0.4057861566543579, "learning_rate": 9.428853977412266e-05, "loss": 1.8619, "step": 1479 }, { "epoch": 0.15373428897891347, "grad_norm": 0.3662700057029724, "learning_rate": 9.428096452108305e-05, "loss": 1.7752, "step": 1480 }, { "epoch": 0.15383816349849383, "grad_norm": 0.38205060362815857, "learning_rate": 9.427338455246612e-05, "loss": 1.7397, "step": 1481 }, { "epoch": 0.15394203801807416, "grad_norm": 0.3742862641811371, "learning_rate": 9.426579986907909e-05, "loss": 1.7385, "step": 1482 }, { "epoch": 0.1540459125376545, "grad_norm": 0.37825533747673035, "learning_rate": 9.425821047172965e-05, "loss": 1.8288, "step": 1483 }, { "epoch": 0.15414978705723487, "grad_norm": 0.42222508788108826, "learning_rate": 9.425061636122603e-05, "loss": 1.9567, "step": 1484 }, { "epoch": 0.15425366157681522, "grad_norm": 0.3459799885749817, "learning_rate": 9.424301753837692e-05, "loss": 1.7383, "step": 1485 }, { "epoch": 0.15435753609639555, "grad_norm": 0.38764312863349915, "learning_rate": 9.423541400399157e-05, "loss": 1.8508, "step": 1486 }, { "epoch": 0.1544614106159759, "grad_norm": 0.3998440206050873, "learning_rate": 9.422780575887967e-05, "loss": 1.9329, "step": 1487 }, { "epoch": 0.15456528513555626, "grad_norm": 0.365713894367218, "learning_rate": 9.422019280385145e-05, "loss": 1.9418, "step": 1488 }, { "epoch": 0.15466915965513658, "grad_norm": 0.3910145163536072, "learning_rate": 9.421257513971762e-05, "loss": 1.8998, "step": 1489 }, { "epoch": 0.15477303417471694, "grad_norm": 0.3563111424446106, "learning_rate": 9.420495276728943e-05, "loss": 1.8042, "step": 1490 }, { "epoch": 0.1548769086942973, "grad_norm": 0.3810329735279083, "learning_rate": 9.419732568737856e-05, "loss": 1.9044, "step": 1491 }, { "epoch": 0.15498078321387765, "grad_norm": 0.4031761884689331, "learning_rate": 9.418969390079726e-05, "loss": 2.098, "step": 1492 }, { "epoch": 0.15508465773345798, "grad_norm": 0.3679083287715912, "learning_rate": 9.418205740835825e-05, "loss": 1.7396, "step": 1493 }, { "epoch": 0.15518853225303833, "grad_norm": 0.35544419288635254, "learning_rate": 9.417441621087476e-05, "loss": 1.7114, "step": 1494 }, { "epoch": 0.15529240677261869, "grad_norm": 0.36036983132362366, "learning_rate": 9.416677030916053e-05, "loss": 1.828, "step": 1495 }, { "epoch": 0.155396281292199, "grad_norm": 0.34864529967308044, "learning_rate": 9.415911970402976e-05, "loss": 1.7821, "step": 1496 }, { "epoch": 0.15550015581177937, "grad_norm": 0.4014929533004761, "learning_rate": 9.41514643962972e-05, "loss": 1.8746, "step": 1497 }, { "epoch": 0.15560403033135972, "grad_norm": 0.39249807596206665, "learning_rate": 9.414380438677808e-05, "loss": 1.9818, "step": 1498 }, { "epoch": 0.15570790485094008, "grad_norm": 0.33457159996032715, "learning_rate": 9.413613967628814e-05, "loss": 1.7178, "step": 1499 }, { "epoch": 0.1558117793705204, "grad_norm": 0.326725572347641, "learning_rate": 9.412847026564357e-05, "loss": 1.5971, "step": 1500 }, { "epoch": 0.15591565389010076, "grad_norm": 0.36268436908721924, "learning_rate": 9.412079615566116e-05, "loss": 1.8022, "step": 1501 }, { "epoch": 0.1560195284096811, "grad_norm": 0.3470126986503601, "learning_rate": 9.411311734715811e-05, "loss": 1.8473, "step": 1502 }, { "epoch": 0.15612340292926144, "grad_norm": 0.3726639747619629, "learning_rate": 9.410543384095214e-05, "loss": 1.7718, "step": 1503 }, { "epoch": 0.1562272774488418, "grad_norm": 0.37733760476112366, "learning_rate": 9.409774563786152e-05, "loss": 1.8614, "step": 1504 }, { "epoch": 0.15633115196842215, "grad_norm": 0.4124131202697754, "learning_rate": 9.409005273870495e-05, "loss": 2.1187, "step": 1505 }, { "epoch": 0.1564350264880025, "grad_norm": 0.3965660631656647, "learning_rate": 9.40823551443017e-05, "loss": 1.7244, "step": 1506 }, { "epoch": 0.15653890100758283, "grad_norm": 0.390770822763443, "learning_rate": 9.407465285547147e-05, "loss": 1.8046, "step": 1507 }, { "epoch": 0.15664277552716319, "grad_norm": 0.369103342294693, "learning_rate": 9.406694587303451e-05, "loss": 1.7574, "step": 1508 }, { "epoch": 0.15674665004674354, "grad_norm": 0.38057541847229004, "learning_rate": 9.405923419781154e-05, "loss": 1.8895, "step": 1509 }, { "epoch": 0.15685052456632387, "grad_norm": 0.3767298460006714, "learning_rate": 9.405151783062381e-05, "loss": 1.6415, "step": 1510 }, { "epoch": 0.15695439908590422, "grad_norm": 0.35092034935951233, "learning_rate": 9.404379677229305e-05, "loss": 1.7707, "step": 1511 }, { "epoch": 0.15705827360548458, "grad_norm": 0.3567756712436676, "learning_rate": 9.403607102364148e-05, "loss": 1.7814, "step": 1512 }, { "epoch": 0.15716214812506493, "grad_norm": 0.38232964277267456, "learning_rate": 9.402834058549184e-05, "loss": 2.0017, "step": 1513 }, { "epoch": 0.15726602264464526, "grad_norm": 0.35852310061454773, "learning_rate": 9.402060545866737e-05, "loss": 1.8552, "step": 1514 }, { "epoch": 0.15736989716422561, "grad_norm": 0.3555811047554016, "learning_rate": 9.401286564399178e-05, "loss": 1.8447, "step": 1515 }, { "epoch": 0.15747377168380597, "grad_norm": 0.33671462535858154, "learning_rate": 9.400512114228935e-05, "loss": 1.6783, "step": 1516 }, { "epoch": 0.1575776462033863, "grad_norm": 0.3423287570476532, "learning_rate": 9.399737195438473e-05, "loss": 1.7901, "step": 1517 }, { "epoch": 0.15768152072296665, "grad_norm": 0.3938925564289093, "learning_rate": 9.398961808110321e-05, "loss": 1.6647, "step": 1518 }, { "epoch": 0.157785395242547, "grad_norm": 0.366240918636322, "learning_rate": 9.398185952327048e-05, "loss": 1.7797, "step": 1519 }, { "epoch": 0.15788926976212736, "grad_norm": 0.3558516204357147, "learning_rate": 9.397409628171279e-05, "loss": 1.6302, "step": 1520 }, { "epoch": 0.1579931442817077, "grad_norm": 0.3891289234161377, "learning_rate": 9.396632835725687e-05, "loss": 1.9296, "step": 1521 }, { "epoch": 0.15809701880128804, "grad_norm": 0.4402526319026947, "learning_rate": 9.395855575072991e-05, "loss": 1.9748, "step": 1522 }, { "epoch": 0.1582008933208684, "grad_norm": 0.3928702473640442, "learning_rate": 9.395077846295967e-05, "loss": 1.9662, "step": 1523 }, { "epoch": 0.15830476784044875, "grad_norm": 0.36685439944267273, "learning_rate": 9.394299649477434e-05, "loss": 1.7051, "step": 1524 }, { "epoch": 0.15840864236002908, "grad_norm": 0.36427903175354004, "learning_rate": 9.393520984700266e-05, "loss": 1.8365, "step": 1525 }, { "epoch": 0.15851251687960943, "grad_norm": 0.3728634715080261, "learning_rate": 9.392741852047385e-05, "loss": 1.8812, "step": 1526 }, { "epoch": 0.1586163913991898, "grad_norm": 0.3916126489639282, "learning_rate": 9.391962251601758e-05, "loss": 1.8, "step": 1527 }, { "epoch": 0.15872026591877011, "grad_norm": 0.3563532531261444, "learning_rate": 9.391182183446413e-05, "loss": 1.5527, "step": 1528 }, { "epoch": 0.15882414043835047, "grad_norm": 0.3756893277168274, "learning_rate": 9.390401647664417e-05, "loss": 1.9147, "step": 1529 }, { "epoch": 0.15892801495793082, "grad_norm": 0.35163572430610657, "learning_rate": 9.389620644338892e-05, "loss": 1.7478, "step": 1530 }, { "epoch": 0.15903188947751118, "grad_norm": 0.3400569260120392, "learning_rate": 9.388839173553007e-05, "loss": 1.5696, "step": 1531 }, { "epoch": 0.1591357639970915, "grad_norm": 0.3381504416465759, "learning_rate": 9.388057235389986e-05, "loss": 1.6998, "step": 1532 }, { "epoch": 0.15923963851667186, "grad_norm": 0.35516080260276794, "learning_rate": 9.3872748299331e-05, "loss": 1.8103, "step": 1533 }, { "epoch": 0.15934351303625222, "grad_norm": 0.3808714747428894, "learning_rate": 9.386491957265662e-05, "loss": 1.7707, "step": 1534 }, { "epoch": 0.15944738755583254, "grad_norm": 0.3629775941371918, "learning_rate": 9.38570861747105e-05, "loss": 1.8373, "step": 1535 }, { "epoch": 0.1595512620754129, "grad_norm": 0.340930700302124, "learning_rate": 9.384924810632679e-05, "loss": 1.7374, "step": 1536 }, { "epoch": 0.15965513659499325, "grad_norm": 0.40548157691955566, "learning_rate": 9.384140536834018e-05, "loss": 2.0285, "step": 1537 }, { "epoch": 0.1597590111145736, "grad_norm": 0.39659062027931213, "learning_rate": 9.383355796158588e-05, "loss": 1.8455, "step": 1538 }, { "epoch": 0.15986288563415393, "grad_norm": 0.35021933913230896, "learning_rate": 9.382570588689958e-05, "loss": 1.8248, "step": 1539 }, { "epoch": 0.1599667601537343, "grad_norm": 0.39401671290397644, "learning_rate": 9.381784914511746e-05, "loss": 2.1518, "step": 1540 }, { "epoch": 0.16007063467331464, "grad_norm": 0.3451925814151764, "learning_rate": 9.38099877370762e-05, "loss": 1.7234, "step": 1541 }, { "epoch": 0.16017450919289497, "grad_norm": 0.3629453778266907, "learning_rate": 9.380212166361298e-05, "loss": 1.7744, "step": 1542 }, { "epoch": 0.16027838371247533, "grad_norm": 0.4119042456150055, "learning_rate": 9.379425092556545e-05, "loss": 2.0752, "step": 1543 }, { "epoch": 0.16038225823205568, "grad_norm": 0.347231388092041, "learning_rate": 9.378637552377181e-05, "loss": 1.7362, "step": 1544 }, { "epoch": 0.16048613275163603, "grad_norm": 0.4104901850223541, "learning_rate": 9.377849545907074e-05, "loss": 1.9631, "step": 1545 }, { "epoch": 0.16059000727121636, "grad_norm": 0.396040141582489, "learning_rate": 9.377061073230138e-05, "loss": 1.955, "step": 1546 }, { "epoch": 0.16069388179079672, "grad_norm": 0.3335644602775574, "learning_rate": 9.37627213443034e-05, "loss": 1.5817, "step": 1547 }, { "epoch": 0.16079775631037707, "grad_norm": 0.3619517385959625, "learning_rate": 9.375482729591696e-05, "loss": 1.7563, "step": 1548 }, { "epoch": 0.1609016308299574, "grad_norm": 0.35229092836380005, "learning_rate": 9.37469285879827e-05, "loss": 1.7638, "step": 1549 }, { "epoch": 0.16100550534953775, "grad_norm": 0.36668309569358826, "learning_rate": 9.373902522134181e-05, "loss": 1.6821, "step": 1550 }, { "epoch": 0.1611093798691181, "grad_norm": 0.38943883776664734, "learning_rate": 9.37311171968359e-05, "loss": 1.6423, "step": 1551 }, { "epoch": 0.16121325438869846, "grad_norm": 0.35452452301979065, "learning_rate": 9.372320451530711e-05, "loss": 1.7089, "step": 1552 }, { "epoch": 0.1613171289082788, "grad_norm": 0.39926689863204956, "learning_rate": 9.371528717759811e-05, "loss": 1.801, "step": 1553 }, { "epoch": 0.16142100342785914, "grad_norm": 0.37590524554252625, "learning_rate": 9.370736518455203e-05, "loss": 1.7258, "step": 1554 }, { "epoch": 0.1615248779474395, "grad_norm": 0.35222113132476807, "learning_rate": 9.369943853701246e-05, "loss": 1.8522, "step": 1555 }, { "epoch": 0.16162875246701985, "grad_norm": 0.35563012957572937, "learning_rate": 9.369150723582358e-05, "loss": 1.6075, "step": 1556 }, { "epoch": 0.16173262698660018, "grad_norm": 0.39180493354797363, "learning_rate": 9.368357128182998e-05, "loss": 1.7797, "step": 1557 }, { "epoch": 0.16183650150618054, "grad_norm": 0.3635503053665161, "learning_rate": 9.367563067587677e-05, "loss": 1.8579, "step": 1558 }, { "epoch": 0.1619403760257609, "grad_norm": 0.3636340796947479, "learning_rate": 9.366768541880958e-05, "loss": 1.708, "step": 1559 }, { "epoch": 0.16204425054534122, "grad_norm": 0.39017453789711, "learning_rate": 9.365973551147453e-05, "loss": 1.8155, "step": 1560 }, { "epoch": 0.16214812506492157, "grad_norm": 0.3406469225883484, "learning_rate": 9.365178095471819e-05, "loss": 1.7573, "step": 1561 }, { "epoch": 0.16225199958450193, "grad_norm": 0.36579304933547974, "learning_rate": 9.364382174938768e-05, "loss": 1.898, "step": 1562 }, { "epoch": 0.16235587410408228, "grad_norm": 0.3369811773300171, "learning_rate": 9.363585789633057e-05, "loss": 1.6292, "step": 1563 }, { "epoch": 0.1624597486236626, "grad_norm": 0.3355672061443329, "learning_rate": 9.3627889396395e-05, "loss": 1.6484, "step": 1564 }, { "epoch": 0.16256362314324296, "grad_norm": 0.37393924593925476, "learning_rate": 9.36199162504295e-05, "loss": 1.7915, "step": 1565 }, { "epoch": 0.16266749766282332, "grad_norm": 0.37588125467300415, "learning_rate": 9.361193845928318e-05, "loss": 1.6923, "step": 1566 }, { "epoch": 0.16277137218240365, "grad_norm": 0.4559624493122101, "learning_rate": 9.360395602380559e-05, "loss": 1.8842, "step": 1567 }, { "epoch": 0.162875246701984, "grad_norm": 0.39234891533851624, "learning_rate": 9.359596894484682e-05, "loss": 1.9783, "step": 1568 }, { "epoch": 0.16297912122156435, "grad_norm": 0.36912286281585693, "learning_rate": 9.35879772232574e-05, "loss": 1.8153, "step": 1569 }, { "epoch": 0.1630829957411447, "grad_norm": 0.3693285584449768, "learning_rate": 9.357998085988842e-05, "loss": 1.5263, "step": 1570 }, { "epoch": 0.16318687026072504, "grad_norm": 0.38860028982162476, "learning_rate": 9.35719798555914e-05, "loss": 1.6806, "step": 1571 }, { "epoch": 0.1632907447803054, "grad_norm": 0.401967853307724, "learning_rate": 9.356397421121842e-05, "loss": 1.855, "step": 1572 }, { "epoch": 0.16339461929988575, "grad_norm": 0.35193613171577454, "learning_rate": 9.355596392762197e-05, "loss": 1.8909, "step": 1573 }, { "epoch": 0.16349849381946607, "grad_norm": 0.38218873739242554, "learning_rate": 9.354794900565515e-05, "loss": 1.874, "step": 1574 }, { "epoch": 0.16360236833904643, "grad_norm": 0.3593958020210266, "learning_rate": 9.353992944617142e-05, "loss": 1.6768, "step": 1575 }, { "epoch": 0.16370624285862678, "grad_norm": 0.3737293481826782, "learning_rate": 9.353190525002484e-05, "loss": 1.6279, "step": 1576 }, { "epoch": 0.16381011737820714, "grad_norm": 0.38832589983940125, "learning_rate": 9.35238764180699e-05, "loss": 1.8541, "step": 1577 }, { "epoch": 0.16391399189778746, "grad_norm": 0.37959024310112, "learning_rate": 9.351584295116165e-05, "loss": 1.5282, "step": 1578 }, { "epoch": 0.16401786641736782, "grad_norm": 0.37193652987480164, "learning_rate": 9.350780485015553e-05, "loss": 1.6896, "step": 1579 }, { "epoch": 0.16412174093694817, "grad_norm": 0.35193783044815063, "learning_rate": 9.349976211590758e-05, "loss": 1.57, "step": 1580 }, { "epoch": 0.1642256154565285, "grad_norm": 0.3922156095504761, "learning_rate": 9.349171474927427e-05, "loss": 1.5819, "step": 1581 }, { "epoch": 0.16432948997610886, "grad_norm": 0.36705270409584045, "learning_rate": 9.34836627511126e-05, "loss": 1.6357, "step": 1582 }, { "epoch": 0.1644333644956892, "grad_norm": 0.36877331137657166, "learning_rate": 9.347560612228003e-05, "loss": 1.7312, "step": 1583 }, { "epoch": 0.16453723901526957, "grad_norm": 0.3872382342815399, "learning_rate": 9.346754486363452e-05, "loss": 1.8537, "step": 1584 }, { "epoch": 0.1646411135348499, "grad_norm": 0.3447991609573364, "learning_rate": 9.345947897603455e-05, "loss": 1.6428, "step": 1585 }, { "epoch": 0.16474498805443025, "grad_norm": 0.3650977909564972, "learning_rate": 9.345140846033906e-05, "loss": 1.8029, "step": 1586 }, { "epoch": 0.1648488625740106, "grad_norm": 0.35949772596359253, "learning_rate": 9.344333331740751e-05, "loss": 1.6589, "step": 1587 }, { "epoch": 0.16495273709359093, "grad_norm": 0.38417744636535645, "learning_rate": 9.343525354809984e-05, "loss": 1.7891, "step": 1588 }, { "epoch": 0.16505661161317128, "grad_norm": 0.36684268712997437, "learning_rate": 9.342716915327647e-05, "loss": 1.7708, "step": 1589 }, { "epoch": 0.16516048613275164, "grad_norm": 0.3678865134716034, "learning_rate": 9.341908013379831e-05, "loss": 1.7155, "step": 1590 }, { "epoch": 0.165264360652332, "grad_norm": 0.38696667551994324, "learning_rate": 9.341098649052681e-05, "loss": 1.8784, "step": 1591 }, { "epoch": 0.16536823517191232, "grad_norm": 0.3689199388027191, "learning_rate": 9.340288822432388e-05, "loss": 1.874, "step": 1592 }, { "epoch": 0.16547210969149267, "grad_norm": 0.34134095907211304, "learning_rate": 9.339478533605191e-05, "loss": 1.5365, "step": 1593 }, { "epoch": 0.16557598421107303, "grad_norm": 0.3707195818424225, "learning_rate": 9.338667782657378e-05, "loss": 1.7984, "step": 1594 }, { "epoch": 0.16567985873065338, "grad_norm": 0.3389596939086914, "learning_rate": 9.337856569675288e-05, "loss": 1.6244, "step": 1595 }, { "epoch": 0.1657837332502337, "grad_norm": 0.3761754035949707, "learning_rate": 9.337044894745313e-05, "loss": 1.8774, "step": 1596 }, { "epoch": 0.16588760776981407, "grad_norm": 0.37581124901771545, "learning_rate": 9.336232757953884e-05, "loss": 1.7503, "step": 1597 }, { "epoch": 0.16599148228939442, "grad_norm": 0.37551531195640564, "learning_rate": 9.335420159387491e-05, "loss": 1.8433, "step": 1598 }, { "epoch": 0.16609535680897475, "grad_norm": 0.3547391891479492, "learning_rate": 9.33460709913267e-05, "loss": 1.8019, "step": 1599 }, { "epoch": 0.1661992313285551, "grad_norm": 0.40177521109580994, "learning_rate": 9.333793577276004e-05, "loss": 1.9248, "step": 1600 }, { "epoch": 0.16630310584813546, "grad_norm": 0.345114141702652, "learning_rate": 9.332979593904125e-05, "loss": 1.674, "step": 1601 }, { "epoch": 0.1664069803677158, "grad_norm": 0.36117351055145264, "learning_rate": 9.33216514910372e-05, "loss": 1.9506, "step": 1602 }, { "epoch": 0.16651085488729614, "grad_norm": 0.3475273847579956, "learning_rate": 9.331350242961518e-05, "loss": 1.716, "step": 1603 }, { "epoch": 0.1666147294068765, "grad_norm": 0.3611377477645874, "learning_rate": 9.3305348755643e-05, "loss": 1.7274, "step": 1604 }, { "epoch": 0.16671860392645685, "grad_norm": 0.40187951922416687, "learning_rate": 9.329719046998898e-05, "loss": 1.7231, "step": 1605 }, { "epoch": 0.16682247844603718, "grad_norm": 0.3646019399166107, "learning_rate": 9.328902757352191e-05, "loss": 1.6772, "step": 1606 }, { "epoch": 0.16692635296561753, "grad_norm": 0.34303680062294006, "learning_rate": 9.328086006711108e-05, "loss": 1.6853, "step": 1607 }, { "epoch": 0.16703022748519789, "grad_norm": 0.36761873960494995, "learning_rate": 9.327268795162622e-05, "loss": 1.7584, "step": 1608 }, { "epoch": 0.16713410200477824, "grad_norm": 0.36471322178840637, "learning_rate": 9.326451122793766e-05, "loss": 1.7157, "step": 1609 }, { "epoch": 0.16723797652435857, "grad_norm": 0.385139524936676, "learning_rate": 9.325632989691613e-05, "loss": 1.7911, "step": 1610 }, { "epoch": 0.16734185104393892, "grad_norm": 0.3613028824329376, "learning_rate": 9.324814395943289e-05, "loss": 1.7849, "step": 1611 }, { "epoch": 0.16744572556351928, "grad_norm": 0.3828355669975281, "learning_rate": 9.323995341635963e-05, "loss": 1.9662, "step": 1612 }, { "epoch": 0.1675496000830996, "grad_norm": 0.3685465157032013, "learning_rate": 9.323175826856865e-05, "loss": 1.8091, "step": 1613 }, { "epoch": 0.16765347460267996, "grad_norm": 0.36837613582611084, "learning_rate": 9.322355851693263e-05, "loss": 1.716, "step": 1614 }, { "epoch": 0.1677573491222603, "grad_norm": 0.4158804416656494, "learning_rate": 9.321535416232476e-05, "loss": 1.8553, "step": 1615 }, { "epoch": 0.16786122364184067, "grad_norm": 0.35498541593551636, "learning_rate": 9.32071452056188e-05, "loss": 1.6873, "step": 1616 }, { "epoch": 0.167965098161421, "grad_norm": 0.37819525599479675, "learning_rate": 9.319893164768888e-05, "loss": 1.7064, "step": 1617 }, { "epoch": 0.16806897268100135, "grad_norm": 0.3379732668399811, "learning_rate": 9.31907134894097e-05, "loss": 1.6289, "step": 1618 }, { "epoch": 0.1681728472005817, "grad_norm": 0.38348469138145447, "learning_rate": 9.318249073165646e-05, "loss": 1.7028, "step": 1619 }, { "epoch": 0.16827672172016203, "grad_norm": 0.3996196985244751, "learning_rate": 9.317426337530477e-05, "loss": 1.7427, "step": 1620 }, { "epoch": 0.16838059623974239, "grad_norm": 0.3799591064453125, "learning_rate": 9.316603142123082e-05, "loss": 1.6961, "step": 1621 }, { "epoch": 0.16848447075932274, "grad_norm": 0.36466121673583984, "learning_rate": 9.315779487031122e-05, "loss": 1.8285, "step": 1622 }, { "epoch": 0.1685883452789031, "grad_norm": 0.35838162899017334, "learning_rate": 9.314955372342311e-05, "loss": 1.643, "step": 1623 }, { "epoch": 0.16869221979848342, "grad_norm": 0.37461066246032715, "learning_rate": 9.314130798144412e-05, "loss": 1.8822, "step": 1624 }, { "epoch": 0.16879609431806378, "grad_norm": 0.4094388484954834, "learning_rate": 9.313305764525232e-05, "loss": 1.9001, "step": 1625 }, { "epoch": 0.16889996883764413, "grad_norm": 0.382744163274765, "learning_rate": 9.312480271572634e-05, "loss": 1.8858, "step": 1626 }, { "epoch": 0.16900384335722446, "grad_norm": 0.3661576807498932, "learning_rate": 9.311654319374528e-05, "loss": 1.8909, "step": 1627 }, { "epoch": 0.16910771787680481, "grad_norm": 0.38933804631233215, "learning_rate": 9.310827908018866e-05, "loss": 1.8039, "step": 1628 }, { "epoch": 0.16921159239638517, "grad_norm": 0.3421494662761688, "learning_rate": 9.31000103759366e-05, "loss": 1.6478, "step": 1629 }, { "epoch": 0.16931546691596552, "grad_norm": 0.3714037239551544, "learning_rate": 9.309173708186963e-05, "loss": 1.7259, "step": 1630 }, { "epoch": 0.16941934143554585, "grad_norm": 0.4035167694091797, "learning_rate": 9.308345919886877e-05, "loss": 1.8386, "step": 1631 }, { "epoch": 0.1695232159551262, "grad_norm": 0.37035608291625977, "learning_rate": 9.307517672781557e-05, "loss": 1.8391, "step": 1632 }, { "epoch": 0.16962709047470656, "grad_norm": 0.37822678685188293, "learning_rate": 9.306688966959205e-05, "loss": 1.8277, "step": 1633 }, { "epoch": 0.16973096499428691, "grad_norm": 0.3702380955219269, "learning_rate": 9.305859802508073e-05, "loss": 1.7996, "step": 1634 }, { "epoch": 0.16983483951386724, "grad_norm": 0.37005430459976196, "learning_rate": 9.305030179516458e-05, "loss": 1.7441, "step": 1635 }, { "epoch": 0.1699387140334476, "grad_norm": 0.38137298822402954, "learning_rate": 9.304200098072709e-05, "loss": 1.9145, "step": 1636 }, { "epoch": 0.17004258855302795, "grad_norm": 0.39384132623672485, "learning_rate": 9.303369558265224e-05, "loss": 1.7381, "step": 1637 }, { "epoch": 0.17014646307260828, "grad_norm": 0.34569051861763, "learning_rate": 9.30253856018245e-05, "loss": 1.6391, "step": 1638 }, { "epoch": 0.17025033759218863, "grad_norm": 0.38778260350227356, "learning_rate": 9.301707103912878e-05, "loss": 1.8337, "step": 1639 }, { "epoch": 0.170354212111769, "grad_norm": 0.390543133020401, "learning_rate": 9.300875189545055e-05, "loss": 1.9798, "step": 1640 }, { "epoch": 0.17045808663134934, "grad_norm": 0.3621176779270172, "learning_rate": 9.300042817167574e-05, "loss": 1.6744, "step": 1641 }, { "epoch": 0.17056196115092967, "grad_norm": 0.43153461813926697, "learning_rate": 9.299209986869073e-05, "loss": 2.0351, "step": 1642 }, { "epoch": 0.17066583567051002, "grad_norm": 0.39063888788223267, "learning_rate": 9.298376698738242e-05, "loss": 1.8298, "step": 1643 }, { "epoch": 0.17076971019009038, "grad_norm": 0.36155179142951965, "learning_rate": 9.297542952863824e-05, "loss": 1.7124, "step": 1644 }, { "epoch": 0.1708735847096707, "grad_norm": 0.4032329022884369, "learning_rate": 9.296708749334603e-05, "loss": 2.0271, "step": 1645 }, { "epoch": 0.17097745922925106, "grad_norm": 0.39317673444747925, "learning_rate": 9.295874088239416e-05, "loss": 1.8808, "step": 1646 }, { "epoch": 0.17108133374883142, "grad_norm": 0.36430609226226807, "learning_rate": 9.295038969667145e-05, "loss": 1.8879, "step": 1647 }, { "epoch": 0.17118520826841177, "grad_norm": 0.3385809361934662, "learning_rate": 9.29420339370673e-05, "loss": 1.644, "step": 1648 }, { "epoch": 0.1712890827879921, "grad_norm": 0.3685392737388611, "learning_rate": 9.293367360447148e-05, "loss": 1.907, "step": 1649 }, { "epoch": 0.17139295730757245, "grad_norm": 0.36233484745025635, "learning_rate": 9.292530869977432e-05, "loss": 1.6838, "step": 1650 }, { "epoch": 0.1714968318271528, "grad_norm": 0.4106137752532959, "learning_rate": 9.29169392238666e-05, "loss": 1.8516, "step": 1651 }, { "epoch": 0.17160070634673313, "grad_norm": 0.4084801971912384, "learning_rate": 9.290856517763965e-05, "loss": 1.8696, "step": 1652 }, { "epoch": 0.1717045808663135, "grad_norm": 0.3670850694179535, "learning_rate": 9.290018656198517e-05, "loss": 1.7186, "step": 1653 }, { "epoch": 0.17180845538589384, "grad_norm": 0.362164705991745, "learning_rate": 9.289180337779547e-05, "loss": 1.6732, "step": 1654 }, { "epoch": 0.1719123299054742, "grad_norm": 0.3619086742401123, "learning_rate": 9.288341562596328e-05, "loss": 1.6082, "step": 1655 }, { "epoch": 0.17201620442505453, "grad_norm": 0.3776550889015198, "learning_rate": 9.287502330738183e-05, "loss": 1.7092, "step": 1656 }, { "epoch": 0.17212007894463488, "grad_norm": 0.3793870210647583, "learning_rate": 9.286662642294484e-05, "loss": 1.6828, "step": 1657 }, { "epoch": 0.17222395346421523, "grad_norm": 0.3709464967250824, "learning_rate": 9.28582249735465e-05, "loss": 1.7037, "step": 1658 }, { "epoch": 0.17232782798379556, "grad_norm": 0.35695111751556396, "learning_rate": 9.284981896008152e-05, "loss": 1.7627, "step": 1659 }, { "epoch": 0.17243170250337592, "grad_norm": 0.3538592457771301, "learning_rate": 9.284140838344505e-05, "loss": 1.8424, "step": 1660 }, { "epoch": 0.17253557702295627, "grad_norm": 0.3740057647228241, "learning_rate": 9.283299324453276e-05, "loss": 1.9412, "step": 1661 }, { "epoch": 0.17263945154253663, "grad_norm": 0.3617721199989319, "learning_rate": 9.28245735442408e-05, "loss": 1.6443, "step": 1662 }, { "epoch": 0.17274332606211695, "grad_norm": 0.40431293845176697, "learning_rate": 9.281614928346582e-05, "loss": 1.9303, "step": 1663 }, { "epoch": 0.1728472005816973, "grad_norm": 0.3580422103404999, "learning_rate": 9.280772046310491e-05, "loss": 1.5823, "step": 1664 }, { "epoch": 0.17295107510127766, "grad_norm": 0.38240423798561096, "learning_rate": 9.279928708405568e-05, "loss": 1.6381, "step": 1665 }, { "epoch": 0.17305494962085802, "grad_norm": 0.3335290253162384, "learning_rate": 9.279084914721623e-05, "loss": 1.686, "step": 1666 }, { "epoch": 0.17315882414043834, "grad_norm": 0.3627927899360657, "learning_rate": 9.278240665348512e-05, "loss": 1.8426, "step": 1667 }, { "epoch": 0.1732626986600187, "grad_norm": 0.4823165833950043, "learning_rate": 9.277395960376143e-05, "loss": 2.0619, "step": 1668 }, { "epoch": 0.17336657317959905, "grad_norm": 0.36185434460639954, "learning_rate": 9.276550799894468e-05, "loss": 1.7277, "step": 1669 }, { "epoch": 0.17347044769917938, "grad_norm": 0.36980822682380676, "learning_rate": 9.275705183993491e-05, "loss": 1.7649, "step": 1670 }, { "epoch": 0.17357432221875974, "grad_norm": 0.3434619605541229, "learning_rate": 9.274859112763264e-05, "loss": 1.702, "step": 1671 }, { "epoch": 0.1736781967383401, "grad_norm": 0.3595278859138489, "learning_rate": 9.274012586293887e-05, "loss": 1.7485, "step": 1672 }, { "epoch": 0.17378207125792045, "grad_norm": 0.4083694815635681, "learning_rate": 9.273165604675509e-05, "loss": 1.7587, "step": 1673 }, { "epoch": 0.17388594577750077, "grad_norm": 0.3653002679347992, "learning_rate": 9.272318167998325e-05, "loss": 1.7234, "step": 1674 }, { "epoch": 0.17398982029708113, "grad_norm": 0.40430814027786255, "learning_rate": 9.27147027635258e-05, "loss": 1.8682, "step": 1675 }, { "epoch": 0.17409369481666148, "grad_norm": 0.4132486879825592, "learning_rate": 9.270621929828571e-05, "loss": 1.9371, "step": 1676 }, { "epoch": 0.1741975693362418, "grad_norm": 0.3685455322265625, "learning_rate": 9.269773128516638e-05, "loss": 1.8035, "step": 1677 }, { "epoch": 0.17430144385582216, "grad_norm": 0.35275524854660034, "learning_rate": 9.268923872507174e-05, "loss": 1.7536, "step": 1678 }, { "epoch": 0.17440531837540252, "grad_norm": 0.364773154258728, "learning_rate": 9.268074161890614e-05, "loss": 1.8075, "step": 1679 }, { "epoch": 0.17450919289498287, "grad_norm": 0.40323731303215027, "learning_rate": 9.267223996757451e-05, "loss": 1.8028, "step": 1680 }, { "epoch": 0.1746130674145632, "grad_norm": 0.34777867794036865, "learning_rate": 9.266373377198216e-05, "loss": 1.7143, "step": 1681 }, { "epoch": 0.17471694193414355, "grad_norm": 0.39449429512023926, "learning_rate": 9.265522303303494e-05, "loss": 1.8556, "step": 1682 }, { "epoch": 0.1748208164537239, "grad_norm": 0.3480819761753082, "learning_rate": 9.264670775163922e-05, "loss": 1.6931, "step": 1683 }, { "epoch": 0.17492469097330424, "grad_norm": 0.3808116316795349, "learning_rate": 9.263818792870176e-05, "loss": 1.6403, "step": 1684 }, { "epoch": 0.1750285654928846, "grad_norm": 0.3749628961086273, "learning_rate": 9.262966356512987e-05, "loss": 1.6206, "step": 1685 }, { "epoch": 0.17513244001246495, "grad_norm": 0.3475877642631531, "learning_rate": 9.262113466183136e-05, "loss": 1.6314, "step": 1686 }, { "epoch": 0.1752363145320453, "grad_norm": 0.3954698443412781, "learning_rate": 9.261260121971445e-05, "loss": 1.7696, "step": 1687 }, { "epoch": 0.17534018905162563, "grad_norm": 0.36054256558418274, "learning_rate": 9.260406323968791e-05, "loss": 1.734, "step": 1688 }, { "epoch": 0.17544406357120598, "grad_norm": 0.3659268617630005, "learning_rate": 9.259552072266096e-05, "loss": 1.6301, "step": 1689 }, { "epoch": 0.17554793809078634, "grad_norm": 0.3359396457672119, "learning_rate": 9.258697366954329e-05, "loss": 1.6708, "step": 1690 }, { "epoch": 0.17565181261036666, "grad_norm": 0.3496209979057312, "learning_rate": 9.257842208124514e-05, "loss": 1.7327, "step": 1691 }, { "epoch": 0.17575568712994702, "grad_norm": 0.4164491295814514, "learning_rate": 9.256986595867715e-05, "loss": 1.7489, "step": 1692 }, { "epoch": 0.17585956164952737, "grad_norm": 0.4587213695049286, "learning_rate": 9.256130530275048e-05, "loss": 2.1299, "step": 1693 }, { "epoch": 0.17596343616910773, "grad_norm": 0.40228530764579773, "learning_rate": 9.255274011437683e-05, "loss": 1.8252, "step": 1694 }, { "epoch": 0.17606731068868806, "grad_norm": 0.3676128685474396, "learning_rate": 9.254417039446824e-05, "loss": 1.6666, "step": 1695 }, { "epoch": 0.1761711852082684, "grad_norm": 0.3423883318901062, "learning_rate": 9.253559614393739e-05, "loss": 1.7213, "step": 1696 }, { "epoch": 0.17627505972784877, "grad_norm": 0.3809538781642914, "learning_rate": 9.252701736369732e-05, "loss": 1.8218, "step": 1697 }, { "epoch": 0.1763789342474291, "grad_norm": 0.351162314414978, "learning_rate": 9.251843405466162e-05, "loss": 1.7478, "step": 1698 }, { "epoch": 0.17648280876700945, "grad_norm": 0.3520200550556183, "learning_rate": 9.250984621774437e-05, "loss": 1.7541, "step": 1699 }, { "epoch": 0.1765866832865898, "grad_norm": 0.3424931764602661, "learning_rate": 9.250125385386007e-05, "loss": 1.6592, "step": 1700 }, { "epoch": 0.17669055780617016, "grad_norm": 0.3925075829029083, "learning_rate": 9.249265696392377e-05, "loss": 1.784, "step": 1701 }, { "epoch": 0.17679443232575048, "grad_norm": 0.3799852132797241, "learning_rate": 9.248405554885095e-05, "loss": 1.7904, "step": 1702 }, { "epoch": 0.17689830684533084, "grad_norm": 0.37215399742126465, "learning_rate": 9.24754496095576e-05, "loss": 1.8846, "step": 1703 }, { "epoch": 0.1770021813649112, "grad_norm": 0.37840306758880615, "learning_rate": 9.246683914696019e-05, "loss": 1.7425, "step": 1704 }, { "epoch": 0.17710605588449155, "grad_norm": 0.362456351518631, "learning_rate": 9.245822416197567e-05, "loss": 1.8047, "step": 1705 }, { "epoch": 0.17720993040407187, "grad_norm": 0.3728417456150055, "learning_rate": 9.244960465552148e-05, "loss": 1.8033, "step": 1706 }, { "epoch": 0.17731380492365223, "grad_norm": 0.3797183334827423, "learning_rate": 9.244098062851549e-05, "loss": 1.9075, "step": 1707 }, { "epoch": 0.17741767944323258, "grad_norm": 0.38207125663757324, "learning_rate": 9.243235208187612e-05, "loss": 1.8054, "step": 1708 }, { "epoch": 0.1775215539628129, "grad_norm": 0.3418576419353485, "learning_rate": 9.242371901652223e-05, "loss": 1.8205, "step": 1709 }, { "epoch": 0.17762542848239327, "grad_norm": 0.4341420829296112, "learning_rate": 9.241508143337319e-05, "loss": 2.1321, "step": 1710 }, { "epoch": 0.17772930300197362, "grad_norm": 0.38019636273384094, "learning_rate": 9.240643933334884e-05, "loss": 1.6551, "step": 1711 }, { "epoch": 0.17783317752155398, "grad_norm": 0.3897992968559265, "learning_rate": 9.239779271736947e-05, "loss": 1.8644, "step": 1712 }, { "epoch": 0.1779370520411343, "grad_norm": 0.3626013398170471, "learning_rate": 9.238914158635589e-05, "loss": 1.6417, "step": 1713 }, { "epoch": 0.17804092656071466, "grad_norm": 0.38448286056518555, "learning_rate": 9.238048594122939e-05, "loss": 1.7762, "step": 1714 }, { "epoch": 0.178144801080295, "grad_norm": 0.3795235753059387, "learning_rate": 9.237182578291171e-05, "loss": 1.7897, "step": 1715 }, { "epoch": 0.17824867559987534, "grad_norm": 0.3505716323852539, "learning_rate": 9.23631611123251e-05, "loss": 1.8132, "step": 1716 }, { "epoch": 0.1783525501194557, "grad_norm": 0.36175671219825745, "learning_rate": 9.235449193039227e-05, "loss": 1.6795, "step": 1717 }, { "epoch": 0.17845642463903605, "grad_norm": 0.3952975571155548, "learning_rate": 9.234581823803645e-05, "loss": 1.7109, "step": 1718 }, { "epoch": 0.1785602991586164, "grad_norm": 0.3580237925052643, "learning_rate": 9.233714003618127e-05, "loss": 1.8293, "step": 1719 }, { "epoch": 0.17866417367819673, "grad_norm": 0.33214688301086426, "learning_rate": 9.232845732575093e-05, "loss": 1.6718, "step": 1720 }, { "epoch": 0.17876804819777709, "grad_norm": 0.359581857919693, "learning_rate": 9.231977010767005e-05, "loss": 1.8014, "step": 1721 }, { "epoch": 0.17887192271735744, "grad_norm": 0.3323659300804138, "learning_rate": 9.231107838286378e-05, "loss": 1.6053, "step": 1722 }, { "epoch": 0.17897579723693777, "grad_norm": 0.3609966039657593, "learning_rate": 9.230238215225769e-05, "loss": 1.9612, "step": 1723 }, { "epoch": 0.17907967175651812, "grad_norm": 0.33431074023246765, "learning_rate": 9.229368141677788e-05, "loss": 1.6002, "step": 1724 }, { "epoch": 0.17918354627609848, "grad_norm": 0.37472623586654663, "learning_rate": 9.228497617735088e-05, "loss": 1.7805, "step": 1725 }, { "epoch": 0.17928742079567883, "grad_norm": 0.3726039528846741, "learning_rate": 9.227626643490377e-05, "loss": 1.8624, "step": 1726 }, { "epoch": 0.17939129531525916, "grad_norm": 0.4065108895301819, "learning_rate": 9.226755219036405e-05, "loss": 1.7586, "step": 1727 }, { "epoch": 0.1794951698348395, "grad_norm": 0.35552752017974854, "learning_rate": 9.225883344465972e-05, "loss": 1.6, "step": 1728 }, { "epoch": 0.17959904435441987, "grad_norm": 0.3508833944797516, "learning_rate": 9.225011019871927e-05, "loss": 1.7917, "step": 1729 }, { "epoch": 0.1797029188740002, "grad_norm": 0.374521404504776, "learning_rate": 9.224138245347163e-05, "loss": 1.7524, "step": 1730 }, { "epoch": 0.17980679339358055, "grad_norm": 0.36706939339637756, "learning_rate": 9.223265020984626e-05, "loss": 1.7125, "step": 1731 }, { "epoch": 0.1799106679131609, "grad_norm": 0.3556000590324402, "learning_rate": 9.222391346877308e-05, "loss": 1.6935, "step": 1732 }, { "epoch": 0.18001454243274126, "grad_norm": 0.3797661066055298, "learning_rate": 9.221517223118245e-05, "loss": 1.7725, "step": 1733 }, { "epoch": 0.18011841695232159, "grad_norm": 0.3610544502735138, "learning_rate": 9.22064264980053e-05, "loss": 1.8192, "step": 1734 }, { "epoch": 0.18022229147190194, "grad_norm": 0.46052882075309753, "learning_rate": 9.219767627017293e-05, "loss": 2.0761, "step": 1735 }, { "epoch": 0.1803261659914823, "grad_norm": 0.4026206135749817, "learning_rate": 9.218892154861721e-05, "loss": 1.978, "step": 1736 }, { "epoch": 0.18043004051106262, "grad_norm": 0.37845221161842346, "learning_rate": 9.218016233427042e-05, "loss": 1.7811, "step": 1737 }, { "epoch": 0.18053391503064298, "grad_norm": 0.4019536077976227, "learning_rate": 9.217139862806534e-05, "loss": 1.9864, "step": 1738 }, { "epoch": 0.18063778955022333, "grad_norm": 0.36031562089920044, "learning_rate": 9.21626304309353e-05, "loss": 1.6712, "step": 1739 }, { "epoch": 0.1807416640698037, "grad_norm": 0.37847834825515747, "learning_rate": 9.215385774381395e-05, "loss": 1.7967, "step": 1740 }, { "epoch": 0.18084553858938401, "grad_norm": 0.38624006509780884, "learning_rate": 9.21450805676356e-05, "loss": 1.8833, "step": 1741 }, { "epoch": 0.18094941310896437, "grad_norm": 0.34793728590011597, "learning_rate": 9.213629890333489e-05, "loss": 1.8027, "step": 1742 }, { "epoch": 0.18105328762854472, "grad_norm": 0.3407808542251587, "learning_rate": 9.212751275184704e-05, "loss": 1.658, "step": 1743 }, { "epoch": 0.18115716214812508, "grad_norm": 0.3690899908542633, "learning_rate": 9.211872211410769e-05, "loss": 1.6251, "step": 1744 }, { "epoch": 0.1812610366677054, "grad_norm": 0.3415517210960388, "learning_rate": 9.210992699105296e-05, "loss": 1.5998, "step": 1745 }, { "epoch": 0.18136491118728576, "grad_norm": 0.36001309752464294, "learning_rate": 9.210112738361949e-05, "loss": 1.8308, "step": 1746 }, { "epoch": 0.18146878570686611, "grad_norm": 0.3720461130142212, "learning_rate": 9.209232329274433e-05, "loss": 1.7902, "step": 1747 }, { "epoch": 0.18157266022644644, "grad_norm": 0.42275193333625793, "learning_rate": 9.208351471936509e-05, "loss": 1.9225, "step": 1748 }, { "epoch": 0.1816765347460268, "grad_norm": 0.36535343527793884, "learning_rate": 9.20747016644198e-05, "loss": 1.7431, "step": 1749 }, { "epoch": 0.18178040926560715, "grad_norm": 0.3501549959182739, "learning_rate": 9.206588412884698e-05, "loss": 1.6417, "step": 1750 }, { "epoch": 0.1818842837851875, "grad_norm": 0.34967172145843506, "learning_rate": 9.205706211358563e-05, "loss": 1.7612, "step": 1751 }, { "epoch": 0.18198815830476783, "grad_norm": 0.3451713025569916, "learning_rate": 9.204823561957521e-05, "loss": 1.6711, "step": 1752 }, { "epoch": 0.1820920328243482, "grad_norm": 0.37724578380584717, "learning_rate": 9.203940464775568e-05, "loss": 1.9513, "step": 1753 }, { "epoch": 0.18219590734392854, "grad_norm": 0.34096843004226685, "learning_rate": 9.203056919906748e-05, "loss": 1.6691, "step": 1754 }, { "epoch": 0.18229978186350887, "grad_norm": 0.3424280881881714, "learning_rate": 9.202172927445152e-05, "loss": 1.6556, "step": 1755 }, { "epoch": 0.18240365638308922, "grad_norm": 0.3700230121612549, "learning_rate": 9.201288487484916e-05, "loss": 1.7761, "step": 1756 }, { "epoch": 0.18250753090266958, "grad_norm": 0.4259064197540283, "learning_rate": 9.200403600120229e-05, "loss": 2.1197, "step": 1757 }, { "epoch": 0.18261140542224993, "grad_norm": 0.40020573139190674, "learning_rate": 9.199518265445321e-05, "loss": 1.9191, "step": 1758 }, { "epoch": 0.18271527994183026, "grad_norm": 0.36124834418296814, "learning_rate": 9.198632483554478e-05, "loss": 1.8001, "step": 1759 }, { "epoch": 0.18281915446141062, "grad_norm": 0.4086042642593384, "learning_rate": 9.197746254542025e-05, "loss": 1.8518, "step": 1760 }, { "epoch": 0.18292302898099097, "grad_norm": 0.37972450256347656, "learning_rate": 9.196859578502338e-05, "loss": 1.952, "step": 1761 }, { "epoch": 0.1830269035005713, "grad_norm": 0.4148435890674591, "learning_rate": 9.195972455529844e-05, "loss": 1.8547, "step": 1762 }, { "epoch": 0.18313077802015165, "grad_norm": 0.3922263979911804, "learning_rate": 9.195084885719014e-05, "loss": 1.9248, "step": 1763 }, { "epoch": 0.183234652539732, "grad_norm": 0.3774312138557434, "learning_rate": 9.194196869164366e-05, "loss": 1.8411, "step": 1764 }, { "epoch": 0.18333852705931236, "grad_norm": 0.35800519585609436, "learning_rate": 9.193308405960466e-05, "loss": 1.6903, "step": 1765 }, { "epoch": 0.1834424015788927, "grad_norm": 0.40143853425979614, "learning_rate": 9.192419496201932e-05, "loss": 1.7171, "step": 1766 }, { "epoch": 0.18354627609847304, "grad_norm": 0.3324778974056244, "learning_rate": 9.191530139983423e-05, "loss": 1.6295, "step": 1767 }, { "epoch": 0.1836501506180534, "grad_norm": 0.3377951979637146, "learning_rate": 9.190640337399647e-05, "loss": 1.5843, "step": 1768 }, { "epoch": 0.18375402513763373, "grad_norm": 0.39436647295951843, "learning_rate": 9.189750088545367e-05, "loss": 1.957, "step": 1769 }, { "epoch": 0.18385789965721408, "grad_norm": 0.42431554198265076, "learning_rate": 9.188859393515382e-05, "loss": 1.9529, "step": 1770 }, { "epoch": 0.18396177417679443, "grad_norm": 0.3626527786254883, "learning_rate": 9.187968252404547e-05, "loss": 1.7235, "step": 1771 }, { "epoch": 0.1840656486963748, "grad_norm": 0.42632752656936646, "learning_rate": 9.187076665307758e-05, "loss": 2.1363, "step": 1772 }, { "epoch": 0.18416952321595512, "grad_norm": 0.41993650794029236, "learning_rate": 9.186184632319968e-05, "loss": 1.7975, "step": 1773 }, { "epoch": 0.18427339773553547, "grad_norm": 0.37287190556526184, "learning_rate": 9.185292153536165e-05, "loss": 1.8601, "step": 1774 }, { "epoch": 0.18437727225511583, "grad_norm": 0.38827162981033325, "learning_rate": 9.184399229051397e-05, "loss": 1.8401, "step": 1775 }, { "epoch": 0.18448114677469618, "grad_norm": 0.3717474937438965, "learning_rate": 9.183505858960748e-05, "loss": 1.7523, "step": 1776 }, { "epoch": 0.1845850212942765, "grad_norm": 0.3732149600982666, "learning_rate": 9.18261204335936e-05, "loss": 1.7327, "step": 1777 }, { "epoch": 0.18468889581385686, "grad_norm": 0.3960385024547577, "learning_rate": 9.181717782342414e-05, "loss": 1.9453, "step": 1778 }, { "epoch": 0.18479277033343722, "grad_norm": 0.38847383856773376, "learning_rate": 9.180823076005143e-05, "loss": 1.5841, "step": 1779 }, { "epoch": 0.18489664485301754, "grad_norm": 0.37122729420661926, "learning_rate": 9.179927924442826e-05, "loss": 1.8535, "step": 1780 }, { "epoch": 0.1850005193725979, "grad_norm": 0.4589596688747406, "learning_rate": 9.17903232775079e-05, "loss": 2.1207, "step": 1781 }, { "epoch": 0.18510439389217825, "grad_norm": 0.4033050537109375, "learning_rate": 9.178136286024408e-05, "loss": 1.9405, "step": 1782 }, { "epoch": 0.1852082684117586, "grad_norm": 0.37945422530174255, "learning_rate": 9.177239799359102e-05, "loss": 1.8607, "step": 1783 }, { "epoch": 0.18531214293133894, "grad_norm": 0.44369664788246155, "learning_rate": 9.176342867850343e-05, "loss": 1.9337, "step": 1784 }, { "epoch": 0.1854160174509193, "grad_norm": 0.3960365951061249, "learning_rate": 9.175445491593644e-05, "loss": 1.8079, "step": 1785 }, { "epoch": 0.18551989197049965, "grad_norm": 0.34727954864501953, "learning_rate": 9.174547670684569e-05, "loss": 1.5856, "step": 1786 }, { "epoch": 0.18562376649007997, "grad_norm": 0.3571012616157532, "learning_rate": 9.173649405218732e-05, "loss": 1.6828, "step": 1787 }, { "epoch": 0.18572764100966033, "grad_norm": 0.35931140184402466, "learning_rate": 9.172750695291787e-05, "loss": 1.5845, "step": 1788 }, { "epoch": 0.18583151552924068, "grad_norm": 0.36305472254753113, "learning_rate": 9.171851540999442e-05, "loss": 1.7407, "step": 1789 }, { "epoch": 0.18593539004882104, "grad_norm": 0.377492219209671, "learning_rate": 9.17095194243745e-05, "loss": 1.8655, "step": 1790 }, { "epoch": 0.18603926456840136, "grad_norm": 0.3644843101501465, "learning_rate": 9.17005189970161e-05, "loss": 1.7179, "step": 1791 }, { "epoch": 0.18614313908798172, "grad_norm": 0.3360661268234253, "learning_rate": 9.169151412887774e-05, "loss": 1.6426, "step": 1792 }, { "epoch": 0.18624701360756207, "grad_norm": 0.38643765449523926, "learning_rate": 9.16825048209183e-05, "loss": 1.7468, "step": 1793 }, { "epoch": 0.1863508881271424, "grad_norm": 0.34640881419181824, "learning_rate": 9.167349107409721e-05, "loss": 1.5946, "step": 1794 }, { "epoch": 0.18645476264672275, "grad_norm": 0.33807694911956787, "learning_rate": 9.166447288937444e-05, "loss": 1.6169, "step": 1795 }, { "epoch": 0.1865586371663031, "grad_norm": 0.4004231095314026, "learning_rate": 9.165545026771026e-05, "loss": 1.7109, "step": 1796 }, { "epoch": 0.18666251168588346, "grad_norm": 0.3662926256656647, "learning_rate": 9.164642321006555e-05, "loss": 1.7274, "step": 1797 }, { "epoch": 0.1867663862054638, "grad_norm": 0.37025517225265503, "learning_rate": 9.163739171740167e-05, "loss": 1.8982, "step": 1798 }, { "epoch": 0.18687026072504415, "grad_norm": 0.4032931327819824, "learning_rate": 9.162835579068032e-05, "loss": 1.8586, "step": 1799 }, { "epoch": 0.1869741352446245, "grad_norm": 0.36516958475112915, "learning_rate": 9.161931543086379e-05, "loss": 1.8012, "step": 1800 }, { "epoch": 0.18707800976420483, "grad_norm": 0.35469850897789, "learning_rate": 9.161027063891481e-05, "loss": 1.6804, "step": 1801 }, { "epoch": 0.18718188428378518, "grad_norm": 0.4029258191585541, "learning_rate": 9.160122141579659e-05, "loss": 2.0161, "step": 1802 }, { "epoch": 0.18728575880336554, "grad_norm": 0.3713259696960449, "learning_rate": 9.159216776247279e-05, "loss": 1.7743, "step": 1803 }, { "epoch": 0.1873896333229459, "grad_norm": 0.3715076744556427, "learning_rate": 9.158310967990755e-05, "loss": 1.8117, "step": 1804 }, { "epoch": 0.18749350784252622, "grad_norm": 0.43088141083717346, "learning_rate": 9.15740471690655e-05, "loss": 1.8559, "step": 1805 }, { "epoch": 0.18759738236210657, "grad_norm": 0.38683950901031494, "learning_rate": 9.156498023091172e-05, "loss": 1.8888, "step": 1806 }, { "epoch": 0.18770125688168693, "grad_norm": 0.3979988694190979, "learning_rate": 9.155590886641174e-05, "loss": 1.8934, "step": 1807 }, { "epoch": 0.18780513140126726, "grad_norm": 0.39728519320487976, "learning_rate": 9.154683307653162e-05, "loss": 1.8035, "step": 1808 }, { "epoch": 0.1879090059208476, "grad_norm": 0.42086923122406006, "learning_rate": 9.153775286223787e-05, "loss": 1.9612, "step": 1809 }, { "epoch": 0.18801288044042797, "grad_norm": 0.40852025151252747, "learning_rate": 9.152866822449743e-05, "loss": 1.6764, "step": 1810 }, { "epoch": 0.18811675496000832, "grad_norm": 0.3808266222476959, "learning_rate": 9.151957916427778e-05, "loss": 1.7292, "step": 1811 }, { "epoch": 0.18822062947958865, "grad_norm": 0.4031354784965515, "learning_rate": 9.151048568254682e-05, "loss": 1.9939, "step": 1812 }, { "epoch": 0.188324503999169, "grad_norm": 0.3865586817264557, "learning_rate": 9.15013877802729e-05, "loss": 1.7508, "step": 1813 }, { "epoch": 0.18842837851874936, "grad_norm": 0.35779932141304016, "learning_rate": 9.149228545842492e-05, "loss": 1.766, "step": 1814 }, { "epoch": 0.1885322530383297, "grad_norm": 0.35968711972236633, "learning_rate": 9.148317871797219e-05, "loss": 1.8, "step": 1815 }, { "epoch": 0.18863612755791004, "grad_norm": 0.44821977615356445, "learning_rate": 9.14740675598845e-05, "loss": 1.8142, "step": 1816 }, { "epoch": 0.1887400020774904, "grad_norm": 0.433288037776947, "learning_rate": 9.146495198513214e-05, "loss": 1.9191, "step": 1817 }, { "epoch": 0.18884387659707075, "grad_norm": 0.39990267157554626, "learning_rate": 9.145583199468582e-05, "loss": 1.8575, "step": 1818 }, { "epoch": 0.18894775111665107, "grad_norm": 0.37523946166038513, "learning_rate": 9.144670758951677e-05, "loss": 1.799, "step": 1819 }, { "epoch": 0.18905162563623143, "grad_norm": 0.3855798542499542, "learning_rate": 9.143757877059665e-05, "loss": 1.8317, "step": 1820 }, { "epoch": 0.18915550015581178, "grad_norm": 0.35304591059684753, "learning_rate": 9.142844553889762e-05, "loss": 1.5719, "step": 1821 }, { "epoch": 0.18925937467539214, "grad_norm": 0.3850051760673523, "learning_rate": 9.141930789539229e-05, "loss": 1.7996, "step": 1822 }, { "epoch": 0.18936324919497247, "grad_norm": 0.3510708212852478, "learning_rate": 9.141016584105376e-05, "loss": 1.5699, "step": 1823 }, { "epoch": 0.18946712371455282, "grad_norm": 0.3610669672489166, "learning_rate": 9.140101937685558e-05, "loss": 1.7303, "step": 1824 }, { "epoch": 0.18957099823413318, "grad_norm": 0.34818708896636963, "learning_rate": 9.139186850377177e-05, "loss": 1.7005, "step": 1825 }, { "epoch": 0.1896748727537135, "grad_norm": 0.35581493377685547, "learning_rate": 9.138271322277685e-05, "loss": 1.6708, "step": 1826 }, { "epoch": 0.18977874727329386, "grad_norm": 0.35102319717407227, "learning_rate": 9.137355353484573e-05, "loss": 1.8142, "step": 1827 }, { "epoch": 0.1898826217928742, "grad_norm": 0.3373032212257385, "learning_rate": 9.136438944095392e-05, "loss": 1.741, "step": 1828 }, { "epoch": 0.18998649631245457, "grad_norm": 0.37855619192123413, "learning_rate": 9.135522094207729e-05, "loss": 1.7995, "step": 1829 }, { "epoch": 0.1900903708320349, "grad_norm": 0.35194653272628784, "learning_rate": 9.13460480391922e-05, "loss": 1.5989, "step": 1830 }, { "epoch": 0.19019424535161525, "grad_norm": 0.3600010871887207, "learning_rate": 9.13368707332755e-05, "loss": 1.8601, "step": 1831 }, { "epoch": 0.1902981198711956, "grad_norm": 0.37306833267211914, "learning_rate": 9.13276890253045e-05, "loss": 1.86, "step": 1832 }, { "epoch": 0.19040199439077593, "grad_norm": 0.355836421251297, "learning_rate": 9.131850291625702e-05, "loss": 1.6658, "step": 1833 }, { "epoch": 0.19050586891035629, "grad_norm": 0.4272918999195099, "learning_rate": 9.130931240711126e-05, "loss": 1.9726, "step": 1834 }, { "epoch": 0.19060974342993664, "grad_norm": 0.4324601888656616, "learning_rate": 9.130011749884595e-05, "loss": 1.851, "step": 1835 }, { "epoch": 0.190713617949517, "grad_norm": 0.36520737409591675, "learning_rate": 9.129091819244028e-05, "loss": 1.8169, "step": 1836 }, { "epoch": 0.19081749246909732, "grad_norm": 0.4533885419368744, "learning_rate": 9.12817144888739e-05, "loss": 2.0705, "step": 1837 }, { "epoch": 0.19092136698867768, "grad_norm": 0.3979325592517853, "learning_rate": 9.127250638912695e-05, "loss": 1.7838, "step": 1838 }, { "epoch": 0.19102524150825803, "grad_norm": 0.3549365997314453, "learning_rate": 9.126329389418e-05, "loss": 1.8239, "step": 1839 }, { "epoch": 0.19112911602783836, "grad_norm": 0.3781158924102783, "learning_rate": 9.125407700501412e-05, "loss": 1.6151, "step": 1840 }, { "epoch": 0.1912329905474187, "grad_norm": 0.388790100812912, "learning_rate": 9.124485572261082e-05, "loss": 1.9243, "step": 1841 }, { "epoch": 0.19133686506699907, "grad_norm": 0.4158736765384674, "learning_rate": 9.123563004795213e-05, "loss": 1.9735, "step": 1842 }, { "epoch": 0.19144073958657942, "grad_norm": 0.39373651146888733, "learning_rate": 9.122639998202047e-05, "loss": 1.8774, "step": 1843 }, { "epoch": 0.19154461410615975, "grad_norm": 0.3896943926811218, "learning_rate": 9.12171655257988e-05, "loss": 1.8337, "step": 1844 }, { "epoch": 0.1916484886257401, "grad_norm": 0.3980332016944885, "learning_rate": 9.120792668027049e-05, "loss": 1.8081, "step": 1845 }, { "epoch": 0.19175236314532046, "grad_norm": 0.4258505403995514, "learning_rate": 9.119868344641944e-05, "loss": 1.8096, "step": 1846 }, { "epoch": 0.19185623766490079, "grad_norm": 0.3683486580848694, "learning_rate": 9.118943582522996e-05, "loss": 1.826, "step": 1847 }, { "epoch": 0.19196011218448114, "grad_norm": 0.3608863949775696, "learning_rate": 9.118018381768687e-05, "loss": 1.6942, "step": 1848 }, { "epoch": 0.1920639867040615, "grad_norm": 0.35737764835357666, "learning_rate": 9.11709274247754e-05, "loss": 1.8741, "step": 1849 }, { "epoch": 0.19216786122364185, "grad_norm": 0.36617088317871094, "learning_rate": 9.116166664748131e-05, "loss": 1.7608, "step": 1850 }, { "epoch": 0.19227173574322218, "grad_norm": 0.40373408794403076, "learning_rate": 9.11524014867908e-05, "loss": 1.715, "step": 1851 }, { "epoch": 0.19237561026280253, "grad_norm": 0.36378052830696106, "learning_rate": 9.114313194369053e-05, "loss": 1.5889, "step": 1852 }, { "epoch": 0.1924794847823829, "grad_norm": 0.37960201501846313, "learning_rate": 9.113385801916765e-05, "loss": 1.6605, "step": 1853 }, { "epoch": 0.19258335930196324, "grad_norm": 0.3682219982147217, "learning_rate": 9.112457971420973e-05, "loss": 1.8351, "step": 1854 }, { "epoch": 0.19268723382154357, "grad_norm": 0.38188329339027405, "learning_rate": 9.111529702980486e-05, "loss": 1.8845, "step": 1855 }, { "epoch": 0.19279110834112392, "grad_norm": 0.39885640144348145, "learning_rate": 9.110600996694157e-05, "loss": 1.8629, "step": 1856 }, { "epoch": 0.19289498286070428, "grad_norm": 0.36927422881126404, "learning_rate": 9.109671852660885e-05, "loss": 1.718, "step": 1857 }, { "epoch": 0.1929988573802846, "grad_norm": 0.35329553484916687, "learning_rate": 9.108742270979618e-05, "loss": 1.6209, "step": 1858 }, { "epoch": 0.19310273189986496, "grad_norm": 0.40359756350517273, "learning_rate": 9.107812251749349e-05, "loss": 1.936, "step": 1859 }, { "epoch": 0.19320660641944531, "grad_norm": 0.3696520924568176, "learning_rate": 9.106881795069116e-05, "loss": 1.8329, "step": 1860 }, { "epoch": 0.19331048093902567, "grad_norm": 0.38084790110588074, "learning_rate": 9.105950901038008e-05, "loss": 1.8072, "step": 1861 }, { "epoch": 0.193414355458606, "grad_norm": 0.4722822606563568, "learning_rate": 9.105019569755157e-05, "loss": 1.8516, "step": 1862 }, { "epoch": 0.19351822997818635, "grad_norm": 0.3589233160018921, "learning_rate": 9.104087801319741e-05, "loss": 1.7609, "step": 1863 }, { "epoch": 0.1936221044977667, "grad_norm": 0.4372257590293884, "learning_rate": 9.103155595830988e-05, "loss": 1.9742, "step": 1864 }, { "epoch": 0.19372597901734703, "grad_norm": 0.3735795021057129, "learning_rate": 9.10222295338817e-05, "loss": 1.71, "step": 1865 }, { "epoch": 0.1938298535369274, "grad_norm": 0.35734692215919495, "learning_rate": 9.101289874090605e-05, "loss": 1.6526, "step": 1866 }, { "epoch": 0.19393372805650774, "grad_norm": 0.36955031752586365, "learning_rate": 9.10035635803766e-05, "loss": 1.787, "step": 1867 }, { "epoch": 0.1940376025760881, "grad_norm": 0.3762964904308319, "learning_rate": 9.099422405328748e-05, "loss": 1.6645, "step": 1868 }, { "epoch": 0.19414147709566842, "grad_norm": 0.40508878231048584, "learning_rate": 9.098488016063325e-05, "loss": 1.8264, "step": 1869 }, { "epoch": 0.19424535161524878, "grad_norm": 0.43582627177238464, "learning_rate": 9.097553190340899e-05, "loss": 2.0092, "step": 1870 }, { "epoch": 0.19434922613482913, "grad_norm": 0.379056453704834, "learning_rate": 9.09661792826102e-05, "loss": 1.7522, "step": 1871 }, { "epoch": 0.19445310065440946, "grad_norm": 0.35009434819221497, "learning_rate": 9.095682229923287e-05, "loss": 1.7053, "step": 1872 }, { "epoch": 0.19455697517398982, "grad_norm": 0.3844042420387268, "learning_rate": 9.094746095427342e-05, "loss": 1.7458, "step": 1873 }, { "epoch": 0.19466084969357017, "grad_norm": 0.3690871000289917, "learning_rate": 9.09380952487288e-05, "loss": 1.7563, "step": 1874 }, { "epoch": 0.19476472421315053, "grad_norm": 0.3871258497238159, "learning_rate": 9.092872518359637e-05, "loss": 1.9391, "step": 1875 }, { "epoch": 0.19486859873273085, "grad_norm": 0.3885674774646759, "learning_rate": 9.091935075987395e-05, "loss": 1.7193, "step": 1876 }, { "epoch": 0.1949724732523112, "grad_norm": 0.37015703320503235, "learning_rate": 9.090997197855986e-05, "loss": 1.7996, "step": 1877 }, { "epoch": 0.19507634777189156, "grad_norm": 0.3506444990634918, "learning_rate": 9.090058884065287e-05, "loss": 1.5394, "step": 1878 }, { "epoch": 0.1951802222914719, "grad_norm": 0.350041002035141, "learning_rate": 9.089120134715222e-05, "loss": 1.8134, "step": 1879 }, { "epoch": 0.19528409681105224, "grad_norm": 0.35820406675338745, "learning_rate": 9.088180949905756e-05, "loss": 1.7417, "step": 1880 }, { "epoch": 0.1953879713306326, "grad_norm": 0.37615320086479187, "learning_rate": 9.087241329736909e-05, "loss": 1.7162, "step": 1881 }, { "epoch": 0.19549184585021295, "grad_norm": 0.35000884532928467, "learning_rate": 9.086301274308742e-05, "loss": 1.6961, "step": 1882 }, { "epoch": 0.19559572036979328, "grad_norm": 0.3935372233390808, "learning_rate": 9.085360783721363e-05, "loss": 1.6883, "step": 1883 }, { "epoch": 0.19569959488937363, "grad_norm": 0.37884512543678284, "learning_rate": 9.084419858074928e-05, "loss": 1.756, "step": 1884 }, { "epoch": 0.195803469408954, "grad_norm": 0.33219221234321594, "learning_rate": 9.08347849746964e-05, "loss": 1.5695, "step": 1885 }, { "epoch": 0.19590734392853434, "grad_norm": 0.3523041903972626, "learning_rate": 9.082536702005741e-05, "loss": 1.7402, "step": 1886 }, { "epoch": 0.19601121844811467, "grad_norm": 0.38876470923423767, "learning_rate": 9.08159447178353e-05, "loss": 1.6901, "step": 1887 }, { "epoch": 0.19611509296769503, "grad_norm": 0.35667043924331665, "learning_rate": 9.080651806903346e-05, "loss": 1.6679, "step": 1888 }, { "epoch": 0.19621896748727538, "grad_norm": 0.3476751446723938, "learning_rate": 9.079708707465575e-05, "loss": 1.6656, "step": 1889 }, { "epoch": 0.1963228420068557, "grad_norm": 0.3749213218688965, "learning_rate": 9.078765173570648e-05, "loss": 1.6932, "step": 1890 }, { "epoch": 0.19642671652643606, "grad_norm": 0.38287267088890076, "learning_rate": 9.077821205319045e-05, "loss": 1.8927, "step": 1891 }, { "epoch": 0.19653059104601642, "grad_norm": 0.3563637137413025, "learning_rate": 9.076876802811293e-05, "loss": 1.7309, "step": 1892 }, { "epoch": 0.19663446556559677, "grad_norm": 0.4049491882324219, "learning_rate": 9.075931966147964e-05, "loss": 1.8704, "step": 1893 }, { "epoch": 0.1967383400851771, "grad_norm": 0.38649019598960876, "learning_rate": 9.074986695429673e-05, "loss": 1.7995, "step": 1894 }, { "epoch": 0.19684221460475745, "grad_norm": 0.38346853852272034, "learning_rate": 9.074040990757085e-05, "loss": 1.7893, "step": 1895 }, { "epoch": 0.1969460891243378, "grad_norm": 0.39437806606292725, "learning_rate": 9.07309485223091e-05, "loss": 1.8473, "step": 1896 }, { "epoch": 0.19704996364391814, "grad_norm": 0.3689126670360565, "learning_rate": 9.072148279951905e-05, "loss": 1.6973, "step": 1897 }, { "epoch": 0.1971538381634985, "grad_norm": 0.37634965777397156, "learning_rate": 9.07120127402087e-05, "loss": 1.5333, "step": 1898 }, { "epoch": 0.19725771268307885, "grad_norm": 0.365343302488327, "learning_rate": 9.070253834538658e-05, "loss": 1.7896, "step": 1899 }, { "epoch": 0.1973615872026592, "grad_norm": 0.3354664444923401, "learning_rate": 9.069305961606162e-05, "loss": 1.5792, "step": 1900 }, { "epoch": 0.19746546172223953, "grad_norm": 0.4631555676460266, "learning_rate": 9.068357655324322e-05, "loss": 1.9998, "step": 1901 }, { "epoch": 0.19756933624181988, "grad_norm": 0.3880319893360138, "learning_rate": 9.067408915794126e-05, "loss": 1.6883, "step": 1902 }, { "epoch": 0.19767321076140024, "grad_norm": 0.3678785562515259, "learning_rate": 9.066459743116609e-05, "loss": 1.7639, "step": 1903 }, { "epoch": 0.19777708528098056, "grad_norm": 0.39961332082748413, "learning_rate": 9.065510137392848e-05, "loss": 1.8451, "step": 1904 }, { "epoch": 0.19788095980056092, "grad_norm": 0.4052257239818573, "learning_rate": 9.06456009872397e-05, "loss": 1.7716, "step": 1905 }, { "epoch": 0.19798483432014127, "grad_norm": 0.3587489128112793, "learning_rate": 9.063609627211145e-05, "loss": 1.7665, "step": 1906 }, { "epoch": 0.19808870883972163, "grad_norm": 0.36075523495674133, "learning_rate": 9.062658722955593e-05, "loss": 1.7998, "step": 1907 }, { "epoch": 0.19819258335930195, "grad_norm": 0.3680761754512787, "learning_rate": 9.061707386058577e-05, "loss": 1.7033, "step": 1908 }, { "epoch": 0.1982964578788823, "grad_norm": 0.37647727131843567, "learning_rate": 9.060755616621406e-05, "loss": 1.8304, "step": 1909 }, { "epoch": 0.19840033239846266, "grad_norm": 0.3684910237789154, "learning_rate": 9.059803414745438e-05, "loss": 1.6814, "step": 1910 }, { "epoch": 0.198504206918043, "grad_norm": 0.3947627544403076, "learning_rate": 9.058850780532074e-05, "loss": 1.8224, "step": 1911 }, { "epoch": 0.19860808143762335, "grad_norm": 0.38467562198638916, "learning_rate": 9.057897714082761e-05, "loss": 1.6889, "step": 1912 }, { "epoch": 0.1987119559572037, "grad_norm": 0.38776230812072754, "learning_rate": 9.056944215498994e-05, "loss": 1.5652, "step": 1913 }, { "epoch": 0.19881583047678406, "grad_norm": 0.36061033606529236, "learning_rate": 9.055990284882317e-05, "loss": 1.7888, "step": 1914 }, { "epoch": 0.19891970499636438, "grad_norm": 0.37597185373306274, "learning_rate": 9.05503592233431e-05, "loss": 1.8526, "step": 1915 }, { "epoch": 0.19902357951594474, "grad_norm": 0.39023831486701965, "learning_rate": 9.054081127956607e-05, "loss": 1.7178, "step": 1916 }, { "epoch": 0.1991274540355251, "grad_norm": 0.37832608819007874, "learning_rate": 9.053125901850888e-05, "loss": 1.7645, "step": 1917 }, { "epoch": 0.19923132855510542, "grad_norm": 0.3865016996860504, "learning_rate": 9.052170244118876e-05, "loss": 1.9984, "step": 1918 }, { "epoch": 0.19933520307468577, "grad_norm": 0.42755410075187683, "learning_rate": 9.05121415486234e-05, "loss": 1.7952, "step": 1919 }, { "epoch": 0.19943907759426613, "grad_norm": 0.3337387442588806, "learning_rate": 9.050257634183099e-05, "loss": 1.5986, "step": 1920 }, { "epoch": 0.19954295211384648, "grad_norm": 0.35701167583465576, "learning_rate": 9.049300682183012e-05, "loss": 1.5109, "step": 1921 }, { "epoch": 0.1996468266334268, "grad_norm": 0.3579074442386627, "learning_rate": 9.048343298963988e-05, "loss": 1.8023, "step": 1922 }, { "epoch": 0.19975070115300717, "grad_norm": 0.3522278666496277, "learning_rate": 9.047385484627982e-05, "loss": 1.8454, "step": 1923 }, { "epoch": 0.19985457567258752, "grad_norm": 0.35523468255996704, "learning_rate": 9.046427239276991e-05, "loss": 1.7483, "step": 1924 }, { "epoch": 0.19995845019216787, "grad_norm": 0.32691970467567444, "learning_rate": 9.045468563013064e-05, "loss": 1.6541, "step": 1925 }, { "epoch": 0.2000623247117482, "grad_norm": 0.3599613606929779, "learning_rate": 9.04450945593829e-05, "loss": 1.7776, "step": 1926 }, { "epoch": 0.20016619923132856, "grad_norm": 0.3572538495063782, "learning_rate": 9.043549918154808e-05, "loss": 1.7373, "step": 1927 }, { "epoch": 0.2002700737509089, "grad_norm": 0.3507987856864929, "learning_rate": 9.0425899497648e-05, "loss": 1.7154, "step": 1928 }, { "epoch": 0.20037394827048924, "grad_norm": 0.3993472456932068, "learning_rate": 9.041629550870496e-05, "loss": 1.8059, "step": 1929 }, { "epoch": 0.2004778227900696, "grad_norm": 0.34402957558631897, "learning_rate": 9.040668721574171e-05, "loss": 1.7075, "step": 1930 }, { "epoch": 0.20058169730964995, "grad_norm": 0.3825515806674957, "learning_rate": 9.039707461978146e-05, "loss": 1.5313, "step": 1931 }, { "epoch": 0.2006855718292303, "grad_norm": 0.3753582537174225, "learning_rate": 9.038745772184786e-05, "loss": 1.792, "step": 1932 }, { "epoch": 0.20078944634881063, "grad_norm": 0.39727622270584106, "learning_rate": 9.037783652296505e-05, "loss": 1.7264, "step": 1933 }, { "epoch": 0.20089332086839098, "grad_norm": 0.3653308153152466, "learning_rate": 9.036821102415762e-05, "loss": 1.6462, "step": 1934 }, { "epoch": 0.20099719538797134, "grad_norm": 0.3842056095600128, "learning_rate": 9.03585812264506e-05, "loss": 1.8057, "step": 1935 }, { "epoch": 0.20110106990755167, "grad_norm": 0.3687531054019928, "learning_rate": 9.034894713086947e-05, "loss": 1.6832, "step": 1936 }, { "epoch": 0.20120494442713202, "grad_norm": 0.4252033829689026, "learning_rate": 9.033930873844023e-05, "loss": 1.8803, "step": 1937 }, { "epoch": 0.20130881894671238, "grad_norm": 0.3911295235157013, "learning_rate": 9.032966605018924e-05, "loss": 1.8326, "step": 1938 }, { "epoch": 0.20141269346629273, "grad_norm": 0.3815619647502899, "learning_rate": 9.032001906714343e-05, "loss": 1.8654, "step": 1939 }, { "epoch": 0.20151656798587306, "grad_norm": 0.42655375599861145, "learning_rate": 9.031036779033006e-05, "loss": 1.8325, "step": 1940 }, { "epoch": 0.2016204425054534, "grad_norm": 0.3972933292388916, "learning_rate": 9.030071222077698e-05, "loss": 1.5501, "step": 1941 }, { "epoch": 0.20172431702503377, "grad_norm": 0.3478373885154724, "learning_rate": 9.02910523595124e-05, "loss": 1.5961, "step": 1942 }, { "epoch": 0.2018281915446141, "grad_norm": 0.36317178606987, "learning_rate": 9.028138820756504e-05, "loss": 1.7352, "step": 1943 }, { "epoch": 0.20193206606419445, "grad_norm": 0.36241620779037476, "learning_rate": 9.027171976596402e-05, "loss": 1.7462, "step": 1944 }, { "epoch": 0.2020359405837748, "grad_norm": 0.3782672584056854, "learning_rate": 9.026204703573899e-05, "loss": 1.7317, "step": 1945 }, { "epoch": 0.20213981510335516, "grad_norm": 0.3928276002407074, "learning_rate": 9.025237001791999e-05, "loss": 1.7494, "step": 1946 }, { "epoch": 0.20224368962293549, "grad_norm": 0.38330700993537903, "learning_rate": 9.024268871353758e-05, "loss": 1.6797, "step": 1947 }, { "epoch": 0.20234756414251584, "grad_norm": 0.38679707050323486, "learning_rate": 9.023300312362273e-05, "loss": 1.7813, "step": 1948 }, { "epoch": 0.2024514386620962, "grad_norm": 0.41488325595855713, "learning_rate": 9.022331324920687e-05, "loss": 1.8702, "step": 1949 }, { "epoch": 0.20255531318167652, "grad_norm": 0.39658311009407043, "learning_rate": 9.02136190913219e-05, "loss": 1.7838, "step": 1950 }, { "epoch": 0.20265918770125688, "grad_norm": 0.39057308435440063, "learning_rate": 9.020392065100018e-05, "loss": 1.814, "step": 1951 }, { "epoch": 0.20276306222083723, "grad_norm": 0.374887079000473, "learning_rate": 9.019421792927452e-05, "loss": 1.7414, "step": 1952 }, { "epoch": 0.20286693674041759, "grad_norm": 0.3583182990550995, "learning_rate": 9.018451092717816e-05, "loss": 1.6376, "step": 1953 }, { "epoch": 0.2029708112599979, "grad_norm": 0.35195374488830566, "learning_rate": 9.017479964574485e-05, "loss": 1.6618, "step": 1954 }, { "epoch": 0.20307468577957827, "grad_norm": 0.3840351104736328, "learning_rate": 9.016508408600875e-05, "loss": 1.6952, "step": 1955 }, { "epoch": 0.20317856029915862, "grad_norm": 0.37217509746551514, "learning_rate": 9.015536424900449e-05, "loss": 1.6792, "step": 1956 }, { "epoch": 0.20328243481873895, "grad_norm": 0.39523565769195557, "learning_rate": 9.014564013576716e-05, "loss": 1.7578, "step": 1957 }, { "epoch": 0.2033863093383193, "grad_norm": 0.38107234239578247, "learning_rate": 9.013591174733231e-05, "loss": 1.8505, "step": 1958 }, { "epoch": 0.20349018385789966, "grad_norm": 0.3982243835926056, "learning_rate": 9.012617908473593e-05, "loss": 1.9007, "step": 1959 }, { "epoch": 0.20359405837748001, "grad_norm": 0.3710818290710449, "learning_rate": 9.011644214901447e-05, "loss": 1.7955, "step": 1960 }, { "epoch": 0.20369793289706034, "grad_norm": 0.37171775102615356, "learning_rate": 9.010670094120485e-05, "loss": 1.8637, "step": 1961 }, { "epoch": 0.2038018074166407, "grad_norm": 0.3907235562801361, "learning_rate": 9.009695546234443e-05, "loss": 1.7001, "step": 1962 }, { "epoch": 0.20390568193622105, "grad_norm": 0.34530210494995117, "learning_rate": 9.0087205713471e-05, "loss": 1.6479, "step": 1963 }, { "epoch": 0.2040095564558014, "grad_norm": 0.37163665890693665, "learning_rate": 9.007745169562285e-05, "loss": 1.7622, "step": 1964 }, { "epoch": 0.20411343097538173, "grad_norm": 0.36681413650512695, "learning_rate": 9.006769340983873e-05, "loss": 1.9061, "step": 1965 }, { "epoch": 0.2042173054949621, "grad_norm": 0.370410680770874, "learning_rate": 9.00579308571578e-05, "loss": 1.823, "step": 1966 }, { "epoch": 0.20432118001454244, "grad_norm": 0.43219515681266785, "learning_rate": 9.004816403861969e-05, "loss": 1.9272, "step": 1967 }, { "epoch": 0.20442505453412277, "grad_norm": 0.3857935667037964, "learning_rate": 9.00383929552645e-05, "loss": 1.7746, "step": 1968 }, { "epoch": 0.20452892905370312, "grad_norm": 0.37894824147224426, "learning_rate": 9.002861760813278e-05, "loss": 1.7197, "step": 1969 }, { "epoch": 0.20463280357328348, "grad_norm": 0.35982224345207214, "learning_rate": 9.001883799826551e-05, "loss": 1.7175, "step": 1970 }, { "epoch": 0.20473667809286383, "grad_norm": 0.3776380717754364, "learning_rate": 9.000905412670415e-05, "loss": 1.7624, "step": 1971 }, { "epoch": 0.20484055261244416, "grad_norm": 0.38274866342544556, "learning_rate": 8.999926599449062e-05, "loss": 1.7201, "step": 1972 }, { "epoch": 0.20494442713202451, "grad_norm": 0.3876325488090515, "learning_rate": 8.998947360266726e-05, "loss": 1.8462, "step": 1973 }, { "epoch": 0.20504830165160487, "grad_norm": 0.3528856039047241, "learning_rate": 8.99796769522769e-05, "loss": 1.7983, "step": 1974 }, { "epoch": 0.2051521761711852, "grad_norm": 0.3806709945201874, "learning_rate": 8.996987604436279e-05, "loss": 2.0234, "step": 1975 }, { "epoch": 0.20525605069076555, "grad_norm": 0.3677120506763458, "learning_rate": 8.996007087996866e-05, "loss": 1.7771, "step": 1976 }, { "epoch": 0.2053599252103459, "grad_norm": 0.3892456591129303, "learning_rate": 8.995026146013867e-05, "loss": 1.6017, "step": 1977 }, { "epoch": 0.20546379972992626, "grad_norm": 0.4247187077999115, "learning_rate": 8.994044778591749e-05, "loss": 1.7722, "step": 1978 }, { "epoch": 0.2055676742495066, "grad_norm": 0.3515567183494568, "learning_rate": 8.993062985835013e-05, "loss": 1.4704, "step": 1979 }, { "epoch": 0.20567154876908694, "grad_norm": 0.3741825520992279, "learning_rate": 8.99208076784822e-05, "loss": 1.6708, "step": 1980 }, { "epoch": 0.2057754232886673, "grad_norm": 0.37131038308143616, "learning_rate": 8.991098124735961e-05, "loss": 1.7259, "step": 1981 }, { "epoch": 0.20587929780824762, "grad_norm": 0.3691762387752533, "learning_rate": 8.990115056602885e-05, "loss": 1.8153, "step": 1982 }, { "epoch": 0.20598317232782798, "grad_norm": 0.3380625545978546, "learning_rate": 8.989131563553679e-05, "loss": 1.7063, "step": 1983 }, { "epoch": 0.20608704684740833, "grad_norm": 0.4201214909553528, "learning_rate": 8.98814764569308e-05, "loss": 1.8642, "step": 1984 }, { "epoch": 0.2061909213669887, "grad_norm": 0.3742324709892273, "learning_rate": 8.987163303125863e-05, "loss": 1.6623, "step": 1985 }, { "epoch": 0.20629479588656902, "grad_norm": 0.3564354181289673, "learning_rate": 8.986178535956856e-05, "loss": 1.7477, "step": 1986 }, { "epoch": 0.20639867040614937, "grad_norm": 0.41312122344970703, "learning_rate": 8.985193344290929e-05, "loss": 1.901, "step": 1987 }, { "epoch": 0.20650254492572973, "grad_norm": 0.4109034538269043, "learning_rate": 8.984207728232995e-05, "loss": 1.7853, "step": 1988 }, { "epoch": 0.20660641944531005, "grad_norm": 0.383986234664917, "learning_rate": 8.983221687888017e-05, "loss": 1.8616, "step": 1989 }, { "epoch": 0.2067102939648904, "grad_norm": 0.38250911235809326, "learning_rate": 8.982235223360999e-05, "loss": 1.7691, "step": 1990 }, { "epoch": 0.20681416848447076, "grad_norm": 0.35506096482276917, "learning_rate": 8.981248334756994e-05, "loss": 1.8013, "step": 1991 }, { "epoch": 0.20691804300405112, "grad_norm": 0.33986783027648926, "learning_rate": 8.980261022181095e-05, "loss": 1.624, "step": 1992 }, { "epoch": 0.20702191752363144, "grad_norm": 0.35081538558006287, "learning_rate": 8.979273285738445e-05, "loss": 1.6776, "step": 1993 }, { "epoch": 0.2071257920432118, "grad_norm": 0.3631971478462219, "learning_rate": 8.978285125534229e-05, "loss": 1.5715, "step": 1994 }, { "epoch": 0.20722966656279215, "grad_norm": 0.38734301924705505, "learning_rate": 8.977296541673678e-05, "loss": 1.781, "step": 1995 }, { "epoch": 0.2073335410823725, "grad_norm": 0.40530499815940857, "learning_rate": 8.976307534262072e-05, "loss": 1.8291, "step": 1996 }, { "epoch": 0.20743741560195283, "grad_norm": 0.4037350118160248, "learning_rate": 8.975318103404728e-05, "loss": 1.9573, "step": 1997 }, { "epoch": 0.2075412901215332, "grad_norm": 0.3434680700302124, "learning_rate": 8.974328249207015e-05, "loss": 1.7654, "step": 1998 }, { "epoch": 0.20764516464111354, "grad_norm": 0.38100457191467285, "learning_rate": 8.973337971774344e-05, "loss": 1.7762, "step": 1999 }, { "epoch": 0.20774903916069387, "grad_norm": 0.3673883080482483, "learning_rate": 8.972347271212173e-05, "loss": 1.7563, "step": 2000 }, { "epoch": 0.20785291368027423, "grad_norm": 0.3593236207962036, "learning_rate": 8.971356147626004e-05, "loss": 1.6374, "step": 2001 }, { "epoch": 0.20795678819985458, "grad_norm": 0.4013366401195526, "learning_rate": 8.970364601121382e-05, "loss": 1.7564, "step": 2002 }, { "epoch": 0.20806066271943494, "grad_norm": 0.36467140913009644, "learning_rate": 8.9693726318039e-05, "loss": 1.7618, "step": 2003 }, { "epoch": 0.20816453723901526, "grad_norm": 0.36317500472068787, "learning_rate": 8.968380239779194e-05, "loss": 1.6911, "step": 2004 }, { "epoch": 0.20826841175859562, "grad_norm": 0.3494802713394165, "learning_rate": 8.96738742515295e-05, "loss": 1.7087, "step": 2005 }, { "epoch": 0.20837228627817597, "grad_norm": 0.3602224290370941, "learning_rate": 8.96639418803089e-05, "loss": 1.698, "step": 2006 }, { "epoch": 0.2084761607977563, "grad_norm": 0.3831143379211426, "learning_rate": 8.965400528518787e-05, "loss": 1.7733, "step": 2007 }, { "epoch": 0.20858003531733665, "grad_norm": 0.3766723871231079, "learning_rate": 8.964406446722459e-05, "loss": 1.7092, "step": 2008 }, { "epoch": 0.208683909836917, "grad_norm": 0.38016197085380554, "learning_rate": 8.96341194274777e-05, "loss": 1.6285, "step": 2009 }, { "epoch": 0.20878778435649736, "grad_norm": 0.3981560170650482, "learning_rate": 8.962417016700624e-05, "loss": 1.7449, "step": 2010 }, { "epoch": 0.2088916588760777, "grad_norm": 0.3760923445224762, "learning_rate": 8.961421668686974e-05, "loss": 1.7459, "step": 2011 }, { "epoch": 0.20899553339565805, "grad_norm": 0.3543214797973633, "learning_rate": 8.960425898812816e-05, "loss": 1.7329, "step": 2012 }, { "epoch": 0.2090994079152384, "grad_norm": 0.3547232747077942, "learning_rate": 8.959429707184192e-05, "loss": 1.7992, "step": 2013 }, { "epoch": 0.20920328243481873, "grad_norm": 0.43568694591522217, "learning_rate": 8.95843309390719e-05, "loss": 2.0488, "step": 2014 }, { "epoch": 0.20930715695439908, "grad_norm": 0.4254542291164398, "learning_rate": 8.95743605908794e-05, "loss": 1.796, "step": 2015 }, { "epoch": 0.20941103147397944, "grad_norm": 0.3894093632698059, "learning_rate": 8.956438602832619e-05, "loss": 1.8641, "step": 2016 }, { "epoch": 0.2095149059935598, "grad_norm": 0.383073091506958, "learning_rate": 8.955440725247448e-05, "loss": 1.7898, "step": 2017 }, { "epoch": 0.20961878051314012, "grad_norm": 0.37630245089530945, "learning_rate": 8.954442426438694e-05, "loss": 1.738, "step": 2018 }, { "epoch": 0.20972265503272047, "grad_norm": 0.3684820234775543, "learning_rate": 8.953443706512667e-05, "loss": 1.8819, "step": 2019 }, { "epoch": 0.20982652955230083, "grad_norm": 0.36796849966049194, "learning_rate": 8.952444565575723e-05, "loss": 1.7886, "step": 2020 }, { "epoch": 0.20993040407188115, "grad_norm": 0.35004639625549316, "learning_rate": 8.951445003734263e-05, "loss": 1.7872, "step": 2021 }, { "epoch": 0.2100342785914615, "grad_norm": 0.3540632724761963, "learning_rate": 8.950445021094733e-05, "loss": 1.6917, "step": 2022 }, { "epoch": 0.21013815311104186, "grad_norm": 0.3964502215385437, "learning_rate": 8.949444617763623e-05, "loss": 1.7487, "step": 2023 }, { "epoch": 0.21024202763062222, "grad_norm": 0.3707832098007202, "learning_rate": 8.948443793847467e-05, "loss": 1.8752, "step": 2024 }, { "epoch": 0.21034590215020255, "grad_norm": 0.3899782598018646, "learning_rate": 8.947442549452846e-05, "loss": 1.7619, "step": 2025 }, { "epoch": 0.2104497766697829, "grad_norm": 0.349178671836853, "learning_rate": 8.946440884686387e-05, "loss": 1.5852, "step": 2026 }, { "epoch": 0.21055365118936326, "grad_norm": 0.3928735554218292, "learning_rate": 8.945438799654756e-05, "loss": 1.8503, "step": 2027 }, { "epoch": 0.21065752570894358, "grad_norm": 0.3914186954498291, "learning_rate": 8.944436294464669e-05, "loss": 1.6764, "step": 2028 }, { "epoch": 0.21076140022852394, "grad_norm": 0.370089054107666, "learning_rate": 8.943433369222882e-05, "loss": 1.6127, "step": 2029 }, { "epoch": 0.2108652747481043, "grad_norm": 0.35939261317253113, "learning_rate": 8.942430024036203e-05, "loss": 1.7161, "step": 2030 }, { "epoch": 0.21096914926768465, "grad_norm": 0.36686834692955017, "learning_rate": 8.941426259011478e-05, "loss": 1.8648, "step": 2031 }, { "epoch": 0.21107302378726497, "grad_norm": 0.3749295473098755, "learning_rate": 8.940422074255602e-05, "loss": 1.7433, "step": 2032 }, { "epoch": 0.21117689830684533, "grad_norm": 0.36779385805130005, "learning_rate": 8.93941746987551e-05, "loss": 1.778, "step": 2033 }, { "epoch": 0.21128077282642568, "grad_norm": 0.3840864896774292, "learning_rate": 8.938412445978186e-05, "loss": 1.7633, "step": 2034 }, { "epoch": 0.21138464734600604, "grad_norm": 0.3512420356273651, "learning_rate": 8.937407002670659e-05, "loss": 1.6726, "step": 2035 }, { "epoch": 0.21148852186558637, "grad_norm": 0.3997120261192322, "learning_rate": 8.936401140059998e-05, "loss": 1.7913, "step": 2036 }, { "epoch": 0.21159239638516672, "grad_norm": 0.4093436300754547, "learning_rate": 8.935394858253321e-05, "loss": 1.7161, "step": 2037 }, { "epoch": 0.21169627090474707, "grad_norm": 0.3791636824607849, "learning_rate": 8.934388157357788e-05, "loss": 1.9097, "step": 2038 }, { "epoch": 0.2118001454243274, "grad_norm": 0.39315351843833923, "learning_rate": 8.933381037480606e-05, "loss": 1.9521, "step": 2039 }, { "epoch": 0.21190401994390776, "grad_norm": 0.3954058289527893, "learning_rate": 8.932373498729025e-05, "loss": 1.7008, "step": 2040 }, { "epoch": 0.2120078944634881, "grad_norm": 0.44486141204833984, "learning_rate": 8.931365541210342e-05, "loss": 1.8255, "step": 2041 }, { "epoch": 0.21211176898306847, "grad_norm": 0.4147118330001831, "learning_rate": 8.930357165031893e-05, "loss": 1.9377, "step": 2042 }, { "epoch": 0.2122156435026488, "grad_norm": 0.4043649733066559, "learning_rate": 8.929348370301063e-05, "loss": 1.8809, "step": 2043 }, { "epoch": 0.21231951802222915, "grad_norm": 0.37084200978279114, "learning_rate": 8.928339157125283e-05, "loss": 1.7093, "step": 2044 }, { "epoch": 0.2124233925418095, "grad_norm": 0.33814066648483276, "learning_rate": 8.927329525612025e-05, "loss": 1.7248, "step": 2045 }, { "epoch": 0.21252726706138983, "grad_norm": 0.3607522249221802, "learning_rate": 8.926319475868807e-05, "loss": 1.6424, "step": 2046 }, { "epoch": 0.21263114158097018, "grad_norm": 0.35810667276382446, "learning_rate": 8.925309008003192e-05, "loss": 1.6919, "step": 2047 }, { "epoch": 0.21273501610055054, "grad_norm": 0.3557438850402832, "learning_rate": 8.924298122122783e-05, "loss": 1.8568, "step": 2048 }, { "epoch": 0.2128388906201309, "grad_norm": 0.37123459577560425, "learning_rate": 8.923286818335237e-05, "loss": 1.8214, "step": 2049 }, { "epoch": 0.21294276513971122, "grad_norm": 0.42854633927345276, "learning_rate": 8.922275096748247e-05, "loss": 2.1206, "step": 2050 }, { "epoch": 0.21304663965929158, "grad_norm": 0.4002090394496918, "learning_rate": 8.921262957469554e-05, "loss": 1.8579, "step": 2051 }, { "epoch": 0.21315051417887193, "grad_norm": 0.34181690216064453, "learning_rate": 8.920250400606944e-05, "loss": 1.7962, "step": 2052 }, { "epoch": 0.21325438869845226, "grad_norm": 0.35831955075263977, "learning_rate": 8.919237426268246e-05, "loss": 1.6304, "step": 2053 }, { "epoch": 0.2133582632180326, "grad_norm": 0.4297682046890259, "learning_rate": 8.918224034561333e-05, "loss": 1.9758, "step": 2054 }, { "epoch": 0.21346213773761297, "grad_norm": 0.3815910518169403, "learning_rate": 8.917210225594122e-05, "loss": 1.7476, "step": 2055 }, { "epoch": 0.21356601225719332, "grad_norm": 0.3674294650554657, "learning_rate": 8.916195999474579e-05, "loss": 1.7741, "step": 2056 }, { "epoch": 0.21366988677677365, "grad_norm": 0.3721073567867279, "learning_rate": 8.915181356310709e-05, "loss": 1.7451, "step": 2057 }, { "epoch": 0.213773761296354, "grad_norm": 0.379859983921051, "learning_rate": 8.914166296210564e-05, "loss": 1.9665, "step": 2058 }, { "epoch": 0.21387763581593436, "grad_norm": 0.3658583462238312, "learning_rate": 8.913150819282242e-05, "loss": 1.6796, "step": 2059 }, { "epoch": 0.21398151033551469, "grad_norm": 0.3702607750892639, "learning_rate": 8.912134925633878e-05, "loss": 1.7107, "step": 2060 }, { "epoch": 0.21408538485509504, "grad_norm": 0.37530508637428284, "learning_rate": 8.911118615373661e-05, "loss": 1.6896, "step": 2061 }, { "epoch": 0.2141892593746754, "grad_norm": 0.3804415464401245, "learning_rate": 8.910101888609821e-05, "loss": 1.8049, "step": 2062 }, { "epoch": 0.21429313389425575, "grad_norm": 0.36921432614326477, "learning_rate": 8.909084745450628e-05, "loss": 1.7652, "step": 2063 }, { "epoch": 0.21439700841383608, "grad_norm": 0.38994041085243225, "learning_rate": 8.908067186004405e-05, "loss": 1.701, "step": 2064 }, { "epoch": 0.21450088293341643, "grad_norm": 0.3705255389213562, "learning_rate": 8.907049210379508e-05, "loss": 1.781, "step": 2065 }, { "epoch": 0.21460475745299679, "grad_norm": 0.3754466772079468, "learning_rate": 8.906030818684348e-05, "loss": 1.6878, "step": 2066 }, { "epoch": 0.2147086319725771, "grad_norm": 0.3889169991016388, "learning_rate": 8.905012011027372e-05, "loss": 1.6964, "step": 2067 }, { "epoch": 0.21481250649215747, "grad_norm": 0.3674185872077942, "learning_rate": 8.903992787517078e-05, "loss": 1.2466, "step": 2068 }, { "epoch": 0.21491638101173782, "grad_norm": 0.37006455659866333, "learning_rate": 8.902973148262004e-05, "loss": 1.7607, "step": 2069 }, { "epoch": 0.21502025553131818, "grad_norm": 0.3947128355503082, "learning_rate": 8.901953093370734e-05, "loss": 1.8514, "step": 2070 }, { "epoch": 0.2151241300508985, "grad_norm": 0.36148831248283386, "learning_rate": 8.900932622951897e-05, "loss": 1.5947, "step": 2071 }, { "epoch": 0.21522800457047886, "grad_norm": 0.34942540526390076, "learning_rate": 8.899911737114163e-05, "loss": 1.7449, "step": 2072 }, { "epoch": 0.21533187909005921, "grad_norm": 0.3522939383983612, "learning_rate": 8.898890435966251e-05, "loss": 1.7266, "step": 2073 }, { "epoch": 0.21543575360963957, "grad_norm": 0.4223746955394745, "learning_rate": 8.897868719616919e-05, "loss": 1.8562, "step": 2074 }, { "epoch": 0.2155396281292199, "grad_norm": 0.3850705921649933, "learning_rate": 8.896846588174973e-05, "loss": 1.8528, "step": 2075 }, { "epoch": 0.21564350264880025, "grad_norm": 0.391237735748291, "learning_rate": 8.89582404174926e-05, "loss": 1.6268, "step": 2076 }, { "epoch": 0.2157473771683806, "grad_norm": 0.40240150690078735, "learning_rate": 8.89480108044868e-05, "loss": 1.7256, "step": 2077 }, { "epoch": 0.21585125168796093, "grad_norm": 0.400774210691452, "learning_rate": 8.893777704382163e-05, "loss": 1.7654, "step": 2078 }, { "epoch": 0.2159551262075413, "grad_norm": 0.4116402268409729, "learning_rate": 8.892753913658691e-05, "loss": 1.5765, "step": 2079 }, { "epoch": 0.21605900072712164, "grad_norm": 0.3588711619377136, "learning_rate": 8.891729708387294e-05, "loss": 1.7399, "step": 2080 }, { "epoch": 0.216162875246702, "grad_norm": 0.40262454748153687, "learning_rate": 8.890705088677039e-05, "loss": 1.8868, "step": 2081 }, { "epoch": 0.21626674976628232, "grad_norm": 0.38147544860839844, "learning_rate": 8.889680054637042e-05, "loss": 1.907, "step": 2082 }, { "epoch": 0.21637062428586268, "grad_norm": 0.40591171383857727, "learning_rate": 8.888654606376459e-05, "loss": 1.9046, "step": 2083 }, { "epoch": 0.21647449880544303, "grad_norm": 0.3788270652294159, "learning_rate": 8.887628744004493e-05, "loss": 1.7253, "step": 2084 }, { "epoch": 0.21657837332502336, "grad_norm": 0.38902533054351807, "learning_rate": 8.88660246763039e-05, "loss": 1.7522, "step": 2085 }, { "epoch": 0.21668224784460371, "grad_norm": 0.3673681318759918, "learning_rate": 8.885575777363442e-05, "loss": 1.7231, "step": 2086 }, { "epoch": 0.21678612236418407, "grad_norm": 0.33182385563850403, "learning_rate": 8.884548673312981e-05, "loss": 1.6142, "step": 2087 }, { "epoch": 0.21688999688376442, "grad_norm": 0.38326361775398254, "learning_rate": 8.883521155588388e-05, "loss": 1.8635, "step": 2088 }, { "epoch": 0.21699387140334475, "grad_norm": 0.38930484652519226, "learning_rate": 8.882493224299084e-05, "loss": 1.6453, "step": 2089 }, { "epoch": 0.2170977459229251, "grad_norm": 0.3819931447505951, "learning_rate": 8.881464879554536e-05, "loss": 1.821, "step": 2090 }, { "epoch": 0.21720162044250546, "grad_norm": 0.3670012354850769, "learning_rate": 8.880436121464255e-05, "loss": 1.8536, "step": 2091 }, { "epoch": 0.2173054949620858, "grad_norm": 0.3817991614341736, "learning_rate": 8.879406950137796e-05, "loss": 1.7896, "step": 2092 }, { "epoch": 0.21740936948166614, "grad_norm": 0.44066059589385986, "learning_rate": 8.878377365684758e-05, "loss": 1.7763, "step": 2093 }, { "epoch": 0.2175132440012465, "grad_norm": 0.362020343542099, "learning_rate": 8.877347368214783e-05, "loss": 1.7594, "step": 2094 }, { "epoch": 0.21761711852082685, "grad_norm": 0.385455459356308, "learning_rate": 8.876316957837556e-05, "loss": 1.9007, "step": 2095 }, { "epoch": 0.21772099304040718, "grad_norm": 0.3932596743106842, "learning_rate": 8.87528613466281e-05, "loss": 1.6164, "step": 2096 }, { "epoch": 0.21782486755998753, "grad_norm": 0.3642088770866394, "learning_rate": 8.874254898800321e-05, "loss": 1.626, "step": 2097 }, { "epoch": 0.2179287420795679, "grad_norm": 0.4024796783924103, "learning_rate": 8.873223250359903e-05, "loss": 1.8004, "step": 2098 }, { "epoch": 0.21803261659914822, "grad_norm": 0.3582911491394043, "learning_rate": 8.872191189451422e-05, "loss": 1.7169, "step": 2099 }, { "epoch": 0.21813649111872857, "grad_norm": 0.40865403413772583, "learning_rate": 8.871158716184784e-05, "loss": 1.6472, "step": 2100 }, { "epoch": 0.21824036563830893, "grad_norm": 0.39730289578437805, "learning_rate": 8.87012583066994e-05, "loss": 1.6177, "step": 2101 }, { "epoch": 0.21834424015788928, "grad_norm": 0.3871819078922272, "learning_rate": 8.869092533016882e-05, "loss": 1.6924, "step": 2102 }, { "epoch": 0.2184481146774696, "grad_norm": 0.377941757440567, "learning_rate": 8.86805882333565e-05, "loss": 1.8299, "step": 2103 }, { "epoch": 0.21855198919704996, "grad_norm": 0.397905558347702, "learning_rate": 8.867024701736325e-05, "loss": 1.8411, "step": 2104 }, { "epoch": 0.21865586371663032, "grad_norm": 0.3686088025569916, "learning_rate": 8.865990168329031e-05, "loss": 1.6618, "step": 2105 }, { "epoch": 0.21875973823621067, "grad_norm": 0.394415020942688, "learning_rate": 8.864955223223943e-05, "loss": 1.8495, "step": 2106 }, { "epoch": 0.218863612755791, "grad_norm": 0.3614397943019867, "learning_rate": 8.863919866531269e-05, "loss": 1.7565, "step": 2107 }, { "epoch": 0.21896748727537135, "grad_norm": 0.3643103837966919, "learning_rate": 8.862884098361271e-05, "loss": 1.7943, "step": 2108 }, { "epoch": 0.2190713617949517, "grad_norm": 0.37451231479644775, "learning_rate": 8.861847918824248e-05, "loss": 1.5722, "step": 2109 }, { "epoch": 0.21917523631453203, "grad_norm": 0.40897324681282043, "learning_rate": 8.860811328030545e-05, "loss": 1.8011, "step": 2110 }, { "epoch": 0.2192791108341124, "grad_norm": 0.38252517580986023, "learning_rate": 8.85977432609055e-05, "loss": 1.7692, "step": 2111 }, { "epoch": 0.21938298535369274, "grad_norm": 0.3585284352302551, "learning_rate": 8.858736913114698e-05, "loss": 1.6858, "step": 2112 }, { "epoch": 0.2194868598732731, "grad_norm": 0.40061917901039124, "learning_rate": 8.857699089213462e-05, "loss": 1.5681, "step": 2113 }, { "epoch": 0.21959073439285343, "grad_norm": 0.38758501410484314, "learning_rate": 8.856660854497367e-05, "loss": 1.7354, "step": 2114 }, { "epoch": 0.21969460891243378, "grad_norm": 0.37637653946876526, "learning_rate": 8.855622209076972e-05, "loss": 1.6739, "step": 2115 }, { "epoch": 0.21979848343201414, "grad_norm": 0.4087028205394745, "learning_rate": 8.854583153062887e-05, "loss": 2.0526, "step": 2116 }, { "epoch": 0.21990235795159446, "grad_norm": 0.3950178623199463, "learning_rate": 8.853543686565765e-05, "loss": 1.8204, "step": 2117 }, { "epoch": 0.22000623247117482, "grad_norm": 0.3853364586830139, "learning_rate": 8.852503809696297e-05, "loss": 1.7236, "step": 2118 }, { "epoch": 0.22011010699075517, "grad_norm": 0.36730098724365234, "learning_rate": 8.851463522565226e-05, "loss": 1.7896, "step": 2119 }, { "epoch": 0.22021398151033553, "grad_norm": 0.3799102008342743, "learning_rate": 8.85042282528333e-05, "loss": 1.8652, "step": 2120 }, { "epoch": 0.22031785602991585, "grad_norm": 0.37538158893585205, "learning_rate": 8.84938171796144e-05, "loss": 1.8323, "step": 2121 }, { "epoch": 0.2204217305494962, "grad_norm": 0.3702995777130127, "learning_rate": 8.848340200710421e-05, "loss": 1.6752, "step": 2122 }, { "epoch": 0.22052560506907656, "grad_norm": 0.3841915726661682, "learning_rate": 8.847298273641192e-05, "loss": 1.8893, "step": 2123 }, { "epoch": 0.2206294795886569, "grad_norm": 0.3636316955089569, "learning_rate": 8.846255936864703e-05, "loss": 1.6729, "step": 2124 }, { "epoch": 0.22073335410823725, "grad_norm": 0.3771231770515442, "learning_rate": 8.845213190491962e-05, "loss": 1.6048, "step": 2125 }, { "epoch": 0.2208372286278176, "grad_norm": 0.4207725524902344, "learning_rate": 8.844170034634007e-05, "loss": 1.9401, "step": 2126 }, { "epoch": 0.22094110314739795, "grad_norm": 0.42666593194007874, "learning_rate": 8.843126469401931e-05, "loss": 1.9608, "step": 2127 }, { "epoch": 0.22104497766697828, "grad_norm": 0.3714624345302582, "learning_rate": 8.842082494906863e-05, "loss": 1.7066, "step": 2128 }, { "epoch": 0.22114885218655864, "grad_norm": 0.41800081729888916, "learning_rate": 8.841038111259979e-05, "loss": 1.8553, "step": 2129 }, { "epoch": 0.221252726706139, "grad_norm": 0.34756773710250854, "learning_rate": 8.839993318572497e-05, "loss": 1.6992, "step": 2130 }, { "epoch": 0.22135660122571932, "grad_norm": 0.3820962905883789, "learning_rate": 8.83894811695568e-05, "loss": 1.9582, "step": 2131 }, { "epoch": 0.22146047574529967, "grad_norm": 0.3578221797943115, "learning_rate": 8.837902506520835e-05, "loss": 1.7399, "step": 2132 }, { "epoch": 0.22156435026488003, "grad_norm": 0.3778248727321625, "learning_rate": 8.836856487379306e-05, "loss": 1.6818, "step": 2133 }, { "epoch": 0.22166822478446038, "grad_norm": 0.36357828974723816, "learning_rate": 8.835810059642493e-05, "loss": 1.6495, "step": 2134 }, { "epoch": 0.2217720993040407, "grad_norm": 0.384027898311615, "learning_rate": 8.834763223421828e-05, "loss": 1.7112, "step": 2135 }, { "epoch": 0.22187597382362106, "grad_norm": 0.3461948037147522, "learning_rate": 8.833715978828793e-05, "loss": 1.713, "step": 2136 }, { "epoch": 0.22197984834320142, "grad_norm": 0.38593146204948425, "learning_rate": 8.83266832597491e-05, "loss": 1.6994, "step": 2137 }, { "epoch": 0.22208372286278175, "grad_norm": 0.346113383769989, "learning_rate": 8.831620264971745e-05, "loss": 1.6051, "step": 2138 }, { "epoch": 0.2221875973823621, "grad_norm": 0.3933405876159668, "learning_rate": 8.830571795930911e-05, "loss": 1.678, "step": 2139 }, { "epoch": 0.22229147190194246, "grad_norm": 0.34719663858413696, "learning_rate": 8.829522918964058e-05, "loss": 1.6881, "step": 2140 }, { "epoch": 0.2223953464215228, "grad_norm": 0.3760213553905487, "learning_rate": 8.828473634182888e-05, "loss": 1.8456, "step": 2141 }, { "epoch": 0.22249922094110314, "grad_norm": 0.3988967537879944, "learning_rate": 8.827423941699137e-05, "loss": 1.8562, "step": 2142 }, { "epoch": 0.2226030954606835, "grad_norm": 0.40549615025520325, "learning_rate": 8.826373841624593e-05, "loss": 1.7444, "step": 2143 }, { "epoch": 0.22270696998026385, "grad_norm": 0.37362509965896606, "learning_rate": 8.825323334071083e-05, "loss": 1.7901, "step": 2144 }, { "epoch": 0.2228108444998442, "grad_norm": 0.34235090017318726, "learning_rate": 8.824272419150475e-05, "loss": 1.7461, "step": 2145 }, { "epoch": 0.22291471901942453, "grad_norm": 0.3918958306312561, "learning_rate": 8.823221096974684e-05, "loss": 1.782, "step": 2146 }, { "epoch": 0.22301859353900488, "grad_norm": 0.37165430188179016, "learning_rate": 8.822169367655669e-05, "loss": 1.803, "step": 2147 }, { "epoch": 0.22312246805858524, "grad_norm": 0.3669700026512146, "learning_rate": 8.821117231305431e-05, "loss": 1.8741, "step": 2148 }, { "epoch": 0.22322634257816557, "grad_norm": 0.3532780408859253, "learning_rate": 8.820064688036014e-05, "loss": 1.7279, "step": 2149 }, { "epoch": 0.22333021709774592, "grad_norm": 0.3830491006374359, "learning_rate": 8.819011737959504e-05, "loss": 1.8127, "step": 2150 }, { "epoch": 0.22343409161732627, "grad_norm": 0.36550477147102356, "learning_rate": 8.817958381188033e-05, "loss": 1.7985, "step": 2151 }, { "epoch": 0.22353796613690663, "grad_norm": 0.3734526038169861, "learning_rate": 8.816904617833778e-05, "loss": 1.7096, "step": 2152 }, { "epoch": 0.22364184065648696, "grad_norm": 0.36251428723335266, "learning_rate": 8.815850448008953e-05, "loss": 1.7936, "step": 2153 }, { "epoch": 0.2237457151760673, "grad_norm": 0.3801165223121643, "learning_rate": 8.81479587182582e-05, "loss": 1.7445, "step": 2154 }, { "epoch": 0.22384958969564767, "grad_norm": 0.38031628727912903, "learning_rate": 8.813740889396682e-05, "loss": 1.8105, "step": 2155 }, { "epoch": 0.223953464215228, "grad_norm": 0.42004433274269104, "learning_rate": 8.81268550083389e-05, "loss": 1.7913, "step": 2156 }, { "epoch": 0.22405733873480835, "grad_norm": 0.38428744673728943, "learning_rate": 8.81162970624983e-05, "loss": 1.805, "step": 2157 }, { "epoch": 0.2241612132543887, "grad_norm": 0.4019632935523987, "learning_rate": 8.81057350575694e-05, "loss": 1.6997, "step": 2158 }, { "epoch": 0.22426508777396906, "grad_norm": 0.3467335104942322, "learning_rate": 8.809516899467694e-05, "loss": 1.5612, "step": 2159 }, { "epoch": 0.22436896229354938, "grad_norm": 0.36768898367881775, "learning_rate": 8.808459887494615e-05, "loss": 1.6928, "step": 2160 }, { "epoch": 0.22447283681312974, "grad_norm": 0.41053932905197144, "learning_rate": 8.807402469950263e-05, "loss": 1.7605, "step": 2161 }, { "epoch": 0.2245767113327101, "grad_norm": 0.36271244287490845, "learning_rate": 8.806344646947249e-05, "loss": 1.7773, "step": 2162 }, { "epoch": 0.22468058585229042, "grad_norm": 0.38008955121040344, "learning_rate": 8.80528641859822e-05, "loss": 1.8473, "step": 2163 }, { "epoch": 0.22478446037187078, "grad_norm": 0.382253497838974, "learning_rate": 8.804227785015869e-05, "loss": 1.686, "step": 2164 }, { "epoch": 0.22488833489145113, "grad_norm": 0.3781398832798004, "learning_rate": 8.803168746312934e-05, "loss": 1.7894, "step": 2165 }, { "epoch": 0.22499220941103149, "grad_norm": 0.39482182264328003, "learning_rate": 8.802109302602193e-05, "loss": 1.8895, "step": 2166 }, { "epoch": 0.2250960839306118, "grad_norm": 0.40680569410324097, "learning_rate": 8.801049453996468e-05, "loss": 2.0375, "step": 2167 }, { "epoch": 0.22519995845019217, "grad_norm": 0.3483993113040924, "learning_rate": 8.799989200608627e-05, "loss": 1.6489, "step": 2168 }, { "epoch": 0.22530383296977252, "grad_norm": 0.3627791702747345, "learning_rate": 8.798928542551576e-05, "loss": 1.8188, "step": 2169 }, { "epoch": 0.22540770748935285, "grad_norm": 0.3764524459838867, "learning_rate": 8.797867479938269e-05, "loss": 1.9716, "step": 2170 }, { "epoch": 0.2255115820089332, "grad_norm": 0.3761730492115021, "learning_rate": 8.796806012881699e-05, "loss": 1.857, "step": 2171 }, { "epoch": 0.22561545652851356, "grad_norm": 0.3673245906829834, "learning_rate": 8.795744141494905e-05, "loss": 1.6663, "step": 2172 }, { "epoch": 0.2257193310480939, "grad_norm": 0.3899873197078705, "learning_rate": 8.794681865890968e-05, "loss": 1.7631, "step": 2173 }, { "epoch": 0.22582320556767424, "grad_norm": 0.36863207817077637, "learning_rate": 8.793619186183011e-05, "loss": 1.8269, "step": 2174 }, { "epoch": 0.2259270800872546, "grad_norm": 0.36547142267227173, "learning_rate": 8.792556102484204e-05, "loss": 1.6963, "step": 2175 }, { "epoch": 0.22603095460683495, "grad_norm": 0.34882575273513794, "learning_rate": 8.791492614907754e-05, "loss": 1.6336, "step": 2176 }, { "epoch": 0.22613482912641528, "grad_norm": 0.38748350739479065, "learning_rate": 8.790428723566915e-05, "loss": 1.8569, "step": 2177 }, { "epoch": 0.22623870364599563, "grad_norm": 0.362943559885025, "learning_rate": 8.789364428574984e-05, "loss": 1.7161, "step": 2178 }, { "epoch": 0.22634257816557599, "grad_norm": 0.37946629524230957, "learning_rate": 8.788299730045299e-05, "loss": 1.8211, "step": 2179 }, { "epoch": 0.22644645268515634, "grad_norm": 0.3608231842517853, "learning_rate": 8.787234628091243e-05, "loss": 1.7704, "step": 2180 }, { "epoch": 0.22655032720473667, "grad_norm": 0.38280248641967773, "learning_rate": 8.786169122826242e-05, "loss": 1.7376, "step": 2181 }, { "epoch": 0.22665420172431702, "grad_norm": 0.36834195256233215, "learning_rate": 8.785103214363761e-05, "loss": 1.6503, "step": 2182 }, { "epoch": 0.22675807624389738, "grad_norm": 0.3779491186141968, "learning_rate": 8.784036902817312e-05, "loss": 1.7421, "step": 2183 }, { "epoch": 0.22686195076347773, "grad_norm": 0.40714192390441895, "learning_rate": 8.78297018830045e-05, "loss": 1.7828, "step": 2184 }, { "epoch": 0.22696582528305806, "grad_norm": 0.36981451511383057, "learning_rate": 8.781903070926774e-05, "loss": 1.7687, "step": 2185 }, { "epoch": 0.22706969980263841, "grad_norm": 0.37374863028526306, "learning_rate": 8.780835550809918e-05, "loss": 1.7469, "step": 2186 }, { "epoch": 0.22717357432221877, "grad_norm": 0.3481188416481018, "learning_rate": 8.779767628063567e-05, "loss": 1.5268, "step": 2187 }, { "epoch": 0.2272774488417991, "grad_norm": 0.352603942155838, "learning_rate": 8.77869930280145e-05, "loss": 1.5966, "step": 2188 }, { "epoch": 0.22738132336137945, "grad_norm": 0.38702160120010376, "learning_rate": 8.77763057513733e-05, "loss": 1.7469, "step": 2189 }, { "epoch": 0.2274851978809598, "grad_norm": 0.37263423204421997, "learning_rate": 8.77656144518502e-05, "loss": 1.6986, "step": 2190 }, { "epoch": 0.22758907240054016, "grad_norm": 0.3658749759197235, "learning_rate": 8.775491913058376e-05, "loss": 1.7425, "step": 2191 }, { "epoch": 0.2276929469201205, "grad_norm": 0.38823366165161133, "learning_rate": 8.774421978871292e-05, "loss": 1.7665, "step": 2192 }, { "epoch": 0.22779682143970084, "grad_norm": 0.35970309376716614, "learning_rate": 8.773351642737708e-05, "loss": 1.6914, "step": 2193 }, { "epoch": 0.2279006959592812, "grad_norm": 0.3693389892578125, "learning_rate": 8.772280904771608e-05, "loss": 1.8031, "step": 2194 }, { "epoch": 0.22800457047886152, "grad_norm": 0.34769538044929504, "learning_rate": 8.771209765087018e-05, "loss": 1.7248, "step": 2195 }, { "epoch": 0.22810844499844188, "grad_norm": 0.37290412187576294, "learning_rate": 8.770138223798003e-05, "loss": 1.7994, "step": 2196 }, { "epoch": 0.22821231951802223, "grad_norm": 0.37960392236709595, "learning_rate": 8.769066281018676e-05, "loss": 1.8668, "step": 2197 }, { "epoch": 0.2283161940376026, "grad_norm": 0.3551698625087738, "learning_rate": 8.76799393686319e-05, "loss": 1.7928, "step": 2198 }, { "epoch": 0.22842006855718291, "grad_norm": 0.3406142294406891, "learning_rate": 8.766921191445742e-05, "loss": 1.6981, "step": 2199 }, { "epoch": 0.22852394307676327, "grad_norm": 0.410206139087677, "learning_rate": 8.765848044880569e-05, "loss": 1.634, "step": 2200 }, { "epoch": 0.22862781759634362, "grad_norm": 0.42800506949424744, "learning_rate": 8.764774497281954e-05, "loss": 1.7564, "step": 2201 }, { "epoch": 0.22873169211592395, "grad_norm": 0.39173564314842224, "learning_rate": 8.763700548764223e-05, "loss": 1.8504, "step": 2202 }, { "epoch": 0.2288355666355043, "grad_norm": 0.35888996720314026, "learning_rate": 8.76262619944174e-05, "loss": 1.5756, "step": 2203 }, { "epoch": 0.22893944115508466, "grad_norm": 0.4852285087108612, "learning_rate": 8.761551449428918e-05, "loss": 1.9804, "step": 2204 }, { "epoch": 0.22904331567466502, "grad_norm": 0.4047568440437317, "learning_rate": 8.760476298840206e-05, "loss": 1.81, "step": 2205 }, { "epoch": 0.22914719019424534, "grad_norm": 0.3965972065925598, "learning_rate": 8.7594007477901e-05, "loss": 1.8159, "step": 2206 }, { "epoch": 0.2292510647138257, "grad_norm": 0.4031708538532257, "learning_rate": 8.758324796393142e-05, "loss": 1.8237, "step": 2207 }, { "epoch": 0.22935493923340605, "grad_norm": 0.3491591215133667, "learning_rate": 8.757248444763908e-05, "loss": 1.6837, "step": 2208 }, { "epoch": 0.22945881375298638, "grad_norm": 0.36505648493766785, "learning_rate": 8.756171693017022e-05, "loss": 1.6408, "step": 2209 }, { "epoch": 0.22956268827256673, "grad_norm": 0.394775927066803, "learning_rate": 8.755094541267149e-05, "loss": 1.8816, "step": 2210 }, { "epoch": 0.2296665627921471, "grad_norm": 0.4164559543132782, "learning_rate": 8.754016989629e-05, "loss": 1.9372, "step": 2211 }, { "epoch": 0.22977043731172744, "grad_norm": 0.3899494707584381, "learning_rate": 8.752939038217322e-05, "loss": 1.7356, "step": 2212 }, { "epoch": 0.22987431183130777, "grad_norm": 0.36787334084510803, "learning_rate": 8.751860687146912e-05, "loss": 1.7804, "step": 2213 }, { "epoch": 0.22997818635088813, "grad_norm": 0.3581325113773346, "learning_rate": 8.750781936532604e-05, "loss": 1.825, "step": 2214 }, { "epoch": 0.23008206087046848, "grad_norm": 0.3675483465194702, "learning_rate": 8.749702786489277e-05, "loss": 1.7208, "step": 2215 }, { "epoch": 0.23018593539004883, "grad_norm": 0.38968801498413086, "learning_rate": 8.748623237131853e-05, "loss": 1.8447, "step": 2216 }, { "epoch": 0.23028980990962916, "grad_norm": 0.3647545576095581, "learning_rate": 8.747543288575293e-05, "loss": 1.6169, "step": 2217 }, { "epoch": 0.23039368442920952, "grad_norm": 0.3703446388244629, "learning_rate": 8.746462940934605e-05, "loss": 1.6316, "step": 2218 }, { "epoch": 0.23049755894878987, "grad_norm": 0.3884913921356201, "learning_rate": 8.745382194324839e-05, "loss": 1.7359, "step": 2219 }, { "epoch": 0.2306014334683702, "grad_norm": 0.39596831798553467, "learning_rate": 8.744301048861083e-05, "loss": 1.7425, "step": 2220 }, { "epoch": 0.23070530798795055, "grad_norm": 0.41258224844932556, "learning_rate": 8.743219504658472e-05, "loss": 1.8832, "step": 2221 }, { "epoch": 0.2308091825075309, "grad_norm": 0.3803333342075348, "learning_rate": 8.742137561832182e-05, "loss": 1.8591, "step": 2222 }, { "epoch": 0.23091305702711126, "grad_norm": 0.3818683326244354, "learning_rate": 8.741055220497431e-05, "loss": 1.9393, "step": 2223 }, { "epoch": 0.2310169315466916, "grad_norm": 0.3759542405605316, "learning_rate": 8.739972480769481e-05, "loss": 1.8439, "step": 2224 }, { "epoch": 0.23112080606627194, "grad_norm": 0.3411445617675781, "learning_rate": 8.738889342763635e-05, "loss": 1.7198, "step": 2225 }, { "epoch": 0.2312246805858523, "grad_norm": 0.3545193076133728, "learning_rate": 8.737805806595239e-05, "loss": 1.6465, "step": 2226 }, { "epoch": 0.23132855510543263, "grad_norm": 0.3674876093864441, "learning_rate": 8.73672187237968e-05, "loss": 1.7859, "step": 2227 }, { "epoch": 0.23143242962501298, "grad_norm": 0.3644886612892151, "learning_rate": 8.735637540232389e-05, "loss": 1.7008, "step": 2228 }, { "epoch": 0.23153630414459334, "grad_norm": 0.3750406503677368, "learning_rate": 8.734552810268838e-05, "loss": 1.6847, "step": 2229 }, { "epoch": 0.2316401786641737, "grad_norm": 0.3923105299472809, "learning_rate": 8.733467682604545e-05, "loss": 1.7455, "step": 2230 }, { "epoch": 0.23174405318375402, "grad_norm": 0.3961966931819916, "learning_rate": 8.732382157355066e-05, "loss": 1.6726, "step": 2231 }, { "epoch": 0.23184792770333437, "grad_norm": 0.4062844514846802, "learning_rate": 8.731296234636e-05, "loss": 1.9202, "step": 2232 }, { "epoch": 0.23195180222291473, "grad_norm": 0.4073842167854309, "learning_rate": 8.73020991456299e-05, "loss": 1.9388, "step": 2233 }, { "epoch": 0.23205567674249505, "grad_norm": 0.38866791129112244, "learning_rate": 8.72912319725172e-05, "loss": 1.6893, "step": 2234 }, { "epoch": 0.2321595512620754, "grad_norm": 0.3830127716064453, "learning_rate": 8.72803608281792e-05, "loss": 1.812, "step": 2235 }, { "epoch": 0.23226342578165576, "grad_norm": 0.4291350245475769, "learning_rate": 8.726948571377356e-05, "loss": 1.8426, "step": 2236 }, { "epoch": 0.23236730030123612, "grad_norm": 0.36664703488349915, "learning_rate": 8.72586066304584e-05, "loss": 1.6714, "step": 2237 }, { "epoch": 0.23247117482081645, "grad_norm": 0.34283211827278137, "learning_rate": 8.724772357939229e-05, "loss": 1.7595, "step": 2238 }, { "epoch": 0.2325750493403968, "grad_norm": 0.3679027557373047, "learning_rate": 8.723683656173413e-05, "loss": 1.6493, "step": 2239 }, { "epoch": 0.23267892385997715, "grad_norm": 0.3953079879283905, "learning_rate": 8.722594557864335e-05, "loss": 1.7885, "step": 2240 }, { "epoch": 0.23278279837955748, "grad_norm": 0.37095022201538086, "learning_rate": 8.721505063127972e-05, "loss": 1.6947, "step": 2241 }, { "epoch": 0.23288667289913784, "grad_norm": 0.38997137546539307, "learning_rate": 8.72041517208035e-05, "loss": 1.8158, "step": 2242 }, { "epoch": 0.2329905474187182, "grad_norm": 0.36092621088027954, "learning_rate": 8.719324884837531e-05, "loss": 1.7928, "step": 2243 }, { "epoch": 0.23309442193829855, "grad_norm": 0.3828462064266205, "learning_rate": 8.718234201515627e-05, "loss": 1.7782, "step": 2244 }, { "epoch": 0.23319829645787887, "grad_norm": 0.35145506262779236, "learning_rate": 8.717143122230782e-05, "loss": 1.6527, "step": 2245 }, { "epoch": 0.23330217097745923, "grad_norm": 0.3912096321582794, "learning_rate": 8.71605164709919e-05, "loss": 1.8666, "step": 2246 }, { "epoch": 0.23340604549703958, "grad_norm": 0.3545803725719452, "learning_rate": 8.714959776237083e-05, "loss": 1.7043, "step": 2247 }, { "epoch": 0.2335099200166199, "grad_norm": 0.3926384449005127, "learning_rate": 8.713867509760738e-05, "loss": 2.007, "step": 2248 }, { "epoch": 0.23361379453620026, "grad_norm": 0.3713902533054352, "learning_rate": 8.712774847786471e-05, "loss": 1.7151, "step": 2249 }, { "epoch": 0.23371766905578062, "grad_norm": 0.39191189408302307, "learning_rate": 8.711681790430645e-05, "loss": 1.8144, "step": 2250 }, { "epoch": 0.23382154357536097, "grad_norm": 0.40152493119239807, "learning_rate": 8.710588337809662e-05, "loss": 1.9487, "step": 2251 }, { "epoch": 0.2339254180949413, "grad_norm": 0.3732980489730835, "learning_rate": 8.709494490039963e-05, "loss": 1.6245, "step": 2252 }, { "epoch": 0.23402929261452166, "grad_norm": 0.3951932191848755, "learning_rate": 8.708400247238035e-05, "loss": 1.8078, "step": 2253 }, { "epoch": 0.234133167134102, "grad_norm": 0.3723011910915375, "learning_rate": 8.707305609520408e-05, "loss": 1.7107, "step": 2254 }, { "epoch": 0.23423704165368237, "grad_norm": 0.3788382411003113, "learning_rate": 8.706210577003653e-05, "loss": 1.6238, "step": 2255 }, { "epoch": 0.2343409161732627, "grad_norm": 0.3792799115180969, "learning_rate": 8.705115149804381e-05, "loss": 1.7069, "step": 2256 }, { "epoch": 0.23444479069284305, "grad_norm": 0.3782130181789398, "learning_rate": 8.704019328039244e-05, "loss": 1.6198, "step": 2257 }, { "epoch": 0.2345486652124234, "grad_norm": 0.3798910975456238, "learning_rate": 8.702923111824943e-05, "loss": 1.8176, "step": 2258 }, { "epoch": 0.23465253973200373, "grad_norm": 0.38398462533950806, "learning_rate": 8.701826501278216e-05, "loss": 1.7607, "step": 2259 }, { "epoch": 0.23475641425158408, "grad_norm": 0.3783421814441681, "learning_rate": 8.70072949651584e-05, "loss": 1.765, "step": 2260 }, { "epoch": 0.23486028877116444, "grad_norm": 0.34123924374580383, "learning_rate": 8.69963209765464e-05, "loss": 1.6595, "step": 2261 }, { "epoch": 0.2349641632907448, "grad_norm": 0.33665284514427185, "learning_rate": 8.698534304811478e-05, "loss": 1.4557, "step": 2262 }, { "epoch": 0.23506803781032512, "grad_norm": 0.37269553542137146, "learning_rate": 8.697436118103264e-05, "loss": 1.8497, "step": 2263 }, { "epoch": 0.23517191232990547, "grad_norm": 0.38616353273391724, "learning_rate": 8.696337537646944e-05, "loss": 1.7943, "step": 2264 }, { "epoch": 0.23527578684948583, "grad_norm": 0.4167322814464569, "learning_rate": 8.695238563559509e-05, "loss": 1.9579, "step": 2265 }, { "epoch": 0.23537966136906616, "grad_norm": 0.38582777976989746, "learning_rate": 8.694139195957991e-05, "loss": 1.7782, "step": 2266 }, { "epoch": 0.2354835358886465, "grad_norm": 0.3635927438735962, "learning_rate": 8.693039434959464e-05, "loss": 1.7276, "step": 2267 }, { "epoch": 0.23558741040822687, "grad_norm": 0.42506465315818787, "learning_rate": 8.691939280681045e-05, "loss": 1.818, "step": 2268 }, { "epoch": 0.23569128492780722, "grad_norm": 0.38665080070495605, "learning_rate": 8.69083873323989e-05, "loss": 1.7603, "step": 2269 }, { "epoch": 0.23579515944738755, "grad_norm": 0.3853725790977478, "learning_rate": 8.689737792753198e-05, "loss": 1.6039, "step": 2270 }, { "epoch": 0.2358990339669679, "grad_norm": 0.37736237049102783, "learning_rate": 8.688636459338215e-05, "loss": 1.8416, "step": 2271 }, { "epoch": 0.23600290848654826, "grad_norm": 0.4537546634674072, "learning_rate": 8.68753473311222e-05, "loss": 1.6987, "step": 2272 }, { "epoch": 0.23610678300612858, "grad_norm": 0.3903726041316986, "learning_rate": 8.686432614192538e-05, "loss": 1.9472, "step": 2273 }, { "epoch": 0.23621065752570894, "grad_norm": 0.41961470246315, "learning_rate": 8.68533010269654e-05, "loss": 1.8414, "step": 2274 }, { "epoch": 0.2363145320452893, "grad_norm": 0.36322250962257385, "learning_rate": 8.684227198741633e-05, "loss": 1.5978, "step": 2275 }, { "epoch": 0.23641840656486965, "grad_norm": 0.4004223644733429, "learning_rate": 8.683123902445267e-05, "loss": 1.7981, "step": 2276 }, { "epoch": 0.23652228108444998, "grad_norm": 0.38842740654945374, "learning_rate": 8.682020213924935e-05, "loss": 1.7098, "step": 2277 }, { "epoch": 0.23662615560403033, "grad_norm": 0.3592800199985504, "learning_rate": 8.680916133298171e-05, "loss": 1.661, "step": 2278 }, { "epoch": 0.23673003012361069, "grad_norm": 0.357117623090744, "learning_rate": 8.67981166068255e-05, "loss": 1.5888, "step": 2279 }, { "epoch": 0.236833904643191, "grad_norm": 0.3849972188472748, "learning_rate": 8.678706796195692e-05, "loss": 1.7733, "step": 2280 }, { "epoch": 0.23693777916277137, "grad_norm": 0.36867091059684753, "learning_rate": 8.677601539955256e-05, "loss": 1.8096, "step": 2281 }, { "epoch": 0.23704165368235172, "grad_norm": 0.3629715144634247, "learning_rate": 8.676495892078941e-05, "loss": 1.6919, "step": 2282 }, { "epoch": 0.23714552820193208, "grad_norm": 0.4076031446456909, "learning_rate": 8.675389852684492e-05, "loss": 1.9165, "step": 2283 }, { "epoch": 0.2372494027215124, "grad_norm": 0.37175339460372925, "learning_rate": 8.674283421889691e-05, "loss": 1.6767, "step": 2284 }, { "epoch": 0.23735327724109276, "grad_norm": 0.3934227526187897, "learning_rate": 8.673176599812368e-05, "loss": 1.7891, "step": 2285 }, { "epoch": 0.2374571517606731, "grad_norm": 0.4014788269996643, "learning_rate": 8.672069386570389e-05, "loss": 1.9999, "step": 2286 }, { "epoch": 0.23756102628025344, "grad_norm": 0.35122254490852356, "learning_rate": 8.670961782281664e-05, "loss": 1.8249, "step": 2287 }, { "epoch": 0.2376649007998338, "grad_norm": 0.36434510350227356, "learning_rate": 8.669853787064142e-05, "loss": 1.8532, "step": 2288 }, { "epoch": 0.23776877531941415, "grad_norm": 0.3808654546737671, "learning_rate": 8.668745401035818e-05, "loss": 1.7474, "step": 2289 }, { "epoch": 0.2378726498389945, "grad_norm": 0.3531155288219452, "learning_rate": 8.667636624314725e-05, "loss": 1.8941, "step": 2290 }, { "epoch": 0.23797652435857483, "grad_norm": 0.472049742937088, "learning_rate": 8.666527457018943e-05, "loss": 1.985, "step": 2291 }, { "epoch": 0.23808039887815519, "grad_norm": 0.39368936419487, "learning_rate": 8.665417899266586e-05, "loss": 1.6889, "step": 2292 }, { "epoch": 0.23818427339773554, "grad_norm": 0.3707980811595917, "learning_rate": 8.664307951175814e-05, "loss": 1.5088, "step": 2293 }, { "epoch": 0.2382881479173159, "grad_norm": 0.3548358380794525, "learning_rate": 8.663197612864827e-05, "loss": 1.7288, "step": 2294 }, { "epoch": 0.23839202243689622, "grad_norm": 0.47809383273124695, "learning_rate": 8.662086884451869e-05, "loss": 1.8472, "step": 2295 }, { "epoch": 0.23849589695647658, "grad_norm": 0.4353974163532257, "learning_rate": 8.660975766055224e-05, "loss": 1.8897, "step": 2296 }, { "epoch": 0.23859977147605693, "grad_norm": 0.4466110169887543, "learning_rate": 8.659864257793215e-05, "loss": 1.9391, "step": 2297 }, { "epoch": 0.23870364599563726, "grad_norm": 0.39143791794776917, "learning_rate": 8.65875235978421e-05, "loss": 1.7464, "step": 2298 }, { "epoch": 0.23880752051521761, "grad_norm": 0.38728442788124084, "learning_rate": 8.65764007214662e-05, "loss": 1.7393, "step": 2299 }, { "epoch": 0.23891139503479797, "grad_norm": 0.3674345910549164, "learning_rate": 8.656527394998892e-05, "loss": 1.6336, "step": 2300 }, { "epoch": 0.23901526955437832, "grad_norm": 0.38445186614990234, "learning_rate": 8.655414328459519e-05, "loss": 1.8982, "step": 2301 }, { "epoch": 0.23911914407395865, "grad_norm": 0.38056597113609314, "learning_rate": 8.654300872647033e-05, "loss": 1.6851, "step": 2302 }, { "epoch": 0.239223018593539, "grad_norm": 0.4110845625400543, "learning_rate": 8.65318702768001e-05, "loss": 1.8767, "step": 2303 }, { "epoch": 0.23932689311311936, "grad_norm": 0.38014286756515503, "learning_rate": 8.652072793677061e-05, "loss": 1.725, "step": 2304 }, { "epoch": 0.2394307676326997, "grad_norm": 0.39622190594673157, "learning_rate": 8.650958170756852e-05, "loss": 1.9115, "step": 2305 }, { "epoch": 0.23953464215228004, "grad_norm": 0.36890217661857605, "learning_rate": 8.649843159038071e-05, "loss": 1.698, "step": 2306 }, { "epoch": 0.2396385166718604, "grad_norm": 0.38787323236465454, "learning_rate": 8.648727758639467e-05, "loss": 1.8998, "step": 2307 }, { "epoch": 0.23974239119144075, "grad_norm": 0.38866347074508667, "learning_rate": 8.647611969679816e-05, "loss": 1.7095, "step": 2308 }, { "epoch": 0.23984626571102108, "grad_norm": 0.39878830313682556, "learning_rate": 8.646495792277943e-05, "loss": 1.9387, "step": 2309 }, { "epoch": 0.23995014023060143, "grad_norm": 0.3923484981060028, "learning_rate": 8.645379226552712e-05, "loss": 1.8302, "step": 2310 }, { "epoch": 0.2400540147501818, "grad_norm": 0.3727096617221832, "learning_rate": 8.644262272623029e-05, "loss": 1.6717, "step": 2311 }, { "epoch": 0.24015788926976211, "grad_norm": 0.3535787761211395, "learning_rate": 8.64314493060784e-05, "loss": 1.6446, "step": 2312 }, { "epoch": 0.24026176378934247, "grad_norm": 0.3555033504962921, "learning_rate": 8.642027200626135e-05, "loss": 1.7145, "step": 2313 }, { "epoch": 0.24036563830892282, "grad_norm": 0.3748113512992859, "learning_rate": 8.640909082796939e-05, "loss": 1.7144, "step": 2314 }, { "epoch": 0.24046951282850318, "grad_norm": 0.37921851873397827, "learning_rate": 8.639790577239328e-05, "loss": 1.8516, "step": 2315 }, { "epoch": 0.2405733873480835, "grad_norm": 0.36275529861450195, "learning_rate": 8.638671684072412e-05, "loss": 1.6144, "step": 2316 }, { "epoch": 0.24067726186766386, "grad_norm": 0.35582250356674194, "learning_rate": 8.637552403415343e-05, "loss": 1.7601, "step": 2317 }, { "epoch": 0.24078113638724422, "grad_norm": 0.3954647183418274, "learning_rate": 8.636432735387319e-05, "loss": 1.798, "step": 2318 }, { "epoch": 0.24088501090682454, "grad_norm": 0.36659133434295654, "learning_rate": 8.635312680107572e-05, "loss": 1.551, "step": 2319 }, { "epoch": 0.2409888854264049, "grad_norm": 0.35685068368911743, "learning_rate": 8.634192237695382e-05, "loss": 1.6926, "step": 2320 }, { "epoch": 0.24109275994598525, "grad_norm": 0.35937169194221497, "learning_rate": 8.633071408270065e-05, "loss": 1.604, "step": 2321 }, { "epoch": 0.2411966344655656, "grad_norm": 0.44768527150154114, "learning_rate": 8.631950191950983e-05, "loss": 2.0036, "step": 2322 }, { "epoch": 0.24130050898514593, "grad_norm": 0.37177640199661255, "learning_rate": 8.630828588857537e-05, "loss": 1.7145, "step": 2323 }, { "epoch": 0.2414043835047263, "grad_norm": 0.38756901025772095, "learning_rate": 8.629706599109169e-05, "loss": 1.7844, "step": 2324 }, { "epoch": 0.24150825802430664, "grad_norm": 0.37525761127471924, "learning_rate": 8.628584222825357e-05, "loss": 1.586, "step": 2325 }, { "epoch": 0.241612132543887, "grad_norm": 0.4448417127132416, "learning_rate": 8.627461460125632e-05, "loss": 1.6915, "step": 2326 }, { "epoch": 0.24171600706346733, "grad_norm": 0.39078620076179504, "learning_rate": 8.626338311129557e-05, "loss": 1.7567, "step": 2327 }, { "epoch": 0.24181988158304768, "grad_norm": 0.38780951499938965, "learning_rate": 8.625214775956737e-05, "loss": 1.6969, "step": 2328 }, { "epoch": 0.24192375610262803, "grad_norm": 0.3700442314147949, "learning_rate": 8.624090854726822e-05, "loss": 1.6198, "step": 2329 }, { "epoch": 0.24202763062220836, "grad_norm": 0.37854552268981934, "learning_rate": 8.622966547559499e-05, "loss": 1.7932, "step": 2330 }, { "epoch": 0.24213150514178872, "grad_norm": 0.49389779567718506, "learning_rate": 8.621841854574501e-05, "loss": 1.8561, "step": 2331 }, { "epoch": 0.24223537966136907, "grad_norm": 0.35576608777046204, "learning_rate": 8.620716775891595e-05, "loss": 1.5224, "step": 2332 }, { "epoch": 0.24233925418094943, "grad_norm": 0.37670719623565674, "learning_rate": 8.619591311630595e-05, "loss": 1.8392, "step": 2333 }, { "epoch": 0.24244312870052975, "grad_norm": 0.3652971386909485, "learning_rate": 8.618465461911355e-05, "loss": 1.7624, "step": 2334 }, { "epoch": 0.2425470032201101, "grad_norm": 0.3692745268344879, "learning_rate": 8.617339226853768e-05, "loss": 1.6901, "step": 2335 }, { "epoch": 0.24265087773969046, "grad_norm": 0.36297428607940674, "learning_rate": 8.61621260657777e-05, "loss": 1.8461, "step": 2336 }, { "epoch": 0.2427547522592708, "grad_norm": 0.37688395380973816, "learning_rate": 8.615085601203337e-05, "loss": 1.6562, "step": 2337 }, { "epoch": 0.24285862677885114, "grad_norm": 0.3952527642250061, "learning_rate": 8.613958210850485e-05, "loss": 1.7862, "step": 2338 }, { "epoch": 0.2429625012984315, "grad_norm": 0.35479936003685, "learning_rate": 8.612830435639275e-05, "loss": 1.9233, "step": 2339 }, { "epoch": 0.24306637581801185, "grad_norm": 0.40649887919425964, "learning_rate": 8.611702275689805e-05, "loss": 1.8637, "step": 2340 }, { "epoch": 0.24317025033759218, "grad_norm": 0.36132219433784485, "learning_rate": 8.610573731122214e-05, "loss": 1.7277, "step": 2341 }, { "epoch": 0.24327412485717254, "grad_norm": 0.35456719994544983, "learning_rate": 8.609444802056686e-05, "loss": 1.7141, "step": 2342 }, { "epoch": 0.2433779993767529, "grad_norm": 0.38762810826301575, "learning_rate": 8.608315488613439e-05, "loss": 1.7315, "step": 2343 }, { "epoch": 0.24348187389633322, "grad_norm": 0.3507632911205292, "learning_rate": 8.607185790912739e-05, "loss": 1.712, "step": 2344 }, { "epoch": 0.24358574841591357, "grad_norm": 0.39255252480506897, "learning_rate": 8.60605570907489e-05, "loss": 1.7046, "step": 2345 }, { "epoch": 0.24368962293549393, "grad_norm": 0.35092976689338684, "learning_rate": 8.604925243220235e-05, "loss": 1.8009, "step": 2346 }, { "epoch": 0.24379349745507428, "grad_norm": 0.37961629033088684, "learning_rate": 8.603794393469162e-05, "loss": 1.8602, "step": 2347 }, { "epoch": 0.2438973719746546, "grad_norm": 0.38758939504623413, "learning_rate": 8.602663159942098e-05, "loss": 1.8524, "step": 2348 }, { "epoch": 0.24400124649423496, "grad_norm": 0.3866504430770874, "learning_rate": 8.601531542759506e-05, "loss": 1.722, "step": 2349 }, { "epoch": 0.24410512101381532, "grad_norm": 0.3876989483833313, "learning_rate": 8.600399542041901e-05, "loss": 1.7846, "step": 2350 }, { "epoch": 0.24420899553339565, "grad_norm": 0.3649909198284149, "learning_rate": 8.599267157909827e-05, "loss": 1.7255, "step": 2351 }, { "epoch": 0.244312870052976, "grad_norm": 0.3867836892604828, "learning_rate": 8.598134390483879e-05, "loss": 1.8129, "step": 2352 }, { "epoch": 0.24441674457255635, "grad_norm": 0.40322503447532654, "learning_rate": 8.59700123988468e-05, "loss": 2.0753, "step": 2353 }, { "epoch": 0.2445206190921367, "grad_norm": 0.37695807218551636, "learning_rate": 8.595867706232911e-05, "loss": 1.8298, "step": 2354 }, { "epoch": 0.24462449361171704, "grad_norm": 0.3737477958202362, "learning_rate": 8.594733789649279e-05, "loss": 1.7315, "step": 2355 }, { "epoch": 0.2447283681312974, "grad_norm": 0.3679026663303375, "learning_rate": 8.593599490254538e-05, "loss": 1.6989, "step": 2356 }, { "epoch": 0.24483224265087775, "grad_norm": 0.3665264844894409, "learning_rate": 8.592464808169482e-05, "loss": 1.7326, "step": 2357 }, { "epoch": 0.24493611717045807, "grad_norm": 0.4071952700614929, "learning_rate": 8.591329743514947e-05, "loss": 1.774, "step": 2358 }, { "epoch": 0.24503999169003843, "grad_norm": 0.4158788025379181, "learning_rate": 8.590194296411806e-05, "loss": 2.0193, "step": 2359 }, { "epoch": 0.24514386620961878, "grad_norm": 0.3840784430503845, "learning_rate": 8.589058466980979e-05, "loss": 1.6699, "step": 2360 }, { "epoch": 0.24524774072919914, "grad_norm": 0.36834099888801575, "learning_rate": 8.587922255343421e-05, "loss": 1.6759, "step": 2361 }, { "epoch": 0.24535161524877946, "grad_norm": 0.35662734508514404, "learning_rate": 8.58678566162013e-05, "loss": 1.4792, "step": 2362 }, { "epoch": 0.24545548976835982, "grad_norm": 0.369302362203598, "learning_rate": 8.585648685932141e-05, "loss": 1.7362, "step": 2363 }, { "epoch": 0.24555936428794017, "grad_norm": 0.38440340757369995, "learning_rate": 8.58451132840054e-05, "loss": 1.8942, "step": 2364 }, { "epoch": 0.24566323880752053, "grad_norm": 0.3850240707397461, "learning_rate": 8.58337358914644e-05, "loss": 1.8449, "step": 2365 }, { "epoch": 0.24576711332710086, "grad_norm": 0.393264502286911, "learning_rate": 8.582235468291007e-05, "loss": 1.88, "step": 2366 }, { "epoch": 0.2458709878466812, "grad_norm": 0.40909549593925476, "learning_rate": 8.581096965955436e-05, "loss": 1.8228, "step": 2367 }, { "epoch": 0.24597486236626157, "grad_norm": 0.36390751600265503, "learning_rate": 8.579958082260973e-05, "loss": 1.7457, "step": 2368 }, { "epoch": 0.2460787368858419, "grad_norm": 0.3853476047515869, "learning_rate": 8.5788188173289e-05, "loss": 1.8158, "step": 2369 }, { "epoch": 0.24618261140542225, "grad_norm": 0.37678855657577515, "learning_rate": 8.577679171280537e-05, "loss": 1.8368, "step": 2370 }, { "epoch": 0.2462864859250026, "grad_norm": 0.3758280277252197, "learning_rate": 8.57653914423725e-05, "loss": 1.6665, "step": 2371 }, { "epoch": 0.24639036044458296, "grad_norm": 0.38738977909088135, "learning_rate": 8.575398736320442e-05, "loss": 1.8557, "step": 2372 }, { "epoch": 0.24649423496416328, "grad_norm": 0.35604584217071533, "learning_rate": 8.574257947651558e-05, "loss": 1.6039, "step": 2373 }, { "epoch": 0.24659810948374364, "grad_norm": 0.3709351420402527, "learning_rate": 8.573116778352084e-05, "loss": 1.85, "step": 2374 }, { "epoch": 0.246701984003324, "grad_norm": 0.37650248408317566, "learning_rate": 8.571975228543543e-05, "loss": 1.8249, "step": 2375 }, { "epoch": 0.24680585852290432, "grad_norm": 0.3389437794685364, "learning_rate": 8.570833298347502e-05, "loss": 1.7355, "step": 2376 }, { "epoch": 0.24690973304248467, "grad_norm": 0.3376055061817169, "learning_rate": 8.56969098788557e-05, "loss": 1.5235, "step": 2377 }, { "epoch": 0.24701360756206503, "grad_norm": 0.3974694013595581, "learning_rate": 8.568548297279392e-05, "loss": 1.8553, "step": 2378 }, { "epoch": 0.24711748208164538, "grad_norm": 0.35596492886543274, "learning_rate": 8.567405226650656e-05, "loss": 1.4544, "step": 2379 }, { "epoch": 0.2472213566012257, "grad_norm": 0.37058472633361816, "learning_rate": 8.56626177612109e-05, "loss": 1.7564, "step": 2380 }, { "epoch": 0.24732523112080607, "grad_norm": 0.3887064456939697, "learning_rate": 8.565117945812463e-05, "loss": 1.8172, "step": 2381 }, { "epoch": 0.24742910564038642, "grad_norm": 0.3546907901763916, "learning_rate": 8.563973735846583e-05, "loss": 1.6763, "step": 2382 }, { "epoch": 0.24753298015996675, "grad_norm": 0.38635915517807007, "learning_rate": 8.562829146345301e-05, "loss": 1.7017, "step": 2383 }, { "epoch": 0.2476368546795471, "grad_norm": 0.3599991500377655, "learning_rate": 8.561684177430507e-05, "loss": 1.6087, "step": 2384 }, { "epoch": 0.24774072919912746, "grad_norm": 0.3766249120235443, "learning_rate": 8.560538829224129e-05, "loss": 1.6795, "step": 2385 }, { "epoch": 0.2478446037187078, "grad_norm": 0.38669300079345703, "learning_rate": 8.559393101848139e-05, "loss": 1.7984, "step": 2386 }, { "epoch": 0.24794847823828814, "grad_norm": 0.35774049162864685, "learning_rate": 8.558246995424548e-05, "loss": 1.5587, "step": 2387 }, { "epoch": 0.2480523527578685, "grad_norm": 0.3456031382083893, "learning_rate": 8.557100510075406e-05, "loss": 1.6269, "step": 2388 }, { "epoch": 0.24815622727744885, "grad_norm": 0.42325082421302795, "learning_rate": 8.555953645922809e-05, "loss": 2.0851, "step": 2389 }, { "epoch": 0.24826010179702918, "grad_norm": 0.32227158546447754, "learning_rate": 8.554806403088884e-05, "loss": 1.5321, "step": 2390 }, { "epoch": 0.24836397631660953, "grad_norm": 0.35737884044647217, "learning_rate": 8.553658781695807e-05, "loss": 1.7002, "step": 2391 }, { "epoch": 0.24846785083618989, "grad_norm": 0.3779980540275574, "learning_rate": 8.552510781865788e-05, "loss": 1.74, "step": 2392 }, { "epoch": 0.24857172535577024, "grad_norm": 0.3488198518753052, "learning_rate": 8.551362403721084e-05, "loss": 1.7332, "step": 2393 }, { "epoch": 0.24867559987535057, "grad_norm": 0.3871583342552185, "learning_rate": 8.550213647383982e-05, "loss": 1.8173, "step": 2394 }, { "epoch": 0.24877947439493092, "grad_norm": 0.3559076488018036, "learning_rate": 8.549064512976822e-05, "loss": 1.824, "step": 2395 }, { "epoch": 0.24888334891451128, "grad_norm": 0.36714842915534973, "learning_rate": 8.547915000621974e-05, "loss": 1.7691, "step": 2396 }, { "epoch": 0.2489872234340916, "grad_norm": 0.35242655873298645, "learning_rate": 8.546765110441855e-05, "loss": 1.7228, "step": 2397 }, { "epoch": 0.24909109795367196, "grad_norm": 0.3446803689002991, "learning_rate": 8.545614842558915e-05, "loss": 1.613, "step": 2398 }, { "epoch": 0.2491949724732523, "grad_norm": 0.36509135365486145, "learning_rate": 8.544464197095651e-05, "loss": 1.6443, "step": 2399 }, { "epoch": 0.24929884699283267, "grad_norm": 0.3790837228298187, "learning_rate": 8.5433131741746e-05, "loss": 1.7364, "step": 2400 }, { "epoch": 0.249402721512413, "grad_norm": 0.4487643837928772, "learning_rate": 8.542161773918334e-05, "loss": 1.8304, "step": 2401 }, { "epoch": 0.24950659603199335, "grad_norm": 0.3896706700325012, "learning_rate": 8.54100999644947e-05, "loss": 1.7448, "step": 2402 }, { "epoch": 0.2496104705515737, "grad_norm": 0.382664293050766, "learning_rate": 8.53985784189066e-05, "loss": 1.7384, "step": 2403 }, { "epoch": 0.24971434507115406, "grad_norm": 0.3803345561027527, "learning_rate": 8.538705310364603e-05, "loss": 1.8425, "step": 2404 }, { "epoch": 0.24981821959073439, "grad_norm": 0.37627753615379333, "learning_rate": 8.537552401994034e-05, "loss": 1.7596, "step": 2405 }, { "epoch": 0.24992209411031474, "grad_norm": 0.3746028542518616, "learning_rate": 8.536399116901728e-05, "loss": 1.8014, "step": 2406 }, { "epoch": 0.25002596862989507, "grad_norm": 0.3502449095249176, "learning_rate": 8.535245455210501e-05, "loss": 1.5849, "step": 2407 }, { "epoch": 0.25012984314947545, "grad_norm": 0.40077903866767883, "learning_rate": 8.534091417043208e-05, "loss": 1.9111, "step": 2408 }, { "epoch": 0.2502337176690558, "grad_norm": 0.3578510284423828, "learning_rate": 8.532937002522747e-05, "loss": 1.6662, "step": 2409 }, { "epoch": 0.2503375921886361, "grad_norm": 0.38965925574302673, "learning_rate": 8.531782211772052e-05, "loss": 1.5775, "step": 2410 }, { "epoch": 0.2504414667082165, "grad_norm": 0.35679998993873596, "learning_rate": 8.530627044914101e-05, "loss": 1.796, "step": 2411 }, { "epoch": 0.2505453412277968, "grad_norm": 0.3703193962574005, "learning_rate": 8.52947150207191e-05, "loss": 1.8143, "step": 2412 }, { "epoch": 0.25064921574737714, "grad_norm": 0.3648841977119446, "learning_rate": 8.528315583368536e-05, "loss": 1.7078, "step": 2413 }, { "epoch": 0.2507530902669575, "grad_norm": 0.38748499751091003, "learning_rate": 8.527159288927074e-05, "loss": 1.876, "step": 2414 }, { "epoch": 0.25085696478653785, "grad_norm": 0.37417733669281006, "learning_rate": 8.526002618870659e-05, "loss": 1.646, "step": 2415 }, { "epoch": 0.25096083930611823, "grad_norm": 0.3735947012901306, "learning_rate": 8.52484557332247e-05, "loss": 1.8018, "step": 2416 }, { "epoch": 0.25106471382569856, "grad_norm": 0.36857619881629944, "learning_rate": 8.523688152405722e-05, "loss": 1.6909, "step": 2417 }, { "epoch": 0.2511685883452789, "grad_norm": 0.41932663321495056, "learning_rate": 8.522530356243671e-05, "loss": 1.6112, "step": 2418 }, { "epoch": 0.25127246286485927, "grad_norm": 0.4023738503456116, "learning_rate": 8.521372184959615e-05, "loss": 1.9221, "step": 2419 }, { "epoch": 0.2513763373844396, "grad_norm": 0.35625362396240234, "learning_rate": 8.520213638676889e-05, "loss": 1.6999, "step": 2420 }, { "epoch": 0.2514802119040199, "grad_norm": 0.36791056394577026, "learning_rate": 8.519054717518868e-05, "loss": 1.8009, "step": 2421 }, { "epoch": 0.2515840864236003, "grad_norm": 0.3679400086402893, "learning_rate": 8.517895421608972e-05, "loss": 1.6935, "step": 2422 }, { "epoch": 0.25168796094318063, "grad_norm": 0.36350300908088684, "learning_rate": 8.516735751070652e-05, "loss": 1.7611, "step": 2423 }, { "epoch": 0.25179183546276096, "grad_norm": 0.36680689454078674, "learning_rate": 8.515575706027406e-05, "loss": 1.7451, "step": 2424 }, { "epoch": 0.25189570998234134, "grad_norm": 0.3586255609989166, "learning_rate": 8.514415286602771e-05, "loss": 1.6652, "step": 2425 }, { "epoch": 0.25199958450192167, "grad_norm": 0.380088210105896, "learning_rate": 8.51325449292032e-05, "loss": 1.7082, "step": 2426 }, { "epoch": 0.25210345902150205, "grad_norm": 0.3451653718948364, "learning_rate": 8.512093325103671e-05, "loss": 1.6654, "step": 2427 }, { "epoch": 0.2522073335410824, "grad_norm": 0.42833617329597473, "learning_rate": 8.510931783276477e-05, "loss": 1.9137, "step": 2428 }, { "epoch": 0.2523112080606627, "grad_norm": 0.42482277750968933, "learning_rate": 8.509769867562436e-05, "loss": 2.0342, "step": 2429 }, { "epoch": 0.2524150825802431, "grad_norm": 0.3890083134174347, "learning_rate": 8.508607578085282e-05, "loss": 1.7393, "step": 2430 }, { "epoch": 0.2525189570998234, "grad_norm": 0.37014102935791016, "learning_rate": 8.507444914968788e-05, "loss": 1.8187, "step": 2431 }, { "epoch": 0.25262283161940374, "grad_norm": 0.370259553194046, "learning_rate": 8.506281878336768e-05, "loss": 1.733, "step": 2432 }, { "epoch": 0.2527267061389841, "grad_norm": 0.37391120195388794, "learning_rate": 8.50511846831308e-05, "loss": 1.691, "step": 2433 }, { "epoch": 0.25283058065856445, "grad_norm": 0.3874087631702423, "learning_rate": 8.503954685021616e-05, "loss": 1.7962, "step": 2434 }, { "epoch": 0.2529344551781448, "grad_norm": 0.3736341893672943, "learning_rate": 8.50279052858631e-05, "loss": 1.8226, "step": 2435 }, { "epoch": 0.25303832969772516, "grad_norm": 0.37461403012275696, "learning_rate": 8.501625999131134e-05, "loss": 1.8345, "step": 2436 }, { "epoch": 0.2531422042173055, "grad_norm": 0.39281967282295227, "learning_rate": 8.500461096780105e-05, "loss": 1.6779, "step": 2437 }, { "epoch": 0.2532460787368858, "grad_norm": 0.4174926280975342, "learning_rate": 8.499295821657273e-05, "loss": 1.8124, "step": 2438 }, { "epoch": 0.2533499532564662, "grad_norm": 0.4330326318740845, "learning_rate": 8.498130173886731e-05, "loss": 1.9252, "step": 2439 }, { "epoch": 0.2534538277760465, "grad_norm": 0.357661634683609, "learning_rate": 8.496964153592613e-05, "loss": 1.7423, "step": 2440 }, { "epoch": 0.2535577022956269, "grad_norm": 0.3977978527545929, "learning_rate": 8.495797760899088e-05, "loss": 1.4438, "step": 2441 }, { "epoch": 0.25366157681520723, "grad_norm": 0.4523005783557892, "learning_rate": 8.494630995930372e-05, "loss": 1.5389, "step": 2442 }, { "epoch": 0.25376545133478756, "grad_norm": 0.3693365156650543, "learning_rate": 8.493463858810713e-05, "loss": 1.8215, "step": 2443 }, { "epoch": 0.25386932585436794, "grad_norm": 0.40159082412719727, "learning_rate": 8.492296349664401e-05, "loss": 1.8385, "step": 2444 }, { "epoch": 0.25397320037394827, "grad_norm": 0.3591073751449585, "learning_rate": 8.491128468615772e-05, "loss": 1.642, "step": 2445 }, { "epoch": 0.2540770748935286, "grad_norm": 0.3740187883377075, "learning_rate": 8.489960215789192e-05, "loss": 1.8311, "step": 2446 }, { "epoch": 0.254180949413109, "grad_norm": 0.38675010204315186, "learning_rate": 8.488791591309072e-05, "loss": 1.7373, "step": 2447 }, { "epoch": 0.2542848239326893, "grad_norm": 0.34369611740112305, "learning_rate": 8.48762259529986e-05, "loss": 1.7054, "step": 2448 }, { "epoch": 0.25438869845226963, "grad_norm": 0.3643989861011505, "learning_rate": 8.486453227886049e-05, "loss": 1.6371, "step": 2449 }, { "epoch": 0.25449257297185, "grad_norm": 0.38395020365715027, "learning_rate": 8.485283489192162e-05, "loss": 1.5869, "step": 2450 }, { "epoch": 0.25459644749143034, "grad_norm": 0.365596741437912, "learning_rate": 8.48411337934277e-05, "loss": 1.5886, "step": 2451 }, { "epoch": 0.25470032201101067, "grad_norm": 0.3677592873573303, "learning_rate": 8.48294289846248e-05, "loss": 1.8044, "step": 2452 }, { "epoch": 0.25480419653059105, "grad_norm": 0.39587604999542236, "learning_rate": 8.48177204667594e-05, "loss": 1.8413, "step": 2453 }, { "epoch": 0.2549080710501714, "grad_norm": 0.3834674656391144, "learning_rate": 8.480600824107837e-05, "loss": 1.6651, "step": 2454 }, { "epoch": 0.25501194556975176, "grad_norm": 0.38077157735824585, "learning_rate": 8.479429230882893e-05, "loss": 1.558, "step": 2455 }, { "epoch": 0.2551158200893321, "grad_norm": 0.4156430959701538, "learning_rate": 8.478257267125878e-05, "loss": 1.7901, "step": 2456 }, { "epoch": 0.2552196946089124, "grad_norm": 0.3814376890659332, "learning_rate": 8.477084932961596e-05, "loss": 1.6559, "step": 2457 }, { "epoch": 0.2553235691284928, "grad_norm": 0.3800637722015381, "learning_rate": 8.475912228514889e-05, "loss": 1.7842, "step": 2458 }, { "epoch": 0.2554274436480731, "grad_norm": 0.36574795842170715, "learning_rate": 8.474739153910646e-05, "loss": 1.6281, "step": 2459 }, { "epoch": 0.25553131816765345, "grad_norm": 0.3729517161846161, "learning_rate": 8.473565709273785e-05, "loss": 1.8031, "step": 2460 }, { "epoch": 0.25563519268723384, "grad_norm": 0.4632837176322937, "learning_rate": 8.47239189472927e-05, "loss": 1.9434, "step": 2461 }, { "epoch": 0.25573906720681416, "grad_norm": 0.3544566333293915, "learning_rate": 8.471217710402106e-05, "loss": 1.8198, "step": 2462 }, { "epoch": 0.2558429417263945, "grad_norm": 0.3790525794029236, "learning_rate": 8.470043156417333e-05, "loss": 1.8017, "step": 2463 }, { "epoch": 0.2559468162459749, "grad_norm": 0.3937493562698364, "learning_rate": 8.46886823290003e-05, "loss": 1.7512, "step": 2464 }, { "epoch": 0.2560506907655552, "grad_norm": 0.4141665995121002, "learning_rate": 8.467692939975316e-05, "loss": 1.9163, "step": 2465 }, { "epoch": 0.2561545652851356, "grad_norm": 0.4007442891597748, "learning_rate": 8.466517277768355e-05, "loss": 1.9303, "step": 2466 }, { "epoch": 0.2562584398047159, "grad_norm": 0.36880823969841003, "learning_rate": 8.465341246404345e-05, "loss": 1.6991, "step": 2467 }, { "epoch": 0.25636231432429624, "grad_norm": 0.3562661409378052, "learning_rate": 8.464164846008522e-05, "loss": 1.7796, "step": 2468 }, { "epoch": 0.2564661888438766, "grad_norm": 0.3477732241153717, "learning_rate": 8.462988076706164e-05, "loss": 1.7226, "step": 2469 }, { "epoch": 0.25657006336345695, "grad_norm": 0.35839709639549255, "learning_rate": 8.461810938622589e-05, "loss": 1.7105, "step": 2470 }, { "epoch": 0.2566739378830373, "grad_norm": 0.39441442489624023, "learning_rate": 8.460633431883151e-05, "loss": 1.6964, "step": 2471 }, { "epoch": 0.25677781240261766, "grad_norm": 0.41418394446372986, "learning_rate": 8.459455556613247e-05, "loss": 1.7889, "step": 2472 }, { "epoch": 0.256881686922198, "grad_norm": 0.3982704281806946, "learning_rate": 8.458277312938312e-05, "loss": 2.066, "step": 2473 }, { "epoch": 0.2569855614417783, "grad_norm": 0.38881179690361023, "learning_rate": 8.457098700983818e-05, "loss": 1.8605, "step": 2474 }, { "epoch": 0.2570894359613587, "grad_norm": 0.39533254504203796, "learning_rate": 8.455919720875279e-05, "loss": 1.8728, "step": 2475 }, { "epoch": 0.257193310480939, "grad_norm": 0.4153515100479126, "learning_rate": 8.454740372738246e-05, "loss": 1.6178, "step": 2476 }, { "epoch": 0.25729718500051935, "grad_norm": 0.35797804594039917, "learning_rate": 8.453560656698311e-05, "loss": 1.5962, "step": 2477 }, { "epoch": 0.25740105952009973, "grad_norm": 0.3884628415107727, "learning_rate": 8.452380572881107e-05, "loss": 1.6725, "step": 2478 }, { "epoch": 0.25750493403968006, "grad_norm": 0.4121970534324646, "learning_rate": 8.451200121412299e-05, "loss": 1.731, "step": 2479 }, { "epoch": 0.25760880855926044, "grad_norm": 0.3405035734176636, "learning_rate": 8.4500193024176e-05, "loss": 1.5419, "step": 2480 }, { "epoch": 0.25771268307884077, "grad_norm": 0.38876980543136597, "learning_rate": 8.448838116022758e-05, "loss": 1.6488, "step": 2481 }, { "epoch": 0.2578165575984211, "grad_norm": 0.38169679045677185, "learning_rate": 8.447656562353557e-05, "loss": 1.8325, "step": 2482 }, { "epoch": 0.2579204321180015, "grad_norm": 0.36781740188598633, "learning_rate": 8.446474641535824e-05, "loss": 1.5927, "step": 2483 }, { "epoch": 0.2580243066375818, "grad_norm": 0.39887794852256775, "learning_rate": 8.445292353695427e-05, "loss": 1.7471, "step": 2484 }, { "epoch": 0.25812818115716213, "grad_norm": 0.381283164024353, "learning_rate": 8.444109698958267e-05, "loss": 1.7668, "step": 2485 }, { "epoch": 0.2582320556767425, "grad_norm": 0.38031065464019775, "learning_rate": 8.44292667745029e-05, "loss": 1.7104, "step": 2486 }, { "epoch": 0.25833593019632284, "grad_norm": 0.36943575739860535, "learning_rate": 8.441743289297476e-05, "loss": 1.8359, "step": 2487 }, { "epoch": 0.25843980471590317, "grad_norm": 0.3790445625782013, "learning_rate": 8.440559534625851e-05, "loss": 1.6393, "step": 2488 }, { "epoch": 0.25854367923548355, "grad_norm": 0.3714222013950348, "learning_rate": 8.439375413561472e-05, "loss": 1.6612, "step": 2489 }, { "epoch": 0.2586475537550639, "grad_norm": 0.38887256383895874, "learning_rate": 8.43819092623044e-05, "loss": 1.7044, "step": 2490 }, { "epoch": 0.25875142827464426, "grad_norm": 0.3530022203922272, "learning_rate": 8.437006072758891e-05, "loss": 1.608, "step": 2491 }, { "epoch": 0.2588553027942246, "grad_norm": 0.38229474425315857, "learning_rate": 8.435820853273007e-05, "loss": 1.7361, "step": 2492 }, { "epoch": 0.2589591773138049, "grad_norm": 0.35574570298194885, "learning_rate": 8.434635267899002e-05, "loss": 1.799, "step": 2493 }, { "epoch": 0.2590630518333853, "grad_norm": 0.40232568979263306, "learning_rate": 8.433449316763133e-05, "loss": 1.8073, "step": 2494 }, { "epoch": 0.2591669263529656, "grad_norm": 0.42104196548461914, "learning_rate": 8.432262999991694e-05, "loss": 2.0099, "step": 2495 }, { "epoch": 0.25927080087254595, "grad_norm": 0.37814000248908997, "learning_rate": 8.431076317711017e-05, "loss": 1.6813, "step": 2496 }, { "epoch": 0.25937467539212633, "grad_norm": 0.38011592626571655, "learning_rate": 8.429889270047475e-05, "loss": 1.6921, "step": 2497 }, { "epoch": 0.25947854991170666, "grad_norm": 0.3853015601634979, "learning_rate": 8.428701857127481e-05, "loss": 1.8239, "step": 2498 }, { "epoch": 0.259582424431287, "grad_norm": 0.351633757352829, "learning_rate": 8.427514079077485e-05, "loss": 1.6058, "step": 2499 }, { "epoch": 0.25968629895086737, "grad_norm": 0.4057175815105438, "learning_rate": 8.426325936023974e-05, "loss": 1.8754, "step": 2500 }, { "epoch": 0.2597901734704477, "grad_norm": 0.3578610420227051, "learning_rate": 8.425137428093477e-05, "loss": 1.7336, "step": 2501 }, { "epoch": 0.259894047990028, "grad_norm": 0.3505629301071167, "learning_rate": 8.423948555412562e-05, "loss": 1.7356, "step": 2502 }, { "epoch": 0.2599979225096084, "grad_norm": 0.3745979368686676, "learning_rate": 8.422759318107832e-05, "loss": 1.8169, "step": 2503 }, { "epoch": 0.26010179702918873, "grad_norm": 0.37145760655403137, "learning_rate": 8.421569716305934e-05, "loss": 1.904, "step": 2504 }, { "epoch": 0.2602056715487691, "grad_norm": 0.3965916931629181, "learning_rate": 8.42037975013355e-05, "loss": 1.809, "step": 2505 }, { "epoch": 0.26030954606834944, "grad_norm": 0.39221835136413574, "learning_rate": 8.419189419717404e-05, "loss": 1.7811, "step": 2506 }, { "epoch": 0.26041342058792977, "grad_norm": 0.3546196222305298, "learning_rate": 8.417998725184254e-05, "loss": 1.7371, "step": 2507 }, { "epoch": 0.26051729510751015, "grad_norm": 0.3630322217941284, "learning_rate": 8.416807666660901e-05, "loss": 1.719, "step": 2508 }, { "epoch": 0.2606211696270905, "grad_norm": 0.34440553188323975, "learning_rate": 8.415616244274185e-05, "loss": 1.6887, "step": 2509 }, { "epoch": 0.2607250441466708, "grad_norm": 0.39099493622779846, "learning_rate": 8.414424458150983e-05, "loss": 1.7395, "step": 2510 }, { "epoch": 0.2608289186662512, "grad_norm": 0.35405251383781433, "learning_rate": 8.413232308418206e-05, "loss": 1.6363, "step": 2511 }, { "epoch": 0.2609327931858315, "grad_norm": 0.41946062445640564, "learning_rate": 8.412039795202816e-05, "loss": 1.91, "step": 2512 }, { "epoch": 0.26103666770541184, "grad_norm": 0.4249524474143982, "learning_rate": 8.4108469186318e-05, "loss": 1.7341, "step": 2513 }, { "epoch": 0.2611405422249922, "grad_norm": 0.3654036223888397, "learning_rate": 8.409653678832194e-05, "loss": 1.6744, "step": 2514 }, { "epoch": 0.26124441674457255, "grad_norm": 0.35637834668159485, "learning_rate": 8.408460075931068e-05, "loss": 1.6597, "step": 2515 }, { "epoch": 0.2613482912641529, "grad_norm": 0.39124104380607605, "learning_rate": 8.407266110055531e-05, "loss": 1.6995, "step": 2516 }, { "epoch": 0.26145216578373326, "grad_norm": 0.3535708487033844, "learning_rate": 8.40607178133273e-05, "loss": 1.657, "step": 2517 }, { "epoch": 0.2615560403033136, "grad_norm": 0.4048800766468048, "learning_rate": 8.404877089889853e-05, "loss": 1.7071, "step": 2518 }, { "epoch": 0.26165991482289397, "grad_norm": 0.3928733170032501, "learning_rate": 8.403682035854125e-05, "loss": 1.5713, "step": 2519 }, { "epoch": 0.2617637893424743, "grad_norm": 0.3696751892566681, "learning_rate": 8.40248661935281e-05, "loss": 1.6968, "step": 2520 }, { "epoch": 0.2618676638620546, "grad_norm": 0.35033291578292847, "learning_rate": 8.40129084051321e-05, "loss": 1.5176, "step": 2521 }, { "epoch": 0.261971538381635, "grad_norm": 0.38887977600097656, "learning_rate": 8.400094699462667e-05, "loss": 1.84, "step": 2522 }, { "epoch": 0.26207541290121533, "grad_norm": 0.382174015045166, "learning_rate": 8.398898196328561e-05, "loss": 1.818, "step": 2523 }, { "epoch": 0.26217928742079566, "grad_norm": 0.3753756880760193, "learning_rate": 8.39770133123831e-05, "loss": 1.764, "step": 2524 }, { "epoch": 0.26228316194037604, "grad_norm": 0.37997132539749146, "learning_rate": 8.396504104319366e-05, "loss": 1.8256, "step": 2525 }, { "epoch": 0.26238703645995637, "grad_norm": 0.36985552310943604, "learning_rate": 8.395306515699234e-05, "loss": 1.5408, "step": 2526 }, { "epoch": 0.2624909109795367, "grad_norm": 0.36994847655296326, "learning_rate": 8.394108565505441e-05, "loss": 1.6385, "step": 2527 }, { "epoch": 0.2625947854991171, "grad_norm": 0.3789272904396057, "learning_rate": 8.392910253865557e-05, "loss": 1.6302, "step": 2528 }, { "epoch": 0.2626986600186974, "grad_norm": 0.39001405239105225, "learning_rate": 8.391711580907202e-05, "loss": 1.8292, "step": 2529 }, { "epoch": 0.2628025345382778, "grad_norm": 0.3820188343524933, "learning_rate": 8.390512546758016e-05, "loss": 1.8284, "step": 2530 }, { "epoch": 0.2629064090578581, "grad_norm": 0.43149465322494507, "learning_rate": 8.389313151545694e-05, "loss": 1.8026, "step": 2531 }, { "epoch": 0.26301028357743844, "grad_norm": 0.4724147319793701, "learning_rate": 8.388113395397957e-05, "loss": 1.7194, "step": 2532 }, { "epoch": 0.2631141580970188, "grad_norm": 0.3973971903324127, "learning_rate": 8.386913278442571e-05, "loss": 1.703, "step": 2533 }, { "epoch": 0.26321803261659915, "grad_norm": 0.38330528140068054, "learning_rate": 8.385712800807343e-05, "loss": 1.8659, "step": 2534 }, { "epoch": 0.2633219071361795, "grad_norm": 0.37077972292900085, "learning_rate": 8.38451196262011e-05, "loss": 1.8752, "step": 2535 }, { "epoch": 0.26342578165575986, "grad_norm": 0.36941617727279663, "learning_rate": 8.383310764008751e-05, "loss": 1.765, "step": 2536 }, { "epoch": 0.2635296561753402, "grad_norm": 0.3706973195075989, "learning_rate": 8.382109205101188e-05, "loss": 1.6668, "step": 2537 }, { "epoch": 0.2636335306949205, "grad_norm": 0.37633809447288513, "learning_rate": 8.380907286025377e-05, "loss": 1.6682, "step": 2538 }, { "epoch": 0.2637374052145009, "grad_norm": 0.39212766289711, "learning_rate": 8.37970500690931e-05, "loss": 1.6501, "step": 2539 }, { "epoch": 0.2638412797340812, "grad_norm": 0.3803769052028656, "learning_rate": 8.378502367881025e-05, "loss": 1.6854, "step": 2540 }, { "epoch": 0.26394515425366155, "grad_norm": 0.40050208568573, "learning_rate": 8.37729936906859e-05, "loss": 1.8676, "step": 2541 }, { "epoch": 0.26404902877324193, "grad_norm": 0.39779967069625854, "learning_rate": 8.376096010600116e-05, "loss": 1.7777, "step": 2542 }, { "epoch": 0.26415290329282226, "grad_norm": 0.4135241210460663, "learning_rate": 8.374892292603751e-05, "loss": 1.7512, "step": 2543 }, { "epoch": 0.26425677781240264, "grad_norm": 0.36392831802368164, "learning_rate": 8.373688215207682e-05, "loss": 1.7389, "step": 2544 }, { "epoch": 0.26436065233198297, "grad_norm": 0.40198683738708496, "learning_rate": 8.372483778540134e-05, "loss": 1.9784, "step": 2545 }, { "epoch": 0.2644645268515633, "grad_norm": 0.35998931527137756, "learning_rate": 8.371278982729371e-05, "loss": 1.8175, "step": 2546 }, { "epoch": 0.2645684013711437, "grad_norm": 0.4466514587402344, "learning_rate": 8.370073827903693e-05, "loss": 1.8014, "step": 2547 }, { "epoch": 0.264672275890724, "grad_norm": 0.39324405789375305, "learning_rate": 8.368868314191439e-05, "loss": 1.8389, "step": 2548 }, { "epoch": 0.26477615041030433, "grad_norm": 0.3979739248752594, "learning_rate": 8.367662441720989e-05, "loss": 1.7975, "step": 2549 }, { "epoch": 0.2648800249298847, "grad_norm": 0.3831605315208435, "learning_rate": 8.366456210620757e-05, "loss": 1.7531, "step": 2550 }, { "epoch": 0.26498389944946504, "grad_norm": 0.3608654737472534, "learning_rate": 8.365249621019197e-05, "loss": 1.6416, "step": 2551 }, { "epoch": 0.26508777396904537, "grad_norm": 0.37154313921928406, "learning_rate": 8.364042673044803e-05, "loss": 1.775, "step": 2552 }, { "epoch": 0.26519164848862575, "grad_norm": 0.36865541338920593, "learning_rate": 8.362835366826105e-05, "loss": 1.6302, "step": 2553 }, { "epoch": 0.2652955230082061, "grad_norm": 0.40405401587486267, "learning_rate": 8.361627702491673e-05, "loss": 1.8325, "step": 2554 }, { "epoch": 0.2653993975277864, "grad_norm": 0.3631226122379303, "learning_rate": 8.360419680170111e-05, "loss": 1.6433, "step": 2555 }, { "epoch": 0.2655032720473668, "grad_norm": 0.3895721435546875, "learning_rate": 8.359211299990064e-05, "loss": 1.5261, "step": 2556 }, { "epoch": 0.2656071465669471, "grad_norm": 0.34275680780410767, "learning_rate": 8.358002562080219e-05, "loss": 1.7036, "step": 2557 }, { "epoch": 0.2657110210865275, "grad_norm": 0.4039117991924286, "learning_rate": 8.356793466569293e-05, "loss": 1.801, "step": 2558 }, { "epoch": 0.2658148956061078, "grad_norm": 0.444367915391922, "learning_rate": 8.355584013586047e-05, "loss": 2.0989, "step": 2559 }, { "epoch": 0.26591877012568815, "grad_norm": 0.3847164809703827, "learning_rate": 8.354374203259278e-05, "loss": 1.8125, "step": 2560 }, { "epoch": 0.26602264464526854, "grad_norm": 0.36114174127578735, "learning_rate": 8.353164035717822e-05, "loss": 1.6731, "step": 2561 }, { "epoch": 0.26612651916484886, "grad_norm": 0.37462207674980164, "learning_rate": 8.351953511090551e-05, "loss": 1.7634, "step": 2562 }, { "epoch": 0.2662303936844292, "grad_norm": 0.38300392031669617, "learning_rate": 8.350742629506378e-05, "loss": 1.8874, "step": 2563 }, { "epoch": 0.26633426820400957, "grad_norm": 0.37371304631233215, "learning_rate": 8.349531391094251e-05, "loss": 1.7228, "step": 2564 }, { "epoch": 0.2664381427235899, "grad_norm": 0.33728665113449097, "learning_rate": 8.34831979598316e-05, "loss": 1.6236, "step": 2565 }, { "epoch": 0.2665420172431702, "grad_norm": 0.38045376539230347, "learning_rate": 8.347107844302129e-05, "loss": 1.8109, "step": 2566 }, { "epoch": 0.2666458917627506, "grad_norm": 0.36660969257354736, "learning_rate": 8.345895536180219e-05, "loss": 1.7463, "step": 2567 }, { "epoch": 0.26674976628233094, "grad_norm": 0.3986735939979553, "learning_rate": 8.344682871746534e-05, "loss": 1.8362, "step": 2568 }, { "epoch": 0.2668536408019113, "grad_norm": 0.41932711005210876, "learning_rate": 8.343469851130212e-05, "loss": 1.845, "step": 2569 }, { "epoch": 0.26695751532149165, "grad_norm": 0.40920954942703247, "learning_rate": 8.342256474460431e-05, "loss": 1.6763, "step": 2570 }, { "epoch": 0.26706138984107197, "grad_norm": 0.3709944784641266, "learning_rate": 8.341042741866408e-05, "loss": 1.8428, "step": 2571 }, { "epoch": 0.26716526436065235, "grad_norm": 0.3826795220375061, "learning_rate": 8.339828653477391e-05, "loss": 1.5346, "step": 2572 }, { "epoch": 0.2672691388802327, "grad_norm": 0.37877601385116577, "learning_rate": 8.338614209422677e-05, "loss": 1.7763, "step": 2573 }, { "epoch": 0.267373013399813, "grad_norm": 0.4136562645435333, "learning_rate": 8.33739940983159e-05, "loss": 1.9153, "step": 2574 }, { "epoch": 0.2674768879193934, "grad_norm": 0.3574436604976654, "learning_rate": 8.336184254833499e-05, "loss": 1.4252, "step": 2575 }, { "epoch": 0.2675807624389737, "grad_norm": 0.37612104415893555, "learning_rate": 8.33496874455781e-05, "loss": 1.8131, "step": 2576 }, { "epoch": 0.26768463695855405, "grad_norm": 0.4038298726081848, "learning_rate": 8.33375287913396e-05, "loss": 1.769, "step": 2577 }, { "epoch": 0.26778851147813443, "grad_norm": 0.3684050440788269, "learning_rate": 8.332536658691435e-05, "loss": 1.806, "step": 2578 }, { "epoch": 0.26789238599771475, "grad_norm": 0.39676815271377563, "learning_rate": 8.331320083359751e-05, "loss": 1.8308, "step": 2579 }, { "epoch": 0.2679962605172951, "grad_norm": 0.37601250410079956, "learning_rate": 8.330103153268462e-05, "loss": 1.6708, "step": 2580 }, { "epoch": 0.26810013503687546, "grad_norm": 0.35616087913513184, "learning_rate": 8.328885868547164e-05, "loss": 1.6567, "step": 2581 }, { "epoch": 0.2682040095564558, "grad_norm": 0.4200495183467865, "learning_rate": 8.327668229325487e-05, "loss": 1.9518, "step": 2582 }, { "epoch": 0.2683078840760362, "grad_norm": 0.4047677516937256, "learning_rate": 8.326450235733098e-05, "loss": 1.8429, "step": 2583 }, { "epoch": 0.2684117585956165, "grad_norm": 0.35730311274528503, "learning_rate": 8.32523188789971e-05, "loss": 1.5773, "step": 2584 }, { "epoch": 0.26851563311519683, "grad_norm": 0.38395121693611145, "learning_rate": 8.324013185955062e-05, "loss": 1.685, "step": 2585 }, { "epoch": 0.2686195076347772, "grad_norm": 0.3953067362308502, "learning_rate": 8.322794130028938e-05, "loss": 1.8974, "step": 2586 }, { "epoch": 0.26872338215435754, "grad_norm": 0.3572498559951782, "learning_rate": 8.321574720251158e-05, "loss": 1.6507, "step": 2587 }, { "epoch": 0.26882725667393786, "grad_norm": 0.38150545954704285, "learning_rate": 8.32035495675158e-05, "loss": 1.7104, "step": 2588 }, { "epoch": 0.26893113119351825, "grad_norm": 0.36428940296173096, "learning_rate": 8.3191348396601e-05, "loss": 1.6052, "step": 2589 }, { "epoch": 0.2690350057130986, "grad_norm": 0.4125150144100189, "learning_rate": 8.317914369106648e-05, "loss": 1.9357, "step": 2590 }, { "epoch": 0.2691388802326789, "grad_norm": 0.3636937439441681, "learning_rate": 8.316693545221196e-05, "loss": 1.7948, "step": 2591 }, { "epoch": 0.2692427547522593, "grad_norm": 0.40792933106422424, "learning_rate": 8.315472368133754e-05, "loss": 1.6696, "step": 2592 }, { "epoch": 0.2693466292718396, "grad_norm": 0.40981337428092957, "learning_rate": 8.314250837974364e-05, "loss": 1.8025, "step": 2593 }, { "epoch": 0.26945050379141994, "grad_norm": 0.3620801270008087, "learning_rate": 8.313028954873114e-05, "loss": 1.725, "step": 2594 }, { "epoch": 0.2695543783110003, "grad_norm": 0.35369589924812317, "learning_rate": 8.311806718960124e-05, "loss": 1.5303, "step": 2595 }, { "epoch": 0.26965825283058065, "grad_norm": 0.38566333055496216, "learning_rate": 8.310584130365551e-05, "loss": 1.9351, "step": 2596 }, { "epoch": 0.26976212735016103, "grad_norm": 0.3703290820121765, "learning_rate": 8.309361189219589e-05, "loss": 1.6705, "step": 2597 }, { "epoch": 0.26986600186974136, "grad_norm": 0.3818873167037964, "learning_rate": 8.308137895652477e-05, "loss": 1.7879, "step": 2598 }, { "epoch": 0.2699698763893217, "grad_norm": 0.3748812675476074, "learning_rate": 8.306914249794483e-05, "loss": 1.7799, "step": 2599 }, { "epoch": 0.27007375090890207, "grad_norm": 0.3769199252128601, "learning_rate": 8.305690251775915e-05, "loss": 1.6268, "step": 2600 }, { "epoch": 0.2701776254284824, "grad_norm": 0.3615618944168091, "learning_rate": 8.304465901727123e-05, "loss": 1.7004, "step": 2601 }, { "epoch": 0.2702814999480627, "grad_norm": 0.36809107661247253, "learning_rate": 8.303241199778486e-05, "loss": 1.7739, "step": 2602 }, { "epoch": 0.2703853744676431, "grad_norm": 0.37990957498550415, "learning_rate": 8.30201614606043e-05, "loss": 1.5496, "step": 2603 }, { "epoch": 0.27048924898722343, "grad_norm": 0.3890332579612732, "learning_rate": 8.300790740703409e-05, "loss": 1.7503, "step": 2604 }, { "epoch": 0.27059312350680376, "grad_norm": 0.3829896152019501, "learning_rate": 8.299564983837922e-05, "loss": 1.7856, "step": 2605 }, { "epoch": 0.27069699802638414, "grad_norm": 0.36139553785324097, "learning_rate": 8.298338875594501e-05, "loss": 1.7488, "step": 2606 }, { "epoch": 0.27080087254596447, "grad_norm": 0.3622409403324127, "learning_rate": 8.29711241610372e-05, "loss": 1.6934, "step": 2607 }, { "epoch": 0.27090474706554485, "grad_norm": 0.37896421551704407, "learning_rate": 8.295885605496184e-05, "loss": 1.785, "step": 2608 }, { "epoch": 0.2710086215851252, "grad_norm": 0.36020195484161377, "learning_rate": 8.294658443902539e-05, "loss": 1.7482, "step": 2609 }, { "epoch": 0.2711124961047055, "grad_norm": 0.3811848759651184, "learning_rate": 8.29343093145347e-05, "loss": 1.7077, "step": 2610 }, { "epoch": 0.2712163706242859, "grad_norm": 0.3960614502429962, "learning_rate": 8.292203068279695e-05, "loss": 1.8574, "step": 2611 }, { "epoch": 0.2713202451438662, "grad_norm": 0.4237765371799469, "learning_rate": 8.290974854511974e-05, "loss": 1.7988, "step": 2612 }, { "epoch": 0.27142411966344654, "grad_norm": 0.3800017237663269, "learning_rate": 8.289746290281104e-05, "loss": 1.6488, "step": 2613 }, { "epoch": 0.2715279941830269, "grad_norm": 0.38529491424560547, "learning_rate": 8.288517375717913e-05, "loss": 1.8813, "step": 2614 }, { "epoch": 0.27163186870260725, "grad_norm": 0.3625027537345886, "learning_rate": 8.287288110953274e-05, "loss": 1.4855, "step": 2615 }, { "epoch": 0.2717357432221876, "grad_norm": 0.4243226647377014, "learning_rate": 8.286058496118091e-05, "loss": 1.921, "step": 2616 }, { "epoch": 0.27183961774176796, "grad_norm": 0.36872926354408264, "learning_rate": 8.284828531343312e-05, "loss": 1.6206, "step": 2617 }, { "epoch": 0.2719434922613483, "grad_norm": 0.40654125809669495, "learning_rate": 8.283598216759915e-05, "loss": 1.8668, "step": 2618 }, { "epoch": 0.2720473667809286, "grad_norm": 0.3534424602985382, "learning_rate": 8.282367552498925e-05, "loss": 1.6809, "step": 2619 }, { "epoch": 0.272151241300509, "grad_norm": 0.41357290744781494, "learning_rate": 8.28113653869139e-05, "loss": 1.922, "step": 2620 }, { "epoch": 0.2722551158200893, "grad_norm": 0.41436415910720825, "learning_rate": 8.27990517546841e-05, "loss": 1.8944, "step": 2621 }, { "epoch": 0.2723589903396697, "grad_norm": 0.3990755081176758, "learning_rate": 8.278673462961112e-05, "loss": 1.8936, "step": 2622 }, { "epoch": 0.27246286485925003, "grad_norm": 0.3946811258792877, "learning_rate": 8.277441401300665e-05, "loss": 1.9192, "step": 2623 }, { "epoch": 0.27256673937883036, "grad_norm": 0.37876197695732117, "learning_rate": 8.276208990618274e-05, "loss": 1.6761, "step": 2624 }, { "epoch": 0.27267061389841074, "grad_norm": 0.4105803072452545, "learning_rate": 8.27497623104518e-05, "loss": 1.7843, "step": 2625 }, { "epoch": 0.27277448841799107, "grad_norm": 0.37790194153785706, "learning_rate": 8.273743122712664e-05, "loss": 1.5891, "step": 2626 }, { "epoch": 0.2728783629375714, "grad_norm": 0.43674150109291077, "learning_rate": 8.272509665752041e-05, "loss": 1.793, "step": 2627 }, { "epoch": 0.2729822374571518, "grad_norm": 0.45887747406959534, "learning_rate": 8.271275860294667e-05, "loss": 1.9712, "step": 2628 }, { "epoch": 0.2730861119767321, "grad_norm": 0.3662078082561493, "learning_rate": 8.27004170647193e-05, "loss": 1.6459, "step": 2629 }, { "epoch": 0.27318998649631243, "grad_norm": 0.364083468914032, "learning_rate": 8.268807204415258e-05, "loss": 1.6445, "step": 2630 }, { "epoch": 0.2732938610158928, "grad_norm": 0.3766544759273529, "learning_rate": 8.267572354256117e-05, "loss": 1.6357, "step": 2631 }, { "epoch": 0.27339773553547314, "grad_norm": 0.38634899258613586, "learning_rate": 8.266337156126008e-05, "loss": 1.7254, "step": 2632 }, { "epoch": 0.27350161005505347, "grad_norm": 0.391665518283844, "learning_rate": 8.26510161015647e-05, "loss": 1.7073, "step": 2633 }, { "epoch": 0.27360548457463385, "grad_norm": 0.5298207998275757, "learning_rate": 8.263865716479078e-05, "loss": 1.8689, "step": 2634 }, { "epoch": 0.2737093590942142, "grad_norm": 0.4072588086128235, "learning_rate": 8.262629475225448e-05, "loss": 1.8721, "step": 2635 }, { "epoch": 0.27381323361379456, "grad_norm": 0.3996543288230896, "learning_rate": 8.261392886527229e-05, "loss": 1.7335, "step": 2636 }, { "epoch": 0.2739171081333749, "grad_norm": 0.414880633354187, "learning_rate": 8.260155950516106e-05, "loss": 1.8539, "step": 2637 }, { "epoch": 0.2740209826529552, "grad_norm": 0.36667972803115845, "learning_rate": 8.258918667323806e-05, "loss": 1.6214, "step": 2638 }, { "epoch": 0.2741248571725356, "grad_norm": 0.39150235056877136, "learning_rate": 8.257681037082085e-05, "loss": 1.7361, "step": 2639 }, { "epoch": 0.2742287316921159, "grad_norm": 0.38801634311676025, "learning_rate": 8.256443059922749e-05, "loss": 1.8081, "step": 2640 }, { "epoch": 0.27433260621169625, "grad_norm": 0.42313215136528015, "learning_rate": 8.255204735977626e-05, "loss": 1.7, "step": 2641 }, { "epoch": 0.27443648073127663, "grad_norm": 0.36863112449645996, "learning_rate": 8.25396606537859e-05, "loss": 1.5137, "step": 2642 }, { "epoch": 0.27454035525085696, "grad_norm": 0.39456894993782043, "learning_rate": 8.252727048257551e-05, "loss": 1.7127, "step": 2643 }, { "epoch": 0.2746442297704373, "grad_norm": 0.3853185474872589, "learning_rate": 8.251487684746453e-05, "loss": 1.8356, "step": 2644 }, { "epoch": 0.27474810429001767, "grad_norm": 0.4087388813495636, "learning_rate": 8.250247974977279e-05, "loss": 1.6803, "step": 2645 }, { "epoch": 0.274851978809598, "grad_norm": 0.3977982997894287, "learning_rate": 8.249007919082048e-05, "loss": 1.7868, "step": 2646 }, { "epoch": 0.2749558533291784, "grad_norm": 0.3721441626548767, "learning_rate": 8.247767517192818e-05, "loss": 1.8521, "step": 2647 }, { "epoch": 0.2750597278487587, "grad_norm": 0.3743959963321686, "learning_rate": 8.246526769441681e-05, "loss": 1.7785, "step": 2648 }, { "epoch": 0.27516360236833903, "grad_norm": 0.39412543177604675, "learning_rate": 8.245285675960767e-05, "loss": 1.8763, "step": 2649 }, { "epoch": 0.2752674768879194, "grad_norm": 0.3404049575328827, "learning_rate": 8.244044236882242e-05, "loss": 1.4576, "step": 2650 }, { "epoch": 0.27537135140749974, "grad_norm": 0.3886807858943939, "learning_rate": 8.24280245233831e-05, "loss": 1.7468, "step": 2651 }, { "epoch": 0.27547522592708007, "grad_norm": 0.3996857702732086, "learning_rate": 8.241560322461212e-05, "loss": 1.92, "step": 2652 }, { "epoch": 0.27557910044666045, "grad_norm": 0.3869626224040985, "learning_rate": 8.240317847383226e-05, "loss": 1.6042, "step": 2653 }, { "epoch": 0.2756829749662408, "grad_norm": 0.4047796130180359, "learning_rate": 8.239075027236663e-05, "loss": 1.8099, "step": 2654 }, { "epoch": 0.2757868494858211, "grad_norm": 0.44502919912338257, "learning_rate": 8.237831862153877e-05, "loss": 1.7363, "step": 2655 }, { "epoch": 0.2758907240054015, "grad_norm": 0.388954222202301, "learning_rate": 8.236588352267252e-05, "loss": 1.8691, "step": 2656 }, { "epoch": 0.2759945985249818, "grad_norm": 0.39591512084007263, "learning_rate": 8.235344497709214e-05, "loss": 1.8538, "step": 2657 }, { "epoch": 0.27609847304456214, "grad_norm": 0.38203978538513184, "learning_rate": 8.234100298612226e-05, "loss": 1.28, "step": 2658 }, { "epoch": 0.2762023475641425, "grad_norm": 0.43226009607315063, "learning_rate": 8.23285575510878e-05, "loss": 1.9326, "step": 2659 }, { "epoch": 0.27630622208372285, "grad_norm": 0.37048643827438354, "learning_rate": 8.231610867331415e-05, "loss": 1.7221, "step": 2660 }, { "epoch": 0.27641009660330323, "grad_norm": 0.41580793261528015, "learning_rate": 8.2303656354127e-05, "loss": 1.9031, "step": 2661 }, { "epoch": 0.27651397112288356, "grad_norm": 0.3827223777770996, "learning_rate": 8.229120059485243e-05, "loss": 1.6825, "step": 2662 }, { "epoch": 0.2766178456424639, "grad_norm": 0.37424013018608093, "learning_rate": 8.227874139681688e-05, "loss": 1.7172, "step": 2663 }, { "epoch": 0.27672172016204427, "grad_norm": 0.43709954619407654, "learning_rate": 8.226627876134714e-05, "loss": 1.8136, "step": 2664 }, { "epoch": 0.2768255946816246, "grad_norm": 0.35949236154556274, "learning_rate": 8.22538126897704e-05, "loss": 1.7346, "step": 2665 }, { "epoch": 0.2769294692012049, "grad_norm": 0.39692017436027527, "learning_rate": 8.224134318341421e-05, "loss": 1.925, "step": 2666 }, { "epoch": 0.2770333437207853, "grad_norm": 0.3917725384235382, "learning_rate": 8.222887024360646e-05, "loss": 1.7681, "step": 2667 }, { "epoch": 0.27713721824036563, "grad_norm": 0.4082455635070801, "learning_rate": 8.221639387167543e-05, "loss": 1.8118, "step": 2668 }, { "epoch": 0.27724109275994596, "grad_norm": 0.36559662222862244, "learning_rate": 8.220391406894973e-05, "loss": 1.8121, "step": 2669 }, { "epoch": 0.27734496727952634, "grad_norm": 0.372790664434433, "learning_rate": 8.21914308367584e-05, "loss": 1.7102, "step": 2670 }, { "epoch": 0.27744884179910667, "grad_norm": 0.37069472670555115, "learning_rate": 8.217894417643078e-05, "loss": 1.6396, "step": 2671 }, { "epoch": 0.277552716318687, "grad_norm": 0.38368502259254456, "learning_rate": 8.216645408929661e-05, "loss": 1.8278, "step": 2672 }, { "epoch": 0.2776565908382674, "grad_norm": 0.4187244474887848, "learning_rate": 8.215396057668598e-05, "loss": 1.7567, "step": 2673 }, { "epoch": 0.2777604653578477, "grad_norm": 0.37204065918922424, "learning_rate": 8.214146363992939e-05, "loss": 1.7282, "step": 2674 }, { "epoch": 0.2778643398774281, "grad_norm": 0.37890782952308655, "learning_rate": 8.21289632803576e-05, "loss": 1.6675, "step": 2675 }, { "epoch": 0.2779682143970084, "grad_norm": 0.37642747163772583, "learning_rate": 8.211645949930187e-05, "loss": 1.6311, "step": 2676 }, { "epoch": 0.27807208891658874, "grad_norm": 0.3810623288154602, "learning_rate": 8.210395229809369e-05, "loss": 1.7166, "step": 2677 }, { "epoch": 0.2781759634361691, "grad_norm": 0.3708658516407013, "learning_rate": 8.209144167806502e-05, "loss": 1.7207, "step": 2678 }, { "epoch": 0.27827983795574945, "grad_norm": 0.3930984139442444, "learning_rate": 8.207892764054814e-05, "loss": 1.7495, "step": 2679 }, { "epoch": 0.2783837124753298, "grad_norm": 0.3427664339542389, "learning_rate": 8.20664101868757e-05, "loss": 1.6362, "step": 2680 }, { "epoch": 0.27848758699491016, "grad_norm": 0.3461865484714508, "learning_rate": 8.205388931838068e-05, "loss": 1.594, "step": 2681 }, { "epoch": 0.2785914615144905, "grad_norm": 0.43719297647476196, "learning_rate": 8.20413650363965e-05, "loss": 1.9431, "step": 2682 }, { "epoch": 0.2786953360340708, "grad_norm": 0.3867358863353729, "learning_rate": 8.202883734225686e-05, "loss": 1.7749, "step": 2683 }, { "epoch": 0.2787992105536512, "grad_norm": 0.4017643928527832, "learning_rate": 8.201630623729588e-05, "loss": 1.8466, "step": 2684 }, { "epoch": 0.2789030850732315, "grad_norm": 0.4061945974826813, "learning_rate": 8.200377172284803e-05, "loss": 2.0036, "step": 2685 }, { "epoch": 0.2790069595928119, "grad_norm": 0.36883074045181274, "learning_rate": 8.199123380024812e-05, "loss": 1.6884, "step": 2686 }, { "epoch": 0.27911083411239224, "grad_norm": 0.37533366680145264, "learning_rate": 8.197869247083136e-05, "loss": 1.6512, "step": 2687 }, { "epoch": 0.27921470863197256, "grad_norm": 0.3572383522987366, "learning_rate": 8.196614773593329e-05, "loss": 1.6059, "step": 2688 }, { "epoch": 0.27931858315155295, "grad_norm": 0.37231144309043884, "learning_rate": 8.195359959688985e-05, "loss": 1.6252, "step": 2689 }, { "epoch": 0.2794224576711333, "grad_norm": 0.3484655022621155, "learning_rate": 8.194104805503728e-05, "loss": 1.7011, "step": 2690 }, { "epoch": 0.2795263321907136, "grad_norm": 0.3684258460998535, "learning_rate": 8.192849311171225e-05, "loss": 1.6864, "step": 2691 }, { "epoch": 0.279630206710294, "grad_norm": 0.34496328234672546, "learning_rate": 8.191593476825174e-05, "loss": 1.5545, "step": 2692 }, { "epoch": 0.2797340812298743, "grad_norm": 0.3760117292404175, "learning_rate": 8.190337302599314e-05, "loss": 1.7473, "step": 2693 }, { "epoch": 0.27983795574945464, "grad_norm": 0.3527992367744446, "learning_rate": 8.189080788627419e-05, "loss": 1.6099, "step": 2694 }, { "epoch": 0.279941830269035, "grad_norm": 0.37164172530174255, "learning_rate": 8.187823935043292e-05, "loss": 1.6274, "step": 2695 }, { "epoch": 0.28004570478861535, "grad_norm": 0.37998443841934204, "learning_rate": 8.186566741980786e-05, "loss": 1.8226, "step": 2696 }, { "epoch": 0.2801495793081957, "grad_norm": 0.3952192962169647, "learning_rate": 8.185309209573775e-05, "loss": 1.6987, "step": 2697 }, { "epoch": 0.28025345382777606, "grad_norm": 0.3859109878540039, "learning_rate": 8.18405133795618e-05, "loss": 1.7248, "step": 2698 }, { "epoch": 0.2803573283473564, "grad_norm": 0.3739028573036194, "learning_rate": 8.182793127261955e-05, "loss": 1.8529, "step": 2699 }, { "epoch": 0.28046120286693677, "grad_norm": 0.44212570786476135, "learning_rate": 8.181534577625088e-05, "loss": 1.8915, "step": 2700 }, { "epoch": 0.2805650773865171, "grad_norm": 0.3574367165565491, "learning_rate": 8.180275689179607e-05, "loss": 1.7146, "step": 2701 }, { "epoch": 0.2806689519060974, "grad_norm": 0.36751818656921387, "learning_rate": 8.17901646205957e-05, "loss": 1.5943, "step": 2702 }, { "epoch": 0.2807728264256778, "grad_norm": 0.3904435634613037, "learning_rate": 8.177756896399078e-05, "loss": 1.7849, "step": 2703 }, { "epoch": 0.28087670094525813, "grad_norm": 0.38835206627845764, "learning_rate": 8.176496992332265e-05, "loss": 1.7837, "step": 2704 }, { "epoch": 0.28098057546483846, "grad_norm": 0.46619629859924316, "learning_rate": 8.175236749993298e-05, "loss": 1.9918, "step": 2705 }, { "epoch": 0.28108444998441884, "grad_norm": 0.412544846534729, "learning_rate": 8.173976169516386e-05, "loss": 1.8577, "step": 2706 }, { "epoch": 0.28118832450399917, "grad_norm": 0.36168932914733887, "learning_rate": 8.17271525103577e-05, "loss": 1.6675, "step": 2707 }, { "epoch": 0.2812921990235795, "grad_norm": 0.4263473451137543, "learning_rate": 8.171453994685728e-05, "loss": 1.8988, "step": 2708 }, { "epoch": 0.2813960735431599, "grad_norm": 0.369133323431015, "learning_rate": 8.170192400600574e-05, "loss": 1.6811, "step": 2709 }, { "epoch": 0.2814999480627402, "grad_norm": 0.39230549335479736, "learning_rate": 8.168930468914658e-05, "loss": 1.5888, "step": 2710 }, { "epoch": 0.2816038225823206, "grad_norm": 0.3522338569164276, "learning_rate": 8.167668199762364e-05, "loss": 1.5904, "step": 2711 }, { "epoch": 0.2817076971019009, "grad_norm": 0.3948661983013153, "learning_rate": 8.166405593278116e-05, "loss": 1.7703, "step": 2712 }, { "epoch": 0.28181157162148124, "grad_norm": 0.35214731097221375, "learning_rate": 8.165142649596372e-05, "loss": 1.6956, "step": 2713 }, { "epoch": 0.2819154461410616, "grad_norm": 0.4061570167541504, "learning_rate": 8.163879368851623e-05, "loss": 1.9759, "step": 2714 }, { "epoch": 0.28201932066064195, "grad_norm": 0.3767627477645874, "learning_rate": 8.162615751178405e-05, "loss": 1.6924, "step": 2715 }, { "epoch": 0.2821231951802223, "grad_norm": 0.37810075283050537, "learning_rate": 8.161351796711274e-05, "loss": 1.8406, "step": 2716 }, { "epoch": 0.28222706969980266, "grad_norm": 0.37295323610305786, "learning_rate": 8.16008750558484e-05, "loss": 1.7651, "step": 2717 }, { "epoch": 0.282330944219383, "grad_norm": 0.3852209448814392, "learning_rate": 8.158822877933733e-05, "loss": 1.7538, "step": 2718 }, { "epoch": 0.2824348187389633, "grad_norm": 0.36513835191726685, "learning_rate": 8.15755791389263e-05, "loss": 1.7825, "step": 2719 }, { "epoch": 0.2825386932585437, "grad_norm": 0.3820558190345764, "learning_rate": 8.15629261359624e-05, "loss": 1.737, "step": 2720 }, { "epoch": 0.282642567778124, "grad_norm": 0.3873055875301361, "learning_rate": 8.155026977179307e-05, "loss": 1.774, "step": 2721 }, { "epoch": 0.28274644229770435, "grad_norm": 0.3791974186897278, "learning_rate": 8.15376100477661e-05, "loss": 1.7034, "step": 2722 }, { "epoch": 0.28285031681728473, "grad_norm": 0.35163450241088867, "learning_rate": 8.152494696522965e-05, "loss": 1.5747, "step": 2723 }, { "epoch": 0.28295419133686506, "grad_norm": 0.390493780374527, "learning_rate": 8.151228052553228e-05, "loss": 1.7802, "step": 2724 }, { "epoch": 0.28305806585644544, "grad_norm": 0.3626655340194702, "learning_rate": 8.149961073002283e-05, "loss": 1.7353, "step": 2725 }, { "epoch": 0.28316194037602577, "grad_norm": 0.40224525332450867, "learning_rate": 8.148693758005054e-05, "loss": 1.8108, "step": 2726 }, { "epoch": 0.2832658148956061, "grad_norm": 0.3655959367752075, "learning_rate": 8.1474261076965e-05, "loss": 1.7521, "step": 2727 }, { "epoch": 0.2833696894151865, "grad_norm": 0.3598358929157257, "learning_rate": 8.146158122211618e-05, "loss": 1.6139, "step": 2728 }, { "epoch": 0.2834735639347668, "grad_norm": 0.4097810983657837, "learning_rate": 8.144889801685436e-05, "loss": 1.9751, "step": 2729 }, { "epoch": 0.28357743845434713, "grad_norm": 0.3773553967475891, "learning_rate": 8.143621146253022e-05, "loss": 1.7618, "step": 2730 }, { "epoch": 0.2836813129739275, "grad_norm": 0.3876679837703705, "learning_rate": 8.142352156049475e-05, "loss": 1.7756, "step": 2731 }, { "epoch": 0.28378518749350784, "grad_norm": 0.3985688090324402, "learning_rate": 8.141082831209938e-05, "loss": 1.8868, "step": 2732 }, { "epoch": 0.28388906201308817, "grad_norm": 0.40376704931259155, "learning_rate": 8.139813171869579e-05, "loss": 1.9038, "step": 2733 }, { "epoch": 0.28399293653266855, "grad_norm": 0.333291620016098, "learning_rate": 8.138543178163607e-05, "loss": 1.5568, "step": 2734 }, { "epoch": 0.2840968110522489, "grad_norm": 0.39361095428466797, "learning_rate": 8.137272850227271e-05, "loss": 1.6809, "step": 2735 }, { "epoch": 0.2842006855718292, "grad_norm": 0.39014434814453125, "learning_rate": 8.136002188195846e-05, "loss": 1.7479, "step": 2736 }, { "epoch": 0.2843045600914096, "grad_norm": 0.4338228404521942, "learning_rate": 8.13473119220465e-05, "loss": 1.8751, "step": 2737 }, { "epoch": 0.2844084346109899, "grad_norm": 0.44359880685806274, "learning_rate": 8.133459862389036e-05, "loss": 1.8787, "step": 2738 }, { "epoch": 0.2845123091305703, "grad_norm": 0.42322838306427, "learning_rate": 8.132188198884386e-05, "loss": 1.827, "step": 2739 }, { "epoch": 0.2846161836501506, "grad_norm": 0.363816499710083, "learning_rate": 8.130916201826126e-05, "loss": 1.5635, "step": 2740 }, { "epoch": 0.28472005816973095, "grad_norm": 0.39882132411003113, "learning_rate": 8.129643871349712e-05, "loss": 2.0049, "step": 2741 }, { "epoch": 0.28482393268931133, "grad_norm": 0.3964424729347229, "learning_rate": 8.128371207590639e-05, "loss": 1.6739, "step": 2742 }, { "epoch": 0.28492780720889166, "grad_norm": 0.42000818252563477, "learning_rate": 8.127098210684434e-05, "loss": 1.9091, "step": 2743 }, { "epoch": 0.285031681728472, "grad_norm": 0.3911057412624359, "learning_rate": 8.125824880766661e-05, "loss": 1.7084, "step": 2744 }, { "epoch": 0.28513555624805237, "grad_norm": 0.37378305196762085, "learning_rate": 8.124551217972922e-05, "loss": 1.7252, "step": 2745 }, { "epoch": 0.2852394307676327, "grad_norm": 0.40639790892601013, "learning_rate": 8.12327722243885e-05, "loss": 1.8441, "step": 2746 }, { "epoch": 0.285343305287213, "grad_norm": 0.3873542249202728, "learning_rate": 8.122002894300117e-05, "loss": 1.506, "step": 2747 }, { "epoch": 0.2854471798067934, "grad_norm": 0.34335261583328247, "learning_rate": 8.120728233692428e-05, "loss": 1.6598, "step": 2748 }, { "epoch": 0.28555105432637373, "grad_norm": 0.4100406765937805, "learning_rate": 8.119453240751525e-05, "loss": 1.6315, "step": 2749 }, { "epoch": 0.2856549288459541, "grad_norm": 0.34580525755882263, "learning_rate": 8.118177915613182e-05, "loss": 1.562, "step": 2750 }, { "epoch": 0.28575880336553444, "grad_norm": 0.39648303389549255, "learning_rate": 8.116902258413216e-05, "loss": 1.9186, "step": 2751 }, { "epoch": 0.28586267788511477, "grad_norm": 0.3890984356403351, "learning_rate": 8.115626269287473e-05, "loss": 1.862, "step": 2752 }, { "epoch": 0.28596655240469515, "grad_norm": 0.3671417236328125, "learning_rate": 8.114349948371833e-05, "loss": 1.7655, "step": 2753 }, { "epoch": 0.2860704269242755, "grad_norm": 0.3553464412689209, "learning_rate": 8.113073295802219e-05, "loss": 1.5148, "step": 2754 }, { "epoch": 0.2861743014438558, "grad_norm": 0.3629017472267151, "learning_rate": 8.111796311714579e-05, "loss": 1.5992, "step": 2755 }, { "epoch": 0.2862781759634362, "grad_norm": 0.4407372772693634, "learning_rate": 8.110518996244907e-05, "loss": 1.7642, "step": 2756 }, { "epoch": 0.2863820504830165, "grad_norm": 0.42658326029777527, "learning_rate": 8.109241349529223e-05, "loss": 1.9091, "step": 2757 }, { "epoch": 0.28648592500259684, "grad_norm": 0.38717007637023926, "learning_rate": 8.107963371703589e-05, "loss": 1.7881, "step": 2758 }, { "epoch": 0.2865897995221772, "grad_norm": 0.39365988969802856, "learning_rate": 8.1066850629041e-05, "loss": 1.6742, "step": 2759 }, { "epoch": 0.28669367404175755, "grad_norm": 0.3701498806476593, "learning_rate": 8.105406423266884e-05, "loss": 1.6718, "step": 2760 }, { "epoch": 0.2867975485613379, "grad_norm": 0.3934311866760254, "learning_rate": 8.104127452928107e-05, "loss": 1.7174, "step": 2761 }, { "epoch": 0.28690142308091826, "grad_norm": 0.3584936261177063, "learning_rate": 8.10284815202397e-05, "loss": 1.7269, "step": 2762 }, { "epoch": 0.2870052976004986, "grad_norm": 0.4173574447631836, "learning_rate": 8.101568520690708e-05, "loss": 1.7251, "step": 2763 }, { "epoch": 0.28710917212007897, "grad_norm": 0.3903685212135315, "learning_rate": 8.10028855906459e-05, "loss": 1.7643, "step": 2764 }, { "epoch": 0.2872130466396593, "grad_norm": 0.37448278069496155, "learning_rate": 8.099008267281924e-05, "loss": 1.7648, "step": 2765 }, { "epoch": 0.2873169211592396, "grad_norm": 0.3669279217720032, "learning_rate": 8.097727645479053e-05, "loss": 1.7988, "step": 2766 }, { "epoch": 0.28742079567882, "grad_norm": 0.4136315584182739, "learning_rate": 8.096446693792347e-05, "loss": 1.857, "step": 2767 }, { "epoch": 0.28752467019840033, "grad_norm": 0.33900344371795654, "learning_rate": 8.095165412358225e-05, "loss": 1.5582, "step": 2768 }, { "epoch": 0.28762854471798066, "grad_norm": 0.36668214201927185, "learning_rate": 8.093883801313129e-05, "loss": 1.6839, "step": 2769 }, { "epoch": 0.28773241923756104, "grad_norm": 0.4014098048210144, "learning_rate": 8.09260186079354e-05, "loss": 1.7254, "step": 2770 }, { "epoch": 0.28783629375714137, "grad_norm": 0.44136208295822144, "learning_rate": 8.091319590935978e-05, "loss": 1.6938, "step": 2771 }, { "epoch": 0.2879401682767217, "grad_norm": 0.42486312985420227, "learning_rate": 8.090036991876992e-05, "loss": 1.7066, "step": 2772 }, { "epoch": 0.2880440427963021, "grad_norm": 0.4575170576572418, "learning_rate": 8.08875406375317e-05, "loss": 2.0641, "step": 2773 }, { "epoch": 0.2881479173158824, "grad_norm": 0.4238605499267578, "learning_rate": 8.087470806701131e-05, "loss": 1.9339, "step": 2774 }, { "epoch": 0.28825179183546273, "grad_norm": 0.37625259160995483, "learning_rate": 8.086187220857537e-05, "loss": 1.6855, "step": 2775 }, { "epoch": 0.2883556663550431, "grad_norm": 0.3760613799095154, "learning_rate": 8.084903306359078e-05, "loss": 1.786, "step": 2776 }, { "epoch": 0.28845954087462344, "grad_norm": 0.4117826521396637, "learning_rate": 8.083619063342477e-05, "loss": 1.796, "step": 2777 }, { "epoch": 0.2885634153942038, "grad_norm": 0.3975922465324402, "learning_rate": 8.0823344919445e-05, "loss": 1.8309, "step": 2778 }, { "epoch": 0.28866728991378415, "grad_norm": 0.39822137355804443, "learning_rate": 8.081049592301944e-05, "loss": 1.8686, "step": 2779 }, { "epoch": 0.2887711644333645, "grad_norm": 0.3700173795223236, "learning_rate": 8.079764364551638e-05, "loss": 1.7419, "step": 2780 }, { "epoch": 0.28887503895294486, "grad_norm": 0.3475184440612793, "learning_rate": 8.07847880883045e-05, "loss": 1.478, "step": 2781 }, { "epoch": 0.2889789134725252, "grad_norm": 0.37445908784866333, "learning_rate": 8.077192925275283e-05, "loss": 1.8183, "step": 2782 }, { "epoch": 0.2890827879921055, "grad_norm": 0.3727985918521881, "learning_rate": 8.075906714023073e-05, "loss": 1.7757, "step": 2783 }, { "epoch": 0.2891866625116859, "grad_norm": 0.36759766936302185, "learning_rate": 8.07462017521079e-05, "loss": 1.6917, "step": 2784 }, { "epoch": 0.2892905370312662, "grad_norm": 0.39550402760505676, "learning_rate": 8.073333308975442e-05, "loss": 1.7787, "step": 2785 }, { "epoch": 0.28939441155084655, "grad_norm": 0.3845430612564087, "learning_rate": 8.072046115454069e-05, "loss": 1.9255, "step": 2786 }, { "epoch": 0.28949828607042694, "grad_norm": 0.39458414912223816, "learning_rate": 8.070758594783748e-05, "loss": 1.6905, "step": 2787 }, { "epoch": 0.28960216059000726, "grad_norm": 0.38234516978263855, "learning_rate": 8.069470747101588e-05, "loss": 1.784, "step": 2788 }, { "epoch": 0.28970603510958765, "grad_norm": 0.4639817774295807, "learning_rate": 8.068182572544736e-05, "loss": 1.7567, "step": 2789 }, { "epoch": 0.28980990962916797, "grad_norm": 0.43574076890945435, "learning_rate": 8.066894071250375e-05, "loss": 1.8726, "step": 2790 }, { "epoch": 0.2899137841487483, "grad_norm": 0.37440717220306396, "learning_rate": 8.065605243355717e-05, "loss": 1.7629, "step": 2791 }, { "epoch": 0.2900176586683287, "grad_norm": 0.4101695716381073, "learning_rate": 8.064316088998012e-05, "loss": 1.7499, "step": 2792 }, { "epoch": 0.290121533187909, "grad_norm": 0.3932081162929535, "learning_rate": 8.063026608314547e-05, "loss": 1.7148, "step": 2793 }, { "epoch": 0.29022540770748934, "grad_norm": 0.37535983324050903, "learning_rate": 8.061736801442643e-05, "loss": 1.8031, "step": 2794 }, { "epoch": 0.2903292822270697, "grad_norm": 0.3890056610107422, "learning_rate": 8.06044666851965e-05, "loss": 1.718, "step": 2795 }, { "epoch": 0.29043315674665005, "grad_norm": 0.44528940320014954, "learning_rate": 8.059156209682959e-05, "loss": 1.7655, "step": 2796 }, { "epoch": 0.29053703126623037, "grad_norm": 0.3969075679779053, "learning_rate": 8.057865425069995e-05, "loss": 1.7541, "step": 2797 }, { "epoch": 0.29064090578581075, "grad_norm": 0.37456732988357544, "learning_rate": 8.056574314818215e-05, "loss": 1.747, "step": 2798 }, { "epoch": 0.2907447803053911, "grad_norm": 0.3638935387134552, "learning_rate": 8.055282879065114e-05, "loss": 1.6764, "step": 2799 }, { "epoch": 0.2908486548249714, "grad_norm": 0.3996514081954956, "learning_rate": 8.053991117948216e-05, "loss": 1.7869, "step": 2800 }, { "epoch": 0.2909525293445518, "grad_norm": 0.3886312246322632, "learning_rate": 8.052699031605087e-05, "loss": 1.8845, "step": 2801 }, { "epoch": 0.2910564038641321, "grad_norm": 0.37657225131988525, "learning_rate": 8.051406620173323e-05, "loss": 1.6482, "step": 2802 }, { "epoch": 0.2911602783837125, "grad_norm": 0.3608708083629608, "learning_rate": 8.050113883790556e-05, "loss": 1.6466, "step": 2803 }, { "epoch": 0.29126415290329283, "grad_norm": 0.41230741143226624, "learning_rate": 8.048820822594452e-05, "loss": 1.8663, "step": 2804 }, { "epoch": 0.29136802742287315, "grad_norm": 0.3793978989124298, "learning_rate": 8.047527436722713e-05, "loss": 1.7933, "step": 2805 }, { "epoch": 0.29147190194245354, "grad_norm": 0.35737892985343933, "learning_rate": 8.046233726313073e-05, "loss": 1.6788, "step": 2806 }, { "epoch": 0.29157577646203386, "grad_norm": 0.4132479429244995, "learning_rate": 8.044939691503304e-05, "loss": 1.7392, "step": 2807 }, { "epoch": 0.2916796509816142, "grad_norm": 0.4083154499530792, "learning_rate": 8.043645332431206e-05, "loss": 1.6476, "step": 2808 }, { "epoch": 0.2917835255011946, "grad_norm": 0.38581231236457825, "learning_rate": 8.042350649234625e-05, "loss": 1.7906, "step": 2809 }, { "epoch": 0.2918874000207749, "grad_norm": 0.4001144766807556, "learning_rate": 8.041055642051429e-05, "loss": 1.5869, "step": 2810 }, { "epoch": 0.29199127454035523, "grad_norm": 0.3624385893344879, "learning_rate": 8.039760311019529e-05, "loss": 1.7447, "step": 2811 }, { "epoch": 0.2920951490599356, "grad_norm": 0.3716486394405365, "learning_rate": 8.038464656276867e-05, "loss": 1.8424, "step": 2812 }, { "epoch": 0.29219902357951594, "grad_norm": 0.39141982793807983, "learning_rate": 8.03716867796142e-05, "loss": 1.6962, "step": 2813 }, { "epoch": 0.29230289809909626, "grad_norm": 0.3740139901638031, "learning_rate": 8.035872376211199e-05, "loss": 1.6874, "step": 2814 }, { "epoch": 0.29240677261867665, "grad_norm": 0.464403361082077, "learning_rate": 8.03457575116425e-05, "loss": 1.8638, "step": 2815 }, { "epoch": 0.292510647138257, "grad_norm": 0.3929448425769806, "learning_rate": 8.033278802958656e-05, "loss": 1.808, "step": 2816 }, { "epoch": 0.29261452165783736, "grad_norm": 0.4005575180053711, "learning_rate": 8.031981531732529e-05, "loss": 1.6983, "step": 2817 }, { "epoch": 0.2927183961774177, "grad_norm": 0.3648793697357178, "learning_rate": 8.03068393762402e-05, "loss": 1.7543, "step": 2818 }, { "epoch": 0.292822270696998, "grad_norm": 0.3590324819087982, "learning_rate": 8.02938602077131e-05, "loss": 1.8414, "step": 2819 }, { "epoch": 0.2929261452165784, "grad_norm": 0.3629642426967621, "learning_rate": 8.02808778131262e-05, "loss": 1.8325, "step": 2820 }, { "epoch": 0.2930300197361587, "grad_norm": 0.42311742901802063, "learning_rate": 8.026789219386201e-05, "loss": 1.7774, "step": 2821 }, { "epoch": 0.29313389425573905, "grad_norm": 0.37744253873825073, "learning_rate": 8.025490335130341e-05, "loss": 1.7133, "step": 2822 }, { "epoch": 0.29323776877531943, "grad_norm": 0.3763290047645569, "learning_rate": 8.02419112868336e-05, "loss": 1.7046, "step": 2823 }, { "epoch": 0.29334164329489976, "grad_norm": 0.3736268877983093, "learning_rate": 8.022891600183613e-05, "loss": 1.6936, "step": 2824 }, { "epoch": 0.2934455178144801, "grad_norm": 0.3851562440395355, "learning_rate": 8.021591749769489e-05, "loss": 1.7645, "step": 2825 }, { "epoch": 0.29354939233406047, "grad_norm": 0.4103233814239502, "learning_rate": 8.020291577579413e-05, "loss": 1.8886, "step": 2826 }, { "epoch": 0.2936532668536408, "grad_norm": 0.38178038597106934, "learning_rate": 8.018991083751846e-05, "loss": 1.7208, "step": 2827 }, { "epoch": 0.2937571413732212, "grad_norm": 0.36348870396614075, "learning_rate": 8.017690268425276e-05, "loss": 1.6585, "step": 2828 }, { "epoch": 0.2938610158928015, "grad_norm": 0.3771371841430664, "learning_rate": 8.016389131738229e-05, "loss": 1.7227, "step": 2829 }, { "epoch": 0.29396489041238183, "grad_norm": 0.38507938385009766, "learning_rate": 8.015087673829271e-05, "loss": 1.8273, "step": 2830 }, { "epoch": 0.2940687649319622, "grad_norm": 0.3727617561817169, "learning_rate": 8.013785894836993e-05, "loss": 1.6725, "step": 2831 }, { "epoch": 0.29417263945154254, "grad_norm": 0.37300458550453186, "learning_rate": 8.012483794900026e-05, "loss": 1.729, "step": 2832 }, { "epoch": 0.29427651397112287, "grad_norm": 0.4091015160083771, "learning_rate": 8.011181374157034e-05, "loss": 1.6771, "step": 2833 }, { "epoch": 0.29438038849070325, "grad_norm": 0.4204038083553314, "learning_rate": 8.009878632746712e-05, "loss": 1.8316, "step": 2834 }, { "epoch": 0.2944842630102836, "grad_norm": 0.4067396819591522, "learning_rate": 8.008575570807795e-05, "loss": 1.7071, "step": 2835 }, { "epoch": 0.2945881375298639, "grad_norm": 0.4235256314277649, "learning_rate": 8.007272188479049e-05, "loss": 1.8557, "step": 2836 }, { "epoch": 0.2946920120494443, "grad_norm": 0.3819893002510071, "learning_rate": 8.005968485899269e-05, "loss": 1.7695, "step": 2837 }, { "epoch": 0.2947958865690246, "grad_norm": 0.3644767999649048, "learning_rate": 8.004664463207294e-05, "loss": 1.6902, "step": 2838 }, { "epoch": 0.29489976108860494, "grad_norm": 0.36920779943466187, "learning_rate": 8.003360120541992e-05, "loss": 1.7109, "step": 2839 }, { "epoch": 0.2950036356081853, "grad_norm": 0.3806985318660736, "learning_rate": 8.002055458042264e-05, "loss": 1.8152, "step": 2840 }, { "epoch": 0.29510751012776565, "grad_norm": 0.4298724830150604, "learning_rate": 8.000750475847045e-05, "loss": 1.8906, "step": 2841 }, { "epoch": 0.29521138464734603, "grad_norm": 0.3639010787010193, "learning_rate": 7.999445174095308e-05, "loss": 1.6325, "step": 2842 }, { "epoch": 0.29531525916692636, "grad_norm": 0.36708855628967285, "learning_rate": 7.998139552926057e-05, "loss": 1.6156, "step": 2843 }, { "epoch": 0.2954191336865067, "grad_norm": 0.3640385866165161, "learning_rate": 7.996833612478331e-05, "loss": 1.71, "step": 2844 }, { "epoch": 0.29552300820608707, "grad_norm": 0.44802719354629517, "learning_rate": 7.9955273528912e-05, "loss": 1.7186, "step": 2845 }, { "epoch": 0.2956268827256674, "grad_norm": 0.37511417269706726, "learning_rate": 7.994220774303771e-05, "loss": 1.5034, "step": 2846 }, { "epoch": 0.2957307572452477, "grad_norm": 0.36259564757347107, "learning_rate": 7.992913876855188e-05, "loss": 1.6271, "step": 2847 }, { "epoch": 0.2958346317648281, "grad_norm": 0.3824392855167389, "learning_rate": 7.99160666068462e-05, "loss": 1.7693, "step": 2848 }, { "epoch": 0.29593850628440843, "grad_norm": 0.41231241822242737, "learning_rate": 7.99029912593128e-05, "loss": 1.5047, "step": 2849 }, { "epoch": 0.29604238080398876, "grad_norm": 0.3836499750614166, "learning_rate": 7.988991272734407e-05, "loss": 1.6688, "step": 2850 }, { "epoch": 0.29614625532356914, "grad_norm": 0.4020180106163025, "learning_rate": 7.987683101233279e-05, "loss": 1.7687, "step": 2851 }, { "epoch": 0.29625012984314947, "grad_norm": 0.3831713795661926, "learning_rate": 7.986374611567207e-05, "loss": 1.7141, "step": 2852 }, { "epoch": 0.2963540043627298, "grad_norm": 0.44648897647857666, "learning_rate": 7.985065803875532e-05, "loss": 1.9484, "step": 2853 }, { "epoch": 0.2964578788823102, "grad_norm": 0.36257603764533997, "learning_rate": 7.983756678297633e-05, "loss": 1.6849, "step": 2854 }, { "epoch": 0.2965617534018905, "grad_norm": 0.40492746233940125, "learning_rate": 7.982447234972922e-05, "loss": 1.7369, "step": 2855 }, { "epoch": 0.2966656279214709, "grad_norm": 0.37346115708351135, "learning_rate": 7.981137474040845e-05, "loss": 1.7769, "step": 2856 }, { "epoch": 0.2967695024410512, "grad_norm": 0.3808712959289551, "learning_rate": 7.979827395640883e-05, "loss": 1.6387, "step": 2857 }, { "epoch": 0.29687337696063154, "grad_norm": 0.3708186149597168, "learning_rate": 7.978516999912544e-05, "loss": 1.7448, "step": 2858 }, { "epoch": 0.2969772514802119, "grad_norm": 0.3663538992404938, "learning_rate": 7.977206286995379e-05, "loss": 1.7705, "step": 2859 }, { "epoch": 0.29708112599979225, "grad_norm": 0.41986000537872314, "learning_rate": 7.975895257028965e-05, "loss": 1.9017, "step": 2860 }, { "epoch": 0.2971850005193726, "grad_norm": 0.36581292748451233, "learning_rate": 7.974583910152922e-05, "loss": 1.5924, "step": 2861 }, { "epoch": 0.29728887503895296, "grad_norm": 0.38697493076324463, "learning_rate": 7.973272246506893e-05, "loss": 1.7093, "step": 2862 }, { "epoch": 0.2973927495585333, "grad_norm": 0.38015836477279663, "learning_rate": 7.971960266230565e-05, "loss": 1.7662, "step": 2863 }, { "epoch": 0.2974966240781136, "grad_norm": 0.37382447719573975, "learning_rate": 7.97064796946365e-05, "loss": 1.6753, "step": 2864 }, { "epoch": 0.297600498597694, "grad_norm": 0.42534875869750977, "learning_rate": 7.969335356345894e-05, "loss": 2.0455, "step": 2865 }, { "epoch": 0.2977043731172743, "grad_norm": 0.39144277572631836, "learning_rate": 7.968022427017088e-05, "loss": 1.4737, "step": 2866 }, { "epoch": 0.2978082476368547, "grad_norm": 0.3727049231529236, "learning_rate": 7.966709181617045e-05, "loss": 1.8034, "step": 2867 }, { "epoch": 0.29791212215643503, "grad_norm": 0.3644360601902008, "learning_rate": 7.965395620285616e-05, "loss": 1.7262, "step": 2868 }, { "epoch": 0.29801599667601536, "grad_norm": 0.40929368138313293, "learning_rate": 7.964081743162684e-05, "loss": 1.8834, "step": 2869 }, { "epoch": 0.29811987119559574, "grad_norm": 0.40274330973625183, "learning_rate": 7.962767550388166e-05, "loss": 1.6383, "step": 2870 }, { "epoch": 0.29822374571517607, "grad_norm": 0.38310062885284424, "learning_rate": 7.961453042102014e-05, "loss": 1.6704, "step": 2871 }, { "epoch": 0.2983276202347564, "grad_norm": 0.3942394554615021, "learning_rate": 7.960138218444215e-05, "loss": 1.8623, "step": 2872 }, { "epoch": 0.2984314947543368, "grad_norm": 0.3523310124874115, "learning_rate": 7.958823079554785e-05, "loss": 1.7932, "step": 2873 }, { "epoch": 0.2985353692739171, "grad_norm": 0.36860281229019165, "learning_rate": 7.957507625573776e-05, "loss": 1.7049, "step": 2874 }, { "epoch": 0.29863924379349743, "grad_norm": 0.37940865755081177, "learning_rate": 7.956191856641276e-05, "loss": 1.682, "step": 2875 }, { "epoch": 0.2987431183130778, "grad_norm": 0.4322430193424225, "learning_rate": 7.9548757728974e-05, "loss": 1.9822, "step": 2876 }, { "epoch": 0.29884699283265814, "grad_norm": 0.39275041222572327, "learning_rate": 7.953559374482305e-05, "loss": 1.8279, "step": 2877 }, { "epoch": 0.29895086735223847, "grad_norm": 0.3673188388347626, "learning_rate": 7.952242661536175e-05, "loss": 1.6634, "step": 2878 }, { "epoch": 0.29905474187181885, "grad_norm": 0.39935851097106934, "learning_rate": 7.950925634199228e-05, "loss": 1.7705, "step": 2879 }, { "epoch": 0.2991586163913992, "grad_norm": 0.4122634530067444, "learning_rate": 7.949608292611721e-05, "loss": 1.7795, "step": 2880 }, { "epoch": 0.29926249091097956, "grad_norm": 0.40487560629844666, "learning_rate": 7.948290636913939e-05, "loss": 1.9119, "step": 2881 }, { "epoch": 0.2993663654305599, "grad_norm": 0.3445805013179779, "learning_rate": 7.946972667246201e-05, "loss": 1.5662, "step": 2882 }, { "epoch": 0.2994702399501402, "grad_norm": 0.3658605217933655, "learning_rate": 7.945654383748861e-05, "loss": 1.5546, "step": 2883 }, { "epoch": 0.2995741144697206, "grad_norm": 0.3820507824420929, "learning_rate": 7.944335786562307e-05, "loss": 1.5994, "step": 2884 }, { "epoch": 0.2996779889893009, "grad_norm": 0.4138948321342468, "learning_rate": 7.943016875826957e-05, "loss": 1.829, "step": 2885 }, { "epoch": 0.29978186350888125, "grad_norm": 0.35458049178123474, "learning_rate": 7.941697651683267e-05, "loss": 1.6141, "step": 2886 }, { "epoch": 0.29988573802846163, "grad_norm": 0.4207065999507904, "learning_rate": 7.940378114271723e-05, "loss": 1.837, "step": 2887 }, { "epoch": 0.29998961254804196, "grad_norm": 0.35922253131866455, "learning_rate": 7.939058263732846e-05, "loss": 1.5883, "step": 2888 }, { "epoch": 0.3000934870676223, "grad_norm": 0.38362789154052734, "learning_rate": 7.93773810020719e-05, "loss": 1.7448, "step": 2889 }, { "epoch": 0.30019736158720267, "grad_norm": 0.39529237151145935, "learning_rate": 7.93641762383534e-05, "loss": 1.7747, "step": 2890 }, { "epoch": 0.300301236106783, "grad_norm": 0.37079519033432007, "learning_rate": 7.93509683475792e-05, "loss": 1.6808, "step": 2891 }, { "epoch": 0.3004051106263634, "grad_norm": 0.37962087988853455, "learning_rate": 7.93377573311558e-05, "loss": 1.6966, "step": 2892 }, { "epoch": 0.3005089851459437, "grad_norm": 0.36086609959602356, "learning_rate": 7.932454319049008e-05, "loss": 1.6517, "step": 2893 }, { "epoch": 0.30061285966552403, "grad_norm": 0.3676837980747223, "learning_rate": 7.931132592698927e-05, "loss": 1.5688, "step": 2894 }, { "epoch": 0.3007167341851044, "grad_norm": 0.38151493668556213, "learning_rate": 7.929810554206088e-05, "loss": 1.7309, "step": 2895 }, { "epoch": 0.30082060870468474, "grad_norm": 0.38594383001327515, "learning_rate": 7.928488203711279e-05, "loss": 1.7323, "step": 2896 }, { "epoch": 0.30092448322426507, "grad_norm": 0.3639463186264038, "learning_rate": 7.927165541355319e-05, "loss": 1.6411, "step": 2897 }, { "epoch": 0.30102835774384545, "grad_norm": 0.36965441703796387, "learning_rate": 7.92584256727906e-05, "loss": 1.6233, "step": 2898 }, { "epoch": 0.3011322322634258, "grad_norm": 0.40190815925598145, "learning_rate": 7.924519281623393e-05, "loss": 1.8847, "step": 2899 }, { "epoch": 0.3012361067830061, "grad_norm": 0.3761938214302063, "learning_rate": 7.923195684529232e-05, "loss": 1.6344, "step": 2900 }, { "epoch": 0.3013399813025865, "grad_norm": 0.39748868346214294, "learning_rate": 7.921871776137533e-05, "loss": 1.9062, "step": 2901 }, { "epoch": 0.3014438558221668, "grad_norm": 0.3873717188835144, "learning_rate": 7.920547556589282e-05, "loss": 1.8971, "step": 2902 }, { "epoch": 0.30154773034174714, "grad_norm": 0.39114367961883545, "learning_rate": 7.919223026025498e-05, "loss": 1.7381, "step": 2903 }, { "epoch": 0.3016516048613275, "grad_norm": 0.38005557656288147, "learning_rate": 7.917898184587231e-05, "loss": 1.8914, "step": 2904 }, { "epoch": 0.30175547938090785, "grad_norm": 0.3953699469566345, "learning_rate": 7.916573032415569e-05, "loss": 1.8567, "step": 2905 }, { "epoch": 0.30185935390048824, "grad_norm": 0.3641657829284668, "learning_rate": 7.915247569651627e-05, "loss": 1.5998, "step": 2906 }, { "epoch": 0.30196322842006856, "grad_norm": 0.35653895139694214, "learning_rate": 7.913921796436561e-05, "loss": 1.5639, "step": 2907 }, { "epoch": 0.3020671029396489, "grad_norm": 0.3951781690120697, "learning_rate": 7.912595712911552e-05, "loss": 1.7428, "step": 2908 }, { "epoch": 0.3021709774592293, "grad_norm": 0.47132858633995056, "learning_rate": 7.91126931921782e-05, "loss": 1.7877, "step": 2909 }, { "epoch": 0.3022748519788096, "grad_norm": 0.4093819260597229, "learning_rate": 7.909942615496613e-05, "loss": 1.8667, "step": 2910 }, { "epoch": 0.3023787264983899, "grad_norm": 0.43610236048698425, "learning_rate": 7.908615601889218e-05, "loss": 1.6627, "step": 2911 }, { "epoch": 0.3024826010179703, "grad_norm": 0.42912378907203674, "learning_rate": 7.907288278536947e-05, "loss": 1.6026, "step": 2912 }, { "epoch": 0.30258647553755064, "grad_norm": 0.4064681828022003, "learning_rate": 7.905960645581152e-05, "loss": 1.7998, "step": 2913 }, { "epoch": 0.30269035005713096, "grad_norm": 0.373544842004776, "learning_rate": 7.904632703163218e-05, "loss": 1.6253, "step": 2914 }, { "epoch": 0.30279422457671135, "grad_norm": 0.376240074634552, "learning_rate": 7.903304451424556e-05, "loss": 1.7304, "step": 2915 }, { "epoch": 0.3028980990962917, "grad_norm": 0.3998112678527832, "learning_rate": 7.901975890506616e-05, "loss": 1.7403, "step": 2916 }, { "epoch": 0.303001973615872, "grad_norm": 0.38144102692604065, "learning_rate": 7.900647020550882e-05, "loss": 1.7706, "step": 2917 }, { "epoch": 0.3031058481354524, "grad_norm": 0.370951771736145, "learning_rate": 7.899317841698864e-05, "loss": 1.7351, "step": 2918 }, { "epoch": 0.3032097226550327, "grad_norm": 0.41188183426856995, "learning_rate": 7.897988354092113e-05, "loss": 1.8509, "step": 2919 }, { "epoch": 0.3033135971746131, "grad_norm": 0.4149079918861389, "learning_rate": 7.896658557872207e-05, "loss": 1.6496, "step": 2920 }, { "epoch": 0.3034174716941934, "grad_norm": 0.38840773701667786, "learning_rate": 7.89532845318076e-05, "loss": 1.4942, "step": 2921 }, { "epoch": 0.30352134621377375, "grad_norm": 0.4095969498157501, "learning_rate": 7.893998040159418e-05, "loss": 1.5979, "step": 2922 }, { "epoch": 0.30362522073335413, "grad_norm": 0.3632447123527527, "learning_rate": 7.892667318949855e-05, "loss": 1.5617, "step": 2923 }, { "epoch": 0.30372909525293446, "grad_norm": 0.34822526574134827, "learning_rate": 7.891336289693789e-05, "loss": 1.6198, "step": 2924 }, { "epoch": 0.3038329697725148, "grad_norm": 0.3828188180923462, "learning_rate": 7.890004952532962e-05, "loss": 1.7494, "step": 2925 }, { "epoch": 0.30393684429209517, "grad_norm": 0.37072688341140747, "learning_rate": 7.888673307609149e-05, "loss": 1.5492, "step": 2926 }, { "epoch": 0.3040407188116755, "grad_norm": 0.3795013427734375, "learning_rate": 7.887341355064162e-05, "loss": 1.6222, "step": 2927 }, { "epoch": 0.3041445933312558, "grad_norm": 0.40049639344215393, "learning_rate": 7.886009095039843e-05, "loss": 1.8504, "step": 2928 }, { "epoch": 0.3042484678508362, "grad_norm": 0.4613747000694275, "learning_rate": 7.884676527678068e-05, "loss": 2.1015, "step": 2929 }, { "epoch": 0.30435234237041653, "grad_norm": 0.3635263442993164, "learning_rate": 7.883343653120743e-05, "loss": 1.6364, "step": 2930 }, { "epoch": 0.3044562168899969, "grad_norm": 0.4116704761981964, "learning_rate": 7.882010471509809e-05, "loss": 1.5827, "step": 2931 }, { "epoch": 0.30456009140957724, "grad_norm": 0.41490453481674194, "learning_rate": 7.88067698298724e-05, "loss": 1.9797, "step": 2932 }, { "epoch": 0.30466396592915757, "grad_norm": 0.3712293207645416, "learning_rate": 7.879343187695045e-05, "loss": 1.7037, "step": 2933 }, { "epoch": 0.30476784044873795, "grad_norm": 0.37144172191619873, "learning_rate": 7.878009085775257e-05, "loss": 1.7411, "step": 2934 }, { "epoch": 0.3048717149683183, "grad_norm": 0.38958555459976196, "learning_rate": 7.876674677369954e-05, "loss": 1.7444, "step": 2935 }, { "epoch": 0.3049755894878986, "grad_norm": 0.3905205726623535, "learning_rate": 7.875339962621235e-05, "loss": 1.8388, "step": 2936 }, { "epoch": 0.305079464007479, "grad_norm": 0.4097367525100708, "learning_rate": 7.874004941671239e-05, "loss": 1.7877, "step": 2937 }, { "epoch": 0.3051833385270593, "grad_norm": 0.39029520750045776, "learning_rate": 7.872669614662135e-05, "loss": 1.7491, "step": 2938 }, { "epoch": 0.30528721304663964, "grad_norm": 0.3893304467201233, "learning_rate": 7.871333981736124e-05, "loss": 1.7892, "step": 2939 }, { "epoch": 0.30539108756622, "grad_norm": 0.39329490065574646, "learning_rate": 7.869998043035442e-05, "loss": 1.7247, "step": 2940 }, { "epoch": 0.30549496208580035, "grad_norm": 0.4152490496635437, "learning_rate": 7.868661798702355e-05, "loss": 1.767, "step": 2941 }, { "epoch": 0.3055988366053807, "grad_norm": 0.40060338377952576, "learning_rate": 7.867325248879163e-05, "loss": 1.6133, "step": 2942 }, { "epoch": 0.30570271112496106, "grad_norm": 0.422410786151886, "learning_rate": 7.865988393708197e-05, "loss": 1.6876, "step": 2943 }, { "epoch": 0.3058065856445414, "grad_norm": 0.4146443009376526, "learning_rate": 7.864651233331823e-05, "loss": 1.7866, "step": 2944 }, { "epoch": 0.30591046016412177, "grad_norm": 0.363182008266449, "learning_rate": 7.863313767892438e-05, "loss": 1.5605, "step": 2945 }, { "epoch": 0.3060143346837021, "grad_norm": 0.4126920998096466, "learning_rate": 7.861975997532471e-05, "loss": 1.8965, "step": 2946 }, { "epoch": 0.3061182092032824, "grad_norm": 0.4038070738315582, "learning_rate": 7.860637922394387e-05, "loss": 1.8668, "step": 2947 }, { "epoch": 0.3062220837228628, "grad_norm": 0.3751528859138489, "learning_rate": 7.859299542620675e-05, "loss": 1.8258, "step": 2948 }, { "epoch": 0.30632595824244313, "grad_norm": 0.422244668006897, "learning_rate": 7.857960858353866e-05, "loss": 1.8309, "step": 2949 }, { "epoch": 0.30642983276202346, "grad_norm": 0.37908682227134705, "learning_rate": 7.85662186973652e-05, "loss": 1.7936, "step": 2950 }, { "epoch": 0.30653370728160384, "grad_norm": 0.36694470047950745, "learning_rate": 7.855282576911224e-05, "loss": 1.7259, "step": 2951 }, { "epoch": 0.30663758180118417, "grad_norm": 0.3961583077907562, "learning_rate": 7.85394298002061e-05, "loss": 1.7949, "step": 2952 }, { "epoch": 0.3067414563207645, "grad_norm": 0.3951317071914673, "learning_rate": 7.852603079207328e-05, "loss": 1.6679, "step": 2953 }, { "epoch": 0.3068453308403449, "grad_norm": 0.3763517737388611, "learning_rate": 7.851262874614069e-05, "loss": 1.6965, "step": 2954 }, { "epoch": 0.3069492053599252, "grad_norm": 0.3869110941886902, "learning_rate": 7.849922366383555e-05, "loss": 1.6044, "step": 2955 }, { "epoch": 0.30705307987950553, "grad_norm": 0.4839021563529968, "learning_rate": 7.848581554658539e-05, "loss": 2.1448, "step": 2956 }, { "epoch": 0.3071569543990859, "grad_norm": 0.4335784912109375, "learning_rate": 7.847240439581806e-05, "loss": 1.7972, "step": 2957 }, { "epoch": 0.30726082891866624, "grad_norm": 0.5460455417633057, "learning_rate": 7.845899021296178e-05, "loss": 1.732, "step": 2958 }, { "epoch": 0.3073647034382466, "grad_norm": 0.39001351594924927, "learning_rate": 7.844557299944501e-05, "loss": 1.7266, "step": 2959 }, { "epoch": 0.30746857795782695, "grad_norm": 0.39050352573394775, "learning_rate": 7.843215275669663e-05, "loss": 1.7042, "step": 2960 }, { "epoch": 0.3075724524774073, "grad_norm": 0.3831944465637207, "learning_rate": 7.841872948614573e-05, "loss": 1.7086, "step": 2961 }, { "epoch": 0.30767632699698766, "grad_norm": 0.4240471422672272, "learning_rate": 7.840530318922181e-05, "loss": 1.7215, "step": 2962 }, { "epoch": 0.307780201516568, "grad_norm": 0.3831408619880676, "learning_rate": 7.839187386735469e-05, "loss": 1.7995, "step": 2963 }, { "epoch": 0.3078840760361483, "grad_norm": 0.43340522050857544, "learning_rate": 7.837844152197447e-05, "loss": 1.7028, "step": 2964 }, { "epoch": 0.3079879505557287, "grad_norm": 0.3613108992576599, "learning_rate": 7.836500615451156e-05, "loss": 1.4274, "step": 2965 }, { "epoch": 0.308091825075309, "grad_norm": 0.4255940914154053, "learning_rate": 7.835156776639679e-05, "loss": 1.5747, "step": 2966 }, { "epoch": 0.30819569959488935, "grad_norm": 0.3656274974346161, "learning_rate": 7.833812635906118e-05, "loss": 1.6061, "step": 2967 }, { "epoch": 0.30829957411446973, "grad_norm": 0.4550713002681732, "learning_rate": 7.832468193393616e-05, "loss": 1.8704, "step": 2968 }, { "epoch": 0.30840344863405006, "grad_norm": 0.40488240122795105, "learning_rate": 7.831123449245345e-05, "loss": 1.7132, "step": 2969 }, { "epoch": 0.30850732315363044, "grad_norm": 0.3986703157424927, "learning_rate": 7.829778403604513e-05, "loss": 1.8511, "step": 2970 }, { "epoch": 0.30861119767321077, "grad_norm": 0.41377493739128113, "learning_rate": 7.828433056614351e-05, "loss": 1.8369, "step": 2971 }, { "epoch": 0.3087150721927911, "grad_norm": 0.3639225959777832, "learning_rate": 7.827087408418132e-05, "loss": 1.7907, "step": 2972 }, { "epoch": 0.3088189467123715, "grad_norm": 0.3887461721897125, "learning_rate": 7.825741459159157e-05, "loss": 1.7639, "step": 2973 }, { "epoch": 0.3089228212319518, "grad_norm": 0.38962462544441223, "learning_rate": 7.824395208980758e-05, "loss": 1.7871, "step": 2974 }, { "epoch": 0.30902669575153213, "grad_norm": 0.43991488218307495, "learning_rate": 7.8230486580263e-05, "loss": 1.988, "step": 2975 }, { "epoch": 0.3091305702711125, "grad_norm": 0.4023156464099884, "learning_rate": 7.821701806439179e-05, "loss": 1.812, "step": 2976 }, { "epoch": 0.30923444479069284, "grad_norm": 0.37170884013175964, "learning_rate": 7.820354654362828e-05, "loss": 1.7231, "step": 2977 }, { "epoch": 0.30933831931027317, "grad_norm": 0.38935086131095886, "learning_rate": 7.819007201940706e-05, "loss": 1.7493, "step": 2978 }, { "epoch": 0.30944219382985355, "grad_norm": 0.3613695204257965, "learning_rate": 7.817659449316305e-05, "loss": 1.621, "step": 2979 }, { "epoch": 0.3095460683494339, "grad_norm": 0.3804933726787567, "learning_rate": 7.816311396633152e-05, "loss": 1.7622, "step": 2980 }, { "epoch": 0.3096499428690142, "grad_norm": 0.3554391860961914, "learning_rate": 7.814963044034802e-05, "loss": 1.615, "step": 2981 }, { "epoch": 0.3097538173885946, "grad_norm": 0.42907533049583435, "learning_rate": 7.813614391664847e-05, "loss": 1.9264, "step": 2982 }, { "epoch": 0.3098576919081749, "grad_norm": 0.46726834774017334, "learning_rate": 7.812265439666906e-05, "loss": 1.6321, "step": 2983 }, { "epoch": 0.3099615664277553, "grad_norm": 0.43149927258491516, "learning_rate": 7.81091618818463e-05, "loss": 1.902, "step": 2984 }, { "epoch": 0.3100654409473356, "grad_norm": 0.42148879170417786, "learning_rate": 7.80956663736171e-05, "loss": 1.8288, "step": 2985 }, { "epoch": 0.31016931546691595, "grad_norm": 0.39359238743782043, "learning_rate": 7.808216787341855e-05, "loss": 1.7853, "step": 2986 }, { "epoch": 0.31027318998649633, "grad_norm": 0.37277668714523315, "learning_rate": 7.806866638268818e-05, "loss": 1.702, "step": 2987 }, { "epoch": 0.31037706450607666, "grad_norm": 0.3854857087135315, "learning_rate": 7.80551619028638e-05, "loss": 1.7776, "step": 2988 }, { "epoch": 0.310480939025657, "grad_norm": 0.3613017201423645, "learning_rate": 7.80416544353835e-05, "loss": 1.6451, "step": 2989 }, { "epoch": 0.31058481354523737, "grad_norm": 0.3885599970817566, "learning_rate": 7.802814398168572e-05, "loss": 1.8862, "step": 2990 }, { "epoch": 0.3106886880648177, "grad_norm": 0.3823319375514984, "learning_rate": 7.801463054320927e-05, "loss": 1.7783, "step": 2991 }, { "epoch": 0.310792562584398, "grad_norm": 0.43862950801849365, "learning_rate": 7.800111412139318e-05, "loss": 1.654, "step": 2992 }, { "epoch": 0.3108964371039784, "grad_norm": 0.4039106070995331, "learning_rate": 7.798759471767684e-05, "loss": 1.7425, "step": 2993 }, { "epoch": 0.31100031162355873, "grad_norm": 0.3942197263240814, "learning_rate": 7.797407233349998e-05, "loss": 1.7184, "step": 2994 }, { "epoch": 0.31110418614313906, "grad_norm": 0.35280346870422363, "learning_rate": 7.796054697030262e-05, "loss": 1.5123, "step": 2995 }, { "epoch": 0.31120806066271944, "grad_norm": 0.38291507959365845, "learning_rate": 7.794701862952512e-05, "loss": 1.7482, "step": 2996 }, { "epoch": 0.31131193518229977, "grad_norm": 0.4096364378929138, "learning_rate": 7.793348731260813e-05, "loss": 1.8343, "step": 2997 }, { "epoch": 0.31141580970188015, "grad_norm": 0.40099769830703735, "learning_rate": 7.79199530209926e-05, "loss": 1.655, "step": 2998 }, { "epoch": 0.3115196842214605, "grad_norm": 0.3833656907081604, "learning_rate": 7.79064157561199e-05, "loss": 1.7954, "step": 2999 }, { "epoch": 0.3116235587410408, "grad_norm": 0.3824611008167267, "learning_rate": 7.789287551943158e-05, "loss": 1.5854, "step": 3000 }, { "epoch": 0.3117274332606212, "grad_norm": 0.3726818561553955, "learning_rate": 7.787933231236958e-05, "loss": 1.6898, "step": 3001 }, { "epoch": 0.3118313077802015, "grad_norm": 0.37149757146835327, "learning_rate": 7.786578613637617e-05, "loss": 1.6463, "step": 3002 }, { "epoch": 0.31193518229978184, "grad_norm": 0.42311716079711914, "learning_rate": 7.78522369928939e-05, "loss": 1.951, "step": 3003 }, { "epoch": 0.3120390568193622, "grad_norm": 0.47437840700149536, "learning_rate": 7.783868488336563e-05, "loss": 1.6597, "step": 3004 }, { "epoch": 0.31214293133894255, "grad_norm": 0.3761157989501953, "learning_rate": 7.782512980923459e-05, "loss": 1.8036, "step": 3005 }, { "epoch": 0.3122468058585229, "grad_norm": 0.3912384510040283, "learning_rate": 7.781157177194426e-05, "loss": 1.6382, "step": 3006 }, { "epoch": 0.31235068037810326, "grad_norm": 0.3848210871219635, "learning_rate": 7.779801077293847e-05, "loss": 1.7521, "step": 3007 }, { "epoch": 0.3124545548976836, "grad_norm": 0.3805733323097229, "learning_rate": 7.778444681366137e-05, "loss": 1.7019, "step": 3008 }, { "epoch": 0.31255842941726397, "grad_norm": 0.38343513011932373, "learning_rate": 7.777087989555741e-05, "loss": 1.7686, "step": 3009 }, { "epoch": 0.3126623039368443, "grad_norm": 0.37739554047584534, "learning_rate": 7.775731002007138e-05, "loss": 1.5242, "step": 3010 }, { "epoch": 0.3127661784564246, "grad_norm": 0.37050339579582214, "learning_rate": 7.774373718864833e-05, "loss": 1.771, "step": 3011 }, { "epoch": 0.312870052976005, "grad_norm": 0.39899301528930664, "learning_rate": 7.773016140273368e-05, "loss": 1.8089, "step": 3012 }, { "epoch": 0.31297392749558534, "grad_norm": 0.3865603506565094, "learning_rate": 7.771658266377315e-05, "loss": 1.6336, "step": 3013 }, { "epoch": 0.31307780201516566, "grad_norm": 0.3612881302833557, "learning_rate": 7.770300097321276e-05, "loss": 1.4883, "step": 3014 }, { "epoch": 0.31318167653474605, "grad_norm": 0.422632098197937, "learning_rate": 7.768941633249884e-05, "loss": 1.8948, "step": 3015 }, { "epoch": 0.31328555105432637, "grad_norm": 0.3684242069721222, "learning_rate": 7.76758287430781e-05, "loss": 1.702, "step": 3016 }, { "epoch": 0.3133894255739067, "grad_norm": 0.36709776520729065, "learning_rate": 7.766223820639747e-05, "loss": 1.687, "step": 3017 }, { "epoch": 0.3134933000934871, "grad_norm": 0.39332762360572815, "learning_rate": 7.764864472390423e-05, "loss": 1.7333, "step": 3018 }, { "epoch": 0.3135971746130674, "grad_norm": 0.3547021448612213, "learning_rate": 7.7635048297046e-05, "loss": 1.6883, "step": 3019 }, { "epoch": 0.31370104913264774, "grad_norm": 0.393284410238266, "learning_rate": 7.762144892727069e-05, "loss": 1.9121, "step": 3020 }, { "epoch": 0.3138049236522281, "grad_norm": 0.4200425148010254, "learning_rate": 7.760784661602653e-05, "loss": 1.8525, "step": 3021 }, { "epoch": 0.31390879817180845, "grad_norm": 0.3398437798023224, "learning_rate": 7.759424136476206e-05, "loss": 1.529, "step": 3022 }, { "epoch": 0.31401267269138883, "grad_norm": 0.3940331041812897, "learning_rate": 7.75806331749261e-05, "loss": 1.8153, "step": 3023 }, { "epoch": 0.31411654721096915, "grad_norm": 0.37523049116134644, "learning_rate": 7.756702204796786e-05, "loss": 1.6807, "step": 3024 }, { "epoch": 0.3142204217305495, "grad_norm": 0.3940492868423462, "learning_rate": 7.75534079853368e-05, "loss": 1.8526, "step": 3025 }, { "epoch": 0.31432429625012986, "grad_norm": 0.40810003876686096, "learning_rate": 7.753979098848272e-05, "loss": 1.6186, "step": 3026 }, { "epoch": 0.3144281707697102, "grad_norm": 0.3908647894859314, "learning_rate": 7.752617105885574e-05, "loss": 1.6896, "step": 3027 }, { "epoch": 0.3145320452892905, "grad_norm": 0.35582828521728516, "learning_rate": 7.751254819790623e-05, "loss": 1.719, "step": 3028 }, { "epoch": 0.3146359198088709, "grad_norm": 0.3710813820362091, "learning_rate": 7.749892240708494e-05, "loss": 1.7155, "step": 3029 }, { "epoch": 0.31473979432845123, "grad_norm": 0.4214909076690674, "learning_rate": 7.748529368784292e-05, "loss": 1.8792, "step": 3030 }, { "epoch": 0.31484366884803155, "grad_norm": 0.4322544038295746, "learning_rate": 7.747166204163152e-05, "loss": 1.8348, "step": 3031 }, { "epoch": 0.31494754336761194, "grad_norm": 0.38530245423316956, "learning_rate": 7.745802746990239e-05, "loss": 1.7037, "step": 3032 }, { "epoch": 0.31505141788719226, "grad_norm": 0.3923969268798828, "learning_rate": 7.744438997410752e-05, "loss": 1.6523, "step": 3033 }, { "epoch": 0.3151552924067726, "grad_norm": 0.3855164349079132, "learning_rate": 7.74307495556992e-05, "loss": 1.705, "step": 3034 }, { "epoch": 0.315259166926353, "grad_norm": 0.3986038267612457, "learning_rate": 7.741710621613002e-05, "loss": 1.9339, "step": 3035 }, { "epoch": 0.3153630414459333, "grad_norm": 0.42609813809394836, "learning_rate": 7.740345995685287e-05, "loss": 1.8712, "step": 3036 }, { "epoch": 0.3154669159655137, "grad_norm": 0.4008728563785553, "learning_rate": 7.738981077932099e-05, "loss": 1.8456, "step": 3037 }, { "epoch": 0.315570790485094, "grad_norm": 0.4060449004173279, "learning_rate": 7.737615868498793e-05, "loss": 1.7126, "step": 3038 }, { "epoch": 0.31567466500467434, "grad_norm": 0.36810052394866943, "learning_rate": 7.736250367530751e-05, "loss": 1.6292, "step": 3039 }, { "epoch": 0.3157785395242547, "grad_norm": 0.39102548360824585, "learning_rate": 7.734884575173387e-05, "loss": 1.7703, "step": 3040 }, { "epoch": 0.31588241404383505, "grad_norm": 0.4772125780582428, "learning_rate": 7.73351849157215e-05, "loss": 1.7719, "step": 3041 }, { "epoch": 0.3159862885634154, "grad_norm": 0.40599432587623596, "learning_rate": 7.732152116872515e-05, "loss": 1.6538, "step": 3042 }, { "epoch": 0.31609016308299576, "grad_norm": 0.4301255941390991, "learning_rate": 7.730785451219991e-05, "loss": 1.755, "step": 3043 }, { "epoch": 0.3161940376025761, "grad_norm": 0.386177122592926, "learning_rate": 7.729418494760119e-05, "loss": 1.5637, "step": 3044 }, { "epoch": 0.3162979121221564, "grad_norm": 0.3636477589607239, "learning_rate": 7.728051247638468e-05, "loss": 1.5991, "step": 3045 }, { "epoch": 0.3164017866417368, "grad_norm": 0.38973626494407654, "learning_rate": 7.726683710000637e-05, "loss": 1.7331, "step": 3046 }, { "epoch": 0.3165056611613171, "grad_norm": 0.42155221104621887, "learning_rate": 7.72531588199226e-05, "loss": 1.8179, "step": 3047 }, { "epoch": 0.3166095356808975, "grad_norm": 0.4188525080680847, "learning_rate": 7.723947763759e-05, "loss": 1.7464, "step": 3048 }, { "epoch": 0.31671341020047783, "grad_norm": 0.38648781180381775, "learning_rate": 7.722579355446551e-05, "loss": 1.5823, "step": 3049 }, { "epoch": 0.31681728472005816, "grad_norm": 0.4101918339729309, "learning_rate": 7.721210657200637e-05, "loss": 1.7697, "step": 3050 }, { "epoch": 0.31692115923963854, "grad_norm": 0.366359680891037, "learning_rate": 7.719841669167014e-05, "loss": 1.6861, "step": 3051 }, { "epoch": 0.31702503375921887, "grad_norm": 0.4024447202682495, "learning_rate": 7.71847239149147e-05, "loss": 1.9079, "step": 3052 }, { "epoch": 0.3171289082787992, "grad_norm": 0.3973731994628906, "learning_rate": 7.717102824319822e-05, "loss": 1.8736, "step": 3053 }, { "epoch": 0.3172327827983796, "grad_norm": 0.42864885926246643, "learning_rate": 7.715732967797915e-05, "loss": 1.7298, "step": 3054 }, { "epoch": 0.3173366573179599, "grad_norm": 0.37840789556503296, "learning_rate": 7.714362822071632e-05, "loss": 1.8096, "step": 3055 }, { "epoch": 0.31744053183754023, "grad_norm": 0.3534778654575348, "learning_rate": 7.712992387286882e-05, "loss": 1.5215, "step": 3056 }, { "epoch": 0.3175444063571206, "grad_norm": 0.37285417318344116, "learning_rate": 7.711621663589602e-05, "loss": 1.6497, "step": 3057 }, { "epoch": 0.31764828087670094, "grad_norm": 0.35734882950782776, "learning_rate": 7.71025065112577e-05, "loss": 1.6053, "step": 3058 }, { "epoch": 0.31775215539628127, "grad_norm": 0.38647857308387756, "learning_rate": 7.708879350041383e-05, "loss": 1.6968, "step": 3059 }, { "epoch": 0.31785602991586165, "grad_norm": 0.4510471522808075, "learning_rate": 7.707507760482474e-05, "loss": 1.6815, "step": 3060 }, { "epoch": 0.317959904435442, "grad_norm": 0.4056614935398102, "learning_rate": 7.706135882595108e-05, "loss": 1.6113, "step": 3061 }, { "epoch": 0.31806377895502236, "grad_norm": 0.4500080645084381, "learning_rate": 7.70476371652538e-05, "loss": 1.9964, "step": 3062 }, { "epoch": 0.3181676534746027, "grad_norm": 0.3800092935562134, "learning_rate": 7.703391262419413e-05, "loss": 1.67, "step": 3063 }, { "epoch": 0.318271527994183, "grad_norm": 0.4021591544151306, "learning_rate": 7.702018520423367e-05, "loss": 1.7399, "step": 3064 }, { "epoch": 0.3183754025137634, "grad_norm": 0.42205873131752014, "learning_rate": 7.700645490683421e-05, "loss": 1.8155, "step": 3065 }, { "epoch": 0.3184792770333437, "grad_norm": 0.3718779385089874, "learning_rate": 7.699272173345799e-05, "loss": 1.7583, "step": 3066 }, { "epoch": 0.31858315155292405, "grad_norm": 0.3647688031196594, "learning_rate": 7.697898568556745e-05, "loss": 1.5891, "step": 3067 }, { "epoch": 0.31868702607250443, "grad_norm": 0.3905600309371948, "learning_rate": 7.696524676462538e-05, "loss": 1.7358, "step": 3068 }, { "epoch": 0.31879090059208476, "grad_norm": 0.36496713757514954, "learning_rate": 7.695150497209485e-05, "loss": 1.7218, "step": 3069 }, { "epoch": 0.3188947751116651, "grad_norm": 0.4626975357532501, "learning_rate": 7.69377603094393e-05, "loss": 1.8209, "step": 3070 }, { "epoch": 0.31899864963124547, "grad_norm": 0.4146973490715027, "learning_rate": 7.692401277812236e-05, "loss": 1.6095, "step": 3071 }, { "epoch": 0.3191025241508258, "grad_norm": 0.3558516800403595, "learning_rate": 7.69102623796081e-05, "loss": 1.5576, "step": 3072 }, { "epoch": 0.3192063986704061, "grad_norm": 0.4161911606788635, "learning_rate": 7.689650911536081e-05, "loss": 1.656, "step": 3073 }, { "epoch": 0.3193102731899865, "grad_norm": 0.40754130482673645, "learning_rate": 7.688275298684509e-05, "loss": 1.7435, "step": 3074 }, { "epoch": 0.31941414770956683, "grad_norm": 0.36731454730033875, "learning_rate": 7.686899399552587e-05, "loss": 1.6421, "step": 3075 }, { "epoch": 0.3195180222291472, "grad_norm": 0.4020611047744751, "learning_rate": 7.685523214286839e-05, "loss": 1.7208, "step": 3076 }, { "epoch": 0.31962189674872754, "grad_norm": 0.48006507754325867, "learning_rate": 7.684146743033815e-05, "loss": 1.7648, "step": 3077 }, { "epoch": 0.31972577126830787, "grad_norm": 0.39849093556404114, "learning_rate": 7.6827699859401e-05, "loss": 1.751, "step": 3078 }, { "epoch": 0.31982964578788825, "grad_norm": 0.3924444615840912, "learning_rate": 7.681392943152308e-05, "loss": 1.7878, "step": 3079 }, { "epoch": 0.3199335203074686, "grad_norm": 0.4005417823791504, "learning_rate": 7.680015614817083e-05, "loss": 1.7752, "step": 3080 }, { "epoch": 0.3200373948270489, "grad_norm": 0.39548689126968384, "learning_rate": 7.678638001081102e-05, "loss": 1.7377, "step": 3081 }, { "epoch": 0.3201412693466293, "grad_norm": 0.4022079408168793, "learning_rate": 7.677260102091066e-05, "loss": 1.7732, "step": 3082 }, { "epoch": 0.3202451438662096, "grad_norm": 0.4020305573940277, "learning_rate": 7.675881917993713e-05, "loss": 1.6768, "step": 3083 }, { "epoch": 0.32034901838578994, "grad_norm": 0.374347448348999, "learning_rate": 7.674503448935808e-05, "loss": 1.59, "step": 3084 }, { "epoch": 0.3204528929053703, "grad_norm": 0.3810427784919739, "learning_rate": 7.673124695064148e-05, "loss": 1.3873, "step": 3085 }, { "epoch": 0.32055676742495065, "grad_norm": 0.43509384989738464, "learning_rate": 7.671745656525559e-05, "loss": 1.5037, "step": 3086 }, { "epoch": 0.32066064194453103, "grad_norm": 0.38209038972854614, "learning_rate": 7.670366333466899e-05, "loss": 1.6842, "step": 3087 }, { "epoch": 0.32076451646411136, "grad_norm": 0.4374238848686218, "learning_rate": 7.668986726035054e-05, "loss": 1.858, "step": 3088 }, { "epoch": 0.3208683909836917, "grad_norm": 0.4977831244468689, "learning_rate": 7.66760683437694e-05, "loss": 1.9369, "step": 3089 }, { "epoch": 0.32097226550327207, "grad_norm": 0.37174275517463684, "learning_rate": 7.666226658639507e-05, "loss": 1.6107, "step": 3090 }, { "epoch": 0.3210761400228524, "grad_norm": 0.3945876657962799, "learning_rate": 7.664846198969733e-05, "loss": 1.8063, "step": 3091 }, { "epoch": 0.3211800145424327, "grad_norm": 0.4560282230377197, "learning_rate": 7.663465455514625e-05, "loss": 2.2025, "step": 3092 }, { "epoch": 0.3212838890620131, "grad_norm": 0.3639695346355438, "learning_rate": 7.662084428421221e-05, "loss": 1.6021, "step": 3093 }, { "epoch": 0.32138776358159343, "grad_norm": 0.36369383335113525, "learning_rate": 7.660703117836591e-05, "loss": 1.6781, "step": 3094 }, { "epoch": 0.32149163810117376, "grad_norm": 0.3726049065589905, "learning_rate": 7.659321523907834e-05, "loss": 1.8033, "step": 3095 }, { "epoch": 0.32159551262075414, "grad_norm": 0.3972417116165161, "learning_rate": 7.657939646782077e-05, "loss": 1.6971, "step": 3096 }, { "epoch": 0.32169938714033447, "grad_norm": 0.4059099555015564, "learning_rate": 7.656557486606482e-05, "loss": 1.7625, "step": 3097 }, { "epoch": 0.3218032616599148, "grad_norm": 0.3809730112552643, "learning_rate": 7.655175043528235e-05, "loss": 1.79, "step": 3098 }, { "epoch": 0.3219071361794952, "grad_norm": 0.37229931354522705, "learning_rate": 7.653792317694556e-05, "loss": 1.7281, "step": 3099 }, { "epoch": 0.3220110106990755, "grad_norm": 0.40562358498573303, "learning_rate": 7.652409309252699e-05, "loss": 1.7644, "step": 3100 }, { "epoch": 0.3221148852186559, "grad_norm": 0.3879106044769287, "learning_rate": 7.651026018349938e-05, "loss": 1.815, "step": 3101 }, { "epoch": 0.3222187597382362, "grad_norm": 0.39501863718032837, "learning_rate": 7.649642445133585e-05, "loss": 1.8004, "step": 3102 }, { "epoch": 0.32232263425781654, "grad_norm": 0.42894792556762695, "learning_rate": 7.64825858975098e-05, "loss": 1.8058, "step": 3103 }, { "epoch": 0.3224265087773969, "grad_norm": 0.4245396852493286, "learning_rate": 7.646874452349491e-05, "loss": 1.7229, "step": 3104 }, { "epoch": 0.32253038329697725, "grad_norm": 0.3750874996185303, "learning_rate": 7.64549003307652e-05, "loss": 1.6437, "step": 3105 }, { "epoch": 0.3226342578165576, "grad_norm": 0.44294023513793945, "learning_rate": 7.644105332079497e-05, "loss": 1.8526, "step": 3106 }, { "epoch": 0.32273813233613796, "grad_norm": 0.4349030554294586, "learning_rate": 7.642720349505881e-05, "loss": 1.8628, "step": 3107 }, { "epoch": 0.3228420068557183, "grad_norm": 0.4126608967781067, "learning_rate": 7.641335085503161e-05, "loss": 1.7873, "step": 3108 }, { "epoch": 0.3229458813752986, "grad_norm": 0.3600351810455322, "learning_rate": 7.639949540218858e-05, "loss": 1.7672, "step": 3109 }, { "epoch": 0.323049755894879, "grad_norm": 0.38123077154159546, "learning_rate": 7.63856371380052e-05, "loss": 1.7417, "step": 3110 }, { "epoch": 0.3231536304144593, "grad_norm": 0.41736799478530884, "learning_rate": 7.63717760639573e-05, "loss": 1.855, "step": 3111 }, { "epoch": 0.3232575049340397, "grad_norm": 0.42737138271331787, "learning_rate": 7.635791218152094e-05, "loss": 1.766, "step": 3112 }, { "epoch": 0.32336137945362003, "grad_norm": 0.37469685077667236, "learning_rate": 7.634404549217254e-05, "loss": 1.6696, "step": 3113 }, { "epoch": 0.32346525397320036, "grad_norm": 0.4223494231700897, "learning_rate": 7.633017599738879e-05, "loss": 1.7205, "step": 3114 }, { "epoch": 0.32356912849278074, "grad_norm": 0.39290282130241394, "learning_rate": 7.631630369864668e-05, "loss": 1.6764, "step": 3115 }, { "epoch": 0.32367300301236107, "grad_norm": 0.38165774941444397, "learning_rate": 7.63024285974235e-05, "loss": 1.633, "step": 3116 }, { "epoch": 0.3237768775319414, "grad_norm": 0.46332311630249023, "learning_rate": 7.628855069519685e-05, "loss": 1.9059, "step": 3117 }, { "epoch": 0.3238807520515218, "grad_norm": 0.38814160227775574, "learning_rate": 7.627466999344459e-05, "loss": 1.6475, "step": 3118 }, { "epoch": 0.3239846265711021, "grad_norm": 0.44505298137664795, "learning_rate": 7.626078649364494e-05, "loss": 1.8901, "step": 3119 }, { "epoch": 0.32408850109068243, "grad_norm": 0.4025139808654785, "learning_rate": 7.624690019727636e-05, "loss": 1.62, "step": 3120 }, { "epoch": 0.3241923756102628, "grad_norm": 0.37177082896232605, "learning_rate": 7.623301110581764e-05, "loss": 1.7613, "step": 3121 }, { "epoch": 0.32429625012984314, "grad_norm": 0.38354530930519104, "learning_rate": 7.621911922074787e-05, "loss": 1.6685, "step": 3122 }, { "epoch": 0.32440012464942347, "grad_norm": 0.404633492231369, "learning_rate": 7.620522454354644e-05, "loss": 1.8067, "step": 3123 }, { "epoch": 0.32450399916900385, "grad_norm": 0.3900499641895294, "learning_rate": 7.619132707569299e-05, "loss": 1.8046, "step": 3124 }, { "epoch": 0.3246078736885842, "grad_norm": 0.4414837062358856, "learning_rate": 7.617742681866749e-05, "loss": 1.9807, "step": 3125 }, { "epoch": 0.32471174820816456, "grad_norm": 0.3875233232975006, "learning_rate": 7.616352377395025e-05, "loss": 1.7041, "step": 3126 }, { "epoch": 0.3248156227277449, "grad_norm": 0.4394824206829071, "learning_rate": 7.614961794302178e-05, "loss": 1.7103, "step": 3127 }, { "epoch": 0.3249194972473252, "grad_norm": 0.38259413838386536, "learning_rate": 7.6135709327363e-05, "loss": 1.6377, "step": 3128 }, { "epoch": 0.3250233717669056, "grad_norm": 0.4169695973396301, "learning_rate": 7.612179792845504e-05, "loss": 1.7518, "step": 3129 }, { "epoch": 0.3251272462864859, "grad_norm": 0.4079383909702301, "learning_rate": 7.610788374777935e-05, "loss": 1.6768, "step": 3130 }, { "epoch": 0.32523112080606625, "grad_norm": 0.4074368476867676, "learning_rate": 7.609396678681771e-05, "loss": 1.8424, "step": 3131 }, { "epoch": 0.32533499532564664, "grad_norm": 0.41162872314453125, "learning_rate": 7.608004704705212e-05, "loss": 1.8505, "step": 3132 }, { "epoch": 0.32543886984522696, "grad_norm": 0.41227778792381287, "learning_rate": 7.606612452996495e-05, "loss": 1.6893, "step": 3133 }, { "epoch": 0.3255427443648073, "grad_norm": 0.3721327781677246, "learning_rate": 7.605219923703886e-05, "loss": 1.7895, "step": 3134 }, { "epoch": 0.3256466188843877, "grad_norm": 0.3553870916366577, "learning_rate": 7.603827116975677e-05, "loss": 1.5966, "step": 3135 }, { "epoch": 0.325750493403968, "grad_norm": 0.35715457797050476, "learning_rate": 7.602434032960189e-05, "loss": 1.7413, "step": 3136 }, { "epoch": 0.3258543679235483, "grad_norm": 0.39415115118026733, "learning_rate": 7.601040671805779e-05, "loss": 1.3362, "step": 3137 }, { "epoch": 0.3259582424431287, "grad_norm": 0.3815273344516754, "learning_rate": 7.599647033660824e-05, "loss": 1.6745, "step": 3138 }, { "epoch": 0.32606211696270904, "grad_norm": 0.3810808062553406, "learning_rate": 7.598253118673738e-05, "loss": 1.6381, "step": 3139 }, { "epoch": 0.3261659914822894, "grad_norm": 0.43945252895355225, "learning_rate": 7.596858926992962e-05, "loss": 1.8387, "step": 3140 }, { "epoch": 0.32626986600186975, "grad_norm": 0.38324853777885437, "learning_rate": 7.595464458766968e-05, "loss": 1.6446, "step": 3141 }, { "epoch": 0.3263737405214501, "grad_norm": 0.4006134569644928, "learning_rate": 7.594069714144252e-05, "loss": 1.4691, "step": 3142 }, { "epoch": 0.32647761504103046, "grad_norm": 0.4041348993778229, "learning_rate": 7.592674693273348e-05, "loss": 1.8108, "step": 3143 }, { "epoch": 0.3265814895606108, "grad_norm": 0.380634605884552, "learning_rate": 7.591279396302812e-05, "loss": 1.7062, "step": 3144 }, { "epoch": 0.3266853640801911, "grad_norm": 0.3991457223892212, "learning_rate": 7.589883823381234e-05, "loss": 1.6658, "step": 3145 }, { "epoch": 0.3267892385997715, "grad_norm": 0.4109683334827423, "learning_rate": 7.58848797465723e-05, "loss": 1.7899, "step": 3146 }, { "epoch": 0.3268931131193518, "grad_norm": 0.38975322246551514, "learning_rate": 7.587091850279447e-05, "loss": 1.7506, "step": 3147 }, { "epoch": 0.32699698763893215, "grad_norm": 0.3871016800403595, "learning_rate": 7.585695450396564e-05, "loss": 1.7103, "step": 3148 }, { "epoch": 0.32710086215851253, "grad_norm": 0.36475732922554016, "learning_rate": 7.584298775157282e-05, "loss": 1.5979, "step": 3149 }, { "epoch": 0.32720473667809286, "grad_norm": 0.3877626359462738, "learning_rate": 7.58290182471034e-05, "loss": 1.8509, "step": 3150 }, { "epoch": 0.32730861119767324, "grad_norm": 0.36147022247314453, "learning_rate": 7.581504599204503e-05, "loss": 1.686, "step": 3151 }, { "epoch": 0.32741248571725357, "grad_norm": 0.38483577966690063, "learning_rate": 7.58010709878856e-05, "loss": 1.8448, "step": 3152 }, { "epoch": 0.3275163602368339, "grad_norm": 0.415414035320282, "learning_rate": 7.578709323611338e-05, "loss": 1.7782, "step": 3153 }, { "epoch": 0.3276202347564143, "grad_norm": 0.3928332030773163, "learning_rate": 7.577311273821687e-05, "loss": 1.7276, "step": 3154 }, { "epoch": 0.3277241092759946, "grad_norm": 0.36280107498168945, "learning_rate": 7.575912949568489e-05, "loss": 1.7094, "step": 3155 }, { "epoch": 0.32782798379557493, "grad_norm": 0.36135435104370117, "learning_rate": 7.574514351000657e-05, "loss": 1.6252, "step": 3156 }, { "epoch": 0.3279318583151553, "grad_norm": 0.38758692145347595, "learning_rate": 7.573115478267126e-05, "loss": 1.7715, "step": 3157 }, { "epoch": 0.32803573283473564, "grad_norm": 0.39279675483703613, "learning_rate": 7.571716331516869e-05, "loss": 1.71, "step": 3158 }, { "epoch": 0.32813960735431597, "grad_norm": 0.36881011724472046, "learning_rate": 7.570316910898882e-05, "loss": 1.6717, "step": 3159 }, { "epoch": 0.32824348187389635, "grad_norm": 0.3746923506259918, "learning_rate": 7.568917216562193e-05, "loss": 1.6144, "step": 3160 }, { "epoch": 0.3283473563934767, "grad_norm": 0.4102190434932709, "learning_rate": 7.56751724865586e-05, "loss": 1.7229, "step": 3161 }, { "epoch": 0.328451230913057, "grad_norm": 0.39784857630729675, "learning_rate": 7.566117007328967e-05, "loss": 1.6128, "step": 3162 }, { "epoch": 0.3285551054326374, "grad_norm": 0.37368080019950867, "learning_rate": 7.564716492730628e-05, "loss": 1.8158, "step": 3163 }, { "epoch": 0.3286589799522177, "grad_norm": 0.4041743576526642, "learning_rate": 7.56331570500999e-05, "loss": 1.5258, "step": 3164 }, { "epoch": 0.3287628544717981, "grad_norm": 0.4261610507965088, "learning_rate": 7.561914644316221e-05, "loss": 1.8395, "step": 3165 }, { "epoch": 0.3288667289913784, "grad_norm": 0.3845587372779846, "learning_rate": 7.560513310798529e-05, "loss": 1.7382, "step": 3166 }, { "epoch": 0.32897060351095875, "grad_norm": 0.37753623723983765, "learning_rate": 7.559111704606142e-05, "loss": 1.7647, "step": 3167 }, { "epoch": 0.32907447803053913, "grad_norm": 0.3923317492008209, "learning_rate": 7.557709825888319e-05, "loss": 1.6671, "step": 3168 }, { "epoch": 0.32917835255011946, "grad_norm": 0.4080863893032074, "learning_rate": 7.556307674794351e-05, "loss": 1.8564, "step": 3169 }, { "epoch": 0.3292822270696998, "grad_norm": 0.38211774826049805, "learning_rate": 7.554905251473557e-05, "loss": 1.8405, "step": 3170 }, { "epoch": 0.32938610158928017, "grad_norm": 0.3729119896888733, "learning_rate": 7.55350255607528e-05, "loss": 1.6871, "step": 3171 }, { "epoch": 0.3294899761088605, "grad_norm": 0.36250361800193787, "learning_rate": 7.5520995887489e-05, "loss": 1.6192, "step": 3172 }, { "epoch": 0.3295938506284408, "grad_norm": 0.4741998314857483, "learning_rate": 7.550696349643822e-05, "loss": 1.9924, "step": 3173 }, { "epoch": 0.3296977251480212, "grad_norm": 0.39271846413612366, "learning_rate": 7.549292838909478e-05, "loss": 1.7255, "step": 3174 }, { "epoch": 0.32980159966760153, "grad_norm": 0.4134219288825989, "learning_rate": 7.547889056695332e-05, "loss": 1.7508, "step": 3175 }, { "epoch": 0.32990547418718186, "grad_norm": 0.377145379781723, "learning_rate": 7.546485003150876e-05, "loss": 1.7239, "step": 3176 }, { "epoch": 0.33000934870676224, "grad_norm": 0.37137743830680847, "learning_rate": 7.54508067842563e-05, "loss": 1.6987, "step": 3177 }, { "epoch": 0.33011322322634257, "grad_norm": 0.36599913239479065, "learning_rate": 7.543676082669146e-05, "loss": 1.675, "step": 3178 }, { "epoch": 0.33021709774592295, "grad_norm": 0.3733290731906891, "learning_rate": 7.542271216030998e-05, "loss": 1.6841, "step": 3179 }, { "epoch": 0.3303209722655033, "grad_norm": 0.3710775375366211, "learning_rate": 7.540866078660797e-05, "loss": 1.7604, "step": 3180 }, { "epoch": 0.3304248467850836, "grad_norm": 0.4162429869174957, "learning_rate": 7.539460670708178e-05, "loss": 1.7021, "step": 3181 }, { "epoch": 0.330528721304664, "grad_norm": 0.4121178090572357, "learning_rate": 7.538054992322806e-05, "loss": 1.885, "step": 3182 }, { "epoch": 0.3306325958242443, "grad_norm": 0.3530847728252411, "learning_rate": 7.536649043654376e-05, "loss": 1.3871, "step": 3183 }, { "epoch": 0.33073647034382464, "grad_norm": 0.42063409090042114, "learning_rate": 7.535242824852608e-05, "loss": 1.7323, "step": 3184 }, { "epoch": 0.330840344863405, "grad_norm": 0.4144652485847473, "learning_rate": 7.533836336067257e-05, "loss": 1.7173, "step": 3185 }, { "epoch": 0.33094421938298535, "grad_norm": 0.3715633749961853, "learning_rate": 7.5324295774481e-05, "loss": 1.6793, "step": 3186 }, { "epoch": 0.3310480939025657, "grad_norm": 0.40452414751052856, "learning_rate": 7.531022549144946e-05, "loss": 1.7443, "step": 3187 }, { "epoch": 0.33115196842214606, "grad_norm": 0.3777833878993988, "learning_rate": 7.529615251307631e-05, "loss": 1.675, "step": 3188 }, { "epoch": 0.3312558429417264, "grad_norm": 0.3736540973186493, "learning_rate": 7.528207684086027e-05, "loss": 1.7026, "step": 3189 }, { "epoch": 0.33135971746130677, "grad_norm": 0.3794771134853363, "learning_rate": 7.526799847630024e-05, "loss": 1.6583, "step": 3190 }, { "epoch": 0.3314635919808871, "grad_norm": 0.38491761684417725, "learning_rate": 7.525391742089547e-05, "loss": 1.6339, "step": 3191 }, { "epoch": 0.3315674665004674, "grad_norm": 0.3660680055618286, "learning_rate": 7.52398336761455e-05, "loss": 1.7295, "step": 3192 }, { "epoch": 0.3316713410200478, "grad_norm": 0.4049983024597168, "learning_rate": 7.522574724355009e-05, "loss": 1.8415, "step": 3193 }, { "epoch": 0.33177521553962813, "grad_norm": 0.4031364917755127, "learning_rate": 7.52116581246094e-05, "loss": 1.8444, "step": 3194 }, { "epoch": 0.33187909005920846, "grad_norm": 0.3920493721961975, "learning_rate": 7.519756632082376e-05, "loss": 1.9211, "step": 3195 }, { "epoch": 0.33198296457878884, "grad_norm": 0.40728676319122314, "learning_rate": 7.518347183369385e-05, "loss": 1.5216, "step": 3196 }, { "epoch": 0.33208683909836917, "grad_norm": 0.3954370617866516, "learning_rate": 7.516937466472065e-05, "loss": 1.6549, "step": 3197 }, { "epoch": 0.3321907136179495, "grad_norm": 0.3614353537559509, "learning_rate": 7.515527481540536e-05, "loss": 1.6972, "step": 3198 }, { "epoch": 0.3322945881375299, "grad_norm": 0.35628658533096313, "learning_rate": 7.514117228724953e-05, "loss": 1.6622, "step": 3199 }, { "epoch": 0.3323984626571102, "grad_norm": 0.3558284342288971, "learning_rate": 7.512706708175496e-05, "loss": 1.5866, "step": 3200 }, { "epoch": 0.33250233717669053, "grad_norm": 0.3872045576572418, "learning_rate": 7.511295920042374e-05, "loss": 1.7721, "step": 3201 }, { "epoch": 0.3326062116962709, "grad_norm": 0.3917664587497711, "learning_rate": 7.509884864475825e-05, "loss": 1.7278, "step": 3202 }, { "epoch": 0.33271008621585124, "grad_norm": 0.3571791648864746, "learning_rate": 7.508473541626115e-05, "loss": 1.7481, "step": 3203 }, { "epoch": 0.3328139607354316, "grad_norm": 0.3802284002304077, "learning_rate": 7.507061951643541e-05, "loss": 1.7085, "step": 3204 }, { "epoch": 0.33291783525501195, "grad_norm": 0.41221603751182556, "learning_rate": 7.505650094678425e-05, "loss": 1.8142, "step": 3205 }, { "epoch": 0.3330217097745923, "grad_norm": 0.41970524191856384, "learning_rate": 7.504237970881118e-05, "loss": 1.7213, "step": 3206 }, { "epoch": 0.33312558429417266, "grad_norm": 0.38147467374801636, "learning_rate": 7.502825580402002e-05, "loss": 1.637, "step": 3207 }, { "epoch": 0.333229458813753, "grad_norm": 0.430268257856369, "learning_rate": 7.501412923391483e-05, "loss": 1.7966, "step": 3208 }, { "epoch": 0.3333333333333333, "grad_norm": 0.422599732875824, "learning_rate": 7.500000000000001e-05, "loss": 1.7097, "step": 3209 }, { "epoch": 0.3334372078529137, "grad_norm": 0.3947455585002899, "learning_rate": 7.498586810378019e-05, "loss": 1.6064, "step": 3210 }, { "epoch": 0.333541082372494, "grad_norm": 0.43581533432006836, "learning_rate": 7.49717335467603e-05, "loss": 1.8244, "step": 3211 }, { "epoch": 0.33364495689207435, "grad_norm": 0.42619240283966064, "learning_rate": 7.49575963304456e-05, "loss": 1.8339, "step": 3212 }, { "epoch": 0.33374883141165473, "grad_norm": 0.4488205015659332, "learning_rate": 7.494345645634156e-05, "loss": 1.6953, "step": 3213 }, { "epoch": 0.33385270593123506, "grad_norm": 0.35742267966270447, "learning_rate": 7.492931392595397e-05, "loss": 1.7246, "step": 3214 }, { "epoch": 0.3339565804508154, "grad_norm": 0.37364768981933594, "learning_rate": 7.49151687407889e-05, "loss": 1.6528, "step": 3215 }, { "epoch": 0.33406045497039577, "grad_norm": 0.3801439702510834, "learning_rate": 7.49010209023527e-05, "loss": 1.5913, "step": 3216 }, { "epoch": 0.3341643294899761, "grad_norm": 0.37618300318717957, "learning_rate": 7.488687041215202e-05, "loss": 1.609, "step": 3217 }, { "epoch": 0.3342682040095565, "grad_norm": 0.37247294187545776, "learning_rate": 7.487271727169377e-05, "loss": 1.6473, "step": 3218 }, { "epoch": 0.3343720785291368, "grad_norm": 0.3996807336807251, "learning_rate": 7.485856148248513e-05, "loss": 1.7744, "step": 3219 }, { "epoch": 0.33447595304871713, "grad_norm": 0.3898894488811493, "learning_rate": 7.484440304603362e-05, "loss": 1.8776, "step": 3220 }, { "epoch": 0.3345798275682975, "grad_norm": 0.4372991621494293, "learning_rate": 7.483024196384696e-05, "loss": 1.8293, "step": 3221 }, { "epoch": 0.33468370208787784, "grad_norm": 0.4678659439086914, "learning_rate": 7.481607823743321e-05, "loss": 1.8113, "step": 3222 }, { "epoch": 0.33478757660745817, "grad_norm": 0.38630014657974243, "learning_rate": 7.480191186830072e-05, "loss": 1.6579, "step": 3223 }, { "epoch": 0.33489145112703855, "grad_norm": 0.4188450276851654, "learning_rate": 7.478774285795805e-05, "loss": 1.9302, "step": 3224 }, { "epoch": 0.3349953256466189, "grad_norm": 0.3890570402145386, "learning_rate": 7.477357120791415e-05, "loss": 1.8298, "step": 3225 }, { "epoch": 0.3350992001661992, "grad_norm": 0.4579260051250458, "learning_rate": 7.475939691967814e-05, "loss": 1.673, "step": 3226 }, { "epoch": 0.3352030746857796, "grad_norm": 0.39062821865081787, "learning_rate": 7.47452199947595e-05, "loss": 1.7051, "step": 3227 }, { "epoch": 0.3353069492053599, "grad_norm": 0.37739062309265137, "learning_rate": 7.473104043466794e-05, "loss": 1.6151, "step": 3228 }, { "epoch": 0.3354108237249403, "grad_norm": 0.3767510950565338, "learning_rate": 7.471685824091348e-05, "loss": 1.7629, "step": 3229 }, { "epoch": 0.3355146982445206, "grad_norm": 0.41610831022262573, "learning_rate": 7.470267341500642e-05, "loss": 1.7987, "step": 3230 }, { "epoch": 0.33561857276410095, "grad_norm": 0.3895459771156311, "learning_rate": 7.468848595845733e-05, "loss": 1.5922, "step": 3231 }, { "epoch": 0.33572244728368134, "grad_norm": 0.3886757493019104, "learning_rate": 7.467429587277705e-05, "loss": 1.6534, "step": 3232 }, { "epoch": 0.33582632180326166, "grad_norm": 0.37435784935951233, "learning_rate": 7.466010315947676e-05, "loss": 1.7319, "step": 3233 }, { "epoch": 0.335930196322842, "grad_norm": 0.4168533682823181, "learning_rate": 7.464590782006782e-05, "loss": 1.8126, "step": 3234 }, { "epoch": 0.33603407084242237, "grad_norm": 0.3834715187549591, "learning_rate": 7.463170985606194e-05, "loss": 1.7701, "step": 3235 }, { "epoch": 0.3361379453620027, "grad_norm": 0.44510364532470703, "learning_rate": 7.46175092689711e-05, "loss": 1.8166, "step": 3236 }, { "epoch": 0.336241819881583, "grad_norm": 0.41382500529289246, "learning_rate": 7.460330606030754e-05, "loss": 1.9408, "step": 3237 }, { "epoch": 0.3363456944011634, "grad_norm": 0.3592391014099121, "learning_rate": 7.45891002315838e-05, "loss": 1.6185, "step": 3238 }, { "epoch": 0.33644956892074374, "grad_norm": 0.36257967352867126, "learning_rate": 7.457489178431269e-05, "loss": 1.5973, "step": 3239 }, { "epoch": 0.33655344344032406, "grad_norm": 0.368685245513916, "learning_rate": 7.45606807200073e-05, "loss": 1.5255, "step": 3240 }, { "epoch": 0.33665731795990445, "grad_norm": 0.3916224241256714, "learning_rate": 7.4546467040181e-05, "loss": 1.6482, "step": 3241 }, { "epoch": 0.33676119247948477, "grad_norm": 0.36512747406959534, "learning_rate": 7.453225074634742e-05, "loss": 1.5935, "step": 3242 }, { "epoch": 0.33686506699906515, "grad_norm": 0.40340378880500793, "learning_rate": 7.451803184002048e-05, "loss": 1.8217, "step": 3243 }, { "epoch": 0.3369689415186455, "grad_norm": 0.4013363718986511, "learning_rate": 7.450381032271442e-05, "loss": 1.698, "step": 3244 }, { "epoch": 0.3370728160382258, "grad_norm": 0.4022335708141327, "learning_rate": 7.44895861959437e-05, "loss": 1.7154, "step": 3245 }, { "epoch": 0.3371766905578062, "grad_norm": 0.4084652364253998, "learning_rate": 7.447535946122304e-05, "loss": 1.6902, "step": 3246 }, { "epoch": 0.3372805650773865, "grad_norm": 0.41394492983818054, "learning_rate": 7.446113012006756e-05, "loss": 1.6136, "step": 3247 }, { "epoch": 0.33738443959696685, "grad_norm": 0.3942053020000458, "learning_rate": 7.444689817399251e-05, "loss": 1.7797, "step": 3248 }, { "epoch": 0.33748831411654723, "grad_norm": 0.41565534472465515, "learning_rate": 7.443266362451349e-05, "loss": 1.7013, "step": 3249 }, { "epoch": 0.33759218863612755, "grad_norm": 0.3893018960952759, "learning_rate": 7.441842647314638e-05, "loss": 1.7065, "step": 3250 }, { "epoch": 0.3376960631557079, "grad_norm": 0.4059962034225464, "learning_rate": 7.440418672140733e-05, "loss": 1.7509, "step": 3251 }, { "epoch": 0.33779993767528826, "grad_norm": 0.4266146719455719, "learning_rate": 7.438994437081272e-05, "loss": 1.7629, "step": 3252 }, { "epoch": 0.3379038121948686, "grad_norm": 0.37736377120018005, "learning_rate": 7.437569942287932e-05, "loss": 1.8509, "step": 3253 }, { "epoch": 0.3380076867144489, "grad_norm": 0.3954285681247711, "learning_rate": 7.436145187912406e-05, "loss": 1.6512, "step": 3254 }, { "epoch": 0.3381115612340293, "grad_norm": 0.3633100092411041, "learning_rate": 7.43472017410642e-05, "loss": 1.5277, "step": 3255 }, { "epoch": 0.33821543575360963, "grad_norm": 0.3525071144104004, "learning_rate": 7.433294901021727e-05, "loss": 1.5944, "step": 3256 }, { "epoch": 0.33831931027319, "grad_norm": 0.4135834574699402, "learning_rate": 7.431869368810106e-05, "loss": 1.6653, "step": 3257 }, { "epoch": 0.33842318479277034, "grad_norm": 0.40316396951675415, "learning_rate": 7.430443577623369e-05, "loss": 1.7842, "step": 3258 }, { "epoch": 0.33852705931235066, "grad_norm": 0.3851187825202942, "learning_rate": 7.429017527613347e-05, "loss": 1.721, "step": 3259 }, { "epoch": 0.33863093383193105, "grad_norm": 0.4158236086368561, "learning_rate": 7.427591218931906e-05, "loss": 1.9518, "step": 3260 }, { "epoch": 0.3387348083515114, "grad_norm": 0.3790571689605713, "learning_rate": 7.426164651730937e-05, "loss": 1.5893, "step": 3261 }, { "epoch": 0.3388386828710917, "grad_norm": 0.37710022926330566, "learning_rate": 7.424737826162358e-05, "loss": 1.6549, "step": 3262 }, { "epoch": 0.3389425573906721, "grad_norm": 0.3882453441619873, "learning_rate": 7.423310742378113e-05, "loss": 1.6472, "step": 3263 }, { "epoch": 0.3390464319102524, "grad_norm": 0.39669013023376465, "learning_rate": 7.421883400530176e-05, "loss": 1.7416, "step": 3264 }, { "epoch": 0.33915030642983274, "grad_norm": 0.41558176279067993, "learning_rate": 7.42045580077055e-05, "loss": 1.6492, "step": 3265 }, { "epoch": 0.3392541809494131, "grad_norm": 0.4130480885505676, "learning_rate": 7.41902794325126e-05, "loss": 1.7518, "step": 3266 }, { "epoch": 0.33935805546899345, "grad_norm": 0.398885577917099, "learning_rate": 7.417599828124363e-05, "loss": 1.6747, "step": 3267 }, { "epoch": 0.33946192998857383, "grad_norm": 0.39864227175712585, "learning_rate": 7.416171455541943e-05, "loss": 1.7727, "step": 3268 }, { "epoch": 0.33956580450815416, "grad_norm": 0.4171668589115143, "learning_rate": 7.414742825656111e-05, "loss": 1.6616, "step": 3269 }, { "epoch": 0.3396696790277345, "grad_norm": 0.379443883895874, "learning_rate": 7.413313938619003e-05, "loss": 1.8302, "step": 3270 }, { "epoch": 0.33977355354731487, "grad_norm": 0.3963342607021332, "learning_rate": 7.411884794582784e-05, "loss": 1.7629, "step": 3271 }, { "epoch": 0.3398774280668952, "grad_norm": 0.3690603971481323, "learning_rate": 7.410455393699647e-05, "loss": 1.6549, "step": 3272 }, { "epoch": 0.3399813025864755, "grad_norm": 0.36229610443115234, "learning_rate": 7.409025736121816e-05, "loss": 1.6949, "step": 3273 }, { "epoch": 0.3400851771060559, "grad_norm": 0.3683745265007019, "learning_rate": 7.407595822001532e-05, "loss": 1.8114, "step": 3274 }, { "epoch": 0.34018905162563623, "grad_norm": 0.3948541283607483, "learning_rate": 7.406165651491075e-05, "loss": 1.6883, "step": 3275 }, { "epoch": 0.34029292614521656, "grad_norm": 0.4150826036930084, "learning_rate": 7.404735224742745e-05, "loss": 1.7254, "step": 3276 }, { "epoch": 0.34039680066479694, "grad_norm": 0.4049845337867737, "learning_rate": 7.40330454190887e-05, "loss": 1.6418, "step": 3277 }, { "epoch": 0.34050067518437727, "grad_norm": 0.37728235125541687, "learning_rate": 7.401873603141809e-05, "loss": 1.8138, "step": 3278 }, { "epoch": 0.3406045497039576, "grad_norm": 0.4334777891635895, "learning_rate": 7.400442408593944e-05, "loss": 1.9727, "step": 3279 }, { "epoch": 0.340708424223538, "grad_norm": 0.395805299282074, "learning_rate": 7.399010958417684e-05, "loss": 1.7511, "step": 3280 }, { "epoch": 0.3408122987431183, "grad_norm": 0.3635464310646057, "learning_rate": 7.397579252765475e-05, "loss": 1.7119, "step": 3281 }, { "epoch": 0.3409161732626987, "grad_norm": 0.3630931079387665, "learning_rate": 7.396147291789775e-05, "loss": 1.7253, "step": 3282 }, { "epoch": 0.341020047782279, "grad_norm": 0.37721481919288635, "learning_rate": 7.394715075643078e-05, "loss": 1.7831, "step": 3283 }, { "epoch": 0.34112392230185934, "grad_norm": 0.41346338391304016, "learning_rate": 7.393282604477907e-05, "loss": 1.8239, "step": 3284 }, { "epoch": 0.3412277968214397, "grad_norm": 0.3926272392272949, "learning_rate": 7.391849878446805e-05, "loss": 1.8158, "step": 3285 }, { "epoch": 0.34133167134102005, "grad_norm": 0.37988725304603577, "learning_rate": 7.39041689770235e-05, "loss": 1.6549, "step": 3286 }, { "epoch": 0.3414355458606004, "grad_norm": 0.46685877442359924, "learning_rate": 7.388983662397139e-05, "loss": 1.8314, "step": 3287 }, { "epoch": 0.34153942038018076, "grad_norm": 0.38952314853668213, "learning_rate": 7.387550172683803e-05, "loss": 1.9213, "step": 3288 }, { "epoch": 0.3416432948997611, "grad_norm": 0.3613092005252838, "learning_rate": 7.386116428715e-05, "loss": 1.7343, "step": 3289 }, { "epoch": 0.3417471694193414, "grad_norm": 0.3758079707622528, "learning_rate": 7.384682430643409e-05, "loss": 1.6233, "step": 3290 }, { "epoch": 0.3418510439389218, "grad_norm": 0.36782777309417725, "learning_rate": 7.383248178621739e-05, "loss": 1.7206, "step": 3291 }, { "epoch": 0.3419549184585021, "grad_norm": 0.36162370443344116, "learning_rate": 7.381813672802729e-05, "loss": 1.6387, "step": 3292 }, { "epoch": 0.34205879297808245, "grad_norm": 0.3870641887187958, "learning_rate": 7.380378913339142e-05, "loss": 1.7021, "step": 3293 }, { "epoch": 0.34216266749766283, "grad_norm": 0.366452157497406, "learning_rate": 7.378943900383766e-05, "loss": 1.6213, "step": 3294 }, { "epoch": 0.34226654201724316, "grad_norm": 0.40911996364593506, "learning_rate": 7.377508634089422e-05, "loss": 1.8177, "step": 3295 }, { "epoch": 0.34237041653682354, "grad_norm": 0.396062433719635, "learning_rate": 7.376073114608955e-05, "loss": 1.7282, "step": 3296 }, { "epoch": 0.34247429105640387, "grad_norm": 0.4013812839984894, "learning_rate": 7.374637342095236e-05, "loss": 1.7302, "step": 3297 }, { "epoch": 0.3425781655759842, "grad_norm": 0.37226590514183044, "learning_rate": 7.373201316701162e-05, "loss": 1.6312, "step": 3298 }, { "epoch": 0.3426820400955646, "grad_norm": 0.39679157733917236, "learning_rate": 7.371765038579658e-05, "loss": 1.6591, "step": 3299 }, { "epoch": 0.3427859146151449, "grad_norm": 0.4248996078968048, "learning_rate": 7.37032850788368e-05, "loss": 1.8358, "step": 3300 }, { "epoch": 0.34288978913472523, "grad_norm": 0.37451761960983276, "learning_rate": 7.368891724766204e-05, "loss": 1.7225, "step": 3301 }, { "epoch": 0.3429936636543056, "grad_norm": 0.38723859190940857, "learning_rate": 7.367454689380238e-05, "loss": 1.5813, "step": 3302 }, { "epoch": 0.34309753817388594, "grad_norm": 0.4239073395729065, "learning_rate": 7.366017401878813e-05, "loss": 1.8608, "step": 3303 }, { "epoch": 0.34320141269346627, "grad_norm": 0.39057475328445435, "learning_rate": 7.364579862414993e-05, "loss": 1.7135, "step": 3304 }, { "epoch": 0.34330528721304665, "grad_norm": 0.4171145558357239, "learning_rate": 7.36314207114186e-05, "loss": 1.8721, "step": 3305 }, { "epoch": 0.343409161732627, "grad_norm": 0.4564177095890045, "learning_rate": 7.36170402821253e-05, "loss": 1.6559, "step": 3306 }, { "epoch": 0.34351303625220736, "grad_norm": 0.4414724111557007, "learning_rate": 7.360265733780142e-05, "loss": 1.7733, "step": 3307 }, { "epoch": 0.3436169107717877, "grad_norm": 0.35561293363571167, "learning_rate": 7.358827187997867e-05, "loss": 1.5218, "step": 3308 }, { "epoch": 0.343720785291368, "grad_norm": 0.41624075174331665, "learning_rate": 7.357388391018893e-05, "loss": 1.6683, "step": 3309 }, { "epoch": 0.3438246598109484, "grad_norm": 0.38379234075546265, "learning_rate": 7.355949342996447e-05, "loss": 1.7642, "step": 3310 }, { "epoch": 0.3439285343305287, "grad_norm": 0.40274274349212646, "learning_rate": 7.35451004408377e-05, "loss": 1.7529, "step": 3311 }, { "epoch": 0.34403240885010905, "grad_norm": 0.4023614525794983, "learning_rate": 7.353070494434141e-05, "loss": 1.77, "step": 3312 }, { "epoch": 0.34413628336968943, "grad_norm": 0.41878741979599, "learning_rate": 7.351630694200857e-05, "loss": 1.7528, "step": 3313 }, { "epoch": 0.34424015788926976, "grad_norm": 0.3700146973133087, "learning_rate": 7.35019064353725e-05, "loss": 1.677, "step": 3314 }, { "epoch": 0.3443440324088501, "grad_norm": 0.44311147928237915, "learning_rate": 7.348750342596671e-05, "loss": 1.9235, "step": 3315 }, { "epoch": 0.34444790692843047, "grad_norm": 0.4150424599647522, "learning_rate": 7.3473097915325e-05, "loss": 1.6732, "step": 3316 }, { "epoch": 0.3445517814480108, "grad_norm": 0.3866238594055176, "learning_rate": 7.345868990498146e-05, "loss": 1.6598, "step": 3317 }, { "epoch": 0.3446556559675911, "grad_norm": 0.42701780796051025, "learning_rate": 7.344427939647045e-05, "loss": 1.7695, "step": 3318 }, { "epoch": 0.3447595304871715, "grad_norm": 0.3794342577457428, "learning_rate": 7.342986639132654e-05, "loss": 1.6552, "step": 3319 }, { "epoch": 0.34486340500675183, "grad_norm": 0.4094541668891907, "learning_rate": 7.341545089108464e-05, "loss": 1.9607, "step": 3320 }, { "epoch": 0.3449672795263322, "grad_norm": 0.4172968566417694, "learning_rate": 7.340103289727983e-05, "loss": 1.7933, "step": 3321 }, { "epoch": 0.34507115404591254, "grad_norm": 0.402832955121994, "learning_rate": 7.338661241144758e-05, "loss": 1.8795, "step": 3322 }, { "epoch": 0.34517502856549287, "grad_norm": 0.38485434651374817, "learning_rate": 7.337218943512353e-05, "loss": 1.764, "step": 3323 }, { "epoch": 0.34527890308507325, "grad_norm": 0.4096323847770691, "learning_rate": 7.335776396984361e-05, "loss": 1.814, "step": 3324 }, { "epoch": 0.3453827776046536, "grad_norm": 0.347913920879364, "learning_rate": 7.334333601714403e-05, "loss": 1.5926, "step": 3325 }, { "epoch": 0.3454866521242339, "grad_norm": 0.404429167509079, "learning_rate": 7.332890557856125e-05, "loss": 1.8246, "step": 3326 }, { "epoch": 0.3455905266438143, "grad_norm": 0.3735859990119934, "learning_rate": 7.331447265563197e-05, "loss": 1.5425, "step": 3327 }, { "epoch": 0.3456944011633946, "grad_norm": 0.35603833198547363, "learning_rate": 7.330003724989324e-05, "loss": 1.5314, "step": 3328 }, { "epoch": 0.34579827568297494, "grad_norm": 0.3887482285499573, "learning_rate": 7.328559936288228e-05, "loss": 1.688, "step": 3329 }, { "epoch": 0.3459021502025553, "grad_norm": 0.4250495731830597, "learning_rate": 7.327115899613662e-05, "loss": 1.8245, "step": 3330 }, { "epoch": 0.34600602472213565, "grad_norm": 0.36763063073158264, "learning_rate": 7.325671615119407e-05, "loss": 1.4173, "step": 3331 }, { "epoch": 0.34610989924171603, "grad_norm": 0.3886317312717438, "learning_rate": 7.324227082959264e-05, "loss": 1.7182, "step": 3332 }, { "epoch": 0.34621377376129636, "grad_norm": 0.4716704487800598, "learning_rate": 7.322782303287067e-05, "loss": 1.6838, "step": 3333 }, { "epoch": 0.3463176482808767, "grad_norm": 0.3932298719882965, "learning_rate": 7.321337276256674e-05, "loss": 1.8532, "step": 3334 }, { "epoch": 0.34642152280045707, "grad_norm": 0.4240279793739319, "learning_rate": 7.319892002021966e-05, "loss": 1.7919, "step": 3335 }, { "epoch": 0.3465253973200374, "grad_norm": 0.41877415776252747, "learning_rate": 7.318446480736857e-05, "loss": 1.6985, "step": 3336 }, { "epoch": 0.3466292718396177, "grad_norm": 0.45144563913345337, "learning_rate": 7.317000712555283e-05, "loss": 1.9947, "step": 3337 }, { "epoch": 0.3467331463591981, "grad_norm": 0.3652666509151459, "learning_rate": 7.315554697631205e-05, "loss": 1.7206, "step": 3338 }, { "epoch": 0.34683702087877843, "grad_norm": 0.3598730266094208, "learning_rate": 7.314108436118614e-05, "loss": 1.6871, "step": 3339 }, { "epoch": 0.34694089539835876, "grad_norm": 0.3652264177799225, "learning_rate": 7.312661928171525e-05, "loss": 1.5859, "step": 3340 }, { "epoch": 0.34704476991793914, "grad_norm": 0.36526259779930115, "learning_rate": 7.311215173943982e-05, "loss": 1.5721, "step": 3341 }, { "epoch": 0.34714864443751947, "grad_norm": 0.3666329085826874, "learning_rate": 7.309768173590049e-05, "loss": 1.6762, "step": 3342 }, { "epoch": 0.3472525189570998, "grad_norm": 0.39144429564476013, "learning_rate": 7.308320927263823e-05, "loss": 1.6709, "step": 3343 }, { "epoch": 0.3473563934766802, "grad_norm": 0.41937124729156494, "learning_rate": 7.306873435119423e-05, "loss": 1.7338, "step": 3344 }, { "epoch": 0.3474602679962605, "grad_norm": 0.38105306029319763, "learning_rate": 7.305425697311e-05, "loss": 1.5917, "step": 3345 }, { "epoch": 0.3475641425158409, "grad_norm": 0.3741595149040222, "learning_rate": 7.30397771399272e-05, "loss": 1.6511, "step": 3346 }, { "epoch": 0.3476680170354212, "grad_norm": 0.399387389421463, "learning_rate": 7.302529485318786e-05, "loss": 1.6948, "step": 3347 }, { "epoch": 0.34777189155500154, "grad_norm": 0.3723578155040741, "learning_rate": 7.301081011443423e-05, "loss": 1.6319, "step": 3348 }, { "epoch": 0.3478757660745819, "grad_norm": 0.37226778268814087, "learning_rate": 7.29963229252088e-05, "loss": 1.7856, "step": 3349 }, { "epoch": 0.34797964059416225, "grad_norm": 0.38649681210517883, "learning_rate": 7.298183328705436e-05, "loss": 1.65, "step": 3350 }, { "epoch": 0.3480835151137426, "grad_norm": 0.3898165822029114, "learning_rate": 7.296734120151394e-05, "loss": 1.8236, "step": 3351 }, { "epoch": 0.34818738963332296, "grad_norm": 0.37378284335136414, "learning_rate": 7.295284667013083e-05, "loss": 1.5903, "step": 3352 }, { "epoch": 0.3482912641529033, "grad_norm": 0.39997485280036926, "learning_rate": 7.293834969444861e-05, "loss": 1.8107, "step": 3353 }, { "epoch": 0.3483951386724836, "grad_norm": 0.37795597314834595, "learning_rate": 7.292385027601105e-05, "loss": 1.7677, "step": 3354 }, { "epoch": 0.348499013192064, "grad_norm": 0.3679714500904083, "learning_rate": 7.290934841636225e-05, "loss": 1.6464, "step": 3355 }, { "epoch": 0.3486028877116443, "grad_norm": 0.3939383029937744, "learning_rate": 7.289484411704656e-05, "loss": 1.7565, "step": 3356 }, { "epoch": 0.34870676223122465, "grad_norm": 0.38755881786346436, "learning_rate": 7.288033737960855e-05, "loss": 1.5508, "step": 3357 }, { "epoch": 0.34881063675080504, "grad_norm": 0.3855699598789215, "learning_rate": 7.286582820559308e-05, "loss": 1.6979, "step": 3358 }, { "epoch": 0.34891451127038536, "grad_norm": 0.37389200925827026, "learning_rate": 7.285131659654527e-05, "loss": 1.703, "step": 3359 }, { "epoch": 0.34901838578996575, "grad_norm": 0.3579116761684418, "learning_rate": 7.283680255401049e-05, "loss": 1.6348, "step": 3360 }, { "epoch": 0.3491222603095461, "grad_norm": 0.39986559748649597, "learning_rate": 7.282228607953436e-05, "loss": 1.7792, "step": 3361 }, { "epoch": 0.3492261348291264, "grad_norm": 0.3745286166667938, "learning_rate": 7.28077671746628e-05, "loss": 1.7119, "step": 3362 }, { "epoch": 0.3493300093487068, "grad_norm": 0.45481473207473755, "learning_rate": 7.279324584094194e-05, "loss": 1.8279, "step": 3363 }, { "epoch": 0.3494338838682871, "grad_norm": 0.42257899045944214, "learning_rate": 7.277872207991818e-05, "loss": 1.6366, "step": 3364 }, { "epoch": 0.34953775838786744, "grad_norm": 0.3691715896129608, "learning_rate": 7.276419589313821e-05, "loss": 1.6741, "step": 3365 }, { "epoch": 0.3496416329074478, "grad_norm": 0.36931112408638, "learning_rate": 7.274966728214895e-05, "loss": 1.7447, "step": 3366 }, { "epoch": 0.34974550742702815, "grad_norm": 0.4134025573730469, "learning_rate": 7.273513624849757e-05, "loss": 1.9171, "step": 3367 }, { "epoch": 0.3498493819466085, "grad_norm": 0.4000683128833771, "learning_rate": 7.272060279373152e-05, "loss": 1.7925, "step": 3368 }, { "epoch": 0.34995325646618886, "grad_norm": 0.3902685344219208, "learning_rate": 7.27060669193985e-05, "loss": 1.6959, "step": 3369 }, { "epoch": 0.3500571309857692, "grad_norm": 0.40209463238716125, "learning_rate": 7.269152862704647e-05, "loss": 1.7667, "step": 3370 }, { "epoch": 0.35016100550534957, "grad_norm": 0.46758002042770386, "learning_rate": 7.267698791822363e-05, "loss": 1.8386, "step": 3371 }, { "epoch": 0.3502648800249299, "grad_norm": 0.4505844712257385, "learning_rate": 7.266244479447847e-05, "loss": 1.9704, "step": 3372 }, { "epoch": 0.3503687545445102, "grad_norm": 0.4215162694454193, "learning_rate": 7.264789925735974e-05, "loss": 1.9084, "step": 3373 }, { "epoch": 0.3504726290640906, "grad_norm": 0.4192196726799011, "learning_rate": 7.263335130841638e-05, "loss": 1.7802, "step": 3374 }, { "epoch": 0.35057650358367093, "grad_norm": 0.4384503662586212, "learning_rate": 7.261880094919765e-05, "loss": 1.6945, "step": 3375 }, { "epoch": 0.35068037810325126, "grad_norm": 0.4039691984653473, "learning_rate": 7.260424818125305e-05, "loss": 1.7436, "step": 3376 }, { "epoch": 0.35078425262283164, "grad_norm": 0.3905966579914093, "learning_rate": 7.258969300613233e-05, "loss": 1.7358, "step": 3377 }, { "epoch": 0.35088812714241197, "grad_norm": 0.4261592626571655, "learning_rate": 7.257513542538553e-05, "loss": 1.8172, "step": 3378 }, { "epoch": 0.3509920016619923, "grad_norm": 0.39150550961494446, "learning_rate": 7.25605754405629e-05, "loss": 1.6052, "step": 3379 }, { "epoch": 0.3510958761815727, "grad_norm": 0.3744257092475891, "learning_rate": 7.254601305321496e-05, "loss": 1.7486, "step": 3380 }, { "epoch": 0.351199750701153, "grad_norm": 0.36264219880104065, "learning_rate": 7.253144826489249e-05, "loss": 1.6068, "step": 3381 }, { "epoch": 0.35130362522073333, "grad_norm": 0.35563141107559204, "learning_rate": 7.251688107714654e-05, "loss": 1.6542, "step": 3382 }, { "epoch": 0.3514074997403137, "grad_norm": 0.39678165316581726, "learning_rate": 7.250231149152838e-05, "loss": 1.8309, "step": 3383 }, { "epoch": 0.35151137425989404, "grad_norm": 0.3613341748714447, "learning_rate": 7.248773950958958e-05, "loss": 1.735, "step": 3384 }, { "epoch": 0.3516152487794744, "grad_norm": 0.3558713495731354, "learning_rate": 7.247316513288192e-05, "loss": 1.5545, "step": 3385 }, { "epoch": 0.35171912329905475, "grad_norm": 0.3634355366230011, "learning_rate": 7.245858836295749e-05, "loss": 1.748, "step": 3386 }, { "epoch": 0.3518229978186351, "grad_norm": 0.3754010796546936, "learning_rate": 7.244400920136858e-05, "loss": 1.703, "step": 3387 }, { "epoch": 0.35192687233821546, "grad_norm": 0.380953848361969, "learning_rate": 7.242942764966776e-05, "loss": 1.6779, "step": 3388 }, { "epoch": 0.3520307468577958, "grad_norm": 0.4804019033908844, "learning_rate": 7.241484370940784e-05, "loss": 1.7014, "step": 3389 }, { "epoch": 0.3521346213773761, "grad_norm": 0.41277310252189636, "learning_rate": 7.240025738214192e-05, "loss": 1.6532, "step": 3390 }, { "epoch": 0.3522384958969565, "grad_norm": 0.4273497760295868, "learning_rate": 7.238566866942332e-05, "loss": 1.9818, "step": 3391 }, { "epoch": 0.3523423704165368, "grad_norm": 0.38524994254112244, "learning_rate": 7.23710775728056e-05, "loss": 1.7702, "step": 3392 }, { "epoch": 0.35244624493611715, "grad_norm": 0.37887367606163025, "learning_rate": 7.235648409384263e-05, "loss": 1.6614, "step": 3393 }, { "epoch": 0.35255011945569753, "grad_norm": 0.3723335564136505, "learning_rate": 7.234188823408851e-05, "loss": 1.6692, "step": 3394 }, { "epoch": 0.35265399397527786, "grad_norm": 0.38009339570999146, "learning_rate": 7.232728999509754e-05, "loss": 1.7263, "step": 3395 }, { "epoch": 0.3527578684948582, "grad_norm": 0.4187556505203247, "learning_rate": 7.231268937842438e-05, "loss": 1.7052, "step": 3396 }, { "epoch": 0.35286174301443857, "grad_norm": 0.3810417056083679, "learning_rate": 7.229808638562381e-05, "loss": 1.659, "step": 3397 }, { "epoch": 0.3529656175340189, "grad_norm": 0.41524961590766907, "learning_rate": 7.228348101825099e-05, "loss": 1.5006, "step": 3398 }, { "epoch": 0.3530694920535993, "grad_norm": 0.38853874802589417, "learning_rate": 7.226887327786125e-05, "loss": 1.8323, "step": 3399 }, { "epoch": 0.3531733665731796, "grad_norm": 0.3678146004676819, "learning_rate": 7.22542631660102e-05, "loss": 1.6357, "step": 3400 }, { "epoch": 0.35327724109275993, "grad_norm": 0.3867931365966797, "learning_rate": 7.223965068425376e-05, "loss": 1.7819, "step": 3401 }, { "epoch": 0.3533811156123403, "grad_norm": 0.37931546568870544, "learning_rate": 7.222503583414795e-05, "loss": 1.6478, "step": 3402 }, { "epoch": 0.35348499013192064, "grad_norm": 0.43002453446388245, "learning_rate": 7.22104186172492e-05, "loss": 1.8901, "step": 3403 }, { "epoch": 0.35358886465150097, "grad_norm": 0.36890125274658203, "learning_rate": 7.219579903511412e-05, "loss": 1.7524, "step": 3404 }, { "epoch": 0.35369273917108135, "grad_norm": 0.4097067415714264, "learning_rate": 7.218117708929957e-05, "loss": 1.6646, "step": 3405 }, { "epoch": 0.3537966136906617, "grad_norm": 0.47631388902664185, "learning_rate": 7.216655278136269e-05, "loss": 1.8199, "step": 3406 }, { "epoch": 0.353900488210242, "grad_norm": 0.4068619906902313, "learning_rate": 7.215192611286083e-05, "loss": 1.5446, "step": 3407 }, { "epoch": 0.3540043627298224, "grad_norm": 0.36159148812294006, "learning_rate": 7.213729708535164e-05, "loss": 1.4702, "step": 3408 }, { "epoch": 0.3541082372494027, "grad_norm": 0.39345964789390564, "learning_rate": 7.212266570039299e-05, "loss": 1.7324, "step": 3409 }, { "epoch": 0.3542121117689831, "grad_norm": 0.4041385352611542, "learning_rate": 7.2108031959543e-05, "loss": 1.7712, "step": 3410 }, { "epoch": 0.3543159862885634, "grad_norm": 0.40899065136909485, "learning_rate": 7.209339586436005e-05, "loss": 1.5812, "step": 3411 }, { "epoch": 0.35441986080814375, "grad_norm": 0.37464478611946106, "learning_rate": 7.20787574164028e-05, "loss": 1.4846, "step": 3412 }, { "epoch": 0.35452373532772413, "grad_norm": 0.41557177901268005, "learning_rate": 7.206411661723007e-05, "loss": 1.7985, "step": 3413 }, { "epoch": 0.35462760984730446, "grad_norm": 0.4185909032821655, "learning_rate": 7.204947346840106e-05, "loss": 1.7178, "step": 3414 }, { "epoch": 0.3547314843668848, "grad_norm": 0.40856650471687317, "learning_rate": 7.203482797147512e-05, "loss": 1.5386, "step": 3415 }, { "epoch": 0.35483535888646517, "grad_norm": 0.40840035676956177, "learning_rate": 7.202018012801185e-05, "loss": 1.8462, "step": 3416 }, { "epoch": 0.3549392334060455, "grad_norm": 0.4034786820411682, "learning_rate": 7.200552993957117e-05, "loss": 1.6743, "step": 3417 }, { "epoch": 0.3550431079256258, "grad_norm": 0.39444810152053833, "learning_rate": 7.199087740771321e-05, "loss": 1.6801, "step": 3418 }, { "epoch": 0.3551469824452062, "grad_norm": 0.4487433433532715, "learning_rate": 7.197622253399833e-05, "loss": 1.9052, "step": 3419 }, { "epoch": 0.35525085696478653, "grad_norm": 0.37087568640708923, "learning_rate": 7.196156531998718e-05, "loss": 1.6895, "step": 3420 }, { "epoch": 0.35535473148436686, "grad_norm": 0.3976334035396576, "learning_rate": 7.194690576724064e-05, "loss": 1.6898, "step": 3421 }, { "epoch": 0.35545860600394724, "grad_norm": 0.3944483697414398, "learning_rate": 7.19322438773198e-05, "loss": 1.7735, "step": 3422 }, { "epoch": 0.35556248052352757, "grad_norm": 0.4049598276615143, "learning_rate": 7.191757965178609e-05, "loss": 1.7066, "step": 3423 }, { "epoch": 0.35566635504310795, "grad_norm": 0.3984638452529907, "learning_rate": 7.19029130922011e-05, "loss": 1.622, "step": 3424 }, { "epoch": 0.3557702295626883, "grad_norm": 0.37969374656677246, "learning_rate": 7.188824420012671e-05, "loss": 1.5406, "step": 3425 }, { "epoch": 0.3558741040822686, "grad_norm": 0.39994993805885315, "learning_rate": 7.187357297712506e-05, "loss": 1.6681, "step": 3426 }, { "epoch": 0.355977978601849, "grad_norm": 0.38335686922073364, "learning_rate": 7.185889942475851e-05, "loss": 1.7488, "step": 3427 }, { "epoch": 0.3560818531214293, "grad_norm": 0.37399354577064514, "learning_rate": 7.184422354458966e-05, "loss": 1.6041, "step": 3428 }, { "epoch": 0.35618572764100964, "grad_norm": 0.3858386278152466, "learning_rate": 7.182954533818142e-05, "loss": 1.7968, "step": 3429 }, { "epoch": 0.35628960216059, "grad_norm": 0.39342349767684937, "learning_rate": 7.181486480709687e-05, "loss": 1.7446, "step": 3430 }, { "epoch": 0.35639347668017035, "grad_norm": 0.3939875364303589, "learning_rate": 7.180018195289937e-05, "loss": 1.7684, "step": 3431 }, { "epoch": 0.3564973511997507, "grad_norm": 0.3789867162704468, "learning_rate": 7.178549677715254e-05, "loss": 1.7591, "step": 3432 }, { "epoch": 0.35660122571933106, "grad_norm": 0.41745778918266296, "learning_rate": 7.177080928142024e-05, "loss": 1.8228, "step": 3433 }, { "epoch": 0.3567051002389114, "grad_norm": 0.3856961727142334, "learning_rate": 7.175611946726657e-05, "loss": 1.7026, "step": 3434 }, { "epoch": 0.3568089747584917, "grad_norm": 0.36199331283569336, "learning_rate": 7.174142733625589e-05, "loss": 1.4263, "step": 3435 }, { "epoch": 0.3569128492780721, "grad_norm": 0.38251036405563354, "learning_rate": 7.172673288995277e-05, "loss": 1.6076, "step": 3436 }, { "epoch": 0.3570167237976524, "grad_norm": 0.3627626895904541, "learning_rate": 7.171203612992209e-05, "loss": 1.7049, "step": 3437 }, { "epoch": 0.3571205983172328, "grad_norm": 0.37557271122932434, "learning_rate": 7.169733705772891e-05, "loss": 1.5924, "step": 3438 }, { "epoch": 0.35722447283681313, "grad_norm": 0.4099326729774475, "learning_rate": 7.168263567493859e-05, "loss": 1.8338, "step": 3439 }, { "epoch": 0.35732834735639346, "grad_norm": 0.3865790367126465, "learning_rate": 7.16679319831167e-05, "loss": 1.8652, "step": 3440 }, { "epoch": 0.35743222187597384, "grad_norm": 0.3986462950706482, "learning_rate": 7.165322598382905e-05, "loss": 1.8941, "step": 3441 }, { "epoch": 0.35753609639555417, "grad_norm": 0.3964523673057556, "learning_rate": 7.163851767864176e-05, "loss": 1.8183, "step": 3442 }, { "epoch": 0.3576399709151345, "grad_norm": 0.4160378575325012, "learning_rate": 7.162380706912112e-05, "loss": 1.7119, "step": 3443 }, { "epoch": 0.3577438454347149, "grad_norm": 0.3585418462753296, "learning_rate": 7.16090941568337e-05, "loss": 1.5949, "step": 3444 }, { "epoch": 0.3578477199542952, "grad_norm": 0.3526414930820465, "learning_rate": 7.159437894334629e-05, "loss": 1.5768, "step": 3445 }, { "epoch": 0.35795159447387553, "grad_norm": 0.4531826674938202, "learning_rate": 7.157966143022599e-05, "loss": 1.7067, "step": 3446 }, { "epoch": 0.3580554689934559, "grad_norm": 0.3793008625507355, "learning_rate": 7.156494161904005e-05, "loss": 1.6634, "step": 3447 }, { "epoch": 0.35815934351303624, "grad_norm": 0.3665551245212555, "learning_rate": 7.155021951135605e-05, "loss": 1.6025, "step": 3448 }, { "epoch": 0.3582632180326166, "grad_norm": 0.4132823646068573, "learning_rate": 7.153549510874178e-05, "loss": 1.709, "step": 3449 }, { "epoch": 0.35836709255219695, "grad_norm": 0.38809409737586975, "learning_rate": 7.152076841276527e-05, "loss": 1.6638, "step": 3450 }, { "epoch": 0.3584709670717773, "grad_norm": 0.388092964887619, "learning_rate": 7.150603942499478e-05, "loss": 1.7261, "step": 3451 }, { "epoch": 0.35857484159135766, "grad_norm": 0.4031296372413635, "learning_rate": 7.149130814699884e-05, "loss": 1.7373, "step": 3452 }, { "epoch": 0.358678716110938, "grad_norm": 0.36466771364212036, "learning_rate": 7.147657458034622e-05, "loss": 1.6811, "step": 3453 }, { "epoch": 0.3587825906305183, "grad_norm": 0.3705892562866211, "learning_rate": 7.146183872660595e-05, "loss": 1.6032, "step": 3454 }, { "epoch": 0.3588864651500987, "grad_norm": 0.3856612741947174, "learning_rate": 7.144710058734724e-05, "loss": 1.5488, "step": 3455 }, { "epoch": 0.358990339669679, "grad_norm": 0.3789518177509308, "learning_rate": 7.143236016413963e-05, "loss": 1.6905, "step": 3456 }, { "epoch": 0.35909421418925935, "grad_norm": 0.3953239321708679, "learning_rate": 7.141761745855284e-05, "loss": 1.7514, "step": 3457 }, { "epoch": 0.35919808870883974, "grad_norm": 0.3837435841560364, "learning_rate": 7.140287247215685e-05, "loss": 1.3697, "step": 3458 }, { "epoch": 0.35930196322842006, "grad_norm": 0.3627301752567291, "learning_rate": 7.13881252065219e-05, "loss": 1.6055, "step": 3459 }, { "epoch": 0.3594058377480004, "grad_norm": 0.41467034816741943, "learning_rate": 7.137337566321844e-05, "loss": 1.7294, "step": 3460 }, { "epoch": 0.35950971226758077, "grad_norm": 0.4232648015022278, "learning_rate": 7.135862384381717e-05, "loss": 1.8147, "step": 3461 }, { "epoch": 0.3596135867871611, "grad_norm": 0.43227145075798035, "learning_rate": 7.134386974988909e-05, "loss": 1.5631, "step": 3462 }, { "epoch": 0.3597174613067415, "grad_norm": 0.4435618817806244, "learning_rate": 7.132911338300537e-05, "loss": 1.8052, "step": 3463 }, { "epoch": 0.3598213358263218, "grad_norm": 0.41515088081359863, "learning_rate": 7.131435474473744e-05, "loss": 1.8405, "step": 3464 }, { "epoch": 0.35992521034590214, "grad_norm": 0.3956370949745178, "learning_rate": 7.1299593836657e-05, "loss": 1.6957, "step": 3465 }, { "epoch": 0.3600290848654825, "grad_norm": 0.4101237654685974, "learning_rate": 7.128483066033594e-05, "loss": 1.6149, "step": 3466 }, { "epoch": 0.36013295938506285, "grad_norm": 0.39739376306533813, "learning_rate": 7.127006521734646e-05, "loss": 1.7836, "step": 3467 }, { "epoch": 0.36023683390464317, "grad_norm": 0.4086010158061981, "learning_rate": 7.125529750926094e-05, "loss": 1.7712, "step": 3468 }, { "epoch": 0.36034070842422355, "grad_norm": 0.3838006556034088, "learning_rate": 7.124052753765202e-05, "loss": 1.7435, "step": 3469 }, { "epoch": 0.3604445829438039, "grad_norm": 0.40963372588157654, "learning_rate": 7.122575530409262e-05, "loss": 1.4758, "step": 3470 }, { "epoch": 0.3605484574633842, "grad_norm": 0.4056002199649811, "learning_rate": 7.121098081015586e-05, "loss": 1.6, "step": 3471 }, { "epoch": 0.3606523319829646, "grad_norm": 0.46054428815841675, "learning_rate": 7.119620405741506e-05, "loss": 1.845, "step": 3472 }, { "epoch": 0.3607562065025449, "grad_norm": 0.41523477435112, "learning_rate": 7.118142504744389e-05, "loss": 1.7532, "step": 3473 }, { "epoch": 0.36086008102212525, "grad_norm": 0.40955179929733276, "learning_rate": 7.116664378181616e-05, "loss": 1.7787, "step": 3474 }, { "epoch": 0.36096395554170563, "grad_norm": 0.39328327775001526, "learning_rate": 7.115186026210597e-05, "loss": 1.7785, "step": 3475 }, { "epoch": 0.36106783006128595, "grad_norm": 0.380825012922287, "learning_rate": 7.113707448988767e-05, "loss": 1.6208, "step": 3476 }, { "epoch": 0.36117170458086634, "grad_norm": 0.3730035722255707, "learning_rate": 7.11222864667358e-05, "loss": 1.66, "step": 3477 }, { "epoch": 0.36127557910044666, "grad_norm": 0.39808517694473267, "learning_rate": 7.110749619422519e-05, "loss": 1.6975, "step": 3478 }, { "epoch": 0.361379453620027, "grad_norm": 0.4094315767288208, "learning_rate": 7.109270367393088e-05, "loss": 1.7015, "step": 3479 }, { "epoch": 0.3614833281396074, "grad_norm": 0.41740068793296814, "learning_rate": 7.107790890742815e-05, "loss": 1.7778, "step": 3480 }, { "epoch": 0.3615872026591877, "grad_norm": 0.41227155923843384, "learning_rate": 7.106311189629253e-05, "loss": 1.8917, "step": 3481 }, { "epoch": 0.36169107717876803, "grad_norm": 0.3613469898700714, "learning_rate": 7.104831264209982e-05, "loss": 1.6972, "step": 3482 }, { "epoch": 0.3617949516983484, "grad_norm": 0.402305543422699, "learning_rate": 7.103351114642596e-05, "loss": 1.5879, "step": 3483 }, { "epoch": 0.36189882621792874, "grad_norm": 0.39771807193756104, "learning_rate": 7.101870741084726e-05, "loss": 1.7124, "step": 3484 }, { "epoch": 0.36200270073750906, "grad_norm": 0.36613133549690247, "learning_rate": 7.100390143694016e-05, "loss": 1.681, "step": 3485 }, { "epoch": 0.36210657525708945, "grad_norm": 0.3933365046977997, "learning_rate": 7.098909322628138e-05, "loss": 1.703, "step": 3486 }, { "epoch": 0.3622104497766698, "grad_norm": 0.39445760846138, "learning_rate": 7.097428278044792e-05, "loss": 1.7333, "step": 3487 }, { "epoch": 0.36231432429625016, "grad_norm": 0.3644881844520569, "learning_rate": 7.095947010101693e-05, "loss": 1.713, "step": 3488 }, { "epoch": 0.3624181988158305, "grad_norm": 0.3871423304080963, "learning_rate": 7.094465518956588e-05, "loss": 1.7749, "step": 3489 }, { "epoch": 0.3625220733354108, "grad_norm": 0.38430991768836975, "learning_rate": 7.092983804767242e-05, "loss": 1.6216, "step": 3490 }, { "epoch": 0.3626259478549912, "grad_norm": 0.3718239367008209, "learning_rate": 7.091501867691446e-05, "loss": 1.6914, "step": 3491 }, { "epoch": 0.3627298223745715, "grad_norm": 0.39211010932922363, "learning_rate": 7.090019707887017e-05, "loss": 1.4819, "step": 3492 }, { "epoch": 0.36283369689415185, "grad_norm": 0.3777989149093628, "learning_rate": 7.08853732551179e-05, "loss": 1.6425, "step": 3493 }, { "epoch": 0.36293757141373223, "grad_norm": 0.3853297829627991, "learning_rate": 7.087054720723629e-05, "loss": 1.6724, "step": 3494 }, { "epoch": 0.36304144593331256, "grad_norm": 0.3982897400856018, "learning_rate": 7.085571893680421e-05, "loss": 1.7549, "step": 3495 }, { "epoch": 0.3631453204528929, "grad_norm": 0.38394638895988464, "learning_rate": 7.084088844540074e-05, "loss": 1.7959, "step": 3496 }, { "epoch": 0.36324919497247327, "grad_norm": 0.4506365656852722, "learning_rate": 7.08260557346052e-05, "loss": 1.907, "step": 3497 }, { "epoch": 0.3633530694920536, "grad_norm": 0.43902164697647095, "learning_rate": 7.081122080599721e-05, "loss": 1.7573, "step": 3498 }, { "epoch": 0.3634569440116339, "grad_norm": 0.4061184525489807, "learning_rate": 7.079638366115653e-05, "loss": 1.8399, "step": 3499 }, { "epoch": 0.3635608185312143, "grad_norm": 0.3644791841506958, "learning_rate": 7.078154430166319e-05, "loss": 1.5973, "step": 3500 }, { "epoch": 0.36366469305079463, "grad_norm": 0.44396528601646423, "learning_rate": 7.07667027290975e-05, "loss": 1.8829, "step": 3501 }, { "epoch": 0.363768567570375, "grad_norm": 0.44041764736175537, "learning_rate": 7.075185894503993e-05, "loss": 1.9544, "step": 3502 }, { "epoch": 0.36387244208995534, "grad_norm": 0.3821714520454407, "learning_rate": 7.073701295107128e-05, "loss": 1.7017, "step": 3503 }, { "epoch": 0.36397631660953567, "grad_norm": 0.3823404014110565, "learning_rate": 7.072216474877249e-05, "loss": 1.5732, "step": 3504 }, { "epoch": 0.36408019112911605, "grad_norm": 0.38946443796157837, "learning_rate": 7.070731433972481e-05, "loss": 1.5651, "step": 3505 }, { "epoch": 0.3641840656486964, "grad_norm": 0.3907792270183563, "learning_rate": 7.069246172550967e-05, "loss": 1.5276, "step": 3506 }, { "epoch": 0.3642879401682767, "grad_norm": 0.374189168214798, "learning_rate": 7.067760690770876e-05, "loss": 1.4874, "step": 3507 }, { "epoch": 0.3643918146878571, "grad_norm": 0.37600815296173096, "learning_rate": 7.066274988790399e-05, "loss": 1.6168, "step": 3508 }, { "epoch": 0.3644956892074374, "grad_norm": 0.39330026507377625, "learning_rate": 7.064789066767755e-05, "loss": 1.8023, "step": 3509 }, { "epoch": 0.36459956372701774, "grad_norm": 0.4047221541404724, "learning_rate": 7.063302924861182e-05, "loss": 1.718, "step": 3510 }, { "epoch": 0.3647034382465981, "grad_norm": 0.3758139908313751, "learning_rate": 7.06181656322894e-05, "loss": 1.6952, "step": 3511 }, { "epoch": 0.36480731276617845, "grad_norm": 0.3800835907459259, "learning_rate": 7.060329982029317e-05, "loss": 1.5578, "step": 3512 }, { "epoch": 0.3649111872857588, "grad_norm": 0.38068559765815735, "learning_rate": 7.058843181420624e-05, "loss": 1.7363, "step": 3513 }, { "epoch": 0.36501506180533916, "grad_norm": 0.40776312351226807, "learning_rate": 7.05735616156119e-05, "loss": 1.5816, "step": 3514 }, { "epoch": 0.3651189363249195, "grad_norm": 0.40851452946662903, "learning_rate": 7.055868922609374e-05, "loss": 1.8035, "step": 3515 }, { "epoch": 0.36522281084449987, "grad_norm": 0.4785706698894501, "learning_rate": 7.054381464723554e-05, "loss": 2.132, "step": 3516 }, { "epoch": 0.3653266853640802, "grad_norm": 0.3917141556739807, "learning_rate": 7.052893788062131e-05, "loss": 1.7564, "step": 3517 }, { "epoch": 0.3654305598836605, "grad_norm": 0.35814887285232544, "learning_rate": 7.051405892783536e-05, "loss": 1.6212, "step": 3518 }, { "epoch": 0.3655344344032409, "grad_norm": 0.37529706954956055, "learning_rate": 7.049917779046213e-05, "loss": 1.6613, "step": 3519 }, { "epoch": 0.36563830892282123, "grad_norm": 0.4299023449420929, "learning_rate": 7.048429447008636e-05, "loss": 1.7108, "step": 3520 }, { "epoch": 0.36574218344240156, "grad_norm": 0.39884042739868164, "learning_rate": 7.046940896829302e-05, "loss": 1.722, "step": 3521 }, { "epoch": 0.36584605796198194, "grad_norm": 0.3787887394428253, "learning_rate": 7.04545212866673e-05, "loss": 1.7693, "step": 3522 }, { "epoch": 0.36594993248156227, "grad_norm": 0.37919625639915466, "learning_rate": 7.043963142679459e-05, "loss": 1.8372, "step": 3523 }, { "epoch": 0.3660538070011426, "grad_norm": 0.4805675446987152, "learning_rate": 7.04247393902606e-05, "loss": 1.9524, "step": 3524 }, { "epoch": 0.366157681520723, "grad_norm": 0.3908388316631317, "learning_rate": 7.040984517865116e-05, "loss": 1.7887, "step": 3525 }, { "epoch": 0.3662615560403033, "grad_norm": 0.39634791016578674, "learning_rate": 7.039494879355242e-05, "loss": 1.7503, "step": 3526 }, { "epoch": 0.3663654305598837, "grad_norm": 0.3950914740562439, "learning_rate": 7.038005023655072e-05, "loss": 1.6939, "step": 3527 }, { "epoch": 0.366469305079464, "grad_norm": 0.3956228792667389, "learning_rate": 7.036514950923264e-05, "loss": 1.7095, "step": 3528 }, { "epoch": 0.36657317959904434, "grad_norm": 0.38164466619491577, "learning_rate": 7.0350246613185e-05, "loss": 1.6409, "step": 3529 }, { "epoch": 0.3666770541186247, "grad_norm": 0.39597025513648987, "learning_rate": 7.033534154999481e-05, "loss": 1.781, "step": 3530 }, { "epoch": 0.36678092863820505, "grad_norm": 0.3930729329586029, "learning_rate": 7.032043432124939e-05, "loss": 1.7551, "step": 3531 }, { "epoch": 0.3668848031577854, "grad_norm": 0.38284942507743835, "learning_rate": 7.030552492853621e-05, "loss": 1.4163, "step": 3532 }, { "epoch": 0.36698867767736576, "grad_norm": 0.3863504230976105, "learning_rate": 7.029061337344303e-05, "loss": 1.8067, "step": 3533 }, { "epoch": 0.3670925521969461, "grad_norm": 0.42066165804862976, "learning_rate": 7.027569965755777e-05, "loss": 1.819, "step": 3534 }, { "epoch": 0.3671964267165264, "grad_norm": 0.3733902871608734, "learning_rate": 7.026078378246868e-05, "loss": 1.6656, "step": 3535 }, { "epoch": 0.3673003012361068, "grad_norm": 0.39036303758621216, "learning_rate": 7.024586574976414e-05, "loss": 1.7219, "step": 3536 }, { "epoch": 0.3674041757556871, "grad_norm": 0.3913772702217102, "learning_rate": 7.023094556103283e-05, "loss": 1.6209, "step": 3537 }, { "epoch": 0.36750805027526745, "grad_norm": 0.3987481892108917, "learning_rate": 7.021602321786362e-05, "loss": 1.7411, "step": 3538 }, { "epoch": 0.36761192479484783, "grad_norm": 0.38136929273605347, "learning_rate": 7.020109872184563e-05, "loss": 1.6651, "step": 3539 }, { "epoch": 0.36771579931442816, "grad_norm": 0.3978997766971588, "learning_rate": 7.018617207456821e-05, "loss": 1.7601, "step": 3540 }, { "epoch": 0.36781967383400854, "grad_norm": 0.39841562509536743, "learning_rate": 7.01712432776209e-05, "loss": 1.6426, "step": 3541 }, { "epoch": 0.36792354835358887, "grad_norm": 0.40489330887794495, "learning_rate": 7.015631233259354e-05, "loss": 1.4646, "step": 3542 }, { "epoch": 0.3680274228731692, "grad_norm": 0.3978332579135895, "learning_rate": 7.014137924107614e-05, "loss": 1.7999, "step": 3543 }, { "epoch": 0.3681312973927496, "grad_norm": 0.4058266878128052, "learning_rate": 7.012644400465895e-05, "loss": 1.8085, "step": 3544 }, { "epoch": 0.3682351719123299, "grad_norm": 0.4187474846839905, "learning_rate": 7.011150662493248e-05, "loss": 1.851, "step": 3545 }, { "epoch": 0.36833904643191023, "grad_norm": 0.43871933221817017, "learning_rate": 7.009656710348742e-05, "loss": 2.0409, "step": 3546 }, { "epoch": 0.3684429209514906, "grad_norm": 0.4018607437610626, "learning_rate": 7.008162544191474e-05, "loss": 1.7269, "step": 3547 }, { "epoch": 0.36854679547107094, "grad_norm": 0.4518108665943146, "learning_rate": 7.006668164180557e-05, "loss": 1.8257, "step": 3548 }, { "epoch": 0.36865066999065127, "grad_norm": 0.4045441150665283, "learning_rate": 7.005173570475135e-05, "loss": 1.8499, "step": 3549 }, { "epoch": 0.36875454451023165, "grad_norm": 0.41423842310905457, "learning_rate": 7.003678763234367e-05, "loss": 1.8105, "step": 3550 }, { "epoch": 0.368858419029812, "grad_norm": 0.37954044342041016, "learning_rate": 7.002183742617441e-05, "loss": 1.7043, "step": 3551 }, { "epoch": 0.36896229354939236, "grad_norm": 0.39509761333465576, "learning_rate": 7.000688508783564e-05, "loss": 1.7842, "step": 3552 }, { "epoch": 0.3690661680689727, "grad_norm": 0.4210776686668396, "learning_rate": 6.999193061891967e-05, "loss": 1.5356, "step": 3553 }, { "epoch": 0.369170042588553, "grad_norm": 0.4176965355873108, "learning_rate": 6.997697402101904e-05, "loss": 1.8425, "step": 3554 }, { "epoch": 0.3692739171081334, "grad_norm": 0.4520731270313263, "learning_rate": 6.996201529572648e-05, "loss": 1.7272, "step": 3555 }, { "epoch": 0.3693777916277137, "grad_norm": 0.3699086606502533, "learning_rate": 6.994705444463501e-05, "loss": 1.661, "step": 3556 }, { "epoch": 0.36948166614729405, "grad_norm": 0.38104352355003357, "learning_rate": 6.993209146933784e-05, "loss": 1.5626, "step": 3557 }, { "epoch": 0.36958554066687443, "grad_norm": 0.4503292739391327, "learning_rate": 6.991712637142839e-05, "loss": 1.8327, "step": 3558 }, { "epoch": 0.36968941518645476, "grad_norm": 0.43513697385787964, "learning_rate": 6.990215915250035e-05, "loss": 1.6256, "step": 3559 }, { "epoch": 0.3697932897060351, "grad_norm": 0.4127349555492401, "learning_rate": 6.988718981414761e-05, "loss": 1.8755, "step": 3560 }, { "epoch": 0.36989716422561547, "grad_norm": 0.4628101885318756, "learning_rate": 6.987221835796429e-05, "loss": 1.8136, "step": 3561 }, { "epoch": 0.3700010387451958, "grad_norm": 0.4025733768939972, "learning_rate": 6.98572447855447e-05, "loss": 1.66, "step": 3562 }, { "epoch": 0.3701049132647761, "grad_norm": 0.3781670331954956, "learning_rate": 6.984226909848346e-05, "loss": 1.6159, "step": 3563 }, { "epoch": 0.3702087877843565, "grad_norm": 0.40306591987609863, "learning_rate": 6.982729129837531e-05, "loss": 1.888, "step": 3564 }, { "epoch": 0.37031266230393683, "grad_norm": 0.4283866584300995, "learning_rate": 6.981231138681531e-05, "loss": 1.7992, "step": 3565 }, { "epoch": 0.3704165368235172, "grad_norm": 0.4207936227321625, "learning_rate": 6.979732936539868e-05, "loss": 1.7709, "step": 3566 }, { "epoch": 0.37052041134309754, "grad_norm": 0.4613027274608612, "learning_rate": 6.97823452357209e-05, "loss": 1.765, "step": 3567 }, { "epoch": 0.37062428586267787, "grad_norm": 0.40821146965026855, "learning_rate": 6.976735899937767e-05, "loss": 1.7137, "step": 3568 }, { "epoch": 0.37072816038225825, "grad_norm": 0.3744267225265503, "learning_rate": 6.97523706579649e-05, "loss": 1.7768, "step": 3569 }, { "epoch": 0.3708320349018386, "grad_norm": 0.410510778427124, "learning_rate": 6.973738021307871e-05, "loss": 1.4456, "step": 3570 }, { "epoch": 0.3709359094214189, "grad_norm": 0.4135860800743103, "learning_rate": 6.97223876663155e-05, "loss": 1.6854, "step": 3571 }, { "epoch": 0.3710397839409993, "grad_norm": 0.43364089727401733, "learning_rate": 6.970739301927183e-05, "loss": 1.7468, "step": 3572 }, { "epoch": 0.3711436584605796, "grad_norm": 0.37639757990837097, "learning_rate": 6.969239627354453e-05, "loss": 1.6043, "step": 3573 }, { "epoch": 0.37124753298015994, "grad_norm": 0.415420800447464, "learning_rate": 6.967739743073065e-05, "loss": 1.7144, "step": 3574 }, { "epoch": 0.3713514074997403, "grad_norm": 0.39380958676338196, "learning_rate": 6.966239649242743e-05, "loss": 1.8287, "step": 3575 }, { "epoch": 0.37145528201932065, "grad_norm": 0.41520291566848755, "learning_rate": 6.964739346023235e-05, "loss": 1.8377, "step": 3576 }, { "epoch": 0.371559156538901, "grad_norm": 0.4204116761684418, "learning_rate": 6.963238833574312e-05, "loss": 1.7471, "step": 3577 }, { "epoch": 0.37166303105848136, "grad_norm": 0.4574458599090576, "learning_rate": 6.961738112055767e-05, "loss": 1.4237, "step": 3578 }, { "epoch": 0.3717669055780617, "grad_norm": 0.3843066990375519, "learning_rate": 6.960237181627416e-05, "loss": 1.7035, "step": 3579 }, { "epoch": 0.3718707800976421, "grad_norm": 0.3904613256454468, "learning_rate": 6.958736042449095e-05, "loss": 1.8277, "step": 3580 }, { "epoch": 0.3719746546172224, "grad_norm": 0.3790266811847687, "learning_rate": 6.957234694680663e-05, "loss": 1.7919, "step": 3581 }, { "epoch": 0.3720785291368027, "grad_norm": 0.3974626362323761, "learning_rate": 6.955733138482006e-05, "loss": 1.795, "step": 3582 }, { "epoch": 0.3721824036563831, "grad_norm": 0.3986896574497223, "learning_rate": 6.954231374013022e-05, "loss": 1.7831, "step": 3583 }, { "epoch": 0.37228627817596344, "grad_norm": 0.4144607484340668, "learning_rate": 6.952729401433642e-05, "loss": 1.4946, "step": 3584 }, { "epoch": 0.37239015269554376, "grad_norm": 0.4263852536678314, "learning_rate": 6.951227220903813e-05, "loss": 1.7404, "step": 3585 }, { "epoch": 0.37249402721512415, "grad_norm": 0.4002395272254944, "learning_rate": 6.949724832583504e-05, "loss": 1.6906, "step": 3586 }, { "epoch": 0.3725979017347045, "grad_norm": 0.41054767370224, "learning_rate": 6.948222236632709e-05, "loss": 1.7632, "step": 3587 }, { "epoch": 0.3727017762542848, "grad_norm": 0.3650989234447479, "learning_rate": 6.946719433211442e-05, "loss": 1.6058, "step": 3588 }, { "epoch": 0.3728056507738652, "grad_norm": 0.4016493856906891, "learning_rate": 6.945216422479741e-05, "loss": 1.7819, "step": 3589 }, { "epoch": 0.3729095252934455, "grad_norm": 0.3928471505641937, "learning_rate": 6.943713204597664e-05, "loss": 1.8151, "step": 3590 }, { "epoch": 0.3730133998130259, "grad_norm": 0.39063897728919983, "learning_rate": 6.942209779725292e-05, "loss": 1.775, "step": 3591 }, { "epoch": 0.3731172743326062, "grad_norm": 0.38129961490631104, "learning_rate": 6.94070614802273e-05, "loss": 1.6656, "step": 3592 }, { "epoch": 0.37322114885218655, "grad_norm": 0.438495934009552, "learning_rate": 6.9392023096501e-05, "loss": 1.7774, "step": 3593 }, { "epoch": 0.37332502337176693, "grad_norm": 0.43866604566574097, "learning_rate": 6.937698264767549e-05, "loss": 1.8816, "step": 3594 }, { "epoch": 0.37342889789134726, "grad_norm": 0.41461604833602905, "learning_rate": 6.93619401353525e-05, "loss": 1.7163, "step": 3595 }, { "epoch": 0.3735327724109276, "grad_norm": 0.36867401003837585, "learning_rate": 6.934689556113391e-05, "loss": 1.7318, "step": 3596 }, { "epoch": 0.37363664693050797, "grad_norm": 0.41185227036476135, "learning_rate": 6.933184892662185e-05, "loss": 1.8164, "step": 3597 }, { "epoch": 0.3737405214500883, "grad_norm": 0.3863617777824402, "learning_rate": 6.931680023341867e-05, "loss": 1.7503, "step": 3598 }, { "epoch": 0.3738443959696686, "grad_norm": 0.3842338025569916, "learning_rate": 6.930174948312696e-05, "loss": 1.6838, "step": 3599 }, { "epoch": 0.373948270489249, "grad_norm": 0.3778021037578583, "learning_rate": 6.928669667734945e-05, "loss": 1.743, "step": 3600 }, { "epoch": 0.37405214500882933, "grad_norm": 0.40780338644981384, "learning_rate": 6.927164181768921e-05, "loss": 1.868, "step": 3601 }, { "epoch": 0.37415601952840966, "grad_norm": 0.4100748300552368, "learning_rate": 6.925658490574945e-05, "loss": 1.8172, "step": 3602 }, { "epoch": 0.37425989404799004, "grad_norm": 0.3898886740207672, "learning_rate": 6.92415259431336e-05, "loss": 1.671, "step": 3603 }, { "epoch": 0.37436376856757037, "grad_norm": 0.4094226062297821, "learning_rate": 6.922646493144532e-05, "loss": 1.6493, "step": 3604 }, { "epoch": 0.37446764308715075, "grad_norm": 0.36988019943237305, "learning_rate": 6.92114018722885e-05, "loss": 1.6539, "step": 3605 }, { "epoch": 0.3745715176067311, "grad_norm": 0.3859472870826721, "learning_rate": 6.919633676726722e-05, "loss": 1.5816, "step": 3606 }, { "epoch": 0.3746753921263114, "grad_norm": 0.4098130464553833, "learning_rate": 6.918126961798583e-05, "loss": 1.7291, "step": 3607 }, { "epoch": 0.3747792666458918, "grad_norm": 0.40433475375175476, "learning_rate": 6.916620042604883e-05, "loss": 1.7515, "step": 3608 }, { "epoch": 0.3748831411654721, "grad_norm": 0.38681918382644653, "learning_rate": 6.915112919306101e-05, "loss": 1.6408, "step": 3609 }, { "epoch": 0.37498701568505244, "grad_norm": 0.43381497263908386, "learning_rate": 6.913605592062731e-05, "loss": 1.7996, "step": 3610 }, { "epoch": 0.3750908902046328, "grad_norm": 0.37756863236427307, "learning_rate": 6.91209806103529e-05, "loss": 1.6188, "step": 3611 }, { "epoch": 0.37519476472421315, "grad_norm": 0.44089293479919434, "learning_rate": 6.910590326384323e-05, "loss": 1.8074, "step": 3612 }, { "epoch": 0.3752986392437935, "grad_norm": 0.3893734812736511, "learning_rate": 6.909082388270389e-05, "loss": 1.6607, "step": 3613 }, { "epoch": 0.37540251376337386, "grad_norm": 0.40464848279953003, "learning_rate": 6.90757424685407e-05, "loss": 1.7711, "step": 3614 }, { "epoch": 0.3755063882829542, "grad_norm": 0.3943893313407898, "learning_rate": 6.906065902295975e-05, "loss": 1.8252, "step": 3615 }, { "epoch": 0.3756102628025345, "grad_norm": 0.38202980160713196, "learning_rate": 6.904557354756728e-05, "loss": 1.7832, "step": 3616 }, { "epoch": 0.3757141373221149, "grad_norm": 0.38370367884635925, "learning_rate": 6.903048604396981e-05, "loss": 1.6776, "step": 3617 }, { "epoch": 0.3758180118416952, "grad_norm": 0.39055919647216797, "learning_rate": 6.9015396513774e-05, "loss": 1.7664, "step": 3618 }, { "epoch": 0.3759218863612756, "grad_norm": 0.3747326731681824, "learning_rate": 6.900030495858681e-05, "loss": 1.5807, "step": 3619 }, { "epoch": 0.37602576088085593, "grad_norm": 0.4175710380077362, "learning_rate": 6.898521138001533e-05, "loss": 1.8023, "step": 3620 }, { "epoch": 0.37612963540043626, "grad_norm": 0.4446984529495239, "learning_rate": 6.897011577966693e-05, "loss": 1.7831, "step": 3621 }, { "epoch": 0.37623350992001664, "grad_norm": 0.3818349242210388, "learning_rate": 6.895501815914917e-05, "loss": 1.7742, "step": 3622 }, { "epoch": 0.37633738443959697, "grad_norm": 0.4104137122631073, "learning_rate": 6.893991852006983e-05, "loss": 1.6418, "step": 3623 }, { "epoch": 0.3764412589591773, "grad_norm": 0.3920380771160126, "learning_rate": 6.892481686403694e-05, "loss": 1.6307, "step": 3624 }, { "epoch": 0.3765451334787577, "grad_norm": 0.36929798126220703, "learning_rate": 6.890971319265863e-05, "loss": 1.6734, "step": 3625 }, { "epoch": 0.376649007998338, "grad_norm": 0.39406248927116394, "learning_rate": 6.889460750754337e-05, "loss": 1.6584, "step": 3626 }, { "epoch": 0.37675288251791833, "grad_norm": 0.4494403898715973, "learning_rate": 6.887949981029981e-05, "loss": 1.9331, "step": 3627 }, { "epoch": 0.3768567570374987, "grad_norm": 0.4188593924045563, "learning_rate": 6.886439010253678e-05, "loss": 1.8305, "step": 3628 }, { "epoch": 0.37696063155707904, "grad_norm": 0.3847233057022095, "learning_rate": 6.884927838586336e-05, "loss": 1.675, "step": 3629 }, { "epoch": 0.3770645060766594, "grad_norm": 0.3903995156288147, "learning_rate": 6.88341646618888e-05, "loss": 1.6902, "step": 3630 }, { "epoch": 0.37716838059623975, "grad_norm": 0.38415199518203735, "learning_rate": 6.881904893222265e-05, "loss": 1.593, "step": 3631 }, { "epoch": 0.3772722551158201, "grad_norm": 0.3710866868495941, "learning_rate": 6.880393119847456e-05, "loss": 1.5337, "step": 3632 }, { "epoch": 0.37737612963540046, "grad_norm": 0.3745911419391632, "learning_rate": 6.878881146225447e-05, "loss": 1.7957, "step": 3633 }, { "epoch": 0.3774800041549808, "grad_norm": 0.42726603150367737, "learning_rate": 6.877368972517252e-05, "loss": 1.783, "step": 3634 }, { "epoch": 0.3775838786745611, "grad_norm": 0.36636191606521606, "learning_rate": 6.875856598883907e-05, "loss": 1.756, "step": 3635 }, { "epoch": 0.3776877531941415, "grad_norm": 0.40645745396614075, "learning_rate": 6.874344025486464e-05, "loss": 1.7357, "step": 3636 }, { "epoch": 0.3777916277137218, "grad_norm": 0.42991214990615845, "learning_rate": 6.872831252486005e-05, "loss": 1.9177, "step": 3637 }, { "epoch": 0.37789550223330215, "grad_norm": 0.39431822299957275, "learning_rate": 6.871318280043626e-05, "loss": 1.6861, "step": 3638 }, { "epoch": 0.37799937675288253, "grad_norm": 0.4022711515426636, "learning_rate": 6.869805108320446e-05, "loss": 1.7634, "step": 3639 }, { "epoch": 0.37810325127246286, "grad_norm": 0.3846032917499542, "learning_rate": 6.868291737477608e-05, "loss": 1.616, "step": 3640 }, { "epoch": 0.3782071257920432, "grad_norm": 0.3910578787326813, "learning_rate": 6.866778167676272e-05, "loss": 1.6158, "step": 3641 }, { "epoch": 0.37831100031162357, "grad_norm": 0.39738887548446655, "learning_rate": 6.865264399077623e-05, "loss": 1.657, "step": 3642 }, { "epoch": 0.3784148748312039, "grad_norm": 0.3639686703681946, "learning_rate": 6.863750431842865e-05, "loss": 1.536, "step": 3643 }, { "epoch": 0.3785187493507843, "grad_norm": 0.4142943322658539, "learning_rate": 6.862236266133225e-05, "loss": 1.8389, "step": 3644 }, { "epoch": 0.3786226238703646, "grad_norm": 0.42220360040664673, "learning_rate": 6.860721902109948e-05, "loss": 1.6854, "step": 3645 }, { "epoch": 0.37872649838994493, "grad_norm": 0.43140527606010437, "learning_rate": 6.859207339934302e-05, "loss": 1.8258, "step": 3646 }, { "epoch": 0.3788303729095253, "grad_norm": 0.40784284472465515, "learning_rate": 6.857692579767576e-05, "loss": 1.7949, "step": 3647 }, { "epoch": 0.37893424742910564, "grad_norm": 0.39824676513671875, "learning_rate": 6.856177621771083e-05, "loss": 1.6723, "step": 3648 }, { "epoch": 0.37903812194868597, "grad_norm": 0.3684784471988678, "learning_rate": 6.854662466106151e-05, "loss": 1.7018, "step": 3649 }, { "epoch": 0.37914199646826635, "grad_norm": 0.4189784824848175, "learning_rate": 6.853147112934132e-05, "loss": 1.7914, "step": 3650 }, { "epoch": 0.3792458709878467, "grad_norm": 0.41028210520744324, "learning_rate": 6.851631562416402e-05, "loss": 1.7757, "step": 3651 }, { "epoch": 0.379349745507427, "grad_norm": 0.37003424763679504, "learning_rate": 6.850115814714355e-05, "loss": 1.5974, "step": 3652 }, { "epoch": 0.3794536200270074, "grad_norm": 0.4214622676372528, "learning_rate": 6.848599869989403e-05, "loss": 1.8257, "step": 3653 }, { "epoch": 0.3795574945465877, "grad_norm": 0.4845244586467743, "learning_rate": 6.847083728402986e-05, "loss": 1.5854, "step": 3654 }, { "epoch": 0.37966136906616804, "grad_norm": 0.4533901512622833, "learning_rate": 6.845567390116558e-05, "loss": 1.8799, "step": 3655 }, { "epoch": 0.3797652435857484, "grad_norm": 0.43582406640052795, "learning_rate": 6.8440508552916e-05, "loss": 1.9001, "step": 3656 }, { "epoch": 0.37986911810532875, "grad_norm": 0.3865016996860504, "learning_rate": 6.84253412408961e-05, "loss": 1.5654, "step": 3657 }, { "epoch": 0.37997299262490913, "grad_norm": 0.3525942862033844, "learning_rate": 6.841017196672109e-05, "loss": 1.5015, "step": 3658 }, { "epoch": 0.38007686714448946, "grad_norm": 0.3993504047393799, "learning_rate": 6.839500073200636e-05, "loss": 1.7024, "step": 3659 }, { "epoch": 0.3801807416640698, "grad_norm": 0.4344191253185272, "learning_rate": 6.837982753836755e-05, "loss": 1.7241, "step": 3660 }, { "epoch": 0.38028461618365017, "grad_norm": 0.3942391574382782, "learning_rate": 6.836465238742045e-05, "loss": 1.6898, "step": 3661 }, { "epoch": 0.3803884907032305, "grad_norm": 0.4017980396747589, "learning_rate": 6.834947528078114e-05, "loss": 1.8323, "step": 3662 }, { "epoch": 0.3804923652228108, "grad_norm": 0.4126379191875458, "learning_rate": 6.833429622006584e-05, "loss": 1.7061, "step": 3663 }, { "epoch": 0.3805962397423912, "grad_norm": 0.4057486951351166, "learning_rate": 6.8319115206891e-05, "loss": 1.8332, "step": 3664 }, { "epoch": 0.38070011426197153, "grad_norm": 0.4050893783569336, "learning_rate": 6.83039322428733e-05, "loss": 1.6706, "step": 3665 }, { "epoch": 0.38080398878155186, "grad_norm": 0.37479984760284424, "learning_rate": 6.828874732962958e-05, "loss": 1.4825, "step": 3666 }, { "epoch": 0.38090786330113224, "grad_norm": 0.39387476444244385, "learning_rate": 6.827356046877693e-05, "loss": 1.6615, "step": 3667 }, { "epoch": 0.38101173782071257, "grad_norm": 0.35786718130111694, "learning_rate": 6.825837166193263e-05, "loss": 1.5442, "step": 3668 }, { "epoch": 0.38111561234029295, "grad_norm": 0.3664378225803375, "learning_rate": 6.824318091071416e-05, "loss": 1.6057, "step": 3669 }, { "epoch": 0.3812194868598733, "grad_norm": 0.4093641936779022, "learning_rate": 6.822798821673923e-05, "loss": 1.7737, "step": 3670 }, { "epoch": 0.3813233613794536, "grad_norm": 0.3681464195251465, "learning_rate": 6.821279358162575e-05, "loss": 1.506, "step": 3671 }, { "epoch": 0.381427235899034, "grad_norm": 0.4376184344291687, "learning_rate": 6.819759700699181e-05, "loss": 1.7086, "step": 3672 }, { "epoch": 0.3815311104186143, "grad_norm": 0.4148000180721283, "learning_rate": 6.818239849445574e-05, "loss": 1.8619, "step": 3673 }, { "epoch": 0.38163498493819464, "grad_norm": 0.39664286375045776, "learning_rate": 6.816719804563606e-05, "loss": 1.7338, "step": 3674 }, { "epoch": 0.381738859457775, "grad_norm": 0.37723055481910706, "learning_rate": 6.815199566215149e-05, "loss": 1.5691, "step": 3675 }, { "epoch": 0.38184273397735535, "grad_norm": 0.42841121554374695, "learning_rate": 6.813679134562098e-05, "loss": 1.8749, "step": 3676 }, { "epoch": 0.3819466084969357, "grad_norm": 0.3727743625640869, "learning_rate": 6.812158509766366e-05, "loss": 1.6885, "step": 3677 }, { "epoch": 0.38205048301651606, "grad_norm": 0.401883989572525, "learning_rate": 6.810637691989887e-05, "loss": 1.7689, "step": 3678 }, { "epoch": 0.3821543575360964, "grad_norm": 0.3472015857696533, "learning_rate": 6.809116681394618e-05, "loss": 1.6354, "step": 3679 }, { "epoch": 0.3822582320556767, "grad_norm": 0.3772028684616089, "learning_rate": 6.807595478142536e-05, "loss": 1.6749, "step": 3680 }, { "epoch": 0.3823621065752571, "grad_norm": 0.37408995628356934, "learning_rate": 6.806074082395633e-05, "loss": 1.7558, "step": 3681 }, { "epoch": 0.3824659810948374, "grad_norm": 0.4021287262439728, "learning_rate": 6.80455249431593e-05, "loss": 1.6639, "step": 3682 }, { "epoch": 0.3825698556144178, "grad_norm": 0.3701534867286682, "learning_rate": 6.803030714065461e-05, "loss": 1.6497, "step": 3683 }, { "epoch": 0.38267373013399814, "grad_norm": 0.4467286765575409, "learning_rate": 6.801508741806285e-05, "loss": 1.86, "step": 3684 }, { "epoch": 0.38277760465357846, "grad_norm": 0.40490084886550903, "learning_rate": 6.799986577700481e-05, "loss": 1.7865, "step": 3685 }, { "epoch": 0.38288147917315885, "grad_norm": 0.39086583256721497, "learning_rate": 6.798464221910147e-05, "loss": 1.7091, "step": 3686 }, { "epoch": 0.38298535369273917, "grad_norm": 0.4162215292453766, "learning_rate": 6.796941674597402e-05, "loss": 1.7886, "step": 3687 }, { "epoch": 0.3830892282123195, "grad_norm": 0.3935025930404663, "learning_rate": 6.795418935924384e-05, "loss": 1.8681, "step": 3688 }, { "epoch": 0.3831931027318999, "grad_norm": 0.40464386343955994, "learning_rate": 6.793896006053255e-05, "loss": 1.8301, "step": 3689 }, { "epoch": 0.3832969772514802, "grad_norm": 0.3767825961112976, "learning_rate": 6.792372885146195e-05, "loss": 1.6537, "step": 3690 }, { "epoch": 0.38340085177106054, "grad_norm": 0.36534345149993896, "learning_rate": 6.790849573365404e-05, "loss": 1.5565, "step": 3691 }, { "epoch": 0.3835047262906409, "grad_norm": 0.38759198784828186, "learning_rate": 6.7893260708731e-05, "loss": 1.8307, "step": 3692 }, { "epoch": 0.38360860081022125, "grad_norm": 0.38123390078544617, "learning_rate": 6.78780237783153e-05, "loss": 1.6495, "step": 3693 }, { "epoch": 0.38371247532980157, "grad_norm": 0.4280903935432434, "learning_rate": 6.786278494402952e-05, "loss": 1.8522, "step": 3694 }, { "epoch": 0.38381634984938195, "grad_norm": 0.42416825890541077, "learning_rate": 6.784754420749645e-05, "loss": 1.7361, "step": 3695 }, { "epoch": 0.3839202243689623, "grad_norm": 0.37206903100013733, "learning_rate": 6.783230157033915e-05, "loss": 1.7802, "step": 3696 }, { "epoch": 0.38402409888854266, "grad_norm": 0.4096907079219818, "learning_rate": 6.781705703418084e-05, "loss": 1.7411, "step": 3697 }, { "epoch": 0.384127973408123, "grad_norm": 0.38418519496917725, "learning_rate": 6.780181060064493e-05, "loss": 1.4685, "step": 3698 }, { "epoch": 0.3842318479277033, "grad_norm": 0.3541015088558197, "learning_rate": 6.778656227135505e-05, "loss": 1.4894, "step": 3699 }, { "epoch": 0.3843357224472837, "grad_norm": 0.3923327326774597, "learning_rate": 6.777131204793502e-05, "loss": 1.5972, "step": 3700 }, { "epoch": 0.38443959696686403, "grad_norm": 0.38584020733833313, "learning_rate": 6.775605993200889e-05, "loss": 1.474, "step": 3701 }, { "epoch": 0.38454347148644435, "grad_norm": 0.42586928606033325, "learning_rate": 6.774080592520088e-05, "loss": 1.8876, "step": 3702 }, { "epoch": 0.38464734600602474, "grad_norm": 0.4268626570701599, "learning_rate": 6.77255500291354e-05, "loss": 1.8743, "step": 3703 }, { "epoch": 0.38475122052560506, "grad_norm": 0.43447503447532654, "learning_rate": 6.771029224543713e-05, "loss": 1.9229, "step": 3704 }, { "epoch": 0.3848550950451854, "grad_norm": 0.36980974674224854, "learning_rate": 6.769503257573089e-05, "loss": 1.5463, "step": 3705 }, { "epoch": 0.3849589695647658, "grad_norm": 0.3850055932998657, "learning_rate": 6.767977102164167e-05, "loss": 1.724, "step": 3706 }, { "epoch": 0.3850628440843461, "grad_norm": 0.4217112064361572, "learning_rate": 6.766450758479479e-05, "loss": 1.965, "step": 3707 }, { "epoch": 0.3851667186039265, "grad_norm": 0.3799114525318146, "learning_rate": 6.76492422668156e-05, "loss": 1.6066, "step": 3708 }, { "epoch": 0.3852705931235068, "grad_norm": 0.39665165543556213, "learning_rate": 6.76339750693298e-05, "loss": 1.8164, "step": 3709 }, { "epoch": 0.38537446764308714, "grad_norm": 0.38629379868507385, "learning_rate": 6.76187059939632e-05, "loss": 1.6843, "step": 3710 }, { "epoch": 0.3854783421626675, "grad_norm": 0.4092472493648529, "learning_rate": 6.760343504234184e-05, "loss": 1.5059, "step": 3711 }, { "epoch": 0.38558221668224785, "grad_norm": 0.40238040685653687, "learning_rate": 6.758816221609196e-05, "loss": 1.8893, "step": 3712 }, { "epoch": 0.3856860912018282, "grad_norm": 0.39806345105171204, "learning_rate": 6.757288751684e-05, "loss": 1.7954, "step": 3713 }, { "epoch": 0.38578996572140856, "grad_norm": 0.41928762197494507, "learning_rate": 6.75576109462126e-05, "loss": 1.8645, "step": 3714 }, { "epoch": 0.3858938402409889, "grad_norm": 0.4039596915245056, "learning_rate": 6.75423325058366e-05, "loss": 1.8103, "step": 3715 }, { "epoch": 0.3859977147605692, "grad_norm": 0.400309681892395, "learning_rate": 6.7527052197339e-05, "loss": 1.6896, "step": 3716 }, { "epoch": 0.3861015892801496, "grad_norm": 0.43688875436782837, "learning_rate": 6.751177002234706e-05, "loss": 1.9697, "step": 3717 }, { "epoch": 0.3862054637997299, "grad_norm": 0.5165666937828064, "learning_rate": 6.749648598248821e-05, "loss": 1.9304, "step": 3718 }, { "epoch": 0.38630933831931025, "grad_norm": 0.4006554186344147, "learning_rate": 6.74812000793901e-05, "loss": 1.6306, "step": 3719 }, { "epoch": 0.38641321283889063, "grad_norm": 0.3924461007118225, "learning_rate": 6.74659123146805e-05, "loss": 1.6424, "step": 3720 }, { "epoch": 0.38651708735847096, "grad_norm": 0.35347139835357666, "learning_rate": 6.745062268998752e-05, "loss": 1.4823, "step": 3721 }, { "epoch": 0.38662096187805134, "grad_norm": 0.41199618577957153, "learning_rate": 6.743533120693932e-05, "loss": 1.6857, "step": 3722 }, { "epoch": 0.38672483639763167, "grad_norm": 0.40314194560050964, "learning_rate": 6.742003786716435e-05, "loss": 1.7333, "step": 3723 }, { "epoch": 0.386828710917212, "grad_norm": 0.402383416891098, "learning_rate": 6.740474267229123e-05, "loss": 1.7304, "step": 3724 }, { "epoch": 0.3869325854367924, "grad_norm": 0.4002920687198639, "learning_rate": 6.738944562394878e-05, "loss": 1.6984, "step": 3725 }, { "epoch": 0.3870364599563727, "grad_norm": 0.3783153295516968, "learning_rate": 6.737414672376601e-05, "loss": 1.6048, "step": 3726 }, { "epoch": 0.38714033447595303, "grad_norm": 0.4129243791103363, "learning_rate": 6.735884597337214e-05, "loss": 1.8856, "step": 3727 }, { "epoch": 0.3872442089955334, "grad_norm": 0.36337247490882874, "learning_rate": 6.73435433743966e-05, "loss": 1.6613, "step": 3728 }, { "epoch": 0.38734808351511374, "grad_norm": 0.38264912366867065, "learning_rate": 6.732823892846895e-05, "loss": 1.6385, "step": 3729 }, { "epoch": 0.38745195803469407, "grad_norm": 0.394731342792511, "learning_rate": 6.731293263721902e-05, "loss": 1.6883, "step": 3730 }, { "epoch": 0.38755583255427445, "grad_norm": 0.3739268183708191, "learning_rate": 6.72976245022768e-05, "loss": 1.677, "step": 3731 }, { "epoch": 0.3876597070738548, "grad_norm": 0.37496718764305115, "learning_rate": 6.728231452527251e-05, "loss": 1.8645, "step": 3732 }, { "epoch": 0.3877635815934351, "grad_norm": 0.39028218388557434, "learning_rate": 6.726700270783655e-05, "loss": 1.7683, "step": 3733 }, { "epoch": 0.3878674561130155, "grad_norm": 0.4083595871925354, "learning_rate": 6.725168905159945e-05, "loss": 1.821, "step": 3734 }, { "epoch": 0.3879713306325958, "grad_norm": 0.3999233543872833, "learning_rate": 6.723637355819206e-05, "loss": 1.7861, "step": 3735 }, { "epoch": 0.3880752051521762, "grad_norm": 0.3761618435382843, "learning_rate": 6.722105622924533e-05, "loss": 1.7543, "step": 3736 }, { "epoch": 0.3881790796717565, "grad_norm": 0.4310716390609741, "learning_rate": 6.720573706639044e-05, "loss": 1.7804, "step": 3737 }, { "epoch": 0.38828295419133685, "grad_norm": 0.3662225902080536, "learning_rate": 6.719041607125877e-05, "loss": 1.5168, "step": 3738 }, { "epoch": 0.38838682871091723, "grad_norm": 0.3875160813331604, "learning_rate": 6.717509324548187e-05, "loss": 1.801, "step": 3739 }, { "epoch": 0.38849070323049756, "grad_norm": 0.379233181476593, "learning_rate": 6.715976859069151e-05, "loss": 1.6002, "step": 3740 }, { "epoch": 0.3885945777500779, "grad_norm": 0.3968057632446289, "learning_rate": 6.714444210851964e-05, "loss": 1.5895, "step": 3741 }, { "epoch": 0.38869845226965827, "grad_norm": 0.45486825704574585, "learning_rate": 6.712911380059845e-05, "loss": 1.8872, "step": 3742 }, { "epoch": 0.3888023267892386, "grad_norm": 0.38296419382095337, "learning_rate": 6.711378366856023e-05, "loss": 1.7437, "step": 3743 }, { "epoch": 0.3889062013088189, "grad_norm": 0.394843190908432, "learning_rate": 6.709845171403754e-05, "loss": 1.7002, "step": 3744 }, { "epoch": 0.3890100758283993, "grad_norm": 0.4002247154712677, "learning_rate": 6.70831179386631e-05, "loss": 1.7328, "step": 3745 }, { "epoch": 0.38911395034797963, "grad_norm": 0.45309674739837646, "learning_rate": 6.706778234406987e-05, "loss": 1.7358, "step": 3746 }, { "epoch": 0.38921782486756, "grad_norm": 0.40047159790992737, "learning_rate": 6.705244493189094e-05, "loss": 1.7627, "step": 3747 }, { "epoch": 0.38932169938714034, "grad_norm": 0.3902556300163269, "learning_rate": 6.703710570375966e-05, "loss": 1.6005, "step": 3748 }, { "epoch": 0.38942557390672067, "grad_norm": 0.4025695025920868, "learning_rate": 6.702176466130953e-05, "loss": 1.6763, "step": 3749 }, { "epoch": 0.38952944842630105, "grad_norm": 0.40774810314178467, "learning_rate": 6.70064218061742e-05, "loss": 1.8209, "step": 3750 }, { "epoch": 0.3896333229458814, "grad_norm": 0.36347082257270813, "learning_rate": 6.699107713998762e-05, "loss": 1.4882, "step": 3751 }, { "epoch": 0.3897371974654617, "grad_norm": 0.39139434695243835, "learning_rate": 6.697573066438387e-05, "loss": 1.713, "step": 3752 }, { "epoch": 0.3898410719850421, "grad_norm": 0.4328778386116028, "learning_rate": 6.69603823809972e-05, "loss": 1.9039, "step": 3753 }, { "epoch": 0.3899449465046224, "grad_norm": 0.38652610778808594, "learning_rate": 6.694503229146212e-05, "loss": 1.7179, "step": 3754 }, { "epoch": 0.39004882102420274, "grad_norm": 0.36628860235214233, "learning_rate": 6.692968039741328e-05, "loss": 1.5286, "step": 3755 }, { "epoch": 0.3901526955437831, "grad_norm": 0.3640875220298767, "learning_rate": 6.691432670048552e-05, "loss": 1.6235, "step": 3756 }, { "epoch": 0.39025657006336345, "grad_norm": 0.3808233439922333, "learning_rate": 6.689897120231389e-05, "loss": 1.6554, "step": 3757 }, { "epoch": 0.3903604445829438, "grad_norm": 0.4116235375404358, "learning_rate": 6.688361390453368e-05, "loss": 1.7059, "step": 3758 }, { "epoch": 0.39046431910252416, "grad_norm": 0.39567288756370544, "learning_rate": 6.686825480878026e-05, "loss": 1.5193, "step": 3759 }, { "epoch": 0.3905681936221045, "grad_norm": 0.39557141065597534, "learning_rate": 6.685289391668929e-05, "loss": 1.7383, "step": 3760 }, { "epoch": 0.39067206814168487, "grad_norm": 0.40352147817611694, "learning_rate": 6.683753122989656e-05, "loss": 1.6945, "step": 3761 }, { "epoch": 0.3907759426612652, "grad_norm": 0.38249486684799194, "learning_rate": 6.682216675003811e-05, "loss": 1.5765, "step": 3762 }, { "epoch": 0.3908798171808455, "grad_norm": 0.3883412480354309, "learning_rate": 6.680680047875014e-05, "loss": 1.7523, "step": 3763 }, { "epoch": 0.3909836917004259, "grad_norm": 0.38825148344039917, "learning_rate": 6.679143241766898e-05, "loss": 1.7129, "step": 3764 }, { "epoch": 0.39108756622000623, "grad_norm": 0.4497055113315582, "learning_rate": 6.677606256843126e-05, "loss": 1.7823, "step": 3765 }, { "epoch": 0.39119144073958656, "grad_norm": 0.39453697204589844, "learning_rate": 6.676069093267374e-05, "loss": 1.7184, "step": 3766 }, { "epoch": 0.39129531525916694, "grad_norm": 0.4167402982711792, "learning_rate": 6.674531751203335e-05, "loss": 1.921, "step": 3767 }, { "epoch": 0.39139918977874727, "grad_norm": 0.39788275957107544, "learning_rate": 6.672994230814729e-05, "loss": 1.6948, "step": 3768 }, { "epoch": 0.3915030642983276, "grad_norm": 0.3702066242694855, "learning_rate": 6.671456532265285e-05, "loss": 1.6143, "step": 3769 }, { "epoch": 0.391606938817908, "grad_norm": 0.4112524390220642, "learning_rate": 6.669918655718759e-05, "loss": 1.6526, "step": 3770 }, { "epoch": 0.3917108133374883, "grad_norm": 0.4594886898994446, "learning_rate": 6.668380601338924e-05, "loss": 1.8916, "step": 3771 }, { "epoch": 0.3918146878570687, "grad_norm": 0.3847801089286804, "learning_rate": 6.666842369289566e-05, "loss": 1.803, "step": 3772 }, { "epoch": 0.391918562376649, "grad_norm": 0.413424551486969, "learning_rate": 6.665303959734499e-05, "loss": 1.7646, "step": 3773 }, { "epoch": 0.39202243689622934, "grad_norm": 0.42914092540740967, "learning_rate": 6.66376537283755e-05, "loss": 1.8358, "step": 3774 }, { "epoch": 0.3921263114158097, "grad_norm": 0.42516449093818665, "learning_rate": 6.662226608762566e-05, "loss": 1.7087, "step": 3775 }, { "epoch": 0.39223018593539005, "grad_norm": 0.40384045243263245, "learning_rate": 6.660687667673416e-05, "loss": 1.81, "step": 3776 }, { "epoch": 0.3923340604549704, "grad_norm": 0.3911544978618622, "learning_rate": 6.659148549733983e-05, "loss": 1.6589, "step": 3777 }, { "epoch": 0.39243793497455076, "grad_norm": 0.3848823606967926, "learning_rate": 6.657609255108169e-05, "loss": 1.653, "step": 3778 }, { "epoch": 0.3925418094941311, "grad_norm": 0.38892778754234314, "learning_rate": 6.656069783959901e-05, "loss": 1.7404, "step": 3779 }, { "epoch": 0.3926456840137114, "grad_norm": 0.4406839609146118, "learning_rate": 6.654530136453118e-05, "loss": 2.1317, "step": 3780 }, { "epoch": 0.3927495585332918, "grad_norm": 0.39131468534469604, "learning_rate": 6.652990312751781e-05, "loss": 1.864, "step": 3781 }, { "epoch": 0.3928534330528721, "grad_norm": 0.38605618476867676, "learning_rate": 6.65145031301987e-05, "loss": 1.7456, "step": 3782 }, { "epoch": 0.39295730757245245, "grad_norm": 0.3929480314254761, "learning_rate": 6.649910137421383e-05, "loss": 1.7215, "step": 3783 }, { "epoch": 0.39306118209203283, "grad_norm": 0.4117045998573303, "learning_rate": 6.648369786120336e-05, "loss": 1.8767, "step": 3784 }, { "epoch": 0.39316505661161316, "grad_norm": 0.4199231266975403, "learning_rate": 6.646829259280765e-05, "loss": 1.7678, "step": 3785 }, { "epoch": 0.39326893113119354, "grad_norm": 0.3810809254646301, "learning_rate": 6.645288557066722e-05, "loss": 1.6417, "step": 3786 }, { "epoch": 0.39337280565077387, "grad_norm": 0.40853750705718994, "learning_rate": 6.643747679642282e-05, "loss": 1.7405, "step": 3787 }, { "epoch": 0.3934766801703542, "grad_norm": 0.3977396488189697, "learning_rate": 6.642206627171536e-05, "loss": 1.7114, "step": 3788 }, { "epoch": 0.3935805546899346, "grad_norm": 0.40110906958580017, "learning_rate": 6.640665399818594e-05, "loss": 1.433, "step": 3789 }, { "epoch": 0.3936844292095149, "grad_norm": 0.37747594714164734, "learning_rate": 6.639123997747585e-05, "loss": 1.6693, "step": 3790 }, { "epoch": 0.39378830372909523, "grad_norm": 0.39845001697540283, "learning_rate": 6.637582421122657e-05, "loss": 1.7434, "step": 3791 }, { "epoch": 0.3938921782486756, "grad_norm": 0.3821636438369751, "learning_rate": 6.636040670107972e-05, "loss": 1.6637, "step": 3792 }, { "epoch": 0.39399605276825594, "grad_norm": 0.4459479749202728, "learning_rate": 6.63449874486772e-05, "loss": 1.9512, "step": 3793 }, { "epoch": 0.39409992728783627, "grad_norm": 0.35890012979507446, "learning_rate": 6.632956645566099e-05, "loss": 1.5369, "step": 3794 }, { "epoch": 0.39420380180741665, "grad_norm": 0.40800046920776367, "learning_rate": 6.631414372367333e-05, "loss": 1.7759, "step": 3795 }, { "epoch": 0.394307676326997, "grad_norm": 0.42685750126838684, "learning_rate": 6.629871925435663e-05, "loss": 1.8498, "step": 3796 }, { "epoch": 0.3944115508465773, "grad_norm": 0.3911482095718384, "learning_rate": 6.628329304935346e-05, "loss": 1.7042, "step": 3797 }, { "epoch": 0.3945154253661577, "grad_norm": 0.3812938332557678, "learning_rate": 6.62678651103066e-05, "loss": 1.6208, "step": 3798 }, { "epoch": 0.394619299885738, "grad_norm": 0.4290582239627838, "learning_rate": 6.625243543885899e-05, "loss": 1.8095, "step": 3799 }, { "epoch": 0.3947231744053184, "grad_norm": 0.38044747710227966, "learning_rate": 6.623700403665378e-05, "loss": 1.821, "step": 3800 }, { "epoch": 0.3948270489248987, "grad_norm": 0.42807307839393616, "learning_rate": 6.622157090533431e-05, "loss": 1.9329, "step": 3801 }, { "epoch": 0.39493092344447905, "grad_norm": 0.3865020275115967, "learning_rate": 6.620613604654405e-05, "loss": 1.641, "step": 3802 }, { "epoch": 0.39503479796405944, "grad_norm": 0.4715527892112732, "learning_rate": 6.619069946192672e-05, "loss": 1.8927, "step": 3803 }, { "epoch": 0.39513867248363976, "grad_norm": 0.3802943229675293, "learning_rate": 6.61752611531262e-05, "loss": 1.7063, "step": 3804 }, { "epoch": 0.3952425470032201, "grad_norm": 0.38618597388267517, "learning_rate": 6.615982112178653e-05, "loss": 1.6576, "step": 3805 }, { "epoch": 0.3953464215228005, "grad_norm": 0.4029250741004944, "learning_rate": 6.614437936955196e-05, "loss": 1.7814, "step": 3806 }, { "epoch": 0.3954502960423808, "grad_norm": 0.3960702121257782, "learning_rate": 6.612893589806693e-05, "loss": 1.7102, "step": 3807 }, { "epoch": 0.3955541705619611, "grad_norm": 0.38714170455932617, "learning_rate": 6.611349070897604e-05, "loss": 1.6924, "step": 3808 }, { "epoch": 0.3956580450815415, "grad_norm": 0.4107656180858612, "learning_rate": 6.609804380392406e-05, "loss": 1.5255, "step": 3809 }, { "epoch": 0.39576191960112184, "grad_norm": 0.40633097290992737, "learning_rate": 6.6082595184556e-05, "loss": 1.734, "step": 3810 }, { "epoch": 0.3958657941207022, "grad_norm": 0.4659360945224762, "learning_rate": 6.606714485251699e-05, "loss": 1.7854, "step": 3811 }, { "epoch": 0.39596966864028255, "grad_norm": 0.4087457060813904, "learning_rate": 6.605169280945238e-05, "loss": 1.7713, "step": 3812 }, { "epoch": 0.3960735431598629, "grad_norm": 0.38341912627220154, "learning_rate": 6.603623905700771e-05, "loss": 1.5804, "step": 3813 }, { "epoch": 0.39617741767944326, "grad_norm": 0.41718804836273193, "learning_rate": 6.602078359682865e-05, "loss": 1.7562, "step": 3814 }, { "epoch": 0.3962812921990236, "grad_norm": 0.4072135388851166, "learning_rate": 6.600532643056112e-05, "loss": 1.6313, "step": 3815 }, { "epoch": 0.3963851667186039, "grad_norm": 0.4200040400028229, "learning_rate": 6.598986755985116e-05, "loss": 1.7867, "step": 3816 }, { "epoch": 0.3964890412381843, "grad_norm": 0.38370397686958313, "learning_rate": 6.597440698634503e-05, "loss": 1.5696, "step": 3817 }, { "epoch": 0.3965929157577646, "grad_norm": 0.4030844271183014, "learning_rate": 6.595894471168917e-05, "loss": 1.688, "step": 3818 }, { "epoch": 0.39669679027734495, "grad_norm": 0.4080040454864502, "learning_rate": 6.594348073753019e-05, "loss": 1.7786, "step": 3819 }, { "epoch": 0.39680066479692533, "grad_norm": 0.36491355299949646, "learning_rate": 6.592801506551486e-05, "loss": 1.5053, "step": 3820 }, { "epoch": 0.39690453931650566, "grad_norm": 0.4053652882575989, "learning_rate": 6.591254769729018e-05, "loss": 1.7091, "step": 3821 }, { "epoch": 0.397008413836086, "grad_norm": 0.40529873967170715, "learning_rate": 6.589707863450327e-05, "loss": 1.6866, "step": 3822 }, { "epoch": 0.39711228835566637, "grad_norm": 0.3853190243244171, "learning_rate": 6.588160787880151e-05, "loss": 1.703, "step": 3823 }, { "epoch": 0.3972161628752467, "grad_norm": 0.39480873942375183, "learning_rate": 6.586613543183237e-05, "loss": 1.6685, "step": 3824 }, { "epoch": 0.3973200373948271, "grad_norm": 0.38072872161865234, "learning_rate": 6.58506612952436e-05, "loss": 1.6512, "step": 3825 }, { "epoch": 0.3974239119144074, "grad_norm": 0.42218974232673645, "learning_rate": 6.583518547068302e-05, "loss": 1.7624, "step": 3826 }, { "epoch": 0.39752778643398773, "grad_norm": 0.34545183181762695, "learning_rate": 6.581970795979871e-05, "loss": 1.4234, "step": 3827 }, { "epoch": 0.3976316609535681, "grad_norm": 0.39277178049087524, "learning_rate": 6.580422876423891e-05, "loss": 1.7441, "step": 3828 }, { "epoch": 0.39773553547314844, "grad_norm": 0.39970070123672485, "learning_rate": 6.578874788565201e-05, "loss": 1.6835, "step": 3829 }, { "epoch": 0.39783940999272877, "grad_norm": 0.38639700412750244, "learning_rate": 6.577326532568663e-05, "loss": 1.5985, "step": 3830 }, { "epoch": 0.39794328451230915, "grad_norm": 0.3735407292842865, "learning_rate": 6.575778108599151e-05, "loss": 1.734, "step": 3831 }, { "epoch": 0.3980471590318895, "grad_norm": 0.38914212584495544, "learning_rate": 6.574229516821564e-05, "loss": 1.6279, "step": 3832 }, { "epoch": 0.3981510335514698, "grad_norm": 0.38888975977897644, "learning_rate": 6.572680757400813e-05, "loss": 1.7271, "step": 3833 }, { "epoch": 0.3982549080710502, "grad_norm": 0.4465731382369995, "learning_rate": 6.571131830501827e-05, "loss": 1.8058, "step": 3834 }, { "epoch": 0.3983587825906305, "grad_norm": 0.3945710062980652, "learning_rate": 6.569582736289556e-05, "loss": 1.6509, "step": 3835 }, { "epoch": 0.39846265711021084, "grad_norm": 0.39435771107673645, "learning_rate": 6.56803347492897e-05, "loss": 1.5513, "step": 3836 }, { "epoch": 0.3985665316297912, "grad_norm": 0.3789151906967163, "learning_rate": 6.566484046585047e-05, "loss": 1.5777, "step": 3837 }, { "epoch": 0.39867040614937155, "grad_norm": 0.39071643352508545, "learning_rate": 6.564934451422793e-05, "loss": 1.8612, "step": 3838 }, { "epoch": 0.39877428066895193, "grad_norm": 0.3789994716644287, "learning_rate": 6.563384689607227e-05, "loss": 1.7228, "step": 3839 }, { "epoch": 0.39887815518853226, "grad_norm": 0.3888130486011505, "learning_rate": 6.561834761303386e-05, "loss": 1.7733, "step": 3840 }, { "epoch": 0.3989820297081126, "grad_norm": 0.41186729073524475, "learning_rate": 6.560284666676327e-05, "loss": 1.8526, "step": 3841 }, { "epoch": 0.39908590422769297, "grad_norm": 0.3710212707519531, "learning_rate": 6.558734405891121e-05, "loss": 1.4567, "step": 3842 }, { "epoch": 0.3991897787472733, "grad_norm": 0.4276915490627289, "learning_rate": 6.557183979112859e-05, "loss": 1.8229, "step": 3843 }, { "epoch": 0.3992936532668536, "grad_norm": 0.4046017527580261, "learning_rate": 6.55563338650665e-05, "loss": 1.7953, "step": 3844 }, { "epoch": 0.399397527786434, "grad_norm": 0.36397168040275574, "learning_rate": 6.554082628237619e-05, "loss": 1.4815, "step": 3845 }, { "epoch": 0.39950140230601433, "grad_norm": 0.49856171011924744, "learning_rate": 6.552531704470911e-05, "loss": 1.9411, "step": 3846 }, { "epoch": 0.39960527682559466, "grad_norm": 0.36472898721694946, "learning_rate": 6.550980615371687e-05, "loss": 1.6889, "step": 3847 }, { "epoch": 0.39970915134517504, "grad_norm": 0.37358102202415466, "learning_rate": 6.549429361105126e-05, "loss": 1.6639, "step": 3848 }, { "epoch": 0.39981302586475537, "grad_norm": 0.3778083920478821, "learning_rate": 6.547877941836424e-05, "loss": 1.7107, "step": 3849 }, { "epoch": 0.39991690038433575, "grad_norm": 0.424234002828598, "learning_rate": 6.546326357730794e-05, "loss": 1.7377, "step": 3850 }, { "epoch": 0.4000207749039161, "grad_norm": 0.4264697730541229, "learning_rate": 6.544774608953468e-05, "loss": 1.8146, "step": 3851 }, { "epoch": 0.4001246494234964, "grad_norm": 0.4023953676223755, "learning_rate": 6.543222695669697e-05, "loss": 1.6474, "step": 3852 }, { "epoch": 0.4002285239430768, "grad_norm": 0.41954219341278076, "learning_rate": 6.541670618044748e-05, "loss": 1.8214, "step": 3853 }, { "epoch": 0.4003323984626571, "grad_norm": 0.41983938217163086, "learning_rate": 6.540118376243903e-05, "loss": 1.7134, "step": 3854 }, { "epoch": 0.40043627298223744, "grad_norm": 0.4455628991127014, "learning_rate": 6.538565970432463e-05, "loss": 1.974, "step": 3855 }, { "epoch": 0.4005401475018178, "grad_norm": 0.41283226013183594, "learning_rate": 6.537013400775748e-05, "loss": 1.5895, "step": 3856 }, { "epoch": 0.40064402202139815, "grad_norm": 0.4059913158416748, "learning_rate": 6.535460667439097e-05, "loss": 1.6404, "step": 3857 }, { "epoch": 0.4007478965409785, "grad_norm": 0.4247495234012604, "learning_rate": 6.53390777058786e-05, "loss": 1.8849, "step": 3858 }, { "epoch": 0.40085177106055886, "grad_norm": 0.4334213137626648, "learning_rate": 6.532354710387411e-05, "loss": 1.7472, "step": 3859 }, { "epoch": 0.4009556455801392, "grad_norm": 0.3892272114753723, "learning_rate": 6.530801487003139e-05, "loss": 1.601, "step": 3860 }, { "epoch": 0.4010595200997195, "grad_norm": 0.3871516287326813, "learning_rate": 6.529248100600449e-05, "loss": 1.7275, "step": 3861 }, { "epoch": 0.4011633946192999, "grad_norm": 0.3974870443344116, "learning_rate": 6.527694551344765e-05, "loss": 1.6634, "step": 3862 }, { "epoch": 0.4012672691388802, "grad_norm": 0.3988457918167114, "learning_rate": 6.526140839401527e-05, "loss": 1.6647, "step": 3863 }, { "epoch": 0.4013711436584606, "grad_norm": 0.42209574580192566, "learning_rate": 6.524586964936194e-05, "loss": 1.8112, "step": 3864 }, { "epoch": 0.40147501817804093, "grad_norm": 0.4116060733795166, "learning_rate": 6.523032928114242e-05, "loss": 1.7179, "step": 3865 }, { "epoch": 0.40157889269762126, "grad_norm": 0.341496080160141, "learning_rate": 6.521478729101164e-05, "loss": 1.4234, "step": 3866 }, { "epoch": 0.40168276721720164, "grad_norm": 0.3907933831214905, "learning_rate": 6.519924368062468e-05, "loss": 1.7079, "step": 3867 }, { "epoch": 0.40178664173678197, "grad_norm": 0.4084981679916382, "learning_rate": 6.518369845163683e-05, "loss": 1.6538, "step": 3868 }, { "epoch": 0.4018905162563623, "grad_norm": 0.37673652172088623, "learning_rate": 6.516815160570355e-05, "loss": 1.7034, "step": 3869 }, { "epoch": 0.4019943907759427, "grad_norm": 0.36309683322906494, "learning_rate": 6.515260314448041e-05, "loss": 1.5712, "step": 3870 }, { "epoch": 0.402098265295523, "grad_norm": 0.40556076169013977, "learning_rate": 6.513705306962325e-05, "loss": 1.6909, "step": 3871 }, { "epoch": 0.40220213981510333, "grad_norm": 0.39010584354400635, "learning_rate": 6.5121501382788e-05, "loss": 1.5812, "step": 3872 }, { "epoch": 0.4023060143346837, "grad_norm": 0.4054809510707855, "learning_rate": 6.51059480856308e-05, "loss": 1.6611, "step": 3873 }, { "epoch": 0.40240988885426404, "grad_norm": 0.3722795844078064, "learning_rate": 6.509039317980798e-05, "loss": 1.5964, "step": 3874 }, { "epoch": 0.40251376337384437, "grad_norm": 0.41028568148612976, "learning_rate": 6.5074836666976e-05, "loss": 1.6734, "step": 3875 }, { "epoch": 0.40261763789342475, "grad_norm": 0.39521047472953796, "learning_rate": 6.505927854879148e-05, "loss": 1.5993, "step": 3876 }, { "epoch": 0.4027215124130051, "grad_norm": 0.3818994462490082, "learning_rate": 6.504371882691126e-05, "loss": 1.6791, "step": 3877 }, { "epoch": 0.40282538693258546, "grad_norm": 0.4162769615650177, "learning_rate": 6.502815750299235e-05, "loss": 1.7729, "step": 3878 }, { "epoch": 0.4029292614521658, "grad_norm": 0.38468343019485474, "learning_rate": 6.501259457869187e-05, "loss": 1.7063, "step": 3879 }, { "epoch": 0.4030331359717461, "grad_norm": 0.3765242397785187, "learning_rate": 6.49970300556672e-05, "loss": 1.546, "step": 3880 }, { "epoch": 0.4031370104913265, "grad_norm": 0.3874824345111847, "learning_rate": 6.498146393557578e-05, "loss": 1.7616, "step": 3881 }, { "epoch": 0.4032408850109068, "grad_norm": 0.39522698521614075, "learning_rate": 6.496589622007532e-05, "loss": 1.7472, "step": 3882 }, { "epoch": 0.40334475953048715, "grad_norm": 0.3761104941368103, "learning_rate": 6.495032691082367e-05, "loss": 1.5378, "step": 3883 }, { "epoch": 0.40344863405006753, "grad_norm": 0.4124884605407715, "learning_rate": 6.493475600947879e-05, "loss": 1.6228, "step": 3884 }, { "epoch": 0.40355250856964786, "grad_norm": 0.3794013559818268, "learning_rate": 6.491918351769891e-05, "loss": 1.8077, "step": 3885 }, { "epoch": 0.4036563830892282, "grad_norm": 0.43968185782432556, "learning_rate": 6.490360943714235e-05, "loss": 1.8393, "step": 3886 }, { "epoch": 0.40376025760880857, "grad_norm": 0.4507423937320709, "learning_rate": 6.488803376946764e-05, "loss": 1.9139, "step": 3887 }, { "epoch": 0.4038641321283889, "grad_norm": 0.39006999135017395, "learning_rate": 6.48724565163335e-05, "loss": 1.7191, "step": 3888 }, { "epoch": 0.4039680066479693, "grad_norm": 0.3846725523471832, "learning_rate": 6.48568776793987e-05, "loss": 1.6855, "step": 3889 }, { "epoch": 0.4040718811675496, "grad_norm": 0.4519991874694824, "learning_rate": 6.484129726032233e-05, "loss": 1.7529, "step": 3890 }, { "epoch": 0.40417575568712993, "grad_norm": 0.36061856150627136, "learning_rate": 6.482571526076357e-05, "loss": 1.5992, "step": 3891 }, { "epoch": 0.4042796302067103, "grad_norm": 0.4295485317707062, "learning_rate": 6.481013168238178e-05, "loss": 1.7882, "step": 3892 }, { "epoch": 0.40438350472629064, "grad_norm": 0.4181903600692749, "learning_rate": 6.479454652683649e-05, "loss": 1.9548, "step": 3893 }, { "epoch": 0.40448737924587097, "grad_norm": 0.37351468205451965, "learning_rate": 6.47789597957874e-05, "loss": 1.5784, "step": 3894 }, { "epoch": 0.40459125376545135, "grad_norm": 0.505387544631958, "learning_rate": 6.476337149089438e-05, "loss": 1.9224, "step": 3895 }, { "epoch": 0.4046951282850317, "grad_norm": 0.3897416293621063, "learning_rate": 6.474778161381743e-05, "loss": 1.4597, "step": 3896 }, { "epoch": 0.404799002804612, "grad_norm": 0.44174665212631226, "learning_rate": 6.47321901662168e-05, "loss": 1.9036, "step": 3897 }, { "epoch": 0.4049028773241924, "grad_norm": 0.39586788415908813, "learning_rate": 6.471659714975282e-05, "loss": 1.802, "step": 3898 }, { "epoch": 0.4050067518437727, "grad_norm": 0.40716326236724854, "learning_rate": 6.470100256608603e-05, "loss": 1.7556, "step": 3899 }, { "epoch": 0.40511062636335304, "grad_norm": 0.3839757740497589, "learning_rate": 6.468540641687716e-05, "loss": 1.6419, "step": 3900 }, { "epoch": 0.4052145008829334, "grad_norm": 0.3808140754699707, "learning_rate": 6.466980870378704e-05, "loss": 1.7802, "step": 3901 }, { "epoch": 0.40531837540251375, "grad_norm": 0.4827220141887665, "learning_rate": 6.465420942847673e-05, "loss": 1.9368, "step": 3902 }, { "epoch": 0.40542224992209414, "grad_norm": 0.42109206318855286, "learning_rate": 6.463860859260742e-05, "loss": 1.8801, "step": 3903 }, { "epoch": 0.40552612444167446, "grad_norm": 0.41569140553474426, "learning_rate": 6.462300619784048e-05, "loss": 1.8356, "step": 3904 }, { "epoch": 0.4056299989612548, "grad_norm": 0.3784182071685791, "learning_rate": 6.460740224583746e-05, "loss": 1.5941, "step": 3905 }, { "epoch": 0.40573387348083517, "grad_norm": 0.38103383779525757, "learning_rate": 6.459179673826001e-05, "loss": 1.8269, "step": 3906 }, { "epoch": 0.4058377480004155, "grad_norm": 0.38035640120506287, "learning_rate": 6.457618967677004e-05, "loss": 1.7104, "step": 3907 }, { "epoch": 0.4059416225199958, "grad_norm": 0.4427489638328552, "learning_rate": 6.456058106302958e-05, "loss": 2.0598, "step": 3908 }, { "epoch": 0.4060454970395762, "grad_norm": 0.38776329159736633, "learning_rate": 6.454497089870082e-05, "loss": 1.8005, "step": 3909 }, { "epoch": 0.40614937155915654, "grad_norm": 0.39295095205307007, "learning_rate": 6.45293591854461e-05, "loss": 1.5732, "step": 3910 }, { "epoch": 0.40625324607873686, "grad_norm": 0.4256858229637146, "learning_rate": 6.451374592492796e-05, "loss": 1.8357, "step": 3911 }, { "epoch": 0.40635712059831725, "grad_norm": 0.45658764243125916, "learning_rate": 6.449813111880909e-05, "loss": 1.851, "step": 3912 }, { "epoch": 0.40646099511789757, "grad_norm": 0.40819069743156433, "learning_rate": 6.448251476875235e-05, "loss": 1.4937, "step": 3913 }, { "epoch": 0.4065648696374779, "grad_norm": 0.35700273513793945, "learning_rate": 6.446689687642076e-05, "loss": 1.6035, "step": 3914 }, { "epoch": 0.4066687441570583, "grad_norm": 0.3629886209964752, "learning_rate": 6.445127744347748e-05, "loss": 1.5591, "step": 3915 }, { "epoch": 0.4067726186766386, "grad_norm": 0.38591131567955017, "learning_rate": 6.443565647158589e-05, "loss": 1.5891, "step": 3916 }, { "epoch": 0.406876493196219, "grad_norm": 0.41065141558647156, "learning_rate": 6.442003396240949e-05, "loss": 1.8029, "step": 3917 }, { "epoch": 0.4069803677157993, "grad_norm": 0.4433719515800476, "learning_rate": 6.440440991761195e-05, "loss": 1.8338, "step": 3918 }, { "epoch": 0.40708424223537965, "grad_norm": 0.40136539936065674, "learning_rate": 6.438878433885711e-05, "loss": 1.5756, "step": 3919 }, { "epoch": 0.40718811675496003, "grad_norm": 0.39997127652168274, "learning_rate": 6.437315722780898e-05, "loss": 1.6578, "step": 3920 }, { "epoch": 0.40729199127454035, "grad_norm": 0.41028451919555664, "learning_rate": 6.435752858613171e-05, "loss": 1.7394, "step": 3921 }, { "epoch": 0.4073958657941207, "grad_norm": 0.3836668133735657, "learning_rate": 6.434189841548965e-05, "loss": 1.7368, "step": 3922 }, { "epoch": 0.40749974031370106, "grad_norm": 0.38762718439102173, "learning_rate": 6.432626671754729e-05, "loss": 1.6122, "step": 3923 }, { "epoch": 0.4076036148332814, "grad_norm": 0.37233954668045044, "learning_rate": 6.431063349396926e-05, "loss": 1.5676, "step": 3924 }, { "epoch": 0.4077074893528617, "grad_norm": 0.3751562237739563, "learning_rate": 6.429499874642038e-05, "loss": 1.5929, "step": 3925 }, { "epoch": 0.4078113638724421, "grad_norm": 0.41311609745025635, "learning_rate": 6.427936247656565e-05, "loss": 1.4869, "step": 3926 }, { "epoch": 0.40791523839202243, "grad_norm": 0.37297797203063965, "learning_rate": 6.42637246860702e-05, "loss": 1.6425, "step": 3927 }, { "epoch": 0.4080191129116028, "grad_norm": 0.4266083240509033, "learning_rate": 6.424808537659932e-05, "loss": 1.7858, "step": 3928 }, { "epoch": 0.40812298743118314, "grad_norm": 0.46376118063926697, "learning_rate": 6.423244454981851e-05, "loss": 1.7072, "step": 3929 }, { "epoch": 0.40822686195076346, "grad_norm": 0.4833356440067291, "learning_rate": 6.421680220739336e-05, "loss": 1.792, "step": 3930 }, { "epoch": 0.40833073647034385, "grad_norm": 0.42818766832351685, "learning_rate": 6.420115835098967e-05, "loss": 1.9283, "step": 3931 }, { "epoch": 0.4084346109899242, "grad_norm": 0.41609877347946167, "learning_rate": 6.418551298227338e-05, "loss": 1.6607, "step": 3932 }, { "epoch": 0.4085384855095045, "grad_norm": 0.3854401111602783, "learning_rate": 6.416986610291064e-05, "loss": 1.7383, "step": 3933 }, { "epoch": 0.4086423600290849, "grad_norm": 0.4421563744544983, "learning_rate": 6.415421771456766e-05, "loss": 1.5812, "step": 3934 }, { "epoch": 0.4087462345486652, "grad_norm": 0.40960294008255005, "learning_rate": 6.41385678189109e-05, "loss": 1.7514, "step": 3935 }, { "epoch": 0.40885010906824554, "grad_norm": 0.37743479013442993, "learning_rate": 6.412291641760697e-05, "loss": 1.6141, "step": 3936 }, { "epoch": 0.4089539835878259, "grad_norm": 0.3768305480480194, "learning_rate": 6.410726351232259e-05, "loss": 1.6868, "step": 3937 }, { "epoch": 0.40905785810740625, "grad_norm": 0.40039536356925964, "learning_rate": 6.409160910472469e-05, "loss": 1.7051, "step": 3938 }, { "epoch": 0.4091617326269866, "grad_norm": 0.4146733283996582, "learning_rate": 6.407595319648034e-05, "loss": 1.7432, "step": 3939 }, { "epoch": 0.40926560714656696, "grad_norm": 0.3897091746330261, "learning_rate": 6.406029578925677e-05, "loss": 1.6319, "step": 3940 }, { "epoch": 0.4093694816661473, "grad_norm": 0.3977634608745575, "learning_rate": 6.404463688472138e-05, "loss": 1.6741, "step": 3941 }, { "epoch": 0.40947335618572767, "grad_norm": 0.3753795027732849, "learning_rate": 6.402897648454171e-05, "loss": 1.6135, "step": 3942 }, { "epoch": 0.409577230705308, "grad_norm": 0.40746620297431946, "learning_rate": 6.401331459038546e-05, "loss": 1.6742, "step": 3943 }, { "epoch": 0.4096811052248883, "grad_norm": 0.4224250614643097, "learning_rate": 6.399765120392054e-05, "loss": 1.7462, "step": 3944 }, { "epoch": 0.4097849797444687, "grad_norm": 0.4034820795059204, "learning_rate": 6.398198632681494e-05, "loss": 1.6549, "step": 3945 }, { "epoch": 0.40988885426404903, "grad_norm": 0.40801072120666504, "learning_rate": 6.396631996073686e-05, "loss": 1.5801, "step": 3946 }, { "epoch": 0.40999272878362936, "grad_norm": 0.4579329192638397, "learning_rate": 6.395065210735464e-05, "loss": 1.6841, "step": 3947 }, { "epoch": 0.41009660330320974, "grad_norm": 0.4300961494445801, "learning_rate": 6.39349827683368e-05, "loss": 1.8659, "step": 3948 }, { "epoch": 0.41020047782279007, "grad_norm": 0.4603518843650818, "learning_rate": 6.391931194535199e-05, "loss": 1.82, "step": 3949 }, { "epoch": 0.4103043523423704, "grad_norm": 0.36023688316345215, "learning_rate": 6.390363964006903e-05, "loss": 1.5473, "step": 3950 }, { "epoch": 0.4104082268619508, "grad_norm": 0.36991775035858154, "learning_rate": 6.38879658541569e-05, "loss": 1.554, "step": 3951 }, { "epoch": 0.4105121013815311, "grad_norm": 0.3790716528892517, "learning_rate": 6.387229058928475e-05, "loss": 1.6563, "step": 3952 }, { "epoch": 0.41061597590111143, "grad_norm": 0.38919469714164734, "learning_rate": 6.385661384712185e-05, "loss": 1.6243, "step": 3953 }, { "epoch": 0.4107198504206918, "grad_norm": 0.3934738337993622, "learning_rate": 6.384093562933765e-05, "loss": 1.6521, "step": 3954 }, { "epoch": 0.41082372494027214, "grad_norm": 0.39593058824539185, "learning_rate": 6.382525593760178e-05, "loss": 1.7887, "step": 3955 }, { "epoch": 0.4109275994598525, "grad_norm": 0.47822320461273193, "learning_rate": 6.380957477358399e-05, "loss": 1.8647, "step": 3956 }, { "epoch": 0.41103147397943285, "grad_norm": 0.44262126088142395, "learning_rate": 6.379389213895421e-05, "loss": 1.8746, "step": 3957 }, { "epoch": 0.4111353484990132, "grad_norm": 0.37435340881347656, "learning_rate": 6.377820803538253e-05, "loss": 1.6782, "step": 3958 }, { "epoch": 0.41123922301859356, "grad_norm": 0.41944101452827454, "learning_rate": 6.376252246453914e-05, "loss": 1.7188, "step": 3959 }, { "epoch": 0.4113430975381739, "grad_norm": 0.43215543031692505, "learning_rate": 6.374683542809447e-05, "loss": 1.8221, "step": 3960 }, { "epoch": 0.4114469720577542, "grad_norm": 0.40915408730506897, "learning_rate": 6.373114692771906e-05, "loss": 1.7294, "step": 3961 }, { "epoch": 0.4115508465773346, "grad_norm": 0.41122907400131226, "learning_rate": 6.371545696508358e-05, "loss": 1.708, "step": 3962 }, { "epoch": 0.4116547210969149, "grad_norm": 0.3763563930988312, "learning_rate": 6.369976554185896e-05, "loss": 1.8057, "step": 3963 }, { "epoch": 0.41175859561649525, "grad_norm": 0.4643213748931885, "learning_rate": 6.368407265971616e-05, "loss": 1.6913, "step": 3964 }, { "epoch": 0.41186247013607563, "grad_norm": 0.3934086263179779, "learning_rate": 6.366837832032635e-05, "loss": 1.8158, "step": 3965 }, { "epoch": 0.41196634465565596, "grad_norm": 0.35763460397720337, "learning_rate": 6.365268252536087e-05, "loss": 1.5065, "step": 3966 }, { "epoch": 0.41207021917523634, "grad_norm": 0.381234347820282, "learning_rate": 6.36369852764912e-05, "loss": 1.672, "step": 3967 }, { "epoch": 0.41217409369481667, "grad_norm": 0.4128655195236206, "learning_rate": 6.362128657538897e-05, "loss": 1.8548, "step": 3968 }, { "epoch": 0.412277968214397, "grad_norm": 0.42314839363098145, "learning_rate": 6.360558642372597e-05, "loss": 1.8469, "step": 3969 }, { "epoch": 0.4123818427339774, "grad_norm": 0.3665439188480377, "learning_rate": 6.358988482317414e-05, "loss": 1.6893, "step": 3970 }, { "epoch": 0.4124857172535577, "grad_norm": 0.3973866105079651, "learning_rate": 6.357418177540558e-05, "loss": 1.5002, "step": 3971 }, { "epoch": 0.41258959177313803, "grad_norm": 0.40862172842025757, "learning_rate": 6.355847728209257e-05, "loss": 1.817, "step": 3972 }, { "epoch": 0.4126934662927184, "grad_norm": 0.439984530210495, "learning_rate": 6.354277134490746e-05, "loss": 1.7698, "step": 3973 }, { "epoch": 0.41279734081229874, "grad_norm": 0.3773249685764313, "learning_rate": 6.352706396552285e-05, "loss": 1.4967, "step": 3974 }, { "epoch": 0.41290121533187907, "grad_norm": 0.4068536162376404, "learning_rate": 6.351135514561144e-05, "loss": 1.6282, "step": 3975 }, { "epoch": 0.41300508985145945, "grad_norm": 0.3690778613090515, "learning_rate": 6.34956448868461e-05, "loss": 1.637, "step": 3976 }, { "epoch": 0.4131089643710398, "grad_norm": 0.4202233552932739, "learning_rate": 6.347993319089985e-05, "loss": 1.9794, "step": 3977 }, { "epoch": 0.4132128388906201, "grad_norm": 0.44085201621055603, "learning_rate": 6.346422005944586e-05, "loss": 1.8328, "step": 3978 }, { "epoch": 0.4133167134102005, "grad_norm": 0.39549145102500916, "learning_rate": 6.344850549415746e-05, "loss": 1.7727, "step": 3979 }, { "epoch": 0.4134205879297808, "grad_norm": 0.39016398787498474, "learning_rate": 6.343278949670812e-05, "loss": 1.6293, "step": 3980 }, { "epoch": 0.4135244624493612, "grad_norm": 0.40789923071861267, "learning_rate": 6.341707206877149e-05, "loss": 1.8089, "step": 3981 }, { "epoch": 0.4136283369689415, "grad_norm": 0.41951802372932434, "learning_rate": 6.340135321202134e-05, "loss": 1.793, "step": 3982 }, { "epoch": 0.41373221148852185, "grad_norm": 0.41457945108413696, "learning_rate": 6.33856329281316e-05, "loss": 1.8366, "step": 3983 }, { "epoch": 0.41383608600810223, "grad_norm": 0.4352897107601166, "learning_rate": 6.336991121877637e-05, "loss": 1.7176, "step": 3984 }, { "epoch": 0.41393996052768256, "grad_norm": 0.414348840713501, "learning_rate": 6.335418808562988e-05, "loss": 1.8262, "step": 3985 }, { "epoch": 0.4140438350472629, "grad_norm": 0.3896345794200897, "learning_rate": 6.333846353036654e-05, "loss": 1.8285, "step": 3986 }, { "epoch": 0.41414770956684327, "grad_norm": 0.4062201678752899, "learning_rate": 6.332273755466087e-05, "loss": 1.709, "step": 3987 }, { "epoch": 0.4142515840864236, "grad_norm": 0.4064597189426422, "learning_rate": 6.330701016018757e-05, "loss": 1.7283, "step": 3988 }, { "epoch": 0.4143554586060039, "grad_norm": 0.42126670479774475, "learning_rate": 6.329128134862148e-05, "loss": 1.7227, "step": 3989 }, { "epoch": 0.4144593331255843, "grad_norm": 0.38570138812065125, "learning_rate": 6.32755511216376e-05, "loss": 1.7244, "step": 3990 }, { "epoch": 0.41456320764516463, "grad_norm": 0.4073387682437897, "learning_rate": 6.32598194809111e-05, "loss": 1.6718, "step": 3991 }, { "epoch": 0.414667082164745, "grad_norm": 0.39126601815223694, "learning_rate": 6.324408642811726e-05, "loss": 1.584, "step": 3992 }, { "epoch": 0.41477095668432534, "grad_norm": 0.38054293394088745, "learning_rate": 6.32283519649315e-05, "loss": 1.5411, "step": 3993 }, { "epoch": 0.41487483120390567, "grad_norm": 0.42494648694992065, "learning_rate": 6.321261609302945e-05, "loss": 1.8188, "step": 3994 }, { "epoch": 0.41497870572348605, "grad_norm": 0.3690442442893982, "learning_rate": 6.319687881408683e-05, "loss": 1.6147, "step": 3995 }, { "epoch": 0.4150825802430664, "grad_norm": 0.41196659207344055, "learning_rate": 6.318114012977958e-05, "loss": 1.917, "step": 3996 }, { "epoch": 0.4151864547626467, "grad_norm": 0.38468247652053833, "learning_rate": 6.316540004178371e-05, "loss": 1.7175, "step": 3997 }, { "epoch": 0.4152903292822271, "grad_norm": 0.36904019117355347, "learning_rate": 6.314965855177544e-05, "loss": 1.6544, "step": 3998 }, { "epoch": 0.4153942038018074, "grad_norm": 0.3496026396751404, "learning_rate": 6.31339156614311e-05, "loss": 1.4105, "step": 3999 }, { "epoch": 0.41549807832138774, "grad_norm": 0.3861815333366394, "learning_rate": 6.311817137242721e-05, "loss": 1.7974, "step": 4000 }, { "epoch": 0.4156019528409681, "grad_norm": 0.37794098258018494, "learning_rate": 6.310242568644035e-05, "loss": 1.5556, "step": 4001 }, { "epoch": 0.41570582736054845, "grad_norm": 0.42547518014907837, "learning_rate": 6.30866786051474e-05, "loss": 1.6694, "step": 4002 }, { "epoch": 0.4158097018801288, "grad_norm": 0.3894122540950775, "learning_rate": 6.307093013022525e-05, "loss": 1.5479, "step": 4003 }, { "epoch": 0.41591357639970916, "grad_norm": 0.36968937516212463, "learning_rate": 6.305518026335097e-05, "loss": 1.6548, "step": 4004 }, { "epoch": 0.4160174509192895, "grad_norm": 0.40135595202445984, "learning_rate": 6.303942900620185e-05, "loss": 1.5989, "step": 4005 }, { "epoch": 0.41612132543886987, "grad_norm": 0.39152655005455017, "learning_rate": 6.302367636045526e-05, "loss": 1.6618, "step": 4006 }, { "epoch": 0.4162251999584502, "grad_norm": 0.382112592458725, "learning_rate": 6.300792232778872e-05, "loss": 1.5055, "step": 4007 }, { "epoch": 0.4163290744780305, "grad_norm": 0.4156250059604645, "learning_rate": 6.29921669098799e-05, "loss": 1.9369, "step": 4008 }, { "epoch": 0.4164329489976109, "grad_norm": 0.3940093219280243, "learning_rate": 6.297641010840665e-05, "loss": 1.6275, "step": 4009 }, { "epoch": 0.41653682351719123, "grad_norm": 0.4581923186779022, "learning_rate": 6.296065192504697e-05, "loss": 1.8641, "step": 4010 }, { "epoch": 0.41664069803677156, "grad_norm": 0.3822265863418579, "learning_rate": 6.294489236147894e-05, "loss": 1.7272, "step": 4011 }, { "epoch": 0.41674457255635194, "grad_norm": 0.3892137110233307, "learning_rate": 6.292913141938084e-05, "loss": 1.683, "step": 4012 }, { "epoch": 0.41684844707593227, "grad_norm": 0.3944839835166931, "learning_rate": 6.29133691004311e-05, "loss": 1.6662, "step": 4013 }, { "epoch": 0.4169523215955126, "grad_norm": 0.39415454864501953, "learning_rate": 6.289760540630829e-05, "loss": 1.6862, "step": 4014 }, { "epoch": 0.417056196115093, "grad_norm": 0.39324238896369934, "learning_rate": 6.288184033869109e-05, "loss": 1.7041, "step": 4015 }, { "epoch": 0.4171600706346733, "grad_norm": 0.4160117506980896, "learning_rate": 6.28660738992584e-05, "loss": 1.7689, "step": 4016 }, { "epoch": 0.41726394515425363, "grad_norm": 0.4204423725605011, "learning_rate": 6.285030608968918e-05, "loss": 1.7707, "step": 4017 }, { "epoch": 0.417367819673834, "grad_norm": 0.3935635983943939, "learning_rate": 6.28345369116626e-05, "loss": 1.5884, "step": 4018 }, { "epoch": 0.41747169419341434, "grad_norm": 0.41393959522247314, "learning_rate": 6.281876636685795e-05, "loss": 1.6574, "step": 4019 }, { "epoch": 0.4175755687129947, "grad_norm": 0.4115196764469147, "learning_rate": 6.280299445695469e-05, "loss": 1.8304, "step": 4020 }, { "epoch": 0.41767944323257505, "grad_norm": 0.3949635922908783, "learning_rate": 6.278722118363237e-05, "loss": 1.5268, "step": 4021 }, { "epoch": 0.4177833177521554, "grad_norm": 0.41173237562179565, "learning_rate": 6.277144654857074e-05, "loss": 1.8324, "step": 4022 }, { "epoch": 0.41788719227173576, "grad_norm": 0.3953053951263428, "learning_rate": 6.275567055344967e-05, "loss": 1.6818, "step": 4023 }, { "epoch": 0.4179910667913161, "grad_norm": 0.46665164828300476, "learning_rate": 6.27398931999492e-05, "loss": 1.8911, "step": 4024 }, { "epoch": 0.4180949413108964, "grad_norm": 0.4181447923183441, "learning_rate": 6.272411448974947e-05, "loss": 1.7229, "step": 4025 }, { "epoch": 0.4181988158304768, "grad_norm": 0.3725024461746216, "learning_rate": 6.270833442453079e-05, "loss": 1.664, "step": 4026 }, { "epoch": 0.4183026903500571, "grad_norm": 0.430210679769516, "learning_rate": 6.269255300597364e-05, "loss": 1.6161, "step": 4027 }, { "epoch": 0.41840656486963745, "grad_norm": 0.3929589092731476, "learning_rate": 6.267677023575859e-05, "loss": 1.7089, "step": 4028 }, { "epoch": 0.41851043938921784, "grad_norm": 0.3702670931816101, "learning_rate": 6.26609861155664e-05, "loss": 1.6041, "step": 4029 }, { "epoch": 0.41861431390879816, "grad_norm": 0.3990215063095093, "learning_rate": 6.264520064707795e-05, "loss": 1.7304, "step": 4030 }, { "epoch": 0.41871818842837855, "grad_norm": 0.4187242388725281, "learning_rate": 6.262941383197425e-05, "loss": 1.813, "step": 4031 }, { "epoch": 0.4188220629479589, "grad_norm": 0.46325740218162537, "learning_rate": 6.261362567193651e-05, "loss": 1.8182, "step": 4032 }, { "epoch": 0.4189259374675392, "grad_norm": 0.3769034445285797, "learning_rate": 6.259783616864601e-05, "loss": 1.733, "step": 4033 }, { "epoch": 0.4190298119871196, "grad_norm": 0.42521029710769653, "learning_rate": 6.258204532378423e-05, "loss": 1.8581, "step": 4034 }, { "epoch": 0.4191336865066999, "grad_norm": 0.3773384988307953, "learning_rate": 6.256625313903278e-05, "loss": 1.6578, "step": 4035 }, { "epoch": 0.41923756102628024, "grad_norm": 0.4068077504634857, "learning_rate": 6.255045961607338e-05, "loss": 1.7315, "step": 4036 }, { "epoch": 0.4193414355458606, "grad_norm": 0.38190507888793945, "learning_rate": 6.253466475658792e-05, "loss": 1.5881, "step": 4037 }, { "epoch": 0.41944531006544095, "grad_norm": 0.373384028673172, "learning_rate": 6.251886856225844e-05, "loss": 1.719, "step": 4038 }, { "epoch": 0.4195491845850213, "grad_norm": 0.3872770369052887, "learning_rate": 6.250307103476712e-05, "loss": 1.6688, "step": 4039 }, { "epoch": 0.41965305910460166, "grad_norm": 0.4040100574493408, "learning_rate": 6.248727217579625e-05, "loss": 1.7883, "step": 4040 }, { "epoch": 0.419756933624182, "grad_norm": 0.5108729004859924, "learning_rate": 6.247147198702832e-05, "loss": 1.8919, "step": 4041 }, { "epoch": 0.4198608081437623, "grad_norm": 0.4295211732387543, "learning_rate": 6.245567047014589e-05, "loss": 1.7379, "step": 4042 }, { "epoch": 0.4199646826633427, "grad_norm": 0.4218069911003113, "learning_rate": 6.243986762683171e-05, "loss": 1.7343, "step": 4043 }, { "epoch": 0.420068557182923, "grad_norm": 0.40920522809028625, "learning_rate": 6.242406345876866e-05, "loss": 1.8147, "step": 4044 }, { "epoch": 0.4201724317025034, "grad_norm": 0.3913504481315613, "learning_rate": 6.240825796763977e-05, "loss": 1.6202, "step": 4045 }, { "epoch": 0.42027630622208373, "grad_norm": 0.387791246175766, "learning_rate": 6.239245115512818e-05, "loss": 1.6385, "step": 4046 }, { "epoch": 0.42038018074166406, "grad_norm": 0.3972698450088501, "learning_rate": 6.237664302291722e-05, "loss": 1.6763, "step": 4047 }, { "epoch": 0.42048405526124444, "grad_norm": 0.35716134309768677, "learning_rate": 6.236083357269033e-05, "loss": 1.6527, "step": 4048 }, { "epoch": 0.42058792978082477, "grad_norm": 0.40162917971611023, "learning_rate": 6.234502280613108e-05, "loss": 1.7658, "step": 4049 }, { "epoch": 0.4206918043004051, "grad_norm": 0.3818826377391815, "learning_rate": 6.232921072492318e-05, "loss": 1.7382, "step": 4050 }, { "epoch": 0.4207956788199855, "grad_norm": 0.4017144739627838, "learning_rate": 6.23133973307505e-05, "loss": 1.7945, "step": 4051 }, { "epoch": 0.4208995533395658, "grad_norm": 0.3896738290786743, "learning_rate": 6.229758262529708e-05, "loss": 1.8404, "step": 4052 }, { "epoch": 0.42100342785914613, "grad_norm": 0.38667070865631104, "learning_rate": 6.228176661024703e-05, "loss": 1.619, "step": 4053 }, { "epoch": 0.4211073023787265, "grad_norm": 0.38369256258010864, "learning_rate": 6.226594928728462e-05, "loss": 1.6359, "step": 4054 }, { "epoch": 0.42121117689830684, "grad_norm": 0.44246816635131836, "learning_rate": 6.225013065809431e-05, "loss": 1.8973, "step": 4055 }, { "epoch": 0.42131505141788717, "grad_norm": 0.42337489128112793, "learning_rate": 6.223431072436063e-05, "loss": 1.6915, "step": 4056 }, { "epoch": 0.42141892593746755, "grad_norm": 0.419403612613678, "learning_rate": 6.221848948776828e-05, "loss": 1.6772, "step": 4057 }, { "epoch": 0.4215228004570479, "grad_norm": 0.3898250460624695, "learning_rate": 6.220266695000211e-05, "loss": 1.7213, "step": 4058 }, { "epoch": 0.42162667497662826, "grad_norm": 0.4045391380786896, "learning_rate": 6.21868431127471e-05, "loss": 1.9761, "step": 4059 }, { "epoch": 0.4217305494962086, "grad_norm": 0.3763904273509979, "learning_rate": 6.217101797768837e-05, "loss": 1.7037, "step": 4060 }, { "epoch": 0.4218344240157889, "grad_norm": 0.397461861371994, "learning_rate": 6.215519154651116e-05, "loss": 1.7218, "step": 4061 }, { "epoch": 0.4219382985353693, "grad_norm": 0.4115433990955353, "learning_rate": 6.213936382090085e-05, "loss": 1.7957, "step": 4062 }, { "epoch": 0.4220421730549496, "grad_norm": 0.393285870552063, "learning_rate": 6.2123534802543e-05, "loss": 1.7508, "step": 4063 }, { "epoch": 0.42214604757452995, "grad_norm": 0.3760216236114502, "learning_rate": 6.210770449312326e-05, "loss": 1.6243, "step": 4064 }, { "epoch": 0.42224992209411033, "grad_norm": 0.4444178342819214, "learning_rate": 6.209187289432742e-05, "loss": 1.8161, "step": 4065 }, { "epoch": 0.42235379661369066, "grad_norm": 0.37198397517204285, "learning_rate": 6.207604000784143e-05, "loss": 1.6786, "step": 4066 }, { "epoch": 0.422457671133271, "grad_norm": 0.39783281087875366, "learning_rate": 6.206020583535141e-05, "loss": 1.7703, "step": 4067 }, { "epoch": 0.42256154565285137, "grad_norm": 0.45979025959968567, "learning_rate": 6.204437037854351e-05, "loss": 1.8734, "step": 4068 }, { "epoch": 0.4226654201724317, "grad_norm": 0.42977213859558105, "learning_rate": 6.202853363910413e-05, "loss": 1.7048, "step": 4069 }, { "epoch": 0.4227692946920121, "grad_norm": 0.41636922955513, "learning_rate": 6.201269561871975e-05, "loss": 1.6683, "step": 4070 }, { "epoch": 0.4228731692115924, "grad_norm": 0.4053545594215393, "learning_rate": 6.199685631907697e-05, "loss": 1.6983, "step": 4071 }, { "epoch": 0.42297704373117273, "grad_norm": 0.378132164478302, "learning_rate": 6.19810157418626e-05, "loss": 1.6345, "step": 4072 }, { "epoch": 0.4230809182507531, "grad_norm": 0.4450819492340088, "learning_rate": 6.196517388876348e-05, "loss": 1.6669, "step": 4073 }, { "epoch": 0.42318479277033344, "grad_norm": 0.3707204759120941, "learning_rate": 6.194933076146668e-05, "loss": 1.5462, "step": 4074 }, { "epoch": 0.42328866728991377, "grad_norm": 0.41867223381996155, "learning_rate": 6.193348636165937e-05, "loss": 1.8317, "step": 4075 }, { "epoch": 0.42339254180949415, "grad_norm": 0.4516044557094574, "learning_rate": 6.191764069102883e-05, "loss": 1.8396, "step": 4076 }, { "epoch": 0.4234964163290745, "grad_norm": 0.421350359916687, "learning_rate": 6.190179375126254e-05, "loss": 1.7317, "step": 4077 }, { "epoch": 0.4236002908486548, "grad_norm": 0.3848811984062195, "learning_rate": 6.188594554404804e-05, "loss": 1.6922, "step": 4078 }, { "epoch": 0.4237041653682352, "grad_norm": 0.41102609038352966, "learning_rate": 6.187009607107304e-05, "loss": 1.7322, "step": 4079 }, { "epoch": 0.4238080398878155, "grad_norm": 0.4180265963077545, "learning_rate": 6.185424533402543e-05, "loss": 1.8053, "step": 4080 }, { "epoch": 0.42391191440739584, "grad_norm": 0.4037413001060486, "learning_rate": 6.183839333459315e-05, "loss": 1.6599, "step": 4081 }, { "epoch": 0.4240157889269762, "grad_norm": 0.40133136510849, "learning_rate": 6.18225400744643e-05, "loss": 1.7394, "step": 4082 }, { "epoch": 0.42411966344655655, "grad_norm": 0.37302759289741516, "learning_rate": 6.180668555532719e-05, "loss": 1.5259, "step": 4083 }, { "epoch": 0.42422353796613693, "grad_norm": 0.39756691455841064, "learning_rate": 6.179082977887013e-05, "loss": 1.6746, "step": 4084 }, { "epoch": 0.42432741248571726, "grad_norm": 0.37990039587020874, "learning_rate": 6.177497274678168e-05, "loss": 1.6812, "step": 4085 }, { "epoch": 0.4244312870052976, "grad_norm": 0.3902164697647095, "learning_rate": 6.175911446075049e-05, "loss": 1.6457, "step": 4086 }, { "epoch": 0.42453516152487797, "grad_norm": 0.4064858555793762, "learning_rate": 6.174325492246531e-05, "loss": 1.7168, "step": 4087 }, { "epoch": 0.4246390360444583, "grad_norm": 0.40128055214881897, "learning_rate": 6.17273941336151e-05, "loss": 1.7847, "step": 4088 }, { "epoch": 0.4247429105640386, "grad_norm": 0.4626508951187134, "learning_rate": 6.171153209588891e-05, "loss": 1.951, "step": 4089 }, { "epoch": 0.424846785083619, "grad_norm": 0.40969690680503845, "learning_rate": 6.169566881097588e-05, "loss": 1.4734, "step": 4090 }, { "epoch": 0.42495065960319933, "grad_norm": 0.3807416558265686, "learning_rate": 6.167980428056537e-05, "loss": 1.5504, "step": 4091 }, { "epoch": 0.42505453412277966, "grad_norm": 0.3962438702583313, "learning_rate": 6.16639385063468e-05, "loss": 1.8144, "step": 4092 }, { "epoch": 0.42515840864236004, "grad_norm": 0.3545083701610565, "learning_rate": 6.164807149000974e-05, "loss": 1.5118, "step": 4093 }, { "epoch": 0.42526228316194037, "grad_norm": 0.43927130103111267, "learning_rate": 6.163220323324395e-05, "loss": 1.8641, "step": 4094 }, { "epoch": 0.4253661576815207, "grad_norm": 0.41815078258514404, "learning_rate": 6.161633373773925e-05, "loss": 1.7244, "step": 4095 }, { "epoch": 0.4254700322011011, "grad_norm": 0.4570396840572357, "learning_rate": 6.16004630051856e-05, "loss": 1.8696, "step": 4096 }, { "epoch": 0.4255739067206814, "grad_norm": 0.3970779478549957, "learning_rate": 6.158459103727316e-05, "loss": 1.7826, "step": 4097 }, { "epoch": 0.4256777812402618, "grad_norm": 0.437796026468277, "learning_rate": 6.15687178356921e-05, "loss": 1.6452, "step": 4098 }, { "epoch": 0.4257816557598421, "grad_norm": 0.4131401777267456, "learning_rate": 6.155284340213285e-05, "loss": 1.6827, "step": 4099 }, { "epoch": 0.42588553027942244, "grad_norm": 0.4367378056049347, "learning_rate": 6.15369677382859e-05, "loss": 1.9042, "step": 4100 }, { "epoch": 0.4259894047990028, "grad_norm": 0.39361101388931274, "learning_rate": 6.152109084584186e-05, "loss": 1.643, "step": 4101 }, { "epoch": 0.42609327931858315, "grad_norm": 0.37626150250434875, "learning_rate": 6.150521272649152e-05, "loss": 1.5454, "step": 4102 }, { "epoch": 0.4261971538381635, "grad_norm": 0.3899744749069214, "learning_rate": 6.148933338192578e-05, "loss": 1.6159, "step": 4103 }, { "epoch": 0.42630102835774386, "grad_norm": 0.4217323362827301, "learning_rate": 6.147345281383565e-05, "loss": 1.8407, "step": 4104 }, { "epoch": 0.4264049028773242, "grad_norm": 0.42654165625572205, "learning_rate": 6.145757102391227e-05, "loss": 1.7704, "step": 4105 }, { "epoch": 0.4265087773969045, "grad_norm": 0.3943133056163788, "learning_rate": 6.144168801384698e-05, "loss": 1.7799, "step": 4106 }, { "epoch": 0.4266126519164849, "grad_norm": 0.37911903858184814, "learning_rate": 6.142580378533114e-05, "loss": 1.6106, "step": 4107 }, { "epoch": 0.4267165264360652, "grad_norm": 0.42597952485084534, "learning_rate": 6.140991834005632e-05, "loss": 1.6198, "step": 4108 }, { "epoch": 0.4268204009556456, "grad_norm": 0.43222105503082275, "learning_rate": 6.139403167971422e-05, "loss": 1.7884, "step": 4109 }, { "epoch": 0.42692427547522593, "grad_norm": 0.4335835576057434, "learning_rate": 6.13781438059966e-05, "loss": 1.7371, "step": 4110 }, { "epoch": 0.42702814999480626, "grad_norm": 0.41362127661705017, "learning_rate": 6.136225472059545e-05, "loss": 1.6047, "step": 4111 }, { "epoch": 0.42713202451438664, "grad_norm": 0.38855838775634766, "learning_rate": 6.134636442520277e-05, "loss": 1.6329, "step": 4112 }, { "epoch": 0.42723589903396697, "grad_norm": 0.39782068133354187, "learning_rate": 6.13304729215108e-05, "loss": 1.711, "step": 4113 }, { "epoch": 0.4273397735535473, "grad_norm": 0.4341132342815399, "learning_rate": 6.131458021121184e-05, "loss": 1.6623, "step": 4114 }, { "epoch": 0.4274436480731277, "grad_norm": 0.4523240625858307, "learning_rate": 6.129868629599832e-05, "loss": 1.8319, "step": 4115 }, { "epoch": 0.427547522592708, "grad_norm": 0.3760691285133362, "learning_rate": 6.128279117756288e-05, "loss": 1.8514, "step": 4116 }, { "epoch": 0.42765139711228833, "grad_norm": 0.45057788491249084, "learning_rate": 6.126689485759818e-05, "loss": 1.7907, "step": 4117 }, { "epoch": 0.4277552716318687, "grad_norm": 0.4334897994995117, "learning_rate": 6.125099733779706e-05, "loss": 1.5299, "step": 4118 }, { "epoch": 0.42785914615144904, "grad_norm": 0.4173983633518219, "learning_rate": 6.12350986198525e-05, "loss": 1.6515, "step": 4119 }, { "epoch": 0.42796302067102937, "grad_norm": 0.4509902000427246, "learning_rate": 6.121919870545755e-05, "loss": 1.9308, "step": 4120 }, { "epoch": 0.42806689519060975, "grad_norm": 0.433244526386261, "learning_rate": 6.120329759630546e-05, "loss": 1.9438, "step": 4121 }, { "epoch": 0.4281707697101901, "grad_norm": 0.4360440671443939, "learning_rate": 6.118739529408956e-05, "loss": 1.8285, "step": 4122 }, { "epoch": 0.42827464422977046, "grad_norm": 0.44721856713294983, "learning_rate": 6.117149180050332e-05, "loss": 1.7948, "step": 4123 }, { "epoch": 0.4283785187493508, "grad_norm": 0.4097500741481781, "learning_rate": 6.115558711724036e-05, "loss": 1.6341, "step": 4124 }, { "epoch": 0.4284823932689311, "grad_norm": 0.3732147514820099, "learning_rate": 6.113968124599439e-05, "loss": 1.6512, "step": 4125 }, { "epoch": 0.4285862677885115, "grad_norm": 0.407043993473053, "learning_rate": 6.112377418845925e-05, "loss": 1.7734, "step": 4126 }, { "epoch": 0.4286901423080918, "grad_norm": 0.4379003047943115, "learning_rate": 6.110786594632892e-05, "loss": 1.8773, "step": 4127 }, { "epoch": 0.42879401682767215, "grad_norm": 0.49205613136291504, "learning_rate": 6.109195652129753e-05, "loss": 1.6428, "step": 4128 }, { "epoch": 0.42889789134725254, "grad_norm": 0.40176907181739807, "learning_rate": 6.107604591505928e-05, "loss": 1.5291, "step": 4129 }, { "epoch": 0.42900176586683286, "grad_norm": 0.4463737905025482, "learning_rate": 6.106013412930853e-05, "loss": 1.8709, "step": 4130 }, { "epoch": 0.4291056403864132, "grad_norm": 0.49634161591529846, "learning_rate": 6.10442211657398e-05, "loss": 2.1084, "step": 4131 }, { "epoch": 0.42920951490599357, "grad_norm": 0.3978555500507355, "learning_rate": 6.1028307026047624e-05, "loss": 1.7182, "step": 4132 }, { "epoch": 0.4293133894255739, "grad_norm": 0.39746221899986267, "learning_rate": 6.1012391711926795e-05, "loss": 1.7323, "step": 4133 }, { "epoch": 0.4294172639451542, "grad_norm": 0.39065563678741455, "learning_rate": 6.099647522507216e-05, "loss": 1.7496, "step": 4134 }, { "epoch": 0.4295211384647346, "grad_norm": 0.41617366671562195, "learning_rate": 6.098055756717866e-05, "loss": 1.781, "step": 4135 }, { "epoch": 0.42962501298431494, "grad_norm": 0.41195693612098694, "learning_rate": 6.0964638739941447e-05, "loss": 1.7705, "step": 4136 }, { "epoch": 0.4297288875038953, "grad_norm": 0.397640585899353, "learning_rate": 6.094871874505572e-05, "loss": 1.7518, "step": 4137 }, { "epoch": 0.42983276202347565, "grad_norm": 0.37663549184799194, "learning_rate": 6.093279758421687e-05, "loss": 1.6859, "step": 4138 }, { "epoch": 0.42993663654305597, "grad_norm": 0.3834432065486908, "learning_rate": 6.0916875259120366e-05, "loss": 1.5105, "step": 4139 }, { "epoch": 0.43004051106263635, "grad_norm": 0.4278203248977661, "learning_rate": 6.090095177146178e-05, "loss": 1.7621, "step": 4140 }, { "epoch": 0.4301443855822167, "grad_norm": 0.4155001938343048, "learning_rate": 6.088502712293687e-05, "loss": 1.8687, "step": 4141 }, { "epoch": 0.430248260101797, "grad_norm": 0.4091452658176422, "learning_rate": 6.086910131524147e-05, "loss": 1.6906, "step": 4142 }, { "epoch": 0.4303521346213774, "grad_norm": 0.4341062605381012, "learning_rate": 6.085317435007156e-05, "loss": 1.7431, "step": 4143 }, { "epoch": 0.4304560091409577, "grad_norm": 0.4048025906085968, "learning_rate": 6.0837246229123246e-05, "loss": 1.7628, "step": 4144 }, { "epoch": 0.43055988366053805, "grad_norm": 0.3721714913845062, "learning_rate": 6.0821316954092745e-05, "loss": 1.4422, "step": 4145 }, { "epoch": 0.43066375818011843, "grad_norm": 0.39783287048339844, "learning_rate": 6.08053865266764e-05, "loss": 1.6521, "step": 4146 }, { "epoch": 0.43076763269969875, "grad_norm": 0.44611310958862305, "learning_rate": 6.078945494857068e-05, "loss": 1.9144, "step": 4147 }, { "epoch": 0.43087150721927914, "grad_norm": 0.3994828462600708, "learning_rate": 6.077352222147214e-05, "loss": 1.6819, "step": 4148 }, { "epoch": 0.43097538173885946, "grad_norm": 0.4165269434452057, "learning_rate": 6.075758834707754e-05, "loss": 1.7586, "step": 4149 }, { "epoch": 0.4310792562584398, "grad_norm": 0.4540640413761139, "learning_rate": 6.0741653327083703e-05, "loss": 1.8504, "step": 4150 }, { "epoch": 0.4311831307780202, "grad_norm": 0.4224712550640106, "learning_rate": 6.072571716318756e-05, "loss": 1.7859, "step": 4151 }, { "epoch": 0.4312870052976005, "grad_norm": 0.4140393137931824, "learning_rate": 6.0709779857086214e-05, "loss": 1.6534, "step": 4152 }, { "epoch": 0.43139087981718083, "grad_norm": 0.37748152017593384, "learning_rate": 6.069384141047686e-05, "loss": 1.5697, "step": 4153 }, { "epoch": 0.4314947543367612, "grad_norm": 0.4263037145137787, "learning_rate": 6.067790182505678e-05, "loss": 1.6565, "step": 4154 }, { "epoch": 0.43159862885634154, "grad_norm": 0.38402366638183594, "learning_rate": 6.066196110252347e-05, "loss": 1.6159, "step": 4155 }, { "epoch": 0.43170250337592186, "grad_norm": 0.3785112798213959, "learning_rate": 6.064601924457447e-05, "loss": 1.4971, "step": 4156 }, { "epoch": 0.43180637789550225, "grad_norm": 0.3928913474082947, "learning_rate": 6.0630076252907445e-05, "loss": 1.6329, "step": 4157 }, { "epoch": 0.4319102524150826, "grad_norm": 0.42711085081100464, "learning_rate": 6.061413212922022e-05, "loss": 1.7333, "step": 4158 }, { "epoch": 0.4320141269346629, "grad_norm": 0.40235477685928345, "learning_rate": 6.059818687521074e-05, "loss": 1.6936, "step": 4159 }, { "epoch": 0.4321180014542433, "grad_norm": 0.4238249957561493, "learning_rate": 6.0582240492577015e-05, "loss": 1.7692, "step": 4160 }, { "epoch": 0.4322218759738236, "grad_norm": 0.4094812870025635, "learning_rate": 6.056629298301722e-05, "loss": 1.6549, "step": 4161 }, { "epoch": 0.432325750493404, "grad_norm": 0.4390838146209717, "learning_rate": 6.0550344348229626e-05, "loss": 1.8607, "step": 4162 }, { "epoch": 0.4324296250129843, "grad_norm": 0.3718215823173523, "learning_rate": 6.053439458991268e-05, "loss": 1.6305, "step": 4163 }, { "epoch": 0.43253349953256465, "grad_norm": 0.42819488048553467, "learning_rate": 6.051844370976487e-05, "loss": 1.5788, "step": 4164 }, { "epoch": 0.43263737405214503, "grad_norm": 0.37748944759368896, "learning_rate": 6.050249170948484e-05, "loss": 1.6356, "step": 4165 }, { "epoch": 0.43274124857172536, "grad_norm": 0.4419516623020172, "learning_rate": 6.048653859077138e-05, "loss": 1.7449, "step": 4166 }, { "epoch": 0.4328451230913057, "grad_norm": 0.40298861265182495, "learning_rate": 6.0470584355323375e-05, "loss": 1.648, "step": 4167 }, { "epoch": 0.43294899761088607, "grad_norm": 0.41812488436698914, "learning_rate": 6.045462900483978e-05, "loss": 1.8709, "step": 4168 }, { "epoch": 0.4330528721304664, "grad_norm": 0.4024185538291931, "learning_rate": 6.0438672541019755e-05, "loss": 1.7013, "step": 4169 }, { "epoch": 0.4331567466500467, "grad_norm": 0.3921603262424469, "learning_rate": 6.042271496556254e-05, "loss": 1.6795, "step": 4170 }, { "epoch": 0.4332606211696271, "grad_norm": 0.41133809089660645, "learning_rate": 6.040675628016746e-05, "loss": 1.7468, "step": 4171 }, { "epoch": 0.43336449568920743, "grad_norm": 0.4003753960132599, "learning_rate": 6.039079648653403e-05, "loss": 1.7661, "step": 4172 }, { "epoch": 0.4334683702087878, "grad_norm": 0.3852844536304474, "learning_rate": 6.037483558636183e-05, "loss": 1.6341, "step": 4173 }, { "epoch": 0.43357224472836814, "grad_norm": 0.40245676040649414, "learning_rate": 6.035887358135056e-05, "loss": 1.7788, "step": 4174 }, { "epoch": 0.43367611924794847, "grad_norm": 0.4260011613368988, "learning_rate": 6.0342910473200054e-05, "loss": 1.8262, "step": 4175 }, { "epoch": 0.43377999376752885, "grad_norm": 0.4148736298084259, "learning_rate": 6.032694626361025e-05, "loss": 1.7661, "step": 4176 }, { "epoch": 0.4338838682871092, "grad_norm": 0.38179653882980347, "learning_rate": 6.0310980954281234e-05, "loss": 1.5742, "step": 4177 }, { "epoch": 0.4339877428066895, "grad_norm": 0.4099988639354706, "learning_rate": 6.029501454691318e-05, "loss": 1.66, "step": 4178 }, { "epoch": 0.4340916173262699, "grad_norm": 0.3874553442001343, "learning_rate": 6.027904704320636e-05, "loss": 1.6484, "step": 4179 }, { "epoch": 0.4341954918458502, "grad_norm": 0.390216588973999, "learning_rate": 6.026307844486123e-05, "loss": 1.7898, "step": 4180 }, { "epoch": 0.43429936636543054, "grad_norm": 0.38214191794395447, "learning_rate": 6.024710875357831e-05, "loss": 1.6147, "step": 4181 }, { "epoch": 0.4344032408850109, "grad_norm": 0.37591472268104553, "learning_rate": 6.02311379710582e-05, "loss": 1.685, "step": 4182 }, { "epoch": 0.43450711540459125, "grad_norm": 0.42642828822135925, "learning_rate": 6.021516609900173e-05, "loss": 1.8715, "step": 4183 }, { "epoch": 0.4346109899241716, "grad_norm": 0.4037899672985077, "learning_rate": 6.019919313910976e-05, "loss": 1.6407, "step": 4184 }, { "epoch": 0.43471486444375196, "grad_norm": 0.40876585245132446, "learning_rate": 6.0183219093083244e-05, "loss": 1.6688, "step": 4185 }, { "epoch": 0.4348187389633323, "grad_norm": 0.4056430757045746, "learning_rate": 6.016724396262335e-05, "loss": 1.8047, "step": 4186 }, { "epoch": 0.43492261348291267, "grad_norm": 0.4015723168849945, "learning_rate": 6.015126774943128e-05, "loss": 1.7502, "step": 4187 }, { "epoch": 0.435026488002493, "grad_norm": 0.4126836359500885, "learning_rate": 6.013529045520838e-05, "loss": 1.8191, "step": 4188 }, { "epoch": 0.4351303625220733, "grad_norm": 0.39362239837646484, "learning_rate": 6.011931208165611e-05, "loss": 1.6365, "step": 4189 }, { "epoch": 0.4352342370416537, "grad_norm": 0.37851396203041077, "learning_rate": 6.0103332630476017e-05, "loss": 1.7451, "step": 4190 }, { "epoch": 0.43533811156123403, "grad_norm": 0.3897380530834198, "learning_rate": 6.008735210336982e-05, "loss": 1.7397, "step": 4191 }, { "epoch": 0.43544198608081436, "grad_norm": 0.38429588079452515, "learning_rate": 6.00713705020393e-05, "loss": 1.5615, "step": 4192 }, { "epoch": 0.43554586060039474, "grad_norm": 0.4072874188423157, "learning_rate": 6.005538782818638e-05, "loss": 1.8237, "step": 4193 }, { "epoch": 0.43564973511997507, "grad_norm": 0.4054878354072571, "learning_rate": 6.003940408351311e-05, "loss": 1.7932, "step": 4194 }, { "epoch": 0.4357536096395554, "grad_norm": 0.38141578435897827, "learning_rate": 6.0023419269721613e-05, "loss": 1.6768, "step": 4195 }, { "epoch": 0.4358574841591358, "grad_norm": 0.4173438251018524, "learning_rate": 6.000743338851413e-05, "loss": 1.513, "step": 4196 }, { "epoch": 0.4359613586787161, "grad_norm": 0.4424532651901245, "learning_rate": 5.999144644159307e-05, "loss": 1.7438, "step": 4197 }, { "epoch": 0.43606523319829643, "grad_norm": 0.43783673644065857, "learning_rate": 5.997545843066089e-05, "loss": 1.7892, "step": 4198 }, { "epoch": 0.4361691077178768, "grad_norm": 0.3705306649208069, "learning_rate": 5.995946935742019e-05, "loss": 1.6238, "step": 4199 }, { "epoch": 0.43627298223745714, "grad_norm": 0.38308608531951904, "learning_rate": 5.994347922357372e-05, "loss": 1.5195, "step": 4200 }, { "epoch": 0.4363768567570375, "grad_norm": 0.43744367361068726, "learning_rate": 5.992748803082425e-05, "loss": 1.9181, "step": 4201 }, { "epoch": 0.43648073127661785, "grad_norm": 0.42308345437049866, "learning_rate": 5.991149578087476e-05, "loss": 1.767, "step": 4202 }, { "epoch": 0.4365846057961982, "grad_norm": 0.4409851133823395, "learning_rate": 5.9895502475428265e-05, "loss": 1.8537, "step": 4203 }, { "epoch": 0.43668848031577856, "grad_norm": 0.377109169960022, "learning_rate": 5.9879508116187947e-05, "loss": 1.6446, "step": 4204 }, { "epoch": 0.4367923548353589, "grad_norm": 0.4228600263595581, "learning_rate": 5.9863512704857085e-05, "loss": 1.7255, "step": 4205 }, { "epoch": 0.4368962293549392, "grad_norm": 0.410178542137146, "learning_rate": 5.9847516243139055e-05, "loss": 1.6686, "step": 4206 }, { "epoch": 0.4370001038745196, "grad_norm": 0.37821877002716064, "learning_rate": 5.9831518732737344e-05, "loss": 1.563, "step": 4207 }, { "epoch": 0.4371039783940999, "grad_norm": 0.3783068358898163, "learning_rate": 5.981552017535561e-05, "loss": 1.6068, "step": 4208 }, { "epoch": 0.43720785291368025, "grad_norm": 0.4072893261909485, "learning_rate": 5.979952057269752e-05, "loss": 1.7952, "step": 4209 }, { "epoch": 0.43731172743326063, "grad_norm": 0.3989700376987457, "learning_rate": 5.9783519926466924e-05, "loss": 1.66, "step": 4210 }, { "epoch": 0.43741560195284096, "grad_norm": 0.4692555367946625, "learning_rate": 5.976751823836778e-05, "loss": 1.6255, "step": 4211 }, { "epoch": 0.43751947647242134, "grad_norm": 0.3792388439178467, "learning_rate": 5.975151551010412e-05, "loss": 1.5188, "step": 4212 }, { "epoch": 0.43762335099200167, "grad_norm": 0.38823702931404114, "learning_rate": 5.973551174338014e-05, "loss": 1.6951, "step": 4213 }, { "epoch": 0.437727225511582, "grad_norm": 0.37780869007110596, "learning_rate": 5.9719506939900094e-05, "loss": 1.5638, "step": 4214 }, { "epoch": 0.4378311000311624, "grad_norm": 0.45048758387565613, "learning_rate": 5.9703501101368374e-05, "loss": 1.8339, "step": 4215 }, { "epoch": 0.4379349745507427, "grad_norm": 0.3920362889766693, "learning_rate": 5.968749422948947e-05, "loss": 1.723, "step": 4216 }, { "epoch": 0.43803884907032303, "grad_norm": 0.3886549770832062, "learning_rate": 5.967148632596801e-05, "loss": 1.6502, "step": 4217 }, { "epoch": 0.4381427235899034, "grad_norm": 0.4173612594604492, "learning_rate": 5.965547739250867e-05, "loss": 1.8315, "step": 4218 }, { "epoch": 0.43824659810948374, "grad_norm": 0.45074644684791565, "learning_rate": 5.963946743081633e-05, "loss": 1.7273, "step": 4219 }, { "epoch": 0.43835047262906407, "grad_norm": 0.42388424277305603, "learning_rate": 5.962345644259589e-05, "loss": 1.7344, "step": 4220 }, { "epoch": 0.43845434714864445, "grad_norm": 0.44487425684928894, "learning_rate": 5.9607444429552405e-05, "loss": 1.771, "step": 4221 }, { "epoch": 0.4385582216682248, "grad_norm": 0.4247962236404419, "learning_rate": 5.959143139339104e-05, "loss": 1.7933, "step": 4222 }, { "epoch": 0.4386620961878051, "grad_norm": 0.4087913930416107, "learning_rate": 5.957541733581704e-05, "loss": 1.667, "step": 4223 }, { "epoch": 0.4387659707073855, "grad_norm": 0.3898204267024994, "learning_rate": 5.955940225853577e-05, "loss": 1.6835, "step": 4224 }, { "epoch": 0.4388698452269658, "grad_norm": 0.3906581997871399, "learning_rate": 5.9543386163252744e-05, "loss": 1.7532, "step": 4225 }, { "epoch": 0.4389737197465462, "grad_norm": 0.37566760182380676, "learning_rate": 5.952736905167352e-05, "loss": 1.5813, "step": 4226 }, { "epoch": 0.4390775942661265, "grad_norm": 0.42560824751853943, "learning_rate": 5.951135092550382e-05, "loss": 1.8791, "step": 4227 }, { "epoch": 0.43918146878570685, "grad_norm": 0.4161139130592346, "learning_rate": 5.949533178644943e-05, "loss": 1.6862, "step": 4228 }, { "epoch": 0.43928534330528723, "grad_norm": 0.398384690284729, "learning_rate": 5.9479311636216274e-05, "loss": 1.7341, "step": 4229 }, { "epoch": 0.43938921782486756, "grad_norm": 0.36606839299201965, "learning_rate": 5.946329047651037e-05, "loss": 1.6987, "step": 4230 }, { "epoch": 0.4394930923444479, "grad_norm": 0.41310790181159973, "learning_rate": 5.944726830903785e-05, "loss": 1.7809, "step": 4231 }, { "epoch": 0.43959696686402827, "grad_norm": 0.41169822216033936, "learning_rate": 5.9431245135504934e-05, "loss": 1.7338, "step": 4232 }, { "epoch": 0.4397008413836086, "grad_norm": 0.3810160756111145, "learning_rate": 5.941522095761799e-05, "loss": 1.5948, "step": 4233 }, { "epoch": 0.4398047159031889, "grad_norm": 0.3669174909591675, "learning_rate": 5.939919577708346e-05, "loss": 1.2893, "step": 4234 }, { "epoch": 0.4399085904227693, "grad_norm": 0.4046458601951599, "learning_rate": 5.938316959560788e-05, "loss": 1.6906, "step": 4235 }, { "epoch": 0.44001246494234963, "grad_norm": 0.40834760665893555, "learning_rate": 5.9367142414897945e-05, "loss": 1.7074, "step": 4236 }, { "epoch": 0.44011633946192996, "grad_norm": 0.425662636756897, "learning_rate": 5.935111423666041e-05, "loss": 1.7486, "step": 4237 }, { "epoch": 0.44022021398151034, "grad_norm": 0.37841665744781494, "learning_rate": 5.933508506260214e-05, "loss": 1.7472, "step": 4238 }, { "epoch": 0.44032408850109067, "grad_norm": 0.4464881420135498, "learning_rate": 5.9319054894430126e-05, "loss": 1.8697, "step": 4239 }, { "epoch": 0.44042796302067105, "grad_norm": 0.4150855541229248, "learning_rate": 5.930302373385145e-05, "loss": 1.5873, "step": 4240 }, { "epoch": 0.4405318375402514, "grad_norm": 0.38565129041671753, "learning_rate": 5.928699158257333e-05, "loss": 1.71, "step": 4241 }, { "epoch": 0.4406357120598317, "grad_norm": 0.44895511865615845, "learning_rate": 5.9270958442303035e-05, "loss": 1.8128, "step": 4242 }, { "epoch": 0.4407395865794121, "grad_norm": 0.3845813572406769, "learning_rate": 5.9254924314748e-05, "loss": 1.7892, "step": 4243 }, { "epoch": 0.4408434610989924, "grad_norm": 0.4151462912559509, "learning_rate": 5.923888920161571e-05, "loss": 1.7573, "step": 4244 }, { "epoch": 0.44094733561857274, "grad_norm": 0.38922443985939026, "learning_rate": 5.922285310461378e-05, "loss": 1.7848, "step": 4245 }, { "epoch": 0.4410512101381531, "grad_norm": 0.38882389664649963, "learning_rate": 5.920681602544993e-05, "loss": 1.7333, "step": 4246 }, { "epoch": 0.44115508465773345, "grad_norm": 0.38732343912124634, "learning_rate": 5.9190777965832e-05, "loss": 1.6265, "step": 4247 }, { "epoch": 0.4412589591773138, "grad_norm": 0.3835653066635132, "learning_rate": 5.917473892746791e-05, "loss": 1.5993, "step": 4248 }, { "epoch": 0.44136283369689416, "grad_norm": 0.41864073276519775, "learning_rate": 5.915869891206567e-05, "loss": 1.7911, "step": 4249 }, { "epoch": 0.4414667082164745, "grad_norm": 0.43092262744903564, "learning_rate": 5.9142657921333466e-05, "loss": 1.487, "step": 4250 }, { "epoch": 0.4415705827360549, "grad_norm": 0.3756438195705414, "learning_rate": 5.912661595697948e-05, "loss": 1.5246, "step": 4251 }, { "epoch": 0.4416744572556352, "grad_norm": 0.3791937828063965, "learning_rate": 5.91105730207121e-05, "loss": 1.7199, "step": 4252 }, { "epoch": 0.4417783317752155, "grad_norm": 0.381509393453598, "learning_rate": 5.909452911423976e-05, "loss": 1.6264, "step": 4253 }, { "epoch": 0.4418822062947959, "grad_norm": 0.3744488060474396, "learning_rate": 5.9078484239271e-05, "loss": 1.5363, "step": 4254 }, { "epoch": 0.44198608081437624, "grad_norm": 0.3968122601509094, "learning_rate": 5.9062438397514496e-05, "loss": 1.7037, "step": 4255 }, { "epoch": 0.44208995533395656, "grad_norm": 0.4114841818809509, "learning_rate": 5.904639159067898e-05, "loss": 1.7508, "step": 4256 }, { "epoch": 0.44219382985353695, "grad_norm": 0.37198910117149353, "learning_rate": 5.9030343820473335e-05, "loss": 1.7378, "step": 4257 }, { "epoch": 0.4422977043731173, "grad_norm": 0.47801533341407776, "learning_rate": 5.9014295088606505e-05, "loss": 1.838, "step": 4258 }, { "epoch": 0.4424015788926976, "grad_norm": 0.43482860922813416, "learning_rate": 5.899824539678758e-05, "loss": 1.8126, "step": 4259 }, { "epoch": 0.442505453412278, "grad_norm": 0.3933789134025574, "learning_rate": 5.898219474672568e-05, "loss": 1.5948, "step": 4260 }, { "epoch": 0.4426093279318583, "grad_norm": 0.369711697101593, "learning_rate": 5.896614314013012e-05, "loss": 1.5256, "step": 4261 }, { "epoch": 0.44271320245143864, "grad_norm": 0.43509915471076965, "learning_rate": 5.8950090578710246e-05, "loss": 1.762, "step": 4262 }, { "epoch": 0.442817076971019, "grad_norm": 0.4086291193962097, "learning_rate": 5.893403706417554e-05, "loss": 1.8018, "step": 4263 }, { "epoch": 0.44292095149059935, "grad_norm": 0.38804736733436584, "learning_rate": 5.89179825982356e-05, "loss": 1.717, "step": 4264 }, { "epoch": 0.44302482601017973, "grad_norm": 0.4058080017566681, "learning_rate": 5.8901927182600035e-05, "loss": 1.8017, "step": 4265 }, { "epoch": 0.44312870052976006, "grad_norm": 0.381388783454895, "learning_rate": 5.88858708189787e-05, "loss": 1.8363, "step": 4266 }, { "epoch": 0.4432325750493404, "grad_norm": 0.3509836196899414, "learning_rate": 5.886981350908142e-05, "loss": 1.5213, "step": 4267 }, { "epoch": 0.44333644956892077, "grad_norm": 0.39383694529533386, "learning_rate": 5.885375525461817e-05, "loss": 1.6166, "step": 4268 }, { "epoch": 0.4434403240885011, "grad_norm": 0.4015539586544037, "learning_rate": 5.883769605729907e-05, "loss": 1.7148, "step": 4269 }, { "epoch": 0.4435441986080814, "grad_norm": 0.3899979293346405, "learning_rate": 5.882163591883427e-05, "loss": 1.5109, "step": 4270 }, { "epoch": 0.4436480731276618, "grad_norm": 0.41530346870422363, "learning_rate": 5.8805574840934067e-05, "loss": 1.6773, "step": 4271 }, { "epoch": 0.44375194764724213, "grad_norm": 0.4475281536579132, "learning_rate": 5.878951282530881e-05, "loss": 1.6551, "step": 4272 }, { "epoch": 0.44385582216682246, "grad_norm": 0.442122220993042, "learning_rate": 5.877344987366902e-05, "loss": 1.7879, "step": 4273 }, { "epoch": 0.44395969668640284, "grad_norm": 0.39350566267967224, "learning_rate": 5.875738598772522e-05, "loss": 1.705, "step": 4274 }, { "epoch": 0.44406357120598317, "grad_norm": 0.43882155418395996, "learning_rate": 5.874132116918816e-05, "loss": 1.7977, "step": 4275 }, { "epoch": 0.4441674457255635, "grad_norm": 0.42583081126213074, "learning_rate": 5.8725255419768565e-05, "loss": 1.6302, "step": 4276 }, { "epoch": 0.4442713202451439, "grad_norm": 0.40676143765449524, "learning_rate": 5.870918874117731e-05, "loss": 1.8449, "step": 4277 }, { "epoch": 0.4443751947647242, "grad_norm": 0.42294740676879883, "learning_rate": 5.869312113512542e-05, "loss": 1.7231, "step": 4278 }, { "epoch": 0.4444790692843046, "grad_norm": 0.41314196586608887, "learning_rate": 5.867705260332391e-05, "loss": 1.6797, "step": 4279 }, { "epoch": 0.4445829438038849, "grad_norm": 0.395781934261322, "learning_rate": 5.866098314748401e-05, "loss": 1.7341, "step": 4280 }, { "epoch": 0.44468681832346524, "grad_norm": 0.412775456905365, "learning_rate": 5.864491276931694e-05, "loss": 1.685, "step": 4281 }, { "epoch": 0.4447906928430456, "grad_norm": 0.38872653245925903, "learning_rate": 5.862884147053409e-05, "loss": 1.7268, "step": 4282 }, { "epoch": 0.44489456736262595, "grad_norm": 0.36840468645095825, "learning_rate": 5.861276925284694e-05, "loss": 1.5863, "step": 4283 }, { "epoch": 0.4449984418822063, "grad_norm": 0.37796318531036377, "learning_rate": 5.8596696117967054e-05, "loss": 1.6242, "step": 4284 }, { "epoch": 0.44510231640178666, "grad_norm": 0.3686732351779938, "learning_rate": 5.8580622067606083e-05, "loss": 1.5881, "step": 4285 }, { "epoch": 0.445206190921367, "grad_norm": 0.4148024320602417, "learning_rate": 5.8564547103475795e-05, "loss": 1.815, "step": 4286 }, { "epoch": 0.4453100654409473, "grad_norm": 0.39075860381126404, "learning_rate": 5.854847122728805e-05, "loss": 1.6837, "step": 4287 }, { "epoch": 0.4454139399605277, "grad_norm": 0.40239056944847107, "learning_rate": 5.853239444075479e-05, "loss": 1.7351, "step": 4288 }, { "epoch": 0.445517814480108, "grad_norm": 0.4210759401321411, "learning_rate": 5.8516316745588085e-05, "loss": 1.6621, "step": 4289 }, { "epoch": 0.4456216889996884, "grad_norm": 0.39281001687049866, "learning_rate": 5.850023814350006e-05, "loss": 1.7465, "step": 4290 }, { "epoch": 0.44572556351926873, "grad_norm": 0.4055309295654297, "learning_rate": 5.848415863620299e-05, "loss": 1.6812, "step": 4291 }, { "epoch": 0.44582943803884906, "grad_norm": 0.3883981704711914, "learning_rate": 5.846807822540922e-05, "loss": 1.6243, "step": 4292 }, { "epoch": 0.44593331255842944, "grad_norm": 0.394539475440979, "learning_rate": 5.8451996912831156e-05, "loss": 1.7954, "step": 4293 }, { "epoch": 0.44603718707800977, "grad_norm": 0.40347257256507874, "learning_rate": 5.8435914700181347e-05, "loss": 1.6049, "step": 4294 }, { "epoch": 0.4461410615975901, "grad_norm": 0.47316792607307434, "learning_rate": 5.8419831589172426e-05, "loss": 1.9193, "step": 4295 }, { "epoch": 0.4462449361171705, "grad_norm": 0.427937775850296, "learning_rate": 5.840374758151711e-05, "loss": 1.6742, "step": 4296 }, { "epoch": 0.4463488106367508, "grad_norm": 0.44503235816955566, "learning_rate": 5.838766267892825e-05, "loss": 1.9509, "step": 4297 }, { "epoch": 0.44645268515633113, "grad_norm": 0.4180712103843689, "learning_rate": 5.837157688311873e-05, "loss": 1.8231, "step": 4298 }, { "epoch": 0.4465565596759115, "grad_norm": 0.4312383532524109, "learning_rate": 5.8355490195801566e-05, "loss": 1.7526, "step": 4299 }, { "epoch": 0.44666043419549184, "grad_norm": 0.38129255175590515, "learning_rate": 5.8339402618689885e-05, "loss": 1.749, "step": 4300 }, { "epoch": 0.44676430871507217, "grad_norm": 0.47581759095191956, "learning_rate": 5.832331415349687e-05, "loss": 2.0137, "step": 4301 }, { "epoch": 0.44686818323465255, "grad_norm": 0.3956213593482971, "learning_rate": 5.83072248019358e-05, "loss": 1.7437, "step": 4302 }, { "epoch": 0.4469720577542329, "grad_norm": 0.42220327258110046, "learning_rate": 5.829113456572012e-05, "loss": 1.9, "step": 4303 }, { "epoch": 0.44707593227381326, "grad_norm": 0.39744284749031067, "learning_rate": 5.827504344656326e-05, "loss": 1.5549, "step": 4304 }, { "epoch": 0.4471798067933936, "grad_norm": 0.3954363763332367, "learning_rate": 5.825895144617883e-05, "loss": 1.7203, "step": 4305 }, { "epoch": 0.4472836813129739, "grad_norm": 0.39236709475517273, "learning_rate": 5.82428585662805e-05, "loss": 1.7229, "step": 4306 }, { "epoch": 0.4473875558325543, "grad_norm": 0.3697808086872101, "learning_rate": 5.822676480858201e-05, "loss": 1.6764, "step": 4307 }, { "epoch": 0.4474914303521346, "grad_norm": 0.45007967948913574, "learning_rate": 5.821067017479727e-05, "loss": 1.8381, "step": 4308 }, { "epoch": 0.44759530487171495, "grad_norm": 0.49184536933898926, "learning_rate": 5.819457466664018e-05, "loss": 2.1298, "step": 4309 }, { "epoch": 0.44769917939129533, "grad_norm": 0.3896716833114624, "learning_rate": 5.81784782858248e-05, "loss": 1.7621, "step": 4310 }, { "epoch": 0.44780305391087566, "grad_norm": 0.39417698979377747, "learning_rate": 5.816238103406529e-05, "loss": 1.7275, "step": 4311 }, { "epoch": 0.447906928430456, "grad_norm": 0.45076748728752136, "learning_rate": 5.814628291307587e-05, "loss": 1.7021, "step": 4312 }, { "epoch": 0.44801080295003637, "grad_norm": 0.43015792965888977, "learning_rate": 5.813018392457086e-05, "loss": 1.8654, "step": 4313 }, { "epoch": 0.4481146774696167, "grad_norm": 0.3831145167350769, "learning_rate": 5.8114084070264686e-05, "loss": 1.7, "step": 4314 }, { "epoch": 0.448218551989197, "grad_norm": 0.3929171860218048, "learning_rate": 5.8097983351871844e-05, "loss": 1.611, "step": 4315 }, { "epoch": 0.4483224265087774, "grad_norm": 0.41042855381965637, "learning_rate": 5.808188177110694e-05, "loss": 2.0493, "step": 4316 }, { "epoch": 0.44842630102835773, "grad_norm": 0.4465937316417694, "learning_rate": 5.806577932968467e-05, "loss": 1.8463, "step": 4317 }, { "epoch": 0.4485301755479381, "grad_norm": 0.4185175597667694, "learning_rate": 5.804967602931981e-05, "loss": 1.649, "step": 4318 }, { "epoch": 0.44863405006751844, "grad_norm": 0.413787841796875, "learning_rate": 5.803357187172726e-05, "loss": 1.8159, "step": 4319 }, { "epoch": 0.44873792458709877, "grad_norm": 0.4084557890892029, "learning_rate": 5.801746685862197e-05, "loss": 1.6689, "step": 4320 }, { "epoch": 0.44884179910667915, "grad_norm": 0.4004693329334259, "learning_rate": 5.8001360991718976e-05, "loss": 1.5459, "step": 4321 }, { "epoch": 0.4489456736262595, "grad_norm": 0.40109360218048096, "learning_rate": 5.798525427273347e-05, "loss": 1.6249, "step": 4322 }, { "epoch": 0.4490495481458398, "grad_norm": 0.4452827274799347, "learning_rate": 5.796914670338067e-05, "loss": 1.7092, "step": 4323 }, { "epoch": 0.4491534226654202, "grad_norm": 0.4333063066005707, "learning_rate": 5.795303828537589e-05, "loss": 1.8151, "step": 4324 }, { "epoch": 0.4492572971850005, "grad_norm": 0.4180525243282318, "learning_rate": 5.7936929020434585e-05, "loss": 1.7309, "step": 4325 }, { "epoch": 0.44936117170458084, "grad_norm": 0.40769898891448975, "learning_rate": 5.792081891027224e-05, "loss": 1.7786, "step": 4326 }, { "epoch": 0.4494650462241612, "grad_norm": 0.46367326378822327, "learning_rate": 5.790470795660447e-05, "loss": 1.9491, "step": 4327 }, { "epoch": 0.44956892074374155, "grad_norm": 0.40147140622138977, "learning_rate": 5.788859616114697e-05, "loss": 1.7364, "step": 4328 }, { "epoch": 0.44967279526332193, "grad_norm": 0.4153495132923126, "learning_rate": 5.787248352561549e-05, "loss": 1.7383, "step": 4329 }, { "epoch": 0.44977666978290226, "grad_norm": 0.40633097290992737, "learning_rate": 5.785637005172595e-05, "loss": 1.4243, "step": 4330 }, { "epoch": 0.4498805443024826, "grad_norm": 0.3852026164531708, "learning_rate": 5.784025574119427e-05, "loss": 1.6868, "step": 4331 }, { "epoch": 0.44998441882206297, "grad_norm": 0.3848346769809723, "learning_rate": 5.78241405957365e-05, "loss": 1.5136, "step": 4332 }, { "epoch": 0.4500882933416433, "grad_norm": 0.3910854160785675, "learning_rate": 5.78080246170688e-05, "loss": 1.6864, "step": 4333 }, { "epoch": 0.4501921678612236, "grad_norm": 0.3685462176799774, "learning_rate": 5.7791907806907395e-05, "loss": 1.6707, "step": 4334 }, { "epoch": 0.450296042380804, "grad_norm": 0.4090143144130707, "learning_rate": 5.777579016696856e-05, "loss": 1.5942, "step": 4335 }, { "epoch": 0.45039991690038433, "grad_norm": 0.377214252948761, "learning_rate": 5.7759671698968745e-05, "loss": 1.5599, "step": 4336 }, { "epoch": 0.45050379141996466, "grad_norm": 0.41875123977661133, "learning_rate": 5.774355240462441e-05, "loss": 1.7982, "step": 4337 }, { "epoch": 0.45060766593954504, "grad_norm": 0.3935871422290802, "learning_rate": 5.772743228565215e-05, "loss": 1.7028, "step": 4338 }, { "epoch": 0.45071154045912537, "grad_norm": 0.406841903924942, "learning_rate": 5.771131134376863e-05, "loss": 1.7354, "step": 4339 }, { "epoch": 0.4508154149787057, "grad_norm": 0.37955769896507263, "learning_rate": 5.76951895806906e-05, "loss": 1.647, "step": 4340 }, { "epoch": 0.4509192894982861, "grad_norm": 0.42771968245506287, "learning_rate": 5.76790669981349e-05, "loss": 1.825, "step": 4341 }, { "epoch": 0.4510231640178664, "grad_norm": 0.3990226686000824, "learning_rate": 5.766294359781848e-05, "loss": 1.611, "step": 4342 }, { "epoch": 0.4511270385374468, "grad_norm": 0.4627864360809326, "learning_rate": 5.764681938145832e-05, "loss": 1.7403, "step": 4343 }, { "epoch": 0.4512309130570271, "grad_norm": 0.3694743812084198, "learning_rate": 5.763069435077155e-05, "loss": 1.5556, "step": 4344 }, { "epoch": 0.45133478757660744, "grad_norm": 0.37064674496650696, "learning_rate": 5.761456850747536e-05, "loss": 1.6155, "step": 4345 }, { "epoch": 0.4514386620961878, "grad_norm": 0.37530389428138733, "learning_rate": 5.759844185328701e-05, "loss": 1.462, "step": 4346 }, { "epoch": 0.45154253661576815, "grad_norm": 0.4151467978954315, "learning_rate": 5.7582314389923876e-05, "loss": 1.6177, "step": 4347 }, { "epoch": 0.4516464111353485, "grad_norm": 0.4127391278743744, "learning_rate": 5.756618611910343e-05, "loss": 1.782, "step": 4348 }, { "epoch": 0.45175028565492886, "grad_norm": 0.41236239671707153, "learning_rate": 5.7550057042543137e-05, "loss": 1.7076, "step": 4349 }, { "epoch": 0.4518541601745092, "grad_norm": 0.3952932357788086, "learning_rate": 5.753392716196069e-05, "loss": 1.6074, "step": 4350 }, { "epoch": 0.4519580346940895, "grad_norm": 0.5258936285972595, "learning_rate": 5.751779647907376e-05, "loss": 1.8871, "step": 4351 }, { "epoch": 0.4520619092136699, "grad_norm": 0.3987799882888794, "learning_rate": 5.7501664995600134e-05, "loss": 1.7228, "step": 4352 }, { "epoch": 0.4521657837332502, "grad_norm": 0.3863909840583801, "learning_rate": 5.748553271325772e-05, "loss": 1.5237, "step": 4353 }, { "epoch": 0.45226965825283055, "grad_norm": 0.40286675095558167, "learning_rate": 5.746939963376445e-05, "loss": 1.6773, "step": 4354 }, { "epoch": 0.45237353277241094, "grad_norm": 0.4835149645805359, "learning_rate": 5.745326575883839e-05, "loss": 1.7949, "step": 4355 }, { "epoch": 0.45247740729199126, "grad_norm": 0.3882110118865967, "learning_rate": 5.743713109019766e-05, "loss": 1.6454, "step": 4356 }, { "epoch": 0.45258128181157165, "grad_norm": 0.4016772508621216, "learning_rate": 5.742099562956048e-05, "loss": 1.6265, "step": 4357 }, { "epoch": 0.45268515633115197, "grad_norm": 0.4201604425907135, "learning_rate": 5.740485937864515e-05, "loss": 1.6872, "step": 4358 }, { "epoch": 0.4527890308507323, "grad_norm": 0.45017364621162415, "learning_rate": 5.7388722339170066e-05, "loss": 1.8138, "step": 4359 }, { "epoch": 0.4528929053703127, "grad_norm": 0.39483213424682617, "learning_rate": 5.7372584512853665e-05, "loss": 1.5788, "step": 4360 }, { "epoch": 0.452996779889893, "grad_norm": 0.4778917133808136, "learning_rate": 5.7356445901414545e-05, "loss": 1.8203, "step": 4361 }, { "epoch": 0.45310065440947334, "grad_norm": 0.3956323564052582, "learning_rate": 5.734030650657132e-05, "loss": 1.6614, "step": 4362 }, { "epoch": 0.4532045289290537, "grad_norm": 0.42811745405197144, "learning_rate": 5.73241663300427e-05, "loss": 1.851, "step": 4363 }, { "epoch": 0.45330840344863405, "grad_norm": 0.39979517459869385, "learning_rate": 5.730802537354749e-05, "loss": 1.6255, "step": 4364 }, { "epoch": 0.45341227796821437, "grad_norm": 0.4086802899837494, "learning_rate": 5.729188363880459e-05, "loss": 1.4243, "step": 4365 }, { "epoch": 0.45351615248779475, "grad_norm": 0.43692517280578613, "learning_rate": 5.727574112753296e-05, "loss": 1.8354, "step": 4366 }, { "epoch": 0.4536200270073751, "grad_norm": 0.37770721316337585, "learning_rate": 5.725959784145165e-05, "loss": 1.5944, "step": 4367 }, { "epoch": 0.45372390152695546, "grad_norm": 0.4468797445297241, "learning_rate": 5.72434537822798e-05, "loss": 1.7066, "step": 4368 }, { "epoch": 0.4538277760465358, "grad_norm": 0.4045311510562897, "learning_rate": 5.722730895173662e-05, "loss": 1.7554, "step": 4369 }, { "epoch": 0.4539316505661161, "grad_norm": 0.40388810634613037, "learning_rate": 5.721116335154142e-05, "loss": 1.4915, "step": 4370 }, { "epoch": 0.4540355250856965, "grad_norm": 0.4059275686740875, "learning_rate": 5.719501698341356e-05, "loss": 1.6267, "step": 4371 }, { "epoch": 0.45413939960527683, "grad_norm": 0.3910108804702759, "learning_rate": 5.7178869849072526e-05, "loss": 1.6264, "step": 4372 }, { "epoch": 0.45424327412485715, "grad_norm": 0.445516437292099, "learning_rate": 5.716272195023785e-05, "loss": 1.7784, "step": 4373 }, { "epoch": 0.45434714864443754, "grad_norm": 0.4089578688144684, "learning_rate": 5.7146573288629145e-05, "loss": 1.7885, "step": 4374 }, { "epoch": 0.45445102316401786, "grad_norm": 0.4145938754081726, "learning_rate": 5.713042386596614e-05, "loss": 1.6938, "step": 4375 }, { "epoch": 0.4545548976835982, "grad_norm": 0.38517993688583374, "learning_rate": 5.7114273683968625e-05, "loss": 1.5404, "step": 4376 }, { "epoch": 0.4546587722031786, "grad_norm": 0.40502604842185974, "learning_rate": 5.709812274435643e-05, "loss": 1.6948, "step": 4377 }, { "epoch": 0.4547626467227589, "grad_norm": 0.3674617409706116, "learning_rate": 5.708197104884955e-05, "loss": 1.6687, "step": 4378 }, { "epoch": 0.45486652124233923, "grad_norm": 0.38975459337234497, "learning_rate": 5.706581859916799e-05, "loss": 1.5973, "step": 4379 }, { "epoch": 0.4549703957619196, "grad_norm": 0.3994758725166321, "learning_rate": 5.704966539703185e-05, "loss": 1.6266, "step": 4380 }, { "epoch": 0.45507427028149994, "grad_norm": 0.39510855078697205, "learning_rate": 5.7033511444161355e-05, "loss": 1.7119, "step": 4381 }, { "epoch": 0.4551781448010803, "grad_norm": 0.4508286416530609, "learning_rate": 5.701735674227675e-05, "loss": 1.9857, "step": 4382 }, { "epoch": 0.45528201932066065, "grad_norm": 0.4357827603816986, "learning_rate": 5.7001201293098386e-05, "loss": 1.9188, "step": 4383 }, { "epoch": 0.455385893840241, "grad_norm": 0.4613460898399353, "learning_rate": 5.69850450983467e-05, "loss": 1.7152, "step": 4384 }, { "epoch": 0.45548976835982136, "grad_norm": 0.43691280484199524, "learning_rate": 5.6968888159742184e-05, "loss": 1.7644, "step": 4385 }, { "epoch": 0.4555936428794017, "grad_norm": 0.40065327286720276, "learning_rate": 5.695273047900544e-05, "loss": 1.6811, "step": 4386 }, { "epoch": 0.455697517398982, "grad_norm": 0.4466818869113922, "learning_rate": 5.693657205785715e-05, "loss": 1.8516, "step": 4387 }, { "epoch": 0.4558013919185624, "grad_norm": 0.4043349623680115, "learning_rate": 5.6920412898018026e-05, "loss": 1.8134, "step": 4388 }, { "epoch": 0.4559052664381427, "grad_norm": 0.42350390553474426, "learning_rate": 5.690425300120893e-05, "loss": 1.8296, "step": 4389 }, { "epoch": 0.45600914095772305, "grad_norm": 0.38826295733451843, "learning_rate": 5.6888092369150734e-05, "loss": 1.7926, "step": 4390 }, { "epoch": 0.45611301547730343, "grad_norm": 0.39671480655670166, "learning_rate": 5.6871931003564414e-05, "loss": 1.8088, "step": 4391 }, { "epoch": 0.45621688999688376, "grad_norm": 0.38095131516456604, "learning_rate": 5.685576890617107e-05, "loss": 1.5183, "step": 4392 }, { "epoch": 0.45632076451646414, "grad_norm": 0.37252774834632874, "learning_rate": 5.683960607869179e-05, "loss": 1.6363, "step": 4393 }, { "epoch": 0.45642463903604447, "grad_norm": 0.43346065282821655, "learning_rate": 5.6823442522847835e-05, "loss": 1.5937, "step": 4394 }, { "epoch": 0.4565285135556248, "grad_norm": 0.3876819610595703, "learning_rate": 5.680727824036046e-05, "loss": 1.723, "step": 4395 }, { "epoch": 0.4566323880752052, "grad_norm": 0.3917519152164459, "learning_rate": 5.6791113232951063e-05, "loss": 1.5769, "step": 4396 }, { "epoch": 0.4567362625947855, "grad_norm": 0.4223952889442444, "learning_rate": 5.677494750234108e-05, "loss": 1.7898, "step": 4397 }, { "epoch": 0.45684013711436583, "grad_norm": 0.39051714539527893, "learning_rate": 5.675878105025203e-05, "loss": 1.7079, "step": 4398 }, { "epoch": 0.4569440116339462, "grad_norm": 0.3843806982040405, "learning_rate": 5.674261387840551e-05, "loss": 1.5216, "step": 4399 }, { "epoch": 0.45704788615352654, "grad_norm": 0.4141395092010498, "learning_rate": 5.6726445988523224e-05, "loss": 1.6415, "step": 4400 }, { "epoch": 0.45715176067310687, "grad_norm": 0.4137116074562073, "learning_rate": 5.67102773823269e-05, "loss": 1.7626, "step": 4401 }, { "epoch": 0.45725563519268725, "grad_norm": 0.3695942461490631, "learning_rate": 5.669410806153838e-05, "loss": 1.6487, "step": 4402 }, { "epoch": 0.4573595097122676, "grad_norm": 0.3959349989891052, "learning_rate": 5.667793802787957e-05, "loss": 1.7431, "step": 4403 }, { "epoch": 0.4574633842318479, "grad_norm": 0.39805370569229126, "learning_rate": 5.6661767283072444e-05, "loss": 1.6595, "step": 4404 }, { "epoch": 0.4575672587514283, "grad_norm": 0.38343602418899536, "learning_rate": 5.664559582883906e-05, "loss": 1.8235, "step": 4405 }, { "epoch": 0.4576711332710086, "grad_norm": 0.4133247435092926, "learning_rate": 5.662942366690157e-05, "loss": 1.7427, "step": 4406 }, { "epoch": 0.457775007790589, "grad_norm": 0.44640305638313293, "learning_rate": 5.6613250798982156e-05, "loss": 1.7745, "step": 4407 }, { "epoch": 0.4578788823101693, "grad_norm": 0.4178631603717804, "learning_rate": 5.659707722680313e-05, "loss": 1.7777, "step": 4408 }, { "epoch": 0.45798275682974965, "grad_norm": 0.44724735617637634, "learning_rate": 5.6580902952086836e-05, "loss": 1.5845, "step": 4409 }, { "epoch": 0.45808663134933003, "grad_norm": 0.39849719405174255, "learning_rate": 5.656472797655571e-05, "loss": 1.5669, "step": 4410 }, { "epoch": 0.45819050586891036, "grad_norm": 0.3746553957462311, "learning_rate": 5.6548552301932265e-05, "loss": 1.5851, "step": 4411 }, { "epoch": 0.4582943803884907, "grad_norm": 0.4282001852989197, "learning_rate": 5.6532375929939075e-05, "loss": 1.8875, "step": 4412 }, { "epoch": 0.45839825490807107, "grad_norm": 0.3972803056240082, "learning_rate": 5.65161988622988e-05, "loss": 1.6262, "step": 4413 }, { "epoch": 0.4585021294276514, "grad_norm": 0.38488489389419556, "learning_rate": 5.650002110073418e-05, "loss": 1.5766, "step": 4414 }, { "epoch": 0.4586060039472317, "grad_norm": 0.45945605635643005, "learning_rate": 5.648384264696802e-05, "loss": 1.5854, "step": 4415 }, { "epoch": 0.4587098784668121, "grad_norm": 0.3760433793067932, "learning_rate": 5.6467663502723175e-05, "loss": 1.6934, "step": 4416 }, { "epoch": 0.45881375298639243, "grad_norm": 0.38754063844680786, "learning_rate": 5.645148366972264e-05, "loss": 1.7554, "step": 4417 }, { "epoch": 0.45891762750597276, "grad_norm": 0.388735830783844, "learning_rate": 5.643530314968941e-05, "loss": 1.4804, "step": 4418 }, { "epoch": 0.45902150202555314, "grad_norm": 0.39035165309906006, "learning_rate": 5.6419121944346585e-05, "loss": 1.6639, "step": 4419 }, { "epoch": 0.45912537654513347, "grad_norm": 0.3769480586051941, "learning_rate": 5.640294005541735e-05, "loss": 1.7626, "step": 4420 }, { "epoch": 0.45922925106471385, "grad_norm": 0.41671907901763916, "learning_rate": 5.638675748462493e-05, "loss": 1.7363, "step": 4421 }, { "epoch": 0.4593331255842942, "grad_norm": 0.4125288426876068, "learning_rate": 5.637057423369268e-05, "loss": 1.725, "step": 4422 }, { "epoch": 0.4594370001038745, "grad_norm": 0.44852516055107117, "learning_rate": 5.635439030434395e-05, "loss": 1.8253, "step": 4423 }, { "epoch": 0.4595408746234549, "grad_norm": 0.4015621542930603, "learning_rate": 5.6338205698302224e-05, "loss": 1.6797, "step": 4424 }, { "epoch": 0.4596447491430352, "grad_norm": 0.40786075592041016, "learning_rate": 5.6322020417291034e-05, "loss": 1.7569, "step": 4425 }, { "epoch": 0.45974862366261554, "grad_norm": 0.4462954103946686, "learning_rate": 5.630583446303399e-05, "loss": 1.8999, "step": 4426 }, { "epoch": 0.4598524981821959, "grad_norm": 0.40880078077316284, "learning_rate": 5.6289647837254744e-05, "loss": 1.5374, "step": 4427 }, { "epoch": 0.45995637270177625, "grad_norm": 0.4236305356025696, "learning_rate": 5.627346054167707e-05, "loss": 1.7143, "step": 4428 }, { "epoch": 0.4600602472213566, "grad_norm": 0.3998420834541321, "learning_rate": 5.625727257802479e-05, "loss": 1.8177, "step": 4429 }, { "epoch": 0.46016412174093696, "grad_norm": 0.4473719000816345, "learning_rate": 5.624108394802178e-05, "loss": 1.8531, "step": 4430 }, { "epoch": 0.4602679962605173, "grad_norm": 0.39816030859947205, "learning_rate": 5.6224894653392035e-05, "loss": 1.6969, "step": 4431 }, { "epoch": 0.46037187078009767, "grad_norm": 0.36577850580215454, "learning_rate": 5.6208704695859535e-05, "loss": 1.5994, "step": 4432 }, { "epoch": 0.460475745299678, "grad_norm": 0.3840119540691376, "learning_rate": 5.619251407714843e-05, "loss": 1.6662, "step": 4433 }, { "epoch": 0.4605796198192583, "grad_norm": 0.4154397249221802, "learning_rate": 5.617632279898288e-05, "loss": 1.7509, "step": 4434 }, { "epoch": 0.4606834943388387, "grad_norm": 0.3793880045413971, "learning_rate": 5.6160130863087115e-05, "loss": 1.6896, "step": 4435 }, { "epoch": 0.46078736885841903, "grad_norm": 0.3721482455730438, "learning_rate": 5.614393827118548e-05, "loss": 1.6231, "step": 4436 }, { "epoch": 0.46089124337799936, "grad_norm": 0.37379753589630127, "learning_rate": 5.6127745025002344e-05, "loss": 1.6131, "step": 4437 }, { "epoch": 0.46099511789757974, "grad_norm": 0.3776273727416992, "learning_rate": 5.611155112626215e-05, "loss": 1.7206, "step": 4438 }, { "epoch": 0.46109899241716007, "grad_norm": 0.37556153535842896, "learning_rate": 5.609535657668945e-05, "loss": 1.6558, "step": 4439 }, { "epoch": 0.4612028669367404, "grad_norm": 0.41758623719215393, "learning_rate": 5.6079161378008805e-05, "loss": 1.7243, "step": 4440 }, { "epoch": 0.4613067414563208, "grad_norm": 0.39208123087882996, "learning_rate": 5.606296553194489e-05, "loss": 1.7576, "step": 4441 }, { "epoch": 0.4614106159759011, "grad_norm": 0.3936588764190674, "learning_rate": 5.6046769040222446e-05, "loss": 1.5866, "step": 4442 }, { "epoch": 0.46151449049548143, "grad_norm": 0.3708806335926056, "learning_rate": 5.603057190456628e-05, "loss": 1.6697, "step": 4443 }, { "epoch": 0.4616183650150618, "grad_norm": 0.4200730621814728, "learning_rate": 5.601437412670121e-05, "loss": 1.4743, "step": 4444 }, { "epoch": 0.46172223953464214, "grad_norm": 0.40892651677131653, "learning_rate": 5.5998175708352255e-05, "loss": 1.7079, "step": 4445 }, { "epoch": 0.4618261140542225, "grad_norm": 0.41610532999038696, "learning_rate": 5.598197665124434e-05, "loss": 1.6898, "step": 4446 }, { "epoch": 0.46192998857380285, "grad_norm": 0.39250999689102173, "learning_rate": 5.596577695710258e-05, "loss": 1.686, "step": 4447 }, { "epoch": 0.4620338630933832, "grad_norm": 0.3861941695213318, "learning_rate": 5.594957662765211e-05, "loss": 1.6314, "step": 4448 }, { "epoch": 0.46213773761296356, "grad_norm": 0.4138748049736023, "learning_rate": 5.5933375664618134e-05, "loss": 1.6829, "step": 4449 }, { "epoch": 0.4622416121325439, "grad_norm": 0.45050138235092163, "learning_rate": 5.591717406972594e-05, "loss": 1.9202, "step": 4450 }, { "epoch": 0.4623454866521242, "grad_norm": 0.40642380714416504, "learning_rate": 5.590097184470087e-05, "loss": 1.7383, "step": 4451 }, { "epoch": 0.4624493611717046, "grad_norm": 0.4135674238204956, "learning_rate": 5.588476899126832e-05, "loss": 1.7811, "step": 4452 }, { "epoch": 0.4625532356912849, "grad_norm": 0.4031025767326355, "learning_rate": 5.586856551115378e-05, "loss": 1.6378, "step": 4453 }, { "epoch": 0.46265711021086525, "grad_norm": 0.41000255942344666, "learning_rate": 5.585236140608279e-05, "loss": 1.7076, "step": 4454 }, { "epoch": 0.46276098473044563, "grad_norm": 0.41616103053092957, "learning_rate": 5.583615667778094e-05, "loss": 1.7697, "step": 4455 }, { "epoch": 0.46286485925002596, "grad_norm": 0.4099687337875366, "learning_rate": 5.581995132797394e-05, "loss": 1.5478, "step": 4456 }, { "epoch": 0.4629687337696063, "grad_norm": 0.41153401136398315, "learning_rate": 5.5803745358387526e-05, "loss": 1.7247, "step": 4457 }, { "epoch": 0.46307260828918667, "grad_norm": 0.41319242119789124, "learning_rate": 5.578753877074749e-05, "loss": 1.768, "step": 4458 }, { "epoch": 0.463176482808767, "grad_norm": 0.3985736072063446, "learning_rate": 5.577133156677975e-05, "loss": 1.7182, "step": 4459 }, { "epoch": 0.4632803573283474, "grad_norm": 0.4140629470348358, "learning_rate": 5.575512374821018e-05, "loss": 1.8411, "step": 4460 }, { "epoch": 0.4633842318479277, "grad_norm": 0.40386584401130676, "learning_rate": 5.573891531676484e-05, "loss": 1.6013, "step": 4461 }, { "epoch": 0.46348810636750803, "grad_norm": 0.40092045068740845, "learning_rate": 5.572270627416979e-05, "loss": 1.8274, "step": 4462 }, { "epoch": 0.4635919808870884, "grad_norm": 0.4397892355918884, "learning_rate": 5.5706496622151136e-05, "loss": 1.8865, "step": 4463 }, { "epoch": 0.46369585540666874, "grad_norm": 0.41136595606803894, "learning_rate": 5.569028636243513e-05, "loss": 1.6921, "step": 4464 }, { "epoch": 0.46379972992624907, "grad_norm": 0.3688594698905945, "learning_rate": 5.5674075496748e-05, "loss": 1.6099, "step": 4465 }, { "epoch": 0.46390360444582945, "grad_norm": 0.41220369935035706, "learning_rate": 5.56578640268161e-05, "loss": 1.7032, "step": 4466 }, { "epoch": 0.4640074789654098, "grad_norm": 0.38784199953079224, "learning_rate": 5.5641651954365803e-05, "loss": 1.7474, "step": 4467 }, { "epoch": 0.4641113534849901, "grad_norm": 0.3877417743206024, "learning_rate": 5.562543928112358e-05, "loss": 1.7868, "step": 4468 }, { "epoch": 0.4642152280045705, "grad_norm": 0.41094866394996643, "learning_rate": 5.560922600881595e-05, "loss": 1.684, "step": 4469 }, { "epoch": 0.4643191025241508, "grad_norm": 0.3839239180088043, "learning_rate": 5.559301213916952e-05, "loss": 1.601, "step": 4470 }, { "epoch": 0.4644229770437312, "grad_norm": 0.3839179277420044, "learning_rate": 5.557679767391091e-05, "loss": 1.773, "step": 4471 }, { "epoch": 0.4645268515633115, "grad_norm": 0.3948630392551422, "learning_rate": 5.5560582614766845e-05, "loss": 1.7059, "step": 4472 }, { "epoch": 0.46463072608289185, "grad_norm": 0.4155755639076233, "learning_rate": 5.5544366963464134e-05, "loss": 1.7463, "step": 4473 }, { "epoch": 0.46473460060247224, "grad_norm": 0.3817596733570099, "learning_rate": 5.552815072172955e-05, "loss": 1.6543, "step": 4474 }, { "epoch": 0.46483847512205256, "grad_norm": 0.40604111552238464, "learning_rate": 5.5511933891290056e-05, "loss": 1.6462, "step": 4475 }, { "epoch": 0.4649423496416329, "grad_norm": 0.39084017276763916, "learning_rate": 5.5495716473872604e-05, "loss": 1.656, "step": 4476 }, { "epoch": 0.4650462241612133, "grad_norm": 0.4190458059310913, "learning_rate": 5.5479498471204196e-05, "loss": 1.7544, "step": 4477 }, { "epoch": 0.4651500986807936, "grad_norm": 0.37424778938293457, "learning_rate": 5.546327988501196e-05, "loss": 1.5076, "step": 4478 }, { "epoch": 0.4652539732003739, "grad_norm": 0.4260268807411194, "learning_rate": 5.544706071702302e-05, "loss": 1.638, "step": 4479 }, { "epoch": 0.4653578477199543, "grad_norm": 0.3845141530036926, "learning_rate": 5.5430840968964615e-05, "loss": 1.5985, "step": 4480 }, { "epoch": 0.46546172223953464, "grad_norm": 0.385220468044281, "learning_rate": 5.541462064256401e-05, "loss": 1.7247, "step": 4481 }, { "epoch": 0.46556559675911496, "grad_norm": 0.41517341136932373, "learning_rate": 5.5398399739548524e-05, "loss": 1.8184, "step": 4482 }, { "epoch": 0.46566947127869535, "grad_norm": 0.4744667112827301, "learning_rate": 5.538217826164558e-05, "loss": 1.8839, "step": 4483 }, { "epoch": 0.4657733457982757, "grad_norm": 0.40122199058532715, "learning_rate": 5.5365956210582634e-05, "loss": 1.5911, "step": 4484 }, { "epoch": 0.46587722031785606, "grad_norm": 0.4111073911190033, "learning_rate": 5.5349733588087216e-05, "loss": 1.7894, "step": 4485 }, { "epoch": 0.4659810948374364, "grad_norm": 0.38711994886398315, "learning_rate": 5.533351039588689e-05, "loss": 1.7112, "step": 4486 }, { "epoch": 0.4660849693570167, "grad_norm": 0.4004124104976654, "learning_rate": 5.531728663570933e-05, "loss": 1.7559, "step": 4487 }, { "epoch": 0.4661888438765971, "grad_norm": 0.3998907804489136, "learning_rate": 5.53010623092822e-05, "loss": 1.6631, "step": 4488 }, { "epoch": 0.4662927183961774, "grad_norm": 0.3848699927330017, "learning_rate": 5.5284837418333294e-05, "loss": 1.736, "step": 4489 }, { "epoch": 0.46639659291575775, "grad_norm": 0.41114541888237, "learning_rate": 5.5268611964590425e-05, "loss": 1.8165, "step": 4490 }, { "epoch": 0.46650046743533813, "grad_norm": 0.475700706243515, "learning_rate": 5.525238594978146e-05, "loss": 1.8559, "step": 4491 }, { "epoch": 0.46660434195491846, "grad_norm": 0.37533336877822876, "learning_rate": 5.523615937563438e-05, "loss": 1.5622, "step": 4492 }, { "epoch": 0.4667082164744988, "grad_norm": 0.38787853717803955, "learning_rate": 5.521993224387717e-05, "loss": 1.6647, "step": 4493 }, { "epoch": 0.46681209099407917, "grad_norm": 0.4438975155353546, "learning_rate": 5.520370455623789e-05, "loss": 1.7663, "step": 4494 }, { "epoch": 0.4669159655136595, "grad_norm": 0.4348212480545044, "learning_rate": 5.518747631444467e-05, "loss": 1.8218, "step": 4495 }, { "epoch": 0.4670198400332398, "grad_norm": 0.4120256304740906, "learning_rate": 5.517124752022569e-05, "loss": 1.6427, "step": 4496 }, { "epoch": 0.4671237145528202, "grad_norm": 0.3732791244983673, "learning_rate": 5.515501817530917e-05, "loss": 1.5317, "step": 4497 }, { "epoch": 0.46722758907240053, "grad_norm": 0.40980592370033264, "learning_rate": 5.513878828142344e-05, "loss": 1.736, "step": 4498 }, { "epoch": 0.4673314635919809, "grad_norm": 0.42405858635902405, "learning_rate": 5.5122557840296815e-05, "loss": 1.7212, "step": 4499 }, { "epoch": 0.46743533811156124, "grad_norm": 0.380643367767334, "learning_rate": 5.5106326853657764e-05, "loss": 1.6732, "step": 4500 }, { "epoch": 0.46753921263114157, "grad_norm": 0.4484168589115143, "learning_rate": 5.5090095323234735e-05, "loss": 1.8326, "step": 4501 }, { "epoch": 0.46764308715072195, "grad_norm": 0.44353044033050537, "learning_rate": 5.507386325075625e-05, "loss": 1.8159, "step": 4502 }, { "epoch": 0.4677469616703023, "grad_norm": 0.40523630380630493, "learning_rate": 5.505763063795091e-05, "loss": 1.8045, "step": 4503 }, { "epoch": 0.4678508361898826, "grad_norm": 0.41718611121177673, "learning_rate": 5.504139748654735e-05, "loss": 1.7684, "step": 4504 }, { "epoch": 0.467954710709463, "grad_norm": 0.3799745440483093, "learning_rate": 5.5025163798274283e-05, "loss": 1.7299, "step": 4505 }, { "epoch": 0.4680585852290433, "grad_norm": 0.5379766821861267, "learning_rate": 5.500892957486048e-05, "loss": 1.8735, "step": 4506 }, { "epoch": 0.46816245974862364, "grad_norm": 0.42083024978637695, "learning_rate": 5.499269481803474e-05, "loss": 1.6187, "step": 4507 }, { "epoch": 0.468266334268204, "grad_norm": 0.44988521933555603, "learning_rate": 5.497645952952596e-05, "loss": 1.8866, "step": 4508 }, { "epoch": 0.46837020878778435, "grad_norm": 0.37519097328186035, "learning_rate": 5.496022371106304e-05, "loss": 1.6358, "step": 4509 }, { "epoch": 0.46847408330736473, "grad_norm": 0.40404897928237915, "learning_rate": 5.4943987364374985e-05, "loss": 1.6527, "step": 4510 }, { "epoch": 0.46857795782694506, "grad_norm": 0.3824155032634735, "learning_rate": 5.492775049119085e-05, "loss": 1.571, "step": 4511 }, { "epoch": 0.4686818323465254, "grad_norm": 0.38465747237205505, "learning_rate": 5.491151309323973e-05, "loss": 1.8012, "step": 4512 }, { "epoch": 0.46878570686610577, "grad_norm": 0.41244107484817505, "learning_rate": 5.489527517225076e-05, "loss": 1.7915, "step": 4513 }, { "epoch": 0.4688895813856861, "grad_norm": 0.39716869592666626, "learning_rate": 5.487903672995318e-05, "loss": 1.6541, "step": 4514 }, { "epoch": 0.4689934559052664, "grad_norm": 0.3766006529331207, "learning_rate": 5.486279776807626e-05, "loss": 1.7457, "step": 4515 }, { "epoch": 0.4690973304248468, "grad_norm": 0.3792710602283478, "learning_rate": 5.484655828834929e-05, "loss": 1.5523, "step": 4516 }, { "epoch": 0.46920120494442713, "grad_norm": 0.4360962212085724, "learning_rate": 5.483031829250167e-05, "loss": 1.7419, "step": 4517 }, { "epoch": 0.46930507946400746, "grad_norm": 0.4073307514190674, "learning_rate": 5.481407778226284e-05, "loss": 1.7789, "step": 4518 }, { "epoch": 0.46940895398358784, "grad_norm": 0.3982420563697815, "learning_rate": 5.4797836759362256e-05, "loss": 1.6478, "step": 4519 }, { "epoch": 0.46951282850316817, "grad_norm": 0.3903031051158905, "learning_rate": 5.478159522552949e-05, "loss": 1.5748, "step": 4520 }, { "epoch": 0.4696167030227485, "grad_norm": 0.4410886764526367, "learning_rate": 5.476535318249414e-05, "loss": 1.7942, "step": 4521 }, { "epoch": 0.4697205775423289, "grad_norm": 0.3934234380722046, "learning_rate": 5.4749110631985846e-05, "loss": 1.6272, "step": 4522 }, { "epoch": 0.4698244520619092, "grad_norm": 0.4124247431755066, "learning_rate": 5.4732867575734314e-05, "loss": 1.7511, "step": 4523 }, { "epoch": 0.4699283265814896, "grad_norm": 0.4178972840309143, "learning_rate": 5.4716624015469284e-05, "loss": 1.7224, "step": 4524 }, { "epoch": 0.4700322011010699, "grad_norm": 0.44093725085258484, "learning_rate": 5.470037995292061e-05, "loss": 1.6125, "step": 4525 }, { "epoch": 0.47013607562065024, "grad_norm": 0.3923119008541107, "learning_rate": 5.468413538981814e-05, "loss": 1.6888, "step": 4526 }, { "epoch": 0.4702399501402306, "grad_norm": 0.4040331244468689, "learning_rate": 5.466789032789176e-05, "loss": 1.7642, "step": 4527 }, { "epoch": 0.47034382465981095, "grad_norm": 0.4173428416252136, "learning_rate": 5.465164476887149e-05, "loss": 1.6372, "step": 4528 }, { "epoch": 0.4704476991793913, "grad_norm": 0.36699429154396057, "learning_rate": 5.463539871448735e-05, "loss": 1.7026, "step": 4529 }, { "epoch": 0.47055157369897166, "grad_norm": 0.37579965591430664, "learning_rate": 5.461915216646938e-05, "loss": 1.513, "step": 4530 }, { "epoch": 0.470655448218552, "grad_norm": 0.39434462785720825, "learning_rate": 5.4602905126547744e-05, "loss": 1.6832, "step": 4531 }, { "epoch": 0.4707593227381323, "grad_norm": 0.40880143642425537, "learning_rate": 5.4586657596452615e-05, "loss": 1.5767, "step": 4532 }, { "epoch": 0.4708631972577127, "grad_norm": 0.4027133584022522, "learning_rate": 5.4570409577914215e-05, "loss": 1.6025, "step": 4533 }, { "epoch": 0.470967071777293, "grad_norm": 0.37982282042503357, "learning_rate": 5.455416107266287e-05, "loss": 1.6899, "step": 4534 }, { "epoch": 0.47107094629687335, "grad_norm": 0.4183795154094696, "learning_rate": 5.453791208242889e-05, "loss": 1.7355, "step": 4535 }, { "epoch": 0.47117482081645373, "grad_norm": 0.4518531560897827, "learning_rate": 5.452166260894267e-05, "loss": 1.9148, "step": 4536 }, { "epoch": 0.47127869533603406, "grad_norm": 0.41706570982933044, "learning_rate": 5.450541265393464e-05, "loss": 1.7175, "step": 4537 }, { "epoch": 0.47138256985561444, "grad_norm": 0.38298308849334717, "learning_rate": 5.4489162219135314e-05, "loss": 1.8146, "step": 4538 }, { "epoch": 0.47148644437519477, "grad_norm": 0.40449774265289307, "learning_rate": 5.447291130627522e-05, "loss": 1.8353, "step": 4539 }, { "epoch": 0.4715903188947751, "grad_norm": 0.40344521403312683, "learning_rate": 5.4456659917084975e-05, "loss": 1.672, "step": 4540 }, { "epoch": 0.4716941934143555, "grad_norm": 0.3761052191257477, "learning_rate": 5.44404080532952e-05, "loss": 1.7119, "step": 4541 }, { "epoch": 0.4717980679339358, "grad_norm": 0.45389243960380554, "learning_rate": 5.442415571663661e-05, "loss": 1.9569, "step": 4542 }, { "epoch": 0.47190194245351613, "grad_norm": 0.3938455581665039, "learning_rate": 5.440790290883997e-05, "loss": 1.5407, "step": 4543 }, { "epoch": 0.4720058169730965, "grad_norm": 0.39464959502220154, "learning_rate": 5.439164963163601e-05, "loss": 1.7799, "step": 4544 }, { "epoch": 0.47210969149267684, "grad_norm": 0.3845635652542114, "learning_rate": 5.437539588675564e-05, "loss": 1.6902, "step": 4545 }, { "epoch": 0.47221356601225717, "grad_norm": 0.41250425577163696, "learning_rate": 5.435914167592975e-05, "loss": 1.7341, "step": 4546 }, { "epoch": 0.47231744053183755, "grad_norm": 0.42640382051467896, "learning_rate": 5.434288700088925e-05, "loss": 1.7448, "step": 4547 }, { "epoch": 0.4724213150514179, "grad_norm": 0.3739955425262451, "learning_rate": 5.432663186336517e-05, "loss": 1.6007, "step": 4548 }, { "epoch": 0.47252518957099826, "grad_norm": 0.3853756785392761, "learning_rate": 5.431037626508856e-05, "loss": 1.6348, "step": 4549 }, { "epoch": 0.4726290640905786, "grad_norm": 0.4050140082836151, "learning_rate": 5.429412020779049e-05, "loss": 1.6318, "step": 4550 }, { "epoch": 0.4727329386101589, "grad_norm": 0.39661628007888794, "learning_rate": 5.427786369320211e-05, "loss": 1.7079, "step": 4551 }, { "epoch": 0.4728368131297393, "grad_norm": 0.3745506703853607, "learning_rate": 5.4261606723054604e-05, "loss": 1.5058, "step": 4552 }, { "epoch": 0.4729406876493196, "grad_norm": 0.45182400941848755, "learning_rate": 5.424534929907924e-05, "loss": 1.7662, "step": 4553 }, { "epoch": 0.47304456216889995, "grad_norm": 0.39401260018348694, "learning_rate": 5.422909142300729e-05, "loss": 1.6387, "step": 4554 }, { "epoch": 0.47314843668848033, "grad_norm": 0.43190351128578186, "learning_rate": 5.4212833096570085e-05, "loss": 1.9062, "step": 4555 }, { "epoch": 0.47325231120806066, "grad_norm": 0.3953791558742523, "learning_rate": 5.419657432149903e-05, "loss": 1.779, "step": 4556 }, { "epoch": 0.473356185727641, "grad_norm": 0.4370119273662567, "learning_rate": 5.418031509952554e-05, "loss": 1.7205, "step": 4557 }, { "epoch": 0.47346006024722137, "grad_norm": 0.46050959825515747, "learning_rate": 5.4164055432381076e-05, "loss": 1.8187, "step": 4558 }, { "epoch": 0.4735639347668017, "grad_norm": 0.3929772675037384, "learning_rate": 5.414779532179721e-05, "loss": 1.7266, "step": 4559 }, { "epoch": 0.473667809286382, "grad_norm": 0.3913351595401764, "learning_rate": 5.41315347695055e-05, "loss": 1.6847, "step": 4560 }, { "epoch": 0.4737716838059624, "grad_norm": 0.37702706456184387, "learning_rate": 5.4115273777237554e-05, "loss": 1.5548, "step": 4561 }, { "epoch": 0.47387555832554273, "grad_norm": 0.4072859585285187, "learning_rate": 5.4099012346725054e-05, "loss": 1.6328, "step": 4562 }, { "epoch": 0.4739794328451231, "grad_norm": 0.4485922157764435, "learning_rate": 5.4082750479699715e-05, "loss": 1.7856, "step": 4563 }, { "epoch": 0.47408330736470344, "grad_norm": 0.4083961248397827, "learning_rate": 5.406648817789329e-05, "loss": 1.6751, "step": 4564 }, { "epoch": 0.47418718188428377, "grad_norm": 0.4277319312095642, "learning_rate": 5.40502254430376e-05, "loss": 1.793, "step": 4565 }, { "epoch": 0.47429105640386415, "grad_norm": 0.4535433053970337, "learning_rate": 5.403396227686449e-05, "loss": 1.5738, "step": 4566 }, { "epoch": 0.4743949309234445, "grad_norm": 0.41116324067115784, "learning_rate": 5.4017698681105865e-05, "loss": 1.7443, "step": 4567 }, { "epoch": 0.4744988054430248, "grad_norm": 0.38053932785987854, "learning_rate": 5.400143465749368e-05, "loss": 1.6522, "step": 4568 }, { "epoch": 0.4746026799626052, "grad_norm": 0.40427184104919434, "learning_rate": 5.398517020775989e-05, "loss": 1.7336, "step": 4569 }, { "epoch": 0.4747065544821855, "grad_norm": 0.4339055120944977, "learning_rate": 5.39689053336366e-05, "loss": 1.7395, "step": 4570 }, { "epoch": 0.47481042900176584, "grad_norm": 0.415379136800766, "learning_rate": 5.3952640036855825e-05, "loss": 1.5726, "step": 4571 }, { "epoch": 0.4749143035213462, "grad_norm": 0.4650099575519562, "learning_rate": 5.3936374319149705e-05, "loss": 2.0233, "step": 4572 }, { "epoch": 0.47501817804092655, "grad_norm": 0.429559588432312, "learning_rate": 5.392010818225044e-05, "loss": 1.539, "step": 4573 }, { "epoch": 0.4751220525605069, "grad_norm": 0.4143827259540558, "learning_rate": 5.390384162789024e-05, "loss": 1.767, "step": 4574 }, { "epoch": 0.47522592708008726, "grad_norm": 0.4033125042915344, "learning_rate": 5.3887574657801346e-05, "loss": 1.7017, "step": 4575 }, { "epoch": 0.4753298015996676, "grad_norm": 0.41067859530448914, "learning_rate": 5.3871307273716074e-05, "loss": 1.6445, "step": 4576 }, { "epoch": 0.47543367611924797, "grad_norm": 0.41794803738594055, "learning_rate": 5.3855039477366775e-05, "loss": 1.7336, "step": 4577 }, { "epoch": 0.4755375506388283, "grad_norm": 0.38265594840049744, "learning_rate": 5.3838771270485854e-05, "loss": 1.6598, "step": 4578 }, { "epoch": 0.4756414251584086, "grad_norm": 0.3966656029224396, "learning_rate": 5.3822502654805725e-05, "loss": 1.5692, "step": 4579 }, { "epoch": 0.475745299677989, "grad_norm": 0.4140309989452362, "learning_rate": 5.3806233632058876e-05, "loss": 1.687, "step": 4580 }, { "epoch": 0.47584917419756934, "grad_norm": 0.43315640091896057, "learning_rate": 5.378996420397784e-05, "loss": 1.6507, "step": 4581 }, { "epoch": 0.47595304871714966, "grad_norm": 0.4187996983528137, "learning_rate": 5.377369437229518e-05, "loss": 1.782, "step": 4582 }, { "epoch": 0.47605692323673005, "grad_norm": 0.41366416215896606, "learning_rate": 5.375742413874351e-05, "loss": 1.7035, "step": 4583 }, { "epoch": 0.47616079775631037, "grad_norm": 0.4386520981788635, "learning_rate": 5.3741153505055475e-05, "loss": 1.6678, "step": 4584 }, { "epoch": 0.4762646722758907, "grad_norm": 0.4309309124946594, "learning_rate": 5.372488247296378e-05, "loss": 1.7662, "step": 4585 }, { "epoch": 0.4763685467954711, "grad_norm": 0.4261937737464905, "learning_rate": 5.370861104420114e-05, "loss": 1.7769, "step": 4586 }, { "epoch": 0.4764724213150514, "grad_norm": 0.3945866823196411, "learning_rate": 5.369233922050036e-05, "loss": 1.4977, "step": 4587 }, { "epoch": 0.4765762958346318, "grad_norm": 0.38651934266090393, "learning_rate": 5.3676067003594255e-05, "loss": 1.7031, "step": 4588 }, { "epoch": 0.4766801703542121, "grad_norm": 0.43587467074394226, "learning_rate": 5.365979439521569e-05, "loss": 1.6961, "step": 4589 }, { "epoch": 0.47678404487379245, "grad_norm": 0.43202391266822815, "learning_rate": 5.364352139709758e-05, "loss": 1.8117, "step": 4590 }, { "epoch": 0.47688791939337283, "grad_norm": 0.4378650486469269, "learning_rate": 5.362724801097284e-05, "loss": 1.6322, "step": 4591 }, { "epoch": 0.47699179391295315, "grad_norm": 0.4283282160758972, "learning_rate": 5.3610974238574495e-05, "loss": 1.7273, "step": 4592 }, { "epoch": 0.4770956684325335, "grad_norm": 0.42612189054489136, "learning_rate": 5.359470008163556e-05, "loss": 1.824, "step": 4593 }, { "epoch": 0.47719954295211386, "grad_norm": 0.38064736127853394, "learning_rate": 5.35784255418891e-05, "loss": 1.5924, "step": 4594 }, { "epoch": 0.4773034174716942, "grad_norm": 0.40858712792396545, "learning_rate": 5.356215062106823e-05, "loss": 1.7402, "step": 4595 }, { "epoch": 0.4774072919912745, "grad_norm": 0.4329719543457031, "learning_rate": 5.354587532090611e-05, "loss": 1.861, "step": 4596 }, { "epoch": 0.4775111665108549, "grad_norm": 0.4516226649284363, "learning_rate": 5.352959964313592e-05, "loss": 1.8382, "step": 4597 }, { "epoch": 0.47761504103043523, "grad_norm": 0.4081841707229614, "learning_rate": 5.3513323589490924e-05, "loss": 1.6581, "step": 4598 }, { "epoch": 0.47771891555001555, "grad_norm": 0.3968779444694519, "learning_rate": 5.349704716170435e-05, "loss": 1.5309, "step": 4599 }, { "epoch": 0.47782279006959594, "grad_norm": 0.4048573076725006, "learning_rate": 5.348077036150954e-05, "loss": 1.7319, "step": 4600 }, { "epoch": 0.47792666458917626, "grad_norm": 0.4110606014728546, "learning_rate": 5.346449319063983e-05, "loss": 1.7603, "step": 4601 }, { "epoch": 0.47803053910875665, "grad_norm": 0.4106254577636719, "learning_rate": 5.34482156508286e-05, "loss": 1.5944, "step": 4602 }, { "epoch": 0.478134413628337, "grad_norm": 0.3852187693119049, "learning_rate": 5.343193774380933e-05, "loss": 1.5459, "step": 4603 }, { "epoch": 0.4782382881479173, "grad_norm": 0.40513893961906433, "learning_rate": 5.341565947131546e-05, "loss": 1.6581, "step": 4604 }, { "epoch": 0.4783421626674977, "grad_norm": 0.38794079422950745, "learning_rate": 5.3399380835080493e-05, "loss": 1.6475, "step": 4605 }, { "epoch": 0.478446037187078, "grad_norm": 0.3935708999633789, "learning_rate": 5.338310183683799e-05, "loss": 1.7491, "step": 4606 }, { "epoch": 0.47854991170665834, "grad_norm": 0.5326302647590637, "learning_rate": 5.3366822478321524e-05, "loss": 1.7748, "step": 4607 }, { "epoch": 0.4786537862262387, "grad_norm": 0.4071970283985138, "learning_rate": 5.335054276126471e-05, "loss": 1.6662, "step": 4608 }, { "epoch": 0.47875766074581905, "grad_norm": 0.43349170684814453, "learning_rate": 5.333426268740125e-05, "loss": 1.8815, "step": 4609 }, { "epoch": 0.4788615352653994, "grad_norm": 0.372646689414978, "learning_rate": 5.331798225846482e-05, "loss": 1.5864, "step": 4610 }, { "epoch": 0.47896540978497976, "grad_norm": 0.4014649987220764, "learning_rate": 5.330170147618914e-05, "loss": 1.6708, "step": 4611 }, { "epoch": 0.4790692843045601, "grad_norm": 0.4292800724506378, "learning_rate": 5.3285420342308035e-05, "loss": 1.8315, "step": 4612 }, { "epoch": 0.47917315882414047, "grad_norm": 0.41147077083587646, "learning_rate": 5.326913885855527e-05, "loss": 1.6449, "step": 4613 }, { "epoch": 0.4792770333437208, "grad_norm": 0.35840991139411926, "learning_rate": 5.325285702666473e-05, "loss": 1.5407, "step": 4614 }, { "epoch": 0.4793809078633011, "grad_norm": 0.4264986515045166, "learning_rate": 5.323657484837028e-05, "loss": 1.6642, "step": 4615 }, { "epoch": 0.4794847823828815, "grad_norm": 0.4104459881782532, "learning_rate": 5.3220292325405854e-05, "loss": 1.6084, "step": 4616 }, { "epoch": 0.47958865690246183, "grad_norm": 0.4270613193511963, "learning_rate": 5.320400945950542e-05, "loss": 1.9207, "step": 4617 }, { "epoch": 0.47969253142204216, "grad_norm": 0.4374467730522156, "learning_rate": 5.318772625240298e-05, "loss": 1.6418, "step": 4618 }, { "epoch": 0.47979640594162254, "grad_norm": 0.3923068940639496, "learning_rate": 5.3171442705832554e-05, "loss": 1.6963, "step": 4619 }, { "epoch": 0.47990028046120287, "grad_norm": 0.3642514646053314, "learning_rate": 5.315515882152822e-05, "loss": 1.5201, "step": 4620 }, { "epoch": 0.4800041549807832, "grad_norm": 0.41580015420913696, "learning_rate": 5.313887460122408e-05, "loss": 1.5179, "step": 4621 }, { "epoch": 0.4801080295003636, "grad_norm": 0.42775875329971313, "learning_rate": 5.312259004665426e-05, "loss": 1.7967, "step": 4622 }, { "epoch": 0.4802119040199439, "grad_norm": 0.3977086544036865, "learning_rate": 5.310630515955297e-05, "loss": 1.4864, "step": 4623 }, { "epoch": 0.48031577853952423, "grad_norm": 0.41823333501815796, "learning_rate": 5.309001994165442e-05, "loss": 1.7526, "step": 4624 }, { "epoch": 0.4804196530591046, "grad_norm": 0.41474875807762146, "learning_rate": 5.307373439469283e-05, "loss": 1.5733, "step": 4625 }, { "epoch": 0.48052352757868494, "grad_norm": 0.370061457157135, "learning_rate": 5.305744852040253e-05, "loss": 1.541, "step": 4626 }, { "epoch": 0.4806274020982653, "grad_norm": 0.36893734335899353, "learning_rate": 5.304116232051779e-05, "loss": 1.6238, "step": 4627 }, { "epoch": 0.48073127661784565, "grad_norm": 0.4357246458530426, "learning_rate": 5.3024875796773e-05, "loss": 1.9604, "step": 4628 }, { "epoch": 0.480835151137426, "grad_norm": 0.3936040997505188, "learning_rate": 5.3008588950902524e-05, "loss": 1.4643, "step": 4629 }, { "epoch": 0.48093902565700636, "grad_norm": 0.38085660338401794, "learning_rate": 5.2992301784640785e-05, "loss": 1.6453, "step": 4630 }, { "epoch": 0.4810429001765867, "grad_norm": 0.3864935338497162, "learning_rate": 5.2976014299722263e-05, "loss": 1.6807, "step": 4631 }, { "epoch": 0.481146774696167, "grad_norm": 0.405137836933136, "learning_rate": 5.2959726497881425e-05, "loss": 1.7978, "step": 4632 }, { "epoch": 0.4812506492157474, "grad_norm": 0.3816376328468323, "learning_rate": 5.29434383808528e-05, "loss": 1.7011, "step": 4633 }, { "epoch": 0.4813545237353277, "grad_norm": 0.3807872533798218, "learning_rate": 5.292714995037096e-05, "loss": 1.6109, "step": 4634 }, { "epoch": 0.48145839825490805, "grad_norm": 0.3880075514316559, "learning_rate": 5.291086120817047e-05, "loss": 1.6175, "step": 4635 }, { "epoch": 0.48156227277448843, "grad_norm": 0.40148478746414185, "learning_rate": 5.289457215598595e-05, "loss": 1.5225, "step": 4636 }, { "epoch": 0.48166614729406876, "grad_norm": 0.4223523437976837, "learning_rate": 5.28782827955521e-05, "loss": 1.7701, "step": 4637 }, { "epoch": 0.4817700218136491, "grad_norm": 0.41836312413215637, "learning_rate": 5.286199312860358e-05, "loss": 1.6871, "step": 4638 }, { "epoch": 0.48187389633322947, "grad_norm": 0.41826310753822327, "learning_rate": 5.2845703156875106e-05, "loss": 1.7912, "step": 4639 }, { "epoch": 0.4819777708528098, "grad_norm": 0.3970772325992584, "learning_rate": 5.282941288210146e-05, "loss": 1.6757, "step": 4640 }, { "epoch": 0.4820816453723902, "grad_norm": 0.4118634760379791, "learning_rate": 5.2813122306017396e-05, "loss": 1.8312, "step": 4641 }, { "epoch": 0.4821855198919705, "grad_norm": 0.38596856594085693, "learning_rate": 5.279683143035775e-05, "loss": 1.6572, "step": 4642 }, { "epoch": 0.48228939441155083, "grad_norm": 0.3717338442802429, "learning_rate": 5.278054025685739e-05, "loss": 1.6622, "step": 4643 }, { "epoch": 0.4823932689311312, "grad_norm": 0.38928812742233276, "learning_rate": 5.276424878725116e-05, "loss": 1.578, "step": 4644 }, { "epoch": 0.48249714345071154, "grad_norm": 0.4005277156829834, "learning_rate": 5.2747957023274005e-05, "loss": 1.6593, "step": 4645 }, { "epoch": 0.48260101797029187, "grad_norm": 0.37568166851997375, "learning_rate": 5.2731664966660864e-05, "loss": 1.6828, "step": 4646 }, { "epoch": 0.48270489248987225, "grad_norm": 0.4337156116962433, "learning_rate": 5.271537261914672e-05, "loss": 1.8237, "step": 4647 }, { "epoch": 0.4828087670094526, "grad_norm": 0.4006423056125641, "learning_rate": 5.269907998246656e-05, "loss": 1.6222, "step": 4648 }, { "epoch": 0.4829126415290329, "grad_norm": 0.3779585361480713, "learning_rate": 5.268278705835544e-05, "loss": 1.6672, "step": 4649 }, { "epoch": 0.4830165160486133, "grad_norm": 0.4423217475414276, "learning_rate": 5.266649384854841e-05, "loss": 1.7731, "step": 4650 }, { "epoch": 0.4831203905681936, "grad_norm": 0.4306303560733795, "learning_rate": 5.265020035478061e-05, "loss": 1.7711, "step": 4651 }, { "epoch": 0.483224265087774, "grad_norm": 0.38442516326904297, "learning_rate": 5.263390657878712e-05, "loss": 1.4701, "step": 4652 }, { "epoch": 0.4833281396073543, "grad_norm": 0.38074007630348206, "learning_rate": 5.2617612522303136e-05, "loss": 1.6486, "step": 4653 }, { "epoch": 0.48343201412693465, "grad_norm": 0.3747223913669586, "learning_rate": 5.2601318187063855e-05, "loss": 1.6641, "step": 4654 }, { "epoch": 0.48353588864651503, "grad_norm": 0.48334217071533203, "learning_rate": 5.258502357480445e-05, "loss": 1.5608, "step": 4655 }, { "epoch": 0.48363976316609536, "grad_norm": 0.4297388792037964, "learning_rate": 5.256872868726021e-05, "loss": 1.6617, "step": 4656 }, { "epoch": 0.4837436376856757, "grad_norm": 0.433788001537323, "learning_rate": 5.255243352616641e-05, "loss": 1.8598, "step": 4657 }, { "epoch": 0.48384751220525607, "grad_norm": 0.4389197528362274, "learning_rate": 5.253613809325834e-05, "loss": 1.8134, "step": 4658 }, { "epoch": 0.4839513867248364, "grad_norm": 0.39111611247062683, "learning_rate": 5.251984239027133e-05, "loss": 1.5018, "step": 4659 }, { "epoch": 0.4840552612444167, "grad_norm": 0.39344149827957153, "learning_rate": 5.2503546418940784e-05, "loss": 1.7079, "step": 4660 }, { "epoch": 0.4841591357639971, "grad_norm": 0.42502087354660034, "learning_rate": 5.248725018100207e-05, "loss": 1.7178, "step": 4661 }, { "epoch": 0.48426301028357743, "grad_norm": 0.4289288818836212, "learning_rate": 5.2470953678190615e-05, "loss": 1.455, "step": 4662 }, { "epoch": 0.48436688480315776, "grad_norm": 0.41218411922454834, "learning_rate": 5.2454656912241864e-05, "loss": 1.8562, "step": 4663 }, { "epoch": 0.48447075932273814, "grad_norm": 0.38502606749534607, "learning_rate": 5.2438359884891285e-05, "loss": 1.5738, "step": 4664 }, { "epoch": 0.48457463384231847, "grad_norm": 0.4311594069004059, "learning_rate": 5.242206259787441e-05, "loss": 1.69, "step": 4665 }, { "epoch": 0.48467850836189885, "grad_norm": 0.4154076874256134, "learning_rate": 5.2405765052926755e-05, "loss": 1.7648, "step": 4666 }, { "epoch": 0.4847823828814792, "grad_norm": 0.4217148721218109, "learning_rate": 5.23894672517839e-05, "loss": 1.6886, "step": 4667 }, { "epoch": 0.4848862574010595, "grad_norm": 0.39737677574157715, "learning_rate": 5.237316919618143e-05, "loss": 1.6023, "step": 4668 }, { "epoch": 0.4849901319206399, "grad_norm": 0.3839234709739685, "learning_rate": 5.2356870887854926e-05, "loss": 1.7436, "step": 4669 }, { "epoch": 0.4850940064402202, "grad_norm": 0.39807283878326416, "learning_rate": 5.234057232854006e-05, "loss": 1.7422, "step": 4670 }, { "epoch": 0.48519788095980054, "grad_norm": 0.40253087878227234, "learning_rate": 5.232427351997251e-05, "loss": 1.6793, "step": 4671 }, { "epoch": 0.4853017554793809, "grad_norm": 0.41623252630233765, "learning_rate": 5.2307974463887944e-05, "loss": 1.6998, "step": 4672 }, { "epoch": 0.48540562999896125, "grad_norm": 0.39853668212890625, "learning_rate": 5.229167516202211e-05, "loss": 1.7607, "step": 4673 }, { "epoch": 0.4855095045185416, "grad_norm": 0.433289110660553, "learning_rate": 5.2275375616110755e-05, "loss": 1.707, "step": 4674 }, { "epoch": 0.48561337903812196, "grad_norm": 0.37123411893844604, "learning_rate": 5.2259075827889644e-05, "loss": 1.765, "step": 4675 }, { "epoch": 0.4857172535577023, "grad_norm": 0.3944990634918213, "learning_rate": 5.224277579909458e-05, "loss": 1.6677, "step": 4676 }, { "epoch": 0.4858211280772826, "grad_norm": 0.38303902745246887, "learning_rate": 5.222647553146138e-05, "loss": 1.5869, "step": 4677 }, { "epoch": 0.485925002596863, "grad_norm": 0.40630918741226196, "learning_rate": 5.221017502672589e-05, "loss": 1.6596, "step": 4678 }, { "epoch": 0.4860288771164433, "grad_norm": 0.38045310974121094, "learning_rate": 5.219387428662402e-05, "loss": 1.75, "step": 4679 }, { "epoch": 0.4861327516360237, "grad_norm": 0.36957138776779175, "learning_rate": 5.2177573312891646e-05, "loss": 1.5169, "step": 4680 }, { "epoch": 0.48623662615560403, "grad_norm": 0.40403348207473755, "learning_rate": 5.21612721072647e-05, "loss": 1.8167, "step": 4681 }, { "epoch": 0.48634050067518436, "grad_norm": 0.4342941641807556, "learning_rate": 5.214497067147914e-05, "loss": 1.6114, "step": 4682 }, { "epoch": 0.48644437519476474, "grad_norm": 0.40665194392204285, "learning_rate": 5.212866900727093e-05, "loss": 1.6924, "step": 4683 }, { "epoch": 0.48654824971434507, "grad_norm": 0.397088885307312, "learning_rate": 5.211236711637607e-05, "loss": 1.6505, "step": 4684 }, { "epoch": 0.4866521242339254, "grad_norm": 0.4288082718849182, "learning_rate": 5.209606500053059e-05, "loss": 1.9258, "step": 4685 }, { "epoch": 0.4867559987535058, "grad_norm": 0.43021494150161743, "learning_rate": 5.207976266147053e-05, "loss": 1.7348, "step": 4686 }, { "epoch": 0.4868598732730861, "grad_norm": 0.4119536280632019, "learning_rate": 5.206346010093198e-05, "loss": 1.6395, "step": 4687 }, { "epoch": 0.48696374779266643, "grad_norm": 0.4928002655506134, "learning_rate": 5.204715732065103e-05, "loss": 1.846, "step": 4688 }, { "epoch": 0.4870676223122468, "grad_norm": 0.3708318769931793, "learning_rate": 5.203085432236379e-05, "loss": 1.4251, "step": 4689 }, { "epoch": 0.48717149683182714, "grad_norm": 0.3991853892803192, "learning_rate": 5.201455110780641e-05, "loss": 1.6869, "step": 4690 }, { "epoch": 0.4872753713514075, "grad_norm": 0.4395716190338135, "learning_rate": 5.199824767871503e-05, "loss": 1.8586, "step": 4691 }, { "epoch": 0.48737924587098785, "grad_norm": 0.4379446804523468, "learning_rate": 5.198194403682588e-05, "loss": 1.6928, "step": 4692 }, { "epoch": 0.4874831203905682, "grad_norm": 0.4161565601825714, "learning_rate": 5.1965640183875144e-05, "loss": 1.8182, "step": 4693 }, { "epoch": 0.48758699491014856, "grad_norm": 0.41332149505615234, "learning_rate": 5.194933612159906e-05, "loss": 1.8115, "step": 4694 }, { "epoch": 0.4876908694297289, "grad_norm": 0.38883647322654724, "learning_rate": 5.193303185173388e-05, "loss": 1.7725, "step": 4695 }, { "epoch": 0.4877947439493092, "grad_norm": 0.4268670976161957, "learning_rate": 5.191672737601591e-05, "loss": 1.823, "step": 4696 }, { "epoch": 0.4878986184688896, "grad_norm": 0.4016038775444031, "learning_rate": 5.1900422696181396e-05, "loss": 1.5653, "step": 4697 }, { "epoch": 0.4880024929884699, "grad_norm": 0.43578583002090454, "learning_rate": 5.1884117813966705e-05, "loss": 1.7653, "step": 4698 }, { "epoch": 0.48810636750805025, "grad_norm": 0.45143744349479675, "learning_rate": 5.186781273110816e-05, "loss": 1.7976, "step": 4699 }, { "epoch": 0.48821024202763064, "grad_norm": 0.415283739566803, "learning_rate": 5.1851507449342104e-05, "loss": 1.6952, "step": 4700 }, { "epoch": 0.48831411654721096, "grad_norm": 0.38803666830062866, "learning_rate": 5.183520197040498e-05, "loss": 1.6622, "step": 4701 }, { "epoch": 0.4884179910667913, "grad_norm": 0.400741845369339, "learning_rate": 5.1818896296033145e-05, "loss": 1.7436, "step": 4702 }, { "epoch": 0.4885218655863717, "grad_norm": 0.39219218492507935, "learning_rate": 5.180259042796306e-05, "loss": 1.6381, "step": 4703 }, { "epoch": 0.488625740105952, "grad_norm": 0.44362738728523254, "learning_rate": 5.178628436793115e-05, "loss": 1.8482, "step": 4704 }, { "epoch": 0.4887296146255324, "grad_norm": 0.4290272295475006, "learning_rate": 5.176997811767387e-05, "loss": 1.7622, "step": 4705 }, { "epoch": 0.4888334891451127, "grad_norm": 0.38011881709098816, "learning_rate": 5.175367167892774e-05, "loss": 1.7822, "step": 4706 }, { "epoch": 0.48893736366469304, "grad_norm": 0.4040461778640747, "learning_rate": 5.1737365053429254e-05, "loss": 1.4567, "step": 4707 }, { "epoch": 0.4890412381842734, "grad_norm": 0.40654054284095764, "learning_rate": 5.1721058242914935e-05, "loss": 1.7415, "step": 4708 }, { "epoch": 0.48914511270385375, "grad_norm": 0.3765581548213959, "learning_rate": 5.170475124912134e-05, "loss": 1.6306, "step": 4709 }, { "epoch": 0.4892489872234341, "grad_norm": 0.4085083305835724, "learning_rate": 5.168844407378506e-05, "loss": 1.6247, "step": 4710 }, { "epoch": 0.48935286174301446, "grad_norm": 0.4177039861679077, "learning_rate": 5.167213671864263e-05, "loss": 1.6903, "step": 4711 }, { "epoch": 0.4894567362625948, "grad_norm": 0.449349582195282, "learning_rate": 5.1655829185430685e-05, "loss": 1.6055, "step": 4712 }, { "epoch": 0.4895606107821751, "grad_norm": 0.4039447605609894, "learning_rate": 5.1639521475885854e-05, "loss": 1.6719, "step": 4713 }, { "epoch": 0.4896644853017555, "grad_norm": 0.39503759145736694, "learning_rate": 5.162321359174476e-05, "loss": 1.7133, "step": 4714 }, { "epoch": 0.4897683598213358, "grad_norm": 0.4664006531238556, "learning_rate": 5.16069055347441e-05, "loss": 1.805, "step": 4715 }, { "epoch": 0.48987223434091615, "grad_norm": 0.39584633708000183, "learning_rate": 5.1590597306620525e-05, "loss": 1.5628, "step": 4716 }, { "epoch": 0.48997610886049653, "grad_norm": 0.3739977478981018, "learning_rate": 5.1574288909110744e-05, "loss": 1.5699, "step": 4717 }, { "epoch": 0.49007998338007686, "grad_norm": 0.3966865539550781, "learning_rate": 5.155798034395147e-05, "loss": 1.7159, "step": 4718 }, { "epoch": 0.49018385789965724, "grad_norm": 0.48280176520347595, "learning_rate": 5.154167161287945e-05, "loss": 1.8649, "step": 4719 }, { "epoch": 0.49028773241923757, "grad_norm": 0.43992406129837036, "learning_rate": 5.152536271763143e-05, "loss": 1.7603, "step": 4720 }, { "epoch": 0.4903916069388179, "grad_norm": 0.40175652503967285, "learning_rate": 5.1509053659944175e-05, "loss": 1.664, "step": 4721 }, { "epoch": 0.4904954814583983, "grad_norm": 0.3840475082397461, "learning_rate": 5.149274444155446e-05, "loss": 1.5613, "step": 4722 }, { "epoch": 0.4905993559779786, "grad_norm": 0.42482373118400574, "learning_rate": 5.147643506419914e-05, "loss": 1.7978, "step": 4723 }, { "epoch": 0.49070323049755893, "grad_norm": 0.40053901076316833, "learning_rate": 5.146012552961499e-05, "loss": 1.736, "step": 4724 }, { "epoch": 0.4908071050171393, "grad_norm": 0.3966452479362488, "learning_rate": 5.1443815839538845e-05, "loss": 1.7367, "step": 4725 }, { "epoch": 0.49091097953671964, "grad_norm": 0.39208799600601196, "learning_rate": 5.1427505995707594e-05, "loss": 1.4424, "step": 4726 }, { "epoch": 0.49101485405629997, "grad_norm": 0.42873436212539673, "learning_rate": 5.1411195999858074e-05, "loss": 1.8207, "step": 4727 }, { "epoch": 0.49111872857588035, "grad_norm": 0.44238024950027466, "learning_rate": 5.1394885853727195e-05, "loss": 1.756, "step": 4728 }, { "epoch": 0.4912226030954607, "grad_norm": 0.38255882263183594, "learning_rate": 5.137857555905184e-05, "loss": 1.5362, "step": 4729 }, { "epoch": 0.49132647761504106, "grad_norm": 0.4030906558036804, "learning_rate": 5.1362265117568965e-05, "loss": 1.6924, "step": 4730 }, { "epoch": 0.4914303521346214, "grad_norm": 0.4658985733985901, "learning_rate": 5.134595453101547e-05, "loss": 1.7965, "step": 4731 }, { "epoch": 0.4915342266542017, "grad_norm": 0.3776882290840149, "learning_rate": 5.1329643801128324e-05, "loss": 1.5735, "step": 4732 }, { "epoch": 0.4916381011737821, "grad_norm": 0.3989698588848114, "learning_rate": 5.131333292964447e-05, "loss": 1.7203, "step": 4733 }, { "epoch": 0.4917419756933624, "grad_norm": 0.4176570177078247, "learning_rate": 5.12970219183009e-05, "loss": 1.7932, "step": 4734 }, { "epoch": 0.49184585021294275, "grad_norm": 0.42140617966651917, "learning_rate": 5.128071076883463e-05, "loss": 1.8202, "step": 4735 }, { "epoch": 0.49194972473252313, "grad_norm": 0.41681942343711853, "learning_rate": 5.126439948298264e-05, "loss": 1.6721, "step": 4736 }, { "epoch": 0.49205359925210346, "grad_norm": 0.465843141078949, "learning_rate": 5.124808806248199e-05, "loss": 1.6901, "step": 4737 }, { "epoch": 0.4921574737716838, "grad_norm": 0.41670602560043335, "learning_rate": 5.123177650906968e-05, "loss": 1.745, "step": 4738 }, { "epoch": 0.49226134829126417, "grad_norm": 0.415339857339859, "learning_rate": 5.1215464824482775e-05, "loss": 1.7013, "step": 4739 }, { "epoch": 0.4923652228108445, "grad_norm": 0.3894515931606293, "learning_rate": 5.119915301045836e-05, "loss": 1.5359, "step": 4740 }, { "epoch": 0.4924690973304248, "grad_norm": 0.37234368920326233, "learning_rate": 5.118284106873349e-05, "loss": 1.6351, "step": 4741 }, { "epoch": 0.4925729718500052, "grad_norm": 0.47200119495391846, "learning_rate": 5.116652900104527e-05, "loss": 1.9368, "step": 4742 }, { "epoch": 0.49267684636958553, "grad_norm": 0.430601567029953, "learning_rate": 5.1150216809130826e-05, "loss": 1.6977, "step": 4743 }, { "epoch": 0.4927807208891659, "grad_norm": 0.4601489305496216, "learning_rate": 5.113390449472726e-05, "loss": 1.8549, "step": 4744 }, { "epoch": 0.49288459540874624, "grad_norm": 0.42277416586875916, "learning_rate": 5.11175920595717e-05, "loss": 1.7991, "step": 4745 }, { "epoch": 0.49298846992832657, "grad_norm": 0.3993331789970398, "learning_rate": 5.1101279505401325e-05, "loss": 1.7089, "step": 4746 }, { "epoch": 0.49309234444790695, "grad_norm": 0.4037160277366638, "learning_rate": 5.108496683395324e-05, "loss": 1.6423, "step": 4747 }, { "epoch": 0.4931962189674873, "grad_norm": 0.3775770664215088, "learning_rate": 5.106865404696468e-05, "loss": 1.6201, "step": 4748 }, { "epoch": 0.4933000934870676, "grad_norm": 0.4403548836708069, "learning_rate": 5.1052341146172785e-05, "loss": 1.7299, "step": 4749 }, { "epoch": 0.493403968006648, "grad_norm": 0.4192941188812256, "learning_rate": 5.103602813331476e-05, "loss": 1.7541, "step": 4750 }, { "epoch": 0.4935078425262283, "grad_norm": 0.3859345018863678, "learning_rate": 5.101971501012785e-05, "loss": 1.3569, "step": 4751 }, { "epoch": 0.49361171704580864, "grad_norm": 0.4286668598651886, "learning_rate": 5.1003401778349224e-05, "loss": 1.8115, "step": 4752 }, { "epoch": 0.493715591565389, "grad_norm": 0.4297914206981659, "learning_rate": 5.0987088439716134e-05, "loss": 1.6486, "step": 4753 }, { "epoch": 0.49381946608496935, "grad_norm": 0.4609547555446625, "learning_rate": 5.097077499596583e-05, "loss": 1.8906, "step": 4754 }, { "epoch": 0.4939233406045497, "grad_norm": 0.3923378884792328, "learning_rate": 5.095446144883556e-05, "loss": 1.5989, "step": 4755 }, { "epoch": 0.49402721512413006, "grad_norm": 0.42669469118118286, "learning_rate": 5.0938147800062586e-05, "loss": 1.5584, "step": 4756 }, { "epoch": 0.4941310896437104, "grad_norm": 0.4073004126548767, "learning_rate": 5.092183405138419e-05, "loss": 1.6068, "step": 4757 }, { "epoch": 0.49423496416329077, "grad_norm": 0.4162704050540924, "learning_rate": 5.090552020453766e-05, "loss": 1.8445, "step": 4758 }, { "epoch": 0.4943388386828711, "grad_norm": 0.45715436339378357, "learning_rate": 5.08892062612603e-05, "loss": 1.8761, "step": 4759 }, { "epoch": 0.4944427132024514, "grad_norm": 0.38431254029273987, "learning_rate": 5.08728922232894e-05, "loss": 1.5639, "step": 4760 }, { "epoch": 0.4945465877220318, "grad_norm": 0.4815351963043213, "learning_rate": 5.0856578092362285e-05, "loss": 1.7764, "step": 4761 }, { "epoch": 0.49465046224161213, "grad_norm": 0.41645729541778564, "learning_rate": 5.084026387021629e-05, "loss": 1.6924, "step": 4762 }, { "epoch": 0.49475433676119246, "grad_norm": 0.4079589247703552, "learning_rate": 5.0823949558588746e-05, "loss": 1.7549, "step": 4763 }, { "epoch": 0.49485821128077284, "grad_norm": 0.40971922874450684, "learning_rate": 5.0807635159216993e-05, "loss": 1.5389, "step": 4764 }, { "epoch": 0.49496208580035317, "grad_norm": 0.41493481397628784, "learning_rate": 5.079132067383842e-05, "loss": 1.7683, "step": 4765 }, { "epoch": 0.4950659603199335, "grad_norm": 0.3941478729248047, "learning_rate": 5.077500610419036e-05, "loss": 1.7123, "step": 4766 }, { "epoch": 0.4951698348395139, "grad_norm": 0.4111056327819824, "learning_rate": 5.0758691452010175e-05, "loss": 1.7361, "step": 4767 }, { "epoch": 0.4952737093590942, "grad_norm": 0.4429363012313843, "learning_rate": 5.0742376719035276e-05, "loss": 1.8205, "step": 4768 }, { "epoch": 0.4953775838786746, "grad_norm": 0.3898932635784149, "learning_rate": 5.0726061907003045e-05, "loss": 1.6695, "step": 4769 }, { "epoch": 0.4954814583982549, "grad_norm": 0.43110302090644836, "learning_rate": 5.070974701765089e-05, "loss": 1.6963, "step": 4770 }, { "epoch": 0.49558533291783524, "grad_norm": 0.4207279682159424, "learning_rate": 5.069343205271622e-05, "loss": 1.5213, "step": 4771 }, { "epoch": 0.4956892074374156, "grad_norm": 0.423676073551178, "learning_rate": 5.067711701393644e-05, "loss": 1.5632, "step": 4772 }, { "epoch": 0.49579308195699595, "grad_norm": 0.39001235365867615, "learning_rate": 5.066080190304898e-05, "loss": 1.717, "step": 4773 }, { "epoch": 0.4958969564765763, "grad_norm": 0.3835489749908447, "learning_rate": 5.0644486721791273e-05, "loss": 1.7107, "step": 4774 }, { "epoch": 0.49600083099615666, "grad_norm": 0.4401542842388153, "learning_rate": 5.0628171471900756e-05, "loss": 1.8646, "step": 4775 }, { "epoch": 0.496104705515737, "grad_norm": 0.38474228978157043, "learning_rate": 5.0611856155114876e-05, "loss": 1.6334, "step": 4776 }, { "epoch": 0.4962085800353173, "grad_norm": 0.4321017563343048, "learning_rate": 5.059554077317109e-05, "loss": 1.7811, "step": 4777 }, { "epoch": 0.4963124545548977, "grad_norm": 0.38683265447616577, "learning_rate": 5.0579225327806847e-05, "loss": 1.6125, "step": 4778 }, { "epoch": 0.496416329074478, "grad_norm": 0.3820945918560028, "learning_rate": 5.0562909820759654e-05, "loss": 1.554, "step": 4779 }, { "epoch": 0.49652020359405835, "grad_norm": 0.38485878705978394, "learning_rate": 5.054659425376693e-05, "loss": 1.6951, "step": 4780 }, { "epoch": 0.49662407811363873, "grad_norm": 0.40460067987442017, "learning_rate": 5.0530278628566175e-05, "loss": 1.7489, "step": 4781 }, { "epoch": 0.49672795263321906, "grad_norm": 0.4411293566226959, "learning_rate": 5.05139629468949e-05, "loss": 1.7431, "step": 4782 }, { "epoch": 0.49683182715279944, "grad_norm": 0.41115111112594604, "learning_rate": 5.0497647210490565e-05, "loss": 1.5739, "step": 4783 }, { "epoch": 0.49693570167237977, "grad_norm": 0.4054515063762665, "learning_rate": 5.048133142109069e-05, "loss": 1.6096, "step": 4784 }, { "epoch": 0.4970395761919601, "grad_norm": 0.45801469683647156, "learning_rate": 5.046501558043278e-05, "loss": 1.8216, "step": 4785 }, { "epoch": 0.4971434507115405, "grad_norm": 0.3939175009727478, "learning_rate": 5.044869969025434e-05, "loss": 1.6871, "step": 4786 }, { "epoch": 0.4972473252311208, "grad_norm": 0.38540032505989075, "learning_rate": 5.043238375229289e-05, "loss": 1.6358, "step": 4787 }, { "epoch": 0.49735119975070113, "grad_norm": 0.4039050340652466, "learning_rate": 5.0416067768285924e-05, "loss": 1.6841, "step": 4788 }, { "epoch": 0.4974550742702815, "grad_norm": 0.3593752384185791, "learning_rate": 5.039975173997099e-05, "loss": 1.4662, "step": 4789 }, { "epoch": 0.49755894878986184, "grad_norm": 0.4602617919445038, "learning_rate": 5.038343566908562e-05, "loss": 1.8741, "step": 4790 }, { "epoch": 0.49766282330944217, "grad_norm": 0.39395564794540405, "learning_rate": 5.036711955736734e-05, "loss": 1.6235, "step": 4791 }, { "epoch": 0.49776669782902255, "grad_norm": 0.3926306664943695, "learning_rate": 5.0350803406553684e-05, "loss": 1.4756, "step": 4792 }, { "epoch": 0.4978705723486029, "grad_norm": 0.4372478723526001, "learning_rate": 5.033448721838222e-05, "loss": 1.6785, "step": 4793 }, { "epoch": 0.4979744468681832, "grad_norm": 0.41775140166282654, "learning_rate": 5.0318170994590464e-05, "loss": 1.6019, "step": 4794 }, { "epoch": 0.4980783213877636, "grad_norm": 0.4097503423690796, "learning_rate": 5.030185473691597e-05, "loss": 1.8253, "step": 4795 }, { "epoch": 0.4981821959073439, "grad_norm": 0.38628101348876953, "learning_rate": 5.0285538447096315e-05, "loss": 1.6124, "step": 4796 }, { "epoch": 0.4982860704269243, "grad_norm": 0.4230360984802246, "learning_rate": 5.026922212686902e-05, "loss": 1.67, "step": 4797 }, { "epoch": 0.4983899449465046, "grad_norm": 0.40452295541763306, "learning_rate": 5.025290577797169e-05, "loss": 1.6923, "step": 4798 }, { "epoch": 0.49849381946608495, "grad_norm": 0.38569211959838867, "learning_rate": 5.0236589402141864e-05, "loss": 1.585, "step": 4799 }, { "epoch": 0.49859769398566534, "grad_norm": 0.4305466413497925, "learning_rate": 5.022027300111711e-05, "loss": 1.4631, "step": 4800 }, { "epoch": 0.49870156850524566, "grad_norm": 0.42307230830192566, "learning_rate": 5.0203956576635015e-05, "loss": 1.6969, "step": 4801 }, { "epoch": 0.498805443024826, "grad_norm": 0.44654223322868347, "learning_rate": 5.018764013043312e-05, "loss": 1.8514, "step": 4802 }, { "epoch": 0.49890931754440637, "grad_norm": 0.4396657943725586, "learning_rate": 5.0171323664249005e-05, "loss": 1.7264, "step": 4803 }, { "epoch": 0.4990131920639867, "grad_norm": 0.39395204186439514, "learning_rate": 5.0155007179820266e-05, "loss": 1.6963, "step": 4804 }, { "epoch": 0.499117066583567, "grad_norm": 0.4187118411064148, "learning_rate": 5.013869067888448e-05, "loss": 1.7839, "step": 4805 }, { "epoch": 0.4992209411031474, "grad_norm": 0.40489673614501953, "learning_rate": 5.01223741631792e-05, "loss": 1.6467, "step": 4806 }, { "epoch": 0.49932481562272774, "grad_norm": 0.41493701934814453, "learning_rate": 5.010605763444205e-05, "loss": 1.8405, "step": 4807 }, { "epoch": 0.4994286901423081, "grad_norm": 0.4441494643688202, "learning_rate": 5.008974109441057e-05, "loss": 1.6736, "step": 4808 }, { "epoch": 0.49953256466188845, "grad_norm": 0.38821157813072205, "learning_rate": 5.007342454482238e-05, "loss": 1.6041, "step": 4809 }, { "epoch": 0.49963643918146877, "grad_norm": 0.4361104965209961, "learning_rate": 5.005710798741503e-05, "loss": 1.7678, "step": 4810 }, { "epoch": 0.49974031370104915, "grad_norm": 0.39289024472236633, "learning_rate": 5.0040791423926125e-05, "loss": 1.5475, "step": 4811 }, { "epoch": 0.4998441882206295, "grad_norm": 0.35669323801994324, "learning_rate": 5.002447485609327e-05, "loss": 1.4978, "step": 4812 }, { "epoch": 0.4999480627402098, "grad_norm": 0.38865354657173157, "learning_rate": 5.0008158285654027e-05, "loss": 1.7025, "step": 4813 }, { "epoch": 0.5000519372597901, "grad_norm": 0.42419496178627014, "learning_rate": 4.9991841714345985e-05, "loss": 1.7601, "step": 4814 }, { "epoch": 0.5001558117793705, "grad_norm": 0.39531058073043823, "learning_rate": 4.997552514390674e-05, "loss": 1.5614, "step": 4815 }, { "epoch": 0.5002596862989509, "grad_norm": 0.415740430355072, "learning_rate": 4.9959208576073866e-05, "loss": 1.7504, "step": 4816 }, { "epoch": 0.5003635608185312, "grad_norm": 0.4087817966938019, "learning_rate": 4.994289201258498e-05, "loss": 1.6628, "step": 4817 }, { "epoch": 0.5004674353381116, "grad_norm": 0.39313584566116333, "learning_rate": 4.992657545517764e-05, "loss": 1.7745, "step": 4818 }, { "epoch": 0.5005713098576919, "grad_norm": 0.3950080871582031, "learning_rate": 4.9910258905589427e-05, "loss": 1.6339, "step": 4819 }, { "epoch": 0.5006751843772722, "grad_norm": 0.37852367758750916, "learning_rate": 4.989394236555797e-05, "loss": 1.6569, "step": 4820 }, { "epoch": 0.5007790588968526, "grad_norm": 0.43450725078582764, "learning_rate": 4.9877625836820806e-05, "loss": 1.602, "step": 4821 }, { "epoch": 0.500882933416433, "grad_norm": 0.42665791511535645, "learning_rate": 4.986130932111553e-05, "loss": 1.6783, "step": 4822 }, { "epoch": 0.5009868079360132, "grad_norm": 0.396518737077713, "learning_rate": 4.984499282017975e-05, "loss": 1.6118, "step": 4823 }, { "epoch": 0.5010906824555936, "grad_norm": 0.4622015655040741, "learning_rate": 4.9828676335751006e-05, "loss": 1.8231, "step": 4824 }, { "epoch": 0.501194556975174, "grad_norm": 0.3886905908584595, "learning_rate": 4.9812359869566896e-05, "loss": 1.667, "step": 4825 }, { "epoch": 0.5012984314947543, "grad_norm": 0.4054078161716461, "learning_rate": 4.979604342336501e-05, "loss": 1.6415, "step": 4826 }, { "epoch": 0.5014023060143347, "grad_norm": 0.38207703828811646, "learning_rate": 4.977972699888289e-05, "loss": 1.7066, "step": 4827 }, { "epoch": 0.501506180533915, "grad_norm": 0.43799397349357605, "learning_rate": 4.976341059785815e-05, "loss": 1.6469, "step": 4828 }, { "epoch": 0.5016100550534954, "grad_norm": 0.5063971281051636, "learning_rate": 4.974709422202831e-05, "loss": 1.9436, "step": 4829 }, { "epoch": 0.5017139295730757, "grad_norm": 0.39013898372650146, "learning_rate": 4.973077787313099e-05, "loss": 1.5765, "step": 4830 }, { "epoch": 0.5018178040926561, "grad_norm": 0.4318859875202179, "learning_rate": 4.9714461552903704e-05, "loss": 1.8306, "step": 4831 }, { "epoch": 0.5019216786122365, "grad_norm": 0.4619205892086029, "learning_rate": 4.9698145263084036e-05, "loss": 1.7797, "step": 4832 }, { "epoch": 0.5020255531318167, "grad_norm": 0.3963909447193146, "learning_rate": 4.968182900540956e-05, "loss": 1.646, "step": 4833 }, { "epoch": 0.5021294276513971, "grad_norm": 0.3879808187484741, "learning_rate": 4.966551278161779e-05, "loss": 1.6855, "step": 4834 }, { "epoch": 0.5022333021709775, "grad_norm": 0.393045037984848, "learning_rate": 4.964919659344632e-05, "loss": 1.678, "step": 4835 }, { "epoch": 0.5023371766905578, "grad_norm": 0.38027775287628174, "learning_rate": 4.963288044263268e-05, "loss": 1.6203, "step": 4836 }, { "epoch": 0.5024410512101382, "grad_norm": 0.4136490821838379, "learning_rate": 4.96165643309144e-05, "loss": 1.772, "step": 4837 }, { "epoch": 0.5025449257297185, "grad_norm": 0.3826773762702942, "learning_rate": 4.9600248260029016e-05, "loss": 1.8326, "step": 4838 }, { "epoch": 0.5026488002492988, "grad_norm": 0.4194203317165375, "learning_rate": 4.9583932231714095e-05, "loss": 1.5652, "step": 4839 }, { "epoch": 0.5027526747688792, "grad_norm": 0.4387541115283966, "learning_rate": 4.956761624770713e-05, "loss": 1.8814, "step": 4840 }, { "epoch": 0.5028565492884596, "grad_norm": 0.3808096945285797, "learning_rate": 4.9551300309745666e-05, "loss": 1.748, "step": 4841 }, { "epoch": 0.5029604238080398, "grad_norm": 0.4226476550102234, "learning_rate": 4.953498441956723e-05, "loss": 1.6055, "step": 4842 }, { "epoch": 0.5030642983276202, "grad_norm": 0.4065408706665039, "learning_rate": 4.951866857890931e-05, "loss": 1.7516, "step": 4843 }, { "epoch": 0.5031681728472006, "grad_norm": 0.3814634084701538, "learning_rate": 4.950235278950943e-05, "loss": 1.521, "step": 4844 }, { "epoch": 0.5032720473667809, "grad_norm": 0.4002365469932556, "learning_rate": 4.9486037053105125e-05, "loss": 1.7387, "step": 4845 }, { "epoch": 0.5033759218863613, "grad_norm": 0.39226168394088745, "learning_rate": 4.946972137143383e-05, "loss": 1.6565, "step": 4846 }, { "epoch": 0.5034797964059416, "grad_norm": 0.425890177488327, "learning_rate": 4.9453405746233076e-05, "loss": 1.7404, "step": 4847 }, { "epoch": 0.5035836709255219, "grad_norm": 0.41715189814567566, "learning_rate": 4.943709017924038e-05, "loss": 1.7237, "step": 4848 }, { "epoch": 0.5036875454451023, "grad_norm": 0.4025704860687256, "learning_rate": 4.9420774672193165e-05, "loss": 1.4753, "step": 4849 }, { "epoch": 0.5037914199646827, "grad_norm": 0.4412018060684204, "learning_rate": 4.940445922682892e-05, "loss": 1.9151, "step": 4850 }, { "epoch": 0.503895294484263, "grad_norm": 0.3976673483848572, "learning_rate": 4.938814384488515e-05, "loss": 1.707, "step": 4851 }, { "epoch": 0.5039991690038433, "grad_norm": 0.40271249413490295, "learning_rate": 4.937182852809926e-05, "loss": 1.658, "step": 4852 }, { "epoch": 0.5041030435234237, "grad_norm": 0.4666357934474945, "learning_rate": 4.935551327820873e-05, "loss": 1.8472, "step": 4853 }, { "epoch": 0.5042069180430041, "grad_norm": 0.42836159467697144, "learning_rate": 4.933919809695103e-05, "loss": 1.7557, "step": 4854 }, { "epoch": 0.5043107925625844, "grad_norm": 0.44381582736968994, "learning_rate": 4.9322882986063566e-05, "loss": 1.7145, "step": 4855 }, { "epoch": 0.5044146670821648, "grad_norm": 0.4695770740509033, "learning_rate": 4.9306567947283785e-05, "loss": 1.5709, "step": 4856 }, { "epoch": 0.5045185416017451, "grad_norm": 0.40107208490371704, "learning_rate": 4.92902529823491e-05, "loss": 1.6956, "step": 4857 }, { "epoch": 0.5046224161213254, "grad_norm": 0.39944395422935486, "learning_rate": 4.927393809299696e-05, "loss": 1.6442, "step": 4858 }, { "epoch": 0.5047262906409058, "grad_norm": 0.42077764868736267, "learning_rate": 4.925762328096473e-05, "loss": 1.7561, "step": 4859 }, { "epoch": 0.5048301651604862, "grad_norm": 0.4306979477405548, "learning_rate": 4.924130854798983e-05, "loss": 1.6888, "step": 4860 }, { "epoch": 0.5049340396800664, "grad_norm": 0.41281387209892273, "learning_rate": 4.922499389580967e-05, "loss": 1.8061, "step": 4861 }, { "epoch": 0.5050379141996468, "grad_norm": 0.4295770227909088, "learning_rate": 4.920867932616159e-05, "loss": 1.7401, "step": 4862 }, { "epoch": 0.5051417887192272, "grad_norm": 0.37192273139953613, "learning_rate": 4.9192364840783e-05, "loss": 1.5993, "step": 4863 }, { "epoch": 0.5052456632388075, "grad_norm": 0.4081692695617676, "learning_rate": 4.917605044141127e-05, "loss": 1.7359, "step": 4864 }, { "epoch": 0.5053495377583879, "grad_norm": 0.40556600689888, "learning_rate": 4.915973612978372e-05, "loss": 1.6507, "step": 4865 }, { "epoch": 0.5054534122779683, "grad_norm": 0.41496628522872925, "learning_rate": 4.914342190763771e-05, "loss": 1.6881, "step": 4866 }, { "epoch": 0.5055572867975485, "grad_norm": 0.3876533806324005, "learning_rate": 4.912710777671062e-05, "loss": 1.5866, "step": 4867 }, { "epoch": 0.5056611613171289, "grad_norm": 0.3975103795528412, "learning_rate": 4.9110793738739715e-05, "loss": 1.7478, "step": 4868 }, { "epoch": 0.5057650358367093, "grad_norm": 0.37800195813179016, "learning_rate": 4.909447979546235e-05, "loss": 1.6304, "step": 4869 }, { "epoch": 0.5058689103562896, "grad_norm": 0.3874056041240692, "learning_rate": 4.907816594861582e-05, "loss": 1.6151, "step": 4870 }, { "epoch": 0.5059727848758699, "grad_norm": 0.42271357774734497, "learning_rate": 4.906185219993743e-05, "loss": 1.7042, "step": 4871 }, { "epoch": 0.5060766593954503, "grad_norm": 0.44538614153862, "learning_rate": 4.904553855116445e-05, "loss": 1.7608, "step": 4872 }, { "epoch": 0.5061805339150306, "grad_norm": 0.41812676191329956, "learning_rate": 4.90292250040342e-05, "loss": 1.7, "step": 4873 }, { "epoch": 0.506284408434611, "grad_norm": 0.3845331370830536, "learning_rate": 4.901291156028388e-05, "loss": 1.5532, "step": 4874 }, { "epoch": 0.5063882829541914, "grad_norm": 0.4065440595149994, "learning_rate": 4.899659822165079e-05, "loss": 1.7019, "step": 4875 }, { "epoch": 0.5064921574737716, "grad_norm": 0.42397770285606384, "learning_rate": 4.898028498987217e-05, "loss": 1.7414, "step": 4876 }, { "epoch": 0.506596031993352, "grad_norm": 0.38539576530456543, "learning_rate": 4.8963971866685245e-05, "loss": 1.7377, "step": 4877 }, { "epoch": 0.5066999065129324, "grad_norm": 0.373605877161026, "learning_rate": 4.894765885382722e-05, "loss": 1.6698, "step": 4878 }, { "epoch": 0.5068037810325128, "grad_norm": 0.36256158351898193, "learning_rate": 4.8931345953035326e-05, "loss": 1.5029, "step": 4879 }, { "epoch": 0.506907655552093, "grad_norm": 0.39593711495399475, "learning_rate": 4.891503316604677e-05, "loss": 1.7055, "step": 4880 }, { "epoch": 0.5070115300716734, "grad_norm": 0.3991319239139557, "learning_rate": 4.889872049459869e-05, "loss": 1.6002, "step": 4881 }, { "epoch": 0.5071154045912538, "grad_norm": 0.3885091543197632, "learning_rate": 4.88824079404283e-05, "loss": 1.6086, "step": 4882 }, { "epoch": 0.5072192791108341, "grad_norm": 0.41479891538619995, "learning_rate": 4.8866095505272754e-05, "loss": 1.4958, "step": 4883 }, { "epoch": 0.5073231536304145, "grad_norm": 0.4320788085460663, "learning_rate": 4.8849783190869186e-05, "loss": 1.6549, "step": 4884 }, { "epoch": 0.5074270281499949, "grad_norm": 0.49210458993911743, "learning_rate": 4.883347099895473e-05, "loss": 1.9594, "step": 4885 }, { "epoch": 0.5075309026695751, "grad_norm": 0.38703057169914246, "learning_rate": 4.8817158931266534e-05, "loss": 1.5258, "step": 4886 }, { "epoch": 0.5076347771891555, "grad_norm": 0.387260764837265, "learning_rate": 4.880084698954166e-05, "loss": 1.5779, "step": 4887 }, { "epoch": 0.5077386517087359, "grad_norm": 0.42436152696609497, "learning_rate": 4.878453517551722e-05, "loss": 1.874, "step": 4888 }, { "epoch": 0.5078425262283162, "grad_norm": 0.3863707184791565, "learning_rate": 4.876822349093034e-05, "loss": 1.4991, "step": 4889 }, { "epoch": 0.5079464007478965, "grad_norm": 0.40059852600097656, "learning_rate": 4.875191193751802e-05, "loss": 1.5891, "step": 4890 }, { "epoch": 0.5080502752674769, "grad_norm": 0.45580723881721497, "learning_rate": 4.873560051701736e-05, "loss": 1.8611, "step": 4891 }, { "epoch": 0.5081541497870572, "grad_norm": 0.3890886902809143, "learning_rate": 4.871928923116539e-05, "loss": 1.5606, "step": 4892 }, { "epoch": 0.5082580243066376, "grad_norm": 0.42675158381462097, "learning_rate": 4.8702978081699104e-05, "loss": 1.7001, "step": 4893 }, { "epoch": 0.508361898826218, "grad_norm": 0.44091951847076416, "learning_rate": 4.868666707035554e-05, "loss": 1.8272, "step": 4894 }, { "epoch": 0.5084657733457982, "grad_norm": 0.3957894444465637, "learning_rate": 4.86703561988717e-05, "loss": 1.8023, "step": 4895 }, { "epoch": 0.5085696478653786, "grad_norm": 0.37731900811195374, "learning_rate": 4.865404546898454e-05, "loss": 1.6024, "step": 4896 }, { "epoch": 0.508673522384959, "grad_norm": 0.41856059432029724, "learning_rate": 4.863773488243105e-05, "loss": 1.6815, "step": 4897 }, { "epoch": 0.5087773969045393, "grad_norm": 0.45765843987464905, "learning_rate": 4.862142444094816e-05, "loss": 1.8893, "step": 4898 }, { "epoch": 0.5088812714241197, "grad_norm": 0.4164111018180847, "learning_rate": 4.8605114146272824e-05, "loss": 1.6191, "step": 4899 }, { "epoch": 0.5089851459437, "grad_norm": 0.4314156770706177, "learning_rate": 4.858880400014194e-05, "loss": 1.9152, "step": 4900 }, { "epoch": 0.5090890204632803, "grad_norm": 0.42690524458885193, "learning_rate": 4.857249400429244e-05, "loss": 1.787, "step": 4901 }, { "epoch": 0.5091928949828607, "grad_norm": 0.3987329602241516, "learning_rate": 4.8556184160461167e-05, "loss": 1.6278, "step": 4902 }, { "epoch": 0.5092967695024411, "grad_norm": 0.427369624376297, "learning_rate": 4.8539874470385024e-05, "loss": 1.8381, "step": 4903 }, { "epoch": 0.5094006440220213, "grad_norm": 0.39449864625930786, "learning_rate": 4.852356493580088e-05, "loss": 1.7702, "step": 4904 }, { "epoch": 0.5095045185416017, "grad_norm": 0.4443662464618683, "learning_rate": 4.850725555844555e-05, "loss": 1.8187, "step": 4905 }, { "epoch": 0.5096083930611821, "grad_norm": 0.42031386494636536, "learning_rate": 4.8490946340055837e-05, "loss": 1.7612, "step": 4906 }, { "epoch": 0.5097122675807625, "grad_norm": 0.44448190927505493, "learning_rate": 4.8474637282368577e-05, "loss": 1.6873, "step": 4907 }, { "epoch": 0.5098161421003428, "grad_norm": 0.4955507516860962, "learning_rate": 4.8458328387120565e-05, "loss": 2.1091, "step": 4908 }, { "epoch": 0.5099200166199231, "grad_norm": 0.3980197012424469, "learning_rate": 4.844201965604853e-05, "loss": 1.6583, "step": 4909 }, { "epoch": 0.5100238911395035, "grad_norm": 0.41697457432746887, "learning_rate": 4.842571109088927e-05, "loss": 1.5176, "step": 4910 }, { "epoch": 0.5101277656590838, "grad_norm": 0.412379652261734, "learning_rate": 4.840940269337949e-05, "loss": 1.5699, "step": 4911 }, { "epoch": 0.5102316401786642, "grad_norm": 0.420980840921402, "learning_rate": 4.839309446525592e-05, "loss": 1.6266, "step": 4912 }, { "epoch": 0.5103355146982446, "grad_norm": 0.4162856340408325, "learning_rate": 4.837678640825524e-05, "loss": 1.6759, "step": 4913 }, { "epoch": 0.5104393892178248, "grad_norm": 0.4442167580127716, "learning_rate": 4.836047852411417e-05, "loss": 1.7818, "step": 4914 }, { "epoch": 0.5105432637374052, "grad_norm": 0.41404595971107483, "learning_rate": 4.834417081456933e-05, "loss": 1.7876, "step": 4915 }, { "epoch": 0.5106471382569856, "grad_norm": 0.4493081271648407, "learning_rate": 4.8327863281357376e-05, "loss": 1.9106, "step": 4916 }, { "epoch": 0.5107510127765659, "grad_norm": 0.379351407289505, "learning_rate": 4.8311555926214966e-05, "loss": 1.6729, "step": 4917 }, { "epoch": 0.5108548872961463, "grad_norm": 0.40297406911849976, "learning_rate": 4.829524875087867e-05, "loss": 1.7575, "step": 4918 }, { "epoch": 0.5109587618157266, "grad_norm": 0.4127398133277893, "learning_rate": 4.8278941757085063e-05, "loss": 1.5132, "step": 4919 }, { "epoch": 0.5110626363353069, "grad_norm": 0.38783156871795654, "learning_rate": 4.8262634946570764e-05, "loss": 1.6706, "step": 4920 }, { "epoch": 0.5111665108548873, "grad_norm": 0.39855116605758667, "learning_rate": 4.824632832107227e-05, "loss": 1.7722, "step": 4921 }, { "epoch": 0.5112703853744677, "grad_norm": 0.38658004999160767, "learning_rate": 4.8230021882326135e-05, "loss": 1.7226, "step": 4922 }, { "epoch": 0.5113742598940479, "grad_norm": 0.4114481210708618, "learning_rate": 4.821371563206888e-05, "loss": 1.7858, "step": 4923 }, { "epoch": 0.5114781344136283, "grad_norm": 0.3694150745868683, "learning_rate": 4.819740957203696e-05, "loss": 1.4821, "step": 4924 }, { "epoch": 0.5115820089332087, "grad_norm": 0.40538889169692993, "learning_rate": 4.818110370396686e-05, "loss": 1.6467, "step": 4925 }, { "epoch": 0.511685883452789, "grad_norm": 0.4382588863372803, "learning_rate": 4.816479802959503e-05, "loss": 1.6146, "step": 4926 }, { "epoch": 0.5117897579723694, "grad_norm": 0.41980645060539246, "learning_rate": 4.81484925506579e-05, "loss": 1.6783, "step": 4927 }, { "epoch": 0.5118936324919497, "grad_norm": 0.413632333278656, "learning_rate": 4.813218726889186e-05, "loss": 1.749, "step": 4928 }, { "epoch": 0.51199750701153, "grad_norm": 0.40364977717399597, "learning_rate": 4.8115882186033326e-05, "loss": 1.5492, "step": 4929 }, { "epoch": 0.5121013815311104, "grad_norm": 0.3991188406944275, "learning_rate": 4.8099577303818616e-05, "loss": 1.7461, "step": 4930 }, { "epoch": 0.5122052560506908, "grad_norm": 0.4893375337123871, "learning_rate": 4.8083272623984104e-05, "loss": 1.9981, "step": 4931 }, { "epoch": 0.5123091305702712, "grad_norm": 0.40781837701797485, "learning_rate": 4.8066968148266116e-05, "loss": 1.7219, "step": 4932 }, { "epoch": 0.5124130050898514, "grad_norm": 0.39397352933883667, "learning_rate": 4.805066387840096e-05, "loss": 1.6126, "step": 4933 }, { "epoch": 0.5125168796094318, "grad_norm": 0.41246309876441956, "learning_rate": 4.803435981612486e-05, "loss": 1.7436, "step": 4934 }, { "epoch": 0.5126207541290122, "grad_norm": 0.3805597424507141, "learning_rate": 4.801805596317412e-05, "loss": 1.6401, "step": 4935 }, { "epoch": 0.5127246286485925, "grad_norm": 0.404367059469223, "learning_rate": 4.800175232128498e-05, "loss": 1.6504, "step": 4936 }, { "epoch": 0.5128285031681729, "grad_norm": 0.3783699870109558, "learning_rate": 4.79854488921936e-05, "loss": 1.6495, "step": 4937 }, { "epoch": 0.5129323776877532, "grad_norm": 0.48037418723106384, "learning_rate": 4.7969145677636215e-05, "loss": 1.6088, "step": 4938 }, { "epoch": 0.5130362522073335, "grad_norm": 0.4683350622653961, "learning_rate": 4.795284267934898e-05, "loss": 1.8945, "step": 4939 }, { "epoch": 0.5131401267269139, "grad_norm": 0.4192398488521576, "learning_rate": 4.793653989906802e-05, "loss": 1.8459, "step": 4940 }, { "epoch": 0.5132440012464943, "grad_norm": 0.3951408565044403, "learning_rate": 4.792023733852947e-05, "loss": 1.6314, "step": 4941 }, { "epoch": 0.5133478757660745, "grad_norm": 0.38078486919403076, "learning_rate": 4.790393499946943e-05, "loss": 1.6349, "step": 4942 }, { "epoch": 0.5134517502856549, "grad_norm": 0.4152866005897522, "learning_rate": 4.788763288362394e-05, "loss": 1.7195, "step": 4943 }, { "epoch": 0.5135556248052353, "grad_norm": 0.37676650285720825, "learning_rate": 4.787133099272908e-05, "loss": 1.5917, "step": 4944 }, { "epoch": 0.5136594993248156, "grad_norm": 0.3816780149936676, "learning_rate": 4.7855029328520876e-05, "loss": 1.6664, "step": 4945 }, { "epoch": 0.513763373844396, "grad_norm": 0.3960273265838623, "learning_rate": 4.7838727892735315e-05, "loss": 1.6255, "step": 4946 }, { "epoch": 0.5138672483639763, "grad_norm": 0.41314899921417236, "learning_rate": 4.782242668710836e-05, "loss": 1.7316, "step": 4947 }, { "epoch": 0.5139711228835566, "grad_norm": 0.3930761516094208, "learning_rate": 4.7806125713376e-05, "loss": 1.6513, "step": 4948 }, { "epoch": 0.514074997403137, "grad_norm": 0.4532455503940582, "learning_rate": 4.7789824973274114e-05, "loss": 1.6415, "step": 4949 }, { "epoch": 0.5141788719227174, "grad_norm": 0.4238525927066803, "learning_rate": 4.7773524468538624e-05, "loss": 1.8161, "step": 4950 }, { "epoch": 0.5142827464422977, "grad_norm": 0.40888962149620056, "learning_rate": 4.7757224200905444e-05, "loss": 1.7179, "step": 4951 }, { "epoch": 0.514386620961878, "grad_norm": 0.38617759943008423, "learning_rate": 4.774092417211037e-05, "loss": 1.6069, "step": 4952 }, { "epoch": 0.5144904954814584, "grad_norm": 0.41479814052581787, "learning_rate": 4.7724624383889256e-05, "loss": 1.7158, "step": 4953 }, { "epoch": 0.5145943700010387, "grad_norm": 0.38144171237945557, "learning_rate": 4.770832483797789e-05, "loss": 1.6917, "step": 4954 }, { "epoch": 0.5146982445206191, "grad_norm": 0.41436558961868286, "learning_rate": 4.769202553611206e-05, "loss": 1.817, "step": 4955 }, { "epoch": 0.5148021190401995, "grad_norm": 0.43558332324028015, "learning_rate": 4.76757264800275e-05, "loss": 1.7038, "step": 4956 }, { "epoch": 0.5149059935597798, "grad_norm": 0.3971516788005829, "learning_rate": 4.765942767145994e-05, "loss": 1.6663, "step": 4957 }, { "epoch": 0.5150098680793601, "grad_norm": 0.39739498496055603, "learning_rate": 4.764312911214509e-05, "loss": 1.5594, "step": 4958 }, { "epoch": 0.5151137425989405, "grad_norm": 0.408383846282959, "learning_rate": 4.762683080381859e-05, "loss": 1.7743, "step": 4959 }, { "epoch": 0.5152176171185209, "grad_norm": 0.39290642738342285, "learning_rate": 4.761053274821611e-05, "loss": 1.6531, "step": 4960 }, { "epoch": 0.5153214916381011, "grad_norm": 0.46999290585517883, "learning_rate": 4.7594234947073256e-05, "loss": 1.8683, "step": 4961 }, { "epoch": 0.5154253661576815, "grad_norm": 0.40802252292633057, "learning_rate": 4.75779374021256e-05, "loss": 1.5823, "step": 4962 }, { "epoch": 0.5155292406772619, "grad_norm": 0.42017194628715515, "learning_rate": 4.756164011510871e-05, "loss": 1.802, "step": 4963 }, { "epoch": 0.5156331151968422, "grad_norm": 0.4172336161136627, "learning_rate": 4.754534308775816e-05, "loss": 1.7046, "step": 4964 }, { "epoch": 0.5157369897164226, "grad_norm": 0.38955143094062805, "learning_rate": 4.75290463218094e-05, "loss": 1.5606, "step": 4965 }, { "epoch": 0.515840864236003, "grad_norm": 0.47302696108818054, "learning_rate": 4.7512749818997936e-05, "loss": 1.6814, "step": 4966 }, { "epoch": 0.5159447387555832, "grad_norm": 0.40039142966270447, "learning_rate": 4.749645358105923e-05, "loss": 1.6354, "step": 4967 }, { "epoch": 0.5160486132751636, "grad_norm": 0.3926571011543274, "learning_rate": 4.748015760972867e-05, "loss": 1.6332, "step": 4968 }, { "epoch": 0.516152487794744, "grad_norm": 0.4233894348144531, "learning_rate": 4.7463861906741666e-05, "loss": 1.3089, "step": 4969 }, { "epoch": 0.5162563623143243, "grad_norm": 0.3955030143260956, "learning_rate": 4.7447566473833615e-05, "loss": 1.5917, "step": 4970 }, { "epoch": 0.5163602368339046, "grad_norm": 0.3940108120441437, "learning_rate": 4.743127131273979e-05, "loss": 1.7515, "step": 4971 }, { "epoch": 0.516464111353485, "grad_norm": 0.44134819507598877, "learning_rate": 4.7414976425195544e-05, "loss": 1.5085, "step": 4972 }, { "epoch": 0.5165679858730653, "grad_norm": 0.44649553298950195, "learning_rate": 4.739868181293616e-05, "loss": 1.8201, "step": 4973 }, { "epoch": 0.5166718603926457, "grad_norm": 0.4249851703643799, "learning_rate": 4.738238747769687e-05, "loss": 1.7515, "step": 4974 }, { "epoch": 0.5167757349122261, "grad_norm": 0.41049546003341675, "learning_rate": 4.7366093421212884e-05, "loss": 1.6677, "step": 4975 }, { "epoch": 0.5168796094318063, "grad_norm": 0.4584263563156128, "learning_rate": 4.7349799645219415e-05, "loss": 1.7249, "step": 4976 }, { "epoch": 0.5169834839513867, "grad_norm": 0.4268789291381836, "learning_rate": 4.73335061514516e-05, "loss": 1.6639, "step": 4977 }, { "epoch": 0.5170873584709671, "grad_norm": 0.4349389672279358, "learning_rate": 4.731721294164457e-05, "loss": 1.6422, "step": 4978 }, { "epoch": 0.5171912329905474, "grad_norm": 0.43998467922210693, "learning_rate": 4.730092001753346e-05, "loss": 1.8851, "step": 4979 }, { "epoch": 0.5172951075101277, "grad_norm": 0.4167867600917816, "learning_rate": 4.72846273808533e-05, "loss": 1.6397, "step": 4980 }, { "epoch": 0.5173989820297081, "grad_norm": 0.40250730514526367, "learning_rate": 4.726833503333915e-05, "loss": 1.4696, "step": 4981 }, { "epoch": 0.5175028565492885, "grad_norm": 0.44303545355796814, "learning_rate": 4.7252042976726e-05, "loss": 1.814, "step": 4982 }, { "epoch": 0.5176067310688688, "grad_norm": 0.3853762745857239, "learning_rate": 4.723575121274885e-05, "loss": 1.6521, "step": 4983 }, { "epoch": 0.5177106055884492, "grad_norm": 0.39856797456741333, "learning_rate": 4.721945974314263e-05, "loss": 1.6085, "step": 4984 }, { "epoch": 0.5178144801080296, "grad_norm": 0.380560040473938, "learning_rate": 4.720316856964224e-05, "loss": 1.6189, "step": 4985 }, { "epoch": 0.5179183546276098, "grad_norm": 0.4493720531463623, "learning_rate": 4.7186877693982616e-05, "loss": 1.7474, "step": 4986 }, { "epoch": 0.5180222291471902, "grad_norm": 0.38229069113731384, "learning_rate": 4.717058711789855e-05, "loss": 1.5625, "step": 4987 }, { "epoch": 0.5181261036667706, "grad_norm": 0.3856019675731659, "learning_rate": 4.715429684312489e-05, "loss": 1.6004, "step": 4988 }, { "epoch": 0.5182299781863509, "grad_norm": 0.3876418471336365, "learning_rate": 4.713800687139644e-05, "loss": 1.6596, "step": 4989 }, { "epoch": 0.5183338527059312, "grad_norm": 0.41648218035697937, "learning_rate": 4.712171720444791e-05, "loss": 1.8412, "step": 4990 }, { "epoch": 0.5184377272255116, "grad_norm": 0.43493372201919556, "learning_rate": 4.7105427844014046e-05, "loss": 1.8457, "step": 4991 }, { "epoch": 0.5185416017450919, "grad_norm": 0.37969422340393066, "learning_rate": 4.7089138791829554e-05, "loss": 1.6451, "step": 4992 }, { "epoch": 0.5186454762646723, "grad_norm": 0.4082360863685608, "learning_rate": 4.707285004962906e-05, "loss": 1.8016, "step": 4993 }, { "epoch": 0.5187493507842527, "grad_norm": 0.42352405190467834, "learning_rate": 4.70565616191472e-05, "loss": 1.5526, "step": 4994 }, { "epoch": 0.5188532253038329, "grad_norm": 0.3847419321537018, "learning_rate": 4.704027350211859e-05, "loss": 1.6518, "step": 4995 }, { "epoch": 0.5189570998234133, "grad_norm": 0.40276169776916504, "learning_rate": 4.702398570027775e-05, "loss": 1.5955, "step": 4996 }, { "epoch": 0.5190609743429937, "grad_norm": 0.44198933243751526, "learning_rate": 4.700769821535921e-05, "loss": 1.7202, "step": 4997 }, { "epoch": 0.519164848862574, "grad_norm": 0.39979052543640137, "learning_rate": 4.6991411049097494e-05, "loss": 1.6186, "step": 4998 }, { "epoch": 0.5192687233821544, "grad_norm": 0.39757975935935974, "learning_rate": 4.697512420322701e-05, "loss": 1.7077, "step": 4999 }, { "epoch": 0.5193725979017347, "grad_norm": 0.3819902837276459, "learning_rate": 4.69588376794822e-05, "loss": 1.5771, "step": 5000 }, { "epoch": 0.519476472421315, "grad_norm": 0.3874730169773102, "learning_rate": 4.694255147959749e-05, "loss": 1.6497, "step": 5001 }, { "epoch": 0.5195803469408954, "grad_norm": 0.38986441493034363, "learning_rate": 4.6926265605307175e-05, "loss": 1.6801, "step": 5002 }, { "epoch": 0.5196842214604758, "grad_norm": 0.4043687880039215, "learning_rate": 4.690998005834559e-05, "loss": 1.5736, "step": 5003 }, { "epoch": 0.519788095980056, "grad_norm": 0.4174960255622864, "learning_rate": 4.689369484044705e-05, "loss": 1.7306, "step": 5004 }, { "epoch": 0.5198919704996364, "grad_norm": 0.43060052394866943, "learning_rate": 4.687740995334575e-05, "loss": 1.7073, "step": 5005 }, { "epoch": 0.5199958450192168, "grad_norm": 0.405344158411026, "learning_rate": 4.686112539877594e-05, "loss": 1.7753, "step": 5006 }, { "epoch": 0.5200997195387971, "grad_norm": 0.39156869053840637, "learning_rate": 4.684484117847181e-05, "loss": 1.5766, "step": 5007 }, { "epoch": 0.5202035940583775, "grad_norm": 0.40544578433036804, "learning_rate": 4.6828557294167465e-05, "loss": 1.6717, "step": 5008 }, { "epoch": 0.5203074685779578, "grad_norm": 0.41265055537223816, "learning_rate": 4.681227374759704e-05, "loss": 1.7227, "step": 5009 }, { "epoch": 0.5204113430975382, "grad_norm": 0.3951095640659332, "learning_rate": 4.679599054049458e-05, "loss": 1.6448, "step": 5010 }, { "epoch": 0.5205152176171185, "grad_norm": 0.4113246500492096, "learning_rate": 4.677970767459416e-05, "loss": 1.7294, "step": 5011 }, { "epoch": 0.5206190921366989, "grad_norm": 0.40176793932914734, "learning_rate": 4.676342515162973e-05, "loss": 1.6219, "step": 5012 }, { "epoch": 0.5207229666562793, "grad_norm": 0.6003035306930542, "learning_rate": 4.674714297333527e-05, "loss": 1.99, "step": 5013 }, { "epoch": 0.5208268411758595, "grad_norm": 0.36669179797172546, "learning_rate": 4.673086114144474e-05, "loss": 1.5943, "step": 5014 }, { "epoch": 0.5209307156954399, "grad_norm": 0.3950868546962738, "learning_rate": 4.671457965769198e-05, "loss": 1.6115, "step": 5015 }, { "epoch": 0.5210345902150203, "grad_norm": 0.4209156036376953, "learning_rate": 4.6698298523810854e-05, "loss": 1.6077, "step": 5016 }, { "epoch": 0.5211384647346006, "grad_norm": 0.38038328289985657, "learning_rate": 4.668201774153521e-05, "loss": 1.5769, "step": 5017 }, { "epoch": 0.521242339254181, "grad_norm": 0.4059074819087982, "learning_rate": 4.666573731259877e-05, "loss": 1.6872, "step": 5018 }, { "epoch": 0.5213462137737613, "grad_norm": 0.3864630162715912, "learning_rate": 4.664945723873529e-05, "loss": 1.6696, "step": 5019 }, { "epoch": 0.5214500882933416, "grad_norm": 0.3989625871181488, "learning_rate": 4.66331775216785e-05, "loss": 1.716, "step": 5020 }, { "epoch": 0.521553962812922, "grad_norm": 0.376228392124176, "learning_rate": 4.6616898163162024e-05, "loss": 1.6107, "step": 5021 }, { "epoch": 0.5216578373325024, "grad_norm": 0.40415439009666443, "learning_rate": 4.660061916491952e-05, "loss": 1.6419, "step": 5022 }, { "epoch": 0.5217617118520826, "grad_norm": 0.4512626528739929, "learning_rate": 4.658434052868455e-05, "loss": 1.8734, "step": 5023 }, { "epoch": 0.521865586371663, "grad_norm": 0.5184199810028076, "learning_rate": 4.656806225619067e-05, "loss": 1.8781, "step": 5024 }, { "epoch": 0.5219694608912434, "grad_norm": 0.4308519661426544, "learning_rate": 4.655178434917139e-05, "loss": 1.7415, "step": 5025 }, { "epoch": 0.5220733354108237, "grad_norm": 0.41828837990760803, "learning_rate": 4.65355068093602e-05, "loss": 1.6995, "step": 5026 }, { "epoch": 0.5221772099304041, "grad_norm": 0.3959868252277374, "learning_rate": 4.651922963849048e-05, "loss": 1.7536, "step": 5027 }, { "epoch": 0.5222810844499844, "grad_norm": 0.427030473947525, "learning_rate": 4.6502952838295654e-05, "loss": 1.6309, "step": 5028 }, { "epoch": 0.5223849589695647, "grad_norm": 0.3931408226490021, "learning_rate": 4.64866764105091e-05, "loss": 1.6537, "step": 5029 }, { "epoch": 0.5224888334891451, "grad_norm": 0.343965619802475, "learning_rate": 4.647040035686409e-05, "loss": 1.3474, "step": 5030 }, { "epoch": 0.5225927080087255, "grad_norm": 0.40805351734161377, "learning_rate": 4.64541246790939e-05, "loss": 1.6824, "step": 5031 }, { "epoch": 0.5226965825283058, "grad_norm": 0.39621418714523315, "learning_rate": 4.643784937893179e-05, "loss": 1.6969, "step": 5032 }, { "epoch": 0.5228004570478861, "grad_norm": 0.38394787907600403, "learning_rate": 4.642157445811092e-05, "loss": 1.614, "step": 5033 }, { "epoch": 0.5229043315674665, "grad_norm": 0.3906930685043335, "learning_rate": 4.6405299918364456e-05, "loss": 1.7664, "step": 5034 }, { "epoch": 0.5230082060870469, "grad_norm": 0.37266916036605835, "learning_rate": 4.638902576142552e-05, "loss": 1.4807, "step": 5035 }, { "epoch": 0.5231120806066272, "grad_norm": 0.4071597456932068, "learning_rate": 4.637275198902717e-05, "loss": 1.4604, "step": 5036 }, { "epoch": 0.5232159551262076, "grad_norm": 0.4267309606075287, "learning_rate": 4.635647860290244e-05, "loss": 1.7642, "step": 5037 }, { "epoch": 0.5233198296457879, "grad_norm": 0.4255138635635376, "learning_rate": 4.634020560478431e-05, "loss": 1.7285, "step": 5038 }, { "epoch": 0.5234237041653682, "grad_norm": 0.3941444456577301, "learning_rate": 4.6323932996405764e-05, "loss": 1.665, "step": 5039 }, { "epoch": 0.5235275786849486, "grad_norm": 0.4022713005542755, "learning_rate": 4.630766077949965e-05, "loss": 1.731, "step": 5040 }, { "epoch": 0.523631453204529, "grad_norm": 0.3884924352169037, "learning_rate": 4.629138895579886e-05, "loss": 1.541, "step": 5041 }, { "epoch": 0.5237353277241092, "grad_norm": 0.41130009293556213, "learning_rate": 4.6275117527036246e-05, "loss": 1.6621, "step": 5042 }, { "epoch": 0.5238392022436896, "grad_norm": 0.3775813579559326, "learning_rate": 4.625884649494453e-05, "loss": 1.5298, "step": 5043 }, { "epoch": 0.52394307676327, "grad_norm": 0.4054543375968933, "learning_rate": 4.6242575861256496e-05, "loss": 1.8429, "step": 5044 }, { "epoch": 0.5240469512828503, "grad_norm": 0.3855196535587311, "learning_rate": 4.622630562770483e-05, "loss": 1.589, "step": 5045 }, { "epoch": 0.5241508258024307, "grad_norm": 0.3769000470638275, "learning_rate": 4.6210035796022166e-05, "loss": 1.6403, "step": 5046 }, { "epoch": 0.524254700322011, "grad_norm": 0.4284866750240326, "learning_rate": 4.619376636794113e-05, "loss": 1.6255, "step": 5047 }, { "epoch": 0.5243585748415913, "grad_norm": 0.40864747762680054, "learning_rate": 4.61774973451943e-05, "loss": 1.627, "step": 5048 }, { "epoch": 0.5244624493611717, "grad_norm": 0.4019078016281128, "learning_rate": 4.6161228729514164e-05, "loss": 1.5215, "step": 5049 }, { "epoch": 0.5245663238807521, "grad_norm": 0.40582525730133057, "learning_rate": 4.614496052263324e-05, "loss": 1.615, "step": 5050 }, { "epoch": 0.5246701984003324, "grad_norm": 0.4704711437225342, "learning_rate": 4.612869272628394e-05, "loss": 1.8979, "step": 5051 }, { "epoch": 0.5247740729199127, "grad_norm": 0.4247235059738159, "learning_rate": 4.611242534219867e-05, "loss": 1.7807, "step": 5052 }, { "epoch": 0.5248779474394931, "grad_norm": 0.4381190240383148, "learning_rate": 4.609615837210977e-05, "loss": 1.6944, "step": 5053 }, { "epoch": 0.5249818219590734, "grad_norm": 0.37743279337882996, "learning_rate": 4.6079891817749576e-05, "loss": 1.5408, "step": 5054 }, { "epoch": 0.5250856964786538, "grad_norm": 0.41715410351753235, "learning_rate": 4.6063625680850306e-05, "loss": 1.8648, "step": 5055 }, { "epoch": 0.5251895709982342, "grad_norm": 0.3836834132671356, "learning_rate": 4.6047359963144187e-05, "loss": 1.5844, "step": 5056 }, { "epoch": 0.5252934455178144, "grad_norm": 0.4763341248035431, "learning_rate": 4.6031094666363426e-05, "loss": 1.9145, "step": 5057 }, { "epoch": 0.5253973200373948, "grad_norm": 0.5105146765708923, "learning_rate": 4.601482979224012e-05, "loss": 1.9522, "step": 5058 }, { "epoch": 0.5255011945569752, "grad_norm": 0.4624611735343933, "learning_rate": 4.599856534250633e-05, "loss": 1.7487, "step": 5059 }, { "epoch": 0.5256050690765556, "grad_norm": 0.46096524596214294, "learning_rate": 4.5982301318894126e-05, "loss": 1.7626, "step": 5060 }, { "epoch": 0.5257089435961358, "grad_norm": 0.42811891436576843, "learning_rate": 4.596603772313552e-05, "loss": 1.9035, "step": 5061 }, { "epoch": 0.5258128181157162, "grad_norm": 0.38237836956977844, "learning_rate": 4.59497745569624e-05, "loss": 1.654, "step": 5062 }, { "epoch": 0.5259166926352966, "grad_norm": 0.38058823347091675, "learning_rate": 4.593351182210671e-05, "loss": 1.6587, "step": 5063 }, { "epoch": 0.5260205671548769, "grad_norm": 0.4481144845485687, "learning_rate": 4.59172495203003e-05, "loss": 1.7732, "step": 5064 }, { "epoch": 0.5261244416744573, "grad_norm": 0.3913651406764984, "learning_rate": 4.590098765327496e-05, "loss": 1.7154, "step": 5065 }, { "epoch": 0.5262283161940376, "grad_norm": 0.3957405388355255, "learning_rate": 4.588472622276245e-05, "loss": 1.7209, "step": 5066 }, { "epoch": 0.5263321907136179, "grad_norm": 0.4275793135166168, "learning_rate": 4.5868465230494525e-05, "loss": 1.7113, "step": 5067 }, { "epoch": 0.5264360652331983, "grad_norm": 0.39351120591163635, "learning_rate": 4.58522046782028e-05, "loss": 1.7318, "step": 5068 }, { "epoch": 0.5265399397527787, "grad_norm": 0.42391449213027954, "learning_rate": 4.5835944567618916e-05, "loss": 1.721, "step": 5069 }, { "epoch": 0.526643814272359, "grad_norm": 0.42262449860572815, "learning_rate": 4.5819684900474486e-05, "loss": 1.7771, "step": 5070 }, { "epoch": 0.5267476887919393, "grad_norm": 0.4281655251979828, "learning_rate": 4.580342567850098e-05, "loss": 1.6757, "step": 5071 }, { "epoch": 0.5268515633115197, "grad_norm": 0.4202190935611725, "learning_rate": 4.578716690342992e-05, "loss": 1.597, "step": 5072 }, { "epoch": 0.5269554378311, "grad_norm": 0.39920350909233093, "learning_rate": 4.577090857699273e-05, "loss": 1.4776, "step": 5073 }, { "epoch": 0.5270593123506804, "grad_norm": 0.4190593361854553, "learning_rate": 4.575465070092077e-05, "loss": 1.7005, "step": 5074 }, { "epoch": 0.5271631868702608, "grad_norm": 0.4298344850540161, "learning_rate": 4.573839327694539e-05, "loss": 1.7374, "step": 5075 }, { "epoch": 0.527267061389841, "grad_norm": 0.3944896161556244, "learning_rate": 4.5722136306797914e-05, "loss": 1.7146, "step": 5076 }, { "epoch": 0.5273709359094214, "grad_norm": 0.44215551018714905, "learning_rate": 4.570587979220953e-05, "loss": 1.7662, "step": 5077 }, { "epoch": 0.5274748104290018, "grad_norm": 0.3969592750072479, "learning_rate": 4.568962373491146e-05, "loss": 1.5801, "step": 5078 }, { "epoch": 0.5275786849485821, "grad_norm": 0.4148666262626648, "learning_rate": 4.567336813663484e-05, "loss": 1.6804, "step": 5079 }, { "epoch": 0.5276825594681624, "grad_norm": 0.3804900050163269, "learning_rate": 4.565711299911076e-05, "loss": 1.6895, "step": 5080 }, { "epoch": 0.5277864339877428, "grad_norm": 0.4792127013206482, "learning_rate": 4.564085832407026e-05, "loss": 1.9221, "step": 5081 }, { "epoch": 0.5278903085073231, "grad_norm": 0.36079883575439453, "learning_rate": 4.5624604113244376e-05, "loss": 1.6225, "step": 5082 }, { "epoch": 0.5279941830269035, "grad_norm": 0.3568095266819, "learning_rate": 4.5608350368363995e-05, "loss": 1.4058, "step": 5083 }, { "epoch": 0.5280980575464839, "grad_norm": 0.43193015456199646, "learning_rate": 4.559209709116005e-05, "loss": 1.4616, "step": 5084 }, { "epoch": 0.5282019320660641, "grad_norm": 0.43650293350219727, "learning_rate": 4.557584428336339e-05, "loss": 1.7984, "step": 5085 }, { "epoch": 0.5283058065856445, "grad_norm": 0.3841239809989929, "learning_rate": 4.555959194670481e-05, "loss": 1.6124, "step": 5086 }, { "epoch": 0.5284096811052249, "grad_norm": 0.3851770758628845, "learning_rate": 4.554334008291503e-05, "loss": 1.629, "step": 5087 }, { "epoch": 0.5285135556248053, "grad_norm": 0.4100612998008728, "learning_rate": 4.5527088693724775e-05, "loss": 1.6488, "step": 5088 }, { "epoch": 0.5286174301443856, "grad_norm": 0.42137911915779114, "learning_rate": 4.5510837780864705e-05, "loss": 1.8, "step": 5089 }, { "epoch": 0.5287213046639659, "grad_norm": 0.5062525868415833, "learning_rate": 4.549458734606537e-05, "loss": 1.6027, "step": 5090 }, { "epoch": 0.5288251791835463, "grad_norm": 0.3984263241291046, "learning_rate": 4.547833739105735e-05, "loss": 1.4691, "step": 5091 }, { "epoch": 0.5289290537031266, "grad_norm": 0.4082936644554138, "learning_rate": 4.5462087917571124e-05, "loss": 1.7615, "step": 5092 }, { "epoch": 0.529032928222707, "grad_norm": 0.43450236320495605, "learning_rate": 4.544583892733714e-05, "loss": 1.6093, "step": 5093 }, { "epoch": 0.5291368027422874, "grad_norm": 0.4008505344390869, "learning_rate": 4.5429590422085776e-05, "loss": 1.7545, "step": 5094 }, { "epoch": 0.5292406772618676, "grad_norm": 0.41801589727401733, "learning_rate": 4.5413342403547403e-05, "loss": 1.6709, "step": 5095 }, { "epoch": 0.529344551781448, "grad_norm": 0.39262089133262634, "learning_rate": 4.539709487345227e-05, "loss": 1.4997, "step": 5096 }, { "epoch": 0.5294484263010284, "grad_norm": 0.41034695506095886, "learning_rate": 4.5380847833530625e-05, "loss": 1.803, "step": 5097 }, { "epoch": 0.5295523008206087, "grad_norm": 0.42729073762893677, "learning_rate": 4.5364601285512674e-05, "loss": 1.589, "step": 5098 }, { "epoch": 0.529656175340189, "grad_norm": 0.3822977542877197, "learning_rate": 4.534835523112852e-05, "loss": 1.6351, "step": 5099 }, { "epoch": 0.5297600498597694, "grad_norm": 0.4156366288661957, "learning_rate": 4.533210967210824e-05, "loss": 1.7417, "step": 5100 }, { "epoch": 0.5298639243793497, "grad_norm": 0.4239443242549896, "learning_rate": 4.5315864610181894e-05, "loss": 1.9393, "step": 5101 }, { "epoch": 0.5299677988989301, "grad_norm": 0.40841466188430786, "learning_rate": 4.52996200470794e-05, "loss": 1.5193, "step": 5102 }, { "epoch": 0.5300716734185105, "grad_norm": 0.4005943238735199, "learning_rate": 4.528337598453071e-05, "loss": 1.6073, "step": 5103 }, { "epoch": 0.5301755479380907, "grad_norm": 0.3967158794403076, "learning_rate": 4.5267132424265705e-05, "loss": 1.6722, "step": 5104 }, { "epoch": 0.5302794224576711, "grad_norm": 0.3896438181400299, "learning_rate": 4.5250889368014166e-05, "loss": 1.684, "step": 5105 }, { "epoch": 0.5303832969772515, "grad_norm": 0.4113629460334778, "learning_rate": 4.523464681750587e-05, "loss": 1.5668, "step": 5106 }, { "epoch": 0.5304871714968318, "grad_norm": 0.39420047402381897, "learning_rate": 4.521840477447051e-05, "loss": 1.8714, "step": 5107 }, { "epoch": 0.5305910460164122, "grad_norm": 0.3833891451358795, "learning_rate": 4.5202163240637755e-05, "loss": 1.6501, "step": 5108 }, { "epoch": 0.5306949205359925, "grad_norm": 0.40579354763031006, "learning_rate": 4.5185922217737175e-05, "loss": 1.5287, "step": 5109 }, { "epoch": 0.5307987950555728, "grad_norm": 0.39992040395736694, "learning_rate": 4.5169681707498354e-05, "loss": 1.7683, "step": 5110 }, { "epoch": 0.5309026695751532, "grad_norm": 0.3796159327030182, "learning_rate": 4.515344171165072e-05, "loss": 1.4853, "step": 5111 }, { "epoch": 0.5310065440947336, "grad_norm": 0.3873627185821533, "learning_rate": 4.5137202231923744e-05, "loss": 1.7049, "step": 5112 }, { "epoch": 0.531110418614314, "grad_norm": 0.42043253779411316, "learning_rate": 4.512096327004682e-05, "loss": 1.822, "step": 5113 }, { "epoch": 0.5312142931338942, "grad_norm": 0.4138578176498413, "learning_rate": 4.5104724827749254e-05, "loss": 1.8201, "step": 5114 }, { "epoch": 0.5313181676534746, "grad_norm": 0.4510040879249573, "learning_rate": 4.508848690676028e-05, "loss": 1.8475, "step": 5115 }, { "epoch": 0.531422042173055, "grad_norm": 0.45895758271217346, "learning_rate": 4.507224950880915e-05, "loss": 1.8352, "step": 5116 }, { "epoch": 0.5315259166926353, "grad_norm": 0.3979472815990448, "learning_rate": 4.505601263562502e-05, "loss": 1.7358, "step": 5117 }, { "epoch": 0.5316297912122157, "grad_norm": 0.3930369019508362, "learning_rate": 4.503977628893697e-05, "loss": 1.7055, "step": 5118 }, { "epoch": 0.531733665731796, "grad_norm": 0.3967008590698242, "learning_rate": 4.502354047047406e-05, "loss": 1.6146, "step": 5119 }, { "epoch": 0.5318375402513763, "grad_norm": 0.4116523563861847, "learning_rate": 4.500730518196527e-05, "loss": 1.6813, "step": 5120 }, { "epoch": 0.5319414147709567, "grad_norm": 0.4032840430736542, "learning_rate": 4.499107042513953e-05, "loss": 1.566, "step": 5121 }, { "epoch": 0.5320452892905371, "grad_norm": 0.3959857225418091, "learning_rate": 4.4974836201725715e-05, "loss": 1.5526, "step": 5122 }, { "epoch": 0.5321491638101173, "grad_norm": 0.399959534406662, "learning_rate": 4.495860251345266e-05, "loss": 1.6931, "step": 5123 }, { "epoch": 0.5322530383296977, "grad_norm": 0.4058084487915039, "learning_rate": 4.49423693620491e-05, "loss": 1.5507, "step": 5124 }, { "epoch": 0.5323569128492781, "grad_norm": 0.4492541551589966, "learning_rate": 4.4926136749243754e-05, "loss": 1.7912, "step": 5125 }, { "epoch": 0.5324607873688584, "grad_norm": 0.4223318099975586, "learning_rate": 4.490990467676528e-05, "loss": 1.7354, "step": 5126 }, { "epoch": 0.5325646618884388, "grad_norm": 0.38780099153518677, "learning_rate": 4.489367314634225e-05, "loss": 1.5337, "step": 5127 }, { "epoch": 0.5326685364080191, "grad_norm": 0.40995070338249207, "learning_rate": 4.487744215970318e-05, "loss": 1.7209, "step": 5128 }, { "epoch": 0.5327724109275994, "grad_norm": 0.45095205307006836, "learning_rate": 4.486121171857659e-05, "loss": 1.7835, "step": 5129 }, { "epoch": 0.5328762854471798, "grad_norm": 0.42928487062454224, "learning_rate": 4.4844981824690844e-05, "loss": 1.7334, "step": 5130 }, { "epoch": 0.5329801599667602, "grad_norm": 0.38315919041633606, "learning_rate": 4.4828752479774324e-05, "loss": 1.5904, "step": 5131 }, { "epoch": 0.5330840344863405, "grad_norm": 0.3942658603191376, "learning_rate": 4.481252368555535e-05, "loss": 1.6793, "step": 5132 }, { "epoch": 0.5331879090059208, "grad_norm": 0.419535756111145, "learning_rate": 4.4796295443762116e-05, "loss": 1.7484, "step": 5133 }, { "epoch": 0.5332917835255012, "grad_norm": 0.378946989774704, "learning_rate": 4.478006775612284e-05, "loss": 1.6216, "step": 5134 }, { "epoch": 0.5333956580450815, "grad_norm": 0.43386486172676086, "learning_rate": 4.476384062436563e-05, "loss": 1.8178, "step": 5135 }, { "epoch": 0.5334995325646619, "grad_norm": 0.4382666051387787, "learning_rate": 4.474761405021855e-05, "loss": 1.8414, "step": 5136 }, { "epoch": 0.5336034070842423, "grad_norm": 0.4107944667339325, "learning_rate": 4.4731388035409586e-05, "loss": 1.7848, "step": 5137 }, { "epoch": 0.5337072816038226, "grad_norm": 0.4328972399234772, "learning_rate": 4.471516258166671e-05, "loss": 1.8443, "step": 5138 }, { "epoch": 0.5338111561234029, "grad_norm": 0.3964642584323883, "learning_rate": 4.4698937690717816e-05, "loss": 1.6394, "step": 5139 }, { "epoch": 0.5339150306429833, "grad_norm": 0.37310969829559326, "learning_rate": 4.468271336429068e-05, "loss": 1.5261, "step": 5140 }, { "epoch": 0.5340189051625637, "grad_norm": 0.3795417547225952, "learning_rate": 4.466648960411311e-05, "loss": 1.512, "step": 5141 }, { "epoch": 0.5341227796821439, "grad_norm": 0.40631794929504395, "learning_rate": 4.46502664119128e-05, "loss": 1.8027, "step": 5142 }, { "epoch": 0.5342266542017243, "grad_norm": 0.47585329413414, "learning_rate": 4.463404378941737e-05, "loss": 1.8224, "step": 5143 }, { "epoch": 0.5343305287213047, "grad_norm": 0.4149931073188782, "learning_rate": 4.461782173835442e-05, "loss": 1.6209, "step": 5144 }, { "epoch": 0.534434403240885, "grad_norm": 0.41741567850112915, "learning_rate": 4.4601600260451494e-05, "loss": 1.8086, "step": 5145 }, { "epoch": 0.5345382777604654, "grad_norm": 0.43015965819358826, "learning_rate": 4.458537935743601e-05, "loss": 1.7298, "step": 5146 }, { "epoch": 0.5346421522800457, "grad_norm": 0.3866709768772125, "learning_rate": 4.45691590310354e-05, "loss": 1.6128, "step": 5147 }, { "epoch": 0.534746026799626, "grad_norm": 0.4770145118236542, "learning_rate": 4.455293928297698e-05, "loss": 1.8353, "step": 5148 }, { "epoch": 0.5348499013192064, "grad_norm": 0.39021778106689453, "learning_rate": 4.453672011498805e-05, "loss": 1.5159, "step": 5149 }, { "epoch": 0.5349537758387868, "grad_norm": 0.4521084129810333, "learning_rate": 4.45205015287958e-05, "loss": 1.7056, "step": 5150 }, { "epoch": 0.535057650358367, "grad_norm": 0.3977662920951843, "learning_rate": 4.450428352612742e-05, "loss": 1.5728, "step": 5151 }, { "epoch": 0.5351615248779474, "grad_norm": 0.4670353829860687, "learning_rate": 4.448806610870995e-05, "loss": 1.8235, "step": 5152 }, { "epoch": 0.5352653993975278, "grad_norm": 0.4064006507396698, "learning_rate": 4.4471849278270446e-05, "loss": 1.6757, "step": 5153 }, { "epoch": 0.5353692739171081, "grad_norm": 0.41022297739982605, "learning_rate": 4.445563303653589e-05, "loss": 1.7759, "step": 5154 }, { "epoch": 0.5354731484366885, "grad_norm": 0.37725940346717834, "learning_rate": 4.443941738523316e-05, "loss": 1.6766, "step": 5155 }, { "epoch": 0.5355770229562689, "grad_norm": 0.4157141447067261, "learning_rate": 4.4423202326089095e-05, "loss": 1.5477, "step": 5156 }, { "epoch": 0.5356808974758491, "grad_norm": 0.43074604868888855, "learning_rate": 4.4406987860830505e-05, "loss": 1.6997, "step": 5157 }, { "epoch": 0.5357847719954295, "grad_norm": 0.4153660833835602, "learning_rate": 4.439077399118405e-05, "loss": 1.7147, "step": 5158 }, { "epoch": 0.5358886465150099, "grad_norm": 0.40564560890197754, "learning_rate": 4.4374560718876426e-05, "loss": 1.6825, "step": 5159 }, { "epoch": 0.5359925210345902, "grad_norm": 0.41882357001304626, "learning_rate": 4.4358348045634215e-05, "loss": 1.6888, "step": 5160 }, { "epoch": 0.5360963955541705, "grad_norm": 0.4029204249382019, "learning_rate": 4.4342135973183915e-05, "loss": 1.7675, "step": 5161 }, { "epoch": 0.5362002700737509, "grad_norm": 0.42406782507896423, "learning_rate": 4.432592450325201e-05, "loss": 1.7902, "step": 5162 }, { "epoch": 0.5363041445933313, "grad_norm": 0.4365594983100891, "learning_rate": 4.430971363756487e-05, "loss": 1.8162, "step": 5163 }, { "epoch": 0.5364080191129116, "grad_norm": 0.3890252709388733, "learning_rate": 4.429350337784887e-05, "loss": 1.7314, "step": 5164 }, { "epoch": 0.536511893632492, "grad_norm": 0.3934305012226105, "learning_rate": 4.427729372583023e-05, "loss": 1.7803, "step": 5165 }, { "epoch": 0.5366157681520723, "grad_norm": 0.40188831090927124, "learning_rate": 4.426108468323516e-05, "loss": 1.6121, "step": 5166 }, { "epoch": 0.5367196426716526, "grad_norm": 0.37544122338294983, "learning_rate": 4.424487625178983e-05, "loss": 1.6125, "step": 5167 }, { "epoch": 0.536823517191233, "grad_norm": 0.43579837679862976, "learning_rate": 4.4228668433220264e-05, "loss": 1.806, "step": 5168 }, { "epoch": 0.5369273917108134, "grad_norm": 0.37916016578674316, "learning_rate": 4.421246122925251e-05, "loss": 1.4683, "step": 5169 }, { "epoch": 0.5370312662303937, "grad_norm": 0.3701176345348358, "learning_rate": 4.419625464161249e-05, "loss": 1.7148, "step": 5170 }, { "epoch": 0.537135140749974, "grad_norm": 0.39556387066841125, "learning_rate": 4.4180048672026066e-05, "loss": 1.5598, "step": 5171 }, { "epoch": 0.5372390152695544, "grad_norm": 0.413783460855484, "learning_rate": 4.416384332221906e-05, "loss": 1.6101, "step": 5172 }, { "epoch": 0.5373428897891347, "grad_norm": 0.41170206665992737, "learning_rate": 4.414763859391724e-05, "loss": 1.7957, "step": 5173 }, { "epoch": 0.5374467643087151, "grad_norm": 0.3944395184516907, "learning_rate": 4.4131434488846236e-05, "loss": 1.5504, "step": 5174 }, { "epoch": 0.5375506388282955, "grad_norm": 0.47782790660858154, "learning_rate": 4.4115231008731696e-05, "loss": 1.7433, "step": 5175 }, { "epoch": 0.5376545133478757, "grad_norm": 0.42878782749176025, "learning_rate": 4.409902815529914e-05, "loss": 1.7387, "step": 5176 }, { "epoch": 0.5377583878674561, "grad_norm": 0.42631304264068604, "learning_rate": 4.4082825930274066e-05, "loss": 1.8365, "step": 5177 }, { "epoch": 0.5378622623870365, "grad_norm": 0.4046842157840729, "learning_rate": 4.4066624335381864e-05, "loss": 1.6611, "step": 5178 }, { "epoch": 0.5379661369066168, "grad_norm": 0.41138967871665955, "learning_rate": 4.40504233723479e-05, "loss": 1.7386, "step": 5179 }, { "epoch": 0.5380700114261971, "grad_norm": 0.4140818417072296, "learning_rate": 4.4034223042897423e-05, "loss": 1.7121, "step": 5180 }, { "epoch": 0.5381738859457775, "grad_norm": 0.4164121150970459, "learning_rate": 4.4018023348755666e-05, "loss": 1.746, "step": 5181 }, { "epoch": 0.5382777604653578, "grad_norm": 0.4518551528453827, "learning_rate": 4.400182429164778e-05, "loss": 1.5526, "step": 5182 }, { "epoch": 0.5383816349849382, "grad_norm": 0.39879488945007324, "learning_rate": 4.39856258732988e-05, "loss": 1.7516, "step": 5183 }, { "epoch": 0.5384855095045186, "grad_norm": 0.43033257126808167, "learning_rate": 4.396942809543374e-05, "loss": 1.7992, "step": 5184 }, { "epoch": 0.5385893840240988, "grad_norm": 0.4049583673477173, "learning_rate": 4.395323095977757e-05, "loss": 1.6651, "step": 5185 }, { "epoch": 0.5386932585436792, "grad_norm": 0.43411314487457275, "learning_rate": 4.393703446805512e-05, "loss": 1.7299, "step": 5186 }, { "epoch": 0.5387971330632596, "grad_norm": 0.47748497128486633, "learning_rate": 4.39208386219912e-05, "loss": 1.8707, "step": 5187 }, { "epoch": 0.5389010075828399, "grad_norm": 0.3915466070175171, "learning_rate": 4.3904643423310574e-05, "loss": 1.6647, "step": 5188 }, { "epoch": 0.5390048821024203, "grad_norm": 0.46273308992385864, "learning_rate": 4.388844887373785e-05, "loss": 1.7202, "step": 5189 }, { "epoch": 0.5391087566220006, "grad_norm": 0.37621983885765076, "learning_rate": 4.387225497499767e-05, "loss": 1.7193, "step": 5190 }, { "epoch": 0.539212631141581, "grad_norm": 0.4030202627182007, "learning_rate": 4.385606172881452e-05, "loss": 1.6711, "step": 5191 }, { "epoch": 0.5393165056611613, "grad_norm": 0.4407753646373749, "learning_rate": 4.383986913691289e-05, "loss": 1.7142, "step": 5192 }, { "epoch": 0.5394203801807417, "grad_norm": 0.40765175223350525, "learning_rate": 4.382367720101713e-05, "loss": 1.7295, "step": 5193 }, { "epoch": 0.5395242547003221, "grad_norm": 0.40376102924346924, "learning_rate": 4.3807485922851574e-05, "loss": 1.574, "step": 5194 }, { "epoch": 0.5396281292199023, "grad_norm": 0.41411998867988586, "learning_rate": 4.3791295304140476e-05, "loss": 1.6128, "step": 5195 }, { "epoch": 0.5397320037394827, "grad_norm": 0.4627225399017334, "learning_rate": 4.3775105346607983e-05, "loss": 1.8546, "step": 5196 }, { "epoch": 0.5398358782590631, "grad_norm": 0.4189532399177551, "learning_rate": 4.375891605197822e-05, "loss": 1.7621, "step": 5197 }, { "epoch": 0.5399397527786434, "grad_norm": 0.40579670667648315, "learning_rate": 4.3742727421975224e-05, "loss": 1.6286, "step": 5198 }, { "epoch": 0.5400436272982237, "grad_norm": 0.4409949481487274, "learning_rate": 4.3726539458322943e-05, "loss": 1.8302, "step": 5199 }, { "epoch": 0.5401475018178041, "grad_norm": 0.4035548269748688, "learning_rate": 4.371035216274526e-05, "loss": 1.7163, "step": 5200 }, { "epoch": 0.5402513763373844, "grad_norm": 0.41622182726860046, "learning_rate": 4.369416553696604e-05, "loss": 1.5436, "step": 5201 }, { "epoch": 0.5403552508569648, "grad_norm": 0.41848766803741455, "learning_rate": 4.367797958270897e-05, "loss": 1.6028, "step": 5202 }, { "epoch": 0.5404591253765452, "grad_norm": 0.4080602824687958, "learning_rate": 4.366179430169779e-05, "loss": 1.6886, "step": 5203 }, { "epoch": 0.5405629998961254, "grad_norm": 0.40305790305137634, "learning_rate": 4.3645609695656056e-05, "loss": 1.604, "step": 5204 }, { "epoch": 0.5406668744157058, "grad_norm": 0.42248713970184326, "learning_rate": 4.3629425766307334e-05, "loss": 1.6635, "step": 5205 }, { "epoch": 0.5407707489352862, "grad_norm": 0.41928306221961975, "learning_rate": 4.361324251537506e-05, "loss": 1.5922, "step": 5206 }, { "epoch": 0.5408746234548665, "grad_norm": 0.45712658762931824, "learning_rate": 4.359705994458267e-05, "loss": 1.7722, "step": 5207 }, { "epoch": 0.5409784979744469, "grad_norm": 0.3815540373325348, "learning_rate": 4.358087805565343e-05, "loss": 1.6145, "step": 5208 }, { "epoch": 0.5410823724940272, "grad_norm": 0.41365593671798706, "learning_rate": 4.35646968503106e-05, "loss": 1.5844, "step": 5209 }, { "epoch": 0.5411862470136075, "grad_norm": 0.4178517162799835, "learning_rate": 4.354851633027738e-05, "loss": 1.7365, "step": 5210 }, { "epoch": 0.5412901215331879, "grad_norm": 0.4436066448688507, "learning_rate": 4.3532336497276836e-05, "loss": 1.7656, "step": 5211 }, { "epoch": 0.5413939960527683, "grad_norm": 0.4040657877922058, "learning_rate": 4.351615735303199e-05, "loss": 1.7024, "step": 5212 }, { "epoch": 0.5414978705723485, "grad_norm": 0.4756530821323395, "learning_rate": 4.3499978899265844e-05, "loss": 1.9407, "step": 5213 }, { "epoch": 0.5416017450919289, "grad_norm": 0.43450602889060974, "learning_rate": 4.348380113770121e-05, "loss": 1.7781, "step": 5214 }, { "epoch": 0.5417056196115093, "grad_norm": 0.4165419638156891, "learning_rate": 4.346762407006093e-05, "loss": 1.6077, "step": 5215 }, { "epoch": 0.5418094941310897, "grad_norm": 0.4009828269481659, "learning_rate": 4.345144769806775e-05, "loss": 1.5677, "step": 5216 }, { "epoch": 0.54191336865067, "grad_norm": 0.37052732706069946, "learning_rate": 4.3435272023444304e-05, "loss": 1.5191, "step": 5217 }, { "epoch": 0.5420172431702504, "grad_norm": 0.4109463095664978, "learning_rate": 4.3419097047913175e-05, "loss": 1.5929, "step": 5218 }, { "epoch": 0.5421211176898307, "grad_norm": 0.4167434275150299, "learning_rate": 4.340292277319687e-05, "loss": 1.612, "step": 5219 }, { "epoch": 0.542224992209411, "grad_norm": 0.39904865622520447, "learning_rate": 4.3386749201017856e-05, "loss": 1.6542, "step": 5220 }, { "epoch": 0.5423288667289914, "grad_norm": 0.4138846695423126, "learning_rate": 4.3370576333098446e-05, "loss": 1.6344, "step": 5221 }, { "epoch": 0.5424327412485718, "grad_norm": 0.38887113332748413, "learning_rate": 4.335440417116094e-05, "loss": 1.6036, "step": 5222 }, { "epoch": 0.542536615768152, "grad_norm": 0.4017149806022644, "learning_rate": 4.3338232716927574e-05, "loss": 1.6746, "step": 5223 }, { "epoch": 0.5426404902877324, "grad_norm": 0.39271867275238037, "learning_rate": 4.3322061972120445e-05, "loss": 1.6285, "step": 5224 }, { "epoch": 0.5427443648073128, "grad_norm": 0.4323449432849884, "learning_rate": 4.3305891938461626e-05, "loss": 1.5169, "step": 5225 }, { "epoch": 0.5428482393268931, "grad_norm": 0.37628036737442017, "learning_rate": 4.3289722617673115e-05, "loss": 1.6436, "step": 5226 }, { "epoch": 0.5429521138464735, "grad_norm": 0.43014097213745117, "learning_rate": 4.327355401147679e-05, "loss": 1.7777, "step": 5227 }, { "epoch": 0.5430559883660538, "grad_norm": 0.3823567032814026, "learning_rate": 4.3257386121594485e-05, "loss": 1.5837, "step": 5228 }, { "epoch": 0.5431598628856341, "grad_norm": 0.4464382827281952, "learning_rate": 4.324121894974799e-05, "loss": 1.8451, "step": 5229 }, { "epoch": 0.5432637374052145, "grad_norm": 0.37794092297554016, "learning_rate": 4.322505249765893e-05, "loss": 1.7015, "step": 5230 }, { "epoch": 0.5433676119247949, "grad_norm": 0.40177834033966064, "learning_rate": 4.320888676704895e-05, "loss": 1.7247, "step": 5231 }, { "epoch": 0.5434714864443752, "grad_norm": 0.3933226764202118, "learning_rate": 4.3192721759639545e-05, "loss": 1.6449, "step": 5232 }, { "epoch": 0.5435753609639555, "grad_norm": 0.4006556272506714, "learning_rate": 4.317655747715218e-05, "loss": 1.5498, "step": 5233 }, { "epoch": 0.5436792354835359, "grad_norm": 0.4249705970287323, "learning_rate": 4.3160393921308204e-05, "loss": 1.7594, "step": 5234 }, { "epoch": 0.5437831100031162, "grad_norm": 0.44793421030044556, "learning_rate": 4.3144231093828954e-05, "loss": 1.8968, "step": 5235 }, { "epoch": 0.5438869845226966, "grad_norm": 0.3769989311695099, "learning_rate": 4.312806899643559e-05, "loss": 1.5737, "step": 5236 }, { "epoch": 0.543990859042277, "grad_norm": 0.41074395179748535, "learning_rate": 4.311190763084928e-05, "loss": 1.6493, "step": 5237 }, { "epoch": 0.5440947335618572, "grad_norm": 0.4340376555919647, "learning_rate": 4.309574699879109e-05, "loss": 1.7359, "step": 5238 }, { "epoch": 0.5441986080814376, "grad_norm": 0.4758669435977936, "learning_rate": 4.3079587101981985e-05, "loss": 1.8211, "step": 5239 }, { "epoch": 0.544302482601018, "grad_norm": 0.3941514790058136, "learning_rate": 4.306342794214286e-05, "loss": 1.4555, "step": 5240 }, { "epoch": 0.5444063571205984, "grad_norm": 0.40955716371536255, "learning_rate": 4.3047269520994555e-05, "loss": 1.6498, "step": 5241 }, { "epoch": 0.5445102316401786, "grad_norm": 0.45448553562164307, "learning_rate": 4.3031111840257834e-05, "loss": 1.7542, "step": 5242 }, { "epoch": 0.544614106159759, "grad_norm": 0.4177002012729645, "learning_rate": 4.301495490165331e-05, "loss": 1.6508, "step": 5243 }, { "epoch": 0.5447179806793394, "grad_norm": 0.431246280670166, "learning_rate": 4.299879870690162e-05, "loss": 1.8128, "step": 5244 }, { "epoch": 0.5448218551989197, "grad_norm": 0.4549204707145691, "learning_rate": 4.298264325772327e-05, "loss": 1.6395, "step": 5245 }, { "epoch": 0.5449257297185001, "grad_norm": 0.4020931124687195, "learning_rate": 4.2966488555838656e-05, "loss": 1.6735, "step": 5246 }, { "epoch": 0.5450296042380804, "grad_norm": 0.4968905448913574, "learning_rate": 4.295033460296814e-05, "loss": 1.7222, "step": 5247 }, { "epoch": 0.5451334787576607, "grad_norm": 0.3912610709667206, "learning_rate": 4.293418140083203e-05, "loss": 1.6682, "step": 5248 }, { "epoch": 0.5452373532772411, "grad_norm": 0.39999935030937195, "learning_rate": 4.291802895115046e-05, "loss": 1.746, "step": 5249 }, { "epoch": 0.5453412277968215, "grad_norm": 0.4106389582157135, "learning_rate": 4.290187725564356e-05, "loss": 1.5329, "step": 5250 }, { "epoch": 0.5454451023164018, "grad_norm": 0.4551447927951813, "learning_rate": 4.28857263160314e-05, "loss": 1.7031, "step": 5251 }, { "epoch": 0.5455489768359821, "grad_norm": 0.39867910742759705, "learning_rate": 4.286957613403387e-05, "loss": 1.5662, "step": 5252 }, { "epoch": 0.5456528513555625, "grad_norm": 0.38527026772499084, "learning_rate": 4.285342671137086e-05, "loss": 1.5221, "step": 5253 }, { "epoch": 0.5457567258751428, "grad_norm": 0.40201330184936523, "learning_rate": 4.283727804976217e-05, "loss": 1.5841, "step": 5254 }, { "epoch": 0.5458606003947232, "grad_norm": 0.40640905499458313, "learning_rate": 4.2821130150927486e-05, "loss": 1.6659, "step": 5255 }, { "epoch": 0.5459644749143036, "grad_norm": 0.39006564021110535, "learning_rate": 4.280498301658644e-05, "loss": 1.463, "step": 5256 }, { "epoch": 0.5460683494338838, "grad_norm": 0.44832733273506165, "learning_rate": 4.27888366484586e-05, "loss": 1.941, "step": 5257 }, { "epoch": 0.5461722239534642, "grad_norm": 0.41479620337486267, "learning_rate": 4.277269104826339e-05, "loss": 1.7727, "step": 5258 }, { "epoch": 0.5462760984730446, "grad_norm": 0.40158823132514954, "learning_rate": 4.275654621772021e-05, "loss": 1.6807, "step": 5259 }, { "epoch": 0.5463799729926249, "grad_norm": 0.39162763953208923, "learning_rate": 4.2740402158548366e-05, "loss": 1.6696, "step": 5260 }, { "epoch": 0.5464838475122052, "grad_norm": 0.41060155630111694, "learning_rate": 4.272425887246706e-05, "loss": 1.7343, "step": 5261 }, { "epoch": 0.5465877220317856, "grad_norm": 0.3723122179508209, "learning_rate": 4.270811636119542e-05, "loss": 1.6486, "step": 5262 }, { "epoch": 0.5466915965513659, "grad_norm": 0.4098038375377655, "learning_rate": 4.269197462645253e-05, "loss": 1.691, "step": 5263 }, { "epoch": 0.5467954710709463, "grad_norm": 0.38569438457489014, "learning_rate": 4.267583366995732e-05, "loss": 1.6651, "step": 5264 }, { "epoch": 0.5468993455905267, "grad_norm": 0.42734023928642273, "learning_rate": 4.2659693493428684e-05, "loss": 1.8253, "step": 5265 }, { "epoch": 0.5470032201101069, "grad_norm": 0.4274364709854126, "learning_rate": 4.264355409858547e-05, "loss": 1.6397, "step": 5266 }, { "epoch": 0.5471070946296873, "grad_norm": 0.38870930671691895, "learning_rate": 4.2627415487146346e-05, "loss": 1.5403, "step": 5267 }, { "epoch": 0.5472109691492677, "grad_norm": 0.3967702388763428, "learning_rate": 4.261127766082995e-05, "loss": 1.8323, "step": 5268 }, { "epoch": 0.5473148436688481, "grad_norm": 0.42525243759155273, "learning_rate": 4.2595140621354846e-05, "loss": 1.7435, "step": 5269 }, { "epoch": 0.5474187181884284, "grad_norm": 0.41377174854278564, "learning_rate": 4.257900437043954e-05, "loss": 1.5484, "step": 5270 }, { "epoch": 0.5475225927080087, "grad_norm": 0.36868613958358765, "learning_rate": 4.256286890980234e-05, "loss": 1.6659, "step": 5271 }, { "epoch": 0.5476264672275891, "grad_norm": 0.43371888995170593, "learning_rate": 4.254673424116162e-05, "loss": 1.6907, "step": 5272 }, { "epoch": 0.5477303417471694, "grad_norm": 0.4247437119483948, "learning_rate": 4.2530600366235554e-05, "loss": 1.7444, "step": 5273 }, { "epoch": 0.5478342162667498, "grad_norm": 0.43270236253738403, "learning_rate": 4.251446728674229e-05, "loss": 1.6692, "step": 5274 }, { "epoch": 0.5479380907863302, "grad_norm": 0.40581294894218445, "learning_rate": 4.2498335004399864e-05, "loss": 1.7245, "step": 5275 }, { "epoch": 0.5480419653059104, "grad_norm": 0.40520188212394714, "learning_rate": 4.248220352092626e-05, "loss": 1.6344, "step": 5276 }, { "epoch": 0.5481458398254908, "grad_norm": 0.41677728295326233, "learning_rate": 4.246607283803932e-05, "loss": 1.7355, "step": 5277 }, { "epoch": 0.5482497143450712, "grad_norm": 0.4243401885032654, "learning_rate": 4.2449942957456855e-05, "loss": 1.7528, "step": 5278 }, { "epoch": 0.5483535888646515, "grad_norm": 0.44001367688179016, "learning_rate": 4.24338138808966e-05, "loss": 1.7903, "step": 5279 }, { "epoch": 0.5484574633842318, "grad_norm": 0.40032702684402466, "learning_rate": 4.241768561007613e-05, "loss": 1.6569, "step": 5280 }, { "epoch": 0.5485613379038122, "grad_norm": 0.4183237850666046, "learning_rate": 4.2401558146712994e-05, "loss": 1.6377, "step": 5281 }, { "epoch": 0.5486652124233925, "grad_norm": 0.4048320949077606, "learning_rate": 4.238543149252466e-05, "loss": 1.5852, "step": 5282 }, { "epoch": 0.5487690869429729, "grad_norm": 0.4144987165927887, "learning_rate": 4.2369305649228455e-05, "loss": 1.7107, "step": 5283 }, { "epoch": 0.5488729614625533, "grad_norm": 0.40201154351234436, "learning_rate": 4.235318061854168e-05, "loss": 1.7281, "step": 5284 }, { "epoch": 0.5489768359821335, "grad_norm": 0.41914767026901245, "learning_rate": 4.233705640218154e-05, "loss": 1.7443, "step": 5285 }, { "epoch": 0.5490807105017139, "grad_norm": 0.39670661091804504, "learning_rate": 4.2320933001865106e-05, "loss": 1.5151, "step": 5286 }, { "epoch": 0.5491845850212943, "grad_norm": 0.49306583404541016, "learning_rate": 4.230481041930941e-05, "loss": 2.0025, "step": 5287 }, { "epoch": 0.5492884595408746, "grad_norm": 0.4066086709499359, "learning_rate": 4.2288688656231384e-05, "loss": 1.6324, "step": 5288 }, { "epoch": 0.549392334060455, "grad_norm": 0.43332695960998535, "learning_rate": 4.227256771434787e-05, "loss": 1.7869, "step": 5289 }, { "epoch": 0.5494962085800353, "grad_norm": 0.37857580184936523, "learning_rate": 4.2256447595375596e-05, "loss": 1.6478, "step": 5290 }, { "epoch": 0.5496000830996156, "grad_norm": 0.45964285731315613, "learning_rate": 4.224032830103128e-05, "loss": 1.7415, "step": 5291 }, { "epoch": 0.549703957619196, "grad_norm": 0.3959317207336426, "learning_rate": 4.2224209833031445e-05, "loss": 1.6643, "step": 5292 }, { "epoch": 0.5498078321387764, "grad_norm": 0.48961135745048523, "learning_rate": 4.220809219309262e-05, "loss": 1.76, "step": 5293 }, { "epoch": 0.5499117066583568, "grad_norm": 0.38995257019996643, "learning_rate": 4.21919753829312e-05, "loss": 1.6642, "step": 5294 }, { "epoch": 0.550015581177937, "grad_norm": 0.4197613000869751, "learning_rate": 4.217585940426351e-05, "loss": 1.7614, "step": 5295 }, { "epoch": 0.5501194556975174, "grad_norm": 0.40901002287864685, "learning_rate": 4.2159744258805736e-05, "loss": 1.8191, "step": 5296 }, { "epoch": 0.5502233302170978, "grad_norm": 0.4012939929962158, "learning_rate": 4.214362994827405e-05, "loss": 1.4622, "step": 5297 }, { "epoch": 0.5503272047366781, "grad_norm": 0.4541564881801605, "learning_rate": 4.212751647438451e-05, "loss": 1.9298, "step": 5298 }, { "epoch": 0.5504310792562584, "grad_norm": 0.3447628915309906, "learning_rate": 4.2111403838853034e-05, "loss": 1.3871, "step": 5299 }, { "epoch": 0.5505349537758388, "grad_norm": 0.4184315502643585, "learning_rate": 4.209529204339553e-05, "loss": 1.5378, "step": 5300 }, { "epoch": 0.5506388282954191, "grad_norm": 0.49825429916381836, "learning_rate": 4.207918108972776e-05, "loss": 1.8026, "step": 5301 }, { "epoch": 0.5507427028149995, "grad_norm": 0.385919451713562, "learning_rate": 4.2063070979565426e-05, "loss": 1.6863, "step": 5302 }, { "epoch": 0.5508465773345799, "grad_norm": 0.40161100029945374, "learning_rate": 4.204696171462411e-05, "loss": 1.5964, "step": 5303 }, { "epoch": 0.5509504518541601, "grad_norm": 0.4033989906311035, "learning_rate": 4.203085329661935e-05, "loss": 1.7334, "step": 5304 }, { "epoch": 0.5510543263737405, "grad_norm": 0.416106641292572, "learning_rate": 4.201474572726654e-05, "loss": 1.7121, "step": 5305 }, { "epoch": 0.5511582008933209, "grad_norm": 0.43745073676109314, "learning_rate": 4.199863900828102e-05, "loss": 1.7883, "step": 5306 }, { "epoch": 0.5512620754129012, "grad_norm": 0.4253213703632355, "learning_rate": 4.1982533141378056e-05, "loss": 1.7145, "step": 5307 }, { "epoch": 0.5513659499324816, "grad_norm": 0.4426640570163727, "learning_rate": 4.1966428128272754e-05, "loss": 1.7868, "step": 5308 }, { "epoch": 0.5514698244520619, "grad_norm": 0.3622012436389923, "learning_rate": 4.195032397068019e-05, "loss": 1.5348, "step": 5309 }, { "epoch": 0.5515736989716422, "grad_norm": 0.45180484652519226, "learning_rate": 4.193422067031535e-05, "loss": 1.8076, "step": 5310 }, { "epoch": 0.5516775734912226, "grad_norm": 0.41505420207977295, "learning_rate": 4.191811822889307e-05, "loss": 1.7471, "step": 5311 }, { "epoch": 0.551781448010803, "grad_norm": 0.42771202325820923, "learning_rate": 4.190201664812816e-05, "loss": 1.6095, "step": 5312 }, { "epoch": 0.5518853225303832, "grad_norm": 0.41721728444099426, "learning_rate": 4.188591592973534e-05, "loss": 1.7719, "step": 5313 }, { "epoch": 0.5519891970499636, "grad_norm": 0.4197467267513275, "learning_rate": 4.1869816075429145e-05, "loss": 1.7216, "step": 5314 }, { "epoch": 0.552093071569544, "grad_norm": 0.43040528893470764, "learning_rate": 4.1853717086924136e-05, "loss": 1.7245, "step": 5315 }, { "epoch": 0.5521969460891243, "grad_norm": 0.46574804186820984, "learning_rate": 4.183761896593472e-05, "loss": 1.8018, "step": 5316 }, { "epoch": 0.5523008206087047, "grad_norm": 0.41133713722229004, "learning_rate": 4.182152171417521e-05, "loss": 1.628, "step": 5317 }, { "epoch": 0.552404695128285, "grad_norm": 0.3880963623523712, "learning_rate": 4.180542533335984e-05, "loss": 1.6717, "step": 5318 }, { "epoch": 0.5525085696478654, "grad_norm": 0.3985794484615326, "learning_rate": 4.1789329825202736e-05, "loss": 1.7005, "step": 5319 }, { "epoch": 0.5526124441674457, "grad_norm": 0.43377330899238586, "learning_rate": 4.177323519141799e-05, "loss": 1.7466, "step": 5320 }, { "epoch": 0.5527163186870261, "grad_norm": 0.44802311062812805, "learning_rate": 4.1757141433719506e-05, "loss": 1.6648, "step": 5321 }, { "epoch": 0.5528201932066065, "grad_norm": 0.4492226839065552, "learning_rate": 4.174104855382117e-05, "loss": 1.5708, "step": 5322 }, { "epoch": 0.5529240677261867, "grad_norm": 0.4240928590297699, "learning_rate": 4.1724956553436756e-05, "loss": 1.6883, "step": 5323 }, { "epoch": 0.5530279422457671, "grad_norm": 0.4252764582633972, "learning_rate": 4.17088654342799e-05, "loss": 1.7223, "step": 5324 }, { "epoch": 0.5531318167653475, "grad_norm": 0.44476190209388733, "learning_rate": 4.1692775198064194e-05, "loss": 1.8179, "step": 5325 }, { "epoch": 0.5532356912849278, "grad_norm": 0.38664236664772034, "learning_rate": 4.167668584650316e-05, "loss": 1.5664, "step": 5326 }, { "epoch": 0.5533395658045082, "grad_norm": 0.3723793923854828, "learning_rate": 4.166059738131013e-05, "loss": 1.6096, "step": 5327 }, { "epoch": 0.5534434403240885, "grad_norm": 0.4321557283401489, "learning_rate": 4.164450980419844e-05, "loss": 1.8148, "step": 5328 }, { "epoch": 0.5535473148436688, "grad_norm": 0.40069714188575745, "learning_rate": 4.162842311688129e-05, "loss": 1.6699, "step": 5329 }, { "epoch": 0.5536511893632492, "grad_norm": 0.4085390269756317, "learning_rate": 4.161233732107177e-05, "loss": 1.7729, "step": 5330 }, { "epoch": 0.5537550638828296, "grad_norm": 0.4812339246273041, "learning_rate": 4.1596252418482884e-05, "loss": 1.963, "step": 5331 }, { "epoch": 0.5538589384024099, "grad_norm": 0.458366721868515, "learning_rate": 4.158016841082759e-05, "loss": 1.8112, "step": 5332 }, { "epoch": 0.5539628129219902, "grad_norm": 0.4018287658691406, "learning_rate": 4.1564085299818665e-05, "loss": 1.7265, "step": 5333 }, { "epoch": 0.5540666874415706, "grad_norm": 0.41217106580734253, "learning_rate": 4.154800308716885e-05, "loss": 1.7181, "step": 5334 }, { "epoch": 0.5541705619611509, "grad_norm": 0.5003406405448914, "learning_rate": 4.15319217745908e-05, "loss": 1.8835, "step": 5335 }, { "epoch": 0.5542744364807313, "grad_norm": 0.4172942340373993, "learning_rate": 4.151584136379702e-05, "loss": 1.6447, "step": 5336 }, { "epoch": 0.5543783110003117, "grad_norm": 0.4157778322696686, "learning_rate": 4.149976185649994e-05, "loss": 1.7282, "step": 5337 }, { "epoch": 0.5544821855198919, "grad_norm": 0.40136539936065674, "learning_rate": 4.148368325441194e-05, "loss": 1.7406, "step": 5338 }, { "epoch": 0.5545860600394723, "grad_norm": 0.4425347149372101, "learning_rate": 4.1467605559245226e-05, "loss": 1.7426, "step": 5339 }, { "epoch": 0.5546899345590527, "grad_norm": 0.44530102610588074, "learning_rate": 4.145152877271196e-05, "loss": 1.8979, "step": 5340 }, { "epoch": 0.554793809078633, "grad_norm": 0.41977494955062866, "learning_rate": 4.143545289652422e-05, "loss": 1.7588, "step": 5341 }, { "epoch": 0.5548976835982133, "grad_norm": 0.43756383657455444, "learning_rate": 4.141937793239392e-05, "loss": 1.6979, "step": 5342 }, { "epoch": 0.5550015581177937, "grad_norm": 0.42378005385398865, "learning_rate": 4.140330388203295e-05, "loss": 1.6303, "step": 5343 }, { "epoch": 0.555105432637374, "grad_norm": 0.39726606011390686, "learning_rate": 4.138723074715306e-05, "loss": 1.6592, "step": 5344 }, { "epoch": 0.5552093071569544, "grad_norm": 0.4202333092689514, "learning_rate": 4.1371158529465916e-05, "loss": 1.7364, "step": 5345 }, { "epoch": 0.5553131816765348, "grad_norm": 0.40389859676361084, "learning_rate": 4.1355087230683065e-05, "loss": 1.5759, "step": 5346 }, { "epoch": 0.5554170561961151, "grad_norm": 0.37404200434684753, "learning_rate": 4.1339016852516e-05, "loss": 1.544, "step": 5347 }, { "epoch": 0.5555209307156954, "grad_norm": 0.45083087682724, "learning_rate": 4.132294739667609e-05, "loss": 1.8926, "step": 5348 }, { "epoch": 0.5556248052352758, "grad_norm": 0.4377882480621338, "learning_rate": 4.130687886487459e-05, "loss": 1.741, "step": 5349 }, { "epoch": 0.5557286797548562, "grad_norm": 0.39583009481430054, "learning_rate": 4.1290811258822685e-05, "loss": 1.6806, "step": 5350 }, { "epoch": 0.5558325542744365, "grad_norm": 0.48825472593307495, "learning_rate": 4.127474458023146e-05, "loss": 1.9815, "step": 5351 }, { "epoch": 0.5559364287940168, "grad_norm": 0.39083537459373474, "learning_rate": 4.125867883081186e-05, "loss": 1.7249, "step": 5352 }, { "epoch": 0.5560403033135972, "grad_norm": 0.385078489780426, "learning_rate": 4.124261401227477e-05, "loss": 1.546, "step": 5353 }, { "epoch": 0.5561441778331775, "grad_norm": 0.41559967398643494, "learning_rate": 4.122655012633101e-05, "loss": 1.6632, "step": 5354 }, { "epoch": 0.5562480523527579, "grad_norm": 0.4069918990135193, "learning_rate": 4.121048717469119e-05, "loss": 1.6128, "step": 5355 }, { "epoch": 0.5563519268723383, "grad_norm": 0.43804723024368286, "learning_rate": 4.119442515906595e-05, "loss": 1.618, "step": 5356 }, { "epoch": 0.5564558013919185, "grad_norm": 0.46744030714035034, "learning_rate": 4.117836408116574e-05, "loss": 1.787, "step": 5357 }, { "epoch": 0.5565596759114989, "grad_norm": 0.4054070711135864, "learning_rate": 4.116230394270094e-05, "loss": 1.6505, "step": 5358 }, { "epoch": 0.5566635504310793, "grad_norm": 0.4278983771800995, "learning_rate": 4.114624474538183e-05, "loss": 1.7579, "step": 5359 }, { "epoch": 0.5567674249506596, "grad_norm": 0.4365118443965912, "learning_rate": 4.113018649091861e-05, "loss": 1.7672, "step": 5360 }, { "epoch": 0.5568712994702399, "grad_norm": 0.4227050244808197, "learning_rate": 4.111412918102132e-05, "loss": 1.8521, "step": 5361 }, { "epoch": 0.5569751739898203, "grad_norm": 0.41989371180534363, "learning_rate": 4.1098072817399956e-05, "loss": 1.6816, "step": 5362 }, { "epoch": 0.5570790485094006, "grad_norm": 0.43567296862602234, "learning_rate": 4.108201740176443e-05, "loss": 1.7746, "step": 5363 }, { "epoch": 0.557182923028981, "grad_norm": 0.48267829418182373, "learning_rate": 4.106596293582446e-05, "loss": 1.7837, "step": 5364 }, { "epoch": 0.5572867975485614, "grad_norm": 0.44921717047691345, "learning_rate": 4.104990942128976e-05, "loss": 1.8066, "step": 5365 }, { "epoch": 0.5573906720681416, "grad_norm": 0.4056643843650818, "learning_rate": 4.10338568598699e-05, "loss": 1.472, "step": 5366 }, { "epoch": 0.557494546587722, "grad_norm": 0.4002752900123596, "learning_rate": 4.1017805253274336e-05, "loss": 1.6328, "step": 5367 }, { "epoch": 0.5575984211073024, "grad_norm": 0.42509204149246216, "learning_rate": 4.100175460321244e-05, "loss": 1.5223, "step": 5368 }, { "epoch": 0.5577022956268827, "grad_norm": 0.391414612531662, "learning_rate": 4.0985704911393506e-05, "loss": 1.685, "step": 5369 }, { "epoch": 0.557806170146463, "grad_norm": 0.4597155749797821, "learning_rate": 4.096965617952667e-05, "loss": 1.7821, "step": 5370 }, { "epoch": 0.5579100446660434, "grad_norm": 0.4139951467514038, "learning_rate": 4.0953608409321024e-05, "loss": 1.7831, "step": 5371 }, { "epoch": 0.5580139191856238, "grad_norm": 0.4334876239299774, "learning_rate": 4.093756160248551e-05, "loss": 1.6776, "step": 5372 }, { "epoch": 0.5581177937052041, "grad_norm": 0.47372686862945557, "learning_rate": 4.092151576072901e-05, "loss": 1.8743, "step": 5373 }, { "epoch": 0.5582216682247845, "grad_norm": 0.39616334438323975, "learning_rate": 4.0905470885760244e-05, "loss": 1.6113, "step": 5374 }, { "epoch": 0.5583255427443649, "grad_norm": 0.44439664483070374, "learning_rate": 4.088942697928789e-05, "loss": 1.842, "step": 5375 }, { "epoch": 0.5584294172639451, "grad_norm": 0.3774793744087219, "learning_rate": 4.087338404302052e-05, "loss": 1.7603, "step": 5376 }, { "epoch": 0.5585332917835255, "grad_norm": 0.4046783745288849, "learning_rate": 4.0857342078666546e-05, "loss": 1.597, "step": 5377 }, { "epoch": 0.5586371663031059, "grad_norm": 0.41748616099357605, "learning_rate": 4.0841301087934326e-05, "loss": 1.7326, "step": 5378 }, { "epoch": 0.5587410408226862, "grad_norm": 0.3903839886188507, "learning_rate": 4.082526107253212e-05, "loss": 1.5858, "step": 5379 }, { "epoch": 0.5588449153422665, "grad_norm": 0.4938502311706543, "learning_rate": 4.080922203416801e-05, "loss": 1.5894, "step": 5380 }, { "epoch": 0.5589487898618469, "grad_norm": 0.4077463150024414, "learning_rate": 4.079318397455007e-05, "loss": 1.6721, "step": 5381 }, { "epoch": 0.5590526643814272, "grad_norm": 0.42645683884620667, "learning_rate": 4.077714689538624e-05, "loss": 1.8013, "step": 5382 }, { "epoch": 0.5591565389010076, "grad_norm": 0.3947215676307678, "learning_rate": 4.076111079838431e-05, "loss": 1.6274, "step": 5383 }, { "epoch": 0.559260413420588, "grad_norm": 0.43122047185897827, "learning_rate": 4.074507568525201e-05, "loss": 1.7273, "step": 5384 }, { "epoch": 0.5593642879401682, "grad_norm": 0.41860494017601013, "learning_rate": 4.072904155769697e-05, "loss": 1.6412, "step": 5385 }, { "epoch": 0.5594681624597486, "grad_norm": 0.4793721139431, "learning_rate": 4.0713008417426676e-05, "loss": 1.8271, "step": 5386 }, { "epoch": 0.559572036979329, "grad_norm": 0.4130531847476959, "learning_rate": 4.069697626614855e-05, "loss": 1.7992, "step": 5387 }, { "epoch": 0.5596759114989093, "grad_norm": 0.40088602900505066, "learning_rate": 4.068094510556989e-05, "loss": 1.4903, "step": 5388 }, { "epoch": 0.5597797860184897, "grad_norm": 0.40667760372161865, "learning_rate": 4.066491493739788e-05, "loss": 1.5853, "step": 5389 }, { "epoch": 0.55988366053807, "grad_norm": 0.44721919298171997, "learning_rate": 4.064888576333961e-05, "loss": 1.7506, "step": 5390 }, { "epoch": 0.5599875350576503, "grad_norm": 0.4468611478805542, "learning_rate": 4.063285758510207e-05, "loss": 1.6431, "step": 5391 }, { "epoch": 0.5600914095772307, "grad_norm": 0.39779260754585266, "learning_rate": 4.061683040439214e-05, "loss": 1.7715, "step": 5392 }, { "epoch": 0.5601952840968111, "grad_norm": 0.41888242959976196, "learning_rate": 4.060080422291656e-05, "loss": 1.6373, "step": 5393 }, { "epoch": 0.5602991586163913, "grad_norm": 0.4231390953063965, "learning_rate": 4.058477904238203e-05, "loss": 1.7707, "step": 5394 }, { "epoch": 0.5604030331359717, "grad_norm": 0.4443194568157196, "learning_rate": 4.056875486449507e-05, "loss": 1.8821, "step": 5395 }, { "epoch": 0.5605069076555521, "grad_norm": 0.4171590507030487, "learning_rate": 4.055273169096216e-05, "loss": 1.7404, "step": 5396 }, { "epoch": 0.5606107821751325, "grad_norm": 0.41153210401535034, "learning_rate": 4.053670952348964e-05, "loss": 1.679, "step": 5397 }, { "epoch": 0.5607146566947128, "grad_norm": 0.40622779726982117, "learning_rate": 4.052068836378374e-05, "loss": 1.69, "step": 5398 }, { "epoch": 0.5608185312142931, "grad_norm": 0.40083175897598267, "learning_rate": 4.050466821355058e-05, "loss": 1.6595, "step": 5399 }, { "epoch": 0.5609224057338735, "grad_norm": 0.4463566243648529, "learning_rate": 4.0488649074496186e-05, "loss": 1.7722, "step": 5400 }, { "epoch": 0.5610262802534538, "grad_norm": 0.39217326045036316, "learning_rate": 4.04726309483265e-05, "loss": 1.5485, "step": 5401 }, { "epoch": 0.5611301547730342, "grad_norm": 0.39511242508888245, "learning_rate": 4.045661383674727e-05, "loss": 1.6444, "step": 5402 }, { "epoch": 0.5612340292926146, "grad_norm": 0.411687970161438, "learning_rate": 4.044059774146423e-05, "loss": 1.8602, "step": 5403 }, { "epoch": 0.5613379038121948, "grad_norm": 0.38304659724235535, "learning_rate": 4.0424582664182986e-05, "loss": 1.6232, "step": 5404 }, { "epoch": 0.5614417783317752, "grad_norm": 0.4158017635345459, "learning_rate": 4.040856860660897e-05, "loss": 1.6325, "step": 5405 }, { "epoch": 0.5615456528513556, "grad_norm": 0.43518683314323425, "learning_rate": 4.03925555704476e-05, "loss": 1.6961, "step": 5406 }, { "epoch": 0.5616495273709359, "grad_norm": 0.3899015486240387, "learning_rate": 4.0376543557404125e-05, "loss": 1.6689, "step": 5407 }, { "epoch": 0.5617534018905163, "grad_norm": 0.3894193470478058, "learning_rate": 4.036053256918368e-05, "loss": 1.5847, "step": 5408 }, { "epoch": 0.5618572764100966, "grad_norm": 0.41329947113990784, "learning_rate": 4.034452260749132e-05, "loss": 1.7453, "step": 5409 }, { "epoch": 0.5619611509296769, "grad_norm": 0.43754690885543823, "learning_rate": 4.032851367403201e-05, "loss": 1.7059, "step": 5410 }, { "epoch": 0.5620650254492573, "grad_norm": 0.423709899187088, "learning_rate": 4.031250577051053e-05, "loss": 1.6802, "step": 5411 }, { "epoch": 0.5621688999688377, "grad_norm": 0.40563976764678955, "learning_rate": 4.0296498898631644e-05, "loss": 1.7492, "step": 5412 }, { "epoch": 0.562272774488418, "grad_norm": 0.40074846148490906, "learning_rate": 4.028049306009992e-05, "loss": 1.7393, "step": 5413 }, { "epoch": 0.5623766490079983, "grad_norm": 0.4513157308101654, "learning_rate": 4.026448825661987e-05, "loss": 1.7905, "step": 5414 }, { "epoch": 0.5624805235275787, "grad_norm": 0.4043268859386444, "learning_rate": 4.024848448989588e-05, "loss": 1.8213, "step": 5415 }, { "epoch": 0.562584398047159, "grad_norm": 0.36234188079833984, "learning_rate": 4.023248176163224e-05, "loss": 1.4981, "step": 5416 }, { "epoch": 0.5626882725667394, "grad_norm": 0.42101240158081055, "learning_rate": 4.021648007353309e-05, "loss": 1.6849, "step": 5417 }, { "epoch": 0.5627921470863197, "grad_norm": 0.3881211280822754, "learning_rate": 4.0200479427302486e-05, "loss": 1.5648, "step": 5418 }, { "epoch": 0.5628960216059, "grad_norm": 0.4609657824039459, "learning_rate": 4.0184479824644413e-05, "loss": 1.8176, "step": 5419 }, { "epoch": 0.5629998961254804, "grad_norm": 0.4178425669670105, "learning_rate": 4.016848126726266e-05, "loss": 1.6165, "step": 5420 }, { "epoch": 0.5631037706450608, "grad_norm": 0.390349805355072, "learning_rate": 4.0152483756860956e-05, "loss": 1.5583, "step": 5421 }, { "epoch": 0.5632076451646412, "grad_norm": 0.40884721279144287, "learning_rate": 4.0136487295142913e-05, "loss": 1.7998, "step": 5422 }, { "epoch": 0.5633115196842214, "grad_norm": 0.4485437273979187, "learning_rate": 4.0120491883812065e-05, "loss": 2.054, "step": 5423 }, { "epoch": 0.5634153942038018, "grad_norm": 0.43038633465766907, "learning_rate": 4.010449752457175e-05, "loss": 1.7295, "step": 5424 }, { "epoch": 0.5635192687233822, "grad_norm": 0.36314454674720764, "learning_rate": 4.0088504219125253e-05, "loss": 1.21, "step": 5425 }, { "epoch": 0.5636231432429625, "grad_norm": 0.3857851028442383, "learning_rate": 4.007251196917576e-05, "loss": 1.6524, "step": 5426 }, { "epoch": 0.5637270177625429, "grad_norm": 0.3993614912033081, "learning_rate": 4.00565207764263e-05, "loss": 1.6198, "step": 5427 }, { "epoch": 0.5638308922821232, "grad_norm": 0.41268518567085266, "learning_rate": 4.00405306425798e-05, "loss": 1.6297, "step": 5428 }, { "epoch": 0.5639347668017035, "grad_norm": 0.410853773355484, "learning_rate": 4.0024541569339126e-05, "loss": 1.6958, "step": 5429 }, { "epoch": 0.5640386413212839, "grad_norm": 0.39320501685142517, "learning_rate": 4.0008553558406945e-05, "loss": 1.5767, "step": 5430 }, { "epoch": 0.5641425158408643, "grad_norm": 0.444997102022171, "learning_rate": 3.9992566611485866e-05, "loss": 1.6399, "step": 5431 }, { "epoch": 0.5642463903604445, "grad_norm": 0.4061078131198883, "learning_rate": 3.9976580730278405e-05, "loss": 1.485, "step": 5432 }, { "epoch": 0.5643502648800249, "grad_norm": 0.38394030928611755, "learning_rate": 3.99605959164869e-05, "loss": 1.6078, "step": 5433 }, { "epoch": 0.5644541393996053, "grad_norm": 0.46738219261169434, "learning_rate": 3.9944612171813616e-05, "loss": 1.7178, "step": 5434 }, { "epoch": 0.5645580139191856, "grad_norm": 0.47596266865730286, "learning_rate": 3.992862949796072e-05, "loss": 1.7114, "step": 5435 }, { "epoch": 0.564661888438766, "grad_norm": 0.4068540930747986, "learning_rate": 3.991264789663019e-05, "loss": 1.5976, "step": 5436 }, { "epoch": 0.5647657629583464, "grad_norm": 0.4116705060005188, "learning_rate": 3.989666736952399e-05, "loss": 1.7029, "step": 5437 }, { "epoch": 0.5648696374779266, "grad_norm": 0.37593719363212585, "learning_rate": 3.9880687918343916e-05, "loss": 1.5416, "step": 5438 }, { "epoch": 0.564973511997507, "grad_norm": 0.43204763531684875, "learning_rate": 3.986470954479163e-05, "loss": 1.7622, "step": 5439 }, { "epoch": 0.5650773865170874, "grad_norm": 0.4367099106311798, "learning_rate": 3.984873225056873e-05, "loss": 1.6935, "step": 5440 }, { "epoch": 0.5651812610366677, "grad_norm": 0.3884550631046295, "learning_rate": 3.983275603737666e-05, "loss": 1.6477, "step": 5441 }, { "epoch": 0.565285135556248, "grad_norm": 0.40880388021469116, "learning_rate": 3.981678090691676e-05, "loss": 1.7526, "step": 5442 }, { "epoch": 0.5653890100758284, "grad_norm": 0.40109533071517944, "learning_rate": 3.980080686089026e-05, "loss": 1.7972, "step": 5443 }, { "epoch": 0.5654928845954087, "grad_norm": 0.4425828456878662, "learning_rate": 3.9784833900998286e-05, "loss": 1.738, "step": 5444 }, { "epoch": 0.5655967591149891, "grad_norm": 0.4051485061645508, "learning_rate": 3.97688620289418e-05, "loss": 1.6437, "step": 5445 }, { "epoch": 0.5657006336345695, "grad_norm": 0.3986642360687256, "learning_rate": 3.975289124642171e-05, "loss": 1.6672, "step": 5446 }, { "epoch": 0.5658045081541497, "grad_norm": 0.3762451112270355, "learning_rate": 3.9736921555138787e-05, "loss": 1.6666, "step": 5447 }, { "epoch": 0.5659083826737301, "grad_norm": 0.3796166479587555, "learning_rate": 3.972095295679365e-05, "loss": 1.6013, "step": 5448 }, { "epoch": 0.5660122571933105, "grad_norm": 0.4417518377304077, "learning_rate": 3.9704985453086833e-05, "loss": 1.6795, "step": 5449 }, { "epoch": 0.5661161317128909, "grad_norm": 0.3697144389152527, "learning_rate": 3.968901904571877e-05, "loss": 1.4858, "step": 5450 }, { "epoch": 0.5662200062324712, "grad_norm": 0.3945258557796478, "learning_rate": 3.967305373638976e-05, "loss": 1.5509, "step": 5451 }, { "epoch": 0.5663238807520515, "grad_norm": 0.39280179142951965, "learning_rate": 3.965708952679996e-05, "loss": 1.6271, "step": 5452 }, { "epoch": 0.5664277552716319, "grad_norm": 0.4188973605632782, "learning_rate": 3.9641126418649456e-05, "loss": 1.656, "step": 5453 }, { "epoch": 0.5665316297912122, "grad_norm": 0.39895448088645935, "learning_rate": 3.962516441363818e-05, "loss": 1.6666, "step": 5454 }, { "epoch": 0.5666355043107926, "grad_norm": 0.38310202956199646, "learning_rate": 3.9609203513465974e-05, "loss": 1.5975, "step": 5455 }, { "epoch": 0.566739378830373, "grad_norm": 0.43473097681999207, "learning_rate": 3.9593243719832536e-05, "loss": 1.7687, "step": 5456 }, { "epoch": 0.5668432533499532, "grad_norm": 0.4295366406440735, "learning_rate": 3.957728503443748e-05, "loss": 1.6263, "step": 5457 }, { "epoch": 0.5669471278695336, "grad_norm": 0.4174087643623352, "learning_rate": 3.956132745898025e-05, "loss": 1.6705, "step": 5458 }, { "epoch": 0.567051002389114, "grad_norm": 0.41141003370285034, "learning_rate": 3.954537099516021e-05, "loss": 1.6423, "step": 5459 }, { "epoch": 0.5671548769086943, "grad_norm": 0.44283923506736755, "learning_rate": 3.952941564467665e-05, "loss": 1.7335, "step": 5460 }, { "epoch": 0.5672587514282746, "grad_norm": 0.4794785678386688, "learning_rate": 3.951346140922863e-05, "loss": 1.7162, "step": 5461 }, { "epoch": 0.567362625947855, "grad_norm": 0.39859795570373535, "learning_rate": 3.949750829051516e-05, "loss": 1.7051, "step": 5462 }, { "epoch": 0.5674665004674353, "grad_norm": 0.39325210452079773, "learning_rate": 3.948155629023515e-05, "loss": 1.5073, "step": 5463 }, { "epoch": 0.5675703749870157, "grad_norm": 0.37789732217788696, "learning_rate": 3.946560541008734e-05, "loss": 1.7606, "step": 5464 }, { "epoch": 0.5676742495065961, "grad_norm": 0.4179360270500183, "learning_rate": 3.9449655651770365e-05, "loss": 1.576, "step": 5465 }, { "epoch": 0.5677781240261763, "grad_norm": 0.38552039861679077, "learning_rate": 3.943370701698281e-05, "loss": 1.6677, "step": 5466 }, { "epoch": 0.5678819985457567, "grad_norm": 0.4086502194404602, "learning_rate": 3.9417759507423004e-05, "loss": 1.7007, "step": 5467 }, { "epoch": 0.5679858730653371, "grad_norm": 0.43782469630241394, "learning_rate": 3.940181312478928e-05, "loss": 1.788, "step": 5468 }, { "epoch": 0.5680897475849174, "grad_norm": 0.40869951248168945, "learning_rate": 3.938586787077978e-05, "loss": 1.7881, "step": 5469 }, { "epoch": 0.5681936221044978, "grad_norm": 0.4343576729297638, "learning_rate": 3.936992374709256e-05, "loss": 1.8009, "step": 5470 }, { "epoch": 0.5682974966240781, "grad_norm": 0.4356035590171814, "learning_rate": 3.935398075542554e-05, "loss": 1.7802, "step": 5471 }, { "epoch": 0.5684013711436584, "grad_norm": 0.4507029950618744, "learning_rate": 3.933803889747655e-05, "loss": 1.5211, "step": 5472 }, { "epoch": 0.5685052456632388, "grad_norm": 0.3965437114238739, "learning_rate": 3.9322098174943226e-05, "loss": 1.6319, "step": 5473 }, { "epoch": 0.5686091201828192, "grad_norm": 0.4066412150859833, "learning_rate": 3.930615858952316e-05, "loss": 1.4707, "step": 5474 }, { "epoch": 0.5687129947023996, "grad_norm": 0.4407905638217926, "learning_rate": 3.929022014291379e-05, "loss": 1.7746, "step": 5475 }, { "epoch": 0.5688168692219798, "grad_norm": 0.43420854210853577, "learning_rate": 3.927428283681245e-05, "loss": 1.7012, "step": 5476 }, { "epoch": 0.5689207437415602, "grad_norm": 0.39704859256744385, "learning_rate": 3.925834667291631e-05, "loss": 1.623, "step": 5477 }, { "epoch": 0.5690246182611406, "grad_norm": 0.39629414677619934, "learning_rate": 3.924241165292245e-05, "loss": 1.6304, "step": 5478 }, { "epoch": 0.5691284927807209, "grad_norm": 0.4737340211868286, "learning_rate": 3.922647777852786e-05, "loss": 1.7869, "step": 5479 }, { "epoch": 0.5692323673003012, "grad_norm": 0.41576650738716125, "learning_rate": 3.9210545051429337e-05, "loss": 1.7863, "step": 5480 }, { "epoch": 0.5693362418198816, "grad_norm": 0.4058329463005066, "learning_rate": 3.919461347332361e-05, "loss": 1.7295, "step": 5481 }, { "epoch": 0.5694401163394619, "grad_norm": 0.38741639256477356, "learning_rate": 3.917868304590726e-05, "loss": 1.5988, "step": 5482 }, { "epoch": 0.5695439908590423, "grad_norm": 0.40862467885017395, "learning_rate": 3.916275377087676e-05, "loss": 1.6312, "step": 5483 }, { "epoch": 0.5696478653786227, "grad_norm": 0.38968440890312195, "learning_rate": 3.9146825649928446e-05, "loss": 1.7144, "step": 5484 }, { "epoch": 0.5697517398982029, "grad_norm": 0.41511067748069763, "learning_rate": 3.913089868475855e-05, "loss": 1.8591, "step": 5485 }, { "epoch": 0.5698556144177833, "grad_norm": 0.4132608473300934, "learning_rate": 3.911497287706315e-05, "loss": 1.7331, "step": 5486 }, { "epoch": 0.5699594889373637, "grad_norm": 0.41503503918647766, "learning_rate": 3.909904822853823e-05, "loss": 1.867, "step": 5487 }, { "epoch": 0.570063363456944, "grad_norm": 0.39798980951309204, "learning_rate": 3.908312474087966e-05, "loss": 1.7591, "step": 5488 }, { "epoch": 0.5701672379765244, "grad_norm": 0.4197697639465332, "learning_rate": 3.906720241578314e-05, "loss": 1.7974, "step": 5489 }, { "epoch": 0.5702711124961047, "grad_norm": 0.47081831097602844, "learning_rate": 3.905128125494427e-05, "loss": 1.7685, "step": 5490 }, { "epoch": 0.570374987015685, "grad_norm": 0.42261427640914917, "learning_rate": 3.903536126005858e-05, "loss": 1.7431, "step": 5491 }, { "epoch": 0.5704788615352654, "grad_norm": 0.39490261673927307, "learning_rate": 3.901944243282135e-05, "loss": 1.8147, "step": 5492 }, { "epoch": 0.5705827360548458, "grad_norm": 0.41960594058036804, "learning_rate": 3.900352477492786e-05, "loss": 1.7233, "step": 5493 }, { "epoch": 0.570686610574426, "grad_norm": 0.3711908161640167, "learning_rate": 3.8987608288073216e-05, "loss": 1.5517, "step": 5494 }, { "epoch": 0.5707904850940064, "grad_norm": 0.433907151222229, "learning_rate": 3.897169297395238e-05, "loss": 1.6967, "step": 5495 }, { "epoch": 0.5708943596135868, "grad_norm": 0.40768545866012573, "learning_rate": 3.895577883426022e-05, "loss": 1.4462, "step": 5496 }, { "epoch": 0.5709982341331671, "grad_norm": 0.4261188209056854, "learning_rate": 3.893986587069147e-05, "loss": 1.8624, "step": 5497 }, { "epoch": 0.5711021086527475, "grad_norm": 0.3955959379673004, "learning_rate": 3.892395408494073e-05, "loss": 1.6179, "step": 5498 }, { "epoch": 0.5712059831723278, "grad_norm": 0.4365078806877136, "learning_rate": 3.890804347870248e-05, "loss": 1.7387, "step": 5499 }, { "epoch": 0.5713098576919082, "grad_norm": 0.4353174865245819, "learning_rate": 3.889213405367107e-05, "loss": 1.4776, "step": 5500 }, { "epoch": 0.5714137322114885, "grad_norm": 0.39252567291259766, "learning_rate": 3.887622581154077e-05, "loss": 1.6338, "step": 5501 }, { "epoch": 0.5715176067310689, "grad_norm": 0.49222537875175476, "learning_rate": 3.886031875400562e-05, "loss": 1.9512, "step": 5502 }, { "epoch": 0.5716214812506493, "grad_norm": 0.41813385486602783, "learning_rate": 3.884441288275965e-05, "loss": 1.7856, "step": 5503 }, { "epoch": 0.5717253557702295, "grad_norm": 0.4292755424976349, "learning_rate": 3.8828508199496694e-05, "loss": 1.6817, "step": 5504 }, { "epoch": 0.5718292302898099, "grad_norm": 0.4089149832725525, "learning_rate": 3.8812604705910454e-05, "loss": 1.803, "step": 5505 }, { "epoch": 0.5719331048093903, "grad_norm": 0.44375675916671753, "learning_rate": 3.8796702403694554e-05, "loss": 1.5654, "step": 5506 }, { "epoch": 0.5720369793289706, "grad_norm": 0.3902745246887207, "learning_rate": 3.8780801294542476e-05, "loss": 1.6287, "step": 5507 }, { "epoch": 0.572140853848551, "grad_norm": 0.38409364223480225, "learning_rate": 3.876490138014752e-05, "loss": 1.5963, "step": 5508 }, { "epoch": 0.5722447283681313, "grad_norm": 0.41062119603157043, "learning_rate": 3.874900266220295e-05, "loss": 1.6819, "step": 5509 }, { "epoch": 0.5723486028877116, "grad_norm": 0.38653069734573364, "learning_rate": 3.873310514240183e-05, "loss": 1.5166, "step": 5510 }, { "epoch": 0.572452477407292, "grad_norm": 0.38248497247695923, "learning_rate": 3.871720882243712e-05, "loss": 1.505, "step": 5511 }, { "epoch": 0.5725563519268724, "grad_norm": 0.41657835245132446, "learning_rate": 3.870131370400166e-05, "loss": 1.6057, "step": 5512 }, { "epoch": 0.5726602264464526, "grad_norm": 0.4237949252128601, "learning_rate": 3.868541978878818e-05, "loss": 1.6186, "step": 5513 }, { "epoch": 0.572764100966033, "grad_norm": 0.4268006682395935, "learning_rate": 3.866952707848922e-05, "loss": 1.5518, "step": 5514 }, { "epoch": 0.5728679754856134, "grad_norm": 0.46735864877700806, "learning_rate": 3.865363557479723e-05, "loss": 1.7631, "step": 5515 }, { "epoch": 0.5729718500051937, "grad_norm": 0.45263731479644775, "learning_rate": 3.863774527940457e-05, "loss": 1.7801, "step": 5516 }, { "epoch": 0.5730757245247741, "grad_norm": 0.38436025381088257, "learning_rate": 3.86218561940034e-05, "loss": 1.4836, "step": 5517 }, { "epoch": 0.5731795990443544, "grad_norm": 0.3812609910964966, "learning_rate": 3.860596832028579e-05, "loss": 1.5733, "step": 5518 }, { "epoch": 0.5732834735639347, "grad_norm": 0.4150224030017853, "learning_rate": 3.8590081659943694e-05, "loss": 1.7095, "step": 5519 }, { "epoch": 0.5733873480835151, "grad_norm": 0.4313552975654602, "learning_rate": 3.8574196214668876e-05, "loss": 1.7818, "step": 5520 }, { "epoch": 0.5734912226030955, "grad_norm": 0.3813556730747223, "learning_rate": 3.855831198615304e-05, "loss": 1.5572, "step": 5521 }, { "epoch": 0.5735950971226758, "grad_norm": 0.3927074670791626, "learning_rate": 3.854242897608775e-05, "loss": 1.4641, "step": 5522 }, { "epoch": 0.5736989716422561, "grad_norm": 0.38922983407974243, "learning_rate": 3.852654718616438e-05, "loss": 1.3964, "step": 5523 }, { "epoch": 0.5738028461618365, "grad_norm": 0.39198037981987, "learning_rate": 3.851066661807424e-05, "loss": 1.66, "step": 5524 }, { "epoch": 0.5739067206814168, "grad_norm": 0.41385430097579956, "learning_rate": 3.849478727350849e-05, "loss": 1.7209, "step": 5525 }, { "epoch": 0.5740105952009972, "grad_norm": 0.4008021652698517, "learning_rate": 3.847890915415815e-05, "loss": 1.6168, "step": 5526 }, { "epoch": 0.5741144697205776, "grad_norm": 0.42012637853622437, "learning_rate": 3.8463032261714114e-05, "loss": 1.7073, "step": 5527 }, { "epoch": 0.5742183442401579, "grad_norm": 0.428591251373291, "learning_rate": 3.8447156597867153e-05, "loss": 1.7536, "step": 5528 }, { "epoch": 0.5743222187597382, "grad_norm": 0.45808881521224976, "learning_rate": 3.843128216430791e-05, "loss": 1.6032, "step": 5529 }, { "epoch": 0.5744260932793186, "grad_norm": 0.41718247532844543, "learning_rate": 3.841540896272685e-05, "loss": 1.6722, "step": 5530 }, { "epoch": 0.574529967798899, "grad_norm": 0.4867742955684662, "learning_rate": 3.83995369948144e-05, "loss": 1.7742, "step": 5531 }, { "epoch": 0.5746338423184792, "grad_norm": 0.4175654947757721, "learning_rate": 3.838366626226078e-05, "loss": 1.5992, "step": 5532 }, { "epoch": 0.5747377168380596, "grad_norm": 0.3990960121154785, "learning_rate": 3.836779676675606e-05, "loss": 1.7132, "step": 5533 }, { "epoch": 0.57484159135764, "grad_norm": 0.43652915954589844, "learning_rate": 3.8351928509990255e-05, "loss": 1.8061, "step": 5534 }, { "epoch": 0.5749454658772203, "grad_norm": 0.4044228196144104, "learning_rate": 3.833606149365323e-05, "loss": 1.7402, "step": 5535 }, { "epoch": 0.5750493403968007, "grad_norm": 0.41677069664001465, "learning_rate": 3.832019571943465e-05, "loss": 1.6522, "step": 5536 }, { "epoch": 0.575153214916381, "grad_norm": 0.40722280740737915, "learning_rate": 3.8304331189024127e-05, "loss": 1.8343, "step": 5537 }, { "epoch": 0.5752570894359613, "grad_norm": 0.40876513719558716, "learning_rate": 3.828846790411111e-05, "loss": 1.6463, "step": 5538 }, { "epoch": 0.5753609639555417, "grad_norm": 0.4222007095813751, "learning_rate": 3.82726058663849e-05, "loss": 1.672, "step": 5539 }, { "epoch": 0.5754648384751221, "grad_norm": 0.4677537679672241, "learning_rate": 3.8256745077534684e-05, "loss": 1.8548, "step": 5540 }, { "epoch": 0.5755687129947024, "grad_norm": 0.4388614594936371, "learning_rate": 3.824088553924954e-05, "loss": 1.8835, "step": 5541 }, { "epoch": 0.5756725875142827, "grad_norm": 0.43357276916503906, "learning_rate": 3.822502725321833e-05, "loss": 1.831, "step": 5542 }, { "epoch": 0.5757764620338631, "grad_norm": 0.40327322483062744, "learning_rate": 3.8209170221129875e-05, "loss": 1.7199, "step": 5543 }, { "epoch": 0.5758803365534434, "grad_norm": 0.4302258789539337, "learning_rate": 3.819331444467284e-05, "loss": 1.8026, "step": 5544 }, { "epoch": 0.5759842110730238, "grad_norm": 0.4435931444168091, "learning_rate": 3.817745992553571e-05, "loss": 1.6854, "step": 5545 }, { "epoch": 0.5760880855926042, "grad_norm": 0.40510040521621704, "learning_rate": 3.816160666540687e-05, "loss": 1.6003, "step": 5546 }, { "epoch": 0.5761919601121844, "grad_norm": 0.43098416924476624, "learning_rate": 3.814575466597459e-05, "loss": 1.8288, "step": 5547 }, { "epoch": 0.5762958346317648, "grad_norm": 0.4217774569988251, "learning_rate": 3.8129903928926956e-05, "loss": 1.8433, "step": 5548 }, { "epoch": 0.5763997091513452, "grad_norm": 0.4399735629558563, "learning_rate": 3.811405445595197e-05, "loss": 1.6735, "step": 5549 }, { "epoch": 0.5765035836709255, "grad_norm": 0.39600399136543274, "learning_rate": 3.809820624873748e-05, "loss": 1.5637, "step": 5550 }, { "epoch": 0.5766074581905059, "grad_norm": 0.4116421937942505, "learning_rate": 3.808235930897118e-05, "loss": 1.6252, "step": 5551 }, { "epoch": 0.5767113327100862, "grad_norm": 0.4399339258670807, "learning_rate": 3.806651363834065e-05, "loss": 1.7843, "step": 5552 }, { "epoch": 0.5768152072296666, "grad_norm": 0.3872789442539215, "learning_rate": 3.805066923853333e-05, "loss": 1.547, "step": 5553 }, { "epoch": 0.5769190817492469, "grad_norm": 0.38812029361724854, "learning_rate": 3.803482611123654e-05, "loss": 1.5512, "step": 5554 }, { "epoch": 0.5770229562688273, "grad_norm": 0.4194970428943634, "learning_rate": 3.8018984258137423e-05, "loss": 1.5616, "step": 5555 }, { "epoch": 0.5771268307884077, "grad_norm": 0.44213446974754333, "learning_rate": 3.800314368092302e-05, "loss": 1.8645, "step": 5556 }, { "epoch": 0.5772307053079879, "grad_norm": 0.39991211891174316, "learning_rate": 3.798730438128027e-05, "loss": 1.7879, "step": 5557 }, { "epoch": 0.5773345798275683, "grad_norm": 0.4132145047187805, "learning_rate": 3.797146636089587e-05, "loss": 1.8618, "step": 5558 }, { "epoch": 0.5774384543471487, "grad_norm": 0.4226401150226593, "learning_rate": 3.795562962145649e-05, "loss": 1.5837, "step": 5559 }, { "epoch": 0.577542328866729, "grad_norm": 0.3981073498725891, "learning_rate": 3.793979416464862e-05, "loss": 1.6511, "step": 5560 }, { "epoch": 0.5776462033863093, "grad_norm": 0.399758517742157, "learning_rate": 3.792395999215857e-05, "loss": 1.5685, "step": 5561 }, { "epoch": 0.5777500779058897, "grad_norm": 0.4009932279586792, "learning_rate": 3.790812710567259e-05, "loss": 1.6031, "step": 5562 }, { "epoch": 0.57785395242547, "grad_norm": 0.4045742154121399, "learning_rate": 3.789229550687676e-05, "loss": 1.6291, "step": 5563 }, { "epoch": 0.5779578269450504, "grad_norm": 0.4436100423336029, "learning_rate": 3.7876465197457014e-05, "loss": 1.7213, "step": 5564 }, { "epoch": 0.5780617014646308, "grad_norm": 0.41849127411842346, "learning_rate": 3.7860636179099153e-05, "loss": 1.7851, "step": 5565 }, { "epoch": 0.578165575984211, "grad_norm": 0.44967445731163025, "learning_rate": 3.784480845348886e-05, "loss": 1.7522, "step": 5566 }, { "epoch": 0.5782694505037914, "grad_norm": 0.4041469693183899, "learning_rate": 3.7828982022311645e-05, "loss": 1.6278, "step": 5567 }, { "epoch": 0.5783733250233718, "grad_norm": 0.3950141668319702, "learning_rate": 3.7813156887252896e-05, "loss": 1.566, "step": 5568 }, { "epoch": 0.5784771995429521, "grad_norm": 0.4024663269519806, "learning_rate": 3.77973330499979e-05, "loss": 1.6743, "step": 5569 }, { "epoch": 0.5785810740625325, "grad_norm": 0.4183747172355652, "learning_rate": 3.778151051223173e-05, "loss": 1.6308, "step": 5570 }, { "epoch": 0.5786849485821128, "grad_norm": 0.3978317081928253, "learning_rate": 3.7765689275639374e-05, "loss": 1.6298, "step": 5571 }, { "epoch": 0.5787888231016931, "grad_norm": 0.4176594018936157, "learning_rate": 3.774986934190571e-05, "loss": 1.7289, "step": 5572 }, { "epoch": 0.5788926976212735, "grad_norm": 0.4060553014278412, "learning_rate": 3.7734050712715394e-05, "loss": 1.7287, "step": 5573 }, { "epoch": 0.5789965721408539, "grad_norm": 0.40676042437553406, "learning_rate": 3.7718233389752986e-05, "loss": 1.4959, "step": 5574 }, { "epoch": 0.5791004466604341, "grad_norm": 0.4172111451625824, "learning_rate": 3.770241737470295e-05, "loss": 1.669, "step": 5575 }, { "epoch": 0.5792043211800145, "grad_norm": 0.4500811994075775, "learning_rate": 3.7686602669249505e-05, "loss": 1.8687, "step": 5576 }, { "epoch": 0.5793081956995949, "grad_norm": 0.4102492928504944, "learning_rate": 3.767078927507683e-05, "loss": 1.6422, "step": 5577 }, { "epoch": 0.5794120702191753, "grad_norm": 0.42576107382774353, "learning_rate": 3.765497719386894e-05, "loss": 1.7986, "step": 5578 }, { "epoch": 0.5795159447387556, "grad_norm": 0.43037348985671997, "learning_rate": 3.763916642730969e-05, "loss": 1.52, "step": 5579 }, { "epoch": 0.5796198192583359, "grad_norm": 0.4052847623825073, "learning_rate": 3.762335697708279e-05, "loss": 1.8281, "step": 5580 }, { "epoch": 0.5797236937779163, "grad_norm": 0.42516037821769714, "learning_rate": 3.760754884487182e-05, "loss": 1.7839, "step": 5581 }, { "epoch": 0.5798275682974966, "grad_norm": 0.4137275815010071, "learning_rate": 3.759174203236025e-05, "loss": 1.643, "step": 5582 }, { "epoch": 0.579931442817077, "grad_norm": 0.45627397298812866, "learning_rate": 3.757593654123135e-05, "loss": 1.8582, "step": 5583 }, { "epoch": 0.5800353173366574, "grad_norm": 0.420109361410141, "learning_rate": 3.75601323731683e-05, "loss": 1.7523, "step": 5584 }, { "epoch": 0.5801391918562376, "grad_norm": 0.4100499749183655, "learning_rate": 3.754432952985413e-05, "loss": 1.6175, "step": 5585 }, { "epoch": 0.580243066375818, "grad_norm": 0.4295746088027954, "learning_rate": 3.7528528012971694e-05, "loss": 1.7074, "step": 5586 }, { "epoch": 0.5803469408953984, "grad_norm": 0.40844690799713135, "learning_rate": 3.7512727824203754e-05, "loss": 1.6747, "step": 5587 }, { "epoch": 0.5804508154149787, "grad_norm": 0.45334941148757935, "learning_rate": 3.7496928965232905e-05, "loss": 1.6918, "step": 5588 }, { "epoch": 0.580554689934559, "grad_norm": 0.35644862055778503, "learning_rate": 3.7481131437741566e-05, "loss": 1.5046, "step": 5589 }, { "epoch": 0.5806585644541394, "grad_norm": 0.41652175784111023, "learning_rate": 3.746533524341208e-05, "loss": 1.4811, "step": 5590 }, { "epoch": 0.5807624389737197, "grad_norm": 0.40654313564300537, "learning_rate": 3.7449540383926646e-05, "loss": 1.6301, "step": 5591 }, { "epoch": 0.5808663134933001, "grad_norm": 0.40000343322753906, "learning_rate": 3.7433746860967235e-05, "loss": 1.6732, "step": 5592 }, { "epoch": 0.5809701880128805, "grad_norm": 0.4352266788482666, "learning_rate": 3.741795467621578e-05, "loss": 1.4708, "step": 5593 }, { "epoch": 0.5810740625324607, "grad_norm": 0.40756914019584656, "learning_rate": 3.7402163831354e-05, "loss": 1.647, "step": 5594 }, { "epoch": 0.5811779370520411, "grad_norm": 0.4151524603366852, "learning_rate": 3.738637432806351e-05, "loss": 1.5459, "step": 5595 }, { "epoch": 0.5812818115716215, "grad_norm": 0.39485087990760803, "learning_rate": 3.737058616802575e-05, "loss": 1.6415, "step": 5596 }, { "epoch": 0.5813856860912018, "grad_norm": 0.4184524118900299, "learning_rate": 3.735479935292208e-05, "loss": 1.4745, "step": 5597 }, { "epoch": 0.5814895606107822, "grad_norm": 0.42042723298072815, "learning_rate": 3.733901388443361e-05, "loss": 1.6336, "step": 5598 }, { "epoch": 0.5815934351303625, "grad_norm": 0.39631494879722595, "learning_rate": 3.732322976424141e-05, "loss": 1.4979, "step": 5599 }, { "epoch": 0.5816973096499428, "grad_norm": 0.45113274455070496, "learning_rate": 3.7307446994026375e-05, "loss": 1.826, "step": 5600 }, { "epoch": 0.5818011841695232, "grad_norm": 0.43801307678222656, "learning_rate": 3.7291665575469216e-05, "loss": 2.0605, "step": 5601 }, { "epoch": 0.5819050586891036, "grad_norm": 0.4118470549583435, "learning_rate": 3.727588551025053e-05, "loss": 1.6777, "step": 5602 }, { "epoch": 0.582008933208684, "grad_norm": 0.40802454948425293, "learning_rate": 3.72601068000508e-05, "loss": 1.6875, "step": 5603 }, { "epoch": 0.5821128077282642, "grad_norm": 0.3772849142551422, "learning_rate": 3.724432944655033e-05, "loss": 1.7246, "step": 5604 }, { "epoch": 0.5822166822478446, "grad_norm": 0.42528003454208374, "learning_rate": 3.722855345142926e-05, "loss": 1.7337, "step": 5605 }, { "epoch": 0.582320556767425, "grad_norm": 0.4423576593399048, "learning_rate": 3.721277881636764e-05, "loss": 1.8097, "step": 5606 }, { "epoch": 0.5824244312870053, "grad_norm": 0.4138087034225464, "learning_rate": 3.719700554304533e-05, "loss": 1.5108, "step": 5607 }, { "epoch": 0.5825283058065857, "grad_norm": 0.4109199047088623, "learning_rate": 3.718123363314206e-05, "loss": 1.541, "step": 5608 }, { "epoch": 0.582632180326166, "grad_norm": 0.38805916905403137, "learning_rate": 3.7165463088337406e-05, "loss": 1.7062, "step": 5609 }, { "epoch": 0.5827360548457463, "grad_norm": 0.39964035153388977, "learning_rate": 3.714969391031084e-05, "loss": 1.6754, "step": 5610 }, { "epoch": 0.5828399293653267, "grad_norm": 0.41137054562568665, "learning_rate": 3.713392610074162e-05, "loss": 1.6984, "step": 5611 }, { "epoch": 0.5829438038849071, "grad_norm": 0.3872339129447937, "learning_rate": 3.7118159661308906e-05, "loss": 1.4976, "step": 5612 }, { "epoch": 0.5830476784044873, "grad_norm": 0.4146081507205963, "learning_rate": 3.7102394593691724e-05, "loss": 1.5536, "step": 5613 }, { "epoch": 0.5831515529240677, "grad_norm": 0.4401085674762726, "learning_rate": 3.7086630899568906e-05, "loss": 1.8147, "step": 5614 }, { "epoch": 0.5832554274436481, "grad_norm": 0.46493473649024963, "learning_rate": 3.707086858061917e-05, "loss": 1.934, "step": 5615 }, { "epoch": 0.5833593019632284, "grad_norm": 0.4143672585487366, "learning_rate": 3.7055107638521086e-05, "loss": 1.6462, "step": 5616 }, { "epoch": 0.5834631764828088, "grad_norm": 0.41151854395866394, "learning_rate": 3.703934807495305e-05, "loss": 1.6508, "step": 5617 }, { "epoch": 0.5835670510023891, "grad_norm": 0.47356948256492615, "learning_rate": 3.7023589891593344e-05, "loss": 1.9211, "step": 5618 }, { "epoch": 0.5836709255219694, "grad_norm": 0.4014083445072174, "learning_rate": 3.7007833090120116e-05, "loss": 1.7667, "step": 5619 }, { "epoch": 0.5837748000415498, "grad_norm": 0.3790227174758911, "learning_rate": 3.69920776722113e-05, "loss": 1.4863, "step": 5620 }, { "epoch": 0.5838786745611302, "grad_norm": 0.43195411562919617, "learning_rate": 3.697632363954475e-05, "loss": 1.7431, "step": 5621 }, { "epoch": 0.5839825490807105, "grad_norm": 0.4108622372150421, "learning_rate": 3.696057099379816e-05, "loss": 1.6647, "step": 5622 }, { "epoch": 0.5840864236002908, "grad_norm": 0.3950975835323334, "learning_rate": 3.694481973664904e-05, "loss": 1.4736, "step": 5623 }, { "epoch": 0.5841902981198712, "grad_norm": 0.41740620136260986, "learning_rate": 3.6929069869774766e-05, "loss": 1.6726, "step": 5624 }, { "epoch": 0.5842941726394515, "grad_norm": 0.45372968912124634, "learning_rate": 3.6913321394852616e-05, "loss": 1.7569, "step": 5625 }, { "epoch": 0.5843980471590319, "grad_norm": 0.3958111107349396, "learning_rate": 3.689757431355964e-05, "loss": 1.7056, "step": 5626 }, { "epoch": 0.5845019216786123, "grad_norm": 0.4113248288631439, "learning_rate": 3.68818286275728e-05, "loss": 1.7895, "step": 5627 }, { "epoch": 0.5846057961981925, "grad_norm": 0.4527917802333832, "learning_rate": 3.686608433856891e-05, "loss": 1.7048, "step": 5628 }, { "epoch": 0.5847096707177729, "grad_norm": 0.43237176537513733, "learning_rate": 3.6850341448224574e-05, "loss": 1.6414, "step": 5629 }, { "epoch": 0.5848135452373533, "grad_norm": 0.4343053102493286, "learning_rate": 3.6834599958216286e-05, "loss": 1.7855, "step": 5630 }, { "epoch": 0.5849174197569337, "grad_norm": 0.44802016019821167, "learning_rate": 3.6818859870220415e-05, "loss": 1.8118, "step": 5631 }, { "epoch": 0.585021294276514, "grad_norm": 0.4242957532405853, "learning_rate": 3.680312118591317e-05, "loss": 1.6769, "step": 5632 }, { "epoch": 0.5851251687960943, "grad_norm": 0.4112863540649414, "learning_rate": 3.6787383906970564e-05, "loss": 1.6765, "step": 5633 }, { "epoch": 0.5852290433156747, "grad_norm": 0.435183048248291, "learning_rate": 3.677164803506851e-05, "loss": 1.6958, "step": 5634 }, { "epoch": 0.585332917835255, "grad_norm": 0.4290895462036133, "learning_rate": 3.675591357188276e-05, "loss": 1.6542, "step": 5635 }, { "epoch": 0.5854367923548354, "grad_norm": 0.44175586104393005, "learning_rate": 3.6740180519088905e-05, "loss": 1.802, "step": 5636 }, { "epoch": 0.5855406668744157, "grad_norm": 0.3649253249168396, "learning_rate": 3.672444887836239e-05, "loss": 1.4738, "step": 5637 }, { "epoch": 0.585644541393996, "grad_norm": 0.4127234220504761, "learning_rate": 3.6708718651378534e-05, "loss": 1.522, "step": 5638 }, { "epoch": 0.5857484159135764, "grad_norm": 0.41260048747062683, "learning_rate": 3.6692989839812444e-05, "loss": 1.6182, "step": 5639 }, { "epoch": 0.5858522904331568, "grad_norm": 0.410636305809021, "learning_rate": 3.667726244533913e-05, "loss": 1.6908, "step": 5640 }, { "epoch": 0.5859561649527371, "grad_norm": 0.393493115901947, "learning_rate": 3.6661536469633474e-05, "loss": 1.5382, "step": 5641 }, { "epoch": 0.5860600394723174, "grad_norm": 0.4044821560382843, "learning_rate": 3.6645811914370126e-05, "loss": 1.683, "step": 5642 }, { "epoch": 0.5861639139918978, "grad_norm": 0.39565759897232056, "learning_rate": 3.663008878122364e-05, "loss": 1.6154, "step": 5643 }, { "epoch": 0.5862677885114781, "grad_norm": 0.4338008165359497, "learning_rate": 3.661436707186842e-05, "loss": 1.8497, "step": 5644 }, { "epoch": 0.5863716630310585, "grad_norm": 0.4520065188407898, "learning_rate": 3.6598646787978676e-05, "loss": 1.6919, "step": 5645 }, { "epoch": 0.5864755375506389, "grad_norm": 0.3839634656906128, "learning_rate": 3.6582927931228516e-05, "loss": 1.6516, "step": 5646 }, { "epoch": 0.5865794120702191, "grad_norm": 0.4146488606929779, "learning_rate": 3.656721050329189e-05, "loss": 1.7285, "step": 5647 }, { "epoch": 0.5866832865897995, "grad_norm": 0.43894267082214355, "learning_rate": 3.6551494505842554e-05, "loss": 1.6571, "step": 5648 }, { "epoch": 0.5867871611093799, "grad_norm": 0.42978256940841675, "learning_rate": 3.653577994055416e-05, "loss": 1.6641, "step": 5649 }, { "epoch": 0.5868910356289602, "grad_norm": 0.40996453166007996, "learning_rate": 3.652006680910017e-05, "loss": 1.5068, "step": 5650 }, { "epoch": 0.5869949101485405, "grad_norm": 0.4839547574520111, "learning_rate": 3.650435511315392e-05, "loss": 1.7859, "step": 5651 }, { "epoch": 0.5870987846681209, "grad_norm": 0.40157848596572876, "learning_rate": 3.6488644854388577e-05, "loss": 1.5551, "step": 5652 }, { "epoch": 0.5872026591877012, "grad_norm": 0.46978557109832764, "learning_rate": 3.647293603447718e-05, "loss": 1.8654, "step": 5653 }, { "epoch": 0.5873065337072816, "grad_norm": 0.39479735493659973, "learning_rate": 3.6457228655092554e-05, "loss": 1.6824, "step": 5654 }, { "epoch": 0.587410408226862, "grad_norm": 0.4402686357498169, "learning_rate": 3.6441522717907443e-05, "loss": 1.7669, "step": 5655 }, { "epoch": 0.5875142827464424, "grad_norm": 0.41756048798561096, "learning_rate": 3.642581822459441e-05, "loss": 1.7688, "step": 5656 }, { "epoch": 0.5876181572660226, "grad_norm": 0.42584848403930664, "learning_rate": 3.641011517682588e-05, "loss": 1.6345, "step": 5657 }, { "epoch": 0.587722031785603, "grad_norm": 0.40674230456352234, "learning_rate": 3.639441357627404e-05, "loss": 1.6559, "step": 5658 }, { "epoch": 0.5878259063051834, "grad_norm": 0.40528520941734314, "learning_rate": 3.637871342461103e-05, "loss": 1.6813, "step": 5659 }, { "epoch": 0.5879297808247637, "grad_norm": 0.4171191453933716, "learning_rate": 3.6363014723508813e-05, "loss": 1.7832, "step": 5660 }, { "epoch": 0.588033655344344, "grad_norm": 0.3836202919483185, "learning_rate": 3.634731747463913e-05, "loss": 1.6288, "step": 5661 }, { "epoch": 0.5881375298639244, "grad_norm": 0.45273473858833313, "learning_rate": 3.633162167967366e-05, "loss": 1.6388, "step": 5662 }, { "epoch": 0.5882414043835047, "grad_norm": 0.42023298144340515, "learning_rate": 3.6315927340283855e-05, "loss": 1.6475, "step": 5663 }, { "epoch": 0.5883452789030851, "grad_norm": 0.4072648286819458, "learning_rate": 3.630023445814106e-05, "loss": 1.7276, "step": 5664 }, { "epoch": 0.5884491534226655, "grad_norm": 0.37127307057380676, "learning_rate": 3.6284543034916405e-05, "loss": 1.449, "step": 5665 }, { "epoch": 0.5885530279422457, "grad_norm": 0.40578770637512207, "learning_rate": 3.6268853072280966e-05, "loss": 1.692, "step": 5666 }, { "epoch": 0.5886569024618261, "grad_norm": 0.4521169364452362, "learning_rate": 3.625316457190555e-05, "loss": 1.7896, "step": 5667 }, { "epoch": 0.5887607769814065, "grad_norm": 0.4111865758895874, "learning_rate": 3.623747753546086e-05, "loss": 1.8176, "step": 5668 }, { "epoch": 0.5888646515009868, "grad_norm": 0.4034864008426666, "learning_rate": 3.62217919646175e-05, "loss": 1.7778, "step": 5669 }, { "epoch": 0.5889685260205672, "grad_norm": 0.405782550573349, "learning_rate": 3.6206107861045804e-05, "loss": 1.8, "step": 5670 }, { "epoch": 0.5890724005401475, "grad_norm": 0.3821369707584381, "learning_rate": 3.619042522641601e-05, "loss": 1.4904, "step": 5671 }, { "epoch": 0.5891762750597278, "grad_norm": 0.42174914479255676, "learning_rate": 3.617474406239824e-05, "loss": 1.6863, "step": 5672 }, { "epoch": 0.5892801495793082, "grad_norm": 0.4899470806121826, "learning_rate": 3.615906437066236e-05, "loss": 1.5951, "step": 5673 }, { "epoch": 0.5893840240988886, "grad_norm": 0.3886786699295044, "learning_rate": 3.614338615287816e-05, "loss": 1.5416, "step": 5674 }, { "epoch": 0.5894878986184688, "grad_norm": 0.3970104455947876, "learning_rate": 3.612770941071527e-05, "loss": 1.6345, "step": 5675 }, { "epoch": 0.5895917731380492, "grad_norm": 0.4441760182380676, "learning_rate": 3.61120341458431e-05, "loss": 1.7968, "step": 5676 }, { "epoch": 0.5896956476576296, "grad_norm": 0.4187043607234955, "learning_rate": 3.6096360359930984e-05, "loss": 1.458, "step": 5677 }, { "epoch": 0.5897995221772099, "grad_norm": 0.39509010314941406, "learning_rate": 3.608068805464802e-05, "loss": 1.8428, "step": 5678 }, { "epoch": 0.5899033966967903, "grad_norm": 0.41668543219566345, "learning_rate": 3.6065017231663214e-05, "loss": 1.4289, "step": 5679 }, { "epoch": 0.5900072712163706, "grad_norm": 0.4090832471847534, "learning_rate": 3.6049347892645356e-05, "loss": 1.7567, "step": 5680 }, { "epoch": 0.590111145735951, "grad_norm": 0.4536181390285492, "learning_rate": 3.603368003926314e-05, "loss": 1.6062, "step": 5681 }, { "epoch": 0.5902150202555313, "grad_norm": 0.4117638170719147, "learning_rate": 3.601801367318507e-05, "loss": 1.6996, "step": 5682 }, { "epoch": 0.5903188947751117, "grad_norm": 0.4338860809803009, "learning_rate": 3.600234879607947e-05, "loss": 1.6467, "step": 5683 }, { "epoch": 0.5904227692946921, "grad_norm": 0.4608552157878876, "learning_rate": 3.5986685409614526e-05, "loss": 1.8217, "step": 5684 }, { "epoch": 0.5905266438142723, "grad_norm": 0.4324304163455963, "learning_rate": 3.597102351545831e-05, "loss": 1.7504, "step": 5685 }, { "epoch": 0.5906305183338527, "grad_norm": 0.3990081250667572, "learning_rate": 3.5955363115278626e-05, "loss": 1.6519, "step": 5686 }, { "epoch": 0.5907343928534331, "grad_norm": 0.4149520695209503, "learning_rate": 3.593970421074323e-05, "loss": 1.6564, "step": 5687 }, { "epoch": 0.5908382673730134, "grad_norm": 0.4176534116268158, "learning_rate": 3.5924046803519675e-05, "loss": 1.5858, "step": 5688 }, { "epoch": 0.5909421418925938, "grad_norm": 0.40572646260261536, "learning_rate": 3.590839089527531e-05, "loss": 1.6495, "step": 5689 }, { "epoch": 0.5910460164121741, "grad_norm": 0.38206931948661804, "learning_rate": 3.589273648767742e-05, "loss": 1.6129, "step": 5690 }, { "epoch": 0.5911498909317544, "grad_norm": 0.4322497248649597, "learning_rate": 3.5877083582393044e-05, "loss": 1.7354, "step": 5691 }, { "epoch": 0.5912537654513348, "grad_norm": 0.41732117533683777, "learning_rate": 3.586143218108911e-05, "loss": 1.891, "step": 5692 }, { "epoch": 0.5913576399709152, "grad_norm": 0.3857957422733307, "learning_rate": 3.584578228543235e-05, "loss": 1.6225, "step": 5693 }, { "epoch": 0.5914615144904954, "grad_norm": 0.408961683511734, "learning_rate": 3.5830133897089394e-05, "loss": 1.671, "step": 5694 }, { "epoch": 0.5915653890100758, "grad_norm": 0.4022825062274933, "learning_rate": 3.5814487017726616e-05, "loss": 1.648, "step": 5695 }, { "epoch": 0.5916692635296562, "grad_norm": 0.468014657497406, "learning_rate": 3.579884164901033e-05, "loss": 1.8003, "step": 5696 }, { "epoch": 0.5917731380492365, "grad_norm": 0.3964448869228363, "learning_rate": 3.5783197792606654e-05, "loss": 1.7439, "step": 5697 }, { "epoch": 0.5918770125688169, "grad_norm": 0.39556026458740234, "learning_rate": 3.576755545018151e-05, "loss": 1.6194, "step": 5698 }, { "epoch": 0.5919808870883972, "grad_norm": 0.3882671594619751, "learning_rate": 3.5751914623400675e-05, "loss": 1.599, "step": 5699 }, { "epoch": 0.5920847616079775, "grad_norm": 0.4119251072406769, "learning_rate": 3.573627531392982e-05, "loss": 1.8195, "step": 5700 }, { "epoch": 0.5921886361275579, "grad_norm": 0.4111797511577606, "learning_rate": 3.572063752343436e-05, "loss": 1.6844, "step": 5701 }, { "epoch": 0.5922925106471383, "grad_norm": 0.3847576379776001, "learning_rate": 3.5705001253579615e-05, "loss": 1.652, "step": 5702 }, { "epoch": 0.5923963851667186, "grad_norm": 0.44039520621299744, "learning_rate": 3.568936650603076e-05, "loss": 1.6053, "step": 5703 }, { "epoch": 0.5925002596862989, "grad_norm": 0.4001440703868866, "learning_rate": 3.5673733282452725e-05, "loss": 1.3555, "step": 5704 }, { "epoch": 0.5926041342058793, "grad_norm": 0.4470379054546356, "learning_rate": 3.565810158451036e-05, "loss": 1.7809, "step": 5705 }, { "epoch": 0.5927080087254596, "grad_norm": 0.46440574526786804, "learning_rate": 3.5642471413868305e-05, "loss": 1.903, "step": 5706 }, { "epoch": 0.59281188324504, "grad_norm": 0.4026218056678772, "learning_rate": 3.562684277219104e-05, "loss": 1.5994, "step": 5707 }, { "epoch": 0.5929157577646204, "grad_norm": 0.47202712297439575, "learning_rate": 3.5611215661142897e-05, "loss": 1.8916, "step": 5708 }, { "epoch": 0.5930196322842007, "grad_norm": 0.4305804967880249, "learning_rate": 3.559559008238805e-05, "loss": 1.4593, "step": 5709 }, { "epoch": 0.593123506803781, "grad_norm": 0.4332740306854248, "learning_rate": 3.5579966037590524e-05, "loss": 1.7235, "step": 5710 }, { "epoch": 0.5932273813233614, "grad_norm": 0.40714386105537415, "learning_rate": 3.556434352841411e-05, "loss": 1.631, "step": 5711 }, { "epoch": 0.5933312558429418, "grad_norm": 0.4484121799468994, "learning_rate": 3.554872255652252e-05, "loss": 1.8386, "step": 5712 }, { "epoch": 0.593435130362522, "grad_norm": 0.446119099855423, "learning_rate": 3.553310312357927e-05, "loss": 1.838, "step": 5713 }, { "epoch": 0.5935390048821024, "grad_norm": 0.45277395844459534, "learning_rate": 3.551748523124766e-05, "loss": 1.7349, "step": 5714 }, { "epoch": 0.5936428794016828, "grad_norm": 0.3999378979206085, "learning_rate": 3.550186888119091e-05, "loss": 1.6032, "step": 5715 }, { "epoch": 0.5937467539212631, "grad_norm": 0.4385007619857788, "learning_rate": 3.5486254075072056e-05, "loss": 1.8558, "step": 5716 }, { "epoch": 0.5938506284408435, "grad_norm": 0.42009690403938293, "learning_rate": 3.547064081455391e-05, "loss": 1.7783, "step": 5717 }, { "epoch": 0.5939545029604238, "grad_norm": 0.3846227526664734, "learning_rate": 3.5455029101299196e-05, "loss": 1.4527, "step": 5718 }, { "epoch": 0.5940583774800041, "grad_norm": 0.4681823253631592, "learning_rate": 3.5439418936970425e-05, "loss": 1.8712, "step": 5719 }, { "epoch": 0.5941622519995845, "grad_norm": 0.4000855088233948, "learning_rate": 3.542381032322997e-05, "loss": 1.548, "step": 5720 }, { "epoch": 0.5942661265191649, "grad_norm": 0.3869953453540802, "learning_rate": 3.540820326173999e-05, "loss": 1.5979, "step": 5721 }, { "epoch": 0.5943700010387452, "grad_norm": 0.43959927558898926, "learning_rate": 3.539259775416257e-05, "loss": 1.7001, "step": 5722 }, { "epoch": 0.5944738755583255, "grad_norm": 0.40868815779685974, "learning_rate": 3.537699380215953e-05, "loss": 1.7146, "step": 5723 }, { "epoch": 0.5945777500779059, "grad_norm": 0.39394938945770264, "learning_rate": 3.5361391407392583e-05, "loss": 1.711, "step": 5724 }, { "epoch": 0.5946816245974862, "grad_norm": 0.4582231044769287, "learning_rate": 3.5345790571523286e-05, "loss": 1.9054, "step": 5725 }, { "epoch": 0.5947854991170666, "grad_norm": 0.40016230940818787, "learning_rate": 3.5330191296212976e-05, "loss": 1.7314, "step": 5726 }, { "epoch": 0.594889373636647, "grad_norm": 0.45281219482421875, "learning_rate": 3.5314593583122854e-05, "loss": 1.8133, "step": 5727 }, { "epoch": 0.5949932481562272, "grad_norm": 0.41264450550079346, "learning_rate": 3.529899743391398e-05, "loss": 1.7283, "step": 5728 }, { "epoch": 0.5950971226758076, "grad_norm": 0.41202911734580994, "learning_rate": 3.5283402850247194e-05, "loss": 1.3802, "step": 5729 }, { "epoch": 0.595200997195388, "grad_norm": 0.4194154143333435, "learning_rate": 3.526780983378321e-05, "loss": 1.6485, "step": 5730 }, { "epoch": 0.5953048717149683, "grad_norm": 0.4135974943637848, "learning_rate": 3.5252218386182585e-05, "loss": 1.793, "step": 5731 }, { "epoch": 0.5954087462345486, "grad_norm": 0.41171103715896606, "learning_rate": 3.523662850910564e-05, "loss": 1.7508, "step": 5732 }, { "epoch": 0.595512620754129, "grad_norm": 0.4255639314651489, "learning_rate": 3.5221040204212606e-05, "loss": 1.6849, "step": 5733 }, { "epoch": 0.5956164952737094, "grad_norm": 0.4412103295326233, "learning_rate": 3.5205453473163506e-05, "loss": 1.6842, "step": 5734 }, { "epoch": 0.5957203697932897, "grad_norm": 0.397868275642395, "learning_rate": 3.518986831761824e-05, "loss": 1.6491, "step": 5735 }, { "epoch": 0.5958242443128701, "grad_norm": 0.4192911684513092, "learning_rate": 3.517428473923644e-05, "loss": 1.8276, "step": 5736 }, { "epoch": 0.5959281188324504, "grad_norm": 0.39529258012771606, "learning_rate": 3.515870273967767e-05, "loss": 1.6447, "step": 5737 }, { "epoch": 0.5960319933520307, "grad_norm": 0.39592933654785156, "learning_rate": 3.514312232060132e-05, "loss": 1.6278, "step": 5738 }, { "epoch": 0.5961358678716111, "grad_norm": 0.4018373489379883, "learning_rate": 3.512754348366652e-05, "loss": 1.7605, "step": 5739 }, { "epoch": 0.5962397423911915, "grad_norm": 0.47401878237724304, "learning_rate": 3.5111966230532355e-05, "loss": 2.0161, "step": 5740 }, { "epoch": 0.5963436169107718, "grad_norm": 0.4021179676055908, "learning_rate": 3.509639056285766e-05, "loss": 1.6192, "step": 5741 }, { "epoch": 0.5964474914303521, "grad_norm": 0.427116334438324, "learning_rate": 3.50808164823011e-05, "loss": 1.6114, "step": 5742 }, { "epoch": 0.5965513659499325, "grad_norm": 0.41006430983543396, "learning_rate": 3.5065243990521205e-05, "loss": 1.7771, "step": 5743 }, { "epoch": 0.5966552404695128, "grad_norm": 0.39242881536483765, "learning_rate": 3.504967308917635e-05, "loss": 1.6355, "step": 5744 }, { "epoch": 0.5967591149890932, "grad_norm": 0.39691224694252014, "learning_rate": 3.5034103779924674e-05, "loss": 1.5866, "step": 5745 }, { "epoch": 0.5968629895086736, "grad_norm": 0.4471852481365204, "learning_rate": 3.5018536064424224e-05, "loss": 1.663, "step": 5746 }, { "epoch": 0.5969668640282538, "grad_norm": 0.39760124683380127, "learning_rate": 3.500296994433282e-05, "loss": 1.7701, "step": 5747 }, { "epoch": 0.5970707385478342, "grad_norm": 0.3909447193145752, "learning_rate": 3.498740542130813e-05, "loss": 1.5167, "step": 5748 }, { "epoch": 0.5971746130674146, "grad_norm": 0.4225844442844391, "learning_rate": 3.497184249700766e-05, "loss": 1.6903, "step": 5749 }, { "epoch": 0.5972784875869949, "grad_norm": 0.4088863432407379, "learning_rate": 3.495628117308875e-05, "loss": 1.3562, "step": 5750 }, { "epoch": 0.5973823621065752, "grad_norm": 0.39334914088249207, "learning_rate": 3.4940721451208535e-05, "loss": 1.6846, "step": 5751 }, { "epoch": 0.5974862366261556, "grad_norm": 0.4623134434223175, "learning_rate": 3.4925163333024015e-05, "loss": 1.6667, "step": 5752 }, { "epoch": 0.5975901111457359, "grad_norm": 0.4101615846157074, "learning_rate": 3.490960682019203e-05, "loss": 1.594, "step": 5753 }, { "epoch": 0.5976939856653163, "grad_norm": 0.4440319240093231, "learning_rate": 3.489405191436921e-05, "loss": 1.8576, "step": 5754 }, { "epoch": 0.5977978601848967, "grad_norm": 0.45121869444847107, "learning_rate": 3.487849861721201e-05, "loss": 1.7881, "step": 5755 }, { "epoch": 0.5979017347044769, "grad_norm": 0.4189302623271942, "learning_rate": 3.4862946930376775e-05, "loss": 1.5814, "step": 5756 }, { "epoch": 0.5980056092240573, "grad_norm": 0.45438921451568604, "learning_rate": 3.4847396855519604e-05, "loss": 1.688, "step": 5757 }, { "epoch": 0.5981094837436377, "grad_norm": 0.40121012926101685, "learning_rate": 3.483184839429647e-05, "loss": 1.7056, "step": 5758 }, { "epoch": 0.5982133582632181, "grad_norm": 0.4294279217720032, "learning_rate": 3.481630154836317e-05, "loss": 1.6278, "step": 5759 }, { "epoch": 0.5983172327827984, "grad_norm": 0.3801029324531555, "learning_rate": 3.4800756319375325e-05, "loss": 1.5913, "step": 5760 }, { "epoch": 0.5984211073023787, "grad_norm": 0.43568864464759827, "learning_rate": 3.478521270898838e-05, "loss": 1.7894, "step": 5761 }, { "epoch": 0.5985249818219591, "grad_norm": 0.4160782992839813, "learning_rate": 3.476967071885758e-05, "loss": 1.6332, "step": 5762 }, { "epoch": 0.5986288563415394, "grad_norm": 0.4249245524406433, "learning_rate": 3.475413035063807e-05, "loss": 1.6174, "step": 5763 }, { "epoch": 0.5987327308611198, "grad_norm": 0.38329437375068665, "learning_rate": 3.473859160598474e-05, "loss": 1.7204, "step": 5764 }, { "epoch": 0.5988366053807002, "grad_norm": 0.4142932593822479, "learning_rate": 3.4723054486552354e-05, "loss": 1.7932, "step": 5765 }, { "epoch": 0.5989404799002804, "grad_norm": 0.44899502396583557, "learning_rate": 3.4707518993995524e-05, "loss": 1.7966, "step": 5766 }, { "epoch": 0.5990443544198608, "grad_norm": 0.40485885739326477, "learning_rate": 3.469198512996863e-05, "loss": 1.6783, "step": 5767 }, { "epoch": 0.5991482289394412, "grad_norm": 0.39931273460388184, "learning_rate": 3.467645289612589e-05, "loss": 1.5305, "step": 5768 }, { "epoch": 0.5992521034590215, "grad_norm": 0.381760835647583, "learning_rate": 3.4660922294121416e-05, "loss": 1.4935, "step": 5769 }, { "epoch": 0.5993559779786019, "grad_norm": 0.4411904215812683, "learning_rate": 3.4645393325609046e-05, "loss": 1.5182, "step": 5770 }, { "epoch": 0.5994598524981822, "grad_norm": 0.39932867884635925, "learning_rate": 3.462986599224252e-05, "loss": 1.7623, "step": 5771 }, { "epoch": 0.5995637270177625, "grad_norm": 0.40389779210090637, "learning_rate": 3.46143402956754e-05, "loss": 1.5783, "step": 5772 }, { "epoch": 0.5996676015373429, "grad_norm": 0.41848430037498474, "learning_rate": 3.4598816237561e-05, "loss": 1.7221, "step": 5773 }, { "epoch": 0.5997714760569233, "grad_norm": 0.4139845669269562, "learning_rate": 3.4583293819552534e-05, "loss": 1.5198, "step": 5774 }, { "epoch": 0.5998753505765035, "grad_norm": 0.4001912474632263, "learning_rate": 3.456777304330303e-05, "loss": 1.6871, "step": 5775 }, { "epoch": 0.5999792250960839, "grad_norm": 0.44707363843917847, "learning_rate": 3.455225391046533e-05, "loss": 1.6126, "step": 5776 }, { "epoch": 0.6000830996156643, "grad_norm": 0.3965167701244354, "learning_rate": 3.4536736422692076e-05, "loss": 1.4743, "step": 5777 }, { "epoch": 0.6001869741352446, "grad_norm": 0.42330119013786316, "learning_rate": 3.4521220581635785e-05, "loss": 1.4636, "step": 5778 }, { "epoch": 0.600290848654825, "grad_norm": 0.3950074315071106, "learning_rate": 3.4505706388948755e-05, "loss": 1.7064, "step": 5779 }, { "epoch": 0.6003947231744053, "grad_norm": 0.42809754610061646, "learning_rate": 3.4490193846283125e-05, "loss": 1.7697, "step": 5780 }, { "epoch": 0.6004985976939856, "grad_norm": 0.4212953448295593, "learning_rate": 3.44746829552909e-05, "loss": 1.7538, "step": 5781 }, { "epoch": 0.600602472213566, "grad_norm": 0.4184706211090088, "learning_rate": 3.445917371762382e-05, "loss": 1.8034, "step": 5782 }, { "epoch": 0.6007063467331464, "grad_norm": 0.42354482412338257, "learning_rate": 3.444366613493351e-05, "loss": 1.6961, "step": 5783 }, { "epoch": 0.6008102212527268, "grad_norm": 0.388049840927124, "learning_rate": 3.4428160208871414e-05, "loss": 1.6964, "step": 5784 }, { "epoch": 0.600914095772307, "grad_norm": 0.4392579197883606, "learning_rate": 3.441265594108881e-05, "loss": 1.5853, "step": 5785 }, { "epoch": 0.6010179702918874, "grad_norm": 0.3931465148925781, "learning_rate": 3.4397153333236735e-05, "loss": 1.5617, "step": 5786 }, { "epoch": 0.6011218448114678, "grad_norm": 0.3917011320590973, "learning_rate": 3.438165238696614e-05, "loss": 1.642, "step": 5787 }, { "epoch": 0.6012257193310481, "grad_norm": 0.41590529680252075, "learning_rate": 3.436615310392773e-05, "loss": 1.5589, "step": 5788 }, { "epoch": 0.6013295938506285, "grad_norm": 0.46075764298439026, "learning_rate": 3.4350655485772074e-05, "loss": 1.8877, "step": 5789 }, { "epoch": 0.6014334683702088, "grad_norm": 0.4078013300895691, "learning_rate": 3.433515953414953e-05, "loss": 1.5837, "step": 5790 }, { "epoch": 0.6015373428897891, "grad_norm": 0.4370696246623993, "learning_rate": 3.431966525071033e-05, "loss": 1.7953, "step": 5791 }, { "epoch": 0.6016412174093695, "grad_norm": 0.3889653980731964, "learning_rate": 3.4304172637104436e-05, "loss": 1.6294, "step": 5792 }, { "epoch": 0.6017450919289499, "grad_norm": 0.3979250192642212, "learning_rate": 3.4288681694981735e-05, "loss": 1.686, "step": 5793 }, { "epoch": 0.6018489664485301, "grad_norm": 0.428313672542572, "learning_rate": 3.42731924259919e-05, "loss": 1.6336, "step": 5794 }, { "epoch": 0.6019528409681105, "grad_norm": 0.4451068043708801, "learning_rate": 3.4257704831784375e-05, "loss": 1.6593, "step": 5795 }, { "epoch": 0.6020567154876909, "grad_norm": 0.47442716360092163, "learning_rate": 3.42422189140085e-05, "loss": 1.8737, "step": 5796 }, { "epoch": 0.6021605900072712, "grad_norm": 0.4260861277580261, "learning_rate": 3.42267346743134e-05, "loss": 1.7137, "step": 5797 }, { "epoch": 0.6022644645268516, "grad_norm": 0.43714046478271484, "learning_rate": 3.4211252114348e-05, "loss": 1.7122, "step": 5798 }, { "epoch": 0.6023683390464319, "grad_norm": 0.4364699721336365, "learning_rate": 3.41957712357611e-05, "loss": 1.6723, "step": 5799 }, { "epoch": 0.6024722135660122, "grad_norm": 0.39638301730155945, "learning_rate": 3.41802920402013e-05, "loss": 1.6197, "step": 5800 }, { "epoch": 0.6025760880855926, "grad_norm": 0.4492684602737427, "learning_rate": 3.416481452931699e-05, "loss": 1.5572, "step": 5801 }, { "epoch": 0.602679962605173, "grad_norm": 0.3939664661884308, "learning_rate": 3.4149338704756416e-05, "loss": 1.6363, "step": 5802 }, { "epoch": 0.6027838371247533, "grad_norm": 0.5074815154075623, "learning_rate": 3.413386456816763e-05, "loss": 1.8986, "step": 5803 }, { "epoch": 0.6028877116443336, "grad_norm": 0.4267491102218628, "learning_rate": 3.411839212119851e-05, "loss": 1.64, "step": 5804 }, { "epoch": 0.602991586163914, "grad_norm": 0.39998719096183777, "learning_rate": 3.410292136549673e-05, "loss": 1.577, "step": 5805 }, { "epoch": 0.6030954606834943, "grad_norm": 0.3894689381122589, "learning_rate": 3.408745230270985e-05, "loss": 1.6308, "step": 5806 }, { "epoch": 0.6031993352030747, "grad_norm": 0.4221261441707611, "learning_rate": 3.407198493448516e-05, "loss": 1.6313, "step": 5807 }, { "epoch": 0.603303209722655, "grad_norm": 0.4228569269180298, "learning_rate": 3.405651926246982e-05, "loss": 1.7215, "step": 5808 }, { "epoch": 0.6034070842422353, "grad_norm": 0.3922746479511261, "learning_rate": 3.4041055288310844e-05, "loss": 1.665, "step": 5809 }, { "epoch": 0.6035109587618157, "grad_norm": 0.39183309674263, "learning_rate": 3.402559301365498e-05, "loss": 1.6458, "step": 5810 }, { "epoch": 0.6036148332813961, "grad_norm": 0.4340969920158386, "learning_rate": 3.401013244014884e-05, "loss": 1.6737, "step": 5811 }, { "epoch": 0.6037187078009765, "grad_norm": 0.4317123591899872, "learning_rate": 3.3994673569438886e-05, "loss": 1.702, "step": 5812 }, { "epoch": 0.6038225823205567, "grad_norm": 0.4379931092262268, "learning_rate": 3.397921640317136e-05, "loss": 1.6435, "step": 5813 }, { "epoch": 0.6039264568401371, "grad_norm": 0.4368740916252136, "learning_rate": 3.39637609429923e-05, "loss": 1.7202, "step": 5814 }, { "epoch": 0.6040303313597175, "grad_norm": 0.4536083936691284, "learning_rate": 3.3948307190547624e-05, "loss": 1.664, "step": 5815 }, { "epoch": 0.6041342058792978, "grad_norm": 0.40802082419395447, "learning_rate": 3.3932855147483024e-05, "loss": 1.6268, "step": 5816 }, { "epoch": 0.6042380803988782, "grad_norm": 0.4093196392059326, "learning_rate": 3.391740481544402e-05, "loss": 1.7967, "step": 5817 }, { "epoch": 0.6043419549184585, "grad_norm": 0.41186216473579407, "learning_rate": 3.3901956196075945e-05, "loss": 1.5787, "step": 5818 }, { "epoch": 0.6044458294380388, "grad_norm": 0.431708961725235, "learning_rate": 3.3886509291023986e-05, "loss": 1.6593, "step": 5819 }, { "epoch": 0.6045497039576192, "grad_norm": 0.42467281222343445, "learning_rate": 3.387106410193308e-05, "loss": 1.6942, "step": 5820 }, { "epoch": 0.6046535784771996, "grad_norm": 0.3974582254886627, "learning_rate": 3.385562063044803e-05, "loss": 1.5867, "step": 5821 }, { "epoch": 0.6047574529967799, "grad_norm": 0.4653453528881073, "learning_rate": 3.384017887821348e-05, "loss": 1.8252, "step": 5822 }, { "epoch": 0.6048613275163602, "grad_norm": 0.4493721127510071, "learning_rate": 3.382473884687382e-05, "loss": 1.8101, "step": 5823 }, { "epoch": 0.6049652020359406, "grad_norm": 0.3836005926132202, "learning_rate": 3.380930053807328e-05, "loss": 1.6124, "step": 5824 }, { "epoch": 0.6050690765555209, "grad_norm": 0.38905587792396545, "learning_rate": 3.379386395345596e-05, "loss": 1.575, "step": 5825 }, { "epoch": 0.6051729510751013, "grad_norm": 0.4363052248954773, "learning_rate": 3.377842909466571e-05, "loss": 1.6505, "step": 5826 }, { "epoch": 0.6052768255946817, "grad_norm": 0.40476155281066895, "learning_rate": 3.376299596334622e-05, "loss": 1.7677, "step": 5827 }, { "epoch": 0.6053807001142619, "grad_norm": 0.42141294479370117, "learning_rate": 3.374756456114102e-05, "loss": 1.5649, "step": 5828 }, { "epoch": 0.6054845746338423, "grad_norm": 0.45798051357269287, "learning_rate": 3.373213488969341e-05, "loss": 1.8967, "step": 5829 }, { "epoch": 0.6055884491534227, "grad_norm": 0.4245013892650604, "learning_rate": 3.371670695064655e-05, "loss": 1.6626, "step": 5830 }, { "epoch": 0.605692323673003, "grad_norm": 0.4293268918991089, "learning_rate": 3.370128074564338e-05, "loss": 1.9178, "step": 5831 }, { "epoch": 0.6057961981925833, "grad_norm": 0.42448824644088745, "learning_rate": 3.368585627632668e-05, "loss": 1.7219, "step": 5832 }, { "epoch": 0.6059000727121637, "grad_norm": 0.3986135423183441, "learning_rate": 3.3670433544339014e-05, "loss": 1.6541, "step": 5833 }, { "epoch": 0.606003947231744, "grad_norm": 0.44702157378196716, "learning_rate": 3.365501255132283e-05, "loss": 1.5446, "step": 5834 }, { "epoch": 0.6061078217513244, "grad_norm": 0.39528048038482666, "learning_rate": 3.363959329892029e-05, "loss": 1.5639, "step": 5835 }, { "epoch": 0.6062116962709048, "grad_norm": 0.40425539016723633, "learning_rate": 3.362417578877344e-05, "loss": 1.5162, "step": 5836 }, { "epoch": 0.6063155707904851, "grad_norm": 0.3955405056476593, "learning_rate": 3.360876002252415e-05, "loss": 1.6176, "step": 5837 }, { "epoch": 0.6064194453100654, "grad_norm": 0.4061652421951294, "learning_rate": 3.359334600181407e-05, "loss": 1.7371, "step": 5838 }, { "epoch": 0.6065233198296458, "grad_norm": 0.41885098814964294, "learning_rate": 3.357793372828464e-05, "loss": 1.4975, "step": 5839 }, { "epoch": 0.6066271943492262, "grad_norm": 0.3937050998210907, "learning_rate": 3.356252320357718e-05, "loss": 1.623, "step": 5840 }, { "epoch": 0.6067310688688065, "grad_norm": 0.4089513123035431, "learning_rate": 3.354711442933279e-05, "loss": 1.5882, "step": 5841 }, { "epoch": 0.6068349433883868, "grad_norm": 0.43716487288475037, "learning_rate": 3.353170740719236e-05, "loss": 1.814, "step": 5842 }, { "epoch": 0.6069388179079672, "grad_norm": 0.4473409056663513, "learning_rate": 3.351630213879665e-05, "loss": 1.5899, "step": 5843 }, { "epoch": 0.6070426924275475, "grad_norm": 0.40252211689949036, "learning_rate": 3.3500898625786184e-05, "loss": 1.6746, "step": 5844 }, { "epoch": 0.6071465669471279, "grad_norm": 0.406857967376709, "learning_rate": 3.34854968698013e-05, "loss": 1.5783, "step": 5845 }, { "epoch": 0.6072504414667083, "grad_norm": 0.41453415155410767, "learning_rate": 3.347009687248219e-05, "loss": 1.7318, "step": 5846 }, { "epoch": 0.6073543159862885, "grad_norm": 0.3779454231262207, "learning_rate": 3.345469863546884e-05, "loss": 1.5555, "step": 5847 }, { "epoch": 0.6074581905058689, "grad_norm": 0.40654996037483215, "learning_rate": 3.3439302160401e-05, "loss": 1.5914, "step": 5848 }, { "epoch": 0.6075620650254493, "grad_norm": 0.4251600205898285, "learning_rate": 3.342390744891831e-05, "loss": 1.7131, "step": 5849 }, { "epoch": 0.6076659395450296, "grad_norm": 0.45632246136665344, "learning_rate": 3.340851450266019e-05, "loss": 1.7986, "step": 5850 }, { "epoch": 0.60776981406461, "grad_norm": 0.43041008710861206, "learning_rate": 3.3393123323265855e-05, "loss": 1.7041, "step": 5851 }, { "epoch": 0.6078736885841903, "grad_norm": 0.3932364284992218, "learning_rate": 3.337773391237433e-05, "loss": 1.5044, "step": 5852 }, { "epoch": 0.6079775631037706, "grad_norm": 0.3887140154838562, "learning_rate": 3.336234627162451e-05, "loss": 1.6597, "step": 5853 }, { "epoch": 0.608081437623351, "grad_norm": 0.41823363304138184, "learning_rate": 3.3346960402655016e-05, "loss": 1.6794, "step": 5854 }, { "epoch": 0.6081853121429314, "grad_norm": 0.4427807033061981, "learning_rate": 3.333157630710434e-05, "loss": 1.8548, "step": 5855 }, { "epoch": 0.6082891866625116, "grad_norm": 0.4366241693496704, "learning_rate": 3.331619398661078e-05, "loss": 1.597, "step": 5856 }, { "epoch": 0.608393061182092, "grad_norm": 0.4059387147426605, "learning_rate": 3.3300813442812406e-05, "loss": 1.5453, "step": 5857 }, { "epoch": 0.6084969357016724, "grad_norm": 0.40862488746643066, "learning_rate": 3.328543467734715e-05, "loss": 1.6159, "step": 5858 }, { "epoch": 0.6086008102212527, "grad_norm": 0.4672967195510864, "learning_rate": 3.327005769185273e-05, "loss": 1.9154, "step": 5859 }, { "epoch": 0.6087046847408331, "grad_norm": 0.4559909403324127, "learning_rate": 3.325468248796666e-05, "loss": 1.6842, "step": 5860 }, { "epoch": 0.6088085592604134, "grad_norm": 0.3882789611816406, "learning_rate": 3.323930906732628e-05, "loss": 1.5068, "step": 5861 }, { "epoch": 0.6089124337799938, "grad_norm": 0.4709301292896271, "learning_rate": 3.322393743156874e-05, "loss": 1.8432, "step": 5862 }, { "epoch": 0.6090163082995741, "grad_norm": 0.43151330947875977, "learning_rate": 3.3208567582331034e-05, "loss": 1.7516, "step": 5863 }, { "epoch": 0.6091201828191545, "grad_norm": 0.3997372090816498, "learning_rate": 3.3193199521249876e-05, "loss": 1.7277, "step": 5864 }, { "epoch": 0.6092240573387349, "grad_norm": 0.44914302229881287, "learning_rate": 3.3177833249961886e-05, "loss": 1.5975, "step": 5865 }, { "epoch": 0.6093279318583151, "grad_norm": 0.3897424638271332, "learning_rate": 3.316246877010344e-05, "loss": 1.6415, "step": 5866 }, { "epoch": 0.6094318063778955, "grad_norm": 0.4456162452697754, "learning_rate": 3.314710608331072e-05, "loss": 1.7596, "step": 5867 }, { "epoch": 0.6095356808974759, "grad_norm": 0.4402760863304138, "learning_rate": 3.3131745191219744e-05, "loss": 1.7909, "step": 5868 }, { "epoch": 0.6096395554170562, "grad_norm": 0.41520991921424866, "learning_rate": 3.311638609546635e-05, "loss": 1.6518, "step": 5869 }, { "epoch": 0.6097434299366365, "grad_norm": 0.40946778655052185, "learning_rate": 3.3101028797686106e-05, "loss": 1.6841, "step": 5870 }, { "epoch": 0.6098473044562169, "grad_norm": 0.4258642792701721, "learning_rate": 3.3085673299514496e-05, "loss": 1.7182, "step": 5871 }, { "epoch": 0.6099511789757972, "grad_norm": 0.38915473222732544, "learning_rate": 3.307031960258674e-05, "loss": 1.5292, "step": 5872 }, { "epoch": 0.6100550534953776, "grad_norm": 0.39618629217147827, "learning_rate": 3.30549677085379e-05, "loss": 1.6406, "step": 5873 }, { "epoch": 0.610158928014958, "grad_norm": 0.3926134407520294, "learning_rate": 3.30396176190028e-05, "loss": 1.5848, "step": 5874 }, { "epoch": 0.6102628025345382, "grad_norm": 0.41277050971984863, "learning_rate": 3.3024269335616155e-05, "loss": 1.6565, "step": 5875 }, { "epoch": 0.6103666770541186, "grad_norm": 0.40988320112228394, "learning_rate": 3.300892286001238e-05, "loss": 1.72, "step": 5876 }, { "epoch": 0.610470551573699, "grad_norm": 0.4124213457107544, "learning_rate": 3.29935781938258e-05, "loss": 1.5959, "step": 5877 }, { "epoch": 0.6105744260932793, "grad_norm": 0.3976089060306549, "learning_rate": 3.29782353386905e-05, "loss": 1.7009, "step": 5878 }, { "epoch": 0.6106783006128597, "grad_norm": 0.40462133288383484, "learning_rate": 3.296289429624035e-05, "loss": 1.5514, "step": 5879 }, { "epoch": 0.61078217513244, "grad_norm": 0.42904216051101685, "learning_rate": 3.294755506810905e-05, "loss": 1.6511, "step": 5880 }, { "epoch": 0.6108860496520203, "grad_norm": 0.44326838850975037, "learning_rate": 3.293221765593015e-05, "loss": 1.4797, "step": 5881 }, { "epoch": 0.6109899241716007, "grad_norm": 0.3995533883571625, "learning_rate": 3.291688206133691e-05, "loss": 1.77, "step": 5882 }, { "epoch": 0.6110937986911811, "grad_norm": 0.3896981179714203, "learning_rate": 3.290154828596248e-05, "loss": 1.6022, "step": 5883 }, { "epoch": 0.6111976732107613, "grad_norm": 0.38104161620140076, "learning_rate": 3.28862163314398e-05, "loss": 1.5761, "step": 5884 }, { "epoch": 0.6113015477303417, "grad_norm": 0.4309033155441284, "learning_rate": 3.287088619940158e-05, "loss": 1.7047, "step": 5885 }, { "epoch": 0.6114054222499221, "grad_norm": 0.38424256443977356, "learning_rate": 3.285555789148036e-05, "loss": 1.5804, "step": 5886 }, { "epoch": 0.6115092967695024, "grad_norm": 0.39635035395622253, "learning_rate": 3.284023140930851e-05, "loss": 1.7646, "step": 5887 }, { "epoch": 0.6116131712890828, "grad_norm": 0.40174224972724915, "learning_rate": 3.2824906754518146e-05, "loss": 1.6731, "step": 5888 }, { "epoch": 0.6117170458086632, "grad_norm": 0.42028430104255676, "learning_rate": 3.280958392874124e-05, "loss": 1.7342, "step": 5889 }, { "epoch": 0.6118209203282435, "grad_norm": 0.40835410356521606, "learning_rate": 3.2794262933609555e-05, "loss": 1.6333, "step": 5890 }, { "epoch": 0.6119247948478238, "grad_norm": 0.43254995346069336, "learning_rate": 3.277894377075468e-05, "loss": 1.8507, "step": 5891 }, { "epoch": 0.6120286693674042, "grad_norm": 0.41028037667274475, "learning_rate": 3.276362644180794e-05, "loss": 1.712, "step": 5892 }, { "epoch": 0.6121325438869846, "grad_norm": 0.428543359041214, "learning_rate": 3.2748310948400545e-05, "loss": 1.6575, "step": 5893 }, { "epoch": 0.6122364184065648, "grad_norm": 0.46985751390457153, "learning_rate": 3.273299729216348e-05, "loss": 1.9697, "step": 5894 }, { "epoch": 0.6123402929261452, "grad_norm": 0.4072946012020111, "learning_rate": 3.2717685474727495e-05, "loss": 1.7102, "step": 5895 }, { "epoch": 0.6124441674457256, "grad_norm": 0.41395875811576843, "learning_rate": 3.270237549772319e-05, "loss": 1.6602, "step": 5896 }, { "epoch": 0.6125480419653059, "grad_norm": 0.40566155314445496, "learning_rate": 3.2687067362780997e-05, "loss": 1.6355, "step": 5897 }, { "epoch": 0.6126519164848863, "grad_norm": 0.40978339314460754, "learning_rate": 3.267176107153107e-05, "loss": 1.5367, "step": 5898 }, { "epoch": 0.6127557910044666, "grad_norm": 0.39154794812202454, "learning_rate": 3.2656456625603425e-05, "loss": 1.7275, "step": 5899 }, { "epoch": 0.6128596655240469, "grad_norm": 0.4286908209323883, "learning_rate": 3.2641154026627866e-05, "loss": 1.7106, "step": 5900 }, { "epoch": 0.6129635400436273, "grad_norm": 0.39364275336265564, "learning_rate": 3.2625853276234e-05, "loss": 1.6203, "step": 5901 }, { "epoch": 0.6130674145632077, "grad_norm": 0.43890032172203064, "learning_rate": 3.2610554376051224e-05, "loss": 1.6936, "step": 5902 }, { "epoch": 0.613171289082788, "grad_norm": 0.4209052622318268, "learning_rate": 3.2595257327708785e-05, "loss": 1.9501, "step": 5903 }, { "epoch": 0.6132751636023683, "grad_norm": 0.4081374704837799, "learning_rate": 3.257996213283566e-05, "loss": 1.5555, "step": 5904 }, { "epoch": 0.6133790381219487, "grad_norm": 0.3956921696662903, "learning_rate": 3.256466879306068e-05, "loss": 1.5927, "step": 5905 }, { "epoch": 0.613482912641529, "grad_norm": 0.4056268334388733, "learning_rate": 3.25493773100125e-05, "loss": 1.6676, "step": 5906 }, { "epoch": 0.6135867871611094, "grad_norm": 0.4204394221305847, "learning_rate": 3.2534087685319506e-05, "loss": 1.5935, "step": 5907 }, { "epoch": 0.6136906616806898, "grad_norm": 0.4562225043773651, "learning_rate": 3.2518799920609924e-05, "loss": 1.7057, "step": 5908 }, { "epoch": 0.61379453620027, "grad_norm": 0.4130711257457733, "learning_rate": 3.2503514017511806e-05, "loss": 1.6022, "step": 5909 }, { "epoch": 0.6138984107198504, "grad_norm": 0.4055189788341522, "learning_rate": 3.2488229977652954e-05, "loss": 1.7709, "step": 5910 }, { "epoch": 0.6140022852394308, "grad_norm": 0.406704306602478, "learning_rate": 3.2472947802661005e-05, "loss": 1.702, "step": 5911 }, { "epoch": 0.6141061597590111, "grad_norm": 0.41046005487442017, "learning_rate": 3.245766749416342e-05, "loss": 1.6426, "step": 5912 }, { "epoch": 0.6142100342785914, "grad_norm": 0.40672361850738525, "learning_rate": 3.24423890537874e-05, "loss": 1.6559, "step": 5913 }, { "epoch": 0.6143139087981718, "grad_norm": 0.3978021442890167, "learning_rate": 3.242711248316e-05, "loss": 1.7074, "step": 5914 }, { "epoch": 0.6144177833177522, "grad_norm": 0.43113160133361816, "learning_rate": 3.241183778390803e-05, "loss": 1.8111, "step": 5915 }, { "epoch": 0.6145216578373325, "grad_norm": 0.3847864270210266, "learning_rate": 3.239656495765817e-05, "loss": 1.5673, "step": 5916 }, { "epoch": 0.6146255323569129, "grad_norm": 0.406105101108551, "learning_rate": 3.23812940060368e-05, "loss": 1.6545, "step": 5917 }, { "epoch": 0.6147294068764932, "grad_norm": 0.4283885657787323, "learning_rate": 3.2366024930670194e-05, "loss": 1.6233, "step": 5918 }, { "epoch": 0.6148332813960735, "grad_norm": 0.39741960167884827, "learning_rate": 3.23507577331844e-05, "loss": 1.7184, "step": 5919 }, { "epoch": 0.6149371559156539, "grad_norm": 0.4278057813644409, "learning_rate": 3.2335492415205224e-05, "loss": 1.6877, "step": 5920 }, { "epoch": 0.6150410304352343, "grad_norm": 0.3887667655944824, "learning_rate": 3.2320228978358324e-05, "loss": 1.6024, "step": 5921 }, { "epoch": 0.6151449049548146, "grad_norm": 0.4222593903541565, "learning_rate": 3.230496742426914e-05, "loss": 1.5523, "step": 5922 }, { "epoch": 0.6152487794743949, "grad_norm": 0.4194217026233673, "learning_rate": 3.228970775456288e-05, "loss": 1.6793, "step": 5923 }, { "epoch": 0.6153526539939753, "grad_norm": 0.41720110177993774, "learning_rate": 3.2274449970864595e-05, "loss": 1.6962, "step": 5924 }, { "epoch": 0.6154565285135556, "grad_norm": 0.43170827627182007, "learning_rate": 3.225919407479914e-05, "loss": 1.7432, "step": 5925 }, { "epoch": 0.615560403033136, "grad_norm": 0.41001245379447937, "learning_rate": 3.224394006799112e-05, "loss": 1.6198, "step": 5926 }, { "epoch": 0.6156642775527164, "grad_norm": 0.39082086086273193, "learning_rate": 3.222868795206498e-05, "loss": 1.5267, "step": 5927 }, { "epoch": 0.6157681520722966, "grad_norm": 0.4277300536632538, "learning_rate": 3.221343772864497e-05, "loss": 1.8421, "step": 5928 }, { "epoch": 0.615872026591877, "grad_norm": 0.4021863639354706, "learning_rate": 3.2198189399355085e-05, "loss": 1.701, "step": 5929 }, { "epoch": 0.6159759011114574, "grad_norm": 0.39132073521614075, "learning_rate": 3.218294296581916e-05, "loss": 1.5238, "step": 5930 }, { "epoch": 0.6160797756310377, "grad_norm": 0.44258078932762146, "learning_rate": 3.2167698429660866e-05, "loss": 1.7779, "step": 5931 }, { "epoch": 0.616183650150618, "grad_norm": 0.4172841012477875, "learning_rate": 3.215245579250355e-05, "loss": 1.6755, "step": 5932 }, { "epoch": 0.6162875246701984, "grad_norm": 0.46141964197158813, "learning_rate": 3.2137215055970496e-05, "loss": 1.8098, "step": 5933 }, { "epoch": 0.6163913991897787, "grad_norm": 0.39755067229270935, "learning_rate": 3.2121976221684714e-05, "loss": 1.5704, "step": 5934 }, { "epoch": 0.6164952737093591, "grad_norm": 0.44859442114830017, "learning_rate": 3.2106739291269005e-05, "loss": 1.7943, "step": 5935 }, { "epoch": 0.6165991482289395, "grad_norm": 0.4175950288772583, "learning_rate": 3.209150426634597e-05, "loss": 1.7347, "step": 5936 }, { "epoch": 0.6167030227485197, "grad_norm": 0.42605000734329224, "learning_rate": 3.2076271148538066e-05, "loss": 1.6469, "step": 5937 }, { "epoch": 0.6168068972681001, "grad_norm": 0.42592230439186096, "learning_rate": 3.2061039939467454e-05, "loss": 1.7029, "step": 5938 }, { "epoch": 0.6169107717876805, "grad_norm": 0.4934481978416443, "learning_rate": 3.204581064075616e-05, "loss": 2.0553, "step": 5939 }, { "epoch": 0.6170146463072609, "grad_norm": 0.4315035939216614, "learning_rate": 3.203058325402599e-05, "loss": 1.7076, "step": 5940 }, { "epoch": 0.6171185208268412, "grad_norm": 0.4047625958919525, "learning_rate": 3.201535778089853e-05, "loss": 1.5993, "step": 5941 }, { "epoch": 0.6172223953464215, "grad_norm": 0.4070419371128082, "learning_rate": 3.20001342229952e-05, "loss": 1.3709, "step": 5942 }, { "epoch": 0.6173262698660019, "grad_norm": 0.4009752869606018, "learning_rate": 3.1984912581937146e-05, "loss": 1.6183, "step": 5943 }, { "epoch": 0.6174301443855822, "grad_norm": 0.40829452872276306, "learning_rate": 3.196969285934541e-05, "loss": 1.5723, "step": 5944 }, { "epoch": 0.6175340189051626, "grad_norm": 0.43150803446769714, "learning_rate": 3.1954475056840714e-05, "loss": 1.593, "step": 5945 }, { "epoch": 0.617637893424743, "grad_norm": 0.4301930367946625, "learning_rate": 3.193925917604367e-05, "loss": 1.7796, "step": 5946 }, { "epoch": 0.6177417679443232, "grad_norm": 0.4352579414844513, "learning_rate": 3.1924045218574654e-05, "loss": 1.8134, "step": 5947 }, { "epoch": 0.6178456424639036, "grad_norm": 0.3833911716938019, "learning_rate": 3.190883318605382e-05, "loss": 1.5344, "step": 5948 }, { "epoch": 0.617949516983484, "grad_norm": 0.4389069676399231, "learning_rate": 3.189362308010113e-05, "loss": 1.6706, "step": 5949 }, { "epoch": 0.6180533915030643, "grad_norm": 0.3941787779331207, "learning_rate": 3.187841490233636e-05, "loss": 1.5388, "step": 5950 }, { "epoch": 0.6181572660226446, "grad_norm": 0.44791027903556824, "learning_rate": 3.1863208654379036e-05, "loss": 1.7468, "step": 5951 }, { "epoch": 0.618261140542225, "grad_norm": 0.41120895743370056, "learning_rate": 3.184800433784851e-05, "loss": 1.5909, "step": 5952 }, { "epoch": 0.6183650150618053, "grad_norm": 0.41934871673583984, "learning_rate": 3.1832801954363955e-05, "loss": 1.8014, "step": 5953 }, { "epoch": 0.6184688895813857, "grad_norm": 0.38895368576049805, "learning_rate": 3.1817601505544264e-05, "loss": 1.6452, "step": 5954 }, { "epoch": 0.6185727641009661, "grad_norm": 0.38875988125801086, "learning_rate": 3.18024029930082e-05, "loss": 1.6638, "step": 5955 }, { "epoch": 0.6186766386205463, "grad_norm": 0.4126511216163635, "learning_rate": 3.178720641837426e-05, "loss": 1.6237, "step": 5956 }, { "epoch": 0.6187805131401267, "grad_norm": 0.4127541184425354, "learning_rate": 3.177201178326077e-05, "loss": 1.6954, "step": 5957 }, { "epoch": 0.6188843876597071, "grad_norm": 0.4332854449748993, "learning_rate": 3.175681908928584e-05, "loss": 1.7631, "step": 5958 }, { "epoch": 0.6189882621792874, "grad_norm": 0.401528537273407, "learning_rate": 3.174162833806739e-05, "loss": 1.6, "step": 5959 }, { "epoch": 0.6190921366988678, "grad_norm": 0.4269302785396576, "learning_rate": 3.172643953122308e-05, "loss": 1.7953, "step": 5960 }, { "epoch": 0.6191960112184481, "grad_norm": 0.4833371341228485, "learning_rate": 3.171125267037042e-05, "loss": 1.779, "step": 5961 }, { "epoch": 0.6192998857380284, "grad_norm": 0.4078036844730377, "learning_rate": 3.169606775712672e-05, "loss": 1.7738, "step": 5962 }, { "epoch": 0.6194037602576088, "grad_norm": 0.3845091164112091, "learning_rate": 3.1680884793109014e-05, "loss": 1.488, "step": 5963 }, { "epoch": 0.6195076347771892, "grad_norm": 0.4441048800945282, "learning_rate": 3.166570377993417e-05, "loss": 1.6613, "step": 5964 }, { "epoch": 0.6196115092967694, "grad_norm": 0.4266035556793213, "learning_rate": 3.165052471921888e-05, "loss": 1.6224, "step": 5965 }, { "epoch": 0.6197153838163498, "grad_norm": 0.4394325315952301, "learning_rate": 3.163534761257956e-05, "loss": 1.6947, "step": 5966 }, { "epoch": 0.6198192583359302, "grad_norm": 0.4144458472728729, "learning_rate": 3.162017246163247e-05, "loss": 1.7144, "step": 5967 }, { "epoch": 0.6199231328555106, "grad_norm": 0.39400461316108704, "learning_rate": 3.160499926799365e-05, "loss": 1.6563, "step": 5968 }, { "epoch": 0.6200270073750909, "grad_norm": 0.40855416655540466, "learning_rate": 3.1589828033278925e-05, "loss": 1.7237, "step": 5969 }, { "epoch": 0.6201308818946712, "grad_norm": 0.39257362484931946, "learning_rate": 3.157465875910391e-05, "loss": 1.6751, "step": 5970 }, { "epoch": 0.6202347564142516, "grad_norm": 0.3722376525402069, "learning_rate": 3.1559491447084e-05, "loss": 1.547, "step": 5971 }, { "epoch": 0.6203386309338319, "grad_norm": 0.38416385650634766, "learning_rate": 3.1544326098834433e-05, "loss": 1.6595, "step": 5972 }, { "epoch": 0.6204425054534123, "grad_norm": 0.42782196402549744, "learning_rate": 3.152916271597015e-05, "loss": 1.755, "step": 5973 }, { "epoch": 0.6205463799729927, "grad_norm": 0.3950657248497009, "learning_rate": 3.151400130010597e-05, "loss": 1.6521, "step": 5974 }, { "epoch": 0.6206502544925729, "grad_norm": 0.39494791626930237, "learning_rate": 3.149884185285647e-05, "loss": 1.6084, "step": 5975 }, { "epoch": 0.6207541290121533, "grad_norm": 0.4145883321762085, "learning_rate": 3.148368437583599e-05, "loss": 1.5866, "step": 5976 }, { "epoch": 0.6208580035317337, "grad_norm": 0.5039044618606567, "learning_rate": 3.146852887065868e-05, "loss": 1.8135, "step": 5977 }, { "epoch": 0.620961878051314, "grad_norm": 0.42017415165901184, "learning_rate": 3.145337533893851e-05, "loss": 1.6958, "step": 5978 }, { "epoch": 0.6210657525708944, "grad_norm": 0.41060763597488403, "learning_rate": 3.143822378228918e-05, "loss": 1.5397, "step": 5979 }, { "epoch": 0.6211696270904747, "grad_norm": 0.42383599281311035, "learning_rate": 3.142307420232423e-05, "loss": 1.7797, "step": 5980 }, { "epoch": 0.621273501610055, "grad_norm": 0.446959912776947, "learning_rate": 3.1407926600657e-05, "loss": 1.8221, "step": 5981 }, { "epoch": 0.6213773761296354, "grad_norm": 0.403238445520401, "learning_rate": 3.1392780978900535e-05, "loss": 1.6687, "step": 5982 }, { "epoch": 0.6214812506492158, "grad_norm": 0.4446134567260742, "learning_rate": 3.1377637338667764e-05, "loss": 1.719, "step": 5983 }, { "epoch": 0.621585125168796, "grad_norm": 0.4344415068626404, "learning_rate": 3.136249568157136e-05, "loss": 1.6722, "step": 5984 }, { "epoch": 0.6216889996883764, "grad_norm": 0.38726866245269775, "learning_rate": 3.1347356009223786e-05, "loss": 1.6432, "step": 5985 }, { "epoch": 0.6217928742079568, "grad_norm": 0.43266814947128296, "learning_rate": 3.133221832323728e-05, "loss": 1.747, "step": 5986 }, { "epoch": 0.6218967487275371, "grad_norm": 0.39049196243286133, "learning_rate": 3.131708262522395e-05, "loss": 1.6543, "step": 5987 }, { "epoch": 0.6220006232471175, "grad_norm": 0.40162894129753113, "learning_rate": 3.1301948916795554e-05, "loss": 1.5967, "step": 5988 }, { "epoch": 0.6221044977666979, "grad_norm": 0.4087611734867096, "learning_rate": 3.128681719956375e-05, "loss": 1.647, "step": 5989 }, { "epoch": 0.6222083722862781, "grad_norm": 0.4542302191257477, "learning_rate": 3.1271687475139966e-05, "loss": 1.7831, "step": 5990 }, { "epoch": 0.6223122468058585, "grad_norm": 0.40611404180526733, "learning_rate": 3.125655974513537e-05, "loss": 1.6214, "step": 5991 }, { "epoch": 0.6224161213254389, "grad_norm": 0.4047275483608246, "learning_rate": 3.124143401116094e-05, "loss": 1.7274, "step": 5992 }, { "epoch": 0.6225199958450193, "grad_norm": 0.4009588062763214, "learning_rate": 3.1226310274827473e-05, "loss": 1.7371, "step": 5993 }, { "epoch": 0.6226238703645995, "grad_norm": 0.398447185754776, "learning_rate": 3.121118853774554e-05, "loss": 1.5706, "step": 5994 }, { "epoch": 0.6227277448841799, "grad_norm": 0.46983155608177185, "learning_rate": 3.119606880152545e-05, "loss": 1.7618, "step": 5995 }, { "epoch": 0.6228316194037603, "grad_norm": 0.42344680428504944, "learning_rate": 3.1180951067777365e-05, "loss": 1.6264, "step": 5996 }, { "epoch": 0.6229354939233406, "grad_norm": 0.4075701832771301, "learning_rate": 3.11658353381112e-05, "loss": 1.7759, "step": 5997 }, { "epoch": 0.623039368442921, "grad_norm": 0.40149906277656555, "learning_rate": 3.1150721614136655e-05, "loss": 1.7063, "step": 5998 }, { "epoch": 0.6231432429625013, "grad_norm": 0.3998561203479767, "learning_rate": 3.113560989746322e-05, "loss": 1.4695, "step": 5999 }, { "epoch": 0.6232471174820816, "grad_norm": 0.4251110553741455, "learning_rate": 3.11205001897002e-05, "loss": 1.7207, "step": 6000 }, { "epoch": 0.623350992001662, "grad_norm": 0.41832244396209717, "learning_rate": 3.110539249245663e-05, "loss": 1.4959, "step": 6001 }, { "epoch": 0.6234548665212424, "grad_norm": 0.3928918242454529, "learning_rate": 3.1090286807341374e-05, "loss": 1.7322, "step": 6002 }, { "epoch": 0.6235587410408227, "grad_norm": 0.4026903212070465, "learning_rate": 3.107518313596309e-05, "loss": 1.8031, "step": 6003 }, { "epoch": 0.623662615560403, "grad_norm": 0.4026026129722595, "learning_rate": 3.106008147993017e-05, "loss": 1.5662, "step": 6004 }, { "epoch": 0.6237664900799834, "grad_norm": 0.4900519549846649, "learning_rate": 3.1044981840850826e-05, "loss": 1.7387, "step": 6005 }, { "epoch": 0.6238703645995637, "grad_norm": 0.4159107804298401, "learning_rate": 3.102988422033309e-05, "loss": 1.6261, "step": 6006 }, { "epoch": 0.6239742391191441, "grad_norm": 0.46456992626190186, "learning_rate": 3.101478861998469e-05, "loss": 1.8022, "step": 6007 }, { "epoch": 0.6240781136387245, "grad_norm": 0.4258232116699219, "learning_rate": 3.09996950414132e-05, "loss": 1.7152, "step": 6008 }, { "epoch": 0.6241819881583047, "grad_norm": 0.4385705888271332, "learning_rate": 3.098460348622601e-05, "loss": 1.7273, "step": 6009 }, { "epoch": 0.6242858626778851, "grad_norm": 0.3911799192428589, "learning_rate": 3.09695139560302e-05, "loss": 1.525, "step": 6010 }, { "epoch": 0.6243897371974655, "grad_norm": 0.45774349570274353, "learning_rate": 3.095442645243272e-05, "loss": 1.8, "step": 6011 }, { "epoch": 0.6244936117170458, "grad_norm": 0.3881503939628601, "learning_rate": 3.0939340977040267e-05, "loss": 1.7008, "step": 6012 }, { "epoch": 0.6245974862366261, "grad_norm": 0.41384172439575195, "learning_rate": 3.092425753145931e-05, "loss": 1.5997, "step": 6013 }, { "epoch": 0.6247013607562065, "grad_norm": 0.4150152802467346, "learning_rate": 3.0909176117296126e-05, "loss": 1.6308, "step": 6014 }, { "epoch": 0.6248052352757868, "grad_norm": 0.4042709767818451, "learning_rate": 3.08940967361568e-05, "loss": 1.6706, "step": 6015 }, { "epoch": 0.6249091097953672, "grad_norm": 0.4141731560230255, "learning_rate": 3.08790193896471e-05, "loss": 1.6422, "step": 6016 }, { "epoch": 0.6250129843149476, "grad_norm": 0.40485629439353943, "learning_rate": 3.0863944079372694e-05, "loss": 1.6133, "step": 6017 }, { "epoch": 0.6251168588345279, "grad_norm": 0.4418627917766571, "learning_rate": 3.0848870806938995e-05, "loss": 1.741, "step": 6018 }, { "epoch": 0.6252207333541082, "grad_norm": 0.40159109234809875, "learning_rate": 3.083379957395117e-05, "loss": 1.618, "step": 6019 }, { "epoch": 0.6253246078736886, "grad_norm": 0.4042385220527649, "learning_rate": 3.081873038201418e-05, "loss": 1.7609, "step": 6020 }, { "epoch": 0.625428482393269, "grad_norm": 0.4162798523902893, "learning_rate": 3.0803663232732775e-05, "loss": 1.7222, "step": 6021 }, { "epoch": 0.6255323569128493, "grad_norm": 0.3897026777267456, "learning_rate": 3.078859812771152e-05, "loss": 1.5955, "step": 6022 }, { "epoch": 0.6256362314324296, "grad_norm": 0.4212419092655182, "learning_rate": 3.0773535068554695e-05, "loss": 1.6507, "step": 6023 }, { "epoch": 0.62574010595201, "grad_norm": 0.4059465229511261, "learning_rate": 3.075847405686641e-05, "loss": 1.6731, "step": 6024 }, { "epoch": 0.6258439804715903, "grad_norm": 0.4359121024608612, "learning_rate": 3.074341509425056e-05, "loss": 1.5718, "step": 6025 }, { "epoch": 0.6259478549911707, "grad_norm": 0.4287969470024109, "learning_rate": 3.07283581823108e-05, "loss": 1.6525, "step": 6026 }, { "epoch": 0.626051729510751, "grad_norm": 0.4397551715373993, "learning_rate": 3.071330332265054e-05, "loss": 1.6681, "step": 6027 }, { "epoch": 0.6261556040303313, "grad_norm": 0.4228843152523041, "learning_rate": 3.069825051687307e-05, "loss": 1.68, "step": 6028 }, { "epoch": 0.6262594785499117, "grad_norm": 0.4212433993816376, "learning_rate": 3.068319976658134e-05, "loss": 1.7587, "step": 6029 }, { "epoch": 0.6263633530694921, "grad_norm": 0.41605517268180847, "learning_rate": 3.066815107337815e-05, "loss": 1.6564, "step": 6030 }, { "epoch": 0.6264672275890724, "grad_norm": 0.4286707639694214, "learning_rate": 3.0653104438866104e-05, "loss": 1.7046, "step": 6031 }, { "epoch": 0.6265711021086527, "grad_norm": 0.4247523248195648, "learning_rate": 3.063805986464751e-05, "loss": 1.7082, "step": 6032 }, { "epoch": 0.6266749766282331, "grad_norm": 0.4271533489227295, "learning_rate": 3.06230173523245e-05, "loss": 1.7456, "step": 6033 }, { "epoch": 0.6267788511478134, "grad_norm": 0.43358635902404785, "learning_rate": 3.060797690349902e-05, "loss": 1.5457, "step": 6034 }, { "epoch": 0.6268827256673938, "grad_norm": 0.3621548116207123, "learning_rate": 3.0592938519772715e-05, "loss": 1.3377, "step": 6035 }, { "epoch": 0.6269866001869742, "grad_norm": 0.4215681850910187, "learning_rate": 3.057790220274707e-05, "loss": 1.5863, "step": 6036 }, { "epoch": 0.6270904747065544, "grad_norm": 0.425849586725235, "learning_rate": 3.056286795402337e-05, "loss": 1.6182, "step": 6037 }, { "epoch": 0.6271943492261348, "grad_norm": 0.4131641089916229, "learning_rate": 3.0547835775202597e-05, "loss": 1.6674, "step": 6038 }, { "epoch": 0.6272982237457152, "grad_norm": 0.41090524196624756, "learning_rate": 3.053280566788559e-05, "loss": 1.5321, "step": 6039 }, { "epoch": 0.6274020982652955, "grad_norm": 0.443823903799057, "learning_rate": 3.051777763367293e-05, "loss": 1.5838, "step": 6040 }, { "epoch": 0.6275059727848759, "grad_norm": 0.41161811351776123, "learning_rate": 3.0502751674164976e-05, "loss": 1.6502, "step": 6041 }, { "epoch": 0.6276098473044562, "grad_norm": 0.4168041944503784, "learning_rate": 3.0487727790961884e-05, "loss": 1.6534, "step": 6042 }, { "epoch": 0.6277137218240366, "grad_norm": 0.43634623289108276, "learning_rate": 3.0472705985663575e-05, "loss": 1.6755, "step": 6043 }, { "epoch": 0.6278175963436169, "grad_norm": 0.45974037051200867, "learning_rate": 3.045768625986979e-05, "loss": 1.8234, "step": 6044 }, { "epoch": 0.6279214708631973, "grad_norm": 0.4236984848976135, "learning_rate": 3.0442668615179953e-05, "loss": 1.794, "step": 6045 }, { "epoch": 0.6280253453827777, "grad_norm": 0.3920854330062866, "learning_rate": 3.0427653053193366e-05, "loss": 1.6738, "step": 6046 }, { "epoch": 0.6281292199023579, "grad_norm": 0.44960126280784607, "learning_rate": 3.0412639575509073e-05, "loss": 1.6268, "step": 6047 }, { "epoch": 0.6282330944219383, "grad_norm": 0.4008250832557678, "learning_rate": 3.0397628183725856e-05, "loss": 1.4388, "step": 6048 }, { "epoch": 0.6283369689415187, "grad_norm": 0.39700058102607727, "learning_rate": 3.038261887944233e-05, "loss": 1.613, "step": 6049 }, { "epoch": 0.628440843461099, "grad_norm": 0.46325239539146423, "learning_rate": 3.0367611664256895e-05, "loss": 1.8034, "step": 6050 }, { "epoch": 0.6285447179806793, "grad_norm": 0.434199720621109, "learning_rate": 3.0352606539767657e-05, "loss": 1.669, "step": 6051 }, { "epoch": 0.6286485925002597, "grad_norm": 0.3849222958087921, "learning_rate": 3.033760350757258e-05, "loss": 1.5992, "step": 6052 }, { "epoch": 0.62875246701984, "grad_norm": 0.4169051945209503, "learning_rate": 3.0322602569269355e-05, "loss": 1.5102, "step": 6053 }, { "epoch": 0.6288563415394204, "grad_norm": 0.42412588000297546, "learning_rate": 3.030760372645547e-05, "loss": 1.5752, "step": 6054 }, { "epoch": 0.6289602160590008, "grad_norm": 0.4000975787639618, "learning_rate": 3.0292606980728165e-05, "loss": 1.715, "step": 6055 }, { "epoch": 0.629064090578581, "grad_norm": 0.3950633704662323, "learning_rate": 3.0277612333684526e-05, "loss": 1.6543, "step": 6056 }, { "epoch": 0.6291679650981614, "grad_norm": 0.4239039421081543, "learning_rate": 3.02626197869213e-05, "loss": 1.7277, "step": 6057 }, { "epoch": 0.6292718396177418, "grad_norm": 0.44476318359375, "learning_rate": 3.024762934203511e-05, "loss": 1.8181, "step": 6058 }, { "epoch": 0.6293757141373221, "grad_norm": 0.3775947093963623, "learning_rate": 3.023264100062234e-05, "loss": 1.5467, "step": 6059 }, { "epoch": 0.6294795886569025, "grad_norm": 0.4439935088157654, "learning_rate": 3.0217654764279114e-05, "loss": 1.9913, "step": 6060 }, { "epoch": 0.6295834631764828, "grad_norm": 0.4108879566192627, "learning_rate": 3.0202670634601325e-05, "loss": 1.6593, "step": 6061 }, { "epoch": 0.6296873376960631, "grad_norm": 0.4360349476337433, "learning_rate": 3.0187688613184717e-05, "loss": 1.4867, "step": 6062 }, { "epoch": 0.6297912122156435, "grad_norm": 0.4250946044921875, "learning_rate": 3.0172708701624704e-05, "loss": 1.6254, "step": 6063 }, { "epoch": 0.6298950867352239, "grad_norm": 0.44005441665649414, "learning_rate": 3.0157730901516556e-05, "loss": 1.7055, "step": 6064 }, { "epoch": 0.6299989612548041, "grad_norm": 0.40063902735710144, "learning_rate": 3.014275521445531e-05, "loss": 1.4714, "step": 6065 }, { "epoch": 0.6301028357743845, "grad_norm": 0.3904891610145569, "learning_rate": 3.0127781642035725e-05, "loss": 1.5278, "step": 6066 }, { "epoch": 0.6302067102939649, "grad_norm": 0.36918964982032776, "learning_rate": 3.0112810185852393e-05, "loss": 1.4645, "step": 6067 }, { "epoch": 0.6303105848135452, "grad_norm": 0.43034636974334717, "learning_rate": 3.009784084749966e-05, "loss": 1.6989, "step": 6068 }, { "epoch": 0.6304144593331256, "grad_norm": 0.43391627073287964, "learning_rate": 3.0082873628571616e-05, "loss": 1.7468, "step": 6069 }, { "epoch": 0.630518333852706, "grad_norm": 0.47783637046813965, "learning_rate": 3.006790853066217e-05, "loss": 1.7116, "step": 6070 }, { "epoch": 0.6306222083722863, "grad_norm": 0.46756497025489807, "learning_rate": 3.0052945555364987e-05, "loss": 1.8164, "step": 6071 }, { "epoch": 0.6307260828918666, "grad_norm": 0.4332909882068634, "learning_rate": 3.0037984704273536e-05, "loss": 1.7011, "step": 6072 }, { "epoch": 0.630829957411447, "grad_norm": 0.46313124895095825, "learning_rate": 3.0023025978980977e-05, "loss": 1.8097, "step": 6073 }, { "epoch": 0.6309338319310274, "grad_norm": 0.4317631125450134, "learning_rate": 3.000806938108034e-05, "loss": 1.7535, "step": 6074 }, { "epoch": 0.6310377064506076, "grad_norm": 0.39492112398147583, "learning_rate": 2.9993114912164378e-05, "loss": 1.6294, "step": 6075 }, { "epoch": 0.631141580970188, "grad_norm": 0.3940942883491516, "learning_rate": 2.9978162573825597e-05, "loss": 1.7187, "step": 6076 }, { "epoch": 0.6312454554897684, "grad_norm": 0.4159606695175171, "learning_rate": 2.996321236765633e-05, "loss": 1.7397, "step": 6077 }, { "epoch": 0.6313493300093487, "grad_norm": 0.38286933302879333, "learning_rate": 2.994826429524867e-05, "loss": 1.5252, "step": 6078 }, { "epoch": 0.6314532045289291, "grad_norm": 0.424221396446228, "learning_rate": 2.9933318358194427e-05, "loss": 1.4488, "step": 6079 }, { "epoch": 0.6315570790485094, "grad_norm": 0.4659724831581116, "learning_rate": 2.9918374558085272e-05, "loss": 1.705, "step": 6080 }, { "epoch": 0.6316609535680897, "grad_norm": 0.42719754576683044, "learning_rate": 2.9903432896512583e-05, "loss": 1.5949, "step": 6081 }, { "epoch": 0.6317648280876701, "grad_norm": 0.4064619243144989, "learning_rate": 2.988849337506753e-05, "loss": 1.5759, "step": 6082 }, { "epoch": 0.6318687026072505, "grad_norm": 0.4120051860809326, "learning_rate": 2.9873555995341045e-05, "loss": 1.6129, "step": 6083 }, { "epoch": 0.6319725771268307, "grad_norm": 0.41194257140159607, "learning_rate": 2.9858620758923872e-05, "loss": 1.6637, "step": 6084 }, { "epoch": 0.6320764516464111, "grad_norm": 0.4339029788970947, "learning_rate": 2.9843687667406466e-05, "loss": 1.7608, "step": 6085 }, { "epoch": 0.6321803261659915, "grad_norm": 0.4774200916290283, "learning_rate": 2.982875672237909e-05, "loss": 1.5836, "step": 6086 }, { "epoch": 0.6322842006855718, "grad_norm": 0.40862688422203064, "learning_rate": 2.9813827925431813e-05, "loss": 1.6473, "step": 6087 }, { "epoch": 0.6323880752051522, "grad_norm": 0.42833518981933594, "learning_rate": 2.9798901278154384e-05, "loss": 1.6633, "step": 6088 }, { "epoch": 0.6324919497247325, "grad_norm": 0.42553073167800903, "learning_rate": 2.9783976782136385e-05, "loss": 1.6624, "step": 6089 }, { "epoch": 0.6325958242443128, "grad_norm": 0.3767523169517517, "learning_rate": 2.976905443896719e-05, "loss": 1.516, "step": 6090 }, { "epoch": 0.6326996987638932, "grad_norm": 0.44192999601364136, "learning_rate": 2.975413425023587e-05, "loss": 1.8359, "step": 6091 }, { "epoch": 0.6328035732834736, "grad_norm": 0.4179973304271698, "learning_rate": 2.973921621753133e-05, "loss": 1.5251, "step": 6092 }, { "epoch": 0.6329074478030539, "grad_norm": 0.40900513529777527, "learning_rate": 2.972430034244224e-05, "loss": 1.7986, "step": 6093 }, { "epoch": 0.6330113223226342, "grad_norm": 0.42563167214393616, "learning_rate": 2.970938662655699e-05, "loss": 1.6558, "step": 6094 }, { "epoch": 0.6331151968422146, "grad_norm": 0.4261634349822998, "learning_rate": 2.9694475071463797e-05, "loss": 1.6296, "step": 6095 }, { "epoch": 0.633219071361795, "grad_norm": 0.41434627771377563, "learning_rate": 2.9679565678750608e-05, "loss": 1.6559, "step": 6096 }, { "epoch": 0.6333229458813753, "grad_norm": 0.4356957972049713, "learning_rate": 2.9664658450005195e-05, "loss": 1.6724, "step": 6097 }, { "epoch": 0.6334268204009557, "grad_norm": 0.4227178692817688, "learning_rate": 2.9649753386815017e-05, "loss": 1.7473, "step": 6098 }, { "epoch": 0.633530694920536, "grad_norm": 0.45652738213539124, "learning_rate": 2.963485049076736e-05, "loss": 1.7247, "step": 6099 }, { "epoch": 0.6336345694401163, "grad_norm": 0.4394940435886383, "learning_rate": 2.961994976344929e-05, "loss": 1.7161, "step": 6100 }, { "epoch": 0.6337384439596967, "grad_norm": 0.4043090343475342, "learning_rate": 2.9605051206447582e-05, "loss": 1.6737, "step": 6101 }, { "epoch": 0.6338423184792771, "grad_norm": 0.4564562141895294, "learning_rate": 2.959015482134885e-05, "loss": 1.8074, "step": 6102 }, { "epoch": 0.6339461929988573, "grad_norm": 0.4548121392726898, "learning_rate": 2.9575260609739423e-05, "loss": 1.7882, "step": 6103 }, { "epoch": 0.6340500675184377, "grad_norm": 0.41030994057655334, "learning_rate": 2.956036857320541e-05, "loss": 1.6736, "step": 6104 }, { "epoch": 0.6341539420380181, "grad_norm": 0.4318486154079437, "learning_rate": 2.9545478713332707e-05, "loss": 1.7257, "step": 6105 }, { "epoch": 0.6342578165575984, "grad_norm": 0.4724993109703064, "learning_rate": 2.9530591031706988e-05, "loss": 1.6384, "step": 6106 }, { "epoch": 0.6343616910771788, "grad_norm": 0.40737417340278625, "learning_rate": 2.9515705529913644e-05, "loss": 1.7593, "step": 6107 }, { "epoch": 0.6344655655967592, "grad_norm": 0.4164734482765198, "learning_rate": 2.9500822209537883e-05, "loss": 1.7383, "step": 6108 }, { "epoch": 0.6345694401163394, "grad_norm": 0.40095841884613037, "learning_rate": 2.9485941072164656e-05, "loss": 1.6363, "step": 6109 }, { "epoch": 0.6346733146359198, "grad_norm": 0.4260212481021881, "learning_rate": 2.9471062119378688e-05, "loss": 1.7249, "step": 6110 }, { "epoch": 0.6347771891555002, "grad_norm": 0.4054844081401825, "learning_rate": 2.9456185352764465e-05, "loss": 1.4506, "step": 6111 }, { "epoch": 0.6348810636750805, "grad_norm": 0.39739441871643066, "learning_rate": 2.944131077390627e-05, "loss": 1.6051, "step": 6112 }, { "epoch": 0.6349849381946608, "grad_norm": 0.4330110549926758, "learning_rate": 2.94264383843881e-05, "loss": 1.7506, "step": 6113 }, { "epoch": 0.6350888127142412, "grad_norm": 0.4423455595970154, "learning_rate": 2.9411568185793757e-05, "loss": 1.7382, "step": 6114 }, { "epoch": 0.6351926872338215, "grad_norm": 0.41518527269363403, "learning_rate": 2.939670017970683e-05, "loss": 1.4555, "step": 6115 }, { "epoch": 0.6352965617534019, "grad_norm": 0.40972813963890076, "learning_rate": 2.9381834367710616e-05, "loss": 1.7941, "step": 6116 }, { "epoch": 0.6354004362729823, "grad_norm": 0.43250906467437744, "learning_rate": 2.9366970751388196e-05, "loss": 1.7317, "step": 6117 }, { "epoch": 0.6355043107925625, "grad_norm": 0.4202265441417694, "learning_rate": 2.9352109332322468e-05, "loss": 1.652, "step": 6118 }, { "epoch": 0.6356081853121429, "grad_norm": 0.40753960609436035, "learning_rate": 2.9337250112096016e-05, "loss": 1.6144, "step": 6119 }, { "epoch": 0.6357120598317233, "grad_norm": 0.39751505851745605, "learning_rate": 2.9322393092291256e-05, "loss": 1.7013, "step": 6120 }, { "epoch": 0.6358159343513037, "grad_norm": 0.4368493854999542, "learning_rate": 2.9307538274490348e-05, "loss": 1.7454, "step": 6121 }, { "epoch": 0.635919808870884, "grad_norm": 0.40719810128211975, "learning_rate": 2.9292685660275204e-05, "loss": 1.7023, "step": 6122 }, { "epoch": 0.6360236833904643, "grad_norm": 0.4638088047504425, "learning_rate": 2.9277835251227514e-05, "loss": 1.7887, "step": 6123 }, { "epoch": 0.6361275579100447, "grad_norm": 0.454287588596344, "learning_rate": 2.9262987048928725e-05, "loss": 1.8606, "step": 6124 }, { "epoch": 0.636231432429625, "grad_norm": 0.3964051902294159, "learning_rate": 2.9248141054960076e-05, "loss": 1.5617, "step": 6125 }, { "epoch": 0.6363353069492054, "grad_norm": 0.39127108454704285, "learning_rate": 2.9233297270902514e-05, "loss": 1.5637, "step": 6126 }, { "epoch": 0.6364391814687858, "grad_norm": 0.4058896005153656, "learning_rate": 2.921845569833681e-05, "loss": 1.6716, "step": 6127 }, { "epoch": 0.636543055988366, "grad_norm": 0.41195571422576904, "learning_rate": 2.9203616338843486e-05, "loss": 1.5734, "step": 6128 }, { "epoch": 0.6366469305079464, "grad_norm": 0.45315390825271606, "learning_rate": 2.9188779194002803e-05, "loss": 1.6421, "step": 6129 }, { "epoch": 0.6367508050275268, "grad_norm": 0.4393003284931183, "learning_rate": 2.917394426539477e-05, "loss": 1.777, "step": 6130 }, { "epoch": 0.6368546795471071, "grad_norm": 0.4733230769634247, "learning_rate": 2.915911155459927e-05, "loss": 1.7351, "step": 6131 }, { "epoch": 0.6369585540666874, "grad_norm": 0.44397008419036865, "learning_rate": 2.914428106319579e-05, "loss": 1.7189, "step": 6132 }, { "epoch": 0.6370624285862678, "grad_norm": 0.4575912654399872, "learning_rate": 2.912945279276371e-05, "loss": 2.0055, "step": 6133 }, { "epoch": 0.6371663031058481, "grad_norm": 0.43883270025253296, "learning_rate": 2.9114626744882128e-05, "loss": 1.8263, "step": 6134 }, { "epoch": 0.6372701776254285, "grad_norm": 0.4244493842124939, "learning_rate": 2.909980292112985e-05, "loss": 1.7515, "step": 6135 }, { "epoch": 0.6373740521450089, "grad_norm": 0.45086610317230225, "learning_rate": 2.9084981323085547e-05, "loss": 1.7123, "step": 6136 }, { "epoch": 0.6374779266645891, "grad_norm": 0.43869614601135254, "learning_rate": 2.90701619523276e-05, "loss": 1.6792, "step": 6137 }, { "epoch": 0.6375818011841695, "grad_norm": 0.43083521723747253, "learning_rate": 2.9055344810434137e-05, "loss": 1.6872, "step": 6138 }, { "epoch": 0.6376856757037499, "grad_norm": 0.5012997984886169, "learning_rate": 2.904052989898308e-05, "loss": 1.908, "step": 6139 }, { "epoch": 0.6377895502233302, "grad_norm": 0.4534330368041992, "learning_rate": 2.90257172195521e-05, "loss": 1.6729, "step": 6140 }, { "epoch": 0.6378934247429106, "grad_norm": 0.43945181369781494, "learning_rate": 2.9010906773718627e-05, "loss": 1.6487, "step": 6141 }, { "epoch": 0.6379972992624909, "grad_norm": 0.40094152092933655, "learning_rate": 2.899609856305985e-05, "loss": 1.508, "step": 6142 }, { "epoch": 0.6381011737820712, "grad_norm": 0.4558815658092499, "learning_rate": 2.8981292589152768e-05, "loss": 1.8111, "step": 6143 }, { "epoch": 0.6382050483016516, "grad_norm": 0.43613293766975403, "learning_rate": 2.8966488853574036e-05, "loss": 1.6826, "step": 6144 }, { "epoch": 0.638308922821232, "grad_norm": 0.43343204259872437, "learning_rate": 2.8951687357900203e-05, "loss": 1.7693, "step": 6145 }, { "epoch": 0.6384127973408122, "grad_norm": 0.45726385712623596, "learning_rate": 2.8936888103707473e-05, "loss": 1.883, "step": 6146 }, { "epoch": 0.6385166718603926, "grad_norm": 0.3765776455402374, "learning_rate": 2.8922091092571862e-05, "loss": 1.5675, "step": 6147 }, { "epoch": 0.638620546379973, "grad_norm": 0.4046104848384857, "learning_rate": 2.8907296326069137e-05, "loss": 1.7282, "step": 6148 }, { "epoch": 0.6387244208995534, "grad_norm": 0.40840303897857666, "learning_rate": 2.8892503805774806e-05, "loss": 1.6623, "step": 6149 }, { "epoch": 0.6388282954191337, "grad_norm": 0.4642704427242279, "learning_rate": 2.8877713533264217e-05, "loss": 1.6275, "step": 6150 }, { "epoch": 0.638932169938714, "grad_norm": 0.41992318630218506, "learning_rate": 2.8862925510112327e-05, "loss": 1.6549, "step": 6151 }, { "epoch": 0.6390360444582944, "grad_norm": 0.4473896324634552, "learning_rate": 2.884813973789402e-05, "loss": 1.7372, "step": 6152 }, { "epoch": 0.6391399189778747, "grad_norm": 0.39671966433525085, "learning_rate": 2.8833356218183843e-05, "loss": 1.5238, "step": 6153 }, { "epoch": 0.6392437934974551, "grad_norm": 0.4355907440185547, "learning_rate": 2.881857495255612e-05, "loss": 1.8191, "step": 6154 }, { "epoch": 0.6393476680170355, "grad_norm": 0.4201485812664032, "learning_rate": 2.8803795942584945e-05, "loss": 1.4469, "step": 6155 }, { "epoch": 0.6394515425366157, "grad_norm": 0.40149399638175964, "learning_rate": 2.8789019189844163e-05, "loss": 1.5771, "step": 6156 }, { "epoch": 0.6395554170561961, "grad_norm": 0.40967410802841187, "learning_rate": 2.877424469590739e-05, "loss": 1.6283, "step": 6157 }, { "epoch": 0.6396592915757765, "grad_norm": 0.40948042273521423, "learning_rate": 2.8759472462347965e-05, "loss": 1.6039, "step": 6158 }, { "epoch": 0.6397631660953568, "grad_norm": 0.4414249658584595, "learning_rate": 2.8744702490739077e-05, "loss": 1.6585, "step": 6159 }, { "epoch": 0.6398670406149372, "grad_norm": 0.39458298683166504, "learning_rate": 2.8729934782653545e-05, "loss": 1.6049, "step": 6160 }, { "epoch": 0.6399709151345175, "grad_norm": 0.4163827896118164, "learning_rate": 2.8715169339664062e-05, "loss": 1.4499, "step": 6161 }, { "epoch": 0.6400747896540978, "grad_norm": 0.4390014708042145, "learning_rate": 2.870040616334303e-05, "loss": 1.7916, "step": 6162 }, { "epoch": 0.6401786641736782, "grad_norm": 0.38799330592155457, "learning_rate": 2.8685645255262562e-05, "loss": 1.6107, "step": 6163 }, { "epoch": 0.6402825386932586, "grad_norm": 0.4088546633720398, "learning_rate": 2.867088661699464e-05, "loss": 1.7303, "step": 6164 }, { "epoch": 0.6403864132128388, "grad_norm": 0.39084142446517944, "learning_rate": 2.8656130250110913e-05, "loss": 1.5763, "step": 6165 }, { "epoch": 0.6404902877324192, "grad_norm": 0.4360373318195343, "learning_rate": 2.864137615618283e-05, "loss": 1.6337, "step": 6166 }, { "epoch": 0.6405941622519996, "grad_norm": 0.39488184452056885, "learning_rate": 2.862662433678158e-05, "loss": 1.5719, "step": 6167 }, { "epoch": 0.6406980367715799, "grad_norm": 0.402828186750412, "learning_rate": 2.861187479347813e-05, "loss": 1.4238, "step": 6168 }, { "epoch": 0.6408019112911603, "grad_norm": 0.48173683881759644, "learning_rate": 2.8597127527843172e-05, "loss": 1.7344, "step": 6169 }, { "epoch": 0.6409057858107406, "grad_norm": 0.4502374529838562, "learning_rate": 2.858238254144716e-05, "loss": 1.8152, "step": 6170 }, { "epoch": 0.6410096603303209, "grad_norm": 0.41789987683296204, "learning_rate": 2.8567639835860394e-05, "loss": 1.612, "step": 6171 }, { "epoch": 0.6411135348499013, "grad_norm": 0.4217549264431, "learning_rate": 2.8552899412652757e-05, "loss": 1.7688, "step": 6172 }, { "epoch": 0.6412174093694817, "grad_norm": 0.41339951753616333, "learning_rate": 2.853816127339406e-05, "loss": 1.779, "step": 6173 }, { "epoch": 0.6413212838890621, "grad_norm": 0.4295401871204376, "learning_rate": 2.8523425419653784e-05, "loss": 1.723, "step": 6174 }, { "epoch": 0.6414251584086423, "grad_norm": 0.44703397154808044, "learning_rate": 2.8508691853001174e-05, "loss": 1.74, "step": 6175 }, { "epoch": 0.6415290329282227, "grad_norm": 0.4364173710346222, "learning_rate": 2.8493960575005242e-05, "loss": 1.8088, "step": 6176 }, { "epoch": 0.6416329074478031, "grad_norm": 0.41096925735473633, "learning_rate": 2.8479231587234734e-05, "loss": 1.6215, "step": 6177 }, { "epoch": 0.6417367819673834, "grad_norm": 0.42458489537239075, "learning_rate": 2.8464504891258237e-05, "loss": 1.6694, "step": 6178 }, { "epoch": 0.6418406564869638, "grad_norm": 0.4292949438095093, "learning_rate": 2.8449780488643944e-05, "loss": 1.8489, "step": 6179 }, { "epoch": 0.6419445310065441, "grad_norm": 0.4265950918197632, "learning_rate": 2.8435058380959956e-05, "loss": 1.691, "step": 6180 }, { "epoch": 0.6420484055261244, "grad_norm": 0.44009000062942505, "learning_rate": 2.842033856977403e-05, "loss": 1.7029, "step": 6181 }, { "epoch": 0.6421522800457048, "grad_norm": 0.46439558267593384, "learning_rate": 2.840562105665372e-05, "loss": 1.7289, "step": 6182 }, { "epoch": 0.6422561545652852, "grad_norm": 0.4735531210899353, "learning_rate": 2.8390905843166326e-05, "loss": 1.8544, "step": 6183 }, { "epoch": 0.6423600290848654, "grad_norm": 0.4040323793888092, "learning_rate": 2.8376192930878898e-05, "loss": 1.6042, "step": 6184 }, { "epoch": 0.6424639036044458, "grad_norm": 0.399845689535141, "learning_rate": 2.8361482321358256e-05, "loss": 1.7571, "step": 6185 }, { "epoch": 0.6425677781240262, "grad_norm": 0.4226529002189636, "learning_rate": 2.834677401617094e-05, "loss": 1.7529, "step": 6186 }, { "epoch": 0.6426716526436065, "grad_norm": 0.45261451601982117, "learning_rate": 2.833206801688333e-05, "loss": 1.7456, "step": 6187 }, { "epoch": 0.6427755271631869, "grad_norm": 0.4298214912414551, "learning_rate": 2.8317364325061412e-05, "loss": 1.5975, "step": 6188 }, { "epoch": 0.6428794016827672, "grad_norm": 0.3926003575325012, "learning_rate": 2.8302662942271098e-05, "loss": 1.6589, "step": 6189 }, { "epoch": 0.6429832762023475, "grad_norm": 0.43051791191101074, "learning_rate": 2.828796387007794e-05, "loss": 1.7522, "step": 6190 }, { "epoch": 0.6430871507219279, "grad_norm": 0.3769807815551758, "learning_rate": 2.8273267110047234e-05, "loss": 1.4194, "step": 6191 }, { "epoch": 0.6431910252415083, "grad_norm": 0.4164545238018036, "learning_rate": 2.8258572663744132e-05, "loss": 1.6241, "step": 6192 }, { "epoch": 0.6432948997610886, "grad_norm": 0.41669386625289917, "learning_rate": 2.824388053273344e-05, "loss": 1.6012, "step": 6193 }, { "epoch": 0.6433987742806689, "grad_norm": 0.3818405568599701, "learning_rate": 2.8229190718579777e-05, "loss": 1.5466, "step": 6194 }, { "epoch": 0.6435026488002493, "grad_norm": 0.3918877840042114, "learning_rate": 2.8214503222847477e-05, "loss": 1.6249, "step": 6195 }, { "epoch": 0.6436065233198296, "grad_norm": 0.45735591650009155, "learning_rate": 2.8199818047100656e-05, "loss": 1.8056, "step": 6196 }, { "epoch": 0.64371039783941, "grad_norm": 0.45919516682624817, "learning_rate": 2.8185135192903163e-05, "loss": 1.9505, "step": 6197 }, { "epoch": 0.6438142723589904, "grad_norm": 0.38972949981689453, "learning_rate": 2.8170454661818583e-05, "loss": 1.4729, "step": 6198 }, { "epoch": 0.6439181468785707, "grad_norm": 0.479105681180954, "learning_rate": 2.815577645541033e-05, "loss": 1.8712, "step": 6199 }, { "epoch": 0.644022021398151, "grad_norm": 0.38174498081207275, "learning_rate": 2.8141100575241498e-05, "loss": 1.5689, "step": 6200 }, { "epoch": 0.6441258959177314, "grad_norm": 0.40346476435661316, "learning_rate": 2.8126427022874948e-05, "loss": 1.7155, "step": 6201 }, { "epoch": 0.6442297704373118, "grad_norm": 0.5447202324867249, "learning_rate": 2.811175579987329e-05, "loss": 1.9476, "step": 6202 }, { "epoch": 0.644333644956892, "grad_norm": 0.4525381326675415, "learning_rate": 2.809708690779891e-05, "loss": 1.6353, "step": 6203 }, { "epoch": 0.6444375194764724, "grad_norm": 0.4123556911945343, "learning_rate": 2.8082420348213922e-05, "loss": 1.5309, "step": 6204 }, { "epoch": 0.6445413939960528, "grad_norm": 0.40919628739356995, "learning_rate": 2.806775612268019e-05, "loss": 1.5944, "step": 6205 }, { "epoch": 0.6446452685156331, "grad_norm": 0.4005577266216278, "learning_rate": 2.8053094232759393e-05, "loss": 1.5079, "step": 6206 }, { "epoch": 0.6447491430352135, "grad_norm": 0.41373926401138306, "learning_rate": 2.8038434680012814e-05, "loss": 1.7176, "step": 6207 }, { "epoch": 0.6448530175547939, "grad_norm": 0.4339597821235657, "learning_rate": 2.8023777466001666e-05, "loss": 1.7649, "step": 6208 }, { "epoch": 0.6449568920743741, "grad_norm": 0.40516406297683716, "learning_rate": 2.8009122592286795e-05, "loss": 1.6787, "step": 6209 }, { "epoch": 0.6450607665939545, "grad_norm": 0.435835599899292, "learning_rate": 2.7994470060428834e-05, "loss": 1.626, "step": 6210 }, { "epoch": 0.6451646411135349, "grad_norm": 0.44071394205093384, "learning_rate": 2.7979819871988166e-05, "loss": 1.6221, "step": 6211 }, { "epoch": 0.6452685156331152, "grad_norm": 0.40906620025634766, "learning_rate": 2.796517202852491e-05, "loss": 1.6874, "step": 6212 }, { "epoch": 0.6453723901526955, "grad_norm": 0.4259955585002899, "learning_rate": 2.795052653159896e-05, "loss": 1.6113, "step": 6213 }, { "epoch": 0.6454762646722759, "grad_norm": 0.41644105315208435, "learning_rate": 2.7935883382769918e-05, "loss": 1.5794, "step": 6214 }, { "epoch": 0.6455801391918562, "grad_norm": 0.42759883403778076, "learning_rate": 2.7921242583597228e-05, "loss": 1.716, "step": 6215 }, { "epoch": 0.6456840137114366, "grad_norm": 0.4132600426673889, "learning_rate": 2.7906604135639945e-05, "loss": 1.6549, "step": 6216 }, { "epoch": 0.645787888231017, "grad_norm": 0.42022904753685, "learning_rate": 2.7891968040457007e-05, "loss": 1.7255, "step": 6217 }, { "epoch": 0.6458917627505972, "grad_norm": 0.5371679663658142, "learning_rate": 2.7877334299607037e-05, "loss": 1.993, "step": 6218 }, { "epoch": 0.6459956372701776, "grad_norm": 0.4094901978969574, "learning_rate": 2.7862702914648364e-05, "loss": 1.6315, "step": 6219 }, { "epoch": 0.646099511789758, "grad_norm": 0.3900490999221802, "learning_rate": 2.784807388713918e-05, "loss": 1.521, "step": 6220 }, { "epoch": 0.6462033863093383, "grad_norm": 0.4084405303001404, "learning_rate": 2.7833447218637332e-05, "loss": 1.6055, "step": 6221 }, { "epoch": 0.6463072608289187, "grad_norm": 0.38741302490234375, "learning_rate": 2.7818822910700442e-05, "loss": 1.6364, "step": 6222 }, { "epoch": 0.646411135348499, "grad_norm": 0.443694531917572, "learning_rate": 2.7804200964885897e-05, "loss": 1.6882, "step": 6223 }, { "epoch": 0.6465150098680794, "grad_norm": 0.47792142629623413, "learning_rate": 2.7789581382750794e-05, "loss": 1.6535, "step": 6224 }, { "epoch": 0.6466188843876597, "grad_norm": 0.40224647521972656, "learning_rate": 2.777496416585207e-05, "loss": 1.5526, "step": 6225 }, { "epoch": 0.6467227589072401, "grad_norm": 0.4233601987361908, "learning_rate": 2.776034931574626e-05, "loss": 1.6387, "step": 6226 }, { "epoch": 0.6468266334268205, "grad_norm": 0.4132138788700104, "learning_rate": 2.7745736833989788e-05, "loss": 1.7418, "step": 6227 }, { "epoch": 0.6469305079464007, "grad_norm": 0.4106374979019165, "learning_rate": 2.7731126722138756e-05, "loss": 1.7758, "step": 6228 }, { "epoch": 0.6470343824659811, "grad_norm": 0.40899068117141724, "learning_rate": 2.7716518981749022e-05, "loss": 1.7876, "step": 6229 }, { "epoch": 0.6471382569855615, "grad_norm": 0.4388611614704132, "learning_rate": 2.77019136143762e-05, "loss": 1.5814, "step": 6230 }, { "epoch": 0.6472421315051418, "grad_norm": 0.4218350946903229, "learning_rate": 2.7687310621575646e-05, "loss": 1.6095, "step": 6231 }, { "epoch": 0.6473460060247221, "grad_norm": 0.43606144189834595, "learning_rate": 2.767271000490247e-05, "loss": 1.5995, "step": 6232 }, { "epoch": 0.6474498805443025, "grad_norm": 0.4526714086532593, "learning_rate": 2.7658111765911486e-05, "loss": 1.8424, "step": 6233 }, { "epoch": 0.6475537550638828, "grad_norm": 0.43624788522720337, "learning_rate": 2.764351590615738e-05, "loss": 1.544, "step": 6234 }, { "epoch": 0.6476576295834632, "grad_norm": 0.409190833568573, "learning_rate": 2.762892242719439e-05, "loss": 1.6898, "step": 6235 }, { "epoch": 0.6477615041030436, "grad_norm": 0.40407395362854004, "learning_rate": 2.7614331330576694e-05, "loss": 1.4979, "step": 6236 }, { "epoch": 0.6478653786226238, "grad_norm": 0.4525948464870453, "learning_rate": 2.7599742617858088e-05, "loss": 1.8237, "step": 6237 }, { "epoch": 0.6479692531422042, "grad_norm": 0.44162291288375854, "learning_rate": 2.7585156290592164e-05, "loss": 1.482, "step": 6238 }, { "epoch": 0.6480731276617846, "grad_norm": 0.44789817929267883, "learning_rate": 2.7570572350332258e-05, "loss": 1.7348, "step": 6239 }, { "epoch": 0.6481770021813649, "grad_norm": 0.42044568061828613, "learning_rate": 2.7555990798631436e-05, "loss": 1.6357, "step": 6240 }, { "epoch": 0.6482808767009453, "grad_norm": 0.4480580985546112, "learning_rate": 2.7541411637042526e-05, "loss": 1.8034, "step": 6241 }, { "epoch": 0.6483847512205256, "grad_norm": 0.4233933389186859, "learning_rate": 2.752683486711807e-05, "loss": 1.6212, "step": 6242 }, { "epoch": 0.6484886257401059, "grad_norm": 0.38887619972229004, "learning_rate": 2.7512260490410448e-05, "loss": 1.5956, "step": 6243 }, { "epoch": 0.6485925002596863, "grad_norm": 0.3876674771308899, "learning_rate": 2.7497688508471625e-05, "loss": 1.5114, "step": 6244 }, { "epoch": 0.6486963747792667, "grad_norm": 0.4232475459575653, "learning_rate": 2.748311892285348e-05, "loss": 1.4739, "step": 6245 }, { "epoch": 0.6488002492988469, "grad_norm": 0.4148775041103363, "learning_rate": 2.746855173510754e-05, "loss": 1.5095, "step": 6246 }, { "epoch": 0.6489041238184273, "grad_norm": 0.4203701913356781, "learning_rate": 2.745398694678506e-05, "loss": 1.8714, "step": 6247 }, { "epoch": 0.6490079983380077, "grad_norm": 0.424444317817688, "learning_rate": 2.7439424559437122e-05, "loss": 1.7226, "step": 6248 }, { "epoch": 0.649111872857588, "grad_norm": 0.3754339814186096, "learning_rate": 2.7424864574614484e-05, "loss": 1.5442, "step": 6249 }, { "epoch": 0.6492157473771684, "grad_norm": 0.506229043006897, "learning_rate": 2.7410306993867675e-05, "loss": 1.713, "step": 6250 }, { "epoch": 0.6493196218967487, "grad_norm": 0.43399345874786377, "learning_rate": 2.7395751818746972e-05, "loss": 1.6123, "step": 6251 }, { "epoch": 0.6494234964163291, "grad_norm": 0.40855225920677185, "learning_rate": 2.738119905080235e-05, "loss": 1.642, "step": 6252 }, { "epoch": 0.6495273709359094, "grad_norm": 0.3835819959640503, "learning_rate": 2.7366648691583646e-05, "loss": 1.4608, "step": 6253 }, { "epoch": 0.6496312454554898, "grad_norm": 0.41114479303359985, "learning_rate": 2.735210074264027e-05, "loss": 1.5948, "step": 6254 }, { "epoch": 0.6497351199750702, "grad_norm": 0.45714348554611206, "learning_rate": 2.7337555205521514e-05, "loss": 1.5191, "step": 6255 }, { "epoch": 0.6498389944946504, "grad_norm": 0.4060697853565216, "learning_rate": 2.7323012081776368e-05, "loss": 1.5952, "step": 6256 }, { "epoch": 0.6499428690142308, "grad_norm": 0.4719262421131134, "learning_rate": 2.7308471372953538e-05, "loss": 1.644, "step": 6257 }, { "epoch": 0.6500467435338112, "grad_norm": 0.40513306856155396, "learning_rate": 2.7293933080601507e-05, "loss": 1.7363, "step": 6258 }, { "epoch": 0.6501506180533915, "grad_norm": 0.4476475417613983, "learning_rate": 2.727939720626849e-05, "loss": 1.7536, "step": 6259 }, { "epoch": 0.6502544925729719, "grad_norm": 0.44684842228889465, "learning_rate": 2.7264863751502446e-05, "loss": 1.8819, "step": 6260 }, { "epoch": 0.6503583670925522, "grad_norm": 0.40098902583122253, "learning_rate": 2.725033271785105e-05, "loss": 1.6615, "step": 6261 }, { "epoch": 0.6504622416121325, "grad_norm": 0.4347749948501587, "learning_rate": 2.7235804106861807e-05, "loss": 1.5312, "step": 6262 }, { "epoch": 0.6505661161317129, "grad_norm": 0.4464839994907379, "learning_rate": 2.7221277920081818e-05, "loss": 1.8898, "step": 6263 }, { "epoch": 0.6506699906512933, "grad_norm": 0.4316708743572235, "learning_rate": 2.720675415905807e-05, "loss": 1.779, "step": 6264 }, { "epoch": 0.6507738651708735, "grad_norm": 0.42350926995277405, "learning_rate": 2.7192232825337206e-05, "loss": 1.6765, "step": 6265 }, { "epoch": 0.6508777396904539, "grad_norm": 0.39864397048950195, "learning_rate": 2.7177713920465643e-05, "loss": 1.631, "step": 6266 }, { "epoch": 0.6509816142100343, "grad_norm": 0.40547481179237366, "learning_rate": 2.7163197445989518e-05, "loss": 1.5956, "step": 6267 }, { "epoch": 0.6510854887296146, "grad_norm": 0.4294564127922058, "learning_rate": 2.7148683403454746e-05, "loss": 1.5979, "step": 6268 }, { "epoch": 0.651189363249195, "grad_norm": 0.41150328516960144, "learning_rate": 2.7134171794406932e-05, "loss": 1.546, "step": 6269 }, { "epoch": 0.6512932377687753, "grad_norm": 0.4225398004055023, "learning_rate": 2.711966262039145e-05, "loss": 1.7254, "step": 6270 }, { "epoch": 0.6513971122883556, "grad_norm": 0.42999348044395447, "learning_rate": 2.710515588295346e-05, "loss": 1.7585, "step": 6271 }, { "epoch": 0.651500986807936, "grad_norm": 0.39667728543281555, "learning_rate": 2.709065158363774e-05, "loss": 1.5776, "step": 6272 }, { "epoch": 0.6516048613275164, "grad_norm": 0.41493138670921326, "learning_rate": 2.7076149723988954e-05, "loss": 1.7011, "step": 6273 }, { "epoch": 0.6517087358470967, "grad_norm": 0.45391151309013367, "learning_rate": 2.7061650305551424e-05, "loss": 1.7816, "step": 6274 }, { "epoch": 0.651812610366677, "grad_norm": 0.4255356788635254, "learning_rate": 2.7047153329869167e-05, "loss": 1.8013, "step": 6275 }, { "epoch": 0.6519164848862574, "grad_norm": 0.3899310827255249, "learning_rate": 2.7032658798486072e-05, "loss": 1.4257, "step": 6276 }, { "epoch": 0.6520203594058378, "grad_norm": 0.4353606402873993, "learning_rate": 2.7018166712945635e-05, "loss": 1.732, "step": 6277 }, { "epoch": 0.6521242339254181, "grad_norm": 0.4526216685771942, "learning_rate": 2.700367707479122e-05, "loss": 1.7406, "step": 6278 }, { "epoch": 0.6522281084449985, "grad_norm": 0.44765564799308777, "learning_rate": 2.6989189885565803e-05, "loss": 1.7469, "step": 6279 }, { "epoch": 0.6523319829645788, "grad_norm": 0.4167086184024811, "learning_rate": 2.697470514681214e-05, "loss": 1.6946, "step": 6280 }, { "epoch": 0.6524358574841591, "grad_norm": 0.40468063950538635, "learning_rate": 2.6960222860072826e-05, "loss": 1.5709, "step": 6281 }, { "epoch": 0.6525397320037395, "grad_norm": 0.411888062953949, "learning_rate": 2.6945743026890012e-05, "loss": 1.4393, "step": 6282 }, { "epoch": 0.6526436065233199, "grad_norm": 0.41102951765060425, "learning_rate": 2.693126564880576e-05, "loss": 1.68, "step": 6283 }, { "epoch": 0.6527474810429001, "grad_norm": 0.43322500586509705, "learning_rate": 2.6916790727361773e-05, "loss": 1.6189, "step": 6284 }, { "epoch": 0.6528513555624805, "grad_norm": 0.47928208112716675, "learning_rate": 2.6902318264099517e-05, "loss": 1.8985, "step": 6285 }, { "epoch": 0.6529552300820609, "grad_norm": 0.4368571639060974, "learning_rate": 2.68878482605602e-05, "loss": 1.7447, "step": 6286 }, { "epoch": 0.6530591046016412, "grad_norm": 0.39063751697540283, "learning_rate": 2.6873380718284757e-05, "loss": 1.5522, "step": 6287 }, { "epoch": 0.6531629791212216, "grad_norm": 0.4288500249385834, "learning_rate": 2.6858915638813874e-05, "loss": 1.8171, "step": 6288 }, { "epoch": 0.653266853640802, "grad_norm": 0.42578479647636414, "learning_rate": 2.6844453023687956e-05, "loss": 1.5586, "step": 6289 }, { "epoch": 0.6533707281603822, "grad_norm": 0.43467986583709717, "learning_rate": 2.6829992874447196e-05, "loss": 1.8163, "step": 6290 }, { "epoch": 0.6534746026799626, "grad_norm": 0.39412596821784973, "learning_rate": 2.6815535192631437e-05, "loss": 1.4798, "step": 6291 }, { "epoch": 0.653578477199543, "grad_norm": 0.40805160999298096, "learning_rate": 2.6801079979780343e-05, "loss": 1.7423, "step": 6292 }, { "epoch": 0.6536823517191233, "grad_norm": 0.4715184271335602, "learning_rate": 2.6786627237433275e-05, "loss": 1.7352, "step": 6293 }, { "epoch": 0.6537862262387036, "grad_norm": 0.469553142786026, "learning_rate": 2.6772176967129336e-05, "loss": 1.892, "step": 6294 }, { "epoch": 0.653890100758284, "grad_norm": 0.45803409814834595, "learning_rate": 2.6757729170407368e-05, "loss": 1.7635, "step": 6295 }, { "epoch": 0.6539939752778643, "grad_norm": 0.4040502607822418, "learning_rate": 2.6743283848805945e-05, "loss": 1.7023, "step": 6296 }, { "epoch": 0.6540978497974447, "grad_norm": 0.4045460522174835, "learning_rate": 2.672884100386338e-05, "loss": 1.6528, "step": 6297 }, { "epoch": 0.6542017243170251, "grad_norm": 0.44102489948272705, "learning_rate": 2.6714400637117708e-05, "loss": 1.7864, "step": 6298 }, { "epoch": 0.6543055988366053, "grad_norm": 0.43990832567214966, "learning_rate": 2.669996275010678e-05, "loss": 1.7531, "step": 6299 }, { "epoch": 0.6544094733561857, "grad_norm": 0.46511977910995483, "learning_rate": 2.6685527344368022e-05, "loss": 1.7993, "step": 6300 }, { "epoch": 0.6545133478757661, "grad_norm": 0.40895703434944153, "learning_rate": 2.667109442143877e-05, "loss": 1.7012, "step": 6301 }, { "epoch": 0.6546172223953465, "grad_norm": 0.41711392998695374, "learning_rate": 2.665666398285599e-05, "loss": 1.6973, "step": 6302 }, { "epoch": 0.6547210969149267, "grad_norm": 0.44164448976516724, "learning_rate": 2.6642236030156405e-05, "loss": 1.6828, "step": 6303 }, { "epoch": 0.6548249714345071, "grad_norm": 0.45560404658317566, "learning_rate": 2.6627810564876488e-05, "loss": 1.7191, "step": 6304 }, { "epoch": 0.6549288459540875, "grad_norm": 0.40289151668548584, "learning_rate": 2.6613387588552406e-05, "loss": 1.6014, "step": 6305 }, { "epoch": 0.6550327204736678, "grad_norm": 0.44911909103393555, "learning_rate": 2.6598967102720174e-05, "loss": 1.794, "step": 6306 }, { "epoch": 0.6551365949932482, "grad_norm": 0.4513666331768036, "learning_rate": 2.658454910891539e-05, "loss": 1.697, "step": 6307 }, { "epoch": 0.6552404695128285, "grad_norm": 0.4077112674713135, "learning_rate": 2.657013360867346e-05, "loss": 1.5579, "step": 6308 }, { "epoch": 0.6553443440324088, "grad_norm": 0.4188466966152191, "learning_rate": 2.6555720603529578e-05, "loss": 1.61, "step": 6309 }, { "epoch": 0.6554482185519892, "grad_norm": 0.38735076785087585, "learning_rate": 2.6541310095018533e-05, "loss": 1.4753, "step": 6310 }, { "epoch": 0.6555520930715696, "grad_norm": 0.4412975609302521, "learning_rate": 2.6526902084675008e-05, "loss": 1.7289, "step": 6311 }, { "epoch": 0.6556559675911499, "grad_norm": 0.3914285898208618, "learning_rate": 2.6512496574033307e-05, "loss": 1.5964, "step": 6312 }, { "epoch": 0.6557598421107302, "grad_norm": 0.4345444142818451, "learning_rate": 2.6498093564627515e-05, "loss": 1.5983, "step": 6313 }, { "epoch": 0.6558637166303106, "grad_norm": 0.4521616995334625, "learning_rate": 2.6483693057991433e-05, "loss": 1.719, "step": 6314 }, { "epoch": 0.6559675911498909, "grad_norm": 0.465675413608551, "learning_rate": 2.646929505565861e-05, "loss": 1.7625, "step": 6315 }, { "epoch": 0.6560714656694713, "grad_norm": 0.4356716573238373, "learning_rate": 2.6454899559162316e-05, "loss": 1.6035, "step": 6316 }, { "epoch": 0.6561753401890517, "grad_norm": 0.3972066640853882, "learning_rate": 2.644050657003554e-05, "loss": 1.8204, "step": 6317 }, { "epoch": 0.6562792147086319, "grad_norm": 0.40507668256759644, "learning_rate": 2.642611608981108e-05, "loss": 1.5586, "step": 6318 }, { "epoch": 0.6563830892282123, "grad_norm": 0.4741031229496002, "learning_rate": 2.641172812002134e-05, "loss": 1.9185, "step": 6319 }, { "epoch": 0.6564869637477927, "grad_norm": 0.4094397723674774, "learning_rate": 2.6397342662198578e-05, "loss": 1.7196, "step": 6320 }, { "epoch": 0.656590838267373, "grad_norm": 0.43364858627319336, "learning_rate": 2.638295971787471e-05, "loss": 1.7506, "step": 6321 }, { "epoch": 0.6566947127869533, "grad_norm": 0.43040645122528076, "learning_rate": 2.6368579288581414e-05, "loss": 1.5984, "step": 6322 }, { "epoch": 0.6567985873065337, "grad_norm": 0.41142383217811584, "learning_rate": 2.635420137585009e-05, "loss": 1.5823, "step": 6323 }, { "epoch": 0.656902461826114, "grad_norm": 0.4649263918399811, "learning_rate": 2.6339825981211873e-05, "loss": 1.8616, "step": 6324 }, { "epoch": 0.6570063363456944, "grad_norm": 0.42940422892570496, "learning_rate": 2.632545310619764e-05, "loss": 1.4848, "step": 6325 }, { "epoch": 0.6571102108652748, "grad_norm": 0.4017314016819, "learning_rate": 2.6311082752337957e-05, "loss": 1.4897, "step": 6326 }, { "epoch": 0.657214085384855, "grad_norm": 0.4207054674625397, "learning_rate": 2.6296714921163224e-05, "loss": 1.5465, "step": 6327 }, { "epoch": 0.6573179599044354, "grad_norm": 0.3997343182563782, "learning_rate": 2.6282349614203415e-05, "loss": 1.4289, "step": 6328 }, { "epoch": 0.6574218344240158, "grad_norm": 0.43719449639320374, "learning_rate": 2.626798683298839e-05, "loss": 1.64, "step": 6329 }, { "epoch": 0.6575257089435962, "grad_norm": 0.47477173805236816, "learning_rate": 2.6253626579047654e-05, "loss": 1.4736, "step": 6330 }, { "epoch": 0.6576295834631765, "grad_norm": 0.39647629857063293, "learning_rate": 2.6239268853910452e-05, "loss": 1.5444, "step": 6331 }, { "epoch": 0.6577334579827568, "grad_norm": 0.3856428563594818, "learning_rate": 2.6224913659105775e-05, "loss": 1.5782, "step": 6332 }, { "epoch": 0.6578373325023372, "grad_norm": 0.41423386335372925, "learning_rate": 2.621056099616233e-05, "loss": 1.5784, "step": 6333 }, { "epoch": 0.6579412070219175, "grad_norm": 0.38329392671585083, "learning_rate": 2.6196210866608606e-05, "loss": 1.6442, "step": 6334 }, { "epoch": 0.6580450815414979, "grad_norm": 0.39722350239753723, "learning_rate": 2.6181863271972735e-05, "loss": 1.6947, "step": 6335 }, { "epoch": 0.6581489560610783, "grad_norm": 0.4223669469356537, "learning_rate": 2.616751821378261e-05, "loss": 1.6055, "step": 6336 }, { "epoch": 0.6582528305806585, "grad_norm": 0.39463213086128235, "learning_rate": 2.6153175693565933e-05, "loss": 1.5574, "step": 6337 }, { "epoch": 0.6583567051002389, "grad_norm": 0.40961316227912903, "learning_rate": 2.6138835712850003e-05, "loss": 1.6478, "step": 6338 }, { "epoch": 0.6584605796198193, "grad_norm": 0.39676934480667114, "learning_rate": 2.6124498273161957e-05, "loss": 1.6545, "step": 6339 }, { "epoch": 0.6585644541393996, "grad_norm": 0.397554874420166, "learning_rate": 2.6110163376028613e-05, "loss": 1.5861, "step": 6340 }, { "epoch": 0.65866832865898, "grad_norm": 0.40185415744781494, "learning_rate": 2.6095831022976514e-05, "loss": 1.6366, "step": 6341 }, { "epoch": 0.6587722031785603, "grad_norm": 0.3959345519542694, "learning_rate": 2.6081501215531955e-05, "loss": 1.6307, "step": 6342 }, { "epoch": 0.6588760776981406, "grad_norm": 0.41983965039253235, "learning_rate": 2.606717395522095e-05, "loss": 1.6515, "step": 6343 }, { "epoch": 0.658979952217721, "grad_norm": 0.41705140471458435, "learning_rate": 2.6052849243569233e-05, "loss": 1.6791, "step": 6344 }, { "epoch": 0.6590838267373014, "grad_norm": 0.5571948885917664, "learning_rate": 2.603852708210226e-05, "loss": 1.7317, "step": 6345 }, { "epoch": 0.6591877012568816, "grad_norm": 0.39944010972976685, "learning_rate": 2.6024207472345273e-05, "loss": 1.6033, "step": 6346 }, { "epoch": 0.659291575776462, "grad_norm": 0.39192959666252136, "learning_rate": 2.6009890415823147e-05, "loss": 1.6101, "step": 6347 }, { "epoch": 0.6593954502960424, "grad_norm": 0.41190558671951294, "learning_rate": 2.5995575914060576e-05, "loss": 1.5903, "step": 6348 }, { "epoch": 0.6594993248156227, "grad_norm": 0.39744171500205994, "learning_rate": 2.5981263968581925e-05, "loss": 1.663, "step": 6349 }, { "epoch": 0.6596031993352031, "grad_norm": 0.4570941925048828, "learning_rate": 2.5966954580911314e-05, "loss": 1.5257, "step": 6350 }, { "epoch": 0.6597070738547834, "grad_norm": 0.43954119086265564, "learning_rate": 2.5952647752572566e-05, "loss": 1.8657, "step": 6351 }, { "epoch": 0.6598109483743637, "grad_norm": 0.4737524390220642, "learning_rate": 2.5938343485089267e-05, "loss": 1.9287, "step": 6352 }, { "epoch": 0.6599148228939441, "grad_norm": 0.4555112421512604, "learning_rate": 2.5924041779984692e-05, "loss": 1.7726, "step": 6353 }, { "epoch": 0.6600186974135245, "grad_norm": 0.4156988561153412, "learning_rate": 2.5909742638781852e-05, "loss": 1.6406, "step": 6354 }, { "epoch": 0.6601225719331049, "grad_norm": 0.45365050435066223, "learning_rate": 2.5895446063003526e-05, "loss": 1.6795, "step": 6355 }, { "epoch": 0.6602264464526851, "grad_norm": 0.5357591509819031, "learning_rate": 2.5881152054172193e-05, "loss": 1.6828, "step": 6356 }, { "epoch": 0.6603303209722655, "grad_norm": 0.422297865152359, "learning_rate": 2.586686061380999e-05, "loss": 1.6548, "step": 6357 }, { "epoch": 0.6604341954918459, "grad_norm": 0.40216442942619324, "learning_rate": 2.5852571743438908e-05, "loss": 1.5247, "step": 6358 }, { "epoch": 0.6605380700114262, "grad_norm": 0.4673736095428467, "learning_rate": 2.583828544458058e-05, "loss": 1.6892, "step": 6359 }, { "epoch": 0.6606419445310066, "grad_norm": 0.42048928141593933, "learning_rate": 2.582400171875638e-05, "loss": 1.6188, "step": 6360 }, { "epoch": 0.6607458190505869, "grad_norm": 0.4226718246936798, "learning_rate": 2.5809720567487395e-05, "loss": 1.4864, "step": 6361 }, { "epoch": 0.6608496935701672, "grad_norm": 0.46937909722328186, "learning_rate": 2.5795441992294523e-05, "loss": 1.7432, "step": 6362 }, { "epoch": 0.6609535680897476, "grad_norm": 0.43607574701309204, "learning_rate": 2.578116599469825e-05, "loss": 1.8233, "step": 6363 }, { "epoch": 0.661057442609328, "grad_norm": 0.4324806034564972, "learning_rate": 2.576689257621887e-05, "loss": 1.7842, "step": 6364 }, { "epoch": 0.6611613171289082, "grad_norm": 0.3860158920288086, "learning_rate": 2.5752621738376447e-05, "loss": 1.4591, "step": 6365 }, { "epoch": 0.6612651916484886, "grad_norm": 0.3831734359264374, "learning_rate": 2.5738353482690624e-05, "loss": 1.527, "step": 6366 }, { "epoch": 0.661369066168069, "grad_norm": 0.41983523964881897, "learning_rate": 2.5724087810680937e-05, "loss": 1.7582, "step": 6367 }, { "epoch": 0.6614729406876493, "grad_norm": 0.4340680241584778, "learning_rate": 2.570982472386653e-05, "loss": 1.7625, "step": 6368 }, { "epoch": 0.6615768152072297, "grad_norm": 0.4355176091194153, "learning_rate": 2.569556422376632e-05, "loss": 1.6903, "step": 6369 }, { "epoch": 0.66168068972681, "grad_norm": 0.39546066522598267, "learning_rate": 2.5681306311898945e-05, "loss": 1.6276, "step": 6370 }, { "epoch": 0.6617845642463903, "grad_norm": 0.40482446551322937, "learning_rate": 2.566705098978275e-05, "loss": 1.7003, "step": 6371 }, { "epoch": 0.6618884387659707, "grad_norm": 0.4112469255924225, "learning_rate": 2.5652798258935825e-05, "loss": 1.6817, "step": 6372 }, { "epoch": 0.6619923132855511, "grad_norm": 0.4560087025165558, "learning_rate": 2.5638548120875945e-05, "loss": 1.7853, "step": 6373 }, { "epoch": 0.6620961878051314, "grad_norm": 0.4203476905822754, "learning_rate": 2.5624300577120703e-05, "loss": 1.7259, "step": 6374 }, { "epoch": 0.6622000623247117, "grad_norm": 0.4099465608596802, "learning_rate": 2.5610055629187275e-05, "loss": 1.6924, "step": 6375 }, { "epoch": 0.6623039368442921, "grad_norm": 0.4466152489185333, "learning_rate": 2.559581327859269e-05, "loss": 1.8033, "step": 6376 }, { "epoch": 0.6624078113638724, "grad_norm": 0.487020343542099, "learning_rate": 2.558157352685363e-05, "loss": 1.9298, "step": 6377 }, { "epoch": 0.6625116858834528, "grad_norm": 0.5030232071876526, "learning_rate": 2.5567336375486528e-05, "loss": 1.6374, "step": 6378 }, { "epoch": 0.6626155604030332, "grad_norm": 0.4117771089076996, "learning_rate": 2.555310182600751e-05, "loss": 1.6823, "step": 6379 }, { "epoch": 0.6627194349226135, "grad_norm": 0.5117984414100647, "learning_rate": 2.5538869879932438e-05, "loss": 1.7883, "step": 6380 }, { "epoch": 0.6628233094421938, "grad_norm": 0.4503713548183441, "learning_rate": 2.5524640538776955e-05, "loss": 1.8058, "step": 6381 }, { "epoch": 0.6629271839617742, "grad_norm": 0.4063549041748047, "learning_rate": 2.551041380405631e-05, "loss": 1.6395, "step": 6382 }, { "epoch": 0.6630310584813546, "grad_norm": 0.4199891984462738, "learning_rate": 2.5496189677285574e-05, "loss": 1.6788, "step": 6383 }, { "epoch": 0.6631349330009348, "grad_norm": 0.45948025584220886, "learning_rate": 2.548196815997953e-05, "loss": 1.8964, "step": 6384 }, { "epoch": 0.6632388075205152, "grad_norm": 0.4091807007789612, "learning_rate": 2.546774925365259e-05, "loss": 1.7551, "step": 6385 }, { "epoch": 0.6633426820400956, "grad_norm": 0.3907213807106018, "learning_rate": 2.5453532959819017e-05, "loss": 1.6075, "step": 6386 }, { "epoch": 0.6634465565596759, "grad_norm": 0.4916064143180847, "learning_rate": 2.5439319279992708e-05, "loss": 1.8376, "step": 6387 }, { "epoch": 0.6635504310792563, "grad_norm": 0.3837120532989502, "learning_rate": 2.5425108215687314e-05, "loss": 1.6351, "step": 6388 }, { "epoch": 0.6636543055988366, "grad_norm": 0.4401141405105591, "learning_rate": 2.5410899768416186e-05, "loss": 1.7869, "step": 6389 }, { "epoch": 0.6637581801184169, "grad_norm": 0.4023844599723816, "learning_rate": 2.5396693939692474e-05, "loss": 1.5495, "step": 6390 }, { "epoch": 0.6638620546379973, "grad_norm": 0.41856905817985535, "learning_rate": 2.5382490731028918e-05, "loss": 1.641, "step": 6391 }, { "epoch": 0.6639659291575777, "grad_norm": 0.40514466166496277, "learning_rate": 2.5368290143938062e-05, "loss": 1.7506, "step": 6392 }, { "epoch": 0.664069803677158, "grad_norm": 0.4336822032928467, "learning_rate": 2.5354092179932204e-05, "loss": 1.7111, "step": 6393 }, { "epoch": 0.6641736781967383, "grad_norm": 0.4441390037536621, "learning_rate": 2.533989684052325e-05, "loss": 1.6557, "step": 6394 }, { "epoch": 0.6642775527163187, "grad_norm": 0.4061375558376312, "learning_rate": 2.5325704127222937e-05, "loss": 1.5773, "step": 6395 }, { "epoch": 0.664381427235899, "grad_norm": 0.4289427101612091, "learning_rate": 2.5311514041542678e-05, "loss": 1.8766, "step": 6396 }, { "epoch": 0.6644853017554794, "grad_norm": 0.40495064854621887, "learning_rate": 2.529732658499359e-05, "loss": 1.582, "step": 6397 }, { "epoch": 0.6645891762750598, "grad_norm": 0.4054896831512451, "learning_rate": 2.5283141759086537e-05, "loss": 1.6379, "step": 6398 }, { "epoch": 0.66469305079464, "grad_norm": 0.3749221861362457, "learning_rate": 2.5268959565332086e-05, "loss": 1.4214, "step": 6399 }, { "epoch": 0.6647969253142204, "grad_norm": 0.3932819068431854, "learning_rate": 2.525478000524053e-05, "loss": 1.6518, "step": 6400 }, { "epoch": 0.6649007998338008, "grad_norm": 0.4017762541770935, "learning_rate": 2.5240603080321863e-05, "loss": 1.578, "step": 6401 }, { "epoch": 0.6650046743533811, "grad_norm": 0.47562339901924133, "learning_rate": 2.5226428792085875e-05, "loss": 2.0138, "step": 6402 }, { "epoch": 0.6651085488729614, "grad_norm": 0.407149076461792, "learning_rate": 2.5212257142041938e-05, "loss": 1.5924, "step": 6403 }, { "epoch": 0.6652124233925418, "grad_norm": 0.44227856397628784, "learning_rate": 2.5198088131699294e-05, "loss": 1.7202, "step": 6404 }, { "epoch": 0.6653162979121221, "grad_norm": 0.44379758834838867, "learning_rate": 2.5183921762566797e-05, "loss": 1.8716, "step": 6405 }, { "epoch": 0.6654201724317025, "grad_norm": 0.42266109585762024, "learning_rate": 2.5169758036153056e-05, "loss": 1.6601, "step": 6406 }, { "epoch": 0.6655240469512829, "grad_norm": 0.5245758295059204, "learning_rate": 2.5155596953966397e-05, "loss": 1.7759, "step": 6407 }, { "epoch": 0.6656279214708632, "grad_norm": 0.3796195089817047, "learning_rate": 2.514143851751486e-05, "loss": 1.4598, "step": 6408 }, { "epoch": 0.6657317959904435, "grad_norm": 0.38661640882492065, "learning_rate": 2.512728272830625e-05, "loss": 1.5706, "step": 6409 }, { "epoch": 0.6658356705100239, "grad_norm": 0.41082271933555603, "learning_rate": 2.511312958784797e-05, "loss": 1.7148, "step": 6410 }, { "epoch": 0.6659395450296043, "grad_norm": 0.42262521386146545, "learning_rate": 2.5098979097647297e-05, "loss": 1.561, "step": 6411 }, { "epoch": 0.6660434195491846, "grad_norm": 0.3816385865211487, "learning_rate": 2.508483125921113e-05, "loss": 1.602, "step": 6412 }, { "epoch": 0.6661472940687649, "grad_norm": 0.47888967394828796, "learning_rate": 2.507068607404604e-05, "loss": 1.9113, "step": 6413 }, { "epoch": 0.6662511685883453, "grad_norm": 0.47457215189933777, "learning_rate": 2.5056543543658452e-05, "loss": 1.5093, "step": 6414 }, { "epoch": 0.6663550431079256, "grad_norm": 0.3893379867076874, "learning_rate": 2.5042403669554416e-05, "loss": 1.5901, "step": 6415 }, { "epoch": 0.666458917627506, "grad_norm": 0.4125143587589264, "learning_rate": 2.5028266453239703e-05, "loss": 1.6606, "step": 6416 }, { "epoch": 0.6665627921470864, "grad_norm": 0.3965260088443756, "learning_rate": 2.501413189621983e-05, "loss": 1.6371, "step": 6417 }, { "epoch": 0.6666666666666666, "grad_norm": 0.4429340660572052, "learning_rate": 2.500000000000001e-05, "loss": 1.7512, "step": 6418 }, { "epoch": 0.666770541186247, "grad_norm": 0.471049427986145, "learning_rate": 2.4985870766085185e-05, "loss": 1.6877, "step": 6419 }, { "epoch": 0.6668744157058274, "grad_norm": 0.4106440544128418, "learning_rate": 2.4971744195979985e-05, "loss": 1.6417, "step": 6420 }, { "epoch": 0.6669782902254077, "grad_norm": 0.4270106554031372, "learning_rate": 2.4957620291188844e-05, "loss": 1.7746, "step": 6421 }, { "epoch": 0.667082164744988, "grad_norm": 0.39720797538757324, "learning_rate": 2.4943499053215756e-05, "loss": 1.5985, "step": 6422 }, { "epoch": 0.6671860392645684, "grad_norm": 0.43498122692108154, "learning_rate": 2.4929380483564602e-05, "loss": 1.7645, "step": 6423 }, { "epoch": 0.6672899137841487, "grad_norm": 0.3922276198863983, "learning_rate": 2.4915264583738858e-05, "loss": 1.5833, "step": 6424 }, { "epoch": 0.6673937883037291, "grad_norm": 0.4027794599533081, "learning_rate": 2.490115135524177e-05, "loss": 1.6876, "step": 6425 }, { "epoch": 0.6674976628233095, "grad_norm": 0.4081961214542389, "learning_rate": 2.488704079957629e-05, "loss": 1.7356, "step": 6426 }, { "epoch": 0.6676015373428897, "grad_norm": 0.427378386259079, "learning_rate": 2.487293291824507e-05, "loss": 1.6506, "step": 6427 }, { "epoch": 0.6677054118624701, "grad_norm": 0.4446810185909271, "learning_rate": 2.4858827712750494e-05, "loss": 1.8314, "step": 6428 }, { "epoch": 0.6678092863820505, "grad_norm": 0.41210460662841797, "learning_rate": 2.484472518459464e-05, "loss": 1.6845, "step": 6429 }, { "epoch": 0.6679131609016308, "grad_norm": 0.39390888810157776, "learning_rate": 2.483062533527938e-05, "loss": 1.4303, "step": 6430 }, { "epoch": 0.6680170354212112, "grad_norm": 0.4102948307991028, "learning_rate": 2.4816528166306146e-05, "loss": 1.7178, "step": 6431 }, { "epoch": 0.6681209099407915, "grad_norm": 0.45998239517211914, "learning_rate": 2.4802433679176247e-05, "loss": 1.69, "step": 6432 }, { "epoch": 0.6682247844603719, "grad_norm": 0.41955703496932983, "learning_rate": 2.4788341875390614e-05, "loss": 1.6254, "step": 6433 }, { "epoch": 0.6683286589799522, "grad_norm": 0.41680797934532166, "learning_rate": 2.4774252756449912e-05, "loss": 1.6673, "step": 6434 }, { "epoch": 0.6684325334995326, "grad_norm": 0.43315428495407104, "learning_rate": 2.4760166323854524e-05, "loss": 1.8226, "step": 6435 }, { "epoch": 0.668536408019113, "grad_norm": 0.4136770963668823, "learning_rate": 2.4746082579104518e-05, "loss": 1.6372, "step": 6436 }, { "epoch": 0.6686402825386932, "grad_norm": 0.40513721108436584, "learning_rate": 2.4732001523699772e-05, "loss": 1.5601, "step": 6437 }, { "epoch": 0.6687441570582736, "grad_norm": 0.4485640227794647, "learning_rate": 2.471792315913973e-05, "loss": 1.5854, "step": 6438 }, { "epoch": 0.668848031577854, "grad_norm": 0.4075925946235657, "learning_rate": 2.470384748692368e-05, "loss": 1.5502, "step": 6439 }, { "epoch": 0.6689519060974343, "grad_norm": 0.45135799050331116, "learning_rate": 2.468977450855058e-05, "loss": 1.836, "step": 6440 }, { "epoch": 0.6690557806170147, "grad_norm": 0.41414105892181396, "learning_rate": 2.4675704225519018e-05, "loss": 1.6872, "step": 6441 }, { "epoch": 0.669159655136595, "grad_norm": 0.39774543046951294, "learning_rate": 2.4661636639327444e-05, "loss": 1.5706, "step": 6442 }, { "epoch": 0.6692635296561753, "grad_norm": 0.43322694301605225, "learning_rate": 2.4647571751473924e-05, "loss": 1.7595, "step": 6443 }, { "epoch": 0.6693674041757557, "grad_norm": 0.4438508450984955, "learning_rate": 2.4633509563456252e-05, "loss": 1.5921, "step": 6444 }, { "epoch": 0.6694712786953361, "grad_norm": 0.460419237613678, "learning_rate": 2.461945007677195e-05, "loss": 1.9157, "step": 6445 }, { "epoch": 0.6695751532149163, "grad_norm": 0.4211447536945343, "learning_rate": 2.4605393292918234e-05, "loss": 1.7016, "step": 6446 }, { "epoch": 0.6696790277344967, "grad_norm": 0.40026143193244934, "learning_rate": 2.459133921339205e-05, "loss": 1.5405, "step": 6447 }, { "epoch": 0.6697829022540771, "grad_norm": 0.4232792258262634, "learning_rate": 2.4577287839690022e-05, "loss": 1.7017, "step": 6448 }, { "epoch": 0.6698867767736574, "grad_norm": 0.42340609431266785, "learning_rate": 2.456323917330857e-05, "loss": 1.8029, "step": 6449 }, { "epoch": 0.6699906512932378, "grad_norm": 0.3742263615131378, "learning_rate": 2.4549193215743706e-05, "loss": 1.6008, "step": 6450 }, { "epoch": 0.6700945258128181, "grad_norm": 0.41276246309280396, "learning_rate": 2.453514996849125e-05, "loss": 1.667, "step": 6451 }, { "epoch": 0.6701984003323984, "grad_norm": 0.4266214668750763, "learning_rate": 2.452110943304669e-05, "loss": 1.5254, "step": 6452 }, { "epoch": 0.6703022748519788, "grad_norm": 0.39875510334968567, "learning_rate": 2.450707161090523e-05, "loss": 1.5862, "step": 6453 }, { "epoch": 0.6704061493715592, "grad_norm": 0.47201064229011536, "learning_rate": 2.4493036503561795e-05, "loss": 1.7249, "step": 6454 }, { "epoch": 0.6705100238911395, "grad_norm": 0.43423768877983093, "learning_rate": 2.4479004112511012e-05, "loss": 1.5667, "step": 6455 }, { "epoch": 0.6706138984107198, "grad_norm": 0.45527634024620056, "learning_rate": 2.4464974439247213e-05, "loss": 1.7565, "step": 6456 }, { "epoch": 0.6707177729303002, "grad_norm": 0.4044038951396942, "learning_rate": 2.4450947485264442e-05, "loss": 1.6019, "step": 6457 }, { "epoch": 0.6708216474498806, "grad_norm": 0.4240105450153351, "learning_rate": 2.4436923252056486e-05, "loss": 1.6025, "step": 6458 }, { "epoch": 0.6709255219694609, "grad_norm": 0.45421627163887024, "learning_rate": 2.442290174111681e-05, "loss": 1.8032, "step": 6459 }, { "epoch": 0.6710293964890413, "grad_norm": 0.4415995478630066, "learning_rate": 2.440888295393859e-05, "loss": 1.844, "step": 6460 }, { "epoch": 0.6711332710086216, "grad_norm": 0.410188227891922, "learning_rate": 2.4394866892014716e-05, "loss": 1.5926, "step": 6461 }, { "epoch": 0.6712371455282019, "grad_norm": 0.46723711490631104, "learning_rate": 2.438085355683779e-05, "loss": 1.5847, "step": 6462 }, { "epoch": 0.6713410200477823, "grad_norm": 0.3974010944366455, "learning_rate": 2.436684294990012e-05, "loss": 1.5292, "step": 6463 }, { "epoch": 0.6714448945673627, "grad_norm": 0.39809471368789673, "learning_rate": 2.4352835072693713e-05, "loss": 1.6891, "step": 6464 }, { "epoch": 0.6715487690869429, "grad_norm": 0.45135897397994995, "learning_rate": 2.4338829926710356e-05, "loss": 1.8254, "step": 6465 }, { "epoch": 0.6716526436065233, "grad_norm": 0.3903135061264038, "learning_rate": 2.4324827513441406e-05, "loss": 1.5315, "step": 6466 }, { "epoch": 0.6717565181261037, "grad_norm": 0.47816312313079834, "learning_rate": 2.431082783437807e-05, "loss": 1.6046, "step": 6467 }, { "epoch": 0.671860392645684, "grad_norm": 0.47781482338905334, "learning_rate": 2.4296830891011206e-05, "loss": 1.7201, "step": 6468 }, { "epoch": 0.6719642671652644, "grad_norm": 0.4250834584236145, "learning_rate": 2.4282836684831324e-05, "loss": 1.5979, "step": 6469 }, { "epoch": 0.6720681416848447, "grad_norm": 0.4348602592945099, "learning_rate": 2.426884521732875e-05, "loss": 1.6024, "step": 6470 }, { "epoch": 0.672172016204425, "grad_norm": 0.3877941370010376, "learning_rate": 2.425485648999345e-05, "loss": 1.4421, "step": 6471 }, { "epoch": 0.6722758907240054, "grad_norm": 0.4232386350631714, "learning_rate": 2.4240870504315117e-05, "loss": 1.7063, "step": 6472 }, { "epoch": 0.6723797652435858, "grad_norm": 0.4069366455078125, "learning_rate": 2.4226887261783143e-05, "loss": 1.6469, "step": 6473 }, { "epoch": 0.672483639763166, "grad_norm": 0.43410545587539673, "learning_rate": 2.4212906763886638e-05, "loss": 1.6914, "step": 6474 }, { "epoch": 0.6725875142827464, "grad_norm": 0.40854349732398987, "learning_rate": 2.4198929012114413e-05, "loss": 1.5814, "step": 6475 }, { "epoch": 0.6726913888023268, "grad_norm": 0.4242352247238159, "learning_rate": 2.4184954007954978e-05, "loss": 1.6704, "step": 6476 }, { "epoch": 0.6727952633219071, "grad_norm": 0.44728976488113403, "learning_rate": 2.417098175289661e-05, "loss": 1.7766, "step": 6477 }, { "epoch": 0.6728991378414875, "grad_norm": 0.4239303171634674, "learning_rate": 2.4157012248427173e-05, "loss": 1.5764, "step": 6478 }, { "epoch": 0.6730030123610679, "grad_norm": 0.406534880399704, "learning_rate": 2.414304549603437e-05, "loss": 1.7778, "step": 6479 }, { "epoch": 0.6731068868806481, "grad_norm": 0.43634507060050964, "learning_rate": 2.4129081497205535e-05, "loss": 1.7988, "step": 6480 }, { "epoch": 0.6732107614002285, "grad_norm": 0.4149050712585449, "learning_rate": 2.4115120253427715e-05, "loss": 1.636, "step": 6481 }, { "epoch": 0.6733146359198089, "grad_norm": 0.46954065561294556, "learning_rate": 2.4101161766187673e-05, "loss": 1.6532, "step": 6482 }, { "epoch": 0.6734185104393893, "grad_norm": 0.4280845820903778, "learning_rate": 2.4087206036971872e-05, "loss": 1.5258, "step": 6483 }, { "epoch": 0.6735223849589695, "grad_norm": 0.40671125054359436, "learning_rate": 2.4073253067266532e-05, "loss": 1.6615, "step": 6484 }, { "epoch": 0.6736262594785499, "grad_norm": 0.45035362243652344, "learning_rate": 2.4059302858557475e-05, "loss": 1.7412, "step": 6485 }, { "epoch": 0.6737301339981303, "grad_norm": 0.4099656045436859, "learning_rate": 2.4045355412330328e-05, "loss": 1.6766, "step": 6486 }, { "epoch": 0.6738340085177106, "grad_norm": 0.39857202768325806, "learning_rate": 2.403141073007038e-05, "loss": 1.6006, "step": 6487 }, { "epoch": 0.673937883037291, "grad_norm": 0.4072171151638031, "learning_rate": 2.401746881326263e-05, "loss": 1.6916, "step": 6488 }, { "epoch": 0.6740417575568713, "grad_norm": 0.4422532916069031, "learning_rate": 2.400352966339177e-05, "loss": 1.7378, "step": 6489 }, { "epoch": 0.6741456320764516, "grad_norm": 0.41221001744270325, "learning_rate": 2.398959328194223e-05, "loss": 1.6959, "step": 6490 }, { "epoch": 0.674249506596032, "grad_norm": 0.4334010183811188, "learning_rate": 2.3975659670398117e-05, "loss": 1.7088, "step": 6491 }, { "epoch": 0.6743533811156124, "grad_norm": 0.4333952069282532, "learning_rate": 2.3961728830243224e-05, "loss": 1.4458, "step": 6492 }, { "epoch": 0.6744572556351927, "grad_norm": 0.40676429867744446, "learning_rate": 2.3947800762961147e-05, "loss": 1.68, "step": 6493 }, { "epoch": 0.674561130154773, "grad_norm": 0.39929771423339844, "learning_rate": 2.3933875470035038e-05, "loss": 1.4526, "step": 6494 }, { "epoch": 0.6746650046743534, "grad_norm": 0.4088972508907318, "learning_rate": 2.391995295294788e-05, "loss": 1.6881, "step": 6495 }, { "epoch": 0.6747688791939337, "grad_norm": 0.47918689250946045, "learning_rate": 2.3906033213182328e-05, "loss": 1.8923, "step": 6496 }, { "epoch": 0.6748727537135141, "grad_norm": 0.41081228852272034, "learning_rate": 2.3892116252220653e-05, "loss": 1.6732, "step": 6497 }, { "epoch": 0.6749766282330945, "grad_norm": 0.3840208649635315, "learning_rate": 2.3878202071544977e-05, "loss": 1.5638, "step": 6498 }, { "epoch": 0.6750805027526747, "grad_norm": 0.43148073554039, "learning_rate": 2.3864290672637012e-05, "loss": 1.7186, "step": 6499 }, { "epoch": 0.6751843772722551, "grad_norm": 0.42979419231414795, "learning_rate": 2.3850382056978228e-05, "loss": 1.621, "step": 6500 }, { "epoch": 0.6752882517918355, "grad_norm": 0.3895018696784973, "learning_rate": 2.3836476226049775e-05, "loss": 1.5422, "step": 6501 }, { "epoch": 0.6753921263114158, "grad_norm": 0.511653482913971, "learning_rate": 2.3822573181332526e-05, "loss": 1.7677, "step": 6502 }, { "epoch": 0.6754960008309961, "grad_norm": 0.43084853887557983, "learning_rate": 2.380867292430704e-05, "loss": 1.7901, "step": 6503 }, { "epoch": 0.6755998753505765, "grad_norm": 0.404482364654541, "learning_rate": 2.3794775456453573e-05, "loss": 1.708, "step": 6504 }, { "epoch": 0.6757037498701568, "grad_norm": 0.4159182906150818, "learning_rate": 2.378088077925214e-05, "loss": 1.5471, "step": 6505 }, { "epoch": 0.6758076243897372, "grad_norm": 0.4247626066207886, "learning_rate": 2.3766988894182357e-05, "loss": 1.8363, "step": 6506 }, { "epoch": 0.6759114989093176, "grad_norm": 0.47585469484329224, "learning_rate": 2.3753099802723643e-05, "loss": 2.0124, "step": 6507 }, { "epoch": 0.6760153734288978, "grad_norm": 0.44073712825775146, "learning_rate": 2.3739213506355073e-05, "loss": 1.698, "step": 6508 }, { "epoch": 0.6761192479484782, "grad_norm": 0.4209062159061432, "learning_rate": 2.372533000655542e-05, "loss": 1.8448, "step": 6509 }, { "epoch": 0.6762231224680586, "grad_norm": 0.40654614567756653, "learning_rate": 2.3711449304803175e-05, "loss": 1.6932, "step": 6510 }, { "epoch": 0.676326996987639, "grad_norm": 0.42411383986473083, "learning_rate": 2.3697571402576497e-05, "loss": 1.6501, "step": 6511 }, { "epoch": 0.6764308715072193, "grad_norm": 0.4248904883861542, "learning_rate": 2.3683696301353337e-05, "loss": 1.4518, "step": 6512 }, { "epoch": 0.6765347460267996, "grad_norm": 0.39044690132141113, "learning_rate": 2.3669824002611207e-05, "loss": 1.6496, "step": 6513 }, { "epoch": 0.67663862054638, "grad_norm": 0.4223690629005432, "learning_rate": 2.3655954507827456e-05, "loss": 1.6076, "step": 6514 }, { "epoch": 0.6767424950659603, "grad_norm": 0.44539690017700195, "learning_rate": 2.364208781847906e-05, "loss": 1.7109, "step": 6515 }, { "epoch": 0.6768463695855407, "grad_norm": 0.4467482566833496, "learning_rate": 2.3628223936042708e-05, "loss": 1.6478, "step": 6516 }, { "epoch": 0.6769502441051211, "grad_norm": 0.3779411315917969, "learning_rate": 2.3614362861994803e-05, "loss": 1.5394, "step": 6517 }, { "epoch": 0.6770541186247013, "grad_norm": 0.44637593626976013, "learning_rate": 2.3600504597811436e-05, "loss": 1.8076, "step": 6518 }, { "epoch": 0.6771579931442817, "grad_norm": 0.43689000606536865, "learning_rate": 2.3586649144968404e-05, "loss": 1.718, "step": 6519 }, { "epoch": 0.6772618676638621, "grad_norm": 0.4006705582141876, "learning_rate": 2.357279650494119e-05, "loss": 1.4616, "step": 6520 }, { "epoch": 0.6773657421834424, "grad_norm": 0.42330577969551086, "learning_rate": 2.3558946679205046e-05, "loss": 1.6517, "step": 6521 }, { "epoch": 0.6774696167030227, "grad_norm": 0.41748082637786865, "learning_rate": 2.3545099669234792e-05, "loss": 1.5593, "step": 6522 }, { "epoch": 0.6775734912226031, "grad_norm": 0.46182626485824585, "learning_rate": 2.3531255476505086e-05, "loss": 1.7158, "step": 6523 }, { "epoch": 0.6776773657421834, "grad_norm": 0.444038450717926, "learning_rate": 2.3517414102490232e-05, "loss": 1.8144, "step": 6524 }, { "epoch": 0.6777812402617638, "grad_norm": 0.46683838963508606, "learning_rate": 2.350357554866416e-05, "loss": 1.8248, "step": 6525 }, { "epoch": 0.6778851147813442, "grad_norm": 0.4560922384262085, "learning_rate": 2.3489739816500635e-05, "loss": 1.7912, "step": 6526 }, { "epoch": 0.6779889893009244, "grad_norm": 0.4331681430339813, "learning_rate": 2.3475906907473027e-05, "loss": 1.7446, "step": 6527 }, { "epoch": 0.6780928638205048, "grad_norm": 0.42912784218788147, "learning_rate": 2.3462076823054442e-05, "loss": 1.6939, "step": 6528 }, { "epoch": 0.6781967383400852, "grad_norm": 0.451789915561676, "learning_rate": 2.3448249564717666e-05, "loss": 1.7973, "step": 6529 }, { "epoch": 0.6783006128596655, "grad_norm": 0.4173526465892792, "learning_rate": 2.3434425133935207e-05, "loss": 1.6438, "step": 6530 }, { "epoch": 0.6784044873792459, "grad_norm": 0.4266951084136963, "learning_rate": 2.3420603532179246e-05, "loss": 1.7301, "step": 6531 }, { "epoch": 0.6785083618988262, "grad_norm": 0.48929300904273987, "learning_rate": 2.3406784760921667e-05, "loss": 1.7364, "step": 6532 }, { "epoch": 0.6786122364184065, "grad_norm": 0.3748891055583954, "learning_rate": 2.339296882163411e-05, "loss": 1.608, "step": 6533 }, { "epoch": 0.6787161109379869, "grad_norm": 0.42468810081481934, "learning_rate": 2.3379155715787792e-05, "loss": 1.8397, "step": 6534 }, { "epoch": 0.6788199854575673, "grad_norm": 0.46891170740127563, "learning_rate": 2.3365345444853763e-05, "loss": 1.9061, "step": 6535 }, { "epoch": 0.6789238599771477, "grad_norm": 0.42651164531707764, "learning_rate": 2.3351538010302688e-05, "loss": 1.6904, "step": 6536 }, { "epoch": 0.6790277344967279, "grad_norm": 0.41323697566986084, "learning_rate": 2.333773341360494e-05, "loss": 1.6176, "step": 6537 }, { "epoch": 0.6791316090163083, "grad_norm": 0.4378218948841095, "learning_rate": 2.3323931656230614e-05, "loss": 1.7018, "step": 6538 }, { "epoch": 0.6792354835358887, "grad_norm": 0.42341065406799316, "learning_rate": 2.331013273964947e-05, "loss": 1.6211, "step": 6539 }, { "epoch": 0.679339358055469, "grad_norm": 0.37841764092445374, "learning_rate": 2.3296336665331032e-05, "loss": 1.6159, "step": 6540 }, { "epoch": 0.6794432325750493, "grad_norm": 0.38354504108428955, "learning_rate": 2.3282543434744407e-05, "loss": 1.6425, "step": 6541 }, { "epoch": 0.6795471070946297, "grad_norm": 0.4210819900035858, "learning_rate": 2.3268753049358522e-05, "loss": 1.6235, "step": 6542 }, { "epoch": 0.67965098161421, "grad_norm": 0.4259316623210907, "learning_rate": 2.3254965510641928e-05, "loss": 1.8103, "step": 6543 }, { "epoch": 0.6797548561337904, "grad_norm": 0.3991251289844513, "learning_rate": 2.3241180820062885e-05, "loss": 1.6595, "step": 6544 }, { "epoch": 0.6798587306533708, "grad_norm": 0.3932139575481415, "learning_rate": 2.3227398979089353e-05, "loss": 1.586, "step": 6545 }, { "epoch": 0.679962605172951, "grad_norm": 0.40890079736709595, "learning_rate": 2.3213619989189006e-05, "loss": 1.4932, "step": 6546 }, { "epoch": 0.6800664796925314, "grad_norm": 0.42647114396095276, "learning_rate": 2.319984385182918e-05, "loss": 1.6931, "step": 6547 }, { "epoch": 0.6801703542121118, "grad_norm": 0.39702990651130676, "learning_rate": 2.318607056847692e-05, "loss": 1.5665, "step": 6548 }, { "epoch": 0.6802742287316921, "grad_norm": 0.3964666724205017, "learning_rate": 2.3172300140599024e-05, "loss": 1.7028, "step": 6549 }, { "epoch": 0.6803781032512725, "grad_norm": 0.41276541352272034, "learning_rate": 2.315853256966186e-05, "loss": 1.5754, "step": 6550 }, { "epoch": 0.6804819777708528, "grad_norm": 0.40658214688301086, "learning_rate": 2.314476785713163e-05, "loss": 1.8211, "step": 6551 }, { "epoch": 0.6805858522904331, "grad_norm": 0.43847519159317017, "learning_rate": 2.3131006004474152e-05, "loss": 1.5858, "step": 6552 }, { "epoch": 0.6806897268100135, "grad_norm": 0.5137412548065186, "learning_rate": 2.3117247013154918e-05, "loss": 1.9818, "step": 6553 }, { "epoch": 0.6807936013295939, "grad_norm": 0.41750749945640564, "learning_rate": 2.31034908846392e-05, "loss": 1.519, "step": 6554 }, { "epoch": 0.6808974758491741, "grad_norm": 0.3900861144065857, "learning_rate": 2.3089737620391906e-05, "loss": 1.6836, "step": 6555 }, { "epoch": 0.6810013503687545, "grad_norm": 0.4610309302806854, "learning_rate": 2.307598722187765e-05, "loss": 1.6183, "step": 6556 }, { "epoch": 0.6811052248883349, "grad_norm": 0.4481363892555237, "learning_rate": 2.306223969056073e-05, "loss": 1.7593, "step": 6557 }, { "epoch": 0.6812090994079152, "grad_norm": 0.37722769379615784, "learning_rate": 2.3048495027905166e-05, "loss": 1.5302, "step": 6558 }, { "epoch": 0.6813129739274956, "grad_norm": 0.42121073603630066, "learning_rate": 2.303475323537465e-05, "loss": 1.5051, "step": 6559 }, { "epoch": 0.681416848447076, "grad_norm": 0.49709874391555786, "learning_rate": 2.302101431443256e-05, "loss": 1.9087, "step": 6560 }, { "epoch": 0.6815207229666563, "grad_norm": 0.4022544026374817, "learning_rate": 2.300727826654202e-05, "loss": 1.6909, "step": 6561 }, { "epoch": 0.6816245974862366, "grad_norm": 0.41728565096855164, "learning_rate": 2.299354509316579e-05, "loss": 1.6303, "step": 6562 }, { "epoch": 0.681728472005817, "grad_norm": 0.439998060464859, "learning_rate": 2.297981479576635e-05, "loss": 1.8708, "step": 6563 }, { "epoch": 0.6818323465253974, "grad_norm": 0.39185917377471924, "learning_rate": 2.2966087375805874e-05, "loss": 1.6224, "step": 6564 }, { "epoch": 0.6819362210449776, "grad_norm": 0.38439640402793884, "learning_rate": 2.2952362834746216e-05, "loss": 1.5744, "step": 6565 }, { "epoch": 0.682040095564558, "grad_norm": 0.4013480544090271, "learning_rate": 2.2938641174048936e-05, "loss": 1.5341, "step": 6566 }, { "epoch": 0.6821439700841384, "grad_norm": 0.44190388917922974, "learning_rate": 2.2924922395175268e-05, "loss": 1.8871, "step": 6567 }, { "epoch": 0.6822478446037187, "grad_norm": 0.4578564167022705, "learning_rate": 2.2911206499586207e-05, "loss": 1.8006, "step": 6568 }, { "epoch": 0.6823517191232991, "grad_norm": 0.44078001379966736, "learning_rate": 2.2897493488742317e-05, "loss": 1.6603, "step": 6569 }, { "epoch": 0.6824555936428794, "grad_norm": 0.4083844721317291, "learning_rate": 2.2883783364103976e-05, "loss": 1.5109, "step": 6570 }, { "epoch": 0.6825594681624597, "grad_norm": 0.4550290107727051, "learning_rate": 2.2870076127131196e-05, "loss": 1.7607, "step": 6571 }, { "epoch": 0.6826633426820401, "grad_norm": 0.4314436912536621, "learning_rate": 2.2856371779283692e-05, "loss": 1.7517, "step": 6572 }, { "epoch": 0.6827672172016205, "grad_norm": 0.4039313793182373, "learning_rate": 2.2842670322020858e-05, "loss": 1.7537, "step": 6573 }, { "epoch": 0.6828710917212008, "grad_norm": 0.40721437335014343, "learning_rate": 2.2828971756801802e-05, "loss": 1.5869, "step": 6574 }, { "epoch": 0.6829749662407811, "grad_norm": 0.38915568590164185, "learning_rate": 2.2815276085085313e-05, "loss": 1.5199, "step": 6575 }, { "epoch": 0.6830788407603615, "grad_norm": 0.42462047934532166, "learning_rate": 2.2801583308329848e-05, "loss": 1.6698, "step": 6576 }, { "epoch": 0.6831827152799418, "grad_norm": 0.42079266905784607, "learning_rate": 2.2787893427993646e-05, "loss": 1.582, "step": 6577 }, { "epoch": 0.6832865897995222, "grad_norm": 0.45003542304039, "learning_rate": 2.2774206445534495e-05, "loss": 1.4429, "step": 6578 }, { "epoch": 0.6833904643191026, "grad_norm": 0.38841044902801514, "learning_rate": 2.2760522362410003e-05, "loss": 1.5655, "step": 6579 }, { "epoch": 0.6834943388386828, "grad_norm": 0.39281266927719116, "learning_rate": 2.274684118007743e-05, "loss": 1.6493, "step": 6580 }, { "epoch": 0.6835982133582632, "grad_norm": 0.37398335337638855, "learning_rate": 2.2733162899993642e-05, "loss": 1.4814, "step": 6581 }, { "epoch": 0.6837020878778436, "grad_norm": 0.43852871656417847, "learning_rate": 2.271948752361534e-05, "loss": 1.5412, "step": 6582 }, { "epoch": 0.6838059623974239, "grad_norm": 0.4092741012573242, "learning_rate": 2.2705815052398816e-05, "loss": 1.6233, "step": 6583 }, { "epoch": 0.6839098369170042, "grad_norm": 0.3938494026660919, "learning_rate": 2.2692145487800093e-05, "loss": 1.6233, "step": 6584 }, { "epoch": 0.6840137114365846, "grad_norm": 0.5273641347885132, "learning_rate": 2.2678478831274858e-05, "loss": 1.8792, "step": 6585 }, { "epoch": 0.6841175859561649, "grad_norm": 0.41497117280960083, "learning_rate": 2.266481508427852e-05, "loss": 1.6535, "step": 6586 }, { "epoch": 0.6842214604757453, "grad_norm": 0.39837929606437683, "learning_rate": 2.2651154248266145e-05, "loss": 1.5587, "step": 6587 }, { "epoch": 0.6843253349953257, "grad_norm": 0.504227340221405, "learning_rate": 2.2637496324692496e-05, "loss": 1.6661, "step": 6588 }, { "epoch": 0.684429209514906, "grad_norm": 0.425836443901062, "learning_rate": 2.2623841315012072e-05, "loss": 1.7157, "step": 6589 }, { "epoch": 0.6845330840344863, "grad_norm": 0.429581880569458, "learning_rate": 2.2610189220679007e-05, "loss": 1.7036, "step": 6590 }, { "epoch": 0.6846369585540667, "grad_norm": 0.4043353497982025, "learning_rate": 2.2596540043147142e-05, "loss": 1.6208, "step": 6591 }, { "epoch": 0.6847408330736471, "grad_norm": 0.423662006855011, "learning_rate": 2.2582893783870006e-05, "loss": 1.8271, "step": 6592 }, { "epoch": 0.6848447075932274, "grad_norm": 0.3973560631275177, "learning_rate": 2.256925044430082e-05, "loss": 1.638, "step": 6593 }, { "epoch": 0.6849485821128077, "grad_norm": 0.41801849007606506, "learning_rate": 2.2555610025892498e-05, "loss": 1.5826, "step": 6594 }, { "epoch": 0.6850524566323881, "grad_norm": 0.45296528935432434, "learning_rate": 2.254197253009761e-05, "loss": 1.5875, "step": 6595 }, { "epoch": 0.6851563311519684, "grad_norm": 0.4170701205730438, "learning_rate": 2.2528337958368507e-05, "loss": 1.6933, "step": 6596 }, { "epoch": 0.6852602056715488, "grad_norm": 0.3868260979652405, "learning_rate": 2.2514706312157085e-05, "loss": 1.5773, "step": 6597 }, { "epoch": 0.6853640801911292, "grad_norm": 0.3958593010902405, "learning_rate": 2.2501077592915065e-05, "loss": 1.6426, "step": 6598 }, { "epoch": 0.6854679547107094, "grad_norm": 0.3819239139556885, "learning_rate": 2.2487451802093784e-05, "loss": 1.5363, "step": 6599 }, { "epoch": 0.6855718292302898, "grad_norm": 0.4158235192298889, "learning_rate": 2.2473828941144276e-05, "loss": 1.5895, "step": 6600 }, { "epoch": 0.6856757037498702, "grad_norm": 0.4286392331123352, "learning_rate": 2.246020901151728e-05, "loss": 1.6797, "step": 6601 }, { "epoch": 0.6857795782694505, "grad_norm": 0.39033573865890503, "learning_rate": 2.24465920146632e-05, "loss": 1.6592, "step": 6602 }, { "epoch": 0.6858834527890308, "grad_norm": 0.47178182005882263, "learning_rate": 2.2432977952032148e-05, "loss": 1.7151, "step": 6603 }, { "epoch": 0.6859873273086112, "grad_norm": 0.4451124370098114, "learning_rate": 2.2419366825073894e-05, "loss": 1.8579, "step": 6604 }, { "epoch": 0.6860912018281915, "grad_norm": 0.4096846878528595, "learning_rate": 2.2405758635237973e-05, "loss": 1.57, "step": 6605 }, { "epoch": 0.6861950763477719, "grad_norm": 0.437682569026947, "learning_rate": 2.2392153383973473e-05, "loss": 1.7868, "step": 6606 }, { "epoch": 0.6862989508673523, "grad_norm": 0.4488213062286377, "learning_rate": 2.2378551072729314e-05, "loss": 1.7668, "step": 6607 }, { "epoch": 0.6864028253869325, "grad_norm": 0.4229215383529663, "learning_rate": 2.2364951702954025e-05, "loss": 1.7325, "step": 6608 }, { "epoch": 0.6865066999065129, "grad_norm": 0.45291194319725037, "learning_rate": 2.235135527609578e-05, "loss": 1.8742, "step": 6609 }, { "epoch": 0.6866105744260933, "grad_norm": 0.41478431224823, "learning_rate": 2.233776179360255e-05, "loss": 1.5874, "step": 6610 }, { "epoch": 0.6867144489456736, "grad_norm": 0.4467930495738983, "learning_rate": 2.2324171256921918e-05, "loss": 1.8026, "step": 6611 }, { "epoch": 0.686818323465254, "grad_norm": 0.4063619375228882, "learning_rate": 2.2310583667501163e-05, "loss": 1.68, "step": 6612 }, { "epoch": 0.6869221979848343, "grad_norm": 0.4245857000350952, "learning_rate": 2.2296999026787262e-05, "loss": 1.5025, "step": 6613 }, { "epoch": 0.6870260725044147, "grad_norm": 0.44443392753601074, "learning_rate": 2.2283417336226853e-05, "loss": 1.6217, "step": 6614 }, { "epoch": 0.687129947023995, "grad_norm": 0.4405082166194916, "learning_rate": 2.2269838597266342e-05, "loss": 1.5827, "step": 6615 }, { "epoch": 0.6872338215435754, "grad_norm": 0.4452870190143585, "learning_rate": 2.2256262811351676e-05, "loss": 1.7894, "step": 6616 }, { "epoch": 0.6873376960631558, "grad_norm": 0.37524208426475525, "learning_rate": 2.2242689979928632e-05, "loss": 1.5107, "step": 6617 }, { "epoch": 0.687441570582736, "grad_norm": 0.4177568554878235, "learning_rate": 2.222912010444259e-05, "loss": 1.6793, "step": 6618 }, { "epoch": 0.6875454451023164, "grad_norm": 0.39872753620147705, "learning_rate": 2.2215553186338635e-05, "loss": 1.6067, "step": 6619 }, { "epoch": 0.6876493196218968, "grad_norm": 0.40704911947250366, "learning_rate": 2.2201989227061535e-05, "loss": 1.6807, "step": 6620 }, { "epoch": 0.6877531941414771, "grad_norm": 0.43390125036239624, "learning_rate": 2.2188428228055756e-05, "loss": 1.5434, "step": 6621 }, { "epoch": 0.6878570686610574, "grad_norm": 0.4030878245830536, "learning_rate": 2.2174870190765428e-05, "loss": 1.6909, "step": 6622 }, { "epoch": 0.6879609431806378, "grad_norm": 0.45585277676582336, "learning_rate": 2.2161315116634362e-05, "loss": 1.7252, "step": 6623 }, { "epoch": 0.6880648177002181, "grad_norm": 0.4133506715297699, "learning_rate": 2.2147763007106126e-05, "loss": 1.7813, "step": 6624 }, { "epoch": 0.6881686922197985, "grad_norm": 0.45067742466926575, "learning_rate": 2.213421386362383e-05, "loss": 1.7338, "step": 6625 }, { "epoch": 0.6882725667393789, "grad_norm": 0.4103121757507324, "learning_rate": 2.212066768763042e-05, "loss": 1.7215, "step": 6626 }, { "epoch": 0.6883764412589591, "grad_norm": 0.43877947330474854, "learning_rate": 2.2107124480568435e-05, "loss": 1.5899, "step": 6627 }, { "epoch": 0.6884803157785395, "grad_norm": 0.3948143720626831, "learning_rate": 2.2093584243880116e-05, "loss": 1.6213, "step": 6628 }, { "epoch": 0.6885841902981199, "grad_norm": 0.39642539620399475, "learning_rate": 2.20800469790074e-05, "loss": 1.5516, "step": 6629 }, { "epoch": 0.6886880648177002, "grad_norm": 0.4189082980155945, "learning_rate": 2.2066512687391898e-05, "loss": 1.6291, "step": 6630 }, { "epoch": 0.6887919393372806, "grad_norm": 0.48019567131996155, "learning_rate": 2.20529813704749e-05, "loss": 1.819, "step": 6631 }, { "epoch": 0.6888958138568609, "grad_norm": 0.43770167231559753, "learning_rate": 2.2039453029697378e-05, "loss": 1.7297, "step": 6632 }, { "epoch": 0.6889996883764412, "grad_norm": 0.3850918114185333, "learning_rate": 2.202592766650004e-05, "loss": 1.6097, "step": 6633 }, { "epoch": 0.6891035628960216, "grad_norm": 0.4326091408729553, "learning_rate": 2.2012405282323166e-05, "loss": 1.4958, "step": 6634 }, { "epoch": 0.689207437415602, "grad_norm": 0.3961119055747986, "learning_rate": 2.1998885878606835e-05, "loss": 1.5057, "step": 6635 }, { "epoch": 0.6893113119351822, "grad_norm": 0.4198208153247833, "learning_rate": 2.1985369456790757e-05, "loss": 1.614, "step": 6636 }, { "epoch": 0.6894151864547626, "grad_norm": 0.4884597659111023, "learning_rate": 2.1971856018314274e-05, "loss": 1.8599, "step": 6637 }, { "epoch": 0.689519060974343, "grad_norm": 0.42158132791519165, "learning_rate": 2.1958345564616515e-05, "loss": 1.6569, "step": 6638 }, { "epoch": 0.6896229354939234, "grad_norm": 0.41984039545059204, "learning_rate": 2.1944838097136205e-05, "loss": 1.6422, "step": 6639 }, { "epoch": 0.6897268100135037, "grad_norm": 0.42424213886260986, "learning_rate": 2.1931333617311833e-05, "loss": 1.8098, "step": 6640 }, { "epoch": 0.689830684533084, "grad_norm": 0.3981630206108093, "learning_rate": 2.1917832126581467e-05, "loss": 1.6439, "step": 6641 }, { "epoch": 0.6899345590526644, "grad_norm": 0.42164191603660583, "learning_rate": 2.1904333626382918e-05, "loss": 1.6079, "step": 6642 }, { "epoch": 0.6900384335722447, "grad_norm": 0.43774572014808655, "learning_rate": 2.189083811815371e-05, "loss": 1.6243, "step": 6643 }, { "epoch": 0.6901423080918251, "grad_norm": 0.4788624942302704, "learning_rate": 2.187734560333095e-05, "loss": 1.8424, "step": 6644 }, { "epoch": 0.6902461826114055, "grad_norm": 0.3973691761493683, "learning_rate": 2.186385608335154e-05, "loss": 1.5472, "step": 6645 }, { "epoch": 0.6903500571309857, "grad_norm": 0.4038008451461792, "learning_rate": 2.1850369559651984e-05, "loss": 1.5287, "step": 6646 }, { "epoch": 0.6904539316505661, "grad_norm": 0.38313379883766174, "learning_rate": 2.1836886033668495e-05, "loss": 1.5671, "step": 6647 }, { "epoch": 0.6905578061701465, "grad_norm": 0.43707215785980225, "learning_rate": 2.1823405506836962e-05, "loss": 1.6821, "step": 6648 }, { "epoch": 0.6906616806897268, "grad_norm": 0.40973857045173645, "learning_rate": 2.180992798059296e-05, "loss": 1.6621, "step": 6649 }, { "epoch": 0.6907655552093072, "grad_norm": 0.3938702344894409, "learning_rate": 2.1796453456371735e-05, "loss": 1.5388, "step": 6650 }, { "epoch": 0.6908694297288875, "grad_norm": 0.3766863942146301, "learning_rate": 2.17829819356082e-05, "loss": 1.5155, "step": 6651 }, { "epoch": 0.6909733042484678, "grad_norm": 0.4006540775299072, "learning_rate": 2.1769513419737024e-05, "loss": 1.768, "step": 6652 }, { "epoch": 0.6910771787680482, "grad_norm": 0.4285779297351837, "learning_rate": 2.1756047910192428e-05, "loss": 1.6215, "step": 6653 }, { "epoch": 0.6911810532876286, "grad_norm": 0.46323853731155396, "learning_rate": 2.1742585408408438e-05, "loss": 1.7715, "step": 6654 }, { "epoch": 0.6912849278072088, "grad_norm": 0.46970510482788086, "learning_rate": 2.1729125915818684e-05, "loss": 1.7967, "step": 6655 }, { "epoch": 0.6913888023267892, "grad_norm": 0.39832404255867004, "learning_rate": 2.17156694338565e-05, "loss": 1.6482, "step": 6656 }, { "epoch": 0.6914926768463696, "grad_norm": 0.42489683628082275, "learning_rate": 2.1702215963954893e-05, "loss": 1.6526, "step": 6657 }, { "epoch": 0.6915965513659499, "grad_norm": 0.39924055337905884, "learning_rate": 2.1688765507546556e-05, "loss": 1.3809, "step": 6658 }, { "epoch": 0.6917004258855303, "grad_norm": 0.4202609062194824, "learning_rate": 2.1675318066063855e-05, "loss": 1.6426, "step": 6659 }, { "epoch": 0.6918043004051107, "grad_norm": 0.42206838726997375, "learning_rate": 2.166187364093882e-05, "loss": 1.6819, "step": 6660 }, { "epoch": 0.6919081749246909, "grad_norm": 0.4156840443611145, "learning_rate": 2.1648432233603232e-05, "loss": 1.8221, "step": 6661 }, { "epoch": 0.6920120494442713, "grad_norm": 0.41110360622406006, "learning_rate": 2.1634993845488426e-05, "loss": 1.707, "step": 6662 }, { "epoch": 0.6921159239638517, "grad_norm": 0.4039273262023926, "learning_rate": 2.162155847802554e-05, "loss": 1.6048, "step": 6663 }, { "epoch": 0.6922197984834321, "grad_norm": 0.4477052688598633, "learning_rate": 2.1608126132645317e-05, "loss": 1.8689, "step": 6664 }, { "epoch": 0.6923236730030123, "grad_norm": 0.40778693556785583, "learning_rate": 2.1594696810778193e-05, "loss": 1.7781, "step": 6665 }, { "epoch": 0.6924275475225927, "grad_norm": 0.42754799127578735, "learning_rate": 2.1581270513854283e-05, "loss": 1.71, "step": 6666 }, { "epoch": 0.6925314220421731, "grad_norm": 0.4134778380393982, "learning_rate": 2.156784724330338e-05, "loss": 1.7161, "step": 6667 }, { "epoch": 0.6926352965617534, "grad_norm": 0.40109783411026, "learning_rate": 2.1554427000555e-05, "loss": 1.5071, "step": 6668 }, { "epoch": 0.6927391710813338, "grad_norm": 0.49069786071777344, "learning_rate": 2.154100978703824e-05, "loss": 1.9258, "step": 6669 }, { "epoch": 0.6928430456009141, "grad_norm": 0.4370437562465668, "learning_rate": 2.1527595604181933e-05, "loss": 1.4963, "step": 6670 }, { "epoch": 0.6929469201204944, "grad_norm": 0.45224136114120483, "learning_rate": 2.151418445341463e-05, "loss": 1.745, "step": 6671 }, { "epoch": 0.6930507946400748, "grad_norm": 0.4150208532810211, "learning_rate": 2.1500776336164456e-05, "loss": 1.6489, "step": 6672 }, { "epoch": 0.6931546691596552, "grad_norm": 0.4757159352302551, "learning_rate": 2.1487371253859316e-05, "loss": 1.8266, "step": 6673 }, { "epoch": 0.6932585436792355, "grad_norm": 0.3998592793941498, "learning_rate": 2.1473969207926737e-05, "loss": 1.6169, "step": 6674 }, { "epoch": 0.6933624181988158, "grad_norm": 0.42257049679756165, "learning_rate": 2.1460570199793917e-05, "loss": 1.7495, "step": 6675 }, { "epoch": 0.6934662927183962, "grad_norm": 0.3936207592487335, "learning_rate": 2.1447174230887757e-05, "loss": 1.7681, "step": 6676 }, { "epoch": 0.6935701672379765, "grad_norm": 0.4289858639240265, "learning_rate": 2.143378130263482e-05, "loss": 1.665, "step": 6677 }, { "epoch": 0.6936740417575569, "grad_norm": 0.4393899738788605, "learning_rate": 2.142039141646135e-05, "loss": 1.6799, "step": 6678 }, { "epoch": 0.6937779162771373, "grad_norm": 0.4026007056236267, "learning_rate": 2.140700457379325e-05, "loss": 1.6812, "step": 6679 }, { "epoch": 0.6938817907967175, "grad_norm": 0.4645029306411743, "learning_rate": 2.1393620776056163e-05, "loss": 1.7876, "step": 6680 }, { "epoch": 0.6939856653162979, "grad_norm": 0.3936692774295807, "learning_rate": 2.1380240024675285e-05, "loss": 1.3614, "step": 6681 }, { "epoch": 0.6940895398358783, "grad_norm": 0.4686059057712555, "learning_rate": 2.136686232107562e-05, "loss": 1.7599, "step": 6682 }, { "epoch": 0.6941934143554586, "grad_norm": 0.4059823155403137, "learning_rate": 2.1353487666681777e-05, "loss": 1.663, "step": 6683 }, { "epoch": 0.6942972888750389, "grad_norm": 0.4833250939846039, "learning_rate": 2.134011606291804e-05, "loss": 1.8039, "step": 6684 }, { "epoch": 0.6944011633946193, "grad_norm": 0.41929182410240173, "learning_rate": 2.132674751120839e-05, "loss": 1.7361, "step": 6685 }, { "epoch": 0.6945050379141996, "grad_norm": 0.4122251272201538, "learning_rate": 2.131338201297647e-05, "loss": 1.5977, "step": 6686 }, { "epoch": 0.69460891243378, "grad_norm": 0.476242333650589, "learning_rate": 2.1300019569645596e-05, "loss": 1.9488, "step": 6687 }, { "epoch": 0.6947127869533604, "grad_norm": 0.40492451190948486, "learning_rate": 2.1286660182638756e-05, "loss": 1.7338, "step": 6688 }, { "epoch": 0.6948166614729406, "grad_norm": 0.4443548917770386, "learning_rate": 2.1273303853378674e-05, "loss": 1.5402, "step": 6689 }, { "epoch": 0.694920535992521, "grad_norm": 0.40552589297294617, "learning_rate": 2.125995058328763e-05, "loss": 1.6692, "step": 6690 }, { "epoch": 0.6950244105121014, "grad_norm": 0.5920501351356506, "learning_rate": 2.124660037378765e-05, "loss": 1.7801, "step": 6691 }, { "epoch": 0.6951282850316818, "grad_norm": 0.4190227687358856, "learning_rate": 2.1233253226300465e-05, "loss": 1.5115, "step": 6692 }, { "epoch": 0.695232159551262, "grad_norm": 0.47602778673171997, "learning_rate": 2.1219909142247424e-05, "loss": 1.8221, "step": 6693 }, { "epoch": 0.6953360340708424, "grad_norm": 0.43550148606300354, "learning_rate": 2.1206568123049564e-05, "loss": 1.6134, "step": 6694 }, { "epoch": 0.6954399085904228, "grad_norm": 0.4044070541858673, "learning_rate": 2.119323017012759e-05, "loss": 1.5809, "step": 6695 }, { "epoch": 0.6955437831100031, "grad_norm": 0.4208220839500427, "learning_rate": 2.1179895284901924e-05, "loss": 1.5416, "step": 6696 }, { "epoch": 0.6956476576295835, "grad_norm": 0.40507733821868896, "learning_rate": 2.1166563468792594e-05, "loss": 1.5112, "step": 6697 }, { "epoch": 0.6957515321491639, "grad_norm": 0.4560292959213257, "learning_rate": 2.1153234723219328e-05, "loss": 1.6208, "step": 6698 }, { "epoch": 0.6958554066687441, "grad_norm": 0.46594947576522827, "learning_rate": 2.1139909049601587e-05, "loss": 1.7745, "step": 6699 }, { "epoch": 0.6959592811883245, "grad_norm": 0.4122544527053833, "learning_rate": 2.1126586449358375e-05, "loss": 1.5898, "step": 6700 }, { "epoch": 0.6960631557079049, "grad_norm": 0.45607662200927734, "learning_rate": 2.111326692390851e-05, "loss": 1.7258, "step": 6701 }, { "epoch": 0.6961670302274852, "grad_norm": 0.4141891300678253, "learning_rate": 2.1099950474670387e-05, "loss": 1.6533, "step": 6702 }, { "epoch": 0.6962709047470655, "grad_norm": 0.3780854046344757, "learning_rate": 2.1086637103062107e-05, "loss": 1.5623, "step": 6703 }, { "epoch": 0.6963747792666459, "grad_norm": 0.4268392026424408, "learning_rate": 2.1073326810501448e-05, "loss": 1.6596, "step": 6704 }, { "epoch": 0.6964786537862262, "grad_norm": 0.40509405732154846, "learning_rate": 2.1060019598405843e-05, "loss": 1.5612, "step": 6705 }, { "epoch": 0.6965825283058066, "grad_norm": 0.38771677017211914, "learning_rate": 2.1046715468192414e-05, "loss": 1.5457, "step": 6706 }, { "epoch": 0.696686402825387, "grad_norm": 0.4286300539970398, "learning_rate": 2.1033414421277924e-05, "loss": 1.7415, "step": 6707 }, { "epoch": 0.6967902773449672, "grad_norm": 0.46014636754989624, "learning_rate": 2.1020116459078887e-05, "loss": 1.5627, "step": 6708 }, { "epoch": 0.6968941518645476, "grad_norm": 0.42904940247535706, "learning_rate": 2.1006821583011356e-05, "loss": 1.6067, "step": 6709 }, { "epoch": 0.696998026384128, "grad_norm": 0.39022296667099, "learning_rate": 2.099352979449119e-05, "loss": 1.5778, "step": 6710 }, { "epoch": 0.6971019009037083, "grad_norm": 0.4107384979724884, "learning_rate": 2.0980241094933845e-05, "loss": 1.685, "step": 6711 }, { "epoch": 0.6972057754232887, "grad_norm": 0.42475056648254395, "learning_rate": 2.0966955485754457e-05, "loss": 1.2405, "step": 6712 }, { "epoch": 0.697309649942869, "grad_norm": 0.4216676950454712, "learning_rate": 2.0953672968367844e-05, "loss": 1.4806, "step": 6713 }, { "epoch": 0.6974135244624493, "grad_norm": 0.37157952785491943, "learning_rate": 2.094039354418849e-05, "loss": 1.4771, "step": 6714 }, { "epoch": 0.6975173989820297, "grad_norm": 0.44007498025894165, "learning_rate": 2.0927117214630547e-05, "loss": 1.8148, "step": 6715 }, { "epoch": 0.6976212735016101, "grad_norm": 0.4631294012069702, "learning_rate": 2.0913843981107835e-05, "loss": 1.7156, "step": 6716 }, { "epoch": 0.6977251480211905, "grad_norm": 0.42013266682624817, "learning_rate": 2.0900573845033865e-05, "loss": 1.6558, "step": 6717 }, { "epoch": 0.6978290225407707, "grad_norm": 0.39242061972618103, "learning_rate": 2.0887306807821822e-05, "loss": 1.5849, "step": 6718 }, { "epoch": 0.6979328970603511, "grad_norm": 0.41832900047302246, "learning_rate": 2.0874042870884475e-05, "loss": 1.5836, "step": 6719 }, { "epoch": 0.6980367715799315, "grad_norm": 0.44263386726379395, "learning_rate": 2.086078203563439e-05, "loss": 1.7748, "step": 6720 }, { "epoch": 0.6981406460995118, "grad_norm": 0.4205859303474426, "learning_rate": 2.0847524303483728e-05, "loss": 1.7206, "step": 6721 }, { "epoch": 0.6982445206190921, "grad_norm": 0.422676146030426, "learning_rate": 2.0834269675844326e-05, "loss": 1.7428, "step": 6722 }, { "epoch": 0.6983483951386725, "grad_norm": 0.42799460887908936, "learning_rate": 2.0821018154127686e-05, "loss": 1.7212, "step": 6723 }, { "epoch": 0.6984522696582528, "grad_norm": 0.37061259150505066, "learning_rate": 2.0807769739745043e-05, "loss": 1.326, "step": 6724 }, { "epoch": 0.6985561441778332, "grad_norm": 0.48382559418678284, "learning_rate": 2.0794524434107192e-05, "loss": 1.6675, "step": 6725 }, { "epoch": 0.6986600186974136, "grad_norm": 0.4368096888065338, "learning_rate": 2.0781282238624666e-05, "loss": 1.6125, "step": 6726 }, { "epoch": 0.6987638932169938, "grad_norm": 0.424841970205307, "learning_rate": 2.07680431547077e-05, "loss": 1.567, "step": 6727 }, { "epoch": 0.6988677677365742, "grad_norm": 0.40765881538391113, "learning_rate": 2.0754807183766084e-05, "loss": 1.6608, "step": 6728 }, { "epoch": 0.6989716422561546, "grad_norm": 0.45474496483802795, "learning_rate": 2.07415743272094e-05, "loss": 1.672, "step": 6729 }, { "epoch": 0.6990755167757349, "grad_norm": 0.3880652189254761, "learning_rate": 2.0728344586446825e-05, "loss": 1.6127, "step": 6730 }, { "epoch": 0.6991793912953153, "grad_norm": 0.40850159525871277, "learning_rate": 2.0715117962887224e-05, "loss": 1.604, "step": 6731 }, { "epoch": 0.6992832658148956, "grad_norm": 0.44652217626571655, "learning_rate": 2.070189445793913e-05, "loss": 1.6413, "step": 6732 }, { "epoch": 0.6993871403344759, "grad_norm": 0.42982134222984314, "learning_rate": 2.068867407301074e-05, "loss": 1.6768, "step": 6733 }, { "epoch": 0.6994910148540563, "grad_norm": 0.40679651498794556, "learning_rate": 2.0675456809509925e-05, "loss": 1.6151, "step": 6734 }, { "epoch": 0.6995948893736367, "grad_norm": 0.427057683467865, "learning_rate": 2.06622426688442e-05, "loss": 1.6619, "step": 6735 }, { "epoch": 0.699698763893217, "grad_norm": 0.4573904275894165, "learning_rate": 2.064903165242083e-05, "loss": 1.7536, "step": 6736 }, { "epoch": 0.6998026384127973, "grad_norm": 0.4206405282020569, "learning_rate": 2.06358237616466e-05, "loss": 1.8067, "step": 6737 }, { "epoch": 0.6999065129323777, "grad_norm": 0.4055739641189575, "learning_rate": 2.062261899792811e-05, "loss": 1.6172, "step": 6738 }, { "epoch": 0.700010387451958, "grad_norm": 0.45139777660369873, "learning_rate": 2.0609417362671545e-05, "loss": 1.7188, "step": 6739 }, { "epoch": 0.7001142619715384, "grad_norm": 0.4384258985519409, "learning_rate": 2.0596218857282778e-05, "loss": 1.5628, "step": 6740 }, { "epoch": 0.7002181364911187, "grad_norm": 0.39930063486099243, "learning_rate": 2.058302348316734e-05, "loss": 1.6419, "step": 6741 }, { "epoch": 0.7003220110106991, "grad_norm": 0.40519270300865173, "learning_rate": 2.0569831241730425e-05, "loss": 1.5731, "step": 6742 }, { "epoch": 0.7004258855302794, "grad_norm": 0.46715521812438965, "learning_rate": 2.0556642134376953e-05, "loss": 1.6441, "step": 6743 }, { "epoch": 0.7005297600498598, "grad_norm": 0.4366108179092407, "learning_rate": 2.0543456162511386e-05, "loss": 1.7277, "step": 6744 }, { "epoch": 0.7006336345694402, "grad_norm": 0.3887299597263336, "learning_rate": 2.053027332753799e-05, "loss": 1.4717, "step": 6745 }, { "epoch": 0.7007375090890204, "grad_norm": 0.4008408486843109, "learning_rate": 2.0517093630860633e-05, "loss": 1.619, "step": 6746 }, { "epoch": 0.7008413836086008, "grad_norm": 0.43216922879219055, "learning_rate": 2.050391707388279e-05, "loss": 1.5183, "step": 6747 }, { "epoch": 0.7009452581281812, "grad_norm": 0.4278073310852051, "learning_rate": 2.049074365800772e-05, "loss": 1.7367, "step": 6748 }, { "epoch": 0.7010491326477615, "grad_norm": 0.4540071487426758, "learning_rate": 2.0477573384638266e-05, "loss": 1.8808, "step": 6749 }, { "epoch": 0.7011530071673419, "grad_norm": 0.4321799874305725, "learning_rate": 2.0464406255176966e-05, "loss": 1.6206, "step": 6750 }, { "epoch": 0.7012568816869222, "grad_norm": 0.39823484420776367, "learning_rate": 2.045124227102601e-05, "loss": 1.6493, "step": 6751 }, { "epoch": 0.7013607562065025, "grad_norm": 0.3932490050792694, "learning_rate": 2.043808143358727e-05, "loss": 1.6487, "step": 6752 }, { "epoch": 0.7014646307260829, "grad_norm": 0.45285892486572266, "learning_rate": 2.0424923744262258e-05, "loss": 1.7022, "step": 6753 }, { "epoch": 0.7015685052456633, "grad_norm": 0.41068267822265625, "learning_rate": 2.041176920445216e-05, "loss": 1.7152, "step": 6754 }, { "epoch": 0.7016723797652435, "grad_norm": 0.4176632761955261, "learning_rate": 2.0398617815557875e-05, "loss": 1.5213, "step": 6755 }, { "epoch": 0.7017762542848239, "grad_norm": 0.3959343433380127, "learning_rate": 2.038546957897986e-05, "loss": 1.4975, "step": 6756 }, { "epoch": 0.7018801288044043, "grad_norm": 0.4066161513328552, "learning_rate": 2.037232449611835e-05, "loss": 1.5669, "step": 6757 }, { "epoch": 0.7019840033239846, "grad_norm": 0.4088003635406494, "learning_rate": 2.0359182568373176e-05, "loss": 1.7064, "step": 6758 }, { "epoch": 0.702087877843565, "grad_norm": 0.4266449213027954, "learning_rate": 2.0346043797143854e-05, "loss": 1.5934, "step": 6759 }, { "epoch": 0.7021917523631453, "grad_norm": 0.418796181678772, "learning_rate": 2.0332908183829556e-05, "loss": 1.6623, "step": 6760 }, { "epoch": 0.7022956268827256, "grad_norm": 0.44862931966781616, "learning_rate": 2.031977572982912e-05, "loss": 1.8849, "step": 6761 }, { "epoch": 0.702399501402306, "grad_norm": 0.44697731733322144, "learning_rate": 2.030664643654106e-05, "loss": 1.7985, "step": 6762 }, { "epoch": 0.7025033759218864, "grad_norm": 0.38466355204582214, "learning_rate": 2.0293520305363522e-05, "loss": 1.5767, "step": 6763 }, { "epoch": 0.7026072504414667, "grad_norm": 0.40668144822120667, "learning_rate": 2.028039733769438e-05, "loss": 1.7073, "step": 6764 }, { "epoch": 0.702711124961047, "grad_norm": 0.42051973938941956, "learning_rate": 2.0267277534931066e-05, "loss": 1.3946, "step": 6765 }, { "epoch": 0.7028149994806274, "grad_norm": 0.42236143350601196, "learning_rate": 2.0254160898470792e-05, "loss": 1.7273, "step": 6766 }, { "epoch": 0.7029188740002077, "grad_norm": 0.4152265191078186, "learning_rate": 2.024104742971035e-05, "loss": 1.4529, "step": 6767 }, { "epoch": 0.7030227485197881, "grad_norm": 0.37839797139167786, "learning_rate": 2.0227937130046237e-05, "loss": 1.4317, "step": 6768 }, { "epoch": 0.7031266230393685, "grad_norm": 0.4131396412849426, "learning_rate": 2.0214830000874586e-05, "loss": 1.659, "step": 6769 }, { "epoch": 0.7032304975589488, "grad_norm": 0.4527067244052887, "learning_rate": 2.0201726043591186e-05, "loss": 1.7416, "step": 6770 }, { "epoch": 0.7033343720785291, "grad_norm": 0.40972375869750977, "learning_rate": 2.018862525959156e-05, "loss": 1.6191, "step": 6771 }, { "epoch": 0.7034382465981095, "grad_norm": 0.38629379868507385, "learning_rate": 2.017552765027077e-05, "loss": 1.6096, "step": 6772 }, { "epoch": 0.7035421211176899, "grad_norm": 0.39593833684921265, "learning_rate": 2.0162433217023667e-05, "loss": 1.7234, "step": 6773 }, { "epoch": 0.7036459956372701, "grad_norm": 0.3909621834754944, "learning_rate": 2.0149341961244704e-05, "loss": 1.5358, "step": 6774 }, { "epoch": 0.7037498701568505, "grad_norm": 0.4053054749965668, "learning_rate": 2.0136253884327937e-05, "loss": 1.592, "step": 6775 }, { "epoch": 0.7038537446764309, "grad_norm": 0.4064863622188568, "learning_rate": 2.0123168987667206e-05, "loss": 1.6287, "step": 6776 }, { "epoch": 0.7039576191960112, "grad_norm": 0.422603040933609, "learning_rate": 2.011008727265593e-05, "loss": 1.6935, "step": 6777 }, { "epoch": 0.7040614937155916, "grad_norm": 0.418403297662735, "learning_rate": 2.009700874068721e-05, "loss": 1.8375, "step": 6778 }, { "epoch": 0.704165368235172, "grad_norm": 0.4110656678676605, "learning_rate": 2.0083933393153808e-05, "loss": 1.6755, "step": 6779 }, { "epoch": 0.7042692427547522, "grad_norm": 0.47258806228637695, "learning_rate": 2.007086123144814e-05, "loss": 1.6924, "step": 6780 }, { "epoch": 0.7043731172743326, "grad_norm": 0.4690280854701996, "learning_rate": 2.00577922569623e-05, "loss": 1.8002, "step": 6781 }, { "epoch": 0.704476991793913, "grad_norm": 0.40814927220344543, "learning_rate": 2.0044726471088005e-05, "loss": 1.6296, "step": 6782 }, { "epoch": 0.7045808663134933, "grad_norm": 0.4353756606578827, "learning_rate": 2.0031663875216717e-05, "loss": 1.7662, "step": 6783 }, { "epoch": 0.7046847408330736, "grad_norm": 0.41065043210983276, "learning_rate": 2.0018604470739434e-05, "loss": 1.5681, "step": 6784 }, { "epoch": 0.704788615352654, "grad_norm": 0.4216291308403015, "learning_rate": 2.000554825904692e-05, "loss": 1.6114, "step": 6785 }, { "epoch": 0.7048924898722343, "grad_norm": 0.43526455760002136, "learning_rate": 1.9992495241529556e-05, "loss": 1.6702, "step": 6786 }, { "epoch": 0.7049963643918147, "grad_norm": 0.43194445967674255, "learning_rate": 1.997944541957738e-05, "loss": 1.7412, "step": 6787 }, { "epoch": 0.7051002389113951, "grad_norm": 0.4365564286708832, "learning_rate": 1.99663987945801e-05, "loss": 1.7816, "step": 6788 }, { "epoch": 0.7052041134309753, "grad_norm": 0.4253787696361542, "learning_rate": 1.9953355367927075e-05, "loss": 1.6678, "step": 6789 }, { "epoch": 0.7053079879505557, "grad_norm": 0.43567463755607605, "learning_rate": 1.9940315141007327e-05, "loss": 1.6948, "step": 6790 }, { "epoch": 0.7054118624701361, "grad_norm": 0.41654548048973083, "learning_rate": 1.992727811520953e-05, "loss": 1.6762, "step": 6791 }, { "epoch": 0.7055157369897164, "grad_norm": 0.3824082612991333, "learning_rate": 1.991424429192207e-05, "loss": 1.495, "step": 6792 }, { "epoch": 0.7056196115092968, "grad_norm": 0.4068382978439331, "learning_rate": 1.9901213672532875e-05, "loss": 1.6277, "step": 6793 }, { "epoch": 0.7057234860288771, "grad_norm": 0.3962841033935547, "learning_rate": 1.988818625842967e-05, "loss": 1.6252, "step": 6794 }, { "epoch": 0.7058273605484575, "grad_norm": 0.4224420487880707, "learning_rate": 1.9875162050999747e-05, "loss": 1.5978, "step": 6795 }, { "epoch": 0.7059312350680378, "grad_norm": 0.38469555974006653, "learning_rate": 1.986214105163008e-05, "loss": 1.521, "step": 6796 }, { "epoch": 0.7060351095876182, "grad_norm": 0.43312588334083557, "learning_rate": 1.9849123261707305e-05, "loss": 1.7809, "step": 6797 }, { "epoch": 0.7061389841071986, "grad_norm": 0.42561838030815125, "learning_rate": 1.98361086826177e-05, "loss": 1.6883, "step": 6798 }, { "epoch": 0.7062428586267788, "grad_norm": 0.4514966309070587, "learning_rate": 1.9823097315747267e-05, "loss": 1.6939, "step": 6799 }, { "epoch": 0.7063467331463592, "grad_norm": 0.4347332715988159, "learning_rate": 1.9810089162481548e-05, "loss": 1.6935, "step": 6800 }, { "epoch": 0.7064506076659396, "grad_norm": 0.45488476753234863, "learning_rate": 1.9797084224205857e-05, "loss": 1.8802, "step": 6801 }, { "epoch": 0.7065544821855199, "grad_norm": 0.44390448927879333, "learning_rate": 1.978408250230513e-05, "loss": 1.7329, "step": 6802 }, { "epoch": 0.7066583567051002, "grad_norm": 0.39649200439453125, "learning_rate": 1.977108399816388e-05, "loss": 1.5892, "step": 6803 }, { "epoch": 0.7067622312246806, "grad_norm": 0.3859086036682129, "learning_rate": 1.975808871316641e-05, "loss": 1.5868, "step": 6804 }, { "epoch": 0.7068661057442609, "grad_norm": 0.48838183283805847, "learning_rate": 1.97450966486966e-05, "loss": 1.7898, "step": 6805 }, { "epoch": 0.7069699802638413, "grad_norm": 0.4066251218318939, "learning_rate": 1.9732107806137996e-05, "loss": 1.662, "step": 6806 }, { "epoch": 0.7070738547834217, "grad_norm": 0.4213266968727112, "learning_rate": 1.971912218687381e-05, "loss": 1.5625, "step": 6807 }, { "epoch": 0.7071777293030019, "grad_norm": 0.38811829686164856, "learning_rate": 1.9706139792286915e-05, "loss": 1.4567, "step": 6808 }, { "epoch": 0.7072816038225823, "grad_norm": 0.42099571228027344, "learning_rate": 1.9693160623759826e-05, "loss": 1.6887, "step": 6809 }, { "epoch": 0.7073854783421627, "grad_norm": 0.45879611372947693, "learning_rate": 1.968018468267472e-05, "loss": 1.7088, "step": 6810 }, { "epoch": 0.707489352861743, "grad_norm": 0.3958274722099304, "learning_rate": 1.9667211970413468e-05, "loss": 1.5636, "step": 6811 }, { "epoch": 0.7075932273813234, "grad_norm": 0.4116361141204834, "learning_rate": 1.96542424883575e-05, "loss": 1.5771, "step": 6812 }, { "epoch": 0.7076971019009037, "grad_norm": 0.42354995012283325, "learning_rate": 1.9641276237888023e-05, "loss": 1.8045, "step": 6813 }, { "epoch": 0.707800976420484, "grad_norm": 0.4337577223777771, "learning_rate": 1.9628313220385818e-05, "loss": 1.7703, "step": 6814 }, { "epoch": 0.7079048509400644, "grad_norm": 0.4336363673210144, "learning_rate": 1.961535343723135e-05, "loss": 1.4605, "step": 6815 }, { "epoch": 0.7080087254596448, "grad_norm": 0.41159120202064514, "learning_rate": 1.960239688980473e-05, "loss": 1.6786, "step": 6816 }, { "epoch": 0.708112599979225, "grad_norm": 0.4380592405796051, "learning_rate": 1.958944357948573e-05, "loss": 1.5045, "step": 6817 }, { "epoch": 0.7082164744988054, "grad_norm": 0.3844902515411377, "learning_rate": 1.9576493507653776e-05, "loss": 1.653, "step": 6818 }, { "epoch": 0.7083203490183858, "grad_norm": 0.41153958439826965, "learning_rate": 1.9563546675687932e-05, "loss": 1.7776, "step": 6819 }, { "epoch": 0.7084242235379662, "grad_norm": 0.39045795798301697, "learning_rate": 1.9550603084966973e-05, "loss": 1.5983, "step": 6820 }, { "epoch": 0.7085280980575465, "grad_norm": 0.4701341688632965, "learning_rate": 1.9537662736869272e-05, "loss": 1.7198, "step": 6821 }, { "epoch": 0.7086319725771268, "grad_norm": 0.4244726896286011, "learning_rate": 1.9524725632772878e-05, "loss": 1.6711, "step": 6822 }, { "epoch": 0.7087358470967072, "grad_norm": 0.44290024042129517, "learning_rate": 1.9511791774055482e-05, "loss": 1.8374, "step": 6823 }, { "epoch": 0.7088397216162875, "grad_norm": 0.4390253722667694, "learning_rate": 1.9498861162094446e-05, "loss": 1.6595, "step": 6824 }, { "epoch": 0.7089435961358679, "grad_norm": 0.3866863548755646, "learning_rate": 1.948593379826678e-05, "loss": 1.5316, "step": 6825 }, { "epoch": 0.7090474706554483, "grad_norm": 0.4000120162963867, "learning_rate": 1.947300968394913e-05, "loss": 1.5832, "step": 6826 }, { "epoch": 0.7091513451750285, "grad_norm": 0.4002024531364441, "learning_rate": 1.946008882051786e-05, "loss": 1.6259, "step": 6827 }, { "epoch": 0.7092552196946089, "grad_norm": 0.43123364448547363, "learning_rate": 1.9447171209348875e-05, "loss": 1.6728, "step": 6828 }, { "epoch": 0.7093590942141893, "grad_norm": 0.39023008942604065, "learning_rate": 1.943425685181785e-05, "loss": 1.5225, "step": 6829 }, { "epoch": 0.7094629687337696, "grad_norm": 0.40491533279418945, "learning_rate": 1.9421345749300073e-05, "loss": 1.6512, "step": 6830 }, { "epoch": 0.70956684325335, "grad_norm": 0.4594792127609253, "learning_rate": 1.940843790317041e-05, "loss": 1.6372, "step": 6831 }, { "epoch": 0.7096707177729303, "grad_norm": 0.4281672537326813, "learning_rate": 1.939553331480351e-05, "loss": 1.5949, "step": 6832 }, { "epoch": 0.7097745922925106, "grad_norm": 0.4785386025905609, "learning_rate": 1.9382631985573586e-05, "loss": 1.682, "step": 6833 }, { "epoch": 0.709878466812091, "grad_norm": 0.42151594161987305, "learning_rate": 1.9369733916854526e-05, "loss": 1.5583, "step": 6834 }, { "epoch": 0.7099823413316714, "grad_norm": 0.4763428866863251, "learning_rate": 1.935683911001988e-05, "loss": 1.6265, "step": 6835 }, { "epoch": 0.7100862158512516, "grad_norm": 0.4347456693649292, "learning_rate": 1.934394756644285e-05, "loss": 1.7359, "step": 6836 }, { "epoch": 0.710190090370832, "grad_norm": 0.40570497512817383, "learning_rate": 1.9331059287496274e-05, "loss": 1.6238, "step": 6837 }, { "epoch": 0.7102939648904124, "grad_norm": 0.41450199484825134, "learning_rate": 1.9318174274552638e-05, "loss": 1.7365, "step": 6838 }, { "epoch": 0.7103978394099927, "grad_norm": 0.44917452335357666, "learning_rate": 1.930529252898415e-05, "loss": 1.7917, "step": 6839 }, { "epoch": 0.7105017139295731, "grad_norm": 0.4153517186641693, "learning_rate": 1.929241405216254e-05, "loss": 1.7213, "step": 6840 }, { "epoch": 0.7106055884491534, "grad_norm": 0.4131045639514923, "learning_rate": 1.9279538845459327e-05, "loss": 1.5519, "step": 6841 }, { "epoch": 0.7107094629687337, "grad_norm": 0.38155892491340637, "learning_rate": 1.92666669102456e-05, "loss": 1.505, "step": 6842 }, { "epoch": 0.7108133374883141, "grad_norm": 0.45029881596565247, "learning_rate": 1.9253798247892113e-05, "loss": 1.678, "step": 6843 }, { "epoch": 0.7109172120078945, "grad_norm": 0.4427982568740845, "learning_rate": 1.924093285976929e-05, "loss": 1.8031, "step": 6844 }, { "epoch": 0.7110210865274748, "grad_norm": 0.42872363328933716, "learning_rate": 1.9228070747247185e-05, "loss": 1.7211, "step": 6845 }, { "epoch": 0.7111249610470551, "grad_norm": 0.42956963181495667, "learning_rate": 1.9215211911695512e-05, "loss": 1.7405, "step": 6846 }, { "epoch": 0.7112288355666355, "grad_norm": 0.39396312832832336, "learning_rate": 1.9202356354483624e-05, "loss": 1.6972, "step": 6847 }, { "epoch": 0.7113327100862159, "grad_norm": 0.41904011368751526, "learning_rate": 1.9189504076980567e-05, "loss": 1.7432, "step": 6848 }, { "epoch": 0.7114365846057962, "grad_norm": 0.37891194224357605, "learning_rate": 1.9176655080555e-05, "loss": 1.6582, "step": 6849 }, { "epoch": 0.7115404591253766, "grad_norm": 0.4023973345756531, "learning_rate": 1.9163809366575235e-05, "loss": 1.6672, "step": 6850 }, { "epoch": 0.7116443336449569, "grad_norm": 0.44481852650642395, "learning_rate": 1.9150966936409242e-05, "loss": 1.7137, "step": 6851 }, { "epoch": 0.7117482081645372, "grad_norm": 0.39180970191955566, "learning_rate": 1.9138127791424637e-05, "loss": 1.5847, "step": 6852 }, { "epoch": 0.7118520826841176, "grad_norm": 0.4336892068386078, "learning_rate": 1.912529193298869e-05, "loss": 1.7037, "step": 6853 }, { "epoch": 0.711955957203698, "grad_norm": 0.3839171230792999, "learning_rate": 1.91124593624683e-05, "loss": 1.5538, "step": 6854 }, { "epoch": 0.7120598317232782, "grad_norm": 0.40723204612731934, "learning_rate": 1.9099630081230095e-05, "loss": 1.4787, "step": 6855 }, { "epoch": 0.7121637062428586, "grad_norm": 0.449480801820755, "learning_rate": 1.908680409064022e-05, "loss": 1.831, "step": 6856 }, { "epoch": 0.712267580762439, "grad_norm": 0.5175827145576477, "learning_rate": 1.9073981392064587e-05, "loss": 1.9043, "step": 6857 }, { "epoch": 0.7123714552820193, "grad_norm": 0.41189754009246826, "learning_rate": 1.9061161986868732e-05, "loss": 1.7122, "step": 6858 }, { "epoch": 0.7124753298015997, "grad_norm": 0.43195226788520813, "learning_rate": 1.9048345876417752e-05, "loss": 1.6308, "step": 6859 }, { "epoch": 0.71257920432118, "grad_norm": 0.41276681423187256, "learning_rate": 1.9035533062076522e-05, "loss": 1.658, "step": 6860 }, { "epoch": 0.7126830788407603, "grad_norm": 0.4124269485473633, "learning_rate": 1.9022723545209487e-05, "loss": 1.5886, "step": 6861 }, { "epoch": 0.7127869533603407, "grad_norm": 0.4499133825302124, "learning_rate": 1.9009917327180766e-05, "loss": 1.5387, "step": 6862 }, { "epoch": 0.7128908278799211, "grad_norm": 0.41163170337677, "learning_rate": 1.8997114409354116e-05, "loss": 1.7033, "step": 6863 }, { "epoch": 0.7129947023995014, "grad_norm": 0.4106546640396118, "learning_rate": 1.898431479309295e-05, "loss": 1.6542, "step": 6864 }, { "epoch": 0.7130985769190817, "grad_norm": 0.46902644634246826, "learning_rate": 1.8971518479760324e-05, "loss": 1.6021, "step": 6865 }, { "epoch": 0.7132024514386621, "grad_norm": 0.4623051881790161, "learning_rate": 1.8958725470718937e-05, "loss": 1.9381, "step": 6866 }, { "epoch": 0.7133063259582424, "grad_norm": 0.43215009570121765, "learning_rate": 1.8945935767331186e-05, "loss": 1.6968, "step": 6867 }, { "epoch": 0.7134102004778228, "grad_norm": 0.4356382191181183, "learning_rate": 1.893314937095901e-05, "loss": 1.6144, "step": 6868 }, { "epoch": 0.7135140749974032, "grad_norm": 0.456093966960907, "learning_rate": 1.8920366282964115e-05, "loss": 1.723, "step": 6869 }, { "epoch": 0.7136179495169834, "grad_norm": 0.5878288149833679, "learning_rate": 1.8907586504707773e-05, "loss": 1.5267, "step": 6870 }, { "epoch": 0.7137218240365638, "grad_norm": 0.3891201913356781, "learning_rate": 1.8894810037550947e-05, "loss": 1.53, "step": 6871 }, { "epoch": 0.7138256985561442, "grad_norm": 0.3847010135650635, "learning_rate": 1.888203688285422e-05, "loss": 1.6174, "step": 6872 }, { "epoch": 0.7139295730757246, "grad_norm": 0.4661828577518463, "learning_rate": 1.8869267041977817e-05, "loss": 1.7112, "step": 6873 }, { "epoch": 0.7140334475953048, "grad_norm": 0.4062906503677368, "learning_rate": 1.8856500516281684e-05, "loss": 1.6291, "step": 6874 }, { "epoch": 0.7141373221148852, "grad_norm": 0.5327140688896179, "learning_rate": 1.8843737307125275e-05, "loss": 1.4871, "step": 6875 }, { "epoch": 0.7142411966344656, "grad_norm": 0.4605855643749237, "learning_rate": 1.883097741586784e-05, "loss": 1.4438, "step": 6876 }, { "epoch": 0.7143450711540459, "grad_norm": 0.4348846673965454, "learning_rate": 1.8818220843868178e-05, "loss": 1.6393, "step": 6877 }, { "epoch": 0.7144489456736263, "grad_norm": 0.41968026757240295, "learning_rate": 1.8805467592484772e-05, "loss": 1.7655, "step": 6878 }, { "epoch": 0.7145528201932067, "grad_norm": 0.47305288910865784, "learning_rate": 1.8792717663075738e-05, "loss": 1.9356, "step": 6879 }, { "epoch": 0.7146566947127869, "grad_norm": 0.4094095826148987, "learning_rate": 1.8779971056998846e-05, "loss": 1.5515, "step": 6880 }, { "epoch": 0.7147605692323673, "grad_norm": 0.41150006651878357, "learning_rate": 1.876722777561151e-05, "loss": 1.7381, "step": 6881 }, { "epoch": 0.7148644437519477, "grad_norm": 0.405069500207901, "learning_rate": 1.8754487820270778e-05, "loss": 1.5871, "step": 6882 }, { "epoch": 0.714968318271528, "grad_norm": 0.4334279000759125, "learning_rate": 1.87417511923334e-05, "loss": 1.7346, "step": 6883 }, { "epoch": 0.7150721927911083, "grad_norm": 0.4156447947025299, "learning_rate": 1.872901789315566e-05, "loss": 1.7387, "step": 6884 }, { "epoch": 0.7151760673106887, "grad_norm": 0.4099999666213989, "learning_rate": 1.8716287924093613e-05, "loss": 1.6588, "step": 6885 }, { "epoch": 0.715279941830269, "grad_norm": 0.39068254828453064, "learning_rate": 1.8703561286502896e-05, "loss": 1.5263, "step": 6886 }, { "epoch": 0.7153838163498494, "grad_norm": 0.4342229664325714, "learning_rate": 1.8690837981738747e-05, "loss": 1.6721, "step": 6887 }, { "epoch": 0.7154876908694298, "grad_norm": 0.40513840317726135, "learning_rate": 1.867811801115614e-05, "loss": 1.6021, "step": 6888 }, { "epoch": 0.71559156538901, "grad_norm": 0.3993147313594818, "learning_rate": 1.8665401376109654e-05, "loss": 1.7418, "step": 6889 }, { "epoch": 0.7156954399085904, "grad_norm": 0.4815475046634674, "learning_rate": 1.86526880779535e-05, "loss": 1.9432, "step": 6890 }, { "epoch": 0.7157993144281708, "grad_norm": 0.4066917896270752, "learning_rate": 1.863997811804155e-05, "loss": 1.4421, "step": 6891 }, { "epoch": 0.7159031889477511, "grad_norm": 0.4122363030910492, "learning_rate": 1.862727149772731e-05, "loss": 1.6838, "step": 6892 }, { "epoch": 0.7160070634673315, "grad_norm": 0.41208595037460327, "learning_rate": 1.8614568218363937e-05, "loss": 1.6661, "step": 6893 }, { "epoch": 0.7161109379869118, "grad_norm": 0.4167573153972626, "learning_rate": 1.860186828130422e-05, "loss": 1.7035, "step": 6894 }, { "epoch": 0.7162148125064921, "grad_norm": 0.40936407446861267, "learning_rate": 1.8589171687900646e-05, "loss": 1.5622, "step": 6895 }, { "epoch": 0.7163186870260725, "grad_norm": 0.400158554315567, "learning_rate": 1.8576478439505248e-05, "loss": 1.6196, "step": 6896 }, { "epoch": 0.7164225615456529, "grad_norm": 0.42571789026260376, "learning_rate": 1.856378853746979e-05, "loss": 1.7369, "step": 6897 }, { "epoch": 0.7165264360652333, "grad_norm": 0.395346462726593, "learning_rate": 1.855110198314565e-05, "loss": 1.7267, "step": 6898 }, { "epoch": 0.7166303105848135, "grad_norm": 0.43957024812698364, "learning_rate": 1.8538418777883825e-05, "loss": 1.7538, "step": 6899 }, { "epoch": 0.7167341851043939, "grad_norm": 0.41253888607025146, "learning_rate": 1.8525738923035003e-05, "loss": 1.5747, "step": 6900 }, { "epoch": 0.7168380596239743, "grad_norm": 0.4174502193927765, "learning_rate": 1.8513062419949452e-05, "loss": 1.8062, "step": 6901 }, { "epoch": 0.7169419341435546, "grad_norm": 0.42370733618736267, "learning_rate": 1.8500389269977186e-05, "loss": 1.668, "step": 6902 }, { "epoch": 0.7170458086631349, "grad_norm": 0.41417741775512695, "learning_rate": 1.848771947446772e-05, "loss": 1.6166, "step": 6903 }, { "epoch": 0.7171496831827153, "grad_norm": 0.4121987819671631, "learning_rate": 1.847505303477034e-05, "loss": 1.5487, "step": 6904 }, { "epoch": 0.7172535577022956, "grad_norm": 0.39274632930755615, "learning_rate": 1.8462389952233905e-05, "loss": 1.5665, "step": 6905 }, { "epoch": 0.717357432221876, "grad_norm": 0.41530582308769226, "learning_rate": 1.8449730228206945e-05, "loss": 1.4904, "step": 6906 }, { "epoch": 0.7174613067414564, "grad_norm": 0.4424355626106262, "learning_rate": 1.8437073864037608e-05, "loss": 1.6915, "step": 6907 }, { "epoch": 0.7175651812610366, "grad_norm": 0.41284075379371643, "learning_rate": 1.842442086107371e-05, "loss": 1.6279, "step": 6908 }, { "epoch": 0.717669055780617, "grad_norm": 0.4062071442604065, "learning_rate": 1.8411771220662684e-05, "loss": 1.6648, "step": 6909 }, { "epoch": 0.7177729303001974, "grad_norm": 0.4447495639324188, "learning_rate": 1.8399124944151614e-05, "loss": 1.755, "step": 6910 }, { "epoch": 0.7178768048197777, "grad_norm": 0.4060676395893097, "learning_rate": 1.838648203288727e-05, "loss": 1.7247, "step": 6911 }, { "epoch": 0.717980679339358, "grad_norm": 0.4383039176464081, "learning_rate": 1.8373842488215966e-05, "loss": 1.5317, "step": 6912 }, { "epoch": 0.7180845538589384, "grad_norm": 0.42534106969833374, "learning_rate": 1.836120631148376e-05, "loss": 1.7154, "step": 6913 }, { "epoch": 0.7181884283785187, "grad_norm": 0.4181063175201416, "learning_rate": 1.8348573504036305e-05, "loss": 1.7214, "step": 6914 }, { "epoch": 0.7182923028980991, "grad_norm": 0.40229707956314087, "learning_rate": 1.8335944067218846e-05, "loss": 1.652, "step": 6915 }, { "epoch": 0.7183961774176795, "grad_norm": 0.4175788462162018, "learning_rate": 1.8323318002376372e-05, "loss": 1.6764, "step": 6916 }, { "epoch": 0.7185000519372597, "grad_norm": 0.4145587682723999, "learning_rate": 1.831069531085344e-05, "loss": 1.7295, "step": 6917 }, { "epoch": 0.7186039264568401, "grad_norm": 0.4142177700996399, "learning_rate": 1.829807599399428e-05, "loss": 1.5781, "step": 6918 }, { "epoch": 0.7187078009764205, "grad_norm": 0.4172203540802002, "learning_rate": 1.828546005314274e-05, "loss": 1.6024, "step": 6919 }, { "epoch": 0.7188116754960008, "grad_norm": 0.42822572588920593, "learning_rate": 1.8272847489642314e-05, "loss": 1.7269, "step": 6920 }, { "epoch": 0.7189155500155812, "grad_norm": 0.4436422288417816, "learning_rate": 1.826023830483616e-05, "loss": 1.6872, "step": 6921 }, { "epoch": 0.7190194245351615, "grad_norm": 0.39383989572525024, "learning_rate": 1.8247632500067017e-05, "loss": 1.3766, "step": 6922 }, { "epoch": 0.7191232990547419, "grad_norm": 0.4555588364601135, "learning_rate": 1.8235030076677362e-05, "loss": 1.7151, "step": 6923 }, { "epoch": 0.7192271735743222, "grad_norm": 0.42651811242103577, "learning_rate": 1.8222431036009226e-05, "loss": 1.6841, "step": 6924 }, { "epoch": 0.7193310480939026, "grad_norm": 0.4434896409511566, "learning_rate": 1.8209835379404304e-05, "loss": 1.5951, "step": 6925 }, { "epoch": 0.719434922613483, "grad_norm": 0.38876456022262573, "learning_rate": 1.819724310820395e-05, "loss": 1.6513, "step": 6926 }, { "epoch": 0.7195387971330632, "grad_norm": 0.4082390069961548, "learning_rate": 1.8184654223749126e-05, "loss": 1.63, "step": 6927 }, { "epoch": 0.7196426716526436, "grad_norm": 0.5180742740631104, "learning_rate": 1.8172068727380464e-05, "loss": 1.7954, "step": 6928 }, { "epoch": 0.719746546172224, "grad_norm": 0.43790149688720703, "learning_rate": 1.8159486620438193e-05, "loss": 1.7982, "step": 6929 }, { "epoch": 0.7198504206918043, "grad_norm": 0.4642382562160492, "learning_rate": 1.8146907904262267e-05, "loss": 1.8414, "step": 6930 }, { "epoch": 0.7199542952113847, "grad_norm": 0.4199405014514923, "learning_rate": 1.8134332580192152e-05, "loss": 1.8044, "step": 6931 }, { "epoch": 0.720058169730965, "grad_norm": 0.4645751714706421, "learning_rate": 1.8121760649567077e-05, "loss": 1.6786, "step": 6932 }, { "epoch": 0.7201620442505453, "grad_norm": 0.40321746468544006, "learning_rate": 1.8109192113725826e-05, "loss": 1.51, "step": 6933 }, { "epoch": 0.7202659187701257, "grad_norm": 0.44322633743286133, "learning_rate": 1.809662697400686e-05, "loss": 1.5216, "step": 6934 }, { "epoch": 0.7203697932897061, "grad_norm": 0.48086169362068176, "learning_rate": 1.8084065231748265e-05, "loss": 1.7657, "step": 6935 }, { "epoch": 0.7204736678092863, "grad_norm": 0.46815988421440125, "learning_rate": 1.807150688828777e-05, "loss": 1.7116, "step": 6936 }, { "epoch": 0.7205775423288667, "grad_norm": 0.42190316319465637, "learning_rate": 1.8058951944962736e-05, "loss": 1.503, "step": 6937 }, { "epoch": 0.7206814168484471, "grad_norm": 0.4263322949409485, "learning_rate": 1.804640040311016e-05, "loss": 1.6463, "step": 6938 }, { "epoch": 0.7207852913680274, "grad_norm": 0.4055632948875427, "learning_rate": 1.8033852264066718e-05, "loss": 1.4503, "step": 6939 }, { "epoch": 0.7208891658876078, "grad_norm": 0.40299129486083984, "learning_rate": 1.802130752916864e-05, "loss": 1.6289, "step": 6940 }, { "epoch": 0.7209930404071881, "grad_norm": 0.4563215374946594, "learning_rate": 1.800876619975188e-05, "loss": 1.6664, "step": 6941 }, { "epoch": 0.7210969149267684, "grad_norm": 0.4018884599208832, "learning_rate": 1.7996228277151995e-05, "loss": 1.5518, "step": 6942 }, { "epoch": 0.7212007894463488, "grad_norm": 0.4353574514389038, "learning_rate": 1.7983693762704122e-05, "loss": 1.5822, "step": 6943 }, { "epoch": 0.7213046639659292, "grad_norm": 0.4123103618621826, "learning_rate": 1.797116265774315e-05, "loss": 1.6165, "step": 6944 }, { "epoch": 0.7214085384855095, "grad_norm": 0.4055830240249634, "learning_rate": 1.7958634963603516e-05, "loss": 1.5928, "step": 6945 }, { "epoch": 0.7215124130050898, "grad_norm": 0.439450204372406, "learning_rate": 1.7946110681619327e-05, "loss": 1.8438, "step": 6946 }, { "epoch": 0.7216162875246702, "grad_norm": 0.39102089405059814, "learning_rate": 1.7933589813124325e-05, "loss": 1.6103, "step": 6947 }, { "epoch": 0.7217201620442505, "grad_norm": 0.39481091499328613, "learning_rate": 1.7921072359451872e-05, "loss": 1.6833, "step": 6948 }, { "epoch": 0.7218240365638309, "grad_norm": 0.4227730333805084, "learning_rate": 1.7908558321934992e-05, "loss": 1.5623, "step": 6949 }, { "epoch": 0.7219279110834113, "grad_norm": 0.41327497363090515, "learning_rate": 1.789604770190631e-05, "loss": 1.5559, "step": 6950 }, { "epoch": 0.7220317856029916, "grad_norm": 0.44786590337753296, "learning_rate": 1.7883540500698148e-05, "loss": 1.6942, "step": 6951 }, { "epoch": 0.7221356601225719, "grad_norm": 0.40318799018859863, "learning_rate": 1.7871036719642398e-05, "loss": 1.4997, "step": 6952 }, { "epoch": 0.7222395346421523, "grad_norm": 0.43783068656921387, "learning_rate": 1.7858536360070626e-05, "loss": 1.8249, "step": 6953 }, { "epoch": 0.7223434091617327, "grad_norm": 0.4172214865684509, "learning_rate": 1.784603942331402e-05, "loss": 1.6494, "step": 6954 }, { "epoch": 0.722447283681313, "grad_norm": 0.3986718952655792, "learning_rate": 1.7833545910703402e-05, "loss": 1.5129, "step": 6955 }, { "epoch": 0.7225511582008933, "grad_norm": 0.4317796528339386, "learning_rate": 1.7821055823569238e-05, "loss": 1.4516, "step": 6956 }, { "epoch": 0.7226550327204737, "grad_norm": 0.42882880568504333, "learning_rate": 1.7808569163241605e-05, "loss": 1.7454, "step": 6957 }, { "epoch": 0.722758907240054, "grad_norm": 0.4157145917415619, "learning_rate": 1.7796085931050287e-05, "loss": 1.6144, "step": 6958 }, { "epoch": 0.7228627817596344, "grad_norm": 0.4417478144168854, "learning_rate": 1.778360612832458e-05, "loss": 1.6667, "step": 6959 }, { "epoch": 0.7229666562792147, "grad_norm": 0.47706910967826843, "learning_rate": 1.7771129756393547e-05, "loss": 1.7617, "step": 6960 }, { "epoch": 0.723070530798795, "grad_norm": 0.38705945014953613, "learning_rate": 1.77586568165858e-05, "loss": 1.6515, "step": 6961 }, { "epoch": 0.7231744053183754, "grad_norm": 0.42276665568351746, "learning_rate": 1.7746187310229606e-05, "loss": 1.7102, "step": 6962 }, { "epoch": 0.7232782798379558, "grad_norm": 0.44928818941116333, "learning_rate": 1.773372123865287e-05, "loss": 1.8452, "step": 6963 }, { "epoch": 0.7233821543575361, "grad_norm": 0.5049888491630554, "learning_rate": 1.7721258603183144e-05, "loss": 1.6948, "step": 6964 }, { "epoch": 0.7234860288771164, "grad_norm": 0.4933055639266968, "learning_rate": 1.770879940514759e-05, "loss": 1.9625, "step": 6965 }, { "epoch": 0.7235899033966968, "grad_norm": 0.4487113058567047, "learning_rate": 1.7696343645873e-05, "loss": 1.6516, "step": 6966 }, { "epoch": 0.7236937779162771, "grad_norm": 0.4504063129425049, "learning_rate": 1.768389132668587e-05, "loss": 1.7148, "step": 6967 }, { "epoch": 0.7237976524358575, "grad_norm": 0.4117870628833771, "learning_rate": 1.7671442448912202e-05, "loss": 1.457, "step": 6968 }, { "epoch": 0.7239015269554379, "grad_norm": 0.4312496781349182, "learning_rate": 1.7658997013877754e-05, "loss": 1.5079, "step": 6969 }, { "epoch": 0.7240054014750181, "grad_norm": 0.45666730403900146, "learning_rate": 1.764655502290788e-05, "loss": 1.7927, "step": 6970 }, { "epoch": 0.7241092759945985, "grad_norm": 0.41350269317626953, "learning_rate": 1.763411647732749e-05, "loss": 1.6194, "step": 6971 }, { "epoch": 0.7242131505141789, "grad_norm": 0.44214317202568054, "learning_rate": 1.7621681378461245e-05, "loss": 1.5284, "step": 6972 }, { "epoch": 0.7243170250337592, "grad_norm": 0.5224699974060059, "learning_rate": 1.7609249727633382e-05, "loss": 1.3562, "step": 6973 }, { "epoch": 0.7244208995533395, "grad_norm": 0.4053601622581482, "learning_rate": 1.759682152616776e-05, "loss": 1.6075, "step": 6974 }, { "epoch": 0.7245247740729199, "grad_norm": 0.39831477403640747, "learning_rate": 1.758439677538789e-05, "loss": 1.5422, "step": 6975 }, { "epoch": 0.7246286485925003, "grad_norm": 0.4336235225200653, "learning_rate": 1.7571975476616897e-05, "loss": 1.6262, "step": 6976 }, { "epoch": 0.7247325231120806, "grad_norm": 0.4514787495136261, "learning_rate": 1.7559557631177605e-05, "loss": 1.6682, "step": 6977 }, { "epoch": 0.724836397631661, "grad_norm": 0.4214664101600647, "learning_rate": 1.754714324039234e-05, "loss": 1.836, "step": 6978 }, { "epoch": 0.7249402721512413, "grad_norm": 0.4808892607688904, "learning_rate": 1.7534732305583196e-05, "loss": 1.7584, "step": 6979 }, { "epoch": 0.7250441466708216, "grad_norm": 0.44463905692100525, "learning_rate": 1.7522324828071822e-05, "loss": 1.6672, "step": 6980 }, { "epoch": 0.725148021190402, "grad_norm": 0.4203853905200958, "learning_rate": 1.750992080917952e-05, "loss": 1.5338, "step": 6981 }, { "epoch": 0.7252518957099824, "grad_norm": 0.41992905735969543, "learning_rate": 1.7497520250227222e-05, "loss": 1.6543, "step": 6982 }, { "epoch": 0.7253557702295627, "grad_norm": 0.42039117217063904, "learning_rate": 1.7485123152535488e-05, "loss": 1.6052, "step": 6983 }, { "epoch": 0.725459644749143, "grad_norm": 0.4649543762207031, "learning_rate": 1.7472729517424508e-05, "loss": 1.7914, "step": 6984 }, { "epoch": 0.7255635192687234, "grad_norm": 0.39799681305885315, "learning_rate": 1.7460339346214095e-05, "loss": 1.6915, "step": 6985 }, { "epoch": 0.7256673937883037, "grad_norm": 0.4391082227230072, "learning_rate": 1.744795264022376e-05, "loss": 1.7437, "step": 6986 }, { "epoch": 0.7257712683078841, "grad_norm": 0.4607831835746765, "learning_rate": 1.7435569400772516e-05, "loss": 1.8705, "step": 6987 }, { "epoch": 0.7258751428274645, "grad_norm": 0.4046463966369629, "learning_rate": 1.742318962917914e-05, "loss": 1.7026, "step": 6988 }, { "epoch": 0.7259790173470447, "grad_norm": 0.4511662721633911, "learning_rate": 1.7410813326761955e-05, "loss": 1.7673, "step": 6989 }, { "epoch": 0.7260828918666251, "grad_norm": 0.3973452150821686, "learning_rate": 1.7398440494838947e-05, "loss": 1.5597, "step": 6990 }, { "epoch": 0.7261867663862055, "grad_norm": 0.4389094412326813, "learning_rate": 1.7386071134727726e-05, "loss": 1.8227, "step": 6991 }, { "epoch": 0.7262906409057858, "grad_norm": 0.3820962905883789, "learning_rate": 1.7373705247745532e-05, "loss": 1.5315, "step": 6992 }, { "epoch": 0.7263945154253661, "grad_norm": 0.4259055554866791, "learning_rate": 1.7361342835209225e-05, "loss": 1.598, "step": 6993 }, { "epoch": 0.7264983899449465, "grad_norm": 0.4085586965084076, "learning_rate": 1.7348983898435307e-05, "loss": 1.7899, "step": 6994 }, { "epoch": 0.7266022644645268, "grad_norm": 0.4286569654941559, "learning_rate": 1.733662843873995e-05, "loss": 1.7511, "step": 6995 }, { "epoch": 0.7267061389841072, "grad_norm": 0.4160563051700592, "learning_rate": 1.7324276457438843e-05, "loss": 1.741, "step": 6996 }, { "epoch": 0.7268100135036876, "grad_norm": 0.47089239954948425, "learning_rate": 1.7311927955847428e-05, "loss": 1.8191, "step": 6997 }, { "epoch": 0.7269138880232678, "grad_norm": 0.41623279452323914, "learning_rate": 1.729958293528073e-05, "loss": 1.5197, "step": 6998 }, { "epoch": 0.7270177625428482, "grad_norm": 0.41076546907424927, "learning_rate": 1.728724139705334e-05, "loss": 1.6748, "step": 6999 }, { "epoch": 0.7271216370624286, "grad_norm": 0.4390968382358551, "learning_rate": 1.7274903342479586e-05, "loss": 1.6716, "step": 7000 }, { "epoch": 0.727225511582009, "grad_norm": 0.4050026535987854, "learning_rate": 1.726256877287335e-05, "loss": 1.6378, "step": 7001 }, { "epoch": 0.7273293861015893, "grad_norm": 0.41926950216293335, "learning_rate": 1.725023768954821e-05, "loss": 1.5495, "step": 7002 }, { "epoch": 0.7274332606211696, "grad_norm": 0.4020102024078369, "learning_rate": 1.7237910093817278e-05, "loss": 1.5418, "step": 7003 }, { "epoch": 0.72753713514075, "grad_norm": 0.42248985171318054, "learning_rate": 1.7225585986993353e-05, "loss": 1.5632, "step": 7004 }, { "epoch": 0.7276410096603303, "grad_norm": 0.4645000696182251, "learning_rate": 1.7213265370388898e-05, "loss": 1.7221, "step": 7005 }, { "epoch": 0.7277448841799107, "grad_norm": 0.45067256689071655, "learning_rate": 1.7200948245315908e-05, "loss": 1.7604, "step": 7006 }, { "epoch": 0.7278487586994911, "grad_norm": 0.40143445134162903, "learning_rate": 1.7188634613086103e-05, "loss": 1.6143, "step": 7007 }, { "epoch": 0.7279526332190713, "grad_norm": 0.37583884596824646, "learning_rate": 1.717632447501077e-05, "loss": 1.5589, "step": 7008 }, { "epoch": 0.7280565077386517, "grad_norm": 0.4202621877193451, "learning_rate": 1.7164017832400846e-05, "loss": 1.5601, "step": 7009 }, { "epoch": 0.7281603822582321, "grad_norm": 0.39898592233657837, "learning_rate": 1.71517146865669e-05, "loss": 1.6215, "step": 7010 }, { "epoch": 0.7282642567778124, "grad_norm": 0.4259057641029358, "learning_rate": 1.7139415038819108e-05, "loss": 1.8012, "step": 7011 }, { "epoch": 0.7283681312973928, "grad_norm": 0.3842464089393616, "learning_rate": 1.7127118890467292e-05, "loss": 1.6024, "step": 7012 }, { "epoch": 0.7284720058169731, "grad_norm": 0.3995085060596466, "learning_rate": 1.7114826242820875e-05, "loss": 1.6076, "step": 7013 }, { "epoch": 0.7285758803365534, "grad_norm": 0.37243375182151794, "learning_rate": 1.7102537097188986e-05, "loss": 1.5847, "step": 7014 }, { "epoch": 0.7286797548561338, "grad_norm": 0.46955230832099915, "learning_rate": 1.7090251454880258e-05, "loss": 1.8128, "step": 7015 }, { "epoch": 0.7287836293757142, "grad_norm": 0.40310418605804443, "learning_rate": 1.707796931720305e-05, "loss": 1.5522, "step": 7016 }, { "epoch": 0.7288875038952944, "grad_norm": 0.4547189176082611, "learning_rate": 1.706569068546532e-05, "loss": 1.8937, "step": 7017 }, { "epoch": 0.7289913784148748, "grad_norm": 0.41194766759872437, "learning_rate": 1.7053415560974623e-05, "loss": 1.6175, "step": 7018 }, { "epoch": 0.7290952529344552, "grad_norm": 0.4527081847190857, "learning_rate": 1.7041143945038186e-05, "loss": 1.7127, "step": 7019 }, { "epoch": 0.7291991274540355, "grad_norm": 0.45364293456077576, "learning_rate": 1.702887583896282e-05, "loss": 1.822, "step": 7020 }, { "epoch": 0.7293030019736159, "grad_norm": 0.4415326714515686, "learning_rate": 1.7016611244055e-05, "loss": 1.7217, "step": 7021 }, { "epoch": 0.7294068764931962, "grad_norm": 0.4117661714553833, "learning_rate": 1.700435016162078e-05, "loss": 1.7666, "step": 7022 }, { "epoch": 0.7295107510127765, "grad_norm": 0.41793352365493774, "learning_rate": 1.699209259296593e-05, "loss": 1.6506, "step": 7023 }, { "epoch": 0.7296146255323569, "grad_norm": 0.41876649856567383, "learning_rate": 1.697983853939571e-05, "loss": 1.6463, "step": 7024 }, { "epoch": 0.7297185000519373, "grad_norm": 0.46522414684295654, "learning_rate": 1.6967588002215134e-05, "loss": 1.6875, "step": 7025 }, { "epoch": 0.7298223745715176, "grad_norm": 0.40117374062538147, "learning_rate": 1.6955340982728797e-05, "loss": 1.6181, "step": 7026 }, { "epoch": 0.7299262490910979, "grad_norm": 0.4233303368091583, "learning_rate": 1.694309748224085e-05, "loss": 1.7707, "step": 7027 }, { "epoch": 0.7300301236106783, "grad_norm": 0.42827704548835754, "learning_rate": 1.693085750205518e-05, "loss": 1.6969, "step": 7028 }, { "epoch": 0.7301339981302587, "grad_norm": 0.42237424850463867, "learning_rate": 1.6918621043475225e-05, "loss": 1.5153, "step": 7029 }, { "epoch": 0.730237872649839, "grad_norm": 0.4134870171546936, "learning_rate": 1.6906388107804118e-05, "loss": 1.5392, "step": 7030 }, { "epoch": 0.7303417471694194, "grad_norm": 0.42920297384262085, "learning_rate": 1.6894158696344515e-05, "loss": 1.7474, "step": 7031 }, { "epoch": 0.7304456216889997, "grad_norm": 0.401040643453598, "learning_rate": 1.6881932810398765e-05, "loss": 1.5063, "step": 7032 }, { "epoch": 0.73054949620858, "grad_norm": 0.4182683229446411, "learning_rate": 1.686971045126887e-05, "loss": 1.5769, "step": 7033 }, { "epoch": 0.7306533707281604, "grad_norm": 0.4622383713722229, "learning_rate": 1.6857491620256353e-05, "loss": 1.628, "step": 7034 }, { "epoch": 0.7307572452477408, "grad_norm": 0.5033509135246277, "learning_rate": 1.6845276318662472e-05, "loss": 1.8296, "step": 7035 }, { "epoch": 0.730861119767321, "grad_norm": 0.3891574442386627, "learning_rate": 1.6833064547788053e-05, "loss": 1.5515, "step": 7036 }, { "epoch": 0.7309649942869014, "grad_norm": 0.44622090458869934, "learning_rate": 1.6820856308933543e-05, "loss": 1.7212, "step": 7037 }, { "epoch": 0.7310688688064818, "grad_norm": 0.4310351014137268, "learning_rate": 1.6808651603399024e-05, "loss": 1.5695, "step": 7038 }, { "epoch": 0.7311727433260621, "grad_norm": 0.4409294128417969, "learning_rate": 1.6796450432484212e-05, "loss": 1.6526, "step": 7039 }, { "epoch": 0.7312766178456425, "grad_norm": 0.39524850249290466, "learning_rate": 1.678425279748843e-05, "loss": 1.5189, "step": 7040 }, { "epoch": 0.7313804923652228, "grad_norm": 0.42522087693214417, "learning_rate": 1.6772058699710618e-05, "loss": 1.6336, "step": 7041 }, { "epoch": 0.7314843668848031, "grad_norm": 0.4241730868816376, "learning_rate": 1.6759868140449397e-05, "loss": 1.5303, "step": 7042 }, { "epoch": 0.7315882414043835, "grad_norm": 0.41088756918907166, "learning_rate": 1.6747681121002902e-05, "loss": 1.6328, "step": 7043 }, { "epoch": 0.7316921159239639, "grad_norm": 0.43897873163223267, "learning_rate": 1.6735497642669008e-05, "loss": 1.7547, "step": 7044 }, { "epoch": 0.7317959904435442, "grad_norm": 0.42431479692459106, "learning_rate": 1.6723317706745144e-05, "loss": 1.6887, "step": 7045 }, { "epoch": 0.7318998649631245, "grad_norm": 0.3968948423862457, "learning_rate": 1.6711141314528374e-05, "loss": 1.4962, "step": 7046 }, { "epoch": 0.7320037394827049, "grad_norm": 0.4105996787548065, "learning_rate": 1.669896846731539e-05, "loss": 1.6409, "step": 7047 }, { "epoch": 0.7321076140022852, "grad_norm": 0.40278342366218567, "learning_rate": 1.6686799166402507e-05, "loss": 1.5295, "step": 7048 }, { "epoch": 0.7322114885218656, "grad_norm": 0.3999069929122925, "learning_rate": 1.667463341308566e-05, "loss": 1.5359, "step": 7049 }, { "epoch": 0.732315363041446, "grad_norm": 0.3840470016002655, "learning_rate": 1.666247120866039e-05, "loss": 1.5948, "step": 7050 }, { "epoch": 0.7324192375610262, "grad_norm": 0.40649813413619995, "learning_rate": 1.6650312554421928e-05, "loss": 1.4639, "step": 7051 }, { "epoch": 0.7325231120806066, "grad_norm": 0.46434590220451355, "learning_rate": 1.663815745166502e-05, "loss": 1.8214, "step": 7052 }, { "epoch": 0.732626986600187, "grad_norm": 0.434417724609375, "learning_rate": 1.66260059016841e-05, "loss": 1.5394, "step": 7053 }, { "epoch": 0.7327308611197674, "grad_norm": 0.39939597249031067, "learning_rate": 1.6613857905773238e-05, "loss": 1.5732, "step": 7054 }, { "epoch": 0.7328347356393476, "grad_norm": 0.39410555362701416, "learning_rate": 1.6601713465226086e-05, "loss": 1.6296, "step": 7055 }, { "epoch": 0.732938610158928, "grad_norm": 0.4101269245147705, "learning_rate": 1.6589572581335938e-05, "loss": 1.6349, "step": 7056 }, { "epoch": 0.7330424846785084, "grad_norm": 0.4318089485168457, "learning_rate": 1.6577435255395677e-05, "loss": 1.6986, "step": 7057 }, { "epoch": 0.7331463591980887, "grad_norm": 0.4095480442047119, "learning_rate": 1.6565301488697893e-05, "loss": 1.7157, "step": 7058 }, { "epoch": 0.7332502337176691, "grad_norm": 0.4215502440929413, "learning_rate": 1.655317128253468e-05, "loss": 1.587, "step": 7059 }, { "epoch": 0.7333541082372494, "grad_norm": 0.472887247800827, "learning_rate": 1.6541044638197818e-05, "loss": 1.7887, "step": 7060 }, { "epoch": 0.7334579827568297, "grad_norm": 0.4307557940483093, "learning_rate": 1.6528921556978737e-05, "loss": 1.7428, "step": 7061 }, { "epoch": 0.7335618572764101, "grad_norm": 0.4611065685749054, "learning_rate": 1.65168020401684e-05, "loss": 1.79, "step": 7062 }, { "epoch": 0.7336657317959905, "grad_norm": 0.45637285709381104, "learning_rate": 1.650468608905748e-05, "loss": 1.8929, "step": 7063 }, { "epoch": 0.7337696063155708, "grad_norm": 0.41570550203323364, "learning_rate": 1.649257370493622e-05, "loss": 1.7476, "step": 7064 }, { "epoch": 0.7338734808351511, "grad_norm": 0.40172329545021057, "learning_rate": 1.6480464889094493e-05, "loss": 1.762, "step": 7065 }, { "epoch": 0.7339773553547315, "grad_norm": 0.4335883557796478, "learning_rate": 1.646835964282179e-05, "loss": 1.5714, "step": 7066 }, { "epoch": 0.7340812298743118, "grad_norm": 0.4139271676540375, "learning_rate": 1.6456257967407228e-05, "loss": 1.6522, "step": 7067 }, { "epoch": 0.7341851043938922, "grad_norm": 0.4054241478443146, "learning_rate": 1.6444159864139542e-05, "loss": 1.7425, "step": 7068 }, { "epoch": 0.7342889789134726, "grad_norm": 0.4233381450176239, "learning_rate": 1.6432065334307074e-05, "loss": 1.6434, "step": 7069 }, { "epoch": 0.7343928534330528, "grad_norm": 0.4486279785633087, "learning_rate": 1.6419974379197834e-05, "loss": 1.7726, "step": 7070 }, { "epoch": 0.7344967279526332, "grad_norm": 0.4719793200492859, "learning_rate": 1.6407887000099354e-05, "loss": 1.5734, "step": 7071 }, { "epoch": 0.7346006024722136, "grad_norm": 0.4241810142993927, "learning_rate": 1.6395803198298905e-05, "loss": 1.7336, "step": 7072 }, { "epoch": 0.7347044769917939, "grad_norm": 0.40580904483795166, "learning_rate": 1.6383722975083282e-05, "loss": 1.6019, "step": 7073 }, { "epoch": 0.7348083515113742, "grad_norm": 0.46320223808288574, "learning_rate": 1.6371646331738953e-05, "loss": 1.7766, "step": 7074 }, { "epoch": 0.7349122260309546, "grad_norm": 0.41690146923065186, "learning_rate": 1.6359573269551977e-05, "loss": 1.6961, "step": 7075 }, { "epoch": 0.7350161005505349, "grad_norm": 0.4090801775455475, "learning_rate": 1.634750378980804e-05, "loss": 1.6388, "step": 7076 }, { "epoch": 0.7351199750701153, "grad_norm": 0.3977707624435425, "learning_rate": 1.633543789379245e-05, "loss": 1.6002, "step": 7077 }, { "epoch": 0.7352238495896957, "grad_norm": 0.4486248195171356, "learning_rate": 1.632337558279011e-05, "loss": 1.7919, "step": 7078 }, { "epoch": 0.735327724109276, "grad_norm": 0.41269683837890625, "learning_rate": 1.6311316858085607e-05, "loss": 1.5934, "step": 7079 }, { "epoch": 0.7354315986288563, "grad_norm": 0.4171505570411682, "learning_rate": 1.6299261720963093e-05, "loss": 1.6255, "step": 7080 }, { "epoch": 0.7355354731484367, "grad_norm": 0.406341552734375, "learning_rate": 1.6287210172706297e-05, "loss": 1.7223, "step": 7081 }, { "epoch": 0.7356393476680171, "grad_norm": 0.43116459250450134, "learning_rate": 1.627516221459866e-05, "loss": 1.6397, "step": 7082 }, { "epoch": 0.7357432221875974, "grad_norm": 0.42022863030433655, "learning_rate": 1.6263117847923182e-05, "loss": 1.7121, "step": 7083 }, { "epoch": 0.7358470967071777, "grad_norm": 0.4364830553531647, "learning_rate": 1.62510770739625e-05, "loss": 1.5572, "step": 7084 }, { "epoch": 0.7359509712267581, "grad_norm": 0.4395456910133362, "learning_rate": 1.6239039893998842e-05, "loss": 1.6877, "step": 7085 }, { "epoch": 0.7360548457463384, "grad_norm": 0.499352365732193, "learning_rate": 1.622700630931412e-05, "loss": 1.4045, "step": 7086 }, { "epoch": 0.7361587202659188, "grad_norm": 0.41505447030067444, "learning_rate": 1.6214976321189768e-05, "loss": 1.6824, "step": 7087 }, { "epoch": 0.7362625947854992, "grad_norm": 0.40224704146385193, "learning_rate": 1.6202949930906893e-05, "loss": 1.6501, "step": 7088 }, { "epoch": 0.7363664693050794, "grad_norm": 0.4343310296535492, "learning_rate": 1.6190927139746255e-05, "loss": 1.7141, "step": 7089 }, { "epoch": 0.7364703438246598, "grad_norm": 0.4633185565471649, "learning_rate": 1.617890794898812e-05, "loss": 1.8566, "step": 7090 }, { "epoch": 0.7365742183442402, "grad_norm": 0.4461994469165802, "learning_rate": 1.616689235991249e-05, "loss": 1.6646, "step": 7091 }, { "epoch": 0.7366780928638205, "grad_norm": 0.4115038812160492, "learning_rate": 1.6154880373798918e-05, "loss": 1.6597, "step": 7092 }, { "epoch": 0.7367819673834008, "grad_norm": 0.47499844431877136, "learning_rate": 1.6142871991926584e-05, "loss": 1.8035, "step": 7093 }, { "epoch": 0.7368858419029812, "grad_norm": 0.4350105822086334, "learning_rate": 1.6130867215574284e-05, "loss": 1.446, "step": 7094 }, { "epoch": 0.7369897164225615, "grad_norm": 0.39589881896972656, "learning_rate": 1.6118866046020438e-05, "loss": 1.5551, "step": 7095 }, { "epoch": 0.7370935909421419, "grad_norm": 0.458922415971756, "learning_rate": 1.6106868484543076e-05, "loss": 1.8941, "step": 7096 }, { "epoch": 0.7371974654617223, "grad_norm": 0.48349758982658386, "learning_rate": 1.6094874532419828e-05, "loss": 1.6175, "step": 7097 }, { "epoch": 0.7373013399813025, "grad_norm": 0.43464893102645874, "learning_rate": 1.6082884190928004e-05, "loss": 1.7482, "step": 7098 }, { "epoch": 0.7374052145008829, "grad_norm": 0.43286725878715515, "learning_rate": 1.6070897461344413e-05, "loss": 1.7566, "step": 7099 }, { "epoch": 0.7375090890204633, "grad_norm": 0.3972510099411011, "learning_rate": 1.6058914344945607e-05, "loss": 1.576, "step": 7100 }, { "epoch": 0.7376129635400436, "grad_norm": 0.44797611236572266, "learning_rate": 1.604693484300767e-05, "loss": 1.6982, "step": 7101 }, { "epoch": 0.737716838059624, "grad_norm": 0.4418253004550934, "learning_rate": 1.6034958956806333e-05, "loss": 1.8107, "step": 7102 }, { "epoch": 0.7378207125792043, "grad_norm": 0.40204495191574097, "learning_rate": 1.6022986687616926e-05, "loss": 1.5234, "step": 7103 }, { "epoch": 0.7379245870987847, "grad_norm": 0.407816082239151, "learning_rate": 1.6011018036714388e-05, "loss": 1.7615, "step": 7104 }, { "epoch": 0.738028461618365, "grad_norm": 0.4105852544307709, "learning_rate": 1.599905300537334e-05, "loss": 1.7623, "step": 7105 }, { "epoch": 0.7381323361379454, "grad_norm": 0.4015292525291443, "learning_rate": 1.598709159486789e-05, "loss": 1.7175, "step": 7106 }, { "epoch": 0.7382362106575258, "grad_norm": 0.4852696359157562, "learning_rate": 1.5975133806471903e-05, "loss": 1.8187, "step": 7107 }, { "epoch": 0.738340085177106, "grad_norm": 0.4263093173503876, "learning_rate": 1.596317964145877e-05, "loss": 1.6612, "step": 7108 }, { "epoch": 0.7384439596966864, "grad_norm": 0.43212834000587463, "learning_rate": 1.5951229101101473e-05, "loss": 1.6154, "step": 7109 }, { "epoch": 0.7385478342162668, "grad_norm": 0.4184805452823639, "learning_rate": 1.5939282186672705e-05, "loss": 1.706, "step": 7110 }, { "epoch": 0.7386517087358471, "grad_norm": 0.42199963331222534, "learning_rate": 1.5927338899444706e-05, "loss": 1.6324, "step": 7111 }, { "epoch": 0.7387555832554275, "grad_norm": 0.44483646750450134, "learning_rate": 1.5915399240689325e-05, "loss": 1.6332, "step": 7112 }, { "epoch": 0.7388594577750078, "grad_norm": 0.43318289518356323, "learning_rate": 1.5903463211678067e-05, "loss": 1.6949, "step": 7113 }, { "epoch": 0.7389633322945881, "grad_norm": 0.5184254050254822, "learning_rate": 1.5891530813682008e-05, "loss": 1.6298, "step": 7114 }, { "epoch": 0.7390672068141685, "grad_norm": 0.4334104657173157, "learning_rate": 1.5879602047971865e-05, "loss": 1.4948, "step": 7115 }, { "epoch": 0.7391710813337489, "grad_norm": 0.39444056153297424, "learning_rate": 1.586767691581793e-05, "loss": 1.607, "step": 7116 }, { "epoch": 0.7392749558533291, "grad_norm": 0.3886300027370453, "learning_rate": 1.58557554184902e-05, "loss": 1.5812, "step": 7117 }, { "epoch": 0.7393788303729095, "grad_norm": 0.43382641673088074, "learning_rate": 1.584383755725815e-05, "loss": 1.6636, "step": 7118 }, { "epoch": 0.7394827048924899, "grad_norm": 0.40782594680786133, "learning_rate": 1.5831923333390985e-05, "loss": 1.4104, "step": 7119 }, { "epoch": 0.7395865794120702, "grad_norm": 0.3772667348384857, "learning_rate": 1.5820012748157464e-05, "loss": 1.4713, "step": 7120 }, { "epoch": 0.7396904539316506, "grad_norm": 0.42232704162597656, "learning_rate": 1.5808105802825975e-05, "loss": 1.3777, "step": 7121 }, { "epoch": 0.7397943284512309, "grad_norm": 0.430355042219162, "learning_rate": 1.579620249866451e-05, "loss": 1.6707, "step": 7122 }, { "epoch": 0.7398982029708112, "grad_norm": 0.40983816981315613, "learning_rate": 1.5784302836940678e-05, "loss": 1.4956, "step": 7123 }, { "epoch": 0.7400020774903916, "grad_norm": 0.4761542081832886, "learning_rate": 1.5772406818921697e-05, "loss": 1.7082, "step": 7124 }, { "epoch": 0.740105952009972, "grad_norm": 0.4703845977783203, "learning_rate": 1.576051444587439e-05, "loss": 1.7969, "step": 7125 }, { "epoch": 0.7402098265295523, "grad_norm": 0.40209317207336426, "learning_rate": 1.574862571906525e-05, "loss": 1.7028, "step": 7126 }, { "epoch": 0.7403137010491326, "grad_norm": 0.39198899269104004, "learning_rate": 1.5736740639760272e-05, "loss": 1.6769, "step": 7127 }, { "epoch": 0.740417575568713, "grad_norm": 0.3915342390537262, "learning_rate": 1.5724859209225168e-05, "loss": 1.5418, "step": 7128 }, { "epoch": 0.7405214500882933, "grad_norm": 0.4316711127758026, "learning_rate": 1.5712981428725192e-05, "loss": 1.5925, "step": 7129 }, { "epoch": 0.7406253246078737, "grad_norm": 0.41193971037864685, "learning_rate": 1.5701107299525253e-05, "loss": 1.5975, "step": 7130 }, { "epoch": 0.740729199127454, "grad_norm": 0.4072656035423279, "learning_rate": 1.5689236822889846e-05, "loss": 1.8195, "step": 7131 }, { "epoch": 0.7408330736470344, "grad_norm": 0.40560346841812134, "learning_rate": 1.5677370000083065e-05, "loss": 1.4801, "step": 7132 }, { "epoch": 0.7409369481666147, "grad_norm": 0.4122333824634552, "learning_rate": 1.566550683236868e-05, "loss": 1.4249, "step": 7133 }, { "epoch": 0.7410408226861951, "grad_norm": 0.4269520938396454, "learning_rate": 1.5653647321009973e-05, "loss": 1.8544, "step": 7134 }, { "epoch": 0.7411446972057755, "grad_norm": 0.4235425293445587, "learning_rate": 1.5641791467269922e-05, "loss": 1.5443, "step": 7135 }, { "epoch": 0.7412485717253557, "grad_norm": 0.49636656045913696, "learning_rate": 1.56299392724111e-05, "loss": 1.6799, "step": 7136 }, { "epoch": 0.7413524462449361, "grad_norm": 0.40569061040878296, "learning_rate": 1.561809073769561e-05, "loss": 1.5797, "step": 7137 }, { "epoch": 0.7414563207645165, "grad_norm": 0.42479363083839417, "learning_rate": 1.5606245864385287e-05, "loss": 1.6916, "step": 7138 }, { "epoch": 0.7415601952840968, "grad_norm": 0.4160860776901245, "learning_rate": 1.5594404653741494e-05, "loss": 1.4902, "step": 7139 }, { "epoch": 0.7416640698036772, "grad_norm": 0.4242292642593384, "learning_rate": 1.5582567107025237e-05, "loss": 1.6821, "step": 7140 }, { "epoch": 0.7417679443232575, "grad_norm": 0.4416872262954712, "learning_rate": 1.5570733225497115e-05, "loss": 1.6489, "step": 7141 }, { "epoch": 0.7418718188428378, "grad_norm": 0.4447227418422699, "learning_rate": 1.5558903010417346e-05, "loss": 1.766, "step": 7142 }, { "epoch": 0.7419756933624182, "grad_norm": 0.4173499047756195, "learning_rate": 1.5547076463045756e-05, "loss": 1.6973, "step": 7143 }, { "epoch": 0.7420795678819986, "grad_norm": 0.4345002770423889, "learning_rate": 1.5535253584641762e-05, "loss": 1.7257, "step": 7144 }, { "epoch": 0.7421834424015789, "grad_norm": 0.45821139216423035, "learning_rate": 1.552343437646446e-05, "loss": 1.7526, "step": 7145 }, { "epoch": 0.7422873169211592, "grad_norm": 0.391783744096756, "learning_rate": 1.5511618839772437e-05, "loss": 1.6038, "step": 7146 }, { "epoch": 0.7423911914407396, "grad_norm": 0.43559110164642334, "learning_rate": 1.5499806975824e-05, "loss": 1.6295, "step": 7147 }, { "epoch": 0.7424950659603199, "grad_norm": 0.43846607208251953, "learning_rate": 1.5487998785877005e-05, "loss": 1.6083, "step": 7148 }, { "epoch": 0.7425989404799003, "grad_norm": 0.43840745091438293, "learning_rate": 1.5476194271188944e-05, "loss": 1.6872, "step": 7149 }, { "epoch": 0.7427028149994807, "grad_norm": 0.44359713792800903, "learning_rate": 1.5464393433016895e-05, "loss": 1.6835, "step": 7150 }, { "epoch": 0.7428066895190609, "grad_norm": 0.46567508578300476, "learning_rate": 1.5452596272617555e-05, "loss": 1.6991, "step": 7151 }, { "epoch": 0.7429105640386413, "grad_norm": 0.4170341491699219, "learning_rate": 1.5440802791247234e-05, "loss": 1.5401, "step": 7152 }, { "epoch": 0.7430144385582217, "grad_norm": 0.40833204984664917, "learning_rate": 1.5429012990161827e-05, "loss": 1.604, "step": 7153 }, { "epoch": 0.743118313077802, "grad_norm": 0.4111480712890625, "learning_rate": 1.5417226870616902e-05, "loss": 1.6432, "step": 7154 }, { "epoch": 0.7432221875973823, "grad_norm": 0.41619327664375305, "learning_rate": 1.540544443386753e-05, "loss": 1.5474, "step": 7155 }, { "epoch": 0.7433260621169627, "grad_norm": 0.41143396496772766, "learning_rate": 1.5393665681168496e-05, "loss": 1.8406, "step": 7156 }, { "epoch": 0.7434299366365431, "grad_norm": 0.40822920203208923, "learning_rate": 1.5381890613774126e-05, "loss": 1.589, "step": 7157 }, { "epoch": 0.7435338111561234, "grad_norm": 0.39305150508880615, "learning_rate": 1.537011923293837e-05, "loss": 1.4567, "step": 7158 }, { "epoch": 0.7436376856757038, "grad_norm": 0.40565574169158936, "learning_rate": 1.5358351539914794e-05, "loss": 1.5574, "step": 7159 }, { "epoch": 0.7437415601952841, "grad_norm": 0.47624900937080383, "learning_rate": 1.5346587535956547e-05, "loss": 1.7159, "step": 7160 }, { "epoch": 0.7438454347148644, "grad_norm": 0.41582754254341125, "learning_rate": 1.5334827222316457e-05, "loss": 1.6007, "step": 7161 }, { "epoch": 0.7439493092344448, "grad_norm": 0.42031049728393555, "learning_rate": 1.5323070600246832e-05, "loss": 1.7471, "step": 7162 }, { "epoch": 0.7440531837540252, "grad_norm": 0.43765780329704285, "learning_rate": 1.5311317670999714e-05, "loss": 1.6716, "step": 7163 }, { "epoch": 0.7441570582736055, "grad_norm": 0.46914222836494446, "learning_rate": 1.52995684358267e-05, "loss": 1.7037, "step": 7164 }, { "epoch": 0.7442609327931858, "grad_norm": 0.43838274478912354, "learning_rate": 1.5287822895978942e-05, "loss": 1.6881, "step": 7165 }, { "epoch": 0.7443648073127662, "grad_norm": 0.43214210867881775, "learning_rate": 1.52760810527073e-05, "loss": 1.7643, "step": 7166 }, { "epoch": 0.7444686818323465, "grad_norm": 0.43604573607444763, "learning_rate": 1.5264342907262164e-05, "loss": 1.6576, "step": 7167 }, { "epoch": 0.7445725563519269, "grad_norm": 0.4425446391105652, "learning_rate": 1.5252608460893558e-05, "loss": 1.7985, "step": 7168 }, { "epoch": 0.7446764308715073, "grad_norm": 0.42904865741729736, "learning_rate": 1.5240877714851115e-05, "loss": 1.6071, "step": 7169 }, { "epoch": 0.7447803053910875, "grad_norm": 0.4095899164676666, "learning_rate": 1.5229150670384056e-05, "loss": 1.6072, "step": 7170 }, { "epoch": 0.7448841799106679, "grad_norm": 0.4281744360923767, "learning_rate": 1.5217427328741235e-05, "loss": 1.642, "step": 7171 }, { "epoch": 0.7449880544302483, "grad_norm": 0.4427133798599243, "learning_rate": 1.5205707691171074e-05, "loss": 1.7441, "step": 7172 }, { "epoch": 0.7450919289498286, "grad_norm": 0.4026220142841339, "learning_rate": 1.5193991758921666e-05, "loss": 1.7042, "step": 7173 }, { "epoch": 0.745195803469409, "grad_norm": 0.39205819368362427, "learning_rate": 1.5182279533240606e-05, "loss": 1.4862, "step": 7174 }, { "epoch": 0.7452996779889893, "grad_norm": 0.45115184783935547, "learning_rate": 1.5170571015375207e-05, "loss": 1.8971, "step": 7175 }, { "epoch": 0.7454035525085696, "grad_norm": 0.4259231686592102, "learning_rate": 1.5158866206572313e-05, "loss": 1.5441, "step": 7176 }, { "epoch": 0.74550742702815, "grad_norm": 0.39843812584877014, "learning_rate": 1.5147165108078398e-05, "loss": 1.7365, "step": 7177 }, { "epoch": 0.7456113015477304, "grad_norm": 0.3978004455566406, "learning_rate": 1.5135467721139535e-05, "loss": 1.5903, "step": 7178 }, { "epoch": 0.7457151760673106, "grad_norm": 0.43081727623939514, "learning_rate": 1.5123774047001404e-05, "loss": 1.6461, "step": 7179 }, { "epoch": 0.745819050586891, "grad_norm": 0.41507482528686523, "learning_rate": 1.5112084086909296e-05, "loss": 1.5398, "step": 7180 }, { "epoch": 0.7459229251064714, "grad_norm": 0.4333358108997345, "learning_rate": 1.5100397842108078e-05, "loss": 1.612, "step": 7181 }, { "epoch": 0.7460267996260518, "grad_norm": 0.4127698540687561, "learning_rate": 1.5088715313842278e-05, "loss": 1.5695, "step": 7182 }, { "epoch": 0.7461306741456321, "grad_norm": 0.4184444546699524, "learning_rate": 1.507703650335598e-05, "loss": 1.6183, "step": 7183 }, { "epoch": 0.7462345486652124, "grad_norm": 0.4407637417316437, "learning_rate": 1.506536141189288e-05, "loss": 1.6235, "step": 7184 }, { "epoch": 0.7463384231847928, "grad_norm": 0.40334874391555786, "learning_rate": 1.5053690040696295e-05, "loss": 1.6717, "step": 7185 }, { "epoch": 0.7464422977043731, "grad_norm": 0.40933892130851746, "learning_rate": 1.504202239100913e-05, "loss": 1.5891, "step": 7186 }, { "epoch": 0.7465461722239535, "grad_norm": 0.41984155774116516, "learning_rate": 1.5030358464073891e-05, "loss": 1.6755, "step": 7187 }, { "epoch": 0.7466500467435339, "grad_norm": 0.44677481055259705, "learning_rate": 1.501869826113269e-05, "loss": 1.6644, "step": 7188 }, { "epoch": 0.7467539212631141, "grad_norm": 0.43033236265182495, "learning_rate": 1.5007041783427294e-05, "loss": 1.6723, "step": 7189 }, { "epoch": 0.7468577957826945, "grad_norm": 0.4469008445739746, "learning_rate": 1.4995389032198958e-05, "loss": 1.7799, "step": 7190 }, { "epoch": 0.7469616703022749, "grad_norm": 0.44168853759765625, "learning_rate": 1.4983740008688657e-05, "loss": 1.6653, "step": 7191 }, { "epoch": 0.7470655448218552, "grad_norm": 0.41871321201324463, "learning_rate": 1.4972094714136926e-05, "loss": 1.5746, "step": 7192 }, { "epoch": 0.7471694193414355, "grad_norm": 0.411448210477829, "learning_rate": 1.4960453149783848e-05, "loss": 1.5334, "step": 7193 }, { "epoch": 0.7472732938610159, "grad_norm": 0.4211091101169586, "learning_rate": 1.4948815316869207e-05, "loss": 1.7637, "step": 7194 }, { "epoch": 0.7473771683805962, "grad_norm": 0.46923527121543884, "learning_rate": 1.4937181216632323e-05, "loss": 1.7789, "step": 7195 }, { "epoch": 0.7474810429001766, "grad_norm": 0.4714355766773224, "learning_rate": 1.4925550850312142e-05, "loss": 1.7327, "step": 7196 }, { "epoch": 0.747584917419757, "grad_norm": 0.39055758714675903, "learning_rate": 1.4913924219147202e-05, "loss": 1.6193, "step": 7197 }, { "epoch": 0.7476887919393372, "grad_norm": 0.4228423833847046, "learning_rate": 1.490230132437565e-05, "loss": 1.75, "step": 7198 }, { "epoch": 0.7477926664589176, "grad_norm": 0.42969369888305664, "learning_rate": 1.4890682167235238e-05, "loss": 1.5587, "step": 7199 }, { "epoch": 0.747896540978498, "grad_norm": 0.42206692695617676, "learning_rate": 1.4879066748963295e-05, "loss": 1.7437, "step": 7200 }, { "epoch": 0.7480004154980783, "grad_norm": 0.4134342670440674, "learning_rate": 1.4867455070796816e-05, "loss": 1.6074, "step": 7201 }, { "epoch": 0.7481042900176587, "grad_norm": 0.4012831747531891, "learning_rate": 1.48558471339723e-05, "loss": 1.5822, "step": 7202 }, { "epoch": 0.748208164537239, "grad_norm": 0.42914196848869324, "learning_rate": 1.4844242939725944e-05, "loss": 1.6097, "step": 7203 }, { "epoch": 0.7483120390568193, "grad_norm": 0.43084225058555603, "learning_rate": 1.483264248929349e-05, "loss": 1.7464, "step": 7204 }, { "epoch": 0.7484159135763997, "grad_norm": 0.4075019657611847, "learning_rate": 1.4821045783910298e-05, "loss": 1.6601, "step": 7205 }, { "epoch": 0.7485197880959801, "grad_norm": 0.46284353733062744, "learning_rate": 1.480945282481132e-05, "loss": 1.9072, "step": 7206 }, { "epoch": 0.7486236626155603, "grad_norm": 0.4372515380382538, "learning_rate": 1.4797863613231122e-05, "loss": 1.7274, "step": 7207 }, { "epoch": 0.7487275371351407, "grad_norm": 0.3990016281604767, "learning_rate": 1.478627815040386e-05, "loss": 1.5798, "step": 7208 }, { "epoch": 0.7488314116547211, "grad_norm": 0.3860011696815491, "learning_rate": 1.4774696437563278e-05, "loss": 1.5999, "step": 7209 }, { "epoch": 0.7489352861743015, "grad_norm": 0.4409307837486267, "learning_rate": 1.4763118475942784e-05, "loss": 1.7148, "step": 7210 }, { "epoch": 0.7490391606938818, "grad_norm": 0.41701313853263855, "learning_rate": 1.4751544266775302e-05, "loss": 1.7499, "step": 7211 }, { "epoch": 0.7491430352134621, "grad_norm": 0.40917038917541504, "learning_rate": 1.473997381129341e-05, "loss": 1.6405, "step": 7212 }, { "epoch": 0.7492469097330425, "grad_norm": 0.46700426936149597, "learning_rate": 1.4728407110729275e-05, "loss": 1.7749, "step": 7213 }, { "epoch": 0.7493507842526228, "grad_norm": 0.4193143844604492, "learning_rate": 1.4716844166314648e-05, "loss": 1.6553, "step": 7214 }, { "epoch": 0.7494546587722032, "grad_norm": 0.4625590741634369, "learning_rate": 1.4705284979280898e-05, "loss": 1.8334, "step": 7215 }, { "epoch": 0.7495585332917836, "grad_norm": 0.4359906017780304, "learning_rate": 1.4693729550858976e-05, "loss": 1.6999, "step": 7216 }, { "epoch": 0.7496624078113638, "grad_norm": 0.4275515079498291, "learning_rate": 1.4682177882279486e-05, "loss": 1.7192, "step": 7217 }, { "epoch": 0.7497662823309442, "grad_norm": 0.457688570022583, "learning_rate": 1.4670629974772532e-05, "loss": 1.725, "step": 7218 }, { "epoch": 0.7498701568505246, "grad_norm": 0.3888382613658905, "learning_rate": 1.465908582956792e-05, "loss": 1.5319, "step": 7219 }, { "epoch": 0.7499740313701049, "grad_norm": 0.396145761013031, "learning_rate": 1.4647545447895017e-05, "loss": 1.6354, "step": 7220 }, { "epoch": 0.7500779058896853, "grad_norm": 0.425191730260849, "learning_rate": 1.4636008830982729e-05, "loss": 1.8322, "step": 7221 }, { "epoch": 0.7501817804092656, "grad_norm": 0.4128625988960266, "learning_rate": 1.4624475980059665e-05, "loss": 1.5982, "step": 7222 }, { "epoch": 0.7502856549288459, "grad_norm": 0.41751012206077576, "learning_rate": 1.4612946896353974e-05, "loss": 1.5379, "step": 7223 }, { "epoch": 0.7503895294484263, "grad_norm": 0.39982759952545166, "learning_rate": 1.4601421581093406e-05, "loss": 1.6689, "step": 7224 }, { "epoch": 0.7504934039680067, "grad_norm": 0.4144216775894165, "learning_rate": 1.4589900035505321e-05, "loss": 1.6664, "step": 7225 }, { "epoch": 0.750597278487587, "grad_norm": 0.4442715644836426, "learning_rate": 1.4578382260816669e-05, "loss": 1.81, "step": 7226 }, { "epoch": 0.7507011530071673, "grad_norm": 0.4191363751888275, "learning_rate": 1.4566868258254013e-05, "loss": 1.5913, "step": 7227 }, { "epoch": 0.7508050275267477, "grad_norm": 0.41727662086486816, "learning_rate": 1.455535802904348e-05, "loss": 1.6758, "step": 7228 }, { "epoch": 0.750908902046328, "grad_norm": 0.4364617168903351, "learning_rate": 1.4543851574410872e-05, "loss": 1.6397, "step": 7229 }, { "epoch": 0.7510127765659084, "grad_norm": 0.45298081636428833, "learning_rate": 1.4532348895581465e-05, "loss": 1.6516, "step": 7230 }, { "epoch": 0.7511166510854888, "grad_norm": 0.4370194375514984, "learning_rate": 1.4520849993780266e-05, "loss": 1.6518, "step": 7231 }, { "epoch": 0.751220525605069, "grad_norm": 0.43286606669425964, "learning_rate": 1.450935487023179e-05, "loss": 1.5553, "step": 7232 }, { "epoch": 0.7513244001246494, "grad_norm": 0.40669238567352295, "learning_rate": 1.4497863526160183e-05, "loss": 1.6917, "step": 7233 }, { "epoch": 0.7514282746442298, "grad_norm": 0.42103543877601624, "learning_rate": 1.4486375962789184e-05, "loss": 1.4978, "step": 7234 }, { "epoch": 0.7515321491638102, "grad_norm": 0.4966675043106079, "learning_rate": 1.447489218134211e-05, "loss": 1.7395, "step": 7235 }, { "epoch": 0.7516360236833904, "grad_norm": 0.43593791127204895, "learning_rate": 1.4463412183041947e-05, "loss": 1.771, "step": 7236 }, { "epoch": 0.7517398982029708, "grad_norm": 0.4331296980381012, "learning_rate": 1.4451935969111158e-05, "loss": 1.621, "step": 7237 }, { "epoch": 0.7518437727225512, "grad_norm": 0.46651628613471985, "learning_rate": 1.4440463540771914e-05, "loss": 1.8537, "step": 7238 }, { "epoch": 0.7519476472421315, "grad_norm": 0.40865036845207214, "learning_rate": 1.4428994899245934e-05, "loss": 1.6614, "step": 7239 }, { "epoch": 0.7520515217617119, "grad_norm": 0.4490216374397278, "learning_rate": 1.4417530045754529e-05, "loss": 1.6751, "step": 7240 }, { "epoch": 0.7521553962812922, "grad_norm": 0.46918779611587524, "learning_rate": 1.4406068981518623e-05, "loss": 1.8038, "step": 7241 }, { "epoch": 0.7522592708008725, "grad_norm": 0.40138277411460876, "learning_rate": 1.4394611707758726e-05, "loss": 1.6524, "step": 7242 }, { "epoch": 0.7523631453204529, "grad_norm": 0.43885231018066406, "learning_rate": 1.4383158225694948e-05, "loss": 1.7706, "step": 7243 }, { "epoch": 0.7524670198400333, "grad_norm": 0.42752382159233093, "learning_rate": 1.4371708536546985e-05, "loss": 1.5644, "step": 7244 }, { "epoch": 0.7525708943596136, "grad_norm": 0.4024030864238739, "learning_rate": 1.4360262641534179e-05, "loss": 1.6406, "step": 7245 }, { "epoch": 0.7526747688791939, "grad_norm": 0.40453994274139404, "learning_rate": 1.4348820541875374e-05, "loss": 1.6899, "step": 7246 }, { "epoch": 0.7527786433987743, "grad_norm": 0.4157525300979614, "learning_rate": 1.4337382238789105e-05, "loss": 1.4451, "step": 7247 }, { "epoch": 0.7528825179183546, "grad_norm": 0.42336198687553406, "learning_rate": 1.432594773349346e-05, "loss": 1.7071, "step": 7248 }, { "epoch": 0.752986392437935, "grad_norm": 0.4072533845901489, "learning_rate": 1.4314517027206086e-05, "loss": 1.6278, "step": 7249 }, { "epoch": 0.7530902669575154, "grad_norm": 0.47410857677459717, "learning_rate": 1.4303090121144302e-05, "loss": 1.6032, "step": 7250 }, { "epoch": 0.7531941414770956, "grad_norm": 0.39942529797554016, "learning_rate": 1.429166701652498e-05, "loss": 1.5404, "step": 7251 }, { "epoch": 0.753298015996676, "grad_norm": 0.44464361667633057, "learning_rate": 1.4280247714564582e-05, "loss": 1.73, "step": 7252 }, { "epoch": 0.7534018905162564, "grad_norm": 0.45004576444625854, "learning_rate": 1.4268832216479177e-05, "loss": 1.7523, "step": 7253 }, { "epoch": 0.7535057650358367, "grad_norm": 0.4188900291919708, "learning_rate": 1.4257420523484427e-05, "loss": 1.605, "step": 7254 }, { "epoch": 0.753609639555417, "grad_norm": 0.39535877108573914, "learning_rate": 1.4246012636795592e-05, "loss": 1.5916, "step": 7255 }, { "epoch": 0.7537135140749974, "grad_norm": 0.44383829832077026, "learning_rate": 1.42346085576275e-05, "loss": 1.7938, "step": 7256 }, { "epoch": 0.7538173885945777, "grad_norm": 0.41384685039520264, "learning_rate": 1.422320828719465e-05, "loss": 1.5843, "step": 7257 }, { "epoch": 0.7539212631141581, "grad_norm": 0.41272881627082825, "learning_rate": 1.4211811826711014e-05, "loss": 1.5989, "step": 7258 }, { "epoch": 0.7540251376337385, "grad_norm": 0.4295910596847534, "learning_rate": 1.4200419177390279e-05, "loss": 1.6587, "step": 7259 }, { "epoch": 0.7541290121533188, "grad_norm": 0.38193991780281067, "learning_rate": 1.4189030340445646e-05, "loss": 1.5246, "step": 7260 }, { "epoch": 0.7542328866728991, "grad_norm": 0.4172359108924866, "learning_rate": 1.4177645317089954e-05, "loss": 1.6807, "step": 7261 }, { "epoch": 0.7543367611924795, "grad_norm": 0.4366094172000885, "learning_rate": 1.416626410853561e-05, "loss": 1.6379, "step": 7262 }, { "epoch": 0.7544406357120599, "grad_norm": 0.4305514991283417, "learning_rate": 1.4154886715994603e-05, "loss": 1.6603, "step": 7263 }, { "epoch": 0.7545445102316402, "grad_norm": 0.4556597173213959, "learning_rate": 1.4143513140678595e-05, "loss": 1.8564, "step": 7264 }, { "epoch": 0.7546483847512205, "grad_norm": 0.49126148223876953, "learning_rate": 1.4132143383798713e-05, "loss": 1.7624, "step": 7265 }, { "epoch": 0.7547522592708009, "grad_norm": 0.46818116307258606, "learning_rate": 1.4120777446565791e-05, "loss": 1.6411, "step": 7266 }, { "epoch": 0.7548561337903812, "grad_norm": 0.4020385444164276, "learning_rate": 1.4109415330190213e-05, "loss": 1.6907, "step": 7267 }, { "epoch": 0.7549600083099616, "grad_norm": 0.4432200491428375, "learning_rate": 1.4098057035881934e-05, "loss": 1.6414, "step": 7268 }, { "epoch": 0.755063882829542, "grad_norm": 0.43888726830482483, "learning_rate": 1.408670256485054e-05, "loss": 1.5862, "step": 7269 }, { "epoch": 0.7551677573491222, "grad_norm": 0.45090728998184204, "learning_rate": 1.4075351918305191e-05, "loss": 1.7012, "step": 7270 }, { "epoch": 0.7552716318687026, "grad_norm": 0.42840370535850525, "learning_rate": 1.4064005097454635e-05, "loss": 1.7653, "step": 7271 }, { "epoch": 0.755375506388283, "grad_norm": 0.4270748496055603, "learning_rate": 1.4052662103507214e-05, "loss": 1.6584, "step": 7272 }, { "epoch": 0.7554793809078633, "grad_norm": 0.4157198369503021, "learning_rate": 1.4041322937670903e-05, "loss": 1.7567, "step": 7273 }, { "epoch": 0.7555832554274436, "grad_norm": 0.4310022294521332, "learning_rate": 1.4029987601153188e-05, "loss": 1.6985, "step": 7274 }, { "epoch": 0.755687129947024, "grad_norm": 0.41847115755081177, "learning_rate": 1.4018656095161226e-05, "loss": 1.6191, "step": 7275 }, { "epoch": 0.7557910044666043, "grad_norm": 0.4358253479003906, "learning_rate": 1.400732842090174e-05, "loss": 1.6991, "step": 7276 }, { "epoch": 0.7558948789861847, "grad_norm": 0.4234682321548462, "learning_rate": 1.3996004579580995e-05, "loss": 1.8434, "step": 7277 }, { "epoch": 0.7559987535057651, "grad_norm": 0.5005912780761719, "learning_rate": 1.3984684572404938e-05, "loss": 1.7706, "step": 7278 }, { "epoch": 0.7561026280253453, "grad_norm": 0.45514440536499023, "learning_rate": 1.397336840057904e-05, "loss": 1.6742, "step": 7279 }, { "epoch": 0.7562065025449257, "grad_norm": 0.4162159562110901, "learning_rate": 1.3962056065308388e-05, "loss": 1.6319, "step": 7280 }, { "epoch": 0.7563103770645061, "grad_norm": 0.41975322365760803, "learning_rate": 1.3950747567797662e-05, "loss": 1.6619, "step": 7281 }, { "epoch": 0.7564142515840864, "grad_norm": 0.3796076476573944, "learning_rate": 1.3939442909251121e-05, "loss": 1.4829, "step": 7282 }, { "epoch": 0.7565181261036668, "grad_norm": 0.4343501925468445, "learning_rate": 1.3928142090872632e-05, "loss": 1.6641, "step": 7283 }, { "epoch": 0.7566220006232471, "grad_norm": 0.48821020126342773, "learning_rate": 1.3916845113865618e-05, "loss": 2.0066, "step": 7284 }, { "epoch": 0.7567258751428275, "grad_norm": 0.4088677763938904, "learning_rate": 1.3905551979433157e-05, "loss": 1.6202, "step": 7285 }, { "epoch": 0.7568297496624078, "grad_norm": 0.4253714680671692, "learning_rate": 1.3894262688777865e-05, "loss": 1.595, "step": 7286 }, { "epoch": 0.7569336241819882, "grad_norm": 0.4234124422073364, "learning_rate": 1.3882977243101957e-05, "loss": 1.6401, "step": 7287 }, { "epoch": 0.7570374987015686, "grad_norm": 0.4122594892978668, "learning_rate": 1.3871695643607252e-05, "loss": 1.415, "step": 7288 }, { "epoch": 0.7571413732211488, "grad_norm": 0.4564708173274994, "learning_rate": 1.3860417891495154e-05, "loss": 1.7223, "step": 7289 }, { "epoch": 0.7572452477407292, "grad_norm": 0.4112136662006378, "learning_rate": 1.3849143987966645e-05, "loss": 1.558, "step": 7290 }, { "epoch": 0.7573491222603096, "grad_norm": 0.4101704955101013, "learning_rate": 1.38378739342223e-05, "loss": 1.5861, "step": 7291 }, { "epoch": 0.7574529967798899, "grad_norm": 0.4257015287876129, "learning_rate": 1.3826607731462332e-05, "loss": 1.5489, "step": 7292 }, { "epoch": 0.7575568712994702, "grad_norm": 0.41998380422592163, "learning_rate": 1.3815345380886452e-05, "loss": 1.5094, "step": 7293 }, { "epoch": 0.7576607458190506, "grad_norm": 0.3991340398788452, "learning_rate": 1.380408688369405e-05, "loss": 1.5909, "step": 7294 }, { "epoch": 0.7577646203386309, "grad_norm": 0.42524048686027527, "learning_rate": 1.3792832241084058e-05, "loss": 1.6435, "step": 7295 }, { "epoch": 0.7578684948582113, "grad_norm": 0.4474044740200043, "learning_rate": 1.3781581454255005e-05, "loss": 1.6698, "step": 7296 }, { "epoch": 0.7579723693777917, "grad_norm": 0.41872158646583557, "learning_rate": 1.3770334524405015e-05, "loss": 1.6468, "step": 7297 }, { "epoch": 0.7580762438973719, "grad_norm": 0.44773685932159424, "learning_rate": 1.3759091452731792e-05, "loss": 1.7338, "step": 7298 }, { "epoch": 0.7581801184169523, "grad_norm": 0.498161643743515, "learning_rate": 1.3747852240432641e-05, "loss": 1.6291, "step": 7299 }, { "epoch": 0.7582839929365327, "grad_norm": 0.4790482223033905, "learning_rate": 1.3736616888704434e-05, "loss": 1.8468, "step": 7300 }, { "epoch": 0.758387867456113, "grad_norm": 0.4306541383266449, "learning_rate": 1.372538539874369e-05, "loss": 1.6639, "step": 7301 }, { "epoch": 0.7584917419756934, "grad_norm": 0.4123995304107666, "learning_rate": 1.3714157771746422e-05, "loss": 1.5665, "step": 7302 }, { "epoch": 0.7585956164952737, "grad_norm": 0.45254337787628174, "learning_rate": 1.3702934008908325e-05, "loss": 1.8113, "step": 7303 }, { "epoch": 0.758699491014854, "grad_norm": 0.47395646572113037, "learning_rate": 1.3691714111424641e-05, "loss": 1.7394, "step": 7304 }, { "epoch": 0.7588033655344344, "grad_norm": 0.43641114234924316, "learning_rate": 1.3680498080490161e-05, "loss": 1.6745, "step": 7305 }, { "epoch": 0.7589072400540148, "grad_norm": 0.4418218433856964, "learning_rate": 1.3669285917299347e-05, "loss": 1.6433, "step": 7306 }, { "epoch": 0.759011114573595, "grad_norm": 0.3992539048194885, "learning_rate": 1.3658077623046195e-05, "loss": 1.5712, "step": 7307 }, { "epoch": 0.7591149890931754, "grad_norm": 0.43207311630249023, "learning_rate": 1.3646873198924293e-05, "loss": 1.6891, "step": 7308 }, { "epoch": 0.7592188636127558, "grad_norm": 0.43646010756492615, "learning_rate": 1.3635672646126835e-05, "loss": 1.7747, "step": 7309 }, { "epoch": 0.7593227381323361, "grad_norm": 0.4463355839252472, "learning_rate": 1.3624475965846584e-05, "loss": 1.5922, "step": 7310 }, { "epoch": 0.7594266126519165, "grad_norm": 0.41628172993659973, "learning_rate": 1.3613283159275902e-05, "loss": 1.8192, "step": 7311 }, { "epoch": 0.7595304871714968, "grad_norm": 0.3946208953857422, "learning_rate": 1.3602094227606721e-05, "loss": 1.5317, "step": 7312 }, { "epoch": 0.7596343616910772, "grad_norm": 0.41928601264953613, "learning_rate": 1.3590909172030608e-05, "loss": 1.7496, "step": 7313 }, { "epoch": 0.7597382362106575, "grad_norm": 0.4171970784664154, "learning_rate": 1.3579727993738666e-05, "loss": 1.7095, "step": 7314 }, { "epoch": 0.7598421107302379, "grad_norm": 0.4714401960372925, "learning_rate": 1.3568550693921605e-05, "loss": 1.8207, "step": 7315 }, { "epoch": 0.7599459852498183, "grad_norm": 0.4133859872817993, "learning_rate": 1.3557377273769718e-05, "loss": 1.7672, "step": 7316 }, { "epoch": 0.7600498597693985, "grad_norm": 0.40099528431892395, "learning_rate": 1.3546207734472882e-05, "loss": 1.565, "step": 7317 }, { "epoch": 0.7601537342889789, "grad_norm": 0.4770098030567169, "learning_rate": 1.3535042077220578e-05, "loss": 1.7241, "step": 7318 }, { "epoch": 0.7602576088085593, "grad_norm": 0.43758663535118103, "learning_rate": 1.3523880303201842e-05, "loss": 1.6718, "step": 7319 }, { "epoch": 0.7603614833281396, "grad_norm": 0.4401274621486664, "learning_rate": 1.3512722413605355e-05, "loss": 1.7467, "step": 7320 }, { "epoch": 0.76046535784772, "grad_norm": 0.4016100764274597, "learning_rate": 1.3501568409619286e-05, "loss": 1.6112, "step": 7321 }, { "epoch": 0.7605692323673003, "grad_norm": 0.44667908549308777, "learning_rate": 1.3490418292431506e-05, "loss": 1.7561, "step": 7322 }, { "epoch": 0.7606731068868806, "grad_norm": 0.3975602388381958, "learning_rate": 1.3479272063229387e-05, "loss": 1.7218, "step": 7323 }, { "epoch": 0.760776981406461, "grad_norm": 0.43723657727241516, "learning_rate": 1.3468129723199919e-05, "loss": 1.777, "step": 7324 }, { "epoch": 0.7608808559260414, "grad_norm": 0.46576887369155884, "learning_rate": 1.3456991273529679e-05, "loss": 1.7544, "step": 7325 }, { "epoch": 0.7609847304456216, "grad_norm": 0.4114767014980316, "learning_rate": 1.344585671540482e-05, "loss": 1.6227, "step": 7326 }, { "epoch": 0.761088604965202, "grad_norm": 0.4945017695426941, "learning_rate": 1.3434726050011093e-05, "loss": 1.6841, "step": 7327 }, { "epoch": 0.7611924794847824, "grad_norm": 0.4236311614513397, "learning_rate": 1.34235992785338e-05, "loss": 1.6404, "step": 7328 }, { "epoch": 0.7612963540043627, "grad_norm": 0.4564515948295593, "learning_rate": 1.3412476402157908e-05, "loss": 1.7414, "step": 7329 }, { "epoch": 0.7614002285239431, "grad_norm": 0.41757771372795105, "learning_rate": 1.3401357422067857e-05, "loss": 1.7274, "step": 7330 }, { "epoch": 0.7615041030435235, "grad_norm": 0.4927060008049011, "learning_rate": 1.3390242339447779e-05, "loss": 1.7611, "step": 7331 }, { "epoch": 0.7616079775631037, "grad_norm": 0.4198152720928192, "learning_rate": 1.3379131155481334e-05, "loss": 1.7494, "step": 7332 }, { "epoch": 0.7617118520826841, "grad_norm": 0.38799822330474854, "learning_rate": 1.3368023871351737e-05, "loss": 1.6216, "step": 7333 }, { "epoch": 0.7618157266022645, "grad_norm": 0.4070364236831665, "learning_rate": 1.3356920488241875e-05, "loss": 1.5723, "step": 7334 }, { "epoch": 0.7619196011218448, "grad_norm": 0.41575080156326294, "learning_rate": 1.3345821007334153e-05, "loss": 1.6863, "step": 7335 }, { "epoch": 0.7620234756414251, "grad_norm": 0.43388399481773376, "learning_rate": 1.333472542981058e-05, "loss": 1.7284, "step": 7336 }, { "epoch": 0.7621273501610055, "grad_norm": 0.39538997411727905, "learning_rate": 1.3323633756852749e-05, "loss": 1.5037, "step": 7337 }, { "epoch": 0.7622312246805859, "grad_norm": 0.39611417055130005, "learning_rate": 1.3312545989641823e-05, "loss": 1.5679, "step": 7338 }, { "epoch": 0.7623350992001662, "grad_norm": 0.42863768339157104, "learning_rate": 1.3301462129358605e-05, "loss": 1.6756, "step": 7339 }, { "epoch": 0.7624389737197466, "grad_norm": 0.44742724299430847, "learning_rate": 1.329038217718338e-05, "loss": 1.8475, "step": 7340 }, { "epoch": 0.7625428482393269, "grad_norm": 0.4362469017505646, "learning_rate": 1.3279306134296122e-05, "loss": 1.6682, "step": 7341 }, { "epoch": 0.7626467227589072, "grad_norm": 0.41959047317504883, "learning_rate": 1.3268234001876328e-05, "loss": 1.6528, "step": 7342 }, { "epoch": 0.7627505972784876, "grad_norm": 0.3941357135772705, "learning_rate": 1.3257165781103093e-05, "loss": 1.5253, "step": 7343 }, { "epoch": 0.762854471798068, "grad_norm": 0.4178176522254944, "learning_rate": 1.3246101473155098e-05, "loss": 1.4581, "step": 7344 }, { "epoch": 0.7629583463176483, "grad_norm": 0.42281249165534973, "learning_rate": 1.3235041079210603e-05, "loss": 1.5694, "step": 7345 }, { "epoch": 0.7630622208372286, "grad_norm": 0.4281790554523468, "learning_rate": 1.3223984600447459e-05, "loss": 1.6775, "step": 7346 }, { "epoch": 0.763166095356809, "grad_norm": 0.4060118496417999, "learning_rate": 1.3212932038043075e-05, "loss": 1.4226, "step": 7347 }, { "epoch": 0.7632699698763893, "grad_norm": 0.41389042139053345, "learning_rate": 1.3201883393174508e-05, "loss": 1.6444, "step": 7348 }, { "epoch": 0.7633738443959697, "grad_norm": 0.424570232629776, "learning_rate": 1.3190838667018297e-05, "loss": 1.4875, "step": 7349 }, { "epoch": 0.76347771891555, "grad_norm": 0.39579057693481445, "learning_rate": 1.3179797860750653e-05, "loss": 1.7218, "step": 7350 }, { "epoch": 0.7635815934351303, "grad_norm": 0.4264102876186371, "learning_rate": 1.3168760975547335e-05, "loss": 1.7828, "step": 7351 }, { "epoch": 0.7636854679547107, "grad_norm": 0.4306561350822449, "learning_rate": 1.3157728012583675e-05, "loss": 1.7086, "step": 7352 }, { "epoch": 0.7637893424742911, "grad_norm": 0.43455207347869873, "learning_rate": 1.3146698973034599e-05, "loss": 1.72, "step": 7353 }, { "epoch": 0.7638932169938714, "grad_norm": 0.4588955342769623, "learning_rate": 1.3135673858074621e-05, "loss": 1.7184, "step": 7354 }, { "epoch": 0.7639970915134517, "grad_norm": 0.41876885294914246, "learning_rate": 1.3124652668877819e-05, "loss": 1.6402, "step": 7355 }, { "epoch": 0.7641009660330321, "grad_norm": 0.4438285529613495, "learning_rate": 1.3113635406617858e-05, "loss": 1.7906, "step": 7356 }, { "epoch": 0.7642048405526124, "grad_norm": 0.4253295063972473, "learning_rate": 1.310262207246803e-05, "loss": 1.6315, "step": 7357 }, { "epoch": 0.7643087150721928, "grad_norm": 0.45891672372817993, "learning_rate": 1.3091612667601111e-05, "loss": 1.7554, "step": 7358 }, { "epoch": 0.7644125895917732, "grad_norm": 0.4097495973110199, "learning_rate": 1.3080607193189559e-05, "loss": 1.5465, "step": 7359 }, { "epoch": 0.7645164641113534, "grad_norm": 0.4238637685775757, "learning_rate": 1.3069605650405375e-05, "loss": 1.6579, "step": 7360 }, { "epoch": 0.7646203386309338, "grad_norm": 0.42440927028656006, "learning_rate": 1.3058608040420095e-05, "loss": 1.5157, "step": 7361 }, { "epoch": 0.7647242131505142, "grad_norm": 0.42934998869895935, "learning_rate": 1.3047614364404914e-05, "loss": 1.6494, "step": 7362 }, { "epoch": 0.7648280876700946, "grad_norm": 0.42825332283973694, "learning_rate": 1.3036624623530552e-05, "loss": 1.6405, "step": 7363 }, { "epoch": 0.7649319621896749, "grad_norm": 0.41669198870658875, "learning_rate": 1.3025638818967373e-05, "loss": 1.5746, "step": 7364 }, { "epoch": 0.7650358367092552, "grad_norm": 0.4732094705104828, "learning_rate": 1.3014656951885229e-05, "loss": 1.7997, "step": 7365 }, { "epoch": 0.7651397112288356, "grad_norm": 0.38443320989608765, "learning_rate": 1.3003679023453614e-05, "loss": 1.52, "step": 7366 }, { "epoch": 0.7652435857484159, "grad_norm": 0.3946983516216278, "learning_rate": 1.2992705034841623e-05, "loss": 1.707, "step": 7367 }, { "epoch": 0.7653474602679963, "grad_norm": 0.4353131055831909, "learning_rate": 1.2981734987217853e-05, "loss": 1.788, "step": 7368 }, { "epoch": 0.7654513347875767, "grad_norm": 0.41617780923843384, "learning_rate": 1.2970768881750567e-05, "loss": 1.6524, "step": 7369 }, { "epoch": 0.7655552093071569, "grad_norm": 0.42190882563591003, "learning_rate": 1.295980671960756e-05, "loss": 1.6253, "step": 7370 }, { "epoch": 0.7656590838267373, "grad_norm": 0.4275079369544983, "learning_rate": 1.2948848501956207e-05, "loss": 1.6816, "step": 7371 }, { "epoch": 0.7657629583463177, "grad_norm": 0.4169149696826935, "learning_rate": 1.2937894229963483e-05, "loss": 1.5858, "step": 7372 }, { "epoch": 0.765866832865898, "grad_norm": 0.4570522606372833, "learning_rate": 1.2926943904795925e-05, "loss": 1.7703, "step": 7373 }, { "epoch": 0.7659707073854783, "grad_norm": 0.424277663230896, "learning_rate": 1.291599752761966e-05, "loss": 1.6223, "step": 7374 }, { "epoch": 0.7660745819050587, "grad_norm": 0.41258883476257324, "learning_rate": 1.2905055099600378e-05, "loss": 1.6564, "step": 7375 }, { "epoch": 0.766178456424639, "grad_norm": 0.434390127658844, "learning_rate": 1.2894116621903402e-05, "loss": 1.5993, "step": 7376 }, { "epoch": 0.7662823309442194, "grad_norm": 0.44891485571861267, "learning_rate": 1.2883182095693547e-05, "loss": 1.5434, "step": 7377 }, { "epoch": 0.7663862054637998, "grad_norm": 0.43780601024627686, "learning_rate": 1.2872251522135286e-05, "loss": 1.5887, "step": 7378 }, { "epoch": 0.76649007998338, "grad_norm": 0.4061758518218994, "learning_rate": 1.286132490239263e-05, "loss": 1.7188, "step": 7379 }, { "epoch": 0.7665939545029604, "grad_norm": 0.47357314825057983, "learning_rate": 1.2850402237629183e-05, "loss": 1.869, "step": 7380 }, { "epoch": 0.7666978290225408, "grad_norm": 0.41141971945762634, "learning_rate": 1.2839483529008117e-05, "loss": 1.7155, "step": 7381 }, { "epoch": 0.7668017035421211, "grad_norm": 0.4543936252593994, "learning_rate": 1.2828568777692191e-05, "loss": 1.6919, "step": 7382 }, { "epoch": 0.7669055780617015, "grad_norm": 0.4838084876537323, "learning_rate": 1.2817657984843746e-05, "loss": 1.7123, "step": 7383 }, { "epoch": 0.7670094525812818, "grad_norm": 0.422134667634964, "learning_rate": 1.280675115162467e-05, "loss": 1.7498, "step": 7384 }, { "epoch": 0.7671133271008621, "grad_norm": 0.44436314702033997, "learning_rate": 1.2795848279196515e-05, "loss": 1.6622, "step": 7385 }, { "epoch": 0.7672172016204425, "grad_norm": 0.42208799719810486, "learning_rate": 1.2784949368720295e-05, "loss": 1.7493, "step": 7386 }, { "epoch": 0.7673210761400229, "grad_norm": 0.47984281182289124, "learning_rate": 1.2774054421356668e-05, "loss": 1.767, "step": 7387 }, { "epoch": 0.7674249506596031, "grad_norm": 0.4420330226421356, "learning_rate": 1.2763163438265896e-05, "loss": 1.7475, "step": 7388 }, { "epoch": 0.7675288251791835, "grad_norm": 0.41784390807151794, "learning_rate": 1.2752276420607735e-05, "loss": 1.756, "step": 7389 }, { "epoch": 0.7676326996987639, "grad_norm": 0.6316620707511902, "learning_rate": 1.2741393369541604e-05, "loss": 1.8209, "step": 7390 }, { "epoch": 0.7677365742183443, "grad_norm": 0.39647650718688965, "learning_rate": 1.2730514286226435e-05, "loss": 1.5466, "step": 7391 }, { "epoch": 0.7678404487379246, "grad_norm": 0.41624629497528076, "learning_rate": 1.2719639171820812e-05, "loss": 1.6497, "step": 7392 }, { "epoch": 0.767944323257505, "grad_norm": 0.4231652617454529, "learning_rate": 1.2708768027482804e-05, "loss": 1.3777, "step": 7393 }, { "epoch": 0.7680481977770853, "grad_norm": 0.42754560708999634, "learning_rate": 1.2697900854370103e-05, "loss": 1.6038, "step": 7394 }, { "epoch": 0.7681520722966656, "grad_norm": 0.4023604691028595, "learning_rate": 1.2687037653640026e-05, "loss": 1.6013, "step": 7395 }, { "epoch": 0.768255946816246, "grad_norm": 0.4051744043827057, "learning_rate": 1.2676178426449353e-05, "loss": 1.6607, "step": 7396 }, { "epoch": 0.7683598213358264, "grad_norm": 0.39897581934928894, "learning_rate": 1.2665323173954557e-05, "loss": 1.659, "step": 7397 }, { "epoch": 0.7684636958554066, "grad_norm": 0.41026565432548523, "learning_rate": 1.2654471897311627e-05, "loss": 1.6306, "step": 7398 }, { "epoch": 0.768567570374987, "grad_norm": 0.42027774453163147, "learning_rate": 1.2643624597676123e-05, "loss": 1.5395, "step": 7399 }, { "epoch": 0.7686714448945674, "grad_norm": 0.3966914117336273, "learning_rate": 1.2632781276203216e-05, "loss": 1.6195, "step": 7400 }, { "epoch": 0.7687753194141477, "grad_norm": 0.40674200654029846, "learning_rate": 1.2621941934047626e-05, "loss": 1.5897, "step": 7401 }, { "epoch": 0.7688791939337281, "grad_norm": 0.3980896770954132, "learning_rate": 1.2611106572363663e-05, "loss": 1.4635, "step": 7402 }, { "epoch": 0.7689830684533084, "grad_norm": 0.43104100227355957, "learning_rate": 1.2600275192305184e-05, "loss": 1.6104, "step": 7403 }, { "epoch": 0.7690869429728887, "grad_norm": 0.4664388597011566, "learning_rate": 1.2589447795025699e-05, "loss": 1.7406, "step": 7404 }, { "epoch": 0.7691908174924691, "grad_norm": 0.4235634505748749, "learning_rate": 1.2578624381678183e-05, "loss": 1.7893, "step": 7405 }, { "epoch": 0.7692946920120495, "grad_norm": 0.3953429162502289, "learning_rate": 1.2567804953415286e-05, "loss": 1.5755, "step": 7406 }, { "epoch": 0.7693985665316297, "grad_norm": 0.45959627628326416, "learning_rate": 1.2556989511389183e-05, "loss": 1.6138, "step": 7407 }, { "epoch": 0.7695024410512101, "grad_norm": 0.4262646436691284, "learning_rate": 1.2546178056751623e-05, "loss": 1.5875, "step": 7408 }, { "epoch": 0.7696063155707905, "grad_norm": 0.4125349819660187, "learning_rate": 1.2535370590653955e-05, "loss": 1.5919, "step": 7409 }, { "epoch": 0.7697101900903708, "grad_norm": 0.40705251693725586, "learning_rate": 1.2524567114247082e-05, "loss": 1.6622, "step": 7410 }, { "epoch": 0.7698140646099512, "grad_norm": 0.4188450872898102, "learning_rate": 1.2513767628681489e-05, "loss": 1.5001, "step": 7411 }, { "epoch": 0.7699179391295315, "grad_norm": 0.430321604013443, "learning_rate": 1.2502972135107227e-05, "loss": 1.6887, "step": 7412 }, { "epoch": 0.7700218136491118, "grad_norm": 0.4586038291454315, "learning_rate": 1.2492180634673978e-05, "loss": 1.6159, "step": 7413 }, { "epoch": 0.7701256881686922, "grad_norm": 0.4472750425338745, "learning_rate": 1.24813931285309e-05, "loss": 1.6015, "step": 7414 }, { "epoch": 0.7702295626882726, "grad_norm": 0.404680997133255, "learning_rate": 1.2470609617826784e-05, "loss": 1.3466, "step": 7415 }, { "epoch": 0.770333437207853, "grad_norm": 0.44199302792549133, "learning_rate": 1.2459830103710019e-05, "loss": 1.7882, "step": 7416 }, { "epoch": 0.7704373117274332, "grad_norm": 0.4036124050617218, "learning_rate": 1.2449054587328519e-05, "loss": 1.6455, "step": 7417 }, { "epoch": 0.7705411862470136, "grad_norm": 0.43482506275177, "learning_rate": 1.2438283069829799e-05, "loss": 1.7585, "step": 7418 }, { "epoch": 0.770645060766594, "grad_norm": 0.3882008194923401, "learning_rate": 1.2427515552360924e-05, "loss": 1.502, "step": 7419 }, { "epoch": 0.7707489352861743, "grad_norm": 0.44999149441719055, "learning_rate": 1.2416752036068596e-05, "loss": 1.7382, "step": 7420 }, { "epoch": 0.7708528098057547, "grad_norm": 0.4068087935447693, "learning_rate": 1.2405992522099002e-05, "loss": 1.5567, "step": 7421 }, { "epoch": 0.770956684325335, "grad_norm": 0.4564858675003052, "learning_rate": 1.2395237011597949e-05, "loss": 1.8138, "step": 7422 }, { "epoch": 0.7710605588449153, "grad_norm": 0.4281824231147766, "learning_rate": 1.2384485505710847e-05, "loss": 1.6393, "step": 7423 }, { "epoch": 0.7711644333644957, "grad_norm": 0.3930261731147766, "learning_rate": 1.2373738005582603e-05, "loss": 1.5276, "step": 7424 }, { "epoch": 0.7712683078840761, "grad_norm": 0.4548739790916443, "learning_rate": 1.2362994512357779e-05, "loss": 1.7476, "step": 7425 }, { "epoch": 0.7713721824036563, "grad_norm": 0.4831191897392273, "learning_rate": 1.235225502718046e-05, "loss": 1.8217, "step": 7426 }, { "epoch": 0.7714760569232367, "grad_norm": 0.41000956296920776, "learning_rate": 1.2341519551194314e-05, "loss": 1.5592, "step": 7427 }, { "epoch": 0.7715799314428171, "grad_norm": 0.46288132667541504, "learning_rate": 1.2330788085542589e-05, "loss": 1.7773, "step": 7428 }, { "epoch": 0.7716838059623974, "grad_norm": 0.4005960524082184, "learning_rate": 1.2320060631368103e-05, "loss": 1.6483, "step": 7429 }, { "epoch": 0.7717876804819778, "grad_norm": 0.4161857068538666, "learning_rate": 1.2309337189813242e-05, "loss": 1.5655, "step": 7430 }, { "epoch": 0.7718915550015581, "grad_norm": 0.4241454601287842, "learning_rate": 1.229861776201996e-05, "loss": 1.5819, "step": 7431 }, { "epoch": 0.7719954295211384, "grad_norm": 0.44512662291526794, "learning_rate": 1.2287902349129832e-05, "loss": 1.5958, "step": 7432 }, { "epoch": 0.7720993040407188, "grad_norm": 0.42898502945899963, "learning_rate": 1.2277190952283912e-05, "loss": 1.6924, "step": 7433 }, { "epoch": 0.7722031785602992, "grad_norm": 0.4218330681324005, "learning_rate": 1.2266483572622916e-05, "loss": 1.6906, "step": 7434 }, { "epoch": 0.7723070530798795, "grad_norm": 0.4153681695461273, "learning_rate": 1.2255780211287094e-05, "loss": 1.718, "step": 7435 }, { "epoch": 0.7724109275994598, "grad_norm": 0.443933367729187, "learning_rate": 1.2245080869416258e-05, "loss": 1.636, "step": 7436 }, { "epoch": 0.7725148021190402, "grad_norm": 0.42244407534599304, "learning_rate": 1.2234385548149813e-05, "loss": 1.7592, "step": 7437 }, { "epoch": 0.7726186766386205, "grad_norm": 0.4867939054965973, "learning_rate": 1.2223694248626721e-05, "loss": 1.7048, "step": 7438 }, { "epoch": 0.7727225511582009, "grad_norm": 0.45962727069854736, "learning_rate": 1.2213006971985524e-05, "loss": 1.808, "step": 7439 }, { "epoch": 0.7728264256777813, "grad_norm": 0.44696059823036194, "learning_rate": 1.2202323719364323e-05, "loss": 1.6744, "step": 7440 }, { "epoch": 0.7729303001973616, "grad_norm": 0.3926789462566376, "learning_rate": 1.2191644491900822e-05, "loss": 1.5021, "step": 7441 }, { "epoch": 0.7730341747169419, "grad_norm": 0.4703446626663208, "learning_rate": 1.2180969290732286e-05, "loss": 1.7047, "step": 7442 }, { "epoch": 0.7731380492365223, "grad_norm": 0.4337899386882782, "learning_rate": 1.2170298116995493e-05, "loss": 1.5239, "step": 7443 }, { "epoch": 0.7732419237561027, "grad_norm": 0.39774975180625916, "learning_rate": 1.2159630971826879e-05, "loss": 1.4872, "step": 7444 }, { "epoch": 0.773345798275683, "grad_norm": 0.47527754306793213, "learning_rate": 1.2148967856362398e-05, "loss": 1.813, "step": 7445 }, { "epoch": 0.7734496727952633, "grad_norm": 0.3975711166858673, "learning_rate": 1.2138308771737594e-05, "loss": 1.5849, "step": 7446 }, { "epoch": 0.7735535473148437, "grad_norm": 0.44653868675231934, "learning_rate": 1.2127653719087572e-05, "loss": 1.7053, "step": 7447 }, { "epoch": 0.773657421834424, "grad_norm": 0.4855223298072815, "learning_rate": 1.2117002699547015e-05, "loss": 1.7733, "step": 7448 }, { "epoch": 0.7737612963540044, "grad_norm": 0.464631050825119, "learning_rate": 1.2106355714250172e-05, "loss": 1.7352, "step": 7449 }, { "epoch": 0.7738651708735848, "grad_norm": 0.44896233081817627, "learning_rate": 1.2095712764330847e-05, "loss": 1.9348, "step": 7450 }, { "epoch": 0.773969045393165, "grad_norm": 0.4480723738670349, "learning_rate": 1.2085073850922479e-05, "loss": 1.659, "step": 7451 }, { "epoch": 0.7740729199127454, "grad_norm": 0.469439297914505, "learning_rate": 1.2074438975157964e-05, "loss": 1.8332, "step": 7452 }, { "epoch": 0.7741767944323258, "grad_norm": 0.4098644256591797, "learning_rate": 1.2063808138169891e-05, "loss": 1.3752, "step": 7453 }, { "epoch": 0.7742806689519061, "grad_norm": 0.3945968449115753, "learning_rate": 1.205318134109033e-05, "loss": 1.632, "step": 7454 }, { "epoch": 0.7743845434714864, "grad_norm": 0.38634219765663147, "learning_rate": 1.2042558585050961e-05, "loss": 1.5544, "step": 7455 }, { "epoch": 0.7744884179910668, "grad_norm": 0.45720741152763367, "learning_rate": 1.2031939871183023e-05, "loss": 1.8892, "step": 7456 }, { "epoch": 0.7745922925106471, "grad_norm": 0.44557642936706543, "learning_rate": 1.2021325200617329e-05, "loss": 1.6822, "step": 7457 }, { "epoch": 0.7746961670302275, "grad_norm": 0.4418318569660187, "learning_rate": 1.2010714574484255e-05, "loss": 1.6621, "step": 7458 }, { "epoch": 0.7748000415498079, "grad_norm": 0.4305969774723053, "learning_rate": 1.2000107993913739e-05, "loss": 1.5825, "step": 7459 }, { "epoch": 0.7749039160693881, "grad_norm": 0.385640025138855, "learning_rate": 1.1989505460035333e-05, "loss": 1.573, "step": 7460 }, { "epoch": 0.7750077905889685, "grad_norm": 0.45117321610450745, "learning_rate": 1.1978906973978083e-05, "loss": 1.68, "step": 7461 }, { "epoch": 0.7751116651085489, "grad_norm": 0.4692929983139038, "learning_rate": 1.1968312536870668e-05, "loss": 1.5407, "step": 7462 }, { "epoch": 0.7752155396281292, "grad_norm": 0.38421300053596497, "learning_rate": 1.1957722149841317e-05, "loss": 1.4685, "step": 7463 }, { "epoch": 0.7753194141477096, "grad_norm": 0.41444119811058044, "learning_rate": 1.1947135814017812e-05, "loss": 1.4937, "step": 7464 }, { "epoch": 0.7754232886672899, "grad_norm": 0.4278319478034973, "learning_rate": 1.1936553530527523e-05, "loss": 1.5106, "step": 7465 }, { "epoch": 0.7755271631868702, "grad_norm": 0.39915332198143005, "learning_rate": 1.1925975300497371e-05, "loss": 1.4574, "step": 7466 }, { "epoch": 0.7756310377064506, "grad_norm": 0.44178900122642517, "learning_rate": 1.1915401125053866e-05, "loss": 1.749, "step": 7467 }, { "epoch": 0.775734912226031, "grad_norm": 0.39515554904937744, "learning_rate": 1.1904831005323053e-05, "loss": 1.5465, "step": 7468 }, { "epoch": 0.7758387867456114, "grad_norm": 0.3899015486240387, "learning_rate": 1.18942649424306e-05, "loss": 1.487, "step": 7469 }, { "epoch": 0.7759426612651916, "grad_norm": 0.44565442204475403, "learning_rate": 1.1883702937501705e-05, "loss": 1.6149, "step": 7470 }, { "epoch": 0.776046535784772, "grad_norm": 0.405678927898407, "learning_rate": 1.1873144991661106e-05, "loss": 1.567, "step": 7471 }, { "epoch": 0.7761504103043524, "grad_norm": 0.46768710017204285, "learning_rate": 1.1862591106033178e-05, "loss": 1.7902, "step": 7472 }, { "epoch": 0.7762542848239327, "grad_norm": 0.4093432128429413, "learning_rate": 1.185204128174181e-05, "loss": 1.6518, "step": 7473 }, { "epoch": 0.776358159343513, "grad_norm": 0.4388825297355652, "learning_rate": 1.1841495519910485e-05, "loss": 1.6971, "step": 7474 }, { "epoch": 0.7764620338630934, "grad_norm": 0.4254426956176758, "learning_rate": 1.1830953821662232e-05, "loss": 1.6166, "step": 7475 }, { "epoch": 0.7765659083826737, "grad_norm": 0.42285576462745667, "learning_rate": 1.182041618811967e-05, "loss": 1.611, "step": 7476 }, { "epoch": 0.7766697829022541, "grad_norm": 0.4236442446708679, "learning_rate": 1.1809882620404972e-05, "loss": 1.586, "step": 7477 }, { "epoch": 0.7767736574218345, "grad_norm": 0.4397282600402832, "learning_rate": 1.1799353119639867e-05, "loss": 1.7642, "step": 7478 }, { "epoch": 0.7768775319414147, "grad_norm": 0.41384944319725037, "learning_rate": 1.1788827686945708e-05, "loss": 1.6689, "step": 7479 }, { "epoch": 0.7769814064609951, "grad_norm": 0.44808679819107056, "learning_rate": 1.177830632344331e-05, "loss": 1.8645, "step": 7480 }, { "epoch": 0.7770852809805755, "grad_norm": 0.3973611295223236, "learning_rate": 1.1767789030253168e-05, "loss": 1.6404, "step": 7481 }, { "epoch": 0.7771891555001558, "grad_norm": 0.44642767310142517, "learning_rate": 1.1757275808495266e-05, "loss": 1.7545, "step": 7482 }, { "epoch": 0.7772930300197362, "grad_norm": 0.4089753329753876, "learning_rate": 1.1746766659289188e-05, "loss": 1.6952, "step": 7483 }, { "epoch": 0.7773969045393165, "grad_norm": 0.41882163286209106, "learning_rate": 1.1736261583754072e-05, "loss": 1.5784, "step": 7484 }, { "epoch": 0.7775007790588968, "grad_norm": 0.489686518907547, "learning_rate": 1.1725760583008633e-05, "loss": 1.7849, "step": 7485 }, { "epoch": 0.7776046535784772, "grad_norm": 0.41849491000175476, "learning_rate": 1.171526365817114e-05, "loss": 1.6088, "step": 7486 }, { "epoch": 0.7777085280980576, "grad_norm": 0.40229809284210205, "learning_rate": 1.1704770810359417e-05, "loss": 1.5425, "step": 7487 }, { "epoch": 0.7778124026176378, "grad_norm": 0.3939957916736603, "learning_rate": 1.1694282040690919e-05, "loss": 1.5142, "step": 7488 }, { "epoch": 0.7779162771372182, "grad_norm": 0.46565282344818115, "learning_rate": 1.168379735028256e-05, "loss": 1.4844, "step": 7489 }, { "epoch": 0.7780201516567986, "grad_norm": 0.4124033451080322, "learning_rate": 1.1673316740250916e-05, "loss": 1.647, "step": 7490 }, { "epoch": 0.7781240261763789, "grad_norm": 0.463223397731781, "learning_rate": 1.1662840211712084e-05, "loss": 1.6546, "step": 7491 }, { "epoch": 0.7782279006959593, "grad_norm": 0.40807053446769714, "learning_rate": 1.1652367765781724e-05, "loss": 1.7317, "step": 7492 }, { "epoch": 0.7783317752155396, "grad_norm": 0.4360339045524597, "learning_rate": 1.164189940357508e-05, "loss": 1.6996, "step": 7493 }, { "epoch": 0.77843564973512, "grad_norm": 0.4180784821510315, "learning_rate": 1.1631435126206924e-05, "loss": 1.5694, "step": 7494 }, { "epoch": 0.7785395242547003, "grad_norm": 0.4407843053340912, "learning_rate": 1.1620974934791673e-05, "loss": 1.5474, "step": 7495 }, { "epoch": 0.7786433987742807, "grad_norm": 0.36754143238067627, "learning_rate": 1.1610518830443195e-05, "loss": 1.3948, "step": 7496 }, { "epoch": 0.7787472732938611, "grad_norm": 0.4531268775463104, "learning_rate": 1.1600066814275028e-05, "loss": 1.7093, "step": 7497 }, { "epoch": 0.7788511478134413, "grad_norm": 0.4329068958759308, "learning_rate": 1.1589618887400223e-05, "loss": 1.6267, "step": 7498 }, { "epoch": 0.7789550223330217, "grad_norm": 0.385638564825058, "learning_rate": 1.1579175050931369e-05, "loss": 1.4813, "step": 7499 }, { "epoch": 0.7790588968526021, "grad_norm": 0.4533402621746063, "learning_rate": 1.1568735305980693e-05, "loss": 1.6934, "step": 7500 }, { "epoch": 0.7791627713721824, "grad_norm": 0.41636162996292114, "learning_rate": 1.1558299653659931e-05, "loss": 1.7808, "step": 7501 }, { "epoch": 0.7792666458917628, "grad_norm": 0.44357746839523315, "learning_rate": 1.15478680950804e-05, "loss": 1.5262, "step": 7502 }, { "epoch": 0.7793705204113431, "grad_norm": 0.43575209379196167, "learning_rate": 1.1537440631352975e-05, "loss": 1.6568, "step": 7503 }, { "epoch": 0.7794743949309234, "grad_norm": 0.4117165803909302, "learning_rate": 1.1527017263588108e-05, "loss": 1.5999, "step": 7504 }, { "epoch": 0.7795782694505038, "grad_norm": 0.40848150849342346, "learning_rate": 1.15165979928958e-05, "loss": 1.4227, "step": 7505 }, { "epoch": 0.7796821439700842, "grad_norm": 0.42398545145988464, "learning_rate": 1.1506182820385608e-05, "loss": 1.6902, "step": 7506 }, { "epoch": 0.7797860184896644, "grad_norm": 0.4009295105934143, "learning_rate": 1.149577174716671e-05, "loss": 1.543, "step": 7507 }, { "epoch": 0.7798898930092448, "grad_norm": 0.48386311531066895, "learning_rate": 1.148536477434775e-05, "loss": 1.6154, "step": 7508 }, { "epoch": 0.7799937675288252, "grad_norm": 0.43705689907073975, "learning_rate": 1.1474961903037029e-05, "loss": 1.6337, "step": 7509 }, { "epoch": 0.7800976420484055, "grad_norm": 0.3995164632797241, "learning_rate": 1.146456313434236e-05, "loss": 1.6047, "step": 7510 }, { "epoch": 0.7802015165679859, "grad_norm": 0.40124985575675964, "learning_rate": 1.145416846937113e-05, "loss": 1.5502, "step": 7511 }, { "epoch": 0.7803053910875662, "grad_norm": 0.4591343104839325, "learning_rate": 1.1443777909230286e-05, "loss": 1.8176, "step": 7512 }, { "epoch": 0.7804092656071465, "grad_norm": 0.4121926724910736, "learning_rate": 1.1433391455026343e-05, "loss": 1.6699, "step": 7513 }, { "epoch": 0.7805131401267269, "grad_norm": 0.4800514876842499, "learning_rate": 1.1423009107865384e-05, "loss": 1.9029, "step": 7514 }, { "epoch": 0.7806170146463073, "grad_norm": 0.44299235939979553, "learning_rate": 1.1412630868853024e-05, "loss": 1.6599, "step": 7515 }, { "epoch": 0.7807208891658876, "grad_norm": 0.5640523433685303, "learning_rate": 1.1402256739094513e-05, "loss": 1.9605, "step": 7516 }, { "epoch": 0.7808247636854679, "grad_norm": 0.4083343744277954, "learning_rate": 1.1391886719694561e-05, "loss": 1.6042, "step": 7517 }, { "epoch": 0.7809286382050483, "grad_norm": 0.4362529516220093, "learning_rate": 1.1381520811757529e-05, "loss": 1.6724, "step": 7518 }, { "epoch": 0.7810325127246287, "grad_norm": 0.3871046006679535, "learning_rate": 1.1371159016387296e-05, "loss": 1.55, "step": 7519 }, { "epoch": 0.781136387244209, "grad_norm": 0.41727960109710693, "learning_rate": 1.136080133468731e-05, "loss": 1.6557, "step": 7520 }, { "epoch": 0.7812402617637894, "grad_norm": 0.41626328229904175, "learning_rate": 1.1350447767760586e-05, "loss": 1.642, "step": 7521 }, { "epoch": 0.7813441362833697, "grad_norm": 0.41060563921928406, "learning_rate": 1.134009831670968e-05, "loss": 1.6731, "step": 7522 }, { "epoch": 0.78144801080295, "grad_norm": 0.407531201839447, "learning_rate": 1.1329752982636776e-05, "loss": 1.6211, "step": 7523 }, { "epoch": 0.7815518853225304, "grad_norm": 0.38882845640182495, "learning_rate": 1.1319411766643512e-05, "loss": 1.5965, "step": 7524 }, { "epoch": 0.7816557598421108, "grad_norm": 0.4430273771286011, "learning_rate": 1.1309074669831189e-05, "loss": 1.7404, "step": 7525 }, { "epoch": 0.781759634361691, "grad_norm": 0.4414060115814209, "learning_rate": 1.1298741693300624e-05, "loss": 1.6904, "step": 7526 }, { "epoch": 0.7818635088812714, "grad_norm": 0.4308008849620819, "learning_rate": 1.1288412838152157e-05, "loss": 1.8278, "step": 7527 }, { "epoch": 0.7819673834008518, "grad_norm": 0.4290758967399597, "learning_rate": 1.1278088105485778e-05, "loss": 1.714, "step": 7528 }, { "epoch": 0.7820712579204321, "grad_norm": 0.4313497841358185, "learning_rate": 1.1267767496400972e-05, "loss": 1.6401, "step": 7529 }, { "epoch": 0.7821751324400125, "grad_norm": 0.4212192893028259, "learning_rate": 1.1257451011996805e-05, "loss": 1.6266, "step": 7530 }, { "epoch": 0.7822790069595928, "grad_norm": 0.44428661465644836, "learning_rate": 1.1247138653371903e-05, "loss": 1.6743, "step": 7531 }, { "epoch": 0.7823828814791731, "grad_norm": 0.44234758615493774, "learning_rate": 1.123683042162445e-05, "loss": 1.6527, "step": 7532 }, { "epoch": 0.7824867559987535, "grad_norm": 0.4521244168281555, "learning_rate": 1.1226526317852193e-05, "loss": 1.8607, "step": 7533 }, { "epoch": 0.7825906305183339, "grad_norm": 0.4284451901912689, "learning_rate": 1.1216226343152424e-05, "loss": 1.5447, "step": 7534 }, { "epoch": 0.7826945050379142, "grad_norm": 0.39966773986816406, "learning_rate": 1.1205930498622053e-05, "loss": 1.6129, "step": 7535 }, { "epoch": 0.7827983795574945, "grad_norm": 0.4256837069988251, "learning_rate": 1.1195638785357448e-05, "loss": 1.5621, "step": 7536 }, { "epoch": 0.7829022540770749, "grad_norm": 0.4508436620235443, "learning_rate": 1.1185351204454647e-05, "loss": 1.7632, "step": 7537 }, { "epoch": 0.7830061285966552, "grad_norm": 0.40548187494277954, "learning_rate": 1.1175067757009171e-05, "loss": 1.5052, "step": 7538 }, { "epoch": 0.7831100031162356, "grad_norm": 0.43622684478759766, "learning_rate": 1.1164788444116131e-05, "loss": 1.7971, "step": 7539 }, { "epoch": 0.783213877635816, "grad_norm": 0.4189201891422272, "learning_rate": 1.1154513266870204e-05, "loss": 1.5101, "step": 7540 }, { "epoch": 0.7833177521553962, "grad_norm": 0.4424949288368225, "learning_rate": 1.1144242226365599e-05, "loss": 1.8468, "step": 7541 }, { "epoch": 0.7834216266749766, "grad_norm": 0.4143393635749817, "learning_rate": 1.1133975323696111e-05, "loss": 1.6425, "step": 7542 }, { "epoch": 0.783525501194557, "grad_norm": 0.43571484088897705, "learning_rate": 1.1123712559955075e-05, "loss": 1.5772, "step": 7543 }, { "epoch": 0.7836293757141374, "grad_norm": 0.42268067598342896, "learning_rate": 1.1113453936235418e-05, "loss": 1.632, "step": 7544 }, { "epoch": 0.7837332502337176, "grad_norm": 0.4198543131351471, "learning_rate": 1.1103199453629587e-05, "loss": 1.6937, "step": 7545 }, { "epoch": 0.783837124753298, "grad_norm": 0.4531916677951813, "learning_rate": 1.1092949113229612e-05, "loss": 1.7399, "step": 7546 }, { "epoch": 0.7839409992728784, "grad_norm": 0.43174856901168823, "learning_rate": 1.1082702916127063e-05, "loss": 1.6816, "step": 7547 }, { "epoch": 0.7840448737924587, "grad_norm": 0.3892672061920166, "learning_rate": 1.1072460863413092e-05, "loss": 1.3392, "step": 7548 }, { "epoch": 0.7841487483120391, "grad_norm": 0.4050576388835907, "learning_rate": 1.1062222956178391e-05, "loss": 1.6167, "step": 7549 }, { "epoch": 0.7842526228316195, "grad_norm": 0.43549004197120667, "learning_rate": 1.1051989195513207e-05, "loss": 1.7752, "step": 7550 }, { "epoch": 0.7843564973511997, "grad_norm": 0.4682556390762329, "learning_rate": 1.1041759582507393e-05, "loss": 1.6396, "step": 7551 }, { "epoch": 0.7844603718707801, "grad_norm": 0.3986964821815491, "learning_rate": 1.1031534118250269e-05, "loss": 1.697, "step": 7552 }, { "epoch": 0.7845642463903605, "grad_norm": 0.4416189193725586, "learning_rate": 1.1021312803830813e-05, "loss": 1.5987, "step": 7553 }, { "epoch": 0.7846681209099408, "grad_norm": 0.39252594113349915, "learning_rate": 1.101109564033751e-05, "loss": 1.5662, "step": 7554 }, { "epoch": 0.7847719954295211, "grad_norm": 0.4105452299118042, "learning_rate": 1.1000882628858367e-05, "loss": 1.4838, "step": 7555 }, { "epoch": 0.7848758699491015, "grad_norm": 0.409593790769577, "learning_rate": 1.0990673770481036e-05, "loss": 1.6211, "step": 7556 }, { "epoch": 0.7849797444686818, "grad_norm": 0.40311574935913086, "learning_rate": 1.0980469066292659e-05, "loss": 1.5268, "step": 7557 }, { "epoch": 0.7850836189882622, "grad_norm": 0.4237530827522278, "learning_rate": 1.0970268517379967e-05, "loss": 1.7556, "step": 7558 }, { "epoch": 0.7851874935078426, "grad_norm": 0.4404970407485962, "learning_rate": 1.0960072124829235e-05, "loss": 1.5014, "step": 7559 }, { "epoch": 0.7852913680274228, "grad_norm": 0.42267706990242004, "learning_rate": 1.0949879889726294e-05, "loss": 1.6162, "step": 7560 }, { "epoch": 0.7853952425470032, "grad_norm": 0.44833898544311523, "learning_rate": 1.0939691813156544e-05, "loss": 1.7035, "step": 7561 }, { "epoch": 0.7854991170665836, "grad_norm": 0.4051210284233093, "learning_rate": 1.0929507896204921e-05, "loss": 1.6337, "step": 7562 }, { "epoch": 0.7856029915861639, "grad_norm": 0.4495202898979187, "learning_rate": 1.0919328139955976e-05, "loss": 1.6693, "step": 7563 }, { "epoch": 0.7857068661057443, "grad_norm": 0.4015834331512451, "learning_rate": 1.0909152545493712e-05, "loss": 1.6461, "step": 7564 }, { "epoch": 0.7858107406253246, "grad_norm": 0.4055159091949463, "learning_rate": 1.089898111390179e-05, "loss": 1.6278, "step": 7565 }, { "epoch": 0.7859146151449049, "grad_norm": 0.3767836093902588, "learning_rate": 1.0888813846263391e-05, "loss": 1.5302, "step": 7566 }, { "epoch": 0.7860184896644853, "grad_norm": 0.4378937780857086, "learning_rate": 1.0878650743661229e-05, "loss": 1.6846, "step": 7567 }, { "epoch": 0.7861223641840657, "grad_norm": 0.43852996826171875, "learning_rate": 1.086849180717761e-05, "loss": 1.6535, "step": 7568 }, { "epoch": 0.7862262387036459, "grad_norm": 0.42073825001716614, "learning_rate": 1.0858337037894372e-05, "loss": 1.6498, "step": 7569 }, { "epoch": 0.7863301132232263, "grad_norm": 0.44468173384666443, "learning_rate": 1.0848186436892927e-05, "loss": 1.5972, "step": 7570 }, { "epoch": 0.7864339877428067, "grad_norm": 0.4390113055706024, "learning_rate": 1.0838040005254213e-05, "loss": 1.618, "step": 7571 }, { "epoch": 0.7865378622623871, "grad_norm": 0.3942447602748871, "learning_rate": 1.082789774405878e-05, "loss": 1.5818, "step": 7572 }, { "epoch": 0.7866417367819674, "grad_norm": 0.4129110276699066, "learning_rate": 1.0817759654386684e-05, "loss": 1.512, "step": 7573 }, { "epoch": 0.7867456113015477, "grad_norm": 0.4303886592388153, "learning_rate": 1.0807625737317545e-05, "loss": 1.6786, "step": 7574 }, { "epoch": 0.7868494858211281, "grad_norm": 0.47462910413742065, "learning_rate": 1.0797495993930562e-05, "loss": 1.7152, "step": 7575 }, { "epoch": 0.7869533603407084, "grad_norm": 0.41988933086395264, "learning_rate": 1.0787370425304454e-05, "loss": 1.7505, "step": 7576 }, { "epoch": 0.7870572348602888, "grad_norm": 0.41259077191352844, "learning_rate": 1.0777249032517534e-05, "loss": 1.6145, "step": 7577 }, { "epoch": 0.7871611093798692, "grad_norm": 0.3950294256210327, "learning_rate": 1.0767131816647624e-05, "loss": 1.4186, "step": 7578 }, { "epoch": 0.7872649838994494, "grad_norm": 0.4047811031341553, "learning_rate": 1.0757018778772181e-05, "loss": 1.7788, "step": 7579 }, { "epoch": 0.7873688584190298, "grad_norm": 0.46056464314460754, "learning_rate": 1.0746909919968102e-05, "loss": 1.7623, "step": 7580 }, { "epoch": 0.7874727329386102, "grad_norm": 0.4839528203010559, "learning_rate": 1.0736805241311937e-05, "loss": 1.7721, "step": 7581 }, { "epoch": 0.7875766074581905, "grad_norm": 0.44515979290008545, "learning_rate": 1.0726704743879767e-05, "loss": 1.6909, "step": 7582 }, { "epoch": 0.7876804819777709, "grad_norm": 0.4091649353504181, "learning_rate": 1.0716608428747177e-05, "loss": 1.6288, "step": 7583 }, { "epoch": 0.7877843564973512, "grad_norm": 0.4144968092441559, "learning_rate": 1.0706516296989372e-05, "loss": 1.7996, "step": 7584 }, { "epoch": 0.7878882310169315, "grad_norm": 0.3900415301322937, "learning_rate": 1.0696428349681087e-05, "loss": 1.5053, "step": 7585 }, { "epoch": 0.7879921055365119, "grad_norm": 0.4206119775772095, "learning_rate": 1.0686344587896597e-05, "loss": 1.6293, "step": 7586 }, { "epoch": 0.7880959800560923, "grad_norm": 0.421841025352478, "learning_rate": 1.0676265012709752e-05, "loss": 1.628, "step": 7587 }, { "epoch": 0.7881998545756725, "grad_norm": 0.45985618233680725, "learning_rate": 1.066618962519395e-05, "loss": 1.8276, "step": 7588 }, { "epoch": 0.7883037290952529, "grad_norm": 0.414380818605423, "learning_rate": 1.0656118426422134e-05, "loss": 1.6578, "step": 7589 }, { "epoch": 0.7884076036148333, "grad_norm": 0.4441922903060913, "learning_rate": 1.06460514174668e-05, "loss": 1.74, "step": 7590 }, { "epoch": 0.7885114781344136, "grad_norm": 0.45518070459365845, "learning_rate": 1.0635988599400043e-05, "loss": 1.7842, "step": 7591 }, { "epoch": 0.788615352653994, "grad_norm": 0.3902077078819275, "learning_rate": 1.0625929973293419e-05, "loss": 1.5922, "step": 7592 }, { "epoch": 0.7887192271735743, "grad_norm": 0.4476879835128784, "learning_rate": 1.061587554021814e-05, "loss": 1.7011, "step": 7593 }, { "epoch": 0.7888231016931546, "grad_norm": 0.41619980335235596, "learning_rate": 1.0605825301244909e-05, "loss": 1.683, "step": 7594 }, { "epoch": 0.788926976212735, "grad_norm": 0.40933772921562195, "learning_rate": 1.0595779257444e-05, "loss": 1.4897, "step": 7595 }, { "epoch": 0.7890308507323154, "grad_norm": 0.4322732388973236, "learning_rate": 1.0585737409885227e-05, "loss": 1.6298, "step": 7596 }, { "epoch": 0.7891347252518958, "grad_norm": 0.4134397804737091, "learning_rate": 1.057569975963797e-05, "loss": 1.6549, "step": 7597 }, { "epoch": 0.789238599771476, "grad_norm": 0.44103166460990906, "learning_rate": 1.0565666307771193e-05, "loss": 1.7137, "step": 7598 }, { "epoch": 0.7893424742910564, "grad_norm": 0.4072246253490448, "learning_rate": 1.0555637055353324e-05, "loss": 1.5784, "step": 7599 }, { "epoch": 0.7894463488106368, "grad_norm": 0.44003406167030334, "learning_rate": 1.054561200345245e-05, "loss": 1.6315, "step": 7600 }, { "epoch": 0.7895502233302171, "grad_norm": 0.4993717074394226, "learning_rate": 1.0535591153136137e-05, "loss": 1.7496, "step": 7601 }, { "epoch": 0.7896540978497975, "grad_norm": 0.41622164845466614, "learning_rate": 1.0525574505471531e-05, "loss": 1.6808, "step": 7602 }, { "epoch": 0.7897579723693778, "grad_norm": 0.3730601370334625, "learning_rate": 1.051556206152533e-05, "loss": 1.5215, "step": 7603 }, { "epoch": 0.7898618468889581, "grad_norm": 0.41570550203323364, "learning_rate": 1.0505553822363783e-05, "loss": 1.6401, "step": 7604 }, { "epoch": 0.7899657214085385, "grad_norm": 0.41199877858161926, "learning_rate": 1.0495549789052678e-05, "loss": 1.613, "step": 7605 }, { "epoch": 0.7900695959281189, "grad_norm": 0.4056338667869568, "learning_rate": 1.0485549962657365e-05, "loss": 1.6405, "step": 7606 }, { "epoch": 0.7901734704476991, "grad_norm": 0.39273974299430847, "learning_rate": 1.0475554344242783e-05, "loss": 1.5479, "step": 7607 }, { "epoch": 0.7902773449672795, "grad_norm": 0.454589307308197, "learning_rate": 1.0465562934873335e-05, "loss": 1.8367, "step": 7608 }, { "epoch": 0.7903812194868599, "grad_norm": 0.4377838373184204, "learning_rate": 1.0455575735613066e-05, "loss": 1.6501, "step": 7609 }, { "epoch": 0.7904850940064402, "grad_norm": 0.41390401124954224, "learning_rate": 1.0445592747525534e-05, "loss": 1.5864, "step": 7610 }, { "epoch": 0.7905889685260206, "grad_norm": 0.40852096676826477, "learning_rate": 1.0435613971673813e-05, "loss": 1.5591, "step": 7611 }, { "epoch": 0.790692843045601, "grad_norm": 0.43240541219711304, "learning_rate": 1.0425639409120602e-05, "loss": 1.6134, "step": 7612 }, { "epoch": 0.7907967175651812, "grad_norm": 0.4393400549888611, "learning_rate": 1.0415669060928108e-05, "loss": 1.7608, "step": 7613 }, { "epoch": 0.7909005920847616, "grad_norm": 0.4604494571685791, "learning_rate": 1.0405702928158085e-05, "loss": 1.7024, "step": 7614 }, { "epoch": 0.791004466604342, "grad_norm": 0.40346240997314453, "learning_rate": 1.0395741011871851e-05, "loss": 1.6004, "step": 7615 }, { "epoch": 0.7911083411239223, "grad_norm": 0.39214804768562317, "learning_rate": 1.0385783313130276e-05, "loss": 1.6282, "step": 7616 }, { "epoch": 0.7912122156435026, "grad_norm": 0.44647249579429626, "learning_rate": 1.0375829832993772e-05, "loss": 1.7222, "step": 7617 }, { "epoch": 0.791316090163083, "grad_norm": 0.4374501705169678, "learning_rate": 1.0365880572522302e-05, "loss": 1.7329, "step": 7618 }, { "epoch": 0.7914199646826633, "grad_norm": 0.4381657838821411, "learning_rate": 1.0355935532775419e-05, "loss": 1.5133, "step": 7619 }, { "epoch": 0.7915238392022437, "grad_norm": 0.4005827009677887, "learning_rate": 1.0345994714812135e-05, "loss": 1.4862, "step": 7620 }, { "epoch": 0.7916277137218241, "grad_norm": 0.40875616669654846, "learning_rate": 1.0336058119691122e-05, "loss": 1.5222, "step": 7621 }, { "epoch": 0.7917315882414044, "grad_norm": 0.4034888744354248, "learning_rate": 1.0326125748470522e-05, "loss": 1.6171, "step": 7622 }, { "epoch": 0.7918354627609847, "grad_norm": 0.42959120869636536, "learning_rate": 1.0316197602208067e-05, "loss": 1.7382, "step": 7623 }, { "epoch": 0.7919393372805651, "grad_norm": 0.5072987079620361, "learning_rate": 1.0306273681961014e-05, "loss": 1.8618, "step": 7624 }, { "epoch": 0.7920432118001455, "grad_norm": 0.4025663435459137, "learning_rate": 1.0296353988786184e-05, "loss": 1.7562, "step": 7625 }, { "epoch": 0.7921470863197257, "grad_norm": 0.4331921637058258, "learning_rate": 1.0286438523739978e-05, "loss": 1.5518, "step": 7626 }, { "epoch": 0.7922509608393061, "grad_norm": 0.46530386805534363, "learning_rate": 1.0276527287878268e-05, "loss": 1.831, "step": 7627 }, { "epoch": 0.7923548353588865, "grad_norm": 0.43432602286338806, "learning_rate": 1.0266620282256556e-05, "loss": 1.69, "step": 7628 }, { "epoch": 0.7924587098784668, "grad_norm": 0.39210620522499084, "learning_rate": 1.0256717507929852e-05, "loss": 1.4931, "step": 7629 }, { "epoch": 0.7925625843980472, "grad_norm": 0.4316072463989258, "learning_rate": 1.0246818965952727e-05, "loss": 1.5392, "step": 7630 }, { "epoch": 0.7926664589176275, "grad_norm": 0.422419011592865, "learning_rate": 1.0236924657379293e-05, "loss": 1.5485, "step": 7631 }, { "epoch": 0.7927703334372078, "grad_norm": 0.43081673979759216, "learning_rate": 1.0227034583263218e-05, "loss": 1.7733, "step": 7632 }, { "epoch": 0.7928742079567882, "grad_norm": 0.4367268979549408, "learning_rate": 1.0217148744657723e-05, "loss": 1.7832, "step": 7633 }, { "epoch": 0.7929780824763686, "grad_norm": 0.42134618759155273, "learning_rate": 1.0207267142615552e-05, "loss": 1.6996, "step": 7634 }, { "epoch": 0.7930819569959489, "grad_norm": 0.40299269556999207, "learning_rate": 1.0197389778189065e-05, "loss": 1.6218, "step": 7635 }, { "epoch": 0.7931858315155292, "grad_norm": 0.414371132850647, "learning_rate": 1.0187516652430062e-05, "loss": 1.6509, "step": 7636 }, { "epoch": 0.7932897060351096, "grad_norm": 0.4730742573738098, "learning_rate": 1.0177647766390002e-05, "loss": 1.7145, "step": 7637 }, { "epoch": 0.7933935805546899, "grad_norm": 0.43268048763275146, "learning_rate": 1.0167783121119845e-05, "loss": 1.3435, "step": 7638 }, { "epoch": 0.7934974550742703, "grad_norm": 0.43376022577285767, "learning_rate": 1.0157922717670054e-05, "loss": 1.5719, "step": 7639 }, { "epoch": 0.7936013295938507, "grad_norm": 0.40382814407348633, "learning_rate": 1.0148066557090724e-05, "loss": 1.6977, "step": 7640 }, { "epoch": 0.7937052041134309, "grad_norm": 0.45663517713546753, "learning_rate": 1.0138214640431454e-05, "loss": 1.7793, "step": 7641 }, { "epoch": 0.7938090786330113, "grad_norm": 0.4395454525947571, "learning_rate": 1.0128366968741387e-05, "loss": 1.7664, "step": 7642 }, { "epoch": 0.7939129531525917, "grad_norm": 0.4374726116657257, "learning_rate": 1.0118523543069226e-05, "loss": 1.6902, "step": 7643 }, { "epoch": 0.794016827672172, "grad_norm": 0.459427148103714, "learning_rate": 1.0108684364463222e-05, "loss": 1.6312, "step": 7644 }, { "epoch": 0.7941207021917523, "grad_norm": 0.42887747287750244, "learning_rate": 1.0098849433971169e-05, "loss": 1.6194, "step": 7645 }, { "epoch": 0.7942245767113327, "grad_norm": 0.4146133363246918, "learning_rate": 1.0089018752640394e-05, "loss": 1.6965, "step": 7646 }, { "epoch": 0.794328451230913, "grad_norm": 0.46396344900131226, "learning_rate": 1.0079192321517838e-05, "loss": 1.7115, "step": 7647 }, { "epoch": 0.7944323257504934, "grad_norm": 0.4100917875766754, "learning_rate": 1.0069370141649876e-05, "loss": 1.4832, "step": 7648 }, { "epoch": 0.7945362002700738, "grad_norm": 0.4542340040206909, "learning_rate": 1.0059552214082536e-05, "loss": 1.6654, "step": 7649 }, { "epoch": 0.7946400747896541, "grad_norm": 0.4383617043495178, "learning_rate": 1.0049738539861332e-05, "loss": 1.6049, "step": 7650 }, { "epoch": 0.7947439493092344, "grad_norm": 0.4077033996582031, "learning_rate": 1.0039929120031355e-05, "loss": 1.7055, "step": 7651 }, { "epoch": 0.7948478238288148, "grad_norm": 0.4490281641483307, "learning_rate": 1.0030123955637227e-05, "loss": 1.7024, "step": 7652 }, { "epoch": 0.7949516983483952, "grad_norm": 0.4433899223804474, "learning_rate": 1.0020323047723108e-05, "loss": 1.7098, "step": 7653 }, { "epoch": 0.7950555728679755, "grad_norm": 0.3918624818325043, "learning_rate": 1.0010526397332754e-05, "loss": 1.5875, "step": 7654 }, { "epoch": 0.7951594473875558, "grad_norm": 0.4222068190574646, "learning_rate": 1.0000734005509382e-05, "loss": 1.6725, "step": 7655 }, { "epoch": 0.7952633219071362, "grad_norm": 0.4067675471305847, "learning_rate": 9.990945873295848e-06, "loss": 1.6157, "step": 7656 }, { "epoch": 0.7953671964267165, "grad_norm": 0.4570964574813843, "learning_rate": 9.981162001734496e-06, "loss": 1.7816, "step": 7657 }, { "epoch": 0.7954710709462969, "grad_norm": 0.4113404452800751, "learning_rate": 9.97138239186723e-06, "loss": 1.6609, "step": 7658 }, { "epoch": 0.7955749454658773, "grad_norm": 0.4180930256843567, "learning_rate": 9.961607044735505e-06, "loss": 1.5903, "step": 7659 }, { "epoch": 0.7956788199854575, "grad_norm": 0.4389685094356537, "learning_rate": 9.951835961380317e-06, "loss": 1.655, "step": 7660 }, { "epoch": 0.7957826945050379, "grad_norm": 0.38533127307891846, "learning_rate": 9.942069142842214e-06, "loss": 1.5481, "step": 7661 }, { "epoch": 0.7958865690246183, "grad_norm": 0.4118013083934784, "learning_rate": 9.932306590161266e-06, "loss": 1.6456, "step": 7662 }, { "epoch": 0.7959904435441986, "grad_norm": 0.433296263217926, "learning_rate": 9.922548304377154e-06, "loss": 1.6836, "step": 7663 }, { "epoch": 0.796094318063779, "grad_norm": 0.41111084818840027, "learning_rate": 9.91279428652901e-06, "loss": 1.5107, "step": 7664 }, { "epoch": 0.7961981925833593, "grad_norm": 0.4467814862728119, "learning_rate": 9.903044537655587e-06, "loss": 1.6828, "step": 7665 }, { "epoch": 0.7963020671029396, "grad_norm": 0.3999803364276886, "learning_rate": 9.893299058795175e-06, "loss": 1.5791, "step": 7666 }, { "epoch": 0.79640594162252, "grad_norm": 0.4356433153152466, "learning_rate": 9.883557850985537e-06, "loss": 1.6213, "step": 7667 }, { "epoch": 0.7965098161421004, "grad_norm": 0.44254523515701294, "learning_rate": 9.873820915264082e-06, "loss": 1.7171, "step": 7668 }, { "epoch": 0.7966136906616806, "grad_norm": 0.4416894018650055, "learning_rate": 9.864088252667702e-06, "loss": 1.5873, "step": 7669 }, { "epoch": 0.796717565181261, "grad_norm": 0.46283599734306335, "learning_rate": 9.854359864232854e-06, "loss": 1.8697, "step": 7670 }, { "epoch": 0.7968214397008414, "grad_norm": 0.41159945726394653, "learning_rate": 9.84463575099553e-06, "loss": 1.6293, "step": 7671 }, { "epoch": 0.7969253142204217, "grad_norm": 0.4374866187572479, "learning_rate": 9.834915913991272e-06, "loss": 1.7457, "step": 7672 }, { "epoch": 0.7970291887400021, "grad_norm": 0.37280136346817017, "learning_rate": 9.825200354255171e-06, "loss": 1.5509, "step": 7673 }, { "epoch": 0.7971330632595824, "grad_norm": 0.41301167011260986, "learning_rate": 9.815489072821848e-06, "loss": 1.6039, "step": 7674 }, { "epoch": 0.7972369377791628, "grad_norm": 0.3978497087955475, "learning_rate": 9.805782070725494e-06, "loss": 1.5862, "step": 7675 }, { "epoch": 0.7973408122987431, "grad_norm": 0.42098501324653625, "learning_rate": 9.796079348999831e-06, "loss": 1.6685, "step": 7676 }, { "epoch": 0.7974446868183235, "grad_norm": 0.4220602214336395, "learning_rate": 9.786380908678112e-06, "loss": 1.6917, "step": 7677 }, { "epoch": 0.7975485613379039, "grad_norm": 0.44230684638023376, "learning_rate": 9.77668675079314e-06, "loss": 1.6786, "step": 7678 }, { "epoch": 0.7976524358574841, "grad_norm": 0.49053019285202026, "learning_rate": 9.766996876377283e-06, "loss": 1.5424, "step": 7679 }, { "epoch": 0.7977563103770645, "grad_norm": 0.41432085633277893, "learning_rate": 9.757311286462429e-06, "loss": 1.6682, "step": 7680 }, { "epoch": 0.7978601848966449, "grad_norm": 0.41580092906951904, "learning_rate": 9.747629982080003e-06, "loss": 1.7237, "step": 7681 }, { "epoch": 0.7979640594162252, "grad_norm": 0.4454309642314911, "learning_rate": 9.737952964261027e-06, "loss": 1.7689, "step": 7682 }, { "epoch": 0.7980679339358056, "grad_norm": 0.40397220849990845, "learning_rate": 9.728280234035981e-06, "loss": 1.6688, "step": 7683 }, { "epoch": 0.7981718084553859, "grad_norm": 0.43213963508605957, "learning_rate": 9.718611792434973e-06, "loss": 1.7833, "step": 7684 }, { "epoch": 0.7982756829749662, "grad_norm": 0.44224637746810913, "learning_rate": 9.708947640487598e-06, "loss": 1.6129, "step": 7685 }, { "epoch": 0.7983795574945466, "grad_norm": 0.42797449231147766, "learning_rate": 9.699287779223022e-06, "loss": 1.7152, "step": 7686 }, { "epoch": 0.798483432014127, "grad_norm": 0.39138954877853394, "learning_rate": 9.689632209669935e-06, "loss": 1.5439, "step": 7687 }, { "epoch": 0.7985873065337072, "grad_norm": 0.42455416917800903, "learning_rate": 9.679980932856591e-06, "loss": 1.6193, "step": 7688 }, { "epoch": 0.7986911810532876, "grad_norm": 0.392198383808136, "learning_rate": 9.670333949810761e-06, "loss": 1.5908, "step": 7689 }, { "epoch": 0.798795055572868, "grad_norm": 0.4087858200073242, "learning_rate": 9.660691261559774e-06, "loss": 1.6236, "step": 7690 }, { "epoch": 0.7988989300924483, "grad_norm": 0.38556161522865295, "learning_rate": 9.65105286913054e-06, "loss": 1.5387, "step": 7691 }, { "epoch": 0.7990028046120287, "grad_norm": 0.40947332978248596, "learning_rate": 9.641418773549405e-06, "loss": 1.583, "step": 7692 }, { "epoch": 0.799106679131609, "grad_norm": 0.4197182059288025, "learning_rate": 9.631788975842381e-06, "loss": 1.593, "step": 7693 }, { "epoch": 0.7992105536511893, "grad_norm": 0.43122637271881104, "learning_rate": 9.622163477034957e-06, "loss": 1.6916, "step": 7694 }, { "epoch": 0.7993144281707697, "grad_norm": 0.4150683283805847, "learning_rate": 9.612542278152142e-06, "loss": 1.6192, "step": 7695 }, { "epoch": 0.7994183026903501, "grad_norm": 0.41880491375923157, "learning_rate": 9.602925380218547e-06, "loss": 1.6866, "step": 7696 }, { "epoch": 0.7995221772099304, "grad_norm": 0.40711480379104614, "learning_rate": 9.593312784258297e-06, "loss": 1.7376, "step": 7697 }, { "epoch": 0.7996260517295107, "grad_norm": 0.4136184751987457, "learning_rate": 9.583704491295042e-06, "loss": 1.6386, "step": 7698 }, { "epoch": 0.7997299262490911, "grad_norm": 0.41841137409210205, "learning_rate": 9.574100502352007e-06, "loss": 1.7478, "step": 7699 }, { "epoch": 0.7998338007686715, "grad_norm": 0.38687604665756226, "learning_rate": 9.564500818451916e-06, "loss": 1.4819, "step": 7700 }, { "epoch": 0.7999376752882518, "grad_norm": 0.40951046347618103, "learning_rate": 9.554905440617108e-06, "loss": 1.5952, "step": 7701 }, { "epoch": 0.8000415498078322, "grad_norm": 0.4178246855735779, "learning_rate": 9.545314369869362e-06, "loss": 1.7333, "step": 7702 }, { "epoch": 0.8001454243274125, "grad_norm": 0.4111107289791107, "learning_rate": 9.535727607230088e-06, "loss": 1.7021, "step": 7703 }, { "epoch": 0.8002492988469928, "grad_norm": 0.42014679312705994, "learning_rate": 9.52614515372019e-06, "loss": 1.5992, "step": 7704 }, { "epoch": 0.8003531733665732, "grad_norm": 0.45557916164398193, "learning_rate": 9.516567010360127e-06, "loss": 1.2576, "step": 7705 }, { "epoch": 0.8004570478861536, "grad_norm": 0.4034499228000641, "learning_rate": 9.506993178169892e-06, "loss": 1.5702, "step": 7706 }, { "epoch": 0.8005609224057338, "grad_norm": 0.4066244661808014, "learning_rate": 9.497423658169025e-06, "loss": 1.5422, "step": 7707 }, { "epoch": 0.8006647969253142, "grad_norm": 0.4864404499530792, "learning_rate": 9.487858451376608e-06, "loss": 1.7401, "step": 7708 }, { "epoch": 0.8007686714448946, "grad_norm": 0.4497174024581909, "learning_rate": 9.47829755881125e-06, "loss": 1.7727, "step": 7709 }, { "epoch": 0.8008725459644749, "grad_norm": 0.4629211723804474, "learning_rate": 9.468740981491143e-06, "loss": 1.6619, "step": 7710 }, { "epoch": 0.8009764204840553, "grad_norm": 0.392501562833786, "learning_rate": 9.459188720433937e-06, "loss": 1.5847, "step": 7711 }, { "epoch": 0.8010802950036356, "grad_norm": 0.47376659512519836, "learning_rate": 9.449640776656915e-06, "loss": 1.6943, "step": 7712 }, { "epoch": 0.8011841695232159, "grad_norm": 0.40953293442726135, "learning_rate": 9.440097151176846e-06, "loss": 1.5879, "step": 7713 }, { "epoch": 0.8012880440427963, "grad_norm": 0.40460994839668274, "learning_rate": 9.430557845010058e-06, "loss": 1.7309, "step": 7714 }, { "epoch": 0.8013919185623767, "grad_norm": 0.4047517776489258, "learning_rate": 9.421022859172397e-06, "loss": 1.7228, "step": 7715 }, { "epoch": 0.801495793081957, "grad_norm": 0.397476464509964, "learning_rate": 9.411492194679277e-06, "loss": 1.6439, "step": 7716 }, { "epoch": 0.8015996676015373, "grad_norm": 0.39920222759246826, "learning_rate": 9.401965852545636e-06, "loss": 1.5571, "step": 7717 }, { "epoch": 0.8017035421211177, "grad_norm": 0.4768927991390228, "learning_rate": 9.39244383378594e-06, "loss": 1.7649, "step": 7718 }, { "epoch": 0.801807416640698, "grad_norm": 0.4142306447029114, "learning_rate": 9.38292613941425e-06, "loss": 1.3555, "step": 7719 }, { "epoch": 0.8019112911602784, "grad_norm": 0.4120739996433258, "learning_rate": 9.37341277044408e-06, "loss": 1.6209, "step": 7720 }, { "epoch": 0.8020151656798588, "grad_norm": 0.434986412525177, "learning_rate": 9.363903727888556e-06, "loss": 1.3753, "step": 7721 }, { "epoch": 0.802119040199439, "grad_norm": 0.3884414732456207, "learning_rate": 9.354399012760328e-06, "loss": 1.4721, "step": 7722 }, { "epoch": 0.8022229147190194, "grad_norm": 0.42061513662338257, "learning_rate": 9.34489862607153e-06, "loss": 1.5033, "step": 7723 }, { "epoch": 0.8023267892385998, "grad_norm": 0.4704020023345947, "learning_rate": 9.335402568833923e-06, "loss": 1.8149, "step": 7724 }, { "epoch": 0.8024306637581802, "grad_norm": 0.44218188524246216, "learning_rate": 9.325910842058733e-06, "loss": 1.7166, "step": 7725 }, { "epoch": 0.8025345382777604, "grad_norm": 0.4181729555130005, "learning_rate": 9.316423446756788e-06, "loss": 1.4822, "step": 7726 }, { "epoch": 0.8026384127973408, "grad_norm": 0.4159371256828308, "learning_rate": 9.306940383938395e-06, "loss": 1.7386, "step": 7727 }, { "epoch": 0.8027422873169212, "grad_norm": 0.45085909962654114, "learning_rate": 9.297461654613415e-06, "loss": 1.6013, "step": 7728 }, { "epoch": 0.8028461618365015, "grad_norm": 0.4149478077888489, "learning_rate": 9.28798725979131e-06, "loss": 1.6137, "step": 7729 }, { "epoch": 0.8029500363560819, "grad_norm": 0.5053886771202087, "learning_rate": 9.278517200480963e-06, "loss": 1.8171, "step": 7730 }, { "epoch": 0.8030539108756622, "grad_norm": 0.4231288731098175, "learning_rate": 9.26905147769091e-06, "loss": 1.6836, "step": 7731 }, { "epoch": 0.8031577853952425, "grad_norm": 0.5030054450035095, "learning_rate": 9.25959009242916e-06, "loss": 1.9295, "step": 7732 }, { "epoch": 0.8032616599148229, "grad_norm": 0.5075528025627136, "learning_rate": 9.25013304570328e-06, "loss": 1.8385, "step": 7733 }, { "epoch": 0.8033655344344033, "grad_norm": 0.43307042121887207, "learning_rate": 9.24068033852037e-06, "loss": 1.5867, "step": 7734 }, { "epoch": 0.8034694089539836, "grad_norm": 0.4354676306247711, "learning_rate": 9.231231971887066e-06, "loss": 1.6309, "step": 7735 }, { "epoch": 0.8035732834735639, "grad_norm": 0.4056316912174225, "learning_rate": 9.221787946809552e-06, "loss": 1.5253, "step": 7736 }, { "epoch": 0.8036771579931443, "grad_norm": 0.408130019903183, "learning_rate": 9.212348264293519e-06, "loss": 1.5669, "step": 7737 }, { "epoch": 0.8037810325127246, "grad_norm": 0.4351346492767334, "learning_rate": 9.202912925344276e-06, "loss": 1.6559, "step": 7738 }, { "epoch": 0.803884907032305, "grad_norm": 0.4343356788158417, "learning_rate": 9.19348193096654e-06, "loss": 1.7244, "step": 7739 }, { "epoch": 0.8039887815518854, "grad_norm": 0.43080124258995056, "learning_rate": 9.184055282164699e-06, "loss": 1.5402, "step": 7740 }, { "epoch": 0.8040926560714656, "grad_norm": 0.4280051290988922, "learning_rate": 9.174632979942587e-06, "loss": 1.6772, "step": 7741 }, { "epoch": 0.804196530591046, "grad_norm": 0.45156073570251465, "learning_rate": 9.165215025303614e-06, "loss": 1.7684, "step": 7742 }, { "epoch": 0.8043004051106264, "grad_norm": 0.5198978185653687, "learning_rate": 9.15580141925072e-06, "loss": 1.8452, "step": 7743 }, { "epoch": 0.8044042796302067, "grad_norm": 0.4284294843673706, "learning_rate": 9.146392162786377e-06, "loss": 1.7204, "step": 7744 }, { "epoch": 0.804508154149787, "grad_norm": 0.41050204634666443, "learning_rate": 9.136987256912594e-06, "loss": 1.4458, "step": 7745 }, { "epoch": 0.8046120286693674, "grad_norm": 0.5094167590141296, "learning_rate": 9.127586702630913e-06, "loss": 1.7218, "step": 7746 }, { "epoch": 0.8047159031889477, "grad_norm": 0.44446441531181335, "learning_rate": 9.118190500942459e-06, "loss": 1.7473, "step": 7747 }, { "epoch": 0.8048197777085281, "grad_norm": 0.4276479482650757, "learning_rate": 9.108798652847811e-06, "loss": 1.6519, "step": 7748 }, { "epoch": 0.8049236522281085, "grad_norm": 0.42676594853401184, "learning_rate": 9.099411159347133e-06, "loss": 1.5866, "step": 7749 }, { "epoch": 0.8050275267476887, "grad_norm": 0.43534624576568604, "learning_rate": 9.090028021440155e-06, "loss": 1.8477, "step": 7750 }, { "epoch": 0.8051314012672691, "grad_norm": 0.4531230330467224, "learning_rate": 9.080649240126054e-06, "loss": 1.6118, "step": 7751 }, { "epoch": 0.8052352757868495, "grad_norm": 0.4191751182079315, "learning_rate": 9.07127481640364e-06, "loss": 1.5275, "step": 7752 }, { "epoch": 0.8053391503064299, "grad_norm": 0.4555509090423584, "learning_rate": 9.061904751271193e-06, "loss": 1.6641, "step": 7753 }, { "epoch": 0.8054430248260102, "grad_norm": 0.4250716269016266, "learning_rate": 9.052539045726583e-06, "loss": 1.7141, "step": 7754 }, { "epoch": 0.8055468993455905, "grad_norm": 0.42984840273857117, "learning_rate": 9.043177700767148e-06, "loss": 1.6593, "step": 7755 }, { "epoch": 0.8056507738651709, "grad_norm": 0.4200412929058075, "learning_rate": 9.033820717389802e-06, "loss": 1.6844, "step": 7756 }, { "epoch": 0.8057546483847512, "grad_norm": 0.3976275622844696, "learning_rate": 9.024468096591022e-06, "loss": 1.6062, "step": 7757 }, { "epoch": 0.8058585229043316, "grad_norm": 0.40078333020210266, "learning_rate": 9.015119839366748e-06, "loss": 1.6187, "step": 7758 }, { "epoch": 0.805962397423912, "grad_norm": 0.4719092845916748, "learning_rate": 9.005775946712524e-06, "loss": 1.6813, "step": 7759 }, { "epoch": 0.8060662719434922, "grad_norm": 0.48558080196380615, "learning_rate": 8.9964364196234e-06, "loss": 1.8797, "step": 7760 }, { "epoch": 0.8061701464630726, "grad_norm": 0.4854564368724823, "learning_rate": 8.987101259093955e-06, "loss": 1.6896, "step": 7761 }, { "epoch": 0.806274020982653, "grad_norm": 0.4403029680252075, "learning_rate": 8.977770466118313e-06, "loss": 1.574, "step": 7762 }, { "epoch": 0.8063778955022333, "grad_norm": 0.40083712339401245, "learning_rate": 8.968444041690132e-06, "loss": 1.5353, "step": 7763 }, { "epoch": 0.8064817700218136, "grad_norm": 0.41334569454193115, "learning_rate": 8.959121986802599e-06, "loss": 1.494, "step": 7764 }, { "epoch": 0.806585644541394, "grad_norm": 0.4323657751083374, "learning_rate": 8.949804302448433e-06, "loss": 1.6712, "step": 7765 }, { "epoch": 0.8066895190609743, "grad_norm": 0.4492452144622803, "learning_rate": 8.940490989619932e-06, "loss": 1.6931, "step": 7766 }, { "epoch": 0.8067933935805547, "grad_norm": 0.39172980189323425, "learning_rate": 8.931182049308833e-06, "loss": 1.616, "step": 7767 }, { "epoch": 0.8068972681001351, "grad_norm": 0.40300890803337097, "learning_rate": 8.921877482506518e-06, "loss": 1.5328, "step": 7768 }, { "epoch": 0.8070011426197153, "grad_norm": 0.38939663767814636, "learning_rate": 8.912577290203822e-06, "loss": 1.5393, "step": 7769 }, { "epoch": 0.8071050171392957, "grad_norm": 0.39137062430381775, "learning_rate": 8.90328147339115e-06, "loss": 1.5114, "step": 7770 }, { "epoch": 0.8072088916588761, "grad_norm": 0.39022791385650635, "learning_rate": 8.893990033058442e-06, "loss": 1.6231, "step": 7771 }, { "epoch": 0.8073127661784564, "grad_norm": 0.4040229320526123, "learning_rate": 8.884702970195152e-06, "loss": 1.6152, "step": 7772 }, { "epoch": 0.8074166406980368, "grad_norm": 0.45910122990608215, "learning_rate": 8.875420285790281e-06, "loss": 1.7177, "step": 7773 }, { "epoch": 0.8075205152176171, "grad_norm": 0.4154045879840851, "learning_rate": 8.866141980832354e-06, "loss": 1.6195, "step": 7774 }, { "epoch": 0.8076243897371974, "grad_norm": 0.397510826587677, "learning_rate": 8.856868056309475e-06, "loss": 1.4594, "step": 7775 }, { "epoch": 0.8077282642567778, "grad_norm": 0.48494023084640503, "learning_rate": 8.84759851320921e-06, "loss": 1.7521, "step": 7776 }, { "epoch": 0.8078321387763582, "grad_norm": 0.4340880811214447, "learning_rate": 8.838333352518685e-06, "loss": 1.5742, "step": 7777 }, { "epoch": 0.8079360132959386, "grad_norm": 0.46240857243537903, "learning_rate": 8.829072575224601e-06, "loss": 1.6479, "step": 7778 }, { "epoch": 0.8080398878155188, "grad_norm": 0.4113903343677521, "learning_rate": 8.819816182313139e-06, "loss": 1.4088, "step": 7779 }, { "epoch": 0.8081437623350992, "grad_norm": 0.4529806077480316, "learning_rate": 8.810564174770043e-06, "loss": 1.5949, "step": 7780 }, { "epoch": 0.8082476368546796, "grad_norm": 0.39212316274642944, "learning_rate": 8.801316553580551e-06, "loss": 1.6899, "step": 7781 }, { "epoch": 0.8083515113742599, "grad_norm": 0.3910139501094818, "learning_rate": 8.792073319729516e-06, "loss": 1.5462, "step": 7782 }, { "epoch": 0.8084553858938403, "grad_norm": 0.40568485856056213, "learning_rate": 8.782834474201217e-06, "loss": 1.8192, "step": 7783 }, { "epoch": 0.8085592604134206, "grad_norm": 0.4968739151954651, "learning_rate": 8.773600017979538e-06, "loss": 1.5268, "step": 7784 }, { "epoch": 0.8086631349330009, "grad_norm": 0.46858468651771545, "learning_rate": 8.764369952047896e-06, "loss": 1.7217, "step": 7785 }, { "epoch": 0.8087670094525813, "grad_norm": 0.4213848412036896, "learning_rate": 8.755144277389183e-06, "loss": 1.7892, "step": 7786 }, { "epoch": 0.8088708839721617, "grad_norm": 0.38827750086784363, "learning_rate": 8.745922994985895e-06, "loss": 1.5297, "step": 7787 }, { "epoch": 0.8089747584917419, "grad_norm": 0.45694899559020996, "learning_rate": 8.736706105820008e-06, "loss": 1.7274, "step": 7788 }, { "epoch": 0.8090786330113223, "grad_norm": 0.40677914023399353, "learning_rate": 8.727493610873062e-06, "loss": 1.597, "step": 7789 }, { "epoch": 0.8091825075309027, "grad_norm": 0.4284796714782715, "learning_rate": 8.718285511126106e-06, "loss": 1.7429, "step": 7790 }, { "epoch": 0.809286382050483, "grad_norm": 0.4217987656593323, "learning_rate": 8.709081807559732e-06, "loss": 1.6746, "step": 7791 }, { "epoch": 0.8093902565700634, "grad_norm": 0.4426036775112152, "learning_rate": 8.699882501154067e-06, "loss": 1.7955, "step": 7792 }, { "epoch": 0.8094941310896437, "grad_norm": 0.43109604716300964, "learning_rate": 8.690687592888747e-06, "loss": 1.741, "step": 7793 }, { "epoch": 0.809598005609224, "grad_norm": 0.4453687369823456, "learning_rate": 8.681497083742995e-06, "loss": 1.5883, "step": 7794 }, { "epoch": 0.8097018801288044, "grad_norm": 0.47059452533721924, "learning_rate": 8.672310974695485e-06, "loss": 1.7313, "step": 7795 }, { "epoch": 0.8098057546483848, "grad_norm": 0.39277252554893494, "learning_rate": 8.663129266724501e-06, "loss": 1.6054, "step": 7796 }, { "epoch": 0.809909629167965, "grad_norm": 0.47614243626594543, "learning_rate": 8.65395196080781e-06, "loss": 1.9036, "step": 7797 }, { "epoch": 0.8100135036875454, "grad_norm": 0.43281370401382446, "learning_rate": 8.644779057922725e-06, "loss": 1.7245, "step": 7798 }, { "epoch": 0.8101173782071258, "grad_norm": 0.41983285546302795, "learning_rate": 8.635610559046087e-06, "loss": 1.6524, "step": 7799 }, { "epoch": 0.8102212527267061, "grad_norm": 0.41546353697776794, "learning_rate": 8.62644646515427e-06, "loss": 1.6953, "step": 7800 }, { "epoch": 0.8103251272462865, "grad_norm": 0.3985000252723694, "learning_rate": 8.617286777223177e-06, "loss": 1.5696, "step": 7801 }, { "epoch": 0.8104290017658669, "grad_norm": 0.4149910509586334, "learning_rate": 8.608131496228228e-06, "loss": 1.5954, "step": 7802 }, { "epoch": 0.8105328762854472, "grad_norm": 0.5904526710510254, "learning_rate": 8.598980623144427e-06, "loss": 2.0503, "step": 7803 }, { "epoch": 0.8106367508050275, "grad_norm": 0.44825097918510437, "learning_rate": 8.589834158946253e-06, "loss": 1.8498, "step": 7804 }, { "epoch": 0.8107406253246079, "grad_norm": 0.4529954791069031, "learning_rate": 8.580692104607712e-06, "loss": 1.6659, "step": 7805 }, { "epoch": 0.8108444998441883, "grad_norm": 0.41834184527397156, "learning_rate": 8.571554461102389e-06, "loss": 1.625, "step": 7806 }, { "epoch": 0.8109483743637685, "grad_norm": 0.4493601620197296, "learning_rate": 8.562421229403356e-06, "loss": 1.7467, "step": 7807 }, { "epoch": 0.8110522488833489, "grad_norm": 0.4033777117729187, "learning_rate": 8.553292410483243e-06, "loss": 1.6585, "step": 7808 }, { "epoch": 0.8111561234029293, "grad_norm": 0.4082568883895874, "learning_rate": 8.544168005314191e-06, "loss": 1.6903, "step": 7809 }, { "epoch": 0.8112599979225096, "grad_norm": 0.44599470496177673, "learning_rate": 8.535048014867875e-06, "loss": 1.6525, "step": 7810 }, { "epoch": 0.81136387244209, "grad_norm": 0.38098323345184326, "learning_rate": 8.525932440115509e-06, "loss": 1.4948, "step": 7811 }, { "epoch": 0.8114677469616703, "grad_norm": 0.4076872766017914, "learning_rate": 8.516821282027814e-06, "loss": 1.5873, "step": 7812 }, { "epoch": 0.8115716214812506, "grad_norm": 0.40935343503952026, "learning_rate": 8.507714541575096e-06, "loss": 1.5856, "step": 7813 }, { "epoch": 0.811675496000831, "grad_norm": 0.3904459476470947, "learning_rate": 8.498612219727104e-06, "loss": 1.6061, "step": 7814 }, { "epoch": 0.8117793705204114, "grad_norm": 0.41350409388542175, "learning_rate": 8.4895143174532e-06, "loss": 1.6078, "step": 7815 }, { "epoch": 0.8118832450399917, "grad_norm": 0.4305771589279175, "learning_rate": 8.480420835722224e-06, "loss": 1.6971, "step": 7816 }, { "epoch": 0.811987119559572, "grad_norm": 0.43974366784095764, "learning_rate": 8.471331775502566e-06, "loss": 1.7409, "step": 7817 }, { "epoch": 0.8120909940791524, "grad_norm": 0.40193986892700195, "learning_rate": 8.462247137762142e-06, "loss": 1.6476, "step": 7818 }, { "epoch": 0.8121948685987327, "grad_norm": 0.42793333530426025, "learning_rate": 8.453166923468387e-06, "loss": 1.6937, "step": 7819 }, { "epoch": 0.8122987431183131, "grad_norm": 0.46195828914642334, "learning_rate": 8.444091133588273e-06, "loss": 1.7928, "step": 7820 }, { "epoch": 0.8124026176378935, "grad_norm": 0.45351237058639526, "learning_rate": 8.435019769088298e-06, "loss": 1.6076, "step": 7821 }, { "epoch": 0.8125064921574737, "grad_norm": 0.41227513551712036, "learning_rate": 8.42595283093452e-06, "loss": 1.8033, "step": 7822 }, { "epoch": 0.8126103666770541, "grad_norm": 0.4189743101596832, "learning_rate": 8.416890320092451e-06, "loss": 1.613, "step": 7823 }, { "epoch": 0.8127142411966345, "grad_norm": 0.44498223066329956, "learning_rate": 8.407832237527214e-06, "loss": 1.8238, "step": 7824 }, { "epoch": 0.8128181157162148, "grad_norm": 0.4206719994544983, "learning_rate": 8.398778584203414e-06, "loss": 1.6164, "step": 7825 }, { "epoch": 0.8129219902357951, "grad_norm": 0.4327350854873657, "learning_rate": 8.389729361085192e-06, "loss": 1.6903, "step": 7826 }, { "epoch": 0.8130258647553755, "grad_norm": 0.4112006425857544, "learning_rate": 8.380684569136222e-06, "loss": 1.5846, "step": 7827 }, { "epoch": 0.8131297392749558, "grad_norm": 0.4160413444042206, "learning_rate": 8.371644209319702e-06, "loss": 1.4752, "step": 7828 }, { "epoch": 0.8132336137945362, "grad_norm": 0.4746900796890259, "learning_rate": 8.362608282598355e-06, "loss": 1.6837, "step": 7829 }, { "epoch": 0.8133374883141166, "grad_norm": 0.4345802664756775, "learning_rate": 8.353576789934436e-06, "loss": 1.6555, "step": 7830 }, { "epoch": 0.813441362833697, "grad_norm": 0.43879222869873047, "learning_rate": 8.344549732289741e-06, "loss": 1.7406, "step": 7831 }, { "epoch": 0.8135452373532772, "grad_norm": 0.41482990980148315, "learning_rate": 8.33552711062559e-06, "loss": 1.7404, "step": 7832 }, { "epoch": 0.8136491118728576, "grad_norm": 0.4466431736946106, "learning_rate": 8.326508925902781e-06, "loss": 1.4813, "step": 7833 }, { "epoch": 0.813752986392438, "grad_norm": 0.38617411255836487, "learning_rate": 8.31749517908172e-06, "loss": 1.3518, "step": 7834 }, { "epoch": 0.8138568609120183, "grad_norm": 0.449827641248703, "learning_rate": 8.308485871122284e-06, "loss": 1.723, "step": 7835 }, { "epoch": 0.8139607354315986, "grad_norm": 0.4846034348011017, "learning_rate": 8.2994810029839e-06, "loss": 1.8044, "step": 7836 }, { "epoch": 0.814064609951179, "grad_norm": 0.4224514663219452, "learning_rate": 8.290480575625508e-06, "loss": 1.6315, "step": 7837 }, { "epoch": 0.8141684844707593, "grad_norm": 0.4300024211406708, "learning_rate": 8.28148459000559e-06, "loss": 1.6415, "step": 7838 }, { "epoch": 0.8142723589903397, "grad_norm": 0.4380504786968231, "learning_rate": 8.272493047082147e-06, "loss": 1.7884, "step": 7839 }, { "epoch": 0.8143762335099201, "grad_norm": 0.4354472756385803, "learning_rate": 8.263505947812694e-06, "loss": 1.6999, "step": 7840 }, { "epoch": 0.8144801080295003, "grad_norm": 0.42169252038002014, "learning_rate": 8.254523293154325e-06, "loss": 1.7044, "step": 7841 }, { "epoch": 0.8145839825490807, "grad_norm": 0.4186304211616516, "learning_rate": 8.245545084063572e-06, "loss": 1.6382, "step": 7842 }, { "epoch": 0.8146878570686611, "grad_norm": 0.41696295142173767, "learning_rate": 8.236571321496584e-06, "loss": 1.657, "step": 7843 }, { "epoch": 0.8147917315882414, "grad_norm": 0.40641024708747864, "learning_rate": 8.227602006408986e-06, "loss": 1.6226, "step": 7844 }, { "epoch": 0.8148956061078217, "grad_norm": 0.42552778124809265, "learning_rate": 8.218637139755931e-06, "loss": 1.6346, "step": 7845 }, { "epoch": 0.8149994806274021, "grad_norm": 0.4113791584968567, "learning_rate": 8.209676722492116e-06, "loss": 1.6784, "step": 7846 }, { "epoch": 0.8151033551469824, "grad_norm": 0.4235158860683441, "learning_rate": 8.200720755571755e-06, "loss": 1.6905, "step": 7847 }, { "epoch": 0.8152072296665628, "grad_norm": 0.47491201758384705, "learning_rate": 8.191769239948588e-06, "loss": 1.8831, "step": 7848 }, { "epoch": 0.8153111041861432, "grad_norm": 0.4474998712539673, "learning_rate": 8.182822176575865e-06, "loss": 1.6919, "step": 7849 }, { "epoch": 0.8154149787057234, "grad_norm": 0.43246933817863464, "learning_rate": 8.173879566406418e-06, "loss": 1.6247, "step": 7850 }, { "epoch": 0.8155188532253038, "grad_norm": 0.4082406461238861, "learning_rate": 8.164941410392519e-06, "loss": 1.662, "step": 7851 }, { "epoch": 0.8156227277448842, "grad_norm": 0.3958854079246521, "learning_rate": 8.15600770948604e-06, "loss": 1.5751, "step": 7852 }, { "epoch": 0.8157266022644645, "grad_norm": 0.4203862249851227, "learning_rate": 8.147078464638346e-06, "loss": 1.5519, "step": 7853 }, { "epoch": 0.8158304767840449, "grad_norm": 0.5035269260406494, "learning_rate": 8.138153676800336e-06, "loss": 1.7116, "step": 7854 }, { "epoch": 0.8159343513036252, "grad_norm": 0.4069529175758362, "learning_rate": 8.129233346922422e-06, "loss": 1.5981, "step": 7855 }, { "epoch": 0.8160382258232056, "grad_norm": 0.44691234827041626, "learning_rate": 8.120317475954536e-06, "loss": 1.7425, "step": 7856 }, { "epoch": 0.8161421003427859, "grad_norm": 0.4124663770198822, "learning_rate": 8.111406064846194e-06, "loss": 1.5632, "step": 7857 }, { "epoch": 0.8162459748623663, "grad_norm": 0.4278232157230377, "learning_rate": 8.102499114546335e-06, "loss": 1.5863, "step": 7858 }, { "epoch": 0.8163498493819467, "grad_norm": 0.4516265392303467, "learning_rate": 8.093596626003519e-06, "loss": 1.7937, "step": 7859 }, { "epoch": 0.8164537239015269, "grad_norm": 0.42853647470474243, "learning_rate": 8.084698600165796e-06, "loss": 1.745, "step": 7860 }, { "epoch": 0.8165575984211073, "grad_norm": 0.3971802294254303, "learning_rate": 8.075805037980688e-06, "loss": 1.528, "step": 7861 }, { "epoch": 0.8166614729406877, "grad_norm": 0.4206622242927551, "learning_rate": 8.066915940395341e-06, "loss": 1.6715, "step": 7862 }, { "epoch": 0.816765347460268, "grad_norm": 0.4111405313014984, "learning_rate": 8.058031308356357e-06, "loss": 1.5871, "step": 7863 }, { "epoch": 0.8168692219798483, "grad_norm": 0.5833050608634949, "learning_rate": 8.049151142809874e-06, "loss": 1.8095, "step": 7864 }, { "epoch": 0.8169730964994287, "grad_norm": 0.42305901646614075, "learning_rate": 8.040275444701567e-06, "loss": 1.712, "step": 7865 }, { "epoch": 0.817076971019009, "grad_norm": 0.46678563952445984, "learning_rate": 8.031404214976628e-06, "loss": 1.8676, "step": 7866 }, { "epoch": 0.8171808455385894, "grad_norm": 0.45080798864364624, "learning_rate": 8.02253745457977e-06, "loss": 1.5683, "step": 7867 }, { "epoch": 0.8172847200581698, "grad_norm": 0.41853803396224976, "learning_rate": 8.013675164455225e-06, "loss": 1.6236, "step": 7868 }, { "epoch": 0.81738859457775, "grad_norm": 0.4322271943092346, "learning_rate": 8.004817345546794e-06, "loss": 1.677, "step": 7869 }, { "epoch": 0.8174924690973304, "grad_norm": 0.4160740077495575, "learning_rate": 7.995963998797717e-06, "loss": 1.6949, "step": 7870 }, { "epoch": 0.8175963436169108, "grad_norm": 0.4284444749355316, "learning_rate": 7.98711512515084e-06, "loss": 1.6774, "step": 7871 }, { "epoch": 0.8177002181364911, "grad_norm": 0.3733249306678772, "learning_rate": 7.978270725548493e-06, "loss": 1.4368, "step": 7872 }, { "epoch": 0.8178040926560715, "grad_norm": 0.4796290695667267, "learning_rate": 7.969430800932526e-06, "loss": 1.6731, "step": 7873 }, { "epoch": 0.8179079671756518, "grad_norm": 0.4326523542404175, "learning_rate": 7.960595352244332e-06, "loss": 1.7251, "step": 7874 }, { "epoch": 0.8180118416952321, "grad_norm": 0.4698193669319153, "learning_rate": 7.951764380424808e-06, "loss": 1.8268, "step": 7875 }, { "epoch": 0.8181157162148125, "grad_norm": 0.42041996121406555, "learning_rate": 7.942937886414392e-06, "loss": 1.4004, "step": 7876 }, { "epoch": 0.8182195907343929, "grad_norm": 0.4212832450866699, "learning_rate": 7.934115871153025e-06, "loss": 1.6694, "step": 7877 }, { "epoch": 0.8183234652539731, "grad_norm": 0.39500170946121216, "learning_rate": 7.925298335580211e-06, "loss": 1.5698, "step": 7878 }, { "epoch": 0.8184273397735535, "grad_norm": 0.4193446934223175, "learning_rate": 7.916485280634906e-06, "loss": 1.6886, "step": 7879 }, { "epoch": 0.8185312142931339, "grad_norm": 0.44972357153892517, "learning_rate": 7.907676707255668e-06, "loss": 1.8019, "step": 7880 }, { "epoch": 0.8186350888127143, "grad_norm": 0.42009323835372925, "learning_rate": 7.898872616380526e-06, "loss": 1.5613, "step": 7881 }, { "epoch": 0.8187389633322946, "grad_norm": 0.4435664713382721, "learning_rate": 7.890073008947047e-06, "loss": 1.6753, "step": 7882 }, { "epoch": 0.818842837851875, "grad_norm": 0.40186628699302673, "learning_rate": 7.881277885892324e-06, "loss": 1.4869, "step": 7883 }, { "epoch": 0.8189467123714553, "grad_norm": 0.40945637226104736, "learning_rate": 7.87248724815296e-06, "loss": 1.641, "step": 7884 }, { "epoch": 0.8190505868910356, "grad_norm": 0.42431339621543884, "learning_rate": 7.863701096665116e-06, "loss": 1.708, "step": 7885 }, { "epoch": 0.819154461410616, "grad_norm": 0.3984614610671997, "learning_rate": 7.854919432364406e-06, "loss": 1.545, "step": 7886 }, { "epoch": 0.8192583359301964, "grad_norm": 0.42371925711631775, "learning_rate": 7.846142256186046e-06, "loss": 1.6084, "step": 7887 }, { "epoch": 0.8193622104497766, "grad_norm": 0.40940725803375244, "learning_rate": 7.837369569064734e-06, "loss": 1.4237, "step": 7888 }, { "epoch": 0.819466084969357, "grad_norm": 0.4526590406894684, "learning_rate": 7.828601371934657e-06, "loss": 1.7202, "step": 7889 }, { "epoch": 0.8195699594889374, "grad_norm": 0.4891595244407654, "learning_rate": 7.819837665729596e-06, "loss": 1.8862, "step": 7890 }, { "epoch": 0.8196738340085177, "grad_norm": 0.44362369179725647, "learning_rate": 7.811078451382808e-06, "loss": 1.776, "step": 7891 }, { "epoch": 0.8197777085280981, "grad_norm": 0.46131572127342224, "learning_rate": 7.802323729827077e-06, "loss": 1.7542, "step": 7892 }, { "epoch": 0.8198815830476784, "grad_norm": 0.45027032494544983, "learning_rate": 7.79357350199471e-06, "loss": 1.3346, "step": 7893 }, { "epoch": 0.8199854575672587, "grad_norm": 0.41758599877357483, "learning_rate": 7.784827768817548e-06, "loss": 1.6569, "step": 7894 }, { "epoch": 0.8200893320868391, "grad_norm": 0.3943220376968384, "learning_rate": 7.77608653122694e-06, "loss": 1.5361, "step": 7895 }, { "epoch": 0.8201932066064195, "grad_norm": 0.40477508306503296, "learning_rate": 7.767349790153738e-06, "loss": 1.5803, "step": 7896 }, { "epoch": 0.8202970811259997, "grad_norm": 0.4200827479362488, "learning_rate": 7.758617546528386e-06, "loss": 1.6642, "step": 7897 }, { "epoch": 0.8204009556455801, "grad_norm": 0.4615240693092346, "learning_rate": 7.74988980128074e-06, "loss": 1.8093, "step": 7898 }, { "epoch": 0.8205048301651605, "grad_norm": 0.42218905687332153, "learning_rate": 7.741166555340284e-06, "loss": 1.7515, "step": 7899 }, { "epoch": 0.8206087046847408, "grad_norm": 0.4505891799926758, "learning_rate": 7.732447809635956e-06, "loss": 1.6826, "step": 7900 }, { "epoch": 0.8207125792043212, "grad_norm": 0.4403258264064789, "learning_rate": 7.723733565096236e-06, "loss": 1.6249, "step": 7901 }, { "epoch": 0.8208164537239016, "grad_norm": 0.4211053252220154, "learning_rate": 7.715023822649126e-06, "loss": 1.563, "step": 7902 }, { "epoch": 0.8209203282434818, "grad_norm": 0.4513636529445648, "learning_rate": 7.706318583222144e-06, "loss": 1.787, "step": 7903 }, { "epoch": 0.8210242027630622, "grad_norm": 0.3969414532184601, "learning_rate": 7.697617847742328e-06, "loss": 1.6397, "step": 7904 }, { "epoch": 0.8211280772826426, "grad_norm": 0.43942591547966003, "learning_rate": 7.688921617136224e-06, "loss": 1.6617, "step": 7905 }, { "epoch": 0.8212319518022229, "grad_norm": 0.3998488187789917, "learning_rate": 7.68022989232996e-06, "loss": 1.5171, "step": 7906 }, { "epoch": 0.8213358263218032, "grad_norm": 0.44349002838134766, "learning_rate": 7.671542674249077e-06, "loss": 1.7494, "step": 7907 }, { "epoch": 0.8214397008413836, "grad_norm": 0.4117303788661957, "learning_rate": 7.662859963818737e-06, "loss": 1.6006, "step": 7908 }, { "epoch": 0.821543575360964, "grad_norm": 0.4332650303840637, "learning_rate": 7.65418176196357e-06, "loss": 1.4621, "step": 7909 }, { "epoch": 0.8216474498805443, "grad_norm": 0.3984694182872772, "learning_rate": 7.645508069607731e-06, "loss": 1.5073, "step": 7910 }, { "epoch": 0.8217513244001247, "grad_norm": 0.477058082818985, "learning_rate": 7.636838887674908e-06, "loss": 1.9018, "step": 7911 }, { "epoch": 0.821855198919705, "grad_norm": 0.4433257281780243, "learning_rate": 7.6281742170882854e-06, "loss": 1.6646, "step": 7912 }, { "epoch": 0.8219590734392853, "grad_norm": 0.4137663245201111, "learning_rate": 7.619514058770622e-06, "loss": 1.482, "step": 7913 }, { "epoch": 0.8220629479588657, "grad_norm": 0.4989883005619049, "learning_rate": 7.610858413644106e-06, "loss": 1.7999, "step": 7914 }, { "epoch": 0.8221668224784461, "grad_norm": 0.4211665391921997, "learning_rate": 7.60220728263053e-06, "loss": 1.6357, "step": 7915 }, { "epoch": 0.8222706969980264, "grad_norm": 0.4524006247520447, "learning_rate": 7.5935606666511785e-06, "loss": 1.7431, "step": 7916 }, { "epoch": 0.8223745715176067, "grad_norm": 0.42954227328300476, "learning_rate": 7.584918566626808e-06, "loss": 1.8379, "step": 7917 }, { "epoch": 0.8224784460371871, "grad_norm": 0.4114231467247009, "learning_rate": 7.576280983477768e-06, "loss": 1.7044, "step": 7918 }, { "epoch": 0.8225823205567674, "grad_norm": 0.43601930141448975, "learning_rate": 7.5676479181238935e-06, "loss": 1.6908, "step": 7919 }, { "epoch": 0.8226861950763478, "grad_norm": 0.42451170086860657, "learning_rate": 7.5590193714845215e-06, "loss": 1.7343, "step": 7920 }, { "epoch": 0.8227900695959282, "grad_norm": 0.4314807653427124, "learning_rate": 7.5503953444785395e-06, "loss": 1.6858, "step": 7921 }, { "epoch": 0.8228939441155084, "grad_norm": 0.41289034485816956, "learning_rate": 7.541775838024335e-06, "loss": 1.4288, "step": 7922 }, { "epoch": 0.8229978186350888, "grad_norm": 0.4084930419921875, "learning_rate": 7.533160853039811e-06, "loss": 1.6902, "step": 7923 }, { "epoch": 0.8231016931546692, "grad_norm": 0.4590340852737427, "learning_rate": 7.5245503904423974e-06, "loss": 1.6764, "step": 7924 }, { "epoch": 0.8232055676742495, "grad_norm": 0.46251264214515686, "learning_rate": 7.515944451149065e-06, "loss": 1.5257, "step": 7925 }, { "epoch": 0.8233094421938298, "grad_norm": 0.4301720857620239, "learning_rate": 7.507343036076236e-06, "loss": 1.7414, "step": 7926 }, { "epoch": 0.8234133167134102, "grad_norm": 0.42009609937667847, "learning_rate": 7.498746146139934e-06, "loss": 1.6366, "step": 7927 }, { "epoch": 0.8235171912329905, "grad_norm": 0.40405720472335815, "learning_rate": 7.490153782255643e-06, "loss": 1.5959, "step": 7928 }, { "epoch": 0.8236210657525709, "grad_norm": 0.4580763280391693, "learning_rate": 7.481565945338381e-06, "loss": 1.8361, "step": 7929 }, { "epoch": 0.8237249402721513, "grad_norm": 0.43223121762275696, "learning_rate": 7.472982636302694e-06, "loss": 1.592, "step": 7930 }, { "epoch": 0.8238288147917315, "grad_norm": 0.48288586735725403, "learning_rate": 7.464403856062629e-06, "loss": 1.7912, "step": 7931 }, { "epoch": 0.8239326893113119, "grad_norm": 0.4333949685096741, "learning_rate": 7.455829605531767e-06, "loss": 1.7699, "step": 7932 }, { "epoch": 0.8240365638308923, "grad_norm": 0.40936028957366943, "learning_rate": 7.4472598856231815e-06, "loss": 1.5568, "step": 7933 }, { "epoch": 0.8241404383504727, "grad_norm": 0.4204612672328949, "learning_rate": 7.438694697249504e-06, "loss": 1.5514, "step": 7934 }, { "epoch": 0.824244312870053, "grad_norm": 0.4096861183643341, "learning_rate": 7.430134041322856e-06, "loss": 1.6633, "step": 7935 }, { "epoch": 0.8243481873896333, "grad_norm": 0.4757747948169708, "learning_rate": 7.421577918754868e-06, "loss": 1.8486, "step": 7936 }, { "epoch": 0.8244520619092137, "grad_norm": 0.44562995433807373, "learning_rate": 7.413026330456713e-06, "loss": 1.6866, "step": 7937 }, { "epoch": 0.824555936428794, "grad_norm": 0.4388675391674042, "learning_rate": 7.404479277339055e-06, "loss": 1.6298, "step": 7938 }, { "epoch": 0.8246598109483744, "grad_norm": 0.4150373041629791, "learning_rate": 7.3959367603121055e-06, "loss": 1.6743, "step": 7939 }, { "epoch": 0.8247636854679548, "grad_norm": 0.40958264470100403, "learning_rate": 7.387398780285548e-06, "loss": 1.5683, "step": 7940 }, { "epoch": 0.824867559987535, "grad_norm": 0.40461811423301697, "learning_rate": 7.378865338168655e-06, "loss": 1.5644, "step": 7941 }, { "epoch": 0.8249714345071154, "grad_norm": 0.4974653124809265, "learning_rate": 7.370336434870123e-06, "loss": 1.8986, "step": 7942 }, { "epoch": 0.8250753090266958, "grad_norm": 0.40290409326553345, "learning_rate": 7.361812071298246e-06, "loss": 1.6532, "step": 7943 }, { "epoch": 0.8251791835462761, "grad_norm": 0.4083233177661896, "learning_rate": 7.353292248360805e-06, "loss": 1.6532, "step": 7944 }, { "epoch": 0.8252830580658564, "grad_norm": 0.47831931710243225, "learning_rate": 7.344776966965061e-06, "loss": 1.7635, "step": 7945 }, { "epoch": 0.8253869325854368, "grad_norm": 0.4501749277114868, "learning_rate": 7.336266228017857e-06, "loss": 1.5736, "step": 7946 }, { "epoch": 0.8254908071050171, "grad_norm": 0.43580162525177, "learning_rate": 7.327760032425507e-06, "loss": 1.7677, "step": 7947 }, { "epoch": 0.8255946816245975, "grad_norm": 0.4431309401988983, "learning_rate": 7.319258381093863e-06, "loss": 1.541, "step": 7948 }, { "epoch": 0.8256985561441779, "grad_norm": 0.46647512912750244, "learning_rate": 7.310761274928274e-06, "loss": 1.6897, "step": 7949 }, { "epoch": 0.8258024306637581, "grad_norm": 0.4248170554637909, "learning_rate": 7.302268714833621e-06, "loss": 1.6534, "step": 7950 }, { "epoch": 0.8259063051833385, "grad_norm": 0.461207777261734, "learning_rate": 7.2937807017142975e-06, "loss": 1.4829, "step": 7951 }, { "epoch": 0.8260101797029189, "grad_norm": 0.42655470967292786, "learning_rate": 7.285297236474198e-06, "loss": 1.7697, "step": 7952 }, { "epoch": 0.8261140542224992, "grad_norm": 0.40300819277763367, "learning_rate": 7.27681832001677e-06, "loss": 1.4717, "step": 7953 }, { "epoch": 0.8262179287420796, "grad_norm": 0.4207182228565216, "learning_rate": 7.268343953244921e-06, "loss": 1.5658, "step": 7954 }, { "epoch": 0.8263218032616599, "grad_norm": 0.516309916973114, "learning_rate": 7.259874137061135e-06, "loss": 1.7525, "step": 7955 }, { "epoch": 0.8264256777812402, "grad_norm": 0.4138142466545105, "learning_rate": 7.2514088723673605e-06, "loss": 1.6619, "step": 7956 }, { "epoch": 0.8265295523008206, "grad_norm": 0.4462479054927826, "learning_rate": 7.2429481600650936e-06, "loss": 1.7732, "step": 7957 }, { "epoch": 0.826633426820401, "grad_norm": 0.46467795968055725, "learning_rate": 7.234492001055332e-06, "loss": 1.762, "step": 7958 }, { "epoch": 0.8267373013399814, "grad_norm": 0.4307834506034851, "learning_rate": 7.2260403962385706e-06, "loss": 1.5749, "step": 7959 }, { "epoch": 0.8268411758595616, "grad_norm": 0.43813708424568176, "learning_rate": 7.217593346514884e-06, "loss": 1.7171, "step": 7960 }, { "epoch": 0.826945050379142, "grad_norm": 0.42140957713127136, "learning_rate": 7.209150852783769e-06, "loss": 1.6369, "step": 7961 }, { "epoch": 0.8270489248987224, "grad_norm": 0.42076048254966736, "learning_rate": 7.200712915944313e-06, "loss": 1.6839, "step": 7962 }, { "epoch": 0.8271527994183027, "grad_norm": 0.4501379728317261, "learning_rate": 7.192279536895091e-06, "loss": 1.7524, "step": 7963 }, { "epoch": 0.827256673937883, "grad_norm": 0.39931780099868774, "learning_rate": 7.183850716534179e-06, "loss": 1.6049, "step": 7964 }, { "epoch": 0.8273605484574634, "grad_norm": 0.39734897017478943, "learning_rate": 7.175426455759193e-06, "loss": 1.5222, "step": 7965 }, { "epoch": 0.8274644229770437, "grad_norm": 0.434510201215744, "learning_rate": 7.167006755467237e-06, "loss": 1.7086, "step": 7966 }, { "epoch": 0.8275682974966241, "grad_norm": 0.42094168066978455, "learning_rate": 7.15859161655496e-06, "loss": 1.7143, "step": 7967 }, { "epoch": 0.8276721720162045, "grad_norm": 0.4152793884277344, "learning_rate": 7.150181039918485e-06, "loss": 1.3494, "step": 7968 }, { "epoch": 0.8277760465357847, "grad_norm": 0.4016823470592499, "learning_rate": 7.141775026453506e-06, "loss": 1.5688, "step": 7969 }, { "epoch": 0.8278799210553651, "grad_norm": 0.45909714698791504, "learning_rate": 7.133373577055163e-06, "loss": 1.7456, "step": 7970 }, { "epoch": 0.8279837955749455, "grad_norm": 0.4085950255393982, "learning_rate": 7.124976692618168e-06, "loss": 1.5706, "step": 7971 }, { "epoch": 0.8280876700945258, "grad_norm": 0.419292151927948, "learning_rate": 7.116584374036733e-06, "loss": 1.6361, "step": 7972 }, { "epoch": 0.8281915446141062, "grad_norm": 0.4057331383228302, "learning_rate": 7.108196622204538e-06, "loss": 1.5935, "step": 7973 }, { "epoch": 0.8282954191336865, "grad_norm": 0.4193360507488251, "learning_rate": 7.099813438014841e-06, "loss": 1.4914, "step": 7974 }, { "epoch": 0.8283992936532668, "grad_norm": 0.42038246989250183, "learning_rate": 7.091434822360377e-06, "loss": 1.6914, "step": 7975 }, { "epoch": 0.8285031681728472, "grad_norm": 0.41948550939559937, "learning_rate": 7.083060776133404e-06, "loss": 1.7179, "step": 7976 }, { "epoch": 0.8286070426924276, "grad_norm": 0.46123793721199036, "learning_rate": 7.0746913002257e-06, "loss": 1.7011, "step": 7977 }, { "epoch": 0.8287109172120078, "grad_norm": 0.4170924127101898, "learning_rate": 7.066326395528539e-06, "loss": 1.6866, "step": 7978 }, { "epoch": 0.8288147917315882, "grad_norm": 0.4177165627479553, "learning_rate": 7.05796606293272e-06, "loss": 1.6695, "step": 7979 }, { "epoch": 0.8289186662511686, "grad_norm": 0.46339476108551025, "learning_rate": 7.049610303328541e-06, "loss": 1.7386, "step": 7980 }, { "epoch": 0.8290225407707489, "grad_norm": 0.45544731616973877, "learning_rate": 7.04125911760587e-06, "loss": 1.6829, "step": 7981 }, { "epoch": 0.8291264152903293, "grad_norm": 0.4197863042354584, "learning_rate": 7.032912506653983e-06, "loss": 1.5823, "step": 7982 }, { "epoch": 0.8292302898099096, "grad_norm": 0.42813071608543396, "learning_rate": 7.024570471361769e-06, "loss": 1.5883, "step": 7983 }, { "epoch": 0.82933416432949, "grad_norm": 0.43591973185539246, "learning_rate": 7.016233012617579e-06, "loss": 1.4265, "step": 7984 }, { "epoch": 0.8294380388490703, "grad_norm": 0.41332176327705383, "learning_rate": 7.0079001313092865e-06, "loss": 1.7404, "step": 7985 }, { "epoch": 0.8295419133686507, "grad_norm": 0.40137556195259094, "learning_rate": 6.99957182832428e-06, "loss": 1.6176, "step": 7986 }, { "epoch": 0.8296457878882311, "grad_norm": 0.45006173849105835, "learning_rate": 6.9912481045494475e-06, "loss": 1.6707, "step": 7987 }, { "epoch": 0.8297496624078113, "grad_norm": 0.4188838005065918, "learning_rate": 6.9829289608712264e-06, "loss": 1.6712, "step": 7988 }, { "epoch": 0.8298535369273917, "grad_norm": 0.4287783205509186, "learning_rate": 6.974614398175511e-06, "loss": 1.6211, "step": 7989 }, { "epoch": 0.8299574114469721, "grad_norm": 0.47514259815216064, "learning_rate": 6.966304417347758e-06, "loss": 1.6593, "step": 7990 }, { "epoch": 0.8300612859665524, "grad_norm": 0.40735095739364624, "learning_rate": 6.95799901927291e-06, "loss": 1.6225, "step": 7991 }, { "epoch": 0.8301651604861328, "grad_norm": 0.39932069182395935, "learning_rate": 6.949698204835426e-06, "loss": 1.5756, "step": 7992 }, { "epoch": 0.8302690350057131, "grad_norm": 0.43335703015327454, "learning_rate": 6.941401974919276e-06, "loss": 1.6939, "step": 7993 }, { "epoch": 0.8303729095252934, "grad_norm": 0.43645966053009033, "learning_rate": 6.9331103304079494e-06, "loss": 1.7476, "step": 7994 }, { "epoch": 0.8304767840448738, "grad_norm": 0.38558638095855713, "learning_rate": 6.924823272184439e-06, "loss": 1.449, "step": 7995 }, { "epoch": 0.8305806585644542, "grad_norm": 0.3955431878566742, "learning_rate": 6.916540801131232e-06, "loss": 1.6482, "step": 7996 }, { "epoch": 0.8306845330840344, "grad_norm": 0.3958597779273987, "learning_rate": 6.908262918130392e-06, "loss": 1.5417, "step": 7997 }, { "epoch": 0.8307884076036148, "grad_norm": 0.4344055950641632, "learning_rate": 6.899989624063402e-06, "loss": 1.5201, "step": 7998 }, { "epoch": 0.8308922821231952, "grad_norm": 0.4599206745624542, "learning_rate": 6.8917209198113345e-06, "loss": 1.7178, "step": 7999 }, { "epoch": 0.8309961566427755, "grad_norm": 0.3974829614162445, "learning_rate": 6.883456806254746e-06, "loss": 1.5518, "step": 8000 }, { "epoch": 0.8311000311623559, "grad_norm": 0.39066168665885925, "learning_rate": 6.87519728427366e-06, "loss": 1.6246, "step": 8001 }, { "epoch": 0.8312039056819363, "grad_norm": 0.4144991338253021, "learning_rate": 6.866942354747685e-06, "loss": 1.7103, "step": 8002 }, { "epoch": 0.8313077802015165, "grad_norm": 0.4020610749721527, "learning_rate": 6.8586920185559015e-06, "loss": 1.5555, "step": 8003 }, { "epoch": 0.8314116547210969, "grad_norm": 0.3927983045578003, "learning_rate": 6.850446276576905e-06, "loss": 1.5676, "step": 8004 }, { "epoch": 0.8315155292406773, "grad_norm": 0.4108089804649353, "learning_rate": 6.8422051296888e-06, "loss": 1.5645, "step": 8005 }, { "epoch": 0.8316194037602576, "grad_norm": 0.38096117973327637, "learning_rate": 6.833968578769201e-06, "loss": 1.4893, "step": 8006 }, { "epoch": 0.8317232782798379, "grad_norm": 0.43577104806900024, "learning_rate": 6.825736624695245e-06, "loss": 1.5007, "step": 8007 }, { "epoch": 0.8318271527994183, "grad_norm": 0.40206262469291687, "learning_rate": 6.817509268343553e-06, "loss": 1.3456, "step": 8008 }, { "epoch": 0.8319310273189986, "grad_norm": 0.4439072906970978, "learning_rate": 6.809286510590307e-06, "loss": 1.5847, "step": 8009 }, { "epoch": 0.832034901838579, "grad_norm": 0.42791223526000977, "learning_rate": 6.80106835231113e-06, "loss": 1.7015, "step": 8010 }, { "epoch": 0.8321387763581594, "grad_norm": 0.3975352644920349, "learning_rate": 6.792854794381215e-06, "loss": 1.6057, "step": 8011 }, { "epoch": 0.8322426508777397, "grad_norm": 0.42390820384025574, "learning_rate": 6.784645837675241e-06, "loss": 1.6555, "step": 8012 }, { "epoch": 0.83234652539732, "grad_norm": 0.4322831928730011, "learning_rate": 6.776441483067386e-06, "loss": 1.6785, "step": 8013 }, { "epoch": 0.8324503999169004, "grad_norm": 0.443946897983551, "learning_rate": 6.768241731431363e-06, "loss": 1.6525, "step": 8014 }, { "epoch": 0.8325542744364808, "grad_norm": 0.4141833186149597, "learning_rate": 6.760046583640362e-06, "loss": 1.6921, "step": 8015 }, { "epoch": 0.832658148956061, "grad_norm": 0.4575175344944, "learning_rate": 6.751856040567134e-06, "loss": 1.7602, "step": 8016 }, { "epoch": 0.8327620234756414, "grad_norm": 0.5201651453971863, "learning_rate": 6.7436701030838715e-06, "loss": 1.9422, "step": 8017 }, { "epoch": 0.8328658979952218, "grad_norm": 0.4121319353580475, "learning_rate": 6.735488772062337e-06, "loss": 1.7875, "step": 8018 }, { "epoch": 0.8329697725148021, "grad_norm": 0.40832269191741943, "learning_rate": 6.727312048373774e-06, "loss": 1.5723, "step": 8019 }, { "epoch": 0.8330736470343825, "grad_norm": 0.44910913705825806, "learning_rate": 6.71913993288894e-06, "loss": 1.536, "step": 8020 }, { "epoch": 0.8331775215539629, "grad_norm": 0.4083802103996277, "learning_rate": 6.710972426478096e-06, "loss": 1.5312, "step": 8021 }, { "epoch": 0.8332813960735431, "grad_norm": 0.41337552666664124, "learning_rate": 6.7028095300110225e-06, "loss": 1.6728, "step": 8022 }, { "epoch": 0.8333852705931235, "grad_norm": 0.39118510484695435, "learning_rate": 6.6946512443570085e-06, "loss": 1.4944, "step": 8023 }, { "epoch": 0.8334891451127039, "grad_norm": 0.4005904793739319, "learning_rate": 6.686497570384825e-06, "loss": 1.5726, "step": 8024 }, { "epoch": 0.8335930196322842, "grad_norm": 0.4205056428909302, "learning_rate": 6.678348508962812e-06, "loss": 1.6453, "step": 8025 }, { "epoch": 0.8336968941518645, "grad_norm": 0.4279542863368988, "learning_rate": 6.670204060958746e-06, "loss": 1.6386, "step": 8026 }, { "epoch": 0.8338007686714449, "grad_norm": 0.44344186782836914, "learning_rate": 6.662064227239967e-06, "loss": 1.6229, "step": 8027 }, { "epoch": 0.8339046431910252, "grad_norm": 0.4007083773612976, "learning_rate": 6.653929008673315e-06, "loss": 1.7046, "step": 8028 }, { "epoch": 0.8340085177106056, "grad_norm": 0.4301755130290985, "learning_rate": 6.645798406125087e-06, "loss": 1.6751, "step": 8029 }, { "epoch": 0.834112392230186, "grad_norm": 0.39321601390838623, "learning_rate": 6.637672420461161e-06, "loss": 1.4603, "step": 8030 }, { "epoch": 0.8342162667497662, "grad_norm": 0.4112393856048584, "learning_rate": 6.629551052546884e-06, "loss": 1.4504, "step": 8031 }, { "epoch": 0.8343201412693466, "grad_norm": 0.4138662815093994, "learning_rate": 6.6214343032471204e-06, "loss": 1.6946, "step": 8032 }, { "epoch": 0.834424015788927, "grad_norm": 0.43226510286331177, "learning_rate": 6.613322173426239e-06, "loss": 1.7209, "step": 8033 }, { "epoch": 0.8345278903085073, "grad_norm": 0.42375436425209045, "learning_rate": 6.605214663948111e-06, "loss": 1.8021, "step": 8034 }, { "epoch": 0.8346317648280877, "grad_norm": 0.4388313889503479, "learning_rate": 6.597111775676135e-06, "loss": 1.6669, "step": 8035 }, { "epoch": 0.834735639347668, "grad_norm": 0.4430168569087982, "learning_rate": 6.589013509473185e-06, "loss": 1.5882, "step": 8036 }, { "epoch": 0.8348395138672484, "grad_norm": 0.38932767510414124, "learning_rate": 6.58091986620169e-06, "loss": 1.4626, "step": 8037 }, { "epoch": 0.8349433883868287, "grad_norm": 0.42232656478881836, "learning_rate": 6.5728308467235435e-06, "loss": 1.676, "step": 8038 }, { "epoch": 0.8350472629064091, "grad_norm": 0.39752089977264404, "learning_rate": 6.5647464519001725e-06, "loss": 1.3398, "step": 8039 }, { "epoch": 0.8351511374259895, "grad_norm": 0.4039764106273651, "learning_rate": 6.556666682592494e-06, "loss": 1.5381, "step": 8040 }, { "epoch": 0.8352550119455697, "grad_norm": 0.41539785265922546, "learning_rate": 6.548591539660942e-06, "loss": 1.6253, "step": 8041 }, { "epoch": 0.8353588864651501, "grad_norm": 0.45972946286201477, "learning_rate": 6.540521023965457e-06, "loss": 1.7125, "step": 8042 }, { "epoch": 0.8354627609847305, "grad_norm": 0.40128108859062195, "learning_rate": 6.532455136365478e-06, "loss": 1.645, "step": 8043 }, { "epoch": 0.8355666355043108, "grad_norm": 0.43023881316185, "learning_rate": 6.524393877719987e-06, "loss": 1.6461, "step": 8044 }, { "epoch": 0.8356705100238911, "grad_norm": 0.4410925507545471, "learning_rate": 6.516337248887399e-06, "loss": 1.8415, "step": 8045 }, { "epoch": 0.8357743845434715, "grad_norm": 0.4021972417831421, "learning_rate": 6.5082852507257265e-06, "loss": 1.4099, "step": 8046 }, { "epoch": 0.8358782590630518, "grad_norm": 0.4312531352043152, "learning_rate": 6.500237884092425e-06, "loss": 1.6968, "step": 8047 }, { "epoch": 0.8359821335826322, "grad_norm": 0.40367355942726135, "learning_rate": 6.492195149844471e-06, "loss": 1.66, "step": 8048 }, { "epoch": 0.8360860081022126, "grad_norm": 0.4290336072444916, "learning_rate": 6.484157048838368e-06, "loss": 1.657, "step": 8049 }, { "epoch": 0.8361898826217928, "grad_norm": 0.4326249063014984, "learning_rate": 6.476123581930099e-06, "loss": 1.7264, "step": 8050 }, { "epoch": 0.8362937571413732, "grad_norm": 0.42618846893310547, "learning_rate": 6.468094749975167e-06, "loss": 1.798, "step": 8051 }, { "epoch": 0.8363976316609536, "grad_norm": 0.4222290515899658, "learning_rate": 6.460070553828573e-06, "loss": 1.5829, "step": 8052 }, { "epoch": 0.8365015061805339, "grad_norm": 0.3985992670059204, "learning_rate": 6.4520509943448656e-06, "loss": 1.5542, "step": 8053 }, { "epoch": 0.8366053807001143, "grad_norm": 0.3930535614490509, "learning_rate": 6.444036072378018e-06, "loss": 1.5923, "step": 8054 }, { "epoch": 0.8367092552196946, "grad_norm": 0.4032153785228729, "learning_rate": 6.436025788781586e-06, "loss": 1.6131, "step": 8055 }, { "epoch": 0.8368131297392749, "grad_norm": 0.38635045289993286, "learning_rate": 6.428020144408608e-06, "loss": 1.5907, "step": 8056 }, { "epoch": 0.8369170042588553, "grad_norm": 0.43229156732559204, "learning_rate": 6.420019140111588e-06, "loss": 1.6621, "step": 8057 }, { "epoch": 0.8370208787784357, "grad_norm": 0.43635618686676025, "learning_rate": 6.412022776742604e-06, "loss": 1.5021, "step": 8058 }, { "epoch": 0.8371247532980159, "grad_norm": 0.395163893699646, "learning_rate": 6.404031055153198e-06, "loss": 1.6401, "step": 8059 }, { "epoch": 0.8372286278175963, "grad_norm": 0.3980249762535095, "learning_rate": 6.396043976194416e-06, "loss": 1.4499, "step": 8060 }, { "epoch": 0.8373325023371767, "grad_norm": 0.4174276888370514, "learning_rate": 6.3880615407168335e-06, "loss": 1.6367, "step": 8061 }, { "epoch": 0.8374363768567571, "grad_norm": 0.4133899509906769, "learning_rate": 6.380083749570498e-06, "loss": 1.5884, "step": 8062 }, { "epoch": 0.8375402513763374, "grad_norm": 0.41283613443374634, "learning_rate": 6.372110603605014e-06, "loss": 1.4089, "step": 8063 }, { "epoch": 0.8376441258959177, "grad_norm": 0.463218629360199, "learning_rate": 6.364142103669418e-06, "loss": 1.6824, "step": 8064 }, { "epoch": 0.8377480004154981, "grad_norm": 0.39653337001800537, "learning_rate": 6.356178250612327e-06, "loss": 1.7319, "step": 8065 }, { "epoch": 0.8378518749350784, "grad_norm": 0.42945095896720886, "learning_rate": 6.348219045281822e-06, "loss": 1.6687, "step": 8066 }, { "epoch": 0.8379557494546588, "grad_norm": 0.4201321005821228, "learning_rate": 6.340264488525488e-06, "loss": 1.6577, "step": 8067 }, { "epoch": 0.8380596239742392, "grad_norm": 0.44182446599006653, "learning_rate": 6.332314581190424e-06, "loss": 1.7371, "step": 8068 }, { "epoch": 0.8381634984938194, "grad_norm": 0.4279153048992157, "learning_rate": 6.324369324123242e-06, "loss": 1.8348, "step": 8069 }, { "epoch": 0.8382673730133998, "grad_norm": 0.42748069763183594, "learning_rate": 6.316428718170036e-06, "loss": 1.7954, "step": 8070 }, { "epoch": 0.8383712475329802, "grad_norm": 0.4482438862323761, "learning_rate": 6.308492764176421e-06, "loss": 1.4829, "step": 8071 }, { "epoch": 0.8384751220525605, "grad_norm": 0.41090673208236694, "learning_rate": 6.300561462987542e-06, "loss": 1.469, "step": 8072 }, { "epoch": 0.8385789965721409, "grad_norm": 0.4080744981765747, "learning_rate": 6.292634815447978e-06, "loss": 1.5294, "step": 8073 }, { "epoch": 0.8386828710917212, "grad_norm": 0.4303456246852875, "learning_rate": 6.2847128224018835e-06, "loss": 1.5216, "step": 8074 }, { "epoch": 0.8387867456113015, "grad_norm": 0.44199666380882263, "learning_rate": 6.276795484692882e-06, "loss": 1.7961, "step": 8075 }, { "epoch": 0.8388906201308819, "grad_norm": 0.407693088054657, "learning_rate": 6.268882803164106e-06, "loss": 1.6753, "step": 8076 }, { "epoch": 0.8389944946504623, "grad_norm": 0.4050787389278412, "learning_rate": 6.260974778658202e-06, "loss": 1.4427, "step": 8077 }, { "epoch": 0.8390983691700425, "grad_norm": 0.4364643692970276, "learning_rate": 6.253071412017298e-06, "loss": 1.7915, "step": 8078 }, { "epoch": 0.8392022436896229, "grad_norm": 0.4825323522090912, "learning_rate": 6.2451727040830525e-06, "loss": 1.8103, "step": 8079 }, { "epoch": 0.8393061182092033, "grad_norm": 0.42931079864501953, "learning_rate": 6.237278655696605e-06, "loss": 1.4995, "step": 8080 }, { "epoch": 0.8394099927287836, "grad_norm": 0.43772071599960327, "learning_rate": 6.229389267698638e-06, "loss": 1.737, "step": 8081 }, { "epoch": 0.839513867248364, "grad_norm": 0.4058978259563446, "learning_rate": 6.2215045409292794e-06, "loss": 1.5779, "step": 8082 }, { "epoch": 0.8396177417679443, "grad_norm": 0.4402966797351837, "learning_rate": 6.213624476228191e-06, "loss": 1.6885, "step": 8083 }, { "epoch": 0.8397216162875246, "grad_norm": 0.4273587167263031, "learning_rate": 6.205749074434569e-06, "loss": 1.5655, "step": 8084 }, { "epoch": 0.839825490807105, "grad_norm": 0.40787985920906067, "learning_rate": 6.197878336387042e-06, "loss": 1.5628, "step": 8085 }, { "epoch": 0.8399293653266854, "grad_norm": 0.42607665061950684, "learning_rate": 6.190012262923811e-06, "loss": 1.5581, "step": 8086 }, { "epoch": 0.8400332398462657, "grad_norm": 0.4342617690563202, "learning_rate": 6.182150854882546e-06, "loss": 1.8488, "step": 8087 }, { "epoch": 0.840137114365846, "grad_norm": 0.3898141086101532, "learning_rate": 6.1742941131004205e-06, "loss": 1.5469, "step": 8088 }, { "epoch": 0.8402409888854264, "grad_norm": 0.43741726875305176, "learning_rate": 6.166442038414122e-06, "loss": 1.756, "step": 8089 }, { "epoch": 0.8403448634050068, "grad_norm": 0.44850122928619385, "learning_rate": 6.1585946316598195e-06, "loss": 1.5767, "step": 8090 }, { "epoch": 0.8404487379245871, "grad_norm": 0.4059605896472931, "learning_rate": 6.150751893673229e-06, "loss": 1.3907, "step": 8091 }, { "epoch": 0.8405526124441675, "grad_norm": 0.46018004417419434, "learning_rate": 6.142913825289509e-06, "loss": 1.8505, "step": 8092 }, { "epoch": 0.8406564869637478, "grad_norm": 0.4265289306640625, "learning_rate": 6.135080427343376e-06, "loss": 1.5951, "step": 8093 }, { "epoch": 0.8407603614833281, "grad_norm": 0.4429461658000946, "learning_rate": 6.1272517006690174e-06, "loss": 1.7048, "step": 8094 }, { "epoch": 0.8408642360029085, "grad_norm": 0.4366631805896759, "learning_rate": 6.1194276461001345e-06, "loss": 1.6959, "step": 8095 }, { "epoch": 0.8409681105224889, "grad_norm": 0.48970597982406616, "learning_rate": 6.111608264469926e-06, "loss": 1.6371, "step": 8096 }, { "epoch": 0.8410719850420691, "grad_norm": 0.3888145983219147, "learning_rate": 6.103793556611093e-06, "loss": 1.6457, "step": 8097 }, { "epoch": 0.8411758595616495, "grad_norm": 0.39202946424484253, "learning_rate": 6.095983523355841e-06, "loss": 1.5726, "step": 8098 }, { "epoch": 0.8412797340812299, "grad_norm": 0.41915374994277954, "learning_rate": 6.088178165535874e-06, "loss": 1.6642, "step": 8099 }, { "epoch": 0.8413836086008102, "grad_norm": 0.3962395489215851, "learning_rate": 6.080377483982424e-06, "loss": 1.6482, "step": 8100 }, { "epoch": 0.8414874831203906, "grad_norm": 0.3973849415779114, "learning_rate": 6.072581479526168e-06, "loss": 1.5789, "step": 8101 }, { "epoch": 0.841591357639971, "grad_norm": 0.46176597476005554, "learning_rate": 6.064790152997346e-06, "loss": 1.7686, "step": 8102 }, { "epoch": 0.8416952321595512, "grad_norm": 0.39481309056282043, "learning_rate": 6.0570035052256615e-06, "loss": 1.4962, "step": 8103 }, { "epoch": 0.8417991066791316, "grad_norm": 0.42713215947151184, "learning_rate": 6.049221537040339e-06, "loss": 1.5899, "step": 8104 }, { "epoch": 0.841902981198712, "grad_norm": 0.4113538861274719, "learning_rate": 6.041444249270089e-06, "loss": 1.7044, "step": 8105 }, { "epoch": 0.8420068557182923, "grad_norm": 0.414631724357605, "learning_rate": 6.033671642743144e-06, "loss": 1.6839, "step": 8106 }, { "epoch": 0.8421107302378726, "grad_norm": 0.48907044529914856, "learning_rate": 6.025903718287212e-06, "loss": 1.6526, "step": 8107 }, { "epoch": 0.842214604757453, "grad_norm": 0.45432183146476746, "learning_rate": 6.018140476729517e-06, "loss": 1.6792, "step": 8108 }, { "epoch": 0.8423184792770333, "grad_norm": 0.4079488217830658, "learning_rate": 6.010381918896807e-06, "loss": 1.7154, "step": 8109 }, { "epoch": 0.8424223537966137, "grad_norm": 0.42770206928253174, "learning_rate": 6.002628045615283e-06, "loss": 1.6739, "step": 8110 }, { "epoch": 0.8425262283161941, "grad_norm": 0.46013393998146057, "learning_rate": 5.994878857710667e-06, "loss": 1.7719, "step": 8111 }, { "epoch": 0.8426301028357743, "grad_norm": 0.39818307757377625, "learning_rate": 5.9871343560082225e-06, "loss": 1.6761, "step": 8112 }, { "epoch": 0.8427339773553547, "grad_norm": 0.3909452557563782, "learning_rate": 5.9793945413326335e-06, "loss": 1.4098, "step": 8113 }, { "epoch": 0.8428378518749351, "grad_norm": 0.4382929801940918, "learning_rate": 5.9716594145081625e-06, "loss": 1.6621, "step": 8114 }, { "epoch": 0.8429417263945155, "grad_norm": 0.4438725709915161, "learning_rate": 5.963928976358518e-06, "loss": 1.7382, "step": 8115 }, { "epoch": 0.8430456009140957, "grad_norm": 0.4258117973804474, "learning_rate": 5.956203227706963e-06, "loss": 1.7194, "step": 8116 }, { "epoch": 0.8431494754336761, "grad_norm": 0.486128032207489, "learning_rate": 5.948482169376202e-06, "loss": 1.7364, "step": 8117 }, { "epoch": 0.8432533499532565, "grad_norm": 0.4413326382637024, "learning_rate": 5.940765802188458e-06, "loss": 1.4118, "step": 8118 }, { "epoch": 0.8433572244728368, "grad_norm": 0.3983282148838043, "learning_rate": 5.933054126965509e-06, "loss": 1.3972, "step": 8119 }, { "epoch": 0.8434610989924172, "grad_norm": 0.4958374500274658, "learning_rate": 5.925347144528537e-06, "loss": 1.8372, "step": 8120 }, { "epoch": 0.8435649735119976, "grad_norm": 0.44721078872680664, "learning_rate": 5.917644855698306e-06, "loss": 1.7691, "step": 8121 }, { "epoch": 0.8436688480315778, "grad_norm": 0.40920016169548035, "learning_rate": 5.9099472612950455e-06, "loss": 1.6897, "step": 8122 }, { "epoch": 0.8437727225511582, "grad_norm": 0.44964519143104553, "learning_rate": 5.902254362138487e-06, "loss": 1.7894, "step": 8123 }, { "epoch": 0.8438765970707386, "grad_norm": 0.4365478456020355, "learning_rate": 5.894566159047865e-06, "loss": 1.5154, "step": 8124 }, { "epoch": 0.8439804715903189, "grad_norm": 0.42581430077552795, "learning_rate": 5.886882652841907e-06, "loss": 1.613, "step": 8125 }, { "epoch": 0.8440843461098992, "grad_norm": 0.4058164656162262, "learning_rate": 5.879203844338848e-06, "loss": 1.6085, "step": 8126 }, { "epoch": 0.8441882206294796, "grad_norm": 0.40692541003227234, "learning_rate": 5.871529734356423e-06, "loss": 1.6069, "step": 8127 }, { "epoch": 0.8442920951490599, "grad_norm": 0.4061817228794098, "learning_rate": 5.863860323711879e-06, "loss": 1.539, "step": 8128 }, { "epoch": 0.8443959696686403, "grad_norm": 0.489007830619812, "learning_rate": 5.856195613221921e-06, "loss": 1.3045, "step": 8129 }, { "epoch": 0.8444998441882207, "grad_norm": 0.43798279762268066, "learning_rate": 5.848535603702798e-06, "loss": 1.5672, "step": 8130 }, { "epoch": 0.8446037187078009, "grad_norm": 0.43467089533805847, "learning_rate": 5.840880295970247e-06, "loss": 1.7033, "step": 8131 }, { "epoch": 0.8447075932273813, "grad_norm": 0.4537743031978607, "learning_rate": 5.833229690839481e-06, "loss": 1.7776, "step": 8132 }, { "epoch": 0.8448114677469617, "grad_norm": 0.43950802087783813, "learning_rate": 5.825583789125244e-06, "loss": 1.7372, "step": 8133 }, { "epoch": 0.844915342266542, "grad_norm": 0.4260973334312439, "learning_rate": 5.817942591641762e-06, "loss": 1.7629, "step": 8134 }, { "epoch": 0.8450192167861224, "grad_norm": 0.40139877796173096, "learning_rate": 5.810306099202755e-06, "loss": 1.6475, "step": 8135 }, { "epoch": 0.8451230913057027, "grad_norm": 0.45263901352882385, "learning_rate": 5.802674312621448e-06, "loss": 1.477, "step": 8136 }, { "epoch": 0.845226965825283, "grad_norm": 0.4598293900489807, "learning_rate": 5.795047232710599e-06, "loss": 1.586, "step": 8137 }, { "epoch": 0.8453308403448634, "grad_norm": 0.4252384901046753, "learning_rate": 5.787424860282392e-06, "loss": 1.5229, "step": 8138 }, { "epoch": 0.8454347148644438, "grad_norm": 0.42608946561813354, "learning_rate": 5.779807196148556e-06, "loss": 1.7491, "step": 8139 }, { "epoch": 0.8455385893840242, "grad_norm": 0.4330217242240906, "learning_rate": 5.772194241120338e-06, "loss": 1.5724, "step": 8140 }, { "epoch": 0.8456424639036044, "grad_norm": 0.4884481132030487, "learning_rate": 5.764585996008437e-06, "loss": 1.7322, "step": 8141 }, { "epoch": 0.8457463384231848, "grad_norm": 0.45733243227005005, "learning_rate": 5.756982461623084e-06, "loss": 1.763, "step": 8142 }, { "epoch": 0.8458502129427652, "grad_norm": 0.5284457802772522, "learning_rate": 5.749383638773986e-06, "loss": 1.9331, "step": 8143 }, { "epoch": 0.8459540874623455, "grad_norm": 0.4064652621746063, "learning_rate": 5.7417895282703635e-06, "loss": 1.6423, "step": 8144 }, { "epoch": 0.8460579619819258, "grad_norm": 0.4062190055847168, "learning_rate": 5.734200130920925e-06, "loss": 1.5123, "step": 8145 }, { "epoch": 0.8461618365015062, "grad_norm": 0.4112701416015625, "learning_rate": 5.726615447533878e-06, "loss": 1.4982, "step": 8146 }, { "epoch": 0.8462657110210865, "grad_norm": 0.4885147213935852, "learning_rate": 5.719035478916967e-06, "loss": 1.8708, "step": 8147 }, { "epoch": 0.8463695855406669, "grad_norm": 0.44870370626449585, "learning_rate": 5.711460225877346e-06, "loss": 1.5868, "step": 8148 }, { "epoch": 0.8464734600602473, "grad_norm": 0.3870431184768677, "learning_rate": 5.703889689221758e-06, "loss": 1.4511, "step": 8149 }, { "epoch": 0.8465773345798275, "grad_norm": 0.41669902205467224, "learning_rate": 5.696323869756398e-06, "loss": 1.6854, "step": 8150 }, { "epoch": 0.8466812090994079, "grad_norm": 0.40349993109703064, "learning_rate": 5.68876276828696e-06, "loss": 1.6919, "step": 8151 }, { "epoch": 0.8467850836189883, "grad_norm": 0.41194114089012146, "learning_rate": 5.681206385618648e-06, "loss": 1.5359, "step": 8152 }, { "epoch": 0.8468889581385686, "grad_norm": 0.46073397994041443, "learning_rate": 5.6736547225561545e-06, "loss": 1.8919, "step": 8153 }, { "epoch": 0.846992832658149, "grad_norm": 0.4091498851776123, "learning_rate": 5.6661077799036785e-06, "loss": 1.5971, "step": 8154 }, { "epoch": 0.8470967071777293, "grad_norm": 0.4949287176132202, "learning_rate": 5.65856555846489e-06, "loss": 1.6858, "step": 8155 }, { "epoch": 0.8472005816973096, "grad_norm": 0.43929752707481384, "learning_rate": 5.6510280590430174e-06, "loss": 1.673, "step": 8156 }, { "epoch": 0.84730445621689, "grad_norm": 0.4475664794445038, "learning_rate": 5.643495282440703e-06, "loss": 1.5692, "step": 8157 }, { "epoch": 0.8474083307364704, "grad_norm": 0.43574050068855286, "learning_rate": 5.635967229460154e-06, "loss": 1.5003, "step": 8158 }, { "epoch": 0.8475122052560506, "grad_norm": 0.4088006317615509, "learning_rate": 5.628443900903041e-06, "loss": 1.6919, "step": 8159 }, { "epoch": 0.847616079775631, "grad_norm": 0.41070079803466797, "learning_rate": 5.62092529757054e-06, "loss": 1.602, "step": 8160 }, { "epoch": 0.8477199542952114, "grad_norm": 0.38564005494117737, "learning_rate": 5.613411420263331e-06, "loss": 1.6129, "step": 8161 }, { "epoch": 0.8478238288147917, "grad_norm": 0.3986556828022003, "learning_rate": 5.605902269781571e-06, "loss": 1.572, "step": 8162 }, { "epoch": 0.8479277033343721, "grad_norm": 0.41261792182922363, "learning_rate": 5.598397846924935e-06, "loss": 1.6821, "step": 8163 }, { "epoch": 0.8480315778539524, "grad_norm": 0.41062214970588684, "learning_rate": 5.590898152492574e-06, "loss": 1.6375, "step": 8164 }, { "epoch": 0.8481354523735328, "grad_norm": 0.4452644884586334, "learning_rate": 5.583403187283159e-06, "loss": 1.7255, "step": 8165 }, { "epoch": 0.8482393268931131, "grad_norm": 0.41799211502075195, "learning_rate": 5.575912952094858e-06, "loss": 1.7604, "step": 8166 }, { "epoch": 0.8483432014126935, "grad_norm": 0.41636091470718384, "learning_rate": 5.568427447725283e-06, "loss": 1.7037, "step": 8167 }, { "epoch": 0.8484470759322739, "grad_norm": 0.45778560638427734, "learning_rate": 5.560946674971618e-06, "loss": 1.8658, "step": 8168 }, { "epoch": 0.8485509504518541, "grad_norm": 0.4254413843154907, "learning_rate": 5.553470634630492e-06, "loss": 1.5678, "step": 8169 }, { "epoch": 0.8486548249714345, "grad_norm": 0.4398033320903778, "learning_rate": 5.5459993274980525e-06, "loss": 1.6442, "step": 8170 }, { "epoch": 0.8487586994910149, "grad_norm": 0.3922208547592163, "learning_rate": 5.538532754369924e-06, "loss": 1.6004, "step": 8171 }, { "epoch": 0.8488625740105952, "grad_norm": 0.40766361355781555, "learning_rate": 5.531070916041247e-06, "loss": 1.5942, "step": 8172 }, { "epoch": 0.8489664485301756, "grad_norm": 0.39256706833839417, "learning_rate": 5.523613813306644e-06, "loss": 1.6922, "step": 8173 }, { "epoch": 0.8490703230497559, "grad_norm": 0.4119519591331482, "learning_rate": 5.516161446960233e-06, "loss": 1.6135, "step": 8174 }, { "epoch": 0.8491741975693362, "grad_norm": 0.43249499797821045, "learning_rate": 5.508713817795658e-06, "loss": 1.5972, "step": 8175 }, { "epoch": 0.8492780720889166, "grad_norm": 0.4471118152141571, "learning_rate": 5.501270926606e-06, "loss": 1.5106, "step": 8176 }, { "epoch": 0.849381946608497, "grad_norm": 0.49427923560142517, "learning_rate": 5.493832774183894e-06, "loss": 1.9156, "step": 8177 }, { "epoch": 0.8494858211280772, "grad_norm": 0.4142952263355255, "learning_rate": 5.4863993613214314e-06, "loss": 1.592, "step": 8178 }, { "epoch": 0.8495896956476576, "grad_norm": 0.4335556626319885, "learning_rate": 5.478970688810225e-06, "loss": 1.6454, "step": 8179 }, { "epoch": 0.849693570167238, "grad_norm": 0.42512187361717224, "learning_rate": 5.471546757441359e-06, "loss": 1.7903, "step": 8180 }, { "epoch": 0.8497974446868183, "grad_norm": 0.4263908863067627, "learning_rate": 5.464127568005423e-06, "loss": 1.5594, "step": 8181 }, { "epoch": 0.8499013192063987, "grad_norm": 0.4174048602581024, "learning_rate": 5.456713121292517e-06, "loss": 1.6509, "step": 8182 }, { "epoch": 0.850005193725979, "grad_norm": 0.3824132978916168, "learning_rate": 5.4493034180921944e-06, "loss": 1.4437, "step": 8183 }, { "epoch": 0.8501090682455593, "grad_norm": 0.42041993141174316, "learning_rate": 5.441898459193573e-06, "loss": 1.689, "step": 8184 }, { "epoch": 0.8502129427651397, "grad_norm": 0.3966014087200165, "learning_rate": 5.434498245385184e-06, "loss": 1.6464, "step": 8185 }, { "epoch": 0.8503168172847201, "grad_norm": 0.4592750370502472, "learning_rate": 5.427102777455112e-06, "loss": 1.4614, "step": 8186 }, { "epoch": 0.8504206918043004, "grad_norm": 0.40993037819862366, "learning_rate": 5.419712056190912e-06, "loss": 1.5345, "step": 8187 }, { "epoch": 0.8505245663238807, "grad_norm": 0.4124085009098053, "learning_rate": 5.412326082379637e-06, "loss": 1.5901, "step": 8188 }, { "epoch": 0.8506284408434611, "grad_norm": 0.3968968391418457, "learning_rate": 5.404944856807842e-06, "loss": 1.5966, "step": 8189 }, { "epoch": 0.8507323153630414, "grad_norm": 0.4331669509410858, "learning_rate": 5.3975683802615586e-06, "loss": 1.6621, "step": 8190 }, { "epoch": 0.8508361898826218, "grad_norm": 0.42043596506118774, "learning_rate": 5.390196653526336e-06, "loss": 1.6508, "step": 8191 }, { "epoch": 0.8509400644022022, "grad_norm": 0.41201046109199524, "learning_rate": 5.382829677387186e-06, "loss": 1.6648, "step": 8192 }, { "epoch": 0.8510439389217825, "grad_norm": 0.41224047541618347, "learning_rate": 5.375467452628663e-06, "loss": 1.7252, "step": 8193 }, { "epoch": 0.8511478134413628, "grad_norm": 0.42982521653175354, "learning_rate": 5.368109980034775e-06, "loss": 1.6036, "step": 8194 }, { "epoch": 0.8512516879609432, "grad_norm": 0.41480857133865356, "learning_rate": 5.360757260389015e-06, "loss": 1.5241, "step": 8195 }, { "epoch": 0.8513555624805236, "grad_norm": 0.41016316413879395, "learning_rate": 5.353409294474421e-06, "loss": 1.568, "step": 8196 }, { "epoch": 0.8514594370001038, "grad_norm": 0.41887757182121277, "learning_rate": 5.346066083073475e-06, "loss": 1.5569, "step": 8197 }, { "epoch": 0.8515633115196842, "grad_norm": 0.43342113494873047, "learning_rate": 5.338727626968182e-06, "loss": 1.6933, "step": 8198 }, { "epoch": 0.8516671860392646, "grad_norm": 0.4318787753582001, "learning_rate": 5.331393926940021e-06, "loss": 1.7458, "step": 8199 }, { "epoch": 0.8517710605588449, "grad_norm": 0.4099392294883728, "learning_rate": 5.324064983769978e-06, "loss": 1.6325, "step": 8200 }, { "epoch": 0.8518749350784253, "grad_norm": 0.43265894055366516, "learning_rate": 5.316740798238534e-06, "loss": 1.7025, "step": 8201 }, { "epoch": 0.8519788095980056, "grad_norm": 0.4109574854373932, "learning_rate": 5.309421371125639e-06, "loss": 1.7289, "step": 8202 }, { "epoch": 0.8520826841175859, "grad_norm": 0.4493882954120636, "learning_rate": 5.302106703210785e-06, "loss": 1.6378, "step": 8203 }, { "epoch": 0.8521865586371663, "grad_norm": 0.49722832441329956, "learning_rate": 5.294796795272894e-06, "loss": 1.8429, "step": 8204 }, { "epoch": 0.8522904331567467, "grad_norm": 0.3835256099700928, "learning_rate": 5.287491648090437e-06, "loss": 1.515, "step": 8205 }, { "epoch": 0.852394307676327, "grad_norm": 0.4009277820587158, "learning_rate": 5.280191262441353e-06, "loss": 1.4383, "step": 8206 }, { "epoch": 0.8524981821959073, "grad_norm": 0.4271654486656189, "learning_rate": 5.27289563910307e-06, "loss": 1.6824, "step": 8207 }, { "epoch": 0.8526020567154877, "grad_norm": 0.41742265224456787, "learning_rate": 5.265604778852512e-06, "loss": 1.525, "step": 8208 }, { "epoch": 0.852705931235068, "grad_norm": 0.4177842140197754, "learning_rate": 5.258318682466107e-06, "loss": 1.6278, "step": 8209 }, { "epoch": 0.8528098057546484, "grad_norm": 0.4213246703147888, "learning_rate": 5.2510373507197675e-06, "loss": 1.563, "step": 8210 }, { "epoch": 0.8529136802742288, "grad_norm": 0.40991777181625366, "learning_rate": 5.243760784388879e-06, "loss": 1.5489, "step": 8211 }, { "epoch": 0.853017554793809, "grad_norm": 0.4318062961101532, "learning_rate": 5.236488984248378e-06, "loss": 1.7528, "step": 8212 }, { "epoch": 0.8531214293133894, "grad_norm": 0.40052494406700134, "learning_rate": 5.229221951072611e-06, "loss": 1.5852, "step": 8213 }, { "epoch": 0.8532253038329698, "grad_norm": 0.4887178838253021, "learning_rate": 5.221959685635491e-06, "loss": 1.7859, "step": 8214 }, { "epoch": 0.8533291783525501, "grad_norm": 0.4212459325790405, "learning_rate": 5.2147021887103796e-06, "loss": 1.6043, "step": 8215 }, { "epoch": 0.8534330528721304, "grad_norm": 0.43245670199394226, "learning_rate": 5.207449461070146e-06, "loss": 1.7065, "step": 8216 }, { "epoch": 0.8535369273917108, "grad_norm": 0.48420557379722595, "learning_rate": 5.2002015034871564e-06, "loss": 1.8633, "step": 8217 }, { "epoch": 0.8536408019112912, "grad_norm": 0.46730557084083557, "learning_rate": 5.1929583167332375e-06, "loss": 1.7325, "step": 8218 }, { "epoch": 0.8537446764308715, "grad_norm": 0.45189306139945984, "learning_rate": 5.185719901579772e-06, "loss": 1.8017, "step": 8219 }, { "epoch": 0.8538485509504519, "grad_norm": 0.46504566073417664, "learning_rate": 5.178486258797555e-06, "loss": 1.8846, "step": 8220 }, { "epoch": 0.8539524254700323, "grad_norm": 0.4610961973667145, "learning_rate": 5.171257389156936e-06, "loss": 1.7558, "step": 8221 }, { "epoch": 0.8540562999896125, "grad_norm": 0.4036599397659302, "learning_rate": 5.164033293427739e-06, "loss": 1.6539, "step": 8222 }, { "epoch": 0.8541601745091929, "grad_norm": 0.4568711817264557, "learning_rate": 5.156813972379243e-06, "loss": 1.769, "step": 8223 }, { "epoch": 0.8542640490287733, "grad_norm": 0.4027152955532074, "learning_rate": 5.1495994267802775e-06, "loss": 1.5509, "step": 8224 }, { "epoch": 0.8543679235483536, "grad_norm": 0.44536474347114563, "learning_rate": 5.142389657399127e-06, "loss": 1.7014, "step": 8225 }, { "epoch": 0.8544717980679339, "grad_norm": 0.43924811482429504, "learning_rate": 5.135184665003573e-06, "loss": 1.602, "step": 8226 }, { "epoch": 0.8545756725875143, "grad_norm": 0.4705229699611664, "learning_rate": 5.127984450360895e-06, "loss": 1.7716, "step": 8227 }, { "epoch": 0.8546795471070946, "grad_norm": 0.42262768745422363, "learning_rate": 5.120789014237859e-06, "loss": 1.6233, "step": 8228 }, { "epoch": 0.854783421626675, "grad_norm": 0.41569891571998596, "learning_rate": 5.113598357400723e-06, "loss": 1.5956, "step": 8229 }, { "epoch": 0.8548872961462554, "grad_norm": 0.3866024911403656, "learning_rate": 5.106412480615219e-06, "loss": 1.5542, "step": 8230 }, { "epoch": 0.8549911706658356, "grad_norm": 0.4203120768070221, "learning_rate": 5.09923138464663e-06, "loss": 1.6801, "step": 8231 }, { "epoch": 0.855095045185416, "grad_norm": 0.433152973651886, "learning_rate": 5.092055070259632e-06, "loss": 1.6589, "step": 8232 }, { "epoch": 0.8551989197049964, "grad_norm": 0.42945393919944763, "learning_rate": 5.0848835382184875e-06, "loss": 1.6403, "step": 8233 }, { "epoch": 0.8553027942245767, "grad_norm": 0.46806976199150085, "learning_rate": 5.077716789286896e-06, "loss": 1.7067, "step": 8234 }, { "epoch": 0.855406668744157, "grad_norm": 0.3977743983268738, "learning_rate": 5.070554824228052e-06, "loss": 1.667, "step": 8235 }, { "epoch": 0.8555105432637374, "grad_norm": 0.40513405203819275, "learning_rate": 5.063397643804662e-06, "loss": 1.4978, "step": 8236 }, { "epoch": 0.8556144177833177, "grad_norm": 0.41432732343673706, "learning_rate": 5.056245248778902e-06, "loss": 1.5833, "step": 8237 }, { "epoch": 0.8557182923028981, "grad_norm": 0.4109817147254944, "learning_rate": 5.049097639912454e-06, "loss": 1.4775, "step": 8238 }, { "epoch": 0.8558221668224785, "grad_norm": 0.4143582582473755, "learning_rate": 5.04195481796646e-06, "loss": 1.612, "step": 8239 }, { "epoch": 0.8559260413420587, "grad_norm": 0.45763957500457764, "learning_rate": 5.034816783701613e-06, "loss": 1.5282, "step": 8240 }, { "epoch": 0.8560299158616391, "grad_norm": 0.4432554543018341, "learning_rate": 5.027683537878014e-06, "loss": 1.5708, "step": 8241 }, { "epoch": 0.8561337903812195, "grad_norm": 0.42282775044441223, "learning_rate": 5.0205550812553345e-06, "loss": 1.6524, "step": 8242 }, { "epoch": 0.8562376649007999, "grad_norm": 0.4614414870738983, "learning_rate": 5.01343141459269e-06, "loss": 1.8134, "step": 8243 }, { "epoch": 0.8563415394203802, "grad_norm": 0.5265931487083435, "learning_rate": 5.0063125386486834e-06, "loss": 1.9862, "step": 8244 }, { "epoch": 0.8564454139399605, "grad_norm": 0.46069347858428955, "learning_rate": 4.999198454181431e-06, "loss": 1.8108, "step": 8245 }, { "epoch": 0.8565492884595409, "grad_norm": 0.397867351770401, "learning_rate": 4.99208916194851e-06, "loss": 1.6589, "step": 8246 }, { "epoch": 0.8566531629791212, "grad_norm": 0.4252391457557678, "learning_rate": 4.984984662707043e-06, "loss": 1.7302, "step": 8247 }, { "epoch": 0.8567570374987016, "grad_norm": 0.41175001859664917, "learning_rate": 4.977884957213558e-06, "loss": 1.564, "step": 8248 }, { "epoch": 0.856860912018282, "grad_norm": 0.40351465344429016, "learning_rate": 4.970790046224144e-06, "loss": 1.7149, "step": 8249 }, { "epoch": 0.8569647865378622, "grad_norm": 0.4298102557659149, "learning_rate": 4.963699930494364e-06, "loss": 1.5095, "step": 8250 }, { "epoch": 0.8570686610574426, "grad_norm": 0.45123982429504395, "learning_rate": 4.95661461077922e-06, "loss": 1.5314, "step": 8251 }, { "epoch": 0.857172535577023, "grad_norm": 0.41809526085853577, "learning_rate": 4.94953408783328e-06, "loss": 1.5747, "step": 8252 }, { "epoch": 0.8572764100966033, "grad_norm": 0.43481943011283875, "learning_rate": 4.942458362410557e-06, "loss": 1.628, "step": 8253 }, { "epoch": 0.8573802846161837, "grad_norm": 0.4104442000389099, "learning_rate": 4.935387435264549e-06, "loss": 1.5326, "step": 8254 }, { "epoch": 0.857484159135764, "grad_norm": 0.47249045968055725, "learning_rate": 4.928321307148265e-06, "loss": 1.7157, "step": 8255 }, { "epoch": 0.8575880336553443, "grad_norm": 0.4751419425010681, "learning_rate": 4.921259978814191e-06, "loss": 1.8348, "step": 8256 }, { "epoch": 0.8576919081749247, "grad_norm": 0.44779595732688904, "learning_rate": 4.914203451014299e-06, "loss": 1.521, "step": 8257 }, { "epoch": 0.8577957826945051, "grad_norm": 0.4319514036178589, "learning_rate": 4.907151724500048e-06, "loss": 1.6885, "step": 8258 }, { "epoch": 0.8578996572140853, "grad_norm": 0.44196733832359314, "learning_rate": 4.9001048000224205e-06, "loss": 1.746, "step": 8259 }, { "epoch": 0.8580035317336657, "grad_norm": 0.4159088134765625, "learning_rate": 4.893062678331817e-06, "loss": 1.794, "step": 8260 }, { "epoch": 0.8581074062532461, "grad_norm": 0.4175340533256531, "learning_rate": 4.886025360178204e-06, "loss": 1.6533, "step": 8261 }, { "epoch": 0.8582112807728264, "grad_norm": 0.41944602131843567, "learning_rate": 4.878992846310987e-06, "loss": 1.6403, "step": 8262 }, { "epoch": 0.8583151552924068, "grad_norm": 0.43087661266326904, "learning_rate": 4.8719651374790735e-06, "loss": 1.7938, "step": 8263 }, { "epoch": 0.8584190298119871, "grad_norm": 0.41063591837882996, "learning_rate": 4.864942234430858e-06, "loss": 1.5834, "step": 8264 }, { "epoch": 0.8585229043315674, "grad_norm": 0.42360448837280273, "learning_rate": 4.857924137914233e-06, "loss": 1.6952, "step": 8265 }, { "epoch": 0.8586267788511478, "grad_norm": 0.4151276648044586, "learning_rate": 4.85091084867656e-06, "loss": 1.658, "step": 8266 }, { "epoch": 0.8587306533707282, "grad_norm": 0.41758909821510315, "learning_rate": 4.843902367464698e-06, "loss": 1.6903, "step": 8267 }, { "epoch": 0.8588345278903085, "grad_norm": 0.4258948266506195, "learning_rate": 4.8368986950250275e-06, "loss": 1.7291, "step": 8268 }, { "epoch": 0.8589384024098888, "grad_norm": 0.3859458565711975, "learning_rate": 4.829899832103335e-06, "loss": 1.5672, "step": 8269 }, { "epoch": 0.8590422769294692, "grad_norm": 0.3968896269798279, "learning_rate": 4.82290577944498e-06, "loss": 1.5287, "step": 8270 }, { "epoch": 0.8591461514490496, "grad_norm": 0.4189419150352478, "learning_rate": 4.815916537794763e-06, "loss": 1.7356, "step": 8271 }, { "epoch": 0.8592500259686299, "grad_norm": 0.39579710364341736, "learning_rate": 4.8089321078969904e-06, "loss": 1.5447, "step": 8272 }, { "epoch": 0.8593539004882103, "grad_norm": 0.42127490043640137, "learning_rate": 4.801952490495437e-06, "loss": 1.6652, "step": 8273 }, { "epoch": 0.8594577750077906, "grad_norm": 0.46544432640075684, "learning_rate": 4.79497768633338e-06, "loss": 1.6881, "step": 8274 }, { "epoch": 0.8595616495273709, "grad_norm": 0.39384204149246216, "learning_rate": 4.788007696153607e-06, "loss": 1.6703, "step": 8275 }, { "epoch": 0.8596655240469513, "grad_norm": 0.4230908155441284, "learning_rate": 4.781042520698326e-06, "loss": 1.7158, "step": 8276 }, { "epoch": 0.8597693985665317, "grad_norm": 0.4715307652950287, "learning_rate": 4.774082160709309e-06, "loss": 1.518, "step": 8277 }, { "epoch": 0.8598732730861119, "grad_norm": 0.42412781715393066, "learning_rate": 4.767126616927769e-06, "loss": 1.5671, "step": 8278 }, { "epoch": 0.8599771476056923, "grad_norm": 0.4136991500854492, "learning_rate": 4.760175890094399e-06, "loss": 1.6172, "step": 8279 }, { "epoch": 0.8600810221252727, "grad_norm": 0.4031031131744385, "learning_rate": 4.753229980949419e-06, "loss": 1.5905, "step": 8280 }, { "epoch": 0.860184896644853, "grad_norm": 0.4419058859348297, "learning_rate": 4.746288890232514e-06, "loss": 1.6522, "step": 8281 }, { "epoch": 0.8602887711644334, "grad_norm": 0.4122452139854431, "learning_rate": 4.739352618682846e-06, "loss": 1.6796, "step": 8282 }, { "epoch": 0.8603926456840137, "grad_norm": 0.44128134846687317, "learning_rate": 4.732421167039075e-06, "loss": 1.6135, "step": 8283 }, { "epoch": 0.860496520203594, "grad_norm": 0.4065702259540558, "learning_rate": 4.725494536039354e-06, "loss": 1.4627, "step": 8284 }, { "epoch": 0.8606003947231744, "grad_norm": 0.40310558676719666, "learning_rate": 4.718572726421305e-06, "loss": 1.782, "step": 8285 }, { "epoch": 0.8607042692427548, "grad_norm": 0.4221704602241516, "learning_rate": 4.711655738922044e-06, "loss": 1.6468, "step": 8286 }, { "epoch": 0.860808143762335, "grad_norm": 0.41662177443504333, "learning_rate": 4.704743574278209e-06, "loss": 1.6226, "step": 8287 }, { "epoch": 0.8609120182819154, "grad_norm": 0.42002618312835693, "learning_rate": 4.697836233225838e-06, "loss": 1.6135, "step": 8288 }, { "epoch": 0.8610158928014958, "grad_norm": 0.4459064304828644, "learning_rate": 4.690933716500556e-06, "loss": 1.845, "step": 8289 }, { "epoch": 0.8611197673210761, "grad_norm": 0.43704405426979065, "learning_rate": 4.684036024837402e-06, "loss": 1.7574, "step": 8290 }, { "epoch": 0.8612236418406565, "grad_norm": 0.3958377540111542, "learning_rate": 4.677143158970937e-06, "loss": 1.5884, "step": 8291 }, { "epoch": 0.8613275163602369, "grad_norm": 0.4173584580421448, "learning_rate": 4.670255119635192e-06, "loss": 1.6082, "step": 8292 }, { "epoch": 0.8614313908798171, "grad_norm": 0.4080117642879486, "learning_rate": 4.663371907563696e-06, "loss": 1.5688, "step": 8293 }, { "epoch": 0.8615352653993975, "grad_norm": 0.4516976773738861, "learning_rate": 4.656493523489447e-06, "loss": 1.7237, "step": 8294 }, { "epoch": 0.8616391399189779, "grad_norm": 0.4905436635017395, "learning_rate": 4.649619968144936e-06, "loss": 1.6905, "step": 8295 }, { "epoch": 0.8617430144385583, "grad_norm": 0.4043090045452118, "learning_rate": 4.642751242262161e-06, "loss": 1.5359, "step": 8296 }, { "epoch": 0.8618468889581385, "grad_norm": 0.4110516905784607, "learning_rate": 4.635887346572582e-06, "loss": 1.6373, "step": 8297 }, { "epoch": 0.8619507634777189, "grad_norm": 0.4277547299861908, "learning_rate": 4.629028281807146e-06, "loss": 1.7373, "step": 8298 }, { "epoch": 0.8620546379972993, "grad_norm": 0.3978832960128784, "learning_rate": 4.622174048696288e-06, "loss": 1.456, "step": 8299 }, { "epoch": 0.8621585125168796, "grad_norm": 0.4189075231552124, "learning_rate": 4.615324647969932e-06, "loss": 1.7349, "step": 8300 }, { "epoch": 0.86226238703646, "grad_norm": 0.4180492162704468, "learning_rate": 4.608480080357491e-06, "loss": 1.4529, "step": 8301 }, { "epoch": 0.8623662615560403, "grad_norm": 0.4220123291015625, "learning_rate": 4.601640346587843e-06, "loss": 1.7035, "step": 8302 }, { "epoch": 0.8624701360756206, "grad_norm": 0.4284611642360687, "learning_rate": 4.5948054473894e-06, "loss": 1.6631, "step": 8303 }, { "epoch": 0.862574010595201, "grad_norm": 0.4400910437107086, "learning_rate": 4.587975383489978e-06, "loss": 1.6642, "step": 8304 }, { "epoch": 0.8626778851147814, "grad_norm": 0.4507836699485779, "learning_rate": 4.581150155616959e-06, "loss": 1.79, "step": 8305 }, { "epoch": 0.8627817596343617, "grad_norm": 0.4717456102371216, "learning_rate": 4.574329764497182e-06, "loss": 1.7097, "step": 8306 }, { "epoch": 0.862885634153942, "grad_norm": 0.42623621225357056, "learning_rate": 4.567514210856933e-06, "loss": 1.6195, "step": 8307 }, { "epoch": 0.8629895086735224, "grad_norm": 0.3888145387172699, "learning_rate": 4.560703495422037e-06, "loss": 1.4697, "step": 8308 }, { "epoch": 0.8630933831931027, "grad_norm": 0.40128466486930847, "learning_rate": 4.553897618917785e-06, "loss": 1.6035, "step": 8309 }, { "epoch": 0.8631972577126831, "grad_norm": 0.43197745084762573, "learning_rate": 4.547096582068938e-06, "loss": 1.6968, "step": 8310 }, { "epoch": 0.8633011322322635, "grad_norm": 0.4532462954521179, "learning_rate": 4.540300385599761e-06, "loss": 1.8258, "step": 8311 }, { "epoch": 0.8634050067518437, "grad_norm": 0.3836333453655243, "learning_rate": 4.533509030233995e-06, "loss": 1.6876, "step": 8312 }, { "epoch": 0.8635088812714241, "grad_norm": 0.44952192902565, "learning_rate": 4.5267225166948645e-06, "loss": 1.7243, "step": 8313 }, { "epoch": 0.8636127557910045, "grad_norm": 0.4556354284286499, "learning_rate": 4.519940845705067e-06, "loss": 1.8883, "step": 8314 }, { "epoch": 0.8637166303105848, "grad_norm": 0.40353667736053467, "learning_rate": 4.513164017986837e-06, "loss": 1.4688, "step": 8315 }, { "epoch": 0.8638205048301651, "grad_norm": 0.4035186171531677, "learning_rate": 4.506392034261803e-06, "loss": 1.7138, "step": 8316 }, { "epoch": 0.8639243793497455, "grad_norm": 0.4350834786891937, "learning_rate": 4.49962489525117e-06, "loss": 1.6899, "step": 8317 }, { "epoch": 0.8640282538693258, "grad_norm": 0.4328579902648926, "learning_rate": 4.492862601675563e-06, "loss": 1.5958, "step": 8318 }, { "epoch": 0.8641321283889062, "grad_norm": 0.45060819387435913, "learning_rate": 4.486105154255121e-06, "loss": 1.8196, "step": 8319 }, { "epoch": 0.8642360029084866, "grad_norm": 0.4311492443084717, "learning_rate": 4.4793525537094595e-06, "loss": 1.7509, "step": 8320 }, { "epoch": 0.864339877428067, "grad_norm": 0.45266759395599365, "learning_rate": 4.472604800757668e-06, "loss": 1.813, "step": 8321 }, { "epoch": 0.8644437519476472, "grad_norm": 0.42085716128349304, "learning_rate": 4.4658618961183506e-06, "loss": 1.6671, "step": 8322 }, { "epoch": 0.8645476264672276, "grad_norm": 0.4207400381565094, "learning_rate": 4.459123840509549e-06, "loss": 1.5313, "step": 8323 }, { "epoch": 0.864651500986808, "grad_norm": 0.4059183895587921, "learning_rate": 4.4523906346488345e-06, "loss": 1.5305, "step": 8324 }, { "epoch": 0.8647553755063883, "grad_norm": 0.40492719411849976, "learning_rate": 4.445662279253226e-06, "loss": 1.5326, "step": 8325 }, { "epoch": 0.8648592500259686, "grad_norm": 0.43756330013275146, "learning_rate": 4.438938775039253e-06, "loss": 1.7945, "step": 8326 }, { "epoch": 0.864963124545549, "grad_norm": 0.44969308376312256, "learning_rate": 4.4322201227229134e-06, "loss": 1.7237, "step": 8327 }, { "epoch": 0.8650669990651293, "grad_norm": 0.4445195198059082, "learning_rate": 4.425506323019685e-06, "loss": 1.7394, "step": 8328 }, { "epoch": 0.8651708735847097, "grad_norm": 0.5065091252326965, "learning_rate": 4.418797376644534e-06, "loss": 1.6003, "step": 8329 }, { "epoch": 0.8652747481042901, "grad_norm": 0.4191296696662903, "learning_rate": 4.412093284311913e-06, "loss": 1.5485, "step": 8330 }, { "epoch": 0.8653786226238703, "grad_norm": 0.4144963026046753, "learning_rate": 4.405394046735773e-06, "loss": 1.5195, "step": 8331 }, { "epoch": 0.8654824971434507, "grad_norm": 0.42345312237739563, "learning_rate": 4.398699664629497e-06, "loss": 1.7706, "step": 8332 }, { "epoch": 0.8655863716630311, "grad_norm": 0.4790879189968109, "learning_rate": 4.392010138706015e-06, "loss": 1.6909, "step": 8333 }, { "epoch": 0.8656902461826114, "grad_norm": 0.39938417077064514, "learning_rate": 4.3853254696777036e-06, "loss": 1.6691, "step": 8334 }, { "epoch": 0.8657941207021917, "grad_norm": 0.382337361574173, "learning_rate": 4.378645658256403e-06, "loss": 1.5524, "step": 8335 }, { "epoch": 0.8658979952217721, "grad_norm": 0.41304081678390503, "learning_rate": 4.371970705153489e-06, "loss": 1.6553, "step": 8336 }, { "epoch": 0.8660018697413524, "grad_norm": 0.44353777170181274, "learning_rate": 4.365300611079781e-06, "loss": 1.7254, "step": 8337 }, { "epoch": 0.8661057442609328, "grad_norm": 0.4305352568626404, "learning_rate": 4.358635376745601e-06, "loss": 1.6613, "step": 8338 }, { "epoch": 0.8662096187805132, "grad_norm": 0.4181787371635437, "learning_rate": 4.351975002860731e-06, "loss": 1.6243, "step": 8339 }, { "epoch": 0.8663134933000934, "grad_norm": 0.49580061435699463, "learning_rate": 4.345319490134453e-06, "loss": 1.6895, "step": 8340 }, { "epoch": 0.8664173678196738, "grad_norm": 0.422701358795166, "learning_rate": 4.338668839275534e-06, "loss": 1.6746, "step": 8341 }, { "epoch": 0.8665212423392542, "grad_norm": 0.4329741299152374, "learning_rate": 4.3320230509922014e-06, "loss": 1.6391, "step": 8342 }, { "epoch": 0.8666251168588345, "grad_norm": 0.40780285000801086, "learning_rate": 4.3253821259922055e-06, "loss": 1.5569, "step": 8343 }, { "epoch": 0.8667289913784149, "grad_norm": 0.42592713236808777, "learning_rate": 4.318746064982721e-06, "loss": 1.6763, "step": 8344 }, { "epoch": 0.8668328658979952, "grad_norm": 0.391530305147171, "learning_rate": 4.312114868670458e-06, "loss": 1.4977, "step": 8345 }, { "epoch": 0.8669367404175756, "grad_norm": 0.41570156812667847, "learning_rate": 4.305488537761588e-06, "loss": 1.586, "step": 8346 }, { "epoch": 0.8670406149371559, "grad_norm": 0.42140865325927734, "learning_rate": 4.298867072961754e-06, "loss": 1.6925, "step": 8347 }, { "epoch": 0.8671444894567363, "grad_norm": 0.4236295819282532, "learning_rate": 4.2922504749761005e-06, "loss": 1.5939, "step": 8348 }, { "epoch": 0.8672483639763167, "grad_norm": 0.4136272966861725, "learning_rate": 4.2856387445092235e-06, "loss": 1.6401, "step": 8349 }, { "epoch": 0.8673522384958969, "grad_norm": 0.4001111090183258, "learning_rate": 4.2790318822652495e-06, "loss": 1.4537, "step": 8350 }, { "epoch": 0.8674561130154773, "grad_norm": 0.40965673327445984, "learning_rate": 4.272429888947732e-06, "loss": 1.472, "step": 8351 }, { "epoch": 0.8675599875350577, "grad_norm": 0.4452982544898987, "learning_rate": 4.265832765259748e-06, "loss": 1.7643, "step": 8352 }, { "epoch": 0.867663862054638, "grad_norm": 0.41096264123916626, "learning_rate": 4.259240511903834e-06, "loss": 1.6081, "step": 8353 }, { "epoch": 0.8677677365742184, "grad_norm": 0.4857504367828369, "learning_rate": 4.252653129582013e-06, "loss": 1.8201, "step": 8354 }, { "epoch": 0.8678716110937987, "grad_norm": 0.3969123959541321, "learning_rate": 4.2460706189957895e-06, "loss": 1.5116, "step": 8355 }, { "epoch": 0.867975485613379, "grad_norm": 0.40044331550598145, "learning_rate": 4.239492980846155e-06, "loss": 1.772, "step": 8356 }, { "epoch": 0.8680793601329594, "grad_norm": 0.47415369749069214, "learning_rate": 4.232920215833569e-06, "loss": 1.9206, "step": 8357 }, { "epoch": 0.8681832346525398, "grad_norm": 0.41735053062438965, "learning_rate": 4.226352324657973e-06, "loss": 1.7219, "step": 8358 }, { "epoch": 0.86828710917212, "grad_norm": 0.43952199816703796, "learning_rate": 4.219789308018829e-06, "loss": 1.8179, "step": 8359 }, { "epoch": 0.8683909836917004, "grad_norm": 0.42207950353622437, "learning_rate": 4.2132311666150005e-06, "loss": 1.6541, "step": 8360 }, { "epoch": 0.8684948582112808, "grad_norm": 0.3974553942680359, "learning_rate": 4.206677901144917e-06, "loss": 1.5655, "step": 8361 }, { "epoch": 0.8685987327308611, "grad_norm": 0.4383615255355835, "learning_rate": 4.200129512306439e-06, "loss": 1.707, "step": 8362 }, { "epoch": 0.8687026072504415, "grad_norm": 0.43259862065315247, "learning_rate": 4.1935860007969005e-06, "loss": 1.6708, "step": 8363 }, { "epoch": 0.8688064817700218, "grad_norm": 0.4056100845336914, "learning_rate": 4.187047367313157e-06, "loss": 1.5422, "step": 8364 }, { "epoch": 0.8689103562896021, "grad_norm": 0.3907129466533661, "learning_rate": 4.180513612551517e-06, "loss": 1.5345, "step": 8365 }, { "epoch": 0.8690142308091825, "grad_norm": 0.3913259208202362, "learning_rate": 4.17398473720777e-06, "loss": 1.5311, "step": 8366 }, { "epoch": 0.8691181053287629, "grad_norm": 0.4376855790615082, "learning_rate": 4.1674607419771915e-06, "loss": 1.4414, "step": 8367 }, { "epoch": 0.8692219798483432, "grad_norm": 0.44017404317855835, "learning_rate": 4.160941627554538e-06, "loss": 1.4987, "step": 8368 }, { "epoch": 0.8693258543679235, "grad_norm": 0.4197766184806824, "learning_rate": 4.154427394634036e-06, "loss": 1.6341, "step": 8369 }, { "epoch": 0.8694297288875039, "grad_norm": 0.41272974014282227, "learning_rate": 4.147918043909404e-06, "loss": 1.7277, "step": 8370 }, { "epoch": 0.8695336034070842, "grad_norm": 0.4294355511665344, "learning_rate": 4.14141357607386e-06, "loss": 1.6332, "step": 8371 }, { "epoch": 0.8696374779266646, "grad_norm": 0.43766576051712036, "learning_rate": 4.1349139918200385e-06, "loss": 1.6576, "step": 8372 }, { "epoch": 0.869741352446245, "grad_norm": 0.39350345730781555, "learning_rate": 4.1284192918401245e-06, "loss": 1.517, "step": 8373 }, { "epoch": 0.8698452269658253, "grad_norm": 0.41571328043937683, "learning_rate": 4.121929476825742e-06, "loss": 1.6047, "step": 8374 }, { "epoch": 0.8699491014854056, "grad_norm": 0.4371244013309479, "learning_rate": 4.1154445474680115e-06, "loss": 1.6017, "step": 8375 }, { "epoch": 0.870052976004986, "grad_norm": 0.4310753643512726, "learning_rate": 4.108964504457524e-06, "loss": 1.7602, "step": 8376 }, { "epoch": 0.8701568505245664, "grad_norm": 0.3955596387386322, "learning_rate": 4.102489348484339e-06, "loss": 1.542, "step": 8377 }, { "epoch": 0.8702607250441466, "grad_norm": 0.3818468749523163, "learning_rate": 4.096019080238045e-06, "loss": 1.4748, "step": 8378 }, { "epoch": 0.870364599563727, "grad_norm": 0.4244476854801178, "learning_rate": 4.089553700407634e-06, "loss": 1.6876, "step": 8379 }, { "epoch": 0.8704684740833074, "grad_norm": 0.44458895921707153, "learning_rate": 4.083093209681649e-06, "loss": 1.6637, "step": 8380 }, { "epoch": 0.8705723486028877, "grad_norm": 0.46665099263191223, "learning_rate": 4.076637608748074e-06, "loss": 1.4212, "step": 8381 }, { "epoch": 0.8706762231224681, "grad_norm": 0.43539130687713623, "learning_rate": 4.070186898294375e-06, "loss": 1.7361, "step": 8382 }, { "epoch": 0.8707800976420484, "grad_norm": 0.4533788561820984, "learning_rate": 4.063741079007505e-06, "loss": 1.9468, "step": 8383 }, { "epoch": 0.8708839721616287, "grad_norm": 0.40480610728263855, "learning_rate": 4.057300151573895e-06, "loss": 1.6335, "step": 8384 }, { "epoch": 0.8709878466812091, "grad_norm": 0.5002861618995667, "learning_rate": 4.050864116679454e-06, "loss": 1.8369, "step": 8385 }, { "epoch": 0.8710917212007895, "grad_norm": 0.41242918372154236, "learning_rate": 4.044432975009554e-06, "loss": 1.5588, "step": 8386 }, { "epoch": 0.8711955957203698, "grad_norm": 0.43866342306137085, "learning_rate": 4.038006727249094e-06, "loss": 1.6729, "step": 8387 }, { "epoch": 0.8712994702399501, "grad_norm": 0.4091012477874756, "learning_rate": 4.031585374082381e-06, "loss": 1.6353, "step": 8388 }, { "epoch": 0.8714033447595305, "grad_norm": 0.44972357153892517, "learning_rate": 4.025168916193273e-06, "loss": 1.7602, "step": 8389 }, { "epoch": 0.8715072192791108, "grad_norm": 0.3911556005477905, "learning_rate": 4.0187573542650625e-06, "loss": 1.5152, "step": 8390 }, { "epoch": 0.8716110937986912, "grad_norm": 0.4245710074901581, "learning_rate": 4.01235068898051e-06, "loss": 1.775, "step": 8391 }, { "epoch": 0.8717149683182716, "grad_norm": 0.41112756729125977, "learning_rate": 4.005948921021896e-06, "loss": 1.5729, "step": 8392 }, { "epoch": 0.8718188428378518, "grad_norm": 0.42917004227638245, "learning_rate": 3.99955205107096e-06, "loss": 1.7396, "step": 8393 }, { "epoch": 0.8719227173574322, "grad_norm": 0.44293302297592163, "learning_rate": 3.99316007980891e-06, "loss": 1.7512, "step": 8394 }, { "epoch": 0.8720265918770126, "grad_norm": 0.41871437430381775, "learning_rate": 3.9867730079164445e-06, "loss": 1.5736, "step": 8395 }, { "epoch": 0.8721304663965929, "grad_norm": 0.4064503312110901, "learning_rate": 3.980390836073739e-06, "loss": 1.4956, "step": 8396 }, { "epoch": 0.8722343409161732, "grad_norm": 0.39449822902679443, "learning_rate": 3.974013564960444e-06, "loss": 1.5223, "step": 8397 }, { "epoch": 0.8723382154357536, "grad_norm": 0.3918604850769043, "learning_rate": 3.967641195255672e-06, "loss": 1.5784, "step": 8398 }, { "epoch": 0.872442089955334, "grad_norm": 0.42740651965141296, "learning_rate": 3.9612737276380595e-06, "loss": 1.652, "step": 8399 }, { "epoch": 0.8725459644749143, "grad_norm": 0.45657414197921753, "learning_rate": 3.9549111627856795e-06, "loss": 1.863, "step": 8400 }, { "epoch": 0.8726498389944947, "grad_norm": 0.4107915759086609, "learning_rate": 3.948553501376096e-06, "loss": 1.6226, "step": 8401 }, { "epoch": 0.872753713514075, "grad_norm": 0.43051251769065857, "learning_rate": 3.942200744086349e-06, "loss": 1.687, "step": 8402 }, { "epoch": 0.8728575880336553, "grad_norm": 0.3889218270778656, "learning_rate": 3.935852891592956e-06, "loss": 1.6296, "step": 8403 }, { "epoch": 0.8729614625532357, "grad_norm": 0.4343833327293396, "learning_rate": 3.929509944571918e-06, "loss": 1.7, "step": 8404 }, { "epoch": 0.8730653370728161, "grad_norm": 0.43371620774269104, "learning_rate": 3.9231719036987025e-06, "loss": 1.627, "step": 8405 }, { "epoch": 0.8731692115923964, "grad_norm": 0.4425232410430908, "learning_rate": 3.916838769648279e-06, "loss": 1.6984, "step": 8406 }, { "epoch": 0.8732730861119767, "grad_norm": 0.3910176455974579, "learning_rate": 3.910510543095047e-06, "loss": 1.5953, "step": 8407 }, { "epoch": 0.8733769606315571, "grad_norm": 0.4370116591453552, "learning_rate": 3.904187224712935e-06, "loss": 1.8256, "step": 8408 }, { "epoch": 0.8734808351511374, "grad_norm": 0.4318406581878662, "learning_rate": 3.897868815175326e-06, "loss": 1.6616, "step": 8409 }, { "epoch": 0.8735847096707178, "grad_norm": 0.38654428720474243, "learning_rate": 3.891555315155082e-06, "loss": 1.4936, "step": 8410 }, { "epoch": 0.8736885841902982, "grad_norm": 0.41681328415870667, "learning_rate": 3.885246725324532e-06, "loss": 1.5365, "step": 8411 }, { "epoch": 0.8737924587098784, "grad_norm": 0.39945271611213684, "learning_rate": 3.878943046355499e-06, "loss": 1.5884, "step": 8412 }, { "epoch": 0.8738963332294588, "grad_norm": 0.40073657035827637, "learning_rate": 3.872644278919274e-06, "loss": 1.6765, "step": 8413 }, { "epoch": 0.8740002077490392, "grad_norm": 0.4032048285007477, "learning_rate": 3.866350423686615e-06, "loss": 1.6804, "step": 8414 }, { "epoch": 0.8741040822686195, "grad_norm": 0.4637449085712433, "learning_rate": 3.860061481327798e-06, "loss": 1.7906, "step": 8415 }, { "epoch": 0.8742079567881998, "grad_norm": 0.4216407537460327, "learning_rate": 3.8537774525125195e-06, "loss": 1.7432, "step": 8416 }, { "epoch": 0.8743118313077802, "grad_norm": 0.4351256787776947, "learning_rate": 3.847498337909977e-06, "loss": 1.6909, "step": 8417 }, { "epoch": 0.8744157058273605, "grad_norm": 0.4363458454608917, "learning_rate": 3.8412241381888755e-06, "loss": 1.5617, "step": 8418 }, { "epoch": 0.8745195803469409, "grad_norm": 0.481900155544281, "learning_rate": 3.834954854017337e-06, "loss": 1.854, "step": 8419 }, { "epoch": 0.8746234548665213, "grad_norm": 0.49199458956718445, "learning_rate": 3.828690486063013e-06, "loss": 1.7667, "step": 8420 }, { "epoch": 0.8747273293861015, "grad_norm": 0.41270673274993896, "learning_rate": 3.822431034993002e-06, "loss": 1.623, "step": 8421 }, { "epoch": 0.8748312039056819, "grad_norm": 0.444240540266037, "learning_rate": 3.816176501473889e-06, "loss": 1.6026, "step": 8422 }, { "epoch": 0.8749350784252623, "grad_norm": 0.4405142366886139, "learning_rate": 3.8099268861717275e-06, "loss": 1.7327, "step": 8423 }, { "epoch": 0.8750389529448427, "grad_norm": 0.4171452820301056, "learning_rate": 3.8036821897520502e-06, "loss": 1.622, "step": 8424 }, { "epoch": 0.875142827464423, "grad_norm": 0.4365280866622925, "learning_rate": 3.7974424128798868e-06, "loss": 1.5021, "step": 8425 }, { "epoch": 0.8752467019840033, "grad_norm": 0.419349730014801, "learning_rate": 3.7912075562197e-06, "loss": 1.5933, "step": 8426 }, { "epoch": 0.8753505765035837, "grad_norm": 0.41351646184921265, "learning_rate": 3.784977620435476e-06, "loss": 1.63, "step": 8427 }, { "epoch": 0.875454451023164, "grad_norm": 0.4430893361568451, "learning_rate": 3.7787526061906387e-06, "loss": 1.655, "step": 8428 }, { "epoch": 0.8755583255427444, "grad_norm": 0.534438967704773, "learning_rate": 3.7725325141481137e-06, "loss": 1.8994, "step": 8429 }, { "epoch": 0.8756622000623248, "grad_norm": 0.44221749901771545, "learning_rate": 3.766317344970288e-06, "loss": 1.6722, "step": 8430 }, { "epoch": 0.875766074581905, "grad_norm": 0.42250174283981323, "learning_rate": 3.7601070993190267e-06, "loss": 1.6381, "step": 8431 }, { "epoch": 0.8758699491014854, "grad_norm": 0.45283105969429016, "learning_rate": 3.753901777855673e-06, "loss": 1.6305, "step": 8432 }, { "epoch": 0.8759738236210658, "grad_norm": 0.4005245268344879, "learning_rate": 3.7477013812410422e-06, "loss": 1.6229, "step": 8433 }, { "epoch": 0.8760776981406461, "grad_norm": 0.3979766368865967, "learning_rate": 3.7415059101354454e-06, "loss": 1.5569, "step": 8434 }, { "epoch": 0.8761815726602264, "grad_norm": 0.4287016987800598, "learning_rate": 3.735315365198622e-06, "loss": 1.5411, "step": 8435 }, { "epoch": 0.8762854471798068, "grad_norm": 0.46041205525398254, "learning_rate": 3.7291297470898435e-06, "loss": 1.5864, "step": 8436 }, { "epoch": 0.8763893216993871, "grad_norm": 0.4164537787437439, "learning_rate": 3.7229490564678227e-06, "loss": 1.6638, "step": 8437 }, { "epoch": 0.8764931962189675, "grad_norm": 0.3900989294052124, "learning_rate": 3.7167732939907496e-06, "loss": 1.497, "step": 8438 }, { "epoch": 0.8765970707385479, "grad_norm": 0.41212278604507446, "learning_rate": 3.710602460316298e-06, "loss": 1.5113, "step": 8439 }, { "epoch": 0.8767009452581281, "grad_norm": 0.39359238743782043, "learning_rate": 3.704436556101615e-06, "loss": 1.5233, "step": 8440 }, { "epoch": 0.8768048197777085, "grad_norm": 0.46984294056892395, "learning_rate": 3.6982755820033186e-06, "loss": 1.6151, "step": 8441 }, { "epoch": 0.8769086942972889, "grad_norm": 0.4061124324798584, "learning_rate": 3.6921195386774965e-06, "loss": 1.5301, "step": 8442 }, { "epoch": 0.8770125688168692, "grad_norm": 0.4347725808620453, "learning_rate": 3.6859684267797455e-06, "loss": 1.6463, "step": 8443 }, { "epoch": 0.8771164433364496, "grad_norm": 0.46318212151527405, "learning_rate": 3.679822246965087e-06, "loss": 1.4855, "step": 8444 }, { "epoch": 0.8772203178560299, "grad_norm": 0.4061053693294525, "learning_rate": 3.673680999888035e-06, "loss": 1.5532, "step": 8445 }, { "epoch": 0.8773241923756102, "grad_norm": 0.3995457589626312, "learning_rate": 3.6675446862026176e-06, "loss": 1.5249, "step": 8446 }, { "epoch": 0.8774280668951906, "grad_norm": 0.4302307665348053, "learning_rate": 3.661413306562267e-06, "loss": 1.6669, "step": 8447 }, { "epoch": 0.877531941414771, "grad_norm": 0.45854660868644714, "learning_rate": 3.65528686161995e-06, "loss": 1.7169, "step": 8448 }, { "epoch": 0.8776358159343512, "grad_norm": 0.4778292179107666, "learning_rate": 3.649165352028083e-06, "loss": 1.6258, "step": 8449 }, { "epoch": 0.8777396904539316, "grad_norm": 0.3999958038330078, "learning_rate": 3.6430487784385515e-06, "loss": 1.6233, "step": 8450 }, { "epoch": 0.877843564973512, "grad_norm": 0.40005001425743103, "learning_rate": 3.636937141502733e-06, "loss": 1.6136, "step": 8451 }, { "epoch": 0.8779474394930924, "grad_norm": 0.4642762839794159, "learning_rate": 3.6308304418714523e-06, "loss": 1.8124, "step": 8452 }, { "epoch": 0.8780513140126727, "grad_norm": 0.41780030727386475, "learning_rate": 3.6247286801950497e-06, "loss": 1.708, "step": 8453 }, { "epoch": 0.878155188532253, "grad_norm": 0.3930474817752838, "learning_rate": 3.6186318571232893e-06, "loss": 1.5891, "step": 8454 }, { "epoch": 0.8782590630518334, "grad_norm": 0.38920000195503235, "learning_rate": 3.612539973305451e-06, "loss": 1.6752, "step": 8455 }, { "epoch": 0.8783629375714137, "grad_norm": 0.41854655742645264, "learning_rate": 3.606453029390272e-06, "loss": 1.6183, "step": 8456 }, { "epoch": 0.8784668120909941, "grad_norm": 0.4399711489677429, "learning_rate": 3.6003710260259616e-06, "loss": 1.9086, "step": 8457 }, { "epoch": 0.8785706866105745, "grad_norm": 0.4335370659828186, "learning_rate": 3.5942939638602067e-06, "loss": 1.5987, "step": 8458 }, { "epoch": 0.8786745611301547, "grad_norm": 0.4267464280128479, "learning_rate": 3.588221843540168e-06, "loss": 1.7094, "step": 8459 }, { "epoch": 0.8787784356497351, "grad_norm": 0.4027010500431061, "learning_rate": 3.582154665712473e-06, "loss": 1.6371, "step": 8460 }, { "epoch": 0.8788823101693155, "grad_norm": 0.4093351662158966, "learning_rate": 3.5760924310232258e-06, "loss": 1.72, "step": 8461 }, { "epoch": 0.8789861846888958, "grad_norm": 0.4228046238422394, "learning_rate": 3.5700351401180333e-06, "loss": 1.769, "step": 8462 }, { "epoch": 0.8790900592084762, "grad_norm": 0.4321579337120056, "learning_rate": 3.5639827936419123e-06, "loss": 1.7098, "step": 8463 }, { "epoch": 0.8791939337280565, "grad_norm": 0.45898208022117615, "learning_rate": 3.5579353922394197e-06, "loss": 1.7701, "step": 8464 }, { "epoch": 0.8792978082476368, "grad_norm": 0.44818854331970215, "learning_rate": 3.5518929365545406e-06, "loss": 1.7982, "step": 8465 }, { "epoch": 0.8794016827672172, "grad_norm": 0.3915591835975647, "learning_rate": 3.5458554272307597e-06, "loss": 1.6386, "step": 8466 }, { "epoch": 0.8795055572867976, "grad_norm": 0.4124680459499359, "learning_rate": 3.5398228649110186e-06, "loss": 1.6979, "step": 8467 }, { "epoch": 0.8796094318063779, "grad_norm": 0.4365387558937073, "learning_rate": 3.533795250237737e-06, "loss": 1.4209, "step": 8468 }, { "epoch": 0.8797133063259582, "grad_norm": 0.4055129885673523, "learning_rate": 3.527772583852812e-06, "loss": 1.4331, "step": 8469 }, { "epoch": 0.8798171808455386, "grad_norm": 0.4493137001991272, "learning_rate": 3.521754866397603e-06, "loss": 1.8114, "step": 8470 }, { "epoch": 0.8799210553651189, "grad_norm": 0.40242356061935425, "learning_rate": 3.5157420985129696e-06, "loss": 1.5685, "step": 8471 }, { "epoch": 0.8800249298846993, "grad_norm": 0.396705687046051, "learning_rate": 3.5097342808392054e-06, "loss": 1.4964, "step": 8472 }, { "epoch": 0.8801288044042797, "grad_norm": 0.4387626647949219, "learning_rate": 3.5037314140160926e-06, "loss": 1.4608, "step": 8473 }, { "epoch": 0.8802326789238599, "grad_norm": 0.47153568267822266, "learning_rate": 3.4977334986829145e-06, "loss": 1.6019, "step": 8474 }, { "epoch": 0.8803365534434403, "grad_norm": 0.46217477321624756, "learning_rate": 3.4917405354783772e-06, "loss": 1.7727, "step": 8475 }, { "epoch": 0.8804404279630207, "grad_norm": 0.48487722873687744, "learning_rate": 3.4857525250406976e-06, "loss": 1.7575, "step": 8476 }, { "epoch": 0.8805443024826011, "grad_norm": 0.48644381761550903, "learning_rate": 3.479769468007543e-06, "loss": 1.6869, "step": 8477 }, { "epoch": 0.8806481770021813, "grad_norm": 0.41009244322776794, "learning_rate": 3.4737913650160826e-06, "loss": 1.446, "step": 8478 }, { "epoch": 0.8807520515217617, "grad_norm": 0.4005885720252991, "learning_rate": 3.467818216702917e-06, "loss": 1.5894, "step": 8479 }, { "epoch": 0.8808559260413421, "grad_norm": 0.4276701509952545, "learning_rate": 3.461850023704133e-06, "loss": 1.5221, "step": 8480 }, { "epoch": 0.8809598005609224, "grad_norm": 0.42804211378097534, "learning_rate": 3.4558867866553324e-06, "loss": 1.7771, "step": 8481 }, { "epoch": 0.8810636750805028, "grad_norm": 0.42332831025123596, "learning_rate": 3.4499285061915134e-06, "loss": 1.6525, "step": 8482 }, { "epoch": 0.8811675496000831, "grad_norm": 0.4782041311264038, "learning_rate": 3.4439751829472123e-06, "loss": 1.808, "step": 8483 }, { "epoch": 0.8812714241196634, "grad_norm": 0.37131935358047485, "learning_rate": 3.4380268175564e-06, "loss": 1.4988, "step": 8484 }, { "epoch": 0.8813752986392438, "grad_norm": 0.46065279841423035, "learning_rate": 3.4320834106525367e-06, "loss": 1.7904, "step": 8485 }, { "epoch": 0.8814791731588242, "grad_norm": 0.41007938981056213, "learning_rate": 3.4261449628685492e-06, "loss": 1.4656, "step": 8486 }, { "epoch": 0.8815830476784045, "grad_norm": 0.40511950850486755, "learning_rate": 3.4202114748368264e-06, "loss": 1.6166, "step": 8487 }, { "epoch": 0.8816869221979848, "grad_norm": 0.42590075731277466, "learning_rate": 3.414282947189251e-06, "loss": 1.681, "step": 8488 }, { "epoch": 0.8817907967175652, "grad_norm": 0.43543121218681335, "learning_rate": 3.4083593805571523e-06, "loss": 1.6805, "step": 8489 }, { "epoch": 0.8818946712371455, "grad_norm": 0.4141135513782501, "learning_rate": 3.402440775571364e-06, "loss": 1.7227, "step": 8490 }, { "epoch": 0.8819985457567259, "grad_norm": 0.4232790470123291, "learning_rate": 3.3965271328621374e-06, "loss": 1.792, "step": 8491 }, { "epoch": 0.8821024202763063, "grad_norm": 0.40409740805625916, "learning_rate": 3.390618453059263e-06, "loss": 1.589, "step": 8492 }, { "epoch": 0.8822062947958865, "grad_norm": 0.418250173330307, "learning_rate": 3.384714736791955e-06, "loss": 1.6795, "step": 8493 }, { "epoch": 0.8823101693154669, "grad_norm": 0.42361485958099365, "learning_rate": 3.3788159846889157e-06, "loss": 1.6938, "step": 8494 }, { "epoch": 0.8824140438350473, "grad_norm": 0.4593660831451416, "learning_rate": 3.3729221973783144e-06, "loss": 1.6643, "step": 8495 }, { "epoch": 0.8825179183546276, "grad_norm": 0.4981370270252228, "learning_rate": 3.3670333754877937e-06, "loss": 1.8358, "step": 8496 }, { "epoch": 0.8826217928742079, "grad_norm": 0.39285340905189514, "learning_rate": 3.3611495196444685e-06, "loss": 1.5936, "step": 8497 }, { "epoch": 0.8827256673937883, "grad_norm": 0.41241446137428284, "learning_rate": 3.355270630474916e-06, "loss": 1.6163, "step": 8498 }, { "epoch": 0.8828295419133686, "grad_norm": 0.4433367848396301, "learning_rate": 3.3493967086052126e-06, "loss": 1.5595, "step": 8499 }, { "epoch": 0.882933416432949, "grad_norm": 0.4431458115577698, "learning_rate": 3.343527754660858e-06, "loss": 1.5006, "step": 8500 }, { "epoch": 0.8830372909525294, "grad_norm": 0.41516363620758057, "learning_rate": 3.3376637692668632e-06, "loss": 1.6199, "step": 8501 }, { "epoch": 0.8831411654721097, "grad_norm": 0.4274922311306, "learning_rate": 3.3318047530477013e-06, "loss": 1.7884, "step": 8502 }, { "epoch": 0.88324503999169, "grad_norm": 0.3764563798904419, "learning_rate": 3.3259507066273064e-06, "loss": 1.4395, "step": 8503 }, { "epoch": 0.8833489145112704, "grad_norm": 0.4132494032382965, "learning_rate": 3.320101630629091e-06, "loss": 1.7359, "step": 8504 }, { "epoch": 0.8834527890308508, "grad_norm": 0.42872190475463867, "learning_rate": 3.3142575256759346e-06, "loss": 1.6038, "step": 8505 }, { "epoch": 0.883556663550431, "grad_norm": 0.41533058881759644, "learning_rate": 3.3084183923901834e-06, "loss": 1.6193, "step": 8506 }, { "epoch": 0.8836605380700114, "grad_norm": 0.39843878149986267, "learning_rate": 3.302584231393674e-06, "loss": 1.636, "step": 8507 }, { "epoch": 0.8837644125895918, "grad_norm": 0.4171253740787506, "learning_rate": 3.2967550433076754e-06, "loss": 1.6229, "step": 8508 }, { "epoch": 0.8838682871091721, "grad_norm": 0.4044714570045471, "learning_rate": 3.290930828752986e-06, "loss": 1.639, "step": 8509 }, { "epoch": 0.8839721616287525, "grad_norm": 0.401554673910141, "learning_rate": 3.2851115883497986e-06, "loss": 1.6301, "step": 8510 }, { "epoch": 0.8840760361483329, "grad_norm": 0.40703094005584717, "learning_rate": 3.27929732271785e-06, "loss": 1.5388, "step": 8511 }, { "epoch": 0.8841799106679131, "grad_norm": 0.4279320538043976, "learning_rate": 3.2734880324762962e-06, "loss": 1.7176, "step": 8512 }, { "epoch": 0.8842837851874935, "grad_norm": 0.5383460521697998, "learning_rate": 3.267683718243786e-06, "loss": 1.8459, "step": 8513 }, { "epoch": 0.8843876597070739, "grad_norm": 0.4355732500553131, "learning_rate": 3.261884380638436e-06, "loss": 1.7073, "step": 8514 }, { "epoch": 0.8844915342266542, "grad_norm": 0.4631870687007904, "learning_rate": 3.25609002027783e-06, "loss": 1.7029, "step": 8515 }, { "epoch": 0.8845954087462345, "grad_norm": 0.3964858651161194, "learning_rate": 3.250300637779019e-06, "loss": 1.5025, "step": 8516 }, { "epoch": 0.8846992832658149, "grad_norm": 0.4206657409667969, "learning_rate": 3.2445162337585154e-06, "loss": 1.8947, "step": 8517 }, { "epoch": 0.8848031577853952, "grad_norm": 0.5454657077789307, "learning_rate": 3.238736808832349e-06, "loss": 1.9812, "step": 8518 }, { "epoch": 0.8849070323049756, "grad_norm": 0.4348321557044983, "learning_rate": 3.2329623636159435e-06, "loss": 1.7276, "step": 8519 }, { "epoch": 0.885010906824556, "grad_norm": 0.46917733550071716, "learning_rate": 3.2271928987242518e-06, "loss": 1.6525, "step": 8520 }, { "epoch": 0.8851147813441362, "grad_norm": 0.45450276136398315, "learning_rate": 3.2214284147716823e-06, "loss": 1.7034, "step": 8521 }, { "epoch": 0.8852186558637166, "grad_norm": 0.4001985490322113, "learning_rate": 3.215668912372094e-06, "loss": 1.5724, "step": 8522 }, { "epoch": 0.885322530383297, "grad_norm": 0.41575512290000916, "learning_rate": 3.209914392138835e-06, "loss": 1.5867, "step": 8523 }, { "epoch": 0.8854264049028773, "grad_norm": 0.42568308115005493, "learning_rate": 3.2041648546847215e-06, "loss": 1.6334, "step": 8524 }, { "epoch": 0.8855302794224577, "grad_norm": 0.4289058744907379, "learning_rate": 3.198420300622024e-06, "loss": 1.6318, "step": 8525 }, { "epoch": 0.885634153942038, "grad_norm": 0.39888161420822144, "learning_rate": 3.192680730562492e-06, "loss": 1.5733, "step": 8526 }, { "epoch": 0.8857380284616183, "grad_norm": 0.42361804842948914, "learning_rate": 3.18694614511737e-06, "loss": 1.7382, "step": 8527 }, { "epoch": 0.8858419029811987, "grad_norm": 0.4657709300518036, "learning_rate": 3.181216544897314e-06, "loss": 1.7749, "step": 8528 }, { "epoch": 0.8859457775007791, "grad_norm": 0.4325384199619293, "learning_rate": 3.175491930512492e-06, "loss": 1.8558, "step": 8529 }, { "epoch": 0.8860496520203595, "grad_norm": 0.4005754292011261, "learning_rate": 3.1697723025725378e-06, "loss": 1.5591, "step": 8530 }, { "epoch": 0.8861535265399397, "grad_norm": 0.4035787880420685, "learning_rate": 3.164057661686548e-06, "loss": 1.6387, "step": 8531 }, { "epoch": 0.8862574010595201, "grad_norm": 0.41289085149765015, "learning_rate": 3.15834800846308e-06, "loss": 1.6326, "step": 8532 }, { "epoch": 0.8863612755791005, "grad_norm": 0.4399457573890686, "learning_rate": 3.15264334351017e-06, "loss": 1.6648, "step": 8533 }, { "epoch": 0.8864651500986808, "grad_norm": 0.45032960176467896, "learning_rate": 3.146943667435326e-06, "loss": 1.8897, "step": 8534 }, { "epoch": 0.8865690246182611, "grad_norm": 0.4220801293849945, "learning_rate": 3.141248980845507e-06, "loss": 1.6006, "step": 8535 }, { "epoch": 0.8866728991378415, "grad_norm": 0.4171903133392334, "learning_rate": 3.135559284347156e-06, "loss": 1.4317, "step": 8536 }, { "epoch": 0.8867767736574218, "grad_norm": 0.4253293573856354, "learning_rate": 3.1298745785461937e-06, "loss": 1.5809, "step": 8537 }, { "epoch": 0.8868806481770022, "grad_norm": 0.4167105257511139, "learning_rate": 3.1241948640479803e-06, "loss": 1.6632, "step": 8538 }, { "epoch": 0.8869845226965826, "grad_norm": 0.4174095690250397, "learning_rate": 3.1185201414573707e-06, "loss": 1.5574, "step": 8539 }, { "epoch": 0.8870883972161628, "grad_norm": 0.43485039472579956, "learning_rate": 3.112850411378676e-06, "loss": 1.6832, "step": 8540 }, { "epoch": 0.8871922717357432, "grad_norm": 0.4099987745285034, "learning_rate": 3.10718567441568e-06, "loss": 1.6025, "step": 8541 }, { "epoch": 0.8872961462553236, "grad_norm": 0.4386424720287323, "learning_rate": 3.101525931171634e-06, "loss": 1.6887, "step": 8542 }, { "epoch": 0.8874000207749039, "grad_norm": 0.3977483808994293, "learning_rate": 3.095871182249255e-06, "loss": 1.4197, "step": 8543 }, { "epoch": 0.8875038952944843, "grad_norm": 0.42674514651298523, "learning_rate": 3.0902214282507225e-06, "loss": 1.655, "step": 8544 }, { "epoch": 0.8876077698140646, "grad_norm": 0.4276520609855652, "learning_rate": 3.0845766697776945e-06, "loss": 1.6276, "step": 8545 }, { "epoch": 0.8877116443336449, "grad_norm": 0.410883367061615, "learning_rate": 3.078936907431312e-06, "loss": 1.7068, "step": 8546 }, { "epoch": 0.8878155188532253, "grad_norm": 0.4068666696548462, "learning_rate": 3.0733021418121342e-06, "loss": 1.5961, "step": 8547 }, { "epoch": 0.8879193933728057, "grad_norm": 0.4384441375732422, "learning_rate": 3.0676723735202416e-06, "loss": 1.735, "step": 8548 }, { "epoch": 0.888023267892386, "grad_norm": 0.41591498255729675, "learning_rate": 3.0620476031551604e-06, "loss": 1.7273, "step": 8549 }, { "epoch": 0.8881271424119663, "grad_norm": 0.4390021860599518, "learning_rate": 3.056427831315878e-06, "loss": 1.6152, "step": 8550 }, { "epoch": 0.8882310169315467, "grad_norm": 0.43032291531562805, "learning_rate": 3.0508130586008545e-06, "loss": 1.6926, "step": 8551 }, { "epoch": 0.888334891451127, "grad_norm": 0.43168923258781433, "learning_rate": 3.0452032856080227e-06, "loss": 1.6707, "step": 8552 }, { "epoch": 0.8884387659707074, "grad_norm": 0.42715707421302795, "learning_rate": 3.039598512934783e-06, "loss": 1.6203, "step": 8553 }, { "epoch": 0.8885426404902877, "grad_norm": 0.45927006006240845, "learning_rate": 3.0339987411779846e-06, "loss": 1.7286, "step": 8554 }, { "epoch": 0.8886465150098681, "grad_norm": 0.3992420434951782, "learning_rate": 3.0284039709339795e-06, "loss": 1.5416, "step": 8555 }, { "epoch": 0.8887503895294484, "grad_norm": 0.4219547212123871, "learning_rate": 3.0228142027985684e-06, "loss": 1.6695, "step": 8556 }, { "epoch": 0.8888542640490288, "grad_norm": 0.41342371702194214, "learning_rate": 3.017229437366992e-06, "loss": 1.5937, "step": 8557 }, { "epoch": 0.8889581385686092, "grad_norm": 0.3998035788536072, "learning_rate": 3.0116496752340127e-06, "loss": 1.6053, "step": 8558 }, { "epoch": 0.8890620130881894, "grad_norm": 0.38309964537620544, "learning_rate": 3.0060749169938174e-06, "loss": 1.491, "step": 8559 }, { "epoch": 0.8891658876077698, "grad_norm": 0.43797820806503296, "learning_rate": 3.000505163240075e-06, "loss": 1.775, "step": 8560 }, { "epoch": 0.8892697621273502, "grad_norm": 0.39977407455444336, "learning_rate": 2.994940414565922e-06, "loss": 1.5729, "step": 8561 }, { "epoch": 0.8893736366469305, "grad_norm": 0.410889208316803, "learning_rate": 2.9893806715639626e-06, "loss": 1.5631, "step": 8562 }, { "epoch": 0.8894775111665109, "grad_norm": 0.41645950078964233, "learning_rate": 2.9838259348262675e-06, "loss": 1.599, "step": 8563 }, { "epoch": 0.8895813856860912, "grad_norm": 0.43443527817726135, "learning_rate": 2.978276204944358e-06, "loss": 1.6522, "step": 8564 }, { "epoch": 0.8896852602056715, "grad_norm": 0.4481103718280792, "learning_rate": 2.9727314825092667e-06, "loss": 1.7648, "step": 8565 }, { "epoch": 0.8897891347252519, "grad_norm": 0.427636057138443, "learning_rate": 2.967191768111427e-06, "loss": 1.6324, "step": 8566 }, { "epoch": 0.8898930092448323, "grad_norm": 0.44498735666275024, "learning_rate": 2.9616570623408057e-06, "loss": 1.8669, "step": 8567 }, { "epoch": 0.8899968837644125, "grad_norm": 0.4394892156124115, "learning_rate": 2.956127365786793e-06, "loss": 1.6186, "step": 8568 }, { "epoch": 0.8901007582839929, "grad_norm": 0.4512022137641907, "learning_rate": 2.950602679038261e-06, "loss": 1.8238, "step": 8569 }, { "epoch": 0.8902046328035733, "grad_norm": 0.5062389969825745, "learning_rate": 2.9450830026835407e-06, "loss": 1.7467, "step": 8570 }, { "epoch": 0.8903085073231536, "grad_norm": 0.46624040603637695, "learning_rate": 2.939568337310439e-06, "loss": 1.931, "step": 8571 }, { "epoch": 0.890412381842734, "grad_norm": 0.41087907552719116, "learning_rate": 2.9340586835062302e-06, "loss": 1.6322, "step": 8572 }, { "epoch": 0.8905162563623144, "grad_norm": 0.39506015181541443, "learning_rate": 2.9285540418576283e-06, "loss": 1.5128, "step": 8573 }, { "epoch": 0.8906201308818946, "grad_norm": 0.3968523144721985, "learning_rate": 2.9230544129508655e-06, "loss": 1.5671, "step": 8574 }, { "epoch": 0.890724005401475, "grad_norm": 0.4123672544956207, "learning_rate": 2.9175597973715828e-06, "loss": 1.4673, "step": 8575 }, { "epoch": 0.8908278799210554, "grad_norm": 0.4277864098548889, "learning_rate": 2.9120701957049246e-06, "loss": 1.6937, "step": 8576 }, { "epoch": 0.8909317544406357, "grad_norm": 0.4264744222164154, "learning_rate": 2.906585608535495e-06, "loss": 1.5243, "step": 8577 }, { "epoch": 0.891035628960216, "grad_norm": 0.45255225896835327, "learning_rate": 2.9011060364473595e-06, "loss": 1.5281, "step": 8578 }, { "epoch": 0.8911395034797964, "grad_norm": 0.42393550276756287, "learning_rate": 2.8956314800240415e-06, "loss": 1.7556, "step": 8579 }, { "epoch": 0.8912433779993768, "grad_norm": 0.4410860240459442, "learning_rate": 2.8901619398485347e-06, "loss": 1.7168, "step": 8580 }, { "epoch": 0.8913472525189571, "grad_norm": 0.43080854415893555, "learning_rate": 2.8846974165033235e-06, "loss": 1.5863, "step": 8581 }, { "epoch": 0.8914511270385375, "grad_norm": 0.3852452337741852, "learning_rate": 2.8792379105703094e-06, "loss": 1.5794, "step": 8582 }, { "epoch": 0.8915550015581178, "grad_norm": 0.43257638812065125, "learning_rate": 2.8737834226309156e-06, "loss": 1.7181, "step": 8583 }, { "epoch": 0.8916588760776981, "grad_norm": 0.42675426602363586, "learning_rate": 2.868333953265989e-06, "loss": 1.6535, "step": 8584 }, { "epoch": 0.8917627505972785, "grad_norm": 0.40293970704078674, "learning_rate": 2.8628895030558435e-06, "loss": 1.6133, "step": 8585 }, { "epoch": 0.8918666251168589, "grad_norm": 0.4669196903705597, "learning_rate": 2.8574500725802865e-06, "loss": 1.7044, "step": 8586 }, { "epoch": 0.8919704996364392, "grad_norm": 0.43928948044776917, "learning_rate": 2.8520156624185777e-06, "loss": 1.6096, "step": 8587 }, { "epoch": 0.8920743741560195, "grad_norm": 0.4221644401550293, "learning_rate": 2.8465862731494264e-06, "loss": 1.7143, "step": 8588 }, { "epoch": 0.8921782486755999, "grad_norm": 0.4293653070926666, "learning_rate": 2.841161905351031e-06, "loss": 1.5406, "step": 8589 }, { "epoch": 0.8922821231951802, "grad_norm": 0.45512861013412476, "learning_rate": 2.8357425596010344e-06, "loss": 1.7877, "step": 8590 }, { "epoch": 0.8923859977147606, "grad_norm": 0.4328274726867676, "learning_rate": 2.830328236476559e-06, "loss": 2.0108, "step": 8591 }, { "epoch": 0.892489872234341, "grad_norm": 0.4451085925102234, "learning_rate": 2.8249189365541886e-06, "loss": 1.731, "step": 8592 }, { "epoch": 0.8925937467539212, "grad_norm": 0.4539354741573334, "learning_rate": 2.819514660409983e-06, "loss": 1.7513, "step": 8593 }, { "epoch": 0.8926976212735016, "grad_norm": 0.4132786691188812, "learning_rate": 2.814115408619428e-06, "loss": 1.5909, "step": 8594 }, { "epoch": 0.892801495793082, "grad_norm": 0.3973241448402405, "learning_rate": 2.808721181757523e-06, "loss": 1.7004, "step": 8595 }, { "epoch": 0.8929053703126623, "grad_norm": 0.4304153025150299, "learning_rate": 2.80333198039871e-06, "loss": 1.6818, "step": 8596 }, { "epoch": 0.8930092448322426, "grad_norm": 0.42142942547798157, "learning_rate": 2.7979478051168906e-06, "loss": 1.7153, "step": 8597 }, { "epoch": 0.893113119351823, "grad_norm": 0.4120078384876251, "learning_rate": 2.7925686564854447e-06, "loss": 1.6648, "step": 8598 }, { "epoch": 0.8932169938714033, "grad_norm": 0.40456852316856384, "learning_rate": 2.7871945350771977e-06, "loss": 1.6177, "step": 8599 }, { "epoch": 0.8933208683909837, "grad_norm": 0.417198121547699, "learning_rate": 2.781825441464464e-06, "loss": 1.6729, "step": 8600 }, { "epoch": 0.8934247429105641, "grad_norm": 0.4824064075946808, "learning_rate": 2.776461376218997e-06, "loss": 1.7711, "step": 8601 }, { "epoch": 0.8935286174301443, "grad_norm": 0.44903162121772766, "learning_rate": 2.7711023399120463e-06, "loss": 1.532, "step": 8602 }, { "epoch": 0.8936324919497247, "grad_norm": 0.4706164002418518, "learning_rate": 2.7657483331142875e-06, "loss": 1.7887, "step": 8603 }, { "epoch": 0.8937363664693051, "grad_norm": 0.4258130192756653, "learning_rate": 2.760399356395893e-06, "loss": 1.7112, "step": 8604 }, { "epoch": 0.8938402409888855, "grad_norm": 0.4342074692249298, "learning_rate": 2.7550554103264845e-06, "loss": 1.6769, "step": 8605 }, { "epoch": 0.8939441155084658, "grad_norm": 0.43605124950408936, "learning_rate": 2.749716495475152e-06, "loss": 1.595, "step": 8606 }, { "epoch": 0.8940479900280461, "grad_norm": 0.4049820899963379, "learning_rate": 2.7443826124104454e-06, "loss": 1.5847, "step": 8607 }, { "epoch": 0.8941518645476265, "grad_norm": 0.41089296340942383, "learning_rate": 2.7390537617003777e-06, "loss": 1.4398, "step": 8608 }, { "epoch": 0.8942557390672068, "grad_norm": 0.4379929006099701, "learning_rate": 2.73372994391245e-06, "loss": 1.7582, "step": 8609 }, { "epoch": 0.8943596135867872, "grad_norm": 0.44860774278640747, "learning_rate": 2.72841115961357e-06, "loss": 1.7001, "step": 8610 }, { "epoch": 0.8944634881063676, "grad_norm": 0.39442136883735657, "learning_rate": 2.7230974093701844e-06, "loss": 1.5964, "step": 8611 }, { "epoch": 0.8945673626259478, "grad_norm": 0.4311879873275757, "learning_rate": 2.7177886937481524e-06, "loss": 1.7961, "step": 8612 }, { "epoch": 0.8946712371455282, "grad_norm": 0.40751856565475464, "learning_rate": 2.7124850133127987e-06, "loss": 1.4809, "step": 8613 }, { "epoch": 0.8947751116651086, "grad_norm": 0.3862675130367279, "learning_rate": 2.707186368628939e-06, "loss": 1.556, "step": 8614 }, { "epoch": 0.8948789861846889, "grad_norm": 0.43987709283828735, "learning_rate": 2.7018927602608323e-06, "loss": 1.4624, "step": 8615 }, { "epoch": 0.8949828607042692, "grad_norm": 0.5344493985176086, "learning_rate": 2.6966041887722117e-06, "loss": 1.8081, "step": 8616 }, { "epoch": 0.8950867352238496, "grad_norm": 0.43068018555641174, "learning_rate": 2.6913206547262593e-06, "loss": 1.7128, "step": 8617 }, { "epoch": 0.8951906097434299, "grad_norm": 0.45353829860687256, "learning_rate": 2.6860421586856368e-06, "loss": 1.7452, "step": 8618 }, { "epoch": 0.8952944842630103, "grad_norm": 0.4327743947505951, "learning_rate": 2.6807687012124605e-06, "loss": 1.773, "step": 8619 }, { "epoch": 0.8953983587825907, "grad_norm": 0.4295056462287903, "learning_rate": 2.6755002828683096e-06, "loss": 1.6954, "step": 8620 }, { "epoch": 0.8955022333021709, "grad_norm": 0.4431517720222473, "learning_rate": 2.6702369042142463e-06, "loss": 1.7723, "step": 8621 }, { "epoch": 0.8956061078217513, "grad_norm": 0.4079246520996094, "learning_rate": 2.6649785658107494e-06, "loss": 1.6812, "step": 8622 }, { "epoch": 0.8957099823413317, "grad_norm": 0.427082896232605, "learning_rate": 2.6597252682178165e-06, "loss": 1.772, "step": 8623 }, { "epoch": 0.895813856860912, "grad_norm": 0.4852583110332489, "learning_rate": 2.6544770119948714e-06, "loss": 1.6485, "step": 8624 }, { "epoch": 0.8959177313804924, "grad_norm": 0.4270947575569153, "learning_rate": 2.6492337977008173e-06, "loss": 1.7824, "step": 8625 }, { "epoch": 0.8960216059000727, "grad_norm": 0.4104156792163849, "learning_rate": 2.6439956258940193e-06, "loss": 1.5354, "step": 8626 }, { "epoch": 0.896125480419653, "grad_norm": 0.4451044797897339, "learning_rate": 2.638762497132291e-06, "loss": 1.6791, "step": 8627 }, { "epoch": 0.8962293549392334, "grad_norm": 0.44924893975257874, "learning_rate": 2.633534411972921e-06, "loss": 1.7615, "step": 8628 }, { "epoch": 0.8963332294588138, "grad_norm": 0.40664803981781006, "learning_rate": 2.628311370972664e-06, "loss": 1.535, "step": 8629 }, { "epoch": 0.896437103978394, "grad_norm": 0.43392449617385864, "learning_rate": 2.6230933746877406e-06, "loss": 1.6903, "step": 8630 }, { "epoch": 0.8965409784979744, "grad_norm": 0.38720911741256714, "learning_rate": 2.6178804236738073e-06, "loss": 1.5899, "step": 8631 }, { "epoch": 0.8966448530175548, "grad_norm": 0.49395009875297546, "learning_rate": 2.61267251848602e-06, "loss": 1.8366, "step": 8632 }, { "epoch": 0.8967487275371352, "grad_norm": 0.41654813289642334, "learning_rate": 2.6074696596789793e-06, "loss": 1.6235, "step": 8633 }, { "epoch": 0.8968526020567155, "grad_norm": 0.4147179424762726, "learning_rate": 2.6022718478067363e-06, "loss": 1.5863, "step": 8634 }, { "epoch": 0.8969564765762958, "grad_norm": 0.46183696389198303, "learning_rate": 2.597079083422832e-06, "loss": 1.7443, "step": 8635 }, { "epoch": 0.8970603510958762, "grad_norm": 0.4648350179195404, "learning_rate": 2.5918913670802346e-06, "loss": 1.7862, "step": 8636 }, { "epoch": 0.8971642256154565, "grad_norm": 0.44168201088905334, "learning_rate": 2.5867086993314305e-06, "loss": 1.7847, "step": 8637 }, { "epoch": 0.8972681001350369, "grad_norm": 0.40692418813705444, "learning_rate": 2.5815310807282944e-06, "loss": 1.6616, "step": 8638 }, { "epoch": 0.8973719746546173, "grad_norm": 0.4546252489089966, "learning_rate": 2.5763585118222246e-06, "loss": 1.8507, "step": 8639 }, { "epoch": 0.8974758491741975, "grad_norm": 0.5330055356025696, "learning_rate": 2.5711909931640634e-06, "loss": 1.9273, "step": 8640 }, { "epoch": 0.8975797236937779, "grad_norm": 0.4015289545059204, "learning_rate": 2.566028525304087e-06, "loss": 1.6388, "step": 8641 }, { "epoch": 0.8976835982133583, "grad_norm": 0.4576239585876465, "learning_rate": 2.5608711087920844e-06, "loss": 1.7742, "step": 8642 }, { "epoch": 0.8977874727329386, "grad_norm": 0.42561182379722595, "learning_rate": 2.5557187441772655e-06, "loss": 1.5785, "step": 8643 }, { "epoch": 0.897891347252519, "grad_norm": 0.43728065490722656, "learning_rate": 2.550571432008325e-06, "loss": 1.6983, "step": 8644 }, { "epoch": 0.8979952217720993, "grad_norm": 0.4214796721935272, "learning_rate": 2.5454291728334022e-06, "loss": 1.7185, "step": 8645 }, { "epoch": 0.8980990962916796, "grad_norm": 0.38102787733078003, "learning_rate": 2.5402919672001147e-06, "loss": 1.4215, "step": 8646 }, { "epoch": 0.89820297081126, "grad_norm": 0.4356735050678253, "learning_rate": 2.5351598156555356e-06, "loss": 1.5594, "step": 8647 }, { "epoch": 0.8983068453308404, "grad_norm": 0.4779966473579407, "learning_rate": 2.5300327187461837e-06, "loss": 1.7134, "step": 8648 }, { "epoch": 0.8984107198504206, "grad_norm": 0.4045184850692749, "learning_rate": 2.524910677018083e-06, "loss": 1.5643, "step": 8649 }, { "epoch": 0.898514594370001, "grad_norm": 0.4227657914161682, "learning_rate": 2.519793691016664e-06, "loss": 1.6597, "step": 8650 }, { "epoch": 0.8986184688895814, "grad_norm": 0.40350401401519775, "learning_rate": 2.5146817612868634e-06, "loss": 1.5878, "step": 8651 }, { "epoch": 0.8987223434091617, "grad_norm": 0.42735230922698975, "learning_rate": 2.5095748883730562e-06, "loss": 1.6392, "step": 8652 }, { "epoch": 0.8988262179287421, "grad_norm": 0.4346289038658142, "learning_rate": 2.50447307281908e-06, "loss": 1.6582, "step": 8653 }, { "epoch": 0.8989300924483224, "grad_norm": 0.4567740559577942, "learning_rate": 2.4993763151682448e-06, "loss": 1.6875, "step": 8654 }, { "epoch": 0.8990339669679027, "grad_norm": 0.4155108332633972, "learning_rate": 2.4942846159633104e-06, "loss": 1.6108, "step": 8655 }, { "epoch": 0.8991378414874831, "grad_norm": 0.3986811935901642, "learning_rate": 2.4891979757465102e-06, "loss": 1.6022, "step": 8656 }, { "epoch": 0.8992417160070635, "grad_norm": 0.39959198236465454, "learning_rate": 2.484116395059516e-06, "loss": 1.6271, "step": 8657 }, { "epoch": 0.8993455905266439, "grad_norm": 0.3958359956741333, "learning_rate": 2.479039874443495e-06, "loss": 1.4597, "step": 8658 }, { "epoch": 0.8994494650462241, "grad_norm": 0.40250423550605774, "learning_rate": 2.473968414439054e-06, "loss": 1.5915, "step": 8659 }, { "epoch": 0.8995533395658045, "grad_norm": 0.4547244608402252, "learning_rate": 2.468902015586255e-06, "loss": 1.7938, "step": 8660 }, { "epoch": 0.8996572140853849, "grad_norm": 0.40203481912612915, "learning_rate": 2.4638406784246337e-06, "loss": 1.5711, "step": 8661 }, { "epoch": 0.8997610886049652, "grad_norm": 0.4178847074508667, "learning_rate": 2.458784403493186e-06, "loss": 1.5101, "step": 8662 }, { "epoch": 0.8998649631245456, "grad_norm": 0.42161715030670166, "learning_rate": 2.453733191330365e-06, "loss": 1.706, "step": 8663 }, { "epoch": 0.8999688376441259, "grad_norm": 0.4573306739330292, "learning_rate": 2.448687042474074e-06, "loss": 1.8456, "step": 8664 }, { "epoch": 0.9000727121637062, "grad_norm": 0.3947295844554901, "learning_rate": 2.4436459574617155e-06, "loss": 1.6289, "step": 8665 }, { "epoch": 0.9001765866832866, "grad_norm": 0.3863442540168762, "learning_rate": 2.4386099368300943e-06, "loss": 1.6296, "step": 8666 }, { "epoch": 0.900280461202867, "grad_norm": 0.4450392723083496, "learning_rate": 2.4335789811155253e-06, "loss": 1.7748, "step": 8667 }, { "epoch": 0.9003843357224472, "grad_norm": 0.4072805643081665, "learning_rate": 2.4285530908537747e-06, "loss": 1.6025, "step": 8668 }, { "epoch": 0.9004882102420276, "grad_norm": 0.44600561261177063, "learning_rate": 2.423532266580031e-06, "loss": 1.6433, "step": 8669 }, { "epoch": 0.900592084761608, "grad_norm": 0.4108854830265045, "learning_rate": 2.4185165088289996e-06, "loss": 1.55, "step": 8670 }, { "epoch": 0.9006959592811883, "grad_norm": 0.4555318355560303, "learning_rate": 2.4135058181348036e-06, "loss": 1.8198, "step": 8671 }, { "epoch": 0.9007998338007687, "grad_norm": 0.4197269082069397, "learning_rate": 2.4085001950310546e-06, "loss": 1.7265, "step": 8672 }, { "epoch": 0.900903708320349, "grad_norm": 0.39955320954322815, "learning_rate": 2.403499640050805e-06, "loss": 1.5325, "step": 8673 }, { "epoch": 0.9010075828399293, "grad_norm": 0.4017268717288971, "learning_rate": 2.3985041537265774e-06, "loss": 1.5629, "step": 8674 }, { "epoch": 0.9011114573595097, "grad_norm": 0.4230203926563263, "learning_rate": 2.3935137365903526e-06, "loss": 1.6934, "step": 8675 }, { "epoch": 0.9012153318790901, "grad_norm": 0.43759918212890625, "learning_rate": 2.3885283891735664e-06, "loss": 1.5736, "step": 8676 }, { "epoch": 0.9013192063986704, "grad_norm": 0.49041640758514404, "learning_rate": 2.383548112007128e-06, "loss": 1.6968, "step": 8677 }, { "epoch": 0.9014230809182507, "grad_norm": 0.4059118926525116, "learning_rate": 2.3785729056213845e-06, "loss": 1.6976, "step": 8678 }, { "epoch": 0.9015269554378311, "grad_norm": 0.4104028642177582, "learning_rate": 2.3736027705461737e-06, "loss": 1.581, "step": 8679 }, { "epoch": 0.9016308299574114, "grad_norm": 0.4261881709098816, "learning_rate": 2.368637707310767e-06, "loss": 1.6556, "step": 8680 }, { "epoch": 0.9017347044769918, "grad_norm": 0.43607598543167114, "learning_rate": 2.3636777164439016e-06, "loss": 1.802, "step": 8681 }, { "epoch": 0.9018385789965722, "grad_norm": 0.5014055371284485, "learning_rate": 2.3587227984737835e-06, "loss": 1.8442, "step": 8682 }, { "epoch": 0.9019424535161525, "grad_norm": 0.3931456208229065, "learning_rate": 2.3537729539280684e-06, "loss": 1.5732, "step": 8683 }, { "epoch": 0.9020463280357328, "grad_norm": 0.3907303810119629, "learning_rate": 2.3488281833338844e-06, "loss": 1.4718, "step": 8684 }, { "epoch": 0.9021502025553132, "grad_norm": 0.4133702516555786, "learning_rate": 2.3438884872177992e-06, "loss": 1.5745, "step": 8685 }, { "epoch": 0.9022540770748936, "grad_norm": 0.45526713132858276, "learning_rate": 2.338953866105864e-06, "loss": 1.8402, "step": 8686 }, { "epoch": 0.9023579515944739, "grad_norm": 0.4495735764503479, "learning_rate": 2.3340243205235703e-06, "loss": 1.7328, "step": 8687 }, { "epoch": 0.9024618261140542, "grad_norm": 0.43973442912101746, "learning_rate": 2.3290998509958752e-06, "loss": 1.7364, "step": 8688 }, { "epoch": 0.9025657006336346, "grad_norm": 0.4277052879333496, "learning_rate": 2.324180458047198e-06, "loss": 1.6693, "step": 8689 }, { "epoch": 0.9026695751532149, "grad_norm": 0.4482114315032959, "learning_rate": 2.3192661422014204e-06, "loss": 1.7195, "step": 8690 }, { "epoch": 0.9027734496727953, "grad_norm": 0.44713294506073, "learning_rate": 2.3143569039818737e-06, "loss": 1.5864, "step": 8691 }, { "epoch": 0.9028773241923757, "grad_norm": 0.4385944604873657, "learning_rate": 2.3094527439113446e-06, "loss": 1.7092, "step": 8692 }, { "epoch": 0.9029811987119559, "grad_norm": 0.39296817779541016, "learning_rate": 2.3045536625121155e-06, "loss": 1.5689, "step": 8693 }, { "epoch": 0.9030850732315363, "grad_norm": 0.43056434392929077, "learning_rate": 2.2996596603058695e-06, "loss": 1.7061, "step": 8694 }, { "epoch": 0.9031889477511167, "grad_norm": 0.49173492193222046, "learning_rate": 2.294770737813795e-06, "loss": 1.6226, "step": 8695 }, { "epoch": 0.903292822270697, "grad_norm": 0.40122538805007935, "learning_rate": 2.289886895556531e-06, "loss": 1.5687, "step": 8696 }, { "epoch": 0.9033966967902773, "grad_norm": 0.4927726089954376, "learning_rate": 2.285008134054145e-06, "loss": 1.9438, "step": 8697 }, { "epoch": 0.9035005713098577, "grad_norm": 0.4643442928791046, "learning_rate": 2.2801344538262103e-06, "loss": 1.6155, "step": 8698 }, { "epoch": 0.903604445829438, "grad_norm": 0.4130551517009735, "learning_rate": 2.2752658553917226e-06, "loss": 1.669, "step": 8699 }, { "epoch": 0.9037083203490184, "grad_norm": 0.46958547830581665, "learning_rate": 2.270402339269162e-06, "loss": 1.7943, "step": 8700 }, { "epoch": 0.9038121948685988, "grad_norm": 0.46900489926338196, "learning_rate": 2.2655439059764415e-06, "loss": 1.5809, "step": 8701 }, { "epoch": 0.903916069388179, "grad_norm": 0.42625823616981506, "learning_rate": 2.2606905560309534e-06, "loss": 1.5512, "step": 8702 }, { "epoch": 0.9040199439077594, "grad_norm": 0.38436755537986755, "learning_rate": 2.2558422899495392e-06, "loss": 1.5806, "step": 8703 }, { "epoch": 0.9041238184273398, "grad_norm": 0.4123709201812744, "learning_rate": 2.2509991082484914e-06, "loss": 1.71, "step": 8704 }, { "epoch": 0.9042276929469201, "grad_norm": 0.4161551594734192, "learning_rate": 2.2461610114436038e-06, "loss": 1.5765, "step": 8705 }, { "epoch": 0.9043315674665005, "grad_norm": 0.409616619348526, "learning_rate": 2.241328000050058e-06, "loss": 1.4221, "step": 8706 }, { "epoch": 0.9044354419860808, "grad_norm": 0.4236219525337219, "learning_rate": 2.236500074582554e-06, "loss": 1.6554, "step": 8707 }, { "epoch": 0.9045393165056611, "grad_norm": 0.4036356508731842, "learning_rate": 2.2316772355552185e-06, "loss": 1.4211, "step": 8708 }, { "epoch": 0.9046431910252415, "grad_norm": 0.40732133388519287, "learning_rate": 2.226859483481658e-06, "loss": 1.6462, "step": 8709 }, { "epoch": 0.9047470655448219, "grad_norm": 0.4440615773200989, "learning_rate": 2.222046818874912e-06, "loss": 1.6724, "step": 8710 }, { "epoch": 0.9048509400644023, "grad_norm": 0.39947155117988586, "learning_rate": 2.217239242247493e-06, "loss": 1.5371, "step": 8711 }, { "epoch": 0.9049548145839825, "grad_norm": 0.41746270656585693, "learning_rate": 2.2124367541113856e-06, "loss": 1.7528, "step": 8712 }, { "epoch": 0.9050586891035629, "grad_norm": 0.40026289224624634, "learning_rate": 2.207639354977997e-06, "loss": 1.5377, "step": 8713 }, { "epoch": 0.9051625636231433, "grad_norm": 0.4574800133705139, "learning_rate": 2.2028470453582253e-06, "loss": 1.798, "step": 8714 }, { "epoch": 0.9052664381427236, "grad_norm": 0.4214540123939514, "learning_rate": 2.1980598257624165e-06, "loss": 1.6247, "step": 8715 }, { "epoch": 0.9053703126623039, "grad_norm": 0.4481583535671234, "learning_rate": 2.193277696700363e-06, "loss": 1.8091, "step": 8716 }, { "epoch": 0.9054741871818843, "grad_norm": 0.45233625173568726, "learning_rate": 2.188500658681325e-06, "loss": 1.8274, "step": 8717 }, { "epoch": 0.9055780617014646, "grad_norm": 0.41306886076927185, "learning_rate": 2.183728712214028e-06, "loss": 1.5299, "step": 8718 }, { "epoch": 0.905681936221045, "grad_norm": 0.4653450846672058, "learning_rate": 2.1789618578066386e-06, "loss": 1.7601, "step": 8719 }, { "epoch": 0.9057858107406254, "grad_norm": 0.4584251642227173, "learning_rate": 2.1742000959667885e-06, "loss": 1.624, "step": 8720 }, { "epoch": 0.9058896852602056, "grad_norm": 0.4341670870780945, "learning_rate": 2.169443427201584e-06, "loss": 1.5616, "step": 8721 }, { "epoch": 0.905993559779786, "grad_norm": 0.408926397562027, "learning_rate": 2.164691852017553e-06, "loss": 1.6354, "step": 8722 }, { "epoch": 0.9060974342993664, "grad_norm": 0.41678133606910706, "learning_rate": 2.159945370920713e-06, "loss": 1.7588, "step": 8723 }, { "epoch": 0.9062013088189467, "grad_norm": 0.4230055809020996, "learning_rate": 2.1552039844165316e-06, "loss": 1.6595, "step": 8724 }, { "epoch": 0.906305183338527, "grad_norm": 0.4654471278190613, "learning_rate": 2.1504676930099056e-06, "loss": 1.6695, "step": 8725 }, { "epoch": 0.9064090578581074, "grad_norm": 0.4271183907985687, "learning_rate": 2.145736497205242e-06, "loss": 1.6587, "step": 8726 }, { "epoch": 0.9065129323776877, "grad_norm": 0.4363931119441986, "learning_rate": 2.14101039750636e-06, "loss": 1.6881, "step": 8727 }, { "epoch": 0.9066168068972681, "grad_norm": 0.4049679636955261, "learning_rate": 2.1362893944165573e-06, "loss": 1.6157, "step": 8728 }, { "epoch": 0.9067206814168485, "grad_norm": 0.4094752371311188, "learning_rate": 2.131573488438582e-06, "loss": 1.6042, "step": 8729 }, { "epoch": 0.9068245559364287, "grad_norm": 0.41162753105163574, "learning_rate": 2.1268626800746427e-06, "loss": 1.738, "step": 8730 }, { "epoch": 0.9069284304560091, "grad_norm": 0.4202596843242645, "learning_rate": 2.1221569698264054e-06, "loss": 1.662, "step": 8731 }, { "epoch": 0.9070323049755895, "grad_norm": 0.40823620557785034, "learning_rate": 2.11745635819498e-06, "loss": 1.6081, "step": 8732 }, { "epoch": 0.9071361794951698, "grad_norm": 0.4370567798614502, "learning_rate": 2.1127608456809667e-06, "loss": 1.6897, "step": 8733 }, { "epoch": 0.9072400540147502, "grad_norm": 0.4072186052799225, "learning_rate": 2.108070432784376e-06, "loss": 1.6987, "step": 8734 }, { "epoch": 0.9073439285343305, "grad_norm": 0.4287233352661133, "learning_rate": 2.1033851200047195e-06, "loss": 1.5307, "step": 8735 }, { "epoch": 0.9074478030539109, "grad_norm": 0.40400230884552, "learning_rate": 2.098704907840943e-06, "loss": 1.5036, "step": 8736 }, { "epoch": 0.9075516775734912, "grad_norm": 0.428813636302948, "learning_rate": 2.094029796791441e-06, "loss": 1.691, "step": 8737 }, { "epoch": 0.9076555520930716, "grad_norm": 0.4154002368450165, "learning_rate": 2.0893597873540947e-06, "loss": 1.6238, "step": 8738 }, { "epoch": 0.907759426612652, "grad_norm": 0.42926695942878723, "learning_rate": 2.0846948800262e-06, "loss": 1.765, "step": 8739 }, { "epoch": 0.9078633011322322, "grad_norm": 0.41468799114227295, "learning_rate": 2.080035075304565e-06, "loss": 1.785, "step": 8740 }, { "epoch": 0.9079671756518126, "grad_norm": 0.4041023254394531, "learning_rate": 2.0753803736853872e-06, "loss": 1.7357, "step": 8741 }, { "epoch": 0.908071050171393, "grad_norm": 0.39929163455963135, "learning_rate": 2.070730775664381e-06, "loss": 1.5915, "step": 8742 }, { "epoch": 0.9081749246909733, "grad_norm": 0.3934778571128845, "learning_rate": 2.0660862817366846e-06, "loss": 1.5252, "step": 8743 }, { "epoch": 0.9082787992105537, "grad_norm": 0.4070751368999481, "learning_rate": 2.061446892396901e-06, "loss": 1.6859, "step": 8744 }, { "epoch": 0.908382673730134, "grad_norm": 0.4303978979587555, "learning_rate": 2.0568126081390916e-06, "loss": 1.635, "step": 8745 }, { "epoch": 0.9084865482497143, "grad_norm": 0.43727797269821167, "learning_rate": 2.052183429456761e-06, "loss": 1.7292, "step": 8746 }, { "epoch": 0.9085904227692947, "grad_norm": 0.40043336153030396, "learning_rate": 2.047559356842893e-06, "loss": 1.6206, "step": 8747 }, { "epoch": 0.9086942972888751, "grad_norm": 0.4222732186317444, "learning_rate": 2.042940390789899e-06, "loss": 1.7271, "step": 8748 }, { "epoch": 0.9087981718084553, "grad_norm": 0.4159920811653137, "learning_rate": 2.0383265317896916e-06, "loss": 1.6506, "step": 8749 }, { "epoch": 0.9089020463280357, "grad_norm": 0.4522947669029236, "learning_rate": 2.033717780333577e-06, "loss": 1.6477, "step": 8750 }, { "epoch": 0.9090059208476161, "grad_norm": 0.4436376392841339, "learning_rate": 2.0291141369123746e-06, "loss": 1.6716, "step": 8751 }, { "epoch": 0.9091097953671964, "grad_norm": 0.42394956946372986, "learning_rate": 2.0245156020163413e-06, "loss": 1.5676, "step": 8752 }, { "epoch": 0.9092136698867768, "grad_norm": 0.4075998365879059, "learning_rate": 2.019922176135153e-06, "loss": 1.7053, "step": 8753 }, { "epoch": 0.9093175444063571, "grad_norm": 0.4617473781108856, "learning_rate": 2.0153338597580006e-06, "loss": 1.7169, "step": 8754 }, { "epoch": 0.9094214189259374, "grad_norm": 0.43988341093063354, "learning_rate": 2.0107506533735e-06, "loss": 1.7221, "step": 8755 }, { "epoch": 0.9095252934455178, "grad_norm": 0.3944602906703949, "learning_rate": 2.0061725574697266e-06, "loss": 1.4159, "step": 8756 }, { "epoch": 0.9096291679650982, "grad_norm": 0.4070970416069031, "learning_rate": 2.0015995725342073e-06, "loss": 1.5355, "step": 8757 }, { "epoch": 0.9097330424846785, "grad_norm": 0.4194161891937256, "learning_rate": 1.9970316990539306e-06, "loss": 1.6739, "step": 8758 }, { "epoch": 0.9098369170042588, "grad_norm": 0.4412401020526886, "learning_rate": 1.9924689375153406e-06, "loss": 1.6932, "step": 8759 }, { "epoch": 0.9099407915238392, "grad_norm": 0.483318030834198, "learning_rate": 1.9879112884043315e-06, "loss": 1.7716, "step": 8760 }, { "epoch": 0.9100446660434196, "grad_norm": 0.3971022367477417, "learning_rate": 1.983358752206266e-06, "loss": 1.7038, "step": 8761 }, { "epoch": 0.9101485405629999, "grad_norm": 0.3960052728652954, "learning_rate": 1.9788113294059498e-06, "loss": 1.5655, "step": 8762 }, { "epoch": 0.9102524150825803, "grad_norm": 0.4107067286968231, "learning_rate": 1.974269020487646e-06, "loss": 1.5848, "step": 8763 }, { "epoch": 0.9103562896021606, "grad_norm": 0.42935070395469666, "learning_rate": 1.969731825935073e-06, "loss": 1.5146, "step": 8764 }, { "epoch": 0.9104601641217409, "grad_norm": 0.4849339723587036, "learning_rate": 1.9651997462314166e-06, "loss": 1.7126, "step": 8765 }, { "epoch": 0.9105640386413213, "grad_norm": 0.4564700424671173, "learning_rate": 1.9606727818592963e-06, "loss": 1.8559, "step": 8766 }, { "epoch": 0.9106679131609017, "grad_norm": 0.43314129114151, "learning_rate": 1.9561509333008045e-06, "loss": 1.6292, "step": 8767 }, { "epoch": 0.910771787680482, "grad_norm": 0.460445374250412, "learning_rate": 1.9516342010374888e-06, "loss": 1.7847, "step": 8768 }, { "epoch": 0.9108756622000623, "grad_norm": 0.4221862554550171, "learning_rate": 1.9471225855503262e-06, "loss": 1.6476, "step": 8769 }, { "epoch": 0.9109795367196427, "grad_norm": 0.3845740258693695, "learning_rate": 1.9426160873197874e-06, "loss": 1.5905, "step": 8770 }, { "epoch": 0.911083411239223, "grad_norm": 0.42310813069343567, "learning_rate": 1.938114706825778e-06, "loss": 1.7829, "step": 8771 }, { "epoch": 0.9111872857588034, "grad_norm": 0.42070627212524414, "learning_rate": 1.9336184445476525e-06, "loss": 1.5937, "step": 8772 }, { "epoch": 0.9112911602783837, "grad_norm": 0.5164587497711182, "learning_rate": 1.9291273009642287e-06, "loss": 1.7094, "step": 8773 }, { "epoch": 0.911395034797964, "grad_norm": 0.39574146270751953, "learning_rate": 1.924641276553779e-06, "loss": 1.5736, "step": 8774 }, { "epoch": 0.9114989093175444, "grad_norm": 0.3940182030200958, "learning_rate": 1.9201603717940385e-06, "loss": 1.3872, "step": 8775 }, { "epoch": 0.9116027838371248, "grad_norm": 0.42792201042175293, "learning_rate": 1.9156845871621698e-06, "loss": 1.7079, "step": 8776 }, { "epoch": 0.9117066583567051, "grad_norm": 0.40028658509254456, "learning_rate": 1.91121392313483e-06, "loss": 1.564, "step": 8777 }, { "epoch": 0.9118105328762854, "grad_norm": 0.462466835975647, "learning_rate": 1.9067483801881002e-06, "loss": 1.7931, "step": 8778 }, { "epoch": 0.9119144073958658, "grad_norm": 0.42079779505729675, "learning_rate": 1.9022879587975162e-06, "loss": 1.6593, "step": 8779 }, { "epoch": 0.9120182819154461, "grad_norm": 0.4041844606399536, "learning_rate": 1.8978326594381035e-06, "loss": 1.4404, "step": 8780 }, { "epoch": 0.9121221564350265, "grad_norm": 0.4102264940738678, "learning_rate": 1.8933824825842828e-06, "loss": 1.5394, "step": 8781 }, { "epoch": 0.9122260309546069, "grad_norm": 0.451555073261261, "learning_rate": 1.8889374287099916e-06, "loss": 1.7135, "step": 8782 }, { "epoch": 0.9123299054741871, "grad_norm": 0.399200975894928, "learning_rate": 1.8844974982885843e-06, "loss": 1.4235, "step": 8783 }, { "epoch": 0.9124337799937675, "grad_norm": 0.42155495285987854, "learning_rate": 1.8800626917928776e-06, "loss": 1.5576, "step": 8784 }, { "epoch": 0.9125376545133479, "grad_norm": 0.4104582965373993, "learning_rate": 1.8756330096951435e-06, "loss": 1.6632, "step": 8785 }, { "epoch": 0.9126415290329283, "grad_norm": 0.4192086160182953, "learning_rate": 1.8712084524671047e-06, "loss": 1.6348, "step": 8786 }, { "epoch": 0.9127454035525085, "grad_norm": 0.40471452474594116, "learning_rate": 1.866789020579962e-06, "loss": 1.5868, "step": 8787 }, { "epoch": 0.9128492780720889, "grad_norm": 0.4262443482875824, "learning_rate": 1.862374714504317e-06, "loss": 1.6801, "step": 8788 }, { "epoch": 0.9129531525916693, "grad_norm": 0.4280400574207306, "learning_rate": 1.8579655347102875e-06, "loss": 1.5403, "step": 8789 }, { "epoch": 0.9130570271112496, "grad_norm": 0.4044937193393707, "learning_rate": 1.853561481667404e-06, "loss": 1.5541, "step": 8790 }, { "epoch": 0.91316090163083, "grad_norm": 0.41494637727737427, "learning_rate": 1.8491625558446634e-06, "loss": 1.7474, "step": 8791 }, { "epoch": 0.9132647761504104, "grad_norm": 0.391250878572464, "learning_rate": 1.8447687577105244e-06, "loss": 1.6448, "step": 8792 }, { "epoch": 0.9133686506699906, "grad_norm": 0.4285977780818939, "learning_rate": 1.8403800877328847e-06, "loss": 1.6618, "step": 8793 }, { "epoch": 0.913472525189571, "grad_norm": 0.4157012104988098, "learning_rate": 1.835996546379104e-06, "loss": 1.6608, "step": 8794 }, { "epoch": 0.9135763997091514, "grad_norm": 0.41410088539123535, "learning_rate": 1.8316181341159977e-06, "loss": 1.7398, "step": 8795 }, { "epoch": 0.9136802742287317, "grad_norm": 0.43308156728744507, "learning_rate": 1.8272448514098373e-06, "loss": 1.5286, "step": 8796 }, { "epoch": 0.913784148748312, "grad_norm": 0.39761316776275635, "learning_rate": 1.8228766987263334e-06, "loss": 1.613, "step": 8797 }, { "epoch": 0.9138880232678924, "grad_norm": 0.46875429153442383, "learning_rate": 1.8185136765306643e-06, "loss": 1.6326, "step": 8798 }, { "epoch": 0.9139918977874727, "grad_norm": 0.4058031737804413, "learning_rate": 1.8141557852874634e-06, "loss": 1.4985, "step": 8799 }, { "epoch": 0.9140957723070531, "grad_norm": 0.4153459668159485, "learning_rate": 1.8098030254608044e-06, "loss": 1.6341, "step": 8800 }, { "epoch": 0.9141996468266335, "grad_norm": 0.4445685148239136, "learning_rate": 1.8054553975142273e-06, "loss": 1.8224, "step": 8801 }, { "epoch": 0.9143035213462137, "grad_norm": 0.4452269375324249, "learning_rate": 1.8011129019107176e-06, "loss": 1.7055, "step": 8802 }, { "epoch": 0.9144073958657941, "grad_norm": 0.4562229812145233, "learning_rate": 1.7967755391127161e-06, "loss": 1.828, "step": 8803 }, { "epoch": 0.9145112703853745, "grad_norm": 0.3990088701248169, "learning_rate": 1.7924433095821203e-06, "loss": 1.6182, "step": 8804 }, { "epoch": 0.9146151449049548, "grad_norm": 0.4023396074771881, "learning_rate": 1.788116213780283e-06, "loss": 1.7746, "step": 8805 }, { "epoch": 0.9147190194245352, "grad_norm": 0.42868486046791077, "learning_rate": 1.783794252168003e-06, "loss": 1.7353, "step": 8806 }, { "epoch": 0.9148228939441155, "grad_norm": 0.43141698837280273, "learning_rate": 1.7794774252055224e-06, "loss": 1.6535, "step": 8807 }, { "epoch": 0.9149267684636958, "grad_norm": 0.4295443594455719, "learning_rate": 1.775165733352574e-06, "loss": 1.6215, "step": 8808 }, { "epoch": 0.9150306429832762, "grad_norm": 0.4191311299800873, "learning_rate": 1.770859177068296e-06, "loss": 1.6455, "step": 8809 }, { "epoch": 0.9151345175028566, "grad_norm": 0.38304373621940613, "learning_rate": 1.7665577568113212e-06, "loss": 1.6085, "step": 8810 }, { "epoch": 0.9152383920224368, "grad_norm": 0.4099152684211731, "learning_rate": 1.7622614730397114e-06, "loss": 1.7182, "step": 8811 }, { "epoch": 0.9153422665420172, "grad_norm": 0.397775262594223, "learning_rate": 1.7579703262109837e-06, "loss": 1.5084, "step": 8812 }, { "epoch": 0.9154461410615976, "grad_norm": 0.4640827476978302, "learning_rate": 1.7536843167821116e-06, "loss": 1.8689, "step": 8813 }, { "epoch": 0.915550015581178, "grad_norm": 0.39592549204826355, "learning_rate": 1.7494034452095242e-06, "loss": 1.5368, "step": 8814 }, { "epoch": 0.9156538901007583, "grad_norm": 0.4693751633167267, "learning_rate": 1.7451277119491073e-06, "loss": 1.806, "step": 8815 }, { "epoch": 0.9157577646203386, "grad_norm": 0.45381423830986023, "learning_rate": 1.740857117456174e-06, "loss": 1.6147, "step": 8816 }, { "epoch": 0.915861639139919, "grad_norm": 0.40232381224632263, "learning_rate": 1.7365916621855327e-06, "loss": 1.5101, "step": 8817 }, { "epoch": 0.9159655136594993, "grad_norm": 0.4090515971183777, "learning_rate": 1.7323313465914093e-06, "loss": 1.6499, "step": 8818 }, { "epoch": 0.9160693881790797, "grad_norm": 0.3945220410823822, "learning_rate": 1.7280761711274906e-06, "loss": 1.5388, "step": 8819 }, { "epoch": 0.9161732626986601, "grad_norm": 0.39310646057128906, "learning_rate": 1.7238261362469254e-06, "loss": 1.5099, "step": 8820 }, { "epoch": 0.9162771372182403, "grad_norm": 0.4310518205165863, "learning_rate": 1.7195812424023129e-06, "loss": 1.4882, "step": 8821 }, { "epoch": 0.9163810117378207, "grad_norm": 0.4244692921638489, "learning_rate": 1.715341490045691e-06, "loss": 1.6063, "step": 8822 }, { "epoch": 0.9164848862574011, "grad_norm": 0.4561496675014496, "learning_rate": 1.7111068796285657e-06, "loss": 1.7671, "step": 8823 }, { "epoch": 0.9165887607769814, "grad_norm": 0.4400840103626251, "learning_rate": 1.7068774116018983e-06, "loss": 1.6152, "step": 8824 }, { "epoch": 0.9166926352965618, "grad_norm": 0.46823105216026306, "learning_rate": 1.7026530864160728e-06, "loss": 1.5364, "step": 8825 }, { "epoch": 0.9167965098161421, "grad_norm": 0.43917423486709595, "learning_rate": 1.698433904520963e-06, "loss": 1.5236, "step": 8826 }, { "epoch": 0.9169003843357224, "grad_norm": 0.4674359858036041, "learning_rate": 1.6942198663658815e-06, "loss": 1.767, "step": 8827 }, { "epoch": 0.9170042588553028, "grad_norm": 0.39382535219192505, "learning_rate": 1.6900109723995805e-06, "loss": 1.6415, "step": 8828 }, { "epoch": 0.9171081333748832, "grad_norm": 0.4580112397670746, "learning_rate": 1.6858072230702792e-06, "loss": 1.6657, "step": 8829 }, { "epoch": 0.9172120078944634, "grad_norm": 0.4108462929725647, "learning_rate": 1.6816086188256475e-06, "loss": 1.6511, "step": 8830 }, { "epoch": 0.9173158824140438, "grad_norm": 0.4109908938407898, "learning_rate": 1.677415160112794e-06, "loss": 1.5784, "step": 8831 }, { "epoch": 0.9174197569336242, "grad_norm": 0.4298510253429413, "learning_rate": 1.6732268473782953e-06, "loss": 1.6897, "step": 8832 }, { "epoch": 0.9175236314532045, "grad_norm": 0.42794668674468994, "learning_rate": 1.6690436810681832e-06, "loss": 1.6477, "step": 8833 }, { "epoch": 0.9176275059727849, "grad_norm": 0.42130839824676514, "learning_rate": 1.664865661627918e-06, "loss": 1.6891, "step": 8834 }, { "epoch": 0.9177313804923652, "grad_norm": 0.4458337128162384, "learning_rate": 1.6606927895024217e-06, "loss": 1.5716, "step": 8835 }, { "epoch": 0.9178352550119455, "grad_norm": 0.4314666986465454, "learning_rate": 1.6565250651360997e-06, "loss": 1.6978, "step": 8836 }, { "epoch": 0.9179391295315259, "grad_norm": 0.47084978222846985, "learning_rate": 1.6523624889727474e-06, "loss": 1.7084, "step": 8837 }, { "epoch": 0.9180430040511063, "grad_norm": 0.42452502250671387, "learning_rate": 1.6482050614556709e-06, "loss": 1.4483, "step": 8838 }, { "epoch": 0.9181468785706867, "grad_norm": 0.4466843008995056, "learning_rate": 1.6440527830275997e-06, "loss": 1.7525, "step": 8839 }, { "epoch": 0.9182507530902669, "grad_norm": 0.3827952444553375, "learning_rate": 1.6399056541307134e-06, "loss": 1.5032, "step": 8840 }, { "epoch": 0.9183546276098473, "grad_norm": 0.46445831656455994, "learning_rate": 1.6357636752066529e-06, "loss": 1.6178, "step": 8841 }, { "epoch": 0.9184585021294277, "grad_norm": 0.4010038375854492, "learning_rate": 1.6316268466965046e-06, "loss": 1.6032, "step": 8842 }, { "epoch": 0.918562376649008, "grad_norm": 0.42369773983955383, "learning_rate": 1.6274951690408158e-06, "loss": 1.7141, "step": 8843 }, { "epoch": 0.9186662511685884, "grad_norm": 0.4194977879524231, "learning_rate": 1.623368642679557e-06, "loss": 1.6226, "step": 8844 }, { "epoch": 0.9187701256881687, "grad_norm": 0.5032253861427307, "learning_rate": 1.6192472680521987e-06, "loss": 1.9285, "step": 8845 }, { "epoch": 0.918874000207749, "grad_norm": 0.4604244530200958, "learning_rate": 1.6151310455976176e-06, "loss": 1.7397, "step": 8846 }, { "epoch": 0.9189778747273294, "grad_norm": 0.4401777386665344, "learning_rate": 1.6110199757541632e-06, "loss": 1.5453, "step": 8847 }, { "epoch": 0.9190817492469098, "grad_norm": 0.48724883794784546, "learning_rate": 1.6069140589596299e-06, "loss": 1.6, "step": 8848 }, { "epoch": 0.91918562376649, "grad_norm": 0.4106622338294983, "learning_rate": 1.6028132956512732e-06, "loss": 1.6392, "step": 8849 }, { "epoch": 0.9192894982860704, "grad_norm": 0.4616900086402893, "learning_rate": 1.5987176862657882e-06, "loss": 1.6164, "step": 8850 }, { "epoch": 0.9193933728056508, "grad_norm": 0.4380607604980469, "learning_rate": 1.5946272312393206e-06, "loss": 1.5529, "step": 8851 }, { "epoch": 0.9194972473252311, "grad_norm": 0.42449167370796204, "learning_rate": 1.5905419310074831e-06, "loss": 1.7714, "step": 8852 }, { "epoch": 0.9196011218448115, "grad_norm": 0.4334729015827179, "learning_rate": 1.586461786005311e-06, "loss": 1.703, "step": 8853 }, { "epoch": 0.9197049963643918, "grad_norm": 0.4081462621688843, "learning_rate": 1.5823867966673288e-06, "loss": 1.6907, "step": 8854 }, { "epoch": 0.9198088708839721, "grad_norm": 0.4235094487667084, "learning_rate": 1.5783169634274787e-06, "loss": 1.7441, "step": 8855 }, { "epoch": 0.9199127454035525, "grad_norm": 0.4290684759616852, "learning_rate": 1.574252286719169e-06, "loss": 1.5403, "step": 8856 }, { "epoch": 0.9200166199231329, "grad_norm": 0.40172362327575684, "learning_rate": 1.5701927669752537e-06, "loss": 1.4616, "step": 8857 }, { "epoch": 0.9201204944427132, "grad_norm": 0.4437967538833618, "learning_rate": 1.566138404628037e-06, "loss": 1.6938, "step": 8858 }, { "epoch": 0.9202243689622935, "grad_norm": 0.42959895730018616, "learning_rate": 1.5620892001092902e-06, "loss": 1.5245, "step": 8859 }, { "epoch": 0.9203282434818739, "grad_norm": 0.4243561923503876, "learning_rate": 1.5580451538502016e-06, "loss": 1.6801, "step": 8860 }, { "epoch": 0.9204321180014542, "grad_norm": 0.5258318781852722, "learning_rate": 1.5540062662814603e-06, "loss": 1.8436, "step": 8861 }, { "epoch": 0.9205359925210346, "grad_norm": 0.4423414468765259, "learning_rate": 1.5499725378331441e-06, "loss": 1.6583, "step": 8862 }, { "epoch": 0.920639867040615, "grad_norm": 0.4549945592880249, "learning_rate": 1.5459439689348266e-06, "loss": 1.6852, "step": 8863 }, { "epoch": 0.9207437415601953, "grad_norm": 0.43814900517463684, "learning_rate": 1.5419205600155306e-06, "loss": 1.6821, "step": 8864 }, { "epoch": 0.9208476160797756, "grad_norm": 0.47776687145233154, "learning_rate": 1.5379023115037027e-06, "loss": 1.8725, "step": 8865 }, { "epoch": 0.920951490599356, "grad_norm": 0.422610342502594, "learning_rate": 1.5338892238272562e-06, "loss": 1.6768, "step": 8866 }, { "epoch": 0.9210553651189364, "grad_norm": 0.46566253900527954, "learning_rate": 1.5298812974135602e-06, "loss": 1.7364, "step": 8867 }, { "epoch": 0.9211592396385166, "grad_norm": 0.41145744919776917, "learning_rate": 1.5258785326894286e-06, "loss": 1.7075, "step": 8868 }, { "epoch": 0.921263114158097, "grad_norm": 0.4216400980949402, "learning_rate": 1.5218809300811155e-06, "loss": 1.7723, "step": 8869 }, { "epoch": 0.9213669886776774, "grad_norm": 0.4355829656124115, "learning_rate": 1.5178884900143353e-06, "loss": 1.6459, "step": 8870 }, { "epoch": 0.9214708631972577, "grad_norm": 0.4165591597557068, "learning_rate": 1.5139012129142704e-06, "loss": 1.5313, "step": 8871 }, { "epoch": 0.9215747377168381, "grad_norm": 0.44293418526649475, "learning_rate": 1.5099190992055034e-06, "loss": 1.7129, "step": 8872 }, { "epoch": 0.9216786122364184, "grad_norm": 0.45215722918510437, "learning_rate": 1.505942149312123e-06, "loss": 1.4032, "step": 8873 }, { "epoch": 0.9217824867559987, "grad_norm": 0.4243696630001068, "learning_rate": 1.5019703636576344e-06, "loss": 1.6315, "step": 8874 }, { "epoch": 0.9218863612755791, "grad_norm": 0.4237101078033447, "learning_rate": 1.498003742665005e-06, "loss": 1.6485, "step": 8875 }, { "epoch": 0.9219902357951595, "grad_norm": 0.4091566801071167, "learning_rate": 1.4940422867566472e-06, "loss": 1.671, "step": 8876 }, { "epoch": 0.9220941103147398, "grad_norm": 0.44032323360443115, "learning_rate": 1.4900859963544233e-06, "loss": 1.581, "step": 8877 }, { "epoch": 0.9221979848343201, "grad_norm": 0.41749629378318787, "learning_rate": 1.486134871879652e-06, "loss": 1.7884, "step": 8878 }, { "epoch": 0.9223018593539005, "grad_norm": 0.447294682264328, "learning_rate": 1.4821889137530852e-06, "loss": 1.7156, "step": 8879 }, { "epoch": 0.9224057338734808, "grad_norm": 0.45944663882255554, "learning_rate": 1.4782481223949596e-06, "loss": 1.8423, "step": 8880 }, { "epoch": 0.9225096083930612, "grad_norm": 0.39821431040763855, "learning_rate": 1.4743124982249112e-06, "loss": 1.5655, "step": 8881 }, { "epoch": 0.9226134829126416, "grad_norm": 0.48018068075180054, "learning_rate": 1.4703820416620718e-06, "loss": 1.6241, "step": 8882 }, { "epoch": 0.9227173574322218, "grad_norm": 0.3963991701602936, "learning_rate": 1.4664567531250007e-06, "loss": 1.5582, "step": 8883 }, { "epoch": 0.9228212319518022, "grad_norm": 0.405376672744751, "learning_rate": 1.462536633031708e-06, "loss": 1.6288, "step": 8884 }, { "epoch": 0.9229251064713826, "grad_norm": 0.4346674382686615, "learning_rate": 1.4586216817996602e-06, "loss": 1.6538, "step": 8885 }, { "epoch": 0.9230289809909629, "grad_norm": 0.4289572536945343, "learning_rate": 1.454711899845762e-06, "loss": 1.8293, "step": 8886 }, { "epoch": 0.9231328555105432, "grad_norm": 0.5129453539848328, "learning_rate": 1.450807287586381e-06, "loss": 1.8293, "step": 8887 }, { "epoch": 0.9232367300301236, "grad_norm": 0.41872215270996094, "learning_rate": 1.4469078454373174e-06, "loss": 1.6358, "step": 8888 }, { "epoch": 0.9233406045497039, "grad_norm": 0.4631272256374359, "learning_rate": 1.4430135738138561e-06, "loss": 1.824, "step": 8889 }, { "epoch": 0.9234444790692843, "grad_norm": 0.41717085242271423, "learning_rate": 1.4391244731306763e-06, "loss": 1.6387, "step": 8890 }, { "epoch": 0.9235483535888647, "grad_norm": 0.4447776675224304, "learning_rate": 1.4352405438019522e-06, "loss": 1.7798, "step": 8891 }, { "epoch": 0.923652228108445, "grad_norm": 0.48640677332878113, "learning_rate": 1.431361786241292e-06, "loss": 1.8333, "step": 8892 }, { "epoch": 0.9237561026280253, "grad_norm": 0.4646976590156555, "learning_rate": 1.427488200861754e-06, "loss": 1.6033, "step": 8893 }, { "epoch": 0.9238599771476057, "grad_norm": 0.39103513956069946, "learning_rate": 1.4236197880758416e-06, "loss": 1.5394, "step": 8894 }, { "epoch": 0.9239638516671861, "grad_norm": 0.49602001905441284, "learning_rate": 1.4197565482955144e-06, "loss": 1.8753, "step": 8895 }, { "epoch": 0.9240677261867664, "grad_norm": 0.3964075744152069, "learning_rate": 1.4158984819321707e-06, "loss": 1.5401, "step": 8896 }, { "epoch": 0.9241716007063467, "grad_norm": 0.4444807171821594, "learning_rate": 1.4120455893966711e-06, "loss": 1.755, "step": 8897 }, { "epoch": 0.9242754752259271, "grad_norm": 0.48171067237854004, "learning_rate": 1.4081978710993092e-06, "loss": 1.7385, "step": 8898 }, { "epoch": 0.9243793497455074, "grad_norm": 0.41400474309921265, "learning_rate": 1.404355327449852e-06, "loss": 1.6438, "step": 8899 }, { "epoch": 0.9244832242650878, "grad_norm": 0.4013029932975769, "learning_rate": 1.4005179588574835e-06, "loss": 1.3095, "step": 8900 }, { "epoch": 0.9245870987846682, "grad_norm": 0.41404280066490173, "learning_rate": 1.3966857657308652e-06, "loss": 1.6767, "step": 8901 }, { "epoch": 0.9246909733042484, "grad_norm": 0.4295409619808197, "learning_rate": 1.392858748478093e-06, "loss": 1.5915, "step": 8902 }, { "epoch": 0.9247948478238288, "grad_norm": 0.4575590491294861, "learning_rate": 1.3890369075067134e-06, "loss": 1.6812, "step": 8903 }, { "epoch": 0.9248987223434092, "grad_norm": 0.42268791794776917, "learning_rate": 1.3852202432237227e-06, "loss": 1.6996, "step": 8904 }, { "epoch": 0.9250025968629895, "grad_norm": 0.4151066839694977, "learning_rate": 1.3814087560355682e-06, "loss": 1.5893, "step": 8905 }, { "epoch": 0.9251064713825699, "grad_norm": 0.40030190348625183, "learning_rate": 1.377602446348142e-06, "loss": 1.7249, "step": 8906 }, { "epoch": 0.9252103459021502, "grad_norm": 0.38971251249313354, "learning_rate": 1.3738013145667804e-06, "loss": 1.5488, "step": 8907 }, { "epoch": 0.9253142204217305, "grad_norm": 0.39237236976623535, "learning_rate": 1.3700053610962882e-06, "loss": 1.5541, "step": 8908 }, { "epoch": 0.9254180949413109, "grad_norm": 0.4266391098499298, "learning_rate": 1.366214586340886e-06, "loss": 1.7434, "step": 8909 }, { "epoch": 0.9255219694608913, "grad_norm": 0.42102593183517456, "learning_rate": 1.3624289907042787e-06, "loss": 1.7068, "step": 8910 }, { "epoch": 0.9256258439804715, "grad_norm": 0.42698073387145996, "learning_rate": 1.3586485745895993e-06, "loss": 1.7564, "step": 8911 }, { "epoch": 0.9257297185000519, "grad_norm": 0.43909063935279846, "learning_rate": 1.354873338399426e-06, "loss": 1.6685, "step": 8912 }, { "epoch": 0.9258335930196323, "grad_norm": 0.4355563819408417, "learning_rate": 1.3511032825357982e-06, "loss": 1.4694, "step": 8913 }, { "epoch": 0.9259374675392126, "grad_norm": 0.4620991349220276, "learning_rate": 1.3473384074001893e-06, "loss": 1.5793, "step": 8914 }, { "epoch": 0.926041342058793, "grad_norm": 0.4601285457611084, "learning_rate": 1.3435787133935397e-06, "loss": 1.8184, "step": 8915 }, { "epoch": 0.9261452165783733, "grad_norm": 0.43226033449172974, "learning_rate": 1.3398242009162177e-06, "loss": 1.795, "step": 8916 }, { "epoch": 0.9262490910979537, "grad_norm": 0.4279690682888031, "learning_rate": 1.3360748703680537e-06, "loss": 1.6734, "step": 8917 }, { "epoch": 0.926352965617534, "grad_norm": 0.42362403869628906, "learning_rate": 1.3323307221483339e-06, "loss": 1.5626, "step": 8918 }, { "epoch": 0.9264568401371144, "grad_norm": 0.4446941614151001, "learning_rate": 1.328591756655756e-06, "loss": 1.6638, "step": 8919 }, { "epoch": 0.9265607146566948, "grad_norm": 0.4361754059791565, "learning_rate": 1.3248579742885126e-06, "loss": 1.552, "step": 8920 }, { "epoch": 0.926664589176275, "grad_norm": 0.4090036451816559, "learning_rate": 1.3211293754442134e-06, "loss": 1.6251, "step": 8921 }, { "epoch": 0.9267684636958554, "grad_norm": 0.4072844088077545, "learning_rate": 1.317405960519924e-06, "loss": 1.6939, "step": 8922 }, { "epoch": 0.9268723382154358, "grad_norm": 0.4060845673084259, "learning_rate": 1.3136877299121608e-06, "loss": 1.6519, "step": 8923 }, { "epoch": 0.9269762127350161, "grad_norm": 0.4271293878555298, "learning_rate": 1.3099746840168903e-06, "loss": 1.5427, "step": 8924 }, { "epoch": 0.9270800872545965, "grad_norm": 0.3999338746070862, "learning_rate": 1.3062668232295128e-06, "loss": 1.5207, "step": 8925 }, { "epoch": 0.9271839617741768, "grad_norm": 0.47124266624450684, "learning_rate": 1.3025641479448958e-06, "loss": 1.8273, "step": 8926 }, { "epoch": 0.9272878362937571, "grad_norm": 0.4004976451396942, "learning_rate": 1.2988666585573461e-06, "loss": 1.4928, "step": 8927 }, { "epoch": 0.9273917108133375, "grad_norm": 0.4213370382785797, "learning_rate": 1.2951743554606044e-06, "loss": 1.6839, "step": 8928 }, { "epoch": 0.9274955853329179, "grad_norm": 0.43283870816230774, "learning_rate": 1.2914872390478894e-06, "loss": 1.7069, "step": 8929 }, { "epoch": 0.9275994598524981, "grad_norm": 0.3999961316585541, "learning_rate": 1.2878053097118425e-06, "loss": 1.6181, "step": 8930 }, { "epoch": 0.9277033343720785, "grad_norm": 0.44577065110206604, "learning_rate": 1.2841285678445614e-06, "loss": 1.599, "step": 8931 }, { "epoch": 0.9278072088916589, "grad_norm": 0.40648534893989563, "learning_rate": 1.2804570138375827e-06, "loss": 1.6496, "step": 8932 }, { "epoch": 0.9279110834112392, "grad_norm": 0.43543723225593567, "learning_rate": 1.2767906480819104e-06, "loss": 1.7878, "step": 8933 }, { "epoch": 0.9280149579308196, "grad_norm": 0.39712443947792053, "learning_rate": 1.273129470967982e-06, "loss": 1.6478, "step": 8934 }, { "epoch": 0.9281188324503999, "grad_norm": 0.43775272369384766, "learning_rate": 1.2694734828856691e-06, "loss": 1.6999, "step": 8935 }, { "epoch": 0.9282227069699802, "grad_norm": 0.46979156136512756, "learning_rate": 1.265822684224327e-06, "loss": 1.5892, "step": 8936 }, { "epoch": 0.9283265814895606, "grad_norm": 0.44931840896606445, "learning_rate": 1.2621770753727224e-06, "loss": 1.661, "step": 8937 }, { "epoch": 0.928430456009141, "grad_norm": 0.441062867641449, "learning_rate": 1.2585366567190949e-06, "loss": 1.7309, "step": 8938 }, { "epoch": 0.9285343305287213, "grad_norm": 0.45350944995880127, "learning_rate": 1.2549014286511118e-06, "loss": 1.5562, "step": 8939 }, { "epoch": 0.9286382050483016, "grad_norm": 0.4538798928260803, "learning_rate": 1.2512713915559027e-06, "loss": 1.5712, "step": 8940 }, { "epoch": 0.928742079567882, "grad_norm": 0.43491330742836, "learning_rate": 1.247646545820036e-06, "loss": 1.6381, "step": 8941 }, { "epoch": 0.9288459540874624, "grad_norm": 0.417327880859375, "learning_rate": 1.2440268918295194e-06, "loss": 1.71, "step": 8942 }, { "epoch": 0.9289498286070427, "grad_norm": 0.43641364574432373, "learning_rate": 1.240412429969845e-06, "loss": 1.7413, "step": 8943 }, { "epoch": 0.929053703126623, "grad_norm": 0.4823327362537384, "learning_rate": 1.2368031606258934e-06, "loss": 1.8444, "step": 8944 }, { "epoch": 0.9291575776462034, "grad_norm": 0.4450342655181885, "learning_rate": 1.2331990841820407e-06, "loss": 1.8187, "step": 8945 }, { "epoch": 0.9292614521657837, "grad_norm": 0.4293372631072998, "learning_rate": 1.2296002010220964e-06, "loss": 1.6616, "step": 8946 }, { "epoch": 0.9293653266853641, "grad_norm": 0.4217541217803955, "learning_rate": 1.2260065115292984e-06, "loss": 1.6949, "step": 8947 }, { "epoch": 0.9294692012049445, "grad_norm": 0.4160800278186798, "learning_rate": 1.2224180160863574e-06, "loss": 1.535, "step": 8948 }, { "epoch": 0.9295730757245247, "grad_norm": 0.42056944966316223, "learning_rate": 1.2188347150754231e-06, "loss": 1.7167, "step": 8949 }, { "epoch": 0.9296769502441051, "grad_norm": 0.4336167871952057, "learning_rate": 1.2152566088780792e-06, "loss": 1.5799, "step": 8950 }, { "epoch": 0.9297808247636855, "grad_norm": 0.43166765570640564, "learning_rate": 1.2116836978753709e-06, "loss": 1.603, "step": 8951 }, { "epoch": 0.9298846992832658, "grad_norm": 0.4209384620189667, "learning_rate": 1.2081159824477883e-06, "loss": 1.5984, "step": 8952 }, { "epoch": 0.9299885738028462, "grad_norm": 0.42866531014442444, "learning_rate": 1.2045534629752609e-06, "loss": 1.6496, "step": 8953 }, { "epoch": 0.9300924483224265, "grad_norm": 0.44947803020477295, "learning_rate": 1.2009961398371628e-06, "loss": 1.4944, "step": 8954 }, { "epoch": 0.9301963228420068, "grad_norm": 0.4815939962863922, "learning_rate": 1.197444013412341e-06, "loss": 1.5212, "step": 8955 }, { "epoch": 0.9303001973615872, "grad_norm": 0.5035466551780701, "learning_rate": 1.1938970840790486e-06, "loss": 1.6037, "step": 8956 }, { "epoch": 0.9304040718811676, "grad_norm": 0.43349477648735046, "learning_rate": 1.1903553522150169e-06, "loss": 1.7366, "step": 8957 }, { "epoch": 0.9305079464007479, "grad_norm": 0.4126850366592407, "learning_rate": 1.186818818197416e-06, "loss": 1.5468, "step": 8958 }, { "epoch": 0.9306118209203282, "grad_norm": 0.4104941189289093, "learning_rate": 1.1832874824028451e-06, "loss": 1.5958, "step": 8959 }, { "epoch": 0.9307156954399086, "grad_norm": 0.422209769487381, "learning_rate": 1.1797613452073808e-06, "loss": 1.5762, "step": 8960 }, { "epoch": 0.9308195699594889, "grad_norm": 0.4042091965675354, "learning_rate": 1.1762404069865174e-06, "loss": 1.6074, "step": 8961 }, { "epoch": 0.9309234444790693, "grad_norm": 0.4372323751449585, "learning_rate": 1.1727246681152049e-06, "loss": 1.7324, "step": 8962 }, { "epoch": 0.9310273189986497, "grad_norm": 0.46790817379951477, "learning_rate": 1.1692141289678492e-06, "loss": 1.6437, "step": 8963 }, { "epoch": 0.9311311935182299, "grad_norm": 0.4523914158344269, "learning_rate": 1.1657087899183016e-06, "loss": 1.5817, "step": 8964 }, { "epoch": 0.9312350680378103, "grad_norm": 0.4646068215370178, "learning_rate": 1.162208651339841e-06, "loss": 1.7894, "step": 8965 }, { "epoch": 0.9313389425573907, "grad_norm": 0.4205915331840515, "learning_rate": 1.1587137136052083e-06, "loss": 1.5805, "step": 8966 }, { "epoch": 0.931442817076971, "grad_norm": 0.40744760632514954, "learning_rate": 1.1552239770865892e-06, "loss": 1.6191, "step": 8967 }, { "epoch": 0.9315466915965513, "grad_norm": 0.43079641461372375, "learning_rate": 1.1517394421556138e-06, "loss": 1.7234, "step": 8968 }, { "epoch": 0.9316505661161317, "grad_norm": 0.4436144232749939, "learning_rate": 1.1482601091833578e-06, "loss": 1.6766, "step": 8969 }, { "epoch": 0.9317544406357121, "grad_norm": 0.43747812509536743, "learning_rate": 1.1447859785403359e-06, "loss": 1.7669, "step": 8970 }, { "epoch": 0.9318583151552924, "grad_norm": 0.4351133108139038, "learning_rate": 1.1413170505965354e-06, "loss": 1.7189, "step": 8971 }, { "epoch": 0.9319621896748728, "grad_norm": 0.4412538409233093, "learning_rate": 1.1378533257213387e-06, "loss": 1.8092, "step": 8972 }, { "epoch": 0.9320660641944531, "grad_norm": 0.4323018789291382, "learning_rate": 1.134394804283634e-06, "loss": 1.8075, "step": 8973 }, { "epoch": 0.9321699387140334, "grad_norm": 0.47585269808769226, "learning_rate": 1.1309414866517209e-06, "loss": 1.7678, "step": 8974 }, { "epoch": 0.9322738132336138, "grad_norm": 0.5131593942642212, "learning_rate": 1.127493373193339e-06, "loss": 1.6321, "step": 8975 }, { "epoch": 0.9323776877531942, "grad_norm": 0.4566580057144165, "learning_rate": 1.1240504642756945e-06, "loss": 1.7411, "step": 8976 }, { "epoch": 0.9324815622727745, "grad_norm": 0.4639977514743805, "learning_rate": 1.1206127602654271e-06, "loss": 1.9319, "step": 8977 }, { "epoch": 0.9325854367923548, "grad_norm": 0.4284569323062897, "learning_rate": 1.1171802615286275e-06, "loss": 1.6936, "step": 8978 }, { "epoch": 0.9326893113119352, "grad_norm": 0.46785593032836914, "learning_rate": 1.1137529684308311e-06, "loss": 1.7217, "step": 8979 }, { "epoch": 0.9327931858315155, "grad_norm": 0.4252128303050995, "learning_rate": 1.110330881337013e-06, "loss": 1.6731, "step": 8980 }, { "epoch": 0.9328970603510959, "grad_norm": 0.4072088599205017, "learning_rate": 1.106914000611603e-06, "loss": 1.5772, "step": 8981 }, { "epoch": 0.9330009348706763, "grad_norm": 0.4445166289806366, "learning_rate": 1.1035023266184663e-06, "loss": 1.6541, "step": 8982 }, { "epoch": 0.9331048093902565, "grad_norm": 0.42075401544570923, "learning_rate": 1.1000958597209343e-06, "loss": 1.6717, "step": 8983 }, { "epoch": 0.9332086839098369, "grad_norm": 0.4416648745536804, "learning_rate": 1.0966946002817557e-06, "loss": 1.8383, "step": 8984 }, { "epoch": 0.9333125584294173, "grad_norm": 0.4478912055492401, "learning_rate": 1.0932985486631408e-06, "loss": 1.6386, "step": 8985 }, { "epoch": 0.9334164329489976, "grad_norm": 0.4134404957294464, "learning_rate": 1.0899077052267393e-06, "loss": 1.6147, "step": 8986 }, { "epoch": 0.933520307468578, "grad_norm": 0.38977158069610596, "learning_rate": 1.086522070333662e-06, "loss": 1.6074, "step": 8987 }, { "epoch": 0.9336241819881583, "grad_norm": 0.4343447685241699, "learning_rate": 1.0831416443444376e-06, "loss": 1.7838, "step": 8988 }, { "epoch": 0.9337280565077386, "grad_norm": 0.42949017882347107, "learning_rate": 1.0797664276190666e-06, "loss": 1.5811, "step": 8989 }, { "epoch": 0.933831931027319, "grad_norm": 0.4194577634334564, "learning_rate": 1.0763964205169786e-06, "loss": 1.7476, "step": 8990 }, { "epoch": 0.9339358055468994, "grad_norm": 0.42771607637405396, "learning_rate": 1.0730316233970473e-06, "loss": 1.7751, "step": 8991 }, { "epoch": 0.9340396800664796, "grad_norm": 0.4049451947212219, "learning_rate": 1.0696720366176138e-06, "loss": 1.5934, "step": 8992 }, { "epoch": 0.93414355458606, "grad_norm": 0.4572373628616333, "learning_rate": 1.0663176605364255e-06, "loss": 1.7941, "step": 8993 }, { "epoch": 0.9342474291056404, "grad_norm": 0.3885292708873749, "learning_rate": 1.0629684955107132e-06, "loss": 1.4659, "step": 8994 }, { "epoch": 0.9343513036252208, "grad_norm": 0.38825228810310364, "learning_rate": 1.0596245418971307e-06, "loss": 1.5177, "step": 8995 }, { "epoch": 0.9344551781448011, "grad_norm": 0.47034990787506104, "learning_rate": 1.0562858000517928e-06, "loss": 1.774, "step": 8996 }, { "epoch": 0.9345590526643814, "grad_norm": 0.5254196524620056, "learning_rate": 1.0529522703302319e-06, "loss": 1.578, "step": 8997 }, { "epoch": 0.9346629271839618, "grad_norm": 0.4228893518447876, "learning_rate": 1.049623953087453e-06, "loss": 1.6469, "step": 8998 }, { "epoch": 0.9347668017035421, "grad_norm": 0.4083818197250366, "learning_rate": 1.0463008486779058e-06, "loss": 1.5999, "step": 8999 }, { "epoch": 0.9348706762231225, "grad_norm": 0.417376846075058, "learning_rate": 1.0429829574554574e-06, "loss": 1.5968, "step": 9000 }, { "epoch": 0.9349745507427029, "grad_norm": 0.4227767884731293, "learning_rate": 1.0396702797734415e-06, "loss": 1.5762, "step": 9001 }, { "epoch": 0.9350784252622831, "grad_norm": 0.41051211953163147, "learning_rate": 1.0363628159846483e-06, "loss": 1.4743, "step": 9002 }, { "epoch": 0.9351822997818635, "grad_norm": 0.41701540350914, "learning_rate": 1.0330605664412684e-06, "loss": 1.6244, "step": 9003 }, { "epoch": 0.9352861743014439, "grad_norm": 0.44816792011260986, "learning_rate": 1.0297635314949872e-06, "loss": 1.8127, "step": 9004 }, { "epoch": 0.9353900488210242, "grad_norm": 0.4178299307823181, "learning_rate": 1.0264717114969125e-06, "loss": 1.5446, "step": 9005 }, { "epoch": 0.9354939233406045, "grad_norm": 0.4642837941646576, "learning_rate": 1.023185106797586e-06, "loss": 1.7789, "step": 9006 }, { "epoch": 0.9355977978601849, "grad_norm": 0.4191594421863556, "learning_rate": 1.0199037177470172e-06, "loss": 1.7449, "step": 9007 }, { "epoch": 0.9357016723797652, "grad_norm": 0.42831364274024963, "learning_rate": 1.0166275446946372e-06, "loss": 1.7044, "step": 9008 }, { "epoch": 0.9358055468993456, "grad_norm": 0.44095903635025024, "learning_rate": 1.0133565879893447e-06, "loss": 1.7159, "step": 9009 }, { "epoch": 0.935909421418926, "grad_norm": 0.4127524495124817, "learning_rate": 1.0100908479794558e-06, "loss": 1.5209, "step": 9010 }, { "epoch": 0.9360132959385062, "grad_norm": 0.45353084802627563, "learning_rate": 1.0068303250127697e-06, "loss": 1.7395, "step": 9011 }, { "epoch": 0.9361171704580866, "grad_norm": 0.4273471236228943, "learning_rate": 1.0035750194364758e-06, "loss": 1.6103, "step": 9012 }, { "epoch": 0.936221044977667, "grad_norm": 0.5556546449661255, "learning_rate": 1.0003249315972684e-06, "loss": 1.7848, "step": 9013 }, { "epoch": 0.9363249194972473, "grad_norm": 0.418753057718277, "learning_rate": 9.97080061841238e-07, "loss": 1.7764, "step": 9014 }, { "epoch": 0.9364287940168277, "grad_norm": 0.40560612082481384, "learning_rate": 9.938404105139465e-07, "loss": 1.6056, "step": 9015 }, { "epoch": 0.936532668536408, "grad_norm": 0.402055025100708, "learning_rate": 9.906059779603849e-07, "loss": 1.5732, "step": 9016 }, { "epoch": 0.9366365430559883, "grad_norm": 0.4551316499710083, "learning_rate": 9.873767645250055e-07, "loss": 1.8166, "step": 9017 }, { "epoch": 0.9367404175755687, "grad_norm": 0.4371841549873352, "learning_rate": 9.84152770551683e-07, "loss": 1.6245, "step": 9018 }, { "epoch": 0.9368442920951491, "grad_norm": 0.43440502882003784, "learning_rate": 9.809339963837426e-07, "loss": 1.6387, "step": 9019 }, { "epoch": 0.9369481666147295, "grad_norm": 0.41472846269607544, "learning_rate": 9.77720442363983e-07, "loss": 1.3704, "step": 9020 }, { "epoch": 0.9370520411343097, "grad_norm": 0.437029629945755, "learning_rate": 9.745121088346021e-07, "loss": 1.7435, "step": 9021 }, { "epoch": 0.9371559156538901, "grad_norm": 0.4492699205875397, "learning_rate": 9.713089961372657e-07, "loss": 1.8335, "step": 9022 }, { "epoch": 0.9372597901734705, "grad_norm": 0.41893166303634644, "learning_rate": 9.681111046130843e-07, "loss": 1.6002, "step": 9023 }, { "epoch": 0.9373636646930508, "grad_norm": 0.4353455603122711, "learning_rate": 9.64918434602613e-07, "loss": 1.7331, "step": 9024 }, { "epoch": 0.9374675392126312, "grad_norm": 0.44769999384880066, "learning_rate": 9.617309864458358e-07, "loss": 1.7173, "step": 9025 }, { "epoch": 0.9375714137322115, "grad_norm": 0.3922676146030426, "learning_rate": 9.585487604821863e-07, "loss": 1.5642, "step": 9026 }, { "epoch": 0.9376752882517918, "grad_norm": 0.4413311779499054, "learning_rate": 9.553717570505716e-07, "loss": 1.6186, "step": 9027 }, { "epoch": 0.9377791627713722, "grad_norm": 0.3945287764072418, "learning_rate": 9.521999764892875e-07, "loss": 1.5881, "step": 9028 }, { "epoch": 0.9378830372909526, "grad_norm": 0.41674908995628357, "learning_rate": 9.49033419136125e-07, "loss": 1.5853, "step": 9029 }, { "epoch": 0.9379869118105328, "grad_norm": 0.4065210521221161, "learning_rate": 9.458720853282977e-07, "loss": 1.5876, "step": 9030 }, { "epoch": 0.9380907863301132, "grad_norm": 0.43645021319389343, "learning_rate": 9.427159754024473e-07, "loss": 1.694, "step": 9031 }, { "epoch": 0.9381946608496936, "grad_norm": 0.4190250337123871, "learning_rate": 9.395650896946884e-07, "loss": 1.686, "step": 9032 }, { "epoch": 0.9382985353692739, "grad_norm": 0.4211812913417816, "learning_rate": 9.364194285405581e-07, "loss": 1.6361, "step": 9033 }, { "epoch": 0.9384024098888543, "grad_norm": 0.438837468624115, "learning_rate": 9.332789922750495e-07, "loss": 1.5855, "step": 9034 }, { "epoch": 0.9385062844084346, "grad_norm": 0.40594640374183655, "learning_rate": 9.301437812325952e-07, "loss": 1.6175, "step": 9035 }, { "epoch": 0.9386101589280149, "grad_norm": 0.4930059313774109, "learning_rate": 9.27013795747067e-07, "loss": 1.6779, "step": 9036 }, { "epoch": 0.9387140334475953, "grad_norm": 0.4809900224208832, "learning_rate": 9.238890361517815e-07, "loss": 1.7196, "step": 9037 }, { "epoch": 0.9388179079671757, "grad_norm": 0.4233071208000183, "learning_rate": 9.20769502779506e-07, "loss": 1.5481, "step": 9038 }, { "epoch": 0.938921782486756, "grad_norm": 0.43219685554504395, "learning_rate": 9.176551959624524e-07, "loss": 1.5561, "step": 9039 }, { "epoch": 0.9390256570063363, "grad_norm": 0.41195639967918396, "learning_rate": 9.145461160322555e-07, "loss": 1.6116, "step": 9040 }, { "epoch": 0.9391295315259167, "grad_norm": 0.3952676057815552, "learning_rate": 9.114422633200226e-07, "loss": 1.5454, "step": 9041 }, { "epoch": 0.939233406045497, "grad_norm": 0.41172322630882263, "learning_rate": 9.083436381562838e-07, "loss": 1.6116, "step": 9042 }, { "epoch": 0.9393372805650774, "grad_norm": 0.4490038752555847, "learning_rate": 9.05250240871014e-07, "loss": 1.7028, "step": 9043 }, { "epoch": 0.9394411550846578, "grad_norm": 0.46638983488082886, "learning_rate": 9.021620717936385e-07, "loss": 1.7121, "step": 9044 }, { "epoch": 0.9395450296042381, "grad_norm": 0.42944493889808655, "learning_rate": 8.990791312530222e-07, "loss": 1.6753, "step": 9045 }, { "epoch": 0.9396489041238184, "grad_norm": 0.4450928270816803, "learning_rate": 8.960014195774857e-07, "loss": 1.7523, "step": 9046 }, { "epoch": 0.9397527786433988, "grad_norm": 0.3949832022190094, "learning_rate": 8.929289370947669e-07, "loss": 1.5365, "step": 9047 }, { "epoch": 0.9398566531629792, "grad_norm": 0.41552335023880005, "learning_rate": 8.89861684132065e-07, "loss": 1.7463, "step": 9048 }, { "epoch": 0.9399605276825594, "grad_norm": 0.40294694900512695, "learning_rate": 8.867996610160246e-07, "loss": 1.6094, "step": 9049 }, { "epoch": 0.9400644022021398, "grad_norm": 0.43670380115509033, "learning_rate": 8.837428680727178e-07, "loss": 1.7627, "step": 9050 }, { "epoch": 0.9401682767217202, "grad_norm": 0.4421059787273407, "learning_rate": 8.806913056276788e-07, "loss": 1.7939, "step": 9051 }, { "epoch": 0.9402721512413005, "grad_norm": 0.48101523518562317, "learning_rate": 8.776449740058645e-07, "loss": 1.7245, "step": 9052 }, { "epoch": 0.9403760257608809, "grad_norm": 0.41619133949279785, "learning_rate": 8.746038735316931e-07, "loss": 1.5208, "step": 9053 }, { "epoch": 0.9404799002804612, "grad_norm": 0.46501943469047546, "learning_rate": 8.715680045290165e-07, "loss": 1.8578, "step": 9054 }, { "epoch": 0.9405837748000415, "grad_norm": 0.443288654088974, "learning_rate": 8.685373673211372e-07, "loss": 1.6814, "step": 9055 }, { "epoch": 0.9406876493196219, "grad_norm": 0.39399245381355286, "learning_rate": 8.655119622307806e-07, "loss": 1.327, "step": 9056 }, { "epoch": 0.9407915238392023, "grad_norm": 0.40905261039733887, "learning_rate": 8.624917895801388e-07, "loss": 1.6839, "step": 9057 }, { "epoch": 0.9408953983587826, "grad_norm": 0.4420108199119568, "learning_rate": 8.594768496908378e-07, "loss": 1.5596, "step": 9058 }, { "epoch": 0.9409992728783629, "grad_norm": 0.3948979675769806, "learning_rate": 8.564671428839321e-07, "loss": 1.5437, "step": 9059 }, { "epoch": 0.9411031473979433, "grad_norm": 0.415421724319458, "learning_rate": 8.534626694799485e-07, "loss": 1.5591, "step": 9060 }, { "epoch": 0.9412070219175236, "grad_norm": 0.41926440596580505, "learning_rate": 8.504634297988312e-07, "loss": 1.7288, "step": 9061 }, { "epoch": 0.941310896437104, "grad_norm": 0.3910306990146637, "learning_rate": 8.4746942415998e-07, "loss": 1.5736, "step": 9062 }, { "epoch": 0.9414147709566844, "grad_norm": 0.4041973054409027, "learning_rate": 8.444806528822291e-07, "loss": 1.5708, "step": 9063 }, { "epoch": 0.9415186454762646, "grad_norm": 0.423279732465744, "learning_rate": 8.41497116283857e-07, "loss": 1.6112, "step": 9064 }, { "epoch": 0.941622519995845, "grad_norm": 0.4005332589149475, "learning_rate": 8.385188146825984e-07, "loss": 1.6238, "step": 9065 }, { "epoch": 0.9417263945154254, "grad_norm": 0.4571692943572998, "learning_rate": 8.355457483955998e-07, "loss": 1.8539, "step": 9066 }, { "epoch": 0.9418302690350057, "grad_norm": 0.4386880695819855, "learning_rate": 8.325779177394965e-07, "loss": 1.709, "step": 9067 }, { "epoch": 0.941934143554586, "grad_norm": 0.4796215891838074, "learning_rate": 8.296153230303139e-07, "loss": 1.8527, "step": 9068 }, { "epoch": 0.9420380180741664, "grad_norm": 0.40631356835365295, "learning_rate": 8.266579645835548e-07, "loss": 1.4964, "step": 9069 }, { "epoch": 0.9421418925937467, "grad_norm": 0.40857720375061035, "learning_rate": 8.237058427141619e-07, "loss": 1.5452, "step": 9070 }, { "epoch": 0.9422457671133271, "grad_norm": 0.4160623550415039, "learning_rate": 8.20758957736506e-07, "loss": 1.6578, "step": 9071 }, { "epoch": 0.9423496416329075, "grad_norm": 0.4103771448135376, "learning_rate": 8.178173099644082e-07, "loss": 1.4836, "step": 9072 }, { "epoch": 0.9424535161524878, "grad_norm": 0.39245137572288513, "learning_rate": 8.148808997111234e-07, "loss": 1.4885, "step": 9073 }, { "epoch": 0.9425573906720681, "grad_norm": 0.4215735197067261, "learning_rate": 8.119497272893739e-07, "loss": 1.4961, "step": 9074 }, { "epoch": 0.9426612651916485, "grad_norm": 0.41241350769996643, "learning_rate": 8.090237930112932e-07, "loss": 1.7644, "step": 9075 }, { "epoch": 0.9427651397112289, "grad_norm": 0.41640931367874146, "learning_rate": 8.061030971884709e-07, "loss": 1.7283, "step": 9076 }, { "epoch": 0.9428690142308092, "grad_norm": 0.42004087567329407, "learning_rate": 8.031876401319471e-07, "loss": 1.7438, "step": 9077 }, { "epoch": 0.9429728887503895, "grad_norm": 0.4231506884098053, "learning_rate": 8.002774221521903e-07, "loss": 1.5288, "step": 9078 }, { "epoch": 0.9430767632699699, "grad_norm": 0.4708472192287445, "learning_rate": 7.973724435591135e-07, "loss": 1.8106, "step": 9079 }, { "epoch": 0.9431806377895502, "grad_norm": 0.4298073649406433, "learning_rate": 7.944727046620748e-07, "loss": 1.7294, "step": 9080 }, { "epoch": 0.9432845123091306, "grad_norm": 0.44677823781967163, "learning_rate": 7.915782057698772e-07, "loss": 1.6364, "step": 9081 }, { "epoch": 0.943388386828711, "grad_norm": 0.4536736011505127, "learning_rate": 7.886889471907521e-07, "loss": 1.7965, "step": 9082 }, { "epoch": 0.9434922613482912, "grad_norm": 0.4077182114124298, "learning_rate": 7.858049292324032e-07, "loss": 1.6954, "step": 9083 }, { "epoch": 0.9435961358678716, "grad_norm": 0.4194972813129425, "learning_rate": 7.829261522019349e-07, "loss": 1.5585, "step": 9084 }, { "epoch": 0.943700010387452, "grad_norm": 0.4863661825656891, "learning_rate": 7.800526164059296e-07, "loss": 1.6892, "step": 9085 }, { "epoch": 0.9438038849070323, "grad_norm": 0.49641919136047363, "learning_rate": 7.771843221503927e-07, "loss": 1.816, "step": 9086 }, { "epoch": 0.9439077594266126, "grad_norm": 0.3937380015850067, "learning_rate": 7.743212697407631e-07, "loss": 1.5559, "step": 9087 }, { "epoch": 0.944011633946193, "grad_norm": 0.45555490255355835, "learning_rate": 7.71463459481947e-07, "loss": 1.7509, "step": 9088 }, { "epoch": 0.9441155084657733, "grad_norm": 0.3873693645000458, "learning_rate": 7.686108916782786e-07, "loss": 1.4442, "step": 9089 }, { "epoch": 0.9442193829853537, "grad_norm": 0.4046841859817505, "learning_rate": 7.657635666335316e-07, "loss": 1.5161, "step": 9090 }, { "epoch": 0.9443232575049341, "grad_norm": 0.41723448038101196, "learning_rate": 7.629214846509191e-07, "loss": 1.7279, "step": 9091 }, { "epoch": 0.9444271320245143, "grad_norm": 0.39067700505256653, "learning_rate": 7.600846460331046e-07, "loss": 1.6004, "step": 9092 }, { "epoch": 0.9445310065440947, "grad_norm": 0.434877872467041, "learning_rate": 7.572530510821907e-07, "loss": 1.6926, "step": 9093 }, { "epoch": 0.9446348810636751, "grad_norm": 0.40955984592437744, "learning_rate": 7.544267000997141e-07, "loss": 1.6929, "step": 9094 }, { "epoch": 0.9447387555832554, "grad_norm": 0.4047926962375641, "learning_rate": 7.516055933866728e-07, "loss": 1.6725, "step": 9095 }, { "epoch": 0.9448426301028358, "grad_norm": 0.42006105184555054, "learning_rate": 7.487897312434711e-07, "loss": 1.76, "step": 9096 }, { "epoch": 0.9449465046224161, "grad_norm": 0.4147946536540985, "learning_rate": 7.459791139699968e-07, "loss": 1.6635, "step": 9097 }, { "epoch": 0.9450503791419965, "grad_norm": 0.440835565328598, "learning_rate": 7.431737418655494e-07, "loss": 1.7047, "step": 9098 }, { "epoch": 0.9451542536615768, "grad_norm": 0.38934168219566345, "learning_rate": 7.403736152288842e-07, "loss": 1.4739, "step": 9099 }, { "epoch": 0.9452581281811572, "grad_norm": 0.45924264192581177, "learning_rate": 7.375787343581852e-07, "loss": 1.8124, "step": 9100 }, { "epoch": 0.9453620027007376, "grad_norm": 0.45659106969833374, "learning_rate": 7.347890995510864e-07, "loss": 1.7298, "step": 9101 }, { "epoch": 0.9454658772203178, "grad_norm": 0.4247036278247833, "learning_rate": 7.320047111046724e-07, "loss": 1.6317, "step": 9102 }, { "epoch": 0.9455697517398982, "grad_norm": 0.41282743215560913, "learning_rate": 7.292255693154449e-07, "loss": 1.6771, "step": 9103 }, { "epoch": 0.9456736262594786, "grad_norm": 0.41268548369407654, "learning_rate": 7.264516744793671e-07, "loss": 1.6163, "step": 9104 }, { "epoch": 0.9457775007790589, "grad_norm": 0.3944796621799469, "learning_rate": 7.236830268918415e-07, "loss": 1.6949, "step": 9105 }, { "epoch": 0.9458813752986392, "grad_norm": 0.4076094925403595, "learning_rate": 7.209196268477047e-07, "loss": 1.5172, "step": 9106 }, { "epoch": 0.9459852498182196, "grad_norm": 0.41099345684051514, "learning_rate": 7.181614746412324e-07, "loss": 1.5826, "step": 9107 }, { "epoch": 0.9460891243377999, "grad_norm": 0.5668470859527588, "learning_rate": 7.154085705661506e-07, "loss": 1.8974, "step": 9108 }, { "epoch": 0.9461929988573803, "grad_norm": 0.4689236879348755, "learning_rate": 7.126609149156249e-07, "loss": 1.7432, "step": 9109 }, { "epoch": 0.9462968733769607, "grad_norm": 0.42818373441696167, "learning_rate": 7.09918507982249e-07, "loss": 1.614, "step": 9110 }, { "epoch": 0.9464007478965409, "grad_norm": 0.40631645917892456, "learning_rate": 7.071813500580837e-07, "loss": 1.6488, "step": 9111 }, { "epoch": 0.9465046224161213, "grad_norm": 0.38255685567855835, "learning_rate": 7.044494414346015e-07, "loss": 1.5024, "step": 9112 }, { "epoch": 0.9466084969357017, "grad_norm": 0.42258989810943604, "learning_rate": 7.017227824027305e-07, "loss": 1.6628, "step": 9113 }, { "epoch": 0.946712371455282, "grad_norm": 0.40682870149612427, "learning_rate": 6.990013732528444e-07, "loss": 1.6693, "step": 9114 }, { "epoch": 0.9468162459748624, "grad_norm": 0.4137510657310486, "learning_rate": 6.9628521427475e-07, "loss": 1.5755, "step": 9115 }, { "epoch": 0.9469201204944427, "grad_norm": 0.446568101644516, "learning_rate": 6.935743057576938e-07, "loss": 1.6624, "step": 9116 }, { "epoch": 0.947023995014023, "grad_norm": 0.41516193747520447, "learning_rate": 6.908686479903725e-07, "loss": 1.6738, "step": 9117 }, { "epoch": 0.9471278695336034, "grad_norm": 0.4611627459526062, "learning_rate": 6.881682412609114e-07, "loss": 1.7311, "step": 9118 }, { "epoch": 0.9472317440531838, "grad_norm": 0.5270797610282898, "learning_rate": 6.854730858568914e-07, "loss": 1.7126, "step": 9119 }, { "epoch": 0.947335618572764, "grad_norm": 0.41994228959083557, "learning_rate": 6.827831820653163e-07, "loss": 1.7939, "step": 9120 }, { "epoch": 0.9474394930923444, "grad_norm": 0.44093456864356995, "learning_rate": 6.800985301726404e-07, "loss": 1.6198, "step": 9121 }, { "epoch": 0.9475433676119248, "grad_norm": 0.4290820360183716, "learning_rate": 6.77419130464757e-07, "loss": 1.73, "step": 9122 }, { "epoch": 0.9476472421315052, "grad_norm": 0.4182687997817993, "learning_rate": 6.747449832270158e-07, "loss": 1.6677, "step": 9123 }, { "epoch": 0.9477511166510855, "grad_norm": 0.4348759949207306, "learning_rate": 6.720760887441779e-07, "loss": 1.8017, "step": 9124 }, { "epoch": 0.9478549911706659, "grad_norm": 0.40973037481307983, "learning_rate": 6.694124473004604e-07, "loss": 1.6173, "step": 9125 }, { "epoch": 0.9479588656902462, "grad_norm": 0.4033758342266083, "learning_rate": 6.667540591795307e-07, "loss": 1.5386, "step": 9126 }, { "epoch": 0.9480627402098265, "grad_norm": 0.4231725037097931, "learning_rate": 6.641009246644736e-07, "loss": 1.652, "step": 9127 }, { "epoch": 0.9481666147294069, "grad_norm": 0.40688326954841614, "learning_rate": 6.614530440378352e-07, "loss": 1.6695, "step": 9128 }, { "epoch": 0.9482704892489873, "grad_norm": 0.38211679458618164, "learning_rate": 6.588104175815901e-07, "loss": 1.4878, "step": 9129 }, { "epoch": 0.9483743637685675, "grad_norm": 0.4489685595035553, "learning_rate": 6.561730455771686e-07, "loss": 1.6489, "step": 9130 }, { "epoch": 0.9484782382881479, "grad_norm": 0.4730764627456665, "learning_rate": 6.535409283054073e-07, "loss": 1.6737, "step": 9131 }, { "epoch": 0.9485821128077283, "grad_norm": 0.38945716619491577, "learning_rate": 6.509140660466318e-07, "loss": 1.6735, "step": 9132 }, { "epoch": 0.9486859873273086, "grad_norm": 0.4208957254886627, "learning_rate": 6.482924590805627e-07, "loss": 1.6911, "step": 9133 }, { "epoch": 0.948789861846889, "grad_norm": 0.5277408361434937, "learning_rate": 6.456761076863937e-07, "loss": 1.5338, "step": 9134 }, { "epoch": 0.9488937363664693, "grad_norm": 0.39843320846557617, "learning_rate": 6.430650121427406e-07, "loss": 1.5234, "step": 9135 }, { "epoch": 0.9489976108860496, "grad_norm": 0.42443880438804626, "learning_rate": 6.404591727276588e-07, "loss": 1.7075, "step": 9136 }, { "epoch": 0.94910148540563, "grad_norm": 0.4635830223560333, "learning_rate": 6.378585897186651e-07, "loss": 1.6455, "step": 9137 }, { "epoch": 0.9492053599252104, "grad_norm": 0.39471131563186646, "learning_rate": 6.352632633926825e-07, "loss": 1.5482, "step": 9138 }, { "epoch": 0.9493092344447907, "grad_norm": 0.45582547783851624, "learning_rate": 6.326731940261122e-07, "loss": 1.7572, "step": 9139 }, { "epoch": 0.949413108964371, "grad_norm": 0.4490654170513153, "learning_rate": 6.300883818947611e-07, "loss": 1.6723, "step": 9140 }, { "epoch": 0.9495169834839514, "grad_norm": 0.38818129897117615, "learning_rate": 6.275088272738927e-07, "loss": 1.7014, "step": 9141 }, { "epoch": 0.9496208580035317, "grad_norm": 0.4169076383113861, "learning_rate": 6.249345304382259e-07, "loss": 1.6569, "step": 9142 }, { "epoch": 0.9497247325231121, "grad_norm": 0.39485812187194824, "learning_rate": 6.223654916618804e-07, "loss": 1.6363, "step": 9143 }, { "epoch": 0.9498286070426925, "grad_norm": 0.41402456164360046, "learning_rate": 6.19801711218454e-07, "loss": 1.5838, "step": 9144 }, { "epoch": 0.9499324815622727, "grad_norm": 0.4002974331378937, "learning_rate": 6.172431893809616e-07, "loss": 1.4252, "step": 9145 }, { "epoch": 0.9500363560818531, "grad_norm": 0.42501163482666016, "learning_rate": 6.146899264218742e-07, "loss": 1.5948, "step": 9146 }, { "epoch": 0.9501402306014335, "grad_norm": 0.4167088270187378, "learning_rate": 6.121419226130909e-07, "loss": 1.5962, "step": 9147 }, { "epoch": 0.9502441051210138, "grad_norm": 0.42873653769493103, "learning_rate": 6.095991782259447e-07, "loss": 1.6758, "step": 9148 }, { "epoch": 0.9503479796405941, "grad_norm": 0.37248915433883667, "learning_rate": 6.070616935312357e-07, "loss": 1.4357, "step": 9149 }, { "epoch": 0.9504518541601745, "grad_norm": 0.4385361671447754, "learning_rate": 6.045294687991643e-07, "loss": 1.8251, "step": 9150 }, { "epoch": 0.9505557286797549, "grad_norm": 0.44529131054878235, "learning_rate": 6.020025042994148e-07, "loss": 1.8014, "step": 9151 }, { "epoch": 0.9506596031993352, "grad_norm": 0.4091431498527527, "learning_rate": 5.994808003010777e-07, "loss": 1.5917, "step": 9152 }, { "epoch": 0.9507634777189156, "grad_norm": 0.4284379780292511, "learning_rate": 5.969643570726935e-07, "loss": 1.5155, "step": 9153 }, { "epoch": 0.9508673522384959, "grad_norm": 0.4677438735961914, "learning_rate": 5.944531748822535e-07, "loss": 1.489, "step": 9154 }, { "epoch": 0.9509712267580762, "grad_norm": 0.4163029193878174, "learning_rate": 5.91947253997166e-07, "loss": 1.4501, "step": 9155 }, { "epoch": 0.9510751012776566, "grad_norm": 0.41146647930145264, "learning_rate": 5.894465946843008e-07, "loss": 1.5144, "step": 9156 }, { "epoch": 0.951178975797237, "grad_norm": 0.4144413471221924, "learning_rate": 5.869511972099506e-07, "loss": 1.6731, "step": 9157 }, { "epoch": 0.9512828503168173, "grad_norm": 0.4550022482872009, "learning_rate": 5.844610618398693e-07, "loss": 1.7321, "step": 9158 }, { "epoch": 0.9513867248363976, "grad_norm": 0.4146331548690796, "learning_rate": 5.819761888392228e-07, "loss": 1.7712, "step": 9159 }, { "epoch": 0.951490599355978, "grad_norm": 0.4827490746974945, "learning_rate": 5.794965784726436e-07, "loss": 1.7595, "step": 9160 }, { "epoch": 0.9515944738755583, "grad_norm": 0.4096835255622864, "learning_rate": 5.770222310041817e-07, "loss": 1.7085, "step": 9161 }, { "epoch": 0.9516983483951387, "grad_norm": 0.4233892261981964, "learning_rate": 5.745531466973375e-07, "loss": 1.7196, "step": 9162 }, { "epoch": 0.951802222914719, "grad_norm": 0.457246333360672, "learning_rate": 5.720893258150451e-07, "loss": 1.7608, "step": 9163 }, { "epoch": 0.9519060974342993, "grad_norm": 0.40001896023750305, "learning_rate": 5.696307686196944e-07, "loss": 1.6069, "step": 9164 }, { "epoch": 0.9520099719538797, "grad_norm": 0.40769293904304504, "learning_rate": 5.67177475373093e-07, "loss": 1.5777, "step": 9165 }, { "epoch": 0.9521138464734601, "grad_norm": 0.4234029948711395, "learning_rate": 5.647294463364927e-07, "loss": 1.8119, "step": 9166 }, { "epoch": 0.9522177209930404, "grad_norm": 0.4327137768268585, "learning_rate": 5.622866817706074e-07, "loss": 1.5702, "step": 9167 }, { "epoch": 0.9523215955126207, "grad_norm": 0.4188234806060791, "learning_rate": 5.598491819355567e-07, "loss": 1.679, "step": 9168 }, { "epoch": 0.9524254700322011, "grad_norm": 0.44610387086868286, "learning_rate": 5.574169470909163e-07, "loss": 1.6981, "step": 9169 }, { "epoch": 0.9525293445517814, "grad_norm": 0.4079190194606781, "learning_rate": 5.549899774957123e-07, "loss": 1.5524, "step": 9170 }, { "epoch": 0.9526332190713618, "grad_norm": 0.4039902985095978, "learning_rate": 5.52568273408377e-07, "loss": 1.7129, "step": 9171 }, { "epoch": 0.9527370935909422, "grad_norm": 0.4110052287578583, "learning_rate": 5.501518350868262e-07, "loss": 1.5757, "step": 9172 }, { "epoch": 0.9528409681105224, "grad_norm": 0.3944413959980011, "learning_rate": 5.477406627883763e-07, "loss": 1.6487, "step": 9173 }, { "epoch": 0.9529448426301028, "grad_norm": 0.4401375949382782, "learning_rate": 5.453347567698108e-07, "loss": 1.7217, "step": 9174 }, { "epoch": 0.9530487171496832, "grad_norm": 0.40892985463142395, "learning_rate": 5.429341172873248e-07, "loss": 1.644, "step": 9175 }, { "epoch": 0.9531525916692636, "grad_norm": 0.40419328212738037, "learning_rate": 5.405387445965748e-07, "loss": 1.5666, "step": 9176 }, { "epoch": 0.9532564661888439, "grad_norm": 0.4269610643386841, "learning_rate": 5.381486389526569e-07, "loss": 1.5313, "step": 9177 }, { "epoch": 0.9533603407084242, "grad_norm": 0.4391513764858246, "learning_rate": 5.35763800610084e-07, "loss": 1.6146, "step": 9178 }, { "epoch": 0.9534642152280046, "grad_norm": 0.4620340168476105, "learning_rate": 5.333842298228364e-07, "loss": 1.7637, "step": 9179 }, { "epoch": 0.9535680897475849, "grad_norm": 0.44196537137031555, "learning_rate": 5.310099268443114e-07, "loss": 1.7589, "step": 9180 }, { "epoch": 0.9536719642671653, "grad_norm": 0.42510437965393066, "learning_rate": 5.286408919273567e-07, "loss": 1.7186, "step": 9181 }, { "epoch": 0.9537758387867457, "grad_norm": 0.4449099600315094, "learning_rate": 5.262771253242593e-07, "loss": 1.698, "step": 9182 }, { "epoch": 0.9538797133063259, "grad_norm": 0.4278699457645416, "learning_rate": 5.239186272867347e-07, "loss": 1.4785, "step": 9183 }, { "epoch": 0.9539835878259063, "grad_norm": 0.40362176299095154, "learning_rate": 5.215653980659485e-07, "loss": 1.7033, "step": 9184 }, { "epoch": 0.9540874623454867, "grad_norm": 0.4301554262638092, "learning_rate": 5.192174379124948e-07, "loss": 1.6292, "step": 9185 }, { "epoch": 0.954191336865067, "grad_norm": 0.4313088655471802, "learning_rate": 5.16874747076429e-07, "loss": 1.5956, "step": 9186 }, { "epoch": 0.9542952113846473, "grad_norm": 0.43892988562583923, "learning_rate": 5.145373258072129e-07, "loss": 1.7799, "step": 9187 }, { "epoch": 0.9543990859042277, "grad_norm": 0.4435426890850067, "learning_rate": 5.122051743537692e-07, "loss": 1.7143, "step": 9188 }, { "epoch": 0.954502960423808, "grad_norm": 0.4262649416923523, "learning_rate": 5.098782929644607e-07, "loss": 1.5772, "step": 9189 }, { "epoch": 0.9546068349433884, "grad_norm": 0.4397432804107666, "learning_rate": 5.075566818870725e-07, "loss": 1.7397, "step": 9190 }, { "epoch": 0.9547107094629688, "grad_norm": 0.42559805512428284, "learning_rate": 5.0524034136884e-07, "loss": 1.6717, "step": 9191 }, { "epoch": 0.954814583982549, "grad_norm": 0.40015941858291626, "learning_rate": 5.029292716564382e-07, "loss": 1.6091, "step": 9192 }, { "epoch": 0.9549184585021294, "grad_norm": 0.43587568402290344, "learning_rate": 5.006234729959813e-07, "loss": 1.6914, "step": 9193 }, { "epoch": 0.9550223330217098, "grad_norm": 0.40942129492759705, "learning_rate": 4.983229456330063e-07, "loss": 1.5895, "step": 9194 }, { "epoch": 0.9551262075412901, "grad_norm": 0.45830440521240234, "learning_rate": 4.960276898125227e-07, "loss": 1.7004, "step": 9195 }, { "epoch": 0.9552300820608705, "grad_norm": 0.45352327823638916, "learning_rate": 4.93737705778935e-07, "loss": 1.684, "step": 9196 }, { "epoch": 0.9553339565804508, "grad_norm": 0.4297914206981659, "learning_rate": 4.914529937761147e-07, "loss": 1.8311, "step": 9197 }, { "epoch": 0.9554378311000311, "grad_norm": 0.40148767828941345, "learning_rate": 4.891735540473785e-07, "loss": 1.5433, "step": 9198 }, { "epoch": 0.9555417056196115, "grad_norm": 0.433064341545105, "learning_rate": 4.868993868354544e-07, "loss": 1.6219, "step": 9199 }, { "epoch": 0.9556455801391919, "grad_norm": 0.4220326542854309, "learning_rate": 4.846304923825263e-07, "loss": 1.6918, "step": 9200 }, { "epoch": 0.9557494546587723, "grad_norm": 0.43805915117263794, "learning_rate": 4.823668709302176e-07, "loss": 1.5888, "step": 9201 }, { "epoch": 0.9558533291783525, "grad_norm": 0.4448419511318207, "learning_rate": 4.801085227195856e-07, "loss": 1.6003, "step": 9202 }, { "epoch": 0.9559572036979329, "grad_norm": 0.42299073934555054, "learning_rate": 4.778554479911268e-07, "loss": 1.6384, "step": 9203 }, { "epoch": 0.9560610782175133, "grad_norm": 0.405784547328949, "learning_rate": 4.7560764698477146e-07, "loss": 1.6106, "step": 9204 }, { "epoch": 0.9561649527370936, "grad_norm": 0.41542530059814453, "learning_rate": 4.7336511993990586e-07, "loss": 1.6064, "step": 9205 }, { "epoch": 0.956268827256674, "grad_norm": 0.45177707076072693, "learning_rate": 4.711278670953223e-07, "loss": 1.8089, "step": 9206 }, { "epoch": 0.9563727017762543, "grad_norm": 0.45689210295677185, "learning_rate": 4.688958886892858e-07, "loss": 1.5888, "step": 9207 }, { "epoch": 0.9564765762958346, "grad_norm": 0.4608011245727539, "learning_rate": 4.666691849594784e-07, "loss": 1.8167, "step": 9208 }, { "epoch": 0.956580450815415, "grad_norm": 0.40499821305274963, "learning_rate": 4.6444775614302714e-07, "loss": 1.6155, "step": 9209 }, { "epoch": 0.9566843253349954, "grad_norm": 0.3993094265460968, "learning_rate": 4.6223160247650386e-07, "loss": 1.5941, "step": 9210 }, { "epoch": 0.9567881998545756, "grad_norm": 0.4130497872829437, "learning_rate": 4.600207241958976e-07, "loss": 1.5724, "step": 9211 }, { "epoch": 0.956892074374156, "grad_norm": 0.4392036497592926, "learning_rate": 4.5781512153665884e-07, "loss": 1.6075, "step": 9212 }, { "epoch": 0.9569959488937364, "grad_norm": 0.4254901111125946, "learning_rate": 4.5561479473366644e-07, "loss": 1.7026, "step": 9213 }, { "epoch": 0.9570998234133167, "grad_norm": 0.4199539124965668, "learning_rate": 4.53419744021244e-07, "loss": 1.5035, "step": 9214 }, { "epoch": 0.9572036979328971, "grad_norm": 0.4554733633995056, "learning_rate": 4.5122996963313234e-07, "loss": 1.6648, "step": 9215 }, { "epoch": 0.9573075724524774, "grad_norm": 0.4227248430252075, "learning_rate": 4.4904547180253386e-07, "loss": 1.5704, "step": 9216 }, { "epoch": 0.9574114469720577, "grad_norm": 0.37971073389053345, "learning_rate": 4.468662507620847e-07, "loss": 1.5082, "step": 9217 }, { "epoch": 0.9575153214916381, "grad_norm": 0.40242883563041687, "learning_rate": 4.446923067438491e-07, "loss": 1.5791, "step": 9218 }, { "epoch": 0.9576191960112185, "grad_norm": 0.43031686544418335, "learning_rate": 4.425236399793364e-07, "loss": 1.6155, "step": 9219 }, { "epoch": 0.9577230705307987, "grad_norm": 0.4682821035385132, "learning_rate": 4.403602506994897e-07, "loss": 1.7276, "step": 9220 }, { "epoch": 0.9578269450503791, "grad_norm": 0.4437069892883301, "learning_rate": 4.382021391346969e-07, "loss": 1.6508, "step": 9221 }, { "epoch": 0.9579308195699595, "grad_norm": 0.38210955262184143, "learning_rate": 4.360493055147796e-07, "loss": 1.5887, "step": 9222 }, { "epoch": 0.9580346940895398, "grad_norm": 0.4004855751991272, "learning_rate": 4.339017500690046e-07, "loss": 1.4538, "step": 9223 }, { "epoch": 0.9581385686091202, "grad_norm": 0.41363096237182617, "learning_rate": 4.3175947302605547e-07, "loss": 1.6011, "step": 9224 }, { "epoch": 0.9582424431287005, "grad_norm": 0.42075902223587036, "learning_rate": 4.296224746140776e-07, "loss": 1.6147, "step": 9225 }, { "epoch": 0.9583463176482809, "grad_norm": 0.45383062958717346, "learning_rate": 4.2749075506063883e-07, "loss": 1.6448, "step": 9226 }, { "epoch": 0.9584501921678612, "grad_norm": 0.40463367104530334, "learning_rate": 4.253643145927577e-07, "loss": 1.663, "step": 9227 }, { "epoch": 0.9585540666874416, "grad_norm": 0.4023562967777252, "learning_rate": 4.232431534368808e-07, "loss": 1.4755, "step": 9228 }, { "epoch": 0.958657941207022, "grad_norm": 0.4149220883846283, "learning_rate": 4.211272718188941e-07, "loss": 1.6507, "step": 9229 }, { "epoch": 0.9587618157266022, "grad_norm": 0.42307958006858826, "learning_rate": 4.190166699641229e-07, "loss": 1.7877, "step": 9230 }, { "epoch": 0.9588656902461826, "grad_norm": 0.4115198254585266, "learning_rate": 4.169113480973319e-07, "loss": 1.5952, "step": 9231 }, { "epoch": 0.958969564765763, "grad_norm": 0.4087419807910919, "learning_rate": 4.148113064427139e-07, "loss": 1.6713, "step": 9232 }, { "epoch": 0.9590734392853433, "grad_norm": 0.43182262778282166, "learning_rate": 4.127165452239179e-07, "loss": 1.728, "step": 9233 }, { "epoch": 0.9591773138049237, "grad_norm": 0.41632604598999023, "learning_rate": 4.106270646640098e-07, "loss": 1.7331, "step": 9234 }, { "epoch": 0.959281188324504, "grad_norm": 0.4216884970664978, "learning_rate": 4.085428649855061e-07, "loss": 1.7273, "step": 9235 }, { "epoch": 0.9593850628440843, "grad_norm": 0.39997923374176025, "learning_rate": 4.0646394641036255e-07, "loss": 1.6779, "step": 9236 }, { "epoch": 0.9594889373636647, "grad_norm": 0.4439750015735626, "learning_rate": 4.043903091599632e-07, "loss": 1.8105, "step": 9237 }, { "epoch": 0.9595928118832451, "grad_norm": 0.4040851593017578, "learning_rate": 4.02321953455137e-07, "loss": 1.619, "step": 9238 }, { "epoch": 0.9596966864028253, "grad_norm": 0.40160131454467773, "learning_rate": 4.002588795161466e-07, "loss": 1.5693, "step": 9239 }, { "epoch": 0.9598005609224057, "grad_norm": 0.49092456698417664, "learning_rate": 3.9820108756268846e-07, "loss": 1.8386, "step": 9240 }, { "epoch": 0.9599044354419861, "grad_norm": 0.4417165517807007, "learning_rate": 3.9614857781390955e-07, "loss": 1.5334, "step": 9241 }, { "epoch": 0.9600083099615664, "grad_norm": 0.4167068302631378, "learning_rate": 3.941013504883795e-07, "loss": 1.7032, "step": 9242 }, { "epoch": 0.9601121844811468, "grad_norm": 0.42593681812286377, "learning_rate": 3.920594058041183e-07, "loss": 1.741, "step": 9243 }, { "epoch": 0.9602160590007272, "grad_norm": 0.4693407118320465, "learning_rate": 3.9002274397856863e-07, "loss": 1.8403, "step": 9244 }, { "epoch": 0.9603199335203074, "grad_norm": 0.41874200105667114, "learning_rate": 3.8799136522862933e-07, "loss": 1.6718, "step": 9245 }, { "epoch": 0.9604238080398878, "grad_norm": 0.42587071657180786, "learning_rate": 3.859652697706162e-07, "loss": 1.5569, "step": 9246 }, { "epoch": 0.9605276825594682, "grad_norm": 0.4416424334049225, "learning_rate": 3.8394445782030107e-07, "loss": 1.5302, "step": 9247 }, { "epoch": 0.9606315570790485, "grad_norm": 0.4183494746685028, "learning_rate": 3.819289295928785e-07, "loss": 1.6514, "step": 9248 }, { "epoch": 0.9607354315986288, "grad_norm": 0.46521294116973877, "learning_rate": 3.7991868530299345e-07, "loss": 1.5298, "step": 9249 }, { "epoch": 0.9608393061182092, "grad_norm": 0.424907386302948, "learning_rate": 3.7791372516470803e-07, "loss": 1.7081, "step": 9250 }, { "epoch": 0.9609431806377895, "grad_norm": 0.44091981649398804, "learning_rate": 3.759140493915514e-07, "loss": 1.7594, "step": 9251 }, { "epoch": 0.9610470551573699, "grad_norm": 0.4381832778453827, "learning_rate": 3.7391965819646435e-07, "loss": 1.5333, "step": 9252 }, { "epoch": 0.9611509296769503, "grad_norm": 0.41242584586143494, "learning_rate": 3.7193055179183256e-07, "loss": 1.5122, "step": 9253 }, { "epoch": 0.9612548041965306, "grad_norm": 0.4707975685596466, "learning_rate": 3.6994673038948655e-07, "loss": 1.7078, "step": 9254 }, { "epoch": 0.9613586787161109, "grad_norm": 0.49493807554244995, "learning_rate": 3.679681942006852e-07, "loss": 1.5883, "step": 9255 }, { "epoch": 0.9614625532356913, "grad_norm": 0.4006246030330658, "learning_rate": 3.6599494343612095e-07, "loss": 1.6098, "step": 9256 }, { "epoch": 0.9615664277552717, "grad_norm": 0.4326263964176178, "learning_rate": 3.6402697830594243e-07, "loss": 1.6603, "step": 9257 }, { "epoch": 0.961670302274852, "grad_norm": 0.39721041917800903, "learning_rate": 3.6206429901970985e-07, "loss": 1.493, "step": 9258 }, { "epoch": 0.9617741767944323, "grad_norm": 0.41533419489860535, "learning_rate": 3.6010690578643924e-07, "loss": 1.5743, "step": 9259 }, { "epoch": 0.9618780513140127, "grad_norm": 0.3986194431781769, "learning_rate": 3.58154798814575e-07, "loss": 1.4523, "step": 9260 }, { "epoch": 0.961981925833593, "grad_norm": 0.46424034237861633, "learning_rate": 3.56207978312012e-07, "loss": 1.6645, "step": 9261 }, { "epoch": 0.9620858003531734, "grad_norm": 0.44541025161743164, "learning_rate": 3.542664444860566e-07, "loss": 1.5695, "step": 9262 }, { "epoch": 0.9621896748727538, "grad_norm": 0.39570924639701843, "learning_rate": 3.5233019754347117e-07, "loss": 1.5907, "step": 9263 }, { "epoch": 0.962293549392334, "grad_norm": 0.4216559827327728, "learning_rate": 3.503992376904575e-07, "loss": 1.7391, "step": 9264 }, { "epoch": 0.9623974239119144, "grad_norm": 0.3937249481678009, "learning_rate": 3.4847356513263986e-07, "loss": 1.6637, "step": 9265 }, { "epoch": 0.9625012984314948, "grad_norm": 0.43053656816482544, "learning_rate": 3.4655318007509316e-07, "loss": 1.5119, "step": 9266 }, { "epoch": 0.9626051729510751, "grad_norm": 0.423225998878479, "learning_rate": 3.4463808272232055e-07, "loss": 1.5577, "step": 9267 }, { "epoch": 0.9627090474706554, "grad_norm": 0.3969104588031769, "learning_rate": 3.427282732782644e-07, "loss": 1.6157, "step": 9268 }, { "epoch": 0.9628129219902358, "grad_norm": 0.42695853114128113, "learning_rate": 3.4082375194630643e-07, "loss": 1.5988, "step": 9269 }, { "epoch": 0.9629167965098161, "grad_norm": 0.4294392764568329, "learning_rate": 3.3892451892926227e-07, "loss": 1.5232, "step": 9270 }, { "epoch": 0.9630206710293965, "grad_norm": 0.45847055315971375, "learning_rate": 3.370305744293867e-07, "loss": 1.7633, "step": 9271 }, { "epoch": 0.9631245455489769, "grad_norm": 0.4628918170928955, "learning_rate": 3.351419186483684e-07, "loss": 1.7437, "step": 9272 }, { "epoch": 0.9632284200685571, "grad_norm": 0.40736517310142517, "learning_rate": 3.3325855178733543e-07, "loss": 1.6749, "step": 9273 }, { "epoch": 0.9633322945881375, "grad_norm": 0.43378880620002747, "learning_rate": 3.3138047404684957e-07, "loss": 1.7485, "step": 9274 }, { "epoch": 0.9634361691077179, "grad_norm": 0.40924814343452454, "learning_rate": 3.295076856269119e-07, "loss": 1.6725, "step": 9275 }, { "epoch": 0.9635400436272982, "grad_norm": 0.40728503465652466, "learning_rate": 3.2764018672696293e-07, "loss": 1.6062, "step": 9276 }, { "epoch": 0.9636439181468786, "grad_norm": 0.45276060700416565, "learning_rate": 3.257779775458769e-07, "loss": 1.7222, "step": 9277 }, { "epoch": 0.9637477926664589, "grad_norm": 0.4360550045967102, "learning_rate": 3.239210582819563e-07, "loss": 1.7827, "step": 9278 }, { "epoch": 0.9638516671860393, "grad_norm": 0.46878498792648315, "learning_rate": 3.220694291329596e-07, "loss": 1.7343, "step": 9279 }, { "epoch": 0.9639555417056196, "grad_norm": 0.4349597096443176, "learning_rate": 3.202230902960679e-07, "loss": 1.3551, "step": 9280 }, { "epoch": 0.9640594162252, "grad_norm": 0.4269922971725464, "learning_rate": 3.1838204196789625e-07, "loss": 1.5912, "step": 9281 }, { "epoch": 0.9641632907447804, "grad_norm": 0.42639386653900146, "learning_rate": 3.1654628434450437e-07, "loss": 1.6366, "step": 9282 }, { "epoch": 0.9642671652643606, "grad_norm": 0.41460925340652466, "learning_rate": 3.147158176213916e-07, "loss": 1.5546, "step": 9283 }, { "epoch": 0.964371039783941, "grad_norm": 0.4272399842739105, "learning_rate": 3.1289064199347964e-07, "loss": 1.6533, "step": 9284 }, { "epoch": 0.9644749143035214, "grad_norm": 0.40159735083580017, "learning_rate": 3.110707576551408e-07, "loss": 1.7311, "step": 9285 }, { "epoch": 0.9645787888231017, "grad_norm": 0.40594831109046936, "learning_rate": 3.092561648001813e-07, "loss": 1.534, "step": 9286 }, { "epoch": 0.964682663342682, "grad_norm": 0.4097544550895691, "learning_rate": 3.074468636218353e-07, "loss": 1.6231, "step": 9287 }, { "epoch": 0.9647865378622624, "grad_norm": 0.40390005707740784, "learning_rate": 3.056428543127765e-07, "loss": 1.6062, "step": 9288 }, { "epoch": 0.9648904123818427, "grad_norm": 0.4110979735851288, "learning_rate": 3.038441370651346e-07, "loss": 1.6621, "step": 9289 }, { "epoch": 0.9649942869014231, "grad_norm": 0.4974674582481384, "learning_rate": 3.020507120704341e-07, "loss": 1.8125, "step": 9290 }, { "epoch": 0.9650981614210035, "grad_norm": 0.45189183950424194, "learning_rate": 3.002625795196834e-07, "loss": 1.6262, "step": 9291 }, { "epoch": 0.9652020359405837, "grad_norm": 0.4480794668197632, "learning_rate": 2.9847973960329124e-07, "loss": 1.6016, "step": 9292 }, { "epoch": 0.9653059104601641, "grad_norm": 0.4347269535064697, "learning_rate": 2.967021925111224e-07, "loss": 1.9229, "step": 9293 }, { "epoch": 0.9654097849797445, "grad_norm": 0.4739961326122284, "learning_rate": 2.949299384324644e-07, "loss": 1.5661, "step": 9294 }, { "epoch": 0.9655136594993248, "grad_norm": 0.39016592502593994, "learning_rate": 2.931629775560607e-07, "loss": 1.542, "step": 9295 }, { "epoch": 0.9656175340189052, "grad_norm": 0.42430630326271057, "learning_rate": 2.914013100700663e-07, "loss": 1.4694, "step": 9296 }, { "epoch": 0.9657214085384855, "grad_norm": 0.4428417682647705, "learning_rate": 2.896449361620923e-07, "loss": 1.7179, "step": 9297 }, { "epoch": 0.9658252830580658, "grad_norm": 0.4339398145675659, "learning_rate": 2.8789385601917797e-07, "loss": 1.6341, "step": 9298 }, { "epoch": 0.9659291575776462, "grad_norm": 0.4497127830982208, "learning_rate": 2.861480698277963e-07, "loss": 1.8256, "step": 9299 }, { "epoch": 0.9660330320972266, "grad_norm": 0.47623205184936523, "learning_rate": 2.844075777738597e-07, "loss": 1.8018, "step": 9300 }, { "epoch": 0.9661369066168068, "grad_norm": 0.4075833261013031, "learning_rate": 2.8267238004272554e-07, "loss": 1.6079, "step": 9301 }, { "epoch": 0.9662407811363872, "grad_norm": 0.44640010595321655, "learning_rate": 2.8094247681916817e-07, "loss": 1.7133, "step": 9302 }, { "epoch": 0.9663446556559676, "grad_norm": 0.4296375811100006, "learning_rate": 2.79217868287418e-07, "loss": 1.6657, "step": 9303 }, { "epoch": 0.966448530175548, "grad_norm": 0.40996262431144714, "learning_rate": 2.7749855463112265e-07, "loss": 1.6412, "step": 9304 }, { "epoch": 0.9665524046951283, "grad_norm": 0.45529499650001526, "learning_rate": 2.757845360333855e-07, "loss": 1.6247, "step": 9305 }, { "epoch": 0.9666562792147086, "grad_norm": 0.4100009500980377, "learning_rate": 2.740758126767273e-07, "loss": 1.6921, "step": 9306 }, { "epoch": 0.966760153734289, "grad_norm": 0.49910518527030945, "learning_rate": 2.723723847431192e-07, "loss": 1.9038, "step": 9307 }, { "epoch": 0.9668640282538693, "grad_norm": 0.42856353521347046, "learning_rate": 2.7067425241396606e-07, "loss": 1.6905, "step": 9308 }, { "epoch": 0.9669679027734497, "grad_norm": 0.4085339605808258, "learning_rate": 2.689814158700954e-07, "loss": 1.5855, "step": 9309 }, { "epoch": 0.9670717772930301, "grad_norm": 0.3960736095905304, "learning_rate": 2.672938752917908e-07, "loss": 1.5288, "step": 9310 }, { "epoch": 0.9671756518126103, "grad_norm": 0.4568417966365814, "learning_rate": 2.6561163085875863e-07, "loss": 1.6313, "step": 9311 }, { "epoch": 0.9672795263321907, "grad_norm": 0.39653193950653076, "learning_rate": 2.6393468275014434e-07, "loss": 1.6358, "step": 9312 }, { "epoch": 0.9673834008517711, "grad_norm": 0.436946302652359, "learning_rate": 2.6226303114452733e-07, "loss": 1.8352, "step": 9313 }, { "epoch": 0.9674872753713514, "grad_norm": 0.4774184226989746, "learning_rate": 2.605966762199319e-07, "loss": 1.7763, "step": 9314 }, { "epoch": 0.9675911498909318, "grad_norm": 0.40284937620162964, "learning_rate": 2.58935618153805e-07, "loss": 1.6718, "step": 9315 }, { "epoch": 0.9676950244105121, "grad_norm": 0.4363865852355957, "learning_rate": 2.5727985712303836e-07, "loss": 1.7835, "step": 9316 }, { "epoch": 0.9677988989300924, "grad_norm": 0.42425060272216797, "learning_rate": 2.5562939330396327e-07, "loss": 1.7533, "step": 9317 }, { "epoch": 0.9679027734496728, "grad_norm": 0.4236680567264557, "learning_rate": 2.539842268723336e-07, "loss": 1.5438, "step": 9318 }, { "epoch": 0.9680066479692532, "grad_norm": 0.41327956318855286, "learning_rate": 2.5234435800335354e-07, "loss": 1.6705, "step": 9319 }, { "epoch": 0.9681105224888334, "grad_norm": 0.4201132655143738, "learning_rate": 2.507097868716501e-07, "loss": 1.7153, "step": 9320 }, { "epoch": 0.9682143970084138, "grad_norm": 0.45233723521232605, "learning_rate": 2.4908051365129524e-07, "loss": 1.716, "step": 9321 }, { "epoch": 0.9683182715279942, "grad_norm": 0.4188010096549988, "learning_rate": 2.4745653851579453e-07, "loss": 1.6264, "step": 9322 }, { "epoch": 0.9684221460475745, "grad_norm": 0.4535348415374756, "learning_rate": 2.458378616380874e-07, "loss": 1.8135, "step": 9323 }, { "epoch": 0.9685260205671549, "grad_norm": 0.5156634449958801, "learning_rate": 2.442244831905471e-07, "loss": 1.8665, "step": 9324 }, { "epoch": 0.9686298950867352, "grad_norm": 0.41018423438072205, "learning_rate": 2.426164033449918e-07, "loss": 1.6818, "step": 9325 }, { "epoch": 0.9687337696063155, "grad_norm": 0.4227202534675598, "learning_rate": 2.4101362227266776e-07, "loss": 1.5506, "step": 9326 }, { "epoch": 0.9688376441258959, "grad_norm": 0.4453946352005005, "learning_rate": 2.394161401442552e-07, "loss": 1.8326, "step": 9327 }, { "epoch": 0.9689415186454763, "grad_norm": 0.43642252683639526, "learning_rate": 2.3782395712987903e-07, "loss": 1.8348, "step": 9328 }, { "epoch": 0.9690453931650566, "grad_norm": 0.4805013835430145, "learning_rate": 2.3623707339909262e-07, "loss": 1.7733, "step": 9329 }, { "epoch": 0.9691492676846369, "grad_norm": 0.41811105608940125, "learning_rate": 2.3465548912088296e-07, "loss": 1.5234, "step": 9330 }, { "epoch": 0.9692531422042173, "grad_norm": 0.39481353759765625, "learning_rate": 2.3307920446368203e-07, "loss": 1.3639, "step": 9331 }, { "epoch": 0.9693570167237977, "grad_norm": 0.43810319900512695, "learning_rate": 2.3150821959534442e-07, "loss": 1.6738, "step": 9332 }, { "epoch": 0.969460891243378, "grad_norm": 0.3864077627658844, "learning_rate": 2.299425346831807e-07, "loss": 1.5251, "step": 9333 }, { "epoch": 0.9695647657629584, "grad_norm": 0.42563048005104065, "learning_rate": 2.2838214989390762e-07, "loss": 1.6638, "step": 9334 }, { "epoch": 0.9696686402825387, "grad_norm": 0.4011988341808319, "learning_rate": 2.2682706539370324e-07, "loss": 1.4951, "step": 9335 }, { "epoch": 0.969772514802119, "grad_norm": 0.4599732458591461, "learning_rate": 2.2527728134817406e-07, "loss": 1.7103, "step": 9336 }, { "epoch": 0.9698763893216994, "grad_norm": 0.3994980752468109, "learning_rate": 2.2373279792235468e-07, "loss": 1.6718, "step": 9337 }, { "epoch": 0.9699802638412798, "grad_norm": 0.42097207903862, "learning_rate": 2.2219361528071912e-07, "loss": 1.6202, "step": 9338 }, { "epoch": 0.97008413836086, "grad_norm": 0.4318835139274597, "learning_rate": 2.2065973358718627e-07, "loss": 1.5318, "step": 9339 }, { "epoch": 0.9701880128804404, "grad_norm": 0.4295785129070282, "learning_rate": 2.191311530050977e-07, "loss": 1.6927, "step": 9340 }, { "epoch": 0.9702918874000208, "grad_norm": 0.4130423367023468, "learning_rate": 2.1760787369723423e-07, "loss": 1.6946, "step": 9341 }, { "epoch": 0.9703957619196011, "grad_norm": 0.43795621395111084, "learning_rate": 2.160898958258162e-07, "loss": 1.4909, "step": 9342 }, { "epoch": 0.9704996364391815, "grad_norm": 0.42696115374565125, "learning_rate": 2.14577219552492e-07, "loss": 1.5696, "step": 9343 }, { "epoch": 0.9706035109587619, "grad_norm": 0.4208974242210388, "learning_rate": 2.1306984503835504e-07, "loss": 1.5663, "step": 9344 }, { "epoch": 0.9707073854783421, "grad_norm": 0.4519440233707428, "learning_rate": 2.115677724439269e-07, "loss": 1.7372, "step": 9345 }, { "epoch": 0.9708112599979225, "grad_norm": 0.4242643415927887, "learning_rate": 2.1007100192916296e-07, "loss": 1.6569, "step": 9346 }, { "epoch": 0.9709151345175029, "grad_norm": 0.41533952951431274, "learning_rate": 2.085795336534635e-07, "loss": 1.5835, "step": 9347 }, { "epoch": 0.9710190090370832, "grad_norm": 0.4785119593143463, "learning_rate": 2.0709336777565702e-07, "loss": 1.7689, "step": 9348 }, { "epoch": 0.9711228835566635, "grad_norm": 0.41797420382499695, "learning_rate": 2.0561250445400583e-07, "loss": 1.6882, "step": 9349 }, { "epoch": 0.9712267580762439, "grad_norm": 0.4142938256263733, "learning_rate": 2.041369438462115e-07, "loss": 1.7222, "step": 9350 }, { "epoch": 0.9713306325958242, "grad_norm": 0.3736826181411743, "learning_rate": 2.0266668610940952e-07, "loss": 1.4399, "step": 9351 }, { "epoch": 0.9714345071154046, "grad_norm": 0.4269476532936096, "learning_rate": 2.012017314001746e-07, "loss": 1.4723, "step": 9352 }, { "epoch": 0.971538381634985, "grad_norm": 0.43131187558174133, "learning_rate": 1.9974207987450422e-07, "loss": 1.739, "step": 9353 }, { "epoch": 0.9716422561545652, "grad_norm": 0.4340756833553314, "learning_rate": 1.9828773168785175e-07, "loss": 1.7562, "step": 9354 }, { "epoch": 0.9717461306741456, "grad_norm": 0.46568766236305237, "learning_rate": 1.9683868699508224e-07, "loss": 1.6564, "step": 9355 }, { "epoch": 0.971850005193726, "grad_norm": 0.438309907913208, "learning_rate": 1.9539494595051112e-07, "loss": 1.6676, "step": 9356 }, { "epoch": 0.9719538797133064, "grad_norm": 0.40006324648857117, "learning_rate": 1.9395650870789316e-07, "loss": 1.6582, "step": 9357 }, { "epoch": 0.9720577542328867, "grad_norm": 0.4404867887496948, "learning_rate": 1.9252337542040034e-07, "loss": 1.7257, "step": 9358 }, { "epoch": 0.972161628752467, "grad_norm": 0.4128919541835785, "learning_rate": 1.910955462406605e-07, "loss": 1.6339, "step": 9359 }, { "epoch": 0.9722655032720474, "grad_norm": 0.47197067737579346, "learning_rate": 1.8967302132071317e-07, "loss": 1.5516, "step": 9360 }, { "epoch": 0.9723693777916277, "grad_norm": 0.46007785201072693, "learning_rate": 1.882558008120594e-07, "loss": 1.6057, "step": 9361 }, { "epoch": 0.9724732523112081, "grad_norm": 0.4223778247833252, "learning_rate": 1.8684388486561733e-07, "loss": 1.6723, "step": 9362 }, { "epoch": 0.9725771268307885, "grad_norm": 0.40712058544158936, "learning_rate": 1.854372736317389e-07, "loss": 1.5695, "step": 9363 }, { "epoch": 0.9726810013503687, "grad_norm": 0.4485786557197571, "learning_rate": 1.8403596726022653e-07, "loss": 1.4115, "step": 9364 }, { "epoch": 0.9727848758699491, "grad_norm": 0.44639256596565247, "learning_rate": 1.8263996590029976e-07, "loss": 1.6466, "step": 9365 }, { "epoch": 0.9728887503895295, "grad_norm": 0.4222765862941742, "learning_rate": 1.812492697006285e-07, "loss": 1.5191, "step": 9366 }, { "epoch": 0.9729926249091098, "grad_norm": 0.4526502192020416, "learning_rate": 1.79863878809311e-07, "loss": 1.8303, "step": 9367 }, { "epoch": 0.9730964994286901, "grad_norm": 0.3991301655769348, "learning_rate": 1.7848379337387922e-07, "loss": 1.6194, "step": 9368 }, { "epoch": 0.9732003739482705, "grad_norm": 0.43972858786582947, "learning_rate": 1.7710901354129894e-07, "loss": 1.6873, "step": 9369 }, { "epoch": 0.9733042484678508, "grad_norm": 0.42031508684158325, "learning_rate": 1.7573953945796973e-07, "loss": 1.7127, "step": 9370 }, { "epoch": 0.9734081229874312, "grad_norm": 0.40002334117889404, "learning_rate": 1.7437537126974158e-07, "loss": 1.4936, "step": 9371 }, { "epoch": 0.9735119975070116, "grad_norm": 0.4212174713611603, "learning_rate": 1.7301650912188162e-07, "loss": 1.7144, "step": 9372 }, { "epoch": 0.9736158720265918, "grad_norm": 0.43451911211013794, "learning_rate": 1.716629531590963e-07, "loss": 1.7131, "step": 9373 }, { "epoch": 0.9737197465461722, "grad_norm": 0.40921545028686523, "learning_rate": 1.7031470352553146e-07, "loss": 1.4595, "step": 9374 }, { "epoch": 0.9738236210657526, "grad_norm": 0.41354814171791077, "learning_rate": 1.6897176036476115e-07, "loss": 1.6428, "step": 9375 }, { "epoch": 0.9739274955853329, "grad_norm": 0.4084813892841339, "learning_rate": 1.6763412381980425e-07, "loss": 1.6586, "step": 9376 }, { "epoch": 0.9740313701049133, "grad_norm": 0.5016772747039795, "learning_rate": 1.6630179403310797e-07, "loss": 1.7705, "step": 9377 }, { "epoch": 0.9741352446244936, "grad_norm": 0.4082990288734436, "learning_rate": 1.6497477114654768e-07, "loss": 1.604, "step": 9378 }, { "epoch": 0.9742391191440739, "grad_norm": 0.4223262369632721, "learning_rate": 1.6365305530144926e-07, "loss": 1.5545, "step": 9379 }, { "epoch": 0.9743429936636543, "grad_norm": 0.45764049887657166, "learning_rate": 1.623366466385612e-07, "loss": 1.6171, "step": 9380 }, { "epoch": 0.9744468681832347, "grad_norm": 0.4205342233181, "learning_rate": 1.6102554529807134e-07, "loss": 1.7076, "step": 9381 }, { "epoch": 0.974550742702815, "grad_norm": 0.4099922478199005, "learning_rate": 1.5971975141960138e-07, "loss": 1.6506, "step": 9382 }, { "epoch": 0.9746546172223953, "grad_norm": 0.43783533573150635, "learning_rate": 1.5841926514220674e-07, "loss": 1.526, "step": 9383 }, { "epoch": 0.9747584917419757, "grad_norm": 0.4745118021965027, "learning_rate": 1.571240866043877e-07, "loss": 1.7779, "step": 9384 }, { "epoch": 0.9748623662615561, "grad_norm": 0.4208376109600067, "learning_rate": 1.5583421594405623e-07, "loss": 1.7611, "step": 9385 }, { "epoch": 0.9749662407811364, "grad_norm": 0.41122379899024963, "learning_rate": 1.5454965329858573e-07, "loss": 1.5896, "step": 9386 }, { "epoch": 0.9750701153007167, "grad_norm": 0.3928976058959961, "learning_rate": 1.5327039880477234e-07, "loss": 1.59, "step": 9387 }, { "epoch": 0.9751739898202971, "grad_norm": 0.4342755675315857, "learning_rate": 1.5199645259883488e-07, "loss": 1.6747, "step": 9388 }, { "epoch": 0.9752778643398774, "grad_norm": 0.39169561862945557, "learning_rate": 1.5072781481644816e-07, "loss": 1.5841, "step": 9389 }, { "epoch": 0.9753817388594578, "grad_norm": 0.43431976437568665, "learning_rate": 1.4946448559270964e-07, "loss": 1.661, "step": 9390 }, { "epoch": 0.9754856133790382, "grad_norm": 0.4187610149383545, "learning_rate": 1.4820646506215064e-07, "loss": 1.5102, "step": 9391 }, { "epoch": 0.9755894878986184, "grad_norm": 0.415600448846817, "learning_rate": 1.4695375335875282e-07, "loss": 1.4286, "step": 9392 }, { "epoch": 0.9756933624181988, "grad_norm": 0.4058886468410492, "learning_rate": 1.45706350615904e-07, "loss": 1.6924, "step": 9393 }, { "epoch": 0.9757972369377792, "grad_norm": 0.41150805354118347, "learning_rate": 1.444642569664534e-07, "loss": 1.5943, "step": 9394 }, { "epoch": 0.9759011114573595, "grad_norm": 0.39166486263275146, "learning_rate": 1.43227472542673e-07, "loss": 1.628, "step": 9395 }, { "epoch": 0.9760049859769399, "grad_norm": 0.41981199383735657, "learning_rate": 1.4199599747626303e-07, "loss": 1.4475, "step": 9396 }, { "epoch": 0.9761088604965202, "grad_norm": 0.4433886408805847, "learning_rate": 1.4076983189837967e-07, "loss": 1.6782, "step": 9397 }, { "epoch": 0.9762127350161005, "grad_norm": 0.4331313967704773, "learning_rate": 1.3954897593958516e-07, "loss": 1.5824, "step": 9398 }, { "epoch": 0.9763166095356809, "grad_norm": 0.4032611846923828, "learning_rate": 1.3833342972990327e-07, "loss": 1.5385, "step": 9399 }, { "epoch": 0.9764204840552613, "grad_norm": 0.4217138886451721, "learning_rate": 1.371231933987749e-07, "loss": 1.7366, "step": 9400 }, { "epoch": 0.9765243585748415, "grad_norm": 0.3987552225589752, "learning_rate": 1.3591826707508026e-07, "loss": 1.5265, "step": 9401 }, { "epoch": 0.9766282330944219, "grad_norm": 0.4729466140270233, "learning_rate": 1.3471865088713342e-07, "loss": 1.7914, "step": 9402 }, { "epoch": 0.9767321076140023, "grad_norm": 0.4094342589378357, "learning_rate": 1.3352434496268773e-07, "loss": 1.6576, "step": 9403 }, { "epoch": 0.9768359821335826, "grad_norm": 0.46844589710235596, "learning_rate": 1.3233534942892477e-07, "loss": 1.7209, "step": 9404 }, { "epoch": 0.976939856653163, "grad_norm": 0.4402284026145935, "learning_rate": 1.311516644124655e-07, "loss": 1.6706, "step": 9405 }, { "epoch": 0.9770437311727433, "grad_norm": 0.4201194941997528, "learning_rate": 1.299732900393591e-07, "loss": 1.598, "step": 9406 }, { "epoch": 0.9771476056923236, "grad_norm": 0.41740289330482483, "learning_rate": 1.2880022643509958e-07, "loss": 1.6247, "step": 9407 }, { "epoch": 0.977251480211904, "grad_norm": 0.4268152117729187, "learning_rate": 1.2763247372460374e-07, "loss": 1.6006, "step": 9408 }, { "epoch": 0.9773553547314844, "grad_norm": 0.43314602971076965, "learning_rate": 1.2647003203223318e-07, "loss": 1.6009, "step": 9409 }, { "epoch": 0.9774592292510648, "grad_norm": 0.42732593417167664, "learning_rate": 1.2531290148177221e-07, "loss": 1.6934, "step": 9410 }, { "epoch": 0.977563103770645, "grad_norm": 0.4502175748348236, "learning_rate": 1.2416108219645005e-07, "loss": 1.7892, "step": 9411 }, { "epoch": 0.9776669782902254, "grad_norm": 0.45358970761299133, "learning_rate": 1.2301457429892972e-07, "loss": 1.5935, "step": 9412 }, { "epoch": 0.9777708528098058, "grad_norm": 0.43146711587905884, "learning_rate": 1.2187337791129682e-07, "loss": 1.457, "step": 9413 }, { "epoch": 0.9778747273293861, "grad_norm": 0.44954973459243774, "learning_rate": 1.2073749315508754e-07, "loss": 1.6821, "step": 9414 }, { "epoch": 0.9779786018489665, "grad_norm": 0.4219938814640045, "learning_rate": 1.1960692015126062e-07, "loss": 1.7259, "step": 9415 }, { "epoch": 0.9780824763685468, "grad_norm": 0.41503259539604187, "learning_rate": 1.1848165902021424e-07, "loss": 1.4451, "step": 9416 }, { "epoch": 0.9781863508881271, "grad_norm": 0.4230490028858185, "learning_rate": 1.1736170988178585e-07, "loss": 1.6452, "step": 9417 }, { "epoch": 0.9782902254077075, "grad_norm": 0.44973745942115784, "learning_rate": 1.1624707285523006e-07, "loss": 1.811, "step": 9418 }, { "epoch": 0.9783940999272879, "grad_norm": 0.4050397574901581, "learning_rate": 1.1513774805925748e-07, "loss": 1.4962, "step": 9419 }, { "epoch": 0.9784979744468681, "grad_norm": 0.3766053020954132, "learning_rate": 1.1403373561199582e-07, "loss": 1.4053, "step": 9420 }, { "epoch": 0.9786018489664485, "grad_norm": 0.41133883595466614, "learning_rate": 1.1293503563101216e-07, "loss": 1.6545, "step": 9421 }, { "epoch": 0.9787057234860289, "grad_norm": 0.44046151638031006, "learning_rate": 1.1184164823331844e-07, "loss": 1.6459, "step": 9422 }, { "epoch": 0.9788095980056092, "grad_norm": 0.44645535945892334, "learning_rate": 1.1075357353534377e-07, "loss": 1.5921, "step": 9423 }, { "epoch": 0.9789134725251896, "grad_norm": 0.44139453768730164, "learning_rate": 1.0967081165296211e-07, "loss": 1.8106, "step": 9424 }, { "epoch": 0.97901734704477, "grad_norm": 0.39633703231811523, "learning_rate": 1.0859336270148124e-07, "loss": 1.6446, "step": 9425 }, { "epoch": 0.9791212215643502, "grad_norm": 0.4287833273410797, "learning_rate": 1.0752122679563714e-07, "loss": 1.6376, "step": 9426 }, { "epoch": 0.9792250960839306, "grad_norm": 0.42582541704177856, "learning_rate": 1.0645440404960517e-07, "loss": 1.6317, "step": 9427 }, { "epoch": 0.979328970603511, "grad_norm": 0.47708913683891296, "learning_rate": 1.0539289457699442e-07, "loss": 1.918, "step": 9428 }, { "epoch": 0.9794328451230913, "grad_norm": 0.38468772172927856, "learning_rate": 1.0433669849085336e-07, "loss": 1.5551, "step": 9429 }, { "epoch": 0.9795367196426716, "grad_norm": 0.4491479694843292, "learning_rate": 1.032858159036476e-07, "loss": 1.7375, "step": 9430 }, { "epoch": 0.979640594162252, "grad_norm": 0.43696144223213196, "learning_rate": 1.0224024692729317e-07, "loss": 1.5334, "step": 9431 }, { "epoch": 0.9797444686818323, "grad_norm": 0.38262155652046204, "learning_rate": 1.0119999167313988e-07, "loss": 1.4757, "step": 9432 }, { "epoch": 0.9798483432014127, "grad_norm": 0.43123918771743774, "learning_rate": 1.0016505025195467e-07, "loss": 1.5889, "step": 9433 }, { "epoch": 0.9799522177209931, "grad_norm": 0.44049620628356934, "learning_rate": 9.913542277396604e-08, "loss": 1.654, "step": 9434 }, { "epoch": 0.9800560922405734, "grad_norm": 0.3811212182044983, "learning_rate": 9.811110934880296e-08, "loss": 1.4344, "step": 9435 }, { "epoch": 0.9801599667601537, "grad_norm": 0.4550376534461975, "learning_rate": 9.709211008556707e-08, "loss": 1.8921, "step": 9436 }, { "epoch": 0.9802638412797341, "grad_norm": 0.41628962755203247, "learning_rate": 9.607842509275489e-08, "loss": 1.6439, "step": 9437 }, { "epoch": 0.9803677157993145, "grad_norm": 0.4357268810272217, "learning_rate": 9.507005447833007e-08, "loss": 1.8815, "step": 9438 }, { "epoch": 0.9804715903188947, "grad_norm": 0.38655275106430054, "learning_rate": 9.406699834967337e-08, "loss": 1.4922, "step": 9439 }, { "epoch": 0.9805754648384751, "grad_norm": 0.3913620710372925, "learning_rate": 9.306925681359935e-08, "loss": 1.6177, "step": 9440 }, { "epoch": 0.9806793393580555, "grad_norm": 0.44508156180381775, "learning_rate": 9.207682997635636e-08, "loss": 1.6836, "step": 9441 }, { "epoch": 0.9807832138776358, "grad_norm": 0.41767412424087524, "learning_rate": 9.108971794363763e-08, "loss": 1.6061, "step": 9442 }, { "epoch": 0.9808870883972162, "grad_norm": 0.40216001868247986, "learning_rate": 9.010792082055908e-08, "loss": 1.5034, "step": 9443 }, { "epoch": 0.9809909629167965, "grad_norm": 0.39269882440567017, "learning_rate": 8.91314387116704e-08, "loss": 1.5912, "step": 9444 }, { "epoch": 0.9810948374363768, "grad_norm": 0.41241204738616943, "learning_rate": 8.81602717209662e-08, "loss": 1.6321, "step": 9445 }, { "epoch": 0.9811987119559572, "grad_norm": 0.4276941120624542, "learning_rate": 8.719441995186928e-08, "loss": 1.6572, "step": 9446 }, { "epoch": 0.9813025864755376, "grad_norm": 0.5014160871505737, "learning_rate": 8.623388350722517e-08, "loss": 1.6643, "step": 9447 }, { "epoch": 0.9814064609951179, "grad_norm": 0.44852280616760254, "learning_rate": 8.527866248933536e-08, "loss": 1.4247, "step": 9448 }, { "epoch": 0.9815103355146982, "grad_norm": 0.4082983136177063, "learning_rate": 8.432875699991849e-08, "loss": 1.4757, "step": 9449 }, { "epoch": 0.9816142100342786, "grad_norm": 0.458987295627594, "learning_rate": 8.338416714013253e-08, "loss": 1.6738, "step": 9450 }, { "epoch": 0.9817180845538589, "grad_norm": 0.42182016372680664, "learning_rate": 8.244489301056368e-08, "loss": 1.5149, "step": 9451 }, { "epoch": 0.9818219590734393, "grad_norm": 0.4483654201030731, "learning_rate": 8.151093471124305e-08, "loss": 1.6678, "step": 9452 }, { "epoch": 0.9819258335930197, "grad_norm": 0.4117814302444458, "learning_rate": 8.058229234162995e-08, "loss": 1.5014, "step": 9453 }, { "epoch": 0.9820297081125999, "grad_norm": 0.43942439556121826, "learning_rate": 7.965896600061751e-08, "loss": 1.6016, "step": 9454 }, { "epoch": 0.9821335826321803, "grad_norm": 0.4229027032852173, "learning_rate": 7.874095578653263e-08, "loss": 1.6684, "step": 9455 }, { "epoch": 0.9822374571517607, "grad_norm": 0.38582631945610046, "learning_rate": 7.782826179713043e-08, "loss": 1.5004, "step": 9456 }, { "epoch": 0.982341331671341, "grad_norm": 0.5087673664093018, "learning_rate": 7.692088412961096e-08, "loss": 1.8254, "step": 9457 }, { "epoch": 0.9824452061909213, "grad_norm": 0.4083236753940582, "learning_rate": 7.601882288060247e-08, "loss": 1.577, "step": 9458 }, { "epoch": 0.9825490807105017, "grad_norm": 0.42479801177978516, "learning_rate": 7.512207814617256e-08, "loss": 1.6106, "step": 9459 }, { "epoch": 0.9826529552300821, "grad_norm": 0.4226894676685333, "learning_rate": 7.423065002180596e-08, "loss": 1.6185, "step": 9460 }, { "epoch": 0.9827568297496624, "grad_norm": 0.42085468769073486, "learning_rate": 7.334453860244339e-08, "loss": 1.5966, "step": 9461 }, { "epoch": 0.9828607042692428, "grad_norm": 0.4113049805164337, "learning_rate": 7.24637439824427e-08, "loss": 1.5905, "step": 9462 }, { "epoch": 0.9829645787888232, "grad_norm": 0.406465083360672, "learning_rate": 7.158826625560112e-08, "loss": 1.6842, "step": 9463 }, { "epoch": 0.9830684533084034, "grad_norm": 0.4487413167953491, "learning_rate": 7.071810551516067e-08, "loss": 1.6072, "step": 9464 }, { "epoch": 0.9831723278279838, "grad_norm": 0.4292948544025421, "learning_rate": 6.98532618537695e-08, "loss": 1.6148, "step": 9465 }, { "epoch": 0.9832762023475642, "grad_norm": 0.4271443784236908, "learning_rate": 6.899373536354282e-08, "loss": 1.693, "step": 9466 }, { "epoch": 0.9833800768671445, "grad_norm": 0.4509519636631012, "learning_rate": 6.813952613600183e-08, "loss": 1.896, "step": 9467 }, { "epoch": 0.9834839513867248, "grad_norm": 0.4309237599372864, "learning_rate": 6.729063426211824e-08, "loss": 1.6331, "step": 9468 }, { "epoch": 0.9835878259063052, "grad_norm": 0.42812246084213257, "learning_rate": 6.644705983229194e-08, "loss": 1.6337, "step": 9469 }, { "epoch": 0.9836917004258855, "grad_norm": 0.43282395601272583, "learning_rate": 6.560880293636218e-08, "loss": 1.7527, "step": 9470 }, { "epoch": 0.9837955749454659, "grad_norm": 0.40227630734443665, "learning_rate": 6.477586366358535e-08, "loss": 1.6945, "step": 9471 }, { "epoch": 0.9838994494650463, "grad_norm": 0.4157799780368805, "learning_rate": 6.394824210266826e-08, "loss": 1.4567, "step": 9472 }, { "epoch": 0.9840033239846265, "grad_norm": 0.39977502822875977, "learning_rate": 6.312593834175152e-08, "loss": 1.5229, "step": 9473 }, { "epoch": 0.9841071985042069, "grad_norm": 0.42183637619018555, "learning_rate": 6.230895246840396e-08, "loss": 1.5506, "step": 9474 }, { "epoch": 0.9842110730237873, "grad_norm": 0.4323163628578186, "learning_rate": 6.149728456961712e-08, "loss": 1.6544, "step": 9475 }, { "epoch": 0.9843149475433676, "grad_norm": 0.5097078680992126, "learning_rate": 6.069093473183296e-08, "loss": 1.7364, "step": 9476 }, { "epoch": 0.984418822062948, "grad_norm": 0.4150666296482086, "learning_rate": 5.988990304092724e-08, "loss": 1.5175, "step": 9477 }, { "epoch": 0.9845226965825283, "grad_norm": 0.4314131736755371, "learning_rate": 5.9094189582198366e-08, "loss": 1.6242, "step": 9478 }, { "epoch": 0.9846265711021086, "grad_norm": 0.4086059033870697, "learning_rate": 5.830379444038414e-08, "loss": 1.6943, "step": 9479 }, { "epoch": 0.984730445621689, "grad_norm": 0.4106563329696655, "learning_rate": 5.751871769965056e-08, "loss": 1.5377, "step": 9480 }, { "epoch": 0.9848343201412694, "grad_norm": 0.4505181610584259, "learning_rate": 5.673895944361407e-08, "loss": 1.7265, "step": 9481 }, { "epoch": 0.9849381946608496, "grad_norm": 0.41251927614212036, "learning_rate": 5.59645197553027e-08, "loss": 1.6865, "step": 9482 }, { "epoch": 0.98504206918043, "grad_norm": 0.42645978927612305, "learning_rate": 5.5195398717194926e-08, "loss": 1.8202, "step": 9483 }, { "epoch": 0.9851459437000104, "grad_norm": 0.4157099723815918, "learning_rate": 5.443159641118634e-08, "loss": 1.6619, "step": 9484 }, { "epoch": 0.9852498182195908, "grad_norm": 0.42381036281585693, "learning_rate": 5.367311291862853e-08, "loss": 1.7641, "step": 9485 }, { "epoch": 0.9853536927391711, "grad_norm": 0.4490167796611786, "learning_rate": 5.291994832028469e-08, "loss": 1.5296, "step": 9486 }, { "epoch": 0.9854575672587514, "grad_norm": 0.4082736074924469, "learning_rate": 5.217210269636286e-08, "loss": 1.4804, "step": 9487 }, { "epoch": 0.9855614417783318, "grad_norm": 0.41350072622299194, "learning_rate": 5.1429576126504895e-08, "loss": 1.5122, "step": 9488 }, { "epoch": 0.9856653162979121, "grad_norm": 0.4074830412864685, "learning_rate": 5.069236868978644e-08, "loss": 1.5971, "step": 9489 }, { "epoch": 0.9857691908174925, "grad_norm": 0.4387468099594116, "learning_rate": 4.996048046471136e-08, "loss": 1.6514, "step": 9490 }, { "epoch": 0.9858730653370729, "grad_norm": 0.4530046582221985, "learning_rate": 4.923391152921175e-08, "loss": 1.6722, "step": 9491 }, { "epoch": 0.9859769398566531, "grad_norm": 0.4661511778831482, "learning_rate": 4.851266196068127e-08, "loss": 1.6484, "step": 9492 }, { "epoch": 0.9860808143762335, "grad_norm": 0.42836901545524597, "learning_rate": 4.779673183590849e-08, "loss": 1.8121, "step": 9493 }, { "epoch": 0.9861846888958139, "grad_norm": 0.538507878780365, "learning_rate": 4.708612123114353e-08, "loss": 1.7639, "step": 9494 }, { "epoch": 0.9862885634153942, "grad_norm": 0.4897894561290741, "learning_rate": 4.638083022205364e-08, "loss": 1.7797, "step": 9495 }, { "epoch": 0.9863924379349746, "grad_norm": 0.4547121524810791, "learning_rate": 4.56808588837565e-08, "loss": 1.6323, "step": 9496 }, { "epoch": 0.9864963124545549, "grad_norm": 0.4277886748313904, "learning_rate": 4.4986207290792505e-08, "loss": 1.7339, "step": 9497 }, { "epoch": 0.9866001869741352, "grad_norm": 0.4299750328063965, "learning_rate": 4.4296875517130243e-08, "loss": 1.6974, "step": 9498 }, { "epoch": 0.9867040614937156, "grad_norm": 0.4436101019382477, "learning_rate": 4.361286363618322e-08, "loss": 1.5564, "step": 9499 }, { "epoch": 0.986807936013296, "grad_norm": 0.42672184109687805, "learning_rate": 4.293417172078762e-08, "loss": 1.5828, "step": 9500 }, { "epoch": 0.9869118105328762, "grad_norm": 0.4650036096572876, "learning_rate": 4.226079984322451e-08, "loss": 1.5301, "step": 9501 }, { "epoch": 0.9870156850524566, "grad_norm": 0.4062730371952057, "learning_rate": 4.159274807520319e-08, "loss": 1.6214, "step": 9502 }, { "epoch": 0.987119559572037, "grad_norm": 0.4749537706375122, "learning_rate": 4.0930016487861214e-08, "loss": 1.7306, "step": 9503 }, { "epoch": 0.9872234340916173, "grad_norm": 0.4257502555847168, "learning_rate": 4.027260515177544e-08, "loss": 1.8203, "step": 9504 }, { "epoch": 0.9873273086111977, "grad_norm": 0.3824896514415741, "learning_rate": 3.962051413695656e-08, "loss": 1.5731, "step": 9505 }, { "epoch": 0.987431183130778, "grad_norm": 0.41676878929138184, "learning_rate": 3.897374351284899e-08, "loss": 1.7339, "step": 9506 }, { "epoch": 0.9875350576503583, "grad_norm": 0.4813458323478699, "learning_rate": 3.8332293348325444e-08, "loss": 1.993, "step": 9507 }, { "epoch": 0.9876389321699387, "grad_norm": 0.47463130950927734, "learning_rate": 3.769616371169793e-08, "loss": 1.708, "step": 9508 }, { "epoch": 0.9877428066895191, "grad_norm": 0.4519473612308502, "learning_rate": 3.706535467070671e-08, "loss": 1.9389, "step": 9509 }, { "epoch": 0.9878466812090994, "grad_norm": 0.4201395809650421, "learning_rate": 3.6439866292531376e-08, "loss": 1.6609, "step": 9510 }, { "epoch": 0.9879505557286797, "grad_norm": 0.4545467495918274, "learning_rate": 3.5819698643779764e-08, "loss": 1.7568, "step": 9511 }, { "epoch": 0.9880544302482601, "grad_norm": 0.39346179366111755, "learning_rate": 3.520485179048794e-08, "loss": 1.6101, "step": 9512 }, { "epoch": 0.9881583047678405, "grad_norm": 0.4246424436569214, "learning_rate": 3.459532579814795e-08, "loss": 1.7143, "step": 9513 }, { "epoch": 0.9882621792874208, "grad_norm": 0.4520607590675354, "learning_rate": 3.399112073165789e-08, "loss": 1.5954, "step": 9514 }, { "epoch": 0.9883660538070012, "grad_norm": 0.4503570795059204, "learning_rate": 3.339223665536073e-08, "loss": 1.8197, "step": 9515 }, { "epoch": 0.9884699283265815, "grad_norm": 0.42323458194732666, "learning_rate": 3.279867363303879e-08, "loss": 1.6981, "step": 9516 }, { "epoch": 0.9885738028461618, "grad_norm": 0.44448158144950867, "learning_rate": 3.221043172789706e-08, "loss": 1.5851, "step": 9517 }, { "epoch": 0.9886776773657422, "grad_norm": 0.41508886218070984, "learning_rate": 3.162751100257988e-08, "loss": 1.6064, "step": 9518 }, { "epoch": 0.9887815518853226, "grad_norm": 0.4167642891407013, "learning_rate": 3.1049911519165365e-08, "loss": 1.6993, "step": 9519 }, { "epoch": 0.9888854264049028, "grad_norm": 0.4645927846431732, "learning_rate": 3.047763333916542e-08, "loss": 1.6729, "step": 9520 }, { "epoch": 0.9889893009244832, "grad_norm": 0.42605888843536377, "learning_rate": 2.991067652351465e-08, "loss": 1.6401, "step": 9521 }, { "epoch": 0.9890931754440636, "grad_norm": 0.3953196108341217, "learning_rate": 2.9349041132598065e-08, "loss": 1.6012, "step": 9522 }, { "epoch": 0.9891970499636439, "grad_norm": 0.41111239790916443, "learning_rate": 2.8792727226223393e-08, "loss": 1.5285, "step": 9523 }, { "epoch": 0.9893009244832243, "grad_norm": 0.4029116928577423, "learning_rate": 2.824173486363213e-08, "loss": 1.7036, "step": 9524 }, { "epoch": 0.9894047990028046, "grad_norm": 0.44695624709129333, "learning_rate": 2.7696064103505116e-08, "loss": 1.7136, "step": 9525 }, { "epoch": 0.9895086735223849, "grad_norm": 0.4348202347755432, "learning_rate": 2.715571500394587e-08, "loss": 1.6265, "step": 9526 }, { "epoch": 0.9896125480419653, "grad_norm": 0.43105223774909973, "learning_rate": 2.6620687622497255e-08, "loss": 1.6818, "step": 9527 }, { "epoch": 0.9897164225615457, "grad_norm": 0.4228067398071289, "learning_rate": 2.609098201613591e-08, "loss": 1.6076, "step": 9528 }, { "epoch": 0.989820297081126, "grad_norm": 0.4084494113922119, "learning_rate": 2.556659824127783e-08, "loss": 1.6429, "step": 9529 }, { "epoch": 0.9899241716007063, "grad_norm": 0.4497421979904175, "learning_rate": 2.504753635376167e-08, "loss": 1.5681, "step": 9530 }, { "epoch": 0.9900280461202867, "grad_norm": 0.4513741731643677, "learning_rate": 2.453379640886544e-08, "loss": 1.6556, "step": 9531 }, { "epoch": 0.990131920639867, "grad_norm": 0.38901105523109436, "learning_rate": 2.4025378461289828e-08, "loss": 1.502, "step": 9532 }, { "epoch": 0.9902357951594474, "grad_norm": 0.4121716022491455, "learning_rate": 2.352228256519151e-08, "loss": 1.6831, "step": 9533 }, { "epoch": 0.9903396696790278, "grad_norm": 0.4378368854522705, "learning_rate": 2.3024508774133204e-08, "loss": 1.6327, "step": 9534 }, { "epoch": 0.990443544198608, "grad_norm": 0.39573803544044495, "learning_rate": 2.2532057141128048e-08, "loss": 1.5492, "step": 9535 }, { "epoch": 0.9905474187181884, "grad_norm": 0.4019763469696045, "learning_rate": 2.2044927718622987e-08, "loss": 1.5991, "step": 9536 }, { "epoch": 0.9906512932377688, "grad_norm": 0.40174970030784607, "learning_rate": 2.1563120558487638e-08, "loss": 1.5447, "step": 9537 }, { "epoch": 0.9907551677573492, "grad_norm": 0.45444202423095703, "learning_rate": 2.1086635712036507e-08, "loss": 1.6873, "step": 9538 }, { "epoch": 0.9908590422769294, "grad_norm": 0.42663663625717163, "learning_rate": 2.061547323000679e-08, "loss": 1.7317, "step": 9539 }, { "epoch": 0.9909629167965098, "grad_norm": 0.40359655022621155, "learning_rate": 2.014963316257501e-08, "loss": 1.5926, "step": 9540 }, { "epoch": 0.9910667913160902, "grad_norm": 0.40575510263442993, "learning_rate": 1.9689115559345938e-08, "loss": 1.5653, "step": 9541 }, { "epoch": 0.9911706658356705, "grad_norm": 0.4161994457244873, "learning_rate": 1.9233920469369225e-08, "loss": 1.5923, "step": 9542 }, { "epoch": 0.9912745403552509, "grad_norm": 0.4296528697013855, "learning_rate": 1.878404794111166e-08, "loss": 1.7091, "step": 9543 }, { "epoch": 0.9913784148748312, "grad_norm": 0.4677174985408783, "learning_rate": 1.8339498022490465e-08, "loss": 1.7469, "step": 9544 }, { "epoch": 0.9914822893944115, "grad_norm": 0.43237027525901794, "learning_rate": 1.790027076083445e-08, "loss": 1.6314, "step": 9545 }, { "epoch": 0.9915861639139919, "grad_norm": 0.4431440830230713, "learning_rate": 1.7466366202928408e-08, "loss": 1.6766, "step": 9546 }, { "epoch": 0.9916900384335723, "grad_norm": 0.43120649456977844, "learning_rate": 1.7037784394968724e-08, "loss": 1.6412, "step": 9547 }, { "epoch": 0.9917939129531526, "grad_norm": 0.3956824541091919, "learning_rate": 1.6614525382613323e-08, "loss": 1.5854, "step": 9548 }, { "epoch": 0.9918977874727329, "grad_norm": 0.41620564460754395, "learning_rate": 1.6196589210915046e-08, "loss": 1.5208, "step": 9549 }, { "epoch": 0.9920016619923133, "grad_norm": 0.46985071897506714, "learning_rate": 1.5783975924399398e-08, "loss": 1.8193, "step": 9550 }, { "epoch": 0.9921055365118936, "grad_norm": 0.44369015097618103, "learning_rate": 1.5376685566992345e-08, "loss": 1.5834, "step": 9551 }, { "epoch": 0.992209411031474, "grad_norm": 0.40749645233154297, "learning_rate": 1.4974718182075853e-08, "loss": 1.5352, "step": 9552 }, { "epoch": 0.9923132855510544, "grad_norm": 0.40627145767211914, "learning_rate": 1.4578073812454574e-08, "loss": 1.6593, "step": 9553 }, { "epoch": 0.9924171600706346, "grad_norm": 0.4516492784023285, "learning_rate": 1.418675250036694e-08, "loss": 1.7582, "step": 9554 }, { "epoch": 0.992521034590215, "grad_norm": 0.44202539324760437, "learning_rate": 1.3800754287485174e-08, "loss": 1.6358, "step": 9555 }, { "epoch": 0.9926249091097954, "grad_norm": 0.4185415208339691, "learning_rate": 1.3420079214915282e-08, "loss": 1.5889, "step": 9556 }, { "epoch": 0.9927287836293757, "grad_norm": 0.41641661524772644, "learning_rate": 1.3044727323202611e-08, "loss": 1.6185, "step": 9557 }, { "epoch": 0.992832658148956, "grad_norm": 0.4016210436820984, "learning_rate": 1.2674698652304085e-08, "loss": 1.6872, "step": 9558 }, { "epoch": 0.9929365326685364, "grad_norm": 0.4109307825565338, "learning_rate": 1.2309993241638173e-08, "loss": 1.7338, "step": 9559 }, { "epoch": 0.9930404071881167, "grad_norm": 0.4159395098686218, "learning_rate": 1.1950611130040479e-08, "loss": 1.8372, "step": 9560 }, { "epoch": 0.9931442817076971, "grad_norm": 0.37879422307014465, "learning_rate": 1.1596552355780388e-08, "loss": 1.5309, "step": 9561 }, { "epoch": 0.9932481562272775, "grad_norm": 0.46690380573272705, "learning_rate": 1.1247816956561075e-08, "loss": 1.6077, "step": 9562 }, { "epoch": 0.9933520307468579, "grad_norm": 0.444859117269516, "learning_rate": 1.0904404969525051e-08, "loss": 1.6831, "step": 9563 }, { "epoch": 0.9934559052664381, "grad_norm": 0.46033644676208496, "learning_rate": 1.0566316431237511e-08, "loss": 1.7443, "step": 9564 }, { "epoch": 0.9935597797860185, "grad_norm": 0.44090694189071655, "learning_rate": 1.0233551377702988e-08, "loss": 1.7238, "step": 9565 }, { "epoch": 0.9936636543055989, "grad_norm": 0.4091147780418396, "learning_rate": 9.906109844359802e-09, "loss": 1.6286, "step": 9566 }, { "epoch": 0.9937675288251792, "grad_norm": 0.41084280610084534, "learning_rate": 9.583991866080055e-09, "loss": 1.5039, "step": 9567 }, { "epoch": 0.9938714033447595, "grad_norm": 0.4234639108181, "learning_rate": 9.267197477169643e-09, "loss": 1.6008, "step": 9568 }, { "epoch": 0.9939752778643399, "grad_norm": 0.40848711133003235, "learning_rate": 8.955726711351586e-09, "loss": 1.6676, "step": 9569 }, { "epoch": 0.9940791523839202, "grad_norm": 0.3974449932575226, "learning_rate": 8.649579601810453e-09, "loss": 1.4568, "step": 9570 }, { "epoch": 0.9941830269035006, "grad_norm": 0.42480480670928955, "learning_rate": 8.34875618113684e-09, "loss": 1.7452, "step": 9571 }, { "epoch": 0.994286901423081, "grad_norm": 0.4431557059288025, "learning_rate": 8.053256481371785e-09, "loss": 1.7141, "step": 9572 }, { "epoch": 0.9943907759426612, "grad_norm": 0.46281933784484863, "learning_rate": 7.763080533984557e-09, "loss": 1.7414, "step": 9573 }, { "epoch": 0.9944946504622416, "grad_norm": 0.44635605812072754, "learning_rate": 7.478228369872664e-09, "loss": 1.7477, "step": 9574 }, { "epoch": 0.994598524981822, "grad_norm": 0.40345069766044617, "learning_rate": 7.198700019378502e-09, "loss": 1.7388, "step": 9575 }, { "epoch": 0.9947023995014023, "grad_norm": 0.4090345799922943, "learning_rate": 6.924495512256046e-09, "loss": 1.6223, "step": 9576 }, { "epoch": 0.9948062740209827, "grad_norm": 0.4181837737560272, "learning_rate": 6.655614877720817e-09, "loss": 1.6013, "step": 9577 }, { "epoch": 0.994910148540563, "grad_norm": 0.4603481590747833, "learning_rate": 6.392058144394364e-09, "loss": 1.6791, "step": 9578 }, { "epoch": 0.9950140230601433, "grad_norm": 0.43850967288017273, "learning_rate": 6.133825340348675e-09, "loss": 1.6949, "step": 9579 }, { "epoch": 0.9951178975797237, "grad_norm": 0.42286258935928345, "learning_rate": 5.8809164930839765e-09, "loss": 1.6022, "step": 9580 }, { "epoch": 0.9952217720993041, "grad_norm": 0.4476601481437683, "learning_rate": 5.6333316295342775e-09, "loss": 1.8264, "step": 9581 }, { "epoch": 0.9953256466188843, "grad_norm": 0.4325719475746155, "learning_rate": 5.391070776061824e-09, "loss": 1.5983, "step": 9582 }, { "epoch": 0.9954295211384647, "grad_norm": 0.41536781191825867, "learning_rate": 5.154133958468199e-09, "loss": 1.6543, "step": 9583 }, { "epoch": 0.9955333956580451, "grad_norm": 0.4049381613731384, "learning_rate": 4.922521201988772e-09, "loss": 1.6388, "step": 9584 }, { "epoch": 0.9956372701776254, "grad_norm": 0.3786325752735138, "learning_rate": 4.6962325312760455e-09, "loss": 1.5859, "step": 9585 }, { "epoch": 0.9957411446972058, "grad_norm": 0.45597589015960693, "learning_rate": 4.475267970444064e-09, "loss": 1.8203, "step": 9586 }, { "epoch": 0.9958450192167861, "grad_norm": 0.42808467149734497, "learning_rate": 4.259627543012901e-09, "loss": 1.6721, "step": 9587 }, { "epoch": 0.9959488937363664, "grad_norm": 0.42117738723754883, "learning_rate": 4.04931127194752e-09, "loss": 1.5659, "step": 9588 }, { "epoch": 0.9960527682559468, "grad_norm": 0.4049902856349945, "learning_rate": 3.844319179646671e-09, "loss": 1.6087, "step": 9589 }, { "epoch": 0.9961566427755272, "grad_norm": 0.40100494027137756, "learning_rate": 3.6446512879428907e-09, "loss": 1.654, "step": 9590 }, { "epoch": 0.9962605172951076, "grad_norm": 0.3948621153831482, "learning_rate": 3.4503076180969486e-09, "loss": 1.661, "step": 9591 }, { "epoch": 0.9963643918146878, "grad_norm": 0.44757014513015747, "learning_rate": 3.2612881908089533e-09, "loss": 1.6552, "step": 9592 }, { "epoch": 0.9964682663342682, "grad_norm": 0.413869172334671, "learning_rate": 3.0775930261961462e-09, "loss": 1.6067, "step": 9593 }, { "epoch": 0.9965721408538486, "grad_norm": 0.46646273136138916, "learning_rate": 2.89922214383731e-09, "loss": 1.6194, "step": 9594 }, { "epoch": 0.9966760153734289, "grad_norm": 0.39553186297416687, "learning_rate": 2.726175562711708e-09, "loss": 1.5895, "step": 9595 }, { "epoch": 0.9967798898930093, "grad_norm": 0.42931756377220154, "learning_rate": 2.558453301260144e-09, "loss": 1.5636, "step": 9596 }, { "epoch": 0.9968837644125896, "grad_norm": 0.4277789890766144, "learning_rate": 2.3960553773350046e-09, "loss": 1.6763, "step": 9597 }, { "epoch": 0.9969876389321699, "grad_norm": 0.4121456444263458, "learning_rate": 2.2389818082335645e-09, "loss": 1.6111, "step": 9598 }, { "epoch": 0.9970915134517503, "grad_norm": 0.4208837151527405, "learning_rate": 2.0872326106868845e-09, "loss": 1.7814, "step": 9599 }, { "epoch": 0.9971953879713307, "grad_norm": 0.4421430826187134, "learning_rate": 1.9408078008431586e-09, "loss": 1.6691, "step": 9600 }, { "epoch": 0.9972992624909109, "grad_norm": 0.4708458483219147, "learning_rate": 1.7997073943121222e-09, "loss": 1.6991, "step": 9601 }, { "epoch": 0.9974031370104913, "grad_norm": 0.416151762008667, "learning_rate": 1.6639314061095422e-09, "loss": 1.6752, "step": 9602 }, { "epoch": 0.9975070115300717, "grad_norm": 0.408317506313324, "learning_rate": 1.5334798506905224e-09, "loss": 1.4994, "step": 9603 }, { "epoch": 0.997610886049652, "grad_norm": 0.4764207601547241, "learning_rate": 1.4083527419606058e-09, "loss": 1.8033, "step": 9604 }, { "epoch": 0.9977147605692324, "grad_norm": 0.4480811655521393, "learning_rate": 1.2885500932313666e-09, "loss": 1.8291, "step": 9605 }, { "epoch": 0.9978186350888127, "grad_norm": 0.4367046058177948, "learning_rate": 1.1740719172703697e-09, "loss": 1.6905, "step": 9606 }, { "epoch": 0.997922509608393, "grad_norm": 0.43447238206863403, "learning_rate": 1.0649182262623125e-09, "loss": 1.6493, "step": 9607 }, { "epoch": 0.9980263841279734, "grad_norm": 0.4337233006954193, "learning_rate": 9.610890318312305e-10, "loss": 1.6764, "step": 9608 }, { "epoch": 0.9981302586475538, "grad_norm": 0.42332491278648376, "learning_rate": 8.625843450404958e-10, "loss": 1.6347, "step": 9609 }, { "epoch": 0.998234133167134, "grad_norm": 0.4210823178291321, "learning_rate": 7.69404176376165e-10, "loss": 1.7052, "step": 9610 }, { "epoch": 0.9983380076867144, "grad_norm": 0.3997040092945099, "learning_rate": 6.81548535763632e-10, "loss": 1.5105, "step": 9611 }, { "epoch": 0.9984418822062948, "grad_norm": 0.43500977754592896, "learning_rate": 5.990174325509745e-10, "loss": 1.6361, "step": 9612 }, { "epoch": 0.9985457567258751, "grad_norm": 0.459688276052475, "learning_rate": 5.218108755367102e-10, "loss": 1.6404, "step": 9613 }, { "epoch": 0.9986496312454555, "grad_norm": 0.43085619807243347, "learning_rate": 4.499288729364892e-10, "loss": 1.642, "step": 9614 }, { "epoch": 0.9987535057650359, "grad_norm": 0.4042317569255829, "learning_rate": 3.833714324108506e-10, "loss": 1.7038, "step": 9615 }, { "epoch": 0.9988573802846162, "grad_norm": 0.43363288044929504, "learning_rate": 3.221385610430172e-10, "loss": 1.8699, "step": 9616 }, { "epoch": 0.9989612548041965, "grad_norm": 0.40789350867271423, "learning_rate": 2.6623026534999817e-10, "loss": 1.7067, "step": 9617 }, { "epoch": 0.9990651293237769, "grad_norm": 0.45757153630256653, "learning_rate": 2.1564655129369115e-10, "loss": 1.9236, "step": 9618 }, { "epoch": 0.9991690038433573, "grad_norm": 0.4801182746887207, "learning_rate": 1.7038742425867781e-10, "loss": 1.8674, "step": 9619 }, { "epoch": 0.9992728783629375, "grad_norm": 0.41760823130607605, "learning_rate": 1.3045288906332608e-10, "loss": 1.591, "step": 9620 }, { "epoch": 0.9993767528825179, "grad_norm": 0.46719786524772644, "learning_rate": 9.584294995979015e-11, "loss": 1.3563, "step": 9621 }, { "epoch": 0.9994806274020983, "grad_norm": 0.4825066030025482, "learning_rate": 6.655761063956157e-11, "loss": 1.7152, "step": 9622 }, { "epoch": 0.9995845019216786, "grad_norm": 0.4088488817214966, "learning_rate": 4.259687421126479e-11, "loss": 1.6796, "step": 9623 }, { "epoch": 0.999688376441259, "grad_norm": 0.4588576853275299, "learning_rate": 2.3960743233963913e-11, "loss": 1.8781, "step": 9624 }, { "epoch": 0.9997922509608393, "grad_norm": 0.46052485704421997, "learning_rate": 1.0649219683855905e-11, "loss": 1.5272, "step": 9625 }, { "epoch": 0.9998961254804196, "grad_norm": 0.4075811207294464, "learning_rate": 2.6623049931284727e-12, "loss": 1.7282, "step": 9626 }, { "epoch": 1.0, "grad_norm": 0.5596733093261719, "learning_rate": 0.0, "loss": 1.8582, "step": 9627 }, { "epoch": 1.0, "step": 9627, "total_flos": 1.452208309419167e+19, "train_loss": 1.7118117516813738, "train_runtime": 275545.5724, "train_samples_per_second": 0.14, "train_steps_per_second": 0.035 } ], "logging_steps": 1.0, "max_steps": 9627, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.452208309419167e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }