{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5193725979017347, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010387451958034694, "grad_norm": 1.3366456031799316, "learning_rate": 9.9999997337695e-05, "loss": 2.4557, "step": 1 }, { "epoch": 0.00020774903916069389, "grad_norm": 0.7125677466392517, "learning_rate": 9.999998935078033e-05, "loss": 2.1366, "step": 2 }, { "epoch": 0.0003116235587410408, "grad_norm": 0.5620805621147156, "learning_rate": 9.999997603925677e-05, "loss": 1.9955, "step": 3 }, { "epoch": 0.00041549807832138777, "grad_norm": 0.44992074370384216, "learning_rate": 9.99999574031258e-05, "loss": 2.1548, "step": 4 }, { "epoch": 0.0005193725979017347, "grad_norm": 0.4193553626537323, "learning_rate": 9.999993344238936e-05, "loss": 2.0443, "step": 5 }, { "epoch": 0.0006232471174820816, "grad_norm": 0.4412941336631775, "learning_rate": 9.999990415705005e-05, "loss": 2.0479, "step": 6 }, { "epoch": 0.0007271216370624286, "grad_norm": 0.40010932087898254, "learning_rate": 9.999986954711094e-05, "loss": 2.0064, "step": 7 }, { "epoch": 0.0008309961566427755, "grad_norm": 0.4279223680496216, "learning_rate": 9.999982961257574e-05, "loss": 1.8892, "step": 8 }, { "epoch": 0.0009348706762231225, "grad_norm": 0.4031948149204254, "learning_rate": 9.999978435344872e-05, "loss": 1.9309, "step": 9 }, { "epoch": 0.0010387451958034693, "grad_norm": 0.34114745259284973, "learning_rate": 9.999973376973466e-05, "loss": 1.6737, "step": 10 }, { "epoch": 0.0011426197153838163, "grad_norm": 0.4245925545692444, "learning_rate": 9.999967786143895e-05, "loss": 1.8743, "step": 11 }, { "epoch": 0.0012464942349641633, "grad_norm": 0.4021143317222595, "learning_rate": 9.99996166285676e-05, "loss": 1.8595, "step": 12 }, { "epoch": 0.0013503687545445102, "grad_norm": 0.38615134358406067, "learning_rate": 9.999955007112706e-05, "loss": 1.8856, "step": 13 }, { "epoch": 0.0014542432741248572, "grad_norm": 0.3954271078109741, "learning_rate": 9.999947818912448e-05, "loss": 2.0519, "step": 14 }, { "epoch": 0.0015581177937052041, "grad_norm": 0.3857433795928955, "learning_rate": 9.999940098256747e-05, "loss": 1.8855, "step": 15 }, { "epoch": 0.001661992313285551, "grad_norm": 0.38353338837623596, "learning_rate": 9.999931845146424e-05, "loss": 2.0608, "step": 16 }, { "epoch": 0.001765866832865898, "grad_norm": 0.46258458495140076, "learning_rate": 9.999923059582363e-05, "loss": 2.2282, "step": 17 }, { "epoch": 0.001869741352446245, "grad_norm": 0.40921229124069214, "learning_rate": 9.999913741565496e-05, "loss": 2.0698, "step": 18 }, { "epoch": 0.0019736158720265917, "grad_norm": 0.39826393127441406, "learning_rate": 9.999903891096817e-05, "loss": 2.0232, "step": 19 }, { "epoch": 0.0020774903916069387, "grad_norm": 0.4504700005054474, "learning_rate": 9.999893508177373e-05, "loss": 1.8005, "step": 20 }, { "epoch": 0.0021813649111872857, "grad_norm": 0.35965996980667114, "learning_rate": 9.999882592808274e-05, "loss": 1.8358, "step": 21 }, { "epoch": 0.0022852394307676326, "grad_norm": 0.36941030621528625, "learning_rate": 9.999871144990677e-05, "loss": 1.8202, "step": 22 }, { "epoch": 0.0023891139503479796, "grad_norm": 0.3631148338317871, "learning_rate": 9.999859164725804e-05, "loss": 1.9011, "step": 23 }, { "epoch": 0.0024929884699283265, "grad_norm": 0.3719225525856018, "learning_rate": 9.999846652014931e-05, "loss": 1.8046, "step": 24 }, { "epoch": 0.0025968629895086735, "grad_norm": 0.3886321783065796, "learning_rate": 9.99983360685939e-05, "loss": 1.7041, "step": 25 }, { "epoch": 0.0027007375090890204, "grad_norm": 0.35539302229881287, "learning_rate": 9.999820029260569e-05, "loss": 1.8635, "step": 26 }, { "epoch": 0.0028046120286693674, "grad_norm": 0.40370672941207886, "learning_rate": 9.999805919219916e-05, "loss": 2.025, "step": 27 }, { "epoch": 0.0029084865482497143, "grad_norm": 0.36346563696861267, "learning_rate": 9.999791276738933e-05, "loss": 1.9266, "step": 28 }, { "epoch": 0.0030123610678300613, "grad_norm": 0.4094865322113037, "learning_rate": 9.999776101819177e-05, "loss": 1.923, "step": 29 }, { "epoch": 0.0031162355874104083, "grad_norm": 0.33642393350601196, "learning_rate": 9.999760394462268e-05, "loss": 1.876, "step": 30 }, { "epoch": 0.0032201101069907552, "grad_norm": 0.386692613363266, "learning_rate": 9.999744154669874e-05, "loss": 2.0935, "step": 31 }, { "epoch": 0.003323984626571102, "grad_norm": 0.32473698258399963, "learning_rate": 9.999727382443728e-05, "loss": 1.7944, "step": 32 }, { "epoch": 0.003427859146151449, "grad_norm": 0.3738991916179657, "learning_rate": 9.999710077785616e-05, "loss": 2.0774, "step": 33 }, { "epoch": 0.003531733665731796, "grad_norm": 0.3248012959957123, "learning_rate": 9.99969224069738e-05, "loss": 1.7769, "step": 34 }, { "epoch": 0.003635608185312143, "grad_norm": 0.3610652685165405, "learning_rate": 9.99967387118092e-05, "loss": 1.9222, "step": 35 }, { "epoch": 0.00373948270489249, "grad_norm": 0.3891034722328186, "learning_rate": 9.99965496923819e-05, "loss": 2.0162, "step": 36 }, { "epoch": 0.003843357224472837, "grad_norm": 0.37236684560775757, "learning_rate": 9.999635534871205e-05, "loss": 2.0374, "step": 37 }, { "epoch": 0.0039472317440531835, "grad_norm": 0.3498350977897644, "learning_rate": 9.999615568082036e-05, "loss": 1.7788, "step": 38 }, { "epoch": 0.0040511062636335304, "grad_norm": 0.41840648651123047, "learning_rate": 9.999595068872805e-05, "loss": 1.6986, "step": 39 }, { "epoch": 0.004154980783213877, "grad_norm": 0.3720596134662628, "learning_rate": 9.9995740372457e-05, "loss": 1.9754, "step": 40 }, { "epoch": 0.004258855302794224, "grad_norm": 0.32730332016944885, "learning_rate": 9.999552473202955e-05, "loss": 1.699, "step": 41 }, { "epoch": 0.004362729822374571, "grad_norm": 0.36480000615119934, "learning_rate": 9.999530376746873e-05, "loss": 2.1077, "step": 42 }, { "epoch": 0.004466604341954918, "grad_norm": 0.3458911180496216, "learning_rate": 9.999507747879802e-05, "loss": 1.9693, "step": 43 }, { "epoch": 0.004570478861535265, "grad_norm": 0.34066182374954224, "learning_rate": 9.999484586604154e-05, "loss": 1.9005, "step": 44 }, { "epoch": 0.004674353381115612, "grad_norm": 0.34163591265678406, "learning_rate": 9.999460892922394e-05, "loss": 1.8829, "step": 45 }, { "epoch": 0.004778227900695959, "grad_norm": 0.37044212222099304, "learning_rate": 9.999436666837048e-05, "loss": 1.9172, "step": 46 }, { "epoch": 0.004882102420276306, "grad_norm": 0.3332674205303192, "learning_rate": 9.999411908350692e-05, "loss": 1.8282, "step": 47 }, { "epoch": 0.004985976939856653, "grad_norm": 0.3461020886898041, "learning_rate": 9.999386617465966e-05, "loss": 1.8652, "step": 48 }, { "epoch": 0.005089851459437, "grad_norm": 0.31899985671043396, "learning_rate": 9.999360794185562e-05, "loss": 1.9479, "step": 49 }, { "epoch": 0.005193725979017347, "grad_norm": 0.3465663194656372, "learning_rate": 9.999334438512228e-05, "loss": 1.8856, "step": 50 }, { "epoch": 0.005297600498597694, "grad_norm": 0.4567152261734009, "learning_rate": 9.999307550448776e-05, "loss": 2.26, "step": 51 }, { "epoch": 0.005401475018178041, "grad_norm": 0.36369699239730835, "learning_rate": 9.999280129998063e-05, "loss": 1.9131, "step": 52 }, { "epoch": 0.005505349537758388, "grad_norm": 0.3090951442718506, "learning_rate": 9.999252177163013e-05, "loss": 1.7429, "step": 53 }, { "epoch": 0.005609224057338735, "grad_norm": 0.3246748745441437, "learning_rate": 9.999223691946602e-05, "loss": 1.8882, "step": 54 }, { "epoch": 0.005713098576919082, "grad_norm": 0.3664419949054718, "learning_rate": 9.999194674351864e-05, "loss": 2.2768, "step": 55 }, { "epoch": 0.005816973096499429, "grad_norm": 0.3436775505542755, "learning_rate": 9.999165124381887e-05, "loss": 1.7684, "step": 56 }, { "epoch": 0.005920847616079776, "grad_norm": 0.3558364510536194, "learning_rate": 9.99913504203982e-05, "loss": 1.8547, "step": 57 }, { "epoch": 0.006024722135660123, "grad_norm": 0.3412851393222809, "learning_rate": 9.999104427328865e-05, "loss": 1.9773, "step": 58 }, { "epoch": 0.0061285966552404696, "grad_norm": 0.3379881978034973, "learning_rate": 9.999073280252283e-05, "loss": 1.9732, "step": 59 }, { "epoch": 0.0062324711748208165, "grad_norm": 0.38533058762550354, "learning_rate": 9.999041600813393e-05, "loss": 1.9285, "step": 60 }, { "epoch": 0.0063363456944011635, "grad_norm": 0.3420720398426056, "learning_rate": 9.999009389015564e-05, "loss": 1.9619, "step": 61 }, { "epoch": 0.0064402202139815104, "grad_norm": 0.3268018662929535, "learning_rate": 9.99897664486223e-05, "loss": 2.0373, "step": 62 }, { "epoch": 0.006544094733561857, "grad_norm": 0.32013919949531555, "learning_rate": 9.998943368356877e-05, "loss": 1.8389, "step": 63 }, { "epoch": 0.006647969253142204, "grad_norm": 0.38457608222961426, "learning_rate": 9.998909559503048e-05, "loss": 2.0334, "step": 64 }, { "epoch": 0.006751843772722551, "grad_norm": 0.35168957710266113, "learning_rate": 9.998875218304345e-05, "loss": 2.0735, "step": 65 }, { "epoch": 0.006855718292302898, "grad_norm": 0.3252507746219635, "learning_rate": 9.998840344764422e-05, "loss": 1.7135, "step": 66 }, { "epoch": 0.006959592811883245, "grad_norm": 0.353354275226593, "learning_rate": 9.998804938886997e-05, "loss": 1.9234, "step": 67 }, { "epoch": 0.007063467331463592, "grad_norm": 0.3150123357772827, "learning_rate": 9.998769000675836e-05, "loss": 1.6909, "step": 68 }, { "epoch": 0.007167341851043939, "grad_norm": 0.33036699891090393, "learning_rate": 9.99873253013477e-05, "loss": 1.7841, "step": 69 }, { "epoch": 0.007271216370624286, "grad_norm": 0.3279257118701935, "learning_rate": 9.998695527267681e-05, "loss": 1.8755, "step": 70 }, { "epoch": 0.007375090890204633, "grad_norm": 0.3096674978733063, "learning_rate": 9.998657992078509e-05, "loss": 1.8373, "step": 71 }, { "epoch": 0.00747896540978498, "grad_norm": 0.3656545877456665, "learning_rate": 9.998619924571251e-05, "loss": 2.0858, "step": 72 }, { "epoch": 0.007582839929365327, "grad_norm": 0.31613457202911377, "learning_rate": 9.998581324749964e-05, "loss": 1.7906, "step": 73 }, { "epoch": 0.007686714448945674, "grad_norm": 0.3255898356437683, "learning_rate": 9.998542192618755e-05, "loss": 1.8037, "step": 74 }, { "epoch": 0.007790588968526021, "grad_norm": 0.33904996514320374, "learning_rate": 9.998502528181793e-05, "loss": 1.6548, "step": 75 }, { "epoch": 0.007894463488106367, "grad_norm": 0.3505510985851288, "learning_rate": 9.998462331443301e-05, "loss": 2.0898, "step": 76 }, { "epoch": 0.007998338007686715, "grad_norm": 0.3301371932029724, "learning_rate": 9.99842160240756e-05, "loss": 1.9709, "step": 77 }, { "epoch": 0.008102212527267061, "grad_norm": 0.3287624418735504, "learning_rate": 9.998380341078909e-05, "loss": 1.6921, "step": 78 }, { "epoch": 0.008206087046847409, "grad_norm": 0.3255762755870819, "learning_rate": 9.99833854746174e-05, "loss": 1.8009, "step": 79 }, { "epoch": 0.008309961566427755, "grad_norm": 0.3210470378398895, "learning_rate": 9.998296221560504e-05, "loss": 1.9141, "step": 80 }, { "epoch": 0.008413836086008103, "grad_norm": 0.3226737082004547, "learning_rate": 9.998253363379708e-05, "loss": 1.8365, "step": 81 }, { "epoch": 0.008517710605588449, "grad_norm": 0.3540826439857483, "learning_rate": 9.998209972923918e-05, "loss": 2.0948, "step": 82 }, { "epoch": 0.008621585125168797, "grad_norm": 0.3137127161026001, "learning_rate": 9.998166050197751e-05, "loss": 1.8404, "step": 83 }, { "epoch": 0.008725459644749143, "grad_norm": 0.3495839536190033, "learning_rate": 9.998121595205889e-05, "loss": 1.8201, "step": 84 }, { "epoch": 0.00882933416432949, "grad_norm": 0.3384789824485779, "learning_rate": 9.998076607953064e-05, "loss": 1.9134, "step": 85 }, { "epoch": 0.008933208683909837, "grad_norm": 0.3618725836277008, "learning_rate": 9.998031088444067e-05, "loss": 1.9558, "step": 86 }, { "epoch": 0.009037083203490184, "grad_norm": 0.370078444480896, "learning_rate": 9.997985036683744e-05, "loss": 2.045, "step": 87 }, { "epoch": 0.00914095772307053, "grad_norm": 0.32593002915382385, "learning_rate": 9.997938452677e-05, "loss": 1.7929, "step": 88 }, { "epoch": 0.009244832242650878, "grad_norm": 0.3358675241470337, "learning_rate": 9.997891336428797e-05, "loss": 1.8742, "step": 89 }, { "epoch": 0.009348706762231224, "grad_norm": 0.35875624418258667, "learning_rate": 9.997843687944152e-05, "loss": 1.9634, "step": 90 }, { "epoch": 0.009452581281811572, "grad_norm": 0.3237409293651581, "learning_rate": 9.997795507228139e-05, "loss": 1.8137, "step": 91 }, { "epoch": 0.009556455801391918, "grad_norm": 0.3336571455001831, "learning_rate": 9.997746794285887e-05, "loss": 1.9342, "step": 92 }, { "epoch": 0.009660330320972266, "grad_norm": 0.3358410894870758, "learning_rate": 9.997697549122586e-05, "loss": 1.8772, "step": 93 }, { "epoch": 0.009764204840552612, "grad_norm": 0.3093462884426117, "learning_rate": 9.997647771743482e-05, "loss": 1.8048, "step": 94 }, { "epoch": 0.00986807936013296, "grad_norm": 0.34004223346710205, "learning_rate": 9.997597462153871e-05, "loss": 1.8228, "step": 95 }, { "epoch": 0.009971953879713306, "grad_norm": 0.33898910880088806, "learning_rate": 9.997546620359114e-05, "loss": 1.8203, "step": 96 }, { "epoch": 0.010075828399293654, "grad_norm": 0.3222779929637909, "learning_rate": 9.997495246364624e-05, "loss": 1.7667, "step": 97 }, { "epoch": 0.010179702918874, "grad_norm": 0.3132547438144684, "learning_rate": 9.997443340175872e-05, "loss": 1.7176, "step": 98 }, { "epoch": 0.010283577438454348, "grad_norm": 0.30829909443855286, "learning_rate": 9.997390901798386e-05, "loss": 1.7196, "step": 99 }, { "epoch": 0.010387451958034694, "grad_norm": 0.3239877223968506, "learning_rate": 9.997337931237751e-05, "loss": 1.6993, "step": 100 }, { "epoch": 0.010491326477615042, "grad_norm": 0.34558477997779846, "learning_rate": 9.997284428499605e-05, "loss": 1.9082, "step": 101 }, { "epoch": 0.010595200997195388, "grad_norm": 0.34653279185295105, "learning_rate": 9.99723039358965e-05, "loss": 1.8484, "step": 102 }, { "epoch": 0.010699075516775736, "grad_norm": 0.4017353057861328, "learning_rate": 9.997175826513637e-05, "loss": 2.0352, "step": 103 }, { "epoch": 0.010802950036356082, "grad_norm": 0.3651774227619171, "learning_rate": 9.997120727277378e-05, "loss": 2.1032, "step": 104 }, { "epoch": 0.01090682455593643, "grad_norm": 0.32084140181541443, "learning_rate": 9.997065095886741e-05, "loss": 1.7161, "step": 105 }, { "epoch": 0.011010699075516776, "grad_norm": 0.3476540446281433, "learning_rate": 9.997008932347649e-05, "loss": 1.8586, "step": 106 }, { "epoch": 0.011114573595097123, "grad_norm": 0.3458161950111389, "learning_rate": 9.996952236666084e-05, "loss": 1.8134, "step": 107 }, { "epoch": 0.01121844811467747, "grad_norm": 0.3564072549343109, "learning_rate": 9.996895008848085e-05, "loss": 1.9237, "step": 108 }, { "epoch": 0.011322322634257817, "grad_norm": 0.33512404561042786, "learning_rate": 9.996837248899741e-05, "loss": 1.8897, "step": 109 }, { "epoch": 0.011426197153838163, "grad_norm": 0.36306363344192505, "learning_rate": 9.99677895682721e-05, "loss": 1.9856, "step": 110 }, { "epoch": 0.011530071673418511, "grad_norm": 0.32571837306022644, "learning_rate": 9.996720132636697e-05, "loss": 1.6574, "step": 111 }, { "epoch": 0.011633946192998857, "grad_norm": 0.3236237168312073, "learning_rate": 9.996660776334464e-05, "loss": 1.7604, "step": 112 }, { "epoch": 0.011737820712579203, "grad_norm": 0.34493985772132874, "learning_rate": 9.996600887926834e-05, "loss": 1.766, "step": 113 }, { "epoch": 0.011841695232159551, "grad_norm": 0.34191280603408813, "learning_rate": 9.996540467420186e-05, "loss": 1.7565, "step": 114 }, { "epoch": 0.011945569751739897, "grad_norm": 0.33236685395240784, "learning_rate": 9.996479514820952e-05, "loss": 1.8874, "step": 115 }, { "epoch": 0.012049444271320245, "grad_norm": 0.37998783588409424, "learning_rate": 9.996418030135622e-05, "loss": 2.0761, "step": 116 }, { "epoch": 0.012153318790900591, "grad_norm": 0.34933921694755554, "learning_rate": 9.996356013370747e-05, "loss": 2.0269, "step": 117 }, { "epoch": 0.012257193310480939, "grad_norm": 0.3566916286945343, "learning_rate": 9.99629346453293e-05, "loss": 1.7982, "step": 118 }, { "epoch": 0.012361067830061285, "grad_norm": 0.36033695936203003, "learning_rate": 9.996230383628831e-05, "loss": 1.9449, "step": 119 }, { "epoch": 0.012464942349641633, "grad_norm": 0.34493738412857056, "learning_rate": 9.996166770665167e-05, "loss": 1.9453, "step": 120 }, { "epoch": 0.012568816869221979, "grad_norm": 0.3142033815383911, "learning_rate": 9.996102625648715e-05, "loss": 1.7562, "step": 121 }, { "epoch": 0.012672691388802327, "grad_norm": 0.3889004588127136, "learning_rate": 9.996037948586305e-05, "loss": 1.9439, "step": 122 }, { "epoch": 0.012776565908382673, "grad_norm": 0.32579320669174194, "learning_rate": 9.995972739484822e-05, "loss": 1.7729, "step": 123 }, { "epoch": 0.012880440427963021, "grad_norm": 0.3976999521255493, "learning_rate": 9.995906998351215e-05, "loss": 2.246, "step": 124 }, { "epoch": 0.012984314947543367, "grad_norm": 0.38258862495422363, "learning_rate": 9.99584072519248e-05, "loss": 1.9098, "step": 125 }, { "epoch": 0.013088189467123715, "grad_norm": 0.3394373655319214, "learning_rate": 9.995773920015679e-05, "loss": 1.7569, "step": 126 }, { "epoch": 0.013192063986704061, "grad_norm": 0.3687582015991211, "learning_rate": 9.995706582827922e-05, "loss": 2.0098, "step": 127 }, { "epoch": 0.013295938506284409, "grad_norm": 0.36636659502983093, "learning_rate": 9.995638713636382e-05, "loss": 1.9661, "step": 128 }, { "epoch": 0.013399813025864755, "grad_norm": 0.33423036336898804, "learning_rate": 9.995570312448289e-05, "loss": 1.768, "step": 129 }, { "epoch": 0.013503687545445103, "grad_norm": 0.34405717253685, "learning_rate": 9.995501379270922e-05, "loss": 1.649, "step": 130 }, { "epoch": 0.013607562065025449, "grad_norm": 0.352104514837265, "learning_rate": 9.995431914111624e-05, "loss": 1.904, "step": 131 }, { "epoch": 0.013711436584605797, "grad_norm": 0.34278222918510437, "learning_rate": 9.995361916977795e-05, "loss": 1.9214, "step": 132 }, { "epoch": 0.013815311104186143, "grad_norm": 0.3315337300300598, "learning_rate": 9.995291387876886e-05, "loss": 1.9647, "step": 133 }, { "epoch": 0.01391918562376649, "grad_norm": 0.349161297082901, "learning_rate": 9.99522032681641e-05, "loss": 1.8797, "step": 134 }, { "epoch": 0.014023060143346837, "grad_norm": 0.32067403197288513, "learning_rate": 9.995148733803933e-05, "loss": 1.9065, "step": 135 }, { "epoch": 0.014126934662927184, "grad_norm": 0.336671382188797, "learning_rate": 9.995076608847078e-05, "loss": 1.7649, "step": 136 }, { "epoch": 0.01423080918250753, "grad_norm": 0.3314901292324066, "learning_rate": 9.99500395195353e-05, "loss": 1.7482, "step": 137 }, { "epoch": 0.014334683702087878, "grad_norm": 0.3493605852127075, "learning_rate": 9.994930763131022e-05, "loss": 2.0021, "step": 138 }, { "epoch": 0.014438558221668224, "grad_norm": 0.37969377636909485, "learning_rate": 9.99485704238735e-05, "loss": 1.9293, "step": 139 }, { "epoch": 0.014542432741248572, "grad_norm": 0.31969866156578064, "learning_rate": 9.994782789730363e-05, "loss": 1.789, "step": 140 }, { "epoch": 0.014646307260828918, "grad_norm": 0.3660503625869751, "learning_rate": 9.994708005167972e-05, "loss": 2.0092, "step": 141 }, { "epoch": 0.014750181780409266, "grad_norm": 0.346720427274704, "learning_rate": 9.994632688708138e-05, "loss": 1.9393, "step": 142 }, { "epoch": 0.014854056299989612, "grad_norm": 0.3383733630180359, "learning_rate": 9.994556840358882e-05, "loss": 1.7967, "step": 143 }, { "epoch": 0.01495793081956996, "grad_norm": 0.3345518708229065, "learning_rate": 9.994480460128282e-05, "loss": 1.8901, "step": 144 }, { "epoch": 0.015061805339150306, "grad_norm": 0.34996676445007324, "learning_rate": 9.99440354802447e-05, "loss": 1.8006, "step": 145 }, { "epoch": 0.015165679858730654, "grad_norm": 0.34575289487838745, "learning_rate": 9.994326104055639e-05, "loss": 1.9967, "step": 146 }, { "epoch": 0.015269554378311, "grad_norm": 0.39413705468177795, "learning_rate": 9.994248128230035e-05, "loss": 1.9811, "step": 147 }, { "epoch": 0.015373428897891348, "grad_norm": 0.3441784083843231, "learning_rate": 9.994169620555962e-05, "loss": 1.7302, "step": 148 }, { "epoch": 0.015477303417471694, "grad_norm": 0.32033270597457886, "learning_rate": 9.994090581041781e-05, "loss": 1.8822, "step": 149 }, { "epoch": 0.015581177937052042, "grad_norm": 0.37517550587654114, "learning_rate": 9.994011009695908e-05, "loss": 2.0916, "step": 150 }, { "epoch": 0.01568505245663239, "grad_norm": 0.3081076145172119, "learning_rate": 9.993930906526818e-05, "loss": 1.6987, "step": 151 }, { "epoch": 0.015788926976212734, "grad_norm": 0.31799814105033875, "learning_rate": 9.993850271543039e-05, "loss": 1.6714, "step": 152 }, { "epoch": 0.015892801495793082, "grad_norm": 0.3602600693702698, "learning_rate": 9.99376910475316e-05, "loss": 2.0209, "step": 153 }, { "epoch": 0.01599667601537343, "grad_norm": 0.31287044286727905, "learning_rate": 9.993687406165826e-05, "loss": 1.6677, "step": 154 }, { "epoch": 0.016100550534953777, "grad_norm": 0.34423595666885376, "learning_rate": 9.993605175789733e-05, "loss": 1.863, "step": 155 }, { "epoch": 0.016204425054534122, "grad_norm": 0.3550304174423218, "learning_rate": 9.993522413633643e-05, "loss": 1.9312, "step": 156 }, { "epoch": 0.01630829957411447, "grad_norm": 0.3672383725643158, "learning_rate": 9.993439119706364e-05, "loss": 2.0446, "step": 157 }, { "epoch": 0.016412174093694817, "grad_norm": 0.32233211398124695, "learning_rate": 9.993355294016771e-05, "loss": 1.7413, "step": 158 }, { "epoch": 0.016516048613275165, "grad_norm": 0.33033961057662964, "learning_rate": 9.993270936573788e-05, "loss": 1.8413, "step": 159 }, { "epoch": 0.01661992313285551, "grad_norm": 0.3530619442462921, "learning_rate": 9.9931860473864e-05, "loss": 1.9083, "step": 160 }, { "epoch": 0.016723797652435857, "grad_norm": 0.33920934796333313, "learning_rate": 9.993100626463646e-05, "loss": 1.8213, "step": 161 }, { "epoch": 0.016827672172016205, "grad_norm": 0.3833538889884949, "learning_rate": 9.993014673814624e-05, "loss": 2.1405, "step": 162 }, { "epoch": 0.016931546691596553, "grad_norm": 0.35124388337135315, "learning_rate": 9.992928189448484e-05, "loss": 1.8398, "step": 163 }, { "epoch": 0.017035421211176897, "grad_norm": 0.34892386198043823, "learning_rate": 9.992841173374441e-05, "loss": 1.8151, "step": 164 }, { "epoch": 0.017139295730757245, "grad_norm": 0.33861371874809265, "learning_rate": 9.992753625601756e-05, "loss": 1.7062, "step": 165 }, { "epoch": 0.017243170250337593, "grad_norm": 0.34012502431869507, "learning_rate": 9.992665546139757e-05, "loss": 1.6682, "step": 166 }, { "epoch": 0.017347044769917937, "grad_norm": 0.38989123702049255, "learning_rate": 9.992576934997819e-05, "loss": 1.9738, "step": 167 }, { "epoch": 0.017450919289498285, "grad_norm": 0.37011152505874634, "learning_rate": 9.992487792185383e-05, "loss": 1.9593, "step": 168 }, { "epoch": 0.017554793809078633, "grad_norm": 0.341496080160141, "learning_rate": 9.992398117711941e-05, "loss": 1.8838, "step": 169 }, { "epoch": 0.01765866832865898, "grad_norm": 0.3466763496398926, "learning_rate": 9.99230791158704e-05, "loss": 1.7701, "step": 170 }, { "epoch": 0.017762542848239325, "grad_norm": 0.36252373456954956, "learning_rate": 9.992217173820288e-05, "loss": 1.8656, "step": 171 }, { "epoch": 0.017866417367819673, "grad_norm": 0.3116392195224762, "learning_rate": 9.992125904421348e-05, "loss": 1.7344, "step": 172 }, { "epoch": 0.01797029188740002, "grad_norm": 0.3929993808269501, "learning_rate": 9.992034103399939e-05, "loss": 1.6855, "step": 173 }, { "epoch": 0.01807416640698037, "grad_norm": 0.3422747552394867, "learning_rate": 9.991941770765838e-05, "loss": 1.8554, "step": 174 }, { "epoch": 0.018178040926560713, "grad_norm": 0.3650684058666229, "learning_rate": 9.991848906528876e-05, "loss": 1.9407, "step": 175 }, { "epoch": 0.01828191544614106, "grad_norm": 0.35484546422958374, "learning_rate": 9.991755510698944e-05, "loss": 1.8553, "step": 176 }, { "epoch": 0.01838578996572141, "grad_norm": 0.3734646737575531, "learning_rate": 9.991661583285987e-05, "loss": 1.77, "step": 177 }, { "epoch": 0.018489664485301757, "grad_norm": 0.3550145924091339, "learning_rate": 9.991567124300009e-05, "loss": 1.8825, "step": 178 }, { "epoch": 0.0185935390048821, "grad_norm": 0.3555574119091034, "learning_rate": 9.991472133751067e-05, "loss": 1.9889, "step": 179 }, { "epoch": 0.01869741352446245, "grad_norm": 0.32128405570983887, "learning_rate": 9.991376611649279e-05, "loss": 1.683, "step": 180 }, { "epoch": 0.018801288044042797, "grad_norm": 0.3892490863800049, "learning_rate": 9.991280558004815e-05, "loss": 1.9933, "step": 181 }, { "epoch": 0.018905162563623144, "grad_norm": 0.32436904311180115, "learning_rate": 9.991183972827904e-05, "loss": 1.8897, "step": 182 }, { "epoch": 0.01900903708320349, "grad_norm": 0.3443160355091095, "learning_rate": 9.991086856128833e-05, "loss": 1.6934, "step": 183 }, { "epoch": 0.019112911602783837, "grad_norm": 0.3968258202075958, "learning_rate": 9.990989207917944e-05, "loss": 1.9867, "step": 184 }, { "epoch": 0.019216786122364184, "grad_norm": 0.3794998526573181, "learning_rate": 9.990891028205636e-05, "loss": 2.0397, "step": 185 }, { "epoch": 0.019320660641944532, "grad_norm": 0.3027797043323517, "learning_rate": 9.990792317002364e-05, "loss": 1.6816, "step": 186 }, { "epoch": 0.019424535161524877, "grad_norm": 0.3459644913673401, "learning_rate": 9.99069307431864e-05, "loss": 1.8124, "step": 187 }, { "epoch": 0.019528409681105224, "grad_norm": 0.3326679766178131, "learning_rate": 9.990593300165033e-05, "loss": 1.6452, "step": 188 }, { "epoch": 0.019632284200685572, "grad_norm": 0.3605174422264099, "learning_rate": 9.990492994552167e-05, "loss": 2.088, "step": 189 }, { "epoch": 0.01973615872026592, "grad_norm": 0.3342396914958954, "learning_rate": 9.990392157490724e-05, "loss": 1.9396, "step": 190 }, { "epoch": 0.019840033239846264, "grad_norm": 0.37349754571914673, "learning_rate": 9.990290788991443e-05, "loss": 1.9863, "step": 191 }, { "epoch": 0.019943907759426612, "grad_norm": 0.3778427541255951, "learning_rate": 9.99018888906512e-05, "loss": 1.902, "step": 192 }, { "epoch": 0.02004778227900696, "grad_norm": 0.3246352970600128, "learning_rate": 9.990086457722604e-05, "loss": 1.8484, "step": 193 }, { "epoch": 0.020151656798587308, "grad_norm": 0.37238767743110657, "learning_rate": 9.989983494974805e-05, "loss": 1.9543, "step": 194 }, { "epoch": 0.020255531318167652, "grad_norm": 0.37035781145095825, "learning_rate": 9.989880000832686e-05, "loss": 1.8695, "step": 195 }, { "epoch": 0.020359405837748, "grad_norm": 0.3354018032550812, "learning_rate": 9.989775975307272e-05, "loss": 1.7589, "step": 196 }, { "epoch": 0.020463280357328348, "grad_norm": 0.4081863462924957, "learning_rate": 9.989671418409636e-05, "loss": 2.1527, "step": 197 }, { "epoch": 0.020567154876908696, "grad_norm": 0.33487629890441895, "learning_rate": 9.989566330150914e-05, "loss": 1.7132, "step": 198 }, { "epoch": 0.02067102939648904, "grad_norm": 0.3445211350917816, "learning_rate": 9.989460710542301e-05, "loss": 1.8913, "step": 199 }, { "epoch": 0.020774903916069388, "grad_norm": 0.40155649185180664, "learning_rate": 9.98935455959504e-05, "loss": 2.191, "step": 200 }, { "epoch": 0.020878778435649736, "grad_norm": 0.35195931792259216, "learning_rate": 9.989247877320436e-05, "loss": 1.9199, "step": 201 }, { "epoch": 0.020982652955230083, "grad_norm": 0.34090521931648254, "learning_rate": 9.989140663729852e-05, "loss": 1.831, "step": 202 }, { "epoch": 0.021086527474810428, "grad_norm": 0.3321789801120758, "learning_rate": 9.989032918834704e-05, "loss": 1.8922, "step": 203 }, { "epoch": 0.021190401994390776, "grad_norm": 0.321913480758667, "learning_rate": 9.988924642646466e-05, "loss": 1.8271, "step": 204 }, { "epoch": 0.021294276513971123, "grad_norm": 0.39516186714172363, "learning_rate": 9.988815835176668e-05, "loss": 1.9391, "step": 205 }, { "epoch": 0.02139815103355147, "grad_norm": 0.35352519154548645, "learning_rate": 9.9887064964369e-05, "loss": 1.6495, "step": 206 }, { "epoch": 0.021502025553131816, "grad_norm": 0.3435503840446472, "learning_rate": 9.988596626438801e-05, "loss": 1.8272, "step": 207 }, { "epoch": 0.021605900072712163, "grad_norm": 0.35792356729507446, "learning_rate": 9.988486225194075e-05, "loss": 1.8506, "step": 208 }, { "epoch": 0.02170977459229251, "grad_norm": 0.32657817006111145, "learning_rate": 9.988375292714478e-05, "loss": 1.8219, "step": 209 }, { "epoch": 0.02181364911187286, "grad_norm": 0.35596343874931335, "learning_rate": 9.988263829011822e-05, "loss": 1.8984, "step": 210 }, { "epoch": 0.021917523631453203, "grad_norm": 0.35671266913414, "learning_rate": 9.988151834097979e-05, "loss": 1.8276, "step": 211 }, { "epoch": 0.02202139815103355, "grad_norm": 0.3640732765197754, "learning_rate": 9.988039307984874e-05, "loss": 1.8979, "step": 212 }, { "epoch": 0.0221252726706139, "grad_norm": 0.32779642939567566, "learning_rate": 9.987926250684491e-05, "loss": 1.8392, "step": 213 }, { "epoch": 0.022229147190194247, "grad_norm": 0.3471597731113434, "learning_rate": 9.98781266220887e-05, "loss": 1.8287, "step": 214 }, { "epoch": 0.02233302170977459, "grad_norm": 0.3553629219532013, "learning_rate": 9.987698542570107e-05, "loss": 1.8612, "step": 215 }, { "epoch": 0.02243689622935494, "grad_norm": 0.3513118028640747, "learning_rate": 9.987583891780355e-05, "loss": 1.9711, "step": 216 }, { "epoch": 0.022540770748935287, "grad_norm": 0.3648374378681183, "learning_rate": 9.987468709851823e-05, "loss": 1.7615, "step": 217 }, { "epoch": 0.022644645268515635, "grad_norm": 0.3236382007598877, "learning_rate": 9.987352996796777e-05, "loss": 1.694, "step": 218 }, { "epoch": 0.02274851978809598, "grad_norm": 0.3441942632198334, "learning_rate": 9.98723675262754e-05, "loss": 1.7834, "step": 219 }, { "epoch": 0.022852394307676327, "grad_norm": 0.3617098331451416, "learning_rate": 9.987119977356491e-05, "loss": 1.8788, "step": 220 }, { "epoch": 0.022956268827256675, "grad_norm": 0.3559406101703644, "learning_rate": 9.987002670996064e-05, "loss": 1.9663, "step": 221 }, { "epoch": 0.023060143346837023, "grad_norm": 0.3278948664665222, "learning_rate": 9.986884833558754e-05, "loss": 1.822, "step": 222 }, { "epoch": 0.023164017866417367, "grad_norm": 0.3330172896385193, "learning_rate": 9.986766465057108e-05, "loss": 1.8147, "step": 223 }, { "epoch": 0.023267892385997715, "grad_norm": 0.3398604094982147, "learning_rate": 9.986647565503731e-05, "loss": 1.7854, "step": 224 }, { "epoch": 0.023371766905578063, "grad_norm": 0.2998906970024109, "learning_rate": 9.986528134911288e-05, "loss": 1.6783, "step": 225 }, { "epoch": 0.023475641425158407, "grad_norm": 0.3525906801223755, "learning_rate": 9.986408173292492e-05, "loss": 1.9996, "step": 226 }, { "epoch": 0.023579515944738755, "grad_norm": 0.34057801961898804, "learning_rate": 9.986287680660123e-05, "loss": 1.8241, "step": 227 }, { "epoch": 0.023683390464319103, "grad_norm": 0.3602697551250458, "learning_rate": 9.98616665702701e-05, "loss": 1.9336, "step": 228 }, { "epoch": 0.02378726498389945, "grad_norm": 0.31595826148986816, "learning_rate": 9.986045102406042e-05, "loss": 1.7719, "step": 229 }, { "epoch": 0.023891139503479795, "grad_norm": 0.34162575006484985, "learning_rate": 9.985923016810163e-05, "loss": 1.8026, "step": 230 }, { "epoch": 0.023995014023060143, "grad_norm": 0.3397705852985382, "learning_rate": 9.985800400252374e-05, "loss": 1.9196, "step": 231 }, { "epoch": 0.02409888854264049, "grad_norm": 0.3344639539718628, "learning_rate": 9.985677252745733e-05, "loss": 1.8512, "step": 232 }, { "epoch": 0.024202763062220838, "grad_norm": 0.44500732421875, "learning_rate": 9.985553574303354e-05, "loss": 1.8336, "step": 233 }, { "epoch": 0.024306637581801183, "grad_norm": 0.3289027512073517, "learning_rate": 9.98542936493841e-05, "loss": 1.6927, "step": 234 }, { "epoch": 0.02441051210138153, "grad_norm": 0.3510255217552185, "learning_rate": 9.985304624664125e-05, "loss": 1.8582, "step": 235 }, { "epoch": 0.024514386620961878, "grad_norm": 0.3442867696285248, "learning_rate": 9.985179353493785e-05, "loss": 1.9107, "step": 236 }, { "epoch": 0.024618261140542226, "grad_norm": 0.3105999529361725, "learning_rate": 9.98505355144073e-05, "loss": 1.6351, "step": 237 }, { "epoch": 0.02472213566012257, "grad_norm": 0.3260045647621155, "learning_rate": 9.984927218518356e-05, "loss": 1.7564, "step": 238 }, { "epoch": 0.024826010179702918, "grad_norm": 0.32900920510292053, "learning_rate": 9.984800354740117e-05, "loss": 1.7372, "step": 239 }, { "epoch": 0.024929884699283266, "grad_norm": 0.3402916193008423, "learning_rate": 9.984672960119523e-05, "loss": 1.7131, "step": 240 }, { "epoch": 0.025033759218863614, "grad_norm": 0.35345762968063354, "learning_rate": 9.984545034670142e-05, "loss": 1.7636, "step": 241 }, { "epoch": 0.025137633738443958, "grad_norm": 0.3537079393863678, "learning_rate": 9.984416578405596e-05, "loss": 1.9847, "step": 242 }, { "epoch": 0.025241508258024306, "grad_norm": 0.3712044358253479, "learning_rate": 9.984287591339562e-05, "loss": 2.0124, "step": 243 }, { "epoch": 0.025345382777604654, "grad_norm": 0.3348478376865387, "learning_rate": 9.98415807348578e-05, "loss": 1.8527, "step": 244 }, { "epoch": 0.025449257297185002, "grad_norm": 0.3452380299568176, "learning_rate": 9.984028024858041e-05, "loss": 1.8608, "step": 245 }, { "epoch": 0.025553131816765346, "grad_norm": 0.38733747601509094, "learning_rate": 9.983897445470194e-05, "loss": 2.029, "step": 246 }, { "epoch": 0.025657006336345694, "grad_norm": 0.3828756809234619, "learning_rate": 9.983766335336144e-05, "loss": 1.9682, "step": 247 }, { "epoch": 0.025760880855926042, "grad_norm": 0.38744086027145386, "learning_rate": 9.983634694469855e-05, "loss": 1.6414, "step": 248 }, { "epoch": 0.02586475537550639, "grad_norm": 0.3316837251186371, "learning_rate": 9.983502522885347e-05, "loss": 1.7982, "step": 249 }, { "epoch": 0.025968629895086734, "grad_norm": 0.3396342694759369, "learning_rate": 9.983369820596691e-05, "loss": 1.8246, "step": 250 }, { "epoch": 0.026072504414667082, "grad_norm": 0.3051803708076477, "learning_rate": 9.98323658761802e-05, "loss": 1.733, "step": 251 }, { "epoch": 0.02617637893424743, "grad_norm": 0.31522175669670105, "learning_rate": 9.983102823963524e-05, "loss": 1.6995, "step": 252 }, { "epoch": 0.026280253453827777, "grad_norm": 0.33415964245796204, "learning_rate": 9.982968529647447e-05, "loss": 1.5215, "step": 253 }, { "epoch": 0.026384127973408122, "grad_norm": 0.35238194465637207, "learning_rate": 9.982833704684091e-05, "loss": 1.8707, "step": 254 }, { "epoch": 0.02648800249298847, "grad_norm": 0.3153392970561981, "learning_rate": 9.982698349087812e-05, "loss": 1.7123, "step": 255 }, { "epoch": 0.026591877012568817, "grad_norm": 0.3397294282913208, "learning_rate": 9.982562462873026e-05, "loss": 1.7335, "step": 256 }, { "epoch": 0.026695751532149165, "grad_norm": 0.3764454126358032, "learning_rate": 9.982426046054204e-05, "loss": 1.9431, "step": 257 }, { "epoch": 0.02679962605172951, "grad_norm": 0.3482568860054016, "learning_rate": 9.982289098645872e-05, "loss": 1.8024, "step": 258 }, { "epoch": 0.026903500571309857, "grad_norm": 0.3208499550819397, "learning_rate": 9.982151620662612e-05, "loss": 1.8129, "step": 259 }, { "epoch": 0.027007375090890205, "grad_norm": 0.3308181166648865, "learning_rate": 9.98201361211907e-05, "loss": 1.8653, "step": 260 }, { "epoch": 0.027111249610470553, "grad_norm": 0.3466727137565613, "learning_rate": 9.981875073029938e-05, "loss": 1.7716, "step": 261 }, { "epoch": 0.027215124130050897, "grad_norm": 0.33778107166290283, "learning_rate": 9.981736003409971e-05, "loss": 1.8481, "step": 262 }, { "epoch": 0.027318998649631245, "grad_norm": 0.367851197719574, "learning_rate": 9.981596403273978e-05, "loss": 2.0175, "step": 263 }, { "epoch": 0.027422873169211593, "grad_norm": 0.3659020662307739, "learning_rate": 9.981456272636826e-05, "loss": 1.6928, "step": 264 }, { "epoch": 0.02752674768879194, "grad_norm": 0.35648471117019653, "learning_rate": 9.981315611513438e-05, "loss": 1.9024, "step": 265 }, { "epoch": 0.027630622208372285, "grad_norm": 0.3503887355327606, "learning_rate": 9.981174419918795e-05, "loss": 1.8439, "step": 266 }, { "epoch": 0.027734496727952633, "grad_norm": 0.37520813941955566, "learning_rate": 9.981032697867929e-05, "loss": 2.0282, "step": 267 }, { "epoch": 0.02783837124753298, "grad_norm": 0.3300826847553253, "learning_rate": 9.980890445375934e-05, "loss": 1.7793, "step": 268 }, { "epoch": 0.02794224576711333, "grad_norm": 0.32417240738868713, "learning_rate": 9.980747662457961e-05, "loss": 1.8332, "step": 269 }, { "epoch": 0.028046120286693673, "grad_norm": 0.32837119698524475, "learning_rate": 9.980604349129211e-05, "loss": 1.6546, "step": 270 }, { "epoch": 0.02814999480627402, "grad_norm": 0.36148953437805176, "learning_rate": 9.980460505404949e-05, "loss": 1.9148, "step": 271 }, { "epoch": 0.02825386932585437, "grad_norm": 0.3625042736530304, "learning_rate": 9.980316131300493e-05, "loss": 1.8963, "step": 272 }, { "epoch": 0.028357743845434717, "grad_norm": 0.322068452835083, "learning_rate": 9.980171226831216e-05, "loss": 1.7182, "step": 273 }, { "epoch": 0.02846161836501506, "grad_norm": 0.3443426787853241, "learning_rate": 9.980025792012551e-05, "loss": 1.7612, "step": 274 }, { "epoch": 0.02856549288459541, "grad_norm": 0.3243624269962311, "learning_rate": 9.979879826859983e-05, "loss": 1.8107, "step": 275 }, { "epoch": 0.028669367404175757, "grad_norm": 0.34362664818763733, "learning_rate": 9.97973333138906e-05, "loss": 1.8116, "step": 276 }, { "epoch": 0.028773241923756104, "grad_norm": 0.3423418402671814, "learning_rate": 9.97958630561538e-05, "loss": 1.968, "step": 277 }, { "epoch": 0.02887711644333645, "grad_norm": 0.3265102207660675, "learning_rate": 9.9794387495546e-05, "loss": 1.7281, "step": 278 }, { "epoch": 0.028980990962916797, "grad_norm": 0.34078219532966614, "learning_rate": 9.979290663222434e-05, "loss": 1.8249, "step": 279 }, { "epoch": 0.029084865482497144, "grad_norm": 0.34047967195510864, "learning_rate": 9.979142046634653e-05, "loss": 1.8084, "step": 280 }, { "epoch": 0.02918874000207749, "grad_norm": 0.36841025948524475, "learning_rate": 9.978992899807084e-05, "loss": 1.9606, "step": 281 }, { "epoch": 0.029292614521657837, "grad_norm": 0.32618024945259094, "learning_rate": 9.978843222755607e-05, "loss": 1.8753, "step": 282 }, { "epoch": 0.029396489041238184, "grad_norm": 0.33189335465431213, "learning_rate": 9.978693015496165e-05, "loss": 1.948, "step": 283 }, { "epoch": 0.029500363560818532, "grad_norm": 0.32388558983802795, "learning_rate": 9.978542278044751e-05, "loss": 1.8796, "step": 284 }, { "epoch": 0.029604238080398877, "grad_norm": 0.34063830971717834, "learning_rate": 9.978391010417418e-05, "loss": 1.8221, "step": 285 }, { "epoch": 0.029708112599979224, "grad_norm": 0.32267820835113525, "learning_rate": 9.978239212630277e-05, "loss": 1.7903, "step": 286 }, { "epoch": 0.029811987119559572, "grad_norm": 0.35702529549598694, "learning_rate": 9.978086884699492e-05, "loss": 1.9183, "step": 287 }, { "epoch": 0.02991586163913992, "grad_norm": 0.36471986770629883, "learning_rate": 9.977934026641282e-05, "loss": 1.8877, "step": 288 }, { "epoch": 0.030019736158720264, "grad_norm": 0.3516945540904999, "learning_rate": 9.977780638471928e-05, "loss": 1.7437, "step": 289 }, { "epoch": 0.030123610678300612, "grad_norm": 0.37206095457077026, "learning_rate": 9.977626720207764e-05, "loss": 1.794, "step": 290 }, { "epoch": 0.03022748519788096, "grad_norm": 0.3756014108657837, "learning_rate": 9.977472271865182e-05, "loss": 1.8116, "step": 291 }, { "epoch": 0.030331359717461308, "grad_norm": 0.3230532705783844, "learning_rate": 9.977317293460631e-05, "loss": 1.7871, "step": 292 }, { "epoch": 0.030435234237041652, "grad_norm": 0.3606550693511963, "learning_rate": 9.97716178501061e-05, "loss": 1.8669, "step": 293 }, { "epoch": 0.030539108756622, "grad_norm": 0.34765157103538513, "learning_rate": 9.977005746531682e-05, "loss": 1.8061, "step": 294 }, { "epoch": 0.030642983276202348, "grad_norm": 0.3419150114059448, "learning_rate": 9.976849178040466e-05, "loss": 1.8189, "step": 295 }, { "epoch": 0.030746857795782696, "grad_norm": 0.33086076378822327, "learning_rate": 9.976692079553633e-05, "loss": 1.7308, "step": 296 }, { "epoch": 0.03085073231536304, "grad_norm": 0.3839011490345001, "learning_rate": 9.976534451087913e-05, "loss": 1.881, "step": 297 }, { "epoch": 0.030954606834943388, "grad_norm": 0.3362378478050232, "learning_rate": 9.976376292660091e-05, "loss": 2.0111, "step": 298 }, { "epoch": 0.031058481354523736, "grad_norm": 0.3804178535938263, "learning_rate": 9.976217604287013e-05, "loss": 2.1103, "step": 299 }, { "epoch": 0.031162355874104083, "grad_norm": 0.38724952936172485, "learning_rate": 9.976058385985575e-05, "loss": 1.8858, "step": 300 }, { "epoch": 0.03126623039368443, "grad_norm": 0.35871621966362, "learning_rate": 9.975898637772734e-05, "loss": 1.7076, "step": 301 }, { "epoch": 0.03137010491326478, "grad_norm": 0.32210198044776917, "learning_rate": 9.975738359665501e-05, "loss": 1.8058, "step": 302 }, { "epoch": 0.03147397943284512, "grad_norm": 0.34519657492637634, "learning_rate": 9.975577551680946e-05, "loss": 1.8827, "step": 303 }, { "epoch": 0.03157785395242547, "grad_norm": 0.3406042456626892, "learning_rate": 9.975416213836193e-05, "loss": 1.801, "step": 304 }, { "epoch": 0.031681728472005816, "grad_norm": 0.33171379566192627, "learning_rate": 9.975254346148422e-05, "loss": 1.7826, "step": 305 }, { "epoch": 0.031785602991586163, "grad_norm": 0.31799814105033875, "learning_rate": 9.975091948634871e-05, "loss": 1.7969, "step": 306 }, { "epoch": 0.03188947751116651, "grad_norm": 0.33427590131759644, "learning_rate": 9.974929021312836e-05, "loss": 1.9499, "step": 307 }, { "epoch": 0.03199335203074686, "grad_norm": 0.336112916469574, "learning_rate": 9.974765564199665e-05, "loss": 1.9003, "step": 308 }, { "epoch": 0.03209722655032721, "grad_norm": 0.33370164036750793, "learning_rate": 9.974601577312768e-05, "loss": 1.8738, "step": 309 }, { "epoch": 0.032201101069907555, "grad_norm": 0.3412269949913025, "learning_rate": 9.974437060669603e-05, "loss": 1.8817, "step": 310 }, { "epoch": 0.032304975589487896, "grad_norm": 0.34361353516578674, "learning_rate": 9.974272014287697e-05, "loss": 1.8391, "step": 311 }, { "epoch": 0.032408850109068243, "grad_norm": 0.3552245795726776, "learning_rate": 9.97410643818462e-05, "loss": 1.8881, "step": 312 }, { "epoch": 0.03251272462864859, "grad_norm": 0.3323882222175598, "learning_rate": 9.973940332378007e-05, "loss": 1.8975, "step": 313 }, { "epoch": 0.03261659914822894, "grad_norm": 0.34055736660957336, "learning_rate": 9.973773696885547e-05, "loss": 1.7162, "step": 314 }, { "epoch": 0.03272047366780929, "grad_norm": 0.3609575927257538, "learning_rate": 9.973606531724985e-05, "loss": 1.9902, "step": 315 }, { "epoch": 0.032824348187389635, "grad_norm": 0.3580215573310852, "learning_rate": 9.973438836914124e-05, "loss": 1.9183, "step": 316 }, { "epoch": 0.03292822270696998, "grad_norm": 0.34166282415390015, "learning_rate": 9.973270612470822e-05, "loss": 1.8884, "step": 317 }, { "epoch": 0.03303209722655033, "grad_norm": 0.3344467878341675, "learning_rate": 9.97310185841299e-05, "loss": 1.9219, "step": 318 }, { "epoch": 0.03313597174613067, "grad_norm": 0.3929234743118286, "learning_rate": 9.972932574758604e-05, "loss": 1.9071, "step": 319 }, { "epoch": 0.03323984626571102, "grad_norm": 0.3376573324203491, "learning_rate": 9.972762761525689e-05, "loss": 1.7418, "step": 320 }, { "epoch": 0.03334372078529137, "grad_norm": 0.33105456829071045, "learning_rate": 9.972592418732327e-05, "loss": 1.7941, "step": 321 }, { "epoch": 0.033447595304871715, "grad_norm": 0.3593199551105499, "learning_rate": 9.972421546396662e-05, "loss": 1.8983, "step": 322 }, { "epoch": 0.03355146982445206, "grad_norm": 0.37045425176620483, "learning_rate": 9.972250144536888e-05, "loss": 1.947, "step": 323 }, { "epoch": 0.03365534434403241, "grad_norm": 0.3378489911556244, "learning_rate": 9.972078213171259e-05, "loss": 1.7318, "step": 324 }, { "epoch": 0.03375921886361276, "grad_norm": 0.354125440120697, "learning_rate": 9.971905752318084e-05, "loss": 1.6396, "step": 325 }, { "epoch": 0.033863093383193106, "grad_norm": 0.36457526683807373, "learning_rate": 9.971732761995728e-05, "loss": 1.8063, "step": 326 }, { "epoch": 0.03396696790277345, "grad_norm": 0.33944860100746155, "learning_rate": 9.971559242222615e-05, "loss": 1.7624, "step": 327 }, { "epoch": 0.034070842422353795, "grad_norm": 0.327573299407959, "learning_rate": 9.971385193017221e-05, "loss": 1.7963, "step": 328 }, { "epoch": 0.03417471694193414, "grad_norm": 0.3168799877166748, "learning_rate": 9.971210614398084e-05, "loss": 1.7648, "step": 329 }, { "epoch": 0.03427859146151449, "grad_norm": 0.3881419003009796, "learning_rate": 9.971035506383792e-05, "loss": 1.8365, "step": 330 }, { "epoch": 0.03438246598109484, "grad_norm": 0.3255634903907776, "learning_rate": 9.970859868992995e-05, "loss": 1.9249, "step": 331 }, { "epoch": 0.034486340500675186, "grad_norm": 0.3285115361213684, "learning_rate": 9.970683702244395e-05, "loss": 1.881, "step": 332 }, { "epoch": 0.034590215020255534, "grad_norm": 0.34438276290893555, "learning_rate": 9.970507006156755e-05, "loss": 1.8085, "step": 333 }, { "epoch": 0.034694089539835875, "grad_norm": 0.347380667924881, "learning_rate": 9.970329780748888e-05, "loss": 1.8762, "step": 334 }, { "epoch": 0.03479796405941622, "grad_norm": 0.32174697518348694, "learning_rate": 9.970152026039672e-05, "loss": 1.7262, "step": 335 }, { "epoch": 0.03490183857899657, "grad_norm": 0.3503490686416626, "learning_rate": 9.969973742048032e-05, "loss": 1.7248, "step": 336 }, { "epoch": 0.03500571309857692, "grad_norm": 0.327619343996048, "learning_rate": 9.969794928792957e-05, "loss": 1.7031, "step": 337 }, { "epoch": 0.035109587618157266, "grad_norm": 0.4252755343914032, "learning_rate": 9.969615586293488e-05, "loss": 2.0297, "step": 338 }, { "epoch": 0.035213462137737614, "grad_norm": 0.36586689949035645, "learning_rate": 9.969435714568722e-05, "loss": 1.9272, "step": 339 }, { "epoch": 0.03531733665731796, "grad_norm": 0.3425196409225464, "learning_rate": 9.969255313637818e-05, "loss": 1.8647, "step": 340 }, { "epoch": 0.03542121117689831, "grad_norm": 0.3617844581604004, "learning_rate": 9.969074383519983e-05, "loss": 1.8622, "step": 341 }, { "epoch": 0.03552508569647865, "grad_norm": 0.33944642543792725, "learning_rate": 9.968892924234487e-05, "loss": 1.7988, "step": 342 }, { "epoch": 0.035628960216059, "grad_norm": 0.3269334137439728, "learning_rate": 9.968710935800652e-05, "loss": 1.8039, "step": 343 }, { "epoch": 0.035732834735639346, "grad_norm": 0.3931077718734741, "learning_rate": 9.968528418237862e-05, "loss": 2.0847, "step": 344 }, { "epoch": 0.035836709255219694, "grad_norm": 0.3752608299255371, "learning_rate": 9.96834537156555e-05, "loss": 2.0207, "step": 345 }, { "epoch": 0.03594058377480004, "grad_norm": 0.33372077345848083, "learning_rate": 9.96816179580321e-05, "loss": 1.9464, "step": 346 }, { "epoch": 0.03604445829438039, "grad_norm": 0.36559945344924927, "learning_rate": 9.967977690970393e-05, "loss": 1.9318, "step": 347 }, { "epoch": 0.03614833281396074, "grad_norm": 0.3152011036872864, "learning_rate": 9.967793057086706e-05, "loss": 1.8081, "step": 348 }, { "epoch": 0.036252207333541085, "grad_norm": 0.32508155703544617, "learning_rate": 9.967607894171804e-05, "loss": 1.7424, "step": 349 }, { "epoch": 0.036356081853121426, "grad_norm": 0.3422446846961975, "learning_rate": 9.967422202245413e-05, "loss": 1.9266, "step": 350 }, { "epoch": 0.036459956372701774, "grad_norm": 0.3278721570968628, "learning_rate": 9.967235981327304e-05, "loss": 1.8344, "step": 351 }, { "epoch": 0.03656383089228212, "grad_norm": 0.32580530643463135, "learning_rate": 9.967049231437309e-05, "loss": 1.7158, "step": 352 }, { "epoch": 0.03666770541186247, "grad_norm": 0.38664954900741577, "learning_rate": 9.966861952595316e-05, "loss": 1.9844, "step": 353 }, { "epoch": 0.03677157993144282, "grad_norm": 0.40230998396873474, "learning_rate": 9.966674144821266e-05, "loss": 2.0037, "step": 354 }, { "epoch": 0.036875454451023165, "grad_norm": 0.33027613162994385, "learning_rate": 9.966485808135165e-05, "loss": 1.7922, "step": 355 }, { "epoch": 0.03697932897060351, "grad_norm": 0.342254102230072, "learning_rate": 9.966296942557062e-05, "loss": 1.8094, "step": 356 }, { "epoch": 0.03708320349018386, "grad_norm": 0.3351558446884155, "learning_rate": 9.966107548107074e-05, "loss": 1.803, "step": 357 }, { "epoch": 0.0371870780097642, "grad_norm": 0.34574928879737854, "learning_rate": 9.96591762480537e-05, "loss": 1.9305, "step": 358 }, { "epoch": 0.03729095252934455, "grad_norm": 0.34656211733818054, "learning_rate": 9.965727172672174e-05, "loss": 1.781, "step": 359 }, { "epoch": 0.0373948270489249, "grad_norm": 0.35084661841392517, "learning_rate": 9.965536191727769e-05, "loss": 1.886, "step": 360 }, { "epoch": 0.037498701568505245, "grad_norm": 0.34373828768730164, "learning_rate": 9.965344681992491e-05, "loss": 1.8182, "step": 361 }, { "epoch": 0.03760257608808559, "grad_norm": 0.33524051308631897, "learning_rate": 9.965152643486738e-05, "loss": 1.8175, "step": 362 }, { "epoch": 0.03770645060766594, "grad_norm": 0.3301374912261963, "learning_rate": 9.964960076230955e-05, "loss": 1.7128, "step": 363 }, { "epoch": 0.03781032512724629, "grad_norm": 0.35539349913597107, "learning_rate": 9.964766980245653e-05, "loss": 1.8029, "step": 364 }, { "epoch": 0.037914199646826637, "grad_norm": 0.3248385190963745, "learning_rate": 9.964573355551394e-05, "loss": 1.6429, "step": 365 }, { "epoch": 0.03801807416640698, "grad_norm": 0.34955573081970215, "learning_rate": 9.964379202168799e-05, "loss": 1.9152, "step": 366 }, { "epoch": 0.038121948685987325, "grad_norm": 0.32366281747817993, "learning_rate": 9.964184520118542e-05, "loss": 1.7536, "step": 367 }, { "epoch": 0.03822582320556767, "grad_norm": 0.3177925646305084, "learning_rate": 9.963989309421356e-05, "loss": 1.7626, "step": 368 }, { "epoch": 0.03832969772514802, "grad_norm": 0.35649746656417847, "learning_rate": 9.96379357009803e-05, "loss": 1.822, "step": 369 }, { "epoch": 0.03843357224472837, "grad_norm": 0.3334449827671051, "learning_rate": 9.963597302169406e-05, "loss": 1.7122, "step": 370 }, { "epoch": 0.038537446764308717, "grad_norm": 0.35319963097572327, "learning_rate": 9.963400505656388e-05, "loss": 1.8249, "step": 371 }, { "epoch": 0.038641321283889064, "grad_norm": 0.34933820366859436, "learning_rate": 9.963203180579932e-05, "loss": 2.1094, "step": 372 }, { "epoch": 0.03874519580346941, "grad_norm": 0.35011550784111023, "learning_rate": 9.963005326961052e-05, "loss": 1.8247, "step": 373 }, { "epoch": 0.03884907032304975, "grad_norm": 0.4046299159526825, "learning_rate": 9.962806944820817e-05, "loss": 2.1071, "step": 374 }, { "epoch": 0.0389529448426301, "grad_norm": 0.3381158113479614, "learning_rate": 9.962608034180353e-05, "loss": 1.8242, "step": 375 }, { "epoch": 0.03905681936221045, "grad_norm": 0.35168975591659546, "learning_rate": 9.962408595060845e-05, "loss": 2.0001, "step": 376 }, { "epoch": 0.039160693881790797, "grad_norm": 0.35159188508987427, "learning_rate": 9.962208627483529e-05, "loss": 1.8534, "step": 377 }, { "epoch": 0.039264568401371144, "grad_norm": 0.31816786527633667, "learning_rate": 9.962008131469703e-05, "loss": 1.6624, "step": 378 }, { "epoch": 0.03936844292095149, "grad_norm": 0.33059608936309814, "learning_rate": 9.961807107040712e-05, "loss": 1.7612, "step": 379 }, { "epoch": 0.03947231744053184, "grad_norm": 0.33961811661720276, "learning_rate": 9.96160555421797e-05, "loss": 1.8432, "step": 380 }, { "epoch": 0.03957619196011219, "grad_norm": 0.3444601595401764, "learning_rate": 9.961403473022939e-05, "loss": 1.8842, "step": 381 }, { "epoch": 0.03968006647969253, "grad_norm": 0.3596336841583252, "learning_rate": 9.961200863477139e-05, "loss": 2.0091, "step": 382 }, { "epoch": 0.039783940999272877, "grad_norm": 0.35697799921035767, "learning_rate": 9.960997725602144e-05, "loss": 1.8124, "step": 383 }, { "epoch": 0.039887815518853224, "grad_norm": 0.33248475193977356, "learning_rate": 9.96079405941959e-05, "loss": 1.9146, "step": 384 }, { "epoch": 0.03999169003843357, "grad_norm": 0.3686772882938385, "learning_rate": 9.960589864951162e-05, "loss": 1.8714, "step": 385 }, { "epoch": 0.04009556455801392, "grad_norm": 0.36085087060928345, "learning_rate": 9.960385142218609e-05, "loss": 1.8268, "step": 386 }, { "epoch": 0.04019943907759427, "grad_norm": 0.32262122631073, "learning_rate": 9.960179891243731e-05, "loss": 1.6027, "step": 387 }, { "epoch": 0.040303313597174616, "grad_norm": 0.361555278301239, "learning_rate": 9.959974112048386e-05, "loss": 1.9221, "step": 388 }, { "epoch": 0.040407188116754963, "grad_norm": 0.3558778166770935, "learning_rate": 9.959767804654487e-05, "loss": 1.8234, "step": 389 }, { "epoch": 0.040511062636335304, "grad_norm": 0.3629186749458313, "learning_rate": 9.959560969084003e-05, "loss": 1.9483, "step": 390 }, { "epoch": 0.04061493715591565, "grad_norm": 0.37084469199180603, "learning_rate": 9.959353605358964e-05, "loss": 1.7024, "step": 391 }, { "epoch": 0.040718811675496, "grad_norm": 0.3552490770816803, "learning_rate": 9.95914571350145e-05, "loss": 1.884, "step": 392 }, { "epoch": 0.04082268619507635, "grad_norm": 0.34589436650276184, "learning_rate": 9.958937293533599e-05, "loss": 1.8548, "step": 393 }, { "epoch": 0.040926560714656696, "grad_norm": 0.3737829327583313, "learning_rate": 9.958728345477608e-05, "loss": 2.0568, "step": 394 }, { "epoch": 0.041030435234237043, "grad_norm": 0.35843873023986816, "learning_rate": 9.958518869355728e-05, "loss": 1.7908, "step": 395 }, { "epoch": 0.04113430975381739, "grad_norm": 0.6880629062652588, "learning_rate": 9.958308865190267e-05, "loss": 1.8225, "step": 396 }, { "epoch": 0.04123818427339773, "grad_norm": 0.3438432812690735, "learning_rate": 9.958098333003588e-05, "loss": 1.7719, "step": 397 }, { "epoch": 0.04134205879297808, "grad_norm": 0.3586975634098053, "learning_rate": 9.957887272818112e-05, "loss": 1.9513, "step": 398 }, { "epoch": 0.04144593331255843, "grad_norm": 0.3421470522880554, "learning_rate": 9.957675684656312e-05, "loss": 1.8509, "step": 399 }, { "epoch": 0.041549807832138776, "grad_norm": 0.3313000202178955, "learning_rate": 9.957463568540725e-05, "loss": 1.8173, "step": 400 }, { "epoch": 0.041653682351719123, "grad_norm": 0.3544430732727051, "learning_rate": 9.957250924493938e-05, "loss": 1.8059, "step": 401 }, { "epoch": 0.04175755687129947, "grad_norm": 0.36002522706985474, "learning_rate": 9.957037752538592e-05, "loss": 1.9456, "step": 402 }, { "epoch": 0.04186143139087982, "grad_norm": 0.3440776765346527, "learning_rate": 9.956824052697395e-05, "loss": 1.7831, "step": 403 }, { "epoch": 0.04196530591046017, "grad_norm": 0.36796486377716064, "learning_rate": 9.956609824993099e-05, "loss": 1.791, "step": 404 }, { "epoch": 0.04206918043004051, "grad_norm": 0.3968099355697632, "learning_rate": 9.956395069448523e-05, "loss": 1.8728, "step": 405 }, { "epoch": 0.042173054949620856, "grad_norm": 0.3467409610748291, "learning_rate": 9.95617978608653e-05, "loss": 1.8004, "step": 406 }, { "epoch": 0.042276929469201203, "grad_norm": 0.3945915102958679, "learning_rate": 9.955963974930052e-05, "loss": 1.7891, "step": 407 }, { "epoch": 0.04238080398878155, "grad_norm": 0.3563166856765747, "learning_rate": 9.955747636002068e-05, "loss": 1.9755, "step": 408 }, { "epoch": 0.0424846785083619, "grad_norm": 0.4015011191368103, "learning_rate": 9.955530769325616e-05, "loss": 1.6478, "step": 409 }, { "epoch": 0.04258855302794225, "grad_norm": 0.36491334438323975, "learning_rate": 9.955313374923791e-05, "loss": 1.9403, "step": 410 }, { "epoch": 0.042692427547522595, "grad_norm": 0.3891184628009796, "learning_rate": 9.955095452819747e-05, "loss": 2.0413, "step": 411 }, { "epoch": 0.04279630206710294, "grad_norm": 0.3426514267921448, "learning_rate": 9.954877003036687e-05, "loss": 1.6253, "step": 412 }, { "epoch": 0.042900176586683283, "grad_norm": 0.48841214179992676, "learning_rate": 9.954658025597876e-05, "loss": 2.1789, "step": 413 }, { "epoch": 0.04300405110626363, "grad_norm": 0.3180578351020813, "learning_rate": 9.954438520526635e-05, "loss": 1.4562, "step": 414 }, { "epoch": 0.04310792562584398, "grad_norm": 0.3487285077571869, "learning_rate": 9.954218487846334e-05, "loss": 1.7904, "step": 415 }, { "epoch": 0.04321180014542433, "grad_norm": 0.3434458374977112, "learning_rate": 9.953997927580411e-05, "loss": 1.8548, "step": 416 }, { "epoch": 0.043315674665004675, "grad_norm": 0.3450503647327423, "learning_rate": 9.953776839752351e-05, "loss": 1.7331, "step": 417 }, { "epoch": 0.04341954918458502, "grad_norm": 0.33946654200553894, "learning_rate": 9.953555224385698e-05, "loss": 1.883, "step": 418 }, { "epoch": 0.04352342370416537, "grad_norm": 0.37217575311660767, "learning_rate": 9.953333081504052e-05, "loss": 1.6892, "step": 419 }, { "epoch": 0.04362729822374572, "grad_norm": 0.3341975510120392, "learning_rate": 9.953110411131072e-05, "loss": 1.734, "step": 420 }, { "epoch": 0.04373117274332606, "grad_norm": 0.3347923159599304, "learning_rate": 9.952887213290469e-05, "loss": 1.8173, "step": 421 }, { "epoch": 0.04383504726290641, "grad_norm": 0.35578978061676025, "learning_rate": 9.952663488006011e-05, "loss": 1.9877, "step": 422 }, { "epoch": 0.043938921782486755, "grad_norm": 0.3420504331588745, "learning_rate": 9.952439235301522e-05, "loss": 1.8042, "step": 423 }, { "epoch": 0.0440427963020671, "grad_norm": 0.3436269760131836, "learning_rate": 9.952214455200887e-05, "loss": 1.7833, "step": 424 }, { "epoch": 0.04414667082164745, "grad_norm": 0.3772909641265869, "learning_rate": 9.951989147728043e-05, "loss": 1.9778, "step": 425 }, { "epoch": 0.0442505453412278, "grad_norm": 0.3602845370769501, "learning_rate": 9.951763312906979e-05, "loss": 1.8447, "step": 426 }, { "epoch": 0.044354419860808146, "grad_norm": 0.3500402271747589, "learning_rate": 9.951536950761748e-05, "loss": 1.6772, "step": 427 }, { "epoch": 0.044458294380388494, "grad_norm": 0.3483547866344452, "learning_rate": 9.951310061316455e-05, "loss": 1.8328, "step": 428 }, { "epoch": 0.044562168899968835, "grad_norm": 0.3545311391353607, "learning_rate": 9.951082644595264e-05, "loss": 1.8814, "step": 429 }, { "epoch": 0.04466604341954918, "grad_norm": 0.33376967906951904, "learning_rate": 9.95085470062239e-05, "loss": 1.8108, "step": 430 }, { "epoch": 0.04476991793912953, "grad_norm": 0.330287903547287, "learning_rate": 9.950626229422107e-05, "loss": 1.7423, "step": 431 }, { "epoch": 0.04487379245870988, "grad_norm": 0.36695408821105957, "learning_rate": 9.950397231018748e-05, "loss": 1.7873, "step": 432 }, { "epoch": 0.044977666978290226, "grad_norm": 0.3538840711116791, "learning_rate": 9.950167705436699e-05, "loss": 1.9949, "step": 433 }, { "epoch": 0.045081541497870574, "grad_norm": 0.31848835945129395, "learning_rate": 9.949937652700402e-05, "loss": 1.5736, "step": 434 }, { "epoch": 0.04518541601745092, "grad_norm": 0.3445279598236084, "learning_rate": 9.949707072834356e-05, "loss": 1.7662, "step": 435 }, { "epoch": 0.04528929053703127, "grad_norm": 0.37137672305107117, "learning_rate": 9.949475965863116e-05, "loss": 1.8209, "step": 436 }, { "epoch": 0.04539316505661161, "grad_norm": 0.43532824516296387, "learning_rate": 9.949244331811293e-05, "loss": 2.1643, "step": 437 }, { "epoch": 0.04549703957619196, "grad_norm": 0.35221028327941895, "learning_rate": 9.949012170703556e-05, "loss": 1.8557, "step": 438 }, { "epoch": 0.045600914095772306, "grad_norm": 0.34177568554878235, "learning_rate": 9.948779482564624e-05, "loss": 1.7119, "step": 439 }, { "epoch": 0.045704788615352654, "grad_norm": 0.3455469012260437, "learning_rate": 9.948546267419278e-05, "loss": 1.8411, "step": 440 }, { "epoch": 0.045808663134933, "grad_norm": 0.34957823157310486, "learning_rate": 9.948312525292358e-05, "loss": 1.7674, "step": 441 }, { "epoch": 0.04591253765451335, "grad_norm": 0.3494671583175659, "learning_rate": 9.948078256208751e-05, "loss": 1.9521, "step": 442 }, { "epoch": 0.0460164121740937, "grad_norm": 0.3724217116832733, "learning_rate": 9.947843460193407e-05, "loss": 1.9358, "step": 443 }, { "epoch": 0.046120286693674045, "grad_norm": 0.34524059295654297, "learning_rate": 9.947608137271328e-05, "loss": 1.8426, "step": 444 }, { "epoch": 0.046224161213254386, "grad_norm": 0.3344314396381378, "learning_rate": 9.947372287467576e-05, "loss": 1.7263, "step": 445 }, { "epoch": 0.046328035732834734, "grad_norm": 0.3640967309474945, "learning_rate": 9.947135910807265e-05, "loss": 1.9716, "step": 446 }, { "epoch": 0.04643191025241508, "grad_norm": 0.38016462326049805, "learning_rate": 9.946899007315569e-05, "loss": 1.7952, "step": 447 }, { "epoch": 0.04653578477199543, "grad_norm": 0.35454219579696655, "learning_rate": 9.946661577017717e-05, "loss": 1.8804, "step": 448 }, { "epoch": 0.04663965929157578, "grad_norm": 0.3755532503128052, "learning_rate": 9.946423619938992e-05, "loss": 2.0071, "step": 449 }, { "epoch": 0.046743533811156125, "grad_norm": 0.3422885537147522, "learning_rate": 9.946185136104735e-05, "loss": 1.6811, "step": 450 }, { "epoch": 0.04684740833073647, "grad_norm": 0.33953240513801575, "learning_rate": 9.945946125540343e-05, "loss": 1.7127, "step": 451 }, { "epoch": 0.046951282850316814, "grad_norm": 0.313754677772522, "learning_rate": 9.945706588271269e-05, "loss": 1.7116, "step": 452 }, { "epoch": 0.04705515736989716, "grad_norm": 0.3217809200286865, "learning_rate": 9.94546652432302e-05, "loss": 1.7475, "step": 453 }, { "epoch": 0.04715903188947751, "grad_norm": 0.35068783164024353, "learning_rate": 9.945225933721163e-05, "loss": 1.7588, "step": 454 }, { "epoch": 0.04726290640905786, "grad_norm": 0.32471030950546265, "learning_rate": 9.944984816491318e-05, "loss": 1.6391, "step": 455 }, { "epoch": 0.047366780928638205, "grad_norm": 0.3416842222213745, "learning_rate": 9.944743172659164e-05, "loss": 1.7689, "step": 456 }, { "epoch": 0.04747065544821855, "grad_norm": 0.3535037934780121, "learning_rate": 9.94450100225043e-05, "loss": 1.6911, "step": 457 }, { "epoch": 0.0475745299677989, "grad_norm": 0.3632698953151703, "learning_rate": 9.944258305290908e-05, "loss": 1.949, "step": 458 }, { "epoch": 0.04767840448737925, "grad_norm": 0.38424357771873474, "learning_rate": 9.944015081806445e-05, "loss": 1.9012, "step": 459 }, { "epoch": 0.04778227900695959, "grad_norm": 0.3942817747592926, "learning_rate": 9.94377133182294e-05, "loss": 2.0238, "step": 460 }, { "epoch": 0.04788615352653994, "grad_norm": 0.3337697684764862, "learning_rate": 9.943527055366351e-05, "loss": 1.7753, "step": 461 }, { "epoch": 0.047990028046120285, "grad_norm": 0.3492131233215332, "learning_rate": 9.94328225246269e-05, "loss": 1.7137, "step": 462 }, { "epoch": 0.04809390256570063, "grad_norm": 0.3351963758468628, "learning_rate": 9.943036923138032e-05, "loss": 1.7599, "step": 463 }, { "epoch": 0.04819777708528098, "grad_norm": 0.33879536390304565, "learning_rate": 9.942791067418496e-05, "loss": 1.7868, "step": 464 }, { "epoch": 0.04830165160486133, "grad_norm": 0.3504945933818817, "learning_rate": 9.942544685330267e-05, "loss": 1.93, "step": 465 }, { "epoch": 0.048405526124441677, "grad_norm": 0.3818790912628174, "learning_rate": 9.942297776899583e-05, "loss": 1.9025, "step": 466 }, { "epoch": 0.048509400644022024, "grad_norm": 0.346236914396286, "learning_rate": 9.942050342152736e-05, "loss": 1.7295, "step": 467 }, { "epoch": 0.048613275163602365, "grad_norm": 0.39538463950157166, "learning_rate": 9.941802381116078e-05, "loss": 1.5522, "step": 468 }, { "epoch": 0.04871714968318271, "grad_norm": 0.35648438334465027, "learning_rate": 9.941553893816014e-05, "loss": 1.7374, "step": 469 }, { "epoch": 0.04882102420276306, "grad_norm": 0.3466714024543762, "learning_rate": 9.941304880279005e-05, "loss": 1.7781, "step": 470 }, { "epoch": 0.04892489872234341, "grad_norm": 0.38975635170936584, "learning_rate": 9.941055340531571e-05, "loss": 1.9754, "step": 471 }, { "epoch": 0.049028773241923757, "grad_norm": 0.3584454655647278, "learning_rate": 9.940805274600285e-05, "loss": 1.8484, "step": 472 }, { "epoch": 0.049132647761504104, "grad_norm": 0.33715423941612244, "learning_rate": 9.940554682511775e-05, "loss": 1.9061, "step": 473 }, { "epoch": 0.04923652228108445, "grad_norm": 0.35068365931510925, "learning_rate": 9.940303564292731e-05, "loss": 1.759, "step": 474 }, { "epoch": 0.0493403968006648, "grad_norm": 0.37917405366897583, "learning_rate": 9.940051919969893e-05, "loss": 1.8638, "step": 475 }, { "epoch": 0.04944427132024514, "grad_norm": 0.38259661197662354, "learning_rate": 9.939799749570059e-05, "loss": 1.9465, "step": 476 }, { "epoch": 0.04954814583982549, "grad_norm": 0.334435373544693, "learning_rate": 9.939547053120084e-05, "loss": 1.8029, "step": 477 }, { "epoch": 0.049652020359405837, "grad_norm": 0.3447658121585846, "learning_rate": 9.939293830646878e-05, "loss": 1.6021, "step": 478 }, { "epoch": 0.049755894878986184, "grad_norm": 0.3410322964191437, "learning_rate": 9.939040082177407e-05, "loss": 1.9107, "step": 479 }, { "epoch": 0.04985976939856653, "grad_norm": 0.3771478831768036, "learning_rate": 9.938785807738693e-05, "loss": 2.0531, "step": 480 }, { "epoch": 0.04996364391814688, "grad_norm": 0.421427845954895, "learning_rate": 9.938531007357813e-05, "loss": 1.9953, "step": 481 }, { "epoch": 0.05006751843772723, "grad_norm": 0.347931444644928, "learning_rate": 9.938275681061903e-05, "loss": 1.75, "step": 482 }, { "epoch": 0.050171392957307576, "grad_norm": 0.38480523228645325, "learning_rate": 9.938019828878155e-05, "loss": 1.9639, "step": 483 }, { "epoch": 0.050275267476887917, "grad_norm": 0.3436914384365082, "learning_rate": 9.937763450833813e-05, "loss": 1.8305, "step": 484 }, { "epoch": 0.050379141996468264, "grad_norm": 0.3643799126148224, "learning_rate": 9.93750654695618e-05, "loss": 1.8667, "step": 485 }, { "epoch": 0.05048301651604861, "grad_norm": 0.3727877140045166, "learning_rate": 9.937249117272612e-05, "loss": 1.8097, "step": 486 }, { "epoch": 0.05058689103562896, "grad_norm": 0.34742361307144165, "learning_rate": 9.936991161810525e-05, "loss": 1.872, "step": 487 }, { "epoch": 0.05069076555520931, "grad_norm": 0.34192922711372375, "learning_rate": 9.93673268059739e-05, "loss": 1.777, "step": 488 }, { "epoch": 0.050794640074789656, "grad_norm": 0.34614014625549316, "learning_rate": 9.936473673660733e-05, "loss": 1.8364, "step": 489 }, { "epoch": 0.050898514594370003, "grad_norm": 0.3394782841205597, "learning_rate": 9.936214141028135e-05, "loss": 1.7858, "step": 490 }, { "epoch": 0.05100238911395035, "grad_norm": 0.3589742183685303, "learning_rate": 9.935954082727235e-05, "loss": 2.1136, "step": 491 }, { "epoch": 0.05110626363353069, "grad_norm": 0.3754216134548187, "learning_rate": 9.935693498785728e-05, "loss": 1.8873, "step": 492 }, { "epoch": 0.05121013815311104, "grad_norm": 0.3502977192401886, "learning_rate": 9.93543238923136e-05, "loss": 1.6858, "step": 493 }, { "epoch": 0.05131401267269139, "grad_norm": 0.3583030104637146, "learning_rate": 9.935170754091945e-05, "loss": 1.8528, "step": 494 }, { "epoch": 0.051417887192271736, "grad_norm": 0.3400440216064453, "learning_rate": 9.934908593395338e-05, "loss": 1.7657, "step": 495 }, { "epoch": 0.051521761711852083, "grad_norm": 0.31973132491111755, "learning_rate": 9.934645907169459e-05, "loss": 1.7152, "step": 496 }, { "epoch": 0.05162563623143243, "grad_norm": 0.33886149525642395, "learning_rate": 9.934382695442284e-05, "loss": 1.6483, "step": 497 }, { "epoch": 0.05172951075101278, "grad_norm": 0.3560135066509247, "learning_rate": 9.934118958241842e-05, "loss": 1.8188, "step": 498 }, { "epoch": 0.05183338527059313, "grad_norm": 0.32066768407821655, "learning_rate": 9.933854695596216e-05, "loss": 1.6657, "step": 499 }, { "epoch": 0.05193725979017347, "grad_norm": 0.3544224798679352, "learning_rate": 9.933589907533554e-05, "loss": 1.7714, "step": 500 }, { "epoch": 0.052041134309753816, "grad_norm": 0.3491422235965729, "learning_rate": 9.933324594082048e-05, "loss": 1.9087, "step": 501 }, { "epoch": 0.052145008829334163, "grad_norm": 0.32746124267578125, "learning_rate": 9.933058755269955e-05, "loss": 1.8386, "step": 502 }, { "epoch": 0.05224888334891451, "grad_norm": 0.32933396100997925, "learning_rate": 9.932792391125583e-05, "loss": 1.7782, "step": 503 }, { "epoch": 0.05235275786849486, "grad_norm": 0.35885295271873474, "learning_rate": 9.932525501677299e-05, "loss": 1.8822, "step": 504 }, { "epoch": 0.05245663238807521, "grad_norm": 0.3308489918708801, "learning_rate": 9.932258086953525e-05, "loss": 1.8777, "step": 505 }, { "epoch": 0.052560506907655555, "grad_norm": 0.32857421040534973, "learning_rate": 9.931990146982736e-05, "loss": 1.6693, "step": 506 }, { "epoch": 0.052664381427235896, "grad_norm": 0.37959519028663635, "learning_rate": 9.931721681793468e-05, "loss": 1.6766, "step": 507 }, { "epoch": 0.052768255946816243, "grad_norm": 0.39978745579719543, "learning_rate": 9.931452691414311e-05, "loss": 1.9316, "step": 508 }, { "epoch": 0.05287213046639659, "grad_norm": 0.36127495765686035, "learning_rate": 9.93118317587391e-05, "loss": 1.9157, "step": 509 }, { "epoch": 0.05297600498597694, "grad_norm": 0.3422827422618866, "learning_rate": 9.930913135200963e-05, "loss": 1.718, "step": 510 }, { "epoch": 0.05307987950555729, "grad_norm": 0.3710615932941437, "learning_rate": 9.930642569424231e-05, "loss": 1.7702, "step": 511 }, { "epoch": 0.053183754025137635, "grad_norm": 0.3597875237464905, "learning_rate": 9.930371478572526e-05, "loss": 1.6665, "step": 512 }, { "epoch": 0.05328762854471798, "grad_norm": 0.3505166172981262, "learning_rate": 9.930099862674716e-05, "loss": 1.7484, "step": 513 }, { "epoch": 0.05339150306429833, "grad_norm": 0.343287855386734, "learning_rate": 9.929827721759728e-05, "loss": 1.9197, "step": 514 }, { "epoch": 0.05349537758387867, "grad_norm": 0.35222482681274414, "learning_rate": 9.92955505585654e-05, "loss": 1.8467, "step": 515 }, { "epoch": 0.05359925210345902, "grad_norm": 0.39000675082206726, "learning_rate": 9.929281864994193e-05, "loss": 1.8709, "step": 516 }, { "epoch": 0.05370312662303937, "grad_norm": 0.3652053773403168, "learning_rate": 9.929008149201774e-05, "loss": 1.9486, "step": 517 }, { "epoch": 0.053807001142619715, "grad_norm": 0.3311000168323517, "learning_rate": 9.928733908508438e-05, "loss": 1.7749, "step": 518 }, { "epoch": 0.05391087566220006, "grad_norm": 0.3284410536289215, "learning_rate": 9.928459142943385e-05, "loss": 1.8424, "step": 519 }, { "epoch": 0.05401475018178041, "grad_norm": 0.3507472574710846, "learning_rate": 9.928183852535877e-05, "loss": 1.8719, "step": 520 }, { "epoch": 0.05411862470136076, "grad_norm": 0.35009628534317017, "learning_rate": 9.92790803731523e-05, "loss": 1.8434, "step": 521 }, { "epoch": 0.054222499220941106, "grad_norm": 0.39859047532081604, "learning_rate": 9.927631697310816e-05, "loss": 1.9473, "step": 522 }, { "epoch": 0.05432637374052145, "grad_norm": 0.369045615196228, "learning_rate": 9.927354832552063e-05, "loss": 1.9063, "step": 523 }, { "epoch": 0.054430248260101795, "grad_norm": 0.3442009389400482, "learning_rate": 9.927077443068455e-05, "loss": 1.7318, "step": 524 }, { "epoch": 0.05453412277968214, "grad_norm": 0.3606685996055603, "learning_rate": 9.926799528889534e-05, "loss": 1.8991, "step": 525 }, { "epoch": 0.05463799729926249, "grad_norm": 0.36547592282295227, "learning_rate": 9.926521090044891e-05, "loss": 2.1391, "step": 526 }, { "epoch": 0.05474187181884284, "grad_norm": 0.334673136472702, "learning_rate": 9.926242126564182e-05, "loss": 1.8072, "step": 527 }, { "epoch": 0.054845746338423186, "grad_norm": 0.3335951864719391, "learning_rate": 9.925962638477113e-05, "loss": 1.7599, "step": 528 }, { "epoch": 0.054949620858003534, "grad_norm": 0.33183959126472473, "learning_rate": 9.925682625813446e-05, "loss": 1.7878, "step": 529 }, { "epoch": 0.05505349537758388, "grad_norm": 0.33561477065086365, "learning_rate": 9.925402088603e-05, "loss": 1.7764, "step": 530 }, { "epoch": 0.05515736989716422, "grad_norm": 0.33029311895370483, "learning_rate": 9.925121026875654e-05, "loss": 1.7976, "step": 531 }, { "epoch": 0.05526124441674457, "grad_norm": 0.3405066430568695, "learning_rate": 9.924839440661334e-05, "loss": 1.8439, "step": 532 }, { "epoch": 0.05536511893632492, "grad_norm": 0.3600308299064636, "learning_rate": 9.92455732999003e-05, "loss": 1.9871, "step": 533 }, { "epoch": 0.055468993455905266, "grad_norm": 0.4009020924568176, "learning_rate": 9.924274694891782e-05, "loss": 1.7941, "step": 534 }, { "epoch": 0.055572867975485614, "grad_norm": 0.3483946919441223, "learning_rate": 9.92399153539669e-05, "loss": 1.7015, "step": 535 }, { "epoch": 0.05567674249506596, "grad_norm": 0.37152403593063354, "learning_rate": 9.923707851534909e-05, "loss": 1.791, "step": 536 }, { "epoch": 0.05578061701464631, "grad_norm": 0.3480570912361145, "learning_rate": 9.923423643336648e-05, "loss": 1.7576, "step": 537 }, { "epoch": 0.05588449153422666, "grad_norm": 0.38511136174201965, "learning_rate": 9.923138910832172e-05, "loss": 1.963, "step": 538 }, { "epoch": 0.055988366053807, "grad_norm": 0.31676754355430603, "learning_rate": 9.922853654051806e-05, "loss": 1.6796, "step": 539 }, { "epoch": 0.056092240573387346, "grad_norm": 0.325538694858551, "learning_rate": 9.922567873025924e-05, "loss": 1.7261, "step": 540 }, { "epoch": 0.056196115092967694, "grad_norm": 0.3761240839958191, "learning_rate": 9.922281567784961e-05, "loss": 1.825, "step": 541 }, { "epoch": 0.05629998961254804, "grad_norm": 0.33543258905410767, "learning_rate": 9.921994738359409e-05, "loss": 1.791, "step": 542 }, { "epoch": 0.05640386413212839, "grad_norm": 0.34951722621917725, "learning_rate": 9.921707384779807e-05, "loss": 1.8141, "step": 543 }, { "epoch": 0.05650773865170874, "grad_norm": 0.34788161516189575, "learning_rate": 9.92141950707676e-05, "loss": 1.7599, "step": 544 }, { "epoch": 0.056611613171289085, "grad_norm": 0.32082879543304443, "learning_rate": 9.921131105280925e-05, "loss": 1.5647, "step": 545 }, { "epoch": 0.05671548769086943, "grad_norm": 0.37312445044517517, "learning_rate": 9.920842179423013e-05, "loss": 1.9926, "step": 546 }, { "epoch": 0.056819362210449774, "grad_norm": 0.32151979207992554, "learning_rate": 9.920552729533792e-05, "loss": 1.6564, "step": 547 }, { "epoch": 0.05692323673003012, "grad_norm": 0.3291560709476471, "learning_rate": 9.92026275564409e-05, "loss": 1.8871, "step": 548 }, { "epoch": 0.05702711124961047, "grad_norm": 0.370978444814682, "learning_rate": 9.919972257784783e-05, "loss": 2.0246, "step": 549 }, { "epoch": 0.05713098576919082, "grad_norm": 0.3654458820819855, "learning_rate": 9.919681235986806e-05, "loss": 1.8523, "step": 550 }, { "epoch": 0.057234860288771165, "grad_norm": 0.3720274567604065, "learning_rate": 9.919389690281154e-05, "loss": 1.7427, "step": 551 }, { "epoch": 0.05733873480835151, "grad_norm": 0.3372805714607239, "learning_rate": 9.919097620698872e-05, "loss": 1.8491, "step": 552 }, { "epoch": 0.05744260932793186, "grad_norm": 0.3331336975097656, "learning_rate": 9.918805027271064e-05, "loss": 1.7309, "step": 553 }, { "epoch": 0.05754648384751221, "grad_norm": 0.3529910743236542, "learning_rate": 9.918511910028888e-05, "loss": 1.8213, "step": 554 }, { "epoch": 0.05765035836709255, "grad_norm": 0.3342648446559906, "learning_rate": 9.918218269003561e-05, "loss": 1.6017, "step": 555 }, { "epoch": 0.0577542328866729, "grad_norm": 0.41469427943229675, "learning_rate": 9.917924104226351e-05, "loss": 1.7434, "step": 556 }, { "epoch": 0.057858107406253245, "grad_norm": 0.3674336373806, "learning_rate": 9.917629415728584e-05, "loss": 1.8997, "step": 557 }, { "epoch": 0.05796198192583359, "grad_norm": 0.3625181317329407, "learning_rate": 9.917334203541645e-05, "loss": 1.8736, "step": 558 }, { "epoch": 0.05806585644541394, "grad_norm": 0.35162436962127686, "learning_rate": 9.917038467696968e-05, "loss": 1.8951, "step": 559 }, { "epoch": 0.05816973096499429, "grad_norm": 0.35170653462409973, "learning_rate": 9.916742208226051e-05, "loss": 1.9565, "step": 560 }, { "epoch": 0.058273605484574637, "grad_norm": 0.3626965284347534, "learning_rate": 9.91644542516044e-05, "loss": 1.9673, "step": 561 }, { "epoch": 0.05837748000415498, "grad_norm": 0.36217907071113586, "learning_rate": 9.91614811853174e-05, "loss": 2.0416, "step": 562 }, { "epoch": 0.058481354523735325, "grad_norm": 0.3251532018184662, "learning_rate": 9.915850288371616e-05, "loss": 1.8444, "step": 563 }, { "epoch": 0.05858522904331567, "grad_norm": 0.3693266212940216, "learning_rate": 9.915551934711777e-05, "loss": 1.7398, "step": 564 }, { "epoch": 0.05868910356289602, "grad_norm": 0.34907597303390503, "learning_rate": 9.915253057584003e-05, "loss": 1.5016, "step": 565 }, { "epoch": 0.05879297808247637, "grad_norm": 0.3413955867290497, "learning_rate": 9.914953657020118e-05, "loss": 1.7225, "step": 566 }, { "epoch": 0.058896852602056717, "grad_norm": 0.34406933188438416, "learning_rate": 9.914653733052006e-05, "loss": 1.7444, "step": 567 }, { "epoch": 0.059000727121637064, "grad_norm": 0.36242082715034485, "learning_rate": 9.914353285711607e-05, "loss": 1.8678, "step": 568 }, { "epoch": 0.05910460164121741, "grad_norm": 0.359131395816803, "learning_rate": 9.914052315030917e-05, "loss": 1.9416, "step": 569 }, { "epoch": 0.05920847616079775, "grad_norm": 0.35352620482444763, "learning_rate": 9.913750821041987e-05, "loss": 1.8087, "step": 570 }, { "epoch": 0.0593123506803781, "grad_norm": 0.3557063043117523, "learning_rate": 9.913448803776922e-05, "loss": 1.9329, "step": 571 }, { "epoch": 0.05941622519995845, "grad_norm": 0.3462260067462921, "learning_rate": 9.913146263267887e-05, "loss": 1.882, "step": 572 }, { "epoch": 0.059520099719538797, "grad_norm": 0.32889804244041443, "learning_rate": 9.912843199547099e-05, "loss": 1.6253, "step": 573 }, { "epoch": 0.059623974239119144, "grad_norm": 0.3543769419193268, "learning_rate": 9.912539612646832e-05, "loss": 1.7809, "step": 574 }, { "epoch": 0.05972784875869949, "grad_norm": 0.38579022884368896, "learning_rate": 9.912235502599414e-05, "loss": 2.0883, "step": 575 }, { "epoch": 0.05983172327827984, "grad_norm": 0.3288674056529999, "learning_rate": 9.911930869437233e-05, "loss": 1.7313, "step": 576 }, { "epoch": 0.05993559779786019, "grad_norm": 0.35373395681381226, "learning_rate": 9.911625713192729e-05, "loss": 1.906, "step": 577 }, { "epoch": 0.06003947231744053, "grad_norm": 0.33762502670288086, "learning_rate": 9.911320033898398e-05, "loss": 1.8296, "step": 578 }, { "epoch": 0.060143346837020877, "grad_norm": 0.3767317831516266, "learning_rate": 9.911013831586795e-05, "loss": 1.8199, "step": 579 }, { "epoch": 0.060247221356601224, "grad_norm": 0.38876909017562866, "learning_rate": 9.910707106290525e-05, "loss": 1.6965, "step": 580 }, { "epoch": 0.06035109587618157, "grad_norm": 0.34621986746788025, "learning_rate": 9.910399858042252e-05, "loss": 1.7974, "step": 581 }, { "epoch": 0.06045497039576192, "grad_norm": 0.3526759743690491, "learning_rate": 9.910092086874699e-05, "loss": 1.7587, "step": 582 }, { "epoch": 0.06055884491534227, "grad_norm": 0.3554968237876892, "learning_rate": 9.909783792820637e-05, "loss": 1.7185, "step": 583 }, { "epoch": 0.060662719434922616, "grad_norm": 0.34225958585739136, "learning_rate": 9.909474975912899e-05, "loss": 1.9277, "step": 584 }, { "epoch": 0.060766593954502963, "grad_norm": 0.35381531715393066, "learning_rate": 9.909165636184372e-05, "loss": 1.8624, "step": 585 }, { "epoch": 0.060870468474083304, "grad_norm": 0.3494366407394409, "learning_rate": 9.908855773667997e-05, "loss": 1.8565, "step": 586 }, { "epoch": 0.06097434299366365, "grad_norm": 0.358967661857605, "learning_rate": 9.908545388396775e-05, "loss": 1.8135, "step": 587 }, { "epoch": 0.061078217513244, "grad_norm": 0.32239392399787903, "learning_rate": 9.908234480403755e-05, "loss": 1.7439, "step": 588 }, { "epoch": 0.06118209203282435, "grad_norm": 0.33216288685798645, "learning_rate": 9.90792304972205e-05, "loss": 1.8637, "step": 589 }, { "epoch": 0.061285966552404696, "grad_norm": 0.3340390622615814, "learning_rate": 9.907611096384823e-05, "loss": 1.6959, "step": 590 }, { "epoch": 0.061389841071985043, "grad_norm": 0.3495895564556122, "learning_rate": 9.907298620425294e-05, "loss": 1.7969, "step": 591 }, { "epoch": 0.06149371559156539, "grad_norm": 0.365520715713501, "learning_rate": 9.90698562187674e-05, "loss": 1.8457, "step": 592 }, { "epoch": 0.06159759011114574, "grad_norm": 0.33886629343032837, "learning_rate": 9.906672100772496e-05, "loss": 1.6876, "step": 593 }, { "epoch": 0.06170146463072608, "grad_norm": 0.3432716131210327, "learning_rate": 9.906358057145945e-05, "loss": 1.7644, "step": 594 }, { "epoch": 0.06180533915030643, "grad_norm": 0.35947611927986145, "learning_rate": 9.906043491030532e-05, "loss": 1.9074, "step": 595 }, { "epoch": 0.061909213669886776, "grad_norm": 0.3474954664707184, "learning_rate": 9.905728402459755e-05, "loss": 1.834, "step": 596 }, { "epoch": 0.062013088189467123, "grad_norm": 0.32925835251808167, "learning_rate": 9.905412791467171e-05, "loss": 1.7292, "step": 597 }, { "epoch": 0.06211696270904747, "grad_norm": 0.37957310676574707, "learning_rate": 9.905096658086387e-05, "loss": 2.0142, "step": 598 }, { "epoch": 0.06222083722862782, "grad_norm": 0.3465014398097992, "learning_rate": 9.904780002351071e-05, "loss": 1.852, "step": 599 }, { "epoch": 0.06232471174820817, "grad_norm": 0.364199697971344, "learning_rate": 9.904462824294944e-05, "loss": 1.7035, "step": 600 }, { "epoch": 0.062428586267788515, "grad_norm": 0.33271247148513794, "learning_rate": 9.904145123951781e-05, "loss": 1.7046, "step": 601 }, { "epoch": 0.06253246078736886, "grad_norm": 0.38924211263656616, "learning_rate": 9.903826901355417e-05, "loss": 1.8107, "step": 602 }, { "epoch": 0.0626363353069492, "grad_norm": 0.39860212802886963, "learning_rate": 9.90350815653974e-05, "loss": 1.8613, "step": 603 }, { "epoch": 0.06274020982652956, "grad_norm": 0.335452139377594, "learning_rate": 9.903188889538692e-05, "loss": 1.6605, "step": 604 }, { "epoch": 0.0628440843461099, "grad_norm": 0.3518449664115906, "learning_rate": 9.902869100386273e-05, "loss": 1.8248, "step": 605 }, { "epoch": 0.06294795886569024, "grad_norm": 0.36134716868400574, "learning_rate": 9.90254878911654e-05, "loss": 1.8456, "step": 606 }, { "epoch": 0.0630518333852706, "grad_norm": 0.3638603687286377, "learning_rate": 9.902227955763602e-05, "loss": 1.6454, "step": 607 }, { "epoch": 0.06315570790485094, "grad_norm": 0.3790292739868164, "learning_rate": 9.901906600361625e-05, "loss": 1.8036, "step": 608 }, { "epoch": 0.06325958242443129, "grad_norm": 0.3539649546146393, "learning_rate": 9.901584722944833e-05, "loss": 1.8153, "step": 609 }, { "epoch": 0.06336345694401163, "grad_norm": 0.33538633584976196, "learning_rate": 9.9012623235475e-05, "loss": 1.7256, "step": 610 }, { "epoch": 0.06346733146359199, "grad_norm": 0.3604874610900879, "learning_rate": 9.900939402203962e-05, "loss": 2.0305, "step": 611 }, { "epoch": 0.06357120598317233, "grad_norm": 0.34128594398498535, "learning_rate": 9.900615958948606e-05, "loss": 1.7801, "step": 612 }, { "epoch": 0.06367508050275267, "grad_norm": 0.3321555554866791, "learning_rate": 9.900291993815877e-05, "loss": 1.6963, "step": 613 }, { "epoch": 0.06377895502233302, "grad_norm": 0.32713523507118225, "learning_rate": 9.899967506840273e-05, "loss": 1.8211, "step": 614 }, { "epoch": 0.06388282954191336, "grad_norm": 0.34873101115226746, "learning_rate": 9.899642498056353e-05, "loss": 1.8328, "step": 615 }, { "epoch": 0.06398670406149372, "grad_norm": 0.36176908016204834, "learning_rate": 9.899316967498724e-05, "loss": 1.9433, "step": 616 }, { "epoch": 0.06409057858107406, "grad_norm": 0.35845959186553955, "learning_rate": 9.898990915202055e-05, "loss": 1.8383, "step": 617 }, { "epoch": 0.06419445310065441, "grad_norm": 0.3352903425693512, "learning_rate": 9.898664341201067e-05, "loss": 1.8279, "step": 618 }, { "epoch": 0.06429832762023475, "grad_norm": 0.3244091272354126, "learning_rate": 9.898337245530536e-05, "loss": 1.6484, "step": 619 }, { "epoch": 0.06440220213981511, "grad_norm": 0.39862748980522156, "learning_rate": 9.8980096282253e-05, "loss": 1.9679, "step": 620 }, { "epoch": 0.06450607665939545, "grad_norm": 0.34560245275497437, "learning_rate": 9.897681489320242e-05, "loss": 1.8297, "step": 621 }, { "epoch": 0.06460995117897579, "grad_norm": 0.37943777441978455, "learning_rate": 9.897352828850309e-05, "loss": 1.9845, "step": 622 }, { "epoch": 0.06471382569855615, "grad_norm": 0.3497175872325897, "learning_rate": 9.897023646850502e-05, "loss": 1.7576, "step": 623 }, { "epoch": 0.06481770021813649, "grad_norm": 0.3344300091266632, "learning_rate": 9.896693943355874e-05, "loss": 1.7502, "step": 624 }, { "epoch": 0.06492157473771684, "grad_norm": 0.3299304246902466, "learning_rate": 9.896363718401536e-05, "loss": 1.5323, "step": 625 }, { "epoch": 0.06502544925729718, "grad_norm": 0.3126195967197418, "learning_rate": 9.896032972022656e-05, "loss": 1.5328, "step": 626 }, { "epoch": 0.06512932377687754, "grad_norm": 0.33860689401626587, "learning_rate": 9.895701704254455e-05, "loss": 1.7393, "step": 627 }, { "epoch": 0.06523319829645788, "grad_norm": 0.332501083612442, "learning_rate": 9.89536991513221e-05, "loss": 1.6363, "step": 628 }, { "epoch": 0.06533707281603822, "grad_norm": 0.32221221923828125, "learning_rate": 9.895037604691256e-05, "loss": 1.6374, "step": 629 }, { "epoch": 0.06544094733561857, "grad_norm": 0.381583034992218, "learning_rate": 9.894704772966977e-05, "loss": 1.773, "step": 630 }, { "epoch": 0.06554482185519891, "grad_norm": 0.36580127477645874, "learning_rate": 9.894371419994821e-05, "loss": 1.7417, "step": 631 }, { "epoch": 0.06564869637477927, "grad_norm": 0.3486615717411041, "learning_rate": 9.894037545810288e-05, "loss": 1.8542, "step": 632 }, { "epoch": 0.06575257089435961, "grad_norm": 0.3709062337875366, "learning_rate": 9.89370315044893e-05, "loss": 1.917, "step": 633 }, { "epoch": 0.06585644541393997, "grad_norm": 0.37561196088790894, "learning_rate": 9.893368233946359e-05, "loss": 1.9253, "step": 634 }, { "epoch": 0.0659603199335203, "grad_norm": 0.32539495825767517, "learning_rate": 9.89303279633824e-05, "loss": 1.6948, "step": 635 }, { "epoch": 0.06606419445310066, "grad_norm": 0.32918450236320496, "learning_rate": 9.892696837660295e-05, "loss": 1.8504, "step": 636 }, { "epoch": 0.066168068972681, "grad_norm": 0.32979822158813477, "learning_rate": 9.892360357948303e-05, "loss": 1.7728, "step": 637 }, { "epoch": 0.06627194349226134, "grad_norm": 0.39485815167427063, "learning_rate": 9.892023357238094e-05, "loss": 1.8216, "step": 638 }, { "epoch": 0.0663758180118417, "grad_norm": 0.3529796302318573, "learning_rate": 9.891685835565557e-05, "loss": 1.725, "step": 639 }, { "epoch": 0.06647969253142204, "grad_norm": 0.34411367774009705, "learning_rate": 9.891347792966635e-05, "loss": 1.7718, "step": 640 }, { "epoch": 0.06658356705100239, "grad_norm": 0.38490092754364014, "learning_rate": 9.891009229477326e-05, "loss": 1.9705, "step": 641 }, { "epoch": 0.06668744157058273, "grad_norm": 0.3569811284542084, "learning_rate": 9.890670145133687e-05, "loss": 2.0534, "step": 642 }, { "epoch": 0.06679131609016309, "grad_norm": 0.3439379334449768, "learning_rate": 9.890330539971825e-05, "loss": 1.9179, "step": 643 }, { "epoch": 0.06689519060974343, "grad_norm": 0.3299177587032318, "learning_rate": 9.889990414027907e-05, "loss": 1.8077, "step": 644 }, { "epoch": 0.06699906512932377, "grad_norm": 0.3420833647251129, "learning_rate": 9.889649767338153e-05, "loss": 1.6712, "step": 645 }, { "epoch": 0.06710293964890413, "grad_norm": 0.3195805549621582, "learning_rate": 9.88930859993884e-05, "loss": 1.629, "step": 646 }, { "epoch": 0.06720681416848447, "grad_norm": 0.3861030638217926, "learning_rate": 9.888966911866299e-05, "loss": 2.02, "step": 647 }, { "epoch": 0.06731068868806482, "grad_norm": 0.3432799279689789, "learning_rate": 9.888624703156918e-05, "loss": 1.6907, "step": 648 }, { "epoch": 0.06741456320764516, "grad_norm": 0.33698904514312744, "learning_rate": 9.888281973847138e-05, "loss": 1.6967, "step": 649 }, { "epoch": 0.06751843772722552, "grad_norm": 0.3321373462677002, "learning_rate": 9.887938723973458e-05, "loss": 1.7385, "step": 650 }, { "epoch": 0.06762231224680586, "grad_norm": 0.3467056155204773, "learning_rate": 9.887594953572432e-05, "loss": 1.6206, "step": 651 }, { "epoch": 0.06772618676638621, "grad_norm": 0.3018084764480591, "learning_rate": 9.887250662680667e-05, "loss": 1.6185, "step": 652 }, { "epoch": 0.06783006128596655, "grad_norm": 0.37331244349479675, "learning_rate": 9.886905851334829e-05, "loss": 1.8004, "step": 653 }, { "epoch": 0.0679339358055469, "grad_norm": 0.36824825406074524, "learning_rate": 9.886560519571637e-05, "loss": 1.7545, "step": 654 }, { "epoch": 0.06803781032512725, "grad_norm": 0.33622682094573975, "learning_rate": 9.886214667427867e-05, "loss": 1.7991, "step": 655 }, { "epoch": 0.06814168484470759, "grad_norm": 0.3627051115036011, "learning_rate": 9.885868294940349e-05, "loss": 1.7288, "step": 656 }, { "epoch": 0.06824555936428794, "grad_norm": 0.3434629440307617, "learning_rate": 9.885521402145967e-05, "loss": 1.7928, "step": 657 }, { "epoch": 0.06834943388386829, "grad_norm": 0.3145413100719452, "learning_rate": 9.885173989081664e-05, "loss": 1.6569, "step": 658 }, { "epoch": 0.06845330840344864, "grad_norm": 0.3603506088256836, "learning_rate": 9.88482605578444e-05, "loss": 1.9213, "step": 659 }, { "epoch": 0.06855718292302898, "grad_norm": 0.33825626969337463, "learning_rate": 9.884477602291343e-05, "loss": 1.6129, "step": 660 }, { "epoch": 0.06866105744260932, "grad_norm": 0.35486510396003723, "learning_rate": 9.88412862863948e-05, "loss": 1.8986, "step": 661 }, { "epoch": 0.06876493196218968, "grad_norm": 0.3892877399921417, "learning_rate": 9.883779134866016e-05, "loss": 2.0618, "step": 662 }, { "epoch": 0.06886880648177002, "grad_norm": 0.3327191472053528, "learning_rate": 9.883429121008169e-05, "loss": 1.6847, "step": 663 }, { "epoch": 0.06897268100135037, "grad_norm": 0.32258662581443787, "learning_rate": 9.883078587103215e-05, "loss": 1.7325, "step": 664 }, { "epoch": 0.06907655552093071, "grad_norm": 0.3689654767513275, "learning_rate": 9.882727533188481e-05, "loss": 2.0511, "step": 665 }, { "epoch": 0.06918043004051107, "grad_norm": 0.3166467547416687, "learning_rate": 9.882375959301349e-05, "loss": 1.7457, "step": 666 }, { "epoch": 0.06928430456009141, "grad_norm": 0.3648569583892822, "learning_rate": 9.882023865479263e-05, "loss": 1.9451, "step": 667 }, { "epoch": 0.06938817907967175, "grad_norm": 0.35240545868873596, "learning_rate": 9.881671251759717e-05, "loss": 1.9018, "step": 668 }, { "epoch": 0.0694920535992521, "grad_norm": 0.3445391356945038, "learning_rate": 9.881318118180258e-05, "loss": 1.9383, "step": 669 }, { "epoch": 0.06959592811883245, "grad_norm": 0.33014625310897827, "learning_rate": 9.880964464778499e-05, "loss": 1.6511, "step": 670 }, { "epoch": 0.0696998026384128, "grad_norm": 0.3588254451751709, "learning_rate": 9.880610291592096e-05, "loss": 1.7165, "step": 671 }, { "epoch": 0.06980367715799314, "grad_norm": 0.3672218322753906, "learning_rate": 9.880255598658767e-05, "loss": 1.6612, "step": 672 }, { "epoch": 0.0699075516775735, "grad_norm": 0.3593609035015106, "learning_rate": 9.879900386016284e-05, "loss": 1.8946, "step": 673 }, { "epoch": 0.07001142619715384, "grad_norm": 0.45219045877456665, "learning_rate": 9.879544653702476e-05, "loss": 1.8398, "step": 674 }, { "epoch": 0.07011530071673419, "grad_norm": 0.3960374891757965, "learning_rate": 9.879188401755222e-05, "loss": 1.928, "step": 675 }, { "epoch": 0.07021917523631453, "grad_norm": 0.38661623001098633, "learning_rate": 9.878831630212465e-05, "loss": 1.9605, "step": 676 }, { "epoch": 0.07032304975589487, "grad_norm": 0.34862831234931946, "learning_rate": 9.878474339112193e-05, "loss": 1.7933, "step": 677 }, { "epoch": 0.07042692427547523, "grad_norm": 0.3596080243587494, "learning_rate": 9.878116528492458e-05, "loss": 1.7487, "step": 678 }, { "epoch": 0.07053079879505557, "grad_norm": 0.3401545584201813, "learning_rate": 9.877758198391365e-05, "loss": 1.8613, "step": 679 }, { "epoch": 0.07063467331463592, "grad_norm": 0.3352885842323303, "learning_rate": 9.87739934884707e-05, "loss": 1.6381, "step": 680 }, { "epoch": 0.07073854783421626, "grad_norm": 0.36678406596183777, "learning_rate": 9.87703997989779e-05, "loss": 1.944, "step": 681 }, { "epoch": 0.07084242235379662, "grad_norm": 0.3366321623325348, "learning_rate": 9.876680091581796e-05, "loss": 1.7308, "step": 682 }, { "epoch": 0.07094629687337696, "grad_norm": 0.37314730882644653, "learning_rate": 9.876319683937412e-05, "loss": 1.8055, "step": 683 }, { "epoch": 0.0710501713929573, "grad_norm": 0.3612748384475708, "learning_rate": 9.875958757003017e-05, "loss": 1.7043, "step": 684 }, { "epoch": 0.07115404591253766, "grad_norm": 0.37414270639419556, "learning_rate": 9.875597310817049e-05, "loss": 1.8259, "step": 685 }, { "epoch": 0.071257920432118, "grad_norm": 0.3539772033691406, "learning_rate": 9.875235345417997e-05, "loss": 1.7541, "step": 686 }, { "epoch": 0.07136179495169835, "grad_norm": 0.35429245233535767, "learning_rate": 9.874872860844411e-05, "loss": 1.5966, "step": 687 }, { "epoch": 0.07146566947127869, "grad_norm": 0.32633036375045776, "learning_rate": 9.87450985713489e-05, "loss": 1.7182, "step": 688 }, { "epoch": 0.07156954399085905, "grad_norm": 0.316303014755249, "learning_rate": 9.874146334328092e-05, "loss": 1.553, "step": 689 }, { "epoch": 0.07167341851043939, "grad_norm": 0.39488279819488525, "learning_rate": 9.873782292462727e-05, "loss": 1.9359, "step": 690 }, { "epoch": 0.07177729303001974, "grad_norm": 0.3820980489253998, "learning_rate": 9.873417731577568e-05, "loss": 1.9939, "step": 691 }, { "epoch": 0.07188116754960008, "grad_norm": 0.4182794392108917, "learning_rate": 9.873052651711434e-05, "loss": 1.8192, "step": 692 }, { "epoch": 0.07198504206918042, "grad_norm": 0.3251241147518158, "learning_rate": 9.872687052903203e-05, "loss": 1.6569, "step": 693 }, { "epoch": 0.07208891658876078, "grad_norm": 0.33113572001457214, "learning_rate": 9.87232093519181e-05, "loss": 1.743, "step": 694 }, { "epoch": 0.07219279110834112, "grad_norm": 0.3508453369140625, "learning_rate": 9.871954298616242e-05, "loss": 1.7822, "step": 695 }, { "epoch": 0.07229666562792147, "grad_norm": 0.36456069350242615, "learning_rate": 9.871587143215545e-05, "loss": 1.7646, "step": 696 }, { "epoch": 0.07240054014750182, "grad_norm": 0.3450940251350403, "learning_rate": 9.871219469028816e-05, "loss": 1.798, "step": 697 }, { "epoch": 0.07250441466708217, "grad_norm": 0.3370673358440399, "learning_rate": 9.870851276095212e-05, "loss": 1.757, "step": 698 }, { "epoch": 0.07260828918666251, "grad_norm": 0.34746289253234863, "learning_rate": 9.87048256445394e-05, "loss": 1.6328, "step": 699 }, { "epoch": 0.07271216370624285, "grad_norm": 0.3589167296886444, "learning_rate": 9.870113334144267e-05, "loss": 1.7512, "step": 700 }, { "epoch": 0.07281603822582321, "grad_norm": 0.4009071886539459, "learning_rate": 9.869743585205511e-05, "loss": 1.8032, "step": 701 }, { "epoch": 0.07291991274540355, "grad_norm": 0.38780277967453003, "learning_rate": 9.869373317677049e-05, "loss": 1.8526, "step": 702 }, { "epoch": 0.0730237872649839, "grad_norm": 0.37040579319000244, "learning_rate": 9.869002531598312e-05, "loss": 1.8994, "step": 703 }, { "epoch": 0.07312766178456424, "grad_norm": 0.3585062325000763, "learning_rate": 9.868631227008785e-05, "loss": 1.853, "step": 704 }, { "epoch": 0.0732315363041446, "grad_norm": 0.3617033362388611, "learning_rate": 9.868259403948008e-05, "loss": 1.9527, "step": 705 }, { "epoch": 0.07333541082372494, "grad_norm": 0.35170087218284607, "learning_rate": 9.867887062455579e-05, "loss": 1.7766, "step": 706 }, { "epoch": 0.0734392853433053, "grad_norm": 0.34852275252342224, "learning_rate": 9.867514202571149e-05, "loss": 1.5639, "step": 707 }, { "epoch": 0.07354315986288563, "grad_norm": 0.3750884532928467, "learning_rate": 9.867140824334424e-05, "loss": 1.9035, "step": 708 }, { "epoch": 0.07364703438246598, "grad_norm": 0.37743544578552246, "learning_rate": 9.866766927785167e-05, "loss": 1.8442, "step": 709 }, { "epoch": 0.07375090890204633, "grad_norm": 0.3599265217781067, "learning_rate": 9.866392512963195e-05, "loss": 1.6519, "step": 710 }, { "epoch": 0.07385478342162667, "grad_norm": 0.35439547896385193, "learning_rate": 9.86601757990838e-05, "loss": 1.8225, "step": 711 }, { "epoch": 0.07395865794120703, "grad_norm": 0.35483318567276, "learning_rate": 9.865642128660647e-05, "loss": 1.778, "step": 712 }, { "epoch": 0.07406253246078737, "grad_norm": 0.39381521940231323, "learning_rate": 9.865266159259981e-05, "loss": 1.9915, "step": 713 }, { "epoch": 0.07416640698036772, "grad_norm": 0.4099736213684082, "learning_rate": 9.864889671746421e-05, "loss": 2.0117, "step": 714 }, { "epoch": 0.07427028149994806, "grad_norm": 0.3895813822746277, "learning_rate": 9.864512666160058e-05, "loss": 2.2284, "step": 715 }, { "epoch": 0.0743741560195284, "grad_norm": 0.3480944037437439, "learning_rate": 9.86413514254104e-05, "loss": 1.7284, "step": 716 }, { "epoch": 0.07447803053910876, "grad_norm": 0.3395494818687439, "learning_rate": 9.863757100929573e-05, "loss": 1.7128, "step": 717 }, { "epoch": 0.0745819050586891, "grad_norm": 0.4045158624649048, "learning_rate": 9.863378541365912e-05, "loss": 1.8116, "step": 718 }, { "epoch": 0.07468577957826945, "grad_norm": 0.34248921275138855, "learning_rate": 9.862999463890372e-05, "loss": 1.9717, "step": 719 }, { "epoch": 0.0747896540978498, "grad_norm": 0.33910906314849854, "learning_rate": 9.862619868543322e-05, "loss": 1.886, "step": 720 }, { "epoch": 0.07489352861743015, "grad_norm": 0.33199310302734375, "learning_rate": 9.862239755365186e-05, "loss": 1.7202, "step": 721 }, { "epoch": 0.07499740313701049, "grad_norm": 0.39172807335853577, "learning_rate": 9.861859124396444e-05, "loss": 1.9074, "step": 722 }, { "epoch": 0.07510127765659085, "grad_norm": 0.38816559314727783, "learning_rate": 9.86147797567763e-05, "loss": 1.963, "step": 723 }, { "epoch": 0.07520515217617119, "grad_norm": 0.39771151542663574, "learning_rate": 9.86109630924933e-05, "loss": 2.119, "step": 724 }, { "epoch": 0.07530902669575153, "grad_norm": 0.34102803468704224, "learning_rate": 9.860714125152191e-05, "loss": 1.8536, "step": 725 }, { "epoch": 0.07541290121533188, "grad_norm": 0.35469919443130493, "learning_rate": 9.860331423426914e-05, "loss": 1.7498, "step": 726 }, { "epoch": 0.07551677573491222, "grad_norm": 0.3550052046775818, "learning_rate": 9.859948204114253e-05, "loss": 1.9569, "step": 727 }, { "epoch": 0.07562065025449258, "grad_norm": 0.3915771245956421, "learning_rate": 9.859564467255015e-05, "loss": 2.0232, "step": 728 }, { "epoch": 0.07572452477407292, "grad_norm": 0.326684832572937, "learning_rate": 9.859180212890069e-05, "loss": 1.6597, "step": 729 }, { "epoch": 0.07582839929365327, "grad_norm": 0.33311307430267334, "learning_rate": 9.858795441060333e-05, "loss": 1.8534, "step": 730 }, { "epoch": 0.07593227381323361, "grad_norm": 0.3527379631996155, "learning_rate": 9.858410151806783e-05, "loss": 1.8047, "step": 731 }, { "epoch": 0.07603614833281395, "grad_norm": 0.3848365545272827, "learning_rate": 9.85802434517045e-05, "loss": 1.7222, "step": 732 }, { "epoch": 0.07614002285239431, "grad_norm": 0.3726034462451935, "learning_rate": 9.857638021192417e-05, "loss": 1.9158, "step": 733 }, { "epoch": 0.07624389737197465, "grad_norm": 0.4738229513168335, "learning_rate": 9.857251179913824e-05, "loss": 2.1136, "step": 734 }, { "epoch": 0.076347771891555, "grad_norm": 0.3687533438205719, "learning_rate": 9.856863821375872e-05, "loss": 1.802, "step": 735 }, { "epoch": 0.07645164641113535, "grad_norm": 0.35550445318222046, "learning_rate": 9.856475945619806e-05, "loss": 1.8454, "step": 736 }, { "epoch": 0.0765555209307157, "grad_norm": 0.3472285866737366, "learning_rate": 9.856087552686932e-05, "loss": 1.7785, "step": 737 }, { "epoch": 0.07665939545029604, "grad_norm": 0.3866637051105499, "learning_rate": 9.855698642618616e-05, "loss": 1.9353, "step": 738 }, { "epoch": 0.07676326996987638, "grad_norm": 0.3777254819869995, "learning_rate": 9.855309215456268e-05, "loss": 1.9781, "step": 739 }, { "epoch": 0.07686714448945674, "grad_norm": 0.413335919380188, "learning_rate": 9.854919271241363e-05, "loss": 1.917, "step": 740 }, { "epoch": 0.07697101900903708, "grad_norm": 0.3310924470424652, "learning_rate": 9.854528810015425e-05, "loss": 1.6168, "step": 741 }, { "epoch": 0.07707489352861743, "grad_norm": 0.34101495146751404, "learning_rate": 9.854137831820035e-05, "loss": 1.7935, "step": 742 }, { "epoch": 0.07717876804819777, "grad_norm": 0.34622523188591003, "learning_rate": 9.85374633669683e-05, "loss": 1.7695, "step": 743 }, { "epoch": 0.07728264256777813, "grad_norm": 0.39640340209007263, "learning_rate": 9.8533543246875e-05, "loss": 1.8163, "step": 744 }, { "epoch": 0.07738651708735847, "grad_norm": 0.35431715846061707, "learning_rate": 9.852961795833793e-05, "loss": 1.7706, "step": 745 }, { "epoch": 0.07749039160693882, "grad_norm": 0.37833094596862793, "learning_rate": 9.852568750177508e-05, "loss": 1.6099, "step": 746 }, { "epoch": 0.07759426612651917, "grad_norm": 0.3214152753353119, "learning_rate": 9.852175187760504e-05, "loss": 1.5333, "step": 747 }, { "epoch": 0.0776981406460995, "grad_norm": 0.3399621546268463, "learning_rate": 9.851781108624692e-05, "loss": 1.7676, "step": 748 }, { "epoch": 0.07780201516567986, "grad_norm": 0.3282534182071686, "learning_rate": 9.851386512812036e-05, "loss": 1.8172, "step": 749 }, { "epoch": 0.0779058896852602, "grad_norm": 0.37723109126091003, "learning_rate": 9.850991400364557e-05, "loss": 1.8644, "step": 750 }, { "epoch": 0.07800976420484056, "grad_norm": 0.36292698979377747, "learning_rate": 9.850595771324335e-05, "loss": 1.8879, "step": 751 }, { "epoch": 0.0781136387244209, "grad_norm": 0.3753519654273987, "learning_rate": 9.850199625733501e-05, "loss": 1.9628, "step": 752 }, { "epoch": 0.07821751324400125, "grad_norm": 0.33497151732444763, "learning_rate": 9.849802963634238e-05, "loss": 1.8383, "step": 753 }, { "epoch": 0.07832138776358159, "grad_norm": 0.3661734163761139, "learning_rate": 9.849405785068789e-05, "loss": 1.903, "step": 754 }, { "epoch": 0.07842526228316193, "grad_norm": 0.33177104592323303, "learning_rate": 9.84900809007945e-05, "loss": 1.7703, "step": 755 }, { "epoch": 0.07852913680274229, "grad_norm": 0.41701647639274597, "learning_rate": 9.848609878708575e-05, "loss": 2.017, "step": 756 }, { "epoch": 0.07863301132232263, "grad_norm": 0.3774707317352295, "learning_rate": 9.848211150998568e-05, "loss": 1.8528, "step": 757 }, { "epoch": 0.07873688584190298, "grad_norm": 0.3493809700012207, "learning_rate": 9.84781190699189e-05, "loss": 1.8787, "step": 758 }, { "epoch": 0.07884076036148333, "grad_norm": 0.4020559787750244, "learning_rate": 9.847412146731058e-05, "loss": 2.0583, "step": 759 }, { "epoch": 0.07894463488106368, "grad_norm": 0.37172991037368774, "learning_rate": 9.847011870258644e-05, "loss": 1.9294, "step": 760 }, { "epoch": 0.07904850940064402, "grad_norm": 0.3182731568813324, "learning_rate": 9.846611077617274e-05, "loss": 1.7817, "step": 761 }, { "epoch": 0.07915238392022438, "grad_norm": 0.4014347791671753, "learning_rate": 9.84620976884963e-05, "loss": 1.8936, "step": 762 }, { "epoch": 0.07925625843980472, "grad_norm": 0.317242294549942, "learning_rate": 9.845807943998448e-05, "loss": 1.6904, "step": 763 }, { "epoch": 0.07936013295938506, "grad_norm": 0.3695957064628601, "learning_rate": 9.845405603106518e-05, "loss": 1.9056, "step": 764 }, { "epoch": 0.07946400747896541, "grad_norm": 0.3249827027320862, "learning_rate": 9.845002746216687e-05, "loss": 1.8097, "step": 765 }, { "epoch": 0.07956788199854575, "grad_norm": 0.3424146771430969, "learning_rate": 9.844599373371855e-05, "loss": 1.6326, "step": 766 }, { "epoch": 0.07967175651812611, "grad_norm": 0.35552307963371277, "learning_rate": 9.84419548461498e-05, "loss": 1.9049, "step": 767 }, { "epoch": 0.07977563103770645, "grad_norm": 0.3346835970878601, "learning_rate": 9.843791079989071e-05, "loss": 1.7215, "step": 768 }, { "epoch": 0.0798795055572868, "grad_norm": 0.339160680770874, "learning_rate": 9.843386159537196e-05, "loss": 1.6931, "step": 769 }, { "epoch": 0.07998338007686714, "grad_norm": 0.3619834780693054, "learning_rate": 9.842980723302476e-05, "loss": 1.7751, "step": 770 }, { "epoch": 0.08008725459644749, "grad_norm": 0.37088942527770996, "learning_rate": 9.842574771328084e-05, "loss": 1.7259, "step": 771 }, { "epoch": 0.08019112911602784, "grad_norm": 0.3355197012424469, "learning_rate": 9.842168303657253e-05, "loss": 1.8242, "step": 772 }, { "epoch": 0.08029500363560818, "grad_norm": 0.37746527791023254, "learning_rate": 9.841761320333266e-05, "loss": 1.7493, "step": 773 }, { "epoch": 0.08039887815518854, "grad_norm": 0.3684033751487732, "learning_rate": 9.841353821399469e-05, "loss": 1.7955, "step": 774 }, { "epoch": 0.08050275267476888, "grad_norm": 0.3551725447177887, "learning_rate": 9.840945806899253e-05, "loss": 1.9765, "step": 775 }, { "epoch": 0.08060662719434923, "grad_norm": 0.32390061020851135, "learning_rate": 9.840537276876069e-05, "loss": 1.6581, "step": 776 }, { "epoch": 0.08071050171392957, "grad_norm": 0.3525629937648773, "learning_rate": 9.840128231373422e-05, "loss": 1.8436, "step": 777 }, { "epoch": 0.08081437623350993, "grad_norm": 0.3583666682243347, "learning_rate": 9.839718670434873e-05, "loss": 1.8948, "step": 778 }, { "epoch": 0.08091825075309027, "grad_norm": 0.3378392457962036, "learning_rate": 9.839308594104038e-05, "loss": 1.7955, "step": 779 }, { "epoch": 0.08102212527267061, "grad_norm": 0.3844630718231201, "learning_rate": 9.838898002424585e-05, "loss": 1.987, "step": 780 }, { "epoch": 0.08112599979225096, "grad_norm": 0.41022759675979614, "learning_rate": 9.83848689544024e-05, "loss": 1.959, "step": 781 }, { "epoch": 0.0812298743118313, "grad_norm": 0.3520353436470032, "learning_rate": 9.83807527319478e-05, "loss": 1.8185, "step": 782 }, { "epoch": 0.08133374883141166, "grad_norm": 0.36484450101852417, "learning_rate": 9.837663135732045e-05, "loss": 1.8079, "step": 783 }, { "epoch": 0.081437623350992, "grad_norm": 0.34820452332496643, "learning_rate": 9.83725048309592e-05, "loss": 1.7167, "step": 784 }, { "epoch": 0.08154149787057235, "grad_norm": 0.35253769159317017, "learning_rate": 9.83683731533035e-05, "loss": 1.8118, "step": 785 }, { "epoch": 0.0816453723901527, "grad_norm": 0.3355175256729126, "learning_rate": 9.836423632479335e-05, "loss": 1.7767, "step": 786 }, { "epoch": 0.08174924690973304, "grad_norm": 0.3633790612220764, "learning_rate": 9.836009434586929e-05, "loss": 1.9734, "step": 787 }, { "epoch": 0.08185312142931339, "grad_norm": 0.36171820759773254, "learning_rate": 9.835594721697241e-05, "loss": 1.8049, "step": 788 }, { "epoch": 0.08195699594889373, "grad_norm": 0.329885333776474, "learning_rate": 9.835179493854433e-05, "loss": 1.8358, "step": 789 }, { "epoch": 0.08206087046847409, "grad_norm": 0.33959150314331055, "learning_rate": 9.834763751102726e-05, "loss": 1.6816, "step": 790 }, { "epoch": 0.08216474498805443, "grad_norm": 0.3971327245235443, "learning_rate": 9.834347493486391e-05, "loss": 1.8448, "step": 791 }, { "epoch": 0.08226861950763478, "grad_norm": 0.3603891134262085, "learning_rate": 9.833930721049758e-05, "loss": 1.9219, "step": 792 }, { "epoch": 0.08237249402721512, "grad_norm": 0.3858663737773895, "learning_rate": 9.833513433837209e-05, "loss": 2.0203, "step": 793 }, { "epoch": 0.08247636854679546, "grad_norm": 0.36965909600257874, "learning_rate": 9.833095631893183e-05, "loss": 1.5018, "step": 794 }, { "epoch": 0.08258024306637582, "grad_norm": 0.3826972544193268, "learning_rate": 9.832677315262171e-05, "loss": 1.9229, "step": 795 }, { "epoch": 0.08268411758595616, "grad_norm": 0.3521627187728882, "learning_rate": 9.832258483988721e-05, "loss": 1.8676, "step": 796 }, { "epoch": 0.08278799210553651, "grad_norm": 0.38269051909446716, "learning_rate": 9.831839138117435e-05, "loss": 1.8478, "step": 797 }, { "epoch": 0.08289186662511686, "grad_norm": 0.33963099122047424, "learning_rate": 9.831419277692972e-05, "loss": 1.7087, "step": 798 }, { "epoch": 0.08299574114469721, "grad_norm": 0.3614671528339386, "learning_rate": 9.830998902760043e-05, "loss": 1.8661, "step": 799 }, { "epoch": 0.08309961566427755, "grad_norm": 0.3493926227092743, "learning_rate": 9.830578013363411e-05, "loss": 1.8653, "step": 800 }, { "epoch": 0.0832034901838579, "grad_norm": 0.3648037314414978, "learning_rate": 9.830156609547905e-05, "loss": 1.8252, "step": 801 }, { "epoch": 0.08330736470343825, "grad_norm": 0.35134628415107727, "learning_rate": 9.829734691358393e-05, "loss": 1.8055, "step": 802 }, { "epoch": 0.08341123922301859, "grad_norm": 0.3299328088760376, "learning_rate": 9.829312258839811e-05, "loss": 1.7022, "step": 803 }, { "epoch": 0.08351511374259894, "grad_norm": 0.39070454239845276, "learning_rate": 9.828889312037143e-05, "loss": 2.0613, "step": 804 }, { "epoch": 0.08361898826217928, "grad_norm": 0.3565296232700348, "learning_rate": 9.828465850995433e-05, "loss": 1.973, "step": 805 }, { "epoch": 0.08372286278175964, "grad_norm": 0.35673460364341736, "learning_rate": 9.82804187575977e-05, "loss": 1.8557, "step": 806 }, { "epoch": 0.08382673730133998, "grad_norm": 0.34928351640701294, "learning_rate": 9.827617386375307e-05, "loss": 1.9153, "step": 807 }, { "epoch": 0.08393061182092033, "grad_norm": 0.35845404863357544, "learning_rate": 9.827192382887251e-05, "loss": 1.8209, "step": 808 }, { "epoch": 0.08403448634050067, "grad_norm": 0.3234979510307312, "learning_rate": 9.82676686534086e-05, "loss": 1.6632, "step": 809 }, { "epoch": 0.08413836086008102, "grad_norm": 0.3533652722835541, "learning_rate": 9.826340833781447e-05, "loss": 1.7217, "step": 810 }, { "epoch": 0.08424223537966137, "grad_norm": 0.3984024226665497, "learning_rate": 9.825914288254383e-05, "loss": 1.9889, "step": 811 }, { "epoch": 0.08434610989924171, "grad_norm": 0.3283498287200928, "learning_rate": 9.82548722880509e-05, "loss": 1.7551, "step": 812 }, { "epoch": 0.08444998441882207, "grad_norm": 0.3530416190624237, "learning_rate": 9.825059655479047e-05, "loss": 1.5998, "step": 813 }, { "epoch": 0.08455385893840241, "grad_norm": 0.377096951007843, "learning_rate": 9.82463156832179e-05, "loss": 1.93, "step": 814 }, { "epoch": 0.08465773345798276, "grad_norm": 0.3375512361526489, "learning_rate": 9.824202967378903e-05, "loss": 1.7851, "step": 815 }, { "epoch": 0.0847616079775631, "grad_norm": 0.3516164720058441, "learning_rate": 9.82377385269603e-05, "loss": 1.7856, "step": 816 }, { "epoch": 0.08486548249714346, "grad_norm": 0.34516972303390503, "learning_rate": 9.823344224318869e-05, "loss": 1.7032, "step": 817 }, { "epoch": 0.0849693570167238, "grad_norm": 0.33883360028266907, "learning_rate": 9.822914082293171e-05, "loss": 1.8525, "step": 818 }, { "epoch": 0.08507323153630414, "grad_norm": 0.3980461657047272, "learning_rate": 9.822483426664744e-05, "loss": 1.8626, "step": 819 }, { "epoch": 0.0851771060558845, "grad_norm": 0.36254748702049255, "learning_rate": 9.822052257479448e-05, "loss": 1.7556, "step": 820 }, { "epoch": 0.08528098057546483, "grad_norm": 0.33397209644317627, "learning_rate": 9.821620574783202e-05, "loss": 1.713, "step": 821 }, { "epoch": 0.08538485509504519, "grad_norm": 0.3705761432647705, "learning_rate": 9.821188378621973e-05, "loss": 2.0036, "step": 822 }, { "epoch": 0.08548872961462553, "grad_norm": 0.40211713314056396, "learning_rate": 9.820755669041787e-05, "loss": 1.8456, "step": 823 }, { "epoch": 0.08559260413420589, "grad_norm": 0.3149651288986206, "learning_rate": 9.820322446088729e-05, "loss": 1.6277, "step": 824 }, { "epoch": 0.08569647865378623, "grad_norm": 0.36002975702285767, "learning_rate": 9.819888709808929e-05, "loss": 1.8758, "step": 825 }, { "epoch": 0.08580035317336657, "grad_norm": 0.40512141585350037, "learning_rate": 9.819454460248578e-05, "loss": 1.8693, "step": 826 }, { "epoch": 0.08590422769294692, "grad_norm": 0.33460405468940735, "learning_rate": 9.81901969745392e-05, "loss": 1.8883, "step": 827 }, { "epoch": 0.08600810221252726, "grad_norm": 0.3666040301322937, "learning_rate": 9.818584421471254e-05, "loss": 1.7379, "step": 828 }, { "epoch": 0.08611197673210762, "grad_norm": 0.3242300748825073, "learning_rate": 9.818148632346934e-05, "loss": 1.67, "step": 829 }, { "epoch": 0.08621585125168796, "grad_norm": 0.37964361906051636, "learning_rate": 9.817712330127367e-05, "loss": 1.9572, "step": 830 }, { "epoch": 0.08631972577126831, "grad_norm": 0.3320227265357971, "learning_rate": 9.817275514859017e-05, "loss": 1.7383, "step": 831 }, { "epoch": 0.08642360029084865, "grad_norm": 0.3565314710140228, "learning_rate": 9.8168381865884e-05, "loss": 1.9483, "step": 832 }, { "epoch": 0.08652747481042901, "grad_norm": 0.35897499322891235, "learning_rate": 9.81640034536209e-05, "loss": 1.7132, "step": 833 }, { "epoch": 0.08663134933000935, "grad_norm": 0.37124866247177124, "learning_rate": 9.815961991226711e-05, "loss": 2.0216, "step": 834 }, { "epoch": 0.08673522384958969, "grad_norm": 0.36309394240379333, "learning_rate": 9.815523124228949e-05, "loss": 1.7324, "step": 835 }, { "epoch": 0.08683909836917005, "grad_norm": 0.35258007049560547, "learning_rate": 9.815083744415534e-05, "loss": 1.7634, "step": 836 }, { "epoch": 0.08694297288875039, "grad_norm": 0.335984468460083, "learning_rate": 9.81464385183326e-05, "loss": 1.4646, "step": 837 }, { "epoch": 0.08704684740833074, "grad_norm": 0.31050655245780945, "learning_rate": 9.814203446528971e-05, "loss": 1.3127, "step": 838 }, { "epoch": 0.08715072192791108, "grad_norm": 0.3857136368751526, "learning_rate": 9.813762528549568e-05, "loss": 1.9647, "step": 839 }, { "epoch": 0.08725459644749144, "grad_norm": 0.35393643379211426, "learning_rate": 9.813321097942005e-05, "loss": 1.7981, "step": 840 }, { "epoch": 0.08735847096707178, "grad_norm": 0.3497065305709839, "learning_rate": 9.812879154753289e-05, "loss": 1.8632, "step": 841 }, { "epoch": 0.08746234548665212, "grad_norm": 0.34144020080566406, "learning_rate": 9.812436699030486e-05, "loss": 1.653, "step": 842 }, { "epoch": 0.08756622000623247, "grad_norm": 0.375434935092926, "learning_rate": 9.811993730820712e-05, "loss": 1.798, "step": 843 }, { "epoch": 0.08767009452581281, "grad_norm": 0.33210819959640503, "learning_rate": 9.811550250171142e-05, "loss": 1.7409, "step": 844 }, { "epoch": 0.08777396904539317, "grad_norm": 0.3796077072620392, "learning_rate": 9.811106257129001e-05, "loss": 1.8968, "step": 845 }, { "epoch": 0.08787784356497351, "grad_norm": 0.3507767915725708, "learning_rate": 9.810661751741573e-05, "loss": 1.7981, "step": 846 }, { "epoch": 0.08798171808455386, "grad_norm": 0.3411443829536438, "learning_rate": 9.810216734056192e-05, "loss": 1.5857, "step": 847 }, { "epoch": 0.0880855926041342, "grad_norm": 0.3522975444793701, "learning_rate": 9.809771204120248e-05, "loss": 1.7766, "step": 848 }, { "epoch": 0.08818946712371455, "grad_norm": 0.35262516140937805, "learning_rate": 9.809325161981191e-05, "loss": 1.6296, "step": 849 }, { "epoch": 0.0882933416432949, "grad_norm": 0.36089739203453064, "learning_rate": 9.808878607686518e-05, "loss": 1.8433, "step": 850 }, { "epoch": 0.08839721616287524, "grad_norm": 0.3271440863609314, "learning_rate": 9.808431541283783e-05, "loss": 1.8521, "step": 851 }, { "epoch": 0.0885010906824556, "grad_norm": 0.39304596185684204, "learning_rate": 9.807983962820597e-05, "loss": 2.0822, "step": 852 }, { "epoch": 0.08860496520203594, "grad_norm": 0.3730714023113251, "learning_rate": 9.807535872344622e-05, "loss": 1.8393, "step": 853 }, { "epoch": 0.08870883972161629, "grad_norm": 0.37659719586372375, "learning_rate": 9.807087269903577e-05, "loss": 1.921, "step": 854 }, { "epoch": 0.08881271424119663, "grad_norm": 0.3870750367641449, "learning_rate": 9.806638155545236e-05, "loss": 2.0497, "step": 855 }, { "epoch": 0.08891658876077699, "grad_norm": 0.3502962291240692, "learning_rate": 9.806188529317423e-05, "loss": 1.849, "step": 856 }, { "epoch": 0.08902046328035733, "grad_norm": 0.3573035001754761, "learning_rate": 9.805738391268021e-05, "loss": 1.6662, "step": 857 }, { "epoch": 0.08912433779993767, "grad_norm": 0.46759751439094543, "learning_rate": 9.805287741444968e-05, "loss": 1.5148, "step": 858 }, { "epoch": 0.08922821231951802, "grad_norm": 0.3513263463973999, "learning_rate": 9.804836579896253e-05, "loss": 1.9624, "step": 859 }, { "epoch": 0.08933208683909837, "grad_norm": 0.3362573981285095, "learning_rate": 9.80438490666992e-05, "loss": 1.7757, "step": 860 }, { "epoch": 0.08943596135867872, "grad_norm": 0.3206601142883301, "learning_rate": 9.803932721814071e-05, "loss": 1.692, "step": 861 }, { "epoch": 0.08953983587825906, "grad_norm": 0.3470083773136139, "learning_rate": 9.803480025376858e-05, "loss": 1.8445, "step": 862 }, { "epoch": 0.08964371039783942, "grad_norm": 0.3480392098426819, "learning_rate": 9.803026817406493e-05, "loss": 1.8338, "step": 863 }, { "epoch": 0.08974758491741976, "grad_norm": 0.3229011297225952, "learning_rate": 9.802573097951235e-05, "loss": 1.7572, "step": 864 }, { "epoch": 0.0898514594370001, "grad_norm": 0.3733616769313812, "learning_rate": 9.802118867059407e-05, "loss": 1.8358, "step": 865 }, { "epoch": 0.08995533395658045, "grad_norm": 0.35253623127937317, "learning_rate": 9.801664124779373e-05, "loss": 1.8347, "step": 866 }, { "epoch": 0.09005920847616079, "grad_norm": 0.3676695227622986, "learning_rate": 9.801208871159567e-05, "loss": 1.8158, "step": 867 }, { "epoch": 0.09016308299574115, "grad_norm": 0.3626701235771179, "learning_rate": 9.800753106248468e-05, "loss": 1.9947, "step": 868 }, { "epoch": 0.09026695751532149, "grad_norm": 0.3680635392665863, "learning_rate": 9.800296830094609e-05, "loss": 1.7856, "step": 869 }, { "epoch": 0.09037083203490184, "grad_norm": 0.34749433398246765, "learning_rate": 9.79984004274658e-05, "loss": 1.7665, "step": 870 }, { "epoch": 0.09047470655448218, "grad_norm": 0.3646162152290344, "learning_rate": 9.799382744253029e-05, "loss": 1.715, "step": 871 }, { "epoch": 0.09057858107406254, "grad_norm": 0.37328121066093445, "learning_rate": 9.79892493466265e-05, "loss": 1.7945, "step": 872 }, { "epoch": 0.09068245559364288, "grad_norm": 0.35278576612472534, "learning_rate": 9.7984666140242e-05, "loss": 1.7249, "step": 873 }, { "epoch": 0.09078633011322322, "grad_norm": 0.33178964257240295, "learning_rate": 9.798007782386485e-05, "loss": 1.6934, "step": 874 }, { "epoch": 0.09089020463280358, "grad_norm": 0.3599662780761719, "learning_rate": 9.797548439798368e-05, "loss": 1.9192, "step": 875 }, { "epoch": 0.09099407915238392, "grad_norm": 0.37787213921546936, "learning_rate": 9.797088586308762e-05, "loss": 1.9863, "step": 876 }, { "epoch": 0.09109795367196427, "grad_norm": 0.31273946166038513, "learning_rate": 9.796628221966642e-05, "loss": 1.6311, "step": 877 }, { "epoch": 0.09120182819154461, "grad_norm": 0.33196964859962463, "learning_rate": 9.796167346821032e-05, "loss": 1.6965, "step": 878 }, { "epoch": 0.09130570271112497, "grad_norm": 0.3330535590648651, "learning_rate": 9.79570596092101e-05, "loss": 1.745, "step": 879 }, { "epoch": 0.09140957723070531, "grad_norm": 0.3526756465435028, "learning_rate": 9.795244064315712e-05, "loss": 1.886, "step": 880 }, { "epoch": 0.09151345175028565, "grad_norm": 0.3859866261482239, "learning_rate": 9.794781657054324e-05, "loss": 1.8935, "step": 881 }, { "epoch": 0.091617326269866, "grad_norm": 0.3710609972476959, "learning_rate": 9.794318739186092e-05, "loss": 1.9279, "step": 882 }, { "epoch": 0.09172120078944634, "grad_norm": 0.37800589203834534, "learning_rate": 9.79385531076031e-05, "loss": 1.9028, "step": 883 }, { "epoch": 0.0918250753090267, "grad_norm": 0.36296647787094116, "learning_rate": 9.793391371826332e-05, "loss": 1.9369, "step": 884 }, { "epoch": 0.09192894982860704, "grad_norm": 0.3620465397834778, "learning_rate": 9.792926922433563e-05, "loss": 1.9072, "step": 885 }, { "epoch": 0.0920328243481874, "grad_norm": 0.35136178135871887, "learning_rate": 9.792461962631462e-05, "loss": 1.6753, "step": 886 }, { "epoch": 0.09213669886776774, "grad_norm": 0.33308306336402893, "learning_rate": 9.791996492469544e-05, "loss": 1.8044, "step": 887 }, { "epoch": 0.09224057338734809, "grad_norm": 0.36434537172317505, "learning_rate": 9.79153051199738e-05, "loss": 1.6905, "step": 888 }, { "epoch": 0.09234444790692843, "grad_norm": 0.3682979345321655, "learning_rate": 9.791064021264591e-05, "loss": 1.9002, "step": 889 }, { "epoch": 0.09244832242650877, "grad_norm": 0.39494451880455017, "learning_rate": 9.790597020320856e-05, "loss": 1.8316, "step": 890 }, { "epoch": 0.09255219694608913, "grad_norm": 0.3634776473045349, "learning_rate": 9.790129509215907e-05, "loss": 1.7874, "step": 891 }, { "epoch": 0.09265607146566947, "grad_norm": 0.49316370487213135, "learning_rate": 9.789661487999528e-05, "loss": 1.8666, "step": 892 }, { "epoch": 0.09275994598524982, "grad_norm": 0.4129713475704193, "learning_rate": 9.789192956721563e-05, "loss": 2.0314, "step": 893 }, { "epoch": 0.09286382050483016, "grad_norm": 0.33426743745803833, "learning_rate": 9.788723915431905e-05, "loss": 1.6938, "step": 894 }, { "epoch": 0.09296769502441052, "grad_norm": 0.3889116048812866, "learning_rate": 9.788254364180503e-05, "loss": 1.9863, "step": 895 }, { "epoch": 0.09307156954399086, "grad_norm": 0.37265413999557495, "learning_rate": 9.78778430301736e-05, "loss": 1.7899, "step": 896 }, { "epoch": 0.0931754440635712, "grad_norm": 0.4090532958507538, "learning_rate": 9.787313731992537e-05, "loss": 1.8823, "step": 897 }, { "epoch": 0.09327931858315155, "grad_norm": 0.3560778796672821, "learning_rate": 9.786842651156143e-05, "loss": 1.8849, "step": 898 }, { "epoch": 0.0933831931027319, "grad_norm": 0.43143633008003235, "learning_rate": 9.786371060558346e-05, "loss": 1.7419, "step": 899 }, { "epoch": 0.09348706762231225, "grad_norm": 0.4012468755245209, "learning_rate": 9.785898960249364e-05, "loss": 1.9191, "step": 900 }, { "epoch": 0.09359094214189259, "grad_norm": 0.38962993025779724, "learning_rate": 9.785426350279477e-05, "loss": 1.8023, "step": 901 }, { "epoch": 0.09369481666147295, "grad_norm": 0.38585248589515686, "learning_rate": 9.78495323069901e-05, "loss": 1.9383, "step": 902 }, { "epoch": 0.09379869118105329, "grad_norm": 0.347837895154953, "learning_rate": 9.784479601558348e-05, "loss": 1.8187, "step": 903 }, { "epoch": 0.09390256570063363, "grad_norm": 0.32775819301605225, "learning_rate": 9.78400546290793e-05, "loss": 1.5762, "step": 904 }, { "epoch": 0.09400644022021398, "grad_norm": 0.3671472668647766, "learning_rate": 9.783530814798245e-05, "loss": 1.8772, "step": 905 }, { "epoch": 0.09411031473979432, "grad_norm": 0.32809990644454956, "learning_rate": 9.783055657279843e-05, "loss": 1.7432, "step": 906 }, { "epoch": 0.09421418925937468, "grad_norm": 0.33797743916511536, "learning_rate": 9.782579990403321e-05, "loss": 1.7051, "step": 907 }, { "epoch": 0.09431806377895502, "grad_norm": 0.3659290373325348, "learning_rate": 9.782103814219337e-05, "loss": 1.7051, "step": 908 }, { "epoch": 0.09442193829853537, "grad_norm": 0.4114789068698883, "learning_rate": 9.781627128778598e-05, "loss": 2.0238, "step": 909 }, { "epoch": 0.09452581281811571, "grad_norm": 0.3247332274913788, "learning_rate": 9.781149934131868e-05, "loss": 1.6112, "step": 910 }, { "epoch": 0.09462968733769607, "grad_norm": 0.3638227880001068, "learning_rate": 9.780672230329964e-05, "loss": 1.7238, "step": 911 }, { "epoch": 0.09473356185727641, "grad_norm": 0.35563889145851135, "learning_rate": 9.780194017423758e-05, "loss": 1.8491, "step": 912 }, { "epoch": 0.09483743637685675, "grad_norm": 0.38154855370521545, "learning_rate": 9.779715295464178e-05, "loss": 1.666, "step": 913 }, { "epoch": 0.0949413108964371, "grad_norm": 0.3501157760620117, "learning_rate": 9.779236064502201e-05, "loss": 1.8345, "step": 914 }, { "epoch": 0.09504518541601745, "grad_norm": 0.3432106375694275, "learning_rate": 9.778756324588863e-05, "loss": 1.7373, "step": 915 }, { "epoch": 0.0951490599355978, "grad_norm": 0.39171236753463745, "learning_rate": 9.778276075775251e-05, "loss": 1.9764, "step": 916 }, { "epoch": 0.09525293445517814, "grad_norm": 0.35228273272514343, "learning_rate": 9.77779531811251e-05, "loss": 1.8864, "step": 917 }, { "epoch": 0.0953568089747585, "grad_norm": 0.3456995487213135, "learning_rate": 9.777314051651836e-05, "loss": 1.8929, "step": 918 }, { "epoch": 0.09546068349433884, "grad_norm": 0.39113515615463257, "learning_rate": 9.776832276444479e-05, "loss": 1.8455, "step": 919 }, { "epoch": 0.09556455801391918, "grad_norm": 0.35368141531944275, "learning_rate": 9.776349992541746e-05, "loss": 1.7881, "step": 920 }, { "epoch": 0.09566843253349953, "grad_norm": 0.3645842671394348, "learning_rate": 9.775867199994994e-05, "loss": 1.9618, "step": 921 }, { "epoch": 0.09577230705307987, "grad_norm": 0.4213750958442688, "learning_rate": 9.775383898855641e-05, "loss": 1.9835, "step": 922 }, { "epoch": 0.09587618157266023, "grad_norm": 0.3259928226470947, "learning_rate": 9.77490008917515e-05, "loss": 1.6711, "step": 923 }, { "epoch": 0.09598005609224057, "grad_norm": 0.33279410004615784, "learning_rate": 9.774415771005048e-05, "loss": 1.7854, "step": 924 }, { "epoch": 0.09608393061182093, "grad_norm": 0.3503129482269287, "learning_rate": 9.773930944396905e-05, "loss": 1.8105, "step": 925 }, { "epoch": 0.09618780513140127, "grad_norm": 0.3861035704612732, "learning_rate": 9.773445609402356e-05, "loss": 1.8127, "step": 926 }, { "epoch": 0.09629167965098162, "grad_norm": 0.4022195339202881, "learning_rate": 9.772959766073084e-05, "loss": 1.8921, "step": 927 }, { "epoch": 0.09639555417056196, "grad_norm": 0.34826424717903137, "learning_rate": 9.772473414460828e-05, "loss": 1.8411, "step": 928 }, { "epoch": 0.0964994286901423, "grad_norm": 0.3265725374221802, "learning_rate": 9.77198655461738e-05, "loss": 1.6894, "step": 929 }, { "epoch": 0.09660330320972266, "grad_norm": 0.3737447261810303, "learning_rate": 9.771499186594586e-05, "loss": 1.8592, "step": 930 }, { "epoch": 0.096707177729303, "grad_norm": 0.3337661623954773, "learning_rate": 9.771011310444348e-05, "loss": 1.6941, "step": 931 }, { "epoch": 0.09681105224888335, "grad_norm": 0.3704545199871063, "learning_rate": 9.770522926218622e-05, "loss": 1.7519, "step": 932 }, { "epoch": 0.0969149267684637, "grad_norm": 0.35930871963500977, "learning_rate": 9.770034033969413e-05, "loss": 1.9341, "step": 933 }, { "epoch": 0.09701880128804405, "grad_norm": 0.38769471645355225, "learning_rate": 9.769544633748789e-05, "loss": 1.8773, "step": 934 }, { "epoch": 0.09712267580762439, "grad_norm": 0.37353989481925964, "learning_rate": 9.769054725608867e-05, "loss": 1.9544, "step": 935 }, { "epoch": 0.09722655032720473, "grad_norm": 0.39026400446891785, "learning_rate": 9.768564309601813e-05, "loss": 2.0178, "step": 936 }, { "epoch": 0.09733042484678509, "grad_norm": 0.37478938698768616, "learning_rate": 9.768073385779859e-05, "loss": 1.6323, "step": 937 }, { "epoch": 0.09743429936636543, "grad_norm": 0.3543946146965027, "learning_rate": 9.76758195419528e-05, "loss": 1.8568, "step": 938 }, { "epoch": 0.09753817388594578, "grad_norm": 0.3243783712387085, "learning_rate": 9.767090014900413e-05, "loss": 1.6446, "step": 939 }, { "epoch": 0.09764204840552612, "grad_norm": 0.390176922082901, "learning_rate": 9.766597567947644e-05, "loss": 1.7432, "step": 940 }, { "epoch": 0.09774592292510648, "grad_norm": 0.40544116497039795, "learning_rate": 9.766104613389413e-05, "loss": 1.8066, "step": 941 }, { "epoch": 0.09784979744468682, "grad_norm": 0.4119703471660614, "learning_rate": 9.76561115127822e-05, "loss": 1.8682, "step": 942 }, { "epoch": 0.09795367196426717, "grad_norm": 0.41198158264160156, "learning_rate": 9.765117181666612e-05, "loss": 1.8556, "step": 943 }, { "epoch": 0.09805754648384751, "grad_norm": 0.3242831528186798, "learning_rate": 9.764622704607193e-05, "loss": 1.8249, "step": 944 }, { "epoch": 0.09816142100342785, "grad_norm": 0.336739718914032, "learning_rate": 9.764127720152623e-05, "loss": 1.7983, "step": 945 }, { "epoch": 0.09826529552300821, "grad_norm": 0.43454012274742126, "learning_rate": 9.763632228355611e-05, "loss": 1.8482, "step": 946 }, { "epoch": 0.09836917004258855, "grad_norm": 0.39079445600509644, "learning_rate": 9.763136229268924e-05, "loss": 1.9735, "step": 947 }, { "epoch": 0.0984730445621689, "grad_norm": 0.3446981906890869, "learning_rate": 9.762639722945384e-05, "loss": 1.8289, "step": 948 }, { "epoch": 0.09857691908174925, "grad_norm": 0.3308192193508148, "learning_rate": 9.762142709437863e-05, "loss": 1.6953, "step": 949 }, { "epoch": 0.0986807936013296, "grad_norm": 0.40982964634895325, "learning_rate": 9.76164518879929e-05, "loss": 1.9994, "step": 950 }, { "epoch": 0.09878466812090994, "grad_norm": 0.3556596040725708, "learning_rate": 9.761147161082644e-05, "loss": 1.7591, "step": 951 }, { "epoch": 0.09888854264049028, "grad_norm": 0.361565500497818, "learning_rate": 9.760648626340965e-05, "loss": 1.8883, "step": 952 }, { "epoch": 0.09899241716007064, "grad_norm": 0.36020350456237793, "learning_rate": 9.760149584627343e-05, "loss": 1.7859, "step": 953 }, { "epoch": 0.09909629167965098, "grad_norm": 0.3831625282764435, "learning_rate": 9.759650035994921e-05, "loss": 1.9948, "step": 954 }, { "epoch": 0.09920016619923133, "grad_norm": 0.34753647446632385, "learning_rate": 9.759149980496896e-05, "loss": 1.8239, "step": 955 }, { "epoch": 0.09930404071881167, "grad_norm": 0.3743743896484375, "learning_rate": 9.758649418186521e-05, "loss": 1.8389, "step": 956 }, { "epoch": 0.09940791523839203, "grad_norm": 0.38041260838508606, "learning_rate": 9.758148349117101e-05, "loss": 1.9304, "step": 957 }, { "epoch": 0.09951178975797237, "grad_norm": 0.4073218107223511, "learning_rate": 9.757646773341998e-05, "loss": 1.9184, "step": 958 }, { "epoch": 0.09961566427755271, "grad_norm": 0.3602345585823059, "learning_rate": 9.757144690914624e-05, "loss": 1.8935, "step": 959 }, { "epoch": 0.09971953879713306, "grad_norm": 0.3824581801891327, "learning_rate": 9.756642101888448e-05, "loss": 1.8969, "step": 960 }, { "epoch": 0.0998234133167134, "grad_norm": 0.3472011387348175, "learning_rate": 9.756139006316991e-05, "loss": 1.7702, "step": 961 }, { "epoch": 0.09992728783629376, "grad_norm": 0.3461735248565674, "learning_rate": 9.755635404253829e-05, "loss": 1.6524, "step": 962 }, { "epoch": 0.1000311623558741, "grad_norm": 0.34832313656806946, "learning_rate": 9.755131295752593e-05, "loss": 1.701, "step": 963 }, { "epoch": 0.10013503687545446, "grad_norm": 0.3486674726009369, "learning_rate": 9.754626680866964e-05, "loss": 1.8465, "step": 964 }, { "epoch": 0.1002389113950348, "grad_norm": 0.3982466161251068, "learning_rate": 9.754121559650681e-05, "loss": 2.0038, "step": 965 }, { "epoch": 0.10034278591461515, "grad_norm": 0.3699825406074524, "learning_rate": 9.753615932157536e-05, "loss": 2.1116, "step": 966 }, { "epoch": 0.10044666043419549, "grad_norm": 0.37160253524780273, "learning_rate": 9.753109798441375e-05, "loss": 1.9158, "step": 967 }, { "epoch": 0.10055053495377583, "grad_norm": 0.34480687975883484, "learning_rate": 9.752603158556095e-05, "loss": 1.7587, "step": 968 }, { "epoch": 0.10065440947335619, "grad_norm": 0.34491807222366333, "learning_rate": 9.752096012555652e-05, "loss": 1.8125, "step": 969 }, { "epoch": 0.10075828399293653, "grad_norm": 0.36737021803855896, "learning_rate": 9.751588360494048e-05, "loss": 1.9693, "step": 970 }, { "epoch": 0.10086215851251688, "grad_norm": 0.372712641954422, "learning_rate": 9.75108020242535e-05, "loss": 1.7953, "step": 971 }, { "epoch": 0.10096603303209722, "grad_norm": 0.3740496039390564, "learning_rate": 9.75057153840367e-05, "loss": 1.7753, "step": 972 }, { "epoch": 0.10106990755167758, "grad_norm": 0.46863672137260437, "learning_rate": 9.750062368483176e-05, "loss": 2.1136, "step": 973 }, { "epoch": 0.10117378207125792, "grad_norm": 0.3760620355606079, "learning_rate": 9.749552692718093e-05, "loss": 1.7655, "step": 974 }, { "epoch": 0.10127765659083826, "grad_norm": 0.35611656308174133, "learning_rate": 9.749042511162696e-05, "loss": 1.8326, "step": 975 }, { "epoch": 0.10138153111041862, "grad_norm": 0.33176377415657043, "learning_rate": 9.748531823871314e-05, "loss": 1.7562, "step": 976 }, { "epoch": 0.10148540562999896, "grad_norm": 0.33755457401275635, "learning_rate": 9.748020630898333e-05, "loss": 1.7919, "step": 977 }, { "epoch": 0.10158928014957931, "grad_norm": 0.40104183554649353, "learning_rate": 9.747508932298192e-05, "loss": 1.7349, "step": 978 }, { "epoch": 0.10169315466915965, "grad_norm": 0.33952096104621887, "learning_rate": 9.746996728125382e-05, "loss": 1.6836, "step": 979 }, { "epoch": 0.10179702918874001, "grad_norm": 0.3854829668998718, "learning_rate": 9.746484018434447e-05, "loss": 1.6913, "step": 980 }, { "epoch": 0.10190090370832035, "grad_norm": 0.44178688526153564, "learning_rate": 9.745970803279989e-05, "loss": 2.2579, "step": 981 }, { "epoch": 0.1020047782279007, "grad_norm": 0.35316595435142517, "learning_rate": 9.74545708271666e-05, "loss": 1.7616, "step": 982 }, { "epoch": 0.10210865274748104, "grad_norm": 0.36709627509117126, "learning_rate": 9.744942856799169e-05, "loss": 1.6341, "step": 983 }, { "epoch": 0.10221252726706138, "grad_norm": 0.4684566557407379, "learning_rate": 9.744428125582275e-05, "loss": 2.0114, "step": 984 }, { "epoch": 0.10231640178664174, "grad_norm": 0.34113651514053345, "learning_rate": 9.743912889120791e-05, "loss": 1.7651, "step": 985 }, { "epoch": 0.10242027630622208, "grad_norm": 0.4054688513278961, "learning_rate": 9.743397147469592e-05, "loss": 2.0589, "step": 986 }, { "epoch": 0.10252415082580243, "grad_norm": 0.36325010657310486, "learning_rate": 9.742880900683596e-05, "loss": 1.8003, "step": 987 }, { "epoch": 0.10262802534538278, "grad_norm": 0.3484811782836914, "learning_rate": 9.742364148817777e-05, "loss": 1.8546, "step": 988 }, { "epoch": 0.10273189986496313, "grad_norm": 0.3415622115135193, "learning_rate": 9.74184689192717e-05, "loss": 1.6446, "step": 989 }, { "epoch": 0.10283577438454347, "grad_norm": 0.377069354057312, "learning_rate": 9.741329130066859e-05, "loss": 2.0174, "step": 990 }, { "epoch": 0.10293964890412381, "grad_norm": 0.37598928809165955, "learning_rate": 9.740810863291976e-05, "loss": 1.9646, "step": 991 }, { "epoch": 0.10304352342370417, "grad_norm": 0.34742510318756104, "learning_rate": 9.740292091657718e-05, "loss": 1.7112, "step": 992 }, { "epoch": 0.10314739794328451, "grad_norm": 0.35279062390327454, "learning_rate": 9.739772815219327e-05, "loss": 1.736, "step": 993 }, { "epoch": 0.10325127246286486, "grad_norm": 0.33563071489334106, "learning_rate": 9.739253034032103e-05, "loss": 1.7482, "step": 994 }, { "epoch": 0.1033551469824452, "grad_norm": 0.4013693332672119, "learning_rate": 9.738732748151397e-05, "loss": 1.9931, "step": 995 }, { "epoch": 0.10345902150202556, "grad_norm": 0.3738352954387665, "learning_rate": 9.738211957632619e-05, "loss": 1.7324, "step": 996 }, { "epoch": 0.1035628960216059, "grad_norm": 0.34439367055892944, "learning_rate": 9.737690662531227e-05, "loss": 1.7277, "step": 997 }, { "epoch": 0.10366677054118625, "grad_norm": 0.34663471579551697, "learning_rate": 9.737168862902735e-05, "loss": 1.8904, "step": 998 }, { "epoch": 0.1037706450607666, "grad_norm": 0.3536369800567627, "learning_rate": 9.736646558802709e-05, "loss": 1.898, "step": 999 }, { "epoch": 0.10387451958034694, "grad_norm": 0.37141430377960205, "learning_rate": 9.736123750286772e-05, "loss": 1.9826, "step": 1000 }, { "epoch": 0.10397839409992729, "grad_norm": 0.33750221133232117, "learning_rate": 9.7356004374106e-05, "loss": 1.6245, "step": 1001 }, { "epoch": 0.10408226861950763, "grad_norm": 0.39704445004463196, "learning_rate": 9.735076620229919e-05, "loss": 1.9813, "step": 1002 }, { "epoch": 0.10418614313908799, "grad_norm": 0.3396252691745758, "learning_rate": 9.734552298800513e-05, "loss": 1.7119, "step": 1003 }, { "epoch": 0.10429001765866833, "grad_norm": 0.3506071865558624, "learning_rate": 9.734027473178219e-05, "loss": 1.8563, "step": 1004 }, { "epoch": 0.10439389217824868, "grad_norm": 0.3697708249092102, "learning_rate": 9.733502143418925e-05, "loss": 1.5552, "step": 1005 }, { "epoch": 0.10449776669782902, "grad_norm": 0.34835517406463623, "learning_rate": 9.732976309578577e-05, "loss": 1.6061, "step": 1006 }, { "epoch": 0.10460164121740936, "grad_norm": 0.4172792434692383, "learning_rate": 9.73244997171317e-05, "loss": 2.0223, "step": 1007 }, { "epoch": 0.10470551573698972, "grad_norm": 0.37893515825271606, "learning_rate": 9.731923129878754e-05, "loss": 1.8512, "step": 1008 }, { "epoch": 0.10480939025657006, "grad_norm": 0.3517685830593109, "learning_rate": 9.731395784131437e-05, "loss": 1.7601, "step": 1009 }, { "epoch": 0.10491326477615041, "grad_norm": 0.3533296585083008, "learning_rate": 9.730867934527374e-05, "loss": 1.7264, "step": 1010 }, { "epoch": 0.10501713929573075, "grad_norm": 0.3656322956085205, "learning_rate": 9.73033958112278e-05, "loss": 1.8542, "step": 1011 }, { "epoch": 0.10512101381531111, "grad_norm": 0.36052483320236206, "learning_rate": 9.729810723973917e-05, "loss": 1.7967, "step": 1012 }, { "epoch": 0.10522488833489145, "grad_norm": 0.3687455356121063, "learning_rate": 9.729281363137106e-05, "loss": 1.9559, "step": 1013 }, { "epoch": 0.10532876285447179, "grad_norm": 0.3520287275314331, "learning_rate": 9.72875149866872e-05, "loss": 1.8265, "step": 1014 }, { "epoch": 0.10543263737405215, "grad_norm": 0.36388278007507324, "learning_rate": 9.728221130625186e-05, "loss": 1.7892, "step": 1015 }, { "epoch": 0.10553651189363249, "grad_norm": 0.3430229127407074, "learning_rate": 9.727690259062982e-05, "loss": 1.7436, "step": 1016 }, { "epoch": 0.10564038641321284, "grad_norm": 0.40209057927131653, "learning_rate": 9.727158884038643e-05, "loss": 2.0679, "step": 1017 }, { "epoch": 0.10574426093279318, "grad_norm": 0.35472702980041504, "learning_rate": 9.726627005608756e-05, "loss": 1.8322, "step": 1018 }, { "epoch": 0.10584813545237354, "grad_norm": 0.3761141896247864, "learning_rate": 9.726094623829962e-05, "loss": 1.9986, "step": 1019 }, { "epoch": 0.10595200997195388, "grad_norm": 0.37328705191612244, "learning_rate": 9.725561738758956e-05, "loss": 1.8012, "step": 1020 }, { "epoch": 0.10605588449153423, "grad_norm": 0.3950223922729492, "learning_rate": 9.725028350452485e-05, "loss": 1.859, "step": 1021 }, { "epoch": 0.10615975901111457, "grad_norm": 0.37431395053863525, "learning_rate": 9.724494458967352e-05, "loss": 1.6539, "step": 1022 }, { "epoch": 0.10626363353069491, "grad_norm": 0.3411618173122406, "learning_rate": 9.723960064360412e-05, "loss": 1.6884, "step": 1023 }, { "epoch": 0.10636750805027527, "grad_norm": 0.3407668173313141, "learning_rate": 9.72342516668857e-05, "loss": 1.7554, "step": 1024 }, { "epoch": 0.10647138256985561, "grad_norm": 0.3708615005016327, "learning_rate": 9.722889766008796e-05, "loss": 1.7656, "step": 1025 }, { "epoch": 0.10657525708943597, "grad_norm": 0.3609970510005951, "learning_rate": 9.722353862378101e-05, "loss": 1.8226, "step": 1026 }, { "epoch": 0.1066791316090163, "grad_norm": 0.341370552778244, "learning_rate": 9.721817455853554e-05, "loss": 1.6735, "step": 1027 }, { "epoch": 0.10678300612859666, "grad_norm": 0.3461858630180359, "learning_rate": 9.72128054649228e-05, "loss": 1.6466, "step": 1028 }, { "epoch": 0.106886880648177, "grad_norm": 0.3534831404685974, "learning_rate": 9.720743134351457e-05, "loss": 1.8349, "step": 1029 }, { "epoch": 0.10699075516775734, "grad_norm": 0.33681240677833557, "learning_rate": 9.720205219488312e-05, "loss": 1.7535, "step": 1030 }, { "epoch": 0.1070946296873377, "grad_norm": 0.3691225051879883, "learning_rate": 9.71966680196013e-05, "loss": 1.7835, "step": 1031 }, { "epoch": 0.10719850420691804, "grad_norm": 0.3454303443431854, "learning_rate": 9.719127881824249e-05, "loss": 1.7698, "step": 1032 }, { "epoch": 0.10730237872649839, "grad_norm": 0.3353855311870575, "learning_rate": 9.718588459138059e-05, "loss": 1.8369, "step": 1033 }, { "epoch": 0.10740625324607873, "grad_norm": 0.3854605257511139, "learning_rate": 9.718048533959004e-05, "loss": 1.8615, "step": 1034 }, { "epoch": 0.10751012776565909, "grad_norm": 0.34844595193862915, "learning_rate": 9.717508106344582e-05, "loss": 1.9681, "step": 1035 }, { "epoch": 0.10761400228523943, "grad_norm": 0.3415117561817169, "learning_rate": 9.716967176352345e-05, "loss": 1.8015, "step": 1036 }, { "epoch": 0.10771787680481978, "grad_norm": 0.3829508125782013, "learning_rate": 9.716425744039898e-05, "loss": 1.9817, "step": 1037 }, { "epoch": 0.10782175132440013, "grad_norm": 0.3588506877422333, "learning_rate": 9.715883809464899e-05, "loss": 1.7871, "step": 1038 }, { "epoch": 0.10792562584398047, "grad_norm": 0.36895090341567993, "learning_rate": 9.715341372685059e-05, "loss": 1.9029, "step": 1039 }, { "epoch": 0.10802950036356082, "grad_norm": 0.4375429153442383, "learning_rate": 9.714798433758144e-05, "loss": 1.6931, "step": 1040 }, { "epoch": 0.10813337488314116, "grad_norm": 0.34080272912979126, "learning_rate": 9.714254992741971e-05, "loss": 1.7422, "step": 1041 }, { "epoch": 0.10823724940272152, "grad_norm": 0.35768958926200867, "learning_rate": 9.713711049694416e-05, "loss": 1.7005, "step": 1042 }, { "epoch": 0.10834112392230186, "grad_norm": 0.35327664017677307, "learning_rate": 9.713166604673403e-05, "loss": 1.7248, "step": 1043 }, { "epoch": 0.10844499844188221, "grad_norm": 0.33711856603622437, "learning_rate": 9.71262165773691e-05, "loss": 1.719, "step": 1044 }, { "epoch": 0.10854887296146255, "grad_norm": 0.35835373401641846, "learning_rate": 9.712076208942968e-05, "loss": 1.7452, "step": 1045 }, { "epoch": 0.1086527474810429, "grad_norm": 0.3395202159881592, "learning_rate": 9.711530258349668e-05, "loss": 1.7313, "step": 1046 }, { "epoch": 0.10875662200062325, "grad_norm": 0.3273974061012268, "learning_rate": 9.710983806015148e-05, "loss": 1.7674, "step": 1047 }, { "epoch": 0.10886049652020359, "grad_norm": 0.35871952772140503, "learning_rate": 9.710436851997598e-05, "loss": 1.964, "step": 1048 }, { "epoch": 0.10896437103978394, "grad_norm": 0.3521716296672821, "learning_rate": 9.709889396355265e-05, "loss": 1.8366, "step": 1049 }, { "epoch": 0.10906824555936429, "grad_norm": 0.37341251969337463, "learning_rate": 9.709341439146452e-05, "loss": 1.8713, "step": 1050 }, { "epoch": 0.10917212007894464, "grad_norm": 0.37906524538993835, "learning_rate": 9.708792980429508e-05, "loss": 1.988, "step": 1051 }, { "epoch": 0.10927599459852498, "grad_norm": 0.3761735260486603, "learning_rate": 9.708244020262842e-05, "loss": 1.9337, "step": 1052 }, { "epoch": 0.10937986911810534, "grad_norm": 0.33354657888412476, "learning_rate": 9.707694558704914e-05, "loss": 1.8026, "step": 1053 }, { "epoch": 0.10948374363768568, "grad_norm": 0.3411722779273987, "learning_rate": 9.707144595814237e-05, "loss": 1.6728, "step": 1054 }, { "epoch": 0.10958761815726602, "grad_norm": 0.3556554913520813, "learning_rate": 9.706594131649378e-05, "loss": 1.8825, "step": 1055 }, { "epoch": 0.10969149267684637, "grad_norm": 0.3727862536907196, "learning_rate": 9.706043166268957e-05, "loss": 1.8012, "step": 1056 }, { "epoch": 0.10979536719642671, "grad_norm": 0.3387303650379181, "learning_rate": 9.705491699731646e-05, "loss": 1.6768, "step": 1057 }, { "epoch": 0.10989924171600707, "grad_norm": 0.36352434754371643, "learning_rate": 9.704939732096175e-05, "loss": 1.8888, "step": 1058 }, { "epoch": 0.11000311623558741, "grad_norm": 0.3399582803249359, "learning_rate": 9.704387263421322e-05, "loss": 1.6474, "step": 1059 }, { "epoch": 0.11010699075516776, "grad_norm": 0.41029804944992065, "learning_rate": 9.70383429376592e-05, "loss": 1.9323, "step": 1060 }, { "epoch": 0.1102108652747481, "grad_norm": 0.40055909752845764, "learning_rate": 9.703280823188858e-05, "loss": 1.8642, "step": 1061 }, { "epoch": 0.11031473979432845, "grad_norm": 0.365138441324234, "learning_rate": 9.702726851749074e-05, "loss": 1.7669, "step": 1062 }, { "epoch": 0.1104186143139088, "grad_norm": 0.3488384187221527, "learning_rate": 9.702172379505565e-05, "loss": 1.6532, "step": 1063 }, { "epoch": 0.11052248883348914, "grad_norm": 0.34194380044937134, "learning_rate": 9.701617406517374e-05, "loss": 1.6235, "step": 1064 }, { "epoch": 0.1106263633530695, "grad_norm": 0.3830929696559906, "learning_rate": 9.701061932843605e-05, "loss": 1.9023, "step": 1065 }, { "epoch": 0.11073023787264984, "grad_norm": 0.3511880934238434, "learning_rate": 9.700505958543409e-05, "loss": 1.6955, "step": 1066 }, { "epoch": 0.11083411239223019, "grad_norm": 0.35780179500579834, "learning_rate": 9.699949483675994e-05, "loss": 1.7417, "step": 1067 }, { "epoch": 0.11093798691181053, "grad_norm": 0.3555978536605835, "learning_rate": 9.69939250830062e-05, "loss": 1.7226, "step": 1068 }, { "epoch": 0.11104186143139087, "grad_norm": 0.34007731080055237, "learning_rate": 9.698835032476598e-05, "loss": 1.7571, "step": 1069 }, { "epoch": 0.11114573595097123, "grad_norm": 0.34321436285972595, "learning_rate": 9.6982770562633e-05, "loss": 1.8685, "step": 1070 }, { "epoch": 0.11124961047055157, "grad_norm": 0.32458704710006714, "learning_rate": 9.697718579720144e-05, "loss": 1.6949, "step": 1071 }, { "epoch": 0.11135348499013192, "grad_norm": 0.37475547194480896, "learning_rate": 9.697159602906603e-05, "loss": 1.9906, "step": 1072 }, { "epoch": 0.11145735950971226, "grad_norm": 0.38911253213882446, "learning_rate": 9.696600125882202e-05, "loss": 1.9341, "step": 1073 }, { "epoch": 0.11156123402929262, "grad_norm": 0.3872954249382019, "learning_rate": 9.696040148706523e-05, "loss": 1.929, "step": 1074 }, { "epoch": 0.11166510854887296, "grad_norm": 0.3806956708431244, "learning_rate": 9.695479671439198e-05, "loss": 1.8981, "step": 1075 }, { "epoch": 0.11176898306845331, "grad_norm": 0.3513946235179901, "learning_rate": 9.694918694139915e-05, "loss": 1.689, "step": 1076 }, { "epoch": 0.11187285758803366, "grad_norm": 0.3258873522281647, "learning_rate": 9.694357216868413e-05, "loss": 1.6753, "step": 1077 }, { "epoch": 0.111976732107614, "grad_norm": 0.36498063802719116, "learning_rate": 9.693795239684484e-05, "loss": 1.6901, "step": 1078 }, { "epoch": 0.11208060662719435, "grad_norm": 0.34291768074035645, "learning_rate": 9.693232762647976e-05, "loss": 1.6372, "step": 1079 }, { "epoch": 0.11218448114677469, "grad_norm": 0.38301602005958557, "learning_rate": 9.692669785818786e-05, "loss": 1.8587, "step": 1080 }, { "epoch": 0.11228835566635505, "grad_norm": 0.3735436201095581, "learning_rate": 9.69210630925687e-05, "loss": 1.9017, "step": 1081 }, { "epoch": 0.11239223018593539, "grad_norm": 0.34948012232780457, "learning_rate": 9.69154233302223e-05, "loss": 1.7744, "step": 1082 }, { "epoch": 0.11249610470551574, "grad_norm": 0.30937257409095764, "learning_rate": 9.690977857174927e-05, "loss": 1.6596, "step": 1083 }, { "epoch": 0.11259997922509608, "grad_norm": 0.3430674970149994, "learning_rate": 9.690412881775076e-05, "loss": 1.8651, "step": 1084 }, { "epoch": 0.11270385374467642, "grad_norm": 0.4052720069885254, "learning_rate": 9.689847406882838e-05, "loss": 1.9878, "step": 1085 }, { "epoch": 0.11280772826425678, "grad_norm": 0.3367058038711548, "learning_rate": 9.689281432558432e-05, "loss": 1.9459, "step": 1086 }, { "epoch": 0.11291160278383712, "grad_norm": 0.32296720147132874, "learning_rate": 9.688714958862133e-05, "loss": 1.7604, "step": 1087 }, { "epoch": 0.11301547730341747, "grad_norm": 0.36404433846473694, "learning_rate": 9.688147985854264e-05, "loss": 1.8102, "step": 1088 }, { "epoch": 0.11311935182299782, "grad_norm": 0.4039054811000824, "learning_rate": 9.687580513595202e-05, "loss": 1.8611, "step": 1089 }, { "epoch": 0.11322322634257817, "grad_norm": 0.4218135476112366, "learning_rate": 9.687012542145382e-05, "loss": 1.962, "step": 1090 }, { "epoch": 0.11332710086215851, "grad_norm": 0.36868998408317566, "learning_rate": 9.686444071565285e-05, "loss": 1.7224, "step": 1091 }, { "epoch": 0.11343097538173887, "grad_norm": 0.3493179380893707, "learning_rate": 9.68587510191545e-05, "loss": 1.715, "step": 1092 }, { "epoch": 0.11353484990131921, "grad_norm": 0.3461728096008301, "learning_rate": 9.685305633256469e-05, "loss": 1.6688, "step": 1093 }, { "epoch": 0.11363872442089955, "grad_norm": 0.35678285360336304, "learning_rate": 9.684735665648983e-05, "loss": 1.851, "step": 1094 }, { "epoch": 0.1137425989404799, "grad_norm": 0.3611486852169037, "learning_rate": 9.684165199153692e-05, "loss": 1.8039, "step": 1095 }, { "epoch": 0.11384647346006024, "grad_norm": 0.3748013377189636, "learning_rate": 9.683594233831345e-05, "loss": 1.8317, "step": 1096 }, { "epoch": 0.1139503479796406, "grad_norm": 0.39046037197113037, "learning_rate": 9.683022769742746e-05, "loss": 1.9756, "step": 1097 }, { "epoch": 0.11405422249922094, "grad_norm": 0.32861414551734924, "learning_rate": 9.68245080694875e-05, "loss": 1.6867, "step": 1098 }, { "epoch": 0.1141580970188013, "grad_norm": 0.4004691541194916, "learning_rate": 9.68187834551027e-05, "loss": 1.9397, "step": 1099 }, { "epoch": 0.11426197153838163, "grad_norm": 0.33561286330223083, "learning_rate": 9.681305385488264e-05, "loss": 1.7001, "step": 1100 }, { "epoch": 0.11436584605796198, "grad_norm": 0.3517715036869049, "learning_rate": 9.680731926943752e-05, "loss": 1.774, "step": 1101 }, { "epoch": 0.11446972057754233, "grad_norm": 0.37641438841819763, "learning_rate": 9.680157969937799e-05, "loss": 1.7585, "step": 1102 }, { "epoch": 0.11457359509712267, "grad_norm": 0.36919966340065, "learning_rate": 9.67958351453153e-05, "loss": 1.8175, "step": 1103 }, { "epoch": 0.11467746961670303, "grad_norm": 0.3542039096355438, "learning_rate": 9.679008560786117e-05, "loss": 1.7941, "step": 1104 }, { "epoch": 0.11478134413628337, "grad_norm": 0.3409385681152344, "learning_rate": 9.678433108762791e-05, "loss": 1.7144, "step": 1105 }, { "epoch": 0.11488521865586372, "grad_norm": 0.3563219904899597, "learning_rate": 9.677857158522833e-05, "loss": 1.9284, "step": 1106 }, { "epoch": 0.11498909317544406, "grad_norm": 0.3560921251773834, "learning_rate": 9.677280710127575e-05, "loss": 1.5077, "step": 1107 }, { "epoch": 0.11509296769502442, "grad_norm": 0.43264439702033997, "learning_rate": 9.676703763638406e-05, "loss": 2.0279, "step": 1108 }, { "epoch": 0.11519684221460476, "grad_norm": 0.36117497086524963, "learning_rate": 9.676126319116766e-05, "loss": 1.9437, "step": 1109 }, { "epoch": 0.1153007167341851, "grad_norm": 0.3572199046611786, "learning_rate": 9.675548376624148e-05, "loss": 1.8091, "step": 1110 }, { "epoch": 0.11540459125376545, "grad_norm": 0.35779669880867004, "learning_rate": 9.674969936222099e-05, "loss": 1.7403, "step": 1111 }, { "epoch": 0.1155084657733458, "grad_norm": 0.36604002118110657, "learning_rate": 9.674390997972218e-05, "loss": 1.9467, "step": 1112 }, { "epoch": 0.11561234029292615, "grad_norm": 0.38619542121887207, "learning_rate": 9.673811561936157e-05, "loss": 1.8208, "step": 1113 }, { "epoch": 0.11571621481250649, "grad_norm": 0.33228105306625366, "learning_rate": 9.673231628175622e-05, "loss": 1.7336, "step": 1114 }, { "epoch": 0.11582008933208685, "grad_norm": 0.3648672103881836, "learning_rate": 9.672651196752371e-05, "loss": 1.9191, "step": 1115 }, { "epoch": 0.11592396385166719, "grad_norm": 0.33783960342407227, "learning_rate": 9.672070267728215e-05, "loss": 1.7849, "step": 1116 }, { "epoch": 0.11602783837124753, "grad_norm": 0.4384811818599701, "learning_rate": 9.67148884116502e-05, "loss": 2.076, "step": 1117 }, { "epoch": 0.11613171289082788, "grad_norm": 0.3607281446456909, "learning_rate": 9.670906917124703e-05, "loss": 1.7839, "step": 1118 }, { "epoch": 0.11623558741040822, "grad_norm": 0.3579871356487274, "learning_rate": 9.670324495669231e-05, "loss": 1.7778, "step": 1119 }, { "epoch": 0.11633946192998858, "grad_norm": 0.3565009832382202, "learning_rate": 9.669741576860634e-05, "loss": 1.8653, "step": 1120 }, { "epoch": 0.11644333644956892, "grad_norm": 0.36752045154571533, "learning_rate": 9.669158160760982e-05, "loss": 1.9041, "step": 1121 }, { "epoch": 0.11654721096914927, "grad_norm": 0.32525351643562317, "learning_rate": 9.668574247432407e-05, "loss": 1.5888, "step": 1122 }, { "epoch": 0.11665108548872961, "grad_norm": 0.3645164370536804, "learning_rate": 9.667989836937092e-05, "loss": 1.8802, "step": 1123 }, { "epoch": 0.11675496000830995, "grad_norm": 0.34458303451538086, "learning_rate": 9.66740492933727e-05, "loss": 1.8201, "step": 1124 }, { "epoch": 0.11685883452789031, "grad_norm": 0.33625590801239014, "learning_rate": 9.66681952469523e-05, "loss": 1.758, "step": 1125 }, { "epoch": 0.11696270904747065, "grad_norm": 0.3612918257713318, "learning_rate": 9.666233623073315e-05, "loss": 1.8567, "step": 1126 }, { "epoch": 0.117066583567051, "grad_norm": 0.3521936535835266, "learning_rate": 9.665647224533915e-05, "loss": 1.8514, "step": 1127 }, { "epoch": 0.11717045808663135, "grad_norm": 0.42195868492126465, "learning_rate": 9.665060329139479e-05, "loss": 1.9686, "step": 1128 }, { "epoch": 0.1172743326062117, "grad_norm": 0.35592785477638245, "learning_rate": 9.66447293695251e-05, "loss": 1.6642, "step": 1129 }, { "epoch": 0.11737820712579204, "grad_norm": 0.3389096260070801, "learning_rate": 9.663885048035555e-05, "loss": 1.6711, "step": 1130 }, { "epoch": 0.1174820816453724, "grad_norm": 0.3547491729259491, "learning_rate": 9.663296662451221e-05, "loss": 1.7615, "step": 1131 }, { "epoch": 0.11758595616495274, "grad_norm": 0.33912357687950134, "learning_rate": 9.66270778026217e-05, "loss": 1.6615, "step": 1132 }, { "epoch": 0.11768983068453308, "grad_norm": 0.37360331416130066, "learning_rate": 9.662118401531108e-05, "loss": 1.8592, "step": 1133 }, { "epoch": 0.11779370520411343, "grad_norm": 0.3422006666660309, "learning_rate": 9.661528526320804e-05, "loss": 1.8253, "step": 1134 }, { "epoch": 0.11789757972369377, "grad_norm": 0.36613208055496216, "learning_rate": 9.660938154694075e-05, "loss": 1.592, "step": 1135 }, { "epoch": 0.11800145424327413, "grad_norm": 0.38055816292762756, "learning_rate": 9.660347286713787e-05, "loss": 1.6502, "step": 1136 }, { "epoch": 0.11810532876285447, "grad_norm": 0.3511364459991455, "learning_rate": 9.659755922442866e-05, "loss": 1.6945, "step": 1137 }, { "epoch": 0.11820920328243482, "grad_norm": 0.3394559621810913, "learning_rate": 9.659164061944285e-05, "loss": 1.7614, "step": 1138 }, { "epoch": 0.11831307780201517, "grad_norm": 0.38711434602737427, "learning_rate": 9.658571705281076e-05, "loss": 1.8105, "step": 1139 }, { "epoch": 0.1184169523215955, "grad_norm": 0.3627341389656067, "learning_rate": 9.657978852516319e-05, "loss": 1.6527, "step": 1140 }, { "epoch": 0.11852082684117586, "grad_norm": 0.3539731800556183, "learning_rate": 9.657385503713146e-05, "loss": 1.8393, "step": 1141 }, { "epoch": 0.1186247013607562, "grad_norm": 0.3663812279701233, "learning_rate": 9.656791658934747e-05, "loss": 1.7833, "step": 1142 }, { "epoch": 0.11872857588033656, "grad_norm": 0.3631526231765747, "learning_rate": 9.656197318244359e-05, "loss": 1.9837, "step": 1143 }, { "epoch": 0.1188324503999169, "grad_norm": 0.382820725440979, "learning_rate": 9.655602481705279e-05, "loss": 1.8836, "step": 1144 }, { "epoch": 0.11893632491949725, "grad_norm": 0.33744722604751587, "learning_rate": 9.655007149380849e-05, "loss": 1.7552, "step": 1145 }, { "epoch": 0.11904019943907759, "grad_norm": 0.34012195467948914, "learning_rate": 9.654411321334468e-05, "loss": 1.6319, "step": 1146 }, { "epoch": 0.11914407395865795, "grad_norm": 0.34437379240989685, "learning_rate": 9.653814997629587e-05, "loss": 1.6788, "step": 1147 }, { "epoch": 0.11924794847823829, "grad_norm": 0.3765188753604889, "learning_rate": 9.653218178329709e-05, "loss": 1.8694, "step": 1148 }, { "epoch": 0.11935182299781863, "grad_norm": 0.341296911239624, "learning_rate": 9.652620863498393e-05, "loss": 1.7641, "step": 1149 }, { "epoch": 0.11945569751739898, "grad_norm": 0.37878480553627014, "learning_rate": 9.652023053199247e-05, "loss": 1.8416, "step": 1150 }, { "epoch": 0.11955957203697933, "grad_norm": 0.39128556847572327, "learning_rate": 9.651424747495931e-05, "loss": 1.9727, "step": 1151 }, { "epoch": 0.11966344655655968, "grad_norm": 0.32593607902526855, "learning_rate": 9.650825946452164e-05, "loss": 1.6429, "step": 1152 }, { "epoch": 0.11976732107614002, "grad_norm": 0.365972101688385, "learning_rate": 9.65022665013171e-05, "loss": 1.7789, "step": 1153 }, { "epoch": 0.11987119559572038, "grad_norm": 0.3774997889995575, "learning_rate": 9.64962685859839e-05, "loss": 1.8482, "step": 1154 }, { "epoch": 0.11997507011530072, "grad_norm": 0.36030101776123047, "learning_rate": 9.64902657191608e-05, "loss": 1.621, "step": 1155 }, { "epoch": 0.12007894463488106, "grad_norm": 0.40642428398132324, "learning_rate": 9.648425790148704e-05, "loss": 1.869, "step": 1156 }, { "epoch": 0.12018281915446141, "grad_norm": 0.374668151140213, "learning_rate": 9.64782451336024e-05, "loss": 1.8155, "step": 1157 }, { "epoch": 0.12028669367404175, "grad_norm": 0.33325710892677307, "learning_rate": 9.64722274161472e-05, "loss": 1.6419, "step": 1158 }, { "epoch": 0.12039056819362211, "grad_norm": 0.36537379026412964, "learning_rate": 9.646620474976226e-05, "loss": 1.8969, "step": 1159 }, { "epoch": 0.12049444271320245, "grad_norm": 0.3427730202674866, "learning_rate": 9.6460177135089e-05, "loss": 1.7814, "step": 1160 }, { "epoch": 0.1205983172327828, "grad_norm": 0.39888283610343933, "learning_rate": 9.645414457276925e-05, "loss": 1.8611, "step": 1161 }, { "epoch": 0.12070219175236314, "grad_norm": 0.35970908403396606, "learning_rate": 9.644810706344547e-05, "loss": 1.838, "step": 1162 }, { "epoch": 0.1208060662719435, "grad_norm": 0.35236963629722595, "learning_rate": 9.644206460776058e-05, "loss": 1.7295, "step": 1163 }, { "epoch": 0.12090994079152384, "grad_norm": 0.3539031445980072, "learning_rate": 9.643601720635809e-05, "loss": 1.8078, "step": 1164 }, { "epoch": 0.12101381531110418, "grad_norm": 0.3607688546180725, "learning_rate": 9.642996485988198e-05, "loss": 1.8958, "step": 1165 }, { "epoch": 0.12111768983068454, "grad_norm": 0.363936185836792, "learning_rate": 9.642390756897677e-05, "loss": 1.7418, "step": 1166 }, { "epoch": 0.12122156435026488, "grad_norm": 0.3548314571380615, "learning_rate": 9.641784533428754e-05, "loss": 1.6745, "step": 1167 }, { "epoch": 0.12132543886984523, "grad_norm": 0.43917304277420044, "learning_rate": 9.641177815645984e-05, "loss": 2.0477, "step": 1168 }, { "epoch": 0.12142931338942557, "grad_norm": 0.36484494805336, "learning_rate": 9.64057060361398e-05, "loss": 1.8097, "step": 1169 }, { "epoch": 0.12153318790900593, "grad_norm": 0.41941696405410767, "learning_rate": 9.639962897397404e-05, "loss": 1.921, "step": 1170 }, { "epoch": 0.12163706242858627, "grad_norm": 0.3769179582595825, "learning_rate": 9.639354697060974e-05, "loss": 1.9793, "step": 1171 }, { "epoch": 0.12174093694816661, "grad_norm": 0.3361814618110657, "learning_rate": 9.638746002669455e-05, "loss": 1.685, "step": 1172 }, { "epoch": 0.12184481146774696, "grad_norm": 0.3244837522506714, "learning_rate": 9.638136814287672e-05, "loss": 1.6935, "step": 1173 }, { "epoch": 0.1219486859873273, "grad_norm": 0.3598596751689911, "learning_rate": 9.637527131980496e-05, "loss": 1.6975, "step": 1174 }, { "epoch": 0.12205256050690766, "grad_norm": 0.396697461605072, "learning_rate": 9.636916955812856e-05, "loss": 1.982, "step": 1175 }, { "epoch": 0.122156435026488, "grad_norm": 0.37619683146476746, "learning_rate": 9.636306285849728e-05, "loss": 1.8531, "step": 1176 }, { "epoch": 0.12226030954606835, "grad_norm": 0.3777252733707428, "learning_rate": 9.635695122156145e-05, "loss": 1.9038, "step": 1177 }, { "epoch": 0.1223641840656487, "grad_norm": 0.39798396825790405, "learning_rate": 9.635083464797192e-05, "loss": 2.0153, "step": 1178 }, { "epoch": 0.12246805858522904, "grad_norm": 0.3506300449371338, "learning_rate": 9.634471313838005e-05, "loss": 1.743, "step": 1179 }, { "epoch": 0.12257193310480939, "grad_norm": 0.3415655493736267, "learning_rate": 9.633858669343774e-05, "loss": 1.7878, "step": 1180 }, { "epoch": 0.12267580762438973, "grad_norm": 0.39789021015167236, "learning_rate": 9.633245531379739e-05, "loss": 1.9123, "step": 1181 }, { "epoch": 0.12277968214397009, "grad_norm": 0.37617623805999756, "learning_rate": 9.632631900011197e-05, "loss": 1.8383, "step": 1182 }, { "epoch": 0.12288355666355043, "grad_norm": 0.37131214141845703, "learning_rate": 9.632017775303492e-05, "loss": 1.817, "step": 1183 }, { "epoch": 0.12298743118313078, "grad_norm": 0.4080236256122589, "learning_rate": 9.631403157322026e-05, "loss": 1.9845, "step": 1184 }, { "epoch": 0.12309130570271112, "grad_norm": 0.37740015983581543, "learning_rate": 9.630788046132251e-05, "loss": 1.904, "step": 1185 }, { "epoch": 0.12319518022229148, "grad_norm": 0.33329272270202637, "learning_rate": 9.63017244179967e-05, "loss": 1.7073, "step": 1186 }, { "epoch": 0.12329905474187182, "grad_norm": 0.3489798307418823, "learning_rate": 9.629556344389839e-05, "loss": 1.7045, "step": 1187 }, { "epoch": 0.12340292926145216, "grad_norm": 0.3395266532897949, "learning_rate": 9.628939753968371e-05, "loss": 1.7442, "step": 1188 }, { "epoch": 0.12350680378103251, "grad_norm": 0.3406863808631897, "learning_rate": 9.628322670600926e-05, "loss": 1.7656, "step": 1189 }, { "epoch": 0.12361067830061286, "grad_norm": 0.35596734285354614, "learning_rate": 9.627705094353219e-05, "loss": 2.0144, "step": 1190 }, { "epoch": 0.12371455282019321, "grad_norm": 0.3764468729496002, "learning_rate": 9.627087025291015e-05, "loss": 1.8333, "step": 1191 }, { "epoch": 0.12381842733977355, "grad_norm": 0.3464232087135315, "learning_rate": 9.626468463480138e-05, "loss": 1.5966, "step": 1192 }, { "epoch": 0.1239223018593539, "grad_norm": 0.35695046186447144, "learning_rate": 9.625849408986456e-05, "loss": 1.7578, "step": 1193 }, { "epoch": 0.12402617637893425, "grad_norm": 0.3387443721294403, "learning_rate": 9.625229861875897e-05, "loss": 1.6883, "step": 1194 }, { "epoch": 0.12413005089851459, "grad_norm": 0.3431154191493988, "learning_rate": 9.624609822214434e-05, "loss": 1.534, "step": 1195 }, { "epoch": 0.12423392541809494, "grad_norm": 0.3574540317058563, "learning_rate": 9.623989290068099e-05, "loss": 1.804, "step": 1196 }, { "epoch": 0.12433779993767528, "grad_norm": 0.35088303685188293, "learning_rate": 9.623368265502973e-05, "loss": 1.8282, "step": 1197 }, { "epoch": 0.12444167445725564, "grad_norm": 0.35268113017082214, "learning_rate": 9.62274674858519e-05, "loss": 1.748, "step": 1198 }, { "epoch": 0.12454554897683598, "grad_norm": 0.4166131019592285, "learning_rate": 9.622124739380936e-05, "loss": 1.9086, "step": 1199 }, { "epoch": 0.12464942349641633, "grad_norm": 0.38566648960113525, "learning_rate": 9.621502237956452e-05, "loss": 1.7812, "step": 1200 }, { "epoch": 0.12475329801599667, "grad_norm": 0.35631853342056274, "learning_rate": 9.62087924437803e-05, "loss": 1.6562, "step": 1201 }, { "epoch": 0.12485717253557703, "grad_norm": 0.34664246439933777, "learning_rate": 9.620255758712012e-05, "loss": 1.743, "step": 1202 }, { "epoch": 0.12496104705515737, "grad_norm": 0.4030589759349823, "learning_rate": 9.619631781024797e-05, "loss": 1.7807, "step": 1203 }, { "epoch": 0.12506492157473773, "grad_norm": 0.3608097732067108, "learning_rate": 9.619007311382828e-05, "loss": 1.8727, "step": 1204 }, { "epoch": 0.12516879609431805, "grad_norm": 0.3390495777130127, "learning_rate": 9.618382349852612e-05, "loss": 1.6474, "step": 1205 }, { "epoch": 0.1252726706138984, "grad_norm": 0.34509339928627014, "learning_rate": 9.617756896500701e-05, "loss": 1.7651, "step": 1206 }, { "epoch": 0.12537654513347876, "grad_norm": 0.37745997309684753, "learning_rate": 9.6171309513937e-05, "loss": 1.6898, "step": 1207 }, { "epoch": 0.12548041965305912, "grad_norm": 0.33759596943855286, "learning_rate": 9.616504514598267e-05, "loss": 1.5825, "step": 1208 }, { "epoch": 0.12558429417263944, "grad_norm": 0.3818475008010864, "learning_rate": 9.615877586181114e-05, "loss": 1.7711, "step": 1209 }, { "epoch": 0.1256881686922198, "grad_norm": 0.3622841536998749, "learning_rate": 9.615250166209003e-05, "loss": 1.8772, "step": 1210 }, { "epoch": 0.12579204321180015, "grad_norm": 0.38078466057777405, "learning_rate": 9.614622254748749e-05, "loss": 1.6718, "step": 1211 }, { "epoch": 0.12589591773138048, "grad_norm": 0.3571094572544098, "learning_rate": 9.613993851867221e-05, "loss": 1.7133, "step": 1212 }, { "epoch": 0.12599979225096083, "grad_norm": 0.3744667172431946, "learning_rate": 9.613364957631339e-05, "loss": 1.8524, "step": 1213 }, { "epoch": 0.1261036667705412, "grad_norm": 0.3752327561378479, "learning_rate": 9.612735572108073e-05, "loss": 1.7747, "step": 1214 }, { "epoch": 0.12620754129012154, "grad_norm": 0.3768329620361328, "learning_rate": 9.61210569536445e-05, "loss": 1.7503, "step": 1215 }, { "epoch": 0.12631141580970187, "grad_norm": 0.35730409622192383, "learning_rate": 9.611475327467547e-05, "loss": 1.8134, "step": 1216 }, { "epoch": 0.12641529032928223, "grad_norm": 0.38782814145088196, "learning_rate": 9.610844468484492e-05, "loss": 1.9043, "step": 1217 }, { "epoch": 0.12651916484886258, "grad_norm": 0.35042253136634827, "learning_rate": 9.610213118482468e-05, "loss": 1.6922, "step": 1218 }, { "epoch": 0.1266230393684429, "grad_norm": 0.3477722108364105, "learning_rate": 9.609581277528708e-05, "loss": 1.8249, "step": 1219 }, { "epoch": 0.12672691388802326, "grad_norm": 0.358598530292511, "learning_rate": 9.608948945690496e-05, "loss": 1.6571, "step": 1220 }, { "epoch": 0.12683078840760362, "grad_norm": 0.34587663412094116, "learning_rate": 9.608316123035172e-05, "loss": 1.8308, "step": 1221 }, { "epoch": 0.12693466292718397, "grad_norm": 0.3662189543247223, "learning_rate": 9.60768280963013e-05, "loss": 1.8263, "step": 1222 }, { "epoch": 0.1270385374467643, "grad_norm": 0.4229857325553894, "learning_rate": 9.60704900554281e-05, "loss": 1.8465, "step": 1223 }, { "epoch": 0.12714241196634465, "grad_norm": 0.3771613538265228, "learning_rate": 9.606414710840706e-05, "loss": 1.9523, "step": 1224 }, { "epoch": 0.127246286485925, "grad_norm": 0.37686896324157715, "learning_rate": 9.605779925591366e-05, "loss": 1.8436, "step": 1225 }, { "epoch": 0.12735016100550534, "grad_norm": 0.3408556580543518, "learning_rate": 9.605144649862391e-05, "loss": 1.8095, "step": 1226 }, { "epoch": 0.1274540355250857, "grad_norm": 0.3675098717212677, "learning_rate": 9.604508883721432e-05, "loss": 1.9452, "step": 1227 }, { "epoch": 0.12755791004466605, "grad_norm": 0.3680543899536133, "learning_rate": 9.603872627236194e-05, "loss": 1.8987, "step": 1228 }, { "epoch": 0.1276617845642464, "grad_norm": 0.3140665888786316, "learning_rate": 9.603235880474433e-05, "loss": 1.6107, "step": 1229 }, { "epoch": 0.12776565908382673, "grad_norm": 0.35767602920532227, "learning_rate": 9.602598643503957e-05, "loss": 1.5594, "step": 1230 }, { "epoch": 0.12786953360340708, "grad_norm": 0.40325549244880676, "learning_rate": 9.601960916392627e-05, "loss": 2.082, "step": 1231 }, { "epoch": 0.12797340812298744, "grad_norm": 0.34380728006362915, "learning_rate": 9.601322699208357e-05, "loss": 1.7435, "step": 1232 }, { "epoch": 0.1280772826425678, "grad_norm": 0.3566792607307434, "learning_rate": 9.60068399201911e-05, "loss": 1.7594, "step": 1233 }, { "epoch": 0.12818115716214812, "grad_norm": 0.34519535303115845, "learning_rate": 9.600044794892905e-05, "loss": 1.8354, "step": 1234 }, { "epoch": 0.12828503168172847, "grad_norm": 0.36046940088272095, "learning_rate": 9.599405107897811e-05, "loss": 1.7715, "step": 1235 }, { "epoch": 0.12838890620130883, "grad_norm": 0.33479416370391846, "learning_rate": 9.598764931101949e-05, "loss": 1.6034, "step": 1236 }, { "epoch": 0.12849278072088915, "grad_norm": 0.3153260052204132, "learning_rate": 9.598124264573495e-05, "loss": 1.6561, "step": 1237 }, { "epoch": 0.1285966552404695, "grad_norm": 0.3522169888019562, "learning_rate": 9.597483108380674e-05, "loss": 1.8149, "step": 1238 }, { "epoch": 0.12870052976004986, "grad_norm": 0.39202046394348145, "learning_rate": 9.596841462591761e-05, "loss": 1.8963, "step": 1239 }, { "epoch": 0.12880440427963022, "grad_norm": 0.3598194718360901, "learning_rate": 9.596199327275091e-05, "loss": 1.7161, "step": 1240 }, { "epoch": 0.12890827879921055, "grad_norm": 0.36012735962867737, "learning_rate": 9.595556702499045e-05, "loss": 1.86, "step": 1241 }, { "epoch": 0.1290121533187909, "grad_norm": 0.32633063197135925, "learning_rate": 9.594913588332055e-05, "loss": 1.5897, "step": 1242 }, { "epoch": 0.12911602783837126, "grad_norm": 0.35175660252571106, "learning_rate": 9.594269984842612e-05, "loss": 1.8549, "step": 1243 }, { "epoch": 0.12921990235795158, "grad_norm": 0.3716254234313965, "learning_rate": 9.59362589209925e-05, "loss": 1.8379, "step": 1244 }, { "epoch": 0.12932377687753194, "grad_norm": 0.3607979118824005, "learning_rate": 9.592981310170563e-05, "loss": 1.8592, "step": 1245 }, { "epoch": 0.1294276513971123, "grad_norm": 0.3418334424495697, "learning_rate": 9.592336239125193e-05, "loss": 1.7502, "step": 1246 }, { "epoch": 0.12953152591669265, "grad_norm": 0.3638349771499634, "learning_rate": 9.591690679031835e-05, "loss": 1.8981, "step": 1247 }, { "epoch": 0.12963540043627297, "grad_norm": 0.36420029401779175, "learning_rate": 9.591044629959236e-05, "loss": 1.913, "step": 1248 }, { "epoch": 0.12973927495585333, "grad_norm": 0.3521292805671692, "learning_rate": 9.590398091976195e-05, "loss": 1.6886, "step": 1249 }, { "epoch": 0.12984314947543368, "grad_norm": 0.3583468198776245, "learning_rate": 9.589751065151567e-05, "loss": 1.7328, "step": 1250 }, { "epoch": 0.129947023995014, "grad_norm": 0.3620645999908447, "learning_rate": 9.589103549554248e-05, "loss": 1.9107, "step": 1251 }, { "epoch": 0.13005089851459437, "grad_norm": 0.361101895570755, "learning_rate": 9.5884555452532e-05, "loss": 1.7693, "step": 1252 }, { "epoch": 0.13015477303417472, "grad_norm": 0.3591129183769226, "learning_rate": 9.587807052317426e-05, "loss": 1.696, "step": 1253 }, { "epoch": 0.13025864755375507, "grad_norm": 0.3596014678478241, "learning_rate": 9.587158070815988e-05, "loss": 1.9454, "step": 1254 }, { "epoch": 0.1303625220733354, "grad_norm": 0.35805267095565796, "learning_rate": 9.586508600817997e-05, "loss": 1.8159, "step": 1255 }, { "epoch": 0.13046639659291576, "grad_norm": 0.3794294595718384, "learning_rate": 9.585858642392616e-05, "loss": 1.7915, "step": 1256 }, { "epoch": 0.1305702711124961, "grad_norm": 0.350418359041214, "learning_rate": 9.585208195609059e-05, "loss": 1.754, "step": 1257 }, { "epoch": 0.13067414563207644, "grad_norm": 0.37485983967781067, "learning_rate": 9.584557260536597e-05, "loss": 1.8592, "step": 1258 }, { "epoch": 0.1307780201516568, "grad_norm": 0.38537418842315674, "learning_rate": 9.583905837244547e-05, "loss": 1.814, "step": 1259 }, { "epoch": 0.13088189467123715, "grad_norm": 0.3657982349395752, "learning_rate": 9.583253925802283e-05, "loss": 1.7968, "step": 1260 }, { "epoch": 0.1309857691908175, "grad_norm": 0.4190506935119629, "learning_rate": 9.582601526279225e-05, "loss": 2.0191, "step": 1261 }, { "epoch": 0.13108964371039783, "grad_norm": 0.3682914674282074, "learning_rate": 9.581948638744849e-05, "loss": 1.7923, "step": 1262 }, { "epoch": 0.13119351822997818, "grad_norm": 0.33630824089050293, "learning_rate": 9.581295263268684e-05, "loss": 1.6561, "step": 1263 }, { "epoch": 0.13129739274955854, "grad_norm": 0.37690606713294983, "learning_rate": 9.580641399920311e-05, "loss": 1.8836, "step": 1264 }, { "epoch": 0.1314012672691389, "grad_norm": 0.3465617895126343, "learning_rate": 9.579987048769356e-05, "loss": 1.7498, "step": 1265 }, { "epoch": 0.13150514178871922, "grad_norm": 0.3308243155479431, "learning_rate": 9.579332209885508e-05, "loss": 1.6476, "step": 1266 }, { "epoch": 0.13160901630829958, "grad_norm": 0.38376688957214355, "learning_rate": 9.5786768833385e-05, "loss": 1.7795, "step": 1267 }, { "epoch": 0.13171289082787993, "grad_norm": 0.35049548745155334, "learning_rate": 9.578021069198118e-05, "loss": 1.718, "step": 1268 }, { "epoch": 0.13181676534746026, "grad_norm": 0.34886178374290466, "learning_rate": 9.577364767534202e-05, "loss": 1.7214, "step": 1269 }, { "epoch": 0.1319206398670406, "grad_norm": 0.36722105741500854, "learning_rate": 9.576707978416644e-05, "loss": 1.6402, "step": 1270 }, { "epoch": 0.13202451438662097, "grad_norm": 0.4114188849925995, "learning_rate": 9.576050701915385e-05, "loss": 1.8282, "step": 1271 }, { "epoch": 0.13212838890620132, "grad_norm": 0.40800419449806213, "learning_rate": 9.575392938100422e-05, "loss": 2.0049, "step": 1272 }, { "epoch": 0.13223226342578165, "grad_norm": 0.3313359320163727, "learning_rate": 9.5747346870418e-05, "loss": 1.678, "step": 1273 }, { "epoch": 0.132336137945362, "grad_norm": 0.33527714014053345, "learning_rate": 9.574075948809618e-05, "loss": 1.7132, "step": 1274 }, { "epoch": 0.13244001246494236, "grad_norm": 0.37509602308273315, "learning_rate": 9.573416723474026e-05, "loss": 1.7306, "step": 1275 }, { "epoch": 0.13254388698452269, "grad_norm": 0.41604387760162354, "learning_rate": 9.572757011105227e-05, "loss": 1.8486, "step": 1276 }, { "epoch": 0.13264776150410304, "grad_norm": 0.3609203100204468, "learning_rate": 9.572096811773477e-05, "loss": 1.7497, "step": 1277 }, { "epoch": 0.1327516360236834, "grad_norm": 0.346780389547348, "learning_rate": 9.571436125549078e-05, "loss": 1.6892, "step": 1278 }, { "epoch": 0.13285551054326375, "grad_norm": 0.37450289726257324, "learning_rate": 9.57077495250239e-05, "loss": 1.77, "step": 1279 }, { "epoch": 0.13295938506284408, "grad_norm": 0.36386212706565857, "learning_rate": 9.570113292703826e-05, "loss": 1.8936, "step": 1280 }, { "epoch": 0.13306325958242443, "grad_norm": 0.381849080324173, "learning_rate": 9.569451146223843e-05, "loss": 1.7727, "step": 1281 }, { "epoch": 0.13316713410200479, "grad_norm": 0.3608989715576172, "learning_rate": 9.568788513132955e-05, "loss": 1.8763, "step": 1282 }, { "epoch": 0.1332710086215851, "grad_norm": 0.37927472591400146, "learning_rate": 9.568125393501728e-05, "loss": 1.864, "step": 1283 }, { "epoch": 0.13337488314116547, "grad_norm": 0.37271663546562195, "learning_rate": 9.567461787400781e-05, "loss": 1.7884, "step": 1284 }, { "epoch": 0.13347875766074582, "grad_norm": 0.34601160883903503, "learning_rate": 9.566797694900779e-05, "loss": 1.7951, "step": 1285 }, { "epoch": 0.13358263218032618, "grad_norm": 0.32820773124694824, "learning_rate": 9.566133116072447e-05, "loss": 1.602, "step": 1286 }, { "epoch": 0.1336865066999065, "grad_norm": 0.36603811383247375, "learning_rate": 9.565468050986556e-05, "loss": 1.7799, "step": 1287 }, { "epoch": 0.13379038121948686, "grad_norm": 0.33986133337020874, "learning_rate": 9.564802499713928e-05, "loss": 1.7055, "step": 1288 }, { "epoch": 0.13389425573906721, "grad_norm": 0.37927207350730896, "learning_rate": 9.564136462325442e-05, "loss": 1.8803, "step": 1289 }, { "epoch": 0.13399813025864754, "grad_norm": 0.3654598593711853, "learning_rate": 9.563469938892022e-05, "loss": 1.8929, "step": 1290 }, { "epoch": 0.1341020047782279, "grad_norm": 0.38735446333885193, "learning_rate": 9.562802929484651e-05, "loss": 1.6864, "step": 1291 }, { "epoch": 0.13420587929780825, "grad_norm": 0.3329310417175293, "learning_rate": 9.56213543417436e-05, "loss": 1.7724, "step": 1292 }, { "epoch": 0.1343097538173886, "grad_norm": 0.37568461894989014, "learning_rate": 9.561467453032231e-05, "loss": 1.794, "step": 1293 }, { "epoch": 0.13441362833696893, "grad_norm": 0.3253065049648285, "learning_rate": 9.560798986129399e-05, "loss": 1.7469, "step": 1294 }, { "epoch": 0.1345175028565493, "grad_norm": 0.32807695865631104, "learning_rate": 9.56013003353705e-05, "loss": 1.7181, "step": 1295 }, { "epoch": 0.13462137737612964, "grad_norm": 0.34811633825302124, "learning_rate": 9.559460595326424e-05, "loss": 1.6823, "step": 1296 }, { "epoch": 0.13472525189570997, "grad_norm": 0.3585864007472992, "learning_rate": 9.558790671568809e-05, "loss": 1.8669, "step": 1297 }, { "epoch": 0.13482912641529032, "grad_norm": 0.3390277624130249, "learning_rate": 9.558120262335546e-05, "loss": 1.6597, "step": 1298 }, { "epoch": 0.13493300093487068, "grad_norm": 0.3469536006450653, "learning_rate": 9.557449367698032e-05, "loss": 1.8582, "step": 1299 }, { "epoch": 0.13503687545445103, "grad_norm": 0.35477346181869507, "learning_rate": 9.55677798772771e-05, "loss": 1.8483, "step": 1300 }, { "epoch": 0.13514074997403136, "grad_norm": 0.3245786726474762, "learning_rate": 9.556106122496076e-05, "loss": 1.6194, "step": 1301 }, { "epoch": 0.13524462449361171, "grad_norm": 0.37269967794418335, "learning_rate": 9.555433772074678e-05, "loss": 1.8113, "step": 1302 }, { "epoch": 0.13534849901319207, "grad_norm": 0.41088709235191345, "learning_rate": 9.554760936535116e-05, "loss": 1.9416, "step": 1303 }, { "epoch": 0.13545237353277242, "grad_norm": 0.35153427720069885, "learning_rate": 9.554087615949046e-05, "loss": 1.6847, "step": 1304 }, { "epoch": 0.13555624805235275, "grad_norm": 0.38244035840034485, "learning_rate": 9.553413810388165e-05, "loss": 1.8235, "step": 1305 }, { "epoch": 0.1356601225719331, "grad_norm": 0.33529362082481384, "learning_rate": 9.552739519924235e-05, "loss": 1.6378, "step": 1306 }, { "epoch": 0.13576399709151346, "grad_norm": 0.335685133934021, "learning_rate": 9.552064744629054e-05, "loss": 1.8144, "step": 1307 }, { "epoch": 0.1358678716110938, "grad_norm": 0.33851975202560425, "learning_rate": 9.551389484574489e-05, "loss": 1.6885, "step": 1308 }, { "epoch": 0.13597174613067414, "grad_norm": 0.4007343649864197, "learning_rate": 9.550713739832444e-05, "loss": 2.0096, "step": 1309 }, { "epoch": 0.1360756206502545, "grad_norm": 0.3581026494503021, "learning_rate": 9.550037510474883e-05, "loss": 1.7002, "step": 1310 }, { "epoch": 0.13617949516983485, "grad_norm": 0.3608642518520355, "learning_rate": 9.549360796573821e-05, "loss": 1.7883, "step": 1311 }, { "epoch": 0.13628336968941518, "grad_norm": 0.39638829231262207, "learning_rate": 9.548683598201317e-05, "loss": 1.7193, "step": 1312 }, { "epoch": 0.13638724420899553, "grad_norm": 0.3727129399776459, "learning_rate": 9.548005915429495e-05, "loss": 1.8664, "step": 1313 }, { "epoch": 0.1364911187285759, "grad_norm": 0.34268417954444885, "learning_rate": 9.547327748330514e-05, "loss": 1.7827, "step": 1314 }, { "epoch": 0.13659499324815622, "grad_norm": 0.3418048918247223, "learning_rate": 9.5466490969766e-05, "loss": 1.7287, "step": 1315 }, { "epoch": 0.13669886776773657, "grad_norm": 0.3453529477119446, "learning_rate": 9.545969961440025e-05, "loss": 1.7565, "step": 1316 }, { "epoch": 0.13680274228731693, "grad_norm": 0.35973402857780457, "learning_rate": 9.545290341793107e-05, "loss": 1.8727, "step": 1317 }, { "epoch": 0.13690661680689728, "grad_norm": 0.3567400872707367, "learning_rate": 9.544610238108223e-05, "loss": 1.8201, "step": 1318 }, { "epoch": 0.1370104913264776, "grad_norm": 0.37093833088874817, "learning_rate": 9.543929650457796e-05, "loss": 1.8134, "step": 1319 }, { "epoch": 0.13711436584605796, "grad_norm": 0.3393130600452423, "learning_rate": 9.543248578914307e-05, "loss": 1.6867, "step": 1320 }, { "epoch": 0.13721824036563832, "grad_norm": 0.3978770673274994, "learning_rate": 9.542567023550283e-05, "loss": 1.9418, "step": 1321 }, { "epoch": 0.13732211488521864, "grad_norm": 0.33980122208595276, "learning_rate": 9.541884984438304e-05, "loss": 1.6986, "step": 1322 }, { "epoch": 0.137425989404799, "grad_norm": 0.3643237054347992, "learning_rate": 9.541202461651002e-05, "loss": 1.6953, "step": 1323 }, { "epoch": 0.13752986392437935, "grad_norm": 0.3416014313697815, "learning_rate": 9.54051945526106e-05, "loss": 1.6347, "step": 1324 }, { "epoch": 0.1376337384439597, "grad_norm": 0.42336559295654297, "learning_rate": 9.539835965341215e-05, "loss": 2.0321, "step": 1325 }, { "epoch": 0.13773761296354003, "grad_norm": 0.3578283488750458, "learning_rate": 9.539151991964253e-05, "loss": 1.7978, "step": 1326 }, { "epoch": 0.1378414874831204, "grad_norm": 0.39390215277671814, "learning_rate": 9.538467535203007e-05, "loss": 2.0113, "step": 1327 }, { "epoch": 0.13794536200270074, "grad_norm": 0.34902918338775635, "learning_rate": 9.537782595130371e-05, "loss": 1.7936, "step": 1328 }, { "epoch": 0.13804923652228107, "grad_norm": 0.3545171916484833, "learning_rate": 9.537097171819287e-05, "loss": 1.8352, "step": 1329 }, { "epoch": 0.13815311104186143, "grad_norm": 0.41688135266304016, "learning_rate": 9.536411265342742e-05, "loss": 1.9648, "step": 1330 }, { "epoch": 0.13825698556144178, "grad_norm": 0.34818556904792786, "learning_rate": 9.535724875773784e-05, "loss": 1.6804, "step": 1331 }, { "epoch": 0.13836086008102214, "grad_norm": 0.3562292754650116, "learning_rate": 9.535038003185507e-05, "loss": 1.697, "step": 1332 }, { "epoch": 0.13846473460060246, "grad_norm": 0.3660946786403656, "learning_rate": 9.534350647651056e-05, "loss": 1.8382, "step": 1333 }, { "epoch": 0.13856860912018282, "grad_norm": 0.38286975026130676, "learning_rate": 9.533662809243632e-05, "loss": 1.8386, "step": 1334 }, { "epoch": 0.13867248363976317, "grad_norm": 0.3726559281349182, "learning_rate": 9.532974488036481e-05, "loss": 1.701, "step": 1335 }, { "epoch": 0.1387763581593435, "grad_norm": 0.37573710083961487, "learning_rate": 9.532285684102907e-05, "loss": 1.9953, "step": 1336 }, { "epoch": 0.13888023267892385, "grad_norm": 0.36941513419151306, "learning_rate": 9.53159639751626e-05, "loss": 1.7335, "step": 1337 }, { "epoch": 0.1389841071985042, "grad_norm": 0.3815058767795563, "learning_rate": 9.530906628349945e-05, "loss": 1.929, "step": 1338 }, { "epoch": 0.13908798171808456, "grad_norm": 0.3447597026824951, "learning_rate": 9.530216376677417e-05, "loss": 1.7699, "step": 1339 }, { "epoch": 0.1391918562376649, "grad_norm": 0.3957836627960205, "learning_rate": 9.529525642572182e-05, "loss": 1.8501, "step": 1340 }, { "epoch": 0.13929573075724525, "grad_norm": 0.44103363156318665, "learning_rate": 9.528834426107795e-05, "loss": 1.9139, "step": 1341 }, { "epoch": 0.1393996052768256, "grad_norm": 0.3422475755214691, "learning_rate": 9.52814272735787e-05, "loss": 1.7613, "step": 1342 }, { "epoch": 0.13950347979640595, "grad_norm": 0.3645455837249756, "learning_rate": 9.527450546396067e-05, "loss": 1.7897, "step": 1343 }, { "epoch": 0.13960735431598628, "grad_norm": 0.3628111779689789, "learning_rate": 9.526757883296094e-05, "loss": 1.782, "step": 1344 }, { "epoch": 0.13971122883556664, "grad_norm": 0.3505362868309021, "learning_rate": 9.526064738131717e-05, "loss": 1.7047, "step": 1345 }, { "epoch": 0.139815103355147, "grad_norm": 0.36256182193756104, "learning_rate": 9.52537111097675e-05, "loss": 1.8108, "step": 1346 }, { "epoch": 0.13991897787472732, "grad_norm": 0.34468889236450195, "learning_rate": 9.524677001905059e-05, "loss": 1.7674, "step": 1347 }, { "epoch": 0.14002285239430767, "grad_norm": 0.36335834860801697, "learning_rate": 9.52398241099056e-05, "loss": 1.7181, "step": 1348 }, { "epoch": 0.14012672691388803, "grad_norm": 0.3589484393596649, "learning_rate": 9.523287338307224e-05, "loss": 1.8431, "step": 1349 }, { "epoch": 0.14023060143346838, "grad_norm": 0.3601551949977875, "learning_rate": 9.522591783929069e-05, "loss": 1.6761, "step": 1350 }, { "epoch": 0.1403344759530487, "grad_norm": 0.34505337476730347, "learning_rate": 9.521895747930168e-05, "loss": 1.7923, "step": 1351 }, { "epoch": 0.14043835047262906, "grad_norm": 0.3898836374282837, "learning_rate": 9.521199230384641e-05, "loss": 1.9424, "step": 1352 }, { "epoch": 0.14054222499220942, "grad_norm": 0.4122181534767151, "learning_rate": 9.520502231366661e-05, "loss": 1.7709, "step": 1353 }, { "epoch": 0.14064609951178975, "grad_norm": 0.36563369631767273, "learning_rate": 9.519804750950457e-05, "loss": 1.8535, "step": 1354 }, { "epoch": 0.1407499740313701, "grad_norm": 0.3425121009349823, "learning_rate": 9.519106789210301e-05, "loss": 1.6418, "step": 1355 }, { "epoch": 0.14085384855095046, "grad_norm": 0.36973652243614197, "learning_rate": 9.518408346220525e-05, "loss": 1.7506, "step": 1356 }, { "epoch": 0.1409577230705308, "grad_norm": 0.36737698316574097, "learning_rate": 9.517709422055502e-05, "loss": 1.756, "step": 1357 }, { "epoch": 0.14106159759011114, "grad_norm": 0.36824169754981995, "learning_rate": 9.517010016789667e-05, "loss": 1.7849, "step": 1358 }, { "epoch": 0.1411654721096915, "grad_norm": 0.44827938079833984, "learning_rate": 9.516310130497498e-05, "loss": 2.191, "step": 1359 }, { "epoch": 0.14126934662927185, "grad_norm": 0.33879658579826355, "learning_rate": 9.51560976325353e-05, "loss": 1.6468, "step": 1360 }, { "epoch": 0.14137322114885217, "grad_norm": 0.3727847933769226, "learning_rate": 9.514908915132346e-05, "loss": 1.5898, "step": 1361 }, { "epoch": 0.14147709566843253, "grad_norm": 0.34985223412513733, "learning_rate": 9.514207586208578e-05, "loss": 1.5243, "step": 1362 }, { "epoch": 0.14158097018801288, "grad_norm": 0.39218172430992126, "learning_rate": 9.513505776556915e-05, "loss": 1.8003, "step": 1363 }, { "epoch": 0.14168484470759324, "grad_norm": 0.3964877426624298, "learning_rate": 9.512803486252093e-05, "loss": 1.7633, "step": 1364 }, { "epoch": 0.14178871922717357, "grad_norm": 0.3854914903640747, "learning_rate": 9.512100715368902e-05, "loss": 1.8179, "step": 1365 }, { "epoch": 0.14189259374675392, "grad_norm": 0.35278111696243286, "learning_rate": 9.511397463982181e-05, "loss": 1.8105, "step": 1366 }, { "epoch": 0.14199646826633427, "grad_norm": 0.45402637124061584, "learning_rate": 9.510693732166819e-05, "loss": 1.9222, "step": 1367 }, { "epoch": 0.1421003427859146, "grad_norm": 0.3488807678222656, "learning_rate": 9.50998951999776e-05, "loss": 1.6662, "step": 1368 }, { "epoch": 0.14220421730549496, "grad_norm": 0.3388180136680603, "learning_rate": 9.509284827549996e-05, "loss": 1.6714, "step": 1369 }, { "epoch": 0.1423080918250753, "grad_norm": 0.36008358001708984, "learning_rate": 9.508579654898571e-05, "loss": 1.8606, "step": 1370 }, { "epoch": 0.14241196634465567, "grad_norm": 0.37350037693977356, "learning_rate": 9.507874002118582e-05, "loss": 1.8999, "step": 1371 }, { "epoch": 0.142515840864236, "grad_norm": 0.3648621737957001, "learning_rate": 9.507167869285174e-05, "loss": 1.9342, "step": 1372 }, { "epoch": 0.14261971538381635, "grad_norm": 0.3702144920825958, "learning_rate": 9.506461256473545e-05, "loss": 1.8062, "step": 1373 }, { "epoch": 0.1427235899033967, "grad_norm": 0.37488213181495667, "learning_rate": 9.505754163758946e-05, "loss": 1.8751, "step": 1374 }, { "epoch": 0.14282746442297706, "grad_norm": 0.37895387411117554, "learning_rate": 9.505046591216674e-05, "loss": 1.6707, "step": 1375 }, { "epoch": 0.14293133894255738, "grad_norm": 0.3528745174407959, "learning_rate": 9.504338538922078e-05, "loss": 1.8434, "step": 1376 }, { "epoch": 0.14303521346213774, "grad_norm": 0.3752725422382355, "learning_rate": 9.503630006950566e-05, "loss": 1.9281, "step": 1377 }, { "epoch": 0.1431390879817181, "grad_norm": 0.38882383704185486, "learning_rate": 9.502920995377587e-05, "loss": 1.6748, "step": 1378 }, { "epoch": 0.14324296250129842, "grad_norm": 0.3682365417480469, "learning_rate": 9.502211504278644e-05, "loss": 1.767, "step": 1379 }, { "epoch": 0.14334683702087878, "grad_norm": 0.339139848947525, "learning_rate": 9.501501533729297e-05, "loss": 1.5963, "step": 1380 }, { "epoch": 0.14345071154045913, "grad_norm": 0.3614344298839569, "learning_rate": 9.50079108380515e-05, "loss": 1.7736, "step": 1381 }, { "epoch": 0.14355458606003949, "grad_norm": 0.33216235041618347, "learning_rate": 9.500080154581859e-05, "loss": 1.8277, "step": 1382 }, { "epoch": 0.1436584605796198, "grad_norm": 0.3475283086299896, "learning_rate": 9.499368746135133e-05, "loss": 1.7244, "step": 1383 }, { "epoch": 0.14376233509920017, "grad_norm": 0.35839635133743286, "learning_rate": 9.498656858540731e-05, "loss": 1.8232, "step": 1384 }, { "epoch": 0.14386620961878052, "grad_norm": 0.37137824296951294, "learning_rate": 9.497944491874467e-05, "loss": 1.7515, "step": 1385 }, { "epoch": 0.14397008413836085, "grad_norm": 0.35471752285957336, "learning_rate": 9.4972316462122e-05, "loss": 1.8021, "step": 1386 }, { "epoch": 0.1440739586579412, "grad_norm": 0.3838224411010742, "learning_rate": 9.49651832162984e-05, "loss": 1.8813, "step": 1387 }, { "epoch": 0.14417783317752156, "grad_norm": 0.32974106073379517, "learning_rate": 9.495804518203355e-05, "loss": 1.7155, "step": 1388 }, { "epoch": 0.1442817076971019, "grad_norm": 0.35001546144485474, "learning_rate": 9.495090236008756e-05, "loss": 1.8771, "step": 1389 }, { "epoch": 0.14438558221668224, "grad_norm": 0.37329480051994324, "learning_rate": 9.49437547512211e-05, "loss": 1.894, "step": 1390 }, { "epoch": 0.1444894567362626, "grad_norm": 0.35633528232574463, "learning_rate": 9.493660235619535e-05, "loss": 1.7673, "step": 1391 }, { "epoch": 0.14459333125584295, "grad_norm": 0.37964895367622375, "learning_rate": 9.492944517577196e-05, "loss": 2.0935, "step": 1392 }, { "epoch": 0.14469720577542328, "grad_norm": 0.33758020401000977, "learning_rate": 9.492228321071311e-05, "loss": 1.6482, "step": 1393 }, { "epoch": 0.14480108029500363, "grad_norm": 0.39080965518951416, "learning_rate": 9.491511646178151e-05, "loss": 1.6859, "step": 1394 }, { "epoch": 0.14490495481458399, "grad_norm": 0.36754679679870605, "learning_rate": 9.490794492974038e-05, "loss": 1.6268, "step": 1395 }, { "epoch": 0.14500882933416434, "grad_norm": 0.33904823660850525, "learning_rate": 9.490076861535339e-05, "loss": 1.8217, "step": 1396 }, { "epoch": 0.14511270385374467, "grad_norm": 0.36078742146492004, "learning_rate": 9.489358751938477e-05, "loss": 1.7979, "step": 1397 }, { "epoch": 0.14521657837332502, "grad_norm": 0.37665989995002747, "learning_rate": 9.488640164259929e-05, "loss": 1.7041, "step": 1398 }, { "epoch": 0.14532045289290538, "grad_norm": 0.3702797591686249, "learning_rate": 9.487921098576216e-05, "loss": 1.8746, "step": 1399 }, { "epoch": 0.1454243274124857, "grad_norm": 0.3909272253513336, "learning_rate": 9.487201554963911e-05, "loss": 1.9115, "step": 1400 }, { "epoch": 0.14552820193206606, "grad_norm": 0.33725446462631226, "learning_rate": 9.486481533499642e-05, "loss": 1.6462, "step": 1401 }, { "epoch": 0.14563207645164641, "grad_norm": 0.3404362201690674, "learning_rate": 9.485761034260087e-05, "loss": 1.5963, "step": 1402 }, { "epoch": 0.14573595097122677, "grad_norm": 0.3509582281112671, "learning_rate": 9.485040057321972e-05, "loss": 1.7931, "step": 1403 }, { "epoch": 0.1458398254908071, "grad_norm": 0.3783257007598877, "learning_rate": 9.484318602762077e-05, "loss": 2.0324, "step": 1404 }, { "epoch": 0.14594370001038745, "grad_norm": 0.36144283413887024, "learning_rate": 9.483596670657228e-05, "loss": 1.5949, "step": 1405 }, { "epoch": 0.1460475745299678, "grad_norm": 0.33135420083999634, "learning_rate": 9.482874261084307e-05, "loss": 1.6486, "step": 1406 }, { "epoch": 0.14615144904954813, "grad_norm": 0.35105010867118835, "learning_rate": 9.482151374120244e-05, "loss": 1.7674, "step": 1407 }, { "epoch": 0.1462553235691285, "grad_norm": 0.3350829780101776, "learning_rate": 9.481428009842024e-05, "loss": 1.6668, "step": 1408 }, { "epoch": 0.14635919808870884, "grad_norm": 0.3917206823825836, "learning_rate": 9.480704168326676e-05, "loss": 1.7319, "step": 1409 }, { "epoch": 0.1464630726082892, "grad_norm": 0.35552677512168884, "learning_rate": 9.479979849651286e-05, "loss": 1.7501, "step": 1410 }, { "epoch": 0.14656694712786952, "grad_norm": 0.3432137072086334, "learning_rate": 9.479255053892986e-05, "loss": 1.7739, "step": 1411 }, { "epoch": 0.14667082164744988, "grad_norm": 0.37724825739860535, "learning_rate": 9.478529781128962e-05, "loss": 1.8712, "step": 1412 }, { "epoch": 0.14677469616703023, "grad_norm": 0.37483900785446167, "learning_rate": 9.477804031436451e-05, "loss": 1.7262, "step": 1413 }, { "epoch": 0.1468785706866106, "grad_norm": 0.35302111506462097, "learning_rate": 9.47707780489274e-05, "loss": 1.68, "step": 1414 }, { "epoch": 0.14698244520619091, "grad_norm": 0.3883351683616638, "learning_rate": 9.476351101575163e-05, "loss": 1.7192, "step": 1415 }, { "epoch": 0.14708631972577127, "grad_norm": 0.3823285400867462, "learning_rate": 9.475623921561112e-05, "loss": 1.8355, "step": 1416 }, { "epoch": 0.14719019424535162, "grad_norm": 0.3662102520465851, "learning_rate": 9.474896264928024e-05, "loss": 1.5081, "step": 1417 }, { "epoch": 0.14729406876493195, "grad_norm": 0.35046282410621643, "learning_rate": 9.47416813175339e-05, "loss": 1.7609, "step": 1418 }, { "epoch": 0.1473979432845123, "grad_norm": 0.37491992115974426, "learning_rate": 9.47343952211475e-05, "loss": 1.9254, "step": 1419 }, { "epoch": 0.14750181780409266, "grad_norm": 0.38457345962524414, "learning_rate": 9.472710436089693e-05, "loss": 1.8976, "step": 1420 }, { "epoch": 0.14760569232367302, "grad_norm": 0.39856263995170593, "learning_rate": 9.471980873755865e-05, "loss": 1.989, "step": 1421 }, { "epoch": 0.14770956684325334, "grad_norm": 0.3745686113834381, "learning_rate": 9.471250835190957e-05, "loss": 1.7134, "step": 1422 }, { "epoch": 0.1478134413628337, "grad_norm": 0.360574871301651, "learning_rate": 9.470520320472712e-05, "loss": 1.722, "step": 1423 }, { "epoch": 0.14791731588241405, "grad_norm": 0.35429859161376953, "learning_rate": 9.469789329678922e-05, "loss": 1.5907, "step": 1424 }, { "epoch": 0.14802119040199438, "grad_norm": 0.3642517030239105, "learning_rate": 9.469057862887436e-05, "loss": 1.7298, "step": 1425 }, { "epoch": 0.14812506492157473, "grad_norm": 0.4171449840068817, "learning_rate": 9.468325920176148e-05, "loss": 1.9391, "step": 1426 }, { "epoch": 0.1482289394411551, "grad_norm": 0.3378536105155945, "learning_rate": 9.467593501623003e-05, "loss": 1.7072, "step": 1427 }, { "epoch": 0.14833281396073544, "grad_norm": 0.37104731798171997, "learning_rate": 9.466860607305998e-05, "loss": 1.7929, "step": 1428 }, { "epoch": 0.14843668848031577, "grad_norm": 0.38163742423057556, "learning_rate": 9.466127237303182e-05, "loss": 1.8288, "step": 1429 }, { "epoch": 0.14854056299989613, "grad_norm": 0.3787825107574463, "learning_rate": 9.465393391692653e-05, "loss": 1.7313, "step": 1430 }, { "epoch": 0.14864443751947648, "grad_norm": 0.3504464626312256, "learning_rate": 9.464659070552558e-05, "loss": 1.7905, "step": 1431 }, { "epoch": 0.1487483120390568, "grad_norm": 0.3874281346797943, "learning_rate": 9.463924273961099e-05, "loss": 1.8856, "step": 1432 }, { "epoch": 0.14885218655863716, "grad_norm": 0.35603591799736023, "learning_rate": 9.463189001996525e-05, "loss": 1.7671, "step": 1433 }, { "epoch": 0.14895606107821752, "grad_norm": 0.37857985496520996, "learning_rate": 9.462453254737133e-05, "loss": 1.7791, "step": 1434 }, { "epoch": 0.14905993559779787, "grad_norm": 0.3580288290977478, "learning_rate": 9.461717032261282e-05, "loss": 1.6404, "step": 1435 }, { "epoch": 0.1491638101173782, "grad_norm": 0.38064590096473694, "learning_rate": 9.460980334647367e-05, "loss": 1.8078, "step": 1436 }, { "epoch": 0.14926768463695855, "grad_norm": 0.3878273069858551, "learning_rate": 9.460243161973846e-05, "loss": 1.6382, "step": 1437 }, { "epoch": 0.1493715591565389, "grad_norm": 0.3887486457824707, "learning_rate": 9.459505514319216e-05, "loss": 1.8884, "step": 1438 }, { "epoch": 0.14947543367611923, "grad_norm": 0.42063435912132263, "learning_rate": 9.458767391762038e-05, "loss": 1.9866, "step": 1439 }, { "epoch": 0.1495793081956996, "grad_norm": 0.39019012451171875, "learning_rate": 9.45802879438091e-05, "loss": 1.8562, "step": 1440 }, { "epoch": 0.14968318271527994, "grad_norm": 0.36793825030326843, "learning_rate": 9.457289722254489e-05, "loss": 1.8974, "step": 1441 }, { "epoch": 0.1497870572348603, "grad_norm": 0.37522581219673157, "learning_rate": 9.456550175461483e-05, "loss": 1.878, "step": 1442 }, { "epoch": 0.14989093175444063, "grad_norm": 0.3458879590034485, "learning_rate": 9.455810154080644e-05, "loss": 1.7597, "step": 1443 }, { "epoch": 0.14999480627402098, "grad_norm": 0.3603193759918213, "learning_rate": 9.45506965819078e-05, "loss": 1.7119, "step": 1444 }, { "epoch": 0.15009868079360134, "grad_norm": 0.35782983899116516, "learning_rate": 9.45432868787075e-05, "loss": 1.7925, "step": 1445 }, { "epoch": 0.1502025553131817, "grad_norm": 0.3702535331249237, "learning_rate": 9.453587243199458e-05, "loss": 1.8274, "step": 1446 }, { "epoch": 0.15030642983276202, "grad_norm": 0.37376201152801514, "learning_rate": 9.452845324255865e-05, "loss": 1.6782, "step": 1447 }, { "epoch": 0.15041030435234237, "grad_norm": 0.3699168264865875, "learning_rate": 9.452102931118979e-05, "loss": 1.8381, "step": 1448 }, { "epoch": 0.15051417887192273, "grad_norm": 0.36657705903053284, "learning_rate": 9.451360063867857e-05, "loss": 1.8949, "step": 1449 }, { "epoch": 0.15061805339150305, "grad_norm": 0.35056763887405396, "learning_rate": 9.450616722581611e-05, "loss": 1.7087, "step": 1450 }, { "epoch": 0.1507219279110834, "grad_norm": 0.33786895871162415, "learning_rate": 9.449872907339401e-05, "loss": 1.7222, "step": 1451 }, { "epoch": 0.15082580243066376, "grad_norm": 0.3312149941921234, "learning_rate": 9.449128618220436e-05, "loss": 1.7871, "step": 1452 }, { "epoch": 0.15092967695024412, "grad_norm": 0.3575022518634796, "learning_rate": 9.448383855303977e-05, "loss": 1.7017, "step": 1453 }, { "epoch": 0.15103355146982445, "grad_norm": 0.35011738538742065, "learning_rate": 9.447638618669336e-05, "loss": 1.9229, "step": 1454 }, { "epoch": 0.1511374259894048, "grad_norm": 0.37051519751548767, "learning_rate": 9.446892908395877e-05, "loss": 1.8912, "step": 1455 }, { "epoch": 0.15124130050898515, "grad_norm": 0.3656577169895172, "learning_rate": 9.446146724563009e-05, "loss": 1.8665, "step": 1456 }, { "epoch": 0.15134517502856548, "grad_norm": 0.34047558903694153, "learning_rate": 9.445400067250196e-05, "loss": 1.6188, "step": 1457 }, { "epoch": 0.15144904954814584, "grad_norm": 0.3526557385921478, "learning_rate": 9.44465293653695e-05, "loss": 1.7484, "step": 1458 }, { "epoch": 0.1515529240677262, "grad_norm": 0.3618306815624237, "learning_rate": 9.443905332502837e-05, "loss": 1.6957, "step": 1459 }, { "epoch": 0.15165679858730655, "grad_norm": 0.39453551173210144, "learning_rate": 9.443157255227472e-05, "loss": 1.866, "step": 1460 }, { "epoch": 0.15176067310688687, "grad_norm": 0.3841235637664795, "learning_rate": 9.442408704790516e-05, "loss": 1.8885, "step": 1461 }, { "epoch": 0.15186454762646723, "grad_norm": 0.3559393286705017, "learning_rate": 9.441659681271684e-05, "loss": 1.5994, "step": 1462 }, { "epoch": 0.15196842214604758, "grad_norm": 0.3648277521133423, "learning_rate": 9.440910184750743e-05, "loss": 1.741, "step": 1463 }, { "epoch": 0.1520722966656279, "grad_norm": 0.35485556721687317, "learning_rate": 9.440160215307508e-05, "loss": 1.7756, "step": 1464 }, { "epoch": 0.15217617118520826, "grad_norm": 0.35386818647384644, "learning_rate": 9.439409773021844e-05, "loss": 1.877, "step": 1465 }, { "epoch": 0.15228004570478862, "grad_norm": 0.3678518235683441, "learning_rate": 9.438658857973668e-05, "loss": 1.7585, "step": 1466 }, { "epoch": 0.15238392022436897, "grad_norm": 0.36395224928855896, "learning_rate": 9.437907470242946e-05, "loss": 1.7805, "step": 1467 }, { "epoch": 0.1524877947439493, "grad_norm": 0.3579014539718628, "learning_rate": 9.437155609909696e-05, "loss": 1.7271, "step": 1468 }, { "epoch": 0.15259166926352966, "grad_norm": 0.3654552102088928, "learning_rate": 9.436403277053984e-05, "loss": 1.858, "step": 1469 }, { "epoch": 0.15269554378311, "grad_norm": 0.3869788646697998, "learning_rate": 9.435650471755931e-05, "loss": 1.8507, "step": 1470 }, { "epoch": 0.15279941830269034, "grad_norm": 0.3339064121246338, "learning_rate": 9.4348971940957e-05, "loss": 1.7086, "step": 1471 }, { "epoch": 0.1529032928222707, "grad_norm": 0.3778363764286041, "learning_rate": 9.434143444153511e-05, "loss": 1.936, "step": 1472 }, { "epoch": 0.15300716734185105, "grad_norm": 0.4051001965999603, "learning_rate": 9.433389222009633e-05, "loss": 1.7736, "step": 1473 }, { "epoch": 0.1531110418614314, "grad_norm": 0.35207393765449524, "learning_rate": 9.432634527744386e-05, "loss": 1.7176, "step": 1474 }, { "epoch": 0.15321491638101173, "grad_norm": 0.3851469159126282, "learning_rate": 9.431879361438137e-05, "loss": 1.9448, "step": 1475 }, { "epoch": 0.15331879090059208, "grad_norm": 0.35818904638290405, "learning_rate": 9.431123723171305e-05, "loss": 1.7773, "step": 1476 }, { "epoch": 0.15342266542017244, "grad_norm": 0.37601613998413086, "learning_rate": 9.430367613024361e-05, "loss": 1.9316, "step": 1477 }, { "epoch": 0.15352653993975277, "grad_norm": 0.3351070284843445, "learning_rate": 9.429611031077825e-05, "loss": 1.5443, "step": 1478 }, { "epoch": 0.15363041445933312, "grad_norm": 0.4057861566543579, "learning_rate": 9.428853977412266e-05, "loss": 1.8619, "step": 1479 }, { "epoch": 0.15373428897891347, "grad_norm": 0.3662700057029724, "learning_rate": 9.428096452108305e-05, "loss": 1.7752, "step": 1480 }, { "epoch": 0.15383816349849383, "grad_norm": 0.38205060362815857, "learning_rate": 9.427338455246612e-05, "loss": 1.7397, "step": 1481 }, { "epoch": 0.15394203801807416, "grad_norm": 0.3742862641811371, "learning_rate": 9.426579986907909e-05, "loss": 1.7385, "step": 1482 }, { "epoch": 0.1540459125376545, "grad_norm": 0.37825533747673035, "learning_rate": 9.425821047172965e-05, "loss": 1.8288, "step": 1483 }, { "epoch": 0.15414978705723487, "grad_norm": 0.42222508788108826, "learning_rate": 9.425061636122603e-05, "loss": 1.9567, "step": 1484 }, { "epoch": 0.15425366157681522, "grad_norm": 0.3459799885749817, "learning_rate": 9.424301753837692e-05, "loss": 1.7383, "step": 1485 }, { "epoch": 0.15435753609639555, "grad_norm": 0.38764312863349915, "learning_rate": 9.423541400399157e-05, "loss": 1.8508, "step": 1486 }, { "epoch": 0.1544614106159759, "grad_norm": 0.3998440206050873, "learning_rate": 9.422780575887967e-05, "loss": 1.9329, "step": 1487 }, { "epoch": 0.15456528513555626, "grad_norm": 0.365713894367218, "learning_rate": 9.422019280385145e-05, "loss": 1.9418, "step": 1488 }, { "epoch": 0.15466915965513658, "grad_norm": 0.3910145163536072, "learning_rate": 9.421257513971762e-05, "loss": 1.8998, "step": 1489 }, { "epoch": 0.15477303417471694, "grad_norm": 0.3563111424446106, "learning_rate": 9.420495276728943e-05, "loss": 1.8042, "step": 1490 }, { "epoch": 0.1548769086942973, "grad_norm": 0.3810329735279083, "learning_rate": 9.419732568737856e-05, "loss": 1.9044, "step": 1491 }, { "epoch": 0.15498078321387765, "grad_norm": 0.4031761884689331, "learning_rate": 9.418969390079726e-05, "loss": 2.098, "step": 1492 }, { "epoch": 0.15508465773345798, "grad_norm": 0.3679083287715912, "learning_rate": 9.418205740835825e-05, "loss": 1.7396, "step": 1493 }, { "epoch": 0.15518853225303833, "grad_norm": 0.35544419288635254, "learning_rate": 9.417441621087476e-05, "loss": 1.7114, "step": 1494 }, { "epoch": 0.15529240677261869, "grad_norm": 0.36036983132362366, "learning_rate": 9.416677030916053e-05, "loss": 1.828, "step": 1495 }, { "epoch": 0.155396281292199, "grad_norm": 0.34864529967308044, "learning_rate": 9.415911970402976e-05, "loss": 1.7821, "step": 1496 }, { "epoch": 0.15550015581177937, "grad_norm": 0.4014929533004761, "learning_rate": 9.41514643962972e-05, "loss": 1.8746, "step": 1497 }, { "epoch": 0.15560403033135972, "grad_norm": 0.39249807596206665, "learning_rate": 9.414380438677808e-05, "loss": 1.9818, "step": 1498 }, { "epoch": 0.15570790485094008, "grad_norm": 0.33457159996032715, "learning_rate": 9.413613967628814e-05, "loss": 1.7178, "step": 1499 }, { "epoch": 0.1558117793705204, "grad_norm": 0.326725572347641, "learning_rate": 9.412847026564357e-05, "loss": 1.5971, "step": 1500 }, { "epoch": 0.15591565389010076, "grad_norm": 0.36268436908721924, "learning_rate": 9.412079615566116e-05, "loss": 1.8022, "step": 1501 }, { "epoch": 0.1560195284096811, "grad_norm": 0.3470126986503601, "learning_rate": 9.411311734715811e-05, "loss": 1.8473, "step": 1502 }, { "epoch": 0.15612340292926144, "grad_norm": 0.3726639747619629, "learning_rate": 9.410543384095214e-05, "loss": 1.7718, "step": 1503 }, { "epoch": 0.1562272774488418, "grad_norm": 0.37733760476112366, "learning_rate": 9.409774563786152e-05, "loss": 1.8614, "step": 1504 }, { "epoch": 0.15633115196842215, "grad_norm": 0.4124131202697754, "learning_rate": 9.409005273870495e-05, "loss": 2.1187, "step": 1505 }, { "epoch": 0.1564350264880025, "grad_norm": 0.3965660631656647, "learning_rate": 9.40823551443017e-05, "loss": 1.7244, "step": 1506 }, { "epoch": 0.15653890100758283, "grad_norm": 0.390770822763443, "learning_rate": 9.407465285547147e-05, "loss": 1.8046, "step": 1507 }, { "epoch": 0.15664277552716319, "grad_norm": 0.369103342294693, "learning_rate": 9.406694587303451e-05, "loss": 1.7574, "step": 1508 }, { "epoch": 0.15674665004674354, "grad_norm": 0.38057541847229004, "learning_rate": 9.405923419781154e-05, "loss": 1.8895, "step": 1509 }, { "epoch": 0.15685052456632387, "grad_norm": 0.3767298460006714, "learning_rate": 9.405151783062381e-05, "loss": 1.6415, "step": 1510 }, { "epoch": 0.15695439908590422, "grad_norm": 0.35092034935951233, "learning_rate": 9.404379677229305e-05, "loss": 1.7707, "step": 1511 }, { "epoch": 0.15705827360548458, "grad_norm": 0.3567756712436676, "learning_rate": 9.403607102364148e-05, "loss": 1.7814, "step": 1512 }, { "epoch": 0.15716214812506493, "grad_norm": 0.38232964277267456, "learning_rate": 9.402834058549184e-05, "loss": 2.0017, "step": 1513 }, { "epoch": 0.15726602264464526, "grad_norm": 0.35852310061454773, "learning_rate": 9.402060545866737e-05, "loss": 1.8552, "step": 1514 }, { "epoch": 0.15736989716422561, "grad_norm": 0.3555811047554016, "learning_rate": 9.401286564399178e-05, "loss": 1.8447, "step": 1515 }, { "epoch": 0.15747377168380597, "grad_norm": 0.33671462535858154, "learning_rate": 9.400512114228935e-05, "loss": 1.6783, "step": 1516 }, { "epoch": 0.1575776462033863, "grad_norm": 0.3423287570476532, "learning_rate": 9.399737195438473e-05, "loss": 1.7901, "step": 1517 }, { "epoch": 0.15768152072296665, "grad_norm": 0.3938925564289093, "learning_rate": 9.398961808110321e-05, "loss": 1.6647, "step": 1518 }, { "epoch": 0.157785395242547, "grad_norm": 0.366240918636322, "learning_rate": 9.398185952327048e-05, "loss": 1.7797, "step": 1519 }, { "epoch": 0.15788926976212736, "grad_norm": 0.3558516204357147, "learning_rate": 9.397409628171279e-05, "loss": 1.6302, "step": 1520 }, { "epoch": 0.1579931442817077, "grad_norm": 0.3891289234161377, "learning_rate": 9.396632835725687e-05, "loss": 1.9296, "step": 1521 }, { "epoch": 0.15809701880128804, "grad_norm": 0.4402526319026947, "learning_rate": 9.395855575072991e-05, "loss": 1.9748, "step": 1522 }, { "epoch": 0.1582008933208684, "grad_norm": 0.3928702473640442, "learning_rate": 9.395077846295967e-05, "loss": 1.9662, "step": 1523 }, { "epoch": 0.15830476784044875, "grad_norm": 0.36685439944267273, "learning_rate": 9.394299649477434e-05, "loss": 1.7051, "step": 1524 }, { "epoch": 0.15840864236002908, "grad_norm": 0.36427903175354004, "learning_rate": 9.393520984700266e-05, "loss": 1.8365, "step": 1525 }, { "epoch": 0.15851251687960943, "grad_norm": 0.3728634715080261, "learning_rate": 9.392741852047385e-05, "loss": 1.8812, "step": 1526 }, { "epoch": 0.1586163913991898, "grad_norm": 0.3916126489639282, "learning_rate": 9.391962251601758e-05, "loss": 1.8, "step": 1527 }, { "epoch": 0.15872026591877011, "grad_norm": 0.3563532531261444, "learning_rate": 9.391182183446413e-05, "loss": 1.5527, "step": 1528 }, { "epoch": 0.15882414043835047, "grad_norm": 0.3756893277168274, "learning_rate": 9.390401647664417e-05, "loss": 1.9147, "step": 1529 }, { "epoch": 0.15892801495793082, "grad_norm": 0.35163572430610657, "learning_rate": 9.389620644338892e-05, "loss": 1.7478, "step": 1530 }, { "epoch": 0.15903188947751118, "grad_norm": 0.3400569260120392, "learning_rate": 9.388839173553007e-05, "loss": 1.5696, "step": 1531 }, { "epoch": 0.1591357639970915, "grad_norm": 0.3381504416465759, "learning_rate": 9.388057235389986e-05, "loss": 1.6998, "step": 1532 }, { "epoch": 0.15923963851667186, "grad_norm": 0.35516080260276794, "learning_rate": 9.3872748299331e-05, "loss": 1.8103, "step": 1533 }, { "epoch": 0.15934351303625222, "grad_norm": 0.3808714747428894, "learning_rate": 9.386491957265662e-05, "loss": 1.7707, "step": 1534 }, { "epoch": 0.15944738755583254, "grad_norm": 0.3629775941371918, "learning_rate": 9.38570861747105e-05, "loss": 1.8373, "step": 1535 }, { "epoch": 0.1595512620754129, "grad_norm": 0.340930700302124, "learning_rate": 9.384924810632679e-05, "loss": 1.7374, "step": 1536 }, { "epoch": 0.15965513659499325, "grad_norm": 0.40548157691955566, "learning_rate": 9.384140536834018e-05, "loss": 2.0285, "step": 1537 }, { "epoch": 0.1597590111145736, "grad_norm": 0.39659062027931213, "learning_rate": 9.383355796158588e-05, "loss": 1.8455, "step": 1538 }, { "epoch": 0.15986288563415393, "grad_norm": 0.35021933913230896, "learning_rate": 9.382570588689958e-05, "loss": 1.8248, "step": 1539 }, { "epoch": 0.1599667601537343, "grad_norm": 0.39401671290397644, "learning_rate": 9.381784914511746e-05, "loss": 2.1518, "step": 1540 }, { "epoch": 0.16007063467331464, "grad_norm": 0.3451925814151764, "learning_rate": 9.38099877370762e-05, "loss": 1.7234, "step": 1541 }, { "epoch": 0.16017450919289497, "grad_norm": 0.3629453778266907, "learning_rate": 9.380212166361298e-05, "loss": 1.7744, "step": 1542 }, { "epoch": 0.16027838371247533, "grad_norm": 0.4119042456150055, "learning_rate": 9.379425092556545e-05, "loss": 2.0752, "step": 1543 }, { "epoch": 0.16038225823205568, "grad_norm": 0.347231388092041, "learning_rate": 9.378637552377181e-05, "loss": 1.7362, "step": 1544 }, { "epoch": 0.16048613275163603, "grad_norm": 0.4104901850223541, "learning_rate": 9.377849545907074e-05, "loss": 1.9631, "step": 1545 }, { "epoch": 0.16059000727121636, "grad_norm": 0.396040141582489, "learning_rate": 9.377061073230138e-05, "loss": 1.955, "step": 1546 }, { "epoch": 0.16069388179079672, "grad_norm": 0.3335644602775574, "learning_rate": 9.37627213443034e-05, "loss": 1.5817, "step": 1547 }, { "epoch": 0.16079775631037707, "grad_norm": 0.3619517385959625, "learning_rate": 9.375482729591696e-05, "loss": 1.7563, "step": 1548 }, { "epoch": 0.1609016308299574, "grad_norm": 0.35229092836380005, "learning_rate": 9.37469285879827e-05, "loss": 1.7638, "step": 1549 }, { "epoch": 0.16100550534953775, "grad_norm": 0.36668309569358826, "learning_rate": 9.373902522134181e-05, "loss": 1.6821, "step": 1550 }, { "epoch": 0.1611093798691181, "grad_norm": 0.38943883776664734, "learning_rate": 9.37311171968359e-05, "loss": 1.6423, "step": 1551 }, { "epoch": 0.16121325438869846, "grad_norm": 0.35452452301979065, "learning_rate": 9.372320451530711e-05, "loss": 1.7089, "step": 1552 }, { "epoch": 0.1613171289082788, "grad_norm": 0.39926689863204956, "learning_rate": 9.371528717759811e-05, "loss": 1.801, "step": 1553 }, { "epoch": 0.16142100342785914, "grad_norm": 0.37590524554252625, "learning_rate": 9.370736518455203e-05, "loss": 1.7258, "step": 1554 }, { "epoch": 0.1615248779474395, "grad_norm": 0.35222113132476807, "learning_rate": 9.369943853701246e-05, "loss": 1.8522, "step": 1555 }, { "epoch": 0.16162875246701985, "grad_norm": 0.35563012957572937, "learning_rate": 9.369150723582358e-05, "loss": 1.6075, "step": 1556 }, { "epoch": 0.16173262698660018, "grad_norm": 0.39180493354797363, "learning_rate": 9.368357128182998e-05, "loss": 1.7797, "step": 1557 }, { "epoch": 0.16183650150618054, "grad_norm": 0.3635503053665161, "learning_rate": 9.367563067587677e-05, "loss": 1.8579, "step": 1558 }, { "epoch": 0.1619403760257609, "grad_norm": 0.3636340796947479, "learning_rate": 9.366768541880958e-05, "loss": 1.708, "step": 1559 }, { "epoch": 0.16204425054534122, "grad_norm": 0.39017453789711, "learning_rate": 9.365973551147453e-05, "loss": 1.8155, "step": 1560 }, { "epoch": 0.16214812506492157, "grad_norm": 0.3406469225883484, "learning_rate": 9.365178095471819e-05, "loss": 1.7573, "step": 1561 }, { "epoch": 0.16225199958450193, "grad_norm": 0.36579304933547974, "learning_rate": 9.364382174938768e-05, "loss": 1.898, "step": 1562 }, { "epoch": 0.16235587410408228, "grad_norm": 0.3369811773300171, "learning_rate": 9.363585789633057e-05, "loss": 1.6292, "step": 1563 }, { "epoch": 0.1624597486236626, "grad_norm": 0.3355672061443329, "learning_rate": 9.3627889396395e-05, "loss": 1.6484, "step": 1564 }, { "epoch": 0.16256362314324296, "grad_norm": 0.37393924593925476, "learning_rate": 9.36199162504295e-05, "loss": 1.7915, "step": 1565 }, { "epoch": 0.16266749766282332, "grad_norm": 0.37588125467300415, "learning_rate": 9.361193845928318e-05, "loss": 1.6923, "step": 1566 }, { "epoch": 0.16277137218240365, "grad_norm": 0.4559624493122101, "learning_rate": 9.360395602380559e-05, "loss": 1.8842, "step": 1567 }, { "epoch": 0.162875246701984, "grad_norm": 0.39234891533851624, "learning_rate": 9.359596894484682e-05, "loss": 1.9783, "step": 1568 }, { "epoch": 0.16297912122156435, "grad_norm": 0.36912286281585693, "learning_rate": 9.35879772232574e-05, "loss": 1.8153, "step": 1569 }, { "epoch": 0.1630829957411447, "grad_norm": 0.3693285584449768, "learning_rate": 9.357998085988842e-05, "loss": 1.5263, "step": 1570 }, { "epoch": 0.16318687026072504, "grad_norm": 0.38860028982162476, "learning_rate": 9.35719798555914e-05, "loss": 1.6806, "step": 1571 }, { "epoch": 0.1632907447803054, "grad_norm": 0.401967853307724, "learning_rate": 9.356397421121842e-05, "loss": 1.855, "step": 1572 }, { "epoch": 0.16339461929988575, "grad_norm": 0.35193613171577454, "learning_rate": 9.355596392762197e-05, "loss": 1.8909, "step": 1573 }, { "epoch": 0.16349849381946607, "grad_norm": 0.38218873739242554, "learning_rate": 9.354794900565515e-05, "loss": 1.874, "step": 1574 }, { "epoch": 0.16360236833904643, "grad_norm": 0.3593958020210266, "learning_rate": 9.353992944617142e-05, "loss": 1.6768, "step": 1575 }, { "epoch": 0.16370624285862678, "grad_norm": 0.3737293481826782, "learning_rate": 9.353190525002484e-05, "loss": 1.6279, "step": 1576 }, { "epoch": 0.16381011737820714, "grad_norm": 0.38832589983940125, "learning_rate": 9.35238764180699e-05, "loss": 1.8541, "step": 1577 }, { "epoch": 0.16391399189778746, "grad_norm": 0.37959024310112, "learning_rate": 9.351584295116165e-05, "loss": 1.5282, "step": 1578 }, { "epoch": 0.16401786641736782, "grad_norm": 0.37193652987480164, "learning_rate": 9.350780485015553e-05, "loss": 1.6896, "step": 1579 }, { "epoch": 0.16412174093694817, "grad_norm": 0.35193783044815063, "learning_rate": 9.349976211590758e-05, "loss": 1.57, "step": 1580 }, { "epoch": 0.1642256154565285, "grad_norm": 0.3922156095504761, "learning_rate": 9.349171474927427e-05, "loss": 1.5819, "step": 1581 }, { "epoch": 0.16432948997610886, "grad_norm": 0.36705270409584045, "learning_rate": 9.34836627511126e-05, "loss": 1.6357, "step": 1582 }, { "epoch": 0.1644333644956892, "grad_norm": 0.36877331137657166, "learning_rate": 9.347560612228003e-05, "loss": 1.7312, "step": 1583 }, { "epoch": 0.16453723901526957, "grad_norm": 0.3872382342815399, "learning_rate": 9.346754486363452e-05, "loss": 1.8537, "step": 1584 }, { "epoch": 0.1646411135348499, "grad_norm": 0.3447991609573364, "learning_rate": 9.345947897603455e-05, "loss": 1.6428, "step": 1585 }, { "epoch": 0.16474498805443025, "grad_norm": 0.3650977909564972, "learning_rate": 9.345140846033906e-05, "loss": 1.8029, "step": 1586 }, { "epoch": 0.1648488625740106, "grad_norm": 0.35949772596359253, "learning_rate": 9.344333331740751e-05, "loss": 1.6589, "step": 1587 }, { "epoch": 0.16495273709359093, "grad_norm": 0.38417744636535645, "learning_rate": 9.343525354809984e-05, "loss": 1.7891, "step": 1588 }, { "epoch": 0.16505661161317128, "grad_norm": 0.36684268712997437, "learning_rate": 9.342716915327647e-05, "loss": 1.7708, "step": 1589 }, { "epoch": 0.16516048613275164, "grad_norm": 0.3678865134716034, "learning_rate": 9.341908013379831e-05, "loss": 1.7155, "step": 1590 }, { "epoch": 0.165264360652332, "grad_norm": 0.38696667551994324, "learning_rate": 9.341098649052681e-05, "loss": 1.8784, "step": 1591 }, { "epoch": 0.16536823517191232, "grad_norm": 0.3689199388027191, "learning_rate": 9.340288822432388e-05, "loss": 1.874, "step": 1592 }, { "epoch": 0.16547210969149267, "grad_norm": 0.34134095907211304, "learning_rate": 9.339478533605191e-05, "loss": 1.5365, "step": 1593 }, { "epoch": 0.16557598421107303, "grad_norm": 0.3707195818424225, "learning_rate": 9.338667782657378e-05, "loss": 1.7984, "step": 1594 }, { "epoch": 0.16567985873065338, "grad_norm": 0.3389596939086914, "learning_rate": 9.337856569675288e-05, "loss": 1.6244, "step": 1595 }, { "epoch": 0.1657837332502337, "grad_norm": 0.3761754035949707, "learning_rate": 9.337044894745313e-05, "loss": 1.8774, "step": 1596 }, { "epoch": 0.16588760776981407, "grad_norm": 0.37581124901771545, "learning_rate": 9.336232757953884e-05, "loss": 1.7503, "step": 1597 }, { "epoch": 0.16599148228939442, "grad_norm": 0.37551531195640564, "learning_rate": 9.335420159387491e-05, "loss": 1.8433, "step": 1598 }, { "epoch": 0.16609535680897475, "grad_norm": 0.3547391891479492, "learning_rate": 9.33460709913267e-05, "loss": 1.8019, "step": 1599 }, { "epoch": 0.1661992313285551, "grad_norm": 0.40177521109580994, "learning_rate": 9.333793577276004e-05, "loss": 1.9248, "step": 1600 }, { "epoch": 0.16630310584813546, "grad_norm": 0.345114141702652, "learning_rate": 9.332979593904125e-05, "loss": 1.674, "step": 1601 }, { "epoch": 0.1664069803677158, "grad_norm": 0.36117351055145264, "learning_rate": 9.33216514910372e-05, "loss": 1.9506, "step": 1602 }, { "epoch": 0.16651085488729614, "grad_norm": 0.3475273847579956, "learning_rate": 9.331350242961518e-05, "loss": 1.716, "step": 1603 }, { "epoch": 0.1666147294068765, "grad_norm": 0.3611377477645874, "learning_rate": 9.3305348755643e-05, "loss": 1.7274, "step": 1604 }, { "epoch": 0.16671860392645685, "grad_norm": 0.40187951922416687, "learning_rate": 9.329719046998898e-05, "loss": 1.7231, "step": 1605 }, { "epoch": 0.16682247844603718, "grad_norm": 0.3646019399166107, "learning_rate": 9.328902757352191e-05, "loss": 1.6772, "step": 1606 }, { "epoch": 0.16692635296561753, "grad_norm": 0.34303680062294006, "learning_rate": 9.328086006711108e-05, "loss": 1.6853, "step": 1607 }, { "epoch": 0.16703022748519789, "grad_norm": 0.36761873960494995, "learning_rate": 9.327268795162622e-05, "loss": 1.7584, "step": 1608 }, { "epoch": 0.16713410200477824, "grad_norm": 0.36471322178840637, "learning_rate": 9.326451122793766e-05, "loss": 1.7157, "step": 1609 }, { "epoch": 0.16723797652435857, "grad_norm": 0.385139524936676, "learning_rate": 9.325632989691613e-05, "loss": 1.7911, "step": 1610 }, { "epoch": 0.16734185104393892, "grad_norm": 0.3613028824329376, "learning_rate": 9.324814395943289e-05, "loss": 1.7849, "step": 1611 }, { "epoch": 0.16744572556351928, "grad_norm": 0.3828355669975281, "learning_rate": 9.323995341635963e-05, "loss": 1.9662, "step": 1612 }, { "epoch": 0.1675496000830996, "grad_norm": 0.3685465157032013, "learning_rate": 9.323175826856865e-05, "loss": 1.8091, "step": 1613 }, { "epoch": 0.16765347460267996, "grad_norm": 0.36837613582611084, "learning_rate": 9.322355851693263e-05, "loss": 1.716, "step": 1614 }, { "epoch": 0.1677573491222603, "grad_norm": 0.4158804416656494, "learning_rate": 9.321535416232476e-05, "loss": 1.8553, "step": 1615 }, { "epoch": 0.16786122364184067, "grad_norm": 0.35498541593551636, "learning_rate": 9.32071452056188e-05, "loss": 1.6873, "step": 1616 }, { "epoch": 0.167965098161421, "grad_norm": 0.37819525599479675, "learning_rate": 9.319893164768888e-05, "loss": 1.7064, "step": 1617 }, { "epoch": 0.16806897268100135, "grad_norm": 0.3379732668399811, "learning_rate": 9.31907134894097e-05, "loss": 1.6289, "step": 1618 }, { "epoch": 0.1681728472005817, "grad_norm": 0.38348469138145447, "learning_rate": 9.318249073165646e-05, "loss": 1.7028, "step": 1619 }, { "epoch": 0.16827672172016203, "grad_norm": 0.3996196985244751, "learning_rate": 9.317426337530477e-05, "loss": 1.7427, "step": 1620 }, { "epoch": 0.16838059623974239, "grad_norm": 0.3799591064453125, "learning_rate": 9.316603142123082e-05, "loss": 1.6961, "step": 1621 }, { "epoch": 0.16848447075932274, "grad_norm": 0.36466121673583984, "learning_rate": 9.315779487031122e-05, "loss": 1.8285, "step": 1622 }, { "epoch": 0.1685883452789031, "grad_norm": 0.35838162899017334, "learning_rate": 9.314955372342311e-05, "loss": 1.643, "step": 1623 }, { "epoch": 0.16869221979848342, "grad_norm": 0.37461066246032715, "learning_rate": 9.314130798144412e-05, "loss": 1.8822, "step": 1624 }, { "epoch": 0.16879609431806378, "grad_norm": 0.4094388484954834, "learning_rate": 9.313305764525232e-05, "loss": 1.9001, "step": 1625 }, { "epoch": 0.16889996883764413, "grad_norm": 0.382744163274765, "learning_rate": 9.312480271572634e-05, "loss": 1.8858, "step": 1626 }, { "epoch": 0.16900384335722446, "grad_norm": 0.3661576807498932, "learning_rate": 9.311654319374528e-05, "loss": 1.8909, "step": 1627 }, { "epoch": 0.16910771787680481, "grad_norm": 0.38933804631233215, "learning_rate": 9.310827908018866e-05, "loss": 1.8039, "step": 1628 }, { "epoch": 0.16921159239638517, "grad_norm": 0.3421494662761688, "learning_rate": 9.31000103759366e-05, "loss": 1.6478, "step": 1629 }, { "epoch": 0.16931546691596552, "grad_norm": 0.3714037239551544, "learning_rate": 9.309173708186963e-05, "loss": 1.7259, "step": 1630 }, { "epoch": 0.16941934143554585, "grad_norm": 0.4035167694091797, "learning_rate": 9.308345919886877e-05, "loss": 1.8386, "step": 1631 }, { "epoch": 0.1695232159551262, "grad_norm": 0.37035608291625977, "learning_rate": 9.307517672781557e-05, "loss": 1.8391, "step": 1632 }, { "epoch": 0.16962709047470656, "grad_norm": 0.37822678685188293, "learning_rate": 9.306688966959205e-05, "loss": 1.8277, "step": 1633 }, { "epoch": 0.16973096499428691, "grad_norm": 0.3702380955219269, "learning_rate": 9.305859802508073e-05, "loss": 1.7996, "step": 1634 }, { "epoch": 0.16983483951386724, "grad_norm": 0.37005430459976196, "learning_rate": 9.305030179516458e-05, "loss": 1.7441, "step": 1635 }, { "epoch": 0.1699387140334476, "grad_norm": 0.38137298822402954, "learning_rate": 9.304200098072709e-05, "loss": 1.9145, "step": 1636 }, { "epoch": 0.17004258855302795, "grad_norm": 0.39384132623672485, "learning_rate": 9.303369558265224e-05, "loss": 1.7381, "step": 1637 }, { "epoch": 0.17014646307260828, "grad_norm": 0.34569051861763, "learning_rate": 9.30253856018245e-05, "loss": 1.6391, "step": 1638 }, { "epoch": 0.17025033759218863, "grad_norm": 0.38778260350227356, "learning_rate": 9.301707103912878e-05, "loss": 1.8337, "step": 1639 }, { "epoch": 0.170354212111769, "grad_norm": 0.390543133020401, "learning_rate": 9.300875189545055e-05, "loss": 1.9798, "step": 1640 }, { "epoch": 0.17045808663134934, "grad_norm": 0.3621176779270172, "learning_rate": 9.300042817167574e-05, "loss": 1.6744, "step": 1641 }, { "epoch": 0.17056196115092967, "grad_norm": 0.43153461813926697, "learning_rate": 9.299209986869073e-05, "loss": 2.0351, "step": 1642 }, { "epoch": 0.17066583567051002, "grad_norm": 0.39063888788223267, "learning_rate": 9.298376698738242e-05, "loss": 1.8298, "step": 1643 }, { "epoch": 0.17076971019009038, "grad_norm": 0.36155179142951965, "learning_rate": 9.297542952863824e-05, "loss": 1.7124, "step": 1644 }, { "epoch": 0.1708735847096707, "grad_norm": 0.4032329022884369, "learning_rate": 9.296708749334603e-05, "loss": 2.0271, "step": 1645 }, { "epoch": 0.17097745922925106, "grad_norm": 0.39317673444747925, "learning_rate": 9.295874088239416e-05, "loss": 1.8808, "step": 1646 }, { "epoch": 0.17108133374883142, "grad_norm": 0.36430609226226807, "learning_rate": 9.295038969667145e-05, "loss": 1.8879, "step": 1647 }, { "epoch": 0.17118520826841177, "grad_norm": 0.3385809361934662, "learning_rate": 9.29420339370673e-05, "loss": 1.644, "step": 1648 }, { "epoch": 0.1712890827879921, "grad_norm": 0.3685392737388611, "learning_rate": 9.293367360447148e-05, "loss": 1.907, "step": 1649 }, { "epoch": 0.17139295730757245, "grad_norm": 0.36233484745025635, "learning_rate": 9.292530869977432e-05, "loss": 1.6838, "step": 1650 }, { "epoch": 0.1714968318271528, "grad_norm": 0.4106137752532959, "learning_rate": 9.29169392238666e-05, "loss": 1.8516, "step": 1651 }, { "epoch": 0.17160070634673313, "grad_norm": 0.4084801971912384, "learning_rate": 9.290856517763965e-05, "loss": 1.8696, "step": 1652 }, { "epoch": 0.1717045808663135, "grad_norm": 0.3670850694179535, "learning_rate": 9.290018656198517e-05, "loss": 1.7186, "step": 1653 }, { "epoch": 0.17180845538589384, "grad_norm": 0.362164705991745, "learning_rate": 9.289180337779547e-05, "loss": 1.6732, "step": 1654 }, { "epoch": 0.1719123299054742, "grad_norm": 0.3619086742401123, "learning_rate": 9.288341562596328e-05, "loss": 1.6082, "step": 1655 }, { "epoch": 0.17201620442505453, "grad_norm": 0.3776550889015198, "learning_rate": 9.287502330738183e-05, "loss": 1.7092, "step": 1656 }, { "epoch": 0.17212007894463488, "grad_norm": 0.3793870210647583, "learning_rate": 9.286662642294484e-05, "loss": 1.6828, "step": 1657 }, { "epoch": 0.17222395346421523, "grad_norm": 0.3709464967250824, "learning_rate": 9.28582249735465e-05, "loss": 1.7037, "step": 1658 }, { "epoch": 0.17232782798379556, "grad_norm": 0.35695111751556396, "learning_rate": 9.284981896008152e-05, "loss": 1.7627, "step": 1659 }, { "epoch": 0.17243170250337592, "grad_norm": 0.3538592457771301, "learning_rate": 9.284140838344505e-05, "loss": 1.8424, "step": 1660 }, { "epoch": 0.17253557702295627, "grad_norm": 0.3740057647228241, "learning_rate": 9.283299324453276e-05, "loss": 1.9412, "step": 1661 }, { "epoch": 0.17263945154253663, "grad_norm": 0.3617721199989319, "learning_rate": 9.28245735442408e-05, "loss": 1.6443, "step": 1662 }, { "epoch": 0.17274332606211695, "grad_norm": 0.40431293845176697, "learning_rate": 9.281614928346582e-05, "loss": 1.9303, "step": 1663 }, { "epoch": 0.1728472005816973, "grad_norm": 0.3580422103404999, "learning_rate": 9.280772046310491e-05, "loss": 1.5823, "step": 1664 }, { "epoch": 0.17295107510127766, "grad_norm": 0.38240423798561096, "learning_rate": 9.279928708405568e-05, "loss": 1.6381, "step": 1665 }, { "epoch": 0.17305494962085802, "grad_norm": 0.3335290253162384, "learning_rate": 9.279084914721623e-05, "loss": 1.686, "step": 1666 }, { "epoch": 0.17315882414043834, "grad_norm": 0.3627927899360657, "learning_rate": 9.278240665348512e-05, "loss": 1.8426, "step": 1667 }, { "epoch": 0.1732626986600187, "grad_norm": 0.4823165833950043, "learning_rate": 9.277395960376143e-05, "loss": 2.0619, "step": 1668 }, { "epoch": 0.17336657317959905, "grad_norm": 0.36185434460639954, "learning_rate": 9.276550799894468e-05, "loss": 1.7277, "step": 1669 }, { "epoch": 0.17347044769917938, "grad_norm": 0.36980822682380676, "learning_rate": 9.275705183993491e-05, "loss": 1.7649, "step": 1670 }, { "epoch": 0.17357432221875974, "grad_norm": 0.3434619605541229, "learning_rate": 9.274859112763264e-05, "loss": 1.702, "step": 1671 }, { "epoch": 0.1736781967383401, "grad_norm": 0.3595278859138489, "learning_rate": 9.274012586293887e-05, "loss": 1.7485, "step": 1672 }, { "epoch": 0.17378207125792045, "grad_norm": 0.4083694815635681, "learning_rate": 9.273165604675509e-05, "loss": 1.7587, "step": 1673 }, { "epoch": 0.17388594577750077, "grad_norm": 0.3653002679347992, "learning_rate": 9.272318167998325e-05, "loss": 1.7234, "step": 1674 }, { "epoch": 0.17398982029708113, "grad_norm": 0.40430814027786255, "learning_rate": 9.27147027635258e-05, "loss": 1.8682, "step": 1675 }, { "epoch": 0.17409369481666148, "grad_norm": 0.4132486879825592, "learning_rate": 9.270621929828571e-05, "loss": 1.9371, "step": 1676 }, { "epoch": 0.1741975693362418, "grad_norm": 0.3685455322265625, "learning_rate": 9.269773128516638e-05, "loss": 1.8035, "step": 1677 }, { "epoch": 0.17430144385582216, "grad_norm": 0.35275524854660034, "learning_rate": 9.268923872507174e-05, "loss": 1.7536, "step": 1678 }, { "epoch": 0.17440531837540252, "grad_norm": 0.364773154258728, "learning_rate": 9.268074161890614e-05, "loss": 1.8075, "step": 1679 }, { "epoch": 0.17450919289498287, "grad_norm": 0.40323731303215027, "learning_rate": 9.267223996757451e-05, "loss": 1.8028, "step": 1680 }, { "epoch": 0.1746130674145632, "grad_norm": 0.34777867794036865, "learning_rate": 9.266373377198216e-05, "loss": 1.7143, "step": 1681 }, { "epoch": 0.17471694193414355, "grad_norm": 0.39449429512023926, "learning_rate": 9.265522303303494e-05, "loss": 1.8556, "step": 1682 }, { "epoch": 0.1748208164537239, "grad_norm": 0.3480819761753082, "learning_rate": 9.264670775163922e-05, "loss": 1.6931, "step": 1683 }, { "epoch": 0.17492469097330424, "grad_norm": 0.3808116316795349, "learning_rate": 9.263818792870176e-05, "loss": 1.6403, "step": 1684 }, { "epoch": 0.1750285654928846, "grad_norm": 0.3749628961086273, "learning_rate": 9.262966356512987e-05, "loss": 1.6206, "step": 1685 }, { "epoch": 0.17513244001246495, "grad_norm": 0.3475877642631531, "learning_rate": 9.262113466183136e-05, "loss": 1.6314, "step": 1686 }, { "epoch": 0.1752363145320453, "grad_norm": 0.3954698443412781, "learning_rate": 9.261260121971445e-05, "loss": 1.7696, "step": 1687 }, { "epoch": 0.17534018905162563, "grad_norm": 0.36054256558418274, "learning_rate": 9.260406323968791e-05, "loss": 1.734, "step": 1688 }, { "epoch": 0.17544406357120598, "grad_norm": 0.3659268617630005, "learning_rate": 9.259552072266096e-05, "loss": 1.6301, "step": 1689 }, { "epoch": 0.17554793809078634, "grad_norm": 0.3359396457672119, "learning_rate": 9.258697366954329e-05, "loss": 1.6708, "step": 1690 }, { "epoch": 0.17565181261036666, "grad_norm": 0.3496209979057312, "learning_rate": 9.257842208124514e-05, "loss": 1.7327, "step": 1691 }, { "epoch": 0.17575568712994702, "grad_norm": 0.4164491295814514, "learning_rate": 9.256986595867715e-05, "loss": 1.7489, "step": 1692 }, { "epoch": 0.17585956164952737, "grad_norm": 0.4587213695049286, "learning_rate": 9.256130530275048e-05, "loss": 2.1299, "step": 1693 }, { "epoch": 0.17596343616910773, "grad_norm": 0.40228530764579773, "learning_rate": 9.255274011437683e-05, "loss": 1.8252, "step": 1694 }, { "epoch": 0.17606731068868806, "grad_norm": 0.3676128685474396, "learning_rate": 9.254417039446824e-05, "loss": 1.6666, "step": 1695 }, { "epoch": 0.1761711852082684, "grad_norm": 0.3423883318901062, "learning_rate": 9.253559614393739e-05, "loss": 1.7213, "step": 1696 }, { "epoch": 0.17627505972784877, "grad_norm": 0.3809538781642914, "learning_rate": 9.252701736369732e-05, "loss": 1.8218, "step": 1697 }, { "epoch": 0.1763789342474291, "grad_norm": 0.351162314414978, "learning_rate": 9.251843405466162e-05, "loss": 1.7478, "step": 1698 }, { "epoch": 0.17648280876700945, "grad_norm": 0.3520200550556183, "learning_rate": 9.250984621774437e-05, "loss": 1.7541, "step": 1699 }, { "epoch": 0.1765866832865898, "grad_norm": 0.3424931764602661, "learning_rate": 9.250125385386007e-05, "loss": 1.6592, "step": 1700 }, { "epoch": 0.17669055780617016, "grad_norm": 0.3925075829029083, "learning_rate": 9.249265696392377e-05, "loss": 1.784, "step": 1701 }, { "epoch": 0.17679443232575048, "grad_norm": 0.3799852132797241, "learning_rate": 9.248405554885095e-05, "loss": 1.7904, "step": 1702 }, { "epoch": 0.17689830684533084, "grad_norm": 0.37215399742126465, "learning_rate": 9.24754496095576e-05, "loss": 1.8846, "step": 1703 }, { "epoch": 0.1770021813649112, "grad_norm": 0.37840306758880615, "learning_rate": 9.246683914696019e-05, "loss": 1.7425, "step": 1704 }, { "epoch": 0.17710605588449155, "grad_norm": 0.362456351518631, "learning_rate": 9.245822416197567e-05, "loss": 1.8047, "step": 1705 }, { "epoch": 0.17720993040407187, "grad_norm": 0.3728417456150055, "learning_rate": 9.244960465552148e-05, "loss": 1.8033, "step": 1706 }, { "epoch": 0.17731380492365223, "grad_norm": 0.3797183334827423, "learning_rate": 9.244098062851549e-05, "loss": 1.9075, "step": 1707 }, { "epoch": 0.17741767944323258, "grad_norm": 0.38207125663757324, "learning_rate": 9.243235208187612e-05, "loss": 1.8054, "step": 1708 }, { "epoch": 0.1775215539628129, "grad_norm": 0.3418576419353485, "learning_rate": 9.242371901652223e-05, "loss": 1.8205, "step": 1709 }, { "epoch": 0.17762542848239327, "grad_norm": 0.4341420829296112, "learning_rate": 9.241508143337319e-05, "loss": 2.1321, "step": 1710 }, { "epoch": 0.17772930300197362, "grad_norm": 0.38019636273384094, "learning_rate": 9.240643933334884e-05, "loss": 1.6551, "step": 1711 }, { "epoch": 0.17783317752155398, "grad_norm": 0.3897992968559265, "learning_rate": 9.239779271736947e-05, "loss": 1.8644, "step": 1712 }, { "epoch": 0.1779370520411343, "grad_norm": 0.3626013398170471, "learning_rate": 9.238914158635589e-05, "loss": 1.6417, "step": 1713 }, { "epoch": 0.17804092656071466, "grad_norm": 0.38448286056518555, "learning_rate": 9.238048594122939e-05, "loss": 1.7762, "step": 1714 }, { "epoch": 0.178144801080295, "grad_norm": 0.3795235753059387, "learning_rate": 9.237182578291171e-05, "loss": 1.7897, "step": 1715 }, { "epoch": 0.17824867559987534, "grad_norm": 0.3505716323852539, "learning_rate": 9.23631611123251e-05, "loss": 1.8132, "step": 1716 }, { "epoch": 0.1783525501194557, "grad_norm": 0.36175671219825745, "learning_rate": 9.235449193039227e-05, "loss": 1.6795, "step": 1717 }, { "epoch": 0.17845642463903605, "grad_norm": 0.3952975571155548, "learning_rate": 9.234581823803645e-05, "loss": 1.7109, "step": 1718 }, { "epoch": 0.1785602991586164, "grad_norm": 0.3580237925052643, "learning_rate": 9.233714003618127e-05, "loss": 1.8293, "step": 1719 }, { "epoch": 0.17866417367819673, "grad_norm": 0.33214688301086426, "learning_rate": 9.232845732575093e-05, "loss": 1.6718, "step": 1720 }, { "epoch": 0.17876804819777709, "grad_norm": 0.359581857919693, "learning_rate": 9.231977010767005e-05, "loss": 1.8014, "step": 1721 }, { "epoch": 0.17887192271735744, "grad_norm": 0.3323659300804138, "learning_rate": 9.231107838286378e-05, "loss": 1.6053, "step": 1722 }, { "epoch": 0.17897579723693777, "grad_norm": 0.3609966039657593, "learning_rate": 9.230238215225769e-05, "loss": 1.9612, "step": 1723 }, { "epoch": 0.17907967175651812, "grad_norm": 0.33431074023246765, "learning_rate": 9.229368141677788e-05, "loss": 1.6002, "step": 1724 }, { "epoch": 0.17918354627609848, "grad_norm": 0.37472623586654663, "learning_rate": 9.228497617735088e-05, "loss": 1.7805, "step": 1725 }, { "epoch": 0.17928742079567883, "grad_norm": 0.3726039528846741, "learning_rate": 9.227626643490377e-05, "loss": 1.8624, "step": 1726 }, { "epoch": 0.17939129531525916, "grad_norm": 0.4065108895301819, "learning_rate": 9.226755219036405e-05, "loss": 1.7586, "step": 1727 }, { "epoch": 0.1794951698348395, "grad_norm": 0.35552752017974854, "learning_rate": 9.225883344465972e-05, "loss": 1.6, "step": 1728 }, { "epoch": 0.17959904435441987, "grad_norm": 0.3508833944797516, "learning_rate": 9.225011019871927e-05, "loss": 1.7917, "step": 1729 }, { "epoch": 0.1797029188740002, "grad_norm": 0.374521404504776, "learning_rate": 9.224138245347163e-05, "loss": 1.7524, "step": 1730 }, { "epoch": 0.17980679339358055, "grad_norm": 0.36706939339637756, "learning_rate": 9.223265020984626e-05, "loss": 1.7125, "step": 1731 }, { "epoch": 0.1799106679131609, "grad_norm": 0.3556000590324402, "learning_rate": 9.222391346877308e-05, "loss": 1.6935, "step": 1732 }, { "epoch": 0.18001454243274126, "grad_norm": 0.3797661066055298, "learning_rate": 9.221517223118245e-05, "loss": 1.7725, "step": 1733 }, { "epoch": 0.18011841695232159, "grad_norm": 0.3610544502735138, "learning_rate": 9.22064264980053e-05, "loss": 1.8192, "step": 1734 }, { "epoch": 0.18022229147190194, "grad_norm": 0.46052882075309753, "learning_rate": 9.219767627017293e-05, "loss": 2.0761, "step": 1735 }, { "epoch": 0.1803261659914823, "grad_norm": 0.4026206135749817, "learning_rate": 9.218892154861721e-05, "loss": 1.978, "step": 1736 }, { "epoch": 0.18043004051106262, "grad_norm": 0.37845221161842346, "learning_rate": 9.218016233427042e-05, "loss": 1.7811, "step": 1737 }, { "epoch": 0.18053391503064298, "grad_norm": 0.4019536077976227, "learning_rate": 9.217139862806534e-05, "loss": 1.9864, "step": 1738 }, { "epoch": 0.18063778955022333, "grad_norm": 0.36031562089920044, "learning_rate": 9.21626304309353e-05, "loss": 1.6712, "step": 1739 }, { "epoch": 0.1807416640698037, "grad_norm": 0.37847834825515747, "learning_rate": 9.215385774381395e-05, "loss": 1.7967, "step": 1740 }, { "epoch": 0.18084553858938401, "grad_norm": 0.38624006509780884, "learning_rate": 9.21450805676356e-05, "loss": 1.8833, "step": 1741 }, { "epoch": 0.18094941310896437, "grad_norm": 0.34793728590011597, "learning_rate": 9.213629890333489e-05, "loss": 1.8027, "step": 1742 }, { "epoch": 0.18105328762854472, "grad_norm": 0.3407808542251587, "learning_rate": 9.212751275184704e-05, "loss": 1.658, "step": 1743 }, { "epoch": 0.18115716214812508, "grad_norm": 0.3690899908542633, "learning_rate": 9.211872211410769e-05, "loss": 1.6251, "step": 1744 }, { "epoch": 0.1812610366677054, "grad_norm": 0.3415517210960388, "learning_rate": 9.210992699105296e-05, "loss": 1.5998, "step": 1745 }, { "epoch": 0.18136491118728576, "grad_norm": 0.36001309752464294, "learning_rate": 9.210112738361949e-05, "loss": 1.8308, "step": 1746 }, { "epoch": 0.18146878570686611, "grad_norm": 0.3720461130142212, "learning_rate": 9.209232329274433e-05, "loss": 1.7902, "step": 1747 }, { "epoch": 0.18157266022644644, "grad_norm": 0.42275193333625793, "learning_rate": 9.208351471936509e-05, "loss": 1.9225, "step": 1748 }, { "epoch": 0.1816765347460268, "grad_norm": 0.36535343527793884, "learning_rate": 9.20747016644198e-05, "loss": 1.7431, "step": 1749 }, { "epoch": 0.18178040926560715, "grad_norm": 0.3501549959182739, "learning_rate": 9.206588412884698e-05, "loss": 1.6417, "step": 1750 }, { "epoch": 0.1818842837851875, "grad_norm": 0.34967172145843506, "learning_rate": 9.205706211358563e-05, "loss": 1.7612, "step": 1751 }, { "epoch": 0.18198815830476783, "grad_norm": 0.3451713025569916, "learning_rate": 9.204823561957521e-05, "loss": 1.6711, "step": 1752 }, { "epoch": 0.1820920328243482, "grad_norm": 0.37724578380584717, "learning_rate": 9.203940464775568e-05, "loss": 1.9513, "step": 1753 }, { "epoch": 0.18219590734392854, "grad_norm": 0.34096843004226685, "learning_rate": 9.203056919906748e-05, "loss": 1.6691, "step": 1754 }, { "epoch": 0.18229978186350887, "grad_norm": 0.3424280881881714, "learning_rate": 9.202172927445152e-05, "loss": 1.6556, "step": 1755 }, { "epoch": 0.18240365638308922, "grad_norm": 0.3700230121612549, "learning_rate": 9.201288487484916e-05, "loss": 1.7761, "step": 1756 }, { "epoch": 0.18250753090266958, "grad_norm": 0.4259064197540283, "learning_rate": 9.200403600120229e-05, "loss": 2.1197, "step": 1757 }, { "epoch": 0.18261140542224993, "grad_norm": 0.40020573139190674, "learning_rate": 9.199518265445321e-05, "loss": 1.9191, "step": 1758 }, { "epoch": 0.18271527994183026, "grad_norm": 0.36124834418296814, "learning_rate": 9.198632483554478e-05, "loss": 1.8001, "step": 1759 }, { "epoch": 0.18281915446141062, "grad_norm": 0.4086042642593384, "learning_rate": 9.197746254542025e-05, "loss": 1.8518, "step": 1760 }, { "epoch": 0.18292302898099097, "grad_norm": 0.37972450256347656, "learning_rate": 9.196859578502338e-05, "loss": 1.952, "step": 1761 }, { "epoch": 0.1830269035005713, "grad_norm": 0.4148435890674591, "learning_rate": 9.195972455529844e-05, "loss": 1.8547, "step": 1762 }, { "epoch": 0.18313077802015165, "grad_norm": 0.3922263979911804, "learning_rate": 9.195084885719014e-05, "loss": 1.9248, "step": 1763 }, { "epoch": 0.183234652539732, "grad_norm": 0.3774312138557434, "learning_rate": 9.194196869164366e-05, "loss": 1.8411, "step": 1764 }, { "epoch": 0.18333852705931236, "grad_norm": 0.35800519585609436, "learning_rate": 9.193308405960466e-05, "loss": 1.6903, "step": 1765 }, { "epoch": 0.1834424015788927, "grad_norm": 0.40143853425979614, "learning_rate": 9.192419496201932e-05, "loss": 1.7171, "step": 1766 }, { "epoch": 0.18354627609847304, "grad_norm": 0.3324778974056244, "learning_rate": 9.191530139983423e-05, "loss": 1.6295, "step": 1767 }, { "epoch": 0.1836501506180534, "grad_norm": 0.3377951979637146, "learning_rate": 9.190640337399647e-05, "loss": 1.5843, "step": 1768 }, { "epoch": 0.18375402513763373, "grad_norm": 0.39436647295951843, "learning_rate": 9.189750088545367e-05, "loss": 1.957, "step": 1769 }, { "epoch": 0.18385789965721408, "grad_norm": 0.42431554198265076, "learning_rate": 9.188859393515382e-05, "loss": 1.9529, "step": 1770 }, { "epoch": 0.18396177417679443, "grad_norm": 0.3626527786254883, "learning_rate": 9.187968252404547e-05, "loss": 1.7235, "step": 1771 }, { "epoch": 0.1840656486963748, "grad_norm": 0.42632752656936646, "learning_rate": 9.187076665307758e-05, "loss": 2.1363, "step": 1772 }, { "epoch": 0.18416952321595512, "grad_norm": 0.41993650794029236, "learning_rate": 9.186184632319968e-05, "loss": 1.7975, "step": 1773 }, { "epoch": 0.18427339773553547, "grad_norm": 0.37287190556526184, "learning_rate": 9.185292153536165e-05, "loss": 1.8601, "step": 1774 }, { "epoch": 0.18437727225511583, "grad_norm": 0.38827162981033325, "learning_rate": 9.184399229051397e-05, "loss": 1.8401, "step": 1775 }, { "epoch": 0.18448114677469618, "grad_norm": 0.3717474937438965, "learning_rate": 9.183505858960748e-05, "loss": 1.7523, "step": 1776 }, { "epoch": 0.1845850212942765, "grad_norm": 0.3732149600982666, "learning_rate": 9.18261204335936e-05, "loss": 1.7327, "step": 1777 }, { "epoch": 0.18468889581385686, "grad_norm": 0.3960385024547577, "learning_rate": 9.181717782342414e-05, "loss": 1.9453, "step": 1778 }, { "epoch": 0.18479277033343722, "grad_norm": 0.38847383856773376, "learning_rate": 9.180823076005143e-05, "loss": 1.5841, "step": 1779 }, { "epoch": 0.18489664485301754, "grad_norm": 0.37122729420661926, "learning_rate": 9.179927924442826e-05, "loss": 1.8535, "step": 1780 }, { "epoch": 0.1850005193725979, "grad_norm": 0.4589596688747406, "learning_rate": 9.17903232775079e-05, "loss": 2.1207, "step": 1781 }, { "epoch": 0.18510439389217825, "grad_norm": 0.4033050537109375, "learning_rate": 9.178136286024408e-05, "loss": 1.9405, "step": 1782 }, { "epoch": 0.1852082684117586, "grad_norm": 0.37945422530174255, "learning_rate": 9.177239799359102e-05, "loss": 1.8607, "step": 1783 }, { "epoch": 0.18531214293133894, "grad_norm": 0.44369664788246155, "learning_rate": 9.176342867850343e-05, "loss": 1.9337, "step": 1784 }, { "epoch": 0.1854160174509193, "grad_norm": 0.3960365951061249, "learning_rate": 9.175445491593644e-05, "loss": 1.8079, "step": 1785 }, { "epoch": 0.18551989197049965, "grad_norm": 0.34727954864501953, "learning_rate": 9.174547670684569e-05, "loss": 1.5856, "step": 1786 }, { "epoch": 0.18562376649007997, "grad_norm": 0.3571012616157532, "learning_rate": 9.173649405218732e-05, "loss": 1.6828, "step": 1787 }, { "epoch": 0.18572764100966033, "grad_norm": 0.35931140184402466, "learning_rate": 9.172750695291787e-05, "loss": 1.5845, "step": 1788 }, { "epoch": 0.18583151552924068, "grad_norm": 0.36305472254753113, "learning_rate": 9.171851540999442e-05, "loss": 1.7407, "step": 1789 }, { "epoch": 0.18593539004882104, "grad_norm": 0.377492219209671, "learning_rate": 9.17095194243745e-05, "loss": 1.8655, "step": 1790 }, { "epoch": 0.18603926456840136, "grad_norm": 0.3644843101501465, "learning_rate": 9.17005189970161e-05, "loss": 1.7179, "step": 1791 }, { "epoch": 0.18614313908798172, "grad_norm": 0.3360661268234253, "learning_rate": 9.169151412887774e-05, "loss": 1.6426, "step": 1792 }, { "epoch": 0.18624701360756207, "grad_norm": 0.38643765449523926, "learning_rate": 9.16825048209183e-05, "loss": 1.7468, "step": 1793 }, { "epoch": 0.1863508881271424, "grad_norm": 0.34640881419181824, "learning_rate": 9.167349107409721e-05, "loss": 1.5946, "step": 1794 }, { "epoch": 0.18645476264672275, "grad_norm": 0.33807694911956787, "learning_rate": 9.166447288937444e-05, "loss": 1.6169, "step": 1795 }, { "epoch": 0.1865586371663031, "grad_norm": 0.4004231095314026, "learning_rate": 9.165545026771026e-05, "loss": 1.7109, "step": 1796 }, { "epoch": 0.18666251168588346, "grad_norm": 0.3662926256656647, "learning_rate": 9.164642321006555e-05, "loss": 1.7274, "step": 1797 }, { "epoch": 0.1867663862054638, "grad_norm": 0.37025517225265503, "learning_rate": 9.163739171740167e-05, "loss": 1.8982, "step": 1798 }, { "epoch": 0.18687026072504415, "grad_norm": 0.4032931327819824, "learning_rate": 9.162835579068032e-05, "loss": 1.8586, "step": 1799 }, { "epoch": 0.1869741352446245, "grad_norm": 0.36516958475112915, "learning_rate": 9.161931543086379e-05, "loss": 1.8012, "step": 1800 }, { "epoch": 0.18707800976420483, "grad_norm": 0.35469850897789, "learning_rate": 9.161027063891481e-05, "loss": 1.6804, "step": 1801 }, { "epoch": 0.18718188428378518, "grad_norm": 0.4029258191585541, "learning_rate": 9.160122141579659e-05, "loss": 2.0161, "step": 1802 }, { "epoch": 0.18728575880336554, "grad_norm": 0.3713259696960449, "learning_rate": 9.159216776247279e-05, "loss": 1.7743, "step": 1803 }, { "epoch": 0.1873896333229459, "grad_norm": 0.3715076744556427, "learning_rate": 9.158310967990755e-05, "loss": 1.8117, "step": 1804 }, { "epoch": 0.18749350784252622, "grad_norm": 0.43088141083717346, "learning_rate": 9.15740471690655e-05, "loss": 1.8559, "step": 1805 }, { "epoch": 0.18759738236210657, "grad_norm": 0.38683950901031494, "learning_rate": 9.156498023091172e-05, "loss": 1.8888, "step": 1806 }, { "epoch": 0.18770125688168693, "grad_norm": 0.3979988694190979, "learning_rate": 9.155590886641174e-05, "loss": 1.8934, "step": 1807 }, { "epoch": 0.18780513140126726, "grad_norm": 0.39728519320487976, "learning_rate": 9.154683307653162e-05, "loss": 1.8035, "step": 1808 }, { "epoch": 0.1879090059208476, "grad_norm": 0.42086923122406006, "learning_rate": 9.153775286223787e-05, "loss": 1.9612, "step": 1809 }, { "epoch": 0.18801288044042797, "grad_norm": 0.40852025151252747, "learning_rate": 9.152866822449743e-05, "loss": 1.6764, "step": 1810 }, { "epoch": 0.18811675496000832, "grad_norm": 0.3808266222476959, "learning_rate": 9.151957916427778e-05, "loss": 1.7292, "step": 1811 }, { "epoch": 0.18822062947958865, "grad_norm": 0.4031354784965515, "learning_rate": 9.151048568254682e-05, "loss": 1.9939, "step": 1812 }, { "epoch": 0.188324503999169, "grad_norm": 0.3865586817264557, "learning_rate": 9.15013877802729e-05, "loss": 1.7508, "step": 1813 }, { "epoch": 0.18842837851874936, "grad_norm": 0.35779932141304016, "learning_rate": 9.149228545842492e-05, "loss": 1.766, "step": 1814 }, { "epoch": 0.1885322530383297, "grad_norm": 0.35968711972236633, "learning_rate": 9.148317871797219e-05, "loss": 1.8, "step": 1815 }, { "epoch": 0.18863612755791004, "grad_norm": 0.44821977615356445, "learning_rate": 9.14740675598845e-05, "loss": 1.8142, "step": 1816 }, { "epoch": 0.1887400020774904, "grad_norm": 0.433288037776947, "learning_rate": 9.146495198513214e-05, "loss": 1.9191, "step": 1817 }, { "epoch": 0.18884387659707075, "grad_norm": 0.39990267157554626, "learning_rate": 9.145583199468582e-05, "loss": 1.8575, "step": 1818 }, { "epoch": 0.18894775111665107, "grad_norm": 0.37523946166038513, "learning_rate": 9.144670758951677e-05, "loss": 1.799, "step": 1819 }, { "epoch": 0.18905162563623143, "grad_norm": 0.3855798542499542, "learning_rate": 9.143757877059665e-05, "loss": 1.8317, "step": 1820 }, { "epoch": 0.18915550015581178, "grad_norm": 0.35304591059684753, "learning_rate": 9.142844553889762e-05, "loss": 1.5719, "step": 1821 }, { "epoch": 0.18925937467539214, "grad_norm": 0.3850051760673523, "learning_rate": 9.141930789539229e-05, "loss": 1.7996, "step": 1822 }, { "epoch": 0.18936324919497247, "grad_norm": 0.3510708212852478, "learning_rate": 9.141016584105376e-05, "loss": 1.5699, "step": 1823 }, { "epoch": 0.18946712371455282, "grad_norm": 0.3610669672489166, "learning_rate": 9.140101937685558e-05, "loss": 1.7303, "step": 1824 }, { "epoch": 0.18957099823413318, "grad_norm": 0.34818708896636963, "learning_rate": 9.139186850377177e-05, "loss": 1.7005, "step": 1825 }, { "epoch": 0.1896748727537135, "grad_norm": 0.35581493377685547, "learning_rate": 9.138271322277685e-05, "loss": 1.6708, "step": 1826 }, { "epoch": 0.18977874727329386, "grad_norm": 0.35102319717407227, "learning_rate": 9.137355353484573e-05, "loss": 1.8142, "step": 1827 }, { "epoch": 0.1898826217928742, "grad_norm": 0.3373032212257385, "learning_rate": 9.136438944095392e-05, "loss": 1.741, "step": 1828 }, { "epoch": 0.18998649631245457, "grad_norm": 0.37855619192123413, "learning_rate": 9.135522094207729e-05, "loss": 1.7995, "step": 1829 }, { "epoch": 0.1900903708320349, "grad_norm": 0.35194653272628784, "learning_rate": 9.13460480391922e-05, "loss": 1.5989, "step": 1830 }, { "epoch": 0.19019424535161525, "grad_norm": 0.3600010871887207, "learning_rate": 9.13368707332755e-05, "loss": 1.8601, "step": 1831 }, { "epoch": 0.1902981198711956, "grad_norm": 0.37306833267211914, "learning_rate": 9.13276890253045e-05, "loss": 1.86, "step": 1832 }, { "epoch": 0.19040199439077593, "grad_norm": 0.355836421251297, "learning_rate": 9.131850291625702e-05, "loss": 1.6658, "step": 1833 }, { "epoch": 0.19050586891035629, "grad_norm": 0.4272918999195099, "learning_rate": 9.130931240711126e-05, "loss": 1.9726, "step": 1834 }, { "epoch": 0.19060974342993664, "grad_norm": 0.4324601888656616, "learning_rate": 9.130011749884595e-05, "loss": 1.851, "step": 1835 }, { "epoch": 0.190713617949517, "grad_norm": 0.36520737409591675, "learning_rate": 9.129091819244028e-05, "loss": 1.8169, "step": 1836 }, { "epoch": 0.19081749246909732, "grad_norm": 0.4533885419368744, "learning_rate": 9.12817144888739e-05, "loss": 2.0705, "step": 1837 }, { "epoch": 0.19092136698867768, "grad_norm": 0.3979325592517853, "learning_rate": 9.127250638912695e-05, "loss": 1.7838, "step": 1838 }, { "epoch": 0.19102524150825803, "grad_norm": 0.3549365997314453, "learning_rate": 9.126329389418e-05, "loss": 1.8239, "step": 1839 }, { "epoch": 0.19112911602783836, "grad_norm": 0.3781158924102783, "learning_rate": 9.125407700501412e-05, "loss": 1.6151, "step": 1840 }, { "epoch": 0.1912329905474187, "grad_norm": 0.388790100812912, "learning_rate": 9.124485572261082e-05, "loss": 1.9243, "step": 1841 }, { "epoch": 0.19133686506699907, "grad_norm": 0.4158736765384674, "learning_rate": 9.123563004795213e-05, "loss": 1.9735, "step": 1842 }, { "epoch": 0.19144073958657942, "grad_norm": 0.39373651146888733, "learning_rate": 9.122639998202047e-05, "loss": 1.8774, "step": 1843 }, { "epoch": 0.19154461410615975, "grad_norm": 0.3896943926811218, "learning_rate": 9.12171655257988e-05, "loss": 1.8337, "step": 1844 }, { "epoch": 0.1916484886257401, "grad_norm": 0.3980332016944885, "learning_rate": 9.120792668027049e-05, "loss": 1.8081, "step": 1845 }, { "epoch": 0.19175236314532046, "grad_norm": 0.4258505403995514, "learning_rate": 9.119868344641944e-05, "loss": 1.8096, "step": 1846 }, { "epoch": 0.19185623766490079, "grad_norm": 0.3683486580848694, "learning_rate": 9.118943582522996e-05, "loss": 1.826, "step": 1847 }, { "epoch": 0.19196011218448114, "grad_norm": 0.3608863949775696, "learning_rate": 9.118018381768687e-05, "loss": 1.6942, "step": 1848 }, { "epoch": 0.1920639867040615, "grad_norm": 0.35737764835357666, "learning_rate": 9.11709274247754e-05, "loss": 1.8741, "step": 1849 }, { "epoch": 0.19216786122364185, "grad_norm": 0.36617088317871094, "learning_rate": 9.116166664748131e-05, "loss": 1.7608, "step": 1850 }, { "epoch": 0.19227173574322218, "grad_norm": 0.40373408794403076, "learning_rate": 9.11524014867908e-05, "loss": 1.715, "step": 1851 }, { "epoch": 0.19237561026280253, "grad_norm": 0.36378052830696106, "learning_rate": 9.114313194369053e-05, "loss": 1.5889, "step": 1852 }, { "epoch": 0.1924794847823829, "grad_norm": 0.37960201501846313, "learning_rate": 9.113385801916765e-05, "loss": 1.6605, "step": 1853 }, { "epoch": 0.19258335930196324, "grad_norm": 0.3682219982147217, "learning_rate": 9.112457971420973e-05, "loss": 1.8351, "step": 1854 }, { "epoch": 0.19268723382154357, "grad_norm": 0.38188329339027405, "learning_rate": 9.111529702980486e-05, "loss": 1.8845, "step": 1855 }, { "epoch": 0.19279110834112392, "grad_norm": 0.39885640144348145, "learning_rate": 9.110600996694157e-05, "loss": 1.8629, "step": 1856 }, { "epoch": 0.19289498286070428, "grad_norm": 0.36927422881126404, "learning_rate": 9.109671852660885e-05, "loss": 1.718, "step": 1857 }, { "epoch": 0.1929988573802846, "grad_norm": 0.35329553484916687, "learning_rate": 9.108742270979618e-05, "loss": 1.6209, "step": 1858 }, { "epoch": 0.19310273189986496, "grad_norm": 0.40359756350517273, "learning_rate": 9.107812251749349e-05, "loss": 1.936, "step": 1859 }, { "epoch": 0.19320660641944531, "grad_norm": 0.3696520924568176, "learning_rate": 9.106881795069116e-05, "loss": 1.8329, "step": 1860 }, { "epoch": 0.19331048093902567, "grad_norm": 0.38084790110588074, "learning_rate": 9.105950901038008e-05, "loss": 1.8072, "step": 1861 }, { "epoch": 0.193414355458606, "grad_norm": 0.4722822606563568, "learning_rate": 9.105019569755157e-05, "loss": 1.8516, "step": 1862 }, { "epoch": 0.19351822997818635, "grad_norm": 0.3589233160018921, "learning_rate": 9.104087801319741e-05, "loss": 1.7609, "step": 1863 }, { "epoch": 0.1936221044977667, "grad_norm": 0.4372257590293884, "learning_rate": 9.103155595830988e-05, "loss": 1.9742, "step": 1864 }, { "epoch": 0.19372597901734703, "grad_norm": 0.3735795021057129, "learning_rate": 9.10222295338817e-05, "loss": 1.71, "step": 1865 }, { "epoch": 0.1938298535369274, "grad_norm": 0.35734692215919495, "learning_rate": 9.101289874090605e-05, "loss": 1.6526, "step": 1866 }, { "epoch": 0.19393372805650774, "grad_norm": 0.36955031752586365, "learning_rate": 9.10035635803766e-05, "loss": 1.787, "step": 1867 }, { "epoch": 0.1940376025760881, "grad_norm": 0.3762964904308319, "learning_rate": 9.099422405328748e-05, "loss": 1.6645, "step": 1868 }, { "epoch": 0.19414147709566842, "grad_norm": 0.40508878231048584, "learning_rate": 9.098488016063325e-05, "loss": 1.8264, "step": 1869 }, { "epoch": 0.19424535161524878, "grad_norm": 0.43582627177238464, "learning_rate": 9.097553190340899e-05, "loss": 2.0092, "step": 1870 }, { "epoch": 0.19434922613482913, "grad_norm": 0.379056453704834, "learning_rate": 9.09661792826102e-05, "loss": 1.7522, "step": 1871 }, { "epoch": 0.19445310065440946, "grad_norm": 0.35009434819221497, "learning_rate": 9.095682229923287e-05, "loss": 1.7053, "step": 1872 }, { "epoch": 0.19455697517398982, "grad_norm": 0.3844042420387268, "learning_rate": 9.094746095427342e-05, "loss": 1.7458, "step": 1873 }, { "epoch": 0.19466084969357017, "grad_norm": 0.3690871000289917, "learning_rate": 9.09380952487288e-05, "loss": 1.7563, "step": 1874 }, { "epoch": 0.19476472421315053, "grad_norm": 0.3871258497238159, "learning_rate": 9.092872518359637e-05, "loss": 1.9391, "step": 1875 }, { "epoch": 0.19486859873273085, "grad_norm": 0.3885674774646759, "learning_rate": 9.091935075987395e-05, "loss": 1.7193, "step": 1876 }, { "epoch": 0.1949724732523112, "grad_norm": 0.37015703320503235, "learning_rate": 9.090997197855986e-05, "loss": 1.7996, "step": 1877 }, { "epoch": 0.19507634777189156, "grad_norm": 0.3506444990634918, "learning_rate": 9.090058884065287e-05, "loss": 1.5394, "step": 1878 }, { "epoch": 0.1951802222914719, "grad_norm": 0.350041002035141, "learning_rate": 9.089120134715222e-05, "loss": 1.8134, "step": 1879 }, { "epoch": 0.19528409681105224, "grad_norm": 0.35820406675338745, "learning_rate": 9.088180949905756e-05, "loss": 1.7417, "step": 1880 }, { "epoch": 0.1953879713306326, "grad_norm": 0.37615320086479187, "learning_rate": 9.087241329736909e-05, "loss": 1.7162, "step": 1881 }, { "epoch": 0.19549184585021295, "grad_norm": 0.35000884532928467, "learning_rate": 9.086301274308742e-05, "loss": 1.6961, "step": 1882 }, { "epoch": 0.19559572036979328, "grad_norm": 0.3935372233390808, "learning_rate": 9.085360783721363e-05, "loss": 1.6883, "step": 1883 }, { "epoch": 0.19569959488937363, "grad_norm": 0.37884512543678284, "learning_rate": 9.084419858074928e-05, "loss": 1.756, "step": 1884 }, { "epoch": 0.195803469408954, "grad_norm": 0.33219221234321594, "learning_rate": 9.08347849746964e-05, "loss": 1.5695, "step": 1885 }, { "epoch": 0.19590734392853434, "grad_norm": 0.3523041903972626, "learning_rate": 9.082536702005741e-05, "loss": 1.7402, "step": 1886 }, { "epoch": 0.19601121844811467, "grad_norm": 0.38876470923423767, "learning_rate": 9.08159447178353e-05, "loss": 1.6901, "step": 1887 }, { "epoch": 0.19611509296769503, "grad_norm": 0.35667043924331665, "learning_rate": 9.080651806903346e-05, "loss": 1.6679, "step": 1888 }, { "epoch": 0.19621896748727538, "grad_norm": 0.3476751446723938, "learning_rate": 9.079708707465575e-05, "loss": 1.6656, "step": 1889 }, { "epoch": 0.1963228420068557, "grad_norm": 0.3749213218688965, "learning_rate": 9.078765173570648e-05, "loss": 1.6932, "step": 1890 }, { "epoch": 0.19642671652643606, "grad_norm": 0.38287267088890076, "learning_rate": 9.077821205319045e-05, "loss": 1.8927, "step": 1891 }, { "epoch": 0.19653059104601642, "grad_norm": 0.3563637137413025, "learning_rate": 9.076876802811293e-05, "loss": 1.7309, "step": 1892 }, { "epoch": 0.19663446556559677, "grad_norm": 0.4049491882324219, "learning_rate": 9.075931966147964e-05, "loss": 1.8704, "step": 1893 }, { "epoch": 0.1967383400851771, "grad_norm": 0.38649019598960876, "learning_rate": 9.074986695429673e-05, "loss": 1.7995, "step": 1894 }, { "epoch": 0.19684221460475745, "grad_norm": 0.38346853852272034, "learning_rate": 9.074040990757085e-05, "loss": 1.7893, "step": 1895 }, { "epoch": 0.1969460891243378, "grad_norm": 0.39437806606292725, "learning_rate": 9.07309485223091e-05, "loss": 1.8473, "step": 1896 }, { "epoch": 0.19704996364391814, "grad_norm": 0.3689126670360565, "learning_rate": 9.072148279951905e-05, "loss": 1.6973, "step": 1897 }, { "epoch": 0.1971538381634985, "grad_norm": 0.37634965777397156, "learning_rate": 9.07120127402087e-05, "loss": 1.5333, "step": 1898 }, { "epoch": 0.19725771268307885, "grad_norm": 0.365343302488327, "learning_rate": 9.070253834538658e-05, "loss": 1.7896, "step": 1899 }, { "epoch": 0.1973615872026592, "grad_norm": 0.3354664444923401, "learning_rate": 9.069305961606162e-05, "loss": 1.5792, "step": 1900 }, { "epoch": 0.19746546172223953, "grad_norm": 0.4631555676460266, "learning_rate": 9.068357655324322e-05, "loss": 1.9998, "step": 1901 }, { "epoch": 0.19756933624181988, "grad_norm": 0.3880319893360138, "learning_rate": 9.067408915794126e-05, "loss": 1.6883, "step": 1902 }, { "epoch": 0.19767321076140024, "grad_norm": 0.3678785562515259, "learning_rate": 9.066459743116609e-05, "loss": 1.7639, "step": 1903 }, { "epoch": 0.19777708528098056, "grad_norm": 0.39961332082748413, "learning_rate": 9.065510137392848e-05, "loss": 1.8451, "step": 1904 }, { "epoch": 0.19788095980056092, "grad_norm": 0.4052257239818573, "learning_rate": 9.06456009872397e-05, "loss": 1.7716, "step": 1905 }, { "epoch": 0.19798483432014127, "grad_norm": 0.3587489128112793, "learning_rate": 9.063609627211145e-05, "loss": 1.7665, "step": 1906 }, { "epoch": 0.19808870883972163, "grad_norm": 0.36075523495674133, "learning_rate": 9.062658722955593e-05, "loss": 1.7998, "step": 1907 }, { "epoch": 0.19819258335930195, "grad_norm": 0.3680761754512787, "learning_rate": 9.061707386058577e-05, "loss": 1.7033, "step": 1908 }, { "epoch": 0.1982964578788823, "grad_norm": 0.37647727131843567, "learning_rate": 9.060755616621406e-05, "loss": 1.8304, "step": 1909 }, { "epoch": 0.19840033239846266, "grad_norm": 0.3684910237789154, "learning_rate": 9.059803414745438e-05, "loss": 1.6814, "step": 1910 }, { "epoch": 0.198504206918043, "grad_norm": 0.3947627544403076, "learning_rate": 9.058850780532074e-05, "loss": 1.8224, "step": 1911 }, { "epoch": 0.19860808143762335, "grad_norm": 0.38467562198638916, "learning_rate": 9.057897714082761e-05, "loss": 1.6889, "step": 1912 }, { "epoch": 0.1987119559572037, "grad_norm": 0.38776230812072754, "learning_rate": 9.056944215498994e-05, "loss": 1.5652, "step": 1913 }, { "epoch": 0.19881583047678406, "grad_norm": 0.36061033606529236, "learning_rate": 9.055990284882317e-05, "loss": 1.7888, "step": 1914 }, { "epoch": 0.19891970499636438, "grad_norm": 0.37597185373306274, "learning_rate": 9.05503592233431e-05, "loss": 1.8526, "step": 1915 }, { "epoch": 0.19902357951594474, "grad_norm": 0.39023831486701965, "learning_rate": 9.054081127956607e-05, "loss": 1.7178, "step": 1916 }, { "epoch": 0.1991274540355251, "grad_norm": 0.37832608819007874, "learning_rate": 9.053125901850888e-05, "loss": 1.7645, "step": 1917 }, { "epoch": 0.19923132855510542, "grad_norm": 0.3865016996860504, "learning_rate": 9.052170244118876e-05, "loss": 1.9984, "step": 1918 }, { "epoch": 0.19933520307468577, "grad_norm": 0.42755410075187683, "learning_rate": 9.05121415486234e-05, "loss": 1.7952, "step": 1919 }, { "epoch": 0.19943907759426613, "grad_norm": 0.3337387442588806, "learning_rate": 9.050257634183099e-05, "loss": 1.5986, "step": 1920 }, { "epoch": 0.19954295211384648, "grad_norm": 0.35701167583465576, "learning_rate": 9.049300682183012e-05, "loss": 1.5109, "step": 1921 }, { "epoch": 0.1996468266334268, "grad_norm": 0.3579074442386627, "learning_rate": 9.048343298963988e-05, "loss": 1.8023, "step": 1922 }, { "epoch": 0.19975070115300717, "grad_norm": 0.3522278666496277, "learning_rate": 9.047385484627982e-05, "loss": 1.8454, "step": 1923 }, { "epoch": 0.19985457567258752, "grad_norm": 0.35523468255996704, "learning_rate": 9.046427239276991e-05, "loss": 1.7483, "step": 1924 }, { "epoch": 0.19995845019216787, "grad_norm": 0.32691970467567444, "learning_rate": 9.045468563013064e-05, "loss": 1.6541, "step": 1925 }, { "epoch": 0.2000623247117482, "grad_norm": 0.3599613606929779, "learning_rate": 9.04450945593829e-05, "loss": 1.7776, "step": 1926 }, { "epoch": 0.20016619923132856, "grad_norm": 0.3572538495063782, "learning_rate": 9.043549918154808e-05, "loss": 1.7373, "step": 1927 }, { "epoch": 0.2002700737509089, "grad_norm": 0.3507987856864929, "learning_rate": 9.0425899497648e-05, "loss": 1.7154, "step": 1928 }, { "epoch": 0.20037394827048924, "grad_norm": 0.3993472456932068, "learning_rate": 9.041629550870496e-05, "loss": 1.8059, "step": 1929 }, { "epoch": 0.2004778227900696, "grad_norm": 0.34402957558631897, "learning_rate": 9.040668721574171e-05, "loss": 1.7075, "step": 1930 }, { "epoch": 0.20058169730964995, "grad_norm": 0.3825515806674957, "learning_rate": 9.039707461978146e-05, "loss": 1.5313, "step": 1931 }, { "epoch": 0.2006855718292303, "grad_norm": 0.3753582537174225, "learning_rate": 9.038745772184786e-05, "loss": 1.792, "step": 1932 }, { "epoch": 0.20078944634881063, "grad_norm": 0.39727622270584106, "learning_rate": 9.037783652296505e-05, "loss": 1.7264, "step": 1933 }, { "epoch": 0.20089332086839098, "grad_norm": 0.3653308153152466, "learning_rate": 9.036821102415762e-05, "loss": 1.6462, "step": 1934 }, { "epoch": 0.20099719538797134, "grad_norm": 0.3842056095600128, "learning_rate": 9.03585812264506e-05, "loss": 1.8057, "step": 1935 }, { "epoch": 0.20110106990755167, "grad_norm": 0.3687531054019928, "learning_rate": 9.034894713086947e-05, "loss": 1.6832, "step": 1936 }, { "epoch": 0.20120494442713202, "grad_norm": 0.4252033829689026, "learning_rate": 9.033930873844023e-05, "loss": 1.8803, "step": 1937 }, { "epoch": 0.20130881894671238, "grad_norm": 0.3911295235157013, "learning_rate": 9.032966605018924e-05, "loss": 1.8326, "step": 1938 }, { "epoch": 0.20141269346629273, "grad_norm": 0.3815619647502899, "learning_rate": 9.032001906714343e-05, "loss": 1.8654, "step": 1939 }, { "epoch": 0.20151656798587306, "grad_norm": 0.42655375599861145, "learning_rate": 9.031036779033006e-05, "loss": 1.8325, "step": 1940 }, { "epoch": 0.2016204425054534, "grad_norm": 0.3972933292388916, "learning_rate": 9.030071222077698e-05, "loss": 1.5501, "step": 1941 }, { "epoch": 0.20172431702503377, "grad_norm": 0.3478373885154724, "learning_rate": 9.02910523595124e-05, "loss": 1.5961, "step": 1942 }, { "epoch": 0.2018281915446141, "grad_norm": 0.36317178606987, "learning_rate": 9.028138820756504e-05, "loss": 1.7352, "step": 1943 }, { "epoch": 0.20193206606419445, "grad_norm": 0.36241620779037476, "learning_rate": 9.027171976596402e-05, "loss": 1.7462, "step": 1944 }, { "epoch": 0.2020359405837748, "grad_norm": 0.3782672584056854, "learning_rate": 9.026204703573899e-05, "loss": 1.7317, "step": 1945 }, { "epoch": 0.20213981510335516, "grad_norm": 0.3928276002407074, "learning_rate": 9.025237001791999e-05, "loss": 1.7494, "step": 1946 }, { "epoch": 0.20224368962293549, "grad_norm": 0.38330700993537903, "learning_rate": 9.024268871353758e-05, "loss": 1.6797, "step": 1947 }, { "epoch": 0.20234756414251584, "grad_norm": 0.38679707050323486, "learning_rate": 9.023300312362273e-05, "loss": 1.7813, "step": 1948 }, { "epoch": 0.2024514386620962, "grad_norm": 0.41488325595855713, "learning_rate": 9.022331324920687e-05, "loss": 1.8702, "step": 1949 }, { "epoch": 0.20255531318167652, "grad_norm": 0.39658311009407043, "learning_rate": 9.02136190913219e-05, "loss": 1.7838, "step": 1950 }, { "epoch": 0.20265918770125688, "grad_norm": 0.39057308435440063, "learning_rate": 9.020392065100018e-05, "loss": 1.814, "step": 1951 }, { "epoch": 0.20276306222083723, "grad_norm": 0.374887079000473, "learning_rate": 9.019421792927452e-05, "loss": 1.7414, "step": 1952 }, { "epoch": 0.20286693674041759, "grad_norm": 0.3583182990550995, "learning_rate": 9.018451092717816e-05, "loss": 1.6376, "step": 1953 }, { "epoch": 0.2029708112599979, "grad_norm": 0.35195374488830566, "learning_rate": 9.017479964574485e-05, "loss": 1.6618, "step": 1954 }, { "epoch": 0.20307468577957827, "grad_norm": 0.3840351104736328, "learning_rate": 9.016508408600875e-05, "loss": 1.6952, "step": 1955 }, { "epoch": 0.20317856029915862, "grad_norm": 0.37217509746551514, "learning_rate": 9.015536424900449e-05, "loss": 1.6792, "step": 1956 }, { "epoch": 0.20328243481873895, "grad_norm": 0.39523565769195557, "learning_rate": 9.014564013576716e-05, "loss": 1.7578, "step": 1957 }, { "epoch": 0.2033863093383193, "grad_norm": 0.38107234239578247, "learning_rate": 9.013591174733231e-05, "loss": 1.8505, "step": 1958 }, { "epoch": 0.20349018385789966, "grad_norm": 0.3982243835926056, "learning_rate": 9.012617908473593e-05, "loss": 1.9007, "step": 1959 }, { "epoch": 0.20359405837748001, "grad_norm": 0.3710818290710449, "learning_rate": 9.011644214901447e-05, "loss": 1.7955, "step": 1960 }, { "epoch": 0.20369793289706034, "grad_norm": 0.37171775102615356, "learning_rate": 9.010670094120485e-05, "loss": 1.8637, "step": 1961 }, { "epoch": 0.2038018074166407, "grad_norm": 0.3907235562801361, "learning_rate": 9.009695546234443e-05, "loss": 1.7001, "step": 1962 }, { "epoch": 0.20390568193622105, "grad_norm": 0.34530210494995117, "learning_rate": 9.0087205713471e-05, "loss": 1.6479, "step": 1963 }, { "epoch": 0.2040095564558014, "grad_norm": 0.37163665890693665, "learning_rate": 9.007745169562285e-05, "loss": 1.7622, "step": 1964 }, { "epoch": 0.20411343097538173, "grad_norm": 0.36681413650512695, "learning_rate": 9.006769340983873e-05, "loss": 1.9061, "step": 1965 }, { "epoch": 0.2042173054949621, "grad_norm": 0.370410680770874, "learning_rate": 9.00579308571578e-05, "loss": 1.823, "step": 1966 }, { "epoch": 0.20432118001454244, "grad_norm": 0.43219515681266785, "learning_rate": 9.004816403861969e-05, "loss": 1.9272, "step": 1967 }, { "epoch": 0.20442505453412277, "grad_norm": 0.3857935667037964, "learning_rate": 9.00383929552645e-05, "loss": 1.7746, "step": 1968 }, { "epoch": 0.20452892905370312, "grad_norm": 0.37894824147224426, "learning_rate": 9.002861760813278e-05, "loss": 1.7197, "step": 1969 }, { "epoch": 0.20463280357328348, "grad_norm": 0.35982224345207214, "learning_rate": 9.001883799826551e-05, "loss": 1.7175, "step": 1970 }, { "epoch": 0.20473667809286383, "grad_norm": 0.3776380717754364, "learning_rate": 9.000905412670415e-05, "loss": 1.7624, "step": 1971 }, { "epoch": 0.20484055261244416, "grad_norm": 0.38274866342544556, "learning_rate": 8.999926599449062e-05, "loss": 1.7201, "step": 1972 }, { "epoch": 0.20494442713202451, "grad_norm": 0.3876325488090515, "learning_rate": 8.998947360266726e-05, "loss": 1.8462, "step": 1973 }, { "epoch": 0.20504830165160487, "grad_norm": 0.3528856039047241, "learning_rate": 8.99796769522769e-05, "loss": 1.7983, "step": 1974 }, { "epoch": 0.2051521761711852, "grad_norm": 0.3806709945201874, "learning_rate": 8.996987604436279e-05, "loss": 2.0234, "step": 1975 }, { "epoch": 0.20525605069076555, "grad_norm": 0.3677120506763458, "learning_rate": 8.996007087996866e-05, "loss": 1.7771, "step": 1976 }, { "epoch": 0.2053599252103459, "grad_norm": 0.3892456591129303, "learning_rate": 8.995026146013867e-05, "loss": 1.6017, "step": 1977 }, { "epoch": 0.20546379972992626, "grad_norm": 0.4247187077999115, "learning_rate": 8.994044778591749e-05, "loss": 1.7722, "step": 1978 }, { "epoch": 0.2055676742495066, "grad_norm": 0.3515567183494568, "learning_rate": 8.993062985835013e-05, "loss": 1.4704, "step": 1979 }, { "epoch": 0.20567154876908694, "grad_norm": 0.3741825520992279, "learning_rate": 8.99208076784822e-05, "loss": 1.6708, "step": 1980 }, { "epoch": 0.2057754232886673, "grad_norm": 0.37131038308143616, "learning_rate": 8.991098124735961e-05, "loss": 1.7259, "step": 1981 }, { "epoch": 0.20587929780824762, "grad_norm": 0.3691762387752533, "learning_rate": 8.990115056602885e-05, "loss": 1.8153, "step": 1982 }, { "epoch": 0.20598317232782798, "grad_norm": 0.3380625545978546, "learning_rate": 8.989131563553679e-05, "loss": 1.7063, "step": 1983 }, { "epoch": 0.20608704684740833, "grad_norm": 0.4201214909553528, "learning_rate": 8.98814764569308e-05, "loss": 1.8642, "step": 1984 }, { "epoch": 0.2061909213669887, "grad_norm": 0.3742324709892273, "learning_rate": 8.987163303125863e-05, "loss": 1.6623, "step": 1985 }, { "epoch": 0.20629479588656902, "grad_norm": 0.3564354181289673, "learning_rate": 8.986178535956856e-05, "loss": 1.7477, "step": 1986 }, { "epoch": 0.20639867040614937, "grad_norm": 0.41312122344970703, "learning_rate": 8.985193344290929e-05, "loss": 1.901, "step": 1987 }, { "epoch": 0.20650254492572973, "grad_norm": 0.4109034538269043, "learning_rate": 8.984207728232995e-05, "loss": 1.7853, "step": 1988 }, { "epoch": 0.20660641944531005, "grad_norm": 0.383986234664917, "learning_rate": 8.983221687888017e-05, "loss": 1.8616, "step": 1989 }, { "epoch": 0.2067102939648904, "grad_norm": 0.38250911235809326, "learning_rate": 8.982235223360999e-05, "loss": 1.7691, "step": 1990 }, { "epoch": 0.20681416848447076, "grad_norm": 0.35506096482276917, "learning_rate": 8.981248334756994e-05, "loss": 1.8013, "step": 1991 }, { "epoch": 0.20691804300405112, "grad_norm": 0.33986783027648926, "learning_rate": 8.980261022181095e-05, "loss": 1.624, "step": 1992 }, { "epoch": 0.20702191752363144, "grad_norm": 0.35081538558006287, "learning_rate": 8.979273285738445e-05, "loss": 1.6776, "step": 1993 }, { "epoch": 0.2071257920432118, "grad_norm": 0.3631971478462219, "learning_rate": 8.978285125534229e-05, "loss": 1.5715, "step": 1994 }, { "epoch": 0.20722966656279215, "grad_norm": 0.38734301924705505, "learning_rate": 8.977296541673678e-05, "loss": 1.781, "step": 1995 }, { "epoch": 0.2073335410823725, "grad_norm": 0.40530499815940857, "learning_rate": 8.976307534262072e-05, "loss": 1.8291, "step": 1996 }, { "epoch": 0.20743741560195283, "grad_norm": 0.4037350118160248, "learning_rate": 8.975318103404728e-05, "loss": 1.9573, "step": 1997 }, { "epoch": 0.2075412901215332, "grad_norm": 0.3434680700302124, "learning_rate": 8.974328249207015e-05, "loss": 1.7654, "step": 1998 }, { "epoch": 0.20764516464111354, "grad_norm": 0.38100457191467285, "learning_rate": 8.973337971774344e-05, "loss": 1.7762, "step": 1999 }, { "epoch": 0.20774903916069387, "grad_norm": 0.3673883080482483, "learning_rate": 8.972347271212173e-05, "loss": 1.7563, "step": 2000 }, { "epoch": 0.20785291368027423, "grad_norm": 0.3593236207962036, "learning_rate": 8.971356147626004e-05, "loss": 1.6374, "step": 2001 }, { "epoch": 0.20795678819985458, "grad_norm": 0.4013366401195526, "learning_rate": 8.970364601121382e-05, "loss": 1.7564, "step": 2002 }, { "epoch": 0.20806066271943494, "grad_norm": 0.36467140913009644, "learning_rate": 8.9693726318039e-05, "loss": 1.7618, "step": 2003 }, { "epoch": 0.20816453723901526, "grad_norm": 0.36317500472068787, "learning_rate": 8.968380239779194e-05, "loss": 1.6911, "step": 2004 }, { "epoch": 0.20826841175859562, "grad_norm": 0.3494802713394165, "learning_rate": 8.96738742515295e-05, "loss": 1.7087, "step": 2005 }, { "epoch": 0.20837228627817597, "grad_norm": 0.3602224290370941, "learning_rate": 8.96639418803089e-05, "loss": 1.698, "step": 2006 }, { "epoch": 0.2084761607977563, "grad_norm": 0.3831143379211426, "learning_rate": 8.965400528518787e-05, "loss": 1.7733, "step": 2007 }, { "epoch": 0.20858003531733665, "grad_norm": 0.3766723871231079, "learning_rate": 8.964406446722459e-05, "loss": 1.7092, "step": 2008 }, { "epoch": 0.208683909836917, "grad_norm": 0.38016197085380554, "learning_rate": 8.96341194274777e-05, "loss": 1.6285, "step": 2009 }, { "epoch": 0.20878778435649736, "grad_norm": 0.3981560170650482, "learning_rate": 8.962417016700624e-05, "loss": 1.7449, "step": 2010 }, { "epoch": 0.2088916588760777, "grad_norm": 0.3760923445224762, "learning_rate": 8.961421668686974e-05, "loss": 1.7459, "step": 2011 }, { "epoch": 0.20899553339565805, "grad_norm": 0.3543214797973633, "learning_rate": 8.960425898812816e-05, "loss": 1.7329, "step": 2012 }, { "epoch": 0.2090994079152384, "grad_norm": 0.3547232747077942, "learning_rate": 8.959429707184192e-05, "loss": 1.7992, "step": 2013 }, { "epoch": 0.20920328243481873, "grad_norm": 0.43568694591522217, "learning_rate": 8.95843309390719e-05, "loss": 2.0488, "step": 2014 }, { "epoch": 0.20930715695439908, "grad_norm": 0.4254542291164398, "learning_rate": 8.95743605908794e-05, "loss": 1.796, "step": 2015 }, { "epoch": 0.20941103147397944, "grad_norm": 0.3894093632698059, "learning_rate": 8.956438602832619e-05, "loss": 1.8641, "step": 2016 }, { "epoch": 0.2095149059935598, "grad_norm": 0.383073091506958, "learning_rate": 8.955440725247448e-05, "loss": 1.7898, "step": 2017 }, { "epoch": 0.20961878051314012, "grad_norm": 0.37630245089530945, "learning_rate": 8.954442426438694e-05, "loss": 1.738, "step": 2018 }, { "epoch": 0.20972265503272047, "grad_norm": 0.3684820234775543, "learning_rate": 8.953443706512667e-05, "loss": 1.8819, "step": 2019 }, { "epoch": 0.20982652955230083, "grad_norm": 0.36796849966049194, "learning_rate": 8.952444565575723e-05, "loss": 1.7886, "step": 2020 }, { "epoch": 0.20993040407188115, "grad_norm": 0.35004639625549316, "learning_rate": 8.951445003734263e-05, "loss": 1.7872, "step": 2021 }, { "epoch": 0.2100342785914615, "grad_norm": 0.3540632724761963, "learning_rate": 8.950445021094733e-05, "loss": 1.6917, "step": 2022 }, { "epoch": 0.21013815311104186, "grad_norm": 0.3964502215385437, "learning_rate": 8.949444617763623e-05, "loss": 1.7487, "step": 2023 }, { "epoch": 0.21024202763062222, "grad_norm": 0.3707832098007202, "learning_rate": 8.948443793847467e-05, "loss": 1.8752, "step": 2024 }, { "epoch": 0.21034590215020255, "grad_norm": 0.3899782598018646, "learning_rate": 8.947442549452846e-05, "loss": 1.7619, "step": 2025 }, { "epoch": 0.2104497766697829, "grad_norm": 0.349178671836853, "learning_rate": 8.946440884686387e-05, "loss": 1.5852, "step": 2026 }, { "epoch": 0.21055365118936326, "grad_norm": 0.3928735554218292, "learning_rate": 8.945438799654756e-05, "loss": 1.8503, "step": 2027 }, { "epoch": 0.21065752570894358, "grad_norm": 0.3914186954498291, "learning_rate": 8.944436294464669e-05, "loss": 1.6764, "step": 2028 }, { "epoch": 0.21076140022852394, "grad_norm": 0.370089054107666, "learning_rate": 8.943433369222882e-05, "loss": 1.6127, "step": 2029 }, { "epoch": 0.2108652747481043, "grad_norm": 0.35939261317253113, "learning_rate": 8.942430024036203e-05, "loss": 1.7161, "step": 2030 }, { "epoch": 0.21096914926768465, "grad_norm": 0.36686834692955017, "learning_rate": 8.941426259011478e-05, "loss": 1.8648, "step": 2031 }, { "epoch": 0.21107302378726497, "grad_norm": 0.3749295473098755, "learning_rate": 8.940422074255602e-05, "loss": 1.7433, "step": 2032 }, { "epoch": 0.21117689830684533, "grad_norm": 0.36779385805130005, "learning_rate": 8.93941746987551e-05, "loss": 1.778, "step": 2033 }, { "epoch": 0.21128077282642568, "grad_norm": 0.3840864896774292, "learning_rate": 8.938412445978186e-05, "loss": 1.7633, "step": 2034 }, { "epoch": 0.21138464734600604, "grad_norm": 0.3512420356273651, "learning_rate": 8.937407002670659e-05, "loss": 1.6726, "step": 2035 }, { "epoch": 0.21148852186558637, "grad_norm": 0.3997120261192322, "learning_rate": 8.936401140059998e-05, "loss": 1.7913, "step": 2036 }, { "epoch": 0.21159239638516672, "grad_norm": 0.4093436300754547, "learning_rate": 8.935394858253321e-05, "loss": 1.7161, "step": 2037 }, { "epoch": 0.21169627090474707, "grad_norm": 0.3791636824607849, "learning_rate": 8.934388157357788e-05, "loss": 1.9097, "step": 2038 }, { "epoch": 0.2118001454243274, "grad_norm": 0.39315351843833923, "learning_rate": 8.933381037480606e-05, "loss": 1.9521, "step": 2039 }, { "epoch": 0.21190401994390776, "grad_norm": 0.3954058289527893, "learning_rate": 8.932373498729025e-05, "loss": 1.7008, "step": 2040 }, { "epoch": 0.2120078944634881, "grad_norm": 0.44486141204833984, "learning_rate": 8.931365541210342e-05, "loss": 1.8255, "step": 2041 }, { "epoch": 0.21211176898306847, "grad_norm": 0.4147118330001831, "learning_rate": 8.930357165031893e-05, "loss": 1.9377, "step": 2042 }, { "epoch": 0.2122156435026488, "grad_norm": 0.4043649733066559, "learning_rate": 8.929348370301063e-05, "loss": 1.8809, "step": 2043 }, { "epoch": 0.21231951802222915, "grad_norm": 0.37084200978279114, "learning_rate": 8.928339157125283e-05, "loss": 1.7093, "step": 2044 }, { "epoch": 0.2124233925418095, "grad_norm": 0.33814066648483276, "learning_rate": 8.927329525612025e-05, "loss": 1.7248, "step": 2045 }, { "epoch": 0.21252726706138983, "grad_norm": 0.3607522249221802, "learning_rate": 8.926319475868807e-05, "loss": 1.6424, "step": 2046 }, { "epoch": 0.21263114158097018, "grad_norm": 0.35810667276382446, "learning_rate": 8.925309008003192e-05, "loss": 1.6919, "step": 2047 }, { "epoch": 0.21273501610055054, "grad_norm": 0.3557438850402832, "learning_rate": 8.924298122122783e-05, "loss": 1.8568, "step": 2048 }, { "epoch": 0.2128388906201309, "grad_norm": 0.37123459577560425, "learning_rate": 8.923286818335237e-05, "loss": 1.8214, "step": 2049 }, { "epoch": 0.21294276513971122, "grad_norm": 0.42854633927345276, "learning_rate": 8.922275096748247e-05, "loss": 2.1206, "step": 2050 }, { "epoch": 0.21304663965929158, "grad_norm": 0.4002090394496918, "learning_rate": 8.921262957469554e-05, "loss": 1.8579, "step": 2051 }, { "epoch": 0.21315051417887193, "grad_norm": 0.34181690216064453, "learning_rate": 8.920250400606944e-05, "loss": 1.7962, "step": 2052 }, { "epoch": 0.21325438869845226, "grad_norm": 0.35831955075263977, "learning_rate": 8.919237426268246e-05, "loss": 1.6304, "step": 2053 }, { "epoch": 0.2133582632180326, "grad_norm": 0.4297682046890259, "learning_rate": 8.918224034561333e-05, "loss": 1.9758, "step": 2054 }, { "epoch": 0.21346213773761297, "grad_norm": 0.3815910518169403, "learning_rate": 8.917210225594122e-05, "loss": 1.7476, "step": 2055 }, { "epoch": 0.21356601225719332, "grad_norm": 0.3674294650554657, "learning_rate": 8.916195999474579e-05, "loss": 1.7741, "step": 2056 }, { "epoch": 0.21366988677677365, "grad_norm": 0.3721073567867279, "learning_rate": 8.915181356310709e-05, "loss": 1.7451, "step": 2057 }, { "epoch": 0.213773761296354, "grad_norm": 0.379859983921051, "learning_rate": 8.914166296210564e-05, "loss": 1.9665, "step": 2058 }, { "epoch": 0.21387763581593436, "grad_norm": 0.3658583462238312, "learning_rate": 8.913150819282242e-05, "loss": 1.6796, "step": 2059 }, { "epoch": 0.21398151033551469, "grad_norm": 0.3702607750892639, "learning_rate": 8.912134925633878e-05, "loss": 1.7107, "step": 2060 }, { "epoch": 0.21408538485509504, "grad_norm": 0.37530508637428284, "learning_rate": 8.911118615373661e-05, "loss": 1.6896, "step": 2061 }, { "epoch": 0.2141892593746754, "grad_norm": 0.3804415464401245, "learning_rate": 8.910101888609821e-05, "loss": 1.8049, "step": 2062 }, { "epoch": 0.21429313389425575, "grad_norm": 0.36921432614326477, "learning_rate": 8.909084745450628e-05, "loss": 1.7652, "step": 2063 }, { "epoch": 0.21439700841383608, "grad_norm": 0.38994041085243225, "learning_rate": 8.908067186004405e-05, "loss": 1.701, "step": 2064 }, { "epoch": 0.21450088293341643, "grad_norm": 0.3705255389213562, "learning_rate": 8.907049210379508e-05, "loss": 1.781, "step": 2065 }, { "epoch": 0.21460475745299679, "grad_norm": 0.3754466772079468, "learning_rate": 8.906030818684348e-05, "loss": 1.6878, "step": 2066 }, { "epoch": 0.2147086319725771, "grad_norm": 0.3889169991016388, "learning_rate": 8.905012011027372e-05, "loss": 1.6964, "step": 2067 }, { "epoch": 0.21481250649215747, "grad_norm": 0.3674185872077942, "learning_rate": 8.903992787517078e-05, "loss": 1.2466, "step": 2068 }, { "epoch": 0.21491638101173782, "grad_norm": 0.37006455659866333, "learning_rate": 8.902973148262004e-05, "loss": 1.7607, "step": 2069 }, { "epoch": 0.21502025553131818, "grad_norm": 0.3947128355503082, "learning_rate": 8.901953093370734e-05, "loss": 1.8514, "step": 2070 }, { "epoch": 0.2151241300508985, "grad_norm": 0.36148831248283386, "learning_rate": 8.900932622951897e-05, "loss": 1.5947, "step": 2071 }, { "epoch": 0.21522800457047886, "grad_norm": 0.34942540526390076, "learning_rate": 8.899911737114163e-05, "loss": 1.7449, "step": 2072 }, { "epoch": 0.21533187909005921, "grad_norm": 0.3522939383983612, "learning_rate": 8.898890435966251e-05, "loss": 1.7266, "step": 2073 }, { "epoch": 0.21543575360963957, "grad_norm": 0.4223746955394745, "learning_rate": 8.897868719616919e-05, "loss": 1.8562, "step": 2074 }, { "epoch": 0.2155396281292199, "grad_norm": 0.3850705921649933, "learning_rate": 8.896846588174973e-05, "loss": 1.8528, "step": 2075 }, { "epoch": 0.21564350264880025, "grad_norm": 0.391237735748291, "learning_rate": 8.89582404174926e-05, "loss": 1.6268, "step": 2076 }, { "epoch": 0.2157473771683806, "grad_norm": 0.40240150690078735, "learning_rate": 8.89480108044868e-05, "loss": 1.7256, "step": 2077 }, { "epoch": 0.21585125168796093, "grad_norm": 0.400774210691452, "learning_rate": 8.893777704382163e-05, "loss": 1.7654, "step": 2078 }, { "epoch": 0.2159551262075413, "grad_norm": 0.4116402268409729, "learning_rate": 8.892753913658691e-05, "loss": 1.5765, "step": 2079 }, { "epoch": 0.21605900072712164, "grad_norm": 0.3588711619377136, "learning_rate": 8.891729708387294e-05, "loss": 1.7399, "step": 2080 }, { "epoch": 0.216162875246702, "grad_norm": 0.40262454748153687, "learning_rate": 8.890705088677039e-05, "loss": 1.8868, "step": 2081 }, { "epoch": 0.21626674976628232, "grad_norm": 0.38147544860839844, "learning_rate": 8.889680054637042e-05, "loss": 1.907, "step": 2082 }, { "epoch": 0.21637062428586268, "grad_norm": 0.40591171383857727, "learning_rate": 8.888654606376459e-05, "loss": 1.9046, "step": 2083 }, { "epoch": 0.21647449880544303, "grad_norm": 0.3788270652294159, "learning_rate": 8.887628744004493e-05, "loss": 1.7253, "step": 2084 }, { "epoch": 0.21657837332502336, "grad_norm": 0.38902533054351807, "learning_rate": 8.88660246763039e-05, "loss": 1.7522, "step": 2085 }, { "epoch": 0.21668224784460371, "grad_norm": 0.3673681318759918, "learning_rate": 8.885575777363442e-05, "loss": 1.7231, "step": 2086 }, { "epoch": 0.21678612236418407, "grad_norm": 0.33182385563850403, "learning_rate": 8.884548673312981e-05, "loss": 1.6142, "step": 2087 }, { "epoch": 0.21688999688376442, "grad_norm": 0.38326361775398254, "learning_rate": 8.883521155588388e-05, "loss": 1.8635, "step": 2088 }, { "epoch": 0.21699387140334475, "grad_norm": 0.38930484652519226, "learning_rate": 8.882493224299084e-05, "loss": 1.6453, "step": 2089 }, { "epoch": 0.2170977459229251, "grad_norm": 0.3819931447505951, "learning_rate": 8.881464879554536e-05, "loss": 1.821, "step": 2090 }, { "epoch": 0.21720162044250546, "grad_norm": 0.3670012354850769, "learning_rate": 8.880436121464255e-05, "loss": 1.8536, "step": 2091 }, { "epoch": 0.2173054949620858, "grad_norm": 0.3817991614341736, "learning_rate": 8.879406950137796e-05, "loss": 1.7896, "step": 2092 }, { "epoch": 0.21740936948166614, "grad_norm": 0.44066059589385986, "learning_rate": 8.878377365684758e-05, "loss": 1.7763, "step": 2093 }, { "epoch": 0.2175132440012465, "grad_norm": 0.362020343542099, "learning_rate": 8.877347368214783e-05, "loss": 1.7594, "step": 2094 }, { "epoch": 0.21761711852082685, "grad_norm": 0.385455459356308, "learning_rate": 8.876316957837556e-05, "loss": 1.9007, "step": 2095 }, { "epoch": 0.21772099304040718, "grad_norm": 0.3932596743106842, "learning_rate": 8.87528613466281e-05, "loss": 1.6164, "step": 2096 }, { "epoch": 0.21782486755998753, "grad_norm": 0.3642088770866394, "learning_rate": 8.874254898800321e-05, "loss": 1.626, "step": 2097 }, { "epoch": 0.2179287420795679, "grad_norm": 0.4024796783924103, "learning_rate": 8.873223250359903e-05, "loss": 1.8004, "step": 2098 }, { "epoch": 0.21803261659914822, "grad_norm": 0.3582911491394043, "learning_rate": 8.872191189451422e-05, "loss": 1.7169, "step": 2099 }, { "epoch": 0.21813649111872857, "grad_norm": 0.40865403413772583, "learning_rate": 8.871158716184784e-05, "loss": 1.6472, "step": 2100 }, { "epoch": 0.21824036563830893, "grad_norm": 0.39730289578437805, "learning_rate": 8.87012583066994e-05, "loss": 1.6177, "step": 2101 }, { "epoch": 0.21834424015788928, "grad_norm": 0.3871819078922272, "learning_rate": 8.869092533016882e-05, "loss": 1.6924, "step": 2102 }, { "epoch": 0.2184481146774696, "grad_norm": 0.377941757440567, "learning_rate": 8.86805882333565e-05, "loss": 1.8299, "step": 2103 }, { "epoch": 0.21855198919704996, "grad_norm": 0.397905558347702, "learning_rate": 8.867024701736325e-05, "loss": 1.8411, "step": 2104 }, { "epoch": 0.21865586371663032, "grad_norm": 0.3686088025569916, "learning_rate": 8.865990168329031e-05, "loss": 1.6618, "step": 2105 }, { "epoch": 0.21875973823621067, "grad_norm": 0.394415020942688, "learning_rate": 8.864955223223943e-05, "loss": 1.8495, "step": 2106 }, { "epoch": 0.218863612755791, "grad_norm": 0.3614397943019867, "learning_rate": 8.863919866531269e-05, "loss": 1.7565, "step": 2107 }, { "epoch": 0.21896748727537135, "grad_norm": 0.3643103837966919, "learning_rate": 8.862884098361271e-05, "loss": 1.7943, "step": 2108 }, { "epoch": 0.2190713617949517, "grad_norm": 0.37451231479644775, "learning_rate": 8.861847918824248e-05, "loss": 1.5722, "step": 2109 }, { "epoch": 0.21917523631453203, "grad_norm": 0.40897324681282043, "learning_rate": 8.860811328030545e-05, "loss": 1.8011, "step": 2110 }, { "epoch": 0.2192791108341124, "grad_norm": 0.38252517580986023, "learning_rate": 8.85977432609055e-05, "loss": 1.7692, "step": 2111 }, { "epoch": 0.21938298535369274, "grad_norm": 0.3585284352302551, "learning_rate": 8.858736913114698e-05, "loss": 1.6858, "step": 2112 }, { "epoch": 0.2194868598732731, "grad_norm": 0.40061917901039124, "learning_rate": 8.857699089213462e-05, "loss": 1.5681, "step": 2113 }, { "epoch": 0.21959073439285343, "grad_norm": 0.38758501410484314, "learning_rate": 8.856660854497367e-05, "loss": 1.7354, "step": 2114 }, { "epoch": 0.21969460891243378, "grad_norm": 0.37637653946876526, "learning_rate": 8.855622209076972e-05, "loss": 1.6739, "step": 2115 }, { "epoch": 0.21979848343201414, "grad_norm": 0.4087028205394745, "learning_rate": 8.854583153062887e-05, "loss": 2.0526, "step": 2116 }, { "epoch": 0.21990235795159446, "grad_norm": 0.3950178623199463, "learning_rate": 8.853543686565765e-05, "loss": 1.8204, "step": 2117 }, { "epoch": 0.22000623247117482, "grad_norm": 0.3853364586830139, "learning_rate": 8.852503809696297e-05, "loss": 1.7236, "step": 2118 }, { "epoch": 0.22011010699075517, "grad_norm": 0.36730098724365234, "learning_rate": 8.851463522565226e-05, "loss": 1.7896, "step": 2119 }, { "epoch": 0.22021398151033553, "grad_norm": 0.3799102008342743, "learning_rate": 8.85042282528333e-05, "loss": 1.8652, "step": 2120 }, { "epoch": 0.22031785602991585, "grad_norm": 0.37538158893585205, "learning_rate": 8.84938171796144e-05, "loss": 1.8323, "step": 2121 }, { "epoch": 0.2204217305494962, "grad_norm": 0.3702995777130127, "learning_rate": 8.848340200710421e-05, "loss": 1.6752, "step": 2122 }, { "epoch": 0.22052560506907656, "grad_norm": 0.3841915726661682, "learning_rate": 8.847298273641192e-05, "loss": 1.8893, "step": 2123 }, { "epoch": 0.2206294795886569, "grad_norm": 0.3636316955089569, "learning_rate": 8.846255936864703e-05, "loss": 1.6729, "step": 2124 }, { "epoch": 0.22073335410823725, "grad_norm": 0.3771231770515442, "learning_rate": 8.845213190491962e-05, "loss": 1.6048, "step": 2125 }, { "epoch": 0.2208372286278176, "grad_norm": 0.4207725524902344, "learning_rate": 8.844170034634007e-05, "loss": 1.9401, "step": 2126 }, { "epoch": 0.22094110314739795, "grad_norm": 0.42666593194007874, "learning_rate": 8.843126469401931e-05, "loss": 1.9608, "step": 2127 }, { "epoch": 0.22104497766697828, "grad_norm": 0.3714624345302582, "learning_rate": 8.842082494906863e-05, "loss": 1.7066, "step": 2128 }, { "epoch": 0.22114885218655864, "grad_norm": 0.41800081729888916, "learning_rate": 8.841038111259979e-05, "loss": 1.8553, "step": 2129 }, { "epoch": 0.221252726706139, "grad_norm": 0.34756773710250854, "learning_rate": 8.839993318572497e-05, "loss": 1.6992, "step": 2130 }, { "epoch": 0.22135660122571932, "grad_norm": 0.3820962905883789, "learning_rate": 8.83894811695568e-05, "loss": 1.9582, "step": 2131 }, { "epoch": 0.22146047574529967, "grad_norm": 0.3578221797943115, "learning_rate": 8.837902506520835e-05, "loss": 1.7399, "step": 2132 }, { "epoch": 0.22156435026488003, "grad_norm": 0.3778248727321625, "learning_rate": 8.836856487379306e-05, "loss": 1.6818, "step": 2133 }, { "epoch": 0.22166822478446038, "grad_norm": 0.36357828974723816, "learning_rate": 8.835810059642493e-05, "loss": 1.6495, "step": 2134 }, { "epoch": 0.2217720993040407, "grad_norm": 0.384027898311615, "learning_rate": 8.834763223421828e-05, "loss": 1.7112, "step": 2135 }, { "epoch": 0.22187597382362106, "grad_norm": 0.3461948037147522, "learning_rate": 8.833715978828793e-05, "loss": 1.713, "step": 2136 }, { "epoch": 0.22197984834320142, "grad_norm": 0.38593146204948425, "learning_rate": 8.83266832597491e-05, "loss": 1.6994, "step": 2137 }, { "epoch": 0.22208372286278175, "grad_norm": 0.346113383769989, "learning_rate": 8.831620264971745e-05, "loss": 1.6051, "step": 2138 }, { "epoch": 0.2221875973823621, "grad_norm": 0.3933405876159668, "learning_rate": 8.830571795930911e-05, "loss": 1.678, "step": 2139 }, { "epoch": 0.22229147190194246, "grad_norm": 0.34719663858413696, "learning_rate": 8.829522918964058e-05, "loss": 1.6881, "step": 2140 }, { "epoch": 0.2223953464215228, "grad_norm": 0.3760213553905487, "learning_rate": 8.828473634182888e-05, "loss": 1.8456, "step": 2141 }, { "epoch": 0.22249922094110314, "grad_norm": 0.3988967537879944, "learning_rate": 8.827423941699137e-05, "loss": 1.8562, "step": 2142 }, { "epoch": 0.2226030954606835, "grad_norm": 0.40549615025520325, "learning_rate": 8.826373841624593e-05, "loss": 1.7444, "step": 2143 }, { "epoch": 0.22270696998026385, "grad_norm": 0.37362509965896606, "learning_rate": 8.825323334071083e-05, "loss": 1.7901, "step": 2144 }, { "epoch": 0.2228108444998442, "grad_norm": 0.34235090017318726, "learning_rate": 8.824272419150475e-05, "loss": 1.7461, "step": 2145 }, { "epoch": 0.22291471901942453, "grad_norm": 0.3918958306312561, "learning_rate": 8.823221096974684e-05, "loss": 1.782, "step": 2146 }, { "epoch": 0.22301859353900488, "grad_norm": 0.37165430188179016, "learning_rate": 8.822169367655669e-05, "loss": 1.803, "step": 2147 }, { "epoch": 0.22312246805858524, "grad_norm": 0.3669700026512146, "learning_rate": 8.821117231305431e-05, "loss": 1.8741, "step": 2148 }, { "epoch": 0.22322634257816557, "grad_norm": 0.3532780408859253, "learning_rate": 8.820064688036014e-05, "loss": 1.7279, "step": 2149 }, { "epoch": 0.22333021709774592, "grad_norm": 0.3830491006374359, "learning_rate": 8.819011737959504e-05, "loss": 1.8127, "step": 2150 }, { "epoch": 0.22343409161732627, "grad_norm": 0.36550477147102356, "learning_rate": 8.817958381188033e-05, "loss": 1.7985, "step": 2151 }, { "epoch": 0.22353796613690663, "grad_norm": 0.3734526038169861, "learning_rate": 8.816904617833778e-05, "loss": 1.7096, "step": 2152 }, { "epoch": 0.22364184065648696, "grad_norm": 0.36251428723335266, "learning_rate": 8.815850448008953e-05, "loss": 1.7936, "step": 2153 }, { "epoch": 0.2237457151760673, "grad_norm": 0.3801165223121643, "learning_rate": 8.81479587182582e-05, "loss": 1.7445, "step": 2154 }, { "epoch": 0.22384958969564767, "grad_norm": 0.38031628727912903, "learning_rate": 8.813740889396682e-05, "loss": 1.8105, "step": 2155 }, { "epoch": 0.223953464215228, "grad_norm": 0.42004433274269104, "learning_rate": 8.81268550083389e-05, "loss": 1.7913, "step": 2156 }, { "epoch": 0.22405733873480835, "grad_norm": 0.38428744673728943, "learning_rate": 8.81162970624983e-05, "loss": 1.805, "step": 2157 }, { "epoch": 0.2241612132543887, "grad_norm": 0.4019632935523987, "learning_rate": 8.81057350575694e-05, "loss": 1.6997, "step": 2158 }, { "epoch": 0.22426508777396906, "grad_norm": 0.3467335104942322, "learning_rate": 8.809516899467694e-05, "loss": 1.5612, "step": 2159 }, { "epoch": 0.22436896229354938, "grad_norm": 0.36768898367881775, "learning_rate": 8.808459887494615e-05, "loss": 1.6928, "step": 2160 }, { "epoch": 0.22447283681312974, "grad_norm": 0.41053932905197144, "learning_rate": 8.807402469950263e-05, "loss": 1.7605, "step": 2161 }, { "epoch": 0.2245767113327101, "grad_norm": 0.36271244287490845, "learning_rate": 8.806344646947249e-05, "loss": 1.7773, "step": 2162 }, { "epoch": 0.22468058585229042, "grad_norm": 0.38008955121040344, "learning_rate": 8.80528641859822e-05, "loss": 1.8473, "step": 2163 }, { "epoch": 0.22478446037187078, "grad_norm": 0.382253497838974, "learning_rate": 8.804227785015869e-05, "loss": 1.686, "step": 2164 }, { "epoch": 0.22488833489145113, "grad_norm": 0.3781398832798004, "learning_rate": 8.803168746312934e-05, "loss": 1.7894, "step": 2165 }, { "epoch": 0.22499220941103149, "grad_norm": 0.39482182264328003, "learning_rate": 8.802109302602193e-05, "loss": 1.8895, "step": 2166 }, { "epoch": 0.2250960839306118, "grad_norm": 0.40680569410324097, "learning_rate": 8.801049453996468e-05, "loss": 2.0375, "step": 2167 }, { "epoch": 0.22519995845019217, "grad_norm": 0.3483993113040924, "learning_rate": 8.799989200608627e-05, "loss": 1.6489, "step": 2168 }, { "epoch": 0.22530383296977252, "grad_norm": 0.3627791702747345, "learning_rate": 8.798928542551576e-05, "loss": 1.8188, "step": 2169 }, { "epoch": 0.22540770748935285, "grad_norm": 0.3764524459838867, "learning_rate": 8.797867479938269e-05, "loss": 1.9716, "step": 2170 }, { "epoch": 0.2255115820089332, "grad_norm": 0.3761730492115021, "learning_rate": 8.796806012881699e-05, "loss": 1.857, "step": 2171 }, { "epoch": 0.22561545652851356, "grad_norm": 0.3673245906829834, "learning_rate": 8.795744141494905e-05, "loss": 1.6663, "step": 2172 }, { "epoch": 0.2257193310480939, "grad_norm": 0.3899873197078705, "learning_rate": 8.794681865890968e-05, "loss": 1.7631, "step": 2173 }, { "epoch": 0.22582320556767424, "grad_norm": 0.36863207817077637, "learning_rate": 8.793619186183011e-05, "loss": 1.8269, "step": 2174 }, { "epoch": 0.2259270800872546, "grad_norm": 0.36547142267227173, "learning_rate": 8.792556102484204e-05, "loss": 1.6963, "step": 2175 }, { "epoch": 0.22603095460683495, "grad_norm": 0.34882575273513794, "learning_rate": 8.791492614907754e-05, "loss": 1.6336, "step": 2176 }, { "epoch": 0.22613482912641528, "grad_norm": 0.38748350739479065, "learning_rate": 8.790428723566915e-05, "loss": 1.8569, "step": 2177 }, { "epoch": 0.22623870364599563, "grad_norm": 0.362943559885025, "learning_rate": 8.789364428574984e-05, "loss": 1.7161, "step": 2178 }, { "epoch": 0.22634257816557599, "grad_norm": 0.37946629524230957, "learning_rate": 8.788299730045299e-05, "loss": 1.8211, "step": 2179 }, { "epoch": 0.22644645268515634, "grad_norm": 0.3608231842517853, "learning_rate": 8.787234628091243e-05, "loss": 1.7704, "step": 2180 }, { "epoch": 0.22655032720473667, "grad_norm": 0.38280248641967773, "learning_rate": 8.786169122826242e-05, "loss": 1.7376, "step": 2181 }, { "epoch": 0.22665420172431702, "grad_norm": 0.36834195256233215, "learning_rate": 8.785103214363761e-05, "loss": 1.6503, "step": 2182 }, { "epoch": 0.22675807624389738, "grad_norm": 0.3779491186141968, "learning_rate": 8.784036902817312e-05, "loss": 1.7421, "step": 2183 }, { "epoch": 0.22686195076347773, "grad_norm": 0.40714192390441895, "learning_rate": 8.78297018830045e-05, "loss": 1.7828, "step": 2184 }, { "epoch": 0.22696582528305806, "grad_norm": 0.36981451511383057, "learning_rate": 8.781903070926774e-05, "loss": 1.7687, "step": 2185 }, { "epoch": 0.22706969980263841, "grad_norm": 0.37374863028526306, "learning_rate": 8.780835550809918e-05, "loss": 1.7469, "step": 2186 }, { "epoch": 0.22717357432221877, "grad_norm": 0.3481188416481018, "learning_rate": 8.779767628063567e-05, "loss": 1.5268, "step": 2187 }, { "epoch": 0.2272774488417991, "grad_norm": 0.352603942155838, "learning_rate": 8.77869930280145e-05, "loss": 1.5966, "step": 2188 }, { "epoch": 0.22738132336137945, "grad_norm": 0.38702160120010376, "learning_rate": 8.77763057513733e-05, "loss": 1.7469, "step": 2189 }, { "epoch": 0.2274851978809598, "grad_norm": 0.37263423204421997, "learning_rate": 8.77656144518502e-05, "loss": 1.6986, "step": 2190 }, { "epoch": 0.22758907240054016, "grad_norm": 0.3658749759197235, "learning_rate": 8.775491913058376e-05, "loss": 1.7425, "step": 2191 }, { "epoch": 0.2276929469201205, "grad_norm": 0.38823366165161133, "learning_rate": 8.774421978871292e-05, "loss": 1.7665, "step": 2192 }, { "epoch": 0.22779682143970084, "grad_norm": 0.35970309376716614, "learning_rate": 8.773351642737708e-05, "loss": 1.6914, "step": 2193 }, { "epoch": 0.2279006959592812, "grad_norm": 0.3693389892578125, "learning_rate": 8.772280904771608e-05, "loss": 1.8031, "step": 2194 }, { "epoch": 0.22800457047886152, "grad_norm": 0.34769538044929504, "learning_rate": 8.771209765087018e-05, "loss": 1.7248, "step": 2195 }, { "epoch": 0.22810844499844188, "grad_norm": 0.37290412187576294, "learning_rate": 8.770138223798003e-05, "loss": 1.7994, "step": 2196 }, { "epoch": 0.22821231951802223, "grad_norm": 0.37960392236709595, "learning_rate": 8.769066281018676e-05, "loss": 1.8668, "step": 2197 }, { "epoch": 0.2283161940376026, "grad_norm": 0.3551698625087738, "learning_rate": 8.76799393686319e-05, "loss": 1.7928, "step": 2198 }, { "epoch": 0.22842006855718291, "grad_norm": 0.3406142294406891, "learning_rate": 8.766921191445742e-05, "loss": 1.6981, "step": 2199 }, { "epoch": 0.22852394307676327, "grad_norm": 0.410206139087677, "learning_rate": 8.765848044880569e-05, "loss": 1.634, "step": 2200 }, { "epoch": 0.22862781759634362, "grad_norm": 0.42800506949424744, "learning_rate": 8.764774497281954e-05, "loss": 1.7564, "step": 2201 }, { "epoch": 0.22873169211592395, "grad_norm": 0.39173564314842224, "learning_rate": 8.763700548764223e-05, "loss": 1.8504, "step": 2202 }, { "epoch": 0.2288355666355043, "grad_norm": 0.35888996720314026, "learning_rate": 8.76262619944174e-05, "loss": 1.5756, "step": 2203 }, { "epoch": 0.22893944115508466, "grad_norm": 0.4852285087108612, "learning_rate": 8.761551449428918e-05, "loss": 1.9804, "step": 2204 }, { "epoch": 0.22904331567466502, "grad_norm": 0.4047568440437317, "learning_rate": 8.760476298840206e-05, "loss": 1.81, "step": 2205 }, { "epoch": 0.22914719019424534, "grad_norm": 0.3965972065925598, "learning_rate": 8.7594007477901e-05, "loss": 1.8159, "step": 2206 }, { "epoch": 0.2292510647138257, "grad_norm": 0.4031708538532257, "learning_rate": 8.758324796393142e-05, "loss": 1.8237, "step": 2207 }, { "epoch": 0.22935493923340605, "grad_norm": 0.3491591215133667, "learning_rate": 8.757248444763908e-05, "loss": 1.6837, "step": 2208 }, { "epoch": 0.22945881375298638, "grad_norm": 0.36505648493766785, "learning_rate": 8.756171693017022e-05, "loss": 1.6408, "step": 2209 }, { "epoch": 0.22956268827256673, "grad_norm": 0.394775927066803, "learning_rate": 8.755094541267149e-05, "loss": 1.8816, "step": 2210 }, { "epoch": 0.2296665627921471, "grad_norm": 0.4164559543132782, "learning_rate": 8.754016989629e-05, "loss": 1.9372, "step": 2211 }, { "epoch": 0.22977043731172744, "grad_norm": 0.3899494707584381, "learning_rate": 8.752939038217322e-05, "loss": 1.7356, "step": 2212 }, { "epoch": 0.22987431183130777, "grad_norm": 0.36787334084510803, "learning_rate": 8.751860687146912e-05, "loss": 1.7804, "step": 2213 }, { "epoch": 0.22997818635088813, "grad_norm": 0.3581325113773346, "learning_rate": 8.750781936532604e-05, "loss": 1.825, "step": 2214 }, { "epoch": 0.23008206087046848, "grad_norm": 0.3675483465194702, "learning_rate": 8.749702786489277e-05, "loss": 1.7208, "step": 2215 }, { "epoch": 0.23018593539004883, "grad_norm": 0.38968801498413086, "learning_rate": 8.748623237131853e-05, "loss": 1.8447, "step": 2216 }, { "epoch": 0.23028980990962916, "grad_norm": 0.3647545576095581, "learning_rate": 8.747543288575293e-05, "loss": 1.6169, "step": 2217 }, { "epoch": 0.23039368442920952, "grad_norm": 0.3703446388244629, "learning_rate": 8.746462940934605e-05, "loss": 1.6316, "step": 2218 }, { "epoch": 0.23049755894878987, "grad_norm": 0.3884913921356201, "learning_rate": 8.745382194324839e-05, "loss": 1.7359, "step": 2219 }, { "epoch": 0.2306014334683702, "grad_norm": 0.39596831798553467, "learning_rate": 8.744301048861083e-05, "loss": 1.7425, "step": 2220 }, { "epoch": 0.23070530798795055, "grad_norm": 0.41258224844932556, "learning_rate": 8.743219504658472e-05, "loss": 1.8832, "step": 2221 }, { "epoch": 0.2308091825075309, "grad_norm": 0.3803333342075348, "learning_rate": 8.742137561832182e-05, "loss": 1.8591, "step": 2222 }, { "epoch": 0.23091305702711126, "grad_norm": 0.3818683326244354, "learning_rate": 8.741055220497431e-05, "loss": 1.9393, "step": 2223 }, { "epoch": 0.2310169315466916, "grad_norm": 0.3759542405605316, "learning_rate": 8.739972480769481e-05, "loss": 1.8439, "step": 2224 }, { "epoch": 0.23112080606627194, "grad_norm": 0.3411445617675781, "learning_rate": 8.738889342763635e-05, "loss": 1.7198, "step": 2225 }, { "epoch": 0.2312246805858523, "grad_norm": 0.3545193076133728, "learning_rate": 8.737805806595239e-05, "loss": 1.6465, "step": 2226 }, { "epoch": 0.23132855510543263, "grad_norm": 0.3674876093864441, "learning_rate": 8.73672187237968e-05, "loss": 1.7859, "step": 2227 }, { "epoch": 0.23143242962501298, "grad_norm": 0.3644886612892151, "learning_rate": 8.735637540232389e-05, "loss": 1.7008, "step": 2228 }, { "epoch": 0.23153630414459334, "grad_norm": 0.3750406503677368, "learning_rate": 8.734552810268838e-05, "loss": 1.6847, "step": 2229 }, { "epoch": 0.2316401786641737, "grad_norm": 0.3923105299472809, "learning_rate": 8.733467682604545e-05, "loss": 1.7455, "step": 2230 }, { "epoch": 0.23174405318375402, "grad_norm": 0.3961966931819916, "learning_rate": 8.732382157355066e-05, "loss": 1.6726, "step": 2231 }, { "epoch": 0.23184792770333437, "grad_norm": 0.4062844514846802, "learning_rate": 8.731296234636e-05, "loss": 1.9202, "step": 2232 }, { "epoch": 0.23195180222291473, "grad_norm": 0.4073842167854309, "learning_rate": 8.73020991456299e-05, "loss": 1.9388, "step": 2233 }, { "epoch": 0.23205567674249505, "grad_norm": 0.38866791129112244, "learning_rate": 8.72912319725172e-05, "loss": 1.6893, "step": 2234 }, { "epoch": 0.2321595512620754, "grad_norm": 0.3830127716064453, "learning_rate": 8.72803608281792e-05, "loss": 1.812, "step": 2235 }, { "epoch": 0.23226342578165576, "grad_norm": 0.4291350245475769, "learning_rate": 8.726948571377356e-05, "loss": 1.8426, "step": 2236 }, { "epoch": 0.23236730030123612, "grad_norm": 0.36664703488349915, "learning_rate": 8.72586066304584e-05, "loss": 1.6714, "step": 2237 }, { "epoch": 0.23247117482081645, "grad_norm": 0.34283211827278137, "learning_rate": 8.724772357939229e-05, "loss": 1.7595, "step": 2238 }, { "epoch": 0.2325750493403968, "grad_norm": 0.3679027557373047, "learning_rate": 8.723683656173413e-05, "loss": 1.6493, "step": 2239 }, { "epoch": 0.23267892385997715, "grad_norm": 0.3953079879283905, "learning_rate": 8.722594557864335e-05, "loss": 1.7885, "step": 2240 }, { "epoch": 0.23278279837955748, "grad_norm": 0.37095022201538086, "learning_rate": 8.721505063127972e-05, "loss": 1.6947, "step": 2241 }, { "epoch": 0.23288667289913784, "grad_norm": 0.38997137546539307, "learning_rate": 8.72041517208035e-05, "loss": 1.8158, "step": 2242 }, { "epoch": 0.2329905474187182, "grad_norm": 0.36092621088027954, "learning_rate": 8.719324884837531e-05, "loss": 1.7928, "step": 2243 }, { "epoch": 0.23309442193829855, "grad_norm": 0.3828462064266205, "learning_rate": 8.718234201515627e-05, "loss": 1.7782, "step": 2244 }, { "epoch": 0.23319829645787887, "grad_norm": 0.35145506262779236, "learning_rate": 8.717143122230782e-05, "loss": 1.6527, "step": 2245 }, { "epoch": 0.23330217097745923, "grad_norm": 0.3912096321582794, "learning_rate": 8.71605164709919e-05, "loss": 1.8666, "step": 2246 }, { "epoch": 0.23340604549703958, "grad_norm": 0.3545803725719452, "learning_rate": 8.714959776237083e-05, "loss": 1.7043, "step": 2247 }, { "epoch": 0.2335099200166199, "grad_norm": 0.3926384449005127, "learning_rate": 8.713867509760738e-05, "loss": 2.007, "step": 2248 }, { "epoch": 0.23361379453620026, "grad_norm": 0.3713902533054352, "learning_rate": 8.712774847786471e-05, "loss": 1.7151, "step": 2249 }, { "epoch": 0.23371766905578062, "grad_norm": 0.39191189408302307, "learning_rate": 8.711681790430645e-05, "loss": 1.8144, "step": 2250 }, { "epoch": 0.23382154357536097, "grad_norm": 0.40152493119239807, "learning_rate": 8.710588337809662e-05, "loss": 1.9487, "step": 2251 }, { "epoch": 0.2339254180949413, "grad_norm": 0.3732980489730835, "learning_rate": 8.709494490039963e-05, "loss": 1.6245, "step": 2252 }, { "epoch": 0.23402929261452166, "grad_norm": 0.3951932191848755, "learning_rate": 8.708400247238035e-05, "loss": 1.8078, "step": 2253 }, { "epoch": 0.234133167134102, "grad_norm": 0.3723011910915375, "learning_rate": 8.707305609520408e-05, "loss": 1.7107, "step": 2254 }, { "epoch": 0.23423704165368237, "grad_norm": 0.3788382411003113, "learning_rate": 8.706210577003653e-05, "loss": 1.6238, "step": 2255 }, { "epoch": 0.2343409161732627, "grad_norm": 0.3792799115180969, "learning_rate": 8.705115149804381e-05, "loss": 1.7069, "step": 2256 }, { "epoch": 0.23444479069284305, "grad_norm": 0.3782130181789398, "learning_rate": 8.704019328039244e-05, "loss": 1.6198, "step": 2257 }, { "epoch": 0.2345486652124234, "grad_norm": 0.3798910975456238, "learning_rate": 8.702923111824943e-05, "loss": 1.8176, "step": 2258 }, { "epoch": 0.23465253973200373, "grad_norm": 0.38398462533950806, "learning_rate": 8.701826501278216e-05, "loss": 1.7607, "step": 2259 }, { "epoch": 0.23475641425158408, "grad_norm": 0.3783421814441681, "learning_rate": 8.70072949651584e-05, "loss": 1.765, "step": 2260 }, { "epoch": 0.23486028877116444, "grad_norm": 0.34123924374580383, "learning_rate": 8.69963209765464e-05, "loss": 1.6595, "step": 2261 }, { "epoch": 0.2349641632907448, "grad_norm": 0.33665284514427185, "learning_rate": 8.698534304811478e-05, "loss": 1.4557, "step": 2262 }, { "epoch": 0.23506803781032512, "grad_norm": 0.37269553542137146, "learning_rate": 8.697436118103264e-05, "loss": 1.8497, "step": 2263 }, { "epoch": 0.23517191232990547, "grad_norm": 0.38616353273391724, "learning_rate": 8.696337537646944e-05, "loss": 1.7943, "step": 2264 }, { "epoch": 0.23527578684948583, "grad_norm": 0.4167322814464569, "learning_rate": 8.695238563559509e-05, "loss": 1.9579, "step": 2265 }, { "epoch": 0.23537966136906616, "grad_norm": 0.38582777976989746, "learning_rate": 8.694139195957991e-05, "loss": 1.7782, "step": 2266 }, { "epoch": 0.2354835358886465, "grad_norm": 0.3635927438735962, "learning_rate": 8.693039434959464e-05, "loss": 1.7276, "step": 2267 }, { "epoch": 0.23558741040822687, "grad_norm": 0.42506465315818787, "learning_rate": 8.691939280681045e-05, "loss": 1.818, "step": 2268 }, { "epoch": 0.23569128492780722, "grad_norm": 0.38665080070495605, "learning_rate": 8.69083873323989e-05, "loss": 1.7603, "step": 2269 }, { "epoch": 0.23579515944738755, "grad_norm": 0.3853725790977478, "learning_rate": 8.689737792753198e-05, "loss": 1.6039, "step": 2270 }, { "epoch": 0.2358990339669679, "grad_norm": 0.37736237049102783, "learning_rate": 8.688636459338215e-05, "loss": 1.8416, "step": 2271 }, { "epoch": 0.23600290848654826, "grad_norm": 0.4537546634674072, "learning_rate": 8.68753473311222e-05, "loss": 1.6987, "step": 2272 }, { "epoch": 0.23610678300612858, "grad_norm": 0.3903726041316986, "learning_rate": 8.686432614192538e-05, "loss": 1.9472, "step": 2273 }, { "epoch": 0.23621065752570894, "grad_norm": 0.41961470246315, "learning_rate": 8.68533010269654e-05, "loss": 1.8414, "step": 2274 }, { "epoch": 0.2363145320452893, "grad_norm": 0.36322250962257385, "learning_rate": 8.684227198741633e-05, "loss": 1.5978, "step": 2275 }, { "epoch": 0.23641840656486965, "grad_norm": 0.4004223644733429, "learning_rate": 8.683123902445267e-05, "loss": 1.7981, "step": 2276 }, { "epoch": 0.23652228108444998, "grad_norm": 0.38842740654945374, "learning_rate": 8.682020213924935e-05, "loss": 1.7098, "step": 2277 }, { "epoch": 0.23662615560403033, "grad_norm": 0.3592800199985504, "learning_rate": 8.680916133298171e-05, "loss": 1.661, "step": 2278 }, { "epoch": 0.23673003012361069, "grad_norm": 0.357117623090744, "learning_rate": 8.67981166068255e-05, "loss": 1.5888, "step": 2279 }, { "epoch": 0.236833904643191, "grad_norm": 0.3849972188472748, "learning_rate": 8.678706796195692e-05, "loss": 1.7733, "step": 2280 }, { "epoch": 0.23693777916277137, "grad_norm": 0.36867091059684753, "learning_rate": 8.677601539955256e-05, "loss": 1.8096, "step": 2281 }, { "epoch": 0.23704165368235172, "grad_norm": 0.3629715144634247, "learning_rate": 8.676495892078941e-05, "loss": 1.6919, "step": 2282 }, { "epoch": 0.23714552820193208, "grad_norm": 0.4076031446456909, "learning_rate": 8.675389852684492e-05, "loss": 1.9165, "step": 2283 }, { "epoch": 0.2372494027215124, "grad_norm": 0.37175339460372925, "learning_rate": 8.674283421889691e-05, "loss": 1.6767, "step": 2284 }, { "epoch": 0.23735327724109276, "grad_norm": 0.3934227526187897, "learning_rate": 8.673176599812368e-05, "loss": 1.7891, "step": 2285 }, { "epoch": 0.2374571517606731, "grad_norm": 0.4014788269996643, "learning_rate": 8.672069386570389e-05, "loss": 1.9999, "step": 2286 }, { "epoch": 0.23756102628025344, "grad_norm": 0.35122254490852356, "learning_rate": 8.670961782281664e-05, "loss": 1.8249, "step": 2287 }, { "epoch": 0.2376649007998338, "grad_norm": 0.36434510350227356, "learning_rate": 8.669853787064142e-05, "loss": 1.8532, "step": 2288 }, { "epoch": 0.23776877531941415, "grad_norm": 0.3808654546737671, "learning_rate": 8.668745401035818e-05, "loss": 1.7474, "step": 2289 }, { "epoch": 0.2378726498389945, "grad_norm": 0.3531155288219452, "learning_rate": 8.667636624314725e-05, "loss": 1.8941, "step": 2290 }, { "epoch": 0.23797652435857483, "grad_norm": 0.472049742937088, "learning_rate": 8.666527457018943e-05, "loss": 1.985, "step": 2291 }, { "epoch": 0.23808039887815519, "grad_norm": 0.39368936419487, "learning_rate": 8.665417899266586e-05, "loss": 1.6889, "step": 2292 }, { "epoch": 0.23818427339773554, "grad_norm": 0.3707980811595917, "learning_rate": 8.664307951175814e-05, "loss": 1.5088, "step": 2293 }, { "epoch": 0.2382881479173159, "grad_norm": 0.3548358380794525, "learning_rate": 8.663197612864827e-05, "loss": 1.7288, "step": 2294 }, { "epoch": 0.23839202243689622, "grad_norm": 0.47809383273124695, "learning_rate": 8.662086884451869e-05, "loss": 1.8472, "step": 2295 }, { "epoch": 0.23849589695647658, "grad_norm": 0.4353974163532257, "learning_rate": 8.660975766055224e-05, "loss": 1.8897, "step": 2296 }, { "epoch": 0.23859977147605693, "grad_norm": 0.4466110169887543, "learning_rate": 8.659864257793215e-05, "loss": 1.9391, "step": 2297 }, { "epoch": 0.23870364599563726, "grad_norm": 0.39143791794776917, "learning_rate": 8.65875235978421e-05, "loss": 1.7464, "step": 2298 }, { "epoch": 0.23880752051521761, "grad_norm": 0.38728442788124084, "learning_rate": 8.65764007214662e-05, "loss": 1.7393, "step": 2299 }, { "epoch": 0.23891139503479797, "grad_norm": 0.3674345910549164, "learning_rate": 8.656527394998892e-05, "loss": 1.6336, "step": 2300 }, { "epoch": 0.23901526955437832, "grad_norm": 0.38445186614990234, "learning_rate": 8.655414328459519e-05, "loss": 1.8982, "step": 2301 }, { "epoch": 0.23911914407395865, "grad_norm": 0.38056597113609314, "learning_rate": 8.654300872647033e-05, "loss": 1.6851, "step": 2302 }, { "epoch": 0.239223018593539, "grad_norm": 0.4110845625400543, "learning_rate": 8.65318702768001e-05, "loss": 1.8767, "step": 2303 }, { "epoch": 0.23932689311311936, "grad_norm": 0.38014286756515503, "learning_rate": 8.652072793677061e-05, "loss": 1.725, "step": 2304 }, { "epoch": 0.2394307676326997, "grad_norm": 0.39622190594673157, "learning_rate": 8.650958170756852e-05, "loss": 1.9115, "step": 2305 }, { "epoch": 0.23953464215228004, "grad_norm": 0.36890217661857605, "learning_rate": 8.649843159038071e-05, "loss": 1.698, "step": 2306 }, { "epoch": 0.2396385166718604, "grad_norm": 0.38787323236465454, "learning_rate": 8.648727758639467e-05, "loss": 1.8998, "step": 2307 }, { "epoch": 0.23974239119144075, "grad_norm": 0.38866347074508667, "learning_rate": 8.647611969679816e-05, "loss": 1.7095, "step": 2308 }, { "epoch": 0.23984626571102108, "grad_norm": 0.39878830313682556, "learning_rate": 8.646495792277943e-05, "loss": 1.9387, "step": 2309 }, { "epoch": 0.23995014023060143, "grad_norm": 0.3923484981060028, "learning_rate": 8.645379226552712e-05, "loss": 1.8302, "step": 2310 }, { "epoch": 0.2400540147501818, "grad_norm": 0.3727096617221832, "learning_rate": 8.644262272623029e-05, "loss": 1.6717, "step": 2311 }, { "epoch": 0.24015788926976211, "grad_norm": 0.3535787761211395, "learning_rate": 8.64314493060784e-05, "loss": 1.6446, "step": 2312 }, { "epoch": 0.24026176378934247, "grad_norm": 0.3555033504962921, "learning_rate": 8.642027200626135e-05, "loss": 1.7145, "step": 2313 }, { "epoch": 0.24036563830892282, "grad_norm": 0.3748113512992859, "learning_rate": 8.640909082796939e-05, "loss": 1.7144, "step": 2314 }, { "epoch": 0.24046951282850318, "grad_norm": 0.37921851873397827, "learning_rate": 8.639790577239328e-05, "loss": 1.8516, "step": 2315 }, { "epoch": 0.2405733873480835, "grad_norm": 0.36275529861450195, "learning_rate": 8.638671684072412e-05, "loss": 1.6144, "step": 2316 }, { "epoch": 0.24067726186766386, "grad_norm": 0.35582250356674194, "learning_rate": 8.637552403415343e-05, "loss": 1.7601, "step": 2317 }, { "epoch": 0.24078113638724422, "grad_norm": 0.3954647183418274, "learning_rate": 8.636432735387319e-05, "loss": 1.798, "step": 2318 }, { "epoch": 0.24088501090682454, "grad_norm": 0.36659133434295654, "learning_rate": 8.635312680107572e-05, "loss": 1.551, "step": 2319 }, { "epoch": 0.2409888854264049, "grad_norm": 0.35685068368911743, "learning_rate": 8.634192237695382e-05, "loss": 1.6926, "step": 2320 }, { "epoch": 0.24109275994598525, "grad_norm": 0.35937169194221497, "learning_rate": 8.633071408270065e-05, "loss": 1.604, "step": 2321 }, { "epoch": 0.2411966344655656, "grad_norm": 0.44768527150154114, "learning_rate": 8.631950191950983e-05, "loss": 2.0036, "step": 2322 }, { "epoch": 0.24130050898514593, "grad_norm": 0.37177640199661255, "learning_rate": 8.630828588857537e-05, "loss": 1.7145, "step": 2323 }, { "epoch": 0.2414043835047263, "grad_norm": 0.38756901025772095, "learning_rate": 8.629706599109169e-05, "loss": 1.7844, "step": 2324 }, { "epoch": 0.24150825802430664, "grad_norm": 0.37525761127471924, "learning_rate": 8.628584222825357e-05, "loss": 1.586, "step": 2325 }, { "epoch": 0.241612132543887, "grad_norm": 0.4448417127132416, "learning_rate": 8.627461460125632e-05, "loss": 1.6915, "step": 2326 }, { "epoch": 0.24171600706346733, "grad_norm": 0.39078620076179504, "learning_rate": 8.626338311129557e-05, "loss": 1.7567, "step": 2327 }, { "epoch": 0.24181988158304768, "grad_norm": 0.38780951499938965, "learning_rate": 8.625214775956737e-05, "loss": 1.6969, "step": 2328 }, { "epoch": 0.24192375610262803, "grad_norm": 0.3700442314147949, "learning_rate": 8.624090854726822e-05, "loss": 1.6198, "step": 2329 }, { "epoch": 0.24202763062220836, "grad_norm": 0.37854552268981934, "learning_rate": 8.622966547559499e-05, "loss": 1.7932, "step": 2330 }, { "epoch": 0.24213150514178872, "grad_norm": 0.49389779567718506, "learning_rate": 8.621841854574501e-05, "loss": 1.8561, "step": 2331 }, { "epoch": 0.24223537966136907, "grad_norm": 0.35576608777046204, "learning_rate": 8.620716775891595e-05, "loss": 1.5224, "step": 2332 }, { "epoch": 0.24233925418094943, "grad_norm": 0.37670719623565674, "learning_rate": 8.619591311630595e-05, "loss": 1.8392, "step": 2333 }, { "epoch": 0.24244312870052975, "grad_norm": 0.3652971386909485, "learning_rate": 8.618465461911355e-05, "loss": 1.7624, "step": 2334 }, { "epoch": 0.2425470032201101, "grad_norm": 0.3692745268344879, "learning_rate": 8.617339226853768e-05, "loss": 1.6901, "step": 2335 }, { "epoch": 0.24265087773969046, "grad_norm": 0.36297428607940674, "learning_rate": 8.61621260657777e-05, "loss": 1.8461, "step": 2336 }, { "epoch": 0.2427547522592708, "grad_norm": 0.37688395380973816, "learning_rate": 8.615085601203337e-05, "loss": 1.6562, "step": 2337 }, { "epoch": 0.24285862677885114, "grad_norm": 0.3952527642250061, "learning_rate": 8.613958210850485e-05, "loss": 1.7862, "step": 2338 }, { "epoch": 0.2429625012984315, "grad_norm": 0.35479936003685, "learning_rate": 8.612830435639275e-05, "loss": 1.9233, "step": 2339 }, { "epoch": 0.24306637581801185, "grad_norm": 0.40649887919425964, "learning_rate": 8.611702275689805e-05, "loss": 1.8637, "step": 2340 }, { "epoch": 0.24317025033759218, "grad_norm": 0.36132219433784485, "learning_rate": 8.610573731122214e-05, "loss": 1.7277, "step": 2341 }, { "epoch": 0.24327412485717254, "grad_norm": 0.35456719994544983, "learning_rate": 8.609444802056686e-05, "loss": 1.7141, "step": 2342 }, { "epoch": 0.2433779993767529, "grad_norm": 0.38762810826301575, "learning_rate": 8.608315488613439e-05, "loss": 1.7315, "step": 2343 }, { "epoch": 0.24348187389633322, "grad_norm": 0.3507632911205292, "learning_rate": 8.607185790912739e-05, "loss": 1.712, "step": 2344 }, { "epoch": 0.24358574841591357, "grad_norm": 0.39255252480506897, "learning_rate": 8.60605570907489e-05, "loss": 1.7046, "step": 2345 }, { "epoch": 0.24368962293549393, "grad_norm": 0.35092976689338684, "learning_rate": 8.604925243220235e-05, "loss": 1.8009, "step": 2346 }, { "epoch": 0.24379349745507428, "grad_norm": 0.37961629033088684, "learning_rate": 8.603794393469162e-05, "loss": 1.8602, "step": 2347 }, { "epoch": 0.2438973719746546, "grad_norm": 0.38758939504623413, "learning_rate": 8.602663159942098e-05, "loss": 1.8524, "step": 2348 }, { "epoch": 0.24400124649423496, "grad_norm": 0.3866504430770874, "learning_rate": 8.601531542759506e-05, "loss": 1.722, "step": 2349 }, { "epoch": 0.24410512101381532, "grad_norm": 0.3876989483833313, "learning_rate": 8.600399542041901e-05, "loss": 1.7846, "step": 2350 }, { "epoch": 0.24420899553339565, "grad_norm": 0.3649909198284149, "learning_rate": 8.599267157909827e-05, "loss": 1.7255, "step": 2351 }, { "epoch": 0.244312870052976, "grad_norm": 0.3867836892604828, "learning_rate": 8.598134390483879e-05, "loss": 1.8129, "step": 2352 }, { "epoch": 0.24441674457255635, "grad_norm": 0.40322503447532654, "learning_rate": 8.59700123988468e-05, "loss": 2.0753, "step": 2353 }, { "epoch": 0.2445206190921367, "grad_norm": 0.37695807218551636, "learning_rate": 8.595867706232911e-05, "loss": 1.8298, "step": 2354 }, { "epoch": 0.24462449361171704, "grad_norm": 0.3737477958202362, "learning_rate": 8.594733789649279e-05, "loss": 1.7315, "step": 2355 }, { "epoch": 0.2447283681312974, "grad_norm": 0.3679026663303375, "learning_rate": 8.593599490254538e-05, "loss": 1.6989, "step": 2356 }, { "epoch": 0.24483224265087775, "grad_norm": 0.3665264844894409, "learning_rate": 8.592464808169482e-05, "loss": 1.7326, "step": 2357 }, { "epoch": 0.24493611717045807, "grad_norm": 0.4071952700614929, "learning_rate": 8.591329743514947e-05, "loss": 1.774, "step": 2358 }, { "epoch": 0.24503999169003843, "grad_norm": 0.4158788025379181, "learning_rate": 8.590194296411806e-05, "loss": 2.0193, "step": 2359 }, { "epoch": 0.24514386620961878, "grad_norm": 0.3840784430503845, "learning_rate": 8.589058466980979e-05, "loss": 1.6699, "step": 2360 }, { "epoch": 0.24524774072919914, "grad_norm": 0.36834099888801575, "learning_rate": 8.587922255343421e-05, "loss": 1.6759, "step": 2361 }, { "epoch": 0.24535161524877946, "grad_norm": 0.35662734508514404, "learning_rate": 8.58678566162013e-05, "loss": 1.4792, "step": 2362 }, { "epoch": 0.24545548976835982, "grad_norm": 0.369302362203598, "learning_rate": 8.585648685932141e-05, "loss": 1.7362, "step": 2363 }, { "epoch": 0.24555936428794017, "grad_norm": 0.38440340757369995, "learning_rate": 8.58451132840054e-05, "loss": 1.8942, "step": 2364 }, { "epoch": 0.24566323880752053, "grad_norm": 0.3850240707397461, "learning_rate": 8.58337358914644e-05, "loss": 1.8449, "step": 2365 }, { "epoch": 0.24576711332710086, "grad_norm": 0.393264502286911, "learning_rate": 8.582235468291007e-05, "loss": 1.88, "step": 2366 }, { "epoch": 0.2458709878466812, "grad_norm": 0.40909549593925476, "learning_rate": 8.581096965955436e-05, "loss": 1.8228, "step": 2367 }, { "epoch": 0.24597486236626157, "grad_norm": 0.36390751600265503, "learning_rate": 8.579958082260973e-05, "loss": 1.7457, "step": 2368 }, { "epoch": 0.2460787368858419, "grad_norm": 0.3853476047515869, "learning_rate": 8.5788188173289e-05, "loss": 1.8158, "step": 2369 }, { "epoch": 0.24618261140542225, "grad_norm": 0.37678855657577515, "learning_rate": 8.577679171280537e-05, "loss": 1.8368, "step": 2370 }, { "epoch": 0.2462864859250026, "grad_norm": 0.3758280277252197, "learning_rate": 8.57653914423725e-05, "loss": 1.6665, "step": 2371 }, { "epoch": 0.24639036044458296, "grad_norm": 0.38738977909088135, "learning_rate": 8.575398736320442e-05, "loss": 1.8557, "step": 2372 }, { "epoch": 0.24649423496416328, "grad_norm": 0.35604584217071533, "learning_rate": 8.574257947651558e-05, "loss": 1.6039, "step": 2373 }, { "epoch": 0.24659810948374364, "grad_norm": 0.3709351420402527, "learning_rate": 8.573116778352084e-05, "loss": 1.85, "step": 2374 }, { "epoch": 0.246701984003324, "grad_norm": 0.37650248408317566, "learning_rate": 8.571975228543543e-05, "loss": 1.8249, "step": 2375 }, { "epoch": 0.24680585852290432, "grad_norm": 0.3389437794685364, "learning_rate": 8.570833298347502e-05, "loss": 1.7355, "step": 2376 }, { "epoch": 0.24690973304248467, "grad_norm": 0.3376055061817169, "learning_rate": 8.56969098788557e-05, "loss": 1.5235, "step": 2377 }, { "epoch": 0.24701360756206503, "grad_norm": 0.3974694013595581, "learning_rate": 8.568548297279392e-05, "loss": 1.8553, "step": 2378 }, { "epoch": 0.24711748208164538, "grad_norm": 0.35596492886543274, "learning_rate": 8.567405226650656e-05, "loss": 1.4544, "step": 2379 }, { "epoch": 0.2472213566012257, "grad_norm": 0.37058472633361816, "learning_rate": 8.56626177612109e-05, "loss": 1.7564, "step": 2380 }, { "epoch": 0.24732523112080607, "grad_norm": 0.3887064456939697, "learning_rate": 8.565117945812463e-05, "loss": 1.8172, "step": 2381 }, { "epoch": 0.24742910564038642, "grad_norm": 0.3546907901763916, "learning_rate": 8.563973735846583e-05, "loss": 1.6763, "step": 2382 }, { "epoch": 0.24753298015996675, "grad_norm": 0.38635915517807007, "learning_rate": 8.562829146345301e-05, "loss": 1.7017, "step": 2383 }, { "epoch": 0.2476368546795471, "grad_norm": 0.3599991500377655, "learning_rate": 8.561684177430507e-05, "loss": 1.6087, "step": 2384 }, { "epoch": 0.24774072919912746, "grad_norm": 0.3766249120235443, "learning_rate": 8.560538829224129e-05, "loss": 1.6795, "step": 2385 }, { "epoch": 0.2478446037187078, "grad_norm": 0.38669300079345703, "learning_rate": 8.559393101848139e-05, "loss": 1.7984, "step": 2386 }, { "epoch": 0.24794847823828814, "grad_norm": 0.35774049162864685, "learning_rate": 8.558246995424548e-05, "loss": 1.5587, "step": 2387 }, { "epoch": 0.2480523527578685, "grad_norm": 0.3456031382083893, "learning_rate": 8.557100510075406e-05, "loss": 1.6269, "step": 2388 }, { "epoch": 0.24815622727744885, "grad_norm": 0.42325082421302795, "learning_rate": 8.555953645922809e-05, "loss": 2.0851, "step": 2389 }, { "epoch": 0.24826010179702918, "grad_norm": 0.32227158546447754, "learning_rate": 8.554806403088884e-05, "loss": 1.5321, "step": 2390 }, { "epoch": 0.24836397631660953, "grad_norm": 0.35737884044647217, "learning_rate": 8.553658781695807e-05, "loss": 1.7002, "step": 2391 }, { "epoch": 0.24846785083618989, "grad_norm": 0.3779980540275574, "learning_rate": 8.552510781865788e-05, "loss": 1.74, "step": 2392 }, { "epoch": 0.24857172535577024, "grad_norm": 0.3488198518753052, "learning_rate": 8.551362403721084e-05, "loss": 1.7332, "step": 2393 }, { "epoch": 0.24867559987535057, "grad_norm": 0.3871583342552185, "learning_rate": 8.550213647383982e-05, "loss": 1.8173, "step": 2394 }, { "epoch": 0.24877947439493092, "grad_norm": 0.3559076488018036, "learning_rate": 8.549064512976822e-05, "loss": 1.824, "step": 2395 }, { "epoch": 0.24888334891451128, "grad_norm": 0.36714842915534973, "learning_rate": 8.547915000621974e-05, "loss": 1.7691, "step": 2396 }, { "epoch": 0.2489872234340916, "grad_norm": 0.35242655873298645, "learning_rate": 8.546765110441855e-05, "loss": 1.7228, "step": 2397 }, { "epoch": 0.24909109795367196, "grad_norm": 0.3446803689002991, "learning_rate": 8.545614842558915e-05, "loss": 1.613, "step": 2398 }, { "epoch": 0.2491949724732523, "grad_norm": 0.36509135365486145, "learning_rate": 8.544464197095651e-05, "loss": 1.6443, "step": 2399 }, { "epoch": 0.24929884699283267, "grad_norm": 0.3790837228298187, "learning_rate": 8.5433131741746e-05, "loss": 1.7364, "step": 2400 }, { "epoch": 0.249402721512413, "grad_norm": 0.4487643837928772, "learning_rate": 8.542161773918334e-05, "loss": 1.8304, "step": 2401 }, { "epoch": 0.24950659603199335, "grad_norm": 0.3896706700325012, "learning_rate": 8.54100999644947e-05, "loss": 1.7448, "step": 2402 }, { "epoch": 0.2496104705515737, "grad_norm": 0.382664293050766, "learning_rate": 8.53985784189066e-05, "loss": 1.7384, "step": 2403 }, { "epoch": 0.24971434507115406, "grad_norm": 0.3803345561027527, "learning_rate": 8.538705310364603e-05, "loss": 1.8425, "step": 2404 }, { "epoch": 0.24981821959073439, "grad_norm": 0.37627753615379333, "learning_rate": 8.537552401994034e-05, "loss": 1.7596, "step": 2405 }, { "epoch": 0.24992209411031474, "grad_norm": 0.3746028542518616, "learning_rate": 8.536399116901728e-05, "loss": 1.8014, "step": 2406 }, { "epoch": 0.25002596862989507, "grad_norm": 0.3502449095249176, "learning_rate": 8.535245455210501e-05, "loss": 1.5849, "step": 2407 }, { "epoch": 0.25012984314947545, "grad_norm": 0.40077903866767883, "learning_rate": 8.534091417043208e-05, "loss": 1.9111, "step": 2408 }, { "epoch": 0.2502337176690558, "grad_norm": 0.3578510284423828, "learning_rate": 8.532937002522747e-05, "loss": 1.6662, "step": 2409 }, { "epoch": 0.2503375921886361, "grad_norm": 0.38965925574302673, "learning_rate": 8.531782211772052e-05, "loss": 1.5775, "step": 2410 }, { "epoch": 0.2504414667082165, "grad_norm": 0.35679998993873596, "learning_rate": 8.530627044914101e-05, "loss": 1.796, "step": 2411 }, { "epoch": 0.2505453412277968, "grad_norm": 0.3703193962574005, "learning_rate": 8.52947150207191e-05, "loss": 1.8143, "step": 2412 }, { "epoch": 0.25064921574737714, "grad_norm": 0.3648841977119446, "learning_rate": 8.528315583368536e-05, "loss": 1.7078, "step": 2413 }, { "epoch": 0.2507530902669575, "grad_norm": 0.38748499751091003, "learning_rate": 8.527159288927074e-05, "loss": 1.876, "step": 2414 }, { "epoch": 0.25085696478653785, "grad_norm": 0.37417733669281006, "learning_rate": 8.526002618870659e-05, "loss": 1.646, "step": 2415 }, { "epoch": 0.25096083930611823, "grad_norm": 0.3735947012901306, "learning_rate": 8.52484557332247e-05, "loss": 1.8018, "step": 2416 }, { "epoch": 0.25106471382569856, "grad_norm": 0.36857619881629944, "learning_rate": 8.523688152405722e-05, "loss": 1.6909, "step": 2417 }, { "epoch": 0.2511685883452789, "grad_norm": 0.41932663321495056, "learning_rate": 8.522530356243671e-05, "loss": 1.6112, "step": 2418 }, { "epoch": 0.25127246286485927, "grad_norm": 0.4023738503456116, "learning_rate": 8.521372184959615e-05, "loss": 1.9221, "step": 2419 }, { "epoch": 0.2513763373844396, "grad_norm": 0.35625362396240234, "learning_rate": 8.520213638676889e-05, "loss": 1.6999, "step": 2420 }, { "epoch": 0.2514802119040199, "grad_norm": 0.36791056394577026, "learning_rate": 8.519054717518868e-05, "loss": 1.8009, "step": 2421 }, { "epoch": 0.2515840864236003, "grad_norm": 0.3679400086402893, "learning_rate": 8.517895421608972e-05, "loss": 1.6935, "step": 2422 }, { "epoch": 0.25168796094318063, "grad_norm": 0.36350300908088684, "learning_rate": 8.516735751070652e-05, "loss": 1.7611, "step": 2423 }, { "epoch": 0.25179183546276096, "grad_norm": 0.36680689454078674, "learning_rate": 8.515575706027406e-05, "loss": 1.7451, "step": 2424 }, { "epoch": 0.25189570998234134, "grad_norm": 0.3586255609989166, "learning_rate": 8.514415286602771e-05, "loss": 1.6652, "step": 2425 }, { "epoch": 0.25199958450192167, "grad_norm": 0.380088210105896, "learning_rate": 8.51325449292032e-05, "loss": 1.7082, "step": 2426 }, { "epoch": 0.25210345902150205, "grad_norm": 0.3451653718948364, "learning_rate": 8.512093325103671e-05, "loss": 1.6654, "step": 2427 }, { "epoch": 0.2522073335410824, "grad_norm": 0.42833617329597473, "learning_rate": 8.510931783276477e-05, "loss": 1.9137, "step": 2428 }, { "epoch": 0.2523112080606627, "grad_norm": 0.42482277750968933, "learning_rate": 8.509769867562436e-05, "loss": 2.0342, "step": 2429 }, { "epoch": 0.2524150825802431, "grad_norm": 0.3890083134174347, "learning_rate": 8.508607578085282e-05, "loss": 1.7393, "step": 2430 }, { "epoch": 0.2525189570998234, "grad_norm": 0.37014102935791016, "learning_rate": 8.507444914968788e-05, "loss": 1.8187, "step": 2431 }, { "epoch": 0.25262283161940374, "grad_norm": 0.370259553194046, "learning_rate": 8.506281878336768e-05, "loss": 1.733, "step": 2432 }, { "epoch": 0.2527267061389841, "grad_norm": 0.37391120195388794, "learning_rate": 8.50511846831308e-05, "loss": 1.691, "step": 2433 }, { "epoch": 0.25283058065856445, "grad_norm": 0.3874087631702423, "learning_rate": 8.503954685021616e-05, "loss": 1.7962, "step": 2434 }, { "epoch": 0.2529344551781448, "grad_norm": 0.3736341893672943, "learning_rate": 8.50279052858631e-05, "loss": 1.8226, "step": 2435 }, { "epoch": 0.25303832969772516, "grad_norm": 0.37461403012275696, "learning_rate": 8.501625999131134e-05, "loss": 1.8345, "step": 2436 }, { "epoch": 0.2531422042173055, "grad_norm": 0.39281967282295227, "learning_rate": 8.500461096780105e-05, "loss": 1.6779, "step": 2437 }, { "epoch": 0.2532460787368858, "grad_norm": 0.4174926280975342, "learning_rate": 8.499295821657273e-05, "loss": 1.8124, "step": 2438 }, { "epoch": 0.2533499532564662, "grad_norm": 0.4330326318740845, "learning_rate": 8.498130173886731e-05, "loss": 1.9252, "step": 2439 }, { "epoch": 0.2534538277760465, "grad_norm": 0.357661634683609, "learning_rate": 8.496964153592613e-05, "loss": 1.7423, "step": 2440 }, { "epoch": 0.2535577022956269, "grad_norm": 0.3977978527545929, "learning_rate": 8.495797760899088e-05, "loss": 1.4438, "step": 2441 }, { "epoch": 0.25366157681520723, "grad_norm": 0.4523005783557892, "learning_rate": 8.494630995930372e-05, "loss": 1.5389, "step": 2442 }, { "epoch": 0.25376545133478756, "grad_norm": 0.3693365156650543, "learning_rate": 8.493463858810713e-05, "loss": 1.8215, "step": 2443 }, { "epoch": 0.25386932585436794, "grad_norm": 0.40159082412719727, "learning_rate": 8.492296349664401e-05, "loss": 1.8385, "step": 2444 }, { "epoch": 0.25397320037394827, "grad_norm": 0.3591073751449585, "learning_rate": 8.491128468615772e-05, "loss": 1.642, "step": 2445 }, { "epoch": 0.2540770748935286, "grad_norm": 0.3740187883377075, "learning_rate": 8.489960215789192e-05, "loss": 1.8311, "step": 2446 }, { "epoch": 0.254180949413109, "grad_norm": 0.38675010204315186, "learning_rate": 8.488791591309072e-05, "loss": 1.7373, "step": 2447 }, { "epoch": 0.2542848239326893, "grad_norm": 0.34369611740112305, "learning_rate": 8.48762259529986e-05, "loss": 1.7054, "step": 2448 }, { "epoch": 0.25438869845226963, "grad_norm": 0.3643989861011505, "learning_rate": 8.486453227886049e-05, "loss": 1.6371, "step": 2449 }, { "epoch": 0.25449257297185, "grad_norm": 0.38395020365715027, "learning_rate": 8.485283489192162e-05, "loss": 1.5869, "step": 2450 }, { "epoch": 0.25459644749143034, "grad_norm": 0.365596741437912, "learning_rate": 8.48411337934277e-05, "loss": 1.5886, "step": 2451 }, { "epoch": 0.25470032201101067, "grad_norm": 0.3677592873573303, "learning_rate": 8.48294289846248e-05, "loss": 1.8044, "step": 2452 }, { "epoch": 0.25480419653059105, "grad_norm": 0.39587604999542236, "learning_rate": 8.48177204667594e-05, "loss": 1.8413, "step": 2453 }, { "epoch": 0.2549080710501714, "grad_norm": 0.3834674656391144, "learning_rate": 8.480600824107837e-05, "loss": 1.6651, "step": 2454 }, { "epoch": 0.25501194556975176, "grad_norm": 0.38077157735824585, "learning_rate": 8.479429230882893e-05, "loss": 1.558, "step": 2455 }, { "epoch": 0.2551158200893321, "grad_norm": 0.4156430959701538, "learning_rate": 8.478257267125878e-05, "loss": 1.7901, "step": 2456 }, { "epoch": 0.2552196946089124, "grad_norm": 0.3814376890659332, "learning_rate": 8.477084932961596e-05, "loss": 1.6559, "step": 2457 }, { "epoch": 0.2553235691284928, "grad_norm": 0.3800637722015381, "learning_rate": 8.475912228514889e-05, "loss": 1.7842, "step": 2458 }, { "epoch": 0.2554274436480731, "grad_norm": 0.36574795842170715, "learning_rate": 8.474739153910646e-05, "loss": 1.6281, "step": 2459 }, { "epoch": 0.25553131816765345, "grad_norm": 0.3729517161846161, "learning_rate": 8.473565709273785e-05, "loss": 1.8031, "step": 2460 }, { "epoch": 0.25563519268723384, "grad_norm": 0.4632837176322937, "learning_rate": 8.47239189472927e-05, "loss": 1.9434, "step": 2461 }, { "epoch": 0.25573906720681416, "grad_norm": 0.3544566333293915, "learning_rate": 8.471217710402106e-05, "loss": 1.8198, "step": 2462 }, { "epoch": 0.2558429417263945, "grad_norm": 0.3790525794029236, "learning_rate": 8.470043156417333e-05, "loss": 1.8017, "step": 2463 }, { "epoch": 0.2559468162459749, "grad_norm": 0.3937493562698364, "learning_rate": 8.46886823290003e-05, "loss": 1.7512, "step": 2464 }, { "epoch": 0.2560506907655552, "grad_norm": 0.4141665995121002, "learning_rate": 8.467692939975316e-05, "loss": 1.9163, "step": 2465 }, { "epoch": 0.2561545652851356, "grad_norm": 0.4007442891597748, "learning_rate": 8.466517277768355e-05, "loss": 1.9303, "step": 2466 }, { "epoch": 0.2562584398047159, "grad_norm": 0.36880823969841003, "learning_rate": 8.465341246404345e-05, "loss": 1.6991, "step": 2467 }, { "epoch": 0.25636231432429624, "grad_norm": 0.3562661409378052, "learning_rate": 8.464164846008522e-05, "loss": 1.7796, "step": 2468 }, { "epoch": 0.2564661888438766, "grad_norm": 0.3477732241153717, "learning_rate": 8.462988076706164e-05, "loss": 1.7226, "step": 2469 }, { "epoch": 0.25657006336345695, "grad_norm": 0.35839709639549255, "learning_rate": 8.461810938622589e-05, "loss": 1.7105, "step": 2470 }, { "epoch": 0.2566739378830373, "grad_norm": 0.39441442489624023, "learning_rate": 8.460633431883151e-05, "loss": 1.6964, "step": 2471 }, { "epoch": 0.25677781240261766, "grad_norm": 0.41418394446372986, "learning_rate": 8.459455556613247e-05, "loss": 1.7889, "step": 2472 }, { "epoch": 0.256881686922198, "grad_norm": 0.3982704281806946, "learning_rate": 8.458277312938312e-05, "loss": 2.066, "step": 2473 }, { "epoch": 0.2569855614417783, "grad_norm": 0.38881179690361023, "learning_rate": 8.457098700983818e-05, "loss": 1.8605, "step": 2474 }, { "epoch": 0.2570894359613587, "grad_norm": 0.39533254504203796, "learning_rate": 8.455919720875279e-05, "loss": 1.8728, "step": 2475 }, { "epoch": 0.257193310480939, "grad_norm": 0.4153515100479126, "learning_rate": 8.454740372738246e-05, "loss": 1.6178, "step": 2476 }, { "epoch": 0.25729718500051935, "grad_norm": 0.35797804594039917, "learning_rate": 8.453560656698311e-05, "loss": 1.5962, "step": 2477 }, { "epoch": 0.25740105952009973, "grad_norm": 0.3884628415107727, "learning_rate": 8.452380572881107e-05, "loss": 1.6725, "step": 2478 }, { "epoch": 0.25750493403968006, "grad_norm": 0.4121970534324646, "learning_rate": 8.451200121412299e-05, "loss": 1.731, "step": 2479 }, { "epoch": 0.25760880855926044, "grad_norm": 0.3405035734176636, "learning_rate": 8.4500193024176e-05, "loss": 1.5419, "step": 2480 }, { "epoch": 0.25771268307884077, "grad_norm": 0.38876980543136597, "learning_rate": 8.448838116022758e-05, "loss": 1.6488, "step": 2481 }, { "epoch": 0.2578165575984211, "grad_norm": 0.38169679045677185, "learning_rate": 8.447656562353557e-05, "loss": 1.8325, "step": 2482 }, { "epoch": 0.2579204321180015, "grad_norm": 0.36781740188598633, "learning_rate": 8.446474641535824e-05, "loss": 1.5927, "step": 2483 }, { "epoch": 0.2580243066375818, "grad_norm": 0.39887794852256775, "learning_rate": 8.445292353695427e-05, "loss": 1.7471, "step": 2484 }, { "epoch": 0.25812818115716213, "grad_norm": 0.381283164024353, "learning_rate": 8.444109698958267e-05, "loss": 1.7668, "step": 2485 }, { "epoch": 0.2582320556767425, "grad_norm": 0.38031065464019775, "learning_rate": 8.44292667745029e-05, "loss": 1.7104, "step": 2486 }, { "epoch": 0.25833593019632284, "grad_norm": 0.36943575739860535, "learning_rate": 8.441743289297476e-05, "loss": 1.8359, "step": 2487 }, { "epoch": 0.25843980471590317, "grad_norm": 0.3790445625782013, "learning_rate": 8.440559534625851e-05, "loss": 1.6393, "step": 2488 }, { "epoch": 0.25854367923548355, "grad_norm": 0.3714222013950348, "learning_rate": 8.439375413561472e-05, "loss": 1.6612, "step": 2489 }, { "epoch": 0.2586475537550639, "grad_norm": 0.38887256383895874, "learning_rate": 8.43819092623044e-05, "loss": 1.7044, "step": 2490 }, { "epoch": 0.25875142827464426, "grad_norm": 0.3530022203922272, "learning_rate": 8.437006072758891e-05, "loss": 1.608, "step": 2491 }, { "epoch": 0.2588553027942246, "grad_norm": 0.38229474425315857, "learning_rate": 8.435820853273007e-05, "loss": 1.7361, "step": 2492 }, { "epoch": 0.2589591773138049, "grad_norm": 0.35574570298194885, "learning_rate": 8.434635267899002e-05, "loss": 1.799, "step": 2493 }, { "epoch": 0.2590630518333853, "grad_norm": 0.40232568979263306, "learning_rate": 8.433449316763133e-05, "loss": 1.8073, "step": 2494 }, { "epoch": 0.2591669263529656, "grad_norm": 0.42104196548461914, "learning_rate": 8.432262999991694e-05, "loss": 2.0099, "step": 2495 }, { "epoch": 0.25927080087254595, "grad_norm": 0.37814000248908997, "learning_rate": 8.431076317711017e-05, "loss": 1.6813, "step": 2496 }, { "epoch": 0.25937467539212633, "grad_norm": 0.38011592626571655, "learning_rate": 8.429889270047475e-05, "loss": 1.6921, "step": 2497 }, { "epoch": 0.25947854991170666, "grad_norm": 0.3853015601634979, "learning_rate": 8.428701857127481e-05, "loss": 1.8239, "step": 2498 }, { "epoch": 0.259582424431287, "grad_norm": 0.351633757352829, "learning_rate": 8.427514079077485e-05, "loss": 1.6058, "step": 2499 }, { "epoch": 0.25968629895086737, "grad_norm": 0.4057175815105438, "learning_rate": 8.426325936023974e-05, "loss": 1.8754, "step": 2500 }, { "epoch": 0.2597901734704477, "grad_norm": 0.3578610420227051, "learning_rate": 8.425137428093477e-05, "loss": 1.7336, "step": 2501 }, { "epoch": 0.259894047990028, "grad_norm": 0.3505629301071167, "learning_rate": 8.423948555412562e-05, "loss": 1.7356, "step": 2502 }, { "epoch": 0.2599979225096084, "grad_norm": 0.3745979368686676, "learning_rate": 8.422759318107832e-05, "loss": 1.8169, "step": 2503 }, { "epoch": 0.26010179702918873, "grad_norm": 0.37145760655403137, "learning_rate": 8.421569716305934e-05, "loss": 1.904, "step": 2504 }, { "epoch": 0.2602056715487691, "grad_norm": 0.3965916931629181, "learning_rate": 8.42037975013355e-05, "loss": 1.809, "step": 2505 }, { "epoch": 0.26030954606834944, "grad_norm": 0.39221835136413574, "learning_rate": 8.419189419717404e-05, "loss": 1.7811, "step": 2506 }, { "epoch": 0.26041342058792977, "grad_norm": 0.3546196222305298, "learning_rate": 8.417998725184254e-05, "loss": 1.7371, "step": 2507 }, { "epoch": 0.26051729510751015, "grad_norm": 0.3630322217941284, "learning_rate": 8.416807666660901e-05, "loss": 1.719, "step": 2508 }, { "epoch": 0.2606211696270905, "grad_norm": 0.34440553188323975, "learning_rate": 8.415616244274185e-05, "loss": 1.6887, "step": 2509 }, { "epoch": 0.2607250441466708, "grad_norm": 0.39099493622779846, "learning_rate": 8.414424458150983e-05, "loss": 1.7395, "step": 2510 }, { "epoch": 0.2608289186662512, "grad_norm": 0.35405251383781433, "learning_rate": 8.413232308418206e-05, "loss": 1.6363, "step": 2511 }, { "epoch": 0.2609327931858315, "grad_norm": 0.41946062445640564, "learning_rate": 8.412039795202816e-05, "loss": 1.91, "step": 2512 }, { "epoch": 0.26103666770541184, "grad_norm": 0.4249524474143982, "learning_rate": 8.4108469186318e-05, "loss": 1.7341, "step": 2513 }, { "epoch": 0.2611405422249922, "grad_norm": 0.3654036223888397, "learning_rate": 8.409653678832194e-05, "loss": 1.6744, "step": 2514 }, { "epoch": 0.26124441674457255, "grad_norm": 0.35637834668159485, "learning_rate": 8.408460075931068e-05, "loss": 1.6597, "step": 2515 }, { "epoch": 0.2613482912641529, "grad_norm": 0.39124104380607605, "learning_rate": 8.407266110055531e-05, "loss": 1.6995, "step": 2516 }, { "epoch": 0.26145216578373326, "grad_norm": 0.3535708487033844, "learning_rate": 8.40607178133273e-05, "loss": 1.657, "step": 2517 }, { "epoch": 0.2615560403033136, "grad_norm": 0.4048800766468048, "learning_rate": 8.404877089889853e-05, "loss": 1.7071, "step": 2518 }, { "epoch": 0.26165991482289397, "grad_norm": 0.3928733170032501, "learning_rate": 8.403682035854125e-05, "loss": 1.5713, "step": 2519 }, { "epoch": 0.2617637893424743, "grad_norm": 0.3696751892566681, "learning_rate": 8.40248661935281e-05, "loss": 1.6968, "step": 2520 }, { "epoch": 0.2618676638620546, "grad_norm": 0.35033291578292847, "learning_rate": 8.40129084051321e-05, "loss": 1.5176, "step": 2521 }, { "epoch": 0.261971538381635, "grad_norm": 0.38887977600097656, "learning_rate": 8.400094699462667e-05, "loss": 1.84, "step": 2522 }, { "epoch": 0.26207541290121533, "grad_norm": 0.382174015045166, "learning_rate": 8.398898196328561e-05, "loss": 1.818, "step": 2523 }, { "epoch": 0.26217928742079566, "grad_norm": 0.3753756880760193, "learning_rate": 8.39770133123831e-05, "loss": 1.764, "step": 2524 }, { "epoch": 0.26228316194037604, "grad_norm": 0.37997132539749146, "learning_rate": 8.396504104319366e-05, "loss": 1.8256, "step": 2525 }, { "epoch": 0.26238703645995637, "grad_norm": 0.36985552310943604, "learning_rate": 8.395306515699234e-05, "loss": 1.5408, "step": 2526 }, { "epoch": 0.2624909109795367, "grad_norm": 0.36994847655296326, "learning_rate": 8.394108565505441e-05, "loss": 1.6385, "step": 2527 }, { "epoch": 0.2625947854991171, "grad_norm": 0.3789272904396057, "learning_rate": 8.392910253865557e-05, "loss": 1.6302, "step": 2528 }, { "epoch": 0.2626986600186974, "grad_norm": 0.39001405239105225, "learning_rate": 8.391711580907202e-05, "loss": 1.8292, "step": 2529 }, { "epoch": 0.2628025345382778, "grad_norm": 0.3820188343524933, "learning_rate": 8.390512546758016e-05, "loss": 1.8284, "step": 2530 }, { "epoch": 0.2629064090578581, "grad_norm": 0.43149465322494507, "learning_rate": 8.389313151545694e-05, "loss": 1.8026, "step": 2531 }, { "epoch": 0.26301028357743844, "grad_norm": 0.4724147319793701, "learning_rate": 8.388113395397957e-05, "loss": 1.7194, "step": 2532 }, { "epoch": 0.2631141580970188, "grad_norm": 0.3973971903324127, "learning_rate": 8.386913278442571e-05, "loss": 1.703, "step": 2533 }, { "epoch": 0.26321803261659915, "grad_norm": 0.38330528140068054, "learning_rate": 8.385712800807343e-05, "loss": 1.8659, "step": 2534 }, { "epoch": 0.2633219071361795, "grad_norm": 0.37077972292900085, "learning_rate": 8.38451196262011e-05, "loss": 1.8752, "step": 2535 }, { "epoch": 0.26342578165575986, "grad_norm": 0.36941617727279663, "learning_rate": 8.383310764008751e-05, "loss": 1.765, "step": 2536 }, { "epoch": 0.2635296561753402, "grad_norm": 0.3706973195075989, "learning_rate": 8.382109205101188e-05, "loss": 1.6668, "step": 2537 }, { "epoch": 0.2636335306949205, "grad_norm": 0.37633809447288513, "learning_rate": 8.380907286025377e-05, "loss": 1.6682, "step": 2538 }, { "epoch": 0.2637374052145009, "grad_norm": 0.39212766289711, "learning_rate": 8.37970500690931e-05, "loss": 1.6501, "step": 2539 }, { "epoch": 0.2638412797340812, "grad_norm": 0.3803769052028656, "learning_rate": 8.378502367881025e-05, "loss": 1.6854, "step": 2540 }, { "epoch": 0.26394515425366155, "grad_norm": 0.40050208568573, "learning_rate": 8.37729936906859e-05, "loss": 1.8676, "step": 2541 }, { "epoch": 0.26404902877324193, "grad_norm": 0.39779967069625854, "learning_rate": 8.376096010600116e-05, "loss": 1.7777, "step": 2542 }, { "epoch": 0.26415290329282226, "grad_norm": 0.4135241210460663, "learning_rate": 8.374892292603751e-05, "loss": 1.7512, "step": 2543 }, { "epoch": 0.26425677781240264, "grad_norm": 0.36392831802368164, "learning_rate": 8.373688215207682e-05, "loss": 1.7389, "step": 2544 }, { "epoch": 0.26436065233198297, "grad_norm": 0.40198683738708496, "learning_rate": 8.372483778540134e-05, "loss": 1.9784, "step": 2545 }, { "epoch": 0.2644645268515633, "grad_norm": 0.35998931527137756, "learning_rate": 8.371278982729371e-05, "loss": 1.8175, "step": 2546 }, { "epoch": 0.2645684013711437, "grad_norm": 0.4466514587402344, "learning_rate": 8.370073827903693e-05, "loss": 1.8014, "step": 2547 }, { "epoch": 0.264672275890724, "grad_norm": 0.39324405789375305, "learning_rate": 8.368868314191439e-05, "loss": 1.8389, "step": 2548 }, { "epoch": 0.26477615041030433, "grad_norm": 0.3979739248752594, "learning_rate": 8.367662441720989e-05, "loss": 1.7975, "step": 2549 }, { "epoch": 0.2648800249298847, "grad_norm": 0.3831605315208435, "learning_rate": 8.366456210620757e-05, "loss": 1.7531, "step": 2550 }, { "epoch": 0.26498389944946504, "grad_norm": 0.3608654737472534, "learning_rate": 8.365249621019197e-05, "loss": 1.6416, "step": 2551 }, { "epoch": 0.26508777396904537, "grad_norm": 0.37154313921928406, "learning_rate": 8.364042673044803e-05, "loss": 1.775, "step": 2552 }, { "epoch": 0.26519164848862575, "grad_norm": 0.36865541338920593, "learning_rate": 8.362835366826105e-05, "loss": 1.6302, "step": 2553 }, { "epoch": 0.2652955230082061, "grad_norm": 0.40405401587486267, "learning_rate": 8.361627702491673e-05, "loss": 1.8325, "step": 2554 }, { "epoch": 0.2653993975277864, "grad_norm": 0.3631226122379303, "learning_rate": 8.360419680170111e-05, "loss": 1.6433, "step": 2555 }, { "epoch": 0.2655032720473668, "grad_norm": 0.3895721435546875, "learning_rate": 8.359211299990064e-05, "loss": 1.5261, "step": 2556 }, { "epoch": 0.2656071465669471, "grad_norm": 0.34275680780410767, "learning_rate": 8.358002562080219e-05, "loss": 1.7036, "step": 2557 }, { "epoch": 0.2657110210865275, "grad_norm": 0.4039117991924286, "learning_rate": 8.356793466569293e-05, "loss": 1.801, "step": 2558 }, { "epoch": 0.2658148956061078, "grad_norm": 0.444367915391922, "learning_rate": 8.355584013586047e-05, "loss": 2.0989, "step": 2559 }, { "epoch": 0.26591877012568815, "grad_norm": 0.3847164809703827, "learning_rate": 8.354374203259278e-05, "loss": 1.8125, "step": 2560 }, { "epoch": 0.26602264464526854, "grad_norm": 0.36114174127578735, "learning_rate": 8.353164035717822e-05, "loss": 1.6731, "step": 2561 }, { "epoch": 0.26612651916484886, "grad_norm": 0.37462207674980164, "learning_rate": 8.351953511090551e-05, "loss": 1.7634, "step": 2562 }, { "epoch": 0.2662303936844292, "grad_norm": 0.38300392031669617, "learning_rate": 8.350742629506378e-05, "loss": 1.8874, "step": 2563 }, { "epoch": 0.26633426820400957, "grad_norm": 0.37371304631233215, "learning_rate": 8.349531391094251e-05, "loss": 1.7228, "step": 2564 }, { "epoch": 0.2664381427235899, "grad_norm": 0.33728665113449097, "learning_rate": 8.34831979598316e-05, "loss": 1.6236, "step": 2565 }, { "epoch": 0.2665420172431702, "grad_norm": 0.38045376539230347, "learning_rate": 8.347107844302129e-05, "loss": 1.8109, "step": 2566 }, { "epoch": 0.2666458917627506, "grad_norm": 0.36660969257354736, "learning_rate": 8.345895536180219e-05, "loss": 1.7463, "step": 2567 }, { "epoch": 0.26674976628233094, "grad_norm": 0.3986735939979553, "learning_rate": 8.344682871746534e-05, "loss": 1.8362, "step": 2568 }, { "epoch": 0.2668536408019113, "grad_norm": 0.41932711005210876, "learning_rate": 8.343469851130212e-05, "loss": 1.845, "step": 2569 }, { "epoch": 0.26695751532149165, "grad_norm": 0.40920954942703247, "learning_rate": 8.342256474460431e-05, "loss": 1.6763, "step": 2570 }, { "epoch": 0.26706138984107197, "grad_norm": 0.3709944784641266, "learning_rate": 8.341042741866408e-05, "loss": 1.8428, "step": 2571 }, { "epoch": 0.26716526436065235, "grad_norm": 0.3826795220375061, "learning_rate": 8.339828653477391e-05, "loss": 1.5346, "step": 2572 }, { "epoch": 0.2672691388802327, "grad_norm": 0.37877601385116577, "learning_rate": 8.338614209422677e-05, "loss": 1.7763, "step": 2573 }, { "epoch": 0.267373013399813, "grad_norm": 0.4136562645435333, "learning_rate": 8.33739940983159e-05, "loss": 1.9153, "step": 2574 }, { "epoch": 0.2674768879193934, "grad_norm": 0.3574436604976654, "learning_rate": 8.336184254833499e-05, "loss": 1.4252, "step": 2575 }, { "epoch": 0.2675807624389737, "grad_norm": 0.37612104415893555, "learning_rate": 8.33496874455781e-05, "loss": 1.8131, "step": 2576 }, { "epoch": 0.26768463695855405, "grad_norm": 0.4038298726081848, "learning_rate": 8.33375287913396e-05, "loss": 1.769, "step": 2577 }, { "epoch": 0.26778851147813443, "grad_norm": 0.3684050440788269, "learning_rate": 8.332536658691435e-05, "loss": 1.806, "step": 2578 }, { "epoch": 0.26789238599771475, "grad_norm": 0.39676815271377563, "learning_rate": 8.331320083359751e-05, "loss": 1.8308, "step": 2579 }, { "epoch": 0.2679962605172951, "grad_norm": 0.37601250410079956, "learning_rate": 8.330103153268462e-05, "loss": 1.6708, "step": 2580 }, { "epoch": 0.26810013503687546, "grad_norm": 0.35616087913513184, "learning_rate": 8.328885868547164e-05, "loss": 1.6567, "step": 2581 }, { "epoch": 0.2682040095564558, "grad_norm": 0.4200495183467865, "learning_rate": 8.327668229325487e-05, "loss": 1.9518, "step": 2582 }, { "epoch": 0.2683078840760362, "grad_norm": 0.4047677516937256, "learning_rate": 8.326450235733098e-05, "loss": 1.8429, "step": 2583 }, { "epoch": 0.2684117585956165, "grad_norm": 0.35730311274528503, "learning_rate": 8.32523188789971e-05, "loss": 1.5773, "step": 2584 }, { "epoch": 0.26851563311519683, "grad_norm": 0.38395121693611145, "learning_rate": 8.324013185955062e-05, "loss": 1.685, "step": 2585 }, { "epoch": 0.2686195076347772, "grad_norm": 0.3953067362308502, "learning_rate": 8.322794130028938e-05, "loss": 1.8974, "step": 2586 }, { "epoch": 0.26872338215435754, "grad_norm": 0.3572498559951782, "learning_rate": 8.321574720251158e-05, "loss": 1.6507, "step": 2587 }, { "epoch": 0.26882725667393786, "grad_norm": 0.38150545954704285, "learning_rate": 8.32035495675158e-05, "loss": 1.7104, "step": 2588 }, { "epoch": 0.26893113119351825, "grad_norm": 0.36428940296173096, "learning_rate": 8.3191348396601e-05, "loss": 1.6052, "step": 2589 }, { "epoch": 0.2690350057130986, "grad_norm": 0.4125150144100189, "learning_rate": 8.317914369106648e-05, "loss": 1.9357, "step": 2590 }, { "epoch": 0.2691388802326789, "grad_norm": 0.3636937439441681, "learning_rate": 8.316693545221196e-05, "loss": 1.7948, "step": 2591 }, { "epoch": 0.2692427547522593, "grad_norm": 0.40792933106422424, "learning_rate": 8.315472368133754e-05, "loss": 1.6696, "step": 2592 }, { "epoch": 0.2693466292718396, "grad_norm": 0.40981337428092957, "learning_rate": 8.314250837974364e-05, "loss": 1.8025, "step": 2593 }, { "epoch": 0.26945050379141994, "grad_norm": 0.3620801270008087, "learning_rate": 8.313028954873114e-05, "loss": 1.725, "step": 2594 }, { "epoch": 0.2695543783110003, "grad_norm": 0.35369589924812317, "learning_rate": 8.311806718960124e-05, "loss": 1.5303, "step": 2595 }, { "epoch": 0.26965825283058065, "grad_norm": 0.38566333055496216, "learning_rate": 8.310584130365551e-05, "loss": 1.9351, "step": 2596 }, { "epoch": 0.26976212735016103, "grad_norm": 0.3703290820121765, "learning_rate": 8.309361189219589e-05, "loss": 1.6705, "step": 2597 }, { "epoch": 0.26986600186974136, "grad_norm": 0.3818873167037964, "learning_rate": 8.308137895652477e-05, "loss": 1.7879, "step": 2598 }, { "epoch": 0.2699698763893217, "grad_norm": 0.3748812675476074, "learning_rate": 8.306914249794483e-05, "loss": 1.7799, "step": 2599 }, { "epoch": 0.27007375090890207, "grad_norm": 0.3769199252128601, "learning_rate": 8.305690251775915e-05, "loss": 1.6268, "step": 2600 }, { "epoch": 0.2701776254284824, "grad_norm": 0.3615618944168091, "learning_rate": 8.304465901727123e-05, "loss": 1.7004, "step": 2601 }, { "epoch": 0.2702814999480627, "grad_norm": 0.36809107661247253, "learning_rate": 8.303241199778486e-05, "loss": 1.7739, "step": 2602 }, { "epoch": 0.2703853744676431, "grad_norm": 0.37990957498550415, "learning_rate": 8.30201614606043e-05, "loss": 1.5496, "step": 2603 }, { "epoch": 0.27048924898722343, "grad_norm": 0.3890332579612732, "learning_rate": 8.300790740703409e-05, "loss": 1.7503, "step": 2604 }, { "epoch": 0.27059312350680376, "grad_norm": 0.3829896152019501, "learning_rate": 8.299564983837922e-05, "loss": 1.7856, "step": 2605 }, { "epoch": 0.27069699802638414, "grad_norm": 0.36139553785324097, "learning_rate": 8.298338875594501e-05, "loss": 1.7488, "step": 2606 }, { "epoch": 0.27080087254596447, "grad_norm": 0.3622409403324127, "learning_rate": 8.29711241610372e-05, "loss": 1.6934, "step": 2607 }, { "epoch": 0.27090474706554485, "grad_norm": 0.37896421551704407, "learning_rate": 8.295885605496184e-05, "loss": 1.785, "step": 2608 }, { "epoch": 0.2710086215851252, "grad_norm": 0.36020195484161377, "learning_rate": 8.294658443902539e-05, "loss": 1.7482, "step": 2609 }, { "epoch": 0.2711124961047055, "grad_norm": 0.3811848759651184, "learning_rate": 8.29343093145347e-05, "loss": 1.7077, "step": 2610 }, { "epoch": 0.2712163706242859, "grad_norm": 0.3960614502429962, "learning_rate": 8.292203068279695e-05, "loss": 1.8574, "step": 2611 }, { "epoch": 0.2713202451438662, "grad_norm": 0.4237765371799469, "learning_rate": 8.290974854511974e-05, "loss": 1.7988, "step": 2612 }, { "epoch": 0.27142411966344654, "grad_norm": 0.3800017237663269, "learning_rate": 8.289746290281104e-05, "loss": 1.6488, "step": 2613 }, { "epoch": 0.2715279941830269, "grad_norm": 0.38529491424560547, "learning_rate": 8.288517375717913e-05, "loss": 1.8813, "step": 2614 }, { "epoch": 0.27163186870260725, "grad_norm": 0.3625027537345886, "learning_rate": 8.287288110953274e-05, "loss": 1.4855, "step": 2615 }, { "epoch": 0.2717357432221876, "grad_norm": 0.4243226647377014, "learning_rate": 8.286058496118091e-05, "loss": 1.921, "step": 2616 }, { "epoch": 0.27183961774176796, "grad_norm": 0.36872926354408264, "learning_rate": 8.284828531343312e-05, "loss": 1.6206, "step": 2617 }, { "epoch": 0.2719434922613483, "grad_norm": 0.40654125809669495, "learning_rate": 8.283598216759915e-05, "loss": 1.8668, "step": 2618 }, { "epoch": 0.2720473667809286, "grad_norm": 0.3534424602985382, "learning_rate": 8.282367552498925e-05, "loss": 1.6809, "step": 2619 }, { "epoch": 0.272151241300509, "grad_norm": 0.41357290744781494, "learning_rate": 8.28113653869139e-05, "loss": 1.922, "step": 2620 }, { "epoch": 0.2722551158200893, "grad_norm": 0.41436415910720825, "learning_rate": 8.27990517546841e-05, "loss": 1.8944, "step": 2621 }, { "epoch": 0.2723589903396697, "grad_norm": 0.3990755081176758, "learning_rate": 8.278673462961112e-05, "loss": 1.8936, "step": 2622 }, { "epoch": 0.27246286485925003, "grad_norm": 0.3946811258792877, "learning_rate": 8.277441401300665e-05, "loss": 1.9192, "step": 2623 }, { "epoch": 0.27256673937883036, "grad_norm": 0.37876197695732117, "learning_rate": 8.276208990618274e-05, "loss": 1.6761, "step": 2624 }, { "epoch": 0.27267061389841074, "grad_norm": 0.4105803072452545, "learning_rate": 8.27497623104518e-05, "loss": 1.7843, "step": 2625 }, { "epoch": 0.27277448841799107, "grad_norm": 0.37790194153785706, "learning_rate": 8.273743122712664e-05, "loss": 1.5891, "step": 2626 }, { "epoch": 0.2728783629375714, "grad_norm": 0.43674150109291077, "learning_rate": 8.272509665752041e-05, "loss": 1.793, "step": 2627 }, { "epoch": 0.2729822374571518, "grad_norm": 0.45887747406959534, "learning_rate": 8.271275860294667e-05, "loss": 1.9712, "step": 2628 }, { "epoch": 0.2730861119767321, "grad_norm": 0.3662078082561493, "learning_rate": 8.27004170647193e-05, "loss": 1.6459, "step": 2629 }, { "epoch": 0.27318998649631243, "grad_norm": 0.364083468914032, "learning_rate": 8.268807204415258e-05, "loss": 1.6445, "step": 2630 }, { "epoch": 0.2732938610158928, "grad_norm": 0.3766544759273529, "learning_rate": 8.267572354256117e-05, "loss": 1.6357, "step": 2631 }, { "epoch": 0.27339773553547314, "grad_norm": 0.38634899258613586, "learning_rate": 8.266337156126008e-05, "loss": 1.7254, "step": 2632 }, { "epoch": 0.27350161005505347, "grad_norm": 0.391665518283844, "learning_rate": 8.26510161015647e-05, "loss": 1.7073, "step": 2633 }, { "epoch": 0.27360548457463385, "grad_norm": 0.5298207998275757, "learning_rate": 8.263865716479078e-05, "loss": 1.8689, "step": 2634 }, { "epoch": 0.2737093590942142, "grad_norm": 0.4072588086128235, "learning_rate": 8.262629475225448e-05, "loss": 1.8721, "step": 2635 }, { "epoch": 0.27381323361379456, "grad_norm": 0.3996543288230896, "learning_rate": 8.261392886527229e-05, "loss": 1.7335, "step": 2636 }, { "epoch": 0.2739171081333749, "grad_norm": 0.414880633354187, "learning_rate": 8.260155950516106e-05, "loss": 1.8539, "step": 2637 }, { "epoch": 0.2740209826529552, "grad_norm": 0.36667972803115845, "learning_rate": 8.258918667323806e-05, "loss": 1.6214, "step": 2638 }, { "epoch": 0.2741248571725356, "grad_norm": 0.39150235056877136, "learning_rate": 8.257681037082085e-05, "loss": 1.7361, "step": 2639 }, { "epoch": 0.2742287316921159, "grad_norm": 0.38801634311676025, "learning_rate": 8.256443059922749e-05, "loss": 1.8081, "step": 2640 }, { "epoch": 0.27433260621169625, "grad_norm": 0.42313215136528015, "learning_rate": 8.255204735977626e-05, "loss": 1.7, "step": 2641 }, { "epoch": 0.27443648073127663, "grad_norm": 0.36863112449645996, "learning_rate": 8.25396606537859e-05, "loss": 1.5137, "step": 2642 }, { "epoch": 0.27454035525085696, "grad_norm": 0.39456894993782043, "learning_rate": 8.252727048257551e-05, "loss": 1.7127, "step": 2643 }, { "epoch": 0.2746442297704373, "grad_norm": 0.3853185474872589, "learning_rate": 8.251487684746453e-05, "loss": 1.8356, "step": 2644 }, { "epoch": 0.27474810429001767, "grad_norm": 0.4087388813495636, "learning_rate": 8.250247974977279e-05, "loss": 1.6803, "step": 2645 }, { "epoch": 0.274851978809598, "grad_norm": 0.3977982997894287, "learning_rate": 8.249007919082048e-05, "loss": 1.7868, "step": 2646 }, { "epoch": 0.2749558533291784, "grad_norm": 0.3721441626548767, "learning_rate": 8.247767517192818e-05, "loss": 1.8521, "step": 2647 }, { "epoch": 0.2750597278487587, "grad_norm": 0.3743959963321686, "learning_rate": 8.246526769441681e-05, "loss": 1.7785, "step": 2648 }, { "epoch": 0.27516360236833903, "grad_norm": 0.39412543177604675, "learning_rate": 8.245285675960767e-05, "loss": 1.8763, "step": 2649 }, { "epoch": 0.2752674768879194, "grad_norm": 0.3404049575328827, "learning_rate": 8.244044236882242e-05, "loss": 1.4576, "step": 2650 }, { "epoch": 0.27537135140749974, "grad_norm": 0.3886807858943939, "learning_rate": 8.24280245233831e-05, "loss": 1.7468, "step": 2651 }, { "epoch": 0.27547522592708007, "grad_norm": 0.3996857702732086, "learning_rate": 8.241560322461212e-05, "loss": 1.92, "step": 2652 }, { "epoch": 0.27557910044666045, "grad_norm": 0.3869626224040985, "learning_rate": 8.240317847383226e-05, "loss": 1.6042, "step": 2653 }, { "epoch": 0.2756829749662408, "grad_norm": 0.4047796130180359, "learning_rate": 8.239075027236663e-05, "loss": 1.8099, "step": 2654 }, { "epoch": 0.2757868494858211, "grad_norm": 0.44502919912338257, "learning_rate": 8.237831862153877e-05, "loss": 1.7363, "step": 2655 }, { "epoch": 0.2758907240054015, "grad_norm": 0.388954222202301, "learning_rate": 8.236588352267252e-05, "loss": 1.8691, "step": 2656 }, { "epoch": 0.2759945985249818, "grad_norm": 0.39591512084007263, "learning_rate": 8.235344497709214e-05, "loss": 1.8538, "step": 2657 }, { "epoch": 0.27609847304456214, "grad_norm": 0.38203978538513184, "learning_rate": 8.234100298612226e-05, "loss": 1.28, "step": 2658 }, { "epoch": 0.2762023475641425, "grad_norm": 0.43226009607315063, "learning_rate": 8.23285575510878e-05, "loss": 1.9326, "step": 2659 }, { "epoch": 0.27630622208372285, "grad_norm": 0.37048643827438354, "learning_rate": 8.231610867331415e-05, "loss": 1.7221, "step": 2660 }, { "epoch": 0.27641009660330323, "grad_norm": 0.41580793261528015, "learning_rate": 8.2303656354127e-05, "loss": 1.9031, "step": 2661 }, { "epoch": 0.27651397112288356, "grad_norm": 0.3827223777770996, "learning_rate": 8.229120059485243e-05, "loss": 1.6825, "step": 2662 }, { "epoch": 0.2766178456424639, "grad_norm": 0.37424013018608093, "learning_rate": 8.227874139681688e-05, "loss": 1.7172, "step": 2663 }, { "epoch": 0.27672172016204427, "grad_norm": 0.43709954619407654, "learning_rate": 8.226627876134714e-05, "loss": 1.8136, "step": 2664 }, { "epoch": 0.2768255946816246, "grad_norm": 0.35949236154556274, "learning_rate": 8.22538126897704e-05, "loss": 1.7346, "step": 2665 }, { "epoch": 0.2769294692012049, "grad_norm": 0.39692017436027527, "learning_rate": 8.224134318341421e-05, "loss": 1.925, "step": 2666 }, { "epoch": 0.2770333437207853, "grad_norm": 0.3917725384235382, "learning_rate": 8.222887024360646e-05, "loss": 1.7681, "step": 2667 }, { "epoch": 0.27713721824036563, "grad_norm": 0.4082455635070801, "learning_rate": 8.221639387167543e-05, "loss": 1.8118, "step": 2668 }, { "epoch": 0.27724109275994596, "grad_norm": 0.36559662222862244, "learning_rate": 8.220391406894973e-05, "loss": 1.8121, "step": 2669 }, { "epoch": 0.27734496727952634, "grad_norm": 0.372790664434433, "learning_rate": 8.21914308367584e-05, "loss": 1.7102, "step": 2670 }, { "epoch": 0.27744884179910667, "grad_norm": 0.37069472670555115, "learning_rate": 8.217894417643078e-05, "loss": 1.6396, "step": 2671 }, { "epoch": 0.277552716318687, "grad_norm": 0.38368502259254456, "learning_rate": 8.216645408929661e-05, "loss": 1.8278, "step": 2672 }, { "epoch": 0.2776565908382674, "grad_norm": 0.4187244474887848, "learning_rate": 8.215396057668598e-05, "loss": 1.7567, "step": 2673 }, { "epoch": 0.2777604653578477, "grad_norm": 0.37204065918922424, "learning_rate": 8.214146363992939e-05, "loss": 1.7282, "step": 2674 }, { "epoch": 0.2778643398774281, "grad_norm": 0.37890782952308655, "learning_rate": 8.21289632803576e-05, "loss": 1.6675, "step": 2675 }, { "epoch": 0.2779682143970084, "grad_norm": 0.37642747163772583, "learning_rate": 8.211645949930187e-05, "loss": 1.6311, "step": 2676 }, { "epoch": 0.27807208891658874, "grad_norm": 0.3810623288154602, "learning_rate": 8.210395229809369e-05, "loss": 1.7166, "step": 2677 }, { "epoch": 0.2781759634361691, "grad_norm": 0.3708658516407013, "learning_rate": 8.209144167806502e-05, "loss": 1.7207, "step": 2678 }, { "epoch": 0.27827983795574945, "grad_norm": 0.3930984139442444, "learning_rate": 8.207892764054814e-05, "loss": 1.7495, "step": 2679 }, { "epoch": 0.2783837124753298, "grad_norm": 0.3427664339542389, "learning_rate": 8.20664101868757e-05, "loss": 1.6362, "step": 2680 }, { "epoch": 0.27848758699491016, "grad_norm": 0.3461865484714508, "learning_rate": 8.205388931838068e-05, "loss": 1.594, "step": 2681 }, { "epoch": 0.2785914615144905, "grad_norm": 0.43719297647476196, "learning_rate": 8.20413650363965e-05, "loss": 1.9431, "step": 2682 }, { "epoch": 0.2786953360340708, "grad_norm": 0.3867358863353729, "learning_rate": 8.202883734225686e-05, "loss": 1.7749, "step": 2683 }, { "epoch": 0.2787992105536512, "grad_norm": 0.4017643928527832, "learning_rate": 8.201630623729588e-05, "loss": 1.8466, "step": 2684 }, { "epoch": 0.2789030850732315, "grad_norm": 0.4061945974826813, "learning_rate": 8.200377172284803e-05, "loss": 2.0036, "step": 2685 }, { "epoch": 0.2790069595928119, "grad_norm": 0.36883074045181274, "learning_rate": 8.199123380024812e-05, "loss": 1.6884, "step": 2686 }, { "epoch": 0.27911083411239224, "grad_norm": 0.37533366680145264, "learning_rate": 8.197869247083136e-05, "loss": 1.6512, "step": 2687 }, { "epoch": 0.27921470863197256, "grad_norm": 0.3572383522987366, "learning_rate": 8.196614773593329e-05, "loss": 1.6059, "step": 2688 }, { "epoch": 0.27931858315155295, "grad_norm": 0.37231144309043884, "learning_rate": 8.195359959688985e-05, "loss": 1.6252, "step": 2689 }, { "epoch": 0.2794224576711333, "grad_norm": 0.3484655022621155, "learning_rate": 8.194104805503728e-05, "loss": 1.7011, "step": 2690 }, { "epoch": 0.2795263321907136, "grad_norm": 0.3684258460998535, "learning_rate": 8.192849311171225e-05, "loss": 1.6864, "step": 2691 }, { "epoch": 0.279630206710294, "grad_norm": 0.34496328234672546, "learning_rate": 8.191593476825174e-05, "loss": 1.5545, "step": 2692 }, { "epoch": 0.2797340812298743, "grad_norm": 0.3760117292404175, "learning_rate": 8.190337302599314e-05, "loss": 1.7473, "step": 2693 }, { "epoch": 0.27983795574945464, "grad_norm": 0.3527992367744446, "learning_rate": 8.189080788627419e-05, "loss": 1.6099, "step": 2694 }, { "epoch": 0.279941830269035, "grad_norm": 0.37164172530174255, "learning_rate": 8.187823935043292e-05, "loss": 1.6274, "step": 2695 }, { "epoch": 0.28004570478861535, "grad_norm": 0.37998443841934204, "learning_rate": 8.186566741980786e-05, "loss": 1.8226, "step": 2696 }, { "epoch": 0.2801495793081957, "grad_norm": 0.3952192962169647, "learning_rate": 8.185309209573775e-05, "loss": 1.6987, "step": 2697 }, { "epoch": 0.28025345382777606, "grad_norm": 0.3859109878540039, "learning_rate": 8.18405133795618e-05, "loss": 1.7248, "step": 2698 }, { "epoch": 0.2803573283473564, "grad_norm": 0.3739028573036194, "learning_rate": 8.182793127261955e-05, "loss": 1.8529, "step": 2699 }, { "epoch": 0.28046120286693677, "grad_norm": 0.44212570786476135, "learning_rate": 8.181534577625088e-05, "loss": 1.8915, "step": 2700 }, { "epoch": 0.2805650773865171, "grad_norm": 0.3574367165565491, "learning_rate": 8.180275689179607e-05, "loss": 1.7146, "step": 2701 }, { "epoch": 0.2806689519060974, "grad_norm": 0.36751818656921387, "learning_rate": 8.17901646205957e-05, "loss": 1.5943, "step": 2702 }, { "epoch": 0.2807728264256778, "grad_norm": 0.3904435634613037, "learning_rate": 8.177756896399078e-05, "loss": 1.7849, "step": 2703 }, { "epoch": 0.28087670094525813, "grad_norm": 0.38835206627845764, "learning_rate": 8.176496992332265e-05, "loss": 1.7837, "step": 2704 }, { "epoch": 0.28098057546483846, "grad_norm": 0.46619629859924316, "learning_rate": 8.175236749993298e-05, "loss": 1.9918, "step": 2705 }, { "epoch": 0.28108444998441884, "grad_norm": 0.412544846534729, "learning_rate": 8.173976169516386e-05, "loss": 1.8577, "step": 2706 }, { "epoch": 0.28118832450399917, "grad_norm": 0.36168932914733887, "learning_rate": 8.17271525103577e-05, "loss": 1.6675, "step": 2707 }, { "epoch": 0.2812921990235795, "grad_norm": 0.4263473451137543, "learning_rate": 8.171453994685728e-05, "loss": 1.8988, "step": 2708 }, { "epoch": 0.2813960735431599, "grad_norm": 0.369133323431015, "learning_rate": 8.170192400600574e-05, "loss": 1.6811, "step": 2709 }, { "epoch": 0.2814999480627402, "grad_norm": 0.39230549335479736, "learning_rate": 8.168930468914658e-05, "loss": 1.5888, "step": 2710 }, { "epoch": 0.2816038225823206, "grad_norm": 0.3522338569164276, "learning_rate": 8.167668199762364e-05, "loss": 1.5904, "step": 2711 }, { "epoch": 0.2817076971019009, "grad_norm": 0.3948661983013153, "learning_rate": 8.166405593278116e-05, "loss": 1.7703, "step": 2712 }, { "epoch": 0.28181157162148124, "grad_norm": 0.35214731097221375, "learning_rate": 8.165142649596372e-05, "loss": 1.6956, "step": 2713 }, { "epoch": 0.2819154461410616, "grad_norm": 0.4061570167541504, "learning_rate": 8.163879368851623e-05, "loss": 1.9759, "step": 2714 }, { "epoch": 0.28201932066064195, "grad_norm": 0.3767627477645874, "learning_rate": 8.162615751178405e-05, "loss": 1.6924, "step": 2715 }, { "epoch": 0.2821231951802223, "grad_norm": 0.37810075283050537, "learning_rate": 8.161351796711274e-05, "loss": 1.8406, "step": 2716 }, { "epoch": 0.28222706969980266, "grad_norm": 0.37295323610305786, "learning_rate": 8.16008750558484e-05, "loss": 1.7651, "step": 2717 }, { "epoch": 0.282330944219383, "grad_norm": 0.3852209448814392, "learning_rate": 8.158822877933733e-05, "loss": 1.7538, "step": 2718 }, { "epoch": 0.2824348187389633, "grad_norm": 0.36513835191726685, "learning_rate": 8.15755791389263e-05, "loss": 1.7825, "step": 2719 }, { "epoch": 0.2825386932585437, "grad_norm": 0.3820558190345764, "learning_rate": 8.15629261359624e-05, "loss": 1.737, "step": 2720 }, { "epoch": 0.282642567778124, "grad_norm": 0.3873055875301361, "learning_rate": 8.155026977179307e-05, "loss": 1.774, "step": 2721 }, { "epoch": 0.28274644229770435, "grad_norm": 0.3791974186897278, "learning_rate": 8.15376100477661e-05, "loss": 1.7034, "step": 2722 }, { "epoch": 0.28285031681728473, "grad_norm": 0.35163450241088867, "learning_rate": 8.152494696522965e-05, "loss": 1.5747, "step": 2723 }, { "epoch": 0.28295419133686506, "grad_norm": 0.390493780374527, "learning_rate": 8.151228052553228e-05, "loss": 1.7802, "step": 2724 }, { "epoch": 0.28305806585644544, "grad_norm": 0.3626655340194702, "learning_rate": 8.149961073002283e-05, "loss": 1.7353, "step": 2725 }, { "epoch": 0.28316194037602577, "grad_norm": 0.40224525332450867, "learning_rate": 8.148693758005054e-05, "loss": 1.8108, "step": 2726 }, { "epoch": 0.2832658148956061, "grad_norm": 0.3655959367752075, "learning_rate": 8.1474261076965e-05, "loss": 1.7521, "step": 2727 }, { "epoch": 0.2833696894151865, "grad_norm": 0.3598358929157257, "learning_rate": 8.146158122211618e-05, "loss": 1.6139, "step": 2728 }, { "epoch": 0.2834735639347668, "grad_norm": 0.4097810983657837, "learning_rate": 8.144889801685436e-05, "loss": 1.9751, "step": 2729 }, { "epoch": 0.28357743845434713, "grad_norm": 0.3773553967475891, "learning_rate": 8.143621146253022e-05, "loss": 1.7618, "step": 2730 }, { "epoch": 0.2836813129739275, "grad_norm": 0.3876679837703705, "learning_rate": 8.142352156049475e-05, "loss": 1.7756, "step": 2731 }, { "epoch": 0.28378518749350784, "grad_norm": 0.3985688090324402, "learning_rate": 8.141082831209938e-05, "loss": 1.8868, "step": 2732 }, { "epoch": 0.28388906201308817, "grad_norm": 0.40376704931259155, "learning_rate": 8.139813171869579e-05, "loss": 1.9038, "step": 2733 }, { "epoch": 0.28399293653266855, "grad_norm": 0.333291620016098, "learning_rate": 8.138543178163607e-05, "loss": 1.5568, "step": 2734 }, { "epoch": 0.2840968110522489, "grad_norm": 0.39361095428466797, "learning_rate": 8.137272850227271e-05, "loss": 1.6809, "step": 2735 }, { "epoch": 0.2842006855718292, "grad_norm": 0.39014434814453125, "learning_rate": 8.136002188195846e-05, "loss": 1.7479, "step": 2736 }, { "epoch": 0.2843045600914096, "grad_norm": 0.4338228404521942, "learning_rate": 8.13473119220465e-05, "loss": 1.8751, "step": 2737 }, { "epoch": 0.2844084346109899, "grad_norm": 0.44359880685806274, "learning_rate": 8.133459862389036e-05, "loss": 1.8787, "step": 2738 }, { "epoch": 0.2845123091305703, "grad_norm": 0.42322838306427, "learning_rate": 8.132188198884386e-05, "loss": 1.827, "step": 2739 }, { "epoch": 0.2846161836501506, "grad_norm": 0.363816499710083, "learning_rate": 8.130916201826126e-05, "loss": 1.5635, "step": 2740 }, { "epoch": 0.28472005816973095, "grad_norm": 0.39882132411003113, "learning_rate": 8.129643871349712e-05, "loss": 2.0049, "step": 2741 }, { "epoch": 0.28482393268931133, "grad_norm": 0.3964424729347229, "learning_rate": 8.128371207590639e-05, "loss": 1.6739, "step": 2742 }, { "epoch": 0.28492780720889166, "grad_norm": 0.42000818252563477, "learning_rate": 8.127098210684434e-05, "loss": 1.9091, "step": 2743 }, { "epoch": 0.285031681728472, "grad_norm": 0.3911057412624359, "learning_rate": 8.125824880766661e-05, "loss": 1.7084, "step": 2744 }, { "epoch": 0.28513555624805237, "grad_norm": 0.37378305196762085, "learning_rate": 8.124551217972922e-05, "loss": 1.7252, "step": 2745 }, { "epoch": 0.2852394307676327, "grad_norm": 0.40639790892601013, "learning_rate": 8.12327722243885e-05, "loss": 1.8441, "step": 2746 }, { "epoch": 0.285343305287213, "grad_norm": 0.3873542249202728, "learning_rate": 8.122002894300117e-05, "loss": 1.506, "step": 2747 }, { "epoch": 0.2854471798067934, "grad_norm": 0.34335261583328247, "learning_rate": 8.120728233692428e-05, "loss": 1.6598, "step": 2748 }, { "epoch": 0.28555105432637373, "grad_norm": 0.4100406765937805, "learning_rate": 8.119453240751525e-05, "loss": 1.6315, "step": 2749 }, { "epoch": 0.2856549288459541, "grad_norm": 0.34580525755882263, "learning_rate": 8.118177915613182e-05, "loss": 1.562, "step": 2750 }, { "epoch": 0.28575880336553444, "grad_norm": 0.39648303389549255, "learning_rate": 8.116902258413216e-05, "loss": 1.9186, "step": 2751 }, { "epoch": 0.28586267788511477, "grad_norm": 0.3890984356403351, "learning_rate": 8.115626269287473e-05, "loss": 1.862, "step": 2752 }, { "epoch": 0.28596655240469515, "grad_norm": 0.3671417236328125, "learning_rate": 8.114349948371833e-05, "loss": 1.7655, "step": 2753 }, { "epoch": 0.2860704269242755, "grad_norm": 0.3553464412689209, "learning_rate": 8.113073295802219e-05, "loss": 1.5148, "step": 2754 }, { "epoch": 0.2861743014438558, "grad_norm": 0.3629017472267151, "learning_rate": 8.111796311714579e-05, "loss": 1.5992, "step": 2755 }, { "epoch": 0.2862781759634362, "grad_norm": 0.4407372772693634, "learning_rate": 8.110518996244907e-05, "loss": 1.7642, "step": 2756 }, { "epoch": 0.2863820504830165, "grad_norm": 0.42658326029777527, "learning_rate": 8.109241349529223e-05, "loss": 1.9091, "step": 2757 }, { "epoch": 0.28648592500259684, "grad_norm": 0.38717007637023926, "learning_rate": 8.107963371703589e-05, "loss": 1.7881, "step": 2758 }, { "epoch": 0.2865897995221772, "grad_norm": 0.39365988969802856, "learning_rate": 8.1066850629041e-05, "loss": 1.6742, "step": 2759 }, { "epoch": 0.28669367404175755, "grad_norm": 0.3701498806476593, "learning_rate": 8.105406423266884e-05, "loss": 1.6718, "step": 2760 }, { "epoch": 0.2867975485613379, "grad_norm": 0.3934311866760254, "learning_rate": 8.104127452928107e-05, "loss": 1.7174, "step": 2761 }, { "epoch": 0.28690142308091826, "grad_norm": 0.3584936261177063, "learning_rate": 8.10284815202397e-05, "loss": 1.7269, "step": 2762 }, { "epoch": 0.2870052976004986, "grad_norm": 0.4173574447631836, "learning_rate": 8.101568520690708e-05, "loss": 1.7251, "step": 2763 }, { "epoch": 0.28710917212007897, "grad_norm": 0.3903685212135315, "learning_rate": 8.10028855906459e-05, "loss": 1.7643, "step": 2764 }, { "epoch": 0.2872130466396593, "grad_norm": 0.37448278069496155, "learning_rate": 8.099008267281924e-05, "loss": 1.7648, "step": 2765 }, { "epoch": 0.2873169211592396, "grad_norm": 0.3669279217720032, "learning_rate": 8.097727645479053e-05, "loss": 1.7988, "step": 2766 }, { "epoch": 0.28742079567882, "grad_norm": 0.4136315584182739, "learning_rate": 8.096446693792347e-05, "loss": 1.857, "step": 2767 }, { "epoch": 0.28752467019840033, "grad_norm": 0.33900344371795654, "learning_rate": 8.095165412358225e-05, "loss": 1.5582, "step": 2768 }, { "epoch": 0.28762854471798066, "grad_norm": 0.36668214201927185, "learning_rate": 8.093883801313129e-05, "loss": 1.6839, "step": 2769 }, { "epoch": 0.28773241923756104, "grad_norm": 0.4014098048210144, "learning_rate": 8.09260186079354e-05, "loss": 1.7254, "step": 2770 }, { "epoch": 0.28783629375714137, "grad_norm": 0.44136208295822144, "learning_rate": 8.091319590935978e-05, "loss": 1.6938, "step": 2771 }, { "epoch": 0.2879401682767217, "grad_norm": 0.42486312985420227, "learning_rate": 8.090036991876992e-05, "loss": 1.7066, "step": 2772 }, { "epoch": 0.2880440427963021, "grad_norm": 0.4575170576572418, "learning_rate": 8.08875406375317e-05, "loss": 2.0641, "step": 2773 }, { "epoch": 0.2881479173158824, "grad_norm": 0.4238605499267578, "learning_rate": 8.087470806701131e-05, "loss": 1.9339, "step": 2774 }, { "epoch": 0.28825179183546273, "grad_norm": 0.37625259160995483, "learning_rate": 8.086187220857537e-05, "loss": 1.6855, "step": 2775 }, { "epoch": 0.2883556663550431, "grad_norm": 0.3760613799095154, "learning_rate": 8.084903306359078e-05, "loss": 1.786, "step": 2776 }, { "epoch": 0.28845954087462344, "grad_norm": 0.4117826521396637, "learning_rate": 8.083619063342477e-05, "loss": 1.796, "step": 2777 }, { "epoch": 0.2885634153942038, "grad_norm": 0.3975922465324402, "learning_rate": 8.0823344919445e-05, "loss": 1.8309, "step": 2778 }, { "epoch": 0.28866728991378415, "grad_norm": 0.39822137355804443, "learning_rate": 8.081049592301944e-05, "loss": 1.8686, "step": 2779 }, { "epoch": 0.2887711644333645, "grad_norm": 0.3700173795223236, "learning_rate": 8.079764364551638e-05, "loss": 1.7419, "step": 2780 }, { "epoch": 0.28887503895294486, "grad_norm": 0.3475184440612793, "learning_rate": 8.07847880883045e-05, "loss": 1.478, "step": 2781 }, { "epoch": 0.2889789134725252, "grad_norm": 0.37445908784866333, "learning_rate": 8.077192925275283e-05, "loss": 1.8183, "step": 2782 }, { "epoch": 0.2890827879921055, "grad_norm": 0.3727985918521881, "learning_rate": 8.075906714023073e-05, "loss": 1.7757, "step": 2783 }, { "epoch": 0.2891866625116859, "grad_norm": 0.36759766936302185, "learning_rate": 8.07462017521079e-05, "loss": 1.6917, "step": 2784 }, { "epoch": 0.2892905370312662, "grad_norm": 0.39550402760505676, "learning_rate": 8.073333308975442e-05, "loss": 1.7787, "step": 2785 }, { "epoch": 0.28939441155084655, "grad_norm": 0.3845430612564087, "learning_rate": 8.072046115454069e-05, "loss": 1.9255, "step": 2786 }, { "epoch": 0.28949828607042694, "grad_norm": 0.39458414912223816, "learning_rate": 8.070758594783748e-05, "loss": 1.6905, "step": 2787 }, { "epoch": 0.28960216059000726, "grad_norm": 0.38234516978263855, "learning_rate": 8.069470747101588e-05, "loss": 1.784, "step": 2788 }, { "epoch": 0.28970603510958765, "grad_norm": 0.4639817774295807, "learning_rate": 8.068182572544736e-05, "loss": 1.7567, "step": 2789 }, { "epoch": 0.28980990962916797, "grad_norm": 0.43574076890945435, "learning_rate": 8.066894071250375e-05, "loss": 1.8726, "step": 2790 }, { "epoch": 0.2899137841487483, "grad_norm": 0.37440717220306396, "learning_rate": 8.065605243355717e-05, "loss": 1.7629, "step": 2791 }, { "epoch": 0.2900176586683287, "grad_norm": 0.4101695716381073, "learning_rate": 8.064316088998012e-05, "loss": 1.7499, "step": 2792 }, { "epoch": 0.290121533187909, "grad_norm": 0.3932081162929535, "learning_rate": 8.063026608314547e-05, "loss": 1.7148, "step": 2793 }, { "epoch": 0.29022540770748934, "grad_norm": 0.37535983324050903, "learning_rate": 8.061736801442643e-05, "loss": 1.8031, "step": 2794 }, { "epoch": 0.2903292822270697, "grad_norm": 0.3890056610107422, "learning_rate": 8.06044666851965e-05, "loss": 1.718, "step": 2795 }, { "epoch": 0.29043315674665005, "grad_norm": 0.44528940320014954, "learning_rate": 8.059156209682959e-05, "loss": 1.7655, "step": 2796 }, { "epoch": 0.29053703126623037, "grad_norm": 0.3969075679779053, "learning_rate": 8.057865425069995e-05, "loss": 1.7541, "step": 2797 }, { "epoch": 0.29064090578581075, "grad_norm": 0.37456732988357544, "learning_rate": 8.056574314818215e-05, "loss": 1.747, "step": 2798 }, { "epoch": 0.2907447803053911, "grad_norm": 0.3638935387134552, "learning_rate": 8.055282879065114e-05, "loss": 1.6764, "step": 2799 }, { "epoch": 0.2908486548249714, "grad_norm": 0.3996514081954956, "learning_rate": 8.053991117948216e-05, "loss": 1.7869, "step": 2800 }, { "epoch": 0.2909525293445518, "grad_norm": 0.3886312246322632, "learning_rate": 8.052699031605087e-05, "loss": 1.8845, "step": 2801 }, { "epoch": 0.2910564038641321, "grad_norm": 0.37657225131988525, "learning_rate": 8.051406620173323e-05, "loss": 1.6482, "step": 2802 }, { "epoch": 0.2911602783837125, "grad_norm": 0.3608708083629608, "learning_rate": 8.050113883790556e-05, "loss": 1.6466, "step": 2803 }, { "epoch": 0.29126415290329283, "grad_norm": 0.41230741143226624, "learning_rate": 8.048820822594452e-05, "loss": 1.8663, "step": 2804 }, { "epoch": 0.29136802742287315, "grad_norm": 0.3793978989124298, "learning_rate": 8.047527436722713e-05, "loss": 1.7933, "step": 2805 }, { "epoch": 0.29147190194245354, "grad_norm": 0.35737892985343933, "learning_rate": 8.046233726313073e-05, "loss": 1.6788, "step": 2806 }, { "epoch": 0.29157577646203386, "grad_norm": 0.4132479429244995, "learning_rate": 8.044939691503304e-05, "loss": 1.7392, "step": 2807 }, { "epoch": 0.2916796509816142, "grad_norm": 0.4083154499530792, "learning_rate": 8.043645332431206e-05, "loss": 1.6476, "step": 2808 }, { "epoch": 0.2917835255011946, "grad_norm": 0.38581231236457825, "learning_rate": 8.042350649234625e-05, "loss": 1.7906, "step": 2809 }, { "epoch": 0.2918874000207749, "grad_norm": 0.4001144766807556, "learning_rate": 8.041055642051429e-05, "loss": 1.5869, "step": 2810 }, { "epoch": 0.29199127454035523, "grad_norm": 0.3624385893344879, "learning_rate": 8.039760311019529e-05, "loss": 1.7447, "step": 2811 }, { "epoch": 0.2920951490599356, "grad_norm": 0.3716486394405365, "learning_rate": 8.038464656276867e-05, "loss": 1.8424, "step": 2812 }, { "epoch": 0.29219902357951594, "grad_norm": 0.39141982793807983, "learning_rate": 8.03716867796142e-05, "loss": 1.6962, "step": 2813 }, { "epoch": 0.29230289809909626, "grad_norm": 0.3740139901638031, "learning_rate": 8.035872376211199e-05, "loss": 1.6874, "step": 2814 }, { "epoch": 0.29240677261867665, "grad_norm": 0.464403361082077, "learning_rate": 8.03457575116425e-05, "loss": 1.8638, "step": 2815 }, { "epoch": 0.292510647138257, "grad_norm": 0.3929448425769806, "learning_rate": 8.033278802958656e-05, "loss": 1.808, "step": 2816 }, { "epoch": 0.29261452165783736, "grad_norm": 0.4005575180053711, "learning_rate": 8.031981531732529e-05, "loss": 1.6983, "step": 2817 }, { "epoch": 0.2927183961774177, "grad_norm": 0.3648793697357178, "learning_rate": 8.03068393762402e-05, "loss": 1.7543, "step": 2818 }, { "epoch": 0.292822270696998, "grad_norm": 0.3590324819087982, "learning_rate": 8.02938602077131e-05, "loss": 1.8414, "step": 2819 }, { "epoch": 0.2929261452165784, "grad_norm": 0.3629642426967621, "learning_rate": 8.02808778131262e-05, "loss": 1.8325, "step": 2820 }, { "epoch": 0.2930300197361587, "grad_norm": 0.42311742901802063, "learning_rate": 8.026789219386201e-05, "loss": 1.7774, "step": 2821 }, { "epoch": 0.29313389425573905, "grad_norm": 0.37744253873825073, "learning_rate": 8.025490335130341e-05, "loss": 1.7133, "step": 2822 }, { "epoch": 0.29323776877531943, "grad_norm": 0.3763290047645569, "learning_rate": 8.02419112868336e-05, "loss": 1.7046, "step": 2823 }, { "epoch": 0.29334164329489976, "grad_norm": 0.3736268877983093, "learning_rate": 8.022891600183613e-05, "loss": 1.6936, "step": 2824 }, { "epoch": 0.2934455178144801, "grad_norm": 0.3851562440395355, "learning_rate": 8.021591749769489e-05, "loss": 1.7645, "step": 2825 }, { "epoch": 0.29354939233406047, "grad_norm": 0.4103233814239502, "learning_rate": 8.020291577579413e-05, "loss": 1.8886, "step": 2826 }, { "epoch": 0.2936532668536408, "grad_norm": 0.38178038597106934, "learning_rate": 8.018991083751846e-05, "loss": 1.7208, "step": 2827 }, { "epoch": 0.2937571413732212, "grad_norm": 0.36348870396614075, "learning_rate": 8.017690268425276e-05, "loss": 1.6585, "step": 2828 }, { "epoch": 0.2938610158928015, "grad_norm": 0.3771371841430664, "learning_rate": 8.016389131738229e-05, "loss": 1.7227, "step": 2829 }, { "epoch": 0.29396489041238183, "grad_norm": 0.38507938385009766, "learning_rate": 8.015087673829271e-05, "loss": 1.8273, "step": 2830 }, { "epoch": 0.2940687649319622, "grad_norm": 0.3727617561817169, "learning_rate": 8.013785894836993e-05, "loss": 1.6725, "step": 2831 }, { "epoch": 0.29417263945154254, "grad_norm": 0.37300458550453186, "learning_rate": 8.012483794900026e-05, "loss": 1.729, "step": 2832 }, { "epoch": 0.29427651397112287, "grad_norm": 0.4091015160083771, "learning_rate": 8.011181374157034e-05, "loss": 1.6771, "step": 2833 }, { "epoch": 0.29438038849070325, "grad_norm": 0.4204038083553314, "learning_rate": 8.009878632746712e-05, "loss": 1.8316, "step": 2834 }, { "epoch": 0.2944842630102836, "grad_norm": 0.4067396819591522, "learning_rate": 8.008575570807795e-05, "loss": 1.7071, "step": 2835 }, { "epoch": 0.2945881375298639, "grad_norm": 0.4235256314277649, "learning_rate": 8.007272188479049e-05, "loss": 1.8557, "step": 2836 }, { "epoch": 0.2946920120494443, "grad_norm": 0.3819893002510071, "learning_rate": 8.005968485899269e-05, "loss": 1.7695, "step": 2837 }, { "epoch": 0.2947958865690246, "grad_norm": 0.3644767999649048, "learning_rate": 8.004664463207294e-05, "loss": 1.6902, "step": 2838 }, { "epoch": 0.29489976108860494, "grad_norm": 0.36920779943466187, "learning_rate": 8.003360120541992e-05, "loss": 1.7109, "step": 2839 }, { "epoch": 0.2950036356081853, "grad_norm": 0.3806985318660736, "learning_rate": 8.002055458042264e-05, "loss": 1.8152, "step": 2840 }, { "epoch": 0.29510751012776565, "grad_norm": 0.4298724830150604, "learning_rate": 8.000750475847045e-05, "loss": 1.8906, "step": 2841 }, { "epoch": 0.29521138464734603, "grad_norm": 0.3639010787010193, "learning_rate": 7.999445174095308e-05, "loss": 1.6325, "step": 2842 }, { "epoch": 0.29531525916692636, "grad_norm": 0.36708855628967285, "learning_rate": 7.998139552926057e-05, "loss": 1.6156, "step": 2843 }, { "epoch": 0.2954191336865067, "grad_norm": 0.3640385866165161, "learning_rate": 7.996833612478331e-05, "loss": 1.71, "step": 2844 }, { "epoch": 0.29552300820608707, "grad_norm": 0.44802719354629517, "learning_rate": 7.9955273528912e-05, "loss": 1.7186, "step": 2845 }, { "epoch": 0.2956268827256674, "grad_norm": 0.37511417269706726, "learning_rate": 7.994220774303771e-05, "loss": 1.5034, "step": 2846 }, { "epoch": 0.2957307572452477, "grad_norm": 0.36259564757347107, "learning_rate": 7.992913876855188e-05, "loss": 1.6271, "step": 2847 }, { "epoch": 0.2958346317648281, "grad_norm": 0.3824392855167389, "learning_rate": 7.99160666068462e-05, "loss": 1.7693, "step": 2848 }, { "epoch": 0.29593850628440843, "grad_norm": 0.41231241822242737, "learning_rate": 7.99029912593128e-05, "loss": 1.5047, "step": 2849 }, { "epoch": 0.29604238080398876, "grad_norm": 0.3836499750614166, "learning_rate": 7.988991272734407e-05, "loss": 1.6688, "step": 2850 }, { "epoch": 0.29614625532356914, "grad_norm": 0.4020180106163025, "learning_rate": 7.987683101233279e-05, "loss": 1.7687, "step": 2851 }, { "epoch": 0.29625012984314947, "grad_norm": 0.3831713795661926, "learning_rate": 7.986374611567207e-05, "loss": 1.7141, "step": 2852 }, { "epoch": 0.2963540043627298, "grad_norm": 0.44648897647857666, "learning_rate": 7.985065803875532e-05, "loss": 1.9484, "step": 2853 }, { "epoch": 0.2964578788823102, "grad_norm": 0.36257603764533997, "learning_rate": 7.983756678297633e-05, "loss": 1.6849, "step": 2854 }, { "epoch": 0.2965617534018905, "grad_norm": 0.40492746233940125, "learning_rate": 7.982447234972922e-05, "loss": 1.7369, "step": 2855 }, { "epoch": 0.2966656279214709, "grad_norm": 0.37346115708351135, "learning_rate": 7.981137474040845e-05, "loss": 1.7769, "step": 2856 }, { "epoch": 0.2967695024410512, "grad_norm": 0.3808712959289551, "learning_rate": 7.979827395640883e-05, "loss": 1.6387, "step": 2857 }, { "epoch": 0.29687337696063154, "grad_norm": 0.3708186149597168, "learning_rate": 7.978516999912544e-05, "loss": 1.7448, "step": 2858 }, { "epoch": 0.2969772514802119, "grad_norm": 0.3663538992404938, "learning_rate": 7.977206286995379e-05, "loss": 1.7705, "step": 2859 }, { "epoch": 0.29708112599979225, "grad_norm": 0.41986000537872314, "learning_rate": 7.975895257028965e-05, "loss": 1.9017, "step": 2860 }, { "epoch": 0.2971850005193726, "grad_norm": 0.36581292748451233, "learning_rate": 7.974583910152922e-05, "loss": 1.5924, "step": 2861 }, { "epoch": 0.29728887503895296, "grad_norm": 0.38697493076324463, "learning_rate": 7.973272246506893e-05, "loss": 1.7093, "step": 2862 }, { "epoch": 0.2973927495585333, "grad_norm": 0.38015836477279663, "learning_rate": 7.971960266230565e-05, "loss": 1.7662, "step": 2863 }, { "epoch": 0.2974966240781136, "grad_norm": 0.37382447719573975, "learning_rate": 7.97064796946365e-05, "loss": 1.6753, "step": 2864 }, { "epoch": 0.297600498597694, "grad_norm": 0.42534875869750977, "learning_rate": 7.969335356345894e-05, "loss": 2.0455, "step": 2865 }, { "epoch": 0.2977043731172743, "grad_norm": 0.39144277572631836, "learning_rate": 7.968022427017088e-05, "loss": 1.4737, "step": 2866 }, { "epoch": 0.2978082476368547, "grad_norm": 0.3727049231529236, "learning_rate": 7.966709181617045e-05, "loss": 1.8034, "step": 2867 }, { "epoch": 0.29791212215643503, "grad_norm": 0.3644360601902008, "learning_rate": 7.965395620285616e-05, "loss": 1.7262, "step": 2868 }, { "epoch": 0.29801599667601536, "grad_norm": 0.40929368138313293, "learning_rate": 7.964081743162684e-05, "loss": 1.8834, "step": 2869 }, { "epoch": 0.29811987119559574, "grad_norm": 0.40274330973625183, "learning_rate": 7.962767550388166e-05, "loss": 1.6383, "step": 2870 }, { "epoch": 0.29822374571517607, "grad_norm": 0.38310062885284424, "learning_rate": 7.961453042102014e-05, "loss": 1.6704, "step": 2871 }, { "epoch": 0.2983276202347564, "grad_norm": 0.3942394554615021, "learning_rate": 7.960138218444215e-05, "loss": 1.8623, "step": 2872 }, { "epoch": 0.2984314947543368, "grad_norm": 0.3523310124874115, "learning_rate": 7.958823079554785e-05, "loss": 1.7932, "step": 2873 }, { "epoch": 0.2985353692739171, "grad_norm": 0.36860281229019165, "learning_rate": 7.957507625573776e-05, "loss": 1.7049, "step": 2874 }, { "epoch": 0.29863924379349743, "grad_norm": 0.37940865755081177, "learning_rate": 7.956191856641276e-05, "loss": 1.682, "step": 2875 }, { "epoch": 0.2987431183130778, "grad_norm": 0.4322430193424225, "learning_rate": 7.9548757728974e-05, "loss": 1.9822, "step": 2876 }, { "epoch": 0.29884699283265814, "grad_norm": 0.39275041222572327, "learning_rate": 7.953559374482305e-05, "loss": 1.8279, "step": 2877 }, { "epoch": 0.29895086735223847, "grad_norm": 0.3673188388347626, "learning_rate": 7.952242661536175e-05, "loss": 1.6634, "step": 2878 }, { "epoch": 0.29905474187181885, "grad_norm": 0.39935851097106934, "learning_rate": 7.950925634199228e-05, "loss": 1.7705, "step": 2879 }, { "epoch": 0.2991586163913992, "grad_norm": 0.4122634530067444, "learning_rate": 7.949608292611721e-05, "loss": 1.7795, "step": 2880 }, { "epoch": 0.29926249091097956, "grad_norm": 0.40487560629844666, "learning_rate": 7.948290636913939e-05, "loss": 1.9119, "step": 2881 }, { "epoch": 0.2993663654305599, "grad_norm": 0.3445805013179779, "learning_rate": 7.946972667246201e-05, "loss": 1.5662, "step": 2882 }, { "epoch": 0.2994702399501402, "grad_norm": 0.3658605217933655, "learning_rate": 7.945654383748861e-05, "loss": 1.5546, "step": 2883 }, { "epoch": 0.2995741144697206, "grad_norm": 0.3820507824420929, "learning_rate": 7.944335786562307e-05, "loss": 1.5994, "step": 2884 }, { "epoch": 0.2996779889893009, "grad_norm": 0.4138948321342468, "learning_rate": 7.943016875826957e-05, "loss": 1.829, "step": 2885 }, { "epoch": 0.29978186350888125, "grad_norm": 0.35458049178123474, "learning_rate": 7.941697651683267e-05, "loss": 1.6141, "step": 2886 }, { "epoch": 0.29988573802846163, "grad_norm": 0.4207065999507904, "learning_rate": 7.940378114271723e-05, "loss": 1.837, "step": 2887 }, { "epoch": 0.29998961254804196, "grad_norm": 0.35922253131866455, "learning_rate": 7.939058263732846e-05, "loss": 1.5883, "step": 2888 }, { "epoch": 0.3000934870676223, "grad_norm": 0.38362789154052734, "learning_rate": 7.93773810020719e-05, "loss": 1.7448, "step": 2889 }, { "epoch": 0.30019736158720267, "grad_norm": 0.39529237151145935, "learning_rate": 7.93641762383534e-05, "loss": 1.7747, "step": 2890 }, { "epoch": 0.300301236106783, "grad_norm": 0.37079519033432007, "learning_rate": 7.93509683475792e-05, "loss": 1.6808, "step": 2891 }, { "epoch": 0.3004051106263634, "grad_norm": 0.37962087988853455, "learning_rate": 7.93377573311558e-05, "loss": 1.6966, "step": 2892 }, { "epoch": 0.3005089851459437, "grad_norm": 0.36086609959602356, "learning_rate": 7.932454319049008e-05, "loss": 1.6517, "step": 2893 }, { "epoch": 0.30061285966552403, "grad_norm": 0.3676837980747223, "learning_rate": 7.931132592698927e-05, "loss": 1.5688, "step": 2894 }, { "epoch": 0.3007167341851044, "grad_norm": 0.38151493668556213, "learning_rate": 7.929810554206088e-05, "loss": 1.7309, "step": 2895 }, { "epoch": 0.30082060870468474, "grad_norm": 0.38594383001327515, "learning_rate": 7.928488203711279e-05, "loss": 1.7323, "step": 2896 }, { "epoch": 0.30092448322426507, "grad_norm": 0.3639463186264038, "learning_rate": 7.927165541355319e-05, "loss": 1.6411, "step": 2897 }, { "epoch": 0.30102835774384545, "grad_norm": 0.36965441703796387, "learning_rate": 7.92584256727906e-05, "loss": 1.6233, "step": 2898 }, { "epoch": 0.3011322322634258, "grad_norm": 0.40190815925598145, "learning_rate": 7.924519281623393e-05, "loss": 1.8847, "step": 2899 }, { "epoch": 0.3012361067830061, "grad_norm": 0.3761938214302063, "learning_rate": 7.923195684529232e-05, "loss": 1.6344, "step": 2900 }, { "epoch": 0.3013399813025865, "grad_norm": 0.39748868346214294, "learning_rate": 7.921871776137533e-05, "loss": 1.9062, "step": 2901 }, { "epoch": 0.3014438558221668, "grad_norm": 0.3873717188835144, "learning_rate": 7.920547556589282e-05, "loss": 1.8971, "step": 2902 }, { "epoch": 0.30154773034174714, "grad_norm": 0.39114367961883545, "learning_rate": 7.919223026025498e-05, "loss": 1.7381, "step": 2903 }, { "epoch": 0.3016516048613275, "grad_norm": 0.38005557656288147, "learning_rate": 7.917898184587231e-05, "loss": 1.8914, "step": 2904 }, { "epoch": 0.30175547938090785, "grad_norm": 0.3953699469566345, "learning_rate": 7.916573032415569e-05, "loss": 1.8567, "step": 2905 }, { "epoch": 0.30185935390048824, "grad_norm": 0.3641657829284668, "learning_rate": 7.915247569651627e-05, "loss": 1.5998, "step": 2906 }, { "epoch": 0.30196322842006856, "grad_norm": 0.35653895139694214, "learning_rate": 7.913921796436561e-05, "loss": 1.5639, "step": 2907 }, { "epoch": 0.3020671029396489, "grad_norm": 0.3951781690120697, "learning_rate": 7.912595712911552e-05, "loss": 1.7428, "step": 2908 }, { "epoch": 0.3021709774592293, "grad_norm": 0.47132858633995056, "learning_rate": 7.91126931921782e-05, "loss": 1.7877, "step": 2909 }, { "epoch": 0.3022748519788096, "grad_norm": 0.4093819260597229, "learning_rate": 7.909942615496613e-05, "loss": 1.8667, "step": 2910 }, { "epoch": 0.3023787264983899, "grad_norm": 0.43610236048698425, "learning_rate": 7.908615601889218e-05, "loss": 1.6627, "step": 2911 }, { "epoch": 0.3024826010179703, "grad_norm": 0.42912378907203674, "learning_rate": 7.907288278536947e-05, "loss": 1.6026, "step": 2912 }, { "epoch": 0.30258647553755064, "grad_norm": 0.4064681828022003, "learning_rate": 7.905960645581152e-05, "loss": 1.7998, "step": 2913 }, { "epoch": 0.30269035005713096, "grad_norm": 0.373544842004776, "learning_rate": 7.904632703163218e-05, "loss": 1.6253, "step": 2914 }, { "epoch": 0.30279422457671135, "grad_norm": 0.376240074634552, "learning_rate": 7.903304451424556e-05, "loss": 1.7304, "step": 2915 }, { "epoch": 0.3028980990962917, "grad_norm": 0.3998112678527832, "learning_rate": 7.901975890506616e-05, "loss": 1.7403, "step": 2916 }, { "epoch": 0.303001973615872, "grad_norm": 0.38144102692604065, "learning_rate": 7.900647020550882e-05, "loss": 1.7706, "step": 2917 }, { "epoch": 0.3031058481354524, "grad_norm": 0.370951771736145, "learning_rate": 7.899317841698864e-05, "loss": 1.7351, "step": 2918 }, { "epoch": 0.3032097226550327, "grad_norm": 0.41188183426856995, "learning_rate": 7.897988354092113e-05, "loss": 1.8509, "step": 2919 }, { "epoch": 0.3033135971746131, "grad_norm": 0.4149079918861389, "learning_rate": 7.896658557872207e-05, "loss": 1.6496, "step": 2920 }, { "epoch": 0.3034174716941934, "grad_norm": 0.38840773701667786, "learning_rate": 7.89532845318076e-05, "loss": 1.4942, "step": 2921 }, { "epoch": 0.30352134621377375, "grad_norm": 0.4095969498157501, "learning_rate": 7.893998040159418e-05, "loss": 1.5979, "step": 2922 }, { "epoch": 0.30362522073335413, "grad_norm": 0.3632447123527527, "learning_rate": 7.892667318949855e-05, "loss": 1.5617, "step": 2923 }, { "epoch": 0.30372909525293446, "grad_norm": 0.34822526574134827, "learning_rate": 7.891336289693789e-05, "loss": 1.6198, "step": 2924 }, { "epoch": 0.3038329697725148, "grad_norm": 0.3828188180923462, "learning_rate": 7.890004952532962e-05, "loss": 1.7494, "step": 2925 }, { "epoch": 0.30393684429209517, "grad_norm": 0.37072688341140747, "learning_rate": 7.888673307609149e-05, "loss": 1.5492, "step": 2926 }, { "epoch": 0.3040407188116755, "grad_norm": 0.3795013427734375, "learning_rate": 7.887341355064162e-05, "loss": 1.6222, "step": 2927 }, { "epoch": 0.3041445933312558, "grad_norm": 0.40049639344215393, "learning_rate": 7.886009095039843e-05, "loss": 1.8504, "step": 2928 }, { "epoch": 0.3042484678508362, "grad_norm": 0.4613747000694275, "learning_rate": 7.884676527678068e-05, "loss": 2.1015, "step": 2929 }, { "epoch": 0.30435234237041653, "grad_norm": 0.3635263442993164, "learning_rate": 7.883343653120743e-05, "loss": 1.6364, "step": 2930 }, { "epoch": 0.3044562168899969, "grad_norm": 0.4116704761981964, "learning_rate": 7.882010471509809e-05, "loss": 1.5827, "step": 2931 }, { "epoch": 0.30456009140957724, "grad_norm": 0.41490453481674194, "learning_rate": 7.88067698298724e-05, "loss": 1.9797, "step": 2932 }, { "epoch": 0.30466396592915757, "grad_norm": 0.3712293207645416, "learning_rate": 7.879343187695045e-05, "loss": 1.7037, "step": 2933 }, { "epoch": 0.30476784044873795, "grad_norm": 0.37144172191619873, "learning_rate": 7.878009085775257e-05, "loss": 1.7411, "step": 2934 }, { "epoch": 0.3048717149683183, "grad_norm": 0.38958555459976196, "learning_rate": 7.876674677369954e-05, "loss": 1.7444, "step": 2935 }, { "epoch": 0.3049755894878986, "grad_norm": 0.3905205726623535, "learning_rate": 7.875339962621235e-05, "loss": 1.8388, "step": 2936 }, { "epoch": 0.305079464007479, "grad_norm": 0.4097367525100708, "learning_rate": 7.874004941671239e-05, "loss": 1.7877, "step": 2937 }, { "epoch": 0.3051833385270593, "grad_norm": 0.39029520750045776, "learning_rate": 7.872669614662135e-05, "loss": 1.7491, "step": 2938 }, { "epoch": 0.30528721304663964, "grad_norm": 0.3893304467201233, "learning_rate": 7.871333981736124e-05, "loss": 1.7892, "step": 2939 }, { "epoch": 0.30539108756622, "grad_norm": 0.39329490065574646, "learning_rate": 7.869998043035442e-05, "loss": 1.7247, "step": 2940 }, { "epoch": 0.30549496208580035, "grad_norm": 0.4152490496635437, "learning_rate": 7.868661798702355e-05, "loss": 1.767, "step": 2941 }, { "epoch": 0.3055988366053807, "grad_norm": 0.40060338377952576, "learning_rate": 7.867325248879163e-05, "loss": 1.6133, "step": 2942 }, { "epoch": 0.30570271112496106, "grad_norm": 0.422410786151886, "learning_rate": 7.865988393708197e-05, "loss": 1.6876, "step": 2943 }, { "epoch": 0.3058065856445414, "grad_norm": 0.4146443009376526, "learning_rate": 7.864651233331823e-05, "loss": 1.7866, "step": 2944 }, { "epoch": 0.30591046016412177, "grad_norm": 0.363182008266449, "learning_rate": 7.863313767892438e-05, "loss": 1.5605, "step": 2945 }, { "epoch": 0.3060143346837021, "grad_norm": 0.4126920998096466, "learning_rate": 7.861975997532471e-05, "loss": 1.8965, "step": 2946 }, { "epoch": 0.3061182092032824, "grad_norm": 0.4038070738315582, "learning_rate": 7.860637922394387e-05, "loss": 1.8668, "step": 2947 }, { "epoch": 0.3062220837228628, "grad_norm": 0.3751528859138489, "learning_rate": 7.859299542620675e-05, "loss": 1.8258, "step": 2948 }, { "epoch": 0.30632595824244313, "grad_norm": 0.422244668006897, "learning_rate": 7.857960858353866e-05, "loss": 1.8309, "step": 2949 }, { "epoch": 0.30642983276202346, "grad_norm": 0.37908682227134705, "learning_rate": 7.85662186973652e-05, "loss": 1.7936, "step": 2950 }, { "epoch": 0.30653370728160384, "grad_norm": 0.36694470047950745, "learning_rate": 7.855282576911224e-05, "loss": 1.7259, "step": 2951 }, { "epoch": 0.30663758180118417, "grad_norm": 0.3961583077907562, "learning_rate": 7.85394298002061e-05, "loss": 1.7949, "step": 2952 }, { "epoch": 0.3067414563207645, "grad_norm": 0.3951317071914673, "learning_rate": 7.852603079207328e-05, "loss": 1.6679, "step": 2953 }, { "epoch": 0.3068453308403449, "grad_norm": 0.3763517737388611, "learning_rate": 7.851262874614069e-05, "loss": 1.6965, "step": 2954 }, { "epoch": 0.3069492053599252, "grad_norm": 0.3869110941886902, "learning_rate": 7.849922366383555e-05, "loss": 1.6044, "step": 2955 }, { "epoch": 0.30705307987950553, "grad_norm": 0.4839021563529968, "learning_rate": 7.848581554658539e-05, "loss": 2.1448, "step": 2956 }, { "epoch": 0.3071569543990859, "grad_norm": 0.4335784912109375, "learning_rate": 7.847240439581806e-05, "loss": 1.7972, "step": 2957 }, { "epoch": 0.30726082891866624, "grad_norm": 0.5460455417633057, "learning_rate": 7.845899021296178e-05, "loss": 1.732, "step": 2958 }, { "epoch": 0.3073647034382466, "grad_norm": 0.39001351594924927, "learning_rate": 7.844557299944501e-05, "loss": 1.7266, "step": 2959 }, { "epoch": 0.30746857795782695, "grad_norm": 0.39050352573394775, "learning_rate": 7.843215275669663e-05, "loss": 1.7042, "step": 2960 }, { "epoch": 0.3075724524774073, "grad_norm": 0.3831944465637207, "learning_rate": 7.841872948614573e-05, "loss": 1.7086, "step": 2961 }, { "epoch": 0.30767632699698766, "grad_norm": 0.4240471422672272, "learning_rate": 7.840530318922181e-05, "loss": 1.7215, "step": 2962 }, { "epoch": 0.307780201516568, "grad_norm": 0.3831408619880676, "learning_rate": 7.839187386735469e-05, "loss": 1.7995, "step": 2963 }, { "epoch": 0.3078840760361483, "grad_norm": 0.43340522050857544, "learning_rate": 7.837844152197447e-05, "loss": 1.7028, "step": 2964 }, { "epoch": 0.3079879505557287, "grad_norm": 0.3613108992576599, "learning_rate": 7.836500615451156e-05, "loss": 1.4274, "step": 2965 }, { "epoch": 0.308091825075309, "grad_norm": 0.4255940914154053, "learning_rate": 7.835156776639679e-05, "loss": 1.5747, "step": 2966 }, { "epoch": 0.30819569959488935, "grad_norm": 0.3656274974346161, "learning_rate": 7.833812635906118e-05, "loss": 1.6061, "step": 2967 }, { "epoch": 0.30829957411446973, "grad_norm": 0.4550713002681732, "learning_rate": 7.832468193393616e-05, "loss": 1.8704, "step": 2968 }, { "epoch": 0.30840344863405006, "grad_norm": 0.40488240122795105, "learning_rate": 7.831123449245345e-05, "loss": 1.7132, "step": 2969 }, { "epoch": 0.30850732315363044, "grad_norm": 0.3986703157424927, "learning_rate": 7.829778403604513e-05, "loss": 1.8511, "step": 2970 }, { "epoch": 0.30861119767321077, "grad_norm": 0.41377493739128113, "learning_rate": 7.828433056614351e-05, "loss": 1.8369, "step": 2971 }, { "epoch": 0.3087150721927911, "grad_norm": 0.3639225959777832, "learning_rate": 7.827087408418132e-05, "loss": 1.7907, "step": 2972 }, { "epoch": 0.3088189467123715, "grad_norm": 0.3887461721897125, "learning_rate": 7.825741459159157e-05, "loss": 1.7639, "step": 2973 }, { "epoch": 0.3089228212319518, "grad_norm": 0.38962462544441223, "learning_rate": 7.824395208980758e-05, "loss": 1.7871, "step": 2974 }, { "epoch": 0.30902669575153213, "grad_norm": 0.43991488218307495, "learning_rate": 7.8230486580263e-05, "loss": 1.988, "step": 2975 }, { "epoch": 0.3091305702711125, "grad_norm": 0.4023156464099884, "learning_rate": 7.821701806439179e-05, "loss": 1.812, "step": 2976 }, { "epoch": 0.30923444479069284, "grad_norm": 0.37170884013175964, "learning_rate": 7.820354654362828e-05, "loss": 1.7231, "step": 2977 }, { "epoch": 0.30933831931027317, "grad_norm": 0.38935086131095886, "learning_rate": 7.819007201940706e-05, "loss": 1.7493, "step": 2978 }, { "epoch": 0.30944219382985355, "grad_norm": 0.3613695204257965, "learning_rate": 7.817659449316305e-05, "loss": 1.621, "step": 2979 }, { "epoch": 0.3095460683494339, "grad_norm": 0.3804933726787567, "learning_rate": 7.816311396633152e-05, "loss": 1.7622, "step": 2980 }, { "epoch": 0.3096499428690142, "grad_norm": 0.3554391860961914, "learning_rate": 7.814963044034802e-05, "loss": 1.615, "step": 2981 }, { "epoch": 0.3097538173885946, "grad_norm": 0.42907533049583435, "learning_rate": 7.813614391664847e-05, "loss": 1.9264, "step": 2982 }, { "epoch": 0.3098576919081749, "grad_norm": 0.46726834774017334, "learning_rate": 7.812265439666906e-05, "loss": 1.6321, "step": 2983 }, { "epoch": 0.3099615664277553, "grad_norm": 0.43149927258491516, "learning_rate": 7.81091618818463e-05, "loss": 1.902, "step": 2984 }, { "epoch": 0.3100654409473356, "grad_norm": 0.42148879170417786, "learning_rate": 7.80956663736171e-05, "loss": 1.8288, "step": 2985 }, { "epoch": 0.31016931546691595, "grad_norm": 0.39359238743782043, "learning_rate": 7.808216787341855e-05, "loss": 1.7853, "step": 2986 }, { "epoch": 0.31027318998649633, "grad_norm": 0.37277668714523315, "learning_rate": 7.806866638268818e-05, "loss": 1.702, "step": 2987 }, { "epoch": 0.31037706450607666, "grad_norm": 0.3854857087135315, "learning_rate": 7.80551619028638e-05, "loss": 1.7776, "step": 2988 }, { "epoch": 0.310480939025657, "grad_norm": 0.3613017201423645, "learning_rate": 7.80416544353835e-05, "loss": 1.6451, "step": 2989 }, { "epoch": 0.31058481354523737, "grad_norm": 0.3885599970817566, "learning_rate": 7.802814398168572e-05, "loss": 1.8862, "step": 2990 }, { "epoch": 0.3106886880648177, "grad_norm": 0.3823319375514984, "learning_rate": 7.801463054320927e-05, "loss": 1.7783, "step": 2991 }, { "epoch": 0.310792562584398, "grad_norm": 0.43862950801849365, "learning_rate": 7.800111412139318e-05, "loss": 1.654, "step": 2992 }, { "epoch": 0.3108964371039784, "grad_norm": 0.4039106070995331, "learning_rate": 7.798759471767684e-05, "loss": 1.7425, "step": 2993 }, { "epoch": 0.31100031162355873, "grad_norm": 0.3942197263240814, "learning_rate": 7.797407233349998e-05, "loss": 1.7184, "step": 2994 }, { "epoch": 0.31110418614313906, "grad_norm": 0.35280346870422363, "learning_rate": 7.796054697030262e-05, "loss": 1.5123, "step": 2995 }, { "epoch": 0.31120806066271944, "grad_norm": 0.38291507959365845, "learning_rate": 7.794701862952512e-05, "loss": 1.7482, "step": 2996 }, { "epoch": 0.31131193518229977, "grad_norm": 0.4096364378929138, "learning_rate": 7.793348731260813e-05, "loss": 1.8343, "step": 2997 }, { "epoch": 0.31141580970188015, "grad_norm": 0.40099769830703735, "learning_rate": 7.79199530209926e-05, "loss": 1.655, "step": 2998 }, { "epoch": 0.3115196842214605, "grad_norm": 0.3833656907081604, "learning_rate": 7.79064157561199e-05, "loss": 1.7954, "step": 2999 }, { "epoch": 0.3116235587410408, "grad_norm": 0.3824611008167267, "learning_rate": 7.789287551943158e-05, "loss": 1.5854, "step": 3000 }, { "epoch": 0.3117274332606212, "grad_norm": 0.3726818561553955, "learning_rate": 7.787933231236958e-05, "loss": 1.6898, "step": 3001 }, { "epoch": 0.3118313077802015, "grad_norm": 0.37149757146835327, "learning_rate": 7.786578613637617e-05, "loss": 1.6463, "step": 3002 }, { "epoch": 0.31193518229978184, "grad_norm": 0.42311716079711914, "learning_rate": 7.78522369928939e-05, "loss": 1.951, "step": 3003 }, { "epoch": 0.3120390568193622, "grad_norm": 0.47437840700149536, "learning_rate": 7.783868488336563e-05, "loss": 1.6597, "step": 3004 }, { "epoch": 0.31214293133894255, "grad_norm": 0.3761157989501953, "learning_rate": 7.782512980923459e-05, "loss": 1.8036, "step": 3005 }, { "epoch": 0.3122468058585229, "grad_norm": 0.3912384510040283, "learning_rate": 7.781157177194426e-05, "loss": 1.6382, "step": 3006 }, { "epoch": 0.31235068037810326, "grad_norm": 0.3848210871219635, "learning_rate": 7.779801077293847e-05, "loss": 1.7521, "step": 3007 }, { "epoch": 0.3124545548976836, "grad_norm": 0.3805733323097229, "learning_rate": 7.778444681366137e-05, "loss": 1.7019, "step": 3008 }, { "epoch": 0.31255842941726397, "grad_norm": 0.38343513011932373, "learning_rate": 7.777087989555741e-05, "loss": 1.7686, "step": 3009 }, { "epoch": 0.3126623039368443, "grad_norm": 0.37739554047584534, "learning_rate": 7.775731002007138e-05, "loss": 1.5242, "step": 3010 }, { "epoch": 0.3127661784564246, "grad_norm": 0.37050339579582214, "learning_rate": 7.774373718864833e-05, "loss": 1.771, "step": 3011 }, { "epoch": 0.312870052976005, "grad_norm": 0.39899301528930664, "learning_rate": 7.773016140273368e-05, "loss": 1.8089, "step": 3012 }, { "epoch": 0.31297392749558534, "grad_norm": 0.3865603506565094, "learning_rate": 7.771658266377315e-05, "loss": 1.6336, "step": 3013 }, { "epoch": 0.31307780201516566, "grad_norm": 0.3612881302833557, "learning_rate": 7.770300097321276e-05, "loss": 1.4883, "step": 3014 }, { "epoch": 0.31318167653474605, "grad_norm": 0.422632098197937, "learning_rate": 7.768941633249884e-05, "loss": 1.8948, "step": 3015 }, { "epoch": 0.31328555105432637, "grad_norm": 0.3684242069721222, "learning_rate": 7.76758287430781e-05, "loss": 1.702, "step": 3016 }, { "epoch": 0.3133894255739067, "grad_norm": 0.36709776520729065, "learning_rate": 7.766223820639747e-05, "loss": 1.687, "step": 3017 }, { "epoch": 0.3134933000934871, "grad_norm": 0.39332762360572815, "learning_rate": 7.764864472390423e-05, "loss": 1.7333, "step": 3018 }, { "epoch": 0.3135971746130674, "grad_norm": 0.3547021448612213, "learning_rate": 7.7635048297046e-05, "loss": 1.6883, "step": 3019 }, { "epoch": 0.31370104913264774, "grad_norm": 0.393284410238266, "learning_rate": 7.762144892727069e-05, "loss": 1.9121, "step": 3020 }, { "epoch": 0.3138049236522281, "grad_norm": 0.4200425148010254, "learning_rate": 7.760784661602653e-05, "loss": 1.8525, "step": 3021 }, { "epoch": 0.31390879817180845, "grad_norm": 0.3398437798023224, "learning_rate": 7.759424136476206e-05, "loss": 1.529, "step": 3022 }, { "epoch": 0.31401267269138883, "grad_norm": 0.3940331041812897, "learning_rate": 7.75806331749261e-05, "loss": 1.8153, "step": 3023 }, { "epoch": 0.31411654721096915, "grad_norm": 0.37523049116134644, "learning_rate": 7.756702204796786e-05, "loss": 1.6807, "step": 3024 }, { "epoch": 0.3142204217305495, "grad_norm": 0.3940492868423462, "learning_rate": 7.75534079853368e-05, "loss": 1.8526, "step": 3025 }, { "epoch": 0.31432429625012986, "grad_norm": 0.40810003876686096, "learning_rate": 7.753979098848272e-05, "loss": 1.6186, "step": 3026 }, { "epoch": 0.3144281707697102, "grad_norm": 0.3908647894859314, "learning_rate": 7.752617105885574e-05, "loss": 1.6896, "step": 3027 }, { "epoch": 0.3145320452892905, "grad_norm": 0.35582828521728516, "learning_rate": 7.751254819790623e-05, "loss": 1.719, "step": 3028 }, { "epoch": 0.3146359198088709, "grad_norm": 0.3710813820362091, "learning_rate": 7.749892240708494e-05, "loss": 1.7155, "step": 3029 }, { "epoch": 0.31473979432845123, "grad_norm": 0.4214909076690674, "learning_rate": 7.748529368784292e-05, "loss": 1.8792, "step": 3030 }, { "epoch": 0.31484366884803155, "grad_norm": 0.4322544038295746, "learning_rate": 7.747166204163152e-05, "loss": 1.8348, "step": 3031 }, { "epoch": 0.31494754336761194, "grad_norm": 0.38530245423316956, "learning_rate": 7.745802746990239e-05, "loss": 1.7037, "step": 3032 }, { "epoch": 0.31505141788719226, "grad_norm": 0.3923969268798828, "learning_rate": 7.744438997410752e-05, "loss": 1.6523, "step": 3033 }, { "epoch": 0.3151552924067726, "grad_norm": 0.3855164349079132, "learning_rate": 7.74307495556992e-05, "loss": 1.705, "step": 3034 }, { "epoch": 0.315259166926353, "grad_norm": 0.3986038267612457, "learning_rate": 7.741710621613002e-05, "loss": 1.9339, "step": 3035 }, { "epoch": 0.3153630414459333, "grad_norm": 0.42609813809394836, "learning_rate": 7.740345995685287e-05, "loss": 1.8712, "step": 3036 }, { "epoch": 0.3154669159655137, "grad_norm": 0.4008728563785553, "learning_rate": 7.738981077932099e-05, "loss": 1.8456, "step": 3037 }, { "epoch": 0.315570790485094, "grad_norm": 0.4060449004173279, "learning_rate": 7.737615868498793e-05, "loss": 1.7126, "step": 3038 }, { "epoch": 0.31567466500467434, "grad_norm": 0.36810052394866943, "learning_rate": 7.736250367530751e-05, "loss": 1.6292, "step": 3039 }, { "epoch": 0.3157785395242547, "grad_norm": 0.39102548360824585, "learning_rate": 7.734884575173387e-05, "loss": 1.7703, "step": 3040 }, { "epoch": 0.31588241404383505, "grad_norm": 0.4772125780582428, "learning_rate": 7.73351849157215e-05, "loss": 1.7719, "step": 3041 }, { "epoch": 0.3159862885634154, "grad_norm": 0.40599432587623596, "learning_rate": 7.732152116872515e-05, "loss": 1.6538, "step": 3042 }, { "epoch": 0.31609016308299576, "grad_norm": 0.4301255941390991, "learning_rate": 7.730785451219991e-05, "loss": 1.755, "step": 3043 }, { "epoch": 0.3161940376025761, "grad_norm": 0.386177122592926, "learning_rate": 7.729418494760119e-05, "loss": 1.5637, "step": 3044 }, { "epoch": 0.3162979121221564, "grad_norm": 0.3636477589607239, "learning_rate": 7.728051247638468e-05, "loss": 1.5991, "step": 3045 }, { "epoch": 0.3164017866417368, "grad_norm": 0.38973626494407654, "learning_rate": 7.726683710000637e-05, "loss": 1.7331, "step": 3046 }, { "epoch": 0.3165056611613171, "grad_norm": 0.42155221104621887, "learning_rate": 7.72531588199226e-05, "loss": 1.8179, "step": 3047 }, { "epoch": 0.3166095356808975, "grad_norm": 0.4188525080680847, "learning_rate": 7.723947763759e-05, "loss": 1.7464, "step": 3048 }, { "epoch": 0.31671341020047783, "grad_norm": 0.38648781180381775, "learning_rate": 7.722579355446551e-05, "loss": 1.5823, "step": 3049 }, { "epoch": 0.31681728472005816, "grad_norm": 0.4101918339729309, "learning_rate": 7.721210657200637e-05, "loss": 1.7697, "step": 3050 }, { "epoch": 0.31692115923963854, "grad_norm": 0.366359680891037, "learning_rate": 7.719841669167014e-05, "loss": 1.6861, "step": 3051 }, { "epoch": 0.31702503375921887, "grad_norm": 0.4024447202682495, "learning_rate": 7.71847239149147e-05, "loss": 1.9079, "step": 3052 }, { "epoch": 0.3171289082787992, "grad_norm": 0.3973731994628906, "learning_rate": 7.717102824319822e-05, "loss": 1.8736, "step": 3053 }, { "epoch": 0.3172327827983796, "grad_norm": 0.42864885926246643, "learning_rate": 7.715732967797915e-05, "loss": 1.7298, "step": 3054 }, { "epoch": 0.3173366573179599, "grad_norm": 0.37840789556503296, "learning_rate": 7.714362822071632e-05, "loss": 1.8096, "step": 3055 }, { "epoch": 0.31744053183754023, "grad_norm": 0.3534778654575348, "learning_rate": 7.712992387286882e-05, "loss": 1.5215, "step": 3056 }, { "epoch": 0.3175444063571206, "grad_norm": 0.37285417318344116, "learning_rate": 7.711621663589602e-05, "loss": 1.6497, "step": 3057 }, { "epoch": 0.31764828087670094, "grad_norm": 0.35734882950782776, "learning_rate": 7.71025065112577e-05, "loss": 1.6053, "step": 3058 }, { "epoch": 0.31775215539628127, "grad_norm": 0.38647857308387756, "learning_rate": 7.708879350041383e-05, "loss": 1.6968, "step": 3059 }, { "epoch": 0.31785602991586165, "grad_norm": 0.4510471522808075, "learning_rate": 7.707507760482474e-05, "loss": 1.6815, "step": 3060 }, { "epoch": 0.317959904435442, "grad_norm": 0.4056614935398102, "learning_rate": 7.706135882595108e-05, "loss": 1.6113, "step": 3061 }, { "epoch": 0.31806377895502236, "grad_norm": 0.4500080645084381, "learning_rate": 7.70476371652538e-05, "loss": 1.9964, "step": 3062 }, { "epoch": 0.3181676534746027, "grad_norm": 0.3800092935562134, "learning_rate": 7.703391262419413e-05, "loss": 1.67, "step": 3063 }, { "epoch": 0.318271527994183, "grad_norm": 0.4021591544151306, "learning_rate": 7.702018520423367e-05, "loss": 1.7399, "step": 3064 }, { "epoch": 0.3183754025137634, "grad_norm": 0.42205873131752014, "learning_rate": 7.700645490683421e-05, "loss": 1.8155, "step": 3065 }, { "epoch": 0.3184792770333437, "grad_norm": 0.3718779385089874, "learning_rate": 7.699272173345799e-05, "loss": 1.7583, "step": 3066 }, { "epoch": 0.31858315155292405, "grad_norm": 0.3647688031196594, "learning_rate": 7.697898568556745e-05, "loss": 1.5891, "step": 3067 }, { "epoch": 0.31868702607250443, "grad_norm": 0.3905600309371948, "learning_rate": 7.696524676462538e-05, "loss": 1.7358, "step": 3068 }, { "epoch": 0.31879090059208476, "grad_norm": 0.36496713757514954, "learning_rate": 7.695150497209485e-05, "loss": 1.7218, "step": 3069 }, { "epoch": 0.3188947751116651, "grad_norm": 0.4626975357532501, "learning_rate": 7.69377603094393e-05, "loss": 1.8209, "step": 3070 }, { "epoch": 0.31899864963124547, "grad_norm": 0.4146973490715027, "learning_rate": 7.692401277812236e-05, "loss": 1.6095, "step": 3071 }, { "epoch": 0.3191025241508258, "grad_norm": 0.3558516800403595, "learning_rate": 7.69102623796081e-05, "loss": 1.5576, "step": 3072 }, { "epoch": 0.3192063986704061, "grad_norm": 0.4161911606788635, "learning_rate": 7.689650911536081e-05, "loss": 1.656, "step": 3073 }, { "epoch": 0.3193102731899865, "grad_norm": 0.40754130482673645, "learning_rate": 7.688275298684509e-05, "loss": 1.7435, "step": 3074 }, { "epoch": 0.31941414770956683, "grad_norm": 0.36731454730033875, "learning_rate": 7.686899399552587e-05, "loss": 1.6421, "step": 3075 }, { "epoch": 0.3195180222291472, "grad_norm": 0.4020611047744751, "learning_rate": 7.685523214286839e-05, "loss": 1.7208, "step": 3076 }, { "epoch": 0.31962189674872754, "grad_norm": 0.48006507754325867, "learning_rate": 7.684146743033815e-05, "loss": 1.7648, "step": 3077 }, { "epoch": 0.31972577126830787, "grad_norm": 0.39849093556404114, "learning_rate": 7.6827699859401e-05, "loss": 1.751, "step": 3078 }, { "epoch": 0.31982964578788825, "grad_norm": 0.3924444615840912, "learning_rate": 7.681392943152308e-05, "loss": 1.7878, "step": 3079 }, { "epoch": 0.3199335203074686, "grad_norm": 0.4005417823791504, "learning_rate": 7.680015614817083e-05, "loss": 1.7752, "step": 3080 }, { "epoch": 0.3200373948270489, "grad_norm": 0.39548689126968384, "learning_rate": 7.678638001081102e-05, "loss": 1.7377, "step": 3081 }, { "epoch": 0.3201412693466293, "grad_norm": 0.4022079408168793, "learning_rate": 7.677260102091066e-05, "loss": 1.7732, "step": 3082 }, { "epoch": 0.3202451438662096, "grad_norm": 0.4020305573940277, "learning_rate": 7.675881917993713e-05, "loss": 1.6768, "step": 3083 }, { "epoch": 0.32034901838578994, "grad_norm": 0.374347448348999, "learning_rate": 7.674503448935808e-05, "loss": 1.59, "step": 3084 }, { "epoch": 0.3204528929053703, "grad_norm": 0.3810427784919739, "learning_rate": 7.673124695064148e-05, "loss": 1.3873, "step": 3085 }, { "epoch": 0.32055676742495065, "grad_norm": 0.43509384989738464, "learning_rate": 7.671745656525559e-05, "loss": 1.5037, "step": 3086 }, { "epoch": 0.32066064194453103, "grad_norm": 0.38209038972854614, "learning_rate": 7.670366333466899e-05, "loss": 1.6842, "step": 3087 }, { "epoch": 0.32076451646411136, "grad_norm": 0.4374238848686218, "learning_rate": 7.668986726035054e-05, "loss": 1.858, "step": 3088 }, { "epoch": 0.3208683909836917, "grad_norm": 0.4977831244468689, "learning_rate": 7.66760683437694e-05, "loss": 1.9369, "step": 3089 }, { "epoch": 0.32097226550327207, "grad_norm": 0.37174275517463684, "learning_rate": 7.666226658639507e-05, "loss": 1.6107, "step": 3090 }, { "epoch": 0.3210761400228524, "grad_norm": 0.3945876657962799, "learning_rate": 7.664846198969733e-05, "loss": 1.8063, "step": 3091 }, { "epoch": 0.3211800145424327, "grad_norm": 0.4560282230377197, "learning_rate": 7.663465455514625e-05, "loss": 2.2025, "step": 3092 }, { "epoch": 0.3212838890620131, "grad_norm": 0.3639695346355438, "learning_rate": 7.662084428421221e-05, "loss": 1.6021, "step": 3093 }, { "epoch": 0.32138776358159343, "grad_norm": 0.36369383335113525, "learning_rate": 7.660703117836591e-05, "loss": 1.6781, "step": 3094 }, { "epoch": 0.32149163810117376, "grad_norm": 0.3726049065589905, "learning_rate": 7.659321523907834e-05, "loss": 1.8033, "step": 3095 }, { "epoch": 0.32159551262075414, "grad_norm": 0.3972417116165161, "learning_rate": 7.657939646782077e-05, "loss": 1.6971, "step": 3096 }, { "epoch": 0.32169938714033447, "grad_norm": 0.4059099555015564, "learning_rate": 7.656557486606482e-05, "loss": 1.7625, "step": 3097 }, { "epoch": 0.3218032616599148, "grad_norm": 0.3809730112552643, "learning_rate": 7.655175043528235e-05, "loss": 1.79, "step": 3098 }, { "epoch": 0.3219071361794952, "grad_norm": 0.37229931354522705, "learning_rate": 7.653792317694556e-05, "loss": 1.7281, "step": 3099 }, { "epoch": 0.3220110106990755, "grad_norm": 0.40562358498573303, "learning_rate": 7.652409309252699e-05, "loss": 1.7644, "step": 3100 }, { "epoch": 0.3221148852186559, "grad_norm": 0.3879106044769287, "learning_rate": 7.651026018349938e-05, "loss": 1.815, "step": 3101 }, { "epoch": 0.3222187597382362, "grad_norm": 0.39501863718032837, "learning_rate": 7.649642445133585e-05, "loss": 1.8004, "step": 3102 }, { "epoch": 0.32232263425781654, "grad_norm": 0.42894792556762695, "learning_rate": 7.64825858975098e-05, "loss": 1.8058, "step": 3103 }, { "epoch": 0.3224265087773969, "grad_norm": 0.4245396852493286, "learning_rate": 7.646874452349491e-05, "loss": 1.7229, "step": 3104 }, { "epoch": 0.32253038329697725, "grad_norm": 0.3750874996185303, "learning_rate": 7.64549003307652e-05, "loss": 1.6437, "step": 3105 }, { "epoch": 0.3226342578165576, "grad_norm": 0.44294023513793945, "learning_rate": 7.644105332079497e-05, "loss": 1.8526, "step": 3106 }, { "epoch": 0.32273813233613796, "grad_norm": 0.4349030554294586, "learning_rate": 7.642720349505881e-05, "loss": 1.8628, "step": 3107 }, { "epoch": 0.3228420068557183, "grad_norm": 0.4126608967781067, "learning_rate": 7.641335085503161e-05, "loss": 1.7873, "step": 3108 }, { "epoch": 0.3229458813752986, "grad_norm": 0.3600351810455322, "learning_rate": 7.639949540218858e-05, "loss": 1.7672, "step": 3109 }, { "epoch": 0.323049755894879, "grad_norm": 0.38123077154159546, "learning_rate": 7.63856371380052e-05, "loss": 1.7417, "step": 3110 }, { "epoch": 0.3231536304144593, "grad_norm": 0.41736799478530884, "learning_rate": 7.63717760639573e-05, "loss": 1.855, "step": 3111 }, { "epoch": 0.3232575049340397, "grad_norm": 0.42737138271331787, "learning_rate": 7.635791218152094e-05, "loss": 1.766, "step": 3112 }, { "epoch": 0.32336137945362003, "grad_norm": 0.37469685077667236, "learning_rate": 7.634404549217254e-05, "loss": 1.6696, "step": 3113 }, { "epoch": 0.32346525397320036, "grad_norm": 0.4223494231700897, "learning_rate": 7.633017599738879e-05, "loss": 1.7205, "step": 3114 }, { "epoch": 0.32356912849278074, "grad_norm": 0.39290282130241394, "learning_rate": 7.631630369864668e-05, "loss": 1.6764, "step": 3115 }, { "epoch": 0.32367300301236107, "grad_norm": 0.38165774941444397, "learning_rate": 7.63024285974235e-05, "loss": 1.633, "step": 3116 }, { "epoch": 0.3237768775319414, "grad_norm": 0.46332311630249023, "learning_rate": 7.628855069519685e-05, "loss": 1.9059, "step": 3117 }, { "epoch": 0.3238807520515218, "grad_norm": 0.38814160227775574, "learning_rate": 7.627466999344459e-05, "loss": 1.6475, "step": 3118 }, { "epoch": 0.3239846265711021, "grad_norm": 0.44505298137664795, "learning_rate": 7.626078649364494e-05, "loss": 1.8901, "step": 3119 }, { "epoch": 0.32408850109068243, "grad_norm": 0.4025139808654785, "learning_rate": 7.624690019727636e-05, "loss": 1.62, "step": 3120 }, { "epoch": 0.3241923756102628, "grad_norm": 0.37177082896232605, "learning_rate": 7.623301110581764e-05, "loss": 1.7613, "step": 3121 }, { "epoch": 0.32429625012984314, "grad_norm": 0.38354530930519104, "learning_rate": 7.621911922074787e-05, "loss": 1.6685, "step": 3122 }, { "epoch": 0.32440012464942347, "grad_norm": 0.404633492231369, "learning_rate": 7.620522454354644e-05, "loss": 1.8067, "step": 3123 }, { "epoch": 0.32450399916900385, "grad_norm": 0.3900499641895294, "learning_rate": 7.619132707569299e-05, "loss": 1.8046, "step": 3124 }, { "epoch": 0.3246078736885842, "grad_norm": 0.4414837062358856, "learning_rate": 7.617742681866749e-05, "loss": 1.9807, "step": 3125 }, { "epoch": 0.32471174820816456, "grad_norm": 0.3875233232975006, "learning_rate": 7.616352377395025e-05, "loss": 1.7041, "step": 3126 }, { "epoch": 0.3248156227277449, "grad_norm": 0.4394824206829071, "learning_rate": 7.614961794302178e-05, "loss": 1.7103, "step": 3127 }, { "epoch": 0.3249194972473252, "grad_norm": 0.38259413838386536, "learning_rate": 7.6135709327363e-05, "loss": 1.6377, "step": 3128 }, { "epoch": 0.3250233717669056, "grad_norm": 0.4169695973396301, "learning_rate": 7.612179792845504e-05, "loss": 1.7518, "step": 3129 }, { "epoch": 0.3251272462864859, "grad_norm": 0.4079383909702301, "learning_rate": 7.610788374777935e-05, "loss": 1.6768, "step": 3130 }, { "epoch": 0.32523112080606625, "grad_norm": 0.4074368476867676, "learning_rate": 7.609396678681771e-05, "loss": 1.8424, "step": 3131 }, { "epoch": 0.32533499532564664, "grad_norm": 0.41162872314453125, "learning_rate": 7.608004704705212e-05, "loss": 1.8505, "step": 3132 }, { "epoch": 0.32543886984522696, "grad_norm": 0.41227778792381287, "learning_rate": 7.606612452996495e-05, "loss": 1.6893, "step": 3133 }, { "epoch": 0.3255427443648073, "grad_norm": 0.3721327781677246, "learning_rate": 7.605219923703886e-05, "loss": 1.7895, "step": 3134 }, { "epoch": 0.3256466188843877, "grad_norm": 0.3553870916366577, "learning_rate": 7.603827116975677e-05, "loss": 1.5966, "step": 3135 }, { "epoch": 0.325750493403968, "grad_norm": 0.35715457797050476, "learning_rate": 7.602434032960189e-05, "loss": 1.7413, "step": 3136 }, { "epoch": 0.3258543679235483, "grad_norm": 0.39415115118026733, "learning_rate": 7.601040671805779e-05, "loss": 1.3362, "step": 3137 }, { "epoch": 0.3259582424431287, "grad_norm": 0.3815273344516754, "learning_rate": 7.599647033660824e-05, "loss": 1.6745, "step": 3138 }, { "epoch": 0.32606211696270904, "grad_norm": 0.3810808062553406, "learning_rate": 7.598253118673738e-05, "loss": 1.6381, "step": 3139 }, { "epoch": 0.3261659914822894, "grad_norm": 0.43945252895355225, "learning_rate": 7.596858926992962e-05, "loss": 1.8387, "step": 3140 }, { "epoch": 0.32626986600186975, "grad_norm": 0.38324853777885437, "learning_rate": 7.595464458766968e-05, "loss": 1.6446, "step": 3141 }, { "epoch": 0.3263737405214501, "grad_norm": 0.4006134569644928, "learning_rate": 7.594069714144252e-05, "loss": 1.4691, "step": 3142 }, { "epoch": 0.32647761504103046, "grad_norm": 0.4041348993778229, "learning_rate": 7.592674693273348e-05, "loss": 1.8108, "step": 3143 }, { "epoch": 0.3265814895606108, "grad_norm": 0.380634605884552, "learning_rate": 7.591279396302812e-05, "loss": 1.7062, "step": 3144 }, { "epoch": 0.3266853640801911, "grad_norm": 0.3991457223892212, "learning_rate": 7.589883823381234e-05, "loss": 1.6658, "step": 3145 }, { "epoch": 0.3267892385997715, "grad_norm": 0.4109683334827423, "learning_rate": 7.58848797465723e-05, "loss": 1.7899, "step": 3146 }, { "epoch": 0.3268931131193518, "grad_norm": 0.38975322246551514, "learning_rate": 7.587091850279447e-05, "loss": 1.7506, "step": 3147 }, { "epoch": 0.32699698763893215, "grad_norm": 0.3871016800403595, "learning_rate": 7.585695450396564e-05, "loss": 1.7103, "step": 3148 }, { "epoch": 0.32710086215851253, "grad_norm": 0.36475732922554016, "learning_rate": 7.584298775157282e-05, "loss": 1.5979, "step": 3149 }, { "epoch": 0.32720473667809286, "grad_norm": 0.3877626359462738, "learning_rate": 7.58290182471034e-05, "loss": 1.8509, "step": 3150 }, { "epoch": 0.32730861119767324, "grad_norm": 0.36147022247314453, "learning_rate": 7.581504599204503e-05, "loss": 1.686, "step": 3151 }, { "epoch": 0.32741248571725357, "grad_norm": 0.38483577966690063, "learning_rate": 7.58010709878856e-05, "loss": 1.8448, "step": 3152 }, { "epoch": 0.3275163602368339, "grad_norm": 0.415414035320282, "learning_rate": 7.578709323611338e-05, "loss": 1.7782, "step": 3153 }, { "epoch": 0.3276202347564143, "grad_norm": 0.3928332030773163, "learning_rate": 7.577311273821687e-05, "loss": 1.7276, "step": 3154 }, { "epoch": 0.3277241092759946, "grad_norm": 0.36280107498168945, "learning_rate": 7.575912949568489e-05, "loss": 1.7094, "step": 3155 }, { "epoch": 0.32782798379557493, "grad_norm": 0.36135435104370117, "learning_rate": 7.574514351000657e-05, "loss": 1.6252, "step": 3156 }, { "epoch": 0.3279318583151553, "grad_norm": 0.38758692145347595, "learning_rate": 7.573115478267126e-05, "loss": 1.7715, "step": 3157 }, { "epoch": 0.32803573283473564, "grad_norm": 0.39279675483703613, "learning_rate": 7.571716331516869e-05, "loss": 1.71, "step": 3158 }, { "epoch": 0.32813960735431597, "grad_norm": 0.36881011724472046, "learning_rate": 7.570316910898882e-05, "loss": 1.6717, "step": 3159 }, { "epoch": 0.32824348187389635, "grad_norm": 0.3746923506259918, "learning_rate": 7.568917216562193e-05, "loss": 1.6144, "step": 3160 }, { "epoch": 0.3283473563934767, "grad_norm": 0.4102190434932709, "learning_rate": 7.56751724865586e-05, "loss": 1.7229, "step": 3161 }, { "epoch": 0.328451230913057, "grad_norm": 0.39784857630729675, "learning_rate": 7.566117007328967e-05, "loss": 1.6128, "step": 3162 }, { "epoch": 0.3285551054326374, "grad_norm": 0.37368080019950867, "learning_rate": 7.564716492730628e-05, "loss": 1.8158, "step": 3163 }, { "epoch": 0.3286589799522177, "grad_norm": 0.4041743576526642, "learning_rate": 7.56331570500999e-05, "loss": 1.5258, "step": 3164 }, { "epoch": 0.3287628544717981, "grad_norm": 0.4261610507965088, "learning_rate": 7.561914644316221e-05, "loss": 1.8395, "step": 3165 }, { "epoch": 0.3288667289913784, "grad_norm": 0.3845587372779846, "learning_rate": 7.560513310798529e-05, "loss": 1.7382, "step": 3166 }, { "epoch": 0.32897060351095875, "grad_norm": 0.37753623723983765, "learning_rate": 7.559111704606142e-05, "loss": 1.7647, "step": 3167 }, { "epoch": 0.32907447803053913, "grad_norm": 0.3923317492008209, "learning_rate": 7.557709825888319e-05, "loss": 1.6671, "step": 3168 }, { "epoch": 0.32917835255011946, "grad_norm": 0.4080863893032074, "learning_rate": 7.556307674794351e-05, "loss": 1.8564, "step": 3169 }, { "epoch": 0.3292822270696998, "grad_norm": 0.38211774826049805, "learning_rate": 7.554905251473557e-05, "loss": 1.8405, "step": 3170 }, { "epoch": 0.32938610158928017, "grad_norm": 0.3729119896888733, "learning_rate": 7.55350255607528e-05, "loss": 1.6871, "step": 3171 }, { "epoch": 0.3294899761088605, "grad_norm": 0.36250361800193787, "learning_rate": 7.5520995887489e-05, "loss": 1.6192, "step": 3172 }, { "epoch": 0.3295938506284408, "grad_norm": 0.4741998314857483, "learning_rate": 7.550696349643822e-05, "loss": 1.9924, "step": 3173 }, { "epoch": 0.3296977251480212, "grad_norm": 0.39271846413612366, "learning_rate": 7.549292838909478e-05, "loss": 1.7255, "step": 3174 }, { "epoch": 0.32980159966760153, "grad_norm": 0.4134219288825989, "learning_rate": 7.547889056695332e-05, "loss": 1.7508, "step": 3175 }, { "epoch": 0.32990547418718186, "grad_norm": 0.377145379781723, "learning_rate": 7.546485003150876e-05, "loss": 1.7239, "step": 3176 }, { "epoch": 0.33000934870676224, "grad_norm": 0.37137743830680847, "learning_rate": 7.54508067842563e-05, "loss": 1.6987, "step": 3177 }, { "epoch": 0.33011322322634257, "grad_norm": 0.36599913239479065, "learning_rate": 7.543676082669146e-05, "loss": 1.675, "step": 3178 }, { "epoch": 0.33021709774592295, "grad_norm": 0.3733290731906891, "learning_rate": 7.542271216030998e-05, "loss": 1.6841, "step": 3179 }, { "epoch": 0.3303209722655033, "grad_norm": 0.3710775375366211, "learning_rate": 7.540866078660797e-05, "loss": 1.7604, "step": 3180 }, { "epoch": 0.3304248467850836, "grad_norm": 0.4162429869174957, "learning_rate": 7.539460670708178e-05, "loss": 1.7021, "step": 3181 }, { "epoch": 0.330528721304664, "grad_norm": 0.4121178090572357, "learning_rate": 7.538054992322806e-05, "loss": 1.885, "step": 3182 }, { "epoch": 0.3306325958242443, "grad_norm": 0.3530847728252411, "learning_rate": 7.536649043654376e-05, "loss": 1.3871, "step": 3183 }, { "epoch": 0.33073647034382464, "grad_norm": 0.42063409090042114, "learning_rate": 7.535242824852608e-05, "loss": 1.7323, "step": 3184 }, { "epoch": 0.330840344863405, "grad_norm": 0.4144652485847473, "learning_rate": 7.533836336067257e-05, "loss": 1.7173, "step": 3185 }, { "epoch": 0.33094421938298535, "grad_norm": 0.3715633749961853, "learning_rate": 7.5324295774481e-05, "loss": 1.6793, "step": 3186 }, { "epoch": 0.3310480939025657, "grad_norm": 0.40452414751052856, "learning_rate": 7.531022549144946e-05, "loss": 1.7443, "step": 3187 }, { "epoch": 0.33115196842214606, "grad_norm": 0.3777833878993988, "learning_rate": 7.529615251307631e-05, "loss": 1.675, "step": 3188 }, { "epoch": 0.3312558429417264, "grad_norm": 0.3736540973186493, "learning_rate": 7.528207684086027e-05, "loss": 1.7026, "step": 3189 }, { "epoch": 0.33135971746130677, "grad_norm": 0.3794771134853363, "learning_rate": 7.526799847630024e-05, "loss": 1.6583, "step": 3190 }, { "epoch": 0.3314635919808871, "grad_norm": 0.38491761684417725, "learning_rate": 7.525391742089547e-05, "loss": 1.6339, "step": 3191 }, { "epoch": 0.3315674665004674, "grad_norm": 0.3660680055618286, "learning_rate": 7.52398336761455e-05, "loss": 1.7295, "step": 3192 }, { "epoch": 0.3316713410200478, "grad_norm": 0.4049983024597168, "learning_rate": 7.522574724355009e-05, "loss": 1.8415, "step": 3193 }, { "epoch": 0.33177521553962813, "grad_norm": 0.4031364917755127, "learning_rate": 7.52116581246094e-05, "loss": 1.8444, "step": 3194 }, { "epoch": 0.33187909005920846, "grad_norm": 0.3920493721961975, "learning_rate": 7.519756632082376e-05, "loss": 1.9211, "step": 3195 }, { "epoch": 0.33198296457878884, "grad_norm": 0.40728676319122314, "learning_rate": 7.518347183369385e-05, "loss": 1.5216, "step": 3196 }, { "epoch": 0.33208683909836917, "grad_norm": 0.3954370617866516, "learning_rate": 7.516937466472065e-05, "loss": 1.6549, "step": 3197 }, { "epoch": 0.3321907136179495, "grad_norm": 0.3614353537559509, "learning_rate": 7.515527481540536e-05, "loss": 1.6972, "step": 3198 }, { "epoch": 0.3322945881375299, "grad_norm": 0.35628658533096313, "learning_rate": 7.514117228724953e-05, "loss": 1.6622, "step": 3199 }, { "epoch": 0.3323984626571102, "grad_norm": 0.3558284342288971, "learning_rate": 7.512706708175496e-05, "loss": 1.5866, "step": 3200 }, { "epoch": 0.33250233717669053, "grad_norm": 0.3872045576572418, "learning_rate": 7.511295920042374e-05, "loss": 1.7721, "step": 3201 }, { "epoch": 0.3326062116962709, "grad_norm": 0.3917664587497711, "learning_rate": 7.509884864475825e-05, "loss": 1.7278, "step": 3202 }, { "epoch": 0.33271008621585124, "grad_norm": 0.3571791648864746, "learning_rate": 7.508473541626115e-05, "loss": 1.7481, "step": 3203 }, { "epoch": 0.3328139607354316, "grad_norm": 0.3802284002304077, "learning_rate": 7.507061951643541e-05, "loss": 1.7085, "step": 3204 }, { "epoch": 0.33291783525501195, "grad_norm": 0.41221603751182556, "learning_rate": 7.505650094678425e-05, "loss": 1.8142, "step": 3205 }, { "epoch": 0.3330217097745923, "grad_norm": 0.41970524191856384, "learning_rate": 7.504237970881118e-05, "loss": 1.7213, "step": 3206 }, { "epoch": 0.33312558429417266, "grad_norm": 0.38147467374801636, "learning_rate": 7.502825580402002e-05, "loss": 1.637, "step": 3207 }, { "epoch": 0.333229458813753, "grad_norm": 0.430268257856369, "learning_rate": 7.501412923391483e-05, "loss": 1.7966, "step": 3208 }, { "epoch": 0.3333333333333333, "grad_norm": 0.422599732875824, "learning_rate": 7.500000000000001e-05, "loss": 1.7097, "step": 3209 }, { "epoch": 0.3334372078529137, "grad_norm": 0.3947455585002899, "learning_rate": 7.498586810378019e-05, "loss": 1.6064, "step": 3210 }, { "epoch": 0.333541082372494, "grad_norm": 0.43581533432006836, "learning_rate": 7.49717335467603e-05, "loss": 1.8244, "step": 3211 }, { "epoch": 0.33364495689207435, "grad_norm": 0.42619240283966064, "learning_rate": 7.49575963304456e-05, "loss": 1.8339, "step": 3212 }, { "epoch": 0.33374883141165473, "grad_norm": 0.4488205015659332, "learning_rate": 7.494345645634156e-05, "loss": 1.6953, "step": 3213 }, { "epoch": 0.33385270593123506, "grad_norm": 0.35742267966270447, "learning_rate": 7.492931392595397e-05, "loss": 1.7246, "step": 3214 }, { "epoch": 0.3339565804508154, "grad_norm": 0.37364768981933594, "learning_rate": 7.49151687407889e-05, "loss": 1.6528, "step": 3215 }, { "epoch": 0.33406045497039577, "grad_norm": 0.3801439702510834, "learning_rate": 7.49010209023527e-05, "loss": 1.5913, "step": 3216 }, { "epoch": 0.3341643294899761, "grad_norm": 0.37618300318717957, "learning_rate": 7.488687041215202e-05, "loss": 1.609, "step": 3217 }, { "epoch": 0.3342682040095565, "grad_norm": 0.37247294187545776, "learning_rate": 7.487271727169377e-05, "loss": 1.6473, "step": 3218 }, { "epoch": 0.3343720785291368, "grad_norm": 0.3996807336807251, "learning_rate": 7.485856148248513e-05, "loss": 1.7744, "step": 3219 }, { "epoch": 0.33447595304871713, "grad_norm": 0.3898894488811493, "learning_rate": 7.484440304603362e-05, "loss": 1.8776, "step": 3220 }, { "epoch": 0.3345798275682975, "grad_norm": 0.4372991621494293, "learning_rate": 7.483024196384696e-05, "loss": 1.8293, "step": 3221 }, { "epoch": 0.33468370208787784, "grad_norm": 0.4678659439086914, "learning_rate": 7.481607823743321e-05, "loss": 1.8113, "step": 3222 }, { "epoch": 0.33478757660745817, "grad_norm": 0.38630014657974243, "learning_rate": 7.480191186830072e-05, "loss": 1.6579, "step": 3223 }, { "epoch": 0.33489145112703855, "grad_norm": 0.4188450276851654, "learning_rate": 7.478774285795805e-05, "loss": 1.9302, "step": 3224 }, { "epoch": 0.3349953256466189, "grad_norm": 0.3890570402145386, "learning_rate": 7.477357120791415e-05, "loss": 1.8298, "step": 3225 }, { "epoch": 0.3350992001661992, "grad_norm": 0.4579260051250458, "learning_rate": 7.475939691967814e-05, "loss": 1.673, "step": 3226 }, { "epoch": 0.3352030746857796, "grad_norm": 0.39062821865081787, "learning_rate": 7.47452199947595e-05, "loss": 1.7051, "step": 3227 }, { "epoch": 0.3353069492053599, "grad_norm": 0.37739062309265137, "learning_rate": 7.473104043466794e-05, "loss": 1.6151, "step": 3228 }, { "epoch": 0.3354108237249403, "grad_norm": 0.3767510950565338, "learning_rate": 7.471685824091348e-05, "loss": 1.7629, "step": 3229 }, { "epoch": 0.3355146982445206, "grad_norm": 0.41610831022262573, "learning_rate": 7.470267341500642e-05, "loss": 1.7987, "step": 3230 }, { "epoch": 0.33561857276410095, "grad_norm": 0.3895459771156311, "learning_rate": 7.468848595845733e-05, "loss": 1.5922, "step": 3231 }, { "epoch": 0.33572244728368134, "grad_norm": 0.3886757493019104, "learning_rate": 7.467429587277705e-05, "loss": 1.6534, "step": 3232 }, { "epoch": 0.33582632180326166, "grad_norm": 0.37435784935951233, "learning_rate": 7.466010315947676e-05, "loss": 1.7319, "step": 3233 }, { "epoch": 0.335930196322842, "grad_norm": 0.4168533682823181, "learning_rate": 7.464590782006782e-05, "loss": 1.8126, "step": 3234 }, { "epoch": 0.33603407084242237, "grad_norm": 0.3834715187549591, "learning_rate": 7.463170985606194e-05, "loss": 1.7701, "step": 3235 }, { "epoch": 0.3361379453620027, "grad_norm": 0.44510364532470703, "learning_rate": 7.46175092689711e-05, "loss": 1.8166, "step": 3236 }, { "epoch": 0.336241819881583, "grad_norm": 0.41382500529289246, "learning_rate": 7.460330606030754e-05, "loss": 1.9408, "step": 3237 }, { "epoch": 0.3363456944011634, "grad_norm": 0.3592391014099121, "learning_rate": 7.45891002315838e-05, "loss": 1.6185, "step": 3238 }, { "epoch": 0.33644956892074374, "grad_norm": 0.36257967352867126, "learning_rate": 7.457489178431269e-05, "loss": 1.5973, "step": 3239 }, { "epoch": 0.33655344344032406, "grad_norm": 0.368685245513916, "learning_rate": 7.45606807200073e-05, "loss": 1.5255, "step": 3240 }, { "epoch": 0.33665731795990445, "grad_norm": 0.3916224241256714, "learning_rate": 7.4546467040181e-05, "loss": 1.6482, "step": 3241 }, { "epoch": 0.33676119247948477, "grad_norm": 0.36512747406959534, "learning_rate": 7.453225074634742e-05, "loss": 1.5935, "step": 3242 }, { "epoch": 0.33686506699906515, "grad_norm": 0.40340378880500793, "learning_rate": 7.451803184002048e-05, "loss": 1.8217, "step": 3243 }, { "epoch": 0.3369689415186455, "grad_norm": 0.4013363718986511, "learning_rate": 7.450381032271442e-05, "loss": 1.698, "step": 3244 }, { "epoch": 0.3370728160382258, "grad_norm": 0.4022335708141327, "learning_rate": 7.44895861959437e-05, "loss": 1.7154, "step": 3245 }, { "epoch": 0.3371766905578062, "grad_norm": 0.4084652364253998, "learning_rate": 7.447535946122304e-05, "loss": 1.6902, "step": 3246 }, { "epoch": 0.3372805650773865, "grad_norm": 0.41394492983818054, "learning_rate": 7.446113012006756e-05, "loss": 1.6136, "step": 3247 }, { "epoch": 0.33738443959696685, "grad_norm": 0.3942053020000458, "learning_rate": 7.444689817399251e-05, "loss": 1.7797, "step": 3248 }, { "epoch": 0.33748831411654723, "grad_norm": 0.41565534472465515, "learning_rate": 7.443266362451349e-05, "loss": 1.7013, "step": 3249 }, { "epoch": 0.33759218863612755, "grad_norm": 0.3893018960952759, "learning_rate": 7.441842647314638e-05, "loss": 1.7065, "step": 3250 }, { "epoch": 0.3376960631557079, "grad_norm": 0.4059962034225464, "learning_rate": 7.440418672140733e-05, "loss": 1.7509, "step": 3251 }, { "epoch": 0.33779993767528826, "grad_norm": 0.4266146719455719, "learning_rate": 7.438994437081272e-05, "loss": 1.7629, "step": 3252 }, { "epoch": 0.3379038121948686, "grad_norm": 0.37736377120018005, "learning_rate": 7.437569942287932e-05, "loss": 1.8509, "step": 3253 }, { "epoch": 0.3380076867144489, "grad_norm": 0.3954285681247711, "learning_rate": 7.436145187912406e-05, "loss": 1.6512, "step": 3254 }, { "epoch": 0.3381115612340293, "grad_norm": 0.3633100092411041, "learning_rate": 7.43472017410642e-05, "loss": 1.5277, "step": 3255 }, { "epoch": 0.33821543575360963, "grad_norm": 0.3525071144104004, "learning_rate": 7.433294901021727e-05, "loss": 1.5944, "step": 3256 }, { "epoch": 0.33831931027319, "grad_norm": 0.4135834574699402, "learning_rate": 7.431869368810106e-05, "loss": 1.6653, "step": 3257 }, { "epoch": 0.33842318479277034, "grad_norm": 0.40316396951675415, "learning_rate": 7.430443577623369e-05, "loss": 1.7842, "step": 3258 }, { "epoch": 0.33852705931235066, "grad_norm": 0.3851187825202942, "learning_rate": 7.429017527613347e-05, "loss": 1.721, "step": 3259 }, { "epoch": 0.33863093383193105, "grad_norm": 0.4158236086368561, "learning_rate": 7.427591218931906e-05, "loss": 1.9518, "step": 3260 }, { "epoch": 0.3387348083515114, "grad_norm": 0.3790571689605713, "learning_rate": 7.426164651730937e-05, "loss": 1.5893, "step": 3261 }, { "epoch": 0.3388386828710917, "grad_norm": 0.37710022926330566, "learning_rate": 7.424737826162358e-05, "loss": 1.6549, "step": 3262 }, { "epoch": 0.3389425573906721, "grad_norm": 0.3882453441619873, "learning_rate": 7.423310742378113e-05, "loss": 1.6472, "step": 3263 }, { "epoch": 0.3390464319102524, "grad_norm": 0.39669013023376465, "learning_rate": 7.421883400530176e-05, "loss": 1.7416, "step": 3264 }, { "epoch": 0.33915030642983274, "grad_norm": 0.41558176279067993, "learning_rate": 7.42045580077055e-05, "loss": 1.6492, "step": 3265 }, { "epoch": 0.3392541809494131, "grad_norm": 0.4130480885505676, "learning_rate": 7.41902794325126e-05, "loss": 1.7518, "step": 3266 }, { "epoch": 0.33935805546899345, "grad_norm": 0.398885577917099, "learning_rate": 7.417599828124363e-05, "loss": 1.6747, "step": 3267 }, { "epoch": 0.33946192998857383, "grad_norm": 0.39864227175712585, "learning_rate": 7.416171455541943e-05, "loss": 1.7727, "step": 3268 }, { "epoch": 0.33956580450815416, "grad_norm": 0.4171668589115143, "learning_rate": 7.414742825656111e-05, "loss": 1.6616, "step": 3269 }, { "epoch": 0.3396696790277345, "grad_norm": 0.379443883895874, "learning_rate": 7.413313938619003e-05, "loss": 1.8302, "step": 3270 }, { "epoch": 0.33977355354731487, "grad_norm": 0.3963342607021332, "learning_rate": 7.411884794582784e-05, "loss": 1.7629, "step": 3271 }, { "epoch": 0.3398774280668952, "grad_norm": 0.3690603971481323, "learning_rate": 7.410455393699647e-05, "loss": 1.6549, "step": 3272 }, { "epoch": 0.3399813025864755, "grad_norm": 0.36229610443115234, "learning_rate": 7.409025736121816e-05, "loss": 1.6949, "step": 3273 }, { "epoch": 0.3400851771060559, "grad_norm": 0.3683745265007019, "learning_rate": 7.407595822001532e-05, "loss": 1.8114, "step": 3274 }, { "epoch": 0.34018905162563623, "grad_norm": 0.3948541283607483, "learning_rate": 7.406165651491075e-05, "loss": 1.6883, "step": 3275 }, { "epoch": 0.34029292614521656, "grad_norm": 0.4150826036930084, "learning_rate": 7.404735224742745e-05, "loss": 1.7254, "step": 3276 }, { "epoch": 0.34039680066479694, "grad_norm": 0.4049845337867737, "learning_rate": 7.40330454190887e-05, "loss": 1.6418, "step": 3277 }, { "epoch": 0.34050067518437727, "grad_norm": 0.37728235125541687, "learning_rate": 7.401873603141809e-05, "loss": 1.8138, "step": 3278 }, { "epoch": 0.3406045497039576, "grad_norm": 0.4334777891635895, "learning_rate": 7.400442408593944e-05, "loss": 1.9727, "step": 3279 }, { "epoch": 0.340708424223538, "grad_norm": 0.395805299282074, "learning_rate": 7.399010958417684e-05, "loss": 1.7511, "step": 3280 }, { "epoch": 0.3408122987431183, "grad_norm": 0.3635464310646057, "learning_rate": 7.397579252765475e-05, "loss": 1.7119, "step": 3281 }, { "epoch": 0.3409161732626987, "grad_norm": 0.3630931079387665, "learning_rate": 7.396147291789775e-05, "loss": 1.7253, "step": 3282 }, { "epoch": 0.341020047782279, "grad_norm": 0.37721481919288635, "learning_rate": 7.394715075643078e-05, "loss": 1.7831, "step": 3283 }, { "epoch": 0.34112392230185934, "grad_norm": 0.41346338391304016, "learning_rate": 7.393282604477907e-05, "loss": 1.8239, "step": 3284 }, { "epoch": 0.3412277968214397, "grad_norm": 0.3926272392272949, "learning_rate": 7.391849878446805e-05, "loss": 1.8158, "step": 3285 }, { "epoch": 0.34133167134102005, "grad_norm": 0.37988725304603577, "learning_rate": 7.39041689770235e-05, "loss": 1.6549, "step": 3286 }, { "epoch": 0.3414355458606004, "grad_norm": 0.46685877442359924, "learning_rate": 7.388983662397139e-05, "loss": 1.8314, "step": 3287 }, { "epoch": 0.34153942038018076, "grad_norm": 0.38952314853668213, "learning_rate": 7.387550172683803e-05, "loss": 1.9213, "step": 3288 }, { "epoch": 0.3416432948997611, "grad_norm": 0.3613092005252838, "learning_rate": 7.386116428715e-05, "loss": 1.7343, "step": 3289 }, { "epoch": 0.3417471694193414, "grad_norm": 0.3758079707622528, "learning_rate": 7.384682430643409e-05, "loss": 1.6233, "step": 3290 }, { "epoch": 0.3418510439389218, "grad_norm": 0.36782777309417725, "learning_rate": 7.383248178621739e-05, "loss": 1.7206, "step": 3291 }, { "epoch": 0.3419549184585021, "grad_norm": 0.36162370443344116, "learning_rate": 7.381813672802729e-05, "loss": 1.6387, "step": 3292 }, { "epoch": 0.34205879297808245, "grad_norm": 0.3870641887187958, "learning_rate": 7.380378913339142e-05, "loss": 1.7021, "step": 3293 }, { "epoch": 0.34216266749766283, "grad_norm": 0.366452157497406, "learning_rate": 7.378943900383766e-05, "loss": 1.6213, "step": 3294 }, { "epoch": 0.34226654201724316, "grad_norm": 0.40911996364593506, "learning_rate": 7.377508634089422e-05, "loss": 1.8177, "step": 3295 }, { "epoch": 0.34237041653682354, "grad_norm": 0.396062433719635, "learning_rate": 7.376073114608955e-05, "loss": 1.7282, "step": 3296 }, { "epoch": 0.34247429105640387, "grad_norm": 0.4013812839984894, "learning_rate": 7.374637342095236e-05, "loss": 1.7302, "step": 3297 }, { "epoch": 0.3425781655759842, "grad_norm": 0.37226590514183044, "learning_rate": 7.373201316701162e-05, "loss": 1.6312, "step": 3298 }, { "epoch": 0.3426820400955646, "grad_norm": 0.39679157733917236, "learning_rate": 7.371765038579658e-05, "loss": 1.6591, "step": 3299 }, { "epoch": 0.3427859146151449, "grad_norm": 0.4248996078968048, "learning_rate": 7.37032850788368e-05, "loss": 1.8358, "step": 3300 }, { "epoch": 0.34288978913472523, "grad_norm": 0.37451761960983276, "learning_rate": 7.368891724766204e-05, "loss": 1.7225, "step": 3301 }, { "epoch": 0.3429936636543056, "grad_norm": 0.38723859190940857, "learning_rate": 7.367454689380238e-05, "loss": 1.5813, "step": 3302 }, { "epoch": 0.34309753817388594, "grad_norm": 0.4239073395729065, "learning_rate": 7.366017401878813e-05, "loss": 1.8608, "step": 3303 }, { "epoch": 0.34320141269346627, "grad_norm": 0.39057475328445435, "learning_rate": 7.364579862414993e-05, "loss": 1.7135, "step": 3304 }, { "epoch": 0.34330528721304665, "grad_norm": 0.4171145558357239, "learning_rate": 7.36314207114186e-05, "loss": 1.8721, "step": 3305 }, { "epoch": 0.343409161732627, "grad_norm": 0.4564177095890045, "learning_rate": 7.36170402821253e-05, "loss": 1.6559, "step": 3306 }, { "epoch": 0.34351303625220736, "grad_norm": 0.4414724111557007, "learning_rate": 7.360265733780142e-05, "loss": 1.7733, "step": 3307 }, { "epoch": 0.3436169107717877, "grad_norm": 0.35561293363571167, "learning_rate": 7.358827187997867e-05, "loss": 1.5218, "step": 3308 }, { "epoch": 0.343720785291368, "grad_norm": 0.41624075174331665, "learning_rate": 7.357388391018893e-05, "loss": 1.6683, "step": 3309 }, { "epoch": 0.3438246598109484, "grad_norm": 0.38379234075546265, "learning_rate": 7.355949342996447e-05, "loss": 1.7642, "step": 3310 }, { "epoch": 0.3439285343305287, "grad_norm": 0.40274274349212646, "learning_rate": 7.35451004408377e-05, "loss": 1.7529, "step": 3311 }, { "epoch": 0.34403240885010905, "grad_norm": 0.4023614525794983, "learning_rate": 7.353070494434141e-05, "loss": 1.77, "step": 3312 }, { "epoch": 0.34413628336968943, "grad_norm": 0.41878741979599, "learning_rate": 7.351630694200857e-05, "loss": 1.7528, "step": 3313 }, { "epoch": 0.34424015788926976, "grad_norm": 0.3700146973133087, "learning_rate": 7.35019064353725e-05, "loss": 1.677, "step": 3314 }, { "epoch": 0.3443440324088501, "grad_norm": 0.44311147928237915, "learning_rate": 7.348750342596671e-05, "loss": 1.9235, "step": 3315 }, { "epoch": 0.34444790692843047, "grad_norm": 0.4150424599647522, "learning_rate": 7.3473097915325e-05, "loss": 1.6732, "step": 3316 }, { "epoch": 0.3445517814480108, "grad_norm": 0.3866238594055176, "learning_rate": 7.345868990498146e-05, "loss": 1.6598, "step": 3317 }, { "epoch": 0.3446556559675911, "grad_norm": 0.42701780796051025, "learning_rate": 7.344427939647045e-05, "loss": 1.7695, "step": 3318 }, { "epoch": 0.3447595304871715, "grad_norm": 0.3794342577457428, "learning_rate": 7.342986639132654e-05, "loss": 1.6552, "step": 3319 }, { "epoch": 0.34486340500675183, "grad_norm": 0.4094541668891907, "learning_rate": 7.341545089108464e-05, "loss": 1.9607, "step": 3320 }, { "epoch": 0.3449672795263322, "grad_norm": 0.4172968566417694, "learning_rate": 7.340103289727983e-05, "loss": 1.7933, "step": 3321 }, { "epoch": 0.34507115404591254, "grad_norm": 0.402832955121994, "learning_rate": 7.338661241144758e-05, "loss": 1.8795, "step": 3322 }, { "epoch": 0.34517502856549287, "grad_norm": 0.38485434651374817, "learning_rate": 7.337218943512353e-05, "loss": 1.764, "step": 3323 }, { "epoch": 0.34527890308507325, "grad_norm": 0.4096323847770691, "learning_rate": 7.335776396984361e-05, "loss": 1.814, "step": 3324 }, { "epoch": 0.3453827776046536, "grad_norm": 0.347913920879364, "learning_rate": 7.334333601714403e-05, "loss": 1.5926, "step": 3325 }, { "epoch": 0.3454866521242339, "grad_norm": 0.404429167509079, "learning_rate": 7.332890557856125e-05, "loss": 1.8246, "step": 3326 }, { "epoch": 0.3455905266438143, "grad_norm": 0.3735859990119934, "learning_rate": 7.331447265563197e-05, "loss": 1.5425, "step": 3327 }, { "epoch": 0.3456944011633946, "grad_norm": 0.35603833198547363, "learning_rate": 7.330003724989324e-05, "loss": 1.5314, "step": 3328 }, { "epoch": 0.34579827568297494, "grad_norm": 0.3887482285499573, "learning_rate": 7.328559936288228e-05, "loss": 1.688, "step": 3329 }, { "epoch": 0.3459021502025553, "grad_norm": 0.4250495731830597, "learning_rate": 7.327115899613662e-05, "loss": 1.8245, "step": 3330 }, { "epoch": 0.34600602472213565, "grad_norm": 0.36763063073158264, "learning_rate": 7.325671615119407e-05, "loss": 1.4173, "step": 3331 }, { "epoch": 0.34610989924171603, "grad_norm": 0.3886317312717438, "learning_rate": 7.324227082959264e-05, "loss": 1.7182, "step": 3332 }, { "epoch": 0.34621377376129636, "grad_norm": 0.4716704487800598, "learning_rate": 7.322782303287067e-05, "loss": 1.6838, "step": 3333 }, { "epoch": 0.3463176482808767, "grad_norm": 0.3932298719882965, "learning_rate": 7.321337276256674e-05, "loss": 1.8532, "step": 3334 }, { "epoch": 0.34642152280045707, "grad_norm": 0.4240279793739319, "learning_rate": 7.319892002021966e-05, "loss": 1.7919, "step": 3335 }, { "epoch": 0.3465253973200374, "grad_norm": 0.41877415776252747, "learning_rate": 7.318446480736857e-05, "loss": 1.6985, "step": 3336 }, { "epoch": 0.3466292718396177, "grad_norm": 0.45144563913345337, "learning_rate": 7.317000712555283e-05, "loss": 1.9947, "step": 3337 }, { "epoch": 0.3467331463591981, "grad_norm": 0.3652666509151459, "learning_rate": 7.315554697631205e-05, "loss": 1.7206, "step": 3338 }, { "epoch": 0.34683702087877843, "grad_norm": 0.3598730266094208, "learning_rate": 7.314108436118614e-05, "loss": 1.6871, "step": 3339 }, { "epoch": 0.34694089539835876, "grad_norm": 0.3652264177799225, "learning_rate": 7.312661928171525e-05, "loss": 1.5859, "step": 3340 }, { "epoch": 0.34704476991793914, "grad_norm": 0.36526259779930115, "learning_rate": 7.311215173943982e-05, "loss": 1.5721, "step": 3341 }, { "epoch": 0.34714864443751947, "grad_norm": 0.3666329085826874, "learning_rate": 7.309768173590049e-05, "loss": 1.6762, "step": 3342 }, { "epoch": 0.3472525189570998, "grad_norm": 0.39144429564476013, "learning_rate": 7.308320927263823e-05, "loss": 1.6709, "step": 3343 }, { "epoch": 0.3473563934766802, "grad_norm": 0.41937124729156494, "learning_rate": 7.306873435119423e-05, "loss": 1.7338, "step": 3344 }, { "epoch": 0.3474602679962605, "grad_norm": 0.38105306029319763, "learning_rate": 7.305425697311e-05, "loss": 1.5917, "step": 3345 }, { "epoch": 0.3475641425158409, "grad_norm": 0.3741595149040222, "learning_rate": 7.30397771399272e-05, "loss": 1.6511, "step": 3346 }, { "epoch": 0.3476680170354212, "grad_norm": 0.399387389421463, "learning_rate": 7.302529485318786e-05, "loss": 1.6948, "step": 3347 }, { "epoch": 0.34777189155500154, "grad_norm": 0.3723578155040741, "learning_rate": 7.301081011443423e-05, "loss": 1.6319, "step": 3348 }, { "epoch": 0.3478757660745819, "grad_norm": 0.37226778268814087, "learning_rate": 7.29963229252088e-05, "loss": 1.7856, "step": 3349 }, { "epoch": 0.34797964059416225, "grad_norm": 0.38649681210517883, "learning_rate": 7.298183328705436e-05, "loss": 1.65, "step": 3350 }, { "epoch": 0.3480835151137426, "grad_norm": 0.3898165822029114, "learning_rate": 7.296734120151394e-05, "loss": 1.8236, "step": 3351 }, { "epoch": 0.34818738963332296, "grad_norm": 0.37378284335136414, "learning_rate": 7.295284667013083e-05, "loss": 1.5903, "step": 3352 }, { "epoch": 0.3482912641529033, "grad_norm": 0.39997485280036926, "learning_rate": 7.293834969444861e-05, "loss": 1.8107, "step": 3353 }, { "epoch": 0.3483951386724836, "grad_norm": 0.37795597314834595, "learning_rate": 7.292385027601105e-05, "loss": 1.7677, "step": 3354 }, { "epoch": 0.348499013192064, "grad_norm": 0.3679714500904083, "learning_rate": 7.290934841636225e-05, "loss": 1.6464, "step": 3355 }, { "epoch": 0.3486028877116443, "grad_norm": 0.3939383029937744, "learning_rate": 7.289484411704656e-05, "loss": 1.7565, "step": 3356 }, { "epoch": 0.34870676223122465, "grad_norm": 0.38755881786346436, "learning_rate": 7.288033737960855e-05, "loss": 1.5508, "step": 3357 }, { "epoch": 0.34881063675080504, "grad_norm": 0.3855699598789215, "learning_rate": 7.286582820559308e-05, "loss": 1.6979, "step": 3358 }, { "epoch": 0.34891451127038536, "grad_norm": 0.37389200925827026, "learning_rate": 7.285131659654527e-05, "loss": 1.703, "step": 3359 }, { "epoch": 0.34901838578996575, "grad_norm": 0.3579116761684418, "learning_rate": 7.283680255401049e-05, "loss": 1.6348, "step": 3360 }, { "epoch": 0.3491222603095461, "grad_norm": 0.39986559748649597, "learning_rate": 7.282228607953436e-05, "loss": 1.7792, "step": 3361 }, { "epoch": 0.3492261348291264, "grad_norm": 0.3745286166667938, "learning_rate": 7.28077671746628e-05, "loss": 1.7119, "step": 3362 }, { "epoch": 0.3493300093487068, "grad_norm": 0.45481473207473755, "learning_rate": 7.279324584094194e-05, "loss": 1.8279, "step": 3363 }, { "epoch": 0.3494338838682871, "grad_norm": 0.42257899045944214, "learning_rate": 7.277872207991818e-05, "loss": 1.6366, "step": 3364 }, { "epoch": 0.34953775838786744, "grad_norm": 0.3691715896129608, "learning_rate": 7.276419589313821e-05, "loss": 1.6741, "step": 3365 }, { "epoch": 0.3496416329074478, "grad_norm": 0.36931112408638, "learning_rate": 7.274966728214895e-05, "loss": 1.7447, "step": 3366 }, { "epoch": 0.34974550742702815, "grad_norm": 0.4134025573730469, "learning_rate": 7.273513624849757e-05, "loss": 1.9171, "step": 3367 }, { "epoch": 0.3498493819466085, "grad_norm": 0.4000683128833771, "learning_rate": 7.272060279373152e-05, "loss": 1.7925, "step": 3368 }, { "epoch": 0.34995325646618886, "grad_norm": 0.3902685344219208, "learning_rate": 7.27060669193985e-05, "loss": 1.6959, "step": 3369 }, { "epoch": 0.3500571309857692, "grad_norm": 0.40209463238716125, "learning_rate": 7.269152862704647e-05, "loss": 1.7667, "step": 3370 }, { "epoch": 0.35016100550534957, "grad_norm": 0.46758002042770386, "learning_rate": 7.267698791822363e-05, "loss": 1.8386, "step": 3371 }, { "epoch": 0.3502648800249299, "grad_norm": 0.4505844712257385, "learning_rate": 7.266244479447847e-05, "loss": 1.9704, "step": 3372 }, { "epoch": 0.3503687545445102, "grad_norm": 0.4215162694454193, "learning_rate": 7.264789925735974e-05, "loss": 1.9084, "step": 3373 }, { "epoch": 0.3504726290640906, "grad_norm": 0.4192196726799011, "learning_rate": 7.263335130841638e-05, "loss": 1.7802, "step": 3374 }, { "epoch": 0.35057650358367093, "grad_norm": 0.4384503662586212, "learning_rate": 7.261880094919765e-05, "loss": 1.6945, "step": 3375 }, { "epoch": 0.35068037810325126, "grad_norm": 0.4039691984653473, "learning_rate": 7.260424818125305e-05, "loss": 1.7436, "step": 3376 }, { "epoch": 0.35078425262283164, "grad_norm": 0.3905966579914093, "learning_rate": 7.258969300613233e-05, "loss": 1.7358, "step": 3377 }, { "epoch": 0.35088812714241197, "grad_norm": 0.4261592626571655, "learning_rate": 7.257513542538553e-05, "loss": 1.8172, "step": 3378 }, { "epoch": 0.3509920016619923, "grad_norm": 0.39150550961494446, "learning_rate": 7.25605754405629e-05, "loss": 1.6052, "step": 3379 }, { "epoch": 0.3510958761815727, "grad_norm": 0.3744257092475891, "learning_rate": 7.254601305321496e-05, "loss": 1.7486, "step": 3380 }, { "epoch": 0.351199750701153, "grad_norm": 0.36264219880104065, "learning_rate": 7.253144826489249e-05, "loss": 1.6068, "step": 3381 }, { "epoch": 0.35130362522073333, "grad_norm": 0.35563141107559204, "learning_rate": 7.251688107714654e-05, "loss": 1.6542, "step": 3382 }, { "epoch": 0.3514074997403137, "grad_norm": 0.39678165316581726, "learning_rate": 7.250231149152838e-05, "loss": 1.8309, "step": 3383 }, { "epoch": 0.35151137425989404, "grad_norm": 0.3613341748714447, "learning_rate": 7.248773950958958e-05, "loss": 1.735, "step": 3384 }, { "epoch": 0.3516152487794744, "grad_norm": 0.3558713495731354, "learning_rate": 7.247316513288192e-05, "loss": 1.5545, "step": 3385 }, { "epoch": 0.35171912329905475, "grad_norm": 0.3634355366230011, "learning_rate": 7.245858836295749e-05, "loss": 1.748, "step": 3386 }, { "epoch": 0.3518229978186351, "grad_norm": 0.3754010796546936, "learning_rate": 7.244400920136858e-05, "loss": 1.703, "step": 3387 }, { "epoch": 0.35192687233821546, "grad_norm": 0.380953848361969, "learning_rate": 7.242942764966776e-05, "loss": 1.6779, "step": 3388 }, { "epoch": 0.3520307468577958, "grad_norm": 0.4804019033908844, "learning_rate": 7.241484370940784e-05, "loss": 1.7014, "step": 3389 }, { "epoch": 0.3521346213773761, "grad_norm": 0.41277310252189636, "learning_rate": 7.240025738214192e-05, "loss": 1.6532, "step": 3390 }, { "epoch": 0.3522384958969565, "grad_norm": 0.4273497760295868, "learning_rate": 7.238566866942332e-05, "loss": 1.9818, "step": 3391 }, { "epoch": 0.3523423704165368, "grad_norm": 0.38524994254112244, "learning_rate": 7.23710775728056e-05, "loss": 1.7702, "step": 3392 }, { "epoch": 0.35244624493611715, "grad_norm": 0.37887367606163025, "learning_rate": 7.235648409384263e-05, "loss": 1.6614, "step": 3393 }, { "epoch": 0.35255011945569753, "grad_norm": 0.3723335564136505, "learning_rate": 7.234188823408851e-05, "loss": 1.6692, "step": 3394 }, { "epoch": 0.35265399397527786, "grad_norm": 0.38009339570999146, "learning_rate": 7.232728999509754e-05, "loss": 1.7263, "step": 3395 }, { "epoch": 0.3527578684948582, "grad_norm": 0.4187556505203247, "learning_rate": 7.231268937842438e-05, "loss": 1.7052, "step": 3396 }, { "epoch": 0.35286174301443857, "grad_norm": 0.3810417056083679, "learning_rate": 7.229808638562381e-05, "loss": 1.659, "step": 3397 }, { "epoch": 0.3529656175340189, "grad_norm": 0.41524961590766907, "learning_rate": 7.228348101825099e-05, "loss": 1.5006, "step": 3398 }, { "epoch": 0.3530694920535993, "grad_norm": 0.38853874802589417, "learning_rate": 7.226887327786125e-05, "loss": 1.8323, "step": 3399 }, { "epoch": 0.3531733665731796, "grad_norm": 0.3678146004676819, "learning_rate": 7.22542631660102e-05, "loss": 1.6357, "step": 3400 }, { "epoch": 0.35327724109275993, "grad_norm": 0.3867931365966797, "learning_rate": 7.223965068425376e-05, "loss": 1.7819, "step": 3401 }, { "epoch": 0.3533811156123403, "grad_norm": 0.37931546568870544, "learning_rate": 7.222503583414795e-05, "loss": 1.6478, "step": 3402 }, { "epoch": 0.35348499013192064, "grad_norm": 0.43002453446388245, "learning_rate": 7.22104186172492e-05, "loss": 1.8901, "step": 3403 }, { "epoch": 0.35358886465150097, "grad_norm": 0.36890125274658203, "learning_rate": 7.219579903511412e-05, "loss": 1.7524, "step": 3404 }, { "epoch": 0.35369273917108135, "grad_norm": 0.4097067415714264, "learning_rate": 7.218117708929957e-05, "loss": 1.6646, "step": 3405 }, { "epoch": 0.3537966136906617, "grad_norm": 0.47631388902664185, "learning_rate": 7.216655278136269e-05, "loss": 1.8199, "step": 3406 }, { "epoch": 0.353900488210242, "grad_norm": 0.4068619906902313, "learning_rate": 7.215192611286083e-05, "loss": 1.5446, "step": 3407 }, { "epoch": 0.3540043627298224, "grad_norm": 0.36159148812294006, "learning_rate": 7.213729708535164e-05, "loss": 1.4702, "step": 3408 }, { "epoch": 0.3541082372494027, "grad_norm": 0.39345964789390564, "learning_rate": 7.212266570039299e-05, "loss": 1.7324, "step": 3409 }, { "epoch": 0.3542121117689831, "grad_norm": 0.4041385352611542, "learning_rate": 7.2108031959543e-05, "loss": 1.7712, "step": 3410 }, { "epoch": 0.3543159862885634, "grad_norm": 0.40899065136909485, "learning_rate": 7.209339586436005e-05, "loss": 1.5812, "step": 3411 }, { "epoch": 0.35441986080814375, "grad_norm": 0.37464478611946106, "learning_rate": 7.20787574164028e-05, "loss": 1.4846, "step": 3412 }, { "epoch": 0.35452373532772413, "grad_norm": 0.41557177901268005, "learning_rate": 7.206411661723007e-05, "loss": 1.7985, "step": 3413 }, { "epoch": 0.35462760984730446, "grad_norm": 0.4185909032821655, "learning_rate": 7.204947346840106e-05, "loss": 1.7178, "step": 3414 }, { "epoch": 0.3547314843668848, "grad_norm": 0.40856650471687317, "learning_rate": 7.203482797147512e-05, "loss": 1.5386, "step": 3415 }, { "epoch": 0.35483535888646517, "grad_norm": 0.40840035676956177, "learning_rate": 7.202018012801185e-05, "loss": 1.8462, "step": 3416 }, { "epoch": 0.3549392334060455, "grad_norm": 0.4034786820411682, "learning_rate": 7.200552993957117e-05, "loss": 1.6743, "step": 3417 }, { "epoch": 0.3550431079256258, "grad_norm": 0.39444810152053833, "learning_rate": 7.199087740771321e-05, "loss": 1.6801, "step": 3418 }, { "epoch": 0.3551469824452062, "grad_norm": 0.4487433433532715, "learning_rate": 7.197622253399833e-05, "loss": 1.9052, "step": 3419 }, { "epoch": 0.35525085696478653, "grad_norm": 0.37087568640708923, "learning_rate": 7.196156531998718e-05, "loss": 1.6895, "step": 3420 }, { "epoch": 0.35535473148436686, "grad_norm": 0.3976334035396576, "learning_rate": 7.194690576724064e-05, "loss": 1.6898, "step": 3421 }, { "epoch": 0.35545860600394724, "grad_norm": 0.3944483697414398, "learning_rate": 7.19322438773198e-05, "loss": 1.7735, "step": 3422 }, { "epoch": 0.35556248052352757, "grad_norm": 0.4049598276615143, "learning_rate": 7.191757965178609e-05, "loss": 1.7066, "step": 3423 }, { "epoch": 0.35566635504310795, "grad_norm": 0.3984638452529907, "learning_rate": 7.19029130922011e-05, "loss": 1.622, "step": 3424 }, { "epoch": 0.3557702295626883, "grad_norm": 0.37969374656677246, "learning_rate": 7.188824420012671e-05, "loss": 1.5406, "step": 3425 }, { "epoch": 0.3558741040822686, "grad_norm": 0.39994993805885315, "learning_rate": 7.187357297712506e-05, "loss": 1.6681, "step": 3426 }, { "epoch": 0.355977978601849, "grad_norm": 0.38335686922073364, "learning_rate": 7.185889942475851e-05, "loss": 1.7488, "step": 3427 }, { "epoch": 0.3560818531214293, "grad_norm": 0.37399354577064514, "learning_rate": 7.184422354458966e-05, "loss": 1.6041, "step": 3428 }, { "epoch": 0.35618572764100964, "grad_norm": 0.3858386278152466, "learning_rate": 7.182954533818142e-05, "loss": 1.7968, "step": 3429 }, { "epoch": 0.35628960216059, "grad_norm": 0.39342349767684937, "learning_rate": 7.181486480709687e-05, "loss": 1.7446, "step": 3430 }, { "epoch": 0.35639347668017035, "grad_norm": 0.3939875364303589, "learning_rate": 7.180018195289937e-05, "loss": 1.7684, "step": 3431 }, { "epoch": 0.3564973511997507, "grad_norm": 0.3789867162704468, "learning_rate": 7.178549677715254e-05, "loss": 1.7591, "step": 3432 }, { "epoch": 0.35660122571933106, "grad_norm": 0.41745778918266296, "learning_rate": 7.177080928142024e-05, "loss": 1.8228, "step": 3433 }, { "epoch": 0.3567051002389114, "grad_norm": 0.3856961727142334, "learning_rate": 7.175611946726657e-05, "loss": 1.7026, "step": 3434 }, { "epoch": 0.3568089747584917, "grad_norm": 0.36199331283569336, "learning_rate": 7.174142733625589e-05, "loss": 1.4263, "step": 3435 }, { "epoch": 0.3569128492780721, "grad_norm": 0.38251036405563354, "learning_rate": 7.172673288995277e-05, "loss": 1.6076, "step": 3436 }, { "epoch": 0.3570167237976524, "grad_norm": 0.3627626895904541, "learning_rate": 7.171203612992209e-05, "loss": 1.7049, "step": 3437 }, { "epoch": 0.3571205983172328, "grad_norm": 0.37557271122932434, "learning_rate": 7.169733705772891e-05, "loss": 1.5924, "step": 3438 }, { "epoch": 0.35722447283681313, "grad_norm": 0.4099326729774475, "learning_rate": 7.168263567493859e-05, "loss": 1.8338, "step": 3439 }, { "epoch": 0.35732834735639346, "grad_norm": 0.3865790367126465, "learning_rate": 7.16679319831167e-05, "loss": 1.8652, "step": 3440 }, { "epoch": 0.35743222187597384, "grad_norm": 0.3986462950706482, "learning_rate": 7.165322598382905e-05, "loss": 1.8941, "step": 3441 }, { "epoch": 0.35753609639555417, "grad_norm": 0.3964523673057556, "learning_rate": 7.163851767864176e-05, "loss": 1.8183, "step": 3442 }, { "epoch": 0.3576399709151345, "grad_norm": 0.4160378575325012, "learning_rate": 7.162380706912112e-05, "loss": 1.7119, "step": 3443 }, { "epoch": 0.3577438454347149, "grad_norm": 0.3585418462753296, "learning_rate": 7.16090941568337e-05, "loss": 1.5949, "step": 3444 }, { "epoch": 0.3578477199542952, "grad_norm": 0.3526414930820465, "learning_rate": 7.159437894334629e-05, "loss": 1.5768, "step": 3445 }, { "epoch": 0.35795159447387553, "grad_norm": 0.4531826674938202, "learning_rate": 7.157966143022599e-05, "loss": 1.7067, "step": 3446 }, { "epoch": 0.3580554689934559, "grad_norm": 0.3793008625507355, "learning_rate": 7.156494161904005e-05, "loss": 1.6634, "step": 3447 }, { "epoch": 0.35815934351303624, "grad_norm": 0.3665551245212555, "learning_rate": 7.155021951135605e-05, "loss": 1.6025, "step": 3448 }, { "epoch": 0.3582632180326166, "grad_norm": 0.4132823646068573, "learning_rate": 7.153549510874178e-05, "loss": 1.709, "step": 3449 }, { "epoch": 0.35836709255219695, "grad_norm": 0.38809409737586975, "learning_rate": 7.152076841276527e-05, "loss": 1.6638, "step": 3450 }, { "epoch": 0.3584709670717773, "grad_norm": 0.388092964887619, "learning_rate": 7.150603942499478e-05, "loss": 1.7261, "step": 3451 }, { "epoch": 0.35857484159135766, "grad_norm": 0.4031296372413635, "learning_rate": 7.149130814699884e-05, "loss": 1.7373, "step": 3452 }, { "epoch": 0.358678716110938, "grad_norm": 0.36466771364212036, "learning_rate": 7.147657458034622e-05, "loss": 1.6811, "step": 3453 }, { "epoch": 0.3587825906305183, "grad_norm": 0.3705892562866211, "learning_rate": 7.146183872660595e-05, "loss": 1.6032, "step": 3454 }, { "epoch": 0.3588864651500987, "grad_norm": 0.3856612741947174, "learning_rate": 7.144710058734724e-05, "loss": 1.5488, "step": 3455 }, { "epoch": 0.358990339669679, "grad_norm": 0.3789518177509308, "learning_rate": 7.143236016413963e-05, "loss": 1.6905, "step": 3456 }, { "epoch": 0.35909421418925935, "grad_norm": 0.3953239321708679, "learning_rate": 7.141761745855284e-05, "loss": 1.7514, "step": 3457 }, { "epoch": 0.35919808870883974, "grad_norm": 0.3837435841560364, "learning_rate": 7.140287247215685e-05, "loss": 1.3697, "step": 3458 }, { "epoch": 0.35930196322842006, "grad_norm": 0.3627301752567291, "learning_rate": 7.13881252065219e-05, "loss": 1.6055, "step": 3459 }, { "epoch": 0.3594058377480004, "grad_norm": 0.41467034816741943, "learning_rate": 7.137337566321844e-05, "loss": 1.7294, "step": 3460 }, { "epoch": 0.35950971226758077, "grad_norm": 0.4232648015022278, "learning_rate": 7.135862384381717e-05, "loss": 1.8147, "step": 3461 }, { "epoch": 0.3596135867871611, "grad_norm": 0.43227145075798035, "learning_rate": 7.134386974988909e-05, "loss": 1.5631, "step": 3462 }, { "epoch": 0.3597174613067415, "grad_norm": 0.4435618817806244, "learning_rate": 7.132911338300537e-05, "loss": 1.8052, "step": 3463 }, { "epoch": 0.3598213358263218, "grad_norm": 0.41515088081359863, "learning_rate": 7.131435474473744e-05, "loss": 1.8405, "step": 3464 }, { "epoch": 0.35992521034590214, "grad_norm": 0.3956370949745178, "learning_rate": 7.1299593836657e-05, "loss": 1.6957, "step": 3465 }, { "epoch": 0.3600290848654825, "grad_norm": 0.4101237654685974, "learning_rate": 7.128483066033594e-05, "loss": 1.6149, "step": 3466 }, { "epoch": 0.36013295938506285, "grad_norm": 0.39739376306533813, "learning_rate": 7.127006521734646e-05, "loss": 1.7836, "step": 3467 }, { "epoch": 0.36023683390464317, "grad_norm": 0.4086010158061981, "learning_rate": 7.125529750926094e-05, "loss": 1.7712, "step": 3468 }, { "epoch": 0.36034070842422355, "grad_norm": 0.3838006556034088, "learning_rate": 7.124052753765202e-05, "loss": 1.7435, "step": 3469 }, { "epoch": 0.3604445829438039, "grad_norm": 0.40963372588157654, "learning_rate": 7.122575530409262e-05, "loss": 1.4758, "step": 3470 }, { "epoch": 0.3605484574633842, "grad_norm": 0.4056002199649811, "learning_rate": 7.121098081015586e-05, "loss": 1.6, "step": 3471 }, { "epoch": 0.3606523319829646, "grad_norm": 0.46054428815841675, "learning_rate": 7.119620405741506e-05, "loss": 1.845, "step": 3472 }, { "epoch": 0.3607562065025449, "grad_norm": 0.41523477435112, "learning_rate": 7.118142504744389e-05, "loss": 1.7532, "step": 3473 }, { "epoch": 0.36086008102212525, "grad_norm": 0.40955179929733276, "learning_rate": 7.116664378181616e-05, "loss": 1.7787, "step": 3474 }, { "epoch": 0.36096395554170563, "grad_norm": 0.39328327775001526, "learning_rate": 7.115186026210597e-05, "loss": 1.7785, "step": 3475 }, { "epoch": 0.36106783006128595, "grad_norm": 0.380825012922287, "learning_rate": 7.113707448988767e-05, "loss": 1.6208, "step": 3476 }, { "epoch": 0.36117170458086634, "grad_norm": 0.3730035722255707, "learning_rate": 7.11222864667358e-05, "loss": 1.66, "step": 3477 }, { "epoch": 0.36127557910044666, "grad_norm": 0.39808517694473267, "learning_rate": 7.110749619422519e-05, "loss": 1.6975, "step": 3478 }, { "epoch": 0.361379453620027, "grad_norm": 0.4094315767288208, "learning_rate": 7.109270367393088e-05, "loss": 1.7015, "step": 3479 }, { "epoch": 0.3614833281396074, "grad_norm": 0.41740068793296814, "learning_rate": 7.107790890742815e-05, "loss": 1.7778, "step": 3480 }, { "epoch": 0.3615872026591877, "grad_norm": 0.41227155923843384, "learning_rate": 7.106311189629253e-05, "loss": 1.8917, "step": 3481 }, { "epoch": 0.36169107717876803, "grad_norm": 0.3613469898700714, "learning_rate": 7.104831264209982e-05, "loss": 1.6972, "step": 3482 }, { "epoch": 0.3617949516983484, "grad_norm": 0.402305543422699, "learning_rate": 7.103351114642596e-05, "loss": 1.5879, "step": 3483 }, { "epoch": 0.36189882621792874, "grad_norm": 0.39771807193756104, "learning_rate": 7.101870741084726e-05, "loss": 1.7124, "step": 3484 }, { "epoch": 0.36200270073750906, "grad_norm": 0.36613133549690247, "learning_rate": 7.100390143694016e-05, "loss": 1.681, "step": 3485 }, { "epoch": 0.36210657525708945, "grad_norm": 0.3933365046977997, "learning_rate": 7.098909322628138e-05, "loss": 1.703, "step": 3486 }, { "epoch": 0.3622104497766698, "grad_norm": 0.39445760846138, "learning_rate": 7.097428278044792e-05, "loss": 1.7333, "step": 3487 }, { "epoch": 0.36231432429625016, "grad_norm": 0.3644881844520569, "learning_rate": 7.095947010101693e-05, "loss": 1.713, "step": 3488 }, { "epoch": 0.3624181988158305, "grad_norm": 0.3871423304080963, "learning_rate": 7.094465518956588e-05, "loss": 1.7749, "step": 3489 }, { "epoch": 0.3625220733354108, "grad_norm": 0.38430991768836975, "learning_rate": 7.092983804767242e-05, "loss": 1.6216, "step": 3490 }, { "epoch": 0.3626259478549912, "grad_norm": 0.3718239367008209, "learning_rate": 7.091501867691446e-05, "loss": 1.6914, "step": 3491 }, { "epoch": 0.3627298223745715, "grad_norm": 0.39211010932922363, "learning_rate": 7.090019707887017e-05, "loss": 1.4819, "step": 3492 }, { "epoch": 0.36283369689415185, "grad_norm": 0.3777989149093628, "learning_rate": 7.08853732551179e-05, "loss": 1.6425, "step": 3493 }, { "epoch": 0.36293757141373223, "grad_norm": 0.3853297829627991, "learning_rate": 7.087054720723629e-05, "loss": 1.6724, "step": 3494 }, { "epoch": 0.36304144593331256, "grad_norm": 0.3982897400856018, "learning_rate": 7.085571893680421e-05, "loss": 1.7549, "step": 3495 }, { "epoch": 0.3631453204528929, "grad_norm": 0.38394638895988464, "learning_rate": 7.084088844540074e-05, "loss": 1.7959, "step": 3496 }, { "epoch": 0.36324919497247327, "grad_norm": 0.4506365656852722, "learning_rate": 7.08260557346052e-05, "loss": 1.907, "step": 3497 }, { "epoch": 0.3633530694920536, "grad_norm": 0.43902164697647095, "learning_rate": 7.081122080599721e-05, "loss": 1.7573, "step": 3498 }, { "epoch": 0.3634569440116339, "grad_norm": 0.4061184525489807, "learning_rate": 7.079638366115653e-05, "loss": 1.8399, "step": 3499 }, { "epoch": 0.3635608185312143, "grad_norm": 0.3644791841506958, "learning_rate": 7.078154430166319e-05, "loss": 1.5973, "step": 3500 }, { "epoch": 0.36366469305079463, "grad_norm": 0.44396528601646423, "learning_rate": 7.07667027290975e-05, "loss": 1.8829, "step": 3501 }, { "epoch": 0.363768567570375, "grad_norm": 0.44041764736175537, "learning_rate": 7.075185894503993e-05, "loss": 1.9544, "step": 3502 }, { "epoch": 0.36387244208995534, "grad_norm": 0.3821714520454407, "learning_rate": 7.073701295107128e-05, "loss": 1.7017, "step": 3503 }, { "epoch": 0.36397631660953567, "grad_norm": 0.3823404014110565, "learning_rate": 7.072216474877249e-05, "loss": 1.5732, "step": 3504 }, { "epoch": 0.36408019112911605, "grad_norm": 0.38946443796157837, "learning_rate": 7.070731433972481e-05, "loss": 1.5651, "step": 3505 }, { "epoch": 0.3641840656486964, "grad_norm": 0.3907792270183563, "learning_rate": 7.069246172550967e-05, "loss": 1.5276, "step": 3506 }, { "epoch": 0.3642879401682767, "grad_norm": 0.374189168214798, "learning_rate": 7.067760690770876e-05, "loss": 1.4874, "step": 3507 }, { "epoch": 0.3643918146878571, "grad_norm": 0.37600815296173096, "learning_rate": 7.066274988790399e-05, "loss": 1.6168, "step": 3508 }, { "epoch": 0.3644956892074374, "grad_norm": 0.39330026507377625, "learning_rate": 7.064789066767755e-05, "loss": 1.8023, "step": 3509 }, { "epoch": 0.36459956372701774, "grad_norm": 0.4047221541404724, "learning_rate": 7.063302924861182e-05, "loss": 1.718, "step": 3510 }, { "epoch": 0.3647034382465981, "grad_norm": 0.3758139908313751, "learning_rate": 7.06181656322894e-05, "loss": 1.6952, "step": 3511 }, { "epoch": 0.36480731276617845, "grad_norm": 0.3800835907459259, "learning_rate": 7.060329982029317e-05, "loss": 1.5578, "step": 3512 }, { "epoch": 0.3649111872857588, "grad_norm": 0.38068559765815735, "learning_rate": 7.058843181420624e-05, "loss": 1.7363, "step": 3513 }, { "epoch": 0.36501506180533916, "grad_norm": 0.40776312351226807, "learning_rate": 7.05735616156119e-05, "loss": 1.5816, "step": 3514 }, { "epoch": 0.3651189363249195, "grad_norm": 0.40851452946662903, "learning_rate": 7.055868922609374e-05, "loss": 1.8035, "step": 3515 }, { "epoch": 0.36522281084449987, "grad_norm": 0.4785706698894501, "learning_rate": 7.054381464723554e-05, "loss": 2.132, "step": 3516 }, { "epoch": 0.3653266853640802, "grad_norm": 0.3917141556739807, "learning_rate": 7.052893788062131e-05, "loss": 1.7564, "step": 3517 }, { "epoch": 0.3654305598836605, "grad_norm": 0.35814887285232544, "learning_rate": 7.051405892783536e-05, "loss": 1.6212, "step": 3518 }, { "epoch": 0.3655344344032409, "grad_norm": 0.37529706954956055, "learning_rate": 7.049917779046213e-05, "loss": 1.6613, "step": 3519 }, { "epoch": 0.36563830892282123, "grad_norm": 0.4299023449420929, "learning_rate": 7.048429447008636e-05, "loss": 1.7108, "step": 3520 }, { "epoch": 0.36574218344240156, "grad_norm": 0.39884042739868164, "learning_rate": 7.046940896829302e-05, "loss": 1.722, "step": 3521 }, { "epoch": 0.36584605796198194, "grad_norm": 0.3787887394428253, "learning_rate": 7.04545212866673e-05, "loss": 1.7693, "step": 3522 }, { "epoch": 0.36594993248156227, "grad_norm": 0.37919625639915466, "learning_rate": 7.043963142679459e-05, "loss": 1.8372, "step": 3523 }, { "epoch": 0.3660538070011426, "grad_norm": 0.4805675446987152, "learning_rate": 7.04247393902606e-05, "loss": 1.9524, "step": 3524 }, { "epoch": 0.366157681520723, "grad_norm": 0.3908388316631317, "learning_rate": 7.040984517865116e-05, "loss": 1.7887, "step": 3525 }, { "epoch": 0.3662615560403033, "grad_norm": 0.39634791016578674, "learning_rate": 7.039494879355242e-05, "loss": 1.7503, "step": 3526 }, { "epoch": 0.3663654305598837, "grad_norm": 0.3950914740562439, "learning_rate": 7.038005023655072e-05, "loss": 1.6939, "step": 3527 }, { "epoch": 0.366469305079464, "grad_norm": 0.3956228792667389, "learning_rate": 7.036514950923264e-05, "loss": 1.7095, "step": 3528 }, { "epoch": 0.36657317959904434, "grad_norm": 0.38164466619491577, "learning_rate": 7.0350246613185e-05, "loss": 1.6409, "step": 3529 }, { "epoch": 0.3666770541186247, "grad_norm": 0.39597025513648987, "learning_rate": 7.033534154999481e-05, "loss": 1.781, "step": 3530 }, { "epoch": 0.36678092863820505, "grad_norm": 0.3930729329586029, "learning_rate": 7.032043432124939e-05, "loss": 1.7551, "step": 3531 }, { "epoch": 0.3668848031577854, "grad_norm": 0.38284942507743835, "learning_rate": 7.030552492853621e-05, "loss": 1.4163, "step": 3532 }, { "epoch": 0.36698867767736576, "grad_norm": 0.3863504230976105, "learning_rate": 7.029061337344303e-05, "loss": 1.8067, "step": 3533 }, { "epoch": 0.3670925521969461, "grad_norm": 0.42066165804862976, "learning_rate": 7.027569965755777e-05, "loss": 1.819, "step": 3534 }, { "epoch": 0.3671964267165264, "grad_norm": 0.3733902871608734, "learning_rate": 7.026078378246868e-05, "loss": 1.6656, "step": 3535 }, { "epoch": 0.3673003012361068, "grad_norm": 0.39036303758621216, "learning_rate": 7.024586574976414e-05, "loss": 1.7219, "step": 3536 }, { "epoch": 0.3674041757556871, "grad_norm": 0.3913772702217102, "learning_rate": 7.023094556103283e-05, "loss": 1.6209, "step": 3537 }, { "epoch": 0.36750805027526745, "grad_norm": 0.3987481892108917, "learning_rate": 7.021602321786362e-05, "loss": 1.7411, "step": 3538 }, { "epoch": 0.36761192479484783, "grad_norm": 0.38136929273605347, "learning_rate": 7.020109872184563e-05, "loss": 1.6651, "step": 3539 }, { "epoch": 0.36771579931442816, "grad_norm": 0.3978997766971588, "learning_rate": 7.018617207456821e-05, "loss": 1.7601, "step": 3540 }, { "epoch": 0.36781967383400854, "grad_norm": 0.39841562509536743, "learning_rate": 7.01712432776209e-05, "loss": 1.6426, "step": 3541 }, { "epoch": 0.36792354835358887, "grad_norm": 0.40489330887794495, "learning_rate": 7.015631233259354e-05, "loss": 1.4646, "step": 3542 }, { "epoch": 0.3680274228731692, "grad_norm": 0.3978332579135895, "learning_rate": 7.014137924107614e-05, "loss": 1.7999, "step": 3543 }, { "epoch": 0.3681312973927496, "grad_norm": 0.4058266878128052, "learning_rate": 7.012644400465895e-05, "loss": 1.8085, "step": 3544 }, { "epoch": 0.3682351719123299, "grad_norm": 0.4187474846839905, "learning_rate": 7.011150662493248e-05, "loss": 1.851, "step": 3545 }, { "epoch": 0.36833904643191023, "grad_norm": 0.43871933221817017, "learning_rate": 7.009656710348742e-05, "loss": 2.0409, "step": 3546 }, { "epoch": 0.3684429209514906, "grad_norm": 0.4018607437610626, "learning_rate": 7.008162544191474e-05, "loss": 1.7269, "step": 3547 }, { "epoch": 0.36854679547107094, "grad_norm": 0.4518108665943146, "learning_rate": 7.006668164180557e-05, "loss": 1.8257, "step": 3548 }, { "epoch": 0.36865066999065127, "grad_norm": 0.4045441150665283, "learning_rate": 7.005173570475135e-05, "loss": 1.8499, "step": 3549 }, { "epoch": 0.36875454451023165, "grad_norm": 0.41423842310905457, "learning_rate": 7.003678763234367e-05, "loss": 1.8105, "step": 3550 }, { "epoch": 0.368858419029812, "grad_norm": 0.37954044342041016, "learning_rate": 7.002183742617441e-05, "loss": 1.7043, "step": 3551 }, { "epoch": 0.36896229354939236, "grad_norm": 0.39509761333465576, "learning_rate": 7.000688508783564e-05, "loss": 1.7842, "step": 3552 }, { "epoch": 0.3690661680689727, "grad_norm": 0.4210776686668396, "learning_rate": 6.999193061891967e-05, "loss": 1.5356, "step": 3553 }, { "epoch": 0.369170042588553, "grad_norm": 0.4176965355873108, "learning_rate": 6.997697402101904e-05, "loss": 1.8425, "step": 3554 }, { "epoch": 0.3692739171081334, "grad_norm": 0.4520731270313263, "learning_rate": 6.996201529572648e-05, "loss": 1.7272, "step": 3555 }, { "epoch": 0.3693777916277137, "grad_norm": 0.3699086606502533, "learning_rate": 6.994705444463501e-05, "loss": 1.661, "step": 3556 }, { "epoch": 0.36948166614729405, "grad_norm": 0.38104352355003357, "learning_rate": 6.993209146933784e-05, "loss": 1.5626, "step": 3557 }, { "epoch": 0.36958554066687443, "grad_norm": 0.4503292739391327, "learning_rate": 6.991712637142839e-05, "loss": 1.8327, "step": 3558 }, { "epoch": 0.36968941518645476, "grad_norm": 0.43513697385787964, "learning_rate": 6.990215915250035e-05, "loss": 1.6256, "step": 3559 }, { "epoch": 0.3697932897060351, "grad_norm": 0.4127349555492401, "learning_rate": 6.988718981414761e-05, "loss": 1.8755, "step": 3560 }, { "epoch": 0.36989716422561547, "grad_norm": 0.4628101885318756, "learning_rate": 6.987221835796429e-05, "loss": 1.8136, "step": 3561 }, { "epoch": 0.3700010387451958, "grad_norm": 0.4025733768939972, "learning_rate": 6.98572447855447e-05, "loss": 1.66, "step": 3562 }, { "epoch": 0.3701049132647761, "grad_norm": 0.3781670331954956, "learning_rate": 6.984226909848346e-05, "loss": 1.6159, "step": 3563 }, { "epoch": 0.3702087877843565, "grad_norm": 0.40306591987609863, "learning_rate": 6.982729129837531e-05, "loss": 1.888, "step": 3564 }, { "epoch": 0.37031266230393683, "grad_norm": 0.4283866584300995, "learning_rate": 6.981231138681531e-05, "loss": 1.7992, "step": 3565 }, { "epoch": 0.3704165368235172, "grad_norm": 0.4207936227321625, "learning_rate": 6.979732936539868e-05, "loss": 1.7709, "step": 3566 }, { "epoch": 0.37052041134309754, "grad_norm": 0.4613027274608612, "learning_rate": 6.97823452357209e-05, "loss": 1.765, "step": 3567 }, { "epoch": 0.37062428586267787, "grad_norm": 0.40821146965026855, "learning_rate": 6.976735899937767e-05, "loss": 1.7137, "step": 3568 }, { "epoch": 0.37072816038225825, "grad_norm": 0.3744267225265503, "learning_rate": 6.97523706579649e-05, "loss": 1.7768, "step": 3569 }, { "epoch": 0.3708320349018386, "grad_norm": 0.410510778427124, "learning_rate": 6.973738021307871e-05, "loss": 1.4456, "step": 3570 }, { "epoch": 0.3709359094214189, "grad_norm": 0.4135860800743103, "learning_rate": 6.97223876663155e-05, "loss": 1.6854, "step": 3571 }, { "epoch": 0.3710397839409993, "grad_norm": 0.43364089727401733, "learning_rate": 6.970739301927183e-05, "loss": 1.7468, "step": 3572 }, { "epoch": 0.3711436584605796, "grad_norm": 0.37639757990837097, "learning_rate": 6.969239627354453e-05, "loss": 1.6043, "step": 3573 }, { "epoch": 0.37124753298015994, "grad_norm": 0.415420800447464, "learning_rate": 6.967739743073065e-05, "loss": 1.7144, "step": 3574 }, { "epoch": 0.3713514074997403, "grad_norm": 0.39380958676338196, "learning_rate": 6.966239649242743e-05, "loss": 1.8287, "step": 3575 }, { "epoch": 0.37145528201932065, "grad_norm": 0.41520291566848755, "learning_rate": 6.964739346023235e-05, "loss": 1.8377, "step": 3576 }, { "epoch": 0.371559156538901, "grad_norm": 0.4204116761684418, "learning_rate": 6.963238833574312e-05, "loss": 1.7471, "step": 3577 }, { "epoch": 0.37166303105848136, "grad_norm": 0.4574458599090576, "learning_rate": 6.961738112055767e-05, "loss": 1.4237, "step": 3578 }, { "epoch": 0.3717669055780617, "grad_norm": 0.3843066990375519, "learning_rate": 6.960237181627416e-05, "loss": 1.7035, "step": 3579 }, { "epoch": 0.3718707800976421, "grad_norm": 0.3904613256454468, "learning_rate": 6.958736042449095e-05, "loss": 1.8277, "step": 3580 }, { "epoch": 0.3719746546172224, "grad_norm": 0.3790266811847687, "learning_rate": 6.957234694680663e-05, "loss": 1.7919, "step": 3581 }, { "epoch": 0.3720785291368027, "grad_norm": 0.3974626362323761, "learning_rate": 6.955733138482006e-05, "loss": 1.795, "step": 3582 }, { "epoch": 0.3721824036563831, "grad_norm": 0.3986896574497223, "learning_rate": 6.954231374013022e-05, "loss": 1.7831, "step": 3583 }, { "epoch": 0.37228627817596344, "grad_norm": 0.4144607484340668, "learning_rate": 6.952729401433642e-05, "loss": 1.4946, "step": 3584 }, { "epoch": 0.37239015269554376, "grad_norm": 0.4263852536678314, "learning_rate": 6.951227220903813e-05, "loss": 1.7404, "step": 3585 }, { "epoch": 0.37249402721512415, "grad_norm": 0.4002395272254944, "learning_rate": 6.949724832583504e-05, "loss": 1.6906, "step": 3586 }, { "epoch": 0.3725979017347045, "grad_norm": 0.41054767370224, "learning_rate": 6.948222236632709e-05, "loss": 1.7632, "step": 3587 }, { "epoch": 0.3727017762542848, "grad_norm": 0.3650989234447479, "learning_rate": 6.946719433211442e-05, "loss": 1.6058, "step": 3588 }, { "epoch": 0.3728056507738652, "grad_norm": 0.4016493856906891, "learning_rate": 6.945216422479741e-05, "loss": 1.7819, "step": 3589 }, { "epoch": 0.3729095252934455, "grad_norm": 0.3928471505641937, "learning_rate": 6.943713204597664e-05, "loss": 1.8151, "step": 3590 }, { "epoch": 0.3730133998130259, "grad_norm": 0.39063897728919983, "learning_rate": 6.942209779725292e-05, "loss": 1.775, "step": 3591 }, { "epoch": 0.3731172743326062, "grad_norm": 0.38129961490631104, "learning_rate": 6.94070614802273e-05, "loss": 1.6656, "step": 3592 }, { "epoch": 0.37322114885218655, "grad_norm": 0.438495934009552, "learning_rate": 6.9392023096501e-05, "loss": 1.7774, "step": 3593 }, { "epoch": 0.37332502337176693, "grad_norm": 0.43866604566574097, "learning_rate": 6.937698264767549e-05, "loss": 1.8816, "step": 3594 }, { "epoch": 0.37342889789134726, "grad_norm": 0.41461604833602905, "learning_rate": 6.93619401353525e-05, "loss": 1.7163, "step": 3595 }, { "epoch": 0.3735327724109276, "grad_norm": 0.36867401003837585, "learning_rate": 6.934689556113391e-05, "loss": 1.7318, "step": 3596 }, { "epoch": 0.37363664693050797, "grad_norm": 0.41185227036476135, "learning_rate": 6.933184892662185e-05, "loss": 1.8164, "step": 3597 }, { "epoch": 0.3737405214500883, "grad_norm": 0.3863617777824402, "learning_rate": 6.931680023341867e-05, "loss": 1.7503, "step": 3598 }, { "epoch": 0.3738443959696686, "grad_norm": 0.3842338025569916, "learning_rate": 6.930174948312696e-05, "loss": 1.6838, "step": 3599 }, { "epoch": 0.373948270489249, "grad_norm": 0.3778021037578583, "learning_rate": 6.928669667734945e-05, "loss": 1.743, "step": 3600 }, { "epoch": 0.37405214500882933, "grad_norm": 0.40780338644981384, "learning_rate": 6.927164181768921e-05, "loss": 1.868, "step": 3601 }, { "epoch": 0.37415601952840966, "grad_norm": 0.4100748300552368, "learning_rate": 6.925658490574945e-05, "loss": 1.8172, "step": 3602 }, { "epoch": 0.37425989404799004, "grad_norm": 0.3898886740207672, "learning_rate": 6.92415259431336e-05, "loss": 1.671, "step": 3603 }, { "epoch": 0.37436376856757037, "grad_norm": 0.4094226062297821, "learning_rate": 6.922646493144532e-05, "loss": 1.6493, "step": 3604 }, { "epoch": 0.37446764308715075, "grad_norm": 0.36988019943237305, "learning_rate": 6.92114018722885e-05, "loss": 1.6539, "step": 3605 }, { "epoch": 0.3745715176067311, "grad_norm": 0.3859472870826721, "learning_rate": 6.919633676726722e-05, "loss": 1.5816, "step": 3606 }, { "epoch": 0.3746753921263114, "grad_norm": 0.4098130464553833, "learning_rate": 6.918126961798583e-05, "loss": 1.7291, "step": 3607 }, { "epoch": 0.3747792666458918, "grad_norm": 0.40433475375175476, "learning_rate": 6.916620042604883e-05, "loss": 1.7515, "step": 3608 }, { "epoch": 0.3748831411654721, "grad_norm": 0.38681918382644653, "learning_rate": 6.915112919306101e-05, "loss": 1.6408, "step": 3609 }, { "epoch": 0.37498701568505244, "grad_norm": 0.43381497263908386, "learning_rate": 6.913605592062731e-05, "loss": 1.7996, "step": 3610 }, { "epoch": 0.3750908902046328, "grad_norm": 0.37756863236427307, "learning_rate": 6.91209806103529e-05, "loss": 1.6188, "step": 3611 }, { "epoch": 0.37519476472421315, "grad_norm": 0.44089293479919434, "learning_rate": 6.910590326384323e-05, "loss": 1.8074, "step": 3612 }, { "epoch": 0.3752986392437935, "grad_norm": 0.3893734812736511, "learning_rate": 6.909082388270389e-05, "loss": 1.6607, "step": 3613 }, { "epoch": 0.37540251376337386, "grad_norm": 0.40464848279953003, "learning_rate": 6.90757424685407e-05, "loss": 1.7711, "step": 3614 }, { "epoch": 0.3755063882829542, "grad_norm": 0.3943893313407898, "learning_rate": 6.906065902295975e-05, "loss": 1.8252, "step": 3615 }, { "epoch": 0.3756102628025345, "grad_norm": 0.38202980160713196, "learning_rate": 6.904557354756728e-05, "loss": 1.7832, "step": 3616 }, { "epoch": 0.3757141373221149, "grad_norm": 0.38370367884635925, "learning_rate": 6.903048604396981e-05, "loss": 1.6776, "step": 3617 }, { "epoch": 0.3758180118416952, "grad_norm": 0.39055919647216797, "learning_rate": 6.9015396513774e-05, "loss": 1.7664, "step": 3618 }, { "epoch": 0.3759218863612756, "grad_norm": 0.3747326731681824, "learning_rate": 6.900030495858681e-05, "loss": 1.5807, "step": 3619 }, { "epoch": 0.37602576088085593, "grad_norm": 0.4175710380077362, "learning_rate": 6.898521138001533e-05, "loss": 1.8023, "step": 3620 }, { "epoch": 0.37612963540043626, "grad_norm": 0.4446984529495239, "learning_rate": 6.897011577966693e-05, "loss": 1.7831, "step": 3621 }, { "epoch": 0.37623350992001664, "grad_norm": 0.3818349242210388, "learning_rate": 6.895501815914917e-05, "loss": 1.7742, "step": 3622 }, { "epoch": 0.37633738443959697, "grad_norm": 0.4104137122631073, "learning_rate": 6.893991852006983e-05, "loss": 1.6418, "step": 3623 }, { "epoch": 0.3764412589591773, "grad_norm": 0.3920380771160126, "learning_rate": 6.892481686403694e-05, "loss": 1.6307, "step": 3624 }, { "epoch": 0.3765451334787577, "grad_norm": 0.36929798126220703, "learning_rate": 6.890971319265863e-05, "loss": 1.6734, "step": 3625 }, { "epoch": 0.376649007998338, "grad_norm": 0.39406248927116394, "learning_rate": 6.889460750754337e-05, "loss": 1.6584, "step": 3626 }, { "epoch": 0.37675288251791833, "grad_norm": 0.4494403898715973, "learning_rate": 6.887949981029981e-05, "loss": 1.9331, "step": 3627 }, { "epoch": 0.3768567570374987, "grad_norm": 0.4188593924045563, "learning_rate": 6.886439010253678e-05, "loss": 1.8305, "step": 3628 }, { "epoch": 0.37696063155707904, "grad_norm": 0.3847233057022095, "learning_rate": 6.884927838586336e-05, "loss": 1.675, "step": 3629 }, { "epoch": 0.3770645060766594, "grad_norm": 0.3903995156288147, "learning_rate": 6.88341646618888e-05, "loss": 1.6902, "step": 3630 }, { "epoch": 0.37716838059623975, "grad_norm": 0.38415199518203735, "learning_rate": 6.881904893222265e-05, "loss": 1.593, "step": 3631 }, { "epoch": 0.3772722551158201, "grad_norm": 0.3710866868495941, "learning_rate": 6.880393119847456e-05, "loss": 1.5337, "step": 3632 }, { "epoch": 0.37737612963540046, "grad_norm": 0.3745911419391632, "learning_rate": 6.878881146225447e-05, "loss": 1.7957, "step": 3633 }, { "epoch": 0.3774800041549808, "grad_norm": 0.42726603150367737, "learning_rate": 6.877368972517252e-05, "loss": 1.783, "step": 3634 }, { "epoch": 0.3775838786745611, "grad_norm": 0.36636191606521606, "learning_rate": 6.875856598883907e-05, "loss": 1.756, "step": 3635 }, { "epoch": 0.3776877531941415, "grad_norm": 0.40645745396614075, "learning_rate": 6.874344025486464e-05, "loss": 1.7357, "step": 3636 }, { "epoch": 0.3777916277137218, "grad_norm": 0.42991214990615845, "learning_rate": 6.872831252486005e-05, "loss": 1.9177, "step": 3637 }, { "epoch": 0.37789550223330215, "grad_norm": 0.39431822299957275, "learning_rate": 6.871318280043626e-05, "loss": 1.6861, "step": 3638 }, { "epoch": 0.37799937675288253, "grad_norm": 0.4022711515426636, "learning_rate": 6.869805108320446e-05, "loss": 1.7634, "step": 3639 }, { "epoch": 0.37810325127246286, "grad_norm": 0.3846032917499542, "learning_rate": 6.868291737477608e-05, "loss": 1.616, "step": 3640 }, { "epoch": 0.3782071257920432, "grad_norm": 0.3910578787326813, "learning_rate": 6.866778167676272e-05, "loss": 1.6158, "step": 3641 }, { "epoch": 0.37831100031162357, "grad_norm": 0.39738887548446655, "learning_rate": 6.865264399077623e-05, "loss": 1.657, "step": 3642 }, { "epoch": 0.3784148748312039, "grad_norm": 0.3639686703681946, "learning_rate": 6.863750431842865e-05, "loss": 1.536, "step": 3643 }, { "epoch": 0.3785187493507843, "grad_norm": 0.4142943322658539, "learning_rate": 6.862236266133225e-05, "loss": 1.8389, "step": 3644 }, { "epoch": 0.3786226238703646, "grad_norm": 0.42220360040664673, "learning_rate": 6.860721902109948e-05, "loss": 1.6854, "step": 3645 }, { "epoch": 0.37872649838994493, "grad_norm": 0.43140527606010437, "learning_rate": 6.859207339934302e-05, "loss": 1.8258, "step": 3646 }, { "epoch": 0.3788303729095253, "grad_norm": 0.40784284472465515, "learning_rate": 6.857692579767576e-05, "loss": 1.7949, "step": 3647 }, { "epoch": 0.37893424742910564, "grad_norm": 0.39824676513671875, "learning_rate": 6.856177621771083e-05, "loss": 1.6723, "step": 3648 }, { "epoch": 0.37903812194868597, "grad_norm": 0.3684784471988678, "learning_rate": 6.854662466106151e-05, "loss": 1.7018, "step": 3649 }, { "epoch": 0.37914199646826635, "grad_norm": 0.4189784824848175, "learning_rate": 6.853147112934132e-05, "loss": 1.7914, "step": 3650 }, { "epoch": 0.3792458709878467, "grad_norm": 0.41028210520744324, "learning_rate": 6.851631562416402e-05, "loss": 1.7757, "step": 3651 }, { "epoch": 0.379349745507427, "grad_norm": 0.37003424763679504, "learning_rate": 6.850115814714355e-05, "loss": 1.5974, "step": 3652 }, { "epoch": 0.3794536200270074, "grad_norm": 0.4214622676372528, "learning_rate": 6.848599869989403e-05, "loss": 1.8257, "step": 3653 }, { "epoch": 0.3795574945465877, "grad_norm": 0.4845244586467743, "learning_rate": 6.847083728402986e-05, "loss": 1.5854, "step": 3654 }, { "epoch": 0.37966136906616804, "grad_norm": 0.4533901512622833, "learning_rate": 6.845567390116558e-05, "loss": 1.8799, "step": 3655 }, { "epoch": 0.3797652435857484, "grad_norm": 0.43582406640052795, "learning_rate": 6.8440508552916e-05, "loss": 1.9001, "step": 3656 }, { "epoch": 0.37986911810532875, "grad_norm": 0.3865016996860504, "learning_rate": 6.84253412408961e-05, "loss": 1.5654, "step": 3657 }, { "epoch": 0.37997299262490913, "grad_norm": 0.3525942862033844, "learning_rate": 6.841017196672109e-05, "loss": 1.5015, "step": 3658 }, { "epoch": 0.38007686714448946, "grad_norm": 0.3993504047393799, "learning_rate": 6.839500073200636e-05, "loss": 1.7024, "step": 3659 }, { "epoch": 0.3801807416640698, "grad_norm": 0.4344191253185272, "learning_rate": 6.837982753836755e-05, "loss": 1.7241, "step": 3660 }, { "epoch": 0.38028461618365017, "grad_norm": 0.3942391574382782, "learning_rate": 6.836465238742045e-05, "loss": 1.6898, "step": 3661 }, { "epoch": 0.3803884907032305, "grad_norm": 0.4017980396747589, "learning_rate": 6.834947528078114e-05, "loss": 1.8323, "step": 3662 }, { "epoch": 0.3804923652228108, "grad_norm": 0.4126379191875458, "learning_rate": 6.833429622006584e-05, "loss": 1.7061, "step": 3663 }, { "epoch": 0.3805962397423912, "grad_norm": 0.4057486951351166, "learning_rate": 6.8319115206891e-05, "loss": 1.8332, "step": 3664 }, { "epoch": 0.38070011426197153, "grad_norm": 0.4050893783569336, "learning_rate": 6.83039322428733e-05, "loss": 1.6706, "step": 3665 }, { "epoch": 0.38080398878155186, "grad_norm": 0.37479984760284424, "learning_rate": 6.828874732962958e-05, "loss": 1.4825, "step": 3666 }, { "epoch": 0.38090786330113224, "grad_norm": 0.39387476444244385, "learning_rate": 6.827356046877693e-05, "loss": 1.6615, "step": 3667 }, { "epoch": 0.38101173782071257, "grad_norm": 0.35786718130111694, "learning_rate": 6.825837166193263e-05, "loss": 1.5442, "step": 3668 }, { "epoch": 0.38111561234029295, "grad_norm": 0.3664378225803375, "learning_rate": 6.824318091071416e-05, "loss": 1.6057, "step": 3669 }, { "epoch": 0.3812194868598733, "grad_norm": 0.4093641936779022, "learning_rate": 6.822798821673923e-05, "loss": 1.7737, "step": 3670 }, { "epoch": 0.3813233613794536, "grad_norm": 0.3681464195251465, "learning_rate": 6.821279358162575e-05, "loss": 1.506, "step": 3671 }, { "epoch": 0.381427235899034, "grad_norm": 0.4376184344291687, "learning_rate": 6.819759700699181e-05, "loss": 1.7086, "step": 3672 }, { "epoch": 0.3815311104186143, "grad_norm": 0.4148000180721283, "learning_rate": 6.818239849445574e-05, "loss": 1.8619, "step": 3673 }, { "epoch": 0.38163498493819464, "grad_norm": 0.39664286375045776, "learning_rate": 6.816719804563606e-05, "loss": 1.7338, "step": 3674 }, { "epoch": 0.381738859457775, "grad_norm": 0.37723055481910706, "learning_rate": 6.815199566215149e-05, "loss": 1.5691, "step": 3675 }, { "epoch": 0.38184273397735535, "grad_norm": 0.42841121554374695, "learning_rate": 6.813679134562098e-05, "loss": 1.8749, "step": 3676 }, { "epoch": 0.3819466084969357, "grad_norm": 0.3727743625640869, "learning_rate": 6.812158509766366e-05, "loss": 1.6885, "step": 3677 }, { "epoch": 0.38205048301651606, "grad_norm": 0.401883989572525, "learning_rate": 6.810637691989887e-05, "loss": 1.7689, "step": 3678 }, { "epoch": 0.3821543575360964, "grad_norm": 0.3472015857696533, "learning_rate": 6.809116681394618e-05, "loss": 1.6354, "step": 3679 }, { "epoch": 0.3822582320556767, "grad_norm": 0.3772028684616089, "learning_rate": 6.807595478142536e-05, "loss": 1.6749, "step": 3680 }, { "epoch": 0.3823621065752571, "grad_norm": 0.37408995628356934, "learning_rate": 6.806074082395633e-05, "loss": 1.7558, "step": 3681 }, { "epoch": 0.3824659810948374, "grad_norm": 0.4021287262439728, "learning_rate": 6.80455249431593e-05, "loss": 1.6639, "step": 3682 }, { "epoch": 0.3825698556144178, "grad_norm": 0.3701534867286682, "learning_rate": 6.803030714065461e-05, "loss": 1.6497, "step": 3683 }, { "epoch": 0.38267373013399814, "grad_norm": 0.4467286765575409, "learning_rate": 6.801508741806285e-05, "loss": 1.86, "step": 3684 }, { "epoch": 0.38277760465357846, "grad_norm": 0.40490084886550903, "learning_rate": 6.799986577700481e-05, "loss": 1.7865, "step": 3685 }, { "epoch": 0.38288147917315885, "grad_norm": 0.39086583256721497, "learning_rate": 6.798464221910147e-05, "loss": 1.7091, "step": 3686 }, { "epoch": 0.38298535369273917, "grad_norm": 0.4162215292453766, "learning_rate": 6.796941674597402e-05, "loss": 1.7886, "step": 3687 }, { "epoch": 0.3830892282123195, "grad_norm": 0.3935025930404663, "learning_rate": 6.795418935924384e-05, "loss": 1.8681, "step": 3688 }, { "epoch": 0.3831931027318999, "grad_norm": 0.40464386343955994, "learning_rate": 6.793896006053255e-05, "loss": 1.8301, "step": 3689 }, { "epoch": 0.3832969772514802, "grad_norm": 0.3767825961112976, "learning_rate": 6.792372885146195e-05, "loss": 1.6537, "step": 3690 }, { "epoch": 0.38340085177106054, "grad_norm": 0.36534345149993896, "learning_rate": 6.790849573365404e-05, "loss": 1.5565, "step": 3691 }, { "epoch": 0.3835047262906409, "grad_norm": 0.38759198784828186, "learning_rate": 6.7893260708731e-05, "loss": 1.8307, "step": 3692 }, { "epoch": 0.38360860081022125, "grad_norm": 0.38123390078544617, "learning_rate": 6.78780237783153e-05, "loss": 1.6495, "step": 3693 }, { "epoch": 0.38371247532980157, "grad_norm": 0.4280903935432434, "learning_rate": 6.786278494402952e-05, "loss": 1.8522, "step": 3694 }, { "epoch": 0.38381634984938195, "grad_norm": 0.42416825890541077, "learning_rate": 6.784754420749645e-05, "loss": 1.7361, "step": 3695 }, { "epoch": 0.3839202243689623, "grad_norm": 0.37206903100013733, "learning_rate": 6.783230157033915e-05, "loss": 1.7802, "step": 3696 }, { "epoch": 0.38402409888854266, "grad_norm": 0.4096907079219818, "learning_rate": 6.781705703418084e-05, "loss": 1.7411, "step": 3697 }, { "epoch": 0.384127973408123, "grad_norm": 0.38418519496917725, "learning_rate": 6.780181060064493e-05, "loss": 1.4685, "step": 3698 }, { "epoch": 0.3842318479277033, "grad_norm": 0.3541015088558197, "learning_rate": 6.778656227135505e-05, "loss": 1.4894, "step": 3699 }, { "epoch": 0.3843357224472837, "grad_norm": 0.3923327326774597, "learning_rate": 6.777131204793502e-05, "loss": 1.5972, "step": 3700 }, { "epoch": 0.38443959696686403, "grad_norm": 0.38584020733833313, "learning_rate": 6.775605993200889e-05, "loss": 1.474, "step": 3701 }, { "epoch": 0.38454347148644435, "grad_norm": 0.42586928606033325, "learning_rate": 6.774080592520088e-05, "loss": 1.8876, "step": 3702 }, { "epoch": 0.38464734600602474, "grad_norm": 0.4268626570701599, "learning_rate": 6.77255500291354e-05, "loss": 1.8743, "step": 3703 }, { "epoch": 0.38475122052560506, "grad_norm": 0.43447503447532654, "learning_rate": 6.771029224543713e-05, "loss": 1.9229, "step": 3704 }, { "epoch": 0.3848550950451854, "grad_norm": 0.36980974674224854, "learning_rate": 6.769503257573089e-05, "loss": 1.5463, "step": 3705 }, { "epoch": 0.3849589695647658, "grad_norm": 0.3850055932998657, "learning_rate": 6.767977102164167e-05, "loss": 1.724, "step": 3706 }, { "epoch": 0.3850628440843461, "grad_norm": 0.4217112064361572, "learning_rate": 6.766450758479479e-05, "loss": 1.965, "step": 3707 }, { "epoch": 0.3851667186039265, "grad_norm": 0.3799114525318146, "learning_rate": 6.76492422668156e-05, "loss": 1.6066, "step": 3708 }, { "epoch": 0.3852705931235068, "grad_norm": 0.39665165543556213, "learning_rate": 6.76339750693298e-05, "loss": 1.8164, "step": 3709 }, { "epoch": 0.38537446764308714, "grad_norm": 0.38629379868507385, "learning_rate": 6.76187059939632e-05, "loss": 1.6843, "step": 3710 }, { "epoch": 0.3854783421626675, "grad_norm": 0.4092472493648529, "learning_rate": 6.760343504234184e-05, "loss": 1.5059, "step": 3711 }, { "epoch": 0.38558221668224785, "grad_norm": 0.40238040685653687, "learning_rate": 6.758816221609196e-05, "loss": 1.8893, "step": 3712 }, { "epoch": 0.3856860912018282, "grad_norm": 0.39806345105171204, "learning_rate": 6.757288751684e-05, "loss": 1.7954, "step": 3713 }, { "epoch": 0.38578996572140856, "grad_norm": 0.41928762197494507, "learning_rate": 6.75576109462126e-05, "loss": 1.8645, "step": 3714 }, { "epoch": 0.3858938402409889, "grad_norm": 0.4039596915245056, "learning_rate": 6.75423325058366e-05, "loss": 1.8103, "step": 3715 }, { "epoch": 0.3859977147605692, "grad_norm": 0.400309681892395, "learning_rate": 6.7527052197339e-05, "loss": 1.6896, "step": 3716 }, { "epoch": 0.3861015892801496, "grad_norm": 0.43688875436782837, "learning_rate": 6.751177002234706e-05, "loss": 1.9697, "step": 3717 }, { "epoch": 0.3862054637997299, "grad_norm": 0.5165666937828064, "learning_rate": 6.749648598248821e-05, "loss": 1.9304, "step": 3718 }, { "epoch": 0.38630933831931025, "grad_norm": 0.4006554186344147, "learning_rate": 6.74812000793901e-05, "loss": 1.6306, "step": 3719 }, { "epoch": 0.38641321283889063, "grad_norm": 0.3924461007118225, "learning_rate": 6.74659123146805e-05, "loss": 1.6424, "step": 3720 }, { "epoch": 0.38651708735847096, "grad_norm": 0.35347139835357666, "learning_rate": 6.745062268998752e-05, "loss": 1.4823, "step": 3721 }, { "epoch": 0.38662096187805134, "grad_norm": 0.41199618577957153, "learning_rate": 6.743533120693932e-05, "loss": 1.6857, "step": 3722 }, { "epoch": 0.38672483639763167, "grad_norm": 0.40314194560050964, "learning_rate": 6.742003786716435e-05, "loss": 1.7333, "step": 3723 }, { "epoch": 0.386828710917212, "grad_norm": 0.402383416891098, "learning_rate": 6.740474267229123e-05, "loss": 1.7304, "step": 3724 }, { "epoch": 0.3869325854367924, "grad_norm": 0.4002920687198639, "learning_rate": 6.738944562394878e-05, "loss": 1.6984, "step": 3725 }, { "epoch": 0.3870364599563727, "grad_norm": 0.3783153295516968, "learning_rate": 6.737414672376601e-05, "loss": 1.6048, "step": 3726 }, { "epoch": 0.38714033447595303, "grad_norm": 0.4129243791103363, "learning_rate": 6.735884597337214e-05, "loss": 1.8856, "step": 3727 }, { "epoch": 0.3872442089955334, "grad_norm": 0.36337247490882874, "learning_rate": 6.73435433743966e-05, "loss": 1.6613, "step": 3728 }, { "epoch": 0.38734808351511374, "grad_norm": 0.38264912366867065, "learning_rate": 6.732823892846895e-05, "loss": 1.6385, "step": 3729 }, { "epoch": 0.38745195803469407, "grad_norm": 0.394731342792511, "learning_rate": 6.731293263721902e-05, "loss": 1.6883, "step": 3730 }, { "epoch": 0.38755583255427445, "grad_norm": 0.3739268183708191, "learning_rate": 6.72976245022768e-05, "loss": 1.677, "step": 3731 }, { "epoch": 0.3876597070738548, "grad_norm": 0.37496718764305115, "learning_rate": 6.728231452527251e-05, "loss": 1.8645, "step": 3732 }, { "epoch": 0.3877635815934351, "grad_norm": 0.39028218388557434, "learning_rate": 6.726700270783655e-05, "loss": 1.7683, "step": 3733 }, { "epoch": 0.3878674561130155, "grad_norm": 0.4083595871925354, "learning_rate": 6.725168905159945e-05, "loss": 1.821, "step": 3734 }, { "epoch": 0.3879713306325958, "grad_norm": 0.3999233543872833, "learning_rate": 6.723637355819206e-05, "loss": 1.7861, "step": 3735 }, { "epoch": 0.3880752051521762, "grad_norm": 0.3761618435382843, "learning_rate": 6.722105622924533e-05, "loss": 1.7543, "step": 3736 }, { "epoch": 0.3881790796717565, "grad_norm": 0.4310716390609741, "learning_rate": 6.720573706639044e-05, "loss": 1.7804, "step": 3737 }, { "epoch": 0.38828295419133685, "grad_norm": 0.3662225902080536, "learning_rate": 6.719041607125877e-05, "loss": 1.5168, "step": 3738 }, { "epoch": 0.38838682871091723, "grad_norm": 0.3875160813331604, "learning_rate": 6.717509324548187e-05, "loss": 1.801, "step": 3739 }, { "epoch": 0.38849070323049756, "grad_norm": 0.379233181476593, "learning_rate": 6.715976859069151e-05, "loss": 1.6002, "step": 3740 }, { "epoch": 0.3885945777500779, "grad_norm": 0.3968057632446289, "learning_rate": 6.714444210851964e-05, "loss": 1.5895, "step": 3741 }, { "epoch": 0.38869845226965827, "grad_norm": 0.45486825704574585, "learning_rate": 6.712911380059845e-05, "loss": 1.8872, "step": 3742 }, { "epoch": 0.3888023267892386, "grad_norm": 0.38296419382095337, "learning_rate": 6.711378366856023e-05, "loss": 1.7437, "step": 3743 }, { "epoch": 0.3889062013088189, "grad_norm": 0.394843190908432, "learning_rate": 6.709845171403754e-05, "loss": 1.7002, "step": 3744 }, { "epoch": 0.3890100758283993, "grad_norm": 0.4002247154712677, "learning_rate": 6.70831179386631e-05, "loss": 1.7328, "step": 3745 }, { "epoch": 0.38911395034797963, "grad_norm": 0.45309674739837646, "learning_rate": 6.706778234406987e-05, "loss": 1.7358, "step": 3746 }, { "epoch": 0.38921782486756, "grad_norm": 0.40047159790992737, "learning_rate": 6.705244493189094e-05, "loss": 1.7627, "step": 3747 }, { "epoch": 0.38932169938714034, "grad_norm": 0.3902556300163269, "learning_rate": 6.703710570375966e-05, "loss": 1.6005, "step": 3748 }, { "epoch": 0.38942557390672067, "grad_norm": 0.4025695025920868, "learning_rate": 6.702176466130953e-05, "loss": 1.6763, "step": 3749 }, { "epoch": 0.38952944842630105, "grad_norm": 0.40774810314178467, "learning_rate": 6.70064218061742e-05, "loss": 1.8209, "step": 3750 }, { "epoch": 0.3896333229458814, "grad_norm": 0.36347082257270813, "learning_rate": 6.699107713998762e-05, "loss": 1.4882, "step": 3751 }, { "epoch": 0.3897371974654617, "grad_norm": 0.39139434695243835, "learning_rate": 6.697573066438387e-05, "loss": 1.713, "step": 3752 }, { "epoch": 0.3898410719850421, "grad_norm": 0.4328778386116028, "learning_rate": 6.69603823809972e-05, "loss": 1.9039, "step": 3753 }, { "epoch": 0.3899449465046224, "grad_norm": 0.38652610778808594, "learning_rate": 6.694503229146212e-05, "loss": 1.7179, "step": 3754 }, { "epoch": 0.39004882102420274, "grad_norm": 0.36628860235214233, "learning_rate": 6.692968039741328e-05, "loss": 1.5286, "step": 3755 }, { "epoch": 0.3901526955437831, "grad_norm": 0.3640875220298767, "learning_rate": 6.691432670048552e-05, "loss": 1.6235, "step": 3756 }, { "epoch": 0.39025657006336345, "grad_norm": 0.3808233439922333, "learning_rate": 6.689897120231389e-05, "loss": 1.6554, "step": 3757 }, { "epoch": 0.3903604445829438, "grad_norm": 0.4116235375404358, "learning_rate": 6.688361390453368e-05, "loss": 1.7059, "step": 3758 }, { "epoch": 0.39046431910252416, "grad_norm": 0.39567288756370544, "learning_rate": 6.686825480878026e-05, "loss": 1.5193, "step": 3759 }, { "epoch": 0.3905681936221045, "grad_norm": 0.39557141065597534, "learning_rate": 6.685289391668929e-05, "loss": 1.7383, "step": 3760 }, { "epoch": 0.39067206814168487, "grad_norm": 0.40352147817611694, "learning_rate": 6.683753122989656e-05, "loss": 1.6945, "step": 3761 }, { "epoch": 0.3907759426612652, "grad_norm": 0.38249486684799194, "learning_rate": 6.682216675003811e-05, "loss": 1.5765, "step": 3762 }, { "epoch": 0.3908798171808455, "grad_norm": 0.3883412480354309, "learning_rate": 6.680680047875014e-05, "loss": 1.7523, "step": 3763 }, { "epoch": 0.3909836917004259, "grad_norm": 0.38825148344039917, "learning_rate": 6.679143241766898e-05, "loss": 1.7129, "step": 3764 }, { "epoch": 0.39108756622000623, "grad_norm": 0.4497055113315582, "learning_rate": 6.677606256843126e-05, "loss": 1.7823, "step": 3765 }, { "epoch": 0.39119144073958656, "grad_norm": 0.39453697204589844, "learning_rate": 6.676069093267374e-05, "loss": 1.7184, "step": 3766 }, { "epoch": 0.39129531525916694, "grad_norm": 0.4167402982711792, "learning_rate": 6.674531751203335e-05, "loss": 1.921, "step": 3767 }, { "epoch": 0.39139918977874727, "grad_norm": 0.39788275957107544, "learning_rate": 6.672994230814729e-05, "loss": 1.6948, "step": 3768 }, { "epoch": 0.3915030642983276, "grad_norm": 0.3702066242694855, "learning_rate": 6.671456532265285e-05, "loss": 1.6143, "step": 3769 }, { "epoch": 0.391606938817908, "grad_norm": 0.4112524390220642, "learning_rate": 6.669918655718759e-05, "loss": 1.6526, "step": 3770 }, { "epoch": 0.3917108133374883, "grad_norm": 0.4594886898994446, "learning_rate": 6.668380601338924e-05, "loss": 1.8916, "step": 3771 }, { "epoch": 0.3918146878570687, "grad_norm": 0.3847801089286804, "learning_rate": 6.666842369289566e-05, "loss": 1.803, "step": 3772 }, { "epoch": 0.391918562376649, "grad_norm": 0.413424551486969, "learning_rate": 6.665303959734499e-05, "loss": 1.7646, "step": 3773 }, { "epoch": 0.39202243689622934, "grad_norm": 0.42914092540740967, "learning_rate": 6.66376537283755e-05, "loss": 1.8358, "step": 3774 }, { "epoch": 0.3921263114158097, "grad_norm": 0.42516449093818665, "learning_rate": 6.662226608762566e-05, "loss": 1.7087, "step": 3775 }, { "epoch": 0.39223018593539005, "grad_norm": 0.40384045243263245, "learning_rate": 6.660687667673416e-05, "loss": 1.81, "step": 3776 }, { "epoch": 0.3923340604549704, "grad_norm": 0.3911544978618622, "learning_rate": 6.659148549733983e-05, "loss": 1.6589, "step": 3777 }, { "epoch": 0.39243793497455076, "grad_norm": 0.3848823606967926, "learning_rate": 6.657609255108169e-05, "loss": 1.653, "step": 3778 }, { "epoch": 0.3925418094941311, "grad_norm": 0.38892778754234314, "learning_rate": 6.656069783959901e-05, "loss": 1.7404, "step": 3779 }, { "epoch": 0.3926456840137114, "grad_norm": 0.4406839609146118, "learning_rate": 6.654530136453118e-05, "loss": 2.1317, "step": 3780 }, { "epoch": 0.3927495585332918, "grad_norm": 0.39131468534469604, "learning_rate": 6.652990312751781e-05, "loss": 1.864, "step": 3781 }, { "epoch": 0.3928534330528721, "grad_norm": 0.38605618476867676, "learning_rate": 6.65145031301987e-05, "loss": 1.7456, "step": 3782 }, { "epoch": 0.39295730757245245, "grad_norm": 0.3929480314254761, "learning_rate": 6.649910137421383e-05, "loss": 1.7215, "step": 3783 }, { "epoch": 0.39306118209203283, "grad_norm": 0.4117045998573303, "learning_rate": 6.648369786120336e-05, "loss": 1.8767, "step": 3784 }, { "epoch": 0.39316505661161316, "grad_norm": 0.4199231266975403, "learning_rate": 6.646829259280765e-05, "loss": 1.7678, "step": 3785 }, { "epoch": 0.39326893113119354, "grad_norm": 0.3810809254646301, "learning_rate": 6.645288557066722e-05, "loss": 1.6417, "step": 3786 }, { "epoch": 0.39337280565077387, "grad_norm": 0.40853750705718994, "learning_rate": 6.643747679642282e-05, "loss": 1.7405, "step": 3787 }, { "epoch": 0.3934766801703542, "grad_norm": 0.3977396488189697, "learning_rate": 6.642206627171536e-05, "loss": 1.7114, "step": 3788 }, { "epoch": 0.3935805546899346, "grad_norm": 0.40110906958580017, "learning_rate": 6.640665399818594e-05, "loss": 1.433, "step": 3789 }, { "epoch": 0.3936844292095149, "grad_norm": 0.37747594714164734, "learning_rate": 6.639123997747585e-05, "loss": 1.6693, "step": 3790 }, { "epoch": 0.39378830372909523, "grad_norm": 0.39845001697540283, "learning_rate": 6.637582421122657e-05, "loss": 1.7434, "step": 3791 }, { "epoch": 0.3938921782486756, "grad_norm": 0.3821636438369751, "learning_rate": 6.636040670107972e-05, "loss": 1.6637, "step": 3792 }, { "epoch": 0.39399605276825594, "grad_norm": 0.4459479749202728, "learning_rate": 6.63449874486772e-05, "loss": 1.9512, "step": 3793 }, { "epoch": 0.39409992728783627, "grad_norm": 0.35890012979507446, "learning_rate": 6.632956645566099e-05, "loss": 1.5369, "step": 3794 }, { "epoch": 0.39420380180741665, "grad_norm": 0.40800046920776367, "learning_rate": 6.631414372367333e-05, "loss": 1.7759, "step": 3795 }, { "epoch": 0.394307676326997, "grad_norm": 0.42685750126838684, "learning_rate": 6.629871925435663e-05, "loss": 1.8498, "step": 3796 }, { "epoch": 0.3944115508465773, "grad_norm": 0.3911482095718384, "learning_rate": 6.628329304935346e-05, "loss": 1.7042, "step": 3797 }, { "epoch": 0.3945154253661577, "grad_norm": 0.3812938332557678, "learning_rate": 6.62678651103066e-05, "loss": 1.6208, "step": 3798 }, { "epoch": 0.394619299885738, "grad_norm": 0.4290582239627838, "learning_rate": 6.625243543885899e-05, "loss": 1.8095, "step": 3799 }, { "epoch": 0.3947231744053184, "grad_norm": 0.38044747710227966, "learning_rate": 6.623700403665378e-05, "loss": 1.821, "step": 3800 }, { "epoch": 0.3948270489248987, "grad_norm": 0.42807307839393616, "learning_rate": 6.622157090533431e-05, "loss": 1.9329, "step": 3801 }, { "epoch": 0.39493092344447905, "grad_norm": 0.3865020275115967, "learning_rate": 6.620613604654405e-05, "loss": 1.641, "step": 3802 }, { "epoch": 0.39503479796405944, "grad_norm": 0.4715527892112732, "learning_rate": 6.619069946192672e-05, "loss": 1.8927, "step": 3803 }, { "epoch": 0.39513867248363976, "grad_norm": 0.3802943229675293, "learning_rate": 6.61752611531262e-05, "loss": 1.7063, "step": 3804 }, { "epoch": 0.3952425470032201, "grad_norm": 0.38618597388267517, "learning_rate": 6.615982112178653e-05, "loss": 1.6576, "step": 3805 }, { "epoch": 0.3953464215228005, "grad_norm": 0.4029250741004944, "learning_rate": 6.614437936955196e-05, "loss": 1.7814, "step": 3806 }, { "epoch": 0.3954502960423808, "grad_norm": 0.3960702121257782, "learning_rate": 6.612893589806693e-05, "loss": 1.7102, "step": 3807 }, { "epoch": 0.3955541705619611, "grad_norm": 0.38714170455932617, "learning_rate": 6.611349070897604e-05, "loss": 1.6924, "step": 3808 }, { "epoch": 0.3956580450815415, "grad_norm": 0.4107656180858612, "learning_rate": 6.609804380392406e-05, "loss": 1.5255, "step": 3809 }, { "epoch": 0.39576191960112184, "grad_norm": 0.40633097290992737, "learning_rate": 6.6082595184556e-05, "loss": 1.734, "step": 3810 }, { "epoch": 0.3958657941207022, "grad_norm": 0.4659360945224762, "learning_rate": 6.606714485251699e-05, "loss": 1.7854, "step": 3811 }, { "epoch": 0.39596966864028255, "grad_norm": 0.4087457060813904, "learning_rate": 6.605169280945238e-05, "loss": 1.7713, "step": 3812 }, { "epoch": 0.3960735431598629, "grad_norm": 0.38341912627220154, "learning_rate": 6.603623905700771e-05, "loss": 1.5804, "step": 3813 }, { "epoch": 0.39617741767944326, "grad_norm": 0.41718804836273193, "learning_rate": 6.602078359682865e-05, "loss": 1.7562, "step": 3814 }, { "epoch": 0.3962812921990236, "grad_norm": 0.4072135388851166, "learning_rate": 6.600532643056112e-05, "loss": 1.6313, "step": 3815 }, { "epoch": 0.3963851667186039, "grad_norm": 0.4200040400028229, "learning_rate": 6.598986755985116e-05, "loss": 1.7867, "step": 3816 }, { "epoch": 0.3964890412381843, "grad_norm": 0.38370397686958313, "learning_rate": 6.597440698634503e-05, "loss": 1.5696, "step": 3817 }, { "epoch": 0.3965929157577646, "grad_norm": 0.4030844271183014, "learning_rate": 6.595894471168917e-05, "loss": 1.688, "step": 3818 }, { "epoch": 0.39669679027734495, "grad_norm": 0.4080040454864502, "learning_rate": 6.594348073753019e-05, "loss": 1.7786, "step": 3819 }, { "epoch": 0.39680066479692533, "grad_norm": 0.36491355299949646, "learning_rate": 6.592801506551486e-05, "loss": 1.5053, "step": 3820 }, { "epoch": 0.39690453931650566, "grad_norm": 0.4053652882575989, "learning_rate": 6.591254769729018e-05, "loss": 1.7091, "step": 3821 }, { "epoch": 0.397008413836086, "grad_norm": 0.40529873967170715, "learning_rate": 6.589707863450327e-05, "loss": 1.6866, "step": 3822 }, { "epoch": 0.39711228835566637, "grad_norm": 0.3853190243244171, "learning_rate": 6.588160787880151e-05, "loss": 1.703, "step": 3823 }, { "epoch": 0.3972161628752467, "grad_norm": 0.39480873942375183, "learning_rate": 6.586613543183237e-05, "loss": 1.6685, "step": 3824 }, { "epoch": 0.3973200373948271, "grad_norm": 0.38072872161865234, "learning_rate": 6.58506612952436e-05, "loss": 1.6512, "step": 3825 }, { "epoch": 0.3974239119144074, "grad_norm": 0.42218974232673645, "learning_rate": 6.583518547068302e-05, "loss": 1.7624, "step": 3826 }, { "epoch": 0.39752778643398773, "grad_norm": 0.34545183181762695, "learning_rate": 6.581970795979871e-05, "loss": 1.4234, "step": 3827 }, { "epoch": 0.3976316609535681, "grad_norm": 0.39277178049087524, "learning_rate": 6.580422876423891e-05, "loss": 1.7441, "step": 3828 }, { "epoch": 0.39773553547314844, "grad_norm": 0.39970070123672485, "learning_rate": 6.578874788565201e-05, "loss": 1.6835, "step": 3829 }, { "epoch": 0.39783940999272877, "grad_norm": 0.38639700412750244, "learning_rate": 6.577326532568663e-05, "loss": 1.5985, "step": 3830 }, { "epoch": 0.39794328451230915, "grad_norm": 0.3735407292842865, "learning_rate": 6.575778108599151e-05, "loss": 1.734, "step": 3831 }, { "epoch": 0.3980471590318895, "grad_norm": 0.38914212584495544, "learning_rate": 6.574229516821564e-05, "loss": 1.6279, "step": 3832 }, { "epoch": 0.3981510335514698, "grad_norm": 0.38888975977897644, "learning_rate": 6.572680757400813e-05, "loss": 1.7271, "step": 3833 }, { "epoch": 0.3982549080710502, "grad_norm": 0.4465731382369995, "learning_rate": 6.571131830501827e-05, "loss": 1.8058, "step": 3834 }, { "epoch": 0.3983587825906305, "grad_norm": 0.3945710062980652, "learning_rate": 6.569582736289556e-05, "loss": 1.6509, "step": 3835 }, { "epoch": 0.39846265711021084, "grad_norm": 0.39435771107673645, "learning_rate": 6.56803347492897e-05, "loss": 1.5513, "step": 3836 }, { "epoch": 0.3985665316297912, "grad_norm": 0.3789151906967163, "learning_rate": 6.566484046585047e-05, "loss": 1.5777, "step": 3837 }, { "epoch": 0.39867040614937155, "grad_norm": 0.39071643352508545, "learning_rate": 6.564934451422793e-05, "loss": 1.8612, "step": 3838 }, { "epoch": 0.39877428066895193, "grad_norm": 0.3789994716644287, "learning_rate": 6.563384689607227e-05, "loss": 1.7228, "step": 3839 }, { "epoch": 0.39887815518853226, "grad_norm": 0.3888130486011505, "learning_rate": 6.561834761303386e-05, "loss": 1.7733, "step": 3840 }, { "epoch": 0.3989820297081126, "grad_norm": 0.41186729073524475, "learning_rate": 6.560284666676327e-05, "loss": 1.8526, "step": 3841 }, { "epoch": 0.39908590422769297, "grad_norm": 0.3710212707519531, "learning_rate": 6.558734405891121e-05, "loss": 1.4567, "step": 3842 }, { "epoch": 0.3991897787472733, "grad_norm": 0.4276915490627289, "learning_rate": 6.557183979112859e-05, "loss": 1.8229, "step": 3843 }, { "epoch": 0.3992936532668536, "grad_norm": 0.4046017527580261, "learning_rate": 6.55563338650665e-05, "loss": 1.7953, "step": 3844 }, { "epoch": 0.399397527786434, "grad_norm": 0.36397168040275574, "learning_rate": 6.554082628237619e-05, "loss": 1.4815, "step": 3845 }, { "epoch": 0.39950140230601433, "grad_norm": 0.49856171011924744, "learning_rate": 6.552531704470911e-05, "loss": 1.9411, "step": 3846 }, { "epoch": 0.39960527682559466, "grad_norm": 0.36472898721694946, "learning_rate": 6.550980615371687e-05, "loss": 1.6889, "step": 3847 }, { "epoch": 0.39970915134517504, "grad_norm": 0.37358102202415466, "learning_rate": 6.549429361105126e-05, "loss": 1.6639, "step": 3848 }, { "epoch": 0.39981302586475537, "grad_norm": 0.3778083920478821, "learning_rate": 6.547877941836424e-05, "loss": 1.7107, "step": 3849 }, { "epoch": 0.39991690038433575, "grad_norm": 0.424234002828598, "learning_rate": 6.546326357730794e-05, "loss": 1.7377, "step": 3850 }, { "epoch": 0.4000207749039161, "grad_norm": 0.4264697730541229, "learning_rate": 6.544774608953468e-05, "loss": 1.8146, "step": 3851 }, { "epoch": 0.4001246494234964, "grad_norm": 0.4023953676223755, "learning_rate": 6.543222695669697e-05, "loss": 1.6474, "step": 3852 }, { "epoch": 0.4002285239430768, "grad_norm": 0.41954219341278076, "learning_rate": 6.541670618044748e-05, "loss": 1.8214, "step": 3853 }, { "epoch": 0.4003323984626571, "grad_norm": 0.41983938217163086, "learning_rate": 6.540118376243903e-05, "loss": 1.7134, "step": 3854 }, { "epoch": 0.40043627298223744, "grad_norm": 0.4455628991127014, "learning_rate": 6.538565970432463e-05, "loss": 1.974, "step": 3855 }, { "epoch": 0.4005401475018178, "grad_norm": 0.41283226013183594, "learning_rate": 6.537013400775748e-05, "loss": 1.5895, "step": 3856 }, { "epoch": 0.40064402202139815, "grad_norm": 0.4059913158416748, "learning_rate": 6.535460667439097e-05, "loss": 1.6404, "step": 3857 }, { "epoch": 0.4007478965409785, "grad_norm": 0.4247495234012604, "learning_rate": 6.53390777058786e-05, "loss": 1.8849, "step": 3858 }, { "epoch": 0.40085177106055886, "grad_norm": 0.4334213137626648, "learning_rate": 6.532354710387411e-05, "loss": 1.7472, "step": 3859 }, { "epoch": 0.4009556455801392, "grad_norm": 0.3892272114753723, "learning_rate": 6.530801487003139e-05, "loss": 1.601, "step": 3860 }, { "epoch": 0.4010595200997195, "grad_norm": 0.3871516287326813, "learning_rate": 6.529248100600449e-05, "loss": 1.7275, "step": 3861 }, { "epoch": 0.4011633946192999, "grad_norm": 0.3974870443344116, "learning_rate": 6.527694551344765e-05, "loss": 1.6634, "step": 3862 }, { "epoch": 0.4012672691388802, "grad_norm": 0.3988457918167114, "learning_rate": 6.526140839401527e-05, "loss": 1.6647, "step": 3863 }, { "epoch": 0.4013711436584606, "grad_norm": 0.42209574580192566, "learning_rate": 6.524586964936194e-05, "loss": 1.8112, "step": 3864 }, { "epoch": 0.40147501817804093, "grad_norm": 0.4116060733795166, "learning_rate": 6.523032928114242e-05, "loss": 1.7179, "step": 3865 }, { "epoch": 0.40157889269762126, "grad_norm": 0.341496080160141, "learning_rate": 6.521478729101164e-05, "loss": 1.4234, "step": 3866 }, { "epoch": 0.40168276721720164, "grad_norm": 0.3907933831214905, "learning_rate": 6.519924368062468e-05, "loss": 1.7079, "step": 3867 }, { "epoch": 0.40178664173678197, "grad_norm": 0.4084981679916382, "learning_rate": 6.518369845163683e-05, "loss": 1.6538, "step": 3868 }, { "epoch": 0.4018905162563623, "grad_norm": 0.37673652172088623, "learning_rate": 6.516815160570355e-05, "loss": 1.7034, "step": 3869 }, { "epoch": 0.4019943907759427, "grad_norm": 0.36309683322906494, "learning_rate": 6.515260314448041e-05, "loss": 1.5712, "step": 3870 }, { "epoch": 0.402098265295523, "grad_norm": 0.40556076169013977, "learning_rate": 6.513705306962325e-05, "loss": 1.6909, "step": 3871 }, { "epoch": 0.40220213981510333, "grad_norm": 0.39010584354400635, "learning_rate": 6.5121501382788e-05, "loss": 1.5812, "step": 3872 }, { "epoch": 0.4023060143346837, "grad_norm": 0.4054809510707855, "learning_rate": 6.51059480856308e-05, "loss": 1.6611, "step": 3873 }, { "epoch": 0.40240988885426404, "grad_norm": 0.3722795844078064, "learning_rate": 6.509039317980798e-05, "loss": 1.5964, "step": 3874 }, { "epoch": 0.40251376337384437, "grad_norm": 0.41028568148612976, "learning_rate": 6.5074836666976e-05, "loss": 1.6734, "step": 3875 }, { "epoch": 0.40261763789342475, "grad_norm": 0.39521047472953796, "learning_rate": 6.505927854879148e-05, "loss": 1.5993, "step": 3876 }, { "epoch": 0.4027215124130051, "grad_norm": 0.3818994462490082, "learning_rate": 6.504371882691126e-05, "loss": 1.6791, "step": 3877 }, { "epoch": 0.40282538693258546, "grad_norm": 0.4162769615650177, "learning_rate": 6.502815750299235e-05, "loss": 1.7729, "step": 3878 }, { "epoch": 0.4029292614521658, "grad_norm": 0.38468343019485474, "learning_rate": 6.501259457869187e-05, "loss": 1.7063, "step": 3879 }, { "epoch": 0.4030331359717461, "grad_norm": 0.3765242397785187, "learning_rate": 6.49970300556672e-05, "loss": 1.546, "step": 3880 }, { "epoch": 0.4031370104913265, "grad_norm": 0.3874824345111847, "learning_rate": 6.498146393557578e-05, "loss": 1.7616, "step": 3881 }, { "epoch": 0.4032408850109068, "grad_norm": 0.39522698521614075, "learning_rate": 6.496589622007532e-05, "loss": 1.7472, "step": 3882 }, { "epoch": 0.40334475953048715, "grad_norm": 0.3761104941368103, "learning_rate": 6.495032691082367e-05, "loss": 1.5378, "step": 3883 }, { "epoch": 0.40344863405006753, "grad_norm": 0.4124884605407715, "learning_rate": 6.493475600947879e-05, "loss": 1.6228, "step": 3884 }, { "epoch": 0.40355250856964786, "grad_norm": 0.3794013559818268, "learning_rate": 6.491918351769891e-05, "loss": 1.8077, "step": 3885 }, { "epoch": 0.4036563830892282, "grad_norm": 0.43968185782432556, "learning_rate": 6.490360943714235e-05, "loss": 1.8393, "step": 3886 }, { "epoch": 0.40376025760880857, "grad_norm": 0.4507423937320709, "learning_rate": 6.488803376946764e-05, "loss": 1.9139, "step": 3887 }, { "epoch": 0.4038641321283889, "grad_norm": 0.39006999135017395, "learning_rate": 6.48724565163335e-05, "loss": 1.7191, "step": 3888 }, { "epoch": 0.4039680066479693, "grad_norm": 0.3846725523471832, "learning_rate": 6.48568776793987e-05, "loss": 1.6855, "step": 3889 }, { "epoch": 0.4040718811675496, "grad_norm": 0.4519991874694824, "learning_rate": 6.484129726032233e-05, "loss": 1.7529, "step": 3890 }, { "epoch": 0.40417575568712993, "grad_norm": 0.36061856150627136, "learning_rate": 6.482571526076357e-05, "loss": 1.5992, "step": 3891 }, { "epoch": 0.4042796302067103, "grad_norm": 0.4295485317707062, "learning_rate": 6.481013168238178e-05, "loss": 1.7882, "step": 3892 }, { "epoch": 0.40438350472629064, "grad_norm": 0.4181903600692749, "learning_rate": 6.479454652683649e-05, "loss": 1.9548, "step": 3893 }, { "epoch": 0.40448737924587097, "grad_norm": 0.37351468205451965, "learning_rate": 6.47789597957874e-05, "loss": 1.5784, "step": 3894 }, { "epoch": 0.40459125376545135, "grad_norm": 0.505387544631958, "learning_rate": 6.476337149089438e-05, "loss": 1.9224, "step": 3895 }, { "epoch": 0.4046951282850317, "grad_norm": 0.3897416293621063, "learning_rate": 6.474778161381743e-05, "loss": 1.4597, "step": 3896 }, { "epoch": 0.404799002804612, "grad_norm": 0.44174665212631226, "learning_rate": 6.47321901662168e-05, "loss": 1.9036, "step": 3897 }, { "epoch": 0.4049028773241924, "grad_norm": 0.39586788415908813, "learning_rate": 6.471659714975282e-05, "loss": 1.802, "step": 3898 }, { "epoch": 0.4050067518437727, "grad_norm": 0.40716326236724854, "learning_rate": 6.470100256608603e-05, "loss": 1.7556, "step": 3899 }, { "epoch": 0.40511062636335304, "grad_norm": 0.3839757740497589, "learning_rate": 6.468540641687716e-05, "loss": 1.6419, "step": 3900 }, { "epoch": 0.4052145008829334, "grad_norm": 0.3808140754699707, "learning_rate": 6.466980870378704e-05, "loss": 1.7802, "step": 3901 }, { "epoch": 0.40531837540251375, "grad_norm": 0.4827220141887665, "learning_rate": 6.465420942847673e-05, "loss": 1.9368, "step": 3902 }, { "epoch": 0.40542224992209414, "grad_norm": 0.42109206318855286, "learning_rate": 6.463860859260742e-05, "loss": 1.8801, "step": 3903 }, { "epoch": 0.40552612444167446, "grad_norm": 0.41569140553474426, "learning_rate": 6.462300619784048e-05, "loss": 1.8356, "step": 3904 }, { "epoch": 0.4056299989612548, "grad_norm": 0.3784182071685791, "learning_rate": 6.460740224583746e-05, "loss": 1.5941, "step": 3905 }, { "epoch": 0.40573387348083517, "grad_norm": 0.38103383779525757, "learning_rate": 6.459179673826001e-05, "loss": 1.8269, "step": 3906 }, { "epoch": 0.4058377480004155, "grad_norm": 0.38035640120506287, "learning_rate": 6.457618967677004e-05, "loss": 1.7104, "step": 3907 }, { "epoch": 0.4059416225199958, "grad_norm": 0.4427489638328552, "learning_rate": 6.456058106302958e-05, "loss": 2.0598, "step": 3908 }, { "epoch": 0.4060454970395762, "grad_norm": 0.38776329159736633, "learning_rate": 6.454497089870082e-05, "loss": 1.8005, "step": 3909 }, { "epoch": 0.40614937155915654, "grad_norm": 0.39295095205307007, "learning_rate": 6.45293591854461e-05, "loss": 1.5732, "step": 3910 }, { "epoch": 0.40625324607873686, "grad_norm": 0.4256858229637146, "learning_rate": 6.451374592492796e-05, "loss": 1.8357, "step": 3911 }, { "epoch": 0.40635712059831725, "grad_norm": 0.45658764243125916, "learning_rate": 6.449813111880909e-05, "loss": 1.851, "step": 3912 }, { "epoch": 0.40646099511789757, "grad_norm": 0.40819069743156433, "learning_rate": 6.448251476875235e-05, "loss": 1.4937, "step": 3913 }, { "epoch": 0.4065648696374779, "grad_norm": 0.35700273513793945, "learning_rate": 6.446689687642076e-05, "loss": 1.6035, "step": 3914 }, { "epoch": 0.4066687441570583, "grad_norm": 0.3629886209964752, "learning_rate": 6.445127744347748e-05, "loss": 1.5591, "step": 3915 }, { "epoch": 0.4067726186766386, "grad_norm": 0.38591131567955017, "learning_rate": 6.443565647158589e-05, "loss": 1.5891, "step": 3916 }, { "epoch": 0.406876493196219, "grad_norm": 0.41065141558647156, "learning_rate": 6.442003396240949e-05, "loss": 1.8029, "step": 3917 }, { "epoch": 0.4069803677157993, "grad_norm": 0.4433719515800476, "learning_rate": 6.440440991761195e-05, "loss": 1.8338, "step": 3918 }, { "epoch": 0.40708424223537965, "grad_norm": 0.40136539936065674, "learning_rate": 6.438878433885711e-05, "loss": 1.5756, "step": 3919 }, { "epoch": 0.40718811675496003, "grad_norm": 0.39997127652168274, "learning_rate": 6.437315722780898e-05, "loss": 1.6578, "step": 3920 }, { "epoch": 0.40729199127454035, "grad_norm": 0.41028451919555664, "learning_rate": 6.435752858613171e-05, "loss": 1.7394, "step": 3921 }, { "epoch": 0.4073958657941207, "grad_norm": 0.3836668133735657, "learning_rate": 6.434189841548965e-05, "loss": 1.7368, "step": 3922 }, { "epoch": 0.40749974031370106, "grad_norm": 0.38762718439102173, "learning_rate": 6.432626671754729e-05, "loss": 1.6122, "step": 3923 }, { "epoch": 0.4076036148332814, "grad_norm": 0.37233954668045044, "learning_rate": 6.431063349396926e-05, "loss": 1.5676, "step": 3924 }, { "epoch": 0.4077074893528617, "grad_norm": 0.3751562237739563, "learning_rate": 6.429499874642038e-05, "loss": 1.5929, "step": 3925 }, { "epoch": 0.4078113638724421, "grad_norm": 0.41311609745025635, "learning_rate": 6.427936247656565e-05, "loss": 1.4869, "step": 3926 }, { "epoch": 0.40791523839202243, "grad_norm": 0.37297797203063965, "learning_rate": 6.42637246860702e-05, "loss": 1.6425, "step": 3927 }, { "epoch": 0.4080191129116028, "grad_norm": 0.4266083240509033, "learning_rate": 6.424808537659932e-05, "loss": 1.7858, "step": 3928 }, { "epoch": 0.40812298743118314, "grad_norm": 0.46376118063926697, "learning_rate": 6.423244454981851e-05, "loss": 1.7072, "step": 3929 }, { "epoch": 0.40822686195076346, "grad_norm": 0.4833356440067291, "learning_rate": 6.421680220739336e-05, "loss": 1.792, "step": 3930 }, { "epoch": 0.40833073647034385, "grad_norm": 0.42818766832351685, "learning_rate": 6.420115835098967e-05, "loss": 1.9283, "step": 3931 }, { "epoch": 0.4084346109899242, "grad_norm": 0.41609877347946167, "learning_rate": 6.418551298227338e-05, "loss": 1.6607, "step": 3932 }, { "epoch": 0.4085384855095045, "grad_norm": 0.3854401111602783, "learning_rate": 6.416986610291064e-05, "loss": 1.7383, "step": 3933 }, { "epoch": 0.4086423600290849, "grad_norm": 0.4421563744544983, "learning_rate": 6.415421771456766e-05, "loss": 1.5812, "step": 3934 }, { "epoch": 0.4087462345486652, "grad_norm": 0.40960294008255005, "learning_rate": 6.41385678189109e-05, "loss": 1.7514, "step": 3935 }, { "epoch": 0.40885010906824554, "grad_norm": 0.37743479013442993, "learning_rate": 6.412291641760697e-05, "loss": 1.6141, "step": 3936 }, { "epoch": 0.4089539835878259, "grad_norm": 0.3768305480480194, "learning_rate": 6.410726351232259e-05, "loss": 1.6868, "step": 3937 }, { "epoch": 0.40905785810740625, "grad_norm": 0.40039536356925964, "learning_rate": 6.409160910472469e-05, "loss": 1.7051, "step": 3938 }, { "epoch": 0.4091617326269866, "grad_norm": 0.4146733283996582, "learning_rate": 6.407595319648034e-05, "loss": 1.7432, "step": 3939 }, { "epoch": 0.40926560714656696, "grad_norm": 0.3897091746330261, "learning_rate": 6.406029578925677e-05, "loss": 1.6319, "step": 3940 }, { "epoch": 0.4093694816661473, "grad_norm": 0.3977634608745575, "learning_rate": 6.404463688472138e-05, "loss": 1.6741, "step": 3941 }, { "epoch": 0.40947335618572767, "grad_norm": 0.3753795027732849, "learning_rate": 6.402897648454171e-05, "loss": 1.6135, "step": 3942 }, { "epoch": 0.409577230705308, "grad_norm": 0.40746620297431946, "learning_rate": 6.401331459038546e-05, "loss": 1.6742, "step": 3943 }, { "epoch": 0.4096811052248883, "grad_norm": 0.4224250614643097, "learning_rate": 6.399765120392054e-05, "loss": 1.7462, "step": 3944 }, { "epoch": 0.4097849797444687, "grad_norm": 0.4034820795059204, "learning_rate": 6.398198632681494e-05, "loss": 1.6549, "step": 3945 }, { "epoch": 0.40988885426404903, "grad_norm": 0.40801072120666504, "learning_rate": 6.396631996073686e-05, "loss": 1.5801, "step": 3946 }, { "epoch": 0.40999272878362936, "grad_norm": 0.4579329192638397, "learning_rate": 6.395065210735464e-05, "loss": 1.6841, "step": 3947 }, { "epoch": 0.41009660330320974, "grad_norm": 0.4300961494445801, "learning_rate": 6.39349827683368e-05, "loss": 1.8659, "step": 3948 }, { "epoch": 0.41020047782279007, "grad_norm": 0.4603518843650818, "learning_rate": 6.391931194535199e-05, "loss": 1.82, "step": 3949 }, { "epoch": 0.4103043523423704, "grad_norm": 0.36023688316345215, "learning_rate": 6.390363964006903e-05, "loss": 1.5473, "step": 3950 }, { "epoch": 0.4104082268619508, "grad_norm": 0.36991775035858154, "learning_rate": 6.38879658541569e-05, "loss": 1.554, "step": 3951 }, { "epoch": 0.4105121013815311, "grad_norm": 0.3790716528892517, "learning_rate": 6.387229058928475e-05, "loss": 1.6563, "step": 3952 }, { "epoch": 0.41061597590111143, "grad_norm": 0.38919469714164734, "learning_rate": 6.385661384712185e-05, "loss": 1.6243, "step": 3953 }, { "epoch": 0.4107198504206918, "grad_norm": 0.3934738337993622, "learning_rate": 6.384093562933765e-05, "loss": 1.6521, "step": 3954 }, { "epoch": 0.41082372494027214, "grad_norm": 0.39593058824539185, "learning_rate": 6.382525593760178e-05, "loss": 1.7887, "step": 3955 }, { "epoch": 0.4109275994598525, "grad_norm": 0.47822320461273193, "learning_rate": 6.380957477358399e-05, "loss": 1.8647, "step": 3956 }, { "epoch": 0.41103147397943285, "grad_norm": 0.44262126088142395, "learning_rate": 6.379389213895421e-05, "loss": 1.8746, "step": 3957 }, { "epoch": 0.4111353484990132, "grad_norm": 0.37435340881347656, "learning_rate": 6.377820803538253e-05, "loss": 1.6782, "step": 3958 }, { "epoch": 0.41123922301859356, "grad_norm": 0.41944101452827454, "learning_rate": 6.376252246453914e-05, "loss": 1.7188, "step": 3959 }, { "epoch": 0.4113430975381739, "grad_norm": 0.43215543031692505, "learning_rate": 6.374683542809447e-05, "loss": 1.8221, "step": 3960 }, { "epoch": 0.4114469720577542, "grad_norm": 0.40915408730506897, "learning_rate": 6.373114692771906e-05, "loss": 1.7294, "step": 3961 }, { "epoch": 0.4115508465773346, "grad_norm": 0.41122907400131226, "learning_rate": 6.371545696508358e-05, "loss": 1.708, "step": 3962 }, { "epoch": 0.4116547210969149, "grad_norm": 0.3763563930988312, "learning_rate": 6.369976554185896e-05, "loss": 1.8057, "step": 3963 }, { "epoch": 0.41175859561649525, "grad_norm": 0.4643213748931885, "learning_rate": 6.368407265971616e-05, "loss": 1.6913, "step": 3964 }, { "epoch": 0.41186247013607563, "grad_norm": 0.3934086263179779, "learning_rate": 6.366837832032635e-05, "loss": 1.8158, "step": 3965 }, { "epoch": 0.41196634465565596, "grad_norm": 0.35763460397720337, "learning_rate": 6.365268252536087e-05, "loss": 1.5065, "step": 3966 }, { "epoch": 0.41207021917523634, "grad_norm": 0.381234347820282, "learning_rate": 6.36369852764912e-05, "loss": 1.672, "step": 3967 }, { "epoch": 0.41217409369481667, "grad_norm": 0.4128655195236206, "learning_rate": 6.362128657538897e-05, "loss": 1.8548, "step": 3968 }, { "epoch": 0.412277968214397, "grad_norm": 0.42314839363098145, "learning_rate": 6.360558642372597e-05, "loss": 1.8469, "step": 3969 }, { "epoch": 0.4123818427339774, "grad_norm": 0.3665439188480377, "learning_rate": 6.358988482317414e-05, "loss": 1.6893, "step": 3970 }, { "epoch": 0.4124857172535577, "grad_norm": 0.3973866105079651, "learning_rate": 6.357418177540558e-05, "loss": 1.5002, "step": 3971 }, { "epoch": 0.41258959177313803, "grad_norm": 0.40862172842025757, "learning_rate": 6.355847728209257e-05, "loss": 1.817, "step": 3972 }, { "epoch": 0.4126934662927184, "grad_norm": 0.439984530210495, "learning_rate": 6.354277134490746e-05, "loss": 1.7698, "step": 3973 }, { "epoch": 0.41279734081229874, "grad_norm": 0.3773249685764313, "learning_rate": 6.352706396552285e-05, "loss": 1.4967, "step": 3974 }, { "epoch": 0.41290121533187907, "grad_norm": 0.4068536162376404, "learning_rate": 6.351135514561144e-05, "loss": 1.6282, "step": 3975 }, { "epoch": 0.41300508985145945, "grad_norm": 0.3690778613090515, "learning_rate": 6.34956448868461e-05, "loss": 1.637, "step": 3976 }, { "epoch": 0.4131089643710398, "grad_norm": 0.4202233552932739, "learning_rate": 6.347993319089985e-05, "loss": 1.9794, "step": 3977 }, { "epoch": 0.4132128388906201, "grad_norm": 0.44085201621055603, "learning_rate": 6.346422005944586e-05, "loss": 1.8328, "step": 3978 }, { "epoch": 0.4133167134102005, "grad_norm": 0.39549145102500916, "learning_rate": 6.344850549415746e-05, "loss": 1.7727, "step": 3979 }, { "epoch": 0.4134205879297808, "grad_norm": 0.39016398787498474, "learning_rate": 6.343278949670812e-05, "loss": 1.6293, "step": 3980 }, { "epoch": 0.4135244624493612, "grad_norm": 0.40789923071861267, "learning_rate": 6.341707206877149e-05, "loss": 1.8089, "step": 3981 }, { "epoch": 0.4136283369689415, "grad_norm": 0.41951802372932434, "learning_rate": 6.340135321202134e-05, "loss": 1.793, "step": 3982 }, { "epoch": 0.41373221148852185, "grad_norm": 0.41457945108413696, "learning_rate": 6.33856329281316e-05, "loss": 1.8366, "step": 3983 }, { "epoch": 0.41383608600810223, "grad_norm": 0.4352897107601166, "learning_rate": 6.336991121877637e-05, "loss": 1.7176, "step": 3984 }, { "epoch": 0.41393996052768256, "grad_norm": 0.414348840713501, "learning_rate": 6.335418808562988e-05, "loss": 1.8262, "step": 3985 }, { "epoch": 0.4140438350472629, "grad_norm": 0.3896345794200897, "learning_rate": 6.333846353036654e-05, "loss": 1.8285, "step": 3986 }, { "epoch": 0.41414770956684327, "grad_norm": 0.4062201678752899, "learning_rate": 6.332273755466087e-05, "loss": 1.709, "step": 3987 }, { "epoch": 0.4142515840864236, "grad_norm": 0.4064597189426422, "learning_rate": 6.330701016018757e-05, "loss": 1.7283, "step": 3988 }, { "epoch": 0.4143554586060039, "grad_norm": 0.42126670479774475, "learning_rate": 6.329128134862148e-05, "loss": 1.7227, "step": 3989 }, { "epoch": 0.4144593331255843, "grad_norm": 0.38570138812065125, "learning_rate": 6.32755511216376e-05, "loss": 1.7244, "step": 3990 }, { "epoch": 0.41456320764516463, "grad_norm": 0.4073387682437897, "learning_rate": 6.32598194809111e-05, "loss": 1.6718, "step": 3991 }, { "epoch": 0.414667082164745, "grad_norm": 0.39126601815223694, "learning_rate": 6.324408642811726e-05, "loss": 1.584, "step": 3992 }, { "epoch": 0.41477095668432534, "grad_norm": 0.38054293394088745, "learning_rate": 6.32283519649315e-05, "loss": 1.5411, "step": 3993 }, { "epoch": 0.41487483120390567, "grad_norm": 0.42494648694992065, "learning_rate": 6.321261609302945e-05, "loss": 1.8188, "step": 3994 }, { "epoch": 0.41497870572348605, "grad_norm": 0.3690442442893982, "learning_rate": 6.319687881408683e-05, "loss": 1.6147, "step": 3995 }, { "epoch": 0.4150825802430664, "grad_norm": 0.41196659207344055, "learning_rate": 6.318114012977958e-05, "loss": 1.917, "step": 3996 }, { "epoch": 0.4151864547626467, "grad_norm": 0.38468247652053833, "learning_rate": 6.316540004178371e-05, "loss": 1.7175, "step": 3997 }, { "epoch": 0.4152903292822271, "grad_norm": 0.36904019117355347, "learning_rate": 6.314965855177544e-05, "loss": 1.6544, "step": 3998 }, { "epoch": 0.4153942038018074, "grad_norm": 0.3496026396751404, "learning_rate": 6.31339156614311e-05, "loss": 1.4105, "step": 3999 }, { "epoch": 0.41549807832138774, "grad_norm": 0.3861815333366394, "learning_rate": 6.311817137242721e-05, "loss": 1.7974, "step": 4000 }, { "epoch": 0.4156019528409681, "grad_norm": 0.37794098258018494, "learning_rate": 6.310242568644035e-05, "loss": 1.5556, "step": 4001 }, { "epoch": 0.41570582736054845, "grad_norm": 0.42547518014907837, "learning_rate": 6.30866786051474e-05, "loss": 1.6694, "step": 4002 }, { "epoch": 0.4158097018801288, "grad_norm": 0.3894122540950775, "learning_rate": 6.307093013022525e-05, "loss": 1.5479, "step": 4003 }, { "epoch": 0.41591357639970916, "grad_norm": 0.36968937516212463, "learning_rate": 6.305518026335097e-05, "loss": 1.6548, "step": 4004 }, { "epoch": 0.4160174509192895, "grad_norm": 0.40135595202445984, "learning_rate": 6.303942900620185e-05, "loss": 1.5989, "step": 4005 }, { "epoch": 0.41612132543886987, "grad_norm": 0.39152655005455017, "learning_rate": 6.302367636045526e-05, "loss": 1.6618, "step": 4006 }, { "epoch": 0.4162251999584502, "grad_norm": 0.382112592458725, "learning_rate": 6.300792232778872e-05, "loss": 1.5055, "step": 4007 }, { "epoch": 0.4163290744780305, "grad_norm": 0.4156250059604645, "learning_rate": 6.29921669098799e-05, "loss": 1.9369, "step": 4008 }, { "epoch": 0.4164329489976109, "grad_norm": 0.3940093219280243, "learning_rate": 6.297641010840665e-05, "loss": 1.6275, "step": 4009 }, { "epoch": 0.41653682351719123, "grad_norm": 0.4581923186779022, "learning_rate": 6.296065192504697e-05, "loss": 1.8641, "step": 4010 }, { "epoch": 0.41664069803677156, "grad_norm": 0.3822265863418579, "learning_rate": 6.294489236147894e-05, "loss": 1.7272, "step": 4011 }, { "epoch": 0.41674457255635194, "grad_norm": 0.3892137110233307, "learning_rate": 6.292913141938084e-05, "loss": 1.683, "step": 4012 }, { "epoch": 0.41684844707593227, "grad_norm": 0.3944839835166931, "learning_rate": 6.29133691004311e-05, "loss": 1.6662, "step": 4013 }, { "epoch": 0.4169523215955126, "grad_norm": 0.39415454864501953, "learning_rate": 6.289760540630829e-05, "loss": 1.6862, "step": 4014 }, { "epoch": 0.417056196115093, "grad_norm": 0.39324238896369934, "learning_rate": 6.288184033869109e-05, "loss": 1.7041, "step": 4015 }, { "epoch": 0.4171600706346733, "grad_norm": 0.4160117506980896, "learning_rate": 6.28660738992584e-05, "loss": 1.7689, "step": 4016 }, { "epoch": 0.41726394515425363, "grad_norm": 0.4204423725605011, "learning_rate": 6.285030608968918e-05, "loss": 1.7707, "step": 4017 }, { "epoch": 0.417367819673834, "grad_norm": 0.3935635983943939, "learning_rate": 6.28345369116626e-05, "loss": 1.5884, "step": 4018 }, { "epoch": 0.41747169419341434, "grad_norm": 0.41393959522247314, "learning_rate": 6.281876636685795e-05, "loss": 1.6574, "step": 4019 }, { "epoch": 0.4175755687129947, "grad_norm": 0.4115196764469147, "learning_rate": 6.280299445695469e-05, "loss": 1.8304, "step": 4020 }, { "epoch": 0.41767944323257505, "grad_norm": 0.3949635922908783, "learning_rate": 6.278722118363237e-05, "loss": 1.5268, "step": 4021 }, { "epoch": 0.4177833177521554, "grad_norm": 0.41173237562179565, "learning_rate": 6.277144654857074e-05, "loss": 1.8324, "step": 4022 }, { "epoch": 0.41788719227173576, "grad_norm": 0.3953053951263428, "learning_rate": 6.275567055344967e-05, "loss": 1.6818, "step": 4023 }, { "epoch": 0.4179910667913161, "grad_norm": 0.46665164828300476, "learning_rate": 6.27398931999492e-05, "loss": 1.8911, "step": 4024 }, { "epoch": 0.4180949413108964, "grad_norm": 0.4181447923183441, "learning_rate": 6.272411448974947e-05, "loss": 1.7229, "step": 4025 }, { "epoch": 0.4181988158304768, "grad_norm": 0.3725024461746216, "learning_rate": 6.270833442453079e-05, "loss": 1.664, "step": 4026 }, { "epoch": 0.4183026903500571, "grad_norm": 0.430210679769516, "learning_rate": 6.269255300597364e-05, "loss": 1.6161, "step": 4027 }, { "epoch": 0.41840656486963745, "grad_norm": 0.3929589092731476, "learning_rate": 6.267677023575859e-05, "loss": 1.7089, "step": 4028 }, { "epoch": 0.41851043938921784, "grad_norm": 0.3702670931816101, "learning_rate": 6.26609861155664e-05, "loss": 1.6041, "step": 4029 }, { "epoch": 0.41861431390879816, "grad_norm": 0.3990215063095093, "learning_rate": 6.264520064707795e-05, "loss": 1.7304, "step": 4030 }, { "epoch": 0.41871818842837855, "grad_norm": 0.4187242388725281, "learning_rate": 6.262941383197425e-05, "loss": 1.813, "step": 4031 }, { "epoch": 0.4188220629479589, "grad_norm": 0.46325740218162537, "learning_rate": 6.261362567193651e-05, "loss": 1.8182, "step": 4032 }, { "epoch": 0.4189259374675392, "grad_norm": 0.3769034445285797, "learning_rate": 6.259783616864601e-05, "loss": 1.733, "step": 4033 }, { "epoch": 0.4190298119871196, "grad_norm": 0.42521029710769653, "learning_rate": 6.258204532378423e-05, "loss": 1.8581, "step": 4034 }, { "epoch": 0.4191336865066999, "grad_norm": 0.3773384988307953, "learning_rate": 6.256625313903278e-05, "loss": 1.6578, "step": 4035 }, { "epoch": 0.41923756102628024, "grad_norm": 0.4068077504634857, "learning_rate": 6.255045961607338e-05, "loss": 1.7315, "step": 4036 }, { "epoch": 0.4193414355458606, "grad_norm": 0.38190507888793945, "learning_rate": 6.253466475658792e-05, "loss": 1.5881, "step": 4037 }, { "epoch": 0.41944531006544095, "grad_norm": 0.373384028673172, "learning_rate": 6.251886856225844e-05, "loss": 1.719, "step": 4038 }, { "epoch": 0.4195491845850213, "grad_norm": 0.3872770369052887, "learning_rate": 6.250307103476712e-05, "loss": 1.6688, "step": 4039 }, { "epoch": 0.41965305910460166, "grad_norm": 0.4040100574493408, "learning_rate": 6.248727217579625e-05, "loss": 1.7883, "step": 4040 }, { "epoch": 0.419756933624182, "grad_norm": 0.5108729004859924, "learning_rate": 6.247147198702832e-05, "loss": 1.8919, "step": 4041 }, { "epoch": 0.4198608081437623, "grad_norm": 0.4295211732387543, "learning_rate": 6.245567047014589e-05, "loss": 1.7379, "step": 4042 }, { "epoch": 0.4199646826633427, "grad_norm": 0.4218069911003113, "learning_rate": 6.243986762683171e-05, "loss": 1.7343, "step": 4043 }, { "epoch": 0.420068557182923, "grad_norm": 0.40920522809028625, "learning_rate": 6.242406345876866e-05, "loss": 1.8147, "step": 4044 }, { "epoch": 0.4201724317025034, "grad_norm": 0.3913504481315613, "learning_rate": 6.240825796763977e-05, "loss": 1.6202, "step": 4045 }, { "epoch": 0.42027630622208373, "grad_norm": 0.387791246175766, "learning_rate": 6.239245115512818e-05, "loss": 1.6385, "step": 4046 }, { "epoch": 0.42038018074166406, "grad_norm": 0.3972698450088501, "learning_rate": 6.237664302291722e-05, "loss": 1.6763, "step": 4047 }, { "epoch": 0.42048405526124444, "grad_norm": 0.35716134309768677, "learning_rate": 6.236083357269033e-05, "loss": 1.6527, "step": 4048 }, { "epoch": 0.42058792978082477, "grad_norm": 0.40162917971611023, "learning_rate": 6.234502280613108e-05, "loss": 1.7658, "step": 4049 }, { "epoch": 0.4206918043004051, "grad_norm": 0.3818826377391815, "learning_rate": 6.232921072492318e-05, "loss": 1.7382, "step": 4050 }, { "epoch": 0.4207956788199855, "grad_norm": 0.4017144739627838, "learning_rate": 6.23133973307505e-05, "loss": 1.7945, "step": 4051 }, { "epoch": 0.4208995533395658, "grad_norm": 0.3896738290786743, "learning_rate": 6.229758262529708e-05, "loss": 1.8404, "step": 4052 }, { "epoch": 0.42100342785914613, "grad_norm": 0.38667070865631104, "learning_rate": 6.228176661024703e-05, "loss": 1.619, "step": 4053 }, { "epoch": 0.4211073023787265, "grad_norm": 0.38369256258010864, "learning_rate": 6.226594928728462e-05, "loss": 1.6359, "step": 4054 }, { "epoch": 0.42121117689830684, "grad_norm": 0.44246816635131836, "learning_rate": 6.225013065809431e-05, "loss": 1.8973, "step": 4055 }, { "epoch": 0.42131505141788717, "grad_norm": 0.42337489128112793, "learning_rate": 6.223431072436063e-05, "loss": 1.6915, "step": 4056 }, { "epoch": 0.42141892593746755, "grad_norm": 0.419403612613678, "learning_rate": 6.221848948776828e-05, "loss": 1.6772, "step": 4057 }, { "epoch": 0.4215228004570479, "grad_norm": 0.3898250460624695, "learning_rate": 6.220266695000211e-05, "loss": 1.7213, "step": 4058 }, { "epoch": 0.42162667497662826, "grad_norm": 0.4045391380786896, "learning_rate": 6.21868431127471e-05, "loss": 1.9761, "step": 4059 }, { "epoch": 0.4217305494962086, "grad_norm": 0.3763904273509979, "learning_rate": 6.217101797768837e-05, "loss": 1.7037, "step": 4060 }, { "epoch": 0.4218344240157889, "grad_norm": 0.397461861371994, "learning_rate": 6.215519154651116e-05, "loss": 1.7218, "step": 4061 }, { "epoch": 0.4219382985353693, "grad_norm": 0.4115433990955353, "learning_rate": 6.213936382090085e-05, "loss": 1.7957, "step": 4062 }, { "epoch": 0.4220421730549496, "grad_norm": 0.393285870552063, "learning_rate": 6.2123534802543e-05, "loss": 1.7508, "step": 4063 }, { "epoch": 0.42214604757452995, "grad_norm": 0.3760216236114502, "learning_rate": 6.210770449312326e-05, "loss": 1.6243, "step": 4064 }, { "epoch": 0.42224992209411033, "grad_norm": 0.4444178342819214, "learning_rate": 6.209187289432742e-05, "loss": 1.8161, "step": 4065 }, { "epoch": 0.42235379661369066, "grad_norm": 0.37198397517204285, "learning_rate": 6.207604000784143e-05, "loss": 1.6786, "step": 4066 }, { "epoch": 0.422457671133271, "grad_norm": 0.39783281087875366, "learning_rate": 6.206020583535141e-05, "loss": 1.7703, "step": 4067 }, { "epoch": 0.42256154565285137, "grad_norm": 0.45979025959968567, "learning_rate": 6.204437037854351e-05, "loss": 1.8734, "step": 4068 }, { "epoch": 0.4226654201724317, "grad_norm": 0.42977213859558105, "learning_rate": 6.202853363910413e-05, "loss": 1.7048, "step": 4069 }, { "epoch": 0.4227692946920121, "grad_norm": 0.41636922955513, "learning_rate": 6.201269561871975e-05, "loss": 1.6683, "step": 4070 }, { "epoch": 0.4228731692115924, "grad_norm": 0.4053545594215393, "learning_rate": 6.199685631907697e-05, "loss": 1.6983, "step": 4071 }, { "epoch": 0.42297704373117273, "grad_norm": 0.378132164478302, "learning_rate": 6.19810157418626e-05, "loss": 1.6345, "step": 4072 }, { "epoch": 0.4230809182507531, "grad_norm": 0.4450819492340088, "learning_rate": 6.196517388876348e-05, "loss": 1.6669, "step": 4073 }, { "epoch": 0.42318479277033344, "grad_norm": 0.3707204759120941, "learning_rate": 6.194933076146668e-05, "loss": 1.5462, "step": 4074 }, { "epoch": 0.42328866728991377, "grad_norm": 0.41867223381996155, "learning_rate": 6.193348636165937e-05, "loss": 1.8317, "step": 4075 }, { "epoch": 0.42339254180949415, "grad_norm": 0.4516044557094574, "learning_rate": 6.191764069102883e-05, "loss": 1.8396, "step": 4076 }, { "epoch": 0.4234964163290745, "grad_norm": 0.421350359916687, "learning_rate": 6.190179375126254e-05, "loss": 1.7317, "step": 4077 }, { "epoch": 0.4236002908486548, "grad_norm": 0.3848811984062195, "learning_rate": 6.188594554404804e-05, "loss": 1.6922, "step": 4078 }, { "epoch": 0.4237041653682352, "grad_norm": 0.41102609038352966, "learning_rate": 6.187009607107304e-05, "loss": 1.7322, "step": 4079 }, { "epoch": 0.4238080398878155, "grad_norm": 0.4180265963077545, "learning_rate": 6.185424533402543e-05, "loss": 1.8053, "step": 4080 }, { "epoch": 0.42391191440739584, "grad_norm": 0.4037413001060486, "learning_rate": 6.183839333459315e-05, "loss": 1.6599, "step": 4081 }, { "epoch": 0.4240157889269762, "grad_norm": 0.40133136510849, "learning_rate": 6.18225400744643e-05, "loss": 1.7394, "step": 4082 }, { "epoch": 0.42411966344655655, "grad_norm": 0.37302759289741516, "learning_rate": 6.180668555532719e-05, "loss": 1.5259, "step": 4083 }, { "epoch": 0.42422353796613693, "grad_norm": 0.39756691455841064, "learning_rate": 6.179082977887013e-05, "loss": 1.6746, "step": 4084 }, { "epoch": 0.42432741248571726, "grad_norm": 0.37990039587020874, "learning_rate": 6.177497274678168e-05, "loss": 1.6812, "step": 4085 }, { "epoch": 0.4244312870052976, "grad_norm": 0.3902164697647095, "learning_rate": 6.175911446075049e-05, "loss": 1.6457, "step": 4086 }, { "epoch": 0.42453516152487797, "grad_norm": 0.4064858555793762, "learning_rate": 6.174325492246531e-05, "loss": 1.7168, "step": 4087 }, { "epoch": 0.4246390360444583, "grad_norm": 0.40128055214881897, "learning_rate": 6.17273941336151e-05, "loss": 1.7847, "step": 4088 }, { "epoch": 0.4247429105640386, "grad_norm": 0.4626508951187134, "learning_rate": 6.171153209588891e-05, "loss": 1.951, "step": 4089 }, { "epoch": 0.424846785083619, "grad_norm": 0.40969690680503845, "learning_rate": 6.169566881097588e-05, "loss": 1.4734, "step": 4090 }, { "epoch": 0.42495065960319933, "grad_norm": 0.3807416558265686, "learning_rate": 6.167980428056537e-05, "loss": 1.5504, "step": 4091 }, { "epoch": 0.42505453412277966, "grad_norm": 0.3962438702583313, "learning_rate": 6.16639385063468e-05, "loss": 1.8144, "step": 4092 }, { "epoch": 0.42515840864236004, "grad_norm": 0.3545083701610565, "learning_rate": 6.164807149000974e-05, "loss": 1.5118, "step": 4093 }, { "epoch": 0.42526228316194037, "grad_norm": 0.43927130103111267, "learning_rate": 6.163220323324395e-05, "loss": 1.8641, "step": 4094 }, { "epoch": 0.4253661576815207, "grad_norm": 0.41815078258514404, "learning_rate": 6.161633373773925e-05, "loss": 1.7244, "step": 4095 }, { "epoch": 0.4254700322011011, "grad_norm": 0.4570396840572357, "learning_rate": 6.16004630051856e-05, "loss": 1.8696, "step": 4096 }, { "epoch": 0.4255739067206814, "grad_norm": 0.3970779478549957, "learning_rate": 6.158459103727316e-05, "loss": 1.7826, "step": 4097 }, { "epoch": 0.4256777812402618, "grad_norm": 0.437796026468277, "learning_rate": 6.15687178356921e-05, "loss": 1.6452, "step": 4098 }, { "epoch": 0.4257816557598421, "grad_norm": 0.4131401777267456, "learning_rate": 6.155284340213285e-05, "loss": 1.6827, "step": 4099 }, { "epoch": 0.42588553027942244, "grad_norm": 0.4367378056049347, "learning_rate": 6.15369677382859e-05, "loss": 1.9042, "step": 4100 }, { "epoch": 0.4259894047990028, "grad_norm": 0.39361101388931274, "learning_rate": 6.152109084584186e-05, "loss": 1.643, "step": 4101 }, { "epoch": 0.42609327931858315, "grad_norm": 0.37626150250434875, "learning_rate": 6.150521272649152e-05, "loss": 1.5454, "step": 4102 }, { "epoch": 0.4261971538381635, "grad_norm": 0.3899744749069214, "learning_rate": 6.148933338192578e-05, "loss": 1.6159, "step": 4103 }, { "epoch": 0.42630102835774386, "grad_norm": 0.4217323362827301, "learning_rate": 6.147345281383565e-05, "loss": 1.8407, "step": 4104 }, { "epoch": 0.4264049028773242, "grad_norm": 0.42654165625572205, "learning_rate": 6.145757102391227e-05, "loss": 1.7704, "step": 4105 }, { "epoch": 0.4265087773969045, "grad_norm": 0.3943133056163788, "learning_rate": 6.144168801384698e-05, "loss": 1.7799, "step": 4106 }, { "epoch": 0.4266126519164849, "grad_norm": 0.37911903858184814, "learning_rate": 6.142580378533114e-05, "loss": 1.6106, "step": 4107 }, { "epoch": 0.4267165264360652, "grad_norm": 0.42597952485084534, "learning_rate": 6.140991834005632e-05, "loss": 1.6198, "step": 4108 }, { "epoch": 0.4268204009556456, "grad_norm": 0.43222105503082275, "learning_rate": 6.139403167971422e-05, "loss": 1.7884, "step": 4109 }, { "epoch": 0.42692427547522593, "grad_norm": 0.4335835576057434, "learning_rate": 6.13781438059966e-05, "loss": 1.7371, "step": 4110 }, { "epoch": 0.42702814999480626, "grad_norm": 0.41362127661705017, "learning_rate": 6.136225472059545e-05, "loss": 1.6047, "step": 4111 }, { "epoch": 0.42713202451438664, "grad_norm": 0.38855838775634766, "learning_rate": 6.134636442520277e-05, "loss": 1.6329, "step": 4112 }, { "epoch": 0.42723589903396697, "grad_norm": 0.39782068133354187, "learning_rate": 6.13304729215108e-05, "loss": 1.711, "step": 4113 }, { "epoch": 0.4273397735535473, "grad_norm": 0.4341132342815399, "learning_rate": 6.131458021121184e-05, "loss": 1.6623, "step": 4114 }, { "epoch": 0.4274436480731277, "grad_norm": 0.4523240625858307, "learning_rate": 6.129868629599832e-05, "loss": 1.8319, "step": 4115 }, { "epoch": 0.427547522592708, "grad_norm": 0.3760691285133362, "learning_rate": 6.128279117756288e-05, "loss": 1.8514, "step": 4116 }, { "epoch": 0.42765139711228833, "grad_norm": 0.45057788491249084, "learning_rate": 6.126689485759818e-05, "loss": 1.7907, "step": 4117 }, { "epoch": 0.4277552716318687, "grad_norm": 0.4334897994995117, "learning_rate": 6.125099733779706e-05, "loss": 1.5299, "step": 4118 }, { "epoch": 0.42785914615144904, "grad_norm": 0.4173983633518219, "learning_rate": 6.12350986198525e-05, "loss": 1.6515, "step": 4119 }, { "epoch": 0.42796302067102937, "grad_norm": 0.4509902000427246, "learning_rate": 6.121919870545755e-05, "loss": 1.9308, "step": 4120 }, { "epoch": 0.42806689519060975, "grad_norm": 0.433244526386261, "learning_rate": 6.120329759630546e-05, "loss": 1.9438, "step": 4121 }, { "epoch": 0.4281707697101901, "grad_norm": 0.4360440671443939, "learning_rate": 6.118739529408956e-05, "loss": 1.8285, "step": 4122 }, { "epoch": 0.42827464422977046, "grad_norm": 0.44721856713294983, "learning_rate": 6.117149180050332e-05, "loss": 1.7948, "step": 4123 }, { "epoch": 0.4283785187493508, "grad_norm": 0.4097500741481781, "learning_rate": 6.115558711724036e-05, "loss": 1.6341, "step": 4124 }, { "epoch": 0.4284823932689311, "grad_norm": 0.3732147514820099, "learning_rate": 6.113968124599439e-05, "loss": 1.6512, "step": 4125 }, { "epoch": 0.4285862677885115, "grad_norm": 0.407043993473053, "learning_rate": 6.112377418845925e-05, "loss": 1.7734, "step": 4126 }, { "epoch": 0.4286901423080918, "grad_norm": 0.4379003047943115, "learning_rate": 6.110786594632892e-05, "loss": 1.8773, "step": 4127 }, { "epoch": 0.42879401682767215, "grad_norm": 0.49205613136291504, "learning_rate": 6.109195652129753e-05, "loss": 1.6428, "step": 4128 }, { "epoch": 0.42889789134725254, "grad_norm": 0.40176907181739807, "learning_rate": 6.107604591505928e-05, "loss": 1.5291, "step": 4129 }, { "epoch": 0.42900176586683286, "grad_norm": 0.4463737905025482, "learning_rate": 6.106013412930853e-05, "loss": 1.8709, "step": 4130 }, { "epoch": 0.4291056403864132, "grad_norm": 0.49634161591529846, "learning_rate": 6.10442211657398e-05, "loss": 2.1084, "step": 4131 }, { "epoch": 0.42920951490599357, "grad_norm": 0.3978555500507355, "learning_rate": 6.1028307026047624e-05, "loss": 1.7182, "step": 4132 }, { "epoch": 0.4293133894255739, "grad_norm": 0.39746221899986267, "learning_rate": 6.1012391711926795e-05, "loss": 1.7323, "step": 4133 }, { "epoch": 0.4294172639451542, "grad_norm": 0.39065563678741455, "learning_rate": 6.099647522507216e-05, "loss": 1.7496, "step": 4134 }, { "epoch": 0.4295211384647346, "grad_norm": 0.41617366671562195, "learning_rate": 6.098055756717866e-05, "loss": 1.781, "step": 4135 }, { "epoch": 0.42962501298431494, "grad_norm": 0.41195693612098694, "learning_rate": 6.0964638739941447e-05, "loss": 1.7705, "step": 4136 }, { "epoch": 0.4297288875038953, "grad_norm": 0.397640585899353, "learning_rate": 6.094871874505572e-05, "loss": 1.7518, "step": 4137 }, { "epoch": 0.42983276202347565, "grad_norm": 0.37663549184799194, "learning_rate": 6.093279758421687e-05, "loss": 1.6859, "step": 4138 }, { "epoch": 0.42993663654305597, "grad_norm": 0.3834432065486908, "learning_rate": 6.0916875259120366e-05, "loss": 1.5105, "step": 4139 }, { "epoch": 0.43004051106263635, "grad_norm": 0.4278203248977661, "learning_rate": 6.090095177146178e-05, "loss": 1.7621, "step": 4140 }, { "epoch": 0.4301443855822167, "grad_norm": 0.4155001938343048, "learning_rate": 6.088502712293687e-05, "loss": 1.8687, "step": 4141 }, { "epoch": 0.430248260101797, "grad_norm": 0.4091452658176422, "learning_rate": 6.086910131524147e-05, "loss": 1.6906, "step": 4142 }, { "epoch": 0.4303521346213774, "grad_norm": 0.4341062605381012, "learning_rate": 6.085317435007156e-05, "loss": 1.7431, "step": 4143 }, { "epoch": 0.4304560091409577, "grad_norm": 0.4048025906085968, "learning_rate": 6.0837246229123246e-05, "loss": 1.7628, "step": 4144 }, { "epoch": 0.43055988366053805, "grad_norm": 0.3721714913845062, "learning_rate": 6.0821316954092745e-05, "loss": 1.4422, "step": 4145 }, { "epoch": 0.43066375818011843, "grad_norm": 0.39783287048339844, "learning_rate": 6.08053865266764e-05, "loss": 1.6521, "step": 4146 }, { "epoch": 0.43076763269969875, "grad_norm": 0.44611310958862305, "learning_rate": 6.078945494857068e-05, "loss": 1.9144, "step": 4147 }, { "epoch": 0.43087150721927914, "grad_norm": 0.3994828462600708, "learning_rate": 6.077352222147214e-05, "loss": 1.6819, "step": 4148 }, { "epoch": 0.43097538173885946, "grad_norm": 0.4165269434452057, "learning_rate": 6.075758834707754e-05, "loss": 1.7586, "step": 4149 }, { "epoch": 0.4310792562584398, "grad_norm": 0.4540640413761139, "learning_rate": 6.0741653327083703e-05, "loss": 1.8504, "step": 4150 }, { "epoch": 0.4311831307780202, "grad_norm": 0.4224712550640106, "learning_rate": 6.072571716318756e-05, "loss": 1.7859, "step": 4151 }, { "epoch": 0.4312870052976005, "grad_norm": 0.4140393137931824, "learning_rate": 6.0709779857086214e-05, "loss": 1.6534, "step": 4152 }, { "epoch": 0.43139087981718083, "grad_norm": 0.37748152017593384, "learning_rate": 6.069384141047686e-05, "loss": 1.5697, "step": 4153 }, { "epoch": 0.4314947543367612, "grad_norm": 0.4263037145137787, "learning_rate": 6.067790182505678e-05, "loss": 1.6565, "step": 4154 }, { "epoch": 0.43159862885634154, "grad_norm": 0.38402366638183594, "learning_rate": 6.066196110252347e-05, "loss": 1.6159, "step": 4155 }, { "epoch": 0.43170250337592186, "grad_norm": 0.3785112798213959, "learning_rate": 6.064601924457447e-05, "loss": 1.4971, "step": 4156 }, { "epoch": 0.43180637789550225, "grad_norm": 0.3928913474082947, "learning_rate": 6.0630076252907445e-05, "loss": 1.6329, "step": 4157 }, { "epoch": 0.4319102524150826, "grad_norm": 0.42711085081100464, "learning_rate": 6.061413212922022e-05, "loss": 1.7333, "step": 4158 }, { "epoch": 0.4320141269346629, "grad_norm": 0.40235477685928345, "learning_rate": 6.059818687521074e-05, "loss": 1.6936, "step": 4159 }, { "epoch": 0.4321180014542433, "grad_norm": 0.4238249957561493, "learning_rate": 6.0582240492577015e-05, "loss": 1.7692, "step": 4160 }, { "epoch": 0.4322218759738236, "grad_norm": 0.4094812870025635, "learning_rate": 6.056629298301722e-05, "loss": 1.6549, "step": 4161 }, { "epoch": 0.432325750493404, "grad_norm": 0.4390838146209717, "learning_rate": 6.0550344348229626e-05, "loss": 1.8607, "step": 4162 }, { "epoch": 0.4324296250129843, "grad_norm": 0.3718215823173523, "learning_rate": 6.053439458991268e-05, "loss": 1.6305, "step": 4163 }, { "epoch": 0.43253349953256465, "grad_norm": 0.42819488048553467, "learning_rate": 6.051844370976487e-05, "loss": 1.5788, "step": 4164 }, { "epoch": 0.43263737405214503, "grad_norm": 0.37748944759368896, "learning_rate": 6.050249170948484e-05, "loss": 1.6356, "step": 4165 }, { "epoch": 0.43274124857172536, "grad_norm": 0.4419516623020172, "learning_rate": 6.048653859077138e-05, "loss": 1.7449, "step": 4166 }, { "epoch": 0.4328451230913057, "grad_norm": 0.40298861265182495, "learning_rate": 6.0470584355323375e-05, "loss": 1.648, "step": 4167 }, { "epoch": 0.43294899761088607, "grad_norm": 0.41812488436698914, "learning_rate": 6.045462900483978e-05, "loss": 1.8709, "step": 4168 }, { "epoch": 0.4330528721304664, "grad_norm": 0.4024185538291931, "learning_rate": 6.0438672541019755e-05, "loss": 1.7013, "step": 4169 }, { "epoch": 0.4331567466500467, "grad_norm": 0.3921603262424469, "learning_rate": 6.042271496556254e-05, "loss": 1.6795, "step": 4170 }, { "epoch": 0.4332606211696271, "grad_norm": 0.41133809089660645, "learning_rate": 6.040675628016746e-05, "loss": 1.7468, "step": 4171 }, { "epoch": 0.43336449568920743, "grad_norm": 0.4003753960132599, "learning_rate": 6.039079648653403e-05, "loss": 1.7661, "step": 4172 }, { "epoch": 0.4334683702087878, "grad_norm": 0.3852844536304474, "learning_rate": 6.037483558636183e-05, "loss": 1.6341, "step": 4173 }, { "epoch": 0.43357224472836814, "grad_norm": 0.40245676040649414, "learning_rate": 6.035887358135056e-05, "loss": 1.7788, "step": 4174 }, { "epoch": 0.43367611924794847, "grad_norm": 0.4260011613368988, "learning_rate": 6.0342910473200054e-05, "loss": 1.8262, "step": 4175 }, { "epoch": 0.43377999376752885, "grad_norm": 0.4148736298084259, "learning_rate": 6.032694626361025e-05, "loss": 1.7661, "step": 4176 }, { "epoch": 0.4338838682871092, "grad_norm": 0.38179653882980347, "learning_rate": 6.0310980954281234e-05, "loss": 1.5742, "step": 4177 }, { "epoch": 0.4339877428066895, "grad_norm": 0.4099988639354706, "learning_rate": 6.029501454691318e-05, "loss": 1.66, "step": 4178 }, { "epoch": 0.4340916173262699, "grad_norm": 0.3874553442001343, "learning_rate": 6.027904704320636e-05, "loss": 1.6484, "step": 4179 }, { "epoch": 0.4341954918458502, "grad_norm": 0.390216588973999, "learning_rate": 6.026307844486123e-05, "loss": 1.7898, "step": 4180 }, { "epoch": 0.43429936636543054, "grad_norm": 0.38214191794395447, "learning_rate": 6.024710875357831e-05, "loss": 1.6147, "step": 4181 }, { "epoch": 0.4344032408850109, "grad_norm": 0.37591472268104553, "learning_rate": 6.02311379710582e-05, "loss": 1.685, "step": 4182 }, { "epoch": 0.43450711540459125, "grad_norm": 0.42642828822135925, "learning_rate": 6.021516609900173e-05, "loss": 1.8715, "step": 4183 }, { "epoch": 0.4346109899241716, "grad_norm": 0.4037899672985077, "learning_rate": 6.019919313910976e-05, "loss": 1.6407, "step": 4184 }, { "epoch": 0.43471486444375196, "grad_norm": 0.40876585245132446, "learning_rate": 6.0183219093083244e-05, "loss": 1.6688, "step": 4185 }, { "epoch": 0.4348187389633323, "grad_norm": 0.4056430757045746, "learning_rate": 6.016724396262335e-05, "loss": 1.8047, "step": 4186 }, { "epoch": 0.43492261348291267, "grad_norm": 0.4015723168849945, "learning_rate": 6.015126774943128e-05, "loss": 1.7502, "step": 4187 }, { "epoch": 0.435026488002493, "grad_norm": 0.4126836359500885, "learning_rate": 6.013529045520838e-05, "loss": 1.8191, "step": 4188 }, { "epoch": 0.4351303625220733, "grad_norm": 0.39362239837646484, "learning_rate": 6.011931208165611e-05, "loss": 1.6365, "step": 4189 }, { "epoch": 0.4352342370416537, "grad_norm": 0.37851396203041077, "learning_rate": 6.0103332630476017e-05, "loss": 1.7451, "step": 4190 }, { "epoch": 0.43533811156123403, "grad_norm": 0.3897380530834198, "learning_rate": 6.008735210336982e-05, "loss": 1.7397, "step": 4191 }, { "epoch": 0.43544198608081436, "grad_norm": 0.38429588079452515, "learning_rate": 6.00713705020393e-05, "loss": 1.5615, "step": 4192 }, { "epoch": 0.43554586060039474, "grad_norm": 0.4072874188423157, "learning_rate": 6.005538782818638e-05, "loss": 1.8237, "step": 4193 }, { "epoch": 0.43564973511997507, "grad_norm": 0.4054878354072571, "learning_rate": 6.003940408351311e-05, "loss": 1.7932, "step": 4194 }, { "epoch": 0.4357536096395554, "grad_norm": 0.38141578435897827, "learning_rate": 6.0023419269721613e-05, "loss": 1.6768, "step": 4195 }, { "epoch": 0.4358574841591358, "grad_norm": 0.4173438251018524, "learning_rate": 6.000743338851413e-05, "loss": 1.513, "step": 4196 }, { "epoch": 0.4359613586787161, "grad_norm": 0.4424532651901245, "learning_rate": 5.999144644159307e-05, "loss": 1.7438, "step": 4197 }, { "epoch": 0.43606523319829643, "grad_norm": 0.43783673644065857, "learning_rate": 5.997545843066089e-05, "loss": 1.7892, "step": 4198 }, { "epoch": 0.4361691077178768, "grad_norm": 0.3705306649208069, "learning_rate": 5.995946935742019e-05, "loss": 1.6238, "step": 4199 }, { "epoch": 0.43627298223745714, "grad_norm": 0.38308608531951904, "learning_rate": 5.994347922357372e-05, "loss": 1.5195, "step": 4200 }, { "epoch": 0.4363768567570375, "grad_norm": 0.43744367361068726, "learning_rate": 5.992748803082425e-05, "loss": 1.9181, "step": 4201 }, { "epoch": 0.43648073127661785, "grad_norm": 0.42308345437049866, "learning_rate": 5.991149578087476e-05, "loss": 1.767, "step": 4202 }, { "epoch": 0.4365846057961982, "grad_norm": 0.4409851133823395, "learning_rate": 5.9895502475428265e-05, "loss": 1.8537, "step": 4203 }, { "epoch": 0.43668848031577856, "grad_norm": 0.377109169960022, "learning_rate": 5.9879508116187947e-05, "loss": 1.6446, "step": 4204 }, { "epoch": 0.4367923548353589, "grad_norm": 0.4228600263595581, "learning_rate": 5.9863512704857085e-05, "loss": 1.7255, "step": 4205 }, { "epoch": 0.4368962293549392, "grad_norm": 0.410178542137146, "learning_rate": 5.9847516243139055e-05, "loss": 1.6686, "step": 4206 }, { "epoch": 0.4370001038745196, "grad_norm": 0.37821877002716064, "learning_rate": 5.9831518732737344e-05, "loss": 1.563, "step": 4207 }, { "epoch": 0.4371039783940999, "grad_norm": 0.3783068358898163, "learning_rate": 5.981552017535561e-05, "loss": 1.6068, "step": 4208 }, { "epoch": 0.43720785291368025, "grad_norm": 0.4072893261909485, "learning_rate": 5.979952057269752e-05, "loss": 1.7952, "step": 4209 }, { "epoch": 0.43731172743326063, "grad_norm": 0.3989700376987457, "learning_rate": 5.9783519926466924e-05, "loss": 1.66, "step": 4210 }, { "epoch": 0.43741560195284096, "grad_norm": 0.4692555367946625, "learning_rate": 5.976751823836778e-05, "loss": 1.6255, "step": 4211 }, { "epoch": 0.43751947647242134, "grad_norm": 0.3792388439178467, "learning_rate": 5.975151551010412e-05, "loss": 1.5188, "step": 4212 }, { "epoch": 0.43762335099200167, "grad_norm": 0.38823702931404114, "learning_rate": 5.973551174338014e-05, "loss": 1.6951, "step": 4213 }, { "epoch": 0.437727225511582, "grad_norm": 0.37780869007110596, "learning_rate": 5.9719506939900094e-05, "loss": 1.5638, "step": 4214 }, { "epoch": 0.4378311000311624, "grad_norm": 0.45048758387565613, "learning_rate": 5.9703501101368374e-05, "loss": 1.8339, "step": 4215 }, { "epoch": 0.4379349745507427, "grad_norm": 0.3920362889766693, "learning_rate": 5.968749422948947e-05, "loss": 1.723, "step": 4216 }, { "epoch": 0.43803884907032303, "grad_norm": 0.3886549770832062, "learning_rate": 5.967148632596801e-05, "loss": 1.6502, "step": 4217 }, { "epoch": 0.4381427235899034, "grad_norm": 0.4173612594604492, "learning_rate": 5.965547739250867e-05, "loss": 1.8315, "step": 4218 }, { "epoch": 0.43824659810948374, "grad_norm": 0.45074644684791565, "learning_rate": 5.963946743081633e-05, "loss": 1.7273, "step": 4219 }, { "epoch": 0.43835047262906407, "grad_norm": 0.42388424277305603, "learning_rate": 5.962345644259589e-05, "loss": 1.7344, "step": 4220 }, { "epoch": 0.43845434714864445, "grad_norm": 0.44487425684928894, "learning_rate": 5.9607444429552405e-05, "loss": 1.771, "step": 4221 }, { "epoch": 0.4385582216682248, "grad_norm": 0.4247962236404419, "learning_rate": 5.959143139339104e-05, "loss": 1.7933, "step": 4222 }, { "epoch": 0.4386620961878051, "grad_norm": 0.4087913930416107, "learning_rate": 5.957541733581704e-05, "loss": 1.667, "step": 4223 }, { "epoch": 0.4387659707073855, "grad_norm": 0.3898204267024994, "learning_rate": 5.955940225853577e-05, "loss": 1.6835, "step": 4224 }, { "epoch": 0.4388698452269658, "grad_norm": 0.3906581997871399, "learning_rate": 5.9543386163252744e-05, "loss": 1.7532, "step": 4225 }, { "epoch": 0.4389737197465462, "grad_norm": 0.37566760182380676, "learning_rate": 5.952736905167352e-05, "loss": 1.5813, "step": 4226 }, { "epoch": 0.4390775942661265, "grad_norm": 0.42560824751853943, "learning_rate": 5.951135092550382e-05, "loss": 1.8791, "step": 4227 }, { "epoch": 0.43918146878570685, "grad_norm": 0.4161139130592346, "learning_rate": 5.949533178644943e-05, "loss": 1.6862, "step": 4228 }, { "epoch": 0.43928534330528723, "grad_norm": 0.398384690284729, "learning_rate": 5.9479311636216274e-05, "loss": 1.7341, "step": 4229 }, { "epoch": 0.43938921782486756, "grad_norm": 0.36606839299201965, "learning_rate": 5.946329047651037e-05, "loss": 1.6987, "step": 4230 }, { "epoch": 0.4394930923444479, "grad_norm": 0.41310790181159973, "learning_rate": 5.944726830903785e-05, "loss": 1.7809, "step": 4231 }, { "epoch": 0.43959696686402827, "grad_norm": 0.41169822216033936, "learning_rate": 5.9431245135504934e-05, "loss": 1.7338, "step": 4232 }, { "epoch": 0.4397008413836086, "grad_norm": 0.3810160756111145, "learning_rate": 5.941522095761799e-05, "loss": 1.5948, "step": 4233 }, { "epoch": 0.4398047159031889, "grad_norm": 0.3669174909591675, "learning_rate": 5.939919577708346e-05, "loss": 1.2893, "step": 4234 }, { "epoch": 0.4399085904227693, "grad_norm": 0.4046458601951599, "learning_rate": 5.938316959560788e-05, "loss": 1.6906, "step": 4235 }, { "epoch": 0.44001246494234963, "grad_norm": 0.40834760665893555, "learning_rate": 5.9367142414897945e-05, "loss": 1.7074, "step": 4236 }, { "epoch": 0.44011633946192996, "grad_norm": 0.425662636756897, "learning_rate": 5.935111423666041e-05, "loss": 1.7486, "step": 4237 }, { "epoch": 0.44022021398151034, "grad_norm": 0.37841665744781494, "learning_rate": 5.933508506260214e-05, "loss": 1.7472, "step": 4238 }, { "epoch": 0.44032408850109067, "grad_norm": 0.4464881420135498, "learning_rate": 5.9319054894430126e-05, "loss": 1.8697, "step": 4239 }, { "epoch": 0.44042796302067105, "grad_norm": 0.4150855541229248, "learning_rate": 5.930302373385145e-05, "loss": 1.5873, "step": 4240 }, { "epoch": 0.4405318375402514, "grad_norm": 0.38565129041671753, "learning_rate": 5.928699158257333e-05, "loss": 1.71, "step": 4241 }, { "epoch": 0.4406357120598317, "grad_norm": 0.44895511865615845, "learning_rate": 5.9270958442303035e-05, "loss": 1.8128, "step": 4242 }, { "epoch": 0.4407395865794121, "grad_norm": 0.3845813572406769, "learning_rate": 5.9254924314748e-05, "loss": 1.7892, "step": 4243 }, { "epoch": 0.4408434610989924, "grad_norm": 0.4151462912559509, "learning_rate": 5.923888920161571e-05, "loss": 1.7573, "step": 4244 }, { "epoch": 0.44094733561857274, "grad_norm": 0.38922443985939026, "learning_rate": 5.922285310461378e-05, "loss": 1.7848, "step": 4245 }, { "epoch": 0.4410512101381531, "grad_norm": 0.38882389664649963, "learning_rate": 5.920681602544993e-05, "loss": 1.7333, "step": 4246 }, { "epoch": 0.44115508465773345, "grad_norm": 0.38732343912124634, "learning_rate": 5.9190777965832e-05, "loss": 1.6265, "step": 4247 }, { "epoch": 0.4412589591773138, "grad_norm": 0.3835653066635132, "learning_rate": 5.917473892746791e-05, "loss": 1.5993, "step": 4248 }, { "epoch": 0.44136283369689416, "grad_norm": 0.41864073276519775, "learning_rate": 5.915869891206567e-05, "loss": 1.7911, "step": 4249 }, { "epoch": 0.4414667082164745, "grad_norm": 0.43092262744903564, "learning_rate": 5.9142657921333466e-05, "loss": 1.487, "step": 4250 }, { "epoch": 0.4415705827360549, "grad_norm": 0.3756438195705414, "learning_rate": 5.912661595697948e-05, "loss": 1.5246, "step": 4251 }, { "epoch": 0.4416744572556352, "grad_norm": 0.3791937828063965, "learning_rate": 5.91105730207121e-05, "loss": 1.7199, "step": 4252 }, { "epoch": 0.4417783317752155, "grad_norm": 0.381509393453598, "learning_rate": 5.909452911423976e-05, "loss": 1.6264, "step": 4253 }, { "epoch": 0.4418822062947959, "grad_norm": 0.3744488060474396, "learning_rate": 5.9078484239271e-05, "loss": 1.5363, "step": 4254 }, { "epoch": 0.44198608081437624, "grad_norm": 0.3968122601509094, "learning_rate": 5.9062438397514496e-05, "loss": 1.7037, "step": 4255 }, { "epoch": 0.44208995533395656, "grad_norm": 0.4114841818809509, "learning_rate": 5.904639159067898e-05, "loss": 1.7508, "step": 4256 }, { "epoch": 0.44219382985353695, "grad_norm": 0.37198910117149353, "learning_rate": 5.9030343820473335e-05, "loss": 1.7378, "step": 4257 }, { "epoch": 0.4422977043731173, "grad_norm": 0.47801533341407776, "learning_rate": 5.9014295088606505e-05, "loss": 1.838, "step": 4258 }, { "epoch": 0.4424015788926976, "grad_norm": 0.43482860922813416, "learning_rate": 5.899824539678758e-05, "loss": 1.8126, "step": 4259 }, { "epoch": 0.442505453412278, "grad_norm": 0.3933789134025574, "learning_rate": 5.898219474672568e-05, "loss": 1.5948, "step": 4260 }, { "epoch": 0.4426093279318583, "grad_norm": 0.369711697101593, "learning_rate": 5.896614314013012e-05, "loss": 1.5256, "step": 4261 }, { "epoch": 0.44271320245143864, "grad_norm": 0.43509915471076965, "learning_rate": 5.8950090578710246e-05, "loss": 1.762, "step": 4262 }, { "epoch": 0.442817076971019, "grad_norm": 0.4086291193962097, "learning_rate": 5.893403706417554e-05, "loss": 1.8018, "step": 4263 }, { "epoch": 0.44292095149059935, "grad_norm": 0.38804736733436584, "learning_rate": 5.89179825982356e-05, "loss": 1.717, "step": 4264 }, { "epoch": 0.44302482601017973, "grad_norm": 0.4058080017566681, "learning_rate": 5.8901927182600035e-05, "loss": 1.8017, "step": 4265 }, { "epoch": 0.44312870052976006, "grad_norm": 0.381388783454895, "learning_rate": 5.88858708189787e-05, "loss": 1.8363, "step": 4266 }, { "epoch": 0.4432325750493404, "grad_norm": 0.3509836196899414, "learning_rate": 5.886981350908142e-05, "loss": 1.5213, "step": 4267 }, { "epoch": 0.44333644956892077, "grad_norm": 0.39383694529533386, "learning_rate": 5.885375525461817e-05, "loss": 1.6166, "step": 4268 }, { "epoch": 0.4434403240885011, "grad_norm": 0.4015539586544037, "learning_rate": 5.883769605729907e-05, "loss": 1.7148, "step": 4269 }, { "epoch": 0.4435441986080814, "grad_norm": 0.3899979293346405, "learning_rate": 5.882163591883427e-05, "loss": 1.5109, "step": 4270 }, { "epoch": 0.4436480731276618, "grad_norm": 0.41530346870422363, "learning_rate": 5.8805574840934067e-05, "loss": 1.6773, "step": 4271 }, { "epoch": 0.44375194764724213, "grad_norm": 0.4475281536579132, "learning_rate": 5.878951282530881e-05, "loss": 1.6551, "step": 4272 }, { "epoch": 0.44385582216682246, "grad_norm": 0.442122220993042, "learning_rate": 5.877344987366902e-05, "loss": 1.7879, "step": 4273 }, { "epoch": 0.44395969668640284, "grad_norm": 0.39350566267967224, "learning_rate": 5.875738598772522e-05, "loss": 1.705, "step": 4274 }, { "epoch": 0.44406357120598317, "grad_norm": 0.43882155418395996, "learning_rate": 5.874132116918816e-05, "loss": 1.7977, "step": 4275 }, { "epoch": 0.4441674457255635, "grad_norm": 0.42583081126213074, "learning_rate": 5.8725255419768565e-05, "loss": 1.6302, "step": 4276 }, { "epoch": 0.4442713202451439, "grad_norm": 0.40676143765449524, "learning_rate": 5.870918874117731e-05, "loss": 1.8449, "step": 4277 }, { "epoch": 0.4443751947647242, "grad_norm": 0.42294740676879883, "learning_rate": 5.869312113512542e-05, "loss": 1.7231, "step": 4278 }, { "epoch": 0.4444790692843046, "grad_norm": 0.41314196586608887, "learning_rate": 5.867705260332391e-05, "loss": 1.6797, "step": 4279 }, { "epoch": 0.4445829438038849, "grad_norm": 0.395781934261322, "learning_rate": 5.866098314748401e-05, "loss": 1.7341, "step": 4280 }, { "epoch": 0.44468681832346524, "grad_norm": 0.412775456905365, "learning_rate": 5.864491276931694e-05, "loss": 1.685, "step": 4281 }, { "epoch": 0.4447906928430456, "grad_norm": 0.38872653245925903, "learning_rate": 5.862884147053409e-05, "loss": 1.7268, "step": 4282 }, { "epoch": 0.44489456736262595, "grad_norm": 0.36840468645095825, "learning_rate": 5.861276925284694e-05, "loss": 1.5863, "step": 4283 }, { "epoch": 0.4449984418822063, "grad_norm": 0.37796318531036377, "learning_rate": 5.8596696117967054e-05, "loss": 1.6242, "step": 4284 }, { "epoch": 0.44510231640178666, "grad_norm": 0.3686732351779938, "learning_rate": 5.8580622067606083e-05, "loss": 1.5881, "step": 4285 }, { "epoch": 0.445206190921367, "grad_norm": 0.4148024320602417, "learning_rate": 5.8564547103475795e-05, "loss": 1.815, "step": 4286 }, { "epoch": 0.4453100654409473, "grad_norm": 0.39075860381126404, "learning_rate": 5.854847122728805e-05, "loss": 1.6837, "step": 4287 }, { "epoch": 0.4454139399605277, "grad_norm": 0.40239056944847107, "learning_rate": 5.853239444075479e-05, "loss": 1.7351, "step": 4288 }, { "epoch": 0.445517814480108, "grad_norm": 0.4210759401321411, "learning_rate": 5.8516316745588085e-05, "loss": 1.6621, "step": 4289 }, { "epoch": 0.4456216889996884, "grad_norm": 0.39281001687049866, "learning_rate": 5.850023814350006e-05, "loss": 1.7465, "step": 4290 }, { "epoch": 0.44572556351926873, "grad_norm": 0.4055309295654297, "learning_rate": 5.848415863620299e-05, "loss": 1.6812, "step": 4291 }, { "epoch": 0.44582943803884906, "grad_norm": 0.3883981704711914, "learning_rate": 5.846807822540922e-05, "loss": 1.6243, "step": 4292 }, { "epoch": 0.44593331255842944, "grad_norm": 0.394539475440979, "learning_rate": 5.8451996912831156e-05, "loss": 1.7954, "step": 4293 }, { "epoch": 0.44603718707800977, "grad_norm": 0.40347257256507874, "learning_rate": 5.8435914700181347e-05, "loss": 1.6049, "step": 4294 }, { "epoch": 0.4461410615975901, "grad_norm": 0.47316792607307434, "learning_rate": 5.8419831589172426e-05, "loss": 1.9193, "step": 4295 }, { "epoch": 0.4462449361171705, "grad_norm": 0.427937775850296, "learning_rate": 5.840374758151711e-05, "loss": 1.6742, "step": 4296 }, { "epoch": 0.4463488106367508, "grad_norm": 0.44503235816955566, "learning_rate": 5.838766267892825e-05, "loss": 1.9509, "step": 4297 }, { "epoch": 0.44645268515633113, "grad_norm": 0.4180712103843689, "learning_rate": 5.837157688311873e-05, "loss": 1.8231, "step": 4298 }, { "epoch": 0.4465565596759115, "grad_norm": 0.4312383532524109, "learning_rate": 5.8355490195801566e-05, "loss": 1.7526, "step": 4299 }, { "epoch": 0.44666043419549184, "grad_norm": 0.38129255175590515, "learning_rate": 5.8339402618689885e-05, "loss": 1.749, "step": 4300 }, { "epoch": 0.44676430871507217, "grad_norm": 0.47581759095191956, "learning_rate": 5.832331415349687e-05, "loss": 2.0137, "step": 4301 }, { "epoch": 0.44686818323465255, "grad_norm": 0.3956213593482971, "learning_rate": 5.83072248019358e-05, "loss": 1.7437, "step": 4302 }, { "epoch": 0.4469720577542329, "grad_norm": 0.42220327258110046, "learning_rate": 5.829113456572012e-05, "loss": 1.9, "step": 4303 }, { "epoch": 0.44707593227381326, "grad_norm": 0.39744284749031067, "learning_rate": 5.827504344656326e-05, "loss": 1.5549, "step": 4304 }, { "epoch": 0.4471798067933936, "grad_norm": 0.3954363763332367, "learning_rate": 5.825895144617883e-05, "loss": 1.7203, "step": 4305 }, { "epoch": 0.4472836813129739, "grad_norm": 0.39236709475517273, "learning_rate": 5.82428585662805e-05, "loss": 1.7229, "step": 4306 }, { "epoch": 0.4473875558325543, "grad_norm": 0.3697808086872101, "learning_rate": 5.822676480858201e-05, "loss": 1.6764, "step": 4307 }, { "epoch": 0.4474914303521346, "grad_norm": 0.45007967948913574, "learning_rate": 5.821067017479727e-05, "loss": 1.8381, "step": 4308 }, { "epoch": 0.44759530487171495, "grad_norm": 0.49184536933898926, "learning_rate": 5.819457466664018e-05, "loss": 2.1298, "step": 4309 }, { "epoch": 0.44769917939129533, "grad_norm": 0.3896716833114624, "learning_rate": 5.81784782858248e-05, "loss": 1.7621, "step": 4310 }, { "epoch": 0.44780305391087566, "grad_norm": 0.39417698979377747, "learning_rate": 5.816238103406529e-05, "loss": 1.7275, "step": 4311 }, { "epoch": 0.447906928430456, "grad_norm": 0.45076748728752136, "learning_rate": 5.814628291307587e-05, "loss": 1.7021, "step": 4312 }, { "epoch": 0.44801080295003637, "grad_norm": 0.43015792965888977, "learning_rate": 5.813018392457086e-05, "loss": 1.8654, "step": 4313 }, { "epoch": 0.4481146774696167, "grad_norm": 0.3831145167350769, "learning_rate": 5.8114084070264686e-05, "loss": 1.7, "step": 4314 }, { "epoch": 0.448218551989197, "grad_norm": 0.3929171860218048, "learning_rate": 5.8097983351871844e-05, "loss": 1.611, "step": 4315 }, { "epoch": 0.4483224265087774, "grad_norm": 0.41042855381965637, "learning_rate": 5.808188177110694e-05, "loss": 2.0493, "step": 4316 }, { "epoch": 0.44842630102835773, "grad_norm": 0.4465937316417694, "learning_rate": 5.806577932968467e-05, "loss": 1.8463, "step": 4317 }, { "epoch": 0.4485301755479381, "grad_norm": 0.4185175597667694, "learning_rate": 5.804967602931981e-05, "loss": 1.649, "step": 4318 }, { "epoch": 0.44863405006751844, "grad_norm": 0.413787841796875, "learning_rate": 5.803357187172726e-05, "loss": 1.8159, "step": 4319 }, { "epoch": 0.44873792458709877, "grad_norm": 0.4084557890892029, "learning_rate": 5.801746685862197e-05, "loss": 1.6689, "step": 4320 }, { "epoch": 0.44884179910667915, "grad_norm": 0.4004693329334259, "learning_rate": 5.8001360991718976e-05, "loss": 1.5459, "step": 4321 }, { "epoch": 0.4489456736262595, "grad_norm": 0.40109360218048096, "learning_rate": 5.798525427273347e-05, "loss": 1.6249, "step": 4322 }, { "epoch": 0.4490495481458398, "grad_norm": 0.4452827274799347, "learning_rate": 5.796914670338067e-05, "loss": 1.7092, "step": 4323 }, { "epoch": 0.4491534226654202, "grad_norm": 0.4333063066005707, "learning_rate": 5.795303828537589e-05, "loss": 1.8151, "step": 4324 }, { "epoch": 0.4492572971850005, "grad_norm": 0.4180525243282318, "learning_rate": 5.7936929020434585e-05, "loss": 1.7309, "step": 4325 }, { "epoch": 0.44936117170458084, "grad_norm": 0.40769898891448975, "learning_rate": 5.792081891027224e-05, "loss": 1.7786, "step": 4326 }, { "epoch": 0.4494650462241612, "grad_norm": 0.46367326378822327, "learning_rate": 5.790470795660447e-05, "loss": 1.9491, "step": 4327 }, { "epoch": 0.44956892074374155, "grad_norm": 0.40147140622138977, "learning_rate": 5.788859616114697e-05, "loss": 1.7364, "step": 4328 }, { "epoch": 0.44967279526332193, "grad_norm": 0.4153495132923126, "learning_rate": 5.787248352561549e-05, "loss": 1.7383, "step": 4329 }, { "epoch": 0.44977666978290226, "grad_norm": 0.40633097290992737, "learning_rate": 5.785637005172595e-05, "loss": 1.4243, "step": 4330 }, { "epoch": 0.4498805443024826, "grad_norm": 0.3852026164531708, "learning_rate": 5.784025574119427e-05, "loss": 1.6868, "step": 4331 }, { "epoch": 0.44998441882206297, "grad_norm": 0.3848346769809723, "learning_rate": 5.78241405957365e-05, "loss": 1.5136, "step": 4332 }, { "epoch": 0.4500882933416433, "grad_norm": 0.3910854160785675, "learning_rate": 5.78080246170688e-05, "loss": 1.6864, "step": 4333 }, { "epoch": 0.4501921678612236, "grad_norm": 0.3685462176799774, "learning_rate": 5.7791907806907395e-05, "loss": 1.6707, "step": 4334 }, { "epoch": 0.450296042380804, "grad_norm": 0.4090143144130707, "learning_rate": 5.777579016696856e-05, "loss": 1.5942, "step": 4335 }, { "epoch": 0.45039991690038433, "grad_norm": 0.377214252948761, "learning_rate": 5.7759671698968745e-05, "loss": 1.5599, "step": 4336 }, { "epoch": 0.45050379141996466, "grad_norm": 0.41875123977661133, "learning_rate": 5.774355240462441e-05, "loss": 1.7982, "step": 4337 }, { "epoch": 0.45060766593954504, "grad_norm": 0.3935871422290802, "learning_rate": 5.772743228565215e-05, "loss": 1.7028, "step": 4338 }, { "epoch": 0.45071154045912537, "grad_norm": 0.406841903924942, "learning_rate": 5.771131134376863e-05, "loss": 1.7354, "step": 4339 }, { "epoch": 0.4508154149787057, "grad_norm": 0.37955769896507263, "learning_rate": 5.76951895806906e-05, "loss": 1.647, "step": 4340 }, { "epoch": 0.4509192894982861, "grad_norm": 0.42771968245506287, "learning_rate": 5.76790669981349e-05, "loss": 1.825, "step": 4341 }, { "epoch": 0.4510231640178664, "grad_norm": 0.3990226686000824, "learning_rate": 5.766294359781848e-05, "loss": 1.611, "step": 4342 }, { "epoch": 0.4511270385374468, "grad_norm": 0.4627864360809326, "learning_rate": 5.764681938145832e-05, "loss": 1.7403, "step": 4343 }, { "epoch": 0.4512309130570271, "grad_norm": 0.3694743812084198, "learning_rate": 5.763069435077155e-05, "loss": 1.5556, "step": 4344 }, { "epoch": 0.45133478757660744, "grad_norm": 0.37064674496650696, "learning_rate": 5.761456850747536e-05, "loss": 1.6155, "step": 4345 }, { "epoch": 0.4514386620961878, "grad_norm": 0.37530389428138733, "learning_rate": 5.759844185328701e-05, "loss": 1.462, "step": 4346 }, { "epoch": 0.45154253661576815, "grad_norm": 0.4151467978954315, "learning_rate": 5.7582314389923876e-05, "loss": 1.6177, "step": 4347 }, { "epoch": 0.4516464111353485, "grad_norm": 0.4127391278743744, "learning_rate": 5.756618611910343e-05, "loss": 1.782, "step": 4348 }, { "epoch": 0.45175028565492886, "grad_norm": 0.41236239671707153, "learning_rate": 5.7550057042543137e-05, "loss": 1.7076, "step": 4349 }, { "epoch": 0.4518541601745092, "grad_norm": 0.3952932357788086, "learning_rate": 5.753392716196069e-05, "loss": 1.6074, "step": 4350 }, { "epoch": 0.4519580346940895, "grad_norm": 0.5258936285972595, "learning_rate": 5.751779647907376e-05, "loss": 1.8871, "step": 4351 }, { "epoch": 0.4520619092136699, "grad_norm": 0.3987799882888794, "learning_rate": 5.7501664995600134e-05, "loss": 1.7228, "step": 4352 }, { "epoch": 0.4521657837332502, "grad_norm": 0.3863909840583801, "learning_rate": 5.748553271325772e-05, "loss": 1.5237, "step": 4353 }, { "epoch": 0.45226965825283055, "grad_norm": 0.40286675095558167, "learning_rate": 5.746939963376445e-05, "loss": 1.6773, "step": 4354 }, { "epoch": 0.45237353277241094, "grad_norm": 0.4835149645805359, "learning_rate": 5.745326575883839e-05, "loss": 1.7949, "step": 4355 }, { "epoch": 0.45247740729199126, "grad_norm": 0.3882110118865967, "learning_rate": 5.743713109019766e-05, "loss": 1.6454, "step": 4356 }, { "epoch": 0.45258128181157165, "grad_norm": 0.4016772508621216, "learning_rate": 5.742099562956048e-05, "loss": 1.6265, "step": 4357 }, { "epoch": 0.45268515633115197, "grad_norm": 0.4201604425907135, "learning_rate": 5.740485937864515e-05, "loss": 1.6872, "step": 4358 }, { "epoch": 0.4527890308507323, "grad_norm": 0.45017364621162415, "learning_rate": 5.7388722339170066e-05, "loss": 1.8138, "step": 4359 }, { "epoch": 0.4528929053703127, "grad_norm": 0.39483213424682617, "learning_rate": 5.7372584512853665e-05, "loss": 1.5788, "step": 4360 }, { "epoch": 0.452996779889893, "grad_norm": 0.4778917133808136, "learning_rate": 5.7356445901414545e-05, "loss": 1.8203, "step": 4361 }, { "epoch": 0.45310065440947334, "grad_norm": 0.3956323564052582, "learning_rate": 5.734030650657132e-05, "loss": 1.6614, "step": 4362 }, { "epoch": 0.4532045289290537, "grad_norm": 0.42811745405197144, "learning_rate": 5.73241663300427e-05, "loss": 1.851, "step": 4363 }, { "epoch": 0.45330840344863405, "grad_norm": 0.39979517459869385, "learning_rate": 5.730802537354749e-05, "loss": 1.6255, "step": 4364 }, { "epoch": 0.45341227796821437, "grad_norm": 0.4086802899837494, "learning_rate": 5.729188363880459e-05, "loss": 1.4243, "step": 4365 }, { "epoch": 0.45351615248779475, "grad_norm": 0.43692517280578613, "learning_rate": 5.727574112753296e-05, "loss": 1.8354, "step": 4366 }, { "epoch": 0.4536200270073751, "grad_norm": 0.37770721316337585, "learning_rate": 5.725959784145165e-05, "loss": 1.5944, "step": 4367 }, { "epoch": 0.45372390152695546, "grad_norm": 0.4468797445297241, "learning_rate": 5.72434537822798e-05, "loss": 1.7066, "step": 4368 }, { "epoch": 0.4538277760465358, "grad_norm": 0.4045311510562897, "learning_rate": 5.722730895173662e-05, "loss": 1.7554, "step": 4369 }, { "epoch": 0.4539316505661161, "grad_norm": 0.40388810634613037, "learning_rate": 5.721116335154142e-05, "loss": 1.4915, "step": 4370 }, { "epoch": 0.4540355250856965, "grad_norm": 0.4059275686740875, "learning_rate": 5.719501698341356e-05, "loss": 1.6267, "step": 4371 }, { "epoch": 0.45413939960527683, "grad_norm": 0.3910108804702759, "learning_rate": 5.7178869849072526e-05, "loss": 1.6264, "step": 4372 }, { "epoch": 0.45424327412485715, "grad_norm": 0.445516437292099, "learning_rate": 5.716272195023785e-05, "loss": 1.7784, "step": 4373 }, { "epoch": 0.45434714864443754, "grad_norm": 0.4089578688144684, "learning_rate": 5.7146573288629145e-05, "loss": 1.7885, "step": 4374 }, { "epoch": 0.45445102316401786, "grad_norm": 0.4145938754081726, "learning_rate": 5.713042386596614e-05, "loss": 1.6938, "step": 4375 }, { "epoch": 0.4545548976835982, "grad_norm": 0.38517993688583374, "learning_rate": 5.7114273683968625e-05, "loss": 1.5404, "step": 4376 }, { "epoch": 0.4546587722031786, "grad_norm": 0.40502604842185974, "learning_rate": 5.709812274435643e-05, "loss": 1.6948, "step": 4377 }, { "epoch": 0.4547626467227589, "grad_norm": 0.3674617409706116, "learning_rate": 5.708197104884955e-05, "loss": 1.6687, "step": 4378 }, { "epoch": 0.45486652124233923, "grad_norm": 0.38975459337234497, "learning_rate": 5.706581859916799e-05, "loss": 1.5973, "step": 4379 }, { "epoch": 0.4549703957619196, "grad_norm": 0.3994758725166321, "learning_rate": 5.704966539703185e-05, "loss": 1.6266, "step": 4380 }, { "epoch": 0.45507427028149994, "grad_norm": 0.39510855078697205, "learning_rate": 5.7033511444161355e-05, "loss": 1.7119, "step": 4381 }, { "epoch": 0.4551781448010803, "grad_norm": 0.4508286416530609, "learning_rate": 5.701735674227675e-05, "loss": 1.9857, "step": 4382 }, { "epoch": 0.45528201932066065, "grad_norm": 0.4357827603816986, "learning_rate": 5.7001201293098386e-05, "loss": 1.9188, "step": 4383 }, { "epoch": 0.455385893840241, "grad_norm": 0.4613460898399353, "learning_rate": 5.69850450983467e-05, "loss": 1.7152, "step": 4384 }, { "epoch": 0.45548976835982136, "grad_norm": 0.43691280484199524, "learning_rate": 5.6968888159742184e-05, "loss": 1.7644, "step": 4385 }, { "epoch": 0.4555936428794017, "grad_norm": 0.40065327286720276, "learning_rate": 5.695273047900544e-05, "loss": 1.6811, "step": 4386 }, { "epoch": 0.455697517398982, "grad_norm": 0.4466818869113922, "learning_rate": 5.693657205785715e-05, "loss": 1.8516, "step": 4387 }, { "epoch": 0.4558013919185624, "grad_norm": 0.4043349623680115, "learning_rate": 5.6920412898018026e-05, "loss": 1.8134, "step": 4388 }, { "epoch": 0.4559052664381427, "grad_norm": 0.42350390553474426, "learning_rate": 5.690425300120893e-05, "loss": 1.8296, "step": 4389 }, { "epoch": 0.45600914095772305, "grad_norm": 0.38826295733451843, "learning_rate": 5.6888092369150734e-05, "loss": 1.7926, "step": 4390 }, { "epoch": 0.45611301547730343, "grad_norm": 0.39671480655670166, "learning_rate": 5.6871931003564414e-05, "loss": 1.8088, "step": 4391 }, { "epoch": 0.45621688999688376, "grad_norm": 0.38095131516456604, "learning_rate": 5.685576890617107e-05, "loss": 1.5183, "step": 4392 }, { "epoch": 0.45632076451646414, "grad_norm": 0.37252774834632874, "learning_rate": 5.683960607869179e-05, "loss": 1.6363, "step": 4393 }, { "epoch": 0.45642463903604447, "grad_norm": 0.43346065282821655, "learning_rate": 5.6823442522847835e-05, "loss": 1.5937, "step": 4394 }, { "epoch": 0.4565285135556248, "grad_norm": 0.3876819610595703, "learning_rate": 5.680727824036046e-05, "loss": 1.723, "step": 4395 }, { "epoch": 0.4566323880752052, "grad_norm": 0.3917519152164459, "learning_rate": 5.6791113232951063e-05, "loss": 1.5769, "step": 4396 }, { "epoch": 0.4567362625947855, "grad_norm": 0.4223952889442444, "learning_rate": 5.677494750234108e-05, "loss": 1.7898, "step": 4397 }, { "epoch": 0.45684013711436583, "grad_norm": 0.39051714539527893, "learning_rate": 5.675878105025203e-05, "loss": 1.7079, "step": 4398 }, { "epoch": 0.4569440116339462, "grad_norm": 0.3843806982040405, "learning_rate": 5.674261387840551e-05, "loss": 1.5216, "step": 4399 }, { "epoch": 0.45704788615352654, "grad_norm": 0.4141395092010498, "learning_rate": 5.6726445988523224e-05, "loss": 1.6415, "step": 4400 }, { "epoch": 0.45715176067310687, "grad_norm": 0.4137116074562073, "learning_rate": 5.67102773823269e-05, "loss": 1.7626, "step": 4401 }, { "epoch": 0.45725563519268725, "grad_norm": 0.3695942461490631, "learning_rate": 5.669410806153838e-05, "loss": 1.6487, "step": 4402 }, { "epoch": 0.4573595097122676, "grad_norm": 0.3959349989891052, "learning_rate": 5.667793802787957e-05, "loss": 1.7431, "step": 4403 }, { "epoch": 0.4574633842318479, "grad_norm": 0.39805370569229126, "learning_rate": 5.6661767283072444e-05, "loss": 1.6595, "step": 4404 }, { "epoch": 0.4575672587514283, "grad_norm": 0.38343602418899536, "learning_rate": 5.664559582883906e-05, "loss": 1.8235, "step": 4405 }, { "epoch": 0.4576711332710086, "grad_norm": 0.4133247435092926, "learning_rate": 5.662942366690157e-05, "loss": 1.7427, "step": 4406 }, { "epoch": 0.457775007790589, "grad_norm": 0.44640305638313293, "learning_rate": 5.6613250798982156e-05, "loss": 1.7745, "step": 4407 }, { "epoch": 0.4578788823101693, "grad_norm": 0.4178631603717804, "learning_rate": 5.659707722680313e-05, "loss": 1.7777, "step": 4408 }, { "epoch": 0.45798275682974965, "grad_norm": 0.44724735617637634, "learning_rate": 5.6580902952086836e-05, "loss": 1.5845, "step": 4409 }, { "epoch": 0.45808663134933003, "grad_norm": 0.39849719405174255, "learning_rate": 5.656472797655571e-05, "loss": 1.5669, "step": 4410 }, { "epoch": 0.45819050586891036, "grad_norm": 0.3746553957462311, "learning_rate": 5.6548552301932265e-05, "loss": 1.5851, "step": 4411 }, { "epoch": 0.4582943803884907, "grad_norm": 0.4282001852989197, "learning_rate": 5.6532375929939075e-05, "loss": 1.8875, "step": 4412 }, { "epoch": 0.45839825490807107, "grad_norm": 0.3972803056240082, "learning_rate": 5.65161988622988e-05, "loss": 1.6262, "step": 4413 }, { "epoch": 0.4585021294276514, "grad_norm": 0.38488489389419556, "learning_rate": 5.650002110073418e-05, "loss": 1.5766, "step": 4414 }, { "epoch": 0.4586060039472317, "grad_norm": 0.45945605635643005, "learning_rate": 5.648384264696802e-05, "loss": 1.5854, "step": 4415 }, { "epoch": 0.4587098784668121, "grad_norm": 0.3760433793067932, "learning_rate": 5.6467663502723175e-05, "loss": 1.6934, "step": 4416 }, { "epoch": 0.45881375298639243, "grad_norm": 0.38754063844680786, "learning_rate": 5.645148366972264e-05, "loss": 1.7554, "step": 4417 }, { "epoch": 0.45891762750597276, "grad_norm": 0.388735830783844, "learning_rate": 5.643530314968941e-05, "loss": 1.4804, "step": 4418 }, { "epoch": 0.45902150202555314, "grad_norm": 0.39035165309906006, "learning_rate": 5.6419121944346585e-05, "loss": 1.6639, "step": 4419 }, { "epoch": 0.45912537654513347, "grad_norm": 0.3769480586051941, "learning_rate": 5.640294005541735e-05, "loss": 1.7626, "step": 4420 }, { "epoch": 0.45922925106471385, "grad_norm": 0.41671907901763916, "learning_rate": 5.638675748462493e-05, "loss": 1.7363, "step": 4421 }, { "epoch": 0.4593331255842942, "grad_norm": 0.4125288426876068, "learning_rate": 5.637057423369268e-05, "loss": 1.725, "step": 4422 }, { "epoch": 0.4594370001038745, "grad_norm": 0.44852516055107117, "learning_rate": 5.635439030434395e-05, "loss": 1.8253, "step": 4423 }, { "epoch": 0.4595408746234549, "grad_norm": 0.4015621542930603, "learning_rate": 5.6338205698302224e-05, "loss": 1.6797, "step": 4424 }, { "epoch": 0.4596447491430352, "grad_norm": 0.40786075592041016, "learning_rate": 5.6322020417291034e-05, "loss": 1.7569, "step": 4425 }, { "epoch": 0.45974862366261554, "grad_norm": 0.4462954103946686, "learning_rate": 5.630583446303399e-05, "loss": 1.8999, "step": 4426 }, { "epoch": 0.4598524981821959, "grad_norm": 0.40880078077316284, "learning_rate": 5.6289647837254744e-05, "loss": 1.5374, "step": 4427 }, { "epoch": 0.45995637270177625, "grad_norm": 0.4236305356025696, "learning_rate": 5.627346054167707e-05, "loss": 1.7143, "step": 4428 }, { "epoch": 0.4600602472213566, "grad_norm": 0.3998420834541321, "learning_rate": 5.625727257802479e-05, "loss": 1.8177, "step": 4429 }, { "epoch": 0.46016412174093696, "grad_norm": 0.4473719000816345, "learning_rate": 5.624108394802178e-05, "loss": 1.8531, "step": 4430 }, { "epoch": 0.4602679962605173, "grad_norm": 0.39816030859947205, "learning_rate": 5.6224894653392035e-05, "loss": 1.6969, "step": 4431 }, { "epoch": 0.46037187078009767, "grad_norm": 0.36577850580215454, "learning_rate": 5.6208704695859535e-05, "loss": 1.5994, "step": 4432 }, { "epoch": 0.460475745299678, "grad_norm": 0.3840119540691376, "learning_rate": 5.619251407714843e-05, "loss": 1.6662, "step": 4433 }, { "epoch": 0.4605796198192583, "grad_norm": 0.4154397249221802, "learning_rate": 5.617632279898288e-05, "loss": 1.7509, "step": 4434 }, { "epoch": 0.4606834943388387, "grad_norm": 0.3793880045413971, "learning_rate": 5.6160130863087115e-05, "loss": 1.6896, "step": 4435 }, { "epoch": 0.46078736885841903, "grad_norm": 0.3721482455730438, "learning_rate": 5.614393827118548e-05, "loss": 1.6231, "step": 4436 }, { "epoch": 0.46089124337799936, "grad_norm": 0.37379753589630127, "learning_rate": 5.6127745025002344e-05, "loss": 1.6131, "step": 4437 }, { "epoch": 0.46099511789757974, "grad_norm": 0.3776273727416992, "learning_rate": 5.611155112626215e-05, "loss": 1.7206, "step": 4438 }, { "epoch": 0.46109899241716007, "grad_norm": 0.37556153535842896, "learning_rate": 5.609535657668945e-05, "loss": 1.6558, "step": 4439 }, { "epoch": 0.4612028669367404, "grad_norm": 0.41758623719215393, "learning_rate": 5.6079161378008805e-05, "loss": 1.7243, "step": 4440 }, { "epoch": 0.4613067414563208, "grad_norm": 0.39208123087882996, "learning_rate": 5.606296553194489e-05, "loss": 1.7576, "step": 4441 }, { "epoch": 0.4614106159759011, "grad_norm": 0.3936588764190674, "learning_rate": 5.6046769040222446e-05, "loss": 1.5866, "step": 4442 }, { "epoch": 0.46151449049548143, "grad_norm": 0.3708806335926056, "learning_rate": 5.603057190456628e-05, "loss": 1.6697, "step": 4443 }, { "epoch": 0.4616183650150618, "grad_norm": 0.4200730621814728, "learning_rate": 5.601437412670121e-05, "loss": 1.4743, "step": 4444 }, { "epoch": 0.46172223953464214, "grad_norm": 0.40892651677131653, "learning_rate": 5.5998175708352255e-05, "loss": 1.7079, "step": 4445 }, { "epoch": 0.4618261140542225, "grad_norm": 0.41610532999038696, "learning_rate": 5.598197665124434e-05, "loss": 1.6898, "step": 4446 }, { "epoch": 0.46192998857380285, "grad_norm": 0.39250999689102173, "learning_rate": 5.596577695710258e-05, "loss": 1.686, "step": 4447 }, { "epoch": 0.4620338630933832, "grad_norm": 0.3861941695213318, "learning_rate": 5.594957662765211e-05, "loss": 1.6314, "step": 4448 }, { "epoch": 0.46213773761296356, "grad_norm": 0.4138748049736023, "learning_rate": 5.5933375664618134e-05, "loss": 1.6829, "step": 4449 }, { "epoch": 0.4622416121325439, "grad_norm": 0.45050138235092163, "learning_rate": 5.591717406972594e-05, "loss": 1.9202, "step": 4450 }, { "epoch": 0.4623454866521242, "grad_norm": 0.40642380714416504, "learning_rate": 5.590097184470087e-05, "loss": 1.7383, "step": 4451 }, { "epoch": 0.4624493611717046, "grad_norm": 0.4135674238204956, "learning_rate": 5.588476899126832e-05, "loss": 1.7811, "step": 4452 }, { "epoch": 0.4625532356912849, "grad_norm": 0.4031025767326355, "learning_rate": 5.586856551115378e-05, "loss": 1.6378, "step": 4453 }, { "epoch": 0.46265711021086525, "grad_norm": 0.41000255942344666, "learning_rate": 5.585236140608279e-05, "loss": 1.7076, "step": 4454 }, { "epoch": 0.46276098473044563, "grad_norm": 0.41616103053092957, "learning_rate": 5.583615667778094e-05, "loss": 1.7697, "step": 4455 }, { "epoch": 0.46286485925002596, "grad_norm": 0.4099687337875366, "learning_rate": 5.581995132797394e-05, "loss": 1.5478, "step": 4456 }, { "epoch": 0.4629687337696063, "grad_norm": 0.41153401136398315, "learning_rate": 5.5803745358387526e-05, "loss": 1.7247, "step": 4457 }, { "epoch": 0.46307260828918667, "grad_norm": 0.41319242119789124, "learning_rate": 5.578753877074749e-05, "loss": 1.768, "step": 4458 }, { "epoch": 0.463176482808767, "grad_norm": 0.3985736072063446, "learning_rate": 5.577133156677975e-05, "loss": 1.7182, "step": 4459 }, { "epoch": 0.4632803573283474, "grad_norm": 0.4140629470348358, "learning_rate": 5.575512374821018e-05, "loss": 1.8411, "step": 4460 }, { "epoch": 0.4633842318479277, "grad_norm": 0.40386584401130676, "learning_rate": 5.573891531676484e-05, "loss": 1.6013, "step": 4461 }, { "epoch": 0.46348810636750803, "grad_norm": 0.40092045068740845, "learning_rate": 5.572270627416979e-05, "loss": 1.8274, "step": 4462 }, { "epoch": 0.4635919808870884, "grad_norm": 0.4397892355918884, "learning_rate": 5.5706496622151136e-05, "loss": 1.8865, "step": 4463 }, { "epoch": 0.46369585540666874, "grad_norm": 0.41136595606803894, "learning_rate": 5.569028636243513e-05, "loss": 1.6921, "step": 4464 }, { "epoch": 0.46379972992624907, "grad_norm": 0.3688594698905945, "learning_rate": 5.5674075496748e-05, "loss": 1.6099, "step": 4465 }, { "epoch": 0.46390360444582945, "grad_norm": 0.41220369935035706, "learning_rate": 5.56578640268161e-05, "loss": 1.7032, "step": 4466 }, { "epoch": 0.4640074789654098, "grad_norm": 0.38784199953079224, "learning_rate": 5.5641651954365803e-05, "loss": 1.7474, "step": 4467 }, { "epoch": 0.4641113534849901, "grad_norm": 0.3877417743206024, "learning_rate": 5.562543928112358e-05, "loss": 1.7868, "step": 4468 }, { "epoch": 0.4642152280045705, "grad_norm": 0.41094866394996643, "learning_rate": 5.560922600881595e-05, "loss": 1.684, "step": 4469 }, { "epoch": 0.4643191025241508, "grad_norm": 0.3839239180088043, "learning_rate": 5.559301213916952e-05, "loss": 1.601, "step": 4470 }, { "epoch": 0.4644229770437312, "grad_norm": 0.3839179277420044, "learning_rate": 5.557679767391091e-05, "loss": 1.773, "step": 4471 }, { "epoch": 0.4645268515633115, "grad_norm": 0.3948630392551422, "learning_rate": 5.5560582614766845e-05, "loss": 1.7059, "step": 4472 }, { "epoch": 0.46463072608289185, "grad_norm": 0.4155755639076233, "learning_rate": 5.5544366963464134e-05, "loss": 1.7463, "step": 4473 }, { "epoch": 0.46473460060247224, "grad_norm": 0.3817596733570099, "learning_rate": 5.552815072172955e-05, "loss": 1.6543, "step": 4474 }, { "epoch": 0.46483847512205256, "grad_norm": 0.40604111552238464, "learning_rate": 5.5511933891290056e-05, "loss": 1.6462, "step": 4475 }, { "epoch": 0.4649423496416329, "grad_norm": 0.39084017276763916, "learning_rate": 5.5495716473872604e-05, "loss": 1.656, "step": 4476 }, { "epoch": 0.4650462241612133, "grad_norm": 0.4190458059310913, "learning_rate": 5.5479498471204196e-05, "loss": 1.7544, "step": 4477 }, { "epoch": 0.4651500986807936, "grad_norm": 0.37424778938293457, "learning_rate": 5.546327988501196e-05, "loss": 1.5076, "step": 4478 }, { "epoch": 0.4652539732003739, "grad_norm": 0.4260268807411194, "learning_rate": 5.544706071702302e-05, "loss": 1.638, "step": 4479 }, { "epoch": 0.4653578477199543, "grad_norm": 0.3845141530036926, "learning_rate": 5.5430840968964615e-05, "loss": 1.5985, "step": 4480 }, { "epoch": 0.46546172223953464, "grad_norm": 0.385220468044281, "learning_rate": 5.541462064256401e-05, "loss": 1.7247, "step": 4481 }, { "epoch": 0.46556559675911496, "grad_norm": 0.41517341136932373, "learning_rate": 5.5398399739548524e-05, "loss": 1.8184, "step": 4482 }, { "epoch": 0.46566947127869535, "grad_norm": 0.4744667112827301, "learning_rate": 5.538217826164558e-05, "loss": 1.8839, "step": 4483 }, { "epoch": 0.4657733457982757, "grad_norm": 0.40122199058532715, "learning_rate": 5.5365956210582634e-05, "loss": 1.5911, "step": 4484 }, { "epoch": 0.46587722031785606, "grad_norm": 0.4111073911190033, "learning_rate": 5.5349733588087216e-05, "loss": 1.7894, "step": 4485 }, { "epoch": 0.4659810948374364, "grad_norm": 0.38711994886398315, "learning_rate": 5.533351039588689e-05, "loss": 1.7112, "step": 4486 }, { "epoch": 0.4660849693570167, "grad_norm": 0.4004124104976654, "learning_rate": 5.531728663570933e-05, "loss": 1.7559, "step": 4487 }, { "epoch": 0.4661888438765971, "grad_norm": 0.3998907804489136, "learning_rate": 5.53010623092822e-05, "loss": 1.6631, "step": 4488 }, { "epoch": 0.4662927183961774, "grad_norm": 0.3848699927330017, "learning_rate": 5.5284837418333294e-05, "loss": 1.736, "step": 4489 }, { "epoch": 0.46639659291575775, "grad_norm": 0.41114541888237, "learning_rate": 5.5268611964590425e-05, "loss": 1.8165, "step": 4490 }, { "epoch": 0.46650046743533813, "grad_norm": 0.475700706243515, "learning_rate": 5.525238594978146e-05, "loss": 1.8559, "step": 4491 }, { "epoch": 0.46660434195491846, "grad_norm": 0.37533336877822876, "learning_rate": 5.523615937563438e-05, "loss": 1.5622, "step": 4492 }, { "epoch": 0.4667082164744988, "grad_norm": 0.38787853717803955, "learning_rate": 5.521993224387717e-05, "loss": 1.6647, "step": 4493 }, { "epoch": 0.46681209099407917, "grad_norm": 0.4438975155353546, "learning_rate": 5.520370455623789e-05, "loss": 1.7663, "step": 4494 }, { "epoch": 0.4669159655136595, "grad_norm": 0.4348212480545044, "learning_rate": 5.518747631444467e-05, "loss": 1.8218, "step": 4495 }, { "epoch": 0.4670198400332398, "grad_norm": 0.4120256304740906, "learning_rate": 5.517124752022569e-05, "loss": 1.6427, "step": 4496 }, { "epoch": 0.4671237145528202, "grad_norm": 0.3732791244983673, "learning_rate": 5.515501817530917e-05, "loss": 1.5317, "step": 4497 }, { "epoch": 0.46722758907240053, "grad_norm": 0.40980592370033264, "learning_rate": 5.513878828142344e-05, "loss": 1.736, "step": 4498 }, { "epoch": 0.4673314635919809, "grad_norm": 0.42405858635902405, "learning_rate": 5.5122557840296815e-05, "loss": 1.7212, "step": 4499 }, { "epoch": 0.46743533811156124, "grad_norm": 0.380643367767334, "learning_rate": 5.5106326853657764e-05, "loss": 1.6732, "step": 4500 }, { "epoch": 0.46753921263114157, "grad_norm": 0.4484168589115143, "learning_rate": 5.5090095323234735e-05, "loss": 1.8326, "step": 4501 }, { "epoch": 0.46764308715072195, "grad_norm": 0.44353044033050537, "learning_rate": 5.507386325075625e-05, "loss": 1.8159, "step": 4502 }, { "epoch": 0.4677469616703023, "grad_norm": 0.40523630380630493, "learning_rate": 5.505763063795091e-05, "loss": 1.8045, "step": 4503 }, { "epoch": 0.4678508361898826, "grad_norm": 0.41718611121177673, "learning_rate": 5.504139748654735e-05, "loss": 1.7684, "step": 4504 }, { "epoch": 0.467954710709463, "grad_norm": 0.3799745440483093, "learning_rate": 5.5025163798274283e-05, "loss": 1.7299, "step": 4505 }, { "epoch": 0.4680585852290433, "grad_norm": 0.5379766821861267, "learning_rate": 5.500892957486048e-05, "loss": 1.8735, "step": 4506 }, { "epoch": 0.46816245974862364, "grad_norm": 0.42083024978637695, "learning_rate": 5.499269481803474e-05, "loss": 1.6187, "step": 4507 }, { "epoch": 0.468266334268204, "grad_norm": 0.44988521933555603, "learning_rate": 5.497645952952596e-05, "loss": 1.8866, "step": 4508 }, { "epoch": 0.46837020878778435, "grad_norm": 0.37519097328186035, "learning_rate": 5.496022371106304e-05, "loss": 1.6358, "step": 4509 }, { "epoch": 0.46847408330736473, "grad_norm": 0.40404897928237915, "learning_rate": 5.4943987364374985e-05, "loss": 1.6527, "step": 4510 }, { "epoch": 0.46857795782694506, "grad_norm": 0.3824155032634735, "learning_rate": 5.492775049119085e-05, "loss": 1.571, "step": 4511 }, { "epoch": 0.4686818323465254, "grad_norm": 0.38465747237205505, "learning_rate": 5.491151309323973e-05, "loss": 1.8012, "step": 4512 }, { "epoch": 0.46878570686610577, "grad_norm": 0.41244107484817505, "learning_rate": 5.489527517225076e-05, "loss": 1.7915, "step": 4513 }, { "epoch": 0.4688895813856861, "grad_norm": 0.39716869592666626, "learning_rate": 5.487903672995318e-05, "loss": 1.6541, "step": 4514 }, { "epoch": 0.4689934559052664, "grad_norm": 0.3766006529331207, "learning_rate": 5.486279776807626e-05, "loss": 1.7457, "step": 4515 }, { "epoch": 0.4690973304248468, "grad_norm": 0.3792710602283478, "learning_rate": 5.484655828834929e-05, "loss": 1.5523, "step": 4516 }, { "epoch": 0.46920120494442713, "grad_norm": 0.4360962212085724, "learning_rate": 5.483031829250167e-05, "loss": 1.7419, "step": 4517 }, { "epoch": 0.46930507946400746, "grad_norm": 0.4073307514190674, "learning_rate": 5.481407778226284e-05, "loss": 1.7789, "step": 4518 }, { "epoch": 0.46940895398358784, "grad_norm": 0.3982420563697815, "learning_rate": 5.4797836759362256e-05, "loss": 1.6478, "step": 4519 }, { "epoch": 0.46951282850316817, "grad_norm": 0.3903031051158905, "learning_rate": 5.478159522552949e-05, "loss": 1.5748, "step": 4520 }, { "epoch": 0.4696167030227485, "grad_norm": 0.4410886764526367, "learning_rate": 5.476535318249414e-05, "loss": 1.7942, "step": 4521 }, { "epoch": 0.4697205775423289, "grad_norm": 0.3934234380722046, "learning_rate": 5.4749110631985846e-05, "loss": 1.6272, "step": 4522 }, { "epoch": 0.4698244520619092, "grad_norm": 0.4124247431755066, "learning_rate": 5.4732867575734314e-05, "loss": 1.7511, "step": 4523 }, { "epoch": 0.4699283265814896, "grad_norm": 0.4178972840309143, "learning_rate": 5.4716624015469284e-05, "loss": 1.7224, "step": 4524 }, { "epoch": 0.4700322011010699, "grad_norm": 0.44093725085258484, "learning_rate": 5.470037995292061e-05, "loss": 1.6125, "step": 4525 }, { "epoch": 0.47013607562065024, "grad_norm": 0.3923119008541107, "learning_rate": 5.468413538981814e-05, "loss": 1.6888, "step": 4526 }, { "epoch": 0.4702399501402306, "grad_norm": 0.4040331244468689, "learning_rate": 5.466789032789176e-05, "loss": 1.7642, "step": 4527 }, { "epoch": 0.47034382465981095, "grad_norm": 0.4173428416252136, "learning_rate": 5.465164476887149e-05, "loss": 1.6372, "step": 4528 }, { "epoch": 0.4704476991793913, "grad_norm": 0.36699429154396057, "learning_rate": 5.463539871448735e-05, "loss": 1.7026, "step": 4529 }, { "epoch": 0.47055157369897166, "grad_norm": 0.37579965591430664, "learning_rate": 5.461915216646938e-05, "loss": 1.513, "step": 4530 }, { "epoch": 0.470655448218552, "grad_norm": 0.39434462785720825, "learning_rate": 5.4602905126547744e-05, "loss": 1.6832, "step": 4531 }, { "epoch": 0.4707593227381323, "grad_norm": 0.40880143642425537, "learning_rate": 5.4586657596452615e-05, "loss": 1.5767, "step": 4532 }, { "epoch": 0.4708631972577127, "grad_norm": 0.4027133584022522, "learning_rate": 5.4570409577914215e-05, "loss": 1.6025, "step": 4533 }, { "epoch": 0.470967071777293, "grad_norm": 0.37982282042503357, "learning_rate": 5.455416107266287e-05, "loss": 1.6899, "step": 4534 }, { "epoch": 0.47107094629687335, "grad_norm": 0.4183795154094696, "learning_rate": 5.453791208242889e-05, "loss": 1.7355, "step": 4535 }, { "epoch": 0.47117482081645373, "grad_norm": 0.4518531560897827, "learning_rate": 5.452166260894267e-05, "loss": 1.9148, "step": 4536 }, { "epoch": 0.47127869533603406, "grad_norm": 0.41706570982933044, "learning_rate": 5.450541265393464e-05, "loss": 1.7175, "step": 4537 }, { "epoch": 0.47138256985561444, "grad_norm": 0.38298308849334717, "learning_rate": 5.4489162219135314e-05, "loss": 1.8146, "step": 4538 }, { "epoch": 0.47148644437519477, "grad_norm": 0.40449774265289307, "learning_rate": 5.447291130627522e-05, "loss": 1.8353, "step": 4539 }, { "epoch": 0.4715903188947751, "grad_norm": 0.40344521403312683, "learning_rate": 5.4456659917084975e-05, "loss": 1.672, "step": 4540 }, { "epoch": 0.4716941934143555, "grad_norm": 0.3761052191257477, "learning_rate": 5.44404080532952e-05, "loss": 1.7119, "step": 4541 }, { "epoch": 0.4717980679339358, "grad_norm": 0.45389243960380554, "learning_rate": 5.442415571663661e-05, "loss": 1.9569, "step": 4542 }, { "epoch": 0.47190194245351613, "grad_norm": 0.3938455581665039, "learning_rate": 5.440790290883997e-05, "loss": 1.5407, "step": 4543 }, { "epoch": 0.4720058169730965, "grad_norm": 0.39464959502220154, "learning_rate": 5.439164963163601e-05, "loss": 1.7799, "step": 4544 }, { "epoch": 0.47210969149267684, "grad_norm": 0.3845635652542114, "learning_rate": 5.437539588675564e-05, "loss": 1.6902, "step": 4545 }, { "epoch": 0.47221356601225717, "grad_norm": 0.41250425577163696, "learning_rate": 5.435914167592975e-05, "loss": 1.7341, "step": 4546 }, { "epoch": 0.47231744053183755, "grad_norm": 0.42640382051467896, "learning_rate": 5.434288700088925e-05, "loss": 1.7448, "step": 4547 }, { "epoch": 0.4724213150514179, "grad_norm": 0.3739955425262451, "learning_rate": 5.432663186336517e-05, "loss": 1.6007, "step": 4548 }, { "epoch": 0.47252518957099826, "grad_norm": 0.3853756785392761, "learning_rate": 5.431037626508856e-05, "loss": 1.6348, "step": 4549 }, { "epoch": 0.4726290640905786, "grad_norm": 0.4050140082836151, "learning_rate": 5.429412020779049e-05, "loss": 1.6318, "step": 4550 }, { "epoch": 0.4727329386101589, "grad_norm": 0.39661628007888794, "learning_rate": 5.427786369320211e-05, "loss": 1.7079, "step": 4551 }, { "epoch": 0.4728368131297393, "grad_norm": 0.3745506703853607, "learning_rate": 5.4261606723054604e-05, "loss": 1.5058, "step": 4552 }, { "epoch": 0.4729406876493196, "grad_norm": 0.45182400941848755, "learning_rate": 5.424534929907924e-05, "loss": 1.7662, "step": 4553 }, { "epoch": 0.47304456216889995, "grad_norm": 0.39401260018348694, "learning_rate": 5.422909142300729e-05, "loss": 1.6387, "step": 4554 }, { "epoch": 0.47314843668848033, "grad_norm": 0.43190351128578186, "learning_rate": 5.4212833096570085e-05, "loss": 1.9062, "step": 4555 }, { "epoch": 0.47325231120806066, "grad_norm": 0.3953791558742523, "learning_rate": 5.419657432149903e-05, "loss": 1.779, "step": 4556 }, { "epoch": 0.473356185727641, "grad_norm": 0.4370119273662567, "learning_rate": 5.418031509952554e-05, "loss": 1.7205, "step": 4557 }, { "epoch": 0.47346006024722137, "grad_norm": 0.46050959825515747, "learning_rate": 5.4164055432381076e-05, "loss": 1.8187, "step": 4558 }, { "epoch": 0.4735639347668017, "grad_norm": 0.3929772675037384, "learning_rate": 5.414779532179721e-05, "loss": 1.7266, "step": 4559 }, { "epoch": 0.473667809286382, "grad_norm": 0.3913351595401764, "learning_rate": 5.41315347695055e-05, "loss": 1.6847, "step": 4560 }, { "epoch": 0.4737716838059624, "grad_norm": 0.37702706456184387, "learning_rate": 5.4115273777237554e-05, "loss": 1.5548, "step": 4561 }, { "epoch": 0.47387555832554273, "grad_norm": 0.4072859585285187, "learning_rate": 5.4099012346725054e-05, "loss": 1.6328, "step": 4562 }, { "epoch": 0.4739794328451231, "grad_norm": 0.4485922157764435, "learning_rate": 5.4082750479699715e-05, "loss": 1.7856, "step": 4563 }, { "epoch": 0.47408330736470344, "grad_norm": 0.4083961248397827, "learning_rate": 5.406648817789329e-05, "loss": 1.6751, "step": 4564 }, { "epoch": 0.47418718188428377, "grad_norm": 0.4277319312095642, "learning_rate": 5.40502254430376e-05, "loss": 1.793, "step": 4565 }, { "epoch": 0.47429105640386415, "grad_norm": 0.4535433053970337, "learning_rate": 5.403396227686449e-05, "loss": 1.5738, "step": 4566 }, { "epoch": 0.4743949309234445, "grad_norm": 0.41116324067115784, "learning_rate": 5.4017698681105865e-05, "loss": 1.7443, "step": 4567 }, { "epoch": 0.4744988054430248, "grad_norm": 0.38053932785987854, "learning_rate": 5.400143465749368e-05, "loss": 1.6522, "step": 4568 }, { "epoch": 0.4746026799626052, "grad_norm": 0.40427184104919434, "learning_rate": 5.398517020775989e-05, "loss": 1.7336, "step": 4569 }, { "epoch": 0.4747065544821855, "grad_norm": 0.4339055120944977, "learning_rate": 5.39689053336366e-05, "loss": 1.7395, "step": 4570 }, { "epoch": 0.47481042900176584, "grad_norm": 0.415379136800766, "learning_rate": 5.3952640036855825e-05, "loss": 1.5726, "step": 4571 }, { "epoch": 0.4749143035213462, "grad_norm": 0.4650099575519562, "learning_rate": 5.3936374319149705e-05, "loss": 2.0233, "step": 4572 }, { "epoch": 0.47501817804092655, "grad_norm": 0.429559588432312, "learning_rate": 5.392010818225044e-05, "loss": 1.539, "step": 4573 }, { "epoch": 0.4751220525605069, "grad_norm": 0.4143827259540558, "learning_rate": 5.390384162789024e-05, "loss": 1.767, "step": 4574 }, { "epoch": 0.47522592708008726, "grad_norm": 0.4033125042915344, "learning_rate": 5.3887574657801346e-05, "loss": 1.7017, "step": 4575 }, { "epoch": 0.4753298015996676, "grad_norm": 0.41067859530448914, "learning_rate": 5.3871307273716074e-05, "loss": 1.6445, "step": 4576 }, { "epoch": 0.47543367611924797, "grad_norm": 0.41794803738594055, "learning_rate": 5.3855039477366775e-05, "loss": 1.7336, "step": 4577 }, { "epoch": 0.4755375506388283, "grad_norm": 0.38265594840049744, "learning_rate": 5.3838771270485854e-05, "loss": 1.6598, "step": 4578 }, { "epoch": 0.4756414251584086, "grad_norm": 0.3966656029224396, "learning_rate": 5.3822502654805725e-05, "loss": 1.5692, "step": 4579 }, { "epoch": 0.475745299677989, "grad_norm": 0.4140309989452362, "learning_rate": 5.3806233632058876e-05, "loss": 1.687, "step": 4580 }, { "epoch": 0.47584917419756934, "grad_norm": 0.43315640091896057, "learning_rate": 5.378996420397784e-05, "loss": 1.6507, "step": 4581 }, { "epoch": 0.47595304871714966, "grad_norm": 0.4187996983528137, "learning_rate": 5.377369437229518e-05, "loss": 1.782, "step": 4582 }, { "epoch": 0.47605692323673005, "grad_norm": 0.41366416215896606, "learning_rate": 5.375742413874351e-05, "loss": 1.7035, "step": 4583 }, { "epoch": 0.47616079775631037, "grad_norm": 0.4386520981788635, "learning_rate": 5.3741153505055475e-05, "loss": 1.6678, "step": 4584 }, { "epoch": 0.4762646722758907, "grad_norm": 0.4309309124946594, "learning_rate": 5.372488247296378e-05, "loss": 1.7662, "step": 4585 }, { "epoch": 0.4763685467954711, "grad_norm": 0.4261937737464905, "learning_rate": 5.370861104420114e-05, "loss": 1.7769, "step": 4586 }, { "epoch": 0.4764724213150514, "grad_norm": 0.3945866823196411, "learning_rate": 5.369233922050036e-05, "loss": 1.4977, "step": 4587 }, { "epoch": 0.4765762958346318, "grad_norm": 0.38651934266090393, "learning_rate": 5.3676067003594255e-05, "loss": 1.7031, "step": 4588 }, { "epoch": 0.4766801703542121, "grad_norm": 0.43587467074394226, "learning_rate": 5.365979439521569e-05, "loss": 1.6961, "step": 4589 }, { "epoch": 0.47678404487379245, "grad_norm": 0.43202391266822815, "learning_rate": 5.364352139709758e-05, "loss": 1.8117, "step": 4590 }, { "epoch": 0.47688791939337283, "grad_norm": 0.4378650486469269, "learning_rate": 5.362724801097284e-05, "loss": 1.6322, "step": 4591 }, { "epoch": 0.47699179391295315, "grad_norm": 0.4283282160758972, "learning_rate": 5.3610974238574495e-05, "loss": 1.7273, "step": 4592 }, { "epoch": 0.4770956684325335, "grad_norm": 0.42612189054489136, "learning_rate": 5.359470008163556e-05, "loss": 1.824, "step": 4593 }, { "epoch": 0.47719954295211386, "grad_norm": 0.38064736127853394, "learning_rate": 5.35784255418891e-05, "loss": 1.5924, "step": 4594 }, { "epoch": 0.4773034174716942, "grad_norm": 0.40858712792396545, "learning_rate": 5.356215062106823e-05, "loss": 1.7402, "step": 4595 }, { "epoch": 0.4774072919912745, "grad_norm": 0.4329719543457031, "learning_rate": 5.354587532090611e-05, "loss": 1.861, "step": 4596 }, { "epoch": 0.4775111665108549, "grad_norm": 0.4516226649284363, "learning_rate": 5.352959964313592e-05, "loss": 1.8382, "step": 4597 }, { "epoch": 0.47761504103043523, "grad_norm": 0.4081841707229614, "learning_rate": 5.3513323589490924e-05, "loss": 1.6581, "step": 4598 }, { "epoch": 0.47771891555001555, "grad_norm": 0.3968779444694519, "learning_rate": 5.349704716170435e-05, "loss": 1.5309, "step": 4599 }, { "epoch": 0.47782279006959594, "grad_norm": 0.4048573076725006, "learning_rate": 5.348077036150954e-05, "loss": 1.7319, "step": 4600 }, { "epoch": 0.47792666458917626, "grad_norm": 0.4110606014728546, "learning_rate": 5.346449319063983e-05, "loss": 1.7603, "step": 4601 }, { "epoch": 0.47803053910875665, "grad_norm": 0.4106254577636719, "learning_rate": 5.34482156508286e-05, "loss": 1.5944, "step": 4602 }, { "epoch": 0.478134413628337, "grad_norm": 0.3852187693119049, "learning_rate": 5.343193774380933e-05, "loss": 1.5459, "step": 4603 }, { "epoch": 0.4782382881479173, "grad_norm": 0.40513893961906433, "learning_rate": 5.341565947131546e-05, "loss": 1.6581, "step": 4604 }, { "epoch": 0.4783421626674977, "grad_norm": 0.38794079422950745, "learning_rate": 5.3399380835080493e-05, "loss": 1.6475, "step": 4605 }, { "epoch": 0.478446037187078, "grad_norm": 0.3935708999633789, "learning_rate": 5.338310183683799e-05, "loss": 1.7491, "step": 4606 }, { "epoch": 0.47854991170665834, "grad_norm": 0.5326302647590637, "learning_rate": 5.3366822478321524e-05, "loss": 1.7748, "step": 4607 }, { "epoch": 0.4786537862262387, "grad_norm": 0.4071970283985138, "learning_rate": 5.335054276126471e-05, "loss": 1.6662, "step": 4608 }, { "epoch": 0.47875766074581905, "grad_norm": 0.43349170684814453, "learning_rate": 5.333426268740125e-05, "loss": 1.8815, "step": 4609 }, { "epoch": 0.4788615352653994, "grad_norm": 0.372646689414978, "learning_rate": 5.331798225846482e-05, "loss": 1.5864, "step": 4610 }, { "epoch": 0.47896540978497976, "grad_norm": 0.4014649987220764, "learning_rate": 5.330170147618914e-05, "loss": 1.6708, "step": 4611 }, { "epoch": 0.4790692843045601, "grad_norm": 0.4292800724506378, "learning_rate": 5.3285420342308035e-05, "loss": 1.8315, "step": 4612 }, { "epoch": 0.47917315882414047, "grad_norm": 0.41147077083587646, "learning_rate": 5.326913885855527e-05, "loss": 1.6449, "step": 4613 }, { "epoch": 0.4792770333437208, "grad_norm": 0.35840991139411926, "learning_rate": 5.325285702666473e-05, "loss": 1.5407, "step": 4614 }, { "epoch": 0.4793809078633011, "grad_norm": 0.4264986515045166, "learning_rate": 5.323657484837028e-05, "loss": 1.6642, "step": 4615 }, { "epoch": 0.4794847823828815, "grad_norm": 0.4104459881782532, "learning_rate": 5.3220292325405854e-05, "loss": 1.6084, "step": 4616 }, { "epoch": 0.47958865690246183, "grad_norm": 0.4270613193511963, "learning_rate": 5.320400945950542e-05, "loss": 1.9207, "step": 4617 }, { "epoch": 0.47969253142204216, "grad_norm": 0.4374467730522156, "learning_rate": 5.318772625240298e-05, "loss": 1.6418, "step": 4618 }, { "epoch": 0.47979640594162254, "grad_norm": 0.3923068940639496, "learning_rate": 5.3171442705832554e-05, "loss": 1.6963, "step": 4619 }, { "epoch": 0.47990028046120287, "grad_norm": 0.3642514646053314, "learning_rate": 5.315515882152822e-05, "loss": 1.5201, "step": 4620 }, { "epoch": 0.4800041549807832, "grad_norm": 0.41580015420913696, "learning_rate": 5.313887460122408e-05, "loss": 1.5179, "step": 4621 }, { "epoch": 0.4801080295003636, "grad_norm": 0.42775875329971313, "learning_rate": 5.312259004665426e-05, "loss": 1.7967, "step": 4622 }, { "epoch": 0.4802119040199439, "grad_norm": 0.3977086544036865, "learning_rate": 5.310630515955297e-05, "loss": 1.4864, "step": 4623 }, { "epoch": 0.48031577853952423, "grad_norm": 0.41823333501815796, "learning_rate": 5.309001994165442e-05, "loss": 1.7526, "step": 4624 }, { "epoch": 0.4804196530591046, "grad_norm": 0.41474875807762146, "learning_rate": 5.307373439469283e-05, "loss": 1.5733, "step": 4625 }, { "epoch": 0.48052352757868494, "grad_norm": 0.370061457157135, "learning_rate": 5.305744852040253e-05, "loss": 1.541, "step": 4626 }, { "epoch": 0.4806274020982653, "grad_norm": 0.36893734335899353, "learning_rate": 5.304116232051779e-05, "loss": 1.6238, "step": 4627 }, { "epoch": 0.48073127661784565, "grad_norm": 0.4357246458530426, "learning_rate": 5.3024875796773e-05, "loss": 1.9604, "step": 4628 }, { "epoch": 0.480835151137426, "grad_norm": 0.3936040997505188, "learning_rate": 5.3008588950902524e-05, "loss": 1.4643, "step": 4629 }, { "epoch": 0.48093902565700636, "grad_norm": 0.38085660338401794, "learning_rate": 5.2992301784640785e-05, "loss": 1.6453, "step": 4630 }, { "epoch": 0.4810429001765867, "grad_norm": 0.3864935338497162, "learning_rate": 5.2976014299722263e-05, "loss": 1.6807, "step": 4631 }, { "epoch": 0.481146774696167, "grad_norm": 0.405137836933136, "learning_rate": 5.2959726497881425e-05, "loss": 1.7978, "step": 4632 }, { "epoch": 0.4812506492157474, "grad_norm": 0.3816376328468323, "learning_rate": 5.29434383808528e-05, "loss": 1.7011, "step": 4633 }, { "epoch": 0.4813545237353277, "grad_norm": 0.3807872533798218, "learning_rate": 5.292714995037096e-05, "loss": 1.6109, "step": 4634 }, { "epoch": 0.48145839825490805, "grad_norm": 0.3880075514316559, "learning_rate": 5.291086120817047e-05, "loss": 1.6175, "step": 4635 }, { "epoch": 0.48156227277448843, "grad_norm": 0.40148478746414185, "learning_rate": 5.289457215598595e-05, "loss": 1.5225, "step": 4636 }, { "epoch": 0.48166614729406876, "grad_norm": 0.4223523437976837, "learning_rate": 5.28782827955521e-05, "loss": 1.7701, "step": 4637 }, { "epoch": 0.4817700218136491, "grad_norm": 0.41836312413215637, "learning_rate": 5.286199312860358e-05, "loss": 1.6871, "step": 4638 }, { "epoch": 0.48187389633322947, "grad_norm": 0.41826310753822327, "learning_rate": 5.2845703156875106e-05, "loss": 1.7912, "step": 4639 }, { "epoch": 0.4819777708528098, "grad_norm": 0.3970772325992584, "learning_rate": 5.282941288210146e-05, "loss": 1.6757, "step": 4640 }, { "epoch": 0.4820816453723902, "grad_norm": 0.4118634760379791, "learning_rate": 5.2813122306017396e-05, "loss": 1.8312, "step": 4641 }, { "epoch": 0.4821855198919705, "grad_norm": 0.38596856594085693, "learning_rate": 5.279683143035775e-05, "loss": 1.6572, "step": 4642 }, { "epoch": 0.48228939441155083, "grad_norm": 0.3717338442802429, "learning_rate": 5.278054025685739e-05, "loss": 1.6622, "step": 4643 }, { "epoch": 0.4823932689311312, "grad_norm": 0.38928812742233276, "learning_rate": 5.276424878725116e-05, "loss": 1.578, "step": 4644 }, { "epoch": 0.48249714345071154, "grad_norm": 0.4005277156829834, "learning_rate": 5.2747957023274005e-05, "loss": 1.6593, "step": 4645 }, { "epoch": 0.48260101797029187, "grad_norm": 0.37568166851997375, "learning_rate": 5.2731664966660864e-05, "loss": 1.6828, "step": 4646 }, { "epoch": 0.48270489248987225, "grad_norm": 0.4337156116962433, "learning_rate": 5.271537261914672e-05, "loss": 1.8237, "step": 4647 }, { "epoch": 0.4828087670094526, "grad_norm": 0.4006423056125641, "learning_rate": 5.269907998246656e-05, "loss": 1.6222, "step": 4648 }, { "epoch": 0.4829126415290329, "grad_norm": 0.3779585361480713, "learning_rate": 5.268278705835544e-05, "loss": 1.6672, "step": 4649 }, { "epoch": 0.4830165160486133, "grad_norm": 0.4423217475414276, "learning_rate": 5.266649384854841e-05, "loss": 1.7731, "step": 4650 }, { "epoch": 0.4831203905681936, "grad_norm": 0.4306303560733795, "learning_rate": 5.265020035478061e-05, "loss": 1.7711, "step": 4651 }, { "epoch": 0.483224265087774, "grad_norm": 0.38442516326904297, "learning_rate": 5.263390657878712e-05, "loss": 1.4701, "step": 4652 }, { "epoch": 0.4833281396073543, "grad_norm": 0.38074007630348206, "learning_rate": 5.2617612522303136e-05, "loss": 1.6486, "step": 4653 }, { "epoch": 0.48343201412693465, "grad_norm": 0.3747223913669586, "learning_rate": 5.2601318187063855e-05, "loss": 1.6641, "step": 4654 }, { "epoch": 0.48353588864651503, "grad_norm": 0.48334217071533203, "learning_rate": 5.258502357480445e-05, "loss": 1.5608, "step": 4655 }, { "epoch": 0.48363976316609536, "grad_norm": 0.4297388792037964, "learning_rate": 5.256872868726021e-05, "loss": 1.6617, "step": 4656 }, { "epoch": 0.4837436376856757, "grad_norm": 0.433788001537323, "learning_rate": 5.255243352616641e-05, "loss": 1.8598, "step": 4657 }, { "epoch": 0.48384751220525607, "grad_norm": 0.4389197528362274, "learning_rate": 5.253613809325834e-05, "loss": 1.8134, "step": 4658 }, { "epoch": 0.4839513867248364, "grad_norm": 0.39111611247062683, "learning_rate": 5.251984239027133e-05, "loss": 1.5018, "step": 4659 }, { "epoch": 0.4840552612444167, "grad_norm": 0.39344149827957153, "learning_rate": 5.2503546418940784e-05, "loss": 1.7079, "step": 4660 }, { "epoch": 0.4841591357639971, "grad_norm": 0.42502087354660034, "learning_rate": 5.248725018100207e-05, "loss": 1.7178, "step": 4661 }, { "epoch": 0.48426301028357743, "grad_norm": 0.4289288818836212, "learning_rate": 5.2470953678190615e-05, "loss": 1.455, "step": 4662 }, { "epoch": 0.48436688480315776, "grad_norm": 0.41218411922454834, "learning_rate": 5.2454656912241864e-05, "loss": 1.8562, "step": 4663 }, { "epoch": 0.48447075932273814, "grad_norm": 0.38502606749534607, "learning_rate": 5.2438359884891285e-05, "loss": 1.5738, "step": 4664 }, { "epoch": 0.48457463384231847, "grad_norm": 0.4311594069004059, "learning_rate": 5.242206259787441e-05, "loss": 1.69, "step": 4665 }, { "epoch": 0.48467850836189885, "grad_norm": 0.4154076874256134, "learning_rate": 5.2405765052926755e-05, "loss": 1.7648, "step": 4666 }, { "epoch": 0.4847823828814792, "grad_norm": 0.4217148721218109, "learning_rate": 5.23894672517839e-05, "loss": 1.6886, "step": 4667 }, { "epoch": 0.4848862574010595, "grad_norm": 0.39737677574157715, "learning_rate": 5.237316919618143e-05, "loss": 1.6023, "step": 4668 }, { "epoch": 0.4849901319206399, "grad_norm": 0.3839234709739685, "learning_rate": 5.2356870887854926e-05, "loss": 1.7436, "step": 4669 }, { "epoch": 0.4850940064402202, "grad_norm": 0.39807283878326416, "learning_rate": 5.234057232854006e-05, "loss": 1.7422, "step": 4670 }, { "epoch": 0.48519788095980054, "grad_norm": 0.40253087878227234, "learning_rate": 5.232427351997251e-05, "loss": 1.6793, "step": 4671 }, { "epoch": 0.4853017554793809, "grad_norm": 0.41623252630233765, "learning_rate": 5.2307974463887944e-05, "loss": 1.6998, "step": 4672 }, { "epoch": 0.48540562999896125, "grad_norm": 0.39853668212890625, "learning_rate": 5.229167516202211e-05, "loss": 1.7607, "step": 4673 }, { "epoch": 0.4855095045185416, "grad_norm": 0.433289110660553, "learning_rate": 5.2275375616110755e-05, "loss": 1.707, "step": 4674 }, { "epoch": 0.48561337903812196, "grad_norm": 0.37123411893844604, "learning_rate": 5.2259075827889644e-05, "loss": 1.765, "step": 4675 }, { "epoch": 0.4857172535577023, "grad_norm": 0.3944990634918213, "learning_rate": 5.224277579909458e-05, "loss": 1.6677, "step": 4676 }, { "epoch": 0.4858211280772826, "grad_norm": 0.38303902745246887, "learning_rate": 5.222647553146138e-05, "loss": 1.5869, "step": 4677 }, { "epoch": 0.485925002596863, "grad_norm": 0.40630918741226196, "learning_rate": 5.221017502672589e-05, "loss": 1.6596, "step": 4678 }, { "epoch": 0.4860288771164433, "grad_norm": 0.38045310974121094, "learning_rate": 5.219387428662402e-05, "loss": 1.75, "step": 4679 }, { "epoch": 0.4861327516360237, "grad_norm": 0.36957138776779175, "learning_rate": 5.2177573312891646e-05, "loss": 1.5169, "step": 4680 }, { "epoch": 0.48623662615560403, "grad_norm": 0.40403348207473755, "learning_rate": 5.21612721072647e-05, "loss": 1.8167, "step": 4681 }, { "epoch": 0.48634050067518436, "grad_norm": 0.4342941641807556, "learning_rate": 5.214497067147914e-05, "loss": 1.6114, "step": 4682 }, { "epoch": 0.48644437519476474, "grad_norm": 0.40665194392204285, "learning_rate": 5.212866900727093e-05, "loss": 1.6924, "step": 4683 }, { "epoch": 0.48654824971434507, "grad_norm": 0.397088885307312, "learning_rate": 5.211236711637607e-05, "loss": 1.6505, "step": 4684 }, { "epoch": 0.4866521242339254, "grad_norm": 0.4288082718849182, "learning_rate": 5.209606500053059e-05, "loss": 1.9258, "step": 4685 }, { "epoch": 0.4867559987535058, "grad_norm": 0.43021494150161743, "learning_rate": 5.207976266147053e-05, "loss": 1.7348, "step": 4686 }, { "epoch": 0.4868598732730861, "grad_norm": 0.4119536280632019, "learning_rate": 5.206346010093198e-05, "loss": 1.6395, "step": 4687 }, { "epoch": 0.48696374779266643, "grad_norm": 0.4928002655506134, "learning_rate": 5.204715732065103e-05, "loss": 1.846, "step": 4688 }, { "epoch": 0.4870676223122468, "grad_norm": 0.3708318769931793, "learning_rate": 5.203085432236379e-05, "loss": 1.4251, "step": 4689 }, { "epoch": 0.48717149683182714, "grad_norm": 0.3991853892803192, "learning_rate": 5.201455110780641e-05, "loss": 1.6869, "step": 4690 }, { "epoch": 0.4872753713514075, "grad_norm": 0.4395716190338135, "learning_rate": 5.199824767871503e-05, "loss": 1.8586, "step": 4691 }, { "epoch": 0.48737924587098785, "grad_norm": 0.4379446804523468, "learning_rate": 5.198194403682588e-05, "loss": 1.6928, "step": 4692 }, { "epoch": 0.4874831203905682, "grad_norm": 0.4161565601825714, "learning_rate": 5.1965640183875144e-05, "loss": 1.8182, "step": 4693 }, { "epoch": 0.48758699491014856, "grad_norm": 0.41332149505615234, "learning_rate": 5.194933612159906e-05, "loss": 1.8115, "step": 4694 }, { "epoch": 0.4876908694297289, "grad_norm": 0.38883647322654724, "learning_rate": 5.193303185173388e-05, "loss": 1.7725, "step": 4695 }, { "epoch": 0.4877947439493092, "grad_norm": 0.4268670976161957, "learning_rate": 5.191672737601591e-05, "loss": 1.823, "step": 4696 }, { "epoch": 0.4878986184688896, "grad_norm": 0.4016038775444031, "learning_rate": 5.1900422696181396e-05, "loss": 1.5653, "step": 4697 }, { "epoch": 0.4880024929884699, "grad_norm": 0.43578583002090454, "learning_rate": 5.1884117813966705e-05, "loss": 1.7653, "step": 4698 }, { "epoch": 0.48810636750805025, "grad_norm": 0.45143744349479675, "learning_rate": 5.186781273110816e-05, "loss": 1.7976, "step": 4699 }, { "epoch": 0.48821024202763064, "grad_norm": 0.415283739566803, "learning_rate": 5.1851507449342104e-05, "loss": 1.6952, "step": 4700 }, { "epoch": 0.48831411654721096, "grad_norm": 0.38803666830062866, "learning_rate": 5.183520197040498e-05, "loss": 1.6622, "step": 4701 }, { "epoch": 0.4884179910667913, "grad_norm": 0.400741845369339, "learning_rate": 5.1818896296033145e-05, "loss": 1.7436, "step": 4702 }, { "epoch": 0.4885218655863717, "grad_norm": 0.39219218492507935, "learning_rate": 5.180259042796306e-05, "loss": 1.6381, "step": 4703 }, { "epoch": 0.488625740105952, "grad_norm": 0.44362738728523254, "learning_rate": 5.178628436793115e-05, "loss": 1.8482, "step": 4704 }, { "epoch": 0.4887296146255324, "grad_norm": 0.4290272295475006, "learning_rate": 5.176997811767387e-05, "loss": 1.7622, "step": 4705 }, { "epoch": 0.4888334891451127, "grad_norm": 0.38011881709098816, "learning_rate": 5.175367167892774e-05, "loss": 1.7822, "step": 4706 }, { "epoch": 0.48893736366469304, "grad_norm": 0.4040461778640747, "learning_rate": 5.1737365053429254e-05, "loss": 1.4567, "step": 4707 }, { "epoch": 0.4890412381842734, "grad_norm": 0.40654054284095764, "learning_rate": 5.1721058242914935e-05, "loss": 1.7415, "step": 4708 }, { "epoch": 0.48914511270385375, "grad_norm": 0.3765581548213959, "learning_rate": 5.170475124912134e-05, "loss": 1.6306, "step": 4709 }, { "epoch": 0.4892489872234341, "grad_norm": 0.4085083305835724, "learning_rate": 5.168844407378506e-05, "loss": 1.6247, "step": 4710 }, { "epoch": 0.48935286174301446, "grad_norm": 0.4177039861679077, "learning_rate": 5.167213671864263e-05, "loss": 1.6903, "step": 4711 }, { "epoch": 0.4894567362625948, "grad_norm": 0.449349582195282, "learning_rate": 5.1655829185430685e-05, "loss": 1.6055, "step": 4712 }, { "epoch": 0.4895606107821751, "grad_norm": 0.4039447605609894, "learning_rate": 5.1639521475885854e-05, "loss": 1.6719, "step": 4713 }, { "epoch": 0.4896644853017555, "grad_norm": 0.39503759145736694, "learning_rate": 5.162321359174476e-05, "loss": 1.7133, "step": 4714 }, { "epoch": 0.4897683598213358, "grad_norm": 0.4664006531238556, "learning_rate": 5.16069055347441e-05, "loss": 1.805, "step": 4715 }, { "epoch": 0.48987223434091615, "grad_norm": 0.39584633708000183, "learning_rate": 5.1590597306620525e-05, "loss": 1.5628, "step": 4716 }, { "epoch": 0.48997610886049653, "grad_norm": 0.3739977478981018, "learning_rate": 5.1574288909110744e-05, "loss": 1.5699, "step": 4717 }, { "epoch": 0.49007998338007686, "grad_norm": 0.3966865539550781, "learning_rate": 5.155798034395147e-05, "loss": 1.7159, "step": 4718 }, { "epoch": 0.49018385789965724, "grad_norm": 0.48280176520347595, "learning_rate": 5.154167161287945e-05, "loss": 1.8649, "step": 4719 }, { "epoch": 0.49028773241923757, "grad_norm": 0.43992406129837036, "learning_rate": 5.152536271763143e-05, "loss": 1.7603, "step": 4720 }, { "epoch": 0.4903916069388179, "grad_norm": 0.40175652503967285, "learning_rate": 5.1509053659944175e-05, "loss": 1.664, "step": 4721 }, { "epoch": 0.4904954814583983, "grad_norm": 0.3840475082397461, "learning_rate": 5.149274444155446e-05, "loss": 1.5613, "step": 4722 }, { "epoch": 0.4905993559779786, "grad_norm": 0.42482373118400574, "learning_rate": 5.147643506419914e-05, "loss": 1.7978, "step": 4723 }, { "epoch": 0.49070323049755893, "grad_norm": 0.40053901076316833, "learning_rate": 5.146012552961499e-05, "loss": 1.736, "step": 4724 }, { "epoch": 0.4908071050171393, "grad_norm": 0.3966452479362488, "learning_rate": 5.1443815839538845e-05, "loss": 1.7367, "step": 4725 }, { "epoch": 0.49091097953671964, "grad_norm": 0.39208799600601196, "learning_rate": 5.1427505995707594e-05, "loss": 1.4424, "step": 4726 }, { "epoch": 0.49101485405629997, "grad_norm": 0.42873436212539673, "learning_rate": 5.1411195999858074e-05, "loss": 1.8207, "step": 4727 }, { "epoch": 0.49111872857588035, "grad_norm": 0.44238024950027466, "learning_rate": 5.1394885853727195e-05, "loss": 1.756, "step": 4728 }, { "epoch": 0.4912226030954607, "grad_norm": 0.38255882263183594, "learning_rate": 5.137857555905184e-05, "loss": 1.5362, "step": 4729 }, { "epoch": 0.49132647761504106, "grad_norm": 0.4030906558036804, "learning_rate": 5.1362265117568965e-05, "loss": 1.6924, "step": 4730 }, { "epoch": 0.4914303521346214, "grad_norm": 0.4658985733985901, "learning_rate": 5.134595453101547e-05, "loss": 1.7965, "step": 4731 }, { "epoch": 0.4915342266542017, "grad_norm": 0.3776882290840149, "learning_rate": 5.1329643801128324e-05, "loss": 1.5735, "step": 4732 }, { "epoch": 0.4916381011737821, "grad_norm": 0.3989698588848114, "learning_rate": 5.131333292964447e-05, "loss": 1.7203, "step": 4733 }, { "epoch": 0.4917419756933624, "grad_norm": 0.4176570177078247, "learning_rate": 5.12970219183009e-05, "loss": 1.7932, "step": 4734 }, { "epoch": 0.49184585021294275, "grad_norm": 0.42140617966651917, "learning_rate": 5.128071076883463e-05, "loss": 1.8202, "step": 4735 }, { "epoch": 0.49194972473252313, "grad_norm": 0.41681942343711853, "learning_rate": 5.126439948298264e-05, "loss": 1.6721, "step": 4736 }, { "epoch": 0.49205359925210346, "grad_norm": 0.465843141078949, "learning_rate": 5.124808806248199e-05, "loss": 1.6901, "step": 4737 }, { "epoch": 0.4921574737716838, "grad_norm": 0.41670602560043335, "learning_rate": 5.123177650906968e-05, "loss": 1.745, "step": 4738 }, { "epoch": 0.49226134829126417, "grad_norm": 0.415339857339859, "learning_rate": 5.1215464824482775e-05, "loss": 1.7013, "step": 4739 }, { "epoch": 0.4923652228108445, "grad_norm": 0.3894515931606293, "learning_rate": 5.119915301045836e-05, "loss": 1.5359, "step": 4740 }, { "epoch": 0.4924690973304248, "grad_norm": 0.37234368920326233, "learning_rate": 5.118284106873349e-05, "loss": 1.6351, "step": 4741 }, { "epoch": 0.4925729718500052, "grad_norm": 0.47200119495391846, "learning_rate": 5.116652900104527e-05, "loss": 1.9368, "step": 4742 }, { "epoch": 0.49267684636958553, "grad_norm": 0.430601567029953, "learning_rate": 5.1150216809130826e-05, "loss": 1.6977, "step": 4743 }, { "epoch": 0.4927807208891659, "grad_norm": 0.4601489305496216, "learning_rate": 5.113390449472726e-05, "loss": 1.8549, "step": 4744 }, { "epoch": 0.49288459540874624, "grad_norm": 0.42277416586875916, "learning_rate": 5.11175920595717e-05, "loss": 1.7991, "step": 4745 }, { "epoch": 0.49298846992832657, "grad_norm": 0.3993331789970398, "learning_rate": 5.1101279505401325e-05, "loss": 1.7089, "step": 4746 }, { "epoch": 0.49309234444790695, "grad_norm": 0.4037160277366638, "learning_rate": 5.108496683395324e-05, "loss": 1.6423, "step": 4747 }, { "epoch": 0.4931962189674873, "grad_norm": 0.3775770664215088, "learning_rate": 5.106865404696468e-05, "loss": 1.6201, "step": 4748 }, { "epoch": 0.4933000934870676, "grad_norm": 0.4403548836708069, "learning_rate": 5.1052341146172785e-05, "loss": 1.7299, "step": 4749 }, { "epoch": 0.493403968006648, "grad_norm": 0.4192941188812256, "learning_rate": 5.103602813331476e-05, "loss": 1.7541, "step": 4750 }, { "epoch": 0.4935078425262283, "grad_norm": 0.3859345018863678, "learning_rate": 5.101971501012785e-05, "loss": 1.3569, "step": 4751 }, { "epoch": 0.49361171704580864, "grad_norm": 0.4286668598651886, "learning_rate": 5.1003401778349224e-05, "loss": 1.8115, "step": 4752 }, { "epoch": 0.493715591565389, "grad_norm": 0.4297914206981659, "learning_rate": 5.0987088439716134e-05, "loss": 1.6486, "step": 4753 }, { "epoch": 0.49381946608496935, "grad_norm": 0.4609547555446625, "learning_rate": 5.097077499596583e-05, "loss": 1.8906, "step": 4754 }, { "epoch": 0.4939233406045497, "grad_norm": 0.3923378884792328, "learning_rate": 5.095446144883556e-05, "loss": 1.5989, "step": 4755 }, { "epoch": 0.49402721512413006, "grad_norm": 0.42669469118118286, "learning_rate": 5.0938147800062586e-05, "loss": 1.5584, "step": 4756 }, { "epoch": 0.4941310896437104, "grad_norm": 0.4073004126548767, "learning_rate": 5.092183405138419e-05, "loss": 1.6068, "step": 4757 }, { "epoch": 0.49423496416329077, "grad_norm": 0.4162704050540924, "learning_rate": 5.090552020453766e-05, "loss": 1.8445, "step": 4758 }, { "epoch": 0.4943388386828711, "grad_norm": 0.45715436339378357, "learning_rate": 5.08892062612603e-05, "loss": 1.8761, "step": 4759 }, { "epoch": 0.4944427132024514, "grad_norm": 0.38431254029273987, "learning_rate": 5.08728922232894e-05, "loss": 1.5639, "step": 4760 }, { "epoch": 0.4945465877220318, "grad_norm": 0.4815351963043213, "learning_rate": 5.0856578092362285e-05, "loss": 1.7764, "step": 4761 }, { "epoch": 0.49465046224161213, "grad_norm": 0.41645729541778564, "learning_rate": 5.084026387021629e-05, "loss": 1.6924, "step": 4762 }, { "epoch": 0.49475433676119246, "grad_norm": 0.4079589247703552, "learning_rate": 5.0823949558588746e-05, "loss": 1.7549, "step": 4763 }, { "epoch": 0.49485821128077284, "grad_norm": 0.40971922874450684, "learning_rate": 5.0807635159216993e-05, "loss": 1.5389, "step": 4764 }, { "epoch": 0.49496208580035317, "grad_norm": 0.41493481397628784, "learning_rate": 5.079132067383842e-05, "loss": 1.7683, "step": 4765 }, { "epoch": 0.4950659603199335, "grad_norm": 0.3941478729248047, "learning_rate": 5.077500610419036e-05, "loss": 1.7123, "step": 4766 }, { "epoch": 0.4951698348395139, "grad_norm": 0.4111056327819824, "learning_rate": 5.0758691452010175e-05, "loss": 1.7361, "step": 4767 }, { "epoch": 0.4952737093590942, "grad_norm": 0.4429363012313843, "learning_rate": 5.0742376719035276e-05, "loss": 1.8205, "step": 4768 }, { "epoch": 0.4953775838786746, "grad_norm": 0.3898932635784149, "learning_rate": 5.0726061907003045e-05, "loss": 1.6695, "step": 4769 }, { "epoch": 0.4954814583982549, "grad_norm": 0.43110302090644836, "learning_rate": 5.070974701765089e-05, "loss": 1.6963, "step": 4770 }, { "epoch": 0.49558533291783524, "grad_norm": 0.4207279682159424, "learning_rate": 5.069343205271622e-05, "loss": 1.5213, "step": 4771 }, { "epoch": 0.4956892074374156, "grad_norm": 0.423676073551178, "learning_rate": 5.067711701393644e-05, "loss": 1.5632, "step": 4772 }, { "epoch": 0.49579308195699595, "grad_norm": 0.39001235365867615, "learning_rate": 5.066080190304898e-05, "loss": 1.717, "step": 4773 }, { "epoch": 0.4958969564765763, "grad_norm": 0.3835489749908447, "learning_rate": 5.0644486721791273e-05, "loss": 1.7107, "step": 4774 }, { "epoch": 0.49600083099615666, "grad_norm": 0.4401542842388153, "learning_rate": 5.0628171471900756e-05, "loss": 1.8646, "step": 4775 }, { "epoch": 0.496104705515737, "grad_norm": 0.38474228978157043, "learning_rate": 5.0611856155114876e-05, "loss": 1.6334, "step": 4776 }, { "epoch": 0.4962085800353173, "grad_norm": 0.4321017563343048, "learning_rate": 5.059554077317109e-05, "loss": 1.7811, "step": 4777 }, { "epoch": 0.4963124545548977, "grad_norm": 0.38683265447616577, "learning_rate": 5.0579225327806847e-05, "loss": 1.6125, "step": 4778 }, { "epoch": 0.496416329074478, "grad_norm": 0.3820945918560028, "learning_rate": 5.0562909820759654e-05, "loss": 1.554, "step": 4779 }, { "epoch": 0.49652020359405835, "grad_norm": 0.38485878705978394, "learning_rate": 5.054659425376693e-05, "loss": 1.6951, "step": 4780 }, { "epoch": 0.49662407811363873, "grad_norm": 0.40460067987442017, "learning_rate": 5.0530278628566175e-05, "loss": 1.7489, "step": 4781 }, { "epoch": 0.49672795263321906, "grad_norm": 0.4411293566226959, "learning_rate": 5.05139629468949e-05, "loss": 1.7431, "step": 4782 }, { "epoch": 0.49683182715279944, "grad_norm": 0.41115111112594604, "learning_rate": 5.0497647210490565e-05, "loss": 1.5739, "step": 4783 }, { "epoch": 0.49693570167237977, "grad_norm": 0.4054515063762665, "learning_rate": 5.048133142109069e-05, "loss": 1.6096, "step": 4784 }, { "epoch": 0.4970395761919601, "grad_norm": 0.45801469683647156, "learning_rate": 5.046501558043278e-05, "loss": 1.8216, "step": 4785 }, { "epoch": 0.4971434507115405, "grad_norm": 0.3939175009727478, "learning_rate": 5.044869969025434e-05, "loss": 1.6871, "step": 4786 }, { "epoch": 0.4972473252311208, "grad_norm": 0.38540032505989075, "learning_rate": 5.043238375229289e-05, "loss": 1.6358, "step": 4787 }, { "epoch": 0.49735119975070113, "grad_norm": 0.4039050340652466, "learning_rate": 5.0416067768285924e-05, "loss": 1.6841, "step": 4788 }, { "epoch": 0.4974550742702815, "grad_norm": 0.3593752384185791, "learning_rate": 5.039975173997099e-05, "loss": 1.4662, "step": 4789 }, { "epoch": 0.49755894878986184, "grad_norm": 0.4602617919445038, "learning_rate": 5.038343566908562e-05, "loss": 1.8741, "step": 4790 }, { "epoch": 0.49766282330944217, "grad_norm": 0.39395564794540405, "learning_rate": 5.036711955736734e-05, "loss": 1.6235, "step": 4791 }, { "epoch": 0.49776669782902255, "grad_norm": 0.3926306664943695, "learning_rate": 5.0350803406553684e-05, "loss": 1.4756, "step": 4792 }, { "epoch": 0.4978705723486029, "grad_norm": 0.4372478723526001, "learning_rate": 5.033448721838222e-05, "loss": 1.6785, "step": 4793 }, { "epoch": 0.4979744468681832, "grad_norm": 0.41775140166282654, "learning_rate": 5.0318170994590464e-05, "loss": 1.6019, "step": 4794 }, { "epoch": 0.4980783213877636, "grad_norm": 0.4097503423690796, "learning_rate": 5.030185473691597e-05, "loss": 1.8253, "step": 4795 }, { "epoch": 0.4981821959073439, "grad_norm": 0.38628101348876953, "learning_rate": 5.0285538447096315e-05, "loss": 1.6124, "step": 4796 }, { "epoch": 0.4982860704269243, "grad_norm": 0.4230360984802246, "learning_rate": 5.026922212686902e-05, "loss": 1.67, "step": 4797 }, { "epoch": 0.4983899449465046, "grad_norm": 0.40452295541763306, "learning_rate": 5.025290577797169e-05, "loss": 1.6923, "step": 4798 }, { "epoch": 0.49849381946608495, "grad_norm": 0.38569211959838867, "learning_rate": 5.0236589402141864e-05, "loss": 1.585, "step": 4799 }, { "epoch": 0.49859769398566534, "grad_norm": 0.4305466413497925, "learning_rate": 5.022027300111711e-05, "loss": 1.4631, "step": 4800 }, { "epoch": 0.49870156850524566, "grad_norm": 0.42307230830192566, "learning_rate": 5.0203956576635015e-05, "loss": 1.6969, "step": 4801 }, { "epoch": 0.498805443024826, "grad_norm": 0.44654223322868347, "learning_rate": 5.018764013043312e-05, "loss": 1.8514, "step": 4802 }, { "epoch": 0.49890931754440637, "grad_norm": 0.4396657943725586, "learning_rate": 5.0171323664249005e-05, "loss": 1.7264, "step": 4803 }, { "epoch": 0.4990131920639867, "grad_norm": 0.39395204186439514, "learning_rate": 5.0155007179820266e-05, "loss": 1.6963, "step": 4804 }, { "epoch": 0.499117066583567, "grad_norm": 0.4187118411064148, "learning_rate": 5.013869067888448e-05, "loss": 1.7839, "step": 4805 }, { "epoch": 0.4992209411031474, "grad_norm": 0.40489673614501953, "learning_rate": 5.01223741631792e-05, "loss": 1.6467, "step": 4806 }, { "epoch": 0.49932481562272774, "grad_norm": 0.41493701934814453, "learning_rate": 5.010605763444205e-05, "loss": 1.8405, "step": 4807 }, { "epoch": 0.4994286901423081, "grad_norm": 0.4441494643688202, "learning_rate": 5.008974109441057e-05, "loss": 1.6736, "step": 4808 }, { "epoch": 0.49953256466188845, "grad_norm": 0.38821157813072205, "learning_rate": 5.007342454482238e-05, "loss": 1.6041, "step": 4809 }, { "epoch": 0.49963643918146877, "grad_norm": 0.4361104965209961, "learning_rate": 5.005710798741503e-05, "loss": 1.7678, "step": 4810 }, { "epoch": 0.49974031370104915, "grad_norm": 0.39289024472236633, "learning_rate": 5.0040791423926125e-05, "loss": 1.5475, "step": 4811 }, { "epoch": 0.4998441882206295, "grad_norm": 0.35669323801994324, "learning_rate": 5.002447485609327e-05, "loss": 1.4978, "step": 4812 }, { "epoch": 0.4999480627402098, "grad_norm": 0.38865354657173157, "learning_rate": 5.0008158285654027e-05, "loss": 1.7025, "step": 4813 }, { "epoch": 0.5000519372597901, "grad_norm": 0.42419496178627014, "learning_rate": 4.9991841714345985e-05, "loss": 1.7601, "step": 4814 }, { "epoch": 0.5001558117793705, "grad_norm": 0.39531058073043823, "learning_rate": 4.997552514390674e-05, "loss": 1.5614, "step": 4815 }, { "epoch": 0.5002596862989509, "grad_norm": 0.415740430355072, "learning_rate": 4.9959208576073866e-05, "loss": 1.7504, "step": 4816 }, { "epoch": 0.5003635608185312, "grad_norm": 0.4087817966938019, "learning_rate": 4.994289201258498e-05, "loss": 1.6628, "step": 4817 }, { "epoch": 0.5004674353381116, "grad_norm": 0.39313584566116333, "learning_rate": 4.992657545517764e-05, "loss": 1.7745, "step": 4818 }, { "epoch": 0.5005713098576919, "grad_norm": 0.3950080871582031, "learning_rate": 4.9910258905589427e-05, "loss": 1.6339, "step": 4819 }, { "epoch": 0.5006751843772722, "grad_norm": 0.37852367758750916, "learning_rate": 4.989394236555797e-05, "loss": 1.6569, "step": 4820 }, { "epoch": 0.5007790588968526, "grad_norm": 0.43450725078582764, "learning_rate": 4.9877625836820806e-05, "loss": 1.602, "step": 4821 }, { "epoch": 0.500882933416433, "grad_norm": 0.42665791511535645, "learning_rate": 4.986130932111553e-05, "loss": 1.6783, "step": 4822 }, { "epoch": 0.5009868079360132, "grad_norm": 0.396518737077713, "learning_rate": 4.984499282017975e-05, "loss": 1.6118, "step": 4823 }, { "epoch": 0.5010906824555936, "grad_norm": 0.4622015655040741, "learning_rate": 4.9828676335751006e-05, "loss": 1.8231, "step": 4824 }, { "epoch": 0.501194556975174, "grad_norm": 0.3886905908584595, "learning_rate": 4.9812359869566896e-05, "loss": 1.667, "step": 4825 }, { "epoch": 0.5012984314947543, "grad_norm": 0.4054078161716461, "learning_rate": 4.979604342336501e-05, "loss": 1.6415, "step": 4826 }, { "epoch": 0.5014023060143347, "grad_norm": 0.38207703828811646, "learning_rate": 4.977972699888289e-05, "loss": 1.7066, "step": 4827 }, { "epoch": 0.501506180533915, "grad_norm": 0.43799397349357605, "learning_rate": 4.976341059785815e-05, "loss": 1.6469, "step": 4828 }, { "epoch": 0.5016100550534954, "grad_norm": 0.5063971281051636, "learning_rate": 4.974709422202831e-05, "loss": 1.9436, "step": 4829 }, { "epoch": 0.5017139295730757, "grad_norm": 0.39013898372650146, "learning_rate": 4.973077787313099e-05, "loss": 1.5765, "step": 4830 }, { "epoch": 0.5018178040926561, "grad_norm": 0.4318859875202179, "learning_rate": 4.9714461552903704e-05, "loss": 1.8306, "step": 4831 }, { "epoch": 0.5019216786122365, "grad_norm": 0.4619205892086029, "learning_rate": 4.9698145263084036e-05, "loss": 1.7797, "step": 4832 }, { "epoch": 0.5020255531318167, "grad_norm": 0.3963909447193146, "learning_rate": 4.968182900540956e-05, "loss": 1.646, "step": 4833 }, { "epoch": 0.5021294276513971, "grad_norm": 0.3879808187484741, "learning_rate": 4.966551278161779e-05, "loss": 1.6855, "step": 4834 }, { "epoch": 0.5022333021709775, "grad_norm": 0.393045037984848, "learning_rate": 4.964919659344632e-05, "loss": 1.678, "step": 4835 }, { "epoch": 0.5023371766905578, "grad_norm": 0.38027775287628174, "learning_rate": 4.963288044263268e-05, "loss": 1.6203, "step": 4836 }, { "epoch": 0.5024410512101382, "grad_norm": 0.4136490821838379, "learning_rate": 4.96165643309144e-05, "loss": 1.772, "step": 4837 }, { "epoch": 0.5025449257297185, "grad_norm": 0.3826773762702942, "learning_rate": 4.9600248260029016e-05, "loss": 1.8326, "step": 4838 }, { "epoch": 0.5026488002492988, "grad_norm": 0.4194203317165375, "learning_rate": 4.9583932231714095e-05, "loss": 1.5652, "step": 4839 }, { "epoch": 0.5027526747688792, "grad_norm": 0.4387541115283966, "learning_rate": 4.956761624770713e-05, "loss": 1.8814, "step": 4840 }, { "epoch": 0.5028565492884596, "grad_norm": 0.3808096945285797, "learning_rate": 4.9551300309745666e-05, "loss": 1.748, "step": 4841 }, { "epoch": 0.5029604238080398, "grad_norm": 0.4226476550102234, "learning_rate": 4.953498441956723e-05, "loss": 1.6055, "step": 4842 }, { "epoch": 0.5030642983276202, "grad_norm": 0.4065408706665039, "learning_rate": 4.951866857890931e-05, "loss": 1.7516, "step": 4843 }, { "epoch": 0.5031681728472006, "grad_norm": 0.3814634084701538, "learning_rate": 4.950235278950943e-05, "loss": 1.521, "step": 4844 }, { "epoch": 0.5032720473667809, "grad_norm": 0.4002365469932556, "learning_rate": 4.9486037053105125e-05, "loss": 1.7387, "step": 4845 }, { "epoch": 0.5033759218863613, "grad_norm": 0.39226168394088745, "learning_rate": 4.946972137143383e-05, "loss": 1.6565, "step": 4846 }, { "epoch": 0.5034797964059416, "grad_norm": 0.425890177488327, "learning_rate": 4.9453405746233076e-05, "loss": 1.7404, "step": 4847 }, { "epoch": 0.5035836709255219, "grad_norm": 0.41715189814567566, "learning_rate": 4.943709017924038e-05, "loss": 1.7237, "step": 4848 }, { "epoch": 0.5036875454451023, "grad_norm": 0.4025704860687256, "learning_rate": 4.9420774672193165e-05, "loss": 1.4753, "step": 4849 }, { "epoch": 0.5037914199646827, "grad_norm": 0.4412018060684204, "learning_rate": 4.940445922682892e-05, "loss": 1.9151, "step": 4850 }, { "epoch": 0.503895294484263, "grad_norm": 0.3976673483848572, "learning_rate": 4.938814384488515e-05, "loss": 1.707, "step": 4851 }, { "epoch": 0.5039991690038433, "grad_norm": 0.40271249413490295, "learning_rate": 4.937182852809926e-05, "loss": 1.658, "step": 4852 }, { "epoch": 0.5041030435234237, "grad_norm": 0.4666357934474945, "learning_rate": 4.935551327820873e-05, "loss": 1.8472, "step": 4853 }, { "epoch": 0.5042069180430041, "grad_norm": 0.42836159467697144, "learning_rate": 4.933919809695103e-05, "loss": 1.7557, "step": 4854 }, { "epoch": 0.5043107925625844, "grad_norm": 0.44381582736968994, "learning_rate": 4.9322882986063566e-05, "loss": 1.7145, "step": 4855 }, { "epoch": 0.5044146670821648, "grad_norm": 0.4695770740509033, "learning_rate": 4.9306567947283785e-05, "loss": 1.5709, "step": 4856 }, { "epoch": 0.5045185416017451, "grad_norm": 0.40107208490371704, "learning_rate": 4.92902529823491e-05, "loss": 1.6956, "step": 4857 }, { "epoch": 0.5046224161213254, "grad_norm": 0.39944395422935486, "learning_rate": 4.927393809299696e-05, "loss": 1.6442, "step": 4858 }, { "epoch": 0.5047262906409058, "grad_norm": 0.42077764868736267, "learning_rate": 4.925762328096473e-05, "loss": 1.7561, "step": 4859 }, { "epoch": 0.5048301651604862, "grad_norm": 0.4306979477405548, "learning_rate": 4.924130854798983e-05, "loss": 1.6888, "step": 4860 }, { "epoch": 0.5049340396800664, "grad_norm": 0.41281387209892273, "learning_rate": 4.922499389580967e-05, "loss": 1.8061, "step": 4861 }, { "epoch": 0.5050379141996468, "grad_norm": 0.4295770227909088, "learning_rate": 4.920867932616159e-05, "loss": 1.7401, "step": 4862 }, { "epoch": 0.5051417887192272, "grad_norm": 0.37192273139953613, "learning_rate": 4.9192364840783e-05, "loss": 1.5993, "step": 4863 }, { "epoch": 0.5052456632388075, "grad_norm": 0.4081692695617676, "learning_rate": 4.917605044141127e-05, "loss": 1.7359, "step": 4864 }, { "epoch": 0.5053495377583879, "grad_norm": 0.40556600689888, "learning_rate": 4.915973612978372e-05, "loss": 1.6507, "step": 4865 }, { "epoch": 0.5054534122779683, "grad_norm": 0.41496628522872925, "learning_rate": 4.914342190763771e-05, "loss": 1.6881, "step": 4866 }, { "epoch": 0.5055572867975485, "grad_norm": 0.3876533806324005, "learning_rate": 4.912710777671062e-05, "loss": 1.5866, "step": 4867 }, { "epoch": 0.5056611613171289, "grad_norm": 0.3975103795528412, "learning_rate": 4.9110793738739715e-05, "loss": 1.7478, "step": 4868 }, { "epoch": 0.5057650358367093, "grad_norm": 0.37800195813179016, "learning_rate": 4.909447979546235e-05, "loss": 1.6304, "step": 4869 }, { "epoch": 0.5058689103562896, "grad_norm": 0.3874056041240692, "learning_rate": 4.907816594861582e-05, "loss": 1.6151, "step": 4870 }, { "epoch": 0.5059727848758699, "grad_norm": 0.42271357774734497, "learning_rate": 4.906185219993743e-05, "loss": 1.7042, "step": 4871 }, { "epoch": 0.5060766593954503, "grad_norm": 0.44538614153862, "learning_rate": 4.904553855116445e-05, "loss": 1.7608, "step": 4872 }, { "epoch": 0.5061805339150306, "grad_norm": 0.41812676191329956, "learning_rate": 4.90292250040342e-05, "loss": 1.7, "step": 4873 }, { "epoch": 0.506284408434611, "grad_norm": 0.3845331370830536, "learning_rate": 4.901291156028388e-05, "loss": 1.5532, "step": 4874 }, { "epoch": 0.5063882829541914, "grad_norm": 0.4065440595149994, "learning_rate": 4.899659822165079e-05, "loss": 1.7019, "step": 4875 }, { "epoch": 0.5064921574737716, "grad_norm": 0.42397770285606384, "learning_rate": 4.898028498987217e-05, "loss": 1.7414, "step": 4876 }, { "epoch": 0.506596031993352, "grad_norm": 0.38539576530456543, "learning_rate": 4.8963971866685245e-05, "loss": 1.7377, "step": 4877 }, { "epoch": 0.5066999065129324, "grad_norm": 0.373605877161026, "learning_rate": 4.894765885382722e-05, "loss": 1.6698, "step": 4878 }, { "epoch": 0.5068037810325128, "grad_norm": 0.36256158351898193, "learning_rate": 4.8931345953035326e-05, "loss": 1.5029, "step": 4879 }, { "epoch": 0.506907655552093, "grad_norm": 0.39593711495399475, "learning_rate": 4.891503316604677e-05, "loss": 1.7055, "step": 4880 }, { "epoch": 0.5070115300716734, "grad_norm": 0.3991319239139557, "learning_rate": 4.889872049459869e-05, "loss": 1.6002, "step": 4881 }, { "epoch": 0.5071154045912538, "grad_norm": 0.3885091543197632, "learning_rate": 4.88824079404283e-05, "loss": 1.6086, "step": 4882 }, { "epoch": 0.5072192791108341, "grad_norm": 0.41479891538619995, "learning_rate": 4.8866095505272754e-05, "loss": 1.4958, "step": 4883 }, { "epoch": 0.5073231536304145, "grad_norm": 0.4320788085460663, "learning_rate": 4.8849783190869186e-05, "loss": 1.6549, "step": 4884 }, { "epoch": 0.5074270281499949, "grad_norm": 0.49210458993911743, "learning_rate": 4.883347099895473e-05, "loss": 1.9594, "step": 4885 }, { "epoch": 0.5075309026695751, "grad_norm": 0.38703057169914246, "learning_rate": 4.8817158931266534e-05, "loss": 1.5258, "step": 4886 }, { "epoch": 0.5076347771891555, "grad_norm": 0.387260764837265, "learning_rate": 4.880084698954166e-05, "loss": 1.5779, "step": 4887 }, { "epoch": 0.5077386517087359, "grad_norm": 0.42436152696609497, "learning_rate": 4.878453517551722e-05, "loss": 1.874, "step": 4888 }, { "epoch": 0.5078425262283162, "grad_norm": 0.3863707184791565, "learning_rate": 4.876822349093034e-05, "loss": 1.4991, "step": 4889 }, { "epoch": 0.5079464007478965, "grad_norm": 0.40059852600097656, "learning_rate": 4.875191193751802e-05, "loss": 1.5891, "step": 4890 }, { "epoch": 0.5080502752674769, "grad_norm": 0.45580723881721497, "learning_rate": 4.873560051701736e-05, "loss": 1.8611, "step": 4891 }, { "epoch": 0.5081541497870572, "grad_norm": 0.3890886902809143, "learning_rate": 4.871928923116539e-05, "loss": 1.5606, "step": 4892 }, { "epoch": 0.5082580243066376, "grad_norm": 0.42675158381462097, "learning_rate": 4.8702978081699104e-05, "loss": 1.7001, "step": 4893 }, { "epoch": 0.508361898826218, "grad_norm": 0.44091951847076416, "learning_rate": 4.868666707035554e-05, "loss": 1.8272, "step": 4894 }, { "epoch": 0.5084657733457982, "grad_norm": 0.3957894444465637, "learning_rate": 4.86703561988717e-05, "loss": 1.8023, "step": 4895 }, { "epoch": 0.5085696478653786, "grad_norm": 0.37731900811195374, "learning_rate": 4.865404546898454e-05, "loss": 1.6024, "step": 4896 }, { "epoch": 0.508673522384959, "grad_norm": 0.41856059432029724, "learning_rate": 4.863773488243105e-05, "loss": 1.6815, "step": 4897 }, { "epoch": 0.5087773969045393, "grad_norm": 0.45765843987464905, "learning_rate": 4.862142444094816e-05, "loss": 1.8893, "step": 4898 }, { "epoch": 0.5088812714241197, "grad_norm": 0.4164111018180847, "learning_rate": 4.8605114146272824e-05, "loss": 1.6191, "step": 4899 }, { "epoch": 0.5089851459437, "grad_norm": 0.4314156770706177, "learning_rate": 4.858880400014194e-05, "loss": 1.9152, "step": 4900 }, { "epoch": 0.5090890204632803, "grad_norm": 0.42690524458885193, "learning_rate": 4.857249400429244e-05, "loss": 1.787, "step": 4901 }, { "epoch": 0.5091928949828607, "grad_norm": 0.3987329602241516, "learning_rate": 4.8556184160461167e-05, "loss": 1.6278, "step": 4902 }, { "epoch": 0.5092967695024411, "grad_norm": 0.427369624376297, "learning_rate": 4.8539874470385024e-05, "loss": 1.8381, "step": 4903 }, { "epoch": 0.5094006440220213, "grad_norm": 0.39449864625930786, "learning_rate": 4.852356493580088e-05, "loss": 1.7702, "step": 4904 }, { "epoch": 0.5095045185416017, "grad_norm": 0.4443662464618683, "learning_rate": 4.850725555844555e-05, "loss": 1.8187, "step": 4905 }, { "epoch": 0.5096083930611821, "grad_norm": 0.42031386494636536, "learning_rate": 4.8490946340055837e-05, "loss": 1.7612, "step": 4906 }, { "epoch": 0.5097122675807625, "grad_norm": 0.44448190927505493, "learning_rate": 4.8474637282368577e-05, "loss": 1.6873, "step": 4907 }, { "epoch": 0.5098161421003428, "grad_norm": 0.4955507516860962, "learning_rate": 4.8458328387120565e-05, "loss": 2.1091, "step": 4908 }, { "epoch": 0.5099200166199231, "grad_norm": 0.3980197012424469, "learning_rate": 4.844201965604853e-05, "loss": 1.6583, "step": 4909 }, { "epoch": 0.5100238911395035, "grad_norm": 0.41697457432746887, "learning_rate": 4.842571109088927e-05, "loss": 1.5176, "step": 4910 }, { "epoch": 0.5101277656590838, "grad_norm": 0.412379652261734, "learning_rate": 4.840940269337949e-05, "loss": 1.5699, "step": 4911 }, { "epoch": 0.5102316401786642, "grad_norm": 0.420980840921402, "learning_rate": 4.839309446525592e-05, "loss": 1.6266, "step": 4912 }, { "epoch": 0.5103355146982446, "grad_norm": 0.4162856340408325, "learning_rate": 4.837678640825524e-05, "loss": 1.6759, "step": 4913 }, { "epoch": 0.5104393892178248, "grad_norm": 0.4442167580127716, "learning_rate": 4.836047852411417e-05, "loss": 1.7818, "step": 4914 }, { "epoch": 0.5105432637374052, "grad_norm": 0.41404595971107483, "learning_rate": 4.834417081456933e-05, "loss": 1.7876, "step": 4915 }, { "epoch": 0.5106471382569856, "grad_norm": 0.4493081271648407, "learning_rate": 4.8327863281357376e-05, "loss": 1.9106, "step": 4916 }, { "epoch": 0.5107510127765659, "grad_norm": 0.379351407289505, "learning_rate": 4.8311555926214966e-05, "loss": 1.6729, "step": 4917 }, { "epoch": 0.5108548872961463, "grad_norm": 0.40297406911849976, "learning_rate": 4.829524875087867e-05, "loss": 1.7575, "step": 4918 }, { "epoch": 0.5109587618157266, "grad_norm": 0.4127398133277893, "learning_rate": 4.8278941757085063e-05, "loss": 1.5132, "step": 4919 }, { "epoch": 0.5110626363353069, "grad_norm": 0.38783156871795654, "learning_rate": 4.8262634946570764e-05, "loss": 1.6706, "step": 4920 }, { "epoch": 0.5111665108548873, "grad_norm": 0.39855116605758667, "learning_rate": 4.824632832107227e-05, "loss": 1.7722, "step": 4921 }, { "epoch": 0.5112703853744677, "grad_norm": 0.38658004999160767, "learning_rate": 4.8230021882326135e-05, "loss": 1.7226, "step": 4922 }, { "epoch": 0.5113742598940479, "grad_norm": 0.4114481210708618, "learning_rate": 4.821371563206888e-05, "loss": 1.7858, "step": 4923 }, { "epoch": 0.5114781344136283, "grad_norm": 0.3694150745868683, "learning_rate": 4.819740957203696e-05, "loss": 1.4821, "step": 4924 }, { "epoch": 0.5115820089332087, "grad_norm": 0.40538889169692993, "learning_rate": 4.818110370396686e-05, "loss": 1.6467, "step": 4925 }, { "epoch": 0.511685883452789, "grad_norm": 0.4382588863372803, "learning_rate": 4.816479802959503e-05, "loss": 1.6146, "step": 4926 }, { "epoch": 0.5117897579723694, "grad_norm": 0.41980645060539246, "learning_rate": 4.81484925506579e-05, "loss": 1.6783, "step": 4927 }, { "epoch": 0.5118936324919497, "grad_norm": 0.413632333278656, "learning_rate": 4.813218726889186e-05, "loss": 1.749, "step": 4928 }, { "epoch": 0.51199750701153, "grad_norm": 0.40364977717399597, "learning_rate": 4.8115882186033326e-05, "loss": 1.5492, "step": 4929 }, { "epoch": 0.5121013815311104, "grad_norm": 0.3991188406944275, "learning_rate": 4.8099577303818616e-05, "loss": 1.7461, "step": 4930 }, { "epoch": 0.5122052560506908, "grad_norm": 0.4893375337123871, "learning_rate": 4.8083272623984104e-05, "loss": 1.9981, "step": 4931 }, { "epoch": 0.5123091305702712, "grad_norm": 0.40781837701797485, "learning_rate": 4.8066968148266116e-05, "loss": 1.7219, "step": 4932 }, { "epoch": 0.5124130050898514, "grad_norm": 0.39397352933883667, "learning_rate": 4.805066387840096e-05, "loss": 1.6126, "step": 4933 }, { "epoch": 0.5125168796094318, "grad_norm": 0.41246309876441956, "learning_rate": 4.803435981612486e-05, "loss": 1.7436, "step": 4934 }, { "epoch": 0.5126207541290122, "grad_norm": 0.3805597424507141, "learning_rate": 4.801805596317412e-05, "loss": 1.6401, "step": 4935 }, { "epoch": 0.5127246286485925, "grad_norm": 0.404367059469223, "learning_rate": 4.800175232128498e-05, "loss": 1.6504, "step": 4936 }, { "epoch": 0.5128285031681729, "grad_norm": 0.3783699870109558, "learning_rate": 4.79854488921936e-05, "loss": 1.6495, "step": 4937 }, { "epoch": 0.5129323776877532, "grad_norm": 0.48037418723106384, "learning_rate": 4.7969145677636215e-05, "loss": 1.6088, "step": 4938 }, { "epoch": 0.5130362522073335, "grad_norm": 0.4683350622653961, "learning_rate": 4.795284267934898e-05, "loss": 1.8945, "step": 4939 }, { "epoch": 0.5131401267269139, "grad_norm": 0.4192398488521576, "learning_rate": 4.793653989906802e-05, "loss": 1.8459, "step": 4940 }, { "epoch": 0.5132440012464943, "grad_norm": 0.3951408565044403, "learning_rate": 4.792023733852947e-05, "loss": 1.6314, "step": 4941 }, { "epoch": 0.5133478757660745, "grad_norm": 0.38078486919403076, "learning_rate": 4.790393499946943e-05, "loss": 1.6349, "step": 4942 }, { "epoch": 0.5134517502856549, "grad_norm": 0.4152866005897522, "learning_rate": 4.788763288362394e-05, "loss": 1.7195, "step": 4943 }, { "epoch": 0.5135556248052353, "grad_norm": 0.37676650285720825, "learning_rate": 4.787133099272908e-05, "loss": 1.5917, "step": 4944 }, { "epoch": 0.5136594993248156, "grad_norm": 0.3816780149936676, "learning_rate": 4.7855029328520876e-05, "loss": 1.6664, "step": 4945 }, { "epoch": 0.513763373844396, "grad_norm": 0.3960273265838623, "learning_rate": 4.7838727892735315e-05, "loss": 1.6255, "step": 4946 }, { "epoch": 0.5138672483639763, "grad_norm": 0.41314899921417236, "learning_rate": 4.782242668710836e-05, "loss": 1.7316, "step": 4947 }, { "epoch": 0.5139711228835566, "grad_norm": 0.3930761516094208, "learning_rate": 4.7806125713376e-05, "loss": 1.6513, "step": 4948 }, { "epoch": 0.514074997403137, "grad_norm": 0.4532455503940582, "learning_rate": 4.7789824973274114e-05, "loss": 1.6415, "step": 4949 }, { "epoch": 0.5141788719227174, "grad_norm": 0.4238525927066803, "learning_rate": 4.7773524468538624e-05, "loss": 1.8161, "step": 4950 }, { "epoch": 0.5142827464422977, "grad_norm": 0.40888962149620056, "learning_rate": 4.7757224200905444e-05, "loss": 1.7179, "step": 4951 }, { "epoch": 0.514386620961878, "grad_norm": 0.38617759943008423, "learning_rate": 4.774092417211037e-05, "loss": 1.6069, "step": 4952 }, { "epoch": 0.5144904954814584, "grad_norm": 0.41479814052581787, "learning_rate": 4.7724624383889256e-05, "loss": 1.7158, "step": 4953 }, { "epoch": 0.5145943700010387, "grad_norm": 0.38144171237945557, "learning_rate": 4.770832483797789e-05, "loss": 1.6917, "step": 4954 }, { "epoch": 0.5146982445206191, "grad_norm": 0.41436558961868286, "learning_rate": 4.769202553611206e-05, "loss": 1.817, "step": 4955 }, { "epoch": 0.5148021190401995, "grad_norm": 0.43558332324028015, "learning_rate": 4.76757264800275e-05, "loss": 1.7038, "step": 4956 }, { "epoch": 0.5149059935597798, "grad_norm": 0.3971516788005829, "learning_rate": 4.765942767145994e-05, "loss": 1.6663, "step": 4957 }, { "epoch": 0.5150098680793601, "grad_norm": 0.39739498496055603, "learning_rate": 4.764312911214509e-05, "loss": 1.5594, "step": 4958 }, { "epoch": 0.5151137425989405, "grad_norm": 0.408383846282959, "learning_rate": 4.762683080381859e-05, "loss": 1.7743, "step": 4959 }, { "epoch": 0.5152176171185209, "grad_norm": 0.39290642738342285, "learning_rate": 4.761053274821611e-05, "loss": 1.6531, "step": 4960 }, { "epoch": 0.5153214916381011, "grad_norm": 0.46999290585517883, "learning_rate": 4.7594234947073256e-05, "loss": 1.8683, "step": 4961 }, { "epoch": 0.5154253661576815, "grad_norm": 0.40802252292633057, "learning_rate": 4.75779374021256e-05, "loss": 1.5823, "step": 4962 }, { "epoch": 0.5155292406772619, "grad_norm": 0.42017194628715515, "learning_rate": 4.756164011510871e-05, "loss": 1.802, "step": 4963 }, { "epoch": 0.5156331151968422, "grad_norm": 0.4172336161136627, "learning_rate": 4.754534308775816e-05, "loss": 1.7046, "step": 4964 }, { "epoch": 0.5157369897164226, "grad_norm": 0.38955143094062805, "learning_rate": 4.75290463218094e-05, "loss": 1.5606, "step": 4965 }, { "epoch": 0.515840864236003, "grad_norm": 0.47302696108818054, "learning_rate": 4.7512749818997936e-05, "loss": 1.6814, "step": 4966 }, { "epoch": 0.5159447387555832, "grad_norm": 0.40039142966270447, "learning_rate": 4.749645358105923e-05, "loss": 1.6354, "step": 4967 }, { "epoch": 0.5160486132751636, "grad_norm": 0.3926571011543274, "learning_rate": 4.748015760972867e-05, "loss": 1.6332, "step": 4968 }, { "epoch": 0.516152487794744, "grad_norm": 0.4233894348144531, "learning_rate": 4.7463861906741666e-05, "loss": 1.3089, "step": 4969 }, { "epoch": 0.5162563623143243, "grad_norm": 0.3955030143260956, "learning_rate": 4.7447566473833615e-05, "loss": 1.5917, "step": 4970 }, { "epoch": 0.5163602368339046, "grad_norm": 0.3940108120441437, "learning_rate": 4.743127131273979e-05, "loss": 1.7515, "step": 4971 }, { "epoch": 0.516464111353485, "grad_norm": 0.44134819507598877, "learning_rate": 4.7414976425195544e-05, "loss": 1.5085, "step": 4972 }, { "epoch": 0.5165679858730653, "grad_norm": 0.44649553298950195, "learning_rate": 4.739868181293616e-05, "loss": 1.8201, "step": 4973 }, { "epoch": 0.5166718603926457, "grad_norm": 0.4249851703643799, "learning_rate": 4.738238747769687e-05, "loss": 1.7515, "step": 4974 }, { "epoch": 0.5167757349122261, "grad_norm": 0.41049546003341675, "learning_rate": 4.7366093421212884e-05, "loss": 1.6677, "step": 4975 }, { "epoch": 0.5168796094318063, "grad_norm": 0.4584263563156128, "learning_rate": 4.7349799645219415e-05, "loss": 1.7249, "step": 4976 }, { "epoch": 0.5169834839513867, "grad_norm": 0.4268789291381836, "learning_rate": 4.73335061514516e-05, "loss": 1.6639, "step": 4977 }, { "epoch": 0.5170873584709671, "grad_norm": 0.4349389672279358, "learning_rate": 4.731721294164457e-05, "loss": 1.6422, "step": 4978 }, { "epoch": 0.5171912329905474, "grad_norm": 0.43998467922210693, "learning_rate": 4.730092001753346e-05, "loss": 1.8851, "step": 4979 }, { "epoch": 0.5172951075101277, "grad_norm": 0.4167867600917816, "learning_rate": 4.72846273808533e-05, "loss": 1.6397, "step": 4980 }, { "epoch": 0.5173989820297081, "grad_norm": 0.40250730514526367, "learning_rate": 4.726833503333915e-05, "loss": 1.4696, "step": 4981 }, { "epoch": 0.5175028565492885, "grad_norm": 0.44303545355796814, "learning_rate": 4.7252042976726e-05, "loss": 1.814, "step": 4982 }, { "epoch": 0.5176067310688688, "grad_norm": 0.3853762745857239, "learning_rate": 4.723575121274885e-05, "loss": 1.6521, "step": 4983 }, { "epoch": 0.5177106055884492, "grad_norm": 0.39856797456741333, "learning_rate": 4.721945974314263e-05, "loss": 1.6085, "step": 4984 }, { "epoch": 0.5178144801080296, "grad_norm": 0.380560040473938, "learning_rate": 4.720316856964224e-05, "loss": 1.6189, "step": 4985 }, { "epoch": 0.5179183546276098, "grad_norm": 0.4493720531463623, "learning_rate": 4.7186877693982616e-05, "loss": 1.7474, "step": 4986 }, { "epoch": 0.5180222291471902, "grad_norm": 0.38229069113731384, "learning_rate": 4.717058711789855e-05, "loss": 1.5625, "step": 4987 }, { "epoch": 0.5181261036667706, "grad_norm": 0.3856019675731659, "learning_rate": 4.715429684312489e-05, "loss": 1.6004, "step": 4988 }, { "epoch": 0.5182299781863509, "grad_norm": 0.3876418471336365, "learning_rate": 4.713800687139644e-05, "loss": 1.6596, "step": 4989 }, { "epoch": 0.5183338527059312, "grad_norm": 0.41648218035697937, "learning_rate": 4.712171720444791e-05, "loss": 1.8412, "step": 4990 }, { "epoch": 0.5184377272255116, "grad_norm": 0.43493372201919556, "learning_rate": 4.7105427844014046e-05, "loss": 1.8457, "step": 4991 }, { "epoch": 0.5185416017450919, "grad_norm": 0.37969422340393066, "learning_rate": 4.7089138791829554e-05, "loss": 1.6451, "step": 4992 }, { "epoch": 0.5186454762646723, "grad_norm": 0.4082360863685608, "learning_rate": 4.707285004962906e-05, "loss": 1.8016, "step": 4993 }, { "epoch": 0.5187493507842527, "grad_norm": 0.42352405190467834, "learning_rate": 4.70565616191472e-05, "loss": 1.5526, "step": 4994 }, { "epoch": 0.5188532253038329, "grad_norm": 0.3847419321537018, "learning_rate": 4.704027350211859e-05, "loss": 1.6518, "step": 4995 }, { "epoch": 0.5189570998234133, "grad_norm": 0.40276169776916504, "learning_rate": 4.702398570027775e-05, "loss": 1.5955, "step": 4996 }, { "epoch": 0.5190609743429937, "grad_norm": 0.44198933243751526, "learning_rate": 4.700769821535921e-05, "loss": 1.7202, "step": 4997 }, { "epoch": 0.519164848862574, "grad_norm": 0.39979052543640137, "learning_rate": 4.6991411049097494e-05, "loss": 1.6186, "step": 4998 }, { "epoch": 0.5192687233821544, "grad_norm": 0.39757975935935974, "learning_rate": 4.697512420322701e-05, "loss": 1.7077, "step": 4999 }, { "epoch": 0.5193725979017347, "grad_norm": 0.3819902837276459, "learning_rate": 4.69588376794822e-05, "loss": 1.5771, "step": 5000 } ], "logging_steps": 1.0, "max_steps": 9627, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.54256789372928e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }