{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3462340984047922, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.974445267101927e-05, "grad_norm": 1.5155678328486468, "learning_rate": 2.991325157044571e-09, "loss": 0.8371, "step": 1 }, { "epoch": 0.00017948890534203853, "grad_norm": 1.7181883373178652, "learning_rate": 5.982650314089142e-09, "loss": 0.8976, "step": 2 }, { "epoch": 0.0002692333580130578, "grad_norm": 1.9203570504073555, "learning_rate": 8.973975471133713e-09, "loss": 0.9868, "step": 3 }, { "epoch": 0.00035897781068407707, "grad_norm": 1.7812839944837784, "learning_rate": 1.1965300628178285e-08, "loss": 0.9393, "step": 4 }, { "epoch": 0.0004487222633550964, "grad_norm": 1.7163993153629633, "learning_rate": 1.4956625785222854e-08, "loss": 0.9058, "step": 5 }, { "epoch": 0.0005384667160261156, "grad_norm": 1.9157521538020188, "learning_rate": 1.7947950942267426e-08, "loss": 0.9286, "step": 6 }, { "epoch": 0.0006282111686971349, "grad_norm": 1.5453730186317258, "learning_rate": 2.0939276099311998e-08, "loss": 0.7728, "step": 7 }, { "epoch": 0.0007179556213681541, "grad_norm": 1.7293411372040977, "learning_rate": 2.393060125635657e-08, "loss": 0.9033, "step": 8 }, { "epoch": 0.0008077000740391734, "grad_norm": 1.7530758316683852, "learning_rate": 2.692192641340114e-08, "loss": 0.8896, "step": 9 }, { "epoch": 0.0008974445267101928, "grad_norm": 1.8334042007070164, "learning_rate": 2.991325157044571e-08, "loss": 1.0002, "step": 10 }, { "epoch": 0.000987188979381212, "grad_norm": 1.67761246833091, "learning_rate": 3.290457672749028e-08, "loss": 0.8564, "step": 11 }, { "epoch": 0.0010769334320522312, "grad_norm": 1.770968988304497, "learning_rate": 3.589590188453485e-08, "loss": 0.9614, "step": 12 }, { "epoch": 0.0011666778847232505, "grad_norm": 1.7782858646219546, "learning_rate": 3.8887227041579424e-08, "loss": 0.9359, "step": 13 }, { "epoch": 0.0012564223373942697, "grad_norm": 1.772940724626866, "learning_rate": 4.1878552198623995e-08, "loss": 1.0056, "step": 14 }, { "epoch": 0.001346166790065289, "grad_norm": 1.7116137420316484, "learning_rate": 4.486987735566857e-08, "loss": 0.8909, "step": 15 }, { "epoch": 0.0014359112427363083, "grad_norm": 1.7540605479462088, "learning_rate": 4.786120251271314e-08, "loss": 0.8666, "step": 16 }, { "epoch": 0.0015256556954073275, "grad_norm": 1.6901533223285523, "learning_rate": 5.085252766975771e-08, "loss": 0.8615, "step": 17 }, { "epoch": 0.0016154001480783468, "grad_norm": 1.7955001158796802, "learning_rate": 5.384385282680228e-08, "loss": 0.9388, "step": 18 }, { "epoch": 0.0017051446007493663, "grad_norm": 1.6944617845244543, "learning_rate": 5.683517798384685e-08, "loss": 0.8478, "step": 19 }, { "epoch": 0.0017948890534203856, "grad_norm": 1.7220417659948373, "learning_rate": 5.982650314089142e-08, "loss": 0.8697, "step": 20 }, { "epoch": 0.0018846335060914048, "grad_norm": 1.6340714736832063, "learning_rate": 6.281782829793599e-08, "loss": 0.8516, "step": 21 }, { "epoch": 0.001974377958762424, "grad_norm": 1.7729145934552155, "learning_rate": 6.580915345498056e-08, "loss": 0.9342, "step": 22 }, { "epoch": 0.002064122411433443, "grad_norm": 1.6884402677667407, "learning_rate": 6.880047861202513e-08, "loss": 0.9077, "step": 23 }, { "epoch": 0.0021538668641044624, "grad_norm": 1.8395614934408826, "learning_rate": 7.17918037690697e-08, "loss": 0.8741, "step": 24 }, { "epoch": 0.0022436113167754817, "grad_norm": 1.6095867420088694, "learning_rate": 7.478312892611428e-08, "loss": 0.8393, "step": 25 }, { "epoch": 0.002333355769446501, "grad_norm": 1.8230091830518813, "learning_rate": 7.777445408315885e-08, "loss": 0.9553, "step": 26 }, { "epoch": 0.00242310022211752, "grad_norm": 1.7730671613284572, "learning_rate": 8.076577924020342e-08, "loss": 0.9117, "step": 27 }, { "epoch": 0.0025128446747885395, "grad_norm": 1.6640547720274084, "learning_rate": 8.375710439724799e-08, "loss": 0.8065, "step": 28 }, { "epoch": 0.0026025891274595587, "grad_norm": 1.7695786038984263, "learning_rate": 8.674842955429256e-08, "loss": 0.9453, "step": 29 }, { "epoch": 0.002692333580130578, "grad_norm": 1.7180929279564898, "learning_rate": 8.973975471133713e-08, "loss": 0.9736, "step": 30 }, { "epoch": 0.0027820780328015973, "grad_norm": 1.8339871756875228, "learning_rate": 9.27310798683817e-08, "loss": 1.0162, "step": 31 }, { "epoch": 0.0028718224854726165, "grad_norm": 1.6893085044021907, "learning_rate": 9.572240502542628e-08, "loss": 1.0218, "step": 32 }, { "epoch": 0.002961566938143636, "grad_norm": 1.6264171432578827, "learning_rate": 9.871373018247085e-08, "loss": 0.84, "step": 33 }, { "epoch": 0.003051311390814655, "grad_norm": 1.7185153005835259, "learning_rate": 1.0170505533951542e-07, "loss": 0.8585, "step": 34 }, { "epoch": 0.0031410558434856743, "grad_norm": 1.766598831661545, "learning_rate": 1.0469638049655999e-07, "loss": 0.9095, "step": 35 }, { "epoch": 0.0032308002961566936, "grad_norm": 1.6677336263105458, "learning_rate": 1.0768770565360456e-07, "loss": 0.8262, "step": 36 }, { "epoch": 0.003320544748827713, "grad_norm": 1.638253431623471, "learning_rate": 1.1067903081064913e-07, "loss": 0.8935, "step": 37 }, { "epoch": 0.0034102892014987326, "grad_norm": 1.610856397181512, "learning_rate": 1.136703559676937e-07, "loss": 0.8072, "step": 38 }, { "epoch": 0.003500033654169752, "grad_norm": 1.6981751665151819, "learning_rate": 1.1666168112473828e-07, "loss": 0.9091, "step": 39 }, { "epoch": 0.003589778106840771, "grad_norm": 1.906660715288631, "learning_rate": 1.1965300628178284e-07, "loss": 0.9409, "step": 40 }, { "epoch": 0.0036795225595117904, "grad_norm": 1.6427684005081913, "learning_rate": 1.2264433143882742e-07, "loss": 0.8519, "step": 41 }, { "epoch": 0.0037692670121828096, "grad_norm": 1.6544671410033156, "learning_rate": 1.2563565659587198e-07, "loss": 0.9142, "step": 42 }, { "epoch": 0.003859011464853829, "grad_norm": 1.6725311762971735, "learning_rate": 1.2862698175291656e-07, "loss": 0.8565, "step": 43 }, { "epoch": 0.003948755917524848, "grad_norm": 1.688821933841521, "learning_rate": 1.3161830690996112e-07, "loss": 0.8653, "step": 44 }, { "epoch": 0.0040385003701958674, "grad_norm": 1.804948676564791, "learning_rate": 1.3460963206700568e-07, "loss": 0.9156, "step": 45 }, { "epoch": 0.004128244822866886, "grad_norm": 1.731658891260687, "learning_rate": 1.3760095722405027e-07, "loss": 0.8864, "step": 46 }, { "epoch": 0.004217989275537906, "grad_norm": 1.5209073092607521, "learning_rate": 1.4059228238109482e-07, "loss": 0.802, "step": 47 }, { "epoch": 0.004307733728208925, "grad_norm": 1.8573629443005213, "learning_rate": 1.435836075381394e-07, "loss": 0.9557, "step": 48 }, { "epoch": 0.0043974781808799445, "grad_norm": 1.5429835774389227, "learning_rate": 1.4657493269518397e-07, "loss": 0.8413, "step": 49 }, { "epoch": 0.004487222633550963, "grad_norm": 1.8786057228176092, "learning_rate": 1.4956625785222855e-07, "loss": 0.9505, "step": 50 }, { "epoch": 0.004576967086221983, "grad_norm": 1.6424471779226748, "learning_rate": 1.525575830092731e-07, "loss": 0.8366, "step": 51 }, { "epoch": 0.004666711538893002, "grad_norm": 1.7185647699086615, "learning_rate": 1.555489081663177e-07, "loss": 0.9508, "step": 52 }, { "epoch": 0.004756455991564022, "grad_norm": 1.7714837298345099, "learning_rate": 1.5854023332336228e-07, "loss": 0.968, "step": 53 }, { "epoch": 0.00484620044423504, "grad_norm": 1.8580595597295806, "learning_rate": 1.6153155848040684e-07, "loss": 0.9168, "step": 54 }, { "epoch": 0.00493594489690606, "grad_norm": 1.7298260969951187, "learning_rate": 1.6452288363745142e-07, "loss": 0.83, "step": 55 }, { "epoch": 0.005025689349577079, "grad_norm": 1.7558521558417248, "learning_rate": 1.6751420879449598e-07, "loss": 0.9859, "step": 56 }, { "epoch": 0.005115433802248099, "grad_norm": 1.7065798790445745, "learning_rate": 1.7050553395154057e-07, "loss": 0.9485, "step": 57 }, { "epoch": 0.0052051782549191175, "grad_norm": 1.5755919874232935, "learning_rate": 1.7349685910858512e-07, "loss": 0.893, "step": 58 }, { "epoch": 0.005294922707590137, "grad_norm": 1.628029317152642, "learning_rate": 1.764881842656297e-07, "loss": 0.8139, "step": 59 }, { "epoch": 0.005384667160261156, "grad_norm": 1.7084426104710997, "learning_rate": 1.7947950942267427e-07, "loss": 0.9172, "step": 60 }, { "epoch": 0.005474411612932176, "grad_norm": 1.6447387180446402, "learning_rate": 1.8247083457971883e-07, "loss": 0.9132, "step": 61 }, { "epoch": 0.0055641560656031945, "grad_norm": 1.5744418978367876, "learning_rate": 1.854621597367634e-07, "loss": 0.8252, "step": 62 }, { "epoch": 0.005653900518274214, "grad_norm": 1.6712064351134088, "learning_rate": 1.8845348489380797e-07, "loss": 0.9176, "step": 63 }, { "epoch": 0.005743644970945233, "grad_norm": 1.8398992854546152, "learning_rate": 1.9144481005085255e-07, "loss": 1.0308, "step": 64 }, { "epoch": 0.005833389423616253, "grad_norm": 1.6987837847411595, "learning_rate": 1.944361352078971e-07, "loss": 0.9862, "step": 65 }, { "epoch": 0.005923133876287272, "grad_norm": 1.674158367561109, "learning_rate": 1.974274603649417e-07, "loss": 0.9068, "step": 66 }, { "epoch": 0.006012878328958291, "grad_norm": 1.5504106988925905, "learning_rate": 2.0041878552198625e-07, "loss": 0.9574, "step": 67 }, { "epoch": 0.00610262278162931, "grad_norm": 1.8009594566316924, "learning_rate": 2.0341011067903084e-07, "loss": 0.982, "step": 68 }, { "epoch": 0.00619236723430033, "grad_norm": 1.7614007948420731, "learning_rate": 2.064014358360754e-07, "loss": 0.8913, "step": 69 }, { "epoch": 0.006282111686971349, "grad_norm": 1.5499699558233961, "learning_rate": 2.0939276099311998e-07, "loss": 0.8948, "step": 70 }, { "epoch": 0.006371856139642368, "grad_norm": 1.6999895737950574, "learning_rate": 2.1238408615016454e-07, "loss": 1.0085, "step": 71 }, { "epoch": 0.006461600592313387, "grad_norm": 1.5408699267839847, "learning_rate": 2.1537541130720913e-07, "loss": 0.7923, "step": 72 }, { "epoch": 0.006551345044984407, "grad_norm": 1.7033043243042532, "learning_rate": 2.1836673646425368e-07, "loss": 0.8909, "step": 73 }, { "epoch": 0.006641089497655426, "grad_norm": 1.5803798972745848, "learning_rate": 2.2135806162129827e-07, "loss": 0.9015, "step": 74 }, { "epoch": 0.0067308339503264454, "grad_norm": 1.6454242929453395, "learning_rate": 2.2434938677834283e-07, "loss": 0.9582, "step": 75 }, { "epoch": 0.006820578402997465, "grad_norm": 1.646887273969796, "learning_rate": 2.273407119353874e-07, "loss": 0.8552, "step": 76 }, { "epoch": 0.006910322855668484, "grad_norm": 1.4809752932709817, "learning_rate": 2.3033203709243197e-07, "loss": 0.8357, "step": 77 }, { "epoch": 0.007000067308339504, "grad_norm": 1.654507072848421, "learning_rate": 2.3332336224947655e-07, "loss": 0.9506, "step": 78 }, { "epoch": 0.0070898117610105225, "grad_norm": 1.732300308384692, "learning_rate": 2.363146874065211e-07, "loss": 0.9482, "step": 79 }, { "epoch": 0.007179556213681542, "grad_norm": 1.5004893096754606, "learning_rate": 2.3930601256356567e-07, "loss": 0.847, "step": 80 }, { "epoch": 0.007269300666352561, "grad_norm": 1.5629763486248873, "learning_rate": 2.4229733772061023e-07, "loss": 0.8355, "step": 81 }, { "epoch": 0.007359045119023581, "grad_norm": 1.458297834004058, "learning_rate": 2.4528866287765484e-07, "loss": 0.7781, "step": 82 }, { "epoch": 0.0074487895716946, "grad_norm": 1.536206381802481, "learning_rate": 2.482799880346994e-07, "loss": 0.949, "step": 83 }, { "epoch": 0.007538534024365619, "grad_norm": 1.517590744877931, "learning_rate": 2.5127131319174396e-07, "loss": 0.8342, "step": 84 }, { "epoch": 0.007628278477036638, "grad_norm": 1.5675981858948325, "learning_rate": 2.542626383487885e-07, "loss": 0.8261, "step": 85 }, { "epoch": 0.007718022929707658, "grad_norm": 1.6089296605788568, "learning_rate": 2.5725396350583313e-07, "loss": 0.8356, "step": 86 }, { "epoch": 0.007807767382378677, "grad_norm": 1.402904346662976, "learning_rate": 2.602452886628777e-07, "loss": 0.8399, "step": 87 }, { "epoch": 0.007897511835049695, "grad_norm": 1.6749914438790385, "learning_rate": 2.6323661381992224e-07, "loss": 0.9317, "step": 88 }, { "epoch": 0.007987256287720715, "grad_norm": 1.5112424976496, "learning_rate": 2.662279389769668e-07, "loss": 0.8676, "step": 89 }, { "epoch": 0.008077000740391735, "grad_norm": 1.6763048243106224, "learning_rate": 2.6921926413401136e-07, "loss": 0.9448, "step": 90 }, { "epoch": 0.008166745193062755, "grad_norm": 1.7225279532188673, "learning_rate": 2.7221058929105597e-07, "loss": 1.0075, "step": 91 }, { "epoch": 0.008256489645733773, "grad_norm": 1.4692346128311606, "learning_rate": 2.7520191444810053e-07, "loss": 0.8912, "step": 92 }, { "epoch": 0.008346234098404792, "grad_norm": 1.6968488400641657, "learning_rate": 2.781932396051451e-07, "loss": 0.9304, "step": 93 }, { "epoch": 0.008435978551075812, "grad_norm": 1.4725898341356152, "learning_rate": 2.8118456476218965e-07, "loss": 0.872, "step": 94 }, { "epoch": 0.008525723003746832, "grad_norm": 1.6756381996838332, "learning_rate": 2.8417588991923426e-07, "loss": 1.0352, "step": 95 }, { "epoch": 0.00861546745641785, "grad_norm": 1.4005396750717478, "learning_rate": 2.871672150762788e-07, "loss": 0.7603, "step": 96 }, { "epoch": 0.00870521190908887, "grad_norm": 1.5472998069868351, "learning_rate": 2.901585402333234e-07, "loss": 0.8792, "step": 97 }, { "epoch": 0.008794956361759889, "grad_norm": 1.4929850208409428, "learning_rate": 2.9314986539036793e-07, "loss": 0.9412, "step": 98 }, { "epoch": 0.008884700814430909, "grad_norm": 1.5266693945188585, "learning_rate": 2.9614119054741254e-07, "loss": 0.9102, "step": 99 }, { "epoch": 0.008974445267101927, "grad_norm": 1.5937379743860263, "learning_rate": 2.991325157044571e-07, "loss": 0.8706, "step": 100 }, { "epoch": 0.009064189719772946, "grad_norm": 1.482446454134345, "learning_rate": 3.0212384086150166e-07, "loss": 0.9312, "step": 101 }, { "epoch": 0.009153934172443966, "grad_norm": 1.513818047248909, "learning_rate": 3.051151660185462e-07, "loss": 0.8654, "step": 102 }, { "epoch": 0.009243678625114986, "grad_norm": 1.5948611420460796, "learning_rate": 3.0810649117559083e-07, "loss": 0.9324, "step": 103 }, { "epoch": 0.009333423077786004, "grad_norm": 1.5573671882421232, "learning_rate": 3.110978163326354e-07, "loss": 0.9342, "step": 104 }, { "epoch": 0.009423167530457023, "grad_norm": 1.4989597889447484, "learning_rate": 3.1408914148967995e-07, "loss": 0.9351, "step": 105 }, { "epoch": 0.009512911983128043, "grad_norm": 1.5044753769354242, "learning_rate": 3.1708046664672456e-07, "loss": 0.9643, "step": 106 }, { "epoch": 0.009602656435799063, "grad_norm": 1.324162974391703, "learning_rate": 3.2007179180376906e-07, "loss": 0.8328, "step": 107 }, { "epoch": 0.00969240088847008, "grad_norm": 1.3415855354766937, "learning_rate": 3.230631169608137e-07, "loss": 0.8611, "step": 108 }, { "epoch": 0.0097821453411411, "grad_norm": 1.3200191202613032, "learning_rate": 3.2605444211785823e-07, "loss": 0.8582, "step": 109 }, { "epoch": 0.00987188979381212, "grad_norm": 1.2762241061986597, "learning_rate": 3.2904576727490284e-07, "loss": 0.9269, "step": 110 }, { "epoch": 0.00996163424648314, "grad_norm": 1.3267111205209063, "learning_rate": 3.3203709243194735e-07, "loss": 0.8995, "step": 111 }, { "epoch": 0.010051378699154158, "grad_norm": 1.394195010897112, "learning_rate": 3.3502841758899196e-07, "loss": 0.8611, "step": 112 }, { "epoch": 0.010141123151825178, "grad_norm": 1.2693307744251354, "learning_rate": 3.380197427460365e-07, "loss": 0.8977, "step": 113 }, { "epoch": 0.010230867604496197, "grad_norm": 1.2540417767338194, "learning_rate": 3.4101106790308113e-07, "loss": 0.8299, "step": 114 }, { "epoch": 0.010320612057167217, "grad_norm": 1.3311054535015616, "learning_rate": 3.4400239306012564e-07, "loss": 0.9474, "step": 115 }, { "epoch": 0.010410356509838235, "grad_norm": 1.3642362927801857, "learning_rate": 3.4699371821717025e-07, "loss": 0.9341, "step": 116 }, { "epoch": 0.010500100962509255, "grad_norm": 1.3154820719161504, "learning_rate": 3.499850433742148e-07, "loss": 0.8822, "step": 117 }, { "epoch": 0.010589845415180274, "grad_norm": 1.3218031790027716, "learning_rate": 3.529763685312594e-07, "loss": 0.9302, "step": 118 }, { "epoch": 0.010679589867851294, "grad_norm": 1.3135228407656188, "learning_rate": 3.559676936883039e-07, "loss": 0.8909, "step": 119 }, { "epoch": 0.010769334320522312, "grad_norm": 1.2157806415217913, "learning_rate": 3.5895901884534853e-07, "loss": 0.8537, "step": 120 }, { "epoch": 0.010859078773193332, "grad_norm": 1.306425954397161, "learning_rate": 3.619503440023931e-07, "loss": 0.9827, "step": 121 }, { "epoch": 0.010948823225864351, "grad_norm": 1.1594849871397779, "learning_rate": 3.6494166915943765e-07, "loss": 0.8371, "step": 122 }, { "epoch": 0.011038567678535371, "grad_norm": 1.1480143189422936, "learning_rate": 3.679329943164822e-07, "loss": 0.8222, "step": 123 }, { "epoch": 0.011128312131206389, "grad_norm": 1.267601504170374, "learning_rate": 3.709243194735268e-07, "loss": 0.835, "step": 124 }, { "epoch": 0.011218056583877409, "grad_norm": 1.2702350383802161, "learning_rate": 3.739156446305714e-07, "loss": 0.8779, "step": 125 }, { "epoch": 0.011307801036548428, "grad_norm": 1.2086780604873175, "learning_rate": 3.7690696978761594e-07, "loss": 0.8245, "step": 126 }, { "epoch": 0.011397545489219448, "grad_norm": 1.2041347549193315, "learning_rate": 3.798982949446605e-07, "loss": 0.8704, "step": 127 }, { "epoch": 0.011487289941890466, "grad_norm": 1.1358786109440961, "learning_rate": 3.828896201017051e-07, "loss": 0.7459, "step": 128 }, { "epoch": 0.011577034394561486, "grad_norm": 1.1009214284915103, "learning_rate": 3.8588094525874966e-07, "loss": 0.7828, "step": 129 }, { "epoch": 0.011666778847232506, "grad_norm": 1.186687035768274, "learning_rate": 3.888722704157942e-07, "loss": 0.8833, "step": 130 }, { "epoch": 0.011756523299903525, "grad_norm": 1.2570839537612892, "learning_rate": 3.918635955728388e-07, "loss": 0.916, "step": 131 }, { "epoch": 0.011846267752574543, "grad_norm": 1.1080835233748902, "learning_rate": 3.948549207298834e-07, "loss": 0.8351, "step": 132 }, { "epoch": 0.011936012205245563, "grad_norm": 1.1628457304817346, "learning_rate": 3.9784624588692795e-07, "loss": 0.8181, "step": 133 }, { "epoch": 0.012025756657916583, "grad_norm": 1.2364893280656972, "learning_rate": 4.008375710439725e-07, "loss": 0.9789, "step": 134 }, { "epoch": 0.012115501110587602, "grad_norm": 1.3009529078827258, "learning_rate": 4.0382889620101707e-07, "loss": 0.8501, "step": 135 }, { "epoch": 0.01220524556325862, "grad_norm": 1.1356591020454743, "learning_rate": 4.068202213580617e-07, "loss": 0.8585, "step": 136 }, { "epoch": 0.01229499001592964, "grad_norm": 1.0547789282433464, "learning_rate": 4.0981154651510624e-07, "loss": 0.7755, "step": 137 }, { "epoch": 0.01238473446860066, "grad_norm": 1.1660996286047378, "learning_rate": 4.128028716721508e-07, "loss": 0.8796, "step": 138 }, { "epoch": 0.01247447892127168, "grad_norm": 1.1244261681630603, "learning_rate": 4.1579419682919535e-07, "loss": 0.8118, "step": 139 }, { "epoch": 0.012564223373942697, "grad_norm": 1.1793010753387425, "learning_rate": 4.1878552198623996e-07, "loss": 0.8167, "step": 140 }, { "epoch": 0.012653967826613717, "grad_norm": 1.1590038302109078, "learning_rate": 4.217768471432845e-07, "loss": 0.8816, "step": 141 }, { "epoch": 0.012743712279284737, "grad_norm": 1.1547221972792576, "learning_rate": 4.247681723003291e-07, "loss": 0.8578, "step": 142 }, { "epoch": 0.012833456731955756, "grad_norm": 1.1266018775220188, "learning_rate": 4.2775949745737364e-07, "loss": 0.8669, "step": 143 }, { "epoch": 0.012923201184626774, "grad_norm": 1.1833338383643115, "learning_rate": 4.3075082261441825e-07, "loss": 0.9595, "step": 144 }, { "epoch": 0.013012945637297794, "grad_norm": 1.1501314685954813, "learning_rate": 4.337421477714628e-07, "loss": 0.8971, "step": 145 }, { "epoch": 0.013102690089968814, "grad_norm": 1.0530589903643184, "learning_rate": 4.3673347292850737e-07, "loss": 0.7923, "step": 146 }, { "epoch": 0.013192434542639834, "grad_norm": 1.2201895613993357, "learning_rate": 4.397247980855519e-07, "loss": 0.9083, "step": 147 }, { "epoch": 0.013282178995310851, "grad_norm": 1.1003006037265295, "learning_rate": 4.4271612324259654e-07, "loss": 0.8106, "step": 148 }, { "epoch": 0.013371923447981871, "grad_norm": 1.0211507856878574, "learning_rate": 4.4570744839964104e-07, "loss": 0.837, "step": 149 }, { "epoch": 0.013461667900652891, "grad_norm": 1.1273804609330593, "learning_rate": 4.4869877355668565e-07, "loss": 0.8737, "step": 150 }, { "epoch": 0.01355141235332391, "grad_norm": 0.9708691471603329, "learning_rate": 4.516900987137302e-07, "loss": 0.8129, "step": 151 }, { "epoch": 0.01364115680599493, "grad_norm": 1.1314257602589868, "learning_rate": 4.546814238707748e-07, "loss": 0.7994, "step": 152 }, { "epoch": 0.013730901258665948, "grad_norm": 1.0130538105676437, "learning_rate": 4.5767274902781933e-07, "loss": 0.8601, "step": 153 }, { "epoch": 0.013820645711336968, "grad_norm": 1.0390973767445517, "learning_rate": 4.6066407418486394e-07, "loss": 0.8345, "step": 154 }, { "epoch": 0.013910390164007988, "grad_norm": 1.026919103223306, "learning_rate": 4.636553993419085e-07, "loss": 0.8584, "step": 155 }, { "epoch": 0.014000134616679007, "grad_norm": 1.0326063702001012, "learning_rate": 4.666467244989531e-07, "loss": 0.7812, "step": 156 }, { "epoch": 0.014089879069350025, "grad_norm": 1.0127731775821587, "learning_rate": 4.696380496559976e-07, "loss": 0.8147, "step": 157 }, { "epoch": 0.014179623522021045, "grad_norm": 0.9564286008826473, "learning_rate": 4.726293748130422e-07, "loss": 0.7763, "step": 158 }, { "epoch": 0.014269367974692065, "grad_norm": 0.952732401371976, "learning_rate": 4.756206999700868e-07, "loss": 0.774, "step": 159 }, { "epoch": 0.014359112427363084, "grad_norm": 0.9990733208718338, "learning_rate": 4.786120251271313e-07, "loss": 0.8079, "step": 160 }, { "epoch": 0.014448856880034102, "grad_norm": 1.1057121507841399, "learning_rate": 4.816033502841759e-07, "loss": 0.9181, "step": 161 }, { "epoch": 0.014538601332705122, "grad_norm": 1.0232637591795726, "learning_rate": 4.845946754412205e-07, "loss": 0.7698, "step": 162 }, { "epoch": 0.014628345785376142, "grad_norm": 0.9800408769491525, "learning_rate": 4.875860005982651e-07, "loss": 0.8299, "step": 163 }, { "epoch": 0.014718090238047161, "grad_norm": 0.9962182346984809, "learning_rate": 4.905773257553097e-07, "loss": 0.8205, "step": 164 }, { "epoch": 0.01480783469071818, "grad_norm": 1.0406301762347383, "learning_rate": 4.935686509123542e-07, "loss": 0.8346, "step": 165 }, { "epoch": 0.0148975791433892, "grad_norm": 1.051156386112451, "learning_rate": 4.965599760693988e-07, "loss": 0.7668, "step": 166 }, { "epoch": 0.014987323596060219, "grad_norm": 0.9726266557963344, "learning_rate": 4.995513012264434e-07, "loss": 0.7962, "step": 167 }, { "epoch": 0.015077068048731239, "grad_norm": 0.9984055455228369, "learning_rate": 5.025426263834879e-07, "loss": 0.7693, "step": 168 }, { "epoch": 0.015166812501402257, "grad_norm": 0.9660352197311968, "learning_rate": 5.055339515405325e-07, "loss": 0.8063, "step": 169 }, { "epoch": 0.015256556954073276, "grad_norm": 0.9313369674643367, "learning_rate": 5.08525276697577e-07, "loss": 0.7743, "step": 170 }, { "epoch": 0.015346301406744296, "grad_norm": 0.9092690586929456, "learning_rate": 5.115166018546217e-07, "loss": 0.7921, "step": 171 }, { "epoch": 0.015436045859415316, "grad_norm": 1.002227303005333, "learning_rate": 5.145079270116663e-07, "loss": 0.8153, "step": 172 }, { "epoch": 0.015525790312086334, "grad_norm": 1.0086628826046422, "learning_rate": 5.174992521687108e-07, "loss": 0.9293, "step": 173 }, { "epoch": 0.015615534764757353, "grad_norm": 0.9636783617627634, "learning_rate": 5.204905773257554e-07, "loss": 0.8443, "step": 174 }, { "epoch": 0.015705279217428373, "grad_norm": 0.9318582316361553, "learning_rate": 5.234819024827999e-07, "loss": 0.8011, "step": 175 }, { "epoch": 0.01579502367009939, "grad_norm": 1.02870050516614, "learning_rate": 5.264732276398445e-07, "loss": 0.836, "step": 176 }, { "epoch": 0.015884768122770412, "grad_norm": 1.0158455203613335, "learning_rate": 5.29464552796889e-07, "loss": 0.8823, "step": 177 }, { "epoch": 0.01597451257544143, "grad_norm": 0.9241571022522631, "learning_rate": 5.324558779539336e-07, "loss": 0.7945, "step": 178 }, { "epoch": 0.01606425702811245, "grad_norm": 1.0023632048830557, "learning_rate": 5.354472031109783e-07, "loss": 0.8339, "step": 179 }, { "epoch": 0.01615400148078347, "grad_norm": 1.0225421090401983, "learning_rate": 5.384385282680227e-07, "loss": 0.8795, "step": 180 }, { "epoch": 0.016243745933454488, "grad_norm": 0.9122656836100355, "learning_rate": 5.414298534250674e-07, "loss": 0.8019, "step": 181 }, { "epoch": 0.01633349038612551, "grad_norm": 0.9186069067199923, "learning_rate": 5.444211785821119e-07, "loss": 0.7291, "step": 182 }, { "epoch": 0.016423234838796527, "grad_norm": 0.893042966893392, "learning_rate": 5.474125037391565e-07, "loss": 0.7889, "step": 183 }, { "epoch": 0.016512979291467545, "grad_norm": 0.945903858669602, "learning_rate": 5.504038288962011e-07, "loss": 0.819, "step": 184 }, { "epoch": 0.016602723744138567, "grad_norm": 1.0598408194678919, "learning_rate": 5.533951540532456e-07, "loss": 0.9592, "step": 185 }, { "epoch": 0.016692468196809584, "grad_norm": 0.888993252566139, "learning_rate": 5.563864792102902e-07, "loss": 0.7946, "step": 186 }, { "epoch": 0.016782212649480602, "grad_norm": 0.902290529458392, "learning_rate": 5.593778043673348e-07, "loss": 0.8039, "step": 187 }, { "epoch": 0.016871957102151624, "grad_norm": 0.887537987388707, "learning_rate": 5.623691295243793e-07, "loss": 0.7523, "step": 188 }, { "epoch": 0.016961701554822642, "grad_norm": 0.888589059652769, "learning_rate": 5.65360454681424e-07, "loss": 0.8198, "step": 189 }, { "epoch": 0.017051446007493663, "grad_norm": 0.8914814150339354, "learning_rate": 5.683517798384685e-07, "loss": 0.8335, "step": 190 }, { "epoch": 0.01714119046016468, "grad_norm": 0.9153263593421326, "learning_rate": 5.713431049955131e-07, "loss": 0.8285, "step": 191 }, { "epoch": 0.0172309349128357, "grad_norm": 0.9021161285630993, "learning_rate": 5.743344301525576e-07, "loss": 0.7775, "step": 192 }, { "epoch": 0.01732067936550672, "grad_norm": 0.9065998365551313, "learning_rate": 5.773257553096022e-07, "loss": 0.791, "step": 193 }, { "epoch": 0.01741042381817774, "grad_norm": 0.8430803976609558, "learning_rate": 5.803170804666467e-07, "loss": 0.7888, "step": 194 }, { "epoch": 0.017500168270848757, "grad_norm": 0.8874890271033449, "learning_rate": 5.833084056236914e-07, "loss": 0.7951, "step": 195 }, { "epoch": 0.017589912723519778, "grad_norm": 0.9647799258308127, "learning_rate": 5.862997307807359e-07, "loss": 0.8907, "step": 196 }, { "epoch": 0.017679657176190796, "grad_norm": 0.8568800105240527, "learning_rate": 5.892910559377805e-07, "loss": 0.7869, "step": 197 }, { "epoch": 0.017769401628861817, "grad_norm": 0.8997928519172499, "learning_rate": 5.922823810948251e-07, "loss": 0.7567, "step": 198 }, { "epoch": 0.017859146081532835, "grad_norm": 0.8432013399881791, "learning_rate": 5.952737062518696e-07, "loss": 0.7872, "step": 199 }, { "epoch": 0.017948890534203853, "grad_norm": 0.9049393873533542, "learning_rate": 5.982650314089142e-07, "loss": 0.849, "step": 200 }, { "epoch": 0.018038634986874875, "grad_norm": 0.8655093799383041, "learning_rate": 6.012563565659588e-07, "loss": 0.7666, "step": 201 }, { "epoch": 0.018128379439545893, "grad_norm": 0.9035970183208791, "learning_rate": 6.042476817230033e-07, "loss": 0.8018, "step": 202 }, { "epoch": 0.01821812389221691, "grad_norm": 0.804925470362472, "learning_rate": 6.07239006880048e-07, "loss": 0.7526, "step": 203 }, { "epoch": 0.018307868344887932, "grad_norm": 0.8274708288373038, "learning_rate": 6.102303320370924e-07, "loss": 0.7765, "step": 204 }, { "epoch": 0.01839761279755895, "grad_norm": 0.8064837120147703, "learning_rate": 6.132216571941371e-07, "loss": 0.8074, "step": 205 }, { "epoch": 0.01848735725022997, "grad_norm": 0.7858879220440833, "learning_rate": 6.162129823511817e-07, "loss": 0.8439, "step": 206 }, { "epoch": 0.01857710170290099, "grad_norm": 0.7813750592679553, "learning_rate": 6.192043075082262e-07, "loss": 0.7758, "step": 207 }, { "epoch": 0.018666846155572007, "grad_norm": 0.8657268126416817, "learning_rate": 6.221956326652708e-07, "loss": 0.7855, "step": 208 }, { "epoch": 0.01875659060824303, "grad_norm": 0.8450780430389939, "learning_rate": 6.251869578223153e-07, "loss": 0.8741, "step": 209 }, { "epoch": 0.018846335060914047, "grad_norm": 0.7777771768297495, "learning_rate": 6.281782829793599e-07, "loss": 0.8022, "step": 210 }, { "epoch": 0.018936079513585065, "grad_norm": 0.7781877080189799, "learning_rate": 6.311696081364046e-07, "loss": 0.7725, "step": 211 }, { "epoch": 0.019025823966256086, "grad_norm": 0.7851100302919818, "learning_rate": 6.341609332934491e-07, "loss": 0.7419, "step": 212 }, { "epoch": 0.019115568418927104, "grad_norm": 0.7186347490357212, "learning_rate": 6.371522584504936e-07, "loss": 0.6898, "step": 213 }, { "epoch": 0.019205312871598126, "grad_norm": 0.7160100171230402, "learning_rate": 6.401435836075381e-07, "loss": 0.7185, "step": 214 }, { "epoch": 0.019295057324269144, "grad_norm": 0.7283431137905568, "learning_rate": 6.431349087645827e-07, "loss": 0.7863, "step": 215 }, { "epoch": 0.01938480177694016, "grad_norm": 0.6908963028312556, "learning_rate": 6.461262339216274e-07, "loss": 0.723, "step": 216 }, { "epoch": 0.019474546229611183, "grad_norm": 0.7515989846004073, "learning_rate": 6.491175590786719e-07, "loss": 0.8003, "step": 217 }, { "epoch": 0.0195642906822822, "grad_norm": 0.7794523614395956, "learning_rate": 6.521088842357165e-07, "loss": 0.8036, "step": 218 }, { "epoch": 0.01965403513495322, "grad_norm": 0.7360733802354836, "learning_rate": 6.551002093927611e-07, "loss": 0.7587, "step": 219 }, { "epoch": 0.01974377958762424, "grad_norm": 0.7495797669288589, "learning_rate": 6.580915345498057e-07, "loss": 0.7466, "step": 220 }, { "epoch": 0.01983352404029526, "grad_norm": 0.6546194848536384, "learning_rate": 6.610828597068501e-07, "loss": 0.6992, "step": 221 }, { "epoch": 0.01992326849296628, "grad_norm": 0.7593274458362836, "learning_rate": 6.640741848638947e-07, "loss": 0.7589, "step": 222 }, { "epoch": 0.020013012945637298, "grad_norm": 0.7065359686997408, "learning_rate": 6.670655100209393e-07, "loss": 0.7224, "step": 223 }, { "epoch": 0.020102757398308316, "grad_norm": 0.7844655613535966, "learning_rate": 6.700568351779839e-07, "loss": 0.8591, "step": 224 }, { "epoch": 0.020192501850979337, "grad_norm": 0.7518719090244234, "learning_rate": 6.730481603350285e-07, "loss": 0.8591, "step": 225 }, { "epoch": 0.020282246303650355, "grad_norm": 0.6603723824178934, "learning_rate": 6.76039485492073e-07, "loss": 0.6938, "step": 226 }, { "epoch": 0.020371990756321377, "grad_norm": 0.6776970283526947, "learning_rate": 6.790308106491177e-07, "loss": 0.7841, "step": 227 }, { "epoch": 0.020461735208992395, "grad_norm": 0.7383118918485242, "learning_rate": 6.820221358061623e-07, "loss": 0.8143, "step": 228 }, { "epoch": 0.020551479661663413, "grad_norm": 0.6587639809078495, "learning_rate": 6.850134609632067e-07, "loss": 0.7267, "step": 229 }, { "epoch": 0.020641224114334434, "grad_norm": 0.7015401102041243, "learning_rate": 6.880047861202513e-07, "loss": 0.7091, "step": 230 }, { "epoch": 0.020730968567005452, "grad_norm": 0.7216801891401597, "learning_rate": 6.909961112772958e-07, "loss": 0.7881, "step": 231 }, { "epoch": 0.02082071301967647, "grad_norm": 0.6973407722323502, "learning_rate": 6.939874364343405e-07, "loss": 0.7573, "step": 232 }, { "epoch": 0.02091045747234749, "grad_norm": 0.6428040264291077, "learning_rate": 6.969787615913851e-07, "loss": 0.7142, "step": 233 }, { "epoch": 0.02100020192501851, "grad_norm": 0.6448186467059078, "learning_rate": 6.999700867484296e-07, "loss": 0.7288, "step": 234 }, { "epoch": 0.02108994637768953, "grad_norm": 0.7074202614368515, "learning_rate": 7.029614119054743e-07, "loss": 0.7899, "step": 235 }, { "epoch": 0.02117969083036055, "grad_norm": 0.7367061443901816, "learning_rate": 7.059527370625188e-07, "loss": 0.7287, "step": 236 }, { "epoch": 0.021269435283031567, "grad_norm": 0.7288595236823887, "learning_rate": 7.089440622195633e-07, "loss": 0.8388, "step": 237 }, { "epoch": 0.021359179735702588, "grad_norm": 0.771313867169602, "learning_rate": 7.119353873766078e-07, "loss": 0.8347, "step": 238 }, { "epoch": 0.021448924188373606, "grad_norm": 0.6880616644123125, "learning_rate": 7.149267125336524e-07, "loss": 0.7504, "step": 239 }, { "epoch": 0.021538668641044624, "grad_norm": 0.7071577807616488, "learning_rate": 7.179180376906971e-07, "loss": 0.7661, "step": 240 }, { "epoch": 0.021628413093715645, "grad_norm": 0.7460844260847108, "learning_rate": 7.209093628477416e-07, "loss": 0.7922, "step": 241 }, { "epoch": 0.021718157546386663, "grad_norm": 0.7082300828487278, "learning_rate": 7.239006880047862e-07, "loss": 0.7675, "step": 242 }, { "epoch": 0.021807901999057685, "grad_norm": 0.7224019871497739, "learning_rate": 7.268920131618308e-07, "loss": 0.8028, "step": 243 }, { "epoch": 0.021897646451728703, "grad_norm": 0.6419523570168398, "learning_rate": 7.298833383188753e-07, "loss": 0.7345, "step": 244 }, { "epoch": 0.02198739090439972, "grad_norm": 0.6945948301305904, "learning_rate": 7.328746634759199e-07, "loss": 0.7725, "step": 245 }, { "epoch": 0.022077135357070742, "grad_norm": 0.6196324313726417, "learning_rate": 7.358659886329644e-07, "loss": 0.7031, "step": 246 }, { "epoch": 0.02216687980974176, "grad_norm": 0.658100506207315, "learning_rate": 7.38857313790009e-07, "loss": 0.7084, "step": 247 }, { "epoch": 0.022256624262412778, "grad_norm": 0.6483238890924903, "learning_rate": 7.418486389470536e-07, "loss": 0.7367, "step": 248 }, { "epoch": 0.0223463687150838, "grad_norm": 0.7594632274624502, "learning_rate": 7.448399641040982e-07, "loss": 0.9171, "step": 249 }, { "epoch": 0.022436113167754818, "grad_norm": 0.6337318654210855, "learning_rate": 7.478312892611428e-07, "loss": 0.743, "step": 250 }, { "epoch": 0.02252585762042584, "grad_norm": 0.7028097425658985, "learning_rate": 7.508226144181874e-07, "loss": 0.7105, "step": 251 }, { "epoch": 0.022615602073096857, "grad_norm": 0.6967625060240026, "learning_rate": 7.538139395752319e-07, "loss": 0.8312, "step": 252 }, { "epoch": 0.022705346525767875, "grad_norm": 0.6034126886407329, "learning_rate": 7.568052647322764e-07, "loss": 0.6989, "step": 253 }, { "epoch": 0.022795090978438896, "grad_norm": 0.6993402725308835, "learning_rate": 7.59796589889321e-07, "loss": 0.8154, "step": 254 }, { "epoch": 0.022884835431109914, "grad_norm": 0.6553262418238092, "learning_rate": 7.627879150463655e-07, "loss": 0.6921, "step": 255 }, { "epoch": 0.022974579883780932, "grad_norm": 0.6367918525220149, "learning_rate": 7.657792402034102e-07, "loss": 0.7162, "step": 256 }, { "epoch": 0.023064324336451954, "grad_norm": 0.6415569929301651, "learning_rate": 7.687705653604548e-07, "loss": 0.7763, "step": 257 }, { "epoch": 0.02315406878912297, "grad_norm": 0.6158952510024099, "learning_rate": 7.717618905174993e-07, "loss": 0.6857, "step": 258 }, { "epoch": 0.023243813241793993, "grad_norm": 0.6173952411213295, "learning_rate": 7.74753215674544e-07, "loss": 0.709, "step": 259 }, { "epoch": 0.02333355769446501, "grad_norm": 0.6580356392916601, "learning_rate": 7.777445408315884e-07, "loss": 0.7212, "step": 260 }, { "epoch": 0.02342330214713603, "grad_norm": 0.681634077193173, "learning_rate": 7.80735865988633e-07, "loss": 0.7671, "step": 261 }, { "epoch": 0.02351304659980705, "grad_norm": 0.6319101924684586, "learning_rate": 7.837271911456776e-07, "loss": 0.7041, "step": 262 }, { "epoch": 0.02360279105247807, "grad_norm": 0.6391894022248645, "learning_rate": 7.867185163027221e-07, "loss": 0.7645, "step": 263 }, { "epoch": 0.023692535505149086, "grad_norm": 0.6367655830981118, "learning_rate": 7.897098414597668e-07, "loss": 0.7844, "step": 264 }, { "epoch": 0.023782279957820108, "grad_norm": 0.6227091274462602, "learning_rate": 7.927011666168113e-07, "loss": 0.7024, "step": 265 }, { "epoch": 0.023872024410491126, "grad_norm": 0.6460514128323845, "learning_rate": 7.956924917738559e-07, "loss": 0.7778, "step": 266 }, { "epoch": 0.023961768863162147, "grad_norm": 0.630097427859735, "learning_rate": 7.986838169309004e-07, "loss": 0.755, "step": 267 }, { "epoch": 0.024051513315833165, "grad_norm": 0.6184466377539789, "learning_rate": 8.01675142087945e-07, "loss": 0.7465, "step": 268 }, { "epoch": 0.024141257768504183, "grad_norm": 0.6183115345662207, "learning_rate": 8.046664672449896e-07, "loss": 0.6841, "step": 269 }, { "epoch": 0.024231002221175205, "grad_norm": 0.6577251474469161, "learning_rate": 8.076577924020341e-07, "loss": 0.7637, "step": 270 }, { "epoch": 0.024320746673846223, "grad_norm": 0.555893222048351, "learning_rate": 8.106491175590787e-07, "loss": 0.6508, "step": 271 }, { "epoch": 0.02441049112651724, "grad_norm": 0.6377177702290521, "learning_rate": 8.136404427161234e-07, "loss": 0.7755, "step": 272 }, { "epoch": 0.024500235579188262, "grad_norm": 0.6288601931801812, "learning_rate": 8.166317678731679e-07, "loss": 0.7693, "step": 273 }, { "epoch": 0.02458998003185928, "grad_norm": 0.5652392839222021, "learning_rate": 8.196230930302125e-07, "loss": 0.6364, "step": 274 }, { "epoch": 0.0246797244845303, "grad_norm": 0.6014294983782095, "learning_rate": 8.226144181872569e-07, "loss": 0.6333, "step": 275 }, { "epoch": 0.02476946893720132, "grad_norm": 0.5922598781778889, "learning_rate": 8.256057433443016e-07, "loss": 0.6762, "step": 276 }, { "epoch": 0.024859213389872337, "grad_norm": 0.6196804168977599, "learning_rate": 8.285970685013461e-07, "loss": 0.7689, "step": 277 }, { "epoch": 0.02494895784254336, "grad_norm": 0.5758096061338842, "learning_rate": 8.315883936583907e-07, "loss": 0.6749, "step": 278 }, { "epoch": 0.025038702295214377, "grad_norm": 0.6137525295938998, "learning_rate": 8.345797188154353e-07, "loss": 0.7175, "step": 279 }, { "epoch": 0.025128446747885395, "grad_norm": 0.6169295218436536, "learning_rate": 8.375710439724799e-07, "loss": 0.7778, "step": 280 }, { "epoch": 0.025218191200556416, "grad_norm": 0.5619350920938243, "learning_rate": 8.405623691295245e-07, "loss": 0.6451, "step": 281 }, { "epoch": 0.025307935653227434, "grad_norm": 0.567067231613642, "learning_rate": 8.43553694286569e-07, "loss": 0.6897, "step": 282 }, { "epoch": 0.025397680105898456, "grad_norm": 0.6456669541958575, "learning_rate": 8.465450194436135e-07, "loss": 0.7632, "step": 283 }, { "epoch": 0.025487424558569474, "grad_norm": 0.5786711167042602, "learning_rate": 8.495363446006582e-07, "loss": 0.7012, "step": 284 }, { "epoch": 0.02557716901124049, "grad_norm": 0.6154547154196817, "learning_rate": 8.525276697577027e-07, "loss": 0.7542, "step": 285 }, { "epoch": 0.025666913463911513, "grad_norm": 0.6330903137731124, "learning_rate": 8.555189949147473e-07, "loss": 0.7329, "step": 286 }, { "epoch": 0.02575665791658253, "grad_norm": 0.6126173888720343, "learning_rate": 8.585103200717918e-07, "loss": 0.6937, "step": 287 }, { "epoch": 0.02584640236925355, "grad_norm": 0.5952923975480525, "learning_rate": 8.615016452288365e-07, "loss": 0.6747, "step": 288 }, { "epoch": 0.02593614682192457, "grad_norm": 0.5979254457011044, "learning_rate": 8.644929703858811e-07, "loss": 0.7569, "step": 289 }, { "epoch": 0.026025891274595588, "grad_norm": 0.5919496148432704, "learning_rate": 8.674842955429256e-07, "loss": 0.76, "step": 290 }, { "epoch": 0.02611563572726661, "grad_norm": 0.5716317941986323, "learning_rate": 8.704756206999701e-07, "loss": 0.6606, "step": 291 }, { "epoch": 0.026205380179937628, "grad_norm": 0.5934224430282498, "learning_rate": 8.734669458570147e-07, "loss": 0.6451, "step": 292 }, { "epoch": 0.026295124632608646, "grad_norm": 0.580665490446497, "learning_rate": 8.764582710140593e-07, "loss": 0.7108, "step": 293 }, { "epoch": 0.026384869085279667, "grad_norm": 0.5463746265687487, "learning_rate": 8.794495961711039e-07, "loss": 0.6538, "step": 294 }, { "epoch": 0.026474613537950685, "grad_norm": 0.5899802925682566, "learning_rate": 8.824409213281484e-07, "loss": 0.7334, "step": 295 }, { "epoch": 0.026564357990621703, "grad_norm": 0.5788682496150097, "learning_rate": 8.854322464851931e-07, "loss": 0.6456, "step": 296 }, { "epoch": 0.026654102443292724, "grad_norm": 0.6166896216649915, "learning_rate": 8.884235716422376e-07, "loss": 0.7206, "step": 297 }, { "epoch": 0.026743846895963742, "grad_norm": 0.5810367586931265, "learning_rate": 8.914148967992821e-07, "loss": 0.6741, "step": 298 }, { "epoch": 0.026833591348634764, "grad_norm": 0.5942561623366988, "learning_rate": 8.944062219563266e-07, "loss": 0.6772, "step": 299 }, { "epoch": 0.026923335801305782, "grad_norm": 0.6260342706056222, "learning_rate": 8.973975471133713e-07, "loss": 0.6981, "step": 300 }, { "epoch": 0.0270130802539768, "grad_norm": 0.530319082636735, "learning_rate": 9.003888722704159e-07, "loss": 0.5752, "step": 301 }, { "epoch": 0.02710282470664782, "grad_norm": 0.6808508778748436, "learning_rate": 9.033801974274604e-07, "loss": 0.8114, "step": 302 }, { "epoch": 0.02719256915931884, "grad_norm": 0.566787761992284, "learning_rate": 9.06371522584505e-07, "loss": 0.7009, "step": 303 }, { "epoch": 0.02728231361198986, "grad_norm": 0.5580688665166482, "learning_rate": 9.093628477415496e-07, "loss": 0.6594, "step": 304 }, { "epoch": 0.02737205806466088, "grad_norm": 0.6127941403796316, "learning_rate": 9.123541728985942e-07, "loss": 0.7152, "step": 305 }, { "epoch": 0.027461802517331897, "grad_norm": 0.5887869187702106, "learning_rate": 9.153454980556387e-07, "loss": 0.7139, "step": 306 }, { "epoch": 0.027551546970002918, "grad_norm": 0.5771986878625734, "learning_rate": 9.183368232126832e-07, "loss": 0.72, "step": 307 }, { "epoch": 0.027641291422673936, "grad_norm": 0.5777414853597304, "learning_rate": 9.213281483697279e-07, "loss": 0.6491, "step": 308 }, { "epoch": 0.027731035875344954, "grad_norm": 0.6327344232431844, "learning_rate": 9.243194735267724e-07, "loss": 0.859, "step": 309 }, { "epoch": 0.027820780328015975, "grad_norm": 0.5768946565963881, "learning_rate": 9.27310798683817e-07, "loss": 0.6813, "step": 310 }, { "epoch": 0.027910524780686993, "grad_norm": 0.5699948553069366, "learning_rate": 9.303021238408616e-07, "loss": 0.712, "step": 311 }, { "epoch": 0.028000269233358015, "grad_norm": 0.5757711514459602, "learning_rate": 9.332934489979062e-07, "loss": 0.6754, "step": 312 }, { "epoch": 0.028090013686029033, "grad_norm": 0.5636343756230079, "learning_rate": 9.362847741549508e-07, "loss": 0.655, "step": 313 }, { "epoch": 0.02817975813870005, "grad_norm": 0.651904609031761, "learning_rate": 9.392760993119952e-07, "loss": 0.7193, "step": 314 }, { "epoch": 0.028269502591371072, "grad_norm": 0.5583601169510926, "learning_rate": 9.422674244690398e-07, "loss": 0.6476, "step": 315 }, { "epoch": 0.02835924704404209, "grad_norm": 0.5346000366369863, "learning_rate": 9.452587496260845e-07, "loss": 0.5802, "step": 316 }, { "epoch": 0.028448991496713108, "grad_norm": 0.5899675900904015, "learning_rate": 9.48250074783129e-07, "loss": 0.7046, "step": 317 }, { "epoch": 0.02853873594938413, "grad_norm": 0.5658153887565394, "learning_rate": 9.512413999401736e-07, "loss": 0.6357, "step": 318 }, { "epoch": 0.028628480402055147, "grad_norm": 0.5714242991228419, "learning_rate": 9.542327250972181e-07, "loss": 0.652, "step": 319 }, { "epoch": 0.02871822485472617, "grad_norm": 0.6226943082207698, "learning_rate": 9.572240502542627e-07, "loss": 0.8128, "step": 320 }, { "epoch": 0.028807969307397187, "grad_norm": 0.5455085884993995, "learning_rate": 9.602153754113075e-07, "loss": 0.655, "step": 321 }, { "epoch": 0.028897713760068205, "grad_norm": 0.6137762349023167, "learning_rate": 9.632067005683518e-07, "loss": 0.7215, "step": 322 }, { "epoch": 0.028987458212739226, "grad_norm": 0.5406850876655787, "learning_rate": 9.661980257253964e-07, "loss": 0.6689, "step": 323 }, { "epoch": 0.029077202665410244, "grad_norm": 0.5525638541346288, "learning_rate": 9.69189350882441e-07, "loss": 0.6551, "step": 324 }, { "epoch": 0.029166947118081262, "grad_norm": 0.640680146555939, "learning_rate": 9.721806760394855e-07, "loss": 0.7237, "step": 325 }, { "epoch": 0.029256691570752284, "grad_norm": 0.5320930388726586, "learning_rate": 9.751720011965302e-07, "loss": 0.6053, "step": 326 }, { "epoch": 0.0293464360234233, "grad_norm": 0.5842902747537629, "learning_rate": 9.781633263535748e-07, "loss": 0.7204, "step": 327 }, { "epoch": 0.029436180476094323, "grad_norm": 0.5829813119715496, "learning_rate": 9.811546515106194e-07, "loss": 0.6698, "step": 328 }, { "epoch": 0.02952592492876534, "grad_norm": 0.5488684253203725, "learning_rate": 9.841459766676637e-07, "loss": 0.6565, "step": 329 }, { "epoch": 0.02961566938143636, "grad_norm": 0.5871607579267899, "learning_rate": 9.871373018247085e-07, "loss": 0.7217, "step": 330 }, { "epoch": 0.02970541383410738, "grad_norm": 0.5949087027637567, "learning_rate": 9.90128626981753e-07, "loss": 0.7392, "step": 331 }, { "epoch": 0.0297951582867784, "grad_norm": 0.6617098535716346, "learning_rate": 9.931199521387976e-07, "loss": 0.7478, "step": 332 }, { "epoch": 0.029884902739449416, "grad_norm": 0.5524393215927561, "learning_rate": 9.961112772958422e-07, "loss": 0.6263, "step": 333 }, { "epoch": 0.029974647192120438, "grad_norm": 0.558437419606605, "learning_rate": 9.991026024528867e-07, "loss": 0.6637, "step": 334 }, { "epoch": 0.030064391644791456, "grad_norm": 0.54041749892161, "learning_rate": 1.0020939276099313e-06, "loss": 0.6396, "step": 335 }, { "epoch": 0.030154136097462477, "grad_norm": 0.6039043577445723, "learning_rate": 1.0050852527669758e-06, "loss": 0.7576, "step": 336 }, { "epoch": 0.030243880550133495, "grad_norm": 0.5535505321747038, "learning_rate": 1.0080765779240204e-06, "loss": 0.7, "step": 337 }, { "epoch": 0.030333625002804513, "grad_norm": 0.5997712287181035, "learning_rate": 1.011067903081065e-06, "loss": 0.7281, "step": 338 }, { "epoch": 0.030423369455475534, "grad_norm": 0.5325952741405283, "learning_rate": 1.0140592282381095e-06, "loss": 0.6557, "step": 339 }, { "epoch": 0.030513113908146552, "grad_norm": 0.5314596692631268, "learning_rate": 1.017050553395154e-06, "loss": 0.6072, "step": 340 }, { "epoch": 0.03060285836081757, "grad_norm": 0.5771098560140984, "learning_rate": 1.0200418785521986e-06, "loss": 0.7218, "step": 341 }, { "epoch": 0.030692602813488592, "grad_norm": 0.5280444780279557, "learning_rate": 1.0230332037092434e-06, "loss": 0.6476, "step": 342 }, { "epoch": 0.03078234726615961, "grad_norm": 0.5690710726768319, "learning_rate": 1.026024528866288e-06, "loss": 0.6815, "step": 343 }, { "epoch": 0.03087209171883063, "grad_norm": 0.5744164003848771, "learning_rate": 1.0290158540233325e-06, "loss": 0.6691, "step": 344 }, { "epoch": 0.03096183617150165, "grad_norm": 0.559524288446021, "learning_rate": 1.0320071791803769e-06, "loss": 0.6666, "step": 345 }, { "epoch": 0.031051580624172667, "grad_norm": 0.5929471021587753, "learning_rate": 1.0349985043374216e-06, "loss": 0.7287, "step": 346 }, { "epoch": 0.03114132507684369, "grad_norm": 0.5881158095052583, "learning_rate": 1.0379898294944662e-06, "loss": 0.7103, "step": 347 }, { "epoch": 0.031231069529514707, "grad_norm": 0.5897020622007937, "learning_rate": 1.0409811546515107e-06, "loss": 0.7403, "step": 348 }, { "epoch": 0.031320813982185725, "grad_norm": 0.5972518178241226, "learning_rate": 1.0439724798085553e-06, "loss": 0.6854, "step": 349 }, { "epoch": 0.031410558434856746, "grad_norm": 0.5856924190854798, "learning_rate": 1.0469638049655999e-06, "loss": 0.7138, "step": 350 }, { "epoch": 0.03150030288752777, "grad_norm": 0.557116548694714, "learning_rate": 1.0499551301226444e-06, "loss": 0.6701, "step": 351 }, { "epoch": 0.03159004734019878, "grad_norm": 0.533273442802963, "learning_rate": 1.052946455279689e-06, "loss": 0.6352, "step": 352 }, { "epoch": 0.0316797917928698, "grad_norm": 0.5814995763017277, "learning_rate": 1.0559377804367335e-06, "loss": 0.7152, "step": 353 }, { "epoch": 0.031769536245540825, "grad_norm": 0.5769918743837207, "learning_rate": 1.058929105593778e-06, "loss": 0.7288, "step": 354 }, { "epoch": 0.03185928069821184, "grad_norm": 0.5794225288710952, "learning_rate": 1.0619204307508227e-06, "loss": 0.6635, "step": 355 }, { "epoch": 0.03194902515088286, "grad_norm": 0.5754938998019226, "learning_rate": 1.0649117559078672e-06, "loss": 0.6724, "step": 356 }, { "epoch": 0.03203876960355388, "grad_norm": 0.6197573246247823, "learning_rate": 1.0679030810649118e-06, "loss": 0.695, "step": 357 }, { "epoch": 0.0321285140562249, "grad_norm": 0.5818306936614093, "learning_rate": 1.0708944062219565e-06, "loss": 0.6459, "step": 358 }, { "epoch": 0.03221825850889592, "grad_norm": 0.6012939965675231, "learning_rate": 1.073885731379001e-06, "loss": 0.7628, "step": 359 }, { "epoch": 0.03230800296156694, "grad_norm": 0.5216009842449587, "learning_rate": 1.0768770565360454e-06, "loss": 0.6463, "step": 360 }, { "epoch": 0.032397747414237954, "grad_norm": 0.5488488570579623, "learning_rate": 1.07986838169309e-06, "loss": 0.6638, "step": 361 }, { "epoch": 0.032487491866908975, "grad_norm": 0.5684959412164873, "learning_rate": 1.0828597068501348e-06, "loss": 0.7207, "step": 362 }, { "epoch": 0.03257723631958, "grad_norm": 0.5325690202446527, "learning_rate": 1.0858510320071793e-06, "loss": 0.6367, "step": 363 }, { "epoch": 0.03266698077225102, "grad_norm": 0.5896367658316377, "learning_rate": 1.0888423571642239e-06, "loss": 0.7135, "step": 364 }, { "epoch": 0.03275672522492203, "grad_norm": 0.5424329932103303, "learning_rate": 1.0918336823212684e-06, "loss": 0.6143, "step": 365 }, { "epoch": 0.032846469677593054, "grad_norm": 0.6013449267362498, "learning_rate": 1.094825007478313e-06, "loss": 0.6796, "step": 366 }, { "epoch": 0.032936214130264076, "grad_norm": 0.5505342292400798, "learning_rate": 1.0978163326353576e-06, "loss": 0.5887, "step": 367 }, { "epoch": 0.03302595858293509, "grad_norm": 0.49530569266331664, "learning_rate": 1.1008076577924021e-06, "loss": 0.5341, "step": 368 }, { "epoch": 0.03311570303560611, "grad_norm": 0.5494069700982298, "learning_rate": 1.1037989829494467e-06, "loss": 0.682, "step": 369 }, { "epoch": 0.03320544748827713, "grad_norm": 0.5778532920864464, "learning_rate": 1.1067903081064912e-06, "loss": 0.7506, "step": 370 }, { "epoch": 0.03329519194094815, "grad_norm": 0.5891213819945278, "learning_rate": 1.1097816332635358e-06, "loss": 0.7057, "step": 371 }, { "epoch": 0.03338493639361917, "grad_norm": 0.6374218194770659, "learning_rate": 1.1127729584205804e-06, "loss": 0.7624, "step": 372 }, { "epoch": 0.03347468084629019, "grad_norm": 0.5163597590661209, "learning_rate": 1.115764283577625e-06, "loss": 0.5676, "step": 373 }, { "epoch": 0.033564425298961205, "grad_norm": 0.552056267806895, "learning_rate": 1.1187556087346697e-06, "loss": 0.6136, "step": 374 }, { "epoch": 0.033654169751632226, "grad_norm": 0.6479169279903886, "learning_rate": 1.1217469338917142e-06, "loss": 0.6442, "step": 375 }, { "epoch": 0.03374391420430325, "grad_norm": 0.5484023963717238, "learning_rate": 1.1247382590487586e-06, "loss": 0.6138, "step": 376 }, { "epoch": 0.03383365865697427, "grad_norm": 0.5798000225335986, "learning_rate": 1.1277295842058031e-06, "loss": 0.6349, "step": 377 }, { "epoch": 0.033923403109645284, "grad_norm": 0.6398067991427431, "learning_rate": 1.130720909362848e-06, "loss": 0.7576, "step": 378 }, { "epoch": 0.034013147562316305, "grad_norm": 0.5428358722344953, "learning_rate": 1.1337122345198925e-06, "loss": 0.6935, "step": 379 }, { "epoch": 0.03410289201498733, "grad_norm": 0.6433672322312562, "learning_rate": 1.136703559676937e-06, "loss": 0.7779, "step": 380 }, { "epoch": 0.03419263646765834, "grad_norm": 0.5936599044686921, "learning_rate": 1.1396948848339816e-06, "loss": 0.665, "step": 381 }, { "epoch": 0.03428238092032936, "grad_norm": 0.583755800087053, "learning_rate": 1.1426862099910261e-06, "loss": 0.6819, "step": 382 }, { "epoch": 0.034372125373000384, "grad_norm": 0.568775535947946, "learning_rate": 1.1456775351480707e-06, "loss": 0.7106, "step": 383 }, { "epoch": 0.0344618698256714, "grad_norm": 0.6038094782003222, "learning_rate": 1.1486688603051153e-06, "loss": 0.7484, "step": 384 }, { "epoch": 0.03455161427834242, "grad_norm": 0.5351440476338672, "learning_rate": 1.1516601854621598e-06, "loss": 0.6475, "step": 385 }, { "epoch": 0.03464135873101344, "grad_norm": 0.5567569901624483, "learning_rate": 1.1546515106192044e-06, "loss": 0.656, "step": 386 }, { "epoch": 0.034731103183684456, "grad_norm": 0.5662619128356198, "learning_rate": 1.157642835776249e-06, "loss": 0.6593, "step": 387 }, { "epoch": 0.03482084763635548, "grad_norm": 0.5676770851408386, "learning_rate": 1.1606341609332935e-06, "loss": 0.6543, "step": 388 }, { "epoch": 0.0349105920890265, "grad_norm": 0.5775161314424316, "learning_rate": 1.163625486090338e-06, "loss": 0.7184, "step": 389 }, { "epoch": 0.03500033654169751, "grad_norm": 0.5088838860879505, "learning_rate": 1.1666168112473828e-06, "loss": 0.5612, "step": 390 }, { "epoch": 0.035090080994368535, "grad_norm": 0.5549907498199189, "learning_rate": 1.1696081364044272e-06, "loss": 0.6251, "step": 391 }, { "epoch": 0.035179825447039556, "grad_norm": 0.6022946371406134, "learning_rate": 1.1725994615614717e-06, "loss": 0.6633, "step": 392 }, { "epoch": 0.03526956989971058, "grad_norm": 0.5761479768232133, "learning_rate": 1.1755907867185163e-06, "loss": 0.7107, "step": 393 }, { "epoch": 0.03535931435238159, "grad_norm": 0.5612541687120266, "learning_rate": 1.178582111875561e-06, "loss": 0.6331, "step": 394 }, { "epoch": 0.03544905880505261, "grad_norm": 0.5605086259599743, "learning_rate": 1.1815734370326056e-06, "loss": 0.6519, "step": 395 }, { "epoch": 0.035538803257723635, "grad_norm": 0.5280548607798283, "learning_rate": 1.1845647621896502e-06, "loss": 0.6538, "step": 396 }, { "epoch": 0.03562854771039465, "grad_norm": 0.60714478556734, "learning_rate": 1.1875560873466947e-06, "loss": 0.7519, "step": 397 }, { "epoch": 0.03571829216306567, "grad_norm": 0.615869882064988, "learning_rate": 1.1905474125037393e-06, "loss": 0.7123, "step": 398 }, { "epoch": 0.03580803661573669, "grad_norm": 0.6100070163640141, "learning_rate": 1.1935387376607839e-06, "loss": 0.6438, "step": 399 }, { "epoch": 0.03589778106840771, "grad_norm": 0.555931478130768, "learning_rate": 1.1965300628178284e-06, "loss": 0.6403, "step": 400 }, { "epoch": 0.03598752552107873, "grad_norm": 0.5763963952018208, "learning_rate": 1.199521387974873e-06, "loss": 0.6781, "step": 401 }, { "epoch": 0.03607726997374975, "grad_norm": 0.6245713525673827, "learning_rate": 1.2025127131319175e-06, "loss": 0.6809, "step": 402 }, { "epoch": 0.036167014426420764, "grad_norm": 0.5855298251195505, "learning_rate": 1.205504038288962e-06, "loss": 0.7086, "step": 403 }, { "epoch": 0.036256758879091786, "grad_norm": 0.5753887463625197, "learning_rate": 1.2084953634460066e-06, "loss": 0.6869, "step": 404 }, { "epoch": 0.03634650333176281, "grad_norm": 0.5605073890305964, "learning_rate": 1.2114866886030512e-06, "loss": 0.6758, "step": 405 }, { "epoch": 0.03643624778443382, "grad_norm": 0.5554320439040727, "learning_rate": 1.214478013760096e-06, "loss": 0.6357, "step": 406 }, { "epoch": 0.03652599223710484, "grad_norm": 0.5374983743461124, "learning_rate": 1.2174693389171403e-06, "loss": 0.5925, "step": 407 }, { "epoch": 0.036615736689775864, "grad_norm": 0.5365740793454248, "learning_rate": 1.2204606640741849e-06, "loss": 0.6102, "step": 408 }, { "epoch": 0.036705481142446886, "grad_norm": 0.5088100741800563, "learning_rate": 1.2234519892312294e-06, "loss": 0.5806, "step": 409 }, { "epoch": 0.0367952255951179, "grad_norm": 0.5904974227377047, "learning_rate": 1.2264433143882742e-06, "loss": 0.6795, "step": 410 }, { "epoch": 0.03688497004778892, "grad_norm": 0.5299732247697682, "learning_rate": 1.2294346395453188e-06, "loss": 0.6187, "step": 411 }, { "epoch": 0.03697471450045994, "grad_norm": 0.5499258933404155, "learning_rate": 1.2324259647023633e-06, "loss": 0.6345, "step": 412 }, { "epoch": 0.03706445895313096, "grad_norm": 0.5780684217190158, "learning_rate": 1.2354172898594079e-06, "loss": 0.6415, "step": 413 }, { "epoch": 0.03715420340580198, "grad_norm": 0.626270917159728, "learning_rate": 1.2384086150164524e-06, "loss": 0.6213, "step": 414 }, { "epoch": 0.037243947858473, "grad_norm": 0.6241251750502669, "learning_rate": 1.241399940173497e-06, "loss": 0.6961, "step": 415 }, { "epoch": 0.037333692311144015, "grad_norm": 0.6241129945440634, "learning_rate": 1.2443912653305416e-06, "loss": 0.7109, "step": 416 }, { "epoch": 0.037423436763815036, "grad_norm": 0.5197695981093299, "learning_rate": 1.2473825904875861e-06, "loss": 0.6046, "step": 417 }, { "epoch": 0.03751318121648606, "grad_norm": 0.5457516073512932, "learning_rate": 1.2503739156446307e-06, "loss": 0.6617, "step": 418 }, { "epoch": 0.03760292566915707, "grad_norm": 0.5571738658945737, "learning_rate": 1.2533652408016752e-06, "loss": 0.6177, "step": 419 }, { "epoch": 0.037692670121828094, "grad_norm": 0.5836910651449769, "learning_rate": 1.2563565659587198e-06, "loss": 0.596, "step": 420 }, { "epoch": 0.037782414574499115, "grad_norm": 0.5629687975097274, "learning_rate": 1.2593478911157643e-06, "loss": 0.7363, "step": 421 }, { "epoch": 0.03787215902717013, "grad_norm": 0.524531681074637, "learning_rate": 1.2623392162728091e-06, "loss": 0.6027, "step": 422 }, { "epoch": 0.03796190347984115, "grad_norm": 0.5517483465762046, "learning_rate": 1.2653305414298537e-06, "loss": 0.6313, "step": 423 }, { "epoch": 0.03805164793251217, "grad_norm": 0.5959355886778721, "learning_rate": 1.2683218665868982e-06, "loss": 0.6766, "step": 424 }, { "epoch": 0.038141392385183194, "grad_norm": 0.5315066506142311, "learning_rate": 1.2713131917439428e-06, "loss": 0.6064, "step": 425 }, { "epoch": 0.03823113683785421, "grad_norm": 0.5765631597146642, "learning_rate": 1.2743045169009871e-06, "loss": 0.6303, "step": 426 }, { "epoch": 0.03832088129052523, "grad_norm": 0.6343813871886146, "learning_rate": 1.2772958420580317e-06, "loss": 0.6649, "step": 427 }, { "epoch": 0.03841062574319625, "grad_norm": 0.598879886738929, "learning_rate": 1.2802871672150763e-06, "loss": 0.6693, "step": 428 }, { "epoch": 0.038500370195867266, "grad_norm": 0.6061192514066123, "learning_rate": 1.2832784923721208e-06, "loss": 0.6873, "step": 429 }, { "epoch": 0.03859011464853829, "grad_norm": 0.6514614391469561, "learning_rate": 1.2862698175291654e-06, "loss": 0.6479, "step": 430 }, { "epoch": 0.03867985910120931, "grad_norm": 0.5721717529264797, "learning_rate": 1.2892611426862101e-06, "loss": 0.626, "step": 431 }, { "epoch": 0.03876960355388032, "grad_norm": 0.6945919921108507, "learning_rate": 1.2922524678432547e-06, "loss": 0.7828, "step": 432 }, { "epoch": 0.038859348006551345, "grad_norm": 0.5642446682405257, "learning_rate": 1.2952437930002993e-06, "loss": 0.6018, "step": 433 }, { "epoch": 0.038949092459222366, "grad_norm": 0.5330371435016759, "learning_rate": 1.2982351181573438e-06, "loss": 0.597, "step": 434 }, { "epoch": 0.03903883691189338, "grad_norm": 0.56047239445254, "learning_rate": 1.3012264433143884e-06, "loss": 0.6876, "step": 435 }, { "epoch": 0.0391285813645644, "grad_norm": 0.627840429782498, "learning_rate": 1.304217768471433e-06, "loss": 0.6208, "step": 436 }, { "epoch": 0.039218325817235423, "grad_norm": 0.6197423208835827, "learning_rate": 1.3072090936284775e-06, "loss": 0.6776, "step": 437 }, { "epoch": 0.03930807026990644, "grad_norm": 0.5531446381385325, "learning_rate": 1.3102004187855223e-06, "loss": 0.5946, "step": 438 }, { "epoch": 0.03939781472257746, "grad_norm": 0.5359420841652252, "learning_rate": 1.3131917439425668e-06, "loss": 0.6046, "step": 439 }, { "epoch": 0.03948755917524848, "grad_norm": 0.5511659387841619, "learning_rate": 1.3161830690996114e-06, "loss": 0.641, "step": 440 }, { "epoch": 0.0395773036279195, "grad_norm": 0.628237469648903, "learning_rate": 1.319174394256656e-06, "loss": 0.6254, "step": 441 }, { "epoch": 0.03966704808059052, "grad_norm": 0.6098369658316066, "learning_rate": 1.3221657194137003e-06, "loss": 0.6554, "step": 442 }, { "epoch": 0.03975679253326154, "grad_norm": 0.5413096694926818, "learning_rate": 1.3251570445707448e-06, "loss": 0.5952, "step": 443 }, { "epoch": 0.03984653698593256, "grad_norm": 0.6157919635328177, "learning_rate": 1.3281483697277894e-06, "loss": 0.714, "step": 444 }, { "epoch": 0.039936281438603574, "grad_norm": 0.6392024965197953, "learning_rate": 1.331139694884834e-06, "loss": 0.6639, "step": 445 }, { "epoch": 0.040026025891274596, "grad_norm": 0.56352860976102, "learning_rate": 1.3341310200418785e-06, "loss": 0.6321, "step": 446 }, { "epoch": 0.04011577034394562, "grad_norm": 0.6452822348512346, "learning_rate": 1.3371223451989233e-06, "loss": 0.6841, "step": 447 }, { "epoch": 0.04020551479661663, "grad_norm": 0.536129160024184, "learning_rate": 1.3401136703559678e-06, "loss": 0.6242, "step": 448 }, { "epoch": 0.04029525924928765, "grad_norm": 0.5923600436340227, "learning_rate": 1.3431049955130124e-06, "loss": 0.6874, "step": 449 }, { "epoch": 0.040385003701958674, "grad_norm": 0.6064954168812542, "learning_rate": 1.346096320670057e-06, "loss": 0.6687, "step": 450 }, { "epoch": 0.04047474815462969, "grad_norm": 0.5771547707313468, "learning_rate": 1.3490876458271015e-06, "loss": 0.7079, "step": 451 }, { "epoch": 0.04056449260730071, "grad_norm": 0.5769440514116055, "learning_rate": 1.352078970984146e-06, "loss": 0.6699, "step": 452 }, { "epoch": 0.04065423705997173, "grad_norm": 0.5532231379250256, "learning_rate": 1.3550702961411906e-06, "loss": 0.5595, "step": 453 }, { "epoch": 0.04074398151264275, "grad_norm": 0.59381229005715, "learning_rate": 1.3580616212982354e-06, "loss": 0.7096, "step": 454 }, { "epoch": 0.04083372596531377, "grad_norm": 0.5804880071547158, "learning_rate": 1.36105294645528e-06, "loss": 0.6631, "step": 455 }, { "epoch": 0.04092347041798479, "grad_norm": 0.6142639046229208, "learning_rate": 1.3640442716123245e-06, "loss": 0.6615, "step": 456 }, { "epoch": 0.04101321487065581, "grad_norm": 0.5537388014979389, "learning_rate": 1.3670355967693689e-06, "loss": 0.6231, "step": 457 }, { "epoch": 0.041102959323326825, "grad_norm": 0.5749580189863193, "learning_rate": 1.3700269219264134e-06, "loss": 0.5844, "step": 458 }, { "epoch": 0.041192703775997846, "grad_norm": 0.5609750362798168, "learning_rate": 1.373018247083458e-06, "loss": 0.6554, "step": 459 }, { "epoch": 0.04128244822866887, "grad_norm": 0.5569187996430222, "learning_rate": 1.3760095722405025e-06, "loss": 0.6519, "step": 460 }, { "epoch": 0.04137219268133988, "grad_norm": 0.6190277297169872, "learning_rate": 1.379000897397547e-06, "loss": 0.6715, "step": 461 }, { "epoch": 0.041461937134010904, "grad_norm": 0.5716148401746025, "learning_rate": 1.3819922225545917e-06, "loss": 0.5462, "step": 462 }, { "epoch": 0.041551681586681925, "grad_norm": 0.5557627515531459, "learning_rate": 1.3849835477116364e-06, "loss": 0.5876, "step": 463 }, { "epoch": 0.04164142603935294, "grad_norm": 0.6997286090814844, "learning_rate": 1.387974872868681e-06, "loss": 0.6506, "step": 464 }, { "epoch": 0.04173117049202396, "grad_norm": 0.5772254680112628, "learning_rate": 1.3909661980257255e-06, "loss": 0.6607, "step": 465 }, { "epoch": 0.04182091494469498, "grad_norm": 0.5635773414577844, "learning_rate": 1.3939575231827701e-06, "loss": 0.6511, "step": 466 }, { "epoch": 0.041910659397366, "grad_norm": 0.5356407766836802, "learning_rate": 1.3969488483398147e-06, "loss": 0.6071, "step": 467 }, { "epoch": 0.04200040385003702, "grad_norm": 0.5676267989775242, "learning_rate": 1.3999401734968592e-06, "loss": 0.7106, "step": 468 }, { "epoch": 0.04209014830270804, "grad_norm": 0.5717389486628346, "learning_rate": 1.4029314986539038e-06, "loss": 0.596, "step": 469 }, { "epoch": 0.04217989275537906, "grad_norm": 0.6063450793860123, "learning_rate": 1.4059228238109486e-06, "loss": 0.6665, "step": 470 }, { "epoch": 0.042269637208050076, "grad_norm": 0.5758710993294146, "learning_rate": 1.4089141489679931e-06, "loss": 0.6162, "step": 471 }, { "epoch": 0.0423593816607211, "grad_norm": 0.618565802698522, "learning_rate": 1.4119054741250377e-06, "loss": 0.6324, "step": 472 }, { "epoch": 0.04244912611339212, "grad_norm": 0.6442793089282217, "learning_rate": 1.414896799282082e-06, "loss": 0.6603, "step": 473 }, { "epoch": 0.04253887056606313, "grad_norm": 0.5468245155366286, "learning_rate": 1.4178881244391266e-06, "loss": 0.6233, "step": 474 }, { "epoch": 0.042628615018734155, "grad_norm": 0.5357563185043394, "learning_rate": 1.4208794495961711e-06, "loss": 0.5881, "step": 475 }, { "epoch": 0.042718359471405176, "grad_norm": 0.5573577369453915, "learning_rate": 1.4238707747532157e-06, "loss": 0.6177, "step": 476 }, { "epoch": 0.04280810392407619, "grad_norm": 0.6975741538015393, "learning_rate": 1.4268620999102602e-06, "loss": 0.7626, "step": 477 }, { "epoch": 0.04289784837674721, "grad_norm": 0.6013211805025372, "learning_rate": 1.4298534250673048e-06, "loss": 0.5947, "step": 478 }, { "epoch": 0.042987592829418234, "grad_norm": 0.6286341888913406, "learning_rate": 1.4328447502243496e-06, "loss": 0.6409, "step": 479 }, { "epoch": 0.04307733728208925, "grad_norm": 0.6432973140815405, "learning_rate": 1.4358360753813941e-06, "loss": 0.6346, "step": 480 }, { "epoch": 0.04316708173476027, "grad_norm": 0.5149548577915971, "learning_rate": 1.4388274005384387e-06, "loss": 0.5209, "step": 481 }, { "epoch": 0.04325682618743129, "grad_norm": 0.5900151869594733, "learning_rate": 1.4418187256954833e-06, "loss": 0.6349, "step": 482 }, { "epoch": 0.043346570640102305, "grad_norm": 0.5561947176061255, "learning_rate": 1.4448100508525278e-06, "loss": 0.6847, "step": 483 }, { "epoch": 0.04343631509277333, "grad_norm": 0.5975729669260605, "learning_rate": 1.4478013760095724e-06, "loss": 0.635, "step": 484 }, { "epoch": 0.04352605954544435, "grad_norm": 0.5638659627486822, "learning_rate": 1.450792701166617e-06, "loss": 0.54, "step": 485 }, { "epoch": 0.04361580399811537, "grad_norm": 0.5584846836658388, "learning_rate": 1.4537840263236617e-06, "loss": 0.5827, "step": 486 }, { "epoch": 0.043705548450786384, "grad_norm": 0.5740523861482187, "learning_rate": 1.4567753514807063e-06, "loss": 0.6104, "step": 487 }, { "epoch": 0.043795292903457406, "grad_norm": 0.5454739010518386, "learning_rate": 1.4597666766377506e-06, "loss": 0.6138, "step": 488 }, { "epoch": 0.04388503735612843, "grad_norm": 0.5696841451373902, "learning_rate": 1.4627580017947952e-06, "loss": 0.6573, "step": 489 }, { "epoch": 0.04397478180879944, "grad_norm": 0.6023930527627553, "learning_rate": 1.4657493269518397e-06, "loss": 0.7187, "step": 490 }, { "epoch": 0.04406452626147046, "grad_norm": 0.5893603559580208, "learning_rate": 1.4687406521088843e-06, "loss": 0.6193, "step": 491 }, { "epoch": 0.044154270714141484, "grad_norm": 0.5915009706620751, "learning_rate": 1.4717319772659288e-06, "loss": 0.6924, "step": 492 }, { "epoch": 0.0442440151668125, "grad_norm": 0.5314399526136474, "learning_rate": 1.4747233024229734e-06, "loss": 0.607, "step": 493 }, { "epoch": 0.04433375961948352, "grad_norm": 0.5019484277504134, "learning_rate": 1.477714627580018e-06, "loss": 0.581, "step": 494 }, { "epoch": 0.04442350407215454, "grad_norm": 0.5238685809007168, "learning_rate": 1.4807059527370627e-06, "loss": 0.5849, "step": 495 }, { "epoch": 0.044513248524825556, "grad_norm": 0.5964759081820203, "learning_rate": 1.4836972778941073e-06, "loss": 0.6472, "step": 496 }, { "epoch": 0.04460299297749658, "grad_norm": 0.5157194497314039, "learning_rate": 1.4866886030511518e-06, "loss": 0.6242, "step": 497 }, { "epoch": 0.0446927374301676, "grad_norm": 0.5048057180937905, "learning_rate": 1.4896799282081964e-06, "loss": 0.591, "step": 498 }, { "epoch": 0.044782481882838614, "grad_norm": 0.5925725080201113, "learning_rate": 1.492671253365241e-06, "loss": 0.7303, "step": 499 }, { "epoch": 0.044872226335509635, "grad_norm": 0.6830763643110448, "learning_rate": 1.4956625785222855e-06, "loss": 0.7865, "step": 500 }, { "epoch": 0.04496197078818066, "grad_norm": 0.5396618479461824, "learning_rate": 1.49865390367933e-06, "loss": 0.619, "step": 501 }, { "epoch": 0.04505171524085168, "grad_norm": 0.5876300335613126, "learning_rate": 1.5016452288363748e-06, "loss": 0.7077, "step": 502 }, { "epoch": 0.04514145969352269, "grad_norm": 0.5698932023098341, "learning_rate": 1.504636553993419e-06, "loss": 0.6196, "step": 503 }, { "epoch": 0.045231204146193714, "grad_norm": 0.5179078621696069, "learning_rate": 1.5076278791504637e-06, "loss": 0.5745, "step": 504 }, { "epoch": 0.045320948598864735, "grad_norm": 0.5783928116103261, "learning_rate": 1.5106192043075083e-06, "loss": 0.6645, "step": 505 }, { "epoch": 0.04541069305153575, "grad_norm": 0.5582391279391228, "learning_rate": 1.5136105294645529e-06, "loss": 0.6289, "step": 506 }, { "epoch": 0.04550043750420677, "grad_norm": 0.5730628117765596, "learning_rate": 1.5166018546215974e-06, "loss": 0.6657, "step": 507 }, { "epoch": 0.04559018195687779, "grad_norm": 0.5429185611593947, "learning_rate": 1.519593179778642e-06, "loss": 0.573, "step": 508 }, { "epoch": 0.04567992640954881, "grad_norm": 0.6451981453787805, "learning_rate": 1.5225845049356865e-06, "loss": 0.6803, "step": 509 }, { "epoch": 0.04576967086221983, "grad_norm": 0.5739138669664737, "learning_rate": 1.525575830092731e-06, "loss": 0.58, "step": 510 }, { "epoch": 0.04585941531489085, "grad_norm": 0.6585048805951444, "learning_rate": 1.5285671552497759e-06, "loss": 0.6823, "step": 511 }, { "epoch": 0.045949159767561865, "grad_norm": 0.5641903827863276, "learning_rate": 1.5315584804068204e-06, "loss": 0.6332, "step": 512 }, { "epoch": 0.046038904220232886, "grad_norm": 0.5313100450183068, "learning_rate": 1.534549805563865e-06, "loss": 0.6136, "step": 513 }, { "epoch": 0.04612864867290391, "grad_norm": 0.5704445230866944, "learning_rate": 1.5375411307209095e-06, "loss": 0.5857, "step": 514 }, { "epoch": 0.04621839312557492, "grad_norm": 0.5819226988440598, "learning_rate": 1.540532455877954e-06, "loss": 0.645, "step": 515 }, { "epoch": 0.04630813757824594, "grad_norm": 0.6451401129973301, "learning_rate": 1.5435237810349987e-06, "loss": 0.578, "step": 516 }, { "epoch": 0.046397882030916965, "grad_norm": 0.5626032709802226, "learning_rate": 1.5465151061920432e-06, "loss": 0.5793, "step": 517 }, { "epoch": 0.046487626483587986, "grad_norm": 0.6812263815874029, "learning_rate": 1.549506431349088e-06, "loss": 0.7817, "step": 518 }, { "epoch": 0.046577370936259, "grad_norm": 0.5504193550709848, "learning_rate": 1.5524977565061321e-06, "loss": 0.5944, "step": 519 }, { "epoch": 0.04666711538893002, "grad_norm": 0.5354893832354787, "learning_rate": 1.5554890816631769e-06, "loss": 0.5463, "step": 520 }, { "epoch": 0.046756859841601044, "grad_norm": 0.5898713893180316, "learning_rate": 1.5584804068202214e-06, "loss": 0.6612, "step": 521 }, { "epoch": 0.04684660429427206, "grad_norm": 0.5697343902085884, "learning_rate": 1.561471731977266e-06, "loss": 0.582, "step": 522 }, { "epoch": 0.04693634874694308, "grad_norm": 0.5859043638222439, "learning_rate": 1.5644630571343106e-06, "loss": 0.5887, "step": 523 }, { "epoch": 0.0470260931996141, "grad_norm": 0.6393274995531374, "learning_rate": 1.5674543822913551e-06, "loss": 0.6793, "step": 524 }, { "epoch": 0.047115837652285115, "grad_norm": 0.633699190003323, "learning_rate": 1.5704457074483997e-06, "loss": 0.7126, "step": 525 }, { "epoch": 0.04720558210495614, "grad_norm": 0.5883708402216752, "learning_rate": 1.5734370326054442e-06, "loss": 0.698, "step": 526 }, { "epoch": 0.04729532655762716, "grad_norm": 0.5635663798806919, "learning_rate": 1.576428357762489e-06, "loss": 0.6202, "step": 527 }, { "epoch": 0.04738507101029817, "grad_norm": 0.5894219547859572, "learning_rate": 1.5794196829195336e-06, "loss": 0.6381, "step": 528 }, { "epoch": 0.047474815462969194, "grad_norm": 0.5625664337499305, "learning_rate": 1.5824110080765781e-06, "loss": 0.631, "step": 529 }, { "epoch": 0.047564559915640216, "grad_norm": 0.5092800615646674, "learning_rate": 1.5854023332336227e-06, "loss": 0.6169, "step": 530 }, { "epoch": 0.04765430436831124, "grad_norm": 0.5435448886412605, "learning_rate": 1.5883936583906672e-06, "loss": 0.5762, "step": 531 }, { "epoch": 0.04774404882098225, "grad_norm": 0.6087631028368766, "learning_rate": 1.5913849835477118e-06, "loss": 0.699, "step": 532 }, { "epoch": 0.04783379327365327, "grad_norm": 0.5560617045584771, "learning_rate": 1.5943763087047564e-06, "loss": 0.5931, "step": 533 }, { "epoch": 0.047923537726324295, "grad_norm": 0.4848066500178396, "learning_rate": 1.5973676338618007e-06, "loss": 0.5376, "step": 534 }, { "epoch": 0.04801328217899531, "grad_norm": 0.5782820547853402, "learning_rate": 1.6003589590188453e-06, "loss": 0.6423, "step": 535 }, { "epoch": 0.04810302663166633, "grad_norm": 0.5452246254554736, "learning_rate": 1.60335028417589e-06, "loss": 0.5902, "step": 536 }, { "epoch": 0.04819277108433735, "grad_norm": 0.5847388003233639, "learning_rate": 1.6063416093329346e-06, "loss": 0.6422, "step": 537 }, { "epoch": 0.048282515537008366, "grad_norm": 0.5724515446454962, "learning_rate": 1.6093329344899792e-06, "loss": 0.5417, "step": 538 }, { "epoch": 0.04837225998967939, "grad_norm": 0.587583689626471, "learning_rate": 1.6123242596470237e-06, "loss": 0.5846, "step": 539 }, { "epoch": 0.04846200444235041, "grad_norm": 0.555060033475107, "learning_rate": 1.6153155848040683e-06, "loss": 0.561, "step": 540 }, { "epoch": 0.048551748895021424, "grad_norm": 0.587853541783574, "learning_rate": 1.6183069099611128e-06, "loss": 0.6608, "step": 541 }, { "epoch": 0.048641493347692445, "grad_norm": 0.5640489807532789, "learning_rate": 1.6212982351181574e-06, "loss": 0.6515, "step": 542 }, { "epoch": 0.04873123780036347, "grad_norm": 0.537927664537603, "learning_rate": 1.6242895602752022e-06, "loss": 0.5534, "step": 543 }, { "epoch": 0.04882098225303448, "grad_norm": 0.5422010292112358, "learning_rate": 1.6272808854322467e-06, "loss": 0.6055, "step": 544 }, { "epoch": 0.0489107267057055, "grad_norm": 0.5681361111245534, "learning_rate": 1.6302722105892913e-06, "loss": 0.5783, "step": 545 }, { "epoch": 0.049000471158376524, "grad_norm": 0.560127966366581, "learning_rate": 1.6332635357463358e-06, "loss": 0.5864, "step": 546 }, { "epoch": 0.049090215611047545, "grad_norm": 0.5342937031636312, "learning_rate": 1.6362548609033804e-06, "loss": 0.5877, "step": 547 }, { "epoch": 0.04917996006371856, "grad_norm": 0.5278011708794217, "learning_rate": 1.639246186060425e-06, "loss": 0.5568, "step": 548 }, { "epoch": 0.04926970451638958, "grad_norm": 0.5656724852207835, "learning_rate": 1.6422375112174695e-06, "loss": 0.5936, "step": 549 }, { "epoch": 0.0493594489690606, "grad_norm": 0.5677222874905351, "learning_rate": 1.6452288363745139e-06, "loss": 0.6742, "step": 550 }, { "epoch": 0.04944919342173162, "grad_norm": 0.5548204560123478, "learning_rate": 1.6482201615315584e-06, "loss": 0.5391, "step": 551 }, { "epoch": 0.04953893787440264, "grad_norm": 0.5782342860810812, "learning_rate": 1.6512114866886032e-06, "loss": 0.6882, "step": 552 }, { "epoch": 0.04962868232707366, "grad_norm": 0.5314786563932044, "learning_rate": 1.6542028118456477e-06, "loss": 0.6348, "step": 553 }, { "epoch": 0.049718426779744675, "grad_norm": 0.5340527645159591, "learning_rate": 1.6571941370026923e-06, "loss": 0.5923, "step": 554 }, { "epoch": 0.049808171232415696, "grad_norm": 0.5554510056105677, "learning_rate": 1.6601854621597369e-06, "loss": 0.6009, "step": 555 }, { "epoch": 0.04989791568508672, "grad_norm": 0.5401645495491985, "learning_rate": 1.6631767873167814e-06, "loss": 0.5907, "step": 556 }, { "epoch": 0.04998766013775773, "grad_norm": 0.5289566971131854, "learning_rate": 1.666168112473826e-06, "loss": 0.6281, "step": 557 }, { "epoch": 0.050077404590428753, "grad_norm": 0.5650709202601109, "learning_rate": 1.6691594376308705e-06, "loss": 0.6216, "step": 558 }, { "epoch": 0.050167149043099775, "grad_norm": 0.6438048962537194, "learning_rate": 1.6721507627879153e-06, "loss": 0.6731, "step": 559 }, { "epoch": 0.05025689349577079, "grad_norm": 0.5291309161868094, "learning_rate": 1.6751420879449599e-06, "loss": 0.5939, "step": 560 }, { "epoch": 0.05034663794844181, "grad_norm": 0.5532090158926728, "learning_rate": 1.6781334131020044e-06, "loss": 0.5921, "step": 561 }, { "epoch": 0.05043638240111283, "grad_norm": 0.5404475265628191, "learning_rate": 1.681124738259049e-06, "loss": 0.5742, "step": 562 }, { "epoch": 0.050526126853783854, "grad_norm": 0.5346770688490551, "learning_rate": 1.6841160634160935e-06, "loss": 0.611, "step": 563 }, { "epoch": 0.05061587130645487, "grad_norm": 0.5663515689976562, "learning_rate": 1.687107388573138e-06, "loss": 0.6069, "step": 564 }, { "epoch": 0.05070561575912589, "grad_norm": 0.5138179378340174, "learning_rate": 1.6900987137301824e-06, "loss": 0.5367, "step": 565 }, { "epoch": 0.05079536021179691, "grad_norm": 0.5654389606176619, "learning_rate": 1.693090038887227e-06, "loss": 0.5723, "step": 566 }, { "epoch": 0.050885104664467926, "grad_norm": 0.5627457525384731, "learning_rate": 1.6960813640442716e-06, "loss": 0.5693, "step": 567 }, { "epoch": 0.05097484911713895, "grad_norm": 0.5448352673531911, "learning_rate": 1.6990726892013163e-06, "loss": 0.6037, "step": 568 }, { "epoch": 0.05106459356980997, "grad_norm": 0.6217642272137931, "learning_rate": 1.7020640143583609e-06, "loss": 0.5407, "step": 569 }, { "epoch": 0.05115433802248098, "grad_norm": 0.5926147147702953, "learning_rate": 1.7050553395154054e-06, "loss": 0.665, "step": 570 }, { "epoch": 0.051244082475152004, "grad_norm": 0.6197530263020663, "learning_rate": 1.70804666467245e-06, "loss": 0.6758, "step": 571 }, { "epoch": 0.051333826927823026, "grad_norm": 0.5247770855229299, "learning_rate": 1.7110379898294946e-06, "loss": 0.5833, "step": 572 }, { "epoch": 0.05142357138049404, "grad_norm": 0.5591850636643309, "learning_rate": 1.7140293149865391e-06, "loss": 0.6083, "step": 573 }, { "epoch": 0.05151331583316506, "grad_norm": 0.5663166387868016, "learning_rate": 1.7170206401435837e-06, "loss": 0.6318, "step": 574 }, { "epoch": 0.05160306028583608, "grad_norm": 0.5551015342624528, "learning_rate": 1.7200119653006284e-06, "loss": 0.5564, "step": 575 }, { "epoch": 0.0516928047385071, "grad_norm": 0.6267933286764086, "learning_rate": 1.723003290457673e-06, "loss": 0.6686, "step": 576 }, { "epoch": 0.05178254919117812, "grad_norm": 0.5797902186282748, "learning_rate": 1.7259946156147176e-06, "loss": 0.6249, "step": 577 }, { "epoch": 0.05187229364384914, "grad_norm": 0.5634118773970944, "learning_rate": 1.7289859407717621e-06, "loss": 0.5906, "step": 578 }, { "epoch": 0.05196203809652016, "grad_norm": 0.5375808240726816, "learning_rate": 1.7319772659288067e-06, "loss": 0.612, "step": 579 }, { "epoch": 0.052051782549191176, "grad_norm": 0.5151875052756598, "learning_rate": 1.7349685910858512e-06, "loss": 0.5394, "step": 580 }, { "epoch": 0.0521415270018622, "grad_norm": 0.618453570587191, "learning_rate": 1.7379599162428956e-06, "loss": 0.6595, "step": 581 }, { "epoch": 0.05223127145453322, "grad_norm": 0.5866087454299251, "learning_rate": 1.7409512413999401e-06, "loss": 0.6128, "step": 582 }, { "epoch": 0.052321015907204234, "grad_norm": 0.558650894825553, "learning_rate": 1.7439425665569847e-06, "loss": 0.6826, "step": 583 }, { "epoch": 0.052410760359875255, "grad_norm": 0.5339850115470356, "learning_rate": 1.7469338917140295e-06, "loss": 0.649, "step": 584 }, { "epoch": 0.05250050481254628, "grad_norm": 0.6012428015334614, "learning_rate": 1.749925216871074e-06, "loss": 0.6622, "step": 585 }, { "epoch": 0.05259024926521729, "grad_norm": 0.5614550792116602, "learning_rate": 1.7529165420281186e-06, "loss": 0.5949, "step": 586 }, { "epoch": 0.05267999371788831, "grad_norm": 0.5786913977908901, "learning_rate": 1.7559078671851631e-06, "loss": 0.5851, "step": 587 }, { "epoch": 0.052769738170559334, "grad_norm": 0.6505016073928017, "learning_rate": 1.7588991923422077e-06, "loss": 0.7139, "step": 588 }, { "epoch": 0.05285948262323035, "grad_norm": 0.5947712155181465, "learning_rate": 1.7618905174992523e-06, "loss": 0.6698, "step": 589 }, { "epoch": 0.05294922707590137, "grad_norm": 0.4897171962122552, "learning_rate": 1.7648818426562968e-06, "loss": 0.5572, "step": 590 }, { "epoch": 0.05303897152857239, "grad_norm": 0.5510961417542485, "learning_rate": 1.7678731678133416e-06, "loss": 0.6168, "step": 591 }, { "epoch": 0.053128715981243406, "grad_norm": 0.508683068185384, "learning_rate": 1.7708644929703861e-06, "loss": 0.5184, "step": 592 }, { "epoch": 0.05321846043391443, "grad_norm": 0.526776837981549, "learning_rate": 1.7738558181274307e-06, "loss": 0.5985, "step": 593 }, { "epoch": 0.05330820488658545, "grad_norm": 0.533365296164477, "learning_rate": 1.7768471432844753e-06, "loss": 0.5557, "step": 594 }, { "epoch": 0.05339794933925647, "grad_norm": 0.6021119330638466, "learning_rate": 1.7798384684415198e-06, "loss": 0.59, "step": 595 }, { "epoch": 0.053487693791927485, "grad_norm": 0.5667461164721802, "learning_rate": 1.7828297935985642e-06, "loss": 0.5958, "step": 596 }, { "epoch": 0.053577438244598506, "grad_norm": 0.5301998439367879, "learning_rate": 1.7858211187556087e-06, "loss": 0.6163, "step": 597 }, { "epoch": 0.05366718269726953, "grad_norm": 0.5202932120631691, "learning_rate": 1.7888124439126533e-06, "loss": 0.513, "step": 598 }, { "epoch": 0.05375692714994054, "grad_norm": 0.5356716151821928, "learning_rate": 1.7918037690696978e-06, "loss": 0.6403, "step": 599 }, { "epoch": 0.053846671602611564, "grad_norm": 0.589147787191888, "learning_rate": 1.7947950942267426e-06, "loss": 0.6618, "step": 600 }, { "epoch": 0.053936416055282585, "grad_norm": 0.6112171283102885, "learning_rate": 1.7977864193837872e-06, "loss": 0.6729, "step": 601 }, { "epoch": 0.0540261605079536, "grad_norm": 0.5481047187205067, "learning_rate": 1.8007777445408317e-06, "loss": 0.5981, "step": 602 }, { "epoch": 0.05411590496062462, "grad_norm": 0.5486360509360162, "learning_rate": 1.8037690696978763e-06, "loss": 0.6333, "step": 603 }, { "epoch": 0.05420564941329564, "grad_norm": 0.5266183392156077, "learning_rate": 1.8067603948549208e-06, "loss": 0.618, "step": 604 }, { "epoch": 0.05429539386596666, "grad_norm": 0.5731296998790035, "learning_rate": 1.8097517200119654e-06, "loss": 0.6302, "step": 605 }, { "epoch": 0.05438513831863768, "grad_norm": 0.5610669675304675, "learning_rate": 1.81274304516901e-06, "loss": 0.6055, "step": 606 }, { "epoch": 0.0544748827713087, "grad_norm": 0.5502967692094022, "learning_rate": 1.8157343703260547e-06, "loss": 0.5625, "step": 607 }, { "epoch": 0.05456462722397972, "grad_norm": 0.5451253480512108, "learning_rate": 1.8187256954830993e-06, "loss": 0.5803, "step": 608 }, { "epoch": 0.054654371676650736, "grad_norm": 0.566464819782034, "learning_rate": 1.8217170206401439e-06, "loss": 0.6162, "step": 609 }, { "epoch": 0.05474411612932176, "grad_norm": 0.5394395674986464, "learning_rate": 1.8247083457971884e-06, "loss": 0.5291, "step": 610 }, { "epoch": 0.05483386058199278, "grad_norm": 0.5224512275384406, "learning_rate": 1.827699670954233e-06, "loss": 0.6126, "step": 611 }, { "epoch": 0.05492360503466379, "grad_norm": 0.5625756189063268, "learning_rate": 1.8306909961112773e-06, "loss": 0.5993, "step": 612 }, { "epoch": 0.055013349487334814, "grad_norm": 0.5147556582069412, "learning_rate": 1.8336823212683219e-06, "loss": 0.5304, "step": 613 }, { "epoch": 0.055103093940005836, "grad_norm": 0.5446545722973405, "learning_rate": 1.8366736464253664e-06, "loss": 0.5783, "step": 614 }, { "epoch": 0.05519283839267685, "grad_norm": 0.525146354451617, "learning_rate": 1.839664971582411e-06, "loss": 0.6171, "step": 615 }, { "epoch": 0.05528258284534787, "grad_norm": 0.6508286209329042, "learning_rate": 1.8426562967394558e-06, "loss": 0.7084, "step": 616 }, { "epoch": 0.05537232729801889, "grad_norm": 0.5928430427384094, "learning_rate": 1.8456476218965003e-06, "loss": 0.6472, "step": 617 }, { "epoch": 0.05546207175068991, "grad_norm": 0.5097984288758044, "learning_rate": 1.8486389470535449e-06, "loss": 0.5188, "step": 618 }, { "epoch": 0.05555181620336093, "grad_norm": 0.571832513899835, "learning_rate": 1.8516302722105894e-06, "loss": 0.6796, "step": 619 }, { "epoch": 0.05564156065603195, "grad_norm": 0.5662858656202006, "learning_rate": 1.854621597367634e-06, "loss": 0.6754, "step": 620 }, { "epoch": 0.055731305108702965, "grad_norm": 0.5189396892041145, "learning_rate": 1.8576129225246786e-06, "loss": 0.5674, "step": 621 }, { "epoch": 0.05582104956137399, "grad_norm": 0.5093465604339215, "learning_rate": 1.8606042476817231e-06, "loss": 0.6233, "step": 622 }, { "epoch": 0.05591079401404501, "grad_norm": 0.5596256834646729, "learning_rate": 1.8635955728387679e-06, "loss": 0.6051, "step": 623 }, { "epoch": 0.05600053846671603, "grad_norm": 0.613699517920912, "learning_rate": 1.8665868979958124e-06, "loss": 0.6367, "step": 624 }, { "epoch": 0.056090282919387044, "grad_norm": 0.6329304109150851, "learning_rate": 1.869578223152857e-06, "loss": 0.7282, "step": 625 }, { "epoch": 0.056180027372058065, "grad_norm": 0.5622738804373987, "learning_rate": 1.8725695483099016e-06, "loss": 0.6091, "step": 626 }, { "epoch": 0.05626977182472909, "grad_norm": 0.5494579259054229, "learning_rate": 1.875560873466946e-06, "loss": 0.5886, "step": 627 }, { "epoch": 0.0563595162774001, "grad_norm": 0.5755434029039578, "learning_rate": 1.8785521986239905e-06, "loss": 0.6546, "step": 628 }, { "epoch": 0.05644926073007112, "grad_norm": 0.5812324851399044, "learning_rate": 1.881543523781035e-06, "loss": 0.674, "step": 629 }, { "epoch": 0.056539005182742144, "grad_norm": 0.6260444099537925, "learning_rate": 1.8845348489380796e-06, "loss": 0.5918, "step": 630 }, { "epoch": 0.05662874963541316, "grad_norm": 0.5796840011536103, "learning_rate": 1.8875261740951241e-06, "loss": 0.6002, "step": 631 }, { "epoch": 0.05671849408808418, "grad_norm": 0.5440721165365929, "learning_rate": 1.890517499252169e-06, "loss": 0.5678, "step": 632 }, { "epoch": 0.0568082385407552, "grad_norm": 0.5404895675338827, "learning_rate": 1.8935088244092135e-06, "loss": 0.61, "step": 633 }, { "epoch": 0.056897982993426216, "grad_norm": 0.5801265775579916, "learning_rate": 1.896500149566258e-06, "loss": 0.6247, "step": 634 }, { "epoch": 0.05698772744609724, "grad_norm": 0.5709853371252378, "learning_rate": 1.8994914747233026e-06, "loss": 0.5931, "step": 635 }, { "epoch": 0.05707747189876826, "grad_norm": 0.5714764021048876, "learning_rate": 1.9024827998803471e-06, "loss": 0.575, "step": 636 }, { "epoch": 0.05716721635143927, "grad_norm": 0.7074812979326809, "learning_rate": 1.9054741250373917e-06, "loss": 0.7192, "step": 637 }, { "epoch": 0.057256960804110295, "grad_norm": 0.5420958337718326, "learning_rate": 1.9084654501944363e-06, "loss": 0.6056, "step": 638 }, { "epoch": 0.057346705256781316, "grad_norm": 0.5729596777690799, "learning_rate": 1.911456775351481e-06, "loss": 0.5899, "step": 639 }, { "epoch": 0.05743644970945234, "grad_norm": 0.5490927359418484, "learning_rate": 1.9144481005085254e-06, "loss": 0.5597, "step": 640 }, { "epoch": 0.05752619416212335, "grad_norm": 0.5635026475504599, "learning_rate": 1.91743942566557e-06, "loss": 0.6031, "step": 641 }, { "epoch": 0.057615938614794374, "grad_norm": 0.5529102462956882, "learning_rate": 1.920430750822615e-06, "loss": 0.6072, "step": 642 }, { "epoch": 0.057705683067465395, "grad_norm": 0.5657748879393452, "learning_rate": 1.923422075979659e-06, "loss": 0.6463, "step": 643 }, { "epoch": 0.05779542752013641, "grad_norm": 0.5907259181919069, "learning_rate": 1.9264134011367036e-06, "loss": 0.5663, "step": 644 }, { "epoch": 0.05788517197280743, "grad_norm": 0.496842920205585, "learning_rate": 1.929404726293748e-06, "loss": 0.5715, "step": 645 }, { "epoch": 0.05797491642547845, "grad_norm": 0.5402190493700273, "learning_rate": 1.9323960514507927e-06, "loss": 0.6237, "step": 646 }, { "epoch": 0.05806466087814947, "grad_norm": 0.528754318486535, "learning_rate": 1.9353873766078373e-06, "loss": 0.5192, "step": 647 }, { "epoch": 0.05815440533082049, "grad_norm": 0.5350343235063516, "learning_rate": 1.938378701764882e-06, "loss": 0.6081, "step": 648 }, { "epoch": 0.05824414978349151, "grad_norm": 0.5559593040167691, "learning_rate": 1.9413700269219264e-06, "loss": 0.6112, "step": 649 }, { "epoch": 0.058333894236162524, "grad_norm": 0.5416856929446983, "learning_rate": 1.944361352078971e-06, "loss": 0.6159, "step": 650 }, { "epoch": 0.058423638688833546, "grad_norm": 0.6675998768869439, "learning_rate": 1.947352677236016e-06, "loss": 0.6029, "step": 651 }, { "epoch": 0.05851338314150457, "grad_norm": 0.5565186629571028, "learning_rate": 1.9503440023930605e-06, "loss": 0.5548, "step": 652 }, { "epoch": 0.05860312759417558, "grad_norm": 0.5040838727222228, "learning_rate": 1.953335327550105e-06, "loss": 0.5986, "step": 653 }, { "epoch": 0.0586928720468466, "grad_norm": 0.5708532463601382, "learning_rate": 1.9563266527071496e-06, "loss": 0.6412, "step": 654 }, { "epoch": 0.058782616499517625, "grad_norm": 0.5759823095093577, "learning_rate": 1.959317977864194e-06, "loss": 0.6437, "step": 655 }, { "epoch": 0.058872360952188646, "grad_norm": 0.5943816597259176, "learning_rate": 1.9623093030212387e-06, "loss": 0.6148, "step": 656 }, { "epoch": 0.05896210540485966, "grad_norm": 0.5313082913878187, "learning_rate": 1.9653006281782833e-06, "loss": 0.577, "step": 657 }, { "epoch": 0.05905184985753068, "grad_norm": 0.5650371242034214, "learning_rate": 1.9682919533353274e-06, "loss": 0.5981, "step": 658 }, { "epoch": 0.0591415943102017, "grad_norm": 0.5521334980699059, "learning_rate": 1.971283278492372e-06, "loss": 0.6702, "step": 659 }, { "epoch": 0.05923133876287272, "grad_norm": 0.5177571926539168, "learning_rate": 1.974274603649417e-06, "loss": 0.6169, "step": 660 }, { "epoch": 0.05932108321554374, "grad_norm": 0.6102173227279462, "learning_rate": 1.9772659288064615e-06, "loss": 0.5771, "step": 661 }, { "epoch": 0.05941082766821476, "grad_norm": 0.546091059094992, "learning_rate": 1.980257253963506e-06, "loss": 0.5595, "step": 662 }, { "epoch": 0.059500572120885775, "grad_norm": 0.5341178660299266, "learning_rate": 1.9832485791205506e-06, "loss": 0.6274, "step": 663 }, { "epoch": 0.0595903165735568, "grad_norm": 0.58202259276606, "learning_rate": 1.986239904277595e-06, "loss": 0.6117, "step": 664 }, { "epoch": 0.05968006102622782, "grad_norm": 0.5417328722563024, "learning_rate": 1.9892312294346398e-06, "loss": 0.5356, "step": 665 }, { "epoch": 0.05976980547889883, "grad_norm": 0.6273068973879422, "learning_rate": 1.9922225545916843e-06, "loss": 0.6921, "step": 666 }, { "epoch": 0.059859549931569854, "grad_norm": 0.5827824631802684, "learning_rate": 1.995213879748729e-06, "loss": 0.608, "step": 667 }, { "epoch": 0.059949294384240875, "grad_norm": 0.5743971034327195, "learning_rate": 1.9982052049057734e-06, "loss": 0.6793, "step": 668 }, { "epoch": 0.06003903883691189, "grad_norm": 0.5510103875302008, "learning_rate": 2.001196530062818e-06, "loss": 0.6186, "step": 669 }, { "epoch": 0.06012878328958291, "grad_norm": 0.5642473583399399, "learning_rate": 2.0041878552198625e-06, "loss": 0.6905, "step": 670 }, { "epoch": 0.06021852774225393, "grad_norm": 0.5295849451044828, "learning_rate": 2.007179180376907e-06, "loss": 0.6118, "step": 671 }, { "epoch": 0.060308272194924954, "grad_norm": 0.5465481848727465, "learning_rate": 2.0101705055339517e-06, "loss": 0.553, "step": 672 }, { "epoch": 0.06039801664759597, "grad_norm": 0.5549294015774784, "learning_rate": 2.0131618306909962e-06, "loss": 0.5806, "step": 673 }, { "epoch": 0.06048776110026699, "grad_norm": 0.5589562037879691, "learning_rate": 2.0161531558480408e-06, "loss": 0.6443, "step": 674 }, { "epoch": 0.06057750555293801, "grad_norm": 0.5655936705659278, "learning_rate": 2.0191444810050853e-06, "loss": 0.6452, "step": 675 }, { "epoch": 0.060667250005609026, "grad_norm": 0.5071827570287784, "learning_rate": 2.02213580616213e-06, "loss": 0.5246, "step": 676 }, { "epoch": 0.06075699445828005, "grad_norm": 0.5877595576082383, "learning_rate": 2.0251271313191745e-06, "loss": 0.6293, "step": 677 }, { "epoch": 0.06084673891095107, "grad_norm": 0.6219662333329985, "learning_rate": 2.028118456476219e-06, "loss": 0.6658, "step": 678 }, { "epoch": 0.06093648336362208, "grad_norm": 0.5777175278475044, "learning_rate": 2.0311097816332636e-06, "loss": 0.5899, "step": 679 }, { "epoch": 0.061026227816293105, "grad_norm": 0.5652942663671743, "learning_rate": 2.034101106790308e-06, "loss": 0.5814, "step": 680 }, { "epoch": 0.061115972268964126, "grad_norm": 0.5767566004357185, "learning_rate": 2.0370924319473527e-06, "loss": 0.5907, "step": 681 }, { "epoch": 0.06120571672163514, "grad_norm": 0.5643185145255483, "learning_rate": 2.0400837571043972e-06, "loss": 0.6036, "step": 682 }, { "epoch": 0.06129546117430616, "grad_norm": 0.5909447442512258, "learning_rate": 2.0430750822614422e-06, "loss": 0.6443, "step": 683 }, { "epoch": 0.061385205626977184, "grad_norm": 0.5800023910166985, "learning_rate": 2.0460664074184868e-06, "loss": 0.6142, "step": 684 }, { "epoch": 0.061474950079648205, "grad_norm": 0.5767277110377851, "learning_rate": 2.0490577325755313e-06, "loss": 0.6015, "step": 685 }, { "epoch": 0.06156469453231922, "grad_norm": 0.6280438589939755, "learning_rate": 2.052049057732576e-06, "loss": 0.6866, "step": 686 }, { "epoch": 0.06165443898499024, "grad_norm": 0.5655330410349949, "learning_rate": 2.0550403828896205e-06, "loss": 0.6003, "step": 687 }, { "epoch": 0.06174418343766126, "grad_norm": 0.6002693720269004, "learning_rate": 2.058031708046665e-06, "loss": 0.6661, "step": 688 }, { "epoch": 0.06183392789033228, "grad_norm": 0.5251015458209229, "learning_rate": 2.061023033203709e-06, "loss": 0.5858, "step": 689 }, { "epoch": 0.0619236723430033, "grad_norm": 0.5347945517465951, "learning_rate": 2.0640143583607537e-06, "loss": 0.5715, "step": 690 }, { "epoch": 0.06201341679567432, "grad_norm": 0.5675269209121994, "learning_rate": 2.0670056835177983e-06, "loss": 0.5786, "step": 691 }, { "epoch": 0.062103161248345334, "grad_norm": 0.5834793562616617, "learning_rate": 2.0699970086748433e-06, "loss": 0.6487, "step": 692 }, { "epoch": 0.062192905701016356, "grad_norm": 0.6188831950395486, "learning_rate": 2.072988333831888e-06, "loss": 0.6834, "step": 693 }, { "epoch": 0.06228265015368738, "grad_norm": 0.5876919020243466, "learning_rate": 2.0759796589889324e-06, "loss": 0.6126, "step": 694 }, { "epoch": 0.06237239460635839, "grad_norm": 0.5726838050147156, "learning_rate": 2.078970984145977e-06, "loss": 0.6072, "step": 695 }, { "epoch": 0.06246213905902941, "grad_norm": 0.5255302974995429, "learning_rate": 2.0819623093030215e-06, "loss": 0.5417, "step": 696 }, { "epoch": 0.06255188351170043, "grad_norm": 0.5847758028048807, "learning_rate": 2.084953634460066e-06, "loss": 0.6735, "step": 697 }, { "epoch": 0.06264162796437145, "grad_norm": 0.5312288867334547, "learning_rate": 2.0879449596171106e-06, "loss": 0.6186, "step": 698 }, { "epoch": 0.06273137241704248, "grad_norm": 0.5143743240498135, "learning_rate": 2.090936284774155e-06, "loss": 0.5399, "step": 699 }, { "epoch": 0.06282111686971349, "grad_norm": 0.6586758805470901, "learning_rate": 2.0939276099311997e-06, "loss": 0.6918, "step": 700 }, { "epoch": 0.0629108613223845, "grad_norm": 0.611288223754298, "learning_rate": 2.0969189350882443e-06, "loss": 0.6581, "step": 701 }, { "epoch": 0.06300060577505553, "grad_norm": 0.5498020913937606, "learning_rate": 2.099910260245289e-06, "loss": 0.5381, "step": 702 }, { "epoch": 0.06309035022772655, "grad_norm": 0.5404562792903514, "learning_rate": 2.1029015854023334e-06, "loss": 0.5466, "step": 703 }, { "epoch": 0.06318009468039756, "grad_norm": 0.5450082384210092, "learning_rate": 2.105892910559378e-06, "loss": 0.5987, "step": 704 }, { "epoch": 0.06326983913306859, "grad_norm": 0.6063471485808086, "learning_rate": 2.1088842357164225e-06, "loss": 0.652, "step": 705 }, { "epoch": 0.0633595835857396, "grad_norm": 0.4934524337763134, "learning_rate": 2.111875560873467e-06, "loss": 0.5872, "step": 706 }, { "epoch": 0.06344932803841062, "grad_norm": 0.6047342981142998, "learning_rate": 2.1148668860305116e-06, "loss": 0.6166, "step": 707 }, { "epoch": 0.06353907249108165, "grad_norm": 0.5044869191495578, "learning_rate": 2.117858211187556e-06, "loss": 0.5066, "step": 708 }, { "epoch": 0.06362881694375266, "grad_norm": 0.5252072755178957, "learning_rate": 2.1208495363446007e-06, "loss": 0.5743, "step": 709 }, { "epoch": 0.06371856139642368, "grad_norm": 0.5523589324869942, "learning_rate": 2.1238408615016453e-06, "loss": 0.5638, "step": 710 }, { "epoch": 0.0638083058490947, "grad_norm": 0.5428574080870123, "learning_rate": 2.12683218665869e-06, "loss": 0.6596, "step": 711 }, { "epoch": 0.06389805030176572, "grad_norm": 0.603115897131115, "learning_rate": 2.1298235118157344e-06, "loss": 0.6528, "step": 712 }, { "epoch": 0.06398779475443674, "grad_norm": 0.5488771274826679, "learning_rate": 2.132814836972779e-06, "loss": 0.5765, "step": 713 }, { "epoch": 0.06407753920710776, "grad_norm": 0.5007454336814826, "learning_rate": 2.1358061621298235e-06, "loss": 0.524, "step": 714 }, { "epoch": 0.06416728365977878, "grad_norm": 0.5626566272704187, "learning_rate": 2.1387974872868685e-06, "loss": 0.5702, "step": 715 }, { "epoch": 0.0642570281124498, "grad_norm": 0.6044958701053016, "learning_rate": 2.141788812443913e-06, "loss": 0.5672, "step": 716 }, { "epoch": 0.06434677256512082, "grad_norm": 0.6113939579615428, "learning_rate": 2.1447801376009576e-06, "loss": 0.6499, "step": 717 }, { "epoch": 0.06443651701779184, "grad_norm": 0.5919031026008333, "learning_rate": 2.147771462758002e-06, "loss": 0.6773, "step": 718 }, { "epoch": 0.06452626147046285, "grad_norm": 0.5415010699750028, "learning_rate": 2.1507627879150468e-06, "loss": 0.5803, "step": 719 }, { "epoch": 0.06461600592313388, "grad_norm": 0.5617765173373872, "learning_rate": 2.153754113072091e-06, "loss": 0.6114, "step": 720 }, { "epoch": 0.0647057503758049, "grad_norm": 0.49917924903644784, "learning_rate": 2.1567454382291354e-06, "loss": 0.5172, "step": 721 }, { "epoch": 0.06479549482847591, "grad_norm": 0.5613418990685882, "learning_rate": 2.15973676338618e-06, "loss": 0.5367, "step": 722 }, { "epoch": 0.06488523928114694, "grad_norm": 0.5499674012826711, "learning_rate": 2.1627280885432246e-06, "loss": 0.6204, "step": 723 }, { "epoch": 0.06497498373381795, "grad_norm": 0.5742868680770419, "learning_rate": 2.1657194137002695e-06, "loss": 0.5958, "step": 724 }, { "epoch": 0.06506472818648898, "grad_norm": 0.521774739061049, "learning_rate": 2.168710738857314e-06, "loss": 0.6087, "step": 725 }, { "epoch": 0.06515447263916, "grad_norm": 0.5673799971366897, "learning_rate": 2.1717020640143587e-06, "loss": 0.6408, "step": 726 }, { "epoch": 0.06524421709183101, "grad_norm": 0.5066011082053112, "learning_rate": 2.1746933891714032e-06, "loss": 0.518, "step": 727 }, { "epoch": 0.06533396154450204, "grad_norm": 0.5679337730892796, "learning_rate": 2.1776847143284478e-06, "loss": 0.6292, "step": 728 }, { "epoch": 0.06542370599717305, "grad_norm": 0.5388396701056952, "learning_rate": 2.1806760394854923e-06, "loss": 0.5724, "step": 729 }, { "epoch": 0.06551345044984407, "grad_norm": 0.5065552333793318, "learning_rate": 2.183667364642537e-06, "loss": 0.5695, "step": 730 }, { "epoch": 0.0656031949025151, "grad_norm": 0.5939205840443695, "learning_rate": 2.1866586897995814e-06, "loss": 0.6064, "step": 731 }, { "epoch": 0.06569293935518611, "grad_norm": 0.5253633873072486, "learning_rate": 2.189650014956626e-06, "loss": 0.6004, "step": 732 }, { "epoch": 0.06578268380785712, "grad_norm": 0.5147121321169114, "learning_rate": 2.1926413401136706e-06, "loss": 0.5337, "step": 733 }, { "epoch": 0.06587242826052815, "grad_norm": 0.601143961199377, "learning_rate": 2.195632665270715e-06, "loss": 0.5892, "step": 734 }, { "epoch": 0.06596217271319917, "grad_norm": 0.5345360732337785, "learning_rate": 2.1986239904277597e-06, "loss": 0.5848, "step": 735 }, { "epoch": 0.06605191716587018, "grad_norm": 0.6102736682795898, "learning_rate": 2.2016153155848042e-06, "loss": 0.6085, "step": 736 }, { "epoch": 0.06614166161854121, "grad_norm": 0.5387211535897537, "learning_rate": 2.204606640741849e-06, "loss": 0.5898, "step": 737 }, { "epoch": 0.06623140607121222, "grad_norm": 0.5744970803755456, "learning_rate": 2.2075979658988934e-06, "loss": 0.663, "step": 738 }, { "epoch": 0.06632115052388324, "grad_norm": 0.585180631322379, "learning_rate": 2.210589291055938e-06, "loss": 0.6321, "step": 739 }, { "epoch": 0.06641089497655427, "grad_norm": 0.535075600807479, "learning_rate": 2.2135806162129825e-06, "loss": 0.6062, "step": 740 }, { "epoch": 0.06650063942922528, "grad_norm": 0.5429930689913632, "learning_rate": 2.216571941370027e-06, "loss": 0.6189, "step": 741 }, { "epoch": 0.0665903838818963, "grad_norm": 0.5660273184984203, "learning_rate": 2.2195632665270716e-06, "loss": 0.5543, "step": 742 }, { "epoch": 0.06668012833456732, "grad_norm": 0.5631778896082438, "learning_rate": 2.222554591684116e-06, "loss": 0.6069, "step": 743 }, { "epoch": 0.06676987278723834, "grad_norm": 0.5739091253721704, "learning_rate": 2.2255459168411607e-06, "loss": 0.5893, "step": 744 }, { "epoch": 0.06685961723990935, "grad_norm": 0.5516720904202668, "learning_rate": 2.2285372419982053e-06, "loss": 0.6486, "step": 745 }, { "epoch": 0.06694936169258038, "grad_norm": 0.5451630471040406, "learning_rate": 2.23152856715525e-06, "loss": 0.5578, "step": 746 }, { "epoch": 0.0670391061452514, "grad_norm": 0.6078507972687773, "learning_rate": 2.234519892312295e-06, "loss": 0.5991, "step": 747 }, { "epoch": 0.06712885059792241, "grad_norm": 0.5297932620900028, "learning_rate": 2.2375112174693394e-06, "loss": 0.533, "step": 748 }, { "epoch": 0.06721859505059344, "grad_norm": 0.5232378385213327, "learning_rate": 2.240502542626384e-06, "loss": 0.5776, "step": 749 }, { "epoch": 0.06730833950326445, "grad_norm": 0.588297532689172, "learning_rate": 2.2434938677834285e-06, "loss": 0.6104, "step": 750 }, { "epoch": 0.06739808395593547, "grad_norm": 0.5529040460959067, "learning_rate": 2.2464851929404726e-06, "loss": 0.5925, "step": 751 }, { "epoch": 0.0674878284086065, "grad_norm": 0.5894138843546038, "learning_rate": 2.249476518097517e-06, "loss": 0.6609, "step": 752 }, { "epoch": 0.06757757286127751, "grad_norm": 0.5508826663364192, "learning_rate": 2.2524678432545617e-06, "loss": 0.642, "step": 753 }, { "epoch": 0.06766731731394854, "grad_norm": 0.601900830411943, "learning_rate": 2.2554591684116063e-06, "loss": 0.5957, "step": 754 }, { "epoch": 0.06775706176661955, "grad_norm": 0.5574067030349251, "learning_rate": 2.258450493568651e-06, "loss": 0.5965, "step": 755 }, { "epoch": 0.06784680621929057, "grad_norm": 0.5422997458112636, "learning_rate": 2.261441818725696e-06, "loss": 0.5652, "step": 756 }, { "epoch": 0.0679365506719616, "grad_norm": 0.5008074780709415, "learning_rate": 2.2644331438827404e-06, "loss": 0.5385, "step": 757 }, { "epoch": 0.06802629512463261, "grad_norm": 0.5191197964508725, "learning_rate": 2.267424469039785e-06, "loss": 0.6113, "step": 758 }, { "epoch": 0.06811603957730362, "grad_norm": 0.5311967339381762, "learning_rate": 2.2704157941968295e-06, "loss": 0.6088, "step": 759 }, { "epoch": 0.06820578402997465, "grad_norm": 0.5637315336401888, "learning_rate": 2.273407119353874e-06, "loss": 0.5936, "step": 760 }, { "epoch": 0.06829552848264567, "grad_norm": 0.5563862255001771, "learning_rate": 2.2763984445109186e-06, "loss": 0.5681, "step": 761 }, { "epoch": 0.06838527293531668, "grad_norm": 0.6110565969448731, "learning_rate": 2.279389769667963e-06, "loss": 0.6532, "step": 762 }, { "epoch": 0.06847501738798771, "grad_norm": 0.5614240700451261, "learning_rate": 2.2823810948250077e-06, "loss": 0.5868, "step": 763 }, { "epoch": 0.06856476184065873, "grad_norm": 0.5946282593236358, "learning_rate": 2.2853724199820523e-06, "loss": 0.6454, "step": 764 }, { "epoch": 0.06865450629332974, "grad_norm": 0.568557081043481, "learning_rate": 2.288363745139097e-06, "loss": 0.5807, "step": 765 }, { "epoch": 0.06874425074600077, "grad_norm": 0.5516602803240361, "learning_rate": 2.2913550702961414e-06, "loss": 0.6016, "step": 766 }, { "epoch": 0.06883399519867178, "grad_norm": 0.5565914315546214, "learning_rate": 2.294346395453186e-06, "loss": 0.4976, "step": 767 }, { "epoch": 0.0689237396513428, "grad_norm": 0.5091796915716339, "learning_rate": 2.2973377206102305e-06, "loss": 0.5698, "step": 768 }, { "epoch": 0.06901348410401383, "grad_norm": 0.6289596086075698, "learning_rate": 2.300329045767275e-06, "loss": 0.7041, "step": 769 }, { "epoch": 0.06910322855668484, "grad_norm": 0.5039122993794602, "learning_rate": 2.3033203709243196e-06, "loss": 0.5035, "step": 770 }, { "epoch": 0.06919297300935585, "grad_norm": 0.5317688890250607, "learning_rate": 2.306311696081364e-06, "loss": 0.5982, "step": 771 }, { "epoch": 0.06928271746202688, "grad_norm": 0.5566439882724576, "learning_rate": 2.3093030212384088e-06, "loss": 0.6146, "step": 772 }, { "epoch": 0.0693724619146979, "grad_norm": 0.5686646392907094, "learning_rate": 2.3122943463954533e-06, "loss": 0.6282, "step": 773 }, { "epoch": 0.06946220636736891, "grad_norm": 0.5082085528453391, "learning_rate": 2.315285671552498e-06, "loss": 0.5366, "step": 774 }, { "epoch": 0.06955195082003994, "grad_norm": 0.552499608715381, "learning_rate": 2.3182769967095424e-06, "loss": 0.6119, "step": 775 }, { "epoch": 0.06964169527271095, "grad_norm": 0.6259732330255502, "learning_rate": 2.321268321866587e-06, "loss": 0.6542, "step": 776 }, { "epoch": 0.06973143972538197, "grad_norm": 0.5668420219978624, "learning_rate": 2.3242596470236316e-06, "loss": 0.5415, "step": 777 }, { "epoch": 0.069821184178053, "grad_norm": 0.6052811275123866, "learning_rate": 2.327250972180676e-06, "loss": 0.7091, "step": 778 }, { "epoch": 0.06991092863072401, "grad_norm": 0.47548494489563503, "learning_rate": 2.330242297337721e-06, "loss": 0.4955, "step": 779 }, { "epoch": 0.07000067308339503, "grad_norm": 0.5288898454739812, "learning_rate": 2.3332336224947657e-06, "loss": 0.5313, "step": 780 }, { "epoch": 0.07009041753606605, "grad_norm": 0.5154837922970635, "learning_rate": 2.3362249476518102e-06, "loss": 0.5079, "step": 781 }, { "epoch": 0.07018016198873707, "grad_norm": 0.5571028929947841, "learning_rate": 2.3392162728088543e-06, "loss": 0.6063, "step": 782 }, { "epoch": 0.07026990644140808, "grad_norm": 0.5391595943755743, "learning_rate": 2.342207597965899e-06, "loss": 0.5946, "step": 783 }, { "epoch": 0.07035965089407911, "grad_norm": 0.497860699470397, "learning_rate": 2.3451989231229435e-06, "loss": 0.5504, "step": 784 }, { "epoch": 0.07044939534675013, "grad_norm": 0.4986204329488697, "learning_rate": 2.348190248279988e-06, "loss": 0.5117, "step": 785 }, { "epoch": 0.07053913979942115, "grad_norm": 0.5792252565979341, "learning_rate": 2.3511815734370326e-06, "loss": 0.6509, "step": 786 }, { "epoch": 0.07062888425209217, "grad_norm": 0.5443269051905749, "learning_rate": 2.354172898594077e-06, "loss": 0.6006, "step": 787 }, { "epoch": 0.07071862870476318, "grad_norm": 0.5358607790964909, "learning_rate": 2.357164223751122e-06, "loss": 0.5545, "step": 788 }, { "epoch": 0.07080837315743421, "grad_norm": 0.539229454682394, "learning_rate": 2.3601555489081667e-06, "loss": 0.5076, "step": 789 }, { "epoch": 0.07089811761010523, "grad_norm": 0.5251110630445146, "learning_rate": 2.3631468740652112e-06, "loss": 0.5996, "step": 790 }, { "epoch": 0.07098786206277624, "grad_norm": 0.5606058660058411, "learning_rate": 2.366138199222256e-06, "loss": 0.5723, "step": 791 }, { "epoch": 0.07107760651544727, "grad_norm": 0.609654334899023, "learning_rate": 2.3691295243793004e-06, "loss": 0.7061, "step": 792 }, { "epoch": 0.07116735096811828, "grad_norm": 0.4843576955817518, "learning_rate": 2.372120849536345e-06, "loss": 0.4902, "step": 793 }, { "epoch": 0.0712570954207893, "grad_norm": 0.5726692610738975, "learning_rate": 2.3751121746933895e-06, "loss": 0.5934, "step": 794 }, { "epoch": 0.07134683987346033, "grad_norm": 0.5223348832715162, "learning_rate": 2.378103499850434e-06, "loss": 0.5386, "step": 795 }, { "epoch": 0.07143658432613134, "grad_norm": 0.5431922874042706, "learning_rate": 2.3810948250074786e-06, "loss": 0.5988, "step": 796 }, { "epoch": 0.07152632877880236, "grad_norm": 0.5184327806365285, "learning_rate": 2.384086150164523e-06, "loss": 0.5772, "step": 797 }, { "epoch": 0.07161607323147338, "grad_norm": 0.5305781494121968, "learning_rate": 2.3870774753215677e-06, "loss": 0.6047, "step": 798 }, { "epoch": 0.0717058176841444, "grad_norm": 0.5349978079572019, "learning_rate": 2.3900688004786123e-06, "loss": 0.6042, "step": 799 }, { "epoch": 0.07179556213681541, "grad_norm": 0.5390604556359342, "learning_rate": 2.393060125635657e-06, "loss": 0.6277, "step": 800 }, { "epoch": 0.07188530658948644, "grad_norm": 0.5184869221804614, "learning_rate": 2.3960514507927014e-06, "loss": 0.5407, "step": 801 }, { "epoch": 0.07197505104215746, "grad_norm": 0.5142987741698496, "learning_rate": 2.399042775949746e-06, "loss": 0.5672, "step": 802 }, { "epoch": 0.07206479549482847, "grad_norm": 0.5770776928214469, "learning_rate": 2.4020341011067905e-06, "loss": 0.5451, "step": 803 }, { "epoch": 0.0721545399474995, "grad_norm": 0.4858596809816261, "learning_rate": 2.405025426263835e-06, "loss": 0.4884, "step": 804 }, { "epoch": 0.07224428440017051, "grad_norm": 0.5471807833919408, "learning_rate": 2.4080167514208796e-06, "loss": 0.5471, "step": 805 }, { "epoch": 0.07233402885284153, "grad_norm": 0.5365383154440108, "learning_rate": 2.411008076577924e-06, "loss": 0.5293, "step": 806 }, { "epoch": 0.07242377330551256, "grad_norm": 0.5855068669371137, "learning_rate": 2.4139994017349687e-06, "loss": 0.5999, "step": 807 }, { "epoch": 0.07251351775818357, "grad_norm": 0.5408678851121348, "learning_rate": 2.4169907268920133e-06, "loss": 0.5977, "step": 808 }, { "epoch": 0.07260326221085459, "grad_norm": 0.57877101476343, "learning_rate": 2.419982052049058e-06, "loss": 0.6444, "step": 809 }, { "epoch": 0.07269300666352561, "grad_norm": 0.48876216461124183, "learning_rate": 2.4229733772061024e-06, "loss": 0.5451, "step": 810 }, { "epoch": 0.07278275111619663, "grad_norm": 0.5253193800381714, "learning_rate": 2.4259647023631474e-06, "loss": 0.5051, "step": 811 }, { "epoch": 0.07287249556886764, "grad_norm": 0.5398673929899848, "learning_rate": 2.428956027520192e-06, "loss": 0.5437, "step": 812 }, { "epoch": 0.07296224002153867, "grad_norm": 0.6435515453517863, "learning_rate": 2.431947352677236e-06, "loss": 0.667, "step": 813 }, { "epoch": 0.07305198447420969, "grad_norm": 0.512180898978296, "learning_rate": 2.4349386778342806e-06, "loss": 0.5605, "step": 814 }, { "epoch": 0.0731417289268807, "grad_norm": 0.5645301727352668, "learning_rate": 2.437930002991325e-06, "loss": 0.6262, "step": 815 }, { "epoch": 0.07323147337955173, "grad_norm": 0.5564404496182122, "learning_rate": 2.4409213281483698e-06, "loss": 0.5882, "step": 816 }, { "epoch": 0.07332121783222274, "grad_norm": 0.4811359535114916, "learning_rate": 2.4439126533054143e-06, "loss": 0.5328, "step": 817 }, { "epoch": 0.07341096228489377, "grad_norm": 0.5758040492079578, "learning_rate": 2.446903978462459e-06, "loss": 0.6269, "step": 818 }, { "epoch": 0.07350070673756479, "grad_norm": 0.49923626139929056, "learning_rate": 2.4498953036195034e-06, "loss": 0.5324, "step": 819 }, { "epoch": 0.0735904511902358, "grad_norm": 0.5392385465421619, "learning_rate": 2.4528866287765484e-06, "loss": 0.5438, "step": 820 }, { "epoch": 0.07368019564290683, "grad_norm": 0.560327755329399, "learning_rate": 2.455877953933593e-06, "loss": 0.5875, "step": 821 }, { "epoch": 0.07376994009557784, "grad_norm": 0.5531535493997993, "learning_rate": 2.4588692790906375e-06, "loss": 0.6511, "step": 822 }, { "epoch": 0.07385968454824886, "grad_norm": 0.5819334581106109, "learning_rate": 2.461860604247682e-06, "loss": 0.6123, "step": 823 }, { "epoch": 0.07394942900091989, "grad_norm": 0.5053846137987426, "learning_rate": 2.4648519294047266e-06, "loss": 0.5138, "step": 824 }, { "epoch": 0.0740391734535909, "grad_norm": 0.5461058791275101, "learning_rate": 2.467843254561771e-06, "loss": 0.631, "step": 825 }, { "epoch": 0.07412891790626192, "grad_norm": 0.5603594201743465, "learning_rate": 2.4708345797188158e-06, "loss": 0.5847, "step": 826 }, { "epoch": 0.07421866235893294, "grad_norm": 0.5214905848686209, "learning_rate": 2.4738259048758603e-06, "loss": 0.4947, "step": 827 }, { "epoch": 0.07430840681160396, "grad_norm": 0.5633886298745292, "learning_rate": 2.476817230032905e-06, "loss": 0.6343, "step": 828 }, { "epoch": 0.07439815126427497, "grad_norm": 0.5367097679217593, "learning_rate": 2.4798085551899494e-06, "loss": 0.5189, "step": 829 }, { "epoch": 0.074487895716946, "grad_norm": 0.6024350711406807, "learning_rate": 2.482799880346994e-06, "loss": 0.6528, "step": 830 }, { "epoch": 0.07457764016961702, "grad_norm": 0.6119510061847355, "learning_rate": 2.4857912055040386e-06, "loss": 0.6721, "step": 831 }, { "epoch": 0.07466738462228803, "grad_norm": 0.5980877544987349, "learning_rate": 2.488782530661083e-06, "loss": 0.6848, "step": 832 }, { "epoch": 0.07475712907495906, "grad_norm": 0.5009727309465613, "learning_rate": 2.4917738558181277e-06, "loss": 0.5464, "step": 833 }, { "epoch": 0.07484687352763007, "grad_norm": 0.532140474113337, "learning_rate": 2.4947651809751722e-06, "loss": 0.5328, "step": 834 }, { "epoch": 0.07493661798030109, "grad_norm": 0.5374056909157032, "learning_rate": 2.4977565061322168e-06, "loss": 0.5935, "step": 835 }, { "epoch": 0.07502636243297212, "grad_norm": 0.5389798921984494, "learning_rate": 2.5007478312892613e-06, "loss": 0.6032, "step": 836 }, { "epoch": 0.07511610688564313, "grad_norm": 0.5088550370065843, "learning_rate": 2.503739156446306e-06, "loss": 0.506, "step": 837 }, { "epoch": 0.07520585133831414, "grad_norm": 0.5225120843055461, "learning_rate": 2.5067304816033505e-06, "loss": 0.6089, "step": 838 }, { "epoch": 0.07529559579098517, "grad_norm": 0.5325592925880234, "learning_rate": 2.509721806760395e-06, "loss": 0.6114, "step": 839 }, { "epoch": 0.07538534024365619, "grad_norm": 0.5311690829944331, "learning_rate": 2.5127131319174396e-06, "loss": 0.5062, "step": 840 }, { "epoch": 0.0754750846963272, "grad_norm": 0.5599985509025555, "learning_rate": 2.515704457074484e-06, "loss": 0.594, "step": 841 }, { "epoch": 0.07556482914899823, "grad_norm": 0.5184011217045648, "learning_rate": 2.5186957822315287e-06, "loss": 0.5227, "step": 842 }, { "epoch": 0.07565457360166924, "grad_norm": 0.5412848135257948, "learning_rate": 2.5216871073885737e-06, "loss": 0.6274, "step": 843 }, { "epoch": 0.07574431805434026, "grad_norm": 0.5462847604064038, "learning_rate": 2.5246784325456182e-06, "loss": 0.6454, "step": 844 }, { "epoch": 0.07583406250701129, "grad_norm": 0.5491997318320664, "learning_rate": 2.527669757702663e-06, "loss": 0.6249, "step": 845 }, { "epoch": 0.0759238069596823, "grad_norm": 0.5674265521272716, "learning_rate": 2.5306610828597074e-06, "loss": 0.628, "step": 846 }, { "epoch": 0.07601355141235333, "grad_norm": 0.5421415168785975, "learning_rate": 2.533652408016752e-06, "loss": 0.5764, "step": 847 }, { "epoch": 0.07610329586502435, "grad_norm": 0.5218373767516139, "learning_rate": 2.5366437331737965e-06, "loss": 0.5297, "step": 848 }, { "epoch": 0.07619304031769536, "grad_norm": 0.5607092880095358, "learning_rate": 2.539635058330841e-06, "loss": 0.5724, "step": 849 }, { "epoch": 0.07628278477036639, "grad_norm": 0.6123174558234901, "learning_rate": 2.5426263834878856e-06, "loss": 0.6385, "step": 850 }, { "epoch": 0.0763725292230374, "grad_norm": 0.6167409781693516, "learning_rate": 2.54561770864493e-06, "loss": 0.634, "step": 851 }, { "epoch": 0.07646227367570842, "grad_norm": 0.533695111099911, "learning_rate": 2.5486090338019743e-06, "loss": 0.587, "step": 852 }, { "epoch": 0.07655201812837945, "grad_norm": 0.5409448957138746, "learning_rate": 2.551600358959019e-06, "loss": 0.6203, "step": 853 }, { "epoch": 0.07664176258105046, "grad_norm": 0.6245554979836528, "learning_rate": 2.5545916841160634e-06, "loss": 0.6045, "step": 854 }, { "epoch": 0.07673150703372147, "grad_norm": 0.5921164011722169, "learning_rate": 2.557583009273108e-06, "loss": 0.5345, "step": 855 }, { "epoch": 0.0768212514863925, "grad_norm": 0.6269531731565767, "learning_rate": 2.5605743344301525e-06, "loss": 0.6607, "step": 856 }, { "epoch": 0.07691099593906352, "grad_norm": 0.5755673728290194, "learning_rate": 2.563565659587197e-06, "loss": 0.6328, "step": 857 }, { "epoch": 0.07700074039173453, "grad_norm": 0.4936530182048866, "learning_rate": 2.5665569847442416e-06, "loss": 0.5391, "step": 858 }, { "epoch": 0.07709048484440556, "grad_norm": 0.5012196296147113, "learning_rate": 2.569548309901286e-06, "loss": 0.5306, "step": 859 }, { "epoch": 0.07718022929707657, "grad_norm": 0.5591350164704834, "learning_rate": 2.5725396350583307e-06, "loss": 0.6088, "step": 860 }, { "epoch": 0.07726997374974759, "grad_norm": 0.5045183411929942, "learning_rate": 2.5755309602153757e-06, "loss": 0.5662, "step": 861 }, { "epoch": 0.07735971820241862, "grad_norm": 0.5402862403344699, "learning_rate": 2.5785222853724203e-06, "loss": 0.5634, "step": 862 }, { "epoch": 0.07744946265508963, "grad_norm": 0.5851610621340364, "learning_rate": 2.581513610529465e-06, "loss": 0.6049, "step": 863 }, { "epoch": 0.07753920710776065, "grad_norm": 0.5602439958530712, "learning_rate": 2.5845049356865094e-06, "loss": 0.5853, "step": 864 }, { "epoch": 0.07762895156043167, "grad_norm": 0.551276413936477, "learning_rate": 2.587496260843554e-06, "loss": 0.5672, "step": 865 }, { "epoch": 0.07771869601310269, "grad_norm": 0.5555280062615726, "learning_rate": 2.5904875860005985e-06, "loss": 0.5939, "step": 866 }, { "epoch": 0.0778084404657737, "grad_norm": 0.5707938304028287, "learning_rate": 2.593478911157643e-06, "loss": 0.6334, "step": 867 }, { "epoch": 0.07789818491844473, "grad_norm": 0.5448181691199326, "learning_rate": 2.5964702363146876e-06, "loss": 0.5559, "step": 868 }, { "epoch": 0.07798792937111575, "grad_norm": 0.526032054513817, "learning_rate": 2.599461561471732e-06, "loss": 0.5369, "step": 869 }, { "epoch": 0.07807767382378676, "grad_norm": 0.5219105348521079, "learning_rate": 2.6024528866287768e-06, "loss": 0.548, "step": 870 }, { "epoch": 0.07816741827645779, "grad_norm": 0.5299725726175823, "learning_rate": 2.6054442117858213e-06, "loss": 0.5541, "step": 871 }, { "epoch": 0.0782571627291288, "grad_norm": 0.5533395431502602, "learning_rate": 2.608435536942866e-06, "loss": 0.5462, "step": 872 }, { "epoch": 0.07834690718179982, "grad_norm": 0.5312333112370738, "learning_rate": 2.6114268620999104e-06, "loss": 0.5766, "step": 873 }, { "epoch": 0.07843665163447085, "grad_norm": 0.5447566895751622, "learning_rate": 2.614418187256955e-06, "loss": 0.5861, "step": 874 }, { "epoch": 0.07852639608714186, "grad_norm": 0.5528949951507894, "learning_rate": 2.617409512414e-06, "loss": 0.5841, "step": 875 }, { "epoch": 0.07861614053981288, "grad_norm": 0.5167817274448884, "learning_rate": 2.6204008375710445e-06, "loss": 0.5646, "step": 876 }, { "epoch": 0.0787058849924839, "grad_norm": 0.5556851951345048, "learning_rate": 2.623392162728089e-06, "loss": 0.5993, "step": 877 }, { "epoch": 0.07879562944515492, "grad_norm": 0.5856237084626448, "learning_rate": 2.6263834878851336e-06, "loss": 0.6426, "step": 878 }, { "epoch": 0.07888537389782595, "grad_norm": 0.5837730923171894, "learning_rate": 2.629374813042178e-06, "loss": 0.6418, "step": 879 }, { "epoch": 0.07897511835049696, "grad_norm": 0.518557297585053, "learning_rate": 2.6323661381992228e-06, "loss": 0.4608, "step": 880 }, { "epoch": 0.07906486280316798, "grad_norm": 0.5132289026967289, "learning_rate": 2.6353574633562673e-06, "loss": 0.5614, "step": 881 }, { "epoch": 0.079154607255839, "grad_norm": 0.5415490784782501, "learning_rate": 2.638348788513312e-06, "loss": 0.6146, "step": 882 }, { "epoch": 0.07924435170851002, "grad_norm": 0.5778793534289705, "learning_rate": 2.641340113670356e-06, "loss": 0.6317, "step": 883 }, { "epoch": 0.07933409616118103, "grad_norm": 0.5237704851703907, "learning_rate": 2.6443314388274006e-06, "loss": 0.5453, "step": 884 }, { "epoch": 0.07942384061385206, "grad_norm": 0.5181418150674818, "learning_rate": 2.647322763984445e-06, "loss": 0.5485, "step": 885 }, { "epoch": 0.07951358506652308, "grad_norm": 0.540648204851679, "learning_rate": 2.6503140891414897e-06, "loss": 0.5027, "step": 886 }, { "epoch": 0.07960332951919409, "grad_norm": 0.5516506063192165, "learning_rate": 2.6533054142985342e-06, "loss": 0.6236, "step": 887 }, { "epoch": 0.07969307397186512, "grad_norm": 0.5213378309136045, "learning_rate": 2.656296739455579e-06, "loss": 0.5886, "step": 888 }, { "epoch": 0.07978281842453613, "grad_norm": 0.575232323544391, "learning_rate": 2.6592880646126234e-06, "loss": 0.5607, "step": 889 }, { "epoch": 0.07987256287720715, "grad_norm": 0.5297876570308612, "learning_rate": 2.662279389769668e-06, "loss": 0.5926, "step": 890 }, { "epoch": 0.07996230732987818, "grad_norm": 0.5599874094907852, "learning_rate": 2.6652707149267125e-06, "loss": 0.5646, "step": 891 }, { "epoch": 0.08005205178254919, "grad_norm": 0.5157494547415625, "learning_rate": 2.668262040083757e-06, "loss": 0.5887, "step": 892 }, { "epoch": 0.0801417962352202, "grad_norm": 0.5849066044859557, "learning_rate": 2.671253365240802e-06, "loss": 0.6071, "step": 893 }, { "epoch": 0.08023154068789123, "grad_norm": 0.5375581085729706, "learning_rate": 2.6742446903978466e-06, "loss": 0.5238, "step": 894 }, { "epoch": 0.08032128514056225, "grad_norm": 0.5313448095201141, "learning_rate": 2.677236015554891e-06, "loss": 0.5745, "step": 895 }, { "epoch": 0.08041102959323326, "grad_norm": 0.5109704948764173, "learning_rate": 2.6802273407119357e-06, "loss": 0.4981, "step": 896 }, { "epoch": 0.08050077404590429, "grad_norm": 0.5147299895313513, "learning_rate": 2.6832186658689802e-06, "loss": 0.5326, "step": 897 }, { "epoch": 0.0805905184985753, "grad_norm": 0.5769009571787378, "learning_rate": 2.686209991026025e-06, "loss": 0.5847, "step": 898 }, { "epoch": 0.08068026295124632, "grad_norm": 0.5090751570252381, "learning_rate": 2.6892013161830694e-06, "loss": 0.6275, "step": 899 }, { "epoch": 0.08077000740391735, "grad_norm": 0.5860002104507384, "learning_rate": 2.692192641340114e-06, "loss": 0.6226, "step": 900 }, { "epoch": 0.08085975185658836, "grad_norm": 0.5748682844098821, "learning_rate": 2.6951839664971585e-06, "loss": 0.6319, "step": 901 }, { "epoch": 0.08094949630925938, "grad_norm": 0.5277346753185642, "learning_rate": 2.698175291654203e-06, "loss": 0.5422, "step": 902 }, { "epoch": 0.0810392407619304, "grad_norm": 0.5320261276399805, "learning_rate": 2.7011666168112476e-06, "loss": 0.5968, "step": 903 }, { "epoch": 0.08112898521460142, "grad_norm": 0.5411995314967544, "learning_rate": 2.704157941968292e-06, "loss": 0.5498, "step": 904 }, { "epoch": 0.08121872966727244, "grad_norm": 0.5467017709126403, "learning_rate": 2.7071492671253367e-06, "loss": 0.5594, "step": 905 }, { "epoch": 0.08130847411994346, "grad_norm": 0.5323426658905719, "learning_rate": 2.7101405922823813e-06, "loss": 0.5518, "step": 906 }, { "epoch": 0.08139821857261448, "grad_norm": 0.5584658234345553, "learning_rate": 2.7131319174394263e-06, "loss": 0.5733, "step": 907 }, { "epoch": 0.0814879630252855, "grad_norm": 0.48693772518891676, "learning_rate": 2.716123242596471e-06, "loss": 0.5038, "step": 908 }, { "epoch": 0.08157770747795652, "grad_norm": 0.5055222078753738, "learning_rate": 2.7191145677535154e-06, "loss": 0.5039, "step": 909 }, { "epoch": 0.08166745193062754, "grad_norm": 0.5271474198457728, "learning_rate": 2.72210589291056e-06, "loss": 0.5785, "step": 910 }, { "epoch": 0.08175719638329856, "grad_norm": 0.5379669099693727, "learning_rate": 2.7250972180676045e-06, "loss": 0.5771, "step": 911 }, { "epoch": 0.08184694083596958, "grad_norm": 0.5771067000500839, "learning_rate": 2.728088543224649e-06, "loss": 0.6113, "step": 912 }, { "epoch": 0.08193668528864059, "grad_norm": 0.5736808665182129, "learning_rate": 2.7310798683816936e-06, "loss": 0.6367, "step": 913 }, { "epoch": 0.08202642974131162, "grad_norm": 0.5202978019981742, "learning_rate": 2.7340711935387377e-06, "loss": 0.5613, "step": 914 }, { "epoch": 0.08211617419398264, "grad_norm": 0.5527237256122042, "learning_rate": 2.7370625186957823e-06, "loss": 0.5651, "step": 915 }, { "epoch": 0.08220591864665365, "grad_norm": 0.6045655166998497, "learning_rate": 2.740053843852827e-06, "loss": 0.5518, "step": 916 }, { "epoch": 0.08229566309932468, "grad_norm": 0.5079367670886568, "learning_rate": 2.7430451690098714e-06, "loss": 0.5362, "step": 917 }, { "epoch": 0.08238540755199569, "grad_norm": 0.4895517657670816, "learning_rate": 2.746036494166916e-06, "loss": 0.5027, "step": 918 }, { "epoch": 0.08247515200466671, "grad_norm": 0.5337798364618854, "learning_rate": 2.7490278193239605e-06, "loss": 0.5254, "step": 919 }, { "epoch": 0.08256489645733774, "grad_norm": 0.4816587539723988, "learning_rate": 2.752019144481005e-06, "loss": 0.5147, "step": 920 }, { "epoch": 0.08265464091000875, "grad_norm": 0.5055670353023775, "learning_rate": 2.7550104696380496e-06, "loss": 0.5552, "step": 921 }, { "epoch": 0.08274438536267976, "grad_norm": 0.5355551503360362, "learning_rate": 2.758001794795094e-06, "loss": 0.5881, "step": 922 }, { "epoch": 0.0828341298153508, "grad_norm": 0.5158888174560465, "learning_rate": 2.7609931199521388e-06, "loss": 0.5156, "step": 923 }, { "epoch": 0.08292387426802181, "grad_norm": 0.5151826503598539, "learning_rate": 2.7639844451091833e-06, "loss": 0.5099, "step": 924 }, { "epoch": 0.08301361872069282, "grad_norm": 0.5543588979939065, "learning_rate": 2.7669757702662283e-06, "loss": 0.5897, "step": 925 }, { "epoch": 0.08310336317336385, "grad_norm": 0.6121877743375668, "learning_rate": 2.769967095423273e-06, "loss": 0.6606, "step": 926 }, { "epoch": 0.08319310762603487, "grad_norm": 0.5920242151937909, "learning_rate": 2.7729584205803174e-06, "loss": 0.5376, "step": 927 }, { "epoch": 0.08328285207870588, "grad_norm": 0.5510698129313119, "learning_rate": 2.775949745737362e-06, "loss": 0.5779, "step": 928 }, { "epoch": 0.08337259653137691, "grad_norm": 0.585888621226034, "learning_rate": 2.7789410708944065e-06, "loss": 0.6183, "step": 929 }, { "epoch": 0.08346234098404792, "grad_norm": 0.5165764963714738, "learning_rate": 2.781932396051451e-06, "loss": 0.5866, "step": 930 }, { "epoch": 0.08355208543671894, "grad_norm": 0.5012725478146803, "learning_rate": 2.7849237212084957e-06, "loss": 0.5748, "step": 931 }, { "epoch": 0.08364182988938997, "grad_norm": 0.5661719551828591, "learning_rate": 2.7879150463655402e-06, "loss": 0.5447, "step": 932 }, { "epoch": 0.08373157434206098, "grad_norm": 0.5703464739789873, "learning_rate": 2.7909063715225848e-06, "loss": 0.5731, "step": 933 }, { "epoch": 0.083821318794732, "grad_norm": 0.5691314455697686, "learning_rate": 2.7938976966796293e-06, "loss": 0.6447, "step": 934 }, { "epoch": 0.08391106324740302, "grad_norm": 0.5438632844514814, "learning_rate": 2.796889021836674e-06, "loss": 0.5781, "step": 935 }, { "epoch": 0.08400080770007404, "grad_norm": 0.5084380141834711, "learning_rate": 2.7998803469937184e-06, "loss": 0.5159, "step": 936 }, { "epoch": 0.08409055215274505, "grad_norm": 0.48120688337878414, "learning_rate": 2.802871672150763e-06, "loss": 0.5071, "step": 937 }, { "epoch": 0.08418029660541608, "grad_norm": 0.6153041710335764, "learning_rate": 2.8058629973078076e-06, "loss": 0.6484, "step": 938 }, { "epoch": 0.0842700410580871, "grad_norm": 0.5529433579588646, "learning_rate": 2.8088543224648525e-06, "loss": 0.6207, "step": 939 }, { "epoch": 0.08435978551075812, "grad_norm": 0.51742144931593, "learning_rate": 2.811845647621897e-06, "loss": 0.5045, "step": 940 }, { "epoch": 0.08444952996342914, "grad_norm": 0.5298473003823676, "learning_rate": 2.8148369727789417e-06, "loss": 0.541, "step": 941 }, { "epoch": 0.08453927441610015, "grad_norm": 0.5278921509898792, "learning_rate": 2.8178282979359862e-06, "loss": 0.5332, "step": 942 }, { "epoch": 0.08462901886877118, "grad_norm": 0.5753139658187981, "learning_rate": 2.8208196230930308e-06, "loss": 0.6303, "step": 943 }, { "epoch": 0.0847187633214422, "grad_norm": 0.5208529042148431, "learning_rate": 2.8238109482500753e-06, "loss": 0.5341, "step": 944 }, { "epoch": 0.08480850777411321, "grad_norm": 0.5104699632287135, "learning_rate": 2.8268022734071195e-06, "loss": 0.5608, "step": 945 }, { "epoch": 0.08489825222678424, "grad_norm": 0.528955232012221, "learning_rate": 2.829793598564164e-06, "loss": 0.5627, "step": 946 }, { "epoch": 0.08498799667945525, "grad_norm": 0.6275288531674763, "learning_rate": 2.8327849237212086e-06, "loss": 0.6794, "step": 947 }, { "epoch": 0.08507774113212627, "grad_norm": 0.5521757419473654, "learning_rate": 2.835776248878253e-06, "loss": 0.6485, "step": 948 }, { "epoch": 0.0851674855847973, "grad_norm": 0.4813684872364874, "learning_rate": 2.8387675740352977e-06, "loss": 0.4708, "step": 949 }, { "epoch": 0.08525723003746831, "grad_norm": 0.4925715587103556, "learning_rate": 2.8417588991923423e-06, "loss": 0.511, "step": 950 }, { "epoch": 0.08534697449013932, "grad_norm": 0.4970812504387781, "learning_rate": 2.844750224349387e-06, "loss": 0.5798, "step": 951 }, { "epoch": 0.08543671894281035, "grad_norm": 0.5128267252697212, "learning_rate": 2.8477415495064314e-06, "loss": 0.5637, "step": 952 }, { "epoch": 0.08552646339548137, "grad_norm": 0.5064343196824542, "learning_rate": 2.850732874663476e-06, "loss": 0.5569, "step": 953 }, { "epoch": 0.08561620784815238, "grad_norm": 0.5211214115090573, "learning_rate": 2.8537241998205205e-06, "loss": 0.5751, "step": 954 }, { "epoch": 0.08570595230082341, "grad_norm": 0.5708037207647573, "learning_rate": 2.856715524977565e-06, "loss": 0.6494, "step": 955 }, { "epoch": 0.08579569675349442, "grad_norm": 0.5130907973018272, "learning_rate": 2.8597068501346096e-06, "loss": 0.5559, "step": 956 }, { "epoch": 0.08588544120616544, "grad_norm": 0.4832185388534612, "learning_rate": 2.8626981752916546e-06, "loss": 0.5376, "step": 957 }, { "epoch": 0.08597518565883647, "grad_norm": 0.5197138867020737, "learning_rate": 2.865689500448699e-06, "loss": 0.5204, "step": 958 }, { "epoch": 0.08606493011150748, "grad_norm": 0.5293603928312405, "learning_rate": 2.8686808256057437e-06, "loss": 0.6207, "step": 959 }, { "epoch": 0.0861546745641785, "grad_norm": 0.5048876833684267, "learning_rate": 2.8716721507627883e-06, "loss": 0.5501, "step": 960 }, { "epoch": 0.08624441901684952, "grad_norm": 0.5518307688685832, "learning_rate": 2.874663475919833e-06, "loss": 0.6597, "step": 961 }, { "epoch": 0.08633416346952054, "grad_norm": 0.48351426201085873, "learning_rate": 2.8776548010768774e-06, "loss": 0.5624, "step": 962 }, { "epoch": 0.08642390792219155, "grad_norm": 0.5355067967023048, "learning_rate": 2.880646126233922e-06, "loss": 0.5205, "step": 963 }, { "epoch": 0.08651365237486258, "grad_norm": 0.5964239081815136, "learning_rate": 2.8836374513909665e-06, "loss": 0.7099, "step": 964 }, { "epoch": 0.0866033968275336, "grad_norm": 0.4369193751195741, "learning_rate": 2.886628776548011e-06, "loss": 0.4132, "step": 965 }, { "epoch": 0.08669314128020461, "grad_norm": 0.5811013452050635, "learning_rate": 2.8896201017050556e-06, "loss": 0.6613, "step": 966 }, { "epoch": 0.08678288573287564, "grad_norm": 0.5528619699765411, "learning_rate": 2.8926114268621e-06, "loss": 0.5747, "step": 967 }, { "epoch": 0.08687263018554665, "grad_norm": 0.5422046631135541, "learning_rate": 2.8956027520191447e-06, "loss": 0.5256, "step": 968 }, { "epoch": 0.08696237463821768, "grad_norm": 0.5314508166669812, "learning_rate": 2.8985940771761893e-06, "loss": 0.5622, "step": 969 }, { "epoch": 0.0870521190908887, "grad_norm": 0.5379517225263746, "learning_rate": 2.901585402333234e-06, "loss": 0.5806, "step": 970 }, { "epoch": 0.08714186354355971, "grad_norm": 0.5654448955132719, "learning_rate": 2.904576727490279e-06, "loss": 0.5842, "step": 971 }, { "epoch": 0.08723160799623074, "grad_norm": 0.5887261875330332, "learning_rate": 2.9075680526473234e-06, "loss": 0.662, "step": 972 }, { "epoch": 0.08732135244890175, "grad_norm": 0.5551916053654022, "learning_rate": 2.910559377804368e-06, "loss": 0.591, "step": 973 }, { "epoch": 0.08741109690157277, "grad_norm": 0.4700339621659106, "learning_rate": 2.9135507029614125e-06, "loss": 0.4671, "step": 974 }, { "epoch": 0.0875008413542438, "grad_norm": 0.5413065646684045, "learning_rate": 2.916542028118457e-06, "loss": 0.5383, "step": 975 }, { "epoch": 0.08759058580691481, "grad_norm": 0.4948819163005821, "learning_rate": 2.919533353275501e-06, "loss": 0.5419, "step": 976 }, { "epoch": 0.08768033025958583, "grad_norm": 0.5321097039889268, "learning_rate": 2.9225246784325458e-06, "loss": 0.5548, "step": 977 }, { "epoch": 0.08777007471225685, "grad_norm": 0.492439839613164, "learning_rate": 2.9255160035895903e-06, "loss": 0.4751, "step": 978 }, { "epoch": 0.08785981916492787, "grad_norm": 0.48635635555128925, "learning_rate": 2.928507328746635e-06, "loss": 0.5123, "step": 979 }, { "epoch": 0.08794956361759888, "grad_norm": 0.5715740186285229, "learning_rate": 2.9314986539036794e-06, "loss": 0.6209, "step": 980 }, { "epoch": 0.08803930807026991, "grad_norm": 0.5275192597856908, "learning_rate": 2.934489979060724e-06, "loss": 0.5421, "step": 981 }, { "epoch": 0.08812905252294093, "grad_norm": 0.5881010669044041, "learning_rate": 2.9374813042177686e-06, "loss": 0.6396, "step": 982 }, { "epoch": 0.08821879697561194, "grad_norm": 0.5070455867064735, "learning_rate": 2.940472629374813e-06, "loss": 0.5301, "step": 983 }, { "epoch": 0.08830854142828297, "grad_norm": 0.5449399041812809, "learning_rate": 2.9434639545318577e-06, "loss": 0.5633, "step": 984 }, { "epoch": 0.08839828588095398, "grad_norm": 0.6033508576486586, "learning_rate": 2.9464552796889022e-06, "loss": 0.6565, "step": 985 }, { "epoch": 0.088488030333625, "grad_norm": 0.551302145477973, "learning_rate": 2.9494466048459468e-06, "loss": 0.5477, "step": 986 }, { "epoch": 0.08857777478629603, "grad_norm": 0.5343797271631976, "learning_rate": 2.9524379300029913e-06, "loss": 0.5427, "step": 987 }, { "epoch": 0.08866751923896704, "grad_norm": 0.48285855879168604, "learning_rate": 2.955429255160036e-06, "loss": 0.4872, "step": 988 }, { "epoch": 0.08875726369163806, "grad_norm": 0.5293552027534721, "learning_rate": 2.958420580317081e-06, "loss": 0.5718, "step": 989 }, { "epoch": 0.08884700814430908, "grad_norm": 0.5254335567687666, "learning_rate": 2.9614119054741254e-06, "loss": 0.5709, "step": 990 }, { "epoch": 0.0889367525969801, "grad_norm": 0.5502014929815554, "learning_rate": 2.96440323063117e-06, "loss": 0.5493, "step": 991 }, { "epoch": 0.08902649704965111, "grad_norm": 0.49881601313494417, "learning_rate": 2.9673945557882146e-06, "loss": 0.4702, "step": 992 }, { "epoch": 0.08911624150232214, "grad_norm": 0.5685346234562234, "learning_rate": 2.970385880945259e-06, "loss": 0.6105, "step": 993 }, { "epoch": 0.08920598595499316, "grad_norm": 0.5341625841353972, "learning_rate": 2.9733772061023037e-06, "loss": 0.5772, "step": 994 }, { "epoch": 0.08929573040766417, "grad_norm": 0.5849333732202422, "learning_rate": 2.9763685312593482e-06, "loss": 0.6804, "step": 995 }, { "epoch": 0.0893854748603352, "grad_norm": 0.528322037386313, "learning_rate": 2.979359856416393e-06, "loss": 0.5208, "step": 996 }, { "epoch": 0.08947521931300621, "grad_norm": 0.5024659099468071, "learning_rate": 2.9823511815734374e-06, "loss": 0.5189, "step": 997 }, { "epoch": 0.08956496376567723, "grad_norm": 0.5494861384945021, "learning_rate": 2.985342506730482e-06, "loss": 0.5657, "step": 998 }, { "epoch": 0.08965470821834826, "grad_norm": 0.5586484798464486, "learning_rate": 2.9883338318875265e-06, "loss": 0.6176, "step": 999 }, { "epoch": 0.08974445267101927, "grad_norm": 0.5165346204962474, "learning_rate": 2.991325157044571e-06, "loss": 0.505, "step": 1000 }, { "epoch": 0.0898341971236903, "grad_norm": 0.6015315843830392, "learning_rate": 2.9943164822016156e-06, "loss": 0.5864, "step": 1001 }, { "epoch": 0.08992394157636131, "grad_norm": 0.5944890771786746, "learning_rate": 2.99730780735866e-06, "loss": 0.652, "step": 1002 }, { "epoch": 0.09001368602903233, "grad_norm": 0.5822087317222316, "learning_rate": 3.000299132515705e-06, "loss": 0.5967, "step": 1003 }, { "epoch": 0.09010343048170336, "grad_norm": 0.5345248203759594, "learning_rate": 3.0032904576727497e-06, "loss": 0.6094, "step": 1004 }, { "epoch": 0.09019317493437437, "grad_norm": 0.5594227661124901, "learning_rate": 3.0062817828297942e-06, "loss": 0.5585, "step": 1005 }, { "epoch": 0.09028291938704538, "grad_norm": 0.5075266914183765, "learning_rate": 3.009273107986838e-06, "loss": 0.5735, "step": 1006 }, { "epoch": 0.09037266383971641, "grad_norm": 0.5606333764430238, "learning_rate": 3.012264433143883e-06, "loss": 0.5637, "step": 1007 }, { "epoch": 0.09046240829238743, "grad_norm": 0.5049818660954184, "learning_rate": 3.0152557583009275e-06, "loss": 0.4726, "step": 1008 }, { "epoch": 0.09055215274505844, "grad_norm": 0.5347777960056298, "learning_rate": 3.018247083457972e-06, "loss": 0.5248, "step": 1009 }, { "epoch": 0.09064189719772947, "grad_norm": 0.5336992718674209, "learning_rate": 3.0212384086150166e-06, "loss": 0.5517, "step": 1010 }, { "epoch": 0.09073164165040049, "grad_norm": 0.5454639194272417, "learning_rate": 3.024229733772061e-06, "loss": 0.6283, "step": 1011 }, { "epoch": 0.0908213861030715, "grad_norm": 0.6471713262394484, "learning_rate": 3.0272210589291057e-06, "loss": 0.6641, "step": 1012 }, { "epoch": 0.09091113055574253, "grad_norm": 0.535347052077836, "learning_rate": 3.0302123840861503e-06, "loss": 0.5857, "step": 1013 }, { "epoch": 0.09100087500841354, "grad_norm": 0.5025386142741965, "learning_rate": 3.033203709243195e-06, "loss": 0.5043, "step": 1014 }, { "epoch": 0.09109061946108456, "grad_norm": 0.515674489421795, "learning_rate": 3.0361950344002394e-06, "loss": 0.5382, "step": 1015 }, { "epoch": 0.09118036391375559, "grad_norm": 0.5016264356963782, "learning_rate": 3.039186359557284e-06, "loss": 0.5522, "step": 1016 }, { "epoch": 0.0912701083664266, "grad_norm": 0.5530074246314471, "learning_rate": 3.0421776847143285e-06, "loss": 0.62, "step": 1017 }, { "epoch": 0.09135985281909761, "grad_norm": 0.5628888741408824, "learning_rate": 3.045169009871373e-06, "loss": 0.6147, "step": 1018 }, { "epoch": 0.09144959727176864, "grad_norm": 0.5266681892541824, "learning_rate": 3.0481603350284176e-06, "loss": 0.5439, "step": 1019 }, { "epoch": 0.09153934172443966, "grad_norm": 0.46238896503451365, "learning_rate": 3.051151660185462e-06, "loss": 0.4275, "step": 1020 }, { "epoch": 0.09162908617711067, "grad_norm": 0.5254008649482084, "learning_rate": 3.054142985342507e-06, "loss": 0.5448, "step": 1021 }, { "epoch": 0.0917188306297817, "grad_norm": 0.5535818544790874, "learning_rate": 3.0571343104995517e-06, "loss": 0.6019, "step": 1022 }, { "epoch": 0.09180857508245271, "grad_norm": 0.5413743845842119, "learning_rate": 3.0601256356565963e-06, "loss": 0.6138, "step": 1023 }, { "epoch": 0.09189831953512373, "grad_norm": 0.4953551623090186, "learning_rate": 3.063116960813641e-06, "loss": 0.5432, "step": 1024 }, { "epoch": 0.09198806398779476, "grad_norm": 0.5271365181898608, "learning_rate": 3.0661082859706854e-06, "loss": 0.5671, "step": 1025 }, { "epoch": 0.09207780844046577, "grad_norm": 0.5884960932624775, "learning_rate": 3.06909961112773e-06, "loss": 0.6477, "step": 1026 }, { "epoch": 0.09216755289313679, "grad_norm": 0.5700862340038931, "learning_rate": 3.0720909362847745e-06, "loss": 0.6267, "step": 1027 }, { "epoch": 0.09225729734580781, "grad_norm": 0.5008177045243344, "learning_rate": 3.075082261441819e-06, "loss": 0.5403, "step": 1028 }, { "epoch": 0.09234704179847883, "grad_norm": 0.5146315276321758, "learning_rate": 3.0780735865988636e-06, "loss": 0.5096, "step": 1029 }, { "epoch": 0.09243678625114984, "grad_norm": 0.600225831186809, "learning_rate": 3.081064911755908e-06, "loss": 0.6393, "step": 1030 }, { "epoch": 0.09252653070382087, "grad_norm": 0.5590244176089472, "learning_rate": 3.0840562369129528e-06, "loss": 0.611, "step": 1031 }, { "epoch": 0.09261627515649189, "grad_norm": 0.5004378812837209, "learning_rate": 3.0870475620699973e-06, "loss": 0.5628, "step": 1032 }, { "epoch": 0.09270601960916292, "grad_norm": 0.4997404295111308, "learning_rate": 3.090038887227042e-06, "loss": 0.5155, "step": 1033 }, { "epoch": 0.09279576406183393, "grad_norm": 0.532231809861155, "learning_rate": 3.0930302123840864e-06, "loss": 0.581, "step": 1034 }, { "epoch": 0.09288550851450494, "grad_norm": 0.5589118751596875, "learning_rate": 3.0960215375411314e-06, "loss": 0.6153, "step": 1035 }, { "epoch": 0.09297525296717597, "grad_norm": 0.6258227964160111, "learning_rate": 3.099012862698176e-06, "loss": 0.7119, "step": 1036 }, { "epoch": 0.09306499741984699, "grad_norm": 0.5340584223270374, "learning_rate": 3.1020041878552197e-06, "loss": 0.5491, "step": 1037 }, { "epoch": 0.093154741872518, "grad_norm": 0.5596751878810656, "learning_rate": 3.1049955130122642e-06, "loss": 0.6073, "step": 1038 }, { "epoch": 0.09324448632518903, "grad_norm": 0.4839444433495957, "learning_rate": 3.1079868381693092e-06, "loss": 0.512, "step": 1039 }, { "epoch": 0.09333423077786004, "grad_norm": 0.5776126096186315, "learning_rate": 3.1109781633263538e-06, "loss": 0.5792, "step": 1040 }, { "epoch": 0.09342397523053106, "grad_norm": 0.587398353508364, "learning_rate": 3.1139694884833983e-06, "loss": 0.6162, "step": 1041 }, { "epoch": 0.09351371968320209, "grad_norm": 0.5307978035362182, "learning_rate": 3.116960813640443e-06, "loss": 0.6218, "step": 1042 }, { "epoch": 0.0936034641358731, "grad_norm": 0.4899351214672916, "learning_rate": 3.1199521387974875e-06, "loss": 0.4362, "step": 1043 }, { "epoch": 0.09369320858854412, "grad_norm": 0.5091759867297239, "learning_rate": 3.122943463954532e-06, "loss": 0.5164, "step": 1044 }, { "epoch": 0.09378295304121514, "grad_norm": 0.5554993492376588, "learning_rate": 3.1259347891115766e-06, "loss": 0.5307, "step": 1045 }, { "epoch": 0.09387269749388616, "grad_norm": 0.557840227141079, "learning_rate": 3.128926114268621e-06, "loss": 0.6049, "step": 1046 }, { "epoch": 0.09396244194655717, "grad_norm": 0.5830151036146838, "learning_rate": 3.1319174394256657e-06, "loss": 0.6346, "step": 1047 }, { "epoch": 0.0940521863992282, "grad_norm": 0.528981140881386, "learning_rate": 3.1349087645827102e-06, "loss": 0.5476, "step": 1048 }, { "epoch": 0.09414193085189922, "grad_norm": 0.5346303370008287, "learning_rate": 3.137900089739755e-06, "loss": 0.6043, "step": 1049 }, { "epoch": 0.09423167530457023, "grad_norm": 0.5374575432796606, "learning_rate": 3.1408914148967994e-06, "loss": 0.5307, "step": 1050 }, { "epoch": 0.09432141975724126, "grad_norm": 0.5393582575608002, "learning_rate": 3.143882740053844e-06, "loss": 0.6212, "step": 1051 }, { "epoch": 0.09441116420991227, "grad_norm": 0.49284385978564244, "learning_rate": 3.1468740652108885e-06, "loss": 0.4941, "step": 1052 }, { "epoch": 0.09450090866258329, "grad_norm": 0.5529229771842672, "learning_rate": 3.1498653903679335e-06, "loss": 0.6022, "step": 1053 }, { "epoch": 0.09459065311525432, "grad_norm": 0.5097656493148081, "learning_rate": 3.152856715524978e-06, "loss": 0.5209, "step": 1054 }, { "epoch": 0.09468039756792533, "grad_norm": 0.5736500941928483, "learning_rate": 3.1558480406820226e-06, "loss": 0.5571, "step": 1055 }, { "epoch": 0.09477014202059635, "grad_norm": 0.5812688424647046, "learning_rate": 3.158839365839067e-06, "loss": 0.6563, "step": 1056 }, { "epoch": 0.09485988647326737, "grad_norm": 0.54417438296789, "learning_rate": 3.1618306909961117e-06, "loss": 0.5771, "step": 1057 }, { "epoch": 0.09494963092593839, "grad_norm": 0.4809110997252305, "learning_rate": 3.1648220161531563e-06, "loss": 0.4719, "step": 1058 }, { "epoch": 0.0950393753786094, "grad_norm": 0.5435591881251515, "learning_rate": 3.167813341310201e-06, "loss": 0.5397, "step": 1059 }, { "epoch": 0.09512911983128043, "grad_norm": 0.5668688538529181, "learning_rate": 3.1708046664672454e-06, "loss": 0.6063, "step": 1060 }, { "epoch": 0.09521886428395145, "grad_norm": 0.5249274916912786, "learning_rate": 3.17379599162429e-06, "loss": 0.5855, "step": 1061 }, { "epoch": 0.09530860873662247, "grad_norm": 0.5268172681238871, "learning_rate": 3.1767873167813345e-06, "loss": 0.5692, "step": 1062 }, { "epoch": 0.09539835318929349, "grad_norm": 0.5592699886290566, "learning_rate": 3.179778641938379e-06, "loss": 0.5754, "step": 1063 }, { "epoch": 0.0954880976419645, "grad_norm": 0.5124766268602514, "learning_rate": 3.1827699670954236e-06, "loss": 0.6124, "step": 1064 }, { "epoch": 0.09557784209463553, "grad_norm": 0.5048438594505155, "learning_rate": 3.185761292252468e-06, "loss": 0.5171, "step": 1065 }, { "epoch": 0.09566758654730655, "grad_norm": 0.4895042699609182, "learning_rate": 3.1887526174095127e-06, "loss": 0.515, "step": 1066 }, { "epoch": 0.09575733099997756, "grad_norm": 0.5476024802277413, "learning_rate": 3.1917439425665577e-06, "loss": 0.5241, "step": 1067 }, { "epoch": 0.09584707545264859, "grad_norm": 0.5094793269769967, "learning_rate": 3.1947352677236014e-06, "loss": 0.5199, "step": 1068 }, { "epoch": 0.0959368199053196, "grad_norm": 0.5428523480003681, "learning_rate": 3.197726592880646e-06, "loss": 0.6712, "step": 1069 }, { "epoch": 0.09602656435799062, "grad_norm": 0.5561889240828956, "learning_rate": 3.2007179180376905e-06, "loss": 0.5713, "step": 1070 }, { "epoch": 0.09611630881066165, "grad_norm": 0.564652794728666, "learning_rate": 3.2037092431947355e-06, "loss": 0.604, "step": 1071 }, { "epoch": 0.09620605326333266, "grad_norm": 0.5091863265246312, "learning_rate": 3.20670056835178e-06, "loss": 0.512, "step": 1072 }, { "epoch": 0.09629579771600368, "grad_norm": 0.48083979703972124, "learning_rate": 3.2096918935088246e-06, "loss": 0.4688, "step": 1073 }, { "epoch": 0.0963855421686747, "grad_norm": 0.5449006557295445, "learning_rate": 3.212683218665869e-06, "loss": 0.5981, "step": 1074 }, { "epoch": 0.09647528662134572, "grad_norm": 0.53808796909466, "learning_rate": 3.2156745438229137e-06, "loss": 0.5779, "step": 1075 }, { "epoch": 0.09656503107401673, "grad_norm": 0.5280328142071599, "learning_rate": 3.2186658689799583e-06, "loss": 0.5639, "step": 1076 }, { "epoch": 0.09665477552668776, "grad_norm": 0.5032351632999483, "learning_rate": 3.221657194137003e-06, "loss": 0.5374, "step": 1077 }, { "epoch": 0.09674451997935878, "grad_norm": 0.5267324745075728, "learning_rate": 3.2246485192940474e-06, "loss": 0.5133, "step": 1078 }, { "epoch": 0.09683426443202979, "grad_norm": 0.46970770838886866, "learning_rate": 3.227639844451092e-06, "loss": 0.524, "step": 1079 }, { "epoch": 0.09692400888470082, "grad_norm": 0.541883685624445, "learning_rate": 3.2306311696081365e-06, "loss": 0.5647, "step": 1080 }, { "epoch": 0.09701375333737183, "grad_norm": 0.5147249821922888, "learning_rate": 3.233622494765181e-06, "loss": 0.5871, "step": 1081 }, { "epoch": 0.09710349779004285, "grad_norm": 0.5208078553989415, "learning_rate": 3.2366138199222257e-06, "loss": 0.4902, "step": 1082 }, { "epoch": 0.09719324224271388, "grad_norm": 0.4807682915702048, "learning_rate": 3.2396051450792702e-06, "loss": 0.5035, "step": 1083 }, { "epoch": 0.09728298669538489, "grad_norm": 0.5240471528228813, "learning_rate": 3.2425964702363148e-06, "loss": 0.5671, "step": 1084 }, { "epoch": 0.0973727311480559, "grad_norm": 0.5187574178990009, "learning_rate": 3.2455877953933598e-06, "loss": 0.5732, "step": 1085 }, { "epoch": 0.09746247560072693, "grad_norm": 0.5087745332753151, "learning_rate": 3.2485791205504043e-06, "loss": 0.5042, "step": 1086 }, { "epoch": 0.09755222005339795, "grad_norm": 0.5494114391821584, "learning_rate": 3.251570445707449e-06, "loss": 0.5622, "step": 1087 }, { "epoch": 0.09764196450606896, "grad_norm": 0.48321362242242044, "learning_rate": 3.2545617708644934e-06, "loss": 0.4802, "step": 1088 }, { "epoch": 0.09773170895873999, "grad_norm": 0.5207278701176199, "learning_rate": 3.257553096021538e-06, "loss": 0.5279, "step": 1089 }, { "epoch": 0.097821453411411, "grad_norm": 0.4956163477200348, "learning_rate": 3.2605444211785825e-06, "loss": 0.5431, "step": 1090 }, { "epoch": 0.09791119786408202, "grad_norm": 0.5202039784173514, "learning_rate": 3.263535746335627e-06, "loss": 0.52, "step": 1091 }, { "epoch": 0.09800094231675305, "grad_norm": 0.4970380373586565, "learning_rate": 3.2665270714926717e-06, "loss": 0.5196, "step": 1092 }, { "epoch": 0.09809068676942406, "grad_norm": 0.5097219284927847, "learning_rate": 3.2695183966497162e-06, "loss": 0.534, "step": 1093 }, { "epoch": 0.09818043122209509, "grad_norm": 0.52411678497628, "learning_rate": 3.2725097218067608e-06, "loss": 0.5318, "step": 1094 }, { "epoch": 0.0982701756747661, "grad_norm": 0.5255915223596436, "learning_rate": 3.2755010469638053e-06, "loss": 0.5269, "step": 1095 }, { "epoch": 0.09835992012743712, "grad_norm": 0.5086874834546784, "learning_rate": 3.27849237212085e-06, "loss": 0.5378, "step": 1096 }, { "epoch": 0.09844966458010815, "grad_norm": 0.5401935278368559, "learning_rate": 3.2814836972778945e-06, "loss": 0.5939, "step": 1097 }, { "epoch": 0.09853940903277916, "grad_norm": 0.5151084596449784, "learning_rate": 3.284475022434939e-06, "loss": 0.5339, "step": 1098 }, { "epoch": 0.09862915348545018, "grad_norm": 0.5386163002678838, "learning_rate": 3.287466347591983e-06, "loss": 0.598, "step": 1099 }, { "epoch": 0.0987188979381212, "grad_norm": 0.5237678975539849, "learning_rate": 3.2904576727490277e-06, "loss": 0.5386, "step": 1100 }, { "epoch": 0.09880864239079222, "grad_norm": 0.5679711867168333, "learning_rate": 3.2934489979060723e-06, "loss": 0.6081, "step": 1101 }, { "epoch": 0.09889838684346323, "grad_norm": 0.49936952196519546, "learning_rate": 3.296440323063117e-06, "loss": 0.5385, "step": 1102 }, { "epoch": 0.09898813129613426, "grad_norm": 0.5226786274895849, "learning_rate": 3.299431648220162e-06, "loss": 0.5405, "step": 1103 }, { "epoch": 0.09907787574880528, "grad_norm": 0.5032161588633098, "learning_rate": 3.3024229733772064e-06, "loss": 0.5071, "step": 1104 }, { "epoch": 0.09916762020147629, "grad_norm": 0.5014382974200932, "learning_rate": 3.305414298534251e-06, "loss": 0.5271, "step": 1105 }, { "epoch": 0.09925736465414732, "grad_norm": 0.6078064476643327, "learning_rate": 3.3084056236912955e-06, "loss": 0.5895, "step": 1106 }, { "epoch": 0.09934710910681833, "grad_norm": 0.5819821925843368, "learning_rate": 3.31139694884834e-06, "loss": 0.5981, "step": 1107 }, { "epoch": 0.09943685355948935, "grad_norm": 0.4850736134941477, "learning_rate": 3.3143882740053846e-06, "loss": 0.4903, "step": 1108 }, { "epoch": 0.09952659801216038, "grad_norm": 0.5127610595524763, "learning_rate": 3.317379599162429e-06, "loss": 0.5208, "step": 1109 }, { "epoch": 0.09961634246483139, "grad_norm": 0.5002734660981356, "learning_rate": 3.3203709243194737e-06, "loss": 0.5731, "step": 1110 }, { "epoch": 0.0997060869175024, "grad_norm": 0.5439797761875845, "learning_rate": 3.3233622494765183e-06, "loss": 0.5667, "step": 1111 }, { "epoch": 0.09979583137017344, "grad_norm": 0.5291612573279653, "learning_rate": 3.326353574633563e-06, "loss": 0.5269, "step": 1112 }, { "epoch": 0.09988557582284445, "grad_norm": 0.5330669110774191, "learning_rate": 3.3293448997906074e-06, "loss": 0.549, "step": 1113 }, { "epoch": 0.09997532027551546, "grad_norm": 0.4799697339800237, "learning_rate": 3.332336224947652e-06, "loss": 0.506, "step": 1114 }, { "epoch": 0.10006506472818649, "grad_norm": 0.5081283338005834, "learning_rate": 3.3353275501046965e-06, "loss": 0.4751, "step": 1115 }, { "epoch": 0.10015480918085751, "grad_norm": 0.5360535247862761, "learning_rate": 3.338318875261741e-06, "loss": 0.5865, "step": 1116 }, { "epoch": 0.10024455363352852, "grad_norm": 0.5164735779231591, "learning_rate": 3.341310200418786e-06, "loss": 0.5306, "step": 1117 }, { "epoch": 0.10033429808619955, "grad_norm": 0.624546678936374, "learning_rate": 3.3443015255758306e-06, "loss": 0.625, "step": 1118 }, { "epoch": 0.10042404253887056, "grad_norm": 0.52987996481622, "learning_rate": 3.347292850732875e-06, "loss": 0.5875, "step": 1119 }, { "epoch": 0.10051378699154158, "grad_norm": 0.5321183169498, "learning_rate": 3.3502841758899197e-06, "loss": 0.5681, "step": 1120 }, { "epoch": 0.10060353144421261, "grad_norm": 0.581153128029775, "learning_rate": 3.3532755010469643e-06, "loss": 0.6555, "step": 1121 }, { "epoch": 0.10069327589688362, "grad_norm": 0.5192612480457925, "learning_rate": 3.356266826204009e-06, "loss": 0.5677, "step": 1122 }, { "epoch": 0.10078302034955465, "grad_norm": 0.5415092187174743, "learning_rate": 3.3592581513610534e-06, "loss": 0.5138, "step": 1123 }, { "epoch": 0.10087276480222566, "grad_norm": 0.5463290371759597, "learning_rate": 3.362249476518098e-06, "loss": 0.6327, "step": 1124 }, { "epoch": 0.10096250925489668, "grad_norm": 0.4805277294572426, "learning_rate": 3.3652408016751425e-06, "loss": 0.4912, "step": 1125 }, { "epoch": 0.10105225370756771, "grad_norm": 0.5672158928472939, "learning_rate": 3.368232126832187e-06, "loss": 0.6217, "step": 1126 }, { "epoch": 0.10114199816023872, "grad_norm": 0.5319212637333666, "learning_rate": 3.3712234519892316e-06, "loss": 0.5684, "step": 1127 }, { "epoch": 0.10123174261290974, "grad_norm": 0.49461667151434013, "learning_rate": 3.374214777146276e-06, "loss": 0.5172, "step": 1128 }, { "epoch": 0.10132148706558076, "grad_norm": 0.5675376390920699, "learning_rate": 3.3772061023033207e-06, "loss": 0.5768, "step": 1129 }, { "epoch": 0.10141123151825178, "grad_norm": 0.5871318581404292, "learning_rate": 3.380197427460365e-06, "loss": 0.6138, "step": 1130 }, { "epoch": 0.1015009759709228, "grad_norm": 0.5345774713891062, "learning_rate": 3.3831887526174094e-06, "loss": 0.5776, "step": 1131 }, { "epoch": 0.10159072042359382, "grad_norm": 0.5342852599772593, "learning_rate": 3.386180077774454e-06, "loss": 0.5231, "step": 1132 }, { "epoch": 0.10168046487626484, "grad_norm": 0.512718709133275, "learning_rate": 3.3891714029314986e-06, "loss": 0.5473, "step": 1133 }, { "epoch": 0.10177020932893585, "grad_norm": 0.48477771128528824, "learning_rate": 3.392162728088543e-06, "loss": 0.507, "step": 1134 }, { "epoch": 0.10185995378160688, "grad_norm": 0.461620806503944, "learning_rate": 3.395154053245588e-06, "loss": 0.4756, "step": 1135 }, { "epoch": 0.1019496982342779, "grad_norm": 0.4887815755969751, "learning_rate": 3.3981453784026327e-06, "loss": 0.4698, "step": 1136 }, { "epoch": 0.10203944268694891, "grad_norm": 0.5751937859519148, "learning_rate": 3.401136703559677e-06, "loss": 0.5749, "step": 1137 }, { "epoch": 0.10212918713961994, "grad_norm": 0.5643205098272306, "learning_rate": 3.4041280287167218e-06, "loss": 0.5391, "step": 1138 }, { "epoch": 0.10221893159229095, "grad_norm": 0.5251678455065758, "learning_rate": 3.4071193538737663e-06, "loss": 0.5352, "step": 1139 }, { "epoch": 0.10230867604496197, "grad_norm": 0.51370887730457, "learning_rate": 3.410110679030811e-06, "loss": 0.5437, "step": 1140 }, { "epoch": 0.102398420497633, "grad_norm": 0.5422196029319163, "learning_rate": 3.4131020041878554e-06, "loss": 0.5181, "step": 1141 }, { "epoch": 0.10248816495030401, "grad_norm": 0.5347091673456651, "learning_rate": 3.4160933293449e-06, "loss": 0.5246, "step": 1142 }, { "epoch": 0.10257790940297502, "grad_norm": 0.4868017752024818, "learning_rate": 3.4190846545019446e-06, "loss": 0.5302, "step": 1143 }, { "epoch": 0.10266765385564605, "grad_norm": 0.5189584772923498, "learning_rate": 3.422075979658989e-06, "loss": 0.5618, "step": 1144 }, { "epoch": 0.10275739830831707, "grad_norm": 0.560450842127848, "learning_rate": 3.4250673048160337e-06, "loss": 0.5675, "step": 1145 }, { "epoch": 0.10284714276098808, "grad_norm": 0.5133649214110024, "learning_rate": 3.4280586299730782e-06, "loss": 0.5367, "step": 1146 }, { "epoch": 0.10293688721365911, "grad_norm": 0.5970513624320318, "learning_rate": 3.431049955130123e-06, "loss": 0.5896, "step": 1147 }, { "epoch": 0.10302663166633012, "grad_norm": 0.5305789671558501, "learning_rate": 3.4340412802871674e-06, "loss": 0.5595, "step": 1148 }, { "epoch": 0.10311637611900114, "grad_norm": 0.5202505937493703, "learning_rate": 3.4370326054442123e-06, "loss": 0.5664, "step": 1149 }, { "epoch": 0.10320612057167217, "grad_norm": 0.4929404507579889, "learning_rate": 3.440023930601257e-06, "loss": 0.5589, "step": 1150 }, { "epoch": 0.10329586502434318, "grad_norm": 0.49410778071037537, "learning_rate": 3.4430152557583015e-06, "loss": 0.5611, "step": 1151 }, { "epoch": 0.1033856094770142, "grad_norm": 0.5224736433404802, "learning_rate": 3.446006580915346e-06, "loss": 0.558, "step": 1152 }, { "epoch": 0.10347535392968522, "grad_norm": 0.5459928045585419, "learning_rate": 3.4489979060723906e-06, "loss": 0.5461, "step": 1153 }, { "epoch": 0.10356509838235624, "grad_norm": 0.5619468679001922, "learning_rate": 3.451989231229435e-06, "loss": 0.6457, "step": 1154 }, { "epoch": 0.10365484283502727, "grad_norm": 0.5180843309841107, "learning_rate": 3.4549805563864797e-06, "loss": 0.5246, "step": 1155 }, { "epoch": 0.10374458728769828, "grad_norm": 0.5078576979025236, "learning_rate": 3.4579718815435242e-06, "loss": 0.4981, "step": 1156 }, { "epoch": 0.1038343317403693, "grad_norm": 0.544070803544927, "learning_rate": 3.460963206700569e-06, "loss": 0.5405, "step": 1157 }, { "epoch": 0.10392407619304032, "grad_norm": 0.5131226352468085, "learning_rate": 3.4639545318576134e-06, "loss": 0.4979, "step": 1158 }, { "epoch": 0.10401382064571134, "grad_norm": 0.519638161666093, "learning_rate": 3.466945857014658e-06, "loss": 0.4673, "step": 1159 }, { "epoch": 0.10410356509838235, "grad_norm": 0.5404980567921437, "learning_rate": 3.4699371821717025e-06, "loss": 0.5562, "step": 1160 }, { "epoch": 0.10419330955105338, "grad_norm": 0.4999764918220102, "learning_rate": 3.4729285073287466e-06, "loss": 0.5175, "step": 1161 }, { "epoch": 0.1042830540037244, "grad_norm": 0.5563688327572058, "learning_rate": 3.475919832485791e-06, "loss": 0.5786, "step": 1162 }, { "epoch": 0.10437279845639541, "grad_norm": 0.5744113079681319, "learning_rate": 3.4789111576428357e-06, "loss": 0.6103, "step": 1163 }, { "epoch": 0.10446254290906644, "grad_norm": 0.5183925865096531, "learning_rate": 3.4819024827998803e-06, "loss": 0.5951, "step": 1164 }, { "epoch": 0.10455228736173745, "grad_norm": 0.470058160717916, "learning_rate": 3.484893807956925e-06, "loss": 0.4882, "step": 1165 }, { "epoch": 0.10464203181440847, "grad_norm": 0.5405103263088067, "learning_rate": 3.4878851331139694e-06, "loss": 0.5796, "step": 1166 }, { "epoch": 0.1047317762670795, "grad_norm": 0.5645413622519754, "learning_rate": 3.4908764582710144e-06, "loss": 0.5959, "step": 1167 }, { "epoch": 0.10482152071975051, "grad_norm": 0.5347988337435517, "learning_rate": 3.493867783428059e-06, "loss": 0.5888, "step": 1168 }, { "epoch": 0.10491126517242153, "grad_norm": 0.5391061930750016, "learning_rate": 3.4968591085851035e-06, "loss": 0.6057, "step": 1169 }, { "epoch": 0.10500100962509255, "grad_norm": 0.5624107529586253, "learning_rate": 3.499850433742148e-06, "loss": 0.5938, "step": 1170 }, { "epoch": 0.10509075407776357, "grad_norm": 0.474316502018992, "learning_rate": 3.5028417588991926e-06, "loss": 0.4937, "step": 1171 }, { "epoch": 0.10518049853043458, "grad_norm": 0.4944940540022505, "learning_rate": 3.505833084056237e-06, "loss": 0.5146, "step": 1172 }, { "epoch": 0.10527024298310561, "grad_norm": 0.5923809901124412, "learning_rate": 3.5088244092132817e-06, "loss": 0.5936, "step": 1173 }, { "epoch": 0.10535998743577663, "grad_norm": 0.5496880574406006, "learning_rate": 3.5118157343703263e-06, "loss": 0.5807, "step": 1174 }, { "epoch": 0.10544973188844764, "grad_norm": 0.5964450297896655, "learning_rate": 3.514807059527371e-06, "loss": 0.6347, "step": 1175 }, { "epoch": 0.10553947634111867, "grad_norm": 0.5446647759997333, "learning_rate": 3.5177983846844154e-06, "loss": 0.597, "step": 1176 }, { "epoch": 0.10562922079378968, "grad_norm": 0.500920655331629, "learning_rate": 3.52078970984146e-06, "loss": 0.5267, "step": 1177 }, { "epoch": 0.1057189652464607, "grad_norm": 0.5215913184848825, "learning_rate": 3.5237810349985045e-06, "loss": 0.5493, "step": 1178 }, { "epoch": 0.10580870969913173, "grad_norm": 0.5087178720136964, "learning_rate": 3.526772360155549e-06, "loss": 0.5432, "step": 1179 }, { "epoch": 0.10589845415180274, "grad_norm": 0.5466386216969098, "learning_rate": 3.5297636853125936e-06, "loss": 0.6386, "step": 1180 }, { "epoch": 0.10598819860447375, "grad_norm": 0.5467999613591035, "learning_rate": 3.5327550104696386e-06, "loss": 0.5595, "step": 1181 }, { "epoch": 0.10607794305714478, "grad_norm": 0.559464218254072, "learning_rate": 3.535746335626683e-06, "loss": 0.5464, "step": 1182 }, { "epoch": 0.1061676875098158, "grad_norm": 0.5404266211294432, "learning_rate": 3.5387376607837277e-06, "loss": 0.6066, "step": 1183 }, { "epoch": 0.10625743196248681, "grad_norm": 0.5508086473308944, "learning_rate": 3.5417289859407723e-06, "loss": 0.5346, "step": 1184 }, { "epoch": 0.10634717641515784, "grad_norm": 0.50417929962099, "learning_rate": 3.544720311097817e-06, "loss": 0.5211, "step": 1185 }, { "epoch": 0.10643692086782885, "grad_norm": 0.4937073016723477, "learning_rate": 3.5477116362548614e-06, "loss": 0.5071, "step": 1186 }, { "epoch": 0.10652666532049988, "grad_norm": 0.49749498762102656, "learning_rate": 3.550702961411906e-06, "loss": 0.495, "step": 1187 }, { "epoch": 0.1066164097731709, "grad_norm": 0.5014915591934451, "learning_rate": 3.5536942865689505e-06, "loss": 0.5235, "step": 1188 }, { "epoch": 0.10670615422584191, "grad_norm": 0.5303264991367196, "learning_rate": 3.556685611725995e-06, "loss": 0.5715, "step": 1189 }, { "epoch": 0.10679589867851294, "grad_norm": 0.4810639591764342, "learning_rate": 3.5596769368830396e-06, "loss": 0.493, "step": 1190 }, { "epoch": 0.10688564313118395, "grad_norm": 0.4916761201865361, "learning_rate": 3.562668262040084e-06, "loss": 0.5036, "step": 1191 }, { "epoch": 0.10697538758385497, "grad_norm": 0.5743291740564819, "learning_rate": 3.5656595871971283e-06, "loss": 0.5455, "step": 1192 }, { "epoch": 0.107065132036526, "grad_norm": 0.5850723969015202, "learning_rate": 3.568650912354173e-06, "loss": 0.6865, "step": 1193 }, { "epoch": 0.10715487648919701, "grad_norm": 0.5039770259809849, "learning_rate": 3.5716422375112175e-06, "loss": 0.5398, "step": 1194 }, { "epoch": 0.10724462094186803, "grad_norm": 0.5451495459037823, "learning_rate": 3.574633562668262e-06, "loss": 0.5483, "step": 1195 }, { "epoch": 0.10733436539453906, "grad_norm": 0.5333480531185654, "learning_rate": 3.5776248878253066e-06, "loss": 0.5538, "step": 1196 }, { "epoch": 0.10742410984721007, "grad_norm": 0.5819534073109631, "learning_rate": 3.580616212982351e-06, "loss": 0.5853, "step": 1197 }, { "epoch": 0.10751385429988108, "grad_norm": 0.579525087509147, "learning_rate": 3.5836075381393957e-06, "loss": 0.6234, "step": 1198 }, { "epoch": 0.10760359875255211, "grad_norm": 0.4945760273915269, "learning_rate": 3.5865988632964407e-06, "loss": 0.5212, "step": 1199 }, { "epoch": 0.10769334320522313, "grad_norm": 0.5276838884157375, "learning_rate": 3.5895901884534852e-06, "loss": 0.5522, "step": 1200 }, { "epoch": 0.10778308765789414, "grad_norm": 0.4913801945646372, "learning_rate": 3.59258151361053e-06, "loss": 0.5249, "step": 1201 }, { "epoch": 0.10787283211056517, "grad_norm": 0.5072603065712399, "learning_rate": 3.5955728387675743e-06, "loss": 0.518, "step": 1202 }, { "epoch": 0.10796257656323618, "grad_norm": 0.5031846515647092, "learning_rate": 3.598564163924619e-06, "loss": 0.5039, "step": 1203 }, { "epoch": 0.1080523210159072, "grad_norm": 0.5327507892124804, "learning_rate": 3.6015554890816635e-06, "loss": 0.5654, "step": 1204 }, { "epoch": 0.10814206546857823, "grad_norm": 0.5326844797695341, "learning_rate": 3.604546814238708e-06, "loss": 0.5586, "step": 1205 }, { "epoch": 0.10823180992124924, "grad_norm": 0.6249656841898938, "learning_rate": 3.6075381393957526e-06, "loss": 0.5872, "step": 1206 }, { "epoch": 0.10832155437392026, "grad_norm": 0.525859057149575, "learning_rate": 3.610529464552797e-06, "loss": 0.5506, "step": 1207 }, { "epoch": 0.10841129882659128, "grad_norm": 0.5219315901009624, "learning_rate": 3.6135207897098417e-06, "loss": 0.5568, "step": 1208 }, { "epoch": 0.1085010432792623, "grad_norm": 0.5298606270195432, "learning_rate": 3.6165121148668863e-06, "loss": 0.6121, "step": 1209 }, { "epoch": 0.10859078773193331, "grad_norm": 0.4949697362830915, "learning_rate": 3.619503440023931e-06, "loss": 0.4997, "step": 1210 }, { "epoch": 0.10868053218460434, "grad_norm": 0.5225246611560942, "learning_rate": 3.6224947651809754e-06, "loss": 0.5251, "step": 1211 }, { "epoch": 0.10877027663727536, "grad_norm": 0.519286532867047, "learning_rate": 3.62548609033802e-06, "loss": 0.5324, "step": 1212 }, { "epoch": 0.10886002108994637, "grad_norm": 0.5299575983320418, "learning_rate": 3.628477415495065e-06, "loss": 0.535, "step": 1213 }, { "epoch": 0.1089497655426174, "grad_norm": 0.5222887261956666, "learning_rate": 3.6314687406521095e-06, "loss": 0.5437, "step": 1214 }, { "epoch": 0.10903950999528841, "grad_norm": 0.5411146586023122, "learning_rate": 3.634460065809154e-06, "loss": 0.5587, "step": 1215 }, { "epoch": 0.10912925444795944, "grad_norm": 0.5455957479065512, "learning_rate": 3.6374513909661986e-06, "loss": 0.5755, "step": 1216 }, { "epoch": 0.10921899890063046, "grad_norm": 0.5083037766988705, "learning_rate": 3.640442716123243e-06, "loss": 0.5181, "step": 1217 }, { "epoch": 0.10930874335330147, "grad_norm": 0.47981483601076136, "learning_rate": 3.6434340412802877e-06, "loss": 0.5061, "step": 1218 }, { "epoch": 0.1093984878059725, "grad_norm": 0.5129809993535657, "learning_rate": 3.6464253664373323e-06, "loss": 0.5514, "step": 1219 }, { "epoch": 0.10948823225864351, "grad_norm": 0.5085625444763934, "learning_rate": 3.649416691594377e-06, "loss": 0.5248, "step": 1220 }, { "epoch": 0.10957797671131453, "grad_norm": 0.4836256264403442, "learning_rate": 3.6524080167514214e-06, "loss": 0.5057, "step": 1221 }, { "epoch": 0.10966772116398556, "grad_norm": 0.5055046543740948, "learning_rate": 3.655399341908466e-06, "loss": 0.514, "step": 1222 }, { "epoch": 0.10975746561665657, "grad_norm": 0.5054057897839904, "learning_rate": 3.65839066706551e-06, "loss": 0.5687, "step": 1223 }, { "epoch": 0.10984721006932759, "grad_norm": 0.5197175419098558, "learning_rate": 3.6613819922225546e-06, "loss": 0.5333, "step": 1224 }, { "epoch": 0.10993695452199861, "grad_norm": 0.5409497390338371, "learning_rate": 3.664373317379599e-06, "loss": 0.5496, "step": 1225 }, { "epoch": 0.11002669897466963, "grad_norm": 0.5239396819211524, "learning_rate": 3.6673646425366437e-06, "loss": 0.5725, "step": 1226 }, { "epoch": 0.11011644342734064, "grad_norm": 0.5388212038399087, "learning_rate": 3.6703559676936883e-06, "loss": 0.5656, "step": 1227 }, { "epoch": 0.11020618788001167, "grad_norm": 0.5315201515576908, "learning_rate": 3.673347292850733e-06, "loss": 0.583, "step": 1228 }, { "epoch": 0.11029593233268269, "grad_norm": 0.5215947890302475, "learning_rate": 3.6763386180077774e-06, "loss": 0.53, "step": 1229 }, { "epoch": 0.1103856767853537, "grad_norm": 0.499961921633891, "learning_rate": 3.679329943164822e-06, "loss": 0.4962, "step": 1230 }, { "epoch": 0.11047542123802473, "grad_norm": 0.5398253618578756, "learning_rate": 3.682321268321867e-06, "loss": 0.5675, "step": 1231 }, { "epoch": 0.11056516569069574, "grad_norm": 0.5200037567966537, "learning_rate": 3.6853125934789115e-06, "loss": 0.5717, "step": 1232 }, { "epoch": 0.11065491014336676, "grad_norm": 0.5399122307147823, "learning_rate": 3.688303918635956e-06, "loss": 0.5469, "step": 1233 }, { "epoch": 0.11074465459603779, "grad_norm": 0.5202466800026648, "learning_rate": 3.6912952437930006e-06, "loss": 0.5529, "step": 1234 }, { "epoch": 0.1108343990487088, "grad_norm": 0.530195716507724, "learning_rate": 3.694286568950045e-06, "loss": 0.5257, "step": 1235 }, { "epoch": 0.11092414350137982, "grad_norm": 0.5601574497627658, "learning_rate": 3.6972778941070898e-06, "loss": 0.5841, "step": 1236 }, { "epoch": 0.11101388795405084, "grad_norm": 0.568102271501827, "learning_rate": 3.7002692192641343e-06, "loss": 0.6394, "step": 1237 }, { "epoch": 0.11110363240672186, "grad_norm": 0.5440969664139222, "learning_rate": 3.703260544421179e-06, "loss": 0.587, "step": 1238 }, { "epoch": 0.11119337685939287, "grad_norm": 0.5066157696910416, "learning_rate": 3.7062518695782234e-06, "loss": 0.5233, "step": 1239 }, { "epoch": 0.1112831213120639, "grad_norm": 0.5542405117339848, "learning_rate": 3.709243194735268e-06, "loss": 0.6263, "step": 1240 }, { "epoch": 0.11137286576473492, "grad_norm": 0.5267954087423476, "learning_rate": 3.7122345198923125e-06, "loss": 0.5789, "step": 1241 }, { "epoch": 0.11146261021740593, "grad_norm": 0.5096353345863011, "learning_rate": 3.715225845049357e-06, "loss": 0.5012, "step": 1242 }, { "epoch": 0.11155235467007696, "grad_norm": 0.5477702241907486, "learning_rate": 3.7182171702064017e-06, "loss": 0.5923, "step": 1243 }, { "epoch": 0.11164209912274797, "grad_norm": 0.541214827938929, "learning_rate": 3.7212084953634462e-06, "loss": 0.573, "step": 1244 }, { "epoch": 0.11173184357541899, "grad_norm": 0.5600159495250366, "learning_rate": 3.724199820520491e-06, "loss": 0.5632, "step": 1245 }, { "epoch": 0.11182158802809002, "grad_norm": 0.614063183071651, "learning_rate": 3.7271911456775358e-06, "loss": 0.6356, "step": 1246 }, { "epoch": 0.11191133248076103, "grad_norm": 0.5094064920791921, "learning_rate": 3.7301824708345803e-06, "loss": 0.5445, "step": 1247 }, { "epoch": 0.11200107693343206, "grad_norm": 0.5012977915681988, "learning_rate": 3.733173795991625e-06, "loss": 0.4887, "step": 1248 }, { "epoch": 0.11209082138610307, "grad_norm": 0.523266177118671, "learning_rate": 3.7361651211486694e-06, "loss": 0.5586, "step": 1249 }, { "epoch": 0.11218056583877409, "grad_norm": 0.48353051358500976, "learning_rate": 3.739156446305714e-06, "loss": 0.4765, "step": 1250 }, { "epoch": 0.11227031029144512, "grad_norm": 0.5094772832726893, "learning_rate": 3.7421477714627586e-06, "loss": 0.5402, "step": 1251 }, { "epoch": 0.11236005474411613, "grad_norm": 0.5286790999352703, "learning_rate": 3.745139096619803e-06, "loss": 0.5088, "step": 1252 }, { "epoch": 0.11244979919678715, "grad_norm": 0.5660008143082517, "learning_rate": 3.7481304217768477e-06, "loss": 0.602, "step": 1253 }, { "epoch": 0.11253954364945817, "grad_norm": 0.5301835347854363, "learning_rate": 3.751121746933892e-06, "loss": 0.5643, "step": 1254 }, { "epoch": 0.11262928810212919, "grad_norm": 0.5128611160912625, "learning_rate": 3.7541130720909364e-06, "loss": 0.5421, "step": 1255 }, { "epoch": 0.1127190325548002, "grad_norm": 0.5367691156351162, "learning_rate": 3.757104397247981e-06, "loss": 0.5801, "step": 1256 }, { "epoch": 0.11280877700747123, "grad_norm": 0.5212225698165229, "learning_rate": 3.7600957224050255e-06, "loss": 0.5418, "step": 1257 }, { "epoch": 0.11289852146014225, "grad_norm": 0.5388960758935619, "learning_rate": 3.76308704756207e-06, "loss": 0.5839, "step": 1258 }, { "epoch": 0.11298826591281326, "grad_norm": 0.5249471577584836, "learning_rate": 3.7660783727191146e-06, "loss": 0.5188, "step": 1259 }, { "epoch": 0.11307801036548429, "grad_norm": 0.4885941526300208, "learning_rate": 3.769069697876159e-06, "loss": 0.5358, "step": 1260 }, { "epoch": 0.1131677548181553, "grad_norm": 0.5263048647905463, "learning_rate": 3.7720610230332037e-06, "loss": 0.5121, "step": 1261 }, { "epoch": 0.11325749927082632, "grad_norm": 0.549384884743236, "learning_rate": 3.7750523481902483e-06, "loss": 0.5901, "step": 1262 }, { "epoch": 0.11334724372349735, "grad_norm": 0.5089974000972176, "learning_rate": 3.7780436733472933e-06, "loss": 0.5199, "step": 1263 }, { "epoch": 0.11343698817616836, "grad_norm": 0.4439632773385372, "learning_rate": 3.781034998504338e-06, "loss": 0.4373, "step": 1264 }, { "epoch": 0.11352673262883937, "grad_norm": 0.5329396327722544, "learning_rate": 3.7840263236613824e-06, "loss": 0.5406, "step": 1265 }, { "epoch": 0.1136164770815104, "grad_norm": 0.5803087643738692, "learning_rate": 3.787017648818427e-06, "loss": 0.58, "step": 1266 }, { "epoch": 0.11370622153418142, "grad_norm": 0.5337822975363443, "learning_rate": 3.7900089739754715e-06, "loss": 0.6, "step": 1267 }, { "epoch": 0.11379596598685243, "grad_norm": 0.5620468470591531, "learning_rate": 3.793000299132516e-06, "loss": 0.6346, "step": 1268 }, { "epoch": 0.11388571043952346, "grad_norm": 0.5159927665255056, "learning_rate": 3.7959916242895606e-06, "loss": 0.5101, "step": 1269 }, { "epoch": 0.11397545489219447, "grad_norm": 0.5187876722363773, "learning_rate": 3.798982949446605e-06, "loss": 0.5762, "step": 1270 }, { "epoch": 0.11406519934486549, "grad_norm": 0.5563601669140362, "learning_rate": 3.8019742746036497e-06, "loss": 0.597, "step": 1271 }, { "epoch": 0.11415494379753652, "grad_norm": 0.47222926741554644, "learning_rate": 3.8049655997606943e-06, "loss": 0.5003, "step": 1272 }, { "epoch": 0.11424468825020753, "grad_norm": 0.6134348376678181, "learning_rate": 3.807956924917739e-06, "loss": 0.6658, "step": 1273 }, { "epoch": 0.11433443270287855, "grad_norm": 0.5013086761906834, "learning_rate": 3.8109482500747834e-06, "loss": 0.4925, "step": 1274 }, { "epoch": 0.11442417715554958, "grad_norm": 0.5203396259950717, "learning_rate": 3.813939575231828e-06, "loss": 0.5111, "step": 1275 }, { "epoch": 0.11451392160822059, "grad_norm": 0.5324218655943793, "learning_rate": 3.8169309003888725e-06, "loss": 0.5922, "step": 1276 }, { "epoch": 0.11460366606089162, "grad_norm": 0.5408034443398251, "learning_rate": 3.819922225545917e-06, "loss": 0.5734, "step": 1277 }, { "epoch": 0.11469341051356263, "grad_norm": 0.5054467964516283, "learning_rate": 3.822913550702962e-06, "loss": 0.526, "step": 1278 }, { "epoch": 0.11478315496623365, "grad_norm": 0.5271102388603397, "learning_rate": 3.825904875860006e-06, "loss": 0.5049, "step": 1279 }, { "epoch": 0.11487289941890468, "grad_norm": 0.5235783871872423, "learning_rate": 3.828896201017051e-06, "loss": 0.5115, "step": 1280 }, { "epoch": 0.11496264387157569, "grad_norm": 0.5320967743901303, "learning_rate": 3.831887526174095e-06, "loss": 0.5363, "step": 1281 }, { "epoch": 0.1150523883242467, "grad_norm": 0.5082117021031449, "learning_rate": 3.83487885133114e-06, "loss": 0.565, "step": 1282 }, { "epoch": 0.11514213277691773, "grad_norm": 0.48709134479668115, "learning_rate": 3.837870176488184e-06, "loss": 0.5057, "step": 1283 }, { "epoch": 0.11523187722958875, "grad_norm": 0.5770059299087272, "learning_rate": 3.84086150164523e-06, "loss": 0.6436, "step": 1284 }, { "epoch": 0.11532162168225976, "grad_norm": 0.5388507331904608, "learning_rate": 3.8438528268022735e-06, "loss": 0.6008, "step": 1285 }, { "epoch": 0.11541136613493079, "grad_norm": 0.5119204652300217, "learning_rate": 3.846844151959318e-06, "loss": 0.5469, "step": 1286 }, { "epoch": 0.1155011105876018, "grad_norm": 0.5677107317281517, "learning_rate": 3.849835477116363e-06, "loss": 0.6499, "step": 1287 }, { "epoch": 0.11559085504027282, "grad_norm": 0.5182658827482376, "learning_rate": 3.852826802273407e-06, "loss": 0.5455, "step": 1288 }, { "epoch": 0.11568059949294385, "grad_norm": 0.4938156411471973, "learning_rate": 3.855818127430452e-06, "loss": 0.4791, "step": 1289 }, { "epoch": 0.11577034394561486, "grad_norm": 0.5154208675608634, "learning_rate": 3.858809452587496e-06, "loss": 0.5439, "step": 1290 }, { "epoch": 0.11586008839828588, "grad_norm": 0.5475798696926272, "learning_rate": 3.861800777744541e-06, "loss": 0.6014, "step": 1291 }, { "epoch": 0.1159498328509569, "grad_norm": 0.5633250347068888, "learning_rate": 3.8647921029015854e-06, "loss": 0.5981, "step": 1292 }, { "epoch": 0.11603957730362792, "grad_norm": 0.5560191485681927, "learning_rate": 3.86778342805863e-06, "loss": 0.5688, "step": 1293 }, { "epoch": 0.11612932175629893, "grad_norm": 0.5299257445883421, "learning_rate": 3.8707747532156746e-06, "loss": 0.5414, "step": 1294 }, { "epoch": 0.11621906620896996, "grad_norm": 0.5395138453905143, "learning_rate": 3.873766078372719e-06, "loss": 0.5755, "step": 1295 }, { "epoch": 0.11630881066164098, "grad_norm": 0.49598555036243713, "learning_rate": 3.876757403529764e-06, "loss": 0.5086, "step": 1296 }, { "epoch": 0.11639855511431199, "grad_norm": 0.5565307241727301, "learning_rate": 3.879748728686808e-06, "loss": 0.6006, "step": 1297 }, { "epoch": 0.11648829956698302, "grad_norm": 0.5097938161210147, "learning_rate": 3.882740053843853e-06, "loss": 0.5086, "step": 1298 }, { "epoch": 0.11657804401965403, "grad_norm": 0.5478377479806009, "learning_rate": 3.885731379000897e-06, "loss": 0.6012, "step": 1299 }, { "epoch": 0.11666778847232505, "grad_norm": 0.5083232774077898, "learning_rate": 3.888722704157942e-06, "loss": 0.5943, "step": 1300 }, { "epoch": 0.11675753292499608, "grad_norm": 0.5569329340926085, "learning_rate": 3.8917140293149865e-06, "loss": 0.576, "step": 1301 }, { "epoch": 0.11684727737766709, "grad_norm": 0.5537987894195843, "learning_rate": 3.894705354472032e-06, "loss": 0.5957, "step": 1302 }, { "epoch": 0.1169370218303381, "grad_norm": 0.5430477256894626, "learning_rate": 3.8976966796290764e-06, "loss": 0.5336, "step": 1303 }, { "epoch": 0.11702676628300913, "grad_norm": 0.5149782199670386, "learning_rate": 3.900688004786121e-06, "loss": 0.5369, "step": 1304 }, { "epoch": 0.11711651073568015, "grad_norm": 0.5069703685978504, "learning_rate": 3.9036793299431655e-06, "loss": 0.5388, "step": 1305 }, { "epoch": 0.11720625518835116, "grad_norm": 0.5421843703933251, "learning_rate": 3.90667065510021e-06, "loss": 0.5444, "step": 1306 }, { "epoch": 0.11729599964102219, "grad_norm": 0.5723976698101823, "learning_rate": 3.909661980257255e-06, "loss": 0.5821, "step": 1307 }, { "epoch": 0.1173857440936932, "grad_norm": 0.5124924442931925, "learning_rate": 3.912653305414299e-06, "loss": 0.5189, "step": 1308 }, { "epoch": 0.11747548854636423, "grad_norm": 0.5272132130591551, "learning_rate": 3.915644630571344e-06, "loss": 0.5907, "step": 1309 }, { "epoch": 0.11756523299903525, "grad_norm": 0.5582183457578999, "learning_rate": 3.918635955728388e-06, "loss": 0.6068, "step": 1310 }, { "epoch": 0.11765497745170626, "grad_norm": 0.5491167689661435, "learning_rate": 3.921627280885433e-06, "loss": 0.5588, "step": 1311 }, { "epoch": 0.11774472190437729, "grad_norm": 0.4894802450087221, "learning_rate": 3.9246186060424775e-06, "loss": 0.5006, "step": 1312 }, { "epoch": 0.1178344663570483, "grad_norm": 0.5631850936766992, "learning_rate": 3.927609931199522e-06, "loss": 0.5291, "step": 1313 }, { "epoch": 0.11792421080971932, "grad_norm": 0.5177671582424702, "learning_rate": 3.9306012563565666e-06, "loss": 0.5559, "step": 1314 }, { "epoch": 0.11801395526239035, "grad_norm": 0.5120169878500918, "learning_rate": 3.933592581513611e-06, "loss": 0.5334, "step": 1315 }, { "epoch": 0.11810369971506136, "grad_norm": 0.5155756748367661, "learning_rate": 3.936583906670655e-06, "loss": 0.5719, "step": 1316 }, { "epoch": 0.11819344416773238, "grad_norm": 0.499300557968202, "learning_rate": 3.939575231827699e-06, "loss": 0.5366, "step": 1317 }, { "epoch": 0.1182831886204034, "grad_norm": 0.47902380858289656, "learning_rate": 3.942566556984744e-06, "loss": 0.462, "step": 1318 }, { "epoch": 0.11837293307307442, "grad_norm": 0.4936100174371434, "learning_rate": 3.9455578821417885e-06, "loss": 0.4924, "step": 1319 }, { "epoch": 0.11846267752574544, "grad_norm": 0.4740619354797439, "learning_rate": 3.948549207298834e-06, "loss": 0.5305, "step": 1320 }, { "epoch": 0.11855242197841646, "grad_norm": 0.5609310816269055, "learning_rate": 3.9515405324558785e-06, "loss": 0.53, "step": 1321 }, { "epoch": 0.11864216643108748, "grad_norm": 0.5531258653650754, "learning_rate": 3.954531857612923e-06, "loss": 0.6265, "step": 1322 }, { "epoch": 0.11873191088375849, "grad_norm": 0.5554036317472275, "learning_rate": 3.957523182769968e-06, "loss": 0.5547, "step": 1323 }, { "epoch": 0.11882165533642952, "grad_norm": 0.6106860547626003, "learning_rate": 3.960514507927012e-06, "loss": 0.5792, "step": 1324 }, { "epoch": 0.11891139978910054, "grad_norm": 0.553411157203131, "learning_rate": 3.963505833084057e-06, "loss": 0.6233, "step": 1325 }, { "epoch": 0.11900114424177155, "grad_norm": 0.5180832635528105, "learning_rate": 3.966497158241101e-06, "loss": 0.5312, "step": 1326 }, { "epoch": 0.11909088869444258, "grad_norm": 0.4825919629335359, "learning_rate": 3.969488483398146e-06, "loss": 0.5001, "step": 1327 }, { "epoch": 0.1191806331471136, "grad_norm": 0.4946878077337458, "learning_rate": 3.97247980855519e-06, "loss": 0.485, "step": 1328 }, { "epoch": 0.11927037759978461, "grad_norm": 0.5787192637475264, "learning_rate": 3.975471133712235e-06, "loss": 0.6213, "step": 1329 }, { "epoch": 0.11936012205245564, "grad_norm": 0.4827738593855282, "learning_rate": 3.9784624588692795e-06, "loss": 0.5215, "step": 1330 }, { "epoch": 0.11944986650512665, "grad_norm": 0.5508358691345494, "learning_rate": 3.981453784026324e-06, "loss": 0.5484, "step": 1331 }, { "epoch": 0.11953961095779767, "grad_norm": 0.5922663342637355, "learning_rate": 3.984445109183369e-06, "loss": 0.6776, "step": 1332 }, { "epoch": 0.1196293554104687, "grad_norm": 0.5272705580089537, "learning_rate": 3.987436434340413e-06, "loss": 0.5337, "step": 1333 }, { "epoch": 0.11971909986313971, "grad_norm": 0.5577567436810461, "learning_rate": 3.990427759497458e-06, "loss": 0.5549, "step": 1334 }, { "epoch": 0.11980884431581072, "grad_norm": 0.5293485516216827, "learning_rate": 3.993419084654502e-06, "loss": 0.5729, "step": 1335 }, { "epoch": 0.11989858876848175, "grad_norm": 0.5371033520174133, "learning_rate": 3.996410409811547e-06, "loss": 0.4737, "step": 1336 }, { "epoch": 0.11998833322115277, "grad_norm": 0.49046231756828673, "learning_rate": 3.999401734968591e-06, "loss": 0.4883, "step": 1337 }, { "epoch": 0.12007807767382378, "grad_norm": 0.5602588061011606, "learning_rate": 4.002393060125636e-06, "loss": 0.5464, "step": 1338 }, { "epoch": 0.12016782212649481, "grad_norm": 0.5485518825527617, "learning_rate": 4.0053843852826805e-06, "loss": 0.575, "step": 1339 }, { "epoch": 0.12025756657916582, "grad_norm": 0.5075721910119158, "learning_rate": 4.008375710439725e-06, "loss": 0.4867, "step": 1340 }, { "epoch": 0.12034731103183685, "grad_norm": 0.5509942272901575, "learning_rate": 4.01136703559677e-06, "loss": 0.5483, "step": 1341 }, { "epoch": 0.12043705548450787, "grad_norm": 0.5503021258316978, "learning_rate": 4.014358360753814e-06, "loss": 0.5599, "step": 1342 }, { "epoch": 0.12052679993717888, "grad_norm": 0.5053539180815858, "learning_rate": 4.017349685910859e-06, "loss": 0.5089, "step": 1343 }, { "epoch": 0.12061654438984991, "grad_norm": 0.52548375177757, "learning_rate": 4.020341011067903e-06, "loss": 0.5199, "step": 1344 }, { "epoch": 0.12070628884252092, "grad_norm": 0.4697708159171437, "learning_rate": 4.023332336224948e-06, "loss": 0.435, "step": 1345 }, { "epoch": 0.12079603329519194, "grad_norm": 0.5123155679944249, "learning_rate": 4.0263236613819924e-06, "loss": 0.5179, "step": 1346 }, { "epoch": 0.12088577774786297, "grad_norm": 0.5838426023558941, "learning_rate": 4.029314986539037e-06, "loss": 0.5644, "step": 1347 }, { "epoch": 0.12097552220053398, "grad_norm": 0.5312585064646042, "learning_rate": 4.0323063116960816e-06, "loss": 0.6075, "step": 1348 }, { "epoch": 0.121065266653205, "grad_norm": 0.48822689164009436, "learning_rate": 4.035297636853126e-06, "loss": 0.5401, "step": 1349 }, { "epoch": 0.12115501110587602, "grad_norm": 0.5150879621795162, "learning_rate": 4.038288962010171e-06, "loss": 0.4989, "step": 1350 }, { "epoch": 0.12124475555854704, "grad_norm": 0.570731463157951, "learning_rate": 4.041280287167215e-06, "loss": 0.5858, "step": 1351 }, { "epoch": 0.12133450001121805, "grad_norm": 0.5358967761672172, "learning_rate": 4.04427161232426e-06, "loss": 0.6106, "step": 1352 }, { "epoch": 0.12142424446388908, "grad_norm": 0.5296326242573342, "learning_rate": 4.047262937481304e-06, "loss": 0.5183, "step": 1353 }, { "epoch": 0.1215139889165601, "grad_norm": 0.5001581296809517, "learning_rate": 4.050254262638349e-06, "loss": 0.5184, "step": 1354 }, { "epoch": 0.12160373336923111, "grad_norm": 0.54162709742366, "learning_rate": 4.0532455877953935e-06, "loss": 0.5771, "step": 1355 }, { "epoch": 0.12169347782190214, "grad_norm": 0.5406066605010256, "learning_rate": 4.056236912952438e-06, "loss": 0.5367, "step": 1356 }, { "epoch": 0.12178322227457315, "grad_norm": 0.596685618329206, "learning_rate": 4.059228238109483e-06, "loss": 0.6351, "step": 1357 }, { "epoch": 0.12187296672724417, "grad_norm": 0.5030160520995237, "learning_rate": 4.062219563266527e-06, "loss": 0.5232, "step": 1358 }, { "epoch": 0.1219627111799152, "grad_norm": 0.4995347076595776, "learning_rate": 4.065210888423572e-06, "loss": 0.5139, "step": 1359 }, { "epoch": 0.12205245563258621, "grad_norm": 0.4757458706882947, "learning_rate": 4.068202213580616e-06, "loss": 0.4825, "step": 1360 }, { "epoch": 0.12214220008525722, "grad_norm": 0.506035242687179, "learning_rate": 4.071193538737661e-06, "loss": 0.5312, "step": 1361 }, { "epoch": 0.12223194453792825, "grad_norm": 0.530382307373905, "learning_rate": 4.074184863894705e-06, "loss": 0.5664, "step": 1362 }, { "epoch": 0.12232168899059927, "grad_norm": 0.546180986694299, "learning_rate": 4.07717618905175e-06, "loss": 0.5858, "step": 1363 }, { "epoch": 0.12241143344327028, "grad_norm": 0.48363959415694235, "learning_rate": 4.0801675142087945e-06, "loss": 0.4892, "step": 1364 }, { "epoch": 0.12250117789594131, "grad_norm": 0.5458073655802044, "learning_rate": 4.083158839365839e-06, "loss": 0.5873, "step": 1365 }, { "epoch": 0.12259092234861232, "grad_norm": 0.5208659223059197, "learning_rate": 4.0861501645228845e-06, "loss": 0.5496, "step": 1366 }, { "epoch": 0.12268066680128334, "grad_norm": 0.5222101563358097, "learning_rate": 4.089141489679929e-06, "loss": 0.5611, "step": 1367 }, { "epoch": 0.12277041125395437, "grad_norm": 0.5041671390017456, "learning_rate": 4.0921328148369736e-06, "loss": 0.5471, "step": 1368 }, { "epoch": 0.12286015570662538, "grad_norm": 0.6079399117740579, "learning_rate": 4.095124139994018e-06, "loss": 0.7042, "step": 1369 }, { "epoch": 0.12294990015929641, "grad_norm": 0.5985663520991724, "learning_rate": 4.098115465151063e-06, "loss": 0.5827, "step": 1370 }, { "epoch": 0.12303964461196742, "grad_norm": 0.5782142532993777, "learning_rate": 4.101106790308107e-06, "loss": 0.5302, "step": 1371 }, { "epoch": 0.12312938906463844, "grad_norm": 0.5123644467396539, "learning_rate": 4.104098115465152e-06, "loss": 0.4838, "step": 1372 }, { "epoch": 0.12321913351730947, "grad_norm": 0.5177523669639491, "learning_rate": 4.107089440622196e-06, "loss": 0.5369, "step": 1373 }, { "epoch": 0.12330887796998048, "grad_norm": 0.5240543543885385, "learning_rate": 4.110080765779241e-06, "loss": 0.6099, "step": 1374 }, { "epoch": 0.1233986224226515, "grad_norm": 0.5215852295298744, "learning_rate": 4.1130720909362855e-06, "loss": 0.5125, "step": 1375 }, { "epoch": 0.12348836687532252, "grad_norm": 0.5374078530506192, "learning_rate": 4.11606341609333e-06, "loss": 0.5802, "step": 1376 }, { "epoch": 0.12357811132799354, "grad_norm": 0.5604963192517116, "learning_rate": 4.119054741250375e-06, "loss": 0.5866, "step": 1377 }, { "epoch": 0.12366785578066455, "grad_norm": 0.4888458254349573, "learning_rate": 4.122046066407418e-06, "loss": 0.5352, "step": 1378 }, { "epoch": 0.12375760023333558, "grad_norm": 0.5202969781134398, "learning_rate": 4.125037391564463e-06, "loss": 0.5713, "step": 1379 }, { "epoch": 0.1238473446860066, "grad_norm": 0.48317318661864006, "learning_rate": 4.128028716721507e-06, "loss": 0.4775, "step": 1380 }, { "epoch": 0.12393708913867761, "grad_norm": 0.5759886951769269, "learning_rate": 4.131020041878552e-06, "loss": 0.5839, "step": 1381 }, { "epoch": 0.12402683359134864, "grad_norm": 0.5759115142173272, "learning_rate": 4.1340113670355965e-06, "loss": 0.6025, "step": 1382 }, { "epoch": 0.12411657804401965, "grad_norm": 0.503086460040593, "learning_rate": 4.137002692192641e-06, "loss": 0.5103, "step": 1383 }, { "epoch": 0.12420632249669067, "grad_norm": 0.5301082396183138, "learning_rate": 4.1399940173496865e-06, "loss": 0.5628, "step": 1384 }, { "epoch": 0.1242960669493617, "grad_norm": 0.5034535258488216, "learning_rate": 4.142985342506731e-06, "loss": 0.5339, "step": 1385 }, { "epoch": 0.12438581140203271, "grad_norm": 0.4957430086067334, "learning_rate": 4.145976667663776e-06, "loss": 0.4946, "step": 1386 }, { "epoch": 0.12447555585470373, "grad_norm": 0.5105535661685875, "learning_rate": 4.14896799282082e-06, "loss": 0.5442, "step": 1387 }, { "epoch": 0.12456530030737475, "grad_norm": 0.5175271699459337, "learning_rate": 4.151959317977865e-06, "loss": 0.5607, "step": 1388 }, { "epoch": 0.12465504476004577, "grad_norm": 0.4879940199213417, "learning_rate": 4.154950643134909e-06, "loss": 0.5086, "step": 1389 }, { "epoch": 0.12474478921271678, "grad_norm": 0.5119574363841968, "learning_rate": 4.157941968291954e-06, "loss": 0.5637, "step": 1390 }, { "epoch": 0.12483453366538781, "grad_norm": 0.5197589542891042, "learning_rate": 4.160933293448998e-06, "loss": 0.5408, "step": 1391 }, { "epoch": 0.12492427811805883, "grad_norm": 0.5235787008011866, "learning_rate": 4.163924618606043e-06, "loss": 0.6009, "step": 1392 }, { "epoch": 0.12501402257072985, "grad_norm": 0.5306593681133394, "learning_rate": 4.1669159437630875e-06, "loss": 0.5229, "step": 1393 }, { "epoch": 0.12510376702340087, "grad_norm": 0.5416118351035042, "learning_rate": 4.169907268920132e-06, "loss": 0.5893, "step": 1394 }, { "epoch": 0.12519351147607188, "grad_norm": 0.553593847337661, "learning_rate": 4.172898594077177e-06, "loss": 0.6259, "step": 1395 }, { "epoch": 0.1252832559287429, "grad_norm": 0.5734669829113075, "learning_rate": 4.175889919234221e-06, "loss": 0.6054, "step": 1396 }, { "epoch": 0.1253730003814139, "grad_norm": 0.46962297848460094, "learning_rate": 4.178881244391266e-06, "loss": 0.5154, "step": 1397 }, { "epoch": 0.12546274483408495, "grad_norm": 0.496872229273045, "learning_rate": 4.18187256954831e-06, "loss": 0.5412, "step": 1398 }, { "epoch": 0.12555248928675597, "grad_norm": 0.49856608774050193, "learning_rate": 4.184863894705355e-06, "loss": 0.5406, "step": 1399 }, { "epoch": 0.12564223373942698, "grad_norm": 0.5515172690701591, "learning_rate": 4.1878552198623994e-06, "loss": 0.6179, "step": 1400 }, { "epoch": 0.125731978192098, "grad_norm": 0.5430051213300287, "learning_rate": 4.190846545019444e-06, "loss": 0.5407, "step": 1401 }, { "epoch": 0.125821722644769, "grad_norm": 0.5225545796206768, "learning_rate": 4.1938378701764886e-06, "loss": 0.5323, "step": 1402 }, { "epoch": 0.12591146709744003, "grad_norm": 0.5017148334386098, "learning_rate": 4.196829195333533e-06, "loss": 0.5136, "step": 1403 }, { "epoch": 0.12600121155011107, "grad_norm": 0.52502581706639, "learning_rate": 4.199820520490578e-06, "loss": 0.5771, "step": 1404 }, { "epoch": 0.12609095600278208, "grad_norm": 0.5564590239325203, "learning_rate": 4.202811845647622e-06, "loss": 0.5573, "step": 1405 }, { "epoch": 0.1261807004554531, "grad_norm": 0.5806974877685441, "learning_rate": 4.205803170804667e-06, "loss": 0.6075, "step": 1406 }, { "epoch": 0.1262704449081241, "grad_norm": 0.5232965387817615, "learning_rate": 4.208794495961711e-06, "loss": 0.5538, "step": 1407 }, { "epoch": 0.12636018936079513, "grad_norm": 0.5427428135641992, "learning_rate": 4.211785821118756e-06, "loss": 0.5508, "step": 1408 }, { "epoch": 0.12644993381346614, "grad_norm": 0.5264904394817664, "learning_rate": 4.2147771462758005e-06, "loss": 0.559, "step": 1409 }, { "epoch": 0.12653967826613718, "grad_norm": 0.5181778070361067, "learning_rate": 4.217768471432845e-06, "loss": 0.5575, "step": 1410 }, { "epoch": 0.1266294227188082, "grad_norm": 0.47849027145325607, "learning_rate": 4.22075979658989e-06, "loss": 0.503, "step": 1411 }, { "epoch": 0.1267191671714792, "grad_norm": 0.5539056290162059, "learning_rate": 4.223751121746934e-06, "loss": 0.5905, "step": 1412 }, { "epoch": 0.12680891162415023, "grad_norm": 0.5273662264499775, "learning_rate": 4.226742446903979e-06, "loss": 0.544, "step": 1413 }, { "epoch": 0.12689865607682124, "grad_norm": 0.4638619616087649, "learning_rate": 4.229733772061023e-06, "loss": 0.4746, "step": 1414 }, { "epoch": 0.12698840052949228, "grad_norm": 0.4977125665066228, "learning_rate": 4.232725097218068e-06, "loss": 0.509, "step": 1415 }, { "epoch": 0.1270781449821633, "grad_norm": 0.5383574380989457, "learning_rate": 4.235716422375112e-06, "loss": 0.5721, "step": 1416 }, { "epoch": 0.1271678894348343, "grad_norm": 0.4870693161062212, "learning_rate": 4.238707747532157e-06, "loss": 0.4679, "step": 1417 }, { "epoch": 0.12725763388750533, "grad_norm": 0.5404084324948023, "learning_rate": 4.2416990726892015e-06, "loss": 0.6166, "step": 1418 }, { "epoch": 0.12734737834017634, "grad_norm": 0.5396463234836767, "learning_rate": 4.244690397846246e-06, "loss": 0.6331, "step": 1419 }, { "epoch": 0.12743712279284736, "grad_norm": 0.5079136548559713, "learning_rate": 4.247681723003291e-06, "loss": 0.5289, "step": 1420 }, { "epoch": 0.1275268672455184, "grad_norm": 0.5347540535758293, "learning_rate": 4.250673048160335e-06, "loss": 0.5048, "step": 1421 }, { "epoch": 0.1276166116981894, "grad_norm": 0.512803416317263, "learning_rate": 4.25366437331738e-06, "loss": 0.5548, "step": 1422 }, { "epoch": 0.12770635615086043, "grad_norm": 0.4976982803362886, "learning_rate": 4.256655698474424e-06, "loss": 0.5447, "step": 1423 }, { "epoch": 0.12779610060353144, "grad_norm": 0.4932831779820659, "learning_rate": 4.259647023631469e-06, "loss": 0.4927, "step": 1424 }, { "epoch": 0.12788584505620246, "grad_norm": 0.5042507015849171, "learning_rate": 4.262638348788513e-06, "loss": 0.5381, "step": 1425 }, { "epoch": 0.12797558950887347, "grad_norm": 0.5310451561645149, "learning_rate": 4.265629673945558e-06, "loss": 0.5185, "step": 1426 }, { "epoch": 0.12806533396154451, "grad_norm": 0.5093228304073378, "learning_rate": 4.2686209991026025e-06, "loss": 0.5103, "step": 1427 }, { "epoch": 0.12815507841421553, "grad_norm": 0.5135667596039213, "learning_rate": 4.271612324259647e-06, "loss": 0.487, "step": 1428 }, { "epoch": 0.12824482286688654, "grad_norm": 0.5470288268794712, "learning_rate": 4.274603649416692e-06, "loss": 0.5783, "step": 1429 }, { "epoch": 0.12833456731955756, "grad_norm": 0.5098266714778733, "learning_rate": 4.277594974573737e-06, "loss": 0.5576, "step": 1430 }, { "epoch": 0.12842431177222857, "grad_norm": 0.5851131090956505, "learning_rate": 4.280586299730782e-06, "loss": 0.6049, "step": 1431 }, { "epoch": 0.1285140562248996, "grad_norm": 0.5288534207560924, "learning_rate": 4.283577624887826e-06, "loss": 0.5507, "step": 1432 }, { "epoch": 0.12860380067757063, "grad_norm": 0.5093566941275012, "learning_rate": 4.286568950044871e-06, "loss": 0.5564, "step": 1433 }, { "epoch": 0.12869354513024164, "grad_norm": 0.5341763681185934, "learning_rate": 4.289560275201915e-06, "loss": 0.5165, "step": 1434 }, { "epoch": 0.12878328958291266, "grad_norm": 0.49077146415154305, "learning_rate": 4.29255160035896e-06, "loss": 0.5273, "step": 1435 }, { "epoch": 0.12887303403558367, "grad_norm": 0.5084198563960156, "learning_rate": 4.295542925516004e-06, "loss": 0.5366, "step": 1436 }, { "epoch": 0.1289627784882547, "grad_norm": 0.5253730352336337, "learning_rate": 4.298534250673049e-06, "loss": 0.5973, "step": 1437 }, { "epoch": 0.1290525229409257, "grad_norm": 0.4998192618506214, "learning_rate": 4.3015255758300935e-06, "loss": 0.4906, "step": 1438 }, { "epoch": 0.12914226739359674, "grad_norm": 0.5233567751218476, "learning_rate": 4.304516900987138e-06, "loss": 0.5621, "step": 1439 }, { "epoch": 0.12923201184626776, "grad_norm": 0.4905696045704355, "learning_rate": 4.307508226144182e-06, "loss": 0.5058, "step": 1440 }, { "epoch": 0.12932175629893877, "grad_norm": 0.5014811885171051, "learning_rate": 4.310499551301226e-06, "loss": 0.5653, "step": 1441 }, { "epoch": 0.1294115007516098, "grad_norm": 0.4898974149075828, "learning_rate": 4.313490876458271e-06, "loss": 0.5032, "step": 1442 }, { "epoch": 0.1295012452042808, "grad_norm": 0.5144673569962275, "learning_rate": 4.3164822016153154e-06, "loss": 0.5382, "step": 1443 }, { "epoch": 0.12959098965695182, "grad_norm": 0.5102200989436416, "learning_rate": 4.31947352677236e-06, "loss": 0.5282, "step": 1444 }, { "epoch": 0.12968073410962286, "grad_norm": 0.5617326303894544, "learning_rate": 4.3224648519294046e-06, "loss": 0.644, "step": 1445 }, { "epoch": 0.12977047856229387, "grad_norm": 0.4976380391826685, "learning_rate": 4.325456177086449e-06, "loss": 0.5756, "step": 1446 }, { "epoch": 0.1298602230149649, "grad_norm": 0.5351130039564533, "learning_rate": 4.328447502243494e-06, "loss": 0.5941, "step": 1447 }, { "epoch": 0.1299499674676359, "grad_norm": 0.5181057477454964, "learning_rate": 4.331438827400539e-06, "loss": 0.6031, "step": 1448 }, { "epoch": 0.13003971192030692, "grad_norm": 0.49561046375019685, "learning_rate": 4.334430152557584e-06, "loss": 0.5263, "step": 1449 }, { "epoch": 0.13012945637297796, "grad_norm": 0.5010041448266007, "learning_rate": 4.337421477714628e-06, "loss": 0.524, "step": 1450 }, { "epoch": 0.13021920082564897, "grad_norm": 0.47442455469444134, "learning_rate": 4.340412802871673e-06, "loss": 0.4766, "step": 1451 }, { "epoch": 0.13030894527832, "grad_norm": 0.5365775205960412, "learning_rate": 4.343404128028717e-06, "loss": 0.5686, "step": 1452 }, { "epoch": 0.130398689730991, "grad_norm": 0.520166151186243, "learning_rate": 4.346395453185762e-06, "loss": 0.5966, "step": 1453 }, { "epoch": 0.13048843418366202, "grad_norm": 0.539604579497697, "learning_rate": 4.3493867783428064e-06, "loss": 0.5372, "step": 1454 }, { "epoch": 0.13057817863633303, "grad_norm": 0.5487386809078636, "learning_rate": 4.352378103499851e-06, "loss": 0.5383, "step": 1455 }, { "epoch": 0.13066792308900407, "grad_norm": 0.49774864534877017, "learning_rate": 4.3553694286568955e-06, "loss": 0.5114, "step": 1456 }, { "epoch": 0.1307576675416751, "grad_norm": 0.5062824924871127, "learning_rate": 4.35836075381394e-06, "loss": 0.5013, "step": 1457 }, { "epoch": 0.1308474119943461, "grad_norm": 0.4950926933430459, "learning_rate": 4.361352078970985e-06, "loss": 0.4878, "step": 1458 }, { "epoch": 0.13093715644701712, "grad_norm": 0.5065285424273419, "learning_rate": 4.364343404128029e-06, "loss": 0.5508, "step": 1459 }, { "epoch": 0.13102690089968813, "grad_norm": 0.5256836297670544, "learning_rate": 4.367334729285074e-06, "loss": 0.5813, "step": 1460 }, { "epoch": 0.13111664535235915, "grad_norm": 0.492727858382969, "learning_rate": 4.370326054442118e-06, "loss": 0.4575, "step": 1461 }, { "epoch": 0.1312063898050302, "grad_norm": 0.5530711239780718, "learning_rate": 4.373317379599163e-06, "loss": 0.5746, "step": 1462 }, { "epoch": 0.1312961342577012, "grad_norm": 0.5355189013247544, "learning_rate": 4.3763087047562075e-06, "loss": 0.5907, "step": 1463 }, { "epoch": 0.13138587871037222, "grad_norm": 0.4987466765636248, "learning_rate": 4.379300029913252e-06, "loss": 0.5219, "step": 1464 }, { "epoch": 0.13147562316304323, "grad_norm": 0.5263953730558202, "learning_rate": 4.3822913550702966e-06, "loss": 0.508, "step": 1465 }, { "epoch": 0.13156536761571425, "grad_norm": 0.4976742124614839, "learning_rate": 4.385282680227341e-06, "loss": 0.5125, "step": 1466 }, { "epoch": 0.13165511206838526, "grad_norm": 0.5418158400307885, "learning_rate": 4.388274005384386e-06, "loss": 0.5128, "step": 1467 }, { "epoch": 0.1317448565210563, "grad_norm": 0.4882784402415791, "learning_rate": 4.39126533054143e-06, "loss": 0.5045, "step": 1468 }, { "epoch": 0.13183460097372732, "grad_norm": 0.5295112415831374, "learning_rate": 4.394256655698475e-06, "loss": 0.5601, "step": 1469 }, { "epoch": 0.13192434542639833, "grad_norm": 0.581590397184766, "learning_rate": 4.397247980855519e-06, "loss": 0.6621, "step": 1470 }, { "epoch": 0.13201408987906935, "grad_norm": 0.4883288360400929, "learning_rate": 4.400239306012564e-06, "loss": 0.4816, "step": 1471 }, { "epoch": 0.13210383433174036, "grad_norm": 0.5199828432856836, "learning_rate": 4.4032306311696085e-06, "loss": 0.5163, "step": 1472 }, { "epoch": 0.13219357878441138, "grad_norm": 0.5008559574537056, "learning_rate": 4.406221956326653e-06, "loss": 0.5386, "step": 1473 }, { "epoch": 0.13228332323708242, "grad_norm": 0.49176315470188176, "learning_rate": 4.409213281483698e-06, "loss": 0.5257, "step": 1474 }, { "epoch": 0.13237306768975343, "grad_norm": 0.4945644403909969, "learning_rate": 4.412204606640742e-06, "loss": 0.4641, "step": 1475 }, { "epoch": 0.13246281214242445, "grad_norm": 0.4758011340899084, "learning_rate": 4.415195931797787e-06, "loss": 0.4703, "step": 1476 }, { "epoch": 0.13255255659509546, "grad_norm": 0.537526506046505, "learning_rate": 4.418187256954831e-06, "loss": 0.5278, "step": 1477 }, { "epoch": 0.13264230104776648, "grad_norm": 0.5241186294220208, "learning_rate": 4.421178582111876e-06, "loss": 0.5599, "step": 1478 }, { "epoch": 0.13273204550043752, "grad_norm": 0.5278949863032318, "learning_rate": 4.42416990726892e-06, "loss": 0.531, "step": 1479 }, { "epoch": 0.13282178995310853, "grad_norm": 0.5440721625016796, "learning_rate": 4.427161232425965e-06, "loss": 0.6275, "step": 1480 }, { "epoch": 0.13291153440577955, "grad_norm": 0.5163271636124722, "learning_rate": 4.4301525575830095e-06, "loss": 0.5113, "step": 1481 }, { "epoch": 0.13300127885845056, "grad_norm": 0.49999003417720234, "learning_rate": 4.433143882740054e-06, "loss": 0.5386, "step": 1482 }, { "epoch": 0.13309102331112158, "grad_norm": 0.5462341791679762, "learning_rate": 4.436135207897099e-06, "loss": 0.602, "step": 1483 }, { "epoch": 0.1331807677637926, "grad_norm": 0.49802775766928803, "learning_rate": 4.439126533054143e-06, "loss": 0.5319, "step": 1484 }, { "epoch": 0.13327051221646363, "grad_norm": 0.49442777339328303, "learning_rate": 4.442117858211188e-06, "loss": 0.4587, "step": 1485 }, { "epoch": 0.13336025666913465, "grad_norm": 0.5839216982482773, "learning_rate": 4.445109183368232e-06, "loss": 0.6242, "step": 1486 }, { "epoch": 0.13345000112180566, "grad_norm": 0.5051373201052998, "learning_rate": 4.448100508525277e-06, "loss": 0.5341, "step": 1487 }, { "epoch": 0.13353974557447668, "grad_norm": 0.5175654115655358, "learning_rate": 4.451091833682321e-06, "loss": 0.5511, "step": 1488 }, { "epoch": 0.1336294900271477, "grad_norm": 0.5248172388603077, "learning_rate": 4.454083158839366e-06, "loss": 0.534, "step": 1489 }, { "epoch": 0.1337192344798187, "grad_norm": 0.5294812369728187, "learning_rate": 4.4570744839964105e-06, "loss": 0.5099, "step": 1490 }, { "epoch": 0.13380897893248975, "grad_norm": 0.5292516884334079, "learning_rate": 4.460065809153455e-06, "loss": 0.5695, "step": 1491 }, { "epoch": 0.13389872338516076, "grad_norm": 0.548317641693412, "learning_rate": 4.4630571343105e-06, "loss": 0.556, "step": 1492 }, { "epoch": 0.13398846783783178, "grad_norm": 0.515277885930117, "learning_rate": 4.466048459467544e-06, "loss": 0.5521, "step": 1493 }, { "epoch": 0.1340782122905028, "grad_norm": 0.4984530565272146, "learning_rate": 4.46903978462459e-06, "loss": 0.4837, "step": 1494 }, { "epoch": 0.1341679567431738, "grad_norm": 0.5541974156026076, "learning_rate": 4.472031109781634e-06, "loss": 0.5745, "step": 1495 }, { "epoch": 0.13425770119584482, "grad_norm": 0.46867329986655293, "learning_rate": 4.475022434938679e-06, "loss": 0.495, "step": 1496 }, { "epoch": 0.13434744564851586, "grad_norm": 0.5130865062799445, "learning_rate": 4.478013760095723e-06, "loss": 0.5585, "step": 1497 }, { "epoch": 0.13443719010118688, "grad_norm": 0.525246452238073, "learning_rate": 4.481005085252768e-06, "loss": 0.5645, "step": 1498 }, { "epoch": 0.1345269345538579, "grad_norm": 0.543655592932916, "learning_rate": 4.483996410409812e-06, "loss": 0.608, "step": 1499 }, { "epoch": 0.1346166790065289, "grad_norm": 0.5423663313383297, "learning_rate": 4.486987735566857e-06, "loss": 0.5686, "step": 1500 }, { "epoch": 0.13470642345919992, "grad_norm": 0.5276327612042536, "learning_rate": 4.4899790607239015e-06, "loss": 0.5618, "step": 1501 }, { "epoch": 0.13479616791187093, "grad_norm": 0.5280978974694787, "learning_rate": 4.492970385880945e-06, "loss": 0.5431, "step": 1502 }, { "epoch": 0.13488591236454198, "grad_norm": 0.5484242399926901, "learning_rate": 4.49596171103799e-06, "loss": 0.5528, "step": 1503 }, { "epoch": 0.134975656817213, "grad_norm": 0.5400350678064548, "learning_rate": 4.498953036195034e-06, "loss": 0.5235, "step": 1504 }, { "epoch": 0.135065401269884, "grad_norm": 0.49814406700088404, "learning_rate": 4.501944361352079e-06, "loss": 0.5591, "step": 1505 }, { "epoch": 0.13515514572255502, "grad_norm": 0.44301258094274715, "learning_rate": 4.5049356865091235e-06, "loss": 0.4005, "step": 1506 }, { "epoch": 0.13524489017522603, "grad_norm": 0.5737445827809255, "learning_rate": 4.507927011666168e-06, "loss": 0.6666, "step": 1507 }, { "epoch": 0.13533463462789708, "grad_norm": 0.5820373554854348, "learning_rate": 4.510918336823213e-06, "loss": 0.5635, "step": 1508 }, { "epoch": 0.1354243790805681, "grad_norm": 0.524632102398922, "learning_rate": 4.513909661980257e-06, "loss": 0.4751, "step": 1509 }, { "epoch": 0.1355141235332391, "grad_norm": 0.4708533509212245, "learning_rate": 4.516900987137302e-06, "loss": 0.4787, "step": 1510 }, { "epoch": 0.13560386798591012, "grad_norm": 0.4903327255234637, "learning_rate": 4.519892312294346e-06, "loss": 0.5372, "step": 1511 }, { "epoch": 0.13569361243858113, "grad_norm": 0.5145301464259533, "learning_rate": 4.522883637451392e-06, "loss": 0.5046, "step": 1512 }, { "epoch": 0.13578335689125215, "grad_norm": 0.549088610241486, "learning_rate": 4.525874962608436e-06, "loss": 0.6008, "step": 1513 }, { "epoch": 0.1358731013439232, "grad_norm": 0.5400718229965007, "learning_rate": 4.528866287765481e-06, "loss": 0.4914, "step": 1514 }, { "epoch": 0.1359628457965942, "grad_norm": 0.4969498779189612, "learning_rate": 4.531857612922525e-06, "loss": 0.5199, "step": 1515 }, { "epoch": 0.13605259024926522, "grad_norm": 0.5296105178691051, "learning_rate": 4.53484893807957e-06, "loss": 0.519, "step": 1516 }, { "epoch": 0.13614233470193624, "grad_norm": 0.5004961709724174, "learning_rate": 4.5378402632366145e-06, "loss": 0.5615, "step": 1517 }, { "epoch": 0.13623207915460725, "grad_norm": 0.5190626748863654, "learning_rate": 4.540831588393659e-06, "loss": 0.511, "step": 1518 }, { "epoch": 0.13632182360727826, "grad_norm": 0.5282917651105005, "learning_rate": 4.5438229135507036e-06, "loss": 0.6005, "step": 1519 }, { "epoch": 0.1364115680599493, "grad_norm": 0.5108767070041804, "learning_rate": 4.546814238707748e-06, "loss": 0.538, "step": 1520 }, { "epoch": 0.13650131251262032, "grad_norm": 0.5027822654123749, "learning_rate": 4.549805563864793e-06, "loss": 0.5403, "step": 1521 }, { "epoch": 0.13659105696529134, "grad_norm": 0.6354519733849859, "learning_rate": 4.552796889021837e-06, "loss": 0.6506, "step": 1522 }, { "epoch": 0.13668080141796235, "grad_norm": 0.5213521944214663, "learning_rate": 4.555788214178882e-06, "loss": 0.4576, "step": 1523 }, { "epoch": 0.13677054587063336, "grad_norm": 0.5236489521767156, "learning_rate": 4.558779539335926e-06, "loss": 0.5359, "step": 1524 }, { "epoch": 0.13686029032330438, "grad_norm": 0.5579270090515229, "learning_rate": 4.561770864492971e-06, "loss": 0.5595, "step": 1525 }, { "epoch": 0.13695003477597542, "grad_norm": 0.525983048964037, "learning_rate": 4.5647621896500155e-06, "loss": 0.5368, "step": 1526 }, { "epoch": 0.13703977922864644, "grad_norm": 0.5607264551414507, "learning_rate": 4.56775351480706e-06, "loss": 0.5661, "step": 1527 }, { "epoch": 0.13712952368131745, "grad_norm": 0.5328237720891269, "learning_rate": 4.570744839964105e-06, "loss": 0.5394, "step": 1528 }, { "epoch": 0.13721926813398846, "grad_norm": 0.507117703742589, "learning_rate": 4.573736165121149e-06, "loss": 0.5117, "step": 1529 }, { "epoch": 0.13730901258665948, "grad_norm": 0.5412051708334414, "learning_rate": 4.576727490278194e-06, "loss": 0.6148, "step": 1530 }, { "epoch": 0.1373987570393305, "grad_norm": 0.4969012867756296, "learning_rate": 4.579718815435238e-06, "loss": 0.53, "step": 1531 }, { "epoch": 0.13748850149200154, "grad_norm": 0.5190828621466642, "learning_rate": 4.582710140592283e-06, "loss": 0.5104, "step": 1532 }, { "epoch": 0.13757824594467255, "grad_norm": 0.5102937125207386, "learning_rate": 4.585701465749327e-06, "loss": 0.5431, "step": 1533 }, { "epoch": 0.13766799039734356, "grad_norm": 0.5346259585398053, "learning_rate": 4.588692790906372e-06, "loss": 0.5573, "step": 1534 }, { "epoch": 0.13775773485001458, "grad_norm": 0.4968665309697225, "learning_rate": 4.5916841160634165e-06, "loss": 0.494, "step": 1535 }, { "epoch": 0.1378474793026856, "grad_norm": 0.5183811590462372, "learning_rate": 4.594675441220461e-06, "loss": 0.5305, "step": 1536 }, { "epoch": 0.1379372237553566, "grad_norm": 0.4890432792053275, "learning_rate": 4.597666766377506e-06, "loss": 0.5249, "step": 1537 }, { "epoch": 0.13802696820802765, "grad_norm": 0.5139336808948514, "learning_rate": 4.60065809153455e-06, "loss": 0.5354, "step": 1538 }, { "epoch": 0.13811671266069867, "grad_norm": 0.49077168462225185, "learning_rate": 4.603649416691595e-06, "loss": 0.4709, "step": 1539 }, { "epoch": 0.13820645711336968, "grad_norm": 0.4593968232663295, "learning_rate": 4.606640741848639e-06, "loss": 0.4613, "step": 1540 }, { "epoch": 0.1382962015660407, "grad_norm": 0.5521162928174539, "learning_rate": 4.609632067005684e-06, "loss": 0.6191, "step": 1541 }, { "epoch": 0.1383859460187117, "grad_norm": 0.5492905422465642, "learning_rate": 4.612623392162728e-06, "loss": 0.5787, "step": 1542 }, { "epoch": 0.13847569047138275, "grad_norm": 0.48569891645103486, "learning_rate": 4.615614717319773e-06, "loss": 0.5121, "step": 1543 }, { "epoch": 0.13856543492405377, "grad_norm": 0.5230597110511936, "learning_rate": 4.6186060424768175e-06, "loss": 0.5315, "step": 1544 }, { "epoch": 0.13865517937672478, "grad_norm": 0.5161326097379372, "learning_rate": 4.621597367633862e-06, "loss": 0.5287, "step": 1545 }, { "epoch": 0.1387449238293958, "grad_norm": 0.5627191517761367, "learning_rate": 4.624588692790907e-06, "loss": 0.6281, "step": 1546 }, { "epoch": 0.1388346682820668, "grad_norm": 0.5213623947167647, "learning_rate": 4.627580017947951e-06, "loss": 0.5531, "step": 1547 }, { "epoch": 0.13892441273473782, "grad_norm": 0.4948779450888808, "learning_rate": 4.630571343104996e-06, "loss": 0.4829, "step": 1548 }, { "epoch": 0.13901415718740887, "grad_norm": 0.579190867128092, "learning_rate": 4.63356266826204e-06, "loss": 0.5908, "step": 1549 }, { "epoch": 0.13910390164007988, "grad_norm": 0.5740591807826495, "learning_rate": 4.636553993419085e-06, "loss": 0.5449, "step": 1550 }, { "epoch": 0.1391936460927509, "grad_norm": 0.5041687977877881, "learning_rate": 4.6395453185761294e-06, "loss": 0.5103, "step": 1551 }, { "epoch": 0.1392833905454219, "grad_norm": 0.4875041506299185, "learning_rate": 4.642536643733174e-06, "loss": 0.5466, "step": 1552 }, { "epoch": 0.13937313499809292, "grad_norm": 0.4878081696875125, "learning_rate": 4.6455279688902186e-06, "loss": 0.4921, "step": 1553 }, { "epoch": 0.13946287945076394, "grad_norm": 0.49425249489233886, "learning_rate": 4.648519294047263e-06, "loss": 0.5447, "step": 1554 }, { "epoch": 0.13955262390343498, "grad_norm": 0.5718592083893064, "learning_rate": 4.651510619204308e-06, "loss": 0.6331, "step": 1555 }, { "epoch": 0.139642368356106, "grad_norm": 0.5238100486826704, "learning_rate": 4.654501944361352e-06, "loss": 0.5231, "step": 1556 }, { "epoch": 0.139732112808777, "grad_norm": 0.5289823512831812, "learning_rate": 4.657493269518397e-06, "loss": 0.5431, "step": 1557 }, { "epoch": 0.13982185726144802, "grad_norm": 0.5018624060136901, "learning_rate": 4.660484594675442e-06, "loss": 0.5191, "step": 1558 }, { "epoch": 0.13991160171411904, "grad_norm": 0.5396091069656966, "learning_rate": 4.663475919832487e-06, "loss": 0.6025, "step": 1559 }, { "epoch": 0.14000134616679005, "grad_norm": 0.5267111286660168, "learning_rate": 4.666467244989531e-06, "loss": 0.5569, "step": 1560 }, { "epoch": 0.1400910906194611, "grad_norm": 0.5716833744956309, "learning_rate": 4.669458570146576e-06, "loss": 0.6112, "step": 1561 }, { "epoch": 0.1401808350721321, "grad_norm": 0.5130193076989342, "learning_rate": 4.6724498953036204e-06, "loss": 0.5106, "step": 1562 }, { "epoch": 0.14027057952480312, "grad_norm": 0.5224702499942826, "learning_rate": 4.675441220460665e-06, "loss": 0.5522, "step": 1563 }, { "epoch": 0.14036032397747414, "grad_norm": 0.5284477861753283, "learning_rate": 4.678432545617709e-06, "loss": 0.5854, "step": 1564 }, { "epoch": 0.14045006843014515, "grad_norm": 0.5454415387039189, "learning_rate": 4.681423870774753e-06, "loss": 0.5615, "step": 1565 }, { "epoch": 0.14053981288281617, "grad_norm": 0.5111933836738533, "learning_rate": 4.684415195931798e-06, "loss": 0.5176, "step": 1566 }, { "epoch": 0.1406295573354872, "grad_norm": 0.5004473558943153, "learning_rate": 4.687406521088842e-06, "loss": 0.4639, "step": 1567 }, { "epoch": 0.14071930178815822, "grad_norm": 0.4942036217956625, "learning_rate": 4.690397846245887e-06, "loss": 0.4823, "step": 1568 }, { "epoch": 0.14080904624082924, "grad_norm": 0.4820531865410163, "learning_rate": 4.6933891714029315e-06, "loss": 0.5008, "step": 1569 }, { "epoch": 0.14089879069350025, "grad_norm": 0.5739814448143238, "learning_rate": 4.696380496559976e-06, "loss": 0.6385, "step": 1570 }, { "epoch": 0.14098853514617127, "grad_norm": 0.50234397157554, "learning_rate": 4.699371821717021e-06, "loss": 0.5094, "step": 1571 }, { "epoch": 0.1410782795988423, "grad_norm": 0.53633711841171, "learning_rate": 4.702363146874065e-06, "loss": 0.5629, "step": 1572 }, { "epoch": 0.14116802405151332, "grad_norm": 0.5091032725438335, "learning_rate": 4.70535447203111e-06, "loss": 0.5042, "step": 1573 }, { "epoch": 0.14125776850418434, "grad_norm": 0.5581523550637884, "learning_rate": 4.708345797188154e-06, "loss": 0.5953, "step": 1574 }, { "epoch": 0.14134751295685535, "grad_norm": 0.5556097816975485, "learning_rate": 4.711337122345199e-06, "loss": 0.64, "step": 1575 }, { "epoch": 0.14143725740952637, "grad_norm": 0.5106505097465079, "learning_rate": 4.714328447502244e-06, "loss": 0.5205, "step": 1576 }, { "epoch": 0.14152700186219738, "grad_norm": 0.47300878331106744, "learning_rate": 4.717319772659289e-06, "loss": 0.4673, "step": 1577 }, { "epoch": 0.14161674631486842, "grad_norm": 0.48716642294247137, "learning_rate": 4.720311097816333e-06, "loss": 0.4964, "step": 1578 }, { "epoch": 0.14170649076753944, "grad_norm": 0.4966998245759042, "learning_rate": 4.723302422973378e-06, "loss": 0.5034, "step": 1579 }, { "epoch": 0.14179623522021045, "grad_norm": 0.5084461227497306, "learning_rate": 4.7262937481304225e-06, "loss": 0.4954, "step": 1580 }, { "epoch": 0.14188597967288147, "grad_norm": 0.521665974325125, "learning_rate": 4.729285073287467e-06, "loss": 0.5492, "step": 1581 }, { "epoch": 0.14197572412555248, "grad_norm": 0.4831987167026224, "learning_rate": 4.732276398444512e-06, "loss": 0.5127, "step": 1582 }, { "epoch": 0.1420654685782235, "grad_norm": 0.5381301904129692, "learning_rate": 4.735267723601556e-06, "loss": 0.4924, "step": 1583 }, { "epoch": 0.14215521303089454, "grad_norm": 0.5182745062826046, "learning_rate": 4.738259048758601e-06, "loss": 0.4973, "step": 1584 }, { "epoch": 0.14224495748356555, "grad_norm": 0.5027814945540349, "learning_rate": 4.741250373915645e-06, "loss": 0.5179, "step": 1585 }, { "epoch": 0.14233470193623657, "grad_norm": 0.550868642812459, "learning_rate": 4.74424169907269e-06, "loss": 0.587, "step": 1586 }, { "epoch": 0.14242444638890758, "grad_norm": 0.47323721915055594, "learning_rate": 4.747233024229734e-06, "loss": 0.4845, "step": 1587 }, { "epoch": 0.1425141908415786, "grad_norm": 0.4917597315050679, "learning_rate": 4.750224349386779e-06, "loss": 0.5354, "step": 1588 }, { "epoch": 0.1426039352942496, "grad_norm": 0.5426306164112735, "learning_rate": 4.7532156745438235e-06, "loss": 0.5691, "step": 1589 }, { "epoch": 0.14269367974692065, "grad_norm": 0.5134365163393138, "learning_rate": 4.756206999700868e-06, "loss": 0.5128, "step": 1590 }, { "epoch": 0.14278342419959167, "grad_norm": 0.5411169491079604, "learning_rate": 4.759198324857913e-06, "loss": 0.5819, "step": 1591 }, { "epoch": 0.14287316865226268, "grad_norm": 0.5533546245636055, "learning_rate": 4.762189650014957e-06, "loss": 0.5912, "step": 1592 }, { "epoch": 0.1429629131049337, "grad_norm": 0.5268481593533638, "learning_rate": 4.765180975172002e-06, "loss": 0.5226, "step": 1593 }, { "epoch": 0.1430526575576047, "grad_norm": 0.4866688034462334, "learning_rate": 4.768172300329046e-06, "loss": 0.5471, "step": 1594 }, { "epoch": 0.14314240201027573, "grad_norm": 0.5223496161759533, "learning_rate": 4.771163625486091e-06, "loss": 0.5423, "step": 1595 }, { "epoch": 0.14323214646294677, "grad_norm": 0.5041330727335496, "learning_rate": 4.774154950643135e-06, "loss": 0.5481, "step": 1596 }, { "epoch": 0.14332189091561778, "grad_norm": 0.4875740384801841, "learning_rate": 4.77714627580018e-06, "loss": 0.466, "step": 1597 }, { "epoch": 0.1434116353682888, "grad_norm": 0.5056417708992504, "learning_rate": 4.7801376009572245e-06, "loss": 0.4912, "step": 1598 }, { "epoch": 0.1435013798209598, "grad_norm": 0.5320948386041301, "learning_rate": 4.783128926114269e-06, "loss": 0.5349, "step": 1599 }, { "epoch": 0.14359112427363083, "grad_norm": 0.5080426320800657, "learning_rate": 4.786120251271314e-06, "loss": 0.546, "step": 1600 }, { "epoch": 0.14368086872630187, "grad_norm": 0.525553962154176, "learning_rate": 4.789111576428358e-06, "loss": 0.522, "step": 1601 }, { "epoch": 0.14377061317897288, "grad_norm": 0.5385591907098802, "learning_rate": 4.792102901585403e-06, "loss": 0.5072, "step": 1602 }, { "epoch": 0.1438603576316439, "grad_norm": 0.5111554077970433, "learning_rate": 4.795094226742447e-06, "loss": 0.5378, "step": 1603 }, { "epoch": 0.1439501020843149, "grad_norm": 0.5488291332236377, "learning_rate": 4.798085551899492e-06, "loss": 0.572, "step": 1604 }, { "epoch": 0.14403984653698593, "grad_norm": 0.5347212673100044, "learning_rate": 4.8010768770565364e-06, "loss": 0.5706, "step": 1605 }, { "epoch": 0.14412959098965694, "grad_norm": 0.4887928808464052, "learning_rate": 4.804068202213581e-06, "loss": 0.4822, "step": 1606 }, { "epoch": 0.14421933544232798, "grad_norm": 0.5077624578947526, "learning_rate": 4.8070595273706256e-06, "loss": 0.5153, "step": 1607 }, { "epoch": 0.144309079894999, "grad_norm": 0.4935710346845703, "learning_rate": 4.81005085252767e-06, "loss": 0.5079, "step": 1608 }, { "epoch": 0.14439882434767, "grad_norm": 0.5535907815973296, "learning_rate": 4.813042177684715e-06, "loss": 0.5493, "step": 1609 }, { "epoch": 0.14448856880034103, "grad_norm": 0.5217974430684671, "learning_rate": 4.816033502841759e-06, "loss": 0.5778, "step": 1610 }, { "epoch": 0.14457831325301204, "grad_norm": 0.539775186215612, "learning_rate": 4.819024827998804e-06, "loss": 0.5879, "step": 1611 }, { "epoch": 0.14466805770568306, "grad_norm": 0.5620174815376705, "learning_rate": 4.822016153155848e-06, "loss": 0.6003, "step": 1612 }, { "epoch": 0.1447578021583541, "grad_norm": 0.4926290306599117, "learning_rate": 4.825007478312893e-06, "loss": 0.5091, "step": 1613 }, { "epoch": 0.1448475466110251, "grad_norm": 0.5527408585330242, "learning_rate": 4.8279988034699375e-06, "loss": 0.5028, "step": 1614 }, { "epoch": 0.14493729106369613, "grad_norm": 0.5469215941823183, "learning_rate": 4.830990128626982e-06, "loss": 0.5616, "step": 1615 }, { "epoch": 0.14502703551636714, "grad_norm": 0.49998151411574837, "learning_rate": 4.8339814537840266e-06, "loss": 0.5222, "step": 1616 }, { "epoch": 0.14511677996903816, "grad_norm": 0.5226539317552099, "learning_rate": 4.836972778941071e-06, "loss": 0.5965, "step": 1617 }, { "epoch": 0.14520652442170917, "grad_norm": 0.5127803543119525, "learning_rate": 4.839964104098116e-06, "loss": 0.5715, "step": 1618 }, { "epoch": 0.1452962688743802, "grad_norm": 0.5138752114659626, "learning_rate": 4.84295542925516e-06, "loss": 0.5793, "step": 1619 }, { "epoch": 0.14538601332705123, "grad_norm": 0.4692567787716609, "learning_rate": 4.845946754412205e-06, "loss": 0.4689, "step": 1620 }, { "epoch": 0.14547575777972224, "grad_norm": 0.5085980629463994, "learning_rate": 4.848938079569249e-06, "loss": 0.4712, "step": 1621 }, { "epoch": 0.14556550223239326, "grad_norm": 0.5329453006837682, "learning_rate": 4.851929404726295e-06, "loss": 0.5571, "step": 1622 }, { "epoch": 0.14565524668506427, "grad_norm": 0.5172271783621022, "learning_rate": 4.854920729883339e-06, "loss": 0.5528, "step": 1623 }, { "epoch": 0.14574499113773529, "grad_norm": 0.5390198543101569, "learning_rate": 4.857912055040384e-06, "loss": 0.5335, "step": 1624 }, { "epoch": 0.14583473559040633, "grad_norm": 0.5343040068385155, "learning_rate": 4.8609033801974284e-06, "loss": 0.4928, "step": 1625 }, { "epoch": 0.14592448004307734, "grad_norm": 0.5587357279242461, "learning_rate": 4.863894705354472e-06, "loss": 0.5865, "step": 1626 }, { "epoch": 0.14601422449574836, "grad_norm": 0.5010036717032442, "learning_rate": 4.866886030511517e-06, "loss": 0.5055, "step": 1627 }, { "epoch": 0.14610396894841937, "grad_norm": 0.5422867419299363, "learning_rate": 4.869877355668561e-06, "loss": 0.5933, "step": 1628 }, { "epoch": 0.14619371340109039, "grad_norm": 0.5691554040920553, "learning_rate": 4.872868680825606e-06, "loss": 0.5295, "step": 1629 }, { "epoch": 0.1462834578537614, "grad_norm": 0.508352378412188, "learning_rate": 4.87586000598265e-06, "loss": 0.5513, "step": 1630 }, { "epoch": 0.14637320230643244, "grad_norm": 0.5166483402234419, "learning_rate": 4.878851331139695e-06, "loss": 0.5091, "step": 1631 }, { "epoch": 0.14646294675910346, "grad_norm": 0.5901405070614176, "learning_rate": 4.8818426562967395e-06, "loss": 0.661, "step": 1632 }, { "epoch": 0.14655269121177447, "grad_norm": 0.4823189173695463, "learning_rate": 4.884833981453784e-06, "loss": 0.5277, "step": 1633 }, { "epoch": 0.1466424356644455, "grad_norm": 0.5675259083925122, "learning_rate": 4.887825306610829e-06, "loss": 0.6302, "step": 1634 }, { "epoch": 0.1467321801171165, "grad_norm": 0.49269388069922165, "learning_rate": 4.890816631767873e-06, "loss": 0.5087, "step": 1635 }, { "epoch": 0.14682192456978754, "grad_norm": 0.5612729862449559, "learning_rate": 4.893807956924918e-06, "loss": 0.5837, "step": 1636 }, { "epoch": 0.14691166902245856, "grad_norm": 0.526698333634192, "learning_rate": 4.896799282081962e-06, "loss": 0.5393, "step": 1637 }, { "epoch": 0.14700141347512957, "grad_norm": 0.5498741633343224, "learning_rate": 4.899790607239007e-06, "loss": 0.5563, "step": 1638 }, { "epoch": 0.1470911579278006, "grad_norm": 0.5578230628955767, "learning_rate": 4.902781932396051e-06, "loss": 0.5604, "step": 1639 }, { "epoch": 0.1471809023804716, "grad_norm": 0.5163802106554309, "learning_rate": 4.905773257553097e-06, "loss": 0.541, "step": 1640 }, { "epoch": 0.14727064683314262, "grad_norm": 0.5049368440973204, "learning_rate": 4.908764582710141e-06, "loss": 0.5123, "step": 1641 }, { "epoch": 0.14736039128581366, "grad_norm": 0.5016312451445183, "learning_rate": 4.911755907867186e-06, "loss": 0.5021, "step": 1642 }, { "epoch": 0.14745013573848467, "grad_norm": 0.5573272864588757, "learning_rate": 4.9147472330242305e-06, "loss": 0.5755, "step": 1643 }, { "epoch": 0.1475398801911557, "grad_norm": 0.555334717203433, "learning_rate": 4.917738558181275e-06, "loss": 0.6052, "step": 1644 }, { "epoch": 0.1476296246438267, "grad_norm": 0.5418841844013106, "learning_rate": 4.92072988333832e-06, "loss": 0.5394, "step": 1645 }, { "epoch": 0.14771936909649772, "grad_norm": 0.529273923877958, "learning_rate": 4.923721208495364e-06, "loss": 0.5564, "step": 1646 }, { "epoch": 0.14780911354916873, "grad_norm": 0.5135357896674888, "learning_rate": 4.926712533652409e-06, "loss": 0.5782, "step": 1647 }, { "epoch": 0.14789885800183977, "grad_norm": 0.4762141075810126, "learning_rate": 4.929703858809453e-06, "loss": 0.5166, "step": 1648 }, { "epoch": 0.1479886024545108, "grad_norm": 0.5447969492099026, "learning_rate": 4.932695183966498e-06, "loss": 0.5868, "step": 1649 }, { "epoch": 0.1480783469071818, "grad_norm": 0.4749299703847455, "learning_rate": 4.935686509123542e-06, "loss": 0.4947, "step": 1650 }, { "epoch": 0.14816809135985282, "grad_norm": 0.5398260381899267, "learning_rate": 4.938677834280587e-06, "loss": 0.5788, "step": 1651 }, { "epoch": 0.14825783581252383, "grad_norm": 0.49695432594725925, "learning_rate": 4.9416691594376315e-06, "loss": 0.5342, "step": 1652 }, { "epoch": 0.14834758026519484, "grad_norm": 0.5168416029709125, "learning_rate": 4.944660484594676e-06, "loss": 0.5233, "step": 1653 }, { "epoch": 0.1484373247178659, "grad_norm": 0.5138117499762718, "learning_rate": 4.947651809751721e-06, "loss": 0.5661, "step": 1654 }, { "epoch": 0.1485270691705369, "grad_norm": 0.5161199056115251, "learning_rate": 4.950643134908765e-06, "loss": 0.5448, "step": 1655 }, { "epoch": 0.14861681362320792, "grad_norm": 0.5027485302421812, "learning_rate": 4.95363446006581e-06, "loss": 0.4651, "step": 1656 }, { "epoch": 0.14870655807587893, "grad_norm": 0.4982003660964139, "learning_rate": 4.9566257852228535e-06, "loss": 0.5124, "step": 1657 }, { "epoch": 0.14879630252854995, "grad_norm": 0.49141657078687306, "learning_rate": 4.959617110379899e-06, "loss": 0.5088, "step": 1658 }, { "epoch": 0.14888604698122096, "grad_norm": 0.5329277191621267, "learning_rate": 4.9626084355369434e-06, "loss": 0.5328, "step": 1659 }, { "epoch": 0.148975791433892, "grad_norm": 0.5235220835837526, "learning_rate": 4.965599760693988e-06, "loss": 0.5607, "step": 1660 }, { "epoch": 0.14906553588656302, "grad_norm": 0.4750115707037564, "learning_rate": 4.9685910858510325e-06, "loss": 0.4898, "step": 1661 }, { "epoch": 0.14915528033923403, "grad_norm": 0.5108716012573054, "learning_rate": 4.971582411008077e-06, "loss": 0.5384, "step": 1662 }, { "epoch": 0.14924502479190505, "grad_norm": 0.4748718650275632, "learning_rate": 4.974573736165122e-06, "loss": 0.4563, "step": 1663 }, { "epoch": 0.14933476924457606, "grad_norm": 0.5449091381365221, "learning_rate": 4.977565061322166e-06, "loss": 0.584, "step": 1664 }, { "epoch": 0.1494245136972471, "grad_norm": 0.5357169529992235, "learning_rate": 4.980556386479211e-06, "loss": 0.5949, "step": 1665 }, { "epoch": 0.14951425814991812, "grad_norm": 0.5018097532948684, "learning_rate": 4.983547711636255e-06, "loss": 0.5373, "step": 1666 }, { "epoch": 0.14960400260258913, "grad_norm": 0.49906450179437856, "learning_rate": 4.9865390367933e-06, "loss": 0.5613, "step": 1667 }, { "epoch": 0.14969374705526015, "grad_norm": 0.47959717960954895, "learning_rate": 4.9895303619503445e-06, "loss": 0.4517, "step": 1668 }, { "epoch": 0.14978349150793116, "grad_norm": 0.5562695785877024, "learning_rate": 4.992521687107389e-06, "loss": 0.5469, "step": 1669 }, { "epoch": 0.14987323596060217, "grad_norm": 0.5549142051605164, "learning_rate": 4.9955130122644336e-06, "loss": 0.5411, "step": 1670 }, { "epoch": 0.14996298041327322, "grad_norm": 0.5218006218660305, "learning_rate": 4.998504337421478e-06, "loss": 0.5574, "step": 1671 }, { "epoch": 0.15005272486594423, "grad_norm": 0.5474428213633451, "learning_rate": 5.001495662578523e-06, "loss": 0.61, "step": 1672 }, { "epoch": 0.15014246931861525, "grad_norm": 0.4912139490524773, "learning_rate": 5.004486987735567e-06, "loss": 0.5315, "step": 1673 }, { "epoch": 0.15023221377128626, "grad_norm": 0.5138962022688566, "learning_rate": 5.007478312892612e-06, "loss": 0.5868, "step": 1674 }, { "epoch": 0.15032195822395727, "grad_norm": 0.5296055968471431, "learning_rate": 5.010469638049656e-06, "loss": 0.5236, "step": 1675 }, { "epoch": 0.1504117026766283, "grad_norm": 0.5167384017737204, "learning_rate": 5.013460963206701e-06, "loss": 0.5214, "step": 1676 }, { "epoch": 0.15050144712929933, "grad_norm": 0.5359216227993936, "learning_rate": 5.0164522883637455e-06, "loss": 0.5824, "step": 1677 }, { "epoch": 0.15059119158197035, "grad_norm": 0.5644052113895964, "learning_rate": 5.01944361352079e-06, "loss": 0.6319, "step": 1678 }, { "epoch": 0.15068093603464136, "grad_norm": 0.5470610360998164, "learning_rate": 5.022434938677835e-06, "loss": 0.5525, "step": 1679 }, { "epoch": 0.15077068048731238, "grad_norm": 0.5080444052657196, "learning_rate": 5.025426263834879e-06, "loss": 0.579, "step": 1680 }, { "epoch": 0.1508604249399834, "grad_norm": 0.5395132959002684, "learning_rate": 5.028417588991924e-06, "loss": 0.5573, "step": 1681 }, { "epoch": 0.1509501693926544, "grad_norm": 0.517050233459365, "learning_rate": 5.031408914148968e-06, "loss": 0.5505, "step": 1682 }, { "epoch": 0.15103991384532545, "grad_norm": 0.48662383225737676, "learning_rate": 5.034400239306013e-06, "loss": 0.4717, "step": 1683 }, { "epoch": 0.15112965829799646, "grad_norm": 0.5034372411650815, "learning_rate": 5.037391564463057e-06, "loss": 0.561, "step": 1684 }, { "epoch": 0.15121940275066748, "grad_norm": 0.5208043530965414, "learning_rate": 5.040382889620102e-06, "loss": 0.5596, "step": 1685 }, { "epoch": 0.1513091472033385, "grad_norm": 0.5633861534763074, "learning_rate": 5.043374214777147e-06, "loss": 0.5772, "step": 1686 }, { "epoch": 0.1513988916560095, "grad_norm": 0.53539671757919, "learning_rate": 5.046365539934192e-06, "loss": 0.5646, "step": 1687 }, { "epoch": 0.15148863610868052, "grad_norm": 0.5357950159964345, "learning_rate": 5.0493568650912365e-06, "loss": 0.5456, "step": 1688 }, { "epoch": 0.15157838056135156, "grad_norm": 0.5032211541447175, "learning_rate": 5.052348190248281e-06, "loss": 0.5121, "step": 1689 }, { "epoch": 0.15166812501402258, "grad_norm": 0.5002359933121383, "learning_rate": 5.055339515405326e-06, "loss": 0.5084, "step": 1690 }, { "epoch": 0.1517578694666936, "grad_norm": 0.5424627772904596, "learning_rate": 5.05833084056237e-06, "loss": 0.6045, "step": 1691 }, { "epoch": 0.1518476139193646, "grad_norm": 0.5030067048562354, "learning_rate": 5.061322165719415e-06, "loss": 0.5633, "step": 1692 }, { "epoch": 0.15193735837203562, "grad_norm": 0.47375654865993405, "learning_rate": 5.064313490876459e-06, "loss": 0.4997, "step": 1693 }, { "epoch": 0.15202710282470666, "grad_norm": 0.5595301315404515, "learning_rate": 5.067304816033504e-06, "loss": 0.6812, "step": 1694 }, { "epoch": 0.15211684727737768, "grad_norm": 0.4663118964541345, "learning_rate": 5.070296141190548e-06, "loss": 0.4595, "step": 1695 }, { "epoch": 0.1522065917300487, "grad_norm": 0.5022777449217907, "learning_rate": 5.073287466347593e-06, "loss": 0.5245, "step": 1696 }, { "epoch": 0.1522963361827197, "grad_norm": 0.5334002644466674, "learning_rate": 5.0762787915046375e-06, "loss": 0.5562, "step": 1697 }, { "epoch": 0.15238608063539072, "grad_norm": 0.5004373860483203, "learning_rate": 5.079270116661682e-06, "loss": 0.5138, "step": 1698 }, { "epoch": 0.15247582508806173, "grad_norm": 0.4651086991835335, "learning_rate": 5.082261441818727e-06, "loss": 0.5038, "step": 1699 }, { "epoch": 0.15256556954073278, "grad_norm": 0.5629967741307993, "learning_rate": 5.085252766975771e-06, "loss": 0.6373, "step": 1700 }, { "epoch": 0.1526553139934038, "grad_norm": 0.4857566957439685, "learning_rate": 5.088244092132816e-06, "loss": 0.5232, "step": 1701 }, { "epoch": 0.1527450584460748, "grad_norm": 0.5129806732696812, "learning_rate": 5.09123541728986e-06, "loss": 0.5627, "step": 1702 }, { "epoch": 0.15283480289874582, "grad_norm": 0.5210357910829613, "learning_rate": 5.094226742446904e-06, "loss": 0.5342, "step": 1703 }, { "epoch": 0.15292454735141683, "grad_norm": 0.5270006910069331, "learning_rate": 5.0972180676039486e-06, "loss": 0.5801, "step": 1704 }, { "epoch": 0.15301429180408785, "grad_norm": 0.47097933648338997, "learning_rate": 5.100209392760993e-06, "loss": 0.4331, "step": 1705 }, { "epoch": 0.1531040362567589, "grad_norm": 0.5142365235891045, "learning_rate": 5.103200717918038e-06, "loss": 0.5459, "step": 1706 }, { "epoch": 0.1531937807094299, "grad_norm": 0.4863483342525177, "learning_rate": 5.106192043075082e-06, "loss": 0.4716, "step": 1707 }, { "epoch": 0.15328352516210092, "grad_norm": 0.5303934422632995, "learning_rate": 5.109183368232127e-06, "loss": 0.5998, "step": 1708 }, { "epoch": 0.15337326961477193, "grad_norm": 0.5119972423480188, "learning_rate": 5.112174693389171e-06, "loss": 0.5754, "step": 1709 }, { "epoch": 0.15346301406744295, "grad_norm": 0.5670982268992186, "learning_rate": 5.115166018546216e-06, "loss": 0.6631, "step": 1710 }, { "epoch": 0.15355275852011396, "grad_norm": 0.5198761655234094, "learning_rate": 5.1181573437032605e-06, "loss": 0.5259, "step": 1711 }, { "epoch": 0.153642502972785, "grad_norm": 0.5530399432007996, "learning_rate": 5.121148668860305e-06, "loss": 0.556, "step": 1712 }, { "epoch": 0.15373224742545602, "grad_norm": 0.5470054969858409, "learning_rate": 5.12413999401735e-06, "loss": 0.5684, "step": 1713 }, { "epoch": 0.15382199187812703, "grad_norm": 0.5582994828291693, "learning_rate": 5.127131319174394e-06, "loss": 0.5963, "step": 1714 }, { "epoch": 0.15391173633079805, "grad_norm": 0.5764417785593269, "learning_rate": 5.130122644331439e-06, "loss": 0.6111, "step": 1715 }, { "epoch": 0.15400148078346906, "grad_norm": 0.5147092685379852, "learning_rate": 5.133113969488483e-06, "loss": 0.5121, "step": 1716 }, { "epoch": 0.15409122523614008, "grad_norm": 0.4852506582056994, "learning_rate": 5.136105294645528e-06, "loss": 0.4952, "step": 1717 }, { "epoch": 0.15418096968881112, "grad_norm": 0.5225410076465413, "learning_rate": 5.139096619802572e-06, "loss": 0.5416, "step": 1718 }, { "epoch": 0.15427071414148213, "grad_norm": 0.5186663476887927, "learning_rate": 5.142087944959617e-06, "loss": 0.5581, "step": 1719 }, { "epoch": 0.15436045859415315, "grad_norm": 0.4973471103065244, "learning_rate": 5.1450792701166615e-06, "loss": 0.4679, "step": 1720 }, { "epoch": 0.15445020304682416, "grad_norm": 0.4977461798442169, "learning_rate": 5.148070595273706e-06, "loss": 0.5247, "step": 1721 }, { "epoch": 0.15453994749949518, "grad_norm": 0.5255814295899242, "learning_rate": 5.1510619204307515e-06, "loss": 0.5684, "step": 1722 }, { "epoch": 0.15462969195216622, "grad_norm": 0.5568744176303496, "learning_rate": 5.154053245587796e-06, "loss": 0.5561, "step": 1723 }, { "epoch": 0.15471943640483724, "grad_norm": 0.500107549083899, "learning_rate": 5.1570445707448406e-06, "loss": 0.5101, "step": 1724 }, { "epoch": 0.15480918085750825, "grad_norm": 0.4867776198740327, "learning_rate": 5.160035895901885e-06, "loss": 0.4908, "step": 1725 }, { "epoch": 0.15489892531017926, "grad_norm": 0.5149798630906295, "learning_rate": 5.16302722105893e-06, "loss": 0.5228, "step": 1726 }, { "epoch": 0.15498866976285028, "grad_norm": 0.5499310923743913, "learning_rate": 5.166018546215974e-06, "loss": 0.5519, "step": 1727 }, { "epoch": 0.1550784142155213, "grad_norm": 0.5410194114803271, "learning_rate": 5.169009871373019e-06, "loss": 0.53, "step": 1728 }, { "epoch": 0.15516815866819234, "grad_norm": 0.5710068797155846, "learning_rate": 5.172001196530063e-06, "loss": 0.6365, "step": 1729 }, { "epoch": 0.15525790312086335, "grad_norm": 0.5075804928905293, "learning_rate": 5.174992521687108e-06, "loss": 0.5216, "step": 1730 }, { "epoch": 0.15534764757353436, "grad_norm": 0.5381759911921461, "learning_rate": 5.1779838468441525e-06, "loss": 0.5292, "step": 1731 }, { "epoch": 0.15543739202620538, "grad_norm": 0.5378666880772619, "learning_rate": 5.180975172001197e-06, "loss": 0.5805, "step": 1732 }, { "epoch": 0.1555271364788764, "grad_norm": 0.5386392322708636, "learning_rate": 5.183966497158242e-06, "loss": 0.589, "step": 1733 }, { "epoch": 0.1556168809315474, "grad_norm": 0.49237422063463876, "learning_rate": 5.186957822315286e-06, "loss": 0.5392, "step": 1734 }, { "epoch": 0.15570662538421845, "grad_norm": 0.5016403166932791, "learning_rate": 5.189949147472331e-06, "loss": 0.476, "step": 1735 }, { "epoch": 0.15579636983688946, "grad_norm": 0.5520514893122901, "learning_rate": 5.192940472629375e-06, "loss": 0.6472, "step": 1736 }, { "epoch": 0.15588611428956048, "grad_norm": 0.5908062185119717, "learning_rate": 5.19593179778642e-06, "loss": 0.6066, "step": 1737 }, { "epoch": 0.1559758587422315, "grad_norm": 0.5263198373000078, "learning_rate": 5.198923122943464e-06, "loss": 0.513, "step": 1738 }, { "epoch": 0.1560656031949025, "grad_norm": 0.5553932251283559, "learning_rate": 5.201914448100509e-06, "loss": 0.5224, "step": 1739 }, { "epoch": 0.15615534764757352, "grad_norm": 0.49998414700570476, "learning_rate": 5.2049057732575535e-06, "loss": 0.5256, "step": 1740 }, { "epoch": 0.15624509210024456, "grad_norm": 0.48974451816800446, "learning_rate": 5.207897098414598e-06, "loss": 0.4984, "step": 1741 }, { "epoch": 0.15633483655291558, "grad_norm": 0.5215051824572264, "learning_rate": 5.210888423571643e-06, "loss": 0.5127, "step": 1742 }, { "epoch": 0.1564245810055866, "grad_norm": 0.5507290089190622, "learning_rate": 5.213879748728687e-06, "loss": 0.5898, "step": 1743 }, { "epoch": 0.1565143254582576, "grad_norm": 0.5034259795902374, "learning_rate": 5.216871073885732e-06, "loss": 0.54, "step": 1744 }, { "epoch": 0.15660406991092862, "grad_norm": 0.5273523188229023, "learning_rate": 5.219862399042776e-06, "loss": 0.5096, "step": 1745 }, { "epoch": 0.15669381436359964, "grad_norm": 0.503581487302408, "learning_rate": 5.222853724199821e-06, "loss": 0.5123, "step": 1746 }, { "epoch": 0.15678355881627068, "grad_norm": 0.5154014915862454, "learning_rate": 5.225845049356865e-06, "loss": 0.5187, "step": 1747 }, { "epoch": 0.1568733032689417, "grad_norm": 0.5638349276364993, "learning_rate": 5.22883637451391e-06, "loss": 0.631, "step": 1748 }, { "epoch": 0.1569630477216127, "grad_norm": 0.5543241758694158, "learning_rate": 5.2318276996709545e-06, "loss": 0.561, "step": 1749 }, { "epoch": 0.15705279217428372, "grad_norm": 0.532651790432146, "learning_rate": 5.234819024828e-06, "loss": 0.5492, "step": 1750 }, { "epoch": 0.15714253662695474, "grad_norm": 0.4999332967273782, "learning_rate": 5.2378103499850445e-06, "loss": 0.5138, "step": 1751 }, { "epoch": 0.15723228107962575, "grad_norm": 0.5632363529151087, "learning_rate": 5.240801675142089e-06, "loss": 0.5547, "step": 1752 }, { "epoch": 0.1573220255322968, "grad_norm": 0.533856712566303, "learning_rate": 5.243793000299134e-06, "loss": 0.5608, "step": 1753 }, { "epoch": 0.1574117699849678, "grad_norm": 0.5121017142844909, "learning_rate": 5.246784325456178e-06, "loss": 0.5309, "step": 1754 }, { "epoch": 0.15750151443763882, "grad_norm": 0.5586333098493351, "learning_rate": 5.249775650613223e-06, "loss": 0.5941, "step": 1755 }, { "epoch": 0.15759125889030984, "grad_norm": 0.5139487707605895, "learning_rate": 5.252766975770267e-06, "loss": 0.5228, "step": 1756 }, { "epoch": 0.15768100334298085, "grad_norm": 0.5041521070511038, "learning_rate": 5.255758300927312e-06, "loss": 0.5334, "step": 1757 }, { "epoch": 0.1577707477956519, "grad_norm": 0.4997365275810885, "learning_rate": 5.258749626084356e-06, "loss": 0.5421, "step": 1758 }, { "epoch": 0.1578604922483229, "grad_norm": 0.5299884069062968, "learning_rate": 5.261740951241401e-06, "loss": 0.5572, "step": 1759 }, { "epoch": 0.15795023670099392, "grad_norm": 0.49820946602315697, "learning_rate": 5.2647322763984455e-06, "loss": 0.5066, "step": 1760 }, { "epoch": 0.15803998115366494, "grad_norm": 0.4642900978512135, "learning_rate": 5.26772360155549e-06, "loss": 0.4714, "step": 1761 }, { "epoch": 0.15812972560633595, "grad_norm": 0.5365374972327704, "learning_rate": 5.270714926712535e-06, "loss": 0.5866, "step": 1762 }, { "epoch": 0.15821947005900697, "grad_norm": 0.51776424411797, "learning_rate": 5.273706251869579e-06, "loss": 0.5146, "step": 1763 }, { "epoch": 0.158309214511678, "grad_norm": 0.5310652102228949, "learning_rate": 5.276697577026624e-06, "loss": 0.552, "step": 1764 }, { "epoch": 0.15839895896434902, "grad_norm": 0.5301728715429987, "learning_rate": 5.2796889021836675e-06, "loss": 0.5752, "step": 1765 }, { "epoch": 0.15848870341702004, "grad_norm": 0.527399285078392, "learning_rate": 5.282680227340712e-06, "loss": 0.5215, "step": 1766 }, { "epoch": 0.15857844786969105, "grad_norm": 0.47729764756654675, "learning_rate": 5.2856715524977566e-06, "loss": 0.4307, "step": 1767 }, { "epoch": 0.15866819232236207, "grad_norm": 0.49119386301982043, "learning_rate": 5.288662877654801e-06, "loss": 0.4998, "step": 1768 }, { "epoch": 0.15875793677503308, "grad_norm": 0.5082513214108191, "learning_rate": 5.291654202811846e-06, "loss": 0.4574, "step": 1769 }, { "epoch": 0.15884768122770412, "grad_norm": 0.4975350763358366, "learning_rate": 5.29464552796889e-06, "loss": 0.5371, "step": 1770 }, { "epoch": 0.15893742568037514, "grad_norm": 0.5223288895601479, "learning_rate": 5.297636853125935e-06, "loss": 0.516, "step": 1771 }, { "epoch": 0.15902717013304615, "grad_norm": 0.500688295942012, "learning_rate": 5.300628178282979e-06, "loss": 0.5475, "step": 1772 }, { "epoch": 0.15911691458571717, "grad_norm": 0.5298744460781725, "learning_rate": 5.303619503440024e-06, "loss": 0.5394, "step": 1773 }, { "epoch": 0.15920665903838818, "grad_norm": 0.5360649438586598, "learning_rate": 5.3066108285970685e-06, "loss": 0.5532, "step": 1774 }, { "epoch": 0.1592964034910592, "grad_norm": 0.5236555814179775, "learning_rate": 5.309602153754113e-06, "loss": 0.5536, "step": 1775 }, { "epoch": 0.15938614794373024, "grad_norm": 0.48308552533749666, "learning_rate": 5.312593478911158e-06, "loss": 0.5103, "step": 1776 }, { "epoch": 0.15947589239640125, "grad_norm": 0.4660539154196458, "learning_rate": 5.315584804068202e-06, "loss": 0.5122, "step": 1777 }, { "epoch": 0.15956563684907227, "grad_norm": 0.5006086797744376, "learning_rate": 5.318576129225247e-06, "loss": 0.5236, "step": 1778 }, { "epoch": 0.15965538130174328, "grad_norm": 0.4759806992454269, "learning_rate": 5.321567454382291e-06, "loss": 0.5149, "step": 1779 }, { "epoch": 0.1597451257544143, "grad_norm": 0.5302799797751709, "learning_rate": 5.324558779539336e-06, "loss": 0.5949, "step": 1780 }, { "epoch": 0.1598348702070853, "grad_norm": 0.5150951625038307, "learning_rate": 5.32755010469638e-06, "loss": 0.5456, "step": 1781 }, { "epoch": 0.15992461465975635, "grad_norm": 0.5069339355679982, "learning_rate": 5.330541429853425e-06, "loss": 0.533, "step": 1782 }, { "epoch": 0.16001435911242737, "grad_norm": 0.4811396928134938, "learning_rate": 5.3335327550104695e-06, "loss": 0.4562, "step": 1783 }, { "epoch": 0.16010410356509838, "grad_norm": 0.4788270509022501, "learning_rate": 5.336524080167514e-06, "loss": 0.5032, "step": 1784 }, { "epoch": 0.1601938480177694, "grad_norm": 0.4923376914558422, "learning_rate": 5.339515405324559e-06, "loss": 0.5284, "step": 1785 }, { "epoch": 0.1602835924704404, "grad_norm": 0.513831062867831, "learning_rate": 5.342506730481604e-06, "loss": 0.5455, "step": 1786 }, { "epoch": 0.16037333692311145, "grad_norm": 0.511547874876933, "learning_rate": 5.345498055638649e-06, "loss": 0.5248, "step": 1787 }, { "epoch": 0.16046308137578247, "grad_norm": 0.5721331360369599, "learning_rate": 5.348489380795693e-06, "loss": 0.5508, "step": 1788 }, { "epoch": 0.16055282582845348, "grad_norm": 0.4811474498667285, "learning_rate": 5.351480705952738e-06, "loss": 0.4567, "step": 1789 }, { "epoch": 0.1606425702811245, "grad_norm": 0.5144591102572058, "learning_rate": 5.354472031109782e-06, "loss": 0.5664, "step": 1790 }, { "epoch": 0.1607323147337955, "grad_norm": 0.5578172439044763, "learning_rate": 5.357463356266827e-06, "loss": 0.6389, "step": 1791 }, { "epoch": 0.16082205918646653, "grad_norm": 0.4918906627604277, "learning_rate": 5.360454681423871e-06, "loss": 0.5205, "step": 1792 }, { "epoch": 0.16091180363913757, "grad_norm": 0.5571485459363381, "learning_rate": 5.363446006580916e-06, "loss": 0.5922, "step": 1793 }, { "epoch": 0.16100154809180858, "grad_norm": 0.5676175793034938, "learning_rate": 5.3664373317379605e-06, "loss": 0.6284, "step": 1794 }, { "epoch": 0.1610912925444796, "grad_norm": 0.5114369870671024, "learning_rate": 5.369428656895005e-06, "loss": 0.5747, "step": 1795 }, { "epoch": 0.1611810369971506, "grad_norm": 0.5021623966781928, "learning_rate": 5.37241998205205e-06, "loss": 0.5168, "step": 1796 }, { "epoch": 0.16127078144982163, "grad_norm": 0.5504867113863332, "learning_rate": 5.375411307209094e-06, "loss": 0.5859, "step": 1797 }, { "epoch": 0.16136052590249264, "grad_norm": 0.47885176322928347, "learning_rate": 5.378402632366139e-06, "loss": 0.4905, "step": 1798 }, { "epoch": 0.16145027035516368, "grad_norm": 0.5839001773009034, "learning_rate": 5.381393957523183e-06, "loss": 0.6186, "step": 1799 }, { "epoch": 0.1615400148078347, "grad_norm": 0.49597570194776597, "learning_rate": 5.384385282680228e-06, "loss": 0.4815, "step": 1800 }, { "epoch": 0.1616297592605057, "grad_norm": 0.5140903324665026, "learning_rate": 5.387376607837272e-06, "loss": 0.5224, "step": 1801 }, { "epoch": 0.16171950371317673, "grad_norm": 0.5192257499668832, "learning_rate": 5.390367932994317e-06, "loss": 0.5039, "step": 1802 }, { "epoch": 0.16180924816584774, "grad_norm": 0.47099170361938947, "learning_rate": 5.3933592581513615e-06, "loss": 0.5026, "step": 1803 }, { "epoch": 0.16189899261851876, "grad_norm": 0.5275582923264001, "learning_rate": 5.396350583308406e-06, "loss": 0.5565, "step": 1804 }, { "epoch": 0.1619887370711898, "grad_norm": 0.5614356538532224, "learning_rate": 5.399341908465451e-06, "loss": 0.5695, "step": 1805 }, { "epoch": 0.1620784815238608, "grad_norm": 0.5118273027442722, "learning_rate": 5.402333233622495e-06, "loss": 0.5211, "step": 1806 }, { "epoch": 0.16216822597653183, "grad_norm": 0.47977466228010573, "learning_rate": 5.40532455877954e-06, "loss": 0.481, "step": 1807 }, { "epoch": 0.16225797042920284, "grad_norm": 0.47487919861256467, "learning_rate": 5.408315883936584e-06, "loss": 0.4948, "step": 1808 }, { "epoch": 0.16234771488187386, "grad_norm": 0.4666158813330409, "learning_rate": 5.411307209093629e-06, "loss": 0.4784, "step": 1809 }, { "epoch": 0.16243745933454487, "grad_norm": 0.5464881433612977, "learning_rate": 5.4142985342506734e-06, "loss": 0.5463, "step": 1810 }, { "epoch": 0.1625272037872159, "grad_norm": 0.4949026437023704, "learning_rate": 5.417289859407718e-06, "loss": 0.4904, "step": 1811 }, { "epoch": 0.16261694823988693, "grad_norm": 0.5291893134279086, "learning_rate": 5.4202811845647625e-06, "loss": 0.5228, "step": 1812 }, { "epoch": 0.16270669269255794, "grad_norm": 0.5317184439035925, "learning_rate": 5.423272509721807e-06, "loss": 0.5307, "step": 1813 }, { "epoch": 0.16279643714522896, "grad_norm": 0.5330068529069142, "learning_rate": 5.4262638348788525e-06, "loss": 0.5721, "step": 1814 }, { "epoch": 0.16288618159789997, "grad_norm": 0.5129833460028799, "learning_rate": 5.429255160035897e-06, "loss": 0.5141, "step": 1815 }, { "epoch": 0.162975926050571, "grad_norm": 0.5045729763224541, "learning_rate": 5.432246485192942e-06, "loss": 0.5186, "step": 1816 }, { "epoch": 0.16306567050324203, "grad_norm": 0.5217218396616822, "learning_rate": 5.435237810349986e-06, "loss": 0.5566, "step": 1817 }, { "epoch": 0.16315541495591304, "grad_norm": 0.556961130668929, "learning_rate": 5.438229135507031e-06, "loss": 0.5456, "step": 1818 }, { "epoch": 0.16324515940858406, "grad_norm": 0.4754623521185196, "learning_rate": 5.441220460664075e-06, "loss": 0.4731, "step": 1819 }, { "epoch": 0.16333490386125507, "grad_norm": 0.49446494622951853, "learning_rate": 5.44421178582112e-06, "loss": 0.5362, "step": 1820 }, { "epoch": 0.16342464831392609, "grad_norm": 0.603759925726069, "learning_rate": 5.447203110978164e-06, "loss": 0.5759, "step": 1821 }, { "epoch": 0.16351439276659713, "grad_norm": 0.5239138366090187, "learning_rate": 5.450194436135209e-06, "loss": 0.5552, "step": 1822 }, { "epoch": 0.16360413721926814, "grad_norm": 0.5265058777883079, "learning_rate": 5.4531857612922535e-06, "loss": 0.524, "step": 1823 }, { "epoch": 0.16369388167193916, "grad_norm": 0.5399295334731765, "learning_rate": 5.456177086449298e-06, "loss": 0.5736, "step": 1824 }, { "epoch": 0.16378362612461017, "grad_norm": 0.5002141458349845, "learning_rate": 5.459168411606343e-06, "loss": 0.5331, "step": 1825 }, { "epoch": 0.16387337057728119, "grad_norm": 0.5244584448947246, "learning_rate": 5.462159736763387e-06, "loss": 0.5544, "step": 1826 }, { "epoch": 0.1639631150299522, "grad_norm": 0.47076517531436235, "learning_rate": 5.465151061920431e-06, "loss": 0.4232, "step": 1827 }, { "epoch": 0.16405285948262324, "grad_norm": 0.5115378349731907, "learning_rate": 5.4681423870774755e-06, "loss": 0.5844, "step": 1828 }, { "epoch": 0.16414260393529426, "grad_norm": 0.5473991958404457, "learning_rate": 5.47113371223452e-06, "loss": 0.5811, "step": 1829 }, { "epoch": 0.16423234838796527, "grad_norm": 0.5347184940306094, "learning_rate": 5.474125037391565e-06, "loss": 0.5707, "step": 1830 }, { "epoch": 0.16432209284063629, "grad_norm": 0.45871025991258146, "learning_rate": 5.477116362548609e-06, "loss": 0.465, "step": 1831 }, { "epoch": 0.1644118372933073, "grad_norm": 0.48874968988450973, "learning_rate": 5.480107687705654e-06, "loss": 0.495, "step": 1832 }, { "epoch": 0.16450158174597831, "grad_norm": 0.549349381050542, "learning_rate": 5.483099012862698e-06, "loss": 0.5425, "step": 1833 }, { "epoch": 0.16459132619864936, "grad_norm": 0.48153699108635967, "learning_rate": 5.486090338019743e-06, "loss": 0.4989, "step": 1834 }, { "epoch": 0.16468107065132037, "grad_norm": 0.48845533226727905, "learning_rate": 5.489081663176787e-06, "loss": 0.5517, "step": 1835 }, { "epoch": 0.16477081510399139, "grad_norm": 0.46241113770194664, "learning_rate": 5.492072988333832e-06, "loss": 0.454, "step": 1836 }, { "epoch": 0.1648605595566624, "grad_norm": 0.5017939298484491, "learning_rate": 5.4950643134908765e-06, "loss": 0.503, "step": 1837 }, { "epoch": 0.16495030400933341, "grad_norm": 0.5463389684400182, "learning_rate": 5.498055638647921e-06, "loss": 0.5247, "step": 1838 }, { "epoch": 0.16504004846200443, "grad_norm": 0.4604634691302594, "learning_rate": 5.501046963804966e-06, "loss": 0.4898, "step": 1839 }, { "epoch": 0.16512979291467547, "grad_norm": 0.4536898948065334, "learning_rate": 5.50403828896201e-06, "loss": 0.4438, "step": 1840 }, { "epoch": 0.1652195373673465, "grad_norm": 0.47455224918121686, "learning_rate": 5.507029614119055e-06, "loss": 0.498, "step": 1841 }, { "epoch": 0.1653092818200175, "grad_norm": 0.49250210781554843, "learning_rate": 5.510020939276099e-06, "loss": 0.5066, "step": 1842 }, { "epoch": 0.16539902627268852, "grad_norm": 0.5144315728615794, "learning_rate": 5.513012264433144e-06, "loss": 0.5381, "step": 1843 }, { "epoch": 0.16548877072535953, "grad_norm": 0.500877590708892, "learning_rate": 5.516003589590188e-06, "loss": 0.5452, "step": 1844 }, { "epoch": 0.16557851517803054, "grad_norm": 0.5239963001037492, "learning_rate": 5.518994914747233e-06, "loss": 0.5259, "step": 1845 }, { "epoch": 0.1656682596307016, "grad_norm": 0.5198952230952902, "learning_rate": 5.5219862399042775e-06, "loss": 0.5225, "step": 1846 }, { "epoch": 0.1657580040833726, "grad_norm": 0.4960377611935806, "learning_rate": 5.524977565061322e-06, "loss": 0.5092, "step": 1847 }, { "epoch": 0.16584774853604362, "grad_norm": 0.5376742797159566, "learning_rate": 5.527968890218367e-06, "loss": 0.5692, "step": 1848 }, { "epoch": 0.16593749298871463, "grad_norm": 0.4635395728480016, "learning_rate": 5.530960215375411e-06, "loss": 0.4424, "step": 1849 }, { "epoch": 0.16602723744138564, "grad_norm": 0.5081485563840259, "learning_rate": 5.533951540532457e-06, "loss": 0.4975, "step": 1850 }, { "epoch": 0.1661169818940567, "grad_norm": 0.53685869374204, "learning_rate": 5.536942865689501e-06, "loss": 0.5979, "step": 1851 }, { "epoch": 0.1662067263467277, "grad_norm": 0.484625992422299, "learning_rate": 5.539934190846546e-06, "loss": 0.5034, "step": 1852 }, { "epoch": 0.16629647079939872, "grad_norm": 0.46468830906254005, "learning_rate": 5.54292551600359e-06, "loss": 0.4587, "step": 1853 }, { "epoch": 0.16638621525206973, "grad_norm": 0.5061082886231179, "learning_rate": 5.545916841160635e-06, "loss": 0.5445, "step": 1854 }, { "epoch": 0.16647595970474074, "grad_norm": 0.5191090473451089, "learning_rate": 5.548908166317679e-06, "loss": 0.53, "step": 1855 }, { "epoch": 0.16656570415741176, "grad_norm": 0.5182378729989671, "learning_rate": 5.551899491474724e-06, "loss": 0.5336, "step": 1856 }, { "epoch": 0.1666554486100828, "grad_norm": 0.4868096049216146, "learning_rate": 5.5548908166317685e-06, "loss": 0.4989, "step": 1857 }, { "epoch": 0.16674519306275382, "grad_norm": 0.540632990308571, "learning_rate": 5.557882141788813e-06, "loss": 0.5469, "step": 1858 }, { "epoch": 0.16683493751542483, "grad_norm": 0.45607484111797514, "learning_rate": 5.560873466945858e-06, "loss": 0.4734, "step": 1859 }, { "epoch": 0.16692468196809584, "grad_norm": 0.5123035703688757, "learning_rate": 5.563864792102902e-06, "loss": 0.5384, "step": 1860 }, { "epoch": 0.16701442642076686, "grad_norm": 0.4704187542808598, "learning_rate": 5.566856117259947e-06, "loss": 0.4515, "step": 1861 }, { "epoch": 0.16710417087343787, "grad_norm": 0.5120986198861451, "learning_rate": 5.569847442416991e-06, "loss": 0.5315, "step": 1862 }, { "epoch": 0.16719391532610892, "grad_norm": 0.48802054558732144, "learning_rate": 5.572838767574036e-06, "loss": 0.4697, "step": 1863 }, { "epoch": 0.16728365977877993, "grad_norm": 0.556739647032783, "learning_rate": 5.5758300927310804e-06, "loss": 0.5653, "step": 1864 }, { "epoch": 0.16737340423145095, "grad_norm": 0.5227683717222131, "learning_rate": 5.578821417888125e-06, "loss": 0.5033, "step": 1865 }, { "epoch": 0.16746314868412196, "grad_norm": 0.5112044316948169, "learning_rate": 5.5818127430451695e-06, "loss": 0.5157, "step": 1866 }, { "epoch": 0.16755289313679297, "grad_norm": 0.5017670622553367, "learning_rate": 5.584804068202214e-06, "loss": 0.5409, "step": 1867 }, { "epoch": 0.167642637589464, "grad_norm": 0.5033161508288271, "learning_rate": 5.587795393359259e-06, "loss": 0.5353, "step": 1868 }, { "epoch": 0.16773238204213503, "grad_norm": 0.5343179003869657, "learning_rate": 5.590786718516303e-06, "loss": 0.5235, "step": 1869 }, { "epoch": 0.16782212649480605, "grad_norm": 0.5006825041404216, "learning_rate": 5.593778043673348e-06, "loss": 0.4798, "step": 1870 }, { "epoch": 0.16791187094747706, "grad_norm": 0.5710561703648881, "learning_rate": 5.596769368830392e-06, "loss": 0.5677, "step": 1871 }, { "epoch": 0.16800161540014807, "grad_norm": 0.5035019636776025, "learning_rate": 5.599760693987437e-06, "loss": 0.5179, "step": 1872 }, { "epoch": 0.1680913598528191, "grad_norm": 0.43745312124143376, "learning_rate": 5.6027520191444815e-06, "loss": 0.4233, "step": 1873 }, { "epoch": 0.1681811043054901, "grad_norm": 0.5332947950555701, "learning_rate": 5.605743344301526e-06, "loss": 0.5258, "step": 1874 }, { "epoch": 0.16827084875816115, "grad_norm": 0.48950522018570697, "learning_rate": 5.6087346694585706e-06, "loss": 0.491, "step": 1875 }, { "epoch": 0.16836059321083216, "grad_norm": 0.576117889854056, "learning_rate": 5.611725994615615e-06, "loss": 0.5941, "step": 1876 }, { "epoch": 0.16845033766350317, "grad_norm": 0.5481661073485607, "learning_rate": 5.61471731977266e-06, "loss": 0.5675, "step": 1877 }, { "epoch": 0.1685400821161742, "grad_norm": 0.4837374685763948, "learning_rate": 5.617708644929705e-06, "loss": 0.4911, "step": 1878 }, { "epoch": 0.1686298265688452, "grad_norm": 0.5064321593776046, "learning_rate": 5.62069997008675e-06, "loss": 0.5105, "step": 1879 }, { "epoch": 0.16871957102151625, "grad_norm": 0.5791841838874074, "learning_rate": 5.623691295243794e-06, "loss": 0.6111, "step": 1880 }, { "epoch": 0.16880931547418726, "grad_norm": 0.5207139025038582, "learning_rate": 5.626682620400839e-06, "loss": 0.5264, "step": 1881 }, { "epoch": 0.16889905992685827, "grad_norm": 0.5549823190516381, "learning_rate": 5.629673945557883e-06, "loss": 0.5767, "step": 1882 }, { "epoch": 0.1689888043795293, "grad_norm": 0.5132166790124484, "learning_rate": 5.632665270714928e-06, "loss": 0.5478, "step": 1883 }, { "epoch": 0.1690785488322003, "grad_norm": 0.568688507722128, "learning_rate": 5.6356565958719724e-06, "loss": 0.6117, "step": 1884 }, { "epoch": 0.16916829328487132, "grad_norm": 0.4994863654006143, "learning_rate": 5.638647921029017e-06, "loss": 0.531, "step": 1885 }, { "epoch": 0.16925803773754236, "grad_norm": 0.5000801321313908, "learning_rate": 5.6416392461860616e-06, "loss": 0.4839, "step": 1886 }, { "epoch": 0.16934778219021338, "grad_norm": 0.4849044818620247, "learning_rate": 5.644630571343106e-06, "loss": 0.5059, "step": 1887 }, { "epoch": 0.1694375266428844, "grad_norm": 0.5518843362021102, "learning_rate": 5.647621896500151e-06, "loss": 0.5443, "step": 1888 }, { "epoch": 0.1695272710955554, "grad_norm": 0.47729715712923715, "learning_rate": 5.650613221657194e-06, "loss": 0.5119, "step": 1889 }, { "epoch": 0.16961701554822642, "grad_norm": 0.5110098827362765, "learning_rate": 5.653604546814239e-06, "loss": 0.5273, "step": 1890 }, { "epoch": 0.16970676000089743, "grad_norm": 0.5198841465090496, "learning_rate": 5.6565958719712835e-06, "loss": 0.536, "step": 1891 }, { "epoch": 0.16979650445356848, "grad_norm": 0.4969766130675211, "learning_rate": 5.659587197128328e-06, "loss": 0.5309, "step": 1892 }, { "epoch": 0.1698862489062395, "grad_norm": 0.4910304623251735, "learning_rate": 5.662578522285373e-06, "loss": 0.5179, "step": 1893 }, { "epoch": 0.1699759933589105, "grad_norm": 0.5261170532517466, "learning_rate": 5.665569847442417e-06, "loss": 0.5318, "step": 1894 }, { "epoch": 0.17006573781158152, "grad_norm": 0.4889320058499515, "learning_rate": 5.668561172599462e-06, "loss": 0.422, "step": 1895 }, { "epoch": 0.17015548226425253, "grad_norm": 0.5337535131568819, "learning_rate": 5.671552497756506e-06, "loss": 0.5736, "step": 1896 }, { "epoch": 0.17024522671692355, "grad_norm": 0.5090954660758408, "learning_rate": 5.674543822913551e-06, "loss": 0.5127, "step": 1897 }, { "epoch": 0.1703349711695946, "grad_norm": 0.5059663651060295, "learning_rate": 5.677535148070595e-06, "loss": 0.502, "step": 1898 }, { "epoch": 0.1704247156222656, "grad_norm": 0.523530938195989, "learning_rate": 5.68052647322764e-06, "loss": 0.5509, "step": 1899 }, { "epoch": 0.17051446007493662, "grad_norm": 0.5285216394908461, "learning_rate": 5.6835177983846845e-06, "loss": 0.5336, "step": 1900 }, { "epoch": 0.17060420452760763, "grad_norm": 0.4956664083026419, "learning_rate": 5.686509123541729e-06, "loss": 0.5374, "step": 1901 }, { "epoch": 0.17069394898027865, "grad_norm": 0.49186612851167993, "learning_rate": 5.689500448698774e-06, "loss": 0.5035, "step": 1902 }, { "epoch": 0.17078369343294966, "grad_norm": 0.5610198783980211, "learning_rate": 5.692491773855818e-06, "loss": 0.5314, "step": 1903 }, { "epoch": 0.1708734378856207, "grad_norm": 0.49711491635171684, "learning_rate": 5.695483099012863e-06, "loss": 0.5127, "step": 1904 }, { "epoch": 0.17096318233829172, "grad_norm": 0.5202947612323129, "learning_rate": 5.698474424169907e-06, "loss": 0.572, "step": 1905 }, { "epoch": 0.17105292679096273, "grad_norm": 0.5130990333646753, "learning_rate": 5.701465749326952e-06, "loss": 0.5112, "step": 1906 }, { "epoch": 0.17114267124363375, "grad_norm": 0.46553632873979095, "learning_rate": 5.7044570744839964e-06, "loss": 0.438, "step": 1907 }, { "epoch": 0.17123241569630476, "grad_norm": 0.4801600001937096, "learning_rate": 5.707448399641041e-06, "loss": 0.4486, "step": 1908 }, { "epoch": 0.1713221601489758, "grad_norm": 0.5483067894148347, "learning_rate": 5.7104397247980856e-06, "loss": 0.6, "step": 1909 }, { "epoch": 0.17141190460164682, "grad_norm": 0.5815368537886706, "learning_rate": 5.71343104995513e-06, "loss": 0.5868, "step": 1910 }, { "epoch": 0.17150164905431783, "grad_norm": 0.5005248731316914, "learning_rate": 5.716422375112175e-06, "loss": 0.5673, "step": 1911 }, { "epoch": 0.17159139350698885, "grad_norm": 0.4606955684429137, "learning_rate": 5.719413700269219e-06, "loss": 0.4921, "step": 1912 }, { "epoch": 0.17168113795965986, "grad_norm": 0.6499036527170194, "learning_rate": 5.722405025426264e-06, "loss": 0.5955, "step": 1913 }, { "epoch": 0.17177088241233088, "grad_norm": 0.5134867364010156, "learning_rate": 5.725396350583309e-06, "loss": 0.5075, "step": 1914 }, { "epoch": 0.17186062686500192, "grad_norm": 0.5529478648587292, "learning_rate": 5.728387675740354e-06, "loss": 0.5818, "step": 1915 }, { "epoch": 0.17195037131767293, "grad_norm": 0.5079775657548911, "learning_rate": 5.731379000897398e-06, "loss": 0.4917, "step": 1916 }, { "epoch": 0.17204011577034395, "grad_norm": 0.44119493431640405, "learning_rate": 5.734370326054443e-06, "loss": 0.4409, "step": 1917 }, { "epoch": 0.17212986022301496, "grad_norm": 0.4841606626921162, "learning_rate": 5.737361651211487e-06, "loss": 0.4924, "step": 1918 }, { "epoch": 0.17221960467568598, "grad_norm": 0.5606924000144904, "learning_rate": 5.740352976368532e-06, "loss": 0.5546, "step": 1919 }, { "epoch": 0.172309349128357, "grad_norm": 0.5157704811727629, "learning_rate": 5.7433443015255765e-06, "loss": 0.5041, "step": 1920 }, { "epoch": 0.17239909358102803, "grad_norm": 0.5139454546654606, "learning_rate": 5.746335626682621e-06, "loss": 0.5263, "step": 1921 }, { "epoch": 0.17248883803369905, "grad_norm": 0.5315778683302143, "learning_rate": 5.749326951839666e-06, "loss": 0.5632, "step": 1922 }, { "epoch": 0.17257858248637006, "grad_norm": 0.518131792726586, "learning_rate": 5.75231827699671e-06, "loss": 0.5211, "step": 1923 }, { "epoch": 0.17266832693904108, "grad_norm": 0.49142998150132466, "learning_rate": 5.755309602153755e-06, "loss": 0.5344, "step": 1924 }, { "epoch": 0.1727580713917121, "grad_norm": 0.5192655881730952, "learning_rate": 5.758300927310799e-06, "loss": 0.5152, "step": 1925 }, { "epoch": 0.1728478158443831, "grad_norm": 0.5254095802927795, "learning_rate": 5.761292252467844e-06, "loss": 0.5836, "step": 1926 }, { "epoch": 0.17293756029705415, "grad_norm": 0.5210515719139893, "learning_rate": 5.7642835776248884e-06, "loss": 0.5057, "step": 1927 }, { "epoch": 0.17302730474972516, "grad_norm": 0.45884803558972903, "learning_rate": 5.767274902781933e-06, "loss": 0.4331, "step": 1928 }, { "epoch": 0.17311704920239618, "grad_norm": 0.5332186247331853, "learning_rate": 5.7702662279389776e-06, "loss": 0.5561, "step": 1929 }, { "epoch": 0.1732067936550672, "grad_norm": 0.497039781465732, "learning_rate": 5.773257553096022e-06, "loss": 0.5032, "step": 1930 }, { "epoch": 0.1732965381077382, "grad_norm": 0.5202766239460138, "learning_rate": 5.776248878253067e-06, "loss": 0.5652, "step": 1931 }, { "epoch": 0.17338628256040922, "grad_norm": 0.5324043981168269, "learning_rate": 5.779240203410111e-06, "loss": 0.5472, "step": 1932 }, { "epoch": 0.17347602701308026, "grad_norm": 0.4913888457029192, "learning_rate": 5.782231528567156e-06, "loss": 0.5417, "step": 1933 }, { "epoch": 0.17356577146575128, "grad_norm": 0.5246434340819003, "learning_rate": 5.7852228537242e-06, "loss": 0.5507, "step": 1934 }, { "epoch": 0.1736555159184223, "grad_norm": 0.525575444082107, "learning_rate": 5.788214178881245e-06, "loss": 0.5388, "step": 1935 }, { "epoch": 0.1737452603710933, "grad_norm": 0.5259246711007785, "learning_rate": 5.7912055040382895e-06, "loss": 0.505, "step": 1936 }, { "epoch": 0.17383500482376432, "grad_norm": 0.5789985452612861, "learning_rate": 5.794196829195334e-06, "loss": 0.5718, "step": 1937 }, { "epoch": 0.17392474927643536, "grad_norm": 0.501889738573501, "learning_rate": 5.797188154352379e-06, "loss": 0.506, "step": 1938 }, { "epoch": 0.17401449372910638, "grad_norm": 0.5292307518246456, "learning_rate": 5.800179479509423e-06, "loss": 0.5226, "step": 1939 }, { "epoch": 0.1741042381817774, "grad_norm": 0.6352716018168049, "learning_rate": 5.803170804666468e-06, "loss": 0.6108, "step": 1940 }, { "epoch": 0.1741939826344484, "grad_norm": 0.5168087637124293, "learning_rate": 5.806162129823512e-06, "loss": 0.491, "step": 1941 }, { "epoch": 0.17428372708711942, "grad_norm": 0.5176258614615651, "learning_rate": 5.809153454980558e-06, "loss": 0.5141, "step": 1942 }, { "epoch": 0.17437347153979044, "grad_norm": 0.5412557668341982, "learning_rate": 5.812144780137602e-06, "loss": 0.5144, "step": 1943 }, { "epoch": 0.17446321599246148, "grad_norm": 0.5613217442038927, "learning_rate": 5.815136105294647e-06, "loss": 0.5491, "step": 1944 }, { "epoch": 0.1745529604451325, "grad_norm": 0.4855519120462746, "learning_rate": 5.818127430451691e-06, "loss": 0.4643, "step": 1945 }, { "epoch": 0.1746427048978035, "grad_norm": 0.4849331541074235, "learning_rate": 5.821118755608736e-06, "loss": 0.5151, "step": 1946 }, { "epoch": 0.17473244935047452, "grad_norm": 0.556305664880408, "learning_rate": 5.8241100807657805e-06, "loss": 0.5794, "step": 1947 }, { "epoch": 0.17482219380314554, "grad_norm": 0.45258918507033974, "learning_rate": 5.827101405922825e-06, "loss": 0.4594, "step": 1948 }, { "epoch": 0.17491193825581655, "grad_norm": 0.49521659566466775, "learning_rate": 5.83009273107987e-06, "loss": 0.4566, "step": 1949 }, { "epoch": 0.1750016827084876, "grad_norm": 0.5264109220106203, "learning_rate": 5.833084056236914e-06, "loss": 0.5975, "step": 1950 }, { "epoch": 0.1750914271611586, "grad_norm": 0.5132347736953564, "learning_rate": 5.836075381393958e-06, "loss": 0.4963, "step": 1951 }, { "epoch": 0.17518117161382962, "grad_norm": 0.4972486579353965, "learning_rate": 5.839066706551002e-06, "loss": 0.4621, "step": 1952 }, { "epoch": 0.17527091606650064, "grad_norm": 0.5041167035470479, "learning_rate": 5.842058031708047e-06, "loss": 0.5042, "step": 1953 }, { "epoch": 0.17536066051917165, "grad_norm": 0.5398194014363422, "learning_rate": 5.8450493568650915e-06, "loss": 0.5553, "step": 1954 }, { "epoch": 0.17545040497184267, "grad_norm": 0.5118502999126576, "learning_rate": 5.848040682022136e-06, "loss": 0.5077, "step": 1955 }, { "epoch": 0.1755401494245137, "grad_norm": 0.5347709301697438, "learning_rate": 5.851032007179181e-06, "loss": 0.5586, "step": 1956 }, { "epoch": 0.17562989387718472, "grad_norm": 0.4928910725933579, "learning_rate": 5.854023332336225e-06, "loss": 0.5133, "step": 1957 }, { "epoch": 0.17571963832985574, "grad_norm": 0.5783513788713791, "learning_rate": 5.85701465749327e-06, "loss": 0.5997, "step": 1958 }, { "epoch": 0.17580938278252675, "grad_norm": 0.5036644619003389, "learning_rate": 5.860005982650314e-06, "loss": 0.5733, "step": 1959 }, { "epoch": 0.17589912723519777, "grad_norm": 0.5581502297008584, "learning_rate": 5.862997307807359e-06, "loss": 0.5777, "step": 1960 }, { "epoch": 0.17598887168786878, "grad_norm": 0.5280268977431322, "learning_rate": 5.8659886329644034e-06, "loss": 0.559, "step": 1961 }, { "epoch": 0.17607861614053982, "grad_norm": 0.4830466559417759, "learning_rate": 5.868979958121448e-06, "loss": 0.4818, "step": 1962 }, { "epoch": 0.17616836059321084, "grad_norm": 0.5138619495675187, "learning_rate": 5.8719712832784925e-06, "loss": 0.5252, "step": 1963 }, { "epoch": 0.17625810504588185, "grad_norm": 0.47130110017936905, "learning_rate": 5.874962608435537e-06, "loss": 0.4645, "step": 1964 }, { "epoch": 0.17634784949855287, "grad_norm": 0.5193269437367535, "learning_rate": 5.877953933592582e-06, "loss": 0.5564, "step": 1965 }, { "epoch": 0.17643759395122388, "grad_norm": 0.5486007441564358, "learning_rate": 5.880945258749626e-06, "loss": 0.5618, "step": 1966 }, { "epoch": 0.1765273384038949, "grad_norm": 0.5519436446995214, "learning_rate": 5.883936583906671e-06, "loss": 0.5533, "step": 1967 }, { "epoch": 0.17661708285656594, "grad_norm": 0.4929831634726063, "learning_rate": 5.886927909063715e-06, "loss": 0.5363, "step": 1968 }, { "epoch": 0.17670682730923695, "grad_norm": 0.4867881099571629, "learning_rate": 5.88991923422076e-06, "loss": 0.4834, "step": 1969 }, { "epoch": 0.17679657176190797, "grad_norm": 0.5473313735912694, "learning_rate": 5.8929105593778045e-06, "loss": 0.5667, "step": 1970 }, { "epoch": 0.17688631621457898, "grad_norm": 0.5017872599378747, "learning_rate": 5.895901884534849e-06, "loss": 0.5175, "step": 1971 }, { "epoch": 0.17697606066725, "grad_norm": 0.5074592260614944, "learning_rate": 5.8988932096918936e-06, "loss": 0.5679, "step": 1972 }, { "epoch": 0.17706580511992104, "grad_norm": 0.5161906786990352, "learning_rate": 5.901884534848938e-06, "loss": 0.5556, "step": 1973 }, { "epoch": 0.17715554957259205, "grad_norm": 0.49859849644738263, "learning_rate": 5.904875860005983e-06, "loss": 0.5183, "step": 1974 }, { "epoch": 0.17724529402526307, "grad_norm": 0.49365079603827255, "learning_rate": 5.907867185163027e-06, "loss": 0.5411, "step": 1975 }, { "epoch": 0.17733503847793408, "grad_norm": 0.5151428793710121, "learning_rate": 5.910858510320072e-06, "loss": 0.54, "step": 1976 }, { "epoch": 0.1774247829306051, "grad_norm": 0.49495226472456205, "learning_rate": 5.913849835477116e-06, "loss": 0.5135, "step": 1977 }, { "epoch": 0.1775145273832761, "grad_norm": 0.5442383386313234, "learning_rate": 5.916841160634162e-06, "loss": 0.5669, "step": 1978 }, { "epoch": 0.17760427183594715, "grad_norm": 0.5455138338639747, "learning_rate": 5.919832485791206e-06, "loss": 0.5772, "step": 1979 }, { "epoch": 0.17769401628861817, "grad_norm": 0.5185115362611173, "learning_rate": 5.922823810948251e-06, "loss": 0.5149, "step": 1980 }, { "epoch": 0.17778376074128918, "grad_norm": 0.5390512802042546, "learning_rate": 5.9258151361052954e-06, "loss": 0.5462, "step": 1981 }, { "epoch": 0.1778735051939602, "grad_norm": 0.4843632675135168, "learning_rate": 5.92880646126234e-06, "loss": 0.4942, "step": 1982 }, { "epoch": 0.1779632496466312, "grad_norm": 0.4863530222869499, "learning_rate": 5.9317977864193846e-06, "loss": 0.4831, "step": 1983 }, { "epoch": 0.17805299409930223, "grad_norm": 0.5296062929367586, "learning_rate": 5.934789111576429e-06, "loss": 0.5472, "step": 1984 }, { "epoch": 0.17814273855197327, "grad_norm": 0.5538990239972209, "learning_rate": 5.937780436733474e-06, "loss": 0.5783, "step": 1985 }, { "epoch": 0.17823248300464428, "grad_norm": 0.49635055340085366, "learning_rate": 5.940771761890518e-06, "loss": 0.5045, "step": 1986 }, { "epoch": 0.1783222274573153, "grad_norm": 0.5657227956185329, "learning_rate": 5.943763087047563e-06, "loss": 0.5888, "step": 1987 }, { "epoch": 0.1784119719099863, "grad_norm": 0.5124338873681301, "learning_rate": 5.946754412204607e-06, "loss": 0.5137, "step": 1988 }, { "epoch": 0.17850171636265733, "grad_norm": 0.532742277114245, "learning_rate": 5.949745737361652e-06, "loss": 0.5701, "step": 1989 }, { "epoch": 0.17859146081532834, "grad_norm": 0.5081915556045045, "learning_rate": 5.9527370625186965e-06, "loss": 0.4794, "step": 1990 }, { "epoch": 0.17868120526799938, "grad_norm": 0.5203447552324348, "learning_rate": 5.955728387675741e-06, "loss": 0.5308, "step": 1991 }, { "epoch": 0.1787709497206704, "grad_norm": 0.5003789790671893, "learning_rate": 5.958719712832786e-06, "loss": 0.5081, "step": 1992 }, { "epoch": 0.1788606941733414, "grad_norm": 0.5008376977025094, "learning_rate": 5.96171103798983e-06, "loss": 0.526, "step": 1993 }, { "epoch": 0.17895043862601243, "grad_norm": 0.5342209671479892, "learning_rate": 5.964702363146875e-06, "loss": 0.5002, "step": 1994 }, { "epoch": 0.17904018307868344, "grad_norm": 0.501789128647425, "learning_rate": 5.967693688303919e-06, "loss": 0.5071, "step": 1995 }, { "epoch": 0.17912992753135445, "grad_norm": 0.520381812199398, "learning_rate": 5.970685013460964e-06, "loss": 0.5313, "step": 1996 }, { "epoch": 0.1792196719840255, "grad_norm": 0.5485016942869911, "learning_rate": 5.973676338618008e-06, "loss": 0.5368, "step": 1997 }, { "epoch": 0.1793094164366965, "grad_norm": 0.540714574326739, "learning_rate": 5.976667663775053e-06, "loss": 0.5669, "step": 1998 }, { "epoch": 0.17939916088936753, "grad_norm": 0.5321213387381252, "learning_rate": 5.9796589889320975e-06, "loss": 0.5326, "step": 1999 }, { "epoch": 0.17948890534203854, "grad_norm": 0.5489885466523363, "learning_rate": 5.982650314089142e-06, "loss": 0.5777, "step": 2000 }, { "epoch": 0.17957864979470956, "grad_norm": 0.4693443238940528, "learning_rate": 5.985641639246187e-06, "loss": 0.4306, "step": 2001 }, { "epoch": 0.1796683942473806, "grad_norm": 0.5716310279914231, "learning_rate": 5.988632964403231e-06, "loss": 0.5326, "step": 2002 }, { "epoch": 0.1797581387000516, "grad_norm": 0.5346927992903893, "learning_rate": 5.991624289560276e-06, "loss": 0.4962, "step": 2003 }, { "epoch": 0.17984788315272263, "grad_norm": 0.5393662344368917, "learning_rate": 5.99461561471732e-06, "loss": 0.5832, "step": 2004 }, { "epoch": 0.17993762760539364, "grad_norm": 0.5236573192459311, "learning_rate": 5.997606939874365e-06, "loss": 0.5673, "step": 2005 }, { "epoch": 0.18002737205806466, "grad_norm": 0.5429047588219786, "learning_rate": 6.00059826503141e-06, "loss": 0.5726, "step": 2006 }, { "epoch": 0.18011711651073567, "grad_norm": 0.5129187363009374, "learning_rate": 6.003589590188455e-06, "loss": 0.5008, "step": 2007 }, { "epoch": 0.1802068609634067, "grad_norm": 0.47561788832759877, "learning_rate": 6.006580915345499e-06, "loss": 0.4493, "step": 2008 }, { "epoch": 0.18029660541607773, "grad_norm": 0.4787405295817862, "learning_rate": 6.009572240502544e-06, "loss": 0.4867, "step": 2009 }, { "epoch": 0.18038634986874874, "grad_norm": 0.5109009553396214, "learning_rate": 6.0125635656595885e-06, "loss": 0.4554, "step": 2010 }, { "epoch": 0.18047609432141976, "grad_norm": 0.5487861883656131, "learning_rate": 6.015554890816633e-06, "loss": 0.5846, "step": 2011 }, { "epoch": 0.18056583877409077, "grad_norm": 0.502196458341638, "learning_rate": 6.018546215973676e-06, "loss": 0.4445, "step": 2012 }, { "epoch": 0.18065558322676178, "grad_norm": 0.4931919220590818, "learning_rate": 6.0215375411307205e-06, "loss": 0.5056, "step": 2013 }, { "epoch": 0.18074532767943283, "grad_norm": 0.5354508402594995, "learning_rate": 6.024528866287766e-06, "loss": 0.5173, "step": 2014 }, { "epoch": 0.18083507213210384, "grad_norm": 0.49655504626654307, "learning_rate": 6.0275201914448104e-06, "loss": 0.4977, "step": 2015 }, { "epoch": 0.18092481658477486, "grad_norm": 0.498154460952482, "learning_rate": 6.030511516601855e-06, "loss": 0.5433, "step": 2016 }, { "epoch": 0.18101456103744587, "grad_norm": 0.5018271592530772, "learning_rate": 6.0335028417588995e-06, "loss": 0.4885, "step": 2017 }, { "epoch": 0.18110430549011688, "grad_norm": 0.5117954045800438, "learning_rate": 6.036494166915944e-06, "loss": 0.5402, "step": 2018 }, { "epoch": 0.1811940499427879, "grad_norm": 0.4976682058432374, "learning_rate": 6.039485492072989e-06, "loss": 0.4908, "step": 2019 }, { "epoch": 0.18128379439545894, "grad_norm": 0.5326061520787594, "learning_rate": 6.042476817230033e-06, "loss": 0.5692, "step": 2020 }, { "epoch": 0.18137353884812996, "grad_norm": 0.5544549001360289, "learning_rate": 6.045468142387078e-06, "loss": 0.6076, "step": 2021 }, { "epoch": 0.18146328330080097, "grad_norm": 0.49358183383158444, "learning_rate": 6.048459467544122e-06, "loss": 0.5137, "step": 2022 }, { "epoch": 0.18155302775347198, "grad_norm": 0.5011697442319688, "learning_rate": 6.051450792701167e-06, "loss": 0.5036, "step": 2023 }, { "epoch": 0.181642772206143, "grad_norm": 0.4712423315783183, "learning_rate": 6.0544421178582115e-06, "loss": 0.4816, "step": 2024 }, { "epoch": 0.18173251665881401, "grad_norm": 0.501043732140355, "learning_rate": 6.057433443015256e-06, "loss": 0.5624, "step": 2025 }, { "epoch": 0.18182226111148506, "grad_norm": 0.5067211642843339, "learning_rate": 6.0604247681723006e-06, "loss": 0.5399, "step": 2026 }, { "epoch": 0.18191200556415607, "grad_norm": 0.6568123137369493, "learning_rate": 6.063416093329345e-06, "loss": 0.7095, "step": 2027 }, { "epoch": 0.18200175001682709, "grad_norm": 0.4721779936042633, "learning_rate": 6.06640741848639e-06, "loss": 0.4961, "step": 2028 }, { "epoch": 0.1820914944694981, "grad_norm": 0.5249432370989197, "learning_rate": 6.069398743643434e-06, "loss": 0.555, "step": 2029 }, { "epoch": 0.18218123892216911, "grad_norm": 0.5340787386770139, "learning_rate": 6.072390068800479e-06, "loss": 0.4809, "step": 2030 }, { "epoch": 0.18227098337484016, "grad_norm": 0.5221257058846588, "learning_rate": 6.075381393957523e-06, "loss": 0.5483, "step": 2031 }, { "epoch": 0.18236072782751117, "grad_norm": 0.4824236555394174, "learning_rate": 6.078372719114568e-06, "loss": 0.4779, "step": 2032 }, { "epoch": 0.18245047228018219, "grad_norm": 0.5369350108258162, "learning_rate": 6.0813640442716125e-06, "loss": 0.5581, "step": 2033 }, { "epoch": 0.1825402167328532, "grad_norm": 0.5125364830944303, "learning_rate": 6.084355369428657e-06, "loss": 0.5516, "step": 2034 }, { "epoch": 0.18262996118552421, "grad_norm": 0.5015065375219113, "learning_rate": 6.087346694585702e-06, "loss": 0.5175, "step": 2035 }, { "epoch": 0.18271970563819523, "grad_norm": 0.5615691514484349, "learning_rate": 6.090338019742746e-06, "loss": 0.5169, "step": 2036 }, { "epoch": 0.18280945009086627, "grad_norm": 0.5446066470274484, "learning_rate": 6.093329344899791e-06, "loss": 0.5462, "step": 2037 }, { "epoch": 0.18289919454353729, "grad_norm": 0.47068973270607856, "learning_rate": 6.096320670056835e-06, "loss": 0.4834, "step": 2038 }, { "epoch": 0.1829889389962083, "grad_norm": 0.4818399253140172, "learning_rate": 6.09931199521388e-06, "loss": 0.4751, "step": 2039 }, { "epoch": 0.18307868344887931, "grad_norm": 0.5562175138013783, "learning_rate": 6.102303320370924e-06, "loss": 0.5885, "step": 2040 }, { "epoch": 0.18316842790155033, "grad_norm": 0.5170130299823665, "learning_rate": 6.105294645527969e-06, "loss": 0.5208, "step": 2041 }, { "epoch": 0.18325817235422134, "grad_norm": 0.46318052655512015, "learning_rate": 6.108285970685014e-06, "loss": 0.43, "step": 2042 }, { "epoch": 0.18334791680689239, "grad_norm": 0.4871343182427982, "learning_rate": 6.111277295842059e-06, "loss": 0.5128, "step": 2043 }, { "epoch": 0.1834376612595634, "grad_norm": 0.4982023073865844, "learning_rate": 6.1142686209991035e-06, "loss": 0.4593, "step": 2044 }, { "epoch": 0.18352740571223441, "grad_norm": 0.4966221848544982, "learning_rate": 6.117259946156148e-06, "loss": 0.5316, "step": 2045 }, { "epoch": 0.18361715016490543, "grad_norm": 0.5076840256924927, "learning_rate": 6.120251271313193e-06, "loss": 0.5235, "step": 2046 }, { "epoch": 0.18370689461757644, "grad_norm": 0.5257879974904581, "learning_rate": 6.123242596470237e-06, "loss": 0.5725, "step": 2047 }, { "epoch": 0.18379663907024746, "grad_norm": 0.4692458405944962, "learning_rate": 6.126233921627282e-06, "loss": 0.4677, "step": 2048 }, { "epoch": 0.1838863835229185, "grad_norm": 0.4988404985451217, "learning_rate": 6.129225246784326e-06, "loss": 0.4772, "step": 2049 }, { "epoch": 0.18397612797558952, "grad_norm": 0.4888031813147829, "learning_rate": 6.132216571941371e-06, "loss": 0.5259, "step": 2050 }, { "epoch": 0.18406587242826053, "grad_norm": 0.4904773524939794, "learning_rate": 6.135207897098415e-06, "loss": 0.5276, "step": 2051 }, { "epoch": 0.18415561688093154, "grad_norm": 0.4870407661104616, "learning_rate": 6.13819922225546e-06, "loss": 0.4716, "step": 2052 }, { "epoch": 0.18424536133360256, "grad_norm": 0.5020884642029193, "learning_rate": 6.1411905474125045e-06, "loss": 0.5387, "step": 2053 }, { "epoch": 0.18433510578627357, "grad_norm": 0.4775431399806339, "learning_rate": 6.144181872569549e-06, "loss": 0.4794, "step": 2054 }, { "epoch": 0.18442485023894462, "grad_norm": 0.4690403799297792, "learning_rate": 6.147173197726594e-06, "loss": 0.4603, "step": 2055 }, { "epoch": 0.18451459469161563, "grad_norm": 0.5003209678870968, "learning_rate": 6.150164522883638e-06, "loss": 0.5339, "step": 2056 }, { "epoch": 0.18460433914428664, "grad_norm": 0.5190781042817918, "learning_rate": 6.153155848040683e-06, "loss": 0.4998, "step": 2057 }, { "epoch": 0.18469408359695766, "grad_norm": 0.4940796398973131, "learning_rate": 6.156147173197727e-06, "loss": 0.4987, "step": 2058 }, { "epoch": 0.18478382804962867, "grad_norm": 0.5344336343919228, "learning_rate": 6.159138498354772e-06, "loss": 0.5438, "step": 2059 }, { "epoch": 0.1848735725022997, "grad_norm": 0.5305227743186998, "learning_rate": 6.162129823511816e-06, "loss": 0.5611, "step": 2060 }, { "epoch": 0.18496331695497073, "grad_norm": 0.5227352352631847, "learning_rate": 6.165121148668861e-06, "loss": 0.5562, "step": 2061 }, { "epoch": 0.18505306140764174, "grad_norm": 0.5321034660183137, "learning_rate": 6.1681124738259055e-06, "loss": 0.533, "step": 2062 }, { "epoch": 0.18514280586031276, "grad_norm": 0.5380099184372868, "learning_rate": 6.17110379898295e-06, "loss": 0.553, "step": 2063 }, { "epoch": 0.18523255031298377, "grad_norm": 0.47887457406630285, "learning_rate": 6.174095124139995e-06, "loss": 0.4669, "step": 2064 }, { "epoch": 0.1853222947656548, "grad_norm": 0.5539482665220484, "learning_rate": 6.177086449297039e-06, "loss": 0.6042, "step": 2065 }, { "epoch": 0.18541203921832583, "grad_norm": 0.4731253819169216, "learning_rate": 6.180077774454084e-06, "loss": 0.5165, "step": 2066 }, { "epoch": 0.18550178367099684, "grad_norm": 0.619854133956064, "learning_rate": 6.183069099611128e-06, "loss": 0.6217, "step": 2067 }, { "epoch": 0.18559152812366786, "grad_norm": 0.544855457268899, "learning_rate": 6.186060424768173e-06, "loss": 0.513, "step": 2068 }, { "epoch": 0.18568127257633887, "grad_norm": 0.5313533416628157, "learning_rate": 6.1890517499252174e-06, "loss": 0.5641, "step": 2069 }, { "epoch": 0.1857710170290099, "grad_norm": 0.5366877631139059, "learning_rate": 6.192043075082263e-06, "loss": 0.6108, "step": 2070 }, { "epoch": 0.1858607614816809, "grad_norm": 0.5485755755657651, "learning_rate": 6.195034400239307e-06, "loss": 0.5473, "step": 2071 }, { "epoch": 0.18595050593435195, "grad_norm": 0.4943654146960629, "learning_rate": 6.198025725396352e-06, "loss": 0.5169, "step": 2072 }, { "epoch": 0.18604025038702296, "grad_norm": 0.49564022825262266, "learning_rate": 6.2010170505533965e-06, "loss": 0.5373, "step": 2073 }, { "epoch": 0.18612999483969397, "grad_norm": 0.5093398485334951, "learning_rate": 6.204008375710439e-06, "loss": 0.5433, "step": 2074 }, { "epoch": 0.186219739292365, "grad_norm": 0.5724260177575774, "learning_rate": 6.206999700867484e-06, "loss": 0.5453, "step": 2075 }, { "epoch": 0.186309483745036, "grad_norm": 0.49024830285733245, "learning_rate": 6.2099910260245285e-06, "loss": 0.5029, "step": 2076 }, { "epoch": 0.18639922819770702, "grad_norm": 0.47212069856340044, "learning_rate": 6.212982351181573e-06, "loss": 0.4477, "step": 2077 }, { "epoch": 0.18648897265037806, "grad_norm": 0.5050491236691298, "learning_rate": 6.2159736763386184e-06, "loss": 0.4909, "step": 2078 }, { "epoch": 0.18657871710304907, "grad_norm": 0.5246841277862617, "learning_rate": 6.218965001495663e-06, "loss": 0.5177, "step": 2079 }, { "epoch": 0.1866684615557201, "grad_norm": 0.5271038626912928, "learning_rate": 6.2219563266527076e-06, "loss": 0.5565, "step": 2080 }, { "epoch": 0.1867582060083911, "grad_norm": 0.47148663071580316, "learning_rate": 6.224947651809752e-06, "loss": 0.4527, "step": 2081 }, { "epoch": 0.18684795046106212, "grad_norm": 0.5497146976490649, "learning_rate": 6.227938976966797e-06, "loss": 0.518, "step": 2082 }, { "epoch": 0.18693769491373313, "grad_norm": 0.4836195688608772, "learning_rate": 6.230930302123841e-06, "loss": 0.5024, "step": 2083 }, { "epoch": 0.18702743936640417, "grad_norm": 0.48725850436394885, "learning_rate": 6.233921627280886e-06, "loss": 0.5246, "step": 2084 }, { "epoch": 0.1871171838190752, "grad_norm": 0.5134537411261437, "learning_rate": 6.23691295243793e-06, "loss": 0.5267, "step": 2085 }, { "epoch": 0.1872069282717462, "grad_norm": 0.5275470403938438, "learning_rate": 6.239904277594975e-06, "loss": 0.5062, "step": 2086 }, { "epoch": 0.18729667272441722, "grad_norm": 0.4625151736980228, "learning_rate": 6.2428956027520195e-06, "loss": 0.4778, "step": 2087 }, { "epoch": 0.18738641717708823, "grad_norm": 0.4517569075564969, "learning_rate": 6.245886927909064e-06, "loss": 0.4564, "step": 2088 }, { "epoch": 0.18747616162975925, "grad_norm": 0.5540800614766989, "learning_rate": 6.248878253066109e-06, "loss": 0.5632, "step": 2089 }, { "epoch": 0.1875659060824303, "grad_norm": 0.534904266684182, "learning_rate": 6.251869578223153e-06, "loss": 0.6008, "step": 2090 }, { "epoch": 0.1876556505351013, "grad_norm": 0.45743552100243234, "learning_rate": 6.254860903380198e-06, "loss": 0.4368, "step": 2091 }, { "epoch": 0.18774539498777232, "grad_norm": 0.47020465200508826, "learning_rate": 6.257852228537242e-06, "loss": 0.4583, "step": 2092 }, { "epoch": 0.18783513944044333, "grad_norm": 0.5538167778240114, "learning_rate": 6.260843553694287e-06, "loss": 0.5013, "step": 2093 }, { "epoch": 0.18792488389311435, "grad_norm": 0.5285373477262838, "learning_rate": 6.263834878851331e-06, "loss": 0.4895, "step": 2094 }, { "epoch": 0.1880146283457854, "grad_norm": 0.5077726341612304, "learning_rate": 6.266826204008376e-06, "loss": 0.559, "step": 2095 }, { "epoch": 0.1881043727984564, "grad_norm": 0.5358503574513301, "learning_rate": 6.2698175291654205e-06, "loss": 0.5644, "step": 2096 }, { "epoch": 0.18819411725112742, "grad_norm": 0.5072852956601396, "learning_rate": 6.272808854322465e-06, "loss": 0.5287, "step": 2097 }, { "epoch": 0.18828386170379843, "grad_norm": 0.5409007784365898, "learning_rate": 6.27580017947951e-06, "loss": 0.5112, "step": 2098 }, { "epoch": 0.18837360615646945, "grad_norm": 0.4953663033817722, "learning_rate": 6.278791504636554e-06, "loss": 0.5223, "step": 2099 }, { "epoch": 0.18846335060914046, "grad_norm": 0.5236875505040969, "learning_rate": 6.281782829793599e-06, "loss": 0.5523, "step": 2100 }, { "epoch": 0.1885530950618115, "grad_norm": 0.5381001457727748, "learning_rate": 6.284774154950643e-06, "loss": 0.5572, "step": 2101 }, { "epoch": 0.18864283951448252, "grad_norm": 0.4897385047738879, "learning_rate": 6.287765480107688e-06, "loss": 0.4905, "step": 2102 }, { "epoch": 0.18873258396715353, "grad_norm": 0.5244493882960619, "learning_rate": 6.290756805264732e-06, "loss": 0.503, "step": 2103 }, { "epoch": 0.18882232841982455, "grad_norm": 0.5121633553275071, "learning_rate": 6.293748130421777e-06, "loss": 0.4928, "step": 2104 }, { "epoch": 0.18891207287249556, "grad_norm": 0.5020157267206404, "learning_rate": 6.2967394555788215e-06, "loss": 0.5377, "step": 2105 }, { "epoch": 0.18900181732516658, "grad_norm": 0.5135548328349707, "learning_rate": 6.299730780735867e-06, "loss": 0.5079, "step": 2106 }, { "epoch": 0.18909156177783762, "grad_norm": 0.5230272151864656, "learning_rate": 6.3027221058929115e-06, "loss": 0.5454, "step": 2107 }, { "epoch": 0.18918130623050863, "grad_norm": 0.512097647385939, "learning_rate": 6.305713431049956e-06, "loss": 0.5034, "step": 2108 }, { "epoch": 0.18927105068317965, "grad_norm": 0.5158172144809277, "learning_rate": 6.308704756207001e-06, "loss": 0.5186, "step": 2109 }, { "epoch": 0.18936079513585066, "grad_norm": 0.5368529675585023, "learning_rate": 6.311696081364045e-06, "loss": 0.5187, "step": 2110 }, { "epoch": 0.18945053958852168, "grad_norm": 0.4903423857680011, "learning_rate": 6.31468740652109e-06, "loss": 0.5495, "step": 2111 }, { "epoch": 0.1895402840411927, "grad_norm": 0.44237719674299836, "learning_rate": 6.317678731678134e-06, "loss": 0.4095, "step": 2112 }, { "epoch": 0.18963002849386373, "grad_norm": 0.5296668316413675, "learning_rate": 6.320670056835179e-06, "loss": 0.5537, "step": 2113 }, { "epoch": 0.18971977294653475, "grad_norm": 0.5590776035056054, "learning_rate": 6.323661381992223e-06, "loss": 0.5753, "step": 2114 }, { "epoch": 0.18980951739920576, "grad_norm": 0.5315930906379754, "learning_rate": 6.326652707149268e-06, "loss": 0.5205, "step": 2115 }, { "epoch": 0.18989926185187678, "grad_norm": 0.5870466771523024, "learning_rate": 6.3296440323063125e-06, "loss": 0.5852, "step": 2116 }, { "epoch": 0.1899890063045478, "grad_norm": 0.4864231066550138, "learning_rate": 6.332635357463357e-06, "loss": 0.4591, "step": 2117 }, { "epoch": 0.1900787507572188, "grad_norm": 0.49817424694827156, "learning_rate": 6.335626682620402e-06, "loss": 0.48, "step": 2118 }, { "epoch": 0.19016849520988985, "grad_norm": 0.5241122508796144, "learning_rate": 6.338618007777446e-06, "loss": 0.5043, "step": 2119 }, { "epoch": 0.19025823966256086, "grad_norm": 0.48229953903223693, "learning_rate": 6.341609332934491e-06, "loss": 0.4426, "step": 2120 }, { "epoch": 0.19034798411523188, "grad_norm": 0.45231470224796966, "learning_rate": 6.344600658091535e-06, "loss": 0.4285, "step": 2121 }, { "epoch": 0.1904377285679029, "grad_norm": 0.46797331230895534, "learning_rate": 6.34759198324858e-06, "loss": 0.4696, "step": 2122 }, { "epoch": 0.1905274730205739, "grad_norm": 0.48326733413188805, "learning_rate": 6.350583308405624e-06, "loss": 0.5094, "step": 2123 }, { "epoch": 0.19061721747324495, "grad_norm": 0.5395749841095133, "learning_rate": 6.353574633562669e-06, "loss": 0.5474, "step": 2124 }, { "epoch": 0.19070696192591596, "grad_norm": 0.485853926029328, "learning_rate": 6.3565659587197135e-06, "loss": 0.5493, "step": 2125 }, { "epoch": 0.19079670637858698, "grad_norm": 0.49392619103508556, "learning_rate": 6.359557283876758e-06, "loss": 0.5092, "step": 2126 }, { "epoch": 0.190886450831258, "grad_norm": 0.5135428021211064, "learning_rate": 6.362548609033803e-06, "loss": 0.5105, "step": 2127 }, { "epoch": 0.190976195283929, "grad_norm": 0.4847682810012055, "learning_rate": 6.365539934190847e-06, "loss": 0.465, "step": 2128 }, { "epoch": 0.19106593973660002, "grad_norm": 0.5200101098709192, "learning_rate": 6.368531259347892e-06, "loss": 0.5684, "step": 2129 }, { "epoch": 0.19115568418927106, "grad_norm": 0.4728525575772916, "learning_rate": 6.371522584504936e-06, "loss": 0.4494, "step": 2130 }, { "epoch": 0.19124542864194208, "grad_norm": 0.5204225357551695, "learning_rate": 6.374513909661981e-06, "loss": 0.5383, "step": 2131 }, { "epoch": 0.1913351730946131, "grad_norm": 0.5116502267669183, "learning_rate": 6.3775052348190254e-06, "loss": 0.5475, "step": 2132 }, { "epoch": 0.1914249175472841, "grad_norm": 0.5022030271799617, "learning_rate": 6.38049655997607e-06, "loss": 0.4451, "step": 2133 }, { "epoch": 0.19151466199995512, "grad_norm": 0.5070444817003408, "learning_rate": 6.383487885133115e-06, "loss": 0.4834, "step": 2134 }, { "epoch": 0.19160440645262614, "grad_norm": 0.49353471926001097, "learning_rate": 6.38647921029016e-06, "loss": 0.5222, "step": 2135 }, { "epoch": 0.19169415090529718, "grad_norm": 0.5036000165706443, "learning_rate": 6.389470535447203e-06, "loss": 0.552, "step": 2136 }, { "epoch": 0.1917838953579682, "grad_norm": 0.5210074094676055, "learning_rate": 6.392461860604247e-06, "loss": 0.5647, "step": 2137 }, { "epoch": 0.1918736398106392, "grad_norm": 0.49277982767958833, "learning_rate": 6.395453185761292e-06, "loss": 0.4561, "step": 2138 }, { "epoch": 0.19196338426331022, "grad_norm": 0.491245860391364, "learning_rate": 6.3984445109183365e-06, "loss": 0.4793, "step": 2139 }, { "epoch": 0.19205312871598124, "grad_norm": 0.5491299601443563, "learning_rate": 6.401435836075381e-06, "loss": 0.593, "step": 2140 }, { "epoch": 0.19214287316865225, "grad_norm": 0.5242997645031802, "learning_rate": 6.404427161232426e-06, "loss": 0.5561, "step": 2141 }, { "epoch": 0.1922326176213233, "grad_norm": 0.5167285624584752, "learning_rate": 6.407418486389471e-06, "loss": 0.5536, "step": 2142 }, { "epoch": 0.1923223620739943, "grad_norm": 0.5198390866576997, "learning_rate": 6.410409811546516e-06, "loss": 0.5449, "step": 2143 }, { "epoch": 0.19241210652666532, "grad_norm": 0.5146841250220121, "learning_rate": 6.41340113670356e-06, "loss": 0.5652, "step": 2144 }, { "epoch": 0.19250185097933634, "grad_norm": 0.4805580527228009, "learning_rate": 6.416392461860605e-06, "loss": 0.5069, "step": 2145 }, { "epoch": 0.19259159543200735, "grad_norm": 0.5140311490151025, "learning_rate": 6.419383787017649e-06, "loss": 0.5249, "step": 2146 }, { "epoch": 0.19268133988467837, "grad_norm": 0.5111547201929869, "learning_rate": 6.422375112174694e-06, "loss": 0.5415, "step": 2147 }, { "epoch": 0.1927710843373494, "grad_norm": 0.4918048407850209, "learning_rate": 6.425366437331738e-06, "loss": 0.4984, "step": 2148 }, { "epoch": 0.19286082879002042, "grad_norm": 0.5448572418730993, "learning_rate": 6.428357762488783e-06, "loss": 0.6032, "step": 2149 }, { "epoch": 0.19295057324269144, "grad_norm": 0.538983688134602, "learning_rate": 6.4313490876458275e-06, "loss": 0.6171, "step": 2150 }, { "epoch": 0.19304031769536245, "grad_norm": 0.5465816200717913, "learning_rate": 6.434340412802872e-06, "loss": 0.6178, "step": 2151 }, { "epoch": 0.19313006214803347, "grad_norm": 0.5787793900631525, "learning_rate": 6.437331737959917e-06, "loss": 0.5881, "step": 2152 }, { "epoch": 0.19321980660070448, "grad_norm": 0.497656483348711, "learning_rate": 6.440323063116961e-06, "loss": 0.4737, "step": 2153 }, { "epoch": 0.19330955105337552, "grad_norm": 0.49922896755411106, "learning_rate": 6.443314388274006e-06, "loss": 0.5096, "step": 2154 }, { "epoch": 0.19339929550604654, "grad_norm": 0.5475106101567815, "learning_rate": 6.44630571343105e-06, "loss": 0.4879, "step": 2155 }, { "epoch": 0.19348903995871755, "grad_norm": 0.4840961162428055, "learning_rate": 6.449297038588095e-06, "loss": 0.5253, "step": 2156 }, { "epoch": 0.19357878441138857, "grad_norm": 0.5061734834375493, "learning_rate": 6.452288363745139e-06, "loss": 0.4989, "step": 2157 }, { "epoch": 0.19366852886405958, "grad_norm": 0.5399489792175949, "learning_rate": 6.455279688902184e-06, "loss": 0.5242, "step": 2158 }, { "epoch": 0.19375827331673062, "grad_norm": 0.5334399373589036, "learning_rate": 6.4582710140592285e-06, "loss": 0.5474, "step": 2159 }, { "epoch": 0.19384801776940164, "grad_norm": 0.4855942805054095, "learning_rate": 6.461262339216273e-06, "loss": 0.4864, "step": 2160 }, { "epoch": 0.19393776222207265, "grad_norm": 0.4720362157558178, "learning_rate": 6.464253664373318e-06, "loss": 0.4761, "step": 2161 }, { "epoch": 0.19402750667474367, "grad_norm": 0.49458307589990946, "learning_rate": 6.467244989530362e-06, "loss": 0.5044, "step": 2162 }, { "epoch": 0.19411725112741468, "grad_norm": 0.5547817959310809, "learning_rate": 6.470236314687407e-06, "loss": 0.5895, "step": 2163 }, { "epoch": 0.1942069955800857, "grad_norm": 0.500687573305059, "learning_rate": 6.473227639844451e-06, "loss": 0.4975, "step": 2164 }, { "epoch": 0.19429674003275674, "grad_norm": 0.5202055428235498, "learning_rate": 6.476218965001496e-06, "loss": 0.5046, "step": 2165 }, { "epoch": 0.19438648448542775, "grad_norm": 0.5004692302175417, "learning_rate": 6.4792102901585404e-06, "loss": 0.5123, "step": 2166 }, { "epoch": 0.19447622893809877, "grad_norm": 0.49094051929664284, "learning_rate": 6.482201615315585e-06, "loss": 0.4894, "step": 2167 }, { "epoch": 0.19456597339076978, "grad_norm": 0.5385711919153997, "learning_rate": 6.4851929404726295e-06, "loss": 0.5902, "step": 2168 }, { "epoch": 0.1946557178434408, "grad_norm": 0.518095892709402, "learning_rate": 6.488184265629674e-06, "loss": 0.5044, "step": 2169 }, { "epoch": 0.1947454622961118, "grad_norm": 0.4982559079027668, "learning_rate": 6.4911755907867195e-06, "loss": 0.5437, "step": 2170 }, { "epoch": 0.19483520674878285, "grad_norm": 0.5577169342218045, "learning_rate": 6.494166915943764e-06, "loss": 0.55, "step": 2171 }, { "epoch": 0.19492495120145387, "grad_norm": 0.5713141221113504, "learning_rate": 6.497158241100809e-06, "loss": 0.6122, "step": 2172 }, { "epoch": 0.19501469565412488, "grad_norm": 0.5414662798423631, "learning_rate": 6.500149566257853e-06, "loss": 0.5568, "step": 2173 }, { "epoch": 0.1951044401067959, "grad_norm": 0.5127592534902159, "learning_rate": 6.503140891414898e-06, "loss": 0.5247, "step": 2174 }, { "epoch": 0.1951941845594669, "grad_norm": 0.5231085484576445, "learning_rate": 6.506132216571942e-06, "loss": 0.5386, "step": 2175 }, { "epoch": 0.19528392901213792, "grad_norm": 0.5075908286650523, "learning_rate": 6.509123541728987e-06, "loss": 0.5148, "step": 2176 }, { "epoch": 0.19537367346480897, "grad_norm": 0.49046719544876993, "learning_rate": 6.512114866886031e-06, "loss": 0.5015, "step": 2177 }, { "epoch": 0.19546341791747998, "grad_norm": 0.5169443407806046, "learning_rate": 6.515106192043076e-06, "loss": 0.5128, "step": 2178 }, { "epoch": 0.195553162370151, "grad_norm": 0.46526319278591977, "learning_rate": 6.5180975172001205e-06, "loss": 0.4413, "step": 2179 }, { "epoch": 0.195642906822822, "grad_norm": 0.45731198780457116, "learning_rate": 6.521088842357165e-06, "loss": 0.4391, "step": 2180 }, { "epoch": 0.19573265127549302, "grad_norm": 0.5565214167016569, "learning_rate": 6.52408016751421e-06, "loss": 0.5799, "step": 2181 }, { "epoch": 0.19582239572816404, "grad_norm": 0.4729332450604098, "learning_rate": 6.527071492671254e-06, "loss": 0.5104, "step": 2182 }, { "epoch": 0.19591214018083508, "grad_norm": 0.5154594876873237, "learning_rate": 6.530062817828299e-06, "loss": 0.5058, "step": 2183 }, { "epoch": 0.1960018846335061, "grad_norm": 0.5322564995992931, "learning_rate": 6.533054142985343e-06, "loss": 0.5991, "step": 2184 }, { "epoch": 0.1960916290861771, "grad_norm": 0.5283368401817672, "learning_rate": 6.536045468142388e-06, "loss": 0.5102, "step": 2185 }, { "epoch": 0.19618137353884813, "grad_norm": 0.48666678565221466, "learning_rate": 6.5390367932994324e-06, "loss": 0.459, "step": 2186 }, { "epoch": 0.19627111799151914, "grad_norm": 0.48844836499936645, "learning_rate": 6.542028118456477e-06, "loss": 0.5038, "step": 2187 }, { "epoch": 0.19636086244419018, "grad_norm": 0.5277851713905111, "learning_rate": 6.5450194436135216e-06, "loss": 0.5102, "step": 2188 }, { "epoch": 0.1964506068968612, "grad_norm": 0.4751251622644432, "learning_rate": 6.548010768770566e-06, "loss": 0.4667, "step": 2189 }, { "epoch": 0.1965403513495322, "grad_norm": 0.5580818163443083, "learning_rate": 6.551002093927611e-06, "loss": 0.5889, "step": 2190 }, { "epoch": 0.19663009580220323, "grad_norm": 0.49175476361305026, "learning_rate": 6.553993419084655e-06, "loss": 0.4575, "step": 2191 }, { "epoch": 0.19671984025487424, "grad_norm": 0.5436574916787923, "learning_rate": 6.5569847442417e-06, "loss": 0.5845, "step": 2192 }, { "epoch": 0.19680958470754525, "grad_norm": 0.5579719447139869, "learning_rate": 6.559976069398744e-06, "loss": 0.558, "step": 2193 }, { "epoch": 0.1968993291602163, "grad_norm": 0.5864985148599303, "learning_rate": 6.562967394555789e-06, "loss": 0.5966, "step": 2194 }, { "epoch": 0.1969890736128873, "grad_norm": 0.5602442770365961, "learning_rate": 6.5659587197128335e-06, "loss": 0.5822, "step": 2195 }, { "epoch": 0.19707881806555833, "grad_norm": 0.5080524150870102, "learning_rate": 6.568950044869878e-06, "loss": 0.4825, "step": 2196 }, { "epoch": 0.19716856251822934, "grad_norm": 0.4824098847295217, "learning_rate": 6.571941370026923e-06, "loss": 0.4835, "step": 2197 }, { "epoch": 0.19725830697090035, "grad_norm": 0.47178023684759224, "learning_rate": 6.574932695183966e-06, "loss": 0.4889, "step": 2198 }, { "epoch": 0.19734805142357137, "grad_norm": 0.5331642816470331, "learning_rate": 6.577924020341011e-06, "loss": 0.5548, "step": 2199 }, { "epoch": 0.1974377958762424, "grad_norm": 0.5618376275452447, "learning_rate": 6.580915345498055e-06, "loss": 0.5824, "step": 2200 }, { "epoch": 0.19752754032891343, "grad_norm": 0.5342526746675801, "learning_rate": 6.5839066706551e-06, "loss": 0.5748, "step": 2201 }, { "epoch": 0.19761728478158444, "grad_norm": 0.5143457677324895, "learning_rate": 6.5868979958121445e-06, "loss": 0.478, "step": 2202 }, { "epoch": 0.19770702923425545, "grad_norm": 0.4843016679185341, "learning_rate": 6.589889320969189e-06, "loss": 0.5165, "step": 2203 }, { "epoch": 0.19779677368692647, "grad_norm": 0.5426129153178697, "learning_rate": 6.592880646126234e-06, "loss": 0.6111, "step": 2204 }, { "epoch": 0.19788651813959748, "grad_norm": 0.48699276844642303, "learning_rate": 6.595871971283278e-06, "loss": 0.4585, "step": 2205 }, { "epoch": 0.19797626259226853, "grad_norm": 0.5052253572015458, "learning_rate": 6.598863296440324e-06, "loss": 0.5148, "step": 2206 }, { "epoch": 0.19806600704493954, "grad_norm": 0.5324152336056277, "learning_rate": 6.601854621597368e-06, "loss": 0.5157, "step": 2207 }, { "epoch": 0.19815575149761055, "grad_norm": 0.4933832024255573, "learning_rate": 6.604845946754413e-06, "loss": 0.4941, "step": 2208 }, { "epoch": 0.19824549595028157, "grad_norm": 0.4840084100620003, "learning_rate": 6.607837271911457e-06, "loss": 0.5055, "step": 2209 }, { "epoch": 0.19833524040295258, "grad_norm": 0.4867533137480486, "learning_rate": 6.610828597068502e-06, "loss": 0.533, "step": 2210 }, { "epoch": 0.1984249848556236, "grad_norm": 0.5276879908934089, "learning_rate": 6.613819922225546e-06, "loss": 0.5684, "step": 2211 }, { "epoch": 0.19851472930829464, "grad_norm": 0.46325055873362997, "learning_rate": 6.616811247382591e-06, "loss": 0.477, "step": 2212 }, { "epoch": 0.19860447376096566, "grad_norm": 0.5234135154077778, "learning_rate": 6.6198025725396355e-06, "loss": 0.5459, "step": 2213 }, { "epoch": 0.19869421821363667, "grad_norm": 0.49260588542511863, "learning_rate": 6.62279389769668e-06, "loss": 0.42, "step": 2214 }, { "epoch": 0.19878396266630768, "grad_norm": 0.5465450698051928, "learning_rate": 6.625785222853725e-06, "loss": 0.5277, "step": 2215 }, { "epoch": 0.1988737071189787, "grad_norm": 0.5018228885566487, "learning_rate": 6.628776548010769e-06, "loss": 0.4812, "step": 2216 }, { "epoch": 0.19896345157164974, "grad_norm": 0.5188418447014758, "learning_rate": 6.631767873167814e-06, "loss": 0.5189, "step": 2217 }, { "epoch": 0.19905319602432076, "grad_norm": 0.5338226092072886, "learning_rate": 6.634759198324858e-06, "loss": 0.5222, "step": 2218 }, { "epoch": 0.19914294047699177, "grad_norm": 0.5590436301821328, "learning_rate": 6.637750523481903e-06, "loss": 0.6162, "step": 2219 }, { "epoch": 0.19923268492966278, "grad_norm": 0.5201909387446422, "learning_rate": 6.6407418486389474e-06, "loss": 0.5316, "step": 2220 }, { "epoch": 0.1993224293823338, "grad_norm": 0.5236361366147166, "learning_rate": 6.643733173795992e-06, "loss": 0.613, "step": 2221 }, { "epoch": 0.1994121738350048, "grad_norm": 0.5047314999498418, "learning_rate": 6.6467244989530365e-06, "loss": 0.479, "step": 2222 }, { "epoch": 0.19950191828767586, "grad_norm": 0.5021803822720146, "learning_rate": 6.649715824110081e-06, "loss": 0.5247, "step": 2223 }, { "epoch": 0.19959166274034687, "grad_norm": 0.5119772950000443, "learning_rate": 6.652707149267126e-06, "loss": 0.5213, "step": 2224 }, { "epoch": 0.19968140719301788, "grad_norm": 0.5058114500259702, "learning_rate": 6.65569847442417e-06, "loss": 0.5213, "step": 2225 }, { "epoch": 0.1997711516456889, "grad_norm": 0.506822122784358, "learning_rate": 6.658689799581215e-06, "loss": 0.4828, "step": 2226 }, { "epoch": 0.1998608960983599, "grad_norm": 0.47485930458674436, "learning_rate": 6.661681124738259e-06, "loss": 0.4749, "step": 2227 }, { "epoch": 0.19995064055103093, "grad_norm": 0.5039991613873873, "learning_rate": 6.664672449895304e-06, "loss": 0.5077, "step": 2228 }, { "epoch": 0.20004038500370197, "grad_norm": 0.49644307102334606, "learning_rate": 6.6676637750523484e-06, "loss": 0.4766, "step": 2229 }, { "epoch": 0.20013012945637298, "grad_norm": 0.5280864426016325, "learning_rate": 6.670655100209393e-06, "loss": 0.4969, "step": 2230 }, { "epoch": 0.200219873909044, "grad_norm": 0.48599750000689035, "learning_rate": 6.6736464253664376e-06, "loss": 0.4702, "step": 2231 }, { "epoch": 0.20030961836171501, "grad_norm": 0.5042659234539806, "learning_rate": 6.676637750523482e-06, "loss": 0.4761, "step": 2232 }, { "epoch": 0.20039936281438603, "grad_norm": 0.5425028914871541, "learning_rate": 6.679629075680527e-06, "loss": 0.5604, "step": 2233 }, { "epoch": 0.20048910726705704, "grad_norm": 0.5104968508264655, "learning_rate": 6.682620400837572e-06, "loss": 0.5285, "step": 2234 }, { "epoch": 0.20057885171972809, "grad_norm": 0.509727759964525, "learning_rate": 6.685611725994617e-06, "loss": 0.5269, "step": 2235 }, { "epoch": 0.2006685961723991, "grad_norm": 0.553667486073165, "learning_rate": 6.688603051151661e-06, "loss": 0.585, "step": 2236 }, { "epoch": 0.20075834062507011, "grad_norm": 0.46507267316621614, "learning_rate": 6.691594376308706e-06, "loss": 0.4665, "step": 2237 }, { "epoch": 0.20084808507774113, "grad_norm": 0.5255548185227782, "learning_rate": 6.69458570146575e-06, "loss": 0.5478, "step": 2238 }, { "epoch": 0.20093782953041214, "grad_norm": 0.554464003914575, "learning_rate": 6.697577026622795e-06, "loss": 0.5548, "step": 2239 }, { "epoch": 0.20102757398308316, "grad_norm": 0.5132563759851654, "learning_rate": 6.7005683517798394e-06, "loss": 0.5425, "step": 2240 }, { "epoch": 0.2011173184357542, "grad_norm": 0.47666722177535076, "learning_rate": 6.703559676936884e-06, "loss": 0.4995, "step": 2241 }, { "epoch": 0.20120706288842521, "grad_norm": 0.565099205961834, "learning_rate": 6.7065510020939286e-06, "loss": 0.5935, "step": 2242 }, { "epoch": 0.20129680734109623, "grad_norm": 0.526959429970559, "learning_rate": 6.709542327250973e-06, "loss": 0.5539, "step": 2243 }, { "epoch": 0.20138655179376724, "grad_norm": 0.4899352814922499, "learning_rate": 6.712533652408018e-06, "loss": 0.5204, "step": 2244 }, { "epoch": 0.20147629624643826, "grad_norm": 0.4962976902839272, "learning_rate": 6.715524977565062e-06, "loss": 0.5115, "step": 2245 }, { "epoch": 0.2015660406991093, "grad_norm": 0.5012237595141557, "learning_rate": 6.718516302722107e-06, "loss": 0.4973, "step": 2246 }, { "epoch": 0.20165578515178031, "grad_norm": 0.4954238867951015, "learning_rate": 6.721507627879151e-06, "loss": 0.5029, "step": 2247 }, { "epoch": 0.20174552960445133, "grad_norm": 0.5039923117660425, "learning_rate": 6.724498953036196e-06, "loss": 0.49, "step": 2248 }, { "epoch": 0.20183527405712234, "grad_norm": 0.5173044988275569, "learning_rate": 6.7274902781932405e-06, "loss": 0.5508, "step": 2249 }, { "epoch": 0.20192501850979336, "grad_norm": 0.49662715715913824, "learning_rate": 6.730481603350285e-06, "loss": 0.5053, "step": 2250 }, { "epoch": 0.20201476296246437, "grad_norm": 0.5055231024236021, "learning_rate": 6.73347292850733e-06, "loss": 0.51, "step": 2251 }, { "epoch": 0.20210450741513541, "grad_norm": 0.4785164783289741, "learning_rate": 6.736464253664374e-06, "loss": 0.4806, "step": 2252 }, { "epoch": 0.20219425186780643, "grad_norm": 0.5028040763098864, "learning_rate": 6.739455578821419e-06, "loss": 0.501, "step": 2253 }, { "epoch": 0.20228399632047744, "grad_norm": 0.519908145278955, "learning_rate": 6.742446903978463e-06, "loss": 0.5306, "step": 2254 }, { "epoch": 0.20237374077314846, "grad_norm": 0.5150072186588719, "learning_rate": 6.745438229135508e-06, "loss": 0.5227, "step": 2255 }, { "epoch": 0.20246348522581947, "grad_norm": 0.5327031780722199, "learning_rate": 6.748429554292552e-06, "loss": 0.5556, "step": 2256 }, { "epoch": 0.2025532296784905, "grad_norm": 0.553795050397141, "learning_rate": 6.751420879449597e-06, "loss": 0.5746, "step": 2257 }, { "epoch": 0.20264297413116153, "grad_norm": 0.5095312297291718, "learning_rate": 6.7544122046066415e-06, "loss": 0.4697, "step": 2258 }, { "epoch": 0.20273271858383254, "grad_norm": 0.5286179247376092, "learning_rate": 6.757403529763686e-06, "loss": 0.5248, "step": 2259 }, { "epoch": 0.20282246303650356, "grad_norm": 0.5360232816153302, "learning_rate": 6.76039485492073e-06, "loss": 0.5397, "step": 2260 }, { "epoch": 0.20291220748917457, "grad_norm": 0.453102821138797, "learning_rate": 6.763386180077774e-06, "loss": 0.4331, "step": 2261 }, { "epoch": 0.2030019519418456, "grad_norm": 0.4906145817982473, "learning_rate": 6.766377505234819e-06, "loss": 0.4905, "step": 2262 }, { "epoch": 0.2030916963945166, "grad_norm": 0.5014919174190293, "learning_rate": 6.7693688303918634e-06, "loss": 0.4594, "step": 2263 }, { "epoch": 0.20318144084718764, "grad_norm": 0.5309426473745522, "learning_rate": 6.772360155548908e-06, "loss": 0.5754, "step": 2264 }, { "epoch": 0.20327118529985866, "grad_norm": 0.4868167555546439, "learning_rate": 6.7753514807059525e-06, "loss": 0.5014, "step": 2265 }, { "epoch": 0.20336092975252967, "grad_norm": 0.5320387430402643, "learning_rate": 6.778342805862997e-06, "loss": 0.5752, "step": 2266 }, { "epoch": 0.2034506742052007, "grad_norm": 0.5068270432848805, "learning_rate": 6.781334131020042e-06, "loss": 0.5567, "step": 2267 }, { "epoch": 0.2035404186578717, "grad_norm": 0.5156143601153729, "learning_rate": 6.784325456177086e-06, "loss": 0.5523, "step": 2268 }, { "epoch": 0.20363016311054272, "grad_norm": 0.5215878295920305, "learning_rate": 6.787316781334131e-06, "loss": 0.5391, "step": 2269 }, { "epoch": 0.20371990756321376, "grad_norm": 0.5250748629553444, "learning_rate": 6.790308106491176e-06, "loss": 0.5069, "step": 2270 }, { "epoch": 0.20380965201588477, "grad_norm": 0.4763911379400545, "learning_rate": 6.793299431648221e-06, "loss": 0.4782, "step": 2271 }, { "epoch": 0.2038993964685558, "grad_norm": 0.5144047754926436, "learning_rate": 6.796290756805265e-06, "loss": 0.5315, "step": 2272 }, { "epoch": 0.2039891409212268, "grad_norm": 0.4982592952560913, "learning_rate": 6.79928208196231e-06, "loss": 0.5276, "step": 2273 }, { "epoch": 0.20407888537389782, "grad_norm": 0.5214091256784551, "learning_rate": 6.802273407119354e-06, "loss": 0.5115, "step": 2274 }, { "epoch": 0.20416862982656883, "grad_norm": 0.49946773828240937, "learning_rate": 6.805264732276399e-06, "loss": 0.4867, "step": 2275 }, { "epoch": 0.20425837427923987, "grad_norm": 0.5422720564579332, "learning_rate": 6.8082560574334435e-06, "loss": 0.5736, "step": 2276 }, { "epoch": 0.2043481187319109, "grad_norm": 0.5609617135236056, "learning_rate": 6.811247382590488e-06, "loss": 0.5434, "step": 2277 }, { "epoch": 0.2044378631845819, "grad_norm": 0.4766537632964324, "learning_rate": 6.814238707747533e-06, "loss": 0.4802, "step": 2278 }, { "epoch": 0.20452760763725292, "grad_norm": 0.5023922771380845, "learning_rate": 6.817230032904577e-06, "loss": 0.5116, "step": 2279 }, { "epoch": 0.20461735208992393, "grad_norm": 0.5558188006572478, "learning_rate": 6.820221358061622e-06, "loss": 0.5336, "step": 2280 }, { "epoch": 0.20470709654259497, "grad_norm": 0.4952040382397361, "learning_rate": 6.823212683218666e-06, "loss": 0.5288, "step": 2281 }, { "epoch": 0.204796840995266, "grad_norm": 0.4936179633126169, "learning_rate": 6.826204008375711e-06, "loss": 0.4814, "step": 2282 }, { "epoch": 0.204886585447937, "grad_norm": 0.47645654655568426, "learning_rate": 6.8291953335327554e-06, "loss": 0.4542, "step": 2283 }, { "epoch": 0.20497632990060802, "grad_norm": 0.5401847172123332, "learning_rate": 6.8321866586898e-06, "loss": 0.5619, "step": 2284 }, { "epoch": 0.20506607435327903, "grad_norm": 0.4936217153851142, "learning_rate": 6.8351779838468446e-06, "loss": 0.5107, "step": 2285 }, { "epoch": 0.20515581880595005, "grad_norm": 0.4864041028080568, "learning_rate": 6.838169309003889e-06, "loss": 0.481, "step": 2286 }, { "epoch": 0.2052455632586211, "grad_norm": 0.525964769496491, "learning_rate": 6.841160634160934e-06, "loss": 0.5727, "step": 2287 }, { "epoch": 0.2053353077112921, "grad_norm": 0.5411890331122682, "learning_rate": 6.844151959317978e-06, "loss": 0.5518, "step": 2288 }, { "epoch": 0.20542505216396312, "grad_norm": 0.45342293217804613, "learning_rate": 6.847143284475023e-06, "loss": 0.4198, "step": 2289 }, { "epoch": 0.20551479661663413, "grad_norm": 0.47694522754304775, "learning_rate": 6.850134609632067e-06, "loss": 0.4541, "step": 2290 }, { "epoch": 0.20560454106930515, "grad_norm": 0.47887835177462934, "learning_rate": 6.853125934789112e-06, "loss": 0.4615, "step": 2291 }, { "epoch": 0.20569428552197616, "grad_norm": 0.481093761460287, "learning_rate": 6.8561172599461565e-06, "loss": 0.4662, "step": 2292 }, { "epoch": 0.2057840299746472, "grad_norm": 0.5225763347992387, "learning_rate": 6.859108585103201e-06, "loss": 0.5441, "step": 2293 }, { "epoch": 0.20587377442731822, "grad_norm": 0.47416563144227697, "learning_rate": 6.862099910260246e-06, "loss": 0.4725, "step": 2294 }, { "epoch": 0.20596351887998923, "grad_norm": 0.4761267614653814, "learning_rate": 6.86509123541729e-06, "loss": 0.461, "step": 2295 }, { "epoch": 0.20605326333266025, "grad_norm": 0.5247200264726528, "learning_rate": 6.868082560574335e-06, "loss": 0.5449, "step": 2296 }, { "epoch": 0.20614300778533126, "grad_norm": 0.5336054939443322, "learning_rate": 6.871073885731379e-06, "loss": 0.4862, "step": 2297 }, { "epoch": 0.20623275223800228, "grad_norm": 0.5342809976334842, "learning_rate": 6.874065210888425e-06, "loss": 0.5261, "step": 2298 }, { "epoch": 0.20632249669067332, "grad_norm": 0.5494179579779379, "learning_rate": 6.877056536045469e-06, "loss": 0.5032, "step": 2299 }, { "epoch": 0.20641224114334433, "grad_norm": 0.49840278701645174, "learning_rate": 6.880047861202514e-06, "loss": 0.5484, "step": 2300 }, { "epoch": 0.20650198559601535, "grad_norm": 0.4699689338751732, "learning_rate": 6.883039186359558e-06, "loss": 0.4948, "step": 2301 }, { "epoch": 0.20659173004868636, "grad_norm": 0.4708621505699885, "learning_rate": 6.886030511516603e-06, "loss": 0.4642, "step": 2302 }, { "epoch": 0.20668147450135738, "grad_norm": 0.4921905824347476, "learning_rate": 6.8890218366736475e-06, "loss": 0.5043, "step": 2303 }, { "epoch": 0.2067712189540284, "grad_norm": 0.4844756631894361, "learning_rate": 6.892013161830692e-06, "loss": 0.4769, "step": 2304 }, { "epoch": 0.20686096340669943, "grad_norm": 0.5840789106861662, "learning_rate": 6.8950044869877366e-06, "loss": 0.6187, "step": 2305 }, { "epoch": 0.20695070785937045, "grad_norm": 0.5365806019067907, "learning_rate": 6.897995812144781e-06, "loss": 0.4846, "step": 2306 }, { "epoch": 0.20704045231204146, "grad_norm": 0.5666462241636003, "learning_rate": 6.900987137301826e-06, "loss": 0.573, "step": 2307 }, { "epoch": 0.20713019676471248, "grad_norm": 0.5042645227640711, "learning_rate": 6.90397846245887e-06, "loss": 0.501, "step": 2308 }, { "epoch": 0.2072199412173835, "grad_norm": 0.5359390990054461, "learning_rate": 6.906969787615915e-06, "loss": 0.5801, "step": 2309 }, { "epoch": 0.20730968567005453, "grad_norm": 0.563350690259965, "learning_rate": 6.909961112772959e-06, "loss": 0.5384, "step": 2310 }, { "epoch": 0.20739943012272555, "grad_norm": 0.5120291694917731, "learning_rate": 6.912952437930004e-06, "loss": 0.5441, "step": 2311 }, { "epoch": 0.20748917457539656, "grad_norm": 0.5160903512179218, "learning_rate": 6.9159437630870485e-06, "loss": 0.4909, "step": 2312 }, { "epoch": 0.20757891902806758, "grad_norm": 0.50667041438542, "learning_rate": 6.918935088244093e-06, "loss": 0.4986, "step": 2313 }, { "epoch": 0.2076686634807386, "grad_norm": 0.6191682253689279, "learning_rate": 6.921926413401138e-06, "loss": 0.6296, "step": 2314 }, { "epoch": 0.2077584079334096, "grad_norm": 0.5115087244495171, "learning_rate": 6.924917738558182e-06, "loss": 0.5155, "step": 2315 }, { "epoch": 0.20784815238608065, "grad_norm": 0.49572330065624, "learning_rate": 6.927909063715227e-06, "loss": 0.5172, "step": 2316 }, { "epoch": 0.20793789683875166, "grad_norm": 0.559407695814295, "learning_rate": 6.930900388872271e-06, "loss": 0.5171, "step": 2317 }, { "epoch": 0.20802764129142268, "grad_norm": 0.5431605189161325, "learning_rate": 6.933891714029316e-06, "loss": 0.5408, "step": 2318 }, { "epoch": 0.2081173857440937, "grad_norm": 0.5353219861437861, "learning_rate": 6.93688303918636e-06, "loss": 0.519, "step": 2319 }, { "epoch": 0.2082071301967647, "grad_norm": 0.5360311892999995, "learning_rate": 6.939874364343405e-06, "loss": 0.5084, "step": 2320 }, { "epoch": 0.20829687464943572, "grad_norm": 0.5974446306384888, "learning_rate": 6.9428656895004495e-06, "loss": 0.5888, "step": 2321 }, { "epoch": 0.20838661910210676, "grad_norm": 0.48785904059919294, "learning_rate": 6.945857014657493e-06, "loss": 0.5104, "step": 2322 }, { "epoch": 0.20847636355477778, "grad_norm": 0.4587559921714297, "learning_rate": 6.948848339814538e-06, "loss": 0.4525, "step": 2323 }, { "epoch": 0.2085661080074488, "grad_norm": 0.4909204429987101, "learning_rate": 6.951839664971582e-06, "loss": 0.5769, "step": 2324 }, { "epoch": 0.2086558524601198, "grad_norm": 0.5080524806923449, "learning_rate": 6.954830990128627e-06, "loss": 0.548, "step": 2325 }, { "epoch": 0.20874559691279082, "grad_norm": 0.4895931932232683, "learning_rate": 6.9578223152856715e-06, "loss": 0.5291, "step": 2326 }, { "epoch": 0.20883534136546184, "grad_norm": 0.5223011484697865, "learning_rate": 6.960813640442716e-06, "loss": 0.5371, "step": 2327 }, { "epoch": 0.20892508581813288, "grad_norm": 0.5030171565021871, "learning_rate": 6.9638049655997606e-06, "loss": 0.4749, "step": 2328 }, { "epoch": 0.2090148302708039, "grad_norm": 0.4578641031848378, "learning_rate": 6.966796290756805e-06, "loss": 0.4507, "step": 2329 }, { "epoch": 0.2091045747234749, "grad_norm": 0.47553059687560884, "learning_rate": 6.96978761591385e-06, "loss": 0.469, "step": 2330 }, { "epoch": 0.20919431917614592, "grad_norm": 0.5938550753406926, "learning_rate": 6.972778941070894e-06, "loss": 0.6444, "step": 2331 }, { "epoch": 0.20928406362881694, "grad_norm": 0.5747459643584146, "learning_rate": 6.975770266227939e-06, "loss": 0.6081, "step": 2332 }, { "epoch": 0.20937380808148795, "grad_norm": 0.5185028802471288, "learning_rate": 6.978761591384983e-06, "loss": 0.5204, "step": 2333 }, { "epoch": 0.209463552534159, "grad_norm": 0.5468273160229702, "learning_rate": 6.981752916542029e-06, "loss": 0.5611, "step": 2334 }, { "epoch": 0.20955329698683, "grad_norm": 0.49589560436522395, "learning_rate": 6.984744241699073e-06, "loss": 0.5185, "step": 2335 }, { "epoch": 0.20964304143950102, "grad_norm": 0.4601629726101037, "learning_rate": 6.987735566856118e-06, "loss": 0.475, "step": 2336 }, { "epoch": 0.20973278589217204, "grad_norm": 0.503332019575992, "learning_rate": 6.9907268920131624e-06, "loss": 0.5323, "step": 2337 }, { "epoch": 0.20982253034484305, "grad_norm": 0.5245222613218979, "learning_rate": 6.993718217170207e-06, "loss": 0.525, "step": 2338 }, { "epoch": 0.2099122747975141, "grad_norm": 0.5439864182110778, "learning_rate": 6.9967095423272516e-06, "loss": 0.4887, "step": 2339 }, { "epoch": 0.2100020192501851, "grad_norm": 0.50160906278878, "learning_rate": 6.999700867484296e-06, "loss": 0.5415, "step": 2340 }, { "epoch": 0.21009176370285612, "grad_norm": 0.499905909959773, "learning_rate": 7.002692192641341e-06, "loss": 0.4712, "step": 2341 }, { "epoch": 0.21018150815552714, "grad_norm": 0.5529703726350533, "learning_rate": 7.005683517798385e-06, "loss": 0.5577, "step": 2342 }, { "epoch": 0.21027125260819815, "grad_norm": 0.4959791962354352, "learning_rate": 7.00867484295543e-06, "loss": 0.4801, "step": 2343 }, { "epoch": 0.21036099706086916, "grad_norm": 0.509738626115909, "learning_rate": 7.011666168112474e-06, "loss": 0.5127, "step": 2344 }, { "epoch": 0.2104507415135402, "grad_norm": 0.4985035714631778, "learning_rate": 7.014657493269519e-06, "loss": 0.4692, "step": 2345 }, { "epoch": 0.21054048596621122, "grad_norm": 0.5076591747604904, "learning_rate": 7.0176488184265635e-06, "loss": 0.4873, "step": 2346 }, { "epoch": 0.21063023041888224, "grad_norm": 0.5158826159944807, "learning_rate": 7.020640143583608e-06, "loss": 0.5672, "step": 2347 }, { "epoch": 0.21071997487155325, "grad_norm": 0.5271262511679351, "learning_rate": 7.023631468740653e-06, "loss": 0.4845, "step": 2348 }, { "epoch": 0.21080971932422427, "grad_norm": 0.5328320045680858, "learning_rate": 7.026622793897697e-06, "loss": 0.525, "step": 2349 }, { "epoch": 0.21089946377689528, "grad_norm": 0.47479951921213936, "learning_rate": 7.029614119054742e-06, "loss": 0.4617, "step": 2350 }, { "epoch": 0.21098920822956632, "grad_norm": 0.5150344984086085, "learning_rate": 7.032605444211786e-06, "loss": 0.5313, "step": 2351 }, { "epoch": 0.21107895268223734, "grad_norm": 0.47915720125014666, "learning_rate": 7.035596769368831e-06, "loss": 0.4698, "step": 2352 }, { "epoch": 0.21116869713490835, "grad_norm": 0.5463509471235829, "learning_rate": 7.038588094525875e-06, "loss": 0.5371, "step": 2353 }, { "epoch": 0.21125844158757937, "grad_norm": 0.562595751868324, "learning_rate": 7.04157941968292e-06, "loss": 0.6195, "step": 2354 }, { "epoch": 0.21134818604025038, "grad_norm": 0.507215543862427, "learning_rate": 7.0445707448399645e-06, "loss": 0.4845, "step": 2355 }, { "epoch": 0.2114379304929214, "grad_norm": 0.5124397527666537, "learning_rate": 7.047562069997009e-06, "loss": 0.5093, "step": 2356 }, { "epoch": 0.21152767494559244, "grad_norm": 0.5112598049127411, "learning_rate": 7.050553395154054e-06, "loss": 0.4966, "step": 2357 }, { "epoch": 0.21161741939826345, "grad_norm": 0.5502684029667128, "learning_rate": 7.053544720311098e-06, "loss": 0.5517, "step": 2358 }, { "epoch": 0.21170716385093447, "grad_norm": 0.5260454693685191, "learning_rate": 7.056536045468143e-06, "loss": 0.5116, "step": 2359 }, { "epoch": 0.21179690830360548, "grad_norm": 0.4896105514820253, "learning_rate": 7.059527370625187e-06, "loss": 0.4848, "step": 2360 }, { "epoch": 0.2118866527562765, "grad_norm": 0.5515570801452924, "learning_rate": 7.062518695782232e-06, "loss": 0.5706, "step": 2361 }, { "epoch": 0.2119763972089475, "grad_norm": 0.5587791720997002, "learning_rate": 7.065510020939277e-06, "loss": 0.544, "step": 2362 }, { "epoch": 0.21206614166161855, "grad_norm": 0.4988729494389081, "learning_rate": 7.068501346096322e-06, "loss": 0.4686, "step": 2363 }, { "epoch": 0.21215588611428957, "grad_norm": 0.4976549864043648, "learning_rate": 7.071492671253366e-06, "loss": 0.4727, "step": 2364 }, { "epoch": 0.21224563056696058, "grad_norm": 0.5543903988140968, "learning_rate": 7.074483996410411e-06, "loss": 0.4979, "step": 2365 }, { "epoch": 0.2123353750196316, "grad_norm": 0.5059746550643371, "learning_rate": 7.0774753215674555e-06, "loss": 0.5288, "step": 2366 }, { "epoch": 0.2124251194723026, "grad_norm": 0.4894257364792042, "learning_rate": 7.0804666467245e-06, "loss": 0.4733, "step": 2367 }, { "epoch": 0.21251486392497362, "grad_norm": 0.5151232306109071, "learning_rate": 7.083457971881545e-06, "loss": 0.5098, "step": 2368 }, { "epoch": 0.21260460837764467, "grad_norm": 0.47439365445224624, "learning_rate": 7.086449297038589e-06, "loss": 0.474, "step": 2369 }, { "epoch": 0.21269435283031568, "grad_norm": 0.4848307607542793, "learning_rate": 7.089440622195634e-06, "loss": 0.5546, "step": 2370 }, { "epoch": 0.2127840972829867, "grad_norm": 0.4709239679447036, "learning_rate": 7.092431947352678e-06, "loss": 0.4736, "step": 2371 }, { "epoch": 0.2128738417356577, "grad_norm": 0.48770147500539346, "learning_rate": 7.095423272509723e-06, "loss": 0.4763, "step": 2372 }, { "epoch": 0.21296358618832872, "grad_norm": 0.5364901119999737, "learning_rate": 7.098414597666767e-06, "loss": 0.5639, "step": 2373 }, { "epoch": 0.21305333064099977, "grad_norm": 0.5094946843209436, "learning_rate": 7.101405922823812e-06, "loss": 0.5105, "step": 2374 }, { "epoch": 0.21314307509367078, "grad_norm": 0.6215968611811797, "learning_rate": 7.1043972479808565e-06, "loss": 0.6437, "step": 2375 }, { "epoch": 0.2132328195463418, "grad_norm": 0.46233479503169644, "learning_rate": 7.107388573137901e-06, "loss": 0.4845, "step": 2376 }, { "epoch": 0.2133225639990128, "grad_norm": 0.5069869163770949, "learning_rate": 7.110379898294946e-06, "loss": 0.5185, "step": 2377 }, { "epoch": 0.21341230845168382, "grad_norm": 0.5229618990566022, "learning_rate": 7.11337122345199e-06, "loss": 0.5272, "step": 2378 }, { "epoch": 0.21350205290435484, "grad_norm": 0.5076545289175037, "learning_rate": 7.116362548609035e-06, "loss": 0.505, "step": 2379 }, { "epoch": 0.21359179735702588, "grad_norm": 0.4782740404548863, "learning_rate": 7.119353873766079e-06, "loss": 0.4699, "step": 2380 }, { "epoch": 0.2136815418096969, "grad_norm": 0.5040733263026079, "learning_rate": 7.122345198923124e-06, "loss": 0.5088, "step": 2381 }, { "epoch": 0.2137712862623679, "grad_norm": 0.5115567992366118, "learning_rate": 7.125336524080168e-06, "loss": 0.521, "step": 2382 }, { "epoch": 0.21386103071503892, "grad_norm": 0.4608749860678112, "learning_rate": 7.128327849237213e-06, "loss": 0.4371, "step": 2383 }, { "epoch": 0.21395077516770994, "grad_norm": 0.517037458441088, "learning_rate": 7.131319174394257e-06, "loss": 0.5408, "step": 2384 }, { "epoch": 0.21404051962038095, "grad_norm": 0.5361060345893087, "learning_rate": 7.134310499551301e-06, "loss": 0.5642, "step": 2385 }, { "epoch": 0.214130264073052, "grad_norm": 0.5453347327522869, "learning_rate": 7.137301824708346e-06, "loss": 0.5579, "step": 2386 }, { "epoch": 0.214220008525723, "grad_norm": 0.5125588918436966, "learning_rate": 7.14029314986539e-06, "loss": 0.5252, "step": 2387 }, { "epoch": 0.21430975297839402, "grad_norm": 0.4840860120094062, "learning_rate": 7.143284475022435e-06, "loss": 0.4704, "step": 2388 }, { "epoch": 0.21439949743106504, "grad_norm": 0.5398914800613631, "learning_rate": 7.1462758001794795e-06, "loss": 0.5288, "step": 2389 }, { "epoch": 0.21448924188373605, "grad_norm": 0.5100631330161489, "learning_rate": 7.149267125336524e-06, "loss": 0.5259, "step": 2390 }, { "epoch": 0.21457898633640707, "grad_norm": 0.516222436145066, "learning_rate": 7.152258450493569e-06, "loss": 0.4671, "step": 2391 }, { "epoch": 0.2146687307890781, "grad_norm": 0.5040928603094904, "learning_rate": 7.155249775650613e-06, "loss": 0.4965, "step": 2392 }, { "epoch": 0.21475847524174912, "grad_norm": 0.46551065254102464, "learning_rate": 7.158241100807658e-06, "loss": 0.4879, "step": 2393 }, { "epoch": 0.21484821969442014, "grad_norm": 0.4982393056506407, "learning_rate": 7.161232425964702e-06, "loss": 0.5583, "step": 2394 }, { "epoch": 0.21493796414709115, "grad_norm": 0.502582239772033, "learning_rate": 7.164223751121747e-06, "loss": 0.5447, "step": 2395 }, { "epoch": 0.21502770859976217, "grad_norm": 0.5489464120160742, "learning_rate": 7.167215076278791e-06, "loss": 0.5392, "step": 2396 }, { "epoch": 0.21511745305243318, "grad_norm": 0.5557254686876445, "learning_rate": 7.170206401435836e-06, "loss": 0.6273, "step": 2397 }, { "epoch": 0.21520719750510423, "grad_norm": 0.5182039034030193, "learning_rate": 7.173197726592881e-06, "loss": 0.5164, "step": 2398 }, { "epoch": 0.21529694195777524, "grad_norm": 0.5166440234724842, "learning_rate": 7.176189051749926e-06, "loss": 0.5207, "step": 2399 }, { "epoch": 0.21538668641044625, "grad_norm": 0.4959667731764824, "learning_rate": 7.1791803769069705e-06, "loss": 0.4672, "step": 2400 }, { "epoch": 0.21547643086311727, "grad_norm": 0.5285385935634146, "learning_rate": 7.182171702064015e-06, "loss": 0.5594, "step": 2401 }, { "epoch": 0.21556617531578828, "grad_norm": 0.4628681812634219, "learning_rate": 7.18516302722106e-06, "loss": 0.4924, "step": 2402 }, { "epoch": 0.21565591976845933, "grad_norm": 0.537155359925535, "learning_rate": 7.188154352378104e-06, "loss": 0.5529, "step": 2403 }, { "epoch": 0.21574566422113034, "grad_norm": 0.5108969181825307, "learning_rate": 7.191145677535149e-06, "loss": 0.5632, "step": 2404 }, { "epoch": 0.21583540867380135, "grad_norm": 0.5825279852067985, "learning_rate": 7.194137002692193e-06, "loss": 0.5968, "step": 2405 }, { "epoch": 0.21592515312647237, "grad_norm": 0.5491359972853811, "learning_rate": 7.197128327849238e-06, "loss": 0.5496, "step": 2406 }, { "epoch": 0.21601489757914338, "grad_norm": 0.49020620073489846, "learning_rate": 7.200119653006282e-06, "loss": 0.4847, "step": 2407 }, { "epoch": 0.2161046420318144, "grad_norm": 0.5048437989763501, "learning_rate": 7.203110978163327e-06, "loss": 0.5089, "step": 2408 }, { "epoch": 0.21619438648448544, "grad_norm": 0.5061875400195565, "learning_rate": 7.2061023033203715e-06, "loss": 0.5234, "step": 2409 }, { "epoch": 0.21628413093715645, "grad_norm": 0.5338243311687627, "learning_rate": 7.209093628477416e-06, "loss": 0.5338, "step": 2410 }, { "epoch": 0.21637387538982747, "grad_norm": 0.48431561285384345, "learning_rate": 7.212084953634461e-06, "loss": 0.4983, "step": 2411 }, { "epoch": 0.21646361984249848, "grad_norm": 0.5462889736597567, "learning_rate": 7.215076278791505e-06, "loss": 0.5564, "step": 2412 }, { "epoch": 0.2165533642951695, "grad_norm": 0.47662733165500937, "learning_rate": 7.21806760394855e-06, "loss": 0.4657, "step": 2413 }, { "epoch": 0.2166431087478405, "grad_norm": 0.500781397030228, "learning_rate": 7.221058929105594e-06, "loss": 0.5284, "step": 2414 }, { "epoch": 0.21673285320051155, "grad_norm": 0.5844205021511145, "learning_rate": 7.224050254262639e-06, "loss": 0.6009, "step": 2415 }, { "epoch": 0.21682259765318257, "grad_norm": 0.48620082529832637, "learning_rate": 7.227041579419683e-06, "loss": 0.5156, "step": 2416 }, { "epoch": 0.21691234210585358, "grad_norm": 0.5112073457864691, "learning_rate": 7.230032904576728e-06, "loss": 0.5192, "step": 2417 }, { "epoch": 0.2170020865585246, "grad_norm": 0.43646886368447346, "learning_rate": 7.2330242297337725e-06, "loss": 0.3841, "step": 2418 }, { "epoch": 0.2170918310111956, "grad_norm": 0.5165641943828226, "learning_rate": 7.236015554890817e-06, "loss": 0.6191, "step": 2419 }, { "epoch": 0.21718157546386663, "grad_norm": 0.4917343930109908, "learning_rate": 7.239006880047862e-06, "loss": 0.4367, "step": 2420 }, { "epoch": 0.21727131991653767, "grad_norm": 0.5102770448273435, "learning_rate": 7.241998205204906e-06, "loss": 0.5012, "step": 2421 }, { "epoch": 0.21736106436920868, "grad_norm": 0.5123919815016819, "learning_rate": 7.244989530361951e-06, "loss": 0.5243, "step": 2422 }, { "epoch": 0.2174508088218797, "grad_norm": 0.48499404053842354, "learning_rate": 7.247980855518995e-06, "loss": 0.4749, "step": 2423 }, { "epoch": 0.2175405532745507, "grad_norm": 0.5043209943969089, "learning_rate": 7.25097218067604e-06, "loss": 0.5191, "step": 2424 }, { "epoch": 0.21763029772722173, "grad_norm": 0.49450380782954195, "learning_rate": 7.253963505833084e-06, "loss": 0.4987, "step": 2425 }, { "epoch": 0.21772004217989274, "grad_norm": 0.6017997868041569, "learning_rate": 7.25695483099013e-06, "loss": 0.5709, "step": 2426 }, { "epoch": 0.21780978663256378, "grad_norm": 0.504579145101328, "learning_rate": 7.259946156147174e-06, "loss": 0.5432, "step": 2427 }, { "epoch": 0.2178995310852348, "grad_norm": 0.5120732173314707, "learning_rate": 7.262937481304219e-06, "loss": 0.5069, "step": 2428 }, { "epoch": 0.2179892755379058, "grad_norm": 0.5188239721180532, "learning_rate": 7.2659288064612635e-06, "loss": 0.533, "step": 2429 }, { "epoch": 0.21807901999057683, "grad_norm": 0.46869329450248615, "learning_rate": 7.268920131618308e-06, "loss": 0.4617, "step": 2430 }, { "epoch": 0.21816876444324784, "grad_norm": 0.5308234661478884, "learning_rate": 7.271911456775353e-06, "loss": 0.549, "step": 2431 }, { "epoch": 0.21825850889591888, "grad_norm": 0.5371386021573995, "learning_rate": 7.274902781932397e-06, "loss": 0.4995, "step": 2432 }, { "epoch": 0.2183482533485899, "grad_norm": 0.5093772199759146, "learning_rate": 7.277894107089442e-06, "loss": 0.51, "step": 2433 }, { "epoch": 0.2184379978012609, "grad_norm": 0.4951655706166377, "learning_rate": 7.280885432246486e-06, "loss": 0.4997, "step": 2434 }, { "epoch": 0.21852774225393193, "grad_norm": 0.5503039622116547, "learning_rate": 7.283876757403531e-06, "loss": 0.5687, "step": 2435 }, { "epoch": 0.21861748670660294, "grad_norm": 0.5235370103212144, "learning_rate": 7.286868082560575e-06, "loss": 0.5138, "step": 2436 }, { "epoch": 0.21870723115927396, "grad_norm": 0.4801464223392294, "learning_rate": 7.28985940771762e-06, "loss": 0.5112, "step": 2437 }, { "epoch": 0.218796975611945, "grad_norm": 0.5333584720037488, "learning_rate": 7.2928507328746645e-06, "loss": 0.592, "step": 2438 }, { "epoch": 0.218886720064616, "grad_norm": 0.4968999840600701, "learning_rate": 7.295842058031709e-06, "loss": 0.4495, "step": 2439 }, { "epoch": 0.21897646451728703, "grad_norm": 0.5320230906558366, "learning_rate": 7.298833383188754e-06, "loss": 0.5106, "step": 2440 }, { "epoch": 0.21906620896995804, "grad_norm": 0.5379099185952079, "learning_rate": 7.301824708345798e-06, "loss": 0.5502, "step": 2441 }, { "epoch": 0.21915595342262906, "grad_norm": 0.5385600915415405, "learning_rate": 7.304816033502843e-06, "loss": 0.5682, "step": 2442 }, { "epoch": 0.21924569787530007, "grad_norm": 0.47486808519548246, "learning_rate": 7.307807358659887e-06, "loss": 0.4158, "step": 2443 }, { "epoch": 0.21933544232797111, "grad_norm": 0.5289008904812659, "learning_rate": 7.310798683816932e-06, "loss": 0.5396, "step": 2444 }, { "epoch": 0.21942518678064213, "grad_norm": 0.5345984676141645, "learning_rate": 7.3137900089739764e-06, "loss": 0.5661, "step": 2445 }, { "epoch": 0.21951493123331314, "grad_norm": 0.5303459684221682, "learning_rate": 7.31678133413102e-06, "loss": 0.513, "step": 2446 }, { "epoch": 0.21960467568598416, "grad_norm": 0.5719348162908037, "learning_rate": 7.319772659288065e-06, "loss": 0.6085, "step": 2447 }, { "epoch": 0.21969442013865517, "grad_norm": 0.5042710510760312, "learning_rate": 7.322763984445109e-06, "loss": 0.4767, "step": 2448 }, { "epoch": 0.2197841645913262, "grad_norm": 0.5388230353855219, "learning_rate": 7.325755309602154e-06, "loss": 0.5914, "step": 2449 }, { "epoch": 0.21987390904399723, "grad_norm": 0.5709255243437693, "learning_rate": 7.328746634759198e-06, "loss": 0.5869, "step": 2450 }, { "epoch": 0.21996365349666824, "grad_norm": 0.5203841748051558, "learning_rate": 7.331737959916243e-06, "loss": 0.5139, "step": 2451 }, { "epoch": 0.22005339794933926, "grad_norm": 0.4931269556967296, "learning_rate": 7.3347292850732875e-06, "loss": 0.4899, "step": 2452 }, { "epoch": 0.22014314240201027, "grad_norm": 0.4701310871015154, "learning_rate": 7.337720610230332e-06, "loss": 0.4601, "step": 2453 }, { "epoch": 0.2202328868546813, "grad_norm": 0.4771643702980942, "learning_rate": 7.340711935387377e-06, "loss": 0.4897, "step": 2454 }, { "epoch": 0.2203226313073523, "grad_norm": 0.5302574106717716, "learning_rate": 7.343703260544421e-06, "loss": 0.5487, "step": 2455 }, { "epoch": 0.22041237576002334, "grad_norm": 0.453531506086748, "learning_rate": 7.346694585701466e-06, "loss": 0.42, "step": 2456 }, { "epoch": 0.22050212021269436, "grad_norm": 0.5247200384560009, "learning_rate": 7.34968591085851e-06, "loss": 0.5498, "step": 2457 }, { "epoch": 0.22059186466536537, "grad_norm": 0.5377392818267303, "learning_rate": 7.352677236015555e-06, "loss": 0.5533, "step": 2458 }, { "epoch": 0.2206816091180364, "grad_norm": 0.5226054766683849, "learning_rate": 7.355668561172599e-06, "loss": 0.5478, "step": 2459 }, { "epoch": 0.2207713535707074, "grad_norm": 0.492932494702158, "learning_rate": 7.358659886329644e-06, "loss": 0.4688, "step": 2460 }, { "epoch": 0.22086109802337844, "grad_norm": 0.5348803236142298, "learning_rate": 7.3616512114866885e-06, "loss": 0.525, "step": 2461 }, { "epoch": 0.22095084247604946, "grad_norm": 0.6028952899502501, "learning_rate": 7.364642536643734e-06, "loss": 0.5405, "step": 2462 }, { "epoch": 0.22104058692872047, "grad_norm": 0.5028166766245991, "learning_rate": 7.3676338618007785e-06, "loss": 0.5019, "step": 2463 }, { "epoch": 0.2211303313813915, "grad_norm": 0.5929277434513756, "learning_rate": 7.370625186957823e-06, "loss": 0.6071, "step": 2464 }, { "epoch": 0.2212200758340625, "grad_norm": 0.5151707212193861, "learning_rate": 7.373616512114868e-06, "loss": 0.4848, "step": 2465 }, { "epoch": 0.22130982028673352, "grad_norm": 0.5273291375354361, "learning_rate": 7.376607837271912e-06, "loss": 0.4734, "step": 2466 }, { "epoch": 0.22139956473940456, "grad_norm": 0.5209106172797698, "learning_rate": 7.379599162428957e-06, "loss": 0.4587, "step": 2467 }, { "epoch": 0.22148930919207557, "grad_norm": 0.4932376070810482, "learning_rate": 7.382590487586001e-06, "loss": 0.464, "step": 2468 }, { "epoch": 0.2215790536447466, "grad_norm": 0.5307424364879648, "learning_rate": 7.385581812743046e-06, "loss": 0.5715, "step": 2469 }, { "epoch": 0.2216687980974176, "grad_norm": 0.5240504835015339, "learning_rate": 7.38857313790009e-06, "loss": 0.4939, "step": 2470 }, { "epoch": 0.22175854255008862, "grad_norm": 0.4900838167739303, "learning_rate": 7.391564463057135e-06, "loss": 0.4636, "step": 2471 }, { "epoch": 0.22184828700275963, "grad_norm": 0.45013610561649814, "learning_rate": 7.3945557882141795e-06, "loss": 0.4433, "step": 2472 }, { "epoch": 0.22193803145543067, "grad_norm": 0.49076697034848743, "learning_rate": 7.397547113371224e-06, "loss": 0.46, "step": 2473 }, { "epoch": 0.2220277759081017, "grad_norm": 0.5738997504495783, "learning_rate": 7.400538438528269e-06, "loss": 0.5411, "step": 2474 }, { "epoch": 0.2221175203607727, "grad_norm": 0.4694562935133219, "learning_rate": 7.403529763685313e-06, "loss": 0.4587, "step": 2475 }, { "epoch": 0.22220726481344372, "grad_norm": 0.5347752443542889, "learning_rate": 7.406521088842358e-06, "loss": 0.5145, "step": 2476 }, { "epoch": 0.22229700926611473, "grad_norm": 0.47226684886287845, "learning_rate": 7.409512413999402e-06, "loss": 0.4521, "step": 2477 }, { "epoch": 0.22238675371878575, "grad_norm": 0.4924602870049594, "learning_rate": 7.412503739156447e-06, "loss": 0.5246, "step": 2478 }, { "epoch": 0.2224764981714568, "grad_norm": 0.5276045357125025, "learning_rate": 7.415495064313491e-06, "loss": 0.5352, "step": 2479 }, { "epoch": 0.2225662426241278, "grad_norm": 0.5126726146904135, "learning_rate": 7.418486389470536e-06, "loss": 0.5418, "step": 2480 }, { "epoch": 0.22265598707679882, "grad_norm": 0.5310486392980116, "learning_rate": 7.4214777146275805e-06, "loss": 0.565, "step": 2481 }, { "epoch": 0.22274573152946983, "grad_norm": 0.5344853575029647, "learning_rate": 7.424469039784625e-06, "loss": 0.5489, "step": 2482 }, { "epoch": 0.22283547598214085, "grad_norm": 0.513751220259186, "learning_rate": 7.42746036494167e-06, "loss": 0.5368, "step": 2483 }, { "epoch": 0.22292522043481186, "grad_norm": 0.492107952492094, "learning_rate": 7.430451690098714e-06, "loss": 0.4765, "step": 2484 }, { "epoch": 0.2230149648874829, "grad_norm": 0.4845874839934853, "learning_rate": 7.433443015255759e-06, "loss": 0.5365, "step": 2485 }, { "epoch": 0.22310470934015392, "grad_norm": 0.5345084114645161, "learning_rate": 7.436434340412803e-06, "loss": 0.5394, "step": 2486 }, { "epoch": 0.22319445379282493, "grad_norm": 0.4834277297634255, "learning_rate": 7.439425665569848e-06, "loss": 0.4458, "step": 2487 }, { "epoch": 0.22328419824549595, "grad_norm": 0.48959682170650703, "learning_rate": 7.4424169907268924e-06, "loss": 0.4453, "step": 2488 }, { "epoch": 0.22337394269816696, "grad_norm": 0.4797224043943875, "learning_rate": 7.445408315883937e-06, "loss": 0.5101, "step": 2489 }, { "epoch": 0.22346368715083798, "grad_norm": 0.5581958253226443, "learning_rate": 7.448399641040982e-06, "loss": 0.5621, "step": 2490 }, { "epoch": 0.22355343160350902, "grad_norm": 0.5396381680988926, "learning_rate": 7.451390966198027e-06, "loss": 0.5256, "step": 2491 }, { "epoch": 0.22364317605618003, "grad_norm": 0.5136142445237355, "learning_rate": 7.4543822913550715e-06, "loss": 0.524, "step": 2492 }, { "epoch": 0.22373292050885105, "grad_norm": 0.4745303103954961, "learning_rate": 7.457373616512116e-06, "loss": 0.5015, "step": 2493 }, { "epoch": 0.22382266496152206, "grad_norm": 0.49000110011024767, "learning_rate": 7.460364941669161e-06, "loss": 0.4741, "step": 2494 }, { "epoch": 0.22391240941419308, "grad_norm": 0.50609663065577, "learning_rate": 7.463356266826205e-06, "loss": 0.5423, "step": 2495 }, { "epoch": 0.22400215386686412, "grad_norm": 0.49808695102445333, "learning_rate": 7.46634759198325e-06, "loss": 0.505, "step": 2496 }, { "epoch": 0.22409189831953513, "grad_norm": 0.4730025531364805, "learning_rate": 7.469338917140294e-06, "loss": 0.4829, "step": 2497 }, { "epoch": 0.22418164277220615, "grad_norm": 0.5477450118432805, "learning_rate": 7.472330242297339e-06, "loss": 0.6315, "step": 2498 }, { "epoch": 0.22427138722487716, "grad_norm": 0.5292127392132661, "learning_rate": 7.4753215674543834e-06, "loss": 0.5627, "step": 2499 }, { "epoch": 0.22436113167754818, "grad_norm": 0.5257126124329897, "learning_rate": 7.478312892611428e-06, "loss": 0.5626, "step": 2500 }, { "epoch": 0.2244508761302192, "grad_norm": 0.5443035605526446, "learning_rate": 7.4813042177684725e-06, "loss": 0.4832, "step": 2501 }, { "epoch": 0.22454062058289023, "grad_norm": 0.5516377054550123, "learning_rate": 7.484295542925517e-06, "loss": 0.6219, "step": 2502 }, { "epoch": 0.22463036503556125, "grad_norm": 0.5116629379615502, "learning_rate": 7.487286868082562e-06, "loss": 0.4436, "step": 2503 }, { "epoch": 0.22472010948823226, "grad_norm": 0.5130720827843135, "learning_rate": 7.490278193239606e-06, "loss": 0.5001, "step": 2504 }, { "epoch": 0.22480985394090328, "grad_norm": 0.5419923137903294, "learning_rate": 7.493269518396651e-06, "loss": 0.5064, "step": 2505 }, { "epoch": 0.2248995983935743, "grad_norm": 0.5578410655104741, "learning_rate": 7.496260843553695e-06, "loss": 0.5733, "step": 2506 }, { "epoch": 0.2249893428462453, "grad_norm": 0.4935510624434475, "learning_rate": 7.49925216871074e-06, "loss": 0.4992, "step": 2507 }, { "epoch": 0.22507908729891635, "grad_norm": 0.5111813686539105, "learning_rate": 7.502243493867784e-06, "loss": 0.5577, "step": 2508 }, { "epoch": 0.22516883175158736, "grad_norm": 0.5037016602206871, "learning_rate": 7.505234819024828e-06, "loss": 0.5658, "step": 2509 }, { "epoch": 0.22525857620425838, "grad_norm": 0.5556897102006604, "learning_rate": 7.508226144181873e-06, "loss": 0.5278, "step": 2510 }, { "epoch": 0.2253483206569294, "grad_norm": 0.5195416534697739, "learning_rate": 7.511217469338917e-06, "loss": 0.5223, "step": 2511 }, { "epoch": 0.2254380651096004, "grad_norm": 0.5047799636312034, "learning_rate": 7.514208794495962e-06, "loss": 0.5261, "step": 2512 }, { "epoch": 0.22552780956227142, "grad_norm": 0.48122476407380466, "learning_rate": 7.517200119653006e-06, "loss": 0.4541, "step": 2513 }, { "epoch": 0.22561755401494246, "grad_norm": 0.5229781418637894, "learning_rate": 7.520191444810051e-06, "loss": 0.5193, "step": 2514 }, { "epoch": 0.22570729846761348, "grad_norm": 0.5087075253654124, "learning_rate": 7.5231827699670955e-06, "loss": 0.5495, "step": 2515 }, { "epoch": 0.2257970429202845, "grad_norm": 0.5460290559327776, "learning_rate": 7.52617409512414e-06, "loss": 0.542, "step": 2516 }, { "epoch": 0.2258867873729555, "grad_norm": 0.5159423169870445, "learning_rate": 7.529165420281185e-06, "loss": 0.5578, "step": 2517 }, { "epoch": 0.22597653182562652, "grad_norm": 0.5378752051546112, "learning_rate": 7.532156745438229e-06, "loss": 0.5506, "step": 2518 }, { "epoch": 0.22606627627829753, "grad_norm": 0.45041736920957204, "learning_rate": 7.535148070595274e-06, "loss": 0.4423, "step": 2519 }, { "epoch": 0.22615602073096858, "grad_norm": 0.4866914027789528, "learning_rate": 7.538139395752318e-06, "loss": 0.446, "step": 2520 }, { "epoch": 0.2262457651836396, "grad_norm": 0.5303155043043232, "learning_rate": 7.541130720909363e-06, "loss": 0.519, "step": 2521 }, { "epoch": 0.2263355096363106, "grad_norm": 0.5385985671229394, "learning_rate": 7.5441220460664074e-06, "loss": 0.5314, "step": 2522 }, { "epoch": 0.22642525408898162, "grad_norm": 0.4784520192951358, "learning_rate": 7.547113371223452e-06, "loss": 0.4763, "step": 2523 }, { "epoch": 0.22651499854165263, "grad_norm": 0.46669996966969834, "learning_rate": 7.5501046963804965e-06, "loss": 0.4695, "step": 2524 }, { "epoch": 0.22660474299432368, "grad_norm": 0.486703653491342, "learning_rate": 7.553096021537541e-06, "loss": 0.4837, "step": 2525 }, { "epoch": 0.2266944874469947, "grad_norm": 0.49667943045404067, "learning_rate": 7.5560873466945865e-06, "loss": 0.5154, "step": 2526 }, { "epoch": 0.2267842318996657, "grad_norm": 0.5259486777551533, "learning_rate": 7.559078671851631e-06, "loss": 0.5161, "step": 2527 }, { "epoch": 0.22687397635233672, "grad_norm": 0.5502406494224014, "learning_rate": 7.562069997008676e-06, "loss": 0.5755, "step": 2528 }, { "epoch": 0.22696372080500773, "grad_norm": 0.5247602926674125, "learning_rate": 7.56506132216572e-06, "loss": 0.5251, "step": 2529 }, { "epoch": 0.22705346525767875, "grad_norm": 0.4965264518054937, "learning_rate": 7.568052647322765e-06, "loss": 0.5265, "step": 2530 }, { "epoch": 0.2271432097103498, "grad_norm": 0.5275692193921231, "learning_rate": 7.571043972479809e-06, "loss": 0.5356, "step": 2531 }, { "epoch": 0.2272329541630208, "grad_norm": 0.5275226388187421, "learning_rate": 7.574035297636854e-06, "loss": 0.5557, "step": 2532 }, { "epoch": 0.22732269861569182, "grad_norm": 0.5223214128433803, "learning_rate": 7.577026622793898e-06, "loss": 0.5396, "step": 2533 }, { "epoch": 0.22741244306836284, "grad_norm": 0.5542847629692566, "learning_rate": 7.580017947950943e-06, "loss": 0.5544, "step": 2534 }, { "epoch": 0.22750218752103385, "grad_norm": 0.5679698302117125, "learning_rate": 7.5830092731079875e-06, "loss": 0.6141, "step": 2535 }, { "epoch": 0.22759193197370486, "grad_norm": 0.5431982743380189, "learning_rate": 7.586000598265032e-06, "loss": 0.4807, "step": 2536 }, { "epoch": 0.2276816764263759, "grad_norm": 0.5165493584513536, "learning_rate": 7.588991923422077e-06, "loss": 0.5486, "step": 2537 }, { "epoch": 0.22777142087904692, "grad_norm": 0.48174468593629355, "learning_rate": 7.591983248579121e-06, "loss": 0.4846, "step": 2538 }, { "epoch": 0.22786116533171794, "grad_norm": 0.5006684383004235, "learning_rate": 7.594974573736166e-06, "loss": 0.53, "step": 2539 }, { "epoch": 0.22795090978438895, "grad_norm": 0.5283030397209066, "learning_rate": 7.59796589889321e-06, "loss": 0.5627, "step": 2540 }, { "epoch": 0.22804065423705996, "grad_norm": 0.5190021641688672, "learning_rate": 7.600957224050255e-06, "loss": 0.5783, "step": 2541 }, { "epoch": 0.22813039868973098, "grad_norm": 0.49298693637773094, "learning_rate": 7.6039485492072994e-06, "loss": 0.4874, "step": 2542 }, { "epoch": 0.22822014314240202, "grad_norm": 0.4739408015293531, "learning_rate": 7.606939874364344e-06, "loss": 0.4612, "step": 2543 }, { "epoch": 0.22830988759507304, "grad_norm": 0.49507942736298216, "learning_rate": 7.6099311995213886e-06, "loss": 0.467, "step": 2544 }, { "epoch": 0.22839963204774405, "grad_norm": 0.497682916720323, "learning_rate": 7.612922524678433e-06, "loss": 0.4878, "step": 2545 }, { "epoch": 0.22848937650041506, "grad_norm": 0.4823730971595892, "learning_rate": 7.615913849835478e-06, "loss": 0.4733, "step": 2546 }, { "epoch": 0.22857912095308608, "grad_norm": 0.5172238048561142, "learning_rate": 7.618905174992522e-06, "loss": 0.5282, "step": 2547 }, { "epoch": 0.2286688654057571, "grad_norm": 0.4926558410298805, "learning_rate": 7.621896500149567e-06, "loss": 0.4746, "step": 2548 }, { "epoch": 0.22875860985842814, "grad_norm": 0.4729397055021519, "learning_rate": 7.624887825306611e-06, "loss": 0.4359, "step": 2549 }, { "epoch": 0.22884835431109915, "grad_norm": 0.5673666412498376, "learning_rate": 7.627879150463656e-06, "loss": 0.5553, "step": 2550 }, { "epoch": 0.22893809876377016, "grad_norm": 0.4689296743272086, "learning_rate": 7.6308704756207e-06, "loss": 0.497, "step": 2551 }, { "epoch": 0.22902784321644118, "grad_norm": 0.5208992563226621, "learning_rate": 7.633861800777745e-06, "loss": 0.503, "step": 2552 }, { "epoch": 0.2291175876691122, "grad_norm": 0.5407358074443007, "learning_rate": 7.63685312593479e-06, "loss": 0.601, "step": 2553 }, { "epoch": 0.22920733212178324, "grad_norm": 0.4805128973132208, "learning_rate": 7.639844451091834e-06, "loss": 0.4908, "step": 2554 }, { "epoch": 0.22929707657445425, "grad_norm": 0.465966967322545, "learning_rate": 7.642835776248879e-06, "loss": 0.4569, "step": 2555 }, { "epoch": 0.22938682102712527, "grad_norm": 0.47963209258498973, "learning_rate": 7.645827101405923e-06, "loss": 0.4373, "step": 2556 }, { "epoch": 0.22947656547979628, "grad_norm": 0.5041516300468007, "learning_rate": 7.648818426562968e-06, "loss": 0.4983, "step": 2557 }, { "epoch": 0.2295663099324673, "grad_norm": 0.48465892242859004, "learning_rate": 7.651809751720012e-06, "loss": 0.4775, "step": 2558 }, { "epoch": 0.2296560543851383, "grad_norm": 0.5117580987040452, "learning_rate": 7.654801076877057e-06, "loss": 0.5559, "step": 2559 }, { "epoch": 0.22974579883780935, "grad_norm": 0.5041355112873014, "learning_rate": 7.657792402034101e-06, "loss": 0.517, "step": 2560 }, { "epoch": 0.22983554329048037, "grad_norm": 0.47055821540286125, "learning_rate": 7.660783727191146e-06, "loss": 0.4694, "step": 2561 }, { "epoch": 0.22992528774315138, "grad_norm": 0.49028941851359403, "learning_rate": 7.66377505234819e-06, "loss": 0.4511, "step": 2562 }, { "epoch": 0.2300150321958224, "grad_norm": 0.49347530234235515, "learning_rate": 7.666766377505235e-06, "loss": 0.4538, "step": 2563 }, { "epoch": 0.2301047766484934, "grad_norm": 0.5304049160870685, "learning_rate": 7.66975770266228e-06, "loss": 0.5082, "step": 2564 }, { "epoch": 0.23019452110116442, "grad_norm": 0.4902972972810814, "learning_rate": 7.672749027819324e-06, "loss": 0.4912, "step": 2565 }, { "epoch": 0.23028426555383547, "grad_norm": 0.5059753645805264, "learning_rate": 7.675740352976369e-06, "loss": 0.4733, "step": 2566 }, { "epoch": 0.23037401000650648, "grad_norm": 0.5768002049263695, "learning_rate": 7.678731678133415e-06, "loss": 0.5887, "step": 2567 }, { "epoch": 0.2304637544591775, "grad_norm": 0.5587226584630455, "learning_rate": 7.68172300329046e-06, "loss": 0.5698, "step": 2568 }, { "epoch": 0.2305534989118485, "grad_norm": 0.4777433461511595, "learning_rate": 7.684714328447504e-06, "loss": 0.5, "step": 2569 }, { "epoch": 0.23064324336451952, "grad_norm": 0.44118514430493877, "learning_rate": 7.687705653604547e-06, "loss": 0.4101, "step": 2570 }, { "epoch": 0.23073298781719054, "grad_norm": 0.4990576515518502, "learning_rate": 7.690696978761592e-06, "loss": 0.4897, "step": 2571 }, { "epoch": 0.23082273226986158, "grad_norm": 0.5006647192055774, "learning_rate": 7.693688303918636e-06, "loss": 0.5298, "step": 2572 }, { "epoch": 0.2309124767225326, "grad_norm": 0.48961873847577064, "learning_rate": 7.69667962907568e-06, "loss": 0.5118, "step": 2573 }, { "epoch": 0.2310022211752036, "grad_norm": 0.5080661536030032, "learning_rate": 7.699670954232725e-06, "loss": 0.5062, "step": 2574 }, { "epoch": 0.23109196562787462, "grad_norm": 0.53504713097083, "learning_rate": 7.70266227938977e-06, "loss": 0.5286, "step": 2575 }, { "epoch": 0.23118171008054564, "grad_norm": 0.49484310253953045, "learning_rate": 7.705653604546814e-06, "loss": 0.4981, "step": 2576 }, { "epoch": 0.23127145453321665, "grad_norm": 0.5136836385502643, "learning_rate": 7.708644929703859e-06, "loss": 0.5073, "step": 2577 }, { "epoch": 0.2313611989858877, "grad_norm": 0.5186492633782226, "learning_rate": 7.711636254860904e-06, "loss": 0.557, "step": 2578 }, { "epoch": 0.2314509434385587, "grad_norm": 0.4769828906456706, "learning_rate": 7.714627580017948e-06, "loss": 0.4978, "step": 2579 }, { "epoch": 0.23154068789122972, "grad_norm": 0.48761807061965334, "learning_rate": 7.717618905174993e-06, "loss": 0.5396, "step": 2580 }, { "epoch": 0.23163043234390074, "grad_norm": 0.4955397653870579, "learning_rate": 7.720610230332037e-06, "loss": 0.4488, "step": 2581 }, { "epoch": 0.23172017679657175, "grad_norm": 0.49958812051329266, "learning_rate": 7.723601555489082e-06, "loss": 0.5108, "step": 2582 }, { "epoch": 0.23180992124924277, "grad_norm": 0.49474028514584506, "learning_rate": 7.726592880646126e-06, "loss": 0.4858, "step": 2583 }, { "epoch": 0.2318996657019138, "grad_norm": 0.48900364554663794, "learning_rate": 7.729584205803171e-06, "loss": 0.4507, "step": 2584 }, { "epoch": 0.23198941015458482, "grad_norm": 0.5152216967267844, "learning_rate": 7.732575530960215e-06, "loss": 0.5347, "step": 2585 }, { "epoch": 0.23207915460725584, "grad_norm": 0.48391209480398373, "learning_rate": 7.73556685611726e-06, "loss": 0.475, "step": 2586 }, { "epoch": 0.23216889905992685, "grad_norm": 0.4848431381495425, "learning_rate": 7.738558181274305e-06, "loss": 0.4628, "step": 2587 }, { "epoch": 0.23225864351259787, "grad_norm": 0.4993406726929989, "learning_rate": 7.741549506431349e-06, "loss": 0.496, "step": 2588 }, { "epoch": 0.2323483879652689, "grad_norm": 0.5179863756594685, "learning_rate": 7.744540831588394e-06, "loss": 0.4947, "step": 2589 }, { "epoch": 0.23243813241793992, "grad_norm": 0.5613748148119346, "learning_rate": 7.747532156745438e-06, "loss": 0.5462, "step": 2590 }, { "epoch": 0.23252787687061094, "grad_norm": 0.5622977773079868, "learning_rate": 7.750523481902483e-06, "loss": 0.5556, "step": 2591 }, { "epoch": 0.23261762132328195, "grad_norm": 0.49751582357910745, "learning_rate": 7.753514807059527e-06, "loss": 0.5354, "step": 2592 }, { "epoch": 0.23270736577595297, "grad_norm": 0.5524859567426168, "learning_rate": 7.756506132216572e-06, "loss": 0.5955, "step": 2593 }, { "epoch": 0.23279711022862398, "grad_norm": 0.5077211533448261, "learning_rate": 7.759497457373616e-06, "loss": 0.5251, "step": 2594 }, { "epoch": 0.23288685468129502, "grad_norm": 0.510477530282548, "learning_rate": 7.762488782530661e-06, "loss": 0.5413, "step": 2595 }, { "epoch": 0.23297659913396604, "grad_norm": 0.5081972788683066, "learning_rate": 7.765480107687706e-06, "loss": 0.4875, "step": 2596 }, { "epoch": 0.23306634358663705, "grad_norm": 0.5194953489518532, "learning_rate": 7.76847143284475e-06, "loss": 0.5556, "step": 2597 }, { "epoch": 0.23315608803930807, "grad_norm": 0.5116568210201612, "learning_rate": 7.771462758001795e-06, "loss": 0.5203, "step": 2598 }, { "epoch": 0.23324583249197908, "grad_norm": 0.5316079201322302, "learning_rate": 7.77445408315884e-06, "loss": 0.5165, "step": 2599 }, { "epoch": 0.2333355769446501, "grad_norm": 0.49850088370288914, "learning_rate": 7.777445408315884e-06, "loss": 0.5004, "step": 2600 }, { "epoch": 0.23342532139732114, "grad_norm": 0.4921453079884484, "learning_rate": 7.780436733472928e-06, "loss": 0.4885, "step": 2601 }, { "epoch": 0.23351506584999215, "grad_norm": 0.5253017944234011, "learning_rate": 7.783428058629973e-06, "loss": 0.5085, "step": 2602 }, { "epoch": 0.23360481030266317, "grad_norm": 0.478198025444246, "learning_rate": 7.786419383787017e-06, "loss": 0.494, "step": 2603 }, { "epoch": 0.23369455475533418, "grad_norm": 0.5203379120845496, "learning_rate": 7.789410708944064e-06, "loss": 0.5116, "step": 2604 }, { "epoch": 0.2337842992080052, "grad_norm": 0.4528727671439929, "learning_rate": 7.792402034101108e-06, "loss": 0.4403, "step": 2605 }, { "epoch": 0.2338740436606762, "grad_norm": 0.5188446473469683, "learning_rate": 7.795393359258153e-06, "loss": 0.5252, "step": 2606 }, { "epoch": 0.23396378811334725, "grad_norm": 0.518185909913706, "learning_rate": 7.798384684415197e-06, "loss": 0.5412, "step": 2607 }, { "epoch": 0.23405353256601827, "grad_norm": 0.5093592765688961, "learning_rate": 7.801376009572242e-06, "loss": 0.5317, "step": 2608 }, { "epoch": 0.23414327701868928, "grad_norm": 0.5138852031051256, "learning_rate": 7.804367334729287e-06, "loss": 0.469, "step": 2609 }, { "epoch": 0.2342330214713603, "grad_norm": 0.5360672014086832, "learning_rate": 7.807358659886331e-06, "loss": 0.5064, "step": 2610 }, { "epoch": 0.2343227659240313, "grad_norm": 0.5137970505701769, "learning_rate": 7.810349985043376e-06, "loss": 0.5345, "step": 2611 }, { "epoch": 0.23441251037670233, "grad_norm": 0.4578632931529334, "learning_rate": 7.81334131020042e-06, "loss": 0.438, "step": 2612 }, { "epoch": 0.23450225482937337, "grad_norm": 0.46099913885437205, "learning_rate": 7.816332635357465e-06, "loss": 0.4483, "step": 2613 }, { "epoch": 0.23459199928204438, "grad_norm": 0.5188178666246271, "learning_rate": 7.81932396051451e-06, "loss": 0.5147, "step": 2614 }, { "epoch": 0.2346817437347154, "grad_norm": 0.535906243024029, "learning_rate": 7.822315285671554e-06, "loss": 0.5756, "step": 2615 }, { "epoch": 0.2347714881873864, "grad_norm": 0.5404291485712397, "learning_rate": 7.825306610828598e-06, "loss": 0.5147, "step": 2616 }, { "epoch": 0.23486123264005743, "grad_norm": 0.5307804611819075, "learning_rate": 7.828297935985643e-06, "loss": 0.4703, "step": 2617 }, { "epoch": 0.23495097709272847, "grad_norm": 0.5725635359048754, "learning_rate": 7.831289261142688e-06, "loss": 0.558, "step": 2618 }, { "epoch": 0.23504072154539948, "grad_norm": 0.5002822509927556, "learning_rate": 7.834280586299732e-06, "loss": 0.5076, "step": 2619 }, { "epoch": 0.2351304659980705, "grad_norm": 0.47616207338093336, "learning_rate": 7.837271911456777e-06, "loss": 0.469, "step": 2620 }, { "epoch": 0.2352202104507415, "grad_norm": 0.4665344223122104, "learning_rate": 7.840263236613821e-06, "loss": 0.4773, "step": 2621 }, { "epoch": 0.23530995490341253, "grad_norm": 0.5190400247527625, "learning_rate": 7.843254561770866e-06, "loss": 0.5241, "step": 2622 }, { "epoch": 0.23539969935608354, "grad_norm": 0.514780684643453, "learning_rate": 7.84624588692791e-06, "loss": 0.491, "step": 2623 }, { "epoch": 0.23548944380875458, "grad_norm": 0.4908229789411219, "learning_rate": 7.849237212084955e-06, "loss": 0.4864, "step": 2624 }, { "epoch": 0.2355791882614256, "grad_norm": 0.5456523111076648, "learning_rate": 7.852228537242e-06, "loss": 0.5575, "step": 2625 }, { "epoch": 0.2356689327140966, "grad_norm": 0.5213700791795589, "learning_rate": 7.855219862399044e-06, "loss": 0.5078, "step": 2626 }, { "epoch": 0.23575867716676763, "grad_norm": 0.4991822079613157, "learning_rate": 7.858211187556089e-06, "loss": 0.4687, "step": 2627 }, { "epoch": 0.23584842161943864, "grad_norm": 0.5371766265522734, "learning_rate": 7.861202512713133e-06, "loss": 0.5607, "step": 2628 }, { "epoch": 0.23593816607210966, "grad_norm": 0.4766152987457045, "learning_rate": 7.864193837870178e-06, "loss": 0.4592, "step": 2629 }, { "epoch": 0.2360279105247807, "grad_norm": 0.49791180013448244, "learning_rate": 7.867185163027222e-06, "loss": 0.5004, "step": 2630 }, { "epoch": 0.2361176549774517, "grad_norm": 0.5207325787294367, "learning_rate": 7.870176488184267e-06, "loss": 0.5444, "step": 2631 }, { "epoch": 0.23620739943012273, "grad_norm": 0.5237434234289642, "learning_rate": 7.87316781334131e-06, "loss": 0.5422, "step": 2632 }, { "epoch": 0.23629714388279374, "grad_norm": 0.5350109495936693, "learning_rate": 7.876159138498354e-06, "loss": 0.5067, "step": 2633 }, { "epoch": 0.23638688833546476, "grad_norm": 0.5226423637688677, "learning_rate": 7.879150463655399e-06, "loss": 0.5657, "step": 2634 }, { "epoch": 0.23647663278813577, "grad_norm": 0.5225498105022979, "learning_rate": 7.882141788812443e-06, "loss": 0.5934, "step": 2635 }, { "epoch": 0.2365663772408068, "grad_norm": 0.5621181690083679, "learning_rate": 7.885133113969488e-06, "loss": 0.5269, "step": 2636 }, { "epoch": 0.23665612169347783, "grad_norm": 0.507563607921815, "learning_rate": 7.888124439126532e-06, "loss": 0.4998, "step": 2637 }, { "epoch": 0.23674586614614884, "grad_norm": 0.4871001071561844, "learning_rate": 7.891115764283577e-06, "loss": 0.4678, "step": 2638 }, { "epoch": 0.23683561059881986, "grad_norm": 0.4941598042188283, "learning_rate": 7.894107089440622e-06, "loss": 0.5162, "step": 2639 }, { "epoch": 0.23692535505149087, "grad_norm": 0.5658719474273204, "learning_rate": 7.897098414597668e-06, "loss": 0.5441, "step": 2640 }, { "epoch": 0.23701509950416189, "grad_norm": 0.48402948566769227, "learning_rate": 7.900089739754712e-06, "loss": 0.4816, "step": 2641 }, { "epoch": 0.23710484395683293, "grad_norm": 0.5175958034303508, "learning_rate": 7.903081064911757e-06, "loss": 0.5077, "step": 2642 }, { "epoch": 0.23719458840950394, "grad_norm": 0.5243113864819844, "learning_rate": 7.906072390068802e-06, "loss": 0.5008, "step": 2643 }, { "epoch": 0.23728433286217496, "grad_norm": 0.5111546382640274, "learning_rate": 7.909063715225846e-06, "loss": 0.4947, "step": 2644 }, { "epoch": 0.23737407731484597, "grad_norm": 0.5549697232049369, "learning_rate": 7.91205504038289e-06, "loss": 0.5562, "step": 2645 }, { "epoch": 0.23746382176751699, "grad_norm": 0.5715056689644618, "learning_rate": 7.915046365539935e-06, "loss": 0.5642, "step": 2646 }, { "epoch": 0.23755356622018803, "grad_norm": 0.495412332585794, "learning_rate": 7.91803769069698e-06, "loss": 0.4212, "step": 2647 }, { "epoch": 0.23764331067285904, "grad_norm": 0.5132484457913861, "learning_rate": 7.921029015854024e-06, "loss": 0.4983, "step": 2648 }, { "epoch": 0.23773305512553006, "grad_norm": 0.475239023995458, "learning_rate": 7.924020341011069e-06, "loss": 0.4823, "step": 2649 }, { "epoch": 0.23782279957820107, "grad_norm": 0.5724418780605551, "learning_rate": 7.927011666168113e-06, "loss": 0.5619, "step": 2650 }, { "epoch": 0.2379125440308721, "grad_norm": 0.49840844673147283, "learning_rate": 7.930002991325158e-06, "loss": 0.5121, "step": 2651 }, { "epoch": 0.2380022884835431, "grad_norm": 0.5100473802103372, "learning_rate": 7.932994316482203e-06, "loss": 0.5403, "step": 2652 }, { "epoch": 0.23809203293621414, "grad_norm": 0.5092819560028529, "learning_rate": 7.935985641639247e-06, "loss": 0.5429, "step": 2653 }, { "epoch": 0.23818177738888516, "grad_norm": 0.5230943382663858, "learning_rate": 7.938976966796292e-06, "loss": 0.5293, "step": 2654 }, { "epoch": 0.23827152184155617, "grad_norm": 0.47039147539099324, "learning_rate": 7.941968291953336e-06, "loss": 0.4779, "step": 2655 }, { "epoch": 0.2383612662942272, "grad_norm": 0.5301045826876641, "learning_rate": 7.94495961711038e-06, "loss": 0.5671, "step": 2656 }, { "epoch": 0.2384510107468982, "grad_norm": 0.4988636708140087, "learning_rate": 7.947950942267425e-06, "loss": 0.5019, "step": 2657 }, { "epoch": 0.23854075519956922, "grad_norm": 0.487034562293523, "learning_rate": 7.95094226742447e-06, "loss": 0.5143, "step": 2658 }, { "epoch": 0.23863049965224026, "grad_norm": 0.5633255894021977, "learning_rate": 7.953933592581514e-06, "loss": 0.58, "step": 2659 }, { "epoch": 0.23872024410491127, "grad_norm": 0.5303132093368897, "learning_rate": 7.956924917738559e-06, "loss": 0.5512, "step": 2660 }, { "epoch": 0.2388099885575823, "grad_norm": 0.4533421043107783, "learning_rate": 7.959916242895604e-06, "loss": 0.4256, "step": 2661 }, { "epoch": 0.2388997330102533, "grad_norm": 0.5572819039627739, "learning_rate": 7.962907568052648e-06, "loss": 0.5441, "step": 2662 }, { "epoch": 0.23898947746292432, "grad_norm": 0.5818642812439457, "learning_rate": 7.965898893209693e-06, "loss": 0.5448, "step": 2663 }, { "epoch": 0.23907922191559533, "grad_norm": 0.4960411030662932, "learning_rate": 7.968890218366737e-06, "loss": 0.4841, "step": 2664 }, { "epoch": 0.23916896636826637, "grad_norm": 0.5220568367312702, "learning_rate": 7.971881543523782e-06, "loss": 0.5795, "step": 2665 }, { "epoch": 0.2392587108209374, "grad_norm": 0.47791753148086064, "learning_rate": 7.974872868680826e-06, "loss": 0.4676, "step": 2666 }, { "epoch": 0.2393484552736084, "grad_norm": 0.49589481789929846, "learning_rate": 7.977864193837871e-06, "loss": 0.455, "step": 2667 }, { "epoch": 0.23943819972627942, "grad_norm": 0.5152237982823036, "learning_rate": 7.980855518994915e-06, "loss": 0.5206, "step": 2668 }, { "epoch": 0.23952794417895043, "grad_norm": 0.4975038830762605, "learning_rate": 7.98384684415196e-06, "loss": 0.5111, "step": 2669 }, { "epoch": 0.23961768863162144, "grad_norm": 0.49225080762376344, "learning_rate": 7.986838169309005e-06, "loss": 0.4777, "step": 2670 }, { "epoch": 0.2397074330842925, "grad_norm": 0.5051564531117492, "learning_rate": 7.98982949446605e-06, "loss": 0.5505, "step": 2671 }, { "epoch": 0.2397971775369635, "grad_norm": 0.49782792469855136, "learning_rate": 7.992820819623094e-06, "loss": 0.4869, "step": 2672 }, { "epoch": 0.23988692198963452, "grad_norm": 0.5124351476442579, "learning_rate": 7.995812144780138e-06, "loss": 0.5345, "step": 2673 }, { "epoch": 0.23997666644230553, "grad_norm": 0.5266728714811006, "learning_rate": 7.998803469937183e-06, "loss": 0.54, "step": 2674 }, { "epoch": 0.24006641089497655, "grad_norm": 0.48296820149033903, "learning_rate": 8.001794795094227e-06, "loss": 0.4844, "step": 2675 }, { "epoch": 0.24015615534764756, "grad_norm": 0.5004050797414936, "learning_rate": 8.004786120251272e-06, "loss": 0.4941, "step": 2676 }, { "epoch": 0.2402458998003186, "grad_norm": 0.5130619360576048, "learning_rate": 8.007777445408317e-06, "loss": 0.5035, "step": 2677 }, { "epoch": 0.24033564425298962, "grad_norm": 0.5079260922757027, "learning_rate": 8.010768770565361e-06, "loss": 0.5039, "step": 2678 }, { "epoch": 0.24042538870566063, "grad_norm": 0.4942606952080482, "learning_rate": 8.013760095722406e-06, "loss": 0.4968, "step": 2679 }, { "epoch": 0.24051513315833165, "grad_norm": 0.5803524127788783, "learning_rate": 8.01675142087945e-06, "loss": 0.5642, "step": 2680 }, { "epoch": 0.24060487761100266, "grad_norm": 0.5236637558270778, "learning_rate": 8.019742746036495e-06, "loss": 0.4839, "step": 2681 }, { "epoch": 0.2406946220636737, "grad_norm": 0.5026931806430391, "learning_rate": 8.02273407119354e-06, "loss": 0.4993, "step": 2682 }, { "epoch": 0.24078436651634472, "grad_norm": 0.5012928863751451, "learning_rate": 8.025725396350584e-06, "loss": 0.5095, "step": 2683 }, { "epoch": 0.24087411096901573, "grad_norm": 0.5166461689146016, "learning_rate": 8.028716721507628e-06, "loss": 0.5319, "step": 2684 }, { "epoch": 0.24096385542168675, "grad_norm": 0.5365207986152403, "learning_rate": 8.031708046664673e-06, "loss": 0.5354, "step": 2685 }, { "epoch": 0.24105359987435776, "grad_norm": 0.5234584309620344, "learning_rate": 8.034699371821718e-06, "loss": 0.5184, "step": 2686 }, { "epoch": 0.24114334432702877, "grad_norm": 0.5611632766206811, "learning_rate": 8.037690696978762e-06, "loss": 0.5801, "step": 2687 }, { "epoch": 0.24123308877969982, "grad_norm": 0.4799303597016165, "learning_rate": 8.040682022135807e-06, "loss": 0.4599, "step": 2688 }, { "epoch": 0.24132283323237083, "grad_norm": 0.5321255353732268, "learning_rate": 8.043673347292851e-06, "loss": 0.5454, "step": 2689 }, { "epoch": 0.24141257768504185, "grad_norm": 0.4815307465321592, "learning_rate": 8.046664672449896e-06, "loss": 0.4586, "step": 2690 }, { "epoch": 0.24150232213771286, "grad_norm": 0.4465618783895566, "learning_rate": 8.04965599760694e-06, "loss": 0.462, "step": 2691 }, { "epoch": 0.24159206659038387, "grad_norm": 0.48836385538105725, "learning_rate": 8.052647322763985e-06, "loss": 0.5076, "step": 2692 }, { "epoch": 0.2416818110430549, "grad_norm": 0.4806710926184605, "learning_rate": 8.05563864792103e-06, "loss": 0.4827, "step": 2693 }, { "epoch": 0.24177155549572593, "grad_norm": 0.5488483098171142, "learning_rate": 8.058629973078074e-06, "loss": 0.5479, "step": 2694 }, { "epoch": 0.24186129994839695, "grad_norm": 0.4978541924653008, "learning_rate": 8.061621298235119e-06, "loss": 0.4906, "step": 2695 }, { "epoch": 0.24195104440106796, "grad_norm": 0.5368029402433161, "learning_rate": 8.064612623392163e-06, "loss": 0.5623, "step": 2696 }, { "epoch": 0.24204078885373898, "grad_norm": 0.5066404459385507, "learning_rate": 8.067603948549208e-06, "loss": 0.5091, "step": 2697 }, { "epoch": 0.24213053330641, "grad_norm": 0.4773596775664869, "learning_rate": 8.070595273706252e-06, "loss": 0.5462, "step": 2698 }, { "epoch": 0.242220277759081, "grad_norm": 0.5232990082445951, "learning_rate": 8.073586598863297e-06, "loss": 0.5217, "step": 2699 }, { "epoch": 0.24231002221175205, "grad_norm": 0.49631713898881175, "learning_rate": 8.076577924020341e-06, "loss": 0.5147, "step": 2700 }, { "epoch": 0.24239976666442306, "grad_norm": 0.540984627965372, "learning_rate": 8.079569249177386e-06, "loss": 0.601, "step": 2701 }, { "epoch": 0.24248951111709408, "grad_norm": 0.534892308766349, "learning_rate": 8.08256057433443e-06, "loss": 0.5422, "step": 2702 }, { "epoch": 0.2425792555697651, "grad_norm": 0.5516475349241069, "learning_rate": 8.085551899491475e-06, "loss": 0.5382, "step": 2703 }, { "epoch": 0.2426690000224361, "grad_norm": 0.5297420678046744, "learning_rate": 8.08854322464852e-06, "loss": 0.5212, "step": 2704 }, { "epoch": 0.24275874447510712, "grad_norm": 0.5223562788294238, "learning_rate": 8.091534549805564e-06, "loss": 0.5368, "step": 2705 }, { "epoch": 0.24284848892777816, "grad_norm": 0.4743776763685702, "learning_rate": 8.094525874962609e-06, "loss": 0.4602, "step": 2706 }, { "epoch": 0.24293823338044918, "grad_norm": 0.4966340322675198, "learning_rate": 8.097517200119653e-06, "loss": 0.5052, "step": 2707 }, { "epoch": 0.2430279778331202, "grad_norm": 0.47993696984787754, "learning_rate": 8.100508525276698e-06, "loss": 0.4389, "step": 2708 }, { "epoch": 0.2431177222857912, "grad_norm": 0.5257646113335341, "learning_rate": 8.103499850433742e-06, "loss": 0.5654, "step": 2709 }, { "epoch": 0.24320746673846222, "grad_norm": 0.493735250588477, "learning_rate": 8.106491175590787e-06, "loss": 0.4652, "step": 2710 }, { "epoch": 0.24329721119113326, "grad_norm": 0.47381673410389635, "learning_rate": 8.109482500747831e-06, "loss": 0.4597, "step": 2711 }, { "epoch": 0.24338695564380428, "grad_norm": 0.5011335653304232, "learning_rate": 8.112473825904876e-06, "loss": 0.5045, "step": 2712 }, { "epoch": 0.2434767000964753, "grad_norm": 0.5349541101235756, "learning_rate": 8.11546515106192e-06, "loss": 0.5492, "step": 2713 }, { "epoch": 0.2435664445491463, "grad_norm": 0.48824741422856976, "learning_rate": 8.118456476218965e-06, "loss": 0.5098, "step": 2714 }, { "epoch": 0.24365618900181732, "grad_norm": 0.49294959826721757, "learning_rate": 8.12144780137601e-06, "loss": 0.477, "step": 2715 }, { "epoch": 0.24374593345448833, "grad_norm": 0.5448381767586323, "learning_rate": 8.124439126533054e-06, "loss": 0.6259, "step": 2716 }, { "epoch": 0.24383567790715938, "grad_norm": 0.4496739445972098, "learning_rate": 8.127430451690099e-06, "loss": 0.4181, "step": 2717 }, { "epoch": 0.2439254223598304, "grad_norm": 0.544629756488251, "learning_rate": 8.130421776847143e-06, "loss": 0.5296, "step": 2718 }, { "epoch": 0.2440151668125014, "grad_norm": 0.49221041594083265, "learning_rate": 8.133413102004188e-06, "loss": 0.4307, "step": 2719 }, { "epoch": 0.24410491126517242, "grad_norm": 0.4925282826856314, "learning_rate": 8.136404427161233e-06, "loss": 0.5152, "step": 2720 }, { "epoch": 0.24419465571784343, "grad_norm": 0.4844733109800227, "learning_rate": 8.139395752318277e-06, "loss": 0.4955, "step": 2721 }, { "epoch": 0.24428440017051445, "grad_norm": 0.4708764819180728, "learning_rate": 8.142387077475322e-06, "loss": 0.4652, "step": 2722 }, { "epoch": 0.2443741446231855, "grad_norm": 0.5101758115823714, "learning_rate": 8.145378402632366e-06, "loss": 0.5089, "step": 2723 }, { "epoch": 0.2444638890758565, "grad_norm": 0.5057547224604897, "learning_rate": 8.14836972778941e-06, "loss": 0.5338, "step": 2724 }, { "epoch": 0.24455363352852752, "grad_norm": 0.4753028996555967, "learning_rate": 8.151361052946455e-06, "loss": 0.4671, "step": 2725 }, { "epoch": 0.24464337798119853, "grad_norm": 0.5512094197704195, "learning_rate": 8.1543523781035e-06, "loss": 0.5801, "step": 2726 }, { "epoch": 0.24473312243386955, "grad_norm": 0.5409713047462457, "learning_rate": 8.157343703260544e-06, "loss": 0.5434, "step": 2727 }, { "epoch": 0.24482286688654056, "grad_norm": 0.5066180120530338, "learning_rate": 8.160335028417589e-06, "loss": 0.4734, "step": 2728 }, { "epoch": 0.2449126113392116, "grad_norm": 0.5022184656389523, "learning_rate": 8.163326353574634e-06, "loss": 0.5422, "step": 2729 }, { "epoch": 0.24500235579188262, "grad_norm": 0.511541414690284, "learning_rate": 8.166317678731678e-06, "loss": 0.5278, "step": 2730 }, { "epoch": 0.24509210024455363, "grad_norm": 0.5168901873513171, "learning_rate": 8.169309003888723e-06, "loss": 0.5471, "step": 2731 }, { "epoch": 0.24518184469722465, "grad_norm": 0.5048852649373473, "learning_rate": 8.172300329045769e-06, "loss": 0.5314, "step": 2732 }, { "epoch": 0.24527158914989566, "grad_norm": 0.5700512338116807, "learning_rate": 8.175291654202813e-06, "loss": 0.564, "step": 2733 }, { "epoch": 0.24536133360256668, "grad_norm": 0.49941744012020645, "learning_rate": 8.178282979359858e-06, "loss": 0.4965, "step": 2734 }, { "epoch": 0.24545107805523772, "grad_norm": 0.5255714383645655, "learning_rate": 8.181274304516903e-06, "loss": 0.5396, "step": 2735 }, { "epoch": 0.24554082250790873, "grad_norm": 0.4938391940663324, "learning_rate": 8.184265629673947e-06, "loss": 0.5043, "step": 2736 }, { "epoch": 0.24563056696057975, "grad_norm": 0.4628682514277479, "learning_rate": 8.187256954830992e-06, "loss": 0.4515, "step": 2737 }, { "epoch": 0.24572031141325076, "grad_norm": 0.5306743888680953, "learning_rate": 8.190248279988036e-06, "loss": 0.5133, "step": 2738 }, { "epoch": 0.24581005586592178, "grad_norm": 0.46704297590556143, "learning_rate": 8.19323960514508e-06, "loss": 0.5039, "step": 2739 }, { "epoch": 0.24589980031859282, "grad_norm": 0.48855741069081005, "learning_rate": 8.196230930302125e-06, "loss": 0.5213, "step": 2740 }, { "epoch": 0.24598954477126383, "grad_norm": 0.4946812455309229, "learning_rate": 8.19922225545917e-06, "loss": 0.4959, "step": 2741 }, { "epoch": 0.24607928922393485, "grad_norm": 0.5501586868901769, "learning_rate": 8.202213580616214e-06, "loss": 0.5957, "step": 2742 }, { "epoch": 0.24616903367660586, "grad_norm": 0.5342127264734822, "learning_rate": 8.205204905773259e-06, "loss": 0.4998, "step": 2743 }, { "epoch": 0.24625877812927688, "grad_norm": 0.5509755702597274, "learning_rate": 8.208196230930304e-06, "loss": 0.5642, "step": 2744 }, { "epoch": 0.2463485225819479, "grad_norm": 0.5082215951468423, "learning_rate": 8.211187556087348e-06, "loss": 0.5151, "step": 2745 }, { "epoch": 0.24643826703461894, "grad_norm": 0.4760188128898625, "learning_rate": 8.214178881244393e-06, "loss": 0.4677, "step": 2746 }, { "epoch": 0.24652801148728995, "grad_norm": 0.45069676586568785, "learning_rate": 8.217170206401437e-06, "loss": 0.4517, "step": 2747 }, { "epoch": 0.24661775593996096, "grad_norm": 0.48853989490908517, "learning_rate": 8.220161531558482e-06, "loss": 0.4225, "step": 2748 }, { "epoch": 0.24670750039263198, "grad_norm": 0.5184135936248299, "learning_rate": 8.223152856715526e-06, "loss": 0.4873, "step": 2749 }, { "epoch": 0.246797244845303, "grad_norm": 0.49956862416788617, "learning_rate": 8.226144181872571e-06, "loss": 0.4717, "step": 2750 }, { "epoch": 0.246886989297974, "grad_norm": 0.49358550185635847, "learning_rate": 8.229135507029616e-06, "loss": 0.5477, "step": 2751 }, { "epoch": 0.24697673375064505, "grad_norm": 0.4833702571547841, "learning_rate": 8.23212683218666e-06, "loss": 0.5021, "step": 2752 }, { "epoch": 0.24706647820331606, "grad_norm": 0.5187234821562975, "learning_rate": 8.235118157343705e-06, "loss": 0.4942, "step": 2753 }, { "epoch": 0.24715622265598708, "grad_norm": 0.557990008700461, "learning_rate": 8.23810948250075e-06, "loss": 0.6349, "step": 2754 }, { "epoch": 0.2472459671086581, "grad_norm": 0.5360015790788409, "learning_rate": 8.241100807657792e-06, "loss": 0.6023, "step": 2755 }, { "epoch": 0.2473357115613291, "grad_norm": 0.517894993201032, "learning_rate": 8.244092132814837e-06, "loss": 0.5057, "step": 2756 }, { "epoch": 0.24742545601400012, "grad_norm": 0.5302057054118305, "learning_rate": 8.247083457971881e-06, "loss": 0.5198, "step": 2757 }, { "epoch": 0.24751520046667116, "grad_norm": 0.5200736111910096, "learning_rate": 8.250074783128926e-06, "loss": 0.4828, "step": 2758 }, { "epoch": 0.24760494491934218, "grad_norm": 0.5286864923131238, "learning_rate": 8.25306610828597e-06, "loss": 0.5433, "step": 2759 }, { "epoch": 0.2476946893720132, "grad_norm": 0.5378497398703532, "learning_rate": 8.256057433443015e-06, "loss": 0.551, "step": 2760 }, { "epoch": 0.2477844338246842, "grad_norm": 0.5084799087350383, "learning_rate": 8.25904875860006e-06, "loss": 0.4884, "step": 2761 }, { "epoch": 0.24787417827735522, "grad_norm": 0.5299770337041629, "learning_rate": 8.262040083757104e-06, "loss": 0.5426, "step": 2762 }, { "epoch": 0.24796392273002624, "grad_norm": 0.5565821903394305, "learning_rate": 8.265031408914149e-06, "loss": 0.5684, "step": 2763 }, { "epoch": 0.24805366718269728, "grad_norm": 0.528519274601438, "learning_rate": 8.268022734071193e-06, "loss": 0.4775, "step": 2764 }, { "epoch": 0.2481434116353683, "grad_norm": 0.49299401859391717, "learning_rate": 8.271014059228238e-06, "loss": 0.4848, "step": 2765 }, { "epoch": 0.2482331560880393, "grad_norm": 0.5115525384212263, "learning_rate": 8.274005384385282e-06, "loss": 0.5334, "step": 2766 }, { "epoch": 0.24832290054071032, "grad_norm": 0.49302009912247236, "learning_rate": 8.276996709542327e-06, "loss": 0.4676, "step": 2767 }, { "epoch": 0.24841264499338134, "grad_norm": 0.5051611477561764, "learning_rate": 8.279988034699373e-06, "loss": 0.5115, "step": 2768 }, { "epoch": 0.24850238944605238, "grad_norm": 0.5094507490295614, "learning_rate": 8.282979359856418e-06, "loss": 0.5301, "step": 2769 }, { "epoch": 0.2485921338987234, "grad_norm": 0.5297268717186414, "learning_rate": 8.285970685013462e-06, "loss": 0.5249, "step": 2770 }, { "epoch": 0.2486818783513944, "grad_norm": 0.5510984851369112, "learning_rate": 8.288962010170507e-06, "loss": 0.5199, "step": 2771 }, { "epoch": 0.24877162280406542, "grad_norm": 0.4913157024683489, "learning_rate": 8.291953335327551e-06, "loss": 0.5348, "step": 2772 }, { "epoch": 0.24886136725673644, "grad_norm": 0.47855569045410123, "learning_rate": 8.294944660484596e-06, "loss": 0.4825, "step": 2773 }, { "epoch": 0.24895111170940745, "grad_norm": 0.5725918176008361, "learning_rate": 8.29793598564164e-06, "loss": 0.508, "step": 2774 }, { "epoch": 0.2490408561620785, "grad_norm": 0.5102728510716585, "learning_rate": 8.300927310798685e-06, "loss": 0.4635, "step": 2775 }, { "epoch": 0.2491306006147495, "grad_norm": 0.484228002870613, "learning_rate": 8.30391863595573e-06, "loss": 0.497, "step": 2776 }, { "epoch": 0.24922034506742052, "grad_norm": 0.4871003483618222, "learning_rate": 8.306909961112774e-06, "loss": 0.4911, "step": 2777 }, { "epoch": 0.24931008952009154, "grad_norm": 0.5499995546918792, "learning_rate": 8.309901286269819e-06, "loss": 0.5697, "step": 2778 }, { "epoch": 0.24939983397276255, "grad_norm": 0.48934207777082517, "learning_rate": 8.312892611426863e-06, "loss": 0.5001, "step": 2779 }, { "epoch": 0.24948957842543357, "grad_norm": 0.4884727010580838, "learning_rate": 8.315883936583908e-06, "loss": 0.4712, "step": 2780 }, { "epoch": 0.2495793228781046, "grad_norm": 0.5135356874284402, "learning_rate": 8.318875261740952e-06, "loss": 0.4973, "step": 2781 }, { "epoch": 0.24966906733077562, "grad_norm": 0.5836115730662244, "learning_rate": 8.321866586897997e-06, "loss": 0.5924, "step": 2782 }, { "epoch": 0.24975881178344664, "grad_norm": 0.4877852161023587, "learning_rate": 8.324857912055041e-06, "loss": 0.4928, "step": 2783 }, { "epoch": 0.24984855623611765, "grad_norm": 0.49635295035108123, "learning_rate": 8.327849237212086e-06, "loss": 0.5369, "step": 2784 }, { "epoch": 0.24993830068878867, "grad_norm": 0.48453783483592106, "learning_rate": 8.33084056236913e-06, "loss": 0.4418, "step": 2785 }, { "epoch": 0.2500280451414597, "grad_norm": 0.5104480377118343, "learning_rate": 8.333831887526175e-06, "loss": 0.4783, "step": 2786 }, { "epoch": 0.2501177895941307, "grad_norm": 0.5263229549216046, "learning_rate": 8.33682321268322e-06, "loss": 0.5341, "step": 2787 }, { "epoch": 0.25020753404680174, "grad_norm": 0.5983085096051924, "learning_rate": 8.339814537840264e-06, "loss": 0.5835, "step": 2788 }, { "epoch": 0.25029727849947275, "grad_norm": 0.5015328784956935, "learning_rate": 8.342805862997309e-06, "loss": 0.5238, "step": 2789 }, { "epoch": 0.25038702295214377, "grad_norm": 0.5782763693294486, "learning_rate": 8.345797188154353e-06, "loss": 0.5949, "step": 2790 }, { "epoch": 0.2504767674048148, "grad_norm": 0.4850429479482499, "learning_rate": 8.348788513311398e-06, "loss": 0.5196, "step": 2791 }, { "epoch": 0.2505665118574858, "grad_norm": 0.5034580682382593, "learning_rate": 8.351779838468442e-06, "loss": 0.4983, "step": 2792 }, { "epoch": 0.2506562563101568, "grad_norm": 0.4855080025688866, "learning_rate": 8.354771163625487e-06, "loss": 0.4678, "step": 2793 }, { "epoch": 0.2507460007628278, "grad_norm": 0.4976616905637222, "learning_rate": 8.357762488782532e-06, "loss": 0.5174, "step": 2794 }, { "epoch": 0.25083574521549884, "grad_norm": 0.5061100265953373, "learning_rate": 8.360753813939576e-06, "loss": 0.5222, "step": 2795 }, { "epoch": 0.2509254896681699, "grad_norm": 0.5645850478132594, "learning_rate": 8.36374513909662e-06, "loss": 0.5419, "step": 2796 }, { "epoch": 0.2510152341208409, "grad_norm": 0.4782712501931696, "learning_rate": 8.366736464253665e-06, "loss": 0.4552, "step": 2797 }, { "epoch": 0.25110497857351194, "grad_norm": 0.5852946202224265, "learning_rate": 8.36972778941071e-06, "loss": 0.5556, "step": 2798 }, { "epoch": 0.25119472302618295, "grad_norm": 0.5648466314156065, "learning_rate": 8.372719114567754e-06, "loss": 0.5871, "step": 2799 }, { "epoch": 0.25128446747885397, "grad_norm": 0.4397243033992216, "learning_rate": 8.375710439724799e-06, "loss": 0.4167, "step": 2800 }, { "epoch": 0.251374211931525, "grad_norm": 0.49892933663425104, "learning_rate": 8.378701764881843e-06, "loss": 0.4879, "step": 2801 }, { "epoch": 0.251463956384196, "grad_norm": 0.5282188210703449, "learning_rate": 8.381693090038888e-06, "loss": 0.5214, "step": 2802 }, { "epoch": 0.251553700836867, "grad_norm": 0.5586021077759283, "learning_rate": 8.384684415195933e-06, "loss": 0.5828, "step": 2803 }, { "epoch": 0.251643445289538, "grad_norm": 0.4680519389934608, "learning_rate": 8.387675740352977e-06, "loss": 0.4167, "step": 2804 }, { "epoch": 0.25173318974220904, "grad_norm": 0.49481096808519204, "learning_rate": 8.390667065510022e-06, "loss": 0.478, "step": 2805 }, { "epoch": 0.25182293419488005, "grad_norm": 0.5066985164993464, "learning_rate": 8.393658390667066e-06, "loss": 0.5523, "step": 2806 }, { "epoch": 0.2519126786475511, "grad_norm": 0.44654236178863926, "learning_rate": 8.39664971582411e-06, "loss": 0.4555, "step": 2807 }, { "epoch": 0.25200242310022214, "grad_norm": 0.4861524555834088, "learning_rate": 8.399641040981155e-06, "loss": 0.4428, "step": 2808 }, { "epoch": 0.25209216755289315, "grad_norm": 0.5527371815414163, "learning_rate": 8.4026323661382e-06, "loss": 0.5698, "step": 2809 }, { "epoch": 0.25218191200556417, "grad_norm": 0.528708842885586, "learning_rate": 8.405623691295244e-06, "loss": 0.5332, "step": 2810 }, { "epoch": 0.2522716564582352, "grad_norm": 0.5247463380812513, "learning_rate": 8.408615016452289e-06, "loss": 0.5195, "step": 2811 }, { "epoch": 0.2523614009109062, "grad_norm": 0.4733395438348522, "learning_rate": 8.411606341609334e-06, "loss": 0.4341, "step": 2812 }, { "epoch": 0.2524511453635772, "grad_norm": 0.5025291232979928, "learning_rate": 8.414597666766378e-06, "loss": 0.5157, "step": 2813 }, { "epoch": 0.2525408898162482, "grad_norm": 0.5179322682333817, "learning_rate": 8.417588991923423e-06, "loss": 0.4964, "step": 2814 }, { "epoch": 0.25263063426891924, "grad_norm": 0.5026997588634209, "learning_rate": 8.420580317080467e-06, "loss": 0.5011, "step": 2815 }, { "epoch": 0.25272037872159026, "grad_norm": 0.46599913840424934, "learning_rate": 8.423571642237512e-06, "loss": 0.4562, "step": 2816 }, { "epoch": 0.25281012317426127, "grad_norm": 0.4717234225868244, "learning_rate": 8.426562967394556e-06, "loss": 0.4581, "step": 2817 }, { "epoch": 0.2528998676269323, "grad_norm": 0.5233639156135367, "learning_rate": 8.429554292551601e-06, "loss": 0.4831, "step": 2818 }, { "epoch": 0.25298961207960335, "grad_norm": 0.5327062137579655, "learning_rate": 8.432545617708645e-06, "loss": 0.5606, "step": 2819 }, { "epoch": 0.25307935653227437, "grad_norm": 0.5336125545453109, "learning_rate": 8.43553694286569e-06, "loss": 0.5083, "step": 2820 }, { "epoch": 0.2531691009849454, "grad_norm": 0.47107832167669567, "learning_rate": 8.438528268022735e-06, "loss": 0.4291, "step": 2821 }, { "epoch": 0.2532588454376164, "grad_norm": 0.5031421346784803, "learning_rate": 8.44151959317978e-06, "loss": 0.4913, "step": 2822 }, { "epoch": 0.2533485898902874, "grad_norm": 0.5185827913524417, "learning_rate": 8.444510918336824e-06, "loss": 0.5226, "step": 2823 }, { "epoch": 0.2534383343429584, "grad_norm": 0.5361922312135747, "learning_rate": 8.447502243493868e-06, "loss": 0.5179, "step": 2824 }, { "epoch": 0.25352807879562944, "grad_norm": 0.5160452194923918, "learning_rate": 8.450493568650913e-06, "loss": 0.4808, "step": 2825 }, { "epoch": 0.25361782324830046, "grad_norm": 0.5450444419989532, "learning_rate": 8.453484893807957e-06, "loss": 0.5957, "step": 2826 }, { "epoch": 0.25370756770097147, "grad_norm": 0.5219909161171535, "learning_rate": 8.456476218965002e-06, "loss": 0.491, "step": 2827 }, { "epoch": 0.2537973121536425, "grad_norm": 0.5287153469961953, "learning_rate": 8.459467544122047e-06, "loss": 0.5392, "step": 2828 }, { "epoch": 0.2538870566063135, "grad_norm": 0.4908728411242387, "learning_rate": 8.462458869279091e-06, "loss": 0.531, "step": 2829 }, { "epoch": 0.25397680105898457, "grad_norm": 0.4959999544316013, "learning_rate": 8.465450194436136e-06, "loss": 0.5225, "step": 2830 }, { "epoch": 0.2540665455116556, "grad_norm": 0.46033427905922597, "learning_rate": 8.46844151959318e-06, "loss": 0.4455, "step": 2831 }, { "epoch": 0.2541562899643266, "grad_norm": 0.5218219163197889, "learning_rate": 8.471432844750225e-06, "loss": 0.4947, "step": 2832 }, { "epoch": 0.2542460344169976, "grad_norm": 0.5102445183067752, "learning_rate": 8.47442416990727e-06, "loss": 0.4863, "step": 2833 }, { "epoch": 0.2543357788696686, "grad_norm": 0.5297418964895435, "learning_rate": 8.477415495064314e-06, "loss": 0.4871, "step": 2834 }, { "epoch": 0.25442552332233964, "grad_norm": 0.500284735650434, "learning_rate": 8.480406820221358e-06, "loss": 0.4946, "step": 2835 }, { "epoch": 0.25451526777501066, "grad_norm": 0.5145391740827576, "learning_rate": 8.483398145378403e-06, "loss": 0.5209, "step": 2836 }, { "epoch": 0.25460501222768167, "grad_norm": 0.5286377788760738, "learning_rate": 8.486389470535448e-06, "loss": 0.5536, "step": 2837 }, { "epoch": 0.2546947566803527, "grad_norm": 0.47917960601624876, "learning_rate": 8.489380795692492e-06, "loss": 0.4996, "step": 2838 }, { "epoch": 0.2547845011330237, "grad_norm": 0.515987231679974, "learning_rate": 8.492372120849537e-06, "loss": 0.5074, "step": 2839 }, { "epoch": 0.2548742455856947, "grad_norm": 0.4857870457202798, "learning_rate": 8.495363446006581e-06, "loss": 0.4895, "step": 2840 }, { "epoch": 0.25496399003836573, "grad_norm": 0.4602800524602582, "learning_rate": 8.498354771163626e-06, "loss": 0.4574, "step": 2841 }, { "epoch": 0.2550537344910368, "grad_norm": 0.48764014204488176, "learning_rate": 8.50134609632067e-06, "loss": 0.523, "step": 2842 }, { "epoch": 0.2551434789437078, "grad_norm": 0.5186112800137976, "learning_rate": 8.504337421477715e-06, "loss": 0.4908, "step": 2843 }, { "epoch": 0.2552332233963788, "grad_norm": 0.4921189630213855, "learning_rate": 8.50732874663476e-06, "loss": 0.4633, "step": 2844 }, { "epoch": 0.25532296784904984, "grad_norm": 0.4786552118120954, "learning_rate": 8.510320071791804e-06, "loss": 0.4671, "step": 2845 }, { "epoch": 0.25541271230172086, "grad_norm": 0.5386558389800317, "learning_rate": 8.513311396948849e-06, "loss": 0.5429, "step": 2846 }, { "epoch": 0.25550245675439187, "grad_norm": 0.5217138115415968, "learning_rate": 8.516302722105893e-06, "loss": 0.531, "step": 2847 }, { "epoch": 0.2555922012070629, "grad_norm": 0.5148328414005304, "learning_rate": 8.519294047262938e-06, "loss": 0.5205, "step": 2848 }, { "epoch": 0.2556819456597339, "grad_norm": 0.508527633097378, "learning_rate": 8.522285372419982e-06, "loss": 0.5183, "step": 2849 }, { "epoch": 0.2557716901124049, "grad_norm": 0.5501940705938185, "learning_rate": 8.525276697577027e-06, "loss": 0.5642, "step": 2850 }, { "epoch": 0.25586143456507593, "grad_norm": 0.4796633192728576, "learning_rate": 8.528268022734071e-06, "loss": 0.4666, "step": 2851 }, { "epoch": 0.25595117901774694, "grad_norm": 0.5528822839682995, "learning_rate": 8.531259347891116e-06, "loss": 0.5931, "step": 2852 }, { "epoch": 0.25604092347041796, "grad_norm": 0.5143245442002445, "learning_rate": 8.53425067304816e-06, "loss": 0.5258, "step": 2853 }, { "epoch": 0.25613066792308903, "grad_norm": 0.5388736059234641, "learning_rate": 8.537241998205205e-06, "loss": 0.5389, "step": 2854 }, { "epoch": 0.25622041237576004, "grad_norm": 0.5290215956419049, "learning_rate": 8.54023332336225e-06, "loss": 0.5108, "step": 2855 }, { "epoch": 0.25631015682843106, "grad_norm": 0.4880696839886831, "learning_rate": 8.543224648519294e-06, "loss": 0.5046, "step": 2856 }, { "epoch": 0.25639990128110207, "grad_norm": 0.5133311295215901, "learning_rate": 8.546215973676339e-06, "loss": 0.511, "step": 2857 }, { "epoch": 0.2564896457337731, "grad_norm": 0.4964750996960577, "learning_rate": 8.549207298833383e-06, "loss": 0.4873, "step": 2858 }, { "epoch": 0.2565793901864441, "grad_norm": 0.5164309340564546, "learning_rate": 8.552198623990428e-06, "loss": 0.5109, "step": 2859 }, { "epoch": 0.2566691346391151, "grad_norm": 0.5467378617050368, "learning_rate": 8.555189949147474e-06, "loss": 0.5153, "step": 2860 }, { "epoch": 0.25675887909178613, "grad_norm": 0.5089457869064415, "learning_rate": 8.558181274304519e-06, "loss": 0.4793, "step": 2861 }, { "epoch": 0.25684862354445714, "grad_norm": 0.5612683354645658, "learning_rate": 8.561172599461563e-06, "loss": 0.6158, "step": 2862 }, { "epoch": 0.25693836799712816, "grad_norm": 0.4923099863715257, "learning_rate": 8.564163924618608e-06, "loss": 0.4861, "step": 2863 }, { "epoch": 0.2570281124497992, "grad_norm": 0.5215876070948462, "learning_rate": 8.567155249775652e-06, "loss": 0.5119, "step": 2864 }, { "epoch": 0.25711785690247024, "grad_norm": 0.5245487953540401, "learning_rate": 8.570146574932697e-06, "loss": 0.528, "step": 2865 }, { "epoch": 0.25720760135514126, "grad_norm": 0.494122570665525, "learning_rate": 8.573137900089741e-06, "loss": 0.4837, "step": 2866 }, { "epoch": 0.25729734580781227, "grad_norm": 0.5682080584507446, "learning_rate": 8.576129225246786e-06, "loss": 0.5317, "step": 2867 }, { "epoch": 0.2573870902604833, "grad_norm": 0.46192511816757603, "learning_rate": 8.57912055040383e-06, "loss": 0.4276, "step": 2868 }, { "epoch": 0.2574768347131543, "grad_norm": 0.52246931703599, "learning_rate": 8.582111875560875e-06, "loss": 0.5256, "step": 2869 }, { "epoch": 0.2575665791658253, "grad_norm": 0.4869259377530519, "learning_rate": 8.58510320071792e-06, "loss": 0.4806, "step": 2870 }, { "epoch": 0.25765632361849633, "grad_norm": 0.5443293620342529, "learning_rate": 8.588094525874964e-06, "loss": 0.5249, "step": 2871 }, { "epoch": 0.25774606807116734, "grad_norm": 0.48053982417472363, "learning_rate": 8.591085851032009e-06, "loss": 0.4813, "step": 2872 }, { "epoch": 0.25783581252383836, "grad_norm": 0.5119431188520323, "learning_rate": 8.594077176189053e-06, "loss": 0.509, "step": 2873 }, { "epoch": 0.2579255569765094, "grad_norm": 0.49168602520454674, "learning_rate": 8.597068501346098e-06, "loss": 0.5202, "step": 2874 }, { "epoch": 0.2580153014291804, "grad_norm": 0.4585045687803273, "learning_rate": 8.600059826503142e-06, "loss": 0.4377, "step": 2875 }, { "epoch": 0.2581050458818514, "grad_norm": 0.5402429339975892, "learning_rate": 8.603051151660187e-06, "loss": 0.5291, "step": 2876 }, { "epoch": 0.2581947903345225, "grad_norm": 0.5132171358022236, "learning_rate": 8.606042476817232e-06, "loss": 0.4802, "step": 2877 }, { "epoch": 0.2582845347871935, "grad_norm": 0.511460548217228, "learning_rate": 8.609033801974276e-06, "loss": 0.5152, "step": 2878 }, { "epoch": 0.2583742792398645, "grad_norm": 0.4790674859712967, "learning_rate": 8.612025127131319e-06, "loss": 0.4752, "step": 2879 }, { "epoch": 0.2584640236925355, "grad_norm": 0.6011313635706071, "learning_rate": 8.615016452288364e-06, "loss": 0.5517, "step": 2880 }, { "epoch": 0.25855376814520653, "grad_norm": 0.5092397375361403, "learning_rate": 8.618007777445408e-06, "loss": 0.5078, "step": 2881 }, { "epoch": 0.25864351259787755, "grad_norm": 0.5026215426646254, "learning_rate": 8.620999102602453e-06, "loss": 0.5159, "step": 2882 }, { "epoch": 0.25873325705054856, "grad_norm": 0.5088589460248008, "learning_rate": 8.623990427759497e-06, "loss": 0.4964, "step": 2883 }, { "epoch": 0.2588230015032196, "grad_norm": 0.4971333301553243, "learning_rate": 8.626981752916542e-06, "loss": 0.513, "step": 2884 }, { "epoch": 0.2589127459558906, "grad_norm": 0.5497124920560825, "learning_rate": 8.629973078073586e-06, "loss": 0.5741, "step": 2885 }, { "epoch": 0.2590024904085616, "grad_norm": 0.5326292574878924, "learning_rate": 8.632964403230631e-06, "loss": 0.565, "step": 2886 }, { "epoch": 0.2590922348612326, "grad_norm": 0.4774724656333151, "learning_rate": 8.635955728387675e-06, "loss": 0.4887, "step": 2887 }, { "epoch": 0.25918197931390363, "grad_norm": 0.47549680790140175, "learning_rate": 8.63894705354472e-06, "loss": 0.4401, "step": 2888 }, { "epoch": 0.2592717237665747, "grad_norm": 0.4965746143516686, "learning_rate": 8.641938378701765e-06, "loss": 0.4978, "step": 2889 }, { "epoch": 0.2593614682192457, "grad_norm": 0.4960527655525206, "learning_rate": 8.644929703858809e-06, "loss": 0.4924, "step": 2890 }, { "epoch": 0.25945121267191673, "grad_norm": 0.4888615239424536, "learning_rate": 8.647921029015854e-06, "loss": 0.5094, "step": 2891 }, { "epoch": 0.25954095712458775, "grad_norm": 0.5378365477624351, "learning_rate": 8.650912354172898e-06, "loss": 0.519, "step": 2892 }, { "epoch": 0.25963070157725876, "grad_norm": 0.5047318594477318, "learning_rate": 8.653903679329943e-06, "loss": 0.5678, "step": 2893 }, { "epoch": 0.2597204460299298, "grad_norm": 0.5304025783804852, "learning_rate": 8.656895004486987e-06, "loss": 0.4846, "step": 2894 }, { "epoch": 0.2598101904826008, "grad_norm": 0.4829920708229352, "learning_rate": 8.659886329644032e-06, "loss": 0.5055, "step": 2895 }, { "epoch": 0.2598999349352718, "grad_norm": 0.5236326464621821, "learning_rate": 8.662877654801078e-06, "loss": 0.523, "step": 2896 }, { "epoch": 0.2599896793879428, "grad_norm": 0.4718822907896483, "learning_rate": 8.665868979958123e-06, "loss": 0.4489, "step": 2897 }, { "epoch": 0.26007942384061383, "grad_norm": 0.5228008102600121, "learning_rate": 8.668860305115167e-06, "loss": 0.5431, "step": 2898 }, { "epoch": 0.26016916829328485, "grad_norm": 0.5036497073439424, "learning_rate": 8.671851630272212e-06, "loss": 0.5186, "step": 2899 }, { "epoch": 0.2602589127459559, "grad_norm": 0.4943097665287827, "learning_rate": 8.674842955429256e-06, "loss": 0.4608, "step": 2900 }, { "epoch": 0.26034865719862693, "grad_norm": 0.5357743028712425, "learning_rate": 8.677834280586301e-06, "loss": 0.4989, "step": 2901 }, { "epoch": 0.26043840165129795, "grad_norm": 0.5027697079798538, "learning_rate": 8.680825605743346e-06, "loss": 0.544, "step": 2902 }, { "epoch": 0.26052814610396896, "grad_norm": 0.548340921158327, "learning_rate": 8.68381693090039e-06, "loss": 0.6292, "step": 2903 }, { "epoch": 0.26061789055664, "grad_norm": 0.4882051544956675, "learning_rate": 8.686808256057435e-06, "loss": 0.5228, "step": 2904 }, { "epoch": 0.260707635009311, "grad_norm": 0.5272946049625589, "learning_rate": 8.68979958121448e-06, "loss": 0.5044, "step": 2905 }, { "epoch": 0.260797379461982, "grad_norm": 0.5167000214123212, "learning_rate": 8.692790906371524e-06, "loss": 0.5711, "step": 2906 }, { "epoch": 0.260887123914653, "grad_norm": 0.4610543232756048, "learning_rate": 8.695782231528568e-06, "loss": 0.4182, "step": 2907 }, { "epoch": 0.26097686836732403, "grad_norm": 0.4872384361732273, "learning_rate": 8.698773556685613e-06, "loss": 0.4853, "step": 2908 }, { "epoch": 0.26106661281999505, "grad_norm": 0.49710641298318187, "learning_rate": 8.701764881842657e-06, "loss": 0.5363, "step": 2909 }, { "epoch": 0.26115635727266606, "grad_norm": 0.5365247477662903, "learning_rate": 8.704756206999702e-06, "loss": 0.5333, "step": 2910 }, { "epoch": 0.2612461017253371, "grad_norm": 0.5705704620128044, "learning_rate": 8.707747532156747e-06, "loss": 0.5596, "step": 2911 }, { "epoch": 0.26133584617800815, "grad_norm": 0.5213683336174575, "learning_rate": 8.710738857313791e-06, "loss": 0.545, "step": 2912 }, { "epoch": 0.26142559063067916, "grad_norm": 0.48357317691590135, "learning_rate": 8.713730182470836e-06, "loss": 0.488, "step": 2913 }, { "epoch": 0.2615153350833502, "grad_norm": 0.49765555571074876, "learning_rate": 8.71672150762788e-06, "loss": 0.4798, "step": 2914 }, { "epoch": 0.2616050795360212, "grad_norm": 0.475742584696798, "learning_rate": 8.719712832784925e-06, "loss": 0.4021, "step": 2915 }, { "epoch": 0.2616948239886922, "grad_norm": 0.4645881780994618, "learning_rate": 8.72270415794197e-06, "loss": 0.4769, "step": 2916 }, { "epoch": 0.2617845684413632, "grad_norm": 0.49599839647478294, "learning_rate": 8.725695483099014e-06, "loss": 0.4845, "step": 2917 }, { "epoch": 0.26187431289403423, "grad_norm": 0.5142391146017278, "learning_rate": 8.728686808256058e-06, "loss": 0.5331, "step": 2918 }, { "epoch": 0.26196405734670525, "grad_norm": 0.5005894410962769, "learning_rate": 8.731678133413103e-06, "loss": 0.4861, "step": 2919 }, { "epoch": 0.26205380179937626, "grad_norm": 0.5399067516905862, "learning_rate": 8.734669458570148e-06, "loss": 0.5373, "step": 2920 }, { "epoch": 0.2621435462520473, "grad_norm": 0.5219038787213514, "learning_rate": 8.737660783727192e-06, "loss": 0.5134, "step": 2921 }, { "epoch": 0.2622332907047183, "grad_norm": 0.5220930324659118, "learning_rate": 8.740652108884237e-06, "loss": 0.5412, "step": 2922 }, { "epoch": 0.26232303515738936, "grad_norm": 0.5597822108267021, "learning_rate": 8.743643434041281e-06, "loss": 0.6054, "step": 2923 }, { "epoch": 0.2624127796100604, "grad_norm": 0.4909992249646132, "learning_rate": 8.746634759198326e-06, "loss": 0.4705, "step": 2924 }, { "epoch": 0.2625025240627314, "grad_norm": 0.4966070116448876, "learning_rate": 8.74962608435537e-06, "loss": 0.5082, "step": 2925 }, { "epoch": 0.2625922685154024, "grad_norm": 0.4686424786531185, "learning_rate": 8.752617409512415e-06, "loss": 0.4653, "step": 2926 }, { "epoch": 0.2626820129680734, "grad_norm": 0.535143692454291, "learning_rate": 8.75560873466946e-06, "loss": 0.5045, "step": 2927 }, { "epoch": 0.26277175742074443, "grad_norm": 0.4805845868045109, "learning_rate": 8.758600059826504e-06, "loss": 0.5036, "step": 2928 }, { "epoch": 0.26286150187341545, "grad_norm": 0.5114201410103373, "learning_rate": 8.761591384983549e-06, "loss": 0.5295, "step": 2929 }, { "epoch": 0.26295124632608646, "grad_norm": 0.5103558516808507, "learning_rate": 8.764582710140593e-06, "loss": 0.5043, "step": 2930 }, { "epoch": 0.2630409907787575, "grad_norm": 0.5212625618739358, "learning_rate": 8.767574035297638e-06, "loss": 0.5541, "step": 2931 }, { "epoch": 0.2631307352314285, "grad_norm": 0.5396407817584472, "learning_rate": 8.770565360454682e-06, "loss": 0.5955, "step": 2932 }, { "epoch": 0.2632204796840995, "grad_norm": 0.5410036665662256, "learning_rate": 8.773556685611727e-06, "loss": 0.5706, "step": 2933 }, { "epoch": 0.2633102241367705, "grad_norm": 0.4983180993363171, "learning_rate": 8.776548010768771e-06, "loss": 0.539, "step": 2934 }, { "epoch": 0.2633999685894416, "grad_norm": 0.47782619686097977, "learning_rate": 8.779539335925816e-06, "loss": 0.46, "step": 2935 }, { "epoch": 0.2634897130421126, "grad_norm": 0.5051098533513202, "learning_rate": 8.78253066108286e-06, "loss": 0.5132, "step": 2936 }, { "epoch": 0.2635794574947836, "grad_norm": 0.5085855317915484, "learning_rate": 8.785521986239905e-06, "loss": 0.5548, "step": 2937 }, { "epoch": 0.26366920194745463, "grad_norm": 0.4901015773283156, "learning_rate": 8.78851331139695e-06, "loss": 0.4717, "step": 2938 }, { "epoch": 0.26375894640012565, "grad_norm": 0.4814884487554232, "learning_rate": 8.791504636553994e-06, "loss": 0.5189, "step": 2939 }, { "epoch": 0.26384869085279666, "grad_norm": 0.49898580718434876, "learning_rate": 8.794495961711039e-06, "loss": 0.4836, "step": 2940 }, { "epoch": 0.2639384353054677, "grad_norm": 0.5256841510494555, "learning_rate": 8.797487286868083e-06, "loss": 0.5314, "step": 2941 }, { "epoch": 0.2640281797581387, "grad_norm": 0.49080715235904787, "learning_rate": 8.800478612025128e-06, "loss": 0.4939, "step": 2942 }, { "epoch": 0.2641179242108097, "grad_norm": 0.5384828195773345, "learning_rate": 8.803469937182172e-06, "loss": 0.5784, "step": 2943 }, { "epoch": 0.2642076686634807, "grad_norm": 0.5086062693060774, "learning_rate": 8.806461262339217e-06, "loss": 0.5211, "step": 2944 }, { "epoch": 0.26429741311615174, "grad_norm": 0.4758841939168293, "learning_rate": 8.809452587496262e-06, "loss": 0.4617, "step": 2945 }, { "epoch": 0.26438715756882275, "grad_norm": 0.4976102951100595, "learning_rate": 8.812443912653306e-06, "loss": 0.5148, "step": 2946 }, { "epoch": 0.2644769020214938, "grad_norm": 0.4734742383120719, "learning_rate": 8.81543523781035e-06, "loss": 0.4751, "step": 2947 }, { "epoch": 0.26456664647416483, "grad_norm": 0.543051208948959, "learning_rate": 8.818426562967395e-06, "loss": 0.5696, "step": 2948 }, { "epoch": 0.26465639092683585, "grad_norm": 0.49395464278470047, "learning_rate": 8.82141788812444e-06, "loss": 0.4759, "step": 2949 }, { "epoch": 0.26474613537950686, "grad_norm": 0.5273321373627721, "learning_rate": 8.824409213281484e-06, "loss": 0.5276, "step": 2950 }, { "epoch": 0.2648358798321779, "grad_norm": 0.5178714840853154, "learning_rate": 8.827400538438529e-06, "loss": 0.5226, "step": 2951 }, { "epoch": 0.2649256242848489, "grad_norm": 0.4720065743456212, "learning_rate": 8.830391863595573e-06, "loss": 0.4725, "step": 2952 }, { "epoch": 0.2650153687375199, "grad_norm": 0.47101977169721043, "learning_rate": 8.833383188752618e-06, "loss": 0.5101, "step": 2953 }, { "epoch": 0.2651051131901909, "grad_norm": 0.49304842944039373, "learning_rate": 8.836374513909663e-06, "loss": 0.4869, "step": 2954 }, { "epoch": 0.26519485764286194, "grad_norm": 0.48999622540120863, "learning_rate": 8.839365839066707e-06, "loss": 0.5003, "step": 2955 }, { "epoch": 0.26528460209553295, "grad_norm": 0.5221813277072096, "learning_rate": 8.842357164223752e-06, "loss": 0.5038, "step": 2956 }, { "epoch": 0.26537434654820397, "grad_norm": 0.5077126155833304, "learning_rate": 8.845348489380796e-06, "loss": 0.4829, "step": 2957 }, { "epoch": 0.26546409100087504, "grad_norm": 0.5013093183770896, "learning_rate": 8.84833981453784e-06, "loss": 0.4965, "step": 2958 }, { "epoch": 0.26555383545354605, "grad_norm": 0.5507179283943682, "learning_rate": 8.851331139694885e-06, "loss": 0.5583, "step": 2959 }, { "epoch": 0.26564357990621706, "grad_norm": 0.5009732316954963, "learning_rate": 8.85432246485193e-06, "loss": 0.5127, "step": 2960 }, { "epoch": 0.2657333243588881, "grad_norm": 0.5292061646555096, "learning_rate": 8.857313790008974e-06, "loss": 0.5316, "step": 2961 }, { "epoch": 0.2658230688115591, "grad_norm": 0.48044251128102133, "learning_rate": 8.860305115166019e-06, "loss": 0.4519, "step": 2962 }, { "epoch": 0.2659128132642301, "grad_norm": 0.5274019073932832, "learning_rate": 8.863296440323064e-06, "loss": 0.523, "step": 2963 }, { "epoch": 0.2660025577169011, "grad_norm": 0.5919578571064797, "learning_rate": 8.866287765480108e-06, "loss": 0.67, "step": 2964 }, { "epoch": 0.26609230216957214, "grad_norm": 0.5480619538114423, "learning_rate": 8.869279090637153e-06, "loss": 0.5506, "step": 2965 }, { "epoch": 0.26618204662224315, "grad_norm": 0.5382954675084214, "learning_rate": 8.872270415794197e-06, "loss": 0.541, "step": 2966 }, { "epoch": 0.26627179107491417, "grad_norm": 0.49525535107556334, "learning_rate": 8.875261740951242e-06, "loss": 0.5221, "step": 2967 }, { "epoch": 0.2663615355275852, "grad_norm": 0.4454367923907484, "learning_rate": 8.878253066108286e-06, "loss": 0.4569, "step": 2968 }, { "epoch": 0.2664512799802562, "grad_norm": 0.5420455623158311, "learning_rate": 8.881244391265331e-06, "loss": 0.5226, "step": 2969 }, { "epoch": 0.26654102443292726, "grad_norm": 0.4740623446997653, "learning_rate": 8.884235716422375e-06, "loss": 0.5021, "step": 2970 }, { "epoch": 0.2666307688855983, "grad_norm": 0.5284919614663995, "learning_rate": 8.88722704157942e-06, "loss": 0.5757, "step": 2971 }, { "epoch": 0.2667205133382693, "grad_norm": 0.492166007294812, "learning_rate": 8.890218366736465e-06, "loss": 0.4828, "step": 2972 }, { "epoch": 0.2668102577909403, "grad_norm": 0.5048157763894242, "learning_rate": 8.89320969189351e-06, "loss": 0.5015, "step": 2973 }, { "epoch": 0.2669000022436113, "grad_norm": 0.5131146595760113, "learning_rate": 8.896201017050554e-06, "loss": 0.5191, "step": 2974 }, { "epoch": 0.26698974669628234, "grad_norm": 0.4460471017647455, "learning_rate": 8.899192342207598e-06, "loss": 0.4313, "step": 2975 }, { "epoch": 0.26707949114895335, "grad_norm": 0.47671939569607097, "learning_rate": 8.902183667364643e-06, "loss": 0.4075, "step": 2976 }, { "epoch": 0.26716923560162437, "grad_norm": 0.45492574550891435, "learning_rate": 8.905174992521687e-06, "loss": 0.4793, "step": 2977 }, { "epoch": 0.2672589800542954, "grad_norm": 0.5473299132391233, "learning_rate": 8.908166317678732e-06, "loss": 0.5105, "step": 2978 }, { "epoch": 0.2673487245069664, "grad_norm": 0.5592385912175062, "learning_rate": 8.911157642835777e-06, "loss": 0.6119, "step": 2979 }, { "epoch": 0.2674384689596374, "grad_norm": 0.5858321509272467, "learning_rate": 8.914148967992821e-06, "loss": 0.5578, "step": 2980 }, { "epoch": 0.2675282134123084, "grad_norm": 0.46638183896170304, "learning_rate": 8.917140293149866e-06, "loss": 0.4789, "step": 2981 }, { "epoch": 0.2676179578649795, "grad_norm": 0.5219556694284573, "learning_rate": 8.92013161830691e-06, "loss": 0.5089, "step": 2982 }, { "epoch": 0.2677077023176505, "grad_norm": 0.49335350005491, "learning_rate": 8.923122943463955e-06, "loss": 0.5107, "step": 2983 }, { "epoch": 0.2677974467703215, "grad_norm": 0.5220237844096514, "learning_rate": 8.926114268621e-06, "loss": 0.4969, "step": 2984 }, { "epoch": 0.26788719122299254, "grad_norm": 0.4623178108450916, "learning_rate": 8.929105593778044e-06, "loss": 0.4212, "step": 2985 }, { "epoch": 0.26797693567566355, "grad_norm": 0.5356001410114101, "learning_rate": 8.932096918935088e-06, "loss": 0.4999, "step": 2986 }, { "epoch": 0.26806668012833457, "grad_norm": 0.5893099187696116, "learning_rate": 8.935088244092135e-06, "loss": 0.5895, "step": 2987 }, { "epoch": 0.2681564245810056, "grad_norm": 0.5195175917891165, "learning_rate": 8.93807956924918e-06, "loss": 0.5347, "step": 2988 }, { "epoch": 0.2682461690336766, "grad_norm": 0.520364232695316, "learning_rate": 8.941070894406224e-06, "loss": 0.5138, "step": 2989 }, { "epoch": 0.2683359134863476, "grad_norm": 0.5141854900698034, "learning_rate": 8.944062219563268e-06, "loss": 0.5197, "step": 2990 }, { "epoch": 0.2684256579390186, "grad_norm": 0.467494053463128, "learning_rate": 8.947053544720313e-06, "loss": 0.4699, "step": 2991 }, { "epoch": 0.26851540239168964, "grad_norm": 0.5025266299167928, "learning_rate": 8.950044869877357e-06, "loss": 0.4825, "step": 2992 }, { "epoch": 0.2686051468443607, "grad_norm": 0.48595595198245634, "learning_rate": 8.953036195034402e-06, "loss": 0.4655, "step": 2993 }, { "epoch": 0.2686948912970317, "grad_norm": 0.4935243366953539, "learning_rate": 8.956027520191447e-06, "loss": 0.4741, "step": 2994 }, { "epoch": 0.26878463574970274, "grad_norm": 0.48264557130027524, "learning_rate": 8.959018845348491e-06, "loss": 0.4401, "step": 2995 }, { "epoch": 0.26887438020237375, "grad_norm": 0.466754522576973, "learning_rate": 8.962010170505536e-06, "loss": 0.4987, "step": 2996 }, { "epoch": 0.26896412465504477, "grad_norm": 0.5374857002316131, "learning_rate": 8.96500149566258e-06, "loss": 0.5851, "step": 2997 }, { "epoch": 0.2690538691077158, "grad_norm": 0.5136054092364734, "learning_rate": 8.967992820819625e-06, "loss": 0.5411, "step": 2998 }, { "epoch": 0.2691436135603868, "grad_norm": 0.6190007184500732, "learning_rate": 8.97098414597667e-06, "loss": 0.5391, "step": 2999 }, { "epoch": 0.2692333580130578, "grad_norm": 0.5019289413113623, "learning_rate": 8.973975471133714e-06, "loss": 0.4911, "step": 3000 }, { "epoch": 0.2693231024657288, "grad_norm": 0.5187264638637513, "learning_rate": 8.976966796290758e-06, "loss": 0.5006, "step": 3001 }, { "epoch": 0.26941284691839984, "grad_norm": 0.573325638725442, "learning_rate": 8.979958121447803e-06, "loss": 0.5457, "step": 3002 }, { "epoch": 0.26950259137107085, "grad_norm": 0.5893905922596345, "learning_rate": 8.982949446604846e-06, "loss": 0.5466, "step": 3003 }, { "epoch": 0.26959233582374187, "grad_norm": 0.5111172044128918, "learning_rate": 8.98594077176189e-06, "loss": 0.5673, "step": 3004 }, { "epoch": 0.26968208027641294, "grad_norm": 0.4980718434387555, "learning_rate": 8.988932096918935e-06, "loss": 0.4984, "step": 3005 }, { "epoch": 0.26977182472908395, "grad_norm": 0.5278638948749275, "learning_rate": 8.99192342207598e-06, "loss": 0.4834, "step": 3006 }, { "epoch": 0.26986156918175497, "grad_norm": 0.5152492226890479, "learning_rate": 8.994914747233024e-06, "loss": 0.4705, "step": 3007 }, { "epoch": 0.269951313634426, "grad_norm": 0.5287071768030969, "learning_rate": 8.997906072390069e-06, "loss": 0.5237, "step": 3008 }, { "epoch": 0.270041058087097, "grad_norm": 0.5115395811546268, "learning_rate": 9.000897397547113e-06, "loss": 0.5137, "step": 3009 }, { "epoch": 0.270130802539768, "grad_norm": 0.5008932915820621, "learning_rate": 9.003888722704158e-06, "loss": 0.5117, "step": 3010 }, { "epoch": 0.270220546992439, "grad_norm": 0.5032149568563613, "learning_rate": 9.006880047861202e-06, "loss": 0.4658, "step": 3011 }, { "epoch": 0.27031029144511004, "grad_norm": 0.48729872663265134, "learning_rate": 9.009871373018247e-06, "loss": 0.5168, "step": 3012 }, { "epoch": 0.27040003589778105, "grad_norm": 0.49792996119045, "learning_rate": 9.012862698175291e-06, "loss": 0.4904, "step": 3013 }, { "epoch": 0.27048978035045207, "grad_norm": 0.5227661512588389, "learning_rate": 9.015854023332336e-06, "loss": 0.4994, "step": 3014 }, { "epoch": 0.2705795248031231, "grad_norm": 0.5066156420935568, "learning_rate": 9.01884534848938e-06, "loss": 0.4835, "step": 3015 }, { "epoch": 0.27066926925579415, "grad_norm": 0.48824667541208255, "learning_rate": 9.021836673646425e-06, "loss": 0.4847, "step": 3016 }, { "epoch": 0.27075901370846517, "grad_norm": 0.5382270520206265, "learning_rate": 9.02482799880347e-06, "loss": 0.4988, "step": 3017 }, { "epoch": 0.2708487581611362, "grad_norm": 0.5290978370892503, "learning_rate": 9.027819323960514e-06, "loss": 0.5602, "step": 3018 }, { "epoch": 0.2709385026138072, "grad_norm": 0.5027099376298924, "learning_rate": 9.030810649117559e-06, "loss": 0.5188, "step": 3019 }, { "epoch": 0.2710282470664782, "grad_norm": 0.48850478436084466, "learning_rate": 9.033801974274603e-06, "loss": 0.4877, "step": 3020 }, { "epoch": 0.2711179915191492, "grad_norm": 0.5009836801613978, "learning_rate": 9.036793299431648e-06, "loss": 0.5102, "step": 3021 }, { "epoch": 0.27120773597182024, "grad_norm": 0.4669117443439128, "learning_rate": 9.039784624588693e-06, "loss": 0.4408, "step": 3022 }, { "epoch": 0.27129748042449126, "grad_norm": 0.5261005352396443, "learning_rate": 9.042775949745737e-06, "loss": 0.5446, "step": 3023 }, { "epoch": 0.27138722487716227, "grad_norm": 0.5064610315441175, "learning_rate": 9.045767274902783e-06, "loss": 0.4595, "step": 3024 }, { "epoch": 0.2714769693298333, "grad_norm": 0.5163970891718981, "learning_rate": 9.048758600059828e-06, "loss": 0.4943, "step": 3025 }, { "epoch": 0.2715667137825043, "grad_norm": 0.4926676040974909, "learning_rate": 9.051749925216872e-06, "loss": 0.4566, "step": 3026 }, { "epoch": 0.2716564582351753, "grad_norm": 0.472256643979082, "learning_rate": 9.054741250373917e-06, "loss": 0.4607, "step": 3027 }, { "epoch": 0.2717462026878464, "grad_norm": 0.5458828452031057, "learning_rate": 9.057732575530962e-06, "loss": 0.5142, "step": 3028 }, { "epoch": 0.2718359471405174, "grad_norm": 0.46738897561886683, "learning_rate": 9.060723900688006e-06, "loss": 0.4446, "step": 3029 }, { "epoch": 0.2719256915931884, "grad_norm": 0.5002109244114685, "learning_rate": 9.06371522584505e-06, "loss": 0.4639, "step": 3030 }, { "epoch": 0.2720154360458594, "grad_norm": 0.49263287352033497, "learning_rate": 9.066706551002095e-06, "loss": 0.5173, "step": 3031 }, { "epoch": 0.27210518049853044, "grad_norm": 0.5225675486529893, "learning_rate": 9.06969787615914e-06, "loss": 0.4924, "step": 3032 }, { "epoch": 0.27219492495120146, "grad_norm": 0.4617217589789307, "learning_rate": 9.072689201316184e-06, "loss": 0.4506, "step": 3033 }, { "epoch": 0.27228466940387247, "grad_norm": 0.5262503199924142, "learning_rate": 9.075680526473229e-06, "loss": 0.5468, "step": 3034 }, { "epoch": 0.2723744138565435, "grad_norm": 0.5285571341659548, "learning_rate": 9.078671851630273e-06, "loss": 0.5532, "step": 3035 }, { "epoch": 0.2724641583092145, "grad_norm": 0.5465845081378992, "learning_rate": 9.081663176787318e-06, "loss": 0.5173, "step": 3036 }, { "epoch": 0.2725539027618855, "grad_norm": 0.5715874508173172, "learning_rate": 9.084654501944363e-06, "loss": 0.6296, "step": 3037 }, { "epoch": 0.27264364721455653, "grad_norm": 0.47938624243306655, "learning_rate": 9.087645827101407e-06, "loss": 0.4468, "step": 3038 }, { "epoch": 0.27273339166722754, "grad_norm": 0.48132240938034665, "learning_rate": 9.090637152258452e-06, "loss": 0.4472, "step": 3039 }, { "epoch": 0.2728231361198986, "grad_norm": 0.5022425085621914, "learning_rate": 9.093628477415496e-06, "loss": 0.4856, "step": 3040 }, { "epoch": 0.2729128805725696, "grad_norm": 0.4674276697794192, "learning_rate": 9.09661980257254e-06, "loss": 0.442, "step": 3041 }, { "epoch": 0.27300262502524064, "grad_norm": 0.4722450257572174, "learning_rate": 9.099611127729585e-06, "loss": 0.5166, "step": 3042 }, { "epoch": 0.27309236947791166, "grad_norm": 0.5300715381624115, "learning_rate": 9.10260245288663e-06, "loss": 0.5532, "step": 3043 }, { "epoch": 0.27318211393058267, "grad_norm": 0.4865322990799193, "learning_rate": 9.105593778043674e-06, "loss": 0.4533, "step": 3044 }, { "epoch": 0.2732718583832537, "grad_norm": 0.5156661940644699, "learning_rate": 9.108585103200719e-06, "loss": 0.5472, "step": 3045 }, { "epoch": 0.2733616028359247, "grad_norm": 0.46939936021821677, "learning_rate": 9.111576428357764e-06, "loss": 0.4352, "step": 3046 }, { "epoch": 0.2734513472885957, "grad_norm": 0.5386812081592939, "learning_rate": 9.114567753514808e-06, "loss": 0.5717, "step": 3047 }, { "epoch": 0.27354109174126673, "grad_norm": 0.520233984264009, "learning_rate": 9.117559078671853e-06, "loss": 0.514, "step": 3048 }, { "epoch": 0.27363083619393774, "grad_norm": 0.6188260465577804, "learning_rate": 9.120550403828897e-06, "loss": 0.678, "step": 3049 }, { "epoch": 0.27372058064660876, "grad_norm": 0.48830117452339694, "learning_rate": 9.123541728985942e-06, "loss": 0.4897, "step": 3050 }, { "epoch": 0.2738103250992798, "grad_norm": 0.47654063123769236, "learning_rate": 9.126533054142986e-06, "loss": 0.4974, "step": 3051 }, { "epoch": 0.27390006955195084, "grad_norm": 0.4848756772769522, "learning_rate": 9.129524379300031e-06, "loss": 0.5027, "step": 3052 }, { "epoch": 0.27398981400462186, "grad_norm": 0.5203543991585807, "learning_rate": 9.132515704457076e-06, "loss": 0.5475, "step": 3053 }, { "epoch": 0.27407955845729287, "grad_norm": 0.5399563869878697, "learning_rate": 9.13550702961412e-06, "loss": 0.5296, "step": 3054 }, { "epoch": 0.2741693029099639, "grad_norm": 0.4639244180912348, "learning_rate": 9.138498354771165e-06, "loss": 0.4608, "step": 3055 }, { "epoch": 0.2742590473626349, "grad_norm": 0.47738176157109896, "learning_rate": 9.14148967992821e-06, "loss": 0.4867, "step": 3056 }, { "epoch": 0.2743487918153059, "grad_norm": 0.545950927057399, "learning_rate": 9.144481005085254e-06, "loss": 0.546, "step": 3057 }, { "epoch": 0.27443853626797693, "grad_norm": 0.4975022571285766, "learning_rate": 9.147472330242298e-06, "loss": 0.4814, "step": 3058 }, { "epoch": 0.27452828072064794, "grad_norm": 0.576649201949623, "learning_rate": 9.150463655399343e-06, "loss": 0.5968, "step": 3059 }, { "epoch": 0.27461802517331896, "grad_norm": 0.47323652230950974, "learning_rate": 9.153454980556387e-06, "loss": 0.434, "step": 3060 }, { "epoch": 0.27470776962599, "grad_norm": 0.5426893313066411, "learning_rate": 9.156446305713432e-06, "loss": 0.5435, "step": 3061 }, { "epoch": 0.274797514078661, "grad_norm": 0.5123180035853493, "learning_rate": 9.159437630870477e-06, "loss": 0.5111, "step": 3062 }, { "epoch": 0.27488725853133206, "grad_norm": 0.5067612763170108, "learning_rate": 9.162428956027521e-06, "loss": 0.5204, "step": 3063 }, { "epoch": 0.27497700298400307, "grad_norm": 0.5212869199474579, "learning_rate": 9.165420281184566e-06, "loss": 0.5203, "step": 3064 }, { "epoch": 0.2750667474366741, "grad_norm": 0.4836802496027896, "learning_rate": 9.16841160634161e-06, "loss": 0.5062, "step": 3065 }, { "epoch": 0.2751564918893451, "grad_norm": 0.5586663615374894, "learning_rate": 9.171402931498655e-06, "loss": 0.5684, "step": 3066 }, { "epoch": 0.2752462363420161, "grad_norm": 0.5014261177567134, "learning_rate": 9.1743942566557e-06, "loss": 0.4729, "step": 3067 }, { "epoch": 0.27533598079468713, "grad_norm": 0.5118132629248058, "learning_rate": 9.177385581812744e-06, "loss": 0.4591, "step": 3068 }, { "epoch": 0.27542572524735814, "grad_norm": 0.5061244817039148, "learning_rate": 9.180376906969788e-06, "loss": 0.4708, "step": 3069 }, { "epoch": 0.27551546970002916, "grad_norm": 0.5506397212570857, "learning_rate": 9.183368232126833e-06, "loss": 0.5729, "step": 3070 }, { "epoch": 0.2756052141527002, "grad_norm": 0.5327720286745551, "learning_rate": 9.186359557283878e-06, "loss": 0.5385, "step": 3071 }, { "epoch": 0.2756949586053712, "grad_norm": 0.49342382681273617, "learning_rate": 9.189350882440922e-06, "loss": 0.4801, "step": 3072 }, { "epoch": 0.2757847030580422, "grad_norm": 0.5200285464155947, "learning_rate": 9.192342207597967e-06, "loss": 0.4891, "step": 3073 }, { "epoch": 0.2758744475107132, "grad_norm": 0.46341959894422124, "learning_rate": 9.195333532755011e-06, "loss": 0.463, "step": 3074 }, { "epoch": 0.2759641919633843, "grad_norm": 0.5273640125607041, "learning_rate": 9.198324857912056e-06, "loss": 0.4865, "step": 3075 }, { "epoch": 0.2760539364160553, "grad_norm": 0.5075106234969781, "learning_rate": 9.2013161830691e-06, "loss": 0.4921, "step": 3076 }, { "epoch": 0.2761436808687263, "grad_norm": 0.5227733323637915, "learning_rate": 9.204307508226145e-06, "loss": 0.4551, "step": 3077 }, { "epoch": 0.27623342532139733, "grad_norm": 0.4862133461403023, "learning_rate": 9.20729883338319e-06, "loss": 0.4949, "step": 3078 }, { "epoch": 0.27632316977406834, "grad_norm": 0.5006392594223774, "learning_rate": 9.210290158540234e-06, "loss": 0.4689, "step": 3079 }, { "epoch": 0.27641291422673936, "grad_norm": 0.5006722808070265, "learning_rate": 9.213281483697279e-06, "loss": 0.4724, "step": 3080 }, { "epoch": 0.2765026586794104, "grad_norm": 0.52845028066443, "learning_rate": 9.216272808854323e-06, "loss": 0.5311, "step": 3081 }, { "epoch": 0.2765924031320814, "grad_norm": 0.5507477572056203, "learning_rate": 9.219264134011368e-06, "loss": 0.6139, "step": 3082 }, { "epoch": 0.2766821475847524, "grad_norm": 0.5112584080512735, "learning_rate": 9.222255459168412e-06, "loss": 0.492, "step": 3083 }, { "epoch": 0.2767718920374234, "grad_norm": 0.48401237844817135, "learning_rate": 9.225246784325457e-06, "loss": 0.5107, "step": 3084 }, { "epoch": 0.27686163649009443, "grad_norm": 0.5048405893670994, "learning_rate": 9.228238109482501e-06, "loss": 0.4871, "step": 3085 }, { "epoch": 0.2769513809427655, "grad_norm": 0.5183660067253235, "learning_rate": 9.231229434639546e-06, "loss": 0.5333, "step": 3086 }, { "epoch": 0.2770411253954365, "grad_norm": 0.5390721784372137, "learning_rate": 9.23422075979659e-06, "loss": 0.5234, "step": 3087 }, { "epoch": 0.27713086984810753, "grad_norm": 0.49755050096675835, "learning_rate": 9.237212084953635e-06, "loss": 0.4934, "step": 3088 }, { "epoch": 0.27722061430077855, "grad_norm": 0.4969697414715136, "learning_rate": 9.24020341011068e-06, "loss": 0.4945, "step": 3089 }, { "epoch": 0.27731035875344956, "grad_norm": 0.538773251612943, "learning_rate": 9.243194735267724e-06, "loss": 0.5336, "step": 3090 }, { "epoch": 0.2774001032061206, "grad_norm": 0.4740363877020283, "learning_rate": 9.246186060424769e-06, "loss": 0.4633, "step": 3091 }, { "epoch": 0.2774898476587916, "grad_norm": 0.4897932844598919, "learning_rate": 9.249177385581813e-06, "loss": 0.5197, "step": 3092 }, { "epoch": 0.2775795921114626, "grad_norm": 0.5383552948968378, "learning_rate": 9.252168710738858e-06, "loss": 0.5085, "step": 3093 }, { "epoch": 0.2776693365641336, "grad_norm": 0.4962756786825904, "learning_rate": 9.255160035895902e-06, "loss": 0.4694, "step": 3094 }, { "epoch": 0.27775908101680463, "grad_norm": 0.4485725282009316, "learning_rate": 9.258151361052947e-06, "loss": 0.4026, "step": 3095 }, { "epoch": 0.27784882546947565, "grad_norm": 0.5299536757902791, "learning_rate": 9.261142686209992e-06, "loss": 0.4799, "step": 3096 }, { "epoch": 0.27793856992214666, "grad_norm": 0.4699002561083527, "learning_rate": 9.264134011367036e-06, "loss": 0.4438, "step": 3097 }, { "epoch": 0.27802831437481773, "grad_norm": 0.4553487929341454, "learning_rate": 9.26712533652408e-06, "loss": 0.4474, "step": 3098 }, { "epoch": 0.27811805882748875, "grad_norm": 0.5215293099103742, "learning_rate": 9.270116661681125e-06, "loss": 0.5024, "step": 3099 }, { "epoch": 0.27820780328015976, "grad_norm": 0.4925059746408255, "learning_rate": 9.27310798683817e-06, "loss": 0.4635, "step": 3100 }, { "epoch": 0.2782975477328308, "grad_norm": 0.482587800293708, "learning_rate": 9.276099311995214e-06, "loss": 0.453, "step": 3101 }, { "epoch": 0.2783872921855018, "grad_norm": 0.506070101785249, "learning_rate": 9.279090637152259e-06, "loss": 0.5202, "step": 3102 }, { "epoch": 0.2784770366381728, "grad_norm": 0.5012870823767581, "learning_rate": 9.282081962309303e-06, "loss": 0.4941, "step": 3103 }, { "epoch": 0.2785667810908438, "grad_norm": 0.4900002233707917, "learning_rate": 9.285073287466348e-06, "loss": 0.4478, "step": 3104 }, { "epoch": 0.27865652554351483, "grad_norm": 0.5088674622951646, "learning_rate": 9.288064612623393e-06, "loss": 0.5387, "step": 3105 }, { "epoch": 0.27874626999618585, "grad_norm": 0.47861789052863424, "learning_rate": 9.291055937780437e-06, "loss": 0.443, "step": 3106 }, { "epoch": 0.27883601444885686, "grad_norm": 0.495771059355825, "learning_rate": 9.294047262937482e-06, "loss": 0.5068, "step": 3107 }, { "epoch": 0.2789257589015279, "grad_norm": 0.4969222690005518, "learning_rate": 9.297038588094526e-06, "loss": 0.4548, "step": 3108 }, { "epoch": 0.27901550335419895, "grad_norm": 0.5550510628719094, "learning_rate": 9.30002991325157e-06, "loss": 0.5777, "step": 3109 }, { "epoch": 0.27910524780686996, "grad_norm": 0.5499845215529513, "learning_rate": 9.303021238408615e-06, "loss": 0.569, "step": 3110 }, { "epoch": 0.279194992259541, "grad_norm": 0.48051333388633016, "learning_rate": 9.30601256356566e-06, "loss": 0.4727, "step": 3111 }, { "epoch": 0.279284736712212, "grad_norm": 0.4810365165804643, "learning_rate": 9.309003888722704e-06, "loss": 0.4294, "step": 3112 }, { "epoch": 0.279374481164883, "grad_norm": 0.5250830053712369, "learning_rate": 9.311995213879749e-06, "loss": 0.5782, "step": 3113 }, { "epoch": 0.279464225617554, "grad_norm": 0.5386516281292296, "learning_rate": 9.314986539036794e-06, "loss": 0.509, "step": 3114 }, { "epoch": 0.27955397007022503, "grad_norm": 0.4753893770091844, "learning_rate": 9.31797786419384e-06, "loss": 0.4786, "step": 3115 }, { "epoch": 0.27964371452289605, "grad_norm": 0.5620062014416373, "learning_rate": 9.320969189350884e-06, "loss": 0.565, "step": 3116 }, { "epoch": 0.27973345897556706, "grad_norm": 0.535355520507953, "learning_rate": 9.323960514507929e-06, "loss": 0.5431, "step": 3117 }, { "epoch": 0.2798232034282381, "grad_norm": 0.4830018815808689, "learning_rate": 9.326951839664974e-06, "loss": 0.4744, "step": 3118 }, { "epoch": 0.2799129478809091, "grad_norm": 0.5378063713304428, "learning_rate": 9.329943164822018e-06, "loss": 0.5106, "step": 3119 }, { "epoch": 0.2800026923335801, "grad_norm": 0.5732918846239965, "learning_rate": 9.332934489979063e-06, "loss": 0.5182, "step": 3120 }, { "epoch": 0.2800924367862512, "grad_norm": 0.47365850119758107, "learning_rate": 9.335925815136107e-06, "loss": 0.4584, "step": 3121 }, { "epoch": 0.2801821812389222, "grad_norm": 0.49684384228760015, "learning_rate": 9.338917140293152e-06, "loss": 0.4924, "step": 3122 }, { "epoch": 0.2802719256915932, "grad_norm": 0.4846054338243885, "learning_rate": 9.341908465450196e-06, "loss": 0.4685, "step": 3123 }, { "epoch": 0.2803616701442642, "grad_norm": 0.4617058065740434, "learning_rate": 9.344899790607241e-06, "loss": 0.4049, "step": 3124 }, { "epoch": 0.28045141459693523, "grad_norm": 0.551246157388876, "learning_rate": 9.347891115764285e-06, "loss": 0.5614, "step": 3125 }, { "epoch": 0.28054115904960625, "grad_norm": 0.47178517777350454, "learning_rate": 9.35088244092133e-06, "loss": 0.4988, "step": 3126 }, { "epoch": 0.28063090350227726, "grad_norm": 0.5436588078917712, "learning_rate": 9.353873766078373e-06, "loss": 0.5095, "step": 3127 }, { "epoch": 0.2807206479549483, "grad_norm": 0.5546548237257194, "learning_rate": 9.356865091235417e-06, "loss": 0.5228, "step": 3128 }, { "epoch": 0.2808103924076193, "grad_norm": 0.47879503778947036, "learning_rate": 9.359856416392462e-06, "loss": 0.4447, "step": 3129 }, { "epoch": 0.2809001368602903, "grad_norm": 0.5649076931253924, "learning_rate": 9.362847741549507e-06, "loss": 0.5632, "step": 3130 }, { "epoch": 0.2809898813129613, "grad_norm": 0.47883977899647756, "learning_rate": 9.365839066706551e-06, "loss": 0.4479, "step": 3131 }, { "epoch": 0.28107962576563233, "grad_norm": 0.4555880146957813, "learning_rate": 9.368830391863596e-06, "loss": 0.4186, "step": 3132 }, { "epoch": 0.2811693702183034, "grad_norm": 0.4630366825426274, "learning_rate": 9.37182171702064e-06, "loss": 0.4566, "step": 3133 }, { "epoch": 0.2812591146709744, "grad_norm": 0.5231218258344037, "learning_rate": 9.374813042177685e-06, "loss": 0.5095, "step": 3134 }, { "epoch": 0.28134885912364543, "grad_norm": 0.47166903973139646, "learning_rate": 9.37780436733473e-06, "loss": 0.4704, "step": 3135 }, { "epoch": 0.28143860357631645, "grad_norm": 0.49023277169378304, "learning_rate": 9.380795692491774e-06, "loss": 0.5128, "step": 3136 }, { "epoch": 0.28152834802898746, "grad_norm": 0.4786919934042048, "learning_rate": 9.383787017648818e-06, "loss": 0.4656, "step": 3137 }, { "epoch": 0.2816180924816585, "grad_norm": 0.5517956136122709, "learning_rate": 9.386778342805863e-06, "loss": 0.5539, "step": 3138 }, { "epoch": 0.2817078369343295, "grad_norm": 0.4607145043704011, "learning_rate": 9.389769667962908e-06, "loss": 0.4475, "step": 3139 }, { "epoch": 0.2817975813870005, "grad_norm": 0.5230156903256563, "learning_rate": 9.392760993119952e-06, "loss": 0.5122, "step": 3140 }, { "epoch": 0.2818873258396715, "grad_norm": 0.488434937822265, "learning_rate": 9.395752318276997e-06, "loss": 0.4734, "step": 3141 }, { "epoch": 0.28197707029234254, "grad_norm": 0.5794313076980709, "learning_rate": 9.398743643434041e-06, "loss": 0.6233, "step": 3142 }, { "epoch": 0.28206681474501355, "grad_norm": 0.4973117591341386, "learning_rate": 9.401734968591086e-06, "loss": 0.5186, "step": 3143 }, { "epoch": 0.2821565591976846, "grad_norm": 0.45693799532318313, "learning_rate": 9.40472629374813e-06, "loss": 0.4555, "step": 3144 }, { "epoch": 0.28224630365035563, "grad_norm": 0.49425957604425375, "learning_rate": 9.407717618905175e-06, "loss": 0.4375, "step": 3145 }, { "epoch": 0.28233604810302665, "grad_norm": 0.5549107041506678, "learning_rate": 9.41070894406222e-06, "loss": 0.5333, "step": 3146 }, { "epoch": 0.28242579255569766, "grad_norm": 0.5134858291227653, "learning_rate": 9.413700269219264e-06, "loss": 0.4724, "step": 3147 }, { "epoch": 0.2825155370083687, "grad_norm": 0.5251674872654389, "learning_rate": 9.416691594376309e-06, "loss": 0.5059, "step": 3148 }, { "epoch": 0.2826052814610397, "grad_norm": 0.5268343993259791, "learning_rate": 9.419682919533353e-06, "loss": 0.5624, "step": 3149 }, { "epoch": 0.2826950259137107, "grad_norm": 0.5558496845359434, "learning_rate": 9.422674244690398e-06, "loss": 0.5791, "step": 3150 }, { "epoch": 0.2827847703663817, "grad_norm": 0.5321531086531154, "learning_rate": 9.425665569847442e-06, "loss": 0.5211, "step": 3151 }, { "epoch": 0.28287451481905274, "grad_norm": 0.5084368023200062, "learning_rate": 9.428656895004488e-06, "loss": 0.459, "step": 3152 }, { "epoch": 0.28296425927172375, "grad_norm": 0.46155126924346596, "learning_rate": 9.431648220161533e-06, "loss": 0.4682, "step": 3153 }, { "epoch": 0.28305400372439476, "grad_norm": 0.49304101112405946, "learning_rate": 9.434639545318578e-06, "loss": 0.485, "step": 3154 }, { "epoch": 0.2831437481770658, "grad_norm": 0.5758313419511638, "learning_rate": 9.437630870475622e-06, "loss": 0.5924, "step": 3155 }, { "epoch": 0.28323349262973685, "grad_norm": 0.5279549821175017, "learning_rate": 9.440622195632667e-06, "loss": 0.5488, "step": 3156 }, { "epoch": 0.28332323708240786, "grad_norm": 0.5327916710670163, "learning_rate": 9.443613520789711e-06, "loss": 0.5254, "step": 3157 }, { "epoch": 0.2834129815350789, "grad_norm": 0.4899844431209292, "learning_rate": 9.446604845946756e-06, "loss": 0.5096, "step": 3158 }, { "epoch": 0.2835027259877499, "grad_norm": 0.5090498377980944, "learning_rate": 9.4495961711038e-06, "loss": 0.5421, "step": 3159 }, { "epoch": 0.2835924704404209, "grad_norm": 0.4682483509474357, "learning_rate": 9.452587496260845e-06, "loss": 0.4785, "step": 3160 }, { "epoch": 0.2836822148930919, "grad_norm": 0.502018552444294, "learning_rate": 9.45557882141789e-06, "loss": 0.4827, "step": 3161 }, { "epoch": 0.28377195934576294, "grad_norm": 0.49422131122634794, "learning_rate": 9.458570146574934e-06, "loss": 0.4879, "step": 3162 }, { "epoch": 0.28386170379843395, "grad_norm": 0.5209242921389614, "learning_rate": 9.461561471731979e-06, "loss": 0.495, "step": 3163 }, { "epoch": 0.28395144825110497, "grad_norm": 0.4724787213128039, "learning_rate": 9.464552796889023e-06, "loss": 0.4192, "step": 3164 }, { "epoch": 0.284041192703776, "grad_norm": 0.5813018692102088, "learning_rate": 9.467544122046068e-06, "loss": 0.5239, "step": 3165 }, { "epoch": 0.284130937156447, "grad_norm": 0.5328775053066611, "learning_rate": 9.470535447203112e-06, "loss": 0.5443, "step": 3166 }, { "epoch": 0.284220681609118, "grad_norm": 0.5418228490611291, "learning_rate": 9.473526772360157e-06, "loss": 0.5909, "step": 3167 }, { "epoch": 0.2843104260617891, "grad_norm": 0.4849686314777892, "learning_rate": 9.476518097517201e-06, "loss": 0.4822, "step": 3168 }, { "epoch": 0.2844001705144601, "grad_norm": 0.48917607653616213, "learning_rate": 9.479509422674246e-06, "loss": 0.4705, "step": 3169 }, { "epoch": 0.2844899149671311, "grad_norm": 0.5687772106536308, "learning_rate": 9.48250074783129e-06, "loss": 0.5458, "step": 3170 }, { "epoch": 0.2845796594198021, "grad_norm": 0.4999386947746123, "learning_rate": 9.485492072988335e-06, "loss": 0.4786, "step": 3171 }, { "epoch": 0.28466940387247314, "grad_norm": 0.4831606061554445, "learning_rate": 9.48848339814538e-06, "loss": 0.4744, "step": 3172 }, { "epoch": 0.28475914832514415, "grad_norm": 0.5437014910729541, "learning_rate": 9.491474723302424e-06, "loss": 0.5897, "step": 3173 }, { "epoch": 0.28484889277781517, "grad_norm": 0.47654401737212837, "learning_rate": 9.494466048459469e-06, "loss": 0.4949, "step": 3174 }, { "epoch": 0.2849386372304862, "grad_norm": 0.5134715320633888, "learning_rate": 9.497457373616513e-06, "loss": 0.5097, "step": 3175 }, { "epoch": 0.2850283816831572, "grad_norm": 0.5446994432441301, "learning_rate": 9.500448698773558e-06, "loss": 0.5455, "step": 3176 }, { "epoch": 0.2851181261358282, "grad_norm": 0.5041521071537826, "learning_rate": 9.503440023930602e-06, "loss": 0.4801, "step": 3177 }, { "epoch": 0.2852078705884992, "grad_norm": 0.48139979875315386, "learning_rate": 9.506431349087647e-06, "loss": 0.4765, "step": 3178 }, { "epoch": 0.2852976150411703, "grad_norm": 0.4736300294265978, "learning_rate": 9.509422674244692e-06, "loss": 0.4033, "step": 3179 }, { "epoch": 0.2853873594938413, "grad_norm": 0.5418492504620599, "learning_rate": 9.512413999401736e-06, "loss": 0.5172, "step": 3180 }, { "epoch": 0.2854771039465123, "grad_norm": 0.5786290877690773, "learning_rate": 9.51540532455878e-06, "loss": 0.5596, "step": 3181 }, { "epoch": 0.28556684839918334, "grad_norm": 0.5669240733491203, "learning_rate": 9.518396649715825e-06, "loss": 0.5318, "step": 3182 }, { "epoch": 0.28565659285185435, "grad_norm": 0.5029963210721651, "learning_rate": 9.52138797487287e-06, "loss": 0.5024, "step": 3183 }, { "epoch": 0.28574633730452537, "grad_norm": 0.5513425971628411, "learning_rate": 9.524379300029914e-06, "loss": 0.5585, "step": 3184 }, { "epoch": 0.2858360817571964, "grad_norm": 0.5066516896207929, "learning_rate": 9.527370625186959e-06, "loss": 0.4977, "step": 3185 }, { "epoch": 0.2859258262098674, "grad_norm": 0.541218635470506, "learning_rate": 9.530361950344003e-06, "loss": 0.4862, "step": 3186 }, { "epoch": 0.2860155706625384, "grad_norm": 0.6069187037269358, "learning_rate": 9.533353275501048e-06, "loss": 0.5782, "step": 3187 }, { "epoch": 0.2861053151152094, "grad_norm": 0.4742656236578572, "learning_rate": 9.536344600658093e-06, "loss": 0.4382, "step": 3188 }, { "epoch": 0.28619505956788044, "grad_norm": 0.5116141611530428, "learning_rate": 9.539335925815137e-06, "loss": 0.5158, "step": 3189 }, { "epoch": 0.28628480402055145, "grad_norm": 0.49581457361677705, "learning_rate": 9.542327250972182e-06, "loss": 0.484, "step": 3190 }, { "epoch": 0.2863745484732225, "grad_norm": 0.5036444113528503, "learning_rate": 9.545318576129226e-06, "loss": 0.5195, "step": 3191 }, { "epoch": 0.28646429292589354, "grad_norm": 0.5450654066780823, "learning_rate": 9.54830990128627e-06, "loss": 0.5931, "step": 3192 }, { "epoch": 0.28655403737856455, "grad_norm": 0.5380775080614101, "learning_rate": 9.551301226443315e-06, "loss": 0.552, "step": 3193 }, { "epoch": 0.28664378183123557, "grad_norm": 0.49947496283226905, "learning_rate": 9.55429255160036e-06, "loss": 0.4653, "step": 3194 }, { "epoch": 0.2867335262839066, "grad_norm": 0.5132920622198782, "learning_rate": 9.557283876757404e-06, "loss": 0.5259, "step": 3195 }, { "epoch": 0.2868232707365776, "grad_norm": 0.5009319431883272, "learning_rate": 9.560275201914449e-06, "loss": 0.4595, "step": 3196 }, { "epoch": 0.2869130151892486, "grad_norm": 0.544413646175313, "learning_rate": 9.563266527071494e-06, "loss": 0.5847, "step": 3197 }, { "epoch": 0.2870027596419196, "grad_norm": 0.506678047777391, "learning_rate": 9.566257852228538e-06, "loss": 0.5525, "step": 3198 }, { "epoch": 0.28709250409459064, "grad_norm": 0.4876624513717633, "learning_rate": 9.569249177385583e-06, "loss": 0.4406, "step": 3199 }, { "epoch": 0.28718224854726165, "grad_norm": 0.4346515069491429, "learning_rate": 9.572240502542627e-06, "loss": 0.4259, "step": 3200 }, { "epoch": 0.28727199299993267, "grad_norm": 0.5097925082740865, "learning_rate": 9.575231827699672e-06, "loss": 0.5349, "step": 3201 }, { "epoch": 0.28736173745260374, "grad_norm": 0.4947792324365721, "learning_rate": 9.578223152856716e-06, "loss": 0.4915, "step": 3202 }, { "epoch": 0.28745148190527475, "grad_norm": 0.46041364153002, "learning_rate": 9.581214478013761e-06, "loss": 0.4253, "step": 3203 }, { "epoch": 0.28754122635794577, "grad_norm": 0.49420914725933573, "learning_rate": 9.584205803170806e-06, "loss": 0.4619, "step": 3204 }, { "epoch": 0.2876309708106168, "grad_norm": 0.520786051425665, "learning_rate": 9.58719712832785e-06, "loss": 0.5049, "step": 3205 }, { "epoch": 0.2877207152632878, "grad_norm": 0.5331047292752773, "learning_rate": 9.590188453484895e-06, "loss": 0.5694, "step": 3206 }, { "epoch": 0.2878104597159588, "grad_norm": 0.4964105884168792, "learning_rate": 9.59317977864194e-06, "loss": 0.5051, "step": 3207 }, { "epoch": 0.2879002041686298, "grad_norm": 0.4985936034850177, "learning_rate": 9.596171103798984e-06, "loss": 0.5146, "step": 3208 }, { "epoch": 0.28798994862130084, "grad_norm": 0.5107217589656609, "learning_rate": 9.599162428956028e-06, "loss": 0.4767, "step": 3209 }, { "epoch": 0.28807969307397185, "grad_norm": 0.5042572078903812, "learning_rate": 9.602153754113073e-06, "loss": 0.5024, "step": 3210 }, { "epoch": 0.28816943752664287, "grad_norm": 0.502755716898763, "learning_rate": 9.605145079270117e-06, "loss": 0.4769, "step": 3211 }, { "epoch": 0.2882591819793139, "grad_norm": 0.4711949147970339, "learning_rate": 9.608136404427162e-06, "loss": 0.4231, "step": 3212 }, { "epoch": 0.2883489264319849, "grad_norm": 0.5111561939523325, "learning_rate": 9.611127729584207e-06, "loss": 0.4809, "step": 3213 }, { "epoch": 0.28843867088465597, "grad_norm": 0.5357782314044014, "learning_rate": 9.614119054741251e-06, "loss": 0.5341, "step": 3214 }, { "epoch": 0.288528415337327, "grad_norm": 0.4995879599006359, "learning_rate": 9.617110379898296e-06, "loss": 0.4995, "step": 3215 }, { "epoch": 0.288618159789998, "grad_norm": 0.4969070347638202, "learning_rate": 9.62010170505534e-06, "loss": 0.5021, "step": 3216 }, { "epoch": 0.288707904242669, "grad_norm": 0.4984333832303357, "learning_rate": 9.623093030212385e-06, "loss": 0.5104, "step": 3217 }, { "epoch": 0.28879764869534, "grad_norm": 0.5256176591601757, "learning_rate": 9.62608435536943e-06, "loss": 0.5223, "step": 3218 }, { "epoch": 0.28888739314801104, "grad_norm": 0.5630062568771439, "learning_rate": 9.629075680526474e-06, "loss": 0.578, "step": 3219 }, { "epoch": 0.28897713760068205, "grad_norm": 0.49103060231934653, "learning_rate": 9.632067005683518e-06, "loss": 0.4759, "step": 3220 }, { "epoch": 0.28906688205335307, "grad_norm": 0.46575047121446606, "learning_rate": 9.635058330840563e-06, "loss": 0.4284, "step": 3221 }, { "epoch": 0.2891566265060241, "grad_norm": 0.4968637846708568, "learning_rate": 9.638049655997608e-06, "loss": 0.4906, "step": 3222 }, { "epoch": 0.2892463709586951, "grad_norm": 0.46704856022921204, "learning_rate": 9.641040981154652e-06, "loss": 0.459, "step": 3223 }, { "epoch": 0.2893361154113661, "grad_norm": 0.5290341107281662, "learning_rate": 9.644032306311697e-06, "loss": 0.4998, "step": 3224 }, { "epoch": 0.2894258598640371, "grad_norm": 0.4618166265635667, "learning_rate": 9.647023631468741e-06, "loss": 0.4129, "step": 3225 }, { "epoch": 0.2895156043167082, "grad_norm": 0.5604541119802192, "learning_rate": 9.650014956625786e-06, "loss": 0.5421, "step": 3226 }, { "epoch": 0.2896053487693792, "grad_norm": 0.5053843469331585, "learning_rate": 9.65300628178283e-06, "loss": 0.4769, "step": 3227 }, { "epoch": 0.2896950932220502, "grad_norm": 0.5637171094441542, "learning_rate": 9.655997606939875e-06, "loss": 0.5462, "step": 3228 }, { "epoch": 0.28978483767472124, "grad_norm": 0.500908512296986, "learning_rate": 9.65898893209692e-06, "loss": 0.5231, "step": 3229 }, { "epoch": 0.28987458212739226, "grad_norm": 0.507951436315637, "learning_rate": 9.661980257253964e-06, "loss": 0.4982, "step": 3230 }, { "epoch": 0.28996432658006327, "grad_norm": 0.5092344588252259, "learning_rate": 9.664971582411009e-06, "loss": 0.4773, "step": 3231 }, { "epoch": 0.2900540710327343, "grad_norm": 0.5085502520020675, "learning_rate": 9.667962907568053e-06, "loss": 0.4779, "step": 3232 }, { "epoch": 0.2901438154854053, "grad_norm": 0.4412281668526709, "learning_rate": 9.670954232725098e-06, "loss": 0.4031, "step": 3233 }, { "epoch": 0.2902335599380763, "grad_norm": 0.547988875176586, "learning_rate": 9.673945557882142e-06, "loss": 0.571, "step": 3234 }, { "epoch": 0.2903233043907473, "grad_norm": 0.48383103770687474, "learning_rate": 9.676936883039187e-06, "loss": 0.4496, "step": 3235 }, { "epoch": 0.29041304884341834, "grad_norm": 0.47731499134410915, "learning_rate": 9.679928208196231e-06, "loss": 0.4841, "step": 3236 }, { "epoch": 0.2905027932960894, "grad_norm": 0.5012666372187206, "learning_rate": 9.682919533353276e-06, "loss": 0.5106, "step": 3237 }, { "epoch": 0.2905925377487604, "grad_norm": 0.5073625198556164, "learning_rate": 9.68591085851032e-06, "loss": 0.4759, "step": 3238 }, { "epoch": 0.29068228220143144, "grad_norm": 0.5009969128784385, "learning_rate": 9.688902183667365e-06, "loss": 0.4602, "step": 3239 }, { "epoch": 0.29077202665410246, "grad_norm": 0.4910020901150771, "learning_rate": 9.69189350882441e-06, "loss": 0.4225, "step": 3240 }, { "epoch": 0.29086177110677347, "grad_norm": 0.4987691026525816, "learning_rate": 9.694884833981454e-06, "loss": 0.4852, "step": 3241 }, { "epoch": 0.2909515155594445, "grad_norm": 0.4938568267110973, "learning_rate": 9.697876159138499e-06, "loss": 0.4529, "step": 3242 }, { "epoch": 0.2910412600121155, "grad_norm": 0.5325774390719057, "learning_rate": 9.700867484295545e-06, "loss": 0.4937, "step": 3243 }, { "epoch": 0.2911310044647865, "grad_norm": 0.5108784731594329, "learning_rate": 9.70385880945259e-06, "loss": 0.5115, "step": 3244 }, { "epoch": 0.29122074891745753, "grad_norm": 0.5154407799587558, "learning_rate": 9.706850134609634e-06, "loss": 0.5515, "step": 3245 }, { "epoch": 0.29131049337012854, "grad_norm": 0.4693205678073429, "learning_rate": 9.709841459766679e-06, "loss": 0.4291, "step": 3246 }, { "epoch": 0.29140023782279956, "grad_norm": 0.5397424708545195, "learning_rate": 9.712832784923723e-06, "loss": 0.5455, "step": 3247 }, { "epoch": 0.29148998227547057, "grad_norm": 0.5116746698770949, "learning_rate": 9.715824110080768e-06, "loss": 0.4731, "step": 3248 }, { "epoch": 0.29157972672814164, "grad_norm": 0.5305875076659033, "learning_rate": 9.718815435237812e-06, "loss": 0.5517, "step": 3249 }, { "epoch": 0.29166947118081266, "grad_norm": 0.5395921413081763, "learning_rate": 9.721806760394857e-06, "loss": 0.5258, "step": 3250 }, { "epoch": 0.29175921563348367, "grad_norm": 0.49937149641617506, "learning_rate": 9.7247980855519e-06, "loss": 0.4558, "step": 3251 }, { "epoch": 0.2918489600861547, "grad_norm": 0.513244393912187, "learning_rate": 9.727789410708944e-06, "loss": 0.5114, "step": 3252 }, { "epoch": 0.2919387045388257, "grad_norm": 0.5490822732152681, "learning_rate": 9.730780735865989e-06, "loss": 0.5664, "step": 3253 }, { "epoch": 0.2920284489914967, "grad_norm": 0.4968409997869055, "learning_rate": 9.733772061023033e-06, "loss": 0.4458, "step": 3254 }, { "epoch": 0.29211819344416773, "grad_norm": 0.5291510396750317, "learning_rate": 9.736763386180078e-06, "loss": 0.543, "step": 3255 }, { "epoch": 0.29220793789683874, "grad_norm": 0.4888073606289114, "learning_rate": 9.739754711337123e-06, "loss": 0.4647, "step": 3256 }, { "epoch": 0.29229768234950976, "grad_norm": 0.519706491532026, "learning_rate": 9.742746036494167e-06, "loss": 0.5059, "step": 3257 }, { "epoch": 0.29238742680218077, "grad_norm": 0.5008024868195311, "learning_rate": 9.745737361651212e-06, "loss": 0.5015, "step": 3258 }, { "epoch": 0.2924771712548518, "grad_norm": 0.4981628568229359, "learning_rate": 9.748728686808256e-06, "loss": 0.5007, "step": 3259 }, { "epoch": 0.2925669157075228, "grad_norm": 0.4999563554932986, "learning_rate": 9.7517200119653e-06, "loss": 0.5142, "step": 3260 }, { "epoch": 0.29265666016019387, "grad_norm": 0.4706691952740776, "learning_rate": 9.754711337122345e-06, "loss": 0.4285, "step": 3261 }, { "epoch": 0.2927464046128649, "grad_norm": 0.44942766298764364, "learning_rate": 9.75770266227939e-06, "loss": 0.4656, "step": 3262 }, { "epoch": 0.2928361490655359, "grad_norm": 0.45577116593135036, "learning_rate": 9.760693987436434e-06, "loss": 0.3941, "step": 3263 }, { "epoch": 0.2929258935182069, "grad_norm": 0.49844104112171, "learning_rate": 9.763685312593479e-06, "loss": 0.5075, "step": 3264 }, { "epoch": 0.29301563797087793, "grad_norm": 0.46879077374368733, "learning_rate": 9.766676637750524e-06, "loss": 0.4614, "step": 3265 }, { "epoch": 0.29310538242354894, "grad_norm": 0.516426069475325, "learning_rate": 9.769667962907568e-06, "loss": 0.521, "step": 3266 }, { "epoch": 0.29319512687621996, "grad_norm": 0.5639650464987679, "learning_rate": 9.772659288064613e-06, "loss": 0.5972, "step": 3267 }, { "epoch": 0.293284871328891, "grad_norm": 0.48831937206658277, "learning_rate": 9.775650613221657e-06, "loss": 0.4898, "step": 3268 }, { "epoch": 0.293374615781562, "grad_norm": 0.47877905554224426, "learning_rate": 9.778641938378702e-06, "loss": 0.4406, "step": 3269 }, { "epoch": 0.293464360234233, "grad_norm": 0.4704127054247446, "learning_rate": 9.781633263535746e-06, "loss": 0.4267, "step": 3270 }, { "epoch": 0.293554104686904, "grad_norm": 0.47158174521676216, "learning_rate": 9.784624588692791e-06, "loss": 0.4452, "step": 3271 }, { "epoch": 0.2936438491395751, "grad_norm": 0.5103129571839341, "learning_rate": 9.787615913849835e-06, "loss": 0.509, "step": 3272 }, { "epoch": 0.2937335935922461, "grad_norm": 0.5249505571885906, "learning_rate": 9.79060723900688e-06, "loss": 0.5845, "step": 3273 }, { "epoch": 0.2938233380449171, "grad_norm": 0.5427055015029532, "learning_rate": 9.793598564163925e-06, "loss": 0.5015, "step": 3274 }, { "epoch": 0.29391308249758813, "grad_norm": 0.49117232539152883, "learning_rate": 9.79658988932097e-06, "loss": 0.4682, "step": 3275 }, { "epoch": 0.29400282695025914, "grad_norm": 0.5070659239071986, "learning_rate": 9.799581214478014e-06, "loss": 0.5047, "step": 3276 }, { "epoch": 0.29409257140293016, "grad_norm": 0.4660420344318957, "learning_rate": 9.802572539635058e-06, "loss": 0.4184, "step": 3277 }, { "epoch": 0.2941823158556012, "grad_norm": 0.4740844671782458, "learning_rate": 9.805563864792103e-06, "loss": 0.5129, "step": 3278 }, { "epoch": 0.2942720603082722, "grad_norm": 0.5565431850762128, "learning_rate": 9.808555189949147e-06, "loss": 0.5123, "step": 3279 }, { "epoch": 0.2943618047609432, "grad_norm": 0.4814566329638592, "learning_rate": 9.811546515106194e-06, "loss": 0.4695, "step": 3280 }, { "epoch": 0.2944515492136142, "grad_norm": 0.49879525138404324, "learning_rate": 9.814537840263238e-06, "loss": 0.5105, "step": 3281 }, { "epoch": 0.29454129366628523, "grad_norm": 0.5098938694804399, "learning_rate": 9.817529165420283e-06, "loss": 0.557, "step": 3282 }, { "epoch": 0.29463103811895625, "grad_norm": 0.5258647489553143, "learning_rate": 9.820520490577327e-06, "loss": 0.5405, "step": 3283 }, { "epoch": 0.2947207825716273, "grad_norm": 0.5164217482662077, "learning_rate": 9.823511815734372e-06, "loss": 0.4795, "step": 3284 }, { "epoch": 0.29481052702429833, "grad_norm": 0.4889755674011454, "learning_rate": 9.826503140891416e-06, "loss": 0.4857, "step": 3285 }, { "epoch": 0.29490027147696934, "grad_norm": 0.5042884917651306, "learning_rate": 9.829494466048461e-06, "loss": 0.4762, "step": 3286 }, { "epoch": 0.29499001592964036, "grad_norm": 0.5150988694108596, "learning_rate": 9.832485791205506e-06, "loss": 0.5225, "step": 3287 }, { "epoch": 0.2950797603823114, "grad_norm": 0.4831047983143882, "learning_rate": 9.83547711636255e-06, "loss": 0.4626, "step": 3288 }, { "epoch": 0.2951695048349824, "grad_norm": 0.4814193774476015, "learning_rate": 9.838468441519595e-06, "loss": 0.48, "step": 3289 }, { "epoch": 0.2952592492876534, "grad_norm": 0.4953527784992184, "learning_rate": 9.84145976667664e-06, "loss": 0.4783, "step": 3290 }, { "epoch": 0.2953489937403244, "grad_norm": 0.45816028523702845, "learning_rate": 9.844451091833684e-06, "loss": 0.4433, "step": 3291 }, { "epoch": 0.29543873819299543, "grad_norm": 0.4748882328426796, "learning_rate": 9.847442416990728e-06, "loss": 0.4469, "step": 3292 }, { "epoch": 0.29552848264566645, "grad_norm": 0.5254594955067163, "learning_rate": 9.850433742147773e-06, "loss": 0.5216, "step": 3293 }, { "epoch": 0.29561822709833746, "grad_norm": 0.4580167256500118, "learning_rate": 9.853425067304817e-06, "loss": 0.4097, "step": 3294 }, { "epoch": 0.29570797155100853, "grad_norm": 0.5021164459334307, "learning_rate": 9.856416392461862e-06, "loss": 0.5133, "step": 3295 }, { "epoch": 0.29579771600367954, "grad_norm": 0.48502079900510264, "learning_rate": 9.859407717618907e-06, "loss": 0.4848, "step": 3296 }, { "epoch": 0.29588746045635056, "grad_norm": 0.43535537826919996, "learning_rate": 9.862399042775951e-06, "loss": 0.4188, "step": 3297 }, { "epoch": 0.2959772049090216, "grad_norm": 0.4965225689108689, "learning_rate": 9.865390367932996e-06, "loss": 0.4826, "step": 3298 }, { "epoch": 0.2960669493616926, "grad_norm": 0.49734515334615864, "learning_rate": 9.86838169309004e-06, "loss": 0.4499, "step": 3299 }, { "epoch": 0.2961566938143636, "grad_norm": 0.5559548063699499, "learning_rate": 9.871373018247085e-06, "loss": 0.5841, "step": 3300 }, { "epoch": 0.2962464382670346, "grad_norm": 0.48214655998996336, "learning_rate": 9.87436434340413e-06, "loss": 0.4242, "step": 3301 }, { "epoch": 0.29633618271970563, "grad_norm": 0.5450707633015103, "learning_rate": 9.877355668561174e-06, "loss": 0.5433, "step": 3302 }, { "epoch": 0.29642592717237665, "grad_norm": 0.5349646584183283, "learning_rate": 9.880346993718218e-06, "loss": 0.5328, "step": 3303 }, { "epoch": 0.29651567162504766, "grad_norm": 0.4699230598936684, "learning_rate": 9.883338318875263e-06, "loss": 0.463, "step": 3304 }, { "epoch": 0.2966054160777187, "grad_norm": 0.56233665454568, "learning_rate": 9.886329644032308e-06, "loss": 0.5373, "step": 3305 }, { "epoch": 0.2966951605303897, "grad_norm": 0.5143651318436743, "learning_rate": 9.889320969189352e-06, "loss": 0.501, "step": 3306 }, { "epoch": 0.29678490498306076, "grad_norm": 0.5331596687787341, "learning_rate": 9.892312294346397e-06, "loss": 0.5026, "step": 3307 }, { "epoch": 0.2968746494357318, "grad_norm": 0.4543612038678747, "learning_rate": 9.895303619503441e-06, "loss": 0.4692, "step": 3308 }, { "epoch": 0.2969643938884028, "grad_norm": 0.43598125495041895, "learning_rate": 9.898294944660486e-06, "loss": 0.4028, "step": 3309 }, { "epoch": 0.2970541383410738, "grad_norm": 0.5267890415378671, "learning_rate": 9.90128626981753e-06, "loss": 0.5334, "step": 3310 }, { "epoch": 0.2971438827937448, "grad_norm": 0.5066946702951358, "learning_rate": 9.904277594974575e-06, "loss": 0.5229, "step": 3311 }, { "epoch": 0.29723362724641583, "grad_norm": 0.5220078991306332, "learning_rate": 9.90726892013162e-06, "loss": 0.5335, "step": 3312 }, { "epoch": 0.29732337169908685, "grad_norm": 0.4815292293903043, "learning_rate": 9.910260245288662e-06, "loss": 0.4918, "step": 3313 }, { "epoch": 0.29741311615175786, "grad_norm": 0.5101118475441397, "learning_rate": 9.913251570445707e-06, "loss": 0.5529, "step": 3314 }, { "epoch": 0.2975028606044289, "grad_norm": 0.5495133797594294, "learning_rate": 9.916242895602751e-06, "loss": 0.5146, "step": 3315 }, { "epoch": 0.2975926050570999, "grad_norm": 0.5126868327638183, "learning_rate": 9.919234220759798e-06, "loss": 0.5167, "step": 3316 }, { "epoch": 0.2976823495097709, "grad_norm": 0.5491055351103666, "learning_rate": 9.922225545916842e-06, "loss": 0.5568, "step": 3317 }, { "epoch": 0.2977720939624419, "grad_norm": 0.5088269426953972, "learning_rate": 9.925216871073887e-06, "loss": 0.5395, "step": 3318 }, { "epoch": 0.297861838415113, "grad_norm": 0.47934875481279987, "learning_rate": 9.928208196230931e-06, "loss": 0.4479, "step": 3319 }, { "epoch": 0.297951582867784, "grad_norm": 0.49842921606144874, "learning_rate": 9.931199521387976e-06, "loss": 0.4897, "step": 3320 }, { "epoch": 0.298041327320455, "grad_norm": 0.480683464128846, "learning_rate": 9.93419084654502e-06, "loss": 0.4943, "step": 3321 }, { "epoch": 0.29813107177312603, "grad_norm": 0.5164206186271755, "learning_rate": 9.937182171702065e-06, "loss": 0.4774, "step": 3322 }, { "epoch": 0.29822081622579705, "grad_norm": 0.500837454979635, "learning_rate": 9.94017349685911e-06, "loss": 0.4633, "step": 3323 }, { "epoch": 0.29831056067846806, "grad_norm": 0.5183383559578177, "learning_rate": 9.943164822016154e-06, "loss": 0.5133, "step": 3324 }, { "epoch": 0.2984003051311391, "grad_norm": 0.5078471468272936, "learning_rate": 9.946156147173199e-06, "loss": 0.4816, "step": 3325 }, { "epoch": 0.2984900495838101, "grad_norm": 0.5160155005798993, "learning_rate": 9.949147472330243e-06, "loss": 0.486, "step": 3326 }, { "epoch": 0.2985797940364811, "grad_norm": 0.5187562543649653, "learning_rate": 9.952138797487288e-06, "loss": 0.5018, "step": 3327 }, { "epoch": 0.2986695384891521, "grad_norm": 0.5429180581591675, "learning_rate": 9.955130122644332e-06, "loss": 0.4826, "step": 3328 }, { "epoch": 0.29875928294182313, "grad_norm": 0.5133317072683443, "learning_rate": 9.958121447801377e-06, "loss": 0.4641, "step": 3329 }, { "epoch": 0.2988490273944942, "grad_norm": 0.5454653225950603, "learning_rate": 9.961112772958422e-06, "loss": 0.5476, "step": 3330 }, { "epoch": 0.2989387718471652, "grad_norm": 0.5454839063470766, "learning_rate": 9.964104098115466e-06, "loss": 0.54, "step": 3331 }, { "epoch": 0.29902851629983623, "grad_norm": 0.4766493938683443, "learning_rate": 9.96709542327251e-06, "loss": 0.4967, "step": 3332 }, { "epoch": 0.29911826075250725, "grad_norm": 0.5226433856959319, "learning_rate": 9.970086748429555e-06, "loss": 0.5589, "step": 3333 }, { "epoch": 0.29920800520517826, "grad_norm": 0.5399157862547589, "learning_rate": 9.9730780735866e-06, "loss": 0.5055, "step": 3334 }, { "epoch": 0.2992977496578493, "grad_norm": 0.4474009181971925, "learning_rate": 9.976069398743644e-06, "loss": 0.4745, "step": 3335 }, { "epoch": 0.2993874941105203, "grad_norm": 0.49949518254893777, "learning_rate": 9.979060723900689e-06, "loss": 0.5341, "step": 3336 }, { "epoch": 0.2994772385631913, "grad_norm": 0.4847431434497283, "learning_rate": 9.982052049057733e-06, "loss": 0.5085, "step": 3337 }, { "epoch": 0.2995669830158623, "grad_norm": 0.4993353363518004, "learning_rate": 9.985043374214778e-06, "loss": 0.4406, "step": 3338 }, { "epoch": 0.29965672746853333, "grad_norm": 0.4829468906147105, "learning_rate": 9.988034699371823e-06, "loss": 0.4772, "step": 3339 }, { "epoch": 0.29974647192120435, "grad_norm": 0.5059749135827805, "learning_rate": 9.991026024528867e-06, "loss": 0.5251, "step": 3340 }, { "epoch": 0.29983621637387536, "grad_norm": 0.4968358953129729, "learning_rate": 9.994017349685912e-06, "loss": 0.5245, "step": 3341 }, { "epoch": 0.29992596082654643, "grad_norm": 0.46663353260929985, "learning_rate": 9.997008674842956e-06, "loss": 0.4719, "step": 3342 }, { "epoch": 0.30001570527921745, "grad_norm": 0.48414794228060226, "learning_rate": 1e-05, "loss": 0.4963, "step": 3343 }, { "epoch": 0.30010544973188846, "grad_norm": 0.4990118963225736, "learning_rate": 9.999999972735506e-06, "loss": 0.4602, "step": 3344 }, { "epoch": 0.3001951941845595, "grad_norm": 0.5238801561239894, "learning_rate": 9.999999890942019e-06, "loss": 0.5297, "step": 3345 }, { "epoch": 0.3002849386372305, "grad_norm": 0.49411004012427995, "learning_rate": 9.999999754619543e-06, "loss": 0.5249, "step": 3346 }, { "epoch": 0.3003746830899015, "grad_norm": 0.5007141055859716, "learning_rate": 9.999999563768079e-06, "loss": 0.4996, "step": 3347 }, { "epoch": 0.3004644275425725, "grad_norm": 0.4789719194902872, "learning_rate": 9.999999318387628e-06, "loss": 0.4867, "step": 3348 }, { "epoch": 0.30055417199524354, "grad_norm": 0.47759141202476824, "learning_rate": 9.999999018478193e-06, "loss": 0.4526, "step": 3349 }, { "epoch": 0.30064391644791455, "grad_norm": 0.49712066052403187, "learning_rate": 9.999998664039779e-06, "loss": 0.5113, "step": 3350 }, { "epoch": 0.30073366090058556, "grad_norm": 0.5023214872033277, "learning_rate": 9.999998255072387e-06, "loss": 0.4944, "step": 3351 }, { "epoch": 0.3008234053532566, "grad_norm": 0.53889178317055, "learning_rate": 9.999997791576024e-06, "loss": 0.5094, "step": 3352 }, { "epoch": 0.30091314980592765, "grad_norm": 0.5088607150967613, "learning_rate": 9.999997273550694e-06, "loss": 0.5018, "step": 3353 }, { "epoch": 0.30100289425859866, "grad_norm": 0.6015959061294962, "learning_rate": 9.999996700996403e-06, "loss": 0.5987, "step": 3354 }, { "epoch": 0.3010926387112697, "grad_norm": 0.45211616357685175, "learning_rate": 9.999996073913157e-06, "loss": 0.4199, "step": 3355 }, { "epoch": 0.3011823831639407, "grad_norm": 0.4830510719555555, "learning_rate": 9.99999539230096e-06, "loss": 0.5019, "step": 3356 }, { "epoch": 0.3012721276166117, "grad_norm": 0.5398644658418298, "learning_rate": 9.999994656159827e-06, "loss": 0.5437, "step": 3357 }, { "epoch": 0.3013618720692827, "grad_norm": 0.45964190433231183, "learning_rate": 9.999993865489758e-06, "loss": 0.479, "step": 3358 }, { "epoch": 0.30145161652195374, "grad_norm": 0.4636959308679054, "learning_rate": 9.999993020290766e-06, "loss": 0.4561, "step": 3359 }, { "epoch": 0.30154136097462475, "grad_norm": 0.5047107084548335, "learning_rate": 9.999992120562859e-06, "loss": 0.5007, "step": 3360 }, { "epoch": 0.30163110542729576, "grad_norm": 0.4823950902911089, "learning_rate": 9.999991166306045e-06, "loss": 0.5152, "step": 3361 }, { "epoch": 0.3017208498799668, "grad_norm": 0.5136080128550391, "learning_rate": 9.999990157520339e-06, "loss": 0.5213, "step": 3362 }, { "epoch": 0.3018105943326378, "grad_norm": 0.4971435540111839, "learning_rate": 9.999989094205748e-06, "loss": 0.481, "step": 3363 }, { "epoch": 0.3019003387853088, "grad_norm": 0.4606845557728455, "learning_rate": 9.999987976362285e-06, "loss": 0.4433, "step": 3364 }, { "epoch": 0.3019900832379799, "grad_norm": 0.5294489220986076, "learning_rate": 9.999986803989961e-06, "loss": 0.5489, "step": 3365 }, { "epoch": 0.3020798276906509, "grad_norm": 0.5207433943856885, "learning_rate": 9.999985577088792e-06, "loss": 0.53, "step": 3366 }, { "epoch": 0.3021695721433219, "grad_norm": 0.4687025486495824, "learning_rate": 9.999984295658789e-06, "loss": 0.4834, "step": 3367 }, { "epoch": 0.3022593165959929, "grad_norm": 0.5176470776398652, "learning_rate": 9.999982959699966e-06, "loss": 0.5547, "step": 3368 }, { "epoch": 0.30234906104866394, "grad_norm": 0.5609152938207536, "learning_rate": 9.999981569212336e-06, "loss": 0.5806, "step": 3369 }, { "epoch": 0.30243880550133495, "grad_norm": 0.5009644795519879, "learning_rate": 9.999980124195917e-06, "loss": 0.5184, "step": 3370 }, { "epoch": 0.30252854995400597, "grad_norm": 0.4751341818871623, "learning_rate": 9.999978624650724e-06, "loss": 0.5124, "step": 3371 }, { "epoch": 0.302618294406677, "grad_norm": 0.4576330667443962, "learning_rate": 9.999977070576774e-06, "loss": 0.4708, "step": 3372 }, { "epoch": 0.302708038859348, "grad_norm": 0.523243930755859, "learning_rate": 9.999975461974081e-06, "loss": 0.5134, "step": 3373 }, { "epoch": 0.302797783312019, "grad_norm": 0.4974416380434895, "learning_rate": 9.999973798842666e-06, "loss": 0.5157, "step": 3374 }, { "epoch": 0.30288752776469, "grad_norm": 0.530754488601261, "learning_rate": 9.999972081182546e-06, "loss": 0.5143, "step": 3375 }, { "epoch": 0.30297727221736104, "grad_norm": 0.5226723615614004, "learning_rate": 9.999970308993738e-06, "loss": 0.4959, "step": 3376 }, { "epoch": 0.3030670166700321, "grad_norm": 0.5002148871395121, "learning_rate": 9.999968482276264e-06, "loss": 0.5485, "step": 3377 }, { "epoch": 0.3031567611227031, "grad_norm": 0.5066304828731001, "learning_rate": 9.999966601030142e-06, "loss": 0.5127, "step": 3378 }, { "epoch": 0.30324650557537414, "grad_norm": 0.527392535846917, "learning_rate": 9.999964665255393e-06, "loss": 0.5776, "step": 3379 }, { "epoch": 0.30333625002804515, "grad_norm": 0.4942621464087077, "learning_rate": 9.99996267495204e-06, "loss": 0.5094, "step": 3380 }, { "epoch": 0.30342599448071617, "grad_norm": 0.49319259751010264, "learning_rate": 9.999960630120101e-06, "loss": 0.4841, "step": 3381 }, { "epoch": 0.3035157389333872, "grad_norm": 0.5370302011019141, "learning_rate": 9.9999585307596e-06, "loss": 0.5158, "step": 3382 }, { "epoch": 0.3036054833860582, "grad_norm": 0.5125791213410282, "learning_rate": 9.999956376870563e-06, "loss": 0.5172, "step": 3383 }, { "epoch": 0.3036952278387292, "grad_norm": 0.4786208067509046, "learning_rate": 9.999954168453008e-06, "loss": 0.4476, "step": 3384 }, { "epoch": 0.3037849722914002, "grad_norm": 0.5252679153444954, "learning_rate": 9.999951905506964e-06, "loss": 0.4748, "step": 3385 }, { "epoch": 0.30387471674407124, "grad_norm": 0.5289092251481704, "learning_rate": 9.999949588032452e-06, "loss": 0.5372, "step": 3386 }, { "epoch": 0.30396446119674225, "grad_norm": 0.47746684633422426, "learning_rate": 9.9999472160295e-06, "loss": 0.4605, "step": 3387 }, { "epoch": 0.3040542056494133, "grad_norm": 0.5052011449230418, "learning_rate": 9.99994478949813e-06, "loss": 0.5535, "step": 3388 }, { "epoch": 0.30414395010208434, "grad_norm": 0.5333735311984867, "learning_rate": 9.999942308438373e-06, "loss": 0.5099, "step": 3389 }, { "epoch": 0.30423369455475535, "grad_norm": 0.4771920315796274, "learning_rate": 9.999939772850255e-06, "loss": 0.4529, "step": 3390 }, { "epoch": 0.30432343900742637, "grad_norm": 0.4700830472934711, "learning_rate": 9.999937182733801e-06, "loss": 0.4274, "step": 3391 }, { "epoch": 0.3044131834600974, "grad_norm": 0.49516757181513615, "learning_rate": 9.999934538089042e-06, "loss": 0.5119, "step": 3392 }, { "epoch": 0.3045029279127684, "grad_norm": 0.4581617092546256, "learning_rate": 9.999931838916005e-06, "loss": 0.4728, "step": 3393 }, { "epoch": 0.3045926723654394, "grad_norm": 0.5112425726814883, "learning_rate": 9.999929085214721e-06, "loss": 0.4875, "step": 3394 }, { "epoch": 0.3046824168181104, "grad_norm": 0.5029579698025045, "learning_rate": 9.999926276985219e-06, "loss": 0.4868, "step": 3395 }, { "epoch": 0.30477216127078144, "grad_norm": 0.5210622592121883, "learning_rate": 9.999923414227531e-06, "loss": 0.5184, "step": 3396 }, { "epoch": 0.30486190572345245, "grad_norm": 0.4962747620533544, "learning_rate": 9.999920496941686e-06, "loss": 0.5419, "step": 3397 }, { "epoch": 0.30495165017612347, "grad_norm": 0.5603515608272986, "learning_rate": 9.999917525127719e-06, "loss": 0.5498, "step": 3398 }, { "epoch": 0.3050413946287945, "grad_norm": 0.5668050639502525, "learning_rate": 9.999914498785657e-06, "loss": 0.5737, "step": 3399 }, { "epoch": 0.30513113908146555, "grad_norm": 0.4502074301336371, "learning_rate": 9.999911417915538e-06, "loss": 0.4171, "step": 3400 }, { "epoch": 0.30522088353413657, "grad_norm": 0.5002453519234996, "learning_rate": 9.999908282517395e-06, "loss": 0.4843, "step": 3401 }, { "epoch": 0.3053106279868076, "grad_norm": 0.4886621885816807, "learning_rate": 9.99990509259126e-06, "loss": 0.4952, "step": 3402 }, { "epoch": 0.3054003724394786, "grad_norm": 0.4762606686034961, "learning_rate": 9.99990184813717e-06, "loss": 0.4629, "step": 3403 }, { "epoch": 0.3054901168921496, "grad_norm": 0.5271530963009665, "learning_rate": 9.999898549155158e-06, "loss": 0.5055, "step": 3404 }, { "epoch": 0.3055798613448206, "grad_norm": 0.5366176892965122, "learning_rate": 9.999895195645263e-06, "loss": 0.5682, "step": 3405 }, { "epoch": 0.30566960579749164, "grad_norm": 0.4812999830860471, "learning_rate": 9.999891787607519e-06, "loss": 0.4569, "step": 3406 }, { "epoch": 0.30575935025016265, "grad_norm": 0.5171021581951464, "learning_rate": 9.999888325041964e-06, "loss": 0.5595, "step": 3407 }, { "epoch": 0.30584909470283367, "grad_norm": 0.5862931215498466, "learning_rate": 9.999884807948636e-06, "loss": 0.5967, "step": 3408 }, { "epoch": 0.3059388391555047, "grad_norm": 0.5038482910114404, "learning_rate": 9.999881236327573e-06, "loss": 0.494, "step": 3409 }, { "epoch": 0.3060285836081757, "grad_norm": 0.49339838870887703, "learning_rate": 9.999877610178814e-06, "loss": 0.4875, "step": 3410 }, { "epoch": 0.3061183280608467, "grad_norm": 0.48697826405849, "learning_rate": 9.9998739295024e-06, "loss": 0.5035, "step": 3411 }, { "epoch": 0.3062080725135178, "grad_norm": 0.5252648595693472, "learning_rate": 9.999870194298369e-06, "loss": 0.5026, "step": 3412 }, { "epoch": 0.3062978169661888, "grad_norm": 0.5454814245299339, "learning_rate": 9.999866404566762e-06, "loss": 0.5107, "step": 3413 }, { "epoch": 0.3063875614188598, "grad_norm": 0.4933190097000187, "learning_rate": 9.999862560307622e-06, "loss": 0.487, "step": 3414 }, { "epoch": 0.3064773058715308, "grad_norm": 0.5449115088284272, "learning_rate": 9.999858661520991e-06, "loss": 0.4967, "step": 3415 }, { "epoch": 0.30656705032420184, "grad_norm": 0.49043222396766156, "learning_rate": 9.999854708206908e-06, "loss": 0.4382, "step": 3416 }, { "epoch": 0.30665679477687285, "grad_norm": 0.4994671731860747, "learning_rate": 9.999850700365419e-06, "loss": 0.5061, "step": 3417 }, { "epoch": 0.30674653922954387, "grad_norm": 0.5106504239985145, "learning_rate": 9.999846637996567e-06, "loss": 0.4463, "step": 3418 }, { "epoch": 0.3068362836822149, "grad_norm": 0.5011269246443579, "learning_rate": 9.999842521100399e-06, "loss": 0.4831, "step": 3419 }, { "epoch": 0.3069260281348859, "grad_norm": 0.5112657929759835, "learning_rate": 9.999838349676953e-06, "loss": 0.4787, "step": 3420 }, { "epoch": 0.3070157725875569, "grad_norm": 0.5373210679036574, "learning_rate": 9.999834123726282e-06, "loss": 0.5539, "step": 3421 }, { "epoch": 0.3071055170402279, "grad_norm": 0.4790048617619451, "learning_rate": 9.99982984324843e-06, "loss": 0.4504, "step": 3422 }, { "epoch": 0.307195261492899, "grad_norm": 0.4743691959494049, "learning_rate": 9.99982550824344e-06, "loss": 0.4699, "step": 3423 }, { "epoch": 0.30728500594557, "grad_norm": 0.48006702490249875, "learning_rate": 9.999821118711362e-06, "loss": 0.4886, "step": 3424 }, { "epoch": 0.307374750398241, "grad_norm": 0.5259825470308519, "learning_rate": 9.999816674652245e-06, "loss": 0.5422, "step": 3425 }, { "epoch": 0.30746449485091204, "grad_norm": 0.512515451281979, "learning_rate": 9.999812176066135e-06, "loss": 0.5099, "step": 3426 }, { "epoch": 0.30755423930358305, "grad_norm": 0.4560210956271942, "learning_rate": 9.999807622953084e-06, "loss": 0.4117, "step": 3427 }, { "epoch": 0.30764398375625407, "grad_norm": 0.5413224826479703, "learning_rate": 9.99980301531314e-06, "loss": 0.5242, "step": 3428 }, { "epoch": 0.3077337282089251, "grad_norm": 0.5593815283290783, "learning_rate": 9.999798353146354e-06, "loss": 0.5645, "step": 3429 }, { "epoch": 0.3078234726615961, "grad_norm": 0.561433843284349, "learning_rate": 9.999793636452772e-06, "loss": 0.556, "step": 3430 }, { "epoch": 0.3079132171142671, "grad_norm": 0.5372698902611451, "learning_rate": 9.999788865232452e-06, "loss": 0.5441, "step": 3431 }, { "epoch": 0.3080029615669381, "grad_norm": 0.48425761734751205, "learning_rate": 9.999784039485445e-06, "loss": 0.5152, "step": 3432 }, { "epoch": 0.30809270601960914, "grad_norm": 0.5561816642835277, "learning_rate": 9.9997791592118e-06, "loss": 0.5679, "step": 3433 }, { "epoch": 0.30818245047228016, "grad_norm": 0.48945006249578366, "learning_rate": 9.999774224411573e-06, "loss": 0.4681, "step": 3434 }, { "epoch": 0.3082721949249512, "grad_norm": 0.48340868295747164, "learning_rate": 9.999769235084817e-06, "loss": 0.4483, "step": 3435 }, { "epoch": 0.30836193937762224, "grad_norm": 0.48088507446537604, "learning_rate": 9.999764191231588e-06, "loss": 0.5067, "step": 3436 }, { "epoch": 0.30845168383029326, "grad_norm": 0.5652934759356427, "learning_rate": 9.99975909285194e-06, "loss": 0.5094, "step": 3437 }, { "epoch": 0.30854142828296427, "grad_norm": 0.45415092813817576, "learning_rate": 9.999753939945926e-06, "loss": 0.4034, "step": 3438 }, { "epoch": 0.3086311727356353, "grad_norm": 0.5232715570455825, "learning_rate": 9.999748732513606e-06, "loss": 0.545, "step": 3439 }, { "epoch": 0.3087209171883063, "grad_norm": 0.5059948667954428, "learning_rate": 9.999743470555035e-06, "loss": 0.5193, "step": 3440 }, { "epoch": 0.3088106616409773, "grad_norm": 0.50854318657302, "learning_rate": 9.99973815407027e-06, "loss": 0.4998, "step": 3441 }, { "epoch": 0.3089004060936483, "grad_norm": 0.5426394338656334, "learning_rate": 9.999732783059369e-06, "loss": 0.5782, "step": 3442 }, { "epoch": 0.30899015054631934, "grad_norm": 0.4980861480193479, "learning_rate": 9.999727357522393e-06, "loss": 0.4737, "step": 3443 }, { "epoch": 0.30907989499899036, "grad_norm": 0.49619885086897436, "learning_rate": 9.999721877459397e-06, "loss": 0.5172, "step": 3444 }, { "epoch": 0.30916963945166137, "grad_norm": 0.5280902690588406, "learning_rate": 9.999716342870445e-06, "loss": 0.5948, "step": 3445 }, { "epoch": 0.30925938390433244, "grad_norm": 0.5092195905101644, "learning_rate": 9.999710753755595e-06, "loss": 0.506, "step": 3446 }, { "epoch": 0.30934912835700346, "grad_norm": 0.5453061538587403, "learning_rate": 9.999705110114907e-06, "loss": 0.489, "step": 3447 }, { "epoch": 0.30943887280967447, "grad_norm": 0.4965196033888928, "learning_rate": 9.999699411948447e-06, "loss": 0.5783, "step": 3448 }, { "epoch": 0.3095286172623455, "grad_norm": 0.49164954706345193, "learning_rate": 9.999693659256272e-06, "loss": 0.4741, "step": 3449 }, { "epoch": 0.3096183617150165, "grad_norm": 0.47550678941138197, "learning_rate": 9.999687852038447e-06, "loss": 0.5033, "step": 3450 }, { "epoch": 0.3097081061676875, "grad_norm": 0.5370035865708265, "learning_rate": 9.999681990295036e-06, "loss": 0.46, "step": 3451 }, { "epoch": 0.30979785062035853, "grad_norm": 0.49241075203813106, "learning_rate": 9.999676074026102e-06, "loss": 0.442, "step": 3452 }, { "epoch": 0.30988759507302954, "grad_norm": 0.5287346287992677, "learning_rate": 9.999670103231708e-06, "loss": 0.5228, "step": 3453 }, { "epoch": 0.30997733952570056, "grad_norm": 0.47291195143192005, "learning_rate": 9.999664077911922e-06, "loss": 0.4715, "step": 3454 }, { "epoch": 0.31006708397837157, "grad_norm": 0.513089926951184, "learning_rate": 9.99965799806681e-06, "loss": 0.4595, "step": 3455 }, { "epoch": 0.3101568284310426, "grad_norm": 0.5073952840587566, "learning_rate": 9.999651863696434e-06, "loss": 0.516, "step": 3456 }, { "epoch": 0.3102465728837136, "grad_norm": 0.4582633227897026, "learning_rate": 9.999645674800866e-06, "loss": 0.4577, "step": 3457 }, { "epoch": 0.31033631733638467, "grad_norm": 0.5307268967571082, "learning_rate": 9.99963943138017e-06, "loss": 0.5693, "step": 3458 }, { "epoch": 0.3104260617890557, "grad_norm": 0.4794509657298615, "learning_rate": 9.999633133434415e-06, "loss": 0.488, "step": 3459 }, { "epoch": 0.3105158062417267, "grad_norm": 0.5302729123823243, "learning_rate": 9.999626780963669e-06, "loss": 0.5375, "step": 3460 }, { "epoch": 0.3106055506943977, "grad_norm": 0.5283515161694207, "learning_rate": 9.999620373968004e-06, "loss": 0.5225, "step": 3461 }, { "epoch": 0.31069529514706873, "grad_norm": 0.5026829077693686, "learning_rate": 9.999613912447488e-06, "loss": 0.5125, "step": 3462 }, { "epoch": 0.31078503959973974, "grad_norm": 0.5207967668731872, "learning_rate": 9.99960739640219e-06, "loss": 0.4983, "step": 3463 }, { "epoch": 0.31087478405241076, "grad_norm": 0.5417197656207601, "learning_rate": 9.999600825832185e-06, "loss": 0.5466, "step": 3464 }, { "epoch": 0.31096452850508177, "grad_norm": 0.5057698182471856, "learning_rate": 9.99959420073754e-06, "loss": 0.5265, "step": 3465 }, { "epoch": 0.3110542729577528, "grad_norm": 0.5399189856980979, "learning_rate": 9.999587521118331e-06, "loss": 0.5441, "step": 3466 }, { "epoch": 0.3111440174104238, "grad_norm": 0.4638183800165656, "learning_rate": 9.99958078697463e-06, "loss": 0.4701, "step": 3467 }, { "epoch": 0.3112337618630948, "grad_norm": 0.4899642034720701, "learning_rate": 9.999573998306507e-06, "loss": 0.4864, "step": 3468 }, { "epoch": 0.31132350631576583, "grad_norm": 0.471889337981424, "learning_rate": 9.999567155114041e-06, "loss": 0.4697, "step": 3469 }, { "epoch": 0.3114132507684369, "grad_norm": 0.47953639260858616, "learning_rate": 9.999560257397304e-06, "loss": 0.4845, "step": 3470 }, { "epoch": 0.3115029952211079, "grad_norm": 0.4962057102795659, "learning_rate": 9.999553305156372e-06, "loss": 0.4443, "step": 3471 }, { "epoch": 0.31159273967377893, "grad_norm": 0.4680215382533433, "learning_rate": 9.99954629839132e-06, "loss": 0.4428, "step": 3472 }, { "epoch": 0.31168248412644994, "grad_norm": 0.4927359733872266, "learning_rate": 9.999539237102225e-06, "loss": 0.4536, "step": 3473 }, { "epoch": 0.31177222857912096, "grad_norm": 0.4809056295492708, "learning_rate": 9.999532121289166e-06, "loss": 0.4831, "step": 3474 }, { "epoch": 0.311861973031792, "grad_norm": 0.5266333931766255, "learning_rate": 9.999524950952215e-06, "loss": 0.544, "step": 3475 }, { "epoch": 0.311951717484463, "grad_norm": 0.5348530137688148, "learning_rate": 9.999517726091457e-06, "loss": 0.532, "step": 3476 }, { "epoch": 0.312041461937134, "grad_norm": 0.5474035909356965, "learning_rate": 9.999510446706966e-06, "loss": 0.5524, "step": 3477 }, { "epoch": 0.312131206389805, "grad_norm": 0.4769399442435277, "learning_rate": 9.999503112798822e-06, "loss": 0.4481, "step": 3478 }, { "epoch": 0.31222095084247603, "grad_norm": 0.515121393478533, "learning_rate": 9.999495724367106e-06, "loss": 0.5233, "step": 3479 }, { "epoch": 0.31231069529514704, "grad_norm": 0.49416668588154156, "learning_rate": 9.9994882814119e-06, "loss": 0.4631, "step": 3480 }, { "epoch": 0.3124004397478181, "grad_norm": 0.5517517708926186, "learning_rate": 9.999480783933284e-06, "loss": 0.5506, "step": 3481 }, { "epoch": 0.31249018420048913, "grad_norm": 0.5506838177883244, "learning_rate": 9.999473231931339e-06, "loss": 0.5183, "step": 3482 }, { "epoch": 0.31257992865316014, "grad_norm": 0.5164830419393327, "learning_rate": 9.999465625406145e-06, "loss": 0.4606, "step": 3483 }, { "epoch": 0.31266967310583116, "grad_norm": 0.49096838057323233, "learning_rate": 9.99945796435779e-06, "loss": 0.4999, "step": 3484 }, { "epoch": 0.3127594175585022, "grad_norm": 0.47430834444636116, "learning_rate": 9.999450248786355e-06, "loss": 0.4212, "step": 3485 }, { "epoch": 0.3128491620111732, "grad_norm": 0.5210802894981681, "learning_rate": 9.999442478691925e-06, "loss": 0.5242, "step": 3486 }, { "epoch": 0.3129389064638442, "grad_norm": 0.47136446865407955, "learning_rate": 9.999434654074584e-06, "loss": 0.4321, "step": 3487 }, { "epoch": 0.3130286509165152, "grad_norm": 0.47735861421289716, "learning_rate": 9.999426774934418e-06, "loss": 0.5013, "step": 3488 }, { "epoch": 0.31311839536918623, "grad_norm": 0.4791520832794593, "learning_rate": 9.999418841271511e-06, "loss": 0.4612, "step": 3489 }, { "epoch": 0.31320813982185725, "grad_norm": 0.5029719221128112, "learning_rate": 9.999410853085952e-06, "loss": 0.4457, "step": 3490 }, { "epoch": 0.31329788427452826, "grad_norm": 0.5297301709215337, "learning_rate": 9.999402810377827e-06, "loss": 0.5555, "step": 3491 }, { "epoch": 0.3133876287271993, "grad_norm": 0.5403443887134393, "learning_rate": 9.999394713147223e-06, "loss": 0.5641, "step": 3492 }, { "epoch": 0.31347737317987034, "grad_norm": 0.5087249389915268, "learning_rate": 9.999386561394231e-06, "loss": 0.4668, "step": 3493 }, { "epoch": 0.31356711763254136, "grad_norm": 0.5934303548456711, "learning_rate": 9.999378355118937e-06, "loss": 0.6465, "step": 3494 }, { "epoch": 0.3136568620852124, "grad_norm": 0.520035667704673, "learning_rate": 9.999370094321431e-06, "loss": 0.5386, "step": 3495 }, { "epoch": 0.3137466065378834, "grad_norm": 0.4852869502716211, "learning_rate": 9.999361779001803e-06, "loss": 0.4397, "step": 3496 }, { "epoch": 0.3138363509905544, "grad_norm": 0.5581111108053441, "learning_rate": 9.999353409160145e-06, "loss": 0.5561, "step": 3497 }, { "epoch": 0.3139260954432254, "grad_norm": 0.522910187736883, "learning_rate": 9.999344984796549e-06, "loss": 0.5029, "step": 3498 }, { "epoch": 0.31401583989589643, "grad_norm": 0.5178558063019876, "learning_rate": 9.999336505911103e-06, "loss": 0.4653, "step": 3499 }, { "epoch": 0.31410558434856745, "grad_norm": 0.5017590936499884, "learning_rate": 9.999327972503902e-06, "loss": 0.4779, "step": 3500 }, { "epoch": 0.31419532880123846, "grad_norm": 0.4822470308940445, "learning_rate": 9.99931938457504e-06, "loss": 0.4681, "step": 3501 }, { "epoch": 0.3142850732539095, "grad_norm": 0.5382490470771888, "learning_rate": 9.99931074212461e-06, "loss": 0.5433, "step": 3502 }, { "epoch": 0.3143748177065805, "grad_norm": 0.5214364114575988, "learning_rate": 9.999302045152705e-06, "loss": 0.5375, "step": 3503 }, { "epoch": 0.3144645621592515, "grad_norm": 0.5332754277731778, "learning_rate": 9.999293293659423e-06, "loss": 0.5654, "step": 3504 }, { "epoch": 0.3145543066119226, "grad_norm": 0.4691362564402262, "learning_rate": 9.999284487644854e-06, "loss": 0.4316, "step": 3505 }, { "epoch": 0.3146440510645936, "grad_norm": 0.47781015370707536, "learning_rate": 9.999275627109097e-06, "loss": 0.4483, "step": 3506 }, { "epoch": 0.3147337955172646, "grad_norm": 0.5096354576757883, "learning_rate": 9.999266712052251e-06, "loss": 0.4967, "step": 3507 }, { "epoch": 0.3148235399699356, "grad_norm": 0.49466290255972256, "learning_rate": 9.99925774247441e-06, "loss": 0.4867, "step": 3508 }, { "epoch": 0.31491328442260663, "grad_norm": 0.47342038521748697, "learning_rate": 9.999248718375673e-06, "loss": 0.4445, "step": 3509 }, { "epoch": 0.31500302887527765, "grad_norm": 0.4837757584221395, "learning_rate": 9.999239639756139e-06, "loss": 0.4801, "step": 3510 }, { "epoch": 0.31509277332794866, "grad_norm": 0.4775737977359385, "learning_rate": 9.999230506615908e-06, "loss": 0.4818, "step": 3511 }, { "epoch": 0.3151825177806197, "grad_norm": 0.47888348152223603, "learning_rate": 9.999221318955074e-06, "loss": 0.4594, "step": 3512 }, { "epoch": 0.3152722622332907, "grad_norm": 0.5401785527415478, "learning_rate": 9.999212076773744e-06, "loss": 0.5146, "step": 3513 }, { "epoch": 0.3153620066859617, "grad_norm": 0.5080296478666649, "learning_rate": 9.999202780072012e-06, "loss": 0.4902, "step": 3514 }, { "epoch": 0.3154517511386327, "grad_norm": 0.5225882317941088, "learning_rate": 9.999193428849988e-06, "loss": 0.556, "step": 3515 }, { "epoch": 0.3155414955913038, "grad_norm": 0.4632918247226631, "learning_rate": 9.999184023107766e-06, "loss": 0.4261, "step": 3516 }, { "epoch": 0.3156312400439748, "grad_norm": 0.4646950748798244, "learning_rate": 9.999174562845453e-06, "loss": 0.4472, "step": 3517 }, { "epoch": 0.3157209844966458, "grad_norm": 0.4847963250664037, "learning_rate": 9.999165048063152e-06, "loss": 0.4721, "step": 3518 }, { "epoch": 0.31581072894931683, "grad_norm": 0.5037060330528639, "learning_rate": 9.999155478760965e-06, "loss": 0.5136, "step": 3519 }, { "epoch": 0.31590047340198785, "grad_norm": 0.5075583984297591, "learning_rate": 9.999145854938996e-06, "loss": 0.4969, "step": 3520 }, { "epoch": 0.31599021785465886, "grad_norm": 0.4883486867055792, "learning_rate": 9.999136176597353e-06, "loss": 0.474, "step": 3521 }, { "epoch": 0.3160799623073299, "grad_norm": 0.5090825891472265, "learning_rate": 9.999126443736138e-06, "loss": 0.4944, "step": 3522 }, { "epoch": 0.3161697067600009, "grad_norm": 0.4355261195755429, "learning_rate": 9.999116656355459e-06, "loss": 0.4056, "step": 3523 }, { "epoch": 0.3162594512126719, "grad_norm": 0.4651167279000681, "learning_rate": 9.999106814455423e-06, "loss": 0.4623, "step": 3524 }, { "epoch": 0.3163491956653429, "grad_norm": 0.5320485190778851, "learning_rate": 9.999096918036138e-06, "loss": 0.5518, "step": 3525 }, { "epoch": 0.31643894011801393, "grad_norm": 0.48322150626211946, "learning_rate": 9.999086967097708e-06, "loss": 0.4717, "step": 3526 }, { "epoch": 0.31652868457068495, "grad_norm": 0.547106877649915, "learning_rate": 9.999076961640247e-06, "loss": 0.5549, "step": 3527 }, { "epoch": 0.316618429023356, "grad_norm": 0.48718616637538653, "learning_rate": 9.999066901663861e-06, "loss": 0.4709, "step": 3528 }, { "epoch": 0.31670817347602703, "grad_norm": 0.5350529059942177, "learning_rate": 9.99905678716866e-06, "loss": 0.4862, "step": 3529 }, { "epoch": 0.31679791792869805, "grad_norm": 0.5436991714027751, "learning_rate": 9.999046618154753e-06, "loss": 0.6021, "step": 3530 }, { "epoch": 0.31688766238136906, "grad_norm": 0.5207077961938656, "learning_rate": 9.999036394622254e-06, "loss": 0.5491, "step": 3531 }, { "epoch": 0.3169774068340401, "grad_norm": 0.5050998418086661, "learning_rate": 9.999026116571273e-06, "loss": 0.5198, "step": 3532 }, { "epoch": 0.3170671512867111, "grad_norm": 0.5159016214678906, "learning_rate": 9.99901578400192e-06, "loss": 0.5123, "step": 3533 }, { "epoch": 0.3171568957393821, "grad_norm": 0.5534277266805337, "learning_rate": 9.999005396914312e-06, "loss": 0.5602, "step": 3534 }, { "epoch": 0.3172466401920531, "grad_norm": 0.5128415743685488, "learning_rate": 9.998994955308558e-06, "loss": 0.5578, "step": 3535 }, { "epoch": 0.31733638464472413, "grad_norm": 0.5236740048699513, "learning_rate": 9.998984459184773e-06, "loss": 0.5591, "step": 3536 }, { "epoch": 0.31742612909739515, "grad_norm": 0.5045390889862144, "learning_rate": 9.998973908543075e-06, "loss": 0.4684, "step": 3537 }, { "epoch": 0.31751587355006616, "grad_norm": 0.5295743304498304, "learning_rate": 9.998963303383575e-06, "loss": 0.5165, "step": 3538 }, { "epoch": 0.31760561800273723, "grad_norm": 0.5424854539491865, "learning_rate": 9.99895264370639e-06, "loss": 0.5398, "step": 3539 }, { "epoch": 0.31769536245540825, "grad_norm": 0.5152907883256221, "learning_rate": 9.998941929511636e-06, "loss": 0.5529, "step": 3540 }, { "epoch": 0.31778510690807926, "grad_norm": 0.507308695611168, "learning_rate": 9.99893116079943e-06, "loss": 0.4817, "step": 3541 }, { "epoch": 0.3178748513607503, "grad_norm": 0.5217730261436307, "learning_rate": 9.99892033756989e-06, "loss": 0.464, "step": 3542 }, { "epoch": 0.3179645958134213, "grad_norm": 0.5230615391412309, "learning_rate": 9.998909459823135e-06, "loss": 0.5096, "step": 3543 }, { "epoch": 0.3180543402660923, "grad_norm": 0.4685937459105773, "learning_rate": 9.99889852755928e-06, "loss": 0.4597, "step": 3544 }, { "epoch": 0.3181440847187633, "grad_norm": 0.4665372746921635, "learning_rate": 9.998887540778446e-06, "loss": 0.4646, "step": 3545 }, { "epoch": 0.31823382917143433, "grad_norm": 0.4997189209106231, "learning_rate": 9.998876499480754e-06, "loss": 0.4733, "step": 3546 }, { "epoch": 0.31832357362410535, "grad_norm": 0.49082269697643527, "learning_rate": 9.998865403666324e-06, "loss": 0.5085, "step": 3547 }, { "epoch": 0.31841331807677636, "grad_norm": 0.5183139920959738, "learning_rate": 9.998854253335277e-06, "loss": 0.4813, "step": 3548 }, { "epoch": 0.3185030625294474, "grad_norm": 0.5267348186233308, "learning_rate": 9.998843048487733e-06, "loss": 0.5177, "step": 3549 }, { "epoch": 0.3185928069821184, "grad_norm": 0.4971218801267972, "learning_rate": 9.998831789123815e-06, "loss": 0.528, "step": 3550 }, { "epoch": 0.31868255143478946, "grad_norm": 0.47565651617245935, "learning_rate": 9.998820475243649e-06, "loss": 0.4625, "step": 3551 }, { "epoch": 0.3187722958874605, "grad_norm": 0.4381448358599306, "learning_rate": 9.998809106847353e-06, "loss": 0.4125, "step": 3552 }, { "epoch": 0.3188620403401315, "grad_norm": 0.5129926144624767, "learning_rate": 9.998797683935056e-06, "loss": 0.5013, "step": 3553 }, { "epoch": 0.3189517847928025, "grad_norm": 0.46730658810822817, "learning_rate": 9.998786206506878e-06, "loss": 0.4107, "step": 3554 }, { "epoch": 0.3190415292454735, "grad_norm": 0.5113478263198761, "learning_rate": 9.998774674562945e-06, "loss": 0.5311, "step": 3555 }, { "epoch": 0.31913127369814454, "grad_norm": 0.49365312958637797, "learning_rate": 9.998763088103387e-06, "loss": 0.4662, "step": 3556 }, { "epoch": 0.31922101815081555, "grad_norm": 0.4762564455745854, "learning_rate": 9.998751447128328e-06, "loss": 0.4773, "step": 3557 }, { "epoch": 0.31931076260348656, "grad_norm": 0.5242984261595173, "learning_rate": 9.99873975163789e-06, "loss": 0.4791, "step": 3558 }, { "epoch": 0.3194005070561576, "grad_norm": 0.5234662244051279, "learning_rate": 9.99872800163221e-06, "loss": 0.5484, "step": 3559 }, { "epoch": 0.3194902515088286, "grad_norm": 0.486604269914302, "learning_rate": 9.99871619711141e-06, "loss": 0.5325, "step": 3560 }, { "epoch": 0.3195799959614996, "grad_norm": 0.4691559046849344, "learning_rate": 9.998704338075617e-06, "loss": 0.4546, "step": 3561 }, { "epoch": 0.3196697404141706, "grad_norm": 0.5271252589609301, "learning_rate": 9.998692424524967e-06, "loss": 0.5468, "step": 3562 }, { "epoch": 0.3197594848668417, "grad_norm": 0.4954685854339204, "learning_rate": 9.998680456459584e-06, "loss": 0.4547, "step": 3563 }, { "epoch": 0.3198492293195127, "grad_norm": 0.4883152821699957, "learning_rate": 9.998668433879602e-06, "loss": 0.4655, "step": 3564 }, { "epoch": 0.3199389737721837, "grad_norm": 0.4951217841180322, "learning_rate": 9.998656356785147e-06, "loss": 0.4843, "step": 3565 }, { "epoch": 0.32002871822485474, "grad_norm": 0.46971659278954814, "learning_rate": 9.998644225176358e-06, "loss": 0.4631, "step": 3566 }, { "epoch": 0.32011846267752575, "grad_norm": 0.4717088051653059, "learning_rate": 9.998632039053364e-06, "loss": 0.4281, "step": 3567 }, { "epoch": 0.32020820713019676, "grad_norm": 0.4790603651905578, "learning_rate": 9.998619798416295e-06, "loss": 0.4655, "step": 3568 }, { "epoch": 0.3202979515828678, "grad_norm": 0.5126320729924679, "learning_rate": 9.99860750326529e-06, "loss": 0.48, "step": 3569 }, { "epoch": 0.3203876960355388, "grad_norm": 0.4812148884615563, "learning_rate": 9.99859515360048e-06, "loss": 0.3975, "step": 3570 }, { "epoch": 0.3204774404882098, "grad_norm": 0.47772339427079136, "learning_rate": 9.998582749421997e-06, "loss": 0.5014, "step": 3571 }, { "epoch": 0.3205671849408808, "grad_norm": 0.5012906842804342, "learning_rate": 9.998570290729982e-06, "loss": 0.4745, "step": 3572 }, { "epoch": 0.32065692939355184, "grad_norm": 0.46641936709367066, "learning_rate": 9.998557777524567e-06, "loss": 0.4801, "step": 3573 }, { "epoch": 0.3207466738462229, "grad_norm": 0.49850519069928995, "learning_rate": 9.99854520980589e-06, "loss": 0.4967, "step": 3574 }, { "epoch": 0.3208364182988939, "grad_norm": 0.4482490555646261, "learning_rate": 9.998532587574087e-06, "loss": 0.4519, "step": 3575 }, { "epoch": 0.32092616275156494, "grad_norm": 0.5205565683518593, "learning_rate": 9.998519910829297e-06, "loss": 0.51, "step": 3576 }, { "epoch": 0.32101590720423595, "grad_norm": 0.525404145066959, "learning_rate": 9.998507179571657e-06, "loss": 0.4549, "step": 3577 }, { "epoch": 0.32110565165690697, "grad_norm": 0.5229557429839129, "learning_rate": 9.998494393801307e-06, "loss": 0.5311, "step": 3578 }, { "epoch": 0.321195396109578, "grad_norm": 0.5217632684589245, "learning_rate": 9.998481553518385e-06, "loss": 0.4798, "step": 3579 }, { "epoch": 0.321285140562249, "grad_norm": 0.4615658688503485, "learning_rate": 9.998468658723032e-06, "loss": 0.4704, "step": 3580 }, { "epoch": 0.32137488501492, "grad_norm": 0.4782558488675695, "learning_rate": 9.998455709415389e-06, "loss": 0.5055, "step": 3581 }, { "epoch": 0.321464629467591, "grad_norm": 0.510248029504776, "learning_rate": 9.998442705595596e-06, "loss": 0.5435, "step": 3582 }, { "epoch": 0.32155437392026204, "grad_norm": 0.5000399854637008, "learning_rate": 9.998429647263796e-06, "loss": 0.5396, "step": 3583 }, { "epoch": 0.32164411837293305, "grad_norm": 0.47626107864770745, "learning_rate": 9.99841653442013e-06, "loss": 0.4513, "step": 3584 }, { "epoch": 0.32173386282560407, "grad_norm": 0.48978130133775744, "learning_rate": 9.998403367064744e-06, "loss": 0.5143, "step": 3585 }, { "epoch": 0.32182360727827514, "grad_norm": 0.5049429093535142, "learning_rate": 9.998390145197778e-06, "loss": 0.5124, "step": 3586 }, { "epoch": 0.32191335173094615, "grad_norm": 0.5491100044053899, "learning_rate": 9.998376868819377e-06, "loss": 0.6077, "step": 3587 }, { "epoch": 0.32200309618361717, "grad_norm": 0.49571296837526696, "learning_rate": 9.998363537929687e-06, "loss": 0.5224, "step": 3588 }, { "epoch": 0.3220928406362882, "grad_norm": 0.49577252825990664, "learning_rate": 9.998350152528852e-06, "loss": 0.4694, "step": 3589 }, { "epoch": 0.3221825850889592, "grad_norm": 0.49046874656820716, "learning_rate": 9.998336712617022e-06, "loss": 0.4932, "step": 3590 }, { "epoch": 0.3222723295416302, "grad_norm": 0.4621070035786717, "learning_rate": 9.998323218194339e-06, "loss": 0.4694, "step": 3591 }, { "epoch": 0.3223620739943012, "grad_norm": 0.447940456699236, "learning_rate": 9.99830966926095e-06, "loss": 0.3948, "step": 3592 }, { "epoch": 0.32245181844697224, "grad_norm": 0.4628150218160442, "learning_rate": 9.998296065817006e-06, "loss": 0.4416, "step": 3593 }, { "epoch": 0.32254156289964325, "grad_norm": 0.47884066655613566, "learning_rate": 9.998282407862654e-06, "loss": 0.4565, "step": 3594 }, { "epoch": 0.32263130735231427, "grad_norm": 0.4561112095944482, "learning_rate": 9.998268695398042e-06, "loss": 0.4072, "step": 3595 }, { "epoch": 0.3227210518049853, "grad_norm": 0.4756621509943659, "learning_rate": 9.99825492842332e-06, "loss": 0.4605, "step": 3596 }, { "epoch": 0.3228107962576563, "grad_norm": 0.4726521788065401, "learning_rate": 9.998241106938637e-06, "loss": 0.4253, "step": 3597 }, { "epoch": 0.32290054071032737, "grad_norm": 0.47484723982627275, "learning_rate": 9.998227230944147e-06, "loss": 0.4483, "step": 3598 }, { "epoch": 0.3229902851629984, "grad_norm": 0.5050801205277674, "learning_rate": 9.998213300439999e-06, "loss": 0.5127, "step": 3599 }, { "epoch": 0.3230800296156694, "grad_norm": 0.5293408951442989, "learning_rate": 9.998199315426346e-06, "loss": 0.5348, "step": 3600 }, { "epoch": 0.3231697740683404, "grad_norm": 0.45602982873350695, "learning_rate": 9.998185275903338e-06, "loss": 0.4024, "step": 3601 }, { "epoch": 0.3232595185210114, "grad_norm": 0.4716638400736757, "learning_rate": 9.998171181871133e-06, "loss": 0.4478, "step": 3602 }, { "epoch": 0.32334926297368244, "grad_norm": 0.5471951876573585, "learning_rate": 9.998157033329878e-06, "loss": 0.5302, "step": 3603 }, { "epoch": 0.32343900742635345, "grad_norm": 0.5194459441708204, "learning_rate": 9.998142830279733e-06, "loss": 0.4916, "step": 3604 }, { "epoch": 0.32352875187902447, "grad_norm": 0.49353224892818287, "learning_rate": 9.998128572720851e-06, "loss": 0.4675, "step": 3605 }, { "epoch": 0.3236184963316955, "grad_norm": 0.5093238007546269, "learning_rate": 9.998114260653388e-06, "loss": 0.4978, "step": 3606 }, { "epoch": 0.3237082407843665, "grad_norm": 0.5220107677040616, "learning_rate": 9.998099894077498e-06, "loss": 0.5609, "step": 3607 }, { "epoch": 0.3237979852370375, "grad_norm": 0.5090748811979067, "learning_rate": 9.998085472993339e-06, "loss": 0.4874, "step": 3608 }, { "epoch": 0.3238877296897086, "grad_norm": 0.4695029281393856, "learning_rate": 9.99807099740107e-06, "loss": 0.4741, "step": 3609 }, { "epoch": 0.3239774741423796, "grad_norm": 0.5159902358706995, "learning_rate": 9.998056467300846e-06, "loss": 0.484, "step": 3610 }, { "epoch": 0.3240672185950506, "grad_norm": 0.49664057534849837, "learning_rate": 9.998041882692828e-06, "loss": 0.4547, "step": 3611 }, { "epoch": 0.3241569630477216, "grad_norm": 0.5087632350983906, "learning_rate": 9.998027243577174e-06, "loss": 0.543, "step": 3612 }, { "epoch": 0.32424670750039264, "grad_norm": 0.47363352530103614, "learning_rate": 9.998012549954042e-06, "loss": 0.4538, "step": 3613 }, { "epoch": 0.32433645195306365, "grad_norm": 0.45957307938216435, "learning_rate": 9.997997801823594e-06, "loss": 0.4436, "step": 3614 }, { "epoch": 0.32442619640573467, "grad_norm": 0.5038953617194157, "learning_rate": 9.99798299918599e-06, "loss": 0.4999, "step": 3615 }, { "epoch": 0.3245159408584057, "grad_norm": 0.5203292810248359, "learning_rate": 9.997968142041392e-06, "loss": 0.4893, "step": 3616 }, { "epoch": 0.3246056853110767, "grad_norm": 0.4755946725416721, "learning_rate": 9.997953230389963e-06, "loss": 0.444, "step": 3617 }, { "epoch": 0.3246954297637477, "grad_norm": 0.4881244344736457, "learning_rate": 9.997938264231864e-06, "loss": 0.4039, "step": 3618 }, { "epoch": 0.3247851742164187, "grad_norm": 0.48115300768852826, "learning_rate": 9.997923243567257e-06, "loss": 0.4546, "step": 3619 }, { "epoch": 0.32487491866908974, "grad_norm": 0.49703801808282144, "learning_rate": 9.997908168396311e-06, "loss": 0.4636, "step": 3620 }, { "epoch": 0.3249646631217608, "grad_norm": 0.48628590723222176, "learning_rate": 9.997893038719186e-06, "loss": 0.4831, "step": 3621 }, { "epoch": 0.3250544075744318, "grad_norm": 0.5250412919424152, "learning_rate": 9.997877854536048e-06, "loss": 0.5135, "step": 3622 }, { "epoch": 0.32514415202710284, "grad_norm": 0.48103752570236874, "learning_rate": 9.997862615847063e-06, "loss": 0.3957, "step": 3623 }, { "epoch": 0.32523389647977385, "grad_norm": 0.5256184543434324, "learning_rate": 9.997847322652395e-06, "loss": 0.5018, "step": 3624 }, { "epoch": 0.32532364093244487, "grad_norm": 0.49524359357368325, "learning_rate": 9.997831974952214e-06, "loss": 0.4798, "step": 3625 }, { "epoch": 0.3254133853851159, "grad_norm": 0.488511831882348, "learning_rate": 9.997816572746686e-06, "loss": 0.4735, "step": 3626 }, { "epoch": 0.3255031298377869, "grad_norm": 0.5019656665253357, "learning_rate": 9.997801116035979e-06, "loss": 0.519, "step": 3627 }, { "epoch": 0.3255928742904579, "grad_norm": 0.45559886277012823, "learning_rate": 9.997785604820261e-06, "loss": 0.4143, "step": 3628 }, { "epoch": 0.3256826187431289, "grad_norm": 0.5411389195692783, "learning_rate": 9.997770039099702e-06, "loss": 0.5391, "step": 3629 }, { "epoch": 0.32577236319579994, "grad_norm": 0.5310036200325396, "learning_rate": 9.997754418874472e-06, "loss": 0.49, "step": 3630 }, { "epoch": 0.32586210764847096, "grad_norm": 0.7197434324970351, "learning_rate": 9.997738744144741e-06, "loss": 0.5001, "step": 3631 }, { "epoch": 0.325951852101142, "grad_norm": 0.436830642743409, "learning_rate": 9.997723014910679e-06, "loss": 0.4136, "step": 3632 }, { "epoch": 0.32604159655381304, "grad_norm": 0.5441435427492861, "learning_rate": 9.997707231172457e-06, "loss": 0.5343, "step": 3633 }, { "epoch": 0.32613134100648405, "grad_norm": 0.5129717688581826, "learning_rate": 9.99769139293025e-06, "loss": 0.5163, "step": 3634 }, { "epoch": 0.32622108545915507, "grad_norm": 0.4563445540344854, "learning_rate": 9.99767550018423e-06, "loss": 0.4297, "step": 3635 }, { "epoch": 0.3263108299118261, "grad_norm": 0.460713444498111, "learning_rate": 9.997659552934569e-06, "loss": 0.4354, "step": 3636 }, { "epoch": 0.3264005743644971, "grad_norm": 0.48771486693628996, "learning_rate": 9.99764355118144e-06, "loss": 0.508, "step": 3637 }, { "epoch": 0.3264903188171681, "grad_norm": 0.5079444737873239, "learning_rate": 9.99762749492502e-06, "loss": 0.5264, "step": 3638 }, { "epoch": 0.3265800632698391, "grad_norm": 0.4853987368754163, "learning_rate": 9.997611384165484e-06, "loss": 0.4528, "step": 3639 }, { "epoch": 0.32666980772251014, "grad_norm": 0.49476133282786683, "learning_rate": 9.997595218903004e-06, "loss": 0.5125, "step": 3640 }, { "epoch": 0.32675955217518116, "grad_norm": 0.4869142646947067, "learning_rate": 9.997578999137761e-06, "loss": 0.5031, "step": 3641 }, { "epoch": 0.32684929662785217, "grad_norm": 0.518708913077524, "learning_rate": 9.997562724869929e-06, "loss": 0.4574, "step": 3642 }, { "epoch": 0.3269390410805232, "grad_norm": 0.5177402417125431, "learning_rate": 9.997546396099687e-06, "loss": 0.4938, "step": 3643 }, { "epoch": 0.32702878553319426, "grad_norm": 0.5026312204586745, "learning_rate": 9.997530012827212e-06, "loss": 0.4782, "step": 3644 }, { "epoch": 0.32711852998586527, "grad_norm": 0.48273551744718973, "learning_rate": 9.997513575052685e-06, "loss": 0.4836, "step": 3645 }, { "epoch": 0.3272082744385363, "grad_norm": 0.49016278514691386, "learning_rate": 9.997497082776282e-06, "loss": 0.4524, "step": 3646 }, { "epoch": 0.3272980188912073, "grad_norm": 0.4847467244860986, "learning_rate": 9.997480535998182e-06, "loss": 0.4497, "step": 3647 }, { "epoch": 0.3273877633438783, "grad_norm": 0.5111729806890503, "learning_rate": 9.997463934718571e-06, "loss": 0.5009, "step": 3648 }, { "epoch": 0.3274775077965493, "grad_norm": 0.4666800212223, "learning_rate": 9.997447278937626e-06, "loss": 0.4448, "step": 3649 }, { "epoch": 0.32756725224922034, "grad_norm": 0.48663002060297345, "learning_rate": 9.997430568655529e-06, "loss": 0.4773, "step": 3650 }, { "epoch": 0.32765699670189136, "grad_norm": 0.4988394597527181, "learning_rate": 9.997413803872464e-06, "loss": 0.4793, "step": 3651 }, { "epoch": 0.32774674115456237, "grad_norm": 0.5301282790040001, "learning_rate": 9.99739698458861e-06, "loss": 0.5357, "step": 3652 }, { "epoch": 0.3278364856072334, "grad_norm": 0.5049550578851287, "learning_rate": 9.997380110804155e-06, "loss": 0.5115, "step": 3653 }, { "epoch": 0.3279262300599044, "grad_norm": 0.5400485252705886, "learning_rate": 9.99736318251928e-06, "loss": 0.5913, "step": 3654 }, { "epoch": 0.3280159745125754, "grad_norm": 0.5247406400498386, "learning_rate": 9.99734619973417e-06, "loss": 0.4937, "step": 3655 }, { "epoch": 0.3281057189652465, "grad_norm": 0.4991842299685514, "learning_rate": 9.997329162449013e-06, "loss": 0.4508, "step": 3656 }, { "epoch": 0.3281954634179175, "grad_norm": 0.4615854778504511, "learning_rate": 9.99731207066399e-06, "loss": 0.4417, "step": 3657 }, { "epoch": 0.3282852078705885, "grad_norm": 0.4799779381092766, "learning_rate": 9.99729492437929e-06, "loss": 0.4884, "step": 3658 }, { "epoch": 0.32837495232325953, "grad_norm": 0.49965102972325065, "learning_rate": 9.997277723595102e-06, "loss": 0.4873, "step": 3659 }, { "epoch": 0.32846469677593054, "grad_norm": 0.47912111832120674, "learning_rate": 9.997260468311612e-06, "loss": 0.4517, "step": 3660 }, { "epoch": 0.32855444122860156, "grad_norm": 0.4841215368242534, "learning_rate": 9.997243158529005e-06, "loss": 0.4543, "step": 3661 }, { "epoch": 0.32864418568127257, "grad_norm": 0.498559916087592, "learning_rate": 9.997225794247473e-06, "loss": 0.5065, "step": 3662 }, { "epoch": 0.3287339301339436, "grad_norm": 0.5170698629280305, "learning_rate": 9.997208375467207e-06, "loss": 0.4928, "step": 3663 }, { "epoch": 0.3288236745866146, "grad_norm": 0.4739365778951504, "learning_rate": 9.997190902188395e-06, "loss": 0.4863, "step": 3664 }, { "epoch": 0.3289134190392856, "grad_norm": 0.5262425017534018, "learning_rate": 9.997173374411223e-06, "loss": 0.5153, "step": 3665 }, { "epoch": 0.32900316349195663, "grad_norm": 0.5414455542753613, "learning_rate": 9.997155792135892e-06, "loss": 0.5873, "step": 3666 }, { "epoch": 0.3290929079446277, "grad_norm": 0.5351694741933253, "learning_rate": 9.997138155362585e-06, "loss": 0.488, "step": 3667 }, { "epoch": 0.3291826523972987, "grad_norm": 0.4967509051712156, "learning_rate": 9.997120464091499e-06, "loss": 0.4674, "step": 3668 }, { "epoch": 0.32927239684996973, "grad_norm": 0.5038707394104289, "learning_rate": 9.997102718322825e-06, "loss": 0.4485, "step": 3669 }, { "epoch": 0.32936214130264074, "grad_norm": 0.4889262231778397, "learning_rate": 9.997084918056757e-06, "loss": 0.4724, "step": 3670 }, { "epoch": 0.32945188575531176, "grad_norm": 0.5408158312586371, "learning_rate": 9.99706706329349e-06, "loss": 0.514, "step": 3671 }, { "epoch": 0.32954163020798277, "grad_norm": 0.5297237876587554, "learning_rate": 9.997049154033216e-06, "loss": 0.5235, "step": 3672 }, { "epoch": 0.3296313746606538, "grad_norm": 0.512006367501591, "learning_rate": 9.997031190276134e-06, "loss": 0.5212, "step": 3673 }, { "epoch": 0.3297211191133248, "grad_norm": 0.4729060034200244, "learning_rate": 9.997013172022439e-06, "loss": 0.3948, "step": 3674 }, { "epoch": 0.3298108635659958, "grad_norm": 0.5004073174806355, "learning_rate": 9.996995099272325e-06, "loss": 0.436, "step": 3675 }, { "epoch": 0.32990060801866683, "grad_norm": 0.4700566693579742, "learning_rate": 9.996976972025991e-06, "loss": 0.4541, "step": 3676 }, { "epoch": 0.32999035247133784, "grad_norm": 0.4544624596228173, "learning_rate": 9.996958790283635e-06, "loss": 0.4415, "step": 3677 }, { "epoch": 0.33008009692400886, "grad_norm": 0.5145296056685753, "learning_rate": 9.996940554045455e-06, "loss": 0.5483, "step": 3678 }, { "epoch": 0.33016984137667993, "grad_norm": 0.5024712797051497, "learning_rate": 9.99692226331165e-06, "loss": 0.5007, "step": 3679 }, { "epoch": 0.33025958582935094, "grad_norm": 0.5669279059329854, "learning_rate": 9.996903918082419e-06, "loss": 0.5462, "step": 3680 }, { "epoch": 0.33034933028202196, "grad_norm": 0.4889163911950141, "learning_rate": 9.996885518357961e-06, "loss": 0.4463, "step": 3681 }, { "epoch": 0.330439074734693, "grad_norm": 0.5417831694596521, "learning_rate": 9.996867064138479e-06, "loss": 0.5276, "step": 3682 }, { "epoch": 0.330528819187364, "grad_norm": 0.49041177012995013, "learning_rate": 9.996848555424173e-06, "loss": 0.4453, "step": 3683 }, { "epoch": 0.330618563640035, "grad_norm": 0.4750371357831901, "learning_rate": 9.996829992215245e-06, "loss": 0.4512, "step": 3684 }, { "epoch": 0.330708308092706, "grad_norm": 0.48244210873615445, "learning_rate": 9.996811374511896e-06, "loss": 0.4852, "step": 3685 }, { "epoch": 0.33079805254537703, "grad_norm": 0.5115865233130431, "learning_rate": 9.996792702314331e-06, "loss": 0.5538, "step": 3686 }, { "epoch": 0.33088779699804804, "grad_norm": 0.5162220377238694, "learning_rate": 9.996773975622755e-06, "loss": 0.5151, "step": 3687 }, { "epoch": 0.33097754145071906, "grad_norm": 0.45656320421111385, "learning_rate": 9.99675519443737e-06, "loss": 0.4513, "step": 3688 }, { "epoch": 0.3310672859033901, "grad_norm": 0.5184529233655806, "learning_rate": 9.99673635875838e-06, "loss": 0.5659, "step": 3689 }, { "epoch": 0.3311570303560611, "grad_norm": 0.5110227305604216, "learning_rate": 9.99671746858599e-06, "loss": 0.5404, "step": 3690 }, { "epoch": 0.33124677480873216, "grad_norm": 0.5127130239583044, "learning_rate": 9.99669852392041e-06, "loss": 0.4793, "step": 3691 }, { "epoch": 0.3313365192614032, "grad_norm": 0.5186630485518984, "learning_rate": 9.996679524761844e-06, "loss": 0.5019, "step": 3692 }, { "epoch": 0.3314262637140742, "grad_norm": 0.502997736707097, "learning_rate": 9.9966604711105e-06, "loss": 0.4735, "step": 3693 }, { "epoch": 0.3315160081667452, "grad_norm": 0.5166913370945686, "learning_rate": 9.996641362966583e-06, "loss": 0.4827, "step": 3694 }, { "epoch": 0.3316057526194162, "grad_norm": 0.5130451472218475, "learning_rate": 9.996622200330304e-06, "loss": 0.5036, "step": 3695 }, { "epoch": 0.33169549707208723, "grad_norm": 0.48636209286764326, "learning_rate": 9.996602983201873e-06, "loss": 0.4783, "step": 3696 }, { "epoch": 0.33178524152475825, "grad_norm": 0.5222663168450478, "learning_rate": 9.996583711581498e-06, "loss": 0.55, "step": 3697 }, { "epoch": 0.33187498597742926, "grad_norm": 0.5041783281625765, "learning_rate": 9.996564385469388e-06, "loss": 0.513, "step": 3698 }, { "epoch": 0.3319647304301003, "grad_norm": 0.4964024488688456, "learning_rate": 9.996545004865757e-06, "loss": 0.4882, "step": 3699 }, { "epoch": 0.3320544748827713, "grad_norm": 0.511616503207808, "learning_rate": 9.996525569770812e-06, "loss": 0.503, "step": 3700 }, { "epoch": 0.3321442193354423, "grad_norm": 0.5491125247407873, "learning_rate": 9.996506080184768e-06, "loss": 0.4768, "step": 3701 }, { "epoch": 0.3322339637881134, "grad_norm": 0.505661411757389, "learning_rate": 9.996486536107839e-06, "loss": 0.5275, "step": 3702 }, { "epoch": 0.3323237082407844, "grad_norm": 0.5054445534293907, "learning_rate": 9.996466937540234e-06, "loss": 0.4924, "step": 3703 }, { "epoch": 0.3324134526934554, "grad_norm": 0.5118268835351368, "learning_rate": 9.99644728448217e-06, "loss": 0.5007, "step": 3704 }, { "epoch": 0.3325031971461264, "grad_norm": 0.5203927281409462, "learning_rate": 9.99642757693386e-06, "loss": 0.4908, "step": 3705 }, { "epoch": 0.33259294159879743, "grad_norm": 0.46584017375385595, "learning_rate": 9.996407814895518e-06, "loss": 0.4644, "step": 3706 }, { "epoch": 0.33268268605146845, "grad_norm": 0.5551299678611339, "learning_rate": 9.996387998367361e-06, "loss": 0.6037, "step": 3707 }, { "epoch": 0.33277243050413946, "grad_norm": 0.48916460995976097, "learning_rate": 9.996368127349607e-06, "loss": 0.4307, "step": 3708 }, { "epoch": 0.3328621749568105, "grad_norm": 0.4813845099838155, "learning_rate": 9.996348201842467e-06, "loss": 0.4424, "step": 3709 }, { "epoch": 0.3329519194094815, "grad_norm": 0.4810774330827198, "learning_rate": 9.996328221846163e-06, "loss": 0.4432, "step": 3710 }, { "epoch": 0.3330416638621525, "grad_norm": 0.5400937685892152, "learning_rate": 9.996308187360913e-06, "loss": 0.5352, "step": 3711 }, { "epoch": 0.3331314083148235, "grad_norm": 0.47812603201069015, "learning_rate": 9.996288098386932e-06, "loss": 0.4676, "step": 3712 }, { "epoch": 0.33322115276749453, "grad_norm": 0.49020884564730455, "learning_rate": 9.996267954924444e-06, "loss": 0.4566, "step": 3713 }, { "epoch": 0.3333108972201656, "grad_norm": 0.4530431878153158, "learning_rate": 9.996247756973663e-06, "loss": 0.467, "step": 3714 }, { "epoch": 0.3334006416728366, "grad_norm": 0.4860968164510744, "learning_rate": 9.996227504534814e-06, "loss": 0.4639, "step": 3715 }, { "epoch": 0.33349038612550763, "grad_norm": 0.5330328538067833, "learning_rate": 9.996207197608114e-06, "loss": 0.5197, "step": 3716 }, { "epoch": 0.33358013057817865, "grad_norm": 0.4846259535957292, "learning_rate": 9.996186836193788e-06, "loss": 0.4857, "step": 3717 }, { "epoch": 0.33366987503084966, "grad_norm": 0.48256571253560676, "learning_rate": 9.996166420292058e-06, "loss": 0.4573, "step": 3718 }, { "epoch": 0.3337596194835207, "grad_norm": 0.4727930815306409, "learning_rate": 9.996145949903143e-06, "loss": 0.487, "step": 3719 }, { "epoch": 0.3338493639361917, "grad_norm": 0.5692293976270073, "learning_rate": 9.99612542502727e-06, "loss": 0.5919, "step": 3720 }, { "epoch": 0.3339391083888627, "grad_norm": 0.4879181466316751, "learning_rate": 9.996104845664659e-06, "loss": 0.4813, "step": 3721 }, { "epoch": 0.3340288528415337, "grad_norm": 0.48513467335465477, "learning_rate": 9.996084211815537e-06, "loss": 0.4767, "step": 3722 }, { "epoch": 0.33411859729420473, "grad_norm": 0.4667267163146928, "learning_rate": 9.99606352348013e-06, "loss": 0.4278, "step": 3723 }, { "epoch": 0.33420834174687575, "grad_norm": 0.4604949105002181, "learning_rate": 9.996042780658663e-06, "loss": 0.4418, "step": 3724 }, { "epoch": 0.3342980861995468, "grad_norm": 0.5899376142896244, "learning_rate": 9.99602198335136e-06, "loss": 0.6486, "step": 3725 }, { "epoch": 0.33438783065221783, "grad_norm": 0.5107997614979422, "learning_rate": 9.996001131558452e-06, "loss": 0.5139, "step": 3726 }, { "epoch": 0.33447757510488885, "grad_norm": 0.5380891204696783, "learning_rate": 9.995980225280161e-06, "loss": 0.5293, "step": 3727 }, { "epoch": 0.33456731955755986, "grad_norm": 0.44976529313909813, "learning_rate": 9.995959264516719e-06, "loss": 0.4115, "step": 3728 }, { "epoch": 0.3346570640102309, "grad_norm": 0.5216303147989283, "learning_rate": 9.995938249268353e-06, "loss": 0.5062, "step": 3729 }, { "epoch": 0.3347468084629019, "grad_norm": 0.4826521846125581, "learning_rate": 9.995917179535293e-06, "loss": 0.462, "step": 3730 }, { "epoch": 0.3348365529155729, "grad_norm": 0.451583121918391, "learning_rate": 9.995896055317768e-06, "loss": 0.4261, "step": 3731 }, { "epoch": 0.3349262973682439, "grad_norm": 0.5076406477822689, "learning_rate": 9.99587487661601e-06, "loss": 0.4901, "step": 3732 }, { "epoch": 0.33501604182091493, "grad_norm": 0.47569481648402273, "learning_rate": 9.995853643430247e-06, "loss": 0.4548, "step": 3733 }, { "epoch": 0.33510578627358595, "grad_norm": 0.5087090825209654, "learning_rate": 9.995832355760713e-06, "loss": 0.4939, "step": 3734 }, { "epoch": 0.33519553072625696, "grad_norm": 0.4938820312298494, "learning_rate": 9.995811013607639e-06, "loss": 0.4304, "step": 3735 }, { "epoch": 0.335285275178928, "grad_norm": 0.4963638576026584, "learning_rate": 9.995789616971258e-06, "loss": 0.4892, "step": 3736 }, { "epoch": 0.33537501963159905, "grad_norm": 0.49801987350546967, "learning_rate": 9.995768165851807e-06, "loss": 0.4707, "step": 3737 }, { "epoch": 0.33546476408427006, "grad_norm": 0.49746086849242593, "learning_rate": 9.995746660249513e-06, "loss": 0.4972, "step": 3738 }, { "epoch": 0.3355545085369411, "grad_norm": 0.49025812836206634, "learning_rate": 9.995725100164614e-06, "loss": 0.4948, "step": 3739 }, { "epoch": 0.3356442529896121, "grad_norm": 0.5277106976246867, "learning_rate": 9.995703485597346e-06, "loss": 0.493, "step": 3740 }, { "epoch": 0.3357339974422831, "grad_norm": 0.4898324654431945, "learning_rate": 9.995681816547945e-06, "loss": 0.5234, "step": 3741 }, { "epoch": 0.3358237418949541, "grad_norm": 0.5437386304028461, "learning_rate": 9.995660093016644e-06, "loss": 0.5506, "step": 3742 }, { "epoch": 0.33591348634762513, "grad_norm": 0.4615300855730376, "learning_rate": 9.995638315003683e-06, "loss": 0.4372, "step": 3743 }, { "epoch": 0.33600323080029615, "grad_norm": 0.5117382510907812, "learning_rate": 9.995616482509298e-06, "loss": 0.5099, "step": 3744 }, { "epoch": 0.33609297525296716, "grad_norm": 0.48054402062098645, "learning_rate": 9.995594595533729e-06, "loss": 0.4648, "step": 3745 }, { "epoch": 0.3361827197056382, "grad_norm": 0.475814174187611, "learning_rate": 9.995572654077212e-06, "loss": 0.4695, "step": 3746 }, { "epoch": 0.3362724641583092, "grad_norm": 0.521116187083403, "learning_rate": 9.99555065813999e-06, "loss": 0.5057, "step": 3747 }, { "epoch": 0.3363622086109802, "grad_norm": 0.49950130747693317, "learning_rate": 9.995528607722298e-06, "loss": 0.4834, "step": 3748 }, { "epoch": 0.3364519530636513, "grad_norm": 0.5504572893744845, "learning_rate": 9.995506502824381e-06, "loss": 0.5136, "step": 3749 }, { "epoch": 0.3365416975163223, "grad_norm": 0.4653620567958048, "learning_rate": 9.995484343446477e-06, "loss": 0.4695, "step": 3750 }, { "epoch": 0.3366314419689933, "grad_norm": 0.5332184448561069, "learning_rate": 9.995462129588828e-06, "loss": 0.5386, "step": 3751 }, { "epoch": 0.3367211864216643, "grad_norm": 0.5357122576916153, "learning_rate": 9.995439861251679e-06, "loss": 0.5484, "step": 3752 }, { "epoch": 0.33681093087433533, "grad_norm": 0.4693397475852552, "learning_rate": 9.99541753843527e-06, "loss": 0.478, "step": 3753 }, { "epoch": 0.33690067532700635, "grad_norm": 0.5319711970216447, "learning_rate": 9.995395161139845e-06, "loss": 0.5497, "step": 3754 }, { "epoch": 0.33699041977967736, "grad_norm": 0.5029213245127169, "learning_rate": 9.995372729365649e-06, "loss": 0.491, "step": 3755 }, { "epoch": 0.3370801642323484, "grad_norm": 0.5185691807322116, "learning_rate": 9.995350243112927e-06, "loss": 0.5278, "step": 3756 }, { "epoch": 0.3371699086850194, "grad_norm": 0.425041424931347, "learning_rate": 9.99532770238192e-06, "loss": 0.4156, "step": 3757 }, { "epoch": 0.3372596531376904, "grad_norm": 0.4680515017629114, "learning_rate": 9.995305107172881e-06, "loss": 0.4559, "step": 3758 }, { "epoch": 0.3373493975903614, "grad_norm": 0.49107759379381044, "learning_rate": 9.99528245748605e-06, "loss": 0.5022, "step": 3759 }, { "epoch": 0.3374391420430325, "grad_norm": 0.46041596823569697, "learning_rate": 9.995259753321678e-06, "loss": 0.4347, "step": 3760 }, { "epoch": 0.3375288864957035, "grad_norm": 0.48914577753467364, "learning_rate": 9.99523699468001e-06, "loss": 0.486, "step": 3761 }, { "epoch": 0.3376186309483745, "grad_norm": 0.517552985865598, "learning_rate": 9.995214181561296e-06, "loss": 0.5333, "step": 3762 }, { "epoch": 0.33770837540104554, "grad_norm": 0.4511680952579214, "learning_rate": 9.995191313965783e-06, "loss": 0.4313, "step": 3763 }, { "epoch": 0.33779811985371655, "grad_norm": 0.534735234664256, "learning_rate": 9.995168391893723e-06, "loss": 0.5452, "step": 3764 }, { "epoch": 0.33788786430638756, "grad_norm": 0.449848640909819, "learning_rate": 9.995145415345364e-06, "loss": 0.4283, "step": 3765 }, { "epoch": 0.3379776087590586, "grad_norm": 0.46024055068635733, "learning_rate": 9.995122384320956e-06, "loss": 0.4589, "step": 3766 }, { "epoch": 0.3380673532117296, "grad_norm": 0.5103380186739229, "learning_rate": 9.995099298820753e-06, "loss": 0.4961, "step": 3767 }, { "epoch": 0.3381570976644006, "grad_norm": 0.4673200342759106, "learning_rate": 9.995076158845005e-06, "loss": 0.4711, "step": 3768 }, { "epoch": 0.3382468421170716, "grad_norm": 0.4591800453379483, "learning_rate": 9.995052964393964e-06, "loss": 0.4183, "step": 3769 }, { "epoch": 0.33833658656974264, "grad_norm": 0.5480725813105163, "learning_rate": 9.995029715467883e-06, "loss": 0.5, "step": 3770 }, { "epoch": 0.33842633102241365, "grad_norm": 0.4447417807002702, "learning_rate": 9.995006412067014e-06, "loss": 0.4226, "step": 3771 }, { "epoch": 0.3385160754750847, "grad_norm": 0.5175980999465714, "learning_rate": 9.994983054191616e-06, "loss": 0.5119, "step": 3772 }, { "epoch": 0.33860581992775574, "grad_norm": 0.4946430975554593, "learning_rate": 9.99495964184194e-06, "loss": 0.452, "step": 3773 }, { "epoch": 0.33869556438042675, "grad_norm": 0.5465181738425291, "learning_rate": 9.994936175018242e-06, "loss": 0.6321, "step": 3774 }, { "epoch": 0.33878530883309776, "grad_norm": 0.506346200686125, "learning_rate": 9.994912653720777e-06, "loss": 0.4997, "step": 3775 }, { "epoch": 0.3388750532857688, "grad_norm": 0.5059981391283748, "learning_rate": 9.994889077949802e-06, "loss": 0.498, "step": 3776 }, { "epoch": 0.3389647977384398, "grad_norm": 0.5396296253747636, "learning_rate": 9.994865447705577e-06, "loss": 0.5041, "step": 3777 }, { "epoch": 0.3390545421911108, "grad_norm": 0.4803455220208868, "learning_rate": 9.994841762988356e-06, "loss": 0.4914, "step": 3778 }, { "epoch": 0.3391442866437818, "grad_norm": 0.4670427752291253, "learning_rate": 9.994818023798399e-06, "loss": 0.4588, "step": 3779 }, { "epoch": 0.33923403109645284, "grad_norm": 0.5096364504090303, "learning_rate": 9.994794230135965e-06, "loss": 0.4349, "step": 3780 }, { "epoch": 0.33932377554912385, "grad_norm": 0.4697026490250955, "learning_rate": 9.994770382001313e-06, "loss": 0.474, "step": 3781 }, { "epoch": 0.33941352000179487, "grad_norm": 0.4711574697785046, "learning_rate": 9.994746479394702e-06, "loss": 0.4441, "step": 3782 }, { "epoch": 0.3395032644544659, "grad_norm": 0.4821358218154005, "learning_rate": 9.994722522316395e-06, "loss": 0.4736, "step": 3783 }, { "epoch": 0.33959300890713695, "grad_norm": 0.5169807546488049, "learning_rate": 9.994698510766651e-06, "loss": 0.5057, "step": 3784 }, { "epoch": 0.33968275335980797, "grad_norm": 0.4648783557133812, "learning_rate": 9.994674444745733e-06, "loss": 0.4565, "step": 3785 }, { "epoch": 0.339772497812479, "grad_norm": 0.4924779637019771, "learning_rate": 9.994650324253905e-06, "loss": 0.5127, "step": 3786 }, { "epoch": 0.33986224226515, "grad_norm": 0.4729455973828661, "learning_rate": 9.994626149291426e-06, "loss": 0.4568, "step": 3787 }, { "epoch": 0.339951986717821, "grad_norm": 0.4979171375992044, "learning_rate": 9.994601919858563e-06, "loss": 0.4482, "step": 3788 }, { "epoch": 0.340041731170492, "grad_norm": 0.5154065925210297, "learning_rate": 9.99457763595558e-06, "loss": 0.5436, "step": 3789 }, { "epoch": 0.34013147562316304, "grad_norm": 0.5206071324849191, "learning_rate": 9.994553297582741e-06, "loss": 0.4851, "step": 3790 }, { "epoch": 0.34022122007583405, "grad_norm": 0.5167166907241247, "learning_rate": 9.994528904740313e-06, "loss": 0.4878, "step": 3791 }, { "epoch": 0.34031096452850507, "grad_norm": 0.48869521930489923, "learning_rate": 9.994504457428557e-06, "loss": 0.482, "step": 3792 }, { "epoch": 0.3404007089811761, "grad_norm": 0.5030947618025808, "learning_rate": 9.994479955647746e-06, "loss": 0.5178, "step": 3793 }, { "epoch": 0.3404904534338471, "grad_norm": 0.5052347191641487, "learning_rate": 9.994455399398145e-06, "loss": 0.4702, "step": 3794 }, { "epoch": 0.34058019788651817, "grad_norm": 0.48965165423269147, "learning_rate": 9.994430788680018e-06, "loss": 0.4818, "step": 3795 }, { "epoch": 0.3406699423391892, "grad_norm": 0.4978897570558449, "learning_rate": 9.99440612349364e-06, "loss": 0.4934, "step": 3796 }, { "epoch": 0.3407596867918602, "grad_norm": 0.5339920613267953, "learning_rate": 9.994381403839275e-06, "loss": 0.4459, "step": 3797 }, { "epoch": 0.3408494312445312, "grad_norm": 0.47061382042056865, "learning_rate": 9.994356629717195e-06, "loss": 0.4861, "step": 3798 }, { "epoch": 0.3409391756972022, "grad_norm": 0.45923063064021724, "learning_rate": 9.994331801127668e-06, "loss": 0.4765, "step": 3799 }, { "epoch": 0.34102892014987324, "grad_norm": 0.48007058452902235, "learning_rate": 9.994306918070968e-06, "loss": 0.5226, "step": 3800 }, { "epoch": 0.34111866460254425, "grad_norm": 0.45902381918041985, "learning_rate": 9.994281980547364e-06, "loss": 0.482, "step": 3801 }, { "epoch": 0.34120840905521527, "grad_norm": 0.5006385042326451, "learning_rate": 9.994256988557128e-06, "loss": 0.4907, "step": 3802 }, { "epoch": 0.3412981535078863, "grad_norm": 0.5125624993010304, "learning_rate": 9.994231942100533e-06, "loss": 0.4922, "step": 3803 }, { "epoch": 0.3413878979605573, "grad_norm": 0.46054589767706716, "learning_rate": 9.994206841177854e-06, "loss": 0.4313, "step": 3804 }, { "epoch": 0.3414776424132283, "grad_norm": 0.5720975704422406, "learning_rate": 9.99418168578936e-06, "loss": 0.5723, "step": 3805 }, { "epoch": 0.3415673868658993, "grad_norm": 0.5175204470667152, "learning_rate": 9.99415647593533e-06, "loss": 0.5463, "step": 3806 }, { "epoch": 0.3416571313185704, "grad_norm": 0.4526737199267562, "learning_rate": 9.994131211616038e-06, "loss": 0.3961, "step": 3807 }, { "epoch": 0.3417468757712414, "grad_norm": 0.4950048382450881, "learning_rate": 9.994105892831758e-06, "loss": 0.4986, "step": 3808 }, { "epoch": 0.3418366202239124, "grad_norm": 0.48342107217035435, "learning_rate": 9.994080519582766e-06, "loss": 0.4639, "step": 3809 }, { "epoch": 0.34192636467658344, "grad_norm": 0.44397762962902904, "learning_rate": 9.99405509186934e-06, "loss": 0.4007, "step": 3810 }, { "epoch": 0.34201610912925445, "grad_norm": 0.510961622014436, "learning_rate": 9.994029609691758e-06, "loss": 0.4826, "step": 3811 }, { "epoch": 0.34210585358192547, "grad_norm": 0.5254778829768155, "learning_rate": 9.994004073050297e-06, "loss": 0.4791, "step": 3812 }, { "epoch": 0.3421955980345965, "grad_norm": 0.48223590400069427, "learning_rate": 9.993978481945234e-06, "loss": 0.5016, "step": 3813 }, { "epoch": 0.3422853424872675, "grad_norm": 0.5274804827020726, "learning_rate": 9.99395283637685e-06, "loss": 0.4973, "step": 3814 }, { "epoch": 0.3423750869399385, "grad_norm": 0.495853956838692, "learning_rate": 9.993927136345425e-06, "loss": 0.4799, "step": 3815 }, { "epoch": 0.3424648313926095, "grad_norm": 0.5514867296690685, "learning_rate": 9.993901381851237e-06, "loss": 0.545, "step": 3816 }, { "epoch": 0.34255457584528054, "grad_norm": 0.5067203519105725, "learning_rate": 9.993875572894568e-06, "loss": 0.4864, "step": 3817 }, { "epoch": 0.3426443202979516, "grad_norm": 0.4875695269289469, "learning_rate": 9.993849709475701e-06, "loss": 0.4776, "step": 3818 }, { "epoch": 0.3427340647506226, "grad_norm": 0.5148901615203706, "learning_rate": 9.993823791594917e-06, "loss": 0.5431, "step": 3819 }, { "epoch": 0.34282380920329364, "grad_norm": 0.526506528398691, "learning_rate": 9.993797819252497e-06, "loss": 0.5262, "step": 3820 }, { "epoch": 0.34291355365596465, "grad_norm": 0.4566243304065991, "learning_rate": 9.993771792448726e-06, "loss": 0.4315, "step": 3821 }, { "epoch": 0.34300329810863567, "grad_norm": 0.4721116552945224, "learning_rate": 9.99374571118389e-06, "loss": 0.4578, "step": 3822 }, { "epoch": 0.3430930425613067, "grad_norm": 0.5155399524358353, "learning_rate": 9.993719575458268e-06, "loss": 0.4815, "step": 3823 }, { "epoch": 0.3431827870139777, "grad_norm": 0.5440870691812841, "learning_rate": 9.993693385272148e-06, "loss": 0.5522, "step": 3824 }, { "epoch": 0.3432725314666487, "grad_norm": 0.49152078377188074, "learning_rate": 9.993667140625816e-06, "loss": 0.5165, "step": 3825 }, { "epoch": 0.3433622759193197, "grad_norm": 0.5339577704345355, "learning_rate": 9.993640841519559e-06, "loss": 0.5613, "step": 3826 }, { "epoch": 0.34345202037199074, "grad_norm": 0.5099875489883555, "learning_rate": 9.993614487953662e-06, "loss": 0.52, "step": 3827 }, { "epoch": 0.34354176482466176, "grad_norm": 0.5294948998886932, "learning_rate": 9.993588079928413e-06, "loss": 0.5608, "step": 3828 }, { "epoch": 0.34363150927733277, "grad_norm": 0.48113395325824443, "learning_rate": 9.993561617444101e-06, "loss": 0.469, "step": 3829 }, { "epoch": 0.34372125373000384, "grad_norm": 0.5302436725021545, "learning_rate": 9.993535100501012e-06, "loss": 0.5559, "step": 3830 }, { "epoch": 0.34381099818267485, "grad_norm": 0.468767900699538, "learning_rate": 9.993508529099437e-06, "loss": 0.4241, "step": 3831 }, { "epoch": 0.34390074263534587, "grad_norm": 0.4975525169304086, "learning_rate": 9.993481903239667e-06, "loss": 0.5128, "step": 3832 }, { "epoch": 0.3439904870880169, "grad_norm": 0.5277384484731528, "learning_rate": 9.99345522292199e-06, "loss": 0.4811, "step": 3833 }, { "epoch": 0.3440802315406879, "grad_norm": 0.5174398959251518, "learning_rate": 9.993428488146698e-06, "loss": 0.5145, "step": 3834 }, { "epoch": 0.3441699759933589, "grad_norm": 0.4743965078398269, "learning_rate": 9.993401698914084e-06, "loss": 0.4797, "step": 3835 }, { "epoch": 0.3442597204460299, "grad_norm": 0.4269388275181897, "learning_rate": 9.993374855224437e-06, "loss": 0.3824, "step": 3836 }, { "epoch": 0.34434946489870094, "grad_norm": 0.5348272908252759, "learning_rate": 9.993347957078051e-06, "loss": 0.5184, "step": 3837 }, { "epoch": 0.34443920935137196, "grad_norm": 0.47327312874296895, "learning_rate": 9.993321004475221e-06, "loss": 0.4675, "step": 3838 }, { "epoch": 0.34452895380404297, "grad_norm": 0.5155007883356238, "learning_rate": 9.99329399741624e-06, "loss": 0.4778, "step": 3839 }, { "epoch": 0.344618698256714, "grad_norm": 0.5077602441831108, "learning_rate": 9.993266935901403e-06, "loss": 0.5008, "step": 3840 }, { "epoch": 0.344708442709385, "grad_norm": 0.5120318328981963, "learning_rate": 9.993239819931003e-06, "loss": 0.5095, "step": 3841 }, { "epoch": 0.34479818716205607, "grad_norm": 0.4602578117021708, "learning_rate": 9.993212649505337e-06, "loss": 0.4277, "step": 3842 }, { "epoch": 0.3448879316147271, "grad_norm": 0.5104383461135488, "learning_rate": 9.993185424624701e-06, "loss": 0.531, "step": 3843 }, { "epoch": 0.3449776760673981, "grad_norm": 0.4970173977754651, "learning_rate": 9.993158145289394e-06, "loss": 0.4959, "step": 3844 }, { "epoch": 0.3450674205200691, "grad_norm": 0.5043039274572523, "learning_rate": 9.99313081149971e-06, "loss": 0.5213, "step": 3845 }, { "epoch": 0.3451571649727401, "grad_norm": 0.4556385775592553, "learning_rate": 9.993103423255949e-06, "loss": 0.433, "step": 3846 }, { "epoch": 0.34524690942541114, "grad_norm": 0.4838946990468592, "learning_rate": 9.993075980558412e-06, "loss": 0.464, "step": 3847 }, { "epoch": 0.34533665387808216, "grad_norm": 0.5132426557369909, "learning_rate": 9.993048483407395e-06, "loss": 0.4777, "step": 3848 }, { "epoch": 0.34542639833075317, "grad_norm": 0.6023934553413274, "learning_rate": 9.993020931803199e-06, "loss": 0.5442, "step": 3849 }, { "epoch": 0.3455161427834242, "grad_norm": 0.5497324633907229, "learning_rate": 9.992993325746123e-06, "loss": 0.5105, "step": 3850 }, { "epoch": 0.3456058872360952, "grad_norm": 0.5420732597171686, "learning_rate": 9.99296566523647e-06, "loss": 0.5847, "step": 3851 }, { "epoch": 0.3456956316887662, "grad_norm": 0.49090915637052707, "learning_rate": 9.992937950274539e-06, "loss": 0.4729, "step": 3852 }, { "epoch": 0.3457853761414373, "grad_norm": 0.5026154307211125, "learning_rate": 9.992910180860638e-06, "loss": 0.5083, "step": 3853 }, { "epoch": 0.3458751205941083, "grad_norm": 0.4909991749360006, "learning_rate": 9.992882356995064e-06, "loss": 0.4436, "step": 3854 }, { "epoch": 0.3459648650467793, "grad_norm": 0.5960692946340939, "learning_rate": 9.992854478678122e-06, "loss": 0.5822, "step": 3855 }, { "epoch": 0.3460546094994503, "grad_norm": 0.5046308934140361, "learning_rate": 9.992826545910117e-06, "loss": 0.4827, "step": 3856 }, { "epoch": 0.34614435395212134, "grad_norm": 0.4710072061074713, "learning_rate": 9.992798558691355e-06, "loss": 0.3974, "step": 3857 }, { "epoch": 0.34623409840479236, "grad_norm": 0.5042385229439985, "learning_rate": 9.992770517022136e-06, "loss": 0.5248, "step": 3858 }, { "epoch": 0.34632384285746337, "grad_norm": 0.4818738334309885, "learning_rate": 9.992742420902772e-06, "loss": 0.4413, "step": 3859 }, { "epoch": 0.3464135873101344, "grad_norm": 0.5725407050168678, "learning_rate": 9.992714270333565e-06, "loss": 0.5523, "step": 3860 }, { "epoch": 0.3465033317628054, "grad_norm": 0.4902003822827663, "learning_rate": 9.992686065314824e-06, "loss": 0.4553, "step": 3861 }, { "epoch": 0.3465930762154764, "grad_norm": 0.4703954286442129, "learning_rate": 9.992657805846859e-06, "loss": 0.4391, "step": 3862 }, { "epoch": 0.34668282066814743, "grad_norm": 0.5525731858421858, "learning_rate": 9.992629491929972e-06, "loss": 0.6074, "step": 3863 }, { "epoch": 0.34677256512081844, "grad_norm": 0.5131579554889132, "learning_rate": 9.992601123564478e-06, "loss": 0.4938, "step": 3864 }, { "epoch": 0.3468623095734895, "grad_norm": 0.5209562980707249, "learning_rate": 9.99257270075068e-06, "loss": 0.5294, "step": 3865 }, { "epoch": 0.34695205402616053, "grad_norm": 0.553491171648799, "learning_rate": 9.992544223488894e-06, "loss": 0.5562, "step": 3866 }, { "epoch": 0.34704179847883154, "grad_norm": 0.4532373931963853, "learning_rate": 9.992515691779429e-06, "loss": 0.4113, "step": 3867 }, { "epoch": 0.34713154293150256, "grad_norm": 0.4807361772223163, "learning_rate": 9.992487105622594e-06, "loss": 0.4994, "step": 3868 }, { "epoch": 0.34722128738417357, "grad_norm": 0.4370862080992624, "learning_rate": 9.992458465018703e-06, "loss": 0.4257, "step": 3869 }, { "epoch": 0.3473110318368446, "grad_norm": 0.508359655686438, "learning_rate": 9.992429769968066e-06, "loss": 0.5142, "step": 3870 }, { "epoch": 0.3474007762895156, "grad_norm": 0.49698386665596783, "learning_rate": 9.992401020470998e-06, "loss": 0.4914, "step": 3871 }, { "epoch": 0.3474905207421866, "grad_norm": 0.44590844738080343, "learning_rate": 9.992372216527811e-06, "loss": 0.3959, "step": 3872 }, { "epoch": 0.34758026519485763, "grad_norm": 0.5206391318905711, "learning_rate": 9.99234335813882e-06, "loss": 0.5105, "step": 3873 }, { "epoch": 0.34767000964752864, "grad_norm": 0.49547703291725337, "learning_rate": 9.992314445304342e-06, "loss": 0.4475, "step": 3874 }, { "epoch": 0.34775975410019966, "grad_norm": 0.44263206196816685, "learning_rate": 9.992285478024689e-06, "loss": 0.4138, "step": 3875 }, { "epoch": 0.34784949855287073, "grad_norm": 0.43925248528270766, "learning_rate": 9.992256456300178e-06, "loss": 0.4202, "step": 3876 }, { "epoch": 0.34793924300554174, "grad_norm": 0.43394039100116655, "learning_rate": 9.992227380131125e-06, "loss": 0.4171, "step": 3877 }, { "epoch": 0.34802898745821276, "grad_norm": 0.4963762294287133, "learning_rate": 9.992198249517848e-06, "loss": 0.4688, "step": 3878 }, { "epoch": 0.34811873191088377, "grad_norm": 0.4891261479663254, "learning_rate": 9.992169064460663e-06, "loss": 0.4783, "step": 3879 }, { "epoch": 0.3482084763635548, "grad_norm": 0.4956807054904242, "learning_rate": 9.99213982495989e-06, "loss": 0.4674, "step": 3880 }, { "epoch": 0.3482982208162258, "grad_norm": 0.547783498883811, "learning_rate": 9.992110531015849e-06, "loss": 0.5131, "step": 3881 }, { "epoch": 0.3483879652688968, "grad_norm": 0.5132138428889402, "learning_rate": 9.992081182628857e-06, "loss": 0.475, "step": 3882 }, { "epoch": 0.34847770972156783, "grad_norm": 0.511399807588367, "learning_rate": 9.992051779799234e-06, "loss": 0.4807, "step": 3883 }, { "epoch": 0.34856745417423884, "grad_norm": 0.503767543381787, "learning_rate": 9.992022322527304e-06, "loss": 0.4883, "step": 3884 }, { "epoch": 0.34865719862690986, "grad_norm": 0.5010774107533451, "learning_rate": 9.991992810813385e-06, "loss": 0.5064, "step": 3885 }, { "epoch": 0.3487469430795809, "grad_norm": 0.5594723612137644, "learning_rate": 9.991963244657798e-06, "loss": 0.5675, "step": 3886 }, { "epoch": 0.3488366875322519, "grad_norm": 0.5065606634943359, "learning_rate": 9.991933624060869e-06, "loss": 0.541, "step": 3887 }, { "epoch": 0.34892643198492296, "grad_norm": 0.46784505085971745, "learning_rate": 9.991903949022919e-06, "loss": 0.4442, "step": 3888 }, { "epoch": 0.349016176437594, "grad_norm": 0.4909947395709598, "learning_rate": 9.991874219544271e-06, "loss": 0.5142, "step": 3889 }, { "epoch": 0.349105920890265, "grad_norm": 0.5062529623354862, "learning_rate": 9.99184443562525e-06, "loss": 0.5167, "step": 3890 }, { "epoch": 0.349195665342936, "grad_norm": 0.4833638316163257, "learning_rate": 9.991814597266184e-06, "loss": 0.468, "step": 3891 }, { "epoch": 0.349285409795607, "grad_norm": 0.4818435011499395, "learning_rate": 9.991784704467391e-06, "loss": 0.4862, "step": 3892 }, { "epoch": 0.34937515424827803, "grad_norm": 0.5415656317967252, "learning_rate": 9.991754757229204e-06, "loss": 0.5364, "step": 3893 }, { "epoch": 0.34946489870094904, "grad_norm": 0.498551805557039, "learning_rate": 9.991724755551946e-06, "loss": 0.5282, "step": 3894 }, { "epoch": 0.34955464315362006, "grad_norm": 0.4630037258121974, "learning_rate": 9.991694699435945e-06, "loss": 0.4418, "step": 3895 }, { "epoch": 0.3496443876062911, "grad_norm": 0.5028027192910118, "learning_rate": 9.99166458888153e-06, "loss": 0.4846, "step": 3896 }, { "epoch": 0.3497341320589621, "grad_norm": 0.5148046851378169, "learning_rate": 9.991634423889027e-06, "loss": 0.5468, "step": 3897 }, { "epoch": 0.3498238765116331, "grad_norm": 0.5089101791898129, "learning_rate": 9.991604204458766e-06, "loss": 0.5036, "step": 3898 }, { "epoch": 0.3499136209643041, "grad_norm": 0.5387022529601423, "learning_rate": 9.991573930591076e-06, "loss": 0.5068, "step": 3899 }, { "epoch": 0.3500033654169752, "grad_norm": 0.5381694737054814, "learning_rate": 9.99154360228629e-06, "loss": 0.529, "step": 3900 }, { "epoch": 0.3500931098696462, "grad_norm": 0.4745472563003225, "learning_rate": 9.991513219544738e-06, "loss": 0.4178, "step": 3901 }, { "epoch": 0.3501828543223172, "grad_norm": 0.49809681051015664, "learning_rate": 9.991482782366747e-06, "loss": 0.4819, "step": 3902 }, { "epoch": 0.35027259877498823, "grad_norm": 0.49534707538351286, "learning_rate": 9.991452290752653e-06, "loss": 0.4794, "step": 3903 }, { "epoch": 0.35036234322765925, "grad_norm": 0.48964110159596697, "learning_rate": 9.991421744702786e-06, "loss": 0.472, "step": 3904 }, { "epoch": 0.35045208768033026, "grad_norm": 0.49118561380578807, "learning_rate": 9.991391144217484e-06, "loss": 0.4874, "step": 3905 }, { "epoch": 0.3505418321330013, "grad_norm": 0.4995837257640838, "learning_rate": 9.991360489297076e-06, "loss": 0.4512, "step": 3906 }, { "epoch": 0.3506315765856723, "grad_norm": 0.49037177688945105, "learning_rate": 9.991329779941898e-06, "loss": 0.4915, "step": 3907 }, { "epoch": 0.3507213210383433, "grad_norm": 0.5515954460356034, "learning_rate": 9.991299016152284e-06, "loss": 0.5925, "step": 3908 }, { "epoch": 0.3508110654910143, "grad_norm": 0.5767360277663464, "learning_rate": 9.99126819792857e-06, "loss": 0.5397, "step": 3909 }, { "epoch": 0.35090080994368533, "grad_norm": 0.5080644539239306, "learning_rate": 9.991237325271092e-06, "loss": 0.4946, "step": 3910 }, { "epoch": 0.3509905543963564, "grad_norm": 0.46449963147710305, "learning_rate": 9.991206398180188e-06, "loss": 0.4558, "step": 3911 }, { "epoch": 0.3510802988490274, "grad_norm": 0.4721783049718613, "learning_rate": 9.991175416656193e-06, "loss": 0.4537, "step": 3912 }, { "epoch": 0.35117004330169843, "grad_norm": 0.534333581891699, "learning_rate": 9.991144380699448e-06, "loss": 0.5495, "step": 3913 }, { "epoch": 0.35125978775436945, "grad_norm": 0.5116280334184351, "learning_rate": 9.991113290310287e-06, "loss": 0.5064, "step": 3914 }, { "epoch": 0.35134953220704046, "grad_norm": 0.4915446161119993, "learning_rate": 9.991082145489055e-06, "loss": 0.4644, "step": 3915 }, { "epoch": 0.3514392766597115, "grad_norm": 0.5422826005232086, "learning_rate": 9.991050946236086e-06, "loss": 0.5272, "step": 3916 }, { "epoch": 0.3515290211123825, "grad_norm": 0.4452514462192333, "learning_rate": 9.991019692551722e-06, "loss": 0.4181, "step": 3917 }, { "epoch": 0.3516187655650535, "grad_norm": 0.506424837016113, "learning_rate": 9.990988384436305e-06, "loss": 0.5288, "step": 3918 }, { "epoch": 0.3517085100177245, "grad_norm": 0.49679178155580067, "learning_rate": 9.990957021890176e-06, "loss": 0.49, "step": 3919 }, { "epoch": 0.35179825447039553, "grad_norm": 0.6019725665585653, "learning_rate": 9.990925604913678e-06, "loss": 0.5716, "step": 3920 }, { "epoch": 0.35188799892306655, "grad_norm": 0.4820981420772912, "learning_rate": 9.99089413350715e-06, "loss": 0.5152, "step": 3921 }, { "epoch": 0.35197774337573756, "grad_norm": 0.454901537576923, "learning_rate": 9.990862607670939e-06, "loss": 0.4422, "step": 3922 }, { "epoch": 0.35206748782840863, "grad_norm": 0.5081876875363841, "learning_rate": 9.990831027405388e-06, "loss": 0.5067, "step": 3923 }, { "epoch": 0.35215723228107965, "grad_norm": 0.4575823716219339, "learning_rate": 9.99079939271084e-06, "loss": 0.4472, "step": 3924 }, { "epoch": 0.35224697673375066, "grad_norm": 0.48451612639756075, "learning_rate": 9.990767703587642e-06, "loss": 0.4812, "step": 3925 }, { "epoch": 0.3523367211864217, "grad_norm": 0.47854524018328176, "learning_rate": 9.990735960036138e-06, "loss": 0.4994, "step": 3926 }, { "epoch": 0.3524264656390927, "grad_norm": 0.5016180637376003, "learning_rate": 9.990704162056675e-06, "loss": 0.4811, "step": 3927 }, { "epoch": 0.3525162100917637, "grad_norm": 0.5052198691636598, "learning_rate": 9.990672309649598e-06, "loss": 0.5034, "step": 3928 }, { "epoch": 0.3526059545444347, "grad_norm": 0.4984649134057294, "learning_rate": 9.990640402815258e-06, "loss": 0.4561, "step": 3929 }, { "epoch": 0.35269569899710573, "grad_norm": 0.5165025639832797, "learning_rate": 9.990608441553999e-06, "loss": 0.5215, "step": 3930 }, { "epoch": 0.35278544344977675, "grad_norm": 0.5306198981186497, "learning_rate": 9.990576425866172e-06, "loss": 0.6046, "step": 3931 }, { "epoch": 0.35287518790244776, "grad_norm": 0.44017434441558956, "learning_rate": 9.990544355752126e-06, "loss": 0.3991, "step": 3932 }, { "epoch": 0.3529649323551188, "grad_norm": 0.4797129996695122, "learning_rate": 9.99051223121221e-06, "loss": 0.455, "step": 3933 }, { "epoch": 0.3530546768077898, "grad_norm": 0.47989730570105815, "learning_rate": 9.990480052246777e-06, "loss": 0.4622, "step": 3934 }, { "epoch": 0.35314442126046086, "grad_norm": 0.4688514842414579, "learning_rate": 9.990447818856171e-06, "loss": 0.4373, "step": 3935 }, { "epoch": 0.3532341657131319, "grad_norm": 0.4748815828004117, "learning_rate": 9.990415531040751e-06, "loss": 0.4371, "step": 3936 }, { "epoch": 0.3533239101658029, "grad_norm": 0.4967942038922891, "learning_rate": 9.990383188800866e-06, "loss": 0.4917, "step": 3937 }, { "epoch": 0.3534136546184739, "grad_norm": 0.4599008713827594, "learning_rate": 9.990350792136869e-06, "loss": 0.4582, "step": 3938 }, { "epoch": 0.3535033990711449, "grad_norm": 0.4769933999199009, "learning_rate": 9.990318341049113e-06, "loss": 0.4625, "step": 3939 }, { "epoch": 0.35359314352381593, "grad_norm": 0.4849153535494881, "learning_rate": 9.990285835537952e-06, "loss": 0.448, "step": 3940 }, { "epoch": 0.35368288797648695, "grad_norm": 0.47892306197678797, "learning_rate": 9.990253275603741e-06, "loss": 0.4774, "step": 3941 }, { "epoch": 0.35377263242915796, "grad_norm": 0.5089357031319425, "learning_rate": 9.990220661246834e-06, "loss": 0.4813, "step": 3942 }, { "epoch": 0.353862376881829, "grad_norm": 0.5243276765814349, "learning_rate": 9.990187992467589e-06, "loss": 0.5585, "step": 3943 }, { "epoch": 0.3539521213345, "grad_norm": 0.5003543567646823, "learning_rate": 9.99015526926636e-06, "loss": 0.4985, "step": 3944 }, { "epoch": 0.354041865787171, "grad_norm": 0.45579879017718217, "learning_rate": 9.990122491643504e-06, "loss": 0.4554, "step": 3945 }, { "epoch": 0.3541316102398421, "grad_norm": 0.49584953724523906, "learning_rate": 9.990089659599378e-06, "loss": 0.5101, "step": 3946 }, { "epoch": 0.3542213546925131, "grad_norm": 0.5107646097002699, "learning_rate": 9.990056773134343e-06, "loss": 0.5066, "step": 3947 }, { "epoch": 0.3543110991451841, "grad_norm": 0.4862294076729634, "learning_rate": 9.990023832248757e-06, "loss": 0.4779, "step": 3948 }, { "epoch": 0.3544008435978551, "grad_norm": 0.4672276429612079, "learning_rate": 9.989990836942976e-06, "loss": 0.4279, "step": 3949 }, { "epoch": 0.35449058805052613, "grad_norm": 0.4475597892372106, "learning_rate": 9.989957787217361e-06, "loss": 0.4424, "step": 3950 }, { "epoch": 0.35458033250319715, "grad_norm": 0.4664643320121077, "learning_rate": 9.989924683072274e-06, "loss": 0.4596, "step": 3951 }, { "epoch": 0.35467007695586816, "grad_norm": 0.49276089129281014, "learning_rate": 9.989891524508077e-06, "loss": 0.4754, "step": 3952 }, { "epoch": 0.3547598214085392, "grad_norm": 0.5045042799404684, "learning_rate": 9.989858311525129e-06, "loss": 0.4594, "step": 3953 }, { "epoch": 0.3548495658612102, "grad_norm": 0.4834046085660042, "learning_rate": 9.989825044123793e-06, "loss": 0.5059, "step": 3954 }, { "epoch": 0.3549393103138812, "grad_norm": 0.5238600424713298, "learning_rate": 9.989791722304431e-06, "loss": 0.5276, "step": 3955 }, { "epoch": 0.3550290547665522, "grad_norm": 0.5307387479451771, "learning_rate": 9.98975834606741e-06, "loss": 0.504, "step": 3956 }, { "epoch": 0.35511879921922324, "grad_norm": 0.48530463790582073, "learning_rate": 9.98972491541309e-06, "loss": 0.4554, "step": 3957 }, { "epoch": 0.3552085436718943, "grad_norm": 0.49506379333443795, "learning_rate": 9.989691430341837e-06, "loss": 0.4719, "step": 3958 }, { "epoch": 0.3552982881245653, "grad_norm": 0.5429144643809487, "learning_rate": 9.989657890854015e-06, "loss": 0.5017, "step": 3959 }, { "epoch": 0.35538803257723633, "grad_norm": 0.5066012335999517, "learning_rate": 9.98962429694999e-06, "loss": 0.4888, "step": 3960 }, { "epoch": 0.35547777702990735, "grad_norm": 0.4904343488945606, "learning_rate": 9.989590648630132e-06, "loss": 0.4741, "step": 3961 }, { "epoch": 0.35556752148257836, "grad_norm": 0.5183590547327677, "learning_rate": 9.989556945894806e-06, "loss": 0.5156, "step": 3962 }, { "epoch": 0.3556572659352494, "grad_norm": 0.49463943052548554, "learning_rate": 9.989523188744376e-06, "loss": 0.4101, "step": 3963 }, { "epoch": 0.3557470103879204, "grad_norm": 0.5166303453674276, "learning_rate": 9.989489377179214e-06, "loss": 0.4932, "step": 3964 }, { "epoch": 0.3558367548405914, "grad_norm": 0.46509697523230586, "learning_rate": 9.989455511199688e-06, "loss": 0.4376, "step": 3965 }, { "epoch": 0.3559264992932624, "grad_norm": 0.4880267374210556, "learning_rate": 9.989421590806167e-06, "loss": 0.4816, "step": 3966 }, { "epoch": 0.35601624374593344, "grad_norm": 0.49637196278023027, "learning_rate": 9.98938761599902e-06, "loss": 0.5075, "step": 3967 }, { "epoch": 0.35610598819860445, "grad_norm": 0.44551107682128044, "learning_rate": 9.989353586778621e-06, "loss": 0.4139, "step": 3968 }, { "epoch": 0.3561957326512755, "grad_norm": 0.560754525323631, "learning_rate": 9.989319503145336e-06, "loss": 0.5643, "step": 3969 }, { "epoch": 0.35628547710394654, "grad_norm": 0.49259037766784347, "learning_rate": 9.989285365099539e-06, "loss": 0.4833, "step": 3970 }, { "epoch": 0.35637522155661755, "grad_norm": 0.4718470168458281, "learning_rate": 9.989251172641605e-06, "loss": 0.454, "step": 3971 }, { "epoch": 0.35646496600928856, "grad_norm": 0.5096299695892413, "learning_rate": 9.989216925771903e-06, "loss": 0.5328, "step": 3972 }, { "epoch": 0.3565547104619596, "grad_norm": 0.5119948394990019, "learning_rate": 9.989182624490807e-06, "loss": 0.4447, "step": 3973 }, { "epoch": 0.3566444549146306, "grad_norm": 0.4747993395557233, "learning_rate": 9.989148268798694e-06, "loss": 0.4137, "step": 3974 }, { "epoch": 0.3567341993673016, "grad_norm": 0.48491057669168586, "learning_rate": 9.989113858695937e-06, "loss": 0.4395, "step": 3975 }, { "epoch": 0.3568239438199726, "grad_norm": 0.5069438781028489, "learning_rate": 9.98907939418291e-06, "loss": 0.4745, "step": 3976 }, { "epoch": 0.35691368827264364, "grad_norm": 0.5331287595392081, "learning_rate": 9.98904487525999e-06, "loss": 0.5599, "step": 3977 }, { "epoch": 0.35700343272531465, "grad_norm": 0.4801099910143907, "learning_rate": 9.989010301927553e-06, "loss": 0.4803, "step": 3978 }, { "epoch": 0.35709317717798567, "grad_norm": 0.5067141223045792, "learning_rate": 9.988975674185976e-06, "loss": 0.4844, "step": 3979 }, { "epoch": 0.3571829216306567, "grad_norm": 0.4585857101437009, "learning_rate": 9.988940992035638e-06, "loss": 0.415, "step": 3980 }, { "epoch": 0.35727266608332775, "grad_norm": 0.5237892881431928, "learning_rate": 9.988906255476917e-06, "loss": 0.4954, "step": 3981 }, { "epoch": 0.35736241053599876, "grad_norm": 0.4862156939075775, "learning_rate": 9.98887146451019e-06, "loss": 0.5186, "step": 3982 }, { "epoch": 0.3574521549886698, "grad_norm": 0.4705517084906326, "learning_rate": 9.988836619135838e-06, "loss": 0.445, "step": 3983 }, { "epoch": 0.3575418994413408, "grad_norm": 0.5208167525358284, "learning_rate": 9.98880171935424e-06, "loss": 0.4626, "step": 3984 }, { "epoch": 0.3576316438940118, "grad_norm": 0.49352421072704794, "learning_rate": 9.988766765165777e-06, "loss": 0.5299, "step": 3985 }, { "epoch": 0.3577213883466828, "grad_norm": 0.5115452867358133, "learning_rate": 9.988731756570831e-06, "loss": 0.516, "step": 3986 }, { "epoch": 0.35781113279935384, "grad_norm": 0.4667506810810785, "learning_rate": 9.988696693569782e-06, "loss": 0.4614, "step": 3987 }, { "epoch": 0.35790087725202485, "grad_norm": 0.5079861289958066, "learning_rate": 9.988661576163015e-06, "loss": 0.4614, "step": 3988 }, { "epoch": 0.35799062170469587, "grad_norm": 0.491385644300401, "learning_rate": 9.98862640435091e-06, "loss": 0.4335, "step": 3989 }, { "epoch": 0.3580803661573669, "grad_norm": 0.49113111635465867, "learning_rate": 9.988591178133853e-06, "loss": 0.4252, "step": 3990 }, { "epoch": 0.3581701106100379, "grad_norm": 0.4856080932106293, "learning_rate": 9.988555897512228e-06, "loss": 0.4666, "step": 3991 }, { "epoch": 0.3582598550627089, "grad_norm": 0.519717012940696, "learning_rate": 9.988520562486417e-06, "loss": 0.5461, "step": 3992 }, { "epoch": 0.35834959951538, "grad_norm": 0.49093217186071847, "learning_rate": 9.98848517305681e-06, "loss": 0.4172, "step": 3993 }, { "epoch": 0.358439343968051, "grad_norm": 0.5528741926126974, "learning_rate": 9.988449729223788e-06, "loss": 0.5385, "step": 3994 }, { "epoch": 0.358529088420722, "grad_norm": 0.4852793540570042, "learning_rate": 9.988414230987742e-06, "loss": 0.4591, "step": 3995 }, { "epoch": 0.358618832873393, "grad_norm": 0.4592487673906175, "learning_rate": 9.988378678349055e-06, "loss": 0.4791, "step": 3996 }, { "epoch": 0.35870857732606404, "grad_norm": 0.47630998241535794, "learning_rate": 9.988343071308117e-06, "loss": 0.4646, "step": 3997 }, { "epoch": 0.35879832177873505, "grad_norm": 0.4683030961256899, "learning_rate": 9.98830740986532e-06, "loss": 0.4378, "step": 3998 }, { "epoch": 0.35888806623140607, "grad_norm": 0.4687078529393659, "learning_rate": 9.988271694021043e-06, "loss": 0.4192, "step": 3999 }, { "epoch": 0.3589778106840771, "grad_norm": 0.45696899097376353, "learning_rate": 9.988235923775683e-06, "loss": 0.4474, "step": 4000 }, { "epoch": 0.3590675551367481, "grad_norm": 0.5401622941228121, "learning_rate": 9.988200099129632e-06, "loss": 0.5864, "step": 4001 }, { "epoch": 0.3591572995894191, "grad_norm": 0.569512617429905, "learning_rate": 9.988164220083275e-06, "loss": 0.5617, "step": 4002 }, { "epoch": 0.3592470440420901, "grad_norm": 0.5015749888295455, "learning_rate": 9.988128286637007e-06, "loss": 0.4702, "step": 4003 }, { "epoch": 0.3593367884947612, "grad_norm": 0.48416213155635973, "learning_rate": 9.988092298791216e-06, "loss": 0.4356, "step": 4004 }, { "epoch": 0.3594265329474322, "grad_norm": 0.5271123352158682, "learning_rate": 9.988056256546298e-06, "loss": 0.5155, "step": 4005 }, { "epoch": 0.3595162774001032, "grad_norm": 0.5063450055199916, "learning_rate": 9.988020159902646e-06, "loss": 0.4712, "step": 4006 }, { "epoch": 0.35960602185277424, "grad_norm": 0.483920408884566, "learning_rate": 9.987984008860652e-06, "loss": 0.428, "step": 4007 }, { "epoch": 0.35969576630544525, "grad_norm": 0.4948599383950583, "learning_rate": 9.98794780342071e-06, "loss": 0.4792, "step": 4008 }, { "epoch": 0.35978551075811627, "grad_norm": 0.5315125084376898, "learning_rate": 9.987911543583216e-06, "loss": 0.4833, "step": 4009 }, { "epoch": 0.3598752552107873, "grad_norm": 0.46759838776771667, "learning_rate": 9.987875229348566e-06, "loss": 0.4463, "step": 4010 }, { "epoch": 0.3599649996634583, "grad_norm": 0.4846462236090899, "learning_rate": 9.987838860717156e-06, "loss": 0.4699, "step": 4011 }, { "epoch": 0.3600547441161293, "grad_norm": 0.5421059763231907, "learning_rate": 9.98780243768938e-06, "loss": 0.5044, "step": 4012 }, { "epoch": 0.3601444885688003, "grad_norm": 0.45143845964061363, "learning_rate": 9.987765960265639e-06, "loss": 0.4107, "step": 4013 }, { "epoch": 0.36023423302147134, "grad_norm": 0.5132913416821389, "learning_rate": 9.987729428446328e-06, "loss": 0.5078, "step": 4014 }, { "epoch": 0.36032397747414235, "grad_norm": 0.5399327697017171, "learning_rate": 9.987692842231847e-06, "loss": 0.5225, "step": 4015 }, { "epoch": 0.3604137219268134, "grad_norm": 0.5134928777213542, "learning_rate": 9.987656201622594e-06, "loss": 0.5365, "step": 4016 }, { "epoch": 0.36050346637948444, "grad_norm": 0.496311719022839, "learning_rate": 9.987619506618967e-06, "loss": 0.4989, "step": 4017 }, { "epoch": 0.36059321083215545, "grad_norm": 0.48644758321406684, "learning_rate": 9.98758275722137e-06, "loss": 0.4877, "step": 4018 }, { "epoch": 0.36068295528482647, "grad_norm": 0.5279535237888979, "learning_rate": 9.987545953430203e-06, "loss": 0.4964, "step": 4019 }, { "epoch": 0.3607726997374975, "grad_norm": 0.540437947355398, "learning_rate": 9.987509095245865e-06, "loss": 0.5464, "step": 4020 }, { "epoch": 0.3608624441901685, "grad_norm": 0.5515756759860626, "learning_rate": 9.987472182668758e-06, "loss": 0.5304, "step": 4021 }, { "epoch": 0.3609521886428395, "grad_norm": 0.48306500798056273, "learning_rate": 9.987435215699286e-06, "loss": 0.4335, "step": 4022 }, { "epoch": 0.3610419330955105, "grad_norm": 0.4991855931519782, "learning_rate": 9.987398194337854e-06, "loss": 0.4516, "step": 4023 }, { "epoch": 0.36113167754818154, "grad_norm": 0.47233247814761414, "learning_rate": 9.987361118584862e-06, "loss": 0.4986, "step": 4024 }, { "epoch": 0.36122142200085255, "grad_norm": 0.48673802626726087, "learning_rate": 9.987323988440717e-06, "loss": 0.4414, "step": 4025 }, { "epoch": 0.36131116645352357, "grad_norm": 0.5740942618214354, "learning_rate": 9.987286803905823e-06, "loss": 0.5799, "step": 4026 }, { "epoch": 0.3614009109061946, "grad_norm": 0.4353623378548241, "learning_rate": 9.987249564980585e-06, "loss": 0.3827, "step": 4027 }, { "epoch": 0.36149065535886565, "grad_norm": 0.4653666683719344, "learning_rate": 9.98721227166541e-06, "loss": 0.4455, "step": 4028 }, { "epoch": 0.36158039981153667, "grad_norm": 0.5819972632259689, "learning_rate": 9.987174923960704e-06, "loss": 0.551, "step": 4029 }, { "epoch": 0.3616701442642077, "grad_norm": 0.47775550317423665, "learning_rate": 9.987137521866874e-06, "loss": 0.4741, "step": 4030 }, { "epoch": 0.3617598887168787, "grad_norm": 0.5179520279813864, "learning_rate": 9.98710006538433e-06, "loss": 0.5003, "step": 4031 }, { "epoch": 0.3618496331695497, "grad_norm": 0.5148080628975651, "learning_rate": 9.987062554513477e-06, "loss": 0.4956, "step": 4032 }, { "epoch": 0.3619393776222207, "grad_norm": 0.5018412739557645, "learning_rate": 9.98702498925473e-06, "loss": 0.5356, "step": 4033 }, { "epoch": 0.36202912207489174, "grad_norm": 0.4609955891305031, "learning_rate": 9.986987369608492e-06, "loss": 0.437, "step": 4034 }, { "epoch": 0.36211886652756275, "grad_norm": 0.5179239097418493, "learning_rate": 9.986949695575177e-06, "loss": 0.5111, "step": 4035 }, { "epoch": 0.36220861098023377, "grad_norm": 0.4940790191226864, "learning_rate": 9.986911967155193e-06, "loss": 0.4564, "step": 4036 }, { "epoch": 0.3622983554329048, "grad_norm": 0.5366388982359654, "learning_rate": 9.986874184348955e-06, "loss": 0.5556, "step": 4037 }, { "epoch": 0.3623880998855758, "grad_norm": 0.5352970855377772, "learning_rate": 9.986836347156874e-06, "loss": 0.5278, "step": 4038 }, { "epoch": 0.36247784433824687, "grad_norm": 0.48814177842482825, "learning_rate": 9.986798455579361e-06, "loss": 0.4562, "step": 4039 }, { "epoch": 0.3625675887909179, "grad_norm": 0.516033504700227, "learning_rate": 9.98676050961683e-06, "loss": 0.4622, "step": 4040 }, { "epoch": 0.3626573332435889, "grad_norm": 0.4826117532237261, "learning_rate": 9.986722509269697e-06, "loss": 0.4563, "step": 4041 }, { "epoch": 0.3627470776962599, "grad_norm": 0.48394621918467084, "learning_rate": 9.986684454538373e-06, "loss": 0.4824, "step": 4042 }, { "epoch": 0.3628368221489309, "grad_norm": 0.5191640581305503, "learning_rate": 9.986646345423274e-06, "loss": 0.5219, "step": 4043 }, { "epoch": 0.36292656660160194, "grad_norm": 0.5489541178488906, "learning_rate": 9.986608181924817e-06, "loss": 0.5226, "step": 4044 }, { "epoch": 0.36301631105427296, "grad_norm": 0.5042589982052409, "learning_rate": 9.986569964043419e-06, "loss": 0.4711, "step": 4045 }, { "epoch": 0.36310605550694397, "grad_norm": 0.47486795821561173, "learning_rate": 9.986531691779494e-06, "loss": 0.4188, "step": 4046 }, { "epoch": 0.363195799959615, "grad_norm": 0.4831250559242043, "learning_rate": 9.98649336513346e-06, "loss": 0.4722, "step": 4047 }, { "epoch": 0.363285544412286, "grad_norm": 0.5165051830035159, "learning_rate": 9.986454984105737e-06, "loss": 0.5478, "step": 4048 }, { "epoch": 0.363375288864957, "grad_norm": 0.4512737536157001, "learning_rate": 9.98641654869674e-06, "loss": 0.4483, "step": 4049 }, { "epoch": 0.36346503331762803, "grad_norm": 0.4960314637008465, "learning_rate": 9.986378058906892e-06, "loss": 0.5019, "step": 4050 }, { "epoch": 0.3635547777702991, "grad_norm": 0.47859674157265625, "learning_rate": 9.98633951473661e-06, "loss": 0.4461, "step": 4051 }, { "epoch": 0.3636445222229701, "grad_norm": 0.4837919678956562, "learning_rate": 9.986300916186318e-06, "loss": 0.4761, "step": 4052 }, { "epoch": 0.3637342666756411, "grad_norm": 0.47630330543118427, "learning_rate": 9.986262263256433e-06, "loss": 0.4818, "step": 4053 }, { "epoch": 0.36382401112831214, "grad_norm": 0.49650570903650476, "learning_rate": 9.986223555947377e-06, "loss": 0.4784, "step": 4054 }, { "epoch": 0.36391375558098316, "grad_norm": 0.5539743508461027, "learning_rate": 9.986184794259573e-06, "loss": 0.5525, "step": 4055 }, { "epoch": 0.36400350003365417, "grad_norm": 0.48254496706794126, "learning_rate": 9.986145978193445e-06, "loss": 0.4505, "step": 4056 }, { "epoch": 0.3640932444863252, "grad_norm": 0.5070942587325532, "learning_rate": 9.986107107749415e-06, "loss": 0.4922, "step": 4057 }, { "epoch": 0.3641829889389962, "grad_norm": 0.4824328102215217, "learning_rate": 9.986068182927907e-06, "loss": 0.4868, "step": 4058 }, { "epoch": 0.3642727333916672, "grad_norm": 0.49654610315069636, "learning_rate": 9.986029203729344e-06, "loss": 0.5091, "step": 4059 }, { "epoch": 0.36436247784433823, "grad_norm": 0.5008882428126248, "learning_rate": 9.985990170154154e-06, "loss": 0.4701, "step": 4060 }, { "epoch": 0.36445222229700924, "grad_norm": 0.5316872586370989, "learning_rate": 9.985951082202762e-06, "loss": 0.5312, "step": 4061 }, { "epoch": 0.3645419667496803, "grad_norm": 0.49428189896452984, "learning_rate": 9.985911939875592e-06, "loss": 0.5068, "step": 4062 }, { "epoch": 0.3646317112023513, "grad_norm": 0.45042127157739204, "learning_rate": 9.985872743173073e-06, "loss": 0.4019, "step": 4063 }, { "epoch": 0.36472145565502234, "grad_norm": 0.49472032538278193, "learning_rate": 9.985833492095633e-06, "loss": 0.5109, "step": 4064 }, { "epoch": 0.36481120010769336, "grad_norm": 0.4991153951575897, "learning_rate": 9.985794186643699e-06, "loss": 0.5066, "step": 4065 }, { "epoch": 0.36490094456036437, "grad_norm": 0.5076442635647725, "learning_rate": 9.985754826817697e-06, "loss": 0.4937, "step": 4066 }, { "epoch": 0.3649906890130354, "grad_norm": 0.49691000526319223, "learning_rate": 9.985715412618063e-06, "loss": 0.5332, "step": 4067 }, { "epoch": 0.3650804334657064, "grad_norm": 0.5092662504075282, "learning_rate": 9.98567594404522e-06, "loss": 0.4776, "step": 4068 }, { "epoch": 0.3651701779183774, "grad_norm": 0.4998858576897889, "learning_rate": 9.985636421099602e-06, "loss": 0.5081, "step": 4069 }, { "epoch": 0.36525992237104843, "grad_norm": 0.5267370889266072, "learning_rate": 9.98559684378164e-06, "loss": 0.5135, "step": 4070 }, { "epoch": 0.36534966682371944, "grad_norm": 0.5200258818905569, "learning_rate": 9.985557212091763e-06, "loss": 0.5461, "step": 4071 }, { "epoch": 0.36543941127639046, "grad_norm": 0.5568652886988744, "learning_rate": 9.985517526030406e-06, "loss": 0.55, "step": 4072 }, { "epoch": 0.3655291557290615, "grad_norm": 0.5043752705945413, "learning_rate": 9.985477785598002e-06, "loss": 0.5148, "step": 4073 }, { "epoch": 0.36561890018173254, "grad_norm": 0.5004158876001272, "learning_rate": 9.985437990794981e-06, "loss": 0.4716, "step": 4074 }, { "epoch": 0.36570864463440356, "grad_norm": 0.4753837636619831, "learning_rate": 9.985398141621782e-06, "loss": 0.4568, "step": 4075 }, { "epoch": 0.36579838908707457, "grad_norm": 0.46172710550046786, "learning_rate": 9.985358238078835e-06, "loss": 0.4632, "step": 4076 }, { "epoch": 0.3658881335397456, "grad_norm": 0.5079862848751625, "learning_rate": 9.985318280166578e-06, "loss": 0.4981, "step": 4077 }, { "epoch": 0.3659778779924166, "grad_norm": 0.4792497031380296, "learning_rate": 9.985278267885446e-06, "loss": 0.4602, "step": 4078 }, { "epoch": 0.3660676224450876, "grad_norm": 0.4894416274583047, "learning_rate": 9.985238201235875e-06, "loss": 0.4729, "step": 4079 }, { "epoch": 0.36615736689775863, "grad_norm": 0.5653072986798713, "learning_rate": 9.985198080218301e-06, "loss": 0.5438, "step": 4080 }, { "epoch": 0.36624711135042964, "grad_norm": 0.501996848824997, "learning_rate": 9.985157904833162e-06, "loss": 0.4765, "step": 4081 }, { "epoch": 0.36633685580310066, "grad_norm": 0.4866444486736639, "learning_rate": 9.9851176750809e-06, "loss": 0.4219, "step": 4082 }, { "epoch": 0.3664266002557717, "grad_norm": 0.5368713691452212, "learning_rate": 9.985077390961949e-06, "loss": 0.4424, "step": 4083 }, { "epoch": 0.3665163447084427, "grad_norm": 0.47827468281757585, "learning_rate": 9.985037052476749e-06, "loss": 0.4615, "step": 4084 }, { "epoch": 0.3666060891611137, "grad_norm": 0.4840811138467316, "learning_rate": 9.984996659625741e-06, "loss": 0.4705, "step": 4085 }, { "epoch": 0.36669583361378477, "grad_norm": 0.48862486414404777, "learning_rate": 9.984956212409366e-06, "loss": 0.4583, "step": 4086 }, { "epoch": 0.3667855780664558, "grad_norm": 0.4787215898701267, "learning_rate": 9.984915710828067e-06, "loss": 0.5121, "step": 4087 }, { "epoch": 0.3668753225191268, "grad_norm": 0.5221284034449598, "learning_rate": 9.98487515488228e-06, "loss": 0.5281, "step": 4088 }, { "epoch": 0.3669650669717978, "grad_norm": 0.4507857454899456, "learning_rate": 9.98483454457245e-06, "loss": 0.398, "step": 4089 }, { "epoch": 0.36705481142446883, "grad_norm": 0.44029070809359894, "learning_rate": 9.984793879899022e-06, "loss": 0.4131, "step": 4090 }, { "epoch": 0.36714455587713984, "grad_norm": 0.5045241400449832, "learning_rate": 9.984753160862439e-06, "loss": 0.5079, "step": 4091 }, { "epoch": 0.36723430032981086, "grad_norm": 0.46289307245426226, "learning_rate": 9.984712387463142e-06, "loss": 0.4498, "step": 4092 }, { "epoch": 0.3673240447824819, "grad_norm": 0.5085490932460632, "learning_rate": 9.984671559701577e-06, "loss": 0.4831, "step": 4093 }, { "epoch": 0.3674137892351529, "grad_norm": 0.49457017235551587, "learning_rate": 9.984630677578192e-06, "loss": 0.5037, "step": 4094 }, { "epoch": 0.3675035336878239, "grad_norm": 0.533656655847635, "learning_rate": 9.98458974109343e-06, "loss": 0.5405, "step": 4095 }, { "epoch": 0.3675932781404949, "grad_norm": 0.45132069218867826, "learning_rate": 9.984548750247736e-06, "loss": 0.4296, "step": 4096 }, { "epoch": 0.367683022593166, "grad_norm": 0.4669656294141809, "learning_rate": 9.98450770504156e-06, "loss": 0.44, "step": 4097 }, { "epoch": 0.367772767045837, "grad_norm": 0.49097798551839233, "learning_rate": 9.98446660547535e-06, "loss": 0.5003, "step": 4098 }, { "epoch": 0.367862511498508, "grad_norm": 0.5065269895110777, "learning_rate": 9.984425451549553e-06, "loss": 0.4695, "step": 4099 }, { "epoch": 0.36795225595117903, "grad_norm": 0.5171272492755523, "learning_rate": 9.984384243264618e-06, "loss": 0.4643, "step": 4100 }, { "epoch": 0.36804200040385004, "grad_norm": 0.5390930234144942, "learning_rate": 9.984342980620994e-06, "loss": 0.5568, "step": 4101 }, { "epoch": 0.36813174485652106, "grad_norm": 0.47862976019719067, "learning_rate": 9.98430166361913e-06, "loss": 0.4473, "step": 4102 }, { "epoch": 0.3682214893091921, "grad_norm": 0.44520705566604557, "learning_rate": 9.984260292259478e-06, "loss": 0.4348, "step": 4103 }, { "epoch": 0.3683112337618631, "grad_norm": 0.49546021905021426, "learning_rate": 9.98421886654249e-06, "loss": 0.4965, "step": 4104 }, { "epoch": 0.3684009782145341, "grad_norm": 0.4737491870929244, "learning_rate": 9.984177386468617e-06, "loss": 0.4506, "step": 4105 }, { "epoch": 0.3684907226672051, "grad_norm": 0.506035388102288, "learning_rate": 9.984135852038311e-06, "loss": 0.4713, "step": 4106 }, { "epoch": 0.36858046711987613, "grad_norm": 0.5110489075011524, "learning_rate": 9.984094263252024e-06, "loss": 0.5405, "step": 4107 }, { "epoch": 0.36867021157254715, "grad_norm": 0.5048933699364121, "learning_rate": 9.984052620110212e-06, "loss": 0.5249, "step": 4108 }, { "epoch": 0.3687599560252182, "grad_norm": 0.5009138605129113, "learning_rate": 9.984010922613328e-06, "loss": 0.4822, "step": 4109 }, { "epoch": 0.36884970047788923, "grad_norm": 0.48664642985596146, "learning_rate": 9.983969170761827e-06, "loss": 0.4883, "step": 4110 }, { "epoch": 0.36893944493056025, "grad_norm": 0.4661046463616039, "learning_rate": 9.983927364556164e-06, "loss": 0.4602, "step": 4111 }, { "epoch": 0.36902918938323126, "grad_norm": 0.4702454836429819, "learning_rate": 9.983885503996792e-06, "loss": 0.455, "step": 4112 }, { "epoch": 0.3691189338359023, "grad_norm": 0.4828112308692837, "learning_rate": 9.983843589084173e-06, "loss": 0.4309, "step": 4113 }, { "epoch": 0.3692086782885733, "grad_norm": 0.47698040077840537, "learning_rate": 9.98380161981876e-06, "loss": 0.4718, "step": 4114 }, { "epoch": 0.3692984227412443, "grad_norm": 0.48319821017304737, "learning_rate": 9.983759596201015e-06, "loss": 0.4665, "step": 4115 }, { "epoch": 0.3693881671939153, "grad_norm": 0.47942548406409935, "learning_rate": 9.98371751823139e-06, "loss": 0.4693, "step": 4116 }, { "epoch": 0.36947791164658633, "grad_norm": 0.5623908018116021, "learning_rate": 9.98367538591035e-06, "loss": 0.5177, "step": 4117 }, { "epoch": 0.36956765609925735, "grad_norm": 0.49784621620022806, "learning_rate": 9.983633199238352e-06, "loss": 0.5123, "step": 4118 }, { "epoch": 0.36965740055192836, "grad_norm": 0.5068917849123084, "learning_rate": 9.983590958215854e-06, "loss": 0.5109, "step": 4119 }, { "epoch": 0.3697471450045994, "grad_norm": 0.4588613576977061, "learning_rate": 9.98354866284332e-06, "loss": 0.4455, "step": 4120 }, { "epoch": 0.36983688945727045, "grad_norm": 0.4866087549045301, "learning_rate": 9.98350631312121e-06, "loss": 0.4637, "step": 4121 }, { "epoch": 0.36992663390994146, "grad_norm": 0.493876755507559, "learning_rate": 9.983463909049985e-06, "loss": 0.4804, "step": 4122 }, { "epoch": 0.3700163783626125, "grad_norm": 0.46812248739747475, "learning_rate": 9.98342145063011e-06, "loss": 0.4497, "step": 4123 }, { "epoch": 0.3701061228152835, "grad_norm": 0.4902857822363799, "learning_rate": 9.983378937862043e-06, "loss": 0.4664, "step": 4124 }, { "epoch": 0.3701958672679545, "grad_norm": 0.4844943636551334, "learning_rate": 9.983336370746253e-06, "loss": 0.4789, "step": 4125 }, { "epoch": 0.3702856117206255, "grad_norm": 0.4846588485125752, "learning_rate": 9.983293749283203e-06, "loss": 0.5416, "step": 4126 }, { "epoch": 0.37037535617329653, "grad_norm": 0.4922771204305583, "learning_rate": 9.983251073473355e-06, "loss": 0.4858, "step": 4127 }, { "epoch": 0.37046510062596755, "grad_norm": 0.4940874120532849, "learning_rate": 9.983208343317176e-06, "loss": 0.4597, "step": 4128 }, { "epoch": 0.37055484507863856, "grad_norm": 0.46947940031658125, "learning_rate": 9.983165558815136e-06, "loss": 0.4689, "step": 4129 }, { "epoch": 0.3706445895313096, "grad_norm": 0.5003972553269926, "learning_rate": 9.983122719967695e-06, "loss": 0.4746, "step": 4130 }, { "epoch": 0.3707343339839806, "grad_norm": 0.4989758138376457, "learning_rate": 9.983079826775324e-06, "loss": 0.4915, "step": 4131 }, { "epoch": 0.37082407843665166, "grad_norm": 0.5181343904494194, "learning_rate": 9.98303687923849e-06, "loss": 0.5329, "step": 4132 }, { "epoch": 0.3709138228893227, "grad_norm": 0.49134486931061083, "learning_rate": 9.98299387735766e-06, "loss": 0.5025, "step": 4133 }, { "epoch": 0.3710035673419937, "grad_norm": 0.5047161846665104, "learning_rate": 9.982950821133306e-06, "loss": 0.5029, "step": 4134 }, { "epoch": 0.3710933117946647, "grad_norm": 0.5072692409199597, "learning_rate": 9.982907710565895e-06, "loss": 0.5037, "step": 4135 }, { "epoch": 0.3711830562473357, "grad_norm": 0.5260802195961058, "learning_rate": 9.982864545655898e-06, "loss": 0.5884, "step": 4136 }, { "epoch": 0.37127280070000673, "grad_norm": 0.5478716184748451, "learning_rate": 9.982821326403787e-06, "loss": 0.586, "step": 4137 }, { "epoch": 0.37136254515267775, "grad_norm": 0.5410933390828763, "learning_rate": 9.982778052810031e-06, "loss": 0.5701, "step": 4138 }, { "epoch": 0.37145228960534876, "grad_norm": 0.45096352177061577, "learning_rate": 9.982734724875102e-06, "loss": 0.4085, "step": 4139 }, { "epoch": 0.3715420340580198, "grad_norm": 0.49890737147382946, "learning_rate": 9.982691342599476e-06, "loss": 0.4843, "step": 4140 }, { "epoch": 0.3716317785106908, "grad_norm": 0.5058761272375882, "learning_rate": 9.982647905983622e-06, "loss": 0.5104, "step": 4141 }, { "epoch": 0.3717215229633618, "grad_norm": 0.5041641667267704, "learning_rate": 9.982604415028015e-06, "loss": 0.5009, "step": 4142 }, { "epoch": 0.3718112674160328, "grad_norm": 0.48689032406801414, "learning_rate": 9.982560869733131e-06, "loss": 0.4465, "step": 4143 }, { "epoch": 0.3719010118687039, "grad_norm": 0.5004012144862638, "learning_rate": 9.982517270099443e-06, "loss": 0.4809, "step": 4144 }, { "epoch": 0.3719907563213749, "grad_norm": 0.5091064715784918, "learning_rate": 9.982473616127428e-06, "loss": 0.4756, "step": 4145 }, { "epoch": 0.3720805007740459, "grad_norm": 0.4763091321853366, "learning_rate": 9.98242990781756e-06, "loss": 0.47, "step": 4146 }, { "epoch": 0.37217024522671693, "grad_norm": 0.4657294113281313, "learning_rate": 9.982386145170317e-06, "loss": 0.4333, "step": 4147 }, { "epoch": 0.37225998967938795, "grad_norm": 0.502673097822204, "learning_rate": 9.982342328186176e-06, "loss": 0.4939, "step": 4148 }, { "epoch": 0.37234973413205896, "grad_norm": 0.4856564224131156, "learning_rate": 9.982298456865615e-06, "loss": 0.4833, "step": 4149 }, { "epoch": 0.37243947858473, "grad_norm": 0.4937726725276724, "learning_rate": 9.982254531209113e-06, "loss": 0.4967, "step": 4150 }, { "epoch": 0.372529223037401, "grad_norm": 0.5745343114920156, "learning_rate": 9.98221055121715e-06, "loss": 0.5435, "step": 4151 }, { "epoch": 0.372618967490072, "grad_norm": 0.538512657800478, "learning_rate": 9.982166516890202e-06, "loss": 0.5299, "step": 4152 }, { "epoch": 0.372708711942743, "grad_norm": 0.48053125256416, "learning_rate": 9.98212242822875e-06, "loss": 0.4176, "step": 4153 }, { "epoch": 0.37279845639541404, "grad_norm": 0.48791996419559513, "learning_rate": 9.982078285233278e-06, "loss": 0.4942, "step": 4154 }, { "epoch": 0.3728882008480851, "grad_norm": 0.4996919868290125, "learning_rate": 9.982034087904266e-06, "loss": 0.4878, "step": 4155 }, { "epoch": 0.3729779453007561, "grad_norm": 0.505214973665753, "learning_rate": 9.981989836242193e-06, "loss": 0.5279, "step": 4156 }, { "epoch": 0.37306768975342713, "grad_norm": 0.46591077152266497, "learning_rate": 9.981945530247548e-06, "loss": 0.4412, "step": 4157 }, { "epoch": 0.37315743420609815, "grad_norm": 0.5606895945965561, "learning_rate": 9.981901169920809e-06, "loss": 0.523, "step": 4158 }, { "epoch": 0.37324717865876916, "grad_norm": 0.5015614432796389, "learning_rate": 9.98185675526246e-06, "loss": 0.4878, "step": 4159 }, { "epoch": 0.3733369231114402, "grad_norm": 0.4700383483529803, "learning_rate": 9.981812286272987e-06, "loss": 0.4465, "step": 4160 }, { "epoch": 0.3734266675641112, "grad_norm": 0.5338252602164049, "learning_rate": 9.981767762952874e-06, "loss": 0.5132, "step": 4161 }, { "epoch": 0.3735164120167822, "grad_norm": 0.4583021716431124, "learning_rate": 9.981723185302609e-06, "loss": 0.4749, "step": 4162 }, { "epoch": 0.3736061564694532, "grad_norm": 0.5133779807618387, "learning_rate": 9.981678553322675e-06, "loss": 0.5419, "step": 4163 }, { "epoch": 0.37369590092212424, "grad_norm": 0.4936602017692631, "learning_rate": 9.98163386701356e-06, "loss": 0.4937, "step": 4164 }, { "epoch": 0.37378564537479525, "grad_norm": 0.5736370702753573, "learning_rate": 9.98158912637575e-06, "loss": 0.5373, "step": 4165 }, { "epoch": 0.37387538982746626, "grad_norm": 0.5091292847396091, "learning_rate": 9.981544331409736e-06, "loss": 0.4979, "step": 4166 }, { "epoch": 0.37396513428013733, "grad_norm": 0.5297730892013518, "learning_rate": 9.981499482116003e-06, "loss": 0.5017, "step": 4167 }, { "epoch": 0.37405487873280835, "grad_norm": 0.5227911392721398, "learning_rate": 9.981454578495044e-06, "loss": 0.5251, "step": 4168 }, { "epoch": 0.37414462318547936, "grad_norm": 0.5233202910521144, "learning_rate": 9.981409620547346e-06, "loss": 0.518, "step": 4169 }, { "epoch": 0.3742343676381504, "grad_norm": 0.4953598457713787, "learning_rate": 9.981364608273398e-06, "loss": 0.4721, "step": 4170 }, { "epoch": 0.3743241120908214, "grad_norm": 0.5294527427675851, "learning_rate": 9.981319541673695e-06, "loss": 0.5627, "step": 4171 }, { "epoch": 0.3744138565434924, "grad_norm": 0.4975522631465522, "learning_rate": 9.981274420748725e-06, "loss": 0.4899, "step": 4172 }, { "epoch": 0.3745036009961634, "grad_norm": 0.4817683944364395, "learning_rate": 9.981229245498982e-06, "loss": 0.4501, "step": 4173 }, { "epoch": 0.37459334544883444, "grad_norm": 0.5398504676640683, "learning_rate": 9.981184015924958e-06, "loss": 0.4903, "step": 4174 }, { "epoch": 0.37468308990150545, "grad_norm": 0.514563092294318, "learning_rate": 9.981138732027146e-06, "loss": 0.4818, "step": 4175 }, { "epoch": 0.37477283435417647, "grad_norm": 0.4958901011605586, "learning_rate": 9.98109339380604e-06, "loss": 0.4432, "step": 4176 }, { "epoch": 0.3748625788068475, "grad_norm": 0.5455578770991898, "learning_rate": 9.981048001262136e-06, "loss": 0.4918, "step": 4177 }, { "epoch": 0.3749523232595185, "grad_norm": 0.4958419760078547, "learning_rate": 9.981002554395927e-06, "loss": 0.49, "step": 4178 }, { "epoch": 0.37504206771218956, "grad_norm": 0.49842162631701104, "learning_rate": 9.98095705320791e-06, "loss": 0.5111, "step": 4179 }, { "epoch": 0.3751318121648606, "grad_norm": 0.46053647083829174, "learning_rate": 9.980911497698577e-06, "loss": 0.4398, "step": 4180 }, { "epoch": 0.3752215566175316, "grad_norm": 0.612103137311709, "learning_rate": 9.980865887868432e-06, "loss": 0.6007, "step": 4181 }, { "epoch": 0.3753113010702026, "grad_norm": 0.5205893036503376, "learning_rate": 9.980820223717967e-06, "loss": 0.4809, "step": 4182 }, { "epoch": 0.3754010455228736, "grad_norm": 0.553395294234275, "learning_rate": 9.980774505247682e-06, "loss": 0.5497, "step": 4183 }, { "epoch": 0.37549078997554464, "grad_norm": 0.47781799975298583, "learning_rate": 9.980728732458076e-06, "loss": 0.416, "step": 4184 }, { "epoch": 0.37558053442821565, "grad_norm": 0.4819695539137767, "learning_rate": 9.980682905349646e-06, "loss": 0.4109, "step": 4185 }, { "epoch": 0.37567027888088667, "grad_norm": 0.5031492478235721, "learning_rate": 9.980637023922894e-06, "loss": 0.5079, "step": 4186 }, { "epoch": 0.3757600233335577, "grad_norm": 0.4832378103560468, "learning_rate": 9.980591088178321e-06, "loss": 0.467, "step": 4187 }, { "epoch": 0.3758497677862287, "grad_norm": 0.48752082612344555, "learning_rate": 9.980545098116426e-06, "loss": 0.4949, "step": 4188 }, { "epoch": 0.3759395122388997, "grad_norm": 0.5017640982427015, "learning_rate": 9.980499053737711e-06, "loss": 0.4986, "step": 4189 }, { "epoch": 0.3760292566915708, "grad_norm": 0.497372055511962, "learning_rate": 9.980452955042678e-06, "loss": 0.5077, "step": 4190 }, { "epoch": 0.3761190011442418, "grad_norm": 0.47882608591212544, "learning_rate": 9.98040680203183e-06, "loss": 0.4907, "step": 4191 }, { "epoch": 0.3762087455969128, "grad_norm": 0.45675873044169263, "learning_rate": 9.98036059470567e-06, "loss": 0.4416, "step": 4192 }, { "epoch": 0.3762984900495838, "grad_norm": 0.5194399564975581, "learning_rate": 9.980314333064703e-06, "loss": 0.5219, "step": 4193 }, { "epoch": 0.37638823450225484, "grad_norm": 0.5770502296978264, "learning_rate": 9.980268017109433e-06, "loss": 0.5459, "step": 4194 }, { "epoch": 0.37647797895492585, "grad_norm": 0.5403562736058501, "learning_rate": 9.980221646840365e-06, "loss": 0.5861, "step": 4195 }, { "epoch": 0.37656772340759687, "grad_norm": 0.47423036071110986, "learning_rate": 9.980175222258004e-06, "loss": 0.4658, "step": 4196 }, { "epoch": 0.3766574678602679, "grad_norm": 0.5287709375710344, "learning_rate": 9.980128743362858e-06, "loss": 0.5742, "step": 4197 }, { "epoch": 0.3767472123129389, "grad_norm": 0.529754127402442, "learning_rate": 9.980082210155432e-06, "loss": 0.5359, "step": 4198 }, { "epoch": 0.3768369567656099, "grad_norm": 0.4586723431351852, "learning_rate": 9.980035622636235e-06, "loss": 0.4158, "step": 4199 }, { "epoch": 0.3769267012182809, "grad_norm": 0.47086134912829314, "learning_rate": 9.979988980805775e-06, "loss": 0.4464, "step": 4200 }, { "epoch": 0.37701644567095194, "grad_norm": 0.5214244517954185, "learning_rate": 9.979942284664559e-06, "loss": 0.5084, "step": 4201 }, { "epoch": 0.377106190123623, "grad_norm": 0.45016762084373163, "learning_rate": 9.979895534213097e-06, "loss": 0.4467, "step": 4202 }, { "epoch": 0.377195934576294, "grad_norm": 0.46484503627894913, "learning_rate": 9.9798487294519e-06, "loss": 0.4476, "step": 4203 }, { "epoch": 0.37728567902896504, "grad_norm": 0.48285655366168784, "learning_rate": 9.979801870381478e-06, "loss": 0.5133, "step": 4204 }, { "epoch": 0.37737542348163605, "grad_norm": 0.45760840250325946, "learning_rate": 9.979754957002342e-06, "loss": 0.4662, "step": 4205 }, { "epoch": 0.37746516793430707, "grad_norm": 0.5529242765465985, "learning_rate": 9.979707989315002e-06, "loss": 0.5692, "step": 4206 }, { "epoch": 0.3775549123869781, "grad_norm": 0.5009045476285291, "learning_rate": 9.97966096731997e-06, "loss": 0.4468, "step": 4207 }, { "epoch": 0.3776446568396491, "grad_norm": 0.5131738918789914, "learning_rate": 9.979613891017763e-06, "loss": 0.491, "step": 4208 }, { "epoch": 0.3777344012923201, "grad_norm": 0.4829460088276109, "learning_rate": 9.979566760408892e-06, "loss": 0.4527, "step": 4209 }, { "epoch": 0.3778241457449911, "grad_norm": 0.5163088436999926, "learning_rate": 9.97951957549387e-06, "loss": 0.5262, "step": 4210 }, { "epoch": 0.37791389019766214, "grad_norm": 0.5349792059614138, "learning_rate": 9.979472336273213e-06, "loss": 0.5193, "step": 4211 }, { "epoch": 0.37800363465033315, "grad_norm": 0.49383205208487485, "learning_rate": 9.979425042747433e-06, "loss": 0.4771, "step": 4212 }, { "epoch": 0.37809337910300417, "grad_norm": 0.4581521983817865, "learning_rate": 9.97937769491705e-06, "loss": 0.4439, "step": 4213 }, { "epoch": 0.37818312355567524, "grad_norm": 0.4989451653918908, "learning_rate": 9.97933029278258e-06, "loss": 0.534, "step": 4214 }, { "epoch": 0.37827286800834625, "grad_norm": 0.4983596204091628, "learning_rate": 9.979282836344537e-06, "loss": 0.5018, "step": 4215 }, { "epoch": 0.37836261246101727, "grad_norm": 0.44829212482360414, "learning_rate": 9.979235325603441e-06, "loss": 0.4262, "step": 4216 }, { "epoch": 0.3784523569136883, "grad_norm": 0.49866939505765595, "learning_rate": 9.979187760559808e-06, "loss": 0.4728, "step": 4217 }, { "epoch": 0.3785421013663593, "grad_norm": 0.5062591758594196, "learning_rate": 9.979140141214158e-06, "loss": 0.4239, "step": 4218 }, { "epoch": 0.3786318458190303, "grad_norm": 0.4823385323375618, "learning_rate": 9.979092467567012e-06, "loss": 0.5115, "step": 4219 }, { "epoch": 0.3787215902717013, "grad_norm": 0.5016377927757593, "learning_rate": 9.979044739618887e-06, "loss": 0.5153, "step": 4220 }, { "epoch": 0.37881133472437234, "grad_norm": 0.468434920018293, "learning_rate": 9.978996957370306e-06, "loss": 0.4313, "step": 4221 }, { "epoch": 0.37890107917704335, "grad_norm": 0.5111286198276153, "learning_rate": 9.978949120821789e-06, "loss": 0.5018, "step": 4222 }, { "epoch": 0.37899082362971437, "grad_norm": 0.49678125837261544, "learning_rate": 9.978901229973856e-06, "loss": 0.4925, "step": 4223 }, { "epoch": 0.3790805680823854, "grad_norm": 0.5476911569267265, "learning_rate": 9.978853284827031e-06, "loss": 0.5368, "step": 4224 }, { "epoch": 0.37917031253505645, "grad_norm": 0.4953375339381644, "learning_rate": 9.978805285381838e-06, "loss": 0.4987, "step": 4225 }, { "epoch": 0.37926005698772747, "grad_norm": 0.4979028032076941, "learning_rate": 9.978757231638799e-06, "loss": 0.4435, "step": 4226 }, { "epoch": 0.3793498014403985, "grad_norm": 0.4943593699776616, "learning_rate": 9.978709123598437e-06, "loss": 0.4898, "step": 4227 }, { "epoch": 0.3794395458930695, "grad_norm": 0.5752169562120681, "learning_rate": 9.97866096126128e-06, "loss": 0.6195, "step": 4228 }, { "epoch": 0.3795292903457405, "grad_norm": 0.4868027449894195, "learning_rate": 9.978612744627849e-06, "loss": 0.4996, "step": 4229 }, { "epoch": 0.3796190347984115, "grad_norm": 0.5161822799973257, "learning_rate": 9.978564473698674e-06, "loss": 0.5329, "step": 4230 }, { "epoch": 0.37970877925108254, "grad_norm": 0.47421611909851574, "learning_rate": 9.978516148474279e-06, "loss": 0.4595, "step": 4231 }, { "epoch": 0.37979852370375355, "grad_norm": 0.5315791898998945, "learning_rate": 9.97846776895519e-06, "loss": 0.5705, "step": 4232 }, { "epoch": 0.37988826815642457, "grad_norm": 0.4949615302071906, "learning_rate": 9.978419335141938e-06, "loss": 0.484, "step": 4233 }, { "epoch": 0.3799780126090956, "grad_norm": 0.5224338524890689, "learning_rate": 9.978370847035048e-06, "loss": 0.4925, "step": 4234 }, { "epoch": 0.3800677570617666, "grad_norm": 0.4907151215831741, "learning_rate": 9.978322304635051e-06, "loss": 0.4624, "step": 4235 }, { "epoch": 0.3801575015144376, "grad_norm": 0.4392671940969334, "learning_rate": 9.978273707942477e-06, "loss": 0.4381, "step": 4236 }, { "epoch": 0.3802472459671087, "grad_norm": 0.530829820128318, "learning_rate": 9.978225056957852e-06, "loss": 0.5448, "step": 4237 }, { "epoch": 0.3803369904197797, "grad_norm": 0.5316033156659418, "learning_rate": 9.97817635168171e-06, "loss": 0.5077, "step": 4238 }, { "epoch": 0.3804267348724507, "grad_norm": 0.48544380386431885, "learning_rate": 9.978127592114581e-06, "loss": 0.4462, "step": 4239 }, { "epoch": 0.3805164793251217, "grad_norm": 0.4684775069540284, "learning_rate": 9.978078778256998e-06, "loss": 0.4367, "step": 4240 }, { "epoch": 0.38060622377779274, "grad_norm": 0.4668065162885806, "learning_rate": 9.978029910109491e-06, "loss": 0.4438, "step": 4241 }, { "epoch": 0.38069596823046375, "grad_norm": 0.5059863843419646, "learning_rate": 9.977980987672596e-06, "loss": 0.4553, "step": 4242 }, { "epoch": 0.38078571268313477, "grad_norm": 0.4936198411493507, "learning_rate": 9.977932010946843e-06, "loss": 0.4824, "step": 4243 }, { "epoch": 0.3808754571358058, "grad_norm": 0.4878290034660685, "learning_rate": 9.97788297993277e-06, "loss": 0.4626, "step": 4244 }, { "epoch": 0.3809652015884768, "grad_norm": 0.5566324418310374, "learning_rate": 9.977833894630908e-06, "loss": 0.5758, "step": 4245 }, { "epoch": 0.3810549460411478, "grad_norm": 0.46391946656621824, "learning_rate": 9.977784755041794e-06, "loss": 0.4561, "step": 4246 }, { "epoch": 0.3811446904938188, "grad_norm": 0.5615916578443008, "learning_rate": 9.977735561165965e-06, "loss": 0.5301, "step": 4247 }, { "epoch": 0.3812344349464899, "grad_norm": 0.5030842374981964, "learning_rate": 9.977686313003956e-06, "loss": 0.5269, "step": 4248 }, { "epoch": 0.3813241793991609, "grad_norm": 0.4632600208767467, "learning_rate": 9.977637010556304e-06, "loss": 0.4646, "step": 4249 }, { "epoch": 0.3814139238518319, "grad_norm": 0.5158891792967095, "learning_rate": 9.977587653823548e-06, "loss": 0.4842, "step": 4250 }, { "epoch": 0.38150366830450294, "grad_norm": 0.49718960404099694, "learning_rate": 9.977538242806226e-06, "loss": 0.4696, "step": 4251 }, { "epoch": 0.38159341275717396, "grad_norm": 0.4837152521237393, "learning_rate": 9.977488777504874e-06, "loss": 0.4302, "step": 4252 }, { "epoch": 0.38168315720984497, "grad_norm": 0.500624556402506, "learning_rate": 9.977439257920037e-06, "loss": 0.5012, "step": 4253 }, { "epoch": 0.381772901662516, "grad_norm": 0.4902525336921981, "learning_rate": 9.97738968405225e-06, "loss": 0.4926, "step": 4254 }, { "epoch": 0.381862646115187, "grad_norm": 0.4719552420839815, "learning_rate": 9.977340055902055e-06, "loss": 0.4941, "step": 4255 }, { "epoch": 0.381952390567858, "grad_norm": 0.4523586910867684, "learning_rate": 9.977290373469994e-06, "loss": 0.4032, "step": 4256 }, { "epoch": 0.38204213502052903, "grad_norm": 0.49756361113040887, "learning_rate": 9.97724063675661e-06, "loss": 0.459, "step": 4257 }, { "epoch": 0.38213187947320004, "grad_norm": 0.5396003396990673, "learning_rate": 9.977190845762443e-06, "loss": 0.5414, "step": 4258 }, { "epoch": 0.38222162392587106, "grad_norm": 0.5223215945336455, "learning_rate": 9.977141000488037e-06, "loss": 0.543, "step": 4259 }, { "epoch": 0.3823113683785421, "grad_norm": 0.4739467011754422, "learning_rate": 9.977091100933935e-06, "loss": 0.4837, "step": 4260 }, { "epoch": 0.38240111283121314, "grad_norm": 0.525740730332886, "learning_rate": 9.977041147100683e-06, "loss": 0.4853, "step": 4261 }, { "epoch": 0.38249085728388416, "grad_norm": 0.5001437096242357, "learning_rate": 9.976991138988823e-06, "loss": 0.516, "step": 4262 }, { "epoch": 0.38258060173655517, "grad_norm": 0.5122225365889811, "learning_rate": 9.976941076598904e-06, "loss": 0.5, "step": 4263 }, { "epoch": 0.3826703461892262, "grad_norm": 0.4751604233683343, "learning_rate": 9.976890959931469e-06, "loss": 0.4721, "step": 4264 }, { "epoch": 0.3827600906418972, "grad_norm": 0.5226080388848918, "learning_rate": 9.976840788987065e-06, "loss": 0.5263, "step": 4265 }, { "epoch": 0.3828498350945682, "grad_norm": 0.5187214586248733, "learning_rate": 9.97679056376624e-06, "loss": 0.5295, "step": 4266 }, { "epoch": 0.38293957954723923, "grad_norm": 0.5110720655381333, "learning_rate": 9.976740284269541e-06, "loss": 0.4914, "step": 4267 }, { "epoch": 0.38302932399991024, "grad_norm": 0.45974838811507807, "learning_rate": 9.976689950497517e-06, "loss": 0.4111, "step": 4268 }, { "epoch": 0.38311906845258126, "grad_norm": 0.4670112231570637, "learning_rate": 9.976639562450716e-06, "loss": 0.4484, "step": 4269 }, { "epoch": 0.38320881290525227, "grad_norm": 0.5180543144881352, "learning_rate": 9.976589120129689e-06, "loss": 0.5015, "step": 4270 }, { "epoch": 0.3832985573579233, "grad_norm": 0.4727119839849213, "learning_rate": 9.976538623534985e-06, "loss": 0.4256, "step": 4271 }, { "epoch": 0.38338830181059436, "grad_norm": 0.5390789462461778, "learning_rate": 9.976488072667157e-06, "loss": 0.5193, "step": 4272 }, { "epoch": 0.38347804626326537, "grad_norm": 0.5195466615342567, "learning_rate": 9.976437467526752e-06, "loss": 0.5653, "step": 4273 }, { "epoch": 0.3835677907159364, "grad_norm": 0.5211712285565786, "learning_rate": 9.976386808114324e-06, "loss": 0.4917, "step": 4274 }, { "epoch": 0.3836575351686074, "grad_norm": 0.5103984017591567, "learning_rate": 9.976336094430428e-06, "loss": 0.5348, "step": 4275 }, { "epoch": 0.3837472796212784, "grad_norm": 0.5288051798003288, "learning_rate": 9.976285326475612e-06, "loss": 0.518, "step": 4276 }, { "epoch": 0.38383702407394943, "grad_norm": 0.5068294697018925, "learning_rate": 9.976234504250434e-06, "loss": 0.5218, "step": 4277 }, { "epoch": 0.38392676852662044, "grad_norm": 0.526899357349526, "learning_rate": 9.976183627755446e-06, "loss": 0.4814, "step": 4278 }, { "epoch": 0.38401651297929146, "grad_norm": 0.49604044222676136, "learning_rate": 9.976132696991203e-06, "loss": 0.4712, "step": 4279 }, { "epoch": 0.3841062574319625, "grad_norm": 0.4952707919939127, "learning_rate": 9.976081711958264e-06, "loss": 0.5209, "step": 4280 }, { "epoch": 0.3841960018846335, "grad_norm": 0.5290776554631982, "learning_rate": 9.976030672657177e-06, "loss": 0.5028, "step": 4281 }, { "epoch": 0.3842857463373045, "grad_norm": 0.49202503200549613, "learning_rate": 9.975979579088508e-06, "loss": 0.4885, "step": 4282 }, { "epoch": 0.38437549078997557, "grad_norm": 0.5511578247199909, "learning_rate": 9.975928431252807e-06, "loss": 0.5365, "step": 4283 }, { "epoch": 0.3844652352426466, "grad_norm": 0.49296456094875174, "learning_rate": 9.975877229150636e-06, "loss": 0.4566, "step": 4284 }, { "epoch": 0.3845549796953176, "grad_norm": 0.45050217816655364, "learning_rate": 9.975825972782551e-06, "loss": 0.4401, "step": 4285 }, { "epoch": 0.3846447241479886, "grad_norm": 0.49576146270164334, "learning_rate": 9.975774662149113e-06, "loss": 0.4879, "step": 4286 }, { "epoch": 0.38473446860065963, "grad_norm": 0.4794712249616058, "learning_rate": 9.975723297250881e-06, "loss": 0.476, "step": 4287 }, { "epoch": 0.38482421305333064, "grad_norm": 0.43893799162545266, "learning_rate": 9.975671878088415e-06, "loss": 0.4304, "step": 4288 }, { "epoch": 0.38491395750600166, "grad_norm": 0.47965144462180886, "learning_rate": 9.975620404662274e-06, "loss": 0.4398, "step": 4289 }, { "epoch": 0.3850037019586727, "grad_norm": 0.4738192452693945, "learning_rate": 9.975568876973021e-06, "loss": 0.4155, "step": 4290 }, { "epoch": 0.3850934464113437, "grad_norm": 0.5044102955950098, "learning_rate": 9.97551729502122e-06, "loss": 0.5099, "step": 4291 }, { "epoch": 0.3851831908640147, "grad_norm": 0.4486637931383791, "learning_rate": 9.975465658807428e-06, "loss": 0.4012, "step": 4292 }, { "epoch": 0.3852729353166857, "grad_norm": 0.5338063797374609, "learning_rate": 9.975413968332214e-06, "loss": 0.5035, "step": 4293 }, { "epoch": 0.38536267976935673, "grad_norm": 0.49331624652655737, "learning_rate": 9.97536222359614e-06, "loss": 0.4888, "step": 4294 }, { "epoch": 0.3854524242220278, "grad_norm": 0.5465140363480547, "learning_rate": 9.975310424599766e-06, "loss": 0.479, "step": 4295 }, { "epoch": 0.3855421686746988, "grad_norm": 0.486579919030391, "learning_rate": 9.975258571343665e-06, "loss": 0.4628, "step": 4296 }, { "epoch": 0.38563191312736983, "grad_norm": 0.560169351386099, "learning_rate": 9.975206663828395e-06, "loss": 0.5466, "step": 4297 }, { "epoch": 0.38572165758004084, "grad_norm": 0.4547286036958555, "learning_rate": 9.975154702054525e-06, "loss": 0.3974, "step": 4298 }, { "epoch": 0.38581140203271186, "grad_norm": 0.49931480351066626, "learning_rate": 9.975102686022624e-06, "loss": 0.4807, "step": 4299 }, { "epoch": 0.3859011464853829, "grad_norm": 0.48502780171095683, "learning_rate": 9.975050615733256e-06, "loss": 0.4928, "step": 4300 }, { "epoch": 0.3859908909380539, "grad_norm": 0.5039418820965118, "learning_rate": 9.974998491186988e-06, "loss": 0.5378, "step": 4301 }, { "epoch": 0.3860806353907249, "grad_norm": 0.5089271265185297, "learning_rate": 9.974946312384393e-06, "loss": 0.5292, "step": 4302 }, { "epoch": 0.3861703798433959, "grad_norm": 0.5524663315395779, "learning_rate": 9.974894079326035e-06, "loss": 0.575, "step": 4303 }, { "epoch": 0.38626012429606693, "grad_norm": 0.48248106619831477, "learning_rate": 9.97484179201249e-06, "loss": 0.4794, "step": 4304 }, { "epoch": 0.38634986874873795, "grad_norm": 0.48055695908153206, "learning_rate": 9.97478945044432e-06, "loss": 0.4754, "step": 4305 }, { "epoch": 0.38643961320140896, "grad_norm": 0.4838631162962249, "learning_rate": 9.974737054622101e-06, "loss": 0.4388, "step": 4306 }, { "epoch": 0.38652935765408003, "grad_norm": 0.47219981478725814, "learning_rate": 9.974684604546406e-06, "loss": 0.4071, "step": 4307 }, { "epoch": 0.38661910210675104, "grad_norm": 0.5265809184655214, "learning_rate": 9.974632100217801e-06, "loss": 0.5396, "step": 4308 }, { "epoch": 0.38670884655942206, "grad_norm": 0.5041379970773511, "learning_rate": 9.974579541636863e-06, "loss": 0.5034, "step": 4309 }, { "epoch": 0.3867985910120931, "grad_norm": 0.4832455669752664, "learning_rate": 9.974526928804167e-06, "loss": 0.479, "step": 4310 }, { "epoch": 0.3868883354647641, "grad_norm": 0.5002971764753699, "learning_rate": 9.974474261720283e-06, "loss": 0.4549, "step": 4311 }, { "epoch": 0.3869780799174351, "grad_norm": 0.5057435624448274, "learning_rate": 9.974421540385787e-06, "loss": 0.4621, "step": 4312 }, { "epoch": 0.3870678243701061, "grad_norm": 0.5030194820729356, "learning_rate": 9.97436876480125e-06, "loss": 0.4668, "step": 4313 }, { "epoch": 0.38715756882277713, "grad_norm": 0.48790917121425564, "learning_rate": 9.974315934967254e-06, "loss": 0.4605, "step": 4314 }, { "epoch": 0.38724731327544815, "grad_norm": 0.5278726234752132, "learning_rate": 9.974263050884372e-06, "loss": 0.5305, "step": 4315 }, { "epoch": 0.38733705772811916, "grad_norm": 0.5367172739953446, "learning_rate": 9.974210112553182e-06, "loss": 0.5872, "step": 4316 }, { "epoch": 0.3874268021807902, "grad_norm": 0.487451381102993, "learning_rate": 9.974157119974257e-06, "loss": 0.4643, "step": 4317 }, { "epoch": 0.38751654663346125, "grad_norm": 0.5148272714234058, "learning_rate": 9.97410407314818e-06, "loss": 0.5528, "step": 4318 }, { "epoch": 0.38760629108613226, "grad_norm": 0.45971929953852847, "learning_rate": 9.974050972075529e-06, "loss": 0.4472, "step": 4319 }, { "epoch": 0.3876960355388033, "grad_norm": 0.5110762339678894, "learning_rate": 9.973997816756883e-06, "loss": 0.5243, "step": 4320 }, { "epoch": 0.3877857799914743, "grad_norm": 0.48023219536476874, "learning_rate": 9.973944607192815e-06, "loss": 0.4398, "step": 4321 }, { "epoch": 0.3878755244441453, "grad_norm": 0.4795222459412853, "learning_rate": 9.973891343383914e-06, "loss": 0.5005, "step": 4322 }, { "epoch": 0.3879652688968163, "grad_norm": 0.4804159041028912, "learning_rate": 9.973838025330759e-06, "loss": 0.4343, "step": 4323 }, { "epoch": 0.38805501334948733, "grad_norm": 0.5371867852047627, "learning_rate": 9.973784653033929e-06, "loss": 0.465, "step": 4324 }, { "epoch": 0.38814475780215835, "grad_norm": 0.4908779376910422, "learning_rate": 9.973731226494009e-06, "loss": 0.4925, "step": 4325 }, { "epoch": 0.38823450225482936, "grad_norm": 0.5491557147637209, "learning_rate": 9.973677745711577e-06, "loss": 0.5392, "step": 4326 }, { "epoch": 0.3883242467075004, "grad_norm": 0.4611154344580379, "learning_rate": 9.973624210687219e-06, "loss": 0.4324, "step": 4327 }, { "epoch": 0.3884139911601714, "grad_norm": 0.48869403927259264, "learning_rate": 9.97357062142152e-06, "loss": 0.469, "step": 4328 }, { "epoch": 0.3885037356128424, "grad_norm": 0.47272697409722464, "learning_rate": 9.973516977915064e-06, "loss": 0.45, "step": 4329 }, { "epoch": 0.3885934800655135, "grad_norm": 0.49396026884385486, "learning_rate": 9.973463280168437e-06, "loss": 0.4888, "step": 4330 }, { "epoch": 0.3886832245181845, "grad_norm": 0.4896455411020741, "learning_rate": 9.97340952818222e-06, "loss": 0.4761, "step": 4331 }, { "epoch": 0.3887729689708555, "grad_norm": 0.5115614239340234, "learning_rate": 9.973355721957003e-06, "loss": 0.5117, "step": 4332 }, { "epoch": 0.3888627134235265, "grad_norm": 0.5014110491335411, "learning_rate": 9.973301861493373e-06, "loss": 0.4902, "step": 4333 }, { "epoch": 0.38895245787619753, "grad_norm": 0.5135697421415657, "learning_rate": 9.973247946791915e-06, "loss": 0.5301, "step": 4334 }, { "epoch": 0.38904220232886855, "grad_norm": 0.47555834836141175, "learning_rate": 9.97319397785322e-06, "loss": 0.4653, "step": 4335 }, { "epoch": 0.38913194678153956, "grad_norm": 0.497156607067981, "learning_rate": 9.973139954677875e-06, "loss": 0.4665, "step": 4336 }, { "epoch": 0.3892216912342106, "grad_norm": 0.4825408499269647, "learning_rate": 9.973085877266469e-06, "loss": 0.4441, "step": 4337 }, { "epoch": 0.3893114356868816, "grad_norm": 0.4779713685191843, "learning_rate": 9.97303174561959e-06, "loss": 0.4661, "step": 4338 }, { "epoch": 0.3894011801395526, "grad_norm": 0.4699938158908379, "learning_rate": 9.972977559737833e-06, "loss": 0.456, "step": 4339 }, { "epoch": 0.3894909245922236, "grad_norm": 0.48602342788240954, "learning_rate": 9.972923319621786e-06, "loss": 0.475, "step": 4340 }, { "epoch": 0.3895806690448947, "grad_norm": 0.4717076035869659, "learning_rate": 9.97286902527204e-06, "loss": 0.4552, "step": 4341 }, { "epoch": 0.3896704134975657, "grad_norm": 0.5228845462751444, "learning_rate": 9.972814676689187e-06, "loss": 0.5272, "step": 4342 }, { "epoch": 0.3897601579502367, "grad_norm": 0.5132797784791545, "learning_rate": 9.972760273873822e-06, "loss": 0.4432, "step": 4343 }, { "epoch": 0.38984990240290773, "grad_norm": 0.4692960590557307, "learning_rate": 9.972705816826535e-06, "loss": 0.4224, "step": 4344 }, { "epoch": 0.38993964685557875, "grad_norm": 0.47737744058285575, "learning_rate": 9.972651305547925e-06, "loss": 0.4582, "step": 4345 }, { "epoch": 0.39002939130824976, "grad_norm": 0.48880606198935916, "learning_rate": 9.97259674003858e-06, "loss": 0.433, "step": 4346 }, { "epoch": 0.3901191357609208, "grad_norm": 0.4828872015612475, "learning_rate": 9.9725421202991e-06, "loss": 0.4554, "step": 4347 }, { "epoch": 0.3902088802135918, "grad_norm": 0.515480416814972, "learning_rate": 9.972487446330079e-06, "loss": 0.5335, "step": 4348 }, { "epoch": 0.3902986246662628, "grad_norm": 0.47890885832740376, "learning_rate": 9.972432718132114e-06, "loss": 0.4491, "step": 4349 }, { "epoch": 0.3903883691189338, "grad_norm": 0.4950840628092488, "learning_rate": 9.972377935705801e-06, "loss": 0.5163, "step": 4350 }, { "epoch": 0.39047811357160483, "grad_norm": 0.49973823032218095, "learning_rate": 9.972323099051737e-06, "loss": 0.4487, "step": 4351 }, { "epoch": 0.39056785802427585, "grad_norm": 0.4973610386127306, "learning_rate": 9.97226820817052e-06, "loss": 0.5004, "step": 4352 }, { "epoch": 0.3906576024769469, "grad_norm": 0.5043036648824953, "learning_rate": 9.972213263062751e-06, "loss": 0.4666, "step": 4353 }, { "epoch": 0.39074734692961793, "grad_norm": 0.4928811857484728, "learning_rate": 9.972158263729027e-06, "loss": 0.4621, "step": 4354 }, { "epoch": 0.39083709138228895, "grad_norm": 0.4657481709138899, "learning_rate": 9.972103210169947e-06, "loss": 0.4567, "step": 4355 }, { "epoch": 0.39092683583495996, "grad_norm": 0.46524710733568314, "learning_rate": 9.972048102386114e-06, "loss": 0.4586, "step": 4356 }, { "epoch": 0.391016580287631, "grad_norm": 0.5211290255058149, "learning_rate": 9.971992940378126e-06, "loss": 0.4833, "step": 4357 }, { "epoch": 0.391106324740302, "grad_norm": 0.4934445523792471, "learning_rate": 9.971937724146588e-06, "loss": 0.4888, "step": 4358 }, { "epoch": 0.391196069192973, "grad_norm": 0.5126798401663198, "learning_rate": 9.971882453692101e-06, "loss": 0.4864, "step": 4359 }, { "epoch": 0.391285813645644, "grad_norm": 0.49132163538768914, "learning_rate": 9.971827129015267e-06, "loss": 0.4626, "step": 4360 }, { "epoch": 0.39137555809831504, "grad_norm": 0.47914713967702965, "learning_rate": 9.971771750116688e-06, "loss": 0.469, "step": 4361 }, { "epoch": 0.39146530255098605, "grad_norm": 0.4684538063408932, "learning_rate": 9.97171631699697e-06, "loss": 0.4579, "step": 4362 }, { "epoch": 0.39155504700365706, "grad_norm": 0.48125399595781426, "learning_rate": 9.971660829656717e-06, "loss": 0.449, "step": 4363 }, { "epoch": 0.3916447914563281, "grad_norm": 0.5114605467808128, "learning_rate": 9.971605288096536e-06, "loss": 0.519, "step": 4364 }, { "epoch": 0.39173453590899915, "grad_norm": 0.49992629206809863, "learning_rate": 9.97154969231703e-06, "loss": 0.4847, "step": 4365 }, { "epoch": 0.39182428036167016, "grad_norm": 0.5127733428702014, "learning_rate": 9.971494042318806e-06, "loss": 0.4926, "step": 4366 }, { "epoch": 0.3919140248143412, "grad_norm": 0.516564331123576, "learning_rate": 9.97143833810247e-06, "loss": 0.5354, "step": 4367 }, { "epoch": 0.3920037692670122, "grad_norm": 0.5026080527189307, "learning_rate": 9.971382579668632e-06, "loss": 0.5228, "step": 4368 }, { "epoch": 0.3920935137196832, "grad_norm": 0.49796084131217927, "learning_rate": 9.971326767017899e-06, "loss": 0.425, "step": 4369 }, { "epoch": 0.3921832581723542, "grad_norm": 0.5028435128726623, "learning_rate": 9.97127090015088e-06, "loss": 0.4563, "step": 4370 }, { "epoch": 0.39227300262502524, "grad_norm": 0.4831723694991434, "learning_rate": 9.971214979068182e-06, "loss": 0.4113, "step": 4371 }, { "epoch": 0.39236274707769625, "grad_norm": 0.503006858910589, "learning_rate": 9.971159003770418e-06, "loss": 0.4734, "step": 4372 }, { "epoch": 0.39245249153036726, "grad_norm": 0.5289965835836805, "learning_rate": 9.971102974258196e-06, "loss": 0.5561, "step": 4373 }, { "epoch": 0.3925422359830383, "grad_norm": 0.4610098069062488, "learning_rate": 9.971046890532129e-06, "loss": 0.4316, "step": 4374 }, { "epoch": 0.3926319804357093, "grad_norm": 0.4684123396364914, "learning_rate": 9.970990752592827e-06, "loss": 0.4182, "step": 4375 }, { "epoch": 0.39272172488838036, "grad_norm": 0.4714975247048358, "learning_rate": 9.970934560440904e-06, "loss": 0.4353, "step": 4376 }, { "epoch": 0.3928114693410514, "grad_norm": 0.5646615862529983, "learning_rate": 9.97087831407697e-06, "loss": 0.5776, "step": 4377 }, { "epoch": 0.3929012137937224, "grad_norm": 0.45385043507088674, "learning_rate": 9.970822013501642e-06, "loss": 0.4463, "step": 4378 }, { "epoch": 0.3929909582463934, "grad_norm": 0.5090081387850431, "learning_rate": 9.970765658715531e-06, "loss": 0.5208, "step": 4379 }, { "epoch": 0.3930807026990644, "grad_norm": 0.4724485560907876, "learning_rate": 9.970709249719254e-06, "loss": 0.4282, "step": 4380 }, { "epoch": 0.39317044715173544, "grad_norm": 0.5074620912534412, "learning_rate": 9.970652786513424e-06, "loss": 0.5099, "step": 4381 }, { "epoch": 0.39326019160440645, "grad_norm": 0.5247790490513347, "learning_rate": 9.970596269098659e-06, "loss": 0.5693, "step": 4382 }, { "epoch": 0.39334993605707747, "grad_norm": 0.4620876299349783, "learning_rate": 9.970539697475573e-06, "loss": 0.4397, "step": 4383 }, { "epoch": 0.3934396805097485, "grad_norm": 0.5072017123303518, "learning_rate": 9.970483071644783e-06, "loss": 0.5323, "step": 4384 }, { "epoch": 0.3935294249624195, "grad_norm": 0.5212178659894532, "learning_rate": 9.97042639160691e-06, "loss": 0.518, "step": 4385 }, { "epoch": 0.3936191694150905, "grad_norm": 0.44726219712976795, "learning_rate": 9.970369657362567e-06, "loss": 0.4322, "step": 4386 }, { "epoch": 0.3937089138677615, "grad_norm": 0.519870684437044, "learning_rate": 9.970312868912377e-06, "loss": 0.5266, "step": 4387 }, { "epoch": 0.3937986583204326, "grad_norm": 0.5027714557343959, "learning_rate": 9.970256026256959e-06, "loss": 0.5237, "step": 4388 }, { "epoch": 0.3938884027731036, "grad_norm": 0.4857364917853876, "learning_rate": 9.97019912939693e-06, "loss": 0.5025, "step": 4389 }, { "epoch": 0.3939781472257746, "grad_norm": 0.4940225145286117, "learning_rate": 9.970142178332914e-06, "loss": 0.4629, "step": 4390 }, { "epoch": 0.39406789167844564, "grad_norm": 0.4697344487309064, "learning_rate": 9.970085173065528e-06, "loss": 0.4608, "step": 4391 }, { "epoch": 0.39415763613111665, "grad_norm": 0.4720858735946101, "learning_rate": 9.970028113595396e-06, "loss": 0.4465, "step": 4392 }, { "epoch": 0.39424738058378767, "grad_norm": 0.46784895507231805, "learning_rate": 9.969970999923142e-06, "loss": 0.4419, "step": 4393 }, { "epoch": 0.3943371250364587, "grad_norm": 0.49535555249394736, "learning_rate": 9.969913832049386e-06, "loss": 0.4844, "step": 4394 }, { "epoch": 0.3944268694891297, "grad_norm": 0.5057066316627341, "learning_rate": 9.969856609974755e-06, "loss": 0.5476, "step": 4395 }, { "epoch": 0.3945166139418007, "grad_norm": 0.47431346411099684, "learning_rate": 9.96979933369987e-06, "loss": 0.449, "step": 4396 }, { "epoch": 0.3946063583944717, "grad_norm": 0.4532759542576207, "learning_rate": 9.969742003225353e-06, "loss": 0.4606, "step": 4397 }, { "epoch": 0.39469610284714274, "grad_norm": 0.4953970052057739, "learning_rate": 9.969684618551836e-06, "loss": 0.4453, "step": 4398 }, { "epoch": 0.3947858472998138, "grad_norm": 0.5193449608732124, "learning_rate": 9.969627179679941e-06, "loss": 0.5134, "step": 4399 }, { "epoch": 0.3948755917524848, "grad_norm": 0.510905542659681, "learning_rate": 9.969569686610295e-06, "loss": 0.5108, "step": 4400 }, { "epoch": 0.39496533620515584, "grad_norm": 0.5156603419925159, "learning_rate": 9.969512139343523e-06, "loss": 0.4628, "step": 4401 }, { "epoch": 0.39505508065782685, "grad_norm": 0.5281356404267877, "learning_rate": 9.969454537880254e-06, "loss": 0.4951, "step": 4402 }, { "epoch": 0.39514482511049787, "grad_norm": 0.4693209750158662, "learning_rate": 9.969396882221119e-06, "loss": 0.4524, "step": 4403 }, { "epoch": 0.3952345695631689, "grad_norm": 0.5113968369898946, "learning_rate": 9.969339172366744e-06, "loss": 0.5204, "step": 4404 }, { "epoch": 0.3953243140158399, "grad_norm": 0.4428567286576333, "learning_rate": 9.969281408317758e-06, "loss": 0.402, "step": 4405 }, { "epoch": 0.3954140584685109, "grad_norm": 0.4929546132946335, "learning_rate": 9.969223590074792e-06, "loss": 0.4571, "step": 4406 }, { "epoch": 0.3955038029211819, "grad_norm": 0.4732278204145844, "learning_rate": 9.969165717638477e-06, "loss": 0.4448, "step": 4407 }, { "epoch": 0.39559354737385294, "grad_norm": 0.4900583825616085, "learning_rate": 9.969107791009441e-06, "loss": 0.4366, "step": 4408 }, { "epoch": 0.39568329182652395, "grad_norm": 0.514317967118707, "learning_rate": 9.96904981018832e-06, "loss": 0.4977, "step": 4409 }, { "epoch": 0.39577303627919497, "grad_norm": 0.5276637122347931, "learning_rate": 9.968991775175743e-06, "loss": 0.5452, "step": 4410 }, { "epoch": 0.39586278073186604, "grad_norm": 0.5351096746302159, "learning_rate": 9.968933685972347e-06, "loss": 0.4993, "step": 4411 }, { "epoch": 0.39595252518453705, "grad_norm": 0.553086843462827, "learning_rate": 9.96887554257876e-06, "loss": 0.562, "step": 4412 }, { "epoch": 0.39604226963720807, "grad_norm": 0.4971685919354373, "learning_rate": 9.968817344995622e-06, "loss": 0.4766, "step": 4413 }, { "epoch": 0.3961320140898791, "grad_norm": 0.45743280983931944, "learning_rate": 9.968759093223563e-06, "loss": 0.414, "step": 4414 }, { "epoch": 0.3962217585425501, "grad_norm": 0.4998327045902143, "learning_rate": 9.96870078726322e-06, "loss": 0.4635, "step": 4415 }, { "epoch": 0.3963115029952211, "grad_norm": 0.490893645422004, "learning_rate": 9.96864242711523e-06, "loss": 0.4942, "step": 4416 }, { "epoch": 0.3964012474478921, "grad_norm": 0.4994421371823295, "learning_rate": 9.968584012780227e-06, "loss": 0.4769, "step": 4417 }, { "epoch": 0.39649099190056314, "grad_norm": 0.5463358655782491, "learning_rate": 9.96852554425885e-06, "loss": 0.5328, "step": 4418 }, { "epoch": 0.39658073635323415, "grad_norm": 0.4702850103096045, "learning_rate": 9.968467021551736e-06, "loss": 0.4168, "step": 4419 }, { "epoch": 0.39667048080590517, "grad_norm": 0.4617921095460931, "learning_rate": 9.968408444659522e-06, "loss": 0.4464, "step": 4420 }, { "epoch": 0.3967602252585762, "grad_norm": 0.45952884034714353, "learning_rate": 9.968349813582849e-06, "loss": 0.4396, "step": 4421 }, { "epoch": 0.3968499697112472, "grad_norm": 0.5635509263129247, "learning_rate": 9.968291128322355e-06, "loss": 0.5784, "step": 4422 }, { "epoch": 0.39693971416391827, "grad_norm": 0.5364135562530103, "learning_rate": 9.968232388878682e-06, "loss": 0.4951, "step": 4423 }, { "epoch": 0.3970294586165893, "grad_norm": 0.5202387986430183, "learning_rate": 9.968173595252468e-06, "loss": 0.5185, "step": 4424 }, { "epoch": 0.3971192030692603, "grad_norm": 0.4621062366822924, "learning_rate": 9.968114747444355e-06, "loss": 0.4563, "step": 4425 }, { "epoch": 0.3972089475219313, "grad_norm": 0.443097069058059, "learning_rate": 9.968055845454986e-06, "loss": 0.41, "step": 4426 }, { "epoch": 0.3972986919746023, "grad_norm": 0.5300841164460908, "learning_rate": 9.967996889285003e-06, "loss": 0.5592, "step": 4427 }, { "epoch": 0.39738843642727334, "grad_norm": 0.4829610312885885, "learning_rate": 9.967937878935048e-06, "loss": 0.4877, "step": 4428 }, { "epoch": 0.39747818087994435, "grad_norm": 0.4826145648438093, "learning_rate": 9.967878814405763e-06, "loss": 0.4703, "step": 4429 }, { "epoch": 0.39756792533261537, "grad_norm": 0.47806679564611604, "learning_rate": 9.967819695697796e-06, "loss": 0.4786, "step": 4430 }, { "epoch": 0.3976576697852864, "grad_norm": 0.49856311625416255, "learning_rate": 9.96776052281179e-06, "loss": 0.5057, "step": 4431 }, { "epoch": 0.3977474142379574, "grad_norm": 0.45714693393798156, "learning_rate": 9.96770129574839e-06, "loss": 0.4093, "step": 4432 }, { "epoch": 0.3978371586906284, "grad_norm": 0.5080422948291479, "learning_rate": 9.96764201450824e-06, "loss": 0.5158, "step": 4433 }, { "epoch": 0.3979269031432995, "grad_norm": 0.5015110271737421, "learning_rate": 9.96758267909199e-06, "loss": 0.4789, "step": 4434 }, { "epoch": 0.3980166475959705, "grad_norm": 0.49745878388897385, "learning_rate": 9.967523289500288e-06, "loss": 0.4896, "step": 4435 }, { "epoch": 0.3981063920486415, "grad_norm": 0.50233316518891, "learning_rate": 9.967463845733778e-06, "loss": 0.5008, "step": 4436 }, { "epoch": 0.3981961365013125, "grad_norm": 0.48624094044310084, "learning_rate": 9.967404347793109e-06, "loss": 0.4702, "step": 4437 }, { "epoch": 0.39828588095398354, "grad_norm": 0.4473739655427887, "learning_rate": 9.967344795678931e-06, "loss": 0.4638, "step": 4438 }, { "epoch": 0.39837562540665455, "grad_norm": 0.499989983234306, "learning_rate": 9.967285189391894e-06, "loss": 0.5345, "step": 4439 }, { "epoch": 0.39846536985932557, "grad_norm": 0.4505818470517731, "learning_rate": 9.967225528932645e-06, "loss": 0.3946, "step": 4440 }, { "epoch": 0.3985551143119966, "grad_norm": 0.49508852288278693, "learning_rate": 9.967165814301838e-06, "loss": 0.4797, "step": 4441 }, { "epoch": 0.3986448587646676, "grad_norm": 0.5089903711678871, "learning_rate": 9.967106045500123e-06, "loss": 0.4981, "step": 4442 }, { "epoch": 0.3987346032173386, "grad_norm": 0.4584283814067944, "learning_rate": 9.967046222528151e-06, "loss": 0.4627, "step": 4443 }, { "epoch": 0.3988243476700096, "grad_norm": 0.4636005340879217, "learning_rate": 9.966986345386576e-06, "loss": 0.4496, "step": 4444 }, { "epoch": 0.39891409212268064, "grad_norm": 0.48054587835707396, "learning_rate": 9.96692641407605e-06, "loss": 0.4687, "step": 4445 }, { "epoch": 0.3990038365753517, "grad_norm": 0.5045391200699204, "learning_rate": 9.966866428597226e-06, "loss": 0.4726, "step": 4446 }, { "epoch": 0.3990935810280227, "grad_norm": 0.4851925149740408, "learning_rate": 9.96680638895076e-06, "loss": 0.4567, "step": 4447 }, { "epoch": 0.39918332548069374, "grad_norm": 0.46862861029153874, "learning_rate": 9.966746295137305e-06, "loss": 0.413, "step": 4448 }, { "epoch": 0.39927306993336475, "grad_norm": 0.5115527348009631, "learning_rate": 9.966686147157518e-06, "loss": 0.5147, "step": 4449 }, { "epoch": 0.39936281438603577, "grad_norm": 0.5042111294004278, "learning_rate": 9.966625945012054e-06, "loss": 0.4953, "step": 4450 }, { "epoch": 0.3994525588387068, "grad_norm": 0.5023397875322128, "learning_rate": 9.966565688701569e-06, "loss": 0.4543, "step": 4451 }, { "epoch": 0.3995423032913778, "grad_norm": 0.5455030953587039, "learning_rate": 9.966505378226722e-06, "loss": 0.5416, "step": 4452 }, { "epoch": 0.3996320477440488, "grad_norm": 0.43161881045708894, "learning_rate": 9.966445013588168e-06, "loss": 0.3752, "step": 4453 }, { "epoch": 0.3997217921967198, "grad_norm": 0.44997093057169757, "learning_rate": 9.966384594786569e-06, "loss": 0.4307, "step": 4454 }, { "epoch": 0.39981153664939084, "grad_norm": 0.5589732593677655, "learning_rate": 9.96632412182258e-06, "loss": 0.4697, "step": 4455 }, { "epoch": 0.39990128110206186, "grad_norm": 0.5617931485596148, "learning_rate": 9.966263594696862e-06, "loss": 0.5621, "step": 4456 }, { "epoch": 0.39999102555473287, "grad_norm": 0.49869699423968156, "learning_rate": 9.966203013410076e-06, "loss": 0.4798, "step": 4457 }, { "epoch": 0.40008077000740394, "grad_norm": 0.49097348926522694, "learning_rate": 9.966142377962883e-06, "loss": 0.4762, "step": 4458 }, { "epoch": 0.40017051446007496, "grad_norm": 0.504445804264276, "learning_rate": 9.966081688355942e-06, "loss": 0.5035, "step": 4459 }, { "epoch": 0.40026025891274597, "grad_norm": 0.4907964489170539, "learning_rate": 9.966020944589918e-06, "loss": 0.4856, "step": 4460 }, { "epoch": 0.400350003365417, "grad_norm": 0.4721494780442798, "learning_rate": 9.96596014666547e-06, "loss": 0.4703, "step": 4461 }, { "epoch": 0.400439747818088, "grad_norm": 0.5066212048831001, "learning_rate": 9.965899294583263e-06, "loss": 0.5373, "step": 4462 }, { "epoch": 0.400529492270759, "grad_norm": 0.5035932945592562, "learning_rate": 9.96583838834396e-06, "loss": 0.5019, "step": 4463 }, { "epoch": 0.40061923672343003, "grad_norm": 0.5268754767358496, "learning_rate": 9.965777427948227e-06, "loss": 0.559, "step": 4464 }, { "epoch": 0.40070898117610104, "grad_norm": 0.4686157774732924, "learning_rate": 9.965716413396726e-06, "loss": 0.4877, "step": 4465 }, { "epoch": 0.40079872562877206, "grad_norm": 0.5008578280956137, "learning_rate": 9.965655344690124e-06, "loss": 0.5524, "step": 4466 }, { "epoch": 0.40088847008144307, "grad_norm": 0.47012737343712324, "learning_rate": 9.965594221829088e-06, "loss": 0.4455, "step": 4467 }, { "epoch": 0.4009782145341141, "grad_norm": 0.5499070098809439, "learning_rate": 9.965533044814282e-06, "loss": 0.5112, "step": 4468 }, { "epoch": 0.40106795898678516, "grad_norm": 0.48105148486263977, "learning_rate": 9.965471813646377e-06, "loss": 0.4477, "step": 4469 }, { "epoch": 0.40115770343945617, "grad_norm": 0.5057569482466663, "learning_rate": 9.965410528326035e-06, "loss": 0.4885, "step": 4470 }, { "epoch": 0.4012474478921272, "grad_norm": 0.5069146451605896, "learning_rate": 9.96534918885393e-06, "loss": 0.4394, "step": 4471 }, { "epoch": 0.4013371923447982, "grad_norm": 0.47042559581143417, "learning_rate": 9.965287795230728e-06, "loss": 0.4487, "step": 4472 }, { "epoch": 0.4014269367974692, "grad_norm": 0.48618960329258537, "learning_rate": 9.9652263474571e-06, "loss": 0.4664, "step": 4473 }, { "epoch": 0.40151668125014023, "grad_norm": 0.511846474497886, "learning_rate": 9.965164845533715e-06, "loss": 0.5421, "step": 4474 }, { "epoch": 0.40160642570281124, "grad_norm": 0.4848159345367383, "learning_rate": 9.965103289461245e-06, "loss": 0.4977, "step": 4475 }, { "epoch": 0.40169617015548226, "grad_norm": 0.48422509158214677, "learning_rate": 9.96504167924036e-06, "loss": 0.4367, "step": 4476 }, { "epoch": 0.40178591460815327, "grad_norm": 0.5313342050009705, "learning_rate": 9.96498001487173e-06, "loss": 0.5259, "step": 4477 }, { "epoch": 0.4018756590608243, "grad_norm": 0.48815875348408494, "learning_rate": 9.964918296356031e-06, "loss": 0.4498, "step": 4478 }, { "epoch": 0.4019654035134953, "grad_norm": 0.4967321263376117, "learning_rate": 9.964856523693937e-06, "loss": 0.477, "step": 4479 }, { "epoch": 0.4020551479661663, "grad_norm": 0.5254666513912084, "learning_rate": 9.964794696886117e-06, "loss": 0.5133, "step": 4480 }, { "epoch": 0.4021448924188374, "grad_norm": 0.47672810126206067, "learning_rate": 9.96473281593325e-06, "loss": 0.4641, "step": 4481 }, { "epoch": 0.4022346368715084, "grad_norm": 0.5229681337965862, "learning_rate": 9.964670880836009e-06, "loss": 0.5962, "step": 4482 }, { "epoch": 0.4023243813241794, "grad_norm": 0.4910334283860501, "learning_rate": 9.964608891595067e-06, "loss": 0.4474, "step": 4483 }, { "epoch": 0.40241412577685043, "grad_norm": 0.4985712826846192, "learning_rate": 9.964546848211102e-06, "loss": 0.4805, "step": 4484 }, { "epoch": 0.40250387022952144, "grad_norm": 0.47648389048705986, "learning_rate": 9.964484750684793e-06, "loss": 0.4624, "step": 4485 }, { "epoch": 0.40259361468219246, "grad_norm": 0.4576531245170057, "learning_rate": 9.964422599016815e-06, "loss": 0.4165, "step": 4486 }, { "epoch": 0.40268335913486347, "grad_norm": 0.5029516113740319, "learning_rate": 9.964360393207845e-06, "loss": 0.5391, "step": 4487 }, { "epoch": 0.4027731035875345, "grad_norm": 0.49262891415264787, "learning_rate": 9.964298133258564e-06, "loss": 0.5043, "step": 4488 }, { "epoch": 0.4028628480402055, "grad_norm": 0.516626053273196, "learning_rate": 9.964235819169647e-06, "loss": 0.4841, "step": 4489 }, { "epoch": 0.4029525924928765, "grad_norm": 0.47898908765047676, "learning_rate": 9.964173450941777e-06, "loss": 0.4765, "step": 4490 }, { "epoch": 0.40304233694554753, "grad_norm": 0.5286247874482752, "learning_rate": 9.96411102857563e-06, "loss": 0.5085, "step": 4491 }, { "epoch": 0.4031320813982186, "grad_norm": 0.48778093465030736, "learning_rate": 9.964048552071895e-06, "loss": 0.4741, "step": 4492 }, { "epoch": 0.4032218258508896, "grad_norm": 0.5311365153405037, "learning_rate": 9.963986021431246e-06, "loss": 0.5435, "step": 4493 }, { "epoch": 0.40331157030356063, "grad_norm": 0.5356548055747204, "learning_rate": 9.963923436654367e-06, "loss": 0.5402, "step": 4494 }, { "epoch": 0.40340131475623164, "grad_norm": 0.504577103466302, "learning_rate": 9.96386079774194e-06, "loss": 0.4526, "step": 4495 }, { "epoch": 0.40349105920890266, "grad_norm": 0.501622667845821, "learning_rate": 9.96379810469465e-06, "loss": 0.4651, "step": 4496 }, { "epoch": 0.4035808036615737, "grad_norm": 0.45011605952305755, "learning_rate": 9.96373535751318e-06, "loss": 0.4469, "step": 4497 }, { "epoch": 0.4036705481142447, "grad_norm": 0.5207121794325652, "learning_rate": 9.963672556198212e-06, "loss": 0.5348, "step": 4498 }, { "epoch": 0.4037602925669157, "grad_norm": 0.47093338907117405, "learning_rate": 9.963609700750434e-06, "loss": 0.4531, "step": 4499 }, { "epoch": 0.4038500370195867, "grad_norm": 0.4598359571533913, "learning_rate": 9.96354679117053e-06, "loss": 0.411, "step": 4500 }, { "epoch": 0.40393978147225773, "grad_norm": 0.5232967595398418, "learning_rate": 9.963483827459185e-06, "loss": 0.5134, "step": 4501 }, { "epoch": 0.40402952592492875, "grad_norm": 0.4933829521818477, "learning_rate": 9.963420809617088e-06, "loss": 0.4534, "step": 4502 }, { "epoch": 0.40411927037759976, "grad_norm": 0.4676204493862575, "learning_rate": 9.963357737644925e-06, "loss": 0.4544, "step": 4503 }, { "epoch": 0.40420901483027083, "grad_norm": 0.4951216896829072, "learning_rate": 9.963294611543385e-06, "loss": 0.4962, "step": 4504 }, { "epoch": 0.40429875928294184, "grad_norm": 0.4478751847571459, "learning_rate": 9.963231431313156e-06, "loss": 0.397, "step": 4505 }, { "epoch": 0.40438850373561286, "grad_norm": 0.45843246016418576, "learning_rate": 9.963168196954924e-06, "loss": 0.4422, "step": 4506 }, { "epoch": 0.4044782481882839, "grad_norm": 0.49304294768695045, "learning_rate": 9.963104908469382e-06, "loss": 0.4721, "step": 4507 }, { "epoch": 0.4045679926409549, "grad_norm": 0.5162455154815253, "learning_rate": 9.963041565857221e-06, "loss": 0.4725, "step": 4508 }, { "epoch": 0.4046577370936259, "grad_norm": 0.4909399989686327, "learning_rate": 9.96297816911913e-06, "loss": 0.4346, "step": 4509 }, { "epoch": 0.4047474815462969, "grad_norm": 0.4678150663465116, "learning_rate": 9.9629147182558e-06, "loss": 0.4388, "step": 4510 }, { "epoch": 0.40483722599896793, "grad_norm": 0.5114223665854885, "learning_rate": 9.962851213267924e-06, "loss": 0.5448, "step": 4511 }, { "epoch": 0.40492697045163895, "grad_norm": 0.4693157009485709, "learning_rate": 9.962787654156191e-06, "loss": 0.4447, "step": 4512 }, { "epoch": 0.40501671490430996, "grad_norm": 0.5408686997552365, "learning_rate": 9.962724040921301e-06, "loss": 0.5391, "step": 4513 }, { "epoch": 0.405106459356981, "grad_norm": 0.5392935501674402, "learning_rate": 9.962660373563943e-06, "loss": 0.5171, "step": 4514 }, { "epoch": 0.405196203809652, "grad_norm": 0.46657478463847607, "learning_rate": 9.962596652084812e-06, "loss": 0.4612, "step": 4515 }, { "epoch": 0.40528594826232306, "grad_norm": 0.4799327268424635, "learning_rate": 9.962532876484604e-06, "loss": 0.4672, "step": 4516 }, { "epoch": 0.4053756927149941, "grad_norm": 0.4871422286942435, "learning_rate": 9.962469046764013e-06, "loss": 0.4985, "step": 4517 }, { "epoch": 0.4054654371676651, "grad_norm": 0.4805255360816817, "learning_rate": 9.962405162923736e-06, "loss": 0.502, "step": 4518 }, { "epoch": 0.4055551816203361, "grad_norm": 0.5238178202996935, "learning_rate": 9.962341224964471e-06, "loss": 0.5355, "step": 4519 }, { "epoch": 0.4056449260730071, "grad_norm": 0.4667416423773974, "learning_rate": 9.962277232886913e-06, "loss": 0.4521, "step": 4520 }, { "epoch": 0.40573467052567813, "grad_norm": 0.5530647271791993, "learning_rate": 9.96221318669176e-06, "loss": 0.5413, "step": 4521 }, { "epoch": 0.40582441497834915, "grad_norm": 0.46661231750012905, "learning_rate": 9.962149086379713e-06, "loss": 0.4555, "step": 4522 }, { "epoch": 0.40591415943102016, "grad_norm": 0.4942029468855182, "learning_rate": 9.962084931951468e-06, "loss": 0.4726, "step": 4523 }, { "epoch": 0.4060039038836912, "grad_norm": 0.5035964794950487, "learning_rate": 9.962020723407726e-06, "loss": 0.5129, "step": 4524 }, { "epoch": 0.4060936483363622, "grad_norm": 0.4735929979870735, "learning_rate": 9.96195646074919e-06, "loss": 0.4503, "step": 4525 }, { "epoch": 0.4061833927890332, "grad_norm": 0.4705893520453193, "learning_rate": 9.961892143976555e-06, "loss": 0.4503, "step": 4526 }, { "epoch": 0.4062731372417043, "grad_norm": 0.446742167936227, "learning_rate": 9.961827773090527e-06, "loss": 0.4031, "step": 4527 }, { "epoch": 0.4063628816943753, "grad_norm": 0.4821911058802285, "learning_rate": 9.961763348091807e-06, "loss": 0.4746, "step": 4528 }, { "epoch": 0.4064526261470463, "grad_norm": 0.4536375430471639, "learning_rate": 9.961698868981095e-06, "loss": 0.4345, "step": 4529 }, { "epoch": 0.4065423705997173, "grad_norm": 0.483176148654339, "learning_rate": 9.961634335759098e-06, "loss": 0.4436, "step": 4530 }, { "epoch": 0.40663211505238833, "grad_norm": 0.49642081196129256, "learning_rate": 9.961569748426518e-06, "loss": 0.4601, "step": 4531 }, { "epoch": 0.40672185950505935, "grad_norm": 0.46937567756827003, "learning_rate": 9.961505106984059e-06, "loss": 0.4778, "step": 4532 }, { "epoch": 0.40681160395773036, "grad_norm": 0.5126254955689339, "learning_rate": 9.961440411432426e-06, "loss": 0.463, "step": 4533 }, { "epoch": 0.4069013484104014, "grad_norm": 0.4809427546403006, "learning_rate": 9.961375661772326e-06, "loss": 0.4585, "step": 4534 }, { "epoch": 0.4069910928630724, "grad_norm": 0.538221575964078, "learning_rate": 9.961310858004465e-06, "loss": 0.537, "step": 4535 }, { "epoch": 0.4070808373157434, "grad_norm": 0.5286234270221759, "learning_rate": 9.961246000129548e-06, "loss": 0.4857, "step": 4536 }, { "epoch": 0.4071705817684144, "grad_norm": 0.46845848941767326, "learning_rate": 9.961181088148283e-06, "loss": 0.4381, "step": 4537 }, { "epoch": 0.40726032622108543, "grad_norm": 0.5170070597981289, "learning_rate": 9.961116122061379e-06, "loss": 0.543, "step": 4538 }, { "epoch": 0.4073500706737565, "grad_norm": 0.4798073526827197, "learning_rate": 9.961051101869543e-06, "loss": 0.4609, "step": 4539 }, { "epoch": 0.4074398151264275, "grad_norm": 0.4676800078435948, "learning_rate": 9.960986027573486e-06, "loss": 0.432, "step": 4540 }, { "epoch": 0.40752955957909853, "grad_norm": 0.47783538698884925, "learning_rate": 9.960920899173913e-06, "loss": 0.5167, "step": 4541 }, { "epoch": 0.40761930403176955, "grad_norm": 0.467864677797731, "learning_rate": 9.960855716671541e-06, "loss": 0.4507, "step": 4542 }, { "epoch": 0.40770904848444056, "grad_norm": 0.512856885676433, "learning_rate": 9.960790480067077e-06, "loss": 0.5395, "step": 4543 }, { "epoch": 0.4077987929371116, "grad_norm": 0.49452470379434543, "learning_rate": 9.960725189361233e-06, "loss": 0.4795, "step": 4544 }, { "epoch": 0.4078885373897826, "grad_norm": 0.5065682516778354, "learning_rate": 9.96065984455472e-06, "loss": 0.4886, "step": 4545 }, { "epoch": 0.4079782818424536, "grad_norm": 0.478325695118085, "learning_rate": 9.960594445648253e-06, "loss": 0.4277, "step": 4546 }, { "epoch": 0.4080680262951246, "grad_norm": 0.4561764787420301, "learning_rate": 9.960528992642545e-06, "loss": 0.4064, "step": 4547 }, { "epoch": 0.40815777074779563, "grad_norm": 0.511643569287975, "learning_rate": 9.960463485538307e-06, "loss": 0.5007, "step": 4548 }, { "epoch": 0.40824751520046665, "grad_norm": 0.4880610666649143, "learning_rate": 9.960397924336256e-06, "loss": 0.4604, "step": 4549 }, { "epoch": 0.40833725965313766, "grad_norm": 0.5330226493970368, "learning_rate": 9.960332309037105e-06, "loss": 0.5422, "step": 4550 }, { "epoch": 0.40842700410580873, "grad_norm": 0.45858123854209115, "learning_rate": 9.96026663964157e-06, "loss": 0.457, "step": 4551 }, { "epoch": 0.40851674855847975, "grad_norm": 0.4348293078897167, "learning_rate": 9.960200916150372e-06, "loss": 0.4173, "step": 4552 }, { "epoch": 0.40860649301115076, "grad_norm": 0.4734646695965948, "learning_rate": 9.960135138564221e-06, "loss": 0.4602, "step": 4553 }, { "epoch": 0.4086962374638218, "grad_norm": 0.49623639104368017, "learning_rate": 9.960069306883837e-06, "loss": 0.5382, "step": 4554 }, { "epoch": 0.4087859819164928, "grad_norm": 0.4912855353221446, "learning_rate": 9.960003421109939e-06, "loss": 0.4711, "step": 4555 }, { "epoch": 0.4088757263691638, "grad_norm": 0.4873066656809044, "learning_rate": 9.959937481243245e-06, "loss": 0.488, "step": 4556 }, { "epoch": 0.4089654708218348, "grad_norm": 0.47871239465671156, "learning_rate": 9.959871487284473e-06, "loss": 0.4534, "step": 4557 }, { "epoch": 0.40905521527450583, "grad_norm": 0.4706555291761396, "learning_rate": 9.959805439234342e-06, "loss": 0.4499, "step": 4558 }, { "epoch": 0.40914495972717685, "grad_norm": 0.5391677869593706, "learning_rate": 9.959739337093576e-06, "loss": 0.5538, "step": 4559 }, { "epoch": 0.40923470417984786, "grad_norm": 0.4881516378053108, "learning_rate": 9.959673180862891e-06, "loss": 0.4972, "step": 4560 }, { "epoch": 0.4093244486325189, "grad_norm": 0.4636596474897408, "learning_rate": 9.959606970543014e-06, "loss": 0.4629, "step": 4561 }, { "epoch": 0.40941419308518995, "grad_norm": 0.457606145479205, "learning_rate": 9.959540706134662e-06, "loss": 0.4381, "step": 4562 }, { "epoch": 0.40950393753786096, "grad_norm": 0.43371829356721747, "learning_rate": 9.95947438763856e-06, "loss": 0.3961, "step": 4563 }, { "epoch": 0.409593681990532, "grad_norm": 0.5071815194321958, "learning_rate": 9.959408015055435e-06, "loss": 0.5087, "step": 4564 }, { "epoch": 0.409683426443203, "grad_norm": 0.502451735527189, "learning_rate": 9.959341588386002e-06, "loss": 0.5083, "step": 4565 }, { "epoch": 0.409773170895874, "grad_norm": 0.5303342609504328, "learning_rate": 9.959275107630992e-06, "loss": 0.5344, "step": 4566 }, { "epoch": 0.409862915348545, "grad_norm": 0.5125164117468974, "learning_rate": 9.959208572791131e-06, "loss": 0.4706, "step": 4567 }, { "epoch": 0.40995265980121603, "grad_norm": 0.5524498266402086, "learning_rate": 9.959141983867139e-06, "loss": 0.5529, "step": 4568 }, { "epoch": 0.41004240425388705, "grad_norm": 0.5414148642228365, "learning_rate": 9.959075340859745e-06, "loss": 0.5139, "step": 4569 }, { "epoch": 0.41013214870655806, "grad_norm": 0.5020808209813267, "learning_rate": 9.959008643769678e-06, "loss": 0.5174, "step": 4570 }, { "epoch": 0.4102218931592291, "grad_norm": 0.5829867875309488, "learning_rate": 9.958941892597663e-06, "loss": 0.5913, "step": 4571 }, { "epoch": 0.4103116376119001, "grad_norm": 0.49093496469924247, "learning_rate": 9.958875087344427e-06, "loss": 0.486, "step": 4572 }, { "epoch": 0.4104013820645711, "grad_norm": 0.5130194333614477, "learning_rate": 9.9588082280107e-06, "loss": 0.4537, "step": 4573 }, { "epoch": 0.4104911265172422, "grad_norm": 0.47727268795128075, "learning_rate": 9.958741314597212e-06, "loss": 0.4106, "step": 4574 }, { "epoch": 0.4105808709699132, "grad_norm": 0.48323825985056007, "learning_rate": 9.958674347104692e-06, "loss": 0.4698, "step": 4575 }, { "epoch": 0.4106706154225842, "grad_norm": 0.49219481100239, "learning_rate": 9.95860732553387e-06, "loss": 0.4701, "step": 4576 }, { "epoch": 0.4107603598752552, "grad_norm": 0.4856528415688426, "learning_rate": 9.958540249885477e-06, "loss": 0.5052, "step": 4577 }, { "epoch": 0.41085010432792624, "grad_norm": 0.5192139834863211, "learning_rate": 9.958473120160245e-06, "loss": 0.4957, "step": 4578 }, { "epoch": 0.41093984878059725, "grad_norm": 0.5454577543368232, "learning_rate": 9.958405936358905e-06, "loss": 0.5468, "step": 4579 }, { "epoch": 0.41102959323326826, "grad_norm": 0.5110939329581614, "learning_rate": 9.958338698482191e-06, "loss": 0.4322, "step": 4580 }, { "epoch": 0.4111193376859393, "grad_norm": 0.49720377152106743, "learning_rate": 9.958271406530835e-06, "loss": 0.517, "step": 4581 }, { "epoch": 0.4112090821386103, "grad_norm": 0.48809227920120796, "learning_rate": 9.958204060505572e-06, "loss": 0.438, "step": 4582 }, { "epoch": 0.4112988265912813, "grad_norm": 0.4738593868622045, "learning_rate": 9.958136660407137e-06, "loss": 0.4678, "step": 4583 }, { "epoch": 0.4113885710439523, "grad_norm": 0.5437941260652811, "learning_rate": 9.958069206236262e-06, "loss": 0.5343, "step": 4584 }, { "epoch": 0.4114783154966234, "grad_norm": 0.5008289592666667, "learning_rate": 9.958001697993686e-06, "loss": 0.4683, "step": 4585 }, { "epoch": 0.4115680599492944, "grad_norm": 0.5741563554966562, "learning_rate": 9.957934135680144e-06, "loss": 0.5445, "step": 4586 }, { "epoch": 0.4116578044019654, "grad_norm": 0.5421540169478507, "learning_rate": 9.957866519296374e-06, "loss": 0.5389, "step": 4587 }, { "epoch": 0.41174754885463644, "grad_norm": 0.5339837857400184, "learning_rate": 9.95779884884311e-06, "loss": 0.4473, "step": 4588 }, { "epoch": 0.41183729330730745, "grad_norm": 0.4846455888811301, "learning_rate": 9.957731124321095e-06, "loss": 0.4874, "step": 4589 }, { "epoch": 0.41192703775997846, "grad_norm": 0.5377683650691096, "learning_rate": 9.957663345731063e-06, "loss": 0.4677, "step": 4590 }, { "epoch": 0.4120167822126495, "grad_norm": 0.49025492252572295, "learning_rate": 9.957595513073754e-06, "loss": 0.5067, "step": 4591 }, { "epoch": 0.4121065266653205, "grad_norm": 0.5094122218603254, "learning_rate": 9.95752762634991e-06, "loss": 0.4568, "step": 4592 }, { "epoch": 0.4121962711179915, "grad_norm": 0.5119008965622284, "learning_rate": 9.957459685560268e-06, "loss": 0.4886, "step": 4593 }, { "epoch": 0.4122860155706625, "grad_norm": 0.5372485002448605, "learning_rate": 9.957391690705574e-06, "loss": 0.5432, "step": 4594 }, { "epoch": 0.41237576002333354, "grad_norm": 0.5060336553560321, "learning_rate": 9.957323641786566e-06, "loss": 0.5053, "step": 4595 }, { "epoch": 0.41246550447600455, "grad_norm": 0.5191229995069945, "learning_rate": 9.957255538803986e-06, "loss": 0.5225, "step": 4596 }, { "epoch": 0.4125552489286756, "grad_norm": 0.49431486605004166, "learning_rate": 9.957187381758577e-06, "loss": 0.468, "step": 4597 }, { "epoch": 0.41264499338134664, "grad_norm": 0.47647032717712584, "learning_rate": 9.957119170651084e-06, "loss": 0.4664, "step": 4598 }, { "epoch": 0.41273473783401765, "grad_norm": 0.5096895860978982, "learning_rate": 9.957050905482249e-06, "loss": 0.5298, "step": 4599 }, { "epoch": 0.41282448228668867, "grad_norm": 0.5095108786315686, "learning_rate": 9.956982586252816e-06, "loss": 0.5407, "step": 4600 }, { "epoch": 0.4129142267393597, "grad_norm": 0.49905639512492644, "learning_rate": 9.956914212963534e-06, "loss": 0.501, "step": 4601 }, { "epoch": 0.4130039711920307, "grad_norm": 0.471077435127097, "learning_rate": 9.956845785615143e-06, "loss": 0.4172, "step": 4602 }, { "epoch": 0.4130937156447017, "grad_norm": 0.4707589014010727, "learning_rate": 9.956777304208393e-06, "loss": 0.4613, "step": 4603 }, { "epoch": 0.4131834600973727, "grad_norm": 0.4624463667927495, "learning_rate": 9.95670876874403e-06, "loss": 0.4062, "step": 4604 }, { "epoch": 0.41327320455004374, "grad_norm": 0.5063702639863893, "learning_rate": 9.956640179222802e-06, "loss": 0.5353, "step": 4605 }, { "epoch": 0.41336294900271475, "grad_norm": 0.485265655951739, "learning_rate": 9.956571535645456e-06, "loss": 0.4605, "step": 4606 }, { "epoch": 0.41345269345538577, "grad_norm": 0.4911048853911186, "learning_rate": 9.95650283801274e-06, "loss": 0.504, "step": 4607 }, { "epoch": 0.4135424379080568, "grad_norm": 0.5061897771997538, "learning_rate": 9.956434086325407e-06, "loss": 0.5097, "step": 4608 }, { "epoch": 0.41363218236072785, "grad_norm": 0.4837615681028739, "learning_rate": 9.956365280584202e-06, "loss": 0.4388, "step": 4609 }, { "epoch": 0.41372192681339887, "grad_norm": 0.5405996651972572, "learning_rate": 9.956296420789877e-06, "loss": 0.5153, "step": 4610 }, { "epoch": 0.4138116712660699, "grad_norm": 0.4600456760583972, "learning_rate": 9.956227506943183e-06, "loss": 0.3979, "step": 4611 }, { "epoch": 0.4139014157187409, "grad_norm": 0.48371835850083444, "learning_rate": 9.956158539044874e-06, "loss": 0.4579, "step": 4612 }, { "epoch": 0.4139911601714119, "grad_norm": 0.5030272623181493, "learning_rate": 9.956089517095699e-06, "loss": 0.4879, "step": 4613 }, { "epoch": 0.4140809046240829, "grad_norm": 0.47020417819553895, "learning_rate": 9.956020441096413e-06, "loss": 0.4589, "step": 4614 }, { "epoch": 0.41417064907675394, "grad_norm": 0.5089315866188446, "learning_rate": 9.955951311047766e-06, "loss": 0.5432, "step": 4615 }, { "epoch": 0.41426039352942495, "grad_norm": 0.5148816411698244, "learning_rate": 9.955882126950516e-06, "loss": 0.4764, "step": 4616 }, { "epoch": 0.41435013798209597, "grad_norm": 0.47541983667826, "learning_rate": 9.955812888805414e-06, "loss": 0.4544, "step": 4617 }, { "epoch": 0.414439882434767, "grad_norm": 0.4697481936942142, "learning_rate": 9.955743596613216e-06, "loss": 0.41, "step": 4618 }, { "epoch": 0.414529626887438, "grad_norm": 0.45113671768464064, "learning_rate": 9.955674250374681e-06, "loss": 0.4393, "step": 4619 }, { "epoch": 0.41461937134010907, "grad_norm": 0.5427585263292303, "learning_rate": 9.955604850090561e-06, "loss": 0.5302, "step": 4620 }, { "epoch": 0.4147091157927801, "grad_norm": 0.5424783172662813, "learning_rate": 9.955535395761615e-06, "loss": 0.5024, "step": 4621 }, { "epoch": 0.4147988602454511, "grad_norm": 0.5570180207131653, "learning_rate": 9.9554658873886e-06, "loss": 0.5285, "step": 4622 }, { "epoch": 0.4148886046981221, "grad_norm": 0.4942815814398351, "learning_rate": 9.955396324972275e-06, "loss": 0.4485, "step": 4623 }, { "epoch": 0.4149783491507931, "grad_norm": 0.4876426568329293, "learning_rate": 9.955326708513396e-06, "loss": 0.3834, "step": 4624 }, { "epoch": 0.41506809360346414, "grad_norm": 0.4960977011859494, "learning_rate": 9.955257038012725e-06, "loss": 0.4877, "step": 4625 }, { "epoch": 0.41515783805613515, "grad_norm": 0.5025336779520694, "learning_rate": 9.955187313471019e-06, "loss": 0.5027, "step": 4626 }, { "epoch": 0.41524758250880617, "grad_norm": 0.5056682453591516, "learning_rate": 9.95511753488904e-06, "loss": 0.4866, "step": 4627 }, { "epoch": 0.4153373269614772, "grad_norm": 0.5075816910125576, "learning_rate": 9.955047702267551e-06, "loss": 0.4911, "step": 4628 }, { "epoch": 0.4154270714141482, "grad_norm": 0.5694800958743284, "learning_rate": 9.95497781560731e-06, "loss": 0.6134, "step": 4629 }, { "epoch": 0.4155168158668192, "grad_norm": 0.49666212560098744, "learning_rate": 9.954907874909084e-06, "loss": 0.4683, "step": 4630 }, { "epoch": 0.4156065603194902, "grad_norm": 0.4422851346246505, "learning_rate": 9.95483788017363e-06, "loss": 0.3791, "step": 4631 }, { "epoch": 0.4156963047721613, "grad_norm": 0.4732306257972403, "learning_rate": 9.954767831401713e-06, "loss": 0.4597, "step": 4632 }, { "epoch": 0.4157860492248323, "grad_norm": 0.5270699208452118, "learning_rate": 9.954697728594099e-06, "loss": 0.5168, "step": 4633 }, { "epoch": 0.4158757936775033, "grad_norm": 0.49505486682591165, "learning_rate": 9.954627571751551e-06, "loss": 0.5018, "step": 4634 }, { "epoch": 0.41596553813017434, "grad_norm": 0.5106743121500056, "learning_rate": 9.954557360874835e-06, "loss": 0.4851, "step": 4635 }, { "epoch": 0.41605528258284535, "grad_norm": 0.4748167840772771, "learning_rate": 9.954487095964717e-06, "loss": 0.4637, "step": 4636 }, { "epoch": 0.41614502703551637, "grad_norm": 0.5058694672282135, "learning_rate": 9.954416777021962e-06, "loss": 0.4701, "step": 4637 }, { "epoch": 0.4162347714881874, "grad_norm": 0.49559683418642175, "learning_rate": 9.954346404047336e-06, "loss": 0.519, "step": 4638 }, { "epoch": 0.4163245159408584, "grad_norm": 0.49492737955831484, "learning_rate": 9.954275977041609e-06, "loss": 0.4738, "step": 4639 }, { "epoch": 0.4164142603935294, "grad_norm": 0.4945687835671694, "learning_rate": 9.954205496005547e-06, "loss": 0.4553, "step": 4640 }, { "epoch": 0.4165040048462004, "grad_norm": 0.5256190947015658, "learning_rate": 9.95413496093992e-06, "loss": 0.5307, "step": 4641 }, { "epoch": 0.41659374929887144, "grad_norm": 0.47258277267434956, "learning_rate": 9.954064371845497e-06, "loss": 0.4192, "step": 4642 }, { "epoch": 0.41668349375154246, "grad_norm": 0.5252831264772624, "learning_rate": 9.953993728723049e-06, "loss": 0.456, "step": 4643 }, { "epoch": 0.4167732382042135, "grad_norm": 0.5219171048384685, "learning_rate": 9.953923031573345e-06, "loss": 0.5133, "step": 4644 }, { "epoch": 0.41686298265688454, "grad_norm": 0.5035280009618566, "learning_rate": 9.953852280397154e-06, "loss": 0.4755, "step": 4645 }, { "epoch": 0.41695272710955555, "grad_norm": 0.49063838440154384, "learning_rate": 9.95378147519525e-06, "loss": 0.4579, "step": 4646 }, { "epoch": 0.41704247156222657, "grad_norm": 0.46361689926347244, "learning_rate": 9.953710615968405e-06, "loss": 0.4919, "step": 4647 }, { "epoch": 0.4171322160148976, "grad_norm": 0.4614881948352261, "learning_rate": 9.953639702717394e-06, "loss": 0.4845, "step": 4648 }, { "epoch": 0.4172219604675686, "grad_norm": 0.5224216635627196, "learning_rate": 9.953568735442987e-06, "loss": 0.5392, "step": 4649 }, { "epoch": 0.4173117049202396, "grad_norm": 0.48408149411578016, "learning_rate": 9.953497714145957e-06, "loss": 0.4792, "step": 4650 }, { "epoch": 0.4174014493729106, "grad_norm": 0.4709182150931545, "learning_rate": 9.953426638827082e-06, "loss": 0.4439, "step": 4651 }, { "epoch": 0.41749119382558164, "grad_norm": 0.5206180259718963, "learning_rate": 9.953355509487135e-06, "loss": 0.5231, "step": 4652 }, { "epoch": 0.41758093827825266, "grad_norm": 0.5490840030198665, "learning_rate": 9.953284326126893e-06, "loss": 0.5262, "step": 4653 }, { "epoch": 0.41767068273092367, "grad_norm": 0.5016275824985621, "learning_rate": 9.95321308874713e-06, "loss": 0.5187, "step": 4654 }, { "epoch": 0.41776042718359474, "grad_norm": 0.4909097800376594, "learning_rate": 9.953141797348627e-06, "loss": 0.4913, "step": 4655 }, { "epoch": 0.41785017163626575, "grad_norm": 0.4496410435108433, "learning_rate": 9.953070451932158e-06, "loss": 0.4453, "step": 4656 }, { "epoch": 0.41793991608893677, "grad_norm": 0.5429310762270437, "learning_rate": 9.952999052498502e-06, "loss": 0.5508, "step": 4657 }, { "epoch": 0.4180296605416078, "grad_norm": 0.4828180081179003, "learning_rate": 9.95292759904844e-06, "loss": 0.4316, "step": 4658 }, { "epoch": 0.4181194049942788, "grad_norm": 0.494027438218848, "learning_rate": 9.952856091582746e-06, "loss": 0.4198, "step": 4659 }, { "epoch": 0.4182091494469498, "grad_norm": 0.49443492199312206, "learning_rate": 9.952784530102204e-06, "loss": 0.4871, "step": 4660 }, { "epoch": 0.4182988938996208, "grad_norm": 0.47806239901742453, "learning_rate": 9.952712914607592e-06, "loss": 0.3979, "step": 4661 }, { "epoch": 0.41838863835229184, "grad_norm": 0.4616500789971851, "learning_rate": 9.952641245099694e-06, "loss": 0.4428, "step": 4662 }, { "epoch": 0.41847838280496286, "grad_norm": 0.4872315849597141, "learning_rate": 9.95256952157929e-06, "loss": 0.5039, "step": 4663 }, { "epoch": 0.41856812725763387, "grad_norm": 0.47140434214449584, "learning_rate": 9.952497744047163e-06, "loss": 0.4693, "step": 4664 }, { "epoch": 0.4186578717103049, "grad_norm": 0.5337560391638779, "learning_rate": 9.952425912504093e-06, "loss": 0.4971, "step": 4665 }, { "epoch": 0.4187476161629759, "grad_norm": 0.5089179221763893, "learning_rate": 9.952354026950866e-06, "loss": 0.501, "step": 4666 }, { "epoch": 0.41883736061564697, "grad_norm": 0.5102493823066901, "learning_rate": 9.952282087388266e-06, "loss": 0.5377, "step": 4667 }, { "epoch": 0.418927105068318, "grad_norm": 0.48842270207564525, "learning_rate": 9.952210093817076e-06, "loss": 0.472, "step": 4668 }, { "epoch": 0.419016849520989, "grad_norm": 0.5166381877627932, "learning_rate": 9.952138046238082e-06, "loss": 0.4956, "step": 4669 }, { "epoch": 0.41910659397366, "grad_norm": 0.4804808993121946, "learning_rate": 9.952065944652071e-06, "loss": 0.4521, "step": 4670 }, { "epoch": 0.419196338426331, "grad_norm": 0.49764887813282893, "learning_rate": 9.951993789059827e-06, "loss": 0.4819, "step": 4671 }, { "epoch": 0.41928608287900204, "grad_norm": 0.49934454594520333, "learning_rate": 9.951921579462139e-06, "loss": 0.4912, "step": 4672 }, { "epoch": 0.41937582733167306, "grad_norm": 0.5569961275363055, "learning_rate": 9.951849315859791e-06, "loss": 0.5469, "step": 4673 }, { "epoch": 0.41946557178434407, "grad_norm": 0.5298356764177276, "learning_rate": 9.951776998253578e-06, "loss": 0.5043, "step": 4674 }, { "epoch": 0.4195553162370151, "grad_norm": 0.5008273184127194, "learning_rate": 9.95170462664428e-06, "loss": 0.4738, "step": 4675 }, { "epoch": 0.4196450606896861, "grad_norm": 0.48293808579633124, "learning_rate": 9.951632201032693e-06, "loss": 0.4523, "step": 4676 }, { "epoch": 0.4197348051423571, "grad_norm": 0.5236044207767169, "learning_rate": 9.951559721419604e-06, "loss": 0.5336, "step": 4677 }, { "epoch": 0.4198245495950282, "grad_norm": 0.4753337207477251, "learning_rate": 9.951487187805802e-06, "loss": 0.4589, "step": 4678 }, { "epoch": 0.4199142940476992, "grad_norm": 0.46955260924174336, "learning_rate": 9.951414600192082e-06, "loss": 0.4465, "step": 4679 }, { "epoch": 0.4200040385003702, "grad_norm": 0.48399065212314885, "learning_rate": 9.951341958579232e-06, "loss": 0.4845, "step": 4680 }, { "epoch": 0.42009378295304123, "grad_norm": 0.5598587163725478, "learning_rate": 9.951269262968049e-06, "loss": 0.4971, "step": 4681 }, { "epoch": 0.42018352740571224, "grad_norm": 0.4951985099869002, "learning_rate": 9.95119651335932e-06, "loss": 0.4819, "step": 4682 }, { "epoch": 0.42027327185838326, "grad_norm": 0.46748961768171393, "learning_rate": 9.95112370975384e-06, "loss": 0.4177, "step": 4683 }, { "epoch": 0.42036301631105427, "grad_norm": 0.4992373285929607, "learning_rate": 9.951050852152404e-06, "loss": 0.4812, "step": 4684 }, { "epoch": 0.4204527607637253, "grad_norm": 0.48823735354468645, "learning_rate": 9.950977940555809e-06, "loss": 0.4461, "step": 4685 }, { "epoch": 0.4205425052163963, "grad_norm": 0.5247699208672552, "learning_rate": 9.950904974964845e-06, "loss": 0.4654, "step": 4686 }, { "epoch": 0.4206322496690673, "grad_norm": 0.48876360839392413, "learning_rate": 9.950831955380314e-06, "loss": 0.4389, "step": 4687 }, { "epoch": 0.42072199412173833, "grad_norm": 0.5250991913252402, "learning_rate": 9.950758881803006e-06, "loss": 0.4387, "step": 4688 }, { "epoch": 0.42081173857440934, "grad_norm": 0.45447011172187074, "learning_rate": 9.95068575423372e-06, "loss": 0.4479, "step": 4689 }, { "epoch": 0.4209014830270804, "grad_norm": 0.50093956404422, "learning_rate": 9.950612572673255e-06, "loss": 0.5258, "step": 4690 }, { "epoch": 0.42099122747975143, "grad_norm": 0.4319506199077542, "learning_rate": 9.950539337122409e-06, "loss": 0.3888, "step": 4691 }, { "epoch": 0.42108097193242244, "grad_norm": 0.47283537400502135, "learning_rate": 9.950466047581979e-06, "loss": 0.4424, "step": 4692 }, { "epoch": 0.42117071638509346, "grad_norm": 0.49548155830246726, "learning_rate": 9.950392704052766e-06, "loss": 0.4906, "step": 4693 }, { "epoch": 0.42126046083776447, "grad_norm": 0.48452820486373416, "learning_rate": 9.95031930653557e-06, "loss": 0.4917, "step": 4694 }, { "epoch": 0.4213502052904355, "grad_norm": 0.5143159710333292, "learning_rate": 9.95024585503119e-06, "loss": 0.5401, "step": 4695 }, { "epoch": 0.4214399497431065, "grad_norm": 0.47762180079126954, "learning_rate": 9.950172349540426e-06, "loss": 0.4495, "step": 4696 }, { "epoch": 0.4215296941957775, "grad_norm": 0.4460743539444418, "learning_rate": 9.950098790064082e-06, "loss": 0.4153, "step": 4697 }, { "epoch": 0.42161943864844853, "grad_norm": 0.4649233537725192, "learning_rate": 9.95002517660296e-06, "loss": 0.4619, "step": 4698 }, { "epoch": 0.42170918310111954, "grad_norm": 0.514149778819476, "learning_rate": 9.949951509157863e-06, "loss": 0.5578, "step": 4699 }, { "epoch": 0.42179892755379056, "grad_norm": 0.5046053111020947, "learning_rate": 9.949877787729592e-06, "loss": 0.5305, "step": 4700 }, { "epoch": 0.4218886720064616, "grad_norm": 0.5065115465974004, "learning_rate": 9.949804012318953e-06, "loss": 0.4964, "step": 4701 }, { "epoch": 0.42197841645913264, "grad_norm": 0.49230792370206344, "learning_rate": 9.94973018292675e-06, "loss": 0.4902, "step": 4702 }, { "epoch": 0.42206816091180366, "grad_norm": 0.4789940055695457, "learning_rate": 9.94965629955379e-06, "loss": 0.5002, "step": 4703 }, { "epoch": 0.4221579053644747, "grad_norm": 0.5038410784164833, "learning_rate": 9.949582362200876e-06, "loss": 0.5114, "step": 4704 }, { "epoch": 0.4222476498171457, "grad_norm": 0.533662530703501, "learning_rate": 9.949508370868816e-06, "loss": 0.4572, "step": 4705 }, { "epoch": 0.4223373942698167, "grad_norm": 0.5309345034349032, "learning_rate": 9.949434325558417e-06, "loss": 0.5186, "step": 4706 }, { "epoch": 0.4224271387224877, "grad_norm": 0.4904483607518056, "learning_rate": 9.949360226270485e-06, "loss": 0.4787, "step": 4707 }, { "epoch": 0.42251688317515873, "grad_norm": 0.5141653207883737, "learning_rate": 9.94928607300583e-06, "loss": 0.4949, "step": 4708 }, { "epoch": 0.42260662762782975, "grad_norm": 0.5078393096613486, "learning_rate": 9.949211865765258e-06, "loss": 0.5054, "step": 4709 }, { "epoch": 0.42269637208050076, "grad_norm": 0.5256049421818684, "learning_rate": 9.949137604549584e-06, "loss": 0.4969, "step": 4710 }, { "epoch": 0.4227861165331718, "grad_norm": 0.5312463866132209, "learning_rate": 9.94906328935961e-06, "loss": 0.5156, "step": 4711 }, { "epoch": 0.4228758609858428, "grad_norm": 0.5152249427540089, "learning_rate": 9.948988920196151e-06, "loss": 0.4882, "step": 4712 }, { "epoch": 0.42296560543851386, "grad_norm": 0.4757863791299234, "learning_rate": 9.94891449706002e-06, "loss": 0.494, "step": 4713 }, { "epoch": 0.4230553498911849, "grad_norm": 0.44993062223447594, "learning_rate": 9.948840019952023e-06, "loss": 0.4146, "step": 4714 }, { "epoch": 0.4231450943438559, "grad_norm": 0.4674029185996766, "learning_rate": 9.948765488872978e-06, "loss": 0.4089, "step": 4715 }, { "epoch": 0.4232348387965269, "grad_norm": 0.4677396507115747, "learning_rate": 9.948690903823695e-06, "loss": 0.4449, "step": 4716 }, { "epoch": 0.4233245832491979, "grad_norm": 0.5577219273749768, "learning_rate": 9.948616264804987e-06, "loss": 0.5728, "step": 4717 }, { "epoch": 0.42341432770186893, "grad_norm": 0.5064536816992898, "learning_rate": 9.948541571817667e-06, "loss": 0.4853, "step": 4718 }, { "epoch": 0.42350407215453995, "grad_norm": 0.5100839893867671, "learning_rate": 9.948466824862553e-06, "loss": 0.5119, "step": 4719 }, { "epoch": 0.42359381660721096, "grad_norm": 0.510868882362117, "learning_rate": 9.948392023940459e-06, "loss": 0.4915, "step": 4720 }, { "epoch": 0.423683561059882, "grad_norm": 0.4863271620300202, "learning_rate": 9.948317169052197e-06, "loss": 0.5256, "step": 4721 }, { "epoch": 0.423773305512553, "grad_norm": 0.49441392371738135, "learning_rate": 9.94824226019859e-06, "loss": 0.5195, "step": 4722 }, { "epoch": 0.423863049965224, "grad_norm": 0.48122356443897574, "learning_rate": 9.94816729738045e-06, "loss": 0.4377, "step": 4723 }, { "epoch": 0.423952794417895, "grad_norm": 0.5102349699702854, "learning_rate": 9.948092280598595e-06, "loss": 0.4528, "step": 4724 }, { "epoch": 0.4240425388705661, "grad_norm": 0.5045668198459722, "learning_rate": 9.948017209853846e-06, "loss": 0.4896, "step": 4725 }, { "epoch": 0.4241322833232371, "grad_norm": 0.5023241093413716, "learning_rate": 9.947942085147018e-06, "loss": 0.487, "step": 4726 }, { "epoch": 0.4242220277759081, "grad_norm": 0.49337907733732805, "learning_rate": 9.947866906478933e-06, "loss": 0.4381, "step": 4727 }, { "epoch": 0.42431177222857913, "grad_norm": 0.4893010356615849, "learning_rate": 9.94779167385041e-06, "loss": 0.5093, "step": 4728 }, { "epoch": 0.42440151668125015, "grad_norm": 0.4620679043017427, "learning_rate": 9.94771638726227e-06, "loss": 0.4536, "step": 4729 }, { "epoch": 0.42449126113392116, "grad_norm": 0.47043516345562575, "learning_rate": 9.947641046715332e-06, "loss": 0.439, "step": 4730 }, { "epoch": 0.4245810055865922, "grad_norm": 0.45499287908320224, "learning_rate": 9.94756565221042e-06, "loss": 0.4606, "step": 4731 }, { "epoch": 0.4246707500392632, "grad_norm": 0.5094636350107664, "learning_rate": 9.947490203748356e-06, "loss": 0.5136, "step": 4732 }, { "epoch": 0.4247604944919342, "grad_norm": 0.456952808654207, "learning_rate": 9.94741470132996e-06, "loss": 0.3982, "step": 4733 }, { "epoch": 0.4248502389446052, "grad_norm": 0.47841565425243937, "learning_rate": 9.947339144956059e-06, "loss": 0.4375, "step": 4734 }, { "epoch": 0.42493998339727623, "grad_norm": 0.5500901007380351, "learning_rate": 9.947263534627477e-06, "loss": 0.5238, "step": 4735 }, { "epoch": 0.42502972784994725, "grad_norm": 0.456235577729981, "learning_rate": 9.947187870345035e-06, "loss": 0.4407, "step": 4736 }, { "epoch": 0.4251194723026183, "grad_norm": 0.5041008210941089, "learning_rate": 9.947112152109561e-06, "loss": 0.4933, "step": 4737 }, { "epoch": 0.42520921675528933, "grad_norm": 0.48264751760452546, "learning_rate": 9.947036379921882e-06, "loss": 0.4669, "step": 4738 }, { "epoch": 0.42529896120796035, "grad_norm": 0.49516686590815207, "learning_rate": 9.946960553782821e-06, "loss": 0.5136, "step": 4739 }, { "epoch": 0.42538870566063136, "grad_norm": 0.5054028446216735, "learning_rate": 9.946884673693205e-06, "loss": 0.5079, "step": 4740 }, { "epoch": 0.4254784501133024, "grad_norm": 0.44336775240208254, "learning_rate": 9.946808739653865e-06, "loss": 0.3979, "step": 4741 }, { "epoch": 0.4255681945659734, "grad_norm": 0.48285273042680993, "learning_rate": 9.946732751665626e-06, "loss": 0.4958, "step": 4742 }, { "epoch": 0.4256579390186444, "grad_norm": 0.4704054238507443, "learning_rate": 9.946656709729319e-06, "loss": 0.4374, "step": 4743 }, { "epoch": 0.4257476834713154, "grad_norm": 0.475362483923119, "learning_rate": 9.94658061384577e-06, "loss": 0.4462, "step": 4744 }, { "epoch": 0.42583742792398643, "grad_norm": 0.4849560584317035, "learning_rate": 9.946504464015814e-06, "loss": 0.4983, "step": 4745 }, { "epoch": 0.42592717237665745, "grad_norm": 0.4711037632417115, "learning_rate": 9.946428260240277e-06, "loss": 0.4177, "step": 4746 }, { "epoch": 0.42601691682932846, "grad_norm": 0.5196332430052859, "learning_rate": 9.946352002519991e-06, "loss": 0.4601, "step": 4747 }, { "epoch": 0.42610666128199953, "grad_norm": 0.475740013097272, "learning_rate": 9.94627569085579e-06, "loss": 0.442, "step": 4748 }, { "epoch": 0.42619640573467055, "grad_norm": 0.5066615839764742, "learning_rate": 9.946199325248502e-06, "loss": 0.4591, "step": 4749 }, { "epoch": 0.42628615018734156, "grad_norm": 0.545999665302942, "learning_rate": 9.946122905698962e-06, "loss": 0.5511, "step": 4750 }, { "epoch": 0.4263758946400126, "grad_norm": 0.5307646900468991, "learning_rate": 9.946046432208006e-06, "loss": 0.4965, "step": 4751 }, { "epoch": 0.4264656390926836, "grad_norm": 0.4750316005020862, "learning_rate": 9.945969904776465e-06, "loss": 0.4615, "step": 4752 }, { "epoch": 0.4265553835453546, "grad_norm": 0.47659423530054257, "learning_rate": 9.945893323405174e-06, "loss": 0.4276, "step": 4753 }, { "epoch": 0.4266451279980256, "grad_norm": 0.5509662282088217, "learning_rate": 9.94581668809497e-06, "loss": 0.5565, "step": 4754 }, { "epoch": 0.42673487245069663, "grad_norm": 0.543354102633667, "learning_rate": 9.945739998846685e-06, "loss": 0.5663, "step": 4755 }, { "epoch": 0.42682461690336765, "grad_norm": 0.48619513678103615, "learning_rate": 9.945663255661158e-06, "loss": 0.4501, "step": 4756 }, { "epoch": 0.42691436135603866, "grad_norm": 0.4853232959943584, "learning_rate": 9.945586458539224e-06, "loss": 0.4735, "step": 4757 }, { "epoch": 0.4270041058087097, "grad_norm": 0.5186717423508238, "learning_rate": 9.945509607481724e-06, "loss": 0.5626, "step": 4758 }, { "epoch": 0.4270938502613807, "grad_norm": 0.4810405437409969, "learning_rate": 9.945432702489494e-06, "loss": 0.4707, "step": 4759 }, { "epoch": 0.42718359471405176, "grad_norm": 0.5309931207436955, "learning_rate": 9.945355743563375e-06, "loss": 0.5591, "step": 4760 }, { "epoch": 0.4272733391667228, "grad_norm": 0.5343018974438627, "learning_rate": 9.945278730704201e-06, "loss": 0.4576, "step": 4761 }, { "epoch": 0.4273630836193938, "grad_norm": 0.4734142403735019, "learning_rate": 9.945201663912817e-06, "loss": 0.5015, "step": 4762 }, { "epoch": 0.4274528280720648, "grad_norm": 0.524207805280019, "learning_rate": 9.945124543190062e-06, "loss": 0.5412, "step": 4763 }, { "epoch": 0.4275425725247358, "grad_norm": 0.5242342896881383, "learning_rate": 9.945047368536774e-06, "loss": 0.4992, "step": 4764 }, { "epoch": 0.42763231697740683, "grad_norm": 0.47160407600035287, "learning_rate": 9.944970139953799e-06, "loss": 0.4747, "step": 4765 }, { "epoch": 0.42772206143007785, "grad_norm": 0.46890236181157413, "learning_rate": 9.944892857441976e-06, "loss": 0.4486, "step": 4766 }, { "epoch": 0.42781180588274886, "grad_norm": 0.4800822024108837, "learning_rate": 9.94481552100215e-06, "loss": 0.4599, "step": 4767 }, { "epoch": 0.4279015503354199, "grad_norm": 0.5067457261456494, "learning_rate": 9.944738130635166e-06, "loss": 0.5317, "step": 4768 }, { "epoch": 0.4279912947880909, "grad_norm": 0.4868203202552174, "learning_rate": 9.944660686341865e-06, "loss": 0.4504, "step": 4769 }, { "epoch": 0.4280810392407619, "grad_norm": 0.4467639440758577, "learning_rate": 9.94458318812309e-06, "loss": 0.4427, "step": 4770 }, { "epoch": 0.428170783693433, "grad_norm": 0.4857107641960778, "learning_rate": 9.94450563597969e-06, "loss": 0.5357, "step": 4771 }, { "epoch": 0.428260528146104, "grad_norm": 0.4997921077469058, "learning_rate": 9.94442802991251e-06, "loss": 0.4972, "step": 4772 }, { "epoch": 0.428350272598775, "grad_norm": 0.5387220071553281, "learning_rate": 9.944350369922396e-06, "loss": 0.5639, "step": 4773 }, { "epoch": 0.428440017051446, "grad_norm": 0.5244860127602311, "learning_rate": 9.944272656010193e-06, "loss": 0.5581, "step": 4774 }, { "epoch": 0.42852976150411703, "grad_norm": 0.45432046922488195, "learning_rate": 9.94419488817675e-06, "loss": 0.4752, "step": 4775 }, { "epoch": 0.42861950595678805, "grad_norm": 0.5355560547408198, "learning_rate": 9.944117066422916e-06, "loss": 0.5525, "step": 4776 }, { "epoch": 0.42870925040945906, "grad_norm": 0.4642891670209997, "learning_rate": 9.94403919074954e-06, "loss": 0.4148, "step": 4777 }, { "epoch": 0.4287989948621301, "grad_norm": 0.48285339142916356, "learning_rate": 9.943961261157468e-06, "loss": 0.4729, "step": 4778 }, { "epoch": 0.4288887393148011, "grad_norm": 0.4710146374792006, "learning_rate": 9.943883277647552e-06, "loss": 0.4814, "step": 4779 }, { "epoch": 0.4289784837674721, "grad_norm": 0.47425354177424256, "learning_rate": 9.943805240220646e-06, "loss": 0.4775, "step": 4780 }, { "epoch": 0.4290682282201431, "grad_norm": 0.5092776015006715, "learning_rate": 9.943727148877594e-06, "loss": 0.5354, "step": 4781 }, { "epoch": 0.42915797267281414, "grad_norm": 0.47187698459510546, "learning_rate": 9.943649003619253e-06, "loss": 0.4351, "step": 4782 }, { "epoch": 0.4292477171254852, "grad_norm": 0.5313957573260008, "learning_rate": 9.943570804446474e-06, "loss": 0.4619, "step": 4783 }, { "epoch": 0.4293374615781562, "grad_norm": 0.47556324602379646, "learning_rate": 9.943492551360107e-06, "loss": 0.4982, "step": 4784 }, { "epoch": 0.42942720603082724, "grad_norm": 0.5089615032972524, "learning_rate": 9.94341424436101e-06, "loss": 0.5493, "step": 4785 }, { "epoch": 0.42951695048349825, "grad_norm": 0.5470140454202855, "learning_rate": 9.943335883450035e-06, "loss": 0.5461, "step": 4786 }, { "epoch": 0.42960669493616926, "grad_norm": 0.5162881855721967, "learning_rate": 9.943257468628036e-06, "loss": 0.5661, "step": 4787 }, { "epoch": 0.4296964393888403, "grad_norm": 0.4997478794249416, "learning_rate": 9.943178999895868e-06, "loss": 0.5113, "step": 4788 }, { "epoch": 0.4297861838415113, "grad_norm": 0.5088960535771767, "learning_rate": 9.943100477254388e-06, "loss": 0.4892, "step": 4789 }, { "epoch": 0.4298759282941823, "grad_norm": 0.5256196130537337, "learning_rate": 9.94302190070445e-06, "loss": 0.4694, "step": 4790 }, { "epoch": 0.4299656727468533, "grad_norm": 0.4961712193489336, "learning_rate": 9.942943270246914e-06, "loss": 0.4848, "step": 4791 }, { "epoch": 0.43005541719952434, "grad_norm": 0.5227506800689431, "learning_rate": 9.942864585882636e-06, "loss": 0.5104, "step": 4792 }, { "epoch": 0.43014516165219535, "grad_norm": 0.5180502266766185, "learning_rate": 9.942785847612475e-06, "loss": 0.5519, "step": 4793 }, { "epoch": 0.43023490610486637, "grad_norm": 0.5383384364576128, "learning_rate": 9.942707055437288e-06, "loss": 0.5421, "step": 4794 }, { "epoch": 0.43032465055753744, "grad_norm": 0.4934779565306879, "learning_rate": 9.942628209357937e-06, "loss": 0.4774, "step": 4795 }, { "epoch": 0.43041439501020845, "grad_norm": 0.4837642618472211, "learning_rate": 9.942549309375278e-06, "loss": 0.4627, "step": 4796 }, { "epoch": 0.43050413946287946, "grad_norm": 0.47697989150648595, "learning_rate": 9.942470355490175e-06, "loss": 0.4552, "step": 4797 }, { "epoch": 0.4305938839155505, "grad_norm": 0.47499912739016004, "learning_rate": 9.942391347703486e-06, "loss": 0.443, "step": 4798 }, { "epoch": 0.4306836283682215, "grad_norm": 0.5042507575401313, "learning_rate": 9.942312286016075e-06, "loss": 0.5066, "step": 4799 }, { "epoch": 0.4307733728208925, "grad_norm": 0.512399625463252, "learning_rate": 9.942233170428804e-06, "loss": 0.4763, "step": 4800 }, { "epoch": 0.4308631172735635, "grad_norm": 0.5283121778634293, "learning_rate": 9.942154000942535e-06, "loss": 0.5484, "step": 4801 }, { "epoch": 0.43095286172623454, "grad_norm": 0.5060388058537361, "learning_rate": 9.942074777558132e-06, "loss": 0.5401, "step": 4802 }, { "epoch": 0.43104260617890555, "grad_norm": 0.5083976906786702, "learning_rate": 9.941995500276458e-06, "loss": 0.5043, "step": 4803 }, { "epoch": 0.43113235063157657, "grad_norm": 0.5022963728574974, "learning_rate": 9.941916169098379e-06, "loss": 0.5096, "step": 4804 }, { "epoch": 0.4312220950842476, "grad_norm": 0.46785885190482945, "learning_rate": 9.941836784024759e-06, "loss": 0.4315, "step": 4805 }, { "epoch": 0.43131183953691865, "grad_norm": 0.5344132919712749, "learning_rate": 9.941757345056465e-06, "loss": 0.5571, "step": 4806 }, { "epoch": 0.43140158398958967, "grad_norm": 0.5065660752543518, "learning_rate": 9.941677852194362e-06, "loss": 0.4982, "step": 4807 }, { "epoch": 0.4314913284422607, "grad_norm": 0.5111032464729436, "learning_rate": 9.941598305439318e-06, "loss": 0.4737, "step": 4808 }, { "epoch": 0.4315810728949317, "grad_norm": 0.49579489450988046, "learning_rate": 9.9415187047922e-06, "loss": 0.4609, "step": 4809 }, { "epoch": 0.4316708173476027, "grad_norm": 0.5403690205604144, "learning_rate": 9.941439050253876e-06, "loss": 0.485, "step": 4810 }, { "epoch": 0.4317605618002737, "grad_norm": 0.4674222230674241, "learning_rate": 9.941359341825215e-06, "loss": 0.478, "step": 4811 }, { "epoch": 0.43185030625294474, "grad_norm": 0.5114006575679217, "learning_rate": 9.941279579507086e-06, "loss": 0.514, "step": 4812 }, { "epoch": 0.43194005070561575, "grad_norm": 0.5453323352347706, "learning_rate": 9.94119976330036e-06, "loss": 0.5486, "step": 4813 }, { "epoch": 0.43202979515828677, "grad_norm": 0.5214296939514874, "learning_rate": 9.941119893205905e-06, "loss": 0.5223, "step": 4814 }, { "epoch": 0.4321195396109578, "grad_norm": 0.5052607474389023, "learning_rate": 9.941039969224594e-06, "loss": 0.4747, "step": 4815 }, { "epoch": 0.4322092840636288, "grad_norm": 0.5168052437083285, "learning_rate": 9.940959991357297e-06, "loss": 0.5063, "step": 4816 }, { "epoch": 0.4322990285162998, "grad_norm": 0.5228422032686237, "learning_rate": 9.94087995960489e-06, "loss": 0.501, "step": 4817 }, { "epoch": 0.4323887729689709, "grad_norm": 0.4634181749316955, "learning_rate": 9.940799873968241e-06, "loss": 0.4504, "step": 4818 }, { "epoch": 0.4324785174216419, "grad_norm": 0.4665356974439564, "learning_rate": 9.940719734448228e-06, "loss": 0.4276, "step": 4819 }, { "epoch": 0.4325682618743129, "grad_norm": 0.5184702107551928, "learning_rate": 9.94063954104572e-06, "loss": 0.5525, "step": 4820 }, { "epoch": 0.4326580063269839, "grad_norm": 0.47001664505991836, "learning_rate": 9.940559293761594e-06, "loss": 0.4833, "step": 4821 }, { "epoch": 0.43274775077965494, "grad_norm": 0.5089706280606594, "learning_rate": 9.940478992596727e-06, "loss": 0.5032, "step": 4822 }, { "epoch": 0.43283749523232595, "grad_norm": 0.5023806029394444, "learning_rate": 9.940398637551994e-06, "loss": 0.4803, "step": 4823 }, { "epoch": 0.43292723968499697, "grad_norm": 0.4537666841676502, "learning_rate": 9.940318228628268e-06, "loss": 0.4178, "step": 4824 }, { "epoch": 0.433016984137668, "grad_norm": 0.462275475444151, "learning_rate": 9.940237765826428e-06, "loss": 0.461, "step": 4825 }, { "epoch": 0.433106728590339, "grad_norm": 0.4960203443267273, "learning_rate": 9.940157249147353e-06, "loss": 0.5197, "step": 4826 }, { "epoch": 0.43319647304301, "grad_norm": 0.46579764712405963, "learning_rate": 9.94007667859192e-06, "loss": 0.418, "step": 4827 }, { "epoch": 0.433286217495681, "grad_norm": 0.5159517627453133, "learning_rate": 9.939996054161008e-06, "loss": 0.5353, "step": 4828 }, { "epoch": 0.43337596194835204, "grad_norm": 0.5009809463071843, "learning_rate": 9.939915375855496e-06, "loss": 0.4728, "step": 4829 }, { "epoch": 0.4334657064010231, "grad_norm": 0.49140744215952165, "learning_rate": 9.939834643676263e-06, "loss": 0.4376, "step": 4830 }, { "epoch": 0.4335554508536941, "grad_norm": 0.49254794761118303, "learning_rate": 9.93975385762419e-06, "loss": 0.4616, "step": 4831 }, { "epoch": 0.43364519530636514, "grad_norm": 0.5045871289339873, "learning_rate": 9.939673017700158e-06, "loss": 0.5193, "step": 4832 }, { "epoch": 0.43373493975903615, "grad_norm": 0.47896616519820706, "learning_rate": 9.93959212390505e-06, "loss": 0.464, "step": 4833 }, { "epoch": 0.43382468421170717, "grad_norm": 0.525611253321201, "learning_rate": 9.939511176239747e-06, "loss": 0.4804, "step": 4834 }, { "epoch": 0.4339144286643782, "grad_norm": 0.5251465016597991, "learning_rate": 9.939430174705132e-06, "loss": 0.5162, "step": 4835 }, { "epoch": 0.4340041731170492, "grad_norm": 0.5075280370034837, "learning_rate": 9.939349119302086e-06, "loss": 0.5448, "step": 4836 }, { "epoch": 0.4340939175697202, "grad_norm": 0.5209692540347756, "learning_rate": 9.939268010031498e-06, "loss": 0.5116, "step": 4837 }, { "epoch": 0.4341836620223912, "grad_norm": 0.4945679927330584, "learning_rate": 9.939186846894249e-06, "loss": 0.4553, "step": 4838 }, { "epoch": 0.43427340647506224, "grad_norm": 0.48226112366490326, "learning_rate": 9.939105629891223e-06, "loss": 0.4594, "step": 4839 }, { "epoch": 0.43436315092773325, "grad_norm": 0.5009630946357777, "learning_rate": 9.93902435902331e-06, "loss": 0.4535, "step": 4840 }, { "epoch": 0.4344528953804043, "grad_norm": 0.4814513621197296, "learning_rate": 9.938943034291394e-06, "loss": 0.4673, "step": 4841 }, { "epoch": 0.43454263983307534, "grad_norm": 0.5207409600663278, "learning_rate": 9.938861655696362e-06, "loss": 0.5082, "step": 4842 }, { "epoch": 0.43463238428574635, "grad_norm": 0.5434614108381977, "learning_rate": 9.9387802232391e-06, "loss": 0.5484, "step": 4843 }, { "epoch": 0.43472212873841737, "grad_norm": 0.48958261441305173, "learning_rate": 9.938698736920499e-06, "loss": 0.4712, "step": 4844 }, { "epoch": 0.4348118731910884, "grad_norm": 0.5145098623759403, "learning_rate": 9.938617196741444e-06, "loss": 0.5198, "step": 4845 }, { "epoch": 0.4349016176437594, "grad_norm": 0.5670748191793092, "learning_rate": 9.938535602702828e-06, "loss": 0.5684, "step": 4846 }, { "epoch": 0.4349913620964304, "grad_norm": 0.47700076694729315, "learning_rate": 9.938453954805538e-06, "loss": 0.5176, "step": 4847 }, { "epoch": 0.4350811065491014, "grad_norm": 0.46278630853543573, "learning_rate": 9.938372253050468e-06, "loss": 0.4245, "step": 4848 }, { "epoch": 0.43517085100177244, "grad_norm": 0.5061925649687532, "learning_rate": 9.938290497438505e-06, "loss": 0.533, "step": 4849 }, { "epoch": 0.43526059545444346, "grad_norm": 0.47922119934200785, "learning_rate": 9.938208687970542e-06, "loss": 0.4569, "step": 4850 }, { "epoch": 0.43535033990711447, "grad_norm": 0.4530028841172524, "learning_rate": 9.93812682464747e-06, "loss": 0.4099, "step": 4851 }, { "epoch": 0.4354400843597855, "grad_norm": 0.483702983978299, "learning_rate": 9.938044907470187e-06, "loss": 0.4659, "step": 4852 }, { "epoch": 0.43552982881245655, "grad_norm": 0.5070771404423488, "learning_rate": 9.937962936439582e-06, "loss": 0.4649, "step": 4853 }, { "epoch": 0.43561957326512757, "grad_norm": 0.468081372619939, "learning_rate": 9.93788091155655e-06, "loss": 0.4477, "step": 4854 }, { "epoch": 0.4357093177177986, "grad_norm": 0.46342751136372645, "learning_rate": 9.937798832821984e-06, "loss": 0.4102, "step": 4855 }, { "epoch": 0.4357990621704696, "grad_norm": 0.4998151479094823, "learning_rate": 9.93771670023678e-06, "loss": 0.4906, "step": 4856 }, { "epoch": 0.4358888066231406, "grad_norm": 0.456781345025946, "learning_rate": 9.937634513801835e-06, "loss": 0.3982, "step": 4857 }, { "epoch": 0.4359785510758116, "grad_norm": 0.4580950722542469, "learning_rate": 9.937552273518043e-06, "loss": 0.4349, "step": 4858 }, { "epoch": 0.43606829552848264, "grad_norm": 0.5217687136908715, "learning_rate": 9.937469979386304e-06, "loss": 0.4737, "step": 4859 }, { "epoch": 0.43615803998115366, "grad_norm": 0.4719967202138062, "learning_rate": 9.937387631407512e-06, "loss": 0.4776, "step": 4860 }, { "epoch": 0.43624778443382467, "grad_norm": 0.49001135900530435, "learning_rate": 9.937305229582567e-06, "loss": 0.491, "step": 4861 }, { "epoch": 0.4363375288864957, "grad_norm": 0.4542350804357993, "learning_rate": 9.93722277391237e-06, "loss": 0.4247, "step": 4862 }, { "epoch": 0.4364272733391667, "grad_norm": 0.5150867841492993, "learning_rate": 9.937140264397815e-06, "loss": 0.4868, "step": 4863 }, { "epoch": 0.43651701779183777, "grad_norm": 0.47973602547556393, "learning_rate": 9.937057701039806e-06, "loss": 0.488, "step": 4864 }, { "epoch": 0.4366067622445088, "grad_norm": 0.46746883911943576, "learning_rate": 9.936975083839241e-06, "loss": 0.4301, "step": 4865 }, { "epoch": 0.4366965066971798, "grad_norm": 0.4872094209123713, "learning_rate": 9.936892412797024e-06, "loss": 0.4759, "step": 4866 }, { "epoch": 0.4367862511498508, "grad_norm": 0.464885810063401, "learning_rate": 9.936809687914051e-06, "loss": 0.4357, "step": 4867 }, { "epoch": 0.4368759956025218, "grad_norm": 0.4838009170965439, "learning_rate": 9.93672690919123e-06, "loss": 0.4547, "step": 4868 }, { "epoch": 0.43696574005519284, "grad_norm": 0.48588261885433204, "learning_rate": 9.936644076629462e-06, "loss": 0.4789, "step": 4869 }, { "epoch": 0.43705548450786386, "grad_norm": 0.4597745726769619, "learning_rate": 9.93656119022965e-06, "loss": 0.4512, "step": 4870 }, { "epoch": 0.43714522896053487, "grad_norm": 0.4852585553200608, "learning_rate": 9.936478249992696e-06, "loss": 0.496, "step": 4871 }, { "epoch": 0.4372349734132059, "grad_norm": 0.5088550228727695, "learning_rate": 9.936395255919507e-06, "loss": 0.5262, "step": 4872 }, { "epoch": 0.4373247178658769, "grad_norm": 0.5225068696016878, "learning_rate": 9.936312208010988e-06, "loss": 0.5073, "step": 4873 }, { "epoch": 0.4374144623185479, "grad_norm": 0.524900744873903, "learning_rate": 9.936229106268045e-06, "loss": 0.4955, "step": 4874 }, { "epoch": 0.43750420677121893, "grad_norm": 0.5327180202949896, "learning_rate": 9.936145950691582e-06, "loss": 0.5526, "step": 4875 }, { "epoch": 0.43759395122389, "grad_norm": 0.5097064846114168, "learning_rate": 9.936062741282509e-06, "loss": 0.4785, "step": 4876 }, { "epoch": 0.437683695676561, "grad_norm": 0.5215465931769074, "learning_rate": 9.93597947804173e-06, "loss": 0.5098, "step": 4877 }, { "epoch": 0.437773440129232, "grad_norm": 0.5033430756512804, "learning_rate": 9.935896160970155e-06, "loss": 0.4984, "step": 4878 }, { "epoch": 0.43786318458190304, "grad_norm": 0.45085276205674146, "learning_rate": 9.935812790068693e-06, "loss": 0.428, "step": 4879 }, { "epoch": 0.43795292903457406, "grad_norm": 0.49790928968089293, "learning_rate": 9.935729365338253e-06, "loss": 0.482, "step": 4880 }, { "epoch": 0.43804267348724507, "grad_norm": 0.44312205234721286, "learning_rate": 9.935645886779743e-06, "loss": 0.4057, "step": 4881 }, { "epoch": 0.4381324179399161, "grad_norm": 0.5048641970053338, "learning_rate": 9.935562354394077e-06, "loss": 0.4675, "step": 4882 }, { "epoch": 0.4382221623925871, "grad_norm": 0.5288442014350253, "learning_rate": 9.935478768182162e-06, "loss": 0.5616, "step": 4883 }, { "epoch": 0.4383119068452581, "grad_norm": 0.5481031865279123, "learning_rate": 9.935395128144914e-06, "loss": 0.5537, "step": 4884 }, { "epoch": 0.43840165129792913, "grad_norm": 0.491700802329725, "learning_rate": 9.935311434283241e-06, "loss": 0.4545, "step": 4885 }, { "epoch": 0.43849139575060014, "grad_norm": 0.5382666921322018, "learning_rate": 9.935227686598056e-06, "loss": 0.5374, "step": 4886 }, { "epoch": 0.43858114020327116, "grad_norm": 0.5226531777406545, "learning_rate": 9.935143885090277e-06, "loss": 0.5159, "step": 4887 }, { "epoch": 0.43867088465594223, "grad_norm": 0.4838470409186158, "learning_rate": 9.935060029760812e-06, "loss": 0.448, "step": 4888 }, { "epoch": 0.43876062910861324, "grad_norm": 0.510087340472868, "learning_rate": 9.93497612061058e-06, "loss": 0.4875, "step": 4889 }, { "epoch": 0.43885037356128426, "grad_norm": 0.5120205077924903, "learning_rate": 9.934892157640492e-06, "loss": 0.4997, "step": 4890 }, { "epoch": 0.43894011801395527, "grad_norm": 0.4747817908329098, "learning_rate": 9.93480814085147e-06, "loss": 0.4766, "step": 4891 }, { "epoch": 0.4390298624666263, "grad_norm": 0.5159655148287994, "learning_rate": 9.934724070244421e-06, "loss": 0.5548, "step": 4892 }, { "epoch": 0.4391196069192973, "grad_norm": 0.4657246060333478, "learning_rate": 9.93463994582027e-06, "loss": 0.4418, "step": 4893 }, { "epoch": 0.4392093513719683, "grad_norm": 0.4627704058722574, "learning_rate": 9.934555767579932e-06, "loss": 0.4252, "step": 4894 }, { "epoch": 0.43929909582463933, "grad_norm": 0.5032089948180806, "learning_rate": 9.934471535524325e-06, "loss": 0.4512, "step": 4895 }, { "epoch": 0.43938884027731034, "grad_norm": 0.47676259090882006, "learning_rate": 9.934387249654364e-06, "loss": 0.4593, "step": 4896 }, { "epoch": 0.43947858472998136, "grad_norm": 0.49579367705770844, "learning_rate": 9.934302909970974e-06, "loss": 0.5247, "step": 4897 }, { "epoch": 0.4395683291826524, "grad_norm": 0.4535970634604541, "learning_rate": 9.934218516475071e-06, "loss": 0.4374, "step": 4898 }, { "epoch": 0.43965807363532344, "grad_norm": 0.48385117825834534, "learning_rate": 9.934134069167579e-06, "loss": 0.4785, "step": 4899 }, { "epoch": 0.43974781808799446, "grad_norm": 0.47079265237679935, "learning_rate": 9.934049568049413e-06, "loss": 0.4115, "step": 4900 }, { "epoch": 0.43983756254066547, "grad_norm": 0.5358629080938564, "learning_rate": 9.9339650131215e-06, "loss": 0.5047, "step": 4901 }, { "epoch": 0.4399273069933365, "grad_norm": 0.4342448681837725, "learning_rate": 9.933880404384759e-06, "loss": 0.3777, "step": 4902 }, { "epoch": 0.4400170514460075, "grad_norm": 0.4863777043950578, "learning_rate": 9.933795741840115e-06, "loss": 0.4864, "step": 4903 }, { "epoch": 0.4401067958986785, "grad_norm": 0.5072725077299313, "learning_rate": 9.93371102548849e-06, "loss": 0.4754, "step": 4904 }, { "epoch": 0.44019654035134953, "grad_norm": 0.5124023937455499, "learning_rate": 9.933626255330808e-06, "loss": 0.5066, "step": 4905 }, { "epoch": 0.44028628480402054, "grad_norm": 0.4855563343656834, "learning_rate": 9.933541431367994e-06, "loss": 0.4348, "step": 4906 }, { "epoch": 0.44037602925669156, "grad_norm": 0.49076364217583196, "learning_rate": 9.933456553600973e-06, "loss": 0.4821, "step": 4907 }, { "epoch": 0.4404657737093626, "grad_norm": 0.4931835598505356, "learning_rate": 9.93337162203067e-06, "loss": 0.5203, "step": 4908 }, { "epoch": 0.4405555181620336, "grad_norm": 0.49760463988874865, "learning_rate": 9.93328663665801e-06, "loss": 0.4965, "step": 4909 }, { "epoch": 0.4406452626147046, "grad_norm": 0.5906263227838592, "learning_rate": 9.933201597483923e-06, "loss": 0.5452, "step": 4910 }, { "epoch": 0.4407350070673757, "grad_norm": 0.48087947061332453, "learning_rate": 9.933116504509334e-06, "loss": 0.4272, "step": 4911 }, { "epoch": 0.4408247515200467, "grad_norm": 0.43983912726925695, "learning_rate": 9.933031357735174e-06, "loss": 0.3867, "step": 4912 }, { "epoch": 0.4409144959727177, "grad_norm": 0.4775951419921367, "learning_rate": 9.93294615716237e-06, "loss": 0.4463, "step": 4913 }, { "epoch": 0.4410042404253887, "grad_norm": 0.47902235561063095, "learning_rate": 9.932860902791847e-06, "loss": 0.4856, "step": 4914 }, { "epoch": 0.44109398487805973, "grad_norm": 0.4775600976487932, "learning_rate": 9.93277559462454e-06, "loss": 0.4767, "step": 4915 }, { "epoch": 0.44118372933073075, "grad_norm": 0.5024163741476678, "learning_rate": 9.932690232661377e-06, "loss": 0.4559, "step": 4916 }, { "epoch": 0.44127347378340176, "grad_norm": 0.4658456017746308, "learning_rate": 9.93260481690329e-06, "loss": 0.5002, "step": 4917 }, { "epoch": 0.4413632182360728, "grad_norm": 0.5143006040028083, "learning_rate": 9.932519347351212e-06, "loss": 0.4336, "step": 4918 }, { "epoch": 0.4414529626887438, "grad_norm": 0.5047031627796904, "learning_rate": 9.93243382400607e-06, "loss": 0.483, "step": 4919 }, { "epoch": 0.4415427071414148, "grad_norm": 0.4657409078172535, "learning_rate": 9.932348246868804e-06, "loss": 0.4542, "step": 4920 }, { "epoch": 0.4416324515940858, "grad_norm": 0.5129933506090386, "learning_rate": 9.932262615940342e-06, "loss": 0.5284, "step": 4921 }, { "epoch": 0.4417221960467569, "grad_norm": 0.4766121829429613, "learning_rate": 9.93217693122162e-06, "loss": 0.444, "step": 4922 }, { "epoch": 0.4418119404994279, "grad_norm": 0.4564813147065665, "learning_rate": 9.932091192713568e-06, "loss": 0.4257, "step": 4923 }, { "epoch": 0.4419016849520989, "grad_norm": 0.5011928000875845, "learning_rate": 9.932005400417129e-06, "loss": 0.5536, "step": 4924 }, { "epoch": 0.44199142940476993, "grad_norm": 0.48016771633451, "learning_rate": 9.931919554333232e-06, "loss": 0.4367, "step": 4925 }, { "epoch": 0.44208117385744095, "grad_norm": 0.5297709965735843, "learning_rate": 9.931833654462814e-06, "loss": 0.5039, "step": 4926 }, { "epoch": 0.44217091831011196, "grad_norm": 0.5181142674471624, "learning_rate": 9.931747700806817e-06, "loss": 0.4727, "step": 4927 }, { "epoch": 0.442260662762783, "grad_norm": 0.4649881831800886, "learning_rate": 9.931661693366173e-06, "loss": 0.4117, "step": 4928 }, { "epoch": 0.442350407215454, "grad_norm": 0.49344667491646077, "learning_rate": 9.931575632141822e-06, "loss": 0.4812, "step": 4929 }, { "epoch": 0.442440151668125, "grad_norm": 0.5198918990783613, "learning_rate": 9.931489517134703e-06, "loss": 0.5119, "step": 4930 }, { "epoch": 0.442529896120796, "grad_norm": 0.4879007467324339, "learning_rate": 9.931403348345751e-06, "loss": 0.49, "step": 4931 }, { "epoch": 0.44261964057346703, "grad_norm": 0.5096814804109765, "learning_rate": 9.931317125775913e-06, "loss": 0.5487, "step": 4932 }, { "epoch": 0.44270938502613805, "grad_norm": 0.4915992978275393, "learning_rate": 9.931230849426125e-06, "loss": 0.4761, "step": 4933 }, { "epoch": 0.4427991294788091, "grad_norm": 0.5216012347581891, "learning_rate": 9.931144519297327e-06, "loss": 0.4836, "step": 4934 }, { "epoch": 0.44288887393148013, "grad_norm": 0.5126083584289535, "learning_rate": 9.931058135390464e-06, "loss": 0.5024, "step": 4935 }, { "epoch": 0.44297861838415115, "grad_norm": 0.5391348684317818, "learning_rate": 9.930971697706474e-06, "loss": 0.5473, "step": 4936 }, { "epoch": 0.44306836283682216, "grad_norm": 0.5354499247002585, "learning_rate": 9.930885206246303e-06, "loss": 0.5756, "step": 4937 }, { "epoch": 0.4431581072894932, "grad_norm": 0.48292936410207926, "learning_rate": 9.930798661010891e-06, "loss": 0.4963, "step": 4938 }, { "epoch": 0.4432478517421642, "grad_norm": 0.47279773140926723, "learning_rate": 9.930712062001186e-06, "loss": 0.4586, "step": 4939 }, { "epoch": 0.4433375961948352, "grad_norm": 0.47087212294123915, "learning_rate": 9.930625409218128e-06, "loss": 0.458, "step": 4940 }, { "epoch": 0.4434273406475062, "grad_norm": 0.4616870458639609, "learning_rate": 9.930538702662666e-06, "loss": 0.4212, "step": 4941 }, { "epoch": 0.44351708510017723, "grad_norm": 0.43357854108156485, "learning_rate": 9.930451942335745e-06, "loss": 0.3992, "step": 4942 }, { "epoch": 0.44360682955284825, "grad_norm": 0.48518061637219523, "learning_rate": 9.930365128238308e-06, "loss": 0.4737, "step": 4943 }, { "epoch": 0.44369657400551926, "grad_norm": 0.5053316350854482, "learning_rate": 9.930278260371304e-06, "loss": 0.5053, "step": 4944 }, { "epoch": 0.4437863184581903, "grad_norm": 0.4955078155691053, "learning_rate": 9.930191338735681e-06, "loss": 0.498, "step": 4945 }, { "epoch": 0.44387606291086135, "grad_norm": 0.45806389659138497, "learning_rate": 9.930104363332385e-06, "loss": 0.4056, "step": 4946 }, { "epoch": 0.44396580736353236, "grad_norm": 0.46565392369882236, "learning_rate": 9.930017334162368e-06, "loss": 0.4343, "step": 4947 }, { "epoch": 0.4440555518162034, "grad_norm": 0.5547764434417477, "learning_rate": 9.929930251226574e-06, "loss": 0.5009, "step": 4948 }, { "epoch": 0.4441452962688744, "grad_norm": 0.4763031107446885, "learning_rate": 9.929843114525956e-06, "loss": 0.446, "step": 4949 }, { "epoch": 0.4442350407215454, "grad_norm": 0.5165063527613044, "learning_rate": 9.929755924061466e-06, "loss": 0.5187, "step": 4950 }, { "epoch": 0.4443247851742164, "grad_norm": 0.4909526519056218, "learning_rate": 9.929668679834051e-06, "loss": 0.4919, "step": 4951 }, { "epoch": 0.44441452962688743, "grad_norm": 0.5019022625494557, "learning_rate": 9.929581381844664e-06, "loss": 0.5063, "step": 4952 }, { "epoch": 0.44450427407955845, "grad_norm": 0.4891962136329015, "learning_rate": 9.929494030094258e-06, "loss": 0.4784, "step": 4953 }, { "epoch": 0.44459401853222946, "grad_norm": 0.5269639759709899, "learning_rate": 9.929406624583784e-06, "loss": 0.5346, "step": 4954 }, { "epoch": 0.4446837629849005, "grad_norm": 0.5343085374980626, "learning_rate": 9.929319165314196e-06, "loss": 0.5082, "step": 4955 }, { "epoch": 0.4447735074375715, "grad_norm": 0.477949327313476, "learning_rate": 9.929231652286449e-06, "loss": 0.4609, "step": 4956 }, { "epoch": 0.44486325189024256, "grad_norm": 0.5022874130301216, "learning_rate": 9.929144085501496e-06, "loss": 0.4892, "step": 4957 }, { "epoch": 0.4449529963429136, "grad_norm": 0.48935698192986454, "learning_rate": 9.929056464960292e-06, "loss": 0.498, "step": 4958 }, { "epoch": 0.4450427407955846, "grad_norm": 0.4976429492341453, "learning_rate": 9.928968790663792e-06, "loss": 0.4841, "step": 4959 }, { "epoch": 0.4451324852482556, "grad_norm": 0.4901608826674669, "learning_rate": 9.928881062612954e-06, "loss": 0.4517, "step": 4960 }, { "epoch": 0.4452222297009266, "grad_norm": 0.4516369210085846, "learning_rate": 9.928793280808733e-06, "loss": 0.4616, "step": 4961 }, { "epoch": 0.44531197415359763, "grad_norm": 0.5316139551881949, "learning_rate": 9.928705445252087e-06, "loss": 0.4963, "step": 4962 }, { "epoch": 0.44540171860626865, "grad_norm": 0.5229012969394363, "learning_rate": 9.928617555943976e-06, "loss": 0.5234, "step": 4963 }, { "epoch": 0.44549146305893966, "grad_norm": 0.5151818321838372, "learning_rate": 9.928529612885354e-06, "loss": 0.5197, "step": 4964 }, { "epoch": 0.4455812075116107, "grad_norm": 0.48033011083739446, "learning_rate": 9.928441616077183e-06, "loss": 0.4588, "step": 4965 }, { "epoch": 0.4456709519642817, "grad_norm": 0.4537892518243013, "learning_rate": 9.928353565520424e-06, "loss": 0.4347, "step": 4966 }, { "epoch": 0.4457606964169527, "grad_norm": 0.4685625740240877, "learning_rate": 9.928265461216033e-06, "loss": 0.445, "step": 4967 }, { "epoch": 0.4458504408696237, "grad_norm": 0.5277996898206047, "learning_rate": 9.928177303164975e-06, "loss": 0.4929, "step": 4968 }, { "epoch": 0.4459401853222948, "grad_norm": 0.45015574900844824, "learning_rate": 9.92808909136821e-06, "loss": 0.4676, "step": 4969 }, { "epoch": 0.4460299297749658, "grad_norm": 0.5497872734980774, "learning_rate": 9.928000825826699e-06, "loss": 0.5329, "step": 4970 }, { "epoch": 0.4461196742276368, "grad_norm": 0.46861445006138563, "learning_rate": 9.927912506541406e-06, "loss": 0.4494, "step": 4971 }, { "epoch": 0.44620941868030783, "grad_norm": 0.4781327435674136, "learning_rate": 9.927824133513293e-06, "loss": 0.4968, "step": 4972 }, { "epoch": 0.44629916313297885, "grad_norm": 0.4631820461192646, "learning_rate": 9.927735706743324e-06, "loss": 0.5099, "step": 4973 }, { "epoch": 0.44638890758564986, "grad_norm": 0.4970849640830206, "learning_rate": 9.927647226232466e-06, "loss": 0.4834, "step": 4974 }, { "epoch": 0.4464786520383209, "grad_norm": 0.49622245281081034, "learning_rate": 9.92755869198168e-06, "loss": 0.4684, "step": 4975 }, { "epoch": 0.4465683964909919, "grad_norm": 0.4656765589106346, "learning_rate": 9.927470103991935e-06, "loss": 0.4858, "step": 4976 }, { "epoch": 0.4466581409436629, "grad_norm": 0.4800402241511523, "learning_rate": 9.927381462264191e-06, "loss": 0.5085, "step": 4977 }, { "epoch": 0.4467478853963339, "grad_norm": 0.5319919726274962, "learning_rate": 9.927292766799424e-06, "loss": 0.5299, "step": 4978 }, { "epoch": 0.44683762984900494, "grad_norm": 0.45283004290126744, "learning_rate": 9.927204017598593e-06, "loss": 0.4127, "step": 4979 }, { "epoch": 0.44692737430167595, "grad_norm": 0.46683095622943344, "learning_rate": 9.92711521466267e-06, "loss": 0.4686, "step": 4980 }, { "epoch": 0.447017118754347, "grad_norm": 0.45659972486365025, "learning_rate": 9.927026357992623e-06, "loss": 0.4227, "step": 4981 }, { "epoch": 0.44710686320701803, "grad_norm": 0.45938554355965094, "learning_rate": 9.926937447589422e-06, "loss": 0.3905, "step": 4982 }, { "epoch": 0.44719660765968905, "grad_norm": 0.520568858127945, "learning_rate": 9.926848483454035e-06, "loss": 0.5095, "step": 4983 }, { "epoch": 0.44728635211236006, "grad_norm": 0.47812220578615516, "learning_rate": 9.926759465587432e-06, "loss": 0.4943, "step": 4984 }, { "epoch": 0.4473760965650311, "grad_norm": 0.49640918240838255, "learning_rate": 9.926670393990585e-06, "loss": 0.4903, "step": 4985 }, { "epoch": 0.4474658410177021, "grad_norm": 0.4788795005005969, "learning_rate": 9.926581268664464e-06, "loss": 0.4946, "step": 4986 }, { "epoch": 0.4475555854703731, "grad_norm": 0.45636522861934803, "learning_rate": 9.926492089610042e-06, "loss": 0.436, "step": 4987 }, { "epoch": 0.4476453299230441, "grad_norm": 0.5131759009236107, "learning_rate": 9.926402856828293e-06, "loss": 0.5029, "step": 4988 }, { "epoch": 0.44773507437571514, "grad_norm": 0.5340769450496253, "learning_rate": 9.926313570320184e-06, "loss": 0.5285, "step": 4989 }, { "epoch": 0.44782481882838615, "grad_norm": 0.48844449282244595, "learning_rate": 9.926224230086699e-06, "loss": 0.484, "step": 4990 }, { "epoch": 0.44791456328105717, "grad_norm": 0.46828562417331493, "learning_rate": 9.926134836128804e-06, "loss": 0.4538, "step": 4991 }, { "epoch": 0.44800430773372824, "grad_norm": 0.5122961854742525, "learning_rate": 9.926045388447476e-06, "loss": 0.4772, "step": 4992 }, { "epoch": 0.44809405218639925, "grad_norm": 0.49994307088851553, "learning_rate": 9.925955887043692e-06, "loss": 0.4884, "step": 4993 }, { "epoch": 0.44818379663907026, "grad_norm": 0.502333538647547, "learning_rate": 9.925866331918427e-06, "loss": 0.4911, "step": 4994 }, { "epoch": 0.4482735410917413, "grad_norm": 0.46121457750295036, "learning_rate": 9.925776723072657e-06, "loss": 0.4597, "step": 4995 }, { "epoch": 0.4483632855444123, "grad_norm": 0.5219957334679945, "learning_rate": 9.925687060507361e-06, "loss": 0.5244, "step": 4996 }, { "epoch": 0.4484530299970833, "grad_norm": 0.4814063176724445, "learning_rate": 9.925597344223516e-06, "loss": 0.4575, "step": 4997 }, { "epoch": 0.4485427744497543, "grad_norm": 0.4898575562958106, "learning_rate": 9.9255075742221e-06, "loss": 0.4745, "step": 4998 }, { "epoch": 0.44863251890242534, "grad_norm": 0.4893271628421165, "learning_rate": 9.925417750504091e-06, "loss": 0.4238, "step": 4999 }, { "epoch": 0.44872226335509635, "grad_norm": 0.49793347446075076, "learning_rate": 9.925327873070471e-06, "loss": 0.5274, "step": 5000 }, { "epoch": 0.44881200780776737, "grad_norm": 0.5169935132342979, "learning_rate": 9.925237941922219e-06, "loss": 0.5401, "step": 5001 }, { "epoch": 0.4489017522604384, "grad_norm": 0.5069159683682566, "learning_rate": 9.925147957060317e-06, "loss": 0.5083, "step": 5002 }, { "epoch": 0.4489914967131094, "grad_norm": 0.4780063433211983, "learning_rate": 9.925057918485743e-06, "loss": 0.4414, "step": 5003 }, { "epoch": 0.44908124116578046, "grad_norm": 0.4462119776368237, "learning_rate": 9.924967826199483e-06, "loss": 0.4363, "step": 5004 }, { "epoch": 0.4491709856184515, "grad_norm": 0.4437135814134744, "learning_rate": 9.924877680202518e-06, "loss": 0.4452, "step": 5005 }, { "epoch": 0.4492607300711225, "grad_norm": 0.5469950921517527, "learning_rate": 9.92478748049583e-06, "loss": 0.5613, "step": 5006 }, { "epoch": 0.4493504745237935, "grad_norm": 0.48702919527027644, "learning_rate": 9.924697227080404e-06, "loss": 0.5162, "step": 5007 }, { "epoch": 0.4494402189764645, "grad_norm": 0.5205481218755037, "learning_rate": 9.924606919957222e-06, "loss": 0.5017, "step": 5008 }, { "epoch": 0.44952996342913554, "grad_norm": 0.4782413190439468, "learning_rate": 9.924516559127274e-06, "loss": 0.4375, "step": 5009 }, { "epoch": 0.44961970788180655, "grad_norm": 0.49249033851058843, "learning_rate": 9.924426144591538e-06, "loss": 0.4909, "step": 5010 }, { "epoch": 0.44970945233447757, "grad_norm": 0.4677186802502062, "learning_rate": 9.924335676351007e-06, "loss": 0.4773, "step": 5011 }, { "epoch": 0.4497991967871486, "grad_norm": 0.47782291315421677, "learning_rate": 9.924245154406665e-06, "loss": 0.4591, "step": 5012 }, { "epoch": 0.4498889412398196, "grad_norm": 0.45955016137205484, "learning_rate": 9.924154578759498e-06, "loss": 0.4241, "step": 5013 }, { "epoch": 0.4499786856924906, "grad_norm": 0.4763021239544656, "learning_rate": 9.924063949410493e-06, "loss": 0.48, "step": 5014 }, { "epoch": 0.4500684301451617, "grad_norm": 0.4765358240660426, "learning_rate": 9.923973266360643e-06, "loss": 0.4539, "step": 5015 }, { "epoch": 0.4501581745978327, "grad_norm": 0.5774741181379522, "learning_rate": 9.923882529610934e-06, "loss": 0.5906, "step": 5016 }, { "epoch": 0.4502479190505037, "grad_norm": 0.5302564532381532, "learning_rate": 9.923791739162355e-06, "loss": 0.526, "step": 5017 }, { "epoch": 0.4503376635031747, "grad_norm": 0.48826866154650606, "learning_rate": 9.923700895015896e-06, "loss": 0.4874, "step": 5018 }, { "epoch": 0.45042740795584574, "grad_norm": 0.4437471455444021, "learning_rate": 9.92360999717255e-06, "loss": 0.4228, "step": 5019 }, { "epoch": 0.45051715240851675, "grad_norm": 0.4847812622929247, "learning_rate": 9.923519045633307e-06, "loss": 0.4589, "step": 5020 }, { "epoch": 0.45060689686118777, "grad_norm": 0.5654574301099057, "learning_rate": 9.923428040399157e-06, "loss": 0.5486, "step": 5021 }, { "epoch": 0.4506966413138588, "grad_norm": 0.4829789014038662, "learning_rate": 9.923336981471097e-06, "loss": 0.4458, "step": 5022 }, { "epoch": 0.4507863857665298, "grad_norm": 0.46761576271247085, "learning_rate": 9.923245868850115e-06, "loss": 0.4294, "step": 5023 }, { "epoch": 0.4508761302192008, "grad_norm": 0.4431219482915915, "learning_rate": 9.923154702537208e-06, "loss": 0.4214, "step": 5024 }, { "epoch": 0.4509658746718718, "grad_norm": 0.4188189482519248, "learning_rate": 9.92306348253337e-06, "loss": 0.3756, "step": 5025 }, { "epoch": 0.45105561912454284, "grad_norm": 0.47134162515071204, "learning_rate": 9.922972208839595e-06, "loss": 0.4358, "step": 5026 }, { "epoch": 0.4511453635772139, "grad_norm": 0.5233936055565773, "learning_rate": 9.922880881456876e-06, "loss": 0.5489, "step": 5027 }, { "epoch": 0.4512351080298849, "grad_norm": 0.5016317153349422, "learning_rate": 9.922789500386214e-06, "loss": 0.4571, "step": 5028 }, { "epoch": 0.45132485248255594, "grad_norm": 0.522488946950435, "learning_rate": 9.922698065628603e-06, "loss": 0.5435, "step": 5029 }, { "epoch": 0.45141459693522695, "grad_norm": 0.5653016035224677, "learning_rate": 9.922606577185039e-06, "loss": 0.5772, "step": 5030 }, { "epoch": 0.45150434138789797, "grad_norm": 0.5089424862028991, "learning_rate": 9.922515035056522e-06, "loss": 0.5048, "step": 5031 }, { "epoch": 0.451594085840569, "grad_norm": 0.4605066625912434, "learning_rate": 9.922423439244049e-06, "loss": 0.424, "step": 5032 }, { "epoch": 0.45168383029324, "grad_norm": 0.544170947560468, "learning_rate": 9.922331789748619e-06, "loss": 0.5374, "step": 5033 }, { "epoch": 0.451773574745911, "grad_norm": 0.5126613577685923, "learning_rate": 9.922240086571234e-06, "loss": 0.5309, "step": 5034 }, { "epoch": 0.451863319198582, "grad_norm": 0.4978169501358785, "learning_rate": 9.922148329712888e-06, "loss": 0.487, "step": 5035 }, { "epoch": 0.45195306365125304, "grad_norm": 0.5514411234900272, "learning_rate": 9.922056519174588e-06, "loss": 0.5598, "step": 5036 }, { "epoch": 0.45204280810392405, "grad_norm": 0.46347426341914205, "learning_rate": 9.921964654957333e-06, "loss": 0.4326, "step": 5037 }, { "epoch": 0.45213255255659507, "grad_norm": 0.5090530230240345, "learning_rate": 9.921872737062123e-06, "loss": 0.4882, "step": 5038 }, { "epoch": 0.45222229700926614, "grad_norm": 0.48737001353182147, "learning_rate": 9.921780765489964e-06, "loss": 0.5143, "step": 5039 }, { "epoch": 0.45231204146193715, "grad_norm": 0.5023226943143337, "learning_rate": 9.921688740241856e-06, "loss": 0.5134, "step": 5040 }, { "epoch": 0.45240178591460817, "grad_norm": 0.4549719606493914, "learning_rate": 9.921596661318804e-06, "loss": 0.4336, "step": 5041 }, { "epoch": 0.4524915303672792, "grad_norm": 0.5090132048420258, "learning_rate": 9.921504528721812e-06, "loss": 0.4894, "step": 5042 }, { "epoch": 0.4525812748199502, "grad_norm": 0.4923652252851843, "learning_rate": 9.921412342451886e-06, "loss": 0.453, "step": 5043 }, { "epoch": 0.4526710192726212, "grad_norm": 0.4651727073239432, "learning_rate": 9.921320102510029e-06, "loss": 0.4116, "step": 5044 }, { "epoch": 0.4527607637252922, "grad_norm": 0.4537726775628562, "learning_rate": 9.921227808897247e-06, "loss": 0.4165, "step": 5045 }, { "epoch": 0.45285050817796324, "grad_norm": 0.4768898404215344, "learning_rate": 9.921135461614548e-06, "loss": 0.4541, "step": 5046 }, { "epoch": 0.45294025263063425, "grad_norm": 0.44571300993385554, "learning_rate": 9.92104306066294e-06, "loss": 0.4024, "step": 5047 }, { "epoch": 0.45302999708330527, "grad_norm": 0.474741078122224, "learning_rate": 9.92095060604343e-06, "loss": 0.4814, "step": 5048 }, { "epoch": 0.4531197415359763, "grad_norm": 0.5576791695156761, "learning_rate": 9.920858097757023e-06, "loss": 0.5608, "step": 5049 }, { "epoch": 0.45320948598864735, "grad_norm": 0.5021297809045967, "learning_rate": 9.920765535804732e-06, "loss": 0.4828, "step": 5050 }, { "epoch": 0.45329923044131837, "grad_norm": 0.5450946142697666, "learning_rate": 9.920672920187566e-06, "loss": 0.5128, "step": 5051 }, { "epoch": 0.4533889748939894, "grad_norm": 0.4619600682260913, "learning_rate": 9.920580250906532e-06, "loss": 0.4468, "step": 5052 }, { "epoch": 0.4534787193466604, "grad_norm": 0.49936788323420944, "learning_rate": 9.920487527962643e-06, "loss": 0.4709, "step": 5053 }, { "epoch": 0.4535684637993314, "grad_norm": 0.4839787382582648, "learning_rate": 9.920394751356913e-06, "loss": 0.4865, "step": 5054 }, { "epoch": 0.4536582082520024, "grad_norm": 0.4910274311166544, "learning_rate": 9.920301921090347e-06, "loss": 0.4579, "step": 5055 }, { "epoch": 0.45374795270467344, "grad_norm": 0.5166532937550954, "learning_rate": 9.920209037163962e-06, "loss": 0.5377, "step": 5056 }, { "epoch": 0.45383769715734446, "grad_norm": 0.477109099854778, "learning_rate": 9.920116099578771e-06, "loss": 0.4916, "step": 5057 }, { "epoch": 0.45392744161001547, "grad_norm": 0.4621087414117078, "learning_rate": 9.920023108335786e-06, "loss": 0.4331, "step": 5058 }, { "epoch": 0.4540171860626865, "grad_norm": 0.5069691671960346, "learning_rate": 9.919930063436024e-06, "loss": 0.5187, "step": 5059 }, { "epoch": 0.4541069305153575, "grad_norm": 0.5432917438867972, "learning_rate": 9.919836964880495e-06, "loss": 0.5706, "step": 5060 }, { "epoch": 0.4541966749680285, "grad_norm": 0.45623947828927863, "learning_rate": 9.919743812670217e-06, "loss": 0.4293, "step": 5061 }, { "epoch": 0.4542864194206996, "grad_norm": 0.5531486109309406, "learning_rate": 9.919650606806207e-06, "loss": 0.5277, "step": 5062 }, { "epoch": 0.4543761638733706, "grad_norm": 0.44790128709983346, "learning_rate": 9.91955734728948e-06, "loss": 0.4227, "step": 5063 }, { "epoch": 0.4544659083260416, "grad_norm": 0.484674629125104, "learning_rate": 9.91946403412105e-06, "loss": 0.473, "step": 5064 }, { "epoch": 0.4545556527787126, "grad_norm": 0.5316054035533129, "learning_rate": 9.919370667301943e-06, "loss": 0.5357, "step": 5065 }, { "epoch": 0.45464539723138364, "grad_norm": 0.47545325835204366, "learning_rate": 9.91927724683317e-06, "loss": 0.468, "step": 5066 }, { "epoch": 0.45473514168405466, "grad_norm": 0.5354449742982543, "learning_rate": 9.919183772715752e-06, "loss": 0.5306, "step": 5067 }, { "epoch": 0.45482488613672567, "grad_norm": 0.46765478907984454, "learning_rate": 9.919090244950707e-06, "loss": 0.472, "step": 5068 }, { "epoch": 0.4549146305893967, "grad_norm": 0.5001857318111362, "learning_rate": 9.918996663539058e-06, "loss": 0.4831, "step": 5069 }, { "epoch": 0.4550043750420677, "grad_norm": 0.5301222033097759, "learning_rate": 9.918903028481824e-06, "loss": 0.5293, "step": 5070 }, { "epoch": 0.4550941194947387, "grad_norm": 0.44233836016322964, "learning_rate": 9.918809339780027e-06, "loss": 0.4093, "step": 5071 }, { "epoch": 0.45518386394740973, "grad_norm": 0.5179741866214894, "learning_rate": 9.918715597434686e-06, "loss": 0.5198, "step": 5072 }, { "epoch": 0.45527360840008074, "grad_norm": 0.5222351420658848, "learning_rate": 9.918621801446826e-06, "loss": 0.4953, "step": 5073 }, { "epoch": 0.4553633528527518, "grad_norm": 0.5369528247474716, "learning_rate": 9.91852795181747e-06, "loss": 0.5218, "step": 5074 }, { "epoch": 0.4554530973054228, "grad_norm": 0.4937156803105818, "learning_rate": 9.91843404854764e-06, "loss": 0.4907, "step": 5075 }, { "epoch": 0.45554284175809384, "grad_norm": 0.45843493311819317, "learning_rate": 9.91834009163836e-06, "loss": 0.465, "step": 5076 }, { "epoch": 0.45563258621076486, "grad_norm": 0.5373548063458828, "learning_rate": 9.918246081090657e-06, "loss": 0.5056, "step": 5077 }, { "epoch": 0.45572233066343587, "grad_norm": 0.49769328832966536, "learning_rate": 9.918152016905554e-06, "loss": 0.5262, "step": 5078 }, { "epoch": 0.4558120751161069, "grad_norm": 0.4954983509745315, "learning_rate": 9.918057899084077e-06, "loss": 0.438, "step": 5079 }, { "epoch": 0.4559018195687779, "grad_norm": 0.5253697593580258, "learning_rate": 9.917963727627254e-06, "loss": 0.5084, "step": 5080 }, { "epoch": 0.4559915640214489, "grad_norm": 0.49754393832058774, "learning_rate": 9.91786950253611e-06, "loss": 0.4984, "step": 5081 }, { "epoch": 0.45608130847411993, "grad_norm": 0.45084015237711833, "learning_rate": 9.917775223811675e-06, "loss": 0.4343, "step": 5082 }, { "epoch": 0.45617105292679094, "grad_norm": 0.5071838624235275, "learning_rate": 9.917680891454974e-06, "loss": 0.5385, "step": 5083 }, { "epoch": 0.45626079737946196, "grad_norm": 0.4928819839503446, "learning_rate": 9.917586505467038e-06, "loss": 0.468, "step": 5084 }, { "epoch": 0.456350541832133, "grad_norm": 0.461411019747897, "learning_rate": 9.917492065848898e-06, "loss": 0.4623, "step": 5085 }, { "epoch": 0.45644028628480404, "grad_norm": 0.47190990104827935, "learning_rate": 9.91739757260158e-06, "loss": 0.4655, "step": 5086 }, { "epoch": 0.45653003073747506, "grad_norm": 0.5218771576066584, "learning_rate": 9.917303025726114e-06, "loss": 0.4857, "step": 5087 }, { "epoch": 0.45661977519014607, "grad_norm": 0.4936376642818101, "learning_rate": 9.917208425223537e-06, "loss": 0.4389, "step": 5088 }, { "epoch": 0.4567095196428171, "grad_norm": 0.5416455031806724, "learning_rate": 9.917113771094876e-06, "loss": 0.5481, "step": 5089 }, { "epoch": 0.4567992640954881, "grad_norm": 0.521535605427669, "learning_rate": 9.917019063341165e-06, "loss": 0.5117, "step": 5090 }, { "epoch": 0.4568890085481591, "grad_norm": 0.5026135205349601, "learning_rate": 9.916924301963435e-06, "loss": 0.5041, "step": 5091 }, { "epoch": 0.45697875300083013, "grad_norm": 0.47876167189042085, "learning_rate": 9.916829486962722e-06, "loss": 0.4759, "step": 5092 }, { "epoch": 0.45706849745350114, "grad_norm": 0.4662116462235035, "learning_rate": 9.916734618340058e-06, "loss": 0.4504, "step": 5093 }, { "epoch": 0.45715824190617216, "grad_norm": 0.4523067296904437, "learning_rate": 9.91663969609648e-06, "loss": 0.4406, "step": 5094 }, { "epoch": 0.4572479863588432, "grad_norm": 0.531660564362228, "learning_rate": 9.91654472023302e-06, "loss": 0.4857, "step": 5095 }, { "epoch": 0.4573377308115142, "grad_norm": 0.4866651039451645, "learning_rate": 9.916449690750716e-06, "loss": 0.418, "step": 5096 }, { "epoch": 0.45742747526418526, "grad_norm": 0.5034796356491948, "learning_rate": 9.916354607650604e-06, "loss": 0.5003, "step": 5097 }, { "epoch": 0.45751721971685627, "grad_norm": 0.47132802551819064, "learning_rate": 9.91625947093372e-06, "loss": 0.4676, "step": 5098 }, { "epoch": 0.4576069641695273, "grad_norm": 0.46777284440497313, "learning_rate": 9.916164280601102e-06, "loss": 0.4517, "step": 5099 }, { "epoch": 0.4576967086221983, "grad_norm": 0.4831843338268825, "learning_rate": 9.916069036653789e-06, "loss": 0.4453, "step": 5100 }, { "epoch": 0.4577864530748693, "grad_norm": 0.487820024295016, "learning_rate": 9.915973739092819e-06, "loss": 0.4723, "step": 5101 }, { "epoch": 0.45787619752754033, "grad_norm": 0.4977636689383352, "learning_rate": 9.915878387919232e-06, "loss": 0.4993, "step": 5102 }, { "epoch": 0.45796594198021134, "grad_norm": 0.4710878082272777, "learning_rate": 9.915782983134067e-06, "loss": 0.4497, "step": 5103 }, { "epoch": 0.45805568643288236, "grad_norm": 0.4375101617158665, "learning_rate": 9.915687524738364e-06, "loss": 0.3754, "step": 5104 }, { "epoch": 0.4581454308855534, "grad_norm": 0.4685276616487323, "learning_rate": 9.915592012733167e-06, "loss": 0.4738, "step": 5105 }, { "epoch": 0.4582351753382244, "grad_norm": 0.4409143257431613, "learning_rate": 9.915496447119512e-06, "loss": 0.3703, "step": 5106 }, { "epoch": 0.4583249197908954, "grad_norm": 0.4599365679152367, "learning_rate": 9.915400827898448e-06, "loss": 0.4561, "step": 5107 }, { "epoch": 0.45841466424356647, "grad_norm": 0.449126554921262, "learning_rate": 9.915305155071013e-06, "loss": 0.4188, "step": 5108 }, { "epoch": 0.4585044086962375, "grad_norm": 0.5033636735562269, "learning_rate": 9.91520942863825e-06, "loss": 0.572, "step": 5109 }, { "epoch": 0.4585941531489085, "grad_norm": 0.5450637364928336, "learning_rate": 9.915113648601208e-06, "loss": 0.5073, "step": 5110 }, { "epoch": 0.4586838976015795, "grad_norm": 0.5167926671644277, "learning_rate": 9.915017814960926e-06, "loss": 0.5594, "step": 5111 }, { "epoch": 0.45877364205425053, "grad_norm": 0.4740521689222281, "learning_rate": 9.914921927718453e-06, "loss": 0.455, "step": 5112 }, { "epoch": 0.45886338650692154, "grad_norm": 0.45674781930458674, "learning_rate": 9.914825986874833e-06, "loss": 0.4347, "step": 5113 }, { "epoch": 0.45895313095959256, "grad_norm": 0.47262600486115675, "learning_rate": 9.914729992431112e-06, "loss": 0.4135, "step": 5114 }, { "epoch": 0.4590428754122636, "grad_norm": 0.5113041275792823, "learning_rate": 9.91463394438834e-06, "loss": 0.5571, "step": 5115 }, { "epoch": 0.4591326198649346, "grad_norm": 0.5119294397461108, "learning_rate": 9.914537842747558e-06, "loss": 0.4521, "step": 5116 }, { "epoch": 0.4592223643176056, "grad_norm": 0.5008562522528648, "learning_rate": 9.91444168750982e-06, "loss": 0.48, "step": 5117 }, { "epoch": 0.4593121087702766, "grad_norm": 0.4829836145344728, "learning_rate": 9.914345478676173e-06, "loss": 0.4489, "step": 5118 }, { "epoch": 0.45940185322294763, "grad_norm": 0.475139233605938, "learning_rate": 9.914249216247664e-06, "loss": 0.4496, "step": 5119 }, { "epoch": 0.4594915976756187, "grad_norm": 0.44669714795292076, "learning_rate": 9.914152900225347e-06, "loss": 0.4147, "step": 5120 }, { "epoch": 0.4595813421282897, "grad_norm": 0.5063633902575988, "learning_rate": 9.914056530610268e-06, "loss": 0.5423, "step": 5121 }, { "epoch": 0.45967108658096073, "grad_norm": 0.5315042443076926, "learning_rate": 9.913960107403482e-06, "loss": 0.5673, "step": 5122 }, { "epoch": 0.45976083103363174, "grad_norm": 0.47287031388545636, "learning_rate": 9.913863630606038e-06, "loss": 0.4386, "step": 5123 }, { "epoch": 0.45985057548630276, "grad_norm": 0.5491238402643237, "learning_rate": 9.913767100218988e-06, "loss": 0.5278, "step": 5124 }, { "epoch": 0.4599403199389738, "grad_norm": 0.5039477381388215, "learning_rate": 9.913670516243385e-06, "loss": 0.4915, "step": 5125 }, { "epoch": 0.4600300643916448, "grad_norm": 0.45546804656473583, "learning_rate": 9.913573878680284e-06, "loss": 0.4247, "step": 5126 }, { "epoch": 0.4601198088443158, "grad_norm": 0.5231121414684359, "learning_rate": 9.91347718753074e-06, "loss": 0.4877, "step": 5127 }, { "epoch": 0.4602095532969868, "grad_norm": 0.48575920443500076, "learning_rate": 9.913380442795802e-06, "loss": 0.4768, "step": 5128 }, { "epoch": 0.46029929774965783, "grad_norm": 0.4846425390229268, "learning_rate": 9.91328364447653e-06, "loss": 0.5156, "step": 5129 }, { "epoch": 0.46038904220232885, "grad_norm": 0.4677449447875803, "learning_rate": 9.913186792573977e-06, "loss": 0.4658, "step": 5130 }, { "epoch": 0.46047878665499986, "grad_norm": 0.4393635776035819, "learning_rate": 9.913089887089203e-06, "loss": 0.3878, "step": 5131 }, { "epoch": 0.46056853110767093, "grad_norm": 0.4960190905613943, "learning_rate": 9.912992928023259e-06, "loss": 0.5317, "step": 5132 }, { "epoch": 0.46065827556034195, "grad_norm": 0.4715319010400041, "learning_rate": 9.912895915377206e-06, "loss": 0.4441, "step": 5133 }, { "epoch": 0.46074802001301296, "grad_norm": 0.5071338447113631, "learning_rate": 9.912798849152102e-06, "loss": 0.5098, "step": 5134 }, { "epoch": 0.460837764465684, "grad_norm": 0.45059156657532873, "learning_rate": 9.912701729349005e-06, "loss": 0.4304, "step": 5135 }, { "epoch": 0.460927508918355, "grad_norm": 0.4701774291276089, "learning_rate": 9.912604555968974e-06, "loss": 0.4699, "step": 5136 }, { "epoch": 0.461017253371026, "grad_norm": 0.5122987819959695, "learning_rate": 9.91250732901307e-06, "loss": 0.5366, "step": 5137 }, { "epoch": 0.461106997823697, "grad_norm": 0.4713314408179381, "learning_rate": 9.912410048482352e-06, "loss": 0.4364, "step": 5138 }, { "epoch": 0.46119674227636803, "grad_norm": 0.5059228826775612, "learning_rate": 9.91231271437788e-06, "loss": 0.5425, "step": 5139 }, { "epoch": 0.46128648672903905, "grad_norm": 0.48092189299193544, "learning_rate": 9.91221532670072e-06, "loss": 0.4662, "step": 5140 }, { "epoch": 0.46137623118171006, "grad_norm": 0.4431262151728311, "learning_rate": 9.912117885451928e-06, "loss": 0.4128, "step": 5141 }, { "epoch": 0.4614659756343811, "grad_norm": 0.5145086484611583, "learning_rate": 9.91202039063257e-06, "loss": 0.4938, "step": 5142 }, { "epoch": 0.46155572008705215, "grad_norm": 0.46260180169816295, "learning_rate": 9.91192284224371e-06, "loss": 0.4388, "step": 5143 }, { "epoch": 0.46164546453972316, "grad_norm": 0.5164153216828722, "learning_rate": 9.91182524028641e-06, "loss": 0.5482, "step": 5144 }, { "epoch": 0.4617352089923942, "grad_norm": 0.4971966750759152, "learning_rate": 9.911727584761734e-06, "loss": 0.4779, "step": 5145 }, { "epoch": 0.4618249534450652, "grad_norm": 0.4817640334507172, "learning_rate": 9.91162987567075e-06, "loss": 0.4621, "step": 5146 }, { "epoch": 0.4619146978977362, "grad_norm": 0.47528105716684815, "learning_rate": 9.911532113014521e-06, "loss": 0.459, "step": 5147 }, { "epoch": 0.4620044423504072, "grad_norm": 0.5380780467628895, "learning_rate": 9.911434296794113e-06, "loss": 0.5326, "step": 5148 }, { "epoch": 0.46209418680307823, "grad_norm": 0.44532463845483217, "learning_rate": 9.911336427010596e-06, "loss": 0.4304, "step": 5149 }, { "epoch": 0.46218393125574925, "grad_norm": 0.4660836490302525, "learning_rate": 9.911238503665033e-06, "loss": 0.4006, "step": 5150 }, { "epoch": 0.46227367570842026, "grad_norm": 0.5088809300374888, "learning_rate": 9.911140526758494e-06, "loss": 0.4848, "step": 5151 }, { "epoch": 0.4623634201610913, "grad_norm": 0.5224614178403015, "learning_rate": 9.911042496292049e-06, "loss": 0.5122, "step": 5152 }, { "epoch": 0.4624531646137623, "grad_norm": 0.4962810626961456, "learning_rate": 9.910944412266764e-06, "loss": 0.4699, "step": 5153 }, { "epoch": 0.4625429090664333, "grad_norm": 0.4733571338530908, "learning_rate": 9.91084627468371e-06, "loss": 0.4694, "step": 5154 }, { "epoch": 0.4626326535191044, "grad_norm": 0.4939440677577548, "learning_rate": 9.910748083543957e-06, "loss": 0.4543, "step": 5155 }, { "epoch": 0.4627223979717754, "grad_norm": 0.4692318110018403, "learning_rate": 9.910649838848578e-06, "loss": 0.4603, "step": 5156 }, { "epoch": 0.4628121424244464, "grad_norm": 0.5244895132485209, "learning_rate": 9.910551540598644e-06, "loss": 0.4794, "step": 5157 }, { "epoch": 0.4629018868771174, "grad_norm": 0.4984696804374486, "learning_rate": 9.910453188795222e-06, "loss": 0.5009, "step": 5158 }, { "epoch": 0.46299163132978843, "grad_norm": 0.5074227473616988, "learning_rate": 9.91035478343939e-06, "loss": 0.4878, "step": 5159 }, { "epoch": 0.46308137578245945, "grad_norm": 0.5025820646394334, "learning_rate": 9.91025632453222e-06, "loss": 0.459, "step": 5160 }, { "epoch": 0.46317112023513046, "grad_norm": 0.5221737068729229, "learning_rate": 9.910157812074786e-06, "loss": 0.4944, "step": 5161 }, { "epoch": 0.4632608646878015, "grad_norm": 0.5211142068854581, "learning_rate": 9.910059246068162e-06, "loss": 0.4734, "step": 5162 }, { "epoch": 0.4633506091404725, "grad_norm": 0.42456781282779243, "learning_rate": 9.909960626513422e-06, "loss": 0.3651, "step": 5163 }, { "epoch": 0.4634403535931435, "grad_norm": 0.5065334757897686, "learning_rate": 9.909861953411642e-06, "loss": 0.4248, "step": 5164 }, { "epoch": 0.4635300980458145, "grad_norm": 0.5542394296863534, "learning_rate": 9.909763226763898e-06, "loss": 0.4987, "step": 5165 }, { "epoch": 0.46361984249848553, "grad_norm": 0.5203536025200118, "learning_rate": 9.909664446571266e-06, "loss": 0.4918, "step": 5166 }, { "epoch": 0.4637095869511566, "grad_norm": 0.5120514065226306, "learning_rate": 9.909565612834826e-06, "loss": 0.529, "step": 5167 }, { "epoch": 0.4637993314038276, "grad_norm": 0.4947542900047796, "learning_rate": 9.909466725555654e-06, "loss": 0.506, "step": 5168 }, { "epoch": 0.46388907585649863, "grad_norm": 0.47596162821565824, "learning_rate": 9.90936778473483e-06, "loss": 0.4391, "step": 5169 }, { "epoch": 0.46397882030916965, "grad_norm": 0.47100548494695366, "learning_rate": 9.909268790373429e-06, "loss": 0.4529, "step": 5170 }, { "epoch": 0.46406856476184066, "grad_norm": 0.49741795128870175, "learning_rate": 9.909169742472532e-06, "loss": 0.5202, "step": 5171 }, { "epoch": 0.4641583092145117, "grad_norm": 0.5162874571009799, "learning_rate": 9.909070641033224e-06, "loss": 0.4723, "step": 5172 }, { "epoch": 0.4642480536671827, "grad_norm": 0.5294129226786843, "learning_rate": 9.90897148605658e-06, "loss": 0.5303, "step": 5173 }, { "epoch": 0.4643377981198537, "grad_norm": 0.4632189133246771, "learning_rate": 9.908872277543685e-06, "loss": 0.4145, "step": 5174 }, { "epoch": 0.4644275425725247, "grad_norm": 0.4951690026629554, "learning_rate": 9.90877301549562e-06, "loss": 0.4191, "step": 5175 }, { "epoch": 0.46451728702519574, "grad_norm": 0.5064363324012605, "learning_rate": 9.908673699913465e-06, "loss": 0.4951, "step": 5176 }, { "epoch": 0.46460703147786675, "grad_norm": 0.5148550981456989, "learning_rate": 9.908574330798307e-06, "loss": 0.5208, "step": 5177 }, { "epoch": 0.4646967759305378, "grad_norm": 0.4404694492525657, "learning_rate": 9.908474908151227e-06, "loss": 0.3958, "step": 5178 }, { "epoch": 0.46478652038320883, "grad_norm": 0.5123400591456849, "learning_rate": 9.90837543197331e-06, "loss": 0.5126, "step": 5179 }, { "epoch": 0.46487626483587985, "grad_norm": 0.49457110481950106, "learning_rate": 9.908275902265641e-06, "loss": 0.486, "step": 5180 }, { "epoch": 0.46496600928855086, "grad_norm": 0.5156422040151301, "learning_rate": 9.908176319029305e-06, "loss": 0.5318, "step": 5181 }, { "epoch": 0.4650557537412219, "grad_norm": 0.523204545497769, "learning_rate": 9.908076682265388e-06, "loss": 0.5137, "step": 5182 }, { "epoch": 0.4651454981938929, "grad_norm": 0.46615135341139485, "learning_rate": 9.907976991974979e-06, "loss": 0.4363, "step": 5183 }, { "epoch": 0.4652352426465639, "grad_norm": 0.5322514591049338, "learning_rate": 9.907877248159162e-06, "loss": 0.5526, "step": 5184 }, { "epoch": 0.4653249870992349, "grad_norm": 0.5447398199276132, "learning_rate": 9.907777450819028e-06, "loss": 0.5323, "step": 5185 }, { "epoch": 0.46541473155190594, "grad_norm": 0.5436424342049747, "learning_rate": 9.907677599955661e-06, "loss": 0.5125, "step": 5186 }, { "epoch": 0.46550447600457695, "grad_norm": 0.5524472648418891, "learning_rate": 9.907577695570153e-06, "loss": 0.5746, "step": 5187 }, { "epoch": 0.46559422045724796, "grad_norm": 0.49917618012483644, "learning_rate": 9.907477737663594e-06, "loss": 0.4912, "step": 5188 }, { "epoch": 0.465683964909919, "grad_norm": 0.4545762394999497, "learning_rate": 9.907377726237072e-06, "loss": 0.4152, "step": 5189 }, { "epoch": 0.46577370936259005, "grad_norm": 0.5000688318763029, "learning_rate": 9.90727766129168e-06, "loss": 0.4878, "step": 5190 }, { "epoch": 0.46586345381526106, "grad_norm": 0.49867624686771617, "learning_rate": 9.907177542828507e-06, "loss": 0.4551, "step": 5191 }, { "epoch": 0.4659531982679321, "grad_norm": 0.46227707934277756, "learning_rate": 9.907077370848645e-06, "loss": 0.4307, "step": 5192 }, { "epoch": 0.4660429427206031, "grad_norm": 0.4684848623666201, "learning_rate": 9.906977145353189e-06, "loss": 0.4703, "step": 5193 }, { "epoch": 0.4661326871732741, "grad_norm": 0.5001997924828415, "learning_rate": 9.906876866343231e-06, "loss": 0.4501, "step": 5194 }, { "epoch": 0.4662224316259451, "grad_norm": 0.4568021822310685, "learning_rate": 9.906776533819863e-06, "loss": 0.4554, "step": 5195 }, { "epoch": 0.46631217607861614, "grad_norm": 0.5092817863640756, "learning_rate": 9.90667614778418e-06, "loss": 0.5003, "step": 5196 }, { "epoch": 0.46640192053128715, "grad_norm": 0.44327250240778904, "learning_rate": 9.906575708237279e-06, "loss": 0.3934, "step": 5197 }, { "epoch": 0.46649166498395817, "grad_norm": 0.46872241938417064, "learning_rate": 9.90647521518025e-06, "loss": 0.4514, "step": 5198 }, { "epoch": 0.4665814094366292, "grad_norm": 0.47721785111151366, "learning_rate": 9.906374668614196e-06, "loss": 0.4716, "step": 5199 }, { "epoch": 0.4666711538893002, "grad_norm": 0.4547080748004917, "learning_rate": 9.906274068540208e-06, "loss": 0.4315, "step": 5200 }, { "epoch": 0.46676089834197126, "grad_norm": 0.4841106202307312, "learning_rate": 9.906173414959386e-06, "loss": 0.4395, "step": 5201 }, { "epoch": 0.4668506427946423, "grad_norm": 0.5384306208323045, "learning_rate": 9.906072707872825e-06, "loss": 0.5587, "step": 5202 }, { "epoch": 0.4669403872473133, "grad_norm": 0.49547649280803796, "learning_rate": 9.905971947281628e-06, "loss": 0.4585, "step": 5203 }, { "epoch": 0.4670301316999843, "grad_norm": 0.5296282293461381, "learning_rate": 9.905871133186888e-06, "loss": 0.5636, "step": 5204 }, { "epoch": 0.4671198761526553, "grad_norm": 0.4693059204426143, "learning_rate": 9.905770265589709e-06, "loss": 0.498, "step": 5205 }, { "epoch": 0.46720962060532634, "grad_norm": 0.4927912774187229, "learning_rate": 9.905669344491189e-06, "loss": 0.4691, "step": 5206 }, { "epoch": 0.46729936505799735, "grad_norm": 0.47338301563407803, "learning_rate": 9.905568369892428e-06, "loss": 0.4393, "step": 5207 }, { "epoch": 0.46738910951066837, "grad_norm": 0.4647734237137967, "learning_rate": 9.90546734179453e-06, "loss": 0.4486, "step": 5208 }, { "epoch": 0.4674788539633394, "grad_norm": 0.5546655988207276, "learning_rate": 9.905366260198595e-06, "loss": 0.5339, "step": 5209 }, { "epoch": 0.4675685984160104, "grad_norm": 0.4657725268784775, "learning_rate": 9.905265125105724e-06, "loss": 0.4075, "step": 5210 }, { "epoch": 0.4676583428686814, "grad_norm": 0.4474990841636557, "learning_rate": 9.905163936517021e-06, "loss": 0.4369, "step": 5211 }, { "epoch": 0.4677480873213524, "grad_norm": 0.5254981705292101, "learning_rate": 9.905062694433591e-06, "loss": 0.509, "step": 5212 }, { "epoch": 0.4678378317740235, "grad_norm": 0.4474626370271107, "learning_rate": 9.904961398856538e-06, "loss": 0.4296, "step": 5213 }, { "epoch": 0.4679275762266945, "grad_norm": 0.5373081343628119, "learning_rate": 9.904860049786965e-06, "loss": 0.5448, "step": 5214 }, { "epoch": 0.4680173206793655, "grad_norm": 0.4799791228277603, "learning_rate": 9.904758647225978e-06, "loss": 0.5039, "step": 5215 }, { "epoch": 0.46810706513203654, "grad_norm": 0.49671085321220987, "learning_rate": 9.904657191174683e-06, "loss": 0.5222, "step": 5216 }, { "epoch": 0.46819680958470755, "grad_norm": 0.4802201596171003, "learning_rate": 9.904555681634186e-06, "loss": 0.494, "step": 5217 }, { "epoch": 0.46828655403737857, "grad_norm": 0.5326261268840699, "learning_rate": 9.904454118605596e-06, "loss": 0.4755, "step": 5218 }, { "epoch": 0.4683762984900496, "grad_norm": 0.47648097931024663, "learning_rate": 9.904352502090016e-06, "loss": 0.4818, "step": 5219 }, { "epoch": 0.4684660429427206, "grad_norm": 0.4829788277120224, "learning_rate": 9.90425083208856e-06, "loss": 0.4627, "step": 5220 }, { "epoch": 0.4685557873953916, "grad_norm": 0.4479097782214063, "learning_rate": 9.904149108602332e-06, "loss": 0.4041, "step": 5221 }, { "epoch": 0.4686455318480626, "grad_norm": 0.5086504871153473, "learning_rate": 9.904047331632444e-06, "loss": 0.5151, "step": 5222 }, { "epoch": 0.46873527630073364, "grad_norm": 0.504221178533772, "learning_rate": 9.903945501180008e-06, "loss": 0.4805, "step": 5223 }, { "epoch": 0.46882502075340465, "grad_norm": 0.527906901142495, "learning_rate": 9.90384361724613e-06, "loss": 0.5238, "step": 5224 }, { "epoch": 0.4689147652060757, "grad_norm": 0.49577279498573473, "learning_rate": 9.903741679831922e-06, "loss": 0.4748, "step": 5225 }, { "epoch": 0.46900450965874674, "grad_norm": 0.4459296288279644, "learning_rate": 9.903639688938498e-06, "loss": 0.435, "step": 5226 }, { "epoch": 0.46909425411141775, "grad_norm": 0.4953380093785028, "learning_rate": 9.903537644566969e-06, "loss": 0.532, "step": 5227 }, { "epoch": 0.46918399856408877, "grad_norm": 0.4621358010744427, "learning_rate": 9.903435546718447e-06, "loss": 0.3919, "step": 5228 }, { "epoch": 0.4692737430167598, "grad_norm": 0.4617810819095961, "learning_rate": 9.903333395394049e-06, "loss": 0.449, "step": 5229 }, { "epoch": 0.4693634874694308, "grad_norm": 0.4649816143690274, "learning_rate": 9.903231190594883e-06, "loss": 0.3989, "step": 5230 }, { "epoch": 0.4694532319221018, "grad_norm": 0.5097450758469337, "learning_rate": 9.903128932322069e-06, "loss": 0.5005, "step": 5231 }, { "epoch": 0.4695429763747728, "grad_norm": 0.5219342001182087, "learning_rate": 9.90302662057672e-06, "loss": 0.4966, "step": 5232 }, { "epoch": 0.46963272082744384, "grad_norm": 0.5011749297377837, "learning_rate": 9.902924255359951e-06, "loss": 0.4955, "step": 5233 }, { "epoch": 0.46972246528011485, "grad_norm": 0.5199737344227995, "learning_rate": 9.90282183667288e-06, "loss": 0.5417, "step": 5234 }, { "epoch": 0.46981220973278587, "grad_norm": 0.5051134805665228, "learning_rate": 9.902719364516625e-06, "loss": 0.4812, "step": 5235 }, { "epoch": 0.46990195418545694, "grad_norm": 0.45698451280078367, "learning_rate": 9.902616838892301e-06, "loss": 0.3944, "step": 5236 }, { "epoch": 0.46999169863812795, "grad_norm": 0.43980325767529965, "learning_rate": 9.902514259801028e-06, "loss": 0.4094, "step": 5237 }, { "epoch": 0.47008144309079897, "grad_norm": 0.4718433623377543, "learning_rate": 9.902411627243922e-06, "loss": 0.4215, "step": 5238 }, { "epoch": 0.47017118754347, "grad_norm": 0.5202769342976663, "learning_rate": 9.902308941222103e-06, "loss": 0.5801, "step": 5239 }, { "epoch": 0.470260931996141, "grad_norm": 0.5078729491938132, "learning_rate": 9.902206201736695e-06, "loss": 0.4953, "step": 5240 }, { "epoch": 0.470350676448812, "grad_norm": 0.47010844546897684, "learning_rate": 9.902103408788815e-06, "loss": 0.4237, "step": 5241 }, { "epoch": 0.470440420901483, "grad_norm": 0.537559512632296, "learning_rate": 9.902000562379583e-06, "loss": 0.5512, "step": 5242 }, { "epoch": 0.47053016535415404, "grad_norm": 0.49455523952411645, "learning_rate": 9.901897662510122e-06, "loss": 0.4766, "step": 5243 }, { "epoch": 0.47061990980682505, "grad_norm": 0.4640214641962264, "learning_rate": 9.901794709181555e-06, "loss": 0.4748, "step": 5244 }, { "epoch": 0.47070965425949607, "grad_norm": 0.49505404267552333, "learning_rate": 9.901691702395003e-06, "loss": 0.498, "step": 5245 }, { "epoch": 0.4707993987121671, "grad_norm": 0.5136938782430939, "learning_rate": 9.901588642151592e-06, "loss": 0.4749, "step": 5246 }, { "epoch": 0.4708891431648381, "grad_norm": 0.4932796821127501, "learning_rate": 9.901485528452443e-06, "loss": 0.4951, "step": 5247 }, { "epoch": 0.47097888761750917, "grad_norm": 0.4736144741169739, "learning_rate": 9.901382361298682e-06, "loss": 0.4699, "step": 5248 }, { "epoch": 0.4710686320701802, "grad_norm": 0.5555152368955316, "learning_rate": 9.901279140691436e-06, "loss": 0.5604, "step": 5249 }, { "epoch": 0.4711583765228512, "grad_norm": 0.5969292764046663, "learning_rate": 9.901175866631827e-06, "loss": 0.5758, "step": 5250 }, { "epoch": 0.4712481209755222, "grad_norm": 0.5047897817628253, "learning_rate": 9.901072539120983e-06, "loss": 0.4967, "step": 5251 }, { "epoch": 0.4713378654281932, "grad_norm": 0.47449460080790157, "learning_rate": 9.90096915816003e-06, "loss": 0.4482, "step": 5252 }, { "epoch": 0.47142760988086424, "grad_norm": 0.48190146302925724, "learning_rate": 9.9008657237501e-06, "loss": 0.4629, "step": 5253 }, { "epoch": 0.47151735433353525, "grad_norm": 0.5048912778239847, "learning_rate": 9.900762235892314e-06, "loss": 0.5349, "step": 5254 }, { "epoch": 0.47160709878620627, "grad_norm": 0.459715662928592, "learning_rate": 9.900658694587803e-06, "loss": 0.4176, "step": 5255 }, { "epoch": 0.4716968432388773, "grad_norm": 0.48601731030212203, "learning_rate": 9.900555099837702e-06, "loss": 0.4603, "step": 5256 }, { "epoch": 0.4717865876915483, "grad_norm": 0.48113661275554787, "learning_rate": 9.900451451643131e-06, "loss": 0.451, "step": 5257 }, { "epoch": 0.4718763321442193, "grad_norm": 0.43758680871255146, "learning_rate": 9.900347750005228e-06, "loss": 0.3867, "step": 5258 }, { "epoch": 0.4719660765968903, "grad_norm": 0.4789568109805834, "learning_rate": 9.90024399492512e-06, "loss": 0.4575, "step": 5259 }, { "epoch": 0.4720558210495614, "grad_norm": 0.48784079861794616, "learning_rate": 9.900140186403942e-06, "loss": 0.4383, "step": 5260 }, { "epoch": 0.4721455655022324, "grad_norm": 0.4724322366905374, "learning_rate": 9.900036324442821e-06, "loss": 0.4565, "step": 5261 }, { "epoch": 0.4722353099549034, "grad_norm": 0.48449678622436076, "learning_rate": 9.899932409042894e-06, "loss": 0.4599, "step": 5262 }, { "epoch": 0.47232505440757444, "grad_norm": 0.45948906628073516, "learning_rate": 9.899828440205294e-06, "loss": 0.4266, "step": 5263 }, { "epoch": 0.47241479886024546, "grad_norm": 0.5592097479369208, "learning_rate": 9.899724417931152e-06, "loss": 0.5582, "step": 5264 }, { "epoch": 0.47250454331291647, "grad_norm": 0.44977767808410957, "learning_rate": 9.899620342221605e-06, "loss": 0.4081, "step": 5265 }, { "epoch": 0.4725942877655875, "grad_norm": 0.5626967088230096, "learning_rate": 9.899516213077788e-06, "loss": 0.536, "step": 5266 }, { "epoch": 0.4726840322182585, "grad_norm": 0.5019610364911051, "learning_rate": 9.899412030500835e-06, "loss": 0.4734, "step": 5267 }, { "epoch": 0.4727737766709295, "grad_norm": 0.4675815981349075, "learning_rate": 9.899307794491884e-06, "loss": 0.4644, "step": 5268 }, { "epoch": 0.4728635211236005, "grad_norm": 0.44854032536599703, "learning_rate": 9.899203505052068e-06, "loss": 0.4021, "step": 5269 }, { "epoch": 0.47295326557627154, "grad_norm": 0.49311158787225623, "learning_rate": 9.89909916218253e-06, "loss": 0.435, "step": 5270 }, { "epoch": 0.4730430100289426, "grad_norm": 0.48613921936542714, "learning_rate": 9.898994765884404e-06, "loss": 0.4208, "step": 5271 }, { "epoch": 0.4731327544816136, "grad_norm": 0.5392417778326961, "learning_rate": 9.89889031615883e-06, "loss": 0.5806, "step": 5272 }, { "epoch": 0.47322249893428464, "grad_norm": 0.49868913785546576, "learning_rate": 9.898785813006947e-06, "loss": 0.5133, "step": 5273 }, { "epoch": 0.47331224338695566, "grad_norm": 0.46173299138385976, "learning_rate": 9.898681256429894e-06, "loss": 0.4532, "step": 5274 }, { "epoch": 0.47340198783962667, "grad_norm": 0.45485233552372667, "learning_rate": 9.898576646428813e-06, "loss": 0.4589, "step": 5275 }, { "epoch": 0.4734917322922977, "grad_norm": 0.5330805258353589, "learning_rate": 9.898471983004843e-06, "loss": 0.5539, "step": 5276 }, { "epoch": 0.4735814767449687, "grad_norm": 0.5389179134070617, "learning_rate": 9.898367266159124e-06, "loss": 0.505, "step": 5277 }, { "epoch": 0.4736712211976397, "grad_norm": 0.4634174235548773, "learning_rate": 9.898262495892802e-06, "loss": 0.4706, "step": 5278 }, { "epoch": 0.47376096565031073, "grad_norm": 0.5165294703207309, "learning_rate": 9.898157672207018e-06, "loss": 0.4758, "step": 5279 }, { "epoch": 0.47385071010298174, "grad_norm": 0.500083303760861, "learning_rate": 9.898052795102914e-06, "loss": 0.5474, "step": 5280 }, { "epoch": 0.47394045455565276, "grad_norm": 0.5199918321980058, "learning_rate": 9.897947864581634e-06, "loss": 0.467, "step": 5281 }, { "epoch": 0.47403019900832377, "grad_norm": 0.5344157661622065, "learning_rate": 9.897842880644324e-06, "loss": 0.5495, "step": 5282 }, { "epoch": 0.47411994346099484, "grad_norm": 0.5023711825731009, "learning_rate": 9.897737843292128e-06, "loss": 0.4959, "step": 5283 }, { "epoch": 0.47420968791366586, "grad_norm": 0.45499911968171586, "learning_rate": 9.89763275252619e-06, "loss": 0.4534, "step": 5284 }, { "epoch": 0.47429943236633687, "grad_norm": 0.5218283134697445, "learning_rate": 9.897527608347659e-06, "loss": 0.5294, "step": 5285 }, { "epoch": 0.4743891768190079, "grad_norm": 0.45005156434677007, "learning_rate": 9.89742241075768e-06, "loss": 0.4271, "step": 5286 }, { "epoch": 0.4744789212716789, "grad_norm": 0.506672611179963, "learning_rate": 9.897317159757401e-06, "loss": 0.497, "step": 5287 }, { "epoch": 0.4745686657243499, "grad_norm": 0.5079444494902982, "learning_rate": 9.897211855347967e-06, "loss": 0.5191, "step": 5288 }, { "epoch": 0.47465841017702093, "grad_norm": 0.4425739967448214, "learning_rate": 9.897106497530531e-06, "loss": 0.3988, "step": 5289 }, { "epoch": 0.47474815462969194, "grad_norm": 0.4998035133397539, "learning_rate": 9.897001086306238e-06, "loss": 0.448, "step": 5290 }, { "epoch": 0.47483789908236296, "grad_norm": 0.505555591648256, "learning_rate": 9.89689562167624e-06, "loss": 0.5051, "step": 5291 }, { "epoch": 0.47492764353503397, "grad_norm": 0.48001525746716517, "learning_rate": 9.896790103641687e-06, "loss": 0.4376, "step": 5292 }, { "epoch": 0.475017387987705, "grad_norm": 0.48088651142275085, "learning_rate": 9.896684532203729e-06, "loss": 0.4319, "step": 5293 }, { "epoch": 0.47510713244037606, "grad_norm": 0.47781188768116895, "learning_rate": 9.896578907363517e-06, "loss": 0.4649, "step": 5294 }, { "epoch": 0.47519687689304707, "grad_norm": 0.4899890677130721, "learning_rate": 9.896473229122206e-06, "loss": 0.4533, "step": 5295 }, { "epoch": 0.4752866213457181, "grad_norm": 0.509853098888649, "learning_rate": 9.896367497480943e-06, "loss": 0.5092, "step": 5296 }, { "epoch": 0.4753763657983891, "grad_norm": 0.4584258577907832, "learning_rate": 9.896261712440884e-06, "loss": 0.4378, "step": 5297 }, { "epoch": 0.4754661102510601, "grad_norm": 0.495165506133679, "learning_rate": 9.896155874003187e-06, "loss": 0.5426, "step": 5298 }, { "epoch": 0.47555585470373113, "grad_norm": 0.45105933021331585, "learning_rate": 9.896049982168998e-06, "loss": 0.4293, "step": 5299 }, { "epoch": 0.47564559915640214, "grad_norm": 0.5016060013640111, "learning_rate": 9.895944036939476e-06, "loss": 0.4883, "step": 5300 }, { "epoch": 0.47573534360907316, "grad_norm": 0.475841825204323, "learning_rate": 9.895838038315778e-06, "loss": 0.447, "step": 5301 }, { "epoch": 0.4758250880617442, "grad_norm": 0.48811480529986623, "learning_rate": 9.895731986299057e-06, "loss": 0.4603, "step": 5302 }, { "epoch": 0.4759148325144152, "grad_norm": 0.4703729627108031, "learning_rate": 9.895625880890471e-06, "loss": 0.4466, "step": 5303 }, { "epoch": 0.4760045769670862, "grad_norm": 0.49881126346839094, "learning_rate": 9.895519722091179e-06, "loss": 0.4787, "step": 5304 }, { "epoch": 0.4760943214197572, "grad_norm": 0.45839348240271377, "learning_rate": 9.895413509902334e-06, "loss": 0.4549, "step": 5305 }, { "epoch": 0.4761840658724283, "grad_norm": 0.4970315797669622, "learning_rate": 9.8953072443251e-06, "loss": 0.4837, "step": 5306 }, { "epoch": 0.4762738103250993, "grad_norm": 0.5162263478388711, "learning_rate": 9.895200925360631e-06, "loss": 0.5053, "step": 5307 }, { "epoch": 0.4763635547777703, "grad_norm": 0.4834064878831901, "learning_rate": 9.895094553010088e-06, "loss": 0.4697, "step": 5308 }, { "epoch": 0.47645329923044133, "grad_norm": 0.537322624966983, "learning_rate": 9.894988127274633e-06, "loss": 0.4972, "step": 5309 }, { "epoch": 0.47654304368311234, "grad_norm": 0.5204278933860773, "learning_rate": 9.894881648155424e-06, "loss": 0.4476, "step": 5310 }, { "epoch": 0.47663278813578336, "grad_norm": 0.4737536428040901, "learning_rate": 9.894775115653625e-06, "loss": 0.4873, "step": 5311 }, { "epoch": 0.4767225325884544, "grad_norm": 0.47021967372384005, "learning_rate": 9.894668529770395e-06, "loss": 0.4273, "step": 5312 }, { "epoch": 0.4768122770411254, "grad_norm": 0.5436102472553086, "learning_rate": 9.894561890506898e-06, "loss": 0.5462, "step": 5313 }, { "epoch": 0.4769020214937964, "grad_norm": 0.5157312828302215, "learning_rate": 9.894455197864298e-06, "loss": 0.4544, "step": 5314 }, { "epoch": 0.4769917659464674, "grad_norm": 0.5260705335303146, "learning_rate": 9.894348451843757e-06, "loss": 0.5255, "step": 5315 }, { "epoch": 0.47708151039913843, "grad_norm": 0.5153149844266801, "learning_rate": 9.894241652446438e-06, "loss": 0.5118, "step": 5316 }, { "epoch": 0.47717125485180945, "grad_norm": 0.45387703081818953, "learning_rate": 9.894134799673508e-06, "loss": 0.4509, "step": 5317 }, { "epoch": 0.4772609993044805, "grad_norm": 0.4965076044943891, "learning_rate": 9.894027893526131e-06, "loss": 0.4877, "step": 5318 }, { "epoch": 0.47735074375715153, "grad_norm": 0.47972203478350933, "learning_rate": 9.893920934005474e-06, "loss": 0.4819, "step": 5319 }, { "epoch": 0.47744048820982254, "grad_norm": 0.5100561660657522, "learning_rate": 9.893813921112703e-06, "loss": 0.5127, "step": 5320 }, { "epoch": 0.47753023266249356, "grad_norm": 0.5539554955491361, "learning_rate": 9.893706854848984e-06, "loss": 0.5743, "step": 5321 }, { "epoch": 0.4776199771151646, "grad_norm": 0.4550646792251887, "learning_rate": 9.893599735215486e-06, "loss": 0.4291, "step": 5322 }, { "epoch": 0.4777097215678356, "grad_norm": 0.48306006274132635, "learning_rate": 9.893492562213376e-06, "loss": 0.4383, "step": 5323 }, { "epoch": 0.4777994660205066, "grad_norm": 0.5568406986298344, "learning_rate": 9.893385335843825e-06, "loss": 0.5345, "step": 5324 }, { "epoch": 0.4778892104731776, "grad_norm": 0.5308762362013633, "learning_rate": 9.893278056108e-06, "loss": 0.5424, "step": 5325 }, { "epoch": 0.47797895492584863, "grad_norm": 0.5143598133501596, "learning_rate": 9.893170723007073e-06, "loss": 0.5343, "step": 5326 }, { "epoch": 0.47806869937851965, "grad_norm": 0.4250617787056777, "learning_rate": 9.893063336542213e-06, "loss": 0.3792, "step": 5327 }, { "epoch": 0.47815844383119066, "grad_norm": 0.503166330789461, "learning_rate": 9.892955896714591e-06, "loss": 0.4657, "step": 5328 }, { "epoch": 0.47824818828386173, "grad_norm": 0.4621756708009772, "learning_rate": 9.892848403525379e-06, "loss": 0.4221, "step": 5329 }, { "epoch": 0.47833793273653274, "grad_norm": 0.4890135354682604, "learning_rate": 9.89274085697575e-06, "loss": 0.4446, "step": 5330 }, { "epoch": 0.47842767718920376, "grad_norm": 0.5286617623171139, "learning_rate": 9.892633257066879e-06, "loss": 0.4837, "step": 5331 }, { "epoch": 0.4785174216418748, "grad_norm": 0.49146836321855225, "learning_rate": 9.892525603799934e-06, "loss": 0.5012, "step": 5332 }, { "epoch": 0.4786071660945458, "grad_norm": 0.45329869478427887, "learning_rate": 9.892417897176093e-06, "loss": 0.4196, "step": 5333 }, { "epoch": 0.4786969105472168, "grad_norm": 0.47909004562232593, "learning_rate": 9.89231013719653e-06, "loss": 0.4715, "step": 5334 }, { "epoch": 0.4787866549998878, "grad_norm": 0.48525489169200076, "learning_rate": 9.89220232386242e-06, "loss": 0.4585, "step": 5335 }, { "epoch": 0.47887639945255883, "grad_norm": 0.5545237466259662, "learning_rate": 9.892094457174937e-06, "loss": 0.5146, "step": 5336 }, { "epoch": 0.47896614390522985, "grad_norm": 0.543207613302011, "learning_rate": 9.89198653713526e-06, "loss": 0.5639, "step": 5337 }, { "epoch": 0.47905588835790086, "grad_norm": 0.524812410307435, "learning_rate": 9.891878563744566e-06, "loss": 0.5406, "step": 5338 }, { "epoch": 0.4791456328105719, "grad_norm": 0.47751849717587747, "learning_rate": 9.89177053700403e-06, "loss": 0.4327, "step": 5339 }, { "epoch": 0.4792353772632429, "grad_norm": 0.5001856127326259, "learning_rate": 9.891662456914834e-06, "loss": 0.4833, "step": 5340 }, { "epoch": 0.47932512171591396, "grad_norm": 0.5244895362881591, "learning_rate": 9.891554323478152e-06, "loss": 0.487, "step": 5341 }, { "epoch": 0.479414866168585, "grad_norm": 0.519129046393546, "learning_rate": 9.891446136695167e-06, "loss": 0.4805, "step": 5342 }, { "epoch": 0.479504610621256, "grad_norm": 0.5158619340191992, "learning_rate": 9.891337896567057e-06, "loss": 0.4852, "step": 5343 }, { "epoch": 0.479594355073927, "grad_norm": 0.4304611358447675, "learning_rate": 9.891229603095003e-06, "loss": 0.4184, "step": 5344 }, { "epoch": 0.479684099526598, "grad_norm": 0.5087032321836683, "learning_rate": 9.891121256280186e-06, "loss": 0.5327, "step": 5345 }, { "epoch": 0.47977384397926903, "grad_norm": 0.4766133830678062, "learning_rate": 9.891012856123788e-06, "loss": 0.4774, "step": 5346 }, { "epoch": 0.47986358843194005, "grad_norm": 0.5024134879546482, "learning_rate": 9.89090440262699e-06, "loss": 0.427, "step": 5347 }, { "epoch": 0.47995333288461106, "grad_norm": 0.4787512248677717, "learning_rate": 9.890795895790974e-06, "loss": 0.4624, "step": 5348 }, { "epoch": 0.4800430773372821, "grad_norm": 0.5028218132775382, "learning_rate": 9.890687335616927e-06, "loss": 0.4397, "step": 5349 }, { "epoch": 0.4801328217899531, "grad_norm": 0.5389538489015918, "learning_rate": 9.89057872210603e-06, "loss": 0.5028, "step": 5350 }, { "epoch": 0.4802225662426241, "grad_norm": 0.48410289689559577, "learning_rate": 9.89047005525947e-06, "loss": 0.4637, "step": 5351 }, { "epoch": 0.4803123106952951, "grad_norm": 0.524581710445189, "learning_rate": 9.890361335078429e-06, "loss": 0.516, "step": 5352 }, { "epoch": 0.4804020551479662, "grad_norm": 0.4837369964087643, "learning_rate": 9.890252561564094e-06, "loss": 0.4416, "step": 5353 }, { "epoch": 0.4804917996006372, "grad_norm": 0.5163283662685525, "learning_rate": 9.89014373471765e-06, "loss": 0.4721, "step": 5354 }, { "epoch": 0.4805815440533082, "grad_norm": 0.4836769798411474, "learning_rate": 9.890034854540288e-06, "loss": 0.4745, "step": 5355 }, { "epoch": 0.48067128850597923, "grad_norm": 0.4877522227807051, "learning_rate": 9.889925921033191e-06, "loss": 0.5032, "step": 5356 }, { "epoch": 0.48076103295865025, "grad_norm": 0.4719825462169263, "learning_rate": 9.889816934197549e-06, "loss": 0.4315, "step": 5357 }, { "epoch": 0.48085077741132126, "grad_norm": 0.47835645116787945, "learning_rate": 9.88970789403455e-06, "loss": 0.4977, "step": 5358 }, { "epoch": 0.4809405218639923, "grad_norm": 0.48894171385008406, "learning_rate": 9.889598800545384e-06, "loss": 0.4112, "step": 5359 }, { "epoch": 0.4810302663166633, "grad_norm": 0.4666520497426765, "learning_rate": 9.88948965373124e-06, "loss": 0.4517, "step": 5360 }, { "epoch": 0.4811200107693343, "grad_norm": 0.4787487793606339, "learning_rate": 9.889380453593308e-06, "loss": 0.4433, "step": 5361 }, { "epoch": 0.4812097552220053, "grad_norm": 0.5037829318448572, "learning_rate": 9.88927120013278e-06, "loss": 0.5144, "step": 5362 }, { "epoch": 0.48129949967467633, "grad_norm": 0.5215680737199713, "learning_rate": 9.889161893350847e-06, "loss": 0.5288, "step": 5363 }, { "epoch": 0.4813892441273474, "grad_norm": 0.4876611825000386, "learning_rate": 9.889052533248701e-06, "loss": 0.4693, "step": 5364 }, { "epoch": 0.4814789885800184, "grad_norm": 0.5197995077882438, "learning_rate": 9.888943119827534e-06, "loss": 0.5066, "step": 5365 }, { "epoch": 0.48156873303268943, "grad_norm": 0.47461886637860407, "learning_rate": 9.888833653088541e-06, "loss": 0.4621, "step": 5366 }, { "epoch": 0.48165847748536045, "grad_norm": 0.5269878447306856, "learning_rate": 9.888724133032914e-06, "loss": 0.5176, "step": 5367 }, { "epoch": 0.48174822193803146, "grad_norm": 0.471710508075732, "learning_rate": 9.888614559661848e-06, "loss": 0.4801, "step": 5368 }, { "epoch": 0.4818379663907025, "grad_norm": 0.46192251248269306, "learning_rate": 9.88850493297654e-06, "loss": 0.4404, "step": 5369 }, { "epoch": 0.4819277108433735, "grad_norm": 0.5028733502758345, "learning_rate": 9.88839525297818e-06, "loss": 0.4585, "step": 5370 }, { "epoch": 0.4820174552960445, "grad_norm": 0.47383066679531216, "learning_rate": 9.888285519667971e-06, "loss": 0.4807, "step": 5371 }, { "epoch": 0.4821071997487155, "grad_norm": 0.5250703106632881, "learning_rate": 9.888175733047105e-06, "loss": 0.5075, "step": 5372 }, { "epoch": 0.48219694420138653, "grad_norm": 0.5224589741145188, "learning_rate": 9.888065893116781e-06, "loss": 0.4721, "step": 5373 }, { "epoch": 0.48228668865405755, "grad_norm": 0.47827418784057363, "learning_rate": 9.887955999878195e-06, "loss": 0.4327, "step": 5374 }, { "epoch": 0.48237643310672856, "grad_norm": 0.4894856004207385, "learning_rate": 9.88784605333255e-06, "loss": 0.472, "step": 5375 }, { "epoch": 0.48246617755939963, "grad_norm": 0.4738970146440123, "learning_rate": 9.88773605348104e-06, "loss": 0.4352, "step": 5376 }, { "epoch": 0.48255592201207065, "grad_norm": 0.5361161184256579, "learning_rate": 9.88762600032487e-06, "loss": 0.5159, "step": 5377 }, { "epoch": 0.48264566646474166, "grad_norm": 0.4897505226042638, "learning_rate": 9.887515893865235e-06, "loss": 0.4457, "step": 5378 }, { "epoch": 0.4827354109174127, "grad_norm": 0.4474047846882846, "learning_rate": 9.887405734103338e-06, "loss": 0.4139, "step": 5379 }, { "epoch": 0.4828251553700837, "grad_norm": 0.516362320657051, "learning_rate": 9.88729552104038e-06, "loss": 0.4555, "step": 5380 }, { "epoch": 0.4829148998227547, "grad_norm": 0.43891207670642446, "learning_rate": 9.887185254677565e-06, "loss": 0.4355, "step": 5381 }, { "epoch": 0.4830046442754257, "grad_norm": 0.5628675820169226, "learning_rate": 9.887074935016092e-06, "loss": 0.4993, "step": 5382 }, { "epoch": 0.48309438872809674, "grad_norm": 0.5114107889633484, "learning_rate": 9.886964562057166e-06, "loss": 0.4601, "step": 5383 }, { "epoch": 0.48318413318076775, "grad_norm": 0.48979472582154115, "learning_rate": 9.886854135801991e-06, "loss": 0.4621, "step": 5384 }, { "epoch": 0.48327387763343876, "grad_norm": 0.4958831879995341, "learning_rate": 9.886743656251772e-06, "loss": 0.4848, "step": 5385 }, { "epoch": 0.4833636220861098, "grad_norm": 0.4613709734966178, "learning_rate": 9.886633123407711e-06, "loss": 0.4468, "step": 5386 }, { "epoch": 0.48345336653878085, "grad_norm": 0.5216602264242177, "learning_rate": 9.886522537271018e-06, "loss": 0.4804, "step": 5387 }, { "epoch": 0.48354311099145186, "grad_norm": 0.4915863637668659, "learning_rate": 9.886411897842894e-06, "loss": 0.4971, "step": 5388 }, { "epoch": 0.4836328554441229, "grad_norm": 0.5028441919023696, "learning_rate": 9.886301205124549e-06, "loss": 0.4974, "step": 5389 }, { "epoch": 0.4837225998967939, "grad_norm": 0.4555452206779961, "learning_rate": 9.886190459117188e-06, "loss": 0.4276, "step": 5390 }, { "epoch": 0.4838123443494649, "grad_norm": 0.4878534537739872, "learning_rate": 9.88607965982202e-06, "loss": 0.4242, "step": 5391 }, { "epoch": 0.4839020888021359, "grad_norm": 0.4463425251203539, "learning_rate": 9.885968807240253e-06, "loss": 0.417, "step": 5392 }, { "epoch": 0.48399183325480694, "grad_norm": 0.4798126182399112, "learning_rate": 9.885857901373098e-06, "loss": 0.4648, "step": 5393 }, { "epoch": 0.48408157770747795, "grad_norm": 0.5163398131075035, "learning_rate": 9.88574694222176e-06, "loss": 0.4666, "step": 5394 }, { "epoch": 0.48417132216014896, "grad_norm": 0.4748464601687797, "learning_rate": 9.885635929787454e-06, "loss": 0.4363, "step": 5395 }, { "epoch": 0.48426106661282, "grad_norm": 0.5170107772699506, "learning_rate": 9.885524864071387e-06, "loss": 0.5073, "step": 5396 }, { "epoch": 0.484350811065491, "grad_norm": 0.47749385190663646, "learning_rate": 9.885413745074772e-06, "loss": 0.4627, "step": 5397 }, { "epoch": 0.484440555518162, "grad_norm": 0.538471677889321, "learning_rate": 9.885302572798821e-06, "loss": 0.5165, "step": 5398 }, { "epoch": 0.4845302999708331, "grad_norm": 0.5254909111314229, "learning_rate": 9.885191347244745e-06, "loss": 0.4819, "step": 5399 }, { "epoch": 0.4846200444235041, "grad_norm": 0.5382338595064893, "learning_rate": 9.885080068413758e-06, "loss": 0.5132, "step": 5400 }, { "epoch": 0.4847097888761751, "grad_norm": 0.5036816293872409, "learning_rate": 9.884968736307074e-06, "loss": 0.487, "step": 5401 }, { "epoch": 0.4847995333288461, "grad_norm": 0.5225107771886499, "learning_rate": 9.884857350925907e-06, "loss": 0.5014, "step": 5402 }, { "epoch": 0.48488927778151714, "grad_norm": 0.48274553118223684, "learning_rate": 9.884745912271472e-06, "loss": 0.4406, "step": 5403 }, { "epoch": 0.48497902223418815, "grad_norm": 0.5058722703878527, "learning_rate": 9.884634420344982e-06, "loss": 0.4543, "step": 5404 }, { "epoch": 0.48506876668685917, "grad_norm": 0.44667002022365876, "learning_rate": 9.884522875147657e-06, "loss": 0.4204, "step": 5405 }, { "epoch": 0.4851585111395302, "grad_norm": 0.5325851860158238, "learning_rate": 9.88441127668071e-06, "loss": 0.4483, "step": 5406 }, { "epoch": 0.4852482555922012, "grad_norm": 0.5454762248765249, "learning_rate": 9.884299624945359e-06, "loss": 0.5017, "step": 5407 }, { "epoch": 0.4853380000448722, "grad_norm": 0.49253850099404306, "learning_rate": 9.884187919942823e-06, "loss": 0.4133, "step": 5408 }, { "epoch": 0.4854277444975432, "grad_norm": 0.4730062060915645, "learning_rate": 9.884076161674317e-06, "loss": 0.4355, "step": 5409 }, { "epoch": 0.48551748895021424, "grad_norm": 0.5269076951326357, "learning_rate": 9.883964350141063e-06, "loss": 0.5167, "step": 5410 }, { "epoch": 0.4856072334028853, "grad_norm": 0.47920637326898935, "learning_rate": 9.883852485344279e-06, "loss": 0.4566, "step": 5411 }, { "epoch": 0.4856969778555563, "grad_norm": 0.48078972070888926, "learning_rate": 9.883740567285188e-06, "loss": 0.4501, "step": 5412 }, { "epoch": 0.48578672230822734, "grad_norm": 0.4666759884803788, "learning_rate": 9.883628595965005e-06, "loss": 0.4434, "step": 5413 }, { "epoch": 0.48587646676089835, "grad_norm": 0.47811651793822413, "learning_rate": 9.883516571384955e-06, "loss": 0.4676, "step": 5414 }, { "epoch": 0.48596621121356937, "grad_norm": 0.49848834019590693, "learning_rate": 9.883404493546258e-06, "loss": 0.5162, "step": 5415 }, { "epoch": 0.4860559556662404, "grad_norm": 0.508687264310829, "learning_rate": 9.883292362450137e-06, "loss": 0.4959, "step": 5416 }, { "epoch": 0.4861457001189114, "grad_norm": 0.486080800963034, "learning_rate": 9.883180178097816e-06, "loss": 0.5161, "step": 5417 }, { "epoch": 0.4862354445715824, "grad_norm": 0.5319191561686214, "learning_rate": 9.883067940490516e-06, "loss": 0.4976, "step": 5418 }, { "epoch": 0.4863251890242534, "grad_norm": 0.4932773594308446, "learning_rate": 9.882955649629463e-06, "loss": 0.4276, "step": 5419 }, { "epoch": 0.48641493347692444, "grad_norm": 0.46596122940151347, "learning_rate": 9.88284330551588e-06, "loss": 0.4238, "step": 5420 }, { "epoch": 0.48650467792959545, "grad_norm": 0.4894440543811354, "learning_rate": 9.882730908150996e-06, "loss": 0.4549, "step": 5421 }, { "epoch": 0.4865944223822665, "grad_norm": 0.4606632431610633, "learning_rate": 9.882618457536032e-06, "loss": 0.4274, "step": 5422 }, { "epoch": 0.48668416683493754, "grad_norm": 0.4770139628309927, "learning_rate": 9.882505953672216e-06, "loss": 0.4696, "step": 5423 }, { "epoch": 0.48677391128760855, "grad_norm": 0.46043773950921896, "learning_rate": 9.882393396560778e-06, "loss": 0.4349, "step": 5424 }, { "epoch": 0.48686365574027957, "grad_norm": 0.47910155602511123, "learning_rate": 9.88228078620294e-06, "loss": 0.4646, "step": 5425 }, { "epoch": 0.4869534001929506, "grad_norm": 0.5877248495422092, "learning_rate": 9.882168122599934e-06, "loss": 0.5644, "step": 5426 }, { "epoch": 0.4870431446456216, "grad_norm": 0.4557448571288433, "learning_rate": 9.882055405752988e-06, "loss": 0.4223, "step": 5427 }, { "epoch": 0.4871328890982926, "grad_norm": 0.471613544244847, "learning_rate": 9.88194263566333e-06, "loss": 0.4496, "step": 5428 }, { "epoch": 0.4872226335509636, "grad_norm": 0.4393937573204208, "learning_rate": 9.881829812332191e-06, "loss": 0.409, "step": 5429 }, { "epoch": 0.48731237800363464, "grad_norm": 0.5039601577677291, "learning_rate": 9.881716935760801e-06, "loss": 0.5011, "step": 5430 }, { "epoch": 0.48740212245630565, "grad_norm": 0.5235226504153564, "learning_rate": 9.881604005950393e-06, "loss": 0.5165, "step": 5431 }, { "epoch": 0.48749186690897667, "grad_norm": 0.5477913393787258, "learning_rate": 9.881491022902194e-06, "loss": 0.5482, "step": 5432 }, { "epoch": 0.4875816113616477, "grad_norm": 0.48713160869684935, "learning_rate": 9.881377986617442e-06, "loss": 0.4404, "step": 5433 }, { "epoch": 0.48767135581431875, "grad_norm": 0.5263743653698031, "learning_rate": 9.881264897097365e-06, "loss": 0.5282, "step": 5434 }, { "epoch": 0.48776110026698977, "grad_norm": 0.5205804738415478, "learning_rate": 9.881151754343196e-06, "loss": 0.5285, "step": 5435 }, { "epoch": 0.4878508447196608, "grad_norm": 0.4593138597486911, "learning_rate": 9.881038558356175e-06, "loss": 0.4534, "step": 5436 }, { "epoch": 0.4879405891723318, "grad_norm": 0.45468241925197533, "learning_rate": 9.88092530913753e-06, "loss": 0.415, "step": 5437 }, { "epoch": 0.4880303336250028, "grad_norm": 0.5107134360845851, "learning_rate": 9.880812006688499e-06, "loss": 0.5221, "step": 5438 }, { "epoch": 0.4881200780776738, "grad_norm": 0.5348694826313438, "learning_rate": 9.880698651010317e-06, "loss": 0.5181, "step": 5439 }, { "epoch": 0.48820982253034484, "grad_norm": 0.4687341079319733, "learning_rate": 9.880585242104221e-06, "loss": 0.4546, "step": 5440 }, { "epoch": 0.48829956698301585, "grad_norm": 0.47124441381050525, "learning_rate": 9.880471779971445e-06, "loss": 0.4588, "step": 5441 }, { "epoch": 0.48838931143568687, "grad_norm": 0.48782464887763666, "learning_rate": 9.880358264613233e-06, "loss": 0.5202, "step": 5442 }, { "epoch": 0.4884790558883579, "grad_norm": 0.48219269963076133, "learning_rate": 9.880244696030814e-06, "loss": 0.4676, "step": 5443 }, { "epoch": 0.4885688003410289, "grad_norm": 0.4670219987094812, "learning_rate": 9.880131074225435e-06, "loss": 0.442, "step": 5444 }, { "epoch": 0.48865854479369997, "grad_norm": 0.4948424863965795, "learning_rate": 9.880017399198327e-06, "loss": 0.4822, "step": 5445 }, { "epoch": 0.488748289246371, "grad_norm": 0.5082159559808728, "learning_rate": 9.879903670950737e-06, "loss": 0.5033, "step": 5446 }, { "epoch": 0.488838033699042, "grad_norm": 0.5774178684846714, "learning_rate": 9.879789889483902e-06, "loss": 0.4527, "step": 5447 }, { "epoch": 0.488927778151713, "grad_norm": 0.48683473318905685, "learning_rate": 9.879676054799063e-06, "loss": 0.4561, "step": 5448 }, { "epoch": 0.489017522604384, "grad_norm": 0.47885486223016255, "learning_rate": 9.87956216689746e-06, "loss": 0.4826, "step": 5449 }, { "epoch": 0.48910726705705504, "grad_norm": 0.510461735880211, "learning_rate": 9.879448225780339e-06, "loss": 0.4394, "step": 5450 }, { "epoch": 0.48919701150972605, "grad_norm": 0.4822307055745803, "learning_rate": 9.87933423144894e-06, "loss": 0.4758, "step": 5451 }, { "epoch": 0.48928675596239707, "grad_norm": 0.47524101146102493, "learning_rate": 9.879220183904505e-06, "loss": 0.4551, "step": 5452 }, { "epoch": 0.4893765004150681, "grad_norm": 0.4709357229875677, "learning_rate": 9.879106083148281e-06, "loss": 0.4664, "step": 5453 }, { "epoch": 0.4894662448677391, "grad_norm": 0.4837273044034277, "learning_rate": 9.87899192918151e-06, "loss": 0.4676, "step": 5454 }, { "epoch": 0.4895559893204101, "grad_norm": 0.46652328863049647, "learning_rate": 9.878877722005436e-06, "loss": 0.4642, "step": 5455 }, { "epoch": 0.4896457337730811, "grad_norm": 0.4838121742120657, "learning_rate": 9.878763461621308e-06, "loss": 0.5007, "step": 5456 }, { "epoch": 0.4897354782257522, "grad_norm": 0.4786299850148147, "learning_rate": 9.878649148030371e-06, "loss": 0.4728, "step": 5457 }, { "epoch": 0.4898252226784232, "grad_norm": 0.5420274758804102, "learning_rate": 9.878534781233868e-06, "loss": 0.5177, "step": 5458 }, { "epoch": 0.4899149671310942, "grad_norm": 0.5156201047200061, "learning_rate": 9.878420361233052e-06, "loss": 0.5179, "step": 5459 }, { "epoch": 0.49000471158376524, "grad_norm": 0.49432837829599136, "learning_rate": 9.878305888029164e-06, "loss": 0.5337, "step": 5460 }, { "epoch": 0.49009445603643625, "grad_norm": 0.4916570019977981, "learning_rate": 9.878191361623461e-06, "loss": 0.4652, "step": 5461 }, { "epoch": 0.49018420048910727, "grad_norm": 0.47523167570238334, "learning_rate": 9.878076782017186e-06, "loss": 0.4726, "step": 5462 }, { "epoch": 0.4902739449417783, "grad_norm": 0.49802666619764024, "learning_rate": 9.877962149211588e-06, "loss": 0.4923, "step": 5463 }, { "epoch": 0.4903636893944493, "grad_norm": 0.47968277968894, "learning_rate": 9.87784746320792e-06, "loss": 0.4588, "step": 5464 }, { "epoch": 0.4904534338471203, "grad_norm": 0.5145563203903444, "learning_rate": 9.877732724007434e-06, "loss": 0.5477, "step": 5465 }, { "epoch": 0.4905431782997913, "grad_norm": 0.5523500672070107, "learning_rate": 9.877617931611376e-06, "loss": 0.5091, "step": 5466 }, { "epoch": 0.49063292275246234, "grad_norm": 0.5145595720329639, "learning_rate": 9.877503086021003e-06, "loss": 0.4995, "step": 5467 }, { "epoch": 0.49072266720513336, "grad_norm": 0.4641369009035598, "learning_rate": 9.877388187237565e-06, "loss": 0.4202, "step": 5468 }, { "epoch": 0.4908124116578044, "grad_norm": 0.5035810665707176, "learning_rate": 9.877273235262316e-06, "loss": 0.506, "step": 5469 }, { "epoch": 0.49090215611047544, "grad_norm": 0.43708390764339805, "learning_rate": 9.877158230096508e-06, "loss": 0.3874, "step": 5470 }, { "epoch": 0.49099190056314646, "grad_norm": 0.4983520518915792, "learning_rate": 9.877043171741398e-06, "loss": 0.4787, "step": 5471 }, { "epoch": 0.49108164501581747, "grad_norm": 0.4888989673561317, "learning_rate": 9.87692806019824e-06, "loss": 0.4757, "step": 5472 }, { "epoch": 0.4911713894684885, "grad_norm": 0.4339253769879261, "learning_rate": 9.876812895468288e-06, "loss": 0.4113, "step": 5473 }, { "epoch": 0.4912611339211595, "grad_norm": 0.457660736044341, "learning_rate": 9.876697677552798e-06, "loss": 0.4437, "step": 5474 }, { "epoch": 0.4913508783738305, "grad_norm": 0.4574034128630521, "learning_rate": 9.876582406453027e-06, "loss": 0.4426, "step": 5475 }, { "epoch": 0.4914406228265015, "grad_norm": 0.5454531622071417, "learning_rate": 9.876467082170233e-06, "loss": 0.5282, "step": 5476 }, { "epoch": 0.49153036727917254, "grad_norm": 0.4901142086979865, "learning_rate": 9.876351704705673e-06, "loss": 0.4298, "step": 5477 }, { "epoch": 0.49162011173184356, "grad_norm": 0.4718213607758002, "learning_rate": 9.876236274060606e-06, "loss": 0.4927, "step": 5478 }, { "epoch": 0.49170985618451457, "grad_norm": 0.5013978751695849, "learning_rate": 9.87612079023629e-06, "loss": 0.512, "step": 5479 }, { "epoch": 0.49179960063718564, "grad_norm": 0.4848709530281916, "learning_rate": 9.876005253233983e-06, "loss": 0.462, "step": 5480 }, { "epoch": 0.49188934508985666, "grad_norm": 0.46150823549385456, "learning_rate": 9.875889663054948e-06, "loss": 0.4255, "step": 5481 }, { "epoch": 0.49197908954252767, "grad_norm": 0.5060206965550795, "learning_rate": 9.875774019700443e-06, "loss": 0.4844, "step": 5482 }, { "epoch": 0.4920688339951987, "grad_norm": 0.514525348038586, "learning_rate": 9.875658323171732e-06, "loss": 0.5016, "step": 5483 }, { "epoch": 0.4921585784478697, "grad_norm": 0.4958309357560664, "learning_rate": 9.875542573470074e-06, "loss": 0.4867, "step": 5484 }, { "epoch": 0.4922483229005407, "grad_norm": 0.4654501519120771, "learning_rate": 9.875426770596735e-06, "loss": 0.4403, "step": 5485 }, { "epoch": 0.49233806735321173, "grad_norm": 0.4591079259833837, "learning_rate": 9.875310914552972e-06, "loss": 0.4605, "step": 5486 }, { "epoch": 0.49242781180588274, "grad_norm": 0.5214826473122463, "learning_rate": 9.875195005340054e-06, "loss": 0.4865, "step": 5487 }, { "epoch": 0.49251755625855376, "grad_norm": 0.5346601209414353, "learning_rate": 9.875079042959242e-06, "loss": 0.5843, "step": 5488 }, { "epoch": 0.49260730071122477, "grad_norm": 0.44382445556555683, "learning_rate": 9.874963027411802e-06, "loss": 0.4271, "step": 5489 }, { "epoch": 0.4926970451638958, "grad_norm": 0.5312674506624546, "learning_rate": 9.874846958699e-06, "loss": 0.5263, "step": 5490 }, { "epoch": 0.4927867896165668, "grad_norm": 0.48353123551544097, "learning_rate": 9.8747308368221e-06, "loss": 0.4525, "step": 5491 }, { "epoch": 0.49287653406923787, "grad_norm": 0.45111353599185333, "learning_rate": 9.87461466178237e-06, "loss": 0.3998, "step": 5492 }, { "epoch": 0.4929662785219089, "grad_norm": 0.4806811251047349, "learning_rate": 9.874498433581075e-06, "loss": 0.42, "step": 5493 }, { "epoch": 0.4930560229745799, "grad_norm": 0.5021736438368777, "learning_rate": 9.874382152219484e-06, "loss": 0.4369, "step": 5494 }, { "epoch": 0.4931457674272509, "grad_norm": 0.528643754446649, "learning_rate": 9.874265817698865e-06, "loss": 0.5363, "step": 5495 }, { "epoch": 0.49323551187992193, "grad_norm": 0.4559721726117749, "learning_rate": 9.874149430020485e-06, "loss": 0.4769, "step": 5496 }, { "epoch": 0.49332525633259294, "grad_norm": 0.511910763380368, "learning_rate": 9.874032989185618e-06, "loss": 0.5064, "step": 5497 }, { "epoch": 0.49341500078526396, "grad_norm": 0.5049644257235156, "learning_rate": 9.873916495195528e-06, "loss": 0.4288, "step": 5498 }, { "epoch": 0.49350474523793497, "grad_norm": 0.4973450060990458, "learning_rate": 9.87379994805149e-06, "loss": 0.4849, "step": 5499 }, { "epoch": 0.493594489690606, "grad_norm": 0.5433727926394133, "learning_rate": 9.873683347754772e-06, "loss": 0.5139, "step": 5500 }, { "epoch": 0.493684234143277, "grad_norm": 0.491540379569126, "learning_rate": 9.873566694306649e-06, "loss": 0.4453, "step": 5501 }, { "epoch": 0.493773978595948, "grad_norm": 0.4981859202327766, "learning_rate": 9.87344998770839e-06, "loss": 0.4639, "step": 5502 }, { "epoch": 0.49386372304861903, "grad_norm": 0.4706666815101646, "learning_rate": 9.873333227961267e-06, "loss": 0.4969, "step": 5503 }, { "epoch": 0.4939534675012901, "grad_norm": 0.45546013023146253, "learning_rate": 9.873216415066557e-06, "loss": 0.4641, "step": 5504 }, { "epoch": 0.4940432119539611, "grad_norm": 0.4864573714419613, "learning_rate": 9.87309954902553e-06, "loss": 0.502, "step": 5505 }, { "epoch": 0.49413295640663213, "grad_norm": 0.4898997699120401, "learning_rate": 9.872982629839467e-06, "loss": 0.4832, "step": 5506 }, { "epoch": 0.49422270085930314, "grad_norm": 0.5404075195116196, "learning_rate": 9.872865657509636e-06, "loss": 0.532, "step": 5507 }, { "epoch": 0.49431244531197416, "grad_norm": 0.49141182479623796, "learning_rate": 9.872748632037314e-06, "loss": 0.4942, "step": 5508 }, { "epoch": 0.4944021897646452, "grad_norm": 0.49773490072521065, "learning_rate": 9.872631553423781e-06, "loss": 0.4892, "step": 5509 }, { "epoch": 0.4944919342173162, "grad_norm": 0.5288644587810468, "learning_rate": 9.872514421670312e-06, "loss": 0.4818, "step": 5510 }, { "epoch": 0.4945816786699872, "grad_norm": 0.5533374765025755, "learning_rate": 9.872397236778183e-06, "loss": 0.5758, "step": 5511 }, { "epoch": 0.4946714231226582, "grad_norm": 0.5011944020097663, "learning_rate": 9.872279998748672e-06, "loss": 0.4946, "step": 5512 }, { "epoch": 0.49476116757532923, "grad_norm": 0.4721334528984612, "learning_rate": 9.87216270758306e-06, "loss": 0.484, "step": 5513 }, { "epoch": 0.49485091202800024, "grad_norm": 0.4989348664630689, "learning_rate": 9.872045363282624e-06, "loss": 0.4801, "step": 5514 }, { "epoch": 0.4949406564806713, "grad_norm": 0.4709448209019108, "learning_rate": 9.871927965848644e-06, "loss": 0.4654, "step": 5515 }, { "epoch": 0.49503040093334233, "grad_norm": 0.5433932982793832, "learning_rate": 9.871810515282402e-06, "loss": 0.4763, "step": 5516 }, { "epoch": 0.49512014538601334, "grad_norm": 0.4596933206316811, "learning_rate": 9.871693011585178e-06, "loss": 0.4702, "step": 5517 }, { "epoch": 0.49520988983868436, "grad_norm": 0.4963441813645071, "learning_rate": 9.871575454758251e-06, "loss": 0.4515, "step": 5518 }, { "epoch": 0.4952996342913554, "grad_norm": 0.5426211312874618, "learning_rate": 9.871457844802905e-06, "loss": 0.536, "step": 5519 }, { "epoch": 0.4953893787440264, "grad_norm": 0.4936346630206415, "learning_rate": 9.871340181720423e-06, "loss": 0.4897, "step": 5520 }, { "epoch": 0.4954791231966974, "grad_norm": 0.43438649250272027, "learning_rate": 9.871222465512089e-06, "loss": 0.3913, "step": 5521 }, { "epoch": 0.4955688676493684, "grad_norm": 0.468944589183731, "learning_rate": 9.871104696179185e-06, "loss": 0.425, "step": 5522 }, { "epoch": 0.49565861210203943, "grad_norm": 0.4888325289774961, "learning_rate": 9.870986873722999e-06, "loss": 0.5248, "step": 5523 }, { "epoch": 0.49574835655471045, "grad_norm": 0.48953417786343423, "learning_rate": 9.870868998144809e-06, "loss": 0.4525, "step": 5524 }, { "epoch": 0.49583810100738146, "grad_norm": 0.49566830843006776, "learning_rate": 9.870751069445907e-06, "loss": 0.5186, "step": 5525 }, { "epoch": 0.4959278454600525, "grad_norm": 0.4823959760088903, "learning_rate": 9.870633087627576e-06, "loss": 0.4463, "step": 5526 }, { "epoch": 0.49601758991272354, "grad_norm": 0.4880459550989127, "learning_rate": 9.870515052691103e-06, "loss": 0.48, "step": 5527 }, { "epoch": 0.49610733436539456, "grad_norm": 0.513024458547216, "learning_rate": 9.870396964637777e-06, "loss": 0.46, "step": 5528 }, { "epoch": 0.4961970788180656, "grad_norm": 0.530224457793316, "learning_rate": 9.870278823468885e-06, "loss": 0.4654, "step": 5529 }, { "epoch": 0.4962868232707366, "grad_norm": 0.49933079463366087, "learning_rate": 9.870160629185712e-06, "loss": 0.4822, "step": 5530 }, { "epoch": 0.4963765677234076, "grad_norm": 0.4597402763752256, "learning_rate": 9.870042381789552e-06, "loss": 0.4649, "step": 5531 }, { "epoch": 0.4964663121760786, "grad_norm": 0.485995483659628, "learning_rate": 9.869924081281692e-06, "loss": 0.4466, "step": 5532 }, { "epoch": 0.49655605662874963, "grad_norm": 0.4577553285318434, "learning_rate": 9.869805727663422e-06, "loss": 0.404, "step": 5533 }, { "epoch": 0.49664580108142065, "grad_norm": 0.46832845950720536, "learning_rate": 9.869687320936034e-06, "loss": 0.4553, "step": 5534 }, { "epoch": 0.49673554553409166, "grad_norm": 0.446471225856076, "learning_rate": 9.86956886110082e-06, "loss": 0.4102, "step": 5535 }, { "epoch": 0.4968252899867627, "grad_norm": 0.4869436862396034, "learning_rate": 9.869450348159069e-06, "loss": 0.4635, "step": 5536 }, { "epoch": 0.4969150344394337, "grad_norm": 0.4929138844450859, "learning_rate": 9.869331782112075e-06, "loss": 0.4653, "step": 5537 }, { "epoch": 0.49700477889210476, "grad_norm": 0.49854322642282806, "learning_rate": 9.869213162961134e-06, "loss": 0.501, "step": 5538 }, { "epoch": 0.4970945233447758, "grad_norm": 0.4610783970860919, "learning_rate": 9.869094490707534e-06, "loss": 0.4386, "step": 5539 }, { "epoch": 0.4971842677974468, "grad_norm": 0.5025343296026574, "learning_rate": 9.868975765352572e-06, "loss": 0.4632, "step": 5540 }, { "epoch": 0.4972740122501178, "grad_norm": 0.502546857056352, "learning_rate": 9.868856986897544e-06, "loss": 0.4498, "step": 5541 }, { "epoch": 0.4973637567027888, "grad_norm": 0.48947882344228805, "learning_rate": 9.868738155343743e-06, "loss": 0.4554, "step": 5542 }, { "epoch": 0.49745350115545983, "grad_norm": 0.453750410568444, "learning_rate": 9.868619270692468e-06, "loss": 0.4301, "step": 5543 }, { "epoch": 0.49754324560813085, "grad_norm": 0.4846639035860843, "learning_rate": 9.868500332945013e-06, "loss": 0.4496, "step": 5544 }, { "epoch": 0.49763299006080186, "grad_norm": 0.5096587717749879, "learning_rate": 9.868381342102674e-06, "loss": 0.49, "step": 5545 }, { "epoch": 0.4977227345134729, "grad_norm": 0.48934336916611687, "learning_rate": 9.868262298166752e-06, "loss": 0.4515, "step": 5546 }, { "epoch": 0.4978124789661439, "grad_norm": 0.5208769329585529, "learning_rate": 9.868143201138543e-06, "loss": 0.4541, "step": 5547 }, { "epoch": 0.4979022234188149, "grad_norm": 0.4844679651961079, "learning_rate": 9.868024051019347e-06, "loss": 0.4773, "step": 5548 }, { "epoch": 0.4979919678714859, "grad_norm": 0.4773351180621448, "learning_rate": 9.867904847810463e-06, "loss": 0.4613, "step": 5549 }, { "epoch": 0.498081712324157, "grad_norm": 0.46166304937344627, "learning_rate": 9.867785591513193e-06, "loss": 0.4411, "step": 5550 }, { "epoch": 0.498171456776828, "grad_norm": 0.5101797440588223, "learning_rate": 9.867666282128834e-06, "loss": 0.5088, "step": 5551 }, { "epoch": 0.498261201229499, "grad_norm": 0.5023481776552032, "learning_rate": 9.867546919658688e-06, "loss": 0.4528, "step": 5552 }, { "epoch": 0.49835094568217003, "grad_norm": 0.5169088018662715, "learning_rate": 9.867427504104058e-06, "loss": 0.5124, "step": 5553 }, { "epoch": 0.49844069013484105, "grad_norm": 0.48864274961046794, "learning_rate": 9.867308035466245e-06, "loss": 0.4506, "step": 5554 }, { "epoch": 0.49853043458751206, "grad_norm": 0.4607854092393499, "learning_rate": 9.867188513746555e-06, "loss": 0.443, "step": 5555 }, { "epoch": 0.4986201790401831, "grad_norm": 0.5195807973978639, "learning_rate": 9.867068938946286e-06, "loss": 0.5438, "step": 5556 }, { "epoch": 0.4987099234928541, "grad_norm": 0.5096955995836684, "learning_rate": 9.866949311066749e-06, "loss": 0.4692, "step": 5557 }, { "epoch": 0.4987996679455251, "grad_norm": 0.4942628917616829, "learning_rate": 9.866829630109242e-06, "loss": 0.4596, "step": 5558 }, { "epoch": 0.4988894123981961, "grad_norm": 0.5391629314697443, "learning_rate": 9.866709896075075e-06, "loss": 0.5342, "step": 5559 }, { "epoch": 0.49897915685086713, "grad_norm": 0.49717200041830933, "learning_rate": 9.866590108965553e-06, "loss": 0.4969, "step": 5560 }, { "epoch": 0.49906890130353815, "grad_norm": 0.4637423811533641, "learning_rate": 9.866470268781978e-06, "loss": 0.4145, "step": 5561 }, { "epoch": 0.4991586457562092, "grad_norm": 0.45063395732311706, "learning_rate": 9.866350375525661e-06, "loss": 0.4044, "step": 5562 }, { "epoch": 0.49924839020888023, "grad_norm": 0.5244122572852155, "learning_rate": 9.866230429197912e-06, "loss": 0.5131, "step": 5563 }, { "epoch": 0.49933813466155125, "grad_norm": 0.43314670695271634, "learning_rate": 9.866110429800033e-06, "loss": 0.4126, "step": 5564 }, { "epoch": 0.49942787911422226, "grad_norm": 0.4961331093467623, "learning_rate": 9.865990377333336e-06, "loss": 0.4336, "step": 5565 }, { "epoch": 0.4995176235668933, "grad_norm": 0.501813129699047, "learning_rate": 9.86587027179913e-06, "loss": 0.4803, "step": 5566 }, { "epoch": 0.4996073680195643, "grad_norm": 0.48919753461876625, "learning_rate": 9.865750113198724e-06, "loss": 0.4756, "step": 5567 }, { "epoch": 0.4996971124722353, "grad_norm": 0.5239528209026804, "learning_rate": 9.86562990153343e-06, "loss": 0.5236, "step": 5568 }, { "epoch": 0.4997868569249063, "grad_norm": 0.506753774590379, "learning_rate": 9.865509636804557e-06, "loss": 0.4673, "step": 5569 }, { "epoch": 0.49987660137757733, "grad_norm": 0.502669210956922, "learning_rate": 9.865389319013422e-06, "loss": 0.5434, "step": 5570 }, { "epoch": 0.49996634583024835, "grad_norm": 0.46123947062821197, "learning_rate": 9.865268948161328e-06, "loss": 0.4384, "step": 5571 }, { "epoch": 0.5000560902829194, "grad_norm": 0.4810395565593217, "learning_rate": 9.865148524249595e-06, "loss": 0.465, "step": 5572 }, { "epoch": 0.5001458347355904, "grad_norm": 0.5256739569715331, "learning_rate": 9.865028047279532e-06, "loss": 0.515, "step": 5573 }, { "epoch": 0.5002355791882614, "grad_norm": 0.47959252035283595, "learning_rate": 9.864907517252457e-06, "loss": 0.498, "step": 5574 }, { "epoch": 0.5003253236409324, "grad_norm": 0.4987573502977205, "learning_rate": 9.86478693416968e-06, "loss": 0.5071, "step": 5575 }, { "epoch": 0.5004150680936035, "grad_norm": 0.4377730260577394, "learning_rate": 9.864666298032521e-06, "loss": 0.3702, "step": 5576 }, { "epoch": 0.5005048125462744, "grad_norm": 0.47940114104896847, "learning_rate": 9.86454560884229e-06, "loss": 0.5445, "step": 5577 }, { "epoch": 0.5005945569989455, "grad_norm": 0.4802491141958228, "learning_rate": 9.86442486660031e-06, "loss": 0.4334, "step": 5578 }, { "epoch": 0.5006843014516166, "grad_norm": 0.47547336814863006, "learning_rate": 9.864304071307891e-06, "loss": 0.4374, "step": 5579 }, { "epoch": 0.5007740459042875, "grad_norm": 0.5267016127887342, "learning_rate": 9.864183222966354e-06, "loss": 0.5582, "step": 5580 }, { "epoch": 0.5008637903569586, "grad_norm": 0.49071618545312695, "learning_rate": 9.864062321577018e-06, "loss": 0.4458, "step": 5581 }, { "epoch": 0.5009535348096296, "grad_norm": 0.47273998495783887, "learning_rate": 9.863941367141197e-06, "loss": 0.4261, "step": 5582 }, { "epoch": 0.5010432792623006, "grad_norm": 0.44576430563268665, "learning_rate": 9.863820359660215e-06, "loss": 0.4178, "step": 5583 }, { "epoch": 0.5011330237149716, "grad_norm": 0.5012400304259843, "learning_rate": 9.86369929913539e-06, "loss": 0.4791, "step": 5584 }, { "epoch": 0.5012227681676427, "grad_norm": 0.4704810932068598, "learning_rate": 9.86357818556804e-06, "loss": 0.4123, "step": 5585 }, { "epoch": 0.5013125126203136, "grad_norm": 0.5267706644307398, "learning_rate": 9.86345701895949e-06, "loss": 0.4854, "step": 5586 }, { "epoch": 0.5014022570729847, "grad_norm": 0.4834227412911738, "learning_rate": 9.863335799311057e-06, "loss": 0.4464, "step": 5587 }, { "epoch": 0.5014920015256557, "grad_norm": 0.5006493910944649, "learning_rate": 9.863214526624065e-06, "loss": 0.4846, "step": 5588 }, { "epoch": 0.5015817459783267, "grad_norm": 0.5385002782770002, "learning_rate": 9.863093200899838e-06, "loss": 0.4853, "step": 5589 }, { "epoch": 0.5016714904309977, "grad_norm": 0.4662165586702815, "learning_rate": 9.862971822139699e-06, "loss": 0.3896, "step": 5590 }, { "epoch": 0.5017612348836687, "grad_norm": 0.47738954201422507, "learning_rate": 9.86285039034497e-06, "loss": 0.4966, "step": 5591 }, { "epoch": 0.5018509793363398, "grad_norm": 0.48392778286402094, "learning_rate": 9.862728905516974e-06, "loss": 0.4435, "step": 5592 }, { "epoch": 0.5019407237890108, "grad_norm": 0.4766617899599912, "learning_rate": 9.86260736765704e-06, "loss": 0.4481, "step": 5593 }, { "epoch": 0.5020304682416818, "grad_norm": 0.4864347102965531, "learning_rate": 9.862485776766489e-06, "loss": 0.4788, "step": 5594 }, { "epoch": 0.5021202126943528, "grad_norm": 0.47054793357782915, "learning_rate": 9.86236413284665e-06, "loss": 0.4725, "step": 5595 }, { "epoch": 0.5022099571470239, "grad_norm": 0.49867906040518334, "learning_rate": 9.86224243589885e-06, "loss": 0.427, "step": 5596 }, { "epoch": 0.5022997015996948, "grad_norm": 0.4868345965829783, "learning_rate": 9.862120685924416e-06, "loss": 0.4501, "step": 5597 }, { "epoch": 0.5023894460523659, "grad_norm": 0.49048270081514217, "learning_rate": 9.861998882924673e-06, "loss": 0.4423, "step": 5598 }, { "epoch": 0.5024791905050369, "grad_norm": 0.4867239345859266, "learning_rate": 9.861877026900952e-06, "loss": 0.4158, "step": 5599 }, { "epoch": 0.5025689349577079, "grad_norm": 0.4495797301396112, "learning_rate": 9.861755117854582e-06, "loss": 0.4104, "step": 5600 }, { "epoch": 0.5026586794103789, "grad_norm": 0.5188049098425587, "learning_rate": 9.86163315578689e-06, "loss": 0.5536, "step": 5601 }, { "epoch": 0.50274842386305, "grad_norm": 0.4913017654889191, "learning_rate": 9.861511140699208e-06, "loss": 0.4218, "step": 5602 }, { "epoch": 0.502838168315721, "grad_norm": 0.5358649564419904, "learning_rate": 9.861389072592868e-06, "loss": 0.5508, "step": 5603 }, { "epoch": 0.502927912768392, "grad_norm": 0.47814025513702574, "learning_rate": 9.8612669514692e-06, "loss": 0.4475, "step": 5604 }, { "epoch": 0.5030176572210631, "grad_norm": 0.49983226207565856, "learning_rate": 9.861144777329534e-06, "loss": 0.5244, "step": 5605 }, { "epoch": 0.503107401673734, "grad_norm": 0.5157767819600477, "learning_rate": 9.861022550175205e-06, "loss": 0.5297, "step": 5606 }, { "epoch": 0.5031971461264051, "grad_norm": 0.49517581978440617, "learning_rate": 9.860900270007545e-06, "loss": 0.506, "step": 5607 }, { "epoch": 0.503286890579076, "grad_norm": 0.47327464157188637, "learning_rate": 9.860777936827888e-06, "loss": 0.451, "step": 5608 }, { "epoch": 0.5033766350317471, "grad_norm": 0.5581669036725155, "learning_rate": 9.860655550637566e-06, "loss": 0.6018, "step": 5609 }, { "epoch": 0.5034663794844181, "grad_norm": 0.44987644930040277, "learning_rate": 9.860533111437916e-06, "loss": 0.4456, "step": 5610 }, { "epoch": 0.5035561239370892, "grad_norm": 0.5666933219947381, "learning_rate": 9.860410619230274e-06, "loss": 0.5452, "step": 5611 }, { "epoch": 0.5036458683897601, "grad_norm": 0.5199185309571714, "learning_rate": 9.860288074015973e-06, "loss": 0.4782, "step": 5612 }, { "epoch": 0.5037356128424312, "grad_norm": 0.47904326776825673, "learning_rate": 9.860165475796352e-06, "loss": 0.4484, "step": 5613 }, { "epoch": 0.5038253572951022, "grad_norm": 0.48074025643256424, "learning_rate": 9.860042824572746e-06, "loss": 0.461, "step": 5614 }, { "epoch": 0.5039151017477732, "grad_norm": 0.5089754545025867, "learning_rate": 9.859920120346495e-06, "loss": 0.5111, "step": 5615 }, { "epoch": 0.5040048462004443, "grad_norm": 0.4691711773079407, "learning_rate": 9.859797363118935e-06, "loss": 0.4068, "step": 5616 }, { "epoch": 0.5040945906531152, "grad_norm": 0.48114381634904724, "learning_rate": 9.859674552891405e-06, "loss": 0.4408, "step": 5617 }, { "epoch": 0.5041843351057863, "grad_norm": 0.4513551110284766, "learning_rate": 9.859551689665248e-06, "loss": 0.3979, "step": 5618 }, { "epoch": 0.5042740795584573, "grad_norm": 0.4332967312602158, "learning_rate": 9.859428773441797e-06, "loss": 0.3813, "step": 5619 }, { "epoch": 0.5043638240111283, "grad_norm": 0.492457032470363, "learning_rate": 9.859305804222399e-06, "loss": 0.467, "step": 5620 }, { "epoch": 0.5044535684637993, "grad_norm": 0.5004575857334149, "learning_rate": 9.859182782008392e-06, "loss": 0.4516, "step": 5621 }, { "epoch": 0.5045433129164704, "grad_norm": 0.4934959040097065, "learning_rate": 9.859059706801118e-06, "loss": 0.4738, "step": 5622 }, { "epoch": 0.5046330573691413, "grad_norm": 0.49176650125342336, "learning_rate": 9.85893657860192e-06, "loss": 0.5029, "step": 5623 }, { "epoch": 0.5047228018218124, "grad_norm": 0.48190630589623656, "learning_rate": 9.858813397412136e-06, "loss": 0.475, "step": 5624 }, { "epoch": 0.5048125462744834, "grad_norm": 0.5123423244206715, "learning_rate": 9.858690163233117e-06, "loss": 0.4986, "step": 5625 }, { "epoch": 0.5049022907271544, "grad_norm": 0.4795309902150654, "learning_rate": 9.858566876066204e-06, "loss": 0.4583, "step": 5626 }, { "epoch": 0.5049920351798255, "grad_norm": 0.4502962188435428, "learning_rate": 9.858443535912741e-06, "loss": 0.3901, "step": 5627 }, { "epoch": 0.5050817796324965, "grad_norm": 0.47498729340121737, "learning_rate": 9.858320142774073e-06, "loss": 0.4375, "step": 5628 }, { "epoch": 0.5051715240851675, "grad_norm": 0.5333064499688663, "learning_rate": 9.858196696651545e-06, "loss": 0.5001, "step": 5629 }, { "epoch": 0.5052612685378385, "grad_norm": 0.4824661333781281, "learning_rate": 9.858073197546505e-06, "loss": 0.4572, "step": 5630 }, { "epoch": 0.5053510129905096, "grad_norm": 0.5161049371097769, "learning_rate": 9.857949645460299e-06, "loss": 0.5294, "step": 5631 }, { "epoch": 0.5054407574431805, "grad_norm": 0.49340042829617103, "learning_rate": 9.857826040394273e-06, "loss": 0.4842, "step": 5632 }, { "epoch": 0.5055305018958516, "grad_norm": 0.4331586705324088, "learning_rate": 9.857702382349779e-06, "loss": 0.3784, "step": 5633 }, { "epoch": 0.5056202463485225, "grad_norm": 0.47199734078630534, "learning_rate": 9.857578671328161e-06, "loss": 0.4443, "step": 5634 }, { "epoch": 0.5057099908011936, "grad_norm": 0.5426245270441772, "learning_rate": 9.85745490733077e-06, "loss": 0.512, "step": 5635 }, { "epoch": 0.5057997352538646, "grad_norm": 0.5027127517169873, "learning_rate": 9.857331090358957e-06, "loss": 0.4905, "step": 5636 }, { "epoch": 0.5058894797065356, "grad_norm": 0.5517632426664848, "learning_rate": 9.857207220414072e-06, "loss": 0.5128, "step": 5637 }, { "epoch": 0.5059792241592067, "grad_norm": 0.4499811236815485, "learning_rate": 9.857083297497466e-06, "loss": 0.4324, "step": 5638 }, { "epoch": 0.5060689686118777, "grad_norm": 0.4301144601303113, "learning_rate": 9.856959321610487e-06, "loss": 0.39, "step": 5639 }, { "epoch": 0.5061587130645487, "grad_norm": 0.4621807595429013, "learning_rate": 9.85683529275449e-06, "loss": 0.4175, "step": 5640 }, { "epoch": 0.5062484575172197, "grad_norm": 0.5121522510038692, "learning_rate": 9.856711210930828e-06, "loss": 0.4881, "step": 5641 }, { "epoch": 0.5063382019698908, "grad_norm": 0.4841546287991733, "learning_rate": 9.856587076140855e-06, "loss": 0.4504, "step": 5642 }, { "epoch": 0.5064279464225617, "grad_norm": 0.4772015394877526, "learning_rate": 9.856462888385921e-06, "loss": 0.4833, "step": 5643 }, { "epoch": 0.5065176908752328, "grad_norm": 0.4840125017787168, "learning_rate": 9.856338647667385e-06, "loss": 0.4585, "step": 5644 }, { "epoch": 0.5066074353279038, "grad_norm": 0.5009742254693379, "learning_rate": 9.856214353986597e-06, "loss": 0.4755, "step": 5645 }, { "epoch": 0.5066971797805748, "grad_norm": 0.48029360817031774, "learning_rate": 9.856090007344919e-06, "loss": 0.4008, "step": 5646 }, { "epoch": 0.5067869242332458, "grad_norm": 0.4872156387949137, "learning_rate": 9.855965607743701e-06, "loss": 0.4693, "step": 5647 }, { "epoch": 0.5068766686859169, "grad_norm": 0.4763392291689435, "learning_rate": 9.8558411551843e-06, "loss": 0.4827, "step": 5648 }, { "epoch": 0.5069664131385879, "grad_norm": 0.47736568583916694, "learning_rate": 9.855716649668079e-06, "loss": 0.4538, "step": 5649 }, { "epoch": 0.5070561575912589, "grad_norm": 0.47092366173467126, "learning_rate": 9.85559209119639e-06, "loss": 0.4318, "step": 5650 }, { "epoch": 0.50714590204393, "grad_norm": 0.54895774899353, "learning_rate": 9.855467479770594e-06, "loss": 0.4772, "step": 5651 }, { "epoch": 0.5072356464966009, "grad_norm": 0.5140467116788338, "learning_rate": 9.855342815392049e-06, "loss": 0.493, "step": 5652 }, { "epoch": 0.507325390949272, "grad_norm": 0.4795924486415845, "learning_rate": 9.855218098062115e-06, "loss": 0.4445, "step": 5653 }, { "epoch": 0.5074151354019429, "grad_norm": 0.4817938446686378, "learning_rate": 9.855093327782153e-06, "loss": 0.4338, "step": 5654 }, { "epoch": 0.507504879854614, "grad_norm": 0.5337970234605978, "learning_rate": 9.854968504553522e-06, "loss": 0.525, "step": 5655 }, { "epoch": 0.507594624307285, "grad_norm": 0.4790615174877725, "learning_rate": 9.854843628377584e-06, "loss": 0.4392, "step": 5656 }, { "epoch": 0.507684368759956, "grad_norm": 0.5287917222731398, "learning_rate": 9.854718699255701e-06, "loss": 0.5086, "step": 5657 }, { "epoch": 0.507774113212627, "grad_norm": 0.4696707585171341, "learning_rate": 9.854593717189237e-06, "loss": 0.4534, "step": 5658 }, { "epoch": 0.5078638576652981, "grad_norm": 0.4902795195392282, "learning_rate": 9.854468682179552e-06, "loss": 0.4467, "step": 5659 }, { "epoch": 0.5079536021179691, "grad_norm": 0.4257658202803229, "learning_rate": 9.854343594228013e-06, "loss": 0.3768, "step": 5660 }, { "epoch": 0.5080433465706401, "grad_norm": 0.5070691130037451, "learning_rate": 9.85421845333598e-06, "loss": 0.4621, "step": 5661 }, { "epoch": 0.5081330910233112, "grad_norm": 0.5124454118431913, "learning_rate": 9.854093259504822e-06, "loss": 0.4677, "step": 5662 }, { "epoch": 0.5082228354759821, "grad_norm": 0.44292029339818606, "learning_rate": 9.853968012735901e-06, "loss": 0.4125, "step": 5663 }, { "epoch": 0.5083125799286532, "grad_norm": 0.490146580862719, "learning_rate": 9.853842713030585e-06, "loss": 0.487, "step": 5664 }, { "epoch": 0.5084023243813242, "grad_norm": 0.47102211110684544, "learning_rate": 9.85371736039024e-06, "loss": 0.4855, "step": 5665 }, { "epoch": 0.5084920688339952, "grad_norm": 0.46234576276050227, "learning_rate": 9.853591954816233e-06, "loss": 0.4192, "step": 5666 }, { "epoch": 0.5085818132866662, "grad_norm": 0.4955186683316008, "learning_rate": 9.853466496309931e-06, "loss": 0.4971, "step": 5667 }, { "epoch": 0.5086715577393373, "grad_norm": 0.5382971842664331, "learning_rate": 9.853340984872703e-06, "loss": 0.522, "step": 5668 }, { "epoch": 0.5087613021920082, "grad_norm": 0.49635436091175494, "learning_rate": 9.853215420505917e-06, "loss": 0.4726, "step": 5669 }, { "epoch": 0.5088510466446793, "grad_norm": 0.4573651021801654, "learning_rate": 9.853089803210944e-06, "loss": 0.3659, "step": 5670 }, { "epoch": 0.5089407910973502, "grad_norm": 0.5080583248568966, "learning_rate": 9.852964132989153e-06, "loss": 0.4825, "step": 5671 }, { "epoch": 0.5090305355500213, "grad_norm": 0.515328326249515, "learning_rate": 9.852838409841913e-06, "loss": 0.4422, "step": 5672 }, { "epoch": 0.5091202800026924, "grad_norm": 0.4147369182783466, "learning_rate": 9.852712633770598e-06, "loss": 0.3432, "step": 5673 }, { "epoch": 0.5092100244553633, "grad_norm": 0.5019827311875333, "learning_rate": 9.852586804776579e-06, "loss": 0.4876, "step": 5674 }, { "epoch": 0.5092997689080344, "grad_norm": 0.47764082291238896, "learning_rate": 9.852460922861225e-06, "loss": 0.4432, "step": 5675 }, { "epoch": 0.5093895133607054, "grad_norm": 0.47569028990015216, "learning_rate": 9.852334988025913e-06, "loss": 0.4138, "step": 5676 }, { "epoch": 0.5094792578133764, "grad_norm": 0.5028645322588163, "learning_rate": 9.852209000272014e-06, "loss": 0.4643, "step": 5677 }, { "epoch": 0.5095690022660474, "grad_norm": 0.4738985471566761, "learning_rate": 9.852082959600904e-06, "loss": 0.4231, "step": 5678 }, { "epoch": 0.5096587467187185, "grad_norm": 0.5198183293308947, "learning_rate": 9.851956866013956e-06, "loss": 0.4907, "step": 5679 }, { "epoch": 0.5097484911713894, "grad_norm": 0.4893186917439119, "learning_rate": 9.851830719512545e-06, "loss": 0.4926, "step": 5680 }, { "epoch": 0.5098382356240605, "grad_norm": 0.5042040306745005, "learning_rate": 9.851704520098047e-06, "loss": 0.4474, "step": 5681 }, { "epoch": 0.5099279800767315, "grad_norm": 0.469783567888103, "learning_rate": 9.85157826777184e-06, "loss": 0.4133, "step": 5682 }, { "epoch": 0.5100177245294025, "grad_norm": 0.4173023027544551, "learning_rate": 9.851451962535298e-06, "loss": 0.3485, "step": 5683 }, { "epoch": 0.5101074689820736, "grad_norm": 0.49667549853909443, "learning_rate": 9.8513256043898e-06, "loss": 0.5248, "step": 5684 }, { "epoch": 0.5101972134347446, "grad_norm": 0.48433515279590894, "learning_rate": 9.851199193336724e-06, "loss": 0.4544, "step": 5685 }, { "epoch": 0.5102869578874156, "grad_norm": 0.5207280761634151, "learning_rate": 9.851072729377448e-06, "loss": 0.5563, "step": 5686 }, { "epoch": 0.5103767023400866, "grad_norm": 0.549310409388251, "learning_rate": 9.850946212513353e-06, "loss": 0.5273, "step": 5687 }, { "epoch": 0.5104664467927577, "grad_norm": 0.4900221906272034, "learning_rate": 9.850819642745816e-06, "loss": 0.4713, "step": 5688 }, { "epoch": 0.5105561912454286, "grad_norm": 0.5227851706404831, "learning_rate": 9.85069302007622e-06, "loss": 0.5579, "step": 5689 }, { "epoch": 0.5106459356980997, "grad_norm": 0.48111208752057044, "learning_rate": 9.850566344505946e-06, "loss": 0.4608, "step": 5690 }, { "epoch": 0.5107356801507706, "grad_norm": 0.5040084526612072, "learning_rate": 9.850439616036371e-06, "loss": 0.4867, "step": 5691 }, { "epoch": 0.5108254246034417, "grad_norm": 0.45961052714495026, "learning_rate": 9.850312834668883e-06, "loss": 0.4292, "step": 5692 }, { "epoch": 0.5109151690561127, "grad_norm": 0.4517573364055655, "learning_rate": 9.85018600040486e-06, "loss": 0.4606, "step": 5693 }, { "epoch": 0.5110049135087837, "grad_norm": 0.49120032364044386, "learning_rate": 9.85005911324569e-06, "loss": 0.4598, "step": 5694 }, { "epoch": 0.5110946579614548, "grad_norm": 0.49123882470353736, "learning_rate": 9.849932173192752e-06, "loss": 0.4477, "step": 5695 }, { "epoch": 0.5111844024141258, "grad_norm": 0.522145318918245, "learning_rate": 9.849805180247431e-06, "loss": 0.5715, "step": 5696 }, { "epoch": 0.5112741468667968, "grad_norm": 0.45502993771513556, "learning_rate": 9.849678134411116e-06, "loss": 0.4604, "step": 5697 }, { "epoch": 0.5113638913194678, "grad_norm": 0.5087670490530278, "learning_rate": 9.84955103568519e-06, "loss": 0.5365, "step": 5698 }, { "epoch": 0.5114536357721389, "grad_norm": 0.48717716622078194, "learning_rate": 9.849423884071037e-06, "loss": 0.4853, "step": 5699 }, { "epoch": 0.5115433802248098, "grad_norm": 0.47599930511998273, "learning_rate": 9.849296679570046e-06, "loss": 0.4525, "step": 5700 }, { "epoch": 0.5116331246774809, "grad_norm": 0.5017298641584028, "learning_rate": 9.849169422183605e-06, "loss": 0.5587, "step": 5701 }, { "epoch": 0.5117228691301519, "grad_norm": 0.5121519268129576, "learning_rate": 9.849042111913102e-06, "loss": 0.5212, "step": 5702 }, { "epoch": 0.5118126135828229, "grad_norm": 0.5170072772945443, "learning_rate": 9.848914748759922e-06, "loss": 0.5362, "step": 5703 }, { "epoch": 0.5119023580354939, "grad_norm": 0.5584840391823945, "learning_rate": 9.848787332725456e-06, "loss": 0.5647, "step": 5704 }, { "epoch": 0.511992102488165, "grad_norm": 0.4693692151900627, "learning_rate": 9.848659863811093e-06, "loss": 0.4794, "step": 5705 }, { "epoch": 0.5120818469408359, "grad_norm": 0.5137113145635598, "learning_rate": 9.848532342018226e-06, "loss": 0.5021, "step": 5706 }, { "epoch": 0.512171591393507, "grad_norm": 0.4968054575527234, "learning_rate": 9.848404767348245e-06, "loss": 0.4541, "step": 5707 }, { "epoch": 0.5122613358461781, "grad_norm": 0.5266216955818793, "learning_rate": 9.848277139802538e-06, "loss": 0.4963, "step": 5708 }, { "epoch": 0.512351080298849, "grad_norm": 0.5041896308664536, "learning_rate": 9.848149459382499e-06, "loss": 0.4769, "step": 5709 }, { "epoch": 0.5124408247515201, "grad_norm": 0.53483589207048, "learning_rate": 9.848021726089521e-06, "loss": 0.5257, "step": 5710 }, { "epoch": 0.512530569204191, "grad_norm": 0.4432649942714586, "learning_rate": 9.847893939924996e-06, "loss": 0.4187, "step": 5711 }, { "epoch": 0.5126203136568621, "grad_norm": 0.4678712142849456, "learning_rate": 9.847766100890317e-06, "loss": 0.4186, "step": 5712 }, { "epoch": 0.5127100581095331, "grad_norm": 0.45984849813849127, "learning_rate": 9.84763820898688e-06, "loss": 0.4407, "step": 5713 }, { "epoch": 0.5127998025622041, "grad_norm": 0.49977787599693, "learning_rate": 9.847510264216077e-06, "loss": 0.4483, "step": 5714 }, { "epoch": 0.5128895470148751, "grad_norm": 0.4685482501727278, "learning_rate": 9.847382266579308e-06, "loss": 0.4389, "step": 5715 }, { "epoch": 0.5129792914675462, "grad_norm": 0.5157576034708325, "learning_rate": 9.847254216077967e-06, "loss": 0.481, "step": 5716 }, { "epoch": 0.5130690359202171, "grad_norm": 0.43466118983660246, "learning_rate": 9.847126112713447e-06, "loss": 0.3829, "step": 5717 }, { "epoch": 0.5131587803728882, "grad_norm": 0.562163668251459, "learning_rate": 9.846997956487147e-06, "loss": 0.5929, "step": 5718 }, { "epoch": 0.5132485248255593, "grad_norm": 0.45216863521940565, "learning_rate": 9.846869747400467e-06, "loss": 0.4447, "step": 5719 }, { "epoch": 0.5133382692782302, "grad_norm": 0.5064944107378102, "learning_rate": 9.846741485454805e-06, "loss": 0.4994, "step": 5720 }, { "epoch": 0.5134280137309013, "grad_norm": 0.5046206075611237, "learning_rate": 9.846613170651556e-06, "loss": 0.5212, "step": 5721 }, { "epoch": 0.5135177581835723, "grad_norm": 0.5267665069467066, "learning_rate": 9.846484802992122e-06, "loss": 0.5176, "step": 5722 }, { "epoch": 0.5136075026362433, "grad_norm": 0.47225967664752994, "learning_rate": 9.846356382477904e-06, "loss": 0.4285, "step": 5723 }, { "epoch": 0.5136972470889143, "grad_norm": 0.47401270568857395, "learning_rate": 9.8462279091103e-06, "loss": 0.4381, "step": 5724 }, { "epoch": 0.5137869915415854, "grad_norm": 0.4691260985916442, "learning_rate": 9.846099382890715e-06, "loss": 0.4129, "step": 5725 }, { "epoch": 0.5138767359942563, "grad_norm": 0.49128858256858493, "learning_rate": 9.845970803820547e-06, "loss": 0.4521, "step": 5726 }, { "epoch": 0.5139664804469274, "grad_norm": 0.47773219790709676, "learning_rate": 9.845842171901196e-06, "loss": 0.4425, "step": 5727 }, { "epoch": 0.5140562248995983, "grad_norm": 0.49752692347096683, "learning_rate": 9.84571348713407e-06, "loss": 0.4951, "step": 5728 }, { "epoch": 0.5141459693522694, "grad_norm": 0.5054455567600271, "learning_rate": 9.845584749520572e-06, "loss": 0.4832, "step": 5729 }, { "epoch": 0.5142357138049405, "grad_norm": 0.5006909810237387, "learning_rate": 9.845455959062104e-06, "loss": 0.4935, "step": 5730 }, { "epoch": 0.5143254582576114, "grad_norm": 0.4780361752396039, "learning_rate": 9.84532711576007e-06, "loss": 0.4861, "step": 5731 }, { "epoch": 0.5144152027102825, "grad_norm": 0.46161873563259276, "learning_rate": 9.845198219615877e-06, "loss": 0.4597, "step": 5732 }, { "epoch": 0.5145049471629535, "grad_norm": 0.5278279225795612, "learning_rate": 9.84506927063093e-06, "loss": 0.5132, "step": 5733 }, { "epoch": 0.5145946916156245, "grad_norm": 0.5128501224217082, "learning_rate": 9.844940268806635e-06, "loss": 0.5675, "step": 5734 }, { "epoch": 0.5146844360682955, "grad_norm": 0.4609800939114512, "learning_rate": 9.8448112141444e-06, "loss": 0.4054, "step": 5735 }, { "epoch": 0.5147741805209666, "grad_norm": 0.4642237213276628, "learning_rate": 9.844682106645629e-06, "loss": 0.4532, "step": 5736 }, { "epoch": 0.5148639249736375, "grad_norm": 0.4762271921602506, "learning_rate": 9.844552946311733e-06, "loss": 0.4839, "step": 5737 }, { "epoch": 0.5149536694263086, "grad_norm": 0.44910000225595414, "learning_rate": 9.844423733144124e-06, "loss": 0.3884, "step": 5738 }, { "epoch": 0.5150434138789796, "grad_norm": 0.49938124336087747, "learning_rate": 9.844294467144203e-06, "loss": 0.5172, "step": 5739 }, { "epoch": 0.5151331583316506, "grad_norm": 0.44282118096804296, "learning_rate": 9.844165148313388e-06, "loss": 0.4057, "step": 5740 }, { "epoch": 0.5152229027843216, "grad_norm": 0.5122101597071388, "learning_rate": 9.844035776653083e-06, "loss": 0.4999, "step": 5741 }, { "epoch": 0.5153126472369927, "grad_norm": 0.5054013791305864, "learning_rate": 9.8439063521647e-06, "loss": 0.5365, "step": 5742 }, { "epoch": 0.5154023916896637, "grad_norm": 0.4861239245626004, "learning_rate": 9.843776874849653e-06, "loss": 0.4576, "step": 5743 }, { "epoch": 0.5154921361423347, "grad_norm": 0.5070401754038543, "learning_rate": 9.843647344709354e-06, "loss": 0.4669, "step": 5744 }, { "epoch": 0.5155818805950058, "grad_norm": 0.5477506099855136, "learning_rate": 9.843517761745213e-06, "loss": 0.5357, "step": 5745 }, { "epoch": 0.5156716250476767, "grad_norm": 0.4881356297293872, "learning_rate": 9.843388125958646e-06, "loss": 0.4684, "step": 5746 }, { "epoch": 0.5157613695003478, "grad_norm": 0.49020503713557206, "learning_rate": 9.843258437351064e-06, "loss": 0.4124, "step": 5747 }, { "epoch": 0.5158511139530187, "grad_norm": 0.5020572065458301, "learning_rate": 9.843128695923885e-06, "loss": 0.4927, "step": 5748 }, { "epoch": 0.5159408584056898, "grad_norm": 0.4685984794721833, "learning_rate": 9.84299890167852e-06, "loss": 0.473, "step": 5749 }, { "epoch": 0.5160306028583608, "grad_norm": 0.5224411321658738, "learning_rate": 9.842869054616387e-06, "loss": 0.5207, "step": 5750 }, { "epoch": 0.5161203473110318, "grad_norm": 0.44776892891261205, "learning_rate": 9.8427391547389e-06, "loss": 0.4206, "step": 5751 }, { "epoch": 0.5162100917637028, "grad_norm": 0.47834001824943984, "learning_rate": 9.842609202047478e-06, "loss": 0.455, "step": 5752 }, { "epoch": 0.5162998362163739, "grad_norm": 0.4957137394438583, "learning_rate": 9.842479196543537e-06, "loss": 0.487, "step": 5753 }, { "epoch": 0.516389580669045, "grad_norm": 0.4724721356081964, "learning_rate": 9.842349138228496e-06, "loss": 0.4407, "step": 5754 }, { "epoch": 0.5164793251217159, "grad_norm": 0.49219717329765766, "learning_rate": 9.84221902710377e-06, "loss": 0.4897, "step": 5755 }, { "epoch": 0.516569069574387, "grad_norm": 0.5287709820637865, "learning_rate": 9.842088863170782e-06, "loss": 0.5107, "step": 5756 }, { "epoch": 0.5166588140270579, "grad_norm": 0.5516597944181237, "learning_rate": 9.841958646430951e-06, "loss": 0.5721, "step": 5757 }, { "epoch": 0.516748558479729, "grad_norm": 0.46138394115601816, "learning_rate": 9.841828376885695e-06, "loss": 0.4446, "step": 5758 }, { "epoch": 0.5168383029324, "grad_norm": 0.5009051858501894, "learning_rate": 9.841698054536435e-06, "loss": 0.4619, "step": 5759 }, { "epoch": 0.516928047385071, "grad_norm": 0.47431853920582845, "learning_rate": 9.841567679384594e-06, "loss": 0.4925, "step": 5760 }, { "epoch": 0.517017791837742, "grad_norm": 0.477820223118353, "learning_rate": 9.841437251431593e-06, "loss": 0.4878, "step": 5761 }, { "epoch": 0.5171075362904131, "grad_norm": 0.47153929950565565, "learning_rate": 9.841306770678854e-06, "loss": 0.4737, "step": 5762 }, { "epoch": 0.517197280743084, "grad_norm": 0.45111866968246644, "learning_rate": 9.841176237127801e-06, "loss": 0.4009, "step": 5763 }, { "epoch": 0.5172870251957551, "grad_norm": 0.5040762370510381, "learning_rate": 9.841045650779856e-06, "loss": 0.5179, "step": 5764 }, { "epoch": 0.5173767696484262, "grad_norm": 0.47685717537869715, "learning_rate": 9.840915011636446e-06, "loss": 0.4439, "step": 5765 }, { "epoch": 0.5174665141010971, "grad_norm": 0.4620658216937383, "learning_rate": 9.840784319698991e-06, "loss": 0.455, "step": 5766 }, { "epoch": 0.5175562585537682, "grad_norm": 0.5068409569909863, "learning_rate": 9.84065357496892e-06, "loss": 0.493, "step": 5767 }, { "epoch": 0.5176460030064391, "grad_norm": 0.4585301208093601, "learning_rate": 9.840522777447658e-06, "loss": 0.4328, "step": 5768 }, { "epoch": 0.5177357474591102, "grad_norm": 0.44827877591696913, "learning_rate": 9.84039192713663e-06, "loss": 0.4075, "step": 5769 }, { "epoch": 0.5178254919117812, "grad_norm": 0.5031842516093972, "learning_rate": 9.840261024037267e-06, "loss": 0.4914, "step": 5770 }, { "epoch": 0.5179152363644522, "grad_norm": 0.4968432913128742, "learning_rate": 9.840130068150993e-06, "loss": 0.4788, "step": 5771 }, { "epoch": 0.5180049808171232, "grad_norm": 0.47997150874802225, "learning_rate": 9.839999059479236e-06, "loss": 0.4494, "step": 5772 }, { "epoch": 0.5180947252697943, "grad_norm": 0.4940407412691953, "learning_rate": 9.839867998023426e-06, "loss": 0.5273, "step": 5773 }, { "epoch": 0.5181844697224652, "grad_norm": 0.4601822443115114, "learning_rate": 9.839736883784994e-06, "loss": 0.4489, "step": 5774 }, { "epoch": 0.5182742141751363, "grad_norm": 0.46288547901807187, "learning_rate": 9.839605716765367e-06, "loss": 0.4294, "step": 5775 }, { "epoch": 0.5183639586278073, "grad_norm": 0.5383906117439037, "learning_rate": 9.839474496965977e-06, "loss": 0.5744, "step": 5776 }, { "epoch": 0.5184537030804783, "grad_norm": 0.45936119126052627, "learning_rate": 9.839343224388255e-06, "loss": 0.4161, "step": 5777 }, { "epoch": 0.5185434475331494, "grad_norm": 0.42760692028395264, "learning_rate": 9.839211899033632e-06, "loss": 0.3634, "step": 5778 }, { "epoch": 0.5186331919858204, "grad_norm": 0.5337868841955449, "learning_rate": 9.83908052090354e-06, "loss": 0.48, "step": 5779 }, { "epoch": 0.5187229364384914, "grad_norm": 0.46638946951973054, "learning_rate": 9.838949089999413e-06, "loss": 0.4239, "step": 5780 }, { "epoch": 0.5188126808911624, "grad_norm": 0.5235243930633403, "learning_rate": 9.838817606322683e-06, "loss": 0.5259, "step": 5781 }, { "epoch": 0.5189024253438335, "grad_norm": 0.4777157364735246, "learning_rate": 9.838686069874785e-06, "loss": 0.4589, "step": 5782 }, { "epoch": 0.5189921697965044, "grad_norm": 0.45590921940364815, "learning_rate": 9.838554480657154e-06, "loss": 0.4314, "step": 5783 }, { "epoch": 0.5190819142491755, "grad_norm": 0.49492572727473866, "learning_rate": 9.838422838671223e-06, "loss": 0.4452, "step": 5784 }, { "epoch": 0.5191716587018465, "grad_norm": 0.5070553382969737, "learning_rate": 9.83829114391843e-06, "loss": 0.476, "step": 5785 }, { "epoch": 0.5192614031545175, "grad_norm": 0.47827408589788634, "learning_rate": 9.83815939640021e-06, "loss": 0.5006, "step": 5786 }, { "epoch": 0.5193511476071885, "grad_norm": 0.5030495760099508, "learning_rate": 9.838027596117999e-06, "loss": 0.4537, "step": 5787 }, { "epoch": 0.5194408920598595, "grad_norm": 0.4857928404660635, "learning_rate": 9.837895743073235e-06, "loss": 0.4796, "step": 5788 }, { "epoch": 0.5195306365125306, "grad_norm": 0.4519522686976434, "learning_rate": 9.837763837267358e-06, "loss": 0.4152, "step": 5789 }, { "epoch": 0.5196203809652016, "grad_norm": 0.47156458958953756, "learning_rate": 9.837631878701804e-06, "loss": 0.4324, "step": 5790 }, { "epoch": 0.5197101254178726, "grad_norm": 0.4738396752467419, "learning_rate": 9.83749986737801e-06, "loss": 0.4412, "step": 5791 }, { "epoch": 0.5197998698705436, "grad_norm": 0.4864401142617824, "learning_rate": 9.837367803297422e-06, "loss": 0.4365, "step": 5792 }, { "epoch": 0.5198896143232147, "grad_norm": 0.5125483993216212, "learning_rate": 9.837235686461478e-06, "loss": 0.55, "step": 5793 }, { "epoch": 0.5199793587758856, "grad_norm": 0.5002951966756369, "learning_rate": 9.837103516871616e-06, "loss": 0.4798, "step": 5794 }, { "epoch": 0.5200691032285567, "grad_norm": 0.48322509629529653, "learning_rate": 9.836971294529278e-06, "loss": 0.4643, "step": 5795 }, { "epoch": 0.5201588476812277, "grad_norm": 0.505372691583528, "learning_rate": 9.836839019435908e-06, "loss": 0.4938, "step": 5796 }, { "epoch": 0.5202485921338987, "grad_norm": 0.5306776546828007, "learning_rate": 9.836706691592948e-06, "loss": 0.5614, "step": 5797 }, { "epoch": 0.5203383365865697, "grad_norm": 0.5237956016324103, "learning_rate": 9.83657431100184e-06, "loss": 0.5284, "step": 5798 }, { "epoch": 0.5204280810392408, "grad_norm": 0.4285965518867621, "learning_rate": 9.83644187766403e-06, "loss": 0.3997, "step": 5799 }, { "epoch": 0.5205178254919118, "grad_norm": 0.4921275877797598, "learning_rate": 9.83630939158096e-06, "loss": 0.4439, "step": 5800 }, { "epoch": 0.5206075699445828, "grad_norm": 0.5069971437212775, "learning_rate": 9.836176852754076e-06, "loss": 0.5641, "step": 5801 }, { "epoch": 0.5206973143972539, "grad_norm": 0.49619036255957855, "learning_rate": 9.836044261184822e-06, "loss": 0.4959, "step": 5802 }, { "epoch": 0.5207870588499248, "grad_norm": 0.45763085830821265, "learning_rate": 9.835911616874648e-06, "loss": 0.4359, "step": 5803 }, { "epoch": 0.5208768033025959, "grad_norm": 0.47846964605092, "learning_rate": 9.835778919824995e-06, "loss": 0.4556, "step": 5804 }, { "epoch": 0.5209665477552669, "grad_norm": 0.4819773064233019, "learning_rate": 9.835646170037314e-06, "loss": 0.4224, "step": 5805 }, { "epoch": 0.5210562922079379, "grad_norm": 0.4682348735610754, "learning_rate": 9.835513367513052e-06, "loss": 0.4952, "step": 5806 }, { "epoch": 0.5211460366606089, "grad_norm": 0.46142394855020075, "learning_rate": 9.835380512253655e-06, "loss": 0.4306, "step": 5807 }, { "epoch": 0.52123578111328, "grad_norm": 0.5007426822190489, "learning_rate": 9.835247604260576e-06, "loss": 0.4503, "step": 5808 }, { "epoch": 0.5213255255659509, "grad_norm": 0.5336966996519522, "learning_rate": 9.835114643535262e-06, "loss": 0.4921, "step": 5809 }, { "epoch": 0.521415270018622, "grad_norm": 0.5068823289365649, "learning_rate": 9.834981630079164e-06, "loss": 0.4666, "step": 5810 }, { "epoch": 0.5215050144712929, "grad_norm": 0.4639334573138247, "learning_rate": 9.83484856389373e-06, "loss": 0.4509, "step": 5811 }, { "epoch": 0.521594758923964, "grad_norm": 0.46439661954549893, "learning_rate": 9.834715444980414e-06, "loss": 0.4431, "step": 5812 }, { "epoch": 0.5216845033766351, "grad_norm": 0.4990439786751908, "learning_rate": 9.834582273340668e-06, "loss": 0.5006, "step": 5813 }, { "epoch": 0.521774247829306, "grad_norm": 0.48020149109848137, "learning_rate": 9.834449048975942e-06, "loss": 0.4764, "step": 5814 }, { "epoch": 0.5218639922819771, "grad_norm": 0.5009093341450815, "learning_rate": 9.83431577188769e-06, "loss": 0.4681, "step": 5815 }, { "epoch": 0.5219537367346481, "grad_norm": 0.548135858555083, "learning_rate": 9.834182442077367e-06, "loss": 0.476, "step": 5816 }, { "epoch": 0.5220434811873191, "grad_norm": 0.5004415691205165, "learning_rate": 9.834049059546425e-06, "loss": 0.4901, "step": 5817 }, { "epoch": 0.5221332256399901, "grad_norm": 0.49994648307743206, "learning_rate": 9.83391562429632e-06, "loss": 0.4697, "step": 5818 }, { "epoch": 0.5222229700926612, "grad_norm": 0.5107905329545226, "learning_rate": 9.833782136328508e-06, "loss": 0.5471, "step": 5819 }, { "epoch": 0.5223127145453321, "grad_norm": 0.4528764720805843, "learning_rate": 9.83364859564444e-06, "loss": 0.4362, "step": 5820 }, { "epoch": 0.5224024589980032, "grad_norm": 0.49085311857417574, "learning_rate": 9.833515002245577e-06, "loss": 0.4557, "step": 5821 }, { "epoch": 0.5224922034506742, "grad_norm": 0.5494445242655243, "learning_rate": 9.833381356133376e-06, "loss": 0.5501, "step": 5822 }, { "epoch": 0.5225819479033452, "grad_norm": 0.5048742450113195, "learning_rate": 9.833247657309293e-06, "loss": 0.4669, "step": 5823 }, { "epoch": 0.5226716923560163, "grad_norm": 0.4745417172632089, "learning_rate": 9.833113905774785e-06, "loss": 0.4896, "step": 5824 }, { "epoch": 0.5227614368086873, "grad_norm": 0.4783597832129368, "learning_rate": 9.832980101531313e-06, "loss": 0.4857, "step": 5825 }, { "epoch": 0.5228511812613583, "grad_norm": 0.49121045819718157, "learning_rate": 9.832846244580336e-06, "loss": 0.495, "step": 5826 }, { "epoch": 0.5229409257140293, "grad_norm": 0.5038763134565531, "learning_rate": 9.832712334923312e-06, "loss": 0.4075, "step": 5827 }, { "epoch": 0.5230306701667004, "grad_norm": 0.4598147981360785, "learning_rate": 9.832578372561704e-06, "loss": 0.4279, "step": 5828 }, { "epoch": 0.5231204146193713, "grad_norm": 0.47920332014838163, "learning_rate": 9.83244435749697e-06, "loss": 0.426, "step": 5829 }, { "epoch": 0.5232101590720424, "grad_norm": 0.4620745428694311, "learning_rate": 9.832310289730574e-06, "loss": 0.4444, "step": 5830 }, { "epoch": 0.5232999035247133, "grad_norm": 0.49911536947394824, "learning_rate": 9.832176169263974e-06, "loss": 0.504, "step": 5831 }, { "epoch": 0.5233896479773844, "grad_norm": 0.4543192526100564, "learning_rate": 9.83204199609864e-06, "loss": 0.4128, "step": 5832 }, { "epoch": 0.5234793924300554, "grad_norm": 0.4575163557958861, "learning_rate": 9.831907770236028e-06, "loss": 0.425, "step": 5833 }, { "epoch": 0.5235691368827264, "grad_norm": 0.49140987059724955, "learning_rate": 9.831773491677607e-06, "loss": 0.4352, "step": 5834 }, { "epoch": 0.5236588813353975, "grad_norm": 0.5095444158181057, "learning_rate": 9.831639160424838e-06, "loss": 0.5237, "step": 5835 }, { "epoch": 0.5237486257880685, "grad_norm": 0.4661162385181483, "learning_rate": 9.831504776479189e-06, "loss": 0.4534, "step": 5836 }, { "epoch": 0.5238383702407395, "grad_norm": 0.4676828201339124, "learning_rate": 9.83137033984212e-06, "loss": 0.4142, "step": 5837 }, { "epoch": 0.5239281146934105, "grad_norm": 0.5001488735319498, "learning_rate": 9.831235850515106e-06, "loss": 0.5087, "step": 5838 }, { "epoch": 0.5240178591460816, "grad_norm": 0.4577238079469982, "learning_rate": 9.831101308499605e-06, "loss": 0.4743, "step": 5839 }, { "epoch": 0.5241076035987525, "grad_norm": 0.4606633410376205, "learning_rate": 9.83096671379709e-06, "loss": 0.4783, "step": 5840 }, { "epoch": 0.5241973480514236, "grad_norm": 0.5294607167885124, "learning_rate": 9.830832066409026e-06, "loss": 0.5429, "step": 5841 }, { "epoch": 0.5242870925040946, "grad_norm": 0.5095823110410199, "learning_rate": 9.830697366336883e-06, "loss": 0.4636, "step": 5842 }, { "epoch": 0.5243768369567656, "grad_norm": 0.5185135243280959, "learning_rate": 9.830562613582129e-06, "loss": 0.5245, "step": 5843 }, { "epoch": 0.5244665814094366, "grad_norm": 0.44819716891996936, "learning_rate": 9.830427808146233e-06, "loss": 0.4211, "step": 5844 }, { "epoch": 0.5245563258621077, "grad_norm": 0.4608413037286411, "learning_rate": 9.830292950030666e-06, "loss": 0.4509, "step": 5845 }, { "epoch": 0.5246460703147787, "grad_norm": 0.5285266018067201, "learning_rate": 9.830158039236902e-06, "loss": 0.5229, "step": 5846 }, { "epoch": 0.5247358147674497, "grad_norm": 0.46458868696072025, "learning_rate": 9.830023075766405e-06, "loss": 0.4414, "step": 5847 }, { "epoch": 0.5248255592201208, "grad_norm": 0.47943127460482804, "learning_rate": 9.829888059620653e-06, "loss": 0.4542, "step": 5848 }, { "epoch": 0.5249153036727917, "grad_norm": 0.46504930314767035, "learning_rate": 9.829752990801116e-06, "loss": 0.4, "step": 5849 }, { "epoch": 0.5250050481254628, "grad_norm": 0.5244504725999584, "learning_rate": 9.829617869309268e-06, "loss": 0.5323, "step": 5850 }, { "epoch": 0.5250947925781337, "grad_norm": 0.509711743257169, "learning_rate": 9.82948269514658e-06, "loss": 0.4643, "step": 5851 }, { "epoch": 0.5251845370308048, "grad_norm": 0.4574344690930487, "learning_rate": 9.829347468314531e-06, "loss": 0.4239, "step": 5852 }, { "epoch": 0.5252742814834758, "grad_norm": 0.4916588272131998, "learning_rate": 9.829212188814591e-06, "loss": 0.4241, "step": 5853 }, { "epoch": 0.5253640259361468, "grad_norm": 0.558699582261274, "learning_rate": 9.829076856648241e-06, "loss": 0.5614, "step": 5854 }, { "epoch": 0.5254537703888178, "grad_norm": 0.44520009711231473, "learning_rate": 9.82894147181695e-06, "loss": 0.3873, "step": 5855 }, { "epoch": 0.5255435148414889, "grad_norm": 0.45600128347423924, "learning_rate": 9.828806034322198e-06, "loss": 0.4209, "step": 5856 }, { "epoch": 0.5256332592941598, "grad_norm": 0.4729869245509415, "learning_rate": 9.828670544165462e-06, "loss": 0.3927, "step": 5857 }, { "epoch": 0.5257230037468309, "grad_norm": 0.4988339784156047, "learning_rate": 9.82853500134822e-06, "loss": 0.4734, "step": 5858 }, { "epoch": 0.525812748199502, "grad_norm": 0.459604405338623, "learning_rate": 9.828399405871948e-06, "loss": 0.4373, "step": 5859 }, { "epoch": 0.5259024926521729, "grad_norm": 0.4511365351586275, "learning_rate": 9.82826375773813e-06, "loss": 0.45, "step": 5860 }, { "epoch": 0.525992237104844, "grad_norm": 0.5198466852080348, "learning_rate": 9.828128056948239e-06, "loss": 0.4974, "step": 5861 }, { "epoch": 0.526081981557515, "grad_norm": 0.4623979457380522, "learning_rate": 9.827992303503758e-06, "loss": 0.3961, "step": 5862 }, { "epoch": 0.526171726010186, "grad_norm": 0.563456184540455, "learning_rate": 9.827856497406167e-06, "loss": 0.5373, "step": 5863 }, { "epoch": 0.526261470462857, "grad_norm": 0.48916107576585643, "learning_rate": 9.827720638656947e-06, "loss": 0.4616, "step": 5864 }, { "epoch": 0.526351214915528, "grad_norm": 0.43585391199320905, "learning_rate": 9.827584727257583e-06, "loss": 0.3943, "step": 5865 }, { "epoch": 0.526440959368199, "grad_norm": 0.46158435537065073, "learning_rate": 9.82744876320955e-06, "loss": 0.4671, "step": 5866 }, { "epoch": 0.5265307038208701, "grad_norm": 0.48673406014078824, "learning_rate": 9.827312746514337e-06, "loss": 0.4698, "step": 5867 }, { "epoch": 0.526620448273541, "grad_norm": 0.49495492933923046, "learning_rate": 9.827176677173425e-06, "loss": 0.4679, "step": 5868 }, { "epoch": 0.5267101927262121, "grad_norm": 0.5412882043376793, "learning_rate": 9.827040555188298e-06, "loss": 0.5008, "step": 5869 }, { "epoch": 0.5267999371788832, "grad_norm": 0.512777737768873, "learning_rate": 9.826904380560441e-06, "loss": 0.4912, "step": 5870 }, { "epoch": 0.5268896816315541, "grad_norm": 0.508927288053962, "learning_rate": 9.82676815329134e-06, "loss": 0.5079, "step": 5871 }, { "epoch": 0.5269794260842252, "grad_norm": 0.5034623586053701, "learning_rate": 9.826631873382477e-06, "loss": 0.4369, "step": 5872 }, { "epoch": 0.5270691705368962, "grad_norm": 0.5159530860859443, "learning_rate": 9.826495540835342e-06, "loss": 0.482, "step": 5873 }, { "epoch": 0.5271589149895672, "grad_norm": 0.4895359343060671, "learning_rate": 9.826359155651422e-06, "loss": 0.4723, "step": 5874 }, { "epoch": 0.5272486594422382, "grad_norm": 0.5082933860306866, "learning_rate": 9.8262227178322e-06, "loss": 0.4483, "step": 5875 }, { "epoch": 0.5273384038949093, "grad_norm": 0.4978954804554653, "learning_rate": 9.82608622737917e-06, "loss": 0.4501, "step": 5876 }, { "epoch": 0.5274281483475802, "grad_norm": 0.474984440107278, "learning_rate": 9.825949684293816e-06, "loss": 0.4416, "step": 5877 }, { "epoch": 0.5275178928002513, "grad_norm": 0.43916488018178274, "learning_rate": 9.825813088577627e-06, "loss": 0.4314, "step": 5878 }, { "epoch": 0.5276076372529223, "grad_norm": 0.49871329991693114, "learning_rate": 9.825676440232097e-06, "loss": 0.4259, "step": 5879 }, { "epoch": 0.5276973817055933, "grad_norm": 0.5320563492651189, "learning_rate": 9.825539739258712e-06, "loss": 0.513, "step": 5880 }, { "epoch": 0.5277871261582644, "grad_norm": 0.5295416694262551, "learning_rate": 9.825402985658965e-06, "loss": 0.4267, "step": 5881 }, { "epoch": 0.5278768706109354, "grad_norm": 0.5335797339053625, "learning_rate": 9.825266179434347e-06, "loss": 0.5134, "step": 5882 }, { "epoch": 0.5279666150636064, "grad_norm": 0.474898279394917, "learning_rate": 9.825129320586349e-06, "loss": 0.5022, "step": 5883 }, { "epoch": 0.5280563595162774, "grad_norm": 0.498410096988233, "learning_rate": 9.824992409116464e-06, "loss": 0.4407, "step": 5884 }, { "epoch": 0.5281461039689485, "grad_norm": 0.5499125472380856, "learning_rate": 9.824855445026188e-06, "loss": 0.5694, "step": 5885 }, { "epoch": 0.5282358484216194, "grad_norm": 0.45757666958126136, "learning_rate": 9.824718428317008e-06, "loss": 0.4502, "step": 5886 }, { "epoch": 0.5283255928742905, "grad_norm": 0.4640966099672753, "learning_rate": 9.824581358990426e-06, "loss": 0.4518, "step": 5887 }, { "epoch": 0.5284153373269614, "grad_norm": 0.4701361512320335, "learning_rate": 9.824444237047933e-06, "loss": 0.4424, "step": 5888 }, { "epoch": 0.5285050817796325, "grad_norm": 0.4595668787469851, "learning_rate": 9.824307062491022e-06, "loss": 0.4603, "step": 5889 }, { "epoch": 0.5285948262323035, "grad_norm": 0.43045745463839147, "learning_rate": 9.824169835321194e-06, "loss": 0.4069, "step": 5890 }, { "epoch": 0.5286845706849745, "grad_norm": 0.45491621084383677, "learning_rate": 9.824032555539942e-06, "loss": 0.4318, "step": 5891 }, { "epoch": 0.5287743151376455, "grad_norm": 0.5156537501420262, "learning_rate": 9.823895223148765e-06, "loss": 0.5196, "step": 5892 }, { "epoch": 0.5288640595903166, "grad_norm": 0.5039454498976218, "learning_rate": 9.823757838149159e-06, "loss": 0.4753, "step": 5893 }, { "epoch": 0.5289538040429876, "grad_norm": 0.5244352370399628, "learning_rate": 9.823620400542624e-06, "loss": 0.5335, "step": 5894 }, { "epoch": 0.5290435484956586, "grad_norm": 0.46973612984251617, "learning_rate": 9.82348291033066e-06, "loss": 0.4582, "step": 5895 }, { "epoch": 0.5291332929483297, "grad_norm": 0.48148076160832115, "learning_rate": 9.823345367514763e-06, "loss": 0.4792, "step": 5896 }, { "epoch": 0.5292230374010006, "grad_norm": 0.4776690030202428, "learning_rate": 9.823207772096434e-06, "loss": 0.4465, "step": 5897 }, { "epoch": 0.5293127818536717, "grad_norm": 0.4727358741240318, "learning_rate": 9.823070124077176e-06, "loss": 0.448, "step": 5898 }, { "epoch": 0.5294025263063427, "grad_norm": 0.49769671518655606, "learning_rate": 9.822932423458489e-06, "loss": 0.4593, "step": 5899 }, { "epoch": 0.5294922707590137, "grad_norm": 0.49969838080866674, "learning_rate": 9.822794670241873e-06, "loss": 0.5126, "step": 5900 }, { "epoch": 0.5295820152116847, "grad_norm": 0.4600815361059747, "learning_rate": 9.822656864428832e-06, "loss": 0.4432, "step": 5901 }, { "epoch": 0.5296717596643558, "grad_norm": 0.5302276813903098, "learning_rate": 9.822519006020868e-06, "loss": 0.4888, "step": 5902 }, { "epoch": 0.5297615041170267, "grad_norm": 0.5554274990378933, "learning_rate": 9.822381095019484e-06, "loss": 0.4738, "step": 5903 }, { "epoch": 0.5298512485696978, "grad_norm": 0.5146083172245144, "learning_rate": 9.822243131426186e-06, "loss": 0.523, "step": 5904 }, { "epoch": 0.5299409930223689, "grad_norm": 0.4738069491259111, "learning_rate": 9.822105115242477e-06, "loss": 0.4909, "step": 5905 }, { "epoch": 0.5300307374750398, "grad_norm": 0.5059743975433976, "learning_rate": 9.821967046469864e-06, "loss": 0.4814, "step": 5906 }, { "epoch": 0.5301204819277109, "grad_norm": 0.5188361446227923, "learning_rate": 9.82182892510985e-06, "loss": 0.4948, "step": 5907 }, { "epoch": 0.5302102263803818, "grad_norm": 0.49890581050315075, "learning_rate": 9.821690751163944e-06, "loss": 0.4792, "step": 5908 }, { "epoch": 0.5302999708330529, "grad_norm": 0.5278070663659332, "learning_rate": 9.82155252463365e-06, "loss": 0.5193, "step": 5909 }, { "epoch": 0.5303897152857239, "grad_norm": 0.48270725454529534, "learning_rate": 9.82141424552048e-06, "loss": 0.4361, "step": 5910 }, { "epoch": 0.5304794597383949, "grad_norm": 0.5114492205745405, "learning_rate": 9.821275913825936e-06, "loss": 0.4608, "step": 5911 }, { "epoch": 0.5305692041910659, "grad_norm": 0.4430061570258965, "learning_rate": 9.82113752955153e-06, "loss": 0.427, "step": 5912 }, { "epoch": 0.530658948643737, "grad_norm": 0.45453783542579423, "learning_rate": 9.820999092698773e-06, "loss": 0.4329, "step": 5913 }, { "epoch": 0.5307486930964079, "grad_norm": 0.47414404086963424, "learning_rate": 9.82086060326917e-06, "loss": 0.426, "step": 5914 }, { "epoch": 0.530838437549079, "grad_norm": 0.48416520679305763, "learning_rate": 9.820722061264235e-06, "loss": 0.4161, "step": 5915 }, { "epoch": 0.5309281820017501, "grad_norm": 0.4810206102604501, "learning_rate": 9.82058346668548e-06, "loss": 0.4674, "step": 5916 }, { "epoch": 0.531017926454421, "grad_norm": 0.44061940941503663, "learning_rate": 9.820444819534414e-06, "loss": 0.3965, "step": 5917 }, { "epoch": 0.5311076709070921, "grad_norm": 0.46476381203479694, "learning_rate": 9.820306119812549e-06, "loss": 0.4673, "step": 5918 }, { "epoch": 0.5311974153597631, "grad_norm": 0.5259432848470141, "learning_rate": 9.820167367521397e-06, "loss": 0.5723, "step": 5919 }, { "epoch": 0.5312871598124341, "grad_norm": 0.47673631379065673, "learning_rate": 9.820028562662473e-06, "loss": 0.4347, "step": 5920 }, { "epoch": 0.5313769042651051, "grad_norm": 0.5115611358025751, "learning_rate": 9.819889705237291e-06, "loss": 0.5034, "step": 5921 }, { "epoch": 0.5314666487177762, "grad_norm": 0.4784740842124868, "learning_rate": 9.819750795247364e-06, "loss": 0.4462, "step": 5922 }, { "epoch": 0.5315563931704471, "grad_norm": 0.5010139369103469, "learning_rate": 9.819611832694208e-06, "loss": 0.4873, "step": 5923 }, { "epoch": 0.5316461376231182, "grad_norm": 0.5104278696780928, "learning_rate": 9.819472817579336e-06, "loss": 0.4777, "step": 5924 }, { "epoch": 0.5317358820757891, "grad_norm": 0.44949572778029684, "learning_rate": 9.819333749904267e-06, "loss": 0.4281, "step": 5925 }, { "epoch": 0.5318256265284602, "grad_norm": 0.5344715826088057, "learning_rate": 9.819194629670516e-06, "loss": 0.5111, "step": 5926 }, { "epoch": 0.5319153709811312, "grad_norm": 0.4694285499403494, "learning_rate": 9.819055456879603e-06, "loss": 0.4103, "step": 5927 }, { "epoch": 0.5320051154338022, "grad_norm": 0.4382065517166419, "learning_rate": 9.818916231533041e-06, "loss": 0.3971, "step": 5928 }, { "epoch": 0.5320948598864733, "grad_norm": 0.5051753509054244, "learning_rate": 9.818776953632355e-06, "loss": 0.4874, "step": 5929 }, { "epoch": 0.5321846043391443, "grad_norm": 0.4908761144008253, "learning_rate": 9.818637623179055e-06, "loss": 0.4837, "step": 5930 }, { "epoch": 0.5322743487918153, "grad_norm": 0.5211827732800326, "learning_rate": 9.818498240174668e-06, "loss": 0.4975, "step": 5931 }, { "epoch": 0.5323640932444863, "grad_norm": 0.5060230827936298, "learning_rate": 9.81835880462071e-06, "loss": 0.4711, "step": 5932 }, { "epoch": 0.5324538376971574, "grad_norm": 0.4426698176923761, "learning_rate": 9.818219316518705e-06, "loss": 0.3739, "step": 5933 }, { "epoch": 0.5325435821498283, "grad_norm": 0.5090849498918775, "learning_rate": 9.818079775870173e-06, "loss": 0.516, "step": 5934 }, { "epoch": 0.5326333266024994, "grad_norm": 0.5516198113646426, "learning_rate": 9.817940182676634e-06, "loss": 0.5784, "step": 5935 }, { "epoch": 0.5327230710551704, "grad_norm": 0.47890978718574084, "learning_rate": 9.81780053693961e-06, "loss": 0.4561, "step": 5936 }, { "epoch": 0.5328128155078414, "grad_norm": 0.5379166613159502, "learning_rate": 9.817660838660628e-06, "loss": 0.5213, "step": 5937 }, { "epoch": 0.5329025599605124, "grad_norm": 0.5159336495890481, "learning_rate": 9.817521087841209e-06, "loss": 0.4877, "step": 5938 }, { "epoch": 0.5329923044131835, "grad_norm": 0.5468195259841925, "learning_rate": 9.817381284482875e-06, "loss": 0.5698, "step": 5939 }, { "epoch": 0.5330820488658545, "grad_norm": 0.5079377613790017, "learning_rate": 9.817241428587154e-06, "loss": 0.4463, "step": 5940 }, { "epoch": 0.5331717933185255, "grad_norm": 0.49468056069674693, "learning_rate": 9.817101520155571e-06, "loss": 0.4587, "step": 5941 }, { "epoch": 0.5332615377711966, "grad_norm": 0.5106823066124383, "learning_rate": 9.81696155918965e-06, "loss": 0.4992, "step": 5942 }, { "epoch": 0.5333512822238675, "grad_norm": 0.4633731179230596, "learning_rate": 9.816821545690918e-06, "loss": 0.4317, "step": 5943 }, { "epoch": 0.5334410266765386, "grad_norm": 0.4797805857870082, "learning_rate": 9.816681479660904e-06, "loss": 0.4652, "step": 5944 }, { "epoch": 0.5335307711292095, "grad_norm": 0.48784860682507003, "learning_rate": 9.81654136110113e-06, "loss": 0.4664, "step": 5945 }, { "epoch": 0.5336205155818806, "grad_norm": 0.4833280935573299, "learning_rate": 9.816401190013132e-06, "loss": 0.4852, "step": 5946 }, { "epoch": 0.5337102600345516, "grad_norm": 0.46831844610926027, "learning_rate": 9.816260966398433e-06, "loss": 0.4403, "step": 5947 }, { "epoch": 0.5338000044872226, "grad_norm": 0.5277212894895438, "learning_rate": 9.816120690258562e-06, "loss": 0.5005, "step": 5948 }, { "epoch": 0.5338897489398936, "grad_norm": 0.4910554279506833, "learning_rate": 9.815980361595052e-06, "loss": 0.4783, "step": 5949 }, { "epoch": 0.5339794933925647, "grad_norm": 0.4694907902856656, "learning_rate": 9.815839980409434e-06, "loss": 0.4258, "step": 5950 }, { "epoch": 0.5340692378452357, "grad_norm": 0.5178010128615491, "learning_rate": 9.815699546703235e-06, "loss": 0.4744, "step": 5951 }, { "epoch": 0.5341589822979067, "grad_norm": 0.5125603997855079, "learning_rate": 9.815559060477989e-06, "loss": 0.5093, "step": 5952 }, { "epoch": 0.5342487267505778, "grad_norm": 0.47349560215263997, "learning_rate": 9.815418521735226e-06, "loss": 0.4755, "step": 5953 }, { "epoch": 0.5343384712032487, "grad_norm": 0.46909678487407747, "learning_rate": 9.815277930476482e-06, "loss": 0.4373, "step": 5954 }, { "epoch": 0.5344282156559198, "grad_norm": 0.46319164222019976, "learning_rate": 9.815137286703287e-06, "loss": 0.4381, "step": 5955 }, { "epoch": 0.5345179601085908, "grad_norm": 0.5102817404354246, "learning_rate": 9.814996590417178e-06, "loss": 0.5018, "step": 5956 }, { "epoch": 0.5346077045612618, "grad_norm": 0.46601558982671387, "learning_rate": 9.814855841619687e-06, "loss": 0.4348, "step": 5957 }, { "epoch": 0.5346974490139328, "grad_norm": 0.5439822692553605, "learning_rate": 9.81471504031235e-06, "loss": 0.5318, "step": 5958 }, { "epoch": 0.5347871934666039, "grad_norm": 0.48164333204184184, "learning_rate": 9.814574186496704e-06, "loss": 0.4777, "step": 5959 }, { "epoch": 0.5348769379192748, "grad_norm": 0.4576300548110017, "learning_rate": 9.81443328017428e-06, "loss": 0.4379, "step": 5960 }, { "epoch": 0.5349666823719459, "grad_norm": 0.4752857410161375, "learning_rate": 9.814292321346621e-06, "loss": 0.4626, "step": 5961 }, { "epoch": 0.5350564268246168, "grad_norm": 0.46946278720626883, "learning_rate": 9.81415131001526e-06, "loss": 0.4282, "step": 5962 }, { "epoch": 0.5351461712772879, "grad_norm": 0.4345226538602921, "learning_rate": 9.814010246181737e-06, "loss": 0.4118, "step": 5963 }, { "epoch": 0.535235915729959, "grad_norm": 0.4614652758318292, "learning_rate": 9.813869129847589e-06, "loss": 0.4609, "step": 5964 }, { "epoch": 0.53532566018263, "grad_norm": 0.49146850271172543, "learning_rate": 9.813727961014357e-06, "loss": 0.4615, "step": 5965 }, { "epoch": 0.535415404635301, "grad_norm": 0.5015157986758941, "learning_rate": 9.813586739683578e-06, "loss": 0.4869, "step": 5966 }, { "epoch": 0.535505149087972, "grad_norm": 0.5410293409184654, "learning_rate": 9.813445465856794e-06, "loss": 0.4983, "step": 5967 }, { "epoch": 0.535594893540643, "grad_norm": 0.5345069258022181, "learning_rate": 9.813304139535545e-06, "loss": 0.4854, "step": 5968 }, { "epoch": 0.535684637993314, "grad_norm": 0.5077785412470249, "learning_rate": 9.813162760721371e-06, "loss": 0.506, "step": 5969 }, { "epoch": 0.5357743824459851, "grad_norm": 0.4895366625831867, "learning_rate": 9.813021329415817e-06, "loss": 0.5068, "step": 5970 }, { "epoch": 0.535864126898656, "grad_norm": 0.4893289228337739, "learning_rate": 9.812879845620423e-06, "loss": 0.4576, "step": 5971 }, { "epoch": 0.5359538713513271, "grad_norm": 0.42625201778466215, "learning_rate": 9.812738309336733e-06, "loss": 0.3887, "step": 5972 }, { "epoch": 0.5360436158039981, "grad_norm": 0.472641426691406, "learning_rate": 9.81259672056629e-06, "loss": 0.4681, "step": 5973 }, { "epoch": 0.5361333602566691, "grad_norm": 0.5062020284716712, "learning_rate": 9.812455079310637e-06, "loss": 0.5045, "step": 5974 }, { "epoch": 0.5362231047093402, "grad_norm": 0.47603426291699813, "learning_rate": 9.812313385571322e-06, "loss": 0.436, "step": 5975 }, { "epoch": 0.5363128491620112, "grad_norm": 0.4924543535808795, "learning_rate": 9.812171639349887e-06, "loss": 0.4872, "step": 5976 }, { "epoch": 0.5364025936146822, "grad_norm": 0.4775217450398222, "learning_rate": 9.81202984064788e-06, "loss": 0.4309, "step": 5977 }, { "epoch": 0.5364923380673532, "grad_norm": 0.4501087968783218, "learning_rate": 9.811887989466846e-06, "loss": 0.4304, "step": 5978 }, { "epoch": 0.5365820825200243, "grad_norm": 0.47340901765687476, "learning_rate": 9.811746085808333e-06, "loss": 0.4491, "step": 5979 }, { "epoch": 0.5366718269726952, "grad_norm": 0.5037903367505308, "learning_rate": 9.811604129673887e-06, "loss": 0.5075, "step": 5980 }, { "epoch": 0.5367615714253663, "grad_norm": 0.5057339343890163, "learning_rate": 9.811462121065059e-06, "loss": 0.475, "step": 5981 }, { "epoch": 0.5368513158780372, "grad_norm": 0.44514096931797087, "learning_rate": 9.811320059983394e-06, "loss": 0.4194, "step": 5982 }, { "epoch": 0.5369410603307083, "grad_norm": 0.5009002525202805, "learning_rate": 9.811177946430446e-06, "loss": 0.5159, "step": 5983 }, { "epoch": 0.5370308047833793, "grad_norm": 0.4704420567372775, "learning_rate": 9.81103578040776e-06, "loss": 0.4296, "step": 5984 }, { "epoch": 0.5371205492360503, "grad_norm": 0.47777518942219305, "learning_rate": 9.81089356191689e-06, "loss": 0.4558, "step": 5985 }, { "epoch": 0.5372102936887214, "grad_norm": 0.4664623403466684, "learning_rate": 9.810751290959384e-06, "loss": 0.4325, "step": 5986 }, { "epoch": 0.5373000381413924, "grad_norm": 0.5131298219517171, "learning_rate": 9.810608967536797e-06, "loss": 0.4778, "step": 5987 }, { "epoch": 0.5373897825940634, "grad_norm": 0.4799921533537915, "learning_rate": 9.810466591650678e-06, "loss": 0.4572, "step": 5988 }, { "epoch": 0.5374795270467344, "grad_norm": 0.47913378920524097, "learning_rate": 9.810324163302581e-06, "loss": 0.4739, "step": 5989 }, { "epoch": 0.5375692714994055, "grad_norm": 0.5024880641066624, "learning_rate": 9.81018168249406e-06, "loss": 0.5109, "step": 5990 }, { "epoch": 0.5376590159520764, "grad_norm": 0.534748796051517, "learning_rate": 9.810039149226669e-06, "loss": 0.5198, "step": 5991 }, { "epoch": 0.5377487604047475, "grad_norm": 0.4383738484098494, "learning_rate": 9.80989656350196e-06, "loss": 0.3987, "step": 5992 }, { "epoch": 0.5378385048574185, "grad_norm": 0.4748851097151913, "learning_rate": 9.80975392532149e-06, "loss": 0.4606, "step": 5993 }, { "epoch": 0.5379282493100895, "grad_norm": 0.4562423086406922, "learning_rate": 9.809611234686816e-06, "loss": 0.3995, "step": 5994 }, { "epoch": 0.5380179937627605, "grad_norm": 0.4628421962759714, "learning_rate": 9.80946849159949e-06, "loss": 0.4498, "step": 5995 }, { "epoch": 0.5381077382154316, "grad_norm": 0.5315841925963762, "learning_rate": 9.809325696061072e-06, "loss": 0.5134, "step": 5996 }, { "epoch": 0.5381974826681025, "grad_norm": 0.490176224767717, "learning_rate": 9.80918284807312e-06, "loss": 0.504, "step": 5997 }, { "epoch": 0.5382872271207736, "grad_norm": 0.4766321354315386, "learning_rate": 9.809039947637189e-06, "loss": 0.4855, "step": 5998 }, { "epoch": 0.5383769715734447, "grad_norm": 0.4887369499692144, "learning_rate": 9.808896994754837e-06, "loss": 0.4569, "step": 5999 }, { "epoch": 0.5384667160261156, "grad_norm": 0.5614798802121136, "learning_rate": 9.808753989427627e-06, "loss": 0.5358, "step": 6000 }, { "epoch": 0.5385564604787867, "grad_norm": 0.4804621092733034, "learning_rate": 9.808610931657116e-06, "loss": 0.4392, "step": 6001 }, { "epoch": 0.5386462049314577, "grad_norm": 0.5336248332346055, "learning_rate": 9.808467821444865e-06, "loss": 0.5341, "step": 6002 }, { "epoch": 0.5387359493841287, "grad_norm": 0.46983567385688135, "learning_rate": 9.808324658792434e-06, "loss": 0.4212, "step": 6003 }, { "epoch": 0.5388256938367997, "grad_norm": 0.4710974276389621, "learning_rate": 9.808181443701384e-06, "loss": 0.4262, "step": 6004 }, { "epoch": 0.5389154382894707, "grad_norm": 0.5076495332197672, "learning_rate": 9.808038176173279e-06, "loss": 0.5151, "step": 6005 }, { "epoch": 0.5390051827421417, "grad_norm": 0.46714105159269687, "learning_rate": 9.807894856209677e-06, "loss": 0.4511, "step": 6006 }, { "epoch": 0.5390949271948128, "grad_norm": 0.502750977711236, "learning_rate": 9.807751483812145e-06, "loss": 0.5105, "step": 6007 }, { "epoch": 0.5391846716474837, "grad_norm": 0.5018395595328347, "learning_rate": 9.807608058982246e-06, "loss": 0.5115, "step": 6008 }, { "epoch": 0.5392744161001548, "grad_norm": 0.5395087966562674, "learning_rate": 9.807464581721544e-06, "loss": 0.4851, "step": 6009 }, { "epoch": 0.5393641605528259, "grad_norm": 0.49142460068088606, "learning_rate": 9.807321052031602e-06, "loss": 0.4368, "step": 6010 }, { "epoch": 0.5394539050054968, "grad_norm": 0.5361146530795278, "learning_rate": 9.80717746991399e-06, "loss": 0.5056, "step": 6011 }, { "epoch": 0.5395436494581679, "grad_norm": 0.46389229969468276, "learning_rate": 9.807033835370267e-06, "loss": 0.4452, "step": 6012 }, { "epoch": 0.5396333939108389, "grad_norm": 0.4805759631715054, "learning_rate": 9.806890148402004e-06, "loss": 0.4721, "step": 6013 }, { "epoch": 0.5397231383635099, "grad_norm": 0.4823123660762233, "learning_rate": 9.806746409010768e-06, "loss": 0.4961, "step": 6014 }, { "epoch": 0.5398128828161809, "grad_norm": 0.5224835957945733, "learning_rate": 9.806602617198125e-06, "loss": 0.5057, "step": 6015 }, { "epoch": 0.539902627268852, "grad_norm": 0.46803886677240514, "learning_rate": 9.806458772965644e-06, "loss": 0.4195, "step": 6016 }, { "epoch": 0.5399923717215229, "grad_norm": 0.4556730604844212, "learning_rate": 9.806314876314891e-06, "loss": 0.4595, "step": 6017 }, { "epoch": 0.540082116174194, "grad_norm": 0.49607149337877254, "learning_rate": 9.80617092724744e-06, "loss": 0.5005, "step": 6018 }, { "epoch": 0.540171860626865, "grad_norm": 0.5565411511105701, "learning_rate": 9.80602692576486e-06, "loss": 0.4949, "step": 6019 }, { "epoch": 0.540261605079536, "grad_norm": 0.4737262875252376, "learning_rate": 9.80588287186872e-06, "loss": 0.4781, "step": 6020 }, { "epoch": 0.5403513495322071, "grad_norm": 0.5328964547763524, "learning_rate": 9.80573876556059e-06, "loss": 0.5212, "step": 6021 }, { "epoch": 0.540441093984878, "grad_norm": 0.49815314318396076, "learning_rate": 9.805594606842042e-06, "loss": 0.4915, "step": 6022 }, { "epoch": 0.5405308384375491, "grad_norm": 0.5166749649645254, "learning_rate": 9.80545039571465e-06, "loss": 0.5232, "step": 6023 }, { "epoch": 0.5406205828902201, "grad_norm": 0.4871785308557758, "learning_rate": 9.805306132179985e-06, "loss": 0.4871, "step": 6024 }, { "epoch": 0.5407103273428912, "grad_norm": 0.5130187582481898, "learning_rate": 9.80516181623962e-06, "loss": 0.5408, "step": 6025 }, { "epoch": 0.5408000717955621, "grad_norm": 0.47441844146669176, "learning_rate": 9.805017447895134e-06, "loss": 0.4172, "step": 6026 }, { "epoch": 0.5408898162482332, "grad_norm": 0.47541504536505774, "learning_rate": 9.804873027148095e-06, "loss": 0.4756, "step": 6027 }, { "epoch": 0.5409795607009041, "grad_norm": 0.4771292349359014, "learning_rate": 9.804728554000077e-06, "loss": 0.4527, "step": 6028 }, { "epoch": 0.5410693051535752, "grad_norm": 0.471185564470369, "learning_rate": 9.804584028452664e-06, "loss": 0.4156, "step": 6029 }, { "epoch": 0.5411590496062462, "grad_norm": 0.4892597300040764, "learning_rate": 9.804439450507425e-06, "loss": 0.501, "step": 6030 }, { "epoch": 0.5412487940589172, "grad_norm": 0.5116394792312876, "learning_rate": 9.804294820165939e-06, "loss": 0.4592, "step": 6031 }, { "epoch": 0.5413385385115883, "grad_norm": 0.5264021037559229, "learning_rate": 9.804150137429783e-06, "loss": 0.5256, "step": 6032 }, { "epoch": 0.5414282829642593, "grad_norm": 0.5367506407936418, "learning_rate": 9.804005402300534e-06, "loss": 0.5502, "step": 6033 }, { "epoch": 0.5415180274169303, "grad_norm": 0.5477458626533275, "learning_rate": 9.803860614779774e-06, "loss": 0.5232, "step": 6034 }, { "epoch": 0.5416077718696013, "grad_norm": 0.4575966358341873, "learning_rate": 9.803715774869077e-06, "loss": 0.4585, "step": 6035 }, { "epoch": 0.5416975163222724, "grad_norm": 0.48860457328721785, "learning_rate": 9.803570882570027e-06, "loss": 0.516, "step": 6036 }, { "epoch": 0.5417872607749433, "grad_norm": 0.5029809386970324, "learning_rate": 9.803425937884202e-06, "loss": 0.492, "step": 6037 }, { "epoch": 0.5418770052276144, "grad_norm": 0.5070824854282623, "learning_rate": 9.803280940813183e-06, "loss": 0.48, "step": 6038 }, { "epoch": 0.5419667496802854, "grad_norm": 0.45649345995638485, "learning_rate": 9.80313589135855e-06, "loss": 0.4123, "step": 6039 }, { "epoch": 0.5420564941329564, "grad_norm": 0.49176080096504676, "learning_rate": 9.802990789521889e-06, "loss": 0.4655, "step": 6040 }, { "epoch": 0.5421462385856274, "grad_norm": 0.4788749740024046, "learning_rate": 9.802845635304777e-06, "loss": 0.4572, "step": 6041 }, { "epoch": 0.5422359830382985, "grad_norm": 0.45693698344975353, "learning_rate": 9.8027004287088e-06, "loss": 0.4547, "step": 6042 }, { "epoch": 0.5423257274909694, "grad_norm": 0.43747988637152013, "learning_rate": 9.802555169735542e-06, "loss": 0.3814, "step": 6043 }, { "epoch": 0.5424154719436405, "grad_norm": 0.4990975624341235, "learning_rate": 9.802409858386586e-06, "loss": 0.4843, "step": 6044 }, { "epoch": 0.5425052163963116, "grad_norm": 0.4903577953892542, "learning_rate": 9.802264494663517e-06, "loss": 0.4477, "step": 6045 }, { "epoch": 0.5425949608489825, "grad_norm": 0.5429975117395064, "learning_rate": 9.802119078567921e-06, "loss": 0.496, "step": 6046 }, { "epoch": 0.5426847053016536, "grad_norm": 0.4929317510482595, "learning_rate": 9.801973610101382e-06, "loss": 0.4806, "step": 6047 }, { "epoch": 0.5427744497543245, "grad_norm": 0.5858783800335187, "learning_rate": 9.80182808926549e-06, "loss": 0.589, "step": 6048 }, { "epoch": 0.5428641942069956, "grad_norm": 0.5165743615443358, "learning_rate": 9.801682516061829e-06, "loss": 0.4967, "step": 6049 }, { "epoch": 0.5429539386596666, "grad_norm": 0.46619421710315323, "learning_rate": 9.801536890491988e-06, "loss": 0.4567, "step": 6050 }, { "epoch": 0.5430436831123376, "grad_norm": 0.49617631807082646, "learning_rate": 9.801391212557552e-06, "loss": 0.4958, "step": 6051 }, { "epoch": 0.5431334275650086, "grad_norm": 0.46787291084048727, "learning_rate": 9.801245482260116e-06, "loss": 0.4321, "step": 6052 }, { "epoch": 0.5432231720176797, "grad_norm": 0.4636238918345011, "learning_rate": 9.801099699601263e-06, "loss": 0.4314, "step": 6053 }, { "epoch": 0.5433129164703506, "grad_norm": 0.48981497224224957, "learning_rate": 9.800953864582587e-06, "loss": 0.4866, "step": 6054 }, { "epoch": 0.5434026609230217, "grad_norm": 0.4920651667941584, "learning_rate": 9.800807977205676e-06, "loss": 0.4565, "step": 6055 }, { "epoch": 0.5434924053756928, "grad_norm": 0.4947610582228058, "learning_rate": 9.800662037472121e-06, "loss": 0.4793, "step": 6056 }, { "epoch": 0.5435821498283637, "grad_norm": 0.5095655179341658, "learning_rate": 9.800516045383516e-06, "loss": 0.5478, "step": 6057 }, { "epoch": 0.5436718942810348, "grad_norm": 0.4893846542962237, "learning_rate": 9.800370000941453e-06, "loss": 0.499, "step": 6058 }, { "epoch": 0.5437616387337058, "grad_norm": 0.469114343801136, "learning_rate": 9.800223904147521e-06, "loss": 0.4392, "step": 6059 }, { "epoch": 0.5438513831863768, "grad_norm": 0.5125648607159898, "learning_rate": 9.800077755003317e-06, "loss": 0.4694, "step": 6060 }, { "epoch": 0.5439411276390478, "grad_norm": 0.47933303406585415, "learning_rate": 9.799931553510433e-06, "loss": 0.4458, "step": 6061 }, { "epoch": 0.5440308720917189, "grad_norm": 0.505278472400564, "learning_rate": 9.799785299670463e-06, "loss": 0.4891, "step": 6062 }, { "epoch": 0.5441206165443898, "grad_norm": 0.4961104610512181, "learning_rate": 9.799638993485006e-06, "loss": 0.4847, "step": 6063 }, { "epoch": 0.5442103609970609, "grad_norm": 0.4875968929736448, "learning_rate": 9.799492634955651e-06, "loss": 0.4573, "step": 6064 }, { "epoch": 0.5443001054497318, "grad_norm": 0.48767446823717997, "learning_rate": 9.799346224084001e-06, "loss": 0.4821, "step": 6065 }, { "epoch": 0.5443898499024029, "grad_norm": 0.5295371030846034, "learning_rate": 9.79919976087165e-06, "loss": 0.5729, "step": 6066 }, { "epoch": 0.544479594355074, "grad_norm": 0.4719143136186823, "learning_rate": 9.799053245320193e-06, "loss": 0.4649, "step": 6067 }, { "epoch": 0.5445693388077449, "grad_norm": 0.4973931614398306, "learning_rate": 9.798906677431229e-06, "loss": 0.4919, "step": 6068 }, { "epoch": 0.544659083260416, "grad_norm": 0.536865758633497, "learning_rate": 9.798760057206358e-06, "loss": 0.5666, "step": 6069 }, { "epoch": 0.544748827713087, "grad_norm": 0.4544527260499495, "learning_rate": 9.798613384647179e-06, "loss": 0.411, "step": 6070 }, { "epoch": 0.544838572165758, "grad_norm": 0.4826773174519909, "learning_rate": 9.79846665975529e-06, "loss": 0.4326, "step": 6071 }, { "epoch": 0.544928316618429, "grad_norm": 0.5061828196575965, "learning_rate": 9.798319882532292e-06, "loss": 0.4594, "step": 6072 }, { "epoch": 0.5450180610711001, "grad_norm": 0.5169710575301413, "learning_rate": 9.798173052979786e-06, "loss": 0.5214, "step": 6073 }, { "epoch": 0.545107805523771, "grad_norm": 0.509548851387495, "learning_rate": 9.798026171099372e-06, "loss": 0.522, "step": 6074 }, { "epoch": 0.5451975499764421, "grad_norm": 0.500981254581672, "learning_rate": 9.797879236892653e-06, "loss": 0.4685, "step": 6075 }, { "epoch": 0.5452872944291131, "grad_norm": 0.47158792389386617, "learning_rate": 9.797732250361232e-06, "loss": 0.3891, "step": 6076 }, { "epoch": 0.5453770388817841, "grad_norm": 0.4830444865659246, "learning_rate": 9.79758521150671e-06, "loss": 0.4955, "step": 6077 }, { "epoch": 0.5454667833344551, "grad_norm": 0.4915451223023701, "learning_rate": 9.797438120330694e-06, "loss": 0.489, "step": 6078 }, { "epoch": 0.5455565277871262, "grad_norm": 0.5112231088567616, "learning_rate": 9.797290976834783e-06, "loss": 0.526, "step": 6079 }, { "epoch": 0.5456462722397972, "grad_norm": 0.48713901762199013, "learning_rate": 9.797143781020587e-06, "loss": 0.4674, "step": 6080 }, { "epoch": 0.5457360166924682, "grad_norm": 0.46484497296085936, "learning_rate": 9.796996532889708e-06, "loss": 0.4387, "step": 6081 }, { "epoch": 0.5458257611451393, "grad_norm": 0.5558922691988739, "learning_rate": 9.796849232443754e-06, "loss": 0.5567, "step": 6082 }, { "epoch": 0.5459155055978102, "grad_norm": 0.4721580764772079, "learning_rate": 9.796701879684328e-06, "loss": 0.4088, "step": 6083 }, { "epoch": 0.5460052500504813, "grad_norm": 0.45690743562985453, "learning_rate": 9.796554474613041e-06, "loss": 0.4179, "step": 6084 }, { "epoch": 0.5460949945031522, "grad_norm": 0.5103752704295608, "learning_rate": 9.796407017231499e-06, "loss": 0.4517, "step": 6085 }, { "epoch": 0.5461847389558233, "grad_norm": 0.46799460915160596, "learning_rate": 9.796259507541308e-06, "loss": 0.4664, "step": 6086 }, { "epoch": 0.5462744834084943, "grad_norm": 0.502997232782326, "learning_rate": 9.79611194554408e-06, "loss": 0.5018, "step": 6087 }, { "epoch": 0.5463642278611653, "grad_norm": 0.4946150838480572, "learning_rate": 9.795964331241423e-06, "loss": 0.4442, "step": 6088 }, { "epoch": 0.5464539723138363, "grad_norm": 0.4928492834095212, "learning_rate": 9.795816664634947e-06, "loss": 0.4593, "step": 6089 }, { "epoch": 0.5465437167665074, "grad_norm": 0.4526904589174918, "learning_rate": 9.795668945726263e-06, "loss": 0.4029, "step": 6090 }, { "epoch": 0.5466334612191784, "grad_norm": 0.4686988259348465, "learning_rate": 9.795521174516977e-06, "loss": 0.4567, "step": 6091 }, { "epoch": 0.5467232056718494, "grad_norm": 0.5180579135629404, "learning_rate": 9.795373351008709e-06, "loss": 0.5283, "step": 6092 }, { "epoch": 0.5468129501245205, "grad_norm": 0.5096471103124931, "learning_rate": 9.795225475203065e-06, "loss": 0.5169, "step": 6093 }, { "epoch": 0.5469026945771914, "grad_norm": 0.5443998318358094, "learning_rate": 9.795077547101661e-06, "loss": 0.532, "step": 6094 }, { "epoch": 0.5469924390298625, "grad_norm": 0.49002398937717506, "learning_rate": 9.794929566706109e-06, "loss": 0.4955, "step": 6095 }, { "epoch": 0.5470821834825335, "grad_norm": 0.5047355294648107, "learning_rate": 9.794781534018021e-06, "loss": 0.4607, "step": 6096 }, { "epoch": 0.5471719279352045, "grad_norm": 0.47201111557678127, "learning_rate": 9.794633449039014e-06, "loss": 0.4431, "step": 6097 }, { "epoch": 0.5472616723878755, "grad_norm": 0.4684508448481699, "learning_rate": 9.794485311770703e-06, "loss": 0.4338, "step": 6098 }, { "epoch": 0.5473514168405466, "grad_norm": 0.4886949765313191, "learning_rate": 9.794337122214701e-06, "loss": 0.4538, "step": 6099 }, { "epoch": 0.5474411612932175, "grad_norm": 0.4718832312441389, "learning_rate": 9.794188880372626e-06, "loss": 0.501, "step": 6100 }, { "epoch": 0.5475309057458886, "grad_norm": 0.4300824269754358, "learning_rate": 9.794040586246096e-06, "loss": 0.3921, "step": 6101 }, { "epoch": 0.5476206501985597, "grad_norm": 0.4589325305500336, "learning_rate": 9.793892239836726e-06, "loss": 0.4451, "step": 6102 }, { "epoch": 0.5477103946512306, "grad_norm": 0.4601045554671519, "learning_rate": 9.793743841146134e-06, "loss": 0.4188, "step": 6103 }, { "epoch": 0.5478001391039017, "grad_norm": 0.44597751587674594, "learning_rate": 9.793595390175939e-06, "loss": 0.4157, "step": 6104 }, { "epoch": 0.5478898835565726, "grad_norm": 0.49383644416174743, "learning_rate": 9.793446886927761e-06, "loss": 0.4754, "step": 6105 }, { "epoch": 0.5479796280092437, "grad_norm": 0.46809161597338805, "learning_rate": 9.793298331403219e-06, "loss": 0.4471, "step": 6106 }, { "epoch": 0.5480693724619147, "grad_norm": 0.5317539134608847, "learning_rate": 9.793149723603931e-06, "loss": 0.5581, "step": 6107 }, { "epoch": 0.5481591169145857, "grad_norm": 0.5070377334273081, "learning_rate": 9.79300106353152e-06, "loss": 0.4893, "step": 6108 }, { "epoch": 0.5482488613672567, "grad_norm": 0.4695273896550934, "learning_rate": 9.792852351187608e-06, "loss": 0.4408, "step": 6109 }, { "epoch": 0.5483386058199278, "grad_norm": 0.4646618020321203, "learning_rate": 9.792703586573813e-06, "loss": 0.4057, "step": 6110 }, { "epoch": 0.5484283502725987, "grad_norm": 0.5054283650241005, "learning_rate": 9.79255476969176e-06, "loss": 0.4633, "step": 6111 }, { "epoch": 0.5485180947252698, "grad_norm": 0.4843224034487744, "learning_rate": 9.792405900543073e-06, "loss": 0.4659, "step": 6112 }, { "epoch": 0.5486078391779408, "grad_norm": 0.46766213949159186, "learning_rate": 9.792256979129373e-06, "loss": 0.4327, "step": 6113 }, { "epoch": 0.5486975836306118, "grad_norm": 0.5035535680660198, "learning_rate": 9.792108005452288e-06, "loss": 0.4532, "step": 6114 }, { "epoch": 0.5487873280832829, "grad_norm": 0.4784665038357347, "learning_rate": 9.791958979513437e-06, "loss": 0.3912, "step": 6115 }, { "epoch": 0.5488770725359539, "grad_norm": 0.49153171716933675, "learning_rate": 9.79180990131445e-06, "loss": 0.5286, "step": 6116 }, { "epoch": 0.5489668169886249, "grad_norm": 0.4738963800049182, "learning_rate": 9.791660770856952e-06, "loss": 0.4659, "step": 6117 }, { "epoch": 0.5490565614412959, "grad_norm": 0.5125680131537861, "learning_rate": 9.791511588142568e-06, "loss": 0.501, "step": 6118 }, { "epoch": 0.549146305893967, "grad_norm": 0.5237421245041401, "learning_rate": 9.791362353172924e-06, "loss": 0.5036, "step": 6119 }, { "epoch": 0.5492360503466379, "grad_norm": 0.4478436854055256, "learning_rate": 9.791213065949649e-06, "loss": 0.4076, "step": 6120 }, { "epoch": 0.549325794799309, "grad_norm": 0.48624777827046317, "learning_rate": 9.791063726474372e-06, "loss": 0.4772, "step": 6121 }, { "epoch": 0.54941553925198, "grad_norm": 0.47877338674572517, "learning_rate": 9.790914334748721e-06, "loss": 0.4373, "step": 6122 }, { "epoch": 0.549505283704651, "grad_norm": 0.5098296416932162, "learning_rate": 9.790764890774328e-06, "loss": 0.4841, "step": 6123 }, { "epoch": 0.549595028157322, "grad_norm": 0.486220130268207, "learning_rate": 9.790615394552816e-06, "loss": 0.4579, "step": 6124 }, { "epoch": 0.549684772609993, "grad_norm": 0.4495412301297397, "learning_rate": 9.790465846085819e-06, "loss": 0.433, "step": 6125 }, { "epoch": 0.5497745170626641, "grad_norm": 0.5196886111061356, "learning_rate": 9.79031624537497e-06, "loss": 0.527, "step": 6126 }, { "epoch": 0.5498642615153351, "grad_norm": 0.4866071238258848, "learning_rate": 9.790166592421898e-06, "loss": 0.4522, "step": 6127 }, { "epoch": 0.5499540059680061, "grad_norm": 0.4958347967603773, "learning_rate": 9.790016887228235e-06, "loss": 0.4272, "step": 6128 }, { "epoch": 0.5500437504206771, "grad_norm": 0.5935898697710699, "learning_rate": 9.789867129795614e-06, "loss": 0.5541, "step": 6129 }, { "epoch": 0.5501334948733482, "grad_norm": 0.4886867117303741, "learning_rate": 9.78971732012567e-06, "loss": 0.4716, "step": 6130 }, { "epoch": 0.5502232393260191, "grad_norm": 0.4781905833656077, "learning_rate": 9.789567458220037e-06, "loss": 0.4563, "step": 6131 }, { "epoch": 0.5503129837786902, "grad_norm": 0.4893657361636573, "learning_rate": 9.789417544080344e-06, "loss": 0.4248, "step": 6132 }, { "epoch": 0.5504027282313612, "grad_norm": 0.5014842143422246, "learning_rate": 9.789267577708233e-06, "loss": 0.4972, "step": 6133 }, { "epoch": 0.5504924726840322, "grad_norm": 0.45188012480680934, "learning_rate": 9.789117559105334e-06, "loss": 0.4264, "step": 6134 }, { "epoch": 0.5505822171367032, "grad_norm": 0.48333249936422706, "learning_rate": 9.788967488273286e-06, "loss": 0.4342, "step": 6135 }, { "epoch": 0.5506719615893743, "grad_norm": 0.45392102604425977, "learning_rate": 9.788817365213727e-06, "loss": 0.4237, "step": 6136 }, { "epoch": 0.5507617060420453, "grad_norm": 0.5022384149146815, "learning_rate": 9.78866718992829e-06, "loss": 0.494, "step": 6137 }, { "epoch": 0.5508514504947163, "grad_norm": 0.4727971683244558, "learning_rate": 9.788516962418615e-06, "loss": 0.4347, "step": 6138 }, { "epoch": 0.5509411949473874, "grad_norm": 0.5147202004867569, "learning_rate": 9.788366682686341e-06, "loss": 0.4826, "step": 6139 }, { "epoch": 0.5510309394000583, "grad_norm": 0.4999239613562984, "learning_rate": 9.788216350733105e-06, "loss": 0.4908, "step": 6140 }, { "epoch": 0.5511206838527294, "grad_norm": 0.5327619738577122, "learning_rate": 9.78806596656055e-06, "loss": 0.5497, "step": 6141 }, { "epoch": 0.5512104283054003, "grad_norm": 0.508178580037446, "learning_rate": 9.787915530170311e-06, "loss": 0.4907, "step": 6142 }, { "epoch": 0.5513001727580714, "grad_norm": 0.4530912027581269, "learning_rate": 9.787765041564033e-06, "loss": 0.443, "step": 6143 }, { "epoch": 0.5513899172107424, "grad_norm": 0.4900644817105893, "learning_rate": 9.787614500743355e-06, "loss": 0.4731, "step": 6144 }, { "epoch": 0.5514796616634134, "grad_norm": 0.46503265678098177, "learning_rate": 9.78746390770992e-06, "loss": 0.4201, "step": 6145 }, { "epoch": 0.5515694061160844, "grad_norm": 0.4744055402197967, "learning_rate": 9.787313262465369e-06, "loss": 0.4217, "step": 6146 }, { "epoch": 0.5516591505687555, "grad_norm": 0.5170592069910501, "learning_rate": 9.787162565011345e-06, "loss": 0.4796, "step": 6147 }, { "epoch": 0.5517488950214264, "grad_norm": 0.4825413835030096, "learning_rate": 9.787011815349495e-06, "loss": 0.4662, "step": 6148 }, { "epoch": 0.5518386394740975, "grad_norm": 0.535432219766825, "learning_rate": 9.786861013481457e-06, "loss": 0.5253, "step": 6149 }, { "epoch": 0.5519283839267686, "grad_norm": 0.4944295353900993, "learning_rate": 9.78671015940888e-06, "loss": 0.419, "step": 6150 }, { "epoch": 0.5520181283794395, "grad_norm": 0.5298269252578597, "learning_rate": 9.786559253133408e-06, "loss": 0.475, "step": 6151 }, { "epoch": 0.5521078728321106, "grad_norm": 0.46911863063700976, "learning_rate": 9.786408294656686e-06, "loss": 0.4257, "step": 6152 }, { "epoch": 0.5521976172847816, "grad_norm": 0.4900119158786874, "learning_rate": 9.786257283980361e-06, "loss": 0.4276, "step": 6153 }, { "epoch": 0.5522873617374526, "grad_norm": 0.48031234490342467, "learning_rate": 9.786106221106081e-06, "loss": 0.4674, "step": 6154 }, { "epoch": 0.5523771061901236, "grad_norm": 0.5333565986290726, "learning_rate": 9.785955106035492e-06, "loss": 0.4944, "step": 6155 }, { "epoch": 0.5524668506427947, "grad_norm": 0.4794319439342063, "learning_rate": 9.785803938770243e-06, "loss": 0.4547, "step": 6156 }, { "epoch": 0.5525565950954656, "grad_norm": 0.43931877395166674, "learning_rate": 9.78565271931198e-06, "loss": 0.4155, "step": 6157 }, { "epoch": 0.5526463395481367, "grad_norm": 0.4594208580212706, "learning_rate": 9.785501447662355e-06, "loss": 0.4327, "step": 6158 }, { "epoch": 0.5527360840008076, "grad_norm": 0.49124070784729956, "learning_rate": 9.785350123823016e-06, "loss": 0.4756, "step": 6159 }, { "epoch": 0.5528258284534787, "grad_norm": 0.46572833681327486, "learning_rate": 9.785198747795616e-06, "loss": 0.4283, "step": 6160 }, { "epoch": 0.5529155729061498, "grad_norm": 0.5805370985133431, "learning_rate": 9.785047319581805e-06, "loss": 0.5407, "step": 6161 }, { "epoch": 0.5530053173588207, "grad_norm": 0.5286218017302031, "learning_rate": 9.78489583918323e-06, "loss": 0.4751, "step": 6162 }, { "epoch": 0.5530950618114918, "grad_norm": 0.5197577090279318, "learning_rate": 9.78474430660155e-06, "loss": 0.5338, "step": 6163 }, { "epoch": 0.5531848062641628, "grad_norm": 0.4791766230061311, "learning_rate": 9.784592721838412e-06, "loss": 0.4508, "step": 6164 }, { "epoch": 0.5532745507168338, "grad_norm": 0.4755748855483321, "learning_rate": 9.78444108489547e-06, "loss": 0.4436, "step": 6165 }, { "epoch": 0.5533642951695048, "grad_norm": 0.499317399236912, "learning_rate": 9.784289395774381e-06, "loss": 0.431, "step": 6166 }, { "epoch": 0.5534540396221759, "grad_norm": 0.47736620443133226, "learning_rate": 9.784137654476798e-06, "loss": 0.4905, "step": 6167 }, { "epoch": 0.5535437840748468, "grad_norm": 0.47774265340862304, "learning_rate": 9.783985861004375e-06, "loss": 0.4565, "step": 6168 }, { "epoch": 0.5536335285275179, "grad_norm": 0.48812018806811697, "learning_rate": 9.783834015358766e-06, "loss": 0.4718, "step": 6169 }, { "epoch": 0.5537232729801889, "grad_norm": 0.5020778833219351, "learning_rate": 9.78368211754163e-06, "loss": 0.4347, "step": 6170 }, { "epoch": 0.5538130174328599, "grad_norm": 0.5119096325233821, "learning_rate": 9.78353016755462e-06, "loss": 0.46, "step": 6171 }, { "epoch": 0.553902761885531, "grad_norm": 0.5203743983542037, "learning_rate": 9.783378165399397e-06, "loss": 0.5245, "step": 6172 }, { "epoch": 0.553992506338202, "grad_norm": 0.4770817163203007, "learning_rate": 9.783226111077618e-06, "loss": 0.4628, "step": 6173 }, { "epoch": 0.554082250790873, "grad_norm": 0.5123376498157698, "learning_rate": 9.783074004590937e-06, "loss": 0.4592, "step": 6174 }, { "epoch": 0.554171995243544, "grad_norm": 0.4646170887931645, "learning_rate": 9.78292184594102e-06, "loss": 0.4564, "step": 6175 }, { "epoch": 0.5542617396962151, "grad_norm": 0.44984553607794436, "learning_rate": 9.782769635129521e-06, "loss": 0.418, "step": 6176 }, { "epoch": 0.554351484148886, "grad_norm": 0.49524165637958417, "learning_rate": 9.782617372158101e-06, "loss": 0.4616, "step": 6177 }, { "epoch": 0.5544412286015571, "grad_norm": 0.49887160088401866, "learning_rate": 9.78246505702842e-06, "loss": 0.5147, "step": 6178 }, { "epoch": 0.554530973054228, "grad_norm": 0.47936336641731664, "learning_rate": 9.782312689742142e-06, "loss": 0.4187, "step": 6179 }, { "epoch": 0.5546207175068991, "grad_norm": 0.5246413813174466, "learning_rate": 9.782160270300927e-06, "loss": 0.5253, "step": 6180 }, { "epoch": 0.5547104619595701, "grad_norm": 0.4917747850300104, "learning_rate": 9.782007798706434e-06, "loss": 0.4983, "step": 6181 }, { "epoch": 0.5548002064122411, "grad_norm": 0.5106142708596464, "learning_rate": 9.781855274960333e-06, "loss": 0.5124, "step": 6182 }, { "epoch": 0.5548899508649122, "grad_norm": 0.4795767845239876, "learning_rate": 9.781702699064281e-06, "loss": 0.4492, "step": 6183 }, { "epoch": 0.5549796953175832, "grad_norm": 0.49591211984565003, "learning_rate": 9.781550071019945e-06, "loss": 0.5088, "step": 6184 }, { "epoch": 0.5550694397702542, "grad_norm": 0.46973869405329854, "learning_rate": 9.781397390828989e-06, "loss": 0.4065, "step": 6185 }, { "epoch": 0.5551591842229252, "grad_norm": 0.4936856194170565, "learning_rate": 9.781244658493077e-06, "loss": 0.4969, "step": 6186 }, { "epoch": 0.5552489286755963, "grad_norm": 0.4914777105405008, "learning_rate": 9.781091874013875e-06, "loss": 0.4971, "step": 6187 }, { "epoch": 0.5553386731282672, "grad_norm": 0.5187466249194671, "learning_rate": 9.780939037393052e-06, "loss": 0.5475, "step": 6188 }, { "epoch": 0.5554284175809383, "grad_norm": 0.4736630755880173, "learning_rate": 9.780786148632271e-06, "loss": 0.4591, "step": 6189 }, { "epoch": 0.5555181620336093, "grad_norm": 0.4716568131023153, "learning_rate": 9.780633207733202e-06, "loss": 0.4612, "step": 6190 }, { "epoch": 0.5556079064862803, "grad_norm": 0.45577677459040156, "learning_rate": 9.78048021469751e-06, "loss": 0.4474, "step": 6191 }, { "epoch": 0.5556976509389513, "grad_norm": 0.5228069710702654, "learning_rate": 9.780327169526867e-06, "loss": 0.5423, "step": 6192 }, { "epoch": 0.5557873953916224, "grad_norm": 0.4606979077962528, "learning_rate": 9.78017407222294e-06, "loss": 0.4439, "step": 6193 }, { "epoch": 0.5558771398442933, "grad_norm": 0.4703926204056753, "learning_rate": 9.780020922787399e-06, "loss": 0.4704, "step": 6194 }, { "epoch": 0.5559668842969644, "grad_norm": 0.5161348666349446, "learning_rate": 9.779867721221916e-06, "loss": 0.478, "step": 6195 }, { "epoch": 0.5560566287496355, "grad_norm": 0.5452776692808272, "learning_rate": 9.779714467528159e-06, "loss": 0.4838, "step": 6196 }, { "epoch": 0.5561463732023064, "grad_norm": 0.4811718521051694, "learning_rate": 9.7795611617078e-06, "loss": 0.4765, "step": 6197 }, { "epoch": 0.5562361176549775, "grad_norm": 0.4785467886283873, "learning_rate": 9.779407803762512e-06, "loss": 0.4584, "step": 6198 }, { "epoch": 0.5563258621076485, "grad_norm": 0.46904140630036983, "learning_rate": 9.779254393693967e-06, "loss": 0.451, "step": 6199 }, { "epoch": 0.5564156065603195, "grad_norm": 0.47782823489940873, "learning_rate": 9.779100931503837e-06, "loss": 0.4302, "step": 6200 }, { "epoch": 0.5565053510129905, "grad_norm": 0.5050752628751227, "learning_rate": 9.778947417193799e-06, "loss": 0.4525, "step": 6201 }, { "epoch": 0.5565950954656615, "grad_norm": 0.4711456517064627, "learning_rate": 9.778793850765523e-06, "loss": 0.4283, "step": 6202 }, { "epoch": 0.5566848399183325, "grad_norm": 0.4823354602575543, "learning_rate": 9.778640232220687e-06, "loss": 0.4569, "step": 6203 }, { "epoch": 0.5567745843710036, "grad_norm": 0.5443147221063456, "learning_rate": 9.778486561560963e-06, "loss": 0.5209, "step": 6204 }, { "epoch": 0.5568643288236745, "grad_norm": 0.4788317230018987, "learning_rate": 9.778332838788029e-06, "loss": 0.4326, "step": 6205 }, { "epoch": 0.5569540732763456, "grad_norm": 0.4410980638281006, "learning_rate": 9.778179063903562e-06, "loss": 0.3919, "step": 6206 }, { "epoch": 0.5570438177290167, "grad_norm": 0.4471974861479852, "learning_rate": 9.778025236909239e-06, "loss": 0.427, "step": 6207 }, { "epoch": 0.5571335621816876, "grad_norm": 0.5786619675263956, "learning_rate": 9.777871357806736e-06, "loss": 0.5816, "step": 6208 }, { "epoch": 0.5572233066343587, "grad_norm": 0.49170942576267607, "learning_rate": 9.777717426597733e-06, "loss": 0.4365, "step": 6209 }, { "epoch": 0.5573130510870297, "grad_norm": 0.4594236661805439, "learning_rate": 9.777563443283907e-06, "loss": 0.4613, "step": 6210 }, { "epoch": 0.5574027955397007, "grad_norm": 0.5114248364237997, "learning_rate": 9.777409407866939e-06, "loss": 0.4544, "step": 6211 }, { "epoch": 0.5574925399923717, "grad_norm": 0.5411879902143624, "learning_rate": 9.777255320348507e-06, "loss": 0.5213, "step": 6212 }, { "epoch": 0.5575822844450428, "grad_norm": 0.4823243648109155, "learning_rate": 9.777101180730293e-06, "loss": 0.4409, "step": 6213 }, { "epoch": 0.5576720288977137, "grad_norm": 0.569443228250234, "learning_rate": 9.776946989013977e-06, "loss": 0.5311, "step": 6214 }, { "epoch": 0.5577617733503848, "grad_norm": 0.4676564500338231, "learning_rate": 9.776792745201241e-06, "loss": 0.4012, "step": 6215 }, { "epoch": 0.5578515178030558, "grad_norm": 0.48093614844191385, "learning_rate": 9.776638449293768e-06, "loss": 0.4577, "step": 6216 }, { "epoch": 0.5579412622557268, "grad_norm": 0.4895892873138823, "learning_rate": 9.77648410129324e-06, "loss": 0.4606, "step": 6217 }, { "epoch": 0.5580310067083979, "grad_norm": 0.4702020528288323, "learning_rate": 9.776329701201339e-06, "loss": 0.4707, "step": 6218 }, { "epoch": 0.5581207511610689, "grad_norm": 0.4935351171800278, "learning_rate": 9.776175249019753e-06, "loss": 0.4584, "step": 6219 }, { "epoch": 0.5582104956137399, "grad_norm": 0.5107377471945223, "learning_rate": 9.77602074475016e-06, "loss": 0.481, "step": 6220 }, { "epoch": 0.5583002400664109, "grad_norm": 0.4994765930435762, "learning_rate": 9.77586618839425e-06, "loss": 0.4881, "step": 6221 }, { "epoch": 0.558389984519082, "grad_norm": 0.5139578857489407, "learning_rate": 9.775711579953708e-06, "loss": 0.5207, "step": 6222 }, { "epoch": 0.5584797289717529, "grad_norm": 0.5205793928316191, "learning_rate": 9.775556919430218e-06, "loss": 0.5105, "step": 6223 }, { "epoch": 0.558569473424424, "grad_norm": 0.4668988807443456, "learning_rate": 9.775402206825468e-06, "loss": 0.4397, "step": 6224 }, { "epoch": 0.5586592178770949, "grad_norm": 0.49789160422199125, "learning_rate": 9.775247442141145e-06, "loss": 0.4483, "step": 6225 }, { "epoch": 0.558748962329766, "grad_norm": 0.5163204554599788, "learning_rate": 9.775092625378938e-06, "loss": 0.5188, "step": 6226 }, { "epoch": 0.558838706782437, "grad_norm": 0.48278479691041015, "learning_rate": 9.774937756540534e-06, "loss": 0.4825, "step": 6227 }, { "epoch": 0.558928451235108, "grad_norm": 0.4853283034196136, "learning_rate": 9.774782835627622e-06, "loss": 0.442, "step": 6228 }, { "epoch": 0.559018195687779, "grad_norm": 0.4860964607996269, "learning_rate": 9.774627862641893e-06, "loss": 0.5084, "step": 6229 }, { "epoch": 0.5591079401404501, "grad_norm": 0.4999094232401947, "learning_rate": 9.774472837585034e-06, "loss": 0.4928, "step": 6230 }, { "epoch": 0.5591976845931211, "grad_norm": 0.4557759476115205, "learning_rate": 9.77431776045874e-06, "loss": 0.4048, "step": 6231 }, { "epoch": 0.5592874290457921, "grad_norm": 0.5038049229859676, "learning_rate": 9.774162631264699e-06, "loss": 0.4911, "step": 6232 }, { "epoch": 0.5593771734984632, "grad_norm": 0.5015885821443189, "learning_rate": 9.774007450004604e-06, "loss": 0.4773, "step": 6233 }, { "epoch": 0.5594669179511341, "grad_norm": 0.48770915559753875, "learning_rate": 9.773852216680147e-06, "loss": 0.4505, "step": 6234 }, { "epoch": 0.5595566624038052, "grad_norm": 0.5000837983406473, "learning_rate": 9.77369693129302e-06, "loss": 0.4997, "step": 6235 }, { "epoch": 0.5596464068564762, "grad_norm": 0.44946916509428225, "learning_rate": 9.773541593844917e-06, "loss": 0.4056, "step": 6236 }, { "epoch": 0.5597361513091472, "grad_norm": 0.5056899357346668, "learning_rate": 9.773386204337536e-06, "loss": 0.4531, "step": 6237 }, { "epoch": 0.5598258957618182, "grad_norm": 0.47862901103633204, "learning_rate": 9.773230762772565e-06, "loss": 0.4602, "step": 6238 }, { "epoch": 0.5599156402144893, "grad_norm": 0.48550920966161054, "learning_rate": 9.773075269151703e-06, "loss": 0.4174, "step": 6239 }, { "epoch": 0.5600053846671602, "grad_norm": 0.5162762059837941, "learning_rate": 9.772919723476645e-06, "loss": 0.5271, "step": 6240 }, { "epoch": 0.5600951291198313, "grad_norm": 0.5032711327003528, "learning_rate": 9.77276412574909e-06, "loss": 0.5106, "step": 6241 }, { "epoch": 0.5601848735725024, "grad_norm": 0.49269481277656935, "learning_rate": 9.772608475970731e-06, "loss": 0.4612, "step": 6242 }, { "epoch": 0.5602746180251733, "grad_norm": 0.522606541635828, "learning_rate": 9.772452774143267e-06, "loss": 0.4923, "step": 6243 }, { "epoch": 0.5603643624778444, "grad_norm": 0.5658703902792486, "learning_rate": 9.772297020268397e-06, "loss": 0.5851, "step": 6244 }, { "epoch": 0.5604541069305153, "grad_norm": 0.455993909985214, "learning_rate": 9.772141214347818e-06, "loss": 0.4022, "step": 6245 }, { "epoch": 0.5605438513831864, "grad_norm": 0.490246765265707, "learning_rate": 9.771985356383229e-06, "loss": 0.4512, "step": 6246 }, { "epoch": 0.5606335958358574, "grad_norm": 0.5049546830390995, "learning_rate": 9.771829446376334e-06, "loss": 0.5088, "step": 6247 }, { "epoch": 0.5607233402885284, "grad_norm": 0.5266468988087297, "learning_rate": 9.771673484328826e-06, "loss": 0.5417, "step": 6248 }, { "epoch": 0.5608130847411994, "grad_norm": 0.4950396863501901, "learning_rate": 9.77151747024241e-06, "loss": 0.4932, "step": 6249 }, { "epoch": 0.5609028291938705, "grad_norm": 0.49966445332963677, "learning_rate": 9.77136140411879e-06, "loss": 0.5305, "step": 6250 }, { "epoch": 0.5609925736465414, "grad_norm": 0.48976566865760834, "learning_rate": 9.771205285959664e-06, "loss": 0.5012, "step": 6251 }, { "epoch": 0.5610823180992125, "grad_norm": 0.46009213617869443, "learning_rate": 9.771049115766735e-06, "loss": 0.4168, "step": 6252 }, { "epoch": 0.5611720625518836, "grad_norm": 0.49535557671790353, "learning_rate": 9.770892893541708e-06, "loss": 0.4622, "step": 6253 }, { "epoch": 0.5612618070045545, "grad_norm": 0.5278307488413135, "learning_rate": 9.770736619286287e-06, "loss": 0.4419, "step": 6254 }, { "epoch": 0.5613515514572256, "grad_norm": 0.5380648958738299, "learning_rate": 9.770580293002174e-06, "loss": 0.5097, "step": 6255 }, { "epoch": 0.5614412959098966, "grad_norm": 0.45978182778312837, "learning_rate": 9.770423914691075e-06, "loss": 0.4373, "step": 6256 }, { "epoch": 0.5615310403625676, "grad_norm": 0.48786378035979033, "learning_rate": 9.770267484354696e-06, "loss": 0.4755, "step": 6257 }, { "epoch": 0.5616207848152386, "grad_norm": 0.4894225009400599, "learning_rate": 9.770111001994742e-06, "loss": 0.4684, "step": 6258 }, { "epoch": 0.5617105292679097, "grad_norm": 0.48113985000777326, "learning_rate": 9.76995446761292e-06, "loss": 0.4398, "step": 6259 }, { "epoch": 0.5618002737205806, "grad_norm": 0.4891233130565954, "learning_rate": 9.769797881210936e-06, "loss": 0.4568, "step": 6260 }, { "epoch": 0.5618900181732517, "grad_norm": 0.49049993901469535, "learning_rate": 9.7696412427905e-06, "loss": 0.4846, "step": 6261 }, { "epoch": 0.5619797626259226, "grad_norm": 0.5017816988119093, "learning_rate": 9.769484552353319e-06, "loss": 0.4628, "step": 6262 }, { "epoch": 0.5620695070785937, "grad_norm": 0.5019797800128601, "learning_rate": 9.769327809901102e-06, "loss": 0.4928, "step": 6263 }, { "epoch": 0.5621592515312647, "grad_norm": 0.4828125921791414, "learning_rate": 9.769171015435558e-06, "loss": 0.428, "step": 6264 }, { "epoch": 0.5622489959839357, "grad_norm": 0.4517290177938657, "learning_rate": 9.769014168958398e-06, "loss": 0.4171, "step": 6265 }, { "epoch": 0.5623387404366068, "grad_norm": 0.4624900502929021, "learning_rate": 9.76885727047133e-06, "loss": 0.4414, "step": 6266 }, { "epoch": 0.5624284848892778, "grad_norm": 0.48621942442314076, "learning_rate": 9.76870031997607e-06, "loss": 0.4394, "step": 6267 }, { "epoch": 0.5625182293419488, "grad_norm": 0.5001196605788143, "learning_rate": 9.768543317474324e-06, "loss": 0.4628, "step": 6268 }, { "epoch": 0.5626079737946198, "grad_norm": 0.5308943403454058, "learning_rate": 9.768386262967807e-06, "loss": 0.5069, "step": 6269 }, { "epoch": 0.5626977182472909, "grad_norm": 0.4906926656263715, "learning_rate": 9.768229156458232e-06, "loss": 0.4731, "step": 6270 }, { "epoch": 0.5627874626999618, "grad_norm": 0.4930788163871203, "learning_rate": 9.768071997947313e-06, "loss": 0.4853, "step": 6271 }, { "epoch": 0.5628772071526329, "grad_norm": 0.5295193363518231, "learning_rate": 9.767914787436761e-06, "loss": 0.4997, "step": 6272 }, { "epoch": 0.5629669516053039, "grad_norm": 0.5100023927771996, "learning_rate": 9.767757524928293e-06, "loss": 0.4761, "step": 6273 }, { "epoch": 0.5630566960579749, "grad_norm": 0.4870900583472893, "learning_rate": 9.767600210423626e-06, "loss": 0.3933, "step": 6274 }, { "epoch": 0.5631464405106459, "grad_norm": 0.45491570961116495, "learning_rate": 9.76744284392447e-06, "loss": 0.4168, "step": 6275 }, { "epoch": 0.563236184963317, "grad_norm": 0.5591585086248958, "learning_rate": 9.767285425432546e-06, "loss": 0.5676, "step": 6276 }, { "epoch": 0.563325929415988, "grad_norm": 0.49392121447208986, "learning_rate": 9.767127954949568e-06, "loss": 0.4443, "step": 6277 }, { "epoch": 0.563415673868659, "grad_norm": 0.4657216904321779, "learning_rate": 9.766970432477257e-06, "loss": 0.458, "step": 6278 }, { "epoch": 0.56350541832133, "grad_norm": 0.45374705897277984, "learning_rate": 9.766812858017327e-06, "loss": 0.4294, "step": 6279 }, { "epoch": 0.563595162774001, "grad_norm": 0.47567868101276156, "learning_rate": 9.766655231571497e-06, "loss": 0.4804, "step": 6280 }, { "epoch": 0.5636849072266721, "grad_norm": 0.5082633413157527, "learning_rate": 9.766497553141487e-06, "loss": 0.5228, "step": 6281 }, { "epoch": 0.563774651679343, "grad_norm": 0.46522266481618735, "learning_rate": 9.766339822729019e-06, "loss": 0.4236, "step": 6282 }, { "epoch": 0.5638643961320141, "grad_norm": 0.5115407003177941, "learning_rate": 9.766182040335808e-06, "loss": 0.4603, "step": 6283 }, { "epoch": 0.5639541405846851, "grad_norm": 0.4818534627330544, "learning_rate": 9.766024205963578e-06, "loss": 0.501, "step": 6284 }, { "epoch": 0.5640438850373561, "grad_norm": 0.4806498855065624, "learning_rate": 9.76586631961405e-06, "loss": 0.4318, "step": 6285 }, { "epoch": 0.5641336294900271, "grad_norm": 0.4988161077162344, "learning_rate": 9.765708381288946e-06, "loss": 0.4925, "step": 6286 }, { "epoch": 0.5642233739426982, "grad_norm": 0.4991839577076894, "learning_rate": 9.765550390989987e-06, "loss": 0.4653, "step": 6287 }, { "epoch": 0.5643131183953692, "grad_norm": 0.5297429950406869, "learning_rate": 9.765392348718898e-06, "loss": 0.533, "step": 6288 }, { "epoch": 0.5644028628480402, "grad_norm": 0.47571955547544204, "learning_rate": 9.765234254477402e-06, "loss": 0.4687, "step": 6289 }, { "epoch": 0.5644926073007113, "grad_norm": 0.4584575448714674, "learning_rate": 9.765076108267221e-06, "loss": 0.4186, "step": 6290 }, { "epoch": 0.5645823517533822, "grad_norm": 0.49142985466261074, "learning_rate": 9.764917910090083e-06, "loss": 0.4835, "step": 6291 }, { "epoch": 0.5646720962060533, "grad_norm": 0.4780358365258755, "learning_rate": 9.764759659947713e-06, "loss": 0.4535, "step": 6292 }, { "epoch": 0.5647618406587243, "grad_norm": 0.484461410111311, "learning_rate": 9.764601357841834e-06, "loss": 0.4614, "step": 6293 }, { "epoch": 0.5648515851113953, "grad_norm": 0.5159633968185329, "learning_rate": 9.764443003774174e-06, "loss": 0.4905, "step": 6294 }, { "epoch": 0.5649413295640663, "grad_norm": 0.512650119012176, "learning_rate": 9.76428459774646e-06, "loss": 0.5497, "step": 6295 }, { "epoch": 0.5650310740167374, "grad_norm": 0.45960276179626836, "learning_rate": 9.764126139760419e-06, "loss": 0.4113, "step": 6296 }, { "epoch": 0.5651208184694083, "grad_norm": 0.44477066904709667, "learning_rate": 9.763967629817782e-06, "loss": 0.4205, "step": 6297 }, { "epoch": 0.5652105629220794, "grad_norm": 0.46063469076559904, "learning_rate": 9.763809067920273e-06, "loss": 0.4675, "step": 6298 }, { "epoch": 0.5653003073747503, "grad_norm": 0.4392409078234867, "learning_rate": 9.763650454069626e-06, "loss": 0.3902, "step": 6299 }, { "epoch": 0.5653900518274214, "grad_norm": 0.5298486258064165, "learning_rate": 9.763491788267566e-06, "loss": 0.5075, "step": 6300 }, { "epoch": 0.5654797962800925, "grad_norm": 0.46128402858259515, "learning_rate": 9.763333070515827e-06, "loss": 0.4335, "step": 6301 }, { "epoch": 0.5655695407327634, "grad_norm": 0.503520808734989, "learning_rate": 9.763174300816139e-06, "loss": 0.4684, "step": 6302 }, { "epoch": 0.5656592851854345, "grad_norm": 0.5194702475848859, "learning_rate": 9.763015479170232e-06, "loss": 0.5001, "step": 6303 }, { "epoch": 0.5657490296381055, "grad_norm": 0.4958911671896788, "learning_rate": 9.76285660557984e-06, "loss": 0.4813, "step": 6304 }, { "epoch": 0.5658387740907765, "grad_norm": 0.4573492534547525, "learning_rate": 9.762697680046696e-06, "loss": 0.3912, "step": 6305 }, { "epoch": 0.5659285185434475, "grad_norm": 0.5306090106873906, "learning_rate": 9.762538702572531e-06, "loss": 0.4687, "step": 6306 }, { "epoch": 0.5660182629961186, "grad_norm": 0.5094029408320138, "learning_rate": 9.76237967315908e-06, "loss": 0.5079, "step": 6307 }, { "epoch": 0.5661080074487895, "grad_norm": 0.4974858121979648, "learning_rate": 9.762220591808076e-06, "loss": 0.4904, "step": 6308 }, { "epoch": 0.5661977519014606, "grad_norm": 0.4966828409933401, "learning_rate": 9.762061458521256e-06, "loss": 0.466, "step": 6309 }, { "epoch": 0.5662874963541316, "grad_norm": 0.4857604868656332, "learning_rate": 9.761902273300357e-06, "loss": 0.4663, "step": 6310 }, { "epoch": 0.5663772408068026, "grad_norm": 0.48798611868595276, "learning_rate": 9.761743036147112e-06, "loss": 0.4808, "step": 6311 }, { "epoch": 0.5664669852594737, "grad_norm": 0.4661498351427801, "learning_rate": 9.761583747063258e-06, "loss": 0.4299, "step": 6312 }, { "epoch": 0.5665567297121447, "grad_norm": 0.5145840566985125, "learning_rate": 9.76142440605053e-06, "loss": 0.4694, "step": 6313 }, { "epoch": 0.5666464741648157, "grad_norm": 0.5206157050135969, "learning_rate": 9.761265013110671e-06, "loss": 0.435, "step": 6314 }, { "epoch": 0.5667362186174867, "grad_norm": 0.5160449166430991, "learning_rate": 9.761105568245415e-06, "loss": 0.4518, "step": 6315 }, { "epoch": 0.5668259630701578, "grad_norm": 0.4727671062446169, "learning_rate": 9.760946071456503e-06, "loss": 0.471, "step": 6316 }, { "epoch": 0.5669157075228287, "grad_norm": 0.5024029398503618, "learning_rate": 9.760786522745673e-06, "loss": 0.5038, "step": 6317 }, { "epoch": 0.5670054519754998, "grad_norm": 0.4674318439782237, "learning_rate": 9.760626922114668e-06, "loss": 0.4233, "step": 6318 }, { "epoch": 0.5670951964281707, "grad_norm": 0.44550916391069323, "learning_rate": 9.760467269565224e-06, "loss": 0.3889, "step": 6319 }, { "epoch": 0.5671849408808418, "grad_norm": 0.47234341365917426, "learning_rate": 9.760307565099087e-06, "loss": 0.4381, "step": 6320 }, { "epoch": 0.5672746853335128, "grad_norm": 0.4927049754578366, "learning_rate": 9.760147808717995e-06, "loss": 0.4625, "step": 6321 }, { "epoch": 0.5673644297861838, "grad_norm": 0.5281091140992927, "learning_rate": 9.759988000423693e-06, "loss": 0.493, "step": 6322 }, { "epoch": 0.5674541742388549, "grad_norm": 0.5043963380913191, "learning_rate": 9.75982814021792e-06, "loss": 0.4842, "step": 6323 }, { "epoch": 0.5675439186915259, "grad_norm": 0.48272037615155744, "learning_rate": 9.759668228102424e-06, "loss": 0.4431, "step": 6324 }, { "epoch": 0.5676336631441969, "grad_norm": 0.5150522476241146, "learning_rate": 9.759508264078946e-06, "loss": 0.4732, "step": 6325 }, { "epoch": 0.5677234075968679, "grad_norm": 0.5090969389828572, "learning_rate": 9.75934824814923e-06, "loss": 0.4876, "step": 6326 }, { "epoch": 0.567813152049539, "grad_norm": 0.46279756117240345, "learning_rate": 9.759188180315022e-06, "loss": 0.4519, "step": 6327 }, { "epoch": 0.5679028965022099, "grad_norm": 0.5081083401951554, "learning_rate": 9.759028060578071e-06, "loss": 0.5501, "step": 6328 }, { "epoch": 0.567992640954881, "grad_norm": 0.5718097275657077, "learning_rate": 9.758867888940118e-06, "loss": 0.5213, "step": 6329 }, { "epoch": 0.568082385407552, "grad_norm": 0.5547011817774413, "learning_rate": 9.758707665402913e-06, "loss": 0.4861, "step": 6330 }, { "epoch": 0.568172129860223, "grad_norm": 0.5063085347784101, "learning_rate": 9.758547389968202e-06, "loss": 0.4602, "step": 6331 }, { "epoch": 0.568261874312894, "grad_norm": 0.47382347375066725, "learning_rate": 9.758387062637735e-06, "loss": 0.43, "step": 6332 }, { "epoch": 0.5683516187655651, "grad_norm": 0.4970849351141016, "learning_rate": 9.758226683413257e-06, "loss": 0.4698, "step": 6333 }, { "epoch": 0.568441363218236, "grad_norm": 0.5078656466457425, "learning_rate": 9.758066252296518e-06, "loss": 0.4468, "step": 6334 }, { "epoch": 0.5685311076709071, "grad_norm": 0.46956195392385186, "learning_rate": 9.75790576928927e-06, "loss": 0.4073, "step": 6335 }, { "epoch": 0.5686208521235782, "grad_norm": 0.4506335805580036, "learning_rate": 9.757745234393264e-06, "loss": 0.4298, "step": 6336 }, { "epoch": 0.5687105965762491, "grad_norm": 0.47698249216469346, "learning_rate": 9.757584647610245e-06, "loss": 0.4465, "step": 6337 }, { "epoch": 0.5688003410289202, "grad_norm": 0.5521436259808019, "learning_rate": 9.75742400894197e-06, "loss": 0.5102, "step": 6338 }, { "epoch": 0.5688900854815911, "grad_norm": 0.5162255162736257, "learning_rate": 9.757263318390189e-06, "loss": 0.5081, "step": 6339 }, { "epoch": 0.5689798299342622, "grad_norm": 0.48655786971456333, "learning_rate": 9.757102575956653e-06, "loss": 0.4772, "step": 6340 }, { "epoch": 0.5690695743869332, "grad_norm": 0.4862815873525279, "learning_rate": 9.756941781643117e-06, "loss": 0.42, "step": 6341 }, { "epoch": 0.5691593188396042, "grad_norm": 0.47129415837135485, "learning_rate": 9.756780935451333e-06, "loss": 0.4285, "step": 6342 }, { "epoch": 0.5692490632922752, "grad_norm": 0.5223407217484725, "learning_rate": 9.756620037383056e-06, "loss": 0.4907, "step": 6343 }, { "epoch": 0.5693388077449463, "grad_norm": 0.4670180820171929, "learning_rate": 9.756459087440043e-06, "loss": 0.4536, "step": 6344 }, { "epoch": 0.5694285521976172, "grad_norm": 0.48281282301321715, "learning_rate": 9.756298085624045e-06, "loss": 0.4467, "step": 6345 }, { "epoch": 0.5695182966502883, "grad_norm": 0.5424278457057228, "learning_rate": 9.756137031936819e-06, "loss": 0.5515, "step": 6346 }, { "epoch": 0.5696080411029594, "grad_norm": 0.4432436229364708, "learning_rate": 9.755975926380123e-06, "loss": 0.3843, "step": 6347 }, { "epoch": 0.5696977855556303, "grad_norm": 0.5111621254511931, "learning_rate": 9.755814768955714e-06, "loss": 0.4268, "step": 6348 }, { "epoch": 0.5697875300083014, "grad_norm": 0.4689581577313941, "learning_rate": 9.75565355966535e-06, "loss": 0.4472, "step": 6349 }, { "epoch": 0.5698772744609724, "grad_norm": 0.5096816173585885, "learning_rate": 9.755492298510785e-06, "loss": 0.4879, "step": 6350 }, { "epoch": 0.5699670189136434, "grad_norm": 0.5056771284700943, "learning_rate": 9.755330985493783e-06, "loss": 0.4912, "step": 6351 }, { "epoch": 0.5700567633663144, "grad_norm": 0.514653028653733, "learning_rate": 9.755169620616098e-06, "loss": 0.5071, "step": 6352 }, { "epoch": 0.5701465078189855, "grad_norm": 0.44631792133444853, "learning_rate": 9.755008203879496e-06, "loss": 0.4236, "step": 6353 }, { "epoch": 0.5702362522716564, "grad_norm": 0.5261281890207223, "learning_rate": 9.754846735285733e-06, "loss": 0.4582, "step": 6354 }, { "epoch": 0.5703259967243275, "grad_norm": 0.4558567635620752, "learning_rate": 9.75468521483657e-06, "loss": 0.4449, "step": 6355 }, { "epoch": 0.5704157411769984, "grad_norm": 0.4450490826374872, "learning_rate": 9.75452364253377e-06, "loss": 0.3945, "step": 6356 }, { "epoch": 0.5705054856296695, "grad_norm": 0.4806091682202047, "learning_rate": 9.754362018379096e-06, "loss": 0.4578, "step": 6357 }, { "epoch": 0.5705952300823406, "grad_norm": 0.4893851810787305, "learning_rate": 9.754200342374308e-06, "loss": 0.4853, "step": 6358 }, { "epoch": 0.5706849745350115, "grad_norm": 0.48428520095737, "learning_rate": 9.754038614521169e-06, "loss": 0.4692, "step": 6359 }, { "epoch": 0.5707747189876826, "grad_norm": 0.47587349467768997, "learning_rate": 9.753876834821445e-06, "loss": 0.5081, "step": 6360 }, { "epoch": 0.5708644634403536, "grad_norm": 0.48825591448016853, "learning_rate": 9.7537150032769e-06, "loss": 0.4399, "step": 6361 }, { "epoch": 0.5709542078930246, "grad_norm": 0.5301533816469584, "learning_rate": 9.753553119889298e-06, "loss": 0.4603, "step": 6362 }, { "epoch": 0.5710439523456956, "grad_norm": 0.4808844672654908, "learning_rate": 9.753391184660405e-06, "loss": 0.4483, "step": 6363 }, { "epoch": 0.5711336967983667, "grad_norm": 0.5231010275491159, "learning_rate": 9.75322919759199e-06, "loss": 0.4759, "step": 6364 }, { "epoch": 0.5712234412510376, "grad_norm": 0.5348804750526669, "learning_rate": 9.753067158685813e-06, "loss": 0.5131, "step": 6365 }, { "epoch": 0.5713131857037087, "grad_norm": 0.4631422370609026, "learning_rate": 9.752905067943645e-06, "loss": 0.4218, "step": 6366 }, { "epoch": 0.5714029301563797, "grad_norm": 0.47344276375284305, "learning_rate": 9.752742925367253e-06, "loss": 0.4519, "step": 6367 }, { "epoch": 0.5714926746090507, "grad_norm": 0.5164312385434783, "learning_rate": 9.752580730958407e-06, "loss": 0.4129, "step": 6368 }, { "epoch": 0.5715824190617218, "grad_norm": 0.47821076849450395, "learning_rate": 9.752418484718876e-06, "loss": 0.4308, "step": 6369 }, { "epoch": 0.5716721635143928, "grad_norm": 0.5329803619711897, "learning_rate": 9.752256186650426e-06, "loss": 0.4955, "step": 6370 }, { "epoch": 0.5717619079670638, "grad_norm": 0.5614326085220142, "learning_rate": 9.75209383675483e-06, "loss": 0.4842, "step": 6371 }, { "epoch": 0.5718516524197348, "grad_norm": 0.47755619190167437, "learning_rate": 9.751931435033855e-06, "loss": 0.3977, "step": 6372 }, { "epoch": 0.5719413968724059, "grad_norm": 0.5468771280757161, "learning_rate": 9.751768981489278e-06, "loss": 0.5226, "step": 6373 }, { "epoch": 0.5720311413250768, "grad_norm": 0.49814715734958553, "learning_rate": 9.751606476122866e-06, "loss": 0.4587, "step": 6374 }, { "epoch": 0.5721208857777479, "grad_norm": 0.5065140963737641, "learning_rate": 9.751443918936392e-06, "loss": 0.4921, "step": 6375 }, { "epoch": 0.5722106302304188, "grad_norm": 0.508246949181023, "learning_rate": 9.75128130993163e-06, "loss": 0.4919, "step": 6376 }, { "epoch": 0.5723003746830899, "grad_norm": 0.4928368431487547, "learning_rate": 9.751118649110353e-06, "loss": 0.5123, "step": 6377 }, { "epoch": 0.5723901191357609, "grad_norm": 0.45543729994659754, "learning_rate": 9.750955936474333e-06, "loss": 0.4046, "step": 6378 }, { "epoch": 0.572479863588432, "grad_norm": 0.5236498597658712, "learning_rate": 9.750793172025348e-06, "loss": 0.513, "step": 6379 }, { "epoch": 0.5725696080411029, "grad_norm": 0.5402736093976334, "learning_rate": 9.750630355765171e-06, "loss": 0.5372, "step": 6380 }, { "epoch": 0.572659352493774, "grad_norm": 0.48373889602838993, "learning_rate": 9.750467487695578e-06, "loss": 0.4194, "step": 6381 }, { "epoch": 0.572749096946445, "grad_norm": 0.49837147319514435, "learning_rate": 9.750304567818346e-06, "loss": 0.4548, "step": 6382 }, { "epoch": 0.572838841399116, "grad_norm": 0.47251291815932495, "learning_rate": 9.75014159613525e-06, "loss": 0.4502, "step": 6383 }, { "epoch": 0.5729285858517871, "grad_norm": 0.4904485777862683, "learning_rate": 9.749978572648069e-06, "loss": 0.4591, "step": 6384 }, { "epoch": 0.573018330304458, "grad_norm": 0.5126685047934103, "learning_rate": 9.749815497358579e-06, "loss": 0.5263, "step": 6385 }, { "epoch": 0.5731080747571291, "grad_norm": 0.49264243322327694, "learning_rate": 9.74965237026856e-06, "loss": 0.4595, "step": 6386 }, { "epoch": 0.5731978192098001, "grad_norm": 0.516406790269193, "learning_rate": 9.749489191379792e-06, "loss": 0.528, "step": 6387 }, { "epoch": 0.5732875636624711, "grad_norm": 0.5405601081112489, "learning_rate": 9.749325960694052e-06, "loss": 0.5297, "step": 6388 }, { "epoch": 0.5733773081151421, "grad_norm": 0.4454747481958265, "learning_rate": 9.749162678213122e-06, "loss": 0.3754, "step": 6389 }, { "epoch": 0.5734670525678132, "grad_norm": 0.5284316679082802, "learning_rate": 9.748999343938782e-06, "loss": 0.4787, "step": 6390 }, { "epoch": 0.5735567970204841, "grad_norm": 0.49221940825556404, "learning_rate": 9.748835957872814e-06, "loss": 0.458, "step": 6391 }, { "epoch": 0.5736465414731552, "grad_norm": 0.4797129333326174, "learning_rate": 9.748672520016998e-06, "loss": 0.4682, "step": 6392 }, { "epoch": 0.5737362859258263, "grad_norm": 0.45172292001103864, "learning_rate": 9.748509030373119e-06, "loss": 0.416, "step": 6393 }, { "epoch": 0.5738260303784972, "grad_norm": 0.5153995039505715, "learning_rate": 9.748345488942958e-06, "loss": 0.5167, "step": 6394 }, { "epoch": 0.5739157748311683, "grad_norm": 0.5180730617382662, "learning_rate": 9.7481818957283e-06, "loss": 0.5045, "step": 6395 }, { "epoch": 0.5740055192838392, "grad_norm": 0.4938664716332367, "learning_rate": 9.748018250730927e-06, "loss": 0.5117, "step": 6396 }, { "epoch": 0.5740952637365103, "grad_norm": 0.46816322422168855, "learning_rate": 9.747854553952627e-06, "loss": 0.4203, "step": 6397 }, { "epoch": 0.5741850081891813, "grad_norm": 0.48653834250390776, "learning_rate": 9.747690805395182e-06, "loss": 0.4923, "step": 6398 }, { "epoch": 0.5742747526418523, "grad_norm": 0.47527570391122553, "learning_rate": 9.747527005060378e-06, "loss": 0.4238, "step": 6399 }, { "epoch": 0.5743644970945233, "grad_norm": 0.4318224998283025, "learning_rate": 9.747363152950004e-06, "loss": 0.4087, "step": 6400 }, { "epoch": 0.5744542415471944, "grad_norm": 0.5210514614872827, "learning_rate": 9.747199249065846e-06, "loss": 0.5409, "step": 6401 }, { "epoch": 0.5745439859998653, "grad_norm": 0.47716512763369756, "learning_rate": 9.74703529340969e-06, "loss": 0.4206, "step": 6402 }, { "epoch": 0.5746337304525364, "grad_norm": 0.46958697480482114, "learning_rate": 9.746871285983326e-06, "loss": 0.4303, "step": 6403 }, { "epoch": 0.5747234749052075, "grad_norm": 0.4886238217687839, "learning_rate": 9.74670722678854e-06, "loss": 0.4551, "step": 6404 }, { "epoch": 0.5748132193578784, "grad_norm": 0.48869694005983083, "learning_rate": 9.746543115827126e-06, "loss": 0.4904, "step": 6405 }, { "epoch": 0.5749029638105495, "grad_norm": 0.44853804293206195, "learning_rate": 9.746378953100866e-06, "loss": 0.4096, "step": 6406 }, { "epoch": 0.5749927082632205, "grad_norm": 0.47877972671779556, "learning_rate": 9.746214738611558e-06, "loss": 0.454, "step": 6407 }, { "epoch": 0.5750824527158915, "grad_norm": 0.43679531606313055, "learning_rate": 9.746050472360989e-06, "loss": 0.4046, "step": 6408 }, { "epoch": 0.5751721971685625, "grad_norm": 0.4590393271586867, "learning_rate": 9.745886154350953e-06, "loss": 0.4244, "step": 6409 }, { "epoch": 0.5752619416212336, "grad_norm": 0.5178304633653219, "learning_rate": 9.745721784583237e-06, "loss": 0.5591, "step": 6410 }, { "epoch": 0.5753516860739045, "grad_norm": 0.492054901703288, "learning_rate": 9.745557363059639e-06, "loss": 0.4328, "step": 6411 }, { "epoch": 0.5754414305265756, "grad_norm": 0.5449284441553016, "learning_rate": 9.745392889781948e-06, "loss": 0.5196, "step": 6412 }, { "epoch": 0.5755311749792466, "grad_norm": 0.5260912687249603, "learning_rate": 9.745228364751961e-06, "loss": 0.499, "step": 6413 }, { "epoch": 0.5756209194319176, "grad_norm": 0.47246020162718183, "learning_rate": 9.745063787971472e-06, "loss": 0.41, "step": 6414 }, { "epoch": 0.5757106638845886, "grad_norm": 0.4637787753000863, "learning_rate": 9.744899159442275e-06, "loss": 0.4434, "step": 6415 }, { "epoch": 0.5758004083372597, "grad_norm": 0.5313904281760999, "learning_rate": 9.744734479166165e-06, "loss": 0.5078, "step": 6416 }, { "epoch": 0.5758901527899307, "grad_norm": 0.4479508889578202, "learning_rate": 9.744569747144937e-06, "loss": 0.393, "step": 6417 }, { "epoch": 0.5759798972426017, "grad_norm": 0.4726717643271573, "learning_rate": 9.744404963380389e-06, "loss": 0.4545, "step": 6418 }, { "epoch": 0.5760696416952727, "grad_norm": 0.4864702494520532, "learning_rate": 9.744240127874318e-06, "loss": 0.4134, "step": 6419 }, { "epoch": 0.5761593861479437, "grad_norm": 0.4929898218637735, "learning_rate": 9.744075240628522e-06, "loss": 0.4878, "step": 6420 }, { "epoch": 0.5762491306006148, "grad_norm": 0.4902438401439751, "learning_rate": 9.7439103016448e-06, "loss": 0.4821, "step": 6421 }, { "epoch": 0.5763388750532857, "grad_norm": 0.4596126262785129, "learning_rate": 9.743745310924945e-06, "loss": 0.4048, "step": 6422 }, { "epoch": 0.5764286195059568, "grad_norm": 0.5809463213227621, "learning_rate": 9.743580268470765e-06, "loss": 0.5431, "step": 6423 }, { "epoch": 0.5765183639586278, "grad_norm": 0.47918608739629037, "learning_rate": 9.743415174284054e-06, "loss": 0.4614, "step": 6424 }, { "epoch": 0.5766081084112988, "grad_norm": 0.4839473207473874, "learning_rate": 9.743250028366615e-06, "loss": 0.4742, "step": 6425 }, { "epoch": 0.5766978528639698, "grad_norm": 0.4618108032259666, "learning_rate": 9.74308483072025e-06, "loss": 0.44, "step": 6426 }, { "epoch": 0.5767875973166409, "grad_norm": 0.4480672336670691, "learning_rate": 9.742919581346755e-06, "loss": 0.4035, "step": 6427 }, { "epoch": 0.5768773417693119, "grad_norm": 0.4654225329861673, "learning_rate": 9.742754280247939e-06, "loss": 0.4281, "step": 6428 }, { "epoch": 0.5769670862219829, "grad_norm": 0.521530658827286, "learning_rate": 9.742588927425601e-06, "loss": 0.5149, "step": 6429 }, { "epoch": 0.577056830674654, "grad_norm": 0.5225802956012519, "learning_rate": 9.742423522881546e-06, "loss": 0.4783, "step": 6430 }, { "epoch": 0.5771465751273249, "grad_norm": 0.4902659786720482, "learning_rate": 9.742258066617576e-06, "loss": 0.4819, "step": 6431 }, { "epoch": 0.577236319579996, "grad_norm": 0.5421538660841179, "learning_rate": 9.742092558635497e-06, "loss": 0.5122, "step": 6432 }, { "epoch": 0.577326064032667, "grad_norm": 0.5025477695012649, "learning_rate": 9.741926998937114e-06, "loss": 0.486, "step": 6433 }, { "epoch": 0.577415808485338, "grad_norm": 0.5380691963060419, "learning_rate": 9.741761387524232e-06, "loss": 0.5677, "step": 6434 }, { "epoch": 0.577505552938009, "grad_norm": 0.5017047001995348, "learning_rate": 9.741595724398656e-06, "loss": 0.4896, "step": 6435 }, { "epoch": 0.57759529739068, "grad_norm": 0.4732828751652398, "learning_rate": 9.741430009562194e-06, "loss": 0.503, "step": 6436 }, { "epoch": 0.577685041843351, "grad_norm": 0.5532039619665617, "learning_rate": 9.741264243016654e-06, "loss": 0.5251, "step": 6437 }, { "epoch": 0.5777747862960221, "grad_norm": 0.507089712808099, "learning_rate": 9.741098424763842e-06, "loss": 0.4831, "step": 6438 }, { "epoch": 0.5778645307486932, "grad_norm": 0.500506965660172, "learning_rate": 9.740932554805568e-06, "loss": 0.456, "step": 6439 }, { "epoch": 0.5779542752013641, "grad_norm": 0.44983396745326903, "learning_rate": 9.74076663314364e-06, "loss": 0.3987, "step": 6440 }, { "epoch": 0.5780440196540352, "grad_norm": 0.49270951160784765, "learning_rate": 9.740600659779868e-06, "loss": 0.4716, "step": 6441 }, { "epoch": 0.5781337641067061, "grad_norm": 0.44269799264183024, "learning_rate": 9.740434634716062e-06, "loss": 0.4031, "step": 6442 }, { "epoch": 0.5782235085593772, "grad_norm": 0.4932086403878248, "learning_rate": 9.740268557954031e-06, "loss": 0.4464, "step": 6443 }, { "epoch": 0.5783132530120482, "grad_norm": 0.46507836039867445, "learning_rate": 9.74010242949559e-06, "loss": 0.4739, "step": 6444 }, { "epoch": 0.5784029974647192, "grad_norm": 0.533700666559018, "learning_rate": 9.739936249342547e-06, "loss": 0.5395, "step": 6445 }, { "epoch": 0.5784927419173902, "grad_norm": 0.4590790511135351, "learning_rate": 9.739770017496715e-06, "loss": 0.4504, "step": 6446 }, { "epoch": 0.5785824863700613, "grad_norm": 0.4706517300863761, "learning_rate": 9.739603733959907e-06, "loss": 0.462, "step": 6447 }, { "epoch": 0.5786722308227322, "grad_norm": 0.477697668594038, "learning_rate": 9.73943739873394e-06, "loss": 0.4824, "step": 6448 }, { "epoch": 0.5787619752754033, "grad_norm": 0.4426548163188003, "learning_rate": 9.739271011820623e-06, "loss": 0.4006, "step": 6449 }, { "epoch": 0.5788517197280743, "grad_norm": 0.48532964315294447, "learning_rate": 9.739104573221774e-06, "loss": 0.4683, "step": 6450 }, { "epoch": 0.5789414641807453, "grad_norm": 0.44371272552878777, "learning_rate": 9.738938082939206e-06, "loss": 0.4208, "step": 6451 }, { "epoch": 0.5790312086334164, "grad_norm": 0.5156944530066839, "learning_rate": 9.738771540974737e-06, "loss": 0.4832, "step": 6452 }, { "epoch": 0.5791209530860874, "grad_norm": 0.5429901566063692, "learning_rate": 9.738604947330179e-06, "loss": 0.5141, "step": 6453 }, { "epoch": 0.5792106975387584, "grad_norm": 0.47812748836666064, "learning_rate": 9.738438302007353e-06, "loss": 0.4494, "step": 6454 }, { "epoch": 0.5793004419914294, "grad_norm": 0.48031907268406443, "learning_rate": 9.738271605008075e-06, "loss": 0.5085, "step": 6455 }, { "epoch": 0.5793901864441005, "grad_norm": 0.4749877237809809, "learning_rate": 9.738104856334165e-06, "loss": 0.4689, "step": 6456 }, { "epoch": 0.5794799308967714, "grad_norm": 0.4960171438555535, "learning_rate": 9.737938055987437e-06, "loss": 0.496, "step": 6457 }, { "epoch": 0.5795696753494425, "grad_norm": 0.4541190926030177, "learning_rate": 9.737771203969716e-06, "loss": 0.4119, "step": 6458 }, { "epoch": 0.5796594198021134, "grad_norm": 0.5168475572959534, "learning_rate": 9.737604300282815e-06, "loss": 0.492, "step": 6459 }, { "epoch": 0.5797491642547845, "grad_norm": 0.4952605333364635, "learning_rate": 9.737437344928557e-06, "loss": 0.4968, "step": 6460 }, { "epoch": 0.5798389087074555, "grad_norm": 0.4869353207124946, "learning_rate": 9.737270337908765e-06, "loss": 0.4597, "step": 6461 }, { "epoch": 0.5799286531601265, "grad_norm": 0.4431175071143664, "learning_rate": 9.737103279225258e-06, "loss": 0.4135, "step": 6462 }, { "epoch": 0.5800183976127976, "grad_norm": 0.5313325850749527, "learning_rate": 9.736936168879859e-06, "loss": 0.5569, "step": 6463 }, { "epoch": 0.5801081420654686, "grad_norm": 0.44016713460391343, "learning_rate": 9.736769006874389e-06, "loss": 0.3931, "step": 6464 }, { "epoch": 0.5801978865181396, "grad_norm": 0.4641364285732085, "learning_rate": 9.736601793210673e-06, "loss": 0.4559, "step": 6465 }, { "epoch": 0.5802876309708106, "grad_norm": 0.5024398479896923, "learning_rate": 9.736434527890535e-06, "loss": 0.4992, "step": 6466 }, { "epoch": 0.5803773754234817, "grad_norm": 0.4833117907018218, "learning_rate": 9.736267210915795e-06, "loss": 0.4803, "step": 6467 }, { "epoch": 0.5804671198761526, "grad_norm": 0.5120799422068105, "learning_rate": 9.736099842288282e-06, "loss": 0.5113, "step": 6468 }, { "epoch": 0.5805568643288237, "grad_norm": 0.4543805124150115, "learning_rate": 9.735932422009819e-06, "loss": 0.3944, "step": 6469 }, { "epoch": 0.5806466087814947, "grad_norm": 0.4547804217625555, "learning_rate": 9.735764950082234e-06, "loss": 0.408, "step": 6470 }, { "epoch": 0.5807363532341657, "grad_norm": 0.4733686429445591, "learning_rate": 9.735597426507353e-06, "loss": 0.4398, "step": 6471 }, { "epoch": 0.5808260976868367, "grad_norm": 0.45623789223514083, "learning_rate": 9.735429851287e-06, "loss": 0.4271, "step": 6472 }, { "epoch": 0.5809158421395078, "grad_norm": 0.46279856964093574, "learning_rate": 9.735262224423005e-06, "loss": 0.4689, "step": 6473 }, { "epoch": 0.5810055865921788, "grad_norm": 0.48523186933636325, "learning_rate": 9.735094545917196e-06, "loss": 0.457, "step": 6474 }, { "epoch": 0.5810953310448498, "grad_norm": 0.4663774172699648, "learning_rate": 9.734926815771402e-06, "loss": 0.455, "step": 6475 }, { "epoch": 0.5811850754975209, "grad_norm": 0.48447319154118595, "learning_rate": 9.73475903398745e-06, "loss": 0.5006, "step": 6476 }, { "epoch": 0.5812748199501918, "grad_norm": 0.5257417117868592, "learning_rate": 9.734591200567172e-06, "loss": 0.5119, "step": 6477 }, { "epoch": 0.5813645644028629, "grad_norm": 0.5010425934582656, "learning_rate": 9.734423315512398e-06, "loss": 0.451, "step": 6478 }, { "epoch": 0.5814543088555338, "grad_norm": 0.45407257506812626, "learning_rate": 9.73425537882496e-06, "loss": 0.4205, "step": 6479 }, { "epoch": 0.5815440533082049, "grad_norm": 0.5137717202830984, "learning_rate": 9.734087390506687e-06, "loss": 0.467, "step": 6480 }, { "epoch": 0.5816337977608759, "grad_norm": 0.4856616797470038, "learning_rate": 9.73391935055941e-06, "loss": 0.4939, "step": 6481 }, { "epoch": 0.5817235422135469, "grad_norm": 0.5344670995756413, "learning_rate": 9.733751258984966e-06, "loss": 0.4905, "step": 6482 }, { "epoch": 0.5818132866662179, "grad_norm": 0.47758534970512434, "learning_rate": 9.733583115785189e-06, "loss": 0.4388, "step": 6483 }, { "epoch": 0.581903031118889, "grad_norm": 0.45565799363448933, "learning_rate": 9.733414920961906e-06, "loss": 0.3852, "step": 6484 }, { "epoch": 0.5819927755715599, "grad_norm": 0.585928558909007, "learning_rate": 9.733246674516955e-06, "loss": 0.5674, "step": 6485 }, { "epoch": 0.582082520024231, "grad_norm": 0.49662143954654714, "learning_rate": 9.733078376452172e-06, "loss": 0.4755, "step": 6486 }, { "epoch": 0.5821722644769021, "grad_norm": 0.4885332808536557, "learning_rate": 9.73291002676939e-06, "loss": 0.4627, "step": 6487 }, { "epoch": 0.582262008929573, "grad_norm": 0.49167973421952077, "learning_rate": 9.732741625470448e-06, "loss": 0.4107, "step": 6488 }, { "epoch": 0.5823517533822441, "grad_norm": 0.49402323100907974, "learning_rate": 9.732573172557181e-06, "loss": 0.492, "step": 6489 }, { "epoch": 0.5824414978349151, "grad_norm": 0.5819622550982575, "learning_rate": 9.732404668031424e-06, "loss": 0.4608, "step": 6490 }, { "epoch": 0.5825312422875861, "grad_norm": 0.47406689685218384, "learning_rate": 9.732236111895017e-06, "loss": 0.4493, "step": 6491 }, { "epoch": 0.5826209867402571, "grad_norm": 0.5126912453264031, "learning_rate": 9.7320675041498e-06, "loss": 0.5063, "step": 6492 }, { "epoch": 0.5827107311929282, "grad_norm": 0.5074532235627225, "learning_rate": 9.731898844797609e-06, "loss": 0.451, "step": 6493 }, { "epoch": 0.5828004756455991, "grad_norm": 0.4992365428628801, "learning_rate": 9.731730133840283e-06, "loss": 0.4958, "step": 6494 }, { "epoch": 0.5828902200982702, "grad_norm": 0.4694613817218991, "learning_rate": 9.731561371279664e-06, "loss": 0.4114, "step": 6495 }, { "epoch": 0.5829799645509411, "grad_norm": 0.5820428198666937, "learning_rate": 9.731392557117589e-06, "loss": 0.5028, "step": 6496 }, { "epoch": 0.5830697090036122, "grad_norm": 0.45885267028666255, "learning_rate": 9.731223691355905e-06, "loss": 0.4497, "step": 6497 }, { "epoch": 0.5831594534562833, "grad_norm": 0.5069939712833412, "learning_rate": 9.731054773996449e-06, "loss": 0.5241, "step": 6498 }, { "epoch": 0.5832491979089542, "grad_norm": 0.5159476526476021, "learning_rate": 9.730885805041065e-06, "loss": 0.5236, "step": 6499 }, { "epoch": 0.5833389423616253, "grad_norm": 0.4834876556737801, "learning_rate": 9.730716784491592e-06, "loss": 0.4386, "step": 6500 }, { "epoch": 0.5834286868142963, "grad_norm": 0.5250546907425687, "learning_rate": 9.730547712349879e-06, "loss": 0.5414, "step": 6501 }, { "epoch": 0.5835184312669673, "grad_norm": 0.5389991151614478, "learning_rate": 9.730378588617766e-06, "loss": 0.5057, "step": 6502 }, { "epoch": 0.5836081757196383, "grad_norm": 0.5235357023841212, "learning_rate": 9.730209413297099e-06, "loss": 0.4964, "step": 6503 }, { "epoch": 0.5836979201723094, "grad_norm": 0.5004929302191675, "learning_rate": 9.730040186389723e-06, "loss": 0.4582, "step": 6504 }, { "epoch": 0.5837876646249803, "grad_norm": 0.4720594324878029, "learning_rate": 9.729870907897483e-06, "loss": 0.4195, "step": 6505 }, { "epoch": 0.5838774090776514, "grad_norm": 0.48408113524128177, "learning_rate": 9.729701577822226e-06, "loss": 0.4527, "step": 6506 }, { "epoch": 0.5839671535303224, "grad_norm": 0.5451649949553358, "learning_rate": 9.729532196165797e-06, "loss": 0.5406, "step": 6507 }, { "epoch": 0.5840568979829934, "grad_norm": 0.46842066434588553, "learning_rate": 9.729362762930046e-06, "loss": 0.4508, "step": 6508 }, { "epoch": 0.5841466424356645, "grad_norm": 0.471875517908175, "learning_rate": 9.729193278116818e-06, "loss": 0.4256, "step": 6509 }, { "epoch": 0.5842363868883355, "grad_norm": 0.4538880805705492, "learning_rate": 9.72902374172796e-06, "loss": 0.4288, "step": 6510 }, { "epoch": 0.5843261313410065, "grad_norm": 0.5156005337629759, "learning_rate": 9.728854153765327e-06, "loss": 0.4904, "step": 6511 }, { "epoch": 0.5844158757936775, "grad_norm": 0.5230771559911106, "learning_rate": 9.728684514230763e-06, "loss": 0.5013, "step": 6512 }, { "epoch": 0.5845056202463486, "grad_norm": 0.486992757351222, "learning_rate": 9.72851482312612e-06, "loss": 0.4875, "step": 6513 }, { "epoch": 0.5845953646990195, "grad_norm": 0.5175145801709644, "learning_rate": 9.72834508045325e-06, "loss": 0.514, "step": 6514 }, { "epoch": 0.5846851091516906, "grad_norm": 0.5172611352311316, "learning_rate": 9.728175286214001e-06, "loss": 0.4985, "step": 6515 }, { "epoch": 0.5847748536043615, "grad_norm": 0.4984583352576696, "learning_rate": 9.728005440410227e-06, "loss": 0.4832, "step": 6516 }, { "epoch": 0.5848645980570326, "grad_norm": 0.5154922152483731, "learning_rate": 9.727835543043779e-06, "loss": 0.4886, "step": 6517 }, { "epoch": 0.5849543425097036, "grad_norm": 0.4690091518962599, "learning_rate": 9.727665594116511e-06, "loss": 0.4393, "step": 6518 }, { "epoch": 0.5850440869623746, "grad_norm": 0.48679246163797696, "learning_rate": 9.727495593630276e-06, "loss": 0.4678, "step": 6519 }, { "epoch": 0.5851338314150456, "grad_norm": 0.488956701598387, "learning_rate": 9.72732554158693e-06, "loss": 0.443, "step": 6520 }, { "epoch": 0.5852235758677167, "grad_norm": 0.49766189043180625, "learning_rate": 9.727155437988322e-06, "loss": 0.5182, "step": 6521 }, { "epoch": 0.5853133203203877, "grad_norm": 0.5988268883074175, "learning_rate": 9.726985282836315e-06, "loss": 0.541, "step": 6522 }, { "epoch": 0.5854030647730587, "grad_norm": 0.4627997469479353, "learning_rate": 9.726815076132757e-06, "loss": 0.4364, "step": 6523 }, { "epoch": 0.5854928092257298, "grad_norm": 0.501871485452853, "learning_rate": 9.72664481787951e-06, "loss": 0.4913, "step": 6524 }, { "epoch": 0.5855825536784007, "grad_norm": 0.48232138188691304, "learning_rate": 9.726474508078425e-06, "loss": 0.4294, "step": 6525 }, { "epoch": 0.5856722981310718, "grad_norm": 0.5069435792899555, "learning_rate": 9.726304146731366e-06, "loss": 0.4832, "step": 6526 }, { "epoch": 0.5857620425837428, "grad_norm": 0.4569691066159204, "learning_rate": 9.726133733840186e-06, "loss": 0.42, "step": 6527 }, { "epoch": 0.5858517870364138, "grad_norm": 0.47024005545574754, "learning_rate": 9.725963269406747e-06, "loss": 0.4367, "step": 6528 }, { "epoch": 0.5859415314890848, "grad_norm": 0.4639297335611032, "learning_rate": 9.725792753432904e-06, "loss": 0.4786, "step": 6529 }, { "epoch": 0.5860312759417559, "grad_norm": 0.528698026469412, "learning_rate": 9.72562218592052e-06, "loss": 0.5272, "step": 6530 }, { "epoch": 0.5861210203944268, "grad_norm": 0.4673468466478699, "learning_rate": 9.725451566871453e-06, "loss": 0.4402, "step": 6531 }, { "epoch": 0.5862107648470979, "grad_norm": 0.4835175454421887, "learning_rate": 9.725280896287566e-06, "loss": 0.4827, "step": 6532 }, { "epoch": 0.586300509299769, "grad_norm": 0.5217048961951982, "learning_rate": 9.72511017417072e-06, "loss": 0.5114, "step": 6533 }, { "epoch": 0.5863902537524399, "grad_norm": 0.5111311401159028, "learning_rate": 9.724939400522774e-06, "loss": 0.5016, "step": 6534 }, { "epoch": 0.586479998205111, "grad_norm": 0.5124898933421347, "learning_rate": 9.724768575345592e-06, "loss": 0.4571, "step": 6535 }, { "epoch": 0.586569742657782, "grad_norm": 0.4607016782175657, "learning_rate": 9.724597698641039e-06, "loss": 0.441, "step": 6536 }, { "epoch": 0.586659487110453, "grad_norm": 0.4878169824372754, "learning_rate": 9.724426770410976e-06, "loss": 0.483, "step": 6537 }, { "epoch": 0.586749231563124, "grad_norm": 0.5389943792194825, "learning_rate": 9.724255790657268e-06, "loss": 0.5495, "step": 6538 }, { "epoch": 0.586838976015795, "grad_norm": 0.4830086422952544, "learning_rate": 9.72408475938178e-06, "loss": 0.4492, "step": 6539 }, { "epoch": 0.586928720468466, "grad_norm": 0.5042921439902104, "learning_rate": 9.723913676586377e-06, "loss": 0.4853, "step": 6540 }, { "epoch": 0.5870184649211371, "grad_norm": 0.4841999761406178, "learning_rate": 9.723742542272925e-06, "loss": 0.4635, "step": 6541 }, { "epoch": 0.587108209373808, "grad_norm": 0.4625536864000973, "learning_rate": 9.723571356443289e-06, "loss": 0.4374, "step": 6542 }, { "epoch": 0.5871979538264791, "grad_norm": 0.5049616710856819, "learning_rate": 9.723400119099338e-06, "loss": 0.4893, "step": 6543 }, { "epoch": 0.5872876982791502, "grad_norm": 0.46584305191466924, "learning_rate": 9.723228830242937e-06, "loss": 0.4448, "step": 6544 }, { "epoch": 0.5873774427318211, "grad_norm": 0.4868934516466521, "learning_rate": 9.723057489875956e-06, "loss": 0.4528, "step": 6545 }, { "epoch": 0.5874671871844922, "grad_norm": 0.44686288737407437, "learning_rate": 9.722886098000263e-06, "loss": 0.4306, "step": 6546 }, { "epoch": 0.5875569316371632, "grad_norm": 0.5172678835587566, "learning_rate": 9.722714654617729e-06, "loss": 0.4516, "step": 6547 }, { "epoch": 0.5876466760898342, "grad_norm": 0.48186766060974245, "learning_rate": 9.72254315973022e-06, "loss": 0.4487, "step": 6548 }, { "epoch": 0.5877364205425052, "grad_norm": 0.5121972191775604, "learning_rate": 9.722371613339609e-06, "loss": 0.4995, "step": 6549 }, { "epoch": 0.5878261649951763, "grad_norm": 0.464055136589305, "learning_rate": 9.722200015447767e-06, "loss": 0.4544, "step": 6550 }, { "epoch": 0.5879159094478472, "grad_norm": 0.49498898609252334, "learning_rate": 9.722028366056563e-06, "loss": 0.4981, "step": 6551 }, { "epoch": 0.5880056539005183, "grad_norm": 0.43987799333058986, "learning_rate": 9.721856665167872e-06, "loss": 0.4081, "step": 6552 }, { "epoch": 0.5880953983531892, "grad_norm": 0.49861677780259805, "learning_rate": 9.721684912783565e-06, "loss": 0.4718, "step": 6553 }, { "epoch": 0.5881851428058603, "grad_norm": 0.4481621653042754, "learning_rate": 9.721513108905513e-06, "loss": 0.45, "step": 6554 }, { "epoch": 0.5882748872585314, "grad_norm": 0.470835291030288, "learning_rate": 9.721341253535594e-06, "loss": 0.4462, "step": 6555 }, { "epoch": 0.5883646317112023, "grad_norm": 0.4820166384047361, "learning_rate": 9.721169346675679e-06, "loss": 0.3971, "step": 6556 }, { "epoch": 0.5884543761638734, "grad_norm": 0.5537956218385232, "learning_rate": 9.720997388327644e-06, "loss": 0.5401, "step": 6557 }, { "epoch": 0.5885441206165444, "grad_norm": 0.44929694788298774, "learning_rate": 9.720825378493364e-06, "loss": 0.4218, "step": 6558 }, { "epoch": 0.5886338650692154, "grad_norm": 0.5220358130430861, "learning_rate": 9.720653317174716e-06, "loss": 0.5118, "step": 6559 }, { "epoch": 0.5887236095218864, "grad_norm": 0.4925824294288539, "learning_rate": 9.720481204373574e-06, "loss": 0.4317, "step": 6560 }, { "epoch": 0.5888133539745575, "grad_norm": 0.46825673288482694, "learning_rate": 9.720309040091818e-06, "loss": 0.4575, "step": 6561 }, { "epoch": 0.5889030984272284, "grad_norm": 0.5330090357356968, "learning_rate": 9.720136824331323e-06, "loss": 0.514, "step": 6562 }, { "epoch": 0.5889928428798995, "grad_norm": 0.5089151602786759, "learning_rate": 9.719964557093967e-06, "loss": 0.5088, "step": 6563 }, { "epoch": 0.5890825873325705, "grad_norm": 0.48112710665313463, "learning_rate": 9.719792238381634e-06, "loss": 0.4688, "step": 6564 }, { "epoch": 0.5891723317852415, "grad_norm": 0.5377587796434583, "learning_rate": 9.719619868196195e-06, "loss": 0.4739, "step": 6565 }, { "epoch": 0.5892620762379125, "grad_norm": 0.4609557902112239, "learning_rate": 9.719447446539536e-06, "loss": 0.4038, "step": 6566 }, { "epoch": 0.5893518206905836, "grad_norm": 0.48978907930951693, "learning_rate": 9.719274973413537e-06, "loss": 0.4476, "step": 6567 }, { "epoch": 0.5894415651432546, "grad_norm": 0.48530867055965166, "learning_rate": 9.719102448820076e-06, "loss": 0.4979, "step": 6568 }, { "epoch": 0.5895313095959256, "grad_norm": 0.49136584824841084, "learning_rate": 9.718929872761036e-06, "loss": 0.4785, "step": 6569 }, { "epoch": 0.5896210540485967, "grad_norm": 0.49303321763523317, "learning_rate": 9.718757245238298e-06, "loss": 0.4778, "step": 6570 }, { "epoch": 0.5897107985012676, "grad_norm": 0.5125946201041599, "learning_rate": 9.718584566253746e-06, "loss": 0.4739, "step": 6571 }, { "epoch": 0.5898005429539387, "grad_norm": 0.5143533286830273, "learning_rate": 9.718411835809264e-06, "loss": 0.4756, "step": 6572 }, { "epoch": 0.5898902874066096, "grad_norm": 0.5031246453447507, "learning_rate": 9.718239053906735e-06, "loss": 0.4544, "step": 6573 }, { "epoch": 0.5899800318592807, "grad_norm": 0.45902006891300706, "learning_rate": 9.718066220548041e-06, "loss": 0.413, "step": 6574 }, { "epoch": 0.5900697763119517, "grad_norm": 0.5221782001830358, "learning_rate": 9.717893335735071e-06, "loss": 0.5011, "step": 6575 }, { "epoch": 0.5901595207646227, "grad_norm": 0.4527440270911532, "learning_rate": 9.717720399469708e-06, "loss": 0.4329, "step": 6576 }, { "epoch": 0.5902492652172937, "grad_norm": 0.4693482516972479, "learning_rate": 9.717547411753837e-06, "loss": 0.4156, "step": 6577 }, { "epoch": 0.5903390096699648, "grad_norm": 0.5150601075634522, "learning_rate": 9.717374372589346e-06, "loss": 0.5219, "step": 6578 }, { "epoch": 0.5904287541226358, "grad_norm": 0.4323555007634747, "learning_rate": 9.717201281978124e-06, "loss": 0.4082, "step": 6579 }, { "epoch": 0.5905184985753068, "grad_norm": 0.4943233322679262, "learning_rate": 9.717028139922054e-06, "loss": 0.4895, "step": 6580 }, { "epoch": 0.5906082430279779, "grad_norm": 0.5220137335889768, "learning_rate": 9.716854946423029e-06, "loss": 0.5111, "step": 6581 }, { "epoch": 0.5906979874806488, "grad_norm": 0.5062741716530728, "learning_rate": 9.716681701482935e-06, "loss": 0.5333, "step": 6582 }, { "epoch": 0.5907877319333199, "grad_norm": 0.47685249583194594, "learning_rate": 9.716508405103662e-06, "loss": 0.4554, "step": 6583 }, { "epoch": 0.5908774763859909, "grad_norm": 0.5033860210073033, "learning_rate": 9.716335057287101e-06, "loss": 0.4948, "step": 6584 }, { "epoch": 0.5909672208386619, "grad_norm": 0.5012622704851007, "learning_rate": 9.716161658035142e-06, "loss": 0.4816, "step": 6585 }, { "epoch": 0.5910569652913329, "grad_norm": 0.49118170794369137, "learning_rate": 9.715988207349673e-06, "loss": 0.4646, "step": 6586 }, { "epoch": 0.591146709744004, "grad_norm": 0.5091737573891494, "learning_rate": 9.71581470523259e-06, "loss": 0.4327, "step": 6587 }, { "epoch": 0.5912364541966749, "grad_norm": 0.4744563254139655, "learning_rate": 9.715641151685784e-06, "loss": 0.4228, "step": 6588 }, { "epoch": 0.591326198649346, "grad_norm": 0.49860390790261233, "learning_rate": 9.715467546711146e-06, "loss": 0.4982, "step": 6589 }, { "epoch": 0.5914159431020171, "grad_norm": 0.4634905429999526, "learning_rate": 9.71529389031057e-06, "loss": 0.4328, "step": 6590 }, { "epoch": 0.591505687554688, "grad_norm": 0.4403144964620733, "learning_rate": 9.715120182485951e-06, "loss": 0.4246, "step": 6591 }, { "epoch": 0.5915954320073591, "grad_norm": 0.46478883975611124, "learning_rate": 9.714946423239184e-06, "loss": 0.448, "step": 6592 }, { "epoch": 0.59168517646003, "grad_norm": 0.5028361481527921, "learning_rate": 9.714772612572161e-06, "loss": 0.4726, "step": 6593 }, { "epoch": 0.5917749209127011, "grad_norm": 0.5259812561548736, "learning_rate": 9.714598750486781e-06, "loss": 0.4928, "step": 6594 }, { "epoch": 0.5918646653653721, "grad_norm": 0.4862422860032641, "learning_rate": 9.714424836984936e-06, "loss": 0.5035, "step": 6595 }, { "epoch": 0.5919544098180431, "grad_norm": 0.4795884736015654, "learning_rate": 9.714250872068527e-06, "loss": 0.4772, "step": 6596 }, { "epoch": 0.5920441542707141, "grad_norm": 0.5404758946826437, "learning_rate": 9.71407685573945e-06, "loss": 0.5368, "step": 6597 }, { "epoch": 0.5921338987233852, "grad_norm": 0.4785390641248026, "learning_rate": 9.7139027879996e-06, "loss": 0.4559, "step": 6598 }, { "epoch": 0.5922236431760561, "grad_norm": 0.4850490410321507, "learning_rate": 9.71372866885088e-06, "loss": 0.4531, "step": 6599 }, { "epoch": 0.5923133876287272, "grad_norm": 0.44780440623497403, "learning_rate": 9.713554498295184e-06, "loss": 0.4568, "step": 6600 }, { "epoch": 0.5924031320813982, "grad_norm": 0.482686537284575, "learning_rate": 9.713380276334414e-06, "loss": 0.4849, "step": 6601 }, { "epoch": 0.5924928765340692, "grad_norm": 0.44517640885260845, "learning_rate": 9.71320600297047e-06, "loss": 0.4041, "step": 6602 }, { "epoch": 0.5925826209867403, "grad_norm": 0.5184535794710702, "learning_rate": 9.713031678205252e-06, "loss": 0.4994, "step": 6603 }, { "epoch": 0.5926723654394113, "grad_norm": 0.4695264266763651, "learning_rate": 9.712857302040664e-06, "loss": 0.4592, "step": 6604 }, { "epoch": 0.5927621098920823, "grad_norm": 0.4625929318244123, "learning_rate": 9.712682874478602e-06, "loss": 0.3947, "step": 6605 }, { "epoch": 0.5928518543447533, "grad_norm": 0.5321865807553328, "learning_rate": 9.712508395520975e-06, "loss": 0.5167, "step": 6606 }, { "epoch": 0.5929415987974244, "grad_norm": 0.4974715041836243, "learning_rate": 9.712333865169678e-06, "loss": 0.4213, "step": 6607 }, { "epoch": 0.5930313432500953, "grad_norm": 0.47584647724668794, "learning_rate": 9.712159283426624e-06, "loss": 0.4401, "step": 6608 }, { "epoch": 0.5931210877027664, "grad_norm": 0.4685463358944813, "learning_rate": 9.711984650293708e-06, "loss": 0.4621, "step": 6609 }, { "epoch": 0.5932108321554374, "grad_norm": 0.4756906954136531, "learning_rate": 9.711809965772838e-06, "loss": 0.456, "step": 6610 }, { "epoch": 0.5933005766081084, "grad_norm": 0.5150968360139702, "learning_rate": 9.711635229865922e-06, "loss": 0.4853, "step": 6611 }, { "epoch": 0.5933903210607794, "grad_norm": 0.5717755133880177, "learning_rate": 9.71146044257486e-06, "loss": 0.525, "step": 6612 }, { "epoch": 0.5934800655134504, "grad_norm": 0.4696454259247719, "learning_rate": 9.711285603901564e-06, "loss": 0.4625, "step": 6613 }, { "epoch": 0.5935698099661215, "grad_norm": 0.4721019318140628, "learning_rate": 9.711110713847936e-06, "loss": 0.4443, "step": 6614 }, { "epoch": 0.5936595544187925, "grad_norm": 0.4992780642025271, "learning_rate": 9.710935772415887e-06, "loss": 0.4849, "step": 6615 }, { "epoch": 0.5937492988714635, "grad_norm": 0.4872058652341808, "learning_rate": 9.71076077960732e-06, "loss": 0.5144, "step": 6616 }, { "epoch": 0.5938390433241345, "grad_norm": 0.5085414967846023, "learning_rate": 9.71058573542415e-06, "loss": 0.4723, "step": 6617 }, { "epoch": 0.5939287877768056, "grad_norm": 0.5156238374580944, "learning_rate": 9.71041063986828e-06, "loss": 0.5116, "step": 6618 }, { "epoch": 0.5940185322294765, "grad_norm": 0.49158449985413777, "learning_rate": 9.710235492941623e-06, "loss": 0.5024, "step": 6619 }, { "epoch": 0.5941082766821476, "grad_norm": 0.46200119200779943, "learning_rate": 9.710060294646089e-06, "loss": 0.412, "step": 6620 }, { "epoch": 0.5941980211348186, "grad_norm": 0.4964857827664269, "learning_rate": 9.709885044983586e-06, "loss": 0.4252, "step": 6621 }, { "epoch": 0.5942877655874896, "grad_norm": 0.49052594708276653, "learning_rate": 9.709709743956028e-06, "loss": 0.5082, "step": 6622 }, { "epoch": 0.5943775100401606, "grad_norm": 0.5128944798481803, "learning_rate": 9.709534391565327e-06, "loss": 0.5177, "step": 6623 }, { "epoch": 0.5944672544928317, "grad_norm": 0.4481976644732236, "learning_rate": 9.709358987813393e-06, "loss": 0.4364, "step": 6624 }, { "epoch": 0.5945569989455027, "grad_norm": 0.5192765874229369, "learning_rate": 9.70918353270214e-06, "loss": 0.5406, "step": 6625 }, { "epoch": 0.5946467433981737, "grad_norm": 0.5150031351908819, "learning_rate": 9.709008026233483e-06, "loss": 0.5169, "step": 6626 }, { "epoch": 0.5947364878508448, "grad_norm": 0.47746661354153086, "learning_rate": 9.708832468409333e-06, "loss": 0.4447, "step": 6627 }, { "epoch": 0.5948262323035157, "grad_norm": 0.5050057239688444, "learning_rate": 9.70865685923161e-06, "loss": 0.4977, "step": 6628 }, { "epoch": 0.5949159767561868, "grad_norm": 0.49060392242014184, "learning_rate": 9.708481198702222e-06, "loss": 0.4597, "step": 6629 }, { "epoch": 0.5950057212088578, "grad_norm": 0.4923121943227129, "learning_rate": 9.708305486823088e-06, "loss": 0.4736, "step": 6630 }, { "epoch": 0.5950954656615288, "grad_norm": 0.4830950083295224, "learning_rate": 9.708129723596125e-06, "loss": 0.4247, "step": 6631 }, { "epoch": 0.5951852101141998, "grad_norm": 0.48691781726620237, "learning_rate": 9.70795390902325e-06, "loss": 0.4169, "step": 6632 }, { "epoch": 0.5952749545668709, "grad_norm": 0.5144878445173275, "learning_rate": 9.70777804310638e-06, "loss": 0.545, "step": 6633 }, { "epoch": 0.5953646990195418, "grad_norm": 0.4974657901035809, "learning_rate": 9.707602125847431e-06, "loss": 0.4133, "step": 6634 }, { "epoch": 0.5954544434722129, "grad_norm": 0.4783092727020271, "learning_rate": 9.707426157248325e-06, "loss": 0.4274, "step": 6635 }, { "epoch": 0.5955441879248838, "grad_norm": 0.4690166277473537, "learning_rate": 9.707250137310979e-06, "loss": 0.4227, "step": 6636 }, { "epoch": 0.5956339323775549, "grad_norm": 0.46758728761796187, "learning_rate": 9.707074066037312e-06, "loss": 0.4438, "step": 6637 }, { "epoch": 0.595723676830226, "grad_norm": 0.5031482458551426, "learning_rate": 9.706897943429246e-06, "loss": 0.5183, "step": 6638 }, { "epoch": 0.5958134212828969, "grad_norm": 0.5081468942620712, "learning_rate": 9.7067217694887e-06, "loss": 0.4552, "step": 6639 }, { "epoch": 0.595903165735568, "grad_norm": 0.49198508185066653, "learning_rate": 9.706545544217595e-06, "loss": 0.4269, "step": 6640 }, { "epoch": 0.595992910188239, "grad_norm": 0.5230063087887311, "learning_rate": 9.706369267617857e-06, "loss": 0.4921, "step": 6641 }, { "epoch": 0.59608265464091, "grad_norm": 0.5390638877812672, "learning_rate": 9.706192939691403e-06, "loss": 0.5137, "step": 6642 }, { "epoch": 0.596172399093581, "grad_norm": 0.45028073724531625, "learning_rate": 9.70601656044016e-06, "loss": 0.4602, "step": 6643 }, { "epoch": 0.5962621435462521, "grad_norm": 0.4619986259689815, "learning_rate": 9.705840129866048e-06, "loss": 0.4235, "step": 6644 }, { "epoch": 0.596351887998923, "grad_norm": 0.5054301340442601, "learning_rate": 9.705663647970995e-06, "loss": 0.4831, "step": 6645 }, { "epoch": 0.5964416324515941, "grad_norm": 0.4295060904697192, "learning_rate": 9.705487114756925e-06, "loss": 0.3652, "step": 6646 }, { "epoch": 0.596531376904265, "grad_norm": 0.5532223449985348, "learning_rate": 9.70531053022576e-06, "loss": 0.563, "step": 6647 }, { "epoch": 0.5966211213569361, "grad_norm": 0.529552430263471, "learning_rate": 9.705133894379428e-06, "loss": 0.4791, "step": 6648 }, { "epoch": 0.5967108658096072, "grad_norm": 0.4699482976275198, "learning_rate": 9.704957207219856e-06, "loss": 0.4117, "step": 6649 }, { "epoch": 0.5968006102622782, "grad_norm": 0.4677988770430738, "learning_rate": 9.704780468748969e-06, "loss": 0.4547, "step": 6650 }, { "epoch": 0.5968903547149492, "grad_norm": 0.5108687740879514, "learning_rate": 9.704603678968697e-06, "loss": 0.4923, "step": 6651 }, { "epoch": 0.5969800991676202, "grad_norm": 0.5093333594597356, "learning_rate": 9.704426837880966e-06, "loss": 0.4697, "step": 6652 }, { "epoch": 0.5970698436202913, "grad_norm": 0.4957565891549718, "learning_rate": 9.704249945487705e-06, "loss": 0.4877, "step": 6653 }, { "epoch": 0.5971595880729622, "grad_norm": 0.5051733543773523, "learning_rate": 9.704073001790843e-06, "loss": 0.5118, "step": 6654 }, { "epoch": 0.5972493325256333, "grad_norm": 0.49896587502337764, "learning_rate": 9.703896006792311e-06, "loss": 0.4742, "step": 6655 }, { "epoch": 0.5973390769783042, "grad_norm": 0.48727655268739484, "learning_rate": 9.703718960494038e-06, "loss": 0.4892, "step": 6656 }, { "epoch": 0.5974288214309753, "grad_norm": 0.49453220744697624, "learning_rate": 9.703541862897956e-06, "loss": 0.4513, "step": 6657 }, { "epoch": 0.5975185658836463, "grad_norm": 0.5174601923099943, "learning_rate": 9.703364714005993e-06, "loss": 0.5146, "step": 6658 }, { "epoch": 0.5976083103363173, "grad_norm": 0.4859261210997065, "learning_rate": 9.703187513820086e-06, "loss": 0.4812, "step": 6659 }, { "epoch": 0.5976980547889884, "grad_norm": 0.47948297642949816, "learning_rate": 9.703010262342164e-06, "loss": 0.4289, "step": 6660 }, { "epoch": 0.5977877992416594, "grad_norm": 0.461921963049214, "learning_rate": 9.702832959574162e-06, "loss": 0.4451, "step": 6661 }, { "epoch": 0.5978775436943304, "grad_norm": 0.512525417963172, "learning_rate": 9.702655605518012e-06, "loss": 0.5414, "step": 6662 }, { "epoch": 0.5979672881470014, "grad_norm": 0.5055681664659704, "learning_rate": 9.70247820017565e-06, "loss": 0.5091, "step": 6663 }, { "epoch": 0.5980570325996725, "grad_norm": 0.5565700578196389, "learning_rate": 9.702300743549006e-06, "loss": 0.5361, "step": 6664 }, { "epoch": 0.5981467770523434, "grad_norm": 0.544113667658664, "learning_rate": 9.702123235640024e-06, "loss": 0.5484, "step": 6665 }, { "epoch": 0.5982365215050145, "grad_norm": 0.532013231000769, "learning_rate": 9.701945676450631e-06, "loss": 0.4969, "step": 6666 }, { "epoch": 0.5983262659576855, "grad_norm": 0.4677257954083565, "learning_rate": 9.701768065982769e-06, "loss": 0.4456, "step": 6667 }, { "epoch": 0.5984160104103565, "grad_norm": 0.5130173880648048, "learning_rate": 9.701590404238373e-06, "loss": 0.5106, "step": 6668 }, { "epoch": 0.5985057548630275, "grad_norm": 0.5342794163006368, "learning_rate": 9.701412691219381e-06, "loss": 0.4772, "step": 6669 }, { "epoch": 0.5985954993156986, "grad_norm": 0.5015562304063863, "learning_rate": 9.701234926927732e-06, "loss": 0.4659, "step": 6670 }, { "epoch": 0.5986852437683695, "grad_norm": 0.47840380057388804, "learning_rate": 9.701057111365361e-06, "loss": 0.4509, "step": 6671 }, { "epoch": 0.5987749882210406, "grad_norm": 0.47180658224717975, "learning_rate": 9.700879244534212e-06, "loss": 0.4489, "step": 6672 }, { "epoch": 0.5988647326737117, "grad_norm": 0.5135235308712107, "learning_rate": 9.700701326436221e-06, "loss": 0.4589, "step": 6673 }, { "epoch": 0.5989544771263826, "grad_norm": 0.5359961154738382, "learning_rate": 9.70052335707333e-06, "loss": 0.5491, "step": 6674 }, { "epoch": 0.5990442215790537, "grad_norm": 0.4788559295473387, "learning_rate": 9.700345336447481e-06, "loss": 0.4163, "step": 6675 }, { "epoch": 0.5991339660317246, "grad_norm": 0.5142834064017938, "learning_rate": 9.700167264560614e-06, "loss": 0.4992, "step": 6676 }, { "epoch": 0.5992237104843957, "grad_norm": 0.46522268131717087, "learning_rate": 9.699989141414672e-06, "loss": 0.4391, "step": 6677 }, { "epoch": 0.5993134549370667, "grad_norm": 0.45661462311701473, "learning_rate": 9.699810967011595e-06, "loss": 0.4619, "step": 6678 }, { "epoch": 0.5994031993897377, "grad_norm": 0.4914069880206978, "learning_rate": 9.699632741353329e-06, "loss": 0.4415, "step": 6679 }, { "epoch": 0.5994929438424087, "grad_norm": 0.4699025194278011, "learning_rate": 9.699454464441816e-06, "loss": 0.4475, "step": 6680 }, { "epoch": 0.5995826882950798, "grad_norm": 0.4736690152381642, "learning_rate": 9.699276136279002e-06, "loss": 0.5029, "step": 6681 }, { "epoch": 0.5996724327477507, "grad_norm": 0.44382422125044124, "learning_rate": 9.699097756866829e-06, "loss": 0.3819, "step": 6682 }, { "epoch": 0.5997621772004218, "grad_norm": 0.41348545553059185, "learning_rate": 9.698919326207245e-06, "loss": 0.3664, "step": 6683 }, { "epoch": 0.5998519216530929, "grad_norm": 0.4831362882715823, "learning_rate": 9.698740844302195e-06, "loss": 0.475, "step": 6684 }, { "epoch": 0.5999416661057638, "grad_norm": 0.5116768485773856, "learning_rate": 9.698562311153625e-06, "loss": 0.4465, "step": 6685 }, { "epoch": 0.6000314105584349, "grad_norm": 0.47022279823727425, "learning_rate": 9.698383726763485e-06, "loss": 0.4711, "step": 6686 }, { "epoch": 0.6001211550111059, "grad_norm": 0.47524077200334974, "learning_rate": 9.698205091133718e-06, "loss": 0.4546, "step": 6687 }, { "epoch": 0.6002108994637769, "grad_norm": 0.4886970815966349, "learning_rate": 9.698026404266275e-06, "loss": 0.4756, "step": 6688 }, { "epoch": 0.6003006439164479, "grad_norm": 0.4379788493629613, "learning_rate": 9.697847666163103e-06, "loss": 0.3894, "step": 6689 }, { "epoch": 0.600390388369119, "grad_norm": 0.4969424757076003, "learning_rate": 9.697668876826153e-06, "loss": 0.4795, "step": 6690 }, { "epoch": 0.6004801328217899, "grad_norm": 0.42023713917043876, "learning_rate": 9.697490036257375e-06, "loss": 0.3883, "step": 6691 }, { "epoch": 0.600569877274461, "grad_norm": 0.48011784722541817, "learning_rate": 9.69731114445872e-06, "loss": 0.4737, "step": 6692 }, { "epoch": 0.6006596217271319, "grad_norm": 0.5331282910832698, "learning_rate": 9.697132201432133e-06, "loss": 0.5235, "step": 6693 }, { "epoch": 0.600749366179803, "grad_norm": 0.44867115775583327, "learning_rate": 9.696953207179573e-06, "loss": 0.4306, "step": 6694 }, { "epoch": 0.6008391106324741, "grad_norm": 0.5102032174338595, "learning_rate": 9.696774161702988e-06, "loss": 0.5207, "step": 6695 }, { "epoch": 0.600928855085145, "grad_norm": 0.4906734183199204, "learning_rate": 9.696595065004333e-06, "loss": 0.4497, "step": 6696 }, { "epoch": 0.6010185995378161, "grad_norm": 0.4468158606007009, "learning_rate": 9.69641591708556e-06, "loss": 0.4232, "step": 6697 }, { "epoch": 0.6011083439904871, "grad_norm": 0.4743155550375821, "learning_rate": 9.696236717948622e-06, "loss": 0.463, "step": 6698 }, { "epoch": 0.6011980884431581, "grad_norm": 0.4683341897134594, "learning_rate": 9.696057467595475e-06, "loss": 0.4016, "step": 6699 }, { "epoch": 0.6012878328958291, "grad_norm": 0.4975809211388592, "learning_rate": 9.695878166028071e-06, "loss": 0.5318, "step": 6700 }, { "epoch": 0.6013775773485002, "grad_norm": 0.4883796390831829, "learning_rate": 9.695698813248368e-06, "loss": 0.4947, "step": 6701 }, { "epoch": 0.6014673218011711, "grad_norm": 0.5254836652140442, "learning_rate": 9.695519409258322e-06, "loss": 0.5353, "step": 6702 }, { "epoch": 0.6015570662538422, "grad_norm": 0.4893095910446923, "learning_rate": 9.695339954059889e-06, "loss": 0.4555, "step": 6703 }, { "epoch": 0.6016468107065132, "grad_norm": 0.47775449821410876, "learning_rate": 9.695160447655026e-06, "loss": 0.4735, "step": 6704 }, { "epoch": 0.6017365551591842, "grad_norm": 0.46708600427191754, "learning_rate": 9.694980890045688e-06, "loss": 0.4585, "step": 6705 }, { "epoch": 0.6018262996118553, "grad_norm": 0.4430215291985726, "learning_rate": 9.69480128123384e-06, "loss": 0.3803, "step": 6706 }, { "epoch": 0.6019160440645263, "grad_norm": 0.5130963472378947, "learning_rate": 9.694621621221432e-06, "loss": 0.5101, "step": 6707 }, { "epoch": 0.6020057885171973, "grad_norm": 0.4796600575174584, "learning_rate": 9.694441910010432e-06, "loss": 0.449, "step": 6708 }, { "epoch": 0.6020955329698683, "grad_norm": 0.5109656145078341, "learning_rate": 9.694262147602793e-06, "loss": 0.4639, "step": 6709 }, { "epoch": 0.6021852774225394, "grad_norm": 0.4638448522893214, "learning_rate": 9.69408233400048e-06, "loss": 0.4248, "step": 6710 }, { "epoch": 0.6022750218752103, "grad_norm": 0.4824685258331896, "learning_rate": 9.693902469205452e-06, "loss": 0.459, "step": 6711 }, { "epoch": 0.6023647663278814, "grad_norm": 0.5166019145163835, "learning_rate": 9.693722553219671e-06, "loss": 0.4866, "step": 6712 }, { "epoch": 0.6024545107805523, "grad_norm": 0.5102319238139839, "learning_rate": 9.693542586045097e-06, "loss": 0.5352, "step": 6713 }, { "epoch": 0.6025442552332234, "grad_norm": 0.47376013999397776, "learning_rate": 9.693362567683698e-06, "loss": 0.4174, "step": 6714 }, { "epoch": 0.6026339996858944, "grad_norm": 0.5114042977179024, "learning_rate": 9.693182498137432e-06, "loss": 0.4797, "step": 6715 }, { "epoch": 0.6027237441385654, "grad_norm": 0.4563535852384355, "learning_rate": 9.693002377408265e-06, "loss": 0.4399, "step": 6716 }, { "epoch": 0.6028134885912364, "grad_norm": 0.4803224294928545, "learning_rate": 9.69282220549816e-06, "loss": 0.4359, "step": 6717 }, { "epoch": 0.6029032330439075, "grad_norm": 0.48579348270455386, "learning_rate": 9.692641982409085e-06, "loss": 0.4992, "step": 6718 }, { "epoch": 0.6029929774965785, "grad_norm": 0.47814691356909195, "learning_rate": 9.692461708143002e-06, "loss": 0.4845, "step": 6719 }, { "epoch": 0.6030827219492495, "grad_norm": 0.5354741420363325, "learning_rate": 9.69228138270188e-06, "loss": 0.4968, "step": 6720 }, { "epoch": 0.6031724664019206, "grad_norm": 0.46516806400979827, "learning_rate": 9.692101006087683e-06, "loss": 0.4022, "step": 6721 }, { "epoch": 0.6032622108545915, "grad_norm": 0.542701395738551, "learning_rate": 9.691920578302379e-06, "loss": 0.5231, "step": 6722 }, { "epoch": 0.6033519553072626, "grad_norm": 0.518496424021262, "learning_rate": 9.691740099347935e-06, "loss": 0.4836, "step": 6723 }, { "epoch": 0.6034416997599336, "grad_norm": 0.4543367577841053, "learning_rate": 9.691559569226324e-06, "loss": 0.4146, "step": 6724 }, { "epoch": 0.6035314442126046, "grad_norm": 0.46790310092756254, "learning_rate": 9.691378987939509e-06, "loss": 0.4327, "step": 6725 }, { "epoch": 0.6036211886652756, "grad_norm": 0.4496598774834368, "learning_rate": 9.69119835548946e-06, "loss": 0.3692, "step": 6726 }, { "epoch": 0.6037109331179467, "grad_norm": 0.45307334143661016, "learning_rate": 9.691017671878149e-06, "loss": 0.4313, "step": 6727 }, { "epoch": 0.6038006775706176, "grad_norm": 0.4535318477444745, "learning_rate": 9.690836937107546e-06, "loss": 0.4078, "step": 6728 }, { "epoch": 0.6038904220232887, "grad_norm": 0.49141327936420226, "learning_rate": 9.690656151179623e-06, "loss": 0.4735, "step": 6729 }, { "epoch": 0.6039801664759598, "grad_norm": 0.5187858687579215, "learning_rate": 9.69047531409635e-06, "loss": 0.5084, "step": 6730 }, { "epoch": 0.6040699109286307, "grad_norm": 0.47990982712322494, "learning_rate": 9.690294425859698e-06, "loss": 0.4204, "step": 6731 }, { "epoch": 0.6041596553813018, "grad_norm": 0.5358697148131903, "learning_rate": 9.690113486471645e-06, "loss": 0.5554, "step": 6732 }, { "epoch": 0.6042493998339727, "grad_norm": 0.513682715397858, "learning_rate": 9.689932495934158e-06, "loss": 0.4482, "step": 6733 }, { "epoch": 0.6043391442866438, "grad_norm": 0.4792685246777402, "learning_rate": 9.689751454249215e-06, "loss": 0.4528, "step": 6734 }, { "epoch": 0.6044288887393148, "grad_norm": 0.4821900540076651, "learning_rate": 9.689570361418788e-06, "loss": 0.4569, "step": 6735 }, { "epoch": 0.6045186331919858, "grad_norm": 0.4972997183784302, "learning_rate": 9.689389217444854e-06, "loss": 0.4818, "step": 6736 }, { "epoch": 0.6046083776446568, "grad_norm": 0.5019015949447808, "learning_rate": 9.689208022329387e-06, "loss": 0.455, "step": 6737 }, { "epoch": 0.6046981220973279, "grad_norm": 0.4504812480488154, "learning_rate": 9.689026776074362e-06, "loss": 0.4237, "step": 6738 }, { "epoch": 0.6047878665499988, "grad_norm": 0.4627549767393529, "learning_rate": 9.688845478681759e-06, "loss": 0.4121, "step": 6739 }, { "epoch": 0.6048776110026699, "grad_norm": 0.5079797150690926, "learning_rate": 9.688664130153554e-06, "loss": 0.4658, "step": 6740 }, { "epoch": 0.604967355455341, "grad_norm": 0.5075473311507859, "learning_rate": 9.688482730491722e-06, "loss": 0.4629, "step": 6741 }, { "epoch": 0.6050570999080119, "grad_norm": 0.4849728001064649, "learning_rate": 9.688301279698244e-06, "loss": 0.4872, "step": 6742 }, { "epoch": 0.605146844360683, "grad_norm": 0.5365669673274464, "learning_rate": 9.688119777775098e-06, "loss": 0.4978, "step": 6743 }, { "epoch": 0.605236588813354, "grad_norm": 0.49163919047158466, "learning_rate": 9.687938224724266e-06, "loss": 0.4108, "step": 6744 }, { "epoch": 0.605326333266025, "grad_norm": 0.49501764003341625, "learning_rate": 9.687756620547723e-06, "loss": 0.4671, "step": 6745 }, { "epoch": 0.605416077718696, "grad_norm": 0.460699982400342, "learning_rate": 9.687574965247455e-06, "loss": 0.4518, "step": 6746 }, { "epoch": 0.6055058221713671, "grad_norm": 0.47656651933149546, "learning_rate": 9.687393258825438e-06, "loss": 0.4162, "step": 6747 }, { "epoch": 0.605595566624038, "grad_norm": 0.5178152743002844, "learning_rate": 9.687211501283657e-06, "loss": 0.504, "step": 6748 }, { "epoch": 0.6056853110767091, "grad_norm": 0.46258744646342265, "learning_rate": 9.687029692624092e-06, "loss": 0.4277, "step": 6749 }, { "epoch": 0.60577505552938, "grad_norm": 0.4917340429237092, "learning_rate": 9.686847832848729e-06, "loss": 0.4854, "step": 6750 }, { "epoch": 0.6058647999820511, "grad_norm": 0.5452359380919566, "learning_rate": 9.686665921959548e-06, "loss": 0.5469, "step": 6751 }, { "epoch": 0.6059545444347221, "grad_norm": 0.4783345181256911, "learning_rate": 9.686483959958534e-06, "loss": 0.3951, "step": 6752 }, { "epoch": 0.6060442888873931, "grad_norm": 0.5195424545194244, "learning_rate": 9.686301946847674e-06, "loss": 0.5173, "step": 6753 }, { "epoch": 0.6061340333400642, "grad_norm": 0.4804930312671518, "learning_rate": 9.686119882628948e-06, "loss": 0.4337, "step": 6754 }, { "epoch": 0.6062237777927352, "grad_norm": 0.49598720304591587, "learning_rate": 9.685937767304346e-06, "loss": 0.4972, "step": 6755 }, { "epoch": 0.6063135222454062, "grad_norm": 0.4414913362303684, "learning_rate": 9.68575560087585e-06, "loss": 0.4073, "step": 6756 }, { "epoch": 0.6064032666980772, "grad_norm": 0.5071797634804347, "learning_rate": 9.68557338334545e-06, "loss": 0.5565, "step": 6757 }, { "epoch": 0.6064930111507483, "grad_norm": 0.487880569407278, "learning_rate": 9.685391114715133e-06, "loss": 0.4812, "step": 6758 }, { "epoch": 0.6065827556034192, "grad_norm": 0.5185864703583866, "learning_rate": 9.685208794986886e-06, "loss": 0.4747, "step": 6759 }, { "epoch": 0.6066725000560903, "grad_norm": 0.5065373491465052, "learning_rate": 9.685026424162696e-06, "loss": 0.4675, "step": 6760 }, { "epoch": 0.6067622445087613, "grad_norm": 0.5341927130362725, "learning_rate": 9.684844002244554e-06, "loss": 0.5232, "step": 6761 }, { "epoch": 0.6068519889614323, "grad_norm": 0.46130398282369595, "learning_rate": 9.68466152923445e-06, "loss": 0.4553, "step": 6762 }, { "epoch": 0.6069417334141033, "grad_norm": 0.501563182859308, "learning_rate": 9.684479005134371e-06, "loss": 0.4588, "step": 6763 }, { "epoch": 0.6070314778667744, "grad_norm": 0.48673601020153734, "learning_rate": 9.68429642994631e-06, "loss": 0.4395, "step": 6764 }, { "epoch": 0.6071212223194454, "grad_norm": 0.44967861208410315, "learning_rate": 9.684113803672255e-06, "loss": 0.4118, "step": 6765 }, { "epoch": 0.6072109667721164, "grad_norm": 0.5228397213521612, "learning_rate": 9.683931126314204e-06, "loss": 0.4346, "step": 6766 }, { "epoch": 0.6073007112247875, "grad_norm": 0.48747020669135727, "learning_rate": 9.683748397874142e-06, "loss": 0.4527, "step": 6767 }, { "epoch": 0.6073904556774584, "grad_norm": 0.44402116482270104, "learning_rate": 9.683565618354066e-06, "loss": 0.4249, "step": 6768 }, { "epoch": 0.6074802001301295, "grad_norm": 0.48713201945388723, "learning_rate": 9.683382787755969e-06, "loss": 0.4454, "step": 6769 }, { "epoch": 0.6075699445828004, "grad_norm": 0.5129438301996505, "learning_rate": 9.683199906081843e-06, "loss": 0.5144, "step": 6770 }, { "epoch": 0.6076596890354715, "grad_norm": 0.47931593292277874, "learning_rate": 9.683016973333687e-06, "loss": 0.4438, "step": 6771 }, { "epoch": 0.6077494334881425, "grad_norm": 0.500658351413707, "learning_rate": 9.682833989513491e-06, "loss": 0.4867, "step": 6772 }, { "epoch": 0.6078391779408135, "grad_norm": 0.4628739918461511, "learning_rate": 9.682650954623251e-06, "loss": 0.462, "step": 6773 }, { "epoch": 0.6079289223934845, "grad_norm": 0.4961007998487183, "learning_rate": 9.682467868664968e-06, "loss": 0.4703, "step": 6774 }, { "epoch": 0.6080186668461556, "grad_norm": 0.4894659339466327, "learning_rate": 9.682284731640632e-06, "loss": 0.487, "step": 6775 }, { "epoch": 0.6081084112988266, "grad_norm": 0.5079779737324002, "learning_rate": 9.682101543552246e-06, "loss": 0.4972, "step": 6776 }, { "epoch": 0.6081981557514976, "grad_norm": 0.492784230918585, "learning_rate": 9.681918304401805e-06, "loss": 0.4133, "step": 6777 }, { "epoch": 0.6082879002041687, "grad_norm": 0.532263959135084, "learning_rate": 9.681735014191307e-06, "loss": 0.4991, "step": 6778 }, { "epoch": 0.6083776446568396, "grad_norm": 0.5130108257687761, "learning_rate": 9.681551672922752e-06, "loss": 0.4719, "step": 6779 }, { "epoch": 0.6084673891095107, "grad_norm": 0.48976933046163346, "learning_rate": 9.681368280598139e-06, "loss": 0.4871, "step": 6780 }, { "epoch": 0.6085571335621817, "grad_norm": 0.47950266436065797, "learning_rate": 9.681184837219468e-06, "loss": 0.4785, "step": 6781 }, { "epoch": 0.6086468780148527, "grad_norm": 0.516663180283681, "learning_rate": 9.68100134278874e-06, "loss": 0.5201, "step": 6782 }, { "epoch": 0.6087366224675237, "grad_norm": 0.4590390601915337, "learning_rate": 9.680817797307955e-06, "loss": 0.427, "step": 6783 }, { "epoch": 0.6088263669201948, "grad_norm": 0.4823313476659469, "learning_rate": 9.680634200779118e-06, "loss": 0.4137, "step": 6784 }, { "epoch": 0.6089161113728657, "grad_norm": 0.48580225007128586, "learning_rate": 9.680450553204226e-06, "loss": 0.4916, "step": 6785 }, { "epoch": 0.6090058558255368, "grad_norm": 0.5186823015263183, "learning_rate": 9.680266854585288e-06, "loss": 0.5147, "step": 6786 }, { "epoch": 0.6090956002782077, "grad_norm": 0.498119627718558, "learning_rate": 9.6800831049243e-06, "loss": 0.4589, "step": 6787 }, { "epoch": 0.6091853447308788, "grad_norm": 0.4692250172463176, "learning_rate": 9.679899304223273e-06, "loss": 0.4644, "step": 6788 }, { "epoch": 0.6092750891835499, "grad_norm": 0.5284201654991915, "learning_rate": 9.679715452484208e-06, "loss": 0.5582, "step": 6789 }, { "epoch": 0.6093648336362208, "grad_norm": 0.5470707091482301, "learning_rate": 9.679531549709112e-06, "loss": 0.5086, "step": 6790 }, { "epoch": 0.6094545780888919, "grad_norm": 0.49291414864385036, "learning_rate": 9.679347595899987e-06, "loss": 0.4658, "step": 6791 }, { "epoch": 0.6095443225415629, "grad_norm": 0.5002869005345943, "learning_rate": 9.679163591058843e-06, "loss": 0.4941, "step": 6792 }, { "epoch": 0.609634066994234, "grad_norm": 0.44496115246038687, "learning_rate": 9.678979535187682e-06, "loss": 0.4091, "step": 6793 }, { "epoch": 0.6097238114469049, "grad_norm": 0.49757608650510377, "learning_rate": 9.678795428288518e-06, "loss": 0.4841, "step": 6794 }, { "epoch": 0.609813555899576, "grad_norm": 0.48508582503867165, "learning_rate": 9.678611270363355e-06, "loss": 0.4254, "step": 6795 }, { "epoch": 0.6099033003522469, "grad_norm": 0.5528302444805495, "learning_rate": 9.6784270614142e-06, "loss": 0.5136, "step": 6796 }, { "epoch": 0.609993044804918, "grad_norm": 0.4926381120525993, "learning_rate": 9.678242801443062e-06, "loss": 0.4494, "step": 6797 }, { "epoch": 0.610082789257589, "grad_norm": 0.4829919603130731, "learning_rate": 9.678058490451956e-06, "loss": 0.4539, "step": 6798 }, { "epoch": 0.61017253371026, "grad_norm": 0.4895829987702992, "learning_rate": 9.677874128442885e-06, "loss": 0.4676, "step": 6799 }, { "epoch": 0.6102622781629311, "grad_norm": 0.5062761751320433, "learning_rate": 9.677689715417863e-06, "loss": 0.498, "step": 6800 }, { "epoch": 0.6103520226156021, "grad_norm": 0.49677408403208123, "learning_rate": 9.677505251378903e-06, "loss": 0.4379, "step": 6801 }, { "epoch": 0.6104417670682731, "grad_norm": 0.45973026885069374, "learning_rate": 9.677320736328014e-06, "loss": 0.42, "step": 6802 }, { "epoch": 0.6105315115209441, "grad_norm": 0.49923511415645355, "learning_rate": 9.677136170267206e-06, "loss": 0.5096, "step": 6803 }, { "epoch": 0.6106212559736152, "grad_norm": 0.48805131908024396, "learning_rate": 9.676951553198499e-06, "loss": 0.4668, "step": 6804 }, { "epoch": 0.6107110004262861, "grad_norm": 0.43986737926814623, "learning_rate": 9.6767668851239e-06, "loss": 0.3984, "step": 6805 }, { "epoch": 0.6108007448789572, "grad_norm": 0.45405829448957796, "learning_rate": 9.676582166045424e-06, "loss": 0.4047, "step": 6806 }, { "epoch": 0.6108904893316282, "grad_norm": 0.4557142779759542, "learning_rate": 9.676397395965088e-06, "loss": 0.4287, "step": 6807 }, { "epoch": 0.6109802337842992, "grad_norm": 0.4991161185700464, "learning_rate": 9.676212574884905e-06, "loss": 0.5217, "step": 6808 }, { "epoch": 0.6110699782369702, "grad_norm": 0.4849937512804069, "learning_rate": 9.676027702806891e-06, "loss": 0.4603, "step": 6809 }, { "epoch": 0.6111597226896412, "grad_norm": 0.5014062132504532, "learning_rate": 9.675842779733064e-06, "loss": 0.4483, "step": 6810 }, { "epoch": 0.6112494671423123, "grad_norm": 0.5010722167501823, "learning_rate": 9.675657805665439e-06, "loss": 0.4226, "step": 6811 }, { "epoch": 0.6113392115949833, "grad_norm": 0.5681500976050113, "learning_rate": 9.675472780606032e-06, "loss": 0.5844, "step": 6812 }, { "epoch": 0.6114289560476543, "grad_norm": 0.5088846920949168, "learning_rate": 9.675287704556864e-06, "loss": 0.4263, "step": 6813 }, { "epoch": 0.6115187005003253, "grad_norm": 0.5223095874134603, "learning_rate": 9.67510257751995e-06, "loss": 0.51, "step": 6814 }, { "epoch": 0.6116084449529964, "grad_norm": 0.4535034064204404, "learning_rate": 9.674917399497312e-06, "loss": 0.4032, "step": 6815 }, { "epoch": 0.6116981894056673, "grad_norm": 0.47892743551162303, "learning_rate": 9.674732170490968e-06, "loss": 0.4718, "step": 6816 }, { "epoch": 0.6117879338583384, "grad_norm": 0.45670393197445835, "learning_rate": 9.674546890502938e-06, "loss": 0.4609, "step": 6817 }, { "epoch": 0.6118776783110094, "grad_norm": 0.4733771235452997, "learning_rate": 9.674361559535244e-06, "loss": 0.4138, "step": 6818 }, { "epoch": 0.6119674227636804, "grad_norm": 0.46141895730246923, "learning_rate": 9.674176177589904e-06, "loss": 0.4243, "step": 6819 }, { "epoch": 0.6120571672163514, "grad_norm": 0.48465981494040045, "learning_rate": 9.673990744668945e-06, "loss": 0.4455, "step": 6820 }, { "epoch": 0.6121469116690225, "grad_norm": 0.46235464271369836, "learning_rate": 9.673805260774383e-06, "loss": 0.4348, "step": 6821 }, { "epoch": 0.6122366561216934, "grad_norm": 0.4960958662743075, "learning_rate": 9.673619725908244e-06, "loss": 0.51, "step": 6822 }, { "epoch": 0.6123264005743645, "grad_norm": 0.5324592580999359, "learning_rate": 9.673434140072554e-06, "loss": 0.4991, "step": 6823 }, { "epoch": 0.6124161450270356, "grad_norm": 0.4377923278454505, "learning_rate": 9.67324850326933e-06, "loss": 0.4122, "step": 6824 }, { "epoch": 0.6125058894797065, "grad_norm": 0.5203892931321639, "learning_rate": 9.673062815500604e-06, "loss": 0.5239, "step": 6825 }, { "epoch": 0.6125956339323776, "grad_norm": 0.4745981932928014, "learning_rate": 9.672877076768399e-06, "loss": 0.4575, "step": 6826 }, { "epoch": 0.6126853783850486, "grad_norm": 0.49478561625238254, "learning_rate": 9.672691287074736e-06, "loss": 0.4884, "step": 6827 }, { "epoch": 0.6127751228377196, "grad_norm": 0.47077107162327925, "learning_rate": 9.672505446421647e-06, "loss": 0.4226, "step": 6828 }, { "epoch": 0.6128648672903906, "grad_norm": 0.5033679288536145, "learning_rate": 9.672319554811157e-06, "loss": 0.4507, "step": 6829 }, { "epoch": 0.6129546117430617, "grad_norm": 0.461247819855208, "learning_rate": 9.672133612245291e-06, "loss": 0.4544, "step": 6830 }, { "epoch": 0.6130443561957326, "grad_norm": 0.5210221140231392, "learning_rate": 9.67194761872608e-06, "loss": 0.4483, "step": 6831 }, { "epoch": 0.6131341006484037, "grad_norm": 0.456373110633914, "learning_rate": 9.67176157425555e-06, "loss": 0.4394, "step": 6832 }, { "epoch": 0.6132238451010746, "grad_norm": 0.48941960491931963, "learning_rate": 9.671575478835734e-06, "loss": 0.4721, "step": 6833 }, { "epoch": 0.6133135895537457, "grad_norm": 0.5146589268916779, "learning_rate": 9.671389332468655e-06, "loss": 0.5019, "step": 6834 }, { "epoch": 0.6134033340064168, "grad_norm": 0.4742407222724972, "learning_rate": 9.671203135156348e-06, "loss": 0.3996, "step": 6835 }, { "epoch": 0.6134930784590877, "grad_norm": 0.4728247564331727, "learning_rate": 9.671016886900841e-06, "loss": 0.4516, "step": 6836 }, { "epoch": 0.6135828229117588, "grad_norm": 0.4991836014124764, "learning_rate": 9.670830587704167e-06, "loss": 0.4601, "step": 6837 }, { "epoch": 0.6136725673644298, "grad_norm": 0.46005329924290017, "learning_rate": 9.670644237568358e-06, "loss": 0.4028, "step": 6838 }, { "epoch": 0.6137623118171008, "grad_norm": 0.512028970302735, "learning_rate": 9.670457836495446e-06, "loss": 0.4106, "step": 6839 }, { "epoch": 0.6138520562697718, "grad_norm": 0.4508961599768108, "learning_rate": 9.670271384487462e-06, "loss": 0.3921, "step": 6840 }, { "epoch": 0.6139418007224429, "grad_norm": 0.5072515702527393, "learning_rate": 9.67008488154644e-06, "loss": 0.5087, "step": 6841 }, { "epoch": 0.6140315451751138, "grad_norm": 0.48607750005764455, "learning_rate": 9.669898327674416e-06, "loss": 0.4755, "step": 6842 }, { "epoch": 0.6141212896277849, "grad_norm": 0.49269216570233043, "learning_rate": 9.669711722873423e-06, "loss": 0.4759, "step": 6843 }, { "epoch": 0.6142110340804559, "grad_norm": 0.4937829024043317, "learning_rate": 9.669525067145496e-06, "loss": 0.495, "step": 6844 }, { "epoch": 0.6143007785331269, "grad_norm": 0.5067221896285711, "learning_rate": 9.66933836049267e-06, "loss": 0.4991, "step": 6845 }, { "epoch": 0.614390522985798, "grad_norm": 0.442590084081862, "learning_rate": 9.669151602916984e-06, "loss": 0.4401, "step": 6846 }, { "epoch": 0.614480267438469, "grad_norm": 0.5231815408379675, "learning_rate": 9.668964794420473e-06, "loss": 0.5127, "step": 6847 }, { "epoch": 0.61457001189114, "grad_norm": 0.4647958056461706, "learning_rate": 9.668777935005171e-06, "loss": 0.4294, "step": 6848 }, { "epoch": 0.614659756343811, "grad_norm": 0.4868848303560012, "learning_rate": 9.668591024673122e-06, "loss": 0.4715, "step": 6849 }, { "epoch": 0.614749500796482, "grad_norm": 0.48879185864190544, "learning_rate": 9.66840406342636e-06, "loss": 0.4496, "step": 6850 }, { "epoch": 0.614839245249153, "grad_norm": 0.46706131820602526, "learning_rate": 9.668217051266926e-06, "loss": 0.4281, "step": 6851 }, { "epoch": 0.6149289897018241, "grad_norm": 0.5128570348834299, "learning_rate": 9.668029988196858e-06, "loss": 0.4473, "step": 6852 }, { "epoch": 0.615018734154495, "grad_norm": 0.5157756782535156, "learning_rate": 9.667842874218198e-06, "loss": 0.5014, "step": 6853 }, { "epoch": 0.6151084786071661, "grad_norm": 0.5029853298112594, "learning_rate": 9.667655709332984e-06, "loss": 0.4712, "step": 6854 }, { "epoch": 0.6151982230598371, "grad_norm": 0.4375071572641784, "learning_rate": 9.66746849354326e-06, "loss": 0.4067, "step": 6855 }, { "epoch": 0.6152879675125081, "grad_norm": 0.48431254163471504, "learning_rate": 9.667281226851066e-06, "loss": 0.4667, "step": 6856 }, { "epoch": 0.6153777119651791, "grad_norm": 0.518617270490974, "learning_rate": 9.667093909258444e-06, "loss": 0.4824, "step": 6857 }, { "epoch": 0.6154674564178502, "grad_norm": 0.5092043700744096, "learning_rate": 9.66690654076744e-06, "loss": 0.4961, "step": 6858 }, { "epoch": 0.6155572008705212, "grad_norm": 0.4526296568872726, "learning_rate": 9.666719121380093e-06, "loss": 0.4217, "step": 6859 }, { "epoch": 0.6156469453231922, "grad_norm": 0.5057365357534499, "learning_rate": 9.66653165109845e-06, "loss": 0.4803, "step": 6860 }, { "epoch": 0.6157366897758633, "grad_norm": 0.45446812922931373, "learning_rate": 9.666344129924554e-06, "loss": 0.4382, "step": 6861 }, { "epoch": 0.6158264342285342, "grad_norm": 0.5323789637302924, "learning_rate": 9.66615655786045e-06, "loss": 0.4944, "step": 6862 }, { "epoch": 0.6159161786812053, "grad_norm": 0.5108025640419288, "learning_rate": 9.665968934908185e-06, "loss": 0.5168, "step": 6863 }, { "epoch": 0.6160059231338763, "grad_norm": 0.46384810444831875, "learning_rate": 9.665781261069804e-06, "loss": 0.4267, "step": 6864 }, { "epoch": 0.6160956675865473, "grad_norm": 0.501195897786597, "learning_rate": 9.665593536347354e-06, "loss": 0.4998, "step": 6865 }, { "epoch": 0.6161854120392183, "grad_norm": 0.5069857873649829, "learning_rate": 9.665405760742883e-06, "loss": 0.5058, "step": 6866 }, { "epoch": 0.6162751564918894, "grad_norm": 0.4630839188425847, "learning_rate": 9.665217934258437e-06, "loss": 0.4223, "step": 6867 }, { "epoch": 0.6163649009445603, "grad_norm": 0.46933743102456454, "learning_rate": 9.665030056896067e-06, "loss": 0.4572, "step": 6868 }, { "epoch": 0.6164546453972314, "grad_norm": 0.47310165302299395, "learning_rate": 9.664842128657819e-06, "loss": 0.4486, "step": 6869 }, { "epoch": 0.6165443898499025, "grad_norm": 0.4988282284871563, "learning_rate": 9.664654149545745e-06, "loss": 0.5009, "step": 6870 }, { "epoch": 0.6166341343025734, "grad_norm": 0.5027906850434679, "learning_rate": 9.664466119561895e-06, "loss": 0.4018, "step": 6871 }, { "epoch": 0.6167238787552445, "grad_norm": 0.5478783213087093, "learning_rate": 9.664278038708316e-06, "loss": 0.5136, "step": 6872 }, { "epoch": 0.6168136232079154, "grad_norm": 0.4769473171370109, "learning_rate": 9.664089906987064e-06, "loss": 0.4476, "step": 6873 }, { "epoch": 0.6169033676605865, "grad_norm": 0.4156248248024773, "learning_rate": 9.663901724400188e-06, "loss": 0.3804, "step": 6874 }, { "epoch": 0.6169931121132575, "grad_norm": 0.48605885656122955, "learning_rate": 9.663713490949742e-06, "loss": 0.4909, "step": 6875 }, { "epoch": 0.6170828565659285, "grad_norm": 0.5308174188238596, "learning_rate": 9.663525206637779e-06, "loss": 0.5165, "step": 6876 }, { "epoch": 0.6171726010185995, "grad_norm": 0.5381762830798134, "learning_rate": 9.663336871466349e-06, "loss": 0.4894, "step": 6877 }, { "epoch": 0.6172623454712706, "grad_norm": 0.48611269590761574, "learning_rate": 9.663148485437508e-06, "loss": 0.4514, "step": 6878 }, { "epoch": 0.6173520899239415, "grad_norm": 0.555711045715146, "learning_rate": 9.66296004855331e-06, "loss": 0.5135, "step": 6879 }, { "epoch": 0.6174418343766126, "grad_norm": 0.44073594207976907, "learning_rate": 9.662771560815812e-06, "loss": 0.4026, "step": 6880 }, { "epoch": 0.6175315788292837, "grad_norm": 0.48983159976283747, "learning_rate": 9.662583022227068e-06, "loss": 0.49, "step": 6881 }, { "epoch": 0.6176213232819546, "grad_norm": 0.48936891823216694, "learning_rate": 9.662394432789136e-06, "loss": 0.4587, "step": 6882 }, { "epoch": 0.6177110677346257, "grad_norm": 0.48260596033555636, "learning_rate": 9.662205792504069e-06, "loss": 0.4429, "step": 6883 }, { "epoch": 0.6178008121872967, "grad_norm": 0.48794405921251777, "learning_rate": 9.662017101373926e-06, "loss": 0.4344, "step": 6884 }, { "epoch": 0.6178905566399677, "grad_norm": 0.4625340141398141, "learning_rate": 9.661828359400766e-06, "loss": 0.4151, "step": 6885 }, { "epoch": 0.6179803010926387, "grad_norm": 0.5043206966423551, "learning_rate": 9.661639566586646e-06, "loss": 0.5198, "step": 6886 }, { "epoch": 0.6180700455453098, "grad_norm": 0.48751236678240334, "learning_rate": 9.661450722933627e-06, "loss": 0.4719, "step": 6887 }, { "epoch": 0.6181597899979807, "grad_norm": 0.4702123167928784, "learning_rate": 9.661261828443766e-06, "loss": 0.3988, "step": 6888 }, { "epoch": 0.6182495344506518, "grad_norm": 0.49378830275366137, "learning_rate": 9.661072883119126e-06, "loss": 0.5002, "step": 6889 }, { "epoch": 0.6183392789033227, "grad_norm": 0.46532075195706124, "learning_rate": 9.660883886961763e-06, "loss": 0.3918, "step": 6890 }, { "epoch": 0.6184290233559938, "grad_norm": 0.48831242785617235, "learning_rate": 9.660694839973743e-06, "loss": 0.4454, "step": 6891 }, { "epoch": 0.6185187678086649, "grad_norm": 0.45143211164049957, "learning_rate": 9.660505742157125e-06, "loss": 0.4047, "step": 6892 }, { "epoch": 0.6186085122613358, "grad_norm": 0.5049364442309008, "learning_rate": 9.660316593513972e-06, "loss": 0.4955, "step": 6893 }, { "epoch": 0.6186982567140069, "grad_norm": 0.45567878123511874, "learning_rate": 9.660127394046346e-06, "loss": 0.4125, "step": 6894 }, { "epoch": 0.6187880011666779, "grad_norm": 0.5037134370356934, "learning_rate": 9.659938143756312e-06, "loss": 0.4999, "step": 6895 }, { "epoch": 0.6188777456193489, "grad_norm": 0.5079290661081116, "learning_rate": 9.659748842645931e-06, "loss": 0.4802, "step": 6896 }, { "epoch": 0.6189674900720199, "grad_norm": 0.4901517572584977, "learning_rate": 9.65955949071727e-06, "loss": 0.5083, "step": 6897 }, { "epoch": 0.619057234524691, "grad_norm": 0.4562044309952196, "learning_rate": 9.659370087972395e-06, "loss": 0.405, "step": 6898 }, { "epoch": 0.6191469789773619, "grad_norm": 0.4809937405306107, "learning_rate": 9.659180634413367e-06, "loss": 0.4321, "step": 6899 }, { "epoch": 0.619236723430033, "grad_norm": 0.4676046081587716, "learning_rate": 9.658991130042258e-06, "loss": 0.429, "step": 6900 }, { "epoch": 0.619326467882704, "grad_norm": 0.4808323362670118, "learning_rate": 9.658801574861132e-06, "loss": 0.4488, "step": 6901 }, { "epoch": 0.619416212335375, "grad_norm": 0.47051884425889334, "learning_rate": 9.658611968872055e-06, "loss": 0.4461, "step": 6902 }, { "epoch": 0.619505956788046, "grad_norm": 0.5278233647658297, "learning_rate": 9.658422312077096e-06, "loss": 0.4827, "step": 6903 }, { "epoch": 0.6195957012407171, "grad_norm": 0.5234859049219245, "learning_rate": 9.658232604478323e-06, "loss": 0.4906, "step": 6904 }, { "epoch": 0.6196854456933881, "grad_norm": 0.4496558161262902, "learning_rate": 9.658042846077807e-06, "loss": 0.4207, "step": 6905 }, { "epoch": 0.6197751901460591, "grad_norm": 0.5287966880326149, "learning_rate": 9.657853036877613e-06, "loss": 0.5117, "step": 6906 }, { "epoch": 0.6198649345987302, "grad_norm": 0.4440494022640267, "learning_rate": 9.657663176879815e-06, "loss": 0.4162, "step": 6907 }, { "epoch": 0.6199546790514011, "grad_norm": 0.49571863377734915, "learning_rate": 9.657473266086483e-06, "loss": 0.471, "step": 6908 }, { "epoch": 0.6200444235040722, "grad_norm": 0.4959993002526184, "learning_rate": 9.657283304499688e-06, "loss": 0.4717, "step": 6909 }, { "epoch": 0.6201341679567431, "grad_norm": 0.5056883264153679, "learning_rate": 9.6570932921215e-06, "loss": 0.519, "step": 6910 }, { "epoch": 0.6202239124094142, "grad_norm": 0.5026376495747488, "learning_rate": 9.65690322895399e-06, "loss": 0.5261, "step": 6911 }, { "epoch": 0.6203136568620852, "grad_norm": 0.4743965831147085, "learning_rate": 9.656713114999236e-06, "loss": 0.4583, "step": 6912 }, { "epoch": 0.6204034013147562, "grad_norm": 0.44294546466984286, "learning_rate": 9.656522950259308e-06, "loss": 0.4111, "step": 6913 }, { "epoch": 0.6204931457674272, "grad_norm": 0.49345273283819857, "learning_rate": 9.65633273473628e-06, "loss": 0.4862, "step": 6914 }, { "epoch": 0.6205828902200983, "grad_norm": 0.4805541614367887, "learning_rate": 9.656142468432227e-06, "loss": 0.4346, "step": 6915 }, { "epoch": 0.6206726346727693, "grad_norm": 0.4978737634139058, "learning_rate": 9.655952151349223e-06, "loss": 0.4758, "step": 6916 }, { "epoch": 0.6207623791254403, "grad_norm": 0.44915784179204526, "learning_rate": 9.655761783489345e-06, "loss": 0.4251, "step": 6917 }, { "epoch": 0.6208521235781114, "grad_norm": 0.47566117385437234, "learning_rate": 9.65557136485467e-06, "loss": 0.4657, "step": 6918 }, { "epoch": 0.6209418680307823, "grad_norm": 0.4878239144800079, "learning_rate": 9.655380895447271e-06, "loss": 0.4698, "step": 6919 }, { "epoch": 0.6210316124834534, "grad_norm": 0.4344504725411561, "learning_rate": 9.655190375269228e-06, "loss": 0.3895, "step": 6920 }, { "epoch": 0.6211213569361244, "grad_norm": 0.5114682629756417, "learning_rate": 9.654999804322618e-06, "loss": 0.5241, "step": 6921 }, { "epoch": 0.6212111013887954, "grad_norm": 0.4853784051484854, "learning_rate": 9.65480918260952e-06, "loss": 0.448, "step": 6922 }, { "epoch": 0.6213008458414664, "grad_norm": 0.5287344664499126, "learning_rate": 9.654618510132013e-06, "loss": 0.4884, "step": 6923 }, { "epoch": 0.6213905902941375, "grad_norm": 0.4932785898754838, "learning_rate": 9.654427786892174e-06, "loss": 0.4389, "step": 6924 }, { "epoch": 0.6214803347468084, "grad_norm": 0.474369499926618, "learning_rate": 9.654237012892086e-06, "loss": 0.453, "step": 6925 }, { "epoch": 0.6215700791994795, "grad_norm": 0.48903515292868516, "learning_rate": 9.654046188133827e-06, "loss": 0.4852, "step": 6926 }, { "epoch": 0.6216598236521506, "grad_norm": 0.46693921025751267, "learning_rate": 9.65385531261948e-06, "loss": 0.4207, "step": 6927 }, { "epoch": 0.6217495681048215, "grad_norm": 0.5171559664994655, "learning_rate": 9.653664386351126e-06, "loss": 0.5099, "step": 6928 }, { "epoch": 0.6218393125574926, "grad_norm": 0.504371511061244, "learning_rate": 9.65347340933085e-06, "loss": 0.498, "step": 6929 }, { "epoch": 0.6219290570101635, "grad_norm": 0.5066903535788828, "learning_rate": 9.65328238156073e-06, "loss": 0.4956, "step": 6930 }, { "epoch": 0.6220188014628346, "grad_norm": 0.4464242920635678, "learning_rate": 9.65309130304285e-06, "loss": 0.4152, "step": 6931 }, { "epoch": 0.6221085459155056, "grad_norm": 0.5235603248098353, "learning_rate": 9.652900173779296e-06, "loss": 0.5177, "step": 6932 }, { "epoch": 0.6221982903681766, "grad_norm": 0.49752532589406806, "learning_rate": 9.652708993772153e-06, "loss": 0.4454, "step": 6933 }, { "epoch": 0.6222880348208476, "grad_norm": 0.4837663783695265, "learning_rate": 9.652517763023504e-06, "loss": 0.4366, "step": 6934 }, { "epoch": 0.6223777792735187, "grad_norm": 0.44932372019673095, "learning_rate": 9.652326481535434e-06, "loss": 0.4089, "step": 6935 }, { "epoch": 0.6224675237261896, "grad_norm": 0.534879014871917, "learning_rate": 9.65213514931003e-06, "loss": 0.5354, "step": 6936 }, { "epoch": 0.6225572681788607, "grad_norm": 0.4940655076587633, "learning_rate": 9.65194376634938e-06, "loss": 0.4786, "step": 6937 }, { "epoch": 0.6226470126315317, "grad_norm": 0.5843304802466613, "learning_rate": 9.651752332655571e-06, "loss": 0.5445, "step": 6938 }, { "epoch": 0.6227367570842027, "grad_norm": 0.46972718801660784, "learning_rate": 9.651560848230687e-06, "loss": 0.4598, "step": 6939 }, { "epoch": 0.6228265015368738, "grad_norm": 0.568543279805353, "learning_rate": 9.651369313076822e-06, "loss": 0.5229, "step": 6940 }, { "epoch": 0.6229162459895448, "grad_norm": 0.5113494360429116, "learning_rate": 9.65117772719606e-06, "loss": 0.5125, "step": 6941 }, { "epoch": 0.6230059904422158, "grad_norm": 0.4818916498724998, "learning_rate": 9.650986090590494e-06, "loss": 0.4259, "step": 6942 }, { "epoch": 0.6230957348948868, "grad_norm": 0.45288089345085103, "learning_rate": 9.65079440326221e-06, "loss": 0.4238, "step": 6943 }, { "epoch": 0.6231854793475579, "grad_norm": 0.5385687340164587, "learning_rate": 9.6506026652133e-06, "loss": 0.5083, "step": 6944 }, { "epoch": 0.6232752238002288, "grad_norm": 0.5001314514877337, "learning_rate": 9.650410876445859e-06, "loss": 0.4689, "step": 6945 }, { "epoch": 0.6233649682528999, "grad_norm": 0.4370737707966617, "learning_rate": 9.650219036961973e-06, "loss": 0.383, "step": 6946 }, { "epoch": 0.6234547127055708, "grad_norm": 0.4845986934613903, "learning_rate": 9.650027146763738e-06, "loss": 0.4409, "step": 6947 }, { "epoch": 0.6235444571582419, "grad_norm": 0.4336534205953858, "learning_rate": 9.649835205853244e-06, "loss": 0.3963, "step": 6948 }, { "epoch": 0.6236342016109129, "grad_norm": 0.5561857022175668, "learning_rate": 9.649643214232585e-06, "loss": 0.6049, "step": 6949 }, { "epoch": 0.623723946063584, "grad_norm": 0.5253331516564455, "learning_rate": 9.649451171903857e-06, "loss": 0.4926, "step": 6950 }, { "epoch": 0.623813690516255, "grad_norm": 0.5700296959346128, "learning_rate": 9.649259078869152e-06, "loss": 0.5333, "step": 6951 }, { "epoch": 0.623903434968926, "grad_norm": 0.48333907711230956, "learning_rate": 9.649066935130566e-06, "loss": 0.4219, "step": 6952 }, { "epoch": 0.623993179421597, "grad_norm": 0.4984405831518025, "learning_rate": 9.648874740690196e-06, "loss": 0.4881, "step": 6953 }, { "epoch": 0.624082923874268, "grad_norm": 0.5275500021152297, "learning_rate": 9.648682495550133e-06, "loss": 0.4919, "step": 6954 }, { "epoch": 0.6241726683269391, "grad_norm": 0.5428038593258915, "learning_rate": 9.64849019971248e-06, "loss": 0.5086, "step": 6955 }, { "epoch": 0.62426241277961, "grad_norm": 0.4895775018723942, "learning_rate": 9.648297853179327e-06, "loss": 0.4305, "step": 6956 }, { "epoch": 0.6243521572322811, "grad_norm": 0.5111861576263637, "learning_rate": 9.648105455952779e-06, "loss": 0.4995, "step": 6957 }, { "epoch": 0.6244419016849521, "grad_norm": 0.4909837499792662, "learning_rate": 9.647913008034928e-06, "loss": 0.4544, "step": 6958 }, { "epoch": 0.6245316461376231, "grad_norm": 0.4951989716828756, "learning_rate": 9.647720509427878e-06, "loss": 0.489, "step": 6959 }, { "epoch": 0.6246213905902941, "grad_norm": 0.4823376015073218, "learning_rate": 9.647527960133724e-06, "loss": 0.4932, "step": 6960 }, { "epoch": 0.6247111350429652, "grad_norm": 0.4628126607894139, "learning_rate": 9.64733536015457e-06, "loss": 0.4575, "step": 6961 }, { "epoch": 0.6248008794956362, "grad_norm": 0.5193818435419086, "learning_rate": 9.647142709492513e-06, "loss": 0.4885, "step": 6962 }, { "epoch": 0.6248906239483072, "grad_norm": 0.5441336500370677, "learning_rate": 9.646950008149656e-06, "loss": 0.5879, "step": 6963 }, { "epoch": 0.6249803684009783, "grad_norm": 0.4786362932985848, "learning_rate": 9.646757256128099e-06, "loss": 0.4771, "step": 6964 }, { "epoch": 0.6250701128536492, "grad_norm": 0.4942309103018343, "learning_rate": 9.646564453429945e-06, "loss": 0.4749, "step": 6965 }, { "epoch": 0.6251598573063203, "grad_norm": 0.5634275056263522, "learning_rate": 9.646371600057298e-06, "loss": 0.5995, "step": 6966 }, { "epoch": 0.6252496017589912, "grad_norm": 0.5070014088615199, "learning_rate": 9.646178696012259e-06, "loss": 0.5066, "step": 6967 }, { "epoch": 0.6253393462116623, "grad_norm": 0.4443620818429698, "learning_rate": 9.645985741296934e-06, "loss": 0.4206, "step": 6968 }, { "epoch": 0.6254290906643333, "grad_norm": 0.43913089076277734, "learning_rate": 9.645792735913424e-06, "loss": 0.3928, "step": 6969 }, { "epoch": 0.6255188351170043, "grad_norm": 0.48541381739129813, "learning_rate": 9.645599679863838e-06, "loss": 0.4254, "step": 6970 }, { "epoch": 0.6256085795696753, "grad_norm": 0.4673225976621127, "learning_rate": 9.645406573150277e-06, "loss": 0.4225, "step": 6971 }, { "epoch": 0.6256983240223464, "grad_norm": 0.460689974210683, "learning_rate": 9.645213415774853e-06, "loss": 0.4318, "step": 6972 }, { "epoch": 0.6257880684750173, "grad_norm": 0.5079597010397259, "learning_rate": 9.645020207739665e-06, "loss": 0.4961, "step": 6973 }, { "epoch": 0.6258778129276884, "grad_norm": 0.5050264382631304, "learning_rate": 9.644826949046826e-06, "loss": 0.4656, "step": 6974 }, { "epoch": 0.6259675573803595, "grad_norm": 0.4913825412837382, "learning_rate": 9.64463363969844e-06, "loss": 0.4832, "step": 6975 }, { "epoch": 0.6260573018330304, "grad_norm": 0.5175967009170223, "learning_rate": 9.644440279696618e-06, "loss": 0.5089, "step": 6976 }, { "epoch": 0.6261470462857015, "grad_norm": 0.4873149742503613, "learning_rate": 9.644246869043468e-06, "loss": 0.4712, "step": 6977 }, { "epoch": 0.6262367907383725, "grad_norm": 0.5054433938924119, "learning_rate": 9.644053407741098e-06, "loss": 0.4935, "step": 6978 }, { "epoch": 0.6263265351910435, "grad_norm": 0.4738182966887729, "learning_rate": 9.643859895791618e-06, "loss": 0.4309, "step": 6979 }, { "epoch": 0.6264162796437145, "grad_norm": 0.4651771565062999, "learning_rate": 9.64366633319714e-06, "loss": 0.4301, "step": 6980 }, { "epoch": 0.6265060240963856, "grad_norm": 0.46851453984037894, "learning_rate": 9.643472719959774e-06, "loss": 0.4257, "step": 6981 }, { "epoch": 0.6265957685490565, "grad_norm": 0.49936902903370245, "learning_rate": 9.643279056081632e-06, "loss": 0.5159, "step": 6982 }, { "epoch": 0.6266855130017276, "grad_norm": 0.5442502895874783, "learning_rate": 9.643085341564825e-06, "loss": 0.5399, "step": 6983 }, { "epoch": 0.6267752574543985, "grad_norm": 0.5088131948923341, "learning_rate": 9.642891576411466e-06, "loss": 0.5241, "step": 6984 }, { "epoch": 0.6268650019070696, "grad_norm": 0.4497597526435882, "learning_rate": 9.642697760623669e-06, "loss": 0.414, "step": 6985 }, { "epoch": 0.6269547463597407, "grad_norm": 0.4727783504986657, "learning_rate": 9.642503894203546e-06, "loss": 0.4608, "step": 6986 }, { "epoch": 0.6270444908124116, "grad_norm": 0.49128794231465206, "learning_rate": 9.642309977153212e-06, "loss": 0.4582, "step": 6987 }, { "epoch": 0.6271342352650827, "grad_norm": 0.48668827196088854, "learning_rate": 9.642116009474784e-06, "loss": 0.4189, "step": 6988 }, { "epoch": 0.6272239797177537, "grad_norm": 0.48126568653559515, "learning_rate": 9.641921991170376e-06, "loss": 0.4592, "step": 6989 }, { "epoch": 0.6273137241704247, "grad_norm": 0.5110714843922963, "learning_rate": 9.6417279222421e-06, "loss": 0.4786, "step": 6990 }, { "epoch": 0.6274034686230957, "grad_norm": 0.44819361280118425, "learning_rate": 9.64153380269208e-06, "loss": 0.4172, "step": 6991 }, { "epoch": 0.6274932130757668, "grad_norm": 0.4597326064437573, "learning_rate": 9.641339632522426e-06, "loss": 0.3894, "step": 6992 }, { "epoch": 0.6275829575284377, "grad_norm": 0.496068328551548, "learning_rate": 9.64114541173526e-06, "loss": 0.4978, "step": 6993 }, { "epoch": 0.6276727019811088, "grad_norm": 0.5086222975982024, "learning_rate": 9.640951140332697e-06, "loss": 0.4446, "step": 6994 }, { "epoch": 0.6277624464337798, "grad_norm": 0.4850145700382548, "learning_rate": 9.64075681831686e-06, "loss": 0.4657, "step": 6995 }, { "epoch": 0.6278521908864508, "grad_norm": 0.4177811560484044, "learning_rate": 9.640562445689865e-06, "loss": 0.3498, "step": 6996 }, { "epoch": 0.6279419353391219, "grad_norm": 0.5085041592212748, "learning_rate": 9.640368022453831e-06, "loss": 0.4452, "step": 6997 }, { "epoch": 0.6280316797917929, "grad_norm": 0.4703864956038106, "learning_rate": 9.64017354861088e-06, "loss": 0.4703, "step": 6998 }, { "epoch": 0.6281214242444639, "grad_norm": 0.5287210868350521, "learning_rate": 9.639979024163134e-06, "loss": 0.5337, "step": 6999 }, { "epoch": 0.6282111686971349, "grad_norm": 0.48586779663213925, "learning_rate": 9.639784449112713e-06, "loss": 0.4358, "step": 7000 }, { "epoch": 0.628300913149806, "grad_norm": 0.42488741944797553, "learning_rate": 9.639589823461736e-06, "loss": 0.368, "step": 7001 }, { "epoch": 0.6283906576024769, "grad_norm": 0.5078063692116915, "learning_rate": 9.639395147212332e-06, "loss": 0.4704, "step": 7002 }, { "epoch": 0.628480402055148, "grad_norm": 0.46281676121959503, "learning_rate": 9.63920042036662e-06, "loss": 0.3916, "step": 7003 }, { "epoch": 0.628570146507819, "grad_norm": 0.49599730755503574, "learning_rate": 9.639005642926724e-06, "loss": 0.4615, "step": 7004 }, { "epoch": 0.62865989096049, "grad_norm": 0.5209071829618765, "learning_rate": 9.638810814894768e-06, "loss": 0.5204, "step": 7005 }, { "epoch": 0.628749635413161, "grad_norm": 0.5154035473263656, "learning_rate": 9.638615936272878e-06, "loss": 0.4935, "step": 7006 }, { "epoch": 0.628839379865832, "grad_norm": 0.4896970068566784, "learning_rate": 9.638421007063179e-06, "loss": 0.4457, "step": 7007 }, { "epoch": 0.628929124318503, "grad_norm": 0.5327551083580896, "learning_rate": 9.638226027267796e-06, "loss": 0.5399, "step": 7008 }, { "epoch": 0.6290188687711741, "grad_norm": 0.4771377328987959, "learning_rate": 9.638030996888854e-06, "loss": 0.438, "step": 7009 }, { "epoch": 0.6291086132238451, "grad_norm": 0.5089588661833524, "learning_rate": 9.637835915928485e-06, "loss": 0.4835, "step": 7010 }, { "epoch": 0.6291983576765161, "grad_norm": 0.4631964509485615, "learning_rate": 9.637640784388813e-06, "loss": 0.4179, "step": 7011 }, { "epoch": 0.6292881021291872, "grad_norm": 0.4904176164175483, "learning_rate": 9.637445602271964e-06, "loss": 0.4866, "step": 7012 }, { "epoch": 0.6293778465818581, "grad_norm": 0.5386292110288283, "learning_rate": 9.637250369580072e-06, "loss": 0.5007, "step": 7013 }, { "epoch": 0.6294675910345292, "grad_norm": 0.5203147296088488, "learning_rate": 9.637055086315261e-06, "loss": 0.4867, "step": 7014 }, { "epoch": 0.6295573354872002, "grad_norm": 0.46719122573239463, "learning_rate": 9.636859752479664e-06, "loss": 0.4487, "step": 7015 }, { "epoch": 0.6296470799398712, "grad_norm": 0.4930614507292989, "learning_rate": 9.63666436807541e-06, "loss": 0.4712, "step": 7016 }, { "epoch": 0.6297368243925422, "grad_norm": 0.5048573361949772, "learning_rate": 9.63646893310463e-06, "loss": 0.4679, "step": 7017 }, { "epoch": 0.6298265688452133, "grad_norm": 0.4951449926967353, "learning_rate": 9.636273447569455e-06, "loss": 0.4802, "step": 7018 }, { "epoch": 0.6299163132978842, "grad_norm": 0.47341256473751564, "learning_rate": 9.63607791147202e-06, "loss": 0.4708, "step": 7019 }, { "epoch": 0.6300060577505553, "grad_norm": 0.46622561133489354, "learning_rate": 9.635882324814451e-06, "loss": 0.4167, "step": 7020 }, { "epoch": 0.6300958022032264, "grad_norm": 0.5093317009699795, "learning_rate": 9.635686687598887e-06, "loss": 0.4536, "step": 7021 }, { "epoch": 0.6301855466558973, "grad_norm": 0.4795765113074473, "learning_rate": 9.63549099982746e-06, "loss": 0.4744, "step": 7022 }, { "epoch": 0.6302752911085684, "grad_norm": 0.4883450470092132, "learning_rate": 9.635295261502302e-06, "loss": 0.4658, "step": 7023 }, { "epoch": 0.6303650355612394, "grad_norm": 0.4629516984711726, "learning_rate": 9.63509947262555e-06, "loss": 0.436, "step": 7024 }, { "epoch": 0.6304547800139104, "grad_norm": 0.45111499308544134, "learning_rate": 9.634903633199337e-06, "loss": 0.433, "step": 7025 }, { "epoch": 0.6305445244665814, "grad_norm": 0.495873957816674, "learning_rate": 9.634707743225802e-06, "loss": 0.4816, "step": 7026 }, { "epoch": 0.6306342689192524, "grad_norm": 0.49186824683067615, "learning_rate": 9.634511802707079e-06, "loss": 0.4379, "step": 7027 }, { "epoch": 0.6307240133719234, "grad_norm": 0.5007215595037141, "learning_rate": 9.634315811645305e-06, "loss": 0.4901, "step": 7028 }, { "epoch": 0.6308137578245945, "grad_norm": 0.5026128275829665, "learning_rate": 9.63411977004262e-06, "loss": 0.4671, "step": 7029 }, { "epoch": 0.6309035022772654, "grad_norm": 0.5364899210246353, "learning_rate": 9.633923677901158e-06, "loss": 0.4968, "step": 7030 }, { "epoch": 0.6309932467299365, "grad_norm": 0.4703734347877191, "learning_rate": 9.63372753522306e-06, "loss": 0.4482, "step": 7031 }, { "epoch": 0.6310829911826076, "grad_norm": 0.45003328181255586, "learning_rate": 9.633531342010464e-06, "loss": 0.4413, "step": 7032 }, { "epoch": 0.6311727356352785, "grad_norm": 0.5316980257895009, "learning_rate": 9.63333509826551e-06, "loss": 0.5445, "step": 7033 }, { "epoch": 0.6312624800879496, "grad_norm": 0.43471205352016407, "learning_rate": 9.63313880399034e-06, "loss": 0.4031, "step": 7034 }, { "epoch": 0.6313522245406206, "grad_norm": 0.506705955026764, "learning_rate": 9.632942459187091e-06, "loss": 0.4763, "step": 7035 }, { "epoch": 0.6314419689932916, "grad_norm": 0.47371782997010786, "learning_rate": 9.632746063857908e-06, "loss": 0.4231, "step": 7036 }, { "epoch": 0.6315317134459626, "grad_norm": 0.5302362656049991, "learning_rate": 9.632549618004932e-06, "loss": 0.5262, "step": 7037 }, { "epoch": 0.6316214578986337, "grad_norm": 0.4838632227909418, "learning_rate": 9.632353121630302e-06, "loss": 0.4362, "step": 7038 }, { "epoch": 0.6317112023513046, "grad_norm": 0.5200273955484113, "learning_rate": 9.632156574736167e-06, "loss": 0.4959, "step": 7039 }, { "epoch": 0.6318009468039757, "grad_norm": 0.5050158935878098, "learning_rate": 9.631959977324665e-06, "loss": 0.4913, "step": 7040 }, { "epoch": 0.6318906912566467, "grad_norm": 0.4740769700142858, "learning_rate": 9.631763329397942e-06, "loss": 0.4111, "step": 7041 }, { "epoch": 0.6319804357093177, "grad_norm": 0.459510036332937, "learning_rate": 9.631566630958142e-06, "loss": 0.4413, "step": 7042 }, { "epoch": 0.6320701801619887, "grad_norm": 0.45312370714787537, "learning_rate": 9.631369882007413e-06, "loss": 0.4157, "step": 7043 }, { "epoch": 0.6321599246146598, "grad_norm": 0.46571206628006717, "learning_rate": 9.631173082547897e-06, "loss": 0.4453, "step": 7044 }, { "epoch": 0.6322496690673308, "grad_norm": 0.45406009795194596, "learning_rate": 9.630976232581743e-06, "loss": 0.4012, "step": 7045 }, { "epoch": 0.6323394135200018, "grad_norm": 0.5536697126989786, "learning_rate": 9.630779332111097e-06, "loss": 0.5658, "step": 7046 }, { "epoch": 0.6324291579726729, "grad_norm": 0.5069345130499774, "learning_rate": 9.630582381138105e-06, "loss": 0.4664, "step": 7047 }, { "epoch": 0.6325189024253438, "grad_norm": 0.4876672646349687, "learning_rate": 9.630385379664916e-06, "loss": 0.4306, "step": 7048 }, { "epoch": 0.6326086468780149, "grad_norm": 0.5572970334688254, "learning_rate": 9.63018832769368e-06, "loss": 0.523, "step": 7049 }, { "epoch": 0.6326983913306858, "grad_norm": 0.5050815942801697, "learning_rate": 9.629991225226543e-06, "loss": 0.4961, "step": 7050 }, { "epoch": 0.6327881357833569, "grad_norm": 0.5006560896258393, "learning_rate": 9.629794072265657e-06, "loss": 0.4508, "step": 7051 }, { "epoch": 0.6328778802360279, "grad_norm": 0.4756571588751066, "learning_rate": 9.62959686881317e-06, "loss": 0.4664, "step": 7052 }, { "epoch": 0.6329676246886989, "grad_norm": 0.4764277757753782, "learning_rate": 9.629399614871234e-06, "loss": 0.4061, "step": 7053 }, { "epoch": 0.6330573691413699, "grad_norm": 0.5448270131209129, "learning_rate": 9.629202310442001e-06, "loss": 0.5351, "step": 7054 }, { "epoch": 0.633147113594041, "grad_norm": 0.5087153584175632, "learning_rate": 9.629004955527622e-06, "loss": 0.4595, "step": 7055 }, { "epoch": 0.633236858046712, "grad_norm": 0.4745080804101161, "learning_rate": 9.628807550130246e-06, "loss": 0.4347, "step": 7056 }, { "epoch": 0.633326602499383, "grad_norm": 0.5602966382580463, "learning_rate": 9.628610094252031e-06, "loss": 0.5132, "step": 7057 }, { "epoch": 0.6334163469520541, "grad_norm": 0.4887364917463424, "learning_rate": 9.628412587895128e-06, "loss": 0.3968, "step": 7058 }, { "epoch": 0.633506091404725, "grad_norm": 0.4648642618946278, "learning_rate": 9.628215031061692e-06, "loss": 0.4128, "step": 7059 }, { "epoch": 0.6335958358573961, "grad_norm": 0.5300511499250956, "learning_rate": 9.628017423753876e-06, "loss": 0.5424, "step": 7060 }, { "epoch": 0.633685580310067, "grad_norm": 0.5005429518132138, "learning_rate": 9.627819765973837e-06, "loss": 0.4194, "step": 7061 }, { "epoch": 0.6337753247627381, "grad_norm": 0.5169247090198901, "learning_rate": 9.62762205772373e-06, "loss": 0.5059, "step": 7062 }, { "epoch": 0.6338650692154091, "grad_norm": 0.4919299646040546, "learning_rate": 9.627424299005708e-06, "loss": 0.4168, "step": 7063 }, { "epoch": 0.6339548136680802, "grad_norm": 0.49080954397845544, "learning_rate": 9.627226489821931e-06, "loss": 0.463, "step": 7064 }, { "epoch": 0.6340445581207511, "grad_norm": 0.5378612540395048, "learning_rate": 9.627028630174557e-06, "loss": 0.5127, "step": 7065 }, { "epoch": 0.6341343025734222, "grad_norm": 0.46170411158622116, "learning_rate": 9.626830720065742e-06, "loss": 0.3852, "step": 7066 }, { "epoch": 0.6342240470260933, "grad_norm": 0.4831281932000933, "learning_rate": 9.626632759497645e-06, "loss": 0.4419, "step": 7067 }, { "epoch": 0.6343137914787642, "grad_norm": 0.5129544771151956, "learning_rate": 9.626434748472423e-06, "loss": 0.465, "step": 7068 }, { "epoch": 0.6344035359314353, "grad_norm": 0.4410865993770725, "learning_rate": 9.626236686992238e-06, "loss": 0.3782, "step": 7069 }, { "epoch": 0.6344932803841062, "grad_norm": 0.5095102412944204, "learning_rate": 9.626038575059251e-06, "loss": 0.4608, "step": 7070 }, { "epoch": 0.6345830248367773, "grad_norm": 0.5018582113583648, "learning_rate": 9.62584041267562e-06, "loss": 0.5027, "step": 7071 }, { "epoch": 0.6346727692894483, "grad_norm": 0.46022083867479413, "learning_rate": 9.625642199843505e-06, "loss": 0.4037, "step": 7072 }, { "epoch": 0.6347625137421193, "grad_norm": 0.524468289389725, "learning_rate": 9.625443936565074e-06, "loss": 0.5173, "step": 7073 }, { "epoch": 0.6348522581947903, "grad_norm": 0.4388935427221608, "learning_rate": 9.625245622842481e-06, "loss": 0.3849, "step": 7074 }, { "epoch": 0.6349420026474614, "grad_norm": 0.48406731691608207, "learning_rate": 9.625047258677893e-06, "loss": 0.452, "step": 7075 }, { "epoch": 0.6350317471001323, "grad_norm": 0.45504538104050835, "learning_rate": 9.624848844073473e-06, "loss": 0.424, "step": 7076 }, { "epoch": 0.6351214915528034, "grad_norm": 0.45437404857792313, "learning_rate": 9.624650379031387e-06, "loss": 0.4206, "step": 7077 }, { "epoch": 0.6352112360054745, "grad_norm": 0.49347581155289505, "learning_rate": 9.624451863553795e-06, "loss": 0.4748, "step": 7078 }, { "epoch": 0.6353009804581454, "grad_norm": 0.5125195257488085, "learning_rate": 9.624253297642863e-06, "loss": 0.5062, "step": 7079 }, { "epoch": 0.6353907249108165, "grad_norm": 0.4841264112343877, "learning_rate": 9.62405468130076e-06, "loss": 0.4606, "step": 7080 }, { "epoch": 0.6354804693634875, "grad_norm": 0.540894309549966, "learning_rate": 9.623856014529648e-06, "loss": 0.4997, "step": 7081 }, { "epoch": 0.6355702138161585, "grad_norm": 0.50176544583428, "learning_rate": 9.623657297331697e-06, "loss": 0.5611, "step": 7082 }, { "epoch": 0.6356599582688295, "grad_norm": 0.4557039630042371, "learning_rate": 9.62345852970907e-06, "loss": 0.3863, "step": 7083 }, { "epoch": 0.6357497027215006, "grad_norm": 0.5092742952206663, "learning_rate": 9.62325971166394e-06, "loss": 0.4889, "step": 7084 }, { "epoch": 0.6358394471741715, "grad_norm": 0.481447751906819, "learning_rate": 9.623060843198473e-06, "loss": 0.4603, "step": 7085 }, { "epoch": 0.6359291916268426, "grad_norm": 0.5322165989437599, "learning_rate": 9.622861924314835e-06, "loss": 0.5016, "step": 7086 }, { "epoch": 0.6360189360795135, "grad_norm": 0.5294655854382634, "learning_rate": 9.622662955015199e-06, "loss": 0.5006, "step": 7087 }, { "epoch": 0.6361086805321846, "grad_norm": 0.49082774090881764, "learning_rate": 9.622463935301734e-06, "loss": 0.4709, "step": 7088 }, { "epoch": 0.6361984249848556, "grad_norm": 0.4987295267501235, "learning_rate": 9.622264865176609e-06, "loss": 0.4563, "step": 7089 }, { "epoch": 0.6362881694375266, "grad_norm": 0.47388993941693985, "learning_rate": 9.622065744641997e-06, "loss": 0.4232, "step": 7090 }, { "epoch": 0.6363779138901977, "grad_norm": 0.46929880663337215, "learning_rate": 9.621866573700068e-06, "loss": 0.4496, "step": 7091 }, { "epoch": 0.6364676583428687, "grad_norm": 0.5322753696193153, "learning_rate": 9.621667352352995e-06, "loss": 0.4912, "step": 7092 }, { "epoch": 0.6365574027955397, "grad_norm": 0.5346588896321771, "learning_rate": 9.621468080602949e-06, "loss": 0.4507, "step": 7093 }, { "epoch": 0.6366471472482107, "grad_norm": 0.4692963215350991, "learning_rate": 9.621268758452106e-06, "loss": 0.4139, "step": 7094 }, { "epoch": 0.6367368917008818, "grad_norm": 0.46558880571842115, "learning_rate": 9.62106938590264e-06, "loss": 0.4371, "step": 7095 }, { "epoch": 0.6368266361535527, "grad_norm": 0.4830947121810206, "learning_rate": 9.620869962956723e-06, "loss": 0.4581, "step": 7096 }, { "epoch": 0.6369163806062238, "grad_norm": 0.4924910770411904, "learning_rate": 9.620670489616528e-06, "loss": 0.4418, "step": 7097 }, { "epoch": 0.6370061250588948, "grad_norm": 0.5249443023995488, "learning_rate": 9.620470965884236e-06, "loss": 0.5124, "step": 7098 }, { "epoch": 0.6370958695115658, "grad_norm": 0.44655555813052333, "learning_rate": 9.620271391762019e-06, "loss": 0.4032, "step": 7099 }, { "epoch": 0.6371856139642368, "grad_norm": 0.501396992673496, "learning_rate": 9.620071767252054e-06, "loss": 0.5285, "step": 7100 }, { "epoch": 0.6372753584169079, "grad_norm": 0.5654372395248916, "learning_rate": 9.61987209235652e-06, "loss": 0.5264, "step": 7101 }, { "epoch": 0.6373651028695789, "grad_norm": 0.5088567822546819, "learning_rate": 9.619672367077593e-06, "loss": 0.503, "step": 7102 }, { "epoch": 0.6374548473222499, "grad_norm": 0.4983326229307297, "learning_rate": 9.61947259141745e-06, "loss": 0.4194, "step": 7103 }, { "epoch": 0.637544591774921, "grad_norm": 0.49817341162775397, "learning_rate": 9.619272765378271e-06, "loss": 0.5128, "step": 7104 }, { "epoch": 0.6376343362275919, "grad_norm": 0.576631797067255, "learning_rate": 9.619072888962238e-06, "loss": 0.6258, "step": 7105 }, { "epoch": 0.637724080680263, "grad_norm": 0.48935851883686, "learning_rate": 9.618872962171525e-06, "loss": 0.4976, "step": 7106 }, { "epoch": 0.6378138251329339, "grad_norm": 0.4819790043372424, "learning_rate": 9.618672985008318e-06, "loss": 0.4751, "step": 7107 }, { "epoch": 0.637903569585605, "grad_norm": 0.4802743412133384, "learning_rate": 9.618472957474795e-06, "loss": 0.3998, "step": 7108 }, { "epoch": 0.637993314038276, "grad_norm": 0.4825273929299265, "learning_rate": 9.618272879573137e-06, "loss": 0.432, "step": 7109 }, { "epoch": 0.638083058490947, "grad_norm": 0.47197142797802416, "learning_rate": 9.618072751305528e-06, "loss": 0.4209, "step": 7110 }, { "epoch": 0.638172802943618, "grad_norm": 0.5515566214138025, "learning_rate": 9.617872572674147e-06, "loss": 0.5093, "step": 7111 }, { "epoch": 0.6382625473962891, "grad_norm": 0.5117265010536189, "learning_rate": 9.617672343681182e-06, "loss": 0.4382, "step": 7112 }, { "epoch": 0.6383522918489601, "grad_norm": 0.4972568741116072, "learning_rate": 9.617472064328814e-06, "loss": 0.4785, "step": 7113 }, { "epoch": 0.6384420363016311, "grad_norm": 0.4815771396535965, "learning_rate": 9.617271734619226e-06, "loss": 0.45, "step": 7114 }, { "epoch": 0.6385317807543022, "grad_norm": 0.5178896338956482, "learning_rate": 9.617071354554606e-06, "loss": 0.4673, "step": 7115 }, { "epoch": 0.6386215252069731, "grad_norm": 0.512886246741089, "learning_rate": 9.616870924137136e-06, "loss": 0.4802, "step": 7116 }, { "epoch": 0.6387112696596442, "grad_norm": 0.49767076965005713, "learning_rate": 9.616670443369003e-06, "loss": 0.4448, "step": 7117 }, { "epoch": 0.6388010141123152, "grad_norm": 0.4485700367890167, "learning_rate": 9.616469912252396e-06, "loss": 0.3946, "step": 7118 }, { "epoch": 0.6388907585649862, "grad_norm": 0.5345076571266145, "learning_rate": 9.616269330789498e-06, "loss": 0.445, "step": 7119 }, { "epoch": 0.6389805030176572, "grad_norm": 0.5167846896257235, "learning_rate": 9.6160686989825e-06, "loss": 0.4708, "step": 7120 }, { "epoch": 0.6390702474703283, "grad_norm": 0.5145321706411456, "learning_rate": 9.615868016833585e-06, "loss": 0.5009, "step": 7121 }, { "epoch": 0.6391599919229992, "grad_norm": 0.4624229964756048, "learning_rate": 9.615667284344947e-06, "loss": 0.4195, "step": 7122 }, { "epoch": 0.6392497363756703, "grad_norm": 0.49237879670560253, "learning_rate": 9.615466501518774e-06, "loss": 0.4825, "step": 7123 }, { "epoch": 0.6393394808283412, "grad_norm": 0.5196399384641046, "learning_rate": 9.615265668357253e-06, "loss": 0.5049, "step": 7124 }, { "epoch": 0.6394292252810123, "grad_norm": 0.4828942176356324, "learning_rate": 9.615064784862576e-06, "loss": 0.4288, "step": 7125 }, { "epoch": 0.6395189697336834, "grad_norm": 0.44455952494455314, "learning_rate": 9.614863851036933e-06, "loss": 0.3828, "step": 7126 }, { "epoch": 0.6396087141863543, "grad_norm": 0.4987880126284247, "learning_rate": 9.614662866882518e-06, "loss": 0.4959, "step": 7127 }, { "epoch": 0.6396984586390254, "grad_norm": 0.47974690418418664, "learning_rate": 9.614461832401521e-06, "loss": 0.4529, "step": 7128 }, { "epoch": 0.6397882030916964, "grad_norm": 0.4935870815339949, "learning_rate": 9.614260747596132e-06, "loss": 0.4592, "step": 7129 }, { "epoch": 0.6398779475443674, "grad_norm": 0.5468816507562411, "learning_rate": 9.61405961246855e-06, "loss": 0.4624, "step": 7130 }, { "epoch": 0.6399676919970384, "grad_norm": 0.4969068223758916, "learning_rate": 9.613858427020962e-06, "loss": 0.4653, "step": 7131 }, { "epoch": 0.6400574364497095, "grad_norm": 0.46376143997322117, "learning_rate": 9.613657191255565e-06, "loss": 0.4399, "step": 7132 }, { "epoch": 0.6401471809023804, "grad_norm": 0.5158170144253607, "learning_rate": 9.613455905174557e-06, "loss": 0.5308, "step": 7133 }, { "epoch": 0.6402369253550515, "grad_norm": 0.4367198834931236, "learning_rate": 9.613254568780126e-06, "loss": 0.3669, "step": 7134 }, { "epoch": 0.6403266698077225, "grad_norm": 0.5176029069093816, "learning_rate": 9.613053182074473e-06, "loss": 0.5109, "step": 7135 }, { "epoch": 0.6404164142603935, "grad_norm": 0.5905852183400956, "learning_rate": 9.612851745059793e-06, "loss": 0.5104, "step": 7136 }, { "epoch": 0.6405061587130646, "grad_norm": 0.4716619704940317, "learning_rate": 9.612650257738282e-06, "loss": 0.4033, "step": 7137 }, { "epoch": 0.6405959031657356, "grad_norm": 0.462667010165294, "learning_rate": 9.612448720112139e-06, "loss": 0.4508, "step": 7138 }, { "epoch": 0.6406856476184066, "grad_norm": 0.5553724639346989, "learning_rate": 9.612247132183562e-06, "loss": 0.5085, "step": 7139 }, { "epoch": 0.6407753920710776, "grad_norm": 0.47782251525491665, "learning_rate": 9.612045493954746e-06, "loss": 0.459, "step": 7140 }, { "epoch": 0.6408651365237487, "grad_norm": 0.4975343659492277, "learning_rate": 9.611843805427894e-06, "loss": 0.4263, "step": 7141 }, { "epoch": 0.6409548809764196, "grad_norm": 0.5003078268805291, "learning_rate": 9.611642066605205e-06, "loss": 0.4503, "step": 7142 }, { "epoch": 0.6410446254290907, "grad_norm": 0.49190836500533913, "learning_rate": 9.611440277488876e-06, "loss": 0.5452, "step": 7143 }, { "epoch": 0.6411343698817616, "grad_norm": 0.5063430274890396, "learning_rate": 9.611238438081111e-06, "loss": 0.4389, "step": 7144 }, { "epoch": 0.6412241143344327, "grad_norm": 0.4853164200361271, "learning_rate": 9.611036548384109e-06, "loss": 0.4309, "step": 7145 }, { "epoch": 0.6413138587871037, "grad_norm": 0.5255620367570473, "learning_rate": 9.610834608400075e-06, "loss": 0.5196, "step": 7146 }, { "epoch": 0.6414036032397747, "grad_norm": 0.49215923284004415, "learning_rate": 9.610632618131207e-06, "loss": 0.4666, "step": 7147 }, { "epoch": 0.6414933476924458, "grad_norm": 0.4975089270646358, "learning_rate": 9.610430577579713e-06, "loss": 0.4618, "step": 7148 }, { "epoch": 0.6415830921451168, "grad_norm": 0.5622437542030742, "learning_rate": 9.610228486747791e-06, "loss": 0.4942, "step": 7149 }, { "epoch": 0.6416728365977878, "grad_norm": 0.49201447052889896, "learning_rate": 9.610026345637648e-06, "loss": 0.4376, "step": 7150 }, { "epoch": 0.6417625810504588, "grad_norm": 0.5414805946643203, "learning_rate": 9.609824154251488e-06, "loss": 0.5316, "step": 7151 }, { "epoch": 0.6418523255031299, "grad_norm": 0.4678177853931224, "learning_rate": 9.609621912591516e-06, "loss": 0.4446, "step": 7152 }, { "epoch": 0.6419420699558008, "grad_norm": 0.5073091607576452, "learning_rate": 9.609419620659938e-06, "loss": 0.4787, "step": 7153 }, { "epoch": 0.6420318144084719, "grad_norm": 0.5203259288340825, "learning_rate": 9.60921727845896e-06, "loss": 0.5226, "step": 7154 }, { "epoch": 0.6421215588611429, "grad_norm": 0.4814282430414734, "learning_rate": 9.609014885990788e-06, "loss": 0.4863, "step": 7155 }, { "epoch": 0.6422113033138139, "grad_norm": 0.5161193427703085, "learning_rate": 9.60881244325763e-06, "loss": 0.4978, "step": 7156 }, { "epoch": 0.6423010477664849, "grad_norm": 0.4995299894274101, "learning_rate": 9.608609950261692e-06, "loss": 0.4607, "step": 7157 }, { "epoch": 0.642390792219156, "grad_norm": 0.4628771387428831, "learning_rate": 9.608407407005184e-06, "loss": 0.4556, "step": 7158 }, { "epoch": 0.6424805366718269, "grad_norm": 0.4391159564200099, "learning_rate": 9.608204813490317e-06, "loss": 0.4238, "step": 7159 }, { "epoch": 0.642570281124498, "grad_norm": 0.5138521332775763, "learning_rate": 9.608002169719298e-06, "loss": 0.5374, "step": 7160 }, { "epoch": 0.6426600255771691, "grad_norm": 0.49449964172606586, "learning_rate": 9.607799475694335e-06, "loss": 0.5209, "step": 7161 }, { "epoch": 0.64274977002984, "grad_norm": 0.49218402919287474, "learning_rate": 9.607596731417643e-06, "loss": 0.4666, "step": 7162 }, { "epoch": 0.6428395144825111, "grad_norm": 0.5327138244754983, "learning_rate": 9.60739393689143e-06, "loss": 0.5152, "step": 7163 }, { "epoch": 0.642929258935182, "grad_norm": 0.5214158251219543, "learning_rate": 9.607191092117907e-06, "loss": 0.5472, "step": 7164 }, { "epoch": 0.6430190033878531, "grad_norm": 0.4617867962903566, "learning_rate": 9.60698819709929e-06, "loss": 0.438, "step": 7165 }, { "epoch": 0.6431087478405241, "grad_norm": 0.4647552264390852, "learning_rate": 9.606785251837788e-06, "loss": 0.408, "step": 7166 }, { "epoch": 0.6431984922931951, "grad_norm": 0.4800131901178327, "learning_rate": 9.606582256335615e-06, "loss": 0.482, "step": 7167 }, { "epoch": 0.6432882367458661, "grad_norm": 0.5149081911481942, "learning_rate": 9.606379210594985e-06, "loss": 0.5034, "step": 7168 }, { "epoch": 0.6433779811985372, "grad_norm": 0.48854315327997905, "learning_rate": 9.606176114618116e-06, "loss": 0.4552, "step": 7169 }, { "epoch": 0.6434677256512081, "grad_norm": 0.45929216228817554, "learning_rate": 9.605972968407218e-06, "loss": 0.4018, "step": 7170 }, { "epoch": 0.6435574701038792, "grad_norm": 0.5175128228027407, "learning_rate": 9.605769771964509e-06, "loss": 0.4696, "step": 7171 }, { "epoch": 0.6436472145565503, "grad_norm": 0.4929249978703277, "learning_rate": 9.605566525292202e-06, "loss": 0.4296, "step": 7172 }, { "epoch": 0.6437369590092212, "grad_norm": 0.4594643537317421, "learning_rate": 9.605363228392517e-06, "loss": 0.4214, "step": 7173 }, { "epoch": 0.6438267034618923, "grad_norm": 0.46050794278688983, "learning_rate": 9.605159881267671e-06, "loss": 0.4463, "step": 7174 }, { "epoch": 0.6439164479145633, "grad_norm": 0.49112276605078076, "learning_rate": 9.604956483919879e-06, "loss": 0.478, "step": 7175 }, { "epoch": 0.6440061923672343, "grad_norm": 0.531598578507734, "learning_rate": 9.604753036351362e-06, "loss": 0.517, "step": 7176 }, { "epoch": 0.6440959368199053, "grad_norm": 0.4844916870518016, "learning_rate": 9.604549538564339e-06, "loss": 0.4483, "step": 7177 }, { "epoch": 0.6441856812725764, "grad_norm": 0.48630509172316466, "learning_rate": 9.604345990561025e-06, "loss": 0.5041, "step": 7178 }, { "epoch": 0.6442754257252473, "grad_norm": 0.48636845126968903, "learning_rate": 9.604142392343644e-06, "loss": 0.4927, "step": 7179 }, { "epoch": 0.6443651701779184, "grad_norm": 0.5421525777762076, "learning_rate": 9.603938743914415e-06, "loss": 0.538, "step": 7180 }, { "epoch": 0.6444549146305893, "grad_norm": 0.49854162179632855, "learning_rate": 9.60373504527556e-06, "loss": 0.4919, "step": 7181 }, { "epoch": 0.6445446590832604, "grad_norm": 0.4710318585975919, "learning_rate": 9.603531296429299e-06, "loss": 0.4235, "step": 7182 }, { "epoch": 0.6446344035359315, "grad_norm": 0.5292459941177464, "learning_rate": 9.603327497377854e-06, "loss": 0.5174, "step": 7183 }, { "epoch": 0.6447241479886024, "grad_norm": 0.49248535345061073, "learning_rate": 9.603123648123449e-06, "loss": 0.468, "step": 7184 }, { "epoch": 0.6448138924412735, "grad_norm": 0.4690486276665921, "learning_rate": 9.602919748668307e-06, "loss": 0.4036, "step": 7185 }, { "epoch": 0.6449036368939445, "grad_norm": 0.5013900477831539, "learning_rate": 9.602715799014648e-06, "loss": 0.4791, "step": 7186 }, { "epoch": 0.6449933813466155, "grad_norm": 0.47309853539051544, "learning_rate": 9.602511799164701e-06, "loss": 0.4456, "step": 7187 }, { "epoch": 0.6450831257992865, "grad_norm": 0.5030538194707419, "learning_rate": 9.60230774912069e-06, "loss": 0.5157, "step": 7188 }, { "epoch": 0.6451728702519576, "grad_norm": 0.505154871713038, "learning_rate": 9.60210364888484e-06, "loss": 0.4897, "step": 7189 }, { "epoch": 0.6452626147046285, "grad_norm": 0.49910719932966774, "learning_rate": 9.601899498459375e-06, "loss": 0.4798, "step": 7190 }, { "epoch": 0.6453523591572996, "grad_norm": 0.4076495970044399, "learning_rate": 9.601695297846523e-06, "loss": 0.3772, "step": 7191 }, { "epoch": 0.6454421036099706, "grad_norm": 0.4914971838457367, "learning_rate": 9.60149104704851e-06, "loss": 0.4721, "step": 7192 }, { "epoch": 0.6455318480626416, "grad_norm": 0.487339756888804, "learning_rate": 9.601286746067564e-06, "loss": 0.4732, "step": 7193 }, { "epoch": 0.6456215925153126, "grad_norm": 0.45930711913971545, "learning_rate": 9.601082394905916e-06, "loss": 0.4076, "step": 7194 }, { "epoch": 0.6457113369679837, "grad_norm": 0.49352796411501565, "learning_rate": 9.60087799356579e-06, "loss": 0.4376, "step": 7195 }, { "epoch": 0.6458010814206547, "grad_norm": 0.4972041571044428, "learning_rate": 9.600673542049416e-06, "loss": 0.4609, "step": 7196 }, { "epoch": 0.6458908258733257, "grad_norm": 0.5249306199694102, "learning_rate": 9.600469040359025e-06, "loss": 0.4699, "step": 7197 }, { "epoch": 0.6459805703259968, "grad_norm": 0.5218469070976444, "learning_rate": 9.600264488496849e-06, "loss": 0.4864, "step": 7198 }, { "epoch": 0.6460703147786677, "grad_norm": 0.4735493628718235, "learning_rate": 9.600059886465114e-06, "loss": 0.3956, "step": 7199 }, { "epoch": 0.6461600592313388, "grad_norm": 0.4446180678288327, "learning_rate": 9.599855234266055e-06, "loss": 0.4169, "step": 7200 }, { "epoch": 0.6462498036840097, "grad_norm": 0.4992615531928599, "learning_rate": 9.599650531901904e-06, "loss": 0.4971, "step": 7201 }, { "epoch": 0.6463395481366808, "grad_norm": 0.5562928106154442, "learning_rate": 9.599445779374893e-06, "loss": 0.6017, "step": 7202 }, { "epoch": 0.6464292925893518, "grad_norm": 0.520744613686784, "learning_rate": 9.599240976687251e-06, "loss": 0.4986, "step": 7203 }, { "epoch": 0.6465190370420228, "grad_norm": 0.5296658501159102, "learning_rate": 9.599036123841219e-06, "loss": 0.535, "step": 7204 }, { "epoch": 0.6466087814946938, "grad_norm": 0.5405844047078022, "learning_rate": 9.598831220839025e-06, "loss": 0.5268, "step": 7205 }, { "epoch": 0.6466985259473649, "grad_norm": 0.4890488054790594, "learning_rate": 9.598626267682907e-06, "loss": 0.4471, "step": 7206 }, { "epoch": 0.646788270400036, "grad_norm": 0.4636336477681854, "learning_rate": 9.598421264375097e-06, "loss": 0.4605, "step": 7207 }, { "epoch": 0.6468780148527069, "grad_norm": 0.5104402126495907, "learning_rate": 9.598216210917834e-06, "loss": 0.5067, "step": 7208 }, { "epoch": 0.646967759305378, "grad_norm": 0.49560331711429506, "learning_rate": 9.598011107313352e-06, "loss": 0.4285, "step": 7209 }, { "epoch": 0.6470575037580489, "grad_norm": 0.47831455489612706, "learning_rate": 9.59780595356389e-06, "loss": 0.4742, "step": 7210 }, { "epoch": 0.64714724821072, "grad_norm": 0.4723939819229206, "learning_rate": 9.597600749671683e-06, "loss": 0.4416, "step": 7211 }, { "epoch": 0.647236992663391, "grad_norm": 0.5145361039508455, "learning_rate": 9.597395495638971e-06, "loss": 0.4773, "step": 7212 }, { "epoch": 0.647326737116062, "grad_norm": 0.4854425191384369, "learning_rate": 9.59719019146799e-06, "loss": 0.4867, "step": 7213 }, { "epoch": 0.647416481568733, "grad_norm": 0.5352832821927184, "learning_rate": 9.596984837160983e-06, "loss": 0.511, "step": 7214 }, { "epoch": 0.6475062260214041, "grad_norm": 0.4587652365082754, "learning_rate": 9.596779432720185e-06, "loss": 0.4165, "step": 7215 }, { "epoch": 0.647595970474075, "grad_norm": 0.49321915384410264, "learning_rate": 9.596573978147839e-06, "loss": 0.4592, "step": 7216 }, { "epoch": 0.6476857149267461, "grad_norm": 0.498568321067614, "learning_rate": 9.596368473446182e-06, "loss": 0.4407, "step": 7217 }, { "epoch": 0.6477754593794172, "grad_norm": 0.4666903611043699, "learning_rate": 9.596162918617463e-06, "loss": 0.4343, "step": 7218 }, { "epoch": 0.6478652038320881, "grad_norm": 0.5047249117029087, "learning_rate": 9.595957313663913e-06, "loss": 0.5098, "step": 7219 }, { "epoch": 0.6479549482847592, "grad_norm": 0.48199954331836387, "learning_rate": 9.595751658587784e-06, "loss": 0.4706, "step": 7220 }, { "epoch": 0.6480446927374302, "grad_norm": 0.5016354627308725, "learning_rate": 9.595545953391315e-06, "loss": 0.4508, "step": 7221 }, { "epoch": 0.6481344371901012, "grad_norm": 0.46764532804372455, "learning_rate": 9.595340198076746e-06, "loss": 0.4485, "step": 7222 }, { "epoch": 0.6482241816427722, "grad_norm": 0.4363980283903315, "learning_rate": 9.595134392646326e-06, "loss": 0.4271, "step": 7223 }, { "epoch": 0.6483139260954432, "grad_norm": 0.4842005285475062, "learning_rate": 9.594928537102298e-06, "loss": 0.4276, "step": 7224 }, { "epoch": 0.6484036705481142, "grad_norm": 0.4707238964496792, "learning_rate": 9.594722631446906e-06, "loss": 0.4727, "step": 7225 }, { "epoch": 0.6484934150007853, "grad_norm": 0.48403030396520297, "learning_rate": 9.594516675682395e-06, "loss": 0.4446, "step": 7226 }, { "epoch": 0.6485831594534562, "grad_norm": 0.4980839722817108, "learning_rate": 9.594310669811013e-06, "loss": 0.4773, "step": 7227 }, { "epoch": 0.6486729039061273, "grad_norm": 0.4769182192826254, "learning_rate": 9.594104613835007e-06, "loss": 0.4831, "step": 7228 }, { "epoch": 0.6487626483587984, "grad_norm": 0.5771813281898868, "learning_rate": 9.593898507756622e-06, "loss": 0.5848, "step": 7229 }, { "epoch": 0.6488523928114693, "grad_norm": 0.4903592642468809, "learning_rate": 9.593692351578108e-06, "loss": 0.4721, "step": 7230 }, { "epoch": 0.6489421372641404, "grad_norm": 0.4358879034202378, "learning_rate": 9.593486145301711e-06, "loss": 0.3568, "step": 7231 }, { "epoch": 0.6490318817168114, "grad_norm": 0.48326439244796593, "learning_rate": 9.593279888929683e-06, "loss": 0.4725, "step": 7232 }, { "epoch": 0.6491216261694824, "grad_norm": 0.43642564957273994, "learning_rate": 9.59307358246427e-06, "loss": 0.4166, "step": 7233 }, { "epoch": 0.6492113706221534, "grad_norm": 0.5102021874980477, "learning_rate": 9.592867225907723e-06, "loss": 0.5182, "step": 7234 }, { "epoch": 0.6493011150748245, "grad_norm": 0.45259155135115076, "learning_rate": 9.592660819262292e-06, "loss": 0.4199, "step": 7235 }, { "epoch": 0.6493908595274954, "grad_norm": 0.4904959108221915, "learning_rate": 9.59245436253023e-06, "loss": 0.4515, "step": 7236 }, { "epoch": 0.6494806039801665, "grad_norm": 0.5198511094574483, "learning_rate": 9.59224785571379e-06, "loss": 0.4842, "step": 7237 }, { "epoch": 0.6495703484328375, "grad_norm": 0.476717145007672, "learning_rate": 9.592041298815217e-06, "loss": 0.4575, "step": 7238 }, { "epoch": 0.6496600928855085, "grad_norm": 0.48327368547250865, "learning_rate": 9.59183469183677e-06, "loss": 0.4915, "step": 7239 }, { "epoch": 0.6497498373381795, "grad_norm": 0.4639920370430298, "learning_rate": 9.591628034780702e-06, "loss": 0.4216, "step": 7240 }, { "epoch": 0.6498395817908506, "grad_norm": 0.4792884810824301, "learning_rate": 9.591421327649264e-06, "loss": 0.4273, "step": 7241 }, { "epoch": 0.6499293262435216, "grad_norm": 0.4770206986301239, "learning_rate": 9.591214570444712e-06, "loss": 0.4637, "step": 7242 }, { "epoch": 0.6500190706961926, "grad_norm": 0.4784452857675491, "learning_rate": 9.591007763169299e-06, "loss": 0.4607, "step": 7243 }, { "epoch": 0.6501088151488637, "grad_norm": 0.4600833817537452, "learning_rate": 9.590800905825282e-06, "loss": 0.407, "step": 7244 }, { "epoch": 0.6501985596015346, "grad_norm": 0.520981123137276, "learning_rate": 9.590593998414918e-06, "loss": 0.4507, "step": 7245 }, { "epoch": 0.6502883040542057, "grad_norm": 0.479349157377393, "learning_rate": 9.590387040940462e-06, "loss": 0.4694, "step": 7246 }, { "epoch": 0.6503780485068766, "grad_norm": 0.5144254697096433, "learning_rate": 9.590180033404171e-06, "loss": 0.4922, "step": 7247 }, { "epoch": 0.6504677929595477, "grad_norm": 0.46745603024931914, "learning_rate": 9.589972975808301e-06, "loss": 0.425, "step": 7248 }, { "epoch": 0.6505575374122187, "grad_norm": 0.4774739035700665, "learning_rate": 9.589765868155115e-06, "loss": 0.4358, "step": 7249 }, { "epoch": 0.6506472818648897, "grad_norm": 0.4880725151957238, "learning_rate": 9.589558710446867e-06, "loss": 0.4899, "step": 7250 }, { "epoch": 0.6507370263175607, "grad_norm": 0.5324826026977225, "learning_rate": 9.589351502685817e-06, "loss": 0.5209, "step": 7251 }, { "epoch": 0.6508267707702318, "grad_norm": 0.48706659998684504, "learning_rate": 9.589144244874226e-06, "loss": 0.4486, "step": 7252 }, { "epoch": 0.6509165152229028, "grad_norm": 0.49442445020969716, "learning_rate": 9.588936937014354e-06, "loss": 0.4593, "step": 7253 }, { "epoch": 0.6510062596755738, "grad_norm": 0.43342810731558423, "learning_rate": 9.588729579108465e-06, "loss": 0.3671, "step": 7254 }, { "epoch": 0.6510960041282449, "grad_norm": 0.4321174009314764, "learning_rate": 9.588522171158814e-06, "loss": 0.4335, "step": 7255 }, { "epoch": 0.6511857485809158, "grad_norm": 0.5597814295612751, "learning_rate": 9.588314713167666e-06, "loss": 0.4928, "step": 7256 }, { "epoch": 0.6512754930335869, "grad_norm": 0.4699483561937424, "learning_rate": 9.588107205137285e-06, "loss": 0.4588, "step": 7257 }, { "epoch": 0.6513652374862579, "grad_norm": 0.4947890087448514, "learning_rate": 9.587899647069931e-06, "loss": 0.4357, "step": 7258 }, { "epoch": 0.6514549819389289, "grad_norm": 0.47389653349455674, "learning_rate": 9.58769203896787e-06, "loss": 0.403, "step": 7259 }, { "epoch": 0.6515447263915999, "grad_norm": 0.5202424304150469, "learning_rate": 9.587484380833366e-06, "loss": 0.4968, "step": 7260 }, { "epoch": 0.651634470844271, "grad_norm": 0.49854224316064955, "learning_rate": 9.587276672668682e-06, "loss": 0.4601, "step": 7261 }, { "epoch": 0.6517242152969419, "grad_norm": 0.44220353468102763, "learning_rate": 9.587068914476085e-06, "loss": 0.3971, "step": 7262 }, { "epoch": 0.651813959749613, "grad_norm": 0.48342521775975883, "learning_rate": 9.58686110625784e-06, "loss": 0.4647, "step": 7263 }, { "epoch": 0.651903704202284, "grad_norm": 0.4792544454905391, "learning_rate": 9.586653248016213e-06, "loss": 0.479, "step": 7264 }, { "epoch": 0.651993448654955, "grad_norm": 0.4903967876495383, "learning_rate": 9.586445339753472e-06, "loss": 0.4331, "step": 7265 }, { "epoch": 0.6520831931076261, "grad_norm": 0.5134738369202321, "learning_rate": 9.586237381471884e-06, "loss": 0.4777, "step": 7266 }, { "epoch": 0.652172937560297, "grad_norm": 0.5396911280031726, "learning_rate": 9.586029373173715e-06, "loss": 0.5691, "step": 7267 }, { "epoch": 0.6522626820129681, "grad_norm": 0.47150213625999965, "learning_rate": 9.585821314861235e-06, "loss": 0.4071, "step": 7268 }, { "epoch": 0.6523524264656391, "grad_norm": 0.47258708313424663, "learning_rate": 9.585613206536715e-06, "loss": 0.4324, "step": 7269 }, { "epoch": 0.6524421709183101, "grad_norm": 0.46490574601195683, "learning_rate": 9.58540504820242e-06, "loss": 0.4307, "step": 7270 }, { "epoch": 0.6525319153709811, "grad_norm": 0.47141680539591485, "learning_rate": 9.585196839860625e-06, "loss": 0.469, "step": 7271 }, { "epoch": 0.6526216598236522, "grad_norm": 0.4521325968286417, "learning_rate": 9.5849885815136e-06, "loss": 0.3873, "step": 7272 }, { "epoch": 0.6527114042763231, "grad_norm": 0.477001465310168, "learning_rate": 9.584780273163611e-06, "loss": 0.3924, "step": 7273 }, { "epoch": 0.6528011487289942, "grad_norm": 0.5300786478074823, "learning_rate": 9.584571914812936e-06, "loss": 0.4805, "step": 7274 }, { "epoch": 0.6528908931816652, "grad_norm": 0.4891946313427652, "learning_rate": 9.584363506463843e-06, "loss": 0.4673, "step": 7275 }, { "epoch": 0.6529806376343362, "grad_norm": 0.45122384866018267, "learning_rate": 9.58415504811861e-06, "loss": 0.3704, "step": 7276 }, { "epoch": 0.6530703820870073, "grad_norm": 0.4639065998438193, "learning_rate": 9.583946539779503e-06, "loss": 0.4578, "step": 7277 }, { "epoch": 0.6531601265396783, "grad_norm": 0.5102114894294231, "learning_rate": 9.583737981448801e-06, "loss": 0.4907, "step": 7278 }, { "epoch": 0.6532498709923493, "grad_norm": 0.5225113064087707, "learning_rate": 9.583529373128779e-06, "loss": 0.448, "step": 7279 }, { "epoch": 0.6533396154450203, "grad_norm": 0.477102179676376, "learning_rate": 9.58332071482171e-06, "loss": 0.497, "step": 7280 }, { "epoch": 0.6534293598976914, "grad_norm": 0.4772801411424063, "learning_rate": 9.583112006529871e-06, "loss": 0.4066, "step": 7281 }, { "epoch": 0.6535191043503623, "grad_norm": 0.5218622684358977, "learning_rate": 9.582903248255537e-06, "loss": 0.4857, "step": 7282 }, { "epoch": 0.6536088488030334, "grad_norm": 0.5273289810608737, "learning_rate": 9.582694440000984e-06, "loss": 0.4697, "step": 7283 }, { "epoch": 0.6536985932557043, "grad_norm": 0.4431572249163343, "learning_rate": 9.58248558176849e-06, "loss": 0.4066, "step": 7284 }, { "epoch": 0.6537883377083754, "grad_norm": 0.4520152709693213, "learning_rate": 9.582276673560334e-06, "loss": 0.3997, "step": 7285 }, { "epoch": 0.6538780821610464, "grad_norm": 0.5242101119953254, "learning_rate": 9.582067715378793e-06, "loss": 0.543, "step": 7286 }, { "epoch": 0.6539678266137174, "grad_norm": 0.492545978213429, "learning_rate": 9.581858707226147e-06, "loss": 0.4818, "step": 7287 }, { "epoch": 0.6540575710663885, "grad_norm": 0.5125043486441789, "learning_rate": 9.581649649104674e-06, "loss": 0.4711, "step": 7288 }, { "epoch": 0.6541473155190595, "grad_norm": 0.5100288770612782, "learning_rate": 9.581440541016653e-06, "loss": 0.5071, "step": 7289 }, { "epoch": 0.6542370599717305, "grad_norm": 0.460060673056035, "learning_rate": 9.581231382964367e-06, "loss": 0.3975, "step": 7290 }, { "epoch": 0.6543268044244015, "grad_norm": 0.472189660623431, "learning_rate": 9.581022174950095e-06, "loss": 0.4512, "step": 7291 }, { "epoch": 0.6544165488770726, "grad_norm": 0.48634699691853317, "learning_rate": 9.580812916976121e-06, "loss": 0.4766, "step": 7292 }, { "epoch": 0.6545062933297435, "grad_norm": 0.5242275160355906, "learning_rate": 9.580603609044725e-06, "loss": 0.5307, "step": 7293 }, { "epoch": 0.6545960377824146, "grad_norm": 0.4953748092545355, "learning_rate": 9.580394251158192e-06, "loss": 0.4607, "step": 7294 }, { "epoch": 0.6546857822350856, "grad_norm": 0.5060840636270865, "learning_rate": 9.5801848433188e-06, "loss": 0.5257, "step": 7295 }, { "epoch": 0.6547755266877566, "grad_norm": 0.47468840938709844, "learning_rate": 9.57997538552884e-06, "loss": 0.4508, "step": 7296 }, { "epoch": 0.6548652711404276, "grad_norm": 0.4848795205464119, "learning_rate": 9.57976587779059e-06, "loss": 0.4246, "step": 7297 }, { "epoch": 0.6549550155930987, "grad_norm": 0.5057037754149102, "learning_rate": 9.57955632010634e-06, "loss": 0.4788, "step": 7298 }, { "epoch": 0.6550447600457697, "grad_norm": 0.4870995235947349, "learning_rate": 9.57934671247837e-06, "loss": 0.4291, "step": 7299 }, { "epoch": 0.6551345044984407, "grad_norm": 0.4725215700110351, "learning_rate": 9.57913705490897e-06, "loss": 0.4484, "step": 7300 }, { "epoch": 0.6552242489511118, "grad_norm": 0.5095838020087856, "learning_rate": 9.578927347400426e-06, "loss": 0.4708, "step": 7301 }, { "epoch": 0.6553139934037827, "grad_norm": 0.4550838498793913, "learning_rate": 9.578717589955024e-06, "loss": 0.4723, "step": 7302 }, { "epoch": 0.6554037378564538, "grad_norm": 0.5207659046950397, "learning_rate": 9.578507782575054e-06, "loss": 0.4636, "step": 7303 }, { "epoch": 0.6554934823091247, "grad_norm": 0.484602803124919, "learning_rate": 9.578297925262799e-06, "loss": 0.4833, "step": 7304 }, { "epoch": 0.6555832267617958, "grad_norm": 0.4976413364377289, "learning_rate": 9.578088018020551e-06, "loss": 0.4794, "step": 7305 }, { "epoch": 0.6556729712144668, "grad_norm": 0.4548609309251019, "learning_rate": 9.577878060850601e-06, "loss": 0.383, "step": 7306 }, { "epoch": 0.6557627156671378, "grad_norm": 0.48550121129915136, "learning_rate": 9.577668053755234e-06, "loss": 0.4199, "step": 7307 }, { "epoch": 0.6558524601198088, "grad_norm": 0.5010892147582139, "learning_rate": 9.577457996736744e-06, "loss": 0.5209, "step": 7308 }, { "epoch": 0.6559422045724799, "grad_norm": 0.5018381388415114, "learning_rate": 9.57724788979742e-06, "loss": 0.4363, "step": 7309 }, { "epoch": 0.6560319490251508, "grad_norm": 0.49448663871833326, "learning_rate": 9.577037732939555e-06, "loss": 0.4277, "step": 7310 }, { "epoch": 0.6561216934778219, "grad_norm": 0.49028438305194494, "learning_rate": 9.576827526165442e-06, "loss": 0.4532, "step": 7311 }, { "epoch": 0.656211437930493, "grad_norm": 0.493321323074573, "learning_rate": 9.57661726947737e-06, "loss": 0.4864, "step": 7312 }, { "epoch": 0.6563011823831639, "grad_norm": 0.4721803812717055, "learning_rate": 9.576406962877634e-06, "loss": 0.4548, "step": 7313 }, { "epoch": 0.656390926835835, "grad_norm": 0.5037371848858597, "learning_rate": 9.576196606368526e-06, "loss": 0.5233, "step": 7314 }, { "epoch": 0.656480671288506, "grad_norm": 0.4663131846866346, "learning_rate": 9.57598619995234e-06, "loss": 0.3934, "step": 7315 }, { "epoch": 0.656570415741177, "grad_norm": 0.526124311176803, "learning_rate": 9.575775743631375e-06, "loss": 0.4905, "step": 7316 }, { "epoch": 0.656660160193848, "grad_norm": 0.48669859964607504, "learning_rate": 9.575565237407922e-06, "loss": 0.4839, "step": 7317 }, { "epoch": 0.6567499046465191, "grad_norm": 0.48043299925875105, "learning_rate": 9.575354681284277e-06, "loss": 0.4894, "step": 7318 }, { "epoch": 0.65683964909919, "grad_norm": 0.47549862951631644, "learning_rate": 9.575144075262738e-06, "loss": 0.4217, "step": 7319 }, { "epoch": 0.6569293935518611, "grad_norm": 0.5269118846054213, "learning_rate": 9.5749334193456e-06, "loss": 0.4858, "step": 7320 }, { "epoch": 0.657019138004532, "grad_norm": 0.44453964688118414, "learning_rate": 9.574722713535163e-06, "loss": 0.3816, "step": 7321 }, { "epoch": 0.6571088824572031, "grad_norm": 0.5175207939096721, "learning_rate": 9.574511957833721e-06, "loss": 0.4551, "step": 7322 }, { "epoch": 0.6571986269098742, "grad_norm": 0.5153746327316145, "learning_rate": 9.574301152243576e-06, "loss": 0.4571, "step": 7323 }, { "epoch": 0.6572883713625451, "grad_norm": 0.46004096775905684, "learning_rate": 9.574090296767027e-06, "loss": 0.4103, "step": 7324 }, { "epoch": 0.6573781158152162, "grad_norm": 0.47394132027427677, "learning_rate": 9.57387939140637e-06, "loss": 0.4503, "step": 7325 }, { "epoch": 0.6574678602678872, "grad_norm": 0.4422899820320001, "learning_rate": 9.573668436163907e-06, "loss": 0.3925, "step": 7326 }, { "epoch": 0.6575576047205582, "grad_norm": 0.47305465723798923, "learning_rate": 9.573457431041941e-06, "loss": 0.4694, "step": 7327 }, { "epoch": 0.6576473491732292, "grad_norm": 0.5357591436517849, "learning_rate": 9.573246376042772e-06, "loss": 0.4895, "step": 7328 }, { "epoch": 0.6577370936259003, "grad_norm": 0.5287632028575309, "learning_rate": 9.573035271168698e-06, "loss": 0.5019, "step": 7329 }, { "epoch": 0.6578268380785712, "grad_norm": 0.474846184775792, "learning_rate": 9.572824116422026e-06, "loss": 0.3972, "step": 7330 }, { "epoch": 0.6579165825312423, "grad_norm": 0.49824004150668333, "learning_rate": 9.572612911805055e-06, "loss": 0.5331, "step": 7331 }, { "epoch": 0.6580063269839133, "grad_norm": 0.4684429057290215, "learning_rate": 9.572401657320092e-06, "loss": 0.4321, "step": 7332 }, { "epoch": 0.6580960714365843, "grad_norm": 0.48037288649947024, "learning_rate": 9.572190352969438e-06, "loss": 0.485, "step": 7333 }, { "epoch": 0.6581858158892554, "grad_norm": 0.4841428681995493, "learning_rate": 9.5719789987554e-06, "loss": 0.4868, "step": 7334 }, { "epoch": 0.6582755603419264, "grad_norm": 0.5032379400770919, "learning_rate": 9.57176759468028e-06, "loss": 0.4714, "step": 7335 }, { "epoch": 0.6583653047945974, "grad_norm": 0.47779340524123276, "learning_rate": 9.571556140746387e-06, "loss": 0.4343, "step": 7336 }, { "epoch": 0.6584550492472684, "grad_norm": 0.504550158218505, "learning_rate": 9.571344636956023e-06, "loss": 0.4622, "step": 7337 }, { "epoch": 0.6585447936999395, "grad_norm": 0.4739769052682702, "learning_rate": 9.571133083311498e-06, "loss": 0.4353, "step": 7338 }, { "epoch": 0.6586345381526104, "grad_norm": 0.5815181292147082, "learning_rate": 9.570921479815119e-06, "loss": 0.5244, "step": 7339 }, { "epoch": 0.6587242826052815, "grad_norm": 0.47602330932056175, "learning_rate": 9.570709826469194e-06, "loss": 0.4223, "step": 7340 }, { "epoch": 0.6588140270579524, "grad_norm": 0.5142499181768055, "learning_rate": 9.570498123276027e-06, "loss": 0.4681, "step": 7341 }, { "epoch": 0.6589037715106235, "grad_norm": 0.5068770623245347, "learning_rate": 9.57028637023793e-06, "loss": 0.4721, "step": 7342 }, { "epoch": 0.6589935159632945, "grad_norm": 0.4834779744111622, "learning_rate": 9.570074567357212e-06, "loss": 0.471, "step": 7343 }, { "epoch": 0.6590832604159655, "grad_norm": 0.4887846352819265, "learning_rate": 9.569862714636184e-06, "loss": 0.448, "step": 7344 }, { "epoch": 0.6591730048686365, "grad_norm": 0.4996322673618849, "learning_rate": 9.569650812077155e-06, "loss": 0.4608, "step": 7345 }, { "epoch": 0.6592627493213076, "grad_norm": 0.4815480373874538, "learning_rate": 9.569438859682437e-06, "loss": 0.4347, "step": 7346 }, { "epoch": 0.6593524937739786, "grad_norm": 0.47554562875859785, "learning_rate": 9.569226857454342e-06, "loss": 0.4364, "step": 7347 }, { "epoch": 0.6594422382266496, "grad_norm": 0.4713958386196149, "learning_rate": 9.569014805395179e-06, "loss": 0.4377, "step": 7348 }, { "epoch": 0.6595319826793207, "grad_norm": 0.5236356330827536, "learning_rate": 9.568802703507261e-06, "loss": 0.5326, "step": 7349 }, { "epoch": 0.6596217271319916, "grad_norm": 0.47885505876176626, "learning_rate": 9.568590551792906e-06, "loss": 0.4139, "step": 7350 }, { "epoch": 0.6597114715846627, "grad_norm": 0.5278805924087921, "learning_rate": 9.568378350254422e-06, "loss": 0.523, "step": 7351 }, { "epoch": 0.6598012160373337, "grad_norm": 0.48957111631838196, "learning_rate": 9.568166098894127e-06, "loss": 0.4591, "step": 7352 }, { "epoch": 0.6598909604900047, "grad_norm": 0.48172213755681464, "learning_rate": 9.567953797714334e-06, "loss": 0.4204, "step": 7353 }, { "epoch": 0.6599807049426757, "grad_norm": 0.4866658695244138, "learning_rate": 9.567741446717359e-06, "loss": 0.4237, "step": 7354 }, { "epoch": 0.6600704493953468, "grad_norm": 0.45948061301802456, "learning_rate": 9.567529045905515e-06, "loss": 0.4087, "step": 7355 }, { "epoch": 0.6601601938480177, "grad_norm": 0.4450095812079104, "learning_rate": 9.567316595281124e-06, "loss": 0.387, "step": 7356 }, { "epoch": 0.6602499383006888, "grad_norm": 0.4926208593716717, "learning_rate": 9.567104094846499e-06, "loss": 0.4822, "step": 7357 }, { "epoch": 0.6603396827533599, "grad_norm": 0.4795562372819563, "learning_rate": 9.566891544603957e-06, "loss": 0.4437, "step": 7358 }, { "epoch": 0.6604294272060308, "grad_norm": 0.44870562434397226, "learning_rate": 9.566678944555819e-06, "loss": 0.4217, "step": 7359 }, { "epoch": 0.6605191716587019, "grad_norm": 0.466095937897747, "learning_rate": 9.566466294704402e-06, "loss": 0.433, "step": 7360 }, { "epoch": 0.6606089161113728, "grad_norm": 0.5128730794218495, "learning_rate": 9.566253595052024e-06, "loss": 0.5367, "step": 7361 }, { "epoch": 0.6606986605640439, "grad_norm": 0.46812280269860707, "learning_rate": 9.566040845601005e-06, "loss": 0.4534, "step": 7362 }, { "epoch": 0.6607884050167149, "grad_norm": 0.48641427774003754, "learning_rate": 9.565828046353669e-06, "loss": 0.4696, "step": 7363 }, { "epoch": 0.660878149469386, "grad_norm": 0.49348206193387945, "learning_rate": 9.565615197312332e-06, "loss": 0.477, "step": 7364 }, { "epoch": 0.6609678939220569, "grad_norm": 0.5099887179028212, "learning_rate": 9.565402298479315e-06, "loss": 0.4776, "step": 7365 }, { "epoch": 0.661057638374728, "grad_norm": 0.5259404535931456, "learning_rate": 9.565189349856945e-06, "loss": 0.5567, "step": 7366 }, { "epoch": 0.6611473828273989, "grad_norm": 0.5312064852417914, "learning_rate": 9.56497635144754e-06, "loss": 0.5, "step": 7367 }, { "epoch": 0.66123712728007, "grad_norm": 0.48255066746533515, "learning_rate": 9.564763303253422e-06, "loss": 0.472, "step": 7368 }, { "epoch": 0.6613268717327411, "grad_norm": 0.4630420366016875, "learning_rate": 9.564550205276918e-06, "loss": 0.4439, "step": 7369 }, { "epoch": 0.661416616185412, "grad_norm": 0.4707011280130697, "learning_rate": 9.564337057520353e-06, "loss": 0.4576, "step": 7370 }, { "epoch": 0.6615063606380831, "grad_norm": 0.5581632465276036, "learning_rate": 9.564123859986044e-06, "loss": 0.5238, "step": 7371 }, { "epoch": 0.6615961050907541, "grad_norm": 0.5275907508414569, "learning_rate": 9.563910612676326e-06, "loss": 0.4887, "step": 7372 }, { "epoch": 0.6616858495434251, "grad_norm": 0.447459185016437, "learning_rate": 9.563697315593519e-06, "loss": 0.426, "step": 7373 }, { "epoch": 0.6617755939960961, "grad_norm": 0.4938187498786199, "learning_rate": 9.563483968739948e-06, "loss": 0.4596, "step": 7374 }, { "epoch": 0.6618653384487672, "grad_norm": 0.4654950820265009, "learning_rate": 9.563270572117943e-06, "loss": 0.4465, "step": 7375 }, { "epoch": 0.6619550829014381, "grad_norm": 0.48562269003955033, "learning_rate": 9.56305712572983e-06, "loss": 0.4546, "step": 7376 }, { "epoch": 0.6620448273541092, "grad_norm": 0.48196570443562087, "learning_rate": 9.562843629577936e-06, "loss": 0.4617, "step": 7377 }, { "epoch": 0.6621345718067801, "grad_norm": 0.4705429325856148, "learning_rate": 9.562630083664591e-06, "loss": 0.4404, "step": 7378 }, { "epoch": 0.6622243162594512, "grad_norm": 0.4671291675459037, "learning_rate": 9.562416487992122e-06, "loss": 0.4146, "step": 7379 }, { "epoch": 0.6623140607121222, "grad_norm": 0.4906406352765768, "learning_rate": 9.56220284256286e-06, "loss": 0.4911, "step": 7380 }, { "epoch": 0.6624038051647932, "grad_norm": 0.48156373636746447, "learning_rate": 9.561989147379134e-06, "loss": 0.4471, "step": 7381 }, { "epoch": 0.6624935496174643, "grad_norm": 0.49518523770888734, "learning_rate": 9.561775402443277e-06, "loss": 0.4597, "step": 7382 }, { "epoch": 0.6625832940701353, "grad_norm": 0.47683367948594824, "learning_rate": 9.561561607757615e-06, "loss": 0.4827, "step": 7383 }, { "epoch": 0.6626730385228063, "grad_norm": 0.4923795735501917, "learning_rate": 9.561347763324484e-06, "loss": 0.4749, "step": 7384 }, { "epoch": 0.6627627829754773, "grad_norm": 0.5072787005546108, "learning_rate": 9.561133869146215e-06, "loss": 0.4721, "step": 7385 }, { "epoch": 0.6628525274281484, "grad_norm": 0.5019131117653983, "learning_rate": 9.560919925225141e-06, "loss": 0.4857, "step": 7386 }, { "epoch": 0.6629422718808193, "grad_norm": 0.45106081252055225, "learning_rate": 9.560705931563593e-06, "loss": 0.3915, "step": 7387 }, { "epoch": 0.6630320163334904, "grad_norm": 0.4946169147056212, "learning_rate": 9.560491888163908e-06, "loss": 0.452, "step": 7388 }, { "epoch": 0.6631217607861614, "grad_norm": 0.5125725267629705, "learning_rate": 9.560277795028417e-06, "loss": 0.4727, "step": 7389 }, { "epoch": 0.6632115052388324, "grad_norm": 0.46672385346291145, "learning_rate": 9.560063652159457e-06, "loss": 0.4307, "step": 7390 }, { "epoch": 0.6633012496915034, "grad_norm": 0.47730444608911116, "learning_rate": 9.559849459559366e-06, "loss": 0.4314, "step": 7391 }, { "epoch": 0.6633909941441745, "grad_norm": 0.4671962105641546, "learning_rate": 9.559635217230473e-06, "loss": 0.437, "step": 7392 }, { "epoch": 0.6634807385968455, "grad_norm": 0.49970514029300406, "learning_rate": 9.55942092517512e-06, "loss": 0.4544, "step": 7393 }, { "epoch": 0.6635704830495165, "grad_norm": 0.5680042603258659, "learning_rate": 9.559206583395641e-06, "loss": 0.5179, "step": 7394 }, { "epoch": 0.6636602275021876, "grad_norm": 0.4353536782376229, "learning_rate": 9.558992191894378e-06, "loss": 0.3982, "step": 7395 }, { "epoch": 0.6637499719548585, "grad_norm": 0.4931895871405728, "learning_rate": 9.558777750673664e-06, "loss": 0.4838, "step": 7396 }, { "epoch": 0.6638397164075296, "grad_norm": 0.48132828479990636, "learning_rate": 9.55856325973584e-06, "loss": 0.4516, "step": 7397 }, { "epoch": 0.6639294608602005, "grad_norm": 0.4657260708227648, "learning_rate": 9.558348719083246e-06, "loss": 0.4214, "step": 7398 }, { "epoch": 0.6640192053128716, "grad_norm": 0.45133788018188614, "learning_rate": 9.558134128718219e-06, "loss": 0.3995, "step": 7399 }, { "epoch": 0.6641089497655426, "grad_norm": 0.4763660145572512, "learning_rate": 9.557919488643103e-06, "loss": 0.4312, "step": 7400 }, { "epoch": 0.6641986942182136, "grad_norm": 0.5207681762428281, "learning_rate": 9.557704798860236e-06, "loss": 0.4657, "step": 7401 }, { "epoch": 0.6642884386708846, "grad_norm": 0.533219836144601, "learning_rate": 9.557490059371958e-06, "loss": 0.4959, "step": 7402 }, { "epoch": 0.6643781831235557, "grad_norm": 0.48802343200752724, "learning_rate": 9.557275270180617e-06, "loss": 0.511, "step": 7403 }, { "epoch": 0.6644679275762267, "grad_norm": 0.5070114180357432, "learning_rate": 9.557060431288549e-06, "loss": 0.4695, "step": 7404 }, { "epoch": 0.6645576720288977, "grad_norm": 0.4808116873698571, "learning_rate": 9.5568455426981e-06, "loss": 0.4443, "step": 7405 }, { "epoch": 0.6646474164815688, "grad_norm": 0.4800705290663831, "learning_rate": 9.556630604411613e-06, "loss": 0.4342, "step": 7406 }, { "epoch": 0.6647371609342397, "grad_norm": 0.4960125550676307, "learning_rate": 9.556415616431432e-06, "loss": 0.4457, "step": 7407 }, { "epoch": 0.6648269053869108, "grad_norm": 0.47434807182889904, "learning_rate": 9.556200578759901e-06, "loss": 0.4407, "step": 7408 }, { "epoch": 0.6649166498395818, "grad_norm": 0.5064277756755682, "learning_rate": 9.555985491399368e-06, "loss": 0.4849, "step": 7409 }, { "epoch": 0.6650063942922528, "grad_norm": 0.47658068779230967, "learning_rate": 9.555770354352177e-06, "loss": 0.4712, "step": 7410 }, { "epoch": 0.6650961387449238, "grad_norm": 0.4462863804803216, "learning_rate": 9.555555167620672e-06, "loss": 0.4073, "step": 7411 }, { "epoch": 0.6651858831975949, "grad_norm": 0.4838403613526124, "learning_rate": 9.555339931207202e-06, "loss": 0.467, "step": 7412 }, { "epoch": 0.6652756276502658, "grad_norm": 0.505510887249745, "learning_rate": 9.555124645114113e-06, "loss": 0.4643, "step": 7413 }, { "epoch": 0.6653653721029369, "grad_norm": 0.46081289649900925, "learning_rate": 9.554909309343754e-06, "loss": 0.4211, "step": 7414 }, { "epoch": 0.665455116555608, "grad_norm": 0.5258816765659812, "learning_rate": 9.554693923898474e-06, "loss": 0.485, "step": 7415 }, { "epoch": 0.6655448610082789, "grad_norm": 0.4997621383986149, "learning_rate": 9.554478488780621e-06, "loss": 0.4491, "step": 7416 }, { "epoch": 0.66563460546095, "grad_norm": 0.4919673684721334, "learning_rate": 9.554263003992545e-06, "loss": 0.4591, "step": 7417 }, { "epoch": 0.665724349913621, "grad_norm": 0.5359465021109723, "learning_rate": 9.554047469536596e-06, "loss": 0.5192, "step": 7418 }, { "epoch": 0.665814094366292, "grad_norm": 0.49144694051987975, "learning_rate": 9.553831885415123e-06, "loss": 0.472, "step": 7419 }, { "epoch": 0.665903838818963, "grad_norm": 0.49754933425014786, "learning_rate": 9.553616251630478e-06, "loss": 0.4536, "step": 7420 }, { "epoch": 0.665993583271634, "grad_norm": 0.4841058087405864, "learning_rate": 9.553400568185014e-06, "loss": 0.473, "step": 7421 }, { "epoch": 0.666083327724305, "grad_norm": 0.47873967859136224, "learning_rate": 9.553184835081084e-06, "loss": 0.4625, "step": 7422 }, { "epoch": 0.6661730721769761, "grad_norm": 0.4797441164382061, "learning_rate": 9.552969052321037e-06, "loss": 0.4955, "step": 7423 }, { "epoch": 0.666262816629647, "grad_norm": 0.4572915792868889, "learning_rate": 9.552753219907228e-06, "loss": 0.4688, "step": 7424 }, { "epoch": 0.6663525610823181, "grad_norm": 0.47358907072150214, "learning_rate": 9.552537337842012e-06, "loss": 0.4488, "step": 7425 }, { "epoch": 0.6664423055349891, "grad_norm": 0.5494665531166602, "learning_rate": 9.552321406127742e-06, "loss": 0.5406, "step": 7426 }, { "epoch": 0.6665320499876601, "grad_norm": 0.5123915716085614, "learning_rate": 9.552105424766774e-06, "loss": 0.4869, "step": 7427 }, { "epoch": 0.6666217944403312, "grad_norm": 0.48840810602265056, "learning_rate": 9.551889393761462e-06, "loss": 0.467, "step": 7428 }, { "epoch": 0.6667115388930022, "grad_norm": 0.4755460798375727, "learning_rate": 9.551673313114162e-06, "loss": 0.4835, "step": 7429 }, { "epoch": 0.6668012833456732, "grad_norm": 0.48129952901168843, "learning_rate": 9.551457182827233e-06, "loss": 0.4795, "step": 7430 }, { "epoch": 0.6668910277983442, "grad_norm": 0.5400313063485975, "learning_rate": 9.55124100290303e-06, "loss": 0.5311, "step": 7431 }, { "epoch": 0.6669807722510153, "grad_norm": 0.44490888651673793, "learning_rate": 9.551024773343911e-06, "loss": 0.3798, "step": 7432 }, { "epoch": 0.6670705167036862, "grad_norm": 0.5031757971961119, "learning_rate": 9.550808494152234e-06, "loss": 0.4513, "step": 7433 }, { "epoch": 0.6671602611563573, "grad_norm": 0.473298885545225, "learning_rate": 9.550592165330358e-06, "loss": 0.4274, "step": 7434 }, { "epoch": 0.6672500056090283, "grad_norm": 0.5184333888303593, "learning_rate": 9.550375786880642e-06, "loss": 0.4762, "step": 7435 }, { "epoch": 0.6673397500616993, "grad_norm": 0.47977855653199886, "learning_rate": 9.550159358805447e-06, "loss": 0.4252, "step": 7436 }, { "epoch": 0.6674294945143703, "grad_norm": 0.5275251669169407, "learning_rate": 9.549942881107131e-06, "loss": 0.4963, "step": 7437 }, { "epoch": 0.6675192389670414, "grad_norm": 0.47052810349336, "learning_rate": 9.549726353788056e-06, "loss": 0.4386, "step": 7438 }, { "epoch": 0.6676089834197124, "grad_norm": 0.47897077599276144, "learning_rate": 9.549509776850583e-06, "loss": 0.4503, "step": 7439 }, { "epoch": 0.6676987278723834, "grad_norm": 0.4990933643498441, "learning_rate": 9.549293150297075e-06, "loss": 0.4728, "step": 7440 }, { "epoch": 0.6677884723250544, "grad_norm": 0.5052950575065192, "learning_rate": 9.549076474129895e-06, "loss": 0.4489, "step": 7441 }, { "epoch": 0.6678782167777254, "grad_norm": 0.5043261614033747, "learning_rate": 9.548859748351404e-06, "loss": 0.4476, "step": 7442 }, { "epoch": 0.6679679612303965, "grad_norm": 0.507704342904559, "learning_rate": 9.548642972963967e-06, "loss": 0.4762, "step": 7443 }, { "epoch": 0.6680577056830674, "grad_norm": 0.4869509022391624, "learning_rate": 9.548426147969947e-06, "loss": 0.433, "step": 7444 }, { "epoch": 0.6681474501357385, "grad_norm": 0.48273967478192376, "learning_rate": 9.548209273371709e-06, "loss": 0.4186, "step": 7445 }, { "epoch": 0.6682371945884095, "grad_norm": 0.4506106799572628, "learning_rate": 9.547992349171618e-06, "loss": 0.3983, "step": 7446 }, { "epoch": 0.6683269390410805, "grad_norm": 0.5325366104401994, "learning_rate": 9.547775375372041e-06, "loss": 0.5194, "step": 7447 }, { "epoch": 0.6684166834937515, "grad_norm": 0.48316583463120655, "learning_rate": 9.547558351975344e-06, "loss": 0.4482, "step": 7448 }, { "epoch": 0.6685064279464226, "grad_norm": 0.4926237810259381, "learning_rate": 9.547341278983892e-06, "loss": 0.4949, "step": 7449 }, { "epoch": 0.6685961723990936, "grad_norm": 0.47854578724336844, "learning_rate": 9.547124156400056e-06, "loss": 0.4604, "step": 7450 }, { "epoch": 0.6686859168517646, "grad_norm": 0.5081987942193336, "learning_rate": 9.546906984226201e-06, "loss": 0.4597, "step": 7451 }, { "epoch": 0.6687756613044357, "grad_norm": 0.45554728968954206, "learning_rate": 9.546689762464694e-06, "loss": 0.4184, "step": 7452 }, { "epoch": 0.6688654057571066, "grad_norm": 0.49613769451644824, "learning_rate": 9.546472491117906e-06, "loss": 0.4579, "step": 7453 }, { "epoch": 0.6689551502097777, "grad_norm": 0.5008820491079965, "learning_rate": 9.546255170188208e-06, "loss": 0.4354, "step": 7454 }, { "epoch": 0.6690448946624487, "grad_norm": 0.5321571330138655, "learning_rate": 9.546037799677967e-06, "loss": 0.5067, "step": 7455 }, { "epoch": 0.6691346391151197, "grad_norm": 0.44835227632502345, "learning_rate": 9.545820379589556e-06, "loss": 0.4207, "step": 7456 }, { "epoch": 0.6692243835677907, "grad_norm": 0.4970256457001797, "learning_rate": 9.545602909925346e-06, "loss": 0.4393, "step": 7457 }, { "epoch": 0.6693141280204618, "grad_norm": 0.45212929566743604, "learning_rate": 9.545385390687707e-06, "loss": 0.4402, "step": 7458 }, { "epoch": 0.6694038724731327, "grad_norm": 0.5097233850535454, "learning_rate": 9.545167821879014e-06, "loss": 0.4624, "step": 7459 }, { "epoch": 0.6694936169258038, "grad_norm": 0.467650854273374, "learning_rate": 9.544950203501635e-06, "loss": 0.411, "step": 7460 }, { "epoch": 0.6695833613784747, "grad_norm": 0.5259356730163482, "learning_rate": 9.544732535557948e-06, "loss": 0.4696, "step": 7461 }, { "epoch": 0.6696731058311458, "grad_norm": 0.49316553231398275, "learning_rate": 9.544514818050324e-06, "loss": 0.4681, "step": 7462 }, { "epoch": 0.6697628502838169, "grad_norm": 0.47512914570735065, "learning_rate": 9.544297050981139e-06, "loss": 0.434, "step": 7463 }, { "epoch": 0.6698525947364878, "grad_norm": 0.4738881495331585, "learning_rate": 9.544079234352766e-06, "loss": 0.4193, "step": 7464 }, { "epoch": 0.6699423391891589, "grad_norm": 0.46487722590738106, "learning_rate": 9.543861368167584e-06, "loss": 0.4175, "step": 7465 }, { "epoch": 0.6700320836418299, "grad_norm": 0.49508034917241683, "learning_rate": 9.543643452427967e-06, "loss": 0.4936, "step": 7466 }, { "epoch": 0.6701218280945009, "grad_norm": 0.43594029624639113, "learning_rate": 9.543425487136289e-06, "loss": 0.3956, "step": 7467 }, { "epoch": 0.6702115725471719, "grad_norm": 0.5177666523373199, "learning_rate": 9.54320747229493e-06, "loss": 0.4625, "step": 7468 }, { "epoch": 0.670301316999843, "grad_norm": 0.457873181343359, "learning_rate": 9.54298940790627e-06, "loss": 0.4057, "step": 7469 }, { "epoch": 0.6703910614525139, "grad_norm": 0.600771070526469, "learning_rate": 9.542771293972681e-06, "loss": 0.5287, "step": 7470 }, { "epoch": 0.670480805905185, "grad_norm": 0.47561384595329137, "learning_rate": 9.542553130496546e-06, "loss": 0.4209, "step": 7471 }, { "epoch": 0.670570550357856, "grad_norm": 0.5239521187918771, "learning_rate": 9.542334917480244e-06, "loss": 0.4893, "step": 7472 }, { "epoch": 0.670660294810527, "grad_norm": 0.477939208384183, "learning_rate": 9.542116654926153e-06, "loss": 0.4834, "step": 7473 }, { "epoch": 0.6707500392631981, "grad_norm": 0.46344262089822574, "learning_rate": 9.541898342836655e-06, "loss": 0.4117, "step": 7474 }, { "epoch": 0.670839783715869, "grad_norm": 0.4707906313295109, "learning_rate": 9.54167998121413e-06, "loss": 0.4355, "step": 7475 }, { "epoch": 0.6709295281685401, "grad_norm": 0.4766238461675953, "learning_rate": 9.54146157006096e-06, "loss": 0.4621, "step": 7476 }, { "epoch": 0.6710192726212111, "grad_norm": 0.4732609408089841, "learning_rate": 9.541243109379525e-06, "loss": 0.3932, "step": 7477 }, { "epoch": 0.6711090170738822, "grad_norm": 0.4765037960571132, "learning_rate": 9.541024599172211e-06, "loss": 0.4473, "step": 7478 }, { "epoch": 0.6711987615265531, "grad_norm": 0.5327560872583498, "learning_rate": 9.540806039441396e-06, "loss": 0.4925, "step": 7479 }, { "epoch": 0.6712885059792242, "grad_norm": 0.4884342724136477, "learning_rate": 9.54058743018947e-06, "loss": 0.4616, "step": 7480 }, { "epoch": 0.6713782504318951, "grad_norm": 0.4627362692366096, "learning_rate": 9.540368771418813e-06, "loss": 0.416, "step": 7481 }, { "epoch": 0.6714679948845662, "grad_norm": 0.4842653847509245, "learning_rate": 9.540150063131809e-06, "loss": 0.4623, "step": 7482 }, { "epoch": 0.6715577393372372, "grad_norm": 0.5024253814365262, "learning_rate": 9.539931305330845e-06, "loss": 0.4684, "step": 7483 }, { "epoch": 0.6716474837899082, "grad_norm": 0.4665786809568717, "learning_rate": 9.539712498018307e-06, "loss": 0.409, "step": 7484 }, { "epoch": 0.6717372282425793, "grad_norm": 0.5155814416084458, "learning_rate": 9.53949364119658e-06, "loss": 0.4741, "step": 7485 }, { "epoch": 0.6718269726952503, "grad_norm": 0.4703181855809026, "learning_rate": 9.539274734868052e-06, "loss": 0.4082, "step": 7486 }, { "epoch": 0.6719167171479213, "grad_norm": 0.4832869440154637, "learning_rate": 9.53905577903511e-06, "loss": 0.4642, "step": 7487 }, { "epoch": 0.6720064616005923, "grad_norm": 0.5244259084324345, "learning_rate": 9.538836773700141e-06, "loss": 0.4551, "step": 7488 }, { "epoch": 0.6720962060532634, "grad_norm": 0.5271465688372465, "learning_rate": 9.538617718865534e-06, "loss": 0.5036, "step": 7489 }, { "epoch": 0.6721859505059343, "grad_norm": 0.5359137406485962, "learning_rate": 9.538398614533677e-06, "loss": 0.5648, "step": 7490 }, { "epoch": 0.6722756949586054, "grad_norm": 0.5200121473493082, "learning_rate": 9.538179460706962e-06, "loss": 0.5126, "step": 7491 }, { "epoch": 0.6723654394112764, "grad_norm": 0.44897567780917663, "learning_rate": 9.537960257387778e-06, "loss": 0.3877, "step": 7492 }, { "epoch": 0.6724551838639474, "grad_norm": 0.4936858070236687, "learning_rate": 9.537741004578514e-06, "loss": 0.4459, "step": 7493 }, { "epoch": 0.6725449283166184, "grad_norm": 0.45972602693365605, "learning_rate": 9.537521702281563e-06, "loss": 0.4455, "step": 7494 }, { "epoch": 0.6726346727692895, "grad_norm": 0.4590102005300753, "learning_rate": 9.537302350499316e-06, "loss": 0.4323, "step": 7495 }, { "epoch": 0.6727244172219604, "grad_norm": 0.5075290177860492, "learning_rate": 9.537082949234164e-06, "loss": 0.4564, "step": 7496 }, { "epoch": 0.6728141616746315, "grad_norm": 0.4410415507714228, "learning_rate": 9.536863498488502e-06, "loss": 0.3803, "step": 7497 }, { "epoch": 0.6729039061273026, "grad_norm": 0.532740379876128, "learning_rate": 9.536643998264722e-06, "loss": 0.51, "step": 7498 }, { "epoch": 0.6729936505799735, "grad_norm": 0.45687012854216213, "learning_rate": 9.536424448565218e-06, "loss": 0.4294, "step": 7499 }, { "epoch": 0.6730833950326446, "grad_norm": 0.4587285687696103, "learning_rate": 9.536204849392384e-06, "loss": 0.4252, "step": 7500 }, { "epoch": 0.6731731394853155, "grad_norm": 0.4814207922279069, "learning_rate": 9.535985200748617e-06, "loss": 0.4409, "step": 7501 }, { "epoch": 0.6732628839379866, "grad_norm": 0.506690674258732, "learning_rate": 9.535765502636307e-06, "loss": 0.5325, "step": 7502 }, { "epoch": 0.6733526283906576, "grad_norm": 0.4984905504792709, "learning_rate": 9.535545755057856e-06, "loss": 0.4395, "step": 7503 }, { "epoch": 0.6734423728433286, "grad_norm": 0.4778302305679801, "learning_rate": 9.53532595801566e-06, "loss": 0.4575, "step": 7504 }, { "epoch": 0.6735321172959996, "grad_norm": 0.525943888773769, "learning_rate": 9.535106111512112e-06, "loss": 0.5246, "step": 7505 }, { "epoch": 0.6736218617486707, "grad_norm": 0.5069888491012755, "learning_rate": 9.534886215549613e-06, "loss": 0.4367, "step": 7506 }, { "epoch": 0.6737116062013416, "grad_norm": 0.4598932939630575, "learning_rate": 9.534666270130558e-06, "loss": 0.4051, "step": 7507 }, { "epoch": 0.6738013506540127, "grad_norm": 0.48111478396675295, "learning_rate": 9.53444627525735e-06, "loss": 0.4837, "step": 7508 }, { "epoch": 0.6738910951066838, "grad_norm": 0.44945596577189617, "learning_rate": 9.534226230932385e-06, "loss": 0.405, "step": 7509 }, { "epoch": 0.6739808395593547, "grad_norm": 0.486443400445668, "learning_rate": 9.534006137158063e-06, "loss": 0.4948, "step": 7510 }, { "epoch": 0.6740705840120258, "grad_norm": 0.5248921124714777, "learning_rate": 9.533785993936786e-06, "loss": 0.4987, "step": 7511 }, { "epoch": 0.6741603284646968, "grad_norm": 0.49417654878870854, "learning_rate": 9.533565801270954e-06, "loss": 0.4582, "step": 7512 }, { "epoch": 0.6742500729173678, "grad_norm": 0.46723415635665866, "learning_rate": 9.533345559162968e-06, "loss": 0.4357, "step": 7513 }, { "epoch": 0.6743398173700388, "grad_norm": 0.5004031680252475, "learning_rate": 9.53312526761523e-06, "loss": 0.4625, "step": 7514 }, { "epoch": 0.6744295618227099, "grad_norm": 0.5027471081446736, "learning_rate": 9.532904926630142e-06, "loss": 0.5083, "step": 7515 }, { "epoch": 0.6745193062753808, "grad_norm": 0.5746289668566485, "learning_rate": 9.532684536210108e-06, "loss": 0.5399, "step": 7516 }, { "epoch": 0.6746090507280519, "grad_norm": 0.5242989958112398, "learning_rate": 9.532464096357532e-06, "loss": 0.4686, "step": 7517 }, { "epoch": 0.6746987951807228, "grad_norm": 0.45855177242151873, "learning_rate": 9.532243607074816e-06, "loss": 0.4192, "step": 7518 }, { "epoch": 0.6747885396333939, "grad_norm": 0.47654355219984246, "learning_rate": 9.532023068364367e-06, "loss": 0.4353, "step": 7519 }, { "epoch": 0.674878284086065, "grad_norm": 0.5279592392215794, "learning_rate": 9.531802480228587e-06, "loss": 0.5175, "step": 7520 }, { "epoch": 0.6749680285387359, "grad_norm": 0.5314292014103709, "learning_rate": 9.531581842669884e-06, "loss": 0.4762, "step": 7521 }, { "epoch": 0.675057772991407, "grad_norm": 0.5297270169461675, "learning_rate": 9.531361155690666e-06, "loss": 0.5274, "step": 7522 }, { "epoch": 0.675147517444078, "grad_norm": 0.47903696010391833, "learning_rate": 9.531140419293337e-06, "loss": 0.4856, "step": 7523 }, { "epoch": 0.675237261896749, "grad_norm": 0.4817549819030056, "learning_rate": 9.530919633480302e-06, "loss": 0.4985, "step": 7524 }, { "epoch": 0.67532700634942, "grad_norm": 0.45419280764904363, "learning_rate": 9.530698798253975e-06, "loss": 0.4436, "step": 7525 }, { "epoch": 0.6754167508020911, "grad_norm": 0.5009548457336567, "learning_rate": 9.53047791361676e-06, "loss": 0.4623, "step": 7526 }, { "epoch": 0.675506495254762, "grad_norm": 0.46722403274121516, "learning_rate": 9.530256979571068e-06, "loss": 0.4416, "step": 7527 }, { "epoch": 0.6755962397074331, "grad_norm": 0.4493628989408369, "learning_rate": 9.530035996119307e-06, "loss": 0.3847, "step": 7528 }, { "epoch": 0.6756859841601041, "grad_norm": 0.5114362734704498, "learning_rate": 9.529814963263885e-06, "loss": 0.4587, "step": 7529 }, { "epoch": 0.6757757286127751, "grad_norm": 0.5165074222004361, "learning_rate": 9.529593881007219e-06, "loss": 0.4582, "step": 7530 }, { "epoch": 0.6758654730654461, "grad_norm": 0.4905412337426935, "learning_rate": 9.529372749351716e-06, "loss": 0.4466, "step": 7531 }, { "epoch": 0.6759552175181172, "grad_norm": 0.4779026232716466, "learning_rate": 9.529151568299784e-06, "loss": 0.4325, "step": 7532 }, { "epoch": 0.6760449619707882, "grad_norm": 0.4881449524166523, "learning_rate": 9.528930337853843e-06, "loss": 0.4611, "step": 7533 }, { "epoch": 0.6761347064234592, "grad_norm": 0.4751045480174225, "learning_rate": 9.528709058016299e-06, "loss": 0.4475, "step": 7534 }, { "epoch": 0.6762244508761303, "grad_norm": 0.4776059880026029, "learning_rate": 9.52848772878957e-06, "loss": 0.4565, "step": 7535 }, { "epoch": 0.6763141953288012, "grad_norm": 0.4668250914739028, "learning_rate": 9.528266350176065e-06, "loss": 0.4452, "step": 7536 }, { "epoch": 0.6764039397814723, "grad_norm": 0.5014407721005475, "learning_rate": 9.528044922178203e-06, "loss": 0.4893, "step": 7537 }, { "epoch": 0.6764936842341432, "grad_norm": 0.4960245792174573, "learning_rate": 9.527823444798397e-06, "loss": 0.4811, "step": 7538 }, { "epoch": 0.6765834286868143, "grad_norm": 0.45488261197124685, "learning_rate": 9.52760191803906e-06, "loss": 0.417, "step": 7539 }, { "epoch": 0.6766731731394853, "grad_norm": 0.4661480902889321, "learning_rate": 9.527380341902613e-06, "loss": 0.4407, "step": 7540 }, { "epoch": 0.6767629175921563, "grad_norm": 0.4660963530182043, "learning_rate": 9.527158716391467e-06, "loss": 0.4152, "step": 7541 }, { "epoch": 0.6768526620448273, "grad_norm": 0.5087487280138202, "learning_rate": 9.526937041508043e-06, "loss": 0.4772, "step": 7542 }, { "epoch": 0.6769424064974984, "grad_norm": 0.5004551722084722, "learning_rate": 9.526715317254758e-06, "loss": 0.4649, "step": 7543 }, { "epoch": 0.6770321509501694, "grad_norm": 0.48850671035339066, "learning_rate": 9.526493543634028e-06, "loss": 0.4554, "step": 7544 }, { "epoch": 0.6771218954028404, "grad_norm": 0.49234226960934085, "learning_rate": 9.526271720648272e-06, "loss": 0.46, "step": 7545 }, { "epoch": 0.6772116398555115, "grad_norm": 0.5220911993699907, "learning_rate": 9.526049848299911e-06, "loss": 0.4799, "step": 7546 }, { "epoch": 0.6773013843081824, "grad_norm": 0.47553740280086976, "learning_rate": 9.525827926591364e-06, "loss": 0.4609, "step": 7547 }, { "epoch": 0.6773911287608535, "grad_norm": 0.5029699199831178, "learning_rate": 9.52560595552505e-06, "loss": 0.4614, "step": 7548 }, { "epoch": 0.6774808732135245, "grad_norm": 0.49209558739709774, "learning_rate": 9.525383935103392e-06, "loss": 0.4475, "step": 7549 }, { "epoch": 0.6775706176661955, "grad_norm": 0.44490857222635805, "learning_rate": 9.525161865328808e-06, "loss": 0.4339, "step": 7550 }, { "epoch": 0.6776603621188665, "grad_norm": 0.46999927963529337, "learning_rate": 9.524939746203723e-06, "loss": 0.4617, "step": 7551 }, { "epoch": 0.6777501065715376, "grad_norm": 0.48469089357835105, "learning_rate": 9.524717577730558e-06, "loss": 0.4619, "step": 7552 }, { "epoch": 0.6778398510242085, "grad_norm": 0.5405818740841646, "learning_rate": 9.524495359911736e-06, "loss": 0.4976, "step": 7553 }, { "epoch": 0.6779295954768796, "grad_norm": 0.530029213652036, "learning_rate": 9.52427309274968e-06, "loss": 0.4902, "step": 7554 }, { "epoch": 0.6780193399295507, "grad_norm": 0.48075540880580386, "learning_rate": 9.524050776246816e-06, "loss": 0.4267, "step": 7555 }, { "epoch": 0.6781090843822216, "grad_norm": 0.4823472387686679, "learning_rate": 9.523828410405568e-06, "loss": 0.4537, "step": 7556 }, { "epoch": 0.6781988288348927, "grad_norm": 0.49908537785662366, "learning_rate": 9.523605995228358e-06, "loss": 0.4716, "step": 7557 }, { "epoch": 0.6782885732875636, "grad_norm": 0.4576508697485599, "learning_rate": 9.523383530717615e-06, "loss": 0.4134, "step": 7558 }, { "epoch": 0.6783783177402347, "grad_norm": 0.5017680869050104, "learning_rate": 9.523161016875763e-06, "loss": 0.4542, "step": 7559 }, { "epoch": 0.6784680621929057, "grad_norm": 0.48381649732401366, "learning_rate": 9.522938453705232e-06, "loss": 0.4885, "step": 7560 }, { "epoch": 0.6785578066455767, "grad_norm": 0.4371498822334571, "learning_rate": 9.522715841208446e-06, "loss": 0.3715, "step": 7561 }, { "epoch": 0.6786475510982477, "grad_norm": 0.4929124766262497, "learning_rate": 9.522493179387832e-06, "loss": 0.4465, "step": 7562 }, { "epoch": 0.6787372955509188, "grad_norm": 0.5369138967620453, "learning_rate": 9.52227046824582e-06, "loss": 0.508, "step": 7563 }, { "epoch": 0.6788270400035897, "grad_norm": 0.4500955217022295, "learning_rate": 9.52204770778484e-06, "loss": 0.4104, "step": 7564 }, { "epoch": 0.6789167844562608, "grad_norm": 0.48401280381605916, "learning_rate": 9.521824898007322e-06, "loss": 0.4493, "step": 7565 }, { "epoch": 0.6790065289089318, "grad_norm": 0.5187552989002708, "learning_rate": 9.521602038915691e-06, "loss": 0.4856, "step": 7566 }, { "epoch": 0.6790962733616028, "grad_norm": 0.45668341437487736, "learning_rate": 9.521379130512382e-06, "loss": 0.3977, "step": 7567 }, { "epoch": 0.6791860178142739, "grad_norm": 0.4958565405958793, "learning_rate": 9.521156172799823e-06, "loss": 0.487, "step": 7568 }, { "epoch": 0.6792757622669449, "grad_norm": 0.47723892833826836, "learning_rate": 9.52093316578045e-06, "loss": 0.4283, "step": 7569 }, { "epoch": 0.6793655067196159, "grad_norm": 0.4889012033247748, "learning_rate": 9.52071010945669e-06, "loss": 0.4683, "step": 7570 }, { "epoch": 0.6794552511722869, "grad_norm": 0.4889623206153657, "learning_rate": 9.52048700383098e-06, "loss": 0.442, "step": 7571 }, { "epoch": 0.679544995624958, "grad_norm": 0.45542542742055403, "learning_rate": 9.520263848905749e-06, "loss": 0.3861, "step": 7572 }, { "epoch": 0.6796347400776289, "grad_norm": 0.5185376171134866, "learning_rate": 9.520040644683432e-06, "loss": 0.4752, "step": 7573 }, { "epoch": 0.6797244845303, "grad_norm": 0.5011038441237446, "learning_rate": 9.519817391166466e-06, "loss": 0.4805, "step": 7574 }, { "epoch": 0.679814228982971, "grad_norm": 0.44551467698493774, "learning_rate": 9.519594088357283e-06, "loss": 0.4005, "step": 7575 }, { "epoch": 0.679903973435642, "grad_norm": 0.4941264958393137, "learning_rate": 9.519370736258318e-06, "loss": 0.4673, "step": 7576 }, { "epoch": 0.679993717888313, "grad_norm": 0.5007413334952892, "learning_rate": 9.519147334872008e-06, "loss": 0.4283, "step": 7577 }, { "epoch": 0.680083462340984, "grad_norm": 0.49821161378784923, "learning_rate": 9.518923884200788e-06, "loss": 0.4456, "step": 7578 }, { "epoch": 0.6801732067936551, "grad_norm": 0.4755031207093437, "learning_rate": 9.518700384247099e-06, "loss": 0.4114, "step": 7579 }, { "epoch": 0.6802629512463261, "grad_norm": 0.46855261558344313, "learning_rate": 9.518476835013373e-06, "loss": 0.4319, "step": 7580 }, { "epoch": 0.6803526956989971, "grad_norm": 0.49451925370676164, "learning_rate": 9.518253236502051e-06, "loss": 0.4355, "step": 7581 }, { "epoch": 0.6804424401516681, "grad_norm": 0.46383670068083865, "learning_rate": 9.518029588715571e-06, "loss": 0.449, "step": 7582 }, { "epoch": 0.6805321846043392, "grad_norm": 0.4680406022616862, "learning_rate": 9.51780589165637e-06, "loss": 0.4378, "step": 7583 }, { "epoch": 0.6806219290570101, "grad_norm": 0.47660997983100745, "learning_rate": 9.517582145326894e-06, "loss": 0.4245, "step": 7584 }, { "epoch": 0.6807116735096812, "grad_norm": 0.4840344515590463, "learning_rate": 9.517358349729575e-06, "loss": 0.4337, "step": 7585 }, { "epoch": 0.6808014179623522, "grad_norm": 0.5148165650006216, "learning_rate": 9.51713450486686e-06, "loss": 0.5057, "step": 7586 }, { "epoch": 0.6808911624150232, "grad_norm": 0.4904972936747451, "learning_rate": 9.516910610741186e-06, "loss": 0.4478, "step": 7587 }, { "epoch": 0.6809809068676942, "grad_norm": 0.5101873168919093, "learning_rate": 9.516686667354997e-06, "loss": 0.4892, "step": 7588 }, { "epoch": 0.6810706513203653, "grad_norm": 0.44327328168799957, "learning_rate": 9.516462674710734e-06, "loss": 0.4141, "step": 7589 }, { "epoch": 0.6811603957730363, "grad_norm": 0.527240919765659, "learning_rate": 9.516238632810842e-06, "loss": 0.538, "step": 7590 }, { "epoch": 0.6812501402257073, "grad_norm": 0.4727767793192535, "learning_rate": 9.516014541657761e-06, "loss": 0.4573, "step": 7591 }, { "epoch": 0.6813398846783784, "grad_norm": 0.471267189305176, "learning_rate": 9.515790401253938e-06, "loss": 0.4002, "step": 7592 }, { "epoch": 0.6814296291310493, "grad_norm": 0.5127757729544985, "learning_rate": 9.515566211601815e-06, "loss": 0.4797, "step": 7593 }, { "epoch": 0.6815193735837204, "grad_norm": 0.4910044595281779, "learning_rate": 9.515341972703838e-06, "loss": 0.4878, "step": 7594 }, { "epoch": 0.6816091180363913, "grad_norm": 0.46895778621665496, "learning_rate": 9.515117684562454e-06, "loss": 0.3839, "step": 7595 }, { "epoch": 0.6816988624890624, "grad_norm": 0.5040820830205418, "learning_rate": 9.514893347180104e-06, "loss": 0.4704, "step": 7596 }, { "epoch": 0.6817886069417334, "grad_norm": 0.4821686228240326, "learning_rate": 9.514668960559243e-06, "loss": 0.4636, "step": 7597 }, { "epoch": 0.6818783513944044, "grad_norm": 0.4728774346373285, "learning_rate": 9.514444524702312e-06, "loss": 0.4507, "step": 7598 }, { "epoch": 0.6819680958470754, "grad_norm": 0.48440943487789395, "learning_rate": 9.51422003961176e-06, "loss": 0.4425, "step": 7599 }, { "epoch": 0.6820578402997465, "grad_norm": 0.45472892681868415, "learning_rate": 9.513995505290034e-06, "loss": 0.4095, "step": 7600 }, { "epoch": 0.6821475847524175, "grad_norm": 0.4574261324646362, "learning_rate": 9.513770921739584e-06, "loss": 0.4156, "step": 7601 }, { "epoch": 0.6822373292050885, "grad_norm": 0.45217629909836066, "learning_rate": 9.51354628896286e-06, "loss": 0.4077, "step": 7602 }, { "epoch": 0.6823270736577596, "grad_norm": 0.5093534295325761, "learning_rate": 9.51332160696231e-06, "loss": 0.4918, "step": 7603 }, { "epoch": 0.6824168181104305, "grad_norm": 0.48874750379770304, "learning_rate": 9.513096875740386e-06, "loss": 0.487, "step": 7604 }, { "epoch": 0.6825065625631016, "grad_norm": 0.45141591991922364, "learning_rate": 9.51287209529954e-06, "loss": 0.44, "step": 7605 }, { "epoch": 0.6825963070157726, "grad_norm": 0.48316219693575047, "learning_rate": 9.512647265642219e-06, "loss": 0.4341, "step": 7606 }, { "epoch": 0.6826860514684436, "grad_norm": 0.5048339404788832, "learning_rate": 9.512422386770876e-06, "loss": 0.4556, "step": 7607 }, { "epoch": 0.6827757959211146, "grad_norm": 0.5132987562605957, "learning_rate": 9.512197458687969e-06, "loss": 0.506, "step": 7608 }, { "epoch": 0.6828655403737857, "grad_norm": 0.48676705656641606, "learning_rate": 9.511972481395944e-06, "loss": 0.4851, "step": 7609 }, { "epoch": 0.6829552848264566, "grad_norm": 0.5026108006304335, "learning_rate": 9.51174745489726e-06, "loss": 0.4785, "step": 7610 }, { "epoch": 0.6830450292791277, "grad_norm": 0.49141115529161955, "learning_rate": 9.511522379194367e-06, "loss": 0.4478, "step": 7611 }, { "epoch": 0.6831347737317987, "grad_norm": 0.46777877620916625, "learning_rate": 9.511297254289721e-06, "loss": 0.4464, "step": 7612 }, { "epoch": 0.6832245181844697, "grad_norm": 0.4838353830060916, "learning_rate": 9.51107208018578e-06, "loss": 0.4471, "step": 7613 }, { "epoch": 0.6833142626371408, "grad_norm": 0.5270735725744649, "learning_rate": 9.510846856884995e-06, "loss": 0.5107, "step": 7614 }, { "epoch": 0.6834040070898117, "grad_norm": 0.5204234773230813, "learning_rate": 9.510621584389825e-06, "loss": 0.518, "step": 7615 }, { "epoch": 0.6834937515424828, "grad_norm": 0.4993489122699292, "learning_rate": 9.510396262702725e-06, "loss": 0.4614, "step": 7616 }, { "epoch": 0.6835834959951538, "grad_norm": 0.5318191803020254, "learning_rate": 9.510170891826156e-06, "loss": 0.4456, "step": 7617 }, { "epoch": 0.6836732404478248, "grad_norm": 0.4828877809397563, "learning_rate": 9.509945471762572e-06, "loss": 0.4777, "step": 7618 }, { "epoch": 0.6837629849004958, "grad_norm": 0.47638085171914835, "learning_rate": 9.509720002514434e-06, "loss": 0.3983, "step": 7619 }, { "epoch": 0.6838527293531669, "grad_norm": 0.4663945084187853, "learning_rate": 9.509494484084198e-06, "loss": 0.4205, "step": 7620 }, { "epoch": 0.6839424738058378, "grad_norm": 0.4643539723062334, "learning_rate": 9.509268916474326e-06, "loss": 0.3965, "step": 7621 }, { "epoch": 0.6840322182585089, "grad_norm": 0.4704765000934044, "learning_rate": 9.509043299687278e-06, "loss": 0.4477, "step": 7622 }, { "epoch": 0.6841219627111799, "grad_norm": 0.4622805624260537, "learning_rate": 9.508817633725512e-06, "loss": 0.412, "step": 7623 }, { "epoch": 0.6842117071638509, "grad_norm": 0.477041508817863, "learning_rate": 9.50859191859149e-06, "loss": 0.4365, "step": 7624 }, { "epoch": 0.684301451616522, "grad_norm": 0.4850399500057299, "learning_rate": 9.508366154287675e-06, "loss": 0.4202, "step": 7625 }, { "epoch": 0.684391196069193, "grad_norm": 0.47726846966916053, "learning_rate": 9.50814034081653e-06, "loss": 0.4176, "step": 7626 }, { "epoch": 0.684480940521864, "grad_norm": 0.49805215510978756, "learning_rate": 9.507914478180515e-06, "loss": 0.4772, "step": 7627 }, { "epoch": 0.684570684974535, "grad_norm": 0.5039229215593786, "learning_rate": 9.507688566382095e-06, "loss": 0.4927, "step": 7628 }, { "epoch": 0.6846604294272061, "grad_norm": 0.48206316282693173, "learning_rate": 9.507462605423732e-06, "loss": 0.4565, "step": 7629 }, { "epoch": 0.684750173879877, "grad_norm": 0.5539663263130638, "learning_rate": 9.507236595307892e-06, "loss": 0.5368, "step": 7630 }, { "epoch": 0.6848399183325481, "grad_norm": 0.48918472910449146, "learning_rate": 9.50701053603704e-06, "loss": 0.4522, "step": 7631 }, { "epoch": 0.684929662785219, "grad_norm": 0.44907773035140464, "learning_rate": 9.50678442761364e-06, "loss": 0.4685, "step": 7632 }, { "epoch": 0.6850194072378901, "grad_norm": 0.5204338840681374, "learning_rate": 9.506558270040158e-06, "loss": 0.5473, "step": 7633 }, { "epoch": 0.6851091516905611, "grad_norm": 0.5562816646299225, "learning_rate": 9.506332063319062e-06, "loss": 0.558, "step": 7634 }, { "epoch": 0.6851988961432322, "grad_norm": 0.4905875187221732, "learning_rate": 9.506105807452816e-06, "loss": 0.4808, "step": 7635 }, { "epoch": 0.6852886405959032, "grad_norm": 0.469681555280839, "learning_rate": 9.50587950244389e-06, "loss": 0.4378, "step": 7636 }, { "epoch": 0.6853783850485742, "grad_norm": 0.5008213050805201, "learning_rate": 9.505653148294753e-06, "loss": 0.4756, "step": 7637 }, { "epoch": 0.6854681295012452, "grad_norm": 0.5039857281121309, "learning_rate": 9.505426745007871e-06, "loss": 0.5018, "step": 7638 }, { "epoch": 0.6855578739539162, "grad_norm": 0.5341782513944897, "learning_rate": 9.505200292585714e-06, "loss": 0.5362, "step": 7639 }, { "epoch": 0.6856476184065873, "grad_norm": 0.5089507234527129, "learning_rate": 9.504973791030751e-06, "loss": 0.4635, "step": 7640 }, { "epoch": 0.6857373628592582, "grad_norm": 0.47643077702966075, "learning_rate": 9.504747240345454e-06, "loss": 0.46, "step": 7641 }, { "epoch": 0.6858271073119293, "grad_norm": 0.5058411776933767, "learning_rate": 9.504520640532291e-06, "loss": 0.4797, "step": 7642 }, { "epoch": 0.6859168517646003, "grad_norm": 0.4760338901367511, "learning_rate": 9.504293991593737e-06, "loss": 0.4257, "step": 7643 }, { "epoch": 0.6860065962172713, "grad_norm": 0.48631558262021213, "learning_rate": 9.50406729353226e-06, "loss": 0.4247, "step": 7644 }, { "epoch": 0.6860963406699423, "grad_norm": 0.48181118928603983, "learning_rate": 9.503840546350335e-06, "loss": 0.4298, "step": 7645 }, { "epoch": 0.6861860851226134, "grad_norm": 0.578367096594489, "learning_rate": 9.503613750050433e-06, "loss": 0.5112, "step": 7646 }, { "epoch": 0.6862758295752843, "grad_norm": 0.4665266925925059, "learning_rate": 9.503386904635031e-06, "loss": 0.4149, "step": 7647 }, { "epoch": 0.6863655740279554, "grad_norm": 0.47051392902181377, "learning_rate": 9.503160010106595e-06, "loss": 0.4392, "step": 7648 }, { "epoch": 0.6864553184806265, "grad_norm": 0.47007231213533435, "learning_rate": 9.50293306646761e-06, "loss": 0.461, "step": 7649 }, { "epoch": 0.6865450629332974, "grad_norm": 0.4899482170980022, "learning_rate": 9.502706073720543e-06, "loss": 0.4613, "step": 7650 }, { "epoch": 0.6866348073859685, "grad_norm": 0.5004622889038446, "learning_rate": 9.502479031867872e-06, "loss": 0.4641, "step": 7651 }, { "epoch": 0.6867245518386395, "grad_norm": 0.4808500692356268, "learning_rate": 9.502251940912073e-06, "loss": 0.3933, "step": 7652 }, { "epoch": 0.6868142962913105, "grad_norm": 0.5161445681928676, "learning_rate": 9.502024800855624e-06, "loss": 0.4678, "step": 7653 }, { "epoch": 0.6869040407439815, "grad_norm": 0.5024259282991472, "learning_rate": 9.501797611701e-06, "loss": 0.4986, "step": 7654 }, { "epoch": 0.6869937851966526, "grad_norm": 0.48430237835079765, "learning_rate": 9.501570373450679e-06, "loss": 0.4465, "step": 7655 }, { "epoch": 0.6870835296493235, "grad_norm": 0.4795562554183991, "learning_rate": 9.501343086107143e-06, "loss": 0.4585, "step": 7656 }, { "epoch": 0.6871732741019946, "grad_norm": 0.487346630294683, "learning_rate": 9.501115749672865e-06, "loss": 0.4565, "step": 7657 }, { "epoch": 0.6872630185546655, "grad_norm": 0.5266307553202132, "learning_rate": 9.500888364150328e-06, "loss": 0.4622, "step": 7658 }, { "epoch": 0.6873527630073366, "grad_norm": 0.5095787818092375, "learning_rate": 9.50066092954201e-06, "loss": 0.5081, "step": 7659 }, { "epoch": 0.6874425074600077, "grad_norm": 0.43478844950388895, "learning_rate": 9.500433445850393e-06, "loss": 0.4134, "step": 7660 }, { "epoch": 0.6875322519126786, "grad_norm": 0.5285927048803206, "learning_rate": 9.500205913077956e-06, "loss": 0.5399, "step": 7661 }, { "epoch": 0.6876219963653497, "grad_norm": 0.44727936668043106, "learning_rate": 9.499978331227183e-06, "loss": 0.4266, "step": 7662 }, { "epoch": 0.6877117408180207, "grad_norm": 0.44938980156255426, "learning_rate": 9.499750700300552e-06, "loss": 0.3626, "step": 7663 }, { "epoch": 0.6878014852706917, "grad_norm": 0.4574902882512468, "learning_rate": 9.499523020300548e-06, "loss": 0.372, "step": 7664 }, { "epoch": 0.6878912297233627, "grad_norm": 0.5009139289672198, "learning_rate": 9.499295291229655e-06, "loss": 0.484, "step": 7665 }, { "epoch": 0.6879809741760338, "grad_norm": 0.43929739813650887, "learning_rate": 9.499067513090357e-06, "loss": 0.3958, "step": 7666 }, { "epoch": 0.6880707186287047, "grad_norm": 0.5350981516256924, "learning_rate": 9.498839685885134e-06, "loss": 0.4986, "step": 7667 }, { "epoch": 0.6881604630813758, "grad_norm": 0.48716130230018767, "learning_rate": 9.498611809616474e-06, "loss": 0.4903, "step": 7668 }, { "epoch": 0.6882502075340468, "grad_norm": 0.5029423422050876, "learning_rate": 9.498383884286861e-06, "loss": 0.4855, "step": 7669 }, { "epoch": 0.6883399519867178, "grad_norm": 0.4641197928623486, "learning_rate": 9.498155909898782e-06, "loss": 0.3872, "step": 7670 }, { "epoch": 0.6884296964393889, "grad_norm": 0.4954492248345262, "learning_rate": 9.497927886454723e-06, "loss": 0.4747, "step": 7671 }, { "epoch": 0.6885194408920599, "grad_norm": 0.4845130975357452, "learning_rate": 9.497699813957169e-06, "loss": 0.4256, "step": 7672 }, { "epoch": 0.6886091853447309, "grad_norm": 0.47782753893313956, "learning_rate": 9.497471692408608e-06, "loss": 0.4363, "step": 7673 }, { "epoch": 0.6886989297974019, "grad_norm": 0.4852783637530116, "learning_rate": 9.49724352181153e-06, "loss": 0.4971, "step": 7674 }, { "epoch": 0.688788674250073, "grad_norm": 0.5362910512032868, "learning_rate": 9.49701530216842e-06, "loss": 0.447, "step": 7675 }, { "epoch": 0.6888784187027439, "grad_norm": 0.5236666634771175, "learning_rate": 9.496787033481769e-06, "loss": 0.4903, "step": 7676 }, { "epoch": 0.688968163155415, "grad_norm": 0.46624879685006654, "learning_rate": 9.496558715754067e-06, "loss": 0.4231, "step": 7677 }, { "epoch": 0.6890579076080859, "grad_norm": 0.5216069119502987, "learning_rate": 9.496330348987801e-06, "loss": 0.5298, "step": 7678 }, { "epoch": 0.689147652060757, "grad_norm": 0.47001931073477043, "learning_rate": 9.496101933185466e-06, "loss": 0.4727, "step": 7679 }, { "epoch": 0.689237396513428, "grad_norm": 0.4295317614699842, "learning_rate": 9.495873468349548e-06, "loss": 0.348, "step": 7680 }, { "epoch": 0.689327140966099, "grad_norm": 0.49888907632799223, "learning_rate": 9.495644954482544e-06, "loss": 0.4821, "step": 7681 }, { "epoch": 0.68941688541877, "grad_norm": 0.4604654938364337, "learning_rate": 9.495416391586941e-06, "loss": 0.3849, "step": 7682 }, { "epoch": 0.6895066298714411, "grad_norm": 0.4639439308990281, "learning_rate": 9.495187779665235e-06, "loss": 0.4208, "step": 7683 }, { "epoch": 0.6895963743241121, "grad_norm": 0.5989288544138175, "learning_rate": 9.494959118719917e-06, "loss": 0.5835, "step": 7684 }, { "epoch": 0.6896861187767831, "grad_norm": 0.5040031422171509, "learning_rate": 9.494730408753483e-06, "loss": 0.4669, "step": 7685 }, { "epoch": 0.6897758632294542, "grad_norm": 0.4599269899446275, "learning_rate": 9.494501649768426e-06, "loss": 0.4007, "step": 7686 }, { "epoch": 0.6898656076821251, "grad_norm": 0.4594062282843055, "learning_rate": 9.494272841767242e-06, "loss": 0.4128, "step": 7687 }, { "epoch": 0.6899553521347962, "grad_norm": 0.5462347548251096, "learning_rate": 9.494043984752424e-06, "loss": 0.5296, "step": 7688 }, { "epoch": 0.6900450965874672, "grad_norm": 0.5135601371791922, "learning_rate": 9.493815078726469e-06, "loss": 0.4958, "step": 7689 }, { "epoch": 0.6901348410401382, "grad_norm": 0.4916444426407029, "learning_rate": 9.493586123691874e-06, "loss": 0.4624, "step": 7690 }, { "epoch": 0.6902245854928092, "grad_norm": 0.486706198130141, "learning_rate": 9.493357119651136e-06, "loss": 0.4533, "step": 7691 }, { "epoch": 0.6903143299454803, "grad_norm": 0.4894051153796041, "learning_rate": 9.493128066606751e-06, "loss": 0.4587, "step": 7692 }, { "epoch": 0.6904040743981512, "grad_norm": 0.4780177219847756, "learning_rate": 9.49289896456122e-06, "loss": 0.4186, "step": 7693 }, { "epoch": 0.6904938188508223, "grad_norm": 0.47928225258439344, "learning_rate": 9.492669813517038e-06, "loss": 0.4537, "step": 7694 }, { "epoch": 0.6905835633034934, "grad_norm": 0.4785040890193099, "learning_rate": 9.492440613476704e-06, "loss": 0.3992, "step": 7695 }, { "epoch": 0.6906733077561643, "grad_norm": 0.46480427789095724, "learning_rate": 9.492211364442721e-06, "loss": 0.4381, "step": 7696 }, { "epoch": 0.6907630522088354, "grad_norm": 0.5047278568042409, "learning_rate": 9.491982066417587e-06, "loss": 0.4522, "step": 7697 }, { "epoch": 0.6908527966615063, "grad_norm": 0.48154038714525993, "learning_rate": 9.491752719403805e-06, "loss": 0.4717, "step": 7698 }, { "epoch": 0.6909425411141774, "grad_norm": 0.49698409583422765, "learning_rate": 9.491523323403871e-06, "loss": 0.4895, "step": 7699 }, { "epoch": 0.6910322855668484, "grad_norm": 0.4811561554605377, "learning_rate": 9.491293878420291e-06, "loss": 0.4376, "step": 7700 }, { "epoch": 0.6911220300195194, "grad_norm": 0.46562415587095934, "learning_rate": 9.491064384455567e-06, "loss": 0.4276, "step": 7701 }, { "epoch": 0.6912117744721904, "grad_norm": 0.5005345671315099, "learning_rate": 9.4908348415122e-06, "loss": 0.4786, "step": 7702 }, { "epoch": 0.6913015189248615, "grad_norm": 0.4996986042226102, "learning_rate": 9.490605249592697e-06, "loss": 0.5002, "step": 7703 }, { "epoch": 0.6913912633775324, "grad_norm": 0.5338278236499007, "learning_rate": 9.490375608699556e-06, "loss": 0.4881, "step": 7704 }, { "epoch": 0.6914810078302035, "grad_norm": 0.45319369952892163, "learning_rate": 9.490145918835287e-06, "loss": 0.4368, "step": 7705 }, { "epoch": 0.6915707522828746, "grad_norm": 0.49466130642016304, "learning_rate": 9.489916180002391e-06, "loss": 0.4541, "step": 7706 }, { "epoch": 0.6916604967355455, "grad_norm": 0.4679895915427834, "learning_rate": 9.489686392203377e-06, "loss": 0.4682, "step": 7707 }, { "epoch": 0.6917502411882166, "grad_norm": 0.45811594741810135, "learning_rate": 9.489456555440748e-06, "loss": 0.4285, "step": 7708 }, { "epoch": 0.6918399856408876, "grad_norm": 0.4871928190621218, "learning_rate": 9.489226669717013e-06, "loss": 0.4307, "step": 7709 }, { "epoch": 0.6919297300935586, "grad_norm": 0.500395427954725, "learning_rate": 9.488996735034677e-06, "loss": 0.4992, "step": 7710 }, { "epoch": 0.6920194745462296, "grad_norm": 0.522136830440505, "learning_rate": 9.488766751396248e-06, "loss": 0.4867, "step": 7711 }, { "epoch": 0.6921092189989007, "grad_norm": 0.49372425971391515, "learning_rate": 9.488536718804235e-06, "loss": 0.4812, "step": 7712 }, { "epoch": 0.6921989634515716, "grad_norm": 0.5094955037546014, "learning_rate": 9.488306637261147e-06, "loss": 0.4697, "step": 7713 }, { "epoch": 0.6922887079042427, "grad_norm": 0.4923592373890025, "learning_rate": 9.488076506769491e-06, "loss": 0.4886, "step": 7714 }, { "epoch": 0.6923784523569136, "grad_norm": 0.4377656820677863, "learning_rate": 9.48784632733178e-06, "loss": 0.4366, "step": 7715 }, { "epoch": 0.6924681968095847, "grad_norm": 0.43477609283662666, "learning_rate": 9.48761609895052e-06, "loss": 0.3715, "step": 7716 }, { "epoch": 0.6925579412622557, "grad_norm": 0.498395110575305, "learning_rate": 9.487385821628226e-06, "loss": 0.5041, "step": 7717 }, { "epoch": 0.6926476857149267, "grad_norm": 0.4732381367786157, "learning_rate": 9.487155495367409e-06, "loss": 0.4466, "step": 7718 }, { "epoch": 0.6927374301675978, "grad_norm": 0.4490413334177242, "learning_rate": 9.486925120170577e-06, "loss": 0.4251, "step": 7719 }, { "epoch": 0.6928271746202688, "grad_norm": 0.5228195502309239, "learning_rate": 9.486694696040248e-06, "loss": 0.5128, "step": 7720 }, { "epoch": 0.6929169190729398, "grad_norm": 0.4616919971281945, "learning_rate": 9.486464222978931e-06, "loss": 0.4109, "step": 7721 }, { "epoch": 0.6930066635256108, "grad_norm": 0.5034381902521635, "learning_rate": 9.486233700989139e-06, "loss": 0.405, "step": 7722 }, { "epoch": 0.6930964079782819, "grad_norm": 0.5263779743754056, "learning_rate": 9.486003130073388e-06, "loss": 0.4277, "step": 7723 }, { "epoch": 0.6931861524309528, "grad_norm": 0.522120369872734, "learning_rate": 9.485772510234192e-06, "loss": 0.5083, "step": 7724 }, { "epoch": 0.6932758968836239, "grad_norm": 0.4822208364426117, "learning_rate": 9.485541841474068e-06, "loss": 0.4627, "step": 7725 }, { "epoch": 0.6933656413362949, "grad_norm": 0.4304177108599304, "learning_rate": 9.48531112379553e-06, "loss": 0.3694, "step": 7726 }, { "epoch": 0.6934553857889659, "grad_norm": 0.49226288060511525, "learning_rate": 9.48508035720109e-06, "loss": 0.4858, "step": 7727 }, { "epoch": 0.6935451302416369, "grad_norm": 0.43699965301374527, "learning_rate": 9.484849541693272e-06, "loss": 0.3839, "step": 7728 }, { "epoch": 0.693634874694308, "grad_norm": 0.45353560174271335, "learning_rate": 9.48461867727459e-06, "loss": 0.3802, "step": 7729 }, { "epoch": 0.693724619146979, "grad_norm": 0.5107257003301603, "learning_rate": 9.48438776394756e-06, "loss": 0.5087, "step": 7730 }, { "epoch": 0.69381436359965, "grad_norm": 0.514023329580187, "learning_rate": 9.484156801714704e-06, "loss": 0.4654, "step": 7731 }, { "epoch": 0.6939041080523211, "grad_norm": 0.4914854436062308, "learning_rate": 9.483925790578536e-06, "loss": 0.4209, "step": 7732 }, { "epoch": 0.693993852504992, "grad_norm": 0.519111711019027, "learning_rate": 9.483694730541582e-06, "loss": 0.5142, "step": 7733 }, { "epoch": 0.6940835969576631, "grad_norm": 0.5165940244355487, "learning_rate": 9.483463621606355e-06, "loss": 0.4892, "step": 7734 }, { "epoch": 0.694173341410334, "grad_norm": 0.46053221147307855, "learning_rate": 9.483232463775379e-06, "loss": 0.3975, "step": 7735 }, { "epoch": 0.6942630858630051, "grad_norm": 0.4972157917212545, "learning_rate": 9.483001257051174e-06, "loss": 0.4279, "step": 7736 }, { "epoch": 0.6943528303156761, "grad_norm": 0.4941922359171806, "learning_rate": 9.482770001436263e-06, "loss": 0.4708, "step": 7737 }, { "epoch": 0.6944425747683471, "grad_norm": 0.47457157917878745, "learning_rate": 9.482538696933168e-06, "loss": 0.5009, "step": 7738 }, { "epoch": 0.6945323192210181, "grad_norm": 0.46538656378066245, "learning_rate": 9.482307343544408e-06, "loss": 0.4128, "step": 7739 }, { "epoch": 0.6946220636736892, "grad_norm": 0.47637674147001285, "learning_rate": 9.48207594127251e-06, "loss": 0.417, "step": 7740 }, { "epoch": 0.6947118081263602, "grad_norm": 0.5071174874828465, "learning_rate": 9.481844490119996e-06, "loss": 0.4945, "step": 7741 }, { "epoch": 0.6948015525790312, "grad_norm": 0.4764877798195127, "learning_rate": 9.48161299008939e-06, "loss": 0.4132, "step": 7742 }, { "epoch": 0.6948912970317023, "grad_norm": 0.5074471364772157, "learning_rate": 9.481381441183218e-06, "loss": 0.4971, "step": 7743 }, { "epoch": 0.6949810414843732, "grad_norm": 0.48023001641140534, "learning_rate": 9.481149843404004e-06, "loss": 0.452, "step": 7744 }, { "epoch": 0.6950707859370443, "grad_norm": 0.5171401431603173, "learning_rate": 9.480918196754275e-06, "loss": 0.4632, "step": 7745 }, { "epoch": 0.6951605303897153, "grad_norm": 0.5124715188044434, "learning_rate": 9.480686501236553e-06, "loss": 0.4155, "step": 7746 }, { "epoch": 0.6952502748423863, "grad_norm": 0.5207779518581722, "learning_rate": 9.480454756853372e-06, "loss": 0.5141, "step": 7747 }, { "epoch": 0.6953400192950573, "grad_norm": 0.4573103649970984, "learning_rate": 9.480222963607252e-06, "loss": 0.386, "step": 7748 }, { "epoch": 0.6954297637477284, "grad_norm": 0.45318177664236065, "learning_rate": 9.479991121500728e-06, "loss": 0.4029, "step": 7749 }, { "epoch": 0.6955195082003993, "grad_norm": 0.4516488550992367, "learning_rate": 9.479759230536323e-06, "loss": 0.4023, "step": 7750 }, { "epoch": 0.6956092526530704, "grad_norm": 0.4929486487016524, "learning_rate": 9.479527290716568e-06, "loss": 0.4958, "step": 7751 }, { "epoch": 0.6956989971057415, "grad_norm": 0.47379008685858515, "learning_rate": 9.479295302043992e-06, "loss": 0.4492, "step": 7752 }, { "epoch": 0.6957887415584124, "grad_norm": 0.5043594730877737, "learning_rate": 9.479063264521126e-06, "loss": 0.4332, "step": 7753 }, { "epoch": 0.6958784860110835, "grad_norm": 0.44216135975233767, "learning_rate": 9.4788311781505e-06, "loss": 0.3954, "step": 7754 }, { "epoch": 0.6959682304637544, "grad_norm": 0.4801773939799904, "learning_rate": 9.478599042934645e-06, "loss": 0.4503, "step": 7755 }, { "epoch": 0.6960579749164255, "grad_norm": 0.4978680778042443, "learning_rate": 9.478366858876092e-06, "loss": 0.4345, "step": 7756 }, { "epoch": 0.6961477193690965, "grad_norm": 0.5402516940732696, "learning_rate": 9.478134625977373e-06, "loss": 0.5139, "step": 7757 }, { "epoch": 0.6962374638217675, "grad_norm": 0.46805356769616685, "learning_rate": 9.477902344241022e-06, "loss": 0.4143, "step": 7758 }, { "epoch": 0.6963272082744385, "grad_norm": 0.49530643966267607, "learning_rate": 9.477670013669572e-06, "loss": 0.4386, "step": 7759 }, { "epoch": 0.6964169527271096, "grad_norm": 0.47879462821727087, "learning_rate": 9.477437634265557e-06, "loss": 0.4341, "step": 7760 }, { "epoch": 0.6965066971797805, "grad_norm": 0.4912956165521226, "learning_rate": 9.47720520603151e-06, "loss": 0.4729, "step": 7761 }, { "epoch": 0.6965964416324516, "grad_norm": 0.5203255530140217, "learning_rate": 9.476972728969966e-06, "loss": 0.4754, "step": 7762 }, { "epoch": 0.6966861860851226, "grad_norm": 0.46096849823640645, "learning_rate": 9.47674020308346e-06, "loss": 0.4456, "step": 7763 }, { "epoch": 0.6967759305377936, "grad_norm": 0.43823685158986286, "learning_rate": 9.476507628374532e-06, "loss": 0.4159, "step": 7764 }, { "epoch": 0.6968656749904647, "grad_norm": 0.46968844335727294, "learning_rate": 9.476275004845712e-06, "loss": 0.4344, "step": 7765 }, { "epoch": 0.6969554194431357, "grad_norm": 0.42899414047581064, "learning_rate": 9.476042332499541e-06, "loss": 0.3717, "step": 7766 }, { "epoch": 0.6970451638958067, "grad_norm": 0.4900586854696541, "learning_rate": 9.475809611338555e-06, "loss": 0.4595, "step": 7767 }, { "epoch": 0.6971349083484777, "grad_norm": 0.516518442788819, "learning_rate": 9.475576841365295e-06, "loss": 0.4611, "step": 7768 }, { "epoch": 0.6972246528011488, "grad_norm": 0.45207274495762273, "learning_rate": 9.475344022582294e-06, "loss": 0.427, "step": 7769 }, { "epoch": 0.6973143972538197, "grad_norm": 0.44158498744702257, "learning_rate": 9.475111154992096e-06, "loss": 0.3753, "step": 7770 }, { "epoch": 0.6974041417064908, "grad_norm": 0.4506752954156112, "learning_rate": 9.474878238597238e-06, "loss": 0.3953, "step": 7771 }, { "epoch": 0.6974938861591617, "grad_norm": 0.4835835670583792, "learning_rate": 9.474645273400262e-06, "loss": 0.4586, "step": 7772 }, { "epoch": 0.6975836306118328, "grad_norm": 0.44741040718276504, "learning_rate": 9.474412259403708e-06, "loss": 0.427, "step": 7773 }, { "epoch": 0.6976733750645038, "grad_norm": 0.4414728503109031, "learning_rate": 9.474179196610114e-06, "loss": 0.4018, "step": 7774 }, { "epoch": 0.6977631195171748, "grad_norm": 0.5014504060340345, "learning_rate": 9.473946085022026e-06, "loss": 0.4797, "step": 7775 }, { "epoch": 0.6978528639698459, "grad_norm": 0.5018963333637594, "learning_rate": 9.473712924641985e-06, "loss": 0.4502, "step": 7776 }, { "epoch": 0.6979426084225169, "grad_norm": 0.49597185722063236, "learning_rate": 9.473479715472534e-06, "loss": 0.4956, "step": 7777 }, { "epoch": 0.698032352875188, "grad_norm": 0.4647681082325695, "learning_rate": 9.473246457516217e-06, "loss": 0.4138, "step": 7778 }, { "epoch": 0.6981220973278589, "grad_norm": 0.5317801198612003, "learning_rate": 9.473013150775574e-06, "loss": 0.5023, "step": 7779 }, { "epoch": 0.69821184178053, "grad_norm": 0.45972179910726574, "learning_rate": 9.472779795253153e-06, "loss": 0.4497, "step": 7780 }, { "epoch": 0.6983015862332009, "grad_norm": 0.5208233907047344, "learning_rate": 9.4725463909515e-06, "loss": 0.5129, "step": 7781 }, { "epoch": 0.698391330685872, "grad_norm": 0.510337940389709, "learning_rate": 9.472312937873157e-06, "loss": 0.4601, "step": 7782 }, { "epoch": 0.698481075138543, "grad_norm": 0.47972022947673715, "learning_rate": 9.472079436020672e-06, "loss": 0.4745, "step": 7783 }, { "epoch": 0.698570819591214, "grad_norm": 0.49972158687356716, "learning_rate": 9.471845885396592e-06, "loss": 0.5143, "step": 7784 }, { "epoch": 0.698660564043885, "grad_norm": 0.44500646187364395, "learning_rate": 9.47161228600346e-06, "loss": 0.3525, "step": 7785 }, { "epoch": 0.6987503084965561, "grad_norm": 0.5541867338582597, "learning_rate": 9.47137863784383e-06, "loss": 0.5296, "step": 7786 }, { "epoch": 0.6988400529492271, "grad_norm": 0.4601474796939685, "learning_rate": 9.471144940920244e-06, "loss": 0.399, "step": 7787 }, { "epoch": 0.6989297974018981, "grad_norm": 0.44659248743098684, "learning_rate": 9.470911195235258e-06, "loss": 0.4443, "step": 7788 }, { "epoch": 0.6990195418545692, "grad_norm": 0.48266537140350535, "learning_rate": 9.470677400791413e-06, "loss": 0.4523, "step": 7789 }, { "epoch": 0.6991092863072401, "grad_norm": 0.4588498202874555, "learning_rate": 9.470443557591263e-06, "loss": 0.4286, "step": 7790 }, { "epoch": 0.6991990307599112, "grad_norm": 0.45117265827138675, "learning_rate": 9.470209665637358e-06, "loss": 0.426, "step": 7791 }, { "epoch": 0.6992887752125821, "grad_norm": 0.4947564256389697, "learning_rate": 9.469975724932249e-06, "loss": 0.4574, "step": 7792 }, { "epoch": 0.6993785196652532, "grad_norm": 0.4935736954208848, "learning_rate": 9.469741735478485e-06, "loss": 0.4896, "step": 7793 }, { "epoch": 0.6994682641179242, "grad_norm": 0.4900392361646121, "learning_rate": 9.469507697278622e-06, "loss": 0.4516, "step": 7794 }, { "epoch": 0.6995580085705952, "grad_norm": 0.4702791064055639, "learning_rate": 9.469273610335208e-06, "loss": 0.4315, "step": 7795 }, { "epoch": 0.6996477530232662, "grad_norm": 0.5038673194331186, "learning_rate": 9.4690394746508e-06, "loss": 0.4675, "step": 7796 }, { "epoch": 0.6997374974759373, "grad_norm": 0.473421819869218, "learning_rate": 9.468805290227947e-06, "loss": 0.4167, "step": 7797 }, { "epoch": 0.6998272419286082, "grad_norm": 0.4672461345907165, "learning_rate": 9.468571057069207e-06, "loss": 0.4208, "step": 7798 }, { "epoch": 0.6999169863812793, "grad_norm": 0.45893845497457136, "learning_rate": 9.468336775177132e-06, "loss": 0.4163, "step": 7799 }, { "epoch": 0.7000067308339504, "grad_norm": 0.5014144660632489, "learning_rate": 9.468102444554277e-06, "loss": 0.4593, "step": 7800 }, { "epoch": 0.7000964752866213, "grad_norm": 0.43958637293650554, "learning_rate": 9.467868065203199e-06, "loss": 0.4291, "step": 7801 }, { "epoch": 0.7001862197392924, "grad_norm": 0.44465441906695025, "learning_rate": 9.467633637126454e-06, "loss": 0.3961, "step": 7802 }, { "epoch": 0.7002759641919634, "grad_norm": 0.52006431734006, "learning_rate": 9.467399160326599e-06, "loss": 0.448, "step": 7803 }, { "epoch": 0.7003657086446344, "grad_norm": 0.45755473359719395, "learning_rate": 9.467164634806187e-06, "loss": 0.4315, "step": 7804 }, { "epoch": 0.7004554530973054, "grad_norm": 0.4742426244860418, "learning_rate": 9.466930060567782e-06, "loss": 0.4392, "step": 7805 }, { "epoch": 0.7005451975499765, "grad_norm": 0.49645657400493287, "learning_rate": 9.466695437613938e-06, "loss": 0.4523, "step": 7806 }, { "epoch": 0.7006349420026474, "grad_norm": 0.48582712342257617, "learning_rate": 9.466460765947214e-06, "loss": 0.4364, "step": 7807 }, { "epoch": 0.7007246864553185, "grad_norm": 0.5136923405826586, "learning_rate": 9.466226045570171e-06, "loss": 0.4745, "step": 7808 }, { "epoch": 0.7008144309079894, "grad_norm": 0.5164347489484915, "learning_rate": 9.465991276485368e-06, "loss": 0.4997, "step": 7809 }, { "epoch": 0.7009041753606605, "grad_norm": 0.4815753036750106, "learning_rate": 9.465756458695366e-06, "loss": 0.4216, "step": 7810 }, { "epoch": 0.7009939198133316, "grad_norm": 0.5554195712089323, "learning_rate": 9.465521592202724e-06, "loss": 0.5433, "step": 7811 }, { "epoch": 0.7010836642660025, "grad_norm": 0.5448645865941373, "learning_rate": 9.465286677010007e-06, "loss": 0.5494, "step": 7812 }, { "epoch": 0.7011734087186736, "grad_norm": 0.48142450382352103, "learning_rate": 9.465051713119772e-06, "loss": 0.5054, "step": 7813 }, { "epoch": 0.7012631531713446, "grad_norm": 0.5121022383645176, "learning_rate": 9.464816700534584e-06, "loss": 0.4899, "step": 7814 }, { "epoch": 0.7013528976240156, "grad_norm": 0.5525614428757216, "learning_rate": 9.464581639257008e-06, "loss": 0.5512, "step": 7815 }, { "epoch": 0.7014426420766866, "grad_norm": 0.5317085024737126, "learning_rate": 9.464346529289606e-06, "loss": 0.5382, "step": 7816 }, { "epoch": 0.7015323865293577, "grad_norm": 0.5196971201210785, "learning_rate": 9.46411137063494e-06, "loss": 0.4788, "step": 7817 }, { "epoch": 0.7016221309820286, "grad_norm": 0.4957760495685342, "learning_rate": 9.463876163295576e-06, "loss": 0.4593, "step": 7818 }, { "epoch": 0.7017118754346997, "grad_norm": 0.47883303408007727, "learning_rate": 9.46364090727408e-06, "loss": 0.4195, "step": 7819 }, { "epoch": 0.7018016198873707, "grad_norm": 0.48901589527840617, "learning_rate": 9.463405602573015e-06, "loss": 0.4653, "step": 7820 }, { "epoch": 0.7018913643400417, "grad_norm": 0.4942293682717553, "learning_rate": 9.46317024919495e-06, "loss": 0.456, "step": 7821 }, { "epoch": 0.7019811087927128, "grad_norm": 0.4943809280157289, "learning_rate": 9.462934847142454e-06, "loss": 0.4138, "step": 7822 }, { "epoch": 0.7020708532453838, "grad_norm": 0.4808998754156261, "learning_rate": 9.462699396418088e-06, "loss": 0.4108, "step": 7823 }, { "epoch": 0.7021605976980548, "grad_norm": 0.5145804994734678, "learning_rate": 9.462463897024424e-06, "loss": 0.4324, "step": 7824 }, { "epoch": 0.7022503421507258, "grad_norm": 0.4757640303797672, "learning_rate": 9.462228348964029e-06, "loss": 0.3995, "step": 7825 }, { "epoch": 0.7023400866033969, "grad_norm": 0.5060622510575661, "learning_rate": 9.461992752239472e-06, "loss": 0.5138, "step": 7826 }, { "epoch": 0.7024298310560678, "grad_norm": 0.48397876932821776, "learning_rate": 9.461757106853322e-06, "loss": 0.4477, "step": 7827 }, { "epoch": 0.7025195755087389, "grad_norm": 0.5143720795123741, "learning_rate": 9.461521412808151e-06, "loss": 0.478, "step": 7828 }, { "epoch": 0.7026093199614099, "grad_norm": 0.5416762676553134, "learning_rate": 9.461285670106526e-06, "loss": 0.5759, "step": 7829 }, { "epoch": 0.7026990644140809, "grad_norm": 0.48201956825667436, "learning_rate": 9.461049878751021e-06, "loss": 0.4293, "step": 7830 }, { "epoch": 0.7027888088667519, "grad_norm": 0.4582679189470918, "learning_rate": 9.460814038744204e-06, "loss": 0.3862, "step": 7831 }, { "epoch": 0.702878553319423, "grad_norm": 0.47293813389584805, "learning_rate": 9.460578150088652e-06, "loss": 0.4086, "step": 7832 }, { "epoch": 0.7029682977720939, "grad_norm": 0.4906481812570584, "learning_rate": 9.460342212786933e-06, "loss": 0.4648, "step": 7833 }, { "epoch": 0.703058042224765, "grad_norm": 0.48998983313076294, "learning_rate": 9.460106226841623e-06, "loss": 0.4561, "step": 7834 }, { "epoch": 0.703147786677436, "grad_norm": 0.47983638839684134, "learning_rate": 9.459870192255293e-06, "loss": 0.4438, "step": 7835 }, { "epoch": 0.703237531130107, "grad_norm": 0.47467340495353477, "learning_rate": 9.459634109030521e-06, "loss": 0.4301, "step": 7836 }, { "epoch": 0.7033272755827781, "grad_norm": 0.5773333695278542, "learning_rate": 9.459397977169878e-06, "loss": 0.5454, "step": 7837 }, { "epoch": 0.703417020035449, "grad_norm": 0.47202581572143465, "learning_rate": 9.45916179667594e-06, "loss": 0.4297, "step": 7838 }, { "epoch": 0.7035067644881201, "grad_norm": 0.5233062977154784, "learning_rate": 9.458925567551284e-06, "loss": 0.5506, "step": 7839 }, { "epoch": 0.7035965089407911, "grad_norm": 0.48856862504987486, "learning_rate": 9.458689289798485e-06, "loss": 0.4415, "step": 7840 }, { "epoch": 0.7036862533934621, "grad_norm": 0.5059947781322651, "learning_rate": 9.458452963420122e-06, "loss": 0.4889, "step": 7841 }, { "epoch": 0.7037759978461331, "grad_norm": 0.5050498060263493, "learning_rate": 9.45821658841877e-06, "loss": 0.4942, "step": 7842 }, { "epoch": 0.7038657422988042, "grad_norm": 0.4828804090201337, "learning_rate": 9.457980164797004e-06, "loss": 0.4752, "step": 7843 }, { "epoch": 0.7039554867514751, "grad_norm": 0.42286438727660486, "learning_rate": 9.457743692557411e-06, "loss": 0.3768, "step": 7844 }, { "epoch": 0.7040452312041462, "grad_norm": 0.47409994940576994, "learning_rate": 9.457507171702562e-06, "loss": 0.4345, "step": 7845 }, { "epoch": 0.7041349756568173, "grad_norm": 0.538361074264549, "learning_rate": 9.45727060223504e-06, "loss": 0.4549, "step": 7846 }, { "epoch": 0.7042247201094882, "grad_norm": 0.4796089435277417, "learning_rate": 9.457033984157425e-06, "loss": 0.4427, "step": 7847 }, { "epoch": 0.7043144645621593, "grad_norm": 0.48515794909517207, "learning_rate": 9.456797317472295e-06, "loss": 0.4614, "step": 7848 }, { "epoch": 0.7044042090148303, "grad_norm": 0.539331630563147, "learning_rate": 9.456560602182233e-06, "loss": 0.4772, "step": 7849 }, { "epoch": 0.7044939534675013, "grad_norm": 0.48607775936235076, "learning_rate": 9.456323838289823e-06, "loss": 0.4595, "step": 7850 }, { "epoch": 0.7045836979201723, "grad_norm": 0.4895765929404218, "learning_rate": 9.45608702579764e-06, "loss": 0.4393, "step": 7851 }, { "epoch": 0.7046734423728434, "grad_norm": 0.5016857014141456, "learning_rate": 9.455850164708276e-06, "loss": 0.4726, "step": 7852 }, { "epoch": 0.7047631868255143, "grad_norm": 0.5275081556237052, "learning_rate": 9.455613255024306e-06, "loss": 0.5321, "step": 7853 }, { "epoch": 0.7048529312781854, "grad_norm": 0.4836259899086583, "learning_rate": 9.455376296748317e-06, "loss": 0.4624, "step": 7854 }, { "epoch": 0.7049426757308563, "grad_norm": 0.47525862452840983, "learning_rate": 9.455139289882895e-06, "loss": 0.4665, "step": 7855 }, { "epoch": 0.7050324201835274, "grad_norm": 0.4728012829406888, "learning_rate": 9.454902234430622e-06, "loss": 0.4389, "step": 7856 }, { "epoch": 0.7051221646361985, "grad_norm": 0.5369905748421019, "learning_rate": 9.454665130394084e-06, "loss": 0.5422, "step": 7857 }, { "epoch": 0.7052119090888694, "grad_norm": 0.46969552237089157, "learning_rate": 9.454427977775867e-06, "loss": 0.4345, "step": 7858 }, { "epoch": 0.7053016535415405, "grad_norm": 0.5204590184002753, "learning_rate": 9.454190776578559e-06, "loss": 0.4943, "step": 7859 }, { "epoch": 0.7053913979942115, "grad_norm": 0.49966630149843955, "learning_rate": 9.453953526804743e-06, "loss": 0.429, "step": 7860 }, { "epoch": 0.7054811424468825, "grad_norm": 0.47742924320599306, "learning_rate": 9.453716228457008e-06, "loss": 0.4252, "step": 7861 }, { "epoch": 0.7055708868995535, "grad_norm": 0.46371350406775724, "learning_rate": 9.453478881537946e-06, "loss": 0.4411, "step": 7862 }, { "epoch": 0.7056606313522246, "grad_norm": 0.5026429142640295, "learning_rate": 9.453241486050138e-06, "loss": 0.4537, "step": 7863 }, { "epoch": 0.7057503758048955, "grad_norm": 0.46530260792151534, "learning_rate": 9.453004041996178e-06, "loss": 0.441, "step": 7864 }, { "epoch": 0.7058401202575666, "grad_norm": 0.5247835956241935, "learning_rate": 9.452766549378657e-06, "loss": 0.5507, "step": 7865 }, { "epoch": 0.7059298647102376, "grad_norm": 0.46295871416413825, "learning_rate": 9.45252900820016e-06, "loss": 0.404, "step": 7866 }, { "epoch": 0.7060196091629086, "grad_norm": 0.4831428101698896, "learning_rate": 9.452291418463281e-06, "loss": 0.4587, "step": 7867 }, { "epoch": 0.7061093536155796, "grad_norm": 0.5068948827293512, "learning_rate": 9.452053780170611e-06, "loss": 0.4566, "step": 7868 }, { "epoch": 0.7061990980682507, "grad_norm": 0.5076002071916893, "learning_rate": 9.451816093324739e-06, "loss": 0.5001, "step": 7869 }, { "epoch": 0.7062888425209217, "grad_norm": 0.48811947212547324, "learning_rate": 9.451578357928262e-06, "loss": 0.4737, "step": 7870 }, { "epoch": 0.7063785869735927, "grad_norm": 0.4618449253911116, "learning_rate": 9.451340573983767e-06, "loss": 0.4158, "step": 7871 }, { "epoch": 0.7064683314262638, "grad_norm": 0.4934989717436615, "learning_rate": 9.45110274149385e-06, "loss": 0.4722, "step": 7872 }, { "epoch": 0.7065580758789347, "grad_norm": 0.5081394226800082, "learning_rate": 9.450864860461106e-06, "loss": 0.4978, "step": 7873 }, { "epoch": 0.7066478203316058, "grad_norm": 0.5179519516272664, "learning_rate": 9.450626930888127e-06, "loss": 0.4739, "step": 7874 }, { "epoch": 0.7067375647842767, "grad_norm": 0.49948026832213444, "learning_rate": 9.45038895277751e-06, "loss": 0.4879, "step": 7875 }, { "epoch": 0.7068273092369478, "grad_norm": 0.49635858333869076, "learning_rate": 9.45015092613185e-06, "loss": 0.5259, "step": 7876 }, { "epoch": 0.7069170536896188, "grad_norm": 0.48447685609813346, "learning_rate": 9.44991285095374e-06, "loss": 0.4443, "step": 7877 }, { "epoch": 0.7070067981422898, "grad_norm": 0.4891176234127795, "learning_rate": 9.449674727245777e-06, "loss": 0.4471, "step": 7878 }, { "epoch": 0.7070965425949608, "grad_norm": 0.5199716924727315, "learning_rate": 9.449436555010563e-06, "loss": 0.5211, "step": 7879 }, { "epoch": 0.7071862870476319, "grad_norm": 0.5074596590050053, "learning_rate": 9.449198334250689e-06, "loss": 0.4443, "step": 7880 }, { "epoch": 0.7072760315003029, "grad_norm": 0.4654693995109078, "learning_rate": 9.448960064968756e-06, "loss": 0.4327, "step": 7881 }, { "epoch": 0.7073657759529739, "grad_norm": 0.4838073294176341, "learning_rate": 9.448721747167364e-06, "loss": 0.4566, "step": 7882 }, { "epoch": 0.707455520405645, "grad_norm": 0.4975474187560122, "learning_rate": 9.44848338084911e-06, "loss": 0.4836, "step": 7883 }, { "epoch": 0.7075452648583159, "grad_norm": 0.43341798438845036, "learning_rate": 9.448244966016593e-06, "loss": 0.3762, "step": 7884 }, { "epoch": 0.707635009310987, "grad_norm": 0.5051703656279884, "learning_rate": 9.448006502672414e-06, "loss": 0.5121, "step": 7885 }, { "epoch": 0.707724753763658, "grad_norm": 0.49800470103377364, "learning_rate": 9.447767990819173e-06, "loss": 0.4835, "step": 7886 }, { "epoch": 0.707814498216329, "grad_norm": 0.4949879613472941, "learning_rate": 9.447529430459474e-06, "loss": 0.5164, "step": 7887 }, { "epoch": 0.707904242669, "grad_norm": 0.46571417676516, "learning_rate": 9.447290821595915e-06, "loss": 0.4138, "step": 7888 }, { "epoch": 0.707993987121671, "grad_norm": 0.4461525704129708, "learning_rate": 9.4470521642311e-06, "loss": 0.4148, "step": 7889 }, { "epoch": 0.708083731574342, "grad_norm": 0.4867734020631108, "learning_rate": 9.446813458367632e-06, "loss": 0.4386, "step": 7890 }, { "epoch": 0.7081734760270131, "grad_norm": 0.483600746908013, "learning_rate": 9.446574704008113e-06, "loss": 0.4442, "step": 7891 }, { "epoch": 0.7082632204796842, "grad_norm": 0.48493944897678604, "learning_rate": 9.446335901155146e-06, "loss": 0.4179, "step": 7892 }, { "epoch": 0.7083529649323551, "grad_norm": 0.45253660426589976, "learning_rate": 9.44609704981134e-06, "loss": 0.3893, "step": 7893 }, { "epoch": 0.7084427093850262, "grad_norm": 0.5183013969628204, "learning_rate": 9.445858149979295e-06, "loss": 0.4538, "step": 7894 }, { "epoch": 0.7085324538376971, "grad_norm": 0.5337124560714451, "learning_rate": 9.44561920166162e-06, "loss": 0.5094, "step": 7895 }, { "epoch": 0.7086221982903682, "grad_norm": 0.48153735393142105, "learning_rate": 9.445380204860917e-06, "loss": 0.4752, "step": 7896 }, { "epoch": 0.7087119427430392, "grad_norm": 0.5049066904848981, "learning_rate": 9.445141159579795e-06, "loss": 0.5093, "step": 7897 }, { "epoch": 0.7088016871957102, "grad_norm": 0.4933927862894904, "learning_rate": 9.44490206582086e-06, "loss": 0.4384, "step": 7898 }, { "epoch": 0.7088914316483812, "grad_norm": 0.4215253824041371, "learning_rate": 9.444662923586722e-06, "loss": 0.393, "step": 7899 }, { "epoch": 0.7089811761010523, "grad_norm": 0.49640848851518815, "learning_rate": 9.444423732879986e-06, "loss": 0.502, "step": 7900 }, { "epoch": 0.7090709205537232, "grad_norm": 0.5361883431224763, "learning_rate": 9.444184493703261e-06, "loss": 0.4799, "step": 7901 }, { "epoch": 0.7091606650063943, "grad_norm": 0.47547688446136555, "learning_rate": 9.443945206059158e-06, "loss": 0.4449, "step": 7902 }, { "epoch": 0.7092504094590653, "grad_norm": 0.47314093791687795, "learning_rate": 9.443705869950284e-06, "loss": 0.451, "step": 7903 }, { "epoch": 0.7093401539117363, "grad_norm": 0.5041823999781868, "learning_rate": 9.44346648537925e-06, "loss": 0.4735, "step": 7904 }, { "epoch": 0.7094298983644074, "grad_norm": 0.49853350392231033, "learning_rate": 9.44322705234867e-06, "loss": 0.497, "step": 7905 }, { "epoch": 0.7095196428170784, "grad_norm": 0.4955935154094542, "learning_rate": 9.442987570861148e-06, "loss": 0.4689, "step": 7906 }, { "epoch": 0.7096093872697494, "grad_norm": 0.5014318987545889, "learning_rate": 9.442748040919303e-06, "loss": 0.5004, "step": 7907 }, { "epoch": 0.7096991317224204, "grad_norm": 0.4528411678043471, "learning_rate": 9.442508462525744e-06, "loss": 0.4011, "step": 7908 }, { "epoch": 0.7097888761750915, "grad_norm": 0.48449688450579453, "learning_rate": 9.442268835683084e-06, "loss": 0.4243, "step": 7909 }, { "epoch": 0.7098786206277624, "grad_norm": 0.5065063758204106, "learning_rate": 9.442029160393937e-06, "loss": 0.4481, "step": 7910 }, { "epoch": 0.7099683650804335, "grad_norm": 0.5339335593546279, "learning_rate": 9.441789436660917e-06, "loss": 0.4771, "step": 7911 }, { "epoch": 0.7100581095331044, "grad_norm": 0.48669963793133736, "learning_rate": 9.441549664486636e-06, "loss": 0.4353, "step": 7912 }, { "epoch": 0.7101478539857755, "grad_norm": 0.5006033761444961, "learning_rate": 9.441309843873711e-06, "loss": 0.4891, "step": 7913 }, { "epoch": 0.7102375984384465, "grad_norm": 0.45405163655908065, "learning_rate": 9.441069974824758e-06, "loss": 0.4455, "step": 7914 }, { "epoch": 0.7103273428911175, "grad_norm": 0.48354215292417635, "learning_rate": 9.440830057342392e-06, "loss": 0.4365, "step": 7915 }, { "epoch": 0.7104170873437886, "grad_norm": 0.4878198891598592, "learning_rate": 9.440590091429228e-06, "loss": 0.4537, "step": 7916 }, { "epoch": 0.7105068317964596, "grad_norm": 0.47260474754809517, "learning_rate": 9.440350077087886e-06, "loss": 0.4562, "step": 7917 }, { "epoch": 0.7105965762491306, "grad_norm": 0.525222188772521, "learning_rate": 9.440110014320983e-06, "loss": 0.4729, "step": 7918 }, { "epoch": 0.7106863207018016, "grad_norm": 0.5228676541232101, "learning_rate": 9.439869903131134e-06, "loss": 0.4603, "step": 7919 }, { "epoch": 0.7107760651544727, "grad_norm": 0.47740872709113136, "learning_rate": 9.43962974352096e-06, "loss": 0.4293, "step": 7920 }, { "epoch": 0.7108658096071436, "grad_norm": 0.4720174226436366, "learning_rate": 9.43938953549308e-06, "loss": 0.453, "step": 7921 }, { "epoch": 0.7109555540598147, "grad_norm": 0.46331689047320684, "learning_rate": 9.439149279050115e-06, "loss": 0.4389, "step": 7922 }, { "epoch": 0.7110452985124857, "grad_norm": 0.5102672904595275, "learning_rate": 9.438908974194682e-06, "loss": 0.4913, "step": 7923 }, { "epoch": 0.7111350429651567, "grad_norm": 0.4935394663537051, "learning_rate": 9.438668620929404e-06, "loss": 0.4599, "step": 7924 }, { "epoch": 0.7112247874178277, "grad_norm": 0.48881272433045325, "learning_rate": 9.438428219256902e-06, "loss": 0.4901, "step": 7925 }, { "epoch": 0.7113145318704988, "grad_norm": 0.48149388418476474, "learning_rate": 9.438187769179798e-06, "loss": 0.4148, "step": 7926 }, { "epoch": 0.7114042763231698, "grad_norm": 0.4687340413665959, "learning_rate": 9.437947270700714e-06, "loss": 0.4396, "step": 7927 }, { "epoch": 0.7114940207758408, "grad_norm": 0.5038253061272512, "learning_rate": 9.43770672382227e-06, "loss": 0.4914, "step": 7928 }, { "epoch": 0.7115837652285119, "grad_norm": 0.483313898582085, "learning_rate": 9.437466128547095e-06, "loss": 0.4726, "step": 7929 }, { "epoch": 0.7116735096811828, "grad_norm": 0.5262692862845655, "learning_rate": 9.437225484877807e-06, "loss": 0.5299, "step": 7930 }, { "epoch": 0.7117632541338539, "grad_norm": 0.5026619004362101, "learning_rate": 9.436984792817036e-06, "loss": 0.4828, "step": 7931 }, { "epoch": 0.7118529985865248, "grad_norm": 0.5215228026373584, "learning_rate": 9.436744052367403e-06, "loss": 0.5043, "step": 7932 }, { "epoch": 0.7119427430391959, "grad_norm": 0.49446469621688244, "learning_rate": 9.436503263531535e-06, "loss": 0.4488, "step": 7933 }, { "epoch": 0.7120324874918669, "grad_norm": 0.4767174307454773, "learning_rate": 9.436262426312058e-06, "loss": 0.4622, "step": 7934 }, { "epoch": 0.7121222319445379, "grad_norm": 0.446292582537715, "learning_rate": 9.436021540711596e-06, "loss": 0.418, "step": 7935 }, { "epoch": 0.7122119763972089, "grad_norm": 0.48850575380915384, "learning_rate": 9.43578060673278e-06, "loss": 0.4604, "step": 7936 }, { "epoch": 0.71230172084988, "grad_norm": 0.5421865043275699, "learning_rate": 9.435539624378236e-06, "loss": 0.5391, "step": 7937 }, { "epoch": 0.712391465302551, "grad_norm": 0.51342557407989, "learning_rate": 9.435298593650592e-06, "loss": 0.5048, "step": 7938 }, { "epoch": 0.712481209755222, "grad_norm": 0.49056096408225036, "learning_rate": 9.435057514552474e-06, "loss": 0.4937, "step": 7939 }, { "epoch": 0.7125709542078931, "grad_norm": 0.47473420847827164, "learning_rate": 9.434816387086517e-06, "loss": 0.4664, "step": 7940 }, { "epoch": 0.712660698660564, "grad_norm": 0.4585975038675604, "learning_rate": 9.434575211255346e-06, "loss": 0.4377, "step": 7941 }, { "epoch": 0.7127504431132351, "grad_norm": 0.4521340677598803, "learning_rate": 9.434333987061592e-06, "loss": 0.4315, "step": 7942 }, { "epoch": 0.7128401875659061, "grad_norm": 0.46855359010646896, "learning_rate": 9.434092714507888e-06, "loss": 0.4618, "step": 7943 }, { "epoch": 0.7129299320185771, "grad_norm": 0.47221306054340584, "learning_rate": 9.433851393596862e-06, "loss": 0.4366, "step": 7944 }, { "epoch": 0.7130196764712481, "grad_norm": 0.482705639554904, "learning_rate": 9.43361002433115e-06, "loss": 0.3916, "step": 7945 }, { "epoch": 0.7131094209239192, "grad_norm": 0.5369116867934124, "learning_rate": 9.433368606713379e-06, "loss": 0.5003, "step": 7946 }, { "epoch": 0.7131991653765901, "grad_norm": 0.5161405005003571, "learning_rate": 9.433127140746186e-06, "loss": 0.4875, "step": 7947 }, { "epoch": 0.7132889098292612, "grad_norm": 0.4709794867210569, "learning_rate": 9.432885626432203e-06, "loss": 0.4528, "step": 7948 }, { "epoch": 0.7133786542819321, "grad_norm": 0.4859108941467341, "learning_rate": 9.432644063774063e-06, "loss": 0.4853, "step": 7949 }, { "epoch": 0.7134683987346032, "grad_norm": 0.4311449852510999, "learning_rate": 9.432402452774401e-06, "loss": 0.4007, "step": 7950 }, { "epoch": 0.7135581431872743, "grad_norm": 0.45200575644573426, "learning_rate": 9.432160793435853e-06, "loss": 0.3847, "step": 7951 }, { "epoch": 0.7136478876399452, "grad_norm": 0.4870884098673877, "learning_rate": 9.431919085761055e-06, "loss": 0.4835, "step": 7952 }, { "epoch": 0.7137376320926163, "grad_norm": 0.48824368287293196, "learning_rate": 9.43167732975264e-06, "loss": 0.4284, "step": 7953 }, { "epoch": 0.7138273765452873, "grad_norm": 0.50471644109254, "learning_rate": 9.431435525413246e-06, "loss": 0.5117, "step": 7954 }, { "epoch": 0.7139171209979583, "grad_norm": 0.43825130458237155, "learning_rate": 9.431193672745511e-06, "loss": 0.3922, "step": 7955 }, { "epoch": 0.7140068654506293, "grad_norm": 0.46617903754512147, "learning_rate": 9.430951771752073e-06, "loss": 0.4191, "step": 7956 }, { "epoch": 0.7140966099033004, "grad_norm": 0.5129803396665801, "learning_rate": 9.430709822435567e-06, "loss": 0.4898, "step": 7957 }, { "epoch": 0.7141863543559713, "grad_norm": 0.47190808231341785, "learning_rate": 9.430467824798636e-06, "loss": 0.3967, "step": 7958 }, { "epoch": 0.7142760988086424, "grad_norm": 0.47085139737050397, "learning_rate": 9.430225778843915e-06, "loss": 0.4007, "step": 7959 }, { "epoch": 0.7143658432613134, "grad_norm": 0.46539384895883, "learning_rate": 9.429983684574047e-06, "loss": 0.4173, "step": 7960 }, { "epoch": 0.7144555877139844, "grad_norm": 0.4992734017837715, "learning_rate": 9.429741541991671e-06, "loss": 0.4738, "step": 7961 }, { "epoch": 0.7145453321666555, "grad_norm": 0.4812376955188024, "learning_rate": 9.429499351099426e-06, "loss": 0.4414, "step": 7962 }, { "epoch": 0.7146350766193265, "grad_norm": 0.500935003875399, "learning_rate": 9.429257111899956e-06, "loss": 0.4855, "step": 7963 }, { "epoch": 0.7147248210719975, "grad_norm": 0.5158284042643294, "learning_rate": 9.429014824395903e-06, "loss": 0.4843, "step": 7964 }, { "epoch": 0.7148145655246685, "grad_norm": 0.48710869122800077, "learning_rate": 9.428772488589907e-06, "loss": 0.4499, "step": 7965 }, { "epoch": 0.7149043099773396, "grad_norm": 0.45921495307089427, "learning_rate": 9.428530104484612e-06, "loss": 0.4472, "step": 7966 }, { "epoch": 0.7149940544300105, "grad_norm": 0.49167355384637196, "learning_rate": 9.428287672082661e-06, "loss": 0.3976, "step": 7967 }, { "epoch": 0.7150837988826816, "grad_norm": 0.47739999649953746, "learning_rate": 9.428045191386698e-06, "loss": 0.4738, "step": 7968 }, { "epoch": 0.7151735433353525, "grad_norm": 0.4981638008157507, "learning_rate": 9.427802662399369e-06, "loss": 0.4494, "step": 7969 }, { "epoch": 0.7152632877880236, "grad_norm": 0.5506828227489013, "learning_rate": 9.427560085123315e-06, "loss": 0.4942, "step": 7970 }, { "epoch": 0.7153530322406946, "grad_norm": 0.48516958133838883, "learning_rate": 9.427317459561187e-06, "loss": 0.4138, "step": 7971 }, { "epoch": 0.7154427766933656, "grad_norm": 0.498406282715299, "learning_rate": 9.427074785715626e-06, "loss": 0.4753, "step": 7972 }, { "epoch": 0.7155325211460367, "grad_norm": 0.4642601489960459, "learning_rate": 9.426832063589283e-06, "loss": 0.4309, "step": 7973 }, { "epoch": 0.7156222655987077, "grad_norm": 0.48751637011424387, "learning_rate": 9.4265892931848e-06, "loss": 0.43, "step": 7974 }, { "epoch": 0.7157120100513787, "grad_norm": 0.5121669397850901, "learning_rate": 9.42634647450483e-06, "loss": 0.5008, "step": 7975 }, { "epoch": 0.7158017545040497, "grad_norm": 0.46687612445143767, "learning_rate": 9.426103607552018e-06, "loss": 0.4189, "step": 7976 }, { "epoch": 0.7158914989567208, "grad_norm": 0.48563017766895084, "learning_rate": 9.425860692329011e-06, "loss": 0.432, "step": 7977 }, { "epoch": 0.7159812434093917, "grad_norm": 0.5027966283798161, "learning_rate": 9.425617728838463e-06, "loss": 0.4593, "step": 7978 }, { "epoch": 0.7160709878620628, "grad_norm": 0.45430950199857634, "learning_rate": 9.42537471708302e-06, "loss": 0.4051, "step": 7979 }, { "epoch": 0.7161607323147338, "grad_norm": 0.49480487066649415, "learning_rate": 9.425131657065335e-06, "loss": 0.4506, "step": 7980 }, { "epoch": 0.7162504767674048, "grad_norm": 0.47745851203470085, "learning_rate": 9.424888548788055e-06, "loss": 0.4431, "step": 7981 }, { "epoch": 0.7163402212200758, "grad_norm": 0.5452832346415972, "learning_rate": 9.424645392253834e-06, "loss": 0.4673, "step": 7982 }, { "epoch": 0.7164299656727469, "grad_norm": 0.5156258816070924, "learning_rate": 9.424402187465323e-06, "loss": 0.4599, "step": 7983 }, { "epoch": 0.7165197101254178, "grad_norm": 0.4842112881731072, "learning_rate": 9.424158934425176e-06, "loss": 0.4285, "step": 7984 }, { "epoch": 0.7166094545780889, "grad_norm": 0.5103256438663007, "learning_rate": 9.423915633136043e-06, "loss": 0.5024, "step": 7985 }, { "epoch": 0.71669919903076, "grad_norm": 0.5115619468621715, "learning_rate": 9.423672283600577e-06, "loss": 0.4893, "step": 7986 }, { "epoch": 0.7167889434834309, "grad_norm": 0.48522276858485885, "learning_rate": 9.423428885821437e-06, "loss": 0.4547, "step": 7987 }, { "epoch": 0.716878687936102, "grad_norm": 0.4831066011207923, "learning_rate": 9.423185439801274e-06, "loss": 0.441, "step": 7988 }, { "epoch": 0.716968432388773, "grad_norm": 0.4965151151228729, "learning_rate": 9.422941945542741e-06, "loss": 0.4956, "step": 7989 }, { "epoch": 0.717058176841444, "grad_norm": 0.5004731074742916, "learning_rate": 9.422698403048498e-06, "loss": 0.4863, "step": 7990 }, { "epoch": 0.717147921294115, "grad_norm": 0.5248068902131077, "learning_rate": 9.422454812321197e-06, "loss": 0.4818, "step": 7991 }, { "epoch": 0.717237665746786, "grad_norm": 0.4915046775093697, "learning_rate": 9.422211173363497e-06, "loss": 0.4468, "step": 7992 }, { "epoch": 0.717327410199457, "grad_norm": 0.503369250589584, "learning_rate": 9.421967486178054e-06, "loss": 0.4571, "step": 7993 }, { "epoch": 0.7174171546521281, "grad_norm": 0.49337616317747357, "learning_rate": 9.421723750767527e-06, "loss": 0.4565, "step": 7994 }, { "epoch": 0.717506899104799, "grad_norm": 0.5057809041216264, "learning_rate": 9.421479967134571e-06, "loss": 0.4841, "step": 7995 }, { "epoch": 0.7175966435574701, "grad_norm": 0.4840915190122165, "learning_rate": 9.421236135281847e-06, "loss": 0.3897, "step": 7996 }, { "epoch": 0.7176863880101412, "grad_norm": 0.5062146650701049, "learning_rate": 9.420992255212015e-06, "loss": 0.4457, "step": 7997 }, { "epoch": 0.7177761324628121, "grad_norm": 0.52146894004556, "learning_rate": 9.420748326927734e-06, "loss": 0.5221, "step": 7998 }, { "epoch": 0.7178658769154832, "grad_norm": 0.4491751561119856, "learning_rate": 9.420504350431662e-06, "loss": 0.348, "step": 7999 }, { "epoch": 0.7179556213681542, "grad_norm": 0.47095552100010507, "learning_rate": 9.420260325726463e-06, "loss": 0.4312, "step": 8000 }, { "epoch": 0.7180453658208252, "grad_norm": 0.5411787830074681, "learning_rate": 9.420016252814796e-06, "loss": 0.4509, "step": 8001 }, { "epoch": 0.7181351102734962, "grad_norm": 0.5085585449574748, "learning_rate": 9.419772131699325e-06, "loss": 0.4838, "step": 8002 }, { "epoch": 0.7182248547261673, "grad_norm": 0.47266411885872794, "learning_rate": 9.41952796238271e-06, "loss": 0.43, "step": 8003 }, { "epoch": 0.7183145991788382, "grad_norm": 0.4613121853697801, "learning_rate": 9.419283744867614e-06, "loss": 0.4501, "step": 8004 }, { "epoch": 0.7184043436315093, "grad_norm": 0.5215291133947341, "learning_rate": 9.419039479156703e-06, "loss": 0.4404, "step": 8005 }, { "epoch": 0.7184940880841802, "grad_norm": 0.5079305782428456, "learning_rate": 9.418795165252638e-06, "loss": 0.4545, "step": 8006 }, { "epoch": 0.7185838325368513, "grad_norm": 0.4558499955805955, "learning_rate": 9.418550803158085e-06, "loss": 0.4083, "step": 8007 }, { "epoch": 0.7186735769895224, "grad_norm": 0.4586074202982485, "learning_rate": 9.41830639287571e-06, "loss": 0.3987, "step": 8008 }, { "epoch": 0.7187633214421933, "grad_norm": 0.49179642085572134, "learning_rate": 9.418061934408174e-06, "loss": 0.4443, "step": 8009 }, { "epoch": 0.7188530658948644, "grad_norm": 0.5035199053284246, "learning_rate": 9.417817427758148e-06, "loss": 0.4288, "step": 8010 }, { "epoch": 0.7189428103475354, "grad_norm": 0.5050152361809025, "learning_rate": 9.417572872928297e-06, "loss": 0.4644, "step": 8011 }, { "epoch": 0.7190325548002064, "grad_norm": 0.4851663578587551, "learning_rate": 9.417328269921286e-06, "loss": 0.4555, "step": 8012 }, { "epoch": 0.7191222992528774, "grad_norm": 0.4441863060265639, "learning_rate": 9.417083618739786e-06, "loss": 0.3839, "step": 8013 }, { "epoch": 0.7192120437055485, "grad_norm": 0.4639691803163383, "learning_rate": 9.416838919386463e-06, "loss": 0.4354, "step": 8014 }, { "epoch": 0.7193017881582194, "grad_norm": 0.56090480254411, "learning_rate": 9.416594171863985e-06, "loss": 0.5612, "step": 8015 }, { "epoch": 0.7193915326108905, "grad_norm": 0.5194491270401334, "learning_rate": 9.416349376175022e-06, "loss": 0.4599, "step": 8016 }, { "epoch": 0.7194812770635615, "grad_norm": 0.49165298344314373, "learning_rate": 9.416104532322245e-06, "loss": 0.4864, "step": 8017 }, { "epoch": 0.7195710215162325, "grad_norm": 0.5049636604595548, "learning_rate": 9.415859640308324e-06, "loss": 0.4942, "step": 8018 }, { "epoch": 0.7196607659689035, "grad_norm": 0.4431956880463253, "learning_rate": 9.415614700135928e-06, "loss": 0.402, "step": 8019 }, { "epoch": 0.7197505104215746, "grad_norm": 0.48497090047033703, "learning_rate": 9.415369711807729e-06, "loss": 0.457, "step": 8020 }, { "epoch": 0.7198402548742456, "grad_norm": 0.4802015498771377, "learning_rate": 9.415124675326399e-06, "loss": 0.5079, "step": 8021 }, { "epoch": 0.7199299993269166, "grad_norm": 0.4786331989459919, "learning_rate": 9.41487959069461e-06, "loss": 0.4356, "step": 8022 }, { "epoch": 0.7200197437795877, "grad_norm": 0.4857691543941658, "learning_rate": 9.414634457915035e-06, "loss": 0.4651, "step": 8023 }, { "epoch": 0.7201094882322586, "grad_norm": 0.4975365293493491, "learning_rate": 9.414389276990347e-06, "loss": 0.4445, "step": 8024 }, { "epoch": 0.7201992326849297, "grad_norm": 0.4938166339841146, "learning_rate": 9.41414404792322e-06, "loss": 0.451, "step": 8025 }, { "epoch": 0.7202889771376007, "grad_norm": 0.47342318598808525, "learning_rate": 9.41389877071633e-06, "loss": 0.4559, "step": 8026 }, { "epoch": 0.7203787215902717, "grad_norm": 0.46487018897342, "learning_rate": 9.41365344537235e-06, "loss": 0.4085, "step": 8027 }, { "epoch": 0.7204684660429427, "grad_norm": 0.4846994318108158, "learning_rate": 9.413408071893957e-06, "loss": 0.4183, "step": 8028 }, { "epoch": 0.7205582104956137, "grad_norm": 0.4772926831228131, "learning_rate": 9.413162650283825e-06, "loss": 0.4496, "step": 8029 }, { "epoch": 0.7206479549482847, "grad_norm": 0.45206652158341865, "learning_rate": 9.412917180544633e-06, "loss": 0.421, "step": 8030 }, { "epoch": 0.7207376994009558, "grad_norm": 0.5057962063840112, "learning_rate": 9.412671662679057e-06, "loss": 0.4844, "step": 8031 }, { "epoch": 0.7208274438536268, "grad_norm": 0.5751658916406193, "learning_rate": 9.412426096689772e-06, "loss": 0.4886, "step": 8032 }, { "epoch": 0.7209171883062978, "grad_norm": 0.5011267122642735, "learning_rate": 9.41218048257946e-06, "loss": 0.4435, "step": 8033 }, { "epoch": 0.7210069327589689, "grad_norm": 0.46776955971985, "learning_rate": 9.411934820350798e-06, "loss": 0.4486, "step": 8034 }, { "epoch": 0.7210966772116398, "grad_norm": 0.4827637552825218, "learning_rate": 9.411689110006464e-06, "loss": 0.4623, "step": 8035 }, { "epoch": 0.7211864216643109, "grad_norm": 0.45313944631284503, "learning_rate": 9.41144335154914e-06, "loss": 0.4122, "step": 8036 }, { "epoch": 0.7212761661169819, "grad_norm": 0.5057353617075531, "learning_rate": 9.411197544981506e-06, "loss": 0.4944, "step": 8037 }, { "epoch": 0.7213659105696529, "grad_norm": 0.48214397332612113, "learning_rate": 9.410951690306239e-06, "loss": 0.4037, "step": 8038 }, { "epoch": 0.7214556550223239, "grad_norm": 0.5031459312491579, "learning_rate": 9.410705787526025e-06, "loss": 0.4767, "step": 8039 }, { "epoch": 0.721545399474995, "grad_norm": 0.4489159949914193, "learning_rate": 9.410459836643543e-06, "loss": 0.4148, "step": 8040 }, { "epoch": 0.7216351439276659, "grad_norm": 0.49019225175363185, "learning_rate": 9.410213837661475e-06, "loss": 0.4498, "step": 8041 }, { "epoch": 0.721724888380337, "grad_norm": 0.5148859795866361, "learning_rate": 9.409967790582505e-06, "loss": 0.456, "step": 8042 }, { "epoch": 0.7218146328330081, "grad_norm": 0.5101440062860368, "learning_rate": 9.409721695409318e-06, "loss": 0.4936, "step": 8043 }, { "epoch": 0.721904377285679, "grad_norm": 0.4947419038557503, "learning_rate": 9.409475552144595e-06, "loss": 0.4825, "step": 8044 }, { "epoch": 0.7219941217383501, "grad_norm": 0.4510508307936293, "learning_rate": 9.409229360791023e-06, "loss": 0.4209, "step": 8045 }, { "epoch": 0.722083866191021, "grad_norm": 0.4642948047678403, "learning_rate": 9.408983121351282e-06, "loss": 0.4125, "step": 8046 }, { "epoch": 0.7221736106436921, "grad_norm": 0.5013283501537104, "learning_rate": 9.408736833828063e-06, "loss": 0.4625, "step": 8047 }, { "epoch": 0.7222633550963631, "grad_norm": 0.48568965019397004, "learning_rate": 9.40849049822405e-06, "loss": 0.4232, "step": 8048 }, { "epoch": 0.7223530995490341, "grad_norm": 0.49975963909030363, "learning_rate": 9.408244114541929e-06, "loss": 0.4642, "step": 8049 }, { "epoch": 0.7224428440017051, "grad_norm": 0.47351678568937344, "learning_rate": 9.407997682784387e-06, "loss": 0.4072, "step": 8050 }, { "epoch": 0.7225325884543762, "grad_norm": 0.45224191927977647, "learning_rate": 9.407751202954111e-06, "loss": 0.4065, "step": 8051 }, { "epoch": 0.7226223329070471, "grad_norm": 0.45903998446387756, "learning_rate": 9.407504675053792e-06, "loss": 0.3977, "step": 8052 }, { "epoch": 0.7227120773597182, "grad_norm": 0.501302046272713, "learning_rate": 9.407258099086115e-06, "loss": 0.434, "step": 8053 }, { "epoch": 0.7228018218123892, "grad_norm": 0.5209979529292463, "learning_rate": 9.407011475053768e-06, "loss": 0.5398, "step": 8054 }, { "epoch": 0.7228915662650602, "grad_norm": 0.47132218982273805, "learning_rate": 9.406764802959447e-06, "loss": 0.4808, "step": 8055 }, { "epoch": 0.7229813107177313, "grad_norm": 0.528701363865312, "learning_rate": 9.406518082805837e-06, "loss": 0.5039, "step": 8056 }, { "epoch": 0.7230710551704023, "grad_norm": 0.5139186558572716, "learning_rate": 9.40627131459563e-06, "loss": 0.5088, "step": 8057 }, { "epoch": 0.7231607996230733, "grad_norm": 0.4726501187779759, "learning_rate": 9.406024498331516e-06, "loss": 0.4313, "step": 8058 }, { "epoch": 0.7232505440757443, "grad_norm": 0.4984567202563336, "learning_rate": 9.40577763401619e-06, "loss": 0.4309, "step": 8059 }, { "epoch": 0.7233402885284154, "grad_norm": 0.5215553709949732, "learning_rate": 9.40553072165234e-06, "loss": 0.4913, "step": 8060 }, { "epoch": 0.7234300329810863, "grad_norm": 0.5225538039714953, "learning_rate": 9.405283761242663e-06, "loss": 0.5018, "step": 8061 }, { "epoch": 0.7235197774337574, "grad_norm": 0.48478228793974415, "learning_rate": 9.405036752789848e-06, "loss": 0.4568, "step": 8062 }, { "epoch": 0.7236095218864284, "grad_norm": 0.4474051015964186, "learning_rate": 9.404789696296594e-06, "loss": 0.3863, "step": 8063 }, { "epoch": 0.7236992663390994, "grad_norm": 0.5485215343239028, "learning_rate": 9.40454259176559e-06, "loss": 0.4993, "step": 8064 }, { "epoch": 0.7237890107917704, "grad_norm": 0.4722530658140663, "learning_rate": 9.404295439199535e-06, "loss": 0.4134, "step": 8065 }, { "epoch": 0.7238787552444415, "grad_norm": 0.4442974525783165, "learning_rate": 9.404048238601122e-06, "loss": 0.3967, "step": 8066 }, { "epoch": 0.7239684996971125, "grad_norm": 0.48515392645988675, "learning_rate": 9.403800989973047e-06, "loss": 0.455, "step": 8067 }, { "epoch": 0.7240582441497835, "grad_norm": 0.5083537538076239, "learning_rate": 9.403553693318008e-06, "loss": 0.4564, "step": 8068 }, { "epoch": 0.7241479886024546, "grad_norm": 0.48215942427135144, "learning_rate": 9.4033063486387e-06, "loss": 0.4459, "step": 8069 }, { "epoch": 0.7242377330551255, "grad_norm": 0.4833694417423055, "learning_rate": 9.403058955937823e-06, "loss": 0.4423, "step": 8070 }, { "epoch": 0.7243274775077966, "grad_norm": 0.4872047010180895, "learning_rate": 9.40281151521807e-06, "loss": 0.4267, "step": 8071 }, { "epoch": 0.7244172219604675, "grad_norm": 0.4777442198866572, "learning_rate": 9.402564026482147e-06, "loss": 0.3973, "step": 8072 }, { "epoch": 0.7245069664131386, "grad_norm": 0.4874616404234394, "learning_rate": 9.402316489732748e-06, "loss": 0.4732, "step": 8073 }, { "epoch": 0.7245967108658096, "grad_norm": 0.5561954681067822, "learning_rate": 9.402068904972573e-06, "loss": 0.566, "step": 8074 }, { "epoch": 0.7246864553184806, "grad_norm": 0.4190303343711252, "learning_rate": 9.401821272204323e-06, "loss": 0.3824, "step": 8075 }, { "epoch": 0.7247761997711516, "grad_norm": 0.45651870167927827, "learning_rate": 9.401573591430699e-06, "loss": 0.4372, "step": 8076 }, { "epoch": 0.7248659442238227, "grad_norm": 0.48160993964616683, "learning_rate": 9.401325862654403e-06, "loss": 0.432, "step": 8077 }, { "epoch": 0.7249556886764937, "grad_norm": 0.48049530240730776, "learning_rate": 9.401078085878133e-06, "loss": 0.4471, "step": 8078 }, { "epoch": 0.7250454331291647, "grad_norm": 0.5076210014039271, "learning_rate": 9.400830261104594e-06, "loss": 0.4701, "step": 8079 }, { "epoch": 0.7251351775818358, "grad_norm": 0.46802581992090453, "learning_rate": 9.400582388336488e-06, "loss": 0.4341, "step": 8080 }, { "epoch": 0.7252249220345067, "grad_norm": 0.493607785367902, "learning_rate": 9.400334467576519e-06, "loss": 0.4302, "step": 8081 }, { "epoch": 0.7253146664871778, "grad_norm": 0.5210418221683526, "learning_rate": 9.40008649882739e-06, "loss": 0.4868, "step": 8082 }, { "epoch": 0.7254044109398488, "grad_norm": 0.47976386097521456, "learning_rate": 9.399838482091805e-06, "loss": 0.4426, "step": 8083 }, { "epoch": 0.7254941553925198, "grad_norm": 0.5082676216159774, "learning_rate": 9.399590417372471e-06, "loss": 0.5281, "step": 8084 }, { "epoch": 0.7255838998451908, "grad_norm": 0.4465514524507587, "learning_rate": 9.399342304672091e-06, "loss": 0.4152, "step": 8085 }, { "epoch": 0.7256736442978619, "grad_norm": 0.49353616671044, "learning_rate": 9.399094143993371e-06, "loss": 0.4902, "step": 8086 }, { "epoch": 0.7257633887505328, "grad_norm": 0.5016130633663921, "learning_rate": 9.398845935339019e-06, "loss": 0.4793, "step": 8087 }, { "epoch": 0.7258531332032039, "grad_norm": 0.49340514050505835, "learning_rate": 9.398597678711741e-06, "loss": 0.4793, "step": 8088 }, { "epoch": 0.7259428776558748, "grad_norm": 0.5051036926850582, "learning_rate": 9.398349374114241e-06, "loss": 0.443, "step": 8089 }, { "epoch": 0.7260326221085459, "grad_norm": 0.468571305756267, "learning_rate": 9.398101021549235e-06, "loss": 0.4186, "step": 8090 }, { "epoch": 0.726122366561217, "grad_norm": 0.4829608573031535, "learning_rate": 9.397852621019423e-06, "loss": 0.432, "step": 8091 }, { "epoch": 0.7262121110138879, "grad_norm": 0.4876988093897019, "learning_rate": 9.39760417252752e-06, "loss": 0.4553, "step": 8092 }, { "epoch": 0.726301855466559, "grad_norm": 0.5322489615837176, "learning_rate": 9.397355676076233e-06, "loss": 0.5038, "step": 8093 }, { "epoch": 0.72639159991923, "grad_norm": 0.4872425284043344, "learning_rate": 9.39710713166827e-06, "loss": 0.4337, "step": 8094 }, { "epoch": 0.726481344371901, "grad_norm": 0.4834694483972693, "learning_rate": 9.396858539306348e-06, "loss": 0.4605, "step": 8095 }, { "epoch": 0.726571088824572, "grad_norm": 0.46972336856783564, "learning_rate": 9.396609898993172e-06, "loss": 0.4292, "step": 8096 }, { "epoch": 0.7266608332772431, "grad_norm": 0.4965888717981716, "learning_rate": 9.396361210731455e-06, "loss": 0.4336, "step": 8097 }, { "epoch": 0.726750577729914, "grad_norm": 0.4698162868968451, "learning_rate": 9.396112474523911e-06, "loss": 0.4059, "step": 8098 }, { "epoch": 0.7268403221825851, "grad_norm": 0.4594700207141629, "learning_rate": 9.39586369037325e-06, "loss": 0.4122, "step": 8099 }, { "epoch": 0.7269300666352561, "grad_norm": 0.5025585619057158, "learning_rate": 9.395614858282187e-06, "loss": 0.5103, "step": 8100 }, { "epoch": 0.7270198110879271, "grad_norm": 0.5284859174702868, "learning_rate": 9.395365978253437e-06, "loss": 0.489, "step": 8101 }, { "epoch": 0.7271095555405982, "grad_norm": 0.49533514986656774, "learning_rate": 9.39511705028971e-06, "loss": 0.4626, "step": 8102 }, { "epoch": 0.7271992999932692, "grad_norm": 0.49801690160621237, "learning_rate": 9.394868074393726e-06, "loss": 0.4695, "step": 8103 }, { "epoch": 0.7272890444459402, "grad_norm": 0.4901172928847896, "learning_rate": 9.394619050568197e-06, "loss": 0.4617, "step": 8104 }, { "epoch": 0.7273787888986112, "grad_norm": 0.472079613791318, "learning_rate": 9.394369978815839e-06, "loss": 0.4429, "step": 8105 }, { "epoch": 0.7274685333512823, "grad_norm": 0.5270580423874626, "learning_rate": 9.394120859139367e-06, "loss": 0.509, "step": 8106 }, { "epoch": 0.7275582778039532, "grad_norm": 0.5267865319331608, "learning_rate": 9.3938716915415e-06, "loss": 0.4409, "step": 8107 }, { "epoch": 0.7276480222566243, "grad_norm": 0.48131015543107575, "learning_rate": 9.393622476024957e-06, "loss": 0.4262, "step": 8108 }, { "epoch": 0.7277377667092952, "grad_norm": 0.49804708420042515, "learning_rate": 9.393373212592454e-06, "loss": 0.4612, "step": 8109 }, { "epoch": 0.7278275111619663, "grad_norm": 0.4995675827815659, "learning_rate": 9.393123901246708e-06, "loss": 0.4673, "step": 8110 }, { "epoch": 0.7279172556146373, "grad_norm": 0.4383898420665898, "learning_rate": 9.39287454199044e-06, "loss": 0.4151, "step": 8111 }, { "epoch": 0.7280070000673083, "grad_norm": 0.5301861991594967, "learning_rate": 9.392625134826368e-06, "loss": 0.5314, "step": 8112 }, { "epoch": 0.7280967445199794, "grad_norm": 0.47891073471733975, "learning_rate": 9.392375679757213e-06, "loss": 0.4151, "step": 8113 }, { "epoch": 0.7281864889726504, "grad_norm": 0.5055242678425327, "learning_rate": 9.392126176785694e-06, "loss": 0.4714, "step": 8114 }, { "epoch": 0.7282762334253214, "grad_norm": 0.46816823483355513, "learning_rate": 9.391876625914536e-06, "loss": 0.4264, "step": 8115 }, { "epoch": 0.7283659778779924, "grad_norm": 0.5152295949068957, "learning_rate": 9.391627027146454e-06, "loss": 0.4566, "step": 8116 }, { "epoch": 0.7284557223306635, "grad_norm": 0.50692874474284, "learning_rate": 9.391377380484176e-06, "loss": 0.4589, "step": 8117 }, { "epoch": 0.7285454667833344, "grad_norm": 0.48779317652570137, "learning_rate": 9.391127685930423e-06, "loss": 0.4194, "step": 8118 }, { "epoch": 0.7286352112360055, "grad_norm": 0.5047596249245179, "learning_rate": 9.390877943487917e-06, "loss": 0.5421, "step": 8119 }, { "epoch": 0.7287249556886765, "grad_norm": 0.49177230786726966, "learning_rate": 9.390628153159382e-06, "loss": 0.4775, "step": 8120 }, { "epoch": 0.7288147001413475, "grad_norm": 0.474648959754384, "learning_rate": 9.390378314947543e-06, "loss": 0.4445, "step": 8121 }, { "epoch": 0.7289044445940185, "grad_norm": 0.4580723843972636, "learning_rate": 9.390128428855122e-06, "loss": 0.4253, "step": 8122 }, { "epoch": 0.7289941890466896, "grad_norm": 0.5178142939604041, "learning_rate": 9.38987849488485e-06, "loss": 0.5568, "step": 8123 }, { "epoch": 0.7290839334993606, "grad_norm": 0.5009454367076797, "learning_rate": 9.389628513039446e-06, "loss": 0.4808, "step": 8124 }, { "epoch": 0.7291736779520316, "grad_norm": 0.48861643857579506, "learning_rate": 9.38937848332164e-06, "loss": 0.4656, "step": 8125 }, { "epoch": 0.7292634224047027, "grad_norm": 0.4861923479549222, "learning_rate": 9.389128405734158e-06, "loss": 0.4575, "step": 8126 }, { "epoch": 0.7293531668573736, "grad_norm": 0.45945768784317426, "learning_rate": 9.388878280279727e-06, "loss": 0.4347, "step": 8127 }, { "epoch": 0.7294429113100447, "grad_norm": 0.4565329948794127, "learning_rate": 9.388628106961074e-06, "loss": 0.4153, "step": 8128 }, { "epoch": 0.7295326557627156, "grad_norm": 0.5174303552503763, "learning_rate": 9.38837788578093e-06, "loss": 0.4544, "step": 8129 }, { "epoch": 0.7296224002153867, "grad_norm": 0.46122402245605776, "learning_rate": 9.388127616742024e-06, "loss": 0.3909, "step": 8130 }, { "epoch": 0.7297121446680577, "grad_norm": 0.5000052056189898, "learning_rate": 9.38787729984708e-06, "loss": 0.4614, "step": 8131 }, { "epoch": 0.7298018891207287, "grad_norm": 0.5000329660574703, "learning_rate": 9.387626935098835e-06, "loss": 0.4781, "step": 8132 }, { "epoch": 0.7298916335733997, "grad_norm": 0.4588258731977786, "learning_rate": 9.387376522500014e-06, "loss": 0.4353, "step": 8133 }, { "epoch": 0.7299813780260708, "grad_norm": 0.4458722376389439, "learning_rate": 9.38712606205335e-06, "loss": 0.3888, "step": 8134 }, { "epoch": 0.7300711224787417, "grad_norm": 0.5344485859134981, "learning_rate": 9.386875553761577e-06, "loss": 0.5187, "step": 8135 }, { "epoch": 0.7301608669314128, "grad_norm": 0.5148531594630464, "learning_rate": 9.386624997627421e-06, "loss": 0.4525, "step": 8136 }, { "epoch": 0.7302506113840839, "grad_norm": 0.5124825029310036, "learning_rate": 9.386374393653618e-06, "loss": 0.5063, "step": 8137 }, { "epoch": 0.7303403558367548, "grad_norm": 0.4853366904485867, "learning_rate": 9.386123741842905e-06, "loss": 0.4472, "step": 8138 }, { "epoch": 0.7304301002894259, "grad_norm": 0.49635865206463015, "learning_rate": 9.385873042198008e-06, "loss": 0.4448, "step": 8139 }, { "epoch": 0.7305198447420969, "grad_norm": 0.4576206134558815, "learning_rate": 9.385622294721665e-06, "loss": 0.3871, "step": 8140 }, { "epoch": 0.7306095891947679, "grad_norm": 0.48100738434607476, "learning_rate": 9.385371499416611e-06, "loss": 0.4539, "step": 8141 }, { "epoch": 0.7306993336474389, "grad_norm": 0.5116340793404106, "learning_rate": 9.38512065628558e-06, "loss": 0.4724, "step": 8142 }, { "epoch": 0.73078907810011, "grad_norm": 0.5052519343897317, "learning_rate": 9.384869765331308e-06, "loss": 0.4865, "step": 8143 }, { "epoch": 0.7308788225527809, "grad_norm": 0.5543068312336633, "learning_rate": 9.384618826556531e-06, "loss": 0.5221, "step": 8144 }, { "epoch": 0.730968567005452, "grad_norm": 0.5349771133276712, "learning_rate": 9.384367839963987e-06, "loss": 0.5275, "step": 8145 }, { "epoch": 0.731058311458123, "grad_norm": 0.4488330943684549, "learning_rate": 9.384116805556411e-06, "loss": 0.3922, "step": 8146 }, { "epoch": 0.731148055910794, "grad_norm": 0.4879131689847977, "learning_rate": 9.383865723336543e-06, "loss": 0.4748, "step": 8147 }, { "epoch": 0.7312378003634651, "grad_norm": 0.5304163579501009, "learning_rate": 9.383614593307117e-06, "loss": 0.5537, "step": 8148 }, { "epoch": 0.731327544816136, "grad_norm": 0.5515099718889751, "learning_rate": 9.383363415470878e-06, "loss": 0.4968, "step": 8149 }, { "epoch": 0.7314172892688071, "grad_norm": 0.5221671774745845, "learning_rate": 9.38311218983056e-06, "loss": 0.483, "step": 8150 }, { "epoch": 0.7315070337214781, "grad_norm": 0.49128377573367277, "learning_rate": 9.382860916388908e-06, "loss": 0.4494, "step": 8151 }, { "epoch": 0.7315967781741491, "grad_norm": 0.5076302060206918, "learning_rate": 9.382609595148658e-06, "loss": 0.49, "step": 8152 }, { "epoch": 0.7316865226268201, "grad_norm": 0.5229524982363555, "learning_rate": 9.38235822611255e-06, "loss": 0.4953, "step": 8153 }, { "epoch": 0.7317762670794912, "grad_norm": 0.5723230342615969, "learning_rate": 9.382106809283329e-06, "loss": 0.5583, "step": 8154 }, { "epoch": 0.7318660115321621, "grad_norm": 0.4889419881688966, "learning_rate": 9.381855344663735e-06, "loss": 0.4109, "step": 8155 }, { "epoch": 0.7319557559848332, "grad_norm": 0.5087217718033203, "learning_rate": 9.381603832256512e-06, "loss": 0.4514, "step": 8156 }, { "epoch": 0.7320455004375042, "grad_norm": 0.45037392333088144, "learning_rate": 9.381352272064401e-06, "loss": 0.3993, "step": 8157 }, { "epoch": 0.7321352448901752, "grad_norm": 0.508249925378231, "learning_rate": 9.381100664090145e-06, "loss": 0.4433, "step": 8158 }, { "epoch": 0.7322249893428463, "grad_norm": 0.516569377158553, "learning_rate": 9.38084900833649e-06, "loss": 0.4464, "step": 8159 }, { "epoch": 0.7323147337955173, "grad_norm": 0.4939433281390685, "learning_rate": 9.38059730480618e-06, "loss": 0.4471, "step": 8160 }, { "epoch": 0.7324044782481883, "grad_norm": 0.4934305048537468, "learning_rate": 9.38034555350196e-06, "loss": 0.4132, "step": 8161 }, { "epoch": 0.7324942227008593, "grad_norm": 0.49305081666689227, "learning_rate": 9.380093754426575e-06, "loss": 0.414, "step": 8162 }, { "epoch": 0.7325839671535304, "grad_norm": 0.5350531865334602, "learning_rate": 9.379841907582772e-06, "loss": 0.5386, "step": 8163 }, { "epoch": 0.7326737116062013, "grad_norm": 0.532776968064933, "learning_rate": 9.379590012973296e-06, "loss": 0.48, "step": 8164 }, { "epoch": 0.7327634560588724, "grad_norm": 0.4943673138221664, "learning_rate": 9.379338070600897e-06, "loss": 0.5027, "step": 8165 }, { "epoch": 0.7328532005115433, "grad_norm": 0.5069384639069758, "learning_rate": 9.37908608046832e-06, "loss": 0.5003, "step": 8166 }, { "epoch": 0.7329429449642144, "grad_norm": 0.4985163359451287, "learning_rate": 9.378834042578314e-06, "loss": 0.4416, "step": 8167 }, { "epoch": 0.7330326894168854, "grad_norm": 0.5042390086655824, "learning_rate": 9.378581956933626e-06, "loss": 0.4021, "step": 8168 }, { "epoch": 0.7331224338695564, "grad_norm": 0.5274382154913831, "learning_rate": 9.378329823537006e-06, "loss": 0.5404, "step": 8169 }, { "epoch": 0.7332121783222274, "grad_norm": 0.4778270016902781, "learning_rate": 9.378077642391206e-06, "loss": 0.4349, "step": 8170 }, { "epoch": 0.7333019227748985, "grad_norm": 0.4840092009489785, "learning_rate": 9.377825413498976e-06, "loss": 0.4706, "step": 8171 }, { "epoch": 0.7333916672275695, "grad_norm": 0.539652684970077, "learning_rate": 9.377573136863063e-06, "loss": 0.4847, "step": 8172 }, { "epoch": 0.7334814116802405, "grad_norm": 0.5222617211109424, "learning_rate": 9.377320812486223e-06, "loss": 0.4313, "step": 8173 }, { "epoch": 0.7335711561329116, "grad_norm": 0.49357668308145863, "learning_rate": 9.377068440371204e-06, "loss": 0.4951, "step": 8174 }, { "epoch": 0.7336609005855825, "grad_norm": 0.4523131721516528, "learning_rate": 9.37681602052076e-06, "loss": 0.4252, "step": 8175 }, { "epoch": 0.7337506450382536, "grad_norm": 0.44323079963954054, "learning_rate": 9.376563552937644e-06, "loss": 0.3938, "step": 8176 }, { "epoch": 0.7338403894909246, "grad_norm": 0.512745076705181, "learning_rate": 9.37631103762461e-06, "loss": 0.4673, "step": 8177 }, { "epoch": 0.7339301339435956, "grad_norm": 0.49375281785399894, "learning_rate": 9.376058474584409e-06, "loss": 0.4673, "step": 8178 }, { "epoch": 0.7340198783962666, "grad_norm": 0.5155468349404242, "learning_rate": 9.375805863819798e-06, "loss": 0.5109, "step": 8179 }, { "epoch": 0.7341096228489377, "grad_norm": 0.4814250216164783, "learning_rate": 9.375553205333531e-06, "loss": 0.448, "step": 8180 }, { "epoch": 0.7341993673016086, "grad_norm": 0.5165419143441763, "learning_rate": 9.375300499128365e-06, "loss": 0.527, "step": 8181 }, { "epoch": 0.7342891117542797, "grad_norm": 0.4589138862509177, "learning_rate": 9.375047745207054e-06, "loss": 0.3913, "step": 8182 }, { "epoch": 0.7343788562069508, "grad_norm": 0.4998218915556342, "learning_rate": 9.374794943572354e-06, "loss": 0.4887, "step": 8183 }, { "epoch": 0.7344686006596217, "grad_norm": 0.5008983312044369, "learning_rate": 9.374542094227025e-06, "loss": 0.4395, "step": 8184 }, { "epoch": 0.7345583451122928, "grad_norm": 0.48201982375595803, "learning_rate": 9.374289197173821e-06, "loss": 0.4902, "step": 8185 }, { "epoch": 0.7346480895649637, "grad_norm": 0.534184167480608, "learning_rate": 9.374036252415502e-06, "loss": 0.5416, "step": 8186 }, { "epoch": 0.7347378340176348, "grad_norm": 0.48092490482577815, "learning_rate": 9.373783259954827e-06, "loss": 0.4443, "step": 8187 }, { "epoch": 0.7348275784703058, "grad_norm": 0.47299373252314747, "learning_rate": 9.373530219794555e-06, "loss": 0.4427, "step": 8188 }, { "epoch": 0.7349173229229768, "grad_norm": 0.5108028396603054, "learning_rate": 9.373277131937444e-06, "loss": 0.4805, "step": 8189 }, { "epoch": 0.7350070673756478, "grad_norm": 0.548193168600697, "learning_rate": 9.373023996386255e-06, "loss": 0.5021, "step": 8190 }, { "epoch": 0.7350968118283189, "grad_norm": 0.5374910261239885, "learning_rate": 9.37277081314375e-06, "loss": 0.5359, "step": 8191 }, { "epoch": 0.7351865562809898, "grad_norm": 0.4900807576643982, "learning_rate": 9.372517582212688e-06, "loss": 0.4475, "step": 8192 }, { "epoch": 0.7352763007336609, "grad_norm": 0.47834978789946614, "learning_rate": 9.37226430359583e-06, "loss": 0.4185, "step": 8193 }, { "epoch": 0.735366045186332, "grad_norm": 0.4751840939263819, "learning_rate": 9.372010977295942e-06, "loss": 0.4278, "step": 8194 }, { "epoch": 0.7354557896390029, "grad_norm": 0.48033584028681864, "learning_rate": 9.371757603315783e-06, "loss": 0.4089, "step": 8195 }, { "epoch": 0.735545534091674, "grad_norm": 0.461572003294869, "learning_rate": 9.371504181658119e-06, "loss": 0.4198, "step": 8196 }, { "epoch": 0.735635278544345, "grad_norm": 0.5509920042557149, "learning_rate": 9.371250712325712e-06, "loss": 0.4751, "step": 8197 }, { "epoch": 0.735725022997016, "grad_norm": 0.4613037770493627, "learning_rate": 9.370997195321328e-06, "loss": 0.4286, "step": 8198 }, { "epoch": 0.735814767449687, "grad_norm": 0.4793892472165293, "learning_rate": 9.370743630647728e-06, "loss": 0.4681, "step": 8199 }, { "epoch": 0.7359045119023581, "grad_norm": 0.46836921046921226, "learning_rate": 9.370490018307683e-06, "loss": 0.436, "step": 8200 }, { "epoch": 0.735994256355029, "grad_norm": 0.4923555498933906, "learning_rate": 9.370236358303953e-06, "loss": 0.4594, "step": 8201 }, { "epoch": 0.7360840008077001, "grad_norm": 0.49137431775997187, "learning_rate": 9.36998265063931e-06, "loss": 0.4679, "step": 8202 }, { "epoch": 0.736173745260371, "grad_norm": 0.49961855605289196, "learning_rate": 9.369728895316515e-06, "loss": 0.4308, "step": 8203 }, { "epoch": 0.7362634897130421, "grad_norm": 0.5112004270207181, "learning_rate": 9.369475092338339e-06, "loss": 0.4825, "step": 8204 }, { "epoch": 0.7363532341657131, "grad_norm": 0.5124288551574758, "learning_rate": 9.36922124170755e-06, "loss": 0.5314, "step": 8205 }, { "epoch": 0.7364429786183841, "grad_norm": 0.47553825752791923, "learning_rate": 9.368967343426918e-06, "loss": 0.4497, "step": 8206 }, { "epoch": 0.7365327230710552, "grad_norm": 0.4891744210466235, "learning_rate": 9.368713397499207e-06, "loss": 0.4591, "step": 8207 }, { "epoch": 0.7366224675237262, "grad_norm": 0.5670657866035062, "learning_rate": 9.36845940392719e-06, "loss": 0.4987, "step": 8208 }, { "epoch": 0.7367122119763972, "grad_norm": 0.49476152310117444, "learning_rate": 9.368205362713636e-06, "loss": 0.466, "step": 8209 }, { "epoch": 0.7368019564290682, "grad_norm": 0.48233764276122504, "learning_rate": 9.367951273861315e-06, "loss": 0.463, "step": 8210 }, { "epoch": 0.7368917008817393, "grad_norm": 0.5043762702933087, "learning_rate": 9.367697137373001e-06, "loss": 0.5298, "step": 8211 }, { "epoch": 0.7369814453344102, "grad_norm": 0.4562874176023181, "learning_rate": 9.367442953251463e-06, "loss": 0.4559, "step": 8212 }, { "epoch": 0.7370711897870813, "grad_norm": 0.48382379845238827, "learning_rate": 9.367188721499472e-06, "loss": 0.4505, "step": 8213 }, { "epoch": 0.7371609342397523, "grad_norm": 0.4387577483417475, "learning_rate": 9.366934442119804e-06, "loss": 0.3931, "step": 8214 }, { "epoch": 0.7372506786924233, "grad_norm": 0.4695068360190249, "learning_rate": 9.366680115115228e-06, "loss": 0.4371, "step": 8215 }, { "epoch": 0.7373404231450943, "grad_norm": 0.4903330106303921, "learning_rate": 9.36642574048852e-06, "loss": 0.4743, "step": 8216 }, { "epoch": 0.7374301675977654, "grad_norm": 0.5319057737742177, "learning_rate": 9.366171318242456e-06, "loss": 0.4587, "step": 8217 }, { "epoch": 0.7375199120504364, "grad_norm": 0.4689898267293773, "learning_rate": 9.365916848379808e-06, "loss": 0.4496, "step": 8218 }, { "epoch": 0.7376096565031074, "grad_norm": 0.4822885116125501, "learning_rate": 9.365662330903352e-06, "loss": 0.4308, "step": 8219 }, { "epoch": 0.7376994009557785, "grad_norm": 0.4869031021376841, "learning_rate": 9.365407765815862e-06, "loss": 0.421, "step": 8220 }, { "epoch": 0.7377891454084494, "grad_norm": 0.49191883068932213, "learning_rate": 9.365153153120117e-06, "loss": 0.4618, "step": 8221 }, { "epoch": 0.7378788898611205, "grad_norm": 0.47547165168300387, "learning_rate": 9.364898492818892e-06, "loss": 0.4377, "step": 8222 }, { "epoch": 0.7379686343137914, "grad_norm": 0.4776685647393086, "learning_rate": 9.364643784914966e-06, "loss": 0.4467, "step": 8223 }, { "epoch": 0.7380583787664625, "grad_norm": 0.5146156331314786, "learning_rate": 9.364389029411115e-06, "loss": 0.5497, "step": 8224 }, { "epoch": 0.7381481232191335, "grad_norm": 0.4687383102882096, "learning_rate": 9.364134226310118e-06, "loss": 0.4647, "step": 8225 }, { "epoch": 0.7382378676718045, "grad_norm": 0.5088329196338834, "learning_rate": 9.363879375614752e-06, "loss": 0.4471, "step": 8226 }, { "epoch": 0.7383276121244755, "grad_norm": 0.4667763172243339, "learning_rate": 9.3636244773278e-06, "loss": 0.4328, "step": 8227 }, { "epoch": 0.7384173565771466, "grad_norm": 0.5419801221337837, "learning_rate": 9.36336953145204e-06, "loss": 0.5335, "step": 8228 }, { "epoch": 0.7385071010298176, "grad_norm": 0.5025212831039496, "learning_rate": 9.363114537990253e-06, "loss": 0.5264, "step": 8229 }, { "epoch": 0.7385968454824886, "grad_norm": 0.4777858693917717, "learning_rate": 9.362859496945219e-06, "loss": 0.4489, "step": 8230 }, { "epoch": 0.7386865899351597, "grad_norm": 0.45657332189169986, "learning_rate": 9.362604408319718e-06, "loss": 0.438, "step": 8231 }, { "epoch": 0.7387763343878306, "grad_norm": 0.5053739370894598, "learning_rate": 9.362349272116533e-06, "loss": 0.4945, "step": 8232 }, { "epoch": 0.7388660788405017, "grad_norm": 0.4693747110366073, "learning_rate": 9.36209408833845e-06, "loss": 0.4128, "step": 8233 }, { "epoch": 0.7389558232931727, "grad_norm": 0.5549707114575656, "learning_rate": 9.361838856988247e-06, "loss": 0.5463, "step": 8234 }, { "epoch": 0.7390455677458437, "grad_norm": 0.502416701465526, "learning_rate": 9.361583578068711e-06, "loss": 0.4722, "step": 8235 }, { "epoch": 0.7391353121985147, "grad_norm": 0.5254328082515484, "learning_rate": 9.361328251582624e-06, "loss": 0.4712, "step": 8236 }, { "epoch": 0.7392250566511858, "grad_norm": 0.49105173037069594, "learning_rate": 9.361072877532772e-06, "loss": 0.4324, "step": 8237 }, { "epoch": 0.7393148011038567, "grad_norm": 0.49609658513206506, "learning_rate": 9.360817455921938e-06, "loss": 0.5051, "step": 8238 }, { "epoch": 0.7394045455565278, "grad_norm": 0.5069607318931013, "learning_rate": 9.36056198675291e-06, "loss": 0.4415, "step": 8239 }, { "epoch": 0.7394942900091988, "grad_norm": 0.47066644398466095, "learning_rate": 9.360306470028471e-06, "loss": 0.4456, "step": 8240 }, { "epoch": 0.7395840344618698, "grad_norm": 0.4805879990913253, "learning_rate": 9.360050905751413e-06, "loss": 0.4259, "step": 8241 }, { "epoch": 0.7396737789145409, "grad_norm": 0.4520610503777354, "learning_rate": 9.359795293924516e-06, "loss": 0.4193, "step": 8242 }, { "epoch": 0.7397635233672119, "grad_norm": 0.49955286025171153, "learning_rate": 9.359539634550573e-06, "loss": 0.4867, "step": 8243 }, { "epoch": 0.7398532678198829, "grad_norm": 0.49058967333713693, "learning_rate": 9.35928392763237e-06, "loss": 0.4596, "step": 8244 }, { "epoch": 0.7399430122725539, "grad_norm": 0.48787953577907156, "learning_rate": 9.359028173172697e-06, "loss": 0.4683, "step": 8245 }, { "epoch": 0.740032756725225, "grad_norm": 0.49854888876483455, "learning_rate": 9.358772371174341e-06, "loss": 0.4674, "step": 8246 }, { "epoch": 0.7401225011778959, "grad_norm": 0.4818922820149564, "learning_rate": 9.358516521640095e-06, "loss": 0.4748, "step": 8247 }, { "epoch": 0.740212245630567, "grad_norm": 0.4866351080312966, "learning_rate": 9.358260624572746e-06, "loss": 0.4744, "step": 8248 }, { "epoch": 0.7403019900832379, "grad_norm": 0.4677107138444733, "learning_rate": 9.358004679975085e-06, "loss": 0.4446, "step": 8249 }, { "epoch": 0.740391734535909, "grad_norm": 0.4725259771098829, "learning_rate": 9.357748687849906e-06, "loss": 0.4425, "step": 8250 }, { "epoch": 0.74048147898858, "grad_norm": 0.4636616979743222, "learning_rate": 9.357492648199999e-06, "loss": 0.4486, "step": 8251 }, { "epoch": 0.740571223441251, "grad_norm": 0.4955620536004186, "learning_rate": 9.357236561028155e-06, "loss": 0.4529, "step": 8252 }, { "epoch": 0.7406609678939221, "grad_norm": 0.5026004028371898, "learning_rate": 9.35698042633717e-06, "loss": 0.5251, "step": 8253 }, { "epoch": 0.7407507123465931, "grad_norm": 0.5231624905040454, "learning_rate": 9.356724244129836e-06, "loss": 0.5499, "step": 8254 }, { "epoch": 0.7408404567992641, "grad_norm": 0.5453619908765289, "learning_rate": 9.356468014408945e-06, "loss": 0.5072, "step": 8255 }, { "epoch": 0.7409302012519351, "grad_norm": 0.4704054026570546, "learning_rate": 9.356211737177294e-06, "loss": 0.4077, "step": 8256 }, { "epoch": 0.7410199457046062, "grad_norm": 0.4869158892098052, "learning_rate": 9.355955412437678e-06, "loss": 0.4229, "step": 8257 }, { "epoch": 0.7411096901572771, "grad_norm": 0.5060816502555404, "learning_rate": 9.355699040192889e-06, "loss": 0.4926, "step": 8258 }, { "epoch": 0.7411994346099482, "grad_norm": 0.45593724385229784, "learning_rate": 9.355442620445727e-06, "loss": 0.4075, "step": 8259 }, { "epoch": 0.7412891790626192, "grad_norm": 0.4843219140249888, "learning_rate": 9.355186153198985e-06, "loss": 0.4451, "step": 8260 }, { "epoch": 0.7413789235152902, "grad_norm": 0.5149050286875412, "learning_rate": 9.354929638455463e-06, "loss": 0.4732, "step": 8261 }, { "epoch": 0.7414686679679612, "grad_norm": 0.4808987488182198, "learning_rate": 9.354673076217957e-06, "loss": 0.416, "step": 8262 }, { "epoch": 0.7415584124206323, "grad_norm": 0.44998683356079133, "learning_rate": 9.354416466489264e-06, "loss": 0.3644, "step": 8263 }, { "epoch": 0.7416481568733033, "grad_norm": 0.470963764972944, "learning_rate": 9.354159809272183e-06, "loss": 0.4312, "step": 8264 }, { "epoch": 0.7417379013259743, "grad_norm": 0.48225796413441196, "learning_rate": 9.353903104569518e-06, "loss": 0.45, "step": 8265 }, { "epoch": 0.7418276457786454, "grad_norm": 0.4757934076680213, "learning_rate": 9.35364635238406e-06, "loss": 0.4718, "step": 8266 }, { "epoch": 0.7419173902313163, "grad_norm": 0.5143409018948208, "learning_rate": 9.353389552718615e-06, "loss": 0.5117, "step": 8267 }, { "epoch": 0.7420071346839874, "grad_norm": 0.5009146069178722, "learning_rate": 9.353132705575983e-06, "loss": 0.4343, "step": 8268 }, { "epoch": 0.7420968791366583, "grad_norm": 0.5075380767697738, "learning_rate": 9.352875810958962e-06, "loss": 0.4862, "step": 8269 }, { "epoch": 0.7421866235893294, "grad_norm": 0.48568117281318335, "learning_rate": 9.352618868870356e-06, "loss": 0.4692, "step": 8270 }, { "epoch": 0.7422763680420004, "grad_norm": 0.49948697152343585, "learning_rate": 9.352361879312968e-06, "loss": 0.4453, "step": 8271 }, { "epoch": 0.7423661124946714, "grad_norm": 0.5042135969395762, "learning_rate": 9.352104842289599e-06, "loss": 0.4557, "step": 8272 }, { "epoch": 0.7424558569473424, "grad_norm": 0.43930770344045394, "learning_rate": 9.351847757803054e-06, "loss": 0.426, "step": 8273 }, { "epoch": 0.7425456014000135, "grad_norm": 0.4858953988337628, "learning_rate": 9.351590625856136e-06, "loss": 0.4711, "step": 8274 }, { "epoch": 0.7426353458526845, "grad_norm": 0.4561282852795243, "learning_rate": 9.351333446451647e-06, "loss": 0.4056, "step": 8275 }, { "epoch": 0.7427250903053555, "grad_norm": 0.4990568005218498, "learning_rate": 9.351076219592394e-06, "loss": 0.4462, "step": 8276 }, { "epoch": 0.7428148347580266, "grad_norm": 0.47054813082645536, "learning_rate": 9.350818945281181e-06, "loss": 0.4225, "step": 8277 }, { "epoch": 0.7429045792106975, "grad_norm": 0.521158117621682, "learning_rate": 9.350561623520815e-06, "loss": 0.4996, "step": 8278 }, { "epoch": 0.7429943236633686, "grad_norm": 0.454248392426544, "learning_rate": 9.350304254314102e-06, "loss": 0.4234, "step": 8279 }, { "epoch": 0.7430840681160396, "grad_norm": 0.4866994433804172, "learning_rate": 9.35004683766385e-06, "loss": 0.4392, "step": 8280 }, { "epoch": 0.7431738125687106, "grad_norm": 0.5307040021788734, "learning_rate": 9.349789373572865e-06, "loss": 0.4991, "step": 8281 }, { "epoch": 0.7432635570213816, "grad_norm": 0.5254958590575343, "learning_rate": 9.349531862043952e-06, "loss": 0.4938, "step": 8282 }, { "epoch": 0.7433533014740527, "grad_norm": 0.4582442620012527, "learning_rate": 9.349274303079924e-06, "loss": 0.4117, "step": 8283 }, { "epoch": 0.7434430459267236, "grad_norm": 0.4876103545431083, "learning_rate": 9.349016696683588e-06, "loss": 0.4217, "step": 8284 }, { "epoch": 0.7435327903793947, "grad_norm": 0.4747544684890091, "learning_rate": 9.348759042857753e-06, "loss": 0.4306, "step": 8285 }, { "epoch": 0.7436225348320656, "grad_norm": 0.5341354638776974, "learning_rate": 9.348501341605231e-06, "loss": 0.5313, "step": 8286 }, { "epoch": 0.7437122792847367, "grad_norm": 0.4807758998801543, "learning_rate": 9.34824359292883e-06, "loss": 0.4508, "step": 8287 }, { "epoch": 0.7438020237374078, "grad_norm": 0.46629776433429865, "learning_rate": 9.347985796831361e-06, "loss": 0.4567, "step": 8288 }, { "epoch": 0.7438917681900787, "grad_norm": 0.5235685421701379, "learning_rate": 9.347727953315637e-06, "loss": 0.4777, "step": 8289 }, { "epoch": 0.7439815126427498, "grad_norm": 0.44683406132222114, "learning_rate": 9.347470062384469e-06, "loss": 0.4268, "step": 8290 }, { "epoch": 0.7440712570954208, "grad_norm": 0.45601484651196106, "learning_rate": 9.34721212404067e-06, "loss": 0.4334, "step": 8291 }, { "epoch": 0.7441610015480918, "grad_norm": 0.48928574251263746, "learning_rate": 9.346954138287053e-06, "loss": 0.4517, "step": 8292 }, { "epoch": 0.7442507460007628, "grad_norm": 0.5194555319668629, "learning_rate": 9.346696105126432e-06, "loss": 0.4714, "step": 8293 }, { "epoch": 0.7443404904534339, "grad_norm": 0.4723339371693163, "learning_rate": 9.346438024561617e-06, "loss": 0.456, "step": 8294 }, { "epoch": 0.7444302349061048, "grad_norm": 0.4663218528751449, "learning_rate": 9.34617989659543e-06, "loss": 0.4402, "step": 8295 }, { "epoch": 0.7445199793587759, "grad_norm": 0.5355129230578739, "learning_rate": 9.34592172123068e-06, "loss": 0.49, "step": 8296 }, { "epoch": 0.7446097238114469, "grad_norm": 0.5465800356552836, "learning_rate": 9.345663498470185e-06, "loss": 0.4576, "step": 8297 }, { "epoch": 0.7446994682641179, "grad_norm": 0.4970512544697642, "learning_rate": 9.345405228316763e-06, "loss": 0.4508, "step": 8298 }, { "epoch": 0.744789212716789, "grad_norm": 0.4775197502739895, "learning_rate": 9.345146910773226e-06, "loss": 0.4001, "step": 8299 }, { "epoch": 0.74487895716946, "grad_norm": 0.5112365190264524, "learning_rate": 9.344888545842393e-06, "loss": 0.4597, "step": 8300 }, { "epoch": 0.744968701622131, "grad_norm": 0.520700470537079, "learning_rate": 9.344630133527084e-06, "loss": 0.5239, "step": 8301 }, { "epoch": 0.745058446074802, "grad_norm": 0.4626300918367458, "learning_rate": 9.344371673830114e-06, "loss": 0.391, "step": 8302 }, { "epoch": 0.745148190527473, "grad_norm": 0.5038110217734386, "learning_rate": 9.344113166754305e-06, "loss": 0.4802, "step": 8303 }, { "epoch": 0.745237934980144, "grad_norm": 0.46131390852094156, "learning_rate": 9.343854612302473e-06, "loss": 0.4286, "step": 8304 }, { "epoch": 0.7453276794328151, "grad_norm": 0.49571599111980413, "learning_rate": 9.34359601047744e-06, "loss": 0.4648, "step": 8305 }, { "epoch": 0.745417423885486, "grad_norm": 0.49675958135241577, "learning_rate": 9.343337361282026e-06, "loss": 0.4303, "step": 8306 }, { "epoch": 0.7455071683381571, "grad_norm": 0.47810782483829806, "learning_rate": 9.34307866471905e-06, "loss": 0.4483, "step": 8307 }, { "epoch": 0.7455969127908281, "grad_norm": 0.5198285331951705, "learning_rate": 9.342819920791335e-06, "loss": 0.5185, "step": 8308 }, { "epoch": 0.7456866572434991, "grad_norm": 0.46770062329396944, "learning_rate": 9.342561129501701e-06, "loss": 0.4172, "step": 8309 }, { "epoch": 0.7457764016961702, "grad_norm": 0.5369758614403652, "learning_rate": 9.342302290852973e-06, "loss": 0.4929, "step": 8310 }, { "epoch": 0.7458661461488412, "grad_norm": 0.48217847001198005, "learning_rate": 9.342043404847971e-06, "loss": 0.4094, "step": 8311 }, { "epoch": 0.7459558906015122, "grad_norm": 0.4786330714015731, "learning_rate": 9.341784471489522e-06, "loss": 0.4216, "step": 8312 }, { "epoch": 0.7460456350541832, "grad_norm": 0.5296105485395848, "learning_rate": 9.341525490780446e-06, "loss": 0.4934, "step": 8313 }, { "epoch": 0.7461353795068543, "grad_norm": 0.45249840477438796, "learning_rate": 9.341266462723569e-06, "loss": 0.3668, "step": 8314 }, { "epoch": 0.7462251239595252, "grad_norm": 0.4940987738205376, "learning_rate": 9.341007387321717e-06, "loss": 0.4595, "step": 8315 }, { "epoch": 0.7463148684121963, "grad_norm": 0.49530483367736283, "learning_rate": 9.340748264577715e-06, "loss": 0.4351, "step": 8316 }, { "epoch": 0.7464046128648673, "grad_norm": 0.488604939826774, "learning_rate": 9.340489094494387e-06, "loss": 0.4741, "step": 8317 }, { "epoch": 0.7464943573175383, "grad_norm": 0.450096468286174, "learning_rate": 9.34022987707456e-06, "loss": 0.4054, "step": 8318 }, { "epoch": 0.7465841017702093, "grad_norm": 0.5326177592431008, "learning_rate": 9.339970612321063e-06, "loss": 0.5467, "step": 8319 }, { "epoch": 0.7466738462228804, "grad_norm": 0.4589270918006896, "learning_rate": 9.339711300236723e-06, "loss": 0.4058, "step": 8320 }, { "epoch": 0.7467635906755513, "grad_norm": 0.47027643645997336, "learning_rate": 9.339451940824367e-06, "loss": 0.4435, "step": 8321 }, { "epoch": 0.7468533351282224, "grad_norm": 0.4653859292824494, "learning_rate": 9.339192534086823e-06, "loss": 0.3745, "step": 8322 }, { "epoch": 0.7469430795808935, "grad_norm": 0.4898889838147436, "learning_rate": 9.338933080026922e-06, "loss": 0.4661, "step": 8323 }, { "epoch": 0.7470328240335644, "grad_norm": 0.47585235722397146, "learning_rate": 9.338673578647492e-06, "loss": 0.4429, "step": 8324 }, { "epoch": 0.7471225684862355, "grad_norm": 0.47160113150396066, "learning_rate": 9.338414029951361e-06, "loss": 0.4442, "step": 8325 }, { "epoch": 0.7472123129389064, "grad_norm": 0.45284683849491797, "learning_rate": 9.338154433941366e-06, "loss": 0.3893, "step": 8326 }, { "epoch": 0.7473020573915775, "grad_norm": 0.4769739631649003, "learning_rate": 9.337894790620332e-06, "loss": 0.4146, "step": 8327 }, { "epoch": 0.7473918018442485, "grad_norm": 0.49777600954259743, "learning_rate": 9.337635099991092e-06, "loss": 0.4988, "step": 8328 }, { "epoch": 0.7474815462969195, "grad_norm": 0.527921001361522, "learning_rate": 9.33737536205648e-06, "loss": 0.4622, "step": 8329 }, { "epoch": 0.7475712907495905, "grad_norm": 0.49783262651668997, "learning_rate": 9.337115576819327e-06, "loss": 0.4398, "step": 8330 }, { "epoch": 0.7476610352022616, "grad_norm": 0.5047822304198737, "learning_rate": 9.336855744282468e-06, "loss": 0.4705, "step": 8331 }, { "epoch": 0.7477507796549325, "grad_norm": 0.44729995176326676, "learning_rate": 9.336595864448733e-06, "loss": 0.4134, "step": 8332 }, { "epoch": 0.7478405241076036, "grad_norm": 0.541935889930918, "learning_rate": 9.336335937320962e-06, "loss": 0.5223, "step": 8333 }, { "epoch": 0.7479302685602747, "grad_norm": 0.48195864873250854, "learning_rate": 9.336075962901984e-06, "loss": 0.4639, "step": 8334 }, { "epoch": 0.7480200130129456, "grad_norm": 0.5045922951339903, "learning_rate": 9.335815941194638e-06, "loss": 0.4674, "step": 8335 }, { "epoch": 0.7481097574656167, "grad_norm": 0.4699548300337682, "learning_rate": 9.335555872201757e-06, "loss": 0.4115, "step": 8336 }, { "epoch": 0.7481995019182877, "grad_norm": 0.5102297631060846, "learning_rate": 9.33529575592618e-06, "loss": 0.47, "step": 8337 }, { "epoch": 0.7482892463709587, "grad_norm": 0.5007620121394002, "learning_rate": 9.33503559237074e-06, "loss": 0.4673, "step": 8338 }, { "epoch": 0.7483789908236297, "grad_norm": 0.5002389768030978, "learning_rate": 9.334775381538277e-06, "loss": 0.4741, "step": 8339 }, { "epoch": 0.7484687352763008, "grad_norm": 0.4921561696268216, "learning_rate": 9.33451512343163e-06, "loss": 0.4754, "step": 8340 }, { "epoch": 0.7485584797289717, "grad_norm": 0.4843183819458024, "learning_rate": 9.334254818053635e-06, "loss": 0.4716, "step": 8341 }, { "epoch": 0.7486482241816428, "grad_norm": 0.48844468814760195, "learning_rate": 9.333994465407131e-06, "loss": 0.4134, "step": 8342 }, { "epoch": 0.7487379686343137, "grad_norm": 0.4510740787941196, "learning_rate": 9.333734065494959e-06, "loss": 0.3962, "step": 8343 }, { "epoch": 0.7488277130869848, "grad_norm": 0.4896975893163439, "learning_rate": 9.333473618319958e-06, "loss": 0.4923, "step": 8344 }, { "epoch": 0.7489174575396559, "grad_norm": 0.4770988626231564, "learning_rate": 9.333213123884967e-06, "loss": 0.4473, "step": 8345 }, { "epoch": 0.7490072019923268, "grad_norm": 0.4823815629768651, "learning_rate": 9.332952582192828e-06, "loss": 0.4237, "step": 8346 }, { "epoch": 0.7490969464449979, "grad_norm": 0.4940057521686601, "learning_rate": 9.332691993246384e-06, "loss": 0.4894, "step": 8347 }, { "epoch": 0.7491866908976689, "grad_norm": 0.4989562231362169, "learning_rate": 9.332431357048472e-06, "loss": 0.4841, "step": 8348 }, { "epoch": 0.7492764353503399, "grad_norm": 0.45266926035671984, "learning_rate": 9.332170673601941e-06, "loss": 0.388, "step": 8349 }, { "epoch": 0.7493661798030109, "grad_norm": 0.46920171651265447, "learning_rate": 9.33190994290963e-06, "loss": 0.4215, "step": 8350 }, { "epoch": 0.749455924255682, "grad_norm": 0.5352582525934262, "learning_rate": 9.331649164974384e-06, "loss": 0.5401, "step": 8351 }, { "epoch": 0.7495456687083529, "grad_norm": 0.5111962117136152, "learning_rate": 9.331388339799046e-06, "loss": 0.5122, "step": 8352 }, { "epoch": 0.749635413161024, "grad_norm": 0.5154009414360675, "learning_rate": 9.33112746738646e-06, "loss": 0.5077, "step": 8353 }, { "epoch": 0.749725157613695, "grad_norm": 0.5020572456123797, "learning_rate": 9.330866547739472e-06, "loss": 0.516, "step": 8354 }, { "epoch": 0.749814902066366, "grad_norm": 0.49691823303947497, "learning_rate": 9.330605580860928e-06, "loss": 0.4633, "step": 8355 }, { "epoch": 0.749904646519037, "grad_norm": 0.513189464126217, "learning_rate": 9.330344566753673e-06, "loss": 0.4631, "step": 8356 }, { "epoch": 0.7499943909717081, "grad_norm": 0.5118901558569459, "learning_rate": 9.330083505420556e-06, "loss": 0.478, "step": 8357 }, { "epoch": 0.7500841354243791, "grad_norm": 0.4675659722001019, "learning_rate": 9.329822396864418e-06, "loss": 0.4101, "step": 8358 }, { "epoch": 0.7501738798770501, "grad_norm": 0.5101844683402003, "learning_rate": 9.329561241088114e-06, "loss": 0.5398, "step": 8359 }, { "epoch": 0.7502636243297212, "grad_norm": 0.4614396731819334, "learning_rate": 9.329300038094486e-06, "loss": 0.4315, "step": 8360 }, { "epoch": 0.7503533687823921, "grad_norm": 0.46793677485594043, "learning_rate": 9.329038787886389e-06, "loss": 0.4326, "step": 8361 }, { "epoch": 0.7504431132350632, "grad_norm": 0.4744692816124616, "learning_rate": 9.328777490466666e-06, "loss": 0.4361, "step": 8362 }, { "epoch": 0.7505328576877341, "grad_norm": 0.505684110577694, "learning_rate": 9.32851614583817e-06, "loss": 0.4488, "step": 8363 }, { "epoch": 0.7506226021404052, "grad_norm": 0.5669836571624558, "learning_rate": 9.32825475400375e-06, "loss": 0.5518, "step": 8364 }, { "epoch": 0.7507123465930762, "grad_norm": 0.4804993635738635, "learning_rate": 9.327993314966257e-06, "loss": 0.4696, "step": 8365 }, { "epoch": 0.7508020910457472, "grad_norm": 0.4781347643300964, "learning_rate": 9.327731828728542e-06, "loss": 0.4011, "step": 8366 }, { "epoch": 0.7508918354984182, "grad_norm": 0.5165193323701982, "learning_rate": 9.327470295293458e-06, "loss": 0.4751, "step": 8367 }, { "epoch": 0.7509815799510893, "grad_norm": 0.4540375400720842, "learning_rate": 9.327208714663856e-06, "loss": 0.4358, "step": 8368 }, { "epoch": 0.7510713244037603, "grad_norm": 0.5264881851684237, "learning_rate": 9.326947086842589e-06, "loss": 0.553, "step": 8369 }, { "epoch": 0.7511610688564313, "grad_norm": 0.5275827903076894, "learning_rate": 9.326685411832509e-06, "loss": 0.5197, "step": 8370 }, { "epoch": 0.7512508133091024, "grad_norm": 0.4822037186533197, "learning_rate": 9.326423689636472e-06, "loss": 0.4956, "step": 8371 }, { "epoch": 0.7513405577617733, "grad_norm": 0.4505197927396956, "learning_rate": 9.32616192025733e-06, "loss": 0.4246, "step": 8372 }, { "epoch": 0.7514303022144444, "grad_norm": 0.4769560682947288, "learning_rate": 9.32590010369794e-06, "loss": 0.4406, "step": 8373 }, { "epoch": 0.7515200466671154, "grad_norm": 0.45772812984887923, "learning_rate": 9.325638239961158e-06, "loss": 0.4095, "step": 8374 }, { "epoch": 0.7516097911197864, "grad_norm": 0.6176920574556953, "learning_rate": 9.325376329049837e-06, "loss": 0.6771, "step": 8375 }, { "epoch": 0.7516995355724574, "grad_norm": 0.4701846270232838, "learning_rate": 9.325114370966834e-06, "loss": 0.4308, "step": 8376 }, { "epoch": 0.7517892800251285, "grad_norm": 0.48347233394937916, "learning_rate": 9.324852365715008e-06, "loss": 0.4492, "step": 8377 }, { "epoch": 0.7518790244777994, "grad_norm": 0.49111712609383873, "learning_rate": 9.324590313297212e-06, "loss": 0.4397, "step": 8378 }, { "epoch": 0.7519687689304705, "grad_norm": 0.46005933074172717, "learning_rate": 9.324328213716309e-06, "loss": 0.4151, "step": 8379 }, { "epoch": 0.7520585133831416, "grad_norm": 0.49907200714765365, "learning_rate": 9.324066066975154e-06, "loss": 0.4949, "step": 8380 }, { "epoch": 0.7521482578358125, "grad_norm": 0.5179944565553543, "learning_rate": 9.323803873076607e-06, "loss": 0.5224, "step": 8381 }, { "epoch": 0.7522380022884836, "grad_norm": 0.48310130051321865, "learning_rate": 9.323541632023529e-06, "loss": 0.4706, "step": 8382 }, { "epoch": 0.7523277467411545, "grad_norm": 0.4853201174686341, "learning_rate": 9.323279343818776e-06, "loss": 0.4569, "step": 8383 }, { "epoch": 0.7524174911938256, "grad_norm": 0.49503662205750343, "learning_rate": 9.323017008465214e-06, "loss": 0.4745, "step": 8384 }, { "epoch": 0.7525072356464966, "grad_norm": 0.4923526218759527, "learning_rate": 9.322754625965699e-06, "loss": 0.4958, "step": 8385 }, { "epoch": 0.7525969800991676, "grad_norm": 0.4951547826460269, "learning_rate": 9.322492196323093e-06, "loss": 0.4459, "step": 8386 }, { "epoch": 0.7526867245518386, "grad_norm": 0.4778300332261831, "learning_rate": 9.322229719540262e-06, "loss": 0.4138, "step": 8387 }, { "epoch": 0.7527764690045097, "grad_norm": 0.44777896646193066, "learning_rate": 9.321967195620065e-06, "loss": 0.412, "step": 8388 }, { "epoch": 0.7528662134571806, "grad_norm": 0.4878980711793557, "learning_rate": 9.321704624565365e-06, "loss": 0.4767, "step": 8389 }, { "epoch": 0.7529559579098517, "grad_norm": 0.5152955992646953, "learning_rate": 9.321442006379027e-06, "loss": 0.5065, "step": 8390 }, { "epoch": 0.7530457023625227, "grad_norm": 0.5096452652388941, "learning_rate": 9.321179341063916e-06, "loss": 0.507, "step": 8391 }, { "epoch": 0.7531354468151937, "grad_norm": 0.4889857539882121, "learning_rate": 9.320916628622893e-06, "loss": 0.4514, "step": 8392 }, { "epoch": 0.7532251912678648, "grad_norm": 0.5054808525219816, "learning_rate": 9.320653869058826e-06, "loss": 0.4745, "step": 8393 }, { "epoch": 0.7533149357205358, "grad_norm": 0.5097366036185195, "learning_rate": 9.320391062374578e-06, "loss": 0.4625, "step": 8394 }, { "epoch": 0.7534046801732068, "grad_norm": 0.5211104948342267, "learning_rate": 9.320128208573021e-06, "loss": 0.4366, "step": 8395 }, { "epoch": 0.7534944246258778, "grad_norm": 0.5293926142799694, "learning_rate": 9.319865307657015e-06, "loss": 0.4748, "step": 8396 }, { "epoch": 0.7535841690785489, "grad_norm": 0.45356413368709037, "learning_rate": 9.319602359629429e-06, "loss": 0.3819, "step": 8397 }, { "epoch": 0.7536739135312198, "grad_norm": 0.47625550858525273, "learning_rate": 9.319339364493132e-06, "loss": 0.4123, "step": 8398 }, { "epoch": 0.7537636579838909, "grad_norm": 0.5164858138662909, "learning_rate": 9.31907632225099e-06, "loss": 0.4462, "step": 8399 }, { "epoch": 0.7538534024365618, "grad_norm": 0.5209964243220307, "learning_rate": 9.318813232905877e-06, "loss": 0.5187, "step": 8400 }, { "epoch": 0.7539431468892329, "grad_norm": 0.46899010745662734, "learning_rate": 9.318550096460655e-06, "loss": 0.416, "step": 8401 }, { "epoch": 0.7540328913419039, "grad_norm": 0.4832044950203199, "learning_rate": 9.318286912918199e-06, "loss": 0.4072, "step": 8402 }, { "epoch": 0.754122635794575, "grad_norm": 0.4753886601066671, "learning_rate": 9.318023682281376e-06, "loss": 0.4306, "step": 8403 }, { "epoch": 0.754212380247246, "grad_norm": 0.46624451502745756, "learning_rate": 9.317760404553057e-06, "loss": 0.4533, "step": 8404 }, { "epoch": 0.754302124699917, "grad_norm": 0.59720919792401, "learning_rate": 9.317497079736118e-06, "loss": 0.5556, "step": 8405 }, { "epoch": 0.754391869152588, "grad_norm": 0.4757634011242102, "learning_rate": 9.317233707833424e-06, "loss": 0.4409, "step": 8406 }, { "epoch": 0.754481613605259, "grad_norm": 0.4960042724089796, "learning_rate": 9.316970288847851e-06, "loss": 0.4869, "step": 8407 }, { "epoch": 0.7545713580579301, "grad_norm": 0.553892778919377, "learning_rate": 9.316706822782272e-06, "loss": 0.5518, "step": 8408 }, { "epoch": 0.754661102510601, "grad_norm": 0.5234212293413594, "learning_rate": 9.316443309639559e-06, "loss": 0.5238, "step": 8409 }, { "epoch": 0.7547508469632721, "grad_norm": 0.4593112054667536, "learning_rate": 9.316179749422586e-06, "loss": 0.4494, "step": 8410 }, { "epoch": 0.7548405914159431, "grad_norm": 0.5012022367735578, "learning_rate": 9.315916142134228e-06, "loss": 0.472, "step": 8411 }, { "epoch": 0.7549303358686141, "grad_norm": 0.4982447381757226, "learning_rate": 9.31565248777736e-06, "loss": 0.4582, "step": 8412 }, { "epoch": 0.7550200803212851, "grad_norm": 0.49535521525079523, "learning_rate": 9.315388786354856e-06, "loss": 0.4416, "step": 8413 }, { "epoch": 0.7551098247739562, "grad_norm": 0.4421451872469583, "learning_rate": 9.315125037869593e-06, "loss": 0.3957, "step": 8414 }, { "epoch": 0.7551995692266272, "grad_norm": 0.5083650374897536, "learning_rate": 9.314861242324447e-06, "loss": 0.4962, "step": 8415 }, { "epoch": 0.7552893136792982, "grad_norm": 0.47245992223799005, "learning_rate": 9.314597399722296e-06, "loss": 0.4313, "step": 8416 }, { "epoch": 0.7553790581319693, "grad_norm": 0.5044761630301701, "learning_rate": 9.314333510066014e-06, "loss": 0.4815, "step": 8417 }, { "epoch": 0.7554688025846402, "grad_norm": 0.45686141430119864, "learning_rate": 9.314069573358484e-06, "loss": 0.4033, "step": 8418 }, { "epoch": 0.7555585470373113, "grad_norm": 0.5036639696691205, "learning_rate": 9.31380558960258e-06, "loss": 0.4908, "step": 8419 }, { "epoch": 0.7556482914899822, "grad_norm": 0.5078010372116072, "learning_rate": 9.313541558801183e-06, "loss": 0.4492, "step": 8420 }, { "epoch": 0.7557380359426533, "grad_norm": 0.5143616091916755, "learning_rate": 9.313277480957173e-06, "loss": 0.5349, "step": 8421 }, { "epoch": 0.7558277803953243, "grad_norm": 0.46348997306167455, "learning_rate": 9.313013356073428e-06, "loss": 0.4434, "step": 8422 }, { "epoch": 0.7559175248479953, "grad_norm": 0.5110919843703841, "learning_rate": 9.31274918415283e-06, "loss": 0.5081, "step": 8423 }, { "epoch": 0.7560072693006663, "grad_norm": 0.48320930970018977, "learning_rate": 9.31248496519826e-06, "loss": 0.4507, "step": 8424 }, { "epoch": 0.7560970137533374, "grad_norm": 0.472923252790455, "learning_rate": 9.3122206992126e-06, "loss": 0.4304, "step": 8425 }, { "epoch": 0.7561867582060083, "grad_norm": 0.5019052343605879, "learning_rate": 9.31195638619873e-06, "loss": 0.4576, "step": 8426 }, { "epoch": 0.7562765026586794, "grad_norm": 0.5223669893705281, "learning_rate": 9.311692026159534e-06, "loss": 0.5119, "step": 8427 }, { "epoch": 0.7563662471113505, "grad_norm": 0.5082784357094514, "learning_rate": 9.311427619097897e-06, "loss": 0.4587, "step": 8428 }, { "epoch": 0.7564559915640214, "grad_norm": 0.5091373515721704, "learning_rate": 9.311163165016696e-06, "loss": 0.5044, "step": 8429 }, { "epoch": 0.7565457360166925, "grad_norm": 0.4831936548763572, "learning_rate": 9.310898663918824e-06, "loss": 0.4708, "step": 8430 }, { "epoch": 0.7566354804693635, "grad_norm": 0.45166650928481317, "learning_rate": 9.310634115807158e-06, "loss": 0.4116, "step": 8431 }, { "epoch": 0.7567252249220345, "grad_norm": 0.5058605923259742, "learning_rate": 9.310369520684588e-06, "loss": 0.4832, "step": 8432 }, { "epoch": 0.7568149693747055, "grad_norm": 0.46794304200628084, "learning_rate": 9.310104878553996e-06, "loss": 0.4407, "step": 8433 }, { "epoch": 0.7569047138273766, "grad_norm": 0.521191901506177, "learning_rate": 9.309840189418271e-06, "loss": 0.5002, "step": 8434 }, { "epoch": 0.7569944582800475, "grad_norm": 0.4812446933588868, "learning_rate": 9.3095754532803e-06, "loss": 0.4541, "step": 8435 }, { "epoch": 0.7570842027327186, "grad_norm": 0.4917201054771809, "learning_rate": 9.309310670142966e-06, "loss": 0.4851, "step": 8436 }, { "epoch": 0.7571739471853896, "grad_norm": 0.4746315356228661, "learning_rate": 9.30904584000916e-06, "loss": 0.4297, "step": 8437 }, { "epoch": 0.7572636916380606, "grad_norm": 0.4945521541797311, "learning_rate": 9.30878096288177e-06, "loss": 0.4484, "step": 8438 }, { "epoch": 0.7573534360907317, "grad_norm": 0.49826564057302863, "learning_rate": 9.308516038763685e-06, "loss": 0.4829, "step": 8439 }, { "epoch": 0.7574431805434026, "grad_norm": 0.49491677481858526, "learning_rate": 9.308251067657795e-06, "loss": 0.4856, "step": 8440 }, { "epoch": 0.7575329249960737, "grad_norm": 0.4808683168056768, "learning_rate": 9.307986049566985e-06, "loss": 0.4606, "step": 8441 }, { "epoch": 0.7576226694487447, "grad_norm": 0.5027717345334123, "learning_rate": 9.30772098449415e-06, "loss": 0.4754, "step": 8442 }, { "epoch": 0.7577124139014157, "grad_norm": 0.4722804767055354, "learning_rate": 9.307455872442178e-06, "loss": 0.3852, "step": 8443 }, { "epoch": 0.7578021583540867, "grad_norm": 0.4691458288654764, "learning_rate": 9.307190713413963e-06, "loss": 0.3969, "step": 8444 }, { "epoch": 0.7578919028067578, "grad_norm": 0.5065425565138814, "learning_rate": 9.306925507412394e-06, "loss": 0.5126, "step": 8445 }, { "epoch": 0.7579816472594287, "grad_norm": 0.4820908423291749, "learning_rate": 9.306660254440365e-06, "loss": 0.4594, "step": 8446 }, { "epoch": 0.7580713917120998, "grad_norm": 0.4892947978077457, "learning_rate": 9.306394954500769e-06, "loss": 0.4443, "step": 8447 }, { "epoch": 0.7581611361647708, "grad_norm": 0.4712479013628584, "learning_rate": 9.3061296075965e-06, "loss": 0.4392, "step": 8448 }, { "epoch": 0.7582508806174418, "grad_norm": 0.43926908595323494, "learning_rate": 9.305864213730446e-06, "loss": 0.3879, "step": 8449 }, { "epoch": 0.7583406250701129, "grad_norm": 0.4906265441708355, "learning_rate": 9.30559877290551e-06, "loss": 0.4379, "step": 8450 }, { "epoch": 0.7584303695227839, "grad_norm": 0.4871045536663243, "learning_rate": 9.30533328512458e-06, "loss": 0.4619, "step": 8451 }, { "epoch": 0.7585201139754549, "grad_norm": 0.44925200532621834, "learning_rate": 9.305067750390555e-06, "loss": 0.4237, "step": 8452 }, { "epoch": 0.7586098584281259, "grad_norm": 0.5037704003903758, "learning_rate": 9.30480216870633e-06, "loss": 0.4582, "step": 8453 }, { "epoch": 0.758699602880797, "grad_norm": 0.46224003224401355, "learning_rate": 9.304536540074801e-06, "loss": 0.4139, "step": 8454 }, { "epoch": 0.7587893473334679, "grad_norm": 0.46739435068584184, "learning_rate": 9.304270864498866e-06, "loss": 0.4624, "step": 8455 }, { "epoch": 0.758879091786139, "grad_norm": 0.4840494761008197, "learning_rate": 9.30400514198142e-06, "loss": 0.4587, "step": 8456 }, { "epoch": 0.75896883623881, "grad_norm": 0.532426015657391, "learning_rate": 9.303739372525362e-06, "loss": 0.5339, "step": 8457 }, { "epoch": 0.759058580691481, "grad_norm": 0.48225896298607035, "learning_rate": 9.303473556133593e-06, "loss": 0.4727, "step": 8458 }, { "epoch": 0.759148325144152, "grad_norm": 0.4593580952639098, "learning_rate": 9.303207692809008e-06, "loss": 0.4294, "step": 8459 }, { "epoch": 0.759238069596823, "grad_norm": 0.5285653429713653, "learning_rate": 9.30294178255451e-06, "loss": 0.472, "step": 8460 }, { "epoch": 0.7593278140494941, "grad_norm": 0.5366408460163051, "learning_rate": 9.302675825372995e-06, "loss": 0.5289, "step": 8461 }, { "epoch": 0.7594175585021651, "grad_norm": 0.5124557103476214, "learning_rate": 9.30240982126737e-06, "loss": 0.5026, "step": 8462 }, { "epoch": 0.7595073029548361, "grad_norm": 0.42167457340172687, "learning_rate": 9.302143770240526e-06, "loss": 0.3743, "step": 8463 }, { "epoch": 0.7595970474075071, "grad_norm": 0.48734738348142587, "learning_rate": 9.301877672295373e-06, "loss": 0.4709, "step": 8464 }, { "epoch": 0.7596867918601782, "grad_norm": 0.4941940646196415, "learning_rate": 9.30161152743481e-06, "loss": 0.4549, "step": 8465 }, { "epoch": 0.7597765363128491, "grad_norm": 0.5043888112674634, "learning_rate": 9.30134533566174e-06, "loss": 0.5447, "step": 8466 }, { "epoch": 0.7598662807655202, "grad_norm": 0.44106674275697755, "learning_rate": 9.301079096979066e-06, "loss": 0.3813, "step": 8467 }, { "epoch": 0.7599560252181912, "grad_norm": 0.4845003953535988, "learning_rate": 9.30081281138969e-06, "loss": 0.4445, "step": 8468 }, { "epoch": 0.7600457696708622, "grad_norm": 0.48367798134534357, "learning_rate": 9.300546478896519e-06, "loss": 0.4133, "step": 8469 }, { "epoch": 0.7601355141235332, "grad_norm": 0.4773120069892556, "learning_rate": 9.300280099502454e-06, "loss": 0.4232, "step": 8470 }, { "epoch": 0.7602252585762043, "grad_norm": 0.5287818671477137, "learning_rate": 9.300013673210404e-06, "loss": 0.4906, "step": 8471 }, { "epoch": 0.7603150030288752, "grad_norm": 0.5366192604164247, "learning_rate": 9.29974720002327e-06, "loss": 0.5056, "step": 8472 }, { "epoch": 0.7604047474815463, "grad_norm": 0.45590599297396117, "learning_rate": 9.299480679943964e-06, "loss": 0.417, "step": 8473 }, { "epoch": 0.7604944919342174, "grad_norm": 0.44867562796105126, "learning_rate": 9.299214112975388e-06, "loss": 0.3821, "step": 8474 }, { "epoch": 0.7605842363868883, "grad_norm": 0.4954459799036395, "learning_rate": 9.29894749912045e-06, "loss": 0.4913, "step": 8475 }, { "epoch": 0.7606739808395594, "grad_norm": 0.46645994623932163, "learning_rate": 9.298680838382057e-06, "loss": 0.464, "step": 8476 }, { "epoch": 0.7607637252922304, "grad_norm": 0.5033158628518689, "learning_rate": 9.298414130763119e-06, "loss": 0.4593, "step": 8477 }, { "epoch": 0.7608534697449014, "grad_norm": 0.4621360176311963, "learning_rate": 9.298147376266543e-06, "loss": 0.4238, "step": 8478 }, { "epoch": 0.7609432141975724, "grad_norm": 0.4587156506675846, "learning_rate": 9.297880574895242e-06, "loss": 0.4129, "step": 8479 }, { "epoch": 0.7610329586502435, "grad_norm": 0.45208650206260964, "learning_rate": 9.297613726652121e-06, "loss": 0.4286, "step": 8480 }, { "epoch": 0.7611227031029144, "grad_norm": 0.44678945441465373, "learning_rate": 9.297346831540092e-06, "loss": 0.4231, "step": 8481 }, { "epoch": 0.7612124475555855, "grad_norm": 0.5518048336419737, "learning_rate": 9.297079889562065e-06, "loss": 0.5206, "step": 8482 }, { "epoch": 0.7613021920082564, "grad_norm": 0.4975851578981387, "learning_rate": 9.296812900720953e-06, "loss": 0.4945, "step": 8483 }, { "epoch": 0.7613919364609275, "grad_norm": 0.4474681442664524, "learning_rate": 9.296545865019666e-06, "loss": 0.4363, "step": 8484 }, { "epoch": 0.7614816809135986, "grad_norm": 0.4935336981587238, "learning_rate": 9.296278782461116e-06, "loss": 0.4462, "step": 8485 }, { "epoch": 0.7615714253662695, "grad_norm": 0.5498181558513834, "learning_rate": 9.296011653048217e-06, "loss": 0.5635, "step": 8486 }, { "epoch": 0.7616611698189406, "grad_norm": 0.48861944117590794, "learning_rate": 9.295744476783882e-06, "loss": 0.4618, "step": 8487 }, { "epoch": 0.7617509142716116, "grad_norm": 0.49624239173319157, "learning_rate": 9.295477253671026e-06, "loss": 0.435, "step": 8488 }, { "epoch": 0.7618406587242826, "grad_norm": 0.45832028308200085, "learning_rate": 9.29520998371256e-06, "loss": 0.4075, "step": 8489 }, { "epoch": 0.7619304031769536, "grad_norm": 0.4951353665714987, "learning_rate": 9.2949426669114e-06, "loss": 0.4302, "step": 8490 }, { "epoch": 0.7620201476296247, "grad_norm": 0.46652595583365847, "learning_rate": 9.294675303270464e-06, "loss": 0.4591, "step": 8491 }, { "epoch": 0.7621098920822956, "grad_norm": 0.4688241048970398, "learning_rate": 9.294407892792665e-06, "loss": 0.4256, "step": 8492 }, { "epoch": 0.7621996365349667, "grad_norm": 0.4827528069178042, "learning_rate": 9.29414043548092e-06, "loss": 0.4869, "step": 8493 }, { "epoch": 0.7622893809876377, "grad_norm": 0.48953358669532837, "learning_rate": 9.293872931338146e-06, "loss": 0.4378, "step": 8494 }, { "epoch": 0.7623791254403087, "grad_norm": 0.4885889232577811, "learning_rate": 9.29360538036726e-06, "loss": 0.4258, "step": 8495 }, { "epoch": 0.7624688698929798, "grad_norm": 0.4708971251433775, "learning_rate": 9.293337782571181e-06, "loss": 0.4511, "step": 8496 }, { "epoch": 0.7625586143456508, "grad_norm": 0.4800427463885544, "learning_rate": 9.293070137952826e-06, "loss": 0.4625, "step": 8497 }, { "epoch": 0.7626483587983218, "grad_norm": 0.4650025583069206, "learning_rate": 9.292802446515115e-06, "loss": 0.4515, "step": 8498 }, { "epoch": 0.7627381032509928, "grad_norm": 0.4996924268760894, "learning_rate": 9.292534708260965e-06, "loss": 0.4772, "step": 8499 }, { "epoch": 0.7628278477036639, "grad_norm": 0.4553479010103083, "learning_rate": 9.292266923193298e-06, "loss": 0.4043, "step": 8500 }, { "epoch": 0.7629175921563348, "grad_norm": 0.4956823053937826, "learning_rate": 9.291999091315036e-06, "loss": 0.4114, "step": 8501 }, { "epoch": 0.7630073366090059, "grad_norm": 0.47192717991305355, "learning_rate": 9.291731212629096e-06, "loss": 0.4703, "step": 8502 }, { "epoch": 0.7630970810616768, "grad_norm": 0.532629435477067, "learning_rate": 9.291463287138402e-06, "loss": 0.5461, "step": 8503 }, { "epoch": 0.7631868255143479, "grad_norm": 0.4274787983524803, "learning_rate": 9.291195314845875e-06, "loss": 0.3752, "step": 8504 }, { "epoch": 0.7632765699670189, "grad_norm": 0.49087539058439605, "learning_rate": 9.29092729575444e-06, "loss": 0.4363, "step": 8505 }, { "epoch": 0.7633663144196899, "grad_norm": 0.49399468216294395, "learning_rate": 9.290659229867015e-06, "loss": 0.4487, "step": 8506 }, { "epoch": 0.7634560588723609, "grad_norm": 0.48689876545751426, "learning_rate": 9.290391117186528e-06, "loss": 0.4694, "step": 8507 }, { "epoch": 0.763545803325032, "grad_norm": 0.45644878492959456, "learning_rate": 9.2901229577159e-06, "loss": 0.4274, "step": 8508 }, { "epoch": 0.763635547777703, "grad_norm": 0.4921621228033633, "learning_rate": 9.289854751458059e-06, "loss": 0.4525, "step": 8509 }, { "epoch": 0.763725292230374, "grad_norm": 0.5008044943655677, "learning_rate": 9.289586498415926e-06, "loss": 0.4911, "step": 8510 }, { "epoch": 0.7638150366830451, "grad_norm": 0.4813378108857417, "learning_rate": 9.289318198592428e-06, "loss": 0.4331, "step": 8511 }, { "epoch": 0.763904781135716, "grad_norm": 0.5066213674537483, "learning_rate": 9.289049851990492e-06, "loss": 0.4408, "step": 8512 }, { "epoch": 0.7639945255883871, "grad_norm": 0.5309022029242373, "learning_rate": 9.288781458613043e-06, "loss": 0.4927, "step": 8513 }, { "epoch": 0.7640842700410581, "grad_norm": 0.4550704713012196, "learning_rate": 9.28851301846301e-06, "loss": 0.4006, "step": 8514 }, { "epoch": 0.7641740144937291, "grad_norm": 0.47538561235238197, "learning_rate": 9.28824453154332e-06, "loss": 0.4465, "step": 8515 }, { "epoch": 0.7642637589464001, "grad_norm": 0.49834478444775576, "learning_rate": 9.287975997856897e-06, "loss": 0.4255, "step": 8516 }, { "epoch": 0.7643535033990712, "grad_norm": 0.5007949434764103, "learning_rate": 9.287707417406676e-06, "loss": 0.4439, "step": 8517 }, { "epoch": 0.7644432478517421, "grad_norm": 0.5026441383436913, "learning_rate": 9.287438790195582e-06, "loss": 0.4924, "step": 8518 }, { "epoch": 0.7645329923044132, "grad_norm": 0.499711318417239, "learning_rate": 9.287170116226547e-06, "loss": 0.4668, "step": 8519 }, { "epoch": 0.7646227367570843, "grad_norm": 0.4669324093509235, "learning_rate": 9.286901395502499e-06, "loss": 0.4306, "step": 8520 }, { "epoch": 0.7647124812097552, "grad_norm": 0.4568329399896286, "learning_rate": 9.28663262802637e-06, "loss": 0.4029, "step": 8521 }, { "epoch": 0.7648022256624263, "grad_norm": 0.540812291585053, "learning_rate": 9.286363813801089e-06, "loss": 0.5282, "step": 8522 }, { "epoch": 0.7648919701150972, "grad_norm": 0.5042297463905444, "learning_rate": 9.286094952829591e-06, "loss": 0.4756, "step": 8523 }, { "epoch": 0.7649817145677683, "grad_norm": 0.4926012555231979, "learning_rate": 9.285826045114806e-06, "loss": 0.4847, "step": 8524 }, { "epoch": 0.7650714590204393, "grad_norm": 0.4999265783306307, "learning_rate": 9.285557090659666e-06, "loss": 0.4534, "step": 8525 }, { "epoch": 0.7651612034731103, "grad_norm": 0.47402834316941894, "learning_rate": 9.285288089467106e-06, "loss": 0.4103, "step": 8526 }, { "epoch": 0.7652509479257813, "grad_norm": 0.4778931776972049, "learning_rate": 9.285019041540058e-06, "loss": 0.4487, "step": 8527 }, { "epoch": 0.7653406923784524, "grad_norm": 0.4613382351028799, "learning_rate": 9.284749946881457e-06, "loss": 0.3895, "step": 8528 }, { "epoch": 0.7654304368311233, "grad_norm": 0.47015134039577744, "learning_rate": 9.284480805494239e-06, "loss": 0.444, "step": 8529 }, { "epoch": 0.7655201812837944, "grad_norm": 0.5090646155334329, "learning_rate": 9.284211617381338e-06, "loss": 0.4363, "step": 8530 }, { "epoch": 0.7656099257364655, "grad_norm": 0.5698930186945383, "learning_rate": 9.283942382545687e-06, "loss": 0.4874, "step": 8531 }, { "epoch": 0.7656996701891364, "grad_norm": 0.44440103290612776, "learning_rate": 9.283673100990227e-06, "loss": 0.3682, "step": 8532 }, { "epoch": 0.7657894146418075, "grad_norm": 0.47911544297363956, "learning_rate": 9.283403772717893e-06, "loss": 0.4162, "step": 8533 }, { "epoch": 0.7658791590944785, "grad_norm": 0.470900539439351, "learning_rate": 9.28313439773162e-06, "loss": 0.4505, "step": 8534 }, { "epoch": 0.7659689035471495, "grad_norm": 0.49069578197385205, "learning_rate": 9.28286497603435e-06, "loss": 0.4563, "step": 8535 }, { "epoch": 0.7660586479998205, "grad_norm": 0.46363495995602766, "learning_rate": 9.282595507629018e-06, "loss": 0.3883, "step": 8536 }, { "epoch": 0.7661483924524916, "grad_norm": 0.47011266673036295, "learning_rate": 9.282325992518562e-06, "loss": 0.4424, "step": 8537 }, { "epoch": 0.7662381369051625, "grad_norm": 0.4378407674685462, "learning_rate": 9.282056430705923e-06, "loss": 0.4057, "step": 8538 }, { "epoch": 0.7663278813578336, "grad_norm": 0.48858720490333296, "learning_rate": 9.281786822194043e-06, "loss": 0.4504, "step": 8539 }, { "epoch": 0.7664176258105045, "grad_norm": 0.5242597126281126, "learning_rate": 9.281517166985858e-06, "loss": 0.5136, "step": 8540 }, { "epoch": 0.7665073702631756, "grad_norm": 0.48047299491702383, "learning_rate": 9.281247465084311e-06, "loss": 0.4326, "step": 8541 }, { "epoch": 0.7665971147158466, "grad_norm": 0.4675572180785554, "learning_rate": 9.280977716492343e-06, "loss": 0.4204, "step": 8542 }, { "epoch": 0.7666868591685176, "grad_norm": 0.5305878586855054, "learning_rate": 9.280707921212898e-06, "loss": 0.498, "step": 8543 }, { "epoch": 0.7667766036211887, "grad_norm": 0.5300068370905686, "learning_rate": 9.280438079248913e-06, "loss": 0.4407, "step": 8544 }, { "epoch": 0.7668663480738597, "grad_norm": 0.5173730493050165, "learning_rate": 9.280168190603335e-06, "loss": 0.4936, "step": 8545 }, { "epoch": 0.7669560925265307, "grad_norm": 0.507480808787372, "learning_rate": 9.279898255279107e-06, "loss": 0.4441, "step": 8546 }, { "epoch": 0.7670458369792017, "grad_norm": 0.4815155405767395, "learning_rate": 9.279628273279173e-06, "loss": 0.4546, "step": 8547 }, { "epoch": 0.7671355814318728, "grad_norm": 0.46358573335393843, "learning_rate": 9.279358244606476e-06, "loss": 0.4133, "step": 8548 }, { "epoch": 0.7672253258845437, "grad_norm": 0.4563731991403673, "learning_rate": 9.27908816926396e-06, "loss": 0.4297, "step": 8549 }, { "epoch": 0.7673150703372148, "grad_norm": 0.44575568611627464, "learning_rate": 9.278818047254575e-06, "loss": 0.3539, "step": 8550 }, { "epoch": 0.7674048147898858, "grad_norm": 0.53690971321639, "learning_rate": 9.278547878581262e-06, "loss": 0.4749, "step": 8551 }, { "epoch": 0.7674945592425568, "grad_norm": 0.43141080807542914, "learning_rate": 9.278277663246968e-06, "loss": 0.3706, "step": 8552 }, { "epoch": 0.7675843036952278, "grad_norm": 0.5495821148960224, "learning_rate": 9.278007401254643e-06, "loss": 0.5348, "step": 8553 }, { "epoch": 0.7676740481478989, "grad_norm": 0.4665028840746124, "learning_rate": 9.277737092607232e-06, "loss": 0.4529, "step": 8554 }, { "epoch": 0.7677637926005699, "grad_norm": 0.5115347358171604, "learning_rate": 9.277466737307683e-06, "loss": 0.5182, "step": 8555 }, { "epoch": 0.7678535370532409, "grad_norm": 0.473540116644381, "learning_rate": 9.277196335358947e-06, "loss": 0.425, "step": 8556 }, { "epoch": 0.767943281505912, "grad_norm": 0.5176897107202708, "learning_rate": 9.276925886763967e-06, "loss": 0.4553, "step": 8557 }, { "epoch": 0.7680330259585829, "grad_norm": 0.5148826488362628, "learning_rate": 9.2766553915257e-06, "loss": 0.5149, "step": 8558 }, { "epoch": 0.768122770411254, "grad_norm": 0.45376850029030946, "learning_rate": 9.27638484964709e-06, "loss": 0.3759, "step": 8559 }, { "epoch": 0.768212514863925, "grad_norm": 0.5100091039218899, "learning_rate": 9.276114261131092e-06, "loss": 0.437, "step": 8560 }, { "epoch": 0.768302259316596, "grad_norm": 0.4552656303652436, "learning_rate": 9.275843625980653e-06, "loss": 0.4112, "step": 8561 }, { "epoch": 0.768392003769267, "grad_norm": 0.5065758744142841, "learning_rate": 9.275572944198727e-06, "loss": 0.4474, "step": 8562 }, { "epoch": 0.768481748221938, "grad_norm": 0.47272013732400353, "learning_rate": 9.275302215788265e-06, "loss": 0.4347, "step": 8563 }, { "epoch": 0.768571492674609, "grad_norm": 0.5098877794855081, "learning_rate": 9.275031440752218e-06, "loss": 0.4711, "step": 8564 }, { "epoch": 0.7686612371272801, "grad_norm": 0.4946443971792224, "learning_rate": 9.274760619093544e-06, "loss": 0.4515, "step": 8565 }, { "epoch": 0.7687509815799511, "grad_norm": 0.4591669302021559, "learning_rate": 9.27448975081519e-06, "loss": 0.4138, "step": 8566 }, { "epoch": 0.7688407260326221, "grad_norm": 0.4969928887623383, "learning_rate": 9.274218835920117e-06, "loss": 0.4599, "step": 8567 }, { "epoch": 0.7689304704852932, "grad_norm": 0.4928934614064071, "learning_rate": 9.273947874411273e-06, "loss": 0.4616, "step": 8568 }, { "epoch": 0.7690202149379641, "grad_norm": 0.4840079508988539, "learning_rate": 9.273676866291617e-06, "loss": 0.4127, "step": 8569 }, { "epoch": 0.7691099593906352, "grad_norm": 0.4839595297483501, "learning_rate": 9.273405811564104e-06, "loss": 0.4903, "step": 8570 }, { "epoch": 0.7691997038433062, "grad_norm": 0.5109076903368546, "learning_rate": 9.273134710231689e-06, "loss": 0.4882, "step": 8571 }, { "epoch": 0.7692894482959772, "grad_norm": 0.5059357169040205, "learning_rate": 9.27286356229733e-06, "loss": 0.4919, "step": 8572 }, { "epoch": 0.7693791927486482, "grad_norm": 0.48264791620139036, "learning_rate": 9.27259236776398e-06, "loss": 0.4491, "step": 8573 }, { "epoch": 0.7694689372013193, "grad_norm": 0.45707112635839103, "learning_rate": 9.272321126634602e-06, "loss": 0.4397, "step": 8574 }, { "epoch": 0.7695586816539902, "grad_norm": 0.4653066791373666, "learning_rate": 9.272049838912154e-06, "loss": 0.4441, "step": 8575 }, { "epoch": 0.7696484261066613, "grad_norm": 0.5051398811029371, "learning_rate": 9.271778504599588e-06, "loss": 0.5442, "step": 8576 }, { "epoch": 0.7697381705593322, "grad_norm": 0.49988470900602283, "learning_rate": 9.271507123699872e-06, "loss": 0.4697, "step": 8577 }, { "epoch": 0.7698279150120033, "grad_norm": 0.5560737961175414, "learning_rate": 9.271235696215959e-06, "loss": 0.5416, "step": 8578 }, { "epoch": 0.7699176594646744, "grad_norm": 0.4939997079057647, "learning_rate": 9.270964222150812e-06, "loss": 0.4397, "step": 8579 }, { "epoch": 0.7700074039173453, "grad_norm": 0.49789713370531324, "learning_rate": 9.270692701507392e-06, "loss": 0.4274, "step": 8580 }, { "epoch": 0.7700971483700164, "grad_norm": 0.46283327668282587, "learning_rate": 9.270421134288657e-06, "loss": 0.433, "step": 8581 }, { "epoch": 0.7701868928226874, "grad_norm": 0.5306558907628746, "learning_rate": 9.27014952049757e-06, "loss": 0.4812, "step": 8582 }, { "epoch": 0.7702766372753584, "grad_norm": 0.4678700955935137, "learning_rate": 9.269877860137097e-06, "loss": 0.4596, "step": 8583 }, { "epoch": 0.7703663817280294, "grad_norm": 0.5192095023798362, "learning_rate": 9.269606153210198e-06, "loss": 0.4584, "step": 8584 }, { "epoch": 0.7704561261807005, "grad_norm": 0.4996469079843628, "learning_rate": 9.269334399719834e-06, "loss": 0.4924, "step": 8585 }, { "epoch": 0.7705458706333714, "grad_norm": 0.4905663188239189, "learning_rate": 9.26906259966897e-06, "loss": 0.4797, "step": 8586 }, { "epoch": 0.7706356150860425, "grad_norm": 0.4471268270857291, "learning_rate": 9.268790753060572e-06, "loss": 0.4394, "step": 8587 }, { "epoch": 0.7707253595387135, "grad_norm": 0.4999247483392294, "learning_rate": 9.268518859897604e-06, "loss": 0.4499, "step": 8588 }, { "epoch": 0.7708151039913845, "grad_norm": 0.43729689146640066, "learning_rate": 9.26824692018303e-06, "loss": 0.38, "step": 8589 }, { "epoch": 0.7709048484440556, "grad_norm": 0.5480203806550531, "learning_rate": 9.267974933919817e-06, "loss": 0.5163, "step": 8590 }, { "epoch": 0.7709945928967266, "grad_norm": 0.5206840395611, "learning_rate": 9.267702901110929e-06, "loss": 0.4442, "step": 8591 }, { "epoch": 0.7710843373493976, "grad_norm": 0.5169200413820959, "learning_rate": 9.267430821759335e-06, "loss": 0.4999, "step": 8592 }, { "epoch": 0.7711740818020686, "grad_norm": 0.4709966393953976, "learning_rate": 9.267158695868002e-06, "loss": 0.3916, "step": 8593 }, { "epoch": 0.7712638262547397, "grad_norm": 0.48154558563073685, "learning_rate": 9.266886523439898e-06, "loss": 0.4436, "step": 8594 }, { "epoch": 0.7713535707074106, "grad_norm": 0.5115997302704506, "learning_rate": 9.26661430447799e-06, "loss": 0.5058, "step": 8595 }, { "epoch": 0.7714433151600817, "grad_norm": 0.4426106912092324, "learning_rate": 9.266342038985247e-06, "loss": 0.4166, "step": 8596 }, { "epoch": 0.7715330596127526, "grad_norm": 0.4849458755460636, "learning_rate": 9.266069726964639e-06, "loss": 0.4432, "step": 8597 }, { "epoch": 0.7716228040654237, "grad_norm": 0.5031285215918333, "learning_rate": 9.265797368419136e-06, "loss": 0.475, "step": 8598 }, { "epoch": 0.7717125485180947, "grad_norm": 0.517812088863821, "learning_rate": 9.265524963351706e-06, "loss": 0.4395, "step": 8599 }, { "epoch": 0.7718022929707657, "grad_norm": 0.5201559602698101, "learning_rate": 9.26525251176532e-06, "loss": 0.4766, "step": 8600 }, { "epoch": 0.7718920374234368, "grad_norm": 0.5007548988331857, "learning_rate": 9.264980013662955e-06, "loss": 0.4261, "step": 8601 }, { "epoch": 0.7719817818761078, "grad_norm": 0.5017496837522577, "learning_rate": 9.264707469047576e-06, "loss": 0.4986, "step": 8602 }, { "epoch": 0.7720715263287788, "grad_norm": 0.4392406040250781, "learning_rate": 9.264434877922159e-06, "loss": 0.3624, "step": 8603 }, { "epoch": 0.7721612707814498, "grad_norm": 0.531379113972319, "learning_rate": 9.264162240289676e-06, "loss": 0.5582, "step": 8604 }, { "epoch": 0.7722510152341209, "grad_norm": 0.5161463499067986, "learning_rate": 9.263889556153101e-06, "loss": 0.51, "step": 8605 }, { "epoch": 0.7723407596867918, "grad_norm": 0.49790870558507977, "learning_rate": 9.263616825515404e-06, "loss": 0.4501, "step": 8606 }, { "epoch": 0.7724305041394629, "grad_norm": 0.5294095995278691, "learning_rate": 9.263344048379564e-06, "loss": 0.4917, "step": 8607 }, { "epoch": 0.7725202485921339, "grad_norm": 0.5265034945718986, "learning_rate": 9.263071224748554e-06, "loss": 0.5094, "step": 8608 }, { "epoch": 0.7726099930448049, "grad_norm": 0.4848227703347907, "learning_rate": 9.26279835462535e-06, "loss": 0.4565, "step": 8609 }, { "epoch": 0.7726997374974759, "grad_norm": 0.46801876586390684, "learning_rate": 9.262525438012926e-06, "loss": 0.4278, "step": 8610 }, { "epoch": 0.772789481950147, "grad_norm": 0.4505946913560861, "learning_rate": 9.262252474914262e-06, "loss": 0.4115, "step": 8611 }, { "epoch": 0.7728792264028179, "grad_norm": 0.45623789231173806, "learning_rate": 9.26197946533233e-06, "loss": 0.4136, "step": 8612 }, { "epoch": 0.772968970855489, "grad_norm": 0.523965405875535, "learning_rate": 9.26170640927011e-06, "loss": 0.4824, "step": 8613 }, { "epoch": 0.7730587153081601, "grad_norm": 0.4869341724770498, "learning_rate": 9.261433306730582e-06, "loss": 0.4584, "step": 8614 }, { "epoch": 0.773148459760831, "grad_norm": 0.4326155190359174, "learning_rate": 9.261160157716721e-06, "loss": 0.4008, "step": 8615 }, { "epoch": 0.7732382042135021, "grad_norm": 0.4838043879497396, "learning_rate": 9.260886962231508e-06, "loss": 0.4618, "step": 8616 }, { "epoch": 0.773327948666173, "grad_norm": 0.4865506920914301, "learning_rate": 9.26061372027792e-06, "loss": 0.475, "step": 8617 }, { "epoch": 0.7734176931188441, "grad_norm": 0.5543758609263614, "learning_rate": 9.26034043185894e-06, "loss": 0.4853, "step": 8618 }, { "epoch": 0.7735074375715151, "grad_norm": 0.4527646311909822, "learning_rate": 9.260067096977547e-06, "loss": 0.4225, "step": 8619 }, { "epoch": 0.7735971820241861, "grad_norm": 0.5229812532030125, "learning_rate": 9.25979371563672e-06, "loss": 0.5, "step": 8620 }, { "epoch": 0.7736869264768571, "grad_norm": 0.5351668518761885, "learning_rate": 9.259520287839444e-06, "loss": 0.5182, "step": 8621 }, { "epoch": 0.7737766709295282, "grad_norm": 0.5463010909134208, "learning_rate": 9.259246813588699e-06, "loss": 0.5076, "step": 8622 }, { "epoch": 0.7738664153821991, "grad_norm": 0.4937012035863963, "learning_rate": 9.258973292887468e-06, "loss": 0.4402, "step": 8623 }, { "epoch": 0.7739561598348702, "grad_norm": 0.49013558564330817, "learning_rate": 9.258699725738733e-06, "loss": 0.4558, "step": 8624 }, { "epoch": 0.7740459042875413, "grad_norm": 0.5575724497352967, "learning_rate": 9.258426112145479e-06, "loss": 0.5063, "step": 8625 }, { "epoch": 0.7741356487402122, "grad_norm": 0.455898428702456, "learning_rate": 9.258152452110689e-06, "loss": 0.415, "step": 8626 }, { "epoch": 0.7742253931928833, "grad_norm": 0.5192747893375925, "learning_rate": 9.257878745637347e-06, "loss": 0.4857, "step": 8627 }, { "epoch": 0.7743151376455543, "grad_norm": 0.5150095183763111, "learning_rate": 9.25760499272844e-06, "loss": 0.4604, "step": 8628 }, { "epoch": 0.7744048820982253, "grad_norm": 0.5044896417536906, "learning_rate": 9.25733119338695e-06, "loss": 0.4197, "step": 8629 }, { "epoch": 0.7744946265508963, "grad_norm": 0.4707031681450566, "learning_rate": 9.257057347615866e-06, "loss": 0.406, "step": 8630 }, { "epoch": 0.7745843710035674, "grad_norm": 0.42116718079048837, "learning_rate": 9.256783455418173e-06, "loss": 0.3657, "step": 8631 }, { "epoch": 0.7746741154562383, "grad_norm": 0.4672706847244781, "learning_rate": 9.25650951679686e-06, "loss": 0.4253, "step": 8632 }, { "epoch": 0.7747638599089094, "grad_norm": 0.47802776970329436, "learning_rate": 9.256235531754913e-06, "loss": 0.4485, "step": 8633 }, { "epoch": 0.7748536043615804, "grad_norm": 0.452419748111085, "learning_rate": 9.255961500295319e-06, "loss": 0.4178, "step": 8634 }, { "epoch": 0.7749433488142514, "grad_norm": 0.45643639074130826, "learning_rate": 9.25568742242107e-06, "loss": 0.4059, "step": 8635 }, { "epoch": 0.7750330932669225, "grad_norm": 0.5258894345472439, "learning_rate": 9.25541329813515e-06, "loss": 0.5124, "step": 8636 }, { "epoch": 0.7751228377195934, "grad_norm": 0.5239657427597166, "learning_rate": 9.255139127440553e-06, "loss": 0.4745, "step": 8637 }, { "epoch": 0.7752125821722645, "grad_norm": 0.49006333301727445, "learning_rate": 9.254864910340266e-06, "loss": 0.4475, "step": 8638 }, { "epoch": 0.7753023266249355, "grad_norm": 0.531615919565198, "learning_rate": 9.254590646837282e-06, "loss": 0.5107, "step": 8639 }, { "epoch": 0.7753920710776065, "grad_norm": 0.4839380634168673, "learning_rate": 9.25431633693459e-06, "loss": 0.4261, "step": 8640 }, { "epoch": 0.7754818155302775, "grad_norm": 0.4739820003107731, "learning_rate": 9.254041980635183e-06, "loss": 0.4288, "step": 8641 }, { "epoch": 0.7755715599829486, "grad_norm": 0.5210131511602977, "learning_rate": 9.253767577942053e-06, "loss": 0.4825, "step": 8642 }, { "epoch": 0.7756613044356195, "grad_norm": 0.4433878987910115, "learning_rate": 9.25349312885819e-06, "loss": 0.4004, "step": 8643 }, { "epoch": 0.7757510488882906, "grad_norm": 0.5226153802436924, "learning_rate": 9.253218633386593e-06, "loss": 0.5171, "step": 8644 }, { "epoch": 0.7758407933409616, "grad_norm": 0.451207001156066, "learning_rate": 9.25294409153025e-06, "loss": 0.4224, "step": 8645 }, { "epoch": 0.7759305377936326, "grad_norm": 0.5001002626397323, "learning_rate": 9.252669503292156e-06, "loss": 0.4921, "step": 8646 }, { "epoch": 0.7760202822463037, "grad_norm": 0.5223521937526681, "learning_rate": 9.252394868675308e-06, "loss": 0.4986, "step": 8647 }, { "epoch": 0.7761100266989747, "grad_norm": 0.5011337704090276, "learning_rate": 9.252120187682698e-06, "loss": 0.4282, "step": 8648 }, { "epoch": 0.7761997711516457, "grad_norm": 0.5244512036814379, "learning_rate": 9.251845460317326e-06, "loss": 0.4938, "step": 8649 }, { "epoch": 0.7762895156043167, "grad_norm": 0.452369429605918, "learning_rate": 9.251570686582184e-06, "loss": 0.4439, "step": 8650 }, { "epoch": 0.7763792600569878, "grad_norm": 0.5025460285946407, "learning_rate": 9.251295866480268e-06, "loss": 0.4737, "step": 8651 }, { "epoch": 0.7764690045096587, "grad_norm": 0.48307665102561037, "learning_rate": 9.25102100001458e-06, "loss": 0.4379, "step": 8652 }, { "epoch": 0.7765587489623298, "grad_norm": 0.4956567514227262, "learning_rate": 9.250746087188114e-06, "loss": 0.4628, "step": 8653 }, { "epoch": 0.7766484934150008, "grad_norm": 0.4849362056562123, "learning_rate": 9.250471128003868e-06, "loss": 0.4464, "step": 8654 }, { "epoch": 0.7767382378676718, "grad_norm": 0.4742626898608298, "learning_rate": 9.250196122464842e-06, "loss": 0.4595, "step": 8655 }, { "epoch": 0.7768279823203428, "grad_norm": 0.465257575332321, "learning_rate": 9.249921070574035e-06, "loss": 0.4405, "step": 8656 }, { "epoch": 0.7769177267730139, "grad_norm": 0.4781994386869835, "learning_rate": 9.249645972334447e-06, "loss": 0.4415, "step": 8657 }, { "epoch": 0.7770074712256848, "grad_norm": 0.5343130890931118, "learning_rate": 9.249370827749076e-06, "loss": 0.4579, "step": 8658 }, { "epoch": 0.7770972156783559, "grad_norm": 0.46513775667567414, "learning_rate": 9.249095636820926e-06, "loss": 0.4467, "step": 8659 }, { "epoch": 0.777186960131027, "grad_norm": 0.4776297712530327, "learning_rate": 9.248820399552995e-06, "loss": 0.4717, "step": 8660 }, { "epoch": 0.7772767045836979, "grad_norm": 0.5239750590783421, "learning_rate": 9.248545115948288e-06, "loss": 0.4975, "step": 8661 }, { "epoch": 0.777366449036369, "grad_norm": 0.44767817124156073, "learning_rate": 9.248269786009803e-06, "loss": 0.4131, "step": 8662 }, { "epoch": 0.7774561934890399, "grad_norm": 0.4450969004866521, "learning_rate": 9.247994409740545e-06, "loss": 0.3976, "step": 8663 }, { "epoch": 0.777545937941711, "grad_norm": 0.4679808420290966, "learning_rate": 9.247718987143519e-06, "loss": 0.4373, "step": 8664 }, { "epoch": 0.777635682394382, "grad_norm": 0.47348299849715686, "learning_rate": 9.247443518221726e-06, "loss": 0.4471, "step": 8665 }, { "epoch": 0.777725426847053, "grad_norm": 0.4528180619051958, "learning_rate": 9.247168002978172e-06, "loss": 0.4303, "step": 8666 }, { "epoch": 0.777815171299724, "grad_norm": 0.5425705732584626, "learning_rate": 9.24689244141586e-06, "loss": 0.545, "step": 8667 }, { "epoch": 0.7779049157523951, "grad_norm": 0.5345738650031194, "learning_rate": 9.246616833537796e-06, "loss": 0.5154, "step": 8668 }, { "epoch": 0.777994660205066, "grad_norm": 0.5571837337230666, "learning_rate": 9.246341179346986e-06, "loss": 0.535, "step": 8669 }, { "epoch": 0.7780844046577371, "grad_norm": 0.46948715630058657, "learning_rate": 9.246065478846435e-06, "loss": 0.456, "step": 8670 }, { "epoch": 0.7781741491104082, "grad_norm": 0.510998737497591, "learning_rate": 9.245789732039152e-06, "loss": 0.4719, "step": 8671 }, { "epoch": 0.7782638935630791, "grad_norm": 0.46844074063900865, "learning_rate": 9.245513938928142e-06, "loss": 0.4176, "step": 8672 }, { "epoch": 0.7783536380157502, "grad_norm": 0.5166322619646787, "learning_rate": 9.245238099516415e-06, "loss": 0.5022, "step": 8673 }, { "epoch": 0.7784433824684212, "grad_norm": 0.47476046206478206, "learning_rate": 9.244962213806976e-06, "loss": 0.4445, "step": 8674 }, { "epoch": 0.7785331269210922, "grad_norm": 0.5122497354543526, "learning_rate": 9.244686281802837e-06, "loss": 0.4689, "step": 8675 }, { "epoch": 0.7786228713737632, "grad_norm": 0.48452454113984467, "learning_rate": 9.244410303507006e-06, "loss": 0.4652, "step": 8676 }, { "epoch": 0.7787126158264343, "grad_norm": 0.4551931341097273, "learning_rate": 9.244134278922493e-06, "loss": 0.3866, "step": 8677 }, { "epoch": 0.7788023602791052, "grad_norm": 0.4677101222013258, "learning_rate": 9.243858208052307e-06, "loss": 0.423, "step": 8678 }, { "epoch": 0.7788921047317763, "grad_norm": 0.5083051640563063, "learning_rate": 9.24358209089946e-06, "loss": 0.4808, "step": 8679 }, { "epoch": 0.7789818491844472, "grad_norm": 0.480463858563408, "learning_rate": 9.243305927466963e-06, "loss": 0.4403, "step": 8680 }, { "epoch": 0.7790715936371183, "grad_norm": 0.5340177187435964, "learning_rate": 9.243029717757827e-06, "loss": 0.5001, "step": 8681 }, { "epoch": 0.7791613380897894, "grad_norm": 0.4806003376135443, "learning_rate": 9.242753461775067e-06, "loss": 0.4395, "step": 8682 }, { "epoch": 0.7792510825424603, "grad_norm": 0.5064657843171622, "learning_rate": 9.242477159521693e-06, "loss": 0.4686, "step": 8683 }, { "epoch": 0.7793408269951314, "grad_norm": 0.4965468849809745, "learning_rate": 9.24220081100072e-06, "loss": 0.4993, "step": 8684 }, { "epoch": 0.7794305714478024, "grad_norm": 0.5036934998557971, "learning_rate": 9.24192441621516e-06, "loss": 0.4572, "step": 8685 }, { "epoch": 0.7795203159004734, "grad_norm": 0.4586764997305448, "learning_rate": 9.241647975168029e-06, "loss": 0.4354, "step": 8686 }, { "epoch": 0.7796100603531444, "grad_norm": 0.5175009904245406, "learning_rate": 9.241371487862341e-06, "loss": 0.5186, "step": 8687 }, { "epoch": 0.7796998048058155, "grad_norm": 0.4545528702479648, "learning_rate": 9.24109495430111e-06, "loss": 0.426, "step": 8688 }, { "epoch": 0.7797895492584864, "grad_norm": 0.49174429201229797, "learning_rate": 9.240818374487354e-06, "loss": 0.4766, "step": 8689 }, { "epoch": 0.7798792937111575, "grad_norm": 0.46072369485272274, "learning_rate": 9.24054174842409e-06, "loss": 0.4459, "step": 8690 }, { "epoch": 0.7799690381638285, "grad_norm": 0.49942078204117746, "learning_rate": 9.240265076114333e-06, "loss": 0.4354, "step": 8691 }, { "epoch": 0.7800587826164995, "grad_norm": 0.46437587572099287, "learning_rate": 9.239988357561098e-06, "loss": 0.4007, "step": 8692 }, { "epoch": 0.7801485270691705, "grad_norm": 0.5153783025617645, "learning_rate": 9.239711592767408e-06, "loss": 0.4599, "step": 8693 }, { "epoch": 0.7802382715218416, "grad_norm": 0.5068721834121961, "learning_rate": 9.239434781736278e-06, "loss": 0.4664, "step": 8694 }, { "epoch": 0.7803280159745126, "grad_norm": 0.5070578723803127, "learning_rate": 9.239157924470729e-06, "loss": 0.5097, "step": 8695 }, { "epoch": 0.7804177604271836, "grad_norm": 0.4741095467855803, "learning_rate": 9.238881020973778e-06, "loss": 0.466, "step": 8696 }, { "epoch": 0.7805075048798547, "grad_norm": 0.5002209029761049, "learning_rate": 9.238604071248447e-06, "loss": 0.4623, "step": 8697 }, { "epoch": 0.7805972493325256, "grad_norm": 0.4919111131504791, "learning_rate": 9.238327075297754e-06, "loss": 0.4507, "step": 8698 }, { "epoch": 0.7806869937851967, "grad_norm": 0.44996894863195147, "learning_rate": 9.238050033124721e-06, "loss": 0.426, "step": 8699 }, { "epoch": 0.7807767382378676, "grad_norm": 0.5256254750693434, "learning_rate": 9.237772944732372e-06, "loss": 0.5041, "step": 8700 }, { "epoch": 0.7808664826905387, "grad_norm": 0.5078932661663863, "learning_rate": 9.237495810123724e-06, "loss": 0.4859, "step": 8701 }, { "epoch": 0.7809562271432097, "grad_norm": 0.5168483039752791, "learning_rate": 9.237218629301803e-06, "loss": 0.4616, "step": 8702 }, { "epoch": 0.7810459715958807, "grad_norm": 0.5186751472868676, "learning_rate": 9.23694140226963e-06, "loss": 0.5194, "step": 8703 }, { "epoch": 0.7811357160485517, "grad_norm": 0.4789972796497651, "learning_rate": 9.236664129030229e-06, "loss": 0.4568, "step": 8704 }, { "epoch": 0.7812254605012228, "grad_norm": 0.49572545983431804, "learning_rate": 9.236386809586624e-06, "loss": 0.4668, "step": 8705 }, { "epoch": 0.7813152049538938, "grad_norm": 0.4875427446744572, "learning_rate": 9.23610944394184e-06, "loss": 0.4774, "step": 8706 }, { "epoch": 0.7814049494065648, "grad_norm": 0.4622831010665037, "learning_rate": 9.235832032098901e-06, "loss": 0.4264, "step": 8707 }, { "epoch": 0.7814946938592359, "grad_norm": 0.46997495225319924, "learning_rate": 9.235554574060833e-06, "loss": 0.4647, "step": 8708 }, { "epoch": 0.7815844383119068, "grad_norm": 0.5023633487147656, "learning_rate": 9.23527706983066e-06, "loss": 0.4411, "step": 8709 }, { "epoch": 0.7816741827645779, "grad_norm": 0.5134945758055453, "learning_rate": 9.23499951941141e-06, "loss": 0.4765, "step": 8710 }, { "epoch": 0.7817639272172489, "grad_norm": 0.4885166169395789, "learning_rate": 9.234721922806111e-06, "loss": 0.4626, "step": 8711 }, { "epoch": 0.7818536716699199, "grad_norm": 0.4370390790360597, "learning_rate": 9.234444280017789e-06, "loss": 0.3937, "step": 8712 }, { "epoch": 0.7819434161225909, "grad_norm": 0.4574861257245002, "learning_rate": 9.234166591049471e-06, "loss": 0.4283, "step": 8713 }, { "epoch": 0.782033160575262, "grad_norm": 0.4782331327394213, "learning_rate": 9.23388885590419e-06, "loss": 0.3912, "step": 8714 }, { "epoch": 0.7821229050279329, "grad_norm": 0.43271332375811233, "learning_rate": 9.233611074584969e-06, "loss": 0.386, "step": 8715 }, { "epoch": 0.782212649480604, "grad_norm": 0.4532750316196812, "learning_rate": 9.233333247094839e-06, "loss": 0.3871, "step": 8716 }, { "epoch": 0.782302393933275, "grad_norm": 0.46907137591017983, "learning_rate": 9.233055373436833e-06, "loss": 0.4593, "step": 8717 }, { "epoch": 0.782392138385946, "grad_norm": 0.501199532303614, "learning_rate": 9.232777453613976e-06, "loss": 0.5061, "step": 8718 }, { "epoch": 0.7824818828386171, "grad_norm": 0.5000102020398849, "learning_rate": 9.232499487629304e-06, "loss": 0.5164, "step": 8719 }, { "epoch": 0.782571627291288, "grad_norm": 0.46219705582167636, "learning_rate": 9.23222147548585e-06, "loss": 0.3578, "step": 8720 }, { "epoch": 0.7826613717439591, "grad_norm": 0.48052990568026505, "learning_rate": 9.231943417186639e-06, "loss": 0.413, "step": 8721 }, { "epoch": 0.7827511161966301, "grad_norm": 0.43217852077444024, "learning_rate": 9.231665312734707e-06, "loss": 0.4015, "step": 8722 }, { "epoch": 0.7828408606493011, "grad_norm": 0.4957075448972741, "learning_rate": 9.231387162133088e-06, "loss": 0.4892, "step": 8723 }, { "epoch": 0.7829306051019721, "grad_norm": 0.4668823169328131, "learning_rate": 9.231108965384815e-06, "loss": 0.4237, "step": 8724 }, { "epoch": 0.7830203495546432, "grad_norm": 0.5163086223499473, "learning_rate": 9.23083072249292e-06, "loss": 0.4787, "step": 8725 }, { "epoch": 0.7831100940073141, "grad_norm": 0.5009666345870155, "learning_rate": 9.23055243346044e-06, "loss": 0.4535, "step": 8726 }, { "epoch": 0.7831998384599852, "grad_norm": 0.5107105755365373, "learning_rate": 9.230274098290409e-06, "loss": 0.5117, "step": 8727 }, { "epoch": 0.7832895829126562, "grad_norm": 0.5020155278154914, "learning_rate": 9.22999571698586e-06, "loss": 0.4895, "step": 8728 }, { "epoch": 0.7833793273653272, "grad_norm": 0.4982142437527182, "learning_rate": 9.229717289549835e-06, "loss": 0.4568, "step": 8729 }, { "epoch": 0.7834690718179983, "grad_norm": 0.47123237417990066, "learning_rate": 9.229438815985364e-06, "loss": 0.466, "step": 8730 }, { "epoch": 0.7835588162706693, "grad_norm": 0.5255245604074145, "learning_rate": 9.229160296295488e-06, "loss": 0.4983, "step": 8731 }, { "epoch": 0.7836485607233403, "grad_norm": 0.4856307178023145, "learning_rate": 9.228881730483242e-06, "loss": 0.4554, "step": 8732 }, { "epoch": 0.7837383051760113, "grad_norm": 0.48594980823696243, "learning_rate": 9.228603118551666e-06, "loss": 0.45, "step": 8733 }, { "epoch": 0.7838280496286824, "grad_norm": 0.5096284542337475, "learning_rate": 9.228324460503799e-06, "loss": 0.445, "step": 8734 }, { "epoch": 0.7839177940813533, "grad_norm": 0.508755976149873, "learning_rate": 9.228045756342676e-06, "loss": 0.4628, "step": 8735 }, { "epoch": 0.7840075385340244, "grad_norm": 0.47350279193994893, "learning_rate": 9.22776700607134e-06, "loss": 0.4296, "step": 8736 }, { "epoch": 0.7840972829866953, "grad_norm": 0.4426692831551816, "learning_rate": 9.227488209692832e-06, "loss": 0.3911, "step": 8737 }, { "epoch": 0.7841870274393664, "grad_norm": 0.46188248731691195, "learning_rate": 9.227209367210189e-06, "loss": 0.4167, "step": 8738 }, { "epoch": 0.7842767718920374, "grad_norm": 0.5105412421768364, "learning_rate": 9.226930478626455e-06, "loss": 0.4645, "step": 8739 }, { "epoch": 0.7843665163447084, "grad_norm": 0.4536321569216712, "learning_rate": 9.226651543944667e-06, "loss": 0.3859, "step": 8740 }, { "epoch": 0.7844562607973795, "grad_norm": 0.5101513810476421, "learning_rate": 9.226372563167873e-06, "loss": 0.482, "step": 8741 }, { "epoch": 0.7845460052500505, "grad_norm": 0.45483612245846944, "learning_rate": 9.226093536299113e-06, "loss": 0.4148, "step": 8742 }, { "epoch": 0.7846357497027215, "grad_norm": 0.4836433312225612, "learning_rate": 9.225814463341427e-06, "loss": 0.4435, "step": 8743 }, { "epoch": 0.7847254941553925, "grad_norm": 0.4782949165009919, "learning_rate": 9.225535344297865e-06, "loss": 0.3867, "step": 8744 }, { "epoch": 0.7848152386080636, "grad_norm": 0.5445661452553162, "learning_rate": 9.225256179171464e-06, "loss": 0.543, "step": 8745 }, { "epoch": 0.7849049830607345, "grad_norm": 0.5114614094428317, "learning_rate": 9.224976967965274e-06, "loss": 0.4613, "step": 8746 }, { "epoch": 0.7849947275134056, "grad_norm": 0.5344415777864955, "learning_rate": 9.224697710682335e-06, "loss": 0.5482, "step": 8747 }, { "epoch": 0.7850844719660766, "grad_norm": 0.5175813421407656, "learning_rate": 9.224418407325697e-06, "loss": 0.5137, "step": 8748 }, { "epoch": 0.7851742164187476, "grad_norm": 0.46453846838798285, "learning_rate": 9.224139057898406e-06, "loss": 0.4309, "step": 8749 }, { "epoch": 0.7852639608714186, "grad_norm": 0.4708267291985152, "learning_rate": 9.223859662403503e-06, "loss": 0.4007, "step": 8750 }, { "epoch": 0.7853537053240897, "grad_norm": 0.47079860267121293, "learning_rate": 9.223580220844042e-06, "loss": 0.4515, "step": 8751 }, { "epoch": 0.7854434497767607, "grad_norm": 0.4876750470554362, "learning_rate": 9.223300733223066e-06, "loss": 0.4126, "step": 8752 }, { "epoch": 0.7855331942294317, "grad_norm": 0.48404593147830677, "learning_rate": 9.223021199543624e-06, "loss": 0.4459, "step": 8753 }, { "epoch": 0.7856229386821028, "grad_norm": 0.4987732925718958, "learning_rate": 9.222741619808766e-06, "loss": 0.4497, "step": 8754 }, { "epoch": 0.7857126831347737, "grad_norm": 0.5484958632654436, "learning_rate": 9.222461994021541e-06, "loss": 0.4874, "step": 8755 }, { "epoch": 0.7858024275874448, "grad_norm": 0.5166955674993448, "learning_rate": 9.222182322184996e-06, "loss": 0.4972, "step": 8756 }, { "epoch": 0.7858921720401157, "grad_norm": 0.47963619766657767, "learning_rate": 9.221902604302183e-06, "loss": 0.4368, "step": 8757 }, { "epoch": 0.7859819164927868, "grad_norm": 0.4719191972364722, "learning_rate": 9.221622840376151e-06, "loss": 0.4273, "step": 8758 }, { "epoch": 0.7860716609454578, "grad_norm": 0.49112598125313844, "learning_rate": 9.221343030409954e-06, "loss": 0.4482, "step": 8759 }, { "epoch": 0.7861614053981288, "grad_norm": 0.49461818084212755, "learning_rate": 9.221063174406641e-06, "loss": 0.4647, "step": 8760 }, { "epoch": 0.7862511498507998, "grad_norm": 0.4710767599945592, "learning_rate": 9.220783272369265e-06, "loss": 0.4467, "step": 8761 }, { "epoch": 0.7863408943034709, "grad_norm": 0.5314731138606195, "learning_rate": 9.220503324300876e-06, "loss": 0.4312, "step": 8762 }, { "epoch": 0.7864306387561418, "grad_norm": 0.5324374082794835, "learning_rate": 9.220223330204532e-06, "loss": 0.4296, "step": 8763 }, { "epoch": 0.7865203832088129, "grad_norm": 0.508463554741116, "learning_rate": 9.219943290083284e-06, "loss": 0.5198, "step": 8764 }, { "epoch": 0.786610127661484, "grad_norm": 0.45766987599013265, "learning_rate": 9.219663203940185e-06, "loss": 0.4064, "step": 8765 }, { "epoch": 0.7866998721141549, "grad_norm": 0.46808748442704434, "learning_rate": 9.219383071778292e-06, "loss": 0.4271, "step": 8766 }, { "epoch": 0.786789616566826, "grad_norm": 0.5128495413392595, "learning_rate": 9.219102893600657e-06, "loss": 0.4995, "step": 8767 }, { "epoch": 0.786879361019497, "grad_norm": 0.5152744594846331, "learning_rate": 9.218822669410339e-06, "loss": 0.5017, "step": 8768 }, { "epoch": 0.786969105472168, "grad_norm": 0.444277138744914, "learning_rate": 9.21854239921039e-06, "loss": 0.4186, "step": 8769 }, { "epoch": 0.787058849924839, "grad_norm": 0.48390490430515315, "learning_rate": 9.218262083003871e-06, "loss": 0.4233, "step": 8770 }, { "epoch": 0.7871485943775101, "grad_norm": 0.5181382133106812, "learning_rate": 9.217981720793835e-06, "loss": 0.4729, "step": 8771 }, { "epoch": 0.787238338830181, "grad_norm": 0.5158319106118946, "learning_rate": 9.217701312583344e-06, "loss": 0.4815, "step": 8772 }, { "epoch": 0.7873280832828521, "grad_norm": 0.4375571976449246, "learning_rate": 9.217420858375453e-06, "loss": 0.3412, "step": 8773 }, { "epoch": 0.787417827735523, "grad_norm": 0.4360593029720164, "learning_rate": 9.21714035817322e-06, "loss": 0.3876, "step": 8774 }, { "epoch": 0.7875075721881941, "grad_norm": 0.46644741328321193, "learning_rate": 9.216859811979704e-06, "loss": 0.4156, "step": 8775 }, { "epoch": 0.7875973166408652, "grad_norm": 0.49527789564356123, "learning_rate": 9.216579219797967e-06, "loss": 0.4708, "step": 8776 }, { "epoch": 0.7876870610935361, "grad_norm": 0.452263292434315, "learning_rate": 9.216298581631068e-06, "loss": 0.3961, "step": 8777 }, { "epoch": 0.7877768055462072, "grad_norm": 0.49572084266119665, "learning_rate": 9.21601789748207e-06, "loss": 0.4921, "step": 8778 }, { "epoch": 0.7878665499988782, "grad_norm": 0.4662628818307658, "learning_rate": 9.215737167354029e-06, "loss": 0.4273, "step": 8779 }, { "epoch": 0.7879562944515492, "grad_norm": 0.4984605761174052, "learning_rate": 9.215456391250009e-06, "loss": 0.5028, "step": 8780 }, { "epoch": 0.7880460389042202, "grad_norm": 0.48348042876807334, "learning_rate": 9.215175569173073e-06, "loss": 0.4411, "step": 8781 }, { "epoch": 0.7881357833568913, "grad_norm": 0.49117510695821115, "learning_rate": 9.214894701126283e-06, "loss": 0.4088, "step": 8782 }, { "epoch": 0.7882255278095622, "grad_norm": 0.46811951168875543, "learning_rate": 9.2146137871127e-06, "loss": 0.4485, "step": 8783 }, { "epoch": 0.7883152722622333, "grad_norm": 0.4743024122890313, "learning_rate": 9.214332827135391e-06, "loss": 0.4814, "step": 8784 }, { "epoch": 0.7884050167149043, "grad_norm": 0.47401659348435077, "learning_rate": 9.214051821197419e-06, "loss": 0.4431, "step": 8785 }, { "epoch": 0.7884947611675753, "grad_norm": 0.49072020633784025, "learning_rate": 9.213770769301848e-06, "loss": 0.4013, "step": 8786 }, { "epoch": 0.7885845056202464, "grad_norm": 0.4329341371749497, "learning_rate": 9.213489671451742e-06, "loss": 0.363, "step": 8787 }, { "epoch": 0.7886742500729174, "grad_norm": 0.5111653553255509, "learning_rate": 9.213208527650171e-06, "loss": 0.4637, "step": 8788 }, { "epoch": 0.7887639945255884, "grad_norm": 0.5074911959354352, "learning_rate": 9.212927337900194e-06, "loss": 0.4853, "step": 8789 }, { "epoch": 0.7888537389782594, "grad_norm": 0.4638988649846951, "learning_rate": 9.212646102204886e-06, "loss": 0.4667, "step": 8790 }, { "epoch": 0.7889434834309305, "grad_norm": 0.5131423601048075, "learning_rate": 9.212364820567306e-06, "loss": 0.4799, "step": 8791 }, { "epoch": 0.7890332278836014, "grad_norm": 0.47474697312338593, "learning_rate": 9.212083492990527e-06, "loss": 0.4576, "step": 8792 }, { "epoch": 0.7891229723362725, "grad_norm": 0.46390999353421936, "learning_rate": 9.211802119477615e-06, "loss": 0.4293, "step": 8793 }, { "epoch": 0.7892127167889434, "grad_norm": 0.4527337425125021, "learning_rate": 9.211520700031637e-06, "loss": 0.3956, "step": 8794 }, { "epoch": 0.7893024612416145, "grad_norm": 0.5196385665456751, "learning_rate": 9.211239234655666e-06, "loss": 0.5352, "step": 8795 }, { "epoch": 0.7893922056942855, "grad_norm": 0.44145406389152353, "learning_rate": 9.210957723352772e-06, "loss": 0.3853, "step": 8796 }, { "epoch": 0.7894819501469565, "grad_norm": 0.41563115110884513, "learning_rate": 9.21067616612602e-06, "loss": 0.3494, "step": 8797 }, { "epoch": 0.7895716945996276, "grad_norm": 0.5162075746587842, "learning_rate": 9.210394562978484e-06, "loss": 0.4481, "step": 8798 }, { "epoch": 0.7896614390522986, "grad_norm": 0.5178206913256215, "learning_rate": 9.210112913913234e-06, "loss": 0.4353, "step": 8799 }, { "epoch": 0.7897511835049696, "grad_norm": 0.4938602610896188, "learning_rate": 9.209831218933342e-06, "loss": 0.4263, "step": 8800 }, { "epoch": 0.7898409279576406, "grad_norm": 0.49067399776001885, "learning_rate": 9.209549478041881e-06, "loss": 0.4624, "step": 8801 }, { "epoch": 0.7899306724103117, "grad_norm": 0.4873999061568599, "learning_rate": 9.209267691241923e-06, "loss": 0.4049, "step": 8802 }, { "epoch": 0.7900204168629826, "grad_norm": 0.5355084447221657, "learning_rate": 9.20898585853654e-06, "loss": 0.5225, "step": 8803 }, { "epoch": 0.7901101613156537, "grad_norm": 0.5017128014452193, "learning_rate": 9.208703979928809e-06, "loss": 0.4568, "step": 8804 }, { "epoch": 0.7901999057683247, "grad_norm": 0.5197680751214421, "learning_rate": 9.208422055421798e-06, "loss": 0.4827, "step": 8805 }, { "epoch": 0.7902896502209957, "grad_norm": 0.474334771682677, "learning_rate": 9.208140085018588e-06, "loss": 0.4553, "step": 8806 }, { "epoch": 0.7903793946736667, "grad_norm": 0.4657367273664579, "learning_rate": 9.20785806872225e-06, "loss": 0.4213, "step": 8807 }, { "epoch": 0.7904691391263378, "grad_norm": 0.4882864091160601, "learning_rate": 9.207576006535863e-06, "loss": 0.4424, "step": 8808 }, { "epoch": 0.7905588835790087, "grad_norm": 0.47397674227261494, "learning_rate": 9.207293898462499e-06, "loss": 0.4265, "step": 8809 }, { "epoch": 0.7906486280316798, "grad_norm": 0.4796953028154195, "learning_rate": 9.207011744505237e-06, "loss": 0.393, "step": 8810 }, { "epoch": 0.7907383724843509, "grad_norm": 0.48230571211885775, "learning_rate": 9.206729544667154e-06, "loss": 0.417, "step": 8811 }, { "epoch": 0.7908281169370218, "grad_norm": 0.4443661072784221, "learning_rate": 9.206447298951328e-06, "loss": 0.4072, "step": 8812 }, { "epoch": 0.7909178613896929, "grad_norm": 0.5023755014951037, "learning_rate": 9.206165007360836e-06, "loss": 0.4331, "step": 8813 }, { "epoch": 0.7910076058423638, "grad_norm": 0.5080961371621626, "learning_rate": 9.205882669898758e-06, "loss": 0.5049, "step": 8814 }, { "epoch": 0.7910973502950349, "grad_norm": 0.4651361364894733, "learning_rate": 9.205600286568172e-06, "loss": 0.4172, "step": 8815 }, { "epoch": 0.7911870947477059, "grad_norm": 0.502928891394934, "learning_rate": 9.205317857372158e-06, "loss": 0.4812, "step": 8816 }, { "epoch": 0.791276839200377, "grad_norm": 0.45191028739374123, "learning_rate": 9.205035382313796e-06, "loss": 0.4105, "step": 8817 }, { "epoch": 0.7913665836530479, "grad_norm": 0.5033405524536894, "learning_rate": 9.204752861396167e-06, "loss": 0.5018, "step": 8818 }, { "epoch": 0.791456328105719, "grad_norm": 0.5089293672700783, "learning_rate": 9.204470294622351e-06, "loss": 0.4882, "step": 8819 }, { "epoch": 0.7915460725583899, "grad_norm": 0.5087813888554771, "learning_rate": 9.204187681995432e-06, "loss": 0.4558, "step": 8820 }, { "epoch": 0.791635817011061, "grad_norm": 0.5081788000685825, "learning_rate": 9.203905023518489e-06, "loss": 0.524, "step": 8821 }, { "epoch": 0.7917255614637321, "grad_norm": 0.4538628892304444, "learning_rate": 9.203622319194605e-06, "loss": 0.4237, "step": 8822 }, { "epoch": 0.791815305916403, "grad_norm": 0.523500796629378, "learning_rate": 9.203339569026865e-06, "loss": 0.4872, "step": 8823 }, { "epoch": 0.7919050503690741, "grad_norm": 0.5659246218998009, "learning_rate": 9.203056773018352e-06, "loss": 0.6022, "step": 8824 }, { "epoch": 0.7919947948217451, "grad_norm": 0.5262516755995309, "learning_rate": 9.202773931172152e-06, "loss": 0.5188, "step": 8825 }, { "epoch": 0.7920845392744161, "grad_norm": 0.46359856347726325, "learning_rate": 9.202491043491346e-06, "loss": 0.4112, "step": 8826 }, { "epoch": 0.7921742837270871, "grad_norm": 0.5133672917789911, "learning_rate": 9.20220810997902e-06, "loss": 0.3938, "step": 8827 }, { "epoch": 0.7922640281797582, "grad_norm": 0.5199622155992195, "learning_rate": 9.201925130638262e-06, "loss": 0.4871, "step": 8828 }, { "epoch": 0.7923537726324291, "grad_norm": 0.5106539841738053, "learning_rate": 9.201642105472155e-06, "loss": 0.4875, "step": 8829 }, { "epoch": 0.7924435170851002, "grad_norm": 0.5372857424688385, "learning_rate": 9.201359034483788e-06, "loss": 0.5166, "step": 8830 }, { "epoch": 0.7925332615377711, "grad_norm": 0.4631791283011266, "learning_rate": 9.201075917676246e-06, "loss": 0.456, "step": 8831 }, { "epoch": 0.7926230059904422, "grad_norm": 0.4847046564744168, "learning_rate": 9.20079275505262e-06, "loss": 0.4384, "step": 8832 }, { "epoch": 0.7927127504431133, "grad_norm": 0.46750584124057637, "learning_rate": 9.200509546615994e-06, "loss": 0.4641, "step": 8833 }, { "epoch": 0.7928024948957842, "grad_norm": 0.5046082707735899, "learning_rate": 9.20022629236946e-06, "loss": 0.4579, "step": 8834 }, { "epoch": 0.7928922393484553, "grad_norm": 0.4808817732842128, "learning_rate": 9.199942992316102e-06, "loss": 0.4537, "step": 8835 }, { "epoch": 0.7929819838011263, "grad_norm": 0.5793107513160886, "learning_rate": 9.199659646459015e-06, "loss": 0.5717, "step": 8836 }, { "epoch": 0.7930717282537973, "grad_norm": 0.49397066976720116, "learning_rate": 9.19937625480129e-06, "loss": 0.4837, "step": 8837 }, { "epoch": 0.7931614727064683, "grad_norm": 0.4842899733315844, "learning_rate": 9.199092817346014e-06, "loss": 0.3994, "step": 8838 }, { "epoch": 0.7932512171591394, "grad_norm": 0.45949602129407974, "learning_rate": 9.198809334096276e-06, "loss": 0.4273, "step": 8839 }, { "epoch": 0.7933409616118103, "grad_norm": 0.5679233928741759, "learning_rate": 9.198525805055174e-06, "loss": 0.5247, "step": 8840 }, { "epoch": 0.7934307060644814, "grad_norm": 0.5131662813241672, "learning_rate": 9.198242230225796e-06, "loss": 0.4657, "step": 8841 }, { "epoch": 0.7935204505171524, "grad_norm": 0.4890955072688454, "learning_rate": 9.197958609611234e-06, "loss": 0.4658, "step": 8842 }, { "epoch": 0.7936101949698234, "grad_norm": 0.44776544722173905, "learning_rate": 9.197674943214584e-06, "loss": 0.3938, "step": 8843 }, { "epoch": 0.7936999394224944, "grad_norm": 0.47937765050705256, "learning_rate": 9.19739123103894e-06, "loss": 0.4103, "step": 8844 }, { "epoch": 0.7937896838751655, "grad_norm": 0.47780222123914723, "learning_rate": 9.197107473087389e-06, "loss": 0.425, "step": 8845 }, { "epoch": 0.7938794283278365, "grad_norm": 0.47780287381776826, "learning_rate": 9.196823669363036e-06, "loss": 0.4696, "step": 8846 }, { "epoch": 0.7939691727805075, "grad_norm": 0.4681988185378114, "learning_rate": 9.196539819868967e-06, "loss": 0.4349, "step": 8847 }, { "epoch": 0.7940589172331786, "grad_norm": 0.4918021915200498, "learning_rate": 9.196255924608284e-06, "loss": 0.4505, "step": 8848 }, { "epoch": 0.7941486616858495, "grad_norm": 0.47384182418769116, "learning_rate": 9.19597198358408e-06, "loss": 0.4515, "step": 8849 }, { "epoch": 0.7942384061385206, "grad_norm": 0.46045864062311775, "learning_rate": 9.195687996799453e-06, "loss": 0.4403, "step": 8850 }, { "epoch": 0.7943281505911916, "grad_norm": 0.48088058996813554, "learning_rate": 9.1954039642575e-06, "loss": 0.4531, "step": 8851 }, { "epoch": 0.7944178950438626, "grad_norm": 0.48415222104023636, "learning_rate": 9.195119885961314e-06, "loss": 0.4199, "step": 8852 }, { "epoch": 0.7945076394965336, "grad_norm": 0.48520187505135404, "learning_rate": 9.194835761914001e-06, "loss": 0.4575, "step": 8853 }, { "epoch": 0.7945973839492046, "grad_norm": 0.5096480305797255, "learning_rate": 9.194551592118654e-06, "loss": 0.4816, "step": 8854 }, { "epoch": 0.7946871284018756, "grad_norm": 0.5074269021792545, "learning_rate": 9.194267376578374e-06, "loss": 0.4804, "step": 8855 }, { "epoch": 0.7947768728545467, "grad_norm": 0.5142562850805433, "learning_rate": 9.193983115296262e-06, "loss": 0.4696, "step": 8856 }, { "epoch": 0.7948666173072177, "grad_norm": 0.5249249523768585, "learning_rate": 9.193698808275412e-06, "loss": 0.5023, "step": 8857 }, { "epoch": 0.7949563617598887, "grad_norm": 0.44952201393600993, "learning_rate": 9.193414455518934e-06, "loss": 0.4104, "step": 8858 }, { "epoch": 0.7950461062125598, "grad_norm": 0.46469575836895877, "learning_rate": 9.193130057029922e-06, "loss": 0.3937, "step": 8859 }, { "epoch": 0.7951358506652307, "grad_norm": 0.4337016335074063, "learning_rate": 9.192845612811479e-06, "loss": 0.3878, "step": 8860 }, { "epoch": 0.7952255951179018, "grad_norm": 0.492818388177612, "learning_rate": 9.19256112286671e-06, "loss": 0.4497, "step": 8861 }, { "epoch": 0.7953153395705728, "grad_norm": 0.46558255295627443, "learning_rate": 9.192276587198714e-06, "loss": 0.4169, "step": 8862 }, { "epoch": 0.7954050840232438, "grad_norm": 0.46422613045427713, "learning_rate": 9.191992005810594e-06, "loss": 0.4379, "step": 8863 }, { "epoch": 0.7954948284759148, "grad_norm": 0.5094712408170547, "learning_rate": 9.191707378705457e-06, "loss": 0.4938, "step": 8864 }, { "epoch": 0.7955845729285859, "grad_norm": 0.4909471576443008, "learning_rate": 9.191422705886406e-06, "loss": 0.4562, "step": 8865 }, { "epoch": 0.7956743173812568, "grad_norm": 0.46881046846148483, "learning_rate": 9.191137987356544e-06, "loss": 0.4351, "step": 8866 }, { "epoch": 0.7957640618339279, "grad_norm": 0.49258050256551145, "learning_rate": 9.190853223118977e-06, "loss": 0.4814, "step": 8867 }, { "epoch": 0.795853806286599, "grad_norm": 0.4264290949229125, "learning_rate": 9.190568413176807e-06, "loss": 0.3786, "step": 8868 }, { "epoch": 0.7959435507392699, "grad_norm": 0.47095127188352287, "learning_rate": 9.190283557533148e-06, "loss": 0.4057, "step": 8869 }, { "epoch": 0.796033295191941, "grad_norm": 0.4790783318383565, "learning_rate": 9.189998656191101e-06, "loss": 0.4087, "step": 8870 }, { "epoch": 0.796123039644612, "grad_norm": 0.48754273821019745, "learning_rate": 9.189713709153773e-06, "loss": 0.4502, "step": 8871 }, { "epoch": 0.796212784097283, "grad_norm": 0.4574959151697927, "learning_rate": 9.189428716424274e-06, "loss": 0.4107, "step": 8872 }, { "epoch": 0.796302528549954, "grad_norm": 0.4708666031516108, "learning_rate": 9.189143678005709e-06, "loss": 0.4134, "step": 8873 }, { "epoch": 0.796392273002625, "grad_norm": 0.48813096076476203, "learning_rate": 9.18885859390119e-06, "loss": 0.4413, "step": 8874 }, { "epoch": 0.796482017455296, "grad_norm": 0.5862511030038903, "learning_rate": 9.188573464113824e-06, "loss": 0.6059, "step": 8875 }, { "epoch": 0.7965717619079671, "grad_norm": 0.4872548590904624, "learning_rate": 9.188288288646721e-06, "loss": 0.467, "step": 8876 }, { "epoch": 0.796661506360638, "grad_norm": 0.45278821718548146, "learning_rate": 9.188003067502992e-06, "loss": 0.4036, "step": 8877 }, { "epoch": 0.7967512508133091, "grad_norm": 0.4643213177627124, "learning_rate": 9.187717800685746e-06, "loss": 0.4235, "step": 8878 }, { "epoch": 0.7968409952659801, "grad_norm": 0.4894427356769732, "learning_rate": 9.187432488198093e-06, "loss": 0.4303, "step": 8879 }, { "epoch": 0.7969307397186511, "grad_norm": 0.5179918620820073, "learning_rate": 9.18714713004315e-06, "loss": 0.4937, "step": 8880 }, { "epoch": 0.7970204841713222, "grad_norm": 0.49864089091312075, "learning_rate": 9.186861726224023e-06, "loss": 0.4857, "step": 8881 }, { "epoch": 0.7971102286239932, "grad_norm": 0.46489749732853863, "learning_rate": 9.186576276743826e-06, "loss": 0.3944, "step": 8882 }, { "epoch": 0.7971999730766642, "grad_norm": 0.4983745811741453, "learning_rate": 9.186290781605674e-06, "loss": 0.418, "step": 8883 }, { "epoch": 0.7972897175293352, "grad_norm": 0.5073974976655565, "learning_rate": 9.18600524081268e-06, "loss": 0.4752, "step": 8884 }, { "epoch": 0.7973794619820063, "grad_norm": 0.47974804508961966, "learning_rate": 9.185719654367957e-06, "loss": 0.4404, "step": 8885 }, { "epoch": 0.7974692064346772, "grad_norm": 0.49674190088630826, "learning_rate": 9.18543402227462e-06, "loss": 0.4483, "step": 8886 }, { "epoch": 0.7975589508873483, "grad_norm": 0.49633467207071036, "learning_rate": 9.185148344535783e-06, "loss": 0.421, "step": 8887 }, { "epoch": 0.7976486953400193, "grad_norm": 0.5049513314319891, "learning_rate": 9.184862621154565e-06, "loss": 0.4785, "step": 8888 }, { "epoch": 0.7977384397926903, "grad_norm": 0.48458075769354614, "learning_rate": 9.184576852134079e-06, "loss": 0.4255, "step": 8889 }, { "epoch": 0.7978281842453613, "grad_norm": 0.5168516922277009, "learning_rate": 9.184291037477442e-06, "loss": 0.5152, "step": 8890 }, { "epoch": 0.7979179286980324, "grad_norm": 0.46441350880595106, "learning_rate": 9.184005177187772e-06, "loss": 0.435, "step": 8891 }, { "epoch": 0.7980076731507034, "grad_norm": 0.43749398202400935, "learning_rate": 9.183719271268185e-06, "loss": 0.3965, "step": 8892 }, { "epoch": 0.7980974176033744, "grad_norm": 0.4762396836741295, "learning_rate": 9.1834333197218e-06, "loss": 0.4527, "step": 8893 }, { "epoch": 0.7981871620560455, "grad_norm": 0.46964943483164856, "learning_rate": 9.183147322551736e-06, "loss": 0.477, "step": 8894 }, { "epoch": 0.7982769065087164, "grad_norm": 0.5091294624640301, "learning_rate": 9.18286127976111e-06, "loss": 0.4961, "step": 8895 }, { "epoch": 0.7983666509613875, "grad_norm": 0.5340285075464534, "learning_rate": 9.182575191353043e-06, "loss": 0.5266, "step": 8896 }, { "epoch": 0.7984563954140584, "grad_norm": 0.5353796895822703, "learning_rate": 9.182289057330656e-06, "loss": 0.5337, "step": 8897 }, { "epoch": 0.7985461398667295, "grad_norm": 0.44102986271250355, "learning_rate": 9.182002877697068e-06, "loss": 0.4011, "step": 8898 }, { "epoch": 0.7986358843194005, "grad_norm": 0.47693703049329256, "learning_rate": 9.181716652455401e-06, "loss": 0.4276, "step": 8899 }, { "epoch": 0.7987256287720715, "grad_norm": 0.48125437042744657, "learning_rate": 9.181430381608775e-06, "loss": 0.4682, "step": 8900 }, { "epoch": 0.7988153732247425, "grad_norm": 0.513294360942177, "learning_rate": 9.181144065160315e-06, "loss": 0.4705, "step": 8901 }, { "epoch": 0.7989051176774136, "grad_norm": 0.47014200176146825, "learning_rate": 9.18085770311314e-06, "loss": 0.4232, "step": 8902 }, { "epoch": 0.7989948621300846, "grad_norm": 0.4707152362181974, "learning_rate": 9.180571295470375e-06, "loss": 0.4465, "step": 8903 }, { "epoch": 0.7990846065827556, "grad_norm": 0.44793813363993934, "learning_rate": 9.180284842235143e-06, "loss": 0.4062, "step": 8904 }, { "epoch": 0.7991743510354267, "grad_norm": 0.47246585880612835, "learning_rate": 9.179998343410569e-06, "loss": 0.4117, "step": 8905 }, { "epoch": 0.7992640954880976, "grad_norm": 0.4496089127561095, "learning_rate": 9.179711798999774e-06, "loss": 0.3929, "step": 8906 }, { "epoch": 0.7993538399407687, "grad_norm": 0.4726484414031548, "learning_rate": 9.179425209005887e-06, "loss": 0.4018, "step": 8907 }, { "epoch": 0.7994435843934397, "grad_norm": 0.48910877628122124, "learning_rate": 9.179138573432033e-06, "loss": 0.4439, "step": 8908 }, { "epoch": 0.7995333288461107, "grad_norm": 0.47345338701242096, "learning_rate": 9.178851892281336e-06, "loss": 0.4488, "step": 8909 }, { "epoch": 0.7996230732987817, "grad_norm": 0.49128825095391704, "learning_rate": 9.178565165556923e-06, "loss": 0.4513, "step": 8910 }, { "epoch": 0.7997128177514528, "grad_norm": 0.4510037765586899, "learning_rate": 9.178278393261923e-06, "loss": 0.4398, "step": 8911 }, { "epoch": 0.7998025622041237, "grad_norm": 0.512815190409187, "learning_rate": 9.17799157539946e-06, "loss": 0.4466, "step": 8912 }, { "epoch": 0.7998923066567948, "grad_norm": 0.45496192417814835, "learning_rate": 9.177704711972666e-06, "loss": 0.3918, "step": 8913 }, { "epoch": 0.7999820511094657, "grad_norm": 0.49056354360241944, "learning_rate": 9.177417802984667e-06, "loss": 0.4765, "step": 8914 }, { "epoch": 0.8000717955621368, "grad_norm": 0.5554130205063968, "learning_rate": 9.177130848438591e-06, "loss": 0.5657, "step": 8915 }, { "epoch": 0.8001615400148079, "grad_norm": 0.4436039024577966, "learning_rate": 9.17684384833757e-06, "loss": 0.3992, "step": 8916 }, { "epoch": 0.8002512844674788, "grad_norm": 0.4871078015686843, "learning_rate": 9.176556802684732e-06, "loss": 0.4464, "step": 8917 }, { "epoch": 0.8003410289201499, "grad_norm": 0.48488122979333464, "learning_rate": 9.17626971148321e-06, "loss": 0.4941, "step": 8918 }, { "epoch": 0.8004307733728209, "grad_norm": 0.488661872709929, "learning_rate": 9.175982574736133e-06, "loss": 0.4385, "step": 8919 }, { "epoch": 0.8005205178254919, "grad_norm": 0.4864257296906334, "learning_rate": 9.175695392446631e-06, "loss": 0.4085, "step": 8920 }, { "epoch": 0.8006102622781629, "grad_norm": 0.4775072642110195, "learning_rate": 9.17540816461784e-06, "loss": 0.4351, "step": 8921 }, { "epoch": 0.800700006730834, "grad_norm": 0.48434425705011325, "learning_rate": 9.17512089125289e-06, "loss": 0.4699, "step": 8922 }, { "epoch": 0.8007897511835049, "grad_norm": 0.4775093277374858, "learning_rate": 9.17483357235491e-06, "loss": 0.4717, "step": 8923 }, { "epoch": 0.800879495636176, "grad_norm": 0.5193450357808126, "learning_rate": 9.174546207927042e-06, "loss": 0.4839, "step": 8924 }, { "epoch": 0.800969240088847, "grad_norm": 0.5059057184246274, "learning_rate": 9.174258797972412e-06, "loss": 0.4853, "step": 8925 }, { "epoch": 0.801058984541518, "grad_norm": 0.43254545697246327, "learning_rate": 9.173971342494162e-06, "loss": 0.3717, "step": 8926 }, { "epoch": 0.8011487289941891, "grad_norm": 0.47011930528633983, "learning_rate": 9.17368384149542e-06, "loss": 0.4065, "step": 8927 }, { "epoch": 0.8012384734468601, "grad_norm": 0.4904220338782416, "learning_rate": 9.173396294979324e-06, "loss": 0.4778, "step": 8928 }, { "epoch": 0.8013282178995311, "grad_norm": 0.4957246255406004, "learning_rate": 9.173108702949011e-06, "loss": 0.4986, "step": 8929 }, { "epoch": 0.8014179623522021, "grad_norm": 0.45612448228808844, "learning_rate": 9.172821065407617e-06, "loss": 0.3828, "step": 8930 }, { "epoch": 0.8015077068048732, "grad_norm": 0.495737359452084, "learning_rate": 9.172533382358278e-06, "loss": 0.4792, "step": 8931 }, { "epoch": 0.8015974512575441, "grad_norm": 0.41741609118479195, "learning_rate": 9.172245653804132e-06, "loss": 0.381, "step": 8932 }, { "epoch": 0.8016871957102152, "grad_norm": 0.46485118934098124, "learning_rate": 9.171957879748316e-06, "loss": 0.4627, "step": 8933 }, { "epoch": 0.8017769401628861, "grad_norm": 0.47951939015258926, "learning_rate": 9.171670060193971e-06, "loss": 0.4372, "step": 8934 }, { "epoch": 0.8018666846155572, "grad_norm": 0.4616456923827537, "learning_rate": 9.171382195144233e-06, "loss": 0.4233, "step": 8935 }, { "epoch": 0.8019564290682282, "grad_norm": 0.5385750725464774, "learning_rate": 9.171094284602241e-06, "loss": 0.486, "step": 8936 }, { "epoch": 0.8020461735208992, "grad_norm": 0.47522059196631616, "learning_rate": 9.17080632857114e-06, "loss": 0.4476, "step": 8937 }, { "epoch": 0.8021359179735703, "grad_norm": 0.4877410445237426, "learning_rate": 9.170518327054065e-06, "loss": 0.4933, "step": 8938 }, { "epoch": 0.8022256624262413, "grad_norm": 0.4530895710582947, "learning_rate": 9.170230280054159e-06, "loss": 0.436, "step": 8939 }, { "epoch": 0.8023154068789123, "grad_norm": 0.43812104950842234, "learning_rate": 9.169942187574564e-06, "loss": 0.4033, "step": 8940 }, { "epoch": 0.8024051513315833, "grad_norm": 0.48856126102216535, "learning_rate": 9.169654049618418e-06, "loss": 0.4887, "step": 8941 }, { "epoch": 0.8024948957842544, "grad_norm": 0.4688856467834611, "learning_rate": 9.169365866188868e-06, "loss": 0.4339, "step": 8942 }, { "epoch": 0.8025846402369253, "grad_norm": 0.4692405819919559, "learning_rate": 9.169077637289055e-06, "loss": 0.4573, "step": 8943 }, { "epoch": 0.8026743846895964, "grad_norm": 0.5047110777770639, "learning_rate": 9.168789362922123e-06, "loss": 0.4965, "step": 8944 }, { "epoch": 0.8027641291422674, "grad_norm": 0.46025700348026943, "learning_rate": 9.168501043091215e-06, "loss": 0.4101, "step": 8945 }, { "epoch": 0.8028538735949384, "grad_norm": 0.483746011801659, "learning_rate": 9.168212677799477e-06, "loss": 0.4817, "step": 8946 }, { "epoch": 0.8029436180476094, "grad_norm": 0.47958151782464653, "learning_rate": 9.167924267050052e-06, "loss": 0.4439, "step": 8947 }, { "epoch": 0.8030333625002805, "grad_norm": 0.49162373451436125, "learning_rate": 9.167635810846084e-06, "loss": 0.4307, "step": 8948 }, { "epoch": 0.8031231069529514, "grad_norm": 0.4916534741215092, "learning_rate": 9.167347309190724e-06, "loss": 0.4797, "step": 8949 }, { "epoch": 0.8032128514056225, "grad_norm": 0.5206465442995722, "learning_rate": 9.167058762087114e-06, "loss": 0.476, "step": 8950 }, { "epoch": 0.8033025958582936, "grad_norm": 0.48973372624616784, "learning_rate": 9.166770169538401e-06, "loss": 0.4051, "step": 8951 }, { "epoch": 0.8033923403109645, "grad_norm": 0.4943191129795044, "learning_rate": 9.166481531547734e-06, "loss": 0.4245, "step": 8952 }, { "epoch": 0.8034820847636356, "grad_norm": 0.48915708150945575, "learning_rate": 9.166192848118261e-06, "loss": 0.4422, "step": 8953 }, { "epoch": 0.8035718292163065, "grad_norm": 0.48534019578525306, "learning_rate": 9.165904119253129e-06, "loss": 0.4398, "step": 8954 }, { "epoch": 0.8036615736689776, "grad_norm": 0.4959730853380947, "learning_rate": 9.165615344955486e-06, "loss": 0.4514, "step": 8955 }, { "epoch": 0.8037513181216486, "grad_norm": 0.4622494241902904, "learning_rate": 9.165326525228484e-06, "loss": 0.4453, "step": 8956 }, { "epoch": 0.8038410625743196, "grad_norm": 0.48064195716781055, "learning_rate": 9.165037660075272e-06, "loss": 0.4433, "step": 8957 }, { "epoch": 0.8039308070269906, "grad_norm": 0.4561486338148738, "learning_rate": 9.164748749499e-06, "loss": 0.4043, "step": 8958 }, { "epoch": 0.8040205514796617, "grad_norm": 0.5035354291198066, "learning_rate": 9.164459793502816e-06, "loss": 0.5054, "step": 8959 }, { "epoch": 0.8041102959323326, "grad_norm": 0.4604227819636688, "learning_rate": 9.164170792089875e-06, "loss": 0.4257, "step": 8960 }, { "epoch": 0.8042000403850037, "grad_norm": 0.4602384001806815, "learning_rate": 9.163881745263328e-06, "loss": 0.4388, "step": 8961 }, { "epoch": 0.8042897848376748, "grad_norm": 0.47496326118542465, "learning_rate": 9.163592653026327e-06, "loss": 0.4356, "step": 8962 }, { "epoch": 0.8043795292903457, "grad_norm": 0.5487398279803082, "learning_rate": 9.163303515382026e-06, "loss": 0.5536, "step": 8963 }, { "epoch": 0.8044692737430168, "grad_norm": 0.5048308782099165, "learning_rate": 9.163014332333574e-06, "loss": 0.4824, "step": 8964 }, { "epoch": 0.8045590181956878, "grad_norm": 0.5959854562517349, "learning_rate": 9.162725103884128e-06, "loss": 0.5325, "step": 8965 }, { "epoch": 0.8046487626483588, "grad_norm": 0.5217332178797089, "learning_rate": 9.162435830036845e-06, "loss": 0.4908, "step": 8966 }, { "epoch": 0.8047385071010298, "grad_norm": 0.5292897211517316, "learning_rate": 9.162146510794875e-06, "loss": 0.4237, "step": 8967 }, { "epoch": 0.8048282515537009, "grad_norm": 0.5458489439819014, "learning_rate": 9.161857146161375e-06, "loss": 0.5156, "step": 8968 }, { "epoch": 0.8049179960063718, "grad_norm": 0.47343343509479296, "learning_rate": 9.1615677361395e-06, "loss": 0.3782, "step": 8969 }, { "epoch": 0.8050077404590429, "grad_norm": 0.4803559984817471, "learning_rate": 9.161278280732409e-06, "loss": 0.4102, "step": 8970 }, { "epoch": 0.8050974849117138, "grad_norm": 0.5190880454323585, "learning_rate": 9.160988779943257e-06, "loss": 0.4905, "step": 8971 }, { "epoch": 0.8051872293643849, "grad_norm": 0.5365412393679742, "learning_rate": 9.160699233775199e-06, "loss": 0.4634, "step": 8972 }, { "epoch": 0.805276973817056, "grad_norm": 0.49135621671839386, "learning_rate": 9.160409642231396e-06, "loss": 0.4584, "step": 8973 }, { "epoch": 0.8053667182697269, "grad_norm": 0.4878903613933012, "learning_rate": 9.160120005315003e-06, "loss": 0.4373, "step": 8974 }, { "epoch": 0.805456462722398, "grad_norm": 0.5011509372178691, "learning_rate": 9.159830323029184e-06, "loss": 0.4297, "step": 8975 }, { "epoch": 0.805546207175069, "grad_norm": 0.5425071985642118, "learning_rate": 9.159540595377092e-06, "loss": 0.4898, "step": 8976 }, { "epoch": 0.80563595162774, "grad_norm": 0.5154571876741445, "learning_rate": 9.159250822361892e-06, "loss": 0.5024, "step": 8977 }, { "epoch": 0.805725696080411, "grad_norm": 0.48479928120744753, "learning_rate": 9.15896100398674e-06, "loss": 0.4443, "step": 8978 }, { "epoch": 0.8058154405330821, "grad_norm": 0.5058362266235459, "learning_rate": 9.158671140254799e-06, "loss": 0.4291, "step": 8979 }, { "epoch": 0.805905184985753, "grad_norm": 0.4473673490735524, "learning_rate": 9.15838123116923e-06, "loss": 0.429, "step": 8980 }, { "epoch": 0.8059949294384241, "grad_norm": 0.4822437490149857, "learning_rate": 9.158091276733194e-06, "loss": 0.4441, "step": 8981 }, { "epoch": 0.8060846738910951, "grad_norm": 0.4535071730726287, "learning_rate": 9.157801276949855e-06, "loss": 0.4149, "step": 8982 }, { "epoch": 0.8061744183437661, "grad_norm": 0.4820647917066344, "learning_rate": 9.157511231822374e-06, "loss": 0.4244, "step": 8983 }, { "epoch": 0.8062641627964372, "grad_norm": 0.48422150338134423, "learning_rate": 9.157221141353914e-06, "loss": 0.4483, "step": 8984 }, { "epoch": 0.8063539072491082, "grad_norm": 0.4598665084658775, "learning_rate": 9.156931005547637e-06, "loss": 0.4185, "step": 8985 }, { "epoch": 0.8064436517017792, "grad_norm": 0.4817502675692247, "learning_rate": 9.156640824406713e-06, "loss": 0.4331, "step": 8986 }, { "epoch": 0.8065333961544502, "grad_norm": 0.4805953656298059, "learning_rate": 9.1563505979343e-06, "loss": 0.4366, "step": 8987 }, { "epoch": 0.8066231406071213, "grad_norm": 0.4946946449318197, "learning_rate": 9.156060326133568e-06, "loss": 0.4419, "step": 8988 }, { "epoch": 0.8067128850597922, "grad_norm": 0.4975034628803258, "learning_rate": 9.155770009007678e-06, "loss": 0.447, "step": 8989 }, { "epoch": 0.8068026295124633, "grad_norm": 0.4531930633072694, "learning_rate": 9.155479646559801e-06, "loss": 0.3941, "step": 8990 }, { "epoch": 0.8068923739651342, "grad_norm": 0.502513803444301, "learning_rate": 9.155189238793103e-06, "loss": 0.4405, "step": 8991 }, { "epoch": 0.8069821184178053, "grad_norm": 0.4822930940688595, "learning_rate": 9.154898785710746e-06, "loss": 0.4635, "step": 8992 }, { "epoch": 0.8070718628704763, "grad_norm": 0.49956660555650245, "learning_rate": 9.154608287315904e-06, "loss": 0.4706, "step": 8993 }, { "epoch": 0.8071616073231473, "grad_norm": 0.48580740922563875, "learning_rate": 9.154317743611742e-06, "loss": 0.3854, "step": 8994 }, { "epoch": 0.8072513517758183, "grad_norm": 0.4609914548527063, "learning_rate": 9.154027154601429e-06, "loss": 0.4639, "step": 8995 }, { "epoch": 0.8073410962284894, "grad_norm": 0.4923405721028617, "learning_rate": 9.153736520288133e-06, "loss": 0.4604, "step": 8996 }, { "epoch": 0.8074308406811604, "grad_norm": 0.5227678957463706, "learning_rate": 9.153445840675026e-06, "loss": 0.5092, "step": 8997 }, { "epoch": 0.8075205851338314, "grad_norm": 0.4440471935403028, "learning_rate": 9.153155115765275e-06, "loss": 0.3835, "step": 8998 }, { "epoch": 0.8076103295865025, "grad_norm": 0.4724561979551926, "learning_rate": 9.152864345562054e-06, "loss": 0.4101, "step": 8999 }, { "epoch": 0.8077000740391734, "grad_norm": 0.4704650231231959, "learning_rate": 9.152573530068533e-06, "loss": 0.4195, "step": 9000 }, { "epoch": 0.8077898184918445, "grad_norm": 0.4974170659029701, "learning_rate": 9.152282669287882e-06, "loss": 0.4737, "step": 9001 }, { "epoch": 0.8078795629445155, "grad_norm": 0.47864610479100433, "learning_rate": 9.151991763223274e-06, "loss": 0.4738, "step": 9002 }, { "epoch": 0.8079693073971865, "grad_norm": 0.49632386060715916, "learning_rate": 9.151700811877882e-06, "loss": 0.4582, "step": 9003 }, { "epoch": 0.8080590518498575, "grad_norm": 0.5134808428323375, "learning_rate": 9.151409815254878e-06, "loss": 0.4756, "step": 9004 }, { "epoch": 0.8081487963025286, "grad_norm": 0.4906196237765198, "learning_rate": 9.151118773357437e-06, "loss": 0.4665, "step": 9005 }, { "epoch": 0.8082385407551995, "grad_norm": 0.5034723187684149, "learning_rate": 9.150827686188732e-06, "loss": 0.4519, "step": 9006 }, { "epoch": 0.8083282852078706, "grad_norm": 0.46290810352768424, "learning_rate": 9.150536553751938e-06, "loss": 0.3796, "step": 9007 }, { "epoch": 0.8084180296605417, "grad_norm": 0.5279253529308948, "learning_rate": 9.15024537605023e-06, "loss": 0.5595, "step": 9008 }, { "epoch": 0.8085077741132126, "grad_norm": 0.49174783994879606, "learning_rate": 9.149954153086785e-06, "loss": 0.4917, "step": 9009 }, { "epoch": 0.8085975185658837, "grad_norm": 0.4875366857595988, "learning_rate": 9.149662884864773e-06, "loss": 0.4626, "step": 9010 }, { "epoch": 0.8086872630185546, "grad_norm": 0.5021668178804186, "learning_rate": 9.149371571387379e-06, "loss": 0.4471, "step": 9011 }, { "epoch": 0.8087770074712257, "grad_norm": 0.4625087913892563, "learning_rate": 9.149080212657774e-06, "loss": 0.4034, "step": 9012 }, { "epoch": 0.8088667519238967, "grad_norm": 0.4736728308339841, "learning_rate": 9.14878880867914e-06, "loss": 0.4321, "step": 9013 }, { "epoch": 0.8089564963765677, "grad_norm": 0.494473958979203, "learning_rate": 9.14849735945465e-06, "loss": 0.4408, "step": 9014 }, { "epoch": 0.8090462408292387, "grad_norm": 0.5319743127214664, "learning_rate": 9.148205864987485e-06, "loss": 0.4681, "step": 9015 }, { "epoch": 0.8091359852819098, "grad_norm": 0.536585726039843, "learning_rate": 9.147914325280824e-06, "loss": 0.4663, "step": 9016 }, { "epoch": 0.8092257297345807, "grad_norm": 0.47250149596178687, "learning_rate": 9.147622740337847e-06, "loss": 0.4513, "step": 9017 }, { "epoch": 0.8093154741872518, "grad_norm": 0.45479577504131324, "learning_rate": 9.147331110161733e-06, "loss": 0.4155, "step": 9018 }, { "epoch": 0.8094052186399229, "grad_norm": 0.4736950306603159, "learning_rate": 9.147039434755663e-06, "loss": 0.4527, "step": 9019 }, { "epoch": 0.8094949630925938, "grad_norm": 0.4817176982099096, "learning_rate": 9.146747714122819e-06, "loss": 0.4443, "step": 9020 }, { "epoch": 0.8095847075452649, "grad_norm": 0.5176701479737364, "learning_rate": 9.14645594826638e-06, "loss": 0.5025, "step": 9021 }, { "epoch": 0.8096744519979359, "grad_norm": 0.44049200833966534, "learning_rate": 9.146164137189527e-06, "loss": 0.4299, "step": 9022 }, { "epoch": 0.8097641964506069, "grad_norm": 0.49270684453040253, "learning_rate": 9.145872280895448e-06, "loss": 0.4558, "step": 9023 }, { "epoch": 0.8098539409032779, "grad_norm": 0.46784300892563097, "learning_rate": 9.14558037938732e-06, "loss": 0.4355, "step": 9024 }, { "epoch": 0.809943685355949, "grad_norm": 0.4508027433482751, "learning_rate": 9.145288432668332e-06, "loss": 0.4155, "step": 9025 }, { "epoch": 0.8100334298086199, "grad_norm": 0.47846113960731224, "learning_rate": 9.144996440741663e-06, "loss": 0.4553, "step": 9026 }, { "epoch": 0.810123174261291, "grad_norm": 0.5249823071530355, "learning_rate": 9.144704403610498e-06, "loss": 0.4653, "step": 9027 }, { "epoch": 0.810212918713962, "grad_norm": 0.5445580877066036, "learning_rate": 9.144412321278024e-06, "loss": 0.4963, "step": 9028 }, { "epoch": 0.810302663166633, "grad_norm": 0.46085438178287746, "learning_rate": 9.144120193747427e-06, "loss": 0.4401, "step": 9029 }, { "epoch": 0.810392407619304, "grad_norm": 0.5346828177872017, "learning_rate": 9.14382802102189e-06, "loss": 0.4908, "step": 9030 }, { "epoch": 0.810482152071975, "grad_norm": 0.4512986447262143, "learning_rate": 9.143535803104601e-06, "loss": 0.4003, "step": 9031 }, { "epoch": 0.8105718965246461, "grad_norm": 0.4945420978878411, "learning_rate": 9.143243539998747e-06, "loss": 0.4669, "step": 9032 }, { "epoch": 0.8106616409773171, "grad_norm": 0.480623073471994, "learning_rate": 9.142951231707516e-06, "loss": 0.4182, "step": 9033 }, { "epoch": 0.8107513854299881, "grad_norm": 0.4627806760327621, "learning_rate": 9.142658878234092e-06, "loss": 0.4103, "step": 9034 }, { "epoch": 0.8108411298826591, "grad_norm": 0.4949508396865597, "learning_rate": 9.142366479581669e-06, "loss": 0.4577, "step": 9035 }, { "epoch": 0.8109308743353302, "grad_norm": 0.445639399552557, "learning_rate": 9.142074035753432e-06, "loss": 0.4051, "step": 9036 }, { "epoch": 0.8110206187880011, "grad_norm": 0.4453572605654388, "learning_rate": 9.14178154675257e-06, "loss": 0.4127, "step": 9037 }, { "epoch": 0.8111103632406722, "grad_norm": 0.4688876609806419, "learning_rate": 9.141489012582277e-06, "loss": 0.4139, "step": 9038 }, { "epoch": 0.8112001076933432, "grad_norm": 0.5197771262278899, "learning_rate": 9.141196433245739e-06, "loss": 0.5276, "step": 9039 }, { "epoch": 0.8112898521460142, "grad_norm": 0.5029977902367471, "learning_rate": 9.140903808746149e-06, "loss": 0.5126, "step": 9040 }, { "epoch": 0.8113795965986852, "grad_norm": 0.44660066665875603, "learning_rate": 9.140611139086696e-06, "loss": 0.4312, "step": 9041 }, { "epoch": 0.8114693410513563, "grad_norm": 0.4507297612028036, "learning_rate": 9.140318424270576e-06, "loss": 0.4321, "step": 9042 }, { "epoch": 0.8115590855040273, "grad_norm": 0.5126346154277461, "learning_rate": 9.140025664300977e-06, "loss": 0.522, "step": 9043 }, { "epoch": 0.8116488299566983, "grad_norm": 0.5174390806624801, "learning_rate": 9.139732859181095e-06, "loss": 0.5183, "step": 9044 }, { "epoch": 0.8117385744093694, "grad_norm": 0.5218261304153502, "learning_rate": 9.13944000891412e-06, "loss": 0.5179, "step": 9045 }, { "epoch": 0.8118283188620403, "grad_norm": 0.5861667885245068, "learning_rate": 9.139147113503247e-06, "loss": 0.4922, "step": 9046 }, { "epoch": 0.8119180633147114, "grad_norm": 0.5178036586203193, "learning_rate": 9.138854172951672e-06, "loss": 0.5452, "step": 9047 }, { "epoch": 0.8120078077673824, "grad_norm": 0.46837495897513026, "learning_rate": 9.138561187262589e-06, "loss": 0.4113, "step": 9048 }, { "epoch": 0.8120975522200534, "grad_norm": 0.5156540791648554, "learning_rate": 9.138268156439192e-06, "loss": 0.4858, "step": 9049 }, { "epoch": 0.8121872966727244, "grad_norm": 0.4546902097771862, "learning_rate": 9.137975080484679e-06, "loss": 0.3633, "step": 9050 }, { "epoch": 0.8122770411253954, "grad_norm": 0.5621093333254992, "learning_rate": 9.137681959402244e-06, "loss": 0.5363, "step": 9051 }, { "epoch": 0.8123667855780664, "grad_norm": 0.4824677766109311, "learning_rate": 9.137388793195085e-06, "loss": 0.4126, "step": 9052 }, { "epoch": 0.8124565300307375, "grad_norm": 0.4802955126033235, "learning_rate": 9.137095581866397e-06, "loss": 0.4562, "step": 9053 }, { "epoch": 0.8125462744834085, "grad_norm": 0.493512718075368, "learning_rate": 9.13680232541938e-06, "loss": 0.446, "step": 9054 }, { "epoch": 0.8126360189360795, "grad_norm": 0.4939352523316111, "learning_rate": 9.136509023857233e-06, "loss": 0.4612, "step": 9055 }, { "epoch": 0.8127257633887506, "grad_norm": 0.45744823759773584, "learning_rate": 9.136215677183153e-06, "loss": 0.3993, "step": 9056 }, { "epoch": 0.8128155078414215, "grad_norm": 0.47752003649552877, "learning_rate": 9.135922285400339e-06, "loss": 0.4822, "step": 9057 }, { "epoch": 0.8129052522940926, "grad_norm": 0.4533750732071999, "learning_rate": 9.13562884851199e-06, "loss": 0.4127, "step": 9058 }, { "epoch": 0.8129949967467636, "grad_norm": 0.4810261582153553, "learning_rate": 9.135335366521308e-06, "loss": 0.4715, "step": 9059 }, { "epoch": 0.8130847411994346, "grad_norm": 0.5248661147985056, "learning_rate": 9.135041839431493e-06, "loss": 0.5093, "step": 9060 }, { "epoch": 0.8131744856521056, "grad_norm": 0.5142700187119568, "learning_rate": 9.134748267245748e-06, "loss": 0.5295, "step": 9061 }, { "epoch": 0.8132642301047767, "grad_norm": 0.48269471781443424, "learning_rate": 9.13445464996727e-06, "loss": 0.4354, "step": 9062 }, { "epoch": 0.8133539745574476, "grad_norm": 0.5019241827358275, "learning_rate": 9.134160987599265e-06, "loss": 0.482, "step": 9063 }, { "epoch": 0.8134437190101187, "grad_norm": 0.4777312553919921, "learning_rate": 9.133867280144935e-06, "loss": 0.4543, "step": 9064 }, { "epoch": 0.8135334634627897, "grad_norm": 0.4844451909335962, "learning_rate": 9.133573527607481e-06, "loss": 0.4568, "step": 9065 }, { "epoch": 0.8136232079154607, "grad_norm": 0.48109462319859564, "learning_rate": 9.133279729990111e-06, "loss": 0.4521, "step": 9066 }, { "epoch": 0.8137129523681318, "grad_norm": 0.5377385129842015, "learning_rate": 9.132985887296026e-06, "loss": 0.5318, "step": 9067 }, { "epoch": 0.8138026968208028, "grad_norm": 0.5239269100556018, "learning_rate": 9.13269199952843e-06, "loss": 0.4782, "step": 9068 }, { "epoch": 0.8138924412734738, "grad_norm": 0.4487514840800037, "learning_rate": 9.132398066690528e-06, "loss": 0.3778, "step": 9069 }, { "epoch": 0.8139821857261448, "grad_norm": 0.49783506500022257, "learning_rate": 9.132104088785527e-06, "loss": 0.4176, "step": 9070 }, { "epoch": 0.8140719301788159, "grad_norm": 0.5025166587798855, "learning_rate": 9.131810065816634e-06, "loss": 0.481, "step": 9071 }, { "epoch": 0.8141616746314868, "grad_norm": 0.4978689148213563, "learning_rate": 9.131515997787051e-06, "loss": 0.4818, "step": 9072 }, { "epoch": 0.8142514190841579, "grad_norm": 0.4872138394387006, "learning_rate": 9.131221884699992e-06, "loss": 0.4726, "step": 9073 }, { "epoch": 0.8143411635368288, "grad_norm": 0.5079708828148513, "learning_rate": 9.130927726558658e-06, "loss": 0.4549, "step": 9074 }, { "epoch": 0.8144309079894999, "grad_norm": 0.47054885002146746, "learning_rate": 9.130633523366261e-06, "loss": 0.4651, "step": 9075 }, { "epoch": 0.8145206524421709, "grad_norm": 0.4967848011510662, "learning_rate": 9.130339275126008e-06, "loss": 0.4801, "step": 9076 }, { "epoch": 0.8146103968948419, "grad_norm": 0.49712793576013703, "learning_rate": 9.130044981841108e-06, "loss": 0.4837, "step": 9077 }, { "epoch": 0.814700141347513, "grad_norm": 0.5041161918211403, "learning_rate": 9.129750643514772e-06, "loss": 0.4269, "step": 9078 }, { "epoch": 0.814789885800184, "grad_norm": 0.44696980349828025, "learning_rate": 9.12945626015021e-06, "loss": 0.3918, "step": 9079 }, { "epoch": 0.814879630252855, "grad_norm": 0.5042062230020765, "learning_rate": 9.129161831750629e-06, "loss": 0.4672, "step": 9080 }, { "epoch": 0.814969374705526, "grad_norm": 0.5535833142375733, "learning_rate": 9.128867358319243e-06, "loss": 0.5051, "step": 9081 }, { "epoch": 0.8150591191581971, "grad_norm": 0.48729239352991194, "learning_rate": 9.128572839859262e-06, "loss": 0.4352, "step": 9082 }, { "epoch": 0.815148863610868, "grad_norm": 0.5374236358609147, "learning_rate": 9.128278276373898e-06, "loss": 0.5162, "step": 9083 }, { "epoch": 0.8152386080635391, "grad_norm": 0.4987838077903219, "learning_rate": 9.127983667866366e-06, "loss": 0.4889, "step": 9084 }, { "epoch": 0.81532835251621, "grad_norm": 0.5012440891283715, "learning_rate": 9.127689014339877e-06, "loss": 0.494, "step": 9085 }, { "epoch": 0.8154180969688811, "grad_norm": 0.4720541593746817, "learning_rate": 9.127394315797644e-06, "loss": 0.4544, "step": 9086 }, { "epoch": 0.8155078414215521, "grad_norm": 0.49873394777184166, "learning_rate": 9.127099572242882e-06, "loss": 0.42, "step": 9087 }, { "epoch": 0.8155975858742232, "grad_norm": 0.4678190534488331, "learning_rate": 9.126804783678804e-06, "loss": 0.4028, "step": 9088 }, { "epoch": 0.8156873303268942, "grad_norm": 0.4858180128460391, "learning_rate": 9.126509950108627e-06, "loss": 0.478, "step": 9089 }, { "epoch": 0.8157770747795652, "grad_norm": 0.4967378804715015, "learning_rate": 9.126215071535565e-06, "loss": 0.4633, "step": 9090 }, { "epoch": 0.8158668192322363, "grad_norm": 0.4961906456883661, "learning_rate": 9.125920147962832e-06, "loss": 0.446, "step": 9091 }, { "epoch": 0.8159565636849072, "grad_norm": 0.45738529750624657, "learning_rate": 9.12562517939365e-06, "loss": 0.4503, "step": 9092 }, { "epoch": 0.8160463081375783, "grad_norm": 0.4627562971149884, "learning_rate": 9.12533016583123e-06, "loss": 0.38, "step": 9093 }, { "epoch": 0.8161360525902492, "grad_norm": 0.43203715509919716, "learning_rate": 9.125035107278792e-06, "loss": 0.3905, "step": 9094 }, { "epoch": 0.8162257970429203, "grad_norm": 0.49172613210853394, "learning_rate": 9.124740003739554e-06, "loss": 0.4733, "step": 9095 }, { "epoch": 0.8163155414955913, "grad_norm": 0.4966352561761444, "learning_rate": 9.124444855216731e-06, "loss": 0.4433, "step": 9096 }, { "epoch": 0.8164052859482623, "grad_norm": 0.5085365705986004, "learning_rate": 9.124149661713548e-06, "loss": 0.4538, "step": 9097 }, { "epoch": 0.8164950304009333, "grad_norm": 0.48398914183062236, "learning_rate": 9.12385442323322e-06, "loss": 0.4685, "step": 9098 }, { "epoch": 0.8165847748536044, "grad_norm": 0.5126882717010579, "learning_rate": 9.123559139778967e-06, "loss": 0.4401, "step": 9099 }, { "epoch": 0.8166745193062753, "grad_norm": 0.49117525315653154, "learning_rate": 9.123263811354014e-06, "loss": 0.4417, "step": 9100 }, { "epoch": 0.8167642637589464, "grad_norm": 0.44378987569567774, "learning_rate": 9.122968437961573e-06, "loss": 0.3892, "step": 9101 }, { "epoch": 0.8168540082116175, "grad_norm": 0.5170329905831815, "learning_rate": 9.122673019604873e-06, "loss": 0.4524, "step": 9102 }, { "epoch": 0.8169437526642884, "grad_norm": 0.5021153767715826, "learning_rate": 9.122377556287133e-06, "loss": 0.4686, "step": 9103 }, { "epoch": 0.8170334971169595, "grad_norm": 0.4944335438408028, "learning_rate": 9.122082048011573e-06, "loss": 0.508, "step": 9104 }, { "epoch": 0.8171232415696305, "grad_norm": 0.4929911957520078, "learning_rate": 9.12178649478142e-06, "loss": 0.4303, "step": 9105 }, { "epoch": 0.8172129860223015, "grad_norm": 0.49582473688647866, "learning_rate": 9.121490896599894e-06, "loss": 0.4241, "step": 9106 }, { "epoch": 0.8173027304749725, "grad_norm": 0.5031928818622904, "learning_rate": 9.121195253470222e-06, "loss": 0.4733, "step": 9107 }, { "epoch": 0.8173924749276436, "grad_norm": 0.5186218083430887, "learning_rate": 9.120899565395624e-06, "loss": 0.451, "step": 9108 }, { "epoch": 0.8174822193803145, "grad_norm": 0.5467072847995257, "learning_rate": 9.120603832379327e-06, "loss": 0.5035, "step": 9109 }, { "epoch": 0.8175719638329856, "grad_norm": 0.46946640673663814, "learning_rate": 9.120308054424557e-06, "loss": 0.452, "step": 9110 }, { "epoch": 0.8176617082856565, "grad_norm": 0.5123624590998712, "learning_rate": 9.120012231534539e-06, "loss": 0.4805, "step": 9111 }, { "epoch": 0.8177514527383276, "grad_norm": 0.5146227253339092, "learning_rate": 9.119716363712499e-06, "loss": 0.4691, "step": 9112 }, { "epoch": 0.8178411971909987, "grad_norm": 0.4562044800285872, "learning_rate": 9.119420450961662e-06, "loss": 0.4218, "step": 9113 }, { "epoch": 0.8179309416436696, "grad_norm": 0.5161959147314894, "learning_rate": 9.119124493285259e-06, "loss": 0.4798, "step": 9114 }, { "epoch": 0.8180206860963407, "grad_norm": 0.5145928293168384, "learning_rate": 9.118828490686513e-06, "loss": 0.5012, "step": 9115 }, { "epoch": 0.8181104305490117, "grad_norm": 0.5077856634873181, "learning_rate": 9.118532443168658e-06, "loss": 0.4539, "step": 9116 }, { "epoch": 0.8182001750016827, "grad_norm": 0.49464375799933125, "learning_rate": 9.118236350734916e-06, "loss": 0.451, "step": 9117 }, { "epoch": 0.8182899194543537, "grad_norm": 0.4774756647660947, "learning_rate": 9.11794021338852e-06, "loss": 0.4599, "step": 9118 }, { "epoch": 0.8183796639070248, "grad_norm": 0.4968248123620218, "learning_rate": 9.1176440311327e-06, "loss": 0.4353, "step": 9119 }, { "epoch": 0.8184694083596957, "grad_norm": 0.5376594323601349, "learning_rate": 9.117347803970687e-06, "loss": 0.4988, "step": 9120 }, { "epoch": 0.8185591528123668, "grad_norm": 0.4881721685328869, "learning_rate": 9.117051531905707e-06, "loss": 0.4415, "step": 9121 }, { "epoch": 0.8186488972650378, "grad_norm": 0.43628658768961553, "learning_rate": 9.116755214940993e-06, "loss": 0.388, "step": 9122 }, { "epoch": 0.8187386417177088, "grad_norm": 0.49808132565325647, "learning_rate": 9.11645885307978e-06, "loss": 0.4597, "step": 9123 }, { "epoch": 0.8188283861703799, "grad_norm": 0.5149818171014358, "learning_rate": 9.116162446325296e-06, "loss": 0.5004, "step": 9124 }, { "epoch": 0.8189181306230509, "grad_norm": 0.4985990849852574, "learning_rate": 9.115865994680774e-06, "loss": 0.4726, "step": 9125 }, { "epoch": 0.8190078750757219, "grad_norm": 0.46981475761718533, "learning_rate": 9.115569498149451e-06, "loss": 0.3957, "step": 9126 }, { "epoch": 0.8190976195283929, "grad_norm": 0.48339931581166834, "learning_rate": 9.115272956734555e-06, "loss": 0.4277, "step": 9127 }, { "epoch": 0.819187363981064, "grad_norm": 0.473111501316053, "learning_rate": 9.114976370439323e-06, "loss": 0.4657, "step": 9128 }, { "epoch": 0.8192771084337349, "grad_norm": 0.45202679385667976, "learning_rate": 9.114679739266989e-06, "loss": 0.4294, "step": 9129 }, { "epoch": 0.819366852886406, "grad_norm": 0.42827190021262607, "learning_rate": 9.114383063220788e-06, "loss": 0.36, "step": 9130 }, { "epoch": 0.8194565973390769, "grad_norm": 0.4727190702221528, "learning_rate": 9.114086342303956e-06, "loss": 0.4075, "step": 9131 }, { "epoch": 0.819546341791748, "grad_norm": 0.449116364961071, "learning_rate": 9.113789576519729e-06, "loss": 0.415, "step": 9132 }, { "epoch": 0.819636086244419, "grad_norm": 0.48794948416235273, "learning_rate": 9.113492765871343e-06, "loss": 0.4508, "step": 9133 }, { "epoch": 0.81972583069709, "grad_norm": 0.5144338100396786, "learning_rate": 9.113195910362034e-06, "loss": 0.4835, "step": 9134 }, { "epoch": 0.819815575149761, "grad_norm": 0.5332988559460682, "learning_rate": 9.11289900999504e-06, "loss": 0.5213, "step": 9135 }, { "epoch": 0.8199053196024321, "grad_norm": 0.4744321265213312, "learning_rate": 9.1126020647736e-06, "loss": 0.421, "step": 9136 }, { "epoch": 0.8199950640551031, "grad_norm": 0.5375269870987779, "learning_rate": 9.112305074700953e-06, "loss": 0.5686, "step": 9137 }, { "epoch": 0.8200848085077741, "grad_norm": 0.47463267897388833, "learning_rate": 9.112008039780335e-06, "loss": 0.421, "step": 9138 }, { "epoch": 0.8201745529604452, "grad_norm": 0.5235819764690348, "learning_rate": 9.111710960014988e-06, "loss": 0.4662, "step": 9139 }, { "epoch": 0.8202642974131161, "grad_norm": 0.5350471199627955, "learning_rate": 9.111413835408151e-06, "loss": 0.4925, "step": 9140 }, { "epoch": 0.8203540418657872, "grad_norm": 0.5079353434034823, "learning_rate": 9.111116665963065e-06, "loss": 0.4364, "step": 9141 }, { "epoch": 0.8204437863184582, "grad_norm": 0.5052702820440715, "learning_rate": 9.110819451682968e-06, "loss": 0.4955, "step": 9142 }, { "epoch": 0.8205335307711292, "grad_norm": 0.4769827714986608, "learning_rate": 9.110522192571105e-06, "loss": 0.5022, "step": 9143 }, { "epoch": 0.8206232752238002, "grad_norm": 0.4929832031777708, "learning_rate": 9.110224888630717e-06, "loss": 0.441, "step": 9144 }, { "epoch": 0.8207130196764713, "grad_norm": 0.5077482496159282, "learning_rate": 9.109927539865044e-06, "loss": 0.4771, "step": 9145 }, { "epoch": 0.8208027641291422, "grad_norm": 0.5160382439692855, "learning_rate": 9.109630146277334e-06, "loss": 0.4831, "step": 9146 }, { "epoch": 0.8208925085818133, "grad_norm": 0.4676832007822314, "learning_rate": 9.109332707870823e-06, "loss": 0.4314, "step": 9147 }, { "epoch": 0.8209822530344844, "grad_norm": 0.5153654340328966, "learning_rate": 9.109035224648761e-06, "loss": 0.4474, "step": 9148 }, { "epoch": 0.8210719974871553, "grad_norm": 0.47855647765384524, "learning_rate": 9.10873769661439e-06, "loss": 0.4603, "step": 9149 }, { "epoch": 0.8211617419398264, "grad_norm": 0.4431134910208824, "learning_rate": 9.108440123770955e-06, "loss": 0.3889, "step": 9150 }, { "epoch": 0.8212514863924973, "grad_norm": 0.5061541495566453, "learning_rate": 9.108142506121698e-06, "loss": 0.4617, "step": 9151 }, { "epoch": 0.8213412308451684, "grad_norm": 0.45614202234230933, "learning_rate": 9.10784484366987e-06, "loss": 0.3975, "step": 9152 }, { "epoch": 0.8214309752978394, "grad_norm": 0.45889677906641296, "learning_rate": 9.107547136418716e-06, "loss": 0.3917, "step": 9153 }, { "epoch": 0.8215207197505104, "grad_norm": 0.48872315638706193, "learning_rate": 9.107249384371482e-06, "loss": 0.4832, "step": 9154 }, { "epoch": 0.8216104642031814, "grad_norm": 0.4749877486431807, "learning_rate": 9.106951587531412e-06, "loss": 0.4548, "step": 9155 }, { "epoch": 0.8217002086558525, "grad_norm": 0.44914849279743413, "learning_rate": 9.10665374590176e-06, "loss": 0.4162, "step": 9156 }, { "epoch": 0.8217899531085234, "grad_norm": 0.4989079403124631, "learning_rate": 9.10635585948577e-06, "loss": 0.479, "step": 9157 }, { "epoch": 0.8218796975611945, "grad_norm": 0.47133968386809627, "learning_rate": 9.106057928286692e-06, "loss": 0.4235, "step": 9158 }, { "epoch": 0.8219694420138656, "grad_norm": 0.4236215556233169, "learning_rate": 9.105759952307773e-06, "loss": 0.3922, "step": 9159 }, { "epoch": 0.8220591864665365, "grad_norm": 0.48547888661428185, "learning_rate": 9.105461931552266e-06, "loss": 0.463, "step": 9160 }, { "epoch": 0.8221489309192076, "grad_norm": 0.4985285381137512, "learning_rate": 9.105163866023418e-06, "loss": 0.4657, "step": 9161 }, { "epoch": 0.8222386753718786, "grad_norm": 0.43681419579702124, "learning_rate": 9.104865755724484e-06, "loss": 0.3624, "step": 9162 }, { "epoch": 0.8223284198245496, "grad_norm": 0.47474848200542663, "learning_rate": 9.10456760065871e-06, "loss": 0.4627, "step": 9163 }, { "epoch": 0.8224181642772206, "grad_norm": 0.46385313143240525, "learning_rate": 9.10426940082935e-06, "loss": 0.4317, "step": 9164 }, { "epoch": 0.8225079087298917, "grad_norm": 0.5088865317457566, "learning_rate": 9.103971156239657e-06, "loss": 0.4337, "step": 9165 }, { "epoch": 0.8225976531825626, "grad_norm": 0.46561281883211986, "learning_rate": 9.103672866892884e-06, "loss": 0.4088, "step": 9166 }, { "epoch": 0.8226873976352337, "grad_norm": 0.4493172321422973, "learning_rate": 9.103374532792281e-06, "loss": 0.3767, "step": 9167 }, { "epoch": 0.8227771420879046, "grad_norm": 0.4612947713811487, "learning_rate": 9.103076153941103e-06, "loss": 0.4272, "step": 9168 }, { "epoch": 0.8228668865405757, "grad_norm": 0.44958171326678426, "learning_rate": 9.102777730342606e-06, "loss": 0.44, "step": 9169 }, { "epoch": 0.8229566309932468, "grad_norm": 0.5577607287091035, "learning_rate": 9.102479262000042e-06, "loss": 0.501, "step": 9170 }, { "epoch": 0.8230463754459177, "grad_norm": 0.4474887054212144, "learning_rate": 9.102180748916667e-06, "loss": 0.4205, "step": 9171 }, { "epoch": 0.8231361198985888, "grad_norm": 0.5362112994085783, "learning_rate": 9.101882191095738e-06, "loss": 0.478, "step": 9172 }, { "epoch": 0.8232258643512598, "grad_norm": 0.5166928823959079, "learning_rate": 9.101583588540507e-06, "loss": 0.4409, "step": 9173 }, { "epoch": 0.8233156088039308, "grad_norm": 0.45970691906097877, "learning_rate": 9.101284941254236e-06, "loss": 0.3631, "step": 9174 }, { "epoch": 0.8234053532566018, "grad_norm": 0.43179821610684654, "learning_rate": 9.100986249240176e-06, "loss": 0.3539, "step": 9175 }, { "epoch": 0.8234950977092729, "grad_norm": 0.4423244248812099, "learning_rate": 9.10068751250159e-06, "loss": 0.3844, "step": 9176 }, { "epoch": 0.8235848421619438, "grad_norm": 0.4680108067007517, "learning_rate": 9.100388731041733e-06, "loss": 0.3981, "step": 9177 }, { "epoch": 0.8236745866146149, "grad_norm": 0.48852866628645414, "learning_rate": 9.100089904863862e-06, "loss": 0.4875, "step": 9178 }, { "epoch": 0.8237643310672859, "grad_norm": 0.48703187737323816, "learning_rate": 9.09979103397124e-06, "loss": 0.4365, "step": 9179 }, { "epoch": 0.8238540755199569, "grad_norm": 0.5079203973467785, "learning_rate": 9.099492118367123e-06, "loss": 0.4503, "step": 9180 }, { "epoch": 0.8239438199726279, "grad_norm": 0.5229864899373861, "learning_rate": 9.099193158054773e-06, "loss": 0.4769, "step": 9181 }, { "epoch": 0.824033564425299, "grad_norm": 0.5166552941613424, "learning_rate": 9.09889415303745e-06, "loss": 0.4653, "step": 9182 }, { "epoch": 0.82412330887797, "grad_norm": 0.47296591873833455, "learning_rate": 9.098595103318413e-06, "loss": 0.436, "step": 9183 }, { "epoch": 0.824213053330641, "grad_norm": 0.4876012748685486, "learning_rate": 9.098296008900927e-06, "loss": 0.4968, "step": 9184 }, { "epoch": 0.8243027977833121, "grad_norm": 0.46460760843435533, "learning_rate": 9.097996869788251e-06, "loss": 0.4216, "step": 9185 }, { "epoch": 0.824392542235983, "grad_norm": 0.4908863418715205, "learning_rate": 9.097697685983647e-06, "loss": 0.4503, "step": 9186 }, { "epoch": 0.8244822866886541, "grad_norm": 0.5158095136519012, "learning_rate": 9.097398457490379e-06, "loss": 0.4454, "step": 9187 }, { "epoch": 0.824572031141325, "grad_norm": 0.5310270816765991, "learning_rate": 9.09709918431171e-06, "loss": 0.4885, "step": 9188 }, { "epoch": 0.8246617755939961, "grad_norm": 0.48415445347719765, "learning_rate": 9.096799866450904e-06, "loss": 0.4319, "step": 9189 }, { "epoch": 0.8247515200466671, "grad_norm": 0.5045756621476034, "learning_rate": 9.096500503911227e-06, "loss": 0.4874, "step": 9190 }, { "epoch": 0.8248412644993381, "grad_norm": 0.4983977406550519, "learning_rate": 9.096201096695942e-06, "loss": 0.4684, "step": 9191 }, { "epoch": 0.8249310089520091, "grad_norm": 0.5163576590651453, "learning_rate": 9.095901644808313e-06, "loss": 0.4767, "step": 9192 }, { "epoch": 0.8250207534046802, "grad_norm": 0.5351276369005238, "learning_rate": 9.095602148251608e-06, "loss": 0.4876, "step": 9193 }, { "epoch": 0.8251104978573512, "grad_norm": 0.456024843801505, "learning_rate": 9.095302607029094e-06, "loss": 0.3853, "step": 9194 }, { "epoch": 0.8252002423100222, "grad_norm": 0.5092276023427779, "learning_rate": 9.095003021144033e-06, "loss": 0.4334, "step": 9195 }, { "epoch": 0.8252899867626933, "grad_norm": 0.4802709793108564, "learning_rate": 9.094703390599697e-06, "loss": 0.4102, "step": 9196 }, { "epoch": 0.8253797312153642, "grad_norm": 0.5437139541435895, "learning_rate": 9.094403715399352e-06, "loss": 0.4939, "step": 9197 }, { "epoch": 0.8254694756680353, "grad_norm": 0.460137823097197, "learning_rate": 9.094103995546266e-06, "loss": 0.422, "step": 9198 }, { "epoch": 0.8255592201207063, "grad_norm": 0.4955198759347692, "learning_rate": 9.093804231043708e-06, "loss": 0.4905, "step": 9199 }, { "epoch": 0.8256489645733773, "grad_norm": 0.522371688322484, "learning_rate": 9.093504421894947e-06, "loss": 0.4908, "step": 9200 }, { "epoch": 0.8257387090260483, "grad_norm": 0.49893950656611696, "learning_rate": 9.093204568103255e-06, "loss": 0.4068, "step": 9201 }, { "epoch": 0.8258284534787194, "grad_norm": 0.4741589166655917, "learning_rate": 9.092904669671898e-06, "loss": 0.4369, "step": 9202 }, { "epoch": 0.8259181979313903, "grad_norm": 0.4835047408045518, "learning_rate": 9.092604726604148e-06, "loss": 0.4403, "step": 9203 }, { "epoch": 0.8260079423840614, "grad_norm": 0.5223788033067537, "learning_rate": 9.092304738903278e-06, "loss": 0.4792, "step": 9204 }, { "epoch": 0.8260976868367325, "grad_norm": 0.5107661676609003, "learning_rate": 9.092004706572556e-06, "loss": 0.4829, "step": 9205 }, { "epoch": 0.8261874312894034, "grad_norm": 0.537883788342876, "learning_rate": 9.091704629615259e-06, "loss": 0.5555, "step": 9206 }, { "epoch": 0.8262771757420745, "grad_norm": 0.46594490728082816, "learning_rate": 9.091404508034656e-06, "loss": 0.4498, "step": 9207 }, { "epoch": 0.8263669201947454, "grad_norm": 0.4747241158920996, "learning_rate": 9.091104341834023e-06, "loss": 0.4032, "step": 9208 }, { "epoch": 0.8264566646474165, "grad_norm": 0.5423335094262903, "learning_rate": 9.090804131016629e-06, "loss": 0.5319, "step": 9209 }, { "epoch": 0.8265464091000875, "grad_norm": 0.5302509094219257, "learning_rate": 9.090503875585752e-06, "loss": 0.415, "step": 9210 }, { "epoch": 0.8266361535527585, "grad_norm": 0.47597811128795603, "learning_rate": 9.090203575544664e-06, "loss": 0.3874, "step": 9211 }, { "epoch": 0.8267258980054295, "grad_norm": 0.5123029013606261, "learning_rate": 9.089903230896642e-06, "loss": 0.4923, "step": 9212 }, { "epoch": 0.8268156424581006, "grad_norm": 0.5035400760361511, "learning_rate": 9.08960284164496e-06, "loss": 0.4949, "step": 9213 }, { "epoch": 0.8269053869107715, "grad_norm": 0.5505634226867768, "learning_rate": 9.089302407792894e-06, "loss": 0.5472, "step": 9214 }, { "epoch": 0.8269951313634426, "grad_norm": 0.4770765989763996, "learning_rate": 9.089001929343725e-06, "loss": 0.4208, "step": 9215 }, { "epoch": 0.8270848758161136, "grad_norm": 0.5024427100407765, "learning_rate": 9.088701406300721e-06, "loss": 0.4555, "step": 9216 }, { "epoch": 0.8271746202687846, "grad_norm": 0.5163136931885549, "learning_rate": 9.088400838667168e-06, "loss": 0.4809, "step": 9217 }, { "epoch": 0.8272643647214557, "grad_norm": 0.4646987740232964, "learning_rate": 9.088100226446339e-06, "loss": 0.4121, "step": 9218 }, { "epoch": 0.8273541091741267, "grad_norm": 0.48163344723837626, "learning_rate": 9.087799569641514e-06, "loss": 0.4126, "step": 9219 }, { "epoch": 0.8274438536267977, "grad_norm": 0.5013103666376456, "learning_rate": 9.087498868255973e-06, "loss": 0.4947, "step": 9220 }, { "epoch": 0.8275335980794687, "grad_norm": 0.46863829661625256, "learning_rate": 9.087198122292994e-06, "loss": 0.4457, "step": 9221 }, { "epoch": 0.8276233425321398, "grad_norm": 0.47570086777713566, "learning_rate": 9.086897331755856e-06, "loss": 0.4392, "step": 9222 }, { "epoch": 0.8277130869848107, "grad_norm": 0.5348161091442905, "learning_rate": 9.086596496647839e-06, "loss": 0.4992, "step": 9223 }, { "epoch": 0.8278028314374818, "grad_norm": 0.4739082137204844, "learning_rate": 9.086295616972227e-06, "loss": 0.4462, "step": 9224 }, { "epoch": 0.8278925758901527, "grad_norm": 0.5043305767978391, "learning_rate": 9.085994692732299e-06, "loss": 0.4998, "step": 9225 }, { "epoch": 0.8279823203428238, "grad_norm": 0.46054139410182054, "learning_rate": 9.08569372393134e-06, "loss": 0.413, "step": 9226 }, { "epoch": 0.8280720647954948, "grad_norm": 0.44535206966887914, "learning_rate": 9.085392710572627e-06, "loss": 0.4087, "step": 9227 }, { "epoch": 0.8281618092481658, "grad_norm": 0.4769011530094262, "learning_rate": 9.085091652659446e-06, "loss": 0.4154, "step": 9228 }, { "epoch": 0.8282515537008369, "grad_norm": 0.5338745567259227, "learning_rate": 9.08479055019508e-06, "loss": 0.478, "step": 9229 }, { "epoch": 0.8283412981535079, "grad_norm": 0.5292542273261522, "learning_rate": 9.084489403182812e-06, "loss": 0.5538, "step": 9230 }, { "epoch": 0.828431042606179, "grad_norm": 0.4397192219981967, "learning_rate": 9.084188211625926e-06, "loss": 0.4084, "step": 9231 }, { "epoch": 0.8285207870588499, "grad_norm": 0.488193380997599, "learning_rate": 9.08388697552771e-06, "loss": 0.4363, "step": 9232 }, { "epoch": 0.828610531511521, "grad_norm": 0.528389747730212, "learning_rate": 9.083585694891445e-06, "loss": 0.5289, "step": 9233 }, { "epoch": 0.8287002759641919, "grad_norm": 0.4774965122449751, "learning_rate": 9.083284369720418e-06, "loss": 0.4628, "step": 9234 }, { "epoch": 0.828790020416863, "grad_norm": 0.511237052652694, "learning_rate": 9.082983000017918e-06, "loss": 0.4653, "step": 9235 }, { "epoch": 0.828879764869534, "grad_norm": 0.46829559024095924, "learning_rate": 9.082681585787227e-06, "loss": 0.4296, "step": 9236 }, { "epoch": 0.828969509322205, "grad_norm": 0.46477557911336975, "learning_rate": 9.082380127031635e-06, "loss": 0.4388, "step": 9237 }, { "epoch": 0.829059253774876, "grad_norm": 0.4825361559733523, "learning_rate": 9.08207862375443e-06, "loss": 0.4569, "step": 9238 }, { "epoch": 0.8291489982275471, "grad_norm": 0.44449919512892383, "learning_rate": 9.081777075958898e-06, "loss": 0.3918, "step": 9239 }, { "epoch": 0.8292387426802181, "grad_norm": 0.5249645806846889, "learning_rate": 9.08147548364833e-06, "loss": 0.4788, "step": 9240 }, { "epoch": 0.8293284871328891, "grad_norm": 0.4742308360643656, "learning_rate": 9.081173846826014e-06, "loss": 0.4641, "step": 9241 }, { "epoch": 0.8294182315855602, "grad_norm": 0.5143225851877714, "learning_rate": 9.080872165495238e-06, "loss": 0.4651, "step": 9242 }, { "epoch": 0.8295079760382311, "grad_norm": 0.46783419255388115, "learning_rate": 9.080570439659296e-06, "loss": 0.4093, "step": 9243 }, { "epoch": 0.8295977204909022, "grad_norm": 0.4955248703753228, "learning_rate": 9.080268669321475e-06, "loss": 0.4419, "step": 9244 }, { "epoch": 0.8296874649435731, "grad_norm": 0.4856219474226627, "learning_rate": 9.079966854485067e-06, "loss": 0.4128, "step": 9245 }, { "epoch": 0.8297772093962442, "grad_norm": 0.483071470355502, "learning_rate": 9.079664995153364e-06, "loss": 0.4411, "step": 9246 }, { "epoch": 0.8298669538489152, "grad_norm": 0.46123137685044696, "learning_rate": 9.079363091329657e-06, "loss": 0.3953, "step": 9247 }, { "epoch": 0.8299566983015862, "grad_norm": 0.48449178948114147, "learning_rate": 9.07906114301724e-06, "loss": 0.3724, "step": 9248 }, { "epoch": 0.8300464427542572, "grad_norm": 0.5249770694252797, "learning_rate": 9.078759150219406e-06, "loss": 0.5207, "step": 9249 }, { "epoch": 0.8301361872069283, "grad_norm": 0.5001543631351468, "learning_rate": 9.078457112939447e-06, "loss": 0.4739, "step": 9250 }, { "epoch": 0.8302259316595992, "grad_norm": 0.5045639072176128, "learning_rate": 9.078155031180658e-06, "loss": 0.4714, "step": 9251 }, { "epoch": 0.8303156761122703, "grad_norm": 0.47995736387864946, "learning_rate": 9.077852904946333e-06, "loss": 0.4099, "step": 9252 }, { "epoch": 0.8304054205649414, "grad_norm": 0.4866624707172475, "learning_rate": 9.077550734239769e-06, "loss": 0.4601, "step": 9253 }, { "epoch": 0.8304951650176123, "grad_norm": 0.4384946506836324, "learning_rate": 9.077248519064256e-06, "loss": 0.4, "step": 9254 }, { "epoch": 0.8305849094702834, "grad_norm": 0.4976086723118739, "learning_rate": 9.076946259423097e-06, "loss": 0.4428, "step": 9255 }, { "epoch": 0.8306746539229544, "grad_norm": 0.5266337601308486, "learning_rate": 9.076643955319582e-06, "loss": 0.4972, "step": 9256 }, { "epoch": 0.8307643983756254, "grad_norm": 0.5003626320211305, "learning_rate": 9.07634160675701e-06, "loss": 0.4394, "step": 9257 }, { "epoch": 0.8308541428282964, "grad_norm": 0.48894387654806587, "learning_rate": 9.076039213738682e-06, "loss": 0.4456, "step": 9258 }, { "epoch": 0.8309438872809675, "grad_norm": 0.4697333581258631, "learning_rate": 9.075736776267892e-06, "loss": 0.4256, "step": 9259 }, { "epoch": 0.8310336317336384, "grad_norm": 0.4680245108007263, "learning_rate": 9.075434294347938e-06, "loss": 0.4182, "step": 9260 }, { "epoch": 0.8311233761863095, "grad_norm": 0.48750102588978234, "learning_rate": 9.07513176798212e-06, "loss": 0.4943, "step": 9261 }, { "epoch": 0.8312131206389805, "grad_norm": 0.5078569992371521, "learning_rate": 9.074829197173738e-06, "loss": 0.4845, "step": 9262 }, { "epoch": 0.8313028650916515, "grad_norm": 0.5140235543998078, "learning_rate": 9.07452658192609e-06, "loss": 0.5356, "step": 9263 }, { "epoch": 0.8313926095443226, "grad_norm": 0.483292829314227, "learning_rate": 9.07422392224248e-06, "loss": 0.4804, "step": 9264 }, { "epoch": 0.8314823539969936, "grad_norm": 0.4999468692454306, "learning_rate": 9.073921218126204e-06, "loss": 0.4512, "step": 9265 }, { "epoch": 0.8315720984496646, "grad_norm": 0.5062259985134338, "learning_rate": 9.073618469580565e-06, "loss": 0.4916, "step": 9266 }, { "epoch": 0.8316618429023356, "grad_norm": 0.4799591541907041, "learning_rate": 9.073315676608866e-06, "loss": 0.4355, "step": 9267 }, { "epoch": 0.8317515873550066, "grad_norm": 0.4935635011703075, "learning_rate": 9.073012839214408e-06, "loss": 0.4138, "step": 9268 }, { "epoch": 0.8318413318076776, "grad_norm": 0.44162757977804284, "learning_rate": 9.072709957400493e-06, "loss": 0.3539, "step": 9269 }, { "epoch": 0.8319310762603487, "grad_norm": 0.5235469008004184, "learning_rate": 9.072407031170427e-06, "loss": 0.4529, "step": 9270 }, { "epoch": 0.8320208207130196, "grad_norm": 0.45498186023147985, "learning_rate": 9.072104060527509e-06, "loss": 0.3948, "step": 9271 }, { "epoch": 0.8321105651656907, "grad_norm": 0.465774851091279, "learning_rate": 9.071801045475047e-06, "loss": 0.3836, "step": 9272 }, { "epoch": 0.8322003096183617, "grad_norm": 0.47896020920287063, "learning_rate": 9.071497986016344e-06, "loss": 0.4434, "step": 9273 }, { "epoch": 0.8322900540710327, "grad_norm": 0.45007614310591726, "learning_rate": 9.071194882154708e-06, "loss": 0.4246, "step": 9274 }, { "epoch": 0.8323797985237038, "grad_norm": 0.4937614886905436, "learning_rate": 9.07089173389344e-06, "loss": 0.4364, "step": 9275 }, { "epoch": 0.8324695429763748, "grad_norm": 0.5324212690651038, "learning_rate": 9.070588541235849e-06, "loss": 0.5106, "step": 9276 }, { "epoch": 0.8325592874290458, "grad_norm": 0.4634820836697501, "learning_rate": 9.07028530418524e-06, "loss": 0.4319, "step": 9277 }, { "epoch": 0.8326490318817168, "grad_norm": 0.5276047670209537, "learning_rate": 9.06998202274492e-06, "loss": 0.5072, "step": 9278 }, { "epoch": 0.8327387763343879, "grad_norm": 0.48513239212394665, "learning_rate": 9.0696786969182e-06, "loss": 0.4793, "step": 9279 }, { "epoch": 0.8328285207870588, "grad_norm": 0.5435676193085102, "learning_rate": 9.069375326708383e-06, "loss": 0.5101, "step": 9280 }, { "epoch": 0.8329182652397299, "grad_norm": 0.48174904734216845, "learning_rate": 9.069071912118781e-06, "loss": 0.4734, "step": 9281 }, { "epoch": 0.8330080096924009, "grad_norm": 0.43114899332129103, "learning_rate": 9.0687684531527e-06, "loss": 0.3636, "step": 9282 }, { "epoch": 0.8330977541450719, "grad_norm": 0.5011310690094394, "learning_rate": 9.068464949813454e-06, "loss": 0.4498, "step": 9283 }, { "epoch": 0.8331874985977429, "grad_norm": 0.4995928864858982, "learning_rate": 9.068161402104349e-06, "loss": 0.4489, "step": 9284 }, { "epoch": 0.833277243050414, "grad_norm": 0.5024188250796158, "learning_rate": 9.067857810028698e-06, "loss": 0.4817, "step": 9285 }, { "epoch": 0.8333669875030849, "grad_norm": 0.501224784562433, "learning_rate": 9.06755417358981e-06, "loss": 0.4895, "step": 9286 }, { "epoch": 0.833456731955756, "grad_norm": 0.5274963904358385, "learning_rate": 9.067250492790996e-06, "loss": 0.4802, "step": 9287 }, { "epoch": 0.833546476408427, "grad_norm": 0.5052424946905498, "learning_rate": 9.066946767635569e-06, "loss": 0.3783, "step": 9288 }, { "epoch": 0.833636220861098, "grad_norm": 0.4894685297898968, "learning_rate": 9.06664299812684e-06, "loss": 0.459, "step": 9289 }, { "epoch": 0.8337259653137691, "grad_norm": 0.4526408730697142, "learning_rate": 9.066339184268126e-06, "loss": 0.4176, "step": 9290 }, { "epoch": 0.83381570976644, "grad_norm": 0.4321277379285607, "learning_rate": 9.066035326062736e-06, "loss": 0.3652, "step": 9291 }, { "epoch": 0.8339054542191111, "grad_norm": 0.4553234072642098, "learning_rate": 9.065731423513985e-06, "loss": 0.4566, "step": 9292 }, { "epoch": 0.8339951986717821, "grad_norm": 0.4443840232570471, "learning_rate": 9.065427476625187e-06, "loss": 0.4334, "step": 9293 }, { "epoch": 0.8340849431244531, "grad_norm": 0.5439258768079587, "learning_rate": 9.065123485399659e-06, "loss": 0.4598, "step": 9294 }, { "epoch": 0.8341746875771241, "grad_norm": 0.48810132971195125, "learning_rate": 9.064819449840713e-06, "loss": 0.4604, "step": 9295 }, { "epoch": 0.8342644320297952, "grad_norm": 0.4932322277067251, "learning_rate": 9.064515369951667e-06, "loss": 0.4534, "step": 9296 }, { "epoch": 0.8343541764824661, "grad_norm": 0.464361853627161, "learning_rate": 9.064211245735837e-06, "loss": 0.4328, "step": 9297 }, { "epoch": 0.8344439209351372, "grad_norm": 0.49881819888374807, "learning_rate": 9.063907077196539e-06, "loss": 0.4676, "step": 9298 }, { "epoch": 0.8345336653878083, "grad_norm": 0.46714718142982675, "learning_rate": 9.06360286433709e-06, "loss": 0.406, "step": 9299 }, { "epoch": 0.8346234098404792, "grad_norm": 0.5253765091892251, "learning_rate": 9.063298607160809e-06, "loss": 0.5009, "step": 9300 }, { "epoch": 0.8347131542931503, "grad_norm": 0.4475132186131584, "learning_rate": 9.06299430567101e-06, "loss": 0.4225, "step": 9301 }, { "epoch": 0.8348028987458213, "grad_norm": 0.5011161748945894, "learning_rate": 9.06268995987102e-06, "loss": 0.5228, "step": 9302 }, { "epoch": 0.8348926431984923, "grad_norm": 0.42982317862498903, "learning_rate": 9.062385569764151e-06, "loss": 0.4061, "step": 9303 }, { "epoch": 0.8349823876511633, "grad_norm": 0.4589633666939294, "learning_rate": 9.062081135353724e-06, "loss": 0.4066, "step": 9304 }, { "epoch": 0.8350721321038344, "grad_norm": 0.4672413064005812, "learning_rate": 9.06177665664306e-06, "loss": 0.4296, "step": 9305 }, { "epoch": 0.8351618765565053, "grad_norm": 0.49383467346914744, "learning_rate": 9.06147213363548e-06, "loss": 0.487, "step": 9306 }, { "epoch": 0.8352516210091764, "grad_norm": 0.47190174258319645, "learning_rate": 9.061167566334304e-06, "loss": 0.431, "step": 9307 }, { "epoch": 0.8353413654618473, "grad_norm": 0.44082684512851394, "learning_rate": 9.060862954742856e-06, "loss": 0.3689, "step": 9308 }, { "epoch": 0.8354311099145184, "grad_norm": 0.5128895618564069, "learning_rate": 9.060558298864453e-06, "loss": 0.4791, "step": 9309 }, { "epoch": 0.8355208543671895, "grad_norm": 0.5016889890317809, "learning_rate": 9.060253598702421e-06, "loss": 0.476, "step": 9310 }, { "epoch": 0.8356105988198604, "grad_norm": 0.4667804790549151, "learning_rate": 9.059948854260086e-06, "loss": 0.4187, "step": 9311 }, { "epoch": 0.8357003432725315, "grad_norm": 0.4638616339839323, "learning_rate": 9.059644065540764e-06, "loss": 0.4339, "step": 9312 }, { "epoch": 0.8357900877252025, "grad_norm": 0.4693618367733097, "learning_rate": 9.059339232547786e-06, "loss": 0.4586, "step": 9313 }, { "epoch": 0.8358798321778735, "grad_norm": 0.47163426017819554, "learning_rate": 9.05903435528447e-06, "loss": 0.3825, "step": 9314 }, { "epoch": 0.8359695766305445, "grad_norm": 0.5097596406415935, "learning_rate": 9.058729433754147e-06, "loss": 0.5138, "step": 9315 }, { "epoch": 0.8360593210832156, "grad_norm": 0.47854203854243216, "learning_rate": 9.05842446796014e-06, "loss": 0.4494, "step": 9316 }, { "epoch": 0.8361490655358865, "grad_norm": 0.45731056481073656, "learning_rate": 9.058119457905773e-06, "loss": 0.4094, "step": 9317 }, { "epoch": 0.8362388099885576, "grad_norm": 0.5212521851749075, "learning_rate": 9.057814403594375e-06, "loss": 0.4889, "step": 9318 }, { "epoch": 0.8363285544412286, "grad_norm": 0.5022760599065251, "learning_rate": 9.057509305029272e-06, "loss": 0.4883, "step": 9319 }, { "epoch": 0.8364182988938996, "grad_norm": 0.5224666337585164, "learning_rate": 9.057204162213794e-06, "loss": 0.4831, "step": 9320 }, { "epoch": 0.8365080433465707, "grad_norm": 0.4648562432797913, "learning_rate": 9.056898975151262e-06, "loss": 0.4211, "step": 9321 }, { "epoch": 0.8365977877992417, "grad_norm": 0.5051285441155601, "learning_rate": 9.05659374384501e-06, "loss": 0.4496, "step": 9322 }, { "epoch": 0.8366875322519127, "grad_norm": 0.5043983046084545, "learning_rate": 9.056288468298366e-06, "loss": 0.4971, "step": 9323 }, { "epoch": 0.8367772767045837, "grad_norm": 0.5017092093650142, "learning_rate": 9.055983148514657e-06, "loss": 0.4685, "step": 9324 }, { "epoch": 0.8368670211572548, "grad_norm": 0.48292821523571333, "learning_rate": 9.055677784497215e-06, "loss": 0.4419, "step": 9325 }, { "epoch": 0.8369567656099257, "grad_norm": 0.4934912466919694, "learning_rate": 9.05537237624937e-06, "loss": 0.4345, "step": 9326 }, { "epoch": 0.8370465100625968, "grad_norm": 0.491718610854391, "learning_rate": 9.055066923774452e-06, "loss": 0.4174, "step": 9327 }, { "epoch": 0.8371362545152677, "grad_norm": 0.5195923282844749, "learning_rate": 9.054761427075791e-06, "loss": 0.4423, "step": 9328 }, { "epoch": 0.8372259989679388, "grad_norm": 0.5351081359521116, "learning_rate": 9.054455886156721e-06, "loss": 0.5182, "step": 9329 }, { "epoch": 0.8373157434206098, "grad_norm": 0.4954327095749197, "learning_rate": 9.054150301020575e-06, "loss": 0.4592, "step": 9330 }, { "epoch": 0.8374054878732808, "grad_norm": 0.5010753426906367, "learning_rate": 9.053844671670681e-06, "loss": 0.5051, "step": 9331 }, { "epoch": 0.8374952323259518, "grad_norm": 0.490390774230572, "learning_rate": 9.053538998110377e-06, "loss": 0.4631, "step": 9332 }, { "epoch": 0.8375849767786229, "grad_norm": 0.48140288470056053, "learning_rate": 9.053233280342993e-06, "loss": 0.4368, "step": 9333 }, { "epoch": 0.8376747212312939, "grad_norm": 0.47206047652806205, "learning_rate": 9.052927518371866e-06, "loss": 0.4437, "step": 9334 }, { "epoch": 0.8377644656839649, "grad_norm": 0.5082115737093572, "learning_rate": 9.05262171220033e-06, "loss": 0.4982, "step": 9335 }, { "epoch": 0.837854210136636, "grad_norm": 0.4525806643045199, "learning_rate": 9.052315861831718e-06, "loss": 0.3878, "step": 9336 }, { "epoch": 0.8379439545893069, "grad_norm": 0.471472062875866, "learning_rate": 9.052009967269368e-06, "loss": 0.4087, "step": 9337 }, { "epoch": 0.838033699041978, "grad_norm": 0.5420880108701748, "learning_rate": 9.051704028516614e-06, "loss": 0.5241, "step": 9338 }, { "epoch": 0.838123443494649, "grad_norm": 0.5460809490972696, "learning_rate": 9.051398045576794e-06, "loss": 0.473, "step": 9339 }, { "epoch": 0.83821318794732, "grad_norm": 0.4558974299092504, "learning_rate": 9.051092018453243e-06, "loss": 0.4263, "step": 9340 }, { "epoch": 0.838302932399991, "grad_norm": 0.4400422826404117, "learning_rate": 9.050785947149303e-06, "loss": 0.3821, "step": 9341 }, { "epoch": 0.838392676852662, "grad_norm": 0.5216793050078045, "learning_rate": 9.050479831668307e-06, "loss": 0.4696, "step": 9342 }, { "epoch": 0.838482421305333, "grad_norm": 0.44731397632967485, "learning_rate": 9.050173672013595e-06, "loss": 0.3701, "step": 9343 }, { "epoch": 0.8385721657580041, "grad_norm": 0.43589967494331894, "learning_rate": 9.049867468188506e-06, "loss": 0.3483, "step": 9344 }, { "epoch": 0.8386619102106752, "grad_norm": 0.4831947768464156, "learning_rate": 9.049561220196381e-06, "loss": 0.4705, "step": 9345 }, { "epoch": 0.8387516546633461, "grad_norm": 0.4931089823460285, "learning_rate": 9.049254928040557e-06, "loss": 0.4786, "step": 9346 }, { "epoch": 0.8388413991160172, "grad_norm": 0.4779270137834269, "learning_rate": 9.048948591724376e-06, "loss": 0.45, "step": 9347 }, { "epoch": 0.8389311435686881, "grad_norm": 0.49708416574764364, "learning_rate": 9.048642211251179e-06, "loss": 0.4625, "step": 9348 }, { "epoch": 0.8390208880213592, "grad_norm": 0.5022736197189146, "learning_rate": 9.04833578662431e-06, "loss": 0.4456, "step": 9349 }, { "epoch": 0.8391106324740302, "grad_norm": 0.45695079189825516, "learning_rate": 9.048029317847104e-06, "loss": 0.4462, "step": 9350 }, { "epoch": 0.8392003769267012, "grad_norm": 0.5315769903818647, "learning_rate": 9.047722804922908e-06, "loss": 0.4792, "step": 9351 }, { "epoch": 0.8392901213793722, "grad_norm": 0.449357555870316, "learning_rate": 9.047416247855064e-06, "loss": 0.3975, "step": 9352 }, { "epoch": 0.8393798658320433, "grad_norm": 0.5093747744190784, "learning_rate": 9.047109646646915e-06, "loss": 0.4755, "step": 9353 }, { "epoch": 0.8394696102847142, "grad_norm": 0.46565846520331383, "learning_rate": 9.046803001301806e-06, "loss": 0.4128, "step": 9354 }, { "epoch": 0.8395593547373853, "grad_norm": 0.5383265566965042, "learning_rate": 9.046496311823078e-06, "loss": 0.5361, "step": 9355 }, { "epoch": 0.8396490991900564, "grad_norm": 0.508101086821703, "learning_rate": 9.04618957821408e-06, "loss": 0.4873, "step": 9356 }, { "epoch": 0.8397388436427273, "grad_norm": 0.4505300271374582, "learning_rate": 9.045882800478155e-06, "loss": 0.398, "step": 9357 }, { "epoch": 0.8398285880953984, "grad_norm": 0.4763603421236477, "learning_rate": 9.045575978618645e-06, "loss": 0.4729, "step": 9358 }, { "epoch": 0.8399183325480694, "grad_norm": 0.4321976340136135, "learning_rate": 9.045269112638904e-06, "loss": 0.4313, "step": 9359 }, { "epoch": 0.8400080770007404, "grad_norm": 0.4602154022427283, "learning_rate": 9.044962202542273e-06, "loss": 0.4434, "step": 9360 }, { "epoch": 0.8400978214534114, "grad_norm": 0.49652134342533644, "learning_rate": 9.0446552483321e-06, "loss": 0.4543, "step": 9361 }, { "epoch": 0.8401875659060825, "grad_norm": 0.5063242544817156, "learning_rate": 9.044348250011733e-06, "loss": 0.4697, "step": 9362 }, { "epoch": 0.8402773103587534, "grad_norm": 0.5451887661844333, "learning_rate": 9.044041207584522e-06, "loss": 0.4316, "step": 9363 }, { "epoch": 0.8403670548114245, "grad_norm": 0.480421431051485, "learning_rate": 9.043734121053811e-06, "loss": 0.4677, "step": 9364 }, { "epoch": 0.8404567992640954, "grad_norm": 0.4392253581039021, "learning_rate": 9.043426990422952e-06, "loss": 0.3745, "step": 9365 }, { "epoch": 0.8405465437167665, "grad_norm": 0.44460803806432847, "learning_rate": 9.043119815695297e-06, "loss": 0.375, "step": 9366 }, { "epoch": 0.8406362881694375, "grad_norm": 0.4753455346054526, "learning_rate": 9.04281259687419e-06, "loss": 0.4265, "step": 9367 }, { "epoch": 0.8407260326221085, "grad_norm": 0.49849480395531565, "learning_rate": 9.042505333962986e-06, "loss": 0.4862, "step": 9368 }, { "epoch": 0.8408157770747796, "grad_norm": 0.500136486985392, "learning_rate": 9.042198026965034e-06, "loss": 0.4412, "step": 9369 }, { "epoch": 0.8409055215274506, "grad_norm": 0.4780568025296495, "learning_rate": 9.041890675883684e-06, "loss": 0.4355, "step": 9370 }, { "epoch": 0.8409952659801216, "grad_norm": 0.47759784662927635, "learning_rate": 9.041583280722292e-06, "loss": 0.452, "step": 9371 }, { "epoch": 0.8410850104327926, "grad_norm": 0.46468589113933767, "learning_rate": 9.04127584148421e-06, "loss": 0.4295, "step": 9372 }, { "epoch": 0.8411747548854637, "grad_norm": 0.50270402827306, "learning_rate": 9.040968358172787e-06, "loss": 0.4617, "step": 9373 }, { "epoch": 0.8412644993381346, "grad_norm": 0.5265705874088453, "learning_rate": 9.040660830791379e-06, "loss": 0.4889, "step": 9374 }, { "epoch": 0.8413542437908057, "grad_norm": 0.4525690044724982, "learning_rate": 9.04035325934334e-06, "loss": 0.3997, "step": 9375 }, { "epoch": 0.8414439882434767, "grad_norm": 0.4858859827005918, "learning_rate": 9.040045643832023e-06, "loss": 0.4432, "step": 9376 }, { "epoch": 0.8415337326961477, "grad_norm": 0.5279378360595777, "learning_rate": 9.039737984260784e-06, "loss": 0.5052, "step": 9377 }, { "epoch": 0.8416234771488187, "grad_norm": 0.46027340509913023, "learning_rate": 9.039430280632977e-06, "loss": 0.4518, "step": 9378 }, { "epoch": 0.8417132216014898, "grad_norm": 0.4550963391117138, "learning_rate": 9.039122532951958e-06, "loss": 0.4144, "step": 9379 }, { "epoch": 0.8418029660541608, "grad_norm": 0.49748361068290614, "learning_rate": 9.038814741221086e-06, "loss": 0.4869, "step": 9380 }, { "epoch": 0.8418927105068318, "grad_norm": 0.4718016868206914, "learning_rate": 9.038506905443714e-06, "loss": 0.4473, "step": 9381 }, { "epoch": 0.8419824549595029, "grad_norm": 0.4714321222536695, "learning_rate": 9.0381990256232e-06, "loss": 0.4149, "step": 9382 }, { "epoch": 0.8420721994121738, "grad_norm": 0.4576087284607844, "learning_rate": 9.037891101762903e-06, "loss": 0.4212, "step": 9383 }, { "epoch": 0.8421619438648449, "grad_norm": 0.46471652512435174, "learning_rate": 9.03758313386618e-06, "loss": 0.4219, "step": 9384 }, { "epoch": 0.8422516883175158, "grad_norm": 0.44738506048817367, "learning_rate": 9.03727512193639e-06, "loss": 0.3842, "step": 9385 }, { "epoch": 0.8423414327701869, "grad_norm": 0.5155589280375186, "learning_rate": 9.036967065976894e-06, "loss": 0.5089, "step": 9386 }, { "epoch": 0.8424311772228579, "grad_norm": 0.4910104530273104, "learning_rate": 9.036658965991047e-06, "loss": 0.4641, "step": 9387 }, { "epoch": 0.8425209216755289, "grad_norm": 0.4649553848977998, "learning_rate": 9.036350821982214e-06, "loss": 0.4015, "step": 9388 }, { "epoch": 0.8426106661281999, "grad_norm": 0.4697459773501805, "learning_rate": 9.036042633953752e-06, "loss": 0.4334, "step": 9389 }, { "epoch": 0.842700410580871, "grad_norm": 0.45655727617401026, "learning_rate": 9.035734401909024e-06, "loss": 0.4011, "step": 9390 }, { "epoch": 0.842790155033542, "grad_norm": 0.4084410277403768, "learning_rate": 9.035426125851392e-06, "loss": 0.3279, "step": 9391 }, { "epoch": 0.842879899486213, "grad_norm": 0.5311111561841239, "learning_rate": 9.035117805784216e-06, "loss": 0.5092, "step": 9392 }, { "epoch": 0.8429696439388841, "grad_norm": 0.4822947320184907, "learning_rate": 9.034809441710858e-06, "loss": 0.4394, "step": 9393 }, { "epoch": 0.843059388391555, "grad_norm": 0.5056216742158129, "learning_rate": 9.034501033634686e-06, "loss": 0.5128, "step": 9394 }, { "epoch": 0.8431491328442261, "grad_norm": 0.5336208293889217, "learning_rate": 9.034192581559058e-06, "loss": 0.529, "step": 9395 }, { "epoch": 0.8432388772968971, "grad_norm": 0.4790336763702929, "learning_rate": 9.033884085487338e-06, "loss": 0.4734, "step": 9396 }, { "epoch": 0.8433286217495681, "grad_norm": 0.5019045526461575, "learning_rate": 9.033575545422896e-06, "loss": 0.4828, "step": 9397 }, { "epoch": 0.8434183662022391, "grad_norm": 0.552842310262771, "learning_rate": 9.03326696136909e-06, "loss": 0.5406, "step": 9398 }, { "epoch": 0.8435081106549102, "grad_norm": 0.4898518742403828, "learning_rate": 9.03295833332929e-06, "loss": 0.4666, "step": 9399 }, { "epoch": 0.8435978551075811, "grad_norm": 0.49874102541111065, "learning_rate": 9.03264966130686e-06, "loss": 0.4736, "step": 9400 }, { "epoch": 0.8436875995602522, "grad_norm": 0.4670211568846466, "learning_rate": 9.032340945305165e-06, "loss": 0.3963, "step": 9401 }, { "epoch": 0.8437773440129231, "grad_norm": 0.47848190153630504, "learning_rate": 9.032032185327575e-06, "loss": 0.4108, "step": 9402 }, { "epoch": 0.8438670884655942, "grad_norm": 0.49008725963168065, "learning_rate": 9.031723381377455e-06, "loss": 0.4807, "step": 9403 }, { "epoch": 0.8439568329182653, "grad_norm": 0.4756984502475225, "learning_rate": 9.031414533458173e-06, "loss": 0.4464, "step": 9404 }, { "epoch": 0.8440465773709362, "grad_norm": 0.45090792732566787, "learning_rate": 9.0311056415731e-06, "loss": 0.4037, "step": 9405 }, { "epoch": 0.8441363218236073, "grad_norm": 0.5102057979605302, "learning_rate": 9.030796705725599e-06, "loss": 0.4593, "step": 9406 }, { "epoch": 0.8442260662762783, "grad_norm": 0.47626295071848646, "learning_rate": 9.030487725919044e-06, "loss": 0.4343, "step": 9407 }, { "epoch": 0.8443158107289493, "grad_norm": 0.4667428216238002, "learning_rate": 9.030178702156803e-06, "loss": 0.4133, "step": 9408 }, { "epoch": 0.8444055551816203, "grad_norm": 0.4793564465419133, "learning_rate": 9.029869634442248e-06, "loss": 0.4135, "step": 9409 }, { "epoch": 0.8444952996342914, "grad_norm": 0.5152466326917484, "learning_rate": 9.029560522778745e-06, "loss": 0.4609, "step": 9410 }, { "epoch": 0.8445850440869623, "grad_norm": 0.5164089535778007, "learning_rate": 9.02925136716967e-06, "loss": 0.4923, "step": 9411 }, { "epoch": 0.8446747885396334, "grad_norm": 0.4618398002582386, "learning_rate": 9.028942167618392e-06, "loss": 0.4486, "step": 9412 }, { "epoch": 0.8447645329923044, "grad_norm": 0.44893621325231775, "learning_rate": 9.028632924128285e-06, "loss": 0.4139, "step": 9413 }, { "epoch": 0.8448542774449754, "grad_norm": 0.4466875839374628, "learning_rate": 9.028323636702719e-06, "loss": 0.4052, "step": 9414 }, { "epoch": 0.8449440218976465, "grad_norm": 0.477961475645351, "learning_rate": 9.028014305345068e-06, "loss": 0.4664, "step": 9415 }, { "epoch": 0.8450337663503175, "grad_norm": 0.5319100325173165, "learning_rate": 9.027704930058704e-06, "loss": 0.4964, "step": 9416 }, { "epoch": 0.8451235108029885, "grad_norm": 0.4642179933232769, "learning_rate": 9.027395510847006e-06, "loss": 0.4137, "step": 9417 }, { "epoch": 0.8452132552556595, "grad_norm": 0.5140871375346092, "learning_rate": 9.027086047713344e-06, "loss": 0.4844, "step": 9418 }, { "epoch": 0.8453029997083306, "grad_norm": 0.49301584393901504, "learning_rate": 9.026776540661095e-06, "loss": 0.4379, "step": 9419 }, { "epoch": 0.8453927441610015, "grad_norm": 0.49051654862313887, "learning_rate": 9.026466989693634e-06, "loss": 0.4354, "step": 9420 }, { "epoch": 0.8454824886136726, "grad_norm": 0.5096410568018209, "learning_rate": 9.026157394814334e-06, "loss": 0.4896, "step": 9421 }, { "epoch": 0.8455722330663435, "grad_norm": 0.5202162691494389, "learning_rate": 9.025847756026577e-06, "loss": 0.4928, "step": 9422 }, { "epoch": 0.8456619775190146, "grad_norm": 0.48596120735400933, "learning_rate": 9.025538073333735e-06, "loss": 0.4229, "step": 9423 }, { "epoch": 0.8457517219716856, "grad_norm": 0.44291310900419395, "learning_rate": 9.025228346739188e-06, "loss": 0.4128, "step": 9424 }, { "epoch": 0.8458414664243566, "grad_norm": 0.42005706192934306, "learning_rate": 9.02491857624631e-06, "loss": 0.3557, "step": 9425 }, { "epoch": 0.8459312108770277, "grad_norm": 0.4799652129350508, "learning_rate": 9.024608761858487e-06, "loss": 0.464, "step": 9426 }, { "epoch": 0.8460209553296987, "grad_norm": 0.4475812773635583, "learning_rate": 9.02429890357909e-06, "loss": 0.3853, "step": 9427 }, { "epoch": 0.8461106997823697, "grad_norm": 0.5600917788802603, "learning_rate": 9.023989001411501e-06, "loss": 0.4985, "step": 9428 }, { "epoch": 0.8462004442350407, "grad_norm": 0.46914628672558795, "learning_rate": 9.023679055359101e-06, "loss": 0.4177, "step": 9429 }, { "epoch": 0.8462901886877118, "grad_norm": 0.4579012319437538, "learning_rate": 9.02336906542527e-06, "loss": 0.4223, "step": 9430 }, { "epoch": 0.8463799331403827, "grad_norm": 0.4605793091707156, "learning_rate": 9.023059031613387e-06, "loss": 0.405, "step": 9431 }, { "epoch": 0.8464696775930538, "grad_norm": 0.4729340175522696, "learning_rate": 9.022748953926832e-06, "loss": 0.432, "step": 9432 }, { "epoch": 0.8465594220457248, "grad_norm": 0.4645670941984383, "learning_rate": 9.022438832368992e-06, "loss": 0.4374, "step": 9433 }, { "epoch": 0.8466491664983958, "grad_norm": 0.48708167863122787, "learning_rate": 9.022128666943242e-06, "loss": 0.4272, "step": 9434 }, { "epoch": 0.8467389109510668, "grad_norm": 0.46871617226709844, "learning_rate": 9.021818457652972e-06, "loss": 0.4202, "step": 9435 }, { "epoch": 0.8468286554037379, "grad_norm": 0.4931377025526425, "learning_rate": 9.021508204501558e-06, "loss": 0.4933, "step": 9436 }, { "epoch": 0.8469183998564088, "grad_norm": 0.5335206915529277, "learning_rate": 9.02119790749239e-06, "loss": 0.4729, "step": 9437 }, { "epoch": 0.8470081443090799, "grad_norm": 0.445379235329122, "learning_rate": 9.02088756662885e-06, "loss": 0.4036, "step": 9438 }, { "epoch": 0.847097888761751, "grad_norm": 0.5035642785602569, "learning_rate": 9.020577181914318e-06, "loss": 0.4978, "step": 9439 }, { "epoch": 0.8471876332144219, "grad_norm": 0.4885460422412468, "learning_rate": 9.020266753352185e-06, "loss": 0.4789, "step": 9440 }, { "epoch": 0.847277377667093, "grad_norm": 0.4416503441567058, "learning_rate": 9.019956280945832e-06, "loss": 0.3874, "step": 9441 }, { "epoch": 0.847367122119764, "grad_norm": 0.5183177496781264, "learning_rate": 9.019645764698648e-06, "loss": 0.4858, "step": 9442 }, { "epoch": 0.847456866572435, "grad_norm": 0.49083934023048803, "learning_rate": 9.01933520461402e-06, "loss": 0.4393, "step": 9443 }, { "epoch": 0.847546611025106, "grad_norm": 0.44270491666138206, "learning_rate": 9.019024600695333e-06, "loss": 0.3921, "step": 9444 }, { "epoch": 0.847636355477777, "grad_norm": 0.4546945884599111, "learning_rate": 9.018713952945972e-06, "loss": 0.4075, "step": 9445 }, { "epoch": 0.847726099930448, "grad_norm": 0.5015638245309272, "learning_rate": 9.01840326136933e-06, "loss": 0.5153, "step": 9446 }, { "epoch": 0.8478158443831191, "grad_norm": 0.5475045589762544, "learning_rate": 9.018092525968794e-06, "loss": 0.5138, "step": 9447 }, { "epoch": 0.84790558883579, "grad_norm": 0.5518678642100235, "learning_rate": 9.01778174674775e-06, "loss": 0.4861, "step": 9448 }, { "epoch": 0.8479953332884611, "grad_norm": 0.48382655779277256, "learning_rate": 9.017470923709588e-06, "loss": 0.4532, "step": 9449 }, { "epoch": 0.8480850777411322, "grad_norm": 0.4687616101463149, "learning_rate": 9.0171600568577e-06, "loss": 0.3931, "step": 9450 }, { "epoch": 0.8481748221938031, "grad_norm": 0.5732757641565085, "learning_rate": 9.016849146195475e-06, "loss": 0.5677, "step": 9451 }, { "epoch": 0.8482645666464742, "grad_norm": 0.48094783118858353, "learning_rate": 9.016538191726302e-06, "loss": 0.4617, "step": 9452 }, { "epoch": 0.8483543110991452, "grad_norm": 0.46024526321256454, "learning_rate": 9.016227193453578e-06, "loss": 0.4195, "step": 9453 }, { "epoch": 0.8484440555518162, "grad_norm": 0.45750666388071703, "learning_rate": 9.015916151380688e-06, "loss": 0.4108, "step": 9454 }, { "epoch": 0.8485338000044872, "grad_norm": 0.4945211129790568, "learning_rate": 9.015605065511027e-06, "loss": 0.4267, "step": 9455 }, { "epoch": 0.8486235444571583, "grad_norm": 0.4971080412448089, "learning_rate": 9.015293935847988e-06, "loss": 0.492, "step": 9456 }, { "epoch": 0.8487132889098292, "grad_norm": 0.5210291826414792, "learning_rate": 9.014982762394962e-06, "loss": 0.5084, "step": 9457 }, { "epoch": 0.8488030333625003, "grad_norm": 0.4579945508842019, "learning_rate": 9.014671545155346e-06, "loss": 0.3971, "step": 9458 }, { "epoch": 0.8488927778151713, "grad_norm": 0.5023501504479284, "learning_rate": 9.014360284132532e-06, "loss": 0.4612, "step": 9459 }, { "epoch": 0.8489825222678423, "grad_norm": 0.5049276163958717, "learning_rate": 9.014048979329915e-06, "loss": 0.4451, "step": 9460 }, { "epoch": 0.8490722667205134, "grad_norm": 0.5257759951915449, "learning_rate": 9.013737630750888e-06, "loss": 0.5186, "step": 9461 }, { "epoch": 0.8491620111731844, "grad_norm": 0.5241184547655595, "learning_rate": 9.01342623839885e-06, "loss": 0.4983, "step": 9462 }, { "epoch": 0.8492517556258554, "grad_norm": 0.48638197971632297, "learning_rate": 9.013114802277195e-06, "loss": 0.469, "step": 9463 }, { "epoch": 0.8493415000785264, "grad_norm": 0.4690679214039844, "learning_rate": 9.012803322389321e-06, "loss": 0.4337, "step": 9464 }, { "epoch": 0.8494312445311974, "grad_norm": 0.456896639229233, "learning_rate": 9.012491798738621e-06, "loss": 0.4448, "step": 9465 }, { "epoch": 0.8495209889838684, "grad_norm": 0.45019244751964255, "learning_rate": 9.012180231328496e-06, "loss": 0.3975, "step": 9466 }, { "epoch": 0.8496107334365395, "grad_norm": 0.5264303253184817, "learning_rate": 9.011868620162345e-06, "loss": 0.5101, "step": 9467 }, { "epoch": 0.8497004778892104, "grad_norm": 0.4987184780543567, "learning_rate": 9.011556965243563e-06, "loss": 0.4529, "step": 9468 }, { "epoch": 0.8497902223418815, "grad_norm": 0.4633286129622511, "learning_rate": 9.01124526657555e-06, "loss": 0.4052, "step": 9469 }, { "epoch": 0.8498799667945525, "grad_norm": 0.4887208730316736, "learning_rate": 9.010933524161705e-06, "loss": 0.5184, "step": 9470 }, { "epoch": 0.8499697112472235, "grad_norm": 0.5343029232783644, "learning_rate": 9.01062173800543e-06, "loss": 0.4956, "step": 9471 }, { "epoch": 0.8500594556998945, "grad_norm": 0.5741429740890689, "learning_rate": 9.010309908110122e-06, "loss": 0.569, "step": 9472 }, { "epoch": 0.8501492001525656, "grad_norm": 0.5052693232132646, "learning_rate": 9.009998034479186e-06, "loss": 0.4674, "step": 9473 }, { "epoch": 0.8502389446052366, "grad_norm": 0.44549641164420567, "learning_rate": 9.009686117116018e-06, "loss": 0.3696, "step": 9474 }, { "epoch": 0.8503286890579076, "grad_norm": 0.46948992261530065, "learning_rate": 9.009374156024022e-06, "loss": 0.4144, "step": 9475 }, { "epoch": 0.8504184335105787, "grad_norm": 0.5180545262060874, "learning_rate": 9.009062151206602e-06, "loss": 0.4629, "step": 9476 }, { "epoch": 0.8505081779632496, "grad_norm": 0.46692054970217817, "learning_rate": 9.00875010266716e-06, "loss": 0.4231, "step": 9477 }, { "epoch": 0.8505979224159207, "grad_norm": 0.5260118707555169, "learning_rate": 9.008438010409097e-06, "loss": 0.5224, "step": 9478 }, { "epoch": 0.8506876668685917, "grad_norm": 0.47462347173853403, "learning_rate": 9.008125874435818e-06, "loss": 0.4045, "step": 9479 }, { "epoch": 0.8507774113212627, "grad_norm": 0.4805502411900927, "learning_rate": 9.007813694750727e-06, "loss": 0.4248, "step": 9480 }, { "epoch": 0.8508671557739337, "grad_norm": 0.5115901038430394, "learning_rate": 9.007501471357229e-06, "loss": 0.4999, "step": 9481 }, { "epoch": 0.8509569002266048, "grad_norm": 0.4767759731982002, "learning_rate": 9.007189204258728e-06, "loss": 0.4041, "step": 9482 }, { "epoch": 0.8510466446792757, "grad_norm": 0.5593709273992409, "learning_rate": 9.006876893458631e-06, "loss": 0.5305, "step": 9483 }, { "epoch": 0.8511363891319468, "grad_norm": 0.4619483012308243, "learning_rate": 9.006564538960343e-06, "loss": 0.4305, "step": 9484 }, { "epoch": 0.8512261335846179, "grad_norm": 0.5086519110592248, "learning_rate": 9.006252140767268e-06, "loss": 0.4722, "step": 9485 }, { "epoch": 0.8513158780372888, "grad_norm": 0.48097167040265354, "learning_rate": 9.00593969888282e-06, "loss": 0.4146, "step": 9486 }, { "epoch": 0.8514056224899599, "grad_norm": 0.5046614475074732, "learning_rate": 9.005627213310399e-06, "loss": 0.4576, "step": 9487 }, { "epoch": 0.8514953669426308, "grad_norm": 0.44899565416344905, "learning_rate": 9.005314684053417e-06, "loss": 0.407, "step": 9488 }, { "epoch": 0.8515851113953019, "grad_norm": 0.496189759017448, "learning_rate": 9.005002111115281e-06, "loss": 0.4513, "step": 9489 }, { "epoch": 0.8516748558479729, "grad_norm": 0.4623273416045285, "learning_rate": 9.004689494499402e-06, "loss": 0.4116, "step": 9490 }, { "epoch": 0.8517646003006439, "grad_norm": 0.48522369786450525, "learning_rate": 9.004376834209184e-06, "loss": 0.4116, "step": 9491 }, { "epoch": 0.8518543447533149, "grad_norm": 0.4512575485585242, "learning_rate": 9.004064130248042e-06, "loss": 0.3842, "step": 9492 }, { "epoch": 0.851944089205986, "grad_norm": 0.5791726078440828, "learning_rate": 9.003751382619386e-06, "loss": 0.5106, "step": 9493 }, { "epoch": 0.8520338336586569, "grad_norm": 0.4770409636279335, "learning_rate": 9.003438591326623e-06, "loss": 0.4581, "step": 9494 }, { "epoch": 0.852123578111328, "grad_norm": 0.49751424413585765, "learning_rate": 9.003125756373168e-06, "loss": 0.4515, "step": 9495 }, { "epoch": 0.8522133225639991, "grad_norm": 0.4759186456930198, "learning_rate": 9.002812877762431e-06, "loss": 0.4691, "step": 9496 }, { "epoch": 0.85230306701667, "grad_norm": 0.5124936647451963, "learning_rate": 9.002499955497823e-06, "loss": 0.4608, "step": 9497 }, { "epoch": 0.8523928114693411, "grad_norm": 0.44483083485977054, "learning_rate": 9.002186989582761e-06, "loss": 0.3858, "step": 9498 }, { "epoch": 0.852482555922012, "grad_norm": 0.4689598044614461, "learning_rate": 9.001873980020653e-06, "loss": 0.4454, "step": 9499 }, { "epoch": 0.8525723003746831, "grad_norm": 0.46136112208985375, "learning_rate": 9.001560926814915e-06, "loss": 0.3986, "step": 9500 }, { "epoch": 0.8526620448273541, "grad_norm": 0.5307843960165101, "learning_rate": 9.001247829968963e-06, "loss": 0.5233, "step": 9501 }, { "epoch": 0.8527517892800252, "grad_norm": 0.46601575149739205, "learning_rate": 9.000934689486208e-06, "loss": 0.4518, "step": 9502 }, { "epoch": 0.8528415337326961, "grad_norm": 0.4850617178451761, "learning_rate": 9.000621505370067e-06, "loss": 0.4449, "step": 9503 }, { "epoch": 0.8529312781853672, "grad_norm": 0.4999922584011333, "learning_rate": 9.000308277623955e-06, "loss": 0.5037, "step": 9504 }, { "epoch": 0.8530210226380381, "grad_norm": 0.4831929846242139, "learning_rate": 8.99999500625129e-06, "loss": 0.4303, "step": 9505 }, { "epoch": 0.8531107670907092, "grad_norm": 0.4810430922335403, "learning_rate": 8.999681691255485e-06, "loss": 0.4395, "step": 9506 }, { "epoch": 0.8532005115433803, "grad_norm": 0.4968903489154054, "learning_rate": 8.999368332639957e-06, "loss": 0.4585, "step": 9507 }, { "epoch": 0.8532902559960512, "grad_norm": 0.4819230160156943, "learning_rate": 8.999054930408127e-06, "loss": 0.4651, "step": 9508 }, { "epoch": 0.8533800004487223, "grad_norm": 0.46166595832918883, "learning_rate": 8.99874148456341e-06, "loss": 0.4413, "step": 9509 }, { "epoch": 0.8534697449013933, "grad_norm": 0.45799970044285343, "learning_rate": 8.998427995109227e-06, "loss": 0.4314, "step": 9510 }, { "epoch": 0.8535594893540643, "grad_norm": 0.47830409475290064, "learning_rate": 8.998114462048993e-06, "loss": 0.4146, "step": 9511 }, { "epoch": 0.8536492338067353, "grad_norm": 0.44386316968920836, "learning_rate": 8.99780088538613e-06, "loss": 0.4091, "step": 9512 }, { "epoch": 0.8537389782594064, "grad_norm": 0.4782189799626982, "learning_rate": 8.997487265124058e-06, "loss": 0.4371, "step": 9513 }, { "epoch": 0.8538287227120773, "grad_norm": 0.4658181640165195, "learning_rate": 8.997173601266196e-06, "loss": 0.4314, "step": 9514 }, { "epoch": 0.8539184671647484, "grad_norm": 0.49832554378814486, "learning_rate": 8.996859893815966e-06, "loss": 0.5064, "step": 9515 }, { "epoch": 0.8540082116174194, "grad_norm": 0.514855409819113, "learning_rate": 8.996546142776787e-06, "loss": 0.4794, "step": 9516 }, { "epoch": 0.8540979560700904, "grad_norm": 0.475072753022722, "learning_rate": 8.996232348152083e-06, "loss": 0.4389, "step": 9517 }, { "epoch": 0.8541877005227614, "grad_norm": 0.5077160879215518, "learning_rate": 8.995918509945277e-06, "loss": 0.4394, "step": 9518 }, { "epoch": 0.8542774449754325, "grad_norm": 0.47456860154190333, "learning_rate": 8.995604628159788e-06, "loss": 0.4324, "step": 9519 }, { "epoch": 0.8543671894281035, "grad_norm": 0.47022413557602893, "learning_rate": 8.995290702799041e-06, "loss": 0.4818, "step": 9520 }, { "epoch": 0.8544569338807745, "grad_norm": 0.48128241935954374, "learning_rate": 8.994976733866461e-06, "loss": 0.4536, "step": 9521 }, { "epoch": 0.8545466783334456, "grad_norm": 0.5351282799585454, "learning_rate": 8.99466272136547e-06, "loss": 0.4673, "step": 9522 }, { "epoch": 0.8546364227861165, "grad_norm": 0.5068122001949348, "learning_rate": 8.994348665299495e-06, "loss": 0.4577, "step": 9523 }, { "epoch": 0.8547261672387876, "grad_norm": 0.5166629885530509, "learning_rate": 8.994034565671959e-06, "loss": 0.5086, "step": 9524 }, { "epoch": 0.8548159116914585, "grad_norm": 0.423465256811774, "learning_rate": 8.993720422486288e-06, "loss": 0.3409, "step": 9525 }, { "epoch": 0.8549056561441296, "grad_norm": 0.4972196661709673, "learning_rate": 8.993406235745907e-06, "loss": 0.416, "step": 9526 }, { "epoch": 0.8549954005968006, "grad_norm": 0.4865074183156836, "learning_rate": 8.993092005454243e-06, "loss": 0.4173, "step": 9527 }, { "epoch": 0.8550851450494716, "grad_norm": 0.48559483258028185, "learning_rate": 8.992777731614725e-06, "loss": 0.4413, "step": 9528 }, { "epoch": 0.8551748895021426, "grad_norm": 0.4783194822713229, "learning_rate": 8.99246341423078e-06, "loss": 0.4262, "step": 9529 }, { "epoch": 0.8552646339548137, "grad_norm": 0.5135574739619883, "learning_rate": 8.99214905330583e-06, "loss": 0.4679, "step": 9530 }, { "epoch": 0.8553543784074847, "grad_norm": 0.5057303063378173, "learning_rate": 8.991834648843311e-06, "loss": 0.4677, "step": 9531 }, { "epoch": 0.8554441228601557, "grad_norm": 0.498572326432417, "learning_rate": 8.991520200846648e-06, "loss": 0.4463, "step": 9532 }, { "epoch": 0.8555338673128268, "grad_norm": 0.5388597284966444, "learning_rate": 8.991205709319273e-06, "loss": 0.4977, "step": 9533 }, { "epoch": 0.8556236117654977, "grad_norm": 0.5455635959288068, "learning_rate": 8.990891174264612e-06, "loss": 0.5421, "step": 9534 }, { "epoch": 0.8557133562181688, "grad_norm": 0.48750763455065144, "learning_rate": 8.990576595686097e-06, "loss": 0.4687, "step": 9535 }, { "epoch": 0.8558031006708398, "grad_norm": 0.4539815707289695, "learning_rate": 8.990261973587162e-06, "loss": 0.4103, "step": 9536 }, { "epoch": 0.8558928451235108, "grad_norm": 0.5259056837410226, "learning_rate": 8.989947307971232e-06, "loss": 0.5216, "step": 9537 }, { "epoch": 0.8559825895761818, "grad_norm": 0.4919952072912637, "learning_rate": 8.989632598841742e-06, "loss": 0.4375, "step": 9538 }, { "epoch": 0.8560723340288529, "grad_norm": 0.4715402928741043, "learning_rate": 8.989317846202125e-06, "loss": 0.3969, "step": 9539 }, { "epoch": 0.8561620784815238, "grad_norm": 0.47879210550062645, "learning_rate": 8.989003050055813e-06, "loss": 0.4389, "step": 9540 }, { "epoch": 0.8562518229341949, "grad_norm": 0.4803740563589137, "learning_rate": 8.988688210406238e-06, "loss": 0.4398, "step": 9541 }, { "epoch": 0.856341567386866, "grad_norm": 0.5066660803696732, "learning_rate": 8.988373327256834e-06, "loss": 0.4884, "step": 9542 }, { "epoch": 0.8564313118395369, "grad_norm": 0.45966164589556274, "learning_rate": 8.988058400611035e-06, "loss": 0.4099, "step": 9543 }, { "epoch": 0.856521056292208, "grad_norm": 0.49018505256917533, "learning_rate": 8.987743430472276e-06, "loss": 0.4447, "step": 9544 }, { "epoch": 0.8566108007448789, "grad_norm": 0.5014854396410683, "learning_rate": 8.987428416843993e-06, "loss": 0.4476, "step": 9545 }, { "epoch": 0.85670054519755, "grad_norm": 0.4823935253073278, "learning_rate": 8.987113359729619e-06, "loss": 0.4246, "step": 9546 }, { "epoch": 0.856790289650221, "grad_norm": 0.4689248980820825, "learning_rate": 8.986798259132593e-06, "loss": 0.4374, "step": 9547 }, { "epoch": 0.856880034102892, "grad_norm": 0.4857441204624233, "learning_rate": 8.986483115056347e-06, "loss": 0.4505, "step": 9548 }, { "epoch": 0.856969778555563, "grad_norm": 0.5486961314868617, "learning_rate": 8.986167927504322e-06, "loss": 0.5124, "step": 9549 }, { "epoch": 0.8570595230082341, "grad_norm": 0.4509934667788793, "learning_rate": 8.985852696479954e-06, "loss": 0.4126, "step": 9550 }, { "epoch": 0.857149267460905, "grad_norm": 0.44797841523794435, "learning_rate": 8.985537421986681e-06, "loss": 0.3953, "step": 9551 }, { "epoch": 0.8572390119135761, "grad_norm": 0.4856017842774866, "learning_rate": 8.985222104027942e-06, "loss": 0.3982, "step": 9552 }, { "epoch": 0.8573287563662471, "grad_norm": 0.5269039476505513, "learning_rate": 8.984906742607174e-06, "loss": 0.5386, "step": 9553 }, { "epoch": 0.8574185008189181, "grad_norm": 0.5308858085637223, "learning_rate": 8.984591337727815e-06, "loss": 0.4894, "step": 9554 }, { "epoch": 0.8575082452715892, "grad_norm": 0.5147021693223506, "learning_rate": 8.98427588939331e-06, "loss": 0.5147, "step": 9555 }, { "epoch": 0.8575979897242602, "grad_norm": 0.469044222201259, "learning_rate": 8.983960397607094e-06, "loss": 0.4371, "step": 9556 }, { "epoch": 0.8576877341769312, "grad_norm": 0.49026718868286684, "learning_rate": 8.98364486237261e-06, "loss": 0.4632, "step": 9557 }, { "epoch": 0.8577774786296022, "grad_norm": 0.4253078175401977, "learning_rate": 8.983329283693303e-06, "loss": 0.3861, "step": 9558 }, { "epoch": 0.8578672230822733, "grad_norm": 0.5180452212522569, "learning_rate": 8.983013661572606e-06, "loss": 0.4748, "step": 9559 }, { "epoch": 0.8579569675349442, "grad_norm": 0.4771381148499445, "learning_rate": 8.982697996013969e-06, "loss": 0.4306, "step": 9560 }, { "epoch": 0.8580467119876153, "grad_norm": 0.5696404912905123, "learning_rate": 8.982382287020827e-06, "loss": 0.4804, "step": 9561 }, { "epoch": 0.8581364564402862, "grad_norm": 0.5366159090052727, "learning_rate": 8.982066534596632e-06, "loss": 0.5055, "step": 9562 }, { "epoch": 0.8582262008929573, "grad_norm": 0.4814594052740135, "learning_rate": 8.98175073874482e-06, "loss": 0.453, "step": 9563 }, { "epoch": 0.8583159453456283, "grad_norm": 0.45547458348644804, "learning_rate": 8.98143489946884e-06, "loss": 0.4014, "step": 9564 }, { "epoch": 0.8584056897982993, "grad_norm": 0.4981432766082409, "learning_rate": 8.981119016772133e-06, "loss": 0.4852, "step": 9565 }, { "epoch": 0.8584954342509704, "grad_norm": 0.47583248652361926, "learning_rate": 8.980803090658145e-06, "loss": 0.4545, "step": 9566 }, { "epoch": 0.8585851787036414, "grad_norm": 0.458606117604717, "learning_rate": 8.980487121130324e-06, "loss": 0.3848, "step": 9567 }, { "epoch": 0.8586749231563124, "grad_norm": 0.5115597566629636, "learning_rate": 8.980171108192111e-06, "loss": 0.4965, "step": 9568 }, { "epoch": 0.8587646676089834, "grad_norm": 0.48692524758145334, "learning_rate": 8.979855051846957e-06, "loss": 0.4603, "step": 9569 }, { "epoch": 0.8588544120616545, "grad_norm": 0.48497404613161066, "learning_rate": 8.979538952098306e-06, "loss": 0.4355, "step": 9570 }, { "epoch": 0.8589441565143254, "grad_norm": 0.4756675634001091, "learning_rate": 8.979222808949606e-06, "loss": 0.4379, "step": 9571 }, { "epoch": 0.8590339009669965, "grad_norm": 0.4762289551789149, "learning_rate": 8.978906622404305e-06, "loss": 0.4367, "step": 9572 }, { "epoch": 0.8591236454196675, "grad_norm": 0.5024145583452269, "learning_rate": 8.978590392465852e-06, "loss": 0.4252, "step": 9573 }, { "epoch": 0.8592133898723385, "grad_norm": 0.4459905392450371, "learning_rate": 8.978274119137694e-06, "loss": 0.3982, "step": 9574 }, { "epoch": 0.8593031343250095, "grad_norm": 0.4725209979616059, "learning_rate": 8.977957802423283e-06, "loss": 0.4262, "step": 9575 }, { "epoch": 0.8593928787776806, "grad_norm": 0.4796726302689489, "learning_rate": 8.977641442326066e-06, "loss": 0.369, "step": 9576 }, { "epoch": 0.8594826232303516, "grad_norm": 0.5377754519891876, "learning_rate": 8.977325038849494e-06, "loss": 0.5149, "step": 9577 }, { "epoch": 0.8595723676830226, "grad_norm": 0.45038921752695354, "learning_rate": 8.977008591997016e-06, "loss": 0.3576, "step": 9578 }, { "epoch": 0.8596621121356937, "grad_norm": 0.7279091549321965, "learning_rate": 8.976692101772087e-06, "loss": 0.4484, "step": 9579 }, { "epoch": 0.8597518565883646, "grad_norm": 0.49583157080751766, "learning_rate": 8.976375568178157e-06, "loss": 0.4754, "step": 9580 }, { "epoch": 0.8598416010410357, "grad_norm": 0.4270978560193925, "learning_rate": 8.976058991218675e-06, "loss": 0.3624, "step": 9581 }, { "epoch": 0.8599313454937066, "grad_norm": 0.49070226711222464, "learning_rate": 8.975742370897097e-06, "loss": 0.4498, "step": 9582 }, { "epoch": 0.8600210899463777, "grad_norm": 0.4684192391442222, "learning_rate": 8.975425707216877e-06, "loss": 0.4029, "step": 9583 }, { "epoch": 0.8601108343990487, "grad_norm": 0.5446339769227526, "learning_rate": 8.975109000181464e-06, "loss": 0.5204, "step": 9584 }, { "epoch": 0.8602005788517197, "grad_norm": 0.4952626226676174, "learning_rate": 8.974792249794315e-06, "loss": 0.478, "step": 9585 }, { "epoch": 0.8602903233043907, "grad_norm": 0.4648397118001007, "learning_rate": 8.974475456058885e-06, "loss": 0.4217, "step": 9586 }, { "epoch": 0.8603800677570618, "grad_norm": 0.46470029861729145, "learning_rate": 8.974158618978626e-06, "loss": 0.4205, "step": 9587 }, { "epoch": 0.8604698122097327, "grad_norm": 0.4810851626414982, "learning_rate": 8.973841738556996e-06, "loss": 0.4178, "step": 9588 }, { "epoch": 0.8605595566624038, "grad_norm": 0.48228771958792915, "learning_rate": 8.97352481479745e-06, "loss": 0.4464, "step": 9589 }, { "epoch": 0.8606493011150749, "grad_norm": 0.5225745957336452, "learning_rate": 8.973207847703441e-06, "loss": 0.4642, "step": 9590 }, { "epoch": 0.8607390455677458, "grad_norm": 0.5442566705764523, "learning_rate": 8.972890837278434e-06, "loss": 0.5057, "step": 9591 }, { "epoch": 0.8608287900204169, "grad_norm": 0.467733952237627, "learning_rate": 8.972573783525878e-06, "loss": 0.4152, "step": 9592 }, { "epoch": 0.8609185344730879, "grad_norm": 0.46171164723900965, "learning_rate": 8.972256686449236e-06, "loss": 0.434, "step": 9593 }, { "epoch": 0.8610082789257589, "grad_norm": 0.47226020043901606, "learning_rate": 8.971939546051963e-06, "loss": 0.4727, "step": 9594 }, { "epoch": 0.8610980233784299, "grad_norm": 0.5144843753240905, "learning_rate": 8.971622362337519e-06, "loss": 0.4955, "step": 9595 }, { "epoch": 0.861187767831101, "grad_norm": 0.5042113291325081, "learning_rate": 8.971305135309363e-06, "loss": 0.49, "step": 9596 }, { "epoch": 0.8612775122837719, "grad_norm": 0.46374883930506366, "learning_rate": 8.970987864970954e-06, "loss": 0.377, "step": 9597 }, { "epoch": 0.861367256736443, "grad_norm": 0.5217706749267778, "learning_rate": 8.970670551325753e-06, "loss": 0.5144, "step": 9598 }, { "epoch": 0.861457001189114, "grad_norm": 0.4826114561841728, "learning_rate": 8.97035319437722e-06, "loss": 0.4128, "step": 9599 }, { "epoch": 0.861546745641785, "grad_norm": 0.4612255167931025, "learning_rate": 8.970035794128817e-06, "loss": 0.4147, "step": 9600 }, { "epoch": 0.8616364900944561, "grad_norm": 0.5272501010237968, "learning_rate": 8.969718350584005e-06, "loss": 0.5199, "step": 9601 }, { "epoch": 0.861726234547127, "grad_norm": 0.4825855172281538, "learning_rate": 8.969400863746245e-06, "loss": 0.4396, "step": 9602 }, { "epoch": 0.8618159789997981, "grad_norm": 0.5261823781385009, "learning_rate": 8.969083333619e-06, "loss": 0.4641, "step": 9603 }, { "epoch": 0.8619057234524691, "grad_norm": 0.47207397545632945, "learning_rate": 8.968765760205733e-06, "loss": 0.4369, "step": 9604 }, { "epoch": 0.8619954679051401, "grad_norm": 0.47968301066487995, "learning_rate": 8.968448143509907e-06, "loss": 0.4658, "step": 9605 }, { "epoch": 0.8620852123578111, "grad_norm": 0.507950910109843, "learning_rate": 8.968130483534989e-06, "loss": 0.4372, "step": 9606 }, { "epoch": 0.8621749568104822, "grad_norm": 0.47639299906238136, "learning_rate": 8.967812780284437e-06, "loss": 0.4737, "step": 9607 }, { "epoch": 0.8622647012631531, "grad_norm": 0.5084944095662406, "learning_rate": 8.967495033761722e-06, "loss": 0.5361, "step": 9608 }, { "epoch": 0.8623544457158242, "grad_norm": 0.5335442528408075, "learning_rate": 8.967177243970307e-06, "loss": 0.4588, "step": 9609 }, { "epoch": 0.8624441901684952, "grad_norm": 0.47361548863535213, "learning_rate": 8.966859410913655e-06, "loss": 0.4754, "step": 9610 }, { "epoch": 0.8625339346211662, "grad_norm": 0.4639597186693648, "learning_rate": 8.966541534595236e-06, "loss": 0.3946, "step": 9611 }, { "epoch": 0.8626236790738373, "grad_norm": 0.4817223439967788, "learning_rate": 8.966223615018517e-06, "loss": 0.4501, "step": 9612 }, { "epoch": 0.8627134235265083, "grad_norm": 0.46576623282506713, "learning_rate": 8.965905652186962e-06, "loss": 0.4071, "step": 9613 }, { "epoch": 0.8628031679791793, "grad_norm": 0.5202593217767963, "learning_rate": 8.96558764610404e-06, "loss": 0.4756, "step": 9614 }, { "epoch": 0.8628929124318503, "grad_norm": 0.4900050889276904, "learning_rate": 8.965269596773221e-06, "loss": 0.4426, "step": 9615 }, { "epoch": 0.8629826568845214, "grad_norm": 0.5094352749955594, "learning_rate": 8.96495150419797e-06, "loss": 0.4177, "step": 9616 }, { "epoch": 0.8630724013371923, "grad_norm": 0.4770209444649582, "learning_rate": 8.96463336838176e-06, "loss": 0.4218, "step": 9617 }, { "epoch": 0.8631621457898634, "grad_norm": 0.5157160538499845, "learning_rate": 8.964315189328057e-06, "loss": 0.4688, "step": 9618 }, { "epoch": 0.8632518902425343, "grad_norm": 0.4625007193542693, "learning_rate": 8.963996967040333e-06, "loss": 0.3888, "step": 9619 }, { "epoch": 0.8633416346952054, "grad_norm": 0.5138646010699053, "learning_rate": 8.963678701522059e-06, "loss": 0.4736, "step": 9620 }, { "epoch": 0.8634313791478764, "grad_norm": 0.4920743239803262, "learning_rate": 8.963360392776703e-06, "loss": 0.4516, "step": 9621 }, { "epoch": 0.8635211236005474, "grad_norm": 0.4688409630245703, "learning_rate": 8.963042040807739e-06, "loss": 0.4035, "step": 9622 }, { "epoch": 0.8636108680532184, "grad_norm": 0.5322650752480665, "learning_rate": 8.962723645618639e-06, "loss": 0.431, "step": 9623 }, { "epoch": 0.8637006125058895, "grad_norm": 0.48830739104619375, "learning_rate": 8.962405207212875e-06, "loss": 0.4153, "step": 9624 }, { "epoch": 0.8637903569585605, "grad_norm": 0.4757708990258188, "learning_rate": 8.962086725593917e-06, "loss": 0.4147, "step": 9625 }, { "epoch": 0.8638801014112315, "grad_norm": 0.5215613364176505, "learning_rate": 8.961768200765242e-06, "loss": 0.5023, "step": 9626 }, { "epoch": 0.8639698458639026, "grad_norm": 0.529401734740834, "learning_rate": 8.961449632730323e-06, "loss": 0.4477, "step": 9627 }, { "epoch": 0.8640595903165735, "grad_norm": 0.45916104562856414, "learning_rate": 8.961131021492632e-06, "loss": 0.4098, "step": 9628 }, { "epoch": 0.8641493347692446, "grad_norm": 0.4617104069530176, "learning_rate": 8.960812367055646e-06, "loss": 0.3892, "step": 9629 }, { "epoch": 0.8642390792219156, "grad_norm": 0.5094153955244318, "learning_rate": 8.960493669422841e-06, "loss": 0.4253, "step": 9630 }, { "epoch": 0.8643288236745866, "grad_norm": 0.5067937722826309, "learning_rate": 8.960174928597692e-06, "loss": 0.4693, "step": 9631 }, { "epoch": 0.8644185681272576, "grad_norm": 0.47802539245297326, "learning_rate": 8.959856144583672e-06, "loss": 0.4074, "step": 9632 }, { "epoch": 0.8645083125799287, "grad_norm": 0.4390634223073476, "learning_rate": 8.95953731738426e-06, "loss": 0.3725, "step": 9633 }, { "epoch": 0.8645980570325996, "grad_norm": 0.5734041254232879, "learning_rate": 8.959218447002935e-06, "loss": 0.5011, "step": 9634 }, { "epoch": 0.8646878014852707, "grad_norm": 0.5498919999321866, "learning_rate": 8.95889953344317e-06, "loss": 0.5018, "step": 9635 }, { "epoch": 0.8647775459379418, "grad_norm": 0.4521598126780793, "learning_rate": 8.958580576708448e-06, "loss": 0.3944, "step": 9636 }, { "epoch": 0.8648672903906127, "grad_norm": 0.5315874034525293, "learning_rate": 8.958261576802244e-06, "loss": 0.4834, "step": 9637 }, { "epoch": 0.8649570348432838, "grad_norm": 0.5127523126031015, "learning_rate": 8.957942533728038e-06, "loss": 0.4819, "step": 9638 }, { "epoch": 0.8650467792959547, "grad_norm": 0.4857181897774664, "learning_rate": 8.95762344748931e-06, "loss": 0.4424, "step": 9639 }, { "epoch": 0.8651365237486258, "grad_norm": 0.4717671567823119, "learning_rate": 8.957304318089538e-06, "loss": 0.4395, "step": 9640 }, { "epoch": 0.8652262682012968, "grad_norm": 0.49295583553335187, "learning_rate": 8.956985145532205e-06, "loss": 0.4785, "step": 9641 }, { "epoch": 0.8653160126539678, "grad_norm": 0.5023464228930895, "learning_rate": 8.95666592982079e-06, "loss": 0.4517, "step": 9642 }, { "epoch": 0.8654057571066388, "grad_norm": 0.5001743242922138, "learning_rate": 8.956346670958774e-06, "loss": 0.5054, "step": 9643 }, { "epoch": 0.8654955015593099, "grad_norm": 0.5414105251853368, "learning_rate": 8.95602736894964e-06, "loss": 0.5007, "step": 9644 }, { "epoch": 0.8655852460119808, "grad_norm": 0.5167656233798171, "learning_rate": 8.95570802379687e-06, "loss": 0.4781, "step": 9645 }, { "epoch": 0.8656749904646519, "grad_norm": 0.4637672982022725, "learning_rate": 8.955388635503946e-06, "loss": 0.4059, "step": 9646 }, { "epoch": 0.865764734917323, "grad_norm": 0.45915541995189474, "learning_rate": 8.955069204074352e-06, "loss": 0.4346, "step": 9647 }, { "epoch": 0.8658544793699939, "grad_norm": 0.4859688433072731, "learning_rate": 8.954749729511571e-06, "loss": 0.4573, "step": 9648 }, { "epoch": 0.865944223822665, "grad_norm": 0.47314920674531796, "learning_rate": 8.95443021181909e-06, "loss": 0.4382, "step": 9649 }, { "epoch": 0.866033968275336, "grad_norm": 0.503449657382583, "learning_rate": 8.954110651000387e-06, "loss": 0.4732, "step": 9650 }, { "epoch": 0.866123712728007, "grad_norm": 0.5010409867326475, "learning_rate": 8.953791047058954e-06, "loss": 0.435, "step": 9651 }, { "epoch": 0.866213457180678, "grad_norm": 0.4835437240276817, "learning_rate": 8.953471399998272e-06, "loss": 0.4537, "step": 9652 }, { "epoch": 0.8663032016333491, "grad_norm": 0.5473301377504541, "learning_rate": 8.95315170982183e-06, "loss": 0.5136, "step": 9653 }, { "epoch": 0.86639294608602, "grad_norm": 0.4518769726456394, "learning_rate": 8.952831976533114e-06, "loss": 0.4242, "step": 9654 }, { "epoch": 0.8664826905386911, "grad_norm": 0.4919835378193951, "learning_rate": 8.952512200135609e-06, "loss": 0.4498, "step": 9655 }, { "epoch": 0.866572434991362, "grad_norm": 0.48123295839660923, "learning_rate": 8.952192380632803e-06, "loss": 0.3911, "step": 9656 }, { "epoch": 0.8666621794440331, "grad_norm": 0.504223815757331, "learning_rate": 8.951872518028184e-06, "loss": 0.5218, "step": 9657 }, { "epoch": 0.8667519238967041, "grad_norm": 0.5152601210764868, "learning_rate": 8.951552612325243e-06, "loss": 0.4907, "step": 9658 }, { "epoch": 0.8668416683493751, "grad_norm": 0.5020053995169835, "learning_rate": 8.951232663527465e-06, "loss": 0.4951, "step": 9659 }, { "epoch": 0.8669314128020462, "grad_norm": 0.4886738153725876, "learning_rate": 8.95091267163834e-06, "loss": 0.4624, "step": 9660 }, { "epoch": 0.8670211572547172, "grad_norm": 0.5008942422138635, "learning_rate": 8.950592636661361e-06, "loss": 0.4694, "step": 9661 }, { "epoch": 0.8671109017073882, "grad_norm": 0.48664961772470033, "learning_rate": 8.950272558600016e-06, "loss": 0.4347, "step": 9662 }, { "epoch": 0.8672006461600592, "grad_norm": 0.6089557596173027, "learning_rate": 8.949952437457793e-06, "loss": 0.5736, "step": 9663 }, { "epoch": 0.8672903906127303, "grad_norm": 0.477327802630706, "learning_rate": 8.949632273238188e-06, "loss": 0.4546, "step": 9664 }, { "epoch": 0.8673801350654012, "grad_norm": 0.4760001994948213, "learning_rate": 8.949312065944688e-06, "loss": 0.4385, "step": 9665 }, { "epoch": 0.8674698795180723, "grad_norm": 0.5916535261461014, "learning_rate": 8.94899181558079e-06, "loss": 0.5731, "step": 9666 }, { "epoch": 0.8675596239707433, "grad_norm": 0.48017234658765906, "learning_rate": 8.948671522149983e-06, "loss": 0.4742, "step": 9667 }, { "epoch": 0.8676493684234143, "grad_norm": 0.49725438763717705, "learning_rate": 8.948351185655761e-06, "loss": 0.4439, "step": 9668 }, { "epoch": 0.8677391128760853, "grad_norm": 0.47125180398197597, "learning_rate": 8.94803080610162e-06, "loss": 0.4191, "step": 9669 }, { "epoch": 0.8678288573287564, "grad_norm": 0.45588018609038283, "learning_rate": 8.947710383491048e-06, "loss": 0.3902, "step": 9670 }, { "epoch": 0.8679186017814274, "grad_norm": 0.5020425925889499, "learning_rate": 8.947389917827546e-06, "loss": 0.5305, "step": 9671 }, { "epoch": 0.8680083462340984, "grad_norm": 0.5367741059687038, "learning_rate": 8.947069409114604e-06, "loss": 0.5372, "step": 9672 }, { "epoch": 0.8680980906867695, "grad_norm": 0.5025117545004383, "learning_rate": 8.94674885735572e-06, "loss": 0.4371, "step": 9673 }, { "epoch": 0.8681878351394404, "grad_norm": 0.4323506730619284, "learning_rate": 8.946428262554392e-06, "loss": 0.3437, "step": 9674 }, { "epoch": 0.8682775795921115, "grad_norm": 0.46937783727787896, "learning_rate": 8.946107624714112e-06, "loss": 0.3989, "step": 9675 }, { "epoch": 0.8683673240447825, "grad_norm": 0.4947501044017092, "learning_rate": 8.945786943838378e-06, "loss": 0.4416, "step": 9676 }, { "epoch": 0.8684570684974535, "grad_norm": 0.46484629679963313, "learning_rate": 8.945466219930688e-06, "loss": 0.4307, "step": 9677 }, { "epoch": 0.8685468129501245, "grad_norm": 0.4707780623508232, "learning_rate": 8.94514545299454e-06, "loss": 0.4299, "step": 9678 }, { "epoch": 0.8686365574027956, "grad_norm": 0.45333768910119226, "learning_rate": 8.944824643033432e-06, "loss": 0.4108, "step": 9679 }, { "epoch": 0.8687263018554665, "grad_norm": 0.5030047435583109, "learning_rate": 8.944503790050861e-06, "loss": 0.4265, "step": 9680 }, { "epoch": 0.8688160463081376, "grad_norm": 0.5168671022087821, "learning_rate": 8.944182894050329e-06, "loss": 0.5034, "step": 9681 }, { "epoch": 0.8689057907608086, "grad_norm": 0.4411259904697337, "learning_rate": 8.943861955035334e-06, "loss": 0.371, "step": 9682 }, { "epoch": 0.8689955352134796, "grad_norm": 0.4454147933630453, "learning_rate": 8.943540973009375e-06, "loss": 0.4007, "step": 9683 }, { "epoch": 0.8690852796661507, "grad_norm": 0.46620932301997037, "learning_rate": 8.943219947975957e-06, "loss": 0.4098, "step": 9684 }, { "epoch": 0.8691750241188216, "grad_norm": 0.46497366746095714, "learning_rate": 8.942898879938575e-06, "loss": 0.4092, "step": 9685 }, { "epoch": 0.8692647685714927, "grad_norm": 0.48068830700750315, "learning_rate": 8.942577768900736e-06, "loss": 0.4566, "step": 9686 }, { "epoch": 0.8693545130241637, "grad_norm": 0.4727003696750987, "learning_rate": 8.942256614865936e-06, "loss": 0.446, "step": 9687 }, { "epoch": 0.8694442574768347, "grad_norm": 0.4843317272999763, "learning_rate": 8.941935417837684e-06, "loss": 0.424, "step": 9688 }, { "epoch": 0.8695340019295057, "grad_norm": 0.423429533740537, "learning_rate": 8.941614177819477e-06, "loss": 0.3658, "step": 9689 }, { "epoch": 0.8696237463821768, "grad_norm": 0.4736532039815306, "learning_rate": 8.941292894814823e-06, "loss": 0.4157, "step": 9690 }, { "epoch": 0.8697134908348477, "grad_norm": 0.4885355719377306, "learning_rate": 8.940971568827224e-06, "loss": 0.4722, "step": 9691 }, { "epoch": 0.8698032352875188, "grad_norm": 0.523675339864344, "learning_rate": 8.940650199860184e-06, "loss": 0.5275, "step": 9692 }, { "epoch": 0.8698929797401899, "grad_norm": 0.49370382181856903, "learning_rate": 8.94032878791721e-06, "loss": 0.4722, "step": 9693 }, { "epoch": 0.8699827241928608, "grad_norm": 0.4973102249176793, "learning_rate": 8.940007333001803e-06, "loss": 0.501, "step": 9694 }, { "epoch": 0.8700724686455319, "grad_norm": 0.43271300779631133, "learning_rate": 8.939685835117471e-06, "loss": 0.3804, "step": 9695 }, { "epoch": 0.8701622130982029, "grad_norm": 0.4846371326012949, "learning_rate": 8.939364294267722e-06, "loss": 0.4951, "step": 9696 }, { "epoch": 0.8702519575508739, "grad_norm": 0.47439831967733304, "learning_rate": 8.939042710456059e-06, "loss": 0.4459, "step": 9697 }, { "epoch": 0.8703417020035449, "grad_norm": 0.49878576547104686, "learning_rate": 8.938721083685993e-06, "loss": 0.4741, "step": 9698 }, { "epoch": 0.870431446456216, "grad_norm": 0.47003286669260846, "learning_rate": 8.938399413961028e-06, "loss": 0.4095, "step": 9699 }, { "epoch": 0.8705211909088869, "grad_norm": 0.5035981909400721, "learning_rate": 8.938077701284675e-06, "loss": 0.4698, "step": 9700 }, { "epoch": 0.870610935361558, "grad_norm": 0.4798540316493262, "learning_rate": 8.93775594566044e-06, "loss": 0.4626, "step": 9701 }, { "epoch": 0.8707006798142289, "grad_norm": 0.4977833650426893, "learning_rate": 8.937434147091835e-06, "loss": 0.4536, "step": 9702 }, { "epoch": 0.8707904242669, "grad_norm": 0.5020921358222245, "learning_rate": 8.937112305582366e-06, "loss": 0.4777, "step": 9703 }, { "epoch": 0.870880168719571, "grad_norm": 0.49718139861871286, "learning_rate": 8.936790421135547e-06, "loss": 0.4597, "step": 9704 }, { "epoch": 0.870969913172242, "grad_norm": 0.4784207255999969, "learning_rate": 8.936468493754882e-06, "loss": 0.4029, "step": 9705 }, { "epoch": 0.8710596576249131, "grad_norm": 0.47563406412934245, "learning_rate": 8.93614652344389e-06, "loss": 0.4404, "step": 9706 }, { "epoch": 0.8711494020775841, "grad_norm": 0.4801417951670988, "learning_rate": 8.935824510206077e-06, "loss": 0.425, "step": 9707 }, { "epoch": 0.8712391465302551, "grad_norm": 0.46863354667684026, "learning_rate": 8.935502454044955e-06, "loss": 0.4313, "step": 9708 }, { "epoch": 0.8713288909829261, "grad_norm": 0.5081952727567899, "learning_rate": 8.935180354964038e-06, "loss": 0.5024, "step": 9709 }, { "epoch": 0.8714186354355972, "grad_norm": 0.5048821530245398, "learning_rate": 8.93485821296684e-06, "loss": 0.4215, "step": 9710 }, { "epoch": 0.8715083798882681, "grad_norm": 0.44084798624461796, "learning_rate": 8.93453602805687e-06, "loss": 0.3982, "step": 9711 }, { "epoch": 0.8715981243409392, "grad_norm": 0.4736318667226202, "learning_rate": 8.934213800237644e-06, "loss": 0.4458, "step": 9712 }, { "epoch": 0.8716878687936102, "grad_norm": 0.4686604647538424, "learning_rate": 8.933891529512677e-06, "loss": 0.406, "step": 9713 }, { "epoch": 0.8717776132462812, "grad_norm": 0.513091897235783, "learning_rate": 8.933569215885483e-06, "loss": 0.4524, "step": 9714 }, { "epoch": 0.8718673576989522, "grad_norm": 0.5223458758461692, "learning_rate": 8.933246859359577e-06, "loss": 0.4898, "step": 9715 }, { "epoch": 0.8719571021516233, "grad_norm": 0.510844403229301, "learning_rate": 8.932924459938473e-06, "loss": 0.4107, "step": 9716 }, { "epoch": 0.8720468466042943, "grad_norm": 0.49908895026087213, "learning_rate": 8.93260201762569e-06, "loss": 0.4963, "step": 9717 }, { "epoch": 0.8721365910569653, "grad_norm": 0.45474771514223994, "learning_rate": 8.932279532424741e-06, "loss": 0.4029, "step": 9718 }, { "epoch": 0.8722263355096364, "grad_norm": 0.45668300956399255, "learning_rate": 8.931957004339147e-06, "loss": 0.3866, "step": 9719 }, { "epoch": 0.8723160799623073, "grad_norm": 0.4657294250972204, "learning_rate": 8.931634433372423e-06, "loss": 0.4179, "step": 9720 }, { "epoch": 0.8724058244149784, "grad_norm": 0.4512741159968461, "learning_rate": 8.931311819528087e-06, "loss": 0.4163, "step": 9721 }, { "epoch": 0.8724955688676493, "grad_norm": 0.45934716850290525, "learning_rate": 8.930989162809658e-06, "loss": 0.3928, "step": 9722 }, { "epoch": 0.8725853133203204, "grad_norm": 0.46608378990246957, "learning_rate": 8.930666463220655e-06, "loss": 0.394, "step": 9723 }, { "epoch": 0.8726750577729914, "grad_norm": 0.5693456791929026, "learning_rate": 8.930343720764595e-06, "loss": 0.5368, "step": 9724 }, { "epoch": 0.8727648022256624, "grad_norm": 0.5032068276431705, "learning_rate": 8.930020935445e-06, "loss": 0.4599, "step": 9725 }, { "epoch": 0.8728545466783334, "grad_norm": 0.49568098519689785, "learning_rate": 8.92969810726539e-06, "loss": 0.5166, "step": 9726 }, { "epoch": 0.8729442911310045, "grad_norm": 0.5067182954357988, "learning_rate": 8.929375236229286e-06, "loss": 0.4599, "step": 9727 }, { "epoch": 0.8730340355836755, "grad_norm": 0.47678028425081886, "learning_rate": 8.929052322340208e-06, "loss": 0.423, "step": 9728 }, { "epoch": 0.8731237800363465, "grad_norm": 0.48311405230594157, "learning_rate": 8.928729365601677e-06, "loss": 0.4676, "step": 9729 }, { "epoch": 0.8732135244890176, "grad_norm": 0.5411404501346967, "learning_rate": 8.92840636601722e-06, "loss": 0.5141, "step": 9730 }, { "epoch": 0.8733032689416885, "grad_norm": 0.4889283682705371, "learning_rate": 8.928083323590352e-06, "loss": 0.4506, "step": 9731 }, { "epoch": 0.8733930133943596, "grad_norm": 0.4713979507038008, "learning_rate": 8.9277602383246e-06, "loss": 0.4629, "step": 9732 }, { "epoch": 0.8734827578470306, "grad_norm": 0.49235064966823705, "learning_rate": 8.927437110223491e-06, "loss": 0.4564, "step": 9733 }, { "epoch": 0.8735725022997016, "grad_norm": 0.46504771137941947, "learning_rate": 8.927113939290543e-06, "loss": 0.3826, "step": 9734 }, { "epoch": 0.8736622467523726, "grad_norm": 0.4630399872456189, "learning_rate": 8.926790725529281e-06, "loss": 0.3996, "step": 9735 }, { "epoch": 0.8737519912050437, "grad_norm": 0.4535924018020329, "learning_rate": 8.926467468943235e-06, "loss": 0.3988, "step": 9736 }, { "epoch": 0.8738417356577146, "grad_norm": 0.5543787417924151, "learning_rate": 8.926144169535926e-06, "loss": 0.4885, "step": 9737 }, { "epoch": 0.8739314801103857, "grad_norm": 0.5097795345724753, "learning_rate": 8.925820827310881e-06, "loss": 0.5003, "step": 9738 }, { "epoch": 0.8740212245630566, "grad_norm": 0.45955574692788337, "learning_rate": 8.925497442271628e-06, "loss": 0.3615, "step": 9739 }, { "epoch": 0.8741109690157277, "grad_norm": 0.5034321462424554, "learning_rate": 8.925174014421687e-06, "loss": 0.4583, "step": 9740 }, { "epoch": 0.8742007134683988, "grad_norm": 0.5418920773861081, "learning_rate": 8.924850543764594e-06, "loss": 0.491, "step": 9741 }, { "epoch": 0.8742904579210697, "grad_norm": 0.48861153076169245, "learning_rate": 8.924527030303874e-06, "loss": 0.4137, "step": 9742 }, { "epoch": 0.8743802023737408, "grad_norm": 0.5458279443946086, "learning_rate": 8.924203474043051e-06, "loss": 0.4891, "step": 9743 }, { "epoch": 0.8744699468264118, "grad_norm": 0.4777437327852589, "learning_rate": 8.92387987498566e-06, "loss": 0.4328, "step": 9744 }, { "epoch": 0.8745596912790828, "grad_norm": 0.4798777671645579, "learning_rate": 8.923556233135224e-06, "loss": 0.4374, "step": 9745 }, { "epoch": 0.8746494357317538, "grad_norm": 0.4356347052839657, "learning_rate": 8.923232548495277e-06, "loss": 0.3657, "step": 9746 }, { "epoch": 0.8747391801844249, "grad_norm": 0.5004522248361392, "learning_rate": 8.922908821069347e-06, "loss": 0.4599, "step": 9747 }, { "epoch": 0.8748289246370958, "grad_norm": 0.4966912343561429, "learning_rate": 8.922585050860966e-06, "loss": 0.4842, "step": 9748 }, { "epoch": 0.8749186690897669, "grad_norm": 0.49373120900731465, "learning_rate": 8.922261237873663e-06, "loss": 0.4575, "step": 9749 }, { "epoch": 0.8750084135424379, "grad_norm": 0.497682819178186, "learning_rate": 8.92193738211097e-06, "loss": 0.4219, "step": 9750 }, { "epoch": 0.8750981579951089, "grad_norm": 0.5673450622168497, "learning_rate": 8.92161348357642e-06, "loss": 0.535, "step": 9751 }, { "epoch": 0.87518790244778, "grad_norm": 0.5683852851814385, "learning_rate": 8.921289542273546e-06, "loss": 0.5486, "step": 9752 }, { "epoch": 0.875277646900451, "grad_norm": 0.46304171316594883, "learning_rate": 8.920965558205877e-06, "loss": 0.4121, "step": 9753 }, { "epoch": 0.875367391353122, "grad_norm": 0.47634561435530165, "learning_rate": 8.92064153137695e-06, "loss": 0.4567, "step": 9754 }, { "epoch": 0.875457135805793, "grad_norm": 0.4748353305934448, "learning_rate": 8.920317461790297e-06, "loss": 0.4218, "step": 9755 }, { "epoch": 0.875546880258464, "grad_norm": 0.5091320798473673, "learning_rate": 8.919993349449453e-06, "loss": 0.4953, "step": 9756 }, { "epoch": 0.875636624711135, "grad_norm": 0.5023295238217466, "learning_rate": 8.919669194357955e-06, "loss": 0.4856, "step": 9757 }, { "epoch": 0.8757263691638061, "grad_norm": 0.525963369295773, "learning_rate": 8.919344996519332e-06, "loss": 0.4164, "step": 9758 }, { "epoch": 0.875816113616477, "grad_norm": 0.4812840938998126, "learning_rate": 8.919020755937127e-06, "loss": 0.4775, "step": 9759 }, { "epoch": 0.8759058580691481, "grad_norm": 0.5226335745910512, "learning_rate": 8.91869647261487e-06, "loss": 0.5005, "step": 9760 }, { "epoch": 0.8759956025218191, "grad_norm": 0.5116149179272539, "learning_rate": 8.9183721465561e-06, "loss": 0.4421, "step": 9761 }, { "epoch": 0.8760853469744901, "grad_norm": 0.4937058997274979, "learning_rate": 8.918047777764357e-06, "loss": 0.4267, "step": 9762 }, { "epoch": 0.8761750914271612, "grad_norm": 0.4949193670226786, "learning_rate": 8.917723366243171e-06, "loss": 0.4017, "step": 9763 }, { "epoch": 0.8762648358798322, "grad_norm": 0.4994955899387126, "learning_rate": 8.917398911996087e-06, "loss": 0.4954, "step": 9764 }, { "epoch": 0.8763545803325032, "grad_norm": 0.4467197126524047, "learning_rate": 8.91707441502664e-06, "loss": 0.4167, "step": 9765 }, { "epoch": 0.8764443247851742, "grad_norm": 0.45440425909863014, "learning_rate": 8.91674987533837e-06, "loss": 0.3793, "step": 9766 }, { "epoch": 0.8765340692378453, "grad_norm": 0.47511695606830334, "learning_rate": 8.916425292934818e-06, "loss": 0.4165, "step": 9767 }, { "epoch": 0.8766238136905162, "grad_norm": 0.4590519953458215, "learning_rate": 8.916100667819521e-06, "loss": 0.3842, "step": 9768 }, { "epoch": 0.8767135581431873, "grad_norm": 0.48740615292714123, "learning_rate": 8.91577599999602e-06, "loss": 0.4237, "step": 9769 }, { "epoch": 0.8768033025958583, "grad_norm": 0.4586668575644188, "learning_rate": 8.915451289467857e-06, "loss": 0.3842, "step": 9770 }, { "epoch": 0.8768930470485293, "grad_norm": 0.47471531757490826, "learning_rate": 8.915126536238571e-06, "loss": 0.4171, "step": 9771 }, { "epoch": 0.8769827915012003, "grad_norm": 0.5354955275591449, "learning_rate": 8.914801740311708e-06, "loss": 0.5061, "step": 9772 }, { "epoch": 0.8770725359538714, "grad_norm": 0.5108710747781522, "learning_rate": 8.914476901690803e-06, "loss": 0.4768, "step": 9773 }, { "epoch": 0.8771622804065423, "grad_norm": 0.47676581882792685, "learning_rate": 8.914152020379405e-06, "loss": 0.4132, "step": 9774 }, { "epoch": 0.8772520248592134, "grad_norm": 0.5474559161528786, "learning_rate": 8.913827096381055e-06, "loss": 0.4643, "step": 9775 }, { "epoch": 0.8773417693118845, "grad_norm": 0.49898855674240544, "learning_rate": 8.913502129699297e-06, "loss": 0.4715, "step": 9776 }, { "epoch": 0.8774315137645554, "grad_norm": 0.4396836914126381, "learning_rate": 8.913177120337673e-06, "loss": 0.3725, "step": 9777 }, { "epoch": 0.8775212582172265, "grad_norm": 0.4668414808242846, "learning_rate": 8.912852068299729e-06, "loss": 0.4412, "step": 9778 }, { "epoch": 0.8776110026698974, "grad_norm": 0.4827349953407997, "learning_rate": 8.912526973589012e-06, "loss": 0.4204, "step": 9779 }, { "epoch": 0.8777007471225685, "grad_norm": 0.5010294186774514, "learning_rate": 8.912201836209062e-06, "loss": 0.4664, "step": 9780 }, { "epoch": 0.8777904915752395, "grad_norm": 0.4717804354883491, "learning_rate": 8.911876656163432e-06, "loss": 0.4309, "step": 9781 }, { "epoch": 0.8778802360279105, "grad_norm": 0.5540563958776785, "learning_rate": 8.911551433455662e-06, "loss": 0.5162, "step": 9782 }, { "epoch": 0.8779699804805815, "grad_norm": 0.4703735323714036, "learning_rate": 8.911226168089302e-06, "loss": 0.4023, "step": 9783 }, { "epoch": 0.8780597249332526, "grad_norm": 0.45161085767714176, "learning_rate": 8.910900860067898e-06, "loss": 0.361, "step": 9784 }, { "epoch": 0.8781494693859235, "grad_norm": 0.5053780348469191, "learning_rate": 8.910575509395e-06, "loss": 0.5108, "step": 9785 }, { "epoch": 0.8782392138385946, "grad_norm": 0.5289022659179825, "learning_rate": 8.910250116074153e-06, "loss": 0.4937, "step": 9786 }, { "epoch": 0.8783289582912657, "grad_norm": 0.4802444269218063, "learning_rate": 8.909924680108908e-06, "loss": 0.453, "step": 9787 }, { "epoch": 0.8784187027439366, "grad_norm": 0.45569653528901033, "learning_rate": 8.909599201502815e-06, "loss": 0.4258, "step": 9788 }, { "epoch": 0.8785084471966077, "grad_norm": 0.5059315187088609, "learning_rate": 8.90927368025942e-06, "loss": 0.4451, "step": 9789 }, { "epoch": 0.8785981916492787, "grad_norm": 0.5515989335204445, "learning_rate": 8.908948116382276e-06, "loss": 0.4977, "step": 9790 }, { "epoch": 0.8786879361019497, "grad_norm": 0.5451541667495482, "learning_rate": 8.908622509874932e-06, "loss": 0.492, "step": 9791 }, { "epoch": 0.8787776805546207, "grad_norm": 0.4557662707980199, "learning_rate": 8.908296860740939e-06, "loss": 0.4053, "step": 9792 }, { "epoch": 0.8788674250072918, "grad_norm": 0.4842492925717696, "learning_rate": 8.907971168983851e-06, "loss": 0.4377, "step": 9793 }, { "epoch": 0.8789571694599627, "grad_norm": 0.4614125767599997, "learning_rate": 8.907645434607217e-06, "loss": 0.4095, "step": 9794 }, { "epoch": 0.8790469139126338, "grad_norm": 0.4703096939823173, "learning_rate": 8.90731965761459e-06, "loss": 0.4205, "step": 9795 }, { "epoch": 0.8791366583653047, "grad_norm": 0.47261009513539604, "learning_rate": 8.906993838009523e-06, "loss": 0.4368, "step": 9796 }, { "epoch": 0.8792264028179758, "grad_norm": 0.5130775757566866, "learning_rate": 8.90666797579557e-06, "loss": 0.503, "step": 9797 }, { "epoch": 0.8793161472706469, "grad_norm": 0.534014008390895, "learning_rate": 8.906342070976285e-06, "loss": 0.5326, "step": 9798 }, { "epoch": 0.8794058917233178, "grad_norm": 0.48740850203168357, "learning_rate": 8.906016123555222e-06, "loss": 0.4218, "step": 9799 }, { "epoch": 0.8794956361759889, "grad_norm": 0.4493503544304194, "learning_rate": 8.905690133535934e-06, "loss": 0.4225, "step": 9800 }, { "epoch": 0.8795853806286599, "grad_norm": 0.46424128848755003, "learning_rate": 8.90536410092198e-06, "loss": 0.4045, "step": 9801 }, { "epoch": 0.8796751250813309, "grad_norm": 0.4789610039476101, "learning_rate": 8.90503802571691e-06, "loss": 0.4282, "step": 9802 }, { "epoch": 0.8797648695340019, "grad_norm": 0.48207720506624546, "learning_rate": 8.904711907924285e-06, "loss": 0.41, "step": 9803 }, { "epoch": 0.879854613986673, "grad_norm": 0.4944225922525665, "learning_rate": 8.90438574754766e-06, "loss": 0.4504, "step": 9804 }, { "epoch": 0.8799443584393439, "grad_norm": 0.5307970290115656, "learning_rate": 8.904059544590592e-06, "loss": 0.4585, "step": 9805 }, { "epoch": 0.880034102892015, "grad_norm": 0.43851045242269093, "learning_rate": 8.903733299056638e-06, "loss": 0.385, "step": 9806 }, { "epoch": 0.880123847344686, "grad_norm": 0.496621357717802, "learning_rate": 8.903407010949357e-06, "loss": 0.3964, "step": 9807 }, { "epoch": 0.880213591797357, "grad_norm": 0.4778878041867076, "learning_rate": 8.903080680272304e-06, "loss": 0.4708, "step": 9808 }, { "epoch": 0.880303336250028, "grad_norm": 0.45556507464968266, "learning_rate": 8.902754307029045e-06, "loss": 0.4031, "step": 9809 }, { "epoch": 0.8803930807026991, "grad_norm": 0.4822474699964466, "learning_rate": 8.902427891223132e-06, "loss": 0.4352, "step": 9810 }, { "epoch": 0.8804828251553701, "grad_norm": 0.5064101229680091, "learning_rate": 8.902101432858129e-06, "loss": 0.4925, "step": 9811 }, { "epoch": 0.8805725696080411, "grad_norm": 0.47180625428293455, "learning_rate": 8.901774931937594e-06, "loss": 0.4589, "step": 9812 }, { "epoch": 0.8806623140607122, "grad_norm": 0.5648099094515183, "learning_rate": 8.90144838846509e-06, "loss": 0.5116, "step": 9813 }, { "epoch": 0.8807520585133831, "grad_norm": 0.4504502570148268, "learning_rate": 8.901121802444178e-06, "loss": 0.3933, "step": 9814 }, { "epoch": 0.8808418029660542, "grad_norm": 0.4878873600653982, "learning_rate": 8.900795173878418e-06, "loss": 0.466, "step": 9815 }, { "epoch": 0.8809315474187251, "grad_norm": 0.4898357906738308, "learning_rate": 8.900468502771373e-06, "loss": 0.4485, "step": 9816 }, { "epoch": 0.8810212918713962, "grad_norm": 0.4980616683734052, "learning_rate": 8.900141789126606e-06, "loss": 0.4877, "step": 9817 }, { "epoch": 0.8811110363240672, "grad_norm": 0.5165850775729224, "learning_rate": 8.899815032947678e-06, "loss": 0.4574, "step": 9818 }, { "epoch": 0.8812007807767382, "grad_norm": 0.4792997489389854, "learning_rate": 8.899488234238156e-06, "loss": 0.4458, "step": 9819 }, { "epoch": 0.8812905252294092, "grad_norm": 0.4097658146490825, "learning_rate": 8.899161393001601e-06, "loss": 0.3753, "step": 9820 }, { "epoch": 0.8813802696820803, "grad_norm": 0.5110796207197205, "learning_rate": 8.898834509241581e-06, "loss": 0.4867, "step": 9821 }, { "epoch": 0.8814700141347513, "grad_norm": 0.4639615709053202, "learning_rate": 8.898507582961654e-06, "loss": 0.3958, "step": 9822 }, { "epoch": 0.8815597585874223, "grad_norm": 0.4746000642355862, "learning_rate": 8.898180614165395e-06, "loss": 0.4075, "step": 9823 }, { "epoch": 0.8816495030400934, "grad_norm": 0.5174798309353663, "learning_rate": 8.897853602856364e-06, "loss": 0.4796, "step": 9824 }, { "epoch": 0.8817392474927643, "grad_norm": 0.4841558650708947, "learning_rate": 8.897526549038125e-06, "loss": 0.4151, "step": 9825 }, { "epoch": 0.8818289919454354, "grad_norm": 0.4609444552426956, "learning_rate": 8.89719945271425e-06, "loss": 0.382, "step": 9826 }, { "epoch": 0.8819187363981064, "grad_norm": 0.45583103866165553, "learning_rate": 8.896872313888306e-06, "loss": 0.4253, "step": 9827 }, { "epoch": 0.8820084808507774, "grad_norm": 0.5072928460979719, "learning_rate": 8.896545132563855e-06, "loss": 0.4788, "step": 9828 }, { "epoch": 0.8820982253034484, "grad_norm": 0.5068581531814522, "learning_rate": 8.89621790874447e-06, "loss": 0.4321, "step": 9829 }, { "epoch": 0.8821879697561195, "grad_norm": 0.4354028562495787, "learning_rate": 8.895890642433721e-06, "loss": 0.3765, "step": 9830 }, { "epoch": 0.8822777142087904, "grad_norm": 0.4901842959585929, "learning_rate": 8.895563333635174e-06, "loss": 0.4712, "step": 9831 }, { "epoch": 0.8823674586614615, "grad_norm": 0.4851963103364581, "learning_rate": 8.8952359823524e-06, "loss": 0.4418, "step": 9832 }, { "epoch": 0.8824572031141326, "grad_norm": 0.4871631735727884, "learning_rate": 8.894908588588967e-06, "loss": 0.4254, "step": 9833 }, { "epoch": 0.8825469475668035, "grad_norm": 0.5156342144950895, "learning_rate": 8.894581152348446e-06, "loss": 0.4865, "step": 9834 }, { "epoch": 0.8826366920194746, "grad_norm": 0.47741022407379335, "learning_rate": 8.89425367363441e-06, "loss": 0.4312, "step": 9835 }, { "epoch": 0.8827264364721455, "grad_norm": 0.5063551447108319, "learning_rate": 8.893926152450429e-06, "loss": 0.4584, "step": 9836 }, { "epoch": 0.8828161809248166, "grad_norm": 0.5470508241835375, "learning_rate": 8.893598588800076e-06, "loss": 0.5419, "step": 9837 }, { "epoch": 0.8829059253774876, "grad_norm": 0.469658567590367, "learning_rate": 8.893270982686922e-06, "loss": 0.4189, "step": 9838 }, { "epoch": 0.8829956698301586, "grad_norm": 0.4862718679902558, "learning_rate": 8.892943334114542e-06, "loss": 0.4549, "step": 9839 }, { "epoch": 0.8830854142828296, "grad_norm": 0.48569834556555697, "learning_rate": 8.892615643086504e-06, "loss": 0.4726, "step": 9840 }, { "epoch": 0.8831751587355007, "grad_norm": 0.49652373889269613, "learning_rate": 8.892287909606387e-06, "loss": 0.4778, "step": 9841 }, { "epoch": 0.8832649031881716, "grad_norm": 0.49547797466439725, "learning_rate": 8.891960133677763e-06, "loss": 0.4205, "step": 9842 }, { "epoch": 0.8833546476408427, "grad_norm": 0.4693297055716404, "learning_rate": 8.891632315304209e-06, "loss": 0.426, "step": 9843 }, { "epoch": 0.8834443920935138, "grad_norm": 0.4594111895732796, "learning_rate": 8.891304454489296e-06, "loss": 0.4009, "step": 9844 }, { "epoch": 0.8835341365461847, "grad_norm": 0.49675116754297943, "learning_rate": 8.890976551236604e-06, "loss": 0.4541, "step": 9845 }, { "epoch": 0.8836238809988558, "grad_norm": 0.4882957523562459, "learning_rate": 8.890648605549706e-06, "loss": 0.4465, "step": 9846 }, { "epoch": 0.8837136254515268, "grad_norm": 0.4719687909068958, "learning_rate": 8.890320617432178e-06, "loss": 0.406, "step": 9847 }, { "epoch": 0.8838033699041978, "grad_norm": 0.522065152688437, "learning_rate": 8.889992586887599e-06, "loss": 0.5139, "step": 9848 }, { "epoch": 0.8838931143568688, "grad_norm": 0.4783340744958039, "learning_rate": 8.889664513919547e-06, "loss": 0.4547, "step": 9849 }, { "epoch": 0.8839828588095399, "grad_norm": 0.48284771677099353, "learning_rate": 8.889336398531599e-06, "loss": 0.4399, "step": 9850 }, { "epoch": 0.8840726032622108, "grad_norm": 0.47963146864297873, "learning_rate": 8.889008240727332e-06, "loss": 0.4325, "step": 9851 }, { "epoch": 0.8841623477148819, "grad_norm": 0.48312390617633133, "learning_rate": 8.888680040510325e-06, "loss": 0.4287, "step": 9852 }, { "epoch": 0.8842520921675529, "grad_norm": 0.4763185848185155, "learning_rate": 8.88835179788416e-06, "loss": 0.4105, "step": 9853 }, { "epoch": 0.8843418366202239, "grad_norm": 0.5351360141692917, "learning_rate": 8.888023512852415e-06, "loss": 0.4765, "step": 9854 }, { "epoch": 0.8844315810728949, "grad_norm": 0.4775009577130089, "learning_rate": 8.88769518541867e-06, "loss": 0.4205, "step": 9855 }, { "epoch": 0.884521325525566, "grad_norm": 0.45573683857983727, "learning_rate": 8.887366815586504e-06, "loss": 0.3965, "step": 9856 }, { "epoch": 0.884611069978237, "grad_norm": 0.47935951390474546, "learning_rate": 8.8870384033595e-06, "loss": 0.4698, "step": 9857 }, { "epoch": 0.884700814430908, "grad_norm": 0.4934258468528363, "learning_rate": 8.886709948741242e-06, "loss": 0.461, "step": 9858 }, { "epoch": 0.884790558883579, "grad_norm": 0.5177558452541362, "learning_rate": 8.886381451735308e-06, "loss": 0.4609, "step": 9859 }, { "epoch": 0.88488030333625, "grad_norm": 0.5491741943445168, "learning_rate": 8.886052912345283e-06, "loss": 0.503, "step": 9860 }, { "epoch": 0.8849700477889211, "grad_norm": 0.5317572770277051, "learning_rate": 8.885724330574748e-06, "loss": 0.4932, "step": 9861 }, { "epoch": 0.885059792241592, "grad_norm": 0.5041557158196505, "learning_rate": 8.885395706427288e-06, "loss": 0.4414, "step": 9862 }, { "epoch": 0.8851495366942631, "grad_norm": 0.5008017308102816, "learning_rate": 8.885067039906486e-06, "loss": 0.4666, "step": 9863 }, { "epoch": 0.8852392811469341, "grad_norm": 0.4620832326788013, "learning_rate": 8.884738331015928e-06, "loss": 0.4018, "step": 9864 }, { "epoch": 0.8853290255996051, "grad_norm": 0.48356388847143433, "learning_rate": 8.884409579759197e-06, "loss": 0.4514, "step": 9865 }, { "epoch": 0.8854187700522761, "grad_norm": 0.4684160385006788, "learning_rate": 8.88408078613988e-06, "loss": 0.4044, "step": 9866 }, { "epoch": 0.8855085145049472, "grad_norm": 0.48981931274486273, "learning_rate": 8.883751950161559e-06, "loss": 0.4142, "step": 9867 }, { "epoch": 0.8855982589576182, "grad_norm": 0.49732751400191033, "learning_rate": 8.883423071827825e-06, "loss": 0.4472, "step": 9868 }, { "epoch": 0.8856880034102892, "grad_norm": 0.4136549790043341, "learning_rate": 8.883094151142262e-06, "loss": 0.3595, "step": 9869 }, { "epoch": 0.8857777478629603, "grad_norm": 0.4254211409361322, "learning_rate": 8.882765188108457e-06, "loss": 0.352, "step": 9870 }, { "epoch": 0.8858674923156312, "grad_norm": 0.4928208065028112, "learning_rate": 8.882436182730001e-06, "loss": 0.4212, "step": 9871 }, { "epoch": 0.8859572367683023, "grad_norm": 0.49296069238181967, "learning_rate": 8.882107135010479e-06, "loss": 0.4697, "step": 9872 }, { "epoch": 0.8860469812209733, "grad_norm": 0.5200309328161933, "learning_rate": 8.881778044953477e-06, "loss": 0.4835, "step": 9873 }, { "epoch": 0.8861367256736443, "grad_norm": 0.4783872321634257, "learning_rate": 8.88144891256259e-06, "loss": 0.4801, "step": 9874 }, { "epoch": 0.8862264701263153, "grad_norm": 0.5088618268551713, "learning_rate": 8.881119737841402e-06, "loss": 0.4796, "step": 9875 }, { "epoch": 0.8863162145789863, "grad_norm": 0.493109272873216, "learning_rate": 8.880790520793507e-06, "loss": 0.466, "step": 9876 }, { "epoch": 0.8864059590316573, "grad_norm": 0.493675317981388, "learning_rate": 8.880461261422493e-06, "loss": 0.455, "step": 9877 }, { "epoch": 0.8864957034843284, "grad_norm": 0.5507782062672403, "learning_rate": 8.880131959731953e-06, "loss": 0.5052, "step": 9878 }, { "epoch": 0.8865854479369994, "grad_norm": 0.4862982585628152, "learning_rate": 8.879802615725474e-06, "loss": 0.4749, "step": 9879 }, { "epoch": 0.8866751923896704, "grad_norm": 0.47282686564120013, "learning_rate": 8.879473229406653e-06, "loss": 0.4061, "step": 9880 }, { "epoch": 0.8867649368423415, "grad_norm": 0.4805390662983545, "learning_rate": 8.879143800779079e-06, "loss": 0.4286, "step": 9881 }, { "epoch": 0.8868546812950124, "grad_norm": 0.502070588536831, "learning_rate": 8.878814329846346e-06, "loss": 0.3994, "step": 9882 }, { "epoch": 0.8869444257476835, "grad_norm": 0.45112007108538305, "learning_rate": 8.878484816612045e-06, "loss": 0.4298, "step": 9883 }, { "epoch": 0.8870341702003545, "grad_norm": 0.5298396165171335, "learning_rate": 8.878155261079772e-06, "loss": 0.4798, "step": 9884 }, { "epoch": 0.8871239146530255, "grad_norm": 0.5246467452141265, "learning_rate": 8.877825663253122e-06, "loss": 0.4911, "step": 9885 }, { "epoch": 0.8872136591056965, "grad_norm": 0.4832103768260713, "learning_rate": 8.877496023135685e-06, "loss": 0.482, "step": 9886 }, { "epoch": 0.8873034035583676, "grad_norm": 0.49491244009190344, "learning_rate": 8.87716634073106e-06, "loss": 0.456, "step": 9887 }, { "epoch": 0.8873931480110385, "grad_norm": 0.4867714102524174, "learning_rate": 8.876836616042844e-06, "loss": 0.4574, "step": 9888 }, { "epoch": 0.8874828924637096, "grad_norm": 0.4764892990783572, "learning_rate": 8.876506849074627e-06, "loss": 0.4385, "step": 9889 }, { "epoch": 0.8875726369163806, "grad_norm": 0.529148603701921, "learning_rate": 8.876177039830009e-06, "loss": 0.4783, "step": 9890 }, { "epoch": 0.8876623813690516, "grad_norm": 0.4200635355590389, "learning_rate": 8.875847188312586e-06, "loss": 0.3854, "step": 9891 }, { "epoch": 0.8877521258217227, "grad_norm": 0.4406404298811604, "learning_rate": 8.875517294525958e-06, "loss": 0.3815, "step": 9892 }, { "epoch": 0.8878418702743937, "grad_norm": 0.5095719873562753, "learning_rate": 8.875187358473717e-06, "loss": 0.4842, "step": 9893 }, { "epoch": 0.8879316147270647, "grad_norm": 0.45564233395404624, "learning_rate": 8.874857380159467e-06, "loss": 0.4135, "step": 9894 }, { "epoch": 0.8880213591797357, "grad_norm": 0.4882825454501025, "learning_rate": 8.874527359586803e-06, "loss": 0.4556, "step": 9895 }, { "epoch": 0.8881111036324068, "grad_norm": 0.47471923894725454, "learning_rate": 8.874197296759325e-06, "loss": 0.4201, "step": 9896 }, { "epoch": 0.8882008480850777, "grad_norm": 0.443193209809407, "learning_rate": 8.873867191680637e-06, "loss": 0.3673, "step": 9897 }, { "epoch": 0.8882905925377488, "grad_norm": 0.5156529116834143, "learning_rate": 8.873537044354332e-06, "loss": 0.4962, "step": 9898 }, { "epoch": 0.8883803369904197, "grad_norm": 0.4465858743365572, "learning_rate": 8.873206854784013e-06, "loss": 0.3626, "step": 9899 }, { "epoch": 0.8884700814430908, "grad_norm": 0.5034266702879202, "learning_rate": 8.872876622973284e-06, "loss": 0.3927, "step": 9900 }, { "epoch": 0.8885598258957618, "grad_norm": 0.5042433589952406, "learning_rate": 8.872546348925742e-06, "loss": 0.4407, "step": 9901 }, { "epoch": 0.8886495703484328, "grad_norm": 0.4861697622044377, "learning_rate": 8.872216032644992e-06, "loss": 0.4241, "step": 9902 }, { "epoch": 0.8887393148011039, "grad_norm": 0.4777532520995093, "learning_rate": 8.871885674134635e-06, "loss": 0.4442, "step": 9903 }, { "epoch": 0.8888290592537749, "grad_norm": 0.4861866263211713, "learning_rate": 8.871555273398273e-06, "loss": 0.4797, "step": 9904 }, { "epoch": 0.8889188037064459, "grad_norm": 0.5190532179611395, "learning_rate": 8.871224830439514e-06, "loss": 0.5342, "step": 9905 }, { "epoch": 0.8890085481591169, "grad_norm": 0.4781897546047189, "learning_rate": 8.870894345261954e-06, "loss": 0.4596, "step": 9906 }, { "epoch": 0.889098292611788, "grad_norm": 0.48892464431073357, "learning_rate": 8.870563817869205e-06, "loss": 0.4471, "step": 9907 }, { "epoch": 0.8891880370644589, "grad_norm": 0.4876484638064446, "learning_rate": 8.870233248264866e-06, "loss": 0.4327, "step": 9908 }, { "epoch": 0.88927778151713, "grad_norm": 0.49171362118318934, "learning_rate": 8.869902636452544e-06, "loss": 0.5061, "step": 9909 }, { "epoch": 0.889367525969801, "grad_norm": 0.5179812363035625, "learning_rate": 8.869571982435844e-06, "loss": 0.4866, "step": 9910 }, { "epoch": 0.889457270422472, "grad_norm": 0.4791774451945551, "learning_rate": 8.869241286218374e-06, "loss": 0.4416, "step": 9911 }, { "epoch": 0.889547014875143, "grad_norm": 0.48791792068146106, "learning_rate": 8.86891054780374e-06, "loss": 0.4437, "step": 9912 }, { "epoch": 0.889636759327814, "grad_norm": 0.4492177502570792, "learning_rate": 8.868579767195546e-06, "loss": 0.3783, "step": 9913 }, { "epoch": 0.8897265037804851, "grad_norm": 0.5295099389018723, "learning_rate": 8.868248944397405e-06, "loss": 0.5092, "step": 9914 }, { "epoch": 0.8898162482331561, "grad_norm": 0.46345074374978545, "learning_rate": 8.86791807941292e-06, "loss": 0.4043, "step": 9915 }, { "epoch": 0.8899059926858272, "grad_norm": 0.4533930814239235, "learning_rate": 8.867587172245699e-06, "loss": 0.3881, "step": 9916 }, { "epoch": 0.8899957371384981, "grad_norm": 0.5419776367009349, "learning_rate": 8.867256222899356e-06, "loss": 0.5062, "step": 9917 }, { "epoch": 0.8900854815911692, "grad_norm": 0.5160534435911116, "learning_rate": 8.866925231377497e-06, "loss": 0.4803, "step": 9918 }, { "epoch": 0.8901752260438401, "grad_norm": 0.5010003928573414, "learning_rate": 8.866594197683728e-06, "loss": 0.4247, "step": 9919 }, { "epoch": 0.8902649704965112, "grad_norm": 0.45003898661037045, "learning_rate": 8.866263121821667e-06, "loss": 0.4104, "step": 9920 }, { "epoch": 0.8903547149491822, "grad_norm": 0.47883289395545436, "learning_rate": 8.86593200379492e-06, "loss": 0.4623, "step": 9921 }, { "epoch": 0.8904444594018532, "grad_norm": 0.4303275228176081, "learning_rate": 8.865600843607096e-06, "loss": 0.3388, "step": 9922 }, { "epoch": 0.8905342038545242, "grad_norm": 0.5178276536721829, "learning_rate": 8.865269641261812e-06, "loss": 0.4708, "step": 9923 }, { "epoch": 0.8906239483071953, "grad_norm": 0.4783248452693531, "learning_rate": 8.864938396762675e-06, "loss": 0.4537, "step": 9924 }, { "epoch": 0.8907136927598662, "grad_norm": 0.5043941961799195, "learning_rate": 8.8646071101133e-06, "loss": 0.4556, "step": 9925 }, { "epoch": 0.8908034372125373, "grad_norm": 0.49185459432601897, "learning_rate": 8.864275781317301e-06, "loss": 0.4516, "step": 9926 }, { "epoch": 0.8908931816652084, "grad_norm": 0.5781692445899169, "learning_rate": 8.863944410378288e-06, "loss": 0.4966, "step": 9927 }, { "epoch": 0.8909829261178793, "grad_norm": 0.45568451329363985, "learning_rate": 8.863612997299879e-06, "loss": 0.4309, "step": 9928 }, { "epoch": 0.8910726705705504, "grad_norm": 0.5052241687948024, "learning_rate": 8.863281542085687e-06, "loss": 0.4709, "step": 9929 }, { "epoch": 0.8911624150232214, "grad_norm": 0.47132165531407466, "learning_rate": 8.862950044739324e-06, "loss": 0.4149, "step": 9930 }, { "epoch": 0.8912521594758924, "grad_norm": 0.5012715743257424, "learning_rate": 8.862618505264409e-06, "loss": 0.4782, "step": 9931 }, { "epoch": 0.8913419039285634, "grad_norm": 0.48009866476639945, "learning_rate": 8.862286923664557e-06, "loss": 0.3864, "step": 9932 }, { "epoch": 0.8914316483812345, "grad_norm": 0.48813119636234964, "learning_rate": 8.861955299943382e-06, "loss": 0.4156, "step": 9933 }, { "epoch": 0.8915213928339054, "grad_norm": 0.52720889959354, "learning_rate": 8.861623634104498e-06, "loss": 0.4873, "step": 9934 }, { "epoch": 0.8916111372865765, "grad_norm": 0.5053948965677681, "learning_rate": 8.861291926151532e-06, "loss": 0.4276, "step": 9935 }, { "epoch": 0.8917008817392474, "grad_norm": 0.48008590866055495, "learning_rate": 8.860960176088091e-06, "loss": 0.4522, "step": 9936 }, { "epoch": 0.8917906261919185, "grad_norm": 0.470204231421861, "learning_rate": 8.8606283839178e-06, "loss": 0.4328, "step": 9937 }, { "epoch": 0.8918803706445896, "grad_norm": 0.49919873246585333, "learning_rate": 8.860296549644273e-06, "loss": 0.4531, "step": 9938 }, { "epoch": 0.8919701150972605, "grad_norm": 0.47696805458969904, "learning_rate": 8.859964673271133e-06, "loss": 0.4193, "step": 9939 }, { "epoch": 0.8920598595499316, "grad_norm": 0.48716817688899017, "learning_rate": 8.859632754801996e-06, "loss": 0.4511, "step": 9940 }, { "epoch": 0.8921496040026026, "grad_norm": 0.5356230060297615, "learning_rate": 8.859300794240482e-06, "loss": 0.4476, "step": 9941 }, { "epoch": 0.8922393484552736, "grad_norm": 0.49185998492602057, "learning_rate": 8.858968791590214e-06, "loss": 0.4295, "step": 9942 }, { "epoch": 0.8923290929079446, "grad_norm": 0.5368166379031974, "learning_rate": 8.85863674685481e-06, "loss": 0.5178, "step": 9943 }, { "epoch": 0.8924188373606157, "grad_norm": 0.4653174134550243, "learning_rate": 8.858304660037894e-06, "loss": 0.4072, "step": 9944 }, { "epoch": 0.8925085818132866, "grad_norm": 0.4569419920879526, "learning_rate": 8.857972531143084e-06, "loss": 0.4185, "step": 9945 }, { "epoch": 0.8925983262659577, "grad_norm": 0.49553849764192726, "learning_rate": 8.857640360174004e-06, "loss": 0.4372, "step": 9946 }, { "epoch": 0.8926880707186287, "grad_norm": 0.48182666435613786, "learning_rate": 8.857308147134277e-06, "loss": 0.413, "step": 9947 }, { "epoch": 0.8927778151712997, "grad_norm": 0.4575020286998263, "learning_rate": 8.856975892027525e-06, "loss": 0.3996, "step": 9948 }, { "epoch": 0.8928675596239708, "grad_norm": 0.503015075967522, "learning_rate": 8.856643594857375e-06, "loss": 0.4328, "step": 9949 }, { "epoch": 0.8929573040766418, "grad_norm": 0.469375187651514, "learning_rate": 8.856311255627447e-06, "loss": 0.3996, "step": 9950 }, { "epoch": 0.8930470485293128, "grad_norm": 0.513446932235946, "learning_rate": 8.855978874341365e-06, "loss": 0.4426, "step": 9951 }, { "epoch": 0.8931367929819838, "grad_norm": 0.4839154945729204, "learning_rate": 8.855646451002757e-06, "loss": 0.4279, "step": 9952 }, { "epoch": 0.8932265374346549, "grad_norm": 0.5132597876960544, "learning_rate": 8.855313985615247e-06, "loss": 0.4264, "step": 9953 }, { "epoch": 0.8933162818873258, "grad_norm": 0.5009319566315941, "learning_rate": 8.854981478182461e-06, "loss": 0.4427, "step": 9954 }, { "epoch": 0.8934060263399969, "grad_norm": 0.4843036811859415, "learning_rate": 8.854648928708022e-06, "loss": 0.5217, "step": 9955 }, { "epoch": 0.8934957707926678, "grad_norm": 0.47612397244527815, "learning_rate": 8.854316337195562e-06, "loss": 0.4341, "step": 9956 }, { "epoch": 0.8935855152453389, "grad_norm": 0.43649433104240754, "learning_rate": 8.853983703648705e-06, "loss": 0.3744, "step": 9957 }, { "epoch": 0.8936752596980099, "grad_norm": 0.5162045216167823, "learning_rate": 8.85365102807108e-06, "loss": 0.5017, "step": 9958 }, { "epoch": 0.8937650041506809, "grad_norm": 0.5099283759668065, "learning_rate": 8.853318310466315e-06, "loss": 0.4684, "step": 9959 }, { "epoch": 0.8938547486033519, "grad_norm": 0.5036351447701057, "learning_rate": 8.852985550838038e-06, "loss": 0.4384, "step": 9960 }, { "epoch": 0.893944493056023, "grad_norm": 0.48744502801555256, "learning_rate": 8.852652749189879e-06, "loss": 0.4518, "step": 9961 }, { "epoch": 0.894034237508694, "grad_norm": 0.46275812703278757, "learning_rate": 8.852319905525463e-06, "loss": 0.4068, "step": 9962 }, { "epoch": 0.894123981961365, "grad_norm": 0.5125599119759446, "learning_rate": 8.851987019848427e-06, "loss": 0.4685, "step": 9963 }, { "epoch": 0.8942137264140361, "grad_norm": 0.4572633814093707, "learning_rate": 8.851654092162397e-06, "loss": 0.4112, "step": 9964 }, { "epoch": 0.894303470866707, "grad_norm": 0.5350061793391928, "learning_rate": 8.851321122471003e-06, "loss": 0.4749, "step": 9965 }, { "epoch": 0.8943932153193781, "grad_norm": 0.545993299373847, "learning_rate": 8.85098811077788e-06, "loss": 0.4996, "step": 9966 }, { "epoch": 0.8944829597720491, "grad_norm": 0.47714207693113403, "learning_rate": 8.850655057086656e-06, "loss": 0.4375, "step": 9967 }, { "epoch": 0.8945727042247201, "grad_norm": 0.5048658570568034, "learning_rate": 8.850321961400967e-06, "loss": 0.471, "step": 9968 }, { "epoch": 0.8946624486773911, "grad_norm": 0.4976196017742004, "learning_rate": 8.849988823724442e-06, "loss": 0.4545, "step": 9969 }, { "epoch": 0.8947521931300622, "grad_norm": 0.5173360850585278, "learning_rate": 8.849655644060717e-06, "loss": 0.4677, "step": 9970 }, { "epoch": 0.8948419375827331, "grad_norm": 0.5200098745671398, "learning_rate": 8.849322422413422e-06, "loss": 0.5207, "step": 9971 }, { "epoch": 0.8949316820354042, "grad_norm": 0.47548103310443196, "learning_rate": 8.848989158786197e-06, "loss": 0.407, "step": 9972 }, { "epoch": 0.8950214264880753, "grad_norm": 0.46250838640240016, "learning_rate": 8.84865585318267e-06, "loss": 0.3913, "step": 9973 }, { "epoch": 0.8951111709407462, "grad_norm": 0.45584188095208755, "learning_rate": 8.84832250560648e-06, "loss": 0.3892, "step": 9974 }, { "epoch": 0.8952009153934173, "grad_norm": 0.4959856674452878, "learning_rate": 8.847989116061261e-06, "loss": 0.4819, "step": 9975 }, { "epoch": 0.8952906598460882, "grad_norm": 0.52399687047211, "learning_rate": 8.84765568455065e-06, "loss": 0.4836, "step": 9976 }, { "epoch": 0.8953804042987593, "grad_norm": 0.46253914240294586, "learning_rate": 8.847322211078282e-06, "loss": 0.3636, "step": 9977 }, { "epoch": 0.8954701487514303, "grad_norm": 0.48867445173964896, "learning_rate": 8.846988695647794e-06, "loss": 0.4369, "step": 9978 }, { "epoch": 0.8955598932041013, "grad_norm": 0.4891837436330033, "learning_rate": 8.846655138262824e-06, "loss": 0.4463, "step": 9979 }, { "epoch": 0.8956496376567723, "grad_norm": 0.4798048977140446, "learning_rate": 8.84632153892701e-06, "loss": 0.4685, "step": 9980 }, { "epoch": 0.8957393821094434, "grad_norm": 0.4595283988841095, "learning_rate": 8.845987897643988e-06, "loss": 0.4234, "step": 9981 }, { "epoch": 0.8958291265621143, "grad_norm": 0.4837957694014953, "learning_rate": 8.845654214417398e-06, "loss": 0.4223, "step": 9982 }, { "epoch": 0.8959188710147854, "grad_norm": 0.43995201713908394, "learning_rate": 8.845320489250882e-06, "loss": 0.3897, "step": 9983 }, { "epoch": 0.8960086154674565, "grad_norm": 0.46029951138287295, "learning_rate": 8.844986722148074e-06, "loss": 0.4247, "step": 9984 }, { "epoch": 0.8960983599201274, "grad_norm": 0.540384590151123, "learning_rate": 8.844652913112617e-06, "loss": 0.5597, "step": 9985 }, { "epoch": 0.8961881043727985, "grad_norm": 0.4743921751285115, "learning_rate": 8.84431906214815e-06, "loss": 0.4548, "step": 9986 }, { "epoch": 0.8962778488254695, "grad_norm": 0.5216745261395893, "learning_rate": 8.843985169258317e-06, "loss": 0.5392, "step": 9987 }, { "epoch": 0.8963675932781405, "grad_norm": 0.5080109826865116, "learning_rate": 8.843651234446757e-06, "loss": 0.4917, "step": 9988 }, { "epoch": 0.8964573377308115, "grad_norm": 0.5085001020026568, "learning_rate": 8.843317257717112e-06, "loss": 0.5124, "step": 9989 }, { "epoch": 0.8965470821834826, "grad_norm": 0.5386990386774478, "learning_rate": 8.842983239073027e-06, "loss": 0.5181, "step": 9990 }, { "epoch": 0.8966368266361535, "grad_norm": 0.4989493578619802, "learning_rate": 8.84264917851814e-06, "loss": 0.4676, "step": 9991 }, { "epoch": 0.8967265710888246, "grad_norm": 0.4535127836215822, "learning_rate": 8.842315076056096e-06, "loss": 0.3867, "step": 9992 }, { "epoch": 0.8968163155414955, "grad_norm": 0.46790356500249736, "learning_rate": 8.841980931690539e-06, "loss": 0.4604, "step": 9993 }, { "epoch": 0.8969060599941666, "grad_norm": 0.49387205831362, "learning_rate": 8.841646745425114e-06, "loss": 0.4334, "step": 9994 }, { "epoch": 0.8969958044468376, "grad_norm": 0.5093680995351634, "learning_rate": 8.841312517263465e-06, "loss": 0.4855, "step": 9995 }, { "epoch": 0.8970855488995086, "grad_norm": 0.5253542869684041, "learning_rate": 8.840978247209236e-06, "loss": 0.5208, "step": 9996 }, { "epoch": 0.8971752933521797, "grad_norm": 0.4827317543976782, "learning_rate": 8.840643935266073e-06, "loss": 0.4211, "step": 9997 }, { "epoch": 0.8972650378048507, "grad_norm": 0.479721977437235, "learning_rate": 8.840309581437624e-06, "loss": 0.4294, "step": 9998 }, { "epoch": 0.8973547822575217, "grad_norm": 0.4869954782357511, "learning_rate": 8.839975185727534e-06, "loss": 0.4529, "step": 9999 }, { "epoch": 0.8974445267101927, "grad_norm": 0.458115007492744, "learning_rate": 8.839640748139447e-06, "loss": 0.3901, "step": 10000 }, { "epoch": 0.8975342711628638, "grad_norm": 0.511031170087278, "learning_rate": 8.839306268677014e-06, "loss": 0.4364, "step": 10001 }, { "epoch": 0.8976240156155347, "grad_norm": 0.47016801077214593, "learning_rate": 8.838971747343882e-06, "loss": 0.3989, "step": 10002 }, { "epoch": 0.8977137600682058, "grad_norm": 0.4845822820521942, "learning_rate": 8.838637184143697e-06, "loss": 0.4374, "step": 10003 }, { "epoch": 0.8978035045208768, "grad_norm": 0.4460406070472601, "learning_rate": 8.83830257908011e-06, "loss": 0.3558, "step": 10004 }, { "epoch": 0.8978932489735478, "grad_norm": 0.5001297040862136, "learning_rate": 8.837967932156772e-06, "loss": 0.4661, "step": 10005 }, { "epoch": 0.8979829934262188, "grad_norm": 0.4623792299245698, "learning_rate": 8.83763324337733e-06, "loss": 0.4231, "step": 10006 }, { "epoch": 0.8980727378788899, "grad_norm": 0.4916299646248809, "learning_rate": 8.837298512745432e-06, "loss": 0.4612, "step": 10007 }, { "epoch": 0.8981624823315609, "grad_norm": 0.4877542884757405, "learning_rate": 8.836963740264733e-06, "loss": 0.4167, "step": 10008 }, { "epoch": 0.8982522267842319, "grad_norm": 0.4558991330053077, "learning_rate": 8.83662892593888e-06, "loss": 0.376, "step": 10009 }, { "epoch": 0.898341971236903, "grad_norm": 0.4557332999588161, "learning_rate": 8.836294069771526e-06, "loss": 0.3973, "step": 10010 }, { "epoch": 0.8984317156895739, "grad_norm": 0.4572878222124732, "learning_rate": 8.835959171766324e-06, "loss": 0.4216, "step": 10011 }, { "epoch": 0.898521460142245, "grad_norm": 0.5244032594617472, "learning_rate": 8.835624231926924e-06, "loss": 0.5186, "step": 10012 }, { "epoch": 0.898611204594916, "grad_norm": 0.471326298654354, "learning_rate": 8.835289250256982e-06, "loss": 0.4465, "step": 10013 }, { "epoch": 0.898700949047587, "grad_norm": 0.49218364666435943, "learning_rate": 8.83495422676015e-06, "loss": 0.4346, "step": 10014 }, { "epoch": 0.898790693500258, "grad_norm": 0.4910389908650126, "learning_rate": 8.834619161440077e-06, "loss": 0.4257, "step": 10015 }, { "epoch": 0.898880437952929, "grad_norm": 0.5120668117365788, "learning_rate": 8.834284054300423e-06, "loss": 0.5076, "step": 10016 }, { "epoch": 0.8989701824056, "grad_norm": 0.43582814616323984, "learning_rate": 8.833948905344842e-06, "loss": 0.417, "step": 10017 }, { "epoch": 0.8990599268582711, "grad_norm": 0.4660935108401592, "learning_rate": 8.833613714576988e-06, "loss": 0.4251, "step": 10018 }, { "epoch": 0.8991496713109421, "grad_norm": 0.4751124161779249, "learning_rate": 8.833278482000516e-06, "loss": 0.4366, "step": 10019 }, { "epoch": 0.8992394157636131, "grad_norm": 0.45997807836637966, "learning_rate": 8.832943207619082e-06, "loss": 0.4257, "step": 10020 }, { "epoch": 0.8993291602162842, "grad_norm": 0.48569160002250134, "learning_rate": 8.832607891436344e-06, "loss": 0.4505, "step": 10021 }, { "epoch": 0.8994189046689551, "grad_norm": 0.4799964282479041, "learning_rate": 8.832272533455956e-06, "loss": 0.419, "step": 10022 }, { "epoch": 0.8995086491216262, "grad_norm": 0.48707629839751376, "learning_rate": 8.831937133681578e-06, "loss": 0.4265, "step": 10023 }, { "epoch": 0.8995983935742972, "grad_norm": 0.4873870852332634, "learning_rate": 8.831601692116867e-06, "loss": 0.4045, "step": 10024 }, { "epoch": 0.8996881380269682, "grad_norm": 0.49721786928179307, "learning_rate": 8.831266208765481e-06, "loss": 0.5046, "step": 10025 }, { "epoch": 0.8997778824796392, "grad_norm": 0.506931234553653, "learning_rate": 8.830930683631079e-06, "loss": 0.4629, "step": 10026 }, { "epoch": 0.8998676269323103, "grad_norm": 0.5283092268622174, "learning_rate": 8.83059511671732e-06, "loss": 0.4834, "step": 10027 }, { "epoch": 0.8999573713849812, "grad_norm": 0.4726058433102293, "learning_rate": 8.830259508027863e-06, "loss": 0.4136, "step": 10028 }, { "epoch": 0.9000471158376523, "grad_norm": 0.4561534669762106, "learning_rate": 8.829923857566369e-06, "loss": 0.4193, "step": 10029 }, { "epoch": 0.9001368602903234, "grad_norm": 0.4701169905688121, "learning_rate": 8.829588165336498e-06, "loss": 0.4028, "step": 10030 }, { "epoch": 0.9002266047429943, "grad_norm": 0.4789359225014652, "learning_rate": 8.829252431341912e-06, "loss": 0.417, "step": 10031 }, { "epoch": 0.9003163491956654, "grad_norm": 0.475551856034516, "learning_rate": 8.82891665558627e-06, "loss": 0.4267, "step": 10032 }, { "epoch": 0.9004060936483363, "grad_norm": 0.5337163207629706, "learning_rate": 8.828580838073238e-06, "loss": 0.5278, "step": 10033 }, { "epoch": 0.9004958381010074, "grad_norm": 0.5265624339549678, "learning_rate": 8.828244978806474e-06, "loss": 0.4757, "step": 10034 }, { "epoch": 0.9005855825536784, "grad_norm": 0.49877013599153813, "learning_rate": 8.827909077789644e-06, "loss": 0.4917, "step": 10035 }, { "epoch": 0.9006753270063494, "grad_norm": 0.4820089902237742, "learning_rate": 8.827573135026411e-06, "loss": 0.4399, "step": 10036 }, { "epoch": 0.9007650714590204, "grad_norm": 0.48983085781109054, "learning_rate": 8.827237150520435e-06, "loss": 0.4757, "step": 10037 }, { "epoch": 0.9008548159116915, "grad_norm": 0.5278493805248449, "learning_rate": 8.826901124275386e-06, "loss": 0.4951, "step": 10038 }, { "epoch": 0.9009445603643624, "grad_norm": 0.460815849982438, "learning_rate": 8.826565056294923e-06, "loss": 0.3894, "step": 10039 }, { "epoch": 0.9010343048170335, "grad_norm": 0.5126187273627435, "learning_rate": 8.826228946582716e-06, "loss": 0.447, "step": 10040 }, { "epoch": 0.9011240492697045, "grad_norm": 0.4783832735989319, "learning_rate": 8.825892795142426e-06, "loss": 0.4218, "step": 10041 }, { "epoch": 0.9012137937223755, "grad_norm": 0.47287781167350545, "learning_rate": 8.825556601977724e-06, "loss": 0.4298, "step": 10042 }, { "epoch": 0.9013035381750466, "grad_norm": 0.4728417467913799, "learning_rate": 8.82522036709227e-06, "loss": 0.4416, "step": 10043 }, { "epoch": 0.9013932826277176, "grad_norm": 0.5309648453603473, "learning_rate": 8.824884090489736e-06, "loss": 0.4618, "step": 10044 }, { "epoch": 0.9014830270803886, "grad_norm": 0.48801547355283076, "learning_rate": 8.824547772173788e-06, "loss": 0.4116, "step": 10045 }, { "epoch": 0.9015727715330596, "grad_norm": 0.47305920884159164, "learning_rate": 8.824211412148095e-06, "loss": 0.4512, "step": 10046 }, { "epoch": 0.9016625159857307, "grad_norm": 0.4467205694037532, "learning_rate": 8.823875010416324e-06, "loss": 0.3946, "step": 10047 }, { "epoch": 0.9017522604384016, "grad_norm": 0.49952857631947056, "learning_rate": 8.823538566982142e-06, "loss": 0.4541, "step": 10048 }, { "epoch": 0.9018420048910727, "grad_norm": 0.448705871970507, "learning_rate": 8.82320208184922e-06, "loss": 0.4069, "step": 10049 }, { "epoch": 0.9019317493437436, "grad_norm": 0.4489535773642541, "learning_rate": 8.822865555021228e-06, "loss": 0.3916, "step": 10050 }, { "epoch": 0.9020214937964147, "grad_norm": 0.43297790688346127, "learning_rate": 8.822528986501837e-06, "loss": 0.3947, "step": 10051 }, { "epoch": 0.9021112382490857, "grad_norm": 0.5449231145660111, "learning_rate": 8.822192376294715e-06, "loss": 0.4992, "step": 10052 }, { "epoch": 0.9022009827017567, "grad_norm": 0.5275713564016116, "learning_rate": 8.821855724403533e-06, "loss": 0.4366, "step": 10053 }, { "epoch": 0.9022907271544278, "grad_norm": 0.5365858943309505, "learning_rate": 8.821519030831966e-06, "loss": 0.4712, "step": 10054 }, { "epoch": 0.9023804716070988, "grad_norm": 0.4534909253599498, "learning_rate": 8.821182295583684e-06, "loss": 0.4011, "step": 10055 }, { "epoch": 0.9024702160597698, "grad_norm": 0.4688723767541335, "learning_rate": 8.820845518662358e-06, "loss": 0.4477, "step": 10056 }, { "epoch": 0.9025599605124408, "grad_norm": 0.4980495329297701, "learning_rate": 8.820508700071662e-06, "loss": 0.4489, "step": 10057 }, { "epoch": 0.9026497049651119, "grad_norm": 0.5177403815655488, "learning_rate": 8.820171839815267e-06, "loss": 0.4829, "step": 10058 }, { "epoch": 0.9027394494177828, "grad_norm": 0.4500185717766727, "learning_rate": 8.819834937896851e-06, "loss": 0.377, "step": 10059 }, { "epoch": 0.9028291938704539, "grad_norm": 0.4808831080698353, "learning_rate": 8.819497994320085e-06, "loss": 0.4332, "step": 10060 }, { "epoch": 0.9029189383231249, "grad_norm": 0.4984691419649444, "learning_rate": 8.819161009088647e-06, "loss": 0.4656, "step": 10061 }, { "epoch": 0.9030086827757959, "grad_norm": 0.49349920910148165, "learning_rate": 8.818823982206206e-06, "loss": 0.4484, "step": 10062 }, { "epoch": 0.9030984272284669, "grad_norm": 0.5537281700544936, "learning_rate": 8.818486913676443e-06, "loss": 0.5829, "step": 10063 }, { "epoch": 0.903188171681138, "grad_norm": 0.47361127954014814, "learning_rate": 8.818149803503033e-06, "loss": 0.4316, "step": 10064 }, { "epoch": 0.903277916133809, "grad_norm": 0.4581601337914681, "learning_rate": 8.81781265168965e-06, "loss": 0.3862, "step": 10065 }, { "epoch": 0.90336766058648, "grad_norm": 0.4839158794860724, "learning_rate": 8.817475458239974e-06, "loss": 0.4653, "step": 10066 }, { "epoch": 0.9034574050391511, "grad_norm": 0.5182576996961029, "learning_rate": 8.81713822315768e-06, "loss": 0.5071, "step": 10067 }, { "epoch": 0.903547149491822, "grad_norm": 0.509716504864113, "learning_rate": 8.816800946446448e-06, "loss": 0.4685, "step": 10068 }, { "epoch": 0.9036368939444931, "grad_norm": 0.43879715750248055, "learning_rate": 8.816463628109955e-06, "loss": 0.3815, "step": 10069 }, { "epoch": 0.903726638397164, "grad_norm": 0.49942554484612506, "learning_rate": 8.816126268151879e-06, "loss": 0.4559, "step": 10070 }, { "epoch": 0.9038163828498351, "grad_norm": 0.47019079636414524, "learning_rate": 8.8157888665759e-06, "loss": 0.375, "step": 10071 }, { "epoch": 0.9039061273025061, "grad_norm": 0.5054237863284358, "learning_rate": 8.815451423385697e-06, "loss": 0.4697, "step": 10072 }, { "epoch": 0.9039958717551771, "grad_norm": 0.4587653627858049, "learning_rate": 8.81511393858495e-06, "loss": 0.4183, "step": 10073 }, { "epoch": 0.9040856162078481, "grad_norm": 0.43705216212006615, "learning_rate": 8.814776412177344e-06, "loss": 0.3748, "step": 10074 }, { "epoch": 0.9041753606605192, "grad_norm": 0.5261569328662168, "learning_rate": 8.814438844166554e-06, "loss": 0.4517, "step": 10075 }, { "epoch": 0.9042651051131901, "grad_norm": 0.46466358789332374, "learning_rate": 8.814101234556264e-06, "loss": 0.4201, "step": 10076 }, { "epoch": 0.9043548495658612, "grad_norm": 0.49143447264893664, "learning_rate": 8.813763583350155e-06, "loss": 0.4698, "step": 10077 }, { "epoch": 0.9044445940185323, "grad_norm": 0.4422020765327627, "learning_rate": 8.81342589055191e-06, "loss": 0.3719, "step": 10078 }, { "epoch": 0.9045343384712032, "grad_norm": 0.466760867305859, "learning_rate": 8.813088156165212e-06, "loss": 0.4259, "step": 10079 }, { "epoch": 0.9046240829238743, "grad_norm": 0.4582602561835164, "learning_rate": 8.812750380193745e-06, "loss": 0.4195, "step": 10080 }, { "epoch": 0.9047138273765453, "grad_norm": 0.4750140360942991, "learning_rate": 8.81241256264119e-06, "loss": 0.4317, "step": 10081 }, { "epoch": 0.9048035718292163, "grad_norm": 0.5133015964642381, "learning_rate": 8.812074703511234e-06, "loss": 0.4494, "step": 10082 }, { "epoch": 0.9048933162818873, "grad_norm": 0.5320331118999967, "learning_rate": 8.81173680280756e-06, "loss": 0.5018, "step": 10083 }, { "epoch": 0.9049830607345584, "grad_norm": 0.4927132589705888, "learning_rate": 8.811398860533854e-06, "loss": 0.4438, "step": 10084 }, { "epoch": 0.9050728051872293, "grad_norm": 0.46520804593457526, "learning_rate": 8.811060876693801e-06, "loss": 0.4193, "step": 10085 }, { "epoch": 0.9051625496399004, "grad_norm": 0.49146300224100564, "learning_rate": 8.810722851291087e-06, "loss": 0.4488, "step": 10086 }, { "epoch": 0.9052522940925714, "grad_norm": 0.487345213219989, "learning_rate": 8.8103847843294e-06, "loss": 0.4205, "step": 10087 }, { "epoch": 0.9053420385452424, "grad_norm": 0.5137732709082652, "learning_rate": 8.810046675812424e-06, "loss": 0.4702, "step": 10088 }, { "epoch": 0.9054317829979135, "grad_norm": 0.5283677686009564, "learning_rate": 8.809708525743848e-06, "loss": 0.4453, "step": 10089 }, { "epoch": 0.9055215274505845, "grad_norm": 0.4946156414094574, "learning_rate": 8.809370334127359e-06, "loss": 0.4419, "step": 10090 }, { "epoch": 0.9056112719032555, "grad_norm": 0.5081678274444416, "learning_rate": 8.809032100966647e-06, "loss": 0.4642, "step": 10091 }, { "epoch": 0.9057010163559265, "grad_norm": 0.4430524414432257, "learning_rate": 8.808693826265397e-06, "loss": 0.3941, "step": 10092 }, { "epoch": 0.9057907608085976, "grad_norm": 0.5206758368917621, "learning_rate": 8.808355510027305e-06, "loss": 0.4826, "step": 10093 }, { "epoch": 0.9058805052612685, "grad_norm": 0.5133214015718806, "learning_rate": 8.808017152256052e-06, "loss": 0.4079, "step": 10094 }, { "epoch": 0.9059702497139396, "grad_norm": 0.4470103380822256, "learning_rate": 8.807678752955336e-06, "loss": 0.3883, "step": 10095 }, { "epoch": 0.9060599941666105, "grad_norm": 0.5129389787527349, "learning_rate": 8.807340312128841e-06, "loss": 0.4989, "step": 10096 }, { "epoch": 0.9061497386192816, "grad_norm": 0.5209084043260818, "learning_rate": 8.807001829780262e-06, "loss": 0.4791, "step": 10097 }, { "epoch": 0.9062394830719526, "grad_norm": 0.4830010054840495, "learning_rate": 8.80666330591329e-06, "loss": 0.4475, "step": 10098 }, { "epoch": 0.9063292275246236, "grad_norm": 0.46155909673091267, "learning_rate": 8.806324740531615e-06, "loss": 0.4435, "step": 10099 }, { "epoch": 0.9064189719772947, "grad_norm": 0.5167675761839449, "learning_rate": 8.805986133638931e-06, "loss": 0.4956, "step": 10100 }, { "epoch": 0.9065087164299657, "grad_norm": 0.4798170149320231, "learning_rate": 8.805647485238929e-06, "loss": 0.4629, "step": 10101 }, { "epoch": 0.9065984608826367, "grad_norm": 0.48854301985858706, "learning_rate": 8.805308795335305e-06, "loss": 0.4312, "step": 10102 }, { "epoch": 0.9066882053353077, "grad_norm": 0.4907330213750731, "learning_rate": 8.80497006393175e-06, "loss": 0.4149, "step": 10103 }, { "epoch": 0.9067779497879788, "grad_norm": 0.5056313355873167, "learning_rate": 8.80463129103196e-06, "loss": 0.4667, "step": 10104 }, { "epoch": 0.9068676942406497, "grad_norm": 0.47774666083363365, "learning_rate": 8.80429247663963e-06, "loss": 0.4136, "step": 10105 }, { "epoch": 0.9069574386933208, "grad_norm": 0.48632754876367174, "learning_rate": 8.803953620758452e-06, "loss": 0.48, "step": 10106 }, { "epoch": 0.9070471831459918, "grad_norm": 0.5333353214057609, "learning_rate": 8.803614723392124e-06, "loss": 0.5047, "step": 10107 }, { "epoch": 0.9071369275986628, "grad_norm": 0.5175870534061827, "learning_rate": 8.803275784544343e-06, "loss": 0.4403, "step": 10108 }, { "epoch": 0.9072266720513338, "grad_norm": 0.4844532666521805, "learning_rate": 8.802936804218803e-06, "loss": 0.4611, "step": 10109 }, { "epoch": 0.9073164165040049, "grad_norm": 0.48169511864794223, "learning_rate": 8.8025977824192e-06, "loss": 0.4553, "step": 10110 }, { "epoch": 0.9074061609566758, "grad_norm": 0.5040175968147341, "learning_rate": 8.802258719149235e-06, "loss": 0.4882, "step": 10111 }, { "epoch": 0.9074959054093469, "grad_norm": 0.43478517625253543, "learning_rate": 8.801919614412602e-06, "loss": 0.4125, "step": 10112 }, { "epoch": 0.907585649862018, "grad_norm": 0.4622961349442543, "learning_rate": 8.801580468213e-06, "loss": 0.4062, "step": 10113 }, { "epoch": 0.9076753943146889, "grad_norm": 0.5077591392305189, "learning_rate": 8.801241280554131e-06, "loss": 0.4634, "step": 10114 }, { "epoch": 0.90776513876736, "grad_norm": 0.4781229166228893, "learning_rate": 8.80090205143969e-06, "loss": 0.399, "step": 10115 }, { "epoch": 0.9078548832200309, "grad_norm": 0.4892604845787836, "learning_rate": 8.80056278087338e-06, "loss": 0.4203, "step": 10116 }, { "epoch": 0.907944627672702, "grad_norm": 0.45367150854710575, "learning_rate": 8.800223468858897e-06, "loss": 0.4017, "step": 10117 }, { "epoch": 0.908034372125373, "grad_norm": 0.48012539503162593, "learning_rate": 8.799884115399946e-06, "loss": 0.4515, "step": 10118 }, { "epoch": 0.908124116578044, "grad_norm": 0.4985826584031432, "learning_rate": 8.799544720500224e-06, "loss": 0.4791, "step": 10119 }, { "epoch": 0.908213861030715, "grad_norm": 0.5575329900089034, "learning_rate": 8.799205284163434e-06, "loss": 0.5658, "step": 10120 }, { "epoch": 0.9083036054833861, "grad_norm": 0.4845320149786961, "learning_rate": 8.798865806393278e-06, "loss": 0.4588, "step": 10121 }, { "epoch": 0.908393349936057, "grad_norm": 0.49527098423442534, "learning_rate": 8.798526287193458e-06, "loss": 0.4897, "step": 10122 }, { "epoch": 0.9084830943887281, "grad_norm": 0.48032625401935347, "learning_rate": 8.79818672656768e-06, "loss": 0.4269, "step": 10123 }, { "epoch": 0.9085728388413992, "grad_norm": 0.5073151955107205, "learning_rate": 8.79784712451964e-06, "loss": 0.4902, "step": 10124 }, { "epoch": 0.9086625832940701, "grad_norm": 0.5229177995792827, "learning_rate": 8.797507481053048e-06, "loss": 0.5221, "step": 10125 }, { "epoch": 0.9087523277467412, "grad_norm": 0.4686309944426661, "learning_rate": 8.797167796171603e-06, "loss": 0.3969, "step": 10126 }, { "epoch": 0.9088420721994122, "grad_norm": 0.48428376785753086, "learning_rate": 8.796828069879014e-06, "loss": 0.4496, "step": 10127 }, { "epoch": 0.9089318166520832, "grad_norm": 0.48785648143222937, "learning_rate": 8.796488302178984e-06, "loss": 0.4524, "step": 10128 }, { "epoch": 0.9090215611047542, "grad_norm": 0.49141039646633616, "learning_rate": 8.796148493075219e-06, "loss": 0.422, "step": 10129 }, { "epoch": 0.9091113055574253, "grad_norm": 0.49397472071147064, "learning_rate": 8.795808642571423e-06, "loss": 0.4788, "step": 10130 }, { "epoch": 0.9092010500100962, "grad_norm": 0.44712976950220673, "learning_rate": 8.795468750671306e-06, "loss": 0.3938, "step": 10131 }, { "epoch": 0.9092907944627673, "grad_norm": 0.5077745896564576, "learning_rate": 8.795128817378573e-06, "loss": 0.4615, "step": 10132 }, { "epoch": 0.9093805389154382, "grad_norm": 0.5013660274383451, "learning_rate": 8.794788842696928e-06, "loss": 0.4886, "step": 10133 }, { "epoch": 0.9094702833681093, "grad_norm": 0.5320453386049806, "learning_rate": 8.794448826630085e-06, "loss": 0.4821, "step": 10134 }, { "epoch": 0.9095600278207804, "grad_norm": 0.4983517737574982, "learning_rate": 8.794108769181745e-06, "loss": 0.4262, "step": 10135 }, { "epoch": 0.9096497722734513, "grad_norm": 0.534518315474903, "learning_rate": 8.793768670355625e-06, "loss": 0.4718, "step": 10136 }, { "epoch": 0.9097395167261224, "grad_norm": 0.5238491609472584, "learning_rate": 8.793428530155427e-06, "loss": 0.4608, "step": 10137 }, { "epoch": 0.9098292611787934, "grad_norm": 0.5156542888263125, "learning_rate": 8.793088348584862e-06, "loss": 0.4457, "step": 10138 }, { "epoch": 0.9099190056314644, "grad_norm": 0.5060074368199097, "learning_rate": 8.792748125647644e-06, "loss": 0.4636, "step": 10139 }, { "epoch": 0.9100087500841354, "grad_norm": 0.4486851648038332, "learning_rate": 8.792407861347477e-06, "loss": 0.4238, "step": 10140 }, { "epoch": 0.9100984945368065, "grad_norm": 0.4892677377998765, "learning_rate": 8.792067555688076e-06, "loss": 0.4609, "step": 10141 }, { "epoch": 0.9101882389894774, "grad_norm": 0.5467393537476333, "learning_rate": 8.791727208673153e-06, "loss": 0.5073, "step": 10142 }, { "epoch": 0.9102779834421485, "grad_norm": 0.5132083130547437, "learning_rate": 8.791386820306415e-06, "loss": 0.4504, "step": 10143 }, { "epoch": 0.9103677278948195, "grad_norm": 0.49193308023400895, "learning_rate": 8.79104639059158e-06, "loss": 0.4348, "step": 10144 }, { "epoch": 0.9104574723474905, "grad_norm": 0.5145981741192367, "learning_rate": 8.790705919532358e-06, "loss": 0.5218, "step": 10145 }, { "epoch": 0.9105472168001615, "grad_norm": 0.48722995968834065, "learning_rate": 8.790365407132462e-06, "loss": 0.4532, "step": 10146 }, { "epoch": 0.9106369612528326, "grad_norm": 0.45701007609547994, "learning_rate": 8.790024853395604e-06, "loss": 0.3808, "step": 10147 }, { "epoch": 0.9107267057055036, "grad_norm": 0.5278567753376865, "learning_rate": 8.789684258325502e-06, "loss": 0.5204, "step": 10148 }, { "epoch": 0.9108164501581746, "grad_norm": 0.5053726467755633, "learning_rate": 8.789343621925868e-06, "loss": 0.4642, "step": 10149 }, { "epoch": 0.9109061946108457, "grad_norm": 0.45960708297456426, "learning_rate": 8.789002944200415e-06, "loss": 0.4401, "step": 10150 }, { "epoch": 0.9109959390635166, "grad_norm": 0.4989689956381242, "learning_rate": 8.788662225152862e-06, "loss": 0.4684, "step": 10151 }, { "epoch": 0.9110856835161877, "grad_norm": 0.47453016648415003, "learning_rate": 8.788321464786923e-06, "loss": 0.4132, "step": 10152 }, { "epoch": 0.9111754279688586, "grad_norm": 0.4586855973501965, "learning_rate": 8.787980663106316e-06, "loss": 0.4165, "step": 10153 }, { "epoch": 0.9112651724215297, "grad_norm": 0.5162425085118715, "learning_rate": 8.787639820114756e-06, "loss": 0.4417, "step": 10154 }, { "epoch": 0.9113549168742007, "grad_norm": 0.49873937651241285, "learning_rate": 8.787298935815958e-06, "loss": 0.4653, "step": 10155 }, { "epoch": 0.9114446613268717, "grad_norm": 0.525806348874126, "learning_rate": 8.786958010213644e-06, "loss": 0.4851, "step": 10156 }, { "epoch": 0.9115344057795427, "grad_norm": 0.5131836326729682, "learning_rate": 8.78661704331153e-06, "loss": 0.4973, "step": 10157 }, { "epoch": 0.9116241502322138, "grad_norm": 0.50349154269333, "learning_rate": 8.786276035113334e-06, "loss": 0.4605, "step": 10158 }, { "epoch": 0.9117138946848848, "grad_norm": 0.4840725483735902, "learning_rate": 8.785934985622775e-06, "loss": 0.412, "step": 10159 }, { "epoch": 0.9118036391375558, "grad_norm": 0.4807938896249951, "learning_rate": 8.785593894843577e-06, "loss": 0.4294, "step": 10160 }, { "epoch": 0.9118933835902269, "grad_norm": 0.49699947612055695, "learning_rate": 8.78525276277945e-06, "loss": 0.4564, "step": 10161 }, { "epoch": 0.9119831280428978, "grad_norm": 0.4533906848392973, "learning_rate": 8.784911589434124e-06, "loss": 0.4518, "step": 10162 }, { "epoch": 0.9120728724955689, "grad_norm": 0.48358327525591904, "learning_rate": 8.784570374811314e-06, "loss": 0.4295, "step": 10163 }, { "epoch": 0.9121626169482399, "grad_norm": 0.5021951732200672, "learning_rate": 8.784229118914744e-06, "loss": 0.474, "step": 10164 }, { "epoch": 0.9122523614009109, "grad_norm": 0.4969292585201264, "learning_rate": 8.783887821748136e-06, "loss": 0.4904, "step": 10165 }, { "epoch": 0.9123421058535819, "grad_norm": 0.4669447537060637, "learning_rate": 8.783546483315211e-06, "loss": 0.4323, "step": 10166 }, { "epoch": 0.912431850306253, "grad_norm": 0.5077376376230139, "learning_rate": 8.78320510361969e-06, "loss": 0.4592, "step": 10167 }, { "epoch": 0.9125215947589239, "grad_norm": 0.5102132179323082, "learning_rate": 8.782863682665298e-06, "loss": 0.4477, "step": 10168 }, { "epoch": 0.912611339211595, "grad_norm": 0.4995175802080519, "learning_rate": 8.782522220455759e-06, "loss": 0.4288, "step": 10169 }, { "epoch": 0.912701083664266, "grad_norm": 0.4571356165882209, "learning_rate": 8.782180716994795e-06, "loss": 0.3982, "step": 10170 }, { "epoch": 0.912790828116937, "grad_norm": 0.5269218557206584, "learning_rate": 8.78183917228613e-06, "loss": 0.4928, "step": 10171 }, { "epoch": 0.9128805725696081, "grad_norm": 0.5501568670814341, "learning_rate": 8.781497586333491e-06, "loss": 0.5415, "step": 10172 }, { "epoch": 0.912970317022279, "grad_norm": 0.5238210513082046, "learning_rate": 8.781155959140604e-06, "loss": 0.4854, "step": 10173 }, { "epoch": 0.9130600614749501, "grad_norm": 0.4807316292622169, "learning_rate": 8.780814290711193e-06, "loss": 0.4615, "step": 10174 }, { "epoch": 0.9131498059276211, "grad_norm": 0.4695377743031742, "learning_rate": 8.780472581048983e-06, "loss": 0.4027, "step": 10175 }, { "epoch": 0.9132395503802921, "grad_norm": 0.5203194534943953, "learning_rate": 8.780130830157703e-06, "loss": 0.463, "step": 10176 }, { "epoch": 0.9133292948329631, "grad_norm": 0.46463821895816704, "learning_rate": 8.779789038041078e-06, "loss": 0.4339, "step": 10177 }, { "epoch": 0.9134190392856342, "grad_norm": 0.501504211976457, "learning_rate": 8.779447204702835e-06, "loss": 0.4826, "step": 10178 }, { "epoch": 0.9135087837383051, "grad_norm": 0.48294225537656776, "learning_rate": 8.779105330146706e-06, "loss": 0.4346, "step": 10179 }, { "epoch": 0.9135985281909762, "grad_norm": 0.5038710329808157, "learning_rate": 8.778763414376417e-06, "loss": 0.4344, "step": 10180 }, { "epoch": 0.9136882726436472, "grad_norm": 0.4858687132785577, "learning_rate": 8.778421457395696e-06, "loss": 0.4085, "step": 10181 }, { "epoch": 0.9137780170963182, "grad_norm": 0.472622007714609, "learning_rate": 8.778079459208273e-06, "loss": 0.4298, "step": 10182 }, { "epoch": 0.9138677615489893, "grad_norm": 0.483629870895074, "learning_rate": 8.777737419817877e-06, "loss": 0.4375, "step": 10183 }, { "epoch": 0.9139575060016603, "grad_norm": 0.46084941697015447, "learning_rate": 8.777395339228238e-06, "loss": 0.4113, "step": 10184 }, { "epoch": 0.9140472504543313, "grad_norm": 0.5482596163434372, "learning_rate": 8.77705321744309e-06, "loss": 0.4769, "step": 10185 }, { "epoch": 0.9141369949070023, "grad_norm": 0.5114869154658604, "learning_rate": 8.776711054466162e-06, "loss": 0.4617, "step": 10186 }, { "epoch": 0.9142267393596734, "grad_norm": 0.5300359902427854, "learning_rate": 8.776368850301185e-06, "loss": 0.546, "step": 10187 }, { "epoch": 0.9143164838123443, "grad_norm": 0.4736645254855364, "learning_rate": 8.776026604951891e-06, "loss": 0.4391, "step": 10188 }, { "epoch": 0.9144062282650154, "grad_norm": 0.4589872223733313, "learning_rate": 8.77568431842201e-06, "loss": 0.389, "step": 10189 }, { "epoch": 0.9144959727176863, "grad_norm": 0.5015325606824654, "learning_rate": 8.775341990715281e-06, "loss": 0.4489, "step": 10190 }, { "epoch": 0.9145857171703574, "grad_norm": 0.4657914197304887, "learning_rate": 8.774999621835434e-06, "loss": 0.4289, "step": 10191 }, { "epoch": 0.9146754616230284, "grad_norm": 0.46001143241159687, "learning_rate": 8.774657211786202e-06, "loss": 0.3762, "step": 10192 }, { "epoch": 0.9147652060756994, "grad_norm": 0.4887415878000018, "learning_rate": 8.77431476057132e-06, "loss": 0.4344, "step": 10193 }, { "epoch": 0.9148549505283705, "grad_norm": 0.46426580916062415, "learning_rate": 8.773972268194524e-06, "loss": 0.4067, "step": 10194 }, { "epoch": 0.9149446949810415, "grad_norm": 0.4853033267439232, "learning_rate": 8.773629734659547e-06, "loss": 0.4477, "step": 10195 }, { "epoch": 0.9150344394337125, "grad_norm": 0.46750926435477774, "learning_rate": 8.773287159970126e-06, "loss": 0.3717, "step": 10196 }, { "epoch": 0.9151241838863835, "grad_norm": 0.4606058898037381, "learning_rate": 8.772944544129997e-06, "loss": 0.4208, "step": 10197 }, { "epoch": 0.9152139283390546, "grad_norm": 0.504139935914199, "learning_rate": 8.772601887142896e-06, "loss": 0.4796, "step": 10198 }, { "epoch": 0.9153036727917255, "grad_norm": 0.4802730713003409, "learning_rate": 8.77225918901256e-06, "loss": 0.4081, "step": 10199 }, { "epoch": 0.9153934172443966, "grad_norm": 0.5026798908960965, "learning_rate": 8.771916449742726e-06, "loss": 0.5047, "step": 10200 }, { "epoch": 0.9154831616970676, "grad_norm": 0.5239776319656766, "learning_rate": 8.771573669337133e-06, "loss": 0.4848, "step": 10201 }, { "epoch": 0.9155729061497386, "grad_norm": 0.5496740870709382, "learning_rate": 8.771230847799518e-06, "loss": 0.5404, "step": 10202 }, { "epoch": 0.9156626506024096, "grad_norm": 0.5241074152704239, "learning_rate": 8.77088798513362e-06, "loss": 0.4861, "step": 10203 }, { "epoch": 0.9157523950550807, "grad_norm": 0.4765965554170805, "learning_rate": 8.770545081343181e-06, "loss": 0.4395, "step": 10204 }, { "epoch": 0.9158421395077517, "grad_norm": 0.4723242746925366, "learning_rate": 8.770202136431936e-06, "loss": 0.4162, "step": 10205 }, { "epoch": 0.9159318839604227, "grad_norm": 0.4423740812900872, "learning_rate": 8.769859150403627e-06, "loss": 0.3956, "step": 10206 }, { "epoch": 0.9160216284130938, "grad_norm": 0.5096439727085347, "learning_rate": 8.769516123261996e-06, "loss": 0.4652, "step": 10207 }, { "epoch": 0.9161113728657647, "grad_norm": 0.49852040454492835, "learning_rate": 8.769173055010782e-06, "loss": 0.4544, "step": 10208 }, { "epoch": 0.9162011173184358, "grad_norm": 0.49885577636152856, "learning_rate": 8.768829945653728e-06, "loss": 0.4523, "step": 10209 }, { "epoch": 0.9162908617711067, "grad_norm": 0.43809184896727493, "learning_rate": 8.768486795194574e-06, "loss": 0.4066, "step": 10210 }, { "epoch": 0.9163806062237778, "grad_norm": 0.4533725341247105, "learning_rate": 8.768143603637065e-06, "loss": 0.4139, "step": 10211 }, { "epoch": 0.9164703506764488, "grad_norm": 0.4519680825838912, "learning_rate": 8.76780037098494e-06, "loss": 0.3853, "step": 10212 }, { "epoch": 0.9165600951291198, "grad_norm": 0.5205782704189084, "learning_rate": 8.767457097241947e-06, "loss": 0.4733, "step": 10213 }, { "epoch": 0.9166498395817908, "grad_norm": 0.5193404155354386, "learning_rate": 8.767113782411826e-06, "loss": 0.4585, "step": 10214 }, { "epoch": 0.9167395840344619, "grad_norm": 0.4743689732488012, "learning_rate": 8.766770426498323e-06, "loss": 0.4509, "step": 10215 }, { "epoch": 0.9168293284871329, "grad_norm": 0.4946580401082843, "learning_rate": 8.766427029505182e-06, "loss": 0.4765, "step": 10216 }, { "epoch": 0.9169190729398039, "grad_norm": 0.48720284461948166, "learning_rate": 8.766083591436145e-06, "loss": 0.4324, "step": 10217 }, { "epoch": 0.917008817392475, "grad_norm": 0.45303113037332804, "learning_rate": 8.765740112294964e-06, "loss": 0.4156, "step": 10218 }, { "epoch": 0.9170985618451459, "grad_norm": 0.5085704027483188, "learning_rate": 8.76539659208538e-06, "loss": 0.4726, "step": 10219 }, { "epoch": 0.917188306297817, "grad_norm": 0.48556349402219434, "learning_rate": 8.765053030811138e-06, "loss": 0.4356, "step": 10220 }, { "epoch": 0.917278050750488, "grad_norm": 0.4631686283777392, "learning_rate": 8.764709428475991e-06, "loss": 0.374, "step": 10221 }, { "epoch": 0.917367795203159, "grad_norm": 0.5196853878990311, "learning_rate": 8.764365785083681e-06, "loss": 0.463, "step": 10222 }, { "epoch": 0.91745753965583, "grad_norm": 0.4908162221426301, "learning_rate": 8.764022100637958e-06, "loss": 0.4377, "step": 10223 }, { "epoch": 0.9175472841085011, "grad_norm": 0.45972107745153, "learning_rate": 8.763678375142568e-06, "loss": 0.3874, "step": 10224 }, { "epoch": 0.917637028561172, "grad_norm": 0.4727768048841084, "learning_rate": 8.763334608601264e-06, "loss": 0.4176, "step": 10225 }, { "epoch": 0.9177267730138431, "grad_norm": 0.44603225536602914, "learning_rate": 8.76299080101779e-06, "loss": 0.3757, "step": 10226 }, { "epoch": 0.917816517466514, "grad_norm": 0.45399963913588876, "learning_rate": 8.762646952395896e-06, "loss": 0.3788, "step": 10227 }, { "epoch": 0.9179062619191851, "grad_norm": 0.52918800173189, "learning_rate": 8.762303062739336e-06, "loss": 0.5151, "step": 10228 }, { "epoch": 0.9179960063718562, "grad_norm": 0.45558243364089773, "learning_rate": 8.76195913205186e-06, "loss": 0.4173, "step": 10229 }, { "epoch": 0.9180857508245271, "grad_norm": 0.4904691099938042, "learning_rate": 8.761615160337213e-06, "loss": 0.4708, "step": 10230 }, { "epoch": 0.9181754952771982, "grad_norm": 0.4916076994703368, "learning_rate": 8.761271147599153e-06, "loss": 0.4596, "step": 10231 }, { "epoch": 0.9182652397298692, "grad_norm": 0.4951381722675742, "learning_rate": 8.760927093841427e-06, "loss": 0.4889, "step": 10232 }, { "epoch": 0.9183549841825402, "grad_norm": 0.4967665585946796, "learning_rate": 8.76058299906779e-06, "loss": 0.465, "step": 10233 }, { "epoch": 0.9184447286352112, "grad_norm": 0.5173495554509304, "learning_rate": 8.760238863281994e-06, "loss": 0.4669, "step": 10234 }, { "epoch": 0.9185344730878823, "grad_norm": 0.5478255408347877, "learning_rate": 8.75989468648779e-06, "loss": 0.5205, "step": 10235 }, { "epoch": 0.9186242175405532, "grad_norm": 0.512333959410495, "learning_rate": 8.759550468688934e-06, "loss": 0.4685, "step": 10236 }, { "epoch": 0.9187139619932243, "grad_norm": 0.475175592804169, "learning_rate": 8.75920620988918e-06, "loss": 0.3863, "step": 10237 }, { "epoch": 0.9188037064458953, "grad_norm": 0.5120741035835316, "learning_rate": 8.75886191009228e-06, "loss": 0.4582, "step": 10238 }, { "epoch": 0.9188934508985663, "grad_norm": 0.4902827013945927, "learning_rate": 8.758517569301992e-06, "loss": 0.4099, "step": 10239 }, { "epoch": 0.9189831953512374, "grad_norm": 0.5499829864427521, "learning_rate": 8.75817318752207e-06, "loss": 0.5045, "step": 10240 }, { "epoch": 0.9190729398039084, "grad_norm": 0.533972091412279, "learning_rate": 8.757828764756269e-06, "loss": 0.4749, "step": 10241 }, { "epoch": 0.9191626842565794, "grad_norm": 0.48455477214770076, "learning_rate": 8.757484301008345e-06, "loss": 0.4636, "step": 10242 }, { "epoch": 0.9192524287092504, "grad_norm": 0.5324200683085359, "learning_rate": 8.757139796282056e-06, "loss": 0.5053, "step": 10243 }, { "epoch": 0.9193421731619215, "grad_norm": 0.5003625467152409, "learning_rate": 8.75679525058116e-06, "loss": 0.4759, "step": 10244 }, { "epoch": 0.9194319176145924, "grad_norm": 0.5207564228733949, "learning_rate": 8.756450663909413e-06, "loss": 0.5242, "step": 10245 }, { "epoch": 0.9195216620672635, "grad_norm": 0.47465253095859, "learning_rate": 8.756106036270571e-06, "loss": 0.4472, "step": 10246 }, { "epoch": 0.9196114065199344, "grad_norm": 0.46936288085375255, "learning_rate": 8.755761367668396e-06, "loss": 0.4222, "step": 10247 }, { "epoch": 0.9197011509726055, "grad_norm": 0.4595541176850339, "learning_rate": 8.755416658106644e-06, "loss": 0.4171, "step": 10248 }, { "epoch": 0.9197908954252765, "grad_norm": 0.5330122055703399, "learning_rate": 8.755071907589077e-06, "loss": 0.4801, "step": 10249 }, { "epoch": 0.9198806398779475, "grad_norm": 0.5348164518070503, "learning_rate": 8.754727116119454e-06, "loss": 0.4963, "step": 10250 }, { "epoch": 0.9199703843306186, "grad_norm": 0.5164656327494976, "learning_rate": 8.754382283701534e-06, "loss": 0.5035, "step": 10251 }, { "epoch": 0.9200601287832896, "grad_norm": 0.49687039303292263, "learning_rate": 8.754037410339078e-06, "loss": 0.4745, "step": 10252 }, { "epoch": 0.9201498732359606, "grad_norm": 0.46398467239981783, "learning_rate": 8.753692496035849e-06, "loss": 0.4036, "step": 10253 }, { "epoch": 0.9202396176886316, "grad_norm": 0.4572654911961488, "learning_rate": 8.753347540795605e-06, "loss": 0.4243, "step": 10254 }, { "epoch": 0.9203293621413027, "grad_norm": 0.5034578286505921, "learning_rate": 8.753002544622112e-06, "loss": 0.4397, "step": 10255 }, { "epoch": 0.9204191065939736, "grad_norm": 0.4907702703389172, "learning_rate": 8.752657507519129e-06, "loss": 0.3913, "step": 10256 }, { "epoch": 0.9205088510466447, "grad_norm": 0.48753119843385145, "learning_rate": 8.752312429490421e-06, "loss": 0.4759, "step": 10257 }, { "epoch": 0.9205985954993157, "grad_norm": 0.5499323090166462, "learning_rate": 8.751967310539751e-06, "loss": 0.5288, "step": 10258 }, { "epoch": 0.9206883399519867, "grad_norm": 0.46921583803456396, "learning_rate": 8.751622150670883e-06, "loss": 0.4142, "step": 10259 }, { "epoch": 0.9207780844046577, "grad_norm": 0.4488520830045344, "learning_rate": 8.75127694988758e-06, "loss": 0.3954, "step": 10260 }, { "epoch": 0.9208678288573288, "grad_norm": 0.45644127334727097, "learning_rate": 8.750931708193608e-06, "loss": 0.4259, "step": 10261 }, { "epoch": 0.9209575733099997, "grad_norm": 0.5031821365169984, "learning_rate": 8.750586425592733e-06, "loss": 0.4553, "step": 10262 }, { "epoch": 0.9210473177626708, "grad_norm": 0.4623873109652903, "learning_rate": 8.750241102088717e-06, "loss": 0.4258, "step": 10263 }, { "epoch": 0.9211370622153419, "grad_norm": 0.4941095955675364, "learning_rate": 8.74989573768533e-06, "loss": 0.4409, "step": 10264 }, { "epoch": 0.9212268066680128, "grad_norm": 0.5295466049455837, "learning_rate": 8.749550332386337e-06, "loss": 0.5041, "step": 10265 }, { "epoch": 0.9213165511206839, "grad_norm": 0.5276849404338735, "learning_rate": 8.749204886195504e-06, "loss": 0.4736, "step": 10266 }, { "epoch": 0.9214062955733548, "grad_norm": 0.5405016494375597, "learning_rate": 8.748859399116598e-06, "loss": 0.4332, "step": 10267 }, { "epoch": 0.9214960400260259, "grad_norm": 0.49421837311696615, "learning_rate": 8.74851387115339e-06, "loss": 0.4811, "step": 10268 }, { "epoch": 0.9215857844786969, "grad_norm": 0.4752317753460759, "learning_rate": 8.748168302309645e-06, "loss": 0.4175, "step": 10269 }, { "epoch": 0.921675528931368, "grad_norm": 0.5432188549548997, "learning_rate": 8.747822692589132e-06, "loss": 0.5189, "step": 10270 }, { "epoch": 0.9217652733840389, "grad_norm": 0.4844211998290304, "learning_rate": 8.747477041995622e-06, "loss": 0.416, "step": 10271 }, { "epoch": 0.92185501783671, "grad_norm": 0.47990569127591476, "learning_rate": 8.747131350532882e-06, "loss": 0.4318, "step": 10272 }, { "epoch": 0.9219447622893809, "grad_norm": 0.5179698698912457, "learning_rate": 8.746785618204685e-06, "loss": 0.456, "step": 10273 }, { "epoch": 0.922034506742052, "grad_norm": 0.47944163039118975, "learning_rate": 8.7464398450148e-06, "loss": 0.4476, "step": 10274 }, { "epoch": 0.9221242511947231, "grad_norm": 0.5269733451114401, "learning_rate": 8.746094030966999e-06, "loss": 0.4979, "step": 10275 }, { "epoch": 0.922213995647394, "grad_norm": 0.515594748031644, "learning_rate": 8.745748176065052e-06, "loss": 0.4568, "step": 10276 }, { "epoch": 0.9223037401000651, "grad_norm": 0.5027800649430759, "learning_rate": 8.745402280312731e-06, "loss": 0.4601, "step": 10277 }, { "epoch": 0.9223934845527361, "grad_norm": 0.5271858846308008, "learning_rate": 8.745056343713808e-06, "loss": 0.4749, "step": 10278 }, { "epoch": 0.9224832290054071, "grad_norm": 0.47285341770102024, "learning_rate": 8.744710366272058e-06, "loss": 0.4178, "step": 10279 }, { "epoch": 0.9225729734580781, "grad_norm": 0.4919723253528507, "learning_rate": 8.74436434799125e-06, "loss": 0.4458, "step": 10280 }, { "epoch": 0.9226627179107492, "grad_norm": 0.4646995061329046, "learning_rate": 8.744018288875161e-06, "loss": 0.4673, "step": 10281 }, { "epoch": 0.9227524623634201, "grad_norm": 0.48671021990898966, "learning_rate": 8.743672188927564e-06, "loss": 0.4305, "step": 10282 }, { "epoch": 0.9228422068160912, "grad_norm": 0.4730021168232413, "learning_rate": 8.743326048152235e-06, "loss": 0.4423, "step": 10283 }, { "epoch": 0.9229319512687622, "grad_norm": 0.48274637215940647, "learning_rate": 8.742979866552945e-06, "loss": 0.4736, "step": 10284 }, { "epoch": 0.9230216957214332, "grad_norm": 0.44915752390766606, "learning_rate": 8.742633644133474e-06, "loss": 0.3796, "step": 10285 }, { "epoch": 0.9231114401741043, "grad_norm": 0.5009315018311794, "learning_rate": 8.742287380897595e-06, "loss": 0.4712, "step": 10286 }, { "epoch": 0.9232011846267753, "grad_norm": 0.4676285136944837, "learning_rate": 8.741941076849082e-06, "loss": 0.4621, "step": 10287 }, { "epoch": 0.9232909290794463, "grad_norm": 0.4605292550823853, "learning_rate": 8.741594731991719e-06, "loss": 0.388, "step": 10288 }, { "epoch": 0.9233806735321173, "grad_norm": 0.43647904446884495, "learning_rate": 8.741248346329278e-06, "loss": 0.3864, "step": 10289 }, { "epoch": 0.9234704179847883, "grad_norm": 0.5053495785625277, "learning_rate": 8.740901919865537e-06, "loss": 0.4619, "step": 10290 }, { "epoch": 0.9235601624374593, "grad_norm": 0.4740755758843635, "learning_rate": 8.740555452604274e-06, "loss": 0.4293, "step": 10291 }, { "epoch": 0.9236499068901304, "grad_norm": 0.4882560739830078, "learning_rate": 8.740208944549269e-06, "loss": 0.4425, "step": 10292 }, { "epoch": 0.9237396513428013, "grad_norm": 0.5022491834720266, "learning_rate": 8.7398623957043e-06, "loss": 0.4545, "step": 10293 }, { "epoch": 0.9238293957954724, "grad_norm": 0.4894964019341806, "learning_rate": 8.739515806073144e-06, "loss": 0.4831, "step": 10294 }, { "epoch": 0.9239191402481434, "grad_norm": 0.47856809810705375, "learning_rate": 8.739169175659586e-06, "loss": 0.463, "step": 10295 }, { "epoch": 0.9240088847008144, "grad_norm": 0.4620813093189147, "learning_rate": 8.7388225044674e-06, "loss": 0.395, "step": 10296 }, { "epoch": 0.9240986291534854, "grad_norm": 0.5347964504307309, "learning_rate": 8.738475792500374e-06, "loss": 0.5211, "step": 10297 }, { "epoch": 0.9241883736061565, "grad_norm": 0.47366440159170187, "learning_rate": 8.738129039762284e-06, "loss": 0.4254, "step": 10298 }, { "epoch": 0.9242781180588275, "grad_norm": 0.4538311905526192, "learning_rate": 8.737782246256913e-06, "loss": 0.382, "step": 10299 }, { "epoch": 0.9243678625114985, "grad_norm": 0.46533281731460246, "learning_rate": 8.737435411988045e-06, "loss": 0.4378, "step": 10300 }, { "epoch": 0.9244576069641696, "grad_norm": 0.47762863068594047, "learning_rate": 8.73708853695946e-06, "loss": 0.4537, "step": 10301 }, { "epoch": 0.9245473514168405, "grad_norm": 0.5045438928624355, "learning_rate": 8.736741621174938e-06, "loss": 0.4815, "step": 10302 }, { "epoch": 0.9246370958695116, "grad_norm": 0.49455879868927494, "learning_rate": 8.73639466463827e-06, "loss": 0.4435, "step": 10303 }, { "epoch": 0.9247268403221826, "grad_norm": 0.46624683505440795, "learning_rate": 8.736047667353233e-06, "loss": 0.4449, "step": 10304 }, { "epoch": 0.9248165847748536, "grad_norm": 0.4697444255929599, "learning_rate": 8.735700629323616e-06, "loss": 0.4386, "step": 10305 }, { "epoch": 0.9249063292275246, "grad_norm": 0.4688726752379413, "learning_rate": 8.7353535505532e-06, "loss": 0.4059, "step": 10306 }, { "epoch": 0.9249960736801957, "grad_norm": 0.4726375760907571, "learning_rate": 8.735006431045773e-06, "loss": 0.4121, "step": 10307 }, { "epoch": 0.9250858181328666, "grad_norm": 0.5257765555430166, "learning_rate": 8.73465927080512e-06, "loss": 0.4779, "step": 10308 }, { "epoch": 0.9251755625855377, "grad_norm": 0.45518612765516975, "learning_rate": 8.734312069835025e-06, "loss": 0.3885, "step": 10309 }, { "epoch": 0.9252653070382088, "grad_norm": 0.4439294513601987, "learning_rate": 8.733964828139277e-06, "loss": 0.398, "step": 10310 }, { "epoch": 0.9253550514908797, "grad_norm": 0.4515270246525146, "learning_rate": 8.733617545721663e-06, "loss": 0.4058, "step": 10311 }, { "epoch": 0.9254447959435508, "grad_norm": 0.5021731990417613, "learning_rate": 8.733270222585969e-06, "loss": 0.4482, "step": 10312 }, { "epoch": 0.9255345403962217, "grad_norm": 0.4905527748056102, "learning_rate": 8.732922858735982e-06, "loss": 0.4305, "step": 10313 }, { "epoch": 0.9256242848488928, "grad_norm": 0.5010790691621699, "learning_rate": 8.732575454175494e-06, "loss": 0.4854, "step": 10314 }, { "epoch": 0.9257140293015638, "grad_norm": 0.4888924380284665, "learning_rate": 8.732228008908289e-06, "loss": 0.438, "step": 10315 }, { "epoch": 0.9258037737542348, "grad_norm": 0.46651940124395386, "learning_rate": 8.731880522938159e-06, "loss": 0.4121, "step": 10316 }, { "epoch": 0.9258935182069058, "grad_norm": 0.5051381294569876, "learning_rate": 8.731532996268894e-06, "loss": 0.4872, "step": 10317 }, { "epoch": 0.9259832626595769, "grad_norm": 0.46877562867006844, "learning_rate": 8.731185428904283e-06, "loss": 0.4289, "step": 10318 }, { "epoch": 0.9260730071122478, "grad_norm": 0.5660808568037664, "learning_rate": 8.730837820848115e-06, "loss": 0.529, "step": 10319 }, { "epoch": 0.9261627515649189, "grad_norm": 0.487817394579619, "learning_rate": 8.730490172104184e-06, "loss": 0.4127, "step": 10320 }, { "epoch": 0.92625249601759, "grad_norm": 0.4885421974836379, "learning_rate": 8.73014248267628e-06, "loss": 0.4858, "step": 10321 }, { "epoch": 0.9263422404702609, "grad_norm": 0.49244205856736384, "learning_rate": 8.729794752568195e-06, "loss": 0.4585, "step": 10322 }, { "epoch": 0.926431984922932, "grad_norm": 0.4853960279062908, "learning_rate": 8.72944698178372e-06, "loss": 0.4538, "step": 10323 }, { "epoch": 0.926521729375603, "grad_norm": 0.5066520243479771, "learning_rate": 8.72909917032665e-06, "loss": 0.4422, "step": 10324 }, { "epoch": 0.926611473828274, "grad_norm": 0.491932034301501, "learning_rate": 8.728751318200776e-06, "loss": 0.4594, "step": 10325 }, { "epoch": 0.926701218280945, "grad_norm": 0.5014015193164868, "learning_rate": 8.728403425409893e-06, "loss": 0.4235, "step": 10326 }, { "epoch": 0.926790962733616, "grad_norm": 0.48598489633212283, "learning_rate": 8.728055491957793e-06, "loss": 0.4396, "step": 10327 }, { "epoch": 0.926880707186287, "grad_norm": 0.49289401130979316, "learning_rate": 8.727707517848273e-06, "loss": 0.4583, "step": 10328 }, { "epoch": 0.9269704516389581, "grad_norm": 0.4967759074773238, "learning_rate": 8.72735950308513e-06, "loss": 0.4749, "step": 10329 }, { "epoch": 0.927060196091629, "grad_norm": 0.45660379308526816, "learning_rate": 8.727011447672154e-06, "loss": 0.3901, "step": 10330 }, { "epoch": 0.9271499405443001, "grad_norm": 0.5215637798529363, "learning_rate": 8.726663351613142e-06, "loss": 0.5012, "step": 10331 }, { "epoch": 0.9272396849969711, "grad_norm": 0.4760787549559567, "learning_rate": 8.726315214911894e-06, "loss": 0.4632, "step": 10332 }, { "epoch": 0.9273294294496421, "grad_norm": 0.5086585529037854, "learning_rate": 8.725967037572204e-06, "loss": 0.4717, "step": 10333 }, { "epoch": 0.9274191739023132, "grad_norm": 0.5212803243496448, "learning_rate": 8.725618819597866e-06, "loss": 0.4783, "step": 10334 }, { "epoch": 0.9275089183549842, "grad_norm": 0.5178287305651342, "learning_rate": 8.725270560992685e-06, "loss": 0.4832, "step": 10335 }, { "epoch": 0.9275986628076552, "grad_norm": 0.4912548568195585, "learning_rate": 8.72492226176045e-06, "loss": 0.4168, "step": 10336 }, { "epoch": 0.9276884072603262, "grad_norm": 0.4552206982490632, "learning_rate": 8.72457392190497e-06, "loss": 0.3654, "step": 10337 }, { "epoch": 0.9277781517129973, "grad_norm": 0.4647061825781405, "learning_rate": 8.724225541430037e-06, "loss": 0.4625, "step": 10338 }, { "epoch": 0.9278678961656682, "grad_norm": 0.48495276704185447, "learning_rate": 8.72387712033945e-06, "loss": 0.4305, "step": 10339 }, { "epoch": 0.9279576406183393, "grad_norm": 0.4263688520524231, "learning_rate": 8.723528658637011e-06, "loss": 0.3639, "step": 10340 }, { "epoch": 0.9280473850710103, "grad_norm": 0.48291309227733453, "learning_rate": 8.72318015632652e-06, "loss": 0.4066, "step": 10341 }, { "epoch": 0.9281371295236813, "grad_norm": 0.46575352089225625, "learning_rate": 8.722831613411779e-06, "loss": 0.4309, "step": 10342 }, { "epoch": 0.9282268739763523, "grad_norm": 0.45817165418981765, "learning_rate": 8.722483029896587e-06, "loss": 0.4406, "step": 10343 }, { "epoch": 0.9283166184290234, "grad_norm": 0.5319196055176617, "learning_rate": 8.722134405784746e-06, "loss": 0.4555, "step": 10344 }, { "epoch": 0.9284063628816944, "grad_norm": 0.4918498911437182, "learning_rate": 8.721785741080058e-06, "loss": 0.4878, "step": 10345 }, { "epoch": 0.9284961073343654, "grad_norm": 0.5040499798044203, "learning_rate": 8.721437035786324e-06, "loss": 0.4241, "step": 10346 }, { "epoch": 0.9285858517870365, "grad_norm": 0.47270620543764563, "learning_rate": 8.72108828990735e-06, "loss": 0.4375, "step": 10347 }, { "epoch": 0.9286755962397074, "grad_norm": 0.5652617412169333, "learning_rate": 8.72073950344694e-06, "loss": 0.5403, "step": 10348 }, { "epoch": 0.9287653406923785, "grad_norm": 0.46046074797862674, "learning_rate": 8.720390676408893e-06, "loss": 0.4281, "step": 10349 }, { "epoch": 0.9288550851450494, "grad_norm": 0.48307579155022246, "learning_rate": 8.720041808797018e-06, "loss": 0.4409, "step": 10350 }, { "epoch": 0.9289448295977205, "grad_norm": 0.5073606937306292, "learning_rate": 8.719692900615117e-06, "loss": 0.4854, "step": 10351 }, { "epoch": 0.9290345740503915, "grad_norm": 0.47735794904234186, "learning_rate": 8.719343951866995e-06, "loss": 0.3871, "step": 10352 }, { "epoch": 0.9291243185030625, "grad_norm": 0.4775296938499411, "learning_rate": 8.71899496255646e-06, "loss": 0.4579, "step": 10353 }, { "epoch": 0.9292140629557335, "grad_norm": 0.4898381881823895, "learning_rate": 8.718645932687316e-06, "loss": 0.4395, "step": 10354 }, { "epoch": 0.9293038074084046, "grad_norm": 0.4792218551175539, "learning_rate": 8.718296862263369e-06, "loss": 0.4105, "step": 10355 }, { "epoch": 0.9293935518610756, "grad_norm": 0.4924760414286258, "learning_rate": 8.717947751288428e-06, "loss": 0.3965, "step": 10356 }, { "epoch": 0.9294832963137466, "grad_norm": 0.4765858421153656, "learning_rate": 8.717598599766297e-06, "loss": 0.4593, "step": 10357 }, { "epoch": 0.9295730407664177, "grad_norm": 0.46460386899350986, "learning_rate": 8.717249407700787e-06, "loss": 0.4118, "step": 10358 }, { "epoch": 0.9296627852190886, "grad_norm": 0.4608818269593573, "learning_rate": 8.716900175095704e-06, "loss": 0.4387, "step": 10359 }, { "epoch": 0.9297525296717597, "grad_norm": 0.47919104795007095, "learning_rate": 8.71655090195486e-06, "loss": 0.456, "step": 10360 }, { "epoch": 0.9298422741244307, "grad_norm": 0.5237400774036544, "learning_rate": 8.71620158828206e-06, "loss": 0.5049, "step": 10361 }, { "epoch": 0.9299320185771017, "grad_norm": 0.42867756350423725, "learning_rate": 8.715852234081117e-06, "loss": 0.3716, "step": 10362 }, { "epoch": 0.9300217630297727, "grad_norm": 0.4895381857756693, "learning_rate": 8.715502839355839e-06, "loss": 0.4569, "step": 10363 }, { "epoch": 0.9301115074824438, "grad_norm": 0.47722155560527957, "learning_rate": 8.715153404110035e-06, "loss": 0.4287, "step": 10364 }, { "epoch": 0.9302012519351147, "grad_norm": 0.4543912644511694, "learning_rate": 8.714803928347519e-06, "loss": 0.402, "step": 10365 }, { "epoch": 0.9302909963877858, "grad_norm": 0.44195053931596767, "learning_rate": 8.714454412072102e-06, "loss": 0.4028, "step": 10366 }, { "epoch": 0.9303807408404569, "grad_norm": 0.4757586903002887, "learning_rate": 8.714104855287593e-06, "loss": 0.4479, "step": 10367 }, { "epoch": 0.9304704852931278, "grad_norm": 0.47559494787056544, "learning_rate": 8.713755257997807e-06, "loss": 0.4279, "step": 10368 }, { "epoch": 0.9305602297457989, "grad_norm": 0.5073925021859921, "learning_rate": 8.713405620206554e-06, "loss": 0.4649, "step": 10369 }, { "epoch": 0.9306499741984698, "grad_norm": 0.5328982416387336, "learning_rate": 8.713055941917649e-06, "loss": 0.5031, "step": 10370 }, { "epoch": 0.9307397186511409, "grad_norm": 0.44922778938268804, "learning_rate": 8.712706223134908e-06, "loss": 0.3785, "step": 10371 }, { "epoch": 0.9308294631038119, "grad_norm": 0.46040167616867633, "learning_rate": 8.71235646386214e-06, "loss": 0.4163, "step": 10372 }, { "epoch": 0.9309192075564829, "grad_norm": 0.45618107166165756, "learning_rate": 8.71200666410316e-06, "loss": 0.4128, "step": 10373 }, { "epoch": 0.9310089520091539, "grad_norm": 0.46183350838856657, "learning_rate": 8.711656823861786e-06, "loss": 0.4162, "step": 10374 }, { "epoch": 0.931098696461825, "grad_norm": 0.5013791204536383, "learning_rate": 8.711306943141833e-06, "loss": 0.4534, "step": 10375 }, { "epoch": 0.9311884409144959, "grad_norm": 0.4926939745607232, "learning_rate": 8.710957021947112e-06, "loss": 0.4, "step": 10376 }, { "epoch": 0.931278185367167, "grad_norm": 0.504466245727493, "learning_rate": 8.710607060281446e-06, "loss": 0.5157, "step": 10377 }, { "epoch": 0.931367929819838, "grad_norm": 0.4915938314037726, "learning_rate": 8.710257058148647e-06, "loss": 0.4214, "step": 10378 }, { "epoch": 0.931457674272509, "grad_norm": 0.46344906782447437, "learning_rate": 8.709907015552533e-06, "loss": 0.4018, "step": 10379 }, { "epoch": 0.9315474187251801, "grad_norm": 0.5009500485483843, "learning_rate": 8.709556932496922e-06, "loss": 0.5102, "step": 10380 }, { "epoch": 0.9316371631778511, "grad_norm": 0.46175320556176175, "learning_rate": 8.709206808985631e-06, "loss": 0.4153, "step": 10381 }, { "epoch": 0.9317269076305221, "grad_norm": 0.4816419025730865, "learning_rate": 8.70885664502248e-06, "loss": 0.4538, "step": 10382 }, { "epoch": 0.9318166520831931, "grad_norm": 0.4914350522638888, "learning_rate": 8.708506440611285e-06, "loss": 0.4352, "step": 10383 }, { "epoch": 0.9319063965358642, "grad_norm": 0.5460508502753854, "learning_rate": 8.708156195755869e-06, "loss": 0.6232, "step": 10384 }, { "epoch": 0.9319961409885351, "grad_norm": 0.47420935238998, "learning_rate": 8.707805910460047e-06, "loss": 0.4626, "step": 10385 }, { "epoch": 0.9320858854412062, "grad_norm": 0.49276863946537536, "learning_rate": 8.707455584727646e-06, "loss": 0.4852, "step": 10386 }, { "epoch": 0.9321756298938771, "grad_norm": 0.5763380241138227, "learning_rate": 8.70710521856248e-06, "loss": 0.5453, "step": 10387 }, { "epoch": 0.9322653743465482, "grad_norm": 0.5071539886547854, "learning_rate": 8.706754811968374e-06, "loss": 0.4385, "step": 10388 }, { "epoch": 0.9323551187992192, "grad_norm": 0.47836767255192864, "learning_rate": 8.706404364949147e-06, "loss": 0.4431, "step": 10389 }, { "epoch": 0.9324448632518902, "grad_norm": 0.4757208269875475, "learning_rate": 8.706053877508623e-06, "loss": 0.4425, "step": 10390 }, { "epoch": 0.9325346077045613, "grad_norm": 0.47257037239914146, "learning_rate": 8.705703349650622e-06, "loss": 0.4495, "step": 10391 }, { "epoch": 0.9326243521572323, "grad_norm": 0.4974613664947459, "learning_rate": 8.705352781378968e-06, "loss": 0.4656, "step": 10392 }, { "epoch": 0.9327140966099033, "grad_norm": 0.4455291724971847, "learning_rate": 8.705002172697485e-06, "loss": 0.3772, "step": 10393 }, { "epoch": 0.9328038410625743, "grad_norm": 0.4535053629815877, "learning_rate": 8.704651523609995e-06, "loss": 0.3896, "step": 10394 }, { "epoch": 0.9328935855152454, "grad_norm": 0.5388237209250581, "learning_rate": 8.704300834120324e-06, "loss": 0.4518, "step": 10395 }, { "epoch": 0.9329833299679163, "grad_norm": 0.4989382201879341, "learning_rate": 8.703950104232295e-06, "loss": 0.4221, "step": 10396 }, { "epoch": 0.9330730744205874, "grad_norm": 0.4969978376080369, "learning_rate": 8.703599333949734e-06, "loss": 0.473, "step": 10397 }, { "epoch": 0.9331628188732584, "grad_norm": 0.49843339375664375, "learning_rate": 8.703248523276465e-06, "loss": 0.4314, "step": 10398 }, { "epoch": 0.9332525633259294, "grad_norm": 0.49776481770088105, "learning_rate": 8.702897672216315e-06, "loss": 0.477, "step": 10399 }, { "epoch": 0.9333423077786004, "grad_norm": 0.47311996938288614, "learning_rate": 8.702546780773112e-06, "loss": 0.4475, "step": 10400 }, { "epoch": 0.9334320522312715, "grad_norm": 0.48937010672479725, "learning_rate": 8.702195848950681e-06, "loss": 0.4637, "step": 10401 }, { "epoch": 0.9335217966839425, "grad_norm": 0.5122347966371333, "learning_rate": 8.701844876752847e-06, "loss": 0.556, "step": 10402 }, { "epoch": 0.9336115411366135, "grad_norm": 0.4507509847645026, "learning_rate": 8.70149386418344e-06, "loss": 0.387, "step": 10403 }, { "epoch": 0.9337012855892846, "grad_norm": 0.4880541160831616, "learning_rate": 8.70114281124629e-06, "loss": 0.4338, "step": 10404 }, { "epoch": 0.9337910300419555, "grad_norm": 0.46765635507762976, "learning_rate": 8.700791717945223e-06, "loss": 0.4361, "step": 10405 }, { "epoch": 0.9338807744946266, "grad_norm": 0.5082537716942452, "learning_rate": 8.700440584284067e-06, "loss": 0.4774, "step": 10406 }, { "epoch": 0.9339705189472975, "grad_norm": 0.5112316138306152, "learning_rate": 8.700089410266652e-06, "loss": 0.4608, "step": 10407 }, { "epoch": 0.9340602633999686, "grad_norm": 0.4990935389684668, "learning_rate": 8.69973819589681e-06, "loss": 0.4764, "step": 10408 }, { "epoch": 0.9341500078526396, "grad_norm": 0.4918101433311141, "learning_rate": 8.699386941178369e-06, "loss": 0.4086, "step": 10409 }, { "epoch": 0.9342397523053106, "grad_norm": 0.4812607560152529, "learning_rate": 8.699035646115162e-06, "loss": 0.4, "step": 10410 }, { "epoch": 0.9343294967579816, "grad_norm": 0.4627854658834591, "learning_rate": 8.698684310711018e-06, "loss": 0.3943, "step": 10411 }, { "epoch": 0.9344192412106527, "grad_norm": 0.5092077761671981, "learning_rate": 8.698332934969768e-06, "loss": 0.4512, "step": 10412 }, { "epoch": 0.9345089856633236, "grad_norm": 0.5054510165464339, "learning_rate": 8.697981518895247e-06, "loss": 0.4648, "step": 10413 }, { "epoch": 0.9345987301159947, "grad_norm": 0.4952944735193438, "learning_rate": 8.697630062491285e-06, "loss": 0.4549, "step": 10414 }, { "epoch": 0.9346884745686658, "grad_norm": 0.5628956366310499, "learning_rate": 8.697278565761715e-06, "loss": 0.5237, "step": 10415 }, { "epoch": 0.9347782190213367, "grad_norm": 0.443488975889633, "learning_rate": 8.696927028710372e-06, "loss": 0.3923, "step": 10416 }, { "epoch": 0.9348679634740078, "grad_norm": 0.5523125704606081, "learning_rate": 8.696575451341088e-06, "loss": 0.5201, "step": 10417 }, { "epoch": 0.9349577079266788, "grad_norm": 0.4425765472524434, "learning_rate": 8.696223833657698e-06, "loss": 0.3785, "step": 10418 }, { "epoch": 0.9350474523793498, "grad_norm": 0.4655879335696151, "learning_rate": 8.695872175664038e-06, "loss": 0.4287, "step": 10419 }, { "epoch": 0.9351371968320208, "grad_norm": 0.49956827776227153, "learning_rate": 8.695520477363942e-06, "loss": 0.485, "step": 10420 }, { "epoch": 0.9352269412846919, "grad_norm": 0.45780602196998194, "learning_rate": 8.695168738761244e-06, "loss": 0.4202, "step": 10421 }, { "epoch": 0.9353166857373628, "grad_norm": 0.496918804253087, "learning_rate": 8.694816959859783e-06, "loss": 0.408, "step": 10422 }, { "epoch": 0.9354064301900339, "grad_norm": 0.5048067373937616, "learning_rate": 8.694465140663393e-06, "loss": 0.4812, "step": 10423 }, { "epoch": 0.9354961746427048, "grad_norm": 0.5091611736778681, "learning_rate": 8.694113281175911e-06, "loss": 0.4135, "step": 10424 }, { "epoch": 0.9355859190953759, "grad_norm": 0.5243691112258023, "learning_rate": 8.693761381401175e-06, "loss": 0.4504, "step": 10425 }, { "epoch": 0.935675663548047, "grad_norm": 0.5128680818504363, "learning_rate": 8.693409441343024e-06, "loss": 0.4668, "step": 10426 }, { "epoch": 0.935765408000718, "grad_norm": 0.5111995187906712, "learning_rate": 8.693057461005294e-06, "loss": 0.4806, "step": 10427 }, { "epoch": 0.935855152453389, "grad_norm": 0.470043576952323, "learning_rate": 8.692705440391824e-06, "loss": 0.4295, "step": 10428 }, { "epoch": 0.93594489690606, "grad_norm": 0.4747019385283374, "learning_rate": 8.692353379506456e-06, "loss": 0.4513, "step": 10429 }, { "epoch": 0.936034641358731, "grad_norm": 0.5249314904349597, "learning_rate": 8.692001278353024e-06, "loss": 0.4711, "step": 10430 }, { "epoch": 0.936124385811402, "grad_norm": 0.46375664477829387, "learning_rate": 8.691649136935374e-06, "loss": 0.417, "step": 10431 }, { "epoch": 0.9362141302640731, "grad_norm": 0.5133040123718476, "learning_rate": 8.69129695525734e-06, "loss": 0.4937, "step": 10432 }, { "epoch": 0.936303874716744, "grad_norm": 0.5190660999130476, "learning_rate": 8.69094473332277e-06, "loss": 0.4688, "step": 10433 }, { "epoch": 0.9363936191694151, "grad_norm": 0.4680766533058381, "learning_rate": 8.6905924711355e-06, "loss": 0.4507, "step": 10434 }, { "epoch": 0.9364833636220861, "grad_norm": 0.44679744311815267, "learning_rate": 8.690240168699373e-06, "loss": 0.3884, "step": 10435 }, { "epoch": 0.9365731080747571, "grad_norm": 0.49339050596957573, "learning_rate": 8.68988782601823e-06, "loss": 0.4317, "step": 10436 }, { "epoch": 0.9366628525274282, "grad_norm": 0.48166593725786794, "learning_rate": 8.689535443095917e-06, "loss": 0.4572, "step": 10437 }, { "epoch": 0.9367525969800992, "grad_norm": 0.4960878651279869, "learning_rate": 8.689183019936276e-06, "loss": 0.4248, "step": 10438 }, { "epoch": 0.9368423414327702, "grad_norm": 0.4862538114810581, "learning_rate": 8.688830556543148e-06, "loss": 0.4307, "step": 10439 }, { "epoch": 0.9369320858854412, "grad_norm": 0.5120784619742667, "learning_rate": 8.688478052920377e-06, "loss": 0.4425, "step": 10440 }, { "epoch": 0.9370218303381123, "grad_norm": 0.49950242974150677, "learning_rate": 8.68812550907181e-06, "loss": 0.4393, "step": 10441 }, { "epoch": 0.9371115747907832, "grad_norm": 0.4430402323364096, "learning_rate": 8.687772925001292e-06, "loss": 0.3667, "step": 10442 }, { "epoch": 0.9372013192434543, "grad_norm": 0.5474390325662, "learning_rate": 8.687420300712663e-06, "loss": 0.5517, "step": 10443 }, { "epoch": 0.9372910636961252, "grad_norm": 0.539576120431483, "learning_rate": 8.687067636209775e-06, "loss": 0.4377, "step": 10444 }, { "epoch": 0.9373808081487963, "grad_norm": 0.48217040092733154, "learning_rate": 8.68671493149647e-06, "loss": 0.4292, "step": 10445 }, { "epoch": 0.9374705526014673, "grad_norm": 0.4676284386057358, "learning_rate": 8.686362186576598e-06, "loss": 0.3906, "step": 10446 }, { "epoch": 0.9375602970541383, "grad_norm": 0.46973904395525495, "learning_rate": 8.686009401454e-06, "loss": 0.4574, "step": 10447 }, { "epoch": 0.9376500415068093, "grad_norm": 0.5212688620795038, "learning_rate": 8.68565657613253e-06, "loss": 0.4765, "step": 10448 }, { "epoch": 0.9377397859594804, "grad_norm": 0.4925692485066766, "learning_rate": 8.685303710616032e-06, "loss": 0.4505, "step": 10449 }, { "epoch": 0.9378295304121514, "grad_norm": 0.4868693847240053, "learning_rate": 8.684950804908358e-06, "loss": 0.4365, "step": 10450 }, { "epoch": 0.9379192748648224, "grad_norm": 0.46094557065982816, "learning_rate": 8.684597859013352e-06, "loss": 0.4118, "step": 10451 }, { "epoch": 0.9380090193174935, "grad_norm": 0.5246993950550882, "learning_rate": 8.684244872934867e-06, "loss": 0.464, "step": 10452 }, { "epoch": 0.9380987637701644, "grad_norm": 0.46555049481182403, "learning_rate": 8.683891846676748e-06, "loss": 0.4277, "step": 10453 }, { "epoch": 0.9381885082228355, "grad_norm": 0.4942774670566514, "learning_rate": 8.68353878024285e-06, "loss": 0.4793, "step": 10454 }, { "epoch": 0.9382782526755065, "grad_norm": 0.5531030175408467, "learning_rate": 8.68318567363702e-06, "loss": 0.4282, "step": 10455 }, { "epoch": 0.9383679971281775, "grad_norm": 0.5160241491675571, "learning_rate": 8.682832526863113e-06, "loss": 0.4741, "step": 10456 }, { "epoch": 0.9384577415808485, "grad_norm": 0.5249554095711284, "learning_rate": 8.682479339924974e-06, "loss": 0.4516, "step": 10457 }, { "epoch": 0.9385474860335196, "grad_norm": 0.4712481794110572, "learning_rate": 8.682126112826461e-06, "loss": 0.4023, "step": 10458 }, { "epoch": 0.9386372304861905, "grad_norm": 0.5513879854244461, "learning_rate": 8.681772845571423e-06, "loss": 0.513, "step": 10459 }, { "epoch": 0.9387269749388616, "grad_norm": 0.5128145609177699, "learning_rate": 8.681419538163713e-06, "loss": 0.4733, "step": 10460 }, { "epoch": 0.9388167193915327, "grad_norm": 0.5261952089902074, "learning_rate": 8.681066190607183e-06, "loss": 0.5262, "step": 10461 }, { "epoch": 0.9389064638442036, "grad_norm": 0.44466536772718623, "learning_rate": 8.68071280290569e-06, "loss": 0.3729, "step": 10462 }, { "epoch": 0.9389962082968747, "grad_norm": 0.49599614083779076, "learning_rate": 8.680359375063086e-06, "loss": 0.4424, "step": 10463 }, { "epoch": 0.9390859527495456, "grad_norm": 0.45456027893087847, "learning_rate": 8.680005907083223e-06, "loss": 0.383, "step": 10464 }, { "epoch": 0.9391756972022167, "grad_norm": 0.4614644681517273, "learning_rate": 8.67965239896996e-06, "loss": 0.3949, "step": 10465 }, { "epoch": 0.9392654416548877, "grad_norm": 0.4701881518746673, "learning_rate": 8.67929885072715e-06, "loss": 0.4585, "step": 10466 }, { "epoch": 0.9393551861075587, "grad_norm": 0.5104286050988314, "learning_rate": 8.67894526235865e-06, "loss": 0.4762, "step": 10467 }, { "epoch": 0.9394449305602297, "grad_norm": 0.5138417372912186, "learning_rate": 8.678591633868314e-06, "loss": 0.4801, "step": 10468 }, { "epoch": 0.9395346750129008, "grad_norm": 0.4918206764233409, "learning_rate": 8.67823796526e-06, "loss": 0.439, "step": 10469 }, { "epoch": 0.9396244194655717, "grad_norm": 0.46103756190479606, "learning_rate": 8.677884256537567e-06, "loss": 0.3665, "step": 10470 }, { "epoch": 0.9397141639182428, "grad_norm": 0.47853292309191453, "learning_rate": 8.67753050770487e-06, "loss": 0.4401, "step": 10471 }, { "epoch": 0.9398039083709139, "grad_norm": 0.46983777851393804, "learning_rate": 8.677176718765767e-06, "loss": 0.3732, "step": 10472 }, { "epoch": 0.9398936528235848, "grad_norm": 0.48616009066426036, "learning_rate": 8.676822889724114e-06, "loss": 0.4419, "step": 10473 }, { "epoch": 0.9399833972762559, "grad_norm": 0.4629390096931567, "learning_rate": 8.676469020583776e-06, "loss": 0.4246, "step": 10474 }, { "epoch": 0.9400731417289269, "grad_norm": 0.4814512322359664, "learning_rate": 8.676115111348608e-06, "loss": 0.4152, "step": 10475 }, { "epoch": 0.9401628861815979, "grad_norm": 0.4714033992973373, "learning_rate": 8.675761162022469e-06, "loss": 0.4183, "step": 10476 }, { "epoch": 0.9402526306342689, "grad_norm": 0.47103646996689624, "learning_rate": 8.675407172609222e-06, "loss": 0.4201, "step": 10477 }, { "epoch": 0.94034237508694, "grad_norm": 0.5055652988277973, "learning_rate": 8.675053143112726e-06, "loss": 0.4847, "step": 10478 }, { "epoch": 0.9404321195396109, "grad_norm": 0.5854900848725064, "learning_rate": 8.674699073536841e-06, "loss": 0.4968, "step": 10479 }, { "epoch": 0.940521863992282, "grad_norm": 0.4912727084026041, "learning_rate": 8.67434496388543e-06, "loss": 0.4475, "step": 10480 }, { "epoch": 0.940611608444953, "grad_norm": 0.5158699244774021, "learning_rate": 8.673990814162354e-06, "loss": 0.4605, "step": 10481 }, { "epoch": 0.940701352897624, "grad_norm": 0.5137833207215625, "learning_rate": 8.673636624371475e-06, "loss": 0.4441, "step": 10482 }, { "epoch": 0.940791097350295, "grad_norm": 0.5129613122757447, "learning_rate": 8.673282394516657e-06, "loss": 0.54, "step": 10483 }, { "epoch": 0.940880841802966, "grad_norm": 0.48476854531572305, "learning_rate": 8.672928124601763e-06, "loss": 0.4342, "step": 10484 }, { "epoch": 0.9409705862556371, "grad_norm": 0.46188619950744364, "learning_rate": 8.672573814630656e-06, "loss": 0.4024, "step": 10485 }, { "epoch": 0.9410603307083081, "grad_norm": 0.49263159644285526, "learning_rate": 8.672219464607199e-06, "loss": 0.4545, "step": 10486 }, { "epoch": 0.9411500751609791, "grad_norm": 0.5204069551883531, "learning_rate": 8.671865074535259e-06, "loss": 0.4147, "step": 10487 }, { "epoch": 0.9412398196136501, "grad_norm": 0.538695680462379, "learning_rate": 8.6715106444187e-06, "loss": 0.5171, "step": 10488 }, { "epoch": 0.9413295640663212, "grad_norm": 0.4784395450340166, "learning_rate": 8.671156174261383e-06, "loss": 0.4621, "step": 10489 }, { "epoch": 0.9414193085189921, "grad_norm": 0.5235255355944339, "learning_rate": 8.670801664067181e-06, "loss": 0.4965, "step": 10490 }, { "epoch": 0.9415090529716632, "grad_norm": 0.5400622114439911, "learning_rate": 8.670447113839955e-06, "loss": 0.5284, "step": 10491 }, { "epoch": 0.9415987974243342, "grad_norm": 0.4982125321972315, "learning_rate": 8.670092523583576e-06, "loss": 0.5071, "step": 10492 }, { "epoch": 0.9416885418770052, "grad_norm": 0.48376829701084406, "learning_rate": 8.669737893301906e-06, "loss": 0.468, "step": 10493 }, { "epoch": 0.9417782863296762, "grad_norm": 0.522959334983313, "learning_rate": 8.669383222998815e-06, "loss": 0.5014, "step": 10494 }, { "epoch": 0.9418680307823473, "grad_norm": 0.4671603697433364, "learning_rate": 8.669028512678173e-06, "loss": 0.4091, "step": 10495 }, { "epoch": 0.9419577752350183, "grad_norm": 0.4779041778336589, "learning_rate": 8.668673762343844e-06, "loss": 0.4252, "step": 10496 }, { "epoch": 0.9420475196876893, "grad_norm": 0.4688126240064914, "learning_rate": 8.668318971999701e-06, "loss": 0.455, "step": 10497 }, { "epoch": 0.9421372641403604, "grad_norm": 0.47028224350964093, "learning_rate": 8.66796414164961e-06, "loss": 0.4296, "step": 10498 }, { "epoch": 0.9422270085930313, "grad_norm": 0.4492473469785808, "learning_rate": 8.667609271297444e-06, "loss": 0.4107, "step": 10499 }, { "epoch": 0.9423167530457024, "grad_norm": 0.5236592790283172, "learning_rate": 8.66725436094707e-06, "loss": 0.5189, "step": 10500 }, { "epoch": 0.9424064974983734, "grad_norm": 0.4767940631543412, "learning_rate": 8.666899410602361e-06, "loss": 0.435, "step": 10501 }, { "epoch": 0.9424962419510444, "grad_norm": 0.48377976965829605, "learning_rate": 8.666544420267187e-06, "loss": 0.4073, "step": 10502 }, { "epoch": 0.9425859864037154, "grad_norm": 0.45483487996260036, "learning_rate": 8.666189389945419e-06, "loss": 0.3969, "step": 10503 }, { "epoch": 0.9426757308563865, "grad_norm": 0.4714726316815058, "learning_rate": 8.66583431964093e-06, "loss": 0.4478, "step": 10504 }, { "epoch": 0.9427654753090574, "grad_norm": 0.4321041558654493, "learning_rate": 8.66547920935759e-06, "loss": 0.3637, "step": 10505 }, { "epoch": 0.9428552197617285, "grad_norm": 0.4492040384839762, "learning_rate": 8.665124059099274e-06, "loss": 0.4132, "step": 10506 }, { "epoch": 0.9429449642143996, "grad_norm": 0.474098377327642, "learning_rate": 8.664768868869856e-06, "loss": 0.422, "step": 10507 }, { "epoch": 0.9430347086670705, "grad_norm": 0.4708906698062527, "learning_rate": 8.664413638673208e-06, "loss": 0.4172, "step": 10508 }, { "epoch": 0.9431244531197416, "grad_norm": 0.4872216127153613, "learning_rate": 8.664058368513202e-06, "loss": 0.4757, "step": 10509 }, { "epoch": 0.9432141975724125, "grad_norm": 0.4789534498297829, "learning_rate": 8.663703058393717e-06, "loss": 0.4138, "step": 10510 }, { "epoch": 0.9433039420250836, "grad_norm": 0.503593833922695, "learning_rate": 8.663347708318626e-06, "loss": 0.442, "step": 10511 }, { "epoch": 0.9433936864777546, "grad_norm": 0.48920108139242735, "learning_rate": 8.662992318291803e-06, "loss": 0.4613, "step": 10512 }, { "epoch": 0.9434834309304256, "grad_norm": 0.4942905278312316, "learning_rate": 8.662636888317126e-06, "loss": 0.4508, "step": 10513 }, { "epoch": 0.9435731753830966, "grad_norm": 0.5227032497240182, "learning_rate": 8.662281418398468e-06, "loss": 0.4554, "step": 10514 }, { "epoch": 0.9436629198357677, "grad_norm": 0.4632250949347416, "learning_rate": 8.66192590853971e-06, "loss": 0.3979, "step": 10515 }, { "epoch": 0.9437526642884386, "grad_norm": 0.47816411279738696, "learning_rate": 8.661570358744726e-06, "loss": 0.4642, "step": 10516 }, { "epoch": 0.9438424087411097, "grad_norm": 0.5289349776654136, "learning_rate": 8.661214769017394e-06, "loss": 0.4804, "step": 10517 }, { "epoch": 0.9439321531937807, "grad_norm": 0.4987257061180278, "learning_rate": 8.660859139361595e-06, "loss": 0.4983, "step": 10518 }, { "epoch": 0.9440218976464517, "grad_norm": 0.44704054446929586, "learning_rate": 8.660503469781203e-06, "loss": 0.3938, "step": 10519 }, { "epoch": 0.9441116420991228, "grad_norm": 0.4764378268396347, "learning_rate": 8.6601477602801e-06, "loss": 0.4389, "step": 10520 }, { "epoch": 0.9442013865517938, "grad_norm": 0.4500254336696631, "learning_rate": 8.659792010862162e-06, "loss": 0.3931, "step": 10521 }, { "epoch": 0.9442911310044648, "grad_norm": 0.4984177250522948, "learning_rate": 8.659436221531271e-06, "loss": 0.4162, "step": 10522 }, { "epoch": 0.9443808754571358, "grad_norm": 0.502691635800103, "learning_rate": 8.659080392291309e-06, "loss": 0.4514, "step": 10523 }, { "epoch": 0.9444706199098069, "grad_norm": 0.5093495544331798, "learning_rate": 8.658724523146154e-06, "loss": 0.5041, "step": 10524 }, { "epoch": 0.9445603643624778, "grad_norm": 0.4680176013748619, "learning_rate": 8.658368614099685e-06, "loss": 0.4157, "step": 10525 }, { "epoch": 0.9446501088151489, "grad_norm": 0.45343929004825667, "learning_rate": 8.65801266515579e-06, "loss": 0.415, "step": 10526 }, { "epoch": 0.9447398532678198, "grad_norm": 0.4974580388860412, "learning_rate": 8.657656676318346e-06, "loss": 0.4284, "step": 10527 }, { "epoch": 0.9448295977204909, "grad_norm": 0.492180696755043, "learning_rate": 8.657300647591236e-06, "loss": 0.425, "step": 10528 }, { "epoch": 0.9449193421731619, "grad_norm": 0.4903033224545528, "learning_rate": 8.656944578978343e-06, "loss": 0.4502, "step": 10529 }, { "epoch": 0.9450090866258329, "grad_norm": 0.5628453022086536, "learning_rate": 8.656588470483551e-06, "loss": 0.5821, "step": 10530 }, { "epoch": 0.945098831078504, "grad_norm": 0.4477292567720026, "learning_rate": 8.65623232211074e-06, "loss": 0.3816, "step": 10531 }, { "epoch": 0.945188575531175, "grad_norm": 0.48317355038549453, "learning_rate": 8.6558761338638e-06, "loss": 0.4395, "step": 10532 }, { "epoch": 0.945278319983846, "grad_norm": 0.5604717234526158, "learning_rate": 8.655519905746614e-06, "loss": 0.4873, "step": 10533 }, { "epoch": 0.945368064436517, "grad_norm": 0.511703520323115, "learning_rate": 8.655163637763061e-06, "loss": 0.4655, "step": 10534 }, { "epoch": 0.9454578088891881, "grad_norm": 0.4790701330214525, "learning_rate": 8.654807329917035e-06, "loss": 0.4659, "step": 10535 }, { "epoch": 0.945547553341859, "grad_norm": 0.47482213924562594, "learning_rate": 8.654450982212417e-06, "loss": 0.4214, "step": 10536 }, { "epoch": 0.9456372977945301, "grad_norm": 0.5100515752770397, "learning_rate": 8.654094594653093e-06, "loss": 0.4676, "step": 10537 }, { "epoch": 0.945727042247201, "grad_norm": 0.46940899653652, "learning_rate": 8.653738167242951e-06, "loss": 0.475, "step": 10538 }, { "epoch": 0.9458167866998721, "grad_norm": 0.47471976469103944, "learning_rate": 8.653381699985878e-06, "loss": 0.4247, "step": 10539 }, { "epoch": 0.9459065311525431, "grad_norm": 0.4249980790987589, "learning_rate": 8.653025192885762e-06, "loss": 0.3265, "step": 10540 }, { "epoch": 0.9459962756052142, "grad_norm": 0.466735753732538, "learning_rate": 8.652668645946489e-06, "loss": 0.4097, "step": 10541 }, { "epoch": 0.9460860200578852, "grad_norm": 0.47557589278165185, "learning_rate": 8.65231205917195e-06, "loss": 0.4369, "step": 10542 }, { "epoch": 0.9461757645105562, "grad_norm": 0.49847849368224645, "learning_rate": 8.651955432566033e-06, "loss": 0.4529, "step": 10543 }, { "epoch": 0.9462655089632273, "grad_norm": 0.5202485980405344, "learning_rate": 8.651598766132625e-06, "loss": 0.5495, "step": 10544 }, { "epoch": 0.9463552534158982, "grad_norm": 0.49671334068815826, "learning_rate": 8.65124205987562e-06, "loss": 0.4754, "step": 10545 }, { "epoch": 0.9464449978685693, "grad_norm": 0.49297473382025175, "learning_rate": 8.650885313798905e-06, "loss": 0.458, "step": 10546 }, { "epoch": 0.9465347423212402, "grad_norm": 0.49507183118068715, "learning_rate": 8.65052852790637e-06, "loss": 0.4606, "step": 10547 }, { "epoch": 0.9466244867739113, "grad_norm": 0.4898935301741978, "learning_rate": 8.65017170220191e-06, "loss": 0.4373, "step": 10548 }, { "epoch": 0.9467142312265823, "grad_norm": 0.47069971625103285, "learning_rate": 8.649814836689413e-06, "loss": 0.418, "step": 10549 }, { "epoch": 0.9468039756792533, "grad_norm": 0.5239697207166337, "learning_rate": 8.649457931372771e-06, "loss": 0.4723, "step": 10550 }, { "epoch": 0.9468937201319243, "grad_norm": 0.4329458961490955, "learning_rate": 8.649100986255878e-06, "loss": 0.407, "step": 10551 }, { "epoch": 0.9469834645845954, "grad_norm": 0.47060942179524434, "learning_rate": 8.648744001342628e-06, "loss": 0.43, "step": 10552 }, { "epoch": 0.9470732090372664, "grad_norm": 0.4408199835235638, "learning_rate": 8.648386976636908e-06, "loss": 0.3821, "step": 10553 }, { "epoch": 0.9471629534899374, "grad_norm": 0.4965681254442419, "learning_rate": 8.648029912142618e-06, "loss": 0.456, "step": 10554 }, { "epoch": 0.9472526979426085, "grad_norm": 0.4989017716977161, "learning_rate": 8.647672807863652e-06, "loss": 0.5148, "step": 10555 }, { "epoch": 0.9473424423952794, "grad_norm": 0.4661054240849811, "learning_rate": 8.647315663803899e-06, "loss": 0.4261, "step": 10556 }, { "epoch": 0.9474321868479505, "grad_norm": 0.5024894190609391, "learning_rate": 8.64695847996726e-06, "loss": 0.459, "step": 10557 }, { "epoch": 0.9475219313006215, "grad_norm": 0.49025855236212956, "learning_rate": 8.646601256357626e-06, "loss": 0.4411, "step": 10558 }, { "epoch": 0.9476116757532925, "grad_norm": 0.5002107248987001, "learning_rate": 8.646243992978895e-06, "loss": 0.4473, "step": 10559 }, { "epoch": 0.9477014202059635, "grad_norm": 0.4688851927093183, "learning_rate": 8.645886689834964e-06, "loss": 0.3962, "step": 10560 }, { "epoch": 0.9477911646586346, "grad_norm": 0.49550668192266994, "learning_rate": 8.645529346929726e-06, "loss": 0.4576, "step": 10561 }, { "epoch": 0.9478809091113055, "grad_norm": 0.4670002616228415, "learning_rate": 8.645171964267083e-06, "loss": 0.4405, "step": 10562 }, { "epoch": 0.9479706535639766, "grad_norm": 0.44104199353207013, "learning_rate": 8.64481454185093e-06, "loss": 0.3543, "step": 10563 }, { "epoch": 0.9480603980166475, "grad_norm": 0.4940044102114164, "learning_rate": 8.644457079685162e-06, "loss": 0.4709, "step": 10564 }, { "epoch": 0.9481501424693186, "grad_norm": 0.4815651803242729, "learning_rate": 8.644099577773684e-06, "loss": 0.4426, "step": 10565 }, { "epoch": 0.9482398869219897, "grad_norm": 0.5055606411391982, "learning_rate": 8.64374203612039e-06, "loss": 0.4761, "step": 10566 }, { "epoch": 0.9483296313746606, "grad_norm": 0.5021499004010377, "learning_rate": 8.643384454729181e-06, "loss": 0.516, "step": 10567 }, { "epoch": 0.9484193758273317, "grad_norm": 0.4828570217015195, "learning_rate": 8.643026833603956e-06, "loss": 0.4404, "step": 10568 }, { "epoch": 0.9485091202800027, "grad_norm": 0.48036196245508234, "learning_rate": 8.642669172748615e-06, "loss": 0.4133, "step": 10569 }, { "epoch": 0.9485988647326737, "grad_norm": 0.4809134761425074, "learning_rate": 8.64231147216706e-06, "loss": 0.4813, "step": 10570 }, { "epoch": 0.9486886091853447, "grad_norm": 0.43776267828261795, "learning_rate": 8.641953731863191e-06, "loss": 0.3762, "step": 10571 }, { "epoch": 0.9487783536380158, "grad_norm": 0.4147482201150205, "learning_rate": 8.64159595184091e-06, "loss": 0.3561, "step": 10572 }, { "epoch": 0.9488680980906867, "grad_norm": 0.5087699846373769, "learning_rate": 8.641238132104116e-06, "loss": 0.4721, "step": 10573 }, { "epoch": 0.9489578425433578, "grad_norm": 0.5452570015159391, "learning_rate": 8.640880272656715e-06, "loss": 0.5009, "step": 10574 }, { "epoch": 0.9490475869960288, "grad_norm": 0.5290012642758489, "learning_rate": 8.640522373502608e-06, "loss": 0.5113, "step": 10575 }, { "epoch": 0.9491373314486998, "grad_norm": 0.478591871322637, "learning_rate": 8.640164434645701e-06, "loss": 0.432, "step": 10576 }, { "epoch": 0.9492270759013709, "grad_norm": 0.48895498235433854, "learning_rate": 8.639806456089893e-06, "loss": 0.462, "step": 10577 }, { "epoch": 0.9493168203540419, "grad_norm": 0.5078241815026344, "learning_rate": 8.639448437839091e-06, "loss": 0.4548, "step": 10578 }, { "epoch": 0.9494065648067129, "grad_norm": 0.5150340024233445, "learning_rate": 8.639090379897198e-06, "loss": 0.4916, "step": 10579 }, { "epoch": 0.9494963092593839, "grad_norm": 0.4919376560481409, "learning_rate": 8.63873228226812e-06, "loss": 0.4762, "step": 10580 }, { "epoch": 0.949586053712055, "grad_norm": 0.4470730309029215, "learning_rate": 8.638374144955763e-06, "loss": 0.4084, "step": 10581 }, { "epoch": 0.9496757981647259, "grad_norm": 0.5295088626412382, "learning_rate": 8.638015967964031e-06, "loss": 0.5111, "step": 10582 }, { "epoch": 0.949765542617397, "grad_norm": 0.5123681866744149, "learning_rate": 8.637657751296831e-06, "loss": 0.4573, "step": 10583 }, { "epoch": 0.9498552870700679, "grad_norm": 0.4703593053102997, "learning_rate": 8.637299494958068e-06, "loss": 0.4423, "step": 10584 }, { "epoch": 0.949945031522739, "grad_norm": 0.4707333817561149, "learning_rate": 8.636941198951653e-06, "loss": 0.4345, "step": 10585 }, { "epoch": 0.95003477597541, "grad_norm": 0.44901593232778575, "learning_rate": 8.636582863281493e-06, "loss": 0.3732, "step": 10586 }, { "epoch": 0.950124520428081, "grad_norm": 0.5351507000969286, "learning_rate": 8.63622448795149e-06, "loss": 0.4849, "step": 10587 }, { "epoch": 0.9502142648807521, "grad_norm": 0.4726888219969128, "learning_rate": 8.635866072965558e-06, "loss": 0.4491, "step": 10588 }, { "epoch": 0.9503040093334231, "grad_norm": 0.4367011471253466, "learning_rate": 8.635507618327605e-06, "loss": 0.3903, "step": 10589 }, { "epoch": 0.9503937537860941, "grad_norm": 0.48749081890193646, "learning_rate": 8.635149124041538e-06, "loss": 0.4394, "step": 10590 }, { "epoch": 0.9504834982387651, "grad_norm": 0.47619376457851587, "learning_rate": 8.634790590111271e-06, "loss": 0.4367, "step": 10591 }, { "epoch": 0.9505732426914362, "grad_norm": 0.4651888516307828, "learning_rate": 8.634432016540708e-06, "loss": 0.4123, "step": 10592 }, { "epoch": 0.9506629871441071, "grad_norm": 0.49212810312231076, "learning_rate": 8.634073403333764e-06, "loss": 0.4991, "step": 10593 }, { "epoch": 0.9507527315967782, "grad_norm": 0.47670710628218466, "learning_rate": 8.633714750494349e-06, "loss": 0.4441, "step": 10594 }, { "epoch": 0.9508424760494492, "grad_norm": 0.4742974179853687, "learning_rate": 8.633356058026374e-06, "loss": 0.4327, "step": 10595 }, { "epoch": 0.9509322205021202, "grad_norm": 0.4844609139635437, "learning_rate": 8.632997325933752e-06, "loss": 0.4546, "step": 10596 }, { "epoch": 0.9510219649547912, "grad_norm": 0.49456389660964006, "learning_rate": 8.632638554220395e-06, "loss": 0.5028, "step": 10597 }, { "epoch": 0.9511117094074623, "grad_norm": 0.4372305041789281, "learning_rate": 8.632279742890212e-06, "loss": 0.3723, "step": 10598 }, { "epoch": 0.9512014538601332, "grad_norm": 0.46601475006002513, "learning_rate": 8.631920891947121e-06, "loss": 0.454, "step": 10599 }, { "epoch": 0.9512911983128043, "grad_norm": 0.45323076953509345, "learning_rate": 8.631562001395033e-06, "loss": 0.3878, "step": 10600 }, { "epoch": 0.9513809427654754, "grad_norm": 0.5199726917707587, "learning_rate": 8.631203071237864e-06, "loss": 0.4552, "step": 10601 }, { "epoch": 0.9514706872181463, "grad_norm": 0.47599623953342424, "learning_rate": 8.630844101479525e-06, "loss": 0.4192, "step": 10602 }, { "epoch": 0.9515604316708174, "grad_norm": 0.4791995548892393, "learning_rate": 8.630485092123934e-06, "loss": 0.4447, "step": 10603 }, { "epoch": 0.9516501761234883, "grad_norm": 0.5298000946309877, "learning_rate": 8.630126043175004e-06, "loss": 0.5142, "step": 10604 }, { "epoch": 0.9517399205761594, "grad_norm": 0.5299085513357287, "learning_rate": 8.629766954636654e-06, "loss": 0.4837, "step": 10605 }, { "epoch": 0.9518296650288304, "grad_norm": 0.5075784904399574, "learning_rate": 8.629407826512796e-06, "loss": 0.4378, "step": 10606 }, { "epoch": 0.9519194094815014, "grad_norm": 0.4620694146530072, "learning_rate": 8.62904865880735e-06, "loss": 0.4109, "step": 10607 }, { "epoch": 0.9520091539341724, "grad_norm": 0.4759030043039791, "learning_rate": 8.628689451524232e-06, "loss": 0.4006, "step": 10608 }, { "epoch": 0.9520988983868435, "grad_norm": 0.4658246876611127, "learning_rate": 8.628330204667359e-06, "loss": 0.4312, "step": 10609 }, { "epoch": 0.9521886428395144, "grad_norm": 0.4716276734834285, "learning_rate": 8.627970918240648e-06, "loss": 0.419, "step": 10610 }, { "epoch": 0.9522783872921855, "grad_norm": 0.4991566765518298, "learning_rate": 8.627611592248017e-06, "loss": 0.4586, "step": 10611 }, { "epoch": 0.9523681317448566, "grad_norm": 0.5191227962485875, "learning_rate": 8.62725222669339e-06, "loss": 0.4693, "step": 10612 }, { "epoch": 0.9524578761975275, "grad_norm": 0.49578010405474726, "learning_rate": 8.62689282158068e-06, "loss": 0.4024, "step": 10613 }, { "epoch": 0.9525476206501986, "grad_norm": 0.47707332249239354, "learning_rate": 8.626533376913807e-06, "loss": 0.4382, "step": 10614 }, { "epoch": 0.9526373651028696, "grad_norm": 0.5094231790351794, "learning_rate": 8.626173892696695e-06, "loss": 0.4281, "step": 10615 }, { "epoch": 0.9527271095555406, "grad_norm": 0.46432346646288913, "learning_rate": 8.62581436893326e-06, "loss": 0.4357, "step": 10616 }, { "epoch": 0.9528168540082116, "grad_norm": 0.46727573557885943, "learning_rate": 8.625454805627427e-06, "loss": 0.4087, "step": 10617 }, { "epoch": 0.9529065984608827, "grad_norm": 0.4989409253319145, "learning_rate": 8.625095202783116e-06, "loss": 0.4593, "step": 10618 }, { "epoch": 0.9529963429135536, "grad_norm": 0.5015801942018124, "learning_rate": 8.624735560404246e-06, "loss": 0.461, "step": 10619 }, { "epoch": 0.9530860873662247, "grad_norm": 0.5206256743375933, "learning_rate": 8.624375878494744e-06, "loss": 0.5142, "step": 10620 }, { "epoch": 0.9531758318188956, "grad_norm": 0.45337579983236526, "learning_rate": 8.624016157058527e-06, "loss": 0.4151, "step": 10621 }, { "epoch": 0.9532655762715667, "grad_norm": 0.4973789017960744, "learning_rate": 8.623656396099525e-06, "loss": 0.4591, "step": 10622 }, { "epoch": 0.9533553207242378, "grad_norm": 0.48971680137057877, "learning_rate": 8.623296595621656e-06, "loss": 0.4461, "step": 10623 }, { "epoch": 0.9534450651769087, "grad_norm": 0.5223538509177256, "learning_rate": 8.622936755628845e-06, "loss": 0.4754, "step": 10624 }, { "epoch": 0.9535348096295798, "grad_norm": 0.4858992050254968, "learning_rate": 8.622576876125016e-06, "loss": 0.3887, "step": 10625 }, { "epoch": 0.9536245540822508, "grad_norm": 0.47252817341438846, "learning_rate": 8.622216957114095e-06, "loss": 0.425, "step": 10626 }, { "epoch": 0.9537142985349218, "grad_norm": 0.488855060362332, "learning_rate": 8.621856998600007e-06, "loss": 0.4216, "step": 10627 }, { "epoch": 0.9538040429875928, "grad_norm": 0.4721590678019512, "learning_rate": 8.621497000586678e-06, "loss": 0.4001, "step": 10628 }, { "epoch": 0.9538937874402639, "grad_norm": 0.48160919842262, "learning_rate": 8.621136963078034e-06, "loss": 0.4496, "step": 10629 }, { "epoch": 0.9539835318929348, "grad_norm": 0.5008446055011951, "learning_rate": 8.620776886078e-06, "loss": 0.5027, "step": 10630 }, { "epoch": 0.9540732763456059, "grad_norm": 0.4463749071798425, "learning_rate": 8.620416769590504e-06, "loss": 0.4039, "step": 10631 }, { "epoch": 0.9541630207982769, "grad_norm": 0.4703076901520373, "learning_rate": 8.620056613619475e-06, "loss": 0.4192, "step": 10632 }, { "epoch": 0.9542527652509479, "grad_norm": 0.5180759807900929, "learning_rate": 8.619696418168835e-06, "loss": 0.4927, "step": 10633 }, { "epoch": 0.9543425097036189, "grad_norm": 0.4451473625192756, "learning_rate": 8.61933618324252e-06, "loss": 0.3749, "step": 10634 }, { "epoch": 0.95443225415629, "grad_norm": 0.5165727392916858, "learning_rate": 8.618975908844453e-06, "loss": 0.4555, "step": 10635 }, { "epoch": 0.954521998608961, "grad_norm": 0.466876254431189, "learning_rate": 8.618615594978565e-06, "loss": 0.4066, "step": 10636 }, { "epoch": 0.954611743061632, "grad_norm": 0.46029553603849843, "learning_rate": 8.618255241648788e-06, "loss": 0.3828, "step": 10637 }, { "epoch": 0.9547014875143031, "grad_norm": 0.49687099810840274, "learning_rate": 8.617894848859048e-06, "loss": 0.4516, "step": 10638 }, { "epoch": 0.954791231966974, "grad_norm": 0.5036969639541857, "learning_rate": 8.617534416613275e-06, "loss": 0.4651, "step": 10639 }, { "epoch": 0.9548809764196451, "grad_norm": 0.492404023668851, "learning_rate": 8.617173944915403e-06, "loss": 0.4533, "step": 10640 }, { "epoch": 0.954970720872316, "grad_norm": 0.5369544080792868, "learning_rate": 8.616813433769363e-06, "loss": 0.5647, "step": 10641 }, { "epoch": 0.9550604653249871, "grad_norm": 0.4948169816327989, "learning_rate": 8.616452883179086e-06, "loss": 0.4371, "step": 10642 }, { "epoch": 0.9551502097776581, "grad_norm": 0.46444524731418213, "learning_rate": 8.616092293148501e-06, "loss": 0.4284, "step": 10643 }, { "epoch": 0.9552399542303291, "grad_norm": 0.5517491200502753, "learning_rate": 8.615731663681544e-06, "loss": 0.4118, "step": 10644 }, { "epoch": 0.9553296986830001, "grad_norm": 0.4947088809608167, "learning_rate": 8.615370994782148e-06, "loss": 0.4784, "step": 10645 }, { "epoch": 0.9554194431356712, "grad_norm": 0.49643436863773005, "learning_rate": 8.615010286454244e-06, "loss": 0.4235, "step": 10646 }, { "epoch": 0.9555091875883422, "grad_norm": 0.43323060551624076, "learning_rate": 8.61464953870177e-06, "loss": 0.3966, "step": 10647 }, { "epoch": 0.9555989320410132, "grad_norm": 0.5180792071099781, "learning_rate": 8.614288751528655e-06, "loss": 0.4476, "step": 10648 }, { "epoch": 0.9556886764936843, "grad_norm": 0.47979569060817445, "learning_rate": 8.613927924938836e-06, "loss": 0.4591, "step": 10649 }, { "epoch": 0.9557784209463552, "grad_norm": 0.5301978299979121, "learning_rate": 8.613567058936249e-06, "loss": 0.5078, "step": 10650 }, { "epoch": 0.9558681653990263, "grad_norm": 0.5112392393286401, "learning_rate": 8.61320615352483e-06, "loss": 0.4667, "step": 10651 }, { "epoch": 0.9559579098516973, "grad_norm": 0.4567007807911688, "learning_rate": 8.612845208708513e-06, "loss": 0.4157, "step": 10652 }, { "epoch": 0.9560476543043683, "grad_norm": 0.455503913309731, "learning_rate": 8.612484224491235e-06, "loss": 0.3825, "step": 10653 }, { "epoch": 0.9561373987570393, "grad_norm": 0.4328672654644012, "learning_rate": 8.612123200876934e-06, "loss": 0.3753, "step": 10654 }, { "epoch": 0.9562271432097104, "grad_norm": 0.457226864436267, "learning_rate": 8.611762137869544e-06, "loss": 0.4156, "step": 10655 }, { "epoch": 0.9563168876623813, "grad_norm": 0.4646591967293976, "learning_rate": 8.611401035473008e-06, "loss": 0.3948, "step": 10656 }, { "epoch": 0.9564066321150524, "grad_norm": 0.5586918888014204, "learning_rate": 8.611039893691258e-06, "loss": 0.592, "step": 10657 }, { "epoch": 0.9564963765677235, "grad_norm": 0.4550065196837245, "learning_rate": 8.610678712528237e-06, "loss": 0.4573, "step": 10658 }, { "epoch": 0.9565861210203944, "grad_norm": 0.46878863594637366, "learning_rate": 8.610317491987882e-06, "loss": 0.4007, "step": 10659 }, { "epoch": 0.9566758654730655, "grad_norm": 0.4865370215624481, "learning_rate": 8.609956232074134e-06, "loss": 0.4079, "step": 10660 }, { "epoch": 0.9567656099257364, "grad_norm": 0.5268763089473535, "learning_rate": 8.609594932790932e-06, "loss": 0.4726, "step": 10661 }, { "epoch": 0.9568553543784075, "grad_norm": 0.4759305368553583, "learning_rate": 8.609233594142217e-06, "loss": 0.4131, "step": 10662 }, { "epoch": 0.9569450988310785, "grad_norm": 0.5197314779223906, "learning_rate": 8.608872216131927e-06, "loss": 0.4817, "step": 10663 }, { "epoch": 0.9570348432837495, "grad_norm": 0.5037512003682915, "learning_rate": 8.608510798764005e-06, "loss": 0.4186, "step": 10664 }, { "epoch": 0.9571245877364205, "grad_norm": 0.5095691774468839, "learning_rate": 8.608149342042391e-06, "loss": 0.4721, "step": 10665 }, { "epoch": 0.9572143321890916, "grad_norm": 0.4714192522584156, "learning_rate": 8.60778784597103e-06, "loss": 0.4417, "step": 10666 }, { "epoch": 0.9573040766417625, "grad_norm": 0.5071352077753796, "learning_rate": 8.607426310553864e-06, "loss": 0.457, "step": 10667 }, { "epoch": 0.9573938210944336, "grad_norm": 0.49861562037496315, "learning_rate": 8.607064735794832e-06, "loss": 0.4629, "step": 10668 }, { "epoch": 0.9574835655471046, "grad_norm": 0.48507701384523105, "learning_rate": 8.606703121697882e-06, "loss": 0.4373, "step": 10669 }, { "epoch": 0.9575733099997756, "grad_norm": 0.47027966893954415, "learning_rate": 8.606341468266954e-06, "loss": 0.4473, "step": 10670 }, { "epoch": 0.9576630544524467, "grad_norm": 0.47202566446480876, "learning_rate": 8.605979775505995e-06, "loss": 0.4143, "step": 10671 }, { "epoch": 0.9577527989051177, "grad_norm": 0.49721184480836095, "learning_rate": 8.605618043418946e-06, "loss": 0.4317, "step": 10672 }, { "epoch": 0.9578425433577887, "grad_norm": 0.5092476336330137, "learning_rate": 8.605256272009757e-06, "loss": 0.4261, "step": 10673 }, { "epoch": 0.9579322878104597, "grad_norm": 0.5303371159715882, "learning_rate": 8.604894461282369e-06, "loss": 0.5075, "step": 10674 }, { "epoch": 0.9580220322631308, "grad_norm": 0.44710632233158804, "learning_rate": 8.60453261124073e-06, "loss": 0.3818, "step": 10675 }, { "epoch": 0.9581117767158017, "grad_norm": 0.4768599862176832, "learning_rate": 8.604170721888785e-06, "loss": 0.4106, "step": 10676 }, { "epoch": 0.9582015211684728, "grad_norm": 0.5076381796090083, "learning_rate": 8.60380879323048e-06, "loss": 0.4357, "step": 10677 }, { "epoch": 0.9582912656211438, "grad_norm": 0.5253099928942175, "learning_rate": 8.603446825269768e-06, "loss": 0.4907, "step": 10678 }, { "epoch": 0.9583810100738148, "grad_norm": 0.4970280421334304, "learning_rate": 8.60308481801059e-06, "loss": 0.4531, "step": 10679 }, { "epoch": 0.9584707545264858, "grad_norm": 0.5124861005761115, "learning_rate": 8.602722771456895e-06, "loss": 0.4806, "step": 10680 }, { "epoch": 0.9585604989791568, "grad_norm": 0.4669141133975264, "learning_rate": 8.602360685612633e-06, "loss": 0.4064, "step": 10681 }, { "epoch": 0.9586502434318279, "grad_norm": 0.44299924994500195, "learning_rate": 8.601998560481753e-06, "loss": 0.4305, "step": 10682 }, { "epoch": 0.9587399878844989, "grad_norm": 0.4245487566562383, "learning_rate": 8.601636396068203e-06, "loss": 0.3673, "step": 10683 }, { "epoch": 0.95882973233717, "grad_norm": 0.48567096379244745, "learning_rate": 8.601274192375933e-06, "loss": 0.4292, "step": 10684 }, { "epoch": 0.9589194767898409, "grad_norm": 0.5010590877599799, "learning_rate": 8.600911949408893e-06, "loss": 0.4881, "step": 10685 }, { "epoch": 0.959009221242512, "grad_norm": 0.44258628899628616, "learning_rate": 8.600549667171035e-06, "loss": 0.3936, "step": 10686 }, { "epoch": 0.9590989656951829, "grad_norm": 0.5190926467475555, "learning_rate": 8.600187345666306e-06, "loss": 0.4582, "step": 10687 }, { "epoch": 0.959188710147854, "grad_norm": 0.5077413686329982, "learning_rate": 8.599824984898663e-06, "loss": 0.4576, "step": 10688 }, { "epoch": 0.959278454600525, "grad_norm": 0.49375838718379594, "learning_rate": 8.599462584872056e-06, "loss": 0.4572, "step": 10689 }, { "epoch": 0.959368199053196, "grad_norm": 0.4903992930008841, "learning_rate": 8.599100145590434e-06, "loss": 0.437, "step": 10690 }, { "epoch": 0.959457943505867, "grad_norm": 0.5011935834185556, "learning_rate": 8.598737667057754e-06, "loss": 0.5207, "step": 10691 }, { "epoch": 0.9595476879585381, "grad_norm": 0.5057617077350388, "learning_rate": 8.598375149277967e-06, "loss": 0.4886, "step": 10692 }, { "epoch": 0.9596374324112091, "grad_norm": 0.5060959421409864, "learning_rate": 8.598012592255024e-06, "loss": 0.435, "step": 10693 }, { "epoch": 0.9597271768638801, "grad_norm": 0.4435879456769257, "learning_rate": 8.597649995992884e-06, "loss": 0.4093, "step": 10694 }, { "epoch": 0.9598169213165512, "grad_norm": 0.5001677499988408, "learning_rate": 8.5972873604955e-06, "loss": 0.4778, "step": 10695 }, { "epoch": 0.9599066657692221, "grad_norm": 0.5168272531885714, "learning_rate": 8.596924685766825e-06, "loss": 0.4921, "step": 10696 }, { "epoch": 0.9599964102218932, "grad_norm": 0.47342682931798985, "learning_rate": 8.596561971810816e-06, "loss": 0.4333, "step": 10697 }, { "epoch": 0.9600861546745642, "grad_norm": 0.498292322820793, "learning_rate": 8.596199218631426e-06, "loss": 0.4705, "step": 10698 }, { "epoch": 0.9601758991272352, "grad_norm": 0.48980844775758664, "learning_rate": 8.595836426232614e-06, "loss": 0.4241, "step": 10699 }, { "epoch": 0.9602656435799062, "grad_norm": 0.45286962963403293, "learning_rate": 8.595473594618336e-06, "loss": 0.3839, "step": 10700 }, { "epoch": 0.9603553880325773, "grad_norm": 0.47726071042588575, "learning_rate": 8.595110723792548e-06, "loss": 0.433, "step": 10701 }, { "epoch": 0.9604451324852482, "grad_norm": 0.5183083857933699, "learning_rate": 8.594747813759207e-06, "loss": 0.4731, "step": 10702 }, { "epoch": 0.9605348769379193, "grad_norm": 0.4674162635317296, "learning_rate": 8.594384864522273e-06, "loss": 0.4288, "step": 10703 }, { "epoch": 0.9606246213905902, "grad_norm": 0.47996285010858436, "learning_rate": 8.594021876085703e-06, "loss": 0.4679, "step": 10704 }, { "epoch": 0.9607143658432613, "grad_norm": 0.43385466845790793, "learning_rate": 8.593658848453456e-06, "loss": 0.3849, "step": 10705 }, { "epoch": 0.9608041102959324, "grad_norm": 0.4884040821794698, "learning_rate": 8.593295781629489e-06, "loss": 0.4439, "step": 10706 }, { "epoch": 0.9608938547486033, "grad_norm": 0.49061308790847674, "learning_rate": 8.592932675617764e-06, "loss": 0.4346, "step": 10707 }, { "epoch": 0.9609835992012744, "grad_norm": 0.4858405286981316, "learning_rate": 8.592569530422242e-06, "loss": 0.4794, "step": 10708 }, { "epoch": 0.9610733436539454, "grad_norm": 0.46256395272590034, "learning_rate": 8.592206346046881e-06, "loss": 0.3848, "step": 10709 }, { "epoch": 0.9611630881066164, "grad_norm": 0.5043119332775324, "learning_rate": 8.59184312249564e-06, "loss": 0.446, "step": 10710 }, { "epoch": 0.9612528325592874, "grad_norm": 0.4568299062742524, "learning_rate": 8.591479859772485e-06, "loss": 0.4092, "step": 10711 }, { "epoch": 0.9613425770119585, "grad_norm": 0.5369453462284474, "learning_rate": 8.591116557881376e-06, "loss": 0.4678, "step": 10712 }, { "epoch": 0.9614323214646294, "grad_norm": 0.49276881488992036, "learning_rate": 8.590753216826273e-06, "loss": 0.4431, "step": 10713 }, { "epoch": 0.9615220659173005, "grad_norm": 0.478361652356585, "learning_rate": 8.59038983661114e-06, "loss": 0.4078, "step": 10714 }, { "epoch": 0.9616118103699715, "grad_norm": 0.4863529650877571, "learning_rate": 8.590026417239941e-06, "loss": 0.4482, "step": 10715 }, { "epoch": 0.9617015548226425, "grad_norm": 0.48439688437168343, "learning_rate": 8.589662958716638e-06, "loss": 0.4587, "step": 10716 }, { "epoch": 0.9617912992753136, "grad_norm": 0.5168526578784571, "learning_rate": 8.589299461045195e-06, "loss": 0.4201, "step": 10717 }, { "epoch": 0.9618810437279846, "grad_norm": 0.49955674025008745, "learning_rate": 8.588935924229576e-06, "loss": 0.4354, "step": 10718 }, { "epoch": 0.9619707881806556, "grad_norm": 0.4874788638899038, "learning_rate": 8.588572348273745e-06, "loss": 0.4525, "step": 10719 }, { "epoch": 0.9620605326333266, "grad_norm": 0.5999488842366063, "learning_rate": 8.58820873318167e-06, "loss": 0.529, "step": 10720 }, { "epoch": 0.9621502770859977, "grad_norm": 0.46162520121272077, "learning_rate": 8.587845078957314e-06, "loss": 0.4024, "step": 10721 }, { "epoch": 0.9622400215386686, "grad_norm": 0.5311708827085343, "learning_rate": 8.587481385604643e-06, "loss": 0.5435, "step": 10722 }, { "epoch": 0.9623297659913397, "grad_norm": 0.46499584806452526, "learning_rate": 8.587117653127625e-06, "loss": 0.408, "step": 10723 }, { "epoch": 0.9624195104440106, "grad_norm": 0.4596561377669434, "learning_rate": 8.586753881530226e-06, "loss": 0.4314, "step": 10724 }, { "epoch": 0.9625092548966817, "grad_norm": 0.4545896501802518, "learning_rate": 8.586390070816412e-06, "loss": 0.3832, "step": 10725 }, { "epoch": 0.9625989993493527, "grad_norm": 0.48314025985497566, "learning_rate": 8.58602622099015e-06, "loss": 0.3959, "step": 10726 }, { "epoch": 0.9626887438020237, "grad_norm": 0.5076359491312774, "learning_rate": 8.585662332055414e-06, "loss": 0.4544, "step": 10727 }, { "epoch": 0.9627784882546948, "grad_norm": 0.48343237476370854, "learning_rate": 8.585298404016164e-06, "loss": 0.4119, "step": 10728 }, { "epoch": 0.9628682327073658, "grad_norm": 0.46372475289947696, "learning_rate": 8.584934436876375e-06, "loss": 0.4052, "step": 10729 }, { "epoch": 0.9629579771600368, "grad_norm": 0.5077031719608759, "learning_rate": 8.584570430640014e-06, "loss": 0.4649, "step": 10730 }, { "epoch": 0.9630477216127078, "grad_norm": 0.48986834141490765, "learning_rate": 8.58420638531105e-06, "loss": 0.4019, "step": 10731 }, { "epoch": 0.9631374660653789, "grad_norm": 0.5360251523931039, "learning_rate": 8.583842300893458e-06, "loss": 0.4545, "step": 10732 }, { "epoch": 0.9632272105180498, "grad_norm": 0.5172426333119498, "learning_rate": 8.583478177391203e-06, "loss": 0.5112, "step": 10733 }, { "epoch": 0.9633169549707209, "grad_norm": 0.4419266722384475, "learning_rate": 8.583114014808258e-06, "loss": 0.4069, "step": 10734 }, { "epoch": 0.9634066994233919, "grad_norm": 0.4709160920969907, "learning_rate": 8.582749813148594e-06, "loss": 0.4393, "step": 10735 }, { "epoch": 0.9634964438760629, "grad_norm": 0.5021350893402656, "learning_rate": 8.582385572416184e-06, "loss": 0.4742, "step": 10736 }, { "epoch": 0.9635861883287339, "grad_norm": 0.5076095541331607, "learning_rate": 8.582021292615e-06, "loss": 0.4542, "step": 10737 }, { "epoch": 0.963675932781405, "grad_norm": 0.4918059987191285, "learning_rate": 8.581656973749014e-06, "loss": 0.4601, "step": 10738 }, { "epoch": 0.963765677234076, "grad_norm": 0.5045413138818368, "learning_rate": 8.5812926158222e-06, "loss": 0.4322, "step": 10739 }, { "epoch": 0.963855421686747, "grad_norm": 0.5376843369780732, "learning_rate": 8.580928218838532e-06, "loss": 0.4952, "step": 10740 }, { "epoch": 0.963945166139418, "grad_norm": 0.48837775521471855, "learning_rate": 8.580563782801982e-06, "loss": 0.433, "step": 10741 }, { "epoch": 0.964034910592089, "grad_norm": 0.48021471322326925, "learning_rate": 8.580199307716528e-06, "loss": 0.4404, "step": 10742 }, { "epoch": 0.9641246550447601, "grad_norm": 0.48171624749175324, "learning_rate": 8.579834793586142e-06, "loss": 0.4477, "step": 10743 }, { "epoch": 0.964214399497431, "grad_norm": 0.5335812907425571, "learning_rate": 8.5794702404148e-06, "loss": 0.5123, "step": 10744 }, { "epoch": 0.9643041439501021, "grad_norm": 0.5018766048196415, "learning_rate": 8.579105648206479e-06, "loss": 0.4505, "step": 10745 }, { "epoch": 0.9643938884027731, "grad_norm": 0.49964651026564033, "learning_rate": 8.578741016965152e-06, "loss": 0.5133, "step": 10746 }, { "epoch": 0.9644836328554441, "grad_norm": 0.5465151041506036, "learning_rate": 8.578376346694799e-06, "loss": 0.4837, "step": 10747 }, { "epoch": 0.9645733773081151, "grad_norm": 0.4575664538838734, "learning_rate": 8.578011637399394e-06, "loss": 0.3982, "step": 10748 }, { "epoch": 0.9646631217607862, "grad_norm": 0.5053027982754899, "learning_rate": 8.577646889082919e-06, "loss": 0.4429, "step": 10749 }, { "epoch": 0.9647528662134571, "grad_norm": 0.521253123829038, "learning_rate": 8.577282101749347e-06, "loss": 0.465, "step": 10750 }, { "epoch": 0.9648426106661282, "grad_norm": 0.46571515645845957, "learning_rate": 8.576917275402658e-06, "loss": 0.4271, "step": 10751 }, { "epoch": 0.9649323551187993, "grad_norm": 0.4506919637152197, "learning_rate": 8.576552410046831e-06, "loss": 0.3829, "step": 10752 }, { "epoch": 0.9650220995714702, "grad_norm": 0.49254701570036147, "learning_rate": 8.576187505685846e-06, "loss": 0.4446, "step": 10753 }, { "epoch": 0.9651118440241413, "grad_norm": 0.5227130398186789, "learning_rate": 8.575822562323681e-06, "loss": 0.4265, "step": 10754 }, { "epoch": 0.9652015884768123, "grad_norm": 0.4927265149703411, "learning_rate": 8.575457579964316e-06, "loss": 0.4329, "step": 10755 }, { "epoch": 0.9652913329294833, "grad_norm": 0.4901748013825086, "learning_rate": 8.575092558611735e-06, "loss": 0.4133, "step": 10756 }, { "epoch": 0.9653810773821543, "grad_norm": 0.45811781432537024, "learning_rate": 8.574727498269915e-06, "loss": 0.3897, "step": 10757 }, { "epoch": 0.9654708218348254, "grad_norm": 0.46957296152635, "learning_rate": 8.574362398942836e-06, "loss": 0.452, "step": 10758 }, { "epoch": 0.9655605662874963, "grad_norm": 0.5188837146311164, "learning_rate": 8.573997260634484e-06, "loss": 0.5144, "step": 10759 }, { "epoch": 0.9656503107401674, "grad_norm": 0.48250181603534936, "learning_rate": 8.573632083348838e-06, "loss": 0.4251, "step": 10760 }, { "epoch": 0.9657400551928383, "grad_norm": 0.5053418720217804, "learning_rate": 8.573266867089883e-06, "loss": 0.4681, "step": 10761 }, { "epoch": 0.9658297996455094, "grad_norm": 0.5213696171406036, "learning_rate": 8.5729016118616e-06, "loss": 0.508, "step": 10762 }, { "epoch": 0.9659195440981805, "grad_norm": 0.4960562663370081, "learning_rate": 8.572536317667972e-06, "loss": 0.4012, "step": 10763 }, { "epoch": 0.9660092885508514, "grad_norm": 0.44472727787519556, "learning_rate": 8.572170984512985e-06, "loss": 0.4084, "step": 10764 }, { "epoch": 0.9660990330035225, "grad_norm": 0.45455719852029075, "learning_rate": 8.571805612400621e-06, "loss": 0.4407, "step": 10765 }, { "epoch": 0.9661887774561935, "grad_norm": 0.4724136579885762, "learning_rate": 8.571440201334866e-06, "loss": 0.4007, "step": 10766 }, { "epoch": 0.9662785219088645, "grad_norm": 0.45882628635537803, "learning_rate": 8.571074751319705e-06, "loss": 0.4015, "step": 10767 }, { "epoch": 0.9663682663615355, "grad_norm": 0.5146742074525407, "learning_rate": 8.570709262359125e-06, "loss": 0.4314, "step": 10768 }, { "epoch": 0.9664580108142066, "grad_norm": 0.4914499499004345, "learning_rate": 8.570343734457108e-06, "loss": 0.4865, "step": 10769 }, { "epoch": 0.9665477552668775, "grad_norm": 0.4851380136538369, "learning_rate": 8.569978167617645e-06, "loss": 0.4801, "step": 10770 }, { "epoch": 0.9666374997195486, "grad_norm": 0.4883831375984564, "learning_rate": 8.569612561844717e-06, "loss": 0.4323, "step": 10771 }, { "epoch": 0.9667272441722196, "grad_norm": 0.5326836558162359, "learning_rate": 8.569246917142318e-06, "loss": 0.4925, "step": 10772 }, { "epoch": 0.9668169886248906, "grad_norm": 0.5451847157658387, "learning_rate": 8.56888123351443e-06, "loss": 0.4915, "step": 10773 }, { "epoch": 0.9669067330775617, "grad_norm": 0.526556249761758, "learning_rate": 8.568515510965045e-06, "loss": 0.4584, "step": 10774 }, { "epoch": 0.9669964775302327, "grad_norm": 0.5012136916960926, "learning_rate": 8.568149749498151e-06, "loss": 0.4243, "step": 10775 }, { "epoch": 0.9670862219829037, "grad_norm": 0.5035150516571816, "learning_rate": 8.567783949117734e-06, "loss": 0.4576, "step": 10776 }, { "epoch": 0.9671759664355747, "grad_norm": 0.46215343422957267, "learning_rate": 8.567418109827786e-06, "loss": 0.4283, "step": 10777 }, { "epoch": 0.9672657108882458, "grad_norm": 0.49353305217786236, "learning_rate": 8.567052231632296e-06, "loss": 0.4519, "step": 10778 }, { "epoch": 0.9673554553409167, "grad_norm": 0.48214912332425924, "learning_rate": 8.566686314535253e-06, "loss": 0.4317, "step": 10779 }, { "epoch": 0.9674451997935878, "grad_norm": 0.481870797575497, "learning_rate": 8.56632035854065e-06, "loss": 0.4393, "step": 10780 }, { "epoch": 0.9675349442462587, "grad_norm": 0.4973205701202547, "learning_rate": 8.565954363652476e-06, "loss": 0.4326, "step": 10781 }, { "epoch": 0.9676246886989298, "grad_norm": 0.48294558934182286, "learning_rate": 8.565588329874723e-06, "loss": 0.4411, "step": 10782 }, { "epoch": 0.9677144331516008, "grad_norm": 0.48077402944603714, "learning_rate": 8.565222257211386e-06, "loss": 0.4416, "step": 10783 }, { "epoch": 0.9678041776042718, "grad_norm": 0.46561269944302086, "learning_rate": 8.564856145666451e-06, "loss": 0.3692, "step": 10784 }, { "epoch": 0.9678939220569428, "grad_norm": 0.5339290661471656, "learning_rate": 8.564489995243915e-06, "loss": 0.4639, "step": 10785 }, { "epoch": 0.9679836665096139, "grad_norm": 0.47297466050520626, "learning_rate": 8.564123805947771e-06, "loss": 0.4092, "step": 10786 }, { "epoch": 0.9680734109622849, "grad_norm": 0.4759442813499262, "learning_rate": 8.563757577782012e-06, "loss": 0.4503, "step": 10787 }, { "epoch": 0.9681631554149559, "grad_norm": 0.46725291512690553, "learning_rate": 8.563391310750632e-06, "loss": 0.3955, "step": 10788 }, { "epoch": 0.968252899867627, "grad_norm": 0.4834470950767397, "learning_rate": 8.563025004857626e-06, "loss": 0.4492, "step": 10789 }, { "epoch": 0.9683426443202979, "grad_norm": 0.5052570152792517, "learning_rate": 8.562658660106988e-06, "loss": 0.4434, "step": 10790 }, { "epoch": 0.968432388772969, "grad_norm": 0.4886880557276392, "learning_rate": 8.562292276502712e-06, "loss": 0.4653, "step": 10791 }, { "epoch": 0.96852213322564, "grad_norm": 0.45855469128569476, "learning_rate": 8.561925854048798e-06, "loss": 0.4197, "step": 10792 }, { "epoch": 0.968611877678311, "grad_norm": 0.4570529553434199, "learning_rate": 8.561559392749237e-06, "loss": 0.4242, "step": 10793 }, { "epoch": 0.968701622130982, "grad_norm": 0.5267982128602507, "learning_rate": 8.56119289260803e-06, "loss": 0.475, "step": 10794 }, { "epoch": 0.9687913665836531, "grad_norm": 0.47995545168529236, "learning_rate": 8.56082635362917e-06, "loss": 0.4339, "step": 10795 }, { "epoch": 0.968881111036324, "grad_norm": 0.5015144838932097, "learning_rate": 8.560459775816657e-06, "loss": 0.4652, "step": 10796 }, { "epoch": 0.9689708554889951, "grad_norm": 0.47991526623919084, "learning_rate": 8.560093159174488e-06, "loss": 0.4443, "step": 10797 }, { "epoch": 0.9690605999416662, "grad_norm": 0.5405507051399709, "learning_rate": 8.559726503706663e-06, "loss": 0.5292, "step": 10798 }, { "epoch": 0.9691503443943371, "grad_norm": 0.520436184060082, "learning_rate": 8.559359809417177e-06, "loss": 0.4251, "step": 10799 }, { "epoch": 0.9692400888470082, "grad_norm": 0.4789900599395078, "learning_rate": 8.558993076310031e-06, "loss": 0.4779, "step": 10800 }, { "epoch": 0.9693298332996791, "grad_norm": 0.47777361652443384, "learning_rate": 8.558626304389225e-06, "loss": 0.4031, "step": 10801 }, { "epoch": 0.9694195777523502, "grad_norm": 0.4377896749150312, "learning_rate": 8.55825949365876e-06, "loss": 0.3668, "step": 10802 }, { "epoch": 0.9695093222050212, "grad_norm": 0.5327383576601048, "learning_rate": 8.557892644122635e-06, "loss": 0.452, "step": 10803 }, { "epoch": 0.9695990666576922, "grad_norm": 0.49211942376893697, "learning_rate": 8.55752575578485e-06, "loss": 0.4788, "step": 10804 }, { "epoch": 0.9696888111103632, "grad_norm": 0.5115299279584712, "learning_rate": 8.557158828649408e-06, "loss": 0.484, "step": 10805 }, { "epoch": 0.9697785555630343, "grad_norm": 0.5183091131114447, "learning_rate": 8.556791862720309e-06, "loss": 0.487, "step": 10806 }, { "epoch": 0.9698683000157052, "grad_norm": 0.5458119863673296, "learning_rate": 8.556424858001555e-06, "loss": 0.5436, "step": 10807 }, { "epoch": 0.9699580444683763, "grad_norm": 0.5084930011272483, "learning_rate": 8.55605781449715e-06, "loss": 0.4634, "step": 10808 }, { "epoch": 0.9700477889210474, "grad_norm": 0.4627950035841991, "learning_rate": 8.555690732211095e-06, "loss": 0.4071, "step": 10809 }, { "epoch": 0.9701375333737183, "grad_norm": 0.48480097239140674, "learning_rate": 8.555323611147394e-06, "loss": 0.4269, "step": 10810 }, { "epoch": 0.9702272778263894, "grad_norm": 0.49077427820333125, "learning_rate": 8.554956451310052e-06, "loss": 0.4324, "step": 10811 }, { "epoch": 0.9703170222790604, "grad_norm": 0.46603861773961597, "learning_rate": 8.554589252703074e-06, "loss": 0.4446, "step": 10812 }, { "epoch": 0.9704067667317314, "grad_norm": 0.4192925325515038, "learning_rate": 8.55422201533046e-06, "loss": 0.3572, "step": 10813 }, { "epoch": 0.9704965111844024, "grad_norm": 0.5016966484750852, "learning_rate": 8.55385473919622e-06, "loss": 0.4402, "step": 10814 }, { "epoch": 0.9705862556370735, "grad_norm": 0.533797611001389, "learning_rate": 8.553487424304356e-06, "loss": 0.4721, "step": 10815 }, { "epoch": 0.9706760000897444, "grad_norm": 0.4778634331133714, "learning_rate": 8.553120070658876e-06, "loss": 0.4109, "step": 10816 }, { "epoch": 0.9707657445424155, "grad_norm": 0.5065577982695195, "learning_rate": 8.552752678263785e-06, "loss": 0.4792, "step": 10817 }, { "epoch": 0.9708554889950864, "grad_norm": 0.4604713010910702, "learning_rate": 8.552385247123093e-06, "loss": 0.3954, "step": 10818 }, { "epoch": 0.9709452334477575, "grad_norm": 0.500525202025374, "learning_rate": 8.5520177772408e-06, "loss": 0.4212, "step": 10819 }, { "epoch": 0.9710349779004285, "grad_norm": 0.5171455250477783, "learning_rate": 8.551650268620919e-06, "loss": 0.4772, "step": 10820 }, { "epoch": 0.9711247223530995, "grad_norm": 0.5249085458933949, "learning_rate": 8.551282721267458e-06, "loss": 0.5426, "step": 10821 }, { "epoch": 0.9712144668057706, "grad_norm": 0.5064210149748422, "learning_rate": 8.550915135184423e-06, "loss": 0.4535, "step": 10822 }, { "epoch": 0.9713042112584416, "grad_norm": 0.4955124321154069, "learning_rate": 8.550547510375825e-06, "loss": 0.4591, "step": 10823 }, { "epoch": 0.9713939557111126, "grad_norm": 0.48652079000689086, "learning_rate": 8.550179846845672e-06, "loss": 0.4559, "step": 10824 }, { "epoch": 0.9714837001637836, "grad_norm": 0.4516188167922147, "learning_rate": 8.549812144597975e-06, "loss": 0.4024, "step": 10825 }, { "epoch": 0.9715734446164547, "grad_norm": 0.4403754926940246, "learning_rate": 8.549444403636743e-06, "loss": 0.3578, "step": 10826 }, { "epoch": 0.9716631890691256, "grad_norm": 0.48163771852024906, "learning_rate": 8.549076623965986e-06, "loss": 0.4378, "step": 10827 }, { "epoch": 0.9717529335217967, "grad_norm": 0.4671316848816096, "learning_rate": 8.548708805589714e-06, "loss": 0.4125, "step": 10828 }, { "epoch": 0.9718426779744677, "grad_norm": 0.4997139903680916, "learning_rate": 8.548340948511941e-06, "loss": 0.4722, "step": 10829 }, { "epoch": 0.9719324224271387, "grad_norm": 0.5062853756064959, "learning_rate": 8.54797305273668e-06, "loss": 0.4754, "step": 10830 }, { "epoch": 0.9720221668798097, "grad_norm": 0.4768729981372043, "learning_rate": 8.547605118267939e-06, "loss": 0.4138, "step": 10831 }, { "epoch": 0.9721119113324808, "grad_norm": 0.5832493148802271, "learning_rate": 8.547237145109732e-06, "loss": 0.5467, "step": 10832 }, { "epoch": 0.9722016557851518, "grad_norm": 0.5179271341603431, "learning_rate": 8.546869133266072e-06, "loss": 0.4673, "step": 10833 }, { "epoch": 0.9722914002378228, "grad_norm": 0.53080998454254, "learning_rate": 8.546501082740975e-06, "loss": 0.4904, "step": 10834 }, { "epoch": 0.9723811446904939, "grad_norm": 0.46770761829109136, "learning_rate": 8.54613299353845e-06, "loss": 0.4003, "step": 10835 }, { "epoch": 0.9724708891431648, "grad_norm": 0.47205883847105373, "learning_rate": 8.545764865662516e-06, "loss": 0.4789, "step": 10836 }, { "epoch": 0.9725606335958359, "grad_norm": 0.5003779753294361, "learning_rate": 8.545396699117185e-06, "loss": 0.4531, "step": 10837 }, { "epoch": 0.9726503780485068, "grad_norm": 0.4679578572038337, "learning_rate": 8.545028493906474e-06, "loss": 0.3955, "step": 10838 }, { "epoch": 0.9727401225011779, "grad_norm": 0.49871792413239013, "learning_rate": 8.544660250034397e-06, "loss": 0.4104, "step": 10839 }, { "epoch": 0.9728298669538489, "grad_norm": 0.4826960504075382, "learning_rate": 8.544291967504972e-06, "loss": 0.3976, "step": 10840 }, { "epoch": 0.97291961140652, "grad_norm": 0.4928571894560204, "learning_rate": 8.543923646322211e-06, "loss": 0.4544, "step": 10841 }, { "epoch": 0.9730093558591909, "grad_norm": 0.44912262641671213, "learning_rate": 8.543555286490137e-06, "loss": 0.4174, "step": 10842 }, { "epoch": 0.973099100311862, "grad_norm": 0.4736356134935839, "learning_rate": 8.543186888012762e-06, "loss": 0.3942, "step": 10843 }, { "epoch": 0.973188844764533, "grad_norm": 0.45983093884833226, "learning_rate": 8.542818450894106e-06, "loss": 0.4129, "step": 10844 }, { "epoch": 0.973278589217204, "grad_norm": 0.521624920267021, "learning_rate": 8.542449975138189e-06, "loss": 0.4708, "step": 10845 }, { "epoch": 0.9733683336698751, "grad_norm": 0.47956600002247407, "learning_rate": 8.542081460749025e-06, "loss": 0.465, "step": 10846 }, { "epoch": 0.973458078122546, "grad_norm": 0.44454574106614975, "learning_rate": 8.541712907730636e-06, "loss": 0.3766, "step": 10847 }, { "epoch": 0.9735478225752171, "grad_norm": 0.49117423416462663, "learning_rate": 8.541344316087042e-06, "loss": 0.417, "step": 10848 }, { "epoch": 0.9736375670278881, "grad_norm": 0.4663257023536348, "learning_rate": 8.540975685822259e-06, "loss": 0.4405, "step": 10849 }, { "epoch": 0.9737273114805591, "grad_norm": 0.5014196371987815, "learning_rate": 8.540607016940312e-06, "loss": 0.4646, "step": 10850 }, { "epoch": 0.9738170559332301, "grad_norm": 0.4678529685156654, "learning_rate": 8.540238309445219e-06, "loss": 0.389, "step": 10851 }, { "epoch": 0.9739068003859012, "grad_norm": 0.4666478961643906, "learning_rate": 8.539869563341001e-06, "loss": 0.4391, "step": 10852 }, { "epoch": 0.9739965448385721, "grad_norm": 0.47713419689833525, "learning_rate": 8.53950077863168e-06, "loss": 0.4189, "step": 10853 }, { "epoch": 0.9740862892912432, "grad_norm": 0.5126226296618578, "learning_rate": 8.539131955321277e-06, "loss": 0.4563, "step": 10854 }, { "epoch": 0.9741760337439141, "grad_norm": 0.4506602016070146, "learning_rate": 8.538763093413815e-06, "loss": 0.3654, "step": 10855 }, { "epoch": 0.9742657781965852, "grad_norm": 0.5085780209221603, "learning_rate": 8.538394192913317e-06, "loss": 0.4533, "step": 10856 }, { "epoch": 0.9743555226492563, "grad_norm": 0.5204873498455665, "learning_rate": 8.538025253823808e-06, "loss": 0.4629, "step": 10857 }, { "epoch": 0.9744452671019272, "grad_norm": 0.5169588001086468, "learning_rate": 8.537656276149306e-06, "loss": 0.4533, "step": 10858 }, { "epoch": 0.9745350115545983, "grad_norm": 0.4971641947986396, "learning_rate": 8.53728725989384e-06, "loss": 0.4384, "step": 10859 }, { "epoch": 0.9746247560072693, "grad_norm": 0.4548486554785486, "learning_rate": 8.536918205061436e-06, "loss": 0.4295, "step": 10860 }, { "epoch": 0.9747145004599403, "grad_norm": 0.47679259987856565, "learning_rate": 8.536549111656111e-06, "loss": 0.4212, "step": 10861 }, { "epoch": 0.9748042449126113, "grad_norm": 0.47473818232916776, "learning_rate": 8.536179979681897e-06, "loss": 0.4342, "step": 10862 }, { "epoch": 0.9748939893652824, "grad_norm": 0.5430923503651143, "learning_rate": 8.53581080914282e-06, "loss": 0.4666, "step": 10863 }, { "epoch": 0.9749837338179533, "grad_norm": 0.4958301341882358, "learning_rate": 8.5354416000429e-06, "loss": 0.4405, "step": 10864 }, { "epoch": 0.9750734782706244, "grad_norm": 0.5052090702996396, "learning_rate": 8.53507235238617e-06, "loss": 0.4716, "step": 10865 }, { "epoch": 0.9751632227232954, "grad_norm": 0.466196008022977, "learning_rate": 8.534703066176653e-06, "loss": 0.4654, "step": 10866 }, { "epoch": 0.9752529671759664, "grad_norm": 0.4241354174568832, "learning_rate": 8.534333741418377e-06, "loss": 0.3619, "step": 10867 }, { "epoch": 0.9753427116286375, "grad_norm": 0.49670405111198884, "learning_rate": 8.533964378115372e-06, "loss": 0.4868, "step": 10868 }, { "epoch": 0.9754324560813085, "grad_norm": 0.4665918906701591, "learning_rate": 8.533594976271663e-06, "loss": 0.4272, "step": 10869 }, { "epoch": 0.9755222005339795, "grad_norm": 0.45279084309153766, "learning_rate": 8.533225535891283e-06, "loss": 0.3679, "step": 10870 }, { "epoch": 0.9756119449866505, "grad_norm": 0.46563775233020016, "learning_rate": 8.532856056978255e-06, "loss": 0.4691, "step": 10871 }, { "epoch": 0.9757016894393216, "grad_norm": 0.49057373026101947, "learning_rate": 8.532486539536613e-06, "loss": 0.4644, "step": 10872 }, { "epoch": 0.9757914338919925, "grad_norm": 0.4647117406573437, "learning_rate": 8.532116983570387e-06, "loss": 0.3976, "step": 10873 }, { "epoch": 0.9758811783446636, "grad_norm": 0.4798419526585308, "learning_rate": 8.531747389083604e-06, "loss": 0.4267, "step": 10874 }, { "epoch": 0.9759709227973346, "grad_norm": 0.4954848361438981, "learning_rate": 8.531377756080297e-06, "loss": 0.4515, "step": 10875 }, { "epoch": 0.9760606672500056, "grad_norm": 0.48335332712545676, "learning_rate": 8.531008084564499e-06, "loss": 0.4284, "step": 10876 }, { "epoch": 0.9761504117026766, "grad_norm": 0.4431445114863246, "learning_rate": 8.530638374540237e-06, "loss": 0.396, "step": 10877 }, { "epoch": 0.9762401561553476, "grad_norm": 0.48542875872117514, "learning_rate": 8.530268626011546e-06, "loss": 0.4675, "step": 10878 }, { "epoch": 0.9763299006080187, "grad_norm": 0.5096380751533551, "learning_rate": 8.529898838982457e-06, "loss": 0.458, "step": 10879 }, { "epoch": 0.9764196450606897, "grad_norm": 0.5124359766248459, "learning_rate": 8.529529013457003e-06, "loss": 0.4778, "step": 10880 }, { "epoch": 0.9765093895133607, "grad_norm": 0.47223894663784344, "learning_rate": 8.529159149439222e-06, "loss": 0.3986, "step": 10881 }, { "epoch": 0.9765991339660317, "grad_norm": 0.5103561979011356, "learning_rate": 8.528789246933139e-06, "loss": 0.4703, "step": 10882 }, { "epoch": 0.9766888784187028, "grad_norm": 0.4804942313822856, "learning_rate": 8.528419305942795e-06, "loss": 0.4178, "step": 10883 }, { "epoch": 0.9767786228713737, "grad_norm": 0.46728400622843647, "learning_rate": 8.528049326472222e-06, "loss": 0.428, "step": 10884 }, { "epoch": 0.9768683673240448, "grad_norm": 0.5298316918450775, "learning_rate": 8.527679308525451e-06, "loss": 0.4725, "step": 10885 }, { "epoch": 0.9769581117767158, "grad_norm": 0.4925255526244638, "learning_rate": 8.527309252106526e-06, "loss": 0.435, "step": 10886 }, { "epoch": 0.9770478562293868, "grad_norm": 0.5282267466680204, "learning_rate": 8.526939157219476e-06, "loss": 0.4463, "step": 10887 }, { "epoch": 0.9771376006820578, "grad_norm": 0.5503038645210493, "learning_rate": 8.52656902386834e-06, "loss": 0.5425, "step": 10888 }, { "epoch": 0.9772273451347289, "grad_norm": 0.4568684022021795, "learning_rate": 8.526198852057153e-06, "loss": 0.4052, "step": 10889 }, { "epoch": 0.9773170895873999, "grad_norm": 0.5040996828022452, "learning_rate": 8.525828641789955e-06, "loss": 0.4839, "step": 10890 }, { "epoch": 0.9774068340400709, "grad_norm": 0.5458303676172997, "learning_rate": 8.525458393070779e-06, "loss": 0.5285, "step": 10891 }, { "epoch": 0.977496578492742, "grad_norm": 0.44652798363490986, "learning_rate": 8.525088105903665e-06, "loss": 0.4041, "step": 10892 }, { "epoch": 0.9775863229454129, "grad_norm": 0.4784372385902932, "learning_rate": 8.524717780292653e-06, "loss": 0.456, "step": 10893 }, { "epoch": 0.977676067398084, "grad_norm": 0.5155612769325308, "learning_rate": 8.524347416241781e-06, "loss": 0.4803, "step": 10894 }, { "epoch": 0.977765811850755, "grad_norm": 0.4363588556403219, "learning_rate": 8.523977013755085e-06, "loss": 0.3903, "step": 10895 }, { "epoch": 0.977855556303426, "grad_norm": 0.5504284988283816, "learning_rate": 8.523606572836607e-06, "loss": 0.4525, "step": 10896 }, { "epoch": 0.977945300756097, "grad_norm": 0.48284893273967056, "learning_rate": 8.523236093490388e-06, "loss": 0.4684, "step": 10897 }, { "epoch": 0.978035045208768, "grad_norm": 0.4841630436457596, "learning_rate": 8.522865575720466e-06, "loss": 0.4052, "step": 10898 }, { "epoch": 0.978124789661439, "grad_norm": 0.568120570480857, "learning_rate": 8.522495019530884e-06, "loss": 0.5475, "step": 10899 }, { "epoch": 0.9782145341141101, "grad_norm": 0.5551035345901237, "learning_rate": 8.522124424925682e-06, "loss": 0.4634, "step": 10900 }, { "epoch": 0.978304278566781, "grad_norm": 0.4599603750582005, "learning_rate": 8.5217537919089e-06, "loss": 0.4184, "step": 10901 }, { "epoch": 0.9783940230194521, "grad_norm": 0.4746207096647881, "learning_rate": 8.521383120484583e-06, "loss": 0.4368, "step": 10902 }, { "epoch": 0.9784837674721232, "grad_norm": 0.47235308889367467, "learning_rate": 8.521012410656773e-06, "loss": 0.4155, "step": 10903 }, { "epoch": 0.9785735119247941, "grad_norm": 0.4886800313162687, "learning_rate": 8.520641662429513e-06, "loss": 0.4516, "step": 10904 }, { "epoch": 0.9786632563774652, "grad_norm": 0.4865752205014999, "learning_rate": 8.520270875806843e-06, "loss": 0.4245, "step": 10905 }, { "epoch": 0.9787530008301362, "grad_norm": 0.49621786243694516, "learning_rate": 8.51990005079281e-06, "loss": 0.4227, "step": 10906 }, { "epoch": 0.9788427452828072, "grad_norm": 0.47236240130529783, "learning_rate": 8.519529187391458e-06, "loss": 0.4193, "step": 10907 }, { "epoch": 0.9789324897354782, "grad_norm": 0.5055573886243874, "learning_rate": 8.51915828560683e-06, "loss": 0.4713, "step": 10908 }, { "epoch": 0.9790222341881493, "grad_norm": 0.42554879233690646, "learning_rate": 8.518787345442972e-06, "loss": 0.3781, "step": 10909 }, { "epoch": 0.9791119786408202, "grad_norm": 0.4931430730155257, "learning_rate": 8.51841636690393e-06, "loss": 0.4735, "step": 10910 }, { "epoch": 0.9792017230934913, "grad_norm": 0.47194390466781067, "learning_rate": 8.518045349993748e-06, "loss": 0.4401, "step": 10911 }, { "epoch": 0.9792914675461623, "grad_norm": 0.436515234908101, "learning_rate": 8.517674294716475e-06, "loss": 0.3616, "step": 10912 }, { "epoch": 0.9793812119988333, "grad_norm": 0.470786571245776, "learning_rate": 8.517303201076154e-06, "loss": 0.4665, "step": 10913 }, { "epoch": 0.9794709564515044, "grad_norm": 0.5162834090769389, "learning_rate": 8.516932069076835e-06, "loss": 0.4315, "step": 10914 }, { "epoch": 0.9795607009041754, "grad_norm": 0.4655044018003179, "learning_rate": 8.516560898722565e-06, "loss": 0.3872, "step": 10915 }, { "epoch": 0.9796504453568464, "grad_norm": 0.4848211490685983, "learning_rate": 8.51618969001739e-06, "loss": 0.4341, "step": 10916 }, { "epoch": 0.9797401898095174, "grad_norm": 0.4373962125916978, "learning_rate": 8.515818442965363e-06, "loss": 0.4124, "step": 10917 }, { "epoch": 0.9798299342621885, "grad_norm": 0.48879123060133534, "learning_rate": 8.515447157570527e-06, "loss": 0.4138, "step": 10918 }, { "epoch": 0.9799196787148594, "grad_norm": 0.5419614793480163, "learning_rate": 8.515075833836934e-06, "loss": 0.5036, "step": 10919 }, { "epoch": 0.9800094231675305, "grad_norm": 0.4947730263290986, "learning_rate": 8.514704471768633e-06, "loss": 0.3985, "step": 10920 }, { "epoch": 0.9800991676202014, "grad_norm": 0.5139348902301669, "learning_rate": 8.514333071369676e-06, "loss": 0.4708, "step": 10921 }, { "epoch": 0.9801889120728725, "grad_norm": 0.488301917733451, "learning_rate": 8.513961632644109e-06, "loss": 0.4357, "step": 10922 }, { "epoch": 0.9802786565255435, "grad_norm": 0.5127725477349725, "learning_rate": 8.513590155595987e-06, "loss": 0.4994, "step": 10923 }, { "epoch": 0.9803684009782145, "grad_norm": 0.4930337513007124, "learning_rate": 8.513218640229361e-06, "loss": 0.4714, "step": 10924 }, { "epoch": 0.9804581454308856, "grad_norm": 0.4757716642552149, "learning_rate": 8.51284708654828e-06, "loss": 0.3708, "step": 10925 }, { "epoch": 0.9805478898835566, "grad_norm": 0.5226097517690452, "learning_rate": 8.512475494556798e-06, "loss": 0.5299, "step": 10926 }, { "epoch": 0.9806376343362276, "grad_norm": 0.5063623148698209, "learning_rate": 8.512103864258964e-06, "loss": 0.4689, "step": 10927 }, { "epoch": 0.9807273787888986, "grad_norm": 0.47959217921720626, "learning_rate": 8.511732195658838e-06, "loss": 0.4431, "step": 10928 }, { "epoch": 0.9808171232415697, "grad_norm": 0.4572201955219485, "learning_rate": 8.511360488760467e-06, "loss": 0.43, "step": 10929 }, { "epoch": 0.9809068676942406, "grad_norm": 0.520827364503059, "learning_rate": 8.51098874356791e-06, "loss": 0.5198, "step": 10930 }, { "epoch": 0.9809966121469117, "grad_norm": 0.4774689369677449, "learning_rate": 8.510616960085215e-06, "loss": 0.423, "step": 10931 }, { "epoch": 0.9810863565995827, "grad_norm": 0.46282895880811614, "learning_rate": 8.510245138316442e-06, "loss": 0.4206, "step": 10932 }, { "epoch": 0.9811761010522537, "grad_norm": 0.44033818502298466, "learning_rate": 8.509873278265642e-06, "loss": 0.3498, "step": 10933 }, { "epoch": 0.9812658455049247, "grad_norm": 0.4981863883099723, "learning_rate": 8.509501379936872e-06, "loss": 0.4206, "step": 10934 }, { "epoch": 0.9813555899575958, "grad_norm": 0.5377526759228755, "learning_rate": 8.50912944333419e-06, "loss": 0.4992, "step": 10935 }, { "epoch": 0.9814453344102667, "grad_norm": 0.45458997355877406, "learning_rate": 8.50875746846165e-06, "loss": 0.4343, "step": 10936 }, { "epoch": 0.9815350788629378, "grad_norm": 0.5153480099008912, "learning_rate": 8.50838545532331e-06, "loss": 0.4712, "step": 10937 }, { "epoch": 0.9816248233156089, "grad_norm": 0.4718216204855763, "learning_rate": 8.508013403923225e-06, "loss": 0.4097, "step": 10938 }, { "epoch": 0.9817145677682798, "grad_norm": 0.48203374279155686, "learning_rate": 8.507641314265453e-06, "loss": 0.4941, "step": 10939 }, { "epoch": 0.9818043122209509, "grad_norm": 0.4619327834302876, "learning_rate": 8.507269186354054e-06, "loss": 0.4473, "step": 10940 }, { "epoch": 0.9818940566736218, "grad_norm": 0.4790047289102534, "learning_rate": 8.506897020193084e-06, "loss": 0.4426, "step": 10941 }, { "epoch": 0.9819838011262929, "grad_norm": 0.46159276489013484, "learning_rate": 8.506524815786603e-06, "loss": 0.4055, "step": 10942 }, { "epoch": 0.9820735455789639, "grad_norm": 0.531397043150722, "learning_rate": 8.506152573138671e-06, "loss": 0.4915, "step": 10943 }, { "epoch": 0.9821632900316349, "grad_norm": 0.4704892697089101, "learning_rate": 8.505780292253344e-06, "loss": 0.4564, "step": 10944 }, { "epoch": 0.9822530344843059, "grad_norm": 0.5168731091754409, "learning_rate": 8.505407973134687e-06, "loss": 0.5014, "step": 10945 }, { "epoch": 0.982342778936977, "grad_norm": 0.4905374175680077, "learning_rate": 8.505035615786759e-06, "loss": 0.4613, "step": 10946 }, { "epoch": 0.9824325233896479, "grad_norm": 0.47553801413026486, "learning_rate": 8.504663220213617e-06, "loss": 0.4495, "step": 10947 }, { "epoch": 0.982522267842319, "grad_norm": 0.5312566114723598, "learning_rate": 8.504290786419327e-06, "loss": 0.4817, "step": 10948 }, { "epoch": 0.9826120122949901, "grad_norm": 0.5267213665713979, "learning_rate": 8.503918314407948e-06, "loss": 0.4799, "step": 10949 }, { "epoch": 0.982701756747661, "grad_norm": 0.5011560694223587, "learning_rate": 8.503545804183543e-06, "loss": 0.4565, "step": 10950 }, { "epoch": 0.9827915012003321, "grad_norm": 0.4684188564954735, "learning_rate": 8.503173255750175e-06, "loss": 0.3976, "step": 10951 }, { "epoch": 0.982881245653003, "grad_norm": 0.49306683214029445, "learning_rate": 8.502800669111904e-06, "loss": 0.4215, "step": 10952 }, { "epoch": 0.9829709901056741, "grad_norm": 0.4767174171339294, "learning_rate": 8.5024280442728e-06, "loss": 0.4085, "step": 10953 }, { "epoch": 0.9830607345583451, "grad_norm": 0.46099641222955023, "learning_rate": 8.502055381236919e-06, "loss": 0.4132, "step": 10954 }, { "epoch": 0.9831504790110162, "grad_norm": 0.4787767669961269, "learning_rate": 8.501682680008329e-06, "loss": 0.4311, "step": 10955 }, { "epoch": 0.9832402234636871, "grad_norm": 0.44940707945350894, "learning_rate": 8.501309940591096e-06, "loss": 0.4038, "step": 10956 }, { "epoch": 0.9833299679163582, "grad_norm": 0.49009851408371125, "learning_rate": 8.500937162989283e-06, "loss": 0.4454, "step": 10957 }, { "epoch": 0.9834197123690291, "grad_norm": 0.5192631575489302, "learning_rate": 8.500564347206955e-06, "loss": 0.492, "step": 10958 }, { "epoch": 0.9835094568217002, "grad_norm": 0.45900402157808057, "learning_rate": 8.500191493248177e-06, "loss": 0.3864, "step": 10959 }, { "epoch": 0.9835992012743713, "grad_norm": 0.48070796978415337, "learning_rate": 8.49981860111702e-06, "loss": 0.4057, "step": 10960 }, { "epoch": 0.9836889457270422, "grad_norm": 0.4535114306354716, "learning_rate": 8.499445670817546e-06, "loss": 0.3913, "step": 10961 }, { "epoch": 0.9837786901797133, "grad_norm": 0.4435147416288418, "learning_rate": 8.499072702353823e-06, "loss": 0.4255, "step": 10962 }, { "epoch": 0.9838684346323843, "grad_norm": 0.461514434060969, "learning_rate": 8.498699695729921e-06, "loss": 0.4131, "step": 10963 }, { "epoch": 0.9839581790850553, "grad_norm": 0.5429994524768589, "learning_rate": 8.498326650949906e-06, "loss": 0.4884, "step": 10964 }, { "epoch": 0.9840479235377263, "grad_norm": 0.4760587908085237, "learning_rate": 8.497953568017845e-06, "loss": 0.4531, "step": 10965 }, { "epoch": 0.9841376679903974, "grad_norm": 0.5175220033409159, "learning_rate": 8.49758044693781e-06, "loss": 0.4928, "step": 10966 }, { "epoch": 0.9842274124430683, "grad_norm": 0.4755672645951192, "learning_rate": 8.497207287713866e-06, "loss": 0.4307, "step": 10967 }, { "epoch": 0.9843171568957394, "grad_norm": 0.4785430123423249, "learning_rate": 8.496834090350087e-06, "loss": 0.4286, "step": 10968 }, { "epoch": 0.9844069013484104, "grad_norm": 0.4706927972933288, "learning_rate": 8.496460854850541e-06, "loss": 0.4374, "step": 10969 }, { "epoch": 0.9844966458010814, "grad_norm": 0.5063883909109417, "learning_rate": 8.496087581219296e-06, "loss": 0.4465, "step": 10970 }, { "epoch": 0.9845863902537524, "grad_norm": 0.5343475462233792, "learning_rate": 8.495714269460429e-06, "loss": 0.4839, "step": 10971 }, { "epoch": 0.9846761347064235, "grad_norm": 0.4605213439364756, "learning_rate": 8.495340919578006e-06, "loss": 0.4034, "step": 10972 }, { "epoch": 0.9847658791590945, "grad_norm": 0.41654775270042893, "learning_rate": 8.494967531576101e-06, "loss": 0.3711, "step": 10973 }, { "epoch": 0.9848556236117655, "grad_norm": 0.5471409192436099, "learning_rate": 8.494594105458784e-06, "loss": 0.4853, "step": 10974 }, { "epoch": 0.9849453680644366, "grad_norm": 0.4908964476157054, "learning_rate": 8.494220641230128e-06, "loss": 0.4115, "step": 10975 }, { "epoch": 0.9850351125171075, "grad_norm": 0.5202379514264224, "learning_rate": 8.49384713889421e-06, "loss": 0.4571, "step": 10976 }, { "epoch": 0.9851248569697786, "grad_norm": 0.4795518435766929, "learning_rate": 8.493473598455098e-06, "loss": 0.4598, "step": 10977 }, { "epoch": 0.9852146014224495, "grad_norm": 0.45696178189393566, "learning_rate": 8.493100019916867e-06, "loss": 0.4269, "step": 10978 }, { "epoch": 0.9853043458751206, "grad_norm": 0.44885335432519896, "learning_rate": 8.492726403283594e-06, "loss": 0.3785, "step": 10979 }, { "epoch": 0.9853940903277916, "grad_norm": 0.4721988750373435, "learning_rate": 8.49235274855935e-06, "loss": 0.4363, "step": 10980 }, { "epoch": 0.9854838347804626, "grad_norm": 0.5527943515068686, "learning_rate": 8.491979055748214e-06, "loss": 0.5372, "step": 10981 }, { "epoch": 0.9855735792331336, "grad_norm": 0.47503988420626136, "learning_rate": 8.491605324854258e-06, "loss": 0.4368, "step": 10982 }, { "epoch": 0.9856633236858047, "grad_norm": 0.4833742037602822, "learning_rate": 8.491231555881558e-06, "loss": 0.4302, "step": 10983 }, { "epoch": 0.9857530681384757, "grad_norm": 0.487270009415356, "learning_rate": 8.490857748834191e-06, "loss": 0.4313, "step": 10984 }, { "epoch": 0.9858428125911467, "grad_norm": 0.5522461558228567, "learning_rate": 8.490483903716234e-06, "loss": 0.4799, "step": 10985 }, { "epoch": 0.9859325570438178, "grad_norm": 0.48354054049875517, "learning_rate": 8.490110020531766e-06, "loss": 0.4189, "step": 10986 }, { "epoch": 0.9860223014964887, "grad_norm": 0.4998441746768671, "learning_rate": 8.48973609928486e-06, "loss": 0.4626, "step": 10987 }, { "epoch": 0.9861120459491598, "grad_norm": 0.5029496482294861, "learning_rate": 8.489362139979597e-06, "loss": 0.4792, "step": 10988 }, { "epoch": 0.9862017904018308, "grad_norm": 0.45486520015904863, "learning_rate": 8.488988142620055e-06, "loss": 0.3823, "step": 10989 }, { "epoch": 0.9862915348545018, "grad_norm": 0.4639121446510259, "learning_rate": 8.48861410721031e-06, "loss": 0.4401, "step": 10990 }, { "epoch": 0.9863812793071728, "grad_norm": 0.51952402796881, "learning_rate": 8.488240033754446e-06, "loss": 0.5067, "step": 10991 }, { "epoch": 0.9864710237598439, "grad_norm": 0.5245539567332668, "learning_rate": 8.48786592225654e-06, "loss": 0.5024, "step": 10992 }, { "epoch": 0.9865607682125148, "grad_norm": 0.5353377576522509, "learning_rate": 8.487491772720673e-06, "loss": 0.497, "step": 10993 }, { "epoch": 0.9866505126651859, "grad_norm": 0.4921689957027868, "learning_rate": 8.487117585150923e-06, "loss": 0.4344, "step": 10994 }, { "epoch": 0.986740257117857, "grad_norm": 0.4743782432239203, "learning_rate": 8.486743359551373e-06, "loss": 0.3956, "step": 10995 }, { "epoch": 0.9868300015705279, "grad_norm": 0.4992491940829379, "learning_rate": 8.486369095926103e-06, "loss": 0.5024, "step": 10996 }, { "epoch": 0.986919746023199, "grad_norm": 0.5292166216287767, "learning_rate": 8.485994794279195e-06, "loss": 0.5566, "step": 10997 }, { "epoch": 0.9870094904758699, "grad_norm": 0.4748932907516849, "learning_rate": 8.485620454614731e-06, "loss": 0.4256, "step": 10998 }, { "epoch": 0.987099234928541, "grad_norm": 0.49966251330614814, "learning_rate": 8.485246076936794e-06, "loss": 0.4753, "step": 10999 }, { "epoch": 0.987188979381212, "grad_norm": 0.449574186804227, "learning_rate": 8.484871661249467e-06, "loss": 0.3727, "step": 11000 }, { "epoch": 0.987278723833883, "grad_norm": 0.4545899403864548, "learning_rate": 8.484497207556831e-06, "loss": 0.4222, "step": 11001 }, { "epoch": 0.987368468286554, "grad_norm": 0.47583916173546265, "learning_rate": 8.484122715862974e-06, "loss": 0.3938, "step": 11002 }, { "epoch": 0.9874582127392251, "grad_norm": 0.505091221614525, "learning_rate": 8.483748186171978e-06, "loss": 0.4352, "step": 11003 }, { "epoch": 0.987547957191896, "grad_norm": 0.5086723267976901, "learning_rate": 8.483373618487927e-06, "loss": 0.4641, "step": 11004 }, { "epoch": 0.9876377016445671, "grad_norm": 0.5231755065403433, "learning_rate": 8.482999012814906e-06, "loss": 0.5386, "step": 11005 }, { "epoch": 0.9877274460972381, "grad_norm": 0.48621726454402875, "learning_rate": 8.482624369157e-06, "loss": 0.4682, "step": 11006 }, { "epoch": 0.9878171905499091, "grad_norm": 0.49872507428976787, "learning_rate": 8.482249687518294e-06, "loss": 0.4653, "step": 11007 }, { "epoch": 0.9879069350025802, "grad_norm": 0.4848745442920272, "learning_rate": 8.481874967902878e-06, "loss": 0.4318, "step": 11008 }, { "epoch": 0.9879966794552512, "grad_norm": 0.4905354234343817, "learning_rate": 8.481500210314835e-06, "loss": 0.4627, "step": 11009 }, { "epoch": 0.9880864239079222, "grad_norm": 0.4915731906980418, "learning_rate": 8.481125414758254e-06, "loss": 0.4178, "step": 11010 }, { "epoch": 0.9881761683605932, "grad_norm": 0.4830928300272768, "learning_rate": 8.48075058123722e-06, "loss": 0.4596, "step": 11011 }, { "epoch": 0.9882659128132643, "grad_norm": 0.4681416102936163, "learning_rate": 8.480375709755824e-06, "loss": 0.4181, "step": 11012 }, { "epoch": 0.9883556572659352, "grad_norm": 0.4760631521789355, "learning_rate": 8.480000800318154e-06, "loss": 0.4377, "step": 11013 }, { "epoch": 0.9884454017186063, "grad_norm": 0.5279911073909903, "learning_rate": 8.479625852928294e-06, "loss": 0.495, "step": 11014 }, { "epoch": 0.9885351461712772, "grad_norm": 0.5028339904877344, "learning_rate": 8.479250867590339e-06, "loss": 0.5055, "step": 11015 }, { "epoch": 0.9886248906239483, "grad_norm": 0.4633418916461558, "learning_rate": 8.478875844308377e-06, "loss": 0.4412, "step": 11016 }, { "epoch": 0.9887146350766193, "grad_norm": 0.44956369865473655, "learning_rate": 8.478500783086495e-06, "loss": 0.3854, "step": 11017 }, { "epoch": 0.9888043795292903, "grad_norm": 0.4461504200991832, "learning_rate": 8.478125683928785e-06, "loss": 0.379, "step": 11018 }, { "epoch": 0.9888941239819614, "grad_norm": 0.4370047989143097, "learning_rate": 8.477750546839341e-06, "loss": 0.3643, "step": 11019 }, { "epoch": 0.9889838684346324, "grad_norm": 0.5226355820182238, "learning_rate": 8.477375371822249e-06, "loss": 0.469, "step": 11020 }, { "epoch": 0.9890736128873034, "grad_norm": 0.5327751578229498, "learning_rate": 8.477000158881604e-06, "loss": 0.562, "step": 11021 }, { "epoch": 0.9891633573399744, "grad_norm": 0.4955221147354857, "learning_rate": 8.476624908021496e-06, "loss": 0.4382, "step": 11022 }, { "epoch": 0.9892531017926455, "grad_norm": 0.5291828106581296, "learning_rate": 8.476249619246019e-06, "loss": 0.5011, "step": 11023 }, { "epoch": 0.9893428462453164, "grad_norm": 0.4653254297395625, "learning_rate": 8.475874292559264e-06, "loss": 0.4329, "step": 11024 }, { "epoch": 0.9894325906979875, "grad_norm": 0.4910679506322309, "learning_rate": 8.475498927965327e-06, "loss": 0.445, "step": 11025 }, { "epoch": 0.9895223351506585, "grad_norm": 0.4736295352392152, "learning_rate": 8.475123525468301e-06, "loss": 0.4284, "step": 11026 }, { "epoch": 0.9896120796033295, "grad_norm": 0.46367164548740686, "learning_rate": 8.474748085072278e-06, "loss": 0.4208, "step": 11027 }, { "epoch": 0.9897018240560005, "grad_norm": 0.48992197963017897, "learning_rate": 8.474372606781352e-06, "loss": 0.4942, "step": 11028 }, { "epoch": 0.9897915685086716, "grad_norm": 0.5030278064775656, "learning_rate": 8.47399709059962e-06, "loss": 0.4153, "step": 11029 }, { "epoch": 0.9898813129613426, "grad_norm": 0.4934934274794381, "learning_rate": 8.473621536531178e-06, "loss": 0.4538, "step": 11030 }, { "epoch": 0.9899710574140136, "grad_norm": 0.5117456244588765, "learning_rate": 8.47324594458012e-06, "loss": 0.4812, "step": 11031 }, { "epoch": 0.9900608018666847, "grad_norm": 0.4861015073893155, "learning_rate": 8.472870314750541e-06, "loss": 0.4577, "step": 11032 }, { "epoch": 0.9901505463193556, "grad_norm": 0.4724215001291463, "learning_rate": 8.472494647046541e-06, "loss": 0.4087, "step": 11033 }, { "epoch": 0.9902402907720267, "grad_norm": 0.613346157470465, "learning_rate": 8.472118941472214e-06, "loss": 0.5628, "step": 11034 }, { "epoch": 0.9903300352246976, "grad_norm": 0.4987572637909806, "learning_rate": 8.47174319803166e-06, "loss": 0.4345, "step": 11035 }, { "epoch": 0.9904197796773687, "grad_norm": 0.47118918278405564, "learning_rate": 8.471367416728975e-06, "loss": 0.4493, "step": 11036 }, { "epoch": 0.9905095241300397, "grad_norm": 0.4626070174066639, "learning_rate": 8.470991597568256e-06, "loss": 0.4058, "step": 11037 }, { "epoch": 0.9905992685827107, "grad_norm": 0.45756059305568586, "learning_rate": 8.470615740553602e-06, "loss": 0.4004, "step": 11038 }, { "epoch": 0.9906890130353817, "grad_norm": 0.49499167779456427, "learning_rate": 8.470239845689117e-06, "loss": 0.4399, "step": 11039 }, { "epoch": 0.9907787574880528, "grad_norm": 0.547236136403522, "learning_rate": 8.469863912978893e-06, "loss": 0.4858, "step": 11040 }, { "epoch": 0.9908685019407237, "grad_norm": 0.5119856594424397, "learning_rate": 8.469487942427035e-06, "loss": 0.4598, "step": 11041 }, { "epoch": 0.9909582463933948, "grad_norm": 0.487053475235123, "learning_rate": 8.46911193403764e-06, "loss": 0.4585, "step": 11042 }, { "epoch": 0.9910479908460659, "grad_norm": 0.4953619021843554, "learning_rate": 8.468735887814814e-06, "loss": 0.4627, "step": 11043 }, { "epoch": 0.9911377352987368, "grad_norm": 0.48997810474982606, "learning_rate": 8.468359803762652e-06, "loss": 0.4772, "step": 11044 }, { "epoch": 0.9912274797514079, "grad_norm": 0.4429561884735296, "learning_rate": 8.467983681885257e-06, "loss": 0.4133, "step": 11045 }, { "epoch": 0.9913172242040789, "grad_norm": 0.46215554799723124, "learning_rate": 8.467607522186733e-06, "loss": 0.4301, "step": 11046 }, { "epoch": 0.9914069686567499, "grad_norm": 0.4886290084416382, "learning_rate": 8.467231324671182e-06, "loss": 0.471, "step": 11047 }, { "epoch": 0.9914967131094209, "grad_norm": 0.5032466444975153, "learning_rate": 8.466855089342703e-06, "loss": 0.4608, "step": 11048 }, { "epoch": 0.991586457562092, "grad_norm": 0.5350829645868619, "learning_rate": 8.466478816205404e-06, "loss": 0.4391, "step": 11049 }, { "epoch": 0.9916762020147629, "grad_norm": 0.5466035881913514, "learning_rate": 8.466102505263385e-06, "loss": 0.5372, "step": 11050 }, { "epoch": 0.991765946467434, "grad_norm": 0.43984650525283703, "learning_rate": 8.465726156520754e-06, "loss": 0.4069, "step": 11051 }, { "epoch": 0.991855690920105, "grad_norm": 0.47581628891447647, "learning_rate": 8.46534976998161e-06, "loss": 0.4248, "step": 11052 }, { "epoch": 0.991945435372776, "grad_norm": 0.5190868939184398, "learning_rate": 8.464973345650064e-06, "loss": 0.431, "step": 11053 }, { "epoch": 0.9920351798254471, "grad_norm": 0.4768485686349613, "learning_rate": 8.464596883530216e-06, "loss": 0.4061, "step": 11054 }, { "epoch": 0.992124924278118, "grad_norm": 0.801400374877037, "learning_rate": 8.464220383626174e-06, "loss": 0.457, "step": 11055 }, { "epoch": 0.9922146687307891, "grad_norm": 0.505707215358836, "learning_rate": 8.463843845942041e-06, "loss": 0.4404, "step": 11056 }, { "epoch": 0.9923044131834601, "grad_norm": 0.4501718803510306, "learning_rate": 8.463467270481929e-06, "loss": 0.4051, "step": 11057 }, { "epoch": 0.9923941576361311, "grad_norm": 0.5551789092954917, "learning_rate": 8.463090657249941e-06, "loss": 0.4868, "step": 11058 }, { "epoch": 0.9924839020888021, "grad_norm": 0.4578584648309376, "learning_rate": 8.462714006250186e-06, "loss": 0.4267, "step": 11059 }, { "epoch": 0.9925736465414732, "grad_norm": 0.5141035515106958, "learning_rate": 8.462337317486768e-06, "loss": 0.4617, "step": 11060 }, { "epoch": 0.9926633909941441, "grad_norm": 0.44320321370698035, "learning_rate": 8.4619605909638e-06, "loss": 0.3943, "step": 11061 }, { "epoch": 0.9927531354468152, "grad_norm": 0.4860167428187141, "learning_rate": 8.461583826685389e-06, "loss": 0.462, "step": 11062 }, { "epoch": 0.9928428798994862, "grad_norm": 0.49246961248449367, "learning_rate": 8.461207024655642e-06, "loss": 0.4654, "step": 11063 }, { "epoch": 0.9929326243521572, "grad_norm": 0.45575928379823405, "learning_rate": 8.46083018487867e-06, "loss": 0.3815, "step": 11064 }, { "epoch": 0.9930223688048283, "grad_norm": 0.5227629412976836, "learning_rate": 8.460453307358583e-06, "loss": 0.4748, "step": 11065 }, { "epoch": 0.9931121132574993, "grad_norm": 0.4793590385085846, "learning_rate": 8.46007639209949e-06, "loss": 0.4497, "step": 11066 }, { "epoch": 0.9932018577101703, "grad_norm": 0.46846993010642257, "learning_rate": 8.459699439105502e-06, "loss": 0.4609, "step": 11067 }, { "epoch": 0.9932916021628413, "grad_norm": 0.45513135925663273, "learning_rate": 8.45932244838073e-06, "loss": 0.397, "step": 11068 }, { "epoch": 0.9933813466155124, "grad_norm": 0.4784648420378715, "learning_rate": 8.458945419929285e-06, "loss": 0.4315, "step": 11069 }, { "epoch": 0.9934710910681833, "grad_norm": 0.4383789105025782, "learning_rate": 8.45856835375528e-06, "loss": 0.377, "step": 11070 }, { "epoch": 0.9935608355208544, "grad_norm": 0.5240342518699926, "learning_rate": 8.458191249862827e-06, "loss": 0.5304, "step": 11071 }, { "epoch": 0.9936505799735253, "grad_norm": 0.4610098732161279, "learning_rate": 8.457814108256037e-06, "loss": 0.415, "step": 11072 }, { "epoch": 0.9937403244261964, "grad_norm": 0.48880385737500276, "learning_rate": 8.457436928939024e-06, "loss": 0.463, "step": 11073 }, { "epoch": 0.9938300688788674, "grad_norm": 0.5081094300707397, "learning_rate": 8.457059711915902e-06, "loss": 0.4703, "step": 11074 }, { "epoch": 0.9939198133315384, "grad_norm": 0.49707477183806076, "learning_rate": 8.456682457190786e-06, "loss": 0.4544, "step": 11075 }, { "epoch": 0.9940095577842095, "grad_norm": 0.46393069489790084, "learning_rate": 8.456305164767786e-06, "loss": 0.4522, "step": 11076 }, { "epoch": 0.9940993022368805, "grad_norm": 0.46207719948903087, "learning_rate": 8.455927834651022e-06, "loss": 0.3841, "step": 11077 }, { "epoch": 0.9941890466895515, "grad_norm": 0.48127173681559743, "learning_rate": 8.455550466844603e-06, "loss": 0.4774, "step": 11078 }, { "epoch": 0.9942787911422225, "grad_norm": 0.4578155850825805, "learning_rate": 8.45517306135265e-06, "loss": 0.3834, "step": 11079 }, { "epoch": 0.9943685355948936, "grad_norm": 0.5056750115861062, "learning_rate": 8.454795618179277e-06, "loss": 0.488, "step": 11080 }, { "epoch": 0.9944582800475645, "grad_norm": 0.436578769691226, "learning_rate": 8.454418137328599e-06, "loss": 0.3608, "step": 11081 }, { "epoch": 0.9945480245002356, "grad_norm": 0.517219032350827, "learning_rate": 8.454040618804734e-06, "loss": 0.4962, "step": 11082 }, { "epoch": 0.9946377689529066, "grad_norm": 0.49517113650532296, "learning_rate": 8.4536630626118e-06, "loss": 0.4434, "step": 11083 }, { "epoch": 0.9947275134055776, "grad_norm": 0.47235206131281093, "learning_rate": 8.453285468753913e-06, "loss": 0.4115, "step": 11084 }, { "epoch": 0.9948172578582486, "grad_norm": 0.4696972489641992, "learning_rate": 8.452907837235191e-06, "loss": 0.4075, "step": 11085 }, { "epoch": 0.9949070023109197, "grad_norm": 0.48847664085766196, "learning_rate": 8.452530168059754e-06, "loss": 0.4393, "step": 11086 }, { "epoch": 0.9949967467635906, "grad_norm": 0.4820196361970443, "learning_rate": 8.452152461231719e-06, "loss": 0.4276, "step": 11087 }, { "epoch": 0.9950864912162617, "grad_norm": 0.4863209227091356, "learning_rate": 8.451774716755207e-06, "loss": 0.428, "step": 11088 }, { "epoch": 0.9951762356689328, "grad_norm": 0.4787268372260186, "learning_rate": 8.451396934634336e-06, "loss": 0.4272, "step": 11089 }, { "epoch": 0.9952659801216037, "grad_norm": 0.4770471530080152, "learning_rate": 8.451019114873224e-06, "loss": 0.3924, "step": 11090 }, { "epoch": 0.9953557245742748, "grad_norm": 0.5106981733283976, "learning_rate": 8.450641257475997e-06, "loss": 0.4429, "step": 11091 }, { "epoch": 0.9954454690269458, "grad_norm": 0.5561168904944972, "learning_rate": 8.450263362446772e-06, "loss": 0.4928, "step": 11092 }, { "epoch": 0.9955352134796168, "grad_norm": 0.5069497913467328, "learning_rate": 8.44988542978967e-06, "loss": 0.4455, "step": 11093 }, { "epoch": 0.9956249579322878, "grad_norm": 0.44075465069631026, "learning_rate": 8.449507459508815e-06, "loss": 0.3845, "step": 11094 }, { "epoch": 0.9957147023849588, "grad_norm": 0.5410513781760323, "learning_rate": 8.449129451608326e-06, "loss": 0.5643, "step": 11095 }, { "epoch": 0.9958044468376298, "grad_norm": 0.43739201494769464, "learning_rate": 8.448751406092329e-06, "loss": 0.3331, "step": 11096 }, { "epoch": 0.9958941912903009, "grad_norm": 0.47883895881503524, "learning_rate": 8.448373322964945e-06, "loss": 0.4065, "step": 11097 }, { "epoch": 0.9959839357429718, "grad_norm": 0.45810085811602586, "learning_rate": 8.447995202230298e-06, "loss": 0.4294, "step": 11098 }, { "epoch": 0.9960736801956429, "grad_norm": 0.48311078853768086, "learning_rate": 8.447617043892509e-06, "loss": 0.4223, "step": 11099 }, { "epoch": 0.996163424648314, "grad_norm": 0.4983407316267177, "learning_rate": 8.447238847955705e-06, "loss": 0.4258, "step": 11100 }, { "epoch": 0.9962531691009849, "grad_norm": 0.5511927700496434, "learning_rate": 8.44686061442401e-06, "loss": 0.4652, "step": 11101 }, { "epoch": 0.996342913553656, "grad_norm": 0.49466232612397915, "learning_rate": 8.44648234330155e-06, "loss": 0.4419, "step": 11102 }, { "epoch": 0.996432658006327, "grad_norm": 0.4912740393169461, "learning_rate": 8.446104034592446e-06, "loss": 0.4323, "step": 11103 }, { "epoch": 0.996522402458998, "grad_norm": 0.45867700678826945, "learning_rate": 8.445725688300827e-06, "loss": 0.4121, "step": 11104 }, { "epoch": 0.996612146911669, "grad_norm": 0.5196658709318224, "learning_rate": 8.445347304430823e-06, "loss": 0.4828, "step": 11105 }, { "epoch": 0.9967018913643401, "grad_norm": 0.4693600302735835, "learning_rate": 8.444968882986552e-06, "loss": 0.4034, "step": 11106 }, { "epoch": 0.996791635817011, "grad_norm": 0.5218849828131846, "learning_rate": 8.444590423972148e-06, "loss": 0.5261, "step": 11107 }, { "epoch": 0.9968813802696821, "grad_norm": 0.46788950640473953, "learning_rate": 8.444211927391735e-06, "loss": 0.3841, "step": 11108 }, { "epoch": 0.996971124722353, "grad_norm": 0.4904086944501008, "learning_rate": 8.443833393249443e-06, "loss": 0.4716, "step": 11109 }, { "epoch": 0.9970608691750241, "grad_norm": 0.5078576367937504, "learning_rate": 8.443454821549397e-06, "loss": 0.4595, "step": 11110 }, { "epoch": 0.9971506136276952, "grad_norm": 0.4914139981526812, "learning_rate": 8.443076212295728e-06, "loss": 0.4324, "step": 11111 }, { "epoch": 0.9972403580803662, "grad_norm": 0.4616868146845904, "learning_rate": 8.442697565492567e-06, "loss": 0.3621, "step": 11112 }, { "epoch": 0.9973301025330372, "grad_norm": 0.4607357205068906, "learning_rate": 8.442318881144038e-06, "loss": 0.3834, "step": 11113 }, { "epoch": 0.9974198469857082, "grad_norm": 0.4936422582120302, "learning_rate": 8.441940159254277e-06, "loss": 0.4316, "step": 11114 }, { "epoch": 0.9975095914383793, "grad_norm": 0.5094298945113643, "learning_rate": 8.441561399827407e-06, "loss": 0.4744, "step": 11115 }, { "epoch": 0.9975993358910502, "grad_norm": 0.50846523497594, "learning_rate": 8.441182602867566e-06, "loss": 0.4999, "step": 11116 }, { "epoch": 0.9976890803437213, "grad_norm": 0.4852947359657814, "learning_rate": 8.440803768378881e-06, "loss": 0.4563, "step": 11117 }, { "epoch": 0.9977788247963922, "grad_norm": 0.4689156092522766, "learning_rate": 8.440424896365485e-06, "loss": 0.4222, "step": 11118 }, { "epoch": 0.9978685692490633, "grad_norm": 0.4969771939493948, "learning_rate": 8.440045986831509e-06, "loss": 0.4472, "step": 11119 }, { "epoch": 0.9979583137017343, "grad_norm": 0.5364264826609325, "learning_rate": 8.439667039781085e-06, "loss": 0.4737, "step": 11120 }, { "epoch": 0.9980480581544053, "grad_norm": 0.5144527609049784, "learning_rate": 8.439288055218348e-06, "loss": 0.5564, "step": 11121 }, { "epoch": 0.9981378026070763, "grad_norm": 0.48430936832198185, "learning_rate": 8.43890903314743e-06, "loss": 0.4422, "step": 11122 }, { "epoch": 0.9982275470597474, "grad_norm": 0.4838479617403718, "learning_rate": 8.43852997357246e-06, "loss": 0.4436, "step": 11123 }, { "epoch": 0.9983172915124184, "grad_norm": 0.5047580171147967, "learning_rate": 8.43815087649758e-06, "loss": 0.4544, "step": 11124 }, { "epoch": 0.9984070359650894, "grad_norm": 0.4947840869684399, "learning_rate": 8.437771741926917e-06, "loss": 0.4769, "step": 11125 }, { "epoch": 0.9984967804177605, "grad_norm": 0.5093082920864707, "learning_rate": 8.437392569864612e-06, "loss": 0.4157, "step": 11126 }, { "epoch": 0.9985865248704314, "grad_norm": 0.4614570183102, "learning_rate": 8.437013360314796e-06, "loss": 0.3704, "step": 11127 }, { "epoch": 0.9986762693231025, "grad_norm": 0.48603268166447156, "learning_rate": 8.436634113281605e-06, "loss": 0.4395, "step": 11128 }, { "epoch": 0.9987660137757735, "grad_norm": 0.48077867552238057, "learning_rate": 8.436254828769177e-06, "loss": 0.4312, "step": 11129 }, { "epoch": 0.9988557582284445, "grad_norm": 0.49355860183596867, "learning_rate": 8.435875506781647e-06, "loss": 0.4177, "step": 11130 }, { "epoch": 0.9989455026811155, "grad_norm": 0.4800914232755672, "learning_rate": 8.435496147323153e-06, "loss": 0.3883, "step": 11131 }, { "epoch": 0.9990352471337866, "grad_norm": 0.4651012255569735, "learning_rate": 8.43511675039783e-06, "loss": 0.4118, "step": 11132 }, { "epoch": 0.9991249915864575, "grad_norm": 0.44797084259333275, "learning_rate": 8.434737316009817e-06, "loss": 0.4025, "step": 11133 }, { "epoch": 0.9992147360391286, "grad_norm": 0.49390157128980433, "learning_rate": 8.434357844163253e-06, "loss": 0.4685, "step": 11134 }, { "epoch": 0.9993044804917997, "grad_norm": 0.5019348996651559, "learning_rate": 8.433978334862276e-06, "loss": 0.4833, "step": 11135 }, { "epoch": 0.9993942249444706, "grad_norm": 0.50629152933496, "learning_rate": 8.433598788111021e-06, "loss": 0.4582, "step": 11136 }, { "epoch": 0.9994839693971417, "grad_norm": 0.4894409542115756, "learning_rate": 8.433219203913634e-06, "loss": 0.4216, "step": 11137 }, { "epoch": 0.9995737138498126, "grad_norm": 0.5120484257743511, "learning_rate": 8.432839582274248e-06, "loss": 0.4567, "step": 11138 }, { "epoch": 0.9996634583024837, "grad_norm": 0.49307653148818553, "learning_rate": 8.432459923197009e-06, "loss": 0.5046, "step": 11139 }, { "epoch": 0.9997532027551547, "grad_norm": 0.5177538397428642, "learning_rate": 8.432080226686053e-06, "loss": 0.4537, "step": 11140 }, { "epoch": 0.9998429472078257, "grad_norm": 0.4989904818400552, "learning_rate": 8.431700492745523e-06, "loss": 0.403, "step": 11141 }, { "epoch": 0.9999326916604967, "grad_norm": 0.49357142186524644, "learning_rate": 8.43132072137956e-06, "loss": 0.4294, "step": 11142 }, { "epoch": 1.000089744452671, "grad_norm": 0.7451593439596634, "learning_rate": 8.430940912592307e-06, "loss": 0.858, "step": 11143 }, { "epoch": 1.0001794889053421, "grad_norm": 0.46855810775500323, "learning_rate": 8.430561066387902e-06, "loss": 0.3767, "step": 11144 }, { "epoch": 1.000269233358013, "grad_norm": 0.4601026365721298, "learning_rate": 8.430181182770492e-06, "loss": 0.4318, "step": 11145 }, { "epoch": 1.000358977810684, "grad_norm": 0.4860087042990841, "learning_rate": 8.429801261744217e-06, "loss": 0.3658, "step": 11146 }, { "epoch": 1.0004487222633551, "grad_norm": 0.458565547426651, "learning_rate": 8.429421303313223e-06, "loss": 0.4131, "step": 11147 }, { "epoch": 1.0005384667160262, "grad_norm": 0.49418598950809633, "learning_rate": 8.429041307481651e-06, "loss": 0.4898, "step": 11148 }, { "epoch": 1.000628211168697, "grad_norm": 0.5215496981794661, "learning_rate": 8.428661274253648e-06, "loss": 0.3781, "step": 11149 }, { "epoch": 1.0007179556213681, "grad_norm": 0.4605101211184213, "learning_rate": 8.428281203633356e-06, "loss": 0.426, "step": 11150 }, { "epoch": 1.0008077000740392, "grad_norm": 0.44785699815151486, "learning_rate": 8.427901095624921e-06, "loss": 0.3727, "step": 11151 }, { "epoch": 1.0008974445267103, "grad_norm": 0.4596735579464043, "learning_rate": 8.427520950232489e-06, "loss": 0.4027, "step": 11152 }, { "epoch": 1.000987188979381, "grad_norm": 0.4540253879246772, "learning_rate": 8.427140767460204e-06, "loss": 0.3514, "step": 11153 }, { "epoch": 1.0010769334320522, "grad_norm": 0.45344965854236136, "learning_rate": 8.426760547312213e-06, "loss": 0.399, "step": 11154 }, { "epoch": 1.0011666778847232, "grad_norm": 0.49586489561205654, "learning_rate": 8.426380289792666e-06, "loss": 0.4333, "step": 11155 }, { "epoch": 1.0012564223373943, "grad_norm": 0.47170836717498005, "learning_rate": 8.425999994905705e-06, "loss": 0.3357, "step": 11156 }, { "epoch": 1.0013461667900654, "grad_norm": 0.49141346631289795, "learning_rate": 8.42561966265548e-06, "loss": 0.3656, "step": 11157 }, { "epoch": 1.0014359112427362, "grad_norm": 0.4634140119766351, "learning_rate": 8.425239293046137e-06, "loss": 0.4411, "step": 11158 }, { "epoch": 1.0015256556954073, "grad_norm": 0.5215744161205162, "learning_rate": 8.424858886081827e-06, "loss": 0.4131, "step": 11159 }, { "epoch": 1.0016154001480784, "grad_norm": 0.49458395291541074, "learning_rate": 8.424478441766695e-06, "loss": 0.4121, "step": 11160 }, { "epoch": 1.0017051446007494, "grad_norm": 0.4783814230428347, "learning_rate": 8.424097960104893e-06, "loss": 0.4354, "step": 11161 }, { "epoch": 1.0017948890534203, "grad_norm": 0.506749485609892, "learning_rate": 8.42371744110057e-06, "loss": 0.356, "step": 11162 }, { "epoch": 1.0018846335060914, "grad_norm": 0.46724073603533295, "learning_rate": 8.423336884757875e-06, "loss": 0.3746, "step": 11163 }, { "epoch": 1.0019743779587624, "grad_norm": 0.46543951763250735, "learning_rate": 8.42295629108096e-06, "loss": 0.4826, "step": 11164 }, { "epoch": 1.0020641224114335, "grad_norm": 0.5183120662596344, "learning_rate": 8.422575660073975e-06, "loss": 0.4147, "step": 11165 }, { "epoch": 1.0021538668641046, "grad_norm": 0.4803013278248868, "learning_rate": 8.422194991741068e-06, "loss": 0.4769, "step": 11166 }, { "epoch": 1.0022436113167754, "grad_norm": 0.532953611265863, "learning_rate": 8.421814286086394e-06, "loss": 0.4247, "step": 11167 }, { "epoch": 1.0023333557694465, "grad_norm": 0.5299453142224427, "learning_rate": 8.421433543114104e-06, "loss": 0.39, "step": 11168 }, { "epoch": 1.0024231002221176, "grad_norm": 0.48204198556593575, "learning_rate": 8.42105276282835e-06, "loss": 0.3748, "step": 11169 }, { "epoch": 1.0025128446747886, "grad_norm": 0.4657934099628417, "learning_rate": 8.420671945233285e-06, "loss": 0.4086, "step": 11170 }, { "epoch": 1.0026025891274595, "grad_norm": 0.4877902616376033, "learning_rate": 8.420291090333062e-06, "loss": 0.3878, "step": 11171 }, { "epoch": 1.0026923335801305, "grad_norm": 0.5395083824108127, "learning_rate": 8.419910198131834e-06, "loss": 0.4556, "step": 11172 }, { "epoch": 1.0027820780328016, "grad_norm": 0.48903277937681855, "learning_rate": 8.419529268633757e-06, "loss": 0.4013, "step": 11173 }, { "epoch": 1.0028718224854727, "grad_norm": 0.4902648576405799, "learning_rate": 8.419148301842983e-06, "loss": 0.4372, "step": 11174 }, { "epoch": 1.0029615669381435, "grad_norm": 0.4890670953398461, "learning_rate": 8.418767297763666e-06, "loss": 0.4025, "step": 11175 }, { "epoch": 1.0030513113908146, "grad_norm": 0.5234579623203868, "learning_rate": 8.418386256399967e-06, "loss": 0.4805, "step": 11176 }, { "epoch": 1.0031410558434857, "grad_norm": 0.46040547115075525, "learning_rate": 8.418005177756032e-06, "loss": 0.4261, "step": 11177 }, { "epoch": 1.0032308002961567, "grad_norm": 0.4955827526889757, "learning_rate": 8.417624061836025e-06, "loss": 0.3823, "step": 11178 }, { "epoch": 1.0033205447488278, "grad_norm": 0.4692478086830973, "learning_rate": 8.4172429086441e-06, "loss": 0.3546, "step": 11179 }, { "epoch": 1.0034102892014987, "grad_norm": 0.43825163747775764, "learning_rate": 8.416861718184414e-06, "loss": 0.3352, "step": 11180 }, { "epoch": 1.0035000336541697, "grad_norm": 0.5034373972456317, "learning_rate": 8.416480490461121e-06, "loss": 0.4051, "step": 11181 }, { "epoch": 1.0035897781068408, "grad_norm": 0.4488664425689945, "learning_rate": 8.416099225478383e-06, "loss": 0.3353, "step": 11182 }, { "epoch": 1.0036795225595119, "grad_norm": 0.44778883782998996, "learning_rate": 8.415717923240357e-06, "loss": 0.3992, "step": 11183 }, { "epoch": 1.0037692670121827, "grad_norm": 0.4815036659987246, "learning_rate": 8.4153365837512e-06, "loss": 0.4887, "step": 11184 }, { "epoch": 1.0038590114648538, "grad_norm": 0.5431700243599544, "learning_rate": 8.41495520701507e-06, "loss": 0.4061, "step": 11185 }, { "epoch": 1.0039487559175249, "grad_norm": 0.48593990344194093, "learning_rate": 8.414573793036128e-06, "loss": 0.3874, "step": 11186 }, { "epoch": 1.004038500370196, "grad_norm": 0.48133777910701825, "learning_rate": 8.414192341818533e-06, "loss": 0.3889, "step": 11187 }, { "epoch": 1.0041282448228668, "grad_norm": 0.49469525194383085, "learning_rate": 8.413810853366446e-06, "loss": 0.4549, "step": 11188 }, { "epoch": 1.0042179892755378, "grad_norm": 0.5499677396954588, "learning_rate": 8.413429327684026e-06, "loss": 0.4554, "step": 11189 }, { "epoch": 1.004307733728209, "grad_norm": 0.4813484195111792, "learning_rate": 8.413047764775437e-06, "loss": 0.4128, "step": 11190 }, { "epoch": 1.00439747818088, "grad_norm": 0.5046657789010989, "learning_rate": 8.412666164644836e-06, "loss": 0.4109, "step": 11191 }, { "epoch": 1.004487222633551, "grad_norm": 0.4767170283726914, "learning_rate": 8.412284527296386e-06, "loss": 0.3741, "step": 11192 }, { "epoch": 1.004576967086222, "grad_norm": 0.49601821626550807, "learning_rate": 8.41190285273425e-06, "loss": 0.3908, "step": 11193 }, { "epoch": 1.004666711538893, "grad_norm": 0.47757774793975055, "learning_rate": 8.411521140962591e-06, "loss": 0.4131, "step": 11194 }, { "epoch": 1.004756455991564, "grad_norm": 0.5047872223394129, "learning_rate": 8.411139391985569e-06, "loss": 0.3974, "step": 11195 }, { "epoch": 1.0048462004442351, "grad_norm": 0.5023248551644987, "learning_rate": 8.41075760580735e-06, "loss": 0.4104, "step": 11196 }, { "epoch": 1.004935944896906, "grad_norm": 0.48239204614567377, "learning_rate": 8.410375782432096e-06, "loss": 0.3539, "step": 11197 }, { "epoch": 1.005025689349577, "grad_norm": 0.4698460199437498, "learning_rate": 8.409993921863973e-06, "loss": 0.398, "step": 11198 }, { "epoch": 1.005115433802248, "grad_norm": 0.46436923005908576, "learning_rate": 8.409612024107145e-06, "loss": 0.3977, "step": 11199 }, { "epoch": 1.0052051782549192, "grad_norm": 0.531962342387733, "learning_rate": 8.409230089165777e-06, "loss": 0.3883, "step": 11200 }, { "epoch": 1.0052949227075902, "grad_norm": 0.4657126216420472, "learning_rate": 8.40884811704403e-06, "loss": 0.3729, "step": 11201 }, { "epoch": 1.005384667160261, "grad_norm": 0.46311882237516666, "learning_rate": 8.408466107746077e-06, "loss": 0.4244, "step": 11202 }, { "epoch": 1.0054744116129322, "grad_norm": 0.4949948058233784, "learning_rate": 8.408084061276077e-06, "loss": 0.3644, "step": 11203 }, { "epoch": 1.0055641560656032, "grad_norm": 0.4903790381014253, "learning_rate": 8.407701977638201e-06, "loss": 0.3538, "step": 11204 }, { "epoch": 1.0056539005182743, "grad_norm": 0.48243545250633213, "learning_rate": 8.407319856836617e-06, "loss": 0.4288, "step": 11205 }, { "epoch": 1.0057436449709451, "grad_norm": 0.48884807020231913, "learning_rate": 8.406937698875487e-06, "loss": 0.3987, "step": 11206 }, { "epoch": 1.0058333894236162, "grad_norm": 0.48488538807701337, "learning_rate": 8.406555503758985e-06, "loss": 0.4352, "step": 11207 }, { "epoch": 1.0059231338762873, "grad_norm": 0.5009044933740938, "learning_rate": 8.406173271491275e-06, "loss": 0.3835, "step": 11208 }, { "epoch": 1.0060128783289584, "grad_norm": 0.4805744354705746, "learning_rate": 8.405791002076527e-06, "loss": 0.3926, "step": 11209 }, { "epoch": 1.0061026227816292, "grad_norm": 0.5445696923238121, "learning_rate": 8.405408695518908e-06, "loss": 0.3906, "step": 11210 }, { "epoch": 1.0061923672343003, "grad_norm": 0.4851597168337817, "learning_rate": 8.405026351822591e-06, "loss": 0.4092, "step": 11211 }, { "epoch": 1.0062821116869713, "grad_norm": 0.46742668104981755, "learning_rate": 8.404643970991743e-06, "loss": 0.3756, "step": 11212 }, { "epoch": 1.0063718561396424, "grad_norm": 0.4970750172797334, "learning_rate": 8.404261553030534e-06, "loss": 0.4825, "step": 11213 }, { "epoch": 1.0064616005923135, "grad_norm": 0.5105889369718194, "learning_rate": 8.403879097943136e-06, "loss": 0.385, "step": 11214 }, { "epoch": 1.0065513450449843, "grad_norm": 0.5168268887881772, "learning_rate": 8.40349660573372e-06, "loss": 0.4449, "step": 11215 }, { "epoch": 1.0066410894976554, "grad_norm": 0.515019532879488, "learning_rate": 8.403114076406455e-06, "loss": 0.4188, "step": 11216 }, { "epoch": 1.0067308339503265, "grad_norm": 0.49897399189121566, "learning_rate": 8.402731509965517e-06, "loss": 0.413, "step": 11217 }, { "epoch": 1.0068205784029975, "grad_norm": 0.4713533227500992, "learning_rate": 8.402348906415075e-06, "loss": 0.4233, "step": 11218 }, { "epoch": 1.0069103228556684, "grad_norm": 0.5192282421876103, "learning_rate": 8.401966265759303e-06, "loss": 0.4006, "step": 11219 }, { "epoch": 1.0070000673083395, "grad_norm": 0.5088807963772728, "learning_rate": 8.40158358800237e-06, "loss": 0.4376, "step": 11220 }, { "epoch": 1.0070898117610105, "grad_norm": 0.4719619427804689, "learning_rate": 8.401200873148457e-06, "loss": 0.3741, "step": 11221 }, { "epoch": 1.0071795562136816, "grad_norm": 0.447924757366005, "learning_rate": 8.400818121201733e-06, "loss": 0.3594, "step": 11222 }, { "epoch": 1.0072693006663525, "grad_norm": 0.49962721268358684, "learning_rate": 8.400435332166372e-06, "loss": 0.4457, "step": 11223 }, { "epoch": 1.0073590451190235, "grad_norm": 0.48239719252626595, "learning_rate": 8.400052506046548e-06, "loss": 0.4178, "step": 11224 }, { "epoch": 1.0074487895716946, "grad_norm": 0.5211570808162305, "learning_rate": 8.39966964284644e-06, "loss": 0.4255, "step": 11225 }, { "epoch": 1.0075385340243657, "grad_norm": 0.5194580237107671, "learning_rate": 8.399286742570218e-06, "loss": 0.4824, "step": 11226 }, { "epoch": 1.0076282784770367, "grad_norm": 0.4880266996755666, "learning_rate": 8.398903805222063e-06, "loss": 0.4258, "step": 11227 }, { "epoch": 1.0077180229297076, "grad_norm": 0.5132106977028967, "learning_rate": 8.398520830806148e-06, "loss": 0.3855, "step": 11228 }, { "epoch": 1.0078077673823786, "grad_norm": 0.47228359931755987, "learning_rate": 8.39813781932665e-06, "loss": 0.3379, "step": 11229 }, { "epoch": 1.0078975118350497, "grad_norm": 0.45674345167136354, "learning_rate": 8.39775477078775e-06, "loss": 0.3744, "step": 11230 }, { "epoch": 1.0079872562877208, "grad_norm": 0.48445253067152677, "learning_rate": 8.397371685193619e-06, "loss": 0.3229, "step": 11231 }, { "epoch": 1.0080770007403916, "grad_norm": 0.4741380985376808, "learning_rate": 8.396988562548438e-06, "loss": 0.4674, "step": 11232 }, { "epoch": 1.0081667451930627, "grad_norm": 0.4838509386038059, "learning_rate": 8.396605402856385e-06, "loss": 0.4029, "step": 11233 }, { "epoch": 1.0082564896457338, "grad_norm": 0.5361130753506461, "learning_rate": 8.39622220612164e-06, "loss": 0.3447, "step": 11234 }, { "epoch": 1.0083462340984048, "grad_norm": 0.45047184759827513, "learning_rate": 8.395838972348381e-06, "loss": 0.3854, "step": 11235 }, { "epoch": 1.008435978551076, "grad_norm": 0.4687596889658155, "learning_rate": 8.395455701540788e-06, "loss": 0.4385, "step": 11236 }, { "epoch": 1.0085257230037468, "grad_norm": 0.5071995638061653, "learning_rate": 8.395072393703039e-06, "loss": 0.4405, "step": 11237 }, { "epoch": 1.0086154674564178, "grad_norm": 0.4847169730751464, "learning_rate": 8.394689048839314e-06, "loss": 0.3876, "step": 11238 }, { "epoch": 1.008705211909089, "grad_norm": 0.5199719986012608, "learning_rate": 8.3943056669538e-06, "loss": 0.4421, "step": 11239 }, { "epoch": 1.00879495636176, "grad_norm": 0.5203189496892046, "learning_rate": 8.393922248050669e-06, "loss": 0.4194, "step": 11240 }, { "epoch": 1.0088847008144308, "grad_norm": 0.5404713154766098, "learning_rate": 8.39353879213411e-06, "loss": 0.4708, "step": 11241 }, { "epoch": 1.008974445267102, "grad_norm": 0.4818680446283474, "learning_rate": 8.3931552992083e-06, "loss": 0.3512, "step": 11242 }, { "epoch": 1.009064189719773, "grad_norm": 0.4669817347980469, "learning_rate": 8.392771769277424e-06, "loss": 0.3852, "step": 11243 }, { "epoch": 1.009153934172444, "grad_norm": 0.4867929854765999, "learning_rate": 8.392388202345661e-06, "loss": 0.4332, "step": 11244 }, { "epoch": 1.0092436786251149, "grad_norm": 0.5108859472749974, "learning_rate": 8.3920045984172e-06, "loss": 0.3733, "step": 11245 }, { "epoch": 1.009333423077786, "grad_norm": 0.47882264130300134, "learning_rate": 8.391620957496219e-06, "loss": 0.422, "step": 11246 }, { "epoch": 1.009423167530457, "grad_norm": 0.5387511081800038, "learning_rate": 8.391237279586905e-06, "loss": 0.4996, "step": 11247 }, { "epoch": 1.009512911983128, "grad_norm": 0.4696174186666153, "learning_rate": 8.390853564693441e-06, "loss": 0.3801, "step": 11248 }, { "epoch": 1.0096026564357992, "grad_norm": 0.5000962762044809, "learning_rate": 8.390469812820015e-06, "loss": 0.3912, "step": 11249 }, { "epoch": 1.00969240088847, "grad_norm": 0.4976043623563409, "learning_rate": 8.390086023970807e-06, "loss": 0.46, "step": 11250 }, { "epoch": 1.009782145341141, "grad_norm": 0.4551580265241601, "learning_rate": 8.389702198150004e-06, "loss": 0.3574, "step": 11251 }, { "epoch": 1.0098718897938121, "grad_norm": 0.48967407315919514, "learning_rate": 8.389318335361796e-06, "loss": 0.4086, "step": 11252 }, { "epoch": 1.0099616342464832, "grad_norm": 0.48013735025487503, "learning_rate": 8.388934435610364e-06, "loss": 0.393, "step": 11253 }, { "epoch": 1.010051378699154, "grad_norm": 0.5236839805254769, "learning_rate": 8.388550498899897e-06, "loss": 0.4083, "step": 11254 }, { "epoch": 1.0101411231518251, "grad_norm": 0.47049506472981883, "learning_rate": 8.388166525234582e-06, "loss": 0.4112, "step": 11255 }, { "epoch": 1.0102308676044962, "grad_norm": 0.47033419330737364, "learning_rate": 8.387782514618608e-06, "loss": 0.4497, "step": 11256 }, { "epoch": 1.0103206120571673, "grad_norm": 0.4862301057372603, "learning_rate": 8.38739846705616e-06, "loss": 0.3677, "step": 11257 }, { "epoch": 1.0104103565098381, "grad_norm": 0.5027061364959001, "learning_rate": 8.387014382551429e-06, "loss": 0.4189, "step": 11258 }, { "epoch": 1.0105001009625092, "grad_norm": 0.4762225578540074, "learning_rate": 8.386630261108601e-06, "loss": 0.4195, "step": 11259 }, { "epoch": 1.0105898454151803, "grad_norm": 0.5272312148688518, "learning_rate": 8.38624610273187e-06, "loss": 0.4699, "step": 11260 }, { "epoch": 1.0106795898678513, "grad_norm": 0.48381993815337243, "learning_rate": 8.38586190742542e-06, "loss": 0.4233, "step": 11261 }, { "epoch": 1.0107693343205224, "grad_norm": 0.5031017340295942, "learning_rate": 8.385477675193444e-06, "loss": 0.4217, "step": 11262 }, { "epoch": 1.0108590787731933, "grad_norm": 0.47354471562729156, "learning_rate": 8.385093406040129e-06, "loss": 0.3983, "step": 11263 }, { "epoch": 1.0109488232258643, "grad_norm": 0.4456188636310603, "learning_rate": 8.38470909996967e-06, "loss": 0.3735, "step": 11264 }, { "epoch": 1.0110385676785354, "grad_norm": 0.46413560903496387, "learning_rate": 8.384324756986258e-06, "loss": 0.4447, "step": 11265 }, { "epoch": 1.0111283121312065, "grad_norm": 0.5734115500008758, "learning_rate": 8.383940377094081e-06, "loss": 0.4605, "step": 11266 }, { "epoch": 1.0112180565838773, "grad_norm": 0.48105656466824304, "learning_rate": 8.383555960297335e-06, "loss": 0.4853, "step": 11267 }, { "epoch": 1.0113078010365484, "grad_norm": 0.5013235596578091, "learning_rate": 8.38317150660021e-06, "loss": 0.4052, "step": 11268 }, { "epoch": 1.0113975454892195, "grad_norm": 0.45756259762342294, "learning_rate": 8.382787016006898e-06, "loss": 0.3183, "step": 11269 }, { "epoch": 1.0114872899418905, "grad_norm": 0.4568586060681401, "learning_rate": 8.382402488521595e-06, "loss": 0.3926, "step": 11270 }, { "epoch": 1.0115770343945616, "grad_norm": 0.4910016225729047, "learning_rate": 8.38201792414849e-06, "loss": 0.4467, "step": 11271 }, { "epoch": 1.0116667788472324, "grad_norm": 0.4605836713072801, "learning_rate": 8.381633322891785e-06, "loss": 0.3681, "step": 11272 }, { "epoch": 1.0117565232999035, "grad_norm": 0.46133439567401285, "learning_rate": 8.381248684755665e-06, "loss": 0.3681, "step": 11273 }, { "epoch": 1.0118462677525746, "grad_norm": 0.47846038119454504, "learning_rate": 8.380864009744331e-06, "loss": 0.467, "step": 11274 }, { "epoch": 1.0119360122052456, "grad_norm": 0.5449998422708999, "learning_rate": 8.380479297861977e-06, "loss": 0.4175, "step": 11275 }, { "epoch": 1.0120257566579165, "grad_norm": 0.47033344523074416, "learning_rate": 8.380094549112798e-06, "loss": 0.3759, "step": 11276 }, { "epoch": 1.0121155011105876, "grad_norm": 0.5300186846144268, "learning_rate": 8.379709763500989e-06, "loss": 0.4238, "step": 11277 }, { "epoch": 1.0122052455632586, "grad_norm": 0.4819835846494568, "learning_rate": 8.379324941030747e-06, "loss": 0.4299, "step": 11278 }, { "epoch": 1.0122949900159297, "grad_norm": 0.49740590116602995, "learning_rate": 8.378940081706268e-06, "loss": 0.3604, "step": 11279 }, { "epoch": 1.0123847344686006, "grad_norm": 0.46373254295210814, "learning_rate": 8.378555185531754e-06, "loss": 0.4245, "step": 11280 }, { "epoch": 1.0124744789212716, "grad_norm": 0.47998442949010783, "learning_rate": 8.378170252511394e-06, "loss": 0.3741, "step": 11281 }, { "epoch": 1.0125642233739427, "grad_norm": 0.4627357255700568, "learning_rate": 8.377785282649396e-06, "loss": 0.3285, "step": 11282 }, { "epoch": 1.0126539678266138, "grad_norm": 0.46064298671721016, "learning_rate": 8.37740027594995e-06, "loss": 0.3935, "step": 11283 }, { "epoch": 1.0127437122792848, "grad_norm": 0.5183623097369743, "learning_rate": 8.37701523241726e-06, "loss": 0.3852, "step": 11284 }, { "epoch": 1.0128334567319557, "grad_norm": 0.45974624241851425, "learning_rate": 8.37663015205552e-06, "loss": 0.415, "step": 11285 }, { "epoch": 1.0129232011846268, "grad_norm": 0.5066259952806456, "learning_rate": 8.376245034868937e-06, "loss": 0.3902, "step": 11286 }, { "epoch": 1.0130129456372978, "grad_norm": 0.47758443147879176, "learning_rate": 8.375859880861705e-06, "loss": 0.424, "step": 11287 }, { "epoch": 1.013102690089969, "grad_norm": 0.488883928119857, "learning_rate": 8.375474690038027e-06, "loss": 0.3684, "step": 11288 }, { "epoch": 1.0131924345426397, "grad_norm": 0.494130165631937, "learning_rate": 8.375089462402102e-06, "loss": 0.4302, "step": 11289 }, { "epoch": 1.0132821789953108, "grad_norm": 0.4736964189002751, "learning_rate": 8.374704197958134e-06, "loss": 0.3846, "step": 11290 }, { "epoch": 1.0133719234479819, "grad_norm": 0.4857686722400532, "learning_rate": 8.374318896710321e-06, "loss": 0.3627, "step": 11291 }, { "epoch": 1.013461667900653, "grad_norm": 0.5130870749673317, "learning_rate": 8.37393355866287e-06, "loss": 0.436, "step": 11292 }, { "epoch": 1.013551412353324, "grad_norm": 0.5079385882969718, "learning_rate": 8.373548183819977e-06, "loss": 0.4068, "step": 11293 }, { "epoch": 1.0136411568059949, "grad_norm": 0.5062863560298776, "learning_rate": 8.37316277218585e-06, "loss": 0.4382, "step": 11294 }, { "epoch": 1.013730901258666, "grad_norm": 0.5022868518256144, "learning_rate": 8.372777323764691e-06, "loss": 0.3999, "step": 11295 }, { "epoch": 1.013820645711337, "grad_norm": 0.46638935684508614, "learning_rate": 8.372391838560704e-06, "loss": 0.4014, "step": 11296 }, { "epoch": 1.013910390164008, "grad_norm": 0.5114285157297705, "learning_rate": 8.37200631657809e-06, "loss": 0.4635, "step": 11297 }, { "epoch": 1.014000134616679, "grad_norm": 0.49985577409224013, "learning_rate": 8.371620757821055e-06, "loss": 0.4302, "step": 11298 }, { "epoch": 1.01408987906935, "grad_norm": 0.49821516992087966, "learning_rate": 8.371235162293806e-06, "loss": 0.3414, "step": 11299 }, { "epoch": 1.014179623522021, "grad_norm": 0.5002335660923595, "learning_rate": 8.370849530000547e-06, "loss": 0.3859, "step": 11300 }, { "epoch": 1.0142693679746921, "grad_norm": 0.479016942670694, "learning_rate": 8.370463860945481e-06, "loss": 0.4012, "step": 11301 }, { "epoch": 1.014359112427363, "grad_norm": 0.49343163485292213, "learning_rate": 8.370078155132818e-06, "loss": 0.367, "step": 11302 }, { "epoch": 1.014448856880034, "grad_norm": 0.4932699332800966, "learning_rate": 8.369692412566764e-06, "loss": 0.4614, "step": 11303 }, { "epoch": 1.0145386013327051, "grad_norm": 0.49237841069777827, "learning_rate": 8.36930663325152e-06, "loss": 0.3601, "step": 11304 }, { "epoch": 1.0146283457853762, "grad_norm": 0.5185154319973825, "learning_rate": 8.368920817191302e-06, "loss": 0.4693, "step": 11305 }, { "epoch": 1.0147180902380473, "grad_norm": 0.4625341199317369, "learning_rate": 8.368534964390312e-06, "loss": 0.405, "step": 11306 }, { "epoch": 1.0148078346907181, "grad_norm": 0.5061272679745622, "learning_rate": 8.36814907485276e-06, "loss": 0.4217, "step": 11307 }, { "epoch": 1.0148975791433892, "grad_norm": 0.4999777088231699, "learning_rate": 8.367763148582854e-06, "loss": 0.3679, "step": 11308 }, { "epoch": 1.0149873235960603, "grad_norm": 0.44008290711828396, "learning_rate": 8.367377185584801e-06, "loss": 0.361, "step": 11309 }, { "epoch": 1.0150770680487313, "grad_norm": 0.48866977725823363, "learning_rate": 8.366991185862813e-06, "loss": 0.4128, "step": 11310 }, { "epoch": 1.0151668125014022, "grad_norm": 0.460420547976678, "learning_rate": 8.366605149421099e-06, "loss": 0.4153, "step": 11311 }, { "epoch": 1.0152565569540732, "grad_norm": 0.47689290052559186, "learning_rate": 8.366219076263868e-06, "loss": 0.3963, "step": 11312 }, { "epoch": 1.0153463014067443, "grad_norm": 0.47218855916634733, "learning_rate": 8.36583296639533e-06, "loss": 0.4118, "step": 11313 }, { "epoch": 1.0154360458594154, "grad_norm": 0.4989816098082532, "learning_rate": 8.365446819819698e-06, "loss": 0.4525, "step": 11314 }, { "epoch": 1.0155257903120862, "grad_norm": 0.5738713081996772, "learning_rate": 8.365060636541183e-06, "loss": 0.469, "step": 11315 }, { "epoch": 1.0156155347647573, "grad_norm": 0.5134931153994534, "learning_rate": 8.364674416563995e-06, "loss": 0.4038, "step": 11316 }, { "epoch": 1.0157052792174284, "grad_norm": 0.4882924641236426, "learning_rate": 8.364288159892347e-06, "loss": 0.4189, "step": 11317 }, { "epoch": 1.0157950236700994, "grad_norm": 0.4885312197467318, "learning_rate": 8.36390186653045e-06, "loss": 0.3737, "step": 11318 }, { "epoch": 1.0158847681227705, "grad_norm": 0.4785651331328381, "learning_rate": 8.36351553648252e-06, "loss": 0.416, "step": 11319 }, { "epoch": 1.0159745125754414, "grad_norm": 0.4987476044151935, "learning_rate": 8.363129169752766e-06, "loss": 0.4616, "step": 11320 }, { "epoch": 1.0160642570281124, "grad_norm": 0.5097158478120201, "learning_rate": 8.362742766345406e-06, "loss": 0.4414, "step": 11321 }, { "epoch": 1.0161540014807835, "grad_norm": 0.48465199388563845, "learning_rate": 8.362356326264651e-06, "loss": 0.3245, "step": 11322 }, { "epoch": 1.0162437459334546, "grad_norm": 0.474182879779645, "learning_rate": 8.361969849514717e-06, "loss": 0.3995, "step": 11323 }, { "epoch": 1.0163334903861254, "grad_norm": 0.47151797914319815, "learning_rate": 8.361583336099817e-06, "loss": 0.3884, "step": 11324 }, { "epoch": 1.0164232348387965, "grad_norm": 0.4617042509031953, "learning_rate": 8.361196786024169e-06, "loss": 0.3965, "step": 11325 }, { "epoch": 1.0165129792914676, "grad_norm": 0.4909502224159032, "learning_rate": 8.360810199291987e-06, "loss": 0.4159, "step": 11326 }, { "epoch": 1.0166027237441386, "grad_norm": 0.4766090230940404, "learning_rate": 8.360423575907486e-06, "loss": 0.3825, "step": 11327 }, { "epoch": 1.0166924681968097, "grad_norm": 0.5243019780044912, "learning_rate": 8.360036915874882e-06, "loss": 0.4422, "step": 11328 }, { "epoch": 1.0167822126494805, "grad_norm": 0.46046297696965555, "learning_rate": 8.359650219198397e-06, "loss": 0.3979, "step": 11329 }, { "epoch": 1.0168719571021516, "grad_norm": 0.47689575841486426, "learning_rate": 8.359263485882243e-06, "loss": 0.4261, "step": 11330 }, { "epoch": 1.0169617015548227, "grad_norm": 0.4822957296817347, "learning_rate": 8.35887671593064e-06, "loss": 0.3338, "step": 11331 }, { "epoch": 1.0170514460074938, "grad_norm": 0.43231674383089175, "learning_rate": 8.358489909347804e-06, "loss": 0.3654, "step": 11332 }, { "epoch": 1.0171411904601646, "grad_norm": 0.5044037004392955, "learning_rate": 8.358103066137956e-06, "loss": 0.4615, "step": 11333 }, { "epoch": 1.0172309349128357, "grad_norm": 0.4993886073281555, "learning_rate": 8.357716186305312e-06, "loss": 0.4383, "step": 11334 }, { "epoch": 1.0173206793655067, "grad_norm": 0.5015228222433717, "learning_rate": 8.357329269854095e-06, "loss": 0.3595, "step": 11335 }, { "epoch": 1.0174104238181778, "grad_norm": 0.4741887753965688, "learning_rate": 8.356942316788521e-06, "loss": 0.4196, "step": 11336 }, { "epoch": 1.0175001682708487, "grad_norm": 0.5230859348277871, "learning_rate": 8.356555327112812e-06, "loss": 0.3851, "step": 11337 }, { "epoch": 1.0175899127235197, "grad_norm": 0.4763270258071777, "learning_rate": 8.356168300831187e-06, "loss": 0.3418, "step": 11338 }, { "epoch": 1.0176796571761908, "grad_norm": 0.4902687969984469, "learning_rate": 8.35578123794787e-06, "loss": 0.4454, "step": 11339 }, { "epoch": 1.0177694016288619, "grad_norm": 0.46463635002575365, "learning_rate": 8.35539413846708e-06, "loss": 0.4176, "step": 11340 }, { "epoch": 1.017859146081533, "grad_norm": 0.5199876852625014, "learning_rate": 8.355007002393037e-06, "loss": 0.3955, "step": 11341 }, { "epoch": 1.0179488905342038, "grad_norm": 0.5105556181889012, "learning_rate": 8.354619829729965e-06, "loss": 0.3918, "step": 11342 }, { "epoch": 1.0180386349868749, "grad_norm": 0.462727829285628, "learning_rate": 8.354232620482086e-06, "loss": 0.3886, "step": 11343 }, { "epoch": 1.018128379439546, "grad_norm": 0.4787112024684835, "learning_rate": 8.353845374653622e-06, "loss": 0.4101, "step": 11344 }, { "epoch": 1.018218123892217, "grad_norm": 0.5307668134573671, "learning_rate": 8.353458092248799e-06, "loss": 0.4783, "step": 11345 }, { "epoch": 1.0183078683448878, "grad_norm": 0.49403525035364904, "learning_rate": 8.353070773271837e-06, "loss": 0.409, "step": 11346 }, { "epoch": 1.018397612797559, "grad_norm": 0.48535728268905287, "learning_rate": 8.352683417726962e-06, "loss": 0.378, "step": 11347 }, { "epoch": 1.01848735725023, "grad_norm": 0.4987191133947576, "learning_rate": 8.352296025618398e-06, "loss": 0.3886, "step": 11348 }, { "epoch": 1.018577101702901, "grad_norm": 0.45790392861454204, "learning_rate": 8.351908596950372e-06, "loss": 0.3281, "step": 11349 }, { "epoch": 1.018666846155572, "grad_norm": 0.44842670395392686, "learning_rate": 8.351521131727105e-06, "loss": 0.3827, "step": 11350 }, { "epoch": 1.018756590608243, "grad_norm": 0.4902429666386444, "learning_rate": 8.351133629952825e-06, "loss": 0.3841, "step": 11351 }, { "epoch": 1.018846335060914, "grad_norm": 0.4804653324771213, "learning_rate": 8.35074609163176e-06, "loss": 0.413, "step": 11352 }, { "epoch": 1.0189360795135851, "grad_norm": 0.5217913901624716, "learning_rate": 8.350358516768132e-06, "loss": 0.3653, "step": 11353 }, { "epoch": 1.0190258239662562, "grad_norm": 0.4610064296818735, "learning_rate": 8.349970905366171e-06, "loss": 0.3887, "step": 11354 }, { "epoch": 1.019115568418927, "grad_norm": 0.47592992431868136, "learning_rate": 8.349583257430103e-06, "loss": 0.4382, "step": 11355 }, { "epoch": 1.019205312871598, "grad_norm": 0.46113966330680517, "learning_rate": 8.349195572964157e-06, "loss": 0.341, "step": 11356 }, { "epoch": 1.0192950573242692, "grad_norm": 0.49606258261307157, "learning_rate": 8.34880785197256e-06, "loss": 0.3872, "step": 11357 }, { "epoch": 1.0193848017769402, "grad_norm": 0.4904121956060365, "learning_rate": 8.34842009445954e-06, "loss": 0.4126, "step": 11358 }, { "epoch": 1.019474546229611, "grad_norm": 0.48693513318936466, "learning_rate": 8.348032300429325e-06, "loss": 0.3868, "step": 11359 }, { "epoch": 1.0195642906822822, "grad_norm": 0.471370982515594, "learning_rate": 8.347644469886145e-06, "loss": 0.4311, "step": 11360 }, { "epoch": 1.0196540351349532, "grad_norm": 0.531259904136943, "learning_rate": 8.347256602834231e-06, "loss": 0.4417, "step": 11361 }, { "epoch": 1.0197437795876243, "grad_norm": 0.5109366839048058, "learning_rate": 8.346868699277813e-06, "loss": 0.4172, "step": 11362 }, { "epoch": 1.0198335240402954, "grad_norm": 0.47358010429922137, "learning_rate": 8.346480759221119e-06, "loss": 0.399, "step": 11363 }, { "epoch": 1.0199232684929662, "grad_norm": 0.5104101871747152, "learning_rate": 8.34609278266838e-06, "loss": 0.4879, "step": 11364 }, { "epoch": 1.0200130129456373, "grad_norm": 0.5179856641101751, "learning_rate": 8.34570476962383e-06, "loss": 0.4359, "step": 11365 }, { "epoch": 1.0201027573983084, "grad_norm": 0.5612577810839466, "learning_rate": 8.345316720091699e-06, "loss": 0.4457, "step": 11366 }, { "epoch": 1.0201925018509794, "grad_norm": 0.4882543389880165, "learning_rate": 8.344928634076218e-06, "loss": 0.3862, "step": 11367 }, { "epoch": 1.0202822463036503, "grad_norm": 0.4899617024570923, "learning_rate": 8.34454051158162e-06, "loss": 0.4055, "step": 11368 }, { "epoch": 1.0203719907563213, "grad_norm": 0.46389982909356, "learning_rate": 8.344152352612141e-06, "loss": 0.3839, "step": 11369 }, { "epoch": 1.0204617352089924, "grad_norm": 0.4917960019657696, "learning_rate": 8.343764157172008e-06, "loss": 0.4358, "step": 11370 }, { "epoch": 1.0205514796616635, "grad_norm": 0.4943465972435545, "learning_rate": 8.34337592526546e-06, "loss": 0.4031, "step": 11371 }, { "epoch": 1.0206412241143343, "grad_norm": 0.5132805856440019, "learning_rate": 8.342987656896726e-06, "loss": 0.4306, "step": 11372 }, { "epoch": 1.0207309685670054, "grad_norm": 0.5039341922964117, "learning_rate": 8.342599352070047e-06, "loss": 0.3754, "step": 11373 }, { "epoch": 1.0208207130196765, "grad_norm": 0.512301665857495, "learning_rate": 8.342211010789651e-06, "loss": 0.4689, "step": 11374 }, { "epoch": 1.0209104574723475, "grad_norm": 0.479880307033816, "learning_rate": 8.341822633059777e-06, "loss": 0.4053, "step": 11375 }, { "epoch": 1.0210002019250186, "grad_norm": 0.5098616035472907, "learning_rate": 8.341434218884661e-06, "loss": 0.4715, "step": 11376 }, { "epoch": 1.0210899463776895, "grad_norm": 0.46957267210094855, "learning_rate": 8.341045768268536e-06, "loss": 0.3269, "step": 11377 }, { "epoch": 1.0211796908303605, "grad_norm": 0.43935839167585966, "learning_rate": 8.34065728121564e-06, "loss": 0.3908, "step": 11378 }, { "epoch": 1.0212694352830316, "grad_norm": 0.4740945092940761, "learning_rate": 8.34026875773021e-06, "loss": 0.3891, "step": 11379 }, { "epoch": 1.0213591797357027, "grad_norm": 0.4674569753555092, "learning_rate": 8.339880197816483e-06, "loss": 0.3495, "step": 11380 }, { "epoch": 1.0214489241883735, "grad_norm": 0.4484768625356735, "learning_rate": 8.339491601478698e-06, "loss": 0.4121, "step": 11381 }, { "epoch": 1.0215386686410446, "grad_norm": 0.5066536203265707, "learning_rate": 8.33910296872109e-06, "loss": 0.4285, "step": 11382 }, { "epoch": 1.0216284130937157, "grad_norm": 0.5201251073689094, "learning_rate": 8.338714299547898e-06, "loss": 0.452, "step": 11383 }, { "epoch": 1.0217181575463867, "grad_norm": 0.5327422254234255, "learning_rate": 8.338325593963363e-06, "loss": 0.4064, "step": 11384 }, { "epoch": 1.0218079019990576, "grad_norm": 0.47347269926500885, "learning_rate": 8.337936851971724e-06, "loss": 0.3629, "step": 11385 }, { "epoch": 1.0218976464517286, "grad_norm": 0.48567310755865156, "learning_rate": 8.337548073577218e-06, "loss": 0.3803, "step": 11386 }, { "epoch": 1.0219873909043997, "grad_norm": 0.505352720704247, "learning_rate": 8.337159258784086e-06, "loss": 0.4576, "step": 11387 }, { "epoch": 1.0220771353570708, "grad_norm": 0.5114950032648363, "learning_rate": 8.336770407596568e-06, "loss": 0.4092, "step": 11388 }, { "epoch": 1.0221668798097419, "grad_norm": 0.46151186720566556, "learning_rate": 8.336381520018905e-06, "loss": 0.4347, "step": 11389 }, { "epoch": 1.0222566242624127, "grad_norm": 0.5718188964625462, "learning_rate": 8.33599259605534e-06, "loss": 0.413, "step": 11390 }, { "epoch": 1.0223463687150838, "grad_norm": 0.44417709904329317, "learning_rate": 8.335603635710113e-06, "loss": 0.3806, "step": 11391 }, { "epoch": 1.0224361131677548, "grad_norm": 0.4930736930348725, "learning_rate": 8.335214638987464e-06, "loss": 0.4235, "step": 11392 }, { "epoch": 1.022525857620426, "grad_norm": 0.49648344872436956, "learning_rate": 8.334825605891638e-06, "loss": 0.3935, "step": 11393 }, { "epoch": 1.0226156020730968, "grad_norm": 0.5308535692484374, "learning_rate": 8.334436536426876e-06, "loss": 0.4423, "step": 11394 }, { "epoch": 1.0227053465257678, "grad_norm": 0.47294303732640613, "learning_rate": 8.334047430597424e-06, "loss": 0.4646, "step": 11395 }, { "epoch": 1.022795090978439, "grad_norm": 0.5541887153391863, "learning_rate": 8.333658288407522e-06, "loss": 0.4883, "step": 11396 }, { "epoch": 1.02288483543111, "grad_norm": 0.5264332457198672, "learning_rate": 8.333269109861416e-06, "loss": 0.4543, "step": 11397 }, { "epoch": 1.022974579883781, "grad_norm": 0.4937597457264233, "learning_rate": 8.332879894963349e-06, "loss": 0.3836, "step": 11398 }, { "epoch": 1.023064324336452, "grad_norm": 0.4732500997070676, "learning_rate": 8.332490643717569e-06, "loss": 0.3891, "step": 11399 }, { "epoch": 1.023154068789123, "grad_norm": 0.48523819957473296, "learning_rate": 8.332101356128315e-06, "loss": 0.4187, "step": 11400 }, { "epoch": 1.023243813241794, "grad_norm": 0.4828833522798052, "learning_rate": 8.331712032199839e-06, "loss": 0.4058, "step": 11401 }, { "epoch": 1.023333557694465, "grad_norm": 0.4781416960769096, "learning_rate": 8.331322671936381e-06, "loss": 0.3375, "step": 11402 }, { "epoch": 1.023423302147136, "grad_norm": 0.4792088627222052, "learning_rate": 8.330933275342192e-06, "loss": 0.365, "step": 11403 }, { "epoch": 1.023513046599807, "grad_norm": 0.4943202931153259, "learning_rate": 8.330543842421517e-06, "loss": 0.4124, "step": 11404 }, { "epoch": 1.023602791052478, "grad_norm": 0.5017611842040278, "learning_rate": 8.330154373178601e-06, "loss": 0.3698, "step": 11405 }, { "epoch": 1.0236925355051492, "grad_norm": 0.5041201951095208, "learning_rate": 8.329764867617695e-06, "loss": 0.3636, "step": 11406 }, { "epoch": 1.02378227995782, "grad_norm": 0.5008402707380962, "learning_rate": 8.329375325743046e-06, "loss": 0.4663, "step": 11407 }, { "epoch": 1.023872024410491, "grad_norm": 0.4791275190729689, "learning_rate": 8.3289857475589e-06, "loss": 0.383, "step": 11408 }, { "epoch": 1.0239617688631621, "grad_norm": 0.5135357618709547, "learning_rate": 8.328596133069507e-06, "loss": 0.4129, "step": 11409 }, { "epoch": 1.0240515133158332, "grad_norm": 0.48628111134809743, "learning_rate": 8.328206482279117e-06, "loss": 0.4111, "step": 11410 }, { "epoch": 1.0241412577685043, "grad_norm": 0.4931581835770764, "learning_rate": 8.32781679519198e-06, "loss": 0.4132, "step": 11411 }, { "epoch": 1.0242310022211751, "grad_norm": 0.478510197915143, "learning_rate": 8.327427071812342e-06, "loss": 0.3932, "step": 11412 }, { "epoch": 1.0243207466738462, "grad_norm": 0.48550853327981575, "learning_rate": 8.327037312144458e-06, "loss": 0.4473, "step": 11413 }, { "epoch": 1.0244104911265173, "grad_norm": 0.5374917878552936, "learning_rate": 8.326647516192576e-06, "loss": 0.4255, "step": 11414 }, { "epoch": 1.0245002355791883, "grad_norm": 0.47834428113045324, "learning_rate": 8.326257683960947e-06, "loss": 0.4086, "step": 11415 }, { "epoch": 1.0245899800318592, "grad_norm": 0.4771876587224208, "learning_rate": 8.325867815453823e-06, "loss": 0.4329, "step": 11416 }, { "epoch": 1.0246797244845303, "grad_norm": 0.5329704352643527, "learning_rate": 8.325477910675456e-06, "loss": 0.4131, "step": 11417 }, { "epoch": 1.0247694689372013, "grad_norm": 0.5387790561648202, "learning_rate": 8.325087969630096e-06, "loss": 0.4444, "step": 11418 }, { "epoch": 1.0248592133898724, "grad_norm": 0.4731458110895897, "learning_rate": 8.324697992322e-06, "loss": 0.4419, "step": 11419 }, { "epoch": 1.0249489578425433, "grad_norm": 0.5232029893489263, "learning_rate": 8.324307978755417e-06, "loss": 0.4631, "step": 11420 }, { "epoch": 1.0250387022952143, "grad_norm": 0.5154473128151097, "learning_rate": 8.323917928934604e-06, "loss": 0.437, "step": 11421 }, { "epoch": 1.0251284467478854, "grad_norm": 0.4393033644234219, "learning_rate": 8.323527842863812e-06, "loss": 0.419, "step": 11422 }, { "epoch": 1.0252181912005565, "grad_norm": 0.5520479419855471, "learning_rate": 8.323137720547296e-06, "loss": 0.42, "step": 11423 }, { "epoch": 1.0253079356532275, "grad_norm": 0.4806010060878027, "learning_rate": 8.322747561989309e-06, "loss": 0.401, "step": 11424 }, { "epoch": 1.0253976801058984, "grad_norm": 0.47597066145038486, "learning_rate": 8.32235736719411e-06, "loss": 0.3674, "step": 11425 }, { "epoch": 1.0254874245585694, "grad_norm": 0.463662245315041, "learning_rate": 8.321967136165951e-06, "loss": 0.4263, "step": 11426 }, { "epoch": 1.0255771690112405, "grad_norm": 0.4904948855057875, "learning_rate": 8.321576868909087e-06, "loss": 0.4028, "step": 11427 }, { "epoch": 1.0256669134639116, "grad_norm": 0.5304233092276093, "learning_rate": 8.321186565427778e-06, "loss": 0.4423, "step": 11428 }, { "epoch": 1.0257566579165824, "grad_norm": 0.5477800759040267, "learning_rate": 8.320796225726278e-06, "loss": 0.4296, "step": 11429 }, { "epoch": 1.0258464023692535, "grad_norm": 0.5121973484193192, "learning_rate": 8.320405849808847e-06, "loss": 0.4988, "step": 11430 }, { "epoch": 1.0259361468219246, "grad_norm": 0.5182805330087357, "learning_rate": 8.320015437679736e-06, "loss": 0.4043, "step": 11431 }, { "epoch": 1.0260258912745956, "grad_norm": 0.502299742271599, "learning_rate": 8.319624989343207e-06, "loss": 0.465, "step": 11432 }, { "epoch": 1.0261156357272667, "grad_norm": 0.4992720824288106, "learning_rate": 8.31923450480352e-06, "loss": 0.4359, "step": 11433 }, { "epoch": 1.0262053801799376, "grad_norm": 0.537986563431516, "learning_rate": 8.31884398406493e-06, "loss": 0.4667, "step": 11434 }, { "epoch": 1.0262951246326086, "grad_norm": 0.4955018616376691, "learning_rate": 8.318453427131696e-06, "loss": 0.3751, "step": 11435 }, { "epoch": 1.0263848690852797, "grad_norm": 0.46905048702304375, "learning_rate": 8.318062834008081e-06, "loss": 0.3817, "step": 11436 }, { "epoch": 1.0264746135379508, "grad_norm": 0.475528923114968, "learning_rate": 8.31767220469834e-06, "loss": 0.4583, "step": 11437 }, { "epoch": 1.0265643579906216, "grad_norm": 0.5404420377187481, "learning_rate": 8.317281539206736e-06, "loss": 0.3728, "step": 11438 }, { "epoch": 1.0266541024432927, "grad_norm": 0.49849286391936265, "learning_rate": 8.31689083753753e-06, "loss": 0.4331, "step": 11439 }, { "epoch": 1.0267438468959638, "grad_norm": 0.5245919137291158, "learning_rate": 8.316500099694982e-06, "loss": 0.3581, "step": 11440 }, { "epoch": 1.0268335913486348, "grad_norm": 0.47633572119265233, "learning_rate": 8.316109325683351e-06, "loss": 0.3487, "step": 11441 }, { "epoch": 1.0269233358013057, "grad_norm": 0.43194629785943733, "learning_rate": 8.315718515506903e-06, "loss": 0.3428, "step": 11442 }, { "epoch": 1.0270130802539768, "grad_norm": 0.4534825196792488, "learning_rate": 8.315327669169896e-06, "loss": 0.424, "step": 11443 }, { "epoch": 1.0271028247066478, "grad_norm": 0.5685169558873476, "learning_rate": 8.314936786676597e-06, "loss": 0.4708, "step": 11444 }, { "epoch": 1.027192569159319, "grad_norm": 0.5021018752725169, "learning_rate": 8.314545868031265e-06, "loss": 0.4027, "step": 11445 }, { "epoch": 1.02728231361199, "grad_norm": 0.5476404981016141, "learning_rate": 8.314154913238164e-06, "loss": 0.4118, "step": 11446 }, { "epoch": 1.0273720580646608, "grad_norm": 0.4609314870436505, "learning_rate": 8.313763922301559e-06, "loss": 0.3949, "step": 11447 }, { "epoch": 1.0274618025173319, "grad_norm": 0.5290959325027373, "learning_rate": 8.313372895225713e-06, "loss": 0.4475, "step": 11448 }, { "epoch": 1.027551546970003, "grad_norm": 0.46984750855383534, "learning_rate": 8.312981832014892e-06, "loss": 0.347, "step": 11449 }, { "epoch": 1.027641291422674, "grad_norm": 0.4842027499040006, "learning_rate": 8.312590732673359e-06, "loss": 0.4193, "step": 11450 }, { "epoch": 1.0277310358753449, "grad_norm": 0.4748659507418834, "learning_rate": 8.31219959720538e-06, "loss": 0.3412, "step": 11451 }, { "epoch": 1.027820780328016, "grad_norm": 0.4595305837600578, "learning_rate": 8.31180842561522e-06, "loss": 0.4213, "step": 11452 }, { "epoch": 1.027910524780687, "grad_norm": 0.47469774186846464, "learning_rate": 8.311417217907145e-06, "loss": 0.3166, "step": 11453 }, { "epoch": 1.028000269233358, "grad_norm": 0.4607772116065953, "learning_rate": 8.311025974085423e-06, "loss": 0.3923, "step": 11454 }, { "epoch": 1.028090013686029, "grad_norm": 0.4509146233379243, "learning_rate": 8.31063469415432e-06, "loss": 0.4289, "step": 11455 }, { "epoch": 1.0281797581387, "grad_norm": 0.5611697005327643, "learning_rate": 8.310243378118103e-06, "loss": 0.4026, "step": 11456 }, { "epoch": 1.028269502591371, "grad_norm": 0.5065802645429143, "learning_rate": 8.309852025981038e-06, "loss": 0.381, "step": 11457 }, { "epoch": 1.0283592470440421, "grad_norm": 0.4958617337535894, "learning_rate": 8.309460637747397e-06, "loss": 0.4845, "step": 11458 }, { "epoch": 1.0284489914967132, "grad_norm": 0.5440714250211154, "learning_rate": 8.309069213421445e-06, "loss": 0.45, "step": 11459 }, { "epoch": 1.028538735949384, "grad_norm": 0.5090006310764339, "learning_rate": 8.308677753007453e-06, "loss": 0.3725, "step": 11460 }, { "epoch": 1.0286284804020551, "grad_norm": 0.47915701764426244, "learning_rate": 8.308286256509688e-06, "loss": 0.4028, "step": 11461 }, { "epoch": 1.0287182248547262, "grad_norm": 0.5544112468046979, "learning_rate": 8.307894723932422e-06, "loss": 0.4289, "step": 11462 }, { "epoch": 1.0288079693073973, "grad_norm": 0.4976106270092681, "learning_rate": 8.307503155279921e-06, "loss": 0.4153, "step": 11463 }, { "epoch": 1.0288977137600681, "grad_norm": 0.474130592248271, "learning_rate": 8.307111550556461e-06, "loss": 0.4164, "step": 11464 }, { "epoch": 1.0289874582127392, "grad_norm": 0.5221862672672889, "learning_rate": 8.306719909766308e-06, "loss": 0.3827, "step": 11465 }, { "epoch": 1.0290772026654103, "grad_norm": 0.5166188596725879, "learning_rate": 8.306328232913735e-06, "loss": 0.4495, "step": 11466 }, { "epoch": 1.0291669471180813, "grad_norm": 0.465677542145551, "learning_rate": 8.305936520003014e-06, "loss": 0.3898, "step": 11467 }, { "epoch": 1.0292566915707524, "grad_norm": 0.48632968321480985, "learning_rate": 8.305544771038416e-06, "loss": 0.363, "step": 11468 }, { "epoch": 1.0293464360234232, "grad_norm": 0.48409042335093244, "learning_rate": 8.305152986024213e-06, "loss": 0.3649, "step": 11469 }, { "epoch": 1.0294361804760943, "grad_norm": 0.45301154986928516, "learning_rate": 8.304761164964681e-06, "loss": 0.4515, "step": 11470 }, { "epoch": 1.0295259249287654, "grad_norm": 0.5247365380175515, "learning_rate": 8.304369307864089e-06, "loss": 0.4416, "step": 11471 }, { "epoch": 1.0296156693814364, "grad_norm": 0.4912752023050559, "learning_rate": 8.303977414726712e-06, "loss": 0.3539, "step": 11472 }, { "epoch": 1.0297054138341073, "grad_norm": 0.4509474371697922, "learning_rate": 8.303585485556826e-06, "loss": 0.4495, "step": 11473 }, { "epoch": 1.0297951582867784, "grad_norm": 0.4903806578863507, "learning_rate": 8.3031935203587e-06, "loss": 0.397, "step": 11474 }, { "epoch": 1.0298849027394494, "grad_norm": 0.5241255570406858, "learning_rate": 8.302801519136614e-06, "loss": 0.448, "step": 11475 }, { "epoch": 1.0299746471921205, "grad_norm": 0.47830675512379967, "learning_rate": 8.302409481894841e-06, "loss": 0.3671, "step": 11476 }, { "epoch": 1.0300643916447914, "grad_norm": 0.49352158962030757, "learning_rate": 8.302017408637656e-06, "loss": 0.4175, "step": 11477 }, { "epoch": 1.0301541360974624, "grad_norm": 0.502558273251193, "learning_rate": 8.301625299369335e-06, "loss": 0.404, "step": 11478 }, { "epoch": 1.0302438805501335, "grad_norm": 0.49232794298218924, "learning_rate": 8.301233154094158e-06, "loss": 0.3658, "step": 11479 }, { "epoch": 1.0303336250028046, "grad_norm": 0.446405791558752, "learning_rate": 8.300840972816396e-06, "loss": 0.4293, "step": 11480 }, { "epoch": 1.0304233694554756, "grad_norm": 0.5013937894489973, "learning_rate": 8.300448755540328e-06, "loss": 0.4039, "step": 11481 }, { "epoch": 1.0305131139081465, "grad_norm": 0.4987049635876817, "learning_rate": 8.300056502270231e-06, "loss": 0.42, "step": 11482 }, { "epoch": 1.0306028583608176, "grad_norm": 0.48172527823130634, "learning_rate": 8.299664213010386e-06, "loss": 0.3693, "step": 11483 }, { "epoch": 1.0306926028134886, "grad_norm": 0.48088734552509343, "learning_rate": 8.299271887765069e-06, "loss": 0.4552, "step": 11484 }, { "epoch": 1.0307823472661597, "grad_norm": 0.5063563515549291, "learning_rate": 8.298879526538557e-06, "loss": 0.3776, "step": 11485 }, { "epoch": 1.0308720917188305, "grad_norm": 0.44426364704202703, "learning_rate": 8.298487129335129e-06, "loss": 0.3961, "step": 11486 }, { "epoch": 1.0309618361715016, "grad_norm": 0.4988131516094942, "learning_rate": 8.298094696159069e-06, "loss": 0.4643, "step": 11487 }, { "epoch": 1.0310515806241727, "grad_norm": 0.5064663934470484, "learning_rate": 8.297702227014653e-06, "loss": 0.3945, "step": 11488 }, { "epoch": 1.0311413250768438, "grad_norm": 0.5116109083044761, "learning_rate": 8.297309721906161e-06, "loss": 0.4109, "step": 11489 }, { "epoch": 1.0312310695295146, "grad_norm": 0.48329355548852254, "learning_rate": 8.296917180837875e-06, "loss": 0.4136, "step": 11490 }, { "epoch": 1.0313208139821857, "grad_norm": 0.4792461544167989, "learning_rate": 8.296524603814077e-06, "loss": 0.3877, "step": 11491 }, { "epoch": 1.0314105584348567, "grad_norm": 0.5299804023365852, "learning_rate": 8.296131990839045e-06, "loss": 0.4806, "step": 11492 }, { "epoch": 1.0315003028875278, "grad_norm": 0.5209309363870579, "learning_rate": 8.295739341917064e-06, "loss": 0.4473, "step": 11493 }, { "epoch": 1.0315900473401989, "grad_norm": 0.5273167027176082, "learning_rate": 8.295346657052415e-06, "loss": 0.5097, "step": 11494 }, { "epoch": 1.0316797917928697, "grad_norm": 0.48545173962247234, "learning_rate": 8.294953936249378e-06, "loss": 0.4139, "step": 11495 }, { "epoch": 1.0317695362455408, "grad_norm": 0.47431751012935974, "learning_rate": 8.29456117951224e-06, "loss": 0.374, "step": 11496 }, { "epoch": 1.0318592806982119, "grad_norm": 0.5104188629827698, "learning_rate": 8.294168386845283e-06, "loss": 0.4503, "step": 11497 }, { "epoch": 1.031949025150883, "grad_norm": 0.47789917278820926, "learning_rate": 8.293775558252788e-06, "loss": 0.4011, "step": 11498 }, { "epoch": 1.0320387696035538, "grad_norm": 0.5043883612699421, "learning_rate": 8.293382693739045e-06, "loss": 0.4181, "step": 11499 }, { "epoch": 1.0321285140562249, "grad_norm": 0.5173366572865588, "learning_rate": 8.292989793308332e-06, "loss": 0.4806, "step": 11500 }, { "epoch": 1.032218258508896, "grad_norm": 0.4944654859134638, "learning_rate": 8.292596856964939e-06, "loss": 0.3892, "step": 11501 }, { "epoch": 1.032308002961567, "grad_norm": 0.47297206514815815, "learning_rate": 8.292203884713147e-06, "loss": 0.4117, "step": 11502 }, { "epoch": 1.032397747414238, "grad_norm": 0.5259436558580268, "learning_rate": 8.291810876557244e-06, "loss": 0.4452, "step": 11503 }, { "epoch": 1.032487491866909, "grad_norm": 0.4926078687162778, "learning_rate": 8.291417832501517e-06, "loss": 0.362, "step": 11504 }, { "epoch": 1.03257723631958, "grad_norm": 0.475239452501053, "learning_rate": 8.29102475255025e-06, "loss": 0.3762, "step": 11505 }, { "epoch": 1.032666980772251, "grad_norm": 0.4759769106871371, "learning_rate": 8.290631636707734e-06, "loss": 0.3991, "step": 11506 }, { "epoch": 1.0327567252249221, "grad_norm": 0.5220641239512751, "learning_rate": 8.29023848497825e-06, "loss": 0.42, "step": 11507 }, { "epoch": 1.032846469677593, "grad_norm": 0.4832112060741, "learning_rate": 8.289845297366091e-06, "loss": 0.3757, "step": 11508 }, { "epoch": 1.032936214130264, "grad_norm": 0.4631107651419568, "learning_rate": 8.289452073875542e-06, "loss": 0.4303, "step": 11509 }, { "epoch": 1.033025958582935, "grad_norm": 0.4625870670953198, "learning_rate": 8.289058814510894e-06, "loss": 0.3757, "step": 11510 }, { "epoch": 1.0331157030356062, "grad_norm": 0.4705682085527687, "learning_rate": 8.288665519276433e-06, "loss": 0.3666, "step": 11511 }, { "epoch": 1.033205447488277, "grad_norm": 0.47521401413955444, "learning_rate": 8.28827218817645e-06, "loss": 0.3399, "step": 11512 }, { "epoch": 1.033295191940948, "grad_norm": 0.467739579982879, "learning_rate": 8.287878821215233e-06, "loss": 0.4522, "step": 11513 }, { "epoch": 1.0333849363936192, "grad_norm": 0.5216259873778073, "learning_rate": 8.287485418397075e-06, "loss": 0.4959, "step": 11514 }, { "epoch": 1.0334746808462902, "grad_norm": 0.5073234309812732, "learning_rate": 8.287091979726262e-06, "loss": 0.345, "step": 11515 }, { "epoch": 1.0335644252989613, "grad_norm": 0.48750762834931466, "learning_rate": 8.28669850520709e-06, "loss": 0.4221, "step": 11516 }, { "epoch": 1.0336541697516322, "grad_norm": 0.4737364308633755, "learning_rate": 8.286304994843844e-06, "loss": 0.3666, "step": 11517 }, { "epoch": 1.0337439142043032, "grad_norm": 0.4803028421906241, "learning_rate": 8.285911448640819e-06, "loss": 0.3294, "step": 11518 }, { "epoch": 1.0338336586569743, "grad_norm": 0.45791746791947147, "learning_rate": 8.28551786660231e-06, "loss": 0.477, "step": 11519 }, { "epoch": 1.0339234031096454, "grad_norm": 0.5615287796808879, "learning_rate": 8.285124248732604e-06, "loss": 0.4385, "step": 11520 }, { "epoch": 1.0340131475623162, "grad_norm": 0.520156254894962, "learning_rate": 8.284730595035996e-06, "loss": 0.4534, "step": 11521 }, { "epoch": 1.0341028920149873, "grad_norm": 0.5020399092289693, "learning_rate": 8.28433690551678e-06, "loss": 0.4075, "step": 11522 }, { "epoch": 1.0341926364676584, "grad_norm": 0.4971794559598562, "learning_rate": 8.283943180179247e-06, "loss": 0.4521, "step": 11523 }, { "epoch": 1.0342823809203294, "grad_norm": 0.527220602500922, "learning_rate": 8.283549419027694e-06, "loss": 0.387, "step": 11524 }, { "epoch": 1.0343721253730003, "grad_norm": 0.4929226209867686, "learning_rate": 8.283155622066412e-06, "loss": 0.3661, "step": 11525 }, { "epoch": 1.0344618698256713, "grad_norm": 0.4677777525715866, "learning_rate": 8.2827617892997e-06, "loss": 0.3665, "step": 11526 }, { "epoch": 1.0345516142783424, "grad_norm": 0.47156949846087254, "learning_rate": 8.282367920731846e-06, "loss": 0.445, "step": 11527 }, { "epoch": 1.0346413587310135, "grad_norm": 0.4888704707163619, "learning_rate": 8.281974016367153e-06, "loss": 0.3792, "step": 11528 }, { "epoch": 1.0347311031836846, "grad_norm": 0.476981997626384, "learning_rate": 8.281580076209915e-06, "loss": 0.4132, "step": 11529 }, { "epoch": 1.0348208476363554, "grad_norm": 0.47157830475864365, "learning_rate": 8.281186100264423e-06, "loss": 0.3841, "step": 11530 }, { "epoch": 1.0349105920890265, "grad_norm": 0.46984970116473324, "learning_rate": 8.280792088534979e-06, "loss": 0.3616, "step": 11531 }, { "epoch": 1.0350003365416975, "grad_norm": 0.4483656871110552, "learning_rate": 8.28039804102588e-06, "loss": 0.4106, "step": 11532 }, { "epoch": 1.0350900809943686, "grad_norm": 0.4640302406517542, "learning_rate": 8.28000395774142e-06, "loss": 0.4025, "step": 11533 }, { "epoch": 1.0351798254470395, "grad_norm": 0.4750870430956397, "learning_rate": 8.2796098386859e-06, "loss": 0.3465, "step": 11534 }, { "epoch": 1.0352695698997105, "grad_norm": 0.47036681214527754, "learning_rate": 8.279215683863617e-06, "loss": 0.3021, "step": 11535 }, { "epoch": 1.0353593143523816, "grad_norm": 0.4444856517159727, "learning_rate": 8.278821493278868e-06, "loss": 0.4164, "step": 11536 }, { "epoch": 1.0354490588050527, "grad_norm": 0.5086561246574861, "learning_rate": 8.278427266935954e-06, "loss": 0.4529, "step": 11537 }, { "epoch": 1.0355388032577237, "grad_norm": 0.4842749170921911, "learning_rate": 8.278033004839175e-06, "loss": 0.3837, "step": 11538 }, { "epoch": 1.0356285477103946, "grad_norm": 0.493606480102832, "learning_rate": 8.277638706992828e-06, "loss": 0.4639, "step": 11539 }, { "epoch": 1.0357182921630657, "grad_norm": 0.4998923403229861, "learning_rate": 8.277244373401215e-06, "loss": 0.3838, "step": 11540 }, { "epoch": 1.0358080366157367, "grad_norm": 0.5082241350340818, "learning_rate": 8.276850004068637e-06, "loss": 0.4161, "step": 11541 }, { "epoch": 1.0358977810684078, "grad_norm": 0.5127811071673207, "learning_rate": 8.276455598999393e-06, "loss": 0.4899, "step": 11542 }, { "epoch": 1.0359875255210786, "grad_norm": 0.5275800767157763, "learning_rate": 8.276061158197786e-06, "loss": 0.3528, "step": 11543 }, { "epoch": 1.0360772699737497, "grad_norm": 0.4852574808841579, "learning_rate": 8.275666681668118e-06, "loss": 0.4051, "step": 11544 }, { "epoch": 1.0361670144264208, "grad_norm": 0.46634053214983834, "learning_rate": 8.275272169414687e-06, "loss": 0.3459, "step": 11545 }, { "epoch": 1.0362567588790919, "grad_norm": 0.47118793745160625, "learning_rate": 8.274877621441803e-06, "loss": 0.4027, "step": 11546 }, { "epoch": 1.0363465033317627, "grad_norm": 0.5300168835338381, "learning_rate": 8.274483037753761e-06, "loss": 0.4227, "step": 11547 }, { "epoch": 1.0364362477844338, "grad_norm": 0.4911677284057476, "learning_rate": 8.27408841835487e-06, "loss": 0.3837, "step": 11548 }, { "epoch": 1.0365259922371048, "grad_norm": 0.4783328667756348, "learning_rate": 8.273693763249431e-06, "loss": 0.3733, "step": 11549 }, { "epoch": 1.036615736689776, "grad_norm": 0.4695046625913438, "learning_rate": 8.273299072441747e-06, "loss": 0.4388, "step": 11550 }, { "epoch": 1.036705481142447, "grad_norm": 0.537862935082102, "learning_rate": 8.272904345936124e-06, "loss": 0.4201, "step": 11551 }, { "epoch": 1.0367952255951178, "grad_norm": 0.4878953275658001, "learning_rate": 8.272509583736868e-06, "loss": 0.3997, "step": 11552 }, { "epoch": 1.036884970047789, "grad_norm": 0.4766595181918026, "learning_rate": 8.272114785848281e-06, "loss": 0.3154, "step": 11553 }, { "epoch": 1.03697471450046, "grad_norm": 0.47563282192728207, "learning_rate": 8.271719952274673e-06, "loss": 0.4331, "step": 11554 }, { "epoch": 1.037064458953131, "grad_norm": 0.5153376271807975, "learning_rate": 8.271325083020345e-06, "loss": 0.4314, "step": 11555 }, { "epoch": 1.0371542034058019, "grad_norm": 0.4699070919732211, "learning_rate": 8.270930178089608e-06, "loss": 0.3811, "step": 11556 }, { "epoch": 1.037243947858473, "grad_norm": 0.5081406584906195, "learning_rate": 8.270535237486765e-06, "loss": 0.411, "step": 11557 }, { "epoch": 1.037333692311144, "grad_norm": 0.5173411268135292, "learning_rate": 8.270140261216126e-06, "loss": 0.4347, "step": 11558 }, { "epoch": 1.037423436763815, "grad_norm": 0.5226533476282447, "learning_rate": 8.269745249281996e-06, "loss": 0.4351, "step": 11559 }, { "epoch": 1.037513181216486, "grad_norm": 0.44623578400122377, "learning_rate": 8.269350201688684e-06, "loss": 0.3405, "step": 11560 }, { "epoch": 1.037602925669157, "grad_norm": 0.5191543862370829, "learning_rate": 8.2689551184405e-06, "loss": 0.4493, "step": 11561 }, { "epoch": 1.037692670121828, "grad_norm": 0.5271356926898331, "learning_rate": 8.26855999954175e-06, "loss": 0.3972, "step": 11562 }, { "epoch": 1.0377824145744992, "grad_norm": 0.46435484292810764, "learning_rate": 8.268164844996744e-06, "loss": 0.4107, "step": 11563 }, { "epoch": 1.0378721590271702, "grad_norm": 0.5130962731105204, "learning_rate": 8.267769654809792e-06, "loss": 0.3778, "step": 11564 }, { "epoch": 1.037961903479841, "grad_norm": 0.5008080585060637, "learning_rate": 8.267374428985202e-06, "loss": 0.4474, "step": 11565 }, { "epoch": 1.0380516479325121, "grad_norm": 0.5172040690775888, "learning_rate": 8.266979167527287e-06, "loss": 0.3636, "step": 11566 }, { "epoch": 1.0381413923851832, "grad_norm": 0.4673952443342788, "learning_rate": 8.266583870440357e-06, "loss": 0.3822, "step": 11567 }, { "epoch": 1.0382311368378543, "grad_norm": 0.45948239164573257, "learning_rate": 8.266188537728724e-06, "loss": 0.4097, "step": 11568 }, { "epoch": 1.0383208812905251, "grad_norm": 0.5148822112507836, "learning_rate": 8.265793169396695e-06, "loss": 0.3861, "step": 11569 }, { "epoch": 1.0384106257431962, "grad_norm": 0.5097217027330739, "learning_rate": 8.265397765448586e-06, "loss": 0.3873, "step": 11570 }, { "epoch": 1.0385003701958673, "grad_norm": 0.5083506549615109, "learning_rate": 8.265002325888706e-06, "loss": 0.4121, "step": 11571 }, { "epoch": 1.0385901146485383, "grad_norm": 0.4643325097877188, "learning_rate": 8.264606850721373e-06, "loss": 0.3873, "step": 11572 }, { "epoch": 1.0386798591012094, "grad_norm": 0.5314109413025468, "learning_rate": 8.264211339950895e-06, "loss": 0.4112, "step": 11573 }, { "epoch": 1.0387696035538803, "grad_norm": 0.5004968304369353, "learning_rate": 8.263815793581585e-06, "loss": 0.4153, "step": 11574 }, { "epoch": 1.0388593480065513, "grad_norm": 0.4602946370473446, "learning_rate": 8.26342021161776e-06, "loss": 0.4193, "step": 11575 }, { "epoch": 1.0389490924592224, "grad_norm": 0.4894326435796687, "learning_rate": 8.263024594063733e-06, "loss": 0.4448, "step": 11576 }, { "epoch": 1.0390388369118935, "grad_norm": 0.5371663904272229, "learning_rate": 8.262628940923818e-06, "loss": 0.385, "step": 11577 }, { "epoch": 1.0391285813645643, "grad_norm": 0.48451677368588725, "learning_rate": 8.26223325220233e-06, "loss": 0.4171, "step": 11578 }, { "epoch": 1.0392183258172354, "grad_norm": 0.5156592885409571, "learning_rate": 8.261837527903585e-06, "loss": 0.4458, "step": 11579 }, { "epoch": 1.0393080702699065, "grad_norm": 0.4907498662204379, "learning_rate": 8.261441768031898e-06, "loss": 0.3787, "step": 11580 }, { "epoch": 1.0393978147225775, "grad_norm": 0.4523239619106728, "learning_rate": 8.261045972591584e-06, "loss": 0.3811, "step": 11581 }, { "epoch": 1.0394875591752484, "grad_norm": 0.4560671702708709, "learning_rate": 8.260650141586961e-06, "loss": 0.3297, "step": 11582 }, { "epoch": 1.0395773036279194, "grad_norm": 0.44832323197969637, "learning_rate": 8.260254275022347e-06, "loss": 0.364, "step": 11583 }, { "epoch": 1.0396670480805905, "grad_norm": 0.4804480477544568, "learning_rate": 8.259858372902056e-06, "loss": 0.3808, "step": 11584 }, { "epoch": 1.0397567925332616, "grad_norm": 0.49093865965027955, "learning_rate": 8.25946243523041e-06, "loss": 0.366, "step": 11585 }, { "epoch": 1.0398465369859327, "grad_norm": 0.48768927117334177, "learning_rate": 8.259066462011721e-06, "loss": 0.4082, "step": 11586 }, { "epoch": 1.0399362814386035, "grad_norm": 0.5002457291983554, "learning_rate": 8.258670453250312e-06, "loss": 0.4796, "step": 11587 }, { "epoch": 1.0400260258912746, "grad_norm": 0.5169063059570764, "learning_rate": 8.258274408950501e-06, "loss": 0.3526, "step": 11588 }, { "epoch": 1.0401157703439456, "grad_norm": 0.4501170994434721, "learning_rate": 8.257878329116609e-06, "loss": 0.4366, "step": 11589 }, { "epoch": 1.0402055147966167, "grad_norm": 0.5124271396433717, "learning_rate": 8.25748221375295e-06, "loss": 0.4099, "step": 11590 }, { "epoch": 1.0402952592492876, "grad_norm": 0.47171027380969327, "learning_rate": 8.257086062863849e-06, "loss": 0.4191, "step": 11591 }, { "epoch": 1.0403850037019586, "grad_norm": 0.49149007083845303, "learning_rate": 8.256689876453624e-06, "loss": 0.433, "step": 11592 }, { "epoch": 1.0404747481546297, "grad_norm": 0.5279652713453974, "learning_rate": 8.256293654526597e-06, "loss": 0.3705, "step": 11593 }, { "epoch": 1.0405644926073008, "grad_norm": 0.4732140493938443, "learning_rate": 8.255897397087087e-06, "loss": 0.3982, "step": 11594 }, { "epoch": 1.0406542370599716, "grad_norm": 0.49608606073056255, "learning_rate": 8.255501104139418e-06, "loss": 0.4068, "step": 11595 }, { "epoch": 1.0407439815126427, "grad_norm": 0.569815856028836, "learning_rate": 8.255104775687912e-06, "loss": 0.4654, "step": 11596 }, { "epoch": 1.0408337259653138, "grad_norm": 0.46848594502662, "learning_rate": 8.254708411736889e-06, "loss": 0.3821, "step": 11597 }, { "epoch": 1.0409234704179848, "grad_norm": 0.4908368879865591, "learning_rate": 8.254312012290673e-06, "loss": 0.3658, "step": 11598 }, { "epoch": 1.041013214870656, "grad_norm": 0.4727684126285714, "learning_rate": 8.253915577353586e-06, "loss": 0.3993, "step": 11599 }, { "epoch": 1.0411029593233267, "grad_norm": 0.5052600672986917, "learning_rate": 8.253519106929954e-06, "loss": 0.3894, "step": 11600 }, { "epoch": 1.0411927037759978, "grad_norm": 0.47214251976463667, "learning_rate": 8.253122601024099e-06, "loss": 0.4188, "step": 11601 }, { "epoch": 1.0412824482286689, "grad_norm": 0.510345702445879, "learning_rate": 8.252726059640345e-06, "loss": 0.4062, "step": 11602 }, { "epoch": 1.04137219268134, "grad_norm": 0.45542473328722594, "learning_rate": 8.252329482783016e-06, "loss": 0.3487, "step": 11603 }, { "epoch": 1.0414619371340108, "grad_norm": 0.4951258964828036, "learning_rate": 8.251932870456438e-06, "loss": 0.3771, "step": 11604 }, { "epoch": 1.0415516815866819, "grad_norm": 0.447948496104479, "learning_rate": 8.251536222664937e-06, "loss": 0.3609, "step": 11605 }, { "epoch": 1.041641426039353, "grad_norm": 0.49510572266335723, "learning_rate": 8.251139539412839e-06, "loss": 0.3527, "step": 11606 }, { "epoch": 1.041731170492024, "grad_norm": 0.4487878655903117, "learning_rate": 8.250742820704467e-06, "loss": 0.4087, "step": 11607 }, { "epoch": 1.041820914944695, "grad_norm": 0.5174290342566917, "learning_rate": 8.250346066544152e-06, "loss": 0.3871, "step": 11608 }, { "epoch": 1.041910659397366, "grad_norm": 0.4680783232279631, "learning_rate": 8.249949276936217e-06, "loss": 0.3907, "step": 11609 }, { "epoch": 1.042000403850037, "grad_norm": 0.5704049368497267, "learning_rate": 8.249552451884993e-06, "loss": 0.4525, "step": 11610 }, { "epoch": 1.042090148302708, "grad_norm": 0.480630823776976, "learning_rate": 8.249155591394805e-06, "loss": 0.3694, "step": 11611 }, { "epoch": 1.0421798927553791, "grad_norm": 0.4834214988423184, "learning_rate": 8.24875869546998e-06, "loss": 0.4662, "step": 11612 }, { "epoch": 1.04226963720805, "grad_norm": 0.5040973487586099, "learning_rate": 8.24836176411485e-06, "loss": 0.4165, "step": 11613 }, { "epoch": 1.042359381660721, "grad_norm": 0.46771125678443143, "learning_rate": 8.247964797333741e-06, "loss": 0.3621, "step": 11614 }, { "epoch": 1.0424491261133921, "grad_norm": 0.5016543937920719, "learning_rate": 8.247567795130983e-06, "loss": 0.4675, "step": 11615 }, { "epoch": 1.0425388705660632, "grad_norm": 0.48276583879881585, "learning_rate": 8.247170757510908e-06, "loss": 0.4404, "step": 11616 }, { "epoch": 1.042628615018734, "grad_norm": 0.5098586946049657, "learning_rate": 8.246773684477843e-06, "loss": 0.4017, "step": 11617 }, { "epoch": 1.0427183594714051, "grad_norm": 0.5056111986925346, "learning_rate": 8.246376576036118e-06, "loss": 0.3939, "step": 11618 }, { "epoch": 1.0428081039240762, "grad_norm": 0.509269889094463, "learning_rate": 8.245979432190069e-06, "loss": 0.4369, "step": 11619 }, { "epoch": 1.0428978483767473, "grad_norm": 0.5088920580637252, "learning_rate": 8.24558225294402e-06, "loss": 0.3766, "step": 11620 }, { "epoch": 1.0429875928294183, "grad_norm": 0.4804272677938909, "learning_rate": 8.245185038302306e-06, "loss": 0.4293, "step": 11621 }, { "epoch": 1.0430773372820892, "grad_norm": 0.5092033725568539, "learning_rate": 8.24478778826926e-06, "loss": 0.3784, "step": 11622 }, { "epoch": 1.0431670817347602, "grad_norm": 0.5067261313602905, "learning_rate": 8.244390502849211e-06, "loss": 0.4288, "step": 11623 }, { "epoch": 1.0432568261874313, "grad_norm": 0.5128280686627914, "learning_rate": 8.243993182046496e-06, "loss": 0.3905, "step": 11624 }, { "epoch": 1.0433465706401024, "grad_norm": 0.469172015511293, "learning_rate": 8.243595825865445e-06, "loss": 0.3476, "step": 11625 }, { "epoch": 1.0434363150927732, "grad_norm": 0.4777213185902179, "learning_rate": 8.243198434310393e-06, "loss": 0.4102, "step": 11626 }, { "epoch": 1.0435260595454443, "grad_norm": 0.5169685498005888, "learning_rate": 8.242801007385673e-06, "loss": 0.4435, "step": 11627 }, { "epoch": 1.0436158039981154, "grad_norm": 0.5060269121304762, "learning_rate": 8.242403545095621e-06, "loss": 0.3824, "step": 11628 }, { "epoch": 1.0437055484507864, "grad_norm": 0.4760235611782335, "learning_rate": 8.242006047444568e-06, "loss": 0.3855, "step": 11629 }, { "epoch": 1.0437952929034573, "grad_norm": 0.4969934844331919, "learning_rate": 8.241608514436852e-06, "loss": 0.4695, "step": 11630 }, { "epoch": 1.0438850373561284, "grad_norm": 0.5683850444224618, "learning_rate": 8.241210946076809e-06, "loss": 0.425, "step": 11631 }, { "epoch": 1.0439747818087994, "grad_norm": 0.5045725790696253, "learning_rate": 8.240813342368772e-06, "loss": 0.4203, "step": 11632 }, { "epoch": 1.0440645262614705, "grad_norm": 0.5060866108864516, "learning_rate": 8.240415703317078e-06, "loss": 0.3671, "step": 11633 }, { "epoch": 1.0441542707141416, "grad_norm": 0.4679082977893115, "learning_rate": 8.240018028926067e-06, "loss": 0.3966, "step": 11634 }, { "epoch": 1.0442440151668124, "grad_norm": 0.5018666413998359, "learning_rate": 8.23962031920007e-06, "loss": 0.4168, "step": 11635 }, { "epoch": 1.0443337596194835, "grad_norm": 0.5116360067673054, "learning_rate": 8.23922257414343e-06, "loss": 0.3759, "step": 11636 }, { "epoch": 1.0444235040721546, "grad_norm": 0.5077787201798007, "learning_rate": 8.23882479376048e-06, "loss": 0.3977, "step": 11637 }, { "epoch": 1.0445132485248256, "grad_norm": 0.5626015199152135, "learning_rate": 8.238426978055563e-06, "loss": 0.4297, "step": 11638 }, { "epoch": 1.0446029929774965, "grad_norm": 0.5050053756182887, "learning_rate": 8.238029127033013e-06, "loss": 0.3699, "step": 11639 }, { "epoch": 1.0446927374301676, "grad_norm": 0.49802136876410596, "learning_rate": 8.237631240697172e-06, "loss": 0.4497, "step": 11640 }, { "epoch": 1.0447824818828386, "grad_norm": 0.49317909605702404, "learning_rate": 8.237233319052378e-06, "loss": 0.4061, "step": 11641 }, { "epoch": 1.0448722263355097, "grad_norm": 0.5334037198399534, "learning_rate": 8.236835362102968e-06, "loss": 0.4202, "step": 11642 }, { "epoch": 1.0449619707881808, "grad_norm": 0.4831539023300283, "learning_rate": 8.236437369853287e-06, "loss": 0.4427, "step": 11643 }, { "epoch": 1.0450517152408516, "grad_norm": 0.5124254696313565, "learning_rate": 8.236039342307673e-06, "loss": 0.4358, "step": 11644 }, { "epoch": 1.0451414596935227, "grad_norm": 0.46604324032061956, "learning_rate": 8.235641279470467e-06, "loss": 0.3417, "step": 11645 }, { "epoch": 1.0452312041461937, "grad_norm": 0.43632621242840053, "learning_rate": 8.23524318134601e-06, "loss": 0.3835, "step": 11646 }, { "epoch": 1.0453209485988648, "grad_norm": 0.47680727172845644, "learning_rate": 8.234845047938644e-06, "loss": 0.4307, "step": 11647 }, { "epoch": 1.0454106930515357, "grad_norm": 0.49946877319352545, "learning_rate": 8.23444687925271e-06, "loss": 0.3812, "step": 11648 }, { "epoch": 1.0455004375042067, "grad_norm": 0.4842298345620703, "learning_rate": 8.23404867529255e-06, "loss": 0.412, "step": 11649 }, { "epoch": 1.0455901819568778, "grad_norm": 0.511852264290643, "learning_rate": 8.233650436062511e-06, "loss": 0.4246, "step": 11650 }, { "epoch": 1.0456799264095489, "grad_norm": 0.4870291275874617, "learning_rate": 8.233252161566928e-06, "loss": 0.3253, "step": 11651 }, { "epoch": 1.0457696708622197, "grad_norm": 0.49619088541084994, "learning_rate": 8.232853851810154e-06, "loss": 0.4697, "step": 11652 }, { "epoch": 1.0458594153148908, "grad_norm": 0.5589392865419919, "learning_rate": 8.232455506796526e-06, "loss": 0.4524, "step": 11653 }, { "epoch": 1.0459491597675619, "grad_norm": 0.4748876465317421, "learning_rate": 8.232057126530391e-06, "loss": 0.3612, "step": 11654 }, { "epoch": 1.046038904220233, "grad_norm": 0.4979174040747036, "learning_rate": 8.231658711016095e-06, "loss": 0.4145, "step": 11655 }, { "epoch": 1.046128648672904, "grad_norm": 0.48774862585497525, "learning_rate": 8.231260260257979e-06, "loss": 0.4205, "step": 11656 }, { "epoch": 1.0462183931255749, "grad_norm": 0.48990086292716634, "learning_rate": 8.23086177426039e-06, "loss": 0.3794, "step": 11657 }, { "epoch": 1.046308137578246, "grad_norm": 0.47255284448329504, "learning_rate": 8.230463253027677e-06, "loss": 0.4044, "step": 11658 }, { "epoch": 1.046397882030917, "grad_norm": 0.4784584531560772, "learning_rate": 8.230064696564182e-06, "loss": 0.3892, "step": 11659 }, { "epoch": 1.046487626483588, "grad_norm": 0.4959464387134611, "learning_rate": 8.229666104874252e-06, "loss": 0.4272, "step": 11660 }, { "epoch": 1.046577370936259, "grad_norm": 0.5008811311573752, "learning_rate": 8.229267477962238e-06, "loss": 0.365, "step": 11661 }, { "epoch": 1.04666711538893, "grad_norm": 0.49793953233166716, "learning_rate": 8.228868815832483e-06, "loss": 0.4445, "step": 11662 }, { "epoch": 1.046756859841601, "grad_norm": 0.4923631547928792, "learning_rate": 8.228470118489335e-06, "loss": 0.4093, "step": 11663 }, { "epoch": 1.0468466042942721, "grad_norm": 0.4946773621814943, "learning_rate": 8.228071385937144e-06, "loss": 0.4804, "step": 11664 }, { "epoch": 1.046936348746943, "grad_norm": 0.5544683644988864, "learning_rate": 8.22767261818026e-06, "loss": 0.449, "step": 11665 }, { "epoch": 1.047026093199614, "grad_norm": 0.5315152926357776, "learning_rate": 8.227273815223028e-06, "loss": 0.4237, "step": 11666 }, { "epoch": 1.047115837652285, "grad_norm": 0.5002441174893203, "learning_rate": 8.226874977069799e-06, "loss": 0.4311, "step": 11667 }, { "epoch": 1.0472055821049562, "grad_norm": 0.49853169760382826, "learning_rate": 8.226476103724923e-06, "loss": 0.4924, "step": 11668 }, { "epoch": 1.0472953265576272, "grad_norm": 0.5399300345818305, "learning_rate": 8.22607719519275e-06, "loss": 0.4663, "step": 11669 }, { "epoch": 1.047385071010298, "grad_norm": 0.5039414494538739, "learning_rate": 8.225678251477629e-06, "loss": 0.3865, "step": 11670 }, { "epoch": 1.0474748154629692, "grad_norm": 0.5025581496977386, "learning_rate": 8.225279272583913e-06, "loss": 0.3905, "step": 11671 }, { "epoch": 1.0475645599156402, "grad_norm": 0.4475799049279859, "learning_rate": 8.224880258515953e-06, "loss": 0.3527, "step": 11672 }, { "epoch": 1.0476543043683113, "grad_norm": 0.5007098862055278, "learning_rate": 8.224481209278098e-06, "loss": 0.446, "step": 11673 }, { "epoch": 1.0477440488209822, "grad_norm": 0.49425319612743773, "learning_rate": 8.224082124874702e-06, "loss": 0.4083, "step": 11674 }, { "epoch": 1.0478337932736532, "grad_norm": 0.5223042507500317, "learning_rate": 8.223683005310116e-06, "loss": 0.4522, "step": 11675 }, { "epoch": 1.0479235377263243, "grad_norm": 0.5523828552900674, "learning_rate": 8.223283850588695e-06, "loss": 0.4745, "step": 11676 }, { "epoch": 1.0480132821789954, "grad_norm": 0.5083607814968295, "learning_rate": 8.222884660714791e-06, "loss": 0.3856, "step": 11677 }, { "epoch": 1.0481030266316664, "grad_norm": 0.5029409907384887, "learning_rate": 8.222485435692757e-06, "loss": 0.4171, "step": 11678 }, { "epoch": 1.0481927710843373, "grad_norm": 0.4874424746524817, "learning_rate": 8.222086175526947e-06, "loss": 0.4054, "step": 11679 }, { "epoch": 1.0482825155370084, "grad_norm": 0.5553513927754677, "learning_rate": 8.221686880221715e-06, "loss": 0.428, "step": 11680 }, { "epoch": 1.0483722599896794, "grad_norm": 0.47612679235225114, "learning_rate": 8.221287549781417e-06, "loss": 0.3512, "step": 11681 }, { "epoch": 1.0484620044423505, "grad_norm": 0.45534059459011844, "learning_rate": 8.220888184210405e-06, "loss": 0.3297, "step": 11682 }, { "epoch": 1.0485517488950213, "grad_norm": 0.4953040238400246, "learning_rate": 8.220488783513038e-06, "loss": 0.4315, "step": 11683 }, { "epoch": 1.0486414933476924, "grad_norm": 0.4922162017835202, "learning_rate": 8.22008934769367e-06, "loss": 0.417, "step": 11684 }, { "epoch": 1.0487312378003635, "grad_norm": 0.5087238943591225, "learning_rate": 8.219689876756655e-06, "loss": 0.4235, "step": 11685 }, { "epoch": 1.0488209822530346, "grad_norm": 0.5005489164687114, "learning_rate": 8.219290370706357e-06, "loss": 0.416, "step": 11686 }, { "epoch": 1.0489107267057054, "grad_norm": 0.5123325044481858, "learning_rate": 8.218890829547124e-06, "loss": 0.3386, "step": 11687 }, { "epoch": 1.0490004711583765, "grad_norm": 0.418757097272015, "learning_rate": 8.218491253283319e-06, "loss": 0.3854, "step": 11688 }, { "epoch": 1.0490902156110475, "grad_norm": 0.5411446516964676, "learning_rate": 8.218091641919295e-06, "loss": 0.44, "step": 11689 }, { "epoch": 1.0491799600637186, "grad_norm": 0.4970683941534666, "learning_rate": 8.217691995459416e-06, "loss": 0.415, "step": 11690 }, { "epoch": 1.0492697045163897, "grad_norm": 0.48843546915866937, "learning_rate": 8.217292313908037e-06, "loss": 0.4647, "step": 11691 }, { "epoch": 1.0493594489690605, "grad_norm": 0.5382001829508264, "learning_rate": 8.216892597269515e-06, "loss": 0.4036, "step": 11692 }, { "epoch": 1.0494491934217316, "grad_norm": 0.544054855077509, "learning_rate": 8.216492845548213e-06, "loss": 0.4707, "step": 11693 }, { "epoch": 1.0495389378744027, "grad_norm": 0.49723476814196843, "learning_rate": 8.216093058748489e-06, "loss": 0.3534, "step": 11694 }, { "epoch": 1.0496286823270737, "grad_norm": 0.5139332769227944, "learning_rate": 8.215693236874703e-06, "loss": 0.467, "step": 11695 }, { "epoch": 1.0497184267797446, "grad_norm": 0.49728807915955736, "learning_rate": 8.215293379931216e-06, "loss": 0.4224, "step": 11696 }, { "epoch": 1.0498081712324157, "grad_norm": 0.4748662750319876, "learning_rate": 8.214893487922388e-06, "loss": 0.366, "step": 11697 }, { "epoch": 1.0498979156850867, "grad_norm": 0.4622945064831977, "learning_rate": 8.21449356085258e-06, "loss": 0.3864, "step": 11698 }, { "epoch": 1.0499876601377578, "grad_norm": 0.49242406622189355, "learning_rate": 8.214093598726154e-06, "loss": 0.4415, "step": 11699 }, { "epoch": 1.0500774045904286, "grad_norm": 0.4977377988058824, "learning_rate": 8.213693601547473e-06, "loss": 0.381, "step": 11700 }, { "epoch": 1.0501671490430997, "grad_norm": 0.45642347452630955, "learning_rate": 8.213293569320896e-06, "loss": 0.4066, "step": 11701 }, { "epoch": 1.0502568934957708, "grad_norm": 0.5211853654835611, "learning_rate": 8.212893502050788e-06, "loss": 0.4388, "step": 11702 }, { "epoch": 1.0503466379484419, "grad_norm": 0.5112010335337253, "learning_rate": 8.212493399741513e-06, "loss": 0.4582, "step": 11703 }, { "epoch": 1.050436382401113, "grad_norm": 0.513087867063772, "learning_rate": 8.212093262397432e-06, "loss": 0.4133, "step": 11704 }, { "epoch": 1.0505261268537838, "grad_norm": 0.5016144903229707, "learning_rate": 8.211693090022911e-06, "loss": 0.4212, "step": 11705 }, { "epoch": 1.0506158713064548, "grad_norm": 0.4973220336863076, "learning_rate": 8.211292882622314e-06, "loss": 0.3968, "step": 11706 }, { "epoch": 1.050705615759126, "grad_norm": 0.4938063444492343, "learning_rate": 8.210892640200003e-06, "loss": 0.4222, "step": 11707 }, { "epoch": 1.050795360211797, "grad_norm": 0.5214556431586549, "learning_rate": 8.210492362760346e-06, "loss": 0.4014, "step": 11708 }, { "epoch": 1.0508851046644678, "grad_norm": 0.499158546220637, "learning_rate": 8.210092050307707e-06, "loss": 0.4539, "step": 11709 }, { "epoch": 1.050974849117139, "grad_norm": 0.5224473443270489, "learning_rate": 8.209691702846451e-06, "loss": 0.3675, "step": 11710 }, { "epoch": 1.05106459356981, "grad_norm": 0.5237762350876298, "learning_rate": 8.209291320380946e-06, "loss": 0.3815, "step": 11711 }, { "epoch": 1.051154338022481, "grad_norm": 0.4505801744367133, "learning_rate": 8.208890902915558e-06, "loss": 0.3361, "step": 11712 }, { "epoch": 1.051244082475152, "grad_norm": 0.4982984785700603, "learning_rate": 8.208490450454651e-06, "loss": 0.44, "step": 11713 }, { "epoch": 1.051333826927823, "grad_norm": 0.4723408517183769, "learning_rate": 8.208089963002597e-06, "loss": 0.36, "step": 11714 }, { "epoch": 1.051423571380494, "grad_norm": 0.4846400068514547, "learning_rate": 8.20768944056376e-06, "loss": 0.381, "step": 11715 }, { "epoch": 1.051513315833165, "grad_norm": 0.496243509042338, "learning_rate": 8.20728888314251e-06, "loss": 0.3955, "step": 11716 }, { "epoch": 1.0516030602858362, "grad_norm": 0.47649414902215526, "learning_rate": 8.206888290743214e-06, "loss": 0.4842, "step": 11717 }, { "epoch": 1.051692804738507, "grad_norm": 0.5332638240898446, "learning_rate": 8.206487663370242e-06, "loss": 0.4005, "step": 11718 }, { "epoch": 1.051782549191178, "grad_norm": 0.49844934854610023, "learning_rate": 8.206087001027961e-06, "loss": 0.4729, "step": 11719 }, { "epoch": 1.0518722936438492, "grad_norm": 0.5754579355842715, "learning_rate": 8.205686303720743e-06, "loss": 0.3872, "step": 11720 }, { "epoch": 1.0519620380965202, "grad_norm": 0.4541329049940348, "learning_rate": 8.20528557145296e-06, "loss": 0.3899, "step": 11721 }, { "epoch": 1.052051782549191, "grad_norm": 0.4779448577494619, "learning_rate": 8.204884804228975e-06, "loss": 0.3508, "step": 11722 }, { "epoch": 1.0521415270018621, "grad_norm": 0.4615099460728345, "learning_rate": 8.204484002053164e-06, "loss": 0.4516, "step": 11723 }, { "epoch": 1.0522312714545332, "grad_norm": 0.5272406199390269, "learning_rate": 8.204083164929898e-06, "loss": 0.4703, "step": 11724 }, { "epoch": 1.0523210159072043, "grad_norm": 0.5176268490023785, "learning_rate": 8.203682292863549e-06, "loss": 0.403, "step": 11725 }, { "epoch": 1.0524107603598754, "grad_norm": 0.48282110799382755, "learning_rate": 8.203281385858486e-06, "loss": 0.3946, "step": 11726 }, { "epoch": 1.0525005048125462, "grad_norm": 0.46036428749676317, "learning_rate": 8.202880443919081e-06, "loss": 0.3742, "step": 11727 }, { "epoch": 1.0525902492652173, "grad_norm": 0.5102665306093996, "learning_rate": 8.20247946704971e-06, "loss": 0.4101, "step": 11728 }, { "epoch": 1.0526799937178883, "grad_norm": 0.49755275217394795, "learning_rate": 8.202078455254744e-06, "loss": 0.4198, "step": 11729 }, { "epoch": 1.0527697381705594, "grad_norm": 0.4726664342972406, "learning_rate": 8.201677408538556e-06, "loss": 0.3835, "step": 11730 }, { "epoch": 1.0528594826232303, "grad_norm": 0.4451685775878439, "learning_rate": 8.201276326905522e-06, "loss": 0.3953, "step": 11731 }, { "epoch": 1.0529492270759013, "grad_norm": 0.4947818499970557, "learning_rate": 8.200875210360012e-06, "loss": 0.3666, "step": 11732 }, { "epoch": 1.0530389715285724, "grad_norm": 0.48168922344801546, "learning_rate": 8.200474058906404e-06, "loss": 0.4029, "step": 11733 }, { "epoch": 1.0531287159812435, "grad_norm": 0.4932008276135498, "learning_rate": 8.200072872549073e-06, "loss": 0.4088, "step": 11734 }, { "epoch": 1.0532184604339143, "grad_norm": 0.47861281396219085, "learning_rate": 8.199671651292391e-06, "loss": 0.4312, "step": 11735 }, { "epoch": 1.0533082048865854, "grad_norm": 0.508726822503323, "learning_rate": 8.199270395140735e-06, "loss": 0.3981, "step": 11736 }, { "epoch": 1.0533979493392565, "grad_norm": 0.4649386208223594, "learning_rate": 8.198869104098483e-06, "loss": 0.4044, "step": 11737 }, { "epoch": 1.0534876937919275, "grad_norm": 0.5012479215804194, "learning_rate": 8.19846777817001e-06, "loss": 0.4071, "step": 11738 }, { "epoch": 1.0535774382445986, "grad_norm": 0.5022246496490455, "learning_rate": 8.198066417359693e-06, "loss": 0.3808, "step": 11739 }, { "epoch": 1.0536671826972694, "grad_norm": 0.4732798650907233, "learning_rate": 8.19766502167191e-06, "loss": 0.4451, "step": 11740 }, { "epoch": 1.0537569271499405, "grad_norm": 0.4898261914803569, "learning_rate": 8.197263591111035e-06, "loss": 0.4288, "step": 11741 }, { "epoch": 1.0538466716026116, "grad_norm": 0.508885377593106, "learning_rate": 8.19686212568145e-06, "loss": 0.4505, "step": 11742 }, { "epoch": 1.0539364160552827, "grad_norm": 0.47817198833469676, "learning_rate": 8.196460625387533e-06, "loss": 0.3735, "step": 11743 }, { "epoch": 1.0540261605079535, "grad_norm": 0.5100448732890724, "learning_rate": 8.19605909023366e-06, "loss": 0.3525, "step": 11744 }, { "epoch": 1.0541159049606246, "grad_norm": 0.5248737579613377, "learning_rate": 8.19565752022421e-06, "loss": 0.4442, "step": 11745 }, { "epoch": 1.0542056494132956, "grad_norm": 0.5590336505108778, "learning_rate": 8.195255915363564e-06, "loss": 0.4431, "step": 11746 }, { "epoch": 1.0542953938659667, "grad_norm": 0.456063831182826, "learning_rate": 8.194854275656106e-06, "loss": 0.4153, "step": 11747 }, { "epoch": 1.0543851383186378, "grad_norm": 0.5299895953144904, "learning_rate": 8.194452601106207e-06, "loss": 0.4065, "step": 11748 }, { "epoch": 1.0544748827713086, "grad_norm": 0.48890079162967437, "learning_rate": 8.194050891718257e-06, "loss": 0.499, "step": 11749 }, { "epoch": 1.0545646272239797, "grad_norm": 0.5522692487019474, "learning_rate": 8.193649147496628e-06, "loss": 0.4519, "step": 11750 }, { "epoch": 1.0546543716766508, "grad_norm": 0.46902671338245117, "learning_rate": 8.19324736844571e-06, "loss": 0.3447, "step": 11751 }, { "epoch": 1.0547441161293218, "grad_norm": 0.44944593867347904, "learning_rate": 8.192845554569877e-06, "loss": 0.3803, "step": 11752 }, { "epoch": 1.0548338605819927, "grad_norm": 0.5334513214136255, "learning_rate": 8.192443705873518e-06, "loss": 0.5072, "step": 11753 }, { "epoch": 1.0549236050346638, "grad_norm": 0.5077282790569062, "learning_rate": 8.19204182236101e-06, "loss": 0.423, "step": 11754 }, { "epoch": 1.0550133494873348, "grad_norm": 0.4911798502954886, "learning_rate": 8.19163990403674e-06, "loss": 0.4226, "step": 11755 }, { "epoch": 1.055103093940006, "grad_norm": 0.4876586487396999, "learning_rate": 8.191237950905085e-06, "loss": 0.4423, "step": 11756 }, { "epoch": 1.0551928383926767, "grad_norm": 0.48309039540016147, "learning_rate": 8.190835962970437e-06, "loss": 0.3871, "step": 11757 }, { "epoch": 1.0552825828453478, "grad_norm": 0.5178293991655551, "learning_rate": 8.190433940237174e-06, "loss": 0.4851, "step": 11758 }, { "epoch": 1.0553723272980189, "grad_norm": 0.5032222821462792, "learning_rate": 8.190031882709683e-06, "loss": 0.4087, "step": 11759 }, { "epoch": 1.05546207175069, "grad_norm": 0.495213744033608, "learning_rate": 8.189629790392347e-06, "loss": 0.3761, "step": 11760 }, { "epoch": 1.055551816203361, "grad_norm": 0.4781899534479874, "learning_rate": 8.189227663289554e-06, "loss": 0.3663, "step": 11761 }, { "epoch": 1.0556415606560319, "grad_norm": 0.476361529148854, "learning_rate": 8.188825501405687e-06, "loss": 0.3699, "step": 11762 }, { "epoch": 1.055731305108703, "grad_norm": 0.44404369702362745, "learning_rate": 8.188423304745132e-06, "loss": 0.4215, "step": 11763 }, { "epoch": 1.055821049561374, "grad_norm": 0.5244887343906214, "learning_rate": 8.188021073312275e-06, "loss": 0.4724, "step": 11764 }, { "epoch": 1.055910794014045, "grad_norm": 0.4834943819638233, "learning_rate": 8.187618807111504e-06, "loss": 0.4163, "step": 11765 }, { "epoch": 1.056000538466716, "grad_norm": 0.48947092805603304, "learning_rate": 8.187216506147206e-06, "loss": 0.3989, "step": 11766 }, { "epoch": 1.056090282919387, "grad_norm": 0.5110642576387396, "learning_rate": 8.186814170423769e-06, "loss": 0.397, "step": 11767 }, { "epoch": 1.056180027372058, "grad_norm": 0.4383585307799409, "learning_rate": 8.186411799945578e-06, "loss": 0.3802, "step": 11768 }, { "epoch": 1.0562697718247291, "grad_norm": 0.4940445633707206, "learning_rate": 8.186009394717024e-06, "loss": 0.3881, "step": 11769 }, { "epoch": 1.0563595162774, "grad_norm": 0.4839023741676863, "learning_rate": 8.185606954742493e-06, "loss": 0.4491, "step": 11770 }, { "epoch": 1.056449260730071, "grad_norm": 0.5242556043226935, "learning_rate": 8.185204480026377e-06, "loss": 0.4223, "step": 11771 }, { "epoch": 1.0565390051827421, "grad_norm": 0.4895821217022783, "learning_rate": 8.184801970573063e-06, "loss": 0.4062, "step": 11772 }, { "epoch": 1.0566287496354132, "grad_norm": 0.4734704668986345, "learning_rate": 8.184399426386943e-06, "loss": 0.3899, "step": 11773 }, { "epoch": 1.0567184940880843, "grad_norm": 0.5202564737196708, "learning_rate": 8.183996847472402e-06, "loss": 0.4242, "step": 11774 }, { "epoch": 1.0568082385407551, "grad_norm": 0.4874575069666328, "learning_rate": 8.183594233833836e-06, "loss": 0.4373, "step": 11775 }, { "epoch": 1.0568979829934262, "grad_norm": 0.5617104572549392, "learning_rate": 8.183191585475635e-06, "loss": 0.4262, "step": 11776 }, { "epoch": 1.0569877274460973, "grad_norm": 0.5116736315988732, "learning_rate": 8.182788902402186e-06, "loss": 0.4619, "step": 11777 }, { "epoch": 1.0570774718987683, "grad_norm": 0.4955970671982543, "learning_rate": 8.182386184617885e-06, "loss": 0.378, "step": 11778 }, { "epoch": 1.0571672163514392, "grad_norm": 0.5145412829024075, "learning_rate": 8.181983432127123e-06, "loss": 0.402, "step": 11779 }, { "epoch": 1.0572569608041102, "grad_norm": 0.48356188385767335, "learning_rate": 8.181580644934291e-06, "loss": 0.3791, "step": 11780 }, { "epoch": 1.0573467052567813, "grad_norm": 0.51369384952242, "learning_rate": 8.181177823043783e-06, "loss": 0.4193, "step": 11781 }, { "epoch": 1.0574364497094524, "grad_norm": 0.5489300455411937, "learning_rate": 8.18077496645999e-06, "loss": 0.4653, "step": 11782 }, { "epoch": 1.0575261941621235, "grad_norm": 0.49885064784703437, "learning_rate": 8.180372075187309e-06, "loss": 0.4157, "step": 11783 }, { "epoch": 1.0576159386147943, "grad_norm": 0.4954898218761749, "learning_rate": 8.17996914923013e-06, "loss": 0.4085, "step": 11784 }, { "epoch": 1.0577056830674654, "grad_norm": 0.47006643529109193, "learning_rate": 8.17956618859285e-06, "loss": 0.3632, "step": 11785 }, { "epoch": 1.0577954275201364, "grad_norm": 0.4840234416004645, "learning_rate": 8.179163193279864e-06, "loss": 0.4011, "step": 11786 }, { "epoch": 1.0578851719728075, "grad_norm": 0.509950633878355, "learning_rate": 8.178760163295564e-06, "loss": 0.3671, "step": 11787 }, { "epoch": 1.0579749164254784, "grad_norm": 0.46320498223621304, "learning_rate": 8.17835709864435e-06, "loss": 0.3489, "step": 11788 }, { "epoch": 1.0580646608781494, "grad_norm": 0.4792845991763716, "learning_rate": 8.177953999330613e-06, "loss": 0.3725, "step": 11789 }, { "epoch": 1.0581544053308205, "grad_norm": 0.47250568076865807, "learning_rate": 8.17755086535875e-06, "loss": 0.4221, "step": 11790 }, { "epoch": 1.0582441497834916, "grad_norm": 0.4818748703653277, "learning_rate": 8.177147696733158e-06, "loss": 0.3913, "step": 11791 }, { "epoch": 1.0583338942361624, "grad_norm": 0.48163225908317975, "learning_rate": 8.176744493458237e-06, "loss": 0.382, "step": 11792 }, { "epoch": 1.0584236386888335, "grad_norm": 0.46605264525884227, "learning_rate": 8.17634125553838e-06, "loss": 0.3575, "step": 11793 }, { "epoch": 1.0585133831415046, "grad_norm": 0.5362937847717483, "learning_rate": 8.175937982977987e-06, "loss": 0.4327, "step": 11794 }, { "epoch": 1.0586031275941756, "grad_norm": 0.5202816458877838, "learning_rate": 8.175534675781454e-06, "loss": 0.3994, "step": 11795 }, { "epoch": 1.0586928720468467, "grad_norm": 0.5044934093813788, "learning_rate": 8.17513133395318e-06, "loss": 0.4166, "step": 11796 }, { "epoch": 1.0587826164995175, "grad_norm": 0.485641431656141, "learning_rate": 8.174727957497569e-06, "loss": 0.4074, "step": 11797 }, { "epoch": 1.0588723609521886, "grad_norm": 0.5233989817126594, "learning_rate": 8.17432454641901e-06, "loss": 0.3861, "step": 11798 }, { "epoch": 1.0589621054048597, "grad_norm": 0.4398467940631529, "learning_rate": 8.173921100721911e-06, "loss": 0.3681, "step": 11799 }, { "epoch": 1.0590518498575308, "grad_norm": 0.45123140692459385, "learning_rate": 8.173517620410668e-06, "loss": 0.4053, "step": 11800 }, { "epoch": 1.0591415943102016, "grad_norm": 0.523514429530111, "learning_rate": 8.173114105489683e-06, "loss": 0.4533, "step": 11801 }, { "epoch": 1.0592313387628727, "grad_norm": 0.5355252997435989, "learning_rate": 8.172710555963354e-06, "loss": 0.413, "step": 11802 }, { "epoch": 1.0593210832155437, "grad_norm": 0.5443937295020463, "learning_rate": 8.172306971836086e-06, "loss": 0.4115, "step": 11803 }, { "epoch": 1.0594108276682148, "grad_norm": 0.47463947072899065, "learning_rate": 8.171903353112278e-06, "loss": 0.4121, "step": 11804 }, { "epoch": 1.0595005721208857, "grad_norm": 0.5044762993306753, "learning_rate": 8.17149969979633e-06, "loss": 0.3695, "step": 11805 }, { "epoch": 1.0595903165735567, "grad_norm": 0.45122766838084183, "learning_rate": 8.17109601189265e-06, "loss": 0.3546, "step": 11806 }, { "epoch": 1.0596800610262278, "grad_norm": 0.4820543814555988, "learning_rate": 8.170692289405633e-06, "loss": 0.4231, "step": 11807 }, { "epoch": 1.0597698054788989, "grad_norm": 0.48231792656992944, "learning_rate": 8.170288532339688e-06, "loss": 0.3394, "step": 11808 }, { "epoch": 1.05985954993157, "grad_norm": 0.47504550337869017, "learning_rate": 8.169884740699215e-06, "loss": 0.385, "step": 11809 }, { "epoch": 1.0599492943842408, "grad_norm": 0.46065670841366446, "learning_rate": 8.16948091448862e-06, "loss": 0.3646, "step": 11810 }, { "epoch": 1.0600390388369119, "grad_norm": 0.4533077623316716, "learning_rate": 8.169077053712306e-06, "loss": 0.3841, "step": 11811 }, { "epoch": 1.060128783289583, "grad_norm": 0.525769725006384, "learning_rate": 8.168673158374675e-06, "loss": 0.4102, "step": 11812 }, { "epoch": 1.060218527742254, "grad_norm": 0.5139997446385204, "learning_rate": 8.168269228480134e-06, "loss": 0.4726, "step": 11813 }, { "epoch": 1.0603082721949249, "grad_norm": 0.4909284235878394, "learning_rate": 8.16786526403309e-06, "loss": 0.4457, "step": 11814 }, { "epoch": 1.060398016647596, "grad_norm": 0.524827498451962, "learning_rate": 8.167461265037945e-06, "loss": 0.4557, "step": 11815 }, { "epoch": 1.060487761100267, "grad_norm": 0.4839150077041783, "learning_rate": 8.167057231499108e-06, "loss": 0.3675, "step": 11816 }, { "epoch": 1.060577505552938, "grad_norm": 0.46933852300237067, "learning_rate": 8.166653163420983e-06, "loss": 0.3762, "step": 11817 }, { "epoch": 1.0606672500056091, "grad_norm": 0.4801757586748056, "learning_rate": 8.166249060807977e-06, "loss": 0.4371, "step": 11818 }, { "epoch": 1.06075699445828, "grad_norm": 0.4934258608382929, "learning_rate": 8.1658449236645e-06, "loss": 0.3797, "step": 11819 }, { "epoch": 1.060846738910951, "grad_norm": 0.46542667723556214, "learning_rate": 8.165440751994955e-06, "loss": 0.3767, "step": 11820 }, { "epoch": 1.0609364833636221, "grad_norm": 0.4854175350388868, "learning_rate": 8.165036545803753e-06, "loss": 0.3927, "step": 11821 }, { "epoch": 1.0610262278162932, "grad_norm": 0.4844404555142922, "learning_rate": 8.1646323050953e-06, "loss": 0.3954, "step": 11822 }, { "epoch": 1.061115972268964, "grad_norm": 0.47770447226483104, "learning_rate": 8.164228029874007e-06, "loss": 0.4279, "step": 11823 }, { "epoch": 1.061205716721635, "grad_norm": 0.53282218485032, "learning_rate": 8.16382372014428e-06, "loss": 0.384, "step": 11824 }, { "epoch": 1.0612954611743062, "grad_norm": 0.48553899221456526, "learning_rate": 8.163419375910533e-06, "loss": 0.3905, "step": 11825 }, { "epoch": 1.0613852056269772, "grad_norm": 0.4946978577529499, "learning_rate": 8.16301499717717e-06, "loss": 0.3624, "step": 11826 }, { "epoch": 1.0614749500796483, "grad_norm": 0.47874652398042644, "learning_rate": 8.162610583948604e-06, "loss": 0.4059, "step": 11827 }, { "epoch": 1.0615646945323192, "grad_norm": 0.46389931108218074, "learning_rate": 8.162206136229245e-06, "loss": 0.3763, "step": 11828 }, { "epoch": 1.0616544389849902, "grad_norm": 0.5147613500766456, "learning_rate": 8.161801654023505e-06, "loss": 0.4604, "step": 11829 }, { "epoch": 1.0617441834376613, "grad_norm": 0.5424698932191793, "learning_rate": 8.161397137335794e-06, "loss": 0.4288, "step": 11830 }, { "epoch": 1.0618339278903324, "grad_norm": 0.5211506065685578, "learning_rate": 8.160992586170523e-06, "loss": 0.3765, "step": 11831 }, { "epoch": 1.0619236723430032, "grad_norm": 0.4623601310535067, "learning_rate": 8.160588000532104e-06, "loss": 0.3914, "step": 11832 }, { "epoch": 1.0620134167956743, "grad_norm": 0.4993966884336575, "learning_rate": 8.160183380424952e-06, "loss": 0.3607, "step": 11833 }, { "epoch": 1.0621031612483454, "grad_norm": 0.46035385567860304, "learning_rate": 8.159778725853477e-06, "loss": 0.3662, "step": 11834 }, { "epoch": 1.0621929057010164, "grad_norm": 0.4836414350037135, "learning_rate": 8.159374036822092e-06, "loss": 0.4186, "step": 11835 }, { "epoch": 1.0622826501536873, "grad_norm": 0.4825795578421644, "learning_rate": 8.158969313335212e-06, "loss": 0.4564, "step": 11836 }, { "epoch": 1.0623723946063584, "grad_norm": 0.5300851009095824, "learning_rate": 8.15856455539725e-06, "loss": 0.4223, "step": 11837 }, { "epoch": 1.0624621390590294, "grad_norm": 0.5390912721784127, "learning_rate": 8.15815976301262e-06, "loss": 0.4447, "step": 11838 }, { "epoch": 1.0625518835117005, "grad_norm": 0.5295871819107908, "learning_rate": 8.157754936185737e-06, "loss": 0.4309, "step": 11839 }, { "epoch": 1.0626416279643713, "grad_norm": 0.48352822566814935, "learning_rate": 8.157350074921017e-06, "loss": 0.4407, "step": 11840 }, { "epoch": 1.0627313724170424, "grad_norm": 0.5246806104144227, "learning_rate": 8.156945179222874e-06, "loss": 0.4307, "step": 11841 }, { "epoch": 1.0628211168697135, "grad_norm": 0.5724580728035641, "learning_rate": 8.156540249095724e-06, "loss": 0.4313, "step": 11842 }, { "epoch": 1.0629108613223845, "grad_norm": 0.511028051406603, "learning_rate": 8.156135284543982e-06, "loss": 0.4163, "step": 11843 }, { "epoch": 1.0630006057750556, "grad_norm": 0.5231151337581991, "learning_rate": 8.155730285572065e-06, "loss": 0.4854, "step": 11844 }, { "epoch": 1.0630903502277265, "grad_norm": 0.5092870990757872, "learning_rate": 8.155325252184392e-06, "loss": 0.4092, "step": 11845 }, { "epoch": 1.0631800946803975, "grad_norm": 0.4941807602110644, "learning_rate": 8.154920184385378e-06, "loss": 0.3842, "step": 11846 }, { "epoch": 1.0632698391330686, "grad_norm": 0.5282033558958107, "learning_rate": 8.15451508217944e-06, "loss": 0.438, "step": 11847 }, { "epoch": 1.0633595835857397, "grad_norm": 0.5047828913505354, "learning_rate": 8.154109945570999e-06, "loss": 0.398, "step": 11848 }, { "epoch": 1.0634493280384105, "grad_norm": 0.4392809416814034, "learning_rate": 8.15370477456447e-06, "loss": 0.3494, "step": 11849 }, { "epoch": 1.0635390724910816, "grad_norm": 0.4853551095887963, "learning_rate": 8.153299569164273e-06, "loss": 0.4089, "step": 11850 }, { "epoch": 1.0636288169437527, "grad_norm": 0.4617222000258624, "learning_rate": 8.152894329374828e-06, "loss": 0.4005, "step": 11851 }, { "epoch": 1.0637185613964237, "grad_norm": 0.4881503749270745, "learning_rate": 8.152489055200553e-06, "loss": 0.3938, "step": 11852 }, { "epoch": 1.0638083058490948, "grad_norm": 0.4718504586982682, "learning_rate": 8.15208374664587e-06, "loss": 0.3955, "step": 11853 }, { "epoch": 1.0638980503017657, "grad_norm": 0.4686778471772381, "learning_rate": 8.151678403715195e-06, "loss": 0.4398, "step": 11854 }, { "epoch": 1.0639877947544367, "grad_norm": 0.49417790295336667, "learning_rate": 8.151273026412954e-06, "loss": 0.413, "step": 11855 }, { "epoch": 1.0640775392071078, "grad_norm": 0.4639972887678708, "learning_rate": 8.150867614743563e-06, "loss": 0.3731, "step": 11856 }, { "epoch": 1.0641672836597789, "grad_norm": 0.488340404446655, "learning_rate": 8.150462168711447e-06, "loss": 0.4105, "step": 11857 }, { "epoch": 1.0642570281124497, "grad_norm": 0.46848204568787377, "learning_rate": 8.150056688321026e-06, "loss": 0.3751, "step": 11858 }, { "epoch": 1.0643467725651208, "grad_norm": 0.46466176951248495, "learning_rate": 8.149651173576723e-06, "loss": 0.3436, "step": 11859 }, { "epoch": 1.0644365170177919, "grad_norm": 0.44400301538653947, "learning_rate": 8.14924562448296e-06, "loss": 0.383, "step": 11860 }, { "epoch": 1.064526261470463, "grad_norm": 0.514515615386172, "learning_rate": 8.148840041044159e-06, "loss": 0.4373, "step": 11861 }, { "epoch": 1.064616005923134, "grad_norm": 0.5019178763632441, "learning_rate": 8.148434423264743e-06, "loss": 0.4473, "step": 11862 }, { "epoch": 1.0647057503758048, "grad_norm": 0.5465049623999565, "learning_rate": 8.148028771149138e-06, "loss": 0.4038, "step": 11863 }, { "epoch": 1.064795494828476, "grad_norm": 0.5152830730566867, "learning_rate": 8.147623084701766e-06, "loss": 0.478, "step": 11864 }, { "epoch": 1.064885239281147, "grad_norm": 0.5124598218056554, "learning_rate": 8.14721736392705e-06, "loss": 0.3705, "step": 11865 }, { "epoch": 1.064974983733818, "grad_norm": 0.48318414055298514, "learning_rate": 8.146811608829418e-06, "loss": 0.424, "step": 11866 }, { "epoch": 1.065064728186489, "grad_norm": 0.49791008740628256, "learning_rate": 8.146405819413296e-06, "loss": 0.4384, "step": 11867 }, { "epoch": 1.06515447263916, "grad_norm": 0.4948153123790071, "learning_rate": 8.145999995683105e-06, "loss": 0.448, "step": 11868 }, { "epoch": 1.065244217091831, "grad_norm": 0.5220730583410663, "learning_rate": 8.145594137643273e-06, "loss": 0.4224, "step": 11869 }, { "epoch": 1.065333961544502, "grad_norm": 0.4950382509869594, "learning_rate": 8.145188245298226e-06, "loss": 0.3986, "step": 11870 }, { "epoch": 1.065423705997173, "grad_norm": 0.4816375203771385, "learning_rate": 8.144782318652388e-06, "loss": 0.3618, "step": 11871 }, { "epoch": 1.065513450449844, "grad_norm": 0.5068774467314647, "learning_rate": 8.144376357710193e-06, "loss": 0.4791, "step": 11872 }, { "epoch": 1.065603194902515, "grad_norm": 0.5231908680192504, "learning_rate": 8.14397036247606e-06, "loss": 0.3865, "step": 11873 }, { "epoch": 1.0656929393551862, "grad_norm": 0.49357033307189063, "learning_rate": 8.143564332954426e-06, "loss": 0.4566, "step": 11874 }, { "epoch": 1.065782683807857, "grad_norm": 0.493978499833902, "learning_rate": 8.143158269149708e-06, "loss": 0.383, "step": 11875 }, { "epoch": 1.065872428260528, "grad_norm": 0.5133487718187802, "learning_rate": 8.142752171066345e-06, "loss": 0.3949, "step": 11876 }, { "epoch": 1.0659621727131992, "grad_norm": 0.49176890793153494, "learning_rate": 8.142346038708758e-06, "loss": 0.391, "step": 11877 }, { "epoch": 1.0660519171658702, "grad_norm": 0.4673357106526523, "learning_rate": 8.141939872081381e-06, "loss": 0.3785, "step": 11878 }, { "epoch": 1.0661416616185413, "grad_norm": 0.496092422684188, "learning_rate": 8.14153367118864e-06, "loss": 0.4067, "step": 11879 }, { "epoch": 1.0662314060712121, "grad_norm": 0.5160170886449226, "learning_rate": 8.141127436034968e-06, "loss": 0.4262, "step": 11880 }, { "epoch": 1.0663211505238832, "grad_norm": 0.5223967260332029, "learning_rate": 8.140721166624792e-06, "loss": 0.4337, "step": 11881 }, { "epoch": 1.0664108949765543, "grad_norm": 0.6013688357195992, "learning_rate": 8.140314862962546e-06, "loss": 0.446, "step": 11882 }, { "epoch": 1.0665006394292253, "grad_norm": 0.5363874873410317, "learning_rate": 8.139908525052661e-06, "loss": 0.5, "step": 11883 }, { "epoch": 1.0665903838818962, "grad_norm": 0.5144576871525656, "learning_rate": 8.139502152899565e-06, "loss": 0.4508, "step": 11884 }, { "epoch": 1.0666801283345673, "grad_norm": 0.5529094688597338, "learning_rate": 8.139095746507695e-06, "loss": 0.408, "step": 11885 }, { "epoch": 1.0667698727872383, "grad_norm": 0.5064070681172771, "learning_rate": 8.138689305881477e-06, "loss": 0.4346, "step": 11886 }, { "epoch": 1.0668596172399094, "grad_norm": 0.4659852661694447, "learning_rate": 8.138282831025348e-06, "loss": 0.3767, "step": 11887 }, { "epoch": 1.0669493616925805, "grad_norm": 0.5037034863172919, "learning_rate": 8.13787632194374e-06, "loss": 0.4208, "step": 11888 }, { "epoch": 1.0670391061452513, "grad_norm": 0.49984310101467844, "learning_rate": 8.137469778641088e-06, "loss": 0.4031, "step": 11889 }, { "epoch": 1.0671288505979224, "grad_norm": 0.5009383012676769, "learning_rate": 8.137063201121821e-06, "loss": 0.4437, "step": 11890 }, { "epoch": 1.0672185950505935, "grad_norm": 0.518322161879304, "learning_rate": 8.136656589390378e-06, "loss": 0.4422, "step": 11891 }, { "epoch": 1.0673083395032645, "grad_norm": 0.49464374821857215, "learning_rate": 8.13624994345119e-06, "loss": 0.4115, "step": 11892 }, { "epoch": 1.0673980839559354, "grad_norm": 0.5116408434327057, "learning_rate": 8.135843263308691e-06, "loss": 0.3955, "step": 11893 }, { "epoch": 1.0674878284086065, "grad_norm": 0.49139042891950696, "learning_rate": 8.13543654896732e-06, "loss": 0.4534, "step": 11894 }, { "epoch": 1.0675775728612775, "grad_norm": 0.5290774389942989, "learning_rate": 8.13502980043151e-06, "loss": 0.4043, "step": 11895 }, { "epoch": 1.0676673173139486, "grad_norm": 0.5146942001003824, "learning_rate": 8.134623017705699e-06, "loss": 0.4286, "step": 11896 }, { "epoch": 1.0677570617666197, "grad_norm": 0.5053575543731309, "learning_rate": 8.13421620079432e-06, "loss": 0.4171, "step": 11897 }, { "epoch": 1.0678468062192905, "grad_norm": 0.49323647480263216, "learning_rate": 8.133809349701814e-06, "loss": 0.3977, "step": 11898 }, { "epoch": 1.0679365506719616, "grad_norm": 0.5133074399756654, "learning_rate": 8.133402464432613e-06, "loss": 0.4351, "step": 11899 }, { "epoch": 1.0680262951246327, "grad_norm": 0.49904471457643085, "learning_rate": 8.132995544991159e-06, "loss": 0.4273, "step": 11900 }, { "epoch": 1.0681160395773037, "grad_norm": 0.5390141274206102, "learning_rate": 8.132588591381886e-06, "loss": 0.422, "step": 11901 }, { "epoch": 1.0682057840299746, "grad_norm": 0.4741778147578656, "learning_rate": 8.132181603609237e-06, "loss": 0.3693, "step": 11902 }, { "epoch": 1.0682955284826456, "grad_norm": 0.48872114299526564, "learning_rate": 8.131774581677645e-06, "loss": 0.4216, "step": 11903 }, { "epoch": 1.0683852729353167, "grad_norm": 0.4899401150932999, "learning_rate": 8.131367525591551e-06, "loss": 0.3927, "step": 11904 }, { "epoch": 1.0684750173879878, "grad_norm": 0.5134483012946115, "learning_rate": 8.130960435355397e-06, "loss": 0.5209, "step": 11905 }, { "epoch": 1.0685647618406586, "grad_norm": 0.5059597600394519, "learning_rate": 8.13055331097362e-06, "loss": 0.3859, "step": 11906 }, { "epoch": 1.0686545062933297, "grad_norm": 0.5283879146810139, "learning_rate": 8.130146152450658e-06, "loss": 0.4262, "step": 11907 }, { "epoch": 1.0687442507460008, "grad_norm": 0.43189447784120627, "learning_rate": 8.129738959790954e-06, "loss": 0.3418, "step": 11908 }, { "epoch": 1.0688339951986718, "grad_norm": 0.46222891180724573, "learning_rate": 8.129331732998951e-06, "loss": 0.4126, "step": 11909 }, { "epoch": 1.0689237396513427, "grad_norm": 0.5540336102531235, "learning_rate": 8.128924472079085e-06, "loss": 0.3887, "step": 11910 }, { "epoch": 1.0690134841040138, "grad_norm": 0.47745482215231555, "learning_rate": 8.128517177035802e-06, "loss": 0.3886, "step": 11911 }, { "epoch": 1.0691032285566848, "grad_norm": 0.49744634701344675, "learning_rate": 8.12810984787354e-06, "loss": 0.3845, "step": 11912 }, { "epoch": 1.069192973009356, "grad_norm": 0.49704100850699384, "learning_rate": 8.127702484596745e-06, "loss": 0.3852, "step": 11913 }, { "epoch": 1.069282717462027, "grad_norm": 0.494946197568256, "learning_rate": 8.127295087209857e-06, "loss": 0.4641, "step": 11914 }, { "epoch": 1.0693724619146978, "grad_norm": 0.5013039953018871, "learning_rate": 8.12688765571732e-06, "loss": 0.3753, "step": 11915 }, { "epoch": 1.0694622063673689, "grad_norm": 0.4522378527264597, "learning_rate": 8.126480190123575e-06, "loss": 0.4197, "step": 11916 }, { "epoch": 1.06955195082004, "grad_norm": 0.5267362232953164, "learning_rate": 8.12607269043307e-06, "loss": 0.3907, "step": 11917 }, { "epoch": 1.069641695272711, "grad_norm": 0.4349878065354613, "learning_rate": 8.125665156650248e-06, "loss": 0.4412, "step": 11918 }, { "epoch": 1.0697314397253819, "grad_norm": 0.5004186039621746, "learning_rate": 8.125257588779553e-06, "loss": 0.3776, "step": 11919 }, { "epoch": 1.069821184178053, "grad_norm": 0.5083598708759619, "learning_rate": 8.124849986825426e-06, "loss": 0.3984, "step": 11920 }, { "epoch": 1.069910928630724, "grad_norm": 0.5394417574228654, "learning_rate": 8.124442350792318e-06, "loss": 0.4444, "step": 11921 }, { "epoch": 1.070000673083395, "grad_norm": 0.47000819101076335, "learning_rate": 8.124034680684672e-06, "loss": 0.4148, "step": 11922 }, { "epoch": 1.0700904175360662, "grad_norm": 0.501948228103845, "learning_rate": 8.123626976506935e-06, "loss": 0.3956, "step": 11923 }, { "epoch": 1.070180161988737, "grad_norm": 0.47644960614418375, "learning_rate": 8.123219238263552e-06, "loss": 0.4244, "step": 11924 }, { "epoch": 1.070269906441408, "grad_norm": 0.4892721297421802, "learning_rate": 8.12281146595897e-06, "loss": 0.3891, "step": 11925 }, { "epoch": 1.0703596508940791, "grad_norm": 0.4976682283444482, "learning_rate": 8.122403659597634e-06, "loss": 0.4704, "step": 11926 }, { "epoch": 1.0704493953467502, "grad_norm": 0.5006785215078537, "learning_rate": 8.121995819183999e-06, "loss": 0.4356, "step": 11927 }, { "epoch": 1.070539139799421, "grad_norm": 0.5100445856874407, "learning_rate": 8.121587944722504e-06, "loss": 0.399, "step": 11928 }, { "epoch": 1.0706288842520921, "grad_norm": 0.4856348478718066, "learning_rate": 8.1211800362176e-06, "loss": 0.4246, "step": 11929 }, { "epoch": 1.0707186287047632, "grad_norm": 0.4927325000535825, "learning_rate": 8.120772093673738e-06, "loss": 0.4034, "step": 11930 }, { "epoch": 1.0708083731574343, "grad_norm": 0.4727983403195347, "learning_rate": 8.120364117095365e-06, "loss": 0.3993, "step": 11931 }, { "epoch": 1.0708981176101053, "grad_norm": 0.4710109793507096, "learning_rate": 8.11995610648693e-06, "loss": 0.417, "step": 11932 }, { "epoch": 1.0709878620627762, "grad_norm": 0.483983180331219, "learning_rate": 8.119548061852884e-06, "loss": 0.3921, "step": 11933 }, { "epoch": 1.0710776065154473, "grad_norm": 0.5004274040667981, "learning_rate": 8.119139983197676e-06, "loss": 0.4374, "step": 11934 }, { "epoch": 1.0711673509681183, "grad_norm": 0.5009663282769518, "learning_rate": 8.118731870525757e-06, "loss": 0.4339, "step": 11935 }, { "epoch": 1.0712570954207894, "grad_norm": 0.47978471656108296, "learning_rate": 8.118323723841578e-06, "loss": 0.3427, "step": 11936 }, { "epoch": 1.0713468398734602, "grad_norm": 0.4831761188889017, "learning_rate": 8.11791554314959e-06, "loss": 0.4398, "step": 11937 }, { "epoch": 1.0714365843261313, "grad_norm": 0.5113752698153095, "learning_rate": 8.117507328454241e-06, "loss": 0.4261, "step": 11938 }, { "epoch": 1.0715263287788024, "grad_norm": 0.5081579668862816, "learning_rate": 8.11709907975999e-06, "loss": 0.416, "step": 11939 }, { "epoch": 1.0716160732314735, "grad_norm": 0.5078379440507297, "learning_rate": 8.116690797071286e-06, "loss": 0.4426, "step": 11940 }, { "epoch": 1.0717058176841443, "grad_norm": 0.49262807903294464, "learning_rate": 8.116282480392577e-06, "loss": 0.365, "step": 11941 }, { "epoch": 1.0717955621368154, "grad_norm": 0.5192287361615393, "learning_rate": 8.115874129728322e-06, "loss": 0.4341, "step": 11942 }, { "epoch": 1.0718853065894864, "grad_norm": 0.47923650534603485, "learning_rate": 8.115465745082974e-06, "loss": 0.4703, "step": 11943 }, { "epoch": 1.0719750510421575, "grad_norm": 0.5527150157386869, "learning_rate": 8.115057326460984e-06, "loss": 0.4509, "step": 11944 }, { "epoch": 1.0720647954948284, "grad_norm": 0.5233369706248616, "learning_rate": 8.114648873866807e-06, "loss": 0.3876, "step": 11945 }, { "epoch": 1.0721545399474994, "grad_norm": 0.535663985770871, "learning_rate": 8.114240387304899e-06, "loss": 0.4402, "step": 11946 }, { "epoch": 1.0722442844001705, "grad_norm": 0.49492672158458584, "learning_rate": 8.113831866779713e-06, "loss": 0.4228, "step": 11947 }, { "epoch": 1.0723340288528416, "grad_norm": 0.5001727435485356, "learning_rate": 8.113423312295705e-06, "loss": 0.3856, "step": 11948 }, { "epoch": 1.0724237733055126, "grad_norm": 0.49844698271468013, "learning_rate": 8.11301472385733e-06, "loss": 0.4209, "step": 11949 }, { "epoch": 1.0725135177581835, "grad_norm": 0.5479264776624323, "learning_rate": 8.112606101469045e-06, "loss": 0.4816, "step": 11950 }, { "epoch": 1.0726032622108546, "grad_norm": 0.5001087955915311, "learning_rate": 8.112197445135306e-06, "loss": 0.3866, "step": 11951 }, { "epoch": 1.0726930066635256, "grad_norm": 0.49372082077538454, "learning_rate": 8.111788754860572e-06, "loss": 0.3881, "step": 11952 }, { "epoch": 1.0727827511161967, "grad_norm": 0.4756144561572044, "learning_rate": 8.111380030649295e-06, "loss": 0.4017, "step": 11953 }, { "epoch": 1.0728724955688675, "grad_norm": 0.5111864406156372, "learning_rate": 8.110971272505937e-06, "loss": 0.4769, "step": 11954 }, { "epoch": 1.0729622400215386, "grad_norm": 0.5081523009595973, "learning_rate": 8.11056248043495e-06, "loss": 0.4182, "step": 11955 }, { "epoch": 1.0730519844742097, "grad_norm": 0.4750359562392297, "learning_rate": 8.110153654440802e-06, "loss": 0.3671, "step": 11956 }, { "epoch": 1.0731417289268808, "grad_norm": 0.4929069299171484, "learning_rate": 8.109744794527941e-06, "loss": 0.3997, "step": 11957 }, { "epoch": 1.0732314733795518, "grad_norm": 0.48165090304980185, "learning_rate": 8.109335900700834e-06, "loss": 0.3464, "step": 11958 }, { "epoch": 1.0733212178322227, "grad_norm": 0.45713177732385313, "learning_rate": 8.108926972963935e-06, "loss": 0.4115, "step": 11959 }, { "epoch": 1.0734109622848937, "grad_norm": 0.4748740220620283, "learning_rate": 8.10851801132171e-06, "loss": 0.4113, "step": 11960 }, { "epoch": 1.0735007067375648, "grad_norm": 0.5195698956798024, "learning_rate": 8.108109015778608e-06, "loss": 0.4182, "step": 11961 }, { "epoch": 1.0735904511902359, "grad_norm": 0.47914085608270046, "learning_rate": 8.107699986339102e-06, "loss": 0.3932, "step": 11962 }, { "epoch": 1.0736801956429067, "grad_norm": 0.5063073480505016, "learning_rate": 8.107290923007644e-06, "loss": 0.4486, "step": 11963 }, { "epoch": 1.0737699400955778, "grad_norm": 0.5223872028103237, "learning_rate": 8.1068818257887e-06, "loss": 0.4364, "step": 11964 }, { "epoch": 1.0738596845482489, "grad_norm": 0.5745277609058086, "learning_rate": 8.106472694686729e-06, "loss": 0.3687, "step": 11965 }, { "epoch": 1.07394942900092, "grad_norm": 0.4293031565027903, "learning_rate": 8.106063529706195e-06, "loss": 0.387, "step": 11966 }, { "epoch": 1.074039173453591, "grad_norm": 0.5171577972244314, "learning_rate": 8.105654330851556e-06, "loss": 0.4607, "step": 11967 }, { "epoch": 1.0741289179062619, "grad_norm": 0.4975919499004712, "learning_rate": 8.10524509812728e-06, "loss": 0.4248, "step": 11968 }, { "epoch": 1.074218662358933, "grad_norm": 0.4734971998250675, "learning_rate": 8.104835831537824e-06, "loss": 0.3444, "step": 11969 }, { "epoch": 1.074308406811604, "grad_norm": 0.4405906290461014, "learning_rate": 8.10442653108766e-06, "loss": 0.3918, "step": 11970 }, { "epoch": 1.074398151264275, "grad_norm": 0.5144394132427345, "learning_rate": 8.104017196781245e-06, "loss": 0.4175, "step": 11971 }, { "epoch": 1.074487895716946, "grad_norm": 0.5071753270871687, "learning_rate": 8.103607828623044e-06, "loss": 0.3623, "step": 11972 }, { "epoch": 1.074577640169617, "grad_norm": 0.4467034431146657, "learning_rate": 8.10319842661752e-06, "loss": 0.3987, "step": 11973 }, { "epoch": 1.074667384622288, "grad_norm": 0.5198831629609362, "learning_rate": 8.102788990769143e-06, "loss": 0.398, "step": 11974 }, { "epoch": 1.0747571290749591, "grad_norm": 0.4507780430627895, "learning_rate": 8.102379521082375e-06, "loss": 0.3341, "step": 11975 }, { "epoch": 1.07484687352763, "grad_norm": 0.4698685008058096, "learning_rate": 8.101970017561684e-06, "loss": 0.413, "step": 11976 }, { "epoch": 1.074936617980301, "grad_norm": 0.4726086086551739, "learning_rate": 8.10156048021153e-06, "loss": 0.4178, "step": 11977 }, { "epoch": 1.0750263624329721, "grad_norm": 0.5238670376854135, "learning_rate": 8.101150909036385e-06, "loss": 0.4082, "step": 11978 }, { "epoch": 1.0751161068856432, "grad_norm": 0.5112140029103003, "learning_rate": 8.100741304040715e-06, "loss": 0.4473, "step": 11979 }, { "epoch": 1.075205851338314, "grad_norm": 0.5084893115593461, "learning_rate": 8.100331665228986e-06, "loss": 0.4104, "step": 11980 }, { "epoch": 1.075295595790985, "grad_norm": 0.48908245569563236, "learning_rate": 8.099921992605665e-06, "loss": 0.4386, "step": 11981 }, { "epoch": 1.0753853402436562, "grad_norm": 0.5282531464552116, "learning_rate": 8.09951228617522e-06, "loss": 0.3611, "step": 11982 }, { "epoch": 1.0754750846963272, "grad_norm": 0.4813319726359115, "learning_rate": 8.099102545942119e-06, "loss": 0.3658, "step": 11983 }, { "epoch": 1.0755648291489983, "grad_norm": 0.5191554307787416, "learning_rate": 8.098692771910833e-06, "loss": 0.4031, "step": 11984 }, { "epoch": 1.0756545736016692, "grad_norm": 0.511921655351135, "learning_rate": 8.098282964085827e-06, "loss": 0.421, "step": 11985 }, { "epoch": 1.0757443180543402, "grad_norm": 0.458242708911879, "learning_rate": 8.097873122471571e-06, "loss": 0.412, "step": 11986 }, { "epoch": 1.0758340625070113, "grad_norm": 0.5148506417081496, "learning_rate": 8.097463247072538e-06, "loss": 0.4435, "step": 11987 }, { "epoch": 1.0759238069596824, "grad_norm": 0.4720887406915146, "learning_rate": 8.097053337893195e-06, "loss": 0.3354, "step": 11988 }, { "epoch": 1.0760135514123532, "grad_norm": 0.49125773065834444, "learning_rate": 8.096643394938015e-06, "loss": 0.3876, "step": 11989 }, { "epoch": 1.0761032958650243, "grad_norm": 0.48477772406118386, "learning_rate": 8.096233418211465e-06, "loss": 0.4405, "step": 11990 }, { "epoch": 1.0761930403176954, "grad_norm": 0.4734916593207247, "learning_rate": 8.095823407718017e-06, "loss": 0.3842, "step": 11991 }, { "epoch": 1.0762827847703664, "grad_norm": 0.4978682416968557, "learning_rate": 8.095413363462147e-06, "loss": 0.3907, "step": 11992 }, { "epoch": 1.0763725292230375, "grad_norm": 0.46983576846260533, "learning_rate": 8.095003285448322e-06, "loss": 0.3722, "step": 11993 }, { "epoch": 1.0764622736757083, "grad_norm": 0.5098471923734759, "learning_rate": 8.094593173681015e-06, "loss": 0.5093, "step": 11994 }, { "epoch": 1.0765520181283794, "grad_norm": 0.5354646176523021, "learning_rate": 8.094183028164699e-06, "loss": 0.3946, "step": 11995 }, { "epoch": 1.0766417625810505, "grad_norm": 0.511749106600281, "learning_rate": 8.093772848903849e-06, "loss": 0.4079, "step": 11996 }, { "epoch": 1.0767315070337216, "grad_norm": 0.5244087035100312, "learning_rate": 8.093362635902937e-06, "loss": 0.4743, "step": 11997 }, { "epoch": 1.0768212514863924, "grad_norm": 0.520777114902305, "learning_rate": 8.092952389166434e-06, "loss": 0.3918, "step": 11998 }, { "epoch": 1.0769109959390635, "grad_norm": 0.4877379256051277, "learning_rate": 8.092542108698817e-06, "loss": 0.4171, "step": 11999 }, { "epoch": 1.0770007403917345, "grad_norm": 0.48280851041703804, "learning_rate": 8.092131794504561e-06, "loss": 0.4329, "step": 12000 }, { "epoch": 1.0770904848444056, "grad_norm": 0.539776418332073, "learning_rate": 8.091721446588138e-06, "loss": 0.4175, "step": 12001 }, { "epoch": 1.0771802292970767, "grad_norm": 0.4850712684235401, "learning_rate": 8.091311064954025e-06, "loss": 0.3478, "step": 12002 }, { "epoch": 1.0772699737497475, "grad_norm": 0.49057869174377133, "learning_rate": 8.090900649606698e-06, "loss": 0.3794, "step": 12003 }, { "epoch": 1.0773597182024186, "grad_norm": 0.46929713361894, "learning_rate": 8.090490200550633e-06, "loss": 0.388, "step": 12004 }, { "epoch": 1.0774494626550897, "grad_norm": 0.46610824537931783, "learning_rate": 8.090079717790305e-06, "loss": 0.3804, "step": 12005 }, { "epoch": 1.0775392071077607, "grad_norm": 0.4621540712971448, "learning_rate": 8.08966920133019e-06, "loss": 0.368, "step": 12006 }, { "epoch": 1.0776289515604316, "grad_norm": 0.44780834736264563, "learning_rate": 8.08925865117477e-06, "loss": 0.3333, "step": 12007 }, { "epoch": 1.0777186960131027, "grad_norm": 0.49961539653450704, "learning_rate": 8.088848067328514e-06, "loss": 0.4275, "step": 12008 }, { "epoch": 1.0778084404657737, "grad_norm": 0.48559938052128043, "learning_rate": 8.088437449795904e-06, "loss": 0.3958, "step": 12009 }, { "epoch": 1.0778981849184448, "grad_norm": 0.5111246766645595, "learning_rate": 8.088026798581422e-06, "loss": 0.4281, "step": 12010 }, { "epoch": 1.0779879293711156, "grad_norm": 0.47882467601814305, "learning_rate": 8.087616113689542e-06, "loss": 0.404, "step": 12011 }, { "epoch": 1.0780776738237867, "grad_norm": 0.5081109441490399, "learning_rate": 8.087205395124742e-06, "loss": 0.4051, "step": 12012 }, { "epoch": 1.0781674182764578, "grad_norm": 0.5115386028986146, "learning_rate": 8.086794642891504e-06, "loss": 0.3758, "step": 12013 }, { "epoch": 1.0782571627291289, "grad_norm": 0.4754524875472999, "learning_rate": 8.086383856994307e-06, "loss": 0.3997, "step": 12014 }, { "epoch": 1.0783469071817997, "grad_norm": 0.4592954320209322, "learning_rate": 8.08597303743763e-06, "loss": 0.3591, "step": 12015 }, { "epoch": 1.0784366516344708, "grad_norm": 0.46708763514944446, "learning_rate": 8.085562184225953e-06, "loss": 0.3655, "step": 12016 }, { "epoch": 1.0785263960871418, "grad_norm": 0.4652063305738974, "learning_rate": 8.085151297363758e-06, "loss": 0.3921, "step": 12017 }, { "epoch": 1.078616140539813, "grad_norm": 0.5441705308552564, "learning_rate": 8.084740376855524e-06, "loss": 0.3779, "step": 12018 }, { "epoch": 1.078705884992484, "grad_norm": 0.48894582808800996, "learning_rate": 8.084329422705737e-06, "loss": 0.4079, "step": 12019 }, { "epoch": 1.0787956294451548, "grad_norm": 0.5070596641721089, "learning_rate": 8.083918434918874e-06, "loss": 0.4791, "step": 12020 }, { "epoch": 1.078885373897826, "grad_norm": 0.5088545295005953, "learning_rate": 8.083507413499419e-06, "loss": 0.3732, "step": 12021 }, { "epoch": 1.078975118350497, "grad_norm": 0.4947251134032881, "learning_rate": 8.083096358451854e-06, "loss": 0.4411, "step": 12022 }, { "epoch": 1.079064862803168, "grad_norm": 0.5188408935200721, "learning_rate": 8.082685269780663e-06, "loss": 0.4243, "step": 12023 }, { "epoch": 1.079154607255839, "grad_norm": 0.5630664144394529, "learning_rate": 8.08227414749033e-06, "loss": 0.4122, "step": 12024 }, { "epoch": 1.07924435170851, "grad_norm": 0.47332286204935514, "learning_rate": 8.081862991585334e-06, "loss": 0.4198, "step": 12025 }, { "epoch": 1.079334096161181, "grad_norm": 0.48537757093877, "learning_rate": 8.081451802070163e-06, "loss": 0.3638, "step": 12026 }, { "epoch": 1.079423840613852, "grad_norm": 0.5007510974937218, "learning_rate": 8.0810405789493e-06, "loss": 0.3836, "step": 12027 }, { "epoch": 1.0795135850665232, "grad_norm": 0.5088276041209652, "learning_rate": 8.080629322227233e-06, "loss": 0.409, "step": 12028 }, { "epoch": 1.079603329519194, "grad_norm": 0.4775270659691485, "learning_rate": 8.080218031908441e-06, "loss": 0.3449, "step": 12029 }, { "epoch": 1.079693073971865, "grad_norm": 0.46720545322014606, "learning_rate": 8.079806707997415e-06, "loss": 0.3783, "step": 12030 }, { "epoch": 1.0797828184245362, "grad_norm": 0.4842045940113796, "learning_rate": 8.079395350498636e-06, "loss": 0.4816, "step": 12031 }, { "epoch": 1.0798725628772072, "grad_norm": 0.5420206751006877, "learning_rate": 8.078983959416595e-06, "loss": 0.3885, "step": 12032 }, { "epoch": 1.079962307329878, "grad_norm": 0.45725617783194294, "learning_rate": 8.078572534755775e-06, "loss": 0.4518, "step": 12033 }, { "epoch": 1.0800520517825491, "grad_norm": 0.5424122689486406, "learning_rate": 8.078161076520665e-06, "loss": 0.3952, "step": 12034 }, { "epoch": 1.0801417962352202, "grad_norm": 0.48160270324928145, "learning_rate": 8.077749584715752e-06, "loss": 0.403, "step": 12035 }, { "epoch": 1.0802315406878913, "grad_norm": 0.5054167822516071, "learning_rate": 8.077338059345522e-06, "loss": 0.5244, "step": 12036 }, { "epoch": 1.0803212851405624, "grad_norm": 0.5006153406591227, "learning_rate": 8.076926500414463e-06, "loss": 0.4091, "step": 12037 }, { "epoch": 1.0804110295932332, "grad_norm": 0.46821968883249543, "learning_rate": 8.076514907927067e-06, "loss": 0.39, "step": 12038 }, { "epoch": 1.0805007740459043, "grad_norm": 0.475468187228173, "learning_rate": 8.07610328188782e-06, "loss": 0.4029, "step": 12039 }, { "epoch": 1.0805905184985753, "grad_norm": 0.5597171548379106, "learning_rate": 8.07569162230121e-06, "loss": 0.4161, "step": 12040 }, { "epoch": 1.0806802629512464, "grad_norm": 0.43263253794586487, "learning_rate": 8.075279929171727e-06, "loss": 0.4137, "step": 12041 }, { "epoch": 1.0807700074039173, "grad_norm": 0.529680101164672, "learning_rate": 8.074868202503864e-06, "loss": 0.4553, "step": 12042 }, { "epoch": 1.0808597518565883, "grad_norm": 0.4880779871793101, "learning_rate": 8.074456442302106e-06, "loss": 0.3804, "step": 12043 }, { "epoch": 1.0809494963092594, "grad_norm": 0.4604685497407521, "learning_rate": 8.074044648570949e-06, "loss": 0.3527, "step": 12044 }, { "epoch": 1.0810392407619305, "grad_norm": 0.46177682616535765, "learning_rate": 8.07363282131488e-06, "loss": 0.4322, "step": 12045 }, { "epoch": 1.0811289852146013, "grad_norm": 0.5391095420051158, "learning_rate": 8.073220960538393e-06, "loss": 0.3983, "step": 12046 }, { "epoch": 1.0812187296672724, "grad_norm": 0.483810040252515, "learning_rate": 8.072809066245977e-06, "loss": 0.3704, "step": 12047 }, { "epoch": 1.0813084741199435, "grad_norm": 0.5002892075821946, "learning_rate": 8.072397138442127e-06, "loss": 0.383, "step": 12048 }, { "epoch": 1.0813982185726145, "grad_norm": 0.47849579641316975, "learning_rate": 8.071985177131332e-06, "loss": 0.3807, "step": 12049 }, { "epoch": 1.0814879630252856, "grad_norm": 0.44117398604184466, "learning_rate": 8.071573182318089e-06, "loss": 0.348, "step": 12050 }, { "epoch": 1.0815777074779565, "grad_norm": 0.48346157033304593, "learning_rate": 8.071161154006885e-06, "loss": 0.3874, "step": 12051 }, { "epoch": 1.0816674519306275, "grad_norm": 0.5059493482724075, "learning_rate": 8.070749092202221e-06, "loss": 0.3903, "step": 12052 }, { "epoch": 1.0817571963832986, "grad_norm": 0.5039997972816624, "learning_rate": 8.070336996908585e-06, "loss": 0.427, "step": 12053 }, { "epoch": 1.0818469408359697, "grad_norm": 0.5170858370963587, "learning_rate": 8.069924868130472e-06, "loss": 0.4712, "step": 12054 }, { "epoch": 1.0819366852886405, "grad_norm": 0.5543903423753713, "learning_rate": 8.06951270587238e-06, "loss": 0.3952, "step": 12055 }, { "epoch": 1.0820264297413116, "grad_norm": 0.4595415078755557, "learning_rate": 8.069100510138801e-06, "loss": 0.3318, "step": 12056 }, { "epoch": 1.0821161741939826, "grad_norm": 0.49380677458010963, "learning_rate": 8.068688280934232e-06, "loss": 0.4332, "step": 12057 }, { "epoch": 1.0822059186466537, "grad_norm": 0.5022969489348846, "learning_rate": 8.068276018263168e-06, "loss": 0.3692, "step": 12058 }, { "epoch": 1.0822956630993246, "grad_norm": 0.4907632520164712, "learning_rate": 8.067863722130104e-06, "loss": 0.4027, "step": 12059 }, { "epoch": 1.0823854075519956, "grad_norm": 0.49831909180425266, "learning_rate": 8.067451392539536e-06, "loss": 0.3876, "step": 12060 }, { "epoch": 1.0824751520046667, "grad_norm": 0.4870981802033008, "learning_rate": 8.067039029495965e-06, "loss": 0.4277, "step": 12061 }, { "epoch": 1.0825648964573378, "grad_norm": 0.48690982746475425, "learning_rate": 8.066626633003884e-06, "loss": 0.4372, "step": 12062 }, { "epoch": 1.0826546409100088, "grad_norm": 0.4748208283463339, "learning_rate": 8.06621420306779e-06, "loss": 0.4414, "step": 12063 }, { "epoch": 1.0827443853626797, "grad_norm": 0.51649090303593, "learning_rate": 8.065801739692185e-06, "loss": 0.3884, "step": 12064 }, { "epoch": 1.0828341298153508, "grad_norm": 0.5296306906835397, "learning_rate": 8.065389242881564e-06, "loss": 0.4046, "step": 12065 }, { "epoch": 1.0829238742680218, "grad_norm": 0.5132195155634356, "learning_rate": 8.064976712640427e-06, "loss": 0.4518, "step": 12066 }, { "epoch": 1.083013618720693, "grad_norm": 0.4674781176907657, "learning_rate": 8.064564148973274e-06, "loss": 0.3764, "step": 12067 }, { "epoch": 1.0831033631733638, "grad_norm": 0.5176476315564594, "learning_rate": 8.064151551884599e-06, "loss": 0.3692, "step": 12068 }, { "epoch": 1.0831931076260348, "grad_norm": 0.4802164339349124, "learning_rate": 8.063738921378907e-06, "loss": 0.4189, "step": 12069 }, { "epoch": 1.083282852078706, "grad_norm": 0.49085547341578084, "learning_rate": 8.063326257460698e-06, "loss": 0.3774, "step": 12070 }, { "epoch": 1.083372596531377, "grad_norm": 0.5279704195356553, "learning_rate": 8.06291356013447e-06, "loss": 0.391, "step": 12071 }, { "epoch": 1.083462340984048, "grad_norm": 0.5057171104625938, "learning_rate": 8.062500829404724e-06, "loss": 0.4706, "step": 12072 }, { "epoch": 1.0835520854367189, "grad_norm": 0.4657128951326058, "learning_rate": 8.062088065275963e-06, "loss": 0.3971, "step": 12073 }, { "epoch": 1.08364182988939, "grad_norm": 0.5104860003802717, "learning_rate": 8.061675267752687e-06, "loss": 0.4471, "step": 12074 }, { "epoch": 1.083731574342061, "grad_norm": 0.4942748004723244, "learning_rate": 8.061262436839399e-06, "loss": 0.4434, "step": 12075 }, { "epoch": 1.083821318794732, "grad_norm": 0.5004161937780404, "learning_rate": 8.060849572540599e-06, "loss": 0.4088, "step": 12076 }, { "epoch": 1.083911063247403, "grad_norm": 0.4968735223906427, "learning_rate": 8.060436674860792e-06, "loss": 0.4123, "step": 12077 }, { "epoch": 1.084000807700074, "grad_norm": 0.4900013675081812, "learning_rate": 8.060023743804482e-06, "loss": 0.3702, "step": 12078 }, { "epoch": 1.084090552152745, "grad_norm": 0.46410874654096873, "learning_rate": 8.059610779376167e-06, "loss": 0.413, "step": 12079 }, { "epoch": 1.0841802966054161, "grad_norm": 0.49197431338872505, "learning_rate": 8.059197781580357e-06, "loss": 0.4278, "step": 12080 }, { "epoch": 1.084270041058087, "grad_norm": 0.5099256010536332, "learning_rate": 8.058784750421553e-06, "loss": 0.4713, "step": 12081 }, { "epoch": 1.084359785510758, "grad_norm": 0.5044121487780381, "learning_rate": 8.05837168590426e-06, "loss": 0.3673, "step": 12082 }, { "epoch": 1.0844495299634291, "grad_norm": 0.4681534188414813, "learning_rate": 8.05795858803298e-06, "loss": 0.4072, "step": 12083 }, { "epoch": 1.0845392744161002, "grad_norm": 0.4648847728800873, "learning_rate": 8.057545456812224e-06, "loss": 0.4522, "step": 12084 }, { "epoch": 1.0846290188687713, "grad_norm": 0.5256394021086412, "learning_rate": 8.057132292246493e-06, "loss": 0.4194, "step": 12085 }, { "epoch": 1.0847187633214421, "grad_norm": 0.4579581450840211, "learning_rate": 8.056719094340292e-06, "loss": 0.3439, "step": 12086 }, { "epoch": 1.0848085077741132, "grad_norm": 0.4628809756551693, "learning_rate": 8.056305863098133e-06, "loss": 0.3602, "step": 12087 }, { "epoch": 1.0848982522267843, "grad_norm": 0.46927391376620925, "learning_rate": 8.055892598524517e-06, "loss": 0.4037, "step": 12088 }, { "epoch": 1.0849879966794553, "grad_norm": 0.4836497218672746, "learning_rate": 8.055479300623953e-06, "loss": 0.3782, "step": 12089 }, { "epoch": 1.0850777411321262, "grad_norm": 0.5014365952977734, "learning_rate": 8.055065969400949e-06, "loss": 0.3992, "step": 12090 }, { "epoch": 1.0851674855847973, "grad_norm": 0.48799720998441004, "learning_rate": 8.054652604860012e-06, "loss": 0.3796, "step": 12091 }, { "epoch": 1.0852572300374683, "grad_norm": 0.47893751456225464, "learning_rate": 8.05423920700565e-06, "loss": 0.3636, "step": 12092 }, { "epoch": 1.0853469744901394, "grad_norm": 0.460117954939899, "learning_rate": 8.053825775842372e-06, "loss": 0.4209, "step": 12093 }, { "epoch": 1.0854367189428102, "grad_norm": 0.5937445783802947, "learning_rate": 8.053412311374684e-06, "loss": 0.4495, "step": 12094 }, { "epoch": 1.0855264633954813, "grad_norm": 0.46740878846166, "learning_rate": 8.0529988136071e-06, "loss": 0.3737, "step": 12095 }, { "epoch": 1.0856162078481524, "grad_norm": 0.48976676322159296, "learning_rate": 8.052585282544125e-06, "loss": 0.3887, "step": 12096 }, { "epoch": 1.0857059523008235, "grad_norm": 0.4849730582561826, "learning_rate": 8.052171718190274e-06, "loss": 0.3931, "step": 12097 }, { "epoch": 1.0857956967534945, "grad_norm": 0.45785895270482496, "learning_rate": 8.05175812055005e-06, "loss": 0.3828, "step": 12098 }, { "epoch": 1.0858854412061654, "grad_norm": 0.5225776700258798, "learning_rate": 8.05134448962797e-06, "loss": 0.4065, "step": 12099 }, { "epoch": 1.0859751856588364, "grad_norm": 0.5393634850632092, "learning_rate": 8.050930825428543e-06, "loss": 0.4434, "step": 12100 }, { "epoch": 1.0860649301115075, "grad_norm": 0.5474385468701749, "learning_rate": 8.050517127956282e-06, "loss": 0.4809, "step": 12101 }, { "epoch": 1.0861546745641786, "grad_norm": 0.5783307961636572, "learning_rate": 8.050103397215693e-06, "loss": 0.5093, "step": 12102 }, { "epoch": 1.0862444190168494, "grad_norm": 0.6008346555755978, "learning_rate": 8.049689633211293e-06, "loss": 0.4219, "step": 12103 }, { "epoch": 1.0863341634695205, "grad_norm": 0.5131380311051911, "learning_rate": 8.049275835947594e-06, "loss": 0.4061, "step": 12104 }, { "epoch": 1.0864239079221916, "grad_norm": 0.4904567670718381, "learning_rate": 8.048862005429108e-06, "loss": 0.3845, "step": 12105 }, { "epoch": 1.0865136523748626, "grad_norm": 0.47565040452452734, "learning_rate": 8.048448141660349e-06, "loss": 0.3687, "step": 12106 }, { "epoch": 1.0866033968275337, "grad_norm": 0.4598108621454146, "learning_rate": 8.048034244645825e-06, "loss": 0.3573, "step": 12107 }, { "epoch": 1.0866931412802046, "grad_norm": 0.4918978920468057, "learning_rate": 8.047620314390058e-06, "loss": 0.3577, "step": 12108 }, { "epoch": 1.0867828857328756, "grad_norm": 0.47038378404715653, "learning_rate": 8.04720635089756e-06, "loss": 0.3883, "step": 12109 }, { "epoch": 1.0868726301855467, "grad_norm": 0.46153927163430963, "learning_rate": 8.046792354172846e-06, "loss": 0.3973, "step": 12110 }, { "epoch": 1.0869623746382178, "grad_norm": 0.5704993195391912, "learning_rate": 8.046378324220425e-06, "loss": 0.3915, "step": 12111 }, { "epoch": 1.0870521190908886, "grad_norm": 0.4979015992336497, "learning_rate": 8.045964261044818e-06, "loss": 0.4439, "step": 12112 }, { "epoch": 1.0871418635435597, "grad_norm": 0.5354758823718972, "learning_rate": 8.045550164650541e-06, "loss": 0.4175, "step": 12113 }, { "epoch": 1.0872316079962308, "grad_norm": 0.4956175385782353, "learning_rate": 8.045136035042108e-06, "loss": 0.4045, "step": 12114 }, { "epoch": 1.0873213524489018, "grad_norm": 0.509954243024592, "learning_rate": 8.044721872224034e-06, "loss": 0.4299, "step": 12115 }, { "epoch": 1.0874110969015727, "grad_norm": 0.5057564640140856, "learning_rate": 8.04430767620084e-06, "loss": 0.4209, "step": 12116 }, { "epoch": 1.0875008413542437, "grad_norm": 0.5153360135393245, "learning_rate": 8.04389344697704e-06, "loss": 0.4216, "step": 12117 }, { "epoch": 1.0875905858069148, "grad_norm": 0.5119274161171642, "learning_rate": 8.043479184557152e-06, "loss": 0.3976, "step": 12118 }, { "epoch": 1.0876803302595859, "grad_norm": 0.4778496864938223, "learning_rate": 8.043064888945693e-06, "loss": 0.3975, "step": 12119 }, { "epoch": 1.087770074712257, "grad_norm": 0.5195809043636594, "learning_rate": 8.042650560147184e-06, "loss": 0.3934, "step": 12120 }, { "epoch": 1.0878598191649278, "grad_norm": 0.5155964572412782, "learning_rate": 8.042236198166142e-06, "loss": 0.4505, "step": 12121 }, { "epoch": 1.0879495636175989, "grad_norm": 0.48766117347039345, "learning_rate": 8.041821803007086e-06, "loss": 0.3774, "step": 12122 }, { "epoch": 1.08803930807027, "grad_norm": 0.5307299884693544, "learning_rate": 8.041407374674534e-06, "loss": 0.4912, "step": 12123 }, { "epoch": 1.088129052522941, "grad_norm": 0.5284898733063396, "learning_rate": 8.04099291317301e-06, "loss": 0.4922, "step": 12124 }, { "epoch": 1.0882187969756119, "grad_norm": 0.5646130459082168, "learning_rate": 8.040578418507026e-06, "loss": 0.3871, "step": 12125 }, { "epoch": 1.088308541428283, "grad_norm": 0.4704442946065303, "learning_rate": 8.040163890681113e-06, "loss": 0.4359, "step": 12126 }, { "epoch": 1.088398285880954, "grad_norm": 0.48893290957045954, "learning_rate": 8.039749329699781e-06, "loss": 0.384, "step": 12127 }, { "epoch": 1.088488030333625, "grad_norm": 0.4966607954242567, "learning_rate": 8.03933473556756e-06, "loss": 0.4282, "step": 12128 }, { "epoch": 1.088577774786296, "grad_norm": 0.48322395156788256, "learning_rate": 8.038920108288963e-06, "loss": 0.3684, "step": 12129 }, { "epoch": 1.088667519238967, "grad_norm": 0.4681354884811422, "learning_rate": 8.038505447868521e-06, "loss": 0.3656, "step": 12130 }, { "epoch": 1.088757263691638, "grad_norm": 0.4821284529567141, "learning_rate": 8.03809075431075e-06, "loss": 0.3741, "step": 12131 }, { "epoch": 1.0888470081443091, "grad_norm": 0.471648276820616, "learning_rate": 8.037676027620174e-06, "loss": 0.3782, "step": 12132 }, { "epoch": 1.0889367525969802, "grad_norm": 0.5047470341308732, "learning_rate": 8.037261267801318e-06, "loss": 0.4134, "step": 12133 }, { "epoch": 1.089026497049651, "grad_norm": 0.5450791336061652, "learning_rate": 8.036846474858701e-06, "loss": 0.439, "step": 12134 }, { "epoch": 1.0891162415023221, "grad_norm": 0.5420348087135104, "learning_rate": 8.03643164879685e-06, "loss": 0.3969, "step": 12135 }, { "epoch": 1.0892059859549932, "grad_norm": 0.47696752716016066, "learning_rate": 8.03601678962029e-06, "loss": 0.3802, "step": 12136 }, { "epoch": 1.0892957304076643, "grad_norm": 0.4620029906372769, "learning_rate": 8.035601897333542e-06, "loss": 0.3845, "step": 12137 }, { "epoch": 1.089385474860335, "grad_norm": 0.4931056490728172, "learning_rate": 8.03518697194113e-06, "loss": 0.4114, "step": 12138 }, { "epoch": 1.0894752193130062, "grad_norm": 0.49999823381254344, "learning_rate": 8.034772013447585e-06, "loss": 0.4168, "step": 12139 }, { "epoch": 1.0895649637656772, "grad_norm": 0.47652279306282475, "learning_rate": 8.034357021857429e-06, "loss": 0.4617, "step": 12140 }, { "epoch": 1.0896547082183483, "grad_norm": 0.5485965954501435, "learning_rate": 8.033941997175185e-06, "loss": 0.4545, "step": 12141 }, { "epoch": 1.0897444526710194, "grad_norm": 0.5164239980127328, "learning_rate": 8.033526939405383e-06, "loss": 0.3941, "step": 12142 }, { "epoch": 1.0898341971236902, "grad_norm": 0.5006499166974521, "learning_rate": 8.033111848552548e-06, "loss": 0.3663, "step": 12143 }, { "epoch": 1.0899239415763613, "grad_norm": 0.45839310786204457, "learning_rate": 8.032696724621209e-06, "loss": 0.3872, "step": 12144 }, { "epoch": 1.0900136860290324, "grad_norm": 0.5007657207377368, "learning_rate": 8.03228156761589e-06, "loss": 0.4263, "step": 12145 }, { "epoch": 1.0901034304817034, "grad_norm": 0.5407336710016784, "learning_rate": 8.03186637754112e-06, "loss": 0.4378, "step": 12146 }, { "epoch": 1.0901931749343743, "grad_norm": 0.4780453534548044, "learning_rate": 8.031451154401426e-06, "loss": 0.4075, "step": 12147 }, { "epoch": 1.0902829193870454, "grad_norm": 0.4987184268166836, "learning_rate": 8.03103589820134e-06, "loss": 0.4283, "step": 12148 }, { "epoch": 1.0903726638397164, "grad_norm": 0.44902729293703314, "learning_rate": 8.030620608945386e-06, "loss": 0.3624, "step": 12149 }, { "epoch": 1.0904624082923875, "grad_norm": 0.4782147825972375, "learning_rate": 8.030205286638098e-06, "loss": 0.4044, "step": 12150 }, { "epoch": 1.0905521527450583, "grad_norm": 0.505764141034876, "learning_rate": 8.029789931284e-06, "loss": 0.3885, "step": 12151 }, { "epoch": 1.0906418971977294, "grad_norm": 0.4471306151031493, "learning_rate": 8.029374542887624e-06, "loss": 0.3893, "step": 12152 }, { "epoch": 1.0907316416504005, "grad_norm": 0.4900508415658168, "learning_rate": 8.028959121453503e-06, "loss": 0.3838, "step": 12153 }, { "epoch": 1.0908213861030716, "grad_norm": 0.47127356709638696, "learning_rate": 8.028543666986162e-06, "loss": 0.4256, "step": 12154 }, { "epoch": 1.0909111305557426, "grad_norm": 0.4980836103063354, "learning_rate": 8.028128179490137e-06, "loss": 0.3784, "step": 12155 }, { "epoch": 1.0910008750084135, "grad_norm": 0.47351391767816375, "learning_rate": 8.027712658969958e-06, "loss": 0.4602, "step": 12156 }, { "epoch": 1.0910906194610845, "grad_norm": 0.5200632803281567, "learning_rate": 8.027297105430154e-06, "loss": 0.4548, "step": 12157 }, { "epoch": 1.0911803639137556, "grad_norm": 0.5158244657380621, "learning_rate": 8.026881518875257e-06, "loss": 0.4369, "step": 12158 }, { "epoch": 1.0912701083664267, "grad_norm": 0.5082345952909386, "learning_rate": 8.026465899309804e-06, "loss": 0.4369, "step": 12159 }, { "epoch": 1.0913598528190975, "grad_norm": 0.497053962916322, "learning_rate": 8.026050246738323e-06, "loss": 0.4331, "step": 12160 }, { "epoch": 1.0914495972717686, "grad_norm": 0.493835949308073, "learning_rate": 8.02563456116535e-06, "loss": 0.4552, "step": 12161 }, { "epoch": 1.0915393417244397, "grad_norm": 0.50772489260082, "learning_rate": 8.025218842595416e-06, "loss": 0.3922, "step": 12162 }, { "epoch": 1.0916290861771107, "grad_norm": 0.5039806913050535, "learning_rate": 8.024803091033056e-06, "loss": 0.3838, "step": 12163 }, { "epoch": 1.0917188306297816, "grad_norm": 0.4778046709382001, "learning_rate": 8.024387306482804e-06, "loss": 0.3288, "step": 12164 }, { "epoch": 1.0918085750824527, "grad_norm": 0.4447009328019858, "learning_rate": 8.023971488949193e-06, "loss": 0.3295, "step": 12165 }, { "epoch": 1.0918983195351237, "grad_norm": 0.4680952666348569, "learning_rate": 8.023555638436761e-06, "loss": 0.4201, "step": 12166 }, { "epoch": 1.0919880639877948, "grad_norm": 0.5186474464308949, "learning_rate": 8.023139754950042e-06, "loss": 0.392, "step": 12167 }, { "epoch": 1.0920778084404659, "grad_norm": 0.44111613850474557, "learning_rate": 8.02272383849357e-06, "loss": 0.3454, "step": 12168 }, { "epoch": 1.0921675528931367, "grad_norm": 0.48161805054259027, "learning_rate": 8.02230788907188e-06, "loss": 0.3871, "step": 12169 }, { "epoch": 1.0922572973458078, "grad_norm": 0.49377374693254805, "learning_rate": 8.021891906689512e-06, "loss": 0.4328, "step": 12170 }, { "epoch": 1.0923470417984789, "grad_norm": 0.5196295000497125, "learning_rate": 8.021475891351001e-06, "loss": 0.3906, "step": 12171 }, { "epoch": 1.09243678625115, "grad_norm": 0.47662931210938186, "learning_rate": 8.02105984306088e-06, "loss": 0.3951, "step": 12172 }, { "epoch": 1.0925265307038208, "grad_norm": 0.4817159651135189, "learning_rate": 8.020643761823694e-06, "loss": 0.3586, "step": 12173 }, { "epoch": 1.0926162751564918, "grad_norm": 0.4773009008177939, "learning_rate": 8.020227647643975e-06, "loss": 0.367, "step": 12174 }, { "epoch": 1.092706019609163, "grad_norm": 0.48603482247779617, "learning_rate": 8.019811500526264e-06, "loss": 0.4603, "step": 12175 }, { "epoch": 1.092795764061834, "grad_norm": 0.47509329416295826, "learning_rate": 8.019395320475096e-06, "loss": 0.3497, "step": 12176 }, { "epoch": 1.092885508514505, "grad_norm": 0.46303957740201684, "learning_rate": 8.018979107495015e-06, "loss": 0.3907, "step": 12177 }, { "epoch": 1.092975252967176, "grad_norm": 0.4930426793076712, "learning_rate": 8.018562861590556e-06, "loss": 0.3989, "step": 12178 }, { "epoch": 1.093064997419847, "grad_norm": 0.5312997103819556, "learning_rate": 8.018146582766257e-06, "loss": 0.4246, "step": 12179 }, { "epoch": 1.093154741872518, "grad_norm": 0.5298619822777737, "learning_rate": 8.017730271026662e-06, "loss": 0.4882, "step": 12180 }, { "epoch": 1.0932444863251891, "grad_norm": 0.4994992385812522, "learning_rate": 8.017313926376312e-06, "loss": 0.3305, "step": 12181 }, { "epoch": 1.09333423077786, "grad_norm": 0.4719597392339704, "learning_rate": 8.016897548819743e-06, "loss": 0.3916, "step": 12182 }, { "epoch": 1.093423975230531, "grad_norm": 0.4765786665854565, "learning_rate": 8.016481138361495e-06, "loss": 0.3732, "step": 12183 }, { "epoch": 1.093513719683202, "grad_norm": 0.49095624324638976, "learning_rate": 8.016064695006118e-06, "loss": 0.374, "step": 12184 }, { "epoch": 1.0936034641358732, "grad_norm": 0.47885192400699744, "learning_rate": 8.015648218758145e-06, "loss": 0.3802, "step": 12185 }, { "epoch": 1.093693208588544, "grad_norm": 0.4747245597720952, "learning_rate": 8.015231709622122e-06, "loss": 0.3833, "step": 12186 }, { "epoch": 1.093782953041215, "grad_norm": 0.4581968344039793, "learning_rate": 8.01481516760259e-06, "loss": 0.4217, "step": 12187 }, { "epoch": 1.0938726974938862, "grad_norm": 0.5131629095053583, "learning_rate": 8.014398592704092e-06, "loss": 0.3736, "step": 12188 }, { "epoch": 1.0939624419465572, "grad_norm": 0.4938303362921767, "learning_rate": 8.013981984931171e-06, "loss": 0.4283, "step": 12189 }, { "epoch": 1.0940521863992283, "grad_norm": 0.453195911155255, "learning_rate": 8.013565344288371e-06, "loss": 0.3794, "step": 12190 }, { "epoch": 1.0941419308518991, "grad_norm": 0.5175358545654484, "learning_rate": 8.013148670780237e-06, "loss": 0.4912, "step": 12191 }, { "epoch": 1.0942316753045702, "grad_norm": 0.542325059616489, "learning_rate": 8.012731964411309e-06, "loss": 0.4175, "step": 12192 }, { "epoch": 1.0943214197572413, "grad_norm": 0.4985735631606013, "learning_rate": 8.012315225186135e-06, "loss": 0.4172, "step": 12193 }, { "epoch": 1.0944111642099124, "grad_norm": 0.47363193477177823, "learning_rate": 8.011898453109262e-06, "loss": 0.3543, "step": 12194 }, { "epoch": 1.0945009086625832, "grad_norm": 0.4470008526737611, "learning_rate": 8.011481648185228e-06, "loss": 0.4481, "step": 12195 }, { "epoch": 1.0945906531152543, "grad_norm": 0.5846202066463095, "learning_rate": 8.011064810418584e-06, "loss": 0.479, "step": 12196 }, { "epoch": 1.0946803975679253, "grad_norm": 0.5032526927752209, "learning_rate": 8.010647939813876e-06, "loss": 0.4122, "step": 12197 }, { "epoch": 1.0947701420205964, "grad_norm": 0.4929308972855813, "learning_rate": 8.010231036375648e-06, "loss": 0.4145, "step": 12198 }, { "epoch": 1.0948598864732673, "grad_norm": 0.5332789883010591, "learning_rate": 8.009814100108448e-06, "loss": 0.417, "step": 12199 }, { "epoch": 1.0949496309259383, "grad_norm": 0.5255344180813515, "learning_rate": 8.009397131016824e-06, "loss": 0.4435, "step": 12200 }, { "epoch": 1.0950393753786094, "grad_norm": 0.5145566256358315, "learning_rate": 8.008980129105319e-06, "loss": 0.403, "step": 12201 }, { "epoch": 1.0951291198312805, "grad_norm": 0.5092264412462406, "learning_rate": 8.008563094378488e-06, "loss": 0.4096, "step": 12202 }, { "epoch": 1.0952188642839515, "grad_norm": 0.5107377801026675, "learning_rate": 8.008146026840871e-06, "loss": 0.3905, "step": 12203 }, { "epoch": 1.0953086087366224, "grad_norm": 0.4883439014621255, "learning_rate": 8.007728926497024e-06, "loss": 0.3863, "step": 12204 }, { "epoch": 1.0953983531892935, "grad_norm": 0.5347429715322058, "learning_rate": 8.00731179335149e-06, "loss": 0.4534, "step": 12205 }, { "epoch": 1.0954880976419645, "grad_norm": 0.47138230782186746, "learning_rate": 8.006894627408822e-06, "loss": 0.34, "step": 12206 }, { "epoch": 1.0955778420946356, "grad_norm": 0.4662490184873145, "learning_rate": 8.006477428673567e-06, "loss": 0.4135, "step": 12207 }, { "epoch": 1.0956675865473064, "grad_norm": 0.5192483955270686, "learning_rate": 8.006060197150276e-06, "loss": 0.3914, "step": 12208 }, { "epoch": 1.0957573309999775, "grad_norm": 0.501633170627195, "learning_rate": 8.005642932843499e-06, "loss": 0.3965, "step": 12209 }, { "epoch": 1.0958470754526486, "grad_norm": 0.516466685343247, "learning_rate": 8.005225635757786e-06, "loss": 0.4054, "step": 12210 }, { "epoch": 1.0959368199053197, "grad_norm": 0.47393266452053606, "learning_rate": 8.004808305897689e-06, "loss": 0.4077, "step": 12211 }, { "epoch": 1.0960265643579907, "grad_norm": 0.5081174234785889, "learning_rate": 8.004390943267759e-06, "loss": 0.493, "step": 12212 }, { "epoch": 1.0961163088106616, "grad_norm": 0.5488485472289333, "learning_rate": 8.003973547872548e-06, "loss": 0.4054, "step": 12213 }, { "epoch": 1.0962060532633326, "grad_norm": 0.48545664380641296, "learning_rate": 8.003556119716608e-06, "loss": 0.3983, "step": 12214 }, { "epoch": 1.0962957977160037, "grad_norm": 0.5117052033581787, "learning_rate": 8.00313865880449e-06, "loss": 0.4189, "step": 12215 }, { "epoch": 1.0963855421686748, "grad_norm": 0.5159964704815541, "learning_rate": 8.00272116514075e-06, "loss": 0.3658, "step": 12216 }, { "epoch": 1.0964752866213456, "grad_norm": 0.5013865181162547, "learning_rate": 8.002303638729935e-06, "loss": 0.415, "step": 12217 }, { "epoch": 1.0965650310740167, "grad_norm": 0.4593833657444031, "learning_rate": 8.001886079576606e-06, "loss": 0.4199, "step": 12218 }, { "epoch": 1.0966547755266878, "grad_norm": 0.5593955853709587, "learning_rate": 8.001468487685312e-06, "loss": 0.4329, "step": 12219 }, { "epoch": 1.0967445199793588, "grad_norm": 0.4746019893715263, "learning_rate": 8.001050863060608e-06, "loss": 0.4293, "step": 12220 }, { "epoch": 1.0968342644320297, "grad_norm": 0.510623753280546, "learning_rate": 8.000633205707048e-06, "loss": 0.4248, "step": 12221 }, { "epoch": 1.0969240088847008, "grad_norm": 0.4649567701504428, "learning_rate": 8.00021551562919e-06, "loss": 0.3912, "step": 12222 }, { "epoch": 1.0970137533373718, "grad_norm": 0.47553413435871594, "learning_rate": 7.999797792831586e-06, "loss": 0.4392, "step": 12223 }, { "epoch": 1.097103497790043, "grad_norm": 0.47383055282528663, "learning_rate": 7.999380037318793e-06, "loss": 0.3653, "step": 12224 }, { "epoch": 1.097193242242714, "grad_norm": 0.4996438004357357, "learning_rate": 7.998962249095366e-06, "loss": 0.3851, "step": 12225 }, { "epoch": 1.0972829866953848, "grad_norm": 0.5126322356332347, "learning_rate": 7.998544428165861e-06, "loss": 0.4525, "step": 12226 }, { "epoch": 1.097372731148056, "grad_norm": 0.4911078371613675, "learning_rate": 7.998126574534837e-06, "loss": 0.4338, "step": 12227 }, { "epoch": 1.097462475600727, "grad_norm": 0.507222887802157, "learning_rate": 7.99770868820685e-06, "loss": 0.4248, "step": 12228 }, { "epoch": 1.097552220053398, "grad_norm": 0.5221359572181813, "learning_rate": 7.997290769186455e-06, "loss": 0.4127, "step": 12229 }, { "epoch": 1.0976419645060689, "grad_norm": 0.5320268847291736, "learning_rate": 7.996872817478212e-06, "loss": 0.4518, "step": 12230 }, { "epoch": 1.09773170895874, "grad_norm": 0.5265364460050038, "learning_rate": 7.99645483308668e-06, "loss": 0.4212, "step": 12231 }, { "epoch": 1.097821453411411, "grad_norm": 0.5134125958311515, "learning_rate": 7.996036816016416e-06, "loss": 0.4024, "step": 12232 }, { "epoch": 1.097911197864082, "grad_norm": 0.4900330982687833, "learning_rate": 7.99561876627198e-06, "loss": 0.3637, "step": 12233 }, { "epoch": 1.098000942316753, "grad_norm": 0.4605291019329709, "learning_rate": 7.99520068385793e-06, "loss": 0.4028, "step": 12234 }, { "epoch": 1.098090686769424, "grad_norm": 0.48634154518788775, "learning_rate": 7.994782568778823e-06, "loss": 0.3343, "step": 12235 }, { "epoch": 1.098180431222095, "grad_norm": 0.47037651058916413, "learning_rate": 7.994364421039223e-06, "loss": 0.3586, "step": 12236 }, { "epoch": 1.0982701756747661, "grad_norm": 0.4648616691828318, "learning_rate": 7.99394624064369e-06, "loss": 0.3955, "step": 12237 }, { "epoch": 1.0983599201274372, "grad_norm": 0.514740855207314, "learning_rate": 7.993528027596781e-06, "loss": 0.4089, "step": 12238 }, { "epoch": 1.098449664580108, "grad_norm": 0.4649286132552098, "learning_rate": 7.993109781903064e-06, "loss": 0.3686, "step": 12239 }, { "epoch": 1.0985394090327791, "grad_norm": 0.4989714896771979, "learning_rate": 7.992691503567092e-06, "loss": 0.3751, "step": 12240 }, { "epoch": 1.0986291534854502, "grad_norm": 0.5000699174926861, "learning_rate": 7.992273192593431e-06, "loss": 0.386, "step": 12241 }, { "epoch": 1.0987188979381213, "grad_norm": 0.49433884999452615, "learning_rate": 7.991854848986642e-06, "loss": 0.437, "step": 12242 }, { "epoch": 1.0988086423907921, "grad_norm": 0.4580056594536281, "learning_rate": 7.99143647275129e-06, "loss": 0.3948, "step": 12243 }, { "epoch": 1.0988983868434632, "grad_norm": 0.48700878105440315, "learning_rate": 7.991018063891934e-06, "loss": 0.3477, "step": 12244 }, { "epoch": 1.0989881312961343, "grad_norm": 0.5386743504520144, "learning_rate": 7.990599622413139e-06, "loss": 0.4593, "step": 12245 }, { "epoch": 1.0990778757488053, "grad_norm": 0.4750489998512898, "learning_rate": 7.990181148319465e-06, "loss": 0.3373, "step": 12246 }, { "epoch": 1.0991676202014764, "grad_norm": 0.496935449583945, "learning_rate": 7.989762641615481e-06, "loss": 0.4329, "step": 12247 }, { "epoch": 1.0992573646541473, "grad_norm": 0.4831970501785264, "learning_rate": 7.98934410230575e-06, "loss": 0.3974, "step": 12248 }, { "epoch": 1.0993471091068183, "grad_norm": 0.4747823164728491, "learning_rate": 7.988925530394834e-06, "loss": 0.388, "step": 12249 }, { "epoch": 1.0994368535594894, "grad_norm": 0.5243618590456495, "learning_rate": 7.988506925887301e-06, "loss": 0.449, "step": 12250 }, { "epoch": 1.0995265980121605, "grad_norm": 0.5322781747272717, "learning_rate": 7.988088288787712e-06, "loss": 0.4144, "step": 12251 }, { "epoch": 1.0996163424648313, "grad_norm": 0.5099098172614857, "learning_rate": 7.987669619100638e-06, "loss": 0.4737, "step": 12252 }, { "epoch": 1.0997060869175024, "grad_norm": 0.5107899048484869, "learning_rate": 7.98725091683064e-06, "loss": 0.3974, "step": 12253 }, { "epoch": 1.0997958313701734, "grad_norm": 0.45156978621410115, "learning_rate": 7.986832181982286e-06, "loss": 0.3859, "step": 12254 }, { "epoch": 1.0998855758228445, "grad_norm": 0.5073906558875783, "learning_rate": 7.986413414560145e-06, "loss": 0.4332, "step": 12255 }, { "epoch": 1.0999753202755154, "grad_norm": 0.4844796194681308, "learning_rate": 7.98599461456878e-06, "loss": 0.4347, "step": 12256 }, { "epoch": 1.1000650647281864, "grad_norm": 0.5148263340860446, "learning_rate": 7.98557578201276e-06, "loss": 0.3878, "step": 12257 }, { "epoch": 1.1001548091808575, "grad_norm": 0.4727696574123739, "learning_rate": 7.985156916896654e-06, "loss": 0.4607, "step": 12258 }, { "epoch": 1.1002445536335286, "grad_norm": 0.5113820024379745, "learning_rate": 7.98473801922503e-06, "loss": 0.3967, "step": 12259 }, { "epoch": 1.1003342980861996, "grad_norm": 0.5047423167034513, "learning_rate": 7.984319089002454e-06, "loss": 0.3993, "step": 12260 }, { "epoch": 1.1004240425388705, "grad_norm": 0.48120353272512834, "learning_rate": 7.983900126233496e-06, "loss": 0.3596, "step": 12261 }, { "epoch": 1.1005137869915416, "grad_norm": 0.49460692344933577, "learning_rate": 7.983481130922727e-06, "loss": 0.403, "step": 12262 }, { "epoch": 1.1006035314442126, "grad_norm": 0.475353094478746, "learning_rate": 7.983062103074715e-06, "loss": 0.4159, "step": 12263 }, { "epoch": 1.1006932758968837, "grad_norm": 0.515040413260559, "learning_rate": 7.982643042694026e-06, "loss": 0.3686, "step": 12264 }, { "epoch": 1.1007830203495546, "grad_norm": 0.475389936507824, "learning_rate": 7.982223949785238e-06, "loss": 0.3685, "step": 12265 }, { "epoch": 1.1008727648022256, "grad_norm": 0.4727575640915618, "learning_rate": 7.981804824352915e-06, "loss": 0.3898, "step": 12266 }, { "epoch": 1.1009625092548967, "grad_norm": 0.5475578564110006, "learning_rate": 7.981385666401632e-06, "loss": 0.4799, "step": 12267 }, { "epoch": 1.1010522537075678, "grad_norm": 0.5104956795214667, "learning_rate": 7.980966475935957e-06, "loss": 0.4006, "step": 12268 }, { "epoch": 1.1011419981602386, "grad_norm": 0.495999227866695, "learning_rate": 7.980547252960465e-06, "loss": 0.4427, "step": 12269 }, { "epoch": 1.1012317426129097, "grad_norm": 0.5258021283232938, "learning_rate": 7.980127997479723e-06, "loss": 0.4648, "step": 12270 }, { "epoch": 1.1013214870655808, "grad_norm": 0.5659771874466628, "learning_rate": 7.97970870949831e-06, "loss": 0.4006, "step": 12271 }, { "epoch": 1.1014112315182518, "grad_norm": 0.4684781434781453, "learning_rate": 7.979289389020792e-06, "loss": 0.4527, "step": 12272 }, { "epoch": 1.101500975970923, "grad_norm": 0.4772708941106361, "learning_rate": 7.978870036051747e-06, "loss": 0.3876, "step": 12273 }, { "epoch": 1.1015907204235937, "grad_norm": 0.48707030185235006, "learning_rate": 7.978450650595746e-06, "loss": 0.3915, "step": 12274 }, { "epoch": 1.1016804648762648, "grad_norm": 0.537868693346772, "learning_rate": 7.978031232657364e-06, "loss": 0.3937, "step": 12275 }, { "epoch": 1.1017702093289359, "grad_norm": 0.4698123118048419, "learning_rate": 7.977611782241173e-06, "loss": 0.4491, "step": 12276 }, { "epoch": 1.101859953781607, "grad_norm": 0.4895632595413765, "learning_rate": 7.97719229935175e-06, "loss": 0.3763, "step": 12277 }, { "epoch": 1.1019496982342778, "grad_norm": 0.4844542059481462, "learning_rate": 7.976772783993667e-06, "loss": 0.4, "step": 12278 }, { "epoch": 1.1020394426869489, "grad_norm": 0.45401082886823324, "learning_rate": 7.976353236171502e-06, "loss": 0.3748, "step": 12279 }, { "epoch": 1.10212918713962, "grad_norm": 0.4842306583866519, "learning_rate": 7.97593365588983e-06, "loss": 0.4071, "step": 12280 }, { "epoch": 1.102218931592291, "grad_norm": 0.5152015571378689, "learning_rate": 7.975514043153224e-06, "loss": 0.3962, "step": 12281 }, { "epoch": 1.102308676044962, "grad_norm": 0.48000460743665657, "learning_rate": 7.975094397966263e-06, "loss": 0.4041, "step": 12282 }, { "epoch": 1.102398420497633, "grad_norm": 0.4598192662610566, "learning_rate": 7.974674720333522e-06, "loss": 0.3999, "step": 12283 }, { "epoch": 1.102488164950304, "grad_norm": 0.5210150821434756, "learning_rate": 7.974255010259581e-06, "loss": 0.3326, "step": 12284 }, { "epoch": 1.102577909402975, "grad_norm": 0.5048059532008389, "learning_rate": 7.973835267749013e-06, "loss": 0.4611, "step": 12285 }, { "epoch": 1.1026676538556461, "grad_norm": 0.45183953479856986, "learning_rate": 7.9734154928064e-06, "loss": 0.3985, "step": 12286 }, { "epoch": 1.102757398308317, "grad_norm": 0.5216520643809216, "learning_rate": 7.972995685436316e-06, "loss": 0.4479, "step": 12287 }, { "epoch": 1.102847142760988, "grad_norm": 0.49404305200976284, "learning_rate": 7.97257584564334e-06, "loss": 0.4485, "step": 12288 }, { "epoch": 1.1029368872136591, "grad_norm": 0.5369971203108312, "learning_rate": 7.972155973432054e-06, "loss": 0.4401, "step": 12289 }, { "epoch": 1.1030266316663302, "grad_norm": 0.5118545559091898, "learning_rate": 7.971736068807033e-06, "loss": 0.4109, "step": 12290 }, { "epoch": 1.103116376119001, "grad_norm": 0.4952572797306946, "learning_rate": 7.971316131772858e-06, "loss": 0.4202, "step": 12291 }, { "epoch": 1.103206120571672, "grad_norm": 0.5170304404063683, "learning_rate": 7.97089616233411e-06, "loss": 0.4669, "step": 12292 }, { "epoch": 1.1032958650243432, "grad_norm": 0.5245639296871952, "learning_rate": 7.970476160495368e-06, "loss": 0.4237, "step": 12293 }, { "epoch": 1.1033856094770143, "grad_norm": 0.50253005210903, "learning_rate": 7.970056126261211e-06, "loss": 0.3681, "step": 12294 }, { "epoch": 1.1034753539296853, "grad_norm": 0.45445080180372893, "learning_rate": 7.969636059636221e-06, "loss": 0.4004, "step": 12295 }, { "epoch": 1.1035650983823562, "grad_norm": 0.4917790053919584, "learning_rate": 7.969215960624981e-06, "loss": 0.3817, "step": 12296 }, { "epoch": 1.1036548428350272, "grad_norm": 0.48066745827337404, "learning_rate": 7.96879582923207e-06, "loss": 0.3736, "step": 12297 }, { "epoch": 1.1037445872876983, "grad_norm": 0.47982690781334714, "learning_rate": 7.968375665462072e-06, "loss": 0.3583, "step": 12298 }, { "epoch": 1.1038343317403694, "grad_norm": 0.48073499961258886, "learning_rate": 7.967955469319566e-06, "loss": 0.4069, "step": 12299 }, { "epoch": 1.1039240761930402, "grad_norm": 0.48486246289465224, "learning_rate": 7.96753524080914e-06, "loss": 0.3585, "step": 12300 }, { "epoch": 1.1040138206457113, "grad_norm": 0.4617405061437798, "learning_rate": 7.96711497993537e-06, "loss": 0.3444, "step": 12301 }, { "epoch": 1.1041035650983824, "grad_norm": 0.467446648158728, "learning_rate": 7.966694686702844e-06, "loss": 0.3736, "step": 12302 }, { "epoch": 1.1041933095510534, "grad_norm": 0.462509322522744, "learning_rate": 7.966274361116143e-06, "loss": 0.4124, "step": 12303 }, { "epoch": 1.1042830540037243, "grad_norm": 0.5007784548122077, "learning_rate": 7.965854003179855e-06, "loss": 0.3495, "step": 12304 }, { "epoch": 1.1043727984563954, "grad_norm": 0.4797106409061617, "learning_rate": 7.96543361289856e-06, "loss": 0.4671, "step": 12305 }, { "epoch": 1.1044625429090664, "grad_norm": 0.5168221487743444, "learning_rate": 7.965013190276844e-06, "loss": 0.465, "step": 12306 }, { "epoch": 1.1045522873617375, "grad_norm": 0.5116423672299043, "learning_rate": 7.964592735319295e-06, "loss": 0.3926, "step": 12307 }, { "epoch": 1.1046420318144086, "grad_norm": 0.4871241411163059, "learning_rate": 7.964172248030494e-06, "loss": 0.3835, "step": 12308 }, { "epoch": 1.1047317762670794, "grad_norm": 0.4847171997216867, "learning_rate": 7.963751728415028e-06, "loss": 0.3917, "step": 12309 }, { "epoch": 1.1048215207197505, "grad_norm": 0.48578610652859733, "learning_rate": 7.963331176477485e-06, "loss": 0.3731, "step": 12310 }, { "epoch": 1.1049112651724216, "grad_norm": 0.48364233745156343, "learning_rate": 7.962910592222449e-06, "loss": 0.3466, "step": 12311 }, { "epoch": 1.1050010096250926, "grad_norm": 0.4354358836269425, "learning_rate": 7.962489975654508e-06, "loss": 0.3934, "step": 12312 }, { "epoch": 1.1050907540777635, "grad_norm": 0.5361513827261482, "learning_rate": 7.96206932677825e-06, "loss": 0.3701, "step": 12313 }, { "epoch": 1.1051804985304345, "grad_norm": 0.43921808409616725, "learning_rate": 7.961648645598263e-06, "loss": 0.3697, "step": 12314 }, { "epoch": 1.1052702429831056, "grad_norm": 0.5526012476457803, "learning_rate": 7.961227932119131e-06, "loss": 0.4015, "step": 12315 }, { "epoch": 1.1053599874357767, "grad_norm": 0.4849708624947041, "learning_rate": 7.960807186345445e-06, "loss": 0.3978, "step": 12316 }, { "epoch": 1.1054497318884478, "grad_norm": 0.5155667479580105, "learning_rate": 7.960386408281794e-06, "loss": 0.437, "step": 12317 }, { "epoch": 1.1055394763411186, "grad_norm": 0.5005972117903883, "learning_rate": 7.959965597932766e-06, "loss": 0.3665, "step": 12318 }, { "epoch": 1.1056292207937897, "grad_norm": 0.5124497873979945, "learning_rate": 7.959544755302951e-06, "loss": 0.385, "step": 12319 }, { "epoch": 1.1057189652464607, "grad_norm": 0.49515862967173097, "learning_rate": 7.959123880396939e-06, "loss": 0.426, "step": 12320 }, { "epoch": 1.1058087096991318, "grad_norm": 0.4882836796413834, "learning_rate": 7.958702973219317e-06, "loss": 0.3709, "step": 12321 }, { "epoch": 1.1058984541518027, "grad_norm": 0.46708525687292013, "learning_rate": 7.958282033774679e-06, "loss": 0.3985, "step": 12322 }, { "epoch": 1.1059881986044737, "grad_norm": 0.4809738272064015, "learning_rate": 7.957861062067614e-06, "loss": 0.3996, "step": 12323 }, { "epoch": 1.1060779430571448, "grad_norm": 0.4825135938639987, "learning_rate": 7.957440058102712e-06, "loss": 0.3983, "step": 12324 }, { "epoch": 1.1061676875098159, "grad_norm": 0.5009020716600723, "learning_rate": 7.957019021884565e-06, "loss": 0.4322, "step": 12325 }, { "epoch": 1.1062574319624867, "grad_norm": 0.5274217149836007, "learning_rate": 7.956597953417766e-06, "loss": 0.36, "step": 12326 }, { "epoch": 1.1063471764151578, "grad_norm": 0.46230914454515765, "learning_rate": 7.956176852706907e-06, "loss": 0.3564, "step": 12327 }, { "epoch": 1.1064369208678289, "grad_norm": 0.4737589780568501, "learning_rate": 7.95575571975658e-06, "loss": 0.4256, "step": 12328 }, { "epoch": 1.1065266653205, "grad_norm": 0.5209824569073297, "learning_rate": 7.955334554571376e-06, "loss": 0.4206, "step": 12329 }, { "epoch": 1.106616409773171, "grad_norm": 0.5345249250980787, "learning_rate": 7.95491335715589e-06, "loss": 0.3706, "step": 12330 }, { "epoch": 1.1067061542258418, "grad_norm": 0.4793302267152588, "learning_rate": 7.954492127514716e-06, "loss": 0.4098, "step": 12331 }, { "epoch": 1.106795898678513, "grad_norm": 0.5380060757547724, "learning_rate": 7.954070865652448e-06, "loss": 0.4118, "step": 12332 }, { "epoch": 1.106885643131184, "grad_norm": 0.4658594573568245, "learning_rate": 7.953649571573678e-06, "loss": 0.4065, "step": 12333 }, { "epoch": 1.106975387583855, "grad_norm": 0.5196887465901474, "learning_rate": 7.953228245283002e-06, "loss": 0.3666, "step": 12334 }, { "epoch": 1.107065132036526, "grad_norm": 0.46404996211738414, "learning_rate": 7.952806886785016e-06, "loss": 0.3624, "step": 12335 }, { "epoch": 1.107154876489197, "grad_norm": 0.4856712622316157, "learning_rate": 7.952385496084311e-06, "loss": 0.3844, "step": 12336 }, { "epoch": 1.107244620941868, "grad_norm": 0.5061652582968535, "learning_rate": 7.951964073185487e-06, "loss": 0.4033, "step": 12337 }, { "epoch": 1.107334365394539, "grad_norm": 0.4774023254135211, "learning_rate": 7.951542618093138e-06, "loss": 0.3689, "step": 12338 }, { "epoch": 1.10742410984721, "grad_norm": 0.4879320290228729, "learning_rate": 7.951121130811863e-06, "loss": 0.3298, "step": 12339 }, { "epoch": 1.107513854299881, "grad_norm": 0.4530302577888192, "learning_rate": 7.950699611346254e-06, "loss": 0.3705, "step": 12340 }, { "epoch": 1.107603598752552, "grad_norm": 0.46677103346966575, "learning_rate": 7.95027805970091e-06, "loss": 0.3808, "step": 12341 }, { "epoch": 1.1076933432052232, "grad_norm": 0.5029945716672535, "learning_rate": 7.949856475880431e-06, "loss": 0.3995, "step": 12342 }, { "epoch": 1.1077830876578942, "grad_norm": 0.4604042749150163, "learning_rate": 7.949434859889413e-06, "loss": 0.3507, "step": 12343 }, { "epoch": 1.107872832110565, "grad_norm": 0.484973919049945, "learning_rate": 7.949013211732452e-06, "loss": 0.4254, "step": 12344 }, { "epoch": 1.1079625765632362, "grad_norm": 0.536762709164468, "learning_rate": 7.948591531414149e-06, "loss": 0.4276, "step": 12345 }, { "epoch": 1.1080523210159072, "grad_norm": 0.49038250796374877, "learning_rate": 7.9481698189391e-06, "loss": 0.3814, "step": 12346 }, { "epoch": 1.1081420654685783, "grad_norm": 0.48611605671813296, "learning_rate": 7.947748074311908e-06, "loss": 0.4164, "step": 12347 }, { "epoch": 1.1082318099212491, "grad_norm": 0.5297802266137419, "learning_rate": 7.947326297537167e-06, "loss": 0.4425, "step": 12348 }, { "epoch": 1.1083215543739202, "grad_norm": 0.5359321416207333, "learning_rate": 7.946904488619485e-06, "loss": 0.4636, "step": 12349 }, { "epoch": 1.1084112988265913, "grad_norm": 0.4903950409009364, "learning_rate": 7.946482647563454e-06, "loss": 0.369, "step": 12350 }, { "epoch": 1.1085010432792624, "grad_norm": 0.5281416395626127, "learning_rate": 7.946060774373679e-06, "loss": 0.4525, "step": 12351 }, { "epoch": 1.1085907877319334, "grad_norm": 0.46201345078711853, "learning_rate": 7.945638869054758e-06, "loss": 0.3284, "step": 12352 }, { "epoch": 1.1086805321846043, "grad_norm": 0.44705519781447767, "learning_rate": 7.945216931611296e-06, "loss": 0.3889, "step": 12353 }, { "epoch": 1.1087702766372753, "grad_norm": 0.5295317204742115, "learning_rate": 7.944794962047891e-06, "loss": 0.3748, "step": 12354 }, { "epoch": 1.1088600210899464, "grad_norm": 0.5354220558712458, "learning_rate": 7.944372960369147e-06, "loss": 0.3593, "step": 12355 }, { "epoch": 1.1089497655426175, "grad_norm": 0.491113066833326, "learning_rate": 7.943950926579665e-06, "loss": 0.4035, "step": 12356 }, { "epoch": 1.1090395099952883, "grad_norm": 0.45889987079922523, "learning_rate": 7.94352886068405e-06, "loss": 0.3855, "step": 12357 }, { "epoch": 1.1091292544479594, "grad_norm": 0.5173340363258988, "learning_rate": 7.943106762686902e-06, "loss": 0.4011, "step": 12358 }, { "epoch": 1.1092189989006305, "grad_norm": 0.45855968433218286, "learning_rate": 7.942684632592825e-06, "loss": 0.3536, "step": 12359 }, { "epoch": 1.1093087433533015, "grad_norm": 0.5094448268239671, "learning_rate": 7.942262470406423e-06, "loss": 0.4082, "step": 12360 }, { "epoch": 1.1093984878059726, "grad_norm": 0.4896164938174148, "learning_rate": 7.941840276132302e-06, "loss": 0.412, "step": 12361 }, { "epoch": 1.1094882322586435, "grad_norm": 0.4840631470799628, "learning_rate": 7.941418049775064e-06, "loss": 0.4552, "step": 12362 }, { "epoch": 1.1095779767113145, "grad_norm": 0.5053847097708238, "learning_rate": 7.940995791339315e-06, "loss": 0.4208, "step": 12363 }, { "epoch": 1.1096677211639856, "grad_norm": 0.541758253853422, "learning_rate": 7.940573500829658e-06, "loss": 0.4769, "step": 12364 }, { "epoch": 1.1097574656166567, "grad_norm": 0.5588486492691461, "learning_rate": 7.940151178250701e-06, "loss": 0.4184, "step": 12365 }, { "epoch": 1.1098472100693275, "grad_norm": 0.5164356660153865, "learning_rate": 7.939728823607046e-06, "loss": 0.4173, "step": 12366 }, { "epoch": 1.1099369545219986, "grad_norm": 0.47710924539438443, "learning_rate": 7.939306436903304e-06, "loss": 0.3787, "step": 12367 }, { "epoch": 1.1100266989746697, "grad_norm": 0.49378761539246785, "learning_rate": 7.93888401814408e-06, "loss": 0.4336, "step": 12368 }, { "epoch": 1.1101164434273407, "grad_norm": 0.4935342873030146, "learning_rate": 7.938461567333978e-06, "loss": 0.3775, "step": 12369 }, { "epoch": 1.1102061878800116, "grad_norm": 0.4866377254087856, "learning_rate": 7.938039084477607e-06, "loss": 0.4425, "step": 12370 }, { "epoch": 1.1102959323326826, "grad_norm": 0.47725769190009604, "learning_rate": 7.937616569579574e-06, "loss": 0.3928, "step": 12371 }, { "epoch": 1.1103856767853537, "grad_norm": 0.4859296198449414, "learning_rate": 7.93719402264449e-06, "loss": 0.4113, "step": 12372 }, { "epoch": 1.1104754212380248, "grad_norm": 0.4807586774377364, "learning_rate": 7.936771443676958e-06, "loss": 0.4317, "step": 12373 }, { "epoch": 1.1105651656906956, "grad_norm": 0.5302163533188053, "learning_rate": 7.93634883268159e-06, "loss": 0.4116, "step": 12374 }, { "epoch": 1.1106549101433667, "grad_norm": 0.5264280011116859, "learning_rate": 7.935926189662995e-06, "loss": 0.4344, "step": 12375 }, { "epoch": 1.1107446545960378, "grad_norm": 0.5182427538352891, "learning_rate": 7.935503514625782e-06, "loss": 0.503, "step": 12376 }, { "epoch": 1.1108343990487088, "grad_norm": 0.48371618309939135, "learning_rate": 7.93508080757456e-06, "loss": 0.4268, "step": 12377 }, { "epoch": 1.11092414350138, "grad_norm": 0.4913204997101598, "learning_rate": 7.93465806851394e-06, "loss": 0.3158, "step": 12378 }, { "epoch": 1.1110138879540508, "grad_norm": 0.49278304500324044, "learning_rate": 7.934235297448527e-06, "loss": 0.3942, "step": 12379 }, { "epoch": 1.1111036324067218, "grad_norm": 0.5074381886108671, "learning_rate": 7.93381249438294e-06, "loss": 0.4156, "step": 12380 }, { "epoch": 1.111193376859393, "grad_norm": 0.4963985295447363, "learning_rate": 7.933389659321784e-06, "loss": 0.4199, "step": 12381 }, { "epoch": 1.111283121312064, "grad_norm": 0.49363131777089514, "learning_rate": 7.932966792269672e-06, "loss": 0.3912, "step": 12382 }, { "epoch": 1.1113728657647348, "grad_norm": 0.46505075163984444, "learning_rate": 7.932543893231215e-06, "loss": 0.3325, "step": 12383 }, { "epoch": 1.1114626102174059, "grad_norm": 0.4559682763633077, "learning_rate": 7.93212096221103e-06, "loss": 0.4077, "step": 12384 }, { "epoch": 1.111552354670077, "grad_norm": 0.4592836291116882, "learning_rate": 7.931697999213722e-06, "loss": 0.3508, "step": 12385 }, { "epoch": 1.111642099122748, "grad_norm": 0.44229597161526535, "learning_rate": 7.93127500424391e-06, "loss": 0.4112, "step": 12386 }, { "epoch": 1.111731843575419, "grad_norm": 0.4931644942345363, "learning_rate": 7.930851977306203e-06, "loss": 0.3534, "step": 12387 }, { "epoch": 1.11182158802809, "grad_norm": 0.48426919603764407, "learning_rate": 7.930428918405213e-06, "loss": 0.4384, "step": 12388 }, { "epoch": 1.111911332480761, "grad_norm": 0.49151467130259563, "learning_rate": 7.930005827545559e-06, "loss": 0.4047, "step": 12389 }, { "epoch": 1.112001076933432, "grad_norm": 0.48830089296139967, "learning_rate": 7.929582704731854e-06, "loss": 0.3853, "step": 12390 }, { "epoch": 1.1120908213861032, "grad_norm": 0.4848000718697271, "learning_rate": 7.929159549968711e-06, "loss": 0.4109, "step": 12391 }, { "epoch": 1.112180565838774, "grad_norm": 0.48392330722910565, "learning_rate": 7.928736363260743e-06, "loss": 0.3984, "step": 12392 }, { "epoch": 1.112270310291445, "grad_norm": 0.5239265127844489, "learning_rate": 7.928313144612567e-06, "loss": 0.3877, "step": 12393 }, { "epoch": 1.1123600547441161, "grad_norm": 0.504414547753653, "learning_rate": 7.927889894028802e-06, "loss": 0.4291, "step": 12394 }, { "epoch": 1.1124497991967872, "grad_norm": 0.4900676236879191, "learning_rate": 7.927466611514058e-06, "loss": 0.3781, "step": 12395 }, { "epoch": 1.1125395436494583, "grad_norm": 0.48608622004826224, "learning_rate": 7.927043297072954e-06, "loss": 0.4396, "step": 12396 }, { "epoch": 1.1126292881021291, "grad_norm": 0.48674384735184695, "learning_rate": 7.926619950710107e-06, "loss": 0.4055, "step": 12397 }, { "epoch": 1.1127190325548002, "grad_norm": 0.5425300675125365, "learning_rate": 7.926196572430134e-06, "loss": 0.4699, "step": 12398 }, { "epoch": 1.1128087770074713, "grad_norm": 0.4605433915623535, "learning_rate": 7.92577316223765e-06, "loss": 0.3945, "step": 12399 }, { "epoch": 1.1128985214601423, "grad_norm": 0.48187302282823075, "learning_rate": 7.925349720137277e-06, "loss": 0.3271, "step": 12400 }, { "epoch": 1.1129882659128132, "grad_norm": 0.46344117578999017, "learning_rate": 7.924926246133628e-06, "loss": 0.3628, "step": 12401 }, { "epoch": 1.1130780103654843, "grad_norm": 0.5068829509968943, "learning_rate": 7.924502740231324e-06, "loss": 0.4667, "step": 12402 }, { "epoch": 1.1131677548181553, "grad_norm": 0.47617498276388426, "learning_rate": 7.924079202434983e-06, "loss": 0.3685, "step": 12403 }, { "epoch": 1.1132574992708264, "grad_norm": 0.5113136235768939, "learning_rate": 7.923655632749226e-06, "loss": 0.4133, "step": 12404 }, { "epoch": 1.1133472437234972, "grad_norm": 0.4656856710840826, "learning_rate": 7.923232031178668e-06, "loss": 0.3668, "step": 12405 }, { "epoch": 1.1134369881761683, "grad_norm": 0.4593954772808713, "learning_rate": 7.922808397727932e-06, "loss": 0.3601, "step": 12406 }, { "epoch": 1.1135267326288394, "grad_norm": 0.48468529063763427, "learning_rate": 7.922384732401639e-06, "loss": 0.3592, "step": 12407 }, { "epoch": 1.1136164770815105, "grad_norm": 0.47233348249543045, "learning_rate": 7.921961035204407e-06, "loss": 0.4097, "step": 12408 }, { "epoch": 1.1137062215341813, "grad_norm": 0.5021333347524054, "learning_rate": 7.921537306140858e-06, "loss": 0.4845, "step": 12409 }, { "epoch": 1.1137959659868524, "grad_norm": 0.5662264960565863, "learning_rate": 7.921113545215611e-06, "loss": 0.4047, "step": 12410 }, { "epoch": 1.1138857104395234, "grad_norm": 0.4829738536754182, "learning_rate": 7.92068975243329e-06, "loss": 0.4125, "step": 12411 }, { "epoch": 1.1139754548921945, "grad_norm": 0.5162549450447738, "learning_rate": 7.920265927798517e-06, "loss": 0.4084, "step": 12412 }, { "epoch": 1.1140651993448656, "grad_norm": 0.5017401218397476, "learning_rate": 7.919842071315913e-06, "loss": 0.4082, "step": 12413 }, { "epoch": 1.1141549437975364, "grad_norm": 0.5050656602048961, "learning_rate": 7.9194181829901e-06, "loss": 0.4074, "step": 12414 }, { "epoch": 1.1142446882502075, "grad_norm": 0.502761376447607, "learning_rate": 7.918994262825701e-06, "loss": 0.4101, "step": 12415 }, { "epoch": 1.1143344327028786, "grad_norm": 0.5042198895988513, "learning_rate": 7.91857031082734e-06, "loss": 0.4338, "step": 12416 }, { "epoch": 1.1144241771555496, "grad_norm": 0.5069808805479712, "learning_rate": 7.918146326999641e-06, "loss": 0.4132, "step": 12417 }, { "epoch": 1.1145139216082205, "grad_norm": 0.5157983582028806, "learning_rate": 7.917722311347225e-06, "loss": 0.425, "step": 12418 }, { "epoch": 1.1146036660608916, "grad_norm": 0.5454914175683441, "learning_rate": 7.917298263874718e-06, "loss": 0.4134, "step": 12419 }, { "epoch": 1.1146934105135626, "grad_norm": 0.44075831008352906, "learning_rate": 7.916874184586747e-06, "loss": 0.3775, "step": 12420 }, { "epoch": 1.1147831549662337, "grad_norm": 0.4803429237025411, "learning_rate": 7.916450073487934e-06, "loss": 0.3956, "step": 12421 }, { "epoch": 1.1148728994189048, "grad_norm": 0.5108462719605569, "learning_rate": 7.916025930582905e-06, "loss": 0.4004, "step": 12422 }, { "epoch": 1.1149626438715756, "grad_norm": 0.472332060264834, "learning_rate": 7.915601755876285e-06, "loss": 0.3658, "step": 12423 }, { "epoch": 1.1150523883242467, "grad_norm": 0.42609868271424894, "learning_rate": 7.915177549372702e-06, "loss": 0.3436, "step": 12424 }, { "epoch": 1.1151421327769178, "grad_norm": 0.45042278621969456, "learning_rate": 7.914753311076779e-06, "loss": 0.3481, "step": 12425 }, { "epoch": 1.1152318772295888, "grad_norm": 0.4762604665316842, "learning_rate": 7.914329040993145e-06, "loss": 0.3731, "step": 12426 }, { "epoch": 1.1153216216822597, "grad_norm": 0.4316877932291348, "learning_rate": 7.913904739126428e-06, "loss": 0.3505, "step": 12427 }, { "epoch": 1.1154113661349307, "grad_norm": 0.48976518380165684, "learning_rate": 7.913480405481253e-06, "loss": 0.4556, "step": 12428 }, { "epoch": 1.1155011105876018, "grad_norm": 0.5414408838485064, "learning_rate": 7.91305604006225e-06, "loss": 0.4565, "step": 12429 }, { "epoch": 1.1155908550402729, "grad_norm": 0.4880560576922183, "learning_rate": 7.912631642874042e-06, "loss": 0.3786, "step": 12430 }, { "epoch": 1.115680599492944, "grad_norm": 0.5057114214290946, "learning_rate": 7.912207213921265e-06, "loss": 0.4396, "step": 12431 }, { "epoch": 1.1157703439456148, "grad_norm": 0.5286269060958125, "learning_rate": 7.911782753208541e-06, "loss": 0.4093, "step": 12432 }, { "epoch": 1.1158600883982859, "grad_norm": 0.46283721961483887, "learning_rate": 7.911358260740504e-06, "loss": 0.3365, "step": 12433 }, { "epoch": 1.115949832850957, "grad_norm": 0.48518117330231225, "learning_rate": 7.91093373652178e-06, "loss": 0.3249, "step": 12434 }, { "epoch": 1.116039577303628, "grad_norm": 0.42369496435666215, "learning_rate": 7.910509180557e-06, "loss": 0.3042, "step": 12435 }, { "epoch": 1.1161293217562989, "grad_norm": 0.46778139642968836, "learning_rate": 7.910084592850795e-06, "loss": 0.4388, "step": 12436 }, { "epoch": 1.11621906620897, "grad_norm": 0.5370883712168163, "learning_rate": 7.909659973407796e-06, "loss": 0.4263, "step": 12437 }, { "epoch": 1.116308810661641, "grad_norm": 0.483640730045052, "learning_rate": 7.909235322232629e-06, "loss": 0.4048, "step": 12438 }, { "epoch": 1.116398555114312, "grad_norm": 0.5234213142057061, "learning_rate": 7.908810639329932e-06, "loss": 0.4244, "step": 12439 }, { "epoch": 1.116488299566983, "grad_norm": 0.5043755158649068, "learning_rate": 7.90838592470433e-06, "loss": 0.3886, "step": 12440 }, { "epoch": 1.116578044019654, "grad_norm": 0.5087988092707899, "learning_rate": 7.907961178360459e-06, "loss": 0.4649, "step": 12441 }, { "epoch": 1.116667788472325, "grad_norm": 0.503951808651562, "learning_rate": 7.907536400302952e-06, "loss": 0.3283, "step": 12442 }, { "epoch": 1.1167575329249961, "grad_norm": 0.4331724757332172, "learning_rate": 7.907111590536439e-06, "loss": 0.3338, "step": 12443 }, { "epoch": 1.116847277377667, "grad_norm": 0.48602123316869533, "learning_rate": 7.90668674906555e-06, "loss": 0.4223, "step": 12444 }, { "epoch": 1.116937021830338, "grad_norm": 0.4859952889540568, "learning_rate": 7.906261875894927e-06, "loss": 0.3622, "step": 12445 }, { "epoch": 1.1170267662830091, "grad_norm": 0.5027109309514864, "learning_rate": 7.905836971029196e-06, "loss": 0.3777, "step": 12446 }, { "epoch": 1.1171165107356802, "grad_norm": 0.4983513674446714, "learning_rate": 7.905412034472994e-06, "loss": 0.4488, "step": 12447 }, { "epoch": 1.1172062551883513, "grad_norm": 0.5536917884041981, "learning_rate": 7.904987066230954e-06, "loss": 0.4347, "step": 12448 }, { "epoch": 1.117295999641022, "grad_norm": 0.49843072296256125, "learning_rate": 7.904562066307713e-06, "loss": 0.4318, "step": 12449 }, { "epoch": 1.1173857440936932, "grad_norm": 0.5190193691224418, "learning_rate": 7.904137034707902e-06, "loss": 0.4278, "step": 12450 }, { "epoch": 1.1174754885463642, "grad_norm": 0.48385763421008865, "learning_rate": 7.903711971436158e-06, "loss": 0.3934, "step": 12451 }, { "epoch": 1.1175652329990353, "grad_norm": 0.4796614531102946, "learning_rate": 7.90328687649712e-06, "loss": 0.3801, "step": 12452 }, { "epoch": 1.1176549774517062, "grad_norm": 0.5087481393510483, "learning_rate": 7.902861749895419e-06, "loss": 0.3842, "step": 12453 }, { "epoch": 1.1177447219043772, "grad_norm": 0.5030965311638389, "learning_rate": 7.902436591635696e-06, "loss": 0.4436, "step": 12454 }, { "epoch": 1.1178344663570483, "grad_norm": 0.4931014258420925, "learning_rate": 7.902011401722582e-06, "loss": 0.4127, "step": 12455 }, { "epoch": 1.1179242108097194, "grad_norm": 0.5425143651403863, "learning_rate": 7.90158618016072e-06, "loss": 0.3967, "step": 12456 }, { "epoch": 1.1180139552623904, "grad_norm": 0.47968863466486905, "learning_rate": 7.901160926954743e-06, "loss": 0.4039, "step": 12457 }, { "epoch": 1.1181036997150613, "grad_norm": 0.5118107069393915, "learning_rate": 7.90073564210929e-06, "loss": 0.4037, "step": 12458 }, { "epoch": 1.1181934441677324, "grad_norm": 0.48904687318847717, "learning_rate": 7.900310325629001e-06, "loss": 0.387, "step": 12459 }, { "epoch": 1.1182831886204034, "grad_norm": 0.4786234435624262, "learning_rate": 7.899884977518513e-06, "loss": 0.403, "step": 12460 }, { "epoch": 1.1183729330730745, "grad_norm": 0.5066879084079557, "learning_rate": 7.899459597782461e-06, "loss": 0.4531, "step": 12461 }, { "epoch": 1.1184626775257454, "grad_norm": 0.4905099792116992, "learning_rate": 7.899034186425492e-06, "loss": 0.382, "step": 12462 }, { "epoch": 1.1185524219784164, "grad_norm": 0.46412877080637505, "learning_rate": 7.89860874345224e-06, "loss": 0.3919, "step": 12463 }, { "epoch": 1.1186421664310875, "grad_norm": 0.47733916689952954, "learning_rate": 7.898183268867345e-06, "loss": 0.4678, "step": 12464 }, { "epoch": 1.1187319108837586, "grad_norm": 0.5053916064529099, "learning_rate": 7.897757762675449e-06, "loss": 0.4294, "step": 12465 }, { "epoch": 1.1188216553364296, "grad_norm": 0.557653656625778, "learning_rate": 7.897332224881192e-06, "loss": 0.4077, "step": 12466 }, { "epoch": 1.1189113997891005, "grad_norm": 0.47397445039695124, "learning_rate": 7.896906655489213e-06, "loss": 0.3677, "step": 12467 }, { "epoch": 1.1190011442417716, "grad_norm": 0.49239988983768196, "learning_rate": 7.896481054504155e-06, "loss": 0.3781, "step": 12468 }, { "epoch": 1.1190908886944426, "grad_norm": 0.4778437278333693, "learning_rate": 7.89605542193066e-06, "loss": 0.4193, "step": 12469 }, { "epoch": 1.1191806331471137, "grad_norm": 0.5087320118498656, "learning_rate": 7.895629757773368e-06, "loss": 0.4229, "step": 12470 }, { "epoch": 1.1192703775997845, "grad_norm": 0.4983943185370039, "learning_rate": 7.895204062036921e-06, "loss": 0.3885, "step": 12471 }, { "epoch": 1.1193601220524556, "grad_norm": 0.49708073387839247, "learning_rate": 7.894778334725966e-06, "loss": 0.4135, "step": 12472 }, { "epoch": 1.1194498665051267, "grad_norm": 0.5143281257270672, "learning_rate": 7.89435257584514e-06, "loss": 0.4163, "step": 12473 }, { "epoch": 1.1195396109577977, "grad_norm": 0.5217000076318579, "learning_rate": 7.89392678539909e-06, "loss": 0.498, "step": 12474 }, { "epoch": 1.1196293554104686, "grad_norm": 0.5265927485835976, "learning_rate": 7.893500963392457e-06, "loss": 0.4087, "step": 12475 }, { "epoch": 1.1197190998631397, "grad_norm": 0.5139315455436216, "learning_rate": 7.893075109829889e-06, "loss": 0.4593, "step": 12476 }, { "epoch": 1.1198088443158107, "grad_norm": 0.5333963046808244, "learning_rate": 7.892649224716025e-06, "loss": 0.5095, "step": 12477 }, { "epoch": 1.1198985887684818, "grad_norm": 0.4867408656606103, "learning_rate": 7.892223308055514e-06, "loss": 0.386, "step": 12478 }, { "epoch": 1.1199883332211527, "grad_norm": 0.483131814119594, "learning_rate": 7.891797359853e-06, "loss": 0.3596, "step": 12479 }, { "epoch": 1.1200780776738237, "grad_norm": 0.46567198240849333, "learning_rate": 7.891371380113124e-06, "loss": 0.3787, "step": 12480 }, { "epoch": 1.1201678221264948, "grad_norm": 0.46866599037404183, "learning_rate": 7.890945368840538e-06, "loss": 0.4464, "step": 12481 }, { "epoch": 1.1202575665791659, "grad_norm": 0.5438720566916176, "learning_rate": 7.890519326039885e-06, "loss": 0.3973, "step": 12482 }, { "epoch": 1.120347311031837, "grad_norm": 0.4912332699567678, "learning_rate": 7.890093251715812e-06, "loss": 0.3916, "step": 12483 }, { "epoch": 1.1204370554845078, "grad_norm": 0.4403587294449236, "learning_rate": 7.889667145872963e-06, "loss": 0.3681, "step": 12484 }, { "epoch": 1.1205267999371789, "grad_norm": 0.502618106941781, "learning_rate": 7.889241008515989e-06, "loss": 0.4502, "step": 12485 }, { "epoch": 1.12061654438985, "grad_norm": 0.500712716168991, "learning_rate": 7.888814839649535e-06, "loss": 0.4067, "step": 12486 }, { "epoch": 1.120706288842521, "grad_norm": 0.49933179303788156, "learning_rate": 7.888388639278248e-06, "loss": 0.4372, "step": 12487 }, { "epoch": 1.1207960332951918, "grad_norm": 0.49800647969329664, "learning_rate": 7.88796240740678e-06, "loss": 0.3215, "step": 12488 }, { "epoch": 1.120885777747863, "grad_norm": 0.4898130144178049, "learning_rate": 7.887536144039774e-06, "loss": 0.4224, "step": 12489 }, { "epoch": 1.120975522200534, "grad_norm": 0.4636877766986288, "learning_rate": 7.887109849181884e-06, "loss": 0.3718, "step": 12490 }, { "epoch": 1.121065266653205, "grad_norm": 0.49904225534284774, "learning_rate": 7.886683522837755e-06, "loss": 0.4164, "step": 12491 }, { "epoch": 1.1211550111058761, "grad_norm": 0.5745481577427096, "learning_rate": 7.886257165012039e-06, "loss": 0.4289, "step": 12492 }, { "epoch": 1.121244755558547, "grad_norm": 0.4965166900430739, "learning_rate": 7.885830775709384e-06, "loss": 0.3989, "step": 12493 }, { "epoch": 1.121334500011218, "grad_norm": 0.5153231277857011, "learning_rate": 7.885404354934442e-06, "loss": 0.4419, "step": 12494 }, { "epoch": 1.121424244463889, "grad_norm": 0.48783127447032143, "learning_rate": 7.884977902691862e-06, "loss": 0.4227, "step": 12495 }, { "epoch": 1.1215139889165602, "grad_norm": 0.5218608649442708, "learning_rate": 7.884551418986294e-06, "loss": 0.3882, "step": 12496 }, { "epoch": 1.121603733369231, "grad_norm": 0.5456201296562097, "learning_rate": 7.884124903822392e-06, "loss": 0.4326, "step": 12497 }, { "epoch": 1.121693477821902, "grad_norm": 0.46082537521685835, "learning_rate": 7.883698357204804e-06, "loss": 0.3644, "step": 12498 }, { "epoch": 1.1217832222745732, "grad_norm": 0.4608828850579874, "learning_rate": 7.883271779138185e-06, "loss": 0.3445, "step": 12499 }, { "epoch": 1.1218729667272442, "grad_norm": 0.4819125527778415, "learning_rate": 7.882845169627184e-06, "loss": 0.3619, "step": 12500 }, { "epoch": 1.1219627111799153, "grad_norm": 0.4790579711755725, "learning_rate": 7.882418528676458e-06, "loss": 0.4792, "step": 12501 }, { "epoch": 1.1220524556325862, "grad_norm": 0.5126334021479889, "learning_rate": 7.881991856290655e-06, "loss": 0.3584, "step": 12502 }, { "epoch": 1.1221422000852572, "grad_norm": 0.45188978554538556, "learning_rate": 7.881565152474431e-06, "loss": 0.3757, "step": 12503 }, { "epoch": 1.1222319445379283, "grad_norm": 0.5273327488024775, "learning_rate": 7.881138417232438e-06, "loss": 0.4342, "step": 12504 }, { "epoch": 1.1223216889905994, "grad_norm": 0.5414108790837437, "learning_rate": 7.880711650569334e-06, "loss": 0.4211, "step": 12505 }, { "epoch": 1.1224114334432702, "grad_norm": 0.45419603165470873, "learning_rate": 7.880284852489768e-06, "loss": 0.4013, "step": 12506 }, { "epoch": 1.1225011778959413, "grad_norm": 0.5086896761319466, "learning_rate": 7.879858022998397e-06, "loss": 0.3685, "step": 12507 }, { "epoch": 1.1225909223486124, "grad_norm": 0.4621770313278056, "learning_rate": 7.879431162099874e-06, "loss": 0.3393, "step": 12508 }, { "epoch": 1.1226806668012834, "grad_norm": 0.4621153253488302, "learning_rate": 7.879004269798858e-06, "loss": 0.427, "step": 12509 }, { "epoch": 1.1227704112539543, "grad_norm": 0.5000752916624923, "learning_rate": 7.8785773461e-06, "loss": 0.3836, "step": 12510 }, { "epoch": 1.1228601557066253, "grad_norm": 0.47346788601448775, "learning_rate": 7.87815039100796e-06, "loss": 0.3942, "step": 12511 }, { "epoch": 1.1229499001592964, "grad_norm": 0.43939160885688916, "learning_rate": 7.877723404527391e-06, "loss": 0.3666, "step": 12512 }, { "epoch": 1.1230396446119675, "grad_norm": 0.5467216902359382, "learning_rate": 7.877296386662953e-06, "loss": 0.4246, "step": 12513 }, { "epoch": 1.1231293890646383, "grad_norm": 0.5051470971541024, "learning_rate": 7.876869337419302e-06, "loss": 0.418, "step": 12514 }, { "epoch": 1.1232191335173094, "grad_norm": 0.4645418005679307, "learning_rate": 7.876442256801094e-06, "loss": 0.4183, "step": 12515 }, { "epoch": 1.1233088779699805, "grad_norm": 0.5302692020764923, "learning_rate": 7.876015144812984e-06, "loss": 0.4081, "step": 12516 }, { "epoch": 1.1233986224226515, "grad_norm": 0.46335585847276756, "learning_rate": 7.875588001459638e-06, "loss": 0.3421, "step": 12517 }, { "epoch": 1.1234883668753226, "grad_norm": 0.4668296889472278, "learning_rate": 7.875160826745705e-06, "loss": 0.4272, "step": 12518 }, { "epoch": 1.1235781113279935, "grad_norm": 0.5143693648479419, "learning_rate": 7.874733620675852e-06, "loss": 0.4485, "step": 12519 }, { "epoch": 1.1236678557806645, "grad_norm": 0.5082450451039587, "learning_rate": 7.874306383254733e-06, "loss": 0.3818, "step": 12520 }, { "epoch": 1.1237576002333356, "grad_norm": 0.4999082386025772, "learning_rate": 7.873879114487006e-06, "loss": 0.3651, "step": 12521 }, { "epoch": 1.1238473446860067, "grad_norm": 0.4480346657667053, "learning_rate": 7.873451814377336e-06, "loss": 0.378, "step": 12522 }, { "epoch": 1.1239370891386775, "grad_norm": 0.4895092264979575, "learning_rate": 7.87302448293038e-06, "loss": 0.4231, "step": 12523 }, { "epoch": 1.1240268335913486, "grad_norm": 0.4738577210236731, "learning_rate": 7.872597120150798e-06, "loss": 0.4817, "step": 12524 }, { "epoch": 1.1241165780440197, "grad_norm": 0.5318852899274786, "learning_rate": 7.872169726043253e-06, "loss": 0.3675, "step": 12525 }, { "epoch": 1.1242063224966907, "grad_norm": 0.4983483583957981, "learning_rate": 7.871742300612404e-06, "loss": 0.3705, "step": 12526 }, { "epoch": 1.1242960669493618, "grad_norm": 0.5087127125883759, "learning_rate": 7.871314843862913e-06, "loss": 0.4596, "step": 12527 }, { "epoch": 1.1243858114020326, "grad_norm": 0.4784508601319811, "learning_rate": 7.870887355799441e-06, "loss": 0.3751, "step": 12528 }, { "epoch": 1.1244755558547037, "grad_norm": 0.4405683071352224, "learning_rate": 7.870459836426651e-06, "loss": 0.3646, "step": 12529 }, { "epoch": 1.1245653003073748, "grad_norm": 0.5026594619219639, "learning_rate": 7.870032285749206e-06, "loss": 0.3921, "step": 12530 }, { "epoch": 1.1246550447600459, "grad_norm": 0.5567964911708398, "learning_rate": 7.869604703771767e-06, "loss": 0.3789, "step": 12531 }, { "epoch": 1.1247447892127167, "grad_norm": 0.47384522311691013, "learning_rate": 7.869177090498999e-06, "loss": 0.3746, "step": 12532 }, { "epoch": 1.1248345336653878, "grad_norm": 0.47679867251054664, "learning_rate": 7.868749445935564e-06, "loss": 0.3614, "step": 12533 }, { "epoch": 1.1249242781180588, "grad_norm": 0.44448440060242855, "learning_rate": 7.868321770086128e-06, "loss": 0.3758, "step": 12534 }, { "epoch": 1.12501402257073, "grad_norm": 0.4833077044631259, "learning_rate": 7.86789406295535e-06, "loss": 0.3995, "step": 12535 }, { "epoch": 1.125103767023401, "grad_norm": 0.4884827690098929, "learning_rate": 7.867466324547901e-06, "loss": 0.4138, "step": 12536 }, { "epoch": 1.1251935114760718, "grad_norm": 0.4863197056428893, "learning_rate": 7.867038554868443e-06, "loss": 0.4026, "step": 12537 }, { "epoch": 1.125283255928743, "grad_norm": 0.4740908858096583, "learning_rate": 7.86661075392164e-06, "loss": 0.3835, "step": 12538 }, { "epoch": 1.125373000381414, "grad_norm": 0.4661429870761776, "learning_rate": 7.866182921712158e-06, "loss": 0.3718, "step": 12539 }, { "epoch": 1.125462744834085, "grad_norm": 0.495635032009815, "learning_rate": 7.865755058244664e-06, "loss": 0.4963, "step": 12540 }, { "epoch": 1.1255524892867559, "grad_norm": 0.5040173738445461, "learning_rate": 7.865327163523823e-06, "loss": 0.3992, "step": 12541 }, { "epoch": 1.125642233739427, "grad_norm": 0.47967793615710835, "learning_rate": 7.864899237554304e-06, "loss": 0.3897, "step": 12542 }, { "epoch": 1.125731978192098, "grad_norm": 0.4588478444732364, "learning_rate": 7.864471280340771e-06, "loss": 0.3936, "step": 12543 }, { "epoch": 1.125821722644769, "grad_norm": 0.486290270984009, "learning_rate": 7.864043291887893e-06, "loss": 0.3631, "step": 12544 }, { "epoch": 1.12591146709744, "grad_norm": 0.5022153537836616, "learning_rate": 7.863615272200336e-06, "loss": 0.4461, "step": 12545 }, { "epoch": 1.126001211550111, "grad_norm": 0.5281993732889694, "learning_rate": 7.863187221282769e-06, "loss": 0.3861, "step": 12546 }, { "epoch": 1.126090956002782, "grad_norm": 0.4901829935246271, "learning_rate": 7.862759139139859e-06, "loss": 0.4601, "step": 12547 }, { "epoch": 1.1261807004554532, "grad_norm": 0.5088420257671469, "learning_rate": 7.862331025776277e-06, "loss": 0.3946, "step": 12548 }, { "epoch": 1.126270444908124, "grad_norm": 0.4895038094356917, "learning_rate": 7.861902881196688e-06, "loss": 0.3454, "step": 12549 }, { "epoch": 1.126360189360795, "grad_norm": 0.462129207222396, "learning_rate": 7.861474705405767e-06, "loss": 0.4197, "step": 12550 }, { "epoch": 1.1264499338134661, "grad_norm": 0.520057223553573, "learning_rate": 7.861046498408178e-06, "loss": 0.3812, "step": 12551 }, { "epoch": 1.1265396782661372, "grad_norm": 0.5147842964114288, "learning_rate": 7.860618260208594e-06, "loss": 0.4419, "step": 12552 }, { "epoch": 1.1266294227188083, "grad_norm": 0.5013764590962619, "learning_rate": 7.860189990811684e-06, "loss": 0.4084, "step": 12553 }, { "epoch": 1.1267191671714791, "grad_norm": 0.5190877516007023, "learning_rate": 7.859761690222121e-06, "loss": 0.4516, "step": 12554 }, { "epoch": 1.1268089116241502, "grad_norm": 0.5139014061060019, "learning_rate": 7.859333358444572e-06, "loss": 0.3851, "step": 12555 }, { "epoch": 1.1268986560768213, "grad_norm": 0.4462938982549704, "learning_rate": 7.85890499548371e-06, "loss": 0.352, "step": 12556 }, { "epoch": 1.1269884005294923, "grad_norm": 0.46690559035023516, "learning_rate": 7.858476601344209e-06, "loss": 0.4753, "step": 12557 }, { "epoch": 1.1270781449821632, "grad_norm": 0.48506472856230665, "learning_rate": 7.858048176030737e-06, "loss": 0.3385, "step": 12558 }, { "epoch": 1.1271678894348343, "grad_norm": 0.4681163266974367, "learning_rate": 7.857619719547971e-06, "loss": 0.3748, "step": 12559 }, { "epoch": 1.1272576338875053, "grad_norm": 0.5115576980206221, "learning_rate": 7.857191231900579e-06, "loss": 0.4294, "step": 12560 }, { "epoch": 1.1273473783401764, "grad_norm": 0.4850802856119461, "learning_rate": 7.856762713093235e-06, "loss": 0.3852, "step": 12561 }, { "epoch": 1.1274371227928475, "grad_norm": 0.5083381549818692, "learning_rate": 7.856334163130615e-06, "loss": 0.3759, "step": 12562 }, { "epoch": 1.1275268672455183, "grad_norm": 0.5315316847727326, "learning_rate": 7.855905582017391e-06, "loss": 0.4725, "step": 12563 }, { "epoch": 1.1276166116981894, "grad_norm": 0.48562329899170487, "learning_rate": 7.855476969758237e-06, "loss": 0.3724, "step": 12564 }, { "epoch": 1.1277063561508605, "grad_norm": 0.49198467068924356, "learning_rate": 7.855048326357825e-06, "loss": 0.3331, "step": 12565 }, { "epoch": 1.1277961006035315, "grad_norm": 0.4580237374753682, "learning_rate": 7.854619651820836e-06, "loss": 0.388, "step": 12566 }, { "epoch": 1.1278858450562024, "grad_norm": 0.46986530420645145, "learning_rate": 7.85419094615194e-06, "loss": 0.3808, "step": 12567 }, { "epoch": 1.1279755895088734, "grad_norm": 0.4759939191691387, "learning_rate": 7.853762209355811e-06, "loss": 0.3838, "step": 12568 }, { "epoch": 1.1280653339615445, "grad_norm": 0.48922203825132426, "learning_rate": 7.85333344143713e-06, "loss": 0.4204, "step": 12569 }, { "epoch": 1.1281550784142156, "grad_norm": 0.536029908407342, "learning_rate": 7.85290464240057e-06, "loss": 0.4206, "step": 12570 }, { "epoch": 1.1282448228668867, "grad_norm": 0.5292051615709873, "learning_rate": 7.852475812250806e-06, "loss": 0.4306, "step": 12571 }, { "epoch": 1.1283345673195575, "grad_norm": 0.4641265242876082, "learning_rate": 7.852046950992517e-06, "loss": 0.3645, "step": 12572 }, { "epoch": 1.1284243117722286, "grad_norm": 0.506147485442367, "learning_rate": 7.85161805863038e-06, "loss": 0.4331, "step": 12573 }, { "epoch": 1.1285140562248996, "grad_norm": 0.45483803162073977, "learning_rate": 7.851189135169072e-06, "loss": 0.3214, "step": 12574 }, { "epoch": 1.1286038006775707, "grad_norm": 0.48793093824028116, "learning_rate": 7.85076018061327e-06, "loss": 0.4197, "step": 12575 }, { "epoch": 1.1286935451302416, "grad_norm": 0.5101877933898019, "learning_rate": 7.850331194967653e-06, "loss": 0.4094, "step": 12576 }, { "epoch": 1.1287832895829126, "grad_norm": 0.4967953796839714, "learning_rate": 7.849902178236899e-06, "loss": 0.4213, "step": 12577 }, { "epoch": 1.1288730340355837, "grad_norm": 0.49403986377325493, "learning_rate": 7.849473130425687e-06, "loss": 0.3761, "step": 12578 }, { "epoch": 1.1289627784882548, "grad_norm": 0.4871852362625671, "learning_rate": 7.849044051538696e-06, "loss": 0.4171, "step": 12579 }, { "epoch": 1.1290525229409256, "grad_norm": 0.5252271765725267, "learning_rate": 7.848614941580606e-06, "loss": 0.4029, "step": 12580 }, { "epoch": 1.1291422673935967, "grad_norm": 0.4937657392179354, "learning_rate": 7.848185800556096e-06, "loss": 0.443, "step": 12581 }, { "epoch": 1.1292320118462678, "grad_norm": 0.4787813275052412, "learning_rate": 7.847756628469847e-06, "loss": 0.3675, "step": 12582 }, { "epoch": 1.1293217562989388, "grad_norm": 0.5025387161428951, "learning_rate": 7.847327425326538e-06, "loss": 0.3626, "step": 12583 }, { "epoch": 1.1294115007516097, "grad_norm": 0.5012008149101195, "learning_rate": 7.84689819113085e-06, "loss": 0.4491, "step": 12584 }, { "epoch": 1.1295012452042807, "grad_norm": 0.5050884086766095, "learning_rate": 7.846468925887467e-06, "loss": 0.4794, "step": 12585 }, { "epoch": 1.1295909896569518, "grad_norm": 0.5349692280243286, "learning_rate": 7.846039629601066e-06, "loss": 0.4151, "step": 12586 }, { "epoch": 1.1296807341096229, "grad_norm": 0.45566177077999614, "learning_rate": 7.845610302276332e-06, "loss": 0.3903, "step": 12587 }, { "epoch": 1.129770478562294, "grad_norm": 0.46113414850825224, "learning_rate": 7.845180943917947e-06, "loss": 0.3666, "step": 12588 }, { "epoch": 1.1298602230149648, "grad_norm": 0.4858256773924667, "learning_rate": 7.844751554530593e-06, "loss": 0.4034, "step": 12589 }, { "epoch": 1.1299499674676359, "grad_norm": 0.4618467269154096, "learning_rate": 7.84432213411895e-06, "loss": 0.3445, "step": 12590 }, { "epoch": 1.130039711920307, "grad_norm": 0.4749886725868024, "learning_rate": 7.843892682687706e-06, "loss": 0.4418, "step": 12591 }, { "epoch": 1.130129456372978, "grad_norm": 0.5165314065026463, "learning_rate": 7.84346320024154e-06, "loss": 0.3709, "step": 12592 }, { "epoch": 1.1302192008256489, "grad_norm": 0.46728874447804514, "learning_rate": 7.843033686785142e-06, "loss": 0.3907, "step": 12593 }, { "epoch": 1.13030894527832, "grad_norm": 0.5003834603092193, "learning_rate": 7.84260414232319e-06, "loss": 0.4483, "step": 12594 }, { "epoch": 1.130398689730991, "grad_norm": 0.5005870643927218, "learning_rate": 7.84217456686037e-06, "loss": 0.4277, "step": 12595 }, { "epoch": 1.130488434183662, "grad_norm": 0.5042965977701, "learning_rate": 7.84174496040137e-06, "loss": 0.3987, "step": 12596 }, { "epoch": 1.1305781786363331, "grad_norm": 0.4742534279967006, "learning_rate": 7.84131532295087e-06, "loss": 0.4025, "step": 12597 }, { "epoch": 1.130667923089004, "grad_norm": 0.489384049500868, "learning_rate": 7.84088565451356e-06, "loss": 0.3895, "step": 12598 }, { "epoch": 1.130757667541675, "grad_norm": 0.4964828040725902, "learning_rate": 7.840455955094125e-06, "loss": 0.3776, "step": 12599 }, { "epoch": 1.1308474119943461, "grad_norm": 0.4817550231154259, "learning_rate": 7.84002622469725e-06, "loss": 0.4414, "step": 12600 }, { "epoch": 1.1309371564470172, "grad_norm": 0.5431848327906106, "learning_rate": 7.839596463327623e-06, "loss": 0.4145, "step": 12601 }, { "epoch": 1.131026900899688, "grad_norm": 0.5265197553299, "learning_rate": 7.839166670989929e-06, "loss": 0.4766, "step": 12602 }, { "epoch": 1.1311166453523591, "grad_norm": 0.5033193442373325, "learning_rate": 7.838736847688854e-06, "loss": 0.3447, "step": 12603 }, { "epoch": 1.1312063898050302, "grad_norm": 0.47632123734813075, "learning_rate": 7.838306993429091e-06, "loss": 0.4035, "step": 12604 }, { "epoch": 1.1312961342577013, "grad_norm": 0.48237194467437716, "learning_rate": 7.837877108215324e-06, "loss": 0.4293, "step": 12605 }, { "epoch": 1.1313858787103723, "grad_norm": 0.5234280040040398, "learning_rate": 7.837447192052241e-06, "loss": 0.3812, "step": 12606 }, { "epoch": 1.1314756231630432, "grad_norm": 0.4621224726115298, "learning_rate": 7.837017244944533e-06, "loss": 0.3985, "step": 12607 }, { "epoch": 1.1315653676157142, "grad_norm": 0.4939550303656231, "learning_rate": 7.836587266896885e-06, "loss": 0.4199, "step": 12608 }, { "epoch": 1.1316551120683853, "grad_norm": 0.5061021573845422, "learning_rate": 7.83615725791399e-06, "loss": 0.4187, "step": 12609 }, { "epoch": 1.1317448565210564, "grad_norm": 0.4976371711911882, "learning_rate": 7.835727218000537e-06, "loss": 0.3597, "step": 12610 }, { "epoch": 1.1318346009737272, "grad_norm": 0.4772720381664416, "learning_rate": 7.835297147161215e-06, "loss": 0.3542, "step": 12611 }, { "epoch": 1.1319243454263983, "grad_norm": 0.5194312888074104, "learning_rate": 7.834867045400713e-06, "loss": 0.4574, "step": 12612 }, { "epoch": 1.1320140898790694, "grad_norm": 0.5005662100968918, "learning_rate": 7.834436912723724e-06, "loss": 0.3669, "step": 12613 }, { "epoch": 1.1321038343317404, "grad_norm": 0.5199108400039696, "learning_rate": 7.834006749134939e-06, "loss": 0.4207, "step": 12614 }, { "epoch": 1.1321935787844113, "grad_norm": 0.518012444758406, "learning_rate": 7.833576554639048e-06, "loss": 0.4048, "step": 12615 }, { "epoch": 1.1322833232370824, "grad_norm": 0.4973501501177325, "learning_rate": 7.833146329240741e-06, "loss": 0.3811, "step": 12616 }, { "epoch": 1.1323730676897534, "grad_norm": 0.45048615337183956, "learning_rate": 7.832716072944713e-06, "loss": 0.3854, "step": 12617 }, { "epoch": 1.1324628121424245, "grad_norm": 0.5154361196069499, "learning_rate": 7.832285785755655e-06, "loss": 0.4544, "step": 12618 }, { "epoch": 1.1325525565950953, "grad_norm": 0.5669294893781069, "learning_rate": 7.831855467678261e-06, "loss": 0.4846, "step": 12619 }, { "epoch": 1.1326423010477664, "grad_norm": 0.5274954165448766, "learning_rate": 7.831425118717222e-06, "loss": 0.4332, "step": 12620 }, { "epoch": 1.1327320455004375, "grad_norm": 0.4989644136510609, "learning_rate": 7.830994738877232e-06, "loss": 0.3941, "step": 12621 }, { "epoch": 1.1328217899531086, "grad_norm": 0.4927353112641058, "learning_rate": 7.830564328162984e-06, "loss": 0.3469, "step": 12622 }, { "epoch": 1.1329115344057796, "grad_norm": 0.48577429465107946, "learning_rate": 7.830133886579172e-06, "loss": 0.3755, "step": 12623 }, { "epoch": 1.1330012788584505, "grad_norm": 0.5137250750385295, "learning_rate": 7.829703414130494e-06, "loss": 0.4087, "step": 12624 }, { "epoch": 1.1330910233111215, "grad_norm": 0.48870172725024885, "learning_rate": 7.82927291082164e-06, "loss": 0.3625, "step": 12625 }, { "epoch": 1.1331807677637926, "grad_norm": 0.4808168155796435, "learning_rate": 7.828842376657303e-06, "loss": 0.43, "step": 12626 }, { "epoch": 1.1332705122164637, "grad_norm": 0.5163849320083392, "learning_rate": 7.828411811642186e-06, "loss": 0.4307, "step": 12627 }, { "epoch": 1.1333602566691345, "grad_norm": 0.5170037310163214, "learning_rate": 7.82798121578098e-06, "loss": 0.4328, "step": 12628 }, { "epoch": 1.1334500011218056, "grad_norm": 0.4714600626868056, "learning_rate": 7.827550589078382e-06, "loss": 0.3416, "step": 12629 }, { "epoch": 1.1335397455744767, "grad_norm": 0.46979661775218384, "learning_rate": 7.827119931539087e-06, "loss": 0.4355, "step": 12630 }, { "epoch": 1.1336294900271477, "grad_norm": 0.5669776295494156, "learning_rate": 7.826689243167791e-06, "loss": 0.4657, "step": 12631 }, { "epoch": 1.1337192344798188, "grad_norm": 0.49087206963185226, "learning_rate": 7.826258523969194e-06, "loss": 0.4447, "step": 12632 }, { "epoch": 1.1338089789324897, "grad_norm": 0.5208439046382166, "learning_rate": 7.825827773947991e-06, "loss": 0.4044, "step": 12633 }, { "epoch": 1.1338987233851607, "grad_norm": 0.4798590891439533, "learning_rate": 7.825396993108882e-06, "loss": 0.4111, "step": 12634 }, { "epoch": 1.1339884678378318, "grad_norm": 0.4830330942405268, "learning_rate": 7.824966181456562e-06, "loss": 0.4006, "step": 12635 }, { "epoch": 1.1340782122905029, "grad_norm": 0.47463556641087085, "learning_rate": 7.824535338995733e-06, "loss": 0.3699, "step": 12636 }, { "epoch": 1.1341679567431737, "grad_norm": 0.5481374185211529, "learning_rate": 7.824104465731089e-06, "loss": 0.4129, "step": 12637 }, { "epoch": 1.1342577011958448, "grad_norm": 0.49091924219155536, "learning_rate": 7.823673561667333e-06, "loss": 0.3718, "step": 12638 }, { "epoch": 1.1343474456485159, "grad_norm": 0.5084938485022152, "learning_rate": 7.823242626809163e-06, "loss": 0.4007, "step": 12639 }, { "epoch": 1.134437190101187, "grad_norm": 0.45237633197954147, "learning_rate": 7.822811661161278e-06, "loss": 0.3594, "step": 12640 }, { "epoch": 1.134526934553858, "grad_norm": 0.5301436039828715, "learning_rate": 7.822380664728379e-06, "loss": 0.4296, "step": 12641 }, { "epoch": 1.1346166790065288, "grad_norm": 0.4671610162875681, "learning_rate": 7.821949637515168e-06, "loss": 0.3883, "step": 12642 }, { "epoch": 1.1347064234592, "grad_norm": 0.5202523435948406, "learning_rate": 7.82151857952634e-06, "loss": 0.4299, "step": 12643 }, { "epoch": 1.134796167911871, "grad_norm": 0.5209500073441182, "learning_rate": 7.821087490766602e-06, "loss": 0.3858, "step": 12644 }, { "epoch": 1.134885912364542, "grad_norm": 0.49043000253719493, "learning_rate": 7.820656371240652e-06, "loss": 0.4034, "step": 12645 }, { "epoch": 1.134975656817213, "grad_norm": 0.5259041789409945, "learning_rate": 7.820225220953193e-06, "loss": 0.4002, "step": 12646 }, { "epoch": 1.135065401269884, "grad_norm": 0.5070535404538977, "learning_rate": 7.819794039908927e-06, "loss": 0.3816, "step": 12647 }, { "epoch": 1.135155145722555, "grad_norm": 0.45422990993445217, "learning_rate": 7.819362828112558e-06, "loss": 0.448, "step": 12648 }, { "epoch": 1.1352448901752261, "grad_norm": 0.5452337347884978, "learning_rate": 7.818931585568785e-06, "loss": 0.3715, "step": 12649 }, { "epoch": 1.1353346346278972, "grad_norm": 0.48179593579504965, "learning_rate": 7.818500312282313e-06, "loss": 0.4049, "step": 12650 }, { "epoch": 1.135424379080568, "grad_norm": 0.500853508239897, "learning_rate": 7.818069008257845e-06, "loss": 0.4132, "step": 12651 }, { "epoch": 1.135514123533239, "grad_norm": 0.5200371284373635, "learning_rate": 7.817637673500087e-06, "loss": 0.4035, "step": 12652 }, { "epoch": 1.1356038679859102, "grad_norm": 0.5018823252195066, "learning_rate": 7.817206308013738e-06, "loss": 0.3951, "step": 12653 }, { "epoch": 1.135693612438581, "grad_norm": 0.5102681400650133, "learning_rate": 7.816774911803509e-06, "loss": 0.4104, "step": 12654 }, { "epoch": 1.135783356891252, "grad_norm": 0.48493720953707725, "learning_rate": 7.816343484874098e-06, "loss": 0.406, "step": 12655 }, { "epoch": 1.1358731013439232, "grad_norm": 0.5218264760833701, "learning_rate": 7.815912027230216e-06, "loss": 0.3739, "step": 12656 }, { "epoch": 1.1359628457965942, "grad_norm": 0.48981930474533414, "learning_rate": 7.815480538876563e-06, "loss": 0.3921, "step": 12657 }, { "epoch": 1.1360525902492653, "grad_norm": 0.4798371183423159, "learning_rate": 7.81504901981785e-06, "loss": 0.4429, "step": 12658 }, { "epoch": 1.1361423347019362, "grad_norm": 0.5907059427399496, "learning_rate": 7.814617470058779e-06, "loss": 0.3933, "step": 12659 }, { "epoch": 1.1362320791546072, "grad_norm": 0.5004302650240529, "learning_rate": 7.814185889604059e-06, "loss": 0.4567, "step": 12660 }, { "epoch": 1.1363218236072783, "grad_norm": 0.4929285787636025, "learning_rate": 7.813754278458394e-06, "loss": 0.3746, "step": 12661 }, { "epoch": 1.1364115680599494, "grad_norm": 0.4808628863071871, "learning_rate": 7.813322636626492e-06, "loss": 0.3766, "step": 12662 }, { "epoch": 1.1365013125126202, "grad_norm": 0.5481810671441806, "learning_rate": 7.812890964113065e-06, "loss": 0.5077, "step": 12663 }, { "epoch": 1.1365910569652913, "grad_norm": 0.4932703054495906, "learning_rate": 7.812459260922814e-06, "loss": 0.4387, "step": 12664 }, { "epoch": 1.1366808014179623, "grad_norm": 0.5428312397584252, "learning_rate": 7.812027527060449e-06, "loss": 0.4513, "step": 12665 }, { "epoch": 1.1367705458706334, "grad_norm": 0.48356835278481375, "learning_rate": 7.81159576253068e-06, "loss": 0.4142, "step": 12666 }, { "epoch": 1.1368602903233045, "grad_norm": 0.5090204657271344, "learning_rate": 7.811163967338215e-06, "loss": 0.434, "step": 12667 }, { "epoch": 1.1369500347759753, "grad_norm": 0.5085529558085633, "learning_rate": 7.810732141487764e-06, "loss": 0.3651, "step": 12668 }, { "epoch": 1.1370397792286464, "grad_norm": 0.46988729457455475, "learning_rate": 7.810300284984034e-06, "loss": 0.3554, "step": 12669 }, { "epoch": 1.1371295236813175, "grad_norm": 0.4941572645039945, "learning_rate": 7.809868397831736e-06, "loss": 0.4337, "step": 12670 }, { "epoch": 1.1372192681339885, "grad_norm": 0.5237924655835786, "learning_rate": 7.80943648003558e-06, "loss": 0.4501, "step": 12671 }, { "epoch": 1.1373090125866594, "grad_norm": 0.5277486476299447, "learning_rate": 7.80900453160028e-06, "loss": 0.427, "step": 12672 }, { "epoch": 1.1373987570393305, "grad_norm": 0.5108326213044293, "learning_rate": 7.80857255253054e-06, "loss": 0.4435, "step": 12673 }, { "epoch": 1.1374885014920015, "grad_norm": 0.5263422024341249, "learning_rate": 7.808140542831076e-06, "loss": 0.415, "step": 12674 }, { "epoch": 1.1375782459446726, "grad_norm": 0.4786745163736808, "learning_rate": 7.807708502506599e-06, "loss": 0.3424, "step": 12675 }, { "epoch": 1.1376679903973437, "grad_norm": 0.4471089516667303, "learning_rate": 7.807276431561816e-06, "loss": 0.3652, "step": 12676 }, { "epoch": 1.1377577348500145, "grad_norm": 0.4641715906560189, "learning_rate": 7.806844330001446e-06, "loss": 0.4186, "step": 12677 }, { "epoch": 1.1378474793026856, "grad_norm": 0.5381817036172236, "learning_rate": 7.806412197830198e-06, "loss": 0.5272, "step": 12678 }, { "epoch": 1.1379372237553567, "grad_norm": 0.5251327081383488, "learning_rate": 7.805980035052783e-06, "loss": 0.4633, "step": 12679 }, { "epoch": 1.1380269682080277, "grad_norm": 0.5334336815798918, "learning_rate": 7.805547841673917e-06, "loss": 0.4466, "step": 12680 }, { "epoch": 1.1381167126606986, "grad_norm": 0.5341300266741814, "learning_rate": 7.805115617698313e-06, "loss": 0.442, "step": 12681 }, { "epoch": 1.1382064571133697, "grad_norm": 0.5194972044165058, "learning_rate": 7.804683363130684e-06, "loss": 0.3847, "step": 12682 }, { "epoch": 1.1382962015660407, "grad_norm": 0.4553030974569718, "learning_rate": 7.804251077975742e-06, "loss": 0.4242, "step": 12683 }, { "epoch": 1.1383859460187118, "grad_norm": 0.5029230777765051, "learning_rate": 7.803818762238207e-06, "loss": 0.423, "step": 12684 }, { "epoch": 1.1384756904713829, "grad_norm": 0.5034441435757951, "learning_rate": 7.803386415922788e-06, "loss": 0.3783, "step": 12685 }, { "epoch": 1.1385654349240537, "grad_norm": 0.45637322730445834, "learning_rate": 7.802954039034203e-06, "loss": 0.4539, "step": 12686 }, { "epoch": 1.1386551793767248, "grad_norm": 0.6030310947747454, "learning_rate": 7.802521631577167e-06, "loss": 0.4226, "step": 12687 }, { "epoch": 1.1387449238293958, "grad_norm": 0.47507161022946154, "learning_rate": 7.802089193556394e-06, "loss": 0.3931, "step": 12688 }, { "epoch": 1.1388346682820667, "grad_norm": 0.5304901982915096, "learning_rate": 7.801656724976606e-06, "loss": 0.4854, "step": 12689 }, { "epoch": 1.1389244127347378, "grad_norm": 0.4942312574233788, "learning_rate": 7.80122422584251e-06, "loss": 0.4283, "step": 12690 }, { "epoch": 1.1390141571874088, "grad_norm": 0.5322332382309161, "learning_rate": 7.80079169615883e-06, "loss": 0.3896, "step": 12691 }, { "epoch": 1.13910390164008, "grad_norm": 0.4825087704740727, "learning_rate": 7.800359135930282e-06, "loss": 0.395, "step": 12692 }, { "epoch": 1.139193646092751, "grad_norm": 0.4826121105072902, "learning_rate": 7.799926545161583e-06, "loss": 0.4002, "step": 12693 }, { "epoch": 1.1392833905454218, "grad_norm": 0.4767708419419808, "learning_rate": 7.799493923857446e-06, "loss": 0.3349, "step": 12694 }, { "epoch": 1.139373134998093, "grad_norm": 0.47157868137853337, "learning_rate": 7.799061272022596e-06, "loss": 0.3886, "step": 12695 }, { "epoch": 1.139462879450764, "grad_norm": 0.44499815621196426, "learning_rate": 7.79862858966175e-06, "loss": 0.3785, "step": 12696 }, { "epoch": 1.139552623903435, "grad_norm": 0.4884455923093907, "learning_rate": 7.798195876779624e-06, "loss": 0.3993, "step": 12697 }, { "epoch": 1.1396423683561059, "grad_norm": 0.4584725788005783, "learning_rate": 7.797763133380937e-06, "loss": 0.376, "step": 12698 }, { "epoch": 1.139732112808777, "grad_norm": 0.5080896427260202, "learning_rate": 7.797330359470411e-06, "loss": 0.4586, "step": 12699 }, { "epoch": 1.139821857261448, "grad_norm": 0.5123310214689301, "learning_rate": 7.796897555052765e-06, "loss": 0.3796, "step": 12700 }, { "epoch": 1.139911601714119, "grad_norm": 0.49021088532169255, "learning_rate": 7.796464720132718e-06, "loss": 0.3782, "step": 12701 }, { "epoch": 1.1400013461667902, "grad_norm": 0.461628113744579, "learning_rate": 7.796031854714992e-06, "loss": 0.3908, "step": 12702 }, { "epoch": 1.140091090619461, "grad_norm": 0.512839259668063, "learning_rate": 7.795598958804306e-06, "loss": 0.4142, "step": 12703 }, { "epoch": 1.140180835072132, "grad_norm": 0.4977525919465058, "learning_rate": 7.795166032405381e-06, "loss": 0.4492, "step": 12704 }, { "epoch": 1.1402705795248032, "grad_norm": 0.5250850424648658, "learning_rate": 7.79473307552294e-06, "loss": 0.422, "step": 12705 }, { "epoch": 1.1403603239774742, "grad_norm": 0.49594930021334593, "learning_rate": 7.794300088161706e-06, "loss": 0.44, "step": 12706 }, { "epoch": 1.140450068430145, "grad_norm": 0.4931983599580613, "learning_rate": 7.793867070326396e-06, "loss": 0.4293, "step": 12707 }, { "epoch": 1.1405398128828161, "grad_norm": 0.47184999448058934, "learning_rate": 7.793434022021737e-06, "loss": 0.3618, "step": 12708 }, { "epoch": 1.1406295573354872, "grad_norm": 0.49788719396733916, "learning_rate": 7.793000943252451e-06, "loss": 0.3985, "step": 12709 }, { "epoch": 1.1407193017881583, "grad_norm": 0.5103438130075428, "learning_rate": 7.792567834023258e-06, "loss": 0.4009, "step": 12710 }, { "epoch": 1.1408090462408293, "grad_norm": 0.4719063128133457, "learning_rate": 7.792134694338886e-06, "loss": 0.3752, "step": 12711 }, { "epoch": 1.1408987906935002, "grad_norm": 0.47850088800775326, "learning_rate": 7.791701524204056e-06, "loss": 0.4515, "step": 12712 }, { "epoch": 1.1409885351461713, "grad_norm": 0.5498389900310227, "learning_rate": 7.791268323623491e-06, "loss": 0.434, "step": 12713 }, { "epoch": 1.1410782795988423, "grad_norm": 0.5044621639383756, "learning_rate": 7.79083509260192e-06, "loss": 0.3881, "step": 12714 }, { "epoch": 1.1411680240515134, "grad_norm": 0.5152060605454887, "learning_rate": 7.790401831144063e-06, "loss": 0.4656, "step": 12715 }, { "epoch": 1.1412577685041843, "grad_norm": 0.5096885171139325, "learning_rate": 7.789968539254646e-06, "loss": 0.4145, "step": 12716 }, { "epoch": 1.1413475129568553, "grad_norm": 0.5297756746118587, "learning_rate": 7.789535216938395e-06, "loss": 0.4473, "step": 12717 }, { "epoch": 1.1414372574095264, "grad_norm": 0.4790847146294305, "learning_rate": 7.789101864200038e-06, "loss": 0.3391, "step": 12718 }, { "epoch": 1.1415270018621975, "grad_norm": 0.4804055359279649, "learning_rate": 7.788668481044296e-06, "loss": 0.3738, "step": 12719 }, { "epoch": 1.1416167463148685, "grad_norm": 0.4907594031445089, "learning_rate": 7.7882350674759e-06, "loss": 0.4345, "step": 12720 }, { "epoch": 1.1417064907675394, "grad_norm": 0.532677510320276, "learning_rate": 7.787801623499573e-06, "loss": 0.4022, "step": 12721 }, { "epoch": 1.1417962352202105, "grad_norm": 0.5053097804077369, "learning_rate": 7.787368149120047e-06, "loss": 0.4274, "step": 12722 }, { "epoch": 1.1418859796728815, "grad_norm": 0.457668644222954, "learning_rate": 7.786934644342044e-06, "loss": 0.3429, "step": 12723 }, { "epoch": 1.1419757241255524, "grad_norm": 0.54278692976326, "learning_rate": 7.786501109170296e-06, "loss": 0.4296, "step": 12724 }, { "epoch": 1.1420654685782234, "grad_norm": 0.47609529660286964, "learning_rate": 7.786067543609528e-06, "loss": 0.3912, "step": 12725 }, { "epoch": 1.1421552130308945, "grad_norm": 0.5258887386573315, "learning_rate": 7.78563394766447e-06, "loss": 0.4365, "step": 12726 }, { "epoch": 1.1422449574835656, "grad_norm": 0.44581072467734534, "learning_rate": 7.78520032133985e-06, "loss": 0.4225, "step": 12727 }, { "epoch": 1.1423347019362367, "grad_norm": 0.47645764241067723, "learning_rate": 7.784766664640398e-06, "loss": 0.3952, "step": 12728 }, { "epoch": 1.1424244463889075, "grad_norm": 0.5486656909923081, "learning_rate": 7.784332977570842e-06, "loss": 0.436, "step": 12729 }, { "epoch": 1.1425141908415786, "grad_norm": 0.5435061425588595, "learning_rate": 7.783899260135913e-06, "loss": 0.4167, "step": 12730 }, { "epoch": 1.1426039352942496, "grad_norm": 0.49463668404122874, "learning_rate": 7.78346551234034e-06, "loss": 0.3525, "step": 12731 }, { "epoch": 1.1426936797469207, "grad_norm": 0.4362792544230404, "learning_rate": 7.783031734188854e-06, "loss": 0.3726, "step": 12732 }, { "epoch": 1.1427834241995916, "grad_norm": 0.5042408303695454, "learning_rate": 7.782597925686186e-06, "loss": 0.3592, "step": 12733 }, { "epoch": 1.1428731686522626, "grad_norm": 0.47602225456673375, "learning_rate": 7.782164086837065e-06, "loss": 0.3709, "step": 12734 }, { "epoch": 1.1429629131049337, "grad_norm": 0.4653587881295766, "learning_rate": 7.781730217646226e-06, "loss": 0.4105, "step": 12735 }, { "epoch": 1.1430526575576048, "grad_norm": 0.478452108703345, "learning_rate": 7.781296318118396e-06, "loss": 0.3988, "step": 12736 }, { "epoch": 1.1431424020102758, "grad_norm": 0.5044661876163999, "learning_rate": 7.780862388258312e-06, "loss": 0.4169, "step": 12737 }, { "epoch": 1.1432321464629467, "grad_norm": 0.5018088003612815, "learning_rate": 7.780428428070703e-06, "loss": 0.4431, "step": 12738 }, { "epoch": 1.1433218909156178, "grad_norm": 0.5131401463489161, "learning_rate": 7.779994437560302e-06, "loss": 0.4153, "step": 12739 }, { "epoch": 1.1434116353682888, "grad_norm": 0.4885518802411773, "learning_rate": 7.779560416731845e-06, "loss": 0.3916, "step": 12740 }, { "epoch": 1.14350137982096, "grad_norm": 0.48410759621140237, "learning_rate": 7.77912636559006e-06, "loss": 0.405, "step": 12741 }, { "epoch": 1.1435911242736307, "grad_norm": 0.4696817780900067, "learning_rate": 7.778692284139684e-06, "loss": 0.3872, "step": 12742 }, { "epoch": 1.1436808687263018, "grad_norm": 0.47962113752973085, "learning_rate": 7.778258172385452e-06, "loss": 0.3568, "step": 12743 }, { "epoch": 1.1437706131789729, "grad_norm": 0.5117176714655131, "learning_rate": 7.777824030332097e-06, "loss": 0.4381, "step": 12744 }, { "epoch": 1.143860357631644, "grad_norm": 0.5187866497552206, "learning_rate": 7.777389857984352e-06, "loss": 0.4116, "step": 12745 }, { "epoch": 1.143950102084315, "grad_norm": 0.5336714013599932, "learning_rate": 7.776955655346956e-06, "loss": 0.4117, "step": 12746 }, { "epoch": 1.1440398465369859, "grad_norm": 0.47587179943135327, "learning_rate": 7.776521422424641e-06, "loss": 0.3866, "step": 12747 }, { "epoch": 1.144129590989657, "grad_norm": 0.4738329911866964, "learning_rate": 7.776087159222143e-06, "loss": 0.3633, "step": 12748 }, { "epoch": 1.144219335442328, "grad_norm": 0.4935768047604281, "learning_rate": 7.7756528657442e-06, "loss": 0.4017, "step": 12749 }, { "epoch": 1.144309079894999, "grad_norm": 0.534022135534072, "learning_rate": 7.775218541995547e-06, "loss": 0.4579, "step": 12750 }, { "epoch": 1.14439882434767, "grad_norm": 0.473116202009587, "learning_rate": 7.77478418798092e-06, "loss": 0.3663, "step": 12751 }, { "epoch": 1.144488568800341, "grad_norm": 0.5232298088441009, "learning_rate": 7.774349803705054e-06, "loss": 0.4102, "step": 12752 }, { "epoch": 1.144578313253012, "grad_norm": 0.5039659478914386, "learning_rate": 7.773915389172692e-06, "loss": 0.5594, "step": 12753 }, { "epoch": 1.1446680577056831, "grad_norm": 0.5602248179604792, "learning_rate": 7.773480944388568e-06, "loss": 0.3735, "step": 12754 }, { "epoch": 1.1447578021583542, "grad_norm": 0.4991962033339541, "learning_rate": 7.77304646935742e-06, "loss": 0.4726, "step": 12755 }, { "epoch": 1.144847546611025, "grad_norm": 0.501737820757071, "learning_rate": 7.772611964083987e-06, "loss": 0.362, "step": 12756 }, { "epoch": 1.1449372910636961, "grad_norm": 0.46408306606929445, "learning_rate": 7.772177428573008e-06, "loss": 0.4113, "step": 12757 }, { "epoch": 1.1450270355163672, "grad_norm": 0.4915405619751599, "learning_rate": 7.771742862829221e-06, "loss": 0.3842, "step": 12758 }, { "epoch": 1.145116779969038, "grad_norm": 0.534028028513976, "learning_rate": 7.771308266857364e-06, "loss": 0.4134, "step": 12759 }, { "epoch": 1.1452065244217091, "grad_norm": 0.5522949699817928, "learning_rate": 7.77087364066218e-06, "loss": 0.3893, "step": 12760 }, { "epoch": 1.1452962688743802, "grad_norm": 0.4853465575997542, "learning_rate": 7.770438984248407e-06, "loss": 0.4095, "step": 12761 }, { "epoch": 1.1453860133270513, "grad_norm": 0.4503874654256502, "learning_rate": 7.770004297620785e-06, "loss": 0.4494, "step": 12762 }, { "epoch": 1.1454757577797223, "grad_norm": 0.5148695406315545, "learning_rate": 7.769569580784057e-06, "loss": 0.3871, "step": 12763 }, { "epoch": 1.1455655022323932, "grad_norm": 0.48599408008614864, "learning_rate": 7.76913483374296e-06, "loss": 0.3631, "step": 12764 }, { "epoch": 1.1456552466850642, "grad_norm": 0.5297896415559995, "learning_rate": 7.768700056502238e-06, "loss": 0.4012, "step": 12765 }, { "epoch": 1.1457449911377353, "grad_norm": 0.45029230863883424, "learning_rate": 7.76826524906663e-06, "loss": 0.328, "step": 12766 }, { "epoch": 1.1458347355904064, "grad_norm": 0.4664406904789756, "learning_rate": 7.767830411440883e-06, "loss": 0.3791, "step": 12767 }, { "epoch": 1.1459244800430772, "grad_norm": 0.47775233444244136, "learning_rate": 7.767395543629734e-06, "loss": 0.4542, "step": 12768 }, { "epoch": 1.1460142244957483, "grad_norm": 0.5018190277905231, "learning_rate": 7.766960645637929e-06, "loss": 0.4397, "step": 12769 }, { "epoch": 1.1461039689484194, "grad_norm": 0.49110112183463467, "learning_rate": 7.766525717470208e-06, "loss": 0.4369, "step": 12770 }, { "epoch": 1.1461937134010904, "grad_norm": 0.5154109542337789, "learning_rate": 7.766090759131316e-06, "loss": 0.3982, "step": 12771 }, { "epoch": 1.1462834578537615, "grad_norm": 0.45377617408309107, "learning_rate": 7.765655770625997e-06, "loss": 0.3947, "step": 12772 }, { "epoch": 1.1463732023064324, "grad_norm": 0.5092105895315602, "learning_rate": 7.765220751958996e-06, "loss": 0.4555, "step": 12773 }, { "epoch": 1.1464629467591034, "grad_norm": 0.5341219150047993, "learning_rate": 7.764785703135053e-06, "loss": 0.3427, "step": 12774 }, { "epoch": 1.1465526912117745, "grad_norm": 0.483177763375833, "learning_rate": 7.764350624158915e-06, "loss": 0.4601, "step": 12775 }, { "epoch": 1.1466424356644456, "grad_norm": 0.49387324455267795, "learning_rate": 7.763915515035327e-06, "loss": 0.3905, "step": 12776 }, { "epoch": 1.1467321801171164, "grad_norm": 0.4833197208293288, "learning_rate": 7.763480375769036e-06, "loss": 0.3883, "step": 12777 }, { "epoch": 1.1468219245697875, "grad_norm": 0.4970234355203933, "learning_rate": 7.763045206364783e-06, "loss": 0.3953, "step": 12778 }, { "epoch": 1.1469116690224586, "grad_norm": 0.4947482636546222, "learning_rate": 7.76261000682732e-06, "loss": 0.4117, "step": 12779 }, { "epoch": 1.1470014134751296, "grad_norm": 0.4988612028603378, "learning_rate": 7.762174777161386e-06, "loss": 0.412, "step": 12780 }, { "epoch": 1.1470911579278007, "grad_norm": 0.4970252591728299, "learning_rate": 7.761739517371733e-06, "loss": 0.3769, "step": 12781 }, { "epoch": 1.1471809023804715, "grad_norm": 0.4942317275104932, "learning_rate": 7.761304227463108e-06, "loss": 0.5117, "step": 12782 }, { "epoch": 1.1472706468331426, "grad_norm": 0.5439578464026082, "learning_rate": 7.760868907440253e-06, "loss": 0.4215, "step": 12783 }, { "epoch": 1.1473603912858137, "grad_norm": 0.4650595288919336, "learning_rate": 7.760433557307921e-06, "loss": 0.3841, "step": 12784 }, { "epoch": 1.1474501357384848, "grad_norm": 0.5396321291474716, "learning_rate": 7.759998177070855e-06, "loss": 0.4024, "step": 12785 }, { "epoch": 1.1475398801911556, "grad_norm": 0.5036801536072293, "learning_rate": 7.759562766733808e-06, "loss": 0.3994, "step": 12786 }, { "epoch": 1.1476296246438267, "grad_norm": 0.4882652724291118, "learning_rate": 7.759127326301527e-06, "loss": 0.3709, "step": 12787 }, { "epoch": 1.1477193690964977, "grad_norm": 0.47203119472121935, "learning_rate": 7.758691855778758e-06, "loss": 0.393, "step": 12788 }, { "epoch": 1.1478091135491688, "grad_norm": 0.4694271117435336, "learning_rate": 7.758256355170255e-06, "loss": 0.4016, "step": 12789 }, { "epoch": 1.1478988580018399, "grad_norm": 0.4987363520078663, "learning_rate": 7.757820824480763e-06, "loss": 0.4385, "step": 12790 }, { "epoch": 1.1479886024545107, "grad_norm": 0.514391253403833, "learning_rate": 7.757385263715033e-06, "loss": 0.3903, "step": 12791 }, { "epoch": 1.1480783469071818, "grad_norm": 0.4719079432050028, "learning_rate": 7.756949672877818e-06, "loss": 0.3978, "step": 12792 }, { "epoch": 1.1481680913598529, "grad_norm": 0.5144797374020322, "learning_rate": 7.756514051973863e-06, "loss": 0.3642, "step": 12793 }, { "epoch": 1.1482578358125237, "grad_norm": 0.4863221023980538, "learning_rate": 7.756078401007925e-06, "loss": 0.413, "step": 12794 }, { "epoch": 1.1483475802651948, "grad_norm": 0.5236743024597613, "learning_rate": 7.75564271998475e-06, "loss": 0.4176, "step": 12795 }, { "epoch": 1.1484373247178659, "grad_norm": 0.517130845453688, "learning_rate": 7.755207008909093e-06, "loss": 0.3995, "step": 12796 }, { "epoch": 1.148527069170537, "grad_norm": 0.4548749313127176, "learning_rate": 7.754771267785703e-06, "loss": 0.3877, "step": 12797 }, { "epoch": 1.148616813623208, "grad_norm": 0.5223657839494584, "learning_rate": 7.754335496619336e-06, "loss": 0.4602, "step": 12798 }, { "epoch": 1.1487065580758788, "grad_norm": 0.5002878634103051, "learning_rate": 7.75389969541474e-06, "loss": 0.3827, "step": 12799 }, { "epoch": 1.14879630252855, "grad_norm": 0.4904474475879563, "learning_rate": 7.753463864176669e-06, "loss": 0.3935, "step": 12800 }, { "epoch": 1.148886046981221, "grad_norm": 0.5105942251125306, "learning_rate": 7.753028002909877e-06, "loss": 0.4765, "step": 12801 }, { "epoch": 1.148975791433892, "grad_norm": 0.5199437602519191, "learning_rate": 7.752592111619118e-06, "loss": 0.4526, "step": 12802 }, { "epoch": 1.149065535886563, "grad_norm": 0.5285877993554777, "learning_rate": 7.752156190309143e-06, "loss": 0.4032, "step": 12803 }, { "epoch": 1.149155280339234, "grad_norm": 0.4891736589623169, "learning_rate": 7.75172023898471e-06, "loss": 0.4348, "step": 12804 }, { "epoch": 1.149245024791905, "grad_norm": 0.5823188726164631, "learning_rate": 7.751284257650571e-06, "loss": 0.4688, "step": 12805 }, { "epoch": 1.1493347692445761, "grad_norm": 0.4747443781491743, "learning_rate": 7.750848246311482e-06, "loss": 0.4123, "step": 12806 }, { "epoch": 1.1494245136972472, "grad_norm": 0.5143620911052716, "learning_rate": 7.750412204972196e-06, "loss": 0.4703, "step": 12807 }, { "epoch": 1.149514258149918, "grad_norm": 0.5199106876095122, "learning_rate": 7.749976133637469e-06, "loss": 0.4128, "step": 12808 }, { "epoch": 1.149604002602589, "grad_norm": 0.5018350996778999, "learning_rate": 7.749540032312058e-06, "loss": 0.3737, "step": 12809 }, { "epoch": 1.1496937470552602, "grad_norm": 0.517089131205998, "learning_rate": 7.749103901000718e-06, "loss": 0.4358, "step": 12810 }, { "epoch": 1.1497834915079312, "grad_norm": 0.4857485430642312, "learning_rate": 7.748667739708205e-06, "loss": 0.4155, "step": 12811 }, { "epoch": 1.149873235960602, "grad_norm": 0.5398501901575713, "learning_rate": 7.748231548439278e-06, "loss": 0.4462, "step": 12812 }, { "epoch": 1.1499629804132732, "grad_norm": 0.4760551783152819, "learning_rate": 7.747795327198689e-06, "loss": 0.3767, "step": 12813 }, { "epoch": 1.1500527248659442, "grad_norm": 0.47140199509087455, "learning_rate": 7.747359075991201e-06, "loss": 0.3704, "step": 12814 }, { "epoch": 1.1501424693186153, "grad_norm": 0.5125750646639291, "learning_rate": 7.74692279482157e-06, "loss": 0.4077, "step": 12815 }, { "epoch": 1.1502322137712864, "grad_norm": 0.5343147239604281, "learning_rate": 7.746486483694552e-06, "loss": 0.4208, "step": 12816 }, { "epoch": 1.1503219582239572, "grad_norm": 0.48132535896773526, "learning_rate": 7.746050142614907e-06, "loss": 0.3851, "step": 12817 }, { "epoch": 1.1504117026766283, "grad_norm": 0.43608421383275964, "learning_rate": 7.745613771587392e-06, "loss": 0.3589, "step": 12818 }, { "epoch": 1.1505014471292994, "grad_norm": 0.5019922703536652, "learning_rate": 7.74517737061677e-06, "loss": 0.3527, "step": 12819 }, { "epoch": 1.1505911915819704, "grad_norm": 0.447828626486838, "learning_rate": 7.744740939707796e-06, "loss": 0.4116, "step": 12820 }, { "epoch": 1.1506809360346413, "grad_norm": 0.4784213594989218, "learning_rate": 7.74430447886523e-06, "loss": 0.3874, "step": 12821 }, { "epoch": 1.1507706804873123, "grad_norm": 0.5046252772594547, "learning_rate": 7.743867988093837e-06, "loss": 0.4196, "step": 12822 }, { "epoch": 1.1508604249399834, "grad_norm": 0.5107053302453595, "learning_rate": 7.743431467398368e-06, "loss": 0.3362, "step": 12823 }, { "epoch": 1.1509501693926545, "grad_norm": 0.4607686481315908, "learning_rate": 7.742994916783592e-06, "loss": 0.3616, "step": 12824 }, { "epoch": 1.1510399138453256, "grad_norm": 0.5212973203280576, "learning_rate": 7.742558336254268e-06, "loss": 0.3666, "step": 12825 }, { "epoch": 1.1511296582979964, "grad_norm": 0.4461029751522344, "learning_rate": 7.742121725815156e-06, "loss": 0.4291, "step": 12826 }, { "epoch": 1.1512194027506675, "grad_norm": 0.5069261782923714, "learning_rate": 7.741685085471016e-06, "loss": 0.401, "step": 12827 }, { "epoch": 1.1513091472033385, "grad_norm": 0.47424354165053567, "learning_rate": 7.741248415226612e-06, "loss": 0.4246, "step": 12828 }, { "epoch": 1.1513988916560094, "grad_norm": 0.5463924110961156, "learning_rate": 7.740811715086707e-06, "loss": 0.354, "step": 12829 }, { "epoch": 1.1514886361086805, "grad_norm": 0.494182451375243, "learning_rate": 7.740374985056063e-06, "loss": 0.4383, "step": 12830 }, { "epoch": 1.1515783805613515, "grad_norm": 0.503690226940831, "learning_rate": 7.73993822513944e-06, "loss": 0.353, "step": 12831 }, { "epoch": 1.1516681250140226, "grad_norm": 0.4611112707955915, "learning_rate": 7.739501435341605e-06, "loss": 0.3423, "step": 12832 }, { "epoch": 1.1517578694666937, "grad_norm": 0.471048685055096, "learning_rate": 7.73906461566732e-06, "loss": 0.4044, "step": 12833 }, { "epoch": 1.1518476139193645, "grad_norm": 0.4757030892348578, "learning_rate": 7.73862776612135e-06, "loss": 0.4051, "step": 12834 }, { "epoch": 1.1519373583720356, "grad_norm": 0.545145773260661, "learning_rate": 7.738190886708456e-06, "loss": 0.4236, "step": 12835 }, { "epoch": 1.1520271028247067, "grad_norm": 0.5112932843194107, "learning_rate": 7.737753977433406e-06, "loss": 0.4225, "step": 12836 }, { "epoch": 1.1521168472773777, "grad_norm": 0.5452204603340032, "learning_rate": 7.737317038300964e-06, "loss": 0.4626, "step": 12837 }, { "epoch": 1.1522065917300486, "grad_norm": 0.46985325503005854, "learning_rate": 7.736880069315895e-06, "loss": 0.3707, "step": 12838 }, { "epoch": 1.1522963361827196, "grad_norm": 0.46964184916277546, "learning_rate": 7.736443070482964e-06, "loss": 0.3534, "step": 12839 }, { "epoch": 1.1523860806353907, "grad_norm": 0.5153274931553998, "learning_rate": 7.736006041806936e-06, "loss": 0.3722, "step": 12840 }, { "epoch": 1.1524758250880618, "grad_norm": 0.4613009477298952, "learning_rate": 7.735568983292581e-06, "loss": 0.3676, "step": 12841 }, { "epoch": 1.1525655695407329, "grad_norm": 0.509621632738111, "learning_rate": 7.735131894944659e-06, "loss": 0.4262, "step": 12842 }, { "epoch": 1.1526553139934037, "grad_norm": 0.4848297811646557, "learning_rate": 7.734694776767943e-06, "loss": 0.3604, "step": 12843 }, { "epoch": 1.1527450584460748, "grad_norm": 0.5014120760442048, "learning_rate": 7.734257628767197e-06, "loss": 0.4489, "step": 12844 }, { "epoch": 1.1528348028987458, "grad_norm": 0.5123048177572423, "learning_rate": 7.733820450947188e-06, "loss": 0.3826, "step": 12845 }, { "epoch": 1.152924547351417, "grad_norm": 0.4465653453406482, "learning_rate": 7.733383243312685e-06, "loss": 0.3151, "step": 12846 }, { "epoch": 1.1530142918040878, "grad_norm": 0.5156603224045396, "learning_rate": 7.732946005868457e-06, "loss": 0.4335, "step": 12847 }, { "epoch": 1.1531040362567588, "grad_norm": 0.4756079575508222, "learning_rate": 7.73250873861927e-06, "loss": 0.4376, "step": 12848 }, { "epoch": 1.15319378070943, "grad_norm": 0.5110856137866379, "learning_rate": 7.732071441569896e-06, "loss": 0.3711, "step": 12849 }, { "epoch": 1.153283525162101, "grad_norm": 0.47899679702704184, "learning_rate": 7.7316341147251e-06, "loss": 0.5296, "step": 12850 }, { "epoch": 1.153373269614772, "grad_norm": 0.5809861565245613, "learning_rate": 7.731196758089657e-06, "loss": 0.4022, "step": 12851 }, { "epoch": 1.153463014067443, "grad_norm": 0.5224464190177844, "learning_rate": 7.73075937166833e-06, "loss": 0.4191, "step": 12852 }, { "epoch": 1.153552758520114, "grad_norm": 0.46562737950695404, "learning_rate": 7.730321955465894e-06, "loss": 0.3249, "step": 12853 }, { "epoch": 1.153642502972785, "grad_norm": 0.4680152307903867, "learning_rate": 7.729884509487116e-06, "loss": 0.3715, "step": 12854 }, { "epoch": 1.153732247425456, "grad_norm": 0.49934050820507814, "learning_rate": 7.72944703373677e-06, "loss": 0.3688, "step": 12855 }, { "epoch": 1.153821991878127, "grad_norm": 0.48778491521252676, "learning_rate": 7.729009528219624e-06, "loss": 0.4145, "step": 12856 }, { "epoch": 1.153911736330798, "grad_norm": 0.5410038337752874, "learning_rate": 7.728571992940452e-06, "loss": 0.4024, "step": 12857 }, { "epoch": 1.154001480783469, "grad_norm": 0.4898281034911291, "learning_rate": 7.728134427904024e-06, "loss": 0.3904, "step": 12858 }, { "epoch": 1.1540912252361402, "grad_norm": 0.48374145407136077, "learning_rate": 7.727696833115113e-06, "loss": 0.3427, "step": 12859 }, { "epoch": 1.1541809696888112, "grad_norm": 0.4555434129909731, "learning_rate": 7.72725920857849e-06, "loss": 0.4198, "step": 12860 }, { "epoch": 1.154270714141482, "grad_norm": 0.5328084217294622, "learning_rate": 7.72682155429893e-06, "loss": 0.384, "step": 12861 }, { "epoch": 1.1543604585941531, "grad_norm": 0.5096454797380917, "learning_rate": 7.726383870281203e-06, "loss": 0.4083, "step": 12862 }, { "epoch": 1.1544502030468242, "grad_norm": 0.47269026538798614, "learning_rate": 7.725946156530084e-06, "loss": 0.3682, "step": 12863 }, { "epoch": 1.154539947499495, "grad_norm": 0.4960413223475037, "learning_rate": 7.725508413050344e-06, "loss": 0.3903, "step": 12864 }, { "epoch": 1.1546296919521661, "grad_norm": 0.4550371472256742, "learning_rate": 7.725070639846762e-06, "loss": 0.3676, "step": 12865 }, { "epoch": 1.1547194364048372, "grad_norm": 0.4834418994923826, "learning_rate": 7.724632836924109e-06, "loss": 0.4018, "step": 12866 }, { "epoch": 1.1548091808575083, "grad_norm": 0.46641388772127546, "learning_rate": 7.72419500428716e-06, "loss": 0.3421, "step": 12867 }, { "epoch": 1.1548989253101793, "grad_norm": 0.4802599312904612, "learning_rate": 7.72375714194069e-06, "loss": 0.44, "step": 12868 }, { "epoch": 1.1549886697628502, "grad_norm": 0.5305395827719758, "learning_rate": 7.723319249889474e-06, "loss": 0.429, "step": 12869 }, { "epoch": 1.1550784142155213, "grad_norm": 0.42359957858470987, "learning_rate": 7.722881328138287e-06, "loss": 0.3069, "step": 12870 }, { "epoch": 1.1551681586681923, "grad_norm": 0.4689263687457986, "learning_rate": 7.722443376691908e-06, "loss": 0.4753, "step": 12871 }, { "epoch": 1.1552579031208634, "grad_norm": 0.5383758861145853, "learning_rate": 7.72200539555511e-06, "loss": 0.4986, "step": 12872 }, { "epoch": 1.1553476475735343, "grad_norm": 0.5434140639471868, "learning_rate": 7.721567384732668e-06, "loss": 0.4105, "step": 12873 }, { "epoch": 1.1554373920262053, "grad_norm": 0.48156799682461177, "learning_rate": 7.721129344229364e-06, "loss": 0.4478, "step": 12874 }, { "epoch": 1.1555271364788764, "grad_norm": 0.5544511048669098, "learning_rate": 7.720691274049972e-06, "loss": 0.4772, "step": 12875 }, { "epoch": 1.1556168809315475, "grad_norm": 0.4783419544010668, "learning_rate": 7.72025317419927e-06, "loss": 0.4393, "step": 12876 }, { "epoch": 1.1557066253842185, "grad_norm": 0.50680677751292, "learning_rate": 7.719815044682036e-06, "loss": 0.4019, "step": 12877 }, { "epoch": 1.1557963698368894, "grad_norm": 0.4948631306315597, "learning_rate": 7.719376885503048e-06, "loss": 0.44, "step": 12878 }, { "epoch": 1.1558861142895605, "grad_norm": 0.5184389346041641, "learning_rate": 7.718938696667083e-06, "loss": 0.3846, "step": 12879 }, { "epoch": 1.1559758587422315, "grad_norm": 0.48911276669849835, "learning_rate": 7.718500478178922e-06, "loss": 0.366, "step": 12880 }, { "epoch": 1.1560656031949026, "grad_norm": 0.5057027289965068, "learning_rate": 7.718062230043345e-06, "loss": 0.4388, "step": 12881 }, { "epoch": 1.1561553476475734, "grad_norm": 0.4598134987337076, "learning_rate": 7.717623952265127e-06, "loss": 0.4242, "step": 12882 }, { "epoch": 1.1562450921002445, "grad_norm": 0.5073680348350762, "learning_rate": 7.717185644849053e-06, "loss": 0.4325, "step": 12883 }, { "epoch": 1.1563348365529156, "grad_norm": 0.5449075313906631, "learning_rate": 7.7167473077999e-06, "loss": 0.4279, "step": 12884 }, { "epoch": 1.1564245810055866, "grad_norm": 0.49605796586083734, "learning_rate": 7.716308941122447e-06, "loss": 0.4602, "step": 12885 }, { "epoch": 1.1565143254582577, "grad_norm": 0.5177376355992773, "learning_rate": 7.71587054482148e-06, "loss": 0.4026, "step": 12886 }, { "epoch": 1.1566040699109286, "grad_norm": 0.5144163671297247, "learning_rate": 7.715432118901774e-06, "loss": 0.3837, "step": 12887 }, { "epoch": 1.1566938143635996, "grad_norm": 0.48569480988661756, "learning_rate": 7.714993663368116e-06, "loss": 0.4006, "step": 12888 }, { "epoch": 1.1567835588162707, "grad_norm": 0.4746512659022868, "learning_rate": 7.714555178225282e-06, "loss": 0.3928, "step": 12889 }, { "epoch": 1.1568733032689418, "grad_norm": 0.46321958130880814, "learning_rate": 7.71411666347806e-06, "loss": 0.3774, "step": 12890 }, { "epoch": 1.1569630477216126, "grad_norm": 0.5188789770465836, "learning_rate": 7.713678119131227e-06, "loss": 0.4555, "step": 12891 }, { "epoch": 1.1570527921742837, "grad_norm": 0.4645351571070035, "learning_rate": 7.713239545189571e-06, "loss": 0.3783, "step": 12892 }, { "epoch": 1.1571425366269548, "grad_norm": 0.5174311591737292, "learning_rate": 7.71280094165787e-06, "loss": 0.4381, "step": 12893 }, { "epoch": 1.1572322810796258, "grad_norm": 0.5009544005860268, "learning_rate": 7.712362308540908e-06, "loss": 0.3994, "step": 12894 }, { "epoch": 1.157322025532297, "grad_norm": 0.5039415635592984, "learning_rate": 7.71192364584347e-06, "loss": 0.4414, "step": 12895 }, { "epoch": 1.1574117699849678, "grad_norm": 0.494994544636068, "learning_rate": 7.711484953570343e-06, "loss": 0.3908, "step": 12896 }, { "epoch": 1.1575015144376388, "grad_norm": 0.4858848883391641, "learning_rate": 7.711046231726305e-06, "loss": 0.3844, "step": 12897 }, { "epoch": 1.15759125889031, "grad_norm": 0.5001440793698754, "learning_rate": 7.710607480316146e-06, "loss": 0.4002, "step": 12898 }, { "epoch": 1.1576810033429807, "grad_norm": 0.5399849887943795, "learning_rate": 7.710168699344649e-06, "loss": 0.4212, "step": 12899 }, { "epoch": 1.1577707477956518, "grad_norm": 0.5244714289392957, "learning_rate": 7.709729888816599e-06, "loss": 0.4359, "step": 12900 }, { "epoch": 1.1578604922483229, "grad_norm": 0.5019666247122664, "learning_rate": 7.70929104873678e-06, "loss": 0.4134, "step": 12901 }, { "epoch": 1.157950236700994, "grad_norm": 0.5173543700017922, "learning_rate": 7.70885217910998e-06, "loss": 0.4415, "step": 12902 }, { "epoch": 1.158039981153665, "grad_norm": 0.4956843620733237, "learning_rate": 7.708413279940985e-06, "loss": 0.4679, "step": 12903 }, { "epoch": 1.1581297256063359, "grad_norm": 0.5709151210518206, "learning_rate": 7.707974351234582e-06, "loss": 0.4251, "step": 12904 }, { "epoch": 1.158219470059007, "grad_norm": 0.47978619073747775, "learning_rate": 7.707535392995557e-06, "loss": 0.4801, "step": 12905 }, { "epoch": 1.158309214511678, "grad_norm": 0.49269463622580223, "learning_rate": 7.707096405228697e-06, "loss": 0.3175, "step": 12906 }, { "epoch": 1.158398958964349, "grad_norm": 0.48007696434952823, "learning_rate": 7.70665738793879e-06, "loss": 0.4392, "step": 12907 }, { "epoch": 1.15848870341702, "grad_norm": 0.4911492069845698, "learning_rate": 7.706218341130624e-06, "loss": 0.366, "step": 12908 }, { "epoch": 1.158578447869691, "grad_norm": 0.4864126767655236, "learning_rate": 7.705779264808985e-06, "loss": 0.457, "step": 12909 }, { "epoch": 1.158668192322362, "grad_norm": 0.53695302572792, "learning_rate": 7.705340158978665e-06, "loss": 0.4656, "step": 12910 }, { "epoch": 1.1587579367750331, "grad_norm": 0.5307191577388116, "learning_rate": 7.704901023644453e-06, "loss": 0.4369, "step": 12911 }, { "epoch": 1.1588476812277042, "grad_norm": 0.5561821460416537, "learning_rate": 7.704461858811132e-06, "loss": 0.4053, "step": 12912 }, { "epoch": 1.158937425680375, "grad_norm": 0.49505386792575773, "learning_rate": 7.7040226644835e-06, "loss": 0.4343, "step": 12913 }, { "epoch": 1.1590271701330461, "grad_norm": 0.5288039090259528, "learning_rate": 7.70358344066634e-06, "loss": 0.4081, "step": 12914 }, { "epoch": 1.1591169145857172, "grad_norm": 0.5073681266820107, "learning_rate": 7.703144187364445e-06, "loss": 0.4341, "step": 12915 }, { "epoch": 1.1592066590383883, "grad_norm": 0.4724838355682666, "learning_rate": 7.702704904582604e-06, "loss": 0.3843, "step": 12916 }, { "epoch": 1.1592964034910591, "grad_norm": 0.4995401584590138, "learning_rate": 7.70226559232561e-06, "loss": 0.4583, "step": 12917 }, { "epoch": 1.1593861479437302, "grad_norm": 0.578200412197023, "learning_rate": 7.701826250598252e-06, "loss": 0.5109, "step": 12918 }, { "epoch": 1.1594758923964013, "grad_norm": 0.5238235637944514, "learning_rate": 7.701386879405324e-06, "loss": 0.3911, "step": 12919 }, { "epoch": 1.1595656368490723, "grad_norm": 0.5386115697799739, "learning_rate": 7.700947478751614e-06, "loss": 0.4913, "step": 12920 }, { "epoch": 1.1596553813017434, "grad_norm": 0.48425349025438646, "learning_rate": 7.700508048641918e-06, "loss": 0.3954, "step": 12921 }, { "epoch": 1.1597451257544142, "grad_norm": 0.5015137130162673, "learning_rate": 7.700068589081022e-06, "loss": 0.4473, "step": 12922 }, { "epoch": 1.1598348702070853, "grad_norm": 0.5505954360337493, "learning_rate": 7.699629100073727e-06, "loss": 0.4632, "step": 12923 }, { "epoch": 1.1599246146597564, "grad_norm": 0.5063449849882703, "learning_rate": 7.699189581624818e-06, "loss": 0.4038, "step": 12924 }, { "epoch": 1.1600143591124275, "grad_norm": 0.4971482361272827, "learning_rate": 7.698750033739096e-06, "loss": 0.3993, "step": 12925 }, { "epoch": 1.1601041035650983, "grad_norm": 0.5098479371288317, "learning_rate": 7.698310456421347e-06, "loss": 0.385, "step": 12926 }, { "epoch": 1.1601938480177694, "grad_norm": 0.46377738904759463, "learning_rate": 7.697870849676372e-06, "loss": 0.4372, "step": 12927 }, { "epoch": 1.1602835924704404, "grad_norm": 0.5732396267864214, "learning_rate": 7.697431213508959e-06, "loss": 0.3854, "step": 12928 }, { "epoch": 1.1603733369231115, "grad_norm": 0.49214549347513875, "learning_rate": 7.696991547923907e-06, "loss": 0.3585, "step": 12929 }, { "epoch": 1.1604630813757826, "grad_norm": 0.4555333828930817, "learning_rate": 7.696551852926009e-06, "loss": 0.4143, "step": 12930 }, { "epoch": 1.1605528258284534, "grad_norm": 0.5100010196259851, "learning_rate": 7.696112128520062e-06, "loss": 0.3849, "step": 12931 }, { "epoch": 1.1606425702811245, "grad_norm": 0.5070098450854733, "learning_rate": 7.695672374710859e-06, "loss": 0.3712, "step": 12932 }, { "epoch": 1.1607323147337956, "grad_norm": 0.4883497882249423, "learning_rate": 7.695232591503197e-06, "loss": 0.4891, "step": 12933 }, { "epoch": 1.1608220591864664, "grad_norm": 0.5114290280282329, "learning_rate": 7.694792778901872e-06, "loss": 0.3984, "step": 12934 }, { "epoch": 1.1609118036391375, "grad_norm": 0.4835764422275165, "learning_rate": 7.69435293691168e-06, "loss": 0.4166, "step": 12935 }, { "epoch": 1.1610015480918086, "grad_norm": 0.5013418480984654, "learning_rate": 7.693913065537419e-06, "loss": 0.3811, "step": 12936 }, { "epoch": 1.1610912925444796, "grad_norm": 0.47431675374588306, "learning_rate": 7.693473164783888e-06, "loss": 0.4172, "step": 12937 }, { "epoch": 1.1611810369971507, "grad_norm": 0.4936196347253688, "learning_rate": 7.69303323465588e-06, "loss": 0.4396, "step": 12938 }, { "epoch": 1.1612707814498215, "grad_norm": 0.5022680061094785, "learning_rate": 7.692593275158196e-06, "loss": 0.3833, "step": 12939 }, { "epoch": 1.1613605259024926, "grad_norm": 0.5053331660800698, "learning_rate": 7.692153286295632e-06, "loss": 0.4217, "step": 12940 }, { "epoch": 1.1614502703551637, "grad_norm": 0.4795089646707519, "learning_rate": 7.691713268072989e-06, "loss": 0.3655, "step": 12941 }, { "epoch": 1.1615400148078348, "grad_norm": 0.5241739284644171, "learning_rate": 7.691273220495063e-06, "loss": 0.4868, "step": 12942 }, { "epoch": 1.1616297592605056, "grad_norm": 0.53651766120611, "learning_rate": 7.690833143566656e-06, "loss": 0.4177, "step": 12943 }, { "epoch": 1.1617195037131767, "grad_norm": 0.46513631962007934, "learning_rate": 7.690393037292563e-06, "loss": 0.3523, "step": 12944 }, { "epoch": 1.1618092481658477, "grad_norm": 0.47434419580121695, "learning_rate": 7.689952901677588e-06, "loss": 0.3986, "step": 12945 }, { "epoch": 1.1618989926185188, "grad_norm": 0.4939969918836037, "learning_rate": 7.68951273672653e-06, "loss": 0.42, "step": 12946 }, { "epoch": 1.1619887370711899, "grad_norm": 0.49263057008683686, "learning_rate": 7.68907254244419e-06, "loss": 0.3824, "step": 12947 }, { "epoch": 1.1620784815238607, "grad_norm": 0.520902946268651, "learning_rate": 7.688632318835363e-06, "loss": 0.4459, "step": 12948 }, { "epoch": 1.1621682259765318, "grad_norm": 0.4806459873025358, "learning_rate": 7.688192065904859e-06, "loss": 0.4028, "step": 12949 }, { "epoch": 1.1622579704292029, "grad_norm": 0.45765214497582857, "learning_rate": 7.687751783657472e-06, "loss": 0.3256, "step": 12950 }, { "epoch": 1.162347714881874, "grad_norm": 0.4857977280701212, "learning_rate": 7.687311472098008e-06, "loss": 0.4573, "step": 12951 }, { "epoch": 1.1624374593345448, "grad_norm": 0.5545038094319662, "learning_rate": 7.686871131231267e-06, "loss": 0.4285, "step": 12952 }, { "epoch": 1.1625272037872159, "grad_norm": 0.4818418225863654, "learning_rate": 7.686430761062051e-06, "loss": 0.3832, "step": 12953 }, { "epoch": 1.162616948239887, "grad_norm": 0.48309832406601927, "learning_rate": 7.685990361595163e-06, "loss": 0.4148, "step": 12954 }, { "epoch": 1.162706692692558, "grad_norm": 0.4808018875142361, "learning_rate": 7.685549932835408e-06, "loss": 0.3704, "step": 12955 }, { "epoch": 1.162796437145229, "grad_norm": 0.5058411945628696, "learning_rate": 7.685109474787585e-06, "loss": 0.3901, "step": 12956 }, { "epoch": 1.1628861815979, "grad_norm": 0.45652889905099175, "learning_rate": 7.6846689874565e-06, "loss": 0.379, "step": 12957 }, { "epoch": 1.162975926050571, "grad_norm": 0.526076146079538, "learning_rate": 7.684228470846959e-06, "loss": 0.478, "step": 12958 }, { "epoch": 1.163065670503242, "grad_norm": 0.5011131446093824, "learning_rate": 7.683787924963763e-06, "loss": 0.4305, "step": 12959 }, { "epoch": 1.1631554149559131, "grad_norm": 0.5072539024740321, "learning_rate": 7.683347349811717e-06, "loss": 0.3899, "step": 12960 }, { "epoch": 1.163245159408584, "grad_norm": 0.4715856531074903, "learning_rate": 7.682906745395626e-06, "loss": 0.3631, "step": 12961 }, { "epoch": 1.163334903861255, "grad_norm": 0.5415212369474192, "learning_rate": 7.682466111720297e-06, "loss": 0.4247, "step": 12962 }, { "epoch": 1.1634246483139261, "grad_norm": 0.5245292976688475, "learning_rate": 7.682025448790533e-06, "loss": 0.4251, "step": 12963 }, { "epoch": 1.1635143927665972, "grad_norm": 0.4828932692188688, "learning_rate": 7.681584756611139e-06, "loss": 0.3606, "step": 12964 }, { "epoch": 1.1636041372192683, "grad_norm": 0.4889862555658486, "learning_rate": 7.681144035186925e-06, "loss": 0.4213, "step": 12965 }, { "epoch": 1.163693881671939, "grad_norm": 0.5231916150550479, "learning_rate": 7.680703284522694e-06, "loss": 0.419, "step": 12966 }, { "epoch": 1.1637836261246102, "grad_norm": 0.49329352807576343, "learning_rate": 7.680262504623254e-06, "loss": 0.3969, "step": 12967 }, { "epoch": 1.1638733705772812, "grad_norm": 0.4699462002033827, "learning_rate": 7.679821695493413e-06, "loss": 0.3625, "step": 12968 }, { "epoch": 1.163963115029952, "grad_norm": 0.49686836915472576, "learning_rate": 7.679380857137976e-06, "loss": 0.429, "step": 12969 }, { "epoch": 1.1640528594826232, "grad_norm": 0.5277603141219634, "learning_rate": 7.678939989561753e-06, "loss": 0.4168, "step": 12970 }, { "epoch": 1.1641426039352942, "grad_norm": 0.4909385023592629, "learning_rate": 7.67849909276955e-06, "loss": 0.3669, "step": 12971 }, { "epoch": 1.1642323483879653, "grad_norm": 0.5148147918960273, "learning_rate": 7.678058166766178e-06, "loss": 0.3981, "step": 12972 }, { "epoch": 1.1643220928406364, "grad_norm": 0.4911268251087019, "learning_rate": 7.677617211556444e-06, "loss": 0.4258, "step": 12973 }, { "epoch": 1.1644118372933072, "grad_norm": 0.4937523003910649, "learning_rate": 7.677176227145155e-06, "loss": 0.4284, "step": 12974 }, { "epoch": 1.1645015817459783, "grad_norm": 0.5054895139402796, "learning_rate": 7.676735213537123e-06, "loss": 0.4057, "step": 12975 }, { "epoch": 1.1645913261986494, "grad_norm": 0.48918194978550633, "learning_rate": 7.676294170737158e-06, "loss": 0.4225, "step": 12976 }, { "epoch": 1.1646810706513204, "grad_norm": 0.5143730430885933, "learning_rate": 7.675853098750068e-06, "loss": 0.4248, "step": 12977 }, { "epoch": 1.1647708151039913, "grad_norm": 0.4894104861073017, "learning_rate": 7.675411997580664e-06, "loss": 0.3329, "step": 12978 }, { "epoch": 1.1648605595566623, "grad_norm": 0.46841604954581256, "learning_rate": 7.674970867233758e-06, "loss": 0.4024, "step": 12979 }, { "epoch": 1.1649503040093334, "grad_norm": 0.45916307060137185, "learning_rate": 7.674529707714157e-06, "loss": 0.3212, "step": 12980 }, { "epoch": 1.1650400484620045, "grad_norm": 0.4548010084200056, "learning_rate": 7.674088519026676e-06, "loss": 0.4029, "step": 12981 }, { "epoch": 1.1651297929146756, "grad_norm": 0.5047430539434348, "learning_rate": 7.673647301176126e-06, "loss": 0.4135, "step": 12982 }, { "epoch": 1.1652195373673464, "grad_norm": 0.493649030036563, "learning_rate": 7.673206054167318e-06, "loss": 0.4079, "step": 12983 }, { "epoch": 1.1653092818200175, "grad_norm": 0.5133183992874809, "learning_rate": 7.672764778005065e-06, "loss": 0.4119, "step": 12984 }, { "epoch": 1.1653990262726885, "grad_norm": 0.5215174031785184, "learning_rate": 7.672323472694175e-06, "loss": 0.3735, "step": 12985 }, { "epoch": 1.1654887707253596, "grad_norm": 0.49179752615561223, "learning_rate": 7.671882138239468e-06, "loss": 0.4695, "step": 12986 }, { "epoch": 1.1655785151780305, "grad_norm": 0.5514856581434245, "learning_rate": 7.671440774645752e-06, "loss": 0.4101, "step": 12987 }, { "epoch": 1.1656682596307015, "grad_norm": 0.48368393411761124, "learning_rate": 7.670999381917842e-06, "loss": 0.3994, "step": 12988 }, { "epoch": 1.1657580040833726, "grad_norm": 0.49333619267514306, "learning_rate": 7.670557960060553e-06, "loss": 0.4292, "step": 12989 }, { "epoch": 1.1658477485360437, "grad_norm": 0.5041826001201085, "learning_rate": 7.670116509078697e-06, "loss": 0.4088, "step": 12990 }, { "epoch": 1.1659374929887147, "grad_norm": 0.5108990080079205, "learning_rate": 7.669675028977089e-06, "loss": 0.407, "step": 12991 }, { "epoch": 1.1660272374413856, "grad_norm": 0.45856488691414676, "learning_rate": 7.669233519760544e-06, "loss": 0.3872, "step": 12992 }, { "epoch": 1.1661169818940567, "grad_norm": 0.46585997199512985, "learning_rate": 7.668791981433876e-06, "loss": 0.3638, "step": 12993 }, { "epoch": 1.1662067263467277, "grad_norm": 0.5143084826866362, "learning_rate": 7.6683504140019e-06, "loss": 0.4073, "step": 12994 }, { "epoch": 1.1662964707993988, "grad_norm": 0.46400294954915744, "learning_rate": 7.667908817469436e-06, "loss": 0.3483, "step": 12995 }, { "epoch": 1.1663862152520696, "grad_norm": 0.48240434620302364, "learning_rate": 7.667467191841296e-06, "loss": 0.4104, "step": 12996 }, { "epoch": 1.1664759597047407, "grad_norm": 0.49886761512455113, "learning_rate": 7.667025537122297e-06, "loss": 0.4273, "step": 12997 }, { "epoch": 1.1665657041574118, "grad_norm": 0.5564064912807323, "learning_rate": 7.666583853317255e-06, "loss": 0.3896, "step": 12998 }, { "epoch": 1.1666554486100829, "grad_norm": 0.47831611587338224, "learning_rate": 7.666142140430987e-06, "loss": 0.3656, "step": 12999 }, { "epoch": 1.166745193062754, "grad_norm": 0.4988751626428581, "learning_rate": 7.665700398468312e-06, "loss": 0.3863, "step": 13000 }, { "epoch": 1.1668349375154248, "grad_norm": 0.5077708594615993, "learning_rate": 7.665258627434046e-06, "loss": 0.3497, "step": 13001 }, { "epoch": 1.1669246819680958, "grad_norm": 0.5285536652943327, "learning_rate": 7.664816827333008e-06, "loss": 0.4124, "step": 13002 }, { "epoch": 1.167014426420767, "grad_norm": 0.49224155217255144, "learning_rate": 7.664374998170015e-06, "loss": 0.3865, "step": 13003 }, { "epoch": 1.1671041708734378, "grad_norm": 0.47761877039981293, "learning_rate": 7.663933139949885e-06, "loss": 0.3527, "step": 13004 }, { "epoch": 1.1671939153261088, "grad_norm": 0.4641521397608732, "learning_rate": 7.66349125267744e-06, "loss": 0.3874, "step": 13005 }, { "epoch": 1.16728365977878, "grad_norm": 0.5036062110967915, "learning_rate": 7.663049336357493e-06, "loss": 0.4258, "step": 13006 }, { "epoch": 1.167373404231451, "grad_norm": 0.4718064312193332, "learning_rate": 7.66260739099487e-06, "loss": 0.329, "step": 13007 }, { "epoch": 1.167463148684122, "grad_norm": 0.4718777653246433, "learning_rate": 7.662165416594388e-06, "loss": 0.4492, "step": 13008 }, { "epoch": 1.167552893136793, "grad_norm": 0.5567696370178906, "learning_rate": 7.661723413160867e-06, "loss": 0.4098, "step": 13009 }, { "epoch": 1.167642637589464, "grad_norm": 0.48850863238592607, "learning_rate": 7.661281380699128e-06, "loss": 0.3607, "step": 13010 }, { "epoch": 1.167732382042135, "grad_norm": 0.5060745640294692, "learning_rate": 7.660839319213991e-06, "loss": 0.3943, "step": 13011 }, { "epoch": 1.167822126494806, "grad_norm": 0.49275610639898787, "learning_rate": 7.660397228710277e-06, "loss": 0.4368, "step": 13012 }, { "epoch": 1.167911870947477, "grad_norm": 0.4976537995831822, "learning_rate": 7.659955109192808e-06, "loss": 0.4392, "step": 13013 }, { "epoch": 1.168001615400148, "grad_norm": 0.5310882930109506, "learning_rate": 7.659512960666405e-06, "loss": 0.3763, "step": 13014 }, { "epoch": 1.168091359852819, "grad_norm": 0.5403432444333227, "learning_rate": 7.659070783135889e-06, "loss": 0.4504, "step": 13015 }, { "epoch": 1.1681811043054902, "grad_norm": 0.49598953010593716, "learning_rate": 7.658628576606086e-06, "loss": 0.3809, "step": 13016 }, { "epoch": 1.1682708487581612, "grad_norm": 0.4799565137989277, "learning_rate": 7.658186341081815e-06, "loss": 0.3874, "step": 13017 }, { "epoch": 1.168360593210832, "grad_norm": 0.4862148513873543, "learning_rate": 7.6577440765679e-06, "loss": 0.4362, "step": 13018 }, { "epoch": 1.1684503376635031, "grad_norm": 0.5347767218681323, "learning_rate": 7.657301783069166e-06, "loss": 0.4056, "step": 13019 }, { "epoch": 1.1685400821161742, "grad_norm": 0.5113950466240705, "learning_rate": 7.656859460590432e-06, "loss": 0.4364, "step": 13020 }, { "epoch": 1.1686298265688453, "grad_norm": 0.5099832074426152, "learning_rate": 7.656417109136528e-06, "loss": 0.3549, "step": 13021 }, { "epoch": 1.1687195710215161, "grad_norm": 0.4905747542203152, "learning_rate": 7.655974728712272e-06, "loss": 0.4327, "step": 13022 }, { "epoch": 1.1688093154741872, "grad_norm": 0.5129283588495513, "learning_rate": 7.655532319322494e-06, "loss": 0.4444, "step": 13023 }, { "epoch": 1.1688990599268583, "grad_norm": 0.5132987337291041, "learning_rate": 7.655089880972015e-06, "loss": 0.4131, "step": 13024 }, { "epoch": 1.1689888043795293, "grad_norm": 0.5429585792811563, "learning_rate": 7.654647413665663e-06, "loss": 0.3581, "step": 13025 }, { "epoch": 1.1690785488322004, "grad_norm": 0.4936512915130166, "learning_rate": 7.65420491740826e-06, "loss": 0.4165, "step": 13026 }, { "epoch": 1.1691682932848713, "grad_norm": 0.5018825288297359, "learning_rate": 7.653762392204636e-06, "loss": 0.4225, "step": 13027 }, { "epoch": 1.1692580377375423, "grad_norm": 0.5510584237086704, "learning_rate": 7.653319838059614e-06, "loss": 0.4887, "step": 13028 }, { "epoch": 1.1693477821902134, "grad_norm": 0.5038922639675469, "learning_rate": 7.652877254978021e-06, "loss": 0.4033, "step": 13029 }, { "epoch": 1.1694375266428845, "grad_norm": 0.5011367608936234, "learning_rate": 7.652434642964683e-06, "loss": 0.4023, "step": 13030 }, { "epoch": 1.1695272710955553, "grad_norm": 0.46874312541370333, "learning_rate": 7.65199200202443e-06, "loss": 0.3684, "step": 13031 }, { "epoch": 1.1696170155482264, "grad_norm": 0.4686912364172749, "learning_rate": 7.651549332162085e-06, "loss": 0.4357, "step": 13032 }, { "epoch": 1.1697067600008975, "grad_norm": 0.5055734262406858, "learning_rate": 7.651106633382481e-06, "loss": 0.4219, "step": 13033 }, { "epoch": 1.1697965044535685, "grad_norm": 0.542347162442136, "learning_rate": 7.650663905690441e-06, "loss": 0.4604, "step": 13034 }, { "epoch": 1.1698862489062396, "grad_norm": 0.5298822152808337, "learning_rate": 7.650221149090796e-06, "loss": 0.4556, "step": 13035 }, { "epoch": 1.1699759933589104, "grad_norm": 0.4637777999391698, "learning_rate": 7.649778363588373e-06, "loss": 0.3335, "step": 13036 }, { "epoch": 1.1700657378115815, "grad_norm": 0.4569685144381269, "learning_rate": 7.649335549188005e-06, "loss": 0.3575, "step": 13037 }, { "epoch": 1.1701554822642526, "grad_norm": 0.45578550372675003, "learning_rate": 7.648892705894514e-06, "loss": 0.3877, "step": 13038 }, { "epoch": 1.1702452267169234, "grad_norm": 0.4891982451602601, "learning_rate": 7.648449833712736e-06, "loss": 0.362, "step": 13039 }, { "epoch": 1.1703349711695945, "grad_norm": 0.4911509719346557, "learning_rate": 7.648006932647499e-06, "loss": 0.3636, "step": 13040 }, { "epoch": 1.1704247156222656, "grad_norm": 0.47441776545767017, "learning_rate": 7.647564002703632e-06, "loss": 0.3727, "step": 13041 }, { "epoch": 1.1705144600749366, "grad_norm": 0.4828475219591665, "learning_rate": 7.647121043885964e-06, "loss": 0.4528, "step": 13042 }, { "epoch": 1.1706042045276077, "grad_norm": 0.5620607643928033, "learning_rate": 7.64667805619933e-06, "loss": 0.4127, "step": 13043 }, { "epoch": 1.1706939489802786, "grad_norm": 0.5375633315260063, "learning_rate": 7.646235039648559e-06, "loss": 0.4435, "step": 13044 }, { "epoch": 1.1707836934329496, "grad_norm": 0.5157662848662812, "learning_rate": 7.645791994238484e-06, "loss": 0.3984, "step": 13045 }, { "epoch": 1.1708734378856207, "grad_norm": 0.5004693596323085, "learning_rate": 7.645348919973933e-06, "loss": 0.4059, "step": 13046 }, { "epoch": 1.1709631823382918, "grad_norm": 0.5943615867657187, "learning_rate": 7.644905816859742e-06, "loss": 0.4784, "step": 13047 }, { "epoch": 1.1710529267909626, "grad_norm": 0.5331168353806559, "learning_rate": 7.644462684900743e-06, "loss": 0.39, "step": 13048 }, { "epoch": 1.1711426712436337, "grad_norm": 0.47464432967159054, "learning_rate": 7.644019524101767e-06, "loss": 0.4175, "step": 13049 }, { "epoch": 1.1712324156963048, "grad_norm": 0.5358386770126817, "learning_rate": 7.643576334467646e-06, "loss": 0.3732, "step": 13050 }, { "epoch": 1.1713221601489758, "grad_norm": 0.43921219671970557, "learning_rate": 7.643133116003217e-06, "loss": 0.3395, "step": 13051 }, { "epoch": 1.171411904601647, "grad_norm": 0.4810150450852024, "learning_rate": 7.642689868713308e-06, "loss": 0.3913, "step": 13052 }, { "epoch": 1.1715016490543178, "grad_norm": 0.5351061430671763, "learning_rate": 7.64224659260276e-06, "loss": 0.4708, "step": 13053 }, { "epoch": 1.1715913935069888, "grad_norm": 0.5279895537686873, "learning_rate": 7.641803287676404e-06, "loss": 0.467, "step": 13054 }, { "epoch": 1.17168113795966, "grad_norm": 0.5242768849445145, "learning_rate": 7.641359953939073e-06, "loss": 0.4004, "step": 13055 }, { "epoch": 1.171770882412331, "grad_norm": 0.5033373265102347, "learning_rate": 7.640916591395604e-06, "loss": 0.3575, "step": 13056 }, { "epoch": 1.1718606268650018, "grad_norm": 0.4419395737162383, "learning_rate": 7.64047320005083e-06, "loss": 0.3713, "step": 13057 }, { "epoch": 1.1719503713176729, "grad_norm": 0.530299628101253, "learning_rate": 7.640029779909588e-06, "loss": 0.4397, "step": 13058 }, { "epoch": 1.172040115770344, "grad_norm": 0.49044585162618365, "learning_rate": 7.639586330976717e-06, "loss": 0.4569, "step": 13059 }, { "epoch": 1.172129860223015, "grad_norm": 0.5051318991664143, "learning_rate": 7.639142853257048e-06, "loss": 0.4179, "step": 13060 }, { "epoch": 1.172219604675686, "grad_norm": 0.47640664382754405, "learning_rate": 7.63869934675542e-06, "loss": 0.4097, "step": 13061 }, { "epoch": 1.172309349128357, "grad_norm": 0.5299955885679154, "learning_rate": 7.638255811476669e-06, "loss": 0.4162, "step": 13062 }, { "epoch": 1.172399093581028, "grad_norm": 0.5214320910450668, "learning_rate": 7.637812247425633e-06, "loss": 0.4102, "step": 13063 }, { "epoch": 1.172488838033699, "grad_norm": 0.5174755461028524, "learning_rate": 7.63736865460715e-06, "loss": 0.3492, "step": 13064 }, { "epoch": 1.1725785824863701, "grad_norm": 0.4884402398697534, "learning_rate": 7.636925033026055e-06, "loss": 0.4135, "step": 13065 }, { "epoch": 1.172668326939041, "grad_norm": 0.47866704973638075, "learning_rate": 7.636481382687187e-06, "loss": 0.4172, "step": 13066 }, { "epoch": 1.172758071391712, "grad_norm": 0.5008677580764227, "learning_rate": 7.636037703595387e-06, "loss": 0.3948, "step": 13067 }, { "epoch": 1.1728478158443831, "grad_norm": 0.5449046476979593, "learning_rate": 7.63559399575549e-06, "loss": 0.4005, "step": 13068 }, { "epoch": 1.1729375602970542, "grad_norm": 0.4594525986405826, "learning_rate": 7.635150259172338e-06, "loss": 0.3381, "step": 13069 }, { "epoch": 1.1730273047497253, "grad_norm": 0.4682190437630496, "learning_rate": 7.63470649385077e-06, "loss": 0.3842, "step": 13070 }, { "epoch": 1.1731170492023961, "grad_norm": 0.4827028167425918, "learning_rate": 7.634262699795623e-06, "loss": 0.4377, "step": 13071 }, { "epoch": 1.1732067936550672, "grad_norm": 0.5066341491591013, "learning_rate": 7.633818877011738e-06, "loss": 0.4326, "step": 13072 }, { "epoch": 1.1732965381077383, "grad_norm": 0.4991581213676585, "learning_rate": 7.633375025503957e-06, "loss": 0.3772, "step": 13073 }, { "epoch": 1.173386282560409, "grad_norm": 0.5403909744285259, "learning_rate": 7.632931145277119e-06, "loss": 0.3731, "step": 13074 }, { "epoch": 1.1734760270130802, "grad_norm": 0.45426982313918757, "learning_rate": 7.632487236336066e-06, "loss": 0.4056, "step": 13075 }, { "epoch": 1.1735657714657513, "grad_norm": 0.5478141917692531, "learning_rate": 7.632043298685637e-06, "loss": 0.4192, "step": 13076 }, { "epoch": 1.1736555159184223, "grad_norm": 0.5667023265385305, "learning_rate": 7.631599332330675e-06, "loss": 0.4382, "step": 13077 }, { "epoch": 1.1737452603710934, "grad_norm": 0.504583399544307, "learning_rate": 7.631155337276023e-06, "loss": 0.3487, "step": 13078 }, { "epoch": 1.1738350048237642, "grad_norm": 0.47091050139017715, "learning_rate": 7.630711313526523e-06, "loss": 0.4119, "step": 13079 }, { "epoch": 1.1739247492764353, "grad_norm": 0.5155328183937482, "learning_rate": 7.630267261087013e-06, "loss": 0.444, "step": 13080 }, { "epoch": 1.1740144937291064, "grad_norm": 0.5114162120710523, "learning_rate": 7.62982317996234e-06, "loss": 0.3869, "step": 13081 }, { "epoch": 1.1741042381817774, "grad_norm": 0.48724297169796127, "learning_rate": 7.629379070157348e-06, "loss": 0.4236, "step": 13082 }, { "epoch": 1.1741939826344483, "grad_norm": 0.45412388797994024, "learning_rate": 7.628934931676876e-06, "loss": 0.3761, "step": 13083 }, { "epoch": 1.1742837270871194, "grad_norm": 0.5138952697323448, "learning_rate": 7.628490764525771e-06, "loss": 0.421, "step": 13084 }, { "epoch": 1.1743734715397904, "grad_norm": 0.5079885521412915, "learning_rate": 7.628046568708877e-06, "loss": 0.3998, "step": 13085 }, { "epoch": 1.1744632159924615, "grad_norm": 0.5111762491947434, "learning_rate": 7.627602344231036e-06, "loss": 0.4474, "step": 13086 }, { "epoch": 1.1745529604451326, "grad_norm": 0.5347881981168034, "learning_rate": 7.6271580910970955e-06, "loss": 0.4217, "step": 13087 }, { "epoch": 1.1746427048978034, "grad_norm": 0.5304221291885396, "learning_rate": 7.6267138093118985e-06, "loss": 0.4825, "step": 13088 }, { "epoch": 1.1747324493504745, "grad_norm": 0.5178112145382515, "learning_rate": 7.62626949888029e-06, "loss": 0.4141, "step": 13089 }, { "epoch": 1.1748221938031456, "grad_norm": 0.5076229716880105, "learning_rate": 7.625825159807118e-06, "loss": 0.3895, "step": 13090 }, { "epoch": 1.1749119382558166, "grad_norm": 0.49295951229743495, "learning_rate": 7.625380792097225e-06, "loss": 0.3861, "step": 13091 }, { "epoch": 1.1750016827084875, "grad_norm": 0.4914598588455284, "learning_rate": 7.624936395755458e-06, "loss": 0.4253, "step": 13092 }, { "epoch": 1.1750914271611586, "grad_norm": 0.5159222168602544, "learning_rate": 7.624491970786667e-06, "loss": 0.4948, "step": 13093 }, { "epoch": 1.1751811716138296, "grad_norm": 0.5267227355063655, "learning_rate": 7.624047517195694e-06, "loss": 0.3877, "step": 13094 }, { "epoch": 1.1752709160665007, "grad_norm": 0.4792783333481258, "learning_rate": 7.6236030349873905e-06, "loss": 0.3925, "step": 13095 }, { "epoch": 1.1753606605191718, "grad_norm": 0.5061317424227451, "learning_rate": 7.623158524166599e-06, "loss": 0.3661, "step": 13096 }, { "epoch": 1.1754504049718426, "grad_norm": 0.5461457821000896, "learning_rate": 7.622713984738173e-06, "loss": 0.4213, "step": 13097 }, { "epoch": 1.1755401494245137, "grad_norm": 0.48084329299753836, "learning_rate": 7.622269416706955e-06, "loss": 0.4267, "step": 13098 }, { "epoch": 1.1756298938771848, "grad_norm": 0.48296037754674787, "learning_rate": 7.621824820077797e-06, "loss": 0.4042, "step": 13099 }, { "epoch": 1.1757196383298558, "grad_norm": 0.5120476554453166, "learning_rate": 7.621380194855545e-06, "loss": 0.3642, "step": 13100 }, { "epoch": 1.1758093827825267, "grad_norm": 0.47535942059527475, "learning_rate": 7.620935541045052e-06, "loss": 0.4268, "step": 13101 }, { "epoch": 1.1758991272351977, "grad_norm": 0.5340882147304065, "learning_rate": 7.620490858651164e-06, "loss": 0.4811, "step": 13102 }, { "epoch": 1.1759888716878688, "grad_norm": 0.4991910859452693, "learning_rate": 7.620046147678731e-06, "loss": 0.3723, "step": 13103 }, { "epoch": 1.1760786161405399, "grad_norm": 0.47803279120354103, "learning_rate": 7.619601408132605e-06, "loss": 0.4435, "step": 13104 }, { "epoch": 1.176168360593211, "grad_norm": 0.4942855473235862, "learning_rate": 7.619156640017633e-06, "loss": 0.3863, "step": 13105 }, { "epoch": 1.1762581050458818, "grad_norm": 0.45979355178867876, "learning_rate": 7.618711843338666e-06, "loss": 0.3683, "step": 13106 }, { "epoch": 1.1763478494985529, "grad_norm": 0.4746808482254755, "learning_rate": 7.618267018100559e-06, "loss": 0.3846, "step": 13107 }, { "epoch": 1.176437593951224, "grad_norm": 0.478105516409608, "learning_rate": 7.617822164308159e-06, "loss": 0.4166, "step": 13108 }, { "epoch": 1.1765273384038948, "grad_norm": 0.5017588494506984, "learning_rate": 7.617377281966318e-06, "loss": 0.4159, "step": 13109 }, { "epoch": 1.1766170828565659, "grad_norm": 0.49991508607756335, "learning_rate": 7.616932371079889e-06, "loss": 0.4397, "step": 13110 }, { "epoch": 1.176706827309237, "grad_norm": 0.553878749194926, "learning_rate": 7.616487431653725e-06, "loss": 0.4676, "step": 13111 }, { "epoch": 1.176796571761908, "grad_norm": 0.4981421796192341, "learning_rate": 7.616042463692675e-06, "loss": 0.3783, "step": 13112 }, { "epoch": 1.176886316214579, "grad_norm": 0.48516754783143334, "learning_rate": 7.615597467201595e-06, "loss": 0.4159, "step": 13113 }, { "epoch": 1.17697606066725, "grad_norm": 0.5386442699816631, "learning_rate": 7.615152442185336e-06, "loss": 0.4025, "step": 13114 }, { "epoch": 1.177065805119921, "grad_norm": 0.485713804426702, "learning_rate": 7.614707388648755e-06, "loss": 0.3713, "step": 13115 }, { "epoch": 1.177155549572592, "grad_norm": 0.4789196538115661, "learning_rate": 7.614262306596698e-06, "loss": 0.3731, "step": 13116 }, { "epoch": 1.1772452940252631, "grad_norm": 0.5213130210958411, "learning_rate": 7.613817196034028e-06, "loss": 0.4332, "step": 13117 }, { "epoch": 1.177335038477934, "grad_norm": 0.5043693361947421, "learning_rate": 7.613372056965593e-06, "loss": 0.4332, "step": 13118 }, { "epoch": 1.177424782930605, "grad_norm": 0.4714872556930091, "learning_rate": 7.6129268893962505e-06, "loss": 0.389, "step": 13119 }, { "epoch": 1.177514527383276, "grad_norm": 0.497134079422044, "learning_rate": 7.6124816933308535e-06, "loss": 0.3936, "step": 13120 }, { "epoch": 1.1776042718359472, "grad_norm": 0.4781879901272433, "learning_rate": 7.61203646877426e-06, "loss": 0.4361, "step": 13121 }, { "epoch": 1.1776940162886183, "grad_norm": 0.5418587261758574, "learning_rate": 7.6115912157313224e-06, "loss": 0.3966, "step": 13122 }, { "epoch": 1.177783760741289, "grad_norm": 0.45476543441633316, "learning_rate": 7.611145934206899e-06, "loss": 0.3366, "step": 13123 }, { "epoch": 1.1778735051939602, "grad_norm": 0.47161939686132626, "learning_rate": 7.610700624205845e-06, "loss": 0.4136, "step": 13124 }, { "epoch": 1.1779632496466312, "grad_norm": 0.4978922929643827, "learning_rate": 7.610255285733015e-06, "loss": 0.3573, "step": 13125 }, { "epoch": 1.1780529940993023, "grad_norm": 0.5504459612391513, "learning_rate": 7.609809918793269e-06, "loss": 0.4959, "step": 13126 }, { "epoch": 1.1781427385519732, "grad_norm": 0.47760429330149445, "learning_rate": 7.609364523391463e-06, "loss": 0.3791, "step": 13127 }, { "epoch": 1.1782324830046442, "grad_norm": 0.475325843200968, "learning_rate": 7.608919099532453e-06, "loss": 0.4146, "step": 13128 }, { "epoch": 1.1783222274573153, "grad_norm": 0.4540125839887598, "learning_rate": 7.608473647221098e-06, "loss": 0.3632, "step": 13129 }, { "epoch": 1.1784119719099864, "grad_norm": 0.5029950444531704, "learning_rate": 7.608028166462257e-06, "loss": 0.3653, "step": 13130 }, { "epoch": 1.1785017163626574, "grad_norm": 0.4735057232328065, "learning_rate": 7.607582657260786e-06, "loss": 0.3589, "step": 13131 }, { "epoch": 1.1785914608153283, "grad_norm": 0.46106645668889706, "learning_rate": 7.607137119621546e-06, "loss": 0.3529, "step": 13132 }, { "epoch": 1.1786812052679994, "grad_norm": 0.462057402085151, "learning_rate": 7.606691553549392e-06, "loss": 0.3746, "step": 13133 }, { "epoch": 1.1787709497206704, "grad_norm": 0.47091488161492634, "learning_rate": 7.6062459590491886e-06, "loss": 0.3535, "step": 13134 }, { "epoch": 1.1788606941733415, "grad_norm": 0.4820706962474619, "learning_rate": 7.605800336125792e-06, "loss": 0.4155, "step": 13135 }, { "epoch": 1.1789504386260123, "grad_norm": 0.4697776350380511, "learning_rate": 7.605354684784063e-06, "loss": 0.3696, "step": 13136 }, { "epoch": 1.1790401830786834, "grad_norm": 0.5123662679675941, "learning_rate": 7.60490900502886e-06, "loss": 0.4106, "step": 13137 }, { "epoch": 1.1791299275313545, "grad_norm": 0.5069062221242258, "learning_rate": 7.604463296865046e-06, "loss": 0.4168, "step": 13138 }, { "epoch": 1.1792196719840256, "grad_norm": 0.5064176257329346, "learning_rate": 7.604017560297481e-06, "loss": 0.4108, "step": 13139 }, { "epoch": 1.1793094164366966, "grad_norm": 0.49302590633451127, "learning_rate": 7.603571795331026e-06, "loss": 0.3771, "step": 13140 }, { "epoch": 1.1793991608893675, "grad_norm": 0.5092331757350076, "learning_rate": 7.603126001970542e-06, "loss": 0.4501, "step": 13141 }, { "epoch": 1.1794889053420385, "grad_norm": 0.528298151352436, "learning_rate": 7.6026801802208915e-06, "loss": 0.3602, "step": 13142 }, { "epoch": 1.1795786497947096, "grad_norm": 0.4302883567521878, "learning_rate": 7.602234330086936e-06, "loss": 0.3734, "step": 13143 }, { "epoch": 1.1796683942473807, "grad_norm": 0.48967519046480334, "learning_rate": 7.601788451573538e-06, "loss": 0.3973, "step": 13144 }, { "epoch": 1.1797581387000515, "grad_norm": 0.49418398535126495, "learning_rate": 7.601342544685559e-06, "loss": 0.4132, "step": 13145 }, { "epoch": 1.1798478831527226, "grad_norm": 0.48145701696502663, "learning_rate": 7.600896609427865e-06, "loss": 0.4213, "step": 13146 }, { "epoch": 1.1799376276053937, "grad_norm": 0.47459494768226296, "learning_rate": 7.600450645805317e-06, "loss": 0.3665, "step": 13147 }, { "epoch": 1.1800273720580647, "grad_norm": 0.48343133468201405, "learning_rate": 7.600004653822779e-06, "loss": 0.3889, "step": 13148 }, { "epoch": 1.1801171165107356, "grad_norm": 0.48305495175777796, "learning_rate": 7.599558633485115e-06, "loss": 0.4455, "step": 13149 }, { "epoch": 1.1802068609634067, "grad_norm": 0.4955223857000845, "learning_rate": 7.599112584797188e-06, "loss": 0.3963, "step": 13150 }, { "epoch": 1.1802966054160777, "grad_norm": 0.5756458556589857, "learning_rate": 7.598666507763866e-06, "loss": 0.5166, "step": 13151 }, { "epoch": 1.1803863498687488, "grad_norm": 0.5820025528635454, "learning_rate": 7.598220402390008e-06, "loss": 0.4482, "step": 13152 }, { "epoch": 1.1804760943214196, "grad_norm": 0.4808133990440654, "learning_rate": 7.597774268680485e-06, "loss": 0.3772, "step": 13153 }, { "epoch": 1.1805658387740907, "grad_norm": 0.5372944332970753, "learning_rate": 7.59732810664016e-06, "loss": 0.4686, "step": 13154 }, { "epoch": 1.1806555832267618, "grad_norm": 0.520285496970946, "learning_rate": 7.5968819162738975e-06, "loss": 0.4154, "step": 13155 }, { "epoch": 1.1807453276794329, "grad_norm": 0.5182607282829316, "learning_rate": 7.596435697586566e-06, "loss": 0.4591, "step": 13156 }, { "epoch": 1.180835072132104, "grad_norm": 0.5514971709319918, "learning_rate": 7.59598945058303e-06, "loss": 0.4018, "step": 13157 }, { "epoch": 1.1809248165847748, "grad_norm": 0.4607449482291293, "learning_rate": 7.595543175268157e-06, "loss": 0.3412, "step": 13158 }, { "epoch": 1.1810145610374458, "grad_norm": 0.4871758643322676, "learning_rate": 7.595096871646814e-06, "loss": 0.443, "step": 13159 }, { "epoch": 1.181104305490117, "grad_norm": 0.5415353097071929, "learning_rate": 7.594650539723869e-06, "loss": 0.4816, "step": 13160 }, { "epoch": 1.181194049942788, "grad_norm": 0.5117233409944506, "learning_rate": 7.594204179504188e-06, "loss": 0.3671, "step": 13161 }, { "epoch": 1.1812837943954588, "grad_norm": 0.4998880654591861, "learning_rate": 7.59375779099264e-06, "loss": 0.3921, "step": 13162 }, { "epoch": 1.18137353884813, "grad_norm": 0.4648685350366586, "learning_rate": 7.593311374194092e-06, "loss": 0.3664, "step": 13163 }, { "epoch": 1.181463283300801, "grad_norm": 0.456747213252449, "learning_rate": 7.592864929113415e-06, "loss": 0.3702, "step": 13164 }, { "epoch": 1.181553027753472, "grad_norm": 0.5131508371113748, "learning_rate": 7.592418455755475e-06, "loss": 0.4293, "step": 13165 }, { "epoch": 1.181642772206143, "grad_norm": 0.45276800580365506, "learning_rate": 7.591971954125143e-06, "loss": 0.3732, "step": 13166 }, { "epoch": 1.181732516658814, "grad_norm": 0.5058556238380427, "learning_rate": 7.5915254242272886e-06, "loss": 0.4104, "step": 13167 }, { "epoch": 1.181822261111485, "grad_norm": 0.47220200089398456, "learning_rate": 7.591078866066779e-06, "loss": 0.349, "step": 13168 }, { "epoch": 1.181912005564156, "grad_norm": 0.49886565739767674, "learning_rate": 7.590632279648488e-06, "loss": 0.4332, "step": 13169 }, { "epoch": 1.1820017500168272, "grad_norm": 0.4898605091893199, "learning_rate": 7.590185664977283e-06, "loss": 0.3638, "step": 13170 }, { "epoch": 1.182091494469498, "grad_norm": 0.4849230127813061, "learning_rate": 7.589739022058036e-06, "loss": 0.3657, "step": 13171 }, { "epoch": 1.182181238922169, "grad_norm": 0.49914013739220714, "learning_rate": 7.589292350895619e-06, "loss": 0.3739, "step": 13172 }, { "epoch": 1.1822709833748402, "grad_norm": 0.4505629921744638, "learning_rate": 7.588845651494902e-06, "loss": 0.4185, "step": 13173 }, { "epoch": 1.1823607278275112, "grad_norm": 0.5141118241654522, "learning_rate": 7.5883989238607545e-06, "loss": 0.3496, "step": 13174 }, { "epoch": 1.1824504722801823, "grad_norm": 0.4498329110581942, "learning_rate": 7.587952167998053e-06, "loss": 0.35, "step": 13175 }, { "epoch": 1.1825402167328531, "grad_norm": 0.4827893491793862, "learning_rate": 7.587505383911666e-06, "loss": 0.3685, "step": 13176 }, { "epoch": 1.1826299611855242, "grad_norm": 0.5034755790835309, "learning_rate": 7.587058571606468e-06, "loss": 0.431, "step": 13177 }, { "epoch": 1.1827197056381953, "grad_norm": 0.5149864446941617, "learning_rate": 7.5866117310873314e-06, "loss": 0.3925, "step": 13178 }, { "epoch": 1.1828094500908664, "grad_norm": 0.4911773868202307, "learning_rate": 7.5861648623591285e-06, "loss": 0.3948, "step": 13179 }, { "epoch": 1.1828991945435372, "grad_norm": 0.5033041677045004, "learning_rate": 7.585717965426734e-06, "loss": 0.3619, "step": 13180 }, { "epoch": 1.1829889389962083, "grad_norm": 0.4920608839941777, "learning_rate": 7.585271040295022e-06, "loss": 0.3488, "step": 13181 }, { "epoch": 1.1830786834488793, "grad_norm": 0.46549846186517224, "learning_rate": 7.5848240869688635e-06, "loss": 0.4837, "step": 13182 }, { "epoch": 1.1831684279015504, "grad_norm": 0.5143948871649787, "learning_rate": 7.584377105453138e-06, "loss": 0.359, "step": 13183 }, { "epoch": 1.1832581723542213, "grad_norm": 0.4924408969867176, "learning_rate": 7.583930095752714e-06, "loss": 0.3987, "step": 13184 }, { "epoch": 1.1833479168068923, "grad_norm": 0.47998512411720357, "learning_rate": 7.583483057872472e-06, "loss": 0.3816, "step": 13185 }, { "epoch": 1.1834376612595634, "grad_norm": 0.4938360386039535, "learning_rate": 7.583035991817285e-06, "loss": 0.4033, "step": 13186 }, { "epoch": 1.1835274057122345, "grad_norm": 0.47966766383704024, "learning_rate": 7.582588897592027e-06, "loss": 0.4015, "step": 13187 }, { "epoch": 1.1836171501649053, "grad_norm": 0.499030429528668, "learning_rate": 7.582141775201577e-06, "loss": 0.4499, "step": 13188 }, { "epoch": 1.1837068946175764, "grad_norm": 0.505858500525095, "learning_rate": 7.581694624650811e-06, "loss": 0.3728, "step": 13189 }, { "epoch": 1.1837966390702475, "grad_norm": 0.5070152304317845, "learning_rate": 7.581247445944601e-06, "loss": 0.3713, "step": 13190 }, { "epoch": 1.1838863835229185, "grad_norm": 0.4967249808744552, "learning_rate": 7.580800239087831e-06, "loss": 0.4277, "step": 13191 }, { "epoch": 1.1839761279755896, "grad_norm": 0.5038468296878893, "learning_rate": 7.580353004085372e-06, "loss": 0.4581, "step": 13192 }, { "epoch": 1.1840658724282604, "grad_norm": 0.5165713439457609, "learning_rate": 7.579905740942107e-06, "loss": 0.4058, "step": 13193 }, { "epoch": 1.1841556168809315, "grad_norm": 0.48771532786792904, "learning_rate": 7.579458449662906e-06, "loss": 0.3467, "step": 13194 }, { "epoch": 1.1842453613336026, "grad_norm": 0.4848507535951044, "learning_rate": 7.579011130252655e-06, "loss": 0.4605, "step": 13195 }, { "epoch": 1.1843351057862737, "grad_norm": 0.5268738627491167, "learning_rate": 7.578563782716227e-06, "loss": 0.396, "step": 13196 }, { "epoch": 1.1844248502389445, "grad_norm": 0.5350943602353039, "learning_rate": 7.578116407058505e-06, "loss": 0.4345, "step": 13197 }, { "epoch": 1.1845145946916156, "grad_norm": 0.48548852891984184, "learning_rate": 7.577669003284362e-06, "loss": 0.4103, "step": 13198 }, { "epoch": 1.1846043391442866, "grad_norm": 0.514585261440928, "learning_rate": 7.577221571398685e-06, "loss": 0.4091, "step": 13199 }, { "epoch": 1.1846940835969577, "grad_norm": 0.4737051745409104, "learning_rate": 7.576774111406348e-06, "loss": 0.3469, "step": 13200 }, { "epoch": 1.1847838280496288, "grad_norm": 0.46450529055905415, "learning_rate": 7.576326623312233e-06, "loss": 0.4076, "step": 13201 }, { "epoch": 1.1848735725022996, "grad_norm": 0.49797840299523016, "learning_rate": 7.5758791071212175e-06, "loss": 0.3918, "step": 13202 }, { "epoch": 1.1849633169549707, "grad_norm": 0.4649123710638917, "learning_rate": 7.575431562838187e-06, "loss": 0.3935, "step": 13203 }, { "epoch": 1.1850530614076418, "grad_norm": 0.4814053110191915, "learning_rate": 7.574983990468019e-06, "loss": 0.4066, "step": 13204 }, { "epoch": 1.1851428058603128, "grad_norm": 0.5210656266120278, "learning_rate": 7.574536390015594e-06, "loss": 0.4125, "step": 13205 }, { "epoch": 1.1852325503129837, "grad_norm": 0.4815872420165361, "learning_rate": 7.574088761485796e-06, "loss": 0.4137, "step": 13206 }, { "epoch": 1.1853222947656548, "grad_norm": 0.4755557946531252, "learning_rate": 7.573641104883504e-06, "loss": 0.4184, "step": 13207 }, { "epoch": 1.1854120392183258, "grad_norm": 0.5266435756557353, "learning_rate": 7.573193420213602e-06, "loss": 0.485, "step": 13208 }, { "epoch": 1.185501783670997, "grad_norm": 0.5509976274835914, "learning_rate": 7.572745707480973e-06, "loss": 0.4586, "step": 13209 }, { "epoch": 1.185591528123668, "grad_norm": 0.5447834514775955, "learning_rate": 7.572297966690497e-06, "loss": 0.4275, "step": 13210 }, { "epoch": 1.1856812725763388, "grad_norm": 0.4811902860777558, "learning_rate": 7.571850197847059e-06, "loss": 0.4009, "step": 13211 }, { "epoch": 1.1857710170290099, "grad_norm": 0.4842279543892678, "learning_rate": 7.571402400955542e-06, "loss": 0.4181, "step": 13212 }, { "epoch": 1.185860761481681, "grad_norm": 0.4883224473936312, "learning_rate": 7.570954576020829e-06, "loss": 0.411, "step": 13213 }, { "epoch": 1.185950505934352, "grad_norm": 0.4977667791198327, "learning_rate": 7.570506723047804e-06, "loss": 0.4073, "step": 13214 }, { "epoch": 1.1860402503870229, "grad_norm": 0.5011762767223674, "learning_rate": 7.570058842041352e-06, "loss": 0.3311, "step": 13215 }, { "epoch": 1.186129994839694, "grad_norm": 0.5681439815917533, "learning_rate": 7.569610933006356e-06, "loss": 0.527, "step": 13216 }, { "epoch": 1.186219739292365, "grad_norm": 0.5214724277691949, "learning_rate": 7.569162995947703e-06, "loss": 0.405, "step": 13217 }, { "epoch": 1.186309483745036, "grad_norm": 0.4962694424152066, "learning_rate": 7.568715030870275e-06, "loss": 0.3746, "step": 13218 }, { "epoch": 1.186399228197707, "grad_norm": 0.4585236602627774, "learning_rate": 7.56826703777896e-06, "loss": 0.339, "step": 13219 }, { "epoch": 1.186488972650378, "grad_norm": 0.4710466556450081, "learning_rate": 7.567819016678644e-06, "loss": 0.3716, "step": 13220 }, { "epoch": 1.186578717103049, "grad_norm": 0.4788455079824687, "learning_rate": 7.56737096757421e-06, "loss": 0.3814, "step": 13221 }, { "epoch": 1.1866684615557201, "grad_norm": 0.523630761626657, "learning_rate": 7.566922890470548e-06, "loss": 0.5124, "step": 13222 }, { "epoch": 1.186758206008391, "grad_norm": 0.5182002409198366, "learning_rate": 7.566474785372542e-06, "loss": 0.3514, "step": 13223 }, { "epoch": 1.186847950461062, "grad_norm": 0.519134529244451, "learning_rate": 7.566026652285081e-06, "loss": 0.4147, "step": 13224 }, { "epoch": 1.1869376949137331, "grad_norm": 0.47418731226515276, "learning_rate": 7.56557849121305e-06, "loss": 0.3851, "step": 13225 }, { "epoch": 1.1870274393664042, "grad_norm": 0.48510002791786827, "learning_rate": 7.565130302161338e-06, "loss": 0.3628, "step": 13226 }, { "epoch": 1.1871171838190753, "grad_norm": 0.5112790123736042, "learning_rate": 7.564682085134832e-06, "loss": 0.4245, "step": 13227 }, { "epoch": 1.1872069282717461, "grad_norm": 0.5286683039780167, "learning_rate": 7.5642338401384216e-06, "loss": 0.4016, "step": 13228 }, { "epoch": 1.1872966727244172, "grad_norm": 0.5061590847648088, "learning_rate": 7.563785567176994e-06, "loss": 0.4133, "step": 13229 }, { "epoch": 1.1873864171770883, "grad_norm": 0.5060268045396931, "learning_rate": 7.563337266255439e-06, "loss": 0.47, "step": 13230 }, { "epoch": 1.1874761616297593, "grad_norm": 0.5198721289886661, "learning_rate": 7.562888937378644e-06, "loss": 0.4034, "step": 13231 }, { "epoch": 1.1875659060824302, "grad_norm": 0.5117582021831151, "learning_rate": 7.5624405805515e-06, "loss": 0.4049, "step": 13232 }, { "epoch": 1.1876556505351012, "grad_norm": 0.5004900068385002, "learning_rate": 7.5619921957788954e-06, "loss": 0.3899, "step": 13233 }, { "epoch": 1.1877453949877723, "grad_norm": 0.4984212406097799, "learning_rate": 7.561543783065723e-06, "loss": 0.4853, "step": 13234 }, { "epoch": 1.1878351394404434, "grad_norm": 0.491651916832243, "learning_rate": 7.561095342416869e-06, "loss": 0.3807, "step": 13235 }, { "epoch": 1.1879248838931145, "grad_norm": 0.5142456839472037, "learning_rate": 7.560646873837227e-06, "loss": 0.3869, "step": 13236 }, { "epoch": 1.1880146283457853, "grad_norm": 0.5317195307132861, "learning_rate": 7.560198377331686e-06, "loss": 0.4617, "step": 13237 }, { "epoch": 1.1881043727984564, "grad_norm": 0.4833894306884319, "learning_rate": 7.559749852905138e-06, "loss": 0.3712, "step": 13238 }, { "epoch": 1.1881941172511274, "grad_norm": 0.5146359750788952, "learning_rate": 7.559301300562476e-06, "loss": 0.4403, "step": 13239 }, { "epoch": 1.1882838617037985, "grad_norm": 0.5139567890000101, "learning_rate": 7.5588527203085895e-06, "loss": 0.4313, "step": 13240 }, { "epoch": 1.1883736061564694, "grad_norm": 0.5099198972539327, "learning_rate": 7.558404112148372e-06, "loss": 0.4146, "step": 13241 }, { "epoch": 1.1884633506091404, "grad_norm": 0.5189504793085284, "learning_rate": 7.557955476086716e-06, "loss": 0.4425, "step": 13242 }, { "epoch": 1.1885530950618115, "grad_norm": 0.48396428501883604, "learning_rate": 7.5575068121285125e-06, "loss": 0.3774, "step": 13243 }, { "epoch": 1.1886428395144826, "grad_norm": 0.4849418579891055, "learning_rate": 7.557058120278655e-06, "loss": 0.4289, "step": 13244 }, { "epoch": 1.1887325839671536, "grad_norm": 0.4865691796917607, "learning_rate": 7.556609400542038e-06, "loss": 0.3627, "step": 13245 }, { "epoch": 1.1888223284198245, "grad_norm": 0.4664757462521854, "learning_rate": 7.556160652923558e-06, "loss": 0.4085, "step": 13246 }, { "epoch": 1.1889120728724956, "grad_norm": 0.5126851208100867, "learning_rate": 7.5557118774281024e-06, "loss": 0.4611, "step": 13247 }, { "epoch": 1.1890018173251666, "grad_norm": 0.505892545275738, "learning_rate": 7.5552630740605705e-06, "loss": 0.4226, "step": 13248 }, { "epoch": 1.1890915617778377, "grad_norm": 0.5134870479334039, "learning_rate": 7.5548142428258544e-06, "loss": 0.4151, "step": 13249 }, { "epoch": 1.1891813062305086, "grad_norm": 0.4897058303667609, "learning_rate": 7.5543653837288485e-06, "loss": 0.3734, "step": 13250 }, { "epoch": 1.1892710506831796, "grad_norm": 0.4337347941183547, "learning_rate": 7.553916496774452e-06, "loss": 0.3259, "step": 13251 }, { "epoch": 1.1893607951358507, "grad_norm": 0.5058960095648796, "learning_rate": 7.553467581967555e-06, "loss": 0.4233, "step": 13252 }, { "epoch": 1.1894505395885218, "grad_norm": 0.5255239219713684, "learning_rate": 7.5530186393130565e-06, "loss": 0.4451, "step": 13253 }, { "epoch": 1.1895402840411926, "grad_norm": 0.5566444826710136, "learning_rate": 7.552569668815852e-06, "loss": 0.4545, "step": 13254 }, { "epoch": 1.1896300284938637, "grad_norm": 0.5462284816605621, "learning_rate": 7.5521206704808385e-06, "loss": 0.4842, "step": 13255 }, { "epoch": 1.1897197729465347, "grad_norm": 0.5190391177178721, "learning_rate": 7.551671644312913e-06, "loss": 0.4178, "step": 13256 }, { "epoch": 1.1898095173992058, "grad_norm": 0.49617374188491775, "learning_rate": 7.551222590316969e-06, "loss": 0.3847, "step": 13257 }, { "epoch": 1.1898992618518767, "grad_norm": 0.48318441180571015, "learning_rate": 7.550773508497906e-06, "loss": 0.3553, "step": 13258 }, { "epoch": 1.1899890063045477, "grad_norm": 0.5285268710350037, "learning_rate": 7.550324398860625e-06, "loss": 0.4946, "step": 13259 }, { "epoch": 1.1900787507572188, "grad_norm": 0.5838232849770011, "learning_rate": 7.549875261410018e-06, "loss": 0.451, "step": 13260 }, { "epoch": 1.1901684952098899, "grad_norm": 0.5375635903426053, "learning_rate": 7.549426096150987e-06, "loss": 0.4125, "step": 13261 }, { "epoch": 1.190258239662561, "grad_norm": 0.5018107775383172, "learning_rate": 7.548976903088428e-06, "loss": 0.3758, "step": 13262 }, { "epoch": 1.1903479841152318, "grad_norm": 0.5143552878029969, "learning_rate": 7.548527682227244e-06, "loss": 0.4704, "step": 13263 }, { "epoch": 1.1904377285679029, "grad_norm": 0.4853197571412757, "learning_rate": 7.548078433572329e-06, "loss": 0.3626, "step": 13264 }, { "epoch": 1.190527473020574, "grad_norm": 0.4788328830970372, "learning_rate": 7.5476291571285865e-06, "loss": 0.4247, "step": 13265 }, { "epoch": 1.190617217473245, "grad_norm": 0.5356879939121076, "learning_rate": 7.547179852900912e-06, "loss": 0.3954, "step": 13266 }, { "epoch": 1.1907069619259159, "grad_norm": 0.48677013018116294, "learning_rate": 7.546730520894211e-06, "loss": 0.3947, "step": 13267 }, { "epoch": 1.190796706378587, "grad_norm": 0.5160900514233203, "learning_rate": 7.546281161113379e-06, "loss": 0.4205, "step": 13268 }, { "epoch": 1.190886450831258, "grad_norm": 0.533297636283816, "learning_rate": 7.545831773563319e-06, "loss": 0.3535, "step": 13269 }, { "epoch": 1.190976195283929, "grad_norm": 0.4879597068909926, "learning_rate": 7.5453823582489315e-06, "loss": 0.3829, "step": 13270 }, { "epoch": 1.1910659397366001, "grad_norm": 0.4296116944553234, "learning_rate": 7.544932915175119e-06, "loss": 0.3051, "step": 13271 }, { "epoch": 1.191155684189271, "grad_norm": 0.43322489109582935, "learning_rate": 7.544483444346779e-06, "loss": 0.3733, "step": 13272 }, { "epoch": 1.191245428641942, "grad_norm": 0.5175377353069209, "learning_rate": 7.544033945768817e-06, "loss": 0.4426, "step": 13273 }, { "epoch": 1.1913351730946131, "grad_norm": 0.5186500277403283, "learning_rate": 7.5435844194461335e-06, "loss": 0.3903, "step": 13274 }, { "epoch": 1.1914249175472842, "grad_norm": 0.422475329438877, "learning_rate": 7.5431348653836335e-06, "loss": 0.2973, "step": 13275 }, { "epoch": 1.191514661999955, "grad_norm": 0.48945619959911796, "learning_rate": 7.542685283586216e-06, "loss": 0.4317, "step": 13276 }, { "epoch": 1.191604406452626, "grad_norm": 0.5680641938275606, "learning_rate": 7.542235674058787e-06, "loss": 0.4555, "step": 13277 }, { "epoch": 1.1916941509052972, "grad_norm": 0.4902449934933042, "learning_rate": 7.541786036806249e-06, "loss": 0.3245, "step": 13278 }, { "epoch": 1.1917838953579682, "grad_norm": 0.43664855111486156, "learning_rate": 7.541336371833504e-06, "loss": 0.3622, "step": 13279 }, { "epoch": 1.1918736398106393, "grad_norm": 0.5028350887038207, "learning_rate": 7.540886679145458e-06, "loss": 0.3927, "step": 13280 }, { "epoch": 1.1919633842633102, "grad_norm": 0.4893907400253055, "learning_rate": 7.540436958747015e-06, "loss": 0.442, "step": 13281 }, { "epoch": 1.1920531287159812, "grad_norm": 0.5078308105446436, "learning_rate": 7.539987210643078e-06, "loss": 0.3531, "step": 13282 }, { "epoch": 1.1921428731686523, "grad_norm": 0.46697791746071177, "learning_rate": 7.539537434838554e-06, "loss": 0.3583, "step": 13283 }, { "epoch": 1.1922326176213234, "grad_norm": 0.4734771716584929, "learning_rate": 7.5390876313383465e-06, "loss": 0.4161, "step": 13284 }, { "epoch": 1.1923223620739942, "grad_norm": 0.4806007444353889, "learning_rate": 7.5386378001473634e-06, "loss": 0.4286, "step": 13285 }, { "epoch": 1.1924121065266653, "grad_norm": 0.4934008042846806, "learning_rate": 7.538187941270507e-06, "loss": 0.429, "step": 13286 }, { "epoch": 1.1925018509793364, "grad_norm": 0.5089979848949613, "learning_rate": 7.537738054712687e-06, "loss": 0.4012, "step": 13287 }, { "epoch": 1.1925915954320074, "grad_norm": 0.4699162443833268, "learning_rate": 7.537288140478805e-06, "loss": 0.4387, "step": 13288 }, { "epoch": 1.1926813398846783, "grad_norm": 0.5085543898135721, "learning_rate": 7.536838198573773e-06, "loss": 0.3356, "step": 13289 }, { "epoch": 1.1927710843373494, "grad_norm": 0.4814587827554252, "learning_rate": 7.536388229002494e-06, "loss": 0.3933, "step": 13290 }, { "epoch": 1.1928608287900204, "grad_norm": 0.4869095651873822, "learning_rate": 7.535938231769878e-06, "loss": 0.4003, "step": 13291 }, { "epoch": 1.1929505732426915, "grad_norm": 0.46995979557327955, "learning_rate": 7.535488206880831e-06, "loss": 0.362, "step": 13292 }, { "epoch": 1.1930403176953623, "grad_norm": 0.493319936034629, "learning_rate": 7.535038154340262e-06, "loss": 0.406, "step": 13293 }, { "epoch": 1.1931300621480334, "grad_norm": 0.47319524621921255, "learning_rate": 7.534588074153076e-06, "loss": 0.4554, "step": 13294 }, { "epoch": 1.1932198066007045, "grad_norm": 0.5215949248613082, "learning_rate": 7.534137966324187e-06, "loss": 0.363, "step": 13295 }, { "epoch": 1.1933095510533756, "grad_norm": 0.5136209503594933, "learning_rate": 7.533687830858498e-06, "loss": 0.4684, "step": 13296 }, { "epoch": 1.1933992955060466, "grad_norm": 0.530502503782985, "learning_rate": 7.533237667760922e-06, "loss": 0.3748, "step": 13297 }, { "epoch": 1.1934890399587175, "grad_norm": 0.50143427067686, "learning_rate": 7.532787477036368e-06, "loss": 0.4349, "step": 13298 }, { "epoch": 1.1935787844113885, "grad_norm": 0.527958086094804, "learning_rate": 7.532337258689743e-06, "loss": 0.4639, "step": 13299 }, { "epoch": 1.1936685288640596, "grad_norm": 0.5332535727225485, "learning_rate": 7.531887012725961e-06, "loss": 0.3752, "step": 13300 }, { "epoch": 1.1937582733167307, "grad_norm": 0.47130944085435855, "learning_rate": 7.531436739149929e-06, "loss": 0.3735, "step": 13301 }, { "epoch": 1.1938480177694015, "grad_norm": 0.4600694359810606, "learning_rate": 7.530986437966559e-06, "loss": 0.3744, "step": 13302 }, { "epoch": 1.1939377622220726, "grad_norm": 0.5048096611015586, "learning_rate": 7.530536109180762e-06, "loss": 0.3969, "step": 13303 }, { "epoch": 1.1940275066747437, "grad_norm": 0.4714995834208339, "learning_rate": 7.530085752797448e-06, "loss": 0.3906, "step": 13304 }, { "epoch": 1.1941172511274147, "grad_norm": 0.46435170610116033, "learning_rate": 7.529635368821528e-06, "loss": 0.4247, "step": 13305 }, { "epoch": 1.1942069955800858, "grad_norm": 0.5072851687291937, "learning_rate": 7.5291849572579175e-06, "loss": 0.3931, "step": 13306 }, { "epoch": 1.1942967400327567, "grad_norm": 0.48389860229885967, "learning_rate": 7.528734518111526e-06, "loss": 0.3881, "step": 13307 }, { "epoch": 1.1943864844854277, "grad_norm": 0.4899009077303024, "learning_rate": 7.528284051387265e-06, "loss": 0.401, "step": 13308 }, { "epoch": 1.1944762289380988, "grad_norm": 0.48215138596511703, "learning_rate": 7.52783355709005e-06, "loss": 0.3749, "step": 13309 }, { "epoch": 1.1945659733907699, "grad_norm": 0.5093746359597852, "learning_rate": 7.527383035224791e-06, "loss": 0.4545, "step": 13310 }, { "epoch": 1.1946557178434407, "grad_norm": 0.4803254363875525, "learning_rate": 7.526932485796402e-06, "loss": 0.4042, "step": 13311 }, { "epoch": 1.1947454622961118, "grad_norm": 0.5315117492844997, "learning_rate": 7.526481908809797e-06, "loss": 0.4142, "step": 13312 }, { "epoch": 1.1948352067487829, "grad_norm": 0.5066430333732068, "learning_rate": 7.526031304269893e-06, "loss": 0.3993, "step": 13313 }, { "epoch": 1.194924951201454, "grad_norm": 0.46968329183617014, "learning_rate": 7.5255806721815985e-06, "loss": 0.3757, "step": 13314 }, { "epoch": 1.195014695654125, "grad_norm": 0.4771081833669768, "learning_rate": 7.525130012549831e-06, "loss": 0.3586, "step": 13315 }, { "epoch": 1.1951044401067958, "grad_norm": 0.4877841780348839, "learning_rate": 7.524679325379507e-06, "loss": 0.3996, "step": 13316 }, { "epoch": 1.195194184559467, "grad_norm": 0.4679052798552732, "learning_rate": 7.524228610675539e-06, "loss": 0.4637, "step": 13317 }, { "epoch": 1.195283929012138, "grad_norm": 0.5374730629962918, "learning_rate": 7.523777868442843e-06, "loss": 0.4587, "step": 13318 }, { "epoch": 1.195373673464809, "grad_norm": 0.5356711004198879, "learning_rate": 7.523327098686334e-06, "loss": 0.4266, "step": 13319 }, { "epoch": 1.19546341791748, "grad_norm": 0.5221691109331071, "learning_rate": 7.52287630141093e-06, "loss": 0.385, "step": 13320 }, { "epoch": 1.195553162370151, "grad_norm": 0.5166864821696947, "learning_rate": 7.522425476621546e-06, "loss": 0.3819, "step": 13321 }, { "epoch": 1.195642906822822, "grad_norm": 0.4772865625399652, "learning_rate": 7.521974624323099e-06, "loss": 0.4498, "step": 13322 }, { "epoch": 1.195732651275493, "grad_norm": 0.5079573620355289, "learning_rate": 7.521523744520505e-06, "loss": 0.4267, "step": 13323 }, { "epoch": 1.195822395728164, "grad_norm": 0.5259483535874917, "learning_rate": 7.521072837218681e-06, "loss": 0.382, "step": 13324 }, { "epoch": 1.195912140180835, "grad_norm": 0.4782549880107545, "learning_rate": 7.520621902422547e-06, "loss": 0.464, "step": 13325 }, { "epoch": 1.196001884633506, "grad_norm": 0.5102089941208556, "learning_rate": 7.520170940137019e-06, "loss": 0.4076, "step": 13326 }, { "epoch": 1.1960916290861772, "grad_norm": 0.5177215347763561, "learning_rate": 7.519719950367013e-06, "loss": 0.3533, "step": 13327 }, { "epoch": 1.196181373538848, "grad_norm": 0.45550604202209377, "learning_rate": 7.519268933117453e-06, "loss": 0.3363, "step": 13328 }, { "epoch": 1.196271117991519, "grad_norm": 0.48844180906468715, "learning_rate": 7.518817888393252e-06, "loss": 0.408, "step": 13329 }, { "epoch": 1.1963608624441902, "grad_norm": 0.5114233778335523, "learning_rate": 7.518366816199334e-06, "loss": 0.4144, "step": 13330 }, { "epoch": 1.1964506068968612, "grad_norm": 0.4842834928835976, "learning_rate": 7.517915716540612e-06, "loss": 0.3555, "step": 13331 }, { "epoch": 1.1965403513495323, "grad_norm": 0.48090341348798343, "learning_rate": 7.517464589422012e-06, "loss": 0.4326, "step": 13332 }, { "epoch": 1.1966300958022031, "grad_norm": 0.471928338354264, "learning_rate": 7.5170134348484505e-06, "loss": 0.3825, "step": 13333 }, { "epoch": 1.1967198402548742, "grad_norm": 0.45633804452912746, "learning_rate": 7.516562252824848e-06, "loss": 0.4046, "step": 13334 }, { "epoch": 1.1968095847075453, "grad_norm": 0.5659896429036603, "learning_rate": 7.516111043356127e-06, "loss": 0.3603, "step": 13335 }, { "epoch": 1.1968993291602164, "grad_norm": 0.48362710960680483, "learning_rate": 7.5156598064472054e-06, "loss": 0.3904, "step": 13336 }, { "epoch": 1.1969890736128872, "grad_norm": 0.4969126350441341, "learning_rate": 7.5152085421030055e-06, "loss": 0.3981, "step": 13337 }, { "epoch": 1.1970788180655583, "grad_norm": 0.517438383428345, "learning_rate": 7.514757250328448e-06, "loss": 0.4496, "step": 13338 }, { "epoch": 1.1971685625182293, "grad_norm": 0.5371461471026033, "learning_rate": 7.514305931128456e-06, "loss": 0.4517, "step": 13339 }, { "epoch": 1.1972583069709004, "grad_norm": 0.5088465180160482, "learning_rate": 7.513854584507953e-06, "loss": 0.4786, "step": 13340 }, { "epoch": 1.1973480514235715, "grad_norm": 0.5241117504948025, "learning_rate": 7.513403210471857e-06, "loss": 0.378, "step": 13341 }, { "epoch": 1.1974377958762423, "grad_norm": 0.4970544247991682, "learning_rate": 7.512951809025093e-06, "loss": 0.3703, "step": 13342 }, { "epoch": 1.1975275403289134, "grad_norm": 0.4800279477129332, "learning_rate": 7.512500380172583e-06, "loss": 0.3965, "step": 13343 }, { "epoch": 1.1976172847815845, "grad_norm": 0.5084438528705444, "learning_rate": 7.512048923919253e-06, "loss": 0.3887, "step": 13344 }, { "epoch": 1.1977070292342555, "grad_norm": 0.50320499824751, "learning_rate": 7.511597440270022e-06, "loss": 0.3954, "step": 13345 }, { "epoch": 1.1977967736869264, "grad_norm": 0.451074845843053, "learning_rate": 7.511145929229817e-06, "loss": 0.3579, "step": 13346 }, { "epoch": 1.1978865181395975, "grad_norm": 0.4749921346576188, "learning_rate": 7.510694390803561e-06, "loss": 0.4134, "step": 13347 }, { "epoch": 1.1979762625922685, "grad_norm": 0.5410092329738465, "learning_rate": 7.510242824996179e-06, "loss": 0.3424, "step": 13348 }, { "epoch": 1.1980660070449396, "grad_norm": 0.500594620602155, "learning_rate": 7.509791231812596e-06, "loss": 0.4463, "step": 13349 }, { "epoch": 1.1981557514976107, "grad_norm": 0.48116138836312544, "learning_rate": 7.509339611257735e-06, "loss": 0.3502, "step": 13350 }, { "epoch": 1.1982454959502815, "grad_norm": 0.4916209660496109, "learning_rate": 7.508887963336524e-06, "loss": 0.4022, "step": 13351 }, { "epoch": 1.1983352404029526, "grad_norm": 0.511011896175785, "learning_rate": 7.508436288053885e-06, "loss": 0.4752, "step": 13352 }, { "epoch": 1.1984249848556237, "grad_norm": 0.5095561768390673, "learning_rate": 7.507984585414748e-06, "loss": 0.368, "step": 13353 }, { "epoch": 1.1985147293082947, "grad_norm": 0.4826261812201517, "learning_rate": 7.507532855424036e-06, "loss": 0.3547, "step": 13354 }, { "epoch": 1.1986044737609656, "grad_norm": 0.46293326408660795, "learning_rate": 7.507081098086677e-06, "loss": 0.3872, "step": 13355 }, { "epoch": 1.1986942182136366, "grad_norm": 0.49477337793557175, "learning_rate": 7.506629313407598e-06, "loss": 0.4003, "step": 13356 }, { "epoch": 1.1987839626663077, "grad_norm": 0.5061329642129714, "learning_rate": 7.506177501391725e-06, "loss": 0.408, "step": 13357 }, { "epoch": 1.1988737071189788, "grad_norm": 0.7596004981094309, "learning_rate": 7.505725662043986e-06, "loss": 0.3999, "step": 13358 }, { "epoch": 1.1989634515716499, "grad_norm": 0.49131282917155095, "learning_rate": 7.50527379536931e-06, "loss": 0.4122, "step": 13359 }, { "epoch": 1.1990531960243207, "grad_norm": 0.5007169052082714, "learning_rate": 7.504821901372621e-06, "loss": 0.374, "step": 13360 }, { "epoch": 1.1991429404769918, "grad_norm": 0.4684172890758508, "learning_rate": 7.504369980058852e-06, "loss": 0.3681, "step": 13361 }, { "epoch": 1.1992326849296628, "grad_norm": 0.48306448960707904, "learning_rate": 7.503918031432927e-06, "loss": 0.4046, "step": 13362 }, { "epoch": 1.1993224293823337, "grad_norm": 0.5075851145210305, "learning_rate": 7.5034660554997795e-06, "loss": 0.4013, "step": 13363 }, { "epoch": 1.1994121738350048, "grad_norm": 0.5293294184910075, "learning_rate": 7.503014052264335e-06, "loss": 0.4679, "step": 13364 }, { "epoch": 1.1995019182876758, "grad_norm": 0.5520045686211362, "learning_rate": 7.502562021731526e-06, "loss": 0.4391, "step": 13365 }, { "epoch": 1.199591662740347, "grad_norm": 0.47759394588071746, "learning_rate": 7.502109963906277e-06, "loss": 0.3718, "step": 13366 }, { "epoch": 1.199681407193018, "grad_norm": 0.5016016485629935, "learning_rate": 7.5016578787935254e-06, "loss": 0.372, "step": 13367 }, { "epoch": 1.1997711516456888, "grad_norm": 0.47099818703537766, "learning_rate": 7.501205766398196e-06, "loss": 0.4447, "step": 13368 }, { "epoch": 1.1998608960983599, "grad_norm": 0.5834020460624763, "learning_rate": 7.500753626725222e-06, "loss": 0.4551, "step": 13369 }, { "epoch": 1.199950640551031, "grad_norm": 0.5128977884797229, "learning_rate": 7.500301459779533e-06, "loss": 0.4145, "step": 13370 }, { "epoch": 1.200040385003702, "grad_norm": 0.4864317624290979, "learning_rate": 7.499849265566061e-06, "loss": 0.4003, "step": 13371 }, { "epoch": 1.2001301294563729, "grad_norm": 0.480555533159915, "learning_rate": 7.499397044089738e-06, "loss": 0.3508, "step": 13372 }, { "epoch": 1.200219873909044, "grad_norm": 0.48905820082164436, "learning_rate": 7.498944795355494e-06, "loss": 0.4566, "step": 13373 }, { "epoch": 1.200309618361715, "grad_norm": 0.5155222099354775, "learning_rate": 7.498492519368264e-06, "loss": 0.4188, "step": 13374 }, { "epoch": 1.200399362814386, "grad_norm": 0.46699015186784676, "learning_rate": 7.498040216132977e-06, "loss": 0.3988, "step": 13375 }, { "epoch": 1.2004891072670572, "grad_norm": 0.4982570287461057, "learning_rate": 7.497587885654567e-06, "loss": 0.3812, "step": 13376 }, { "epoch": 1.200578851719728, "grad_norm": 0.46132249199765746, "learning_rate": 7.497135527937969e-06, "loss": 0.3953, "step": 13377 }, { "epoch": 1.200668596172399, "grad_norm": 0.5266707754736397, "learning_rate": 7.4966831429881145e-06, "loss": 0.4726, "step": 13378 }, { "epoch": 1.2007583406250701, "grad_norm": 0.5316535519684584, "learning_rate": 7.496230730809937e-06, "loss": 0.4505, "step": 13379 }, { "epoch": 1.2008480850777412, "grad_norm": 0.5247897104523597, "learning_rate": 7.495778291408371e-06, "loss": 0.3802, "step": 13380 }, { "epoch": 1.200937829530412, "grad_norm": 0.5169943438812222, "learning_rate": 7.495325824788351e-06, "loss": 0.4726, "step": 13381 }, { "epoch": 1.2010275739830831, "grad_norm": 0.4905364633021332, "learning_rate": 7.494873330954811e-06, "loss": 0.3707, "step": 13382 }, { "epoch": 1.2011173184357542, "grad_norm": 0.4885012270660402, "learning_rate": 7.494420809912686e-06, "loss": 0.4258, "step": 13383 }, { "epoch": 1.2012070628884253, "grad_norm": 0.49364186082841643, "learning_rate": 7.493968261666911e-06, "loss": 0.4424, "step": 13384 }, { "epoch": 1.2012968073410963, "grad_norm": 0.5236574979699385, "learning_rate": 7.493515686222421e-06, "loss": 0.4113, "step": 13385 }, { "epoch": 1.2013865517937672, "grad_norm": 0.4879872856669079, "learning_rate": 7.493063083584153e-06, "loss": 0.3983, "step": 13386 }, { "epoch": 1.2014762962464383, "grad_norm": 0.47635490970142674, "learning_rate": 7.492610453757041e-06, "loss": 0.3634, "step": 13387 }, { "epoch": 1.2015660406991093, "grad_norm": 0.47966673597940385, "learning_rate": 7.492157796746023e-06, "loss": 0.4326, "step": 13388 }, { "epoch": 1.2016557851517804, "grad_norm": 0.49987090282398383, "learning_rate": 7.491705112556036e-06, "loss": 0.3959, "step": 13389 }, { "epoch": 1.2017455296044512, "grad_norm": 0.5008797765195899, "learning_rate": 7.491252401192014e-06, "loss": 0.3721, "step": 13390 }, { "epoch": 1.2018352740571223, "grad_norm": 0.5385184591027122, "learning_rate": 7.490799662658896e-06, "loss": 0.4201, "step": 13391 }, { "epoch": 1.2019250185097934, "grad_norm": 0.458026956572573, "learning_rate": 7.490346896961621e-06, "loss": 0.3794, "step": 13392 }, { "epoch": 1.2020147629624645, "grad_norm": 0.46931035624198447, "learning_rate": 7.489894104105124e-06, "loss": 0.374, "step": 13393 }, { "epoch": 1.2021045074151355, "grad_norm": 0.49362747915668415, "learning_rate": 7.489441284094345e-06, "loss": 0.4012, "step": 13394 }, { "epoch": 1.2021942518678064, "grad_norm": 0.4591164421672713, "learning_rate": 7.488988436934221e-06, "loss": 0.3491, "step": 13395 }, { "epoch": 1.2022839963204774, "grad_norm": 0.48548368366299127, "learning_rate": 7.488535562629693e-06, "loss": 0.3427, "step": 13396 }, { "epoch": 1.2023737407731485, "grad_norm": 0.4916806859011076, "learning_rate": 7.488082661185697e-06, "loss": 0.4031, "step": 13397 }, { "epoch": 1.2024634852258194, "grad_norm": 0.4676045539905277, "learning_rate": 7.4876297326071735e-06, "loss": 0.363, "step": 13398 }, { "epoch": 1.2025532296784904, "grad_norm": 0.4742239492554109, "learning_rate": 7.4871767768990625e-06, "loss": 0.4077, "step": 13399 }, { "epoch": 1.2026429741311615, "grad_norm": 0.5733201380498161, "learning_rate": 7.486723794066304e-06, "loss": 0.4676, "step": 13400 }, { "epoch": 1.2027327185838326, "grad_norm": 0.507039809439392, "learning_rate": 7.486270784113836e-06, "loss": 0.3969, "step": 13401 }, { "epoch": 1.2028224630365036, "grad_norm": 0.5016863601797554, "learning_rate": 7.485817747046602e-06, "loss": 0.3737, "step": 13402 }, { "epoch": 1.2029122074891745, "grad_norm": 0.5369626295790916, "learning_rate": 7.48536468286954e-06, "loss": 0.4312, "step": 13403 }, { "epoch": 1.2030019519418456, "grad_norm": 0.5041009113012633, "learning_rate": 7.484911591587594e-06, "loss": 0.4112, "step": 13404 }, { "epoch": 1.2030916963945166, "grad_norm": 0.5282797846529155, "learning_rate": 7.4844584732057025e-06, "loss": 0.4178, "step": 13405 }, { "epoch": 1.2031814408471877, "grad_norm": 0.516686092429211, "learning_rate": 7.4840053277288085e-06, "loss": 0.4729, "step": 13406 }, { "epoch": 1.2032711852998585, "grad_norm": 0.5172407462027653, "learning_rate": 7.483552155161854e-06, "loss": 0.386, "step": 13407 }, { "epoch": 1.2033609297525296, "grad_norm": 0.5138548705664374, "learning_rate": 7.48309895550978e-06, "loss": 0.4106, "step": 13408 }, { "epoch": 1.2034506742052007, "grad_norm": 0.4942787104616695, "learning_rate": 7.48264572877753e-06, "loss": 0.3429, "step": 13409 }, { "epoch": 1.2035404186578718, "grad_norm": 0.48965340933056634, "learning_rate": 7.482192474970047e-06, "loss": 0.3691, "step": 13410 }, { "epoch": 1.2036301631105428, "grad_norm": 0.49271147215535716, "learning_rate": 7.481739194092274e-06, "loss": 0.4539, "step": 13411 }, { "epoch": 1.2037199075632137, "grad_norm": 0.49710806701487925, "learning_rate": 7.481285886149154e-06, "loss": 0.4571, "step": 13412 }, { "epoch": 1.2038096520158847, "grad_norm": 0.5699688751853372, "learning_rate": 7.480832551145629e-06, "loss": 0.4652, "step": 13413 }, { "epoch": 1.2038993964685558, "grad_norm": 0.50780472852113, "learning_rate": 7.4803791890866474e-06, "loss": 0.3708, "step": 13414 }, { "epoch": 1.2039891409212269, "grad_norm": 0.45925516884135315, "learning_rate": 7.47992579997715e-06, "loss": 0.3966, "step": 13415 }, { "epoch": 1.2040788853738977, "grad_norm": 0.46382716902188326, "learning_rate": 7.479472383822083e-06, "loss": 0.3513, "step": 13416 }, { "epoch": 1.2041686298265688, "grad_norm": 0.48680210148006314, "learning_rate": 7.479018940626389e-06, "loss": 0.4435, "step": 13417 }, { "epoch": 1.2042583742792399, "grad_norm": 0.5009165394027011, "learning_rate": 7.4785654703950165e-06, "loss": 0.3739, "step": 13418 }, { "epoch": 1.204348118731911, "grad_norm": 0.4968317811909539, "learning_rate": 7.478111973132907e-06, "loss": 0.4014, "step": 13419 }, { "epoch": 1.204437863184582, "grad_norm": 0.5079344578707805, "learning_rate": 7.47765844884501e-06, "loss": 0.4616, "step": 13420 }, { "epoch": 1.2045276076372529, "grad_norm": 0.5552000463002688, "learning_rate": 7.477204897536269e-06, "loss": 0.4291, "step": 13421 }, { "epoch": 1.204617352089924, "grad_norm": 0.4721807596339799, "learning_rate": 7.476751319211632e-06, "loss": 0.4072, "step": 13422 }, { "epoch": 1.204707096542595, "grad_norm": 0.5012029074729827, "learning_rate": 7.476297713876044e-06, "loss": 0.3772, "step": 13423 }, { "epoch": 1.204796840995266, "grad_norm": 0.5089123064420894, "learning_rate": 7.475844081534454e-06, "loss": 0.3942, "step": 13424 }, { "epoch": 1.204886585447937, "grad_norm": 0.5309395221912129, "learning_rate": 7.475390422191807e-06, "loss": 0.4867, "step": 13425 }, { "epoch": 1.204976329900608, "grad_norm": 0.4896671976807364, "learning_rate": 7.474936735853052e-06, "loss": 0.4653, "step": 13426 }, { "epoch": 1.205066074353279, "grad_norm": 0.4804768004578289, "learning_rate": 7.474483022523136e-06, "loss": 0.4057, "step": 13427 }, { "epoch": 1.2051558188059501, "grad_norm": 0.5158886217525351, "learning_rate": 7.474029282207009e-06, "loss": 0.3788, "step": 13428 }, { "epoch": 1.2052455632586212, "grad_norm": 0.4994202685459693, "learning_rate": 7.473575514909617e-06, "loss": 0.4686, "step": 13429 }, { "epoch": 1.205335307711292, "grad_norm": 0.4658462248668305, "learning_rate": 7.473121720635909e-06, "loss": 0.3831, "step": 13430 }, { "epoch": 1.2054250521639631, "grad_norm": 0.5097146553926436, "learning_rate": 7.4726678993908356e-06, "loss": 0.3629, "step": 13431 }, { "epoch": 1.2055147966166342, "grad_norm": 0.5276466104066, "learning_rate": 7.472214051179344e-06, "loss": 0.4155, "step": 13432 }, { "epoch": 1.205604541069305, "grad_norm": 0.5017214155734808, "learning_rate": 7.4717601760063855e-06, "loss": 0.4078, "step": 13433 }, { "epoch": 1.205694285521976, "grad_norm": 0.5041315261755633, "learning_rate": 7.471306273876908e-06, "loss": 0.4238, "step": 13434 }, { "epoch": 1.2057840299746472, "grad_norm": 0.5095136142891398, "learning_rate": 7.470852344795864e-06, "loss": 0.4392, "step": 13435 }, { "epoch": 1.2058737744273182, "grad_norm": 0.5358293983980855, "learning_rate": 7.470398388768203e-06, "loss": 0.466, "step": 13436 }, { "epoch": 1.2059635188799893, "grad_norm": 0.4953294150619807, "learning_rate": 7.469944405798876e-06, "loss": 0.3733, "step": 13437 }, { "epoch": 1.2060532633326602, "grad_norm": 0.546816355449089, "learning_rate": 7.469490395892833e-06, "loss": 0.3901, "step": 13438 }, { "epoch": 1.2061430077853312, "grad_norm": 0.4902214075582107, "learning_rate": 7.469036359055026e-06, "loss": 0.4, "step": 13439 }, { "epoch": 1.2062327522380023, "grad_norm": 0.5035752408865043, "learning_rate": 7.4685822952904076e-06, "loss": 0.4448, "step": 13440 }, { "epoch": 1.2063224966906734, "grad_norm": 0.520330215205842, "learning_rate": 7.4681282046039285e-06, "loss": 0.4183, "step": 13441 }, { "epoch": 1.2064122411433442, "grad_norm": 0.43555503183375344, "learning_rate": 7.467674087000541e-06, "loss": 0.3789, "step": 13442 }, { "epoch": 1.2065019855960153, "grad_norm": 0.515489673821845, "learning_rate": 7.467219942485197e-06, "loss": 0.3754, "step": 13443 }, { "epoch": 1.2065917300486864, "grad_norm": 0.5137017956318801, "learning_rate": 7.466765771062851e-06, "loss": 0.4877, "step": 13444 }, { "epoch": 1.2066814745013574, "grad_norm": 0.5518183669623193, "learning_rate": 7.466311572738456e-06, "loss": 0.4638, "step": 13445 }, { "epoch": 1.2067712189540285, "grad_norm": 0.5337499232660213, "learning_rate": 7.465857347516964e-06, "loss": 0.4201, "step": 13446 }, { "epoch": 1.2068609634066993, "grad_norm": 0.5179859558721056, "learning_rate": 7.46540309540333e-06, "loss": 0.4597, "step": 13447 }, { "epoch": 1.2069507078593704, "grad_norm": 0.5088283903948785, "learning_rate": 7.464948816402505e-06, "loss": 0.4704, "step": 13448 }, { "epoch": 1.2070404523120415, "grad_norm": 0.5491895800561877, "learning_rate": 7.464494510519449e-06, "loss": 0.425, "step": 13449 }, { "epoch": 1.2071301967647126, "grad_norm": 0.4959537668758559, "learning_rate": 7.464040177759111e-06, "loss": 0.3251, "step": 13450 }, { "epoch": 1.2072199412173834, "grad_norm": 0.5221600196348297, "learning_rate": 7.4635858181264485e-06, "loss": 0.433, "step": 13451 }, { "epoch": 1.2073096856700545, "grad_norm": 0.4957210758930218, "learning_rate": 7.4631314316264155e-06, "loss": 0.3664, "step": 13452 }, { "epoch": 1.2073994301227255, "grad_norm": 0.5337211930708619, "learning_rate": 7.462677018263969e-06, "loss": 0.4627, "step": 13453 }, { "epoch": 1.2074891745753966, "grad_norm": 0.5071806087863918, "learning_rate": 7.462222578044065e-06, "loss": 0.4164, "step": 13454 }, { "epoch": 1.2075789190280677, "grad_norm": 0.4862001783829901, "learning_rate": 7.461768110971657e-06, "loss": 0.3492, "step": 13455 }, { "epoch": 1.2076686634807385, "grad_norm": 0.4617208944803072, "learning_rate": 7.461313617051702e-06, "loss": 0.4, "step": 13456 }, { "epoch": 1.2077584079334096, "grad_norm": 0.5544543748007192, "learning_rate": 7.460859096289159e-06, "loss": 0.4101, "step": 13457 }, { "epoch": 1.2078481523860807, "grad_norm": 0.49640406349682364, "learning_rate": 7.460404548688982e-06, "loss": 0.3873, "step": 13458 }, { "epoch": 1.2079378968387517, "grad_norm": 0.5231174329491967, "learning_rate": 7.459949974256131e-06, "loss": 0.4881, "step": 13459 }, { "epoch": 1.2080276412914226, "grad_norm": 0.5044688620839861, "learning_rate": 7.459495372995561e-06, "loss": 0.423, "step": 13460 }, { "epoch": 1.2081173857440937, "grad_norm": 0.48859106363792465, "learning_rate": 7.459040744912231e-06, "loss": 0.3448, "step": 13461 }, { "epoch": 1.2082071301967647, "grad_norm": 0.4629546169718859, "learning_rate": 7.458586090011099e-06, "loss": 0.3848, "step": 13462 }, { "epoch": 1.2082968746494358, "grad_norm": 0.4827573203291458, "learning_rate": 7.4581314082971224e-06, "loss": 0.4146, "step": 13463 }, { "epoch": 1.2083866191021069, "grad_norm": 0.5100011271691174, "learning_rate": 7.457676699775261e-06, "loss": 0.3895, "step": 13464 }, { "epoch": 1.2084763635547777, "grad_norm": 0.5031379845919636, "learning_rate": 7.457221964450474e-06, "loss": 0.3973, "step": 13465 }, { "epoch": 1.2085661080074488, "grad_norm": 0.4633928361146096, "learning_rate": 7.456767202327719e-06, "loss": 0.3666, "step": 13466 }, { "epoch": 1.2086558524601199, "grad_norm": 0.5059166530679652, "learning_rate": 7.456312413411959e-06, "loss": 0.3875, "step": 13467 }, { "epoch": 1.2087455969127907, "grad_norm": 0.4946357057286207, "learning_rate": 7.4558575977081495e-06, "loss": 0.4334, "step": 13468 }, { "epoch": 1.2088353413654618, "grad_norm": 0.4994275086060271, "learning_rate": 7.455402755221252e-06, "loss": 0.4362, "step": 13469 }, { "epoch": 1.2089250858181328, "grad_norm": 0.4815303790628303, "learning_rate": 7.454947885956229e-06, "loss": 0.4134, "step": 13470 }, { "epoch": 1.209014830270804, "grad_norm": 0.5189276776675482, "learning_rate": 7.454492989918038e-06, "loss": 0.3878, "step": 13471 }, { "epoch": 1.209104574723475, "grad_norm": 0.4942272474478055, "learning_rate": 7.454038067111644e-06, "loss": 0.4342, "step": 13472 }, { "epoch": 1.2091943191761458, "grad_norm": 0.5104955990944026, "learning_rate": 7.453583117542005e-06, "loss": 0.4117, "step": 13473 }, { "epoch": 1.209284063628817, "grad_norm": 0.45610792265019456, "learning_rate": 7.453128141214084e-06, "loss": 0.3519, "step": 13474 }, { "epoch": 1.209373808081488, "grad_norm": 0.46763351013644006, "learning_rate": 7.452673138132842e-06, "loss": 0.3873, "step": 13475 }, { "epoch": 1.209463552534159, "grad_norm": 0.5199433565755395, "learning_rate": 7.452218108303243e-06, "loss": 0.4069, "step": 13476 }, { "epoch": 1.20955329698683, "grad_norm": 0.47230538068634315, "learning_rate": 7.451763051730246e-06, "loss": 0.4904, "step": 13477 }, { "epoch": 1.209643041439501, "grad_norm": 0.5412522346992634, "learning_rate": 7.451307968418817e-06, "loss": 0.3843, "step": 13478 }, { "epoch": 1.209732785892172, "grad_norm": 0.48840957701312254, "learning_rate": 7.4508528583739184e-06, "loss": 0.408, "step": 13479 }, { "epoch": 1.209822530344843, "grad_norm": 0.4700560018504943, "learning_rate": 7.450397721600513e-06, "loss": 0.3638, "step": 13480 }, { "epoch": 1.2099122747975142, "grad_norm": 0.457770296342103, "learning_rate": 7.449942558103563e-06, "loss": 0.3901, "step": 13481 }, { "epoch": 1.210002019250185, "grad_norm": 0.515217428028246, "learning_rate": 7.449487367888036e-06, "loss": 0.4358, "step": 13482 }, { "epoch": 1.210091763702856, "grad_norm": 0.5550289679931929, "learning_rate": 7.449032150958892e-06, "loss": 0.4804, "step": 13483 }, { "epoch": 1.2101815081555272, "grad_norm": 0.542032564067255, "learning_rate": 7.4485769073211e-06, "loss": 0.5122, "step": 13484 }, { "epoch": 1.2102712526081982, "grad_norm": 0.5322026499661632, "learning_rate": 7.448121636979621e-06, "loss": 0.3443, "step": 13485 }, { "epoch": 1.210360997060869, "grad_norm": 0.4990487725747576, "learning_rate": 7.447666339939421e-06, "loss": 0.4722, "step": 13486 }, { "epoch": 1.2104507415135402, "grad_norm": 0.48913070801683284, "learning_rate": 7.447211016205467e-06, "loss": 0.4031, "step": 13487 }, { "epoch": 1.2105404859662112, "grad_norm": 0.5341220279630932, "learning_rate": 7.446755665782722e-06, "loss": 0.3852, "step": 13488 }, { "epoch": 1.2106302304188823, "grad_norm": 0.4860051625497648, "learning_rate": 7.446300288676154e-06, "loss": 0.415, "step": 13489 }, { "epoch": 1.2107199748715534, "grad_norm": 0.5186013024490469, "learning_rate": 7.445844884890729e-06, "loss": 0.4432, "step": 13490 }, { "epoch": 1.2108097193242242, "grad_norm": 0.5510322441056337, "learning_rate": 7.445389454431413e-06, "loss": 0.4158, "step": 13491 }, { "epoch": 1.2108994637768953, "grad_norm": 0.5212770482349448, "learning_rate": 7.4449339973031735e-06, "loss": 0.4059, "step": 13492 }, { "epoch": 1.2109892082295663, "grad_norm": 0.49688065155465416, "learning_rate": 7.444478513510977e-06, "loss": 0.4152, "step": 13493 }, { "epoch": 1.2110789526822374, "grad_norm": 0.49630452637290673, "learning_rate": 7.44402300305979e-06, "loss": 0.3828, "step": 13494 }, { "epoch": 1.2111686971349083, "grad_norm": 0.45004099593856645, "learning_rate": 7.443567465954582e-06, "loss": 0.3373, "step": 13495 }, { "epoch": 1.2112584415875793, "grad_norm": 0.45824827730320045, "learning_rate": 7.443111902200322e-06, "loss": 0.3528, "step": 13496 }, { "epoch": 1.2113481860402504, "grad_norm": 0.45157898398212437, "learning_rate": 7.4426563118019745e-06, "loss": 0.4295, "step": 13497 }, { "epoch": 1.2114379304929215, "grad_norm": 0.5390979492311921, "learning_rate": 7.442200694764511e-06, "loss": 0.4017, "step": 13498 }, { "epoch": 1.2115276749455925, "grad_norm": 0.49997108127536305, "learning_rate": 7.441745051092899e-06, "loss": 0.395, "step": 13499 }, { "epoch": 1.2116174193982634, "grad_norm": 0.5066538753835299, "learning_rate": 7.441289380792109e-06, "loss": 0.4377, "step": 13500 }, { "epoch": 1.2117071638509345, "grad_norm": 0.5296963752570968, "learning_rate": 7.440833683867109e-06, "loss": 0.3589, "step": 13501 }, { "epoch": 1.2117969083036055, "grad_norm": 0.4673960981120041, "learning_rate": 7.440377960322869e-06, "loss": 0.3788, "step": 13502 }, { "epoch": 1.2118866527562764, "grad_norm": 0.5084436103057725, "learning_rate": 7.439922210164361e-06, "loss": 0.4677, "step": 13503 }, { "epoch": 1.2119763972089475, "grad_norm": 0.5419453737041363, "learning_rate": 7.439466433396551e-06, "loss": 0.3795, "step": 13504 }, { "epoch": 1.2120661416616185, "grad_norm": 0.46958740838723906, "learning_rate": 7.439010630024416e-06, "loss": 0.4051, "step": 13505 }, { "epoch": 1.2121558861142896, "grad_norm": 0.5258417214040286, "learning_rate": 7.438554800052921e-06, "loss": 0.4385, "step": 13506 }, { "epoch": 1.2122456305669607, "grad_norm": 0.5044554210769712, "learning_rate": 7.438098943487039e-06, "loss": 0.3427, "step": 13507 }, { "epoch": 1.2123353750196315, "grad_norm": 0.47754568323274116, "learning_rate": 7.437643060331743e-06, "loss": 0.4096, "step": 13508 }, { "epoch": 1.2124251194723026, "grad_norm": 0.5032522444214624, "learning_rate": 7.437187150592002e-06, "loss": 0.4243, "step": 13509 }, { "epoch": 1.2125148639249737, "grad_norm": 0.4593815105521663, "learning_rate": 7.436731214272792e-06, "loss": 0.3631, "step": 13510 }, { "epoch": 1.2126046083776447, "grad_norm": 0.5025067794147843, "learning_rate": 7.436275251379082e-06, "loss": 0.4175, "step": 13511 }, { "epoch": 1.2126943528303156, "grad_norm": 0.47432378176038614, "learning_rate": 7.435819261915846e-06, "loss": 0.365, "step": 13512 }, { "epoch": 1.2127840972829866, "grad_norm": 0.48055270992546734, "learning_rate": 7.435363245888058e-06, "loss": 0.355, "step": 13513 }, { "epoch": 1.2128738417356577, "grad_norm": 0.49351553352450545, "learning_rate": 7.434907203300687e-06, "loss": 0.4205, "step": 13514 }, { "epoch": 1.2129635861883288, "grad_norm": 0.5202227467020475, "learning_rate": 7.4344511341587125e-06, "loss": 0.4364, "step": 13515 }, { "epoch": 1.2130533306409998, "grad_norm": 0.4935649501984116, "learning_rate": 7.4339950384671026e-06, "loss": 0.4267, "step": 13516 }, { "epoch": 1.2131430750936707, "grad_norm": 0.5003132244013105, "learning_rate": 7.433538916230836e-06, "loss": 0.3686, "step": 13517 }, { "epoch": 1.2132328195463418, "grad_norm": 0.475157685610441, "learning_rate": 7.433082767454884e-06, "loss": 0.3746, "step": 13518 }, { "epoch": 1.2133225639990128, "grad_norm": 0.5022639153424541, "learning_rate": 7.432626592144222e-06, "loss": 0.3538, "step": 13519 }, { "epoch": 1.213412308451684, "grad_norm": 0.4657157107788004, "learning_rate": 7.432170390303824e-06, "loss": 0.406, "step": 13520 }, { "epoch": 1.2135020529043548, "grad_norm": 0.44209020976911984, "learning_rate": 7.431714161938669e-06, "loss": 0.3763, "step": 13521 }, { "epoch": 1.2135917973570258, "grad_norm": 0.48707598003596697, "learning_rate": 7.431257907053729e-06, "loss": 0.4166, "step": 13522 }, { "epoch": 1.213681541809697, "grad_norm": 0.5130506600362279, "learning_rate": 7.430801625653982e-06, "loss": 0.5015, "step": 13523 }, { "epoch": 1.213771286262368, "grad_norm": 0.54334186083156, "learning_rate": 7.4303453177444e-06, "loss": 0.4366, "step": 13524 }, { "epoch": 1.213861030715039, "grad_norm": 0.5502064176353324, "learning_rate": 7.429888983329966e-06, "loss": 0.4827, "step": 13525 }, { "epoch": 1.2139507751677099, "grad_norm": 0.47359040764867727, "learning_rate": 7.429432622415651e-06, "loss": 0.3758, "step": 13526 }, { "epoch": 1.214040519620381, "grad_norm": 0.4823519500951973, "learning_rate": 7.4289762350064356e-06, "loss": 0.4012, "step": 13527 }, { "epoch": 1.214130264073052, "grad_norm": 0.4845011278275252, "learning_rate": 7.428519821107293e-06, "loss": 0.3488, "step": 13528 }, { "epoch": 1.214220008525723, "grad_norm": 0.48439853845110836, "learning_rate": 7.428063380723205e-06, "loss": 0.4036, "step": 13529 }, { "epoch": 1.214309752978394, "grad_norm": 0.47626682848866514, "learning_rate": 7.427606913859148e-06, "loss": 0.3588, "step": 13530 }, { "epoch": 1.214399497431065, "grad_norm": 0.4908561879686466, "learning_rate": 7.427150420520099e-06, "loss": 0.3385, "step": 13531 }, { "epoch": 1.214489241883736, "grad_norm": 0.4621305123109797, "learning_rate": 7.4266939007110374e-06, "loss": 0.4333, "step": 13532 }, { "epoch": 1.2145789863364072, "grad_norm": 0.49025530206737644, "learning_rate": 7.426237354436942e-06, "loss": 0.4323, "step": 13533 }, { "epoch": 1.2146687307890782, "grad_norm": 0.5513211376588537, "learning_rate": 7.4257807817027915e-06, "loss": 0.4039, "step": 13534 }, { "epoch": 1.214758475241749, "grad_norm": 0.4873119131709119, "learning_rate": 7.425324182513565e-06, "loss": 0.4202, "step": 13535 }, { "epoch": 1.2148482196944201, "grad_norm": 0.49838305022613205, "learning_rate": 7.4248675568742425e-06, "loss": 0.365, "step": 13536 }, { "epoch": 1.2149379641470912, "grad_norm": 0.48327328385033125, "learning_rate": 7.424410904789805e-06, "loss": 0.3493, "step": 13537 }, { "epoch": 1.215027708599762, "grad_norm": 0.4730439615520419, "learning_rate": 7.42395422626523e-06, "loss": 0.3596, "step": 13538 }, { "epoch": 1.2151174530524331, "grad_norm": 0.4781101024387859, "learning_rate": 7.4234975213055e-06, "loss": 0.3854, "step": 13539 }, { "epoch": 1.2152071975051042, "grad_norm": 0.5063509266578351, "learning_rate": 7.423040789915594e-06, "loss": 0.3948, "step": 13540 }, { "epoch": 1.2152969419577753, "grad_norm": 0.47054994037300274, "learning_rate": 7.422584032100496e-06, "loss": 0.4547, "step": 13541 }, { "epoch": 1.2153866864104463, "grad_norm": 0.482304723778796, "learning_rate": 7.422127247865184e-06, "loss": 0.3326, "step": 13542 }, { "epoch": 1.2154764308631172, "grad_norm": 0.4546126537946347, "learning_rate": 7.4216704372146405e-06, "loss": 0.4281, "step": 13543 }, { "epoch": 1.2155661753157883, "grad_norm": 0.5295660141405483, "learning_rate": 7.4212136001538495e-06, "loss": 0.3623, "step": 13544 }, { "epoch": 1.2156559197684593, "grad_norm": 0.4763087440117727, "learning_rate": 7.420756736687791e-06, "loss": 0.3913, "step": 13545 }, { "epoch": 1.2157456642211304, "grad_norm": 0.48306863791787674, "learning_rate": 7.420299846821447e-06, "loss": 0.4132, "step": 13546 }, { "epoch": 1.2158354086738012, "grad_norm": 0.5209981337286401, "learning_rate": 7.4198429305598026e-06, "loss": 0.4445, "step": 13547 }, { "epoch": 1.2159251531264723, "grad_norm": 0.5278679422156246, "learning_rate": 7.419385987907839e-06, "loss": 0.3595, "step": 13548 }, { "epoch": 1.2160148975791434, "grad_norm": 0.4921642355381797, "learning_rate": 7.418929018870541e-06, "loss": 0.4516, "step": 13549 }, { "epoch": 1.2161046420318145, "grad_norm": 0.48763632366154586, "learning_rate": 7.41847202345289e-06, "loss": 0.3916, "step": 13550 }, { "epoch": 1.2161943864844855, "grad_norm": 0.5078424514914324, "learning_rate": 7.4180150016598705e-06, "loss": 0.3972, "step": 13551 }, { "epoch": 1.2162841309371564, "grad_norm": 0.4878117363734619, "learning_rate": 7.417557953496468e-06, "loss": 0.3558, "step": 13552 }, { "epoch": 1.2163738753898274, "grad_norm": 0.46550443091619015, "learning_rate": 7.417100878967668e-06, "loss": 0.373, "step": 13553 }, { "epoch": 1.2164636198424985, "grad_norm": 0.4960421073071348, "learning_rate": 7.416643778078452e-06, "loss": 0.4376, "step": 13554 }, { "epoch": 1.2165533642951696, "grad_norm": 0.4991503540657626, "learning_rate": 7.416186650833806e-06, "loss": 0.3856, "step": 13555 }, { "epoch": 1.2166431087478404, "grad_norm": 0.4776134575247247, "learning_rate": 7.415729497238717e-06, "loss": 0.4409, "step": 13556 }, { "epoch": 1.2167328532005115, "grad_norm": 0.5026309793623355, "learning_rate": 7.41527231729817e-06, "loss": 0.4519, "step": 13557 }, { "epoch": 1.2168225976531826, "grad_norm": 0.5320897015767083, "learning_rate": 7.41481511101715e-06, "loss": 0.3955, "step": 13558 }, { "epoch": 1.2169123421058536, "grad_norm": 0.512368805150707, "learning_rate": 7.414357878400643e-06, "loss": 0.432, "step": 13559 }, { "epoch": 1.2170020865585247, "grad_norm": 0.45646607056994704, "learning_rate": 7.413900619453637e-06, "loss": 0.3389, "step": 13560 }, { "epoch": 1.2170918310111956, "grad_norm": 0.49007902131782977, "learning_rate": 7.4134433341811176e-06, "loss": 0.3786, "step": 13561 }, { "epoch": 1.2171815754638666, "grad_norm": 0.47841270414315873, "learning_rate": 7.4129860225880724e-06, "loss": 0.3818, "step": 13562 }, { "epoch": 1.2172713199165377, "grad_norm": 0.49721619127173655, "learning_rate": 7.412528684679487e-06, "loss": 0.3858, "step": 13563 }, { "epoch": 1.2173610643692088, "grad_norm": 0.48093082553570693, "learning_rate": 7.412071320460352e-06, "loss": 0.4249, "step": 13564 }, { "epoch": 1.2174508088218796, "grad_norm": 0.565819748280492, "learning_rate": 7.411613929935655e-06, "loss": 0.4367, "step": 13565 }, { "epoch": 1.2175405532745507, "grad_norm": 0.5106770657217733, "learning_rate": 7.4111565131103815e-06, "loss": 0.4185, "step": 13566 }, { "epoch": 1.2176302977272218, "grad_norm": 0.5364346902632092, "learning_rate": 7.410699069989521e-06, "loss": 0.4335, "step": 13567 }, { "epoch": 1.2177200421798928, "grad_norm": 0.46758240436445103, "learning_rate": 7.410241600578065e-06, "loss": 0.342, "step": 13568 }, { "epoch": 1.217809786632564, "grad_norm": 0.5075841860696888, "learning_rate": 7.4097841048809995e-06, "loss": 0.4106, "step": 13569 }, { "epoch": 1.2178995310852347, "grad_norm": 0.5319216243699794, "learning_rate": 7.4093265829033145e-06, "loss": 0.4102, "step": 13570 }, { "epoch": 1.2179892755379058, "grad_norm": 0.469887124263819, "learning_rate": 7.408869034649999e-06, "loss": 0.3882, "step": 13571 }, { "epoch": 1.2180790199905769, "grad_norm": 0.482681314563661, "learning_rate": 7.408411460126046e-06, "loss": 0.3805, "step": 13572 }, { "epoch": 1.2181687644432477, "grad_norm": 0.5299702754796777, "learning_rate": 7.407953859336443e-06, "loss": 0.4218, "step": 13573 }, { "epoch": 1.2182585088959188, "grad_norm": 0.4968870749117855, "learning_rate": 7.407496232286181e-06, "loss": 0.3878, "step": 13574 }, { "epoch": 1.2183482533485899, "grad_norm": 0.5004646425697576, "learning_rate": 7.4070385789802504e-06, "loss": 0.4158, "step": 13575 }, { "epoch": 1.218437997801261, "grad_norm": 0.5361711954182893, "learning_rate": 7.406580899423643e-06, "loss": 0.458, "step": 13576 }, { "epoch": 1.218527742253932, "grad_norm": 0.5472992779736504, "learning_rate": 7.406123193621351e-06, "loss": 0.4391, "step": 13577 }, { "epoch": 1.2186174867066029, "grad_norm": 0.5397693071202152, "learning_rate": 7.405665461578364e-06, "loss": 0.4244, "step": 13578 }, { "epoch": 1.218707231159274, "grad_norm": 0.5361166106578049, "learning_rate": 7.405207703299675e-06, "loss": 0.4031, "step": 13579 }, { "epoch": 1.218796975611945, "grad_norm": 0.4835287933884578, "learning_rate": 7.404749918790276e-06, "loss": 0.4127, "step": 13580 }, { "epoch": 1.218886720064616, "grad_norm": 0.4923805480726994, "learning_rate": 7.40429210805516e-06, "loss": 0.405, "step": 13581 }, { "epoch": 1.218976464517287, "grad_norm": 0.5224369565140793, "learning_rate": 7.403834271099319e-06, "loss": 0.4535, "step": 13582 }, { "epoch": 1.219066208969958, "grad_norm": 0.47730459199696557, "learning_rate": 7.403376407927747e-06, "loss": 0.3839, "step": 13583 }, { "epoch": 1.219155953422629, "grad_norm": 0.4973195434405088, "learning_rate": 7.402918518545437e-06, "loss": 0.3621, "step": 13584 }, { "epoch": 1.2192456978753001, "grad_norm": 0.4692619866164675, "learning_rate": 7.402460602957382e-06, "loss": 0.4463, "step": 13585 }, { "epoch": 1.2193354423279712, "grad_norm": 0.5344588146774214, "learning_rate": 7.402002661168577e-06, "loss": 0.423, "step": 13586 }, { "epoch": 1.219425186780642, "grad_norm": 0.49288300655672457, "learning_rate": 7.401544693184015e-06, "loss": 0.4287, "step": 13587 }, { "epoch": 1.2195149312333131, "grad_norm": 0.4898630341876437, "learning_rate": 7.401086699008692e-06, "loss": 0.3884, "step": 13588 }, { "epoch": 1.2196046756859842, "grad_norm": 0.5225303200676724, "learning_rate": 7.4006286786476016e-06, "loss": 0.4049, "step": 13589 }, { "epoch": 1.2196944201386553, "grad_norm": 0.5343121282756833, "learning_rate": 7.400170632105739e-06, "loss": 0.4695, "step": 13590 }, { "epoch": 1.219784164591326, "grad_norm": 0.5328433085161258, "learning_rate": 7.3997125593880995e-06, "loss": 0.407, "step": 13591 }, { "epoch": 1.2198739090439972, "grad_norm": 0.48907618088932897, "learning_rate": 7.39925446049968e-06, "loss": 0.3716, "step": 13592 }, { "epoch": 1.2199636534966682, "grad_norm": 0.48093147795641167, "learning_rate": 7.398796335445475e-06, "loss": 0.3967, "step": 13593 }, { "epoch": 1.2200533979493393, "grad_norm": 0.533964491457338, "learning_rate": 7.3983381842304815e-06, "loss": 0.4105, "step": 13594 }, { "epoch": 1.2201431424020104, "grad_norm": 0.5109846085126164, "learning_rate": 7.397880006859695e-06, "loss": 0.4533, "step": 13595 }, { "epoch": 1.2202328868546812, "grad_norm": 0.4839068525534408, "learning_rate": 7.397421803338114e-06, "loss": 0.4026, "step": 13596 }, { "epoch": 1.2203226313073523, "grad_norm": 0.5215290731728315, "learning_rate": 7.396963573670734e-06, "loss": 0.4005, "step": 13597 }, { "epoch": 1.2204123757600234, "grad_norm": 0.4815218346323318, "learning_rate": 7.396505317862552e-06, "loss": 0.3818, "step": 13598 }, { "epoch": 1.2205021202126944, "grad_norm": 0.533073835701911, "learning_rate": 7.396047035918568e-06, "loss": 0.3999, "step": 13599 }, { "epoch": 1.2205918646653653, "grad_norm": 0.4767522693613101, "learning_rate": 7.395588727843778e-06, "loss": 0.3391, "step": 13600 }, { "epoch": 1.2206816091180364, "grad_norm": 0.45641237203672935, "learning_rate": 7.395130393643182e-06, "loss": 0.3733, "step": 13601 }, { "epoch": 1.2207713535707074, "grad_norm": 0.49931990227309875, "learning_rate": 7.3946720333217745e-06, "loss": 0.3709, "step": 13602 }, { "epoch": 1.2208610980233785, "grad_norm": 0.47146068640270006, "learning_rate": 7.394213646884561e-06, "loss": 0.4214, "step": 13603 }, { "epoch": 1.2209508424760496, "grad_norm": 0.5099090390890022, "learning_rate": 7.393755234336532e-06, "loss": 0.4034, "step": 13604 }, { "epoch": 1.2210405869287204, "grad_norm": 0.5168121032136437, "learning_rate": 7.393296795682695e-06, "loss": 0.3596, "step": 13605 }, { "epoch": 1.2211303313813915, "grad_norm": 0.4945479051231173, "learning_rate": 7.3928383309280445e-06, "loss": 0.4127, "step": 13606 }, { "epoch": 1.2212200758340626, "grad_norm": 0.501588981995923, "learning_rate": 7.392379840077583e-06, "loss": 0.3715, "step": 13607 }, { "epoch": 1.2213098202867334, "grad_norm": 0.4650439546936014, "learning_rate": 7.391921323136309e-06, "loss": 0.42, "step": 13608 }, { "epoch": 1.2213995647394045, "grad_norm": 0.4948939404703503, "learning_rate": 7.3914627801092244e-06, "loss": 0.3554, "step": 13609 }, { "epoch": 1.2214893091920755, "grad_norm": 0.43790484176660105, "learning_rate": 7.391004211001328e-06, "loss": 0.3244, "step": 13610 }, { "epoch": 1.2215790536447466, "grad_norm": 0.47471217221461914, "learning_rate": 7.390545615817624e-06, "loss": 0.4621, "step": 13611 }, { "epoch": 1.2216687980974177, "grad_norm": 0.5244596174720237, "learning_rate": 7.390086994563109e-06, "loss": 0.4059, "step": 13612 }, { "epoch": 1.2217585425500885, "grad_norm": 0.49807598147696897, "learning_rate": 7.38962834724279e-06, "loss": 0.4172, "step": 13613 }, { "epoch": 1.2218482870027596, "grad_norm": 0.5043874990370831, "learning_rate": 7.389169673861666e-06, "loss": 0.42, "step": 13614 }, { "epoch": 1.2219380314554307, "grad_norm": 0.49071778741229, "learning_rate": 7.38871097442474e-06, "loss": 0.3755, "step": 13615 }, { "epoch": 1.2220277759081017, "grad_norm": 0.5096193949173303, "learning_rate": 7.388252248937012e-06, "loss": 0.4589, "step": 13616 }, { "epoch": 1.2221175203607726, "grad_norm": 0.49385274682016606, "learning_rate": 7.3877934974034895e-06, "loss": 0.3668, "step": 13617 }, { "epoch": 1.2222072648134437, "grad_norm": 0.45210249721239115, "learning_rate": 7.387334719829171e-06, "loss": 0.4092, "step": 13618 }, { "epoch": 1.2222970092661147, "grad_norm": 0.535540221395679, "learning_rate": 7.386875916219064e-06, "loss": 0.3627, "step": 13619 }, { "epoch": 1.2223867537187858, "grad_norm": 0.4505505628329649, "learning_rate": 7.386417086578168e-06, "loss": 0.3477, "step": 13620 }, { "epoch": 1.2224764981714569, "grad_norm": 0.5064014908543507, "learning_rate": 7.38595823091149e-06, "loss": 0.4391, "step": 13621 }, { "epoch": 1.2225662426241277, "grad_norm": 0.5448424326960315, "learning_rate": 7.3854993492240325e-06, "loss": 0.3764, "step": 13622 }, { "epoch": 1.2226559870767988, "grad_norm": 0.4436168151438953, "learning_rate": 7.385040441520801e-06, "loss": 0.3793, "step": 13623 }, { "epoch": 1.2227457315294699, "grad_norm": 0.48477147789950426, "learning_rate": 7.384581507806798e-06, "loss": 0.4206, "step": 13624 }, { "epoch": 1.222835475982141, "grad_norm": 0.5288812262554898, "learning_rate": 7.384122548087031e-06, "loss": 0.4581, "step": 13625 }, { "epoch": 1.2229252204348118, "grad_norm": 0.513830710673975, "learning_rate": 7.383663562366505e-06, "loss": 0.3585, "step": 13626 }, { "epoch": 1.2230149648874828, "grad_norm": 0.48751772822700506, "learning_rate": 7.383204550650227e-06, "loss": 0.3949, "step": 13627 }, { "epoch": 1.223104709340154, "grad_norm": 0.4467370192375966, "learning_rate": 7.382745512943198e-06, "loss": 0.376, "step": 13628 }, { "epoch": 1.223194453792825, "grad_norm": 0.5071642165117247, "learning_rate": 7.3822864492504286e-06, "loss": 0.3686, "step": 13629 }, { "epoch": 1.223284198245496, "grad_norm": 0.5310154267628349, "learning_rate": 7.381827359576924e-06, "loss": 0.4769, "step": 13630 }, { "epoch": 1.223373942698167, "grad_norm": 0.5152707061513879, "learning_rate": 7.38136824392769e-06, "loss": 0.3853, "step": 13631 }, { "epoch": 1.223463687150838, "grad_norm": 0.5158439930092606, "learning_rate": 7.380909102307735e-06, "loss": 0.4364, "step": 13632 }, { "epoch": 1.223553431603509, "grad_norm": 0.4828119368273094, "learning_rate": 7.380449934722067e-06, "loss": 0.3491, "step": 13633 }, { "epoch": 1.2236431760561801, "grad_norm": 0.48280481011582427, "learning_rate": 7.379990741175691e-06, "loss": 0.3762, "step": 13634 }, { "epoch": 1.223732920508851, "grad_norm": 0.4935852401867866, "learning_rate": 7.379531521673616e-06, "loss": 0.3849, "step": 13635 }, { "epoch": 1.223822664961522, "grad_norm": 0.4700636552981745, "learning_rate": 7.379072276220851e-06, "loss": 0.3574, "step": 13636 }, { "epoch": 1.223912409414193, "grad_norm": 0.5008043665315457, "learning_rate": 7.378613004822404e-06, "loss": 0.4231, "step": 13637 }, { "epoch": 1.2240021538668642, "grad_norm": 0.5096577510990877, "learning_rate": 7.378153707483283e-06, "loss": 0.3755, "step": 13638 }, { "epoch": 1.2240918983195352, "grad_norm": 0.5217185002763024, "learning_rate": 7.377694384208498e-06, "loss": 0.4092, "step": 13639 }, { "epoch": 1.224181642772206, "grad_norm": 0.5233322544487813, "learning_rate": 7.377235035003057e-06, "loss": 0.4059, "step": 13640 }, { "epoch": 1.2242713872248772, "grad_norm": 0.48093172246171045, "learning_rate": 7.376775659871973e-06, "loss": 0.3721, "step": 13641 }, { "epoch": 1.2243611316775482, "grad_norm": 0.5280091151401538, "learning_rate": 7.376316258820251e-06, "loss": 0.4334, "step": 13642 }, { "epoch": 1.224450876130219, "grad_norm": 0.5117933927183793, "learning_rate": 7.3758568318529035e-06, "loss": 0.4184, "step": 13643 }, { "epoch": 1.2245406205828901, "grad_norm": 0.5368432345570163, "learning_rate": 7.375397378974942e-06, "loss": 0.4959, "step": 13644 }, { "epoch": 1.2246303650355612, "grad_norm": 0.4934504945957253, "learning_rate": 7.3749379001913745e-06, "loss": 0.3966, "step": 13645 }, { "epoch": 1.2247201094882323, "grad_norm": 0.5191086523037842, "learning_rate": 7.374478395507215e-06, "loss": 0.4845, "step": 13646 }, { "epoch": 1.2248098539409034, "grad_norm": 0.5081839768343944, "learning_rate": 7.374018864927472e-06, "loss": 0.4399, "step": 13647 }, { "epoch": 1.2248995983935742, "grad_norm": 0.562746017690285, "learning_rate": 7.373559308457159e-06, "loss": 0.3728, "step": 13648 }, { "epoch": 1.2249893428462453, "grad_norm": 0.46181399159080344, "learning_rate": 7.373099726101287e-06, "loss": 0.3949, "step": 13649 }, { "epoch": 1.2250790872989163, "grad_norm": 0.46112989501923257, "learning_rate": 7.372640117864869e-06, "loss": 0.3778, "step": 13650 }, { "epoch": 1.2251688317515874, "grad_norm": 0.49331945249227716, "learning_rate": 7.372180483752916e-06, "loss": 0.3881, "step": 13651 }, { "epoch": 1.2252585762042583, "grad_norm": 0.5042827814954443, "learning_rate": 7.371720823770441e-06, "loss": 0.3409, "step": 13652 }, { "epoch": 1.2253483206569293, "grad_norm": 0.4688871913542325, "learning_rate": 7.371261137922456e-06, "loss": 0.3709, "step": 13653 }, { "epoch": 1.2254380651096004, "grad_norm": 0.4713689955257537, "learning_rate": 7.370801426213978e-06, "loss": 0.4015, "step": 13654 }, { "epoch": 1.2255278095622715, "grad_norm": 0.4815198175532442, "learning_rate": 7.3703416886500165e-06, "loss": 0.4103, "step": 13655 }, { "epoch": 1.2256175540149425, "grad_norm": 0.49841090532197374, "learning_rate": 7.369881925235588e-06, "loss": 0.3789, "step": 13656 }, { "epoch": 1.2257072984676134, "grad_norm": 0.49785857047750554, "learning_rate": 7.3694221359757045e-06, "loss": 0.4262, "step": 13657 }, { "epoch": 1.2257970429202845, "grad_norm": 0.4933243753292471, "learning_rate": 7.368962320875382e-06, "loss": 0.3904, "step": 13658 }, { "epoch": 1.2258867873729555, "grad_norm": 0.47531960527306566, "learning_rate": 7.368502479939634e-06, "loss": 0.4522, "step": 13659 }, { "epoch": 1.2259765318256266, "grad_norm": 0.5173199338546198, "learning_rate": 7.368042613173475e-06, "loss": 0.3814, "step": 13660 }, { "epoch": 1.2260662762782975, "grad_norm": 0.50282425263766, "learning_rate": 7.367582720581923e-06, "loss": 0.4081, "step": 13661 }, { "epoch": 1.2261560207309685, "grad_norm": 0.49671803624856936, "learning_rate": 7.367122802169991e-06, "loss": 0.3682, "step": 13662 }, { "epoch": 1.2262457651836396, "grad_norm": 0.4902983030070168, "learning_rate": 7.366662857942695e-06, "loss": 0.409, "step": 13663 }, { "epoch": 1.2263355096363107, "grad_norm": 0.4956136399770478, "learning_rate": 7.366202887905052e-06, "loss": 0.454, "step": 13664 }, { "epoch": 1.2264252540889817, "grad_norm": 0.5129186949699369, "learning_rate": 7.3657428920620754e-06, "loss": 0.4786, "step": 13665 }, { "epoch": 1.2265149985416526, "grad_norm": 0.5428596556060096, "learning_rate": 7.365282870418787e-06, "loss": 0.403, "step": 13666 }, { "epoch": 1.2266047429943236, "grad_norm": 0.5001732800405085, "learning_rate": 7.3648228229802e-06, "loss": 0.4047, "step": 13667 }, { "epoch": 1.2266944874469947, "grad_norm": 0.4969283483137796, "learning_rate": 7.364362749751332e-06, "loss": 0.3913, "step": 13668 }, { "epoch": 1.2267842318996658, "grad_norm": 0.4752948566535662, "learning_rate": 7.363902650737202e-06, "loss": 0.4097, "step": 13669 }, { "epoch": 1.2268739763523366, "grad_norm": 0.4906575613965062, "learning_rate": 7.363442525942827e-06, "loss": 0.3873, "step": 13670 }, { "epoch": 1.2269637208050077, "grad_norm": 0.47726543248098735, "learning_rate": 7.362982375373224e-06, "loss": 0.3976, "step": 13671 }, { "epoch": 1.2270534652576788, "grad_norm": 0.4960709488582479, "learning_rate": 7.362522199033412e-06, "loss": 0.3581, "step": 13672 }, { "epoch": 1.2271432097103498, "grad_norm": 0.4800060204201796, "learning_rate": 7.362061996928409e-06, "loss": 0.3681, "step": 13673 }, { "epoch": 1.227232954163021, "grad_norm": 0.49827455989474273, "learning_rate": 7.361601769063236e-06, "loss": 0.4118, "step": 13674 }, { "epoch": 1.2273226986156918, "grad_norm": 0.5004676600458003, "learning_rate": 7.361141515442909e-06, "loss": 0.4638, "step": 13675 }, { "epoch": 1.2274124430683628, "grad_norm": 0.5230594814612517, "learning_rate": 7.360681236072451e-06, "loss": 0.3942, "step": 13676 }, { "epoch": 1.227502187521034, "grad_norm": 0.4755737555910892, "learning_rate": 7.3602209309568785e-06, "loss": 0.3786, "step": 13677 }, { "epoch": 1.2275919319737048, "grad_norm": 0.5121276587639108, "learning_rate": 7.359760600101213e-06, "loss": 0.3701, "step": 13678 }, { "epoch": 1.2276816764263758, "grad_norm": 0.4654102385310731, "learning_rate": 7.359300243510475e-06, "loss": 0.4022, "step": 13679 }, { "epoch": 1.227771420879047, "grad_norm": 0.5291475956573368, "learning_rate": 7.358839861189684e-06, "loss": 0.4857, "step": 13680 }, { "epoch": 1.227861165331718, "grad_norm": 0.5132223349395149, "learning_rate": 7.358379453143861e-06, "loss": 0.3803, "step": 13681 }, { "epoch": 1.227950909784389, "grad_norm": 0.49936088228654474, "learning_rate": 7.357919019378029e-06, "loss": 0.4567, "step": 13682 }, { "epoch": 1.2280406542370599, "grad_norm": 0.5236931212968172, "learning_rate": 7.357458559897208e-06, "loss": 0.4955, "step": 13683 }, { "epoch": 1.228130398689731, "grad_norm": 0.546749404009764, "learning_rate": 7.356998074706418e-06, "loss": 0.4105, "step": 13684 }, { "epoch": 1.228220143142402, "grad_norm": 0.5257570133389204, "learning_rate": 7.356537563810685e-06, "loss": 0.4035, "step": 13685 }, { "epoch": 1.228309887595073, "grad_norm": 0.48995312729141705, "learning_rate": 7.356077027215027e-06, "loss": 0.4387, "step": 13686 }, { "epoch": 1.228399632047744, "grad_norm": 0.4890151458846111, "learning_rate": 7.355616464924469e-06, "loss": 0.3511, "step": 13687 }, { "epoch": 1.228489376500415, "grad_norm": 0.4466219095736085, "learning_rate": 7.3551558769440325e-06, "loss": 0.3849, "step": 13688 }, { "epoch": 1.228579120953086, "grad_norm": 0.4483475602429749, "learning_rate": 7.354695263278742e-06, "loss": 0.3778, "step": 13689 }, { "epoch": 1.2286688654057571, "grad_norm": 0.5387002224487812, "learning_rate": 7.354234623933619e-06, "loss": 0.4017, "step": 13690 }, { "epoch": 1.2287586098584282, "grad_norm": 0.4812815767628893, "learning_rate": 7.353773958913689e-06, "loss": 0.4293, "step": 13691 }, { "epoch": 1.228848354311099, "grad_norm": 0.49597070223509, "learning_rate": 7.353313268223976e-06, "loss": 0.3273, "step": 13692 }, { "epoch": 1.2289380987637701, "grad_norm": 0.4427412003238477, "learning_rate": 7.352852551869503e-06, "loss": 0.3894, "step": 13693 }, { "epoch": 1.2290278432164412, "grad_norm": 0.4574038620407432, "learning_rate": 7.3523918098552936e-06, "loss": 0.344, "step": 13694 }, { "epoch": 1.2291175876691123, "grad_norm": 0.5071911424766878, "learning_rate": 7.351931042186376e-06, "loss": 0.478, "step": 13695 }, { "epoch": 1.2292073321217831, "grad_norm": 0.5047442905192487, "learning_rate": 7.35147024886777e-06, "loss": 0.3703, "step": 13696 }, { "epoch": 1.2292970765744542, "grad_norm": 0.452466597450073, "learning_rate": 7.3510094299045065e-06, "loss": 0.3903, "step": 13697 }, { "epoch": 1.2293868210271253, "grad_norm": 0.4655007330892372, "learning_rate": 7.350548585301608e-06, "loss": 0.3941, "step": 13698 }, { "epoch": 1.2294765654797963, "grad_norm": 0.507923565235265, "learning_rate": 7.3500877150641015e-06, "loss": 0.44, "step": 13699 }, { "epoch": 1.2295663099324674, "grad_norm": 0.508381387690149, "learning_rate": 7.349626819197011e-06, "loss": 0.4385, "step": 13700 }, { "epoch": 1.2296560543851383, "grad_norm": 0.5286068100876727, "learning_rate": 7.349165897705366e-06, "loss": 0.5377, "step": 13701 }, { "epoch": 1.2297457988378093, "grad_norm": 0.6142173462438205, "learning_rate": 7.348704950594191e-06, "loss": 0.46, "step": 13702 }, { "epoch": 1.2298355432904804, "grad_norm": 0.5139902418835699, "learning_rate": 7.3482439778685145e-06, "loss": 0.4116, "step": 13703 }, { "epoch": 1.2299252877431515, "grad_norm": 0.50031167492165, "learning_rate": 7.347782979533362e-06, "loss": 0.3837, "step": 13704 }, { "epoch": 1.2300150321958223, "grad_norm": 0.4590003065121739, "learning_rate": 7.3473219555937625e-06, "loss": 0.4054, "step": 13705 }, { "epoch": 1.2301047766484934, "grad_norm": 0.5009926971904725, "learning_rate": 7.346860906054744e-06, "loss": 0.3649, "step": 13706 }, { "epoch": 1.2301945211011645, "grad_norm": 0.5052476406520314, "learning_rate": 7.346399830921334e-06, "loss": 0.4588, "step": 13707 }, { "epoch": 1.2302842655538355, "grad_norm": 0.5322480352871823, "learning_rate": 7.34593873019856e-06, "loss": 0.4466, "step": 13708 }, { "epoch": 1.2303740100065066, "grad_norm": 0.5390610190638431, "learning_rate": 7.345477603891452e-06, "loss": 0.4289, "step": 13709 }, { "epoch": 1.2304637544591774, "grad_norm": 0.5050145609059729, "learning_rate": 7.345016452005038e-06, "loss": 0.4119, "step": 13710 }, { "epoch": 1.2305534989118485, "grad_norm": 0.5101075447080341, "learning_rate": 7.344555274544349e-06, "loss": 0.4018, "step": 13711 }, { "epoch": 1.2306432433645196, "grad_norm": 0.5163131076854651, "learning_rate": 7.344094071514412e-06, "loss": 0.4146, "step": 13712 }, { "epoch": 1.2307329878171904, "grad_norm": 0.46439220829982786, "learning_rate": 7.34363284292026e-06, "loss": 0.3009, "step": 13713 }, { "epoch": 1.2308227322698615, "grad_norm": 0.45437770912326875, "learning_rate": 7.34317158876692e-06, "loss": 0.3489, "step": 13714 }, { "epoch": 1.2309124767225326, "grad_norm": 0.4945880051664296, "learning_rate": 7.342710309059423e-06, "loss": 0.3901, "step": 13715 }, { "epoch": 1.2310022211752036, "grad_norm": 0.47741061620101, "learning_rate": 7.342249003802801e-06, "loss": 0.352, "step": 13716 }, { "epoch": 1.2310919656278747, "grad_norm": 0.47638680013603424, "learning_rate": 7.341787673002083e-06, "loss": 0.3887, "step": 13717 }, { "epoch": 1.2311817100805456, "grad_norm": 0.5112426551261492, "learning_rate": 7.3413263166623015e-06, "loss": 0.4223, "step": 13718 }, { "epoch": 1.2312714545332166, "grad_norm": 0.4986674996975259, "learning_rate": 7.340864934788489e-06, "loss": 0.405, "step": 13719 }, { "epoch": 1.2313611989858877, "grad_norm": 0.4831370573059416, "learning_rate": 7.340403527385674e-06, "loss": 0.3648, "step": 13720 }, { "epoch": 1.2314509434385588, "grad_norm": 0.48618757966000226, "learning_rate": 7.339942094458891e-06, "loss": 0.3737, "step": 13721 }, { "epoch": 1.2315406878912296, "grad_norm": 0.49010825008176195, "learning_rate": 7.3394806360131725e-06, "loss": 0.3907, "step": 13722 }, { "epoch": 1.2316304323439007, "grad_norm": 0.513758118146116, "learning_rate": 7.339019152053551e-06, "loss": 0.4576, "step": 13723 }, { "epoch": 1.2317201767965718, "grad_norm": 0.5127155918118282, "learning_rate": 7.338557642585056e-06, "loss": 0.3915, "step": 13724 }, { "epoch": 1.2318099212492428, "grad_norm": 0.4679774911708828, "learning_rate": 7.338096107612725e-06, "loss": 0.4297, "step": 13725 }, { "epoch": 1.231899665701914, "grad_norm": 0.5085755166671052, "learning_rate": 7.337634547141589e-06, "loss": 0.4591, "step": 13726 }, { "epoch": 1.2319894101545847, "grad_norm": 0.5495301284989345, "learning_rate": 7.337172961176682e-06, "loss": 0.422, "step": 13727 }, { "epoch": 1.2320791546072558, "grad_norm": 0.4755609535005917, "learning_rate": 7.336711349723039e-06, "loss": 0.3576, "step": 13728 }, { "epoch": 1.2321688990599269, "grad_norm": 0.45027834800413513, "learning_rate": 7.336249712785693e-06, "loss": 0.3776, "step": 13729 }, { "epoch": 1.232258643512598, "grad_norm": 0.4592331932600128, "learning_rate": 7.33578805036968e-06, "loss": 0.3503, "step": 13730 }, { "epoch": 1.2323483879652688, "grad_norm": 0.47637171638768994, "learning_rate": 7.335326362480033e-06, "loss": 0.4445, "step": 13731 }, { "epoch": 1.2324381324179399, "grad_norm": 0.5279427382574228, "learning_rate": 7.334864649121789e-06, "loss": 0.3898, "step": 13732 }, { "epoch": 1.232527876870611, "grad_norm": 0.46812689551715264, "learning_rate": 7.33440291029998e-06, "loss": 0.3929, "step": 13733 }, { "epoch": 1.232617621323282, "grad_norm": 0.5439473120872522, "learning_rate": 7.333941146019647e-06, "loss": 0.4247, "step": 13734 }, { "epoch": 1.232707365775953, "grad_norm": 0.49621501159098413, "learning_rate": 7.3334793562858205e-06, "loss": 0.4006, "step": 13735 }, { "epoch": 1.232797110228624, "grad_norm": 0.5421637971734101, "learning_rate": 7.333017541103541e-06, "loss": 0.4616, "step": 13736 }, { "epoch": 1.232886854681295, "grad_norm": 0.5142531558488103, "learning_rate": 7.3325557004778415e-06, "loss": 0.4006, "step": 13737 }, { "epoch": 1.232976599133966, "grad_norm": 0.5566791318646924, "learning_rate": 7.33209383441376e-06, "loss": 0.3955, "step": 13738 }, { "epoch": 1.2330663435866371, "grad_norm": 0.5287286050024732, "learning_rate": 7.331631942916334e-06, "loss": 0.4682, "step": 13739 }, { "epoch": 1.233156088039308, "grad_norm": 0.5299264929288591, "learning_rate": 7.331170025990602e-06, "loss": 0.3996, "step": 13740 }, { "epoch": 1.233245832491979, "grad_norm": 0.48588548896819334, "learning_rate": 7.3307080836415976e-06, "loss": 0.3847, "step": 13741 }, { "epoch": 1.2333355769446501, "grad_norm": 0.49798807838668735, "learning_rate": 7.3302461158743635e-06, "loss": 0.4495, "step": 13742 }, { "epoch": 1.2334253213973212, "grad_norm": 0.4907443917044609, "learning_rate": 7.329784122693935e-06, "loss": 0.385, "step": 13743 }, { "epoch": 1.2335150658499923, "grad_norm": 0.495791608073861, "learning_rate": 7.32932210410535e-06, "loss": 0.4261, "step": 13744 }, { "epoch": 1.2336048103026631, "grad_norm": 0.49695066114197434, "learning_rate": 7.328860060113649e-06, "loss": 0.4034, "step": 13745 }, { "epoch": 1.2336945547553342, "grad_norm": 0.47781055337388345, "learning_rate": 7.32839799072387e-06, "loss": 0.4205, "step": 13746 }, { "epoch": 1.2337842992080053, "grad_norm": 0.48458307698420805, "learning_rate": 7.327935895941052e-06, "loss": 0.3735, "step": 13747 }, { "epoch": 1.233874043660676, "grad_norm": 0.5047029431610255, "learning_rate": 7.327473775770235e-06, "loss": 0.3882, "step": 13748 }, { "epoch": 1.2339637881133472, "grad_norm": 0.48803010697461785, "learning_rate": 7.327011630216459e-06, "loss": 0.4636, "step": 13749 }, { "epoch": 1.2340535325660182, "grad_norm": 0.5275731761461727, "learning_rate": 7.326549459284763e-06, "loss": 0.4553, "step": 13750 }, { "epoch": 1.2341432770186893, "grad_norm": 0.46039244885842634, "learning_rate": 7.326087262980188e-06, "loss": 0.36, "step": 13751 }, { "epoch": 1.2342330214713604, "grad_norm": 0.5186641018547381, "learning_rate": 7.325625041307776e-06, "loss": 0.446, "step": 13752 }, { "epoch": 1.2343227659240312, "grad_norm": 0.4850026971417911, "learning_rate": 7.325162794272564e-06, "loss": 0.4255, "step": 13753 }, { "epoch": 1.2344125103767023, "grad_norm": 0.4950743066187609, "learning_rate": 7.3247005218796e-06, "loss": 0.349, "step": 13754 }, { "epoch": 1.2345022548293734, "grad_norm": 0.46034395726176824, "learning_rate": 7.3242382241339175e-06, "loss": 0.4272, "step": 13755 }, { "epoch": 1.2345919992820444, "grad_norm": 0.4995209795614168, "learning_rate": 7.3237759010405625e-06, "loss": 0.3763, "step": 13756 }, { "epoch": 1.2346817437347153, "grad_norm": 0.4573875413307687, "learning_rate": 7.323313552604578e-06, "loss": 0.3875, "step": 13757 }, { "epoch": 1.2347714881873864, "grad_norm": 0.49775643398515174, "learning_rate": 7.322851178831003e-06, "loss": 0.3176, "step": 13758 }, { "epoch": 1.2348612326400574, "grad_norm": 0.449540285151216, "learning_rate": 7.322388779724883e-06, "loss": 0.3845, "step": 13759 }, { "epoch": 1.2349509770927285, "grad_norm": 0.4650812240596874, "learning_rate": 7.321926355291259e-06, "loss": 0.3347, "step": 13760 }, { "epoch": 1.2350407215453996, "grad_norm": 0.46475018071626384, "learning_rate": 7.321463905535174e-06, "loss": 0.4332, "step": 13761 }, { "epoch": 1.2351304659980704, "grad_norm": 0.5679476641957912, "learning_rate": 7.321001430461674e-06, "loss": 0.4539, "step": 13762 }, { "epoch": 1.2352202104507415, "grad_norm": 0.5494982797170566, "learning_rate": 7.3205389300758e-06, "loss": 0.4549, "step": 13763 }, { "epoch": 1.2353099549034126, "grad_norm": 0.5255698089029784, "learning_rate": 7.320076404382596e-06, "loss": 0.4425, "step": 13764 }, { "epoch": 1.2353996993560836, "grad_norm": 0.5098711812598615, "learning_rate": 7.319613853387107e-06, "loss": 0.3783, "step": 13765 }, { "epoch": 1.2354894438087545, "grad_norm": 0.47520316009972735, "learning_rate": 7.3191512770943784e-06, "loss": 0.4034, "step": 13766 }, { "epoch": 1.2355791882614255, "grad_norm": 0.4968729974296414, "learning_rate": 7.3186886755094535e-06, "loss": 0.3472, "step": 13767 }, { "epoch": 1.2356689327140966, "grad_norm": 0.4673169879045396, "learning_rate": 7.318226048637378e-06, "loss": 0.3914, "step": 13768 }, { "epoch": 1.2357586771667677, "grad_norm": 0.5150919097378392, "learning_rate": 7.317763396483197e-06, "loss": 0.3677, "step": 13769 }, { "epoch": 1.2358484216194388, "grad_norm": 0.5052430322571928, "learning_rate": 7.317300719051956e-06, "loss": 0.3789, "step": 13770 }, { "epoch": 1.2359381660721096, "grad_norm": 0.5502901317789822, "learning_rate": 7.316838016348702e-06, "loss": 0.5005, "step": 13771 }, { "epoch": 1.2360279105247807, "grad_norm": 0.5332209546723334, "learning_rate": 7.316375288378478e-06, "loss": 0.3515, "step": 13772 }, { "epoch": 1.2361176549774517, "grad_norm": 0.5567152438292914, "learning_rate": 7.315912535146333e-06, "loss": 0.5055, "step": 13773 }, { "epoch": 1.2362073994301228, "grad_norm": 0.47673422815390915, "learning_rate": 7.315449756657314e-06, "loss": 0.36, "step": 13774 }, { "epoch": 1.2362971438827937, "grad_norm": 0.46648373906184787, "learning_rate": 7.314986952916467e-06, "loss": 0.4068, "step": 13775 }, { "epoch": 1.2363868883354647, "grad_norm": 0.4832697662563129, "learning_rate": 7.314524123928839e-06, "loss": 0.3882, "step": 13776 }, { "epoch": 1.2364766327881358, "grad_norm": 0.4712256369453779, "learning_rate": 7.314061269699478e-06, "loss": 0.3902, "step": 13777 }, { "epoch": 1.2365663772408069, "grad_norm": 0.48611325307918746, "learning_rate": 7.313598390233432e-06, "loss": 0.4034, "step": 13778 }, { "epoch": 1.236656121693478, "grad_norm": 0.4821778167896337, "learning_rate": 7.313135485535749e-06, "loss": 0.4012, "step": 13779 }, { "epoch": 1.2367458661461488, "grad_norm": 0.5090341625082693, "learning_rate": 7.312672555611477e-06, "loss": 0.4242, "step": 13780 }, { "epoch": 1.2368356105988199, "grad_norm": 0.5464558869684272, "learning_rate": 7.312209600465665e-06, "loss": 0.3895, "step": 13781 }, { "epoch": 1.236925355051491, "grad_norm": 0.46832684970008404, "learning_rate": 7.311746620103362e-06, "loss": 0.345, "step": 13782 }, { "epoch": 1.2370150995041618, "grad_norm": 0.4836528638286411, "learning_rate": 7.3112836145296165e-06, "loss": 0.3819, "step": 13783 }, { "epoch": 1.2371048439568328, "grad_norm": 0.47614071847817235, "learning_rate": 7.310820583749477e-06, "loss": 0.395, "step": 13784 }, { "epoch": 1.237194588409504, "grad_norm": 0.4842771513222967, "learning_rate": 7.310357527767995e-06, "loss": 0.3854, "step": 13785 }, { "epoch": 1.237284332862175, "grad_norm": 0.5188740560679471, "learning_rate": 7.309894446590222e-06, "loss": 0.4342, "step": 13786 }, { "epoch": 1.237374077314846, "grad_norm": 0.5030688439447402, "learning_rate": 7.309431340221205e-06, "loss": 0.3678, "step": 13787 }, { "epoch": 1.237463821767517, "grad_norm": 0.4925958540090513, "learning_rate": 7.3089682086659954e-06, "loss": 0.4528, "step": 13788 }, { "epoch": 1.237553566220188, "grad_norm": 0.5273543114976299, "learning_rate": 7.308505051929644e-06, "loss": 0.3739, "step": 13789 }, { "epoch": 1.237643310672859, "grad_norm": 0.538459160017843, "learning_rate": 7.308041870017203e-06, "loss": 0.4689, "step": 13790 }, { "epoch": 1.2377330551255301, "grad_norm": 0.4773527426255977, "learning_rate": 7.307578662933723e-06, "loss": 0.3843, "step": 13791 }, { "epoch": 1.237822799578201, "grad_norm": 0.5203375837264712, "learning_rate": 7.307115430684256e-06, "loss": 0.3803, "step": 13792 }, { "epoch": 1.237912544030872, "grad_norm": 0.488012679956319, "learning_rate": 7.306652173273854e-06, "loss": 0.4106, "step": 13793 }, { "epoch": 1.238002288483543, "grad_norm": 0.47781801733245943, "learning_rate": 7.306188890707569e-06, "loss": 0.375, "step": 13794 }, { "epoch": 1.2380920329362142, "grad_norm": 0.5047879512048293, "learning_rate": 7.305725582990453e-06, "loss": 0.4183, "step": 13795 }, { "epoch": 1.2381817773888852, "grad_norm": 0.49497531132867084, "learning_rate": 7.305262250127558e-06, "loss": 0.3699, "step": 13796 }, { "epoch": 1.238271521841556, "grad_norm": 0.4623834398894756, "learning_rate": 7.30479889212394e-06, "loss": 0.3434, "step": 13797 }, { "epoch": 1.2383612662942272, "grad_norm": 0.4495981231381243, "learning_rate": 7.304335508984649e-06, "loss": 0.4302, "step": 13798 }, { "epoch": 1.2384510107468982, "grad_norm": 0.5172753200299836, "learning_rate": 7.3038721007147405e-06, "loss": 0.3645, "step": 13799 }, { "epoch": 1.2385407551995693, "grad_norm": 0.5512692408969077, "learning_rate": 7.3034086673192675e-06, "loss": 0.456, "step": 13800 }, { "epoch": 1.2386304996522401, "grad_norm": 0.5004887295755588, "learning_rate": 7.302945208803285e-06, "loss": 0.4013, "step": 13801 }, { "epoch": 1.2387202441049112, "grad_norm": 0.49239749831132534, "learning_rate": 7.302481725171846e-06, "loss": 0.4282, "step": 13802 }, { "epoch": 1.2388099885575823, "grad_norm": 0.48674328365149805, "learning_rate": 7.302018216430007e-06, "loss": 0.3783, "step": 13803 }, { "epoch": 1.2388997330102534, "grad_norm": 0.4805473260051162, "learning_rate": 7.30155468258282e-06, "loss": 0.3755, "step": 13804 }, { "epoch": 1.2389894774629244, "grad_norm": 0.5066099269836754, "learning_rate": 7.301091123635343e-06, "loss": 0.4338, "step": 13805 }, { "epoch": 1.2390792219155953, "grad_norm": 0.5253128232392945, "learning_rate": 7.300627539592631e-06, "loss": 0.417, "step": 13806 }, { "epoch": 1.2391689663682663, "grad_norm": 0.4843304033727888, "learning_rate": 7.300163930459739e-06, "loss": 0.4437, "step": 13807 }, { "epoch": 1.2392587108209374, "grad_norm": 0.5757272968211273, "learning_rate": 7.299700296241723e-06, "loss": 0.4114, "step": 13808 }, { "epoch": 1.2393484552736085, "grad_norm": 0.46818503007372775, "learning_rate": 7.299236636943641e-06, "loss": 0.3968, "step": 13809 }, { "epoch": 1.2394381997262793, "grad_norm": 0.5469994408987694, "learning_rate": 7.298772952570547e-06, "loss": 0.4444, "step": 13810 }, { "epoch": 1.2395279441789504, "grad_norm": 0.46774949957544437, "learning_rate": 7.2983092431274995e-06, "loss": 0.4015, "step": 13811 }, { "epoch": 1.2396176886316215, "grad_norm": 0.4881365250481447, "learning_rate": 7.297845508619554e-06, "loss": 0.4066, "step": 13812 }, { "epoch": 1.2397074330842925, "grad_norm": 0.5290090486270894, "learning_rate": 7.297381749051771e-06, "loss": 0.4297, "step": 13813 }, { "epoch": 1.2397971775369636, "grad_norm": 0.4755136657980257, "learning_rate": 7.296917964429204e-06, "loss": 0.4003, "step": 13814 }, { "epoch": 1.2398869219896345, "grad_norm": 0.5179413306537561, "learning_rate": 7.2964541547569144e-06, "loss": 0.4219, "step": 13815 }, { "epoch": 1.2399766664423055, "grad_norm": 0.4709868609417983, "learning_rate": 7.2959903200399585e-06, "loss": 0.3657, "step": 13816 }, { "epoch": 1.2400664108949766, "grad_norm": 0.4896354875230361, "learning_rate": 7.2955264602833974e-06, "loss": 0.3918, "step": 13817 }, { "epoch": 1.2401561553476474, "grad_norm": 0.48398238904997815, "learning_rate": 7.295062575492286e-06, "loss": 0.3778, "step": 13818 }, { "epoch": 1.2402458998003185, "grad_norm": 0.477685786345872, "learning_rate": 7.294598665671686e-06, "loss": 0.3806, "step": 13819 }, { "epoch": 1.2403356442529896, "grad_norm": 0.5772170129942954, "learning_rate": 7.2941347308266565e-06, "loss": 0.4647, "step": 13820 }, { "epoch": 1.2404253887056607, "grad_norm": 0.4739575325502386, "learning_rate": 7.293670770962255e-06, "loss": 0.3788, "step": 13821 }, { "epoch": 1.2405151331583317, "grad_norm": 0.5015057825411628, "learning_rate": 7.293206786083544e-06, "loss": 0.3871, "step": 13822 }, { "epoch": 1.2406048776110026, "grad_norm": 0.4865016552155561, "learning_rate": 7.292742776195582e-06, "loss": 0.4021, "step": 13823 }, { "epoch": 1.2406946220636736, "grad_norm": 0.48412816375405704, "learning_rate": 7.292278741303431e-06, "loss": 0.4497, "step": 13824 }, { "epoch": 1.2407843665163447, "grad_norm": 0.5560440297897546, "learning_rate": 7.2918146814121506e-06, "loss": 0.4568, "step": 13825 }, { "epoch": 1.2408741109690158, "grad_norm": 0.4975260956675361, "learning_rate": 7.2913505965268e-06, "loss": 0.3735, "step": 13826 }, { "epoch": 1.2409638554216866, "grad_norm": 0.49951680302039186, "learning_rate": 7.290886486652444e-06, "loss": 0.4391, "step": 13827 }, { "epoch": 1.2410535998743577, "grad_norm": 0.5260963224247217, "learning_rate": 7.290422351794142e-06, "loss": 0.3636, "step": 13828 }, { "epoch": 1.2411433443270288, "grad_norm": 0.4860843540797815, "learning_rate": 7.289958191956955e-06, "loss": 0.483, "step": 13829 }, { "epoch": 1.2412330887796998, "grad_norm": 0.48433351015909115, "learning_rate": 7.289494007145947e-06, "loss": 0.41, "step": 13830 }, { "epoch": 1.241322833232371, "grad_norm": 0.5053758744051281, "learning_rate": 7.289029797366179e-06, "loss": 0.4333, "step": 13831 }, { "epoch": 1.2414125776850418, "grad_norm": 0.517390340019054, "learning_rate": 7.288565562622713e-06, "loss": 0.3516, "step": 13832 }, { "epoch": 1.2415023221377128, "grad_norm": 0.4405835012064834, "learning_rate": 7.288101302920615e-06, "loss": 0.3995, "step": 13833 }, { "epoch": 1.241592066590384, "grad_norm": 0.5223900120167176, "learning_rate": 7.287637018264945e-06, "loss": 0.3903, "step": 13834 }, { "epoch": 1.241681811043055, "grad_norm": 0.4688278563551316, "learning_rate": 7.287172708660766e-06, "loss": 0.3964, "step": 13835 }, { "epoch": 1.2417715554957258, "grad_norm": 0.5157721363939257, "learning_rate": 7.286708374113145e-06, "loss": 0.3497, "step": 13836 }, { "epoch": 1.241861299948397, "grad_norm": 0.48724189948389224, "learning_rate": 7.286244014627142e-06, "loss": 0.4316, "step": 13837 }, { "epoch": 1.241951044401068, "grad_norm": 0.4978421986373993, "learning_rate": 7.285779630207824e-06, "loss": 0.3661, "step": 13838 }, { "epoch": 1.242040788853739, "grad_norm": 0.46714826741162196, "learning_rate": 7.285315220860254e-06, "loss": 0.3867, "step": 13839 }, { "epoch": 1.24213053330641, "grad_norm": 0.484079581542443, "learning_rate": 7.284850786589499e-06, "loss": 0.3673, "step": 13840 }, { "epoch": 1.242220277759081, "grad_norm": 0.47565308773153314, "learning_rate": 7.284386327400621e-06, "loss": 0.4497, "step": 13841 }, { "epoch": 1.242310022211752, "grad_norm": 0.5251337509331282, "learning_rate": 7.283921843298687e-06, "loss": 0.4098, "step": 13842 }, { "epoch": 1.242399766664423, "grad_norm": 0.4912885025495898, "learning_rate": 7.283457334288762e-06, "loss": 0.388, "step": 13843 }, { "epoch": 1.2424895111170942, "grad_norm": 0.4812236792322664, "learning_rate": 7.282992800375913e-06, "loss": 0.3719, "step": 13844 }, { "epoch": 1.242579255569765, "grad_norm": 0.4684084842760749, "learning_rate": 7.282528241565204e-06, "loss": 0.3665, "step": 13845 }, { "epoch": 1.242669000022436, "grad_norm": 0.4647542653737824, "learning_rate": 7.282063657861704e-06, "loss": 0.4339, "step": 13846 }, { "epoch": 1.2427587444751071, "grad_norm": 0.5259809208789907, "learning_rate": 7.281599049270477e-06, "loss": 0.4395, "step": 13847 }, { "epoch": 1.2428484889277782, "grad_norm": 0.5055113489099727, "learning_rate": 7.281134415796593e-06, "loss": 0.3629, "step": 13848 }, { "epoch": 1.2429382333804493, "grad_norm": 0.47778357227775003, "learning_rate": 7.280669757445115e-06, "loss": 0.3853, "step": 13849 }, { "epoch": 1.2430279778331201, "grad_norm": 0.5082055011345981, "learning_rate": 7.280205074221113e-06, "loss": 0.4157, "step": 13850 }, { "epoch": 1.2431177222857912, "grad_norm": 0.5170565910661641, "learning_rate": 7.279740366129656e-06, "loss": 0.3586, "step": 13851 }, { "epoch": 1.2432074667384623, "grad_norm": 0.4522917902352315, "learning_rate": 7.279275633175809e-06, "loss": 0.3984, "step": 13852 }, { "epoch": 1.2432972111911333, "grad_norm": 0.4775759274172499, "learning_rate": 7.278810875364644e-06, "loss": 0.3458, "step": 13853 }, { "epoch": 1.2433869556438042, "grad_norm": 0.4755050772982161, "learning_rate": 7.278346092701225e-06, "loss": 0.4401, "step": 13854 }, { "epoch": 1.2434767000964753, "grad_norm": 0.5122125134508417, "learning_rate": 7.277881285190626e-06, "loss": 0.4068, "step": 13855 }, { "epoch": 1.2435664445491463, "grad_norm": 0.5118494327322646, "learning_rate": 7.277416452837911e-06, "loss": 0.4211, "step": 13856 }, { "epoch": 1.2436561890018174, "grad_norm": 0.5491449802222148, "learning_rate": 7.276951595648154e-06, "loss": 0.4851, "step": 13857 }, { "epoch": 1.2437459334544883, "grad_norm": 0.526213109018566, "learning_rate": 7.276486713626421e-06, "loss": 0.4985, "step": 13858 }, { "epoch": 1.2438356779071593, "grad_norm": 0.539050954757998, "learning_rate": 7.276021806777783e-06, "loss": 0.4543, "step": 13859 }, { "epoch": 1.2439254223598304, "grad_norm": 0.5174066597423255, "learning_rate": 7.27555687510731e-06, "loss": 0.4379, "step": 13860 }, { "epoch": 1.2440151668125015, "grad_norm": 0.4813650304132054, "learning_rate": 7.275091918620074e-06, "loss": 0.3914, "step": 13861 }, { "epoch": 1.2441049112651723, "grad_norm": 0.48813888673679545, "learning_rate": 7.2746269373211445e-06, "loss": 0.3103, "step": 13862 }, { "epoch": 1.2441946557178434, "grad_norm": 0.48365726696939676, "learning_rate": 7.274161931215593e-06, "loss": 0.393, "step": 13863 }, { "epoch": 1.2442844001705144, "grad_norm": 0.4730079541373014, "learning_rate": 7.273696900308489e-06, "loss": 0.3831, "step": 13864 }, { "epoch": 1.2443741446231855, "grad_norm": 0.4801765535852582, "learning_rate": 7.273231844604907e-06, "loss": 0.3994, "step": 13865 }, { "epoch": 1.2444638890758566, "grad_norm": 0.5153873115020928, "learning_rate": 7.272766764109916e-06, "loss": 0.4113, "step": 13866 }, { "epoch": 1.2445536335285274, "grad_norm": 0.47846308806625154, "learning_rate": 7.27230165882859e-06, "loss": 0.402, "step": 13867 }, { "epoch": 1.2446433779811985, "grad_norm": 0.4913157916249709, "learning_rate": 7.271836528766001e-06, "loss": 0.4105, "step": 13868 }, { "epoch": 1.2447331224338696, "grad_norm": 0.5042613668545075, "learning_rate": 7.2713713739272205e-06, "loss": 0.4128, "step": 13869 }, { "epoch": 1.2448228668865406, "grad_norm": 0.5207090251754086, "learning_rate": 7.270906194317322e-06, "loss": 0.4417, "step": 13870 }, { "epoch": 1.2449126113392115, "grad_norm": 0.48779526291311265, "learning_rate": 7.27044098994138e-06, "loss": 0.4536, "step": 13871 }, { "epoch": 1.2450023557918826, "grad_norm": 0.546720398020875, "learning_rate": 7.269975760804465e-06, "loss": 0.368, "step": 13872 }, { "epoch": 1.2450921002445536, "grad_norm": 0.46468766979764226, "learning_rate": 7.269510506911653e-06, "loss": 0.4464, "step": 13873 }, { "epoch": 1.2451818446972247, "grad_norm": 0.5321882578711605, "learning_rate": 7.269045228268018e-06, "loss": 0.4105, "step": 13874 }, { "epoch": 1.2452715891498958, "grad_norm": 0.46791580660377324, "learning_rate": 7.268579924878634e-06, "loss": 0.3345, "step": 13875 }, { "epoch": 1.2453613336025666, "grad_norm": 0.4903136678208058, "learning_rate": 7.268114596748573e-06, "loss": 0.3845, "step": 13876 }, { "epoch": 1.2454510780552377, "grad_norm": 0.47871738570710504, "learning_rate": 7.267649243882915e-06, "loss": 0.4288, "step": 13877 }, { "epoch": 1.2455408225079088, "grad_norm": 0.5321718719803677, "learning_rate": 7.2671838662867285e-06, "loss": 0.456, "step": 13878 }, { "epoch": 1.2456305669605798, "grad_norm": 0.5071847221851242, "learning_rate": 7.266718463965095e-06, "loss": 0.4403, "step": 13879 }, { "epoch": 1.2457203114132507, "grad_norm": 0.5159273174954442, "learning_rate": 7.266253036923085e-06, "loss": 0.3726, "step": 13880 }, { "epoch": 1.2458100558659218, "grad_norm": 0.4869940206412628, "learning_rate": 7.265787585165779e-06, "loss": 0.426, "step": 13881 }, { "epoch": 1.2458998003185928, "grad_norm": 0.5201856966056154, "learning_rate": 7.26532210869825e-06, "loss": 0.3972, "step": 13882 }, { "epoch": 1.245989544771264, "grad_norm": 0.49916895864712424, "learning_rate": 7.264856607525574e-06, "loss": 0.3774, "step": 13883 }, { "epoch": 1.246079289223935, "grad_norm": 0.5233118608512621, "learning_rate": 7.264391081652831e-06, "loss": 0.3971, "step": 13884 }, { "epoch": 1.2461690336766058, "grad_norm": 0.48398710045032856, "learning_rate": 7.263925531085094e-06, "loss": 0.3968, "step": 13885 }, { "epoch": 1.2462587781292769, "grad_norm": 0.49093876830816763, "learning_rate": 7.2634599558274425e-06, "loss": 0.4175, "step": 13886 }, { "epoch": 1.246348522581948, "grad_norm": 0.498987213271071, "learning_rate": 7.2629943558849535e-06, "loss": 0.3932, "step": 13887 }, { "epoch": 1.246438267034619, "grad_norm": 0.46320920365485674, "learning_rate": 7.262528731262704e-06, "loss": 0.4038, "step": 13888 }, { "epoch": 1.2465280114872899, "grad_norm": 0.5298673644012614, "learning_rate": 7.262063081965776e-06, "loss": 0.445, "step": 13889 }, { "epoch": 1.246617755939961, "grad_norm": 0.5228846557493674, "learning_rate": 7.26159740799924e-06, "loss": 0.3905, "step": 13890 }, { "epoch": 1.246707500392632, "grad_norm": 0.44208210714603413, "learning_rate": 7.261131709368182e-06, "loss": 0.3587, "step": 13891 }, { "epoch": 1.246797244845303, "grad_norm": 0.5125006656766904, "learning_rate": 7.260665986077676e-06, "loss": 0.3914, "step": 13892 }, { "epoch": 1.246886989297974, "grad_norm": 0.4792805105825487, "learning_rate": 7.260200238132805e-06, "loss": 0.3979, "step": 13893 }, { "epoch": 1.246976733750645, "grad_norm": 0.47567068035106663, "learning_rate": 7.2597344655386445e-06, "loss": 0.4126, "step": 13894 }, { "epoch": 1.247066478203316, "grad_norm": 0.484701898737469, "learning_rate": 7.259268668300278e-06, "loss": 0.3719, "step": 13895 }, { "epoch": 1.2471562226559871, "grad_norm": 0.5058901380255473, "learning_rate": 7.258802846422782e-06, "loss": 0.4834, "step": 13896 }, { "epoch": 1.247245967108658, "grad_norm": 0.5311699760205112, "learning_rate": 7.258336999911238e-06, "loss": 0.3996, "step": 13897 }, { "epoch": 1.247335711561329, "grad_norm": 0.45299423039705955, "learning_rate": 7.257871128770726e-06, "loss": 0.3433, "step": 13898 }, { "epoch": 1.2474254560140001, "grad_norm": 0.46746273027683144, "learning_rate": 7.257405233006327e-06, "loss": 0.3706, "step": 13899 }, { "epoch": 1.2475152004666712, "grad_norm": 0.5482254464205252, "learning_rate": 7.256939312623123e-06, "loss": 0.427, "step": 13900 }, { "epoch": 1.2476049449193423, "grad_norm": 0.47361596632957415, "learning_rate": 7.256473367626193e-06, "loss": 0.3645, "step": 13901 }, { "epoch": 1.247694689372013, "grad_norm": 0.5009036556284818, "learning_rate": 7.2560073980206215e-06, "loss": 0.4282, "step": 13902 }, { "epoch": 1.2477844338246842, "grad_norm": 0.48399643905347006, "learning_rate": 7.255541403811487e-06, "loss": 0.3696, "step": 13903 }, { "epoch": 1.2478741782773553, "grad_norm": 0.47800750530917674, "learning_rate": 7.255075385003874e-06, "loss": 0.3941, "step": 13904 }, { "epoch": 1.2479639227300263, "grad_norm": 0.5228676654709062, "learning_rate": 7.254609341602863e-06, "loss": 0.3904, "step": 13905 }, { "epoch": 1.2480536671826972, "grad_norm": 0.47160973580111215, "learning_rate": 7.2541432736135385e-06, "loss": 0.3848, "step": 13906 }, { "epoch": 1.2481434116353682, "grad_norm": 0.5255363908398808, "learning_rate": 7.253677181040983e-06, "loss": 0.3356, "step": 13907 }, { "epoch": 1.2482331560880393, "grad_norm": 0.46023706904038175, "learning_rate": 7.253211063890278e-06, "loss": 0.3917, "step": 13908 }, { "epoch": 1.2483229005407104, "grad_norm": 0.4968428762046925, "learning_rate": 7.252744922166506e-06, "loss": 0.4115, "step": 13909 }, { "epoch": 1.2484126449933814, "grad_norm": 0.5226737265899463, "learning_rate": 7.252278755874756e-06, "loss": 0.4505, "step": 13910 }, { "epoch": 1.2485023894460523, "grad_norm": 0.5372205443542122, "learning_rate": 7.2518125650201055e-06, "loss": 0.3865, "step": 13911 }, { "epoch": 1.2485921338987234, "grad_norm": 0.48538928696033556, "learning_rate": 7.251346349607642e-06, "loss": 0.3965, "step": 13912 }, { "epoch": 1.2486818783513944, "grad_norm": 0.48306892195933115, "learning_rate": 7.250880109642451e-06, "loss": 0.378, "step": 13913 }, { "epoch": 1.2487716228040655, "grad_norm": 0.49578234361618234, "learning_rate": 7.250413845129613e-06, "loss": 0.397, "step": 13914 }, { "epoch": 1.2488613672567364, "grad_norm": 0.46728150987059236, "learning_rate": 7.249947556074217e-06, "loss": 0.3169, "step": 13915 }, { "epoch": 1.2489511117094074, "grad_norm": 0.47751633463287085, "learning_rate": 7.249481242481349e-06, "loss": 0.4078, "step": 13916 }, { "epoch": 1.2490408561620785, "grad_norm": 0.5036316170853582, "learning_rate": 7.249014904356091e-06, "loss": 0.3913, "step": 13917 }, { "epoch": 1.2491306006147496, "grad_norm": 0.4705422424165953, "learning_rate": 7.24854854170353e-06, "loss": 0.3476, "step": 13918 }, { "epoch": 1.2492203450674206, "grad_norm": 0.4820124019586647, "learning_rate": 7.248082154528753e-06, "loss": 0.4624, "step": 13919 }, { "epoch": 1.2493100895200915, "grad_norm": 0.5143347126501135, "learning_rate": 7.247615742836845e-06, "loss": 0.37, "step": 13920 }, { "epoch": 1.2493998339727626, "grad_norm": 0.47806106678147414, "learning_rate": 7.247149306632893e-06, "loss": 0.3293, "step": 13921 }, { "epoch": 1.2494895784254336, "grad_norm": 0.5086984293790086, "learning_rate": 7.246682845921985e-06, "loss": 0.4544, "step": 13922 }, { "epoch": 1.2495793228781047, "grad_norm": 0.4899753754021886, "learning_rate": 7.246216360709207e-06, "loss": 0.3707, "step": 13923 }, { "epoch": 1.2496690673307755, "grad_norm": 0.503716289106988, "learning_rate": 7.245749850999647e-06, "loss": 0.4353, "step": 13924 }, { "epoch": 1.2497588117834466, "grad_norm": 0.5049935868733134, "learning_rate": 7.245283316798391e-06, "loss": 0.422, "step": 13925 }, { "epoch": 1.2498485562361177, "grad_norm": 0.5010227468617564, "learning_rate": 7.24481675811053e-06, "loss": 0.4478, "step": 13926 }, { "epoch": 1.2499383006887888, "grad_norm": 0.49826330183732337, "learning_rate": 7.24435017494115e-06, "loss": 0.4238, "step": 13927 }, { "epoch": 1.2500280451414598, "grad_norm": 0.495724107777256, "learning_rate": 7.243883567295339e-06, "loss": 0.4115, "step": 13928 }, { "epoch": 1.2501177895941307, "grad_norm": 0.4755008636340836, "learning_rate": 7.243416935178187e-06, "loss": 0.3782, "step": 13929 }, { "epoch": 1.2502075340468017, "grad_norm": 0.45203916745859973, "learning_rate": 7.2429502785947825e-06, "loss": 0.2857, "step": 13930 }, { "epoch": 1.2502972784994728, "grad_norm": 0.4445869190619417, "learning_rate": 7.242483597550215e-06, "loss": 0.3994, "step": 13931 }, { "epoch": 1.2503870229521437, "grad_norm": 0.5736967612511594, "learning_rate": 7.2420168920495745e-06, "loss": 0.5008, "step": 13932 }, { "epoch": 1.2504767674048147, "grad_norm": 0.586547188612053, "learning_rate": 7.241550162097949e-06, "loss": 0.4299, "step": 13933 }, { "epoch": 1.2505665118574858, "grad_norm": 0.48320416486520695, "learning_rate": 7.241083407700431e-06, "loss": 0.4247, "step": 13934 }, { "epoch": 1.2506562563101569, "grad_norm": 0.49055843427290424, "learning_rate": 7.240616628862108e-06, "loss": 0.3486, "step": 13935 }, { "epoch": 1.250746000762828, "grad_norm": 0.490185632424669, "learning_rate": 7.240149825588074e-06, "loss": 0.399, "step": 13936 }, { "epoch": 1.2508357452154988, "grad_norm": 0.4791212693837609, "learning_rate": 7.239682997883417e-06, "loss": 0.339, "step": 13937 }, { "epoch": 1.2509254896681699, "grad_norm": 0.5166764287619175, "learning_rate": 7.23921614575323e-06, "loss": 0.4165, "step": 13938 }, { "epoch": 1.251015234120841, "grad_norm": 0.4720525237617544, "learning_rate": 7.238749269202603e-06, "loss": 0.4239, "step": 13939 }, { "epoch": 1.251104978573512, "grad_norm": 0.5120479429039512, "learning_rate": 7.238282368236627e-06, "loss": 0.3709, "step": 13940 }, { "epoch": 1.2511947230261828, "grad_norm": 0.5275443102602153, "learning_rate": 7.237815442860398e-06, "loss": 0.3772, "step": 13941 }, { "epoch": 1.251284467478854, "grad_norm": 0.44910530446905783, "learning_rate": 7.2373484930790025e-06, "loss": 0.3485, "step": 13942 }, { "epoch": 1.251374211931525, "grad_norm": 0.48395221726002224, "learning_rate": 7.236881518897536e-06, "loss": 0.3685, "step": 13943 }, { "epoch": 1.251463956384196, "grad_norm": 0.47266215791748384, "learning_rate": 7.236414520321093e-06, "loss": 0.3858, "step": 13944 }, { "epoch": 1.2515537008368671, "grad_norm": 0.5092361495798502, "learning_rate": 7.235947497354763e-06, "loss": 0.3915, "step": 13945 }, { "epoch": 1.251643445289538, "grad_norm": 0.5180947437635192, "learning_rate": 7.235480450003641e-06, "loss": 0.3684, "step": 13946 }, { "epoch": 1.251733189742209, "grad_norm": 0.4595766843756869, "learning_rate": 7.235013378272822e-06, "loss": 0.4112, "step": 13947 }, { "epoch": 1.25182293419488, "grad_norm": 0.4874559982171317, "learning_rate": 7.2345462821673965e-06, "loss": 0.4131, "step": 13948 }, { "epoch": 1.2519126786475512, "grad_norm": 0.5444726896009104, "learning_rate": 7.23407916169246e-06, "loss": 0.4273, "step": 13949 }, { "epoch": 1.252002423100222, "grad_norm": 0.48839093877942025, "learning_rate": 7.233612016853106e-06, "loss": 0.4518, "step": 13950 }, { "epoch": 1.252092167552893, "grad_norm": 0.49989673424775727, "learning_rate": 7.233144847654433e-06, "loss": 0.3662, "step": 13951 }, { "epoch": 1.2521819120055642, "grad_norm": 0.4764020600561733, "learning_rate": 7.232677654101531e-06, "loss": 0.3652, "step": 13952 }, { "epoch": 1.2522716564582352, "grad_norm": 0.5403566771749136, "learning_rate": 7.232210436199498e-06, "loss": 0.4885, "step": 13953 }, { "epoch": 1.2523614009109063, "grad_norm": 0.5295137068904475, "learning_rate": 7.231743193953428e-06, "loss": 0.392, "step": 13954 }, { "epoch": 1.2524511453635772, "grad_norm": 0.46748272422877124, "learning_rate": 7.231275927368417e-06, "loss": 0.4226, "step": 13955 }, { "epoch": 1.2525408898162482, "grad_norm": 0.5402788390937832, "learning_rate": 7.23080863644956e-06, "loss": 0.4317, "step": 13956 }, { "epoch": 1.2526306342689193, "grad_norm": 0.5537559816687546, "learning_rate": 7.230341321201956e-06, "loss": 0.4078, "step": 13957 }, { "epoch": 1.2527203787215901, "grad_norm": 0.48605520637265526, "learning_rate": 7.229873981630699e-06, "loss": 0.4296, "step": 13958 }, { "epoch": 1.2528101231742612, "grad_norm": 0.5023176535392275, "learning_rate": 7.2294066177408865e-06, "loss": 0.4499, "step": 13959 }, { "epoch": 1.2528998676269323, "grad_norm": 0.5150350512478014, "learning_rate": 7.228939229537614e-06, "loss": 0.4533, "step": 13960 }, { "epoch": 1.2529896120796034, "grad_norm": 0.5557137493718741, "learning_rate": 7.228471817025982e-06, "loss": 0.3906, "step": 13961 }, { "epoch": 1.2530793565322744, "grad_norm": 0.49917038201342195, "learning_rate": 7.2280043802110835e-06, "loss": 0.3743, "step": 13962 }, { "epoch": 1.2531691009849455, "grad_norm": 0.4769753860651334, "learning_rate": 7.227536919098021e-06, "loss": 0.4077, "step": 13963 }, { "epoch": 1.2532588454376163, "grad_norm": 0.45502588565768903, "learning_rate": 7.227069433691889e-06, "loss": 0.3602, "step": 13964 }, { "epoch": 1.2533485898902874, "grad_norm": 0.521138815115732, "learning_rate": 7.226601923997788e-06, "loss": 0.4218, "step": 13965 }, { "epoch": 1.2534383343429585, "grad_norm": 0.5148010952011701, "learning_rate": 7.226134390020816e-06, "loss": 0.4508, "step": 13966 }, { "epoch": 1.2535280787956293, "grad_norm": 0.5360021178947442, "learning_rate": 7.225666831766072e-06, "loss": 0.492, "step": 13967 }, { "epoch": 1.2536178232483004, "grad_norm": 0.5147105723384232, "learning_rate": 7.225199249238652e-06, "loss": 0.4021, "step": 13968 }, { "epoch": 1.2537075677009715, "grad_norm": 0.479432946299956, "learning_rate": 7.2247316424436605e-06, "loss": 0.4023, "step": 13969 }, { "epoch": 1.2537973121536425, "grad_norm": 0.5100361010099962, "learning_rate": 7.224264011386192e-06, "loss": 0.3822, "step": 13970 }, { "epoch": 1.2538870566063136, "grad_norm": 0.46929520867613245, "learning_rate": 7.223796356071353e-06, "loss": 0.3731, "step": 13971 }, { "epoch": 1.2539768010589847, "grad_norm": 0.5233117596900648, "learning_rate": 7.223328676504237e-06, "loss": 0.337, "step": 13972 }, { "epoch": 1.2540665455116555, "grad_norm": 0.5250087599963911, "learning_rate": 7.222860972689949e-06, "loss": 0.3872, "step": 13973 }, { "epoch": 1.2541562899643266, "grad_norm": 0.43535426139392175, "learning_rate": 7.222393244633585e-06, "loss": 0.3494, "step": 13974 }, { "epoch": 1.2542460344169977, "grad_norm": 0.5116047680935867, "learning_rate": 7.2219254923402514e-06, "loss": 0.4564, "step": 13975 }, { "epoch": 1.2543357788696685, "grad_norm": 0.5309379451747639, "learning_rate": 7.221457715815044e-06, "loss": 0.3952, "step": 13976 }, { "epoch": 1.2544255233223396, "grad_norm": 0.5188653214700815, "learning_rate": 7.220989915063069e-06, "loss": 0.4615, "step": 13977 }, { "epoch": 1.2545152677750107, "grad_norm": 0.4960943054050102, "learning_rate": 7.2205220900894255e-06, "loss": 0.3796, "step": 13978 }, { "epoch": 1.2546050122276817, "grad_norm": 0.47878079405204343, "learning_rate": 7.220054240899217e-06, "loss": 0.4485, "step": 13979 }, { "epoch": 1.2546947566803528, "grad_norm": 0.48321888353603776, "learning_rate": 7.219586367497544e-06, "loss": 0.4117, "step": 13980 }, { "epoch": 1.2547845011330236, "grad_norm": 0.5104930743677061, "learning_rate": 7.21911846988951e-06, "loss": 0.4118, "step": 13981 }, { "epoch": 1.2548742455856947, "grad_norm": 0.5249223562629944, "learning_rate": 7.21865054808022e-06, "loss": 0.3809, "step": 13982 }, { "epoch": 1.2549639900383658, "grad_norm": 0.4910055552484773, "learning_rate": 7.218182602074772e-06, "loss": 0.3719, "step": 13983 }, { "epoch": 1.2550537344910369, "grad_norm": 0.45221394376263363, "learning_rate": 7.217714631878273e-06, "loss": 0.3928, "step": 13984 }, { "epoch": 1.2551434789437077, "grad_norm": 0.5469606258127733, "learning_rate": 7.217246637495826e-06, "loss": 0.5074, "step": 13985 }, { "epoch": 1.2552332233963788, "grad_norm": 0.5007725397841246, "learning_rate": 7.216778618932535e-06, "loss": 0.3876, "step": 13986 }, { "epoch": 1.2553229678490498, "grad_norm": 0.4912879108885824, "learning_rate": 7.216310576193502e-06, "loss": 0.3876, "step": 13987 }, { "epoch": 1.255412712301721, "grad_norm": 0.4623078297810563, "learning_rate": 7.215842509283834e-06, "loss": 0.4001, "step": 13988 }, { "epoch": 1.255502456754392, "grad_norm": 0.5287807010839617, "learning_rate": 7.215374418208636e-06, "loss": 0.4138, "step": 13989 }, { "epoch": 1.2555922012070628, "grad_norm": 0.4915666735473072, "learning_rate": 7.214906302973011e-06, "loss": 0.3807, "step": 13990 }, { "epoch": 1.255681945659734, "grad_norm": 0.4775315177683935, "learning_rate": 7.214438163582064e-06, "loss": 0.4052, "step": 13991 }, { "epoch": 1.255771690112405, "grad_norm": 0.4915837302247053, "learning_rate": 7.213970000040902e-06, "loss": 0.4183, "step": 13992 }, { "epoch": 1.2558614345650758, "grad_norm": 0.5214806653937575, "learning_rate": 7.213501812354629e-06, "loss": 0.4068, "step": 13993 }, { "epoch": 1.2559511790177469, "grad_norm": 0.46322078398413774, "learning_rate": 7.213033600528353e-06, "loss": 0.3475, "step": 13994 }, { "epoch": 1.256040923470418, "grad_norm": 0.49109244773902533, "learning_rate": 7.212565364567179e-06, "loss": 0.4384, "step": 13995 }, { "epoch": 1.256130667923089, "grad_norm": 0.5222695809687782, "learning_rate": 7.212097104476213e-06, "loss": 0.4453, "step": 13996 }, { "epoch": 1.25622041237576, "grad_norm": 0.4939833396258455, "learning_rate": 7.211628820260562e-06, "loss": 0.3801, "step": 13997 }, { "epoch": 1.2563101568284312, "grad_norm": 0.4769259065741932, "learning_rate": 7.2111605119253345e-06, "loss": 0.3892, "step": 13998 }, { "epoch": 1.256399901281102, "grad_norm": 0.4883391287243375, "learning_rate": 7.210692179475636e-06, "loss": 0.4169, "step": 13999 }, { "epoch": 1.256489645733773, "grad_norm": 0.4966576947838958, "learning_rate": 7.2102238229165754e-06, "loss": 0.3926, "step": 14000 }, { "epoch": 1.2565793901864442, "grad_norm": 0.5069345288741509, "learning_rate": 7.2097554422532585e-06, "loss": 0.4223, "step": 14001 }, { "epoch": 1.256669134639115, "grad_norm": 0.5421722962166389, "learning_rate": 7.209287037490796e-06, "loss": 0.3858, "step": 14002 }, { "epoch": 1.256758879091786, "grad_norm": 0.5740661339670962, "learning_rate": 7.208818608634294e-06, "loss": 0.4183, "step": 14003 }, { "epoch": 1.2568486235444571, "grad_norm": 0.4819931046822103, "learning_rate": 7.208350155688862e-06, "loss": 0.4106, "step": 14004 }, { "epoch": 1.2569383679971282, "grad_norm": 0.4651758769429253, "learning_rate": 7.20788167865961e-06, "loss": 0.316, "step": 14005 }, { "epoch": 1.2570281124497993, "grad_norm": 0.5143244747685983, "learning_rate": 7.207413177551645e-06, "loss": 0.4422, "step": 14006 }, { "epoch": 1.2571178569024704, "grad_norm": 0.5149818608541498, "learning_rate": 7.206944652370077e-06, "loss": 0.3689, "step": 14007 }, { "epoch": 1.2572076013551412, "grad_norm": 0.4800577244520787, "learning_rate": 7.206476103120016e-06, "loss": 0.4694, "step": 14008 }, { "epoch": 1.2572973458078123, "grad_norm": 0.536026723395962, "learning_rate": 7.2060075298065725e-06, "loss": 0.4517, "step": 14009 }, { "epoch": 1.2573870902604833, "grad_norm": 0.5319544423799449, "learning_rate": 7.205538932434855e-06, "loss": 0.4348, "step": 14010 }, { "epoch": 1.2574768347131542, "grad_norm": 0.5458792910382284, "learning_rate": 7.205070311009975e-06, "loss": 0.3836, "step": 14011 }, { "epoch": 1.2575665791658253, "grad_norm": 0.48175802423072583, "learning_rate": 7.204601665537044e-06, "loss": 0.4207, "step": 14012 }, { "epoch": 1.2576563236184963, "grad_norm": 0.4433901611104129, "learning_rate": 7.204132996021172e-06, "loss": 0.3535, "step": 14013 }, { "epoch": 1.2577460680711674, "grad_norm": 0.472819777014851, "learning_rate": 7.203664302467471e-06, "loss": 0.4069, "step": 14014 }, { "epoch": 1.2578358125238385, "grad_norm": 0.5333693147730469, "learning_rate": 7.203195584881051e-06, "loss": 0.4546, "step": 14015 }, { "epoch": 1.2579255569765093, "grad_norm": 0.5444523938380128, "learning_rate": 7.202726843267025e-06, "loss": 0.514, "step": 14016 }, { "epoch": 1.2580153014291804, "grad_norm": 0.5547277505953823, "learning_rate": 7.202258077630503e-06, "loss": 0.3313, "step": 14017 }, { "epoch": 1.2581050458818515, "grad_norm": 0.49014507448437894, "learning_rate": 7.2017892879766e-06, "loss": 0.4296, "step": 14018 }, { "epoch": 1.2581947903345225, "grad_norm": 0.5169463609954873, "learning_rate": 7.201320474310427e-06, "loss": 0.4047, "step": 14019 }, { "epoch": 1.2582845347871934, "grad_norm": 0.49610465283575067, "learning_rate": 7.200851636637099e-06, "loss": 0.42, "step": 14020 }, { "epoch": 1.2583742792398644, "grad_norm": 0.4826879484735813, "learning_rate": 7.200382774961725e-06, "loss": 0.483, "step": 14021 }, { "epoch": 1.2584640236925355, "grad_norm": 0.48611175469913676, "learning_rate": 7.19991388928942e-06, "loss": 0.4122, "step": 14022 }, { "epoch": 1.2585537681452066, "grad_norm": 0.5150946832470614, "learning_rate": 7.1994449796253e-06, "loss": 0.353, "step": 14023 }, { "epoch": 1.2586435125978777, "grad_norm": 0.5168466691879446, "learning_rate": 7.198976045974476e-06, "loss": 0.4261, "step": 14024 }, { "epoch": 1.2587332570505485, "grad_norm": 0.4893636661663706, "learning_rate": 7.198507088342064e-06, "loss": 0.3767, "step": 14025 }, { "epoch": 1.2588230015032196, "grad_norm": 0.4837418864914704, "learning_rate": 7.198038106733176e-06, "loss": 0.3749, "step": 14026 }, { "epoch": 1.2589127459558906, "grad_norm": 0.5361216983828709, "learning_rate": 7.197569101152928e-06, "loss": 0.4054, "step": 14027 }, { "epoch": 1.2590024904085615, "grad_norm": 0.48234910197214204, "learning_rate": 7.197100071606436e-06, "loss": 0.3872, "step": 14028 }, { "epoch": 1.2590922348612326, "grad_norm": 0.5426733016670408, "learning_rate": 7.196631018098812e-06, "loss": 0.425, "step": 14029 }, { "epoch": 1.2591819793139036, "grad_norm": 0.44408693970687174, "learning_rate": 7.196161940635175e-06, "loss": 0.3883, "step": 14030 }, { "epoch": 1.2592717237665747, "grad_norm": 0.4916437277915052, "learning_rate": 7.195692839220638e-06, "loss": 0.4017, "step": 14031 }, { "epoch": 1.2593614682192458, "grad_norm": 0.4906202023451754, "learning_rate": 7.195223713860319e-06, "loss": 0.3762, "step": 14032 }, { "epoch": 1.2594512126719168, "grad_norm": 0.45479076552025144, "learning_rate": 7.194754564559332e-06, "loss": 0.391, "step": 14033 }, { "epoch": 1.2595409571245877, "grad_norm": 0.516263388213751, "learning_rate": 7.194285391322795e-06, "loss": 0.3796, "step": 14034 }, { "epoch": 1.2596307015772588, "grad_norm": 0.5168957020095498, "learning_rate": 7.193816194155825e-06, "loss": 0.3714, "step": 14035 }, { "epoch": 1.2597204460299298, "grad_norm": 0.49928409138870006, "learning_rate": 7.193346973063537e-06, "loss": 0.3591, "step": 14036 }, { "epoch": 1.2598101904826007, "grad_norm": 0.5122574545652115, "learning_rate": 7.1928777280510506e-06, "loss": 0.4094, "step": 14037 }, { "epoch": 1.2598999349352717, "grad_norm": 0.49922390488000956, "learning_rate": 7.192408459123482e-06, "loss": 0.3826, "step": 14038 }, { "epoch": 1.2599896793879428, "grad_norm": 0.441447761692468, "learning_rate": 7.191939166285949e-06, "loss": 0.3837, "step": 14039 }, { "epoch": 1.2600794238406139, "grad_norm": 0.4499527960265347, "learning_rate": 7.1914698495435706e-06, "loss": 0.4047, "step": 14040 }, { "epoch": 1.260169168293285, "grad_norm": 0.4995789239371604, "learning_rate": 7.1910005089014624e-06, "loss": 0.4153, "step": 14041 }, { "epoch": 1.260258912745956, "grad_norm": 0.47878062866964477, "learning_rate": 7.1905311443647454e-06, "loss": 0.3694, "step": 14042 }, { "epoch": 1.2603486571986269, "grad_norm": 0.5148683328643688, "learning_rate": 7.190061755938539e-06, "loss": 0.4023, "step": 14043 }, { "epoch": 1.260438401651298, "grad_norm": 0.5431790296373998, "learning_rate": 7.18959234362796e-06, "loss": 0.4474, "step": 14044 }, { "epoch": 1.260528146103969, "grad_norm": 0.5166959364027762, "learning_rate": 7.18912290743813e-06, "loss": 0.4011, "step": 14045 }, { "epoch": 1.2606178905566399, "grad_norm": 0.5154906842129843, "learning_rate": 7.1886534473741655e-06, "loss": 0.3716, "step": 14046 }, { "epoch": 1.260707635009311, "grad_norm": 0.4880594307395569, "learning_rate": 7.1881839634411886e-06, "loss": 0.4073, "step": 14047 }, { "epoch": 1.260797379461982, "grad_norm": 0.4829426777586136, "learning_rate": 7.187714455644319e-06, "loss": 0.3577, "step": 14048 }, { "epoch": 1.260887123914653, "grad_norm": 0.5449675445105397, "learning_rate": 7.187244923988679e-06, "loss": 0.5386, "step": 14049 }, { "epoch": 1.2609768683673241, "grad_norm": 0.5806389238288653, "learning_rate": 7.186775368479384e-06, "loss": 0.3972, "step": 14050 }, { "epoch": 1.261066612819995, "grad_norm": 0.5342053411135501, "learning_rate": 7.18630578912156e-06, "loss": 0.5079, "step": 14051 }, { "epoch": 1.261156357272666, "grad_norm": 0.5192525732830445, "learning_rate": 7.185836185920326e-06, "loss": 0.4444, "step": 14052 }, { "epoch": 1.2612461017253371, "grad_norm": 0.5077213133588704, "learning_rate": 7.185366558880804e-06, "loss": 0.3834, "step": 14053 }, { "epoch": 1.2613358461780082, "grad_norm": 0.4994203288245712, "learning_rate": 7.184896908008114e-06, "loss": 0.3661, "step": 14054 }, { "epoch": 1.261425590630679, "grad_norm": 0.510646652258275, "learning_rate": 7.184427233307381e-06, "loss": 0.4359, "step": 14055 }, { "epoch": 1.2615153350833501, "grad_norm": 0.5262696077502784, "learning_rate": 7.1839575347837245e-06, "loss": 0.3937, "step": 14056 }, { "epoch": 1.2616050795360212, "grad_norm": 0.4788963823724961, "learning_rate": 7.183487812442268e-06, "loss": 0.4544, "step": 14057 }, { "epoch": 1.2616948239886923, "grad_norm": 0.5041924164250127, "learning_rate": 7.183018066288133e-06, "loss": 0.3409, "step": 14058 }, { "epoch": 1.2617845684413633, "grad_norm": 0.46238831294080934, "learning_rate": 7.182548296326445e-06, "loss": 0.386, "step": 14059 }, { "epoch": 1.2618743128940342, "grad_norm": 0.5472783588804165, "learning_rate": 7.182078502562324e-06, "loss": 0.4447, "step": 14060 }, { "epoch": 1.2619640573467052, "grad_norm": 0.47981851267380315, "learning_rate": 7.181608685000898e-06, "loss": 0.3834, "step": 14061 }, { "epoch": 1.2620538017993763, "grad_norm": 0.5065980937640904, "learning_rate": 7.181138843647285e-06, "loss": 0.416, "step": 14062 }, { "epoch": 1.2621435462520472, "grad_norm": 0.47160475170051097, "learning_rate": 7.180668978506613e-06, "loss": 0.3526, "step": 14063 }, { "epoch": 1.2622332907047182, "grad_norm": 0.4855565788176001, "learning_rate": 7.180199089584005e-06, "loss": 0.3835, "step": 14064 }, { "epoch": 1.2623230351573893, "grad_norm": 0.5156237603140994, "learning_rate": 7.179729176884587e-06, "loss": 0.3895, "step": 14065 }, { "epoch": 1.2624127796100604, "grad_norm": 0.471375561353624, "learning_rate": 7.179259240413481e-06, "loss": 0.4476, "step": 14066 }, { "epoch": 1.2625025240627314, "grad_norm": 0.5286470626943666, "learning_rate": 7.178789280175814e-06, "loss": 0.3772, "step": 14067 }, { "epoch": 1.2625922685154025, "grad_norm": 0.47560315947412873, "learning_rate": 7.17831929617671e-06, "loss": 0.3414, "step": 14068 }, { "epoch": 1.2626820129680734, "grad_norm": 0.48786108437544273, "learning_rate": 7.177849288421297e-06, "loss": 0.3774, "step": 14069 }, { "epoch": 1.2627717574207444, "grad_norm": 0.4933664913692731, "learning_rate": 7.1773792569146976e-06, "loss": 0.4172, "step": 14070 }, { "epoch": 1.2628615018734155, "grad_norm": 0.5085682953448962, "learning_rate": 7.176909201662039e-06, "loss": 0.4669, "step": 14071 }, { "epoch": 1.2629512463260864, "grad_norm": 0.5048273121888279, "learning_rate": 7.176439122668449e-06, "loss": 0.3994, "step": 14072 }, { "epoch": 1.2630409907787574, "grad_norm": 0.49799018469618334, "learning_rate": 7.175969019939052e-06, "loss": 0.4145, "step": 14073 }, { "epoch": 1.2631307352314285, "grad_norm": 0.4916383909459929, "learning_rate": 7.175498893478978e-06, "loss": 0.4112, "step": 14074 }, { "epoch": 1.2632204796840996, "grad_norm": 0.5270918536094019, "learning_rate": 7.17502874329335e-06, "loss": 0.4328, "step": 14075 }, { "epoch": 1.2633102241367706, "grad_norm": 0.522840311282397, "learning_rate": 7.174558569387299e-06, "loss": 0.4183, "step": 14076 }, { "epoch": 1.2633999685894417, "grad_norm": 0.48127600913685603, "learning_rate": 7.174088371765951e-06, "loss": 0.4896, "step": 14077 }, { "epoch": 1.2634897130421125, "grad_norm": 0.5514727025404856, "learning_rate": 7.173618150434432e-06, "loss": 0.4133, "step": 14078 }, { "epoch": 1.2635794574947836, "grad_norm": 0.5131238095389209, "learning_rate": 7.173147905397874e-06, "loss": 0.4392, "step": 14079 }, { "epoch": 1.2636692019474547, "grad_norm": 0.5308370196659601, "learning_rate": 7.172677636661403e-06, "loss": 0.3632, "step": 14080 }, { "epoch": 1.2637589464001255, "grad_norm": 0.4798441937099695, "learning_rate": 7.172207344230147e-06, "loss": 0.3847, "step": 14081 }, { "epoch": 1.2638486908527966, "grad_norm": 0.47637257167302743, "learning_rate": 7.171737028109238e-06, "loss": 0.3373, "step": 14082 }, { "epoch": 1.2639384353054677, "grad_norm": 0.5089944700224571, "learning_rate": 7.171266688303802e-06, "loss": 0.4646, "step": 14083 }, { "epoch": 1.2640281797581387, "grad_norm": 0.507149854222816, "learning_rate": 7.17079632481897e-06, "loss": 0.4017, "step": 14084 }, { "epoch": 1.2641179242108098, "grad_norm": 0.4740175610960511, "learning_rate": 7.170325937659871e-06, "loss": 0.3804, "step": 14085 }, { "epoch": 1.2642076686634807, "grad_norm": 0.49646002275866774, "learning_rate": 7.169855526831636e-06, "loss": 0.4205, "step": 14086 }, { "epoch": 1.2642974131161517, "grad_norm": 0.5515308244842384, "learning_rate": 7.1693850923393934e-06, "loss": 0.4149, "step": 14087 }, { "epoch": 1.2643871575688228, "grad_norm": 0.44350200250153593, "learning_rate": 7.1689146341882755e-06, "loss": 0.348, "step": 14088 }, { "epoch": 1.2644769020214939, "grad_norm": 0.4853893582100793, "learning_rate": 7.168444152383411e-06, "loss": 0.4153, "step": 14089 }, { "epoch": 1.2645666464741647, "grad_norm": 0.5085091212217534, "learning_rate": 7.167973646929935e-06, "loss": 0.3414, "step": 14090 }, { "epoch": 1.2646563909268358, "grad_norm": 0.5267312694214851, "learning_rate": 7.167503117832974e-06, "loss": 0.4216, "step": 14091 }, { "epoch": 1.2647461353795069, "grad_norm": 0.45676226680614096, "learning_rate": 7.167032565097662e-06, "loss": 0.4282, "step": 14092 }, { "epoch": 1.264835879832178, "grad_norm": 0.5147567328259581, "learning_rate": 7.166561988729128e-06, "loss": 0.4184, "step": 14093 }, { "epoch": 1.264925624284849, "grad_norm": 0.5286379989778993, "learning_rate": 7.1660913887325086e-06, "loss": 0.4762, "step": 14094 }, { "epoch": 1.2650153687375199, "grad_norm": 0.530083573827153, "learning_rate": 7.165620765112933e-06, "loss": 0.3682, "step": 14095 }, { "epoch": 1.265105113190191, "grad_norm": 0.5350320969083907, "learning_rate": 7.1651501178755346e-06, "loss": 0.4131, "step": 14096 }, { "epoch": 1.265194857642862, "grad_norm": 0.5014782946463656, "learning_rate": 7.164679447025445e-06, "loss": 0.4131, "step": 14097 }, { "epoch": 1.2652846020955328, "grad_norm": 0.4743739445606777, "learning_rate": 7.164208752567799e-06, "loss": 0.4042, "step": 14098 }, { "epoch": 1.265374346548204, "grad_norm": 0.4898112375787929, "learning_rate": 7.163738034507728e-06, "loss": 0.3469, "step": 14099 }, { "epoch": 1.265464091000875, "grad_norm": 0.499517832448513, "learning_rate": 7.163267292850368e-06, "loss": 0.432, "step": 14100 }, { "epoch": 1.265553835453546, "grad_norm": 0.49864553900075453, "learning_rate": 7.1627965276008505e-06, "loss": 0.3847, "step": 14101 }, { "epoch": 1.2656435799062171, "grad_norm": 0.46527614428090036, "learning_rate": 7.162325738764311e-06, "loss": 0.3742, "step": 14102 }, { "epoch": 1.2657333243588882, "grad_norm": 0.4829639251800662, "learning_rate": 7.161854926345883e-06, "loss": 0.3767, "step": 14103 }, { "epoch": 1.265823068811559, "grad_norm": 0.4621743152612705, "learning_rate": 7.161384090350701e-06, "loss": 0.4511, "step": 14104 }, { "epoch": 1.26591281326423, "grad_norm": 0.5257920347316395, "learning_rate": 7.1609132307839005e-06, "loss": 0.435, "step": 14105 }, { "epoch": 1.2660025577169012, "grad_norm": 0.5205852634338365, "learning_rate": 7.160442347650616e-06, "loss": 0.3832, "step": 14106 }, { "epoch": 1.266092302169572, "grad_norm": 0.4731419520921012, "learning_rate": 7.159971440955984e-06, "loss": 0.3554, "step": 14107 }, { "epoch": 1.266182046622243, "grad_norm": 0.5040886635285622, "learning_rate": 7.15950051070514e-06, "loss": 0.4403, "step": 14108 }, { "epoch": 1.2662717910749142, "grad_norm": 0.5337121390541857, "learning_rate": 7.159029556903219e-06, "loss": 0.4297, "step": 14109 }, { "epoch": 1.2663615355275852, "grad_norm": 0.507442145413249, "learning_rate": 7.158558579555355e-06, "loss": 0.3956, "step": 14110 }, { "epoch": 1.2664512799802563, "grad_norm": 0.4973867417118216, "learning_rate": 7.158087578666689e-06, "loss": 0.3825, "step": 14111 }, { "epoch": 1.2665410244329274, "grad_norm": 0.5042975362241964, "learning_rate": 7.157616554242355e-06, "loss": 0.4693, "step": 14112 }, { "epoch": 1.2666307688855982, "grad_norm": 0.541765884302822, "learning_rate": 7.157145506287491e-06, "loss": 0.3832, "step": 14113 }, { "epoch": 1.2667205133382693, "grad_norm": 0.4614624922315117, "learning_rate": 7.1566744348072315e-06, "loss": 0.3718, "step": 14114 }, { "epoch": 1.2668102577909404, "grad_norm": 0.49767251844334615, "learning_rate": 7.1562033398067164e-06, "loss": 0.3912, "step": 14115 }, { "epoch": 1.2669000022436112, "grad_norm": 0.4954616036248195, "learning_rate": 7.155732221291083e-06, "loss": 0.4347, "step": 14116 }, { "epoch": 1.2669897466962823, "grad_norm": 0.4883963910401694, "learning_rate": 7.15526107926547e-06, "loss": 0.3941, "step": 14117 }, { "epoch": 1.2670794911489534, "grad_norm": 0.47682829897895124, "learning_rate": 7.154789913735013e-06, "loss": 0.334, "step": 14118 }, { "epoch": 1.2671692356016244, "grad_norm": 0.5138504250513153, "learning_rate": 7.1543187247048525e-06, "loss": 0.3868, "step": 14119 }, { "epoch": 1.2672589800542955, "grad_norm": 0.46714661115150613, "learning_rate": 7.153847512180128e-06, "loss": 0.3571, "step": 14120 }, { "epoch": 1.2673487245069663, "grad_norm": 0.47310004815220136, "learning_rate": 7.153376276165975e-06, "loss": 0.3925, "step": 14121 }, { "epoch": 1.2674384689596374, "grad_norm": 0.5011132970107375, "learning_rate": 7.152905016667537e-06, "loss": 0.3515, "step": 14122 }, { "epoch": 1.2675282134123085, "grad_norm": 0.4905265376631175, "learning_rate": 7.15243373368995e-06, "loss": 0.4019, "step": 14123 }, { "epoch": 1.2676179578649795, "grad_norm": 0.4948145850647705, "learning_rate": 7.151962427238355e-06, "loss": 0.3468, "step": 14124 }, { "epoch": 1.2677077023176504, "grad_norm": 0.48798718662316154, "learning_rate": 7.151491097317893e-06, "loss": 0.4, "step": 14125 }, { "epoch": 1.2677974467703215, "grad_norm": 0.4939236865014274, "learning_rate": 7.151019743933701e-06, "loss": 0.4181, "step": 14126 }, { "epoch": 1.2678871912229925, "grad_norm": 0.5190778638834644, "learning_rate": 7.150548367090925e-06, "loss": 0.4365, "step": 14127 }, { "epoch": 1.2679769356756636, "grad_norm": 0.5055142917763692, "learning_rate": 7.1500769667947e-06, "loss": 0.4165, "step": 14128 }, { "epoch": 1.2680666801283347, "grad_norm": 0.5286818809198995, "learning_rate": 7.149605543050171e-06, "loss": 0.3849, "step": 14129 }, { "epoch": 1.2681564245810055, "grad_norm": 0.4676852858521243, "learning_rate": 7.149134095862476e-06, "loss": 0.3804, "step": 14130 }, { "epoch": 1.2682461690336766, "grad_norm": 0.4919530566614317, "learning_rate": 7.148662625236759e-06, "loss": 0.3894, "step": 14131 }, { "epoch": 1.2683359134863477, "grad_norm": 0.47541240088545766, "learning_rate": 7.148191131178161e-06, "loss": 0.3874, "step": 14132 }, { "epoch": 1.2684256579390185, "grad_norm": 0.477180559087523, "learning_rate": 7.147719613691824e-06, "loss": 0.3348, "step": 14133 }, { "epoch": 1.2685154023916896, "grad_norm": 0.4901728530695773, "learning_rate": 7.147248072782891e-06, "loss": 0.5184, "step": 14134 }, { "epoch": 1.2686051468443607, "grad_norm": 0.4906234346741301, "learning_rate": 7.146776508456503e-06, "loss": 0.3247, "step": 14135 }, { "epoch": 1.2686948912970317, "grad_norm": 0.4556099909949453, "learning_rate": 7.146304920717805e-06, "loss": 0.387, "step": 14136 }, { "epoch": 1.2687846357497028, "grad_norm": 0.5243625806608517, "learning_rate": 7.145833309571937e-06, "loss": 0.4237, "step": 14137 }, { "epoch": 1.2688743802023739, "grad_norm": 0.4955355544405299, "learning_rate": 7.145361675024044e-06, "loss": 0.3821, "step": 14138 }, { "epoch": 1.2689641246550447, "grad_norm": 0.4693632014561503, "learning_rate": 7.14489001707927e-06, "loss": 0.3468, "step": 14139 }, { "epoch": 1.2690538691077158, "grad_norm": 0.4902952744400166, "learning_rate": 7.144418335742758e-06, "loss": 0.4311, "step": 14140 }, { "epoch": 1.2691436135603869, "grad_norm": 0.48245098829059413, "learning_rate": 7.1439466310196535e-06, "loss": 0.3861, "step": 14141 }, { "epoch": 1.2692333580130577, "grad_norm": 0.4554393953425729, "learning_rate": 7.143474902915098e-06, "loss": 0.3561, "step": 14142 }, { "epoch": 1.2693231024657288, "grad_norm": 0.46856177497099627, "learning_rate": 7.143003151434239e-06, "loss": 0.4204, "step": 14143 }, { "epoch": 1.2694128469183998, "grad_norm": 0.5232513561681627, "learning_rate": 7.14253137658222e-06, "loss": 0.4505, "step": 14144 }, { "epoch": 1.269502591371071, "grad_norm": 0.49713194020428086, "learning_rate": 7.1420595783641855e-06, "loss": 0.4088, "step": 14145 }, { "epoch": 1.269592335823742, "grad_norm": 0.525450888378667, "learning_rate": 7.141587756785282e-06, "loss": 0.4011, "step": 14146 }, { "epoch": 1.269682080276413, "grad_norm": 0.5020099664925581, "learning_rate": 7.141115911850655e-06, "loss": 0.4216, "step": 14147 }, { "epoch": 1.269771824729084, "grad_norm": 0.4807210250587316, "learning_rate": 7.1406440435654475e-06, "loss": 0.3851, "step": 14148 }, { "epoch": 1.269861569181755, "grad_norm": 0.5280946660412577, "learning_rate": 7.140172151934811e-06, "loss": 0.4006, "step": 14149 }, { "epoch": 1.269951313634426, "grad_norm": 0.47235146823663765, "learning_rate": 7.139700236963887e-06, "loss": 0.3917, "step": 14150 }, { "epoch": 1.2700410580870969, "grad_norm": 0.4900415452879282, "learning_rate": 7.139228298657825e-06, "loss": 0.3156, "step": 14151 }, { "epoch": 1.270130802539768, "grad_norm": 0.4804692239575341, "learning_rate": 7.138756337021772e-06, "loss": 0.3672, "step": 14152 }, { "epoch": 1.270220546992439, "grad_norm": 0.45981896146593887, "learning_rate": 7.138284352060871e-06, "loss": 0.3798, "step": 14153 }, { "epoch": 1.27031029144511, "grad_norm": 0.47644815839593224, "learning_rate": 7.137812343780275e-06, "loss": 0.42, "step": 14154 }, { "epoch": 1.2704000358977812, "grad_norm": 0.4856991526807062, "learning_rate": 7.137340312185128e-06, "loss": 0.3431, "step": 14155 }, { "epoch": 1.270489780350452, "grad_norm": 0.48118060918714683, "learning_rate": 7.136868257280578e-06, "loss": 0.392, "step": 14156 }, { "epoch": 1.270579524803123, "grad_norm": 0.46843762657767124, "learning_rate": 7.136396179071775e-06, "loss": 0.4223, "step": 14157 }, { "epoch": 1.2706692692557942, "grad_norm": 0.5184825107555473, "learning_rate": 7.135924077563866e-06, "loss": 0.4493, "step": 14158 }, { "epoch": 1.2707590137084652, "grad_norm": 0.5122957878092719, "learning_rate": 7.135451952762e-06, "loss": 0.3908, "step": 14159 }, { "epoch": 1.270848758161136, "grad_norm": 0.45144327909076204, "learning_rate": 7.134979804671327e-06, "loss": 0.3582, "step": 14160 }, { "epoch": 1.2709385026138071, "grad_norm": 0.4868046894637548, "learning_rate": 7.134507633296993e-06, "loss": 0.4027, "step": 14161 }, { "epoch": 1.2710282470664782, "grad_norm": 0.43636830624867307, "learning_rate": 7.1340354386441514e-06, "loss": 0.3682, "step": 14162 }, { "epoch": 1.2711179915191493, "grad_norm": 0.5403901793233333, "learning_rate": 7.133563220717948e-06, "loss": 0.4485, "step": 14163 }, { "epoch": 1.2712077359718204, "grad_norm": 0.4737069614953966, "learning_rate": 7.133090979523536e-06, "loss": 0.4698, "step": 14164 }, { "epoch": 1.2712974804244912, "grad_norm": 0.5484246298436822, "learning_rate": 7.132618715066064e-06, "loss": 0.4119, "step": 14165 }, { "epoch": 1.2713872248771623, "grad_norm": 0.5260764579738726, "learning_rate": 7.1321464273506825e-06, "loss": 0.439, "step": 14166 }, { "epoch": 1.2714769693298333, "grad_norm": 0.49860023785457497, "learning_rate": 7.131674116382543e-06, "loss": 0.4353, "step": 14167 }, { "epoch": 1.2715667137825042, "grad_norm": 0.5206429990486612, "learning_rate": 7.131201782166795e-06, "loss": 0.3648, "step": 14168 }, { "epoch": 1.2716564582351753, "grad_norm": 0.470072837964205, "learning_rate": 7.130729424708589e-06, "loss": 0.4121, "step": 14169 }, { "epoch": 1.2717462026878463, "grad_norm": 0.5128280425273107, "learning_rate": 7.1302570440130805e-06, "loss": 0.4124, "step": 14170 }, { "epoch": 1.2718359471405174, "grad_norm": 0.4887056774750961, "learning_rate": 7.1297846400854174e-06, "loss": 0.4401, "step": 14171 }, { "epoch": 1.2719256915931885, "grad_norm": 0.5589454738165506, "learning_rate": 7.129312212930752e-06, "loss": 0.4325, "step": 14172 }, { "epoch": 1.2720154360458595, "grad_norm": 0.4953333046712583, "learning_rate": 7.1288397625542385e-06, "loss": 0.3815, "step": 14173 }, { "epoch": 1.2721051804985304, "grad_norm": 0.4626099907314824, "learning_rate": 7.128367288961029e-06, "loss": 0.4017, "step": 14174 }, { "epoch": 1.2721949249512015, "grad_norm": 0.4866521449719099, "learning_rate": 7.127894792156274e-06, "loss": 0.4168, "step": 14175 }, { "epoch": 1.2722846694038725, "grad_norm": 0.506379236909431, "learning_rate": 7.127422272145128e-06, "loss": 0.3631, "step": 14176 }, { "epoch": 1.2723744138565434, "grad_norm": 0.48803527345808023, "learning_rate": 7.126949728932743e-06, "loss": 0.4173, "step": 14177 }, { "epoch": 1.2724641583092144, "grad_norm": 0.49364446429422226, "learning_rate": 7.126477162524275e-06, "loss": 0.3635, "step": 14178 }, { "epoch": 1.2725539027618855, "grad_norm": 0.5107774185213094, "learning_rate": 7.126004572924875e-06, "loss": 0.3876, "step": 14179 }, { "epoch": 1.2726436472145566, "grad_norm": 0.4961122857505598, "learning_rate": 7.125531960139699e-06, "loss": 0.4772, "step": 14180 }, { "epoch": 1.2727333916672277, "grad_norm": 0.5083965908896945, "learning_rate": 7.125059324173899e-06, "loss": 0.3984, "step": 14181 }, { "epoch": 1.2728231361198987, "grad_norm": 0.47617731319963247, "learning_rate": 7.124586665032633e-06, "loss": 0.3716, "step": 14182 }, { "epoch": 1.2729128805725696, "grad_norm": 0.4770563562846251, "learning_rate": 7.124113982721052e-06, "loss": 0.3573, "step": 14183 }, { "epoch": 1.2730026250252406, "grad_norm": 0.4748111007754397, "learning_rate": 7.1236412772443135e-06, "loss": 0.3882, "step": 14184 }, { "epoch": 1.2730923694779117, "grad_norm": 0.4784954363496619, "learning_rate": 7.1231685486075705e-06, "loss": 0.3848, "step": 14185 }, { "epoch": 1.2731821139305826, "grad_norm": 0.4832647635001886, "learning_rate": 7.1226957968159816e-06, "loss": 0.4139, "step": 14186 }, { "epoch": 1.2732718583832536, "grad_norm": 0.5471705220019968, "learning_rate": 7.122223021874699e-06, "loss": 0.4262, "step": 14187 }, { "epoch": 1.2733616028359247, "grad_norm": 0.4714819461442487, "learning_rate": 7.121750223788881e-06, "loss": 0.4408, "step": 14188 }, { "epoch": 1.2734513472885958, "grad_norm": 0.5348605693556953, "learning_rate": 7.121277402563683e-06, "loss": 0.4013, "step": 14189 }, { "epoch": 1.2735410917412668, "grad_norm": 0.4901738809125195, "learning_rate": 7.1208045582042615e-06, "loss": 0.4193, "step": 14190 }, { "epoch": 1.2736308361939377, "grad_norm": 0.4968392532518725, "learning_rate": 7.120331690715774e-06, "loss": 0.4327, "step": 14191 }, { "epoch": 1.2737205806466088, "grad_norm": 0.5099383108731741, "learning_rate": 7.119858800103378e-06, "loss": 0.3868, "step": 14192 }, { "epoch": 1.2738103250992798, "grad_norm": 0.4591024890614567, "learning_rate": 7.119385886372228e-06, "loss": 0.3351, "step": 14193 }, { "epoch": 1.273900069551951, "grad_norm": 0.47530831846222643, "learning_rate": 7.118912949527483e-06, "loss": 0.3996, "step": 14194 }, { "epoch": 1.2739898140046217, "grad_norm": 0.5057947048594434, "learning_rate": 7.1184399895743015e-06, "loss": 0.3445, "step": 14195 }, { "epoch": 1.2740795584572928, "grad_norm": 0.47584550875792636, "learning_rate": 7.117967006517842e-06, "loss": 0.3899, "step": 14196 }, { "epoch": 1.2741693029099639, "grad_norm": 0.4828776741226237, "learning_rate": 7.11749400036326e-06, "loss": 0.3581, "step": 14197 }, { "epoch": 1.274259047362635, "grad_norm": 0.5132532964980644, "learning_rate": 7.1170209711157154e-06, "loss": 0.4427, "step": 14198 }, { "epoch": 1.274348791815306, "grad_norm": 0.5401065105894144, "learning_rate": 7.11654791878037e-06, "loss": 0.4764, "step": 14199 }, { "epoch": 1.2744385362679769, "grad_norm": 0.5480906574143343, "learning_rate": 7.1160748433623775e-06, "loss": 0.3311, "step": 14200 }, { "epoch": 1.274528280720648, "grad_norm": 0.47308295564610886, "learning_rate": 7.1156017448669015e-06, "loss": 0.4266, "step": 14201 }, { "epoch": 1.274618025173319, "grad_norm": 0.515974887572035, "learning_rate": 7.1151286232990975e-06, "loss": 0.3808, "step": 14202 }, { "epoch": 1.2747077696259899, "grad_norm": 0.48800172096812194, "learning_rate": 7.11465547866413e-06, "loss": 0.3957, "step": 14203 }, { "epoch": 1.274797514078661, "grad_norm": 0.49023576576348005, "learning_rate": 7.114182310967156e-06, "loss": 0.415, "step": 14204 }, { "epoch": 1.274887258531332, "grad_norm": 0.5324026334708627, "learning_rate": 7.113709120213336e-06, "loss": 0.4419, "step": 14205 }, { "epoch": 1.274977002984003, "grad_norm": 0.5017952461713854, "learning_rate": 7.113235906407831e-06, "loss": 0.3464, "step": 14206 }, { "epoch": 1.2750667474366741, "grad_norm": 0.5123918772183714, "learning_rate": 7.112762669555802e-06, "loss": 0.4695, "step": 14207 }, { "epoch": 1.2751564918893452, "grad_norm": 0.568248877605194, "learning_rate": 7.112289409662408e-06, "loss": 0.4289, "step": 14208 }, { "epoch": 1.275246236342016, "grad_norm": 0.522296991946495, "learning_rate": 7.111816126732814e-06, "loss": 0.4529, "step": 14209 }, { "epoch": 1.2753359807946871, "grad_norm": 0.5295646267944427, "learning_rate": 7.111342820772178e-06, "loss": 0.4903, "step": 14210 }, { "epoch": 1.2754257252473582, "grad_norm": 0.48940574948500115, "learning_rate": 7.110869491785663e-06, "loss": 0.3958, "step": 14211 }, { "epoch": 1.275515469700029, "grad_norm": 0.5151344822116295, "learning_rate": 7.110396139778433e-06, "loss": 0.3557, "step": 14212 }, { "epoch": 1.2756052141527001, "grad_norm": 0.49092724422339684, "learning_rate": 7.109922764755648e-06, "loss": 0.4701, "step": 14213 }, { "epoch": 1.2756949586053712, "grad_norm": 0.5249583883673875, "learning_rate": 7.109449366722471e-06, "loss": 0.4454, "step": 14214 }, { "epoch": 1.2757847030580423, "grad_norm": 0.49201149208702677, "learning_rate": 7.108975945684065e-06, "loss": 0.4235, "step": 14215 }, { "epoch": 1.2758744475107133, "grad_norm": 0.502354212471993, "learning_rate": 7.108502501645591e-06, "loss": 0.4084, "step": 14216 }, { "epoch": 1.2759641919633844, "grad_norm": 0.5045704459520426, "learning_rate": 7.108029034612217e-06, "loss": 0.4122, "step": 14217 }, { "epoch": 1.2760539364160552, "grad_norm": 0.46905436390411864, "learning_rate": 7.107555544589102e-06, "loss": 0.3467, "step": 14218 }, { "epoch": 1.2761436808687263, "grad_norm": 0.45606297457168216, "learning_rate": 7.1070820315814126e-06, "loss": 0.4132, "step": 14219 }, { "epoch": 1.2762334253213974, "grad_norm": 0.48297299655454146, "learning_rate": 7.10660849559431e-06, "loss": 0.4087, "step": 14220 }, { "epoch": 1.2763231697740682, "grad_norm": 0.5265801768520222, "learning_rate": 7.1061349366329624e-06, "loss": 0.3821, "step": 14221 }, { "epoch": 1.2764129142267393, "grad_norm": 0.4982731895657585, "learning_rate": 7.1056613547025315e-06, "loss": 0.4407, "step": 14222 }, { "epoch": 1.2765026586794104, "grad_norm": 0.4823993810231277, "learning_rate": 7.105187749808183e-06, "loss": 0.413, "step": 14223 }, { "epoch": 1.2765924031320814, "grad_norm": 0.5044140665936532, "learning_rate": 7.104714121955079e-06, "loss": 0.4045, "step": 14224 }, { "epoch": 1.2766821475847525, "grad_norm": 0.49861421515954807, "learning_rate": 7.104240471148391e-06, "loss": 0.3902, "step": 14225 }, { "epoch": 1.2767718920374234, "grad_norm": 0.4610380467877132, "learning_rate": 7.103766797393279e-06, "loss": 0.448, "step": 14226 }, { "epoch": 1.2768616364900944, "grad_norm": 0.5444149628441801, "learning_rate": 7.103293100694913e-06, "loss": 0.4348, "step": 14227 }, { "epoch": 1.2769513809427655, "grad_norm": 0.499396511896601, "learning_rate": 7.1028193810584545e-06, "loss": 0.3847, "step": 14228 }, { "epoch": 1.2770411253954366, "grad_norm": 0.5042306036236947, "learning_rate": 7.102345638489072e-06, "loss": 0.425, "step": 14229 }, { "epoch": 1.2771308698481074, "grad_norm": 0.4835808423345716, "learning_rate": 7.101871872991934e-06, "loss": 0.3979, "step": 14230 }, { "epoch": 1.2772206143007785, "grad_norm": 0.46819822971628566, "learning_rate": 7.101398084572204e-06, "loss": 0.4227, "step": 14231 }, { "epoch": 1.2773103587534496, "grad_norm": 0.5659201482203262, "learning_rate": 7.100924273235053e-06, "loss": 0.442, "step": 14232 }, { "epoch": 1.2774001032061206, "grad_norm": 0.4849144139144586, "learning_rate": 7.100450438985642e-06, "loss": 0.3691, "step": 14233 }, { "epoch": 1.2774898476587917, "grad_norm": 0.498999714299531, "learning_rate": 7.099976581829145e-06, "loss": 0.4217, "step": 14234 }, { "epoch": 1.2775795921114625, "grad_norm": 0.4883571131994171, "learning_rate": 7.099502701770726e-06, "loss": 0.3763, "step": 14235 }, { "epoch": 1.2776693365641336, "grad_norm": 0.5314909845115938, "learning_rate": 7.099028798815556e-06, "loss": 0.3845, "step": 14236 }, { "epoch": 1.2777590810168047, "grad_norm": 0.4858765995349236, "learning_rate": 7.098554872968801e-06, "loss": 0.3855, "step": 14237 }, { "epoch": 1.2778488254694755, "grad_norm": 0.5135192036518867, "learning_rate": 7.098080924235628e-06, "loss": 0.401, "step": 14238 }, { "epoch": 1.2779385699221466, "grad_norm": 0.5226339760598382, "learning_rate": 7.09760695262121e-06, "loss": 0.473, "step": 14239 }, { "epoch": 1.2780283143748177, "grad_norm": 0.4845498169160174, "learning_rate": 7.097132958130714e-06, "loss": 0.3692, "step": 14240 }, { "epoch": 1.2781180588274887, "grad_norm": 0.5035738551269712, "learning_rate": 7.096658940769307e-06, "loss": 0.4112, "step": 14241 }, { "epoch": 1.2782078032801598, "grad_norm": 0.5217683456207239, "learning_rate": 7.096184900542163e-06, "loss": 0.3784, "step": 14242 }, { "epoch": 1.2782975477328309, "grad_norm": 0.4647813429594042, "learning_rate": 7.095710837454447e-06, "loss": 0.4184, "step": 14243 }, { "epoch": 1.2783872921855017, "grad_norm": 0.49140922454329306, "learning_rate": 7.095236751511334e-06, "loss": 0.3554, "step": 14244 }, { "epoch": 1.2784770366381728, "grad_norm": 0.48840314760254633, "learning_rate": 7.094762642717989e-06, "loss": 0.4073, "step": 14245 }, { "epoch": 1.2785667810908439, "grad_norm": 0.5421458357794696, "learning_rate": 7.0942885110795876e-06, "loss": 0.4615, "step": 14246 }, { "epoch": 1.2786565255435147, "grad_norm": 0.5205325181474563, "learning_rate": 7.093814356601298e-06, "loss": 0.4422, "step": 14247 }, { "epoch": 1.2787462699961858, "grad_norm": 0.5413988053079364, "learning_rate": 7.0933401792882916e-06, "loss": 0.3881, "step": 14248 }, { "epoch": 1.2788360144488569, "grad_norm": 0.4937541124690143, "learning_rate": 7.092865979145739e-06, "loss": 0.3933, "step": 14249 }, { "epoch": 1.278925758901528, "grad_norm": 0.49446597024208483, "learning_rate": 7.092391756178812e-06, "loss": 0.3886, "step": 14250 }, { "epoch": 1.279015503354199, "grad_norm": 0.47357256665811304, "learning_rate": 7.091917510392684e-06, "loss": 0.3479, "step": 14251 }, { "epoch": 1.27910524780687, "grad_norm": 0.4955720677359075, "learning_rate": 7.091443241792525e-06, "loss": 0.4261, "step": 14252 }, { "epoch": 1.279194992259541, "grad_norm": 0.5425302016795203, "learning_rate": 7.0909689503835075e-06, "loss": 0.4168, "step": 14253 }, { "epoch": 1.279284736712212, "grad_norm": 0.4958579452616853, "learning_rate": 7.090494636170806e-06, "loss": 0.3885, "step": 14254 }, { "epoch": 1.279374481164883, "grad_norm": 0.5587892600565515, "learning_rate": 7.09002029915959e-06, "loss": 0.4353, "step": 14255 }, { "epoch": 1.279464225617554, "grad_norm": 0.495110318422829, "learning_rate": 7.089545939355037e-06, "loss": 0.3773, "step": 14256 }, { "epoch": 1.279553970070225, "grad_norm": 0.485647571115696, "learning_rate": 7.089071556762315e-06, "loss": 0.3758, "step": 14257 }, { "epoch": 1.279643714522896, "grad_norm": 0.5299689974543879, "learning_rate": 7.0885971513866025e-06, "loss": 0.4487, "step": 14258 }, { "epoch": 1.2797334589755671, "grad_norm": 0.48143964515105814, "learning_rate": 7.088122723233068e-06, "loss": 0.4107, "step": 14259 }, { "epoch": 1.2798232034282382, "grad_norm": 0.5424679338025752, "learning_rate": 7.0876482723068905e-06, "loss": 0.3808, "step": 14260 }, { "epoch": 1.279912947880909, "grad_norm": 0.46733213782324257, "learning_rate": 7.087173798613241e-06, "loss": 0.3921, "step": 14261 }, { "epoch": 1.28000269233358, "grad_norm": 0.49993147758853723, "learning_rate": 7.086699302157296e-06, "loss": 0.4063, "step": 14262 }, { "epoch": 1.2800924367862512, "grad_norm": 0.48159332765965274, "learning_rate": 7.0862247829442295e-06, "loss": 0.4218, "step": 14263 }, { "epoch": 1.2801821812389222, "grad_norm": 0.4803073454915896, "learning_rate": 7.0857502409792166e-06, "loss": 0.368, "step": 14264 }, { "epoch": 1.280271925691593, "grad_norm": 0.5050473239477519, "learning_rate": 7.085275676267432e-06, "loss": 0.4285, "step": 14265 }, { "epoch": 1.2803616701442642, "grad_norm": 0.5079860235460617, "learning_rate": 7.084801088814053e-06, "loss": 0.4382, "step": 14266 }, { "epoch": 1.2804514145969352, "grad_norm": 0.5227575706144488, "learning_rate": 7.084326478624253e-06, "loss": 0.3825, "step": 14267 }, { "epoch": 1.2805411590496063, "grad_norm": 0.49857717121083217, "learning_rate": 7.083851845703209e-06, "loss": 0.3934, "step": 14268 }, { "epoch": 1.2806309035022774, "grad_norm": 0.5445418677064201, "learning_rate": 7.083377190056098e-06, "loss": 0.4115, "step": 14269 }, { "epoch": 1.2807206479549482, "grad_norm": 0.5116668294562591, "learning_rate": 7.082902511688095e-06, "loss": 0.4826, "step": 14270 }, { "epoch": 1.2808103924076193, "grad_norm": 0.49684549998793975, "learning_rate": 7.0824278106043775e-06, "loss": 0.3571, "step": 14271 }, { "epoch": 1.2809001368602904, "grad_norm": 0.46061728689743964, "learning_rate": 7.081953086810123e-06, "loss": 0.3363, "step": 14272 }, { "epoch": 1.2809898813129612, "grad_norm": 0.4781992781628499, "learning_rate": 7.081478340310507e-06, "loss": 0.4105, "step": 14273 }, { "epoch": 1.2810796257656323, "grad_norm": 0.534619561105113, "learning_rate": 7.08100357111071e-06, "loss": 0.4865, "step": 14274 }, { "epoch": 1.2811693702183033, "grad_norm": 0.5428953686384301, "learning_rate": 7.080528779215906e-06, "loss": 0.3771, "step": 14275 }, { "epoch": 1.2812591146709744, "grad_norm": 0.4559332088700084, "learning_rate": 7.080053964631277e-06, "loss": 0.3533, "step": 14276 }, { "epoch": 1.2813488591236455, "grad_norm": 0.5260098500486514, "learning_rate": 7.0795791273619975e-06, "loss": 0.4268, "step": 14277 }, { "epoch": 1.2814386035763166, "grad_norm": 0.5035007628587553, "learning_rate": 7.07910426741325e-06, "loss": 0.4582, "step": 14278 }, { "epoch": 1.2815283480289874, "grad_norm": 0.5151087235924509, "learning_rate": 7.07862938479021e-06, "loss": 0.3772, "step": 14279 }, { "epoch": 1.2816180924816585, "grad_norm": 0.4725892210674527, "learning_rate": 7.078154479498056e-06, "loss": 0.3637, "step": 14280 }, { "epoch": 1.2817078369343295, "grad_norm": 0.49005463581526537, "learning_rate": 7.077679551541969e-06, "loss": 0.3575, "step": 14281 }, { "epoch": 1.2817975813870004, "grad_norm": 0.4982268059692801, "learning_rate": 7.077204600927129e-06, "loss": 0.3582, "step": 14282 }, { "epoch": 1.2818873258396715, "grad_norm": 0.41647073190195416, "learning_rate": 7.076729627658714e-06, "loss": 0.3231, "step": 14283 }, { "epoch": 1.2819770702923425, "grad_norm": 0.5281445457720738, "learning_rate": 7.076254631741905e-06, "loss": 0.3819, "step": 14284 }, { "epoch": 1.2820668147450136, "grad_norm": 0.5171497540404502, "learning_rate": 7.075779613181882e-06, "loss": 0.3966, "step": 14285 }, { "epoch": 1.2821565591976847, "grad_norm": 0.5089035246787094, "learning_rate": 7.075304571983825e-06, "loss": 0.4087, "step": 14286 }, { "epoch": 1.2822463036503557, "grad_norm": 0.48895399696194725, "learning_rate": 7.074829508152916e-06, "loss": 0.4119, "step": 14287 }, { "epoch": 1.2823360481030266, "grad_norm": 0.5124028394199891, "learning_rate": 7.074354421694334e-06, "loss": 0.3975, "step": 14288 }, { "epoch": 1.2824257925556977, "grad_norm": 0.5107126307298345, "learning_rate": 7.073879312613263e-06, "loss": 0.4705, "step": 14289 }, { "epoch": 1.2825155370083687, "grad_norm": 0.48327958768074647, "learning_rate": 7.073404180914881e-06, "loss": 0.3531, "step": 14290 }, { "epoch": 1.2826052814610396, "grad_norm": 0.48327243571334655, "learning_rate": 7.072929026604371e-06, "loss": 0.407, "step": 14291 }, { "epoch": 1.2826950259137107, "grad_norm": 0.46458242724431437, "learning_rate": 7.072453849686918e-06, "loss": 0.3613, "step": 14292 }, { "epoch": 1.2827847703663817, "grad_norm": 0.504711111317205, "learning_rate": 7.071978650167699e-06, "loss": 0.3724, "step": 14293 }, { "epoch": 1.2828745148190528, "grad_norm": 0.4650851032445766, "learning_rate": 7.071503428051899e-06, "loss": 0.3718, "step": 14294 }, { "epoch": 1.2829642592717239, "grad_norm": 0.4775922882961342, "learning_rate": 7.071028183344702e-06, "loss": 0.4274, "step": 14295 }, { "epoch": 1.2830540037243947, "grad_norm": 0.5237037391345968, "learning_rate": 7.070552916051287e-06, "loss": 0.3983, "step": 14296 }, { "epoch": 1.2831437481770658, "grad_norm": 0.46898282469144414, "learning_rate": 7.070077626176843e-06, "loss": 0.3665, "step": 14297 }, { "epoch": 1.2832334926297368, "grad_norm": 0.5069921627662165, "learning_rate": 7.069602313726549e-06, "loss": 0.4318, "step": 14298 }, { "epoch": 1.283323237082408, "grad_norm": 0.5099761145749265, "learning_rate": 7.0691269787055895e-06, "loss": 0.4408, "step": 14299 }, { "epoch": 1.2834129815350788, "grad_norm": 0.537476265277251, "learning_rate": 7.068651621119148e-06, "loss": 0.4613, "step": 14300 }, { "epoch": 1.2835027259877498, "grad_norm": 0.5104376881639706, "learning_rate": 7.0681762409724105e-06, "loss": 0.3746, "step": 14301 }, { "epoch": 1.283592470440421, "grad_norm": 0.45143870438428074, "learning_rate": 7.06770083827056e-06, "loss": 0.3802, "step": 14302 }, { "epoch": 1.283682214893092, "grad_norm": 0.5011427935719157, "learning_rate": 7.067225413018781e-06, "loss": 0.4505, "step": 14303 }, { "epoch": 1.283771959345763, "grad_norm": 0.5131231555466602, "learning_rate": 7.066749965222259e-06, "loss": 0.3924, "step": 14304 }, { "epoch": 1.283861703798434, "grad_norm": 0.4527832871216735, "learning_rate": 7.066274494886181e-06, "loss": 0.3615, "step": 14305 }, { "epoch": 1.283951448251105, "grad_norm": 0.4691901072589855, "learning_rate": 7.0657990020157274e-06, "loss": 0.3979, "step": 14306 }, { "epoch": 1.284041192703776, "grad_norm": 0.47875750138284767, "learning_rate": 7.065323486616088e-06, "loss": 0.3633, "step": 14307 }, { "epoch": 1.2841309371564469, "grad_norm": 0.4899335547318204, "learning_rate": 7.064847948692446e-06, "loss": 0.4034, "step": 14308 }, { "epoch": 1.284220681609118, "grad_norm": 0.494846971362953, "learning_rate": 7.0643723882499915e-06, "loss": 0.3891, "step": 14309 }, { "epoch": 1.284310426061789, "grad_norm": 0.5130154208011242, "learning_rate": 7.0638968052939064e-06, "loss": 0.3771, "step": 14310 }, { "epoch": 1.28440017051446, "grad_norm": 0.4926641533505062, "learning_rate": 7.063421199829379e-06, "loss": 0.4285, "step": 14311 }, { "epoch": 1.2844899149671312, "grad_norm": 0.5351118407568035, "learning_rate": 7.062945571861597e-06, "loss": 0.4176, "step": 14312 }, { "epoch": 1.2845796594198022, "grad_norm": 0.4844221654378551, "learning_rate": 7.062469921395747e-06, "loss": 0.3815, "step": 14313 }, { "epoch": 1.284669403872473, "grad_norm": 0.5033445214364277, "learning_rate": 7.061994248437016e-06, "loss": 0.4134, "step": 14314 }, { "epoch": 1.2847591483251442, "grad_norm": 0.5274248904046932, "learning_rate": 7.061518552990593e-06, "loss": 0.418, "step": 14315 }, { "epoch": 1.2848488927778152, "grad_norm": 0.5136953680290703, "learning_rate": 7.0610428350616635e-06, "loss": 0.3821, "step": 14316 }, { "epoch": 1.284938637230486, "grad_norm": 0.5360733151362295, "learning_rate": 7.060567094655417e-06, "loss": 0.4236, "step": 14317 }, { "epoch": 1.2850283816831571, "grad_norm": 0.46832810070316006, "learning_rate": 7.060091331777041e-06, "loss": 0.3548, "step": 14318 }, { "epoch": 1.2851181261358282, "grad_norm": 0.4811540339537614, "learning_rate": 7.0596155464317255e-06, "loss": 0.3917, "step": 14319 }, { "epoch": 1.2852078705884993, "grad_norm": 0.48084006456375034, "learning_rate": 7.059139738624657e-06, "loss": 0.3588, "step": 14320 }, { "epoch": 1.2852976150411703, "grad_norm": 0.44812183035627534, "learning_rate": 7.058663908361027e-06, "loss": 0.3738, "step": 14321 }, { "epoch": 1.2853873594938414, "grad_norm": 0.459315948549639, "learning_rate": 7.058188055646024e-06, "loss": 0.3614, "step": 14322 }, { "epoch": 1.2854771039465123, "grad_norm": 0.4977267759654464, "learning_rate": 7.057712180484838e-06, "loss": 0.4152, "step": 14323 }, { "epoch": 1.2855668483991833, "grad_norm": 0.525107114232928, "learning_rate": 7.0572362828826575e-06, "loss": 0.3949, "step": 14324 }, { "epoch": 1.2856565928518544, "grad_norm": 0.5015670013282426, "learning_rate": 7.056760362844672e-06, "loss": 0.368, "step": 14325 }, { "epoch": 1.2857463373045253, "grad_norm": 0.4842360556588035, "learning_rate": 7.056284420376076e-06, "loss": 0.4349, "step": 14326 }, { "epoch": 1.2858360817571963, "grad_norm": 0.5607218861517426, "learning_rate": 7.055808455482054e-06, "loss": 0.4829, "step": 14327 }, { "epoch": 1.2859258262098674, "grad_norm": 0.4908761381904165, "learning_rate": 7.055332468167801e-06, "loss": 0.3787, "step": 14328 }, { "epoch": 1.2860155706625385, "grad_norm": 0.47982833808535785, "learning_rate": 7.054856458438508e-06, "loss": 0.4486, "step": 14329 }, { "epoch": 1.2861053151152095, "grad_norm": 0.5183087731565618, "learning_rate": 7.054380426299364e-06, "loss": 0.3814, "step": 14330 }, { "epoch": 1.2861950595678804, "grad_norm": 0.4899335964342442, "learning_rate": 7.053904371755562e-06, "loss": 0.3977, "step": 14331 }, { "epoch": 1.2862848040205515, "grad_norm": 0.46105728391091044, "learning_rate": 7.053428294812293e-06, "loss": 0.3746, "step": 14332 }, { "epoch": 1.2863745484732225, "grad_norm": 0.4777424767864384, "learning_rate": 7.05295219547475e-06, "loss": 0.3977, "step": 14333 }, { "epoch": 1.2864642929258936, "grad_norm": 0.5140687542706647, "learning_rate": 7.052476073748125e-06, "loss": 0.4714, "step": 14334 }, { "epoch": 1.2865540373785644, "grad_norm": 0.4908213696761096, "learning_rate": 7.051999929637607e-06, "loss": 0.3872, "step": 14335 }, { "epoch": 1.2866437818312355, "grad_norm": 0.5098928203119611, "learning_rate": 7.051523763148396e-06, "loss": 0.3926, "step": 14336 }, { "epoch": 1.2867335262839066, "grad_norm": 0.4848097921223122, "learning_rate": 7.051047574285678e-06, "loss": 0.3675, "step": 14337 }, { "epoch": 1.2868232707365777, "grad_norm": 0.5007067187217337, "learning_rate": 7.05057136305465e-06, "loss": 0.4392, "step": 14338 }, { "epoch": 1.2869130151892487, "grad_norm": 0.4887189018317411, "learning_rate": 7.050095129460505e-06, "loss": 0.3723, "step": 14339 }, { "epoch": 1.2870027596419196, "grad_norm": 0.5016071256284484, "learning_rate": 7.049618873508436e-06, "loss": 0.3688, "step": 14340 }, { "epoch": 1.2870925040945906, "grad_norm": 0.4799371040743779, "learning_rate": 7.049142595203635e-06, "loss": 0.377, "step": 14341 }, { "epoch": 1.2871822485472617, "grad_norm": 0.48594039049973786, "learning_rate": 7.0486662945513014e-06, "loss": 0.4182, "step": 14342 }, { "epoch": 1.2872719929999326, "grad_norm": 0.5122970116325459, "learning_rate": 7.048189971556625e-06, "loss": 0.3485, "step": 14343 }, { "epoch": 1.2873617374526036, "grad_norm": 0.49930094521210644, "learning_rate": 7.047713626224804e-06, "loss": 0.4655, "step": 14344 }, { "epoch": 1.2874514819052747, "grad_norm": 0.5256255435550329, "learning_rate": 7.047237258561028e-06, "loss": 0.4102, "step": 14345 }, { "epoch": 1.2875412263579458, "grad_norm": 0.5405057537192193, "learning_rate": 7.0467608685704995e-06, "loss": 0.4977, "step": 14346 }, { "epoch": 1.2876309708106168, "grad_norm": 0.5523937351634747, "learning_rate": 7.046284456258406e-06, "loss": 0.4354, "step": 14347 }, { "epoch": 1.287720715263288, "grad_norm": 0.5151106408597933, "learning_rate": 7.04580802162995e-06, "loss": 0.4075, "step": 14348 }, { "epoch": 1.2878104597159588, "grad_norm": 0.47827915647136465, "learning_rate": 7.045331564690323e-06, "loss": 0.3939, "step": 14349 }, { "epoch": 1.2879002041686298, "grad_norm": 0.5083592280727233, "learning_rate": 7.044855085444724e-06, "loss": 0.3486, "step": 14350 }, { "epoch": 1.287989948621301, "grad_norm": 0.4994428495156687, "learning_rate": 7.044378583898347e-06, "loss": 0.4321, "step": 14351 }, { "epoch": 1.2880796930739717, "grad_norm": 0.5190042826312268, "learning_rate": 7.043902060056391e-06, "loss": 0.4708, "step": 14352 }, { "epoch": 1.2881694375266428, "grad_norm": 0.5247045602897207, "learning_rate": 7.043425513924049e-06, "loss": 0.4148, "step": 14353 }, { "epoch": 1.2882591819793139, "grad_norm": 0.5115413761391097, "learning_rate": 7.042948945506524e-06, "loss": 0.3683, "step": 14354 }, { "epoch": 1.288348926431985, "grad_norm": 0.4604285630889661, "learning_rate": 7.042472354809008e-06, "loss": 0.4247, "step": 14355 }, { "epoch": 1.288438670884656, "grad_norm": 0.5163925442600575, "learning_rate": 7.0419957418367e-06, "loss": 0.4072, "step": 14356 }, { "epoch": 1.288528415337327, "grad_norm": 0.463375849496947, "learning_rate": 7.041519106594801e-06, "loss": 0.4413, "step": 14357 }, { "epoch": 1.288618159789998, "grad_norm": 0.5241243568318097, "learning_rate": 7.041042449088504e-06, "loss": 0.3485, "step": 14358 }, { "epoch": 1.288707904242669, "grad_norm": 0.4939092417006735, "learning_rate": 7.0405657693230116e-06, "loss": 0.4207, "step": 14359 }, { "epoch": 1.28879764869534, "grad_norm": 0.4847607373319399, "learning_rate": 7.040089067303519e-06, "loss": 0.372, "step": 14360 }, { "epoch": 1.288887393148011, "grad_norm": 0.5336738185046276, "learning_rate": 7.039612343035228e-06, "loss": 0.4994, "step": 14361 }, { "epoch": 1.288977137600682, "grad_norm": 0.4945254239598339, "learning_rate": 7.039135596523337e-06, "loss": 0.3505, "step": 14362 }, { "epoch": 1.289066882053353, "grad_norm": 0.4755522061244365, "learning_rate": 7.038658827773044e-06, "loss": 0.4382, "step": 14363 }, { "epoch": 1.2891566265060241, "grad_norm": 0.5289937615572298, "learning_rate": 7.038182036789549e-06, "loss": 0.4089, "step": 14364 }, { "epoch": 1.2892463709586952, "grad_norm": 0.5126781995817725, "learning_rate": 7.037705223578053e-06, "loss": 0.3991, "step": 14365 }, { "epoch": 1.289336115411366, "grad_norm": 0.5055210013456057, "learning_rate": 7.037228388143753e-06, "loss": 0.4093, "step": 14366 }, { "epoch": 1.2894258598640371, "grad_norm": 0.49897930019530257, "learning_rate": 7.036751530491853e-06, "loss": 0.371, "step": 14367 }, { "epoch": 1.2895156043167082, "grad_norm": 0.5262985509851646, "learning_rate": 7.036274650627551e-06, "loss": 0.4446, "step": 14368 }, { "epoch": 1.2896053487693793, "grad_norm": 0.5197515583496028, "learning_rate": 7.0357977485560495e-06, "loss": 0.392, "step": 14369 }, { "epoch": 1.2896950932220501, "grad_norm": 0.47653358122174605, "learning_rate": 7.035320824282548e-06, "loss": 0.3666, "step": 14370 }, { "epoch": 1.2897848376747212, "grad_norm": 0.48662263543419987, "learning_rate": 7.034843877812248e-06, "loss": 0.4157, "step": 14371 }, { "epoch": 1.2898745821273923, "grad_norm": 0.5560585327326105, "learning_rate": 7.034366909150351e-06, "loss": 0.4118, "step": 14372 }, { "epoch": 1.2899643265800633, "grad_norm": 0.5087340291995806, "learning_rate": 7.033889918302059e-06, "loss": 0.4159, "step": 14373 }, { "epoch": 1.2900540710327344, "grad_norm": 0.5280735517245461, "learning_rate": 7.033412905272574e-06, "loss": 0.453, "step": 14374 }, { "epoch": 1.2901438154854052, "grad_norm": 0.5191356042515223, "learning_rate": 7.0329358700671e-06, "loss": 0.426, "step": 14375 }, { "epoch": 1.2902335599380763, "grad_norm": 0.5083301910952503, "learning_rate": 7.032458812690836e-06, "loss": 0.4308, "step": 14376 }, { "epoch": 1.2903233043907474, "grad_norm": 0.5146821522769954, "learning_rate": 7.031981733148987e-06, "loss": 0.411, "step": 14377 }, { "epoch": 1.2904130488434182, "grad_norm": 0.525436870875983, "learning_rate": 7.031504631446754e-06, "loss": 0.421, "step": 14378 }, { "epoch": 1.2905027932960893, "grad_norm": 0.4957255519448225, "learning_rate": 7.031027507589343e-06, "loss": 0.341, "step": 14379 }, { "epoch": 1.2905925377487604, "grad_norm": 0.5261213358070252, "learning_rate": 7.030550361581955e-06, "loss": 0.3962, "step": 14380 }, { "epoch": 1.2906822822014314, "grad_norm": 0.46274865168920587, "learning_rate": 7.030073193429795e-06, "loss": 0.4485, "step": 14381 }, { "epoch": 1.2907720266541025, "grad_norm": 0.5057460952392373, "learning_rate": 7.029596003138065e-06, "loss": 0.4557, "step": 14382 }, { "epoch": 1.2908617711067736, "grad_norm": 0.5100248933604336, "learning_rate": 7.029118790711973e-06, "loss": 0.3932, "step": 14383 }, { "epoch": 1.2909515155594444, "grad_norm": 0.4820780832222331, "learning_rate": 7.028641556156719e-06, "loss": 0.358, "step": 14384 }, { "epoch": 1.2910412600121155, "grad_norm": 0.4830971708089607, "learning_rate": 7.02816429947751e-06, "loss": 0.4365, "step": 14385 }, { "epoch": 1.2911310044647866, "grad_norm": 0.5077554185841565, "learning_rate": 7.02768702067955e-06, "loss": 0.3895, "step": 14386 }, { "epoch": 1.2912207489174574, "grad_norm": 0.5062796312232032, "learning_rate": 7.027209719768044e-06, "loss": 0.4338, "step": 14387 }, { "epoch": 1.2913104933701285, "grad_norm": 0.5114385113060642, "learning_rate": 7.0267323967482e-06, "loss": 0.3415, "step": 14388 }, { "epoch": 1.2914002378227996, "grad_norm": 0.45540602947305175, "learning_rate": 7.026255051625219e-06, "loss": 0.33, "step": 14389 }, { "epoch": 1.2914899822754706, "grad_norm": 0.4491290966597596, "learning_rate": 7.025777684404311e-06, "loss": 0.4212, "step": 14390 }, { "epoch": 1.2915797267281417, "grad_norm": 0.5117219258485325, "learning_rate": 7.02530029509068e-06, "loss": 0.417, "step": 14391 }, { "epoch": 1.2916694711808128, "grad_norm": 0.49073390981436726, "learning_rate": 7.024822883689532e-06, "loss": 0.4186, "step": 14392 }, { "epoch": 1.2917592156334836, "grad_norm": 0.48156145350881735, "learning_rate": 7.024345450206076e-06, "loss": 0.3786, "step": 14393 }, { "epoch": 1.2918489600861547, "grad_norm": 0.5096811163624388, "learning_rate": 7.023867994645515e-06, "loss": 0.3485, "step": 14394 }, { "epoch": 1.2919387045388258, "grad_norm": 0.48044595265852974, "learning_rate": 7.02339051701306e-06, "loss": 0.4489, "step": 14395 }, { "epoch": 1.2920284489914966, "grad_norm": 0.5319679572508471, "learning_rate": 7.022913017313915e-06, "loss": 0.3932, "step": 14396 }, { "epoch": 1.2921181934441677, "grad_norm": 0.5239271189395084, "learning_rate": 7.022435495553289e-06, "loss": 0.4693, "step": 14397 }, { "epoch": 1.2922079378968387, "grad_norm": 0.5211308321243389, "learning_rate": 7.021957951736389e-06, "loss": 0.4003, "step": 14398 }, { "epoch": 1.2922976823495098, "grad_norm": 0.4600619764635644, "learning_rate": 7.021480385868425e-06, "loss": 0.3378, "step": 14399 }, { "epoch": 1.2923874268021809, "grad_norm": 0.4751252347018314, "learning_rate": 7.021002797954603e-06, "loss": 0.4161, "step": 14400 }, { "epoch": 1.2924771712548517, "grad_norm": 0.5102243380298617, "learning_rate": 7.020525188000132e-06, "loss": 0.3924, "step": 14401 }, { "epoch": 1.2925669157075228, "grad_norm": 0.4696681727917184, "learning_rate": 7.020047556010221e-06, "loss": 0.3775, "step": 14402 }, { "epoch": 1.2926566601601939, "grad_norm": 0.5216805002496984, "learning_rate": 7.019569901990081e-06, "loss": 0.4299, "step": 14403 }, { "epoch": 1.292746404612865, "grad_norm": 0.5075133039797897, "learning_rate": 7.019092225944916e-06, "loss": 0.3986, "step": 14404 }, { "epoch": 1.2928361490655358, "grad_norm": 0.4822508878551238, "learning_rate": 7.018614527879941e-06, "loss": 0.3817, "step": 14405 }, { "epoch": 1.2929258935182069, "grad_norm": 0.47804213422901554, "learning_rate": 7.018136807800362e-06, "loss": 0.3606, "step": 14406 }, { "epoch": 1.293015637970878, "grad_norm": 0.5014439210930376, "learning_rate": 7.017659065711392e-06, "loss": 0.3765, "step": 14407 }, { "epoch": 1.293105382423549, "grad_norm": 0.4824104424433467, "learning_rate": 7.0171813016182385e-06, "loss": 0.3877, "step": 14408 }, { "epoch": 1.29319512687622, "grad_norm": 0.5049545588787852, "learning_rate": 7.016703515526112e-06, "loss": 0.4258, "step": 14409 }, { "epoch": 1.293284871328891, "grad_norm": 0.5088851775949897, "learning_rate": 7.016225707440225e-06, "loss": 0.3951, "step": 14410 }, { "epoch": 1.293374615781562, "grad_norm": 0.5563864810057889, "learning_rate": 7.0157478773657865e-06, "loss": 0.4121, "step": 14411 }, { "epoch": 1.293464360234233, "grad_norm": 0.5083847126042179, "learning_rate": 7.015270025308009e-06, "loss": 0.4775, "step": 14412 }, { "epoch": 1.293554104686904, "grad_norm": 0.5085236645916188, "learning_rate": 7.014792151272102e-06, "loss": 0.398, "step": 14413 }, { "epoch": 1.293643849139575, "grad_norm": 0.5157302367578506, "learning_rate": 7.0143142552632795e-06, "loss": 0.3988, "step": 14414 }, { "epoch": 1.293733593592246, "grad_norm": 0.5029441260915893, "learning_rate": 7.013836337286752e-06, "loss": 0.3833, "step": 14415 }, { "epoch": 1.2938233380449171, "grad_norm": 0.5062301743263099, "learning_rate": 7.013358397347732e-06, "loss": 0.3708, "step": 14416 }, { "epoch": 1.2939130824975882, "grad_norm": 0.47431023953671286, "learning_rate": 7.01288043545143e-06, "loss": 0.4152, "step": 14417 }, { "epoch": 1.2940028269502593, "grad_norm": 0.522253706126337, "learning_rate": 7.012402451603062e-06, "loss": 0.3568, "step": 14418 }, { "epoch": 1.29409257140293, "grad_norm": 0.5015390488133346, "learning_rate": 7.011924445807838e-06, "loss": 0.3719, "step": 14419 }, { "epoch": 1.2941823158556012, "grad_norm": 0.4747595683924881, "learning_rate": 7.011446418070972e-06, "loss": 0.371, "step": 14420 }, { "epoch": 1.2942720603082722, "grad_norm": 0.4957483807768505, "learning_rate": 7.010968368397677e-06, "loss": 0.3811, "step": 14421 }, { "epoch": 1.294361804760943, "grad_norm": 0.5268889809338598, "learning_rate": 7.010490296793169e-06, "loss": 0.4315, "step": 14422 }, { "epoch": 1.2944515492136142, "grad_norm": 0.5002478928811506, "learning_rate": 7.010012203262657e-06, "loss": 0.3879, "step": 14423 }, { "epoch": 1.2945412936662852, "grad_norm": 0.4902894576448013, "learning_rate": 7.0095340878113585e-06, "loss": 0.4356, "step": 14424 }, { "epoch": 1.2946310381189563, "grad_norm": 0.5656495048796403, "learning_rate": 7.009055950444486e-06, "loss": 0.4546, "step": 14425 }, { "epoch": 1.2947207825716274, "grad_norm": 0.47285095681946715, "learning_rate": 7.008577791167256e-06, "loss": 0.3932, "step": 14426 }, { "epoch": 1.2948105270242984, "grad_norm": 0.4735169983842838, "learning_rate": 7.00809960998488e-06, "loss": 0.351, "step": 14427 }, { "epoch": 1.2949002714769693, "grad_norm": 0.48645731655593427, "learning_rate": 7.0076214069025775e-06, "loss": 0.357, "step": 14428 }, { "epoch": 1.2949900159296404, "grad_norm": 0.5279491767128005, "learning_rate": 7.00714318192556e-06, "loss": 0.4512, "step": 14429 }, { "epoch": 1.2950797603823114, "grad_norm": 0.4882332194620342, "learning_rate": 7.006664935059043e-06, "loss": 0.4573, "step": 14430 }, { "epoch": 1.2951695048349823, "grad_norm": 0.511862566635809, "learning_rate": 7.006186666308243e-06, "loss": 0.3884, "step": 14431 }, { "epoch": 1.2952592492876533, "grad_norm": 0.45854580953425056, "learning_rate": 7.005708375678379e-06, "loss": 0.4065, "step": 14432 }, { "epoch": 1.2953489937403244, "grad_norm": 0.4906092128878164, "learning_rate": 7.005230063174661e-06, "loss": 0.3725, "step": 14433 }, { "epoch": 1.2954387381929955, "grad_norm": 0.4705647723806199, "learning_rate": 7.00475172880231e-06, "loss": 0.4285, "step": 14434 }, { "epoch": 1.2955284826456666, "grad_norm": 0.49739749573844655, "learning_rate": 7.004273372566541e-06, "loss": 0.3579, "step": 14435 }, { "epoch": 1.2956182270983374, "grad_norm": 0.49341700040544384, "learning_rate": 7.0037949944725716e-06, "loss": 0.3681, "step": 14436 }, { "epoch": 1.2957079715510085, "grad_norm": 0.47530673736635703, "learning_rate": 7.003316594525616e-06, "loss": 0.4014, "step": 14437 }, { "epoch": 1.2957977160036795, "grad_norm": 0.5231653940638411, "learning_rate": 7.002838172730897e-06, "loss": 0.4392, "step": 14438 }, { "epoch": 1.2958874604563506, "grad_norm": 0.4850726624343419, "learning_rate": 7.002359729093626e-06, "loss": 0.3563, "step": 14439 }, { "epoch": 1.2959772049090215, "grad_norm": 0.49108931345909496, "learning_rate": 7.001881263619027e-06, "loss": 0.4355, "step": 14440 }, { "epoch": 1.2960669493616925, "grad_norm": 0.48435213046319364, "learning_rate": 7.001402776312312e-06, "loss": 0.4558, "step": 14441 }, { "epoch": 1.2961566938143636, "grad_norm": 0.5750085139805666, "learning_rate": 7.000924267178705e-06, "loss": 0.4421, "step": 14442 }, { "epoch": 1.2962464382670347, "grad_norm": 0.48579699598136583, "learning_rate": 7.000445736223419e-06, "loss": 0.3367, "step": 14443 }, { "epoch": 1.2963361827197057, "grad_norm": 0.4837266463999388, "learning_rate": 6.999967183451678e-06, "loss": 0.3631, "step": 14444 }, { "epoch": 1.2964259271723766, "grad_norm": 0.47911545659220056, "learning_rate": 6.9994886088686965e-06, "loss": 0.3727, "step": 14445 }, { "epoch": 1.2965156716250477, "grad_norm": 0.49481954325433897, "learning_rate": 6.9990100124796965e-06, "loss": 0.3507, "step": 14446 }, { "epoch": 1.2966054160777187, "grad_norm": 0.47992094710303157, "learning_rate": 6.9985313942898955e-06, "loss": 0.4162, "step": 14447 }, { "epoch": 1.2966951605303896, "grad_norm": 0.5123645886188353, "learning_rate": 6.998052754304516e-06, "loss": 0.3755, "step": 14448 }, { "epoch": 1.2967849049830606, "grad_norm": 0.4731792949627161, "learning_rate": 6.997574092528776e-06, "loss": 0.3514, "step": 14449 }, { "epoch": 1.2968746494357317, "grad_norm": 0.5062716517537992, "learning_rate": 6.997095408967897e-06, "loss": 0.337, "step": 14450 }, { "epoch": 1.2969643938884028, "grad_norm": 0.44971638728873653, "learning_rate": 6.996616703627097e-06, "loss": 0.3707, "step": 14451 }, { "epoch": 1.2970541383410739, "grad_norm": 0.49181529832389914, "learning_rate": 6.996137976511598e-06, "loss": 0.4284, "step": 14452 }, { "epoch": 1.297143882793745, "grad_norm": 0.49761839072603475, "learning_rate": 6.995659227626621e-06, "loss": 0.3795, "step": 14453 }, { "epoch": 1.2972336272464158, "grad_norm": 0.45656534419814754, "learning_rate": 6.9951804569773886e-06, "loss": 0.4002, "step": 14454 }, { "epoch": 1.2973233716990868, "grad_norm": 0.47062100454719485, "learning_rate": 6.994701664569119e-06, "loss": 0.3346, "step": 14455 }, { "epoch": 1.297413116151758, "grad_norm": 0.5009315188268395, "learning_rate": 6.994222850407036e-06, "loss": 0.4644, "step": 14456 }, { "epoch": 1.2975028606044288, "grad_norm": 0.5347557161941074, "learning_rate": 6.9937440144963616e-06, "loss": 0.4208, "step": 14457 }, { "epoch": 1.2975926050570998, "grad_norm": 0.5329985935537918, "learning_rate": 6.993265156842317e-06, "loss": 0.3835, "step": 14458 }, { "epoch": 1.297682349509771, "grad_norm": 0.4736640202312368, "learning_rate": 6.9927862774501255e-06, "loss": 0.3871, "step": 14459 }, { "epoch": 1.297772093962442, "grad_norm": 0.5070127458140753, "learning_rate": 6.992307376325008e-06, "loss": 0.4934, "step": 14460 }, { "epoch": 1.297861838415113, "grad_norm": 0.5071077341690513, "learning_rate": 6.991828453472189e-06, "loss": 0.4262, "step": 14461 }, { "epoch": 1.2979515828677841, "grad_norm": 0.49414968671615994, "learning_rate": 6.991349508896891e-06, "loss": 0.4218, "step": 14462 }, { "epoch": 1.298041327320455, "grad_norm": 0.4948668640338382, "learning_rate": 6.990870542604336e-06, "loss": 0.3779, "step": 14463 }, { "epoch": 1.298131071773126, "grad_norm": 0.5134758223086228, "learning_rate": 6.990391554599749e-06, "loss": 0.3837, "step": 14464 }, { "epoch": 1.298220816225797, "grad_norm": 0.5006293380851905, "learning_rate": 6.989912544888354e-06, "loss": 0.4255, "step": 14465 }, { "epoch": 1.298310560678468, "grad_norm": 0.5188077900492702, "learning_rate": 6.989433513475375e-06, "loss": 0.4166, "step": 14466 }, { "epoch": 1.298400305131139, "grad_norm": 0.5137035495193139, "learning_rate": 6.988954460366034e-06, "loss": 0.4424, "step": 14467 }, { "epoch": 1.29849004958381, "grad_norm": 0.5421807036680159, "learning_rate": 6.988475385565557e-06, "loss": 0.4606, "step": 14468 }, { "epoch": 1.2985797940364812, "grad_norm": 0.49422038155955017, "learning_rate": 6.98799628907917e-06, "loss": 0.3826, "step": 14469 }, { "epoch": 1.2986695384891522, "grad_norm": 0.5148938564437999, "learning_rate": 6.987517170912095e-06, "loss": 0.422, "step": 14470 }, { "epoch": 1.298759282941823, "grad_norm": 0.532611319004136, "learning_rate": 6.98703803106956e-06, "loss": 0.5261, "step": 14471 }, { "epoch": 1.2988490273944941, "grad_norm": 0.5411946875003599, "learning_rate": 6.9865588695567886e-06, "loss": 0.4022, "step": 14472 }, { "epoch": 1.2989387718471652, "grad_norm": 0.4957575387046049, "learning_rate": 6.986079686379008e-06, "loss": 0.392, "step": 14473 }, { "epoch": 1.2990285162998363, "grad_norm": 0.4879680574854316, "learning_rate": 6.985600481541441e-06, "loss": 0.4147, "step": 14474 }, { "epoch": 1.2991182607525071, "grad_norm": 0.5349507009282873, "learning_rate": 6.9851212550493185e-06, "loss": 0.3729, "step": 14475 }, { "epoch": 1.2992080052051782, "grad_norm": 0.4840169805345262, "learning_rate": 6.984642006907863e-06, "loss": 0.4103, "step": 14476 }, { "epoch": 1.2992977496578493, "grad_norm": 0.5389761456633994, "learning_rate": 6.984162737122302e-06, "loss": 0.4317, "step": 14477 }, { "epoch": 1.2993874941105203, "grad_norm": 0.5393818839285189, "learning_rate": 6.983683445697862e-06, "loss": 0.4751, "step": 14478 }, { "epoch": 1.2994772385631914, "grad_norm": 0.5343967412602029, "learning_rate": 6.983204132639771e-06, "loss": 0.408, "step": 14479 }, { "epoch": 1.2995669830158623, "grad_norm": 0.47409622330521906, "learning_rate": 6.982724797953256e-06, "loss": 0.3367, "step": 14480 }, { "epoch": 1.2996567274685333, "grad_norm": 0.5272122907146688, "learning_rate": 6.982245441643546e-06, "loss": 0.3725, "step": 14481 }, { "epoch": 1.2997464719212044, "grad_norm": 0.4867146181908789, "learning_rate": 6.981766063715866e-06, "loss": 0.4323, "step": 14482 }, { "epoch": 1.2998362163738753, "grad_norm": 0.5071731187295987, "learning_rate": 6.981286664175444e-06, "loss": 0.3608, "step": 14483 }, { "epoch": 1.2999259608265463, "grad_norm": 0.5086706312533318, "learning_rate": 6.980807243027512e-06, "loss": 0.3732, "step": 14484 }, { "epoch": 1.3000157052792174, "grad_norm": 0.44903300921373834, "learning_rate": 6.980327800277293e-06, "loss": 0.3469, "step": 14485 }, { "epoch": 1.3001054497318885, "grad_norm": 0.5284717933733791, "learning_rate": 6.9798483359300195e-06, "loss": 0.4357, "step": 14486 }, { "epoch": 1.3001951941845595, "grad_norm": 0.4916453576658046, "learning_rate": 6.979368849990921e-06, "loss": 0.3882, "step": 14487 }, { "epoch": 1.3002849386372306, "grad_norm": 0.5028621204840655, "learning_rate": 6.978889342465225e-06, "loss": 0.401, "step": 14488 }, { "epoch": 1.3003746830899015, "grad_norm": 0.5138082271882835, "learning_rate": 6.97840981335816e-06, "loss": 0.4119, "step": 14489 }, { "epoch": 1.3004644275425725, "grad_norm": 0.5121586566111396, "learning_rate": 6.977930262674957e-06, "loss": 0.4608, "step": 14490 }, { "epoch": 1.3005541719952436, "grad_norm": 0.5877305654200877, "learning_rate": 6.977450690420847e-06, "loss": 0.4473, "step": 14491 }, { "epoch": 1.3006439164479144, "grad_norm": 0.503754743441809, "learning_rate": 6.9769710966010584e-06, "loss": 0.4376, "step": 14492 }, { "epoch": 1.3007336609005855, "grad_norm": 0.5163315274337614, "learning_rate": 6.976491481220821e-06, "loss": 0.3874, "step": 14493 }, { "epoch": 1.3008234053532566, "grad_norm": 0.4838357695120437, "learning_rate": 6.976011844285367e-06, "loss": 0.3742, "step": 14494 }, { "epoch": 1.3009131498059276, "grad_norm": 0.48804353101754006, "learning_rate": 6.975532185799926e-06, "loss": 0.4211, "step": 14495 }, { "epoch": 1.3010028942585987, "grad_norm": 0.5149012562829772, "learning_rate": 6.9750525057697305e-06, "loss": 0.3905, "step": 14496 }, { "epoch": 1.3010926387112698, "grad_norm": 0.4672343934131614, "learning_rate": 6.974572804200011e-06, "loss": 0.4063, "step": 14497 }, { "epoch": 1.3011823831639406, "grad_norm": 0.47234146929615306, "learning_rate": 6.974093081095998e-06, "loss": 0.3724, "step": 14498 }, { "epoch": 1.3012721276166117, "grad_norm": 0.47408816142866705, "learning_rate": 6.973613336462924e-06, "loss": 0.3454, "step": 14499 }, { "epoch": 1.3013618720692828, "grad_norm": 0.4883823216188619, "learning_rate": 6.973133570306022e-06, "loss": 0.4126, "step": 14500 }, { "epoch": 1.3014516165219536, "grad_norm": 0.5242833304663065, "learning_rate": 6.972653782630523e-06, "loss": 0.4361, "step": 14501 }, { "epoch": 1.3015413609746247, "grad_norm": 0.5154685541676347, "learning_rate": 6.97217397344166e-06, "loss": 0.3781, "step": 14502 }, { "epoch": 1.3016311054272958, "grad_norm": 0.4633853607222498, "learning_rate": 6.971694142744666e-06, "loss": 0.354, "step": 14503 }, { "epoch": 1.3017208498799668, "grad_norm": 0.49006785741256115, "learning_rate": 6.9712142905447746e-06, "loss": 0.4266, "step": 14504 }, { "epoch": 1.301810594332638, "grad_norm": 0.5132755465312772, "learning_rate": 6.970734416847215e-06, "loss": 0.4237, "step": 14505 }, { "epoch": 1.3019003387853088, "grad_norm": 0.5000530911326827, "learning_rate": 6.970254521657227e-06, "loss": 0.4124, "step": 14506 }, { "epoch": 1.3019900832379798, "grad_norm": 0.48169838494555817, "learning_rate": 6.969774604980038e-06, "loss": 0.4018, "step": 14507 }, { "epoch": 1.302079827690651, "grad_norm": 0.5289756447389651, "learning_rate": 6.969294666820886e-06, "loss": 0.3894, "step": 14508 }, { "epoch": 1.302169572143322, "grad_norm": 0.501607787233311, "learning_rate": 6.968814707185003e-06, "loss": 0.4, "step": 14509 }, { "epoch": 1.3022593165959928, "grad_norm": 0.4883492118197117, "learning_rate": 6.9683347260776245e-06, "loss": 0.4589, "step": 14510 }, { "epoch": 1.3023490610486639, "grad_norm": 0.4919881071263466, "learning_rate": 6.967854723503984e-06, "loss": 0.4565, "step": 14511 }, { "epoch": 1.302438805501335, "grad_norm": 0.5545665210342678, "learning_rate": 6.967374699469318e-06, "loss": 0.3615, "step": 14512 }, { "epoch": 1.302528549954006, "grad_norm": 0.4701780177122071, "learning_rate": 6.966894653978859e-06, "loss": 0.3945, "step": 14513 }, { "epoch": 1.302618294406677, "grad_norm": 0.5085302770358667, "learning_rate": 6.966414587037845e-06, "loss": 0.439, "step": 14514 }, { "epoch": 1.302708038859348, "grad_norm": 0.5338905376058483, "learning_rate": 6.965934498651512e-06, "loss": 0.4435, "step": 14515 }, { "epoch": 1.302797783312019, "grad_norm": 0.4972129200844268, "learning_rate": 6.965454388825091e-06, "loss": 0.3468, "step": 14516 }, { "epoch": 1.30288752776469, "grad_norm": 0.48797295856373846, "learning_rate": 6.964974257563823e-06, "loss": 0.4526, "step": 14517 }, { "epoch": 1.302977272217361, "grad_norm": 0.527116584309632, "learning_rate": 6.964494104872941e-06, "loss": 0.3645, "step": 14518 }, { "epoch": 1.303067016670032, "grad_norm": 0.4797764055479369, "learning_rate": 6.964013930757683e-06, "loss": 0.4001, "step": 14519 }, { "epoch": 1.303156761122703, "grad_norm": 0.4796045613221375, "learning_rate": 6.963533735223287e-06, "loss": 0.3596, "step": 14520 }, { "epoch": 1.3032465055753741, "grad_norm": 0.4913727226657948, "learning_rate": 6.9630535182749855e-06, "loss": 0.3952, "step": 14521 }, { "epoch": 1.3033362500280452, "grad_norm": 0.48461587819689284, "learning_rate": 6.962573279918021e-06, "loss": 0.3924, "step": 14522 }, { "epoch": 1.3034259944807163, "grad_norm": 0.4675770808639748, "learning_rate": 6.962093020157627e-06, "loss": 0.3895, "step": 14523 }, { "epoch": 1.3035157389333871, "grad_norm": 0.4799141429574791, "learning_rate": 6.961612738999043e-06, "loss": 0.372, "step": 14524 }, { "epoch": 1.3036054833860582, "grad_norm": 0.48397919070182754, "learning_rate": 6.961132436447507e-06, "loss": 0.462, "step": 14525 }, { "epoch": 1.3036952278387293, "grad_norm": 0.5736472919998241, "learning_rate": 6.9606521125082545e-06, "loss": 0.3954, "step": 14526 }, { "epoch": 1.3037849722914001, "grad_norm": 0.47498704441679396, "learning_rate": 6.9601717671865264e-06, "loss": 0.3616, "step": 14527 }, { "epoch": 1.3038747167440712, "grad_norm": 0.49634821148812797, "learning_rate": 6.959691400487561e-06, "loss": 0.4407, "step": 14528 }, { "epoch": 1.3039644611967423, "grad_norm": 0.47204346336236, "learning_rate": 6.959211012416597e-06, "loss": 0.3899, "step": 14529 }, { "epoch": 1.3040542056494133, "grad_norm": 0.5260312687749392, "learning_rate": 6.958730602978873e-06, "loss": 0.3553, "step": 14530 }, { "epoch": 1.3041439501020844, "grad_norm": 0.48237533244403097, "learning_rate": 6.958250172179627e-06, "loss": 0.418, "step": 14531 }, { "epoch": 1.3042336945547555, "grad_norm": 0.5376501754835299, "learning_rate": 6.9577697200241014e-06, "loss": 0.4831, "step": 14532 }, { "epoch": 1.3043234390074263, "grad_norm": 0.5114227445480606, "learning_rate": 6.957289246517533e-06, "loss": 0.4214, "step": 14533 }, { "epoch": 1.3044131834600974, "grad_norm": 0.5075440600199718, "learning_rate": 6.956808751665164e-06, "loss": 0.375, "step": 14534 }, { "epoch": 1.3045029279127685, "grad_norm": 0.49723059686765747, "learning_rate": 6.956328235472234e-06, "loss": 0.3958, "step": 14535 }, { "epoch": 1.3045926723654393, "grad_norm": 0.5446201676987131, "learning_rate": 6.955847697943982e-06, "loss": 0.3892, "step": 14536 }, { "epoch": 1.3046824168181104, "grad_norm": 0.5032048096591445, "learning_rate": 6.95536713908565e-06, "loss": 0.4391, "step": 14537 }, { "epoch": 1.3047721612707814, "grad_norm": 0.5700840495881409, "learning_rate": 6.954886558902479e-06, "loss": 0.4524, "step": 14538 }, { "epoch": 1.3048619057234525, "grad_norm": 0.5100655114051458, "learning_rate": 6.954405957399708e-06, "loss": 0.3831, "step": 14539 }, { "epoch": 1.3049516501761236, "grad_norm": 0.48491734026091116, "learning_rate": 6.953925334582581e-06, "loss": 0.3489, "step": 14540 }, { "epoch": 1.3050413946287944, "grad_norm": 0.46547041438654363, "learning_rate": 6.953444690456339e-06, "loss": 0.3492, "step": 14541 }, { "epoch": 1.3051311390814655, "grad_norm": 0.5211459169021554, "learning_rate": 6.952964025026223e-06, "loss": 0.3971, "step": 14542 }, { "epoch": 1.3052208835341366, "grad_norm": 0.4860540459905141, "learning_rate": 6.952483338297476e-06, "loss": 0.4137, "step": 14543 }, { "epoch": 1.3053106279868076, "grad_norm": 0.5259731497685302, "learning_rate": 6.952002630275338e-06, "loss": 0.429, "step": 14544 }, { "epoch": 1.3054003724394785, "grad_norm": 0.4919430908517477, "learning_rate": 6.951521900965055e-06, "loss": 0.4059, "step": 14545 }, { "epoch": 1.3054901168921496, "grad_norm": 0.4556321378907865, "learning_rate": 6.951041150371867e-06, "loss": 0.382, "step": 14546 }, { "epoch": 1.3055798613448206, "grad_norm": 0.5046682394329987, "learning_rate": 6.950560378501019e-06, "loss": 0.4002, "step": 14547 }, { "epoch": 1.3056696057974917, "grad_norm": 0.5021596291614217, "learning_rate": 6.950079585357752e-06, "loss": 0.4457, "step": 14548 }, { "epoch": 1.3057593502501628, "grad_norm": 0.5021523173043116, "learning_rate": 6.949598770947311e-06, "loss": 0.449, "step": 14549 }, { "epoch": 1.3058490947028336, "grad_norm": 0.5260719513690151, "learning_rate": 6.94911793527494e-06, "loss": 0.3745, "step": 14550 }, { "epoch": 1.3059388391555047, "grad_norm": 0.46138224990931354, "learning_rate": 6.9486370783458815e-06, "loss": 0.3903, "step": 14551 }, { "epoch": 1.3060285836081758, "grad_norm": 0.4671209363782571, "learning_rate": 6.948156200165379e-06, "loss": 0.3613, "step": 14552 }, { "epoch": 1.3061183280608466, "grad_norm": 0.4974334460610564, "learning_rate": 6.94767530073868e-06, "loss": 0.3872, "step": 14553 }, { "epoch": 1.3062080725135177, "grad_norm": 0.44227635441111485, "learning_rate": 6.947194380071026e-06, "loss": 0.3561, "step": 14554 }, { "epoch": 1.3062978169661887, "grad_norm": 0.46005423058199096, "learning_rate": 6.946713438167664e-06, "loss": 0.388, "step": 14555 }, { "epoch": 1.3063875614188598, "grad_norm": 0.5275708313772322, "learning_rate": 6.946232475033837e-06, "loss": 0.3806, "step": 14556 }, { "epoch": 1.3064773058715309, "grad_norm": 0.494317160035062, "learning_rate": 6.945751490674793e-06, "loss": 0.4119, "step": 14557 }, { "epoch": 1.306567050324202, "grad_norm": 0.4871052690189757, "learning_rate": 6.945270485095774e-06, "loss": 0.4258, "step": 14558 }, { "epoch": 1.3066567947768728, "grad_norm": 0.49731953358221065, "learning_rate": 6.944789458302028e-06, "loss": 0.3688, "step": 14559 }, { "epoch": 1.3067465392295439, "grad_norm": 0.4721152211638231, "learning_rate": 6.944308410298801e-06, "loss": 0.4151, "step": 14560 }, { "epoch": 1.306836283682215, "grad_norm": 0.4741613005865984, "learning_rate": 6.943827341091339e-06, "loss": 0.3892, "step": 14561 }, { "epoch": 1.3069260281348858, "grad_norm": 0.5056738539762679, "learning_rate": 6.943346250684888e-06, "loss": 0.4022, "step": 14562 }, { "epoch": 1.3070157725875569, "grad_norm": 0.5408925009168801, "learning_rate": 6.942865139084696e-06, "loss": 0.3873, "step": 14563 }, { "epoch": 1.307105517040228, "grad_norm": 0.43824251924839386, "learning_rate": 6.942384006296007e-06, "loss": 0.3585, "step": 14564 }, { "epoch": 1.307195261492899, "grad_norm": 0.5482872178965211, "learning_rate": 6.9419028523240716e-06, "loss": 0.4676, "step": 14565 }, { "epoch": 1.30728500594557, "grad_norm": 0.5488032511053837, "learning_rate": 6.941421677174134e-06, "loss": 0.4519, "step": 14566 }, { "epoch": 1.3073747503982411, "grad_norm": 0.5207759788733805, "learning_rate": 6.9409404808514445e-06, "loss": 0.4094, "step": 14567 }, { "epoch": 1.307464494850912, "grad_norm": 0.5272978871426952, "learning_rate": 6.9404592633612486e-06, "loss": 0.3831, "step": 14568 }, { "epoch": 1.307554239303583, "grad_norm": 0.5113636884042891, "learning_rate": 6.9399780247087975e-06, "loss": 0.4493, "step": 14569 }, { "epoch": 1.3076439837562541, "grad_norm": 0.5267652714608284, "learning_rate": 6.939496764899336e-06, "loss": 0.3737, "step": 14570 }, { "epoch": 1.307733728208925, "grad_norm": 0.5553911863365278, "learning_rate": 6.939015483938116e-06, "loss": 0.4566, "step": 14571 }, { "epoch": 1.307823472661596, "grad_norm": 0.5273778891652003, "learning_rate": 6.938534181830382e-06, "loss": 0.4731, "step": 14572 }, { "epoch": 1.3079132171142671, "grad_norm": 0.5526177407530118, "learning_rate": 6.938052858581387e-06, "loss": 0.4691, "step": 14573 }, { "epoch": 1.3080029615669382, "grad_norm": 0.4941684209219966, "learning_rate": 6.937571514196377e-06, "loss": 0.4235, "step": 14574 }, { "epoch": 1.3080927060196093, "grad_norm": 0.5133403330018397, "learning_rate": 6.937090148680606e-06, "loss": 0.3915, "step": 14575 }, { "epoch": 1.30818245047228, "grad_norm": 0.49685378468038993, "learning_rate": 6.936608762039316e-06, "loss": 0.4275, "step": 14576 }, { "epoch": 1.3082721949249512, "grad_norm": 0.5031555604010084, "learning_rate": 6.936127354277765e-06, "loss": 0.425, "step": 14577 }, { "epoch": 1.3083619393776222, "grad_norm": 0.48304060620559436, "learning_rate": 6.9356459254011995e-06, "loss": 0.4166, "step": 14578 }, { "epoch": 1.3084516838302933, "grad_norm": 0.4892074441939257, "learning_rate": 6.935164475414869e-06, "loss": 0.3615, "step": 14579 }, { "epoch": 1.3085414282829642, "grad_norm": 0.4908408469234763, "learning_rate": 6.934683004324026e-06, "loss": 0.4211, "step": 14580 }, { "epoch": 1.3086311727356352, "grad_norm": 0.5115492686832128, "learning_rate": 6.934201512133921e-06, "loss": 0.3994, "step": 14581 }, { "epoch": 1.3087209171883063, "grad_norm": 0.5026081933259027, "learning_rate": 6.9337199988498046e-06, "loss": 0.3667, "step": 14582 }, { "epoch": 1.3088106616409774, "grad_norm": 0.45806352160490893, "learning_rate": 6.9332384644769276e-06, "loss": 0.3878, "step": 14583 }, { "epoch": 1.3089004060936484, "grad_norm": 0.5023152701358844, "learning_rate": 6.9327569090205425e-06, "loss": 0.446, "step": 14584 }, { "epoch": 1.3089901505463193, "grad_norm": 0.50269799605909, "learning_rate": 6.932275332485899e-06, "loss": 0.3696, "step": 14585 }, { "epoch": 1.3090798949989904, "grad_norm": 0.512608664536314, "learning_rate": 6.931793734878253e-06, "loss": 0.3947, "step": 14586 }, { "epoch": 1.3091696394516614, "grad_norm": 0.5063873055797529, "learning_rate": 6.931312116202853e-06, "loss": 0.426, "step": 14587 }, { "epoch": 1.3092593839043325, "grad_norm": 0.49282189839635926, "learning_rate": 6.930830476464954e-06, "loss": 0.3824, "step": 14588 }, { "epoch": 1.3093491283570033, "grad_norm": 0.4958500761979253, "learning_rate": 6.930348815669805e-06, "loss": 0.4078, "step": 14589 }, { "epoch": 1.3094388728096744, "grad_norm": 0.5118710115393453, "learning_rate": 6.929867133822663e-06, "loss": 0.4007, "step": 14590 }, { "epoch": 1.3095286172623455, "grad_norm": 0.5030728842642775, "learning_rate": 6.929385430928779e-06, "loss": 0.4706, "step": 14591 }, { "epoch": 1.3096183617150166, "grad_norm": 0.5402928842960832, "learning_rate": 6.928903706993408e-06, "loss": 0.3958, "step": 14592 }, { "epoch": 1.3097081061676876, "grad_norm": 0.5136979571060288, "learning_rate": 6.9284219620218e-06, "loss": 0.4099, "step": 14593 }, { "epoch": 1.3097978506203585, "grad_norm": 0.5019703193789942, "learning_rate": 6.927940196019214e-06, "loss": 0.4112, "step": 14594 }, { "epoch": 1.3098875950730295, "grad_norm": 0.48892630672421167, "learning_rate": 6.927458408990899e-06, "loss": 0.3696, "step": 14595 }, { "epoch": 1.3099773395257006, "grad_norm": 0.49746678481478324, "learning_rate": 6.926976600942114e-06, "loss": 0.3773, "step": 14596 }, { "epoch": 1.3100670839783715, "grad_norm": 0.4565130160429294, "learning_rate": 6.926494771878108e-06, "loss": 0.3461, "step": 14597 }, { "epoch": 1.3101568284310425, "grad_norm": 0.48611228145784546, "learning_rate": 6.926012921804142e-06, "loss": 0.4282, "step": 14598 }, { "epoch": 1.3102465728837136, "grad_norm": 0.4868436160741744, "learning_rate": 6.925531050725465e-06, "loss": 0.406, "step": 14599 }, { "epoch": 1.3103363173363847, "grad_norm": 0.5439468474478939, "learning_rate": 6.925049158647337e-06, "loss": 0.4146, "step": 14600 }, { "epoch": 1.3104260617890557, "grad_norm": 0.46907406742126156, "learning_rate": 6.924567245575009e-06, "loss": 0.3837, "step": 14601 }, { "epoch": 1.3105158062417268, "grad_norm": 0.544889421590574, "learning_rate": 6.924085311513742e-06, "loss": 0.3894, "step": 14602 }, { "epoch": 1.3106055506943977, "grad_norm": 0.4864864375270266, "learning_rate": 6.923603356468787e-06, "loss": 0.3776, "step": 14603 }, { "epoch": 1.3106952951470687, "grad_norm": 0.4758959951767869, "learning_rate": 6.923121380445401e-06, "loss": 0.3967, "step": 14604 }, { "epoch": 1.3107850395997398, "grad_norm": 0.5221312169321152, "learning_rate": 6.922639383448843e-06, "loss": 0.4049, "step": 14605 }, { "epoch": 1.3108747840524106, "grad_norm": 0.4785655238313306, "learning_rate": 6.922157365484368e-06, "loss": 0.3802, "step": 14606 }, { "epoch": 1.3109645285050817, "grad_norm": 0.47140288883800807, "learning_rate": 6.9216753265572314e-06, "loss": 0.3266, "step": 14607 }, { "epoch": 1.3110542729577528, "grad_norm": 0.4893469137340002, "learning_rate": 6.921193266672691e-06, "loss": 0.4667, "step": 14608 }, { "epoch": 1.3111440174104239, "grad_norm": 0.50321081281261, "learning_rate": 6.920711185836005e-06, "loss": 0.376, "step": 14609 }, { "epoch": 1.311233761863095, "grad_norm": 0.5206991374395039, "learning_rate": 6.920229084052431e-06, "loss": 0.4612, "step": 14610 }, { "epoch": 1.3113235063157658, "grad_norm": 0.5442305398195352, "learning_rate": 6.919746961327227e-06, "loss": 0.4375, "step": 14611 }, { "epoch": 1.3114132507684368, "grad_norm": 0.5082576179753094, "learning_rate": 6.919264817665648e-06, "loss": 0.3978, "step": 14612 }, { "epoch": 1.311502995221108, "grad_norm": 0.4955416606588414, "learning_rate": 6.918782653072955e-06, "loss": 0.3742, "step": 14613 }, { "epoch": 1.311592739673779, "grad_norm": 0.5461695696582912, "learning_rate": 6.918300467554406e-06, "loss": 0.3856, "step": 14614 }, { "epoch": 1.3116824841264498, "grad_norm": 0.49553289311214316, "learning_rate": 6.917818261115259e-06, "loss": 0.3689, "step": 14615 }, { "epoch": 1.311772228579121, "grad_norm": 0.48061605007651637, "learning_rate": 6.917336033760773e-06, "loss": 0.3729, "step": 14616 }, { "epoch": 1.311861973031792, "grad_norm": 0.4735578063143355, "learning_rate": 6.916853785496207e-06, "loss": 0.392, "step": 14617 }, { "epoch": 1.311951717484463, "grad_norm": 0.496914894293394, "learning_rate": 6.916371516326821e-06, "loss": 0.3408, "step": 14618 }, { "epoch": 1.3120414619371341, "grad_norm": 0.42981873216404415, "learning_rate": 6.915889226257875e-06, "loss": 0.4003, "step": 14619 }, { "epoch": 1.312131206389805, "grad_norm": 0.468145230442157, "learning_rate": 6.915406915294625e-06, "loss": 0.3472, "step": 14620 }, { "epoch": 1.312220950842476, "grad_norm": 0.5242878367328646, "learning_rate": 6.914924583442335e-06, "loss": 0.3949, "step": 14621 }, { "epoch": 1.312310695295147, "grad_norm": 0.49975840383041853, "learning_rate": 6.914442230706264e-06, "loss": 0.3835, "step": 14622 }, { "epoch": 1.3124004397478182, "grad_norm": 0.461241315194487, "learning_rate": 6.9139598570916724e-06, "loss": 0.3663, "step": 14623 }, { "epoch": 1.312490184200489, "grad_norm": 0.5214455600077382, "learning_rate": 6.913477462603821e-06, "loss": 0.464, "step": 14624 }, { "epoch": 1.31257992865316, "grad_norm": 0.5583711324560744, "learning_rate": 6.91299504724797e-06, "loss": 0.4481, "step": 14625 }, { "epoch": 1.3126696731058312, "grad_norm": 0.48318711475577536, "learning_rate": 6.912512611029381e-06, "loss": 0.3649, "step": 14626 }, { "epoch": 1.3127594175585022, "grad_norm": 0.481034968382384, "learning_rate": 6.912030153953315e-06, "loss": 0.3303, "step": 14627 }, { "epoch": 1.3128491620111733, "grad_norm": 0.4392953225500178, "learning_rate": 6.9115476760250344e-06, "loss": 0.4101, "step": 14628 }, { "epoch": 1.3129389064638441, "grad_norm": 0.49579048600796216, "learning_rate": 6.911065177249802e-06, "loss": 0.3663, "step": 14629 }, { "epoch": 1.3130286509165152, "grad_norm": 0.4936931585096637, "learning_rate": 6.910582657632876e-06, "loss": 0.4015, "step": 14630 }, { "epoch": 1.3131183953691863, "grad_norm": 0.4854905831772776, "learning_rate": 6.910100117179523e-06, "loss": 0.309, "step": 14631 }, { "epoch": 1.3132081398218571, "grad_norm": 0.4918671898343853, "learning_rate": 6.909617555895001e-06, "loss": 0.4322, "step": 14632 }, { "epoch": 1.3132978842745282, "grad_norm": 0.582786076409516, "learning_rate": 6.909134973784579e-06, "loss": 0.3903, "step": 14633 }, { "epoch": 1.3133876287271993, "grad_norm": 0.46257568997773946, "learning_rate": 6.908652370853514e-06, "loss": 0.3794, "step": 14634 }, { "epoch": 1.3134773731798703, "grad_norm": 0.5231690280978376, "learning_rate": 6.9081697471070716e-06, "loss": 0.3729, "step": 14635 }, { "epoch": 1.3135671176325414, "grad_norm": 0.49328289462174374, "learning_rate": 6.907687102550515e-06, "loss": 0.4101, "step": 14636 }, { "epoch": 1.3136568620852125, "grad_norm": 0.5318029472802427, "learning_rate": 6.907204437189108e-06, "loss": 0.402, "step": 14637 }, { "epoch": 1.3137466065378833, "grad_norm": 0.47148001788194716, "learning_rate": 6.906721751028114e-06, "loss": 0.3891, "step": 14638 }, { "epoch": 1.3138363509905544, "grad_norm": 0.5049724177457106, "learning_rate": 6.906239044072796e-06, "loss": 0.3788, "step": 14639 }, { "epoch": 1.3139260954432255, "grad_norm": 0.5215997177004413, "learning_rate": 6.905756316328422e-06, "loss": 0.4382, "step": 14640 }, { "epoch": 1.3140158398958963, "grad_norm": 0.5155394968457347, "learning_rate": 6.905273567800254e-06, "loss": 0.4206, "step": 14641 }, { "epoch": 1.3141055843485674, "grad_norm": 0.5012494098348925, "learning_rate": 6.9047907984935545e-06, "loss": 0.3577, "step": 14642 }, { "epoch": 1.3141953288012385, "grad_norm": 0.5119534127895035, "learning_rate": 6.904308008413594e-06, "loss": 0.4395, "step": 14643 }, { "epoch": 1.3142850732539095, "grad_norm": 0.5219859691241958, "learning_rate": 6.903825197565632e-06, "loss": 0.4306, "step": 14644 }, { "epoch": 1.3143748177065806, "grad_norm": 0.5238264424963064, "learning_rate": 6.903342365954939e-06, "loss": 0.3649, "step": 14645 }, { "epoch": 1.3144645621592514, "grad_norm": 0.47061981763776806, "learning_rate": 6.902859513586777e-06, "loss": 0.3751, "step": 14646 }, { "epoch": 1.3145543066119225, "grad_norm": 0.5318234081689137, "learning_rate": 6.902376640466413e-06, "loss": 0.4254, "step": 14647 }, { "epoch": 1.3146440510645936, "grad_norm": 0.47378033086108107, "learning_rate": 6.901893746599113e-06, "loss": 0.3673, "step": 14648 }, { "epoch": 1.3147337955172647, "grad_norm": 0.4705984088386558, "learning_rate": 6.901410831990145e-06, "loss": 0.3822, "step": 14649 }, { "epoch": 1.3148235399699355, "grad_norm": 0.5181304566133464, "learning_rate": 6.900927896644773e-06, "loss": 0.4096, "step": 14650 }, { "epoch": 1.3149132844226066, "grad_norm": 0.5257980779353433, "learning_rate": 6.900444940568265e-06, "loss": 0.3649, "step": 14651 }, { "epoch": 1.3150030288752776, "grad_norm": 0.4960774426387867, "learning_rate": 6.899961963765889e-06, "loss": 0.3875, "step": 14652 }, { "epoch": 1.3150927733279487, "grad_norm": 0.4758076138090282, "learning_rate": 6.8994789662429095e-06, "loss": 0.3795, "step": 14653 }, { "epoch": 1.3151825177806198, "grad_norm": 0.513178024654676, "learning_rate": 6.8989959480045965e-06, "loss": 0.3383, "step": 14654 }, { "epoch": 1.3152722622332906, "grad_norm": 0.47744447253023514, "learning_rate": 6.898512909056218e-06, "loss": 0.4254, "step": 14655 }, { "epoch": 1.3153620066859617, "grad_norm": 0.4996162550233573, "learning_rate": 6.898029849403039e-06, "loss": 0.4091, "step": 14656 }, { "epoch": 1.3154517511386328, "grad_norm": 0.5091154592597462, "learning_rate": 6.897546769050331e-06, "loss": 0.3753, "step": 14657 }, { "epoch": 1.3155414955913038, "grad_norm": 0.5208732723797277, "learning_rate": 6.89706366800336e-06, "loss": 0.3974, "step": 14658 }, { "epoch": 1.3156312400439747, "grad_norm": 0.5615264595677277, "learning_rate": 6.896580546267395e-06, "loss": 0.3823, "step": 14659 }, { "epoch": 1.3157209844966458, "grad_norm": 0.5279625729123176, "learning_rate": 6.896097403847705e-06, "loss": 0.4284, "step": 14660 }, { "epoch": 1.3158107289493168, "grad_norm": 0.5238174360121622, "learning_rate": 6.895614240749562e-06, "loss": 0.405, "step": 14661 }, { "epoch": 1.315900473401988, "grad_norm": 0.5222310766541254, "learning_rate": 6.89513105697823e-06, "loss": 0.4094, "step": 14662 }, { "epoch": 1.315990217854659, "grad_norm": 0.46352038303619086, "learning_rate": 6.894647852538983e-06, "loss": 0.3797, "step": 14663 }, { "epoch": 1.3160799623073298, "grad_norm": 0.5015884992858263, "learning_rate": 6.894164627437087e-06, "loss": 0.4457, "step": 14664 }, { "epoch": 1.316169706760001, "grad_norm": 0.5619762019461888, "learning_rate": 6.893681381677815e-06, "loss": 0.3642, "step": 14665 }, { "epoch": 1.316259451212672, "grad_norm": 0.4614693791533808, "learning_rate": 6.8931981152664354e-06, "loss": 0.4017, "step": 14666 }, { "epoch": 1.3163491956653428, "grad_norm": 0.5313844215106828, "learning_rate": 6.8927148282082194e-06, "loss": 0.4331, "step": 14667 }, { "epoch": 1.3164389401180139, "grad_norm": 0.5140851841908416, "learning_rate": 6.892231520508438e-06, "loss": 0.437, "step": 14668 }, { "epoch": 1.316528684570685, "grad_norm": 0.5211255115079041, "learning_rate": 6.891748192172361e-06, "loss": 0.4124, "step": 14669 }, { "epoch": 1.316618429023356, "grad_norm": 0.507608604929261, "learning_rate": 6.891264843205259e-06, "loss": 0.4335, "step": 14670 }, { "epoch": 1.316708173476027, "grad_norm": 0.53375662065594, "learning_rate": 6.890781473612404e-06, "loss": 0.3802, "step": 14671 }, { "epoch": 1.3167979179286982, "grad_norm": 0.5039550209250069, "learning_rate": 6.890298083399069e-06, "loss": 0.3861, "step": 14672 }, { "epoch": 1.316887662381369, "grad_norm": 0.5402704935555661, "learning_rate": 6.889814672570524e-06, "loss": 0.49, "step": 14673 }, { "epoch": 1.31697740683404, "grad_norm": 0.5025473662445048, "learning_rate": 6.889331241132043e-06, "loss": 0.3818, "step": 14674 }, { "epoch": 1.3170671512867111, "grad_norm": 0.5035629529301475, "learning_rate": 6.888847789088895e-06, "loss": 0.4348, "step": 14675 }, { "epoch": 1.317156895739382, "grad_norm": 0.5125811431724254, "learning_rate": 6.888364316446354e-06, "loss": 0.3769, "step": 14676 }, { "epoch": 1.317246640192053, "grad_norm": 0.5029477671947472, "learning_rate": 6.887880823209694e-06, "loss": 0.4476, "step": 14677 }, { "epoch": 1.3173363846447241, "grad_norm": 0.5410712445010669, "learning_rate": 6.887397309384188e-06, "loss": 0.3674, "step": 14678 }, { "epoch": 1.3174261290973952, "grad_norm": 0.5250465513354595, "learning_rate": 6.886913774975105e-06, "loss": 0.4245, "step": 14679 }, { "epoch": 1.3175158735500663, "grad_norm": 0.4964014434992667, "learning_rate": 6.886430219987722e-06, "loss": 0.3417, "step": 14680 }, { "epoch": 1.3176056180027373, "grad_norm": 0.4668028527659532, "learning_rate": 6.885946644427312e-06, "loss": 0.3765, "step": 14681 }, { "epoch": 1.3176953624554082, "grad_norm": 0.5336933802948702, "learning_rate": 6.885463048299149e-06, "loss": 0.3911, "step": 14682 }, { "epoch": 1.3177851069080793, "grad_norm": 0.4710795363488096, "learning_rate": 6.884979431608505e-06, "loss": 0.3989, "step": 14683 }, { "epoch": 1.3178748513607503, "grad_norm": 0.5267748556743561, "learning_rate": 6.884495794360657e-06, "loss": 0.3876, "step": 14684 }, { "epoch": 1.3179645958134212, "grad_norm": 0.5189128174390462, "learning_rate": 6.884012136560878e-06, "loss": 0.4232, "step": 14685 }, { "epoch": 1.3180543402660923, "grad_norm": 0.44890795950658696, "learning_rate": 6.883528458214442e-06, "loss": 0.3903, "step": 14686 }, { "epoch": 1.3181440847187633, "grad_norm": 0.5281781227782095, "learning_rate": 6.883044759326625e-06, "loss": 0.4216, "step": 14687 }, { "epoch": 1.3182338291714344, "grad_norm": 0.6110120081437906, "learning_rate": 6.882561039902703e-06, "loss": 0.4606, "step": 14688 }, { "epoch": 1.3183235736241055, "grad_norm": 0.5096905387565116, "learning_rate": 6.882077299947949e-06, "loss": 0.3914, "step": 14689 }, { "epoch": 1.3184133180767763, "grad_norm": 0.4733037816920637, "learning_rate": 6.88159353946764e-06, "loss": 0.3882, "step": 14690 }, { "epoch": 1.3185030625294474, "grad_norm": 0.5221546109155683, "learning_rate": 6.8811097584670515e-06, "loss": 0.4619, "step": 14691 }, { "epoch": 1.3185928069821184, "grad_norm": 0.48217757570215514, "learning_rate": 6.88062595695146e-06, "loss": 0.3707, "step": 14692 }, { "epoch": 1.3186825514347895, "grad_norm": 0.4893970695685025, "learning_rate": 6.88014213492614e-06, "loss": 0.4518, "step": 14693 }, { "epoch": 1.3187722958874604, "grad_norm": 0.5058433978445493, "learning_rate": 6.879658292396372e-06, "loss": 0.4082, "step": 14694 }, { "epoch": 1.3188620403401314, "grad_norm": 0.5217890015624002, "learning_rate": 6.879174429367426e-06, "loss": 0.3837, "step": 14695 }, { "epoch": 1.3189517847928025, "grad_norm": 0.4685574897817725, "learning_rate": 6.8786905458445865e-06, "loss": 0.369, "step": 14696 }, { "epoch": 1.3190415292454736, "grad_norm": 0.47848708371791254, "learning_rate": 6.878206641833125e-06, "loss": 0.3459, "step": 14697 }, { "epoch": 1.3191312736981446, "grad_norm": 0.49345183349274496, "learning_rate": 6.877722717338322e-06, "loss": 0.4924, "step": 14698 }, { "epoch": 1.3192210181508155, "grad_norm": 0.5614179733179141, "learning_rate": 6.877238772365453e-06, "loss": 0.4082, "step": 14699 }, { "epoch": 1.3193107626034866, "grad_norm": 0.48972551838492745, "learning_rate": 6.876754806919798e-06, "loss": 0.3853, "step": 14700 }, { "epoch": 1.3194005070561576, "grad_norm": 0.5009920101475617, "learning_rate": 6.876270821006632e-06, "loss": 0.3876, "step": 14701 }, { "epoch": 1.3194902515088285, "grad_norm": 0.5003422035969516, "learning_rate": 6.875786814631237e-06, "loss": 0.3611, "step": 14702 }, { "epoch": 1.3195799959614996, "grad_norm": 0.48045207531189676, "learning_rate": 6.875302787798888e-06, "loss": 0.4122, "step": 14703 }, { "epoch": 1.3196697404141706, "grad_norm": 0.5418064135858136, "learning_rate": 6.874818740514866e-06, "loss": 0.403, "step": 14704 }, { "epoch": 1.3197594848668417, "grad_norm": 0.48559163215766704, "learning_rate": 6.874334672784448e-06, "loss": 0.3596, "step": 14705 }, { "epoch": 1.3198492293195128, "grad_norm": 0.5317985942645624, "learning_rate": 6.873850584612915e-06, "loss": 0.4269, "step": 14706 }, { "epoch": 1.3199389737721838, "grad_norm": 0.5148328707626342, "learning_rate": 6.8733664760055464e-06, "loss": 0.3962, "step": 14707 }, { "epoch": 1.3200287182248547, "grad_norm": 0.4605774939666527, "learning_rate": 6.87288234696762e-06, "loss": 0.4264, "step": 14708 }, { "epoch": 1.3201184626775258, "grad_norm": 0.501950926551585, "learning_rate": 6.872398197504417e-06, "loss": 0.3716, "step": 14709 }, { "epoch": 1.3202082071301968, "grad_norm": 0.4810408438434385, "learning_rate": 6.871914027621219e-06, "loss": 0.4106, "step": 14710 }, { "epoch": 1.3202979515828677, "grad_norm": 0.5539591940985802, "learning_rate": 6.871429837323302e-06, "loss": 0.4581, "step": 14711 }, { "epoch": 1.3203876960355387, "grad_norm": 0.5329168066669366, "learning_rate": 6.870945626615948e-06, "loss": 0.4494, "step": 14712 }, { "epoch": 1.3204774404882098, "grad_norm": 0.5662235954185051, "learning_rate": 6.870461395504441e-06, "loss": 0.408, "step": 14713 }, { "epoch": 1.3205671849408809, "grad_norm": 0.5067050461321346, "learning_rate": 6.869977143994058e-06, "loss": 0.4176, "step": 14714 }, { "epoch": 1.320656929393552, "grad_norm": 0.5048422466400417, "learning_rate": 6.869492872090082e-06, "loss": 0.4146, "step": 14715 }, { "epoch": 1.320746673846223, "grad_norm": 0.4914373958188531, "learning_rate": 6.869008579797794e-06, "loss": 0.383, "step": 14716 }, { "epoch": 1.3208364182988939, "grad_norm": 0.4654925134720682, "learning_rate": 6.868524267122476e-06, "loss": 0.3557, "step": 14717 }, { "epoch": 1.320926162751565, "grad_norm": 0.4838531923447415, "learning_rate": 6.868039934069408e-06, "loss": 0.4474, "step": 14718 }, { "epoch": 1.321015907204236, "grad_norm": 0.5393447226631642, "learning_rate": 6.867555580643875e-06, "loss": 0.4097, "step": 14719 }, { "epoch": 1.3211056516569069, "grad_norm": 0.5216930143793497, "learning_rate": 6.867071206851157e-06, "loss": 0.3756, "step": 14720 }, { "epoch": 1.321195396109578, "grad_norm": 0.49820327847794577, "learning_rate": 6.8665868126965385e-06, "loss": 0.4066, "step": 14721 }, { "epoch": 1.321285140562249, "grad_norm": 0.4498891376342617, "learning_rate": 6.866102398185299e-06, "loss": 0.3624, "step": 14722 }, { "epoch": 1.32137488501492, "grad_norm": 0.48072192547920617, "learning_rate": 6.865617963322724e-06, "loss": 0.3331, "step": 14723 }, { "epoch": 1.3214646294675911, "grad_norm": 0.4382909290187896, "learning_rate": 6.865133508114096e-06, "loss": 0.3729, "step": 14724 }, { "epoch": 1.321554373920262, "grad_norm": 0.4981860006574982, "learning_rate": 6.8646490325646985e-06, "loss": 0.4089, "step": 14725 }, { "epoch": 1.321644118372933, "grad_norm": 0.5189468831421713, "learning_rate": 6.864164536679814e-06, "loss": 0.4799, "step": 14726 }, { "epoch": 1.3217338628256041, "grad_norm": 0.5540701458391495, "learning_rate": 6.863680020464729e-06, "loss": 0.4669, "step": 14727 }, { "epoch": 1.3218236072782752, "grad_norm": 0.5516307342097256, "learning_rate": 6.863195483924724e-06, "loss": 0.4534, "step": 14728 }, { "epoch": 1.321913351730946, "grad_norm": 0.5265996611859388, "learning_rate": 6.862710927065086e-06, "loss": 0.4468, "step": 14729 }, { "epoch": 1.322003096183617, "grad_norm": 0.4944355345880204, "learning_rate": 6.862226349891098e-06, "loss": 0.3901, "step": 14730 }, { "epoch": 1.3220928406362882, "grad_norm": 0.48050053223691475, "learning_rate": 6.861741752408047e-06, "loss": 0.3831, "step": 14731 }, { "epoch": 1.3221825850889593, "grad_norm": 0.47889384559824927, "learning_rate": 6.8612571346212135e-06, "loss": 0.4304, "step": 14732 }, { "epoch": 1.3222723295416303, "grad_norm": 0.5333065985273794, "learning_rate": 6.860772496535887e-06, "loss": 0.4145, "step": 14733 }, { "epoch": 1.3223620739943012, "grad_norm": 0.5414186959209155, "learning_rate": 6.860287838157349e-06, "loss": 0.4294, "step": 14734 }, { "epoch": 1.3224518184469722, "grad_norm": 0.4832514884829815, "learning_rate": 6.859803159490889e-06, "loss": 0.3815, "step": 14735 }, { "epoch": 1.3225415628996433, "grad_norm": 0.4903252673406071, "learning_rate": 6.85931846054179e-06, "loss": 0.368, "step": 14736 }, { "epoch": 1.3226313073523142, "grad_norm": 0.47002288734706604, "learning_rate": 6.858833741315341e-06, "loss": 0.3992, "step": 14737 }, { "epoch": 1.3227210518049852, "grad_norm": 0.5682513739979591, "learning_rate": 6.858349001816824e-06, "loss": 0.3644, "step": 14738 }, { "epoch": 1.3228107962576563, "grad_norm": 0.4677696838340735, "learning_rate": 6.85786424205153e-06, "loss": 0.384, "step": 14739 }, { "epoch": 1.3229005407103274, "grad_norm": 0.5701300958396405, "learning_rate": 6.857379462024741e-06, "loss": 0.3472, "step": 14740 }, { "epoch": 1.3229902851629984, "grad_norm": 0.45478861186915837, "learning_rate": 6.856894661741749e-06, "loss": 0.351, "step": 14741 }, { "epoch": 1.3230800296156695, "grad_norm": 0.4816995534892128, "learning_rate": 6.856409841207836e-06, "loss": 0.4347, "step": 14742 }, { "epoch": 1.3231697740683404, "grad_norm": 0.463811537641026, "learning_rate": 6.855925000428294e-06, "loss": 0.4026, "step": 14743 }, { "epoch": 1.3232595185210114, "grad_norm": 0.5303457024455562, "learning_rate": 6.8554401394084084e-06, "loss": 0.409, "step": 14744 }, { "epoch": 1.3233492629736825, "grad_norm": 0.5187940947378146, "learning_rate": 6.854955258153466e-06, "loss": 0.4604, "step": 14745 }, { "epoch": 1.3234390074263533, "grad_norm": 0.5258034659288163, "learning_rate": 6.854470356668757e-06, "loss": 0.3936, "step": 14746 }, { "epoch": 1.3235287518790244, "grad_norm": 0.49284120174678314, "learning_rate": 6.853985434959567e-06, "loss": 0.3775, "step": 14747 }, { "epoch": 1.3236184963316955, "grad_norm": 0.46892163672258425, "learning_rate": 6.8535004930311874e-06, "loss": 0.3822, "step": 14748 }, { "epoch": 1.3237082407843666, "grad_norm": 0.5097992476394048, "learning_rate": 6.853015530888905e-06, "loss": 0.3625, "step": 14749 }, { "epoch": 1.3237979852370376, "grad_norm": 0.4497264411710268, "learning_rate": 6.8525305485380085e-06, "loss": 0.342, "step": 14750 }, { "epoch": 1.3238877296897087, "grad_norm": 0.46792895726599876, "learning_rate": 6.852045545983787e-06, "loss": 0.3882, "step": 14751 }, { "epoch": 1.3239774741423795, "grad_norm": 0.5228048336056815, "learning_rate": 6.851560523231533e-06, "loss": 0.4636, "step": 14752 }, { "epoch": 1.3240672185950506, "grad_norm": 0.5287487140932832, "learning_rate": 6.851075480286531e-06, "loss": 0.4263, "step": 14753 }, { "epoch": 1.3241569630477217, "grad_norm": 0.525788719391982, "learning_rate": 6.850590417154075e-06, "loss": 0.4649, "step": 14754 }, { "epoch": 1.3242467075003925, "grad_norm": 0.5543733646001057, "learning_rate": 6.850105333839452e-06, "loss": 0.3687, "step": 14755 }, { "epoch": 1.3243364519530636, "grad_norm": 0.46512477721517664, "learning_rate": 6.8496202303479556e-06, "loss": 0.3584, "step": 14756 }, { "epoch": 1.3244261964057347, "grad_norm": 0.4951425025460433, "learning_rate": 6.849135106684872e-06, "loss": 0.4143, "step": 14757 }, { "epoch": 1.3245159408584057, "grad_norm": 0.518187387071898, "learning_rate": 6.848649962855496e-06, "loss": 0.4143, "step": 14758 }, { "epoch": 1.3246056853110768, "grad_norm": 0.4984119907811891, "learning_rate": 6.848164798865115e-06, "loss": 0.4246, "step": 14759 }, { "epoch": 1.3246954297637477, "grad_norm": 0.530056929435965, "learning_rate": 6.847679614719023e-06, "loss": 0.4145, "step": 14760 }, { "epoch": 1.3247851742164187, "grad_norm": 0.5226682346227991, "learning_rate": 6.847194410422509e-06, "loss": 0.3908, "step": 14761 }, { "epoch": 1.3248749186690898, "grad_norm": 0.5024341507798675, "learning_rate": 6.846709185980866e-06, "loss": 0.3992, "step": 14762 }, { "epoch": 1.3249646631217609, "grad_norm": 0.6163900661731669, "learning_rate": 6.846223941399384e-06, "loss": 0.4854, "step": 14763 }, { "epoch": 1.3250544075744317, "grad_norm": 0.5601584521724673, "learning_rate": 6.845738676683358e-06, "loss": 0.4227, "step": 14764 }, { "epoch": 1.3251441520271028, "grad_norm": 0.5002019767978229, "learning_rate": 6.845253391838077e-06, "loss": 0.444, "step": 14765 }, { "epoch": 1.3252338964797739, "grad_norm": 0.5275123661200815, "learning_rate": 6.8447680868688345e-06, "loss": 0.4496, "step": 14766 }, { "epoch": 1.325323640932445, "grad_norm": 0.4847843531649323, "learning_rate": 6.844282761780926e-06, "loss": 0.4017, "step": 14767 }, { "epoch": 1.325413385385116, "grad_norm": 0.503371035868965, "learning_rate": 6.8437974165796394e-06, "loss": 0.3743, "step": 14768 }, { "epoch": 1.3255031298377868, "grad_norm": 0.5148012042765434, "learning_rate": 6.843312051270271e-06, "loss": 0.4363, "step": 14769 }, { "epoch": 1.325592874290458, "grad_norm": 0.5113816167163822, "learning_rate": 6.842826665858114e-06, "loss": 0.4198, "step": 14770 }, { "epoch": 1.325682618743129, "grad_norm": 0.493083750053047, "learning_rate": 6.842341260348461e-06, "loss": 0.3515, "step": 14771 }, { "epoch": 1.3257723631957998, "grad_norm": 0.4463238508138924, "learning_rate": 6.8418558347466055e-06, "loss": 0.3479, "step": 14772 }, { "epoch": 1.325862107648471, "grad_norm": 0.46579642797313875, "learning_rate": 6.841370389057842e-06, "loss": 0.386, "step": 14773 }, { "epoch": 1.325951852101142, "grad_norm": 0.48964609320666946, "learning_rate": 6.840884923287465e-06, "loss": 0.3608, "step": 14774 }, { "epoch": 1.326041596553813, "grad_norm": 0.46042346830249165, "learning_rate": 6.840399437440768e-06, "loss": 0.3729, "step": 14775 }, { "epoch": 1.326131341006484, "grad_norm": 0.5103723480220971, "learning_rate": 6.839913931523048e-06, "loss": 0.4143, "step": 14776 }, { "epoch": 1.3262210854591552, "grad_norm": 0.5407375469335951, "learning_rate": 6.839428405539595e-06, "loss": 0.3994, "step": 14777 }, { "epoch": 1.326310829911826, "grad_norm": 0.49441276611503204, "learning_rate": 6.8389428594957095e-06, "loss": 0.3445, "step": 14778 }, { "epoch": 1.326400574364497, "grad_norm": 0.4661496806937745, "learning_rate": 6.838457293396682e-06, "loss": 0.3962, "step": 14779 }, { "epoch": 1.3264903188171682, "grad_norm": 0.5288093462370402, "learning_rate": 6.837971707247813e-06, "loss": 0.4146, "step": 14780 }, { "epoch": 1.326580063269839, "grad_norm": 0.5139110458896704, "learning_rate": 6.837486101054393e-06, "loss": 0.3802, "step": 14781 }, { "epoch": 1.32666980772251, "grad_norm": 0.46878865879043835, "learning_rate": 6.837000474821721e-06, "loss": 0.353, "step": 14782 }, { "epoch": 1.3267595521751812, "grad_norm": 0.49600647710815465, "learning_rate": 6.836514828555092e-06, "loss": 0.4777, "step": 14783 }, { "epoch": 1.3268492966278522, "grad_norm": 0.5346610267886169, "learning_rate": 6.836029162259805e-06, "loss": 0.4193, "step": 14784 }, { "epoch": 1.3269390410805233, "grad_norm": 0.4781810887836382, "learning_rate": 6.835543475941152e-06, "loss": 0.3987, "step": 14785 }, { "epoch": 1.3270287855331944, "grad_norm": 0.4934108167416928, "learning_rate": 6.835057769604434e-06, "loss": 0.384, "step": 14786 }, { "epoch": 1.3271185299858652, "grad_norm": 0.5111642341842203, "learning_rate": 6.834572043254946e-06, "loss": 0.3846, "step": 14787 }, { "epoch": 1.3272082744385363, "grad_norm": 0.504749891887686, "learning_rate": 6.834086296897985e-06, "loss": 0.354, "step": 14788 }, { "epoch": 1.3272980188912074, "grad_norm": 0.46000138529020995, "learning_rate": 6.833600530538849e-06, "loss": 0.3663, "step": 14789 }, { "epoch": 1.3273877633438782, "grad_norm": 0.5073562312659249, "learning_rate": 6.833114744182836e-06, "loss": 0.3949, "step": 14790 }, { "epoch": 1.3274775077965493, "grad_norm": 0.4835066110384632, "learning_rate": 6.832628937835244e-06, "loss": 0.4068, "step": 14791 }, { "epoch": 1.3275672522492203, "grad_norm": 0.4652265005619293, "learning_rate": 6.8321431115013685e-06, "loss": 0.3641, "step": 14792 }, { "epoch": 1.3276569967018914, "grad_norm": 0.49873628227261196, "learning_rate": 6.831657265186512e-06, "loss": 0.409, "step": 14793 }, { "epoch": 1.3277467411545625, "grad_norm": 0.5099297555232961, "learning_rate": 6.831171398895969e-06, "loss": 0.3782, "step": 14794 }, { "epoch": 1.3278364856072333, "grad_norm": 0.565115940401094, "learning_rate": 6.830685512635042e-06, "loss": 0.4471, "step": 14795 }, { "epoch": 1.3279262300599044, "grad_norm": 0.5037083126610324, "learning_rate": 6.830199606409028e-06, "loss": 0.3807, "step": 14796 }, { "epoch": 1.3280159745125755, "grad_norm": 0.49512085149518037, "learning_rate": 6.829713680223226e-06, "loss": 0.4192, "step": 14797 }, { "epoch": 1.3281057189652465, "grad_norm": 0.4806161786202851, "learning_rate": 6.829227734082936e-06, "loss": 0.3767, "step": 14798 }, { "epoch": 1.3281954634179174, "grad_norm": 0.4674260348032051, "learning_rate": 6.828741767993456e-06, "loss": 0.3753, "step": 14799 }, { "epoch": 1.3282852078705885, "grad_norm": 0.45446767523024967, "learning_rate": 6.828255781960089e-06, "loss": 0.4156, "step": 14800 }, { "epoch": 1.3283749523232595, "grad_norm": 0.5192624455191067, "learning_rate": 6.827769775988134e-06, "loss": 0.3739, "step": 14801 }, { "epoch": 1.3284646967759306, "grad_norm": 0.5130030836735072, "learning_rate": 6.827283750082889e-06, "loss": 0.3611, "step": 14802 }, { "epoch": 1.3285544412286017, "grad_norm": 0.5061102523111205, "learning_rate": 6.826797704249657e-06, "loss": 0.3823, "step": 14803 }, { "epoch": 1.3286441856812725, "grad_norm": 0.47192433721550286, "learning_rate": 6.826311638493737e-06, "loss": 0.3632, "step": 14804 }, { "epoch": 1.3287339301339436, "grad_norm": 0.4511051759600315, "learning_rate": 6.825825552820432e-06, "loss": 0.3424, "step": 14805 }, { "epoch": 1.3288236745866147, "grad_norm": 0.4698973323599311, "learning_rate": 6.825339447235041e-06, "loss": 0.3894, "step": 14806 }, { "epoch": 1.3289134190392855, "grad_norm": 0.4425533860832947, "learning_rate": 6.824853321742868e-06, "loss": 0.3755, "step": 14807 }, { "epoch": 1.3290031634919566, "grad_norm": 0.5073979222589499, "learning_rate": 6.824367176349211e-06, "loss": 0.4167, "step": 14808 }, { "epoch": 1.3290929079446276, "grad_norm": 0.5138756720301175, "learning_rate": 6.823881011059374e-06, "loss": 0.407, "step": 14809 }, { "epoch": 1.3291826523972987, "grad_norm": 0.5322281527114637, "learning_rate": 6.823394825878659e-06, "loss": 0.41, "step": 14810 }, { "epoch": 1.3292723968499698, "grad_norm": 0.4988165983769323, "learning_rate": 6.82290862081237e-06, "loss": 0.3986, "step": 14811 }, { "epoch": 1.3293621413026409, "grad_norm": 0.4802706396286737, "learning_rate": 6.822422395865804e-06, "loss": 0.3662, "step": 14812 }, { "epoch": 1.3294518857553117, "grad_norm": 0.5033251502709568, "learning_rate": 6.8219361510442696e-06, "loss": 0.4255, "step": 14813 }, { "epoch": 1.3295416302079828, "grad_norm": 0.5010021566003932, "learning_rate": 6.821449886353066e-06, "loss": 0.465, "step": 14814 }, { "epoch": 1.3296313746606538, "grad_norm": 0.5242449265084732, "learning_rate": 6.820963601797498e-06, "loss": 0.3969, "step": 14815 }, { "epoch": 1.3297211191133247, "grad_norm": 0.49017059195718315, "learning_rate": 6.8204772973828685e-06, "loss": 0.4212, "step": 14816 }, { "epoch": 1.3298108635659958, "grad_norm": 0.4896996395625826, "learning_rate": 6.81999097311448e-06, "loss": 0.401, "step": 14817 }, { "epoch": 1.3299006080186668, "grad_norm": 0.48807570005006046, "learning_rate": 6.819504628997637e-06, "loss": 0.4245, "step": 14818 }, { "epoch": 1.329990352471338, "grad_norm": 0.5276776573452174, "learning_rate": 6.8190182650376455e-06, "loss": 0.4002, "step": 14819 }, { "epoch": 1.330080096924009, "grad_norm": 0.4982486263571041, "learning_rate": 6.818531881239806e-06, "loss": 0.3718, "step": 14820 }, { "epoch": 1.33016984137668, "grad_norm": 0.4723960914082446, "learning_rate": 6.818045477609426e-06, "loss": 0.4026, "step": 14821 }, { "epoch": 1.3302595858293509, "grad_norm": 0.5278894971256997, "learning_rate": 6.817559054151809e-06, "loss": 0.3858, "step": 14822 }, { "epoch": 1.330349330282022, "grad_norm": 0.5105495387644253, "learning_rate": 6.817072610872259e-06, "loss": 0.5083, "step": 14823 }, { "epoch": 1.330439074734693, "grad_norm": 0.5431901734227168, "learning_rate": 6.816586147776082e-06, "loss": 0.3717, "step": 14824 }, { "epoch": 1.3305288191873639, "grad_norm": 0.5065936254581677, "learning_rate": 6.816099664868584e-06, "loss": 0.3882, "step": 14825 }, { "epoch": 1.330618563640035, "grad_norm": 0.46205532484335654, "learning_rate": 6.815613162155068e-06, "loss": 0.4304, "step": 14826 }, { "epoch": 1.330708308092706, "grad_norm": 0.5180658365992699, "learning_rate": 6.815126639640842e-06, "loss": 0.3858, "step": 14827 }, { "epoch": 1.330798052545377, "grad_norm": 0.48439506159390344, "learning_rate": 6.814640097331211e-06, "loss": 0.4149, "step": 14828 }, { "epoch": 1.3308877969980482, "grad_norm": 0.5528656415167699, "learning_rate": 6.814153535231481e-06, "loss": 0.442, "step": 14829 }, { "epoch": 1.330977541450719, "grad_norm": 0.528108441338952, "learning_rate": 6.813666953346959e-06, "loss": 0.3869, "step": 14830 }, { "epoch": 1.33106728590339, "grad_norm": 0.486441284852321, "learning_rate": 6.813180351682951e-06, "loss": 0.4023, "step": 14831 }, { "epoch": 1.3311570303560611, "grad_norm": 0.5015062623980396, "learning_rate": 6.812693730244764e-06, "loss": 0.3677, "step": 14832 }, { "epoch": 1.3312467748087322, "grad_norm": 0.4644232329568647, "learning_rate": 6.812207089037704e-06, "loss": 0.383, "step": 14833 }, { "epoch": 1.331336519261403, "grad_norm": 0.47541742915274854, "learning_rate": 6.811720428067081e-06, "loss": 0.374, "step": 14834 }, { "epoch": 1.3314262637140741, "grad_norm": 0.4783173612873205, "learning_rate": 6.8112337473382e-06, "loss": 0.3985, "step": 14835 }, { "epoch": 1.3315160081667452, "grad_norm": 0.5170253405251757, "learning_rate": 6.810747046856369e-06, "loss": 0.4305, "step": 14836 }, { "epoch": 1.3316057526194163, "grad_norm": 0.4797069870563204, "learning_rate": 6.810260326626896e-06, "loss": 0.4039, "step": 14837 }, { "epoch": 1.3316954970720873, "grad_norm": 0.5132629491520533, "learning_rate": 6.8097735866550895e-06, "loss": 0.3734, "step": 14838 }, { "epoch": 1.3317852415247582, "grad_norm": 0.469749856915691, "learning_rate": 6.809286826946257e-06, "loss": 0.4302, "step": 14839 }, { "epoch": 1.3318749859774293, "grad_norm": 0.49218125419226294, "learning_rate": 6.808800047505708e-06, "loss": 0.3806, "step": 14840 }, { "epoch": 1.3319647304301003, "grad_norm": 0.5052851143757149, "learning_rate": 6.808313248338751e-06, "loss": 0.3491, "step": 14841 }, { "epoch": 1.3320544748827712, "grad_norm": 0.47390237629654275, "learning_rate": 6.807826429450694e-06, "loss": 0.3601, "step": 14842 }, { "epoch": 1.3321442193354422, "grad_norm": 0.5029477243502616, "learning_rate": 6.8073395908468465e-06, "loss": 0.4429, "step": 14843 }, { "epoch": 1.3322339637881133, "grad_norm": 0.5399340014659613, "learning_rate": 6.806852732532519e-06, "loss": 0.4566, "step": 14844 }, { "epoch": 1.3323237082407844, "grad_norm": 0.5021564311877568, "learning_rate": 6.806365854513019e-06, "loss": 0.4101, "step": 14845 }, { "epoch": 1.3324134526934555, "grad_norm": 0.5003926717713355, "learning_rate": 6.805878956793659e-06, "loss": 0.4107, "step": 14846 }, { "epoch": 1.3325031971461265, "grad_norm": 0.5318093061606837, "learning_rate": 6.805392039379746e-06, "loss": 0.3794, "step": 14847 }, { "epoch": 1.3325929415987974, "grad_norm": 0.5181563178447814, "learning_rate": 6.804905102276592e-06, "loss": 0.3552, "step": 14848 }, { "epoch": 1.3326826860514684, "grad_norm": 0.4722074034825108, "learning_rate": 6.804418145489508e-06, "loss": 0.3461, "step": 14849 }, { "epoch": 1.3327724305041395, "grad_norm": 0.4702944534478852, "learning_rate": 6.803931169023803e-06, "loss": 0.3742, "step": 14850 }, { "epoch": 1.3328621749568104, "grad_norm": 0.484671658660848, "learning_rate": 6.803444172884789e-06, "loss": 0.4743, "step": 14851 }, { "epoch": 1.3329519194094814, "grad_norm": 0.541765433156287, "learning_rate": 6.802957157077777e-06, "loss": 0.4018, "step": 14852 }, { "epoch": 1.3330416638621525, "grad_norm": 0.4762737550667482, "learning_rate": 6.802470121608078e-06, "loss": 0.3501, "step": 14853 }, { "epoch": 1.3331314083148236, "grad_norm": 0.5202866043238135, "learning_rate": 6.801983066481004e-06, "loss": 0.4776, "step": 14854 }, { "epoch": 1.3332211527674946, "grad_norm": 0.5613273487004259, "learning_rate": 6.8014959917018655e-06, "loss": 0.4292, "step": 14855 }, { "epoch": 1.3333108972201657, "grad_norm": 0.4924774603902247, "learning_rate": 6.801008897275976e-06, "loss": 0.4272, "step": 14856 }, { "epoch": 1.3334006416728366, "grad_norm": 0.5014180930709505, "learning_rate": 6.800521783208646e-06, "loss": 0.4014, "step": 14857 }, { "epoch": 1.3334903861255076, "grad_norm": 0.47906169409335403, "learning_rate": 6.80003464950519e-06, "loss": 0.3661, "step": 14858 }, { "epoch": 1.3335801305781787, "grad_norm": 0.5184808851572994, "learning_rate": 6.799547496170917e-06, "loss": 0.421, "step": 14859 }, { "epoch": 1.3336698750308495, "grad_norm": 0.5220656567447185, "learning_rate": 6.799060323211144e-06, "loss": 0.4068, "step": 14860 }, { "epoch": 1.3337596194835206, "grad_norm": 0.5128458561927955, "learning_rate": 6.798573130631182e-06, "loss": 0.3433, "step": 14861 }, { "epoch": 1.3338493639361917, "grad_norm": 0.4991940941279979, "learning_rate": 6.798085918436343e-06, "loss": 0.4597, "step": 14862 }, { "epoch": 1.3339391083888628, "grad_norm": 0.5082371599359685, "learning_rate": 6.797598686631945e-06, "loss": 0.4172, "step": 14863 }, { "epoch": 1.3340288528415338, "grad_norm": 0.529955860972955, "learning_rate": 6.7971114352232954e-06, "loss": 0.3989, "step": 14864 }, { "epoch": 1.3341185972942047, "grad_norm": 0.5067636704833839, "learning_rate": 6.796624164215712e-06, "loss": 0.3947, "step": 14865 }, { "epoch": 1.3342083417468757, "grad_norm": 0.4969007038791842, "learning_rate": 6.796136873614508e-06, "loss": 0.3843, "step": 14866 }, { "epoch": 1.3342980861995468, "grad_norm": 0.4685549072477965, "learning_rate": 6.795649563424997e-06, "loss": 0.3765, "step": 14867 }, { "epoch": 1.3343878306522179, "grad_norm": 0.47327464655353096, "learning_rate": 6.795162233652497e-06, "loss": 0.4073, "step": 14868 }, { "epoch": 1.3344775751048887, "grad_norm": 0.5063574018109381, "learning_rate": 6.7946748843023165e-06, "loss": 0.3936, "step": 14869 }, { "epoch": 1.3345673195575598, "grad_norm": 0.48591852180448425, "learning_rate": 6.794187515379776e-06, "loss": 0.3825, "step": 14870 }, { "epoch": 1.3346570640102309, "grad_norm": 0.48511346943093797, "learning_rate": 6.793700126890188e-06, "loss": 0.4093, "step": 14871 }, { "epoch": 1.334746808462902, "grad_norm": 0.5185888453113803, "learning_rate": 6.793212718838869e-06, "loss": 0.3804, "step": 14872 }, { "epoch": 1.334836552915573, "grad_norm": 0.4825842436588437, "learning_rate": 6.792725291231132e-06, "loss": 0.3892, "step": 14873 }, { "epoch": 1.3349262973682439, "grad_norm": 0.4890926777592723, "learning_rate": 6.792237844072297e-06, "loss": 0.4019, "step": 14874 }, { "epoch": 1.335016041820915, "grad_norm": 0.49480062100287087, "learning_rate": 6.791750377367677e-06, "loss": 0.4213, "step": 14875 }, { "epoch": 1.335105786273586, "grad_norm": 0.5014900098685355, "learning_rate": 6.791262891122588e-06, "loss": 0.4003, "step": 14876 }, { "epoch": 1.3351955307262569, "grad_norm": 0.4981785402126015, "learning_rate": 6.79077538534235e-06, "loss": 0.4077, "step": 14877 }, { "epoch": 1.335285275178928, "grad_norm": 0.46305075477958263, "learning_rate": 6.790287860032274e-06, "loss": 0.3872, "step": 14878 }, { "epoch": 1.335375019631599, "grad_norm": 0.4947943048728052, "learning_rate": 6.789800315197681e-06, "loss": 0.3526, "step": 14879 }, { "epoch": 1.33546476408427, "grad_norm": 0.4928492918413529, "learning_rate": 6.7893127508438865e-06, "loss": 0.4362, "step": 14880 }, { "epoch": 1.3355545085369411, "grad_norm": 0.5159400495951552, "learning_rate": 6.78882516697621e-06, "loss": 0.4402, "step": 14881 }, { "epoch": 1.3356442529896122, "grad_norm": 0.5081557725764025, "learning_rate": 6.788337563599965e-06, "loss": 0.401, "step": 14882 }, { "epoch": 1.335733997442283, "grad_norm": 0.5481321570774627, "learning_rate": 6.787849940720473e-06, "loss": 0.4486, "step": 14883 }, { "epoch": 1.3358237418949541, "grad_norm": 0.49480158348669667, "learning_rate": 6.787362298343047e-06, "loss": 0.3998, "step": 14884 }, { "epoch": 1.3359134863476252, "grad_norm": 0.5107609792009772, "learning_rate": 6.786874636473013e-06, "loss": 0.3154, "step": 14885 }, { "epoch": 1.336003230800296, "grad_norm": 0.47756960247963387, "learning_rate": 6.786386955115683e-06, "loss": 0.3825, "step": 14886 }, { "epoch": 1.336092975252967, "grad_norm": 0.4957790023085426, "learning_rate": 6.785899254276376e-06, "loss": 0.4597, "step": 14887 }, { "epoch": 1.3361827197056382, "grad_norm": 0.5365233402634798, "learning_rate": 6.785411533960413e-06, "loss": 0.421, "step": 14888 }, { "epoch": 1.3362724641583092, "grad_norm": 0.49899810773700964, "learning_rate": 6.784923794173113e-06, "loss": 0.4048, "step": 14889 }, { "epoch": 1.3363622086109803, "grad_norm": 0.5175942676818587, "learning_rate": 6.784436034919792e-06, "loss": 0.3829, "step": 14890 }, { "epoch": 1.3364519530636514, "grad_norm": 0.4660681409788989, "learning_rate": 6.783948256205773e-06, "loss": 0.4513, "step": 14891 }, { "epoch": 1.3365416975163222, "grad_norm": 0.514314333626976, "learning_rate": 6.783460458036376e-06, "loss": 0.3918, "step": 14892 }, { "epoch": 1.3366314419689933, "grad_norm": 0.47471428059566984, "learning_rate": 6.782972640416917e-06, "loss": 0.3666, "step": 14893 }, { "epoch": 1.3367211864216644, "grad_norm": 0.4960518073363326, "learning_rate": 6.782484803352719e-06, "loss": 0.4165, "step": 14894 }, { "epoch": 1.3368109308743352, "grad_norm": 0.550711933685708, "learning_rate": 6.781996946849101e-06, "loss": 0.4134, "step": 14895 }, { "epoch": 1.3369006753270063, "grad_norm": 0.48264000121792855, "learning_rate": 6.781509070911384e-06, "loss": 0.437, "step": 14896 }, { "epoch": 1.3369904197796774, "grad_norm": 0.5315098043751469, "learning_rate": 6.78102117554489e-06, "loss": 0.4443, "step": 14897 }, { "epoch": 1.3370801642323484, "grad_norm": 0.515882664542501, "learning_rate": 6.7805332607549356e-06, "loss": 0.3795, "step": 14898 }, { "epoch": 1.3371699086850195, "grad_norm": 0.49445390312894016, "learning_rate": 6.780045326546847e-06, "loss": 0.397, "step": 14899 }, { "epoch": 1.3372596531376904, "grad_norm": 0.5313621471811968, "learning_rate": 6.779557372925942e-06, "loss": 0.4327, "step": 14900 }, { "epoch": 1.3373493975903614, "grad_norm": 0.4895229277017623, "learning_rate": 6.779069399897545e-06, "loss": 0.3572, "step": 14901 }, { "epoch": 1.3374391420430325, "grad_norm": 0.49350683373861626, "learning_rate": 6.778581407466974e-06, "loss": 0.4101, "step": 14902 }, { "epoch": 1.3375288864957036, "grad_norm": 0.4756405767777895, "learning_rate": 6.778093395639556e-06, "loss": 0.375, "step": 14903 }, { "epoch": 1.3376186309483744, "grad_norm": 0.5041669845630685, "learning_rate": 6.7776053644206074e-06, "loss": 0.3554, "step": 14904 }, { "epoch": 1.3377083754010455, "grad_norm": 0.5175125849842825, "learning_rate": 6.777117313815454e-06, "loss": 0.4308, "step": 14905 }, { "epoch": 1.3377981198537165, "grad_norm": 0.5315683079737135, "learning_rate": 6.776629243829418e-06, "loss": 0.4463, "step": 14906 }, { "epoch": 1.3378878643063876, "grad_norm": 0.4933499809732787, "learning_rate": 6.776141154467823e-06, "loss": 0.4203, "step": 14907 }, { "epoch": 1.3379776087590587, "grad_norm": 0.5256723970785908, "learning_rate": 6.77565304573599e-06, "loss": 0.4201, "step": 14908 }, { "epoch": 1.3380673532117295, "grad_norm": 0.5137490868024346, "learning_rate": 6.775164917639244e-06, "loss": 0.421, "step": 14909 }, { "epoch": 1.3381570976644006, "grad_norm": 0.5138740911922823, "learning_rate": 6.7746767701829065e-06, "loss": 0.4301, "step": 14910 }, { "epoch": 1.3382468421170717, "grad_norm": 0.5196052007409239, "learning_rate": 6.774188603372304e-06, "loss": 0.4349, "step": 14911 }, { "epoch": 1.3383365865697425, "grad_norm": 0.4879856985278932, "learning_rate": 6.773700417212756e-06, "loss": 0.3901, "step": 14912 }, { "epoch": 1.3384263310224136, "grad_norm": 0.5024665245542457, "learning_rate": 6.773212211709591e-06, "loss": 0.3514, "step": 14913 }, { "epoch": 1.3385160754750847, "grad_norm": 0.47036966831398946, "learning_rate": 6.772723986868131e-06, "loss": 0.3581, "step": 14914 }, { "epoch": 1.3386058199277557, "grad_norm": 0.5120588111272145, "learning_rate": 6.772235742693701e-06, "loss": 0.3879, "step": 14915 }, { "epoch": 1.3386955643804268, "grad_norm": 0.48676326890268223, "learning_rate": 6.771747479191625e-06, "loss": 0.3568, "step": 14916 }, { "epoch": 1.3387853088330979, "grad_norm": 0.5143298604340308, "learning_rate": 6.77125919636723e-06, "loss": 0.4227, "step": 14917 }, { "epoch": 1.3388750532857687, "grad_norm": 0.5258622555821698, "learning_rate": 6.770770894225839e-06, "loss": 0.4499, "step": 14918 }, { "epoch": 1.3389647977384398, "grad_norm": 0.5267553673252497, "learning_rate": 6.770282572772777e-06, "loss": 0.3513, "step": 14919 }, { "epoch": 1.3390545421911109, "grad_norm": 0.48270799464950825, "learning_rate": 6.7697942320133715e-06, "loss": 0.4048, "step": 14920 }, { "epoch": 1.3391442866437817, "grad_norm": 0.5056281538117781, "learning_rate": 6.769305871952947e-06, "loss": 0.4224, "step": 14921 }, { "epoch": 1.3392340310964528, "grad_norm": 0.473548751485386, "learning_rate": 6.7688174925968285e-06, "loss": 0.387, "step": 14922 }, { "epoch": 1.3393237755491239, "grad_norm": 0.5087766369565075, "learning_rate": 6.768329093950345e-06, "loss": 0.398, "step": 14923 }, { "epoch": 1.339413520001795, "grad_norm": 0.5350669473087373, "learning_rate": 6.76784067601882e-06, "loss": 0.4468, "step": 14924 }, { "epoch": 1.339503264454466, "grad_norm": 0.5030918198650657, "learning_rate": 6.767352238807582e-06, "loss": 0.4064, "step": 14925 }, { "epoch": 1.339593008907137, "grad_norm": 0.5186658879076068, "learning_rate": 6.766863782321958e-06, "loss": 0.4649, "step": 14926 }, { "epoch": 1.339682753359808, "grad_norm": 0.5410203705404101, "learning_rate": 6.7663753065672725e-06, "loss": 0.4272, "step": 14927 }, { "epoch": 1.339772497812479, "grad_norm": 0.5087584025243772, "learning_rate": 6.765886811548855e-06, "loss": 0.4109, "step": 14928 }, { "epoch": 1.33986224226515, "grad_norm": 0.48754178101987006, "learning_rate": 6.765398297272032e-06, "loss": 0.3983, "step": 14929 }, { "epoch": 1.339951986717821, "grad_norm": 0.47978096951486987, "learning_rate": 6.764909763742132e-06, "loss": 0.406, "step": 14930 }, { "epoch": 1.340041731170492, "grad_norm": 0.5068579000811955, "learning_rate": 6.764421210964482e-06, "loss": 0.3723, "step": 14931 }, { "epoch": 1.340131475623163, "grad_norm": 0.5079543939845211, "learning_rate": 6.76393263894441e-06, "loss": 0.3775, "step": 14932 }, { "epoch": 1.340221220075834, "grad_norm": 0.5154844536007879, "learning_rate": 6.763444047687245e-06, "loss": 0.4037, "step": 14933 }, { "epoch": 1.3403109645285052, "grad_norm": 0.5065060702558861, "learning_rate": 6.762955437198315e-06, "loss": 0.4422, "step": 14934 }, { "epoch": 1.340400708981176, "grad_norm": 0.4933447241537488, "learning_rate": 6.762466807482949e-06, "loss": 0.3757, "step": 14935 }, { "epoch": 1.340490453433847, "grad_norm": 0.5104678058115766, "learning_rate": 6.761978158546476e-06, "loss": 0.4074, "step": 14936 }, { "epoch": 1.3405801978865182, "grad_norm": 0.5323050365128135, "learning_rate": 6.761489490394224e-06, "loss": 0.4203, "step": 14937 }, { "epoch": 1.3406699423391892, "grad_norm": 0.4953224043104697, "learning_rate": 6.761000803031523e-06, "loss": 0.3905, "step": 14938 }, { "epoch": 1.34075968679186, "grad_norm": 0.5123606355958664, "learning_rate": 6.760512096463703e-06, "loss": 0.4127, "step": 14939 }, { "epoch": 1.3408494312445312, "grad_norm": 0.5336080836667954, "learning_rate": 6.760023370696094e-06, "loss": 0.4371, "step": 14940 }, { "epoch": 1.3409391756972022, "grad_norm": 0.4989491580565282, "learning_rate": 6.7595346257340235e-06, "loss": 0.461, "step": 14941 }, { "epoch": 1.3410289201498733, "grad_norm": 0.5136548598368695, "learning_rate": 6.759045861582825e-06, "loss": 0.3864, "step": 14942 }, { "epoch": 1.3411186646025444, "grad_norm": 0.5167588302629192, "learning_rate": 6.758557078247826e-06, "loss": 0.406, "step": 14943 }, { "epoch": 1.3412084090552152, "grad_norm": 0.48481717214131204, "learning_rate": 6.75806827573436e-06, "loss": 0.4326, "step": 14944 }, { "epoch": 1.3412981535078863, "grad_norm": 0.5334122908656473, "learning_rate": 6.757579454047754e-06, "loss": 0.4433, "step": 14945 }, { "epoch": 1.3413878979605574, "grad_norm": 0.49717362129067877, "learning_rate": 6.7570906131933435e-06, "loss": 0.3683, "step": 14946 }, { "epoch": 1.3414776424132282, "grad_norm": 0.5033504817274538, "learning_rate": 6.756601753176455e-06, "loss": 0.4419, "step": 14947 }, { "epoch": 1.3415673868658993, "grad_norm": 0.5530078714532922, "learning_rate": 6.756112874002424e-06, "loss": 0.4212, "step": 14948 }, { "epoch": 1.3416571313185703, "grad_norm": 0.4904339240679592, "learning_rate": 6.75562397567658e-06, "loss": 0.4606, "step": 14949 }, { "epoch": 1.3417468757712414, "grad_norm": 0.5292731841461832, "learning_rate": 6.755135058204255e-06, "loss": 0.3746, "step": 14950 }, { "epoch": 1.3418366202239125, "grad_norm": 0.5095769054039936, "learning_rate": 6.75464612159078e-06, "loss": 0.3682, "step": 14951 }, { "epoch": 1.3419263646765835, "grad_norm": 0.46256111068711725, "learning_rate": 6.754157165841491e-06, "loss": 0.3674, "step": 14952 }, { "epoch": 1.3420161091292544, "grad_norm": 0.5529552844813139, "learning_rate": 6.7536681909617155e-06, "loss": 0.5545, "step": 14953 }, { "epoch": 1.3421058535819255, "grad_norm": 0.5707437056686521, "learning_rate": 6.753179196956791e-06, "loss": 0.3846, "step": 14954 }, { "epoch": 1.3421955980345965, "grad_norm": 0.4999961702023237, "learning_rate": 6.752690183832046e-06, "loss": 0.3495, "step": 14955 }, { "epoch": 1.3422853424872674, "grad_norm": 0.4750537578612301, "learning_rate": 6.7522011515928154e-06, "loss": 0.3667, "step": 14956 }, { "epoch": 1.3423750869399385, "grad_norm": 0.5403684826663402, "learning_rate": 6.751712100244434e-06, "loss": 0.4128, "step": 14957 }, { "epoch": 1.3424648313926095, "grad_norm": 0.49633669406750147, "learning_rate": 6.7512230297922336e-06, "loss": 0.4226, "step": 14958 }, { "epoch": 1.3425545758452806, "grad_norm": 0.474440962924113, "learning_rate": 6.750733940241547e-06, "loss": 0.3934, "step": 14959 }, { "epoch": 1.3426443202979517, "grad_norm": 0.48133813569174894, "learning_rate": 6.7502448315977095e-06, "loss": 0.4026, "step": 14960 }, { "epoch": 1.3427340647506227, "grad_norm": 0.47109400885590924, "learning_rate": 6.7497557038660565e-06, "loss": 0.3947, "step": 14961 }, { "epoch": 1.3428238092032936, "grad_norm": 0.494399148668559, "learning_rate": 6.74926655705192e-06, "loss": 0.3822, "step": 14962 }, { "epoch": 1.3429135536559647, "grad_norm": 0.5154718152410113, "learning_rate": 6.748777391160637e-06, "loss": 0.4574, "step": 14963 }, { "epoch": 1.3430032981086357, "grad_norm": 0.5457795839065269, "learning_rate": 6.748288206197538e-06, "loss": 0.4025, "step": 14964 }, { "epoch": 1.3430930425613066, "grad_norm": 0.4949375390671763, "learning_rate": 6.747799002167963e-06, "loss": 0.4087, "step": 14965 }, { "epoch": 1.3431827870139776, "grad_norm": 0.5073066627583557, "learning_rate": 6.747309779077242e-06, "loss": 0.4331, "step": 14966 }, { "epoch": 1.3432725314666487, "grad_norm": 0.5000421898655202, "learning_rate": 6.746820536930716e-06, "loss": 0.3733, "step": 14967 }, { "epoch": 1.3433622759193198, "grad_norm": 0.5305689662180985, "learning_rate": 6.746331275733716e-06, "loss": 0.4402, "step": 14968 }, { "epoch": 1.3434520203719909, "grad_norm": 0.49144951347156857, "learning_rate": 6.745841995491581e-06, "loss": 0.3815, "step": 14969 }, { "epoch": 1.3435417648246617, "grad_norm": 0.5218181468768385, "learning_rate": 6.745352696209644e-06, "loss": 0.4331, "step": 14970 }, { "epoch": 1.3436315092773328, "grad_norm": 0.47177191767153703, "learning_rate": 6.744863377893244e-06, "loss": 0.3266, "step": 14971 }, { "epoch": 1.3437212537300038, "grad_norm": 0.4870744682903445, "learning_rate": 6.744374040547714e-06, "loss": 0.42, "step": 14972 }, { "epoch": 1.343810998182675, "grad_norm": 0.5523017144057136, "learning_rate": 6.743884684178394e-06, "loss": 0.4042, "step": 14973 }, { "epoch": 1.3439007426353458, "grad_norm": 0.48575417217988026, "learning_rate": 6.74339530879062e-06, "loss": 0.3841, "step": 14974 }, { "epoch": 1.3439904870880168, "grad_norm": 0.5498340408121439, "learning_rate": 6.742905914389729e-06, "loss": 0.4905, "step": 14975 }, { "epoch": 1.344080231540688, "grad_norm": 0.5374036097442477, "learning_rate": 6.742416500981056e-06, "loss": 0.383, "step": 14976 }, { "epoch": 1.344169975993359, "grad_norm": 0.4824541168161147, "learning_rate": 6.741927068569942e-06, "loss": 0.4109, "step": 14977 }, { "epoch": 1.34425972044603, "grad_norm": 0.5360745908242417, "learning_rate": 6.741437617161721e-06, "loss": 0.4434, "step": 14978 }, { "epoch": 1.3443494648987009, "grad_norm": 0.463022821471508, "learning_rate": 6.740948146761734e-06, "loss": 0.313, "step": 14979 }, { "epoch": 1.344439209351372, "grad_norm": 0.47954790555612453, "learning_rate": 6.740458657375318e-06, "loss": 0.3585, "step": 14980 }, { "epoch": 1.344528953804043, "grad_norm": 0.4681047564518417, "learning_rate": 6.739969149007811e-06, "loss": 0.3433, "step": 14981 }, { "epoch": 1.3446186982567139, "grad_norm": 0.42327698104370043, "learning_rate": 6.739479621664551e-06, "loss": 0.2996, "step": 14982 }, { "epoch": 1.344708442709385, "grad_norm": 0.4489457454710655, "learning_rate": 6.738990075350878e-06, "loss": 0.4088, "step": 14983 }, { "epoch": 1.344798187162056, "grad_norm": 0.4999263932441249, "learning_rate": 6.738500510072129e-06, "loss": 0.3496, "step": 14984 }, { "epoch": 1.344887931614727, "grad_norm": 0.4566444572367301, "learning_rate": 6.738010925833646e-06, "loss": 0.3926, "step": 14985 }, { "epoch": 1.3449776760673982, "grad_norm": 0.5250371466091575, "learning_rate": 6.7375213226407655e-06, "loss": 0.3795, "step": 14986 }, { "epoch": 1.3450674205200692, "grad_norm": 0.4897838650102964, "learning_rate": 6.737031700498827e-06, "loss": 0.4079, "step": 14987 }, { "epoch": 1.34515716497274, "grad_norm": 0.5091282135002437, "learning_rate": 6.736542059413173e-06, "loss": 0.3745, "step": 14988 }, { "epoch": 1.3452469094254111, "grad_norm": 0.4817122853320742, "learning_rate": 6.736052399389142e-06, "loss": 0.4444, "step": 14989 }, { "epoch": 1.3453366538780822, "grad_norm": 0.534171072848592, "learning_rate": 6.735562720432071e-06, "loss": 0.4204, "step": 14990 }, { "epoch": 1.345426398330753, "grad_norm": 0.5221561632839367, "learning_rate": 6.735073022547306e-06, "loss": 0.3796, "step": 14991 }, { "epoch": 1.3455161427834241, "grad_norm": 0.5049839790445835, "learning_rate": 6.734583305740183e-06, "loss": 0.3908, "step": 14992 }, { "epoch": 1.3456058872360952, "grad_norm": 0.5014552427656952, "learning_rate": 6.734093570016046e-06, "loss": 0.4037, "step": 14993 }, { "epoch": 1.3456956316887663, "grad_norm": 0.470893413533684, "learning_rate": 6.733603815380234e-06, "loss": 0.3686, "step": 14994 }, { "epoch": 1.3457853761414373, "grad_norm": 0.4733537810803322, "learning_rate": 6.7331140418380895e-06, "loss": 0.3521, "step": 14995 }, { "epoch": 1.3458751205941084, "grad_norm": 0.49637656952859704, "learning_rate": 6.73262424939495e-06, "loss": 0.4499, "step": 14996 }, { "epoch": 1.3459648650467793, "grad_norm": 0.5189816709821197, "learning_rate": 6.7321344380561634e-06, "loss": 0.4451, "step": 14997 }, { "epoch": 1.3460546094994503, "grad_norm": 0.5528323454254578, "learning_rate": 6.731644607827066e-06, "loss": 0.4272, "step": 14998 }, { "epoch": 1.3461443539521214, "grad_norm": 0.5055061198044449, "learning_rate": 6.731154758713003e-06, "loss": 0.4, "step": 14999 }, { "epoch": 1.3462340984047922, "grad_norm": 0.5015618864623269, "learning_rate": 6.730664890719315e-06, "loss": 0.3865, "step": 15000 } ], "logging_steps": 1, "max_steps": 33426, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 427311407235072.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }