{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7575068243858053, "eval_steps": 500, "global_step": 1665, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045495905368516835, "grad_norm": 9.461428161462043, "learning_rate": 1e-05, "loss": 0.1263, "step": 1 }, { "epoch": 0.0009099181073703367, "grad_norm": 5.190780450250769, "learning_rate": 9.99999979571129e-06, "loss": 0.1723, "step": 2 }, { "epoch": 0.001364877161055505, "grad_norm": 7.521926017130347, "learning_rate": 9.999999182845177e-06, "loss": 0.1327, "step": 3 }, { "epoch": 0.0018198362147406734, "grad_norm": 2.5665810200307217, "learning_rate": 9.99999816140171e-06, "loss": 0.1095, "step": 4 }, { "epoch": 0.0022747952684258415, "grad_norm": 2.738508706395883, "learning_rate": 9.999996731380973e-06, "loss": 0.1151, "step": 5 }, { "epoch": 0.00272975432211101, "grad_norm": 2.67941899677245, "learning_rate": 9.999994892783083e-06, "loss": 0.0821, "step": 6 }, { "epoch": 0.0031847133757961785, "grad_norm": 2.137586234420784, "learning_rate": 9.99999264560819e-06, "loss": 0.0729, "step": 7 }, { "epoch": 0.003639672429481347, "grad_norm": 2.8221590420989164, "learning_rate": 9.999989989856477e-06, "loss": 0.0929, "step": 8 }, { "epoch": 0.004094631483166515, "grad_norm": 1.6167314639784554, "learning_rate": 9.999986925528164e-06, "loss": 0.0466, "step": 9 }, { "epoch": 0.004549590536851683, "grad_norm": 2.1773262431631313, "learning_rate": 9.999983452623498e-06, "loss": 0.0709, "step": 10 }, { "epoch": 0.005004549590536852, "grad_norm": 7.6444390817806465, "learning_rate": 9.999979571142765e-06, "loss": 0.0809, "step": 11 }, { "epoch": 0.00545950864422202, "grad_norm": 2.034523884241798, "learning_rate": 9.999975281086278e-06, "loss": 0.0839, "step": 12 }, { "epoch": 0.005914467697907188, "grad_norm": 3.576108282005355, "learning_rate": 9.999970582454392e-06, "loss": 0.0728, "step": 13 }, { "epoch": 0.006369426751592357, "grad_norm": 2.623641566468802, "learning_rate": 9.999965475247491e-06, "loss": 0.1052, "step": 14 }, { "epoch": 0.006824385805277525, "grad_norm": 2.1413574998269085, "learning_rate": 9.99995995946599e-06, "loss": 0.0885, "step": 15 }, { "epoch": 0.007279344858962694, "grad_norm": 1.4859066724415246, "learning_rate": 9.999954035110342e-06, "loss": 0.0644, "step": 16 }, { "epoch": 0.0077343039126478615, "grad_norm": 2.851793157608408, "learning_rate": 9.999947702181027e-06, "loss": 0.1057, "step": 17 }, { "epoch": 0.00818926296633303, "grad_norm": 4.693829546662477, "learning_rate": 9.999940960678568e-06, "loss": 0.0867, "step": 18 }, { "epoch": 0.008644222020018199, "grad_norm": 2.2728033563417362, "learning_rate": 9.999933810603513e-06, "loss": 0.0789, "step": 19 }, { "epoch": 0.009099181073703366, "grad_norm": 1.6705986173507794, "learning_rate": 9.999926251956447e-06, "loss": 0.0683, "step": 20 }, { "epoch": 0.009554140127388535, "grad_norm": 2.187579869114393, "learning_rate": 9.999918284737986e-06, "loss": 0.0984, "step": 21 }, { "epoch": 0.010009099181073703, "grad_norm": 2.328040268012338, "learning_rate": 9.999909908948782e-06, "loss": 0.0699, "step": 22 }, { "epoch": 0.010464058234758872, "grad_norm": 5.572389775693198, "learning_rate": 9.999901124589519e-06, "loss": 0.0912, "step": 23 }, { "epoch": 0.01091901728844404, "grad_norm": 1.84796719674859, "learning_rate": 9.999891931660916e-06, "loss": 0.1015, "step": 24 }, { "epoch": 0.011373976342129208, "grad_norm": 1.7501762990792236, "learning_rate": 9.999882330163725e-06, "loss": 0.0909, "step": 25 }, { "epoch": 0.011828935395814377, "grad_norm": 0.9922115950592263, "learning_rate": 9.999872320098729e-06, "loss": 0.0656, "step": 26 }, { "epoch": 0.012283894449499545, "grad_norm": 1.5612370560987539, "learning_rate": 9.999861901466746e-06, "loss": 0.0974, "step": 27 }, { "epoch": 0.012738853503184714, "grad_norm": 1.4617271794930395, "learning_rate": 9.999851074268625e-06, "loss": 0.0853, "step": 28 }, { "epoch": 0.013193812556869881, "grad_norm": 1.8127085104491556, "learning_rate": 9.999839838505257e-06, "loss": 0.1081, "step": 29 }, { "epoch": 0.01364877161055505, "grad_norm": 1.4710105512612208, "learning_rate": 9.999828194177555e-06, "loss": 0.0868, "step": 30 }, { "epoch": 0.014103730664240218, "grad_norm": 1.3474487189311888, "learning_rate": 9.999816141286472e-06, "loss": 0.0817, "step": 31 }, { "epoch": 0.014558689717925387, "grad_norm": 1.0967596652549403, "learning_rate": 9.99980367983299e-06, "loss": 0.0637, "step": 32 }, { "epoch": 0.015013648771610554, "grad_norm": 3.179425671823194, "learning_rate": 9.999790809818134e-06, "loss": 0.069, "step": 33 }, { "epoch": 0.015468607825295723, "grad_norm": 4.482257681577152, "learning_rate": 9.999777531242951e-06, "loss": 0.0915, "step": 34 }, { "epoch": 0.01592356687898089, "grad_norm": 3.953299040475791, "learning_rate": 9.999763844108528e-06, "loss": 0.0562, "step": 35 }, { "epoch": 0.01637852593266606, "grad_norm": 1.1127201050382067, "learning_rate": 9.999749748415982e-06, "loss": 0.0556, "step": 36 }, { "epoch": 0.01683348498635123, "grad_norm": 79.45756094624792, "learning_rate": 9.999735244166464e-06, "loss": 0.1223, "step": 37 }, { "epoch": 0.017288444040036398, "grad_norm": 2777.9092912017113, "learning_rate": 9.99972033136116e-06, "loss": 0.3211, "step": 38 }, { "epoch": 0.017743403093721567, "grad_norm": 2.5204693177238466, "learning_rate": 9.999705010001291e-06, "loss": 0.0723, "step": 39 }, { "epoch": 0.018198362147406732, "grad_norm": 2.2975907071135655, "learning_rate": 9.999689280088105e-06, "loss": 0.0696, "step": 40 }, { "epoch": 0.0186533212010919, "grad_norm": 2.998434349074003, "learning_rate": 9.99967314162289e-06, "loss": 0.083, "step": 41 }, { "epoch": 0.01910828025477707, "grad_norm": 3.882239448575704, "learning_rate": 9.999656594606966e-06, "loss": 0.1015, "step": 42 }, { "epoch": 0.019563239308462238, "grad_norm": 3.5286596480512493, "learning_rate": 9.999639639041681e-06, "loss": 0.0817, "step": 43 }, { "epoch": 0.020018198362147407, "grad_norm": 1.6933989447443707, "learning_rate": 9.999622274928424e-06, "loss": 0.1003, "step": 44 }, { "epoch": 0.020473157415832575, "grad_norm": 1.2483160046323276, "learning_rate": 9.999604502268614e-06, "loss": 0.0952, "step": 45 }, { "epoch": 0.020928116469517744, "grad_norm": 0.9417906124383243, "learning_rate": 9.9995863210637e-06, "loss": 0.0731, "step": 46 }, { "epoch": 0.021383075523202913, "grad_norm": 2.8195414757816897, "learning_rate": 9.99956773131517e-06, "loss": 0.1845, "step": 47 }, { "epoch": 0.02183803457688808, "grad_norm": 2.74390379471345, "learning_rate": 9.999548733024545e-06, "loss": 0.1826, "step": 48 }, { "epoch": 0.022292993630573247, "grad_norm": 1.5138494619527987, "learning_rate": 9.999529326193373e-06, "loss": 0.0857, "step": 49 }, { "epoch": 0.022747952684258416, "grad_norm": 1.215379974181271, "learning_rate": 9.999509510823242e-06, "loss": 0.0686, "step": 50 }, { "epoch": 0.023202911737943584, "grad_norm": 1.292187967807859, "learning_rate": 9.999489286915773e-06, "loss": 0.0707, "step": 51 }, { "epoch": 0.023657870791628753, "grad_norm": 1.7888013203563982, "learning_rate": 9.999468654472614e-06, "loss": 0.0682, "step": 52 }, { "epoch": 0.024112829845313922, "grad_norm": 0.8979425621703144, "learning_rate": 9.999447613495457e-06, "loss": 0.0508, "step": 53 }, { "epoch": 0.02456778889899909, "grad_norm": 1.9123835444775663, "learning_rate": 9.99942616398602e-06, "loss": 0.0689, "step": 54 }, { "epoch": 0.02502274795268426, "grad_norm": 0.9393581994096443, "learning_rate": 9.99940430594605e-06, "loss": 0.0496, "step": 55 }, { "epoch": 0.025477707006369428, "grad_norm": 1.0234476513644222, "learning_rate": 9.999382039377339e-06, "loss": 0.0601, "step": 56 }, { "epoch": 0.025932666060054597, "grad_norm": 0.9291387208138827, "learning_rate": 9.999359364281704e-06, "loss": 0.0377, "step": 57 }, { "epoch": 0.026387625113739762, "grad_norm": 1.8209170803663992, "learning_rate": 9.999336280660999e-06, "loss": 0.1144, "step": 58 }, { "epoch": 0.02684258416742493, "grad_norm": 1.1214625046464874, "learning_rate": 9.99931278851711e-06, "loss": 0.0622, "step": 59 }, { "epoch": 0.0272975432211101, "grad_norm": 1.0331723997917317, "learning_rate": 9.999288887851956e-06, "loss": 0.0667, "step": 60 }, { "epoch": 0.027752502274795268, "grad_norm": 1.0412381501406744, "learning_rate": 9.999264578667493e-06, "loss": 0.0566, "step": 61 }, { "epoch": 0.028207461328480437, "grad_norm": 1.4510603110658047, "learning_rate": 9.999239860965703e-06, "loss": 0.0845, "step": 62 }, { "epoch": 0.028662420382165606, "grad_norm": 1.301162540669183, "learning_rate": 9.999214734748609e-06, "loss": 0.0759, "step": 63 }, { "epoch": 0.029117379435850774, "grad_norm": 0.9977688847603402, "learning_rate": 9.999189200018263e-06, "loss": 0.0528, "step": 64 }, { "epoch": 0.029572338489535943, "grad_norm": 1.2894688842348854, "learning_rate": 9.99916325677675e-06, "loss": 0.0899, "step": 65 }, { "epoch": 0.03002729754322111, "grad_norm": 1.4627871680702638, "learning_rate": 9.999136905026194e-06, "loss": 0.1456, "step": 66 }, { "epoch": 0.030482256596906277, "grad_norm": 1.2304385710214434, "learning_rate": 9.999110144768745e-06, "loss": 0.079, "step": 67 }, { "epoch": 0.030937215650591446, "grad_norm": 1.085016380732753, "learning_rate": 9.99908297600659e-06, "loss": 0.0696, "step": 68 }, { "epoch": 0.03139217470427662, "grad_norm": 0.989450558642297, "learning_rate": 9.99905539874195e-06, "loss": 0.069, "step": 69 }, { "epoch": 0.03184713375796178, "grad_norm": 1.0510491151133208, "learning_rate": 9.99902741297708e-06, "loss": 0.0555, "step": 70 }, { "epoch": 0.03230209281164695, "grad_norm": 0.8938033562648371, "learning_rate": 9.998999018714264e-06, "loss": 0.0783, "step": 71 }, { "epoch": 0.03275705186533212, "grad_norm": 2.902512108322722, "learning_rate": 9.998970215955824e-06, "loss": 0.0702, "step": 72 }, { "epoch": 0.033212010919017286, "grad_norm": 0.7661831894133686, "learning_rate": 9.998941004704113e-06, "loss": 0.0519, "step": 73 }, { "epoch": 0.03366696997270246, "grad_norm": 1.1047249497744047, "learning_rate": 9.998911384961518e-06, "loss": 0.0773, "step": 74 }, { "epoch": 0.034121929026387623, "grad_norm": 0.7750047299312716, "learning_rate": 9.998881356730458e-06, "loss": 0.0598, "step": 75 }, { "epoch": 0.034576888080072796, "grad_norm": 0.9815801555720315, "learning_rate": 9.99885092001339e-06, "loss": 0.0661, "step": 76 }, { "epoch": 0.03503184713375796, "grad_norm": 1.3090963451351905, "learning_rate": 9.998820074812799e-06, "loss": 0.0713, "step": 77 }, { "epoch": 0.03548680618744313, "grad_norm": 1.1489338732270693, "learning_rate": 9.998788821131207e-06, "loss": 0.0946, "step": 78 }, { "epoch": 0.0359417652411283, "grad_norm": 0.9040381990998293, "learning_rate": 9.998757158971164e-06, "loss": 0.067, "step": 79 }, { "epoch": 0.036396724294813464, "grad_norm": 1.1019926198229115, "learning_rate": 9.998725088335263e-06, "loss": 0.0874, "step": 80 }, { "epoch": 0.036851683348498636, "grad_norm": 0.5779852750462403, "learning_rate": 9.99869260922612e-06, "loss": 0.0492, "step": 81 }, { "epoch": 0.0373066424021838, "grad_norm": 1.2769852710418472, "learning_rate": 9.998659721646393e-06, "loss": 0.0781, "step": 82 }, { "epoch": 0.03776160145586897, "grad_norm": 0.9020624084974485, "learning_rate": 9.998626425598766e-06, "loss": 0.0734, "step": 83 }, { "epoch": 0.03821656050955414, "grad_norm": 0.9626764462141776, "learning_rate": 9.99859272108596e-06, "loss": 0.0719, "step": 84 }, { "epoch": 0.03867151956323931, "grad_norm": 0.9435885887029873, "learning_rate": 9.998558608110733e-06, "loss": 0.0835, "step": 85 }, { "epoch": 0.039126478616924476, "grad_norm": 1.0578725525123687, "learning_rate": 9.998524086675867e-06, "loss": 0.0746, "step": 86 }, { "epoch": 0.03958143767060965, "grad_norm": 1.0366588534208079, "learning_rate": 9.998489156784188e-06, "loss": 0.0933, "step": 87 }, { "epoch": 0.040036396724294813, "grad_norm": 1.0595948680723846, "learning_rate": 9.998453818438547e-06, "loss": 0.0846, "step": 88 }, { "epoch": 0.04049135577797998, "grad_norm": 0.8807515753016749, "learning_rate": 9.998418071641833e-06, "loss": 0.0649, "step": 89 }, { "epoch": 0.04094631483166515, "grad_norm": 0.9034225145874141, "learning_rate": 9.998381916396967e-06, "loss": 0.0621, "step": 90 }, { "epoch": 0.041401273885350316, "grad_norm": 0.6732889821553815, "learning_rate": 9.998345352706901e-06, "loss": 0.0367, "step": 91 }, { "epoch": 0.04185623293903549, "grad_norm": 0.7136967603743426, "learning_rate": 9.998308380574628e-06, "loss": 0.0569, "step": 92 }, { "epoch": 0.042311191992720654, "grad_norm": 1.1459385364035048, "learning_rate": 9.998271000003166e-06, "loss": 0.1184, "step": 93 }, { "epoch": 0.042766151046405826, "grad_norm": 0.8224906129097734, "learning_rate": 9.998233210995569e-06, "loss": 0.0682, "step": 94 }, { "epoch": 0.04322111010009099, "grad_norm": 1.5182946932236698, "learning_rate": 9.998195013554926e-06, "loss": 0.0875, "step": 95 }, { "epoch": 0.04367606915377616, "grad_norm": 0.9355855711018981, "learning_rate": 9.998156407684359e-06, "loss": 0.0939, "step": 96 }, { "epoch": 0.04413102820746133, "grad_norm": 0.7329840867165283, "learning_rate": 9.998117393387022e-06, "loss": 0.0466, "step": 97 }, { "epoch": 0.044585987261146494, "grad_norm": 0.8701001036058451, "learning_rate": 9.9980779706661e-06, "loss": 0.0729, "step": 98 }, { "epoch": 0.045040946314831666, "grad_norm": 1.0218896298663185, "learning_rate": 9.99803813952482e-06, "loss": 0.0828, "step": 99 }, { "epoch": 0.04549590536851683, "grad_norm": 0.9044995357273884, "learning_rate": 9.997997899966433e-06, "loss": 0.0709, "step": 100 }, { "epoch": 0.045950864422202004, "grad_norm": 0.9877796099816964, "learning_rate": 9.99795725199423e-06, "loss": 0.0903, "step": 101 }, { "epoch": 0.04640582347588717, "grad_norm": 1.0061501994463906, "learning_rate": 9.99791619561153e-06, "loss": 0.0831, "step": 102 }, { "epoch": 0.04686078252957234, "grad_norm": 0.8789173954818107, "learning_rate": 9.997874730821689e-06, "loss": 0.0714, "step": 103 }, { "epoch": 0.047315741583257506, "grad_norm": 15.480920098194954, "learning_rate": 9.997832857628093e-06, "loss": 0.2603, "step": 104 }, { "epoch": 0.04777070063694268, "grad_norm": 1.3806761301603454, "learning_rate": 9.99779057603417e-06, "loss": 0.1227, "step": 105 }, { "epoch": 0.048225659690627844, "grad_norm": 0.8462176607269959, "learning_rate": 9.997747886043368e-06, "loss": 0.0605, "step": 106 }, { "epoch": 0.04868061874431301, "grad_norm": 0.7467169847716549, "learning_rate": 9.997704787659179e-06, "loss": 0.0618, "step": 107 }, { "epoch": 0.04913557779799818, "grad_norm": 1.5653334818977065, "learning_rate": 9.997661280885125e-06, "loss": 0.1253, "step": 108 }, { "epoch": 0.049590536851683346, "grad_norm": 0.871706038604149, "learning_rate": 9.99761736572476e-06, "loss": 0.0716, "step": 109 }, { "epoch": 0.05004549590536852, "grad_norm": 1.1398296008355844, "learning_rate": 9.997573042181672e-06, "loss": 0.0698, "step": 110 }, { "epoch": 0.050500454959053684, "grad_norm": 1.0487992691419916, "learning_rate": 9.997528310259485e-06, "loss": 0.1102, "step": 111 }, { "epoch": 0.050955414012738856, "grad_norm": 0.9112684449646818, "learning_rate": 9.997483169961852e-06, "loss": 0.1032, "step": 112 }, { "epoch": 0.05141037306642402, "grad_norm": 0.9418790141923585, "learning_rate": 9.997437621292463e-06, "loss": 0.0771, "step": 113 }, { "epoch": 0.051865332120109194, "grad_norm": 0.7796140692842074, "learning_rate": 9.99739166425504e-06, "loss": 0.0627, "step": 114 }, { "epoch": 0.05232029117379436, "grad_norm": 1.5434421216734795, "learning_rate": 9.997345298853339e-06, "loss": 0.1495, "step": 115 }, { "epoch": 0.052775250227479524, "grad_norm": 0.8898179660551836, "learning_rate": 9.997298525091148e-06, "loss": 0.0735, "step": 116 }, { "epoch": 0.053230209281164696, "grad_norm": 0.8585916871524272, "learning_rate": 9.997251342972288e-06, "loss": 0.068, "step": 117 }, { "epoch": 0.05368516833484986, "grad_norm": 0.812806800238708, "learning_rate": 9.997203752500616e-06, "loss": 0.0689, "step": 118 }, { "epoch": 0.054140127388535034, "grad_norm": 0.9677722064277628, "learning_rate": 9.997155753680021e-06, "loss": 0.0795, "step": 119 }, { "epoch": 0.0545950864422202, "grad_norm": 1.621934591654054, "learning_rate": 9.997107346514425e-06, "loss": 0.0707, "step": 120 }, { "epoch": 0.05505004549590537, "grad_norm": 0.6750452750311531, "learning_rate": 9.997058531007782e-06, "loss": 0.0588, "step": 121 }, { "epoch": 0.055505004549590536, "grad_norm": 0.9583870506818666, "learning_rate": 9.997009307164083e-06, "loss": 0.0859, "step": 122 }, { "epoch": 0.05595996360327571, "grad_norm": 1.247483970027119, "learning_rate": 9.99695967498735e-06, "loss": 0.0952, "step": 123 }, { "epoch": 0.056414922656960874, "grad_norm": 0.7937903902273558, "learning_rate": 9.996909634481639e-06, "loss": 0.0614, "step": 124 }, { "epoch": 0.05686988171064604, "grad_norm": 4.855426128828546, "learning_rate": 9.996859185651038e-06, "loss": 0.1629, "step": 125 }, { "epoch": 0.05732484076433121, "grad_norm": 1.0499970639607177, "learning_rate": 9.99680832849967e-06, "loss": 0.1031, "step": 126 }, { "epoch": 0.05777979981801638, "grad_norm": 0.8730447821488512, "learning_rate": 9.99675706303169e-06, "loss": 0.0606, "step": 127 }, { "epoch": 0.05823475887170155, "grad_norm": 1.2779985416162813, "learning_rate": 9.99670538925129e-06, "loss": 0.074, "step": 128 }, { "epoch": 0.058689717925386714, "grad_norm": 0.8606157718419157, "learning_rate": 9.996653307162687e-06, "loss": 0.0703, "step": 129 }, { "epoch": 0.059144676979071886, "grad_norm": 0.8920761218762643, "learning_rate": 9.996600816770144e-06, "loss": 0.0818, "step": 130 }, { "epoch": 0.05959963603275705, "grad_norm": 1.1603462045917847, "learning_rate": 9.996547918077944e-06, "loss": 0.1148, "step": 131 }, { "epoch": 0.06005459508644222, "grad_norm": 0.9108713801214797, "learning_rate": 9.996494611090414e-06, "loss": 0.0884, "step": 132 }, { "epoch": 0.06050955414012739, "grad_norm": 0.6523725468628359, "learning_rate": 9.996440895811907e-06, "loss": 0.0535, "step": 133 }, { "epoch": 0.060964513193812554, "grad_norm": 0.8812777694752004, "learning_rate": 9.996386772246816e-06, "loss": 0.087, "step": 134 }, { "epoch": 0.061419472247497726, "grad_norm": 1.0622191207422995, "learning_rate": 9.99633224039956e-06, "loss": 0.0982, "step": 135 }, { "epoch": 0.06187443130118289, "grad_norm": 3.7961077321923025, "learning_rate": 9.996277300274596e-06, "loss": 0.1526, "step": 136 }, { "epoch": 0.062329390354868064, "grad_norm": 0.9444433559435487, "learning_rate": 9.996221951876415e-06, "loss": 0.0996, "step": 137 }, { "epoch": 0.06278434940855324, "grad_norm": 1.444871481552235, "learning_rate": 9.996166195209539e-06, "loss": 0.1075, "step": 138 }, { "epoch": 0.0632393084622384, "grad_norm": 0.7446446480732116, "learning_rate": 9.996110030278522e-06, "loss": 0.0561, "step": 139 }, { "epoch": 0.06369426751592357, "grad_norm": 0.8913010543094952, "learning_rate": 9.996053457087958e-06, "loss": 0.0715, "step": 140 }, { "epoch": 0.06414922656960874, "grad_norm": 0.7815821404043856, "learning_rate": 9.995996475642466e-06, "loss": 0.0796, "step": 141 }, { "epoch": 0.0646041856232939, "grad_norm": 0.74337588448595, "learning_rate": 9.995939085946704e-06, "loss": 0.0661, "step": 142 }, { "epoch": 0.06505914467697907, "grad_norm": 0.9974255688753435, "learning_rate": 9.995881288005363e-06, "loss": 0.0869, "step": 143 }, { "epoch": 0.06551410373066424, "grad_norm": 1.2260290141946268, "learning_rate": 9.995823081823162e-06, "loss": 0.0766, "step": 144 }, { "epoch": 0.06596906278434941, "grad_norm": 0.9751795993584637, "learning_rate": 9.99576446740486e-06, "loss": 0.091, "step": 145 }, { "epoch": 0.06642402183803457, "grad_norm": 1.6175476325168967, "learning_rate": 9.995705444755249e-06, "loss": 0.1208, "step": 146 }, { "epoch": 0.06687898089171974, "grad_norm": 0.7580083688127299, "learning_rate": 9.995646013879147e-06, "loss": 0.0622, "step": 147 }, { "epoch": 0.06733393994540492, "grad_norm": 1.0194887039793072, "learning_rate": 9.995586174781413e-06, "loss": 0.0753, "step": 148 }, { "epoch": 0.06778889899909009, "grad_norm": 0.9065646408503975, "learning_rate": 9.995525927466936e-06, "loss": 0.0848, "step": 149 }, { "epoch": 0.06824385805277525, "grad_norm": 0.8871078738477127, "learning_rate": 9.995465271940641e-06, "loss": 0.0607, "step": 150 }, { "epoch": 0.06869881710646042, "grad_norm": 1.1486707652049646, "learning_rate": 9.995404208207485e-06, "loss": 0.0809, "step": 151 }, { "epoch": 0.06915377616014559, "grad_norm": 1.1473150526096232, "learning_rate": 9.995342736272453e-06, "loss": 0.1035, "step": 152 }, { "epoch": 0.06960873521383075, "grad_norm": 1.3025683052462544, "learning_rate": 9.995280856140572e-06, "loss": 0.1197, "step": 153 }, { "epoch": 0.07006369426751592, "grad_norm": 0.8069596755970996, "learning_rate": 9.9952185678169e-06, "loss": 0.0526, "step": 154 }, { "epoch": 0.0705186533212011, "grad_norm": 0.8153700064848134, "learning_rate": 9.995155871306524e-06, "loss": 0.0613, "step": 155 }, { "epoch": 0.07097361237488627, "grad_norm": 0.7319023745966868, "learning_rate": 9.995092766614567e-06, "loss": 0.0512, "step": 156 }, { "epoch": 0.07142857142857142, "grad_norm": 1.0146656175738817, "learning_rate": 9.995029253746186e-06, "loss": 0.0846, "step": 157 }, { "epoch": 0.0718835304822566, "grad_norm": 0.8015254985373994, "learning_rate": 9.994965332706574e-06, "loss": 0.0619, "step": 158 }, { "epoch": 0.07233848953594177, "grad_norm": 1.0630207312416284, "learning_rate": 9.994901003500952e-06, "loss": 0.0796, "step": 159 }, { "epoch": 0.07279344858962693, "grad_norm": 0.9431304991088505, "learning_rate": 9.994836266134575e-06, "loss": 0.0743, "step": 160 }, { "epoch": 0.0732484076433121, "grad_norm": 1.023738915097686, "learning_rate": 9.994771120612737e-06, "loss": 0.0888, "step": 161 }, { "epoch": 0.07370336669699727, "grad_norm": 0.9272637744585672, "learning_rate": 9.994705566940757e-06, "loss": 0.084, "step": 162 }, { "epoch": 0.07415832575068244, "grad_norm": 1.122378326253592, "learning_rate": 9.994639605123994e-06, "loss": 0.0961, "step": 163 }, { "epoch": 0.0746132848043676, "grad_norm": 0.753531768411978, "learning_rate": 9.994573235167839e-06, "loss": 0.0736, "step": 164 }, { "epoch": 0.07506824385805277, "grad_norm": 0.9314766958597749, "learning_rate": 9.994506457077715e-06, "loss": 0.0838, "step": 165 }, { "epoch": 0.07552320291173795, "grad_norm": 0.996008388557059, "learning_rate": 9.994439270859077e-06, "loss": 0.1076, "step": 166 }, { "epoch": 0.07597816196542312, "grad_norm": 0.9199332464612126, "learning_rate": 9.994371676517418e-06, "loss": 0.0724, "step": 167 }, { "epoch": 0.07643312101910828, "grad_norm": 0.8652292283168678, "learning_rate": 9.994303674058259e-06, "loss": 0.0628, "step": 168 }, { "epoch": 0.07688808007279345, "grad_norm": 0.8176262426438138, "learning_rate": 9.994235263487158e-06, "loss": 0.0743, "step": 169 }, { "epoch": 0.07734303912647862, "grad_norm": 0.8147855247941459, "learning_rate": 9.994166444809705e-06, "loss": 0.0559, "step": 170 }, { "epoch": 0.07779799818016378, "grad_norm": 0.7853019575635352, "learning_rate": 9.994097218031524e-06, "loss": 0.0681, "step": 171 }, { "epoch": 0.07825295723384895, "grad_norm": 0.8445610480134321, "learning_rate": 9.994027583158272e-06, "loss": 0.0785, "step": 172 }, { "epoch": 0.07870791628753412, "grad_norm": 0.8555498692388026, "learning_rate": 9.993957540195638e-06, "loss": 0.077, "step": 173 }, { "epoch": 0.0791628753412193, "grad_norm": 0.8281270493499452, "learning_rate": 9.993887089149346e-06, "loss": 0.0848, "step": 174 }, { "epoch": 0.07961783439490445, "grad_norm": 0.7180425978661062, "learning_rate": 9.993816230025152e-06, "loss": 0.0588, "step": 175 }, { "epoch": 0.08007279344858963, "grad_norm": 0.9287545326980071, "learning_rate": 9.99374496282885e-06, "loss": 0.0874, "step": 176 }, { "epoch": 0.0805277525022748, "grad_norm": 1.5950603980195528, "learning_rate": 9.993673287566261e-06, "loss": 0.1301, "step": 177 }, { "epoch": 0.08098271155595996, "grad_norm": 0.505966633973175, "learning_rate": 9.99360120424324e-06, "loss": 0.0459, "step": 178 }, { "epoch": 0.08143767060964513, "grad_norm": 0.6170796905443107, "learning_rate": 9.993528712865681e-06, "loss": 0.0666, "step": 179 }, { "epoch": 0.0818926296633303, "grad_norm": 0.8965600572228928, "learning_rate": 9.993455813439507e-06, "loss": 0.0648, "step": 180 }, { "epoch": 0.08234758871701547, "grad_norm": 0.7555745664692847, "learning_rate": 9.993382505970673e-06, "loss": 0.0479, "step": 181 }, { "epoch": 0.08280254777070063, "grad_norm": 0.7885826993774436, "learning_rate": 9.99330879046517e-06, "loss": 0.0605, "step": 182 }, { "epoch": 0.0832575068243858, "grad_norm": 0.6970911126559147, "learning_rate": 9.993234666929024e-06, "loss": 0.0545, "step": 183 }, { "epoch": 0.08371246587807098, "grad_norm": 0.8281240642020996, "learning_rate": 9.99316013536829e-06, "loss": 0.0651, "step": 184 }, { "epoch": 0.08416742493175614, "grad_norm": 0.8497823551734951, "learning_rate": 9.993085195789057e-06, "loss": 0.098, "step": 185 }, { "epoch": 0.08462238398544131, "grad_norm": 0.8425278224044996, "learning_rate": 9.993009848197452e-06, "loss": 0.0861, "step": 186 }, { "epoch": 0.08507734303912648, "grad_norm": 0.729342450692031, "learning_rate": 9.992934092599629e-06, "loss": 0.0651, "step": 187 }, { "epoch": 0.08553230209281165, "grad_norm": 0.8810253378927329, "learning_rate": 9.99285792900178e-06, "loss": 0.0995, "step": 188 }, { "epoch": 0.08598726114649681, "grad_norm": 1.0402457083445067, "learning_rate": 9.992781357410131e-06, "loss": 0.1061, "step": 189 }, { "epoch": 0.08644222020018198, "grad_norm": 0.7397036090930822, "learning_rate": 9.992704377830934e-06, "loss": 0.0571, "step": 190 }, { "epoch": 0.08689717925386715, "grad_norm": 1.4783630598693296, "learning_rate": 9.992626990270484e-06, "loss": 0.1154, "step": 191 }, { "epoch": 0.08735213830755233, "grad_norm": 1.1100322283473036, "learning_rate": 9.992549194735101e-06, "loss": 0.1179, "step": 192 }, { "epoch": 0.08780709736123748, "grad_norm": 0.5797984556503705, "learning_rate": 9.992470991231144e-06, "loss": 0.0466, "step": 193 }, { "epoch": 0.08826205641492266, "grad_norm": 1.059908713900853, "learning_rate": 9.992392379765005e-06, "loss": 0.0994, "step": 194 }, { "epoch": 0.08871701546860783, "grad_norm": 1.1187885391430794, "learning_rate": 9.992313360343104e-06, "loss": 0.0986, "step": 195 }, { "epoch": 0.08917197452229299, "grad_norm": 0.7509441330173129, "learning_rate": 9.992233932971901e-06, "loss": 0.0634, "step": 196 }, { "epoch": 0.08962693357597816, "grad_norm": 0.9426276516690344, "learning_rate": 9.992154097657888e-06, "loss": 0.0857, "step": 197 }, { "epoch": 0.09008189262966333, "grad_norm": 0.8754039034503873, "learning_rate": 9.992073854407585e-06, "loss": 0.0881, "step": 198 }, { "epoch": 0.0905368516833485, "grad_norm": 2.8697219156120712, "learning_rate": 9.99199320322755e-06, "loss": 0.0851, "step": 199 }, { "epoch": 0.09099181073703366, "grad_norm": 0.7429242681646778, "learning_rate": 9.991912144124375e-06, "loss": 0.0729, "step": 200 }, { "epoch": 0.09144676979071883, "grad_norm": 1.0552979449251756, "learning_rate": 9.991830677104682e-06, "loss": 0.1066, "step": 201 }, { "epoch": 0.09190172884440401, "grad_norm": 0.8812651371324355, "learning_rate": 9.99174880217513e-06, "loss": 0.0732, "step": 202 }, { "epoch": 0.09235668789808917, "grad_norm": 1.0755107845413352, "learning_rate": 9.991666519342407e-06, "loss": 0.0977, "step": 203 }, { "epoch": 0.09281164695177434, "grad_norm": 0.8925063431256136, "learning_rate": 9.99158382861324e-06, "loss": 0.0904, "step": 204 }, { "epoch": 0.09326660600545951, "grad_norm": 0.8190206986922173, "learning_rate": 9.991500729994384e-06, "loss": 0.0729, "step": 205 }, { "epoch": 0.09372156505914468, "grad_norm": 0.6635798147425112, "learning_rate": 9.991417223492629e-06, "loss": 0.0631, "step": 206 }, { "epoch": 0.09417652411282984, "grad_norm": 1.0314655306023923, "learning_rate": 9.991333309114798e-06, "loss": 0.0852, "step": 207 }, { "epoch": 0.09463148316651501, "grad_norm": 0.8533496857694978, "learning_rate": 9.991248986867753e-06, "loss": 0.0868, "step": 208 }, { "epoch": 0.09508644222020018, "grad_norm": 1.039085255997433, "learning_rate": 9.991164256758378e-06, "loss": 0.095, "step": 209 }, { "epoch": 0.09554140127388536, "grad_norm": 1.1484522866350177, "learning_rate": 9.9910791187936e-06, "loss": 0.1333, "step": 210 }, { "epoch": 0.09599636032757052, "grad_norm": 0.8277820800102422, "learning_rate": 9.99099357298038e-06, "loss": 0.0664, "step": 211 }, { "epoch": 0.09645131938125569, "grad_norm": 0.821796111319934, "learning_rate": 9.9909076193257e-06, "loss": 0.083, "step": 212 }, { "epoch": 0.09690627843494086, "grad_norm": 0.9448800546720313, "learning_rate": 9.990821257836589e-06, "loss": 0.0873, "step": 213 }, { "epoch": 0.09736123748862602, "grad_norm": 0.9002810379340489, "learning_rate": 9.990734488520103e-06, "loss": 0.099, "step": 214 }, { "epoch": 0.09781619654231119, "grad_norm": 0.6145149717344348, "learning_rate": 9.990647311383334e-06, "loss": 0.0425, "step": 215 }, { "epoch": 0.09827115559599636, "grad_norm": 1.1377497370761045, "learning_rate": 9.990559726433404e-06, "loss": 0.0903, "step": 216 }, { "epoch": 0.09872611464968153, "grad_norm": 0.8401357673155365, "learning_rate": 9.99047173367747e-06, "loss": 0.0812, "step": 217 }, { "epoch": 0.09918107370336669, "grad_norm": 0.6977882365614015, "learning_rate": 9.990383333122722e-06, "loss": 0.0613, "step": 218 }, { "epoch": 0.09963603275705187, "grad_norm": 0.6751056796776193, "learning_rate": 9.990294524776384e-06, "loss": 0.0636, "step": 219 }, { "epoch": 0.10009099181073704, "grad_norm": 0.7973250315161167, "learning_rate": 9.990205308645716e-06, "loss": 0.0655, "step": 220 }, { "epoch": 0.1005459508644222, "grad_norm": 0.6494979859380491, "learning_rate": 9.990115684738005e-06, "loss": 0.0461, "step": 221 }, { "epoch": 0.10100090991810737, "grad_norm": 0.7863907355652456, "learning_rate": 9.990025653060574e-06, "loss": 0.0881, "step": 222 }, { "epoch": 0.10145586897179254, "grad_norm": 1.2756737972223395, "learning_rate": 9.98993521362078e-06, "loss": 0.1102, "step": 223 }, { "epoch": 0.10191082802547771, "grad_norm": 1.1992554133605928, "learning_rate": 9.989844366426018e-06, "loss": 0.1147, "step": 224 }, { "epoch": 0.10236578707916287, "grad_norm": 0.5034605400337953, "learning_rate": 9.989753111483707e-06, "loss": 0.0462, "step": 225 }, { "epoch": 0.10282074613284804, "grad_norm": 0.9881921480518578, "learning_rate": 9.989661448801305e-06, "loss": 0.0848, "step": 226 }, { "epoch": 0.10327570518653321, "grad_norm": 0.7581777568438945, "learning_rate": 9.989569378386303e-06, "loss": 0.079, "step": 227 }, { "epoch": 0.10373066424021839, "grad_norm": 0.6464731162067388, "learning_rate": 9.989476900246223e-06, "loss": 0.0617, "step": 228 }, { "epoch": 0.10418562329390355, "grad_norm": 0.8780639185859085, "learning_rate": 9.989384014388624e-06, "loss": 0.086, "step": 229 }, { "epoch": 0.10464058234758872, "grad_norm": 0.6623808171307163, "learning_rate": 9.989290720821095e-06, "loss": 0.0694, "step": 230 }, { "epoch": 0.10509554140127389, "grad_norm": 0.721054554263859, "learning_rate": 9.98919701955126e-06, "loss": 0.0735, "step": 231 }, { "epoch": 0.10555050045495905, "grad_norm": 0.7868134014829404, "learning_rate": 9.989102910586776e-06, "loss": 0.0546, "step": 232 }, { "epoch": 0.10600545950864422, "grad_norm": 0.9137158371163484, "learning_rate": 9.989008393935331e-06, "loss": 0.0771, "step": 233 }, { "epoch": 0.10646041856232939, "grad_norm": 0.8326009579593463, "learning_rate": 9.98891346960465e-06, "loss": 0.0667, "step": 234 }, { "epoch": 0.10691537761601456, "grad_norm": 0.6462724580348628, "learning_rate": 9.988818137602494e-06, "loss": 0.0717, "step": 235 }, { "epoch": 0.10737033666969972, "grad_norm": 0.7513725247558808, "learning_rate": 9.988722397936646e-06, "loss": 0.0733, "step": 236 }, { "epoch": 0.1078252957233849, "grad_norm": 1.094509848236789, "learning_rate": 9.988626250614932e-06, "loss": 0.1009, "step": 237 }, { "epoch": 0.10828025477707007, "grad_norm": 0.8200579138639758, "learning_rate": 9.98852969564521e-06, "loss": 0.0844, "step": 238 }, { "epoch": 0.10873521383075523, "grad_norm": 0.7417763562196316, "learning_rate": 9.988432733035369e-06, "loss": 0.0611, "step": 239 }, { "epoch": 0.1091901728844404, "grad_norm": 0.8476475869820355, "learning_rate": 9.988335362793333e-06, "loss": 0.0863, "step": 240 }, { "epoch": 0.10964513193812557, "grad_norm": 0.9998642783878469, "learning_rate": 9.988237584927058e-06, "loss": 0.0909, "step": 241 }, { "epoch": 0.11010009099181074, "grad_norm": 1.1689324698997519, "learning_rate": 9.988139399444534e-06, "loss": 0.124, "step": 242 }, { "epoch": 0.1105550500454959, "grad_norm": 0.790901332269412, "learning_rate": 9.988040806353786e-06, "loss": 0.0855, "step": 243 }, { "epoch": 0.11101000909918107, "grad_norm": 0.8931785977847209, "learning_rate": 9.987941805662869e-06, "loss": 0.1023, "step": 244 }, { "epoch": 0.11146496815286625, "grad_norm": 0.7352781929773609, "learning_rate": 9.98784239737987e-06, "loss": 0.0563, "step": 245 }, { "epoch": 0.11191992720655142, "grad_norm": 0.7169092611535308, "learning_rate": 9.987742581512919e-06, "loss": 0.0683, "step": 246 }, { "epoch": 0.11237488626023658, "grad_norm": 0.6767560569792272, "learning_rate": 9.987642358070167e-06, "loss": 0.0669, "step": 247 }, { "epoch": 0.11282984531392175, "grad_norm": 0.8442319805699996, "learning_rate": 9.987541727059805e-06, "loss": 0.0768, "step": 248 }, { "epoch": 0.11328480436760692, "grad_norm": 0.7700876798522618, "learning_rate": 9.987440688490058e-06, "loss": 0.0643, "step": 249 }, { "epoch": 0.11373976342129208, "grad_norm": 0.7286087978317647, "learning_rate": 9.98733924236918e-06, "loss": 0.0698, "step": 250 }, { "epoch": 0.11419472247497725, "grad_norm": 0.7917355018437868, "learning_rate": 9.98723738870546e-06, "loss": 0.0791, "step": 251 }, { "epoch": 0.11464968152866242, "grad_norm": 1.0469499693242315, "learning_rate": 9.987135127507226e-06, "loss": 0.0761, "step": 252 }, { "epoch": 0.1151046405823476, "grad_norm": 0.8361714930383379, "learning_rate": 9.987032458782828e-06, "loss": 0.0789, "step": 253 }, { "epoch": 0.11555959963603275, "grad_norm": 0.5902853873046482, "learning_rate": 9.986929382540662e-06, "loss": 0.0479, "step": 254 }, { "epoch": 0.11601455868971793, "grad_norm": 0.7349436304465384, "learning_rate": 9.986825898789145e-06, "loss": 0.0668, "step": 255 }, { "epoch": 0.1164695177434031, "grad_norm": 0.7657107039148755, "learning_rate": 9.986722007536737e-06, "loss": 0.0617, "step": 256 }, { "epoch": 0.11692447679708826, "grad_norm": 0.6450631027744769, "learning_rate": 9.986617708791926e-06, "loss": 0.0679, "step": 257 }, { "epoch": 0.11737943585077343, "grad_norm": 0.6292930010016882, "learning_rate": 9.986513002563236e-06, "loss": 0.0482, "step": 258 }, { "epoch": 0.1178343949044586, "grad_norm": 0.8758541343517451, "learning_rate": 9.986407888859221e-06, "loss": 0.0994, "step": 259 }, { "epoch": 0.11828935395814377, "grad_norm": 0.6537445862223847, "learning_rate": 9.986302367688473e-06, "loss": 0.07, "step": 260 }, { "epoch": 0.11874431301182893, "grad_norm": 0.8029660816844667, "learning_rate": 9.986196439059613e-06, "loss": 0.0623, "step": 261 }, { "epoch": 0.1191992720655141, "grad_norm": 0.7339528606524214, "learning_rate": 9.986090102981297e-06, "loss": 0.0791, "step": 262 }, { "epoch": 0.11965423111919928, "grad_norm": 0.7934112522002073, "learning_rate": 9.985983359462215e-06, "loss": 0.0672, "step": 263 }, { "epoch": 0.12010919017288443, "grad_norm": 1.0186962263060808, "learning_rate": 9.98587620851109e-06, "loss": 0.1213, "step": 264 }, { "epoch": 0.1205641492265696, "grad_norm": 0.6769843647605545, "learning_rate": 9.985768650136679e-06, "loss": 0.0685, "step": 265 }, { "epoch": 0.12101910828025478, "grad_norm": 0.7543020935976431, "learning_rate": 9.985660684347765e-06, "loss": 0.0861, "step": 266 }, { "epoch": 0.12147406733393995, "grad_norm": 0.9552124731299731, "learning_rate": 9.985552311153178e-06, "loss": 0.0922, "step": 267 }, { "epoch": 0.12192902638762511, "grad_norm": 0.7436699167226903, "learning_rate": 9.985443530561769e-06, "loss": 0.0885, "step": 268 }, { "epoch": 0.12238398544131028, "grad_norm": 1.329058937551934, "learning_rate": 9.98533434258243e-06, "loss": 0.1115, "step": 269 }, { "epoch": 0.12283894449499545, "grad_norm": 0.6835909813818813, "learning_rate": 9.985224747224083e-06, "loss": 0.0586, "step": 270 }, { "epoch": 0.12329390354868063, "grad_norm": 1.0733107060854794, "learning_rate": 9.98511474449568e-06, "loss": 0.0811, "step": 271 }, { "epoch": 0.12374886260236578, "grad_norm": 0.5916007278667166, "learning_rate": 9.985004334406215e-06, "loss": 0.0696, "step": 272 }, { "epoch": 0.12420382165605096, "grad_norm": 0.9149357508392912, "learning_rate": 9.984893516964707e-06, "loss": 0.0704, "step": 273 }, { "epoch": 0.12465878070973613, "grad_norm": 1.1634742377762608, "learning_rate": 9.984782292180212e-06, "loss": 0.1178, "step": 274 }, { "epoch": 0.1251137397634213, "grad_norm": 0.603957454908005, "learning_rate": 9.98467066006182e-06, "loss": 0.0585, "step": 275 }, { "epoch": 0.12556869881710647, "grad_norm": 0.7735087790025026, "learning_rate": 9.984558620618651e-06, "loss": 0.0953, "step": 276 }, { "epoch": 0.12602365787079162, "grad_norm": 1.2570182633873541, "learning_rate": 9.984446173859863e-06, "loss": 0.1353, "step": 277 }, { "epoch": 0.1264786169244768, "grad_norm": 0.7275895818672663, "learning_rate": 9.984333319794642e-06, "loss": 0.0774, "step": 278 }, { "epoch": 0.12693357597816196, "grad_norm": 0.6395006056363333, "learning_rate": 9.984220058432212e-06, "loss": 0.0591, "step": 279 }, { "epoch": 0.12738853503184713, "grad_norm": 0.6563921850032347, "learning_rate": 9.984106389781828e-06, "loss": 0.0573, "step": 280 }, { "epoch": 0.1278434940855323, "grad_norm": 0.9399157526953884, "learning_rate": 9.983992313852776e-06, "loss": 0.0793, "step": 281 }, { "epoch": 0.12829845313921748, "grad_norm": 0.93528061821534, "learning_rate": 9.983877830654381e-06, "loss": 0.0807, "step": 282 }, { "epoch": 0.12875341219290265, "grad_norm": 0.7192448233352142, "learning_rate": 9.983762940195996e-06, "loss": 0.0773, "step": 283 }, { "epoch": 0.1292083712465878, "grad_norm": 0.7097381072031733, "learning_rate": 9.98364764248701e-06, "loss": 0.0698, "step": 284 }, { "epoch": 0.12966333030027297, "grad_norm": 1.1635566012920768, "learning_rate": 9.983531937536844e-06, "loss": 0.0893, "step": 285 }, { "epoch": 0.13011828935395814, "grad_norm": 0.8456555685011555, "learning_rate": 9.983415825354954e-06, "loss": 0.0628, "step": 286 }, { "epoch": 0.1305732484076433, "grad_norm": 0.7151838393189083, "learning_rate": 9.983299305950828e-06, "loss": 0.0557, "step": 287 }, { "epoch": 0.13102820746132848, "grad_norm": 0.7095193783870621, "learning_rate": 9.983182379333989e-06, "loss": 0.0604, "step": 288 }, { "epoch": 0.13148316651501366, "grad_norm": 0.8581434444337498, "learning_rate": 9.983065045513986e-06, "loss": 0.0781, "step": 289 }, { "epoch": 0.13193812556869883, "grad_norm": 0.5600994934804626, "learning_rate": 9.982947304500414e-06, "loss": 0.0498, "step": 290 }, { "epoch": 0.13239308462238397, "grad_norm": 0.7355720212694087, "learning_rate": 9.98282915630289e-06, "loss": 0.0692, "step": 291 }, { "epoch": 0.13284804367606914, "grad_norm": 1.6846985851500909, "learning_rate": 9.98271060093107e-06, "loss": 0.1687, "step": 292 }, { "epoch": 0.13330300272975432, "grad_norm": 0.7959406174268434, "learning_rate": 9.98259163839464e-06, "loss": 0.0718, "step": 293 }, { "epoch": 0.1337579617834395, "grad_norm": 0.6005858848115938, "learning_rate": 9.982472268703323e-06, "loss": 0.0465, "step": 294 }, { "epoch": 0.13421292083712466, "grad_norm": 0.7865103977061746, "learning_rate": 9.982352491866874e-06, "loss": 0.071, "step": 295 }, { "epoch": 0.13466787989080983, "grad_norm": 0.7167219429964851, "learning_rate": 9.982232307895077e-06, "loss": 0.0658, "step": 296 }, { "epoch": 0.135122838944495, "grad_norm": 1.206398567596641, "learning_rate": 9.982111716797758e-06, "loss": 0.101, "step": 297 }, { "epoch": 0.13557779799818018, "grad_norm": 1.0085912508470862, "learning_rate": 9.981990718584768e-06, "loss": 0.0959, "step": 298 }, { "epoch": 0.13603275705186532, "grad_norm": 0.8594135430057543, "learning_rate": 9.981869313265995e-06, "loss": 0.0912, "step": 299 }, { "epoch": 0.1364877161055505, "grad_norm": 0.9903339586980618, "learning_rate": 9.981747500851357e-06, "loss": 0.0692, "step": 300 }, { "epoch": 0.13694267515923567, "grad_norm": 0.7623380548666351, "learning_rate": 9.981625281350812e-06, "loss": 0.0699, "step": 301 }, { "epoch": 0.13739763421292084, "grad_norm": 0.6267143484055344, "learning_rate": 9.981502654774349e-06, "loss": 0.0499, "step": 302 }, { "epoch": 0.137852593266606, "grad_norm": 0.8234150836820757, "learning_rate": 9.98137962113198e-06, "loss": 0.0788, "step": 303 }, { "epoch": 0.13830755232029118, "grad_norm": 0.8158733102806115, "learning_rate": 9.98125618043377e-06, "loss": 0.089, "step": 304 }, { "epoch": 0.13876251137397635, "grad_norm": 0.6372656549463032, "learning_rate": 9.981132332689796e-06, "loss": 0.0517, "step": 305 }, { "epoch": 0.1392174704276615, "grad_norm": 0.7713863813548327, "learning_rate": 9.981008077910184e-06, "loss": 0.0769, "step": 306 }, { "epoch": 0.13967242948134667, "grad_norm": 0.8883775702857831, "learning_rate": 9.980883416105084e-06, "loss": 0.0828, "step": 307 }, { "epoch": 0.14012738853503184, "grad_norm": 0.6490936355626988, "learning_rate": 9.980758347284687e-06, "loss": 0.0618, "step": 308 }, { "epoch": 0.14058234758871702, "grad_norm": 0.8359554084586713, "learning_rate": 9.980632871459209e-06, "loss": 0.0714, "step": 309 }, { "epoch": 0.1410373066424022, "grad_norm": 0.7373523328454649, "learning_rate": 9.980506988638906e-06, "loss": 0.0836, "step": 310 }, { "epoch": 0.14149226569608736, "grad_norm": 0.6644370731485183, "learning_rate": 9.980380698834064e-06, "loss": 0.0777, "step": 311 }, { "epoch": 0.14194722474977253, "grad_norm": 0.870883965477211, "learning_rate": 9.980254002055003e-06, "loss": 0.0847, "step": 312 }, { "epoch": 0.14240218380345768, "grad_norm": 0.6021065409531002, "learning_rate": 9.980126898312074e-06, "loss": 0.0583, "step": 313 }, { "epoch": 0.14285714285714285, "grad_norm": 0.8705461588189498, "learning_rate": 9.979999387615665e-06, "loss": 0.0895, "step": 314 }, { "epoch": 0.14331210191082802, "grad_norm": 0.9639410731114018, "learning_rate": 9.979871469976197e-06, "loss": 0.0901, "step": 315 }, { "epoch": 0.1437670609645132, "grad_norm": 0.7554126383153169, "learning_rate": 9.97974314540412e-06, "loss": 0.0699, "step": 316 }, { "epoch": 0.14422202001819837, "grad_norm": 1.1039648440512544, "learning_rate": 9.979614413909922e-06, "loss": 0.1013, "step": 317 }, { "epoch": 0.14467697907188354, "grad_norm": 0.5258831871743486, "learning_rate": 9.979485275504121e-06, "loss": 0.0544, "step": 318 }, { "epoch": 0.1451319381255687, "grad_norm": 1.3025897394440575, "learning_rate": 9.979355730197271e-06, "loss": 0.1067, "step": 319 }, { "epoch": 0.14558689717925385, "grad_norm": 0.5206132423310033, "learning_rate": 9.979225777999956e-06, "loss": 0.0497, "step": 320 }, { "epoch": 0.14604185623293903, "grad_norm": 0.7202189397663867, "learning_rate": 9.9790954189228e-06, "loss": 0.0807, "step": 321 }, { "epoch": 0.1464968152866242, "grad_norm": 0.5738667169449175, "learning_rate": 9.97896465297645e-06, "loss": 0.0614, "step": 322 }, { "epoch": 0.14695177434030937, "grad_norm": 0.7972440737628133, "learning_rate": 9.978833480171592e-06, "loss": 0.0906, "step": 323 }, { "epoch": 0.14740673339399454, "grad_norm": 0.7697423454053598, "learning_rate": 9.978701900518947e-06, "loss": 0.0632, "step": 324 }, { "epoch": 0.14786169244767972, "grad_norm": 0.8259885564233931, "learning_rate": 9.978569914029267e-06, "loss": 0.0944, "step": 325 }, { "epoch": 0.1483166515013649, "grad_norm": 0.8450006655868962, "learning_rate": 9.978437520713335e-06, "loss": 0.0862, "step": 326 }, { "epoch": 0.14877161055505003, "grad_norm": 0.7746078278616594, "learning_rate": 9.978304720581973e-06, "loss": 0.088, "step": 327 }, { "epoch": 0.1492265696087352, "grad_norm": 0.9977734940815816, "learning_rate": 9.97817151364603e-06, "loss": 0.1036, "step": 328 }, { "epoch": 0.14968152866242038, "grad_norm": 0.7800752301510507, "learning_rate": 9.978037899916393e-06, "loss": 0.0778, "step": 329 }, { "epoch": 0.15013648771610555, "grad_norm": 0.7521153273438224, "learning_rate": 9.97790387940398e-06, "loss": 0.0532, "step": 330 }, { "epoch": 0.15059144676979072, "grad_norm": 0.8046420256419254, "learning_rate": 9.977769452119741e-06, "loss": 0.0708, "step": 331 }, { "epoch": 0.1510464058234759, "grad_norm": 0.9071770528791517, "learning_rate": 9.97763461807466e-06, "loss": 0.1006, "step": 332 }, { "epoch": 0.15150136487716107, "grad_norm": 0.8824570234268595, "learning_rate": 9.97749937727976e-06, "loss": 0.0855, "step": 333 }, { "epoch": 0.15195632393084624, "grad_norm": 0.8286075823730068, "learning_rate": 9.977363729746088e-06, "loss": 0.077, "step": 334 }, { "epoch": 0.15241128298453138, "grad_norm": 0.6791233851472963, "learning_rate": 9.977227675484729e-06, "loss": 0.0698, "step": 335 }, { "epoch": 0.15286624203821655, "grad_norm": 0.9813875260679181, "learning_rate": 9.977091214506803e-06, "loss": 0.0838, "step": 336 }, { "epoch": 0.15332120109190173, "grad_norm": 0.9986284190120469, "learning_rate": 9.976954346823456e-06, "loss": 0.0789, "step": 337 }, { "epoch": 0.1537761601455869, "grad_norm": 0.6456071732838817, "learning_rate": 9.976817072445878e-06, "loss": 0.0566, "step": 338 }, { "epoch": 0.15423111919927207, "grad_norm": 0.7707362352402762, "learning_rate": 9.976679391385283e-06, "loss": 0.0677, "step": 339 }, { "epoch": 0.15468607825295724, "grad_norm": 0.5804713825378958, "learning_rate": 9.976541303652923e-06, "loss": 0.0547, "step": 340 }, { "epoch": 0.15514103730664242, "grad_norm": 0.7705377953828665, "learning_rate": 9.976402809260083e-06, "loss": 0.0673, "step": 341 }, { "epoch": 0.15559599636032756, "grad_norm": 0.651002355082985, "learning_rate": 9.976263908218076e-06, "loss": 0.066, "step": 342 }, { "epoch": 0.15605095541401273, "grad_norm": 1.0075230687249708, "learning_rate": 9.976124600538257e-06, "loss": 0.1151, "step": 343 }, { "epoch": 0.1565059144676979, "grad_norm": 0.7110146200064966, "learning_rate": 9.975984886232006e-06, "loss": 0.0693, "step": 344 }, { "epoch": 0.15696087352138308, "grad_norm": 0.782615076662302, "learning_rate": 9.975844765310743e-06, "loss": 0.071, "step": 345 }, { "epoch": 0.15741583257506825, "grad_norm": 1.091513822496144, "learning_rate": 9.975704237785915e-06, "loss": 0.1277, "step": 346 }, { "epoch": 0.15787079162875342, "grad_norm": 0.8244942271322709, "learning_rate": 9.975563303669006e-06, "loss": 0.092, "step": 347 }, { "epoch": 0.1583257506824386, "grad_norm": 1.0997264747524325, "learning_rate": 9.975421962971536e-06, "loss": 0.102, "step": 348 }, { "epoch": 0.15878070973612374, "grad_norm": 1.0471722358260585, "learning_rate": 9.97528021570505e-06, "loss": 0.1112, "step": 349 }, { "epoch": 0.1592356687898089, "grad_norm": 0.6366013160292697, "learning_rate": 9.975138061881135e-06, "loss": 0.0629, "step": 350 }, { "epoch": 0.15969062784349408, "grad_norm": 0.7145502784859615, "learning_rate": 9.974995501511404e-06, "loss": 0.0567, "step": 351 }, { "epoch": 0.16014558689717925, "grad_norm": 1.0825694007542435, "learning_rate": 9.974852534607506e-06, "loss": 0.0897, "step": 352 }, { "epoch": 0.16060054595086443, "grad_norm": 0.8874195306329471, "learning_rate": 9.974709161181126e-06, "loss": 0.0879, "step": 353 }, { "epoch": 0.1610555050045496, "grad_norm": 0.8193025449594961, "learning_rate": 9.974565381243982e-06, "loss": 0.0969, "step": 354 }, { "epoch": 0.16151046405823477, "grad_norm": 0.76528422131405, "learning_rate": 9.974421194807815e-06, "loss": 0.0786, "step": 355 }, { "epoch": 0.16196542311191992, "grad_norm": 0.8836543328533641, "learning_rate": 9.974276601884416e-06, "loss": 0.0744, "step": 356 }, { "epoch": 0.1624203821656051, "grad_norm": 0.7482952108426273, "learning_rate": 9.974131602485596e-06, "loss": 0.0772, "step": 357 }, { "epoch": 0.16287534121929026, "grad_norm": 0.9122723647083647, "learning_rate": 9.973986196623203e-06, "loss": 0.0851, "step": 358 }, { "epoch": 0.16333030027297543, "grad_norm": 0.8373653902978805, "learning_rate": 9.973840384309121e-06, "loss": 0.0865, "step": 359 }, { "epoch": 0.1637852593266606, "grad_norm": 0.6360069343077157, "learning_rate": 9.973694165555264e-06, "loss": 0.0618, "step": 360 }, { "epoch": 0.16424021838034578, "grad_norm": 0.7967304456611868, "learning_rate": 9.973547540373582e-06, "loss": 0.0865, "step": 361 }, { "epoch": 0.16469517743403095, "grad_norm": 1.1699452577832765, "learning_rate": 9.973400508776054e-06, "loss": 0.1144, "step": 362 }, { "epoch": 0.1651501364877161, "grad_norm": 0.6282867599706373, "learning_rate": 9.973253070774698e-06, "loss": 0.0633, "step": 363 }, { "epoch": 0.16560509554140126, "grad_norm": 0.79942272506218, "learning_rate": 9.973105226381559e-06, "loss": 0.069, "step": 364 }, { "epoch": 0.16606005459508644, "grad_norm": 0.9348674828410355, "learning_rate": 9.972956975608719e-06, "loss": 0.1019, "step": 365 }, { "epoch": 0.1665150136487716, "grad_norm": 1.0942665884463076, "learning_rate": 9.972808318468292e-06, "loss": 0.0859, "step": 366 }, { "epoch": 0.16696997270245678, "grad_norm": 0.6283579225277517, "learning_rate": 9.972659254972426e-06, "loss": 0.0589, "step": 367 }, { "epoch": 0.16742493175614195, "grad_norm": 1.0989677054167046, "learning_rate": 9.972509785133304e-06, "loss": 0.1081, "step": 368 }, { "epoch": 0.16787989080982713, "grad_norm": 0.7310198219540203, "learning_rate": 9.972359908963137e-06, "loss": 0.0675, "step": 369 }, { "epoch": 0.16833484986351227, "grad_norm": 0.757671629194488, "learning_rate": 9.972209626474172e-06, "loss": 0.0734, "step": 370 }, { "epoch": 0.16878980891719744, "grad_norm": 0.7966175159886519, "learning_rate": 9.972058937678692e-06, "loss": 0.075, "step": 371 }, { "epoch": 0.16924476797088261, "grad_norm": 0.9805514159267839, "learning_rate": 9.97190784258901e-06, "loss": 0.1071, "step": 372 }, { "epoch": 0.1696997270245678, "grad_norm": 0.7000612574442994, "learning_rate": 9.971756341217471e-06, "loss": 0.0526, "step": 373 }, { "epoch": 0.17015468607825296, "grad_norm": 0.7917466702374949, "learning_rate": 9.971604433576456e-06, "loss": 0.0698, "step": 374 }, { "epoch": 0.17060964513193813, "grad_norm": 0.8412692631182211, "learning_rate": 9.97145211967838e-06, "loss": 0.0783, "step": 375 }, { "epoch": 0.1710646041856233, "grad_norm": 0.5615038895232536, "learning_rate": 9.971299399535685e-06, "loss": 0.053, "step": 376 }, { "epoch": 0.17151956323930848, "grad_norm": 0.6849745369298482, "learning_rate": 9.971146273160854e-06, "loss": 0.0774, "step": 377 }, { "epoch": 0.17197452229299362, "grad_norm": 0.6466596777060115, "learning_rate": 9.9709927405664e-06, "loss": 0.0606, "step": 378 }, { "epoch": 0.1724294813466788, "grad_norm": 0.7169884074840761, "learning_rate": 9.970838801764866e-06, "loss": 0.0839, "step": 379 }, { "epoch": 0.17288444040036396, "grad_norm": 0.9393396355410675, "learning_rate": 9.970684456768836e-06, "loss": 0.1132, "step": 380 }, { "epoch": 0.17333939945404914, "grad_norm": 12.197098173453568, "learning_rate": 9.970529705590918e-06, "loss": 0.4858, "step": 381 }, { "epoch": 0.1737943585077343, "grad_norm": 0.7355841274771772, "learning_rate": 9.97037454824376e-06, "loss": 0.0714, "step": 382 }, { "epoch": 0.17424931756141948, "grad_norm": 1.050385265783733, "learning_rate": 9.97021898474004e-06, "loss": 0.1024, "step": 383 }, { "epoch": 0.17470427661510465, "grad_norm": 0.8612087678995594, "learning_rate": 9.970063015092469e-06, "loss": 0.085, "step": 384 }, { "epoch": 0.1751592356687898, "grad_norm": 1.3886472100476919, "learning_rate": 9.969906639313793e-06, "loss": 0.1212, "step": 385 }, { "epoch": 0.17561419472247497, "grad_norm": 0.8238176964814595, "learning_rate": 9.96974985741679e-06, "loss": 0.0721, "step": 386 }, { "epoch": 0.17606915377616014, "grad_norm": 0.8718897735731601, "learning_rate": 9.969592669414272e-06, "loss": 0.0959, "step": 387 }, { "epoch": 0.17652411282984531, "grad_norm": 6.796752422837202, "learning_rate": 9.969435075319083e-06, "loss": 0.115, "step": 388 }, { "epoch": 0.1769790718835305, "grad_norm": 0.58176536820322, "learning_rate": 9.969277075144104e-06, "loss": 0.0459, "step": 389 }, { "epoch": 0.17743403093721566, "grad_norm": 0.7267253435076165, "learning_rate": 9.969118668902242e-06, "loss": 0.07, "step": 390 }, { "epoch": 0.17788898999090083, "grad_norm": 0.7682389367523258, "learning_rate": 9.968959856606442e-06, "loss": 0.0542, "step": 391 }, { "epoch": 0.17834394904458598, "grad_norm": 0.7873348185837048, "learning_rate": 9.968800638269682e-06, "loss": 0.0598, "step": 392 }, { "epoch": 0.17879890809827115, "grad_norm": 1.287713292390112, "learning_rate": 9.968641013904974e-06, "loss": 0.1442, "step": 393 }, { "epoch": 0.17925386715195632, "grad_norm": 1.085650814952146, "learning_rate": 9.968480983525359e-06, "loss": 0.0926, "step": 394 }, { "epoch": 0.1797088262056415, "grad_norm": 0.6716676596759695, "learning_rate": 9.968320547143918e-06, "loss": 0.0767, "step": 395 }, { "epoch": 0.18016378525932666, "grad_norm": 0.8467396807693714, "learning_rate": 9.968159704773757e-06, "loss": 0.0977, "step": 396 }, { "epoch": 0.18061874431301184, "grad_norm": 0.6438855833782786, "learning_rate": 9.967998456428021e-06, "loss": 0.0586, "step": 397 }, { "epoch": 0.181073703366697, "grad_norm": 0.7254140122399564, "learning_rate": 9.967836802119886e-06, "loss": 0.06, "step": 398 }, { "epoch": 0.18152866242038215, "grad_norm": 0.87517545358881, "learning_rate": 9.967674741862563e-06, "loss": 0.1016, "step": 399 }, { "epoch": 0.18198362147406733, "grad_norm": 1.0624206936058178, "learning_rate": 9.967512275669294e-06, "loss": 0.1296, "step": 400 }, { "epoch": 0.1824385805277525, "grad_norm": 1.0284720738314184, "learning_rate": 9.967349403553353e-06, "loss": 0.0862, "step": 401 }, { "epoch": 0.18289353958143767, "grad_norm": 0.8342932737384292, "learning_rate": 9.967186125528053e-06, "loss": 0.0873, "step": 402 }, { "epoch": 0.18334849863512284, "grad_norm": 1.543095569701571, "learning_rate": 9.967022441606734e-06, "loss": 0.1209, "step": 403 }, { "epoch": 0.18380345768880801, "grad_norm": 0.70731586616612, "learning_rate": 9.966858351802773e-06, "loss": 0.0726, "step": 404 }, { "epoch": 0.1842584167424932, "grad_norm": 0.6660531988680356, "learning_rate": 9.966693856129576e-06, "loss": 0.0562, "step": 405 }, { "epoch": 0.18471337579617833, "grad_norm": 0.8503640969928286, "learning_rate": 9.966528954600587e-06, "loss": 0.0838, "step": 406 }, { "epoch": 0.1851683348498635, "grad_norm": 0.6021534124846688, "learning_rate": 9.96636364722928e-06, "loss": 0.0673, "step": 407 }, { "epoch": 0.18562329390354868, "grad_norm": 0.8782816795828058, "learning_rate": 9.966197934029165e-06, "loss": 0.0845, "step": 408 }, { "epoch": 0.18607825295723385, "grad_norm": 0.9030990654346936, "learning_rate": 9.966031815013781e-06, "loss": 0.0839, "step": 409 }, { "epoch": 0.18653321201091902, "grad_norm": 0.8567507299712805, "learning_rate": 9.965865290196703e-06, "loss": 0.0935, "step": 410 }, { "epoch": 0.1869881710646042, "grad_norm": 0.8099856489670021, "learning_rate": 9.96569835959154e-06, "loss": 0.0747, "step": 411 }, { "epoch": 0.18744313011828936, "grad_norm": 0.8938878675243255, "learning_rate": 9.965531023211931e-06, "loss": 0.0854, "step": 412 }, { "epoch": 0.18789808917197454, "grad_norm": 0.735313860104022, "learning_rate": 9.965363281071551e-06, "loss": 0.0865, "step": 413 }, { "epoch": 0.18835304822565968, "grad_norm": 0.5495229598132649, "learning_rate": 9.965195133184108e-06, "loss": 0.0403, "step": 414 }, { "epoch": 0.18880800727934485, "grad_norm": 1.0700416713113117, "learning_rate": 9.965026579563342e-06, "loss": 0.1086, "step": 415 }, { "epoch": 0.18926296633303002, "grad_norm": 0.7118653717355078, "learning_rate": 9.964857620223024e-06, "loss": 0.0691, "step": 416 }, { "epoch": 0.1897179253867152, "grad_norm": 0.6871481686027417, "learning_rate": 9.964688255176963e-06, "loss": 0.0667, "step": 417 }, { "epoch": 0.19017288444040037, "grad_norm": 0.9848841869658392, "learning_rate": 9.964518484438998e-06, "loss": 0.0813, "step": 418 }, { "epoch": 0.19062784349408554, "grad_norm": 0.6311750922074311, "learning_rate": 9.964348308023001e-06, "loss": 0.0592, "step": 419 }, { "epoch": 0.1910828025477707, "grad_norm": 0.7813168734245782, "learning_rate": 9.964177725942881e-06, "loss": 0.0826, "step": 420 }, { "epoch": 0.19153776160145586, "grad_norm": 0.8572110622332836, "learning_rate": 9.964006738212574e-06, "loss": 0.0853, "step": 421 }, { "epoch": 0.19199272065514103, "grad_norm": 0.5304433423014596, "learning_rate": 9.963835344846056e-06, "loss": 0.048, "step": 422 }, { "epoch": 0.1924476797088262, "grad_norm": 0.7598521228122416, "learning_rate": 9.963663545857328e-06, "loss": 0.0757, "step": 423 }, { "epoch": 0.19290263876251137, "grad_norm": 1.1542546683489703, "learning_rate": 9.963491341260432e-06, "loss": 0.104, "step": 424 }, { "epoch": 0.19335759781619655, "grad_norm": 0.7766563582253432, "learning_rate": 9.963318731069437e-06, "loss": 0.0952, "step": 425 }, { "epoch": 0.19381255686988172, "grad_norm": 1.1319194983916299, "learning_rate": 9.96314571529845e-06, "loss": 0.1005, "step": 426 }, { "epoch": 0.1942675159235669, "grad_norm": 0.7230559135257585, "learning_rate": 9.962972293961608e-06, "loss": 0.0647, "step": 427 }, { "epoch": 0.19472247497725204, "grad_norm": 0.9863934566369588, "learning_rate": 9.962798467073083e-06, "loss": 0.0763, "step": 428 }, { "epoch": 0.1951774340309372, "grad_norm": 0.8259784410005646, "learning_rate": 9.96262423464708e-06, "loss": 0.087, "step": 429 }, { "epoch": 0.19563239308462238, "grad_norm": 0.7987139095182185, "learning_rate": 9.962449596697834e-06, "loss": 0.0671, "step": 430 }, { "epoch": 0.19608735213830755, "grad_norm": 1.130208173229934, "learning_rate": 9.962274553239619e-06, "loss": 0.119, "step": 431 }, { "epoch": 0.19654231119199272, "grad_norm": 0.7399696243677417, "learning_rate": 9.962099104286735e-06, "loss": 0.064, "step": 432 }, { "epoch": 0.1969972702456779, "grad_norm": 1.156015767405528, "learning_rate": 9.961923249853523e-06, "loss": 0.1102, "step": 433 }, { "epoch": 0.19745222929936307, "grad_norm": 0.972422739757894, "learning_rate": 9.961746989954349e-06, "loss": 0.1093, "step": 434 }, { "epoch": 0.1979071883530482, "grad_norm": 0.7766700420403171, "learning_rate": 9.96157032460362e-06, "loss": 0.0655, "step": 435 }, { "epoch": 0.19836214740673339, "grad_norm": 0.7460679115751414, "learning_rate": 9.961393253815767e-06, "loss": 0.0751, "step": 436 }, { "epoch": 0.19881710646041856, "grad_norm": 1.0684214450487566, "learning_rate": 9.961215777605266e-06, "loss": 0.0789, "step": 437 }, { "epoch": 0.19927206551410373, "grad_norm": 0.7683994291392229, "learning_rate": 9.961037895986615e-06, "loss": 0.0849, "step": 438 }, { "epoch": 0.1997270245677889, "grad_norm": 0.7270368453251704, "learning_rate": 9.960859608974352e-06, "loss": 0.0779, "step": 439 }, { "epoch": 0.20018198362147407, "grad_norm": 0.701460207303568, "learning_rate": 9.960680916583042e-06, "loss": 0.0639, "step": 440 }, { "epoch": 0.20063694267515925, "grad_norm": 0.6784619280926262, "learning_rate": 9.960501818827292e-06, "loss": 0.077, "step": 441 }, { "epoch": 0.2010919017288444, "grad_norm": 0.8064075868568972, "learning_rate": 9.960322315721735e-06, "loss": 0.0827, "step": 442 }, { "epoch": 0.20154686078252956, "grad_norm": 0.9155026735417204, "learning_rate": 9.960142407281039e-06, "loss": 0.0841, "step": 443 }, { "epoch": 0.20200181983621474, "grad_norm": 0.6167749294869733, "learning_rate": 9.959962093519904e-06, "loss": 0.054, "step": 444 }, { "epoch": 0.2024567788898999, "grad_norm": 0.8127781985331358, "learning_rate": 9.959781374453066e-06, "loss": 0.0751, "step": 445 }, { "epoch": 0.20291173794358508, "grad_norm": 0.98306444688532, "learning_rate": 9.959600250095294e-06, "loss": 0.075, "step": 446 }, { "epoch": 0.20336669699727025, "grad_norm": 0.7982130269360888, "learning_rate": 9.959418720461384e-06, "loss": 0.0834, "step": 447 }, { "epoch": 0.20382165605095542, "grad_norm": 0.7862225023823932, "learning_rate": 9.959236785566175e-06, "loss": 0.0704, "step": 448 }, { "epoch": 0.20427661510464057, "grad_norm": 0.562107514296544, "learning_rate": 9.959054445424532e-06, "loss": 0.0644, "step": 449 }, { "epoch": 0.20473157415832574, "grad_norm": 0.6089607791855781, "learning_rate": 9.958871700051353e-06, "loss": 0.0512, "step": 450 }, { "epoch": 0.2051865332120109, "grad_norm": 0.6962095067981563, "learning_rate": 9.958688549461573e-06, "loss": 0.0712, "step": 451 }, { "epoch": 0.20564149226569609, "grad_norm": 1.155217046291275, "learning_rate": 9.958504993670158e-06, "loss": 0.1049, "step": 452 }, { "epoch": 0.20609645131938126, "grad_norm": 1.0913314226134752, "learning_rate": 9.958321032692107e-06, "loss": 0.1226, "step": 453 }, { "epoch": 0.20655141037306643, "grad_norm": 22.735025633907238, "learning_rate": 9.958136666542455e-06, "loss": 0.8419, "step": 454 }, { "epoch": 0.2070063694267516, "grad_norm": 1.184019553325164, "learning_rate": 9.957951895236262e-06, "loss": 0.1113, "step": 455 }, { "epoch": 0.20746132848043677, "grad_norm": 0.7664792046331882, "learning_rate": 9.957766718788632e-06, "loss": 0.104, "step": 456 }, { "epoch": 0.20791628753412192, "grad_norm": 0.8672883026786035, "learning_rate": 9.957581137214695e-06, "loss": 0.074, "step": 457 }, { "epoch": 0.2083712465878071, "grad_norm": 0.8772220264781722, "learning_rate": 9.957395150529615e-06, "loss": 0.0986, "step": 458 }, { "epoch": 0.20882620564149226, "grad_norm": 0.7016331971826193, "learning_rate": 9.95720875874859e-06, "loss": 0.0752, "step": 459 }, { "epoch": 0.20928116469517744, "grad_norm": 0.6308822051977305, "learning_rate": 9.957021961886855e-06, "loss": 0.0608, "step": 460 }, { "epoch": 0.2097361237488626, "grad_norm": 0.9803601042372939, "learning_rate": 9.956834759959669e-06, "loss": 0.0908, "step": 461 }, { "epoch": 0.21019108280254778, "grad_norm": 0.7674462109758159, "learning_rate": 9.95664715298233e-06, "loss": 0.074, "step": 462 }, { "epoch": 0.21064604185623295, "grad_norm": 0.7450186566335193, "learning_rate": 9.95645914097017e-06, "loss": 0.0817, "step": 463 }, { "epoch": 0.2111010009099181, "grad_norm": 0.7225723661612439, "learning_rate": 9.956270723938553e-06, "loss": 0.0849, "step": 464 }, { "epoch": 0.21155595996360327, "grad_norm": 0.7190355211871646, "learning_rate": 9.956081901902875e-06, "loss": 0.0748, "step": 465 }, { "epoch": 0.21201091901728844, "grad_norm": 1.210684562087392, "learning_rate": 9.955892674878565e-06, "loss": 0.1272, "step": 466 }, { "epoch": 0.2124658780709736, "grad_norm": 0.834170476650907, "learning_rate": 9.955703042881087e-06, "loss": 0.0992, "step": 467 }, { "epoch": 0.21292083712465878, "grad_norm": 0.874478173291907, "learning_rate": 9.955513005925934e-06, "loss": 0.0858, "step": 468 }, { "epoch": 0.21337579617834396, "grad_norm": 0.5510320150423565, "learning_rate": 9.95532256402864e-06, "loss": 0.0574, "step": 469 }, { "epoch": 0.21383075523202913, "grad_norm": 0.5657171871822584, "learning_rate": 9.955131717204762e-06, "loss": 0.0671, "step": 470 }, { "epoch": 0.21428571428571427, "grad_norm": 0.7564664653864259, "learning_rate": 9.954940465469898e-06, "loss": 0.085, "step": 471 }, { "epoch": 0.21474067333939945, "grad_norm": 0.7594501005901694, "learning_rate": 9.954748808839675e-06, "loss": 0.0733, "step": 472 }, { "epoch": 0.21519563239308462, "grad_norm": 0.6748092428366178, "learning_rate": 9.954556747329754e-06, "loss": 0.0707, "step": 473 }, { "epoch": 0.2156505914467698, "grad_norm": 1.715089789819449, "learning_rate": 9.954364280955832e-06, "loss": 0.1045, "step": 474 }, { "epoch": 0.21610555050045496, "grad_norm": 0.6668751648778155, "learning_rate": 9.954171409733634e-06, "loss": 0.0573, "step": 475 }, { "epoch": 0.21656050955414013, "grad_norm": 0.5963716475430643, "learning_rate": 9.95397813367892e-06, "loss": 0.0752, "step": 476 }, { "epoch": 0.2170154686078253, "grad_norm": 0.9917190233932158, "learning_rate": 9.953784452807487e-06, "loss": 0.1049, "step": 477 }, { "epoch": 0.21747042766151045, "grad_norm": 0.5638529401686616, "learning_rate": 9.953590367135159e-06, "loss": 0.0547, "step": 478 }, { "epoch": 0.21792538671519562, "grad_norm": 0.6477110515460727, "learning_rate": 9.953395876677796e-06, "loss": 0.0564, "step": 479 }, { "epoch": 0.2183803457688808, "grad_norm": 0.5492055118574499, "learning_rate": 9.95320098145129e-06, "loss": 0.0505, "step": 480 }, { "epoch": 0.21883530482256597, "grad_norm": 0.8954528378372288, "learning_rate": 9.95300568147157e-06, "loss": 0.126, "step": 481 }, { "epoch": 0.21929026387625114, "grad_norm": 0.6155736143826033, "learning_rate": 9.952809976754593e-06, "loss": 0.0518, "step": 482 }, { "epoch": 0.2197452229299363, "grad_norm": 1.1486004986445648, "learning_rate": 9.952613867316351e-06, "loss": 0.1142, "step": 483 }, { "epoch": 0.22020018198362148, "grad_norm": 0.8236924325360948, "learning_rate": 9.95241735317287e-06, "loss": 0.1047, "step": 484 }, { "epoch": 0.22065514103730663, "grad_norm": 0.832372102653505, "learning_rate": 9.952220434340209e-06, "loss": 0.0729, "step": 485 }, { "epoch": 0.2211101000909918, "grad_norm": 0.7288716722109786, "learning_rate": 9.952023110834456e-06, "loss": 0.068, "step": 486 }, { "epoch": 0.22156505914467697, "grad_norm": 0.5327254294033283, "learning_rate": 9.951825382671739e-06, "loss": 0.0614, "step": 487 }, { "epoch": 0.22202001819836215, "grad_norm": 0.7204991379763186, "learning_rate": 9.951627249868213e-06, "loss": 0.0666, "step": 488 }, { "epoch": 0.22247497725204732, "grad_norm": 0.7485835393026234, "learning_rate": 9.95142871244007e-06, "loss": 0.068, "step": 489 }, { "epoch": 0.2229299363057325, "grad_norm": 0.45602532896445397, "learning_rate": 9.951229770403531e-06, "loss": 0.0414, "step": 490 }, { "epoch": 0.22338489535941766, "grad_norm": 0.7240661348572547, "learning_rate": 9.951030423774858e-06, "loss": 0.0798, "step": 491 }, { "epoch": 0.22383985441310283, "grad_norm": 0.7716352477687572, "learning_rate": 9.950830672570337e-06, "loss": 0.071, "step": 492 }, { "epoch": 0.22429481346678798, "grad_norm": 1.22677184750836, "learning_rate": 9.95063051680629e-06, "loss": 0.1373, "step": 493 }, { "epoch": 0.22474977252047315, "grad_norm": 0.7365431233953595, "learning_rate": 9.950429956499074e-06, "loss": 0.0699, "step": 494 }, { "epoch": 0.22520473157415832, "grad_norm": 0.705654951368504, "learning_rate": 9.950228991665078e-06, "loss": 0.0741, "step": 495 }, { "epoch": 0.2256596906278435, "grad_norm": 0.8261497906057415, "learning_rate": 9.950027622320724e-06, "loss": 0.0764, "step": 496 }, { "epoch": 0.22611464968152867, "grad_norm": 0.9965395262255518, "learning_rate": 9.949825848482465e-06, "loss": 0.0852, "step": 497 }, { "epoch": 0.22656960873521384, "grad_norm": 0.6807161957389707, "learning_rate": 9.949623670166794e-06, "loss": 0.074, "step": 498 }, { "epoch": 0.227024567788899, "grad_norm": 1.1216390709095547, "learning_rate": 9.949421087390228e-06, "loss": 0.0931, "step": 499 }, { "epoch": 0.22747952684258416, "grad_norm": 1.1278655216416786, "learning_rate": 9.949218100169322e-06, "loss": 0.1177, "step": 500 }, { "epoch": 0.22793448589626933, "grad_norm": 0.9160591457448575, "learning_rate": 9.949014708520664e-06, "loss": 0.1015, "step": 501 }, { "epoch": 0.2283894449499545, "grad_norm": 0.9377363057118697, "learning_rate": 9.948810912460872e-06, "loss": 0.1059, "step": 502 }, { "epoch": 0.22884440400363967, "grad_norm": 0.8760932101779023, "learning_rate": 9.948606712006601e-06, "loss": 0.0812, "step": 503 }, { "epoch": 0.22929936305732485, "grad_norm": 0.6962605051289937, "learning_rate": 9.948402107174537e-06, "loss": 0.0735, "step": 504 }, { "epoch": 0.22975432211101002, "grad_norm": 0.6501265713488487, "learning_rate": 9.948197097981401e-06, "loss": 0.0551, "step": 505 }, { "epoch": 0.2302092811646952, "grad_norm": 1.2156011775652311, "learning_rate": 9.947991684443942e-06, "loss": 0.1066, "step": 506 }, { "epoch": 0.23066424021838033, "grad_norm": 0.9679794435610901, "learning_rate": 9.947785866578951e-06, "loss": 0.0981, "step": 507 }, { "epoch": 0.2311191992720655, "grad_norm": 0.7195724631231237, "learning_rate": 9.94757964440324e-06, "loss": 0.0777, "step": 508 }, { "epoch": 0.23157415832575068, "grad_norm": 0.549427502610929, "learning_rate": 9.947373017933665e-06, "loss": 0.0516, "step": 509 }, { "epoch": 0.23202911737943585, "grad_norm": 0.5667212336170355, "learning_rate": 9.947165987187108e-06, "loss": 0.0583, "step": 510 }, { "epoch": 0.23248407643312102, "grad_norm": 0.6638127935874616, "learning_rate": 9.946958552180489e-06, "loss": 0.0723, "step": 511 }, { "epoch": 0.2329390354868062, "grad_norm": 0.5226768129517959, "learning_rate": 9.946750712930756e-06, "loss": 0.0482, "step": 512 }, { "epoch": 0.23339399454049137, "grad_norm": 0.8358986518129136, "learning_rate": 9.946542469454894e-06, "loss": 0.1037, "step": 513 }, { "epoch": 0.2338489535941765, "grad_norm": 0.6695809647699968, "learning_rate": 9.94633382176992e-06, "loss": 0.0728, "step": 514 }, { "epoch": 0.23430391264786168, "grad_norm": 1.0608546974350634, "learning_rate": 9.946124769892884e-06, "loss": 0.1192, "step": 515 }, { "epoch": 0.23475887170154686, "grad_norm": 0.5090717025630993, "learning_rate": 9.945915313840869e-06, "loss": 0.0612, "step": 516 }, { "epoch": 0.23521383075523203, "grad_norm": 0.8105130307542814, "learning_rate": 9.94570545363099e-06, "loss": 0.0838, "step": 517 }, { "epoch": 0.2356687898089172, "grad_norm": 0.7752986876049957, "learning_rate": 9.945495189280394e-06, "loss": 0.092, "step": 518 }, { "epoch": 0.23612374886260237, "grad_norm": 0.869801315379322, "learning_rate": 9.945284520806267e-06, "loss": 0.077, "step": 519 }, { "epoch": 0.23657870791628755, "grad_norm": 0.5427153243822386, "learning_rate": 9.94507344822582e-06, "loss": 0.0592, "step": 520 }, { "epoch": 0.2370336669699727, "grad_norm": 0.7368670007832758, "learning_rate": 9.944861971556305e-06, "loss": 0.0608, "step": 521 }, { "epoch": 0.23748862602365786, "grad_norm": 0.8141430793460733, "learning_rate": 9.944650090814998e-06, "loss": 0.0616, "step": 522 }, { "epoch": 0.23794358507734303, "grad_norm": 2.1096588720516425, "learning_rate": 9.944437806019216e-06, "loss": 0.0938, "step": 523 }, { "epoch": 0.2383985441310282, "grad_norm": 0.7014907085161215, "learning_rate": 9.944225117186306e-06, "loss": 0.0812, "step": 524 }, { "epoch": 0.23885350318471338, "grad_norm": 0.5078467158211916, "learning_rate": 9.944012024333647e-06, "loss": 0.0561, "step": 525 }, { "epoch": 0.23930846223839855, "grad_norm": 0.6379031604907951, "learning_rate": 9.943798527478652e-06, "loss": 0.0678, "step": 526 }, { "epoch": 0.23976342129208372, "grad_norm": 0.799876019099874, "learning_rate": 9.943584626638768e-06, "loss": 0.0914, "step": 527 }, { "epoch": 0.24021838034576887, "grad_norm": 0.6550229607349646, "learning_rate": 9.943370321831474e-06, "loss": 0.0668, "step": 528 }, { "epoch": 0.24067333939945404, "grad_norm": 0.767534839542607, "learning_rate": 9.943155613074279e-06, "loss": 0.0711, "step": 529 }, { "epoch": 0.2411282984531392, "grad_norm": 0.7571838990000624, "learning_rate": 9.942940500384733e-06, "loss": 0.0893, "step": 530 }, { "epoch": 0.24158325750682438, "grad_norm": 17.807000846945513, "learning_rate": 9.942724983780409e-06, "loss": 0.3419, "step": 531 }, { "epoch": 0.24203821656050956, "grad_norm": 1.2088422410181228, "learning_rate": 9.942509063278922e-06, "loss": 0.1173, "step": 532 }, { "epoch": 0.24249317561419473, "grad_norm": 0.8811842157145667, "learning_rate": 9.942292738897914e-06, "loss": 0.1006, "step": 533 }, { "epoch": 0.2429481346678799, "grad_norm": 0.7726281786442553, "learning_rate": 9.942076010655063e-06, "loss": 0.0909, "step": 534 }, { "epoch": 0.24340309372156507, "grad_norm": 0.9942256398778268, "learning_rate": 9.941858878568078e-06, "loss": 0.134, "step": 535 }, { "epoch": 0.24385805277525022, "grad_norm": 1.001596627292525, "learning_rate": 9.941641342654702e-06, "loss": 0.0977, "step": 536 }, { "epoch": 0.2443130118289354, "grad_norm": 0.5064863363900076, "learning_rate": 9.941423402932713e-06, "loss": 0.0559, "step": 537 }, { "epoch": 0.24476797088262056, "grad_norm": 0.8589680374278897, "learning_rate": 9.94120505941992e-06, "loss": 0.0992, "step": 538 }, { "epoch": 0.24522292993630573, "grad_norm": 0.7830880681851201, "learning_rate": 9.940986312134162e-06, "loss": 0.0825, "step": 539 }, { "epoch": 0.2456778889899909, "grad_norm": 0.5778344550660577, "learning_rate": 9.940767161093316e-06, "loss": 0.0637, "step": 540 }, { "epoch": 0.24613284804367608, "grad_norm": 0.8661775200374767, "learning_rate": 9.94054760631529e-06, "loss": 0.0958, "step": 541 }, { "epoch": 0.24658780709736125, "grad_norm": 0.6976226834296251, "learning_rate": 9.940327647818026e-06, "loss": 0.0752, "step": 542 }, { "epoch": 0.2470427661510464, "grad_norm": 0.7530160135685138, "learning_rate": 9.940107285619495e-06, "loss": 0.077, "step": 543 }, { "epoch": 0.24749772520473157, "grad_norm": 0.7997106896354084, "learning_rate": 9.939886519737707e-06, "loss": 0.0958, "step": 544 }, { "epoch": 0.24795268425841674, "grad_norm": 0.8918061918047896, "learning_rate": 9.939665350190702e-06, "loss": 0.0822, "step": 545 }, { "epoch": 0.2484076433121019, "grad_norm": 0.804115756264787, "learning_rate": 9.93944377699655e-06, "loss": 0.0915, "step": 546 }, { "epoch": 0.24886260236578708, "grad_norm": 0.6234057941022288, "learning_rate": 9.93922180017336e-06, "loss": 0.0672, "step": 547 }, { "epoch": 0.24931756141947226, "grad_norm": 0.8269450754551354, "learning_rate": 9.93899941973927e-06, "loss": 0.1102, "step": 548 }, { "epoch": 0.24977252047315743, "grad_norm": 0.9233841316663005, "learning_rate": 9.93877663571245e-06, "loss": 0.0963, "step": 549 }, { "epoch": 0.2502274795268426, "grad_norm": 0.9944861568923805, "learning_rate": 9.938553448111108e-06, "loss": 0.1127, "step": 550 }, { "epoch": 0.25068243858052774, "grad_norm": 0.8423641298780182, "learning_rate": 9.938329856953482e-06, "loss": 0.0788, "step": 551 }, { "epoch": 0.25113739763421294, "grad_norm": 0.8124861649110975, "learning_rate": 9.938105862257839e-06, "loss": 0.0831, "step": 552 }, { "epoch": 0.2515923566878981, "grad_norm": 0.6612222253979325, "learning_rate": 9.937881464042485e-06, "loss": 0.0703, "step": 553 }, { "epoch": 0.25204731574158323, "grad_norm": 0.854447666921162, "learning_rate": 9.937656662325759e-06, "loss": 0.1074, "step": 554 }, { "epoch": 0.25250227479526843, "grad_norm": 0.74521770368624, "learning_rate": 9.937431457126028e-06, "loss": 0.0777, "step": 555 }, { "epoch": 0.2529572338489536, "grad_norm": 0.5044600553216889, "learning_rate": 9.937205848461694e-06, "loss": 0.0482, "step": 556 }, { "epoch": 0.2534121929026388, "grad_norm": 1.0949051966397356, "learning_rate": 9.936979836351197e-06, "loss": 0.0945, "step": 557 }, { "epoch": 0.2538671519563239, "grad_norm": 1.0332199252594778, "learning_rate": 9.936753420813003e-06, "loss": 0.092, "step": 558 }, { "epoch": 0.2543221110100091, "grad_norm": 0.7029577630748303, "learning_rate": 9.936526601865612e-06, "loss": 0.0612, "step": 559 }, { "epoch": 0.25477707006369427, "grad_norm": 0.5251640812064944, "learning_rate": 9.936299379527561e-06, "loss": 0.0569, "step": 560 }, { "epoch": 0.2552320291173794, "grad_norm": 0.6689496924283664, "learning_rate": 9.936071753817416e-06, "loss": 0.0831, "step": 561 }, { "epoch": 0.2556869881710646, "grad_norm": 0.8094390650978945, "learning_rate": 9.935843724753778e-06, "loss": 0.0897, "step": 562 }, { "epoch": 0.25614194722474976, "grad_norm": 0.9168849457874456, "learning_rate": 9.935615292355283e-06, "loss": 0.1002, "step": 563 }, { "epoch": 0.25659690627843496, "grad_norm": 0.8829987760246157, "learning_rate": 9.935386456640593e-06, "loss": 0.0997, "step": 564 }, { "epoch": 0.2570518653321201, "grad_norm": 0.9381858557170412, "learning_rate": 9.93515721762841e-06, "loss": 0.0926, "step": 565 }, { "epoch": 0.2575068243858053, "grad_norm": 0.6555630906162114, "learning_rate": 9.934927575337469e-06, "loss": 0.0805, "step": 566 }, { "epoch": 0.25796178343949044, "grad_norm": 0.49897284031908906, "learning_rate": 9.93469752978653e-06, "loss": 0.0545, "step": 567 }, { "epoch": 0.2584167424931756, "grad_norm": 0.8528689809178094, "learning_rate": 9.934467080994394e-06, "loss": 0.071, "step": 568 }, { "epoch": 0.2588717015468608, "grad_norm": 0.7999188284583189, "learning_rate": 9.934236228979893e-06, "loss": 0.0675, "step": 569 }, { "epoch": 0.25932666060054593, "grad_norm": 0.6603615540899209, "learning_rate": 9.934004973761888e-06, "loss": 0.0584, "step": 570 }, { "epoch": 0.25978161965423113, "grad_norm": 0.907545218090885, "learning_rate": 9.933773315359281e-06, "loss": 0.0912, "step": 571 }, { "epoch": 0.2602365787079163, "grad_norm": 1.2225854103436529, "learning_rate": 9.933541253790998e-06, "loss": 0.0996, "step": 572 }, { "epoch": 0.2606915377616015, "grad_norm": 0.821182112953313, "learning_rate": 9.933308789076004e-06, "loss": 0.0886, "step": 573 }, { "epoch": 0.2611464968152866, "grad_norm": 0.5608593716975471, "learning_rate": 9.933075921233292e-06, "loss": 0.0597, "step": 574 }, { "epoch": 0.26160145586897177, "grad_norm": 0.977094581221023, "learning_rate": 9.932842650281897e-06, "loss": 0.0796, "step": 575 }, { "epoch": 0.26205641492265697, "grad_norm": 1.0086738407073246, "learning_rate": 9.932608976240875e-06, "loss": 0.1245, "step": 576 }, { "epoch": 0.2625113739763421, "grad_norm": 0.7841605184531412, "learning_rate": 9.932374899129323e-06, "loss": 0.0798, "step": 577 }, { "epoch": 0.2629663330300273, "grad_norm": 0.6360279282536222, "learning_rate": 9.932140418966369e-06, "loss": 0.0714, "step": 578 }, { "epoch": 0.26342129208371245, "grad_norm": 0.8673569892639119, "learning_rate": 9.931905535771174e-06, "loss": 0.0805, "step": 579 }, { "epoch": 0.26387625113739765, "grad_norm": 1.0489822111787226, "learning_rate": 9.93167024956293e-06, "loss": 0.1046, "step": 580 }, { "epoch": 0.2643312101910828, "grad_norm": 0.5670611684906575, "learning_rate": 9.931434560360864e-06, "loss": 0.0662, "step": 581 }, { "epoch": 0.26478616924476794, "grad_norm": 0.6786486717931198, "learning_rate": 9.931198468184236e-06, "loss": 0.0705, "step": 582 }, { "epoch": 0.26524112829845314, "grad_norm": 0.7580601459978998, "learning_rate": 9.93096197305234e-06, "loss": 0.0852, "step": 583 }, { "epoch": 0.2656960873521383, "grad_norm": 0.8802141056853473, "learning_rate": 9.930725074984498e-06, "loss": 0.0989, "step": 584 }, { "epoch": 0.2661510464058235, "grad_norm": 0.6365186853726369, "learning_rate": 9.930487774000071e-06, "loss": 0.0639, "step": 585 }, { "epoch": 0.26660600545950863, "grad_norm": 0.5301331320559389, "learning_rate": 9.930250070118448e-06, "loss": 0.0628, "step": 586 }, { "epoch": 0.26706096451319383, "grad_norm": 0.6982626314754508, "learning_rate": 9.930011963359055e-06, "loss": 0.071, "step": 587 }, { "epoch": 0.267515923566879, "grad_norm": 1.0151988128038116, "learning_rate": 9.929773453741346e-06, "loss": 0.1074, "step": 588 }, { "epoch": 0.2679708826205642, "grad_norm": 0.809050548171497, "learning_rate": 9.929534541284814e-06, "loss": 0.0715, "step": 589 }, { "epoch": 0.2684258416742493, "grad_norm": 0.8254901916718546, "learning_rate": 9.929295226008981e-06, "loss": 0.0867, "step": 590 }, { "epoch": 0.26888080072793447, "grad_norm": 0.695875393623419, "learning_rate": 9.929055507933403e-06, "loss": 0.0667, "step": 591 }, { "epoch": 0.26933575978161967, "grad_norm": 0.6569370607259161, "learning_rate": 9.928815387077668e-06, "loss": 0.0667, "step": 592 }, { "epoch": 0.2697907188353048, "grad_norm": 0.8509989554819866, "learning_rate": 9.9285748634614e-06, "loss": 0.0964, "step": 593 }, { "epoch": 0.27024567788899, "grad_norm": 0.7743154017799978, "learning_rate": 9.928333937104249e-06, "loss": 0.1008, "step": 594 }, { "epoch": 0.27070063694267515, "grad_norm": 0.6810806452813069, "learning_rate": 9.928092608025905e-06, "loss": 0.0623, "step": 595 }, { "epoch": 0.27115559599636035, "grad_norm": 0.6757764847225584, "learning_rate": 9.927850876246087e-06, "loss": 0.0621, "step": 596 }, { "epoch": 0.2716105550500455, "grad_norm": 0.7561897396028232, "learning_rate": 9.927608741784551e-06, "loss": 0.0769, "step": 597 }, { "epoch": 0.27206551410373064, "grad_norm": 0.9087608421567758, "learning_rate": 9.927366204661081e-06, "loss": 0.1064, "step": 598 }, { "epoch": 0.27252047315741584, "grad_norm": 0.6090969825991095, "learning_rate": 9.927123264895497e-06, "loss": 0.0596, "step": 599 }, { "epoch": 0.272975432211101, "grad_norm": 0.5838273869575724, "learning_rate": 9.926879922507651e-06, "loss": 0.0581, "step": 600 }, { "epoch": 0.2734303912647862, "grad_norm": 41.16319851924577, "learning_rate": 9.926636177517427e-06, "loss": 0.7305, "step": 601 }, { "epoch": 0.27388535031847133, "grad_norm": 0.7159907538362364, "learning_rate": 9.926392029944743e-06, "loss": 0.0655, "step": 602 }, { "epoch": 0.27434030937215653, "grad_norm": 0.6649118967721417, "learning_rate": 9.92614747980955e-06, "loss": 0.0676, "step": 603 }, { "epoch": 0.2747952684258417, "grad_norm": 0.6955588874689645, "learning_rate": 9.92590252713183e-06, "loss": 0.0691, "step": 604 }, { "epoch": 0.2752502274795268, "grad_norm": 1.0093833512385355, "learning_rate": 9.925657171931603e-06, "loss": 0.0788, "step": 605 }, { "epoch": 0.275705186533212, "grad_norm": 0.7222760734094591, "learning_rate": 9.925411414228913e-06, "loss": 0.0765, "step": 606 }, { "epoch": 0.27616014558689717, "grad_norm": 0.7901083190949632, "learning_rate": 9.925165254043846e-06, "loss": 0.0899, "step": 607 }, { "epoch": 0.27661510464058237, "grad_norm": 0.9417411536264935, "learning_rate": 9.924918691396516e-06, "loss": 0.105, "step": 608 }, { "epoch": 0.2770700636942675, "grad_norm": 0.8531576003982281, "learning_rate": 9.924671726307073e-06, "loss": 0.0943, "step": 609 }, { "epoch": 0.2775250227479527, "grad_norm": 0.5771833327707789, "learning_rate": 9.924424358795694e-06, "loss": 0.0649, "step": 610 }, { "epoch": 0.27797998180163785, "grad_norm": 0.6804808150530418, "learning_rate": 9.924176588882597e-06, "loss": 0.0591, "step": 611 }, { "epoch": 0.278434940855323, "grad_norm": 0.6916110773643345, "learning_rate": 9.923928416588027e-06, "loss": 0.082, "step": 612 }, { "epoch": 0.2788898999090082, "grad_norm": 0.7302341341594485, "learning_rate": 9.923679841932261e-06, "loss": 0.0858, "step": 613 }, { "epoch": 0.27934485896269334, "grad_norm": 0.7190514572276734, "learning_rate": 9.923430864935615e-06, "loss": 0.0658, "step": 614 }, { "epoch": 0.27979981801637854, "grad_norm": 0.6872892360375661, "learning_rate": 9.923181485618432e-06, "loss": 0.0639, "step": 615 }, { "epoch": 0.2802547770700637, "grad_norm": 0.6937876338258171, "learning_rate": 9.92293170400109e-06, "loss": 0.0759, "step": 616 }, { "epoch": 0.2807097361237489, "grad_norm": 0.8498928251372749, "learning_rate": 9.922681520104002e-06, "loss": 0.0777, "step": 617 }, { "epoch": 0.28116469517743403, "grad_norm": 0.7409609990217324, "learning_rate": 9.922430933947612e-06, "loss": 0.0665, "step": 618 }, { "epoch": 0.2816196542311192, "grad_norm": 1.2216942184143182, "learning_rate": 9.922179945552393e-06, "loss": 0.1405, "step": 619 }, { "epoch": 0.2820746132848044, "grad_norm": 0.6637234254274302, "learning_rate": 9.921928554938857e-06, "loss": 0.062, "step": 620 }, { "epoch": 0.2825295723384895, "grad_norm": 0.9463087936758936, "learning_rate": 9.921676762127548e-06, "loss": 0.0767, "step": 621 }, { "epoch": 0.2829845313921747, "grad_norm": 1.089309305809361, "learning_rate": 9.921424567139042e-06, "loss": 0.1171, "step": 622 }, { "epoch": 0.28343949044585987, "grad_norm": 0.8752119302288704, "learning_rate": 9.921171969993942e-06, "loss": 0.0813, "step": 623 }, { "epoch": 0.28389444949954507, "grad_norm": 0.7870883299373892, "learning_rate": 9.920918970712894e-06, "loss": 0.0993, "step": 624 }, { "epoch": 0.2843494085532302, "grad_norm": 0.6504873266789636, "learning_rate": 9.92066556931657e-06, "loss": 0.073, "step": 625 }, { "epoch": 0.28480436760691535, "grad_norm": 1.1098031698420505, "learning_rate": 9.920411765825679e-06, "loss": 0.1218, "step": 626 }, { "epoch": 0.28525932666060055, "grad_norm": 1.217844501512982, "learning_rate": 9.920157560260957e-06, "loss": 0.1549, "step": 627 }, { "epoch": 0.2857142857142857, "grad_norm": 0.9728161223416268, "learning_rate": 9.919902952643179e-06, "loss": 0.0984, "step": 628 }, { "epoch": 0.2861692447679709, "grad_norm": 0.5217007184455262, "learning_rate": 9.91964794299315e-06, "loss": 0.0636, "step": 629 }, { "epoch": 0.28662420382165604, "grad_norm": 1.7394407973312302, "learning_rate": 9.919392531331706e-06, "loss": 0.1686, "step": 630 }, { "epoch": 0.28707916287534124, "grad_norm": 0.5702940927618096, "learning_rate": 9.919136717679723e-06, "loss": 0.0465, "step": 631 }, { "epoch": 0.2875341219290264, "grad_norm": 0.5990973378462472, "learning_rate": 9.9188805020581e-06, "loss": 0.0678, "step": 632 }, { "epoch": 0.28798908098271153, "grad_norm": 0.9343816967111115, "learning_rate": 9.918623884487777e-06, "loss": 0.1068, "step": 633 }, { "epoch": 0.28844404003639673, "grad_norm": 0.5997939637509836, "learning_rate": 9.91836686498972e-06, "loss": 0.0629, "step": 634 }, { "epoch": 0.2888989990900819, "grad_norm": 0.8063617612610782, "learning_rate": 9.918109443584938e-06, "loss": 0.0904, "step": 635 }, { "epoch": 0.2893539581437671, "grad_norm": 0.6625405697250593, "learning_rate": 9.917851620294461e-06, "loss": 0.0638, "step": 636 }, { "epoch": 0.2898089171974522, "grad_norm": 0.7423789779714624, "learning_rate": 9.917593395139358e-06, "loss": 0.0714, "step": 637 }, { "epoch": 0.2902638762511374, "grad_norm": 0.6102576569607258, "learning_rate": 9.91733476814073e-06, "loss": 0.0563, "step": 638 }, { "epoch": 0.29071883530482256, "grad_norm": 0.8342620452233175, "learning_rate": 9.91707573931971e-06, "loss": 0.0934, "step": 639 }, { "epoch": 0.2911737943585077, "grad_norm": 0.6397583044633867, "learning_rate": 9.916816308697468e-06, "loss": 0.0608, "step": 640 }, { "epoch": 0.2916287534121929, "grad_norm": 0.7837909798874247, "learning_rate": 9.9165564762952e-06, "loss": 0.0936, "step": 641 }, { "epoch": 0.29208371246587805, "grad_norm": 0.9915309549496408, "learning_rate": 9.916296242134142e-06, "loss": 0.1364, "step": 642 }, { "epoch": 0.29253867151956325, "grad_norm": 0.7722166587924495, "learning_rate": 9.916035606235555e-06, "loss": 0.1022, "step": 643 }, { "epoch": 0.2929936305732484, "grad_norm": 0.6446192951972597, "learning_rate": 9.915774568620739e-06, "loss": 0.0794, "step": 644 }, { "epoch": 0.2934485896269336, "grad_norm": 0.7655996282008942, "learning_rate": 9.915513129311025e-06, "loss": 0.083, "step": 645 }, { "epoch": 0.29390354868061874, "grad_norm": 0.7358761993420325, "learning_rate": 9.915251288327776e-06, "loss": 0.0927, "step": 646 }, { "epoch": 0.2943585077343039, "grad_norm": 0.8417441236168001, "learning_rate": 9.914989045692388e-06, "loss": 0.0791, "step": 647 }, { "epoch": 0.2948134667879891, "grad_norm": 0.8847229450668922, "learning_rate": 9.914726401426293e-06, "loss": 0.1114, "step": 648 }, { "epoch": 0.29526842584167423, "grad_norm": 0.6805089048669102, "learning_rate": 9.91446335555095e-06, "loss": 0.0645, "step": 649 }, { "epoch": 0.29572338489535943, "grad_norm": 0.9967907781154212, "learning_rate": 9.914199908087856e-06, "loss": 0.1125, "step": 650 }, { "epoch": 0.2961783439490446, "grad_norm": 0.7069764233646496, "learning_rate": 9.913936059058537e-06, "loss": 0.0961, "step": 651 }, { "epoch": 0.2966333030027298, "grad_norm": 0.8237259808163154, "learning_rate": 9.913671808484554e-06, "loss": 0.0863, "step": 652 }, { "epoch": 0.2970882620564149, "grad_norm": 0.5595221349609915, "learning_rate": 9.913407156387503e-06, "loss": 0.0477, "step": 653 }, { "epoch": 0.29754322111010006, "grad_norm": 0.8322598543263076, "learning_rate": 9.913142102789005e-06, "loss": 0.0785, "step": 654 }, { "epoch": 0.29799818016378526, "grad_norm": 0.9426946452527044, "learning_rate": 9.912876647710723e-06, "loss": 0.0993, "step": 655 }, { "epoch": 0.2984531392174704, "grad_norm": 0.8902481236790349, "learning_rate": 9.912610791174348e-06, "loss": 0.0981, "step": 656 }, { "epoch": 0.2989080982711556, "grad_norm": 0.6714333609160019, "learning_rate": 9.912344533201604e-06, "loss": 0.0716, "step": 657 }, { "epoch": 0.29936305732484075, "grad_norm": 0.6721636461789662, "learning_rate": 9.91207787381425e-06, "loss": 0.0675, "step": 658 }, { "epoch": 0.29981801637852595, "grad_norm": 0.628744075340254, "learning_rate": 9.911810813034073e-06, "loss": 0.0583, "step": 659 }, { "epoch": 0.3002729754322111, "grad_norm": 0.9172548581720068, "learning_rate": 9.9115433508829e-06, "loss": 0.0972, "step": 660 }, { "epoch": 0.30072793448589624, "grad_norm": 0.914462327674233, "learning_rate": 9.911275487382583e-06, "loss": 0.089, "step": 661 }, { "epoch": 0.30118289353958144, "grad_norm": 0.7410939383575923, "learning_rate": 9.911007222555011e-06, "loss": 0.0744, "step": 662 }, { "epoch": 0.3016378525932666, "grad_norm": 0.6952942958219819, "learning_rate": 9.91073855642211e-06, "loss": 0.0627, "step": 663 }, { "epoch": 0.3020928116469518, "grad_norm": 0.8802064643150562, "learning_rate": 9.910469489005828e-06, "loss": 0.0836, "step": 664 }, { "epoch": 0.30254777070063693, "grad_norm": 0.9015922573736656, "learning_rate": 9.910200020328158e-06, "loss": 0.0934, "step": 665 }, { "epoch": 0.30300272975432213, "grad_norm": 0.6635682732023674, "learning_rate": 9.909930150411113e-06, "loss": 0.0623, "step": 666 }, { "epoch": 0.3034576888080073, "grad_norm": 1.928152977107998, "learning_rate": 9.909659879276751e-06, "loss": 0.1457, "step": 667 }, { "epoch": 0.3039126478616925, "grad_norm": 0.7754006092902415, "learning_rate": 9.909389206947156e-06, "loss": 0.0621, "step": 668 }, { "epoch": 0.3043676069153776, "grad_norm": 1.0461982822616211, "learning_rate": 9.909118133444444e-06, "loss": 0.1087, "step": 669 }, { "epoch": 0.30482256596906276, "grad_norm": 0.7981897376851527, "learning_rate": 9.90884665879077e-06, "loss": 0.0921, "step": 670 }, { "epoch": 0.30527752502274796, "grad_norm": 0.8941901965354629, "learning_rate": 9.908574783008313e-06, "loss": 0.1055, "step": 671 }, { "epoch": 0.3057324840764331, "grad_norm": 1.0219508428898654, "learning_rate": 9.908302506119291e-06, "loss": 0.1152, "step": 672 }, { "epoch": 0.3061874431301183, "grad_norm": 0.7623168423299865, "learning_rate": 9.908029828145956e-06, "loss": 0.0837, "step": 673 }, { "epoch": 0.30664240218380345, "grad_norm": 0.7026665400337327, "learning_rate": 9.907756749110587e-06, "loss": 0.0785, "step": 674 }, { "epoch": 0.30709736123748865, "grad_norm": 1.0861630797383492, "learning_rate": 9.9074832690355e-06, "loss": 0.1121, "step": 675 }, { "epoch": 0.3075523202911738, "grad_norm": 0.8171913655631801, "learning_rate": 9.907209387943042e-06, "loss": 0.0759, "step": 676 }, { "epoch": 0.30800727934485894, "grad_norm": 0.695009650682766, "learning_rate": 9.906935105855595e-06, "loss": 0.0508, "step": 677 }, { "epoch": 0.30846223839854414, "grad_norm": 1.1629680848047237, "learning_rate": 9.906660422795569e-06, "loss": 0.1123, "step": 678 }, { "epoch": 0.3089171974522293, "grad_norm": 1.1028006392582481, "learning_rate": 9.906385338785411e-06, "loss": 0.1048, "step": 679 }, { "epoch": 0.3093721565059145, "grad_norm": 0.8590661780887954, "learning_rate": 9.906109853847601e-06, "loss": 0.0947, "step": 680 }, { "epoch": 0.30982711555959963, "grad_norm": 0.9160314729851723, "learning_rate": 9.90583396800465e-06, "loss": 0.0928, "step": 681 }, { "epoch": 0.31028207461328483, "grad_norm": 0.8935511298088069, "learning_rate": 9.9055576812791e-06, "loss": 0.0996, "step": 682 }, { "epoch": 0.31073703366697, "grad_norm": 0.7005723015579258, "learning_rate": 9.905280993693533e-06, "loss": 0.0863, "step": 683 }, { "epoch": 0.3111919927206551, "grad_norm": 0.6441434987399284, "learning_rate": 9.905003905270553e-06, "loss": 0.0682, "step": 684 }, { "epoch": 0.3116469517743403, "grad_norm": 0.9609160991558658, "learning_rate": 9.904726416032803e-06, "loss": 0.1095, "step": 685 }, { "epoch": 0.31210191082802546, "grad_norm": 0.723787688745946, "learning_rate": 9.904448526002963e-06, "loss": 0.0637, "step": 686 }, { "epoch": 0.31255686988171066, "grad_norm": 0.5250433090776031, "learning_rate": 9.904170235203737e-06, "loss": 0.0587, "step": 687 }, { "epoch": 0.3130118289353958, "grad_norm": 0.8819438583914972, "learning_rate": 9.903891543657866e-06, "loss": 0.1112, "step": 688 }, { "epoch": 0.313466787989081, "grad_norm": 0.5413774773467063, "learning_rate": 9.903612451388122e-06, "loss": 0.0722, "step": 689 }, { "epoch": 0.31392174704276615, "grad_norm": 0.8913097595158456, "learning_rate": 9.903332958417315e-06, "loss": 0.0893, "step": 690 }, { "epoch": 0.3143767060964513, "grad_norm": 0.6466979890354269, "learning_rate": 9.903053064768283e-06, "loss": 0.0709, "step": 691 }, { "epoch": 0.3148316651501365, "grad_norm": 0.8428101951038133, "learning_rate": 9.902772770463892e-06, "loss": 0.0814, "step": 692 }, { "epoch": 0.31528662420382164, "grad_norm": 0.5832299371816577, "learning_rate": 9.902492075527057e-06, "loss": 0.0597, "step": 693 }, { "epoch": 0.31574158325750684, "grad_norm": 0.7856263020740725, "learning_rate": 9.902210979980705e-06, "loss": 0.074, "step": 694 }, { "epoch": 0.316196542311192, "grad_norm": 0.8507681095680276, "learning_rate": 9.90192948384781e-06, "loss": 0.0941, "step": 695 }, { "epoch": 0.3166515013648772, "grad_norm": 0.7777857824270489, "learning_rate": 9.901647587151376e-06, "loss": 0.0708, "step": 696 }, { "epoch": 0.31710646041856233, "grad_norm": 1.068022521735614, "learning_rate": 9.901365289914437e-06, "loss": 0.108, "step": 697 }, { "epoch": 0.3175614194722475, "grad_norm": 1.1320770025873614, "learning_rate": 9.901082592160059e-06, "loss": 0.108, "step": 698 }, { "epoch": 0.3180163785259327, "grad_norm": 0.803518334023751, "learning_rate": 9.900799493911346e-06, "loss": 0.0871, "step": 699 }, { "epoch": 0.3184713375796178, "grad_norm": 0.8188444942805464, "learning_rate": 9.900515995191431e-06, "loss": 0.0808, "step": 700 }, { "epoch": 0.318926296633303, "grad_norm": 0.8993527964087475, "learning_rate": 9.900232096023478e-06, "loss": 0.0821, "step": 701 }, { "epoch": 0.31938125568698816, "grad_norm": 0.5600271316880729, "learning_rate": 9.899947796430687e-06, "loss": 0.0478, "step": 702 }, { "epoch": 0.31983621474067336, "grad_norm": 0.8369718087747545, "learning_rate": 9.899663096436292e-06, "loss": 0.0871, "step": 703 }, { "epoch": 0.3202911737943585, "grad_norm": 0.8993771893247359, "learning_rate": 9.899377996063554e-06, "loss": 0.0858, "step": 704 }, { "epoch": 0.32074613284804365, "grad_norm": 0.6615773523414142, "learning_rate": 9.899092495335772e-06, "loss": 0.0601, "step": 705 }, { "epoch": 0.32120109190172885, "grad_norm": 0.8278593900178107, "learning_rate": 9.898806594276273e-06, "loss": 0.0769, "step": 706 }, { "epoch": 0.321656050955414, "grad_norm": 0.7866286577186284, "learning_rate": 9.898520292908425e-06, "loss": 0.0894, "step": 707 }, { "epoch": 0.3221110100090992, "grad_norm": 0.8050313615570786, "learning_rate": 9.89823359125562e-06, "loss": 0.0732, "step": 708 }, { "epoch": 0.32256596906278434, "grad_norm": 1.0243914254387991, "learning_rate": 9.897946489341286e-06, "loss": 0.0901, "step": 709 }, { "epoch": 0.32302092811646954, "grad_norm": 0.7036337195424629, "learning_rate": 9.897658987188882e-06, "loss": 0.0686, "step": 710 }, { "epoch": 0.3234758871701547, "grad_norm": 0.5593772745397846, "learning_rate": 9.897371084821905e-06, "loss": 0.045, "step": 711 }, { "epoch": 0.32393084622383983, "grad_norm": 0.608867956874154, "learning_rate": 9.897082782263878e-06, "loss": 0.0692, "step": 712 }, { "epoch": 0.32438580527752503, "grad_norm": 0.6488333561840038, "learning_rate": 9.896794079538362e-06, "loss": 0.0513, "step": 713 }, { "epoch": 0.3248407643312102, "grad_norm": 0.5593745607285364, "learning_rate": 9.896504976668948e-06, "loss": 0.0437, "step": 714 }, { "epoch": 0.3252957233848954, "grad_norm": 0.5072427035814352, "learning_rate": 9.896215473679259e-06, "loss": 0.0566, "step": 715 }, { "epoch": 0.3257506824385805, "grad_norm": 0.7088539736923404, "learning_rate": 9.895925570592952e-06, "loss": 0.0878, "step": 716 }, { "epoch": 0.3262056414922657, "grad_norm": 0.9653520712469312, "learning_rate": 9.895635267433719e-06, "loss": 0.101, "step": 717 }, { "epoch": 0.32666060054595086, "grad_norm": 1.2323140645024868, "learning_rate": 9.895344564225277e-06, "loss": 0.1359, "step": 718 }, { "epoch": 0.327115559599636, "grad_norm": 0.6826807669546061, "learning_rate": 9.895053460991389e-06, "loss": 0.0799, "step": 719 }, { "epoch": 0.3275705186533212, "grad_norm": 0.9496304010026827, "learning_rate": 9.894761957755834e-06, "loss": 0.0928, "step": 720 }, { "epoch": 0.32802547770700635, "grad_norm": 0.8578622125964999, "learning_rate": 9.894470054542438e-06, "loss": 0.1149, "step": 721 }, { "epoch": 0.32848043676069155, "grad_norm": 0.5483719717114235, "learning_rate": 9.894177751375053e-06, "loss": 0.0621, "step": 722 }, { "epoch": 0.3289353958143767, "grad_norm": 0.6341198897869947, "learning_rate": 9.893885048277564e-06, "loss": 0.0568, "step": 723 }, { "epoch": 0.3293903548680619, "grad_norm": 0.7169738278552924, "learning_rate": 9.893591945273888e-06, "loss": 0.0752, "step": 724 }, { "epoch": 0.32984531392174704, "grad_norm": 0.9839905963719277, "learning_rate": 9.89329844238798e-06, "loss": 0.1167, "step": 725 }, { "epoch": 0.3303002729754322, "grad_norm": 0.6825969142747964, "learning_rate": 9.89300453964382e-06, "loss": 0.0693, "step": 726 }, { "epoch": 0.3307552320291174, "grad_norm": 1.0420794853330364, "learning_rate": 9.892710237065423e-06, "loss": 0.1561, "step": 727 }, { "epoch": 0.33121019108280253, "grad_norm": 1.0109988913697336, "learning_rate": 9.892415534676844e-06, "loss": 0.0813, "step": 728 }, { "epoch": 0.33166515013648773, "grad_norm": 0.6237179977245606, "learning_rate": 9.892120432502161e-06, "loss": 0.063, "step": 729 }, { "epoch": 0.3321201091901729, "grad_norm": 0.7047649578988654, "learning_rate": 9.891824930565488e-06, "loss": 0.0757, "step": 730 }, { "epoch": 0.3325750682438581, "grad_norm": 0.8381336709785119, "learning_rate": 9.891529028890974e-06, "loss": 0.1137, "step": 731 }, { "epoch": 0.3330300272975432, "grad_norm": 1.108812928457643, "learning_rate": 9.891232727502797e-06, "loss": 0.0971, "step": 732 }, { "epoch": 0.33348498635122836, "grad_norm": 0.8911550238765422, "learning_rate": 9.89093602642517e-06, "loss": 0.0869, "step": 733 }, { "epoch": 0.33393994540491356, "grad_norm": 0.7527062298816352, "learning_rate": 9.890638925682339e-06, "loss": 0.085, "step": 734 }, { "epoch": 0.3343949044585987, "grad_norm": 0.8028637093759472, "learning_rate": 9.89034142529858e-06, "loss": 0.0866, "step": 735 }, { "epoch": 0.3348498635122839, "grad_norm": 0.6620365400447171, "learning_rate": 9.890043525298203e-06, "loss": 0.053, "step": 736 }, { "epoch": 0.33530482256596905, "grad_norm": 0.6606838089782118, "learning_rate": 9.889745225705555e-06, "loss": 0.0783, "step": 737 }, { "epoch": 0.33575978161965425, "grad_norm": 0.6719238881234298, "learning_rate": 9.889446526545007e-06, "loss": 0.079, "step": 738 }, { "epoch": 0.3362147406733394, "grad_norm": 0.7379881342173255, "learning_rate": 9.88914742784097e-06, "loss": 0.0848, "step": 739 }, { "epoch": 0.33666969972702454, "grad_norm": 1.9725398231448836, "learning_rate": 9.888847929617887e-06, "loss": 0.1666, "step": 740 }, { "epoch": 0.33712465878070974, "grad_norm": 0.7800667095330575, "learning_rate": 9.888548031900226e-06, "loss": 0.0779, "step": 741 }, { "epoch": 0.3375796178343949, "grad_norm": 0.9725198572426639, "learning_rate": 9.888247734712497e-06, "loss": 0.0719, "step": 742 }, { "epoch": 0.3380345768880801, "grad_norm": 0.9547104503470986, "learning_rate": 9.887947038079238e-06, "loss": 0.1119, "step": 743 }, { "epoch": 0.33848953594176523, "grad_norm": 0.5879353672489683, "learning_rate": 9.887645942025022e-06, "loss": 0.0553, "step": 744 }, { "epoch": 0.33894449499545043, "grad_norm": 0.5485885922626542, "learning_rate": 9.887344446574452e-06, "loss": 0.0494, "step": 745 }, { "epoch": 0.3393994540491356, "grad_norm": 0.9640668269863656, "learning_rate": 9.887042551752163e-06, "loss": 0.1104, "step": 746 }, { "epoch": 0.3398544131028208, "grad_norm": 0.8639463935480832, "learning_rate": 9.886740257582827e-06, "loss": 0.0655, "step": 747 }, { "epoch": 0.3403093721565059, "grad_norm": 0.6489702107287116, "learning_rate": 9.886437564091148e-06, "loss": 0.0777, "step": 748 }, { "epoch": 0.34076433121019106, "grad_norm": 0.8236523684362178, "learning_rate": 9.886134471301854e-06, "loss": 0.0916, "step": 749 }, { "epoch": 0.34121929026387626, "grad_norm": 0.8459143900125461, "learning_rate": 9.885830979239718e-06, "loss": 0.1017, "step": 750 }, { "epoch": 0.3416742493175614, "grad_norm": 0.7496065352262437, "learning_rate": 9.885527087929541e-06, "loss": 0.0861, "step": 751 }, { "epoch": 0.3421292083712466, "grad_norm": 0.849292513666517, "learning_rate": 9.88522279739615e-06, "loss": 0.0839, "step": 752 }, { "epoch": 0.34258416742493175, "grad_norm": 0.7756671663835698, "learning_rate": 9.884918107664417e-06, "loss": 0.0809, "step": 753 }, { "epoch": 0.34303912647861695, "grad_norm": 0.7338987681003677, "learning_rate": 9.884613018759234e-06, "loss": 0.0721, "step": 754 }, { "epoch": 0.3434940855323021, "grad_norm": 0.6003946948163056, "learning_rate": 9.884307530705534e-06, "loss": 0.0782, "step": 755 }, { "epoch": 0.34394904458598724, "grad_norm": 0.5309561440373582, "learning_rate": 9.88400164352828e-06, "loss": 0.0563, "step": 756 }, { "epoch": 0.34440400363967244, "grad_norm": 0.6551261739802692, "learning_rate": 9.883695357252467e-06, "loss": 0.061, "step": 757 }, { "epoch": 0.3448589626933576, "grad_norm": 0.6598139820416582, "learning_rate": 9.883388671903125e-06, "loss": 0.084, "step": 758 }, { "epoch": 0.3453139217470428, "grad_norm": 0.8678451615084499, "learning_rate": 9.883081587505315e-06, "loss": 0.0893, "step": 759 }, { "epoch": 0.34576888080072793, "grad_norm": 0.8849976199871086, "learning_rate": 9.882774104084127e-06, "loss": 0.0938, "step": 760 }, { "epoch": 0.34622383985441313, "grad_norm": 0.6157555054475868, "learning_rate": 9.882466221664691e-06, "loss": 0.0535, "step": 761 }, { "epoch": 0.3466787989080983, "grad_norm": 0.9555128068667961, "learning_rate": 9.882157940272165e-06, "loss": 0.0984, "step": 762 }, { "epoch": 0.3471337579617834, "grad_norm": 0.8431106213501941, "learning_rate": 9.881849259931738e-06, "loss": 0.1062, "step": 763 }, { "epoch": 0.3475887170154686, "grad_norm": 0.6608166650909644, "learning_rate": 9.881540180668637e-06, "loss": 0.0589, "step": 764 }, { "epoch": 0.34804367606915376, "grad_norm": 0.7177237690901401, "learning_rate": 9.881230702508118e-06, "loss": 0.0721, "step": 765 }, { "epoch": 0.34849863512283896, "grad_norm": 0.49396541889218665, "learning_rate": 9.880920825475468e-06, "loss": 0.0582, "step": 766 }, { "epoch": 0.3489535941765241, "grad_norm": 0.7008727540015932, "learning_rate": 9.88061054959601e-06, "loss": 0.0689, "step": 767 }, { "epoch": 0.3494085532302093, "grad_norm": 0.6417543130209264, "learning_rate": 9.880299874895098e-06, "loss": 0.0859, "step": 768 }, { "epoch": 0.34986351228389445, "grad_norm": 0.5325758158155319, "learning_rate": 9.879988801398121e-06, "loss": 0.0508, "step": 769 }, { "epoch": 0.3503184713375796, "grad_norm": 0.653129374155715, "learning_rate": 9.879677329130496e-06, "loss": 0.0822, "step": 770 }, { "epoch": 0.3507734303912648, "grad_norm": 0.6044703796770591, "learning_rate": 9.879365458117678e-06, "loss": 0.0662, "step": 771 }, { "epoch": 0.35122838944494994, "grad_norm": 0.6417796330386928, "learning_rate": 9.879053188385148e-06, "loss": 0.0649, "step": 772 }, { "epoch": 0.35168334849863514, "grad_norm": 0.6127493684308597, "learning_rate": 9.878740519958425e-06, "loss": 0.0601, "step": 773 }, { "epoch": 0.3521383075523203, "grad_norm": 0.9092296350808027, "learning_rate": 9.878427452863059e-06, "loss": 0.1138, "step": 774 }, { "epoch": 0.3525932666060055, "grad_norm": 0.8850379239223551, "learning_rate": 9.878113987124633e-06, "loss": 0.1135, "step": 775 }, { "epoch": 0.35304822565969063, "grad_norm": 0.8106864823035035, "learning_rate": 9.877800122768761e-06, "loss": 0.084, "step": 776 }, { "epoch": 0.3535031847133758, "grad_norm": 0.6717791100158048, "learning_rate": 9.877485859821092e-06, "loss": 0.0764, "step": 777 }, { "epoch": 0.353958143767061, "grad_norm": 0.4266356830653338, "learning_rate": 9.877171198307304e-06, "loss": 0.0496, "step": 778 }, { "epoch": 0.3544131028207461, "grad_norm": 0.7839112755574695, "learning_rate": 9.87685613825311e-06, "loss": 0.0864, "step": 779 }, { "epoch": 0.3548680618744313, "grad_norm": 0.8928629316475961, "learning_rate": 9.876540679684257e-06, "loss": 0.0802, "step": 780 }, { "epoch": 0.35532302092811646, "grad_norm": 0.7427060191976654, "learning_rate": 9.876224822626522e-06, "loss": 0.0809, "step": 781 }, { "epoch": 0.35577797998180166, "grad_norm": 0.6618589317208607, "learning_rate": 9.875908567105716e-06, "loss": 0.0633, "step": 782 }, { "epoch": 0.3562329390354868, "grad_norm": 0.9168643329932029, "learning_rate": 9.87559191314768e-06, "loss": 0.0977, "step": 783 }, { "epoch": 0.35668789808917195, "grad_norm": 1.010661772545197, "learning_rate": 9.87527486077829e-06, "loss": 0.112, "step": 784 }, { "epoch": 0.35714285714285715, "grad_norm": 0.7355960177801563, "learning_rate": 9.874957410023458e-06, "loss": 0.0578, "step": 785 }, { "epoch": 0.3575978161965423, "grad_norm": 0.7012046376593928, "learning_rate": 9.874639560909118e-06, "loss": 0.0856, "step": 786 }, { "epoch": 0.3580527752502275, "grad_norm": 0.629856671324697, "learning_rate": 9.87432131346125e-06, "loss": 0.079, "step": 787 }, { "epoch": 0.35850773430391264, "grad_norm": 0.6605442679933491, "learning_rate": 9.874002667705855e-06, "loss": 0.0713, "step": 788 }, { "epoch": 0.35896269335759784, "grad_norm": 0.6036439966816435, "learning_rate": 9.873683623668972e-06, "loss": 0.0734, "step": 789 }, { "epoch": 0.359417652411283, "grad_norm": 0.9098464282834562, "learning_rate": 9.873364181376674e-06, "loss": 0.1273, "step": 790 }, { "epoch": 0.35987261146496813, "grad_norm": 0.725232432410699, "learning_rate": 9.873044340855062e-06, "loss": 0.0704, "step": 791 }, { "epoch": 0.36032757051865333, "grad_norm": 0.8275864687946802, "learning_rate": 9.872724102130273e-06, "loss": 0.0722, "step": 792 }, { "epoch": 0.3607825295723385, "grad_norm": 0.6908762665090429, "learning_rate": 9.872403465228476e-06, "loss": 0.068, "step": 793 }, { "epoch": 0.3612374886260237, "grad_norm": 0.8007479624540592, "learning_rate": 9.872082430175871e-06, "loss": 0.0792, "step": 794 }, { "epoch": 0.3616924476797088, "grad_norm": 0.7580697654486878, "learning_rate": 9.871760996998692e-06, "loss": 0.0662, "step": 795 }, { "epoch": 0.362147406733394, "grad_norm": 1.0378802589927232, "learning_rate": 9.871439165723207e-06, "loss": 0.0905, "step": 796 }, { "epoch": 0.36260236578707916, "grad_norm": 0.9366156924362913, "learning_rate": 9.87111693637571e-06, "loss": 0.0966, "step": 797 }, { "epoch": 0.3630573248407643, "grad_norm": 0.9568919919938076, "learning_rate": 9.870794308982536e-06, "loss": 0.1092, "step": 798 }, { "epoch": 0.3635122838944495, "grad_norm": 1.0303944561108107, "learning_rate": 9.870471283570046e-06, "loss": 0.1214, "step": 799 }, { "epoch": 0.36396724294813465, "grad_norm": 0.7123988620535131, "learning_rate": 9.870147860164639e-06, "loss": 0.0952, "step": 800 }, { "epoch": 0.36442220200181985, "grad_norm": 0.6461145025804255, "learning_rate": 9.86982403879274e-06, "loss": 0.0653, "step": 801 }, { "epoch": 0.364877161055505, "grad_norm": 0.761176238728339, "learning_rate": 9.869499819480815e-06, "loss": 0.0911, "step": 802 }, { "epoch": 0.3653321201091902, "grad_norm": 0.6778284620896282, "learning_rate": 9.869175202255354e-06, "loss": 0.0726, "step": 803 }, { "epoch": 0.36578707916287534, "grad_norm": 0.6378934869683002, "learning_rate": 9.868850187142885e-06, "loss": 0.0721, "step": 804 }, { "epoch": 0.3662420382165605, "grad_norm": 0.725078464245391, "learning_rate": 9.868524774169968e-06, "loss": 0.0774, "step": 805 }, { "epoch": 0.3666969972702457, "grad_norm": 0.7707907185217752, "learning_rate": 9.86819896336319e-06, "loss": 0.067, "step": 806 }, { "epoch": 0.36715195632393083, "grad_norm": 0.8162851407409059, "learning_rate": 9.867872754749178e-06, "loss": 0.0908, "step": 807 }, { "epoch": 0.36760691537761603, "grad_norm": 0.5330499489332517, "learning_rate": 9.867546148354586e-06, "loss": 0.066, "step": 808 }, { "epoch": 0.3680618744313012, "grad_norm": 0.6649993383235931, "learning_rate": 9.867219144206105e-06, "loss": 0.0672, "step": 809 }, { "epoch": 0.3685168334849864, "grad_norm": 0.9824606570699352, "learning_rate": 9.866891742330458e-06, "loss": 0.11, "step": 810 }, { "epoch": 0.3689717925386715, "grad_norm": 0.6507791006697302, "learning_rate": 9.866563942754394e-06, "loss": 0.0622, "step": 811 }, { "epoch": 0.36942675159235666, "grad_norm": 0.7455907568930894, "learning_rate": 9.866235745504705e-06, "loss": 0.0833, "step": 812 }, { "epoch": 0.36988171064604186, "grad_norm": 0.9927293122267482, "learning_rate": 9.865907150608203e-06, "loss": 0.0978, "step": 813 }, { "epoch": 0.370336669699727, "grad_norm": 0.817279180213694, "learning_rate": 9.865578158091746e-06, "loss": 0.1036, "step": 814 }, { "epoch": 0.3707916287534122, "grad_norm": 0.9966504261459711, "learning_rate": 9.865248767982211e-06, "loss": 0.1027, "step": 815 }, { "epoch": 0.37124658780709735, "grad_norm": 0.9561727776097537, "learning_rate": 9.864918980306521e-06, "loss": 0.1136, "step": 816 }, { "epoch": 0.37170154686078255, "grad_norm": 0.6718095123705313, "learning_rate": 9.86458879509162e-06, "loss": 0.0762, "step": 817 }, { "epoch": 0.3721565059144677, "grad_norm": 0.9803345299998187, "learning_rate": 9.864258212364492e-06, "loss": 0.0791, "step": 818 }, { "epoch": 0.37261146496815284, "grad_norm": 0.8058679812037255, "learning_rate": 9.86392723215215e-06, "loss": 0.069, "step": 819 }, { "epoch": 0.37306642402183804, "grad_norm": 0.5836160590759203, "learning_rate": 9.86359585448164e-06, "loss": 0.0621, "step": 820 }, { "epoch": 0.3735213830755232, "grad_norm": 0.6511599091669776, "learning_rate": 9.863264079380039e-06, "loss": 0.0745, "step": 821 }, { "epoch": 0.3739763421292084, "grad_norm": 0.9308266206126162, "learning_rate": 9.862931906874461e-06, "loss": 0.1132, "step": 822 }, { "epoch": 0.37443130118289353, "grad_norm": 0.613775373571284, "learning_rate": 9.862599336992048e-06, "loss": 0.0545, "step": 823 }, { "epoch": 0.37488626023657873, "grad_norm": 0.6991388893487894, "learning_rate": 9.862266369759976e-06, "loss": 0.0754, "step": 824 }, { "epoch": 0.37534121929026387, "grad_norm": 0.6352968005261165, "learning_rate": 9.861933005205454e-06, "loss": 0.0576, "step": 825 }, { "epoch": 0.37579617834394907, "grad_norm": 1.109194467922723, "learning_rate": 9.861599243355725e-06, "loss": 0.1281, "step": 826 }, { "epoch": 0.3762511373976342, "grad_norm": 0.9742134289860664, "learning_rate": 9.86126508423806e-06, "loss": 0.1067, "step": 827 }, { "epoch": 0.37670609645131936, "grad_norm": 0.6015820455914206, "learning_rate": 9.860930527879763e-06, "loss": 0.055, "step": 828 }, { "epoch": 0.37716105550500456, "grad_norm": 1.0894948091440197, "learning_rate": 9.860595574308179e-06, "loss": 0.1147, "step": 829 }, { "epoch": 0.3776160145586897, "grad_norm": 0.7023892750192133, "learning_rate": 9.860260223550672e-06, "loss": 0.0815, "step": 830 }, { "epoch": 0.3780709736123749, "grad_norm": 0.4943868719085533, "learning_rate": 9.859924475634649e-06, "loss": 0.0476, "step": 831 }, { "epoch": 0.37852593266606005, "grad_norm": 0.9974648765413693, "learning_rate": 9.859588330587545e-06, "loss": 0.1068, "step": 832 }, { "epoch": 0.37898089171974525, "grad_norm": 0.5960289391531881, "learning_rate": 9.859251788436829e-06, "loss": 0.0715, "step": 833 }, { "epoch": 0.3794358507734304, "grad_norm": 0.907079582974149, "learning_rate": 9.85891484921e-06, "loss": 0.0905, "step": 834 }, { "epoch": 0.37989080982711554, "grad_norm": 0.8133034306250352, "learning_rate": 9.858577512934592e-06, "loss": 0.1012, "step": 835 }, { "epoch": 0.38034576888080074, "grad_norm": 0.7828785203637737, "learning_rate": 9.858239779638173e-06, "loss": 0.0726, "step": 836 }, { "epoch": 0.3808007279344859, "grad_norm": 1.3138864597148558, "learning_rate": 9.857901649348338e-06, "loss": 0.1307, "step": 837 }, { "epoch": 0.3812556869881711, "grad_norm": 0.7000750227265026, "learning_rate": 9.857563122092717e-06, "loss": 0.0777, "step": 838 }, { "epoch": 0.3817106460418562, "grad_norm": 0.757283984575844, "learning_rate": 9.857224197898975e-06, "loss": 0.083, "step": 839 }, { "epoch": 0.3821656050955414, "grad_norm": 0.7113754486134378, "learning_rate": 9.856884876794805e-06, "loss": 0.0795, "step": 840 }, { "epoch": 0.38262056414922657, "grad_norm": 0.6891370217065743, "learning_rate": 9.856545158807938e-06, "loss": 0.0576, "step": 841 }, { "epoch": 0.3830755232029117, "grad_norm": 0.7230826558764609, "learning_rate": 9.856205043966134e-06, "loss": 0.0973, "step": 842 }, { "epoch": 0.3835304822565969, "grad_norm": 0.9951638416419379, "learning_rate": 9.855864532297181e-06, "loss": 0.1225, "step": 843 }, { "epoch": 0.38398544131028206, "grad_norm": 0.8272776971451865, "learning_rate": 9.85552362382891e-06, "loss": 0.0928, "step": 844 }, { "epoch": 0.38444040036396726, "grad_norm": 0.662562460388915, "learning_rate": 9.855182318589174e-06, "loss": 0.0711, "step": 845 }, { "epoch": 0.3848953594176524, "grad_norm": 1.185659176011977, "learning_rate": 9.854840616605866e-06, "loss": 0.0922, "step": 846 }, { "epoch": 0.3853503184713376, "grad_norm": 0.7002426118833048, "learning_rate": 9.854498517906908e-06, "loss": 0.0828, "step": 847 }, { "epoch": 0.38580527752502275, "grad_norm": 0.8957633348930525, "learning_rate": 9.854156022520252e-06, "loss": 0.0809, "step": 848 }, { "epoch": 0.3862602365787079, "grad_norm": 1.0593251614278854, "learning_rate": 9.853813130473887e-06, "loss": 0.1109, "step": 849 }, { "epoch": 0.3867151956323931, "grad_norm": 0.7751748709357449, "learning_rate": 9.853469841795832e-06, "loss": 0.0823, "step": 850 }, { "epoch": 0.38717015468607824, "grad_norm": 0.5943868690351954, "learning_rate": 9.853126156514142e-06, "loss": 0.0758, "step": 851 }, { "epoch": 0.38762511373976344, "grad_norm": 0.4901349757557767, "learning_rate": 9.852782074656897e-06, "loss": 0.064, "step": 852 }, { "epoch": 0.3880800727934486, "grad_norm": 0.7531191508768753, "learning_rate": 9.852437596252216e-06, "loss": 0.0824, "step": 853 }, { "epoch": 0.3885350318471338, "grad_norm": 0.7684236261792305, "learning_rate": 9.852092721328248e-06, "loss": 0.0674, "step": 854 }, { "epoch": 0.3889899909008189, "grad_norm": 0.8624513661560378, "learning_rate": 9.851747449913176e-06, "loss": 0.09, "step": 855 }, { "epoch": 0.38944494995450407, "grad_norm": 0.9125725996183891, "learning_rate": 9.851401782035213e-06, "loss": 0.129, "step": 856 }, { "epoch": 0.38989990900818927, "grad_norm": 0.7630714638300728, "learning_rate": 9.851055717722604e-06, "loss": 0.068, "step": 857 }, { "epoch": 0.3903548680618744, "grad_norm": 0.834756070401477, "learning_rate": 9.850709257003628e-06, "loss": 0.0831, "step": 858 }, { "epoch": 0.3908098271155596, "grad_norm": 0.9864776662717517, "learning_rate": 9.850362399906598e-06, "loss": 0.0904, "step": 859 }, { "epoch": 0.39126478616924476, "grad_norm": 0.6242730295284743, "learning_rate": 9.850015146459857e-06, "loss": 0.0754, "step": 860 }, { "epoch": 0.39171974522292996, "grad_norm": 0.838271649072902, "learning_rate": 9.84966749669178e-06, "loss": 0.0899, "step": 861 }, { "epoch": 0.3921747042766151, "grad_norm": 0.6826448278617049, "learning_rate": 9.849319450630777e-06, "loss": 0.0698, "step": 862 }, { "epoch": 0.39262966333030025, "grad_norm": 0.5533993282250775, "learning_rate": 9.848971008305288e-06, "loss": 0.0688, "step": 863 }, { "epoch": 0.39308462238398545, "grad_norm": 0.838673412156409, "learning_rate": 9.848622169743784e-06, "loss": 0.0815, "step": 864 }, { "epoch": 0.3935395814376706, "grad_norm": 0.9783580500729582, "learning_rate": 9.848272934974774e-06, "loss": 0.0745, "step": 865 }, { "epoch": 0.3939945404913558, "grad_norm": 0.5976030953641746, "learning_rate": 9.847923304026793e-06, "loss": 0.0664, "step": 866 }, { "epoch": 0.39444949954504094, "grad_norm": 0.6999143793652887, "learning_rate": 9.847573276928415e-06, "loss": 0.0804, "step": 867 }, { "epoch": 0.39490445859872614, "grad_norm": 0.6338725165728231, "learning_rate": 9.847222853708239e-06, "loss": 0.0655, "step": 868 }, { "epoch": 0.3953594176524113, "grad_norm": 0.7010627446349382, "learning_rate": 9.846872034394902e-06, "loss": 0.0667, "step": 869 }, { "epoch": 0.3958143767060964, "grad_norm": 0.6173227181881447, "learning_rate": 9.84652081901707e-06, "loss": 0.0674, "step": 870 }, { "epoch": 0.3962693357597816, "grad_norm": 0.9673042020268607, "learning_rate": 9.846169207603443e-06, "loss": 0.1267, "step": 871 }, { "epoch": 0.39672429481346677, "grad_norm": 0.6294912489479282, "learning_rate": 9.845817200182755e-06, "loss": 0.0588, "step": 872 }, { "epoch": 0.39717925386715197, "grad_norm": 0.8477152807126976, "learning_rate": 9.845464796783767e-06, "loss": 0.1219, "step": 873 }, { "epoch": 0.3976342129208371, "grad_norm": 0.5887483684825674, "learning_rate": 9.845111997435279e-06, "loss": 0.0731, "step": 874 }, { "epoch": 0.3980891719745223, "grad_norm": 0.5630369277247907, "learning_rate": 9.844758802166116e-06, "loss": 0.0579, "step": 875 }, { "epoch": 0.39854413102820746, "grad_norm": 0.6717541815357567, "learning_rate": 9.844405211005145e-06, "loss": 0.0711, "step": 876 }, { "epoch": 0.3989990900818926, "grad_norm": 0.6571828619535791, "learning_rate": 9.844051223981258e-06, "loss": 0.0638, "step": 877 }, { "epoch": 0.3994540491355778, "grad_norm": 0.6723710552364174, "learning_rate": 9.84369684112338e-06, "loss": 0.0676, "step": 878 }, { "epoch": 0.39990900818926295, "grad_norm": 0.7014173744195523, "learning_rate": 9.84334206246047e-06, "loss": 0.0751, "step": 879 }, { "epoch": 0.40036396724294815, "grad_norm": 0.7999660318519703, "learning_rate": 9.842986888021518e-06, "loss": 0.0895, "step": 880 }, { "epoch": 0.4008189262966333, "grad_norm": 0.5578605501955606, "learning_rate": 9.842631317835548e-06, "loss": 0.0637, "step": 881 }, { "epoch": 0.4012738853503185, "grad_norm": 0.6615256090849237, "learning_rate": 9.842275351931617e-06, "loss": 0.0664, "step": 882 }, { "epoch": 0.40172884440400364, "grad_norm": 0.5263094198672195, "learning_rate": 9.841918990338812e-06, "loss": 0.0611, "step": 883 }, { "epoch": 0.4021838034576888, "grad_norm": 0.8080883575450535, "learning_rate": 9.841562233086252e-06, "loss": 0.0912, "step": 884 }, { "epoch": 0.402638762511374, "grad_norm": 0.6655757939327012, "learning_rate": 9.841205080203092e-06, "loss": 0.0601, "step": 885 }, { "epoch": 0.4030937215650591, "grad_norm": 0.8701903481119097, "learning_rate": 9.840847531718515e-06, "loss": 0.0914, "step": 886 }, { "epoch": 0.4035486806187443, "grad_norm": 0.7730206436987713, "learning_rate": 9.840489587661738e-06, "loss": 0.0747, "step": 887 }, { "epoch": 0.40400363967242947, "grad_norm": 0.7410839527981146, "learning_rate": 9.840131248062012e-06, "loss": 0.079, "step": 888 }, { "epoch": 0.40445859872611467, "grad_norm": 0.627620281196765, "learning_rate": 9.839772512948618e-06, "loss": 0.0715, "step": 889 }, { "epoch": 0.4049135577797998, "grad_norm": 0.8746014124114054, "learning_rate": 9.83941338235087e-06, "loss": 0.0824, "step": 890 }, { "epoch": 0.40536851683348496, "grad_norm": 1.0112737589697485, "learning_rate": 9.839053856298116e-06, "loss": 0.1251, "step": 891 }, { "epoch": 0.40582347588717016, "grad_norm": 0.72216805525771, "learning_rate": 9.838693934819734e-06, "loss": 0.0893, "step": 892 }, { "epoch": 0.4062784349408553, "grad_norm": 0.7544949830136005, "learning_rate": 9.838333617945134e-06, "loss": 0.0968, "step": 893 }, { "epoch": 0.4067333939945405, "grad_norm": 0.9543024355165705, "learning_rate": 9.837972905703762e-06, "loss": 0.102, "step": 894 }, { "epoch": 0.40718835304822565, "grad_norm": 1.02061795078975, "learning_rate": 9.83761179812509e-06, "loss": 0.0649, "step": 895 }, { "epoch": 0.40764331210191085, "grad_norm": 0.39738812842187227, "learning_rate": 9.837250295238629e-06, "loss": 0.0428, "step": 896 }, { "epoch": 0.408098271155596, "grad_norm": 0.8873895570319217, "learning_rate": 9.836888397073919e-06, "loss": 0.1068, "step": 897 }, { "epoch": 0.40855323020928114, "grad_norm": 0.7492126364897504, "learning_rate": 9.836526103660533e-06, "loss": 0.0953, "step": 898 }, { "epoch": 0.40900818926296634, "grad_norm": 0.821575499525911, "learning_rate": 9.836163415028075e-06, "loss": 0.0712, "step": 899 }, { "epoch": 0.4094631483166515, "grad_norm": 1.0052579979241618, "learning_rate": 9.835800331206183e-06, "loss": 0.1138, "step": 900 }, { "epoch": 0.4099181073703367, "grad_norm": 0.7848465428804848, "learning_rate": 9.835436852224525e-06, "loss": 0.0978, "step": 901 }, { "epoch": 0.4103730664240218, "grad_norm": 0.9719856735481065, "learning_rate": 9.835072978112804e-06, "loss": 0.0846, "step": 902 }, { "epoch": 0.410828025477707, "grad_norm": 0.6607308818506346, "learning_rate": 9.834708708900755e-06, "loss": 0.0654, "step": 903 }, { "epoch": 0.41128298453139217, "grad_norm": 0.5191597312034261, "learning_rate": 9.834344044618144e-06, "loss": 0.0518, "step": 904 }, { "epoch": 0.41173794358507737, "grad_norm": 0.5336391872354229, "learning_rate": 9.83397898529477e-06, "loss": 0.0535, "step": 905 }, { "epoch": 0.4121929026387625, "grad_norm": 0.5687342550017563, "learning_rate": 9.833613530960462e-06, "loss": 0.0578, "step": 906 }, { "epoch": 0.41264786169244766, "grad_norm": 0.8793783198642894, "learning_rate": 9.833247681645083e-06, "loss": 0.1286, "step": 907 }, { "epoch": 0.41310282074613286, "grad_norm": 0.8073005899800644, "learning_rate": 9.832881437378534e-06, "loss": 0.0853, "step": 908 }, { "epoch": 0.413557779799818, "grad_norm": 0.511699500000588, "learning_rate": 9.832514798190738e-06, "loss": 0.0504, "step": 909 }, { "epoch": 0.4140127388535032, "grad_norm": 0.5082793074725768, "learning_rate": 9.832147764111655e-06, "loss": 0.056, "step": 910 }, { "epoch": 0.41446769790718835, "grad_norm": 0.9876041013395295, "learning_rate": 9.83178033517128e-06, "loss": 0.0984, "step": 911 }, { "epoch": 0.41492265696087355, "grad_norm": 0.7511273129930924, "learning_rate": 9.831412511399633e-06, "loss": 0.0969, "step": 912 }, { "epoch": 0.4153776160145587, "grad_norm": 1.0144870263760433, "learning_rate": 9.831044292826778e-06, "loss": 0.1482, "step": 913 }, { "epoch": 0.41583257506824384, "grad_norm": 0.70444400073401, "learning_rate": 9.830675679482797e-06, "loss": 0.0802, "step": 914 }, { "epoch": 0.41628753412192904, "grad_norm": 1.0357251397748677, "learning_rate": 9.830306671397816e-06, "loss": 0.1061, "step": 915 }, { "epoch": 0.4167424931756142, "grad_norm": 0.895894802940119, "learning_rate": 9.829937268601988e-06, "loss": 0.1005, "step": 916 }, { "epoch": 0.4171974522292994, "grad_norm": 0.6004589977630954, "learning_rate": 9.829567471125497e-06, "loss": 0.0664, "step": 917 }, { "epoch": 0.4176524112829845, "grad_norm": 0.6058859475834909, "learning_rate": 9.829197278998562e-06, "loss": 0.0728, "step": 918 }, { "epoch": 0.4181073703366697, "grad_norm": 0.5886912548442098, "learning_rate": 9.828826692251435e-06, "loss": 0.074, "step": 919 }, { "epoch": 0.41856232939035487, "grad_norm": 0.5982473215332103, "learning_rate": 9.828455710914398e-06, "loss": 0.0653, "step": 920 }, { "epoch": 0.41901728844404, "grad_norm": 0.8647804622811079, "learning_rate": 9.828084335017763e-06, "loss": 0.0741, "step": 921 }, { "epoch": 0.4194722474977252, "grad_norm": 0.653767178815679, "learning_rate": 9.827712564591883e-06, "loss": 0.0604, "step": 922 }, { "epoch": 0.41992720655141036, "grad_norm": 0.7812500085225947, "learning_rate": 9.827340399667132e-06, "loss": 0.0708, "step": 923 }, { "epoch": 0.42038216560509556, "grad_norm": 0.7314008563711142, "learning_rate": 9.826967840273921e-06, "loss": 0.0721, "step": 924 }, { "epoch": 0.4208371246587807, "grad_norm": 0.8727413076803472, "learning_rate": 9.8265948864427e-06, "loss": 0.0892, "step": 925 }, { "epoch": 0.4212920837124659, "grad_norm": 0.6051379056710864, "learning_rate": 9.826221538203942e-06, "loss": 0.0685, "step": 926 }, { "epoch": 0.42174704276615105, "grad_norm": 0.7279887191787228, "learning_rate": 9.825847795588154e-06, "loss": 0.0766, "step": 927 }, { "epoch": 0.4222020018198362, "grad_norm": 0.7126811268305303, "learning_rate": 9.825473658625876e-06, "loss": 0.0821, "step": 928 }, { "epoch": 0.4226569608735214, "grad_norm": 0.8812960827967533, "learning_rate": 9.825099127347684e-06, "loss": 0.0982, "step": 929 }, { "epoch": 0.42311191992720654, "grad_norm": 0.7462955906438729, "learning_rate": 9.824724201784182e-06, "loss": 0.1073, "step": 930 }, { "epoch": 0.42356687898089174, "grad_norm": 0.5448066050338419, "learning_rate": 9.824348881966004e-06, "loss": 0.0637, "step": 931 }, { "epoch": 0.4240218380345769, "grad_norm": 0.7750150802923693, "learning_rate": 9.823973167923823e-06, "loss": 0.09, "step": 932 }, { "epoch": 0.4244767970882621, "grad_norm": 0.8695175796556455, "learning_rate": 9.82359705968834e-06, "loss": 0.0857, "step": 933 }, { "epoch": 0.4249317561419472, "grad_norm": 0.653112477618241, "learning_rate": 9.823220557290289e-06, "loss": 0.0722, "step": 934 }, { "epoch": 0.42538671519563237, "grad_norm": 0.7764742726938813, "learning_rate": 9.822843660760434e-06, "loss": 0.0582, "step": 935 }, { "epoch": 0.42584167424931757, "grad_norm": 0.8338160462571067, "learning_rate": 9.822466370129576e-06, "loss": 0.0993, "step": 936 }, { "epoch": 0.4262966333030027, "grad_norm": 0.7416650975880095, "learning_rate": 9.822088685428543e-06, "loss": 0.0782, "step": 937 }, { "epoch": 0.4267515923566879, "grad_norm": 0.5969422348364739, "learning_rate": 9.821710606688199e-06, "loss": 0.0546, "step": 938 }, { "epoch": 0.42720655141037306, "grad_norm": 0.6235404067325917, "learning_rate": 9.82133213393944e-06, "loss": 0.0638, "step": 939 }, { "epoch": 0.42766151046405826, "grad_norm": 0.7910461101358781, "learning_rate": 9.820953267213194e-06, "loss": 0.0775, "step": 940 }, { "epoch": 0.4281164695177434, "grad_norm": 0.692978452923811, "learning_rate": 9.820574006540415e-06, "loss": 0.053, "step": 941 }, { "epoch": 0.42857142857142855, "grad_norm": 0.7310389759017597, "learning_rate": 9.820194351952098e-06, "loss": 0.0716, "step": 942 }, { "epoch": 0.42902638762511375, "grad_norm": 0.6553331509390902, "learning_rate": 9.819814303479268e-06, "loss": 0.0612, "step": 943 }, { "epoch": 0.4294813466787989, "grad_norm": 1.1310076957610966, "learning_rate": 9.819433861152978e-06, "loss": 0.1116, "step": 944 }, { "epoch": 0.4299363057324841, "grad_norm": 0.6933766894953944, "learning_rate": 9.819053025004316e-06, "loss": 0.0932, "step": 945 }, { "epoch": 0.43039126478616924, "grad_norm": 0.7823571557493696, "learning_rate": 9.818671795064405e-06, "loss": 0.0847, "step": 946 }, { "epoch": 0.43084622383985444, "grad_norm": 0.8000794358590197, "learning_rate": 9.818290171364396e-06, "loss": 0.0916, "step": 947 }, { "epoch": 0.4313011828935396, "grad_norm": 0.6207042654318157, "learning_rate": 9.817908153935473e-06, "loss": 0.0568, "step": 948 }, { "epoch": 0.4317561419472247, "grad_norm": 0.7957970680354334, "learning_rate": 9.817525742808854e-06, "loss": 0.1203, "step": 949 }, { "epoch": 0.4322111010009099, "grad_norm": 0.6607960765057979, "learning_rate": 9.817142938015786e-06, "loss": 0.069, "step": 950 }, { "epoch": 0.43266606005459507, "grad_norm": 0.8132102265727185, "learning_rate": 9.816759739587552e-06, "loss": 0.0821, "step": 951 }, { "epoch": 0.43312101910828027, "grad_norm": 0.6410149691778323, "learning_rate": 9.816376147555464e-06, "loss": 0.0612, "step": 952 }, { "epoch": 0.4335759781619654, "grad_norm": 1.0196998859089288, "learning_rate": 9.815992161950867e-06, "loss": 0.1183, "step": 953 }, { "epoch": 0.4340309372156506, "grad_norm": 0.5899375116434804, "learning_rate": 9.81560778280514e-06, "loss": 0.0604, "step": 954 }, { "epoch": 0.43448589626933576, "grad_norm": 1.0046158107797931, "learning_rate": 9.815223010149693e-06, "loss": 0.0876, "step": 955 }, { "epoch": 0.4349408553230209, "grad_norm": 0.7980339738331416, "learning_rate": 9.814837844015966e-06, "loss": 0.0894, "step": 956 }, { "epoch": 0.4353958143767061, "grad_norm": 0.6974524248281853, "learning_rate": 9.814452284435433e-06, "loss": 0.0741, "step": 957 }, { "epoch": 0.43585077343039125, "grad_norm": 0.7679692797858835, "learning_rate": 9.814066331439603e-06, "loss": 0.0796, "step": 958 }, { "epoch": 0.43630573248407645, "grad_norm": 0.8183774417740679, "learning_rate": 9.813679985060012e-06, "loss": 0.0963, "step": 959 }, { "epoch": 0.4367606915377616, "grad_norm": 0.7950656053104391, "learning_rate": 9.81329324532823e-06, "loss": 0.0837, "step": 960 }, { "epoch": 0.4372156505914468, "grad_norm": 0.6056809369995887, "learning_rate": 9.812906112275862e-06, "loss": 0.0465, "step": 961 }, { "epoch": 0.43767060964513194, "grad_norm": 1.0980359635620318, "learning_rate": 9.81251858593454e-06, "loss": 0.1206, "step": 962 }, { "epoch": 0.4381255686988171, "grad_norm": 0.6123483237764059, "learning_rate": 9.812130666335933e-06, "loss": 0.08, "step": 963 }, { "epoch": 0.4385805277525023, "grad_norm": 0.8151730014839008, "learning_rate": 9.81174235351174e-06, "loss": 0.0983, "step": 964 }, { "epoch": 0.4390354868061874, "grad_norm": 0.7143828681073273, "learning_rate": 9.811353647493691e-06, "loss": 0.0809, "step": 965 }, { "epoch": 0.4394904458598726, "grad_norm": 0.5647036962239634, "learning_rate": 9.810964548313549e-06, "loss": 0.0581, "step": 966 }, { "epoch": 0.43994540491355777, "grad_norm": 0.7594400506736699, "learning_rate": 9.81057505600311e-06, "loss": 0.078, "step": 967 }, { "epoch": 0.44040036396724297, "grad_norm": 0.6515426202345832, "learning_rate": 9.810185170594205e-06, "loss": 0.0688, "step": 968 }, { "epoch": 0.4408553230209281, "grad_norm": 0.8798906332352223, "learning_rate": 9.809794892118687e-06, "loss": 0.0915, "step": 969 }, { "epoch": 0.44131028207461326, "grad_norm": 0.7350866900672135, "learning_rate": 9.809404220608451e-06, "loss": 0.0671, "step": 970 }, { "epoch": 0.44176524112829846, "grad_norm": 0.7216847217866104, "learning_rate": 9.809013156095424e-06, "loss": 0.0726, "step": 971 }, { "epoch": 0.4422202001819836, "grad_norm": 0.8179702740752783, "learning_rate": 9.808621698611557e-06, "loss": 0.0758, "step": 972 }, { "epoch": 0.4426751592356688, "grad_norm": 0.5533105745807706, "learning_rate": 9.808229848188842e-06, "loss": 0.0528, "step": 973 }, { "epoch": 0.44313011828935395, "grad_norm": 0.7503486538749657, "learning_rate": 9.807837604859296e-06, "loss": 0.0878, "step": 974 }, { "epoch": 0.44358507734303915, "grad_norm": 0.40510949005498975, "learning_rate": 9.807444968654975e-06, "loss": 0.0424, "step": 975 }, { "epoch": 0.4440400363967243, "grad_norm": 0.8540666353042626, "learning_rate": 9.807051939607959e-06, "loss": 0.1108, "step": 976 }, { "epoch": 0.44449499545040944, "grad_norm": 0.7543284179304937, "learning_rate": 9.806658517750369e-06, "loss": 0.0719, "step": 977 }, { "epoch": 0.44494995450409464, "grad_norm": 0.6982493359241757, "learning_rate": 9.80626470311435e-06, "loss": 0.0777, "step": 978 }, { "epoch": 0.4454049135577798, "grad_norm": 0.7275511253894157, "learning_rate": 9.805870495732085e-06, "loss": 0.0693, "step": 979 }, { "epoch": 0.445859872611465, "grad_norm": 0.8647890459895436, "learning_rate": 9.805475895635787e-06, "loss": 0.0882, "step": 980 }, { "epoch": 0.4463148316651501, "grad_norm": 0.757804762973183, "learning_rate": 9.8050809028577e-06, "loss": 0.0724, "step": 981 }, { "epoch": 0.4467697907188353, "grad_norm": 0.7515219153063712, "learning_rate": 9.8046855174301e-06, "loss": 0.0659, "step": 982 }, { "epoch": 0.44722474977252047, "grad_norm": 1.0502681583017184, "learning_rate": 9.804289739385297e-06, "loss": 0.1207, "step": 983 }, { "epoch": 0.44767970882620567, "grad_norm": 0.5780062486364612, "learning_rate": 9.803893568755633e-06, "loss": 0.0772, "step": 984 }, { "epoch": 0.4481346678798908, "grad_norm": 0.5515644567052078, "learning_rate": 9.80349700557348e-06, "loss": 0.0628, "step": 985 }, { "epoch": 0.44858962693357596, "grad_norm": 0.6432677095504179, "learning_rate": 9.803100049871246e-06, "loss": 0.0817, "step": 986 }, { "epoch": 0.44904458598726116, "grad_norm": 0.5424958391196154, "learning_rate": 9.802702701681366e-06, "loss": 0.0649, "step": 987 }, { "epoch": 0.4494995450409463, "grad_norm": 0.6556126282036931, "learning_rate": 9.80230496103631e-06, "loss": 0.0579, "step": 988 }, { "epoch": 0.4499545040946315, "grad_norm": 0.5632646083130022, "learning_rate": 9.801906827968578e-06, "loss": 0.0591, "step": 989 }, { "epoch": 0.45040946314831665, "grad_norm": 1.0464719217252296, "learning_rate": 9.801508302510707e-06, "loss": 0.124, "step": 990 }, { "epoch": 0.45086442220200185, "grad_norm": 0.7231067459050019, "learning_rate": 9.801109384695261e-06, "loss": 0.0631, "step": 991 }, { "epoch": 0.451319381255687, "grad_norm": 0.775594128230074, "learning_rate": 9.800710074554837e-06, "loss": 0.0924, "step": 992 }, { "epoch": 0.45177434030937214, "grad_norm": 0.6340180385643369, "learning_rate": 9.800310372122066e-06, "loss": 0.068, "step": 993 }, { "epoch": 0.45222929936305734, "grad_norm": 0.9703750136380557, "learning_rate": 9.799910277429609e-06, "loss": 0.0902, "step": 994 }, { "epoch": 0.4526842584167425, "grad_norm": 0.5881925827197537, "learning_rate": 9.79950979051016e-06, "loss": 0.0662, "step": 995 }, { "epoch": 0.4531392174704277, "grad_norm": 0.7583235380843109, "learning_rate": 9.799108911396446e-06, "loss": 0.0755, "step": 996 }, { "epoch": 0.4535941765241128, "grad_norm": 0.6585135755735663, "learning_rate": 9.798707640121224e-06, "loss": 0.0669, "step": 997 }, { "epoch": 0.454049135577798, "grad_norm": 0.9344579240939844, "learning_rate": 9.798305976717286e-06, "loss": 0.1028, "step": 998 }, { "epoch": 0.45450409463148317, "grad_norm": 0.6238360425747993, "learning_rate": 9.79790392121745e-06, "loss": 0.0608, "step": 999 }, { "epoch": 0.4549590536851683, "grad_norm": 0.715680092291253, "learning_rate": 9.797501473654573e-06, "loss": 0.0792, "step": 1000 }, { "epoch": 0.4554140127388535, "grad_norm": 0.8167758856821831, "learning_rate": 9.797098634061543e-06, "loss": 0.0948, "step": 1001 }, { "epoch": 0.45586897179253866, "grad_norm": 0.8318764431867516, "learning_rate": 9.796695402471275e-06, "loss": 0.0967, "step": 1002 }, { "epoch": 0.45632393084622386, "grad_norm": 0.9700547030363569, "learning_rate": 9.79629177891672e-06, "loss": 0.1138, "step": 1003 }, { "epoch": 0.456778889899909, "grad_norm": 0.7702596501705347, "learning_rate": 9.79588776343086e-06, "loss": 0.0826, "step": 1004 }, { "epoch": 0.4572338489535942, "grad_norm": 0.833778163717652, "learning_rate": 9.795483356046711e-06, "loss": 0.0927, "step": 1005 }, { "epoch": 0.45768880800727935, "grad_norm": 0.7006737675801851, "learning_rate": 9.795078556797318e-06, "loss": 0.0747, "step": 1006 }, { "epoch": 0.4581437670609645, "grad_norm": 0.8810114143185821, "learning_rate": 9.794673365715761e-06, "loss": 0.0921, "step": 1007 }, { "epoch": 0.4585987261146497, "grad_norm": 0.7286145380478113, "learning_rate": 9.794267782835148e-06, "loss": 0.0832, "step": 1008 }, { "epoch": 0.45905368516833484, "grad_norm": 0.8181887559127218, "learning_rate": 9.793861808188622e-06, "loss": 0.0729, "step": 1009 }, { "epoch": 0.45950864422202004, "grad_norm": 1.0821839097582124, "learning_rate": 9.793455441809359e-06, "loss": 0.1025, "step": 1010 }, { "epoch": 0.4599636032757052, "grad_norm": 0.515896949523265, "learning_rate": 9.793048683730564e-06, "loss": 0.0512, "step": 1011 }, { "epoch": 0.4604185623293904, "grad_norm": 0.7800604571516774, "learning_rate": 9.792641533985474e-06, "loss": 0.1065, "step": 1012 }, { "epoch": 0.4608735213830755, "grad_norm": 0.48365424866268936, "learning_rate": 9.792233992607365e-06, "loss": 0.0622, "step": 1013 }, { "epoch": 0.46132848043676067, "grad_norm": 0.8472876133123602, "learning_rate": 9.791826059629532e-06, "loss": 0.0713, "step": 1014 }, { "epoch": 0.46178343949044587, "grad_norm": 0.935522534168844, "learning_rate": 9.791417735085316e-06, "loss": 0.0853, "step": 1015 }, { "epoch": 0.462238398544131, "grad_norm": 0.8028819334602026, "learning_rate": 9.791009019008078e-06, "loss": 0.0795, "step": 1016 }, { "epoch": 0.4626933575978162, "grad_norm": 0.6458928385673616, "learning_rate": 9.79059991143122e-06, "loss": 0.0836, "step": 1017 }, { "epoch": 0.46314831665150136, "grad_norm": 0.8309912415690437, "learning_rate": 9.790190412388173e-06, "loss": 0.0895, "step": 1018 }, { "epoch": 0.46360327570518656, "grad_norm": 0.6953691809158898, "learning_rate": 9.789780521912396e-06, "loss": 0.0686, "step": 1019 }, { "epoch": 0.4640582347588717, "grad_norm": 0.7563151979586233, "learning_rate": 9.789370240037385e-06, "loss": 0.0879, "step": 1020 }, { "epoch": 0.46451319381255685, "grad_norm": 0.6646619102460968, "learning_rate": 9.788959566796667e-06, "loss": 0.0761, "step": 1021 }, { "epoch": 0.46496815286624205, "grad_norm": 0.8092527562913561, "learning_rate": 9.788548502223801e-06, "loss": 0.0863, "step": 1022 }, { "epoch": 0.4654231119199272, "grad_norm": 2.0284506817542396, "learning_rate": 9.788137046352374e-06, "loss": 0.2011, "step": 1023 }, { "epoch": 0.4658780709736124, "grad_norm": 0.6524644993097855, "learning_rate": 9.787725199216011e-06, "loss": 0.0765, "step": 1024 }, { "epoch": 0.46633303002729753, "grad_norm": 0.48134373932870766, "learning_rate": 9.787312960848368e-06, "loss": 0.0505, "step": 1025 }, { "epoch": 0.46678798908098273, "grad_norm": 0.6646547386252114, "learning_rate": 9.786900331283128e-06, "loss": 0.0825, "step": 1026 }, { "epoch": 0.4672429481346679, "grad_norm": 0.5655812014606527, "learning_rate": 9.78648731055401e-06, "loss": 0.0659, "step": 1027 }, { "epoch": 0.467697907188353, "grad_norm": 0.680196435092224, "learning_rate": 9.786073898694766e-06, "loss": 0.0734, "step": 1028 }, { "epoch": 0.4681528662420382, "grad_norm": 0.6198434008496165, "learning_rate": 9.785660095739176e-06, "loss": 0.0687, "step": 1029 }, { "epoch": 0.46860782529572337, "grad_norm": 0.5967309034966486, "learning_rate": 9.785245901721054e-06, "loss": 0.0443, "step": 1030 }, { "epoch": 0.46906278434940857, "grad_norm": 0.588565790719301, "learning_rate": 9.784831316674246e-06, "loss": 0.0741, "step": 1031 }, { "epoch": 0.4695177434030937, "grad_norm": 0.6384508627867143, "learning_rate": 9.784416340632634e-06, "loss": 0.0639, "step": 1032 }, { "epoch": 0.4699727024567789, "grad_norm": 0.528980291125106, "learning_rate": 9.784000973630124e-06, "loss": 0.0506, "step": 1033 }, { "epoch": 0.47042766151046406, "grad_norm": 0.6297922247581061, "learning_rate": 9.783585215700656e-06, "loss": 0.0704, "step": 1034 }, { "epoch": 0.4708826205641492, "grad_norm": 1.1014615381108162, "learning_rate": 9.783169066878208e-06, "loss": 0.1063, "step": 1035 }, { "epoch": 0.4713375796178344, "grad_norm": 0.7370811970547196, "learning_rate": 9.782752527196785e-06, "loss": 0.0888, "step": 1036 }, { "epoch": 0.47179253867151955, "grad_norm": 0.6272964856361817, "learning_rate": 9.782335596690425e-06, "loss": 0.0683, "step": 1037 }, { "epoch": 0.47224749772520475, "grad_norm": 0.9675945822898259, "learning_rate": 9.781918275393196e-06, "loss": 0.1031, "step": 1038 }, { "epoch": 0.4727024567788899, "grad_norm": 0.8448129794628584, "learning_rate": 9.781500563339202e-06, "loss": 0.0818, "step": 1039 }, { "epoch": 0.4731574158325751, "grad_norm": 0.5148120993988892, "learning_rate": 9.781082460562574e-06, "loss": 0.0525, "step": 1040 }, { "epoch": 0.47361237488626023, "grad_norm": 0.7767251927940846, "learning_rate": 9.780663967097477e-06, "loss": 0.0869, "step": 1041 }, { "epoch": 0.4740673339399454, "grad_norm": 0.9661754574144388, "learning_rate": 9.780245082978112e-06, "loss": 0.0923, "step": 1042 }, { "epoch": 0.4745222929936306, "grad_norm": 0.780061387882855, "learning_rate": 9.779825808238705e-06, "loss": 0.095, "step": 1043 }, { "epoch": 0.4749772520473157, "grad_norm": 0.8513172657519864, "learning_rate": 9.77940614291352e-06, "loss": 0.0772, "step": 1044 }, { "epoch": 0.4754322111010009, "grad_norm": 0.6199453465731616, "learning_rate": 9.778986087036846e-06, "loss": 0.0701, "step": 1045 }, { "epoch": 0.47588717015468607, "grad_norm": 0.5327629714743946, "learning_rate": 9.778565640643011e-06, "loss": 0.0447, "step": 1046 }, { "epoch": 0.47634212920837127, "grad_norm": 0.8882337205809296, "learning_rate": 9.778144803766375e-06, "loss": 0.0788, "step": 1047 }, { "epoch": 0.4767970882620564, "grad_norm": 0.6023343672839219, "learning_rate": 9.77772357644132e-06, "loss": 0.0693, "step": 1048 }, { "epoch": 0.47725204731574156, "grad_norm": 0.8031515985448552, "learning_rate": 9.777301958702273e-06, "loss": 0.0911, "step": 1049 }, { "epoch": 0.47770700636942676, "grad_norm": 0.8695877166802147, "learning_rate": 9.776879950583683e-06, "loss": 0.12, "step": 1050 }, { "epoch": 0.4781619654231119, "grad_norm": 0.6077253389668626, "learning_rate": 9.776457552120034e-06, "loss": 0.0722, "step": 1051 }, { "epoch": 0.4786169244767971, "grad_norm": 0.7976020915977983, "learning_rate": 9.776034763345845e-06, "loss": 0.0783, "step": 1052 }, { "epoch": 0.47907188353048225, "grad_norm": 0.7091049596783572, "learning_rate": 9.775611584295663e-06, "loss": 0.0739, "step": 1053 }, { "epoch": 0.47952684258416745, "grad_norm": 0.7919907245184465, "learning_rate": 9.775188015004072e-06, "loss": 0.0728, "step": 1054 }, { "epoch": 0.4799818016378526, "grad_norm": 0.9227645018819045, "learning_rate": 9.774764055505676e-06, "loss": 0.0905, "step": 1055 }, { "epoch": 0.48043676069153773, "grad_norm": 0.7130315690029604, "learning_rate": 9.774339705835127e-06, "loss": 0.09, "step": 1056 }, { "epoch": 0.48089171974522293, "grad_norm": 0.7993270676292756, "learning_rate": 9.773914966027098e-06, "loss": 0.1011, "step": 1057 }, { "epoch": 0.4813466787989081, "grad_norm": 0.8955668988276211, "learning_rate": 9.773489836116297e-06, "loss": 0.0963, "step": 1058 }, { "epoch": 0.4818016378525933, "grad_norm": 0.7582155580680914, "learning_rate": 9.773064316137464e-06, "loss": 0.0766, "step": 1059 }, { "epoch": 0.4822565969062784, "grad_norm": 0.6939955066308027, "learning_rate": 9.772638406125367e-06, "loss": 0.0687, "step": 1060 }, { "epoch": 0.4827115559599636, "grad_norm": 0.8091635860789653, "learning_rate": 9.772212106114816e-06, "loss": 0.0754, "step": 1061 }, { "epoch": 0.48316651501364877, "grad_norm": 0.8236012040739623, "learning_rate": 9.77178541614064e-06, "loss": 0.0951, "step": 1062 }, { "epoch": 0.48362147406733397, "grad_norm": 0.6622501946117725, "learning_rate": 9.77135833623771e-06, "loss": 0.083, "step": 1063 }, { "epoch": 0.4840764331210191, "grad_norm": 0.8689743387052602, "learning_rate": 9.770930866440927e-06, "loss": 0.1074, "step": 1064 }, { "epoch": 0.48453139217470426, "grad_norm": 0.6733750246744147, "learning_rate": 9.770503006785214e-06, "loss": 0.0639, "step": 1065 }, { "epoch": 0.48498635122838946, "grad_norm": 0.9485233745498586, "learning_rate": 9.770074757305541e-06, "loss": 0.1106, "step": 1066 }, { "epoch": 0.4854413102820746, "grad_norm": 0.8288392949652397, "learning_rate": 9.769646118036902e-06, "loss": 0.0661, "step": 1067 }, { "epoch": 0.4858962693357598, "grad_norm": 0.7475423805914638, "learning_rate": 9.76921708901432e-06, "loss": 0.0686, "step": 1068 }, { "epoch": 0.48635122838944495, "grad_norm": 0.54120364671088, "learning_rate": 9.768787670272855e-06, "loss": 0.0629, "step": 1069 }, { "epoch": 0.48680618744313015, "grad_norm": 0.7281619635509152, "learning_rate": 9.768357861847598e-06, "loss": 0.0723, "step": 1070 }, { "epoch": 0.4872611464968153, "grad_norm": 0.8883321717067604, "learning_rate": 9.767927663773668e-06, "loss": 0.0832, "step": 1071 }, { "epoch": 0.48771610555050043, "grad_norm": 0.7681469789077073, "learning_rate": 9.767497076086223e-06, "loss": 0.0786, "step": 1072 }, { "epoch": 0.48817106460418563, "grad_norm": 0.6590861395931087, "learning_rate": 9.767066098820446e-06, "loss": 0.0704, "step": 1073 }, { "epoch": 0.4886260236578708, "grad_norm": 0.7944203702948146, "learning_rate": 9.766634732011557e-06, "loss": 0.0867, "step": 1074 }, { "epoch": 0.489080982711556, "grad_norm": 0.7832480468570255, "learning_rate": 9.766202975694801e-06, "loss": 0.0873, "step": 1075 }, { "epoch": 0.4895359417652411, "grad_norm": 0.7232266679451883, "learning_rate": 9.765770829905464e-06, "loss": 0.0785, "step": 1076 }, { "epoch": 0.4899909008189263, "grad_norm": 0.5406798309730716, "learning_rate": 9.765338294678856e-06, "loss": 0.0469, "step": 1077 }, { "epoch": 0.49044585987261147, "grad_norm": 0.5866548164219128, "learning_rate": 9.764905370050321e-06, "loss": 0.0524, "step": 1078 }, { "epoch": 0.4909008189262966, "grad_norm": 0.9915720236606885, "learning_rate": 9.76447205605524e-06, "loss": 0.1019, "step": 1079 }, { "epoch": 0.4913557779799818, "grad_norm": 0.6838845303274752, "learning_rate": 9.764038352729018e-06, "loss": 0.0891, "step": 1080 }, { "epoch": 0.49181073703366696, "grad_norm": 0.9385660559352969, "learning_rate": 9.763604260107096e-06, "loss": 0.1058, "step": 1081 }, { "epoch": 0.49226569608735216, "grad_norm": 0.6710872617569944, "learning_rate": 9.763169778224946e-06, "loss": 0.0665, "step": 1082 }, { "epoch": 0.4927206551410373, "grad_norm": 0.7878885609137168, "learning_rate": 9.762734907118072e-06, "loss": 0.0876, "step": 1083 }, { "epoch": 0.4931756141947225, "grad_norm": 0.6302166766090778, "learning_rate": 9.76229964682201e-06, "loss": 0.0507, "step": 1084 }, { "epoch": 0.49363057324840764, "grad_norm": 0.5833462678864086, "learning_rate": 9.761863997372325e-06, "loss": 0.0612, "step": 1085 }, { "epoch": 0.4940855323020928, "grad_norm": 1.036522158484448, "learning_rate": 9.761427958804621e-06, "loss": 0.1395, "step": 1086 }, { "epoch": 0.494540491355778, "grad_norm": 1.1502320115946314, "learning_rate": 9.760991531154526e-06, "loss": 0.1149, "step": 1087 }, { "epoch": 0.49499545040946313, "grad_norm": 0.7616054217825209, "learning_rate": 9.760554714457704e-06, "loss": 0.0684, "step": 1088 }, { "epoch": 0.49545040946314833, "grad_norm": 0.5129309167340426, "learning_rate": 9.760117508749846e-06, "loss": 0.0614, "step": 1089 }, { "epoch": 0.4959053685168335, "grad_norm": 0.7147170789642256, "learning_rate": 9.759679914066686e-06, "loss": 0.0842, "step": 1090 }, { "epoch": 0.4963603275705187, "grad_norm": 0.7513123367978354, "learning_rate": 9.759241930443975e-06, "loss": 0.0749, "step": 1091 }, { "epoch": 0.4968152866242038, "grad_norm": 0.5462870672862663, "learning_rate": 9.75880355791751e-06, "loss": 0.0588, "step": 1092 }, { "epoch": 0.49727024567788897, "grad_norm": 0.6158644897786469, "learning_rate": 9.758364796523105e-06, "loss": 0.0578, "step": 1093 }, { "epoch": 0.49772520473157417, "grad_norm": 0.5248367448810554, "learning_rate": 9.757925646296617e-06, "loss": 0.0504, "step": 1094 }, { "epoch": 0.4981801637852593, "grad_norm": 0.7801307646100064, "learning_rate": 9.757486107273935e-06, "loss": 0.0819, "step": 1095 }, { "epoch": 0.4986351228389445, "grad_norm": 0.6822936325355138, "learning_rate": 9.75704617949097e-06, "loss": 0.0828, "step": 1096 }, { "epoch": 0.49909008189262966, "grad_norm": 0.49379397863131413, "learning_rate": 9.756605862983675e-06, "loss": 0.0606, "step": 1097 }, { "epoch": 0.49954504094631486, "grad_norm": 0.5236513133369656, "learning_rate": 9.756165157788029e-06, "loss": 0.0493, "step": 1098 }, { "epoch": 0.5, "grad_norm": 0.7323812225903658, "learning_rate": 9.755724063940047e-06, "loss": 0.0794, "step": 1099 }, { "epoch": 0.5004549590536852, "grad_norm": 0.853156508842135, "learning_rate": 9.755282581475769e-06, "loss": 0.08, "step": 1100 }, { "epoch": 0.5009099181073703, "grad_norm": 0.7117091061791435, "learning_rate": 9.754840710431274e-06, "loss": 0.0773, "step": 1101 }, { "epoch": 0.5013648771610555, "grad_norm": 0.9350752111669145, "learning_rate": 9.754398450842668e-06, "loss": 0.1046, "step": 1102 }, { "epoch": 0.5018198362147407, "grad_norm": 0.8834833642233855, "learning_rate": 9.753955802746091e-06, "loss": 0.1284, "step": 1103 }, { "epoch": 0.5022747952684259, "grad_norm": 0.9022387216275947, "learning_rate": 9.753512766177717e-06, "loss": 0.0898, "step": 1104 }, { "epoch": 0.502729754322111, "grad_norm": 0.551248880180483, "learning_rate": 9.753069341173745e-06, "loss": 0.0596, "step": 1105 }, { "epoch": 0.5031847133757962, "grad_norm": 0.5970423480352659, "learning_rate": 9.752625527770409e-06, "loss": 0.0723, "step": 1106 }, { "epoch": 0.5036396724294814, "grad_norm": 0.7620108531589319, "learning_rate": 9.75218132600398e-06, "loss": 0.0856, "step": 1107 }, { "epoch": 0.5040946314831665, "grad_norm": 0.7720887684681512, "learning_rate": 9.751736735910753e-06, "loss": 0.0904, "step": 1108 }, { "epoch": 0.5045495905368517, "grad_norm": 0.8672659681858957, "learning_rate": 9.75129175752706e-06, "loss": 0.1043, "step": 1109 }, { "epoch": 0.5050045495905369, "grad_norm": 0.7511079874116621, "learning_rate": 9.75084639088926e-06, "loss": 0.0719, "step": 1110 }, { "epoch": 0.5054595086442221, "grad_norm": 0.7442062138473109, "learning_rate": 9.750400636033746e-06, "loss": 0.0805, "step": 1111 }, { "epoch": 0.5059144676979072, "grad_norm": 0.716157443156474, "learning_rate": 9.749954492996947e-06, "loss": 0.0902, "step": 1112 }, { "epoch": 0.5063694267515924, "grad_norm": 0.7655895172099163, "learning_rate": 9.749507961815317e-06, "loss": 0.0973, "step": 1113 }, { "epoch": 0.5068243858052776, "grad_norm": 0.6288294239038802, "learning_rate": 9.749061042525343e-06, "loss": 0.0646, "step": 1114 }, { "epoch": 0.5072793448589626, "grad_norm": 0.6709452216437115, "learning_rate": 9.74861373516355e-06, "loss": 0.0717, "step": 1115 }, { "epoch": 0.5077343039126478, "grad_norm": 0.6522838269502338, "learning_rate": 9.748166039766484e-06, "loss": 0.0475, "step": 1116 }, { "epoch": 0.508189262966333, "grad_norm": 0.7999784990978867, "learning_rate": 9.747717956370735e-06, "loss": 0.0925, "step": 1117 }, { "epoch": 0.5086442220200182, "grad_norm": 1.0917998243863505, "learning_rate": 9.747269485012913e-06, "loss": 0.1293, "step": 1118 }, { "epoch": 0.5090991810737033, "grad_norm": 0.7636715530766439, "learning_rate": 9.746820625729667e-06, "loss": 0.0774, "step": 1119 }, { "epoch": 0.5095541401273885, "grad_norm": 0.6701230428761437, "learning_rate": 9.746371378557677e-06, "loss": 0.0623, "step": 1120 }, { "epoch": 0.5100090991810737, "grad_norm": 0.972334707766994, "learning_rate": 9.745921743533653e-06, "loss": 0.113, "step": 1121 }, { "epoch": 0.5104640582347588, "grad_norm": 0.6630727679984025, "learning_rate": 9.745471720694335e-06, "loss": 0.0828, "step": 1122 }, { "epoch": 0.510919017288444, "grad_norm": 0.8798279960192045, "learning_rate": 9.745021310076498e-06, "loss": 0.0772, "step": 1123 }, { "epoch": 0.5113739763421292, "grad_norm": 0.6337737332675445, "learning_rate": 9.744570511716952e-06, "loss": 0.0805, "step": 1124 }, { "epoch": 0.5118289353958144, "grad_norm": 0.9171053674032225, "learning_rate": 9.744119325652526e-06, "loss": 0.0901, "step": 1125 }, { "epoch": 0.5122838944494995, "grad_norm": 0.7437420002919692, "learning_rate": 9.743667751920093e-06, "loss": 0.0789, "step": 1126 }, { "epoch": 0.5127388535031847, "grad_norm": 0.692440215965907, "learning_rate": 9.743215790556556e-06, "loss": 0.0885, "step": 1127 }, { "epoch": 0.5131938125568699, "grad_norm": 0.5830998661595514, "learning_rate": 9.742763441598841e-06, "loss": 0.0571, "step": 1128 }, { "epoch": 0.513648771610555, "grad_norm": 0.7409283851806759, "learning_rate": 9.742310705083919e-06, "loss": 0.0819, "step": 1129 }, { "epoch": 0.5141037306642402, "grad_norm": 0.6329559817029019, "learning_rate": 9.74185758104878e-06, "loss": 0.0732, "step": 1130 }, { "epoch": 0.5145586897179254, "grad_norm": 0.47102788261692413, "learning_rate": 9.741404069530455e-06, "loss": 0.0496, "step": 1131 }, { "epoch": 0.5150136487716106, "grad_norm": 0.7193278988032876, "learning_rate": 9.740950170566002e-06, "loss": 0.0797, "step": 1132 }, { "epoch": 0.5154686078252957, "grad_norm": 0.7827454423152818, "learning_rate": 9.740495884192509e-06, "loss": 0.0863, "step": 1133 }, { "epoch": 0.5159235668789809, "grad_norm": 0.5187125000260286, "learning_rate": 9.740041210447101e-06, "loss": 0.048, "step": 1134 }, { "epoch": 0.5163785259326661, "grad_norm": 0.7621657915309645, "learning_rate": 9.739586149366932e-06, "loss": 0.076, "step": 1135 }, { "epoch": 0.5168334849863512, "grad_norm": 1.0691498364952807, "learning_rate": 9.739130700989185e-06, "loss": 0.1085, "step": 1136 }, { "epoch": 0.5172884440400364, "grad_norm": 1.126943089011516, "learning_rate": 9.738674865351081e-06, "loss": 0.1197, "step": 1137 }, { "epoch": 0.5177434030937216, "grad_norm": 0.5967935472543325, "learning_rate": 9.738218642489864e-06, "loss": 0.0715, "step": 1138 }, { "epoch": 0.5181983621474068, "grad_norm": 0.6520369417533736, "learning_rate": 9.73776203244282e-06, "loss": 0.0812, "step": 1139 }, { "epoch": 0.5186533212010919, "grad_norm": 0.6923655317783546, "learning_rate": 9.737305035247258e-06, "loss": 0.0607, "step": 1140 }, { "epoch": 0.5191082802547771, "grad_norm": 0.5971267035932937, "learning_rate": 9.73684765094052e-06, "loss": 0.0597, "step": 1141 }, { "epoch": 0.5195632393084623, "grad_norm": 0.6102979031011873, "learning_rate": 9.736389879559984e-06, "loss": 0.0464, "step": 1142 }, { "epoch": 0.5200181983621474, "grad_norm": 0.5971210330968472, "learning_rate": 9.735931721143058e-06, "loss": 0.0674, "step": 1143 }, { "epoch": 0.5204731574158326, "grad_norm": 0.9014574419537533, "learning_rate": 9.735473175727178e-06, "loss": 0.1071, "step": 1144 }, { "epoch": 0.5209281164695178, "grad_norm": 1.024240239778721, "learning_rate": 9.735014243349814e-06, "loss": 0.1058, "step": 1145 }, { "epoch": 0.521383075523203, "grad_norm": 0.740240244958144, "learning_rate": 9.73455492404847e-06, "loss": 0.0716, "step": 1146 }, { "epoch": 0.521838034576888, "grad_norm": 0.8552793125149327, "learning_rate": 9.734095217860679e-06, "loss": 0.1116, "step": 1147 }, { "epoch": 0.5222929936305732, "grad_norm": 0.8388846880500271, "learning_rate": 9.733635124824007e-06, "loss": 0.1195, "step": 1148 }, { "epoch": 0.5227479526842584, "grad_norm": 0.7476616795889469, "learning_rate": 9.733174644976047e-06, "loss": 0.0982, "step": 1149 }, { "epoch": 0.5232029117379435, "grad_norm": 1.247104578949049, "learning_rate": 9.732713778354431e-06, "loss": 0.1339, "step": 1150 }, { "epoch": 0.5236578707916287, "grad_norm": 0.8127429979477634, "learning_rate": 9.732252524996818e-06, "loss": 0.0994, "step": 1151 }, { "epoch": 0.5241128298453139, "grad_norm": 1.1678300434583342, "learning_rate": 9.731790884940899e-06, "loss": 0.1152, "step": 1152 }, { "epoch": 0.5245677888989991, "grad_norm": 0.5209287069427062, "learning_rate": 9.731328858224398e-06, "loss": 0.0546, "step": 1153 }, { "epoch": 0.5250227479526842, "grad_norm": 0.8363023252623251, "learning_rate": 9.730866444885069e-06, "loss": 0.0894, "step": 1154 }, { "epoch": 0.5254777070063694, "grad_norm": 0.8202924553152645, "learning_rate": 9.730403644960697e-06, "loss": 0.0914, "step": 1155 }, { "epoch": 0.5259326660600546, "grad_norm": 0.4900409376406188, "learning_rate": 9.729940458489105e-06, "loss": 0.0454, "step": 1156 }, { "epoch": 0.5263876251137397, "grad_norm": 0.5631225499534328, "learning_rate": 9.729476885508136e-06, "loss": 0.0542, "step": 1157 }, { "epoch": 0.5268425841674249, "grad_norm": 0.566596895824316, "learning_rate": 9.729012926055674e-06, "loss": 0.0625, "step": 1158 }, { "epoch": 0.5272975432211101, "grad_norm": 0.9035766920121469, "learning_rate": 9.728548580169632e-06, "loss": 0.1013, "step": 1159 }, { "epoch": 0.5277525022747953, "grad_norm": 0.8241016260766749, "learning_rate": 9.728083847887955e-06, "loss": 0.078, "step": 1160 }, { "epoch": 0.5282074613284804, "grad_norm": 0.7435557294319748, "learning_rate": 9.727618729248617e-06, "loss": 0.0864, "step": 1161 }, { "epoch": 0.5286624203821656, "grad_norm": 0.6611375262646607, "learning_rate": 9.727153224289627e-06, "loss": 0.0769, "step": 1162 }, { "epoch": 0.5291173794358508, "grad_norm": 0.8275931946782299, "learning_rate": 9.726687333049024e-06, "loss": 0.0889, "step": 1163 }, { "epoch": 0.5295723384895359, "grad_norm": 1.057751919756087, "learning_rate": 9.726221055564874e-06, "loss": 0.0851, "step": 1164 }, { "epoch": 0.5300272975432211, "grad_norm": 0.7884543920060787, "learning_rate": 9.725754391875287e-06, "loss": 0.0746, "step": 1165 }, { "epoch": 0.5304822565969063, "grad_norm": 0.8593529313000522, "learning_rate": 9.72528734201839e-06, "loss": 0.0828, "step": 1166 }, { "epoch": 0.5309372156505915, "grad_norm": 0.5225417485901063, "learning_rate": 9.72481990603235e-06, "loss": 0.0794, "step": 1167 }, { "epoch": 0.5313921747042766, "grad_norm": 0.8820660720540598, "learning_rate": 9.724352083955366e-06, "loss": 0.1059, "step": 1168 }, { "epoch": 0.5318471337579618, "grad_norm": 0.6775105748188827, "learning_rate": 9.723883875825664e-06, "loss": 0.079, "step": 1169 }, { "epoch": 0.532302092811647, "grad_norm": 0.5969175177573056, "learning_rate": 9.723415281681505e-06, "loss": 0.061, "step": 1170 }, { "epoch": 0.5327570518653321, "grad_norm": 0.7165111743049339, "learning_rate": 9.722946301561179e-06, "loss": 0.0824, "step": 1171 }, { "epoch": 0.5332120109190173, "grad_norm": 0.7771351455478163, "learning_rate": 9.722476935503011e-06, "loss": 0.0936, "step": 1172 }, { "epoch": 0.5336669699727025, "grad_norm": 0.5612071801020553, "learning_rate": 9.722007183545353e-06, "loss": 0.0584, "step": 1173 }, { "epoch": 0.5341219290263877, "grad_norm": 0.7630759308283642, "learning_rate": 9.721537045726594e-06, "loss": 0.0711, "step": 1174 }, { "epoch": 0.5345768880800728, "grad_norm": 0.7415951616336062, "learning_rate": 9.721066522085148e-06, "loss": 0.0786, "step": 1175 }, { "epoch": 0.535031847133758, "grad_norm": 0.6697058559185771, "learning_rate": 9.720595612659467e-06, "loss": 0.0943, "step": 1176 }, { "epoch": 0.5354868061874432, "grad_norm": 0.8294561042543531, "learning_rate": 9.720124317488031e-06, "loss": 0.0766, "step": 1177 }, { "epoch": 0.5359417652411284, "grad_norm": 0.8069252663248169, "learning_rate": 9.719652636609351e-06, "loss": 0.1036, "step": 1178 }, { "epoch": 0.5363967242948134, "grad_norm": 0.5216393236723873, "learning_rate": 9.719180570061973e-06, "loss": 0.0681, "step": 1179 }, { "epoch": 0.5368516833484986, "grad_norm": 0.7561882785891234, "learning_rate": 9.718708117884468e-06, "loss": 0.0888, "step": 1180 }, { "epoch": 0.5373066424021838, "grad_norm": 0.7101886443887773, "learning_rate": 9.718235280115446e-06, "loss": 0.0841, "step": 1181 }, { "epoch": 0.5377616014558689, "grad_norm": 0.93883085852681, "learning_rate": 9.717762056793545e-06, "loss": 0.1116, "step": 1182 }, { "epoch": 0.5382165605095541, "grad_norm": 0.8029318164759022, "learning_rate": 9.717288447957433e-06, "loss": 0.0817, "step": 1183 }, { "epoch": 0.5386715195632393, "grad_norm": 0.7189629467174897, "learning_rate": 9.716814453645811e-06, "loss": 0.0913, "step": 1184 }, { "epoch": 0.5391264786169245, "grad_norm": 0.6194922793353296, "learning_rate": 9.716340073897414e-06, "loss": 0.073, "step": 1185 }, { "epoch": 0.5395814376706096, "grad_norm": 0.5862599296496694, "learning_rate": 9.715865308751006e-06, "loss": 0.0599, "step": 1186 }, { "epoch": 0.5400363967242948, "grad_norm": 1.0638863826866105, "learning_rate": 9.715390158245381e-06, "loss": 0.1412, "step": 1187 }, { "epoch": 0.54049135577798, "grad_norm": 0.6031416289368001, "learning_rate": 9.714914622419367e-06, "loss": 0.0694, "step": 1188 }, { "epoch": 0.5409463148316651, "grad_norm": 0.5762096954254395, "learning_rate": 9.714438701311822e-06, "loss": 0.0627, "step": 1189 }, { "epoch": 0.5414012738853503, "grad_norm": 0.6077021479661606, "learning_rate": 9.713962394961636e-06, "loss": 0.067, "step": 1190 }, { "epoch": 0.5418562329390355, "grad_norm": 0.5381873559759192, "learning_rate": 9.713485703407732e-06, "loss": 0.0595, "step": 1191 }, { "epoch": 0.5423111919927207, "grad_norm": 0.7866618609648011, "learning_rate": 9.713008626689063e-06, "loss": 0.1064, "step": 1192 }, { "epoch": 0.5427661510464058, "grad_norm": 0.7100862231154079, "learning_rate": 9.712531164844611e-06, "loss": 0.07, "step": 1193 }, { "epoch": 0.543221110100091, "grad_norm": 0.5579932774059501, "learning_rate": 9.712053317913394e-06, "loss": 0.0525, "step": 1194 }, { "epoch": 0.5436760691537762, "grad_norm": 0.5454543895601387, "learning_rate": 9.711575085934459e-06, "loss": 0.0741, "step": 1195 }, { "epoch": 0.5441310282074613, "grad_norm": 0.6754854519258514, "learning_rate": 9.711096468946888e-06, "loss": 0.101, "step": 1196 }, { "epoch": 0.5445859872611465, "grad_norm": 0.8125002765504534, "learning_rate": 9.710617466989787e-06, "loss": 0.0937, "step": 1197 }, { "epoch": 0.5450409463148317, "grad_norm": 0.5893498973936582, "learning_rate": 9.710138080102298e-06, "loss": 0.0658, "step": 1198 }, { "epoch": 0.5454959053685169, "grad_norm": 0.8107633297228217, "learning_rate": 9.709658308323597e-06, "loss": 0.0955, "step": 1199 }, { "epoch": 0.545950864422202, "grad_norm": 0.6726060122769176, "learning_rate": 9.70917815169289e-06, "loss": 0.084, "step": 1200 }, { "epoch": 0.5464058234758872, "grad_norm": 0.6077011277694447, "learning_rate": 9.708697610249407e-06, "loss": 0.0756, "step": 1201 }, { "epoch": 0.5468607825295724, "grad_norm": 0.7073007110523803, "learning_rate": 9.70821668403242e-06, "loss": 0.0818, "step": 1202 }, { "epoch": 0.5473157415832575, "grad_norm": 0.9420816064988972, "learning_rate": 9.707735373081231e-06, "loss": 0.1197, "step": 1203 }, { "epoch": 0.5477707006369427, "grad_norm": 0.552138579735494, "learning_rate": 9.707253677435165e-06, "loss": 0.0594, "step": 1204 }, { "epoch": 0.5482256596906279, "grad_norm": 0.6375758502862188, "learning_rate": 9.706771597133587e-06, "loss": 0.0572, "step": 1205 }, { "epoch": 0.5486806187443131, "grad_norm": 0.6581691945271008, "learning_rate": 9.706289132215889e-06, "loss": 0.0707, "step": 1206 }, { "epoch": 0.5491355777979982, "grad_norm": 0.820106985355047, "learning_rate": 9.705806282721498e-06, "loss": 0.0865, "step": 1207 }, { "epoch": 0.5495905368516834, "grad_norm": 0.5258555939105785, "learning_rate": 9.705323048689866e-06, "loss": 0.0462, "step": 1208 }, { "epoch": 0.5500454959053686, "grad_norm": 0.7818892498713288, "learning_rate": 9.704839430160487e-06, "loss": 0.1005, "step": 1209 }, { "epoch": 0.5505004549590536, "grad_norm": 0.6371281646305975, "learning_rate": 9.704355427172874e-06, "loss": 0.0712, "step": 1210 }, { "epoch": 0.5509554140127388, "grad_norm": 0.5981165031558572, "learning_rate": 9.70387103976658e-06, "loss": 0.0669, "step": 1211 }, { "epoch": 0.551410373066424, "grad_norm": 0.640233382171881, "learning_rate": 9.703386267981188e-06, "loss": 0.0629, "step": 1212 }, { "epoch": 0.5518653321201092, "grad_norm": 0.5436666812285462, "learning_rate": 9.70290111185631e-06, "loss": 0.0527, "step": 1213 }, { "epoch": 0.5523202911737943, "grad_norm": 0.9264418893677014, "learning_rate": 9.702415571431594e-06, "loss": 0.1392, "step": 1214 }, { "epoch": 0.5527752502274795, "grad_norm": 0.6659444469982292, "learning_rate": 9.70192964674671e-06, "loss": 0.0948, "step": 1215 }, { "epoch": 0.5532302092811647, "grad_norm": 0.5526163080676849, "learning_rate": 9.70144333784137e-06, "loss": 0.0661, "step": 1216 }, { "epoch": 0.5536851683348498, "grad_norm": 0.7994476768514381, "learning_rate": 9.700956644755313e-06, "loss": 0.0966, "step": 1217 }, { "epoch": 0.554140127388535, "grad_norm": 0.7919884013199107, "learning_rate": 9.700469567528307e-06, "loss": 0.1082, "step": 1218 }, { "epoch": 0.5545950864422202, "grad_norm": 0.7366932972024113, "learning_rate": 9.699982106200155e-06, "loss": 0.0841, "step": 1219 }, { "epoch": 0.5550500454959054, "grad_norm": 0.8558659635343526, "learning_rate": 9.699494260810692e-06, "loss": 0.0866, "step": 1220 }, { "epoch": 0.5555050045495905, "grad_norm": 0.8060928626360002, "learning_rate": 9.699006031399779e-06, "loss": 0.0777, "step": 1221 }, { "epoch": 0.5559599636032757, "grad_norm": 0.6914626835020681, "learning_rate": 9.698517418007314e-06, "loss": 0.0775, "step": 1222 }, { "epoch": 0.5564149226569609, "grad_norm": 0.8706739684427142, "learning_rate": 9.698028420673224e-06, "loss": 0.0984, "step": 1223 }, { "epoch": 0.556869881710646, "grad_norm": 0.7863016327992207, "learning_rate": 9.697539039437468e-06, "loss": 0.1118, "step": 1224 }, { "epoch": 0.5573248407643312, "grad_norm": 0.7719453440565228, "learning_rate": 9.697049274340036e-06, "loss": 0.0824, "step": 1225 }, { "epoch": 0.5577797998180164, "grad_norm": 1.1509899845731206, "learning_rate": 9.696559125420949e-06, "loss": 0.1254, "step": 1226 }, { "epoch": 0.5582347588717016, "grad_norm": 0.5202193771917482, "learning_rate": 9.696068592720257e-06, "loss": 0.0538, "step": 1227 }, { "epoch": 0.5586897179253867, "grad_norm": 0.5880633286090164, "learning_rate": 9.69557767627805e-06, "loss": 0.0711, "step": 1228 }, { "epoch": 0.5591446769790719, "grad_norm": 0.6342846572654288, "learning_rate": 9.695086376134438e-06, "loss": 0.0671, "step": 1229 }, { "epoch": 0.5595996360327571, "grad_norm": 0.7541651906429654, "learning_rate": 9.694594692329571e-06, "loss": 0.0813, "step": 1230 }, { "epoch": 0.5600545950864422, "grad_norm": 0.6416731945433944, "learning_rate": 9.694102624903627e-06, "loss": 0.0733, "step": 1231 }, { "epoch": 0.5605095541401274, "grad_norm": 1.0012992796464886, "learning_rate": 9.693610173896815e-06, "loss": 0.096, "step": 1232 }, { "epoch": 0.5609645131938126, "grad_norm": 0.725396699259508, "learning_rate": 9.693117339349376e-06, "loss": 0.0665, "step": 1233 }, { "epoch": 0.5614194722474978, "grad_norm": 0.7481457641805567, "learning_rate": 9.692624121301581e-06, "loss": 0.0715, "step": 1234 }, { "epoch": 0.5618744313011829, "grad_norm": 0.969766282604155, "learning_rate": 9.692130519793734e-06, "loss": 0.0991, "step": 1235 }, { "epoch": 0.5623293903548681, "grad_norm": 0.8522169509206354, "learning_rate": 9.691636534866172e-06, "loss": 0.1025, "step": 1236 }, { "epoch": 0.5627843494085533, "grad_norm": 0.7682304561659135, "learning_rate": 9.691142166559259e-06, "loss": 0.0846, "step": 1237 }, { "epoch": 0.5632393084622384, "grad_norm": 0.5495617218791536, "learning_rate": 9.690647414913392e-06, "loss": 0.0766, "step": 1238 }, { "epoch": 0.5636942675159236, "grad_norm": 0.6826816911759014, "learning_rate": 9.690152279969003e-06, "loss": 0.0729, "step": 1239 }, { "epoch": 0.5641492265696088, "grad_norm": 0.8352406959674302, "learning_rate": 9.689656761766548e-06, "loss": 0.0896, "step": 1240 }, { "epoch": 0.564604185623294, "grad_norm": 0.5908696548320724, "learning_rate": 9.689160860346522e-06, "loss": 0.0753, "step": 1241 }, { "epoch": 0.565059144676979, "grad_norm": 0.4283914528398344, "learning_rate": 9.688664575749447e-06, "loss": 0.0414, "step": 1242 }, { "epoch": 0.5655141037306642, "grad_norm": 0.6584468440229382, "learning_rate": 9.688167908015877e-06, "loss": 0.0733, "step": 1243 }, { "epoch": 0.5659690627843494, "grad_norm": 0.9211218848648471, "learning_rate": 9.687670857186396e-06, "loss": 0.1171, "step": 1244 }, { "epoch": 0.5664240218380345, "grad_norm": 0.9250852893692096, "learning_rate": 9.68717342330162e-06, "loss": 0.1061, "step": 1245 }, { "epoch": 0.5668789808917197, "grad_norm": 0.8688266055790496, "learning_rate": 9.686675606402203e-06, "loss": 0.1213, "step": 1246 }, { "epoch": 0.5673339399454049, "grad_norm": 0.7110325678190088, "learning_rate": 9.686177406528819e-06, "loss": 0.0836, "step": 1247 }, { "epoch": 0.5677888989990901, "grad_norm": 0.8260984800022192, "learning_rate": 9.685678823722178e-06, "loss": 0.0907, "step": 1248 }, { "epoch": 0.5682438580527752, "grad_norm": 0.6625042460625208, "learning_rate": 9.685179858023026e-06, "loss": 0.0777, "step": 1249 }, { "epoch": 0.5686988171064604, "grad_norm": 0.711324638729454, "learning_rate": 9.684680509472133e-06, "loss": 0.0815, "step": 1250 }, { "epoch": 0.5691537761601456, "grad_norm": 0.6863010294874783, "learning_rate": 9.684180778110306e-06, "loss": 0.0642, "step": 1251 }, { "epoch": 0.5696087352138307, "grad_norm": 0.5978880624303593, "learning_rate": 9.683680663978377e-06, "loss": 0.065, "step": 1252 }, { "epoch": 0.5700636942675159, "grad_norm": 0.6322068932784428, "learning_rate": 9.683180167117216e-06, "loss": 0.0681, "step": 1253 }, { "epoch": 0.5705186533212011, "grad_norm": 0.7826720403434554, "learning_rate": 9.682679287567722e-06, "loss": 0.0881, "step": 1254 }, { "epoch": 0.5709736123748863, "grad_norm": 0.794807695787425, "learning_rate": 9.682178025370824e-06, "loss": 0.1118, "step": 1255 }, { "epoch": 0.5714285714285714, "grad_norm": 0.7050268620804678, "learning_rate": 9.681676380567482e-06, "loss": 0.0839, "step": 1256 }, { "epoch": 0.5718835304822566, "grad_norm": 0.5581694578677082, "learning_rate": 9.681174353198687e-06, "loss": 0.0482, "step": 1257 }, { "epoch": 0.5723384895359418, "grad_norm": 0.6766600070725707, "learning_rate": 9.680671943305465e-06, "loss": 0.0679, "step": 1258 }, { "epoch": 0.5727934485896269, "grad_norm": 0.6995276308642288, "learning_rate": 9.680169150928868e-06, "loss": 0.0823, "step": 1259 }, { "epoch": 0.5732484076433121, "grad_norm": 0.6008334474427011, "learning_rate": 9.679665976109985e-06, "loss": 0.0669, "step": 1260 }, { "epoch": 0.5737033666969973, "grad_norm": 0.6951316344905618, "learning_rate": 9.679162418889932e-06, "loss": 0.0644, "step": 1261 }, { "epoch": 0.5741583257506825, "grad_norm": 0.7661270676130627, "learning_rate": 9.678658479309854e-06, "loss": 0.0837, "step": 1262 }, { "epoch": 0.5746132848043676, "grad_norm": 0.7593531327031607, "learning_rate": 9.678154157410937e-06, "loss": 0.0646, "step": 1263 }, { "epoch": 0.5750682438580528, "grad_norm": 0.7824619403016152, "learning_rate": 9.677649453234388e-06, "loss": 0.0907, "step": 1264 }, { "epoch": 0.575523202911738, "grad_norm": 0.8187746029529864, "learning_rate": 9.67714436682145e-06, "loss": 0.0906, "step": 1265 }, { "epoch": 0.5759781619654231, "grad_norm": 0.7676559233650921, "learning_rate": 9.676638898213394e-06, "loss": 0.0839, "step": 1266 }, { "epoch": 0.5764331210191083, "grad_norm": 0.5944493207466681, "learning_rate": 9.676133047451528e-06, "loss": 0.0588, "step": 1267 }, { "epoch": 0.5768880800727935, "grad_norm": 0.6734586229257056, "learning_rate": 9.675626814577188e-06, "loss": 0.0804, "step": 1268 }, { "epoch": 0.5773430391264787, "grad_norm": 0.6315388478681175, "learning_rate": 9.675120199631738e-06, "loss": 0.0636, "step": 1269 }, { "epoch": 0.5777979981801638, "grad_norm": 0.7252277920198784, "learning_rate": 9.674613202656577e-06, "loss": 0.0842, "step": 1270 }, { "epoch": 0.578252957233849, "grad_norm": 0.58556718084403, "learning_rate": 9.674105823693139e-06, "loss": 0.0764, "step": 1271 }, { "epoch": 0.5787079162875342, "grad_norm": 0.7635901125586164, "learning_rate": 9.673598062782878e-06, "loss": 0.0907, "step": 1272 }, { "epoch": 0.5791628753412192, "grad_norm": 0.33852379656119563, "learning_rate": 9.67308991996729e-06, "loss": 0.0387, "step": 1273 }, { "epoch": 0.5796178343949044, "grad_norm": 0.8984557509320932, "learning_rate": 9.672581395287897e-06, "loss": 0.0969, "step": 1274 }, { "epoch": 0.5800727934485896, "grad_norm": 0.881696210059407, "learning_rate": 9.672072488786254e-06, "loss": 0.115, "step": 1275 }, { "epoch": 0.5805277525022748, "grad_norm": 0.805394208652388, "learning_rate": 9.671563200503947e-06, "loss": 0.0916, "step": 1276 }, { "epoch": 0.5809827115559599, "grad_norm": 0.5947193670178038, "learning_rate": 9.67105353048259e-06, "loss": 0.0645, "step": 1277 }, { "epoch": 0.5814376706096451, "grad_norm": 0.9345719582841384, "learning_rate": 9.670543478763834e-06, "loss": 0.0853, "step": 1278 }, { "epoch": 0.5818926296633303, "grad_norm": 0.46822310121822047, "learning_rate": 9.670033045389356e-06, "loss": 0.06, "step": 1279 }, { "epoch": 0.5823475887170154, "grad_norm": 0.882335352298928, "learning_rate": 9.669522230400868e-06, "loss": 0.1288, "step": 1280 }, { "epoch": 0.5828025477707006, "grad_norm": 0.7155876804587362, "learning_rate": 9.66901103384011e-06, "loss": 0.0923, "step": 1281 }, { "epoch": 0.5832575068243858, "grad_norm": 0.758339057709363, "learning_rate": 9.668499455748857e-06, "loss": 0.0866, "step": 1282 }, { "epoch": 0.583712465878071, "grad_norm": 0.5929990208040478, "learning_rate": 9.66798749616891e-06, "loss": 0.0571, "step": 1283 }, { "epoch": 0.5841674249317561, "grad_norm": 0.5486564328594907, "learning_rate": 9.667475155142104e-06, "loss": 0.0551, "step": 1284 }, { "epoch": 0.5846223839854413, "grad_norm": 0.6958253493282612, "learning_rate": 9.666962432710307e-06, "loss": 0.0731, "step": 1285 }, { "epoch": 0.5850773430391265, "grad_norm": 1.1984701204529857, "learning_rate": 9.666449328915418e-06, "loss": 0.1248, "step": 1286 }, { "epoch": 0.5855323020928116, "grad_norm": 1.07466414021835, "learning_rate": 9.66593584379936e-06, "loss": 0.0969, "step": 1287 }, { "epoch": 0.5859872611464968, "grad_norm": 0.7365065558485686, "learning_rate": 9.6654219774041e-06, "loss": 0.0768, "step": 1288 }, { "epoch": 0.586442220200182, "grad_norm": 0.7278778525375763, "learning_rate": 9.664907729771622e-06, "loss": 0.0931, "step": 1289 }, { "epoch": 0.5868971792538672, "grad_norm": 0.6940342908894654, "learning_rate": 9.664393100943951e-06, "loss": 0.0716, "step": 1290 }, { "epoch": 0.5873521383075523, "grad_norm": 0.7046475563496115, "learning_rate": 9.663878090963142e-06, "loss": 0.0833, "step": 1291 }, { "epoch": 0.5878070973612375, "grad_norm": 0.6554863862272154, "learning_rate": 9.663362699871275e-06, "loss": 0.0705, "step": 1292 }, { "epoch": 0.5882620564149227, "grad_norm": 0.610296786595235, "learning_rate": 9.66284692771047e-06, "loss": 0.0592, "step": 1293 }, { "epoch": 0.5887170154686078, "grad_norm": 0.6866815075031769, "learning_rate": 9.662330774522869e-06, "loss": 0.0748, "step": 1294 }, { "epoch": 0.589171974522293, "grad_norm": 0.5654106713312388, "learning_rate": 9.661814240350653e-06, "loss": 0.0546, "step": 1295 }, { "epoch": 0.5896269335759782, "grad_norm": 1.271034489401823, "learning_rate": 9.66129732523603e-06, "loss": 0.1473, "step": 1296 }, { "epoch": 0.5900818926296634, "grad_norm": 0.45734781465896296, "learning_rate": 9.66078002922124e-06, "loss": 0.0452, "step": 1297 }, { "epoch": 0.5905368516833485, "grad_norm": 0.8001910391102482, "learning_rate": 9.660262352348553e-06, "loss": 0.0801, "step": 1298 }, { "epoch": 0.5909918107370337, "grad_norm": 0.8095822615697389, "learning_rate": 9.659744294660272e-06, "loss": 0.0851, "step": 1299 }, { "epoch": 0.5914467697907189, "grad_norm": 0.6222175915293906, "learning_rate": 9.659225856198732e-06, "loss": 0.0725, "step": 1300 }, { "epoch": 0.591901728844404, "grad_norm": 0.5098172411498206, "learning_rate": 9.658707037006294e-06, "loss": 0.0586, "step": 1301 }, { "epoch": 0.5923566878980892, "grad_norm": 0.5056342525545805, "learning_rate": 9.658187837125357e-06, "loss": 0.0552, "step": 1302 }, { "epoch": 0.5928116469517744, "grad_norm": 0.8298114087640572, "learning_rate": 9.657668256598347e-06, "loss": 0.0976, "step": 1303 }, { "epoch": 0.5932666060054596, "grad_norm": 0.9354418819253106, "learning_rate": 9.657148295467719e-06, "loss": 0.1128, "step": 1304 }, { "epoch": 0.5937215650591446, "grad_norm": 0.732222390896743, "learning_rate": 9.656627953775964e-06, "loss": 0.0719, "step": 1305 }, { "epoch": 0.5941765241128298, "grad_norm": 0.817074061431315, "learning_rate": 9.6561072315656e-06, "loss": 0.097, "step": 1306 }, { "epoch": 0.594631483166515, "grad_norm": 0.6993010225350191, "learning_rate": 9.655586128879185e-06, "loss": 0.0866, "step": 1307 }, { "epoch": 0.5950864422202001, "grad_norm": 0.6036033167422408, "learning_rate": 9.655064645759291e-06, "loss": 0.0615, "step": 1308 }, { "epoch": 0.5955414012738853, "grad_norm": 0.4333029170805267, "learning_rate": 9.654542782248539e-06, "loss": 0.0333, "step": 1309 }, { "epoch": 0.5959963603275705, "grad_norm": 0.5158856954901245, "learning_rate": 9.65402053838957e-06, "loss": 0.0534, "step": 1310 }, { "epoch": 0.5964513193812557, "grad_norm": 0.8439407413306237, "learning_rate": 9.653497914225059e-06, "loss": 0.0886, "step": 1311 }, { "epoch": 0.5969062784349408, "grad_norm": 1.097335021441692, "learning_rate": 9.652974909797714e-06, "loss": 0.1184, "step": 1312 }, { "epoch": 0.597361237488626, "grad_norm": 0.6552117042192046, "learning_rate": 9.652451525150272e-06, "loss": 0.0719, "step": 1313 }, { "epoch": 0.5978161965423112, "grad_norm": 0.6353863518066384, "learning_rate": 9.651927760325504e-06, "loss": 0.0696, "step": 1314 }, { "epoch": 0.5982711555959963, "grad_norm": 0.9048456403488727, "learning_rate": 9.651403615366204e-06, "loss": 0.0859, "step": 1315 }, { "epoch": 0.5987261146496815, "grad_norm": 0.7176841695337582, "learning_rate": 9.650879090315207e-06, "loss": 0.0821, "step": 1316 }, { "epoch": 0.5991810737033667, "grad_norm": 0.696539124420045, "learning_rate": 9.650354185215374e-06, "loss": 0.0875, "step": 1317 }, { "epoch": 0.5996360327570519, "grad_norm": 0.5924500205612657, "learning_rate": 9.649828900109599e-06, "loss": 0.0646, "step": 1318 }, { "epoch": 0.600090991810737, "grad_norm": 0.5430407542910594, "learning_rate": 9.649303235040803e-06, "loss": 0.0486, "step": 1319 }, { "epoch": 0.6005459508644222, "grad_norm": 0.6459813862779727, "learning_rate": 9.648777190051944e-06, "loss": 0.0903, "step": 1320 }, { "epoch": 0.6010009099181074, "grad_norm": 0.6531397749427512, "learning_rate": 9.648250765186006e-06, "loss": 0.0638, "step": 1321 }, { "epoch": 0.6014558689717925, "grad_norm": 0.6616813941465042, "learning_rate": 9.647723960486006e-06, "loss": 0.0861, "step": 1322 }, { "epoch": 0.6019108280254777, "grad_norm": 0.8426003399558685, "learning_rate": 9.647196775994995e-06, "loss": 0.0928, "step": 1323 }, { "epoch": 0.6023657870791629, "grad_norm": 0.6908471872127779, "learning_rate": 9.646669211756049e-06, "loss": 0.064, "step": 1324 }, { "epoch": 0.6028207461328481, "grad_norm": 0.6969433310817453, "learning_rate": 9.64614126781228e-06, "loss": 0.0683, "step": 1325 }, { "epoch": 0.6032757051865332, "grad_norm": 0.7506047981065134, "learning_rate": 9.645612944206826e-06, "loss": 0.0849, "step": 1326 }, { "epoch": 0.6037306642402184, "grad_norm": 0.5624997977779479, "learning_rate": 9.645084240982862e-06, "loss": 0.064, "step": 1327 }, { "epoch": 0.6041856232939036, "grad_norm": 0.43671100502349636, "learning_rate": 9.644555158183592e-06, "loss": 0.0615, "step": 1328 }, { "epoch": 0.6046405823475887, "grad_norm": 0.553762280713577, "learning_rate": 9.64402569585225e-06, "loss": 0.0596, "step": 1329 }, { "epoch": 0.6050955414012739, "grad_norm": 0.6580653378362663, "learning_rate": 9.643495854032099e-06, "loss": 0.0558, "step": 1330 }, { "epoch": 0.6055505004549591, "grad_norm": 0.7656128172437318, "learning_rate": 9.642965632766437e-06, "loss": 0.0915, "step": 1331 }, { "epoch": 0.6060054595086443, "grad_norm": 0.49008300515141723, "learning_rate": 9.642435032098591e-06, "loss": 0.0553, "step": 1332 }, { "epoch": 0.6064604185623294, "grad_norm": 0.6058179105933948, "learning_rate": 9.64190405207192e-06, "loss": 0.0709, "step": 1333 }, { "epoch": 0.6069153776160146, "grad_norm": 0.6707142568108124, "learning_rate": 9.641372692729811e-06, "loss": 0.0715, "step": 1334 }, { "epoch": 0.6073703366696998, "grad_norm": 0.8710319334113071, "learning_rate": 9.640840954115686e-06, "loss": 0.091, "step": 1335 }, { "epoch": 0.607825295723385, "grad_norm": 0.7496993600003082, "learning_rate": 9.640308836272996e-06, "loss": 0.0932, "step": 1336 }, { "epoch": 0.60828025477707, "grad_norm": 0.9684583450547241, "learning_rate": 9.639776339245225e-06, "loss": 0.087, "step": 1337 }, { "epoch": 0.6087352138307552, "grad_norm": 0.7857186962980957, "learning_rate": 9.639243463075884e-06, "loss": 0.1084, "step": 1338 }, { "epoch": 0.6091901728844404, "grad_norm": 1.1677743182021476, "learning_rate": 9.638710207808518e-06, "loss": 0.0712, "step": 1339 }, { "epoch": 0.6096451319381255, "grad_norm": 0.725604064535932, "learning_rate": 9.6381765734867e-06, "loss": 0.077, "step": 1340 }, { "epoch": 0.6101000909918107, "grad_norm": 0.5923782964843433, "learning_rate": 9.63764256015404e-06, "loss": 0.0641, "step": 1341 }, { "epoch": 0.6105550500454959, "grad_norm": 0.7069177546563966, "learning_rate": 9.637108167854173e-06, "loss": 0.0747, "step": 1342 }, { "epoch": 0.6110100090991811, "grad_norm": 0.780384533965345, "learning_rate": 9.636573396630767e-06, "loss": 0.0709, "step": 1343 }, { "epoch": 0.6114649681528662, "grad_norm": 0.7305821703239879, "learning_rate": 9.636038246527523e-06, "loss": 0.0955, "step": 1344 }, { "epoch": 0.6119199272065514, "grad_norm": 0.6274215993935015, "learning_rate": 9.635502717588168e-06, "loss": 0.0656, "step": 1345 }, { "epoch": 0.6123748862602366, "grad_norm": 0.6018866737558257, "learning_rate": 9.634966809856465e-06, "loss": 0.0729, "step": 1346 }, { "epoch": 0.6128298453139217, "grad_norm": 0.9406786913650838, "learning_rate": 9.634430523376207e-06, "loss": 0.1105, "step": 1347 }, { "epoch": 0.6132848043676069, "grad_norm": 0.6910930219074588, "learning_rate": 9.633893858191214e-06, "loss": 0.0652, "step": 1348 }, { "epoch": 0.6137397634212921, "grad_norm": 0.6641071332456526, "learning_rate": 9.633356814345342e-06, "loss": 0.0896, "step": 1349 }, { "epoch": 0.6141947224749773, "grad_norm": 0.6463461735454817, "learning_rate": 9.632819391882475e-06, "loss": 0.0691, "step": 1350 }, { "epoch": 0.6146496815286624, "grad_norm": 0.6570738741447356, "learning_rate": 9.63228159084653e-06, "loss": 0.0726, "step": 1351 }, { "epoch": 0.6151046405823476, "grad_norm": 0.9251372605740943, "learning_rate": 9.631743411281451e-06, "loss": 0.1089, "step": 1352 }, { "epoch": 0.6155595996360328, "grad_norm": 1.0354136522724409, "learning_rate": 9.631204853231219e-06, "loss": 0.1065, "step": 1353 }, { "epoch": 0.6160145586897179, "grad_norm": 0.7577345531084587, "learning_rate": 9.630665916739839e-06, "loss": 0.083, "step": 1354 }, { "epoch": 0.6164695177434031, "grad_norm": 0.6775679844485006, "learning_rate": 9.630126601851353e-06, "loss": 0.065, "step": 1355 }, { "epoch": 0.6169244767970883, "grad_norm": 0.6510409015870585, "learning_rate": 9.62958690860983e-06, "loss": 0.0842, "step": 1356 }, { "epoch": 0.6173794358507735, "grad_norm": 0.6541401291987898, "learning_rate": 9.629046837059373e-06, "loss": 0.0809, "step": 1357 }, { "epoch": 0.6178343949044586, "grad_norm": 0.6773644747284383, "learning_rate": 9.628506387244111e-06, "loss": 0.08, "step": 1358 }, { "epoch": 0.6182893539581438, "grad_norm": 0.7401243921784199, "learning_rate": 9.627965559208212e-06, "loss": 0.0632, "step": 1359 }, { "epoch": 0.618744313011829, "grad_norm": 0.6255731586329286, "learning_rate": 9.627424352995866e-06, "loss": 0.0836, "step": 1360 }, { "epoch": 0.6191992720655141, "grad_norm": 0.8684189032240879, "learning_rate": 9.626882768651298e-06, "loss": 0.0918, "step": 1361 }, { "epoch": 0.6196542311191993, "grad_norm": 0.5565014005760545, "learning_rate": 9.626340806218765e-06, "loss": 0.0508, "step": 1362 }, { "epoch": 0.6201091901728845, "grad_norm": 0.580066419485805, "learning_rate": 9.625798465742555e-06, "loss": 0.0691, "step": 1363 }, { "epoch": 0.6205641492265697, "grad_norm": 0.5980127746625918, "learning_rate": 9.625255747266984e-06, "loss": 0.0674, "step": 1364 }, { "epoch": 0.6210191082802548, "grad_norm": 0.8518146992949526, "learning_rate": 9.6247126508364e-06, "loss": 0.1112, "step": 1365 }, { "epoch": 0.62147406733394, "grad_norm": 0.8485700961520207, "learning_rate": 9.624169176495185e-06, "loss": 0.0966, "step": 1366 }, { "epoch": 0.6219290263876252, "grad_norm": 0.9962639418238284, "learning_rate": 9.623625324287747e-06, "loss": 0.1047, "step": 1367 }, { "epoch": 0.6223839854413102, "grad_norm": 0.7706385402975253, "learning_rate": 9.623081094258527e-06, "loss": 0.1229, "step": 1368 }, { "epoch": 0.6228389444949954, "grad_norm": 0.9185957443221413, "learning_rate": 9.622536486451997e-06, "loss": 0.0981, "step": 1369 }, { "epoch": 0.6232939035486806, "grad_norm": 0.5737112203779396, "learning_rate": 9.621991500912662e-06, "loss": 0.0615, "step": 1370 }, { "epoch": 0.6237488626023658, "grad_norm": 0.8225187377418599, "learning_rate": 9.621446137685051e-06, "loss": 0.1032, "step": 1371 }, { "epoch": 0.6242038216560509, "grad_norm": 0.911993563924521, "learning_rate": 9.620900396813734e-06, "loss": 0.1052, "step": 1372 }, { "epoch": 0.6246587807097361, "grad_norm": 1.1969877300226637, "learning_rate": 9.620354278343306e-06, "loss": 0.1323, "step": 1373 }, { "epoch": 0.6251137397634213, "grad_norm": 0.49674299728731663, "learning_rate": 9.61980778231839e-06, "loss": 0.0469, "step": 1374 }, { "epoch": 0.6255686988171064, "grad_norm": 0.9419790098064809, "learning_rate": 9.619260908783645e-06, "loss": 0.0829, "step": 1375 }, { "epoch": 0.6260236578707916, "grad_norm": 0.8648992102518269, "learning_rate": 9.61871365778376e-06, "loss": 0.1227, "step": 1376 }, { "epoch": 0.6264786169244768, "grad_norm": 0.6855921150752273, "learning_rate": 9.618166029363452e-06, "loss": 0.0893, "step": 1377 }, { "epoch": 0.626933575978162, "grad_norm": 0.7460350385490577, "learning_rate": 9.61761802356747e-06, "loss": 0.1029, "step": 1378 }, { "epoch": 0.6273885350318471, "grad_norm": 0.6238948896650269, "learning_rate": 9.617069640440598e-06, "loss": 0.0671, "step": 1379 }, { "epoch": 0.6278434940855323, "grad_norm": 0.8484782740935036, "learning_rate": 9.616520880027645e-06, "loss": 0.1094, "step": 1380 }, { "epoch": 0.6282984531392175, "grad_norm": 0.4929008515621752, "learning_rate": 9.615971742373453e-06, "loss": 0.0621, "step": 1381 }, { "epoch": 0.6287534121929026, "grad_norm": 0.8230508842215047, "learning_rate": 9.615422227522897e-06, "loss": 0.0873, "step": 1382 }, { "epoch": 0.6292083712465878, "grad_norm": 0.8269677617343545, "learning_rate": 9.614872335520879e-06, "loss": 0.0996, "step": 1383 }, { "epoch": 0.629663330300273, "grad_norm": 0.7039938726965704, "learning_rate": 9.614322066412335e-06, "loss": 0.084, "step": 1384 }, { "epoch": 0.6301182893539582, "grad_norm": 0.7376546247757936, "learning_rate": 9.613771420242229e-06, "loss": 0.0857, "step": 1385 }, { "epoch": 0.6305732484076433, "grad_norm": 0.6736142636267153, "learning_rate": 9.613220397055558e-06, "loss": 0.0732, "step": 1386 }, { "epoch": 0.6310282074613285, "grad_norm": 0.7476942520500481, "learning_rate": 9.612668996897351e-06, "loss": 0.0713, "step": 1387 }, { "epoch": 0.6314831665150137, "grad_norm": 0.7359465201312233, "learning_rate": 9.612117219812662e-06, "loss": 0.0847, "step": 1388 }, { "epoch": 0.6319381255686988, "grad_norm": 0.9663363466846744, "learning_rate": 9.611565065846583e-06, "loss": 0.1015, "step": 1389 }, { "epoch": 0.632393084622384, "grad_norm": 0.7893446645403931, "learning_rate": 9.611012535044232e-06, "loss": 0.0983, "step": 1390 }, { "epoch": 0.6328480436760692, "grad_norm": 1.024989133088754, "learning_rate": 9.61045962745076e-06, "loss": 0.1102, "step": 1391 }, { "epoch": 0.6333030027297544, "grad_norm": 0.4979683651622851, "learning_rate": 9.609906343111348e-06, "loss": 0.0586, "step": 1392 }, { "epoch": 0.6337579617834395, "grad_norm": 1.1009002383858189, "learning_rate": 9.609352682071209e-06, "loss": 0.0963, "step": 1393 }, { "epoch": 0.6342129208371247, "grad_norm": 1.0522149389130615, "learning_rate": 9.608798644375583e-06, "loss": 0.1189, "step": 1394 }, { "epoch": 0.6346678798908099, "grad_norm": 0.9812979427333788, "learning_rate": 9.608244230069745e-06, "loss": 0.1216, "step": 1395 }, { "epoch": 0.635122838944495, "grad_norm": 0.7352050689297358, "learning_rate": 9.607689439199e-06, "loss": 0.0875, "step": 1396 }, { "epoch": 0.6355777979981801, "grad_norm": 0.8346962373874338, "learning_rate": 9.60713427180868e-06, "loss": 0.0872, "step": 1397 }, { "epoch": 0.6360327570518653, "grad_norm": 0.9100484302304894, "learning_rate": 9.606578727944156e-06, "loss": 0.1014, "step": 1398 }, { "epoch": 0.6364877161055505, "grad_norm": 0.6397054531308819, "learning_rate": 9.606022807650819e-06, "loss": 0.0661, "step": 1399 }, { "epoch": 0.6369426751592356, "grad_norm": 0.7013671405977515, "learning_rate": 9.6054665109741e-06, "loss": 0.0788, "step": 1400 }, { "epoch": 0.6373976342129208, "grad_norm": 0.7177935827049716, "learning_rate": 9.604909837959456e-06, "loss": 0.0739, "step": 1401 }, { "epoch": 0.637852593266606, "grad_norm": 1.0034339624615456, "learning_rate": 9.604352788652375e-06, "loss": 0.125, "step": 1402 }, { "epoch": 0.6383075523202911, "grad_norm": 0.7908500695821505, "learning_rate": 9.603795363098377e-06, "loss": 0.0626, "step": 1403 }, { "epoch": 0.6387625113739763, "grad_norm": 0.7396845097003291, "learning_rate": 9.603237561343013e-06, "loss": 0.0845, "step": 1404 }, { "epoch": 0.6392174704276615, "grad_norm": 0.6132031146325181, "learning_rate": 9.602679383431864e-06, "loss": 0.0832, "step": 1405 }, { "epoch": 0.6396724294813467, "grad_norm": 0.5848815265706712, "learning_rate": 9.602120829410539e-06, "loss": 0.0609, "step": 1406 }, { "epoch": 0.6401273885350318, "grad_norm": 1.1396916096380878, "learning_rate": 9.601561899324685e-06, "loss": 0.089, "step": 1407 }, { "epoch": 0.640582347588717, "grad_norm": 0.6243784477376835, "learning_rate": 9.601002593219972e-06, "loss": 0.0629, "step": 1408 }, { "epoch": 0.6410373066424022, "grad_norm": 0.7693306930944409, "learning_rate": 9.600442911142107e-06, "loss": 0.0975, "step": 1409 }, { "epoch": 0.6414922656960873, "grad_norm": 0.5824222441008058, "learning_rate": 9.599882853136821e-06, "loss": 0.0668, "step": 1410 }, { "epoch": 0.6419472247497725, "grad_norm": 0.7486427214965261, "learning_rate": 9.59932241924988e-06, "loss": 0.0885, "step": 1411 }, { "epoch": 0.6424021838034577, "grad_norm": 0.7403442425812181, "learning_rate": 9.598761609527084e-06, "loss": 0.0764, "step": 1412 }, { "epoch": 0.6428571428571429, "grad_norm": 0.8444168000337251, "learning_rate": 9.598200424014255e-06, "loss": 0.0901, "step": 1413 }, { "epoch": 0.643312101910828, "grad_norm": 0.6214870203253012, "learning_rate": 9.597638862757255e-06, "loss": 0.0641, "step": 1414 }, { "epoch": 0.6437670609645132, "grad_norm": 0.45639812216740483, "learning_rate": 9.597076925801967e-06, "loss": 0.0525, "step": 1415 }, { "epoch": 0.6442220200181984, "grad_norm": 0.5879645013041995, "learning_rate": 9.596514613194313e-06, "loss": 0.0664, "step": 1416 }, { "epoch": 0.6446769790718835, "grad_norm": 0.723485890557837, "learning_rate": 9.595951924980245e-06, "loss": 0.0878, "step": 1417 }, { "epoch": 0.6451319381255687, "grad_norm": 0.49190939142236517, "learning_rate": 9.595388861205738e-06, "loss": 0.0446, "step": 1418 }, { "epoch": 0.6455868971792539, "grad_norm": 0.8244975390610266, "learning_rate": 9.59482542191681e-06, "loss": 0.0927, "step": 1419 }, { "epoch": 0.6460418562329391, "grad_norm": 0.8365340393723969, "learning_rate": 9.594261607159494e-06, "loss": 0.0944, "step": 1420 }, { "epoch": 0.6464968152866242, "grad_norm": 0.9246231982112141, "learning_rate": 9.59369741697987e-06, "loss": 0.1132, "step": 1421 }, { "epoch": 0.6469517743403094, "grad_norm": 0.7576903487594321, "learning_rate": 9.593132851424036e-06, "loss": 0.0968, "step": 1422 }, { "epoch": 0.6474067333939946, "grad_norm": 0.7385455319846311, "learning_rate": 9.59256791053813e-06, "loss": 0.1045, "step": 1423 }, { "epoch": 0.6478616924476797, "grad_norm": 0.8466333605064674, "learning_rate": 9.592002594368312e-06, "loss": 0.1058, "step": 1424 }, { "epoch": 0.6483166515013649, "grad_norm": 0.9463191649116842, "learning_rate": 9.59143690296078e-06, "loss": 0.1179, "step": 1425 }, { "epoch": 0.6487716105550501, "grad_norm": 0.49506567565602905, "learning_rate": 9.590870836361758e-06, "loss": 0.0679, "step": 1426 }, { "epoch": 0.6492265696087353, "grad_norm": 0.9070193484568203, "learning_rate": 9.590304394617506e-06, "loss": 0.0889, "step": 1427 }, { "epoch": 0.6496815286624203, "grad_norm": 0.4746970963167155, "learning_rate": 9.589737577774308e-06, "loss": 0.0474, "step": 1428 }, { "epoch": 0.6501364877161055, "grad_norm": 0.7625565873276676, "learning_rate": 9.58917038587848e-06, "loss": 0.1052, "step": 1429 }, { "epoch": 0.6505914467697907, "grad_norm": 0.5544350713091404, "learning_rate": 9.588602818976374e-06, "loss": 0.0602, "step": 1430 }, { "epoch": 0.6510464058234758, "grad_norm": 0.8043877114109435, "learning_rate": 9.588034877114367e-06, "loss": 0.0714, "step": 1431 }, { "epoch": 0.651501364877161, "grad_norm": 0.6177719048805246, "learning_rate": 9.58746656033887e-06, "loss": 0.0822, "step": 1432 }, { "epoch": 0.6519563239308462, "grad_norm": 1.070732220715245, "learning_rate": 9.586897868696323e-06, "loss": 0.1203, "step": 1433 }, { "epoch": 0.6524112829845314, "grad_norm": 1.183590915899486, "learning_rate": 9.586328802233195e-06, "loss": 0.0935, "step": 1434 }, { "epoch": 0.6528662420382165, "grad_norm": 0.581772493938091, "learning_rate": 9.58575936099599e-06, "loss": 0.0682, "step": 1435 }, { "epoch": 0.6533212010919017, "grad_norm": 0.7377901301818582, "learning_rate": 9.58518954503124e-06, "loss": 0.0824, "step": 1436 }, { "epoch": 0.6537761601455869, "grad_norm": 0.9292214040800371, "learning_rate": 9.584619354385505e-06, "loss": 0.1138, "step": 1437 }, { "epoch": 0.654231119199272, "grad_norm": 0.7573270642921373, "learning_rate": 9.58404878910538e-06, "loss": 0.074, "step": 1438 }, { "epoch": 0.6546860782529572, "grad_norm": 0.5838864743945036, "learning_rate": 9.58347784923749e-06, "loss": 0.067, "step": 1439 }, { "epoch": 0.6551410373066424, "grad_norm": 0.6730458126896756, "learning_rate": 9.58290653482849e-06, "loss": 0.0632, "step": 1440 }, { "epoch": 0.6555959963603276, "grad_norm": 0.7216545389315259, "learning_rate": 9.582334845925063e-06, "loss": 0.0757, "step": 1441 }, { "epoch": 0.6560509554140127, "grad_norm": 0.929819001740202, "learning_rate": 9.581762782573926e-06, "loss": 0.0973, "step": 1442 }, { "epoch": 0.6565059144676979, "grad_norm": 0.7680577896195074, "learning_rate": 9.581190344821827e-06, "loss": 0.086, "step": 1443 }, { "epoch": 0.6569608735213831, "grad_norm": 0.8746535076926352, "learning_rate": 9.58061753271554e-06, "loss": 0.1085, "step": 1444 }, { "epoch": 0.6574158325750682, "grad_norm": 0.6364512825611769, "learning_rate": 9.580044346301875e-06, "loss": 0.0764, "step": 1445 }, { "epoch": 0.6578707916287534, "grad_norm": 0.47118649986170347, "learning_rate": 9.57947078562767e-06, "loss": 0.0506, "step": 1446 }, { "epoch": 0.6583257506824386, "grad_norm": 0.6564703457147261, "learning_rate": 9.578896850739792e-06, "loss": 0.0702, "step": 1447 }, { "epoch": 0.6587807097361238, "grad_norm": 0.6786314185300042, "learning_rate": 9.578322541685142e-06, "loss": 0.0778, "step": 1448 }, { "epoch": 0.6592356687898089, "grad_norm": 0.7866249519519628, "learning_rate": 9.577747858510647e-06, "loss": 0.1066, "step": 1449 }, { "epoch": 0.6596906278434941, "grad_norm": 0.8352652198110325, "learning_rate": 9.577172801263272e-06, "loss": 0.0973, "step": 1450 }, { "epoch": 0.6601455868971793, "grad_norm": 0.6694090591857538, "learning_rate": 9.576597369990006e-06, "loss": 0.077, "step": 1451 }, { "epoch": 0.6606005459508644, "grad_norm": 0.6613042389515336, "learning_rate": 9.576021564737871e-06, "loss": 0.0608, "step": 1452 }, { "epoch": 0.6610555050045496, "grad_norm": 0.7515982683897205, "learning_rate": 9.575445385553917e-06, "loss": 0.1003, "step": 1453 }, { "epoch": 0.6615104640582348, "grad_norm": 0.9769815693335377, "learning_rate": 9.57486883248523e-06, "loss": 0.0946, "step": 1454 }, { "epoch": 0.66196542311192, "grad_norm": 1.1665424395125852, "learning_rate": 9.574291905578922e-06, "loss": 0.1317, "step": 1455 }, { "epoch": 0.6624203821656051, "grad_norm": 0.6942177292436024, "learning_rate": 9.573714604882138e-06, "loss": 0.0615, "step": 1456 }, { "epoch": 0.6628753412192903, "grad_norm": 0.9194225981756011, "learning_rate": 9.57313693044205e-06, "loss": 0.0975, "step": 1457 }, { "epoch": 0.6633303002729755, "grad_norm": 0.7117926275391128, "learning_rate": 9.572558882305863e-06, "loss": 0.0847, "step": 1458 }, { "epoch": 0.6637852593266605, "grad_norm": 0.9546376743105418, "learning_rate": 9.571980460520815e-06, "loss": 0.1196, "step": 1459 }, { "epoch": 0.6642402183803457, "grad_norm": 0.8937437496424256, "learning_rate": 9.57140166513417e-06, "loss": 0.096, "step": 1460 }, { "epoch": 0.664695177434031, "grad_norm": 0.5937947199850856, "learning_rate": 9.570822496193225e-06, "loss": 0.058, "step": 1461 }, { "epoch": 0.6651501364877161, "grad_norm": 0.5756039867728808, "learning_rate": 9.570242953745307e-06, "loss": 0.082, "step": 1462 }, { "epoch": 0.6656050955414012, "grad_norm": 0.7416722804778516, "learning_rate": 9.569663037837776e-06, "loss": 0.098, "step": 1463 }, { "epoch": 0.6660600545950864, "grad_norm": 0.6377485683281849, "learning_rate": 9.569082748518017e-06, "loss": 0.0723, "step": 1464 }, { "epoch": 0.6665150136487716, "grad_norm": 0.7884664768500067, "learning_rate": 9.568502085833449e-06, "loss": 0.0884, "step": 1465 }, { "epoch": 0.6669699727024567, "grad_norm": 0.7723350087530905, "learning_rate": 9.567921049831522e-06, "loss": 0.0967, "step": 1466 }, { "epoch": 0.6674249317561419, "grad_norm": 0.7260885892233983, "learning_rate": 9.567339640559716e-06, "loss": 0.0812, "step": 1467 }, { "epoch": 0.6678798908098271, "grad_norm": 0.5596294621225263, "learning_rate": 9.566757858065538e-06, "loss": 0.0631, "step": 1468 }, { "epoch": 0.6683348498635123, "grad_norm": 0.7286352648100037, "learning_rate": 9.566175702396534e-06, "loss": 0.0823, "step": 1469 }, { "epoch": 0.6687898089171974, "grad_norm": 0.9301493673689373, "learning_rate": 9.565593173600271e-06, "loss": 0.0987, "step": 1470 }, { "epoch": 0.6692447679708826, "grad_norm": 0.6817718703338496, "learning_rate": 9.565010271724353e-06, "loss": 0.0755, "step": 1471 }, { "epoch": 0.6696997270245678, "grad_norm": 0.7526239018301766, "learning_rate": 9.56442699681641e-06, "loss": 0.0876, "step": 1472 }, { "epoch": 0.6701546860782529, "grad_norm": 0.7279647211742274, "learning_rate": 9.563843348924105e-06, "loss": 0.0681, "step": 1473 }, { "epoch": 0.6706096451319381, "grad_norm": 0.8487044021854026, "learning_rate": 9.563259328095132e-06, "loss": 0.0903, "step": 1474 }, { "epoch": 0.6710646041856233, "grad_norm": 0.609495225783116, "learning_rate": 9.562674934377214e-06, "loss": 0.0801, "step": 1475 }, { "epoch": 0.6715195632393085, "grad_norm": 0.7638645194963899, "learning_rate": 9.562090167818107e-06, "loss": 0.0874, "step": 1476 }, { "epoch": 0.6719745222929936, "grad_norm": 1.4076317151154771, "learning_rate": 9.561505028465593e-06, "loss": 0.0874, "step": 1477 }, { "epoch": 0.6724294813466788, "grad_norm": 0.6311161675673277, "learning_rate": 9.560919516367486e-06, "loss": 0.0738, "step": 1478 }, { "epoch": 0.672884440400364, "grad_norm": 0.638266808298586, "learning_rate": 9.560333631571634e-06, "loss": 0.0682, "step": 1479 }, { "epoch": 0.6733393994540491, "grad_norm": 0.7097356519617585, "learning_rate": 9.559747374125911e-06, "loss": 0.0987, "step": 1480 }, { "epoch": 0.6737943585077343, "grad_norm": 0.6502346745698145, "learning_rate": 9.559160744078226e-06, "loss": 0.0644, "step": 1481 }, { "epoch": 0.6742493175614195, "grad_norm": 1.056681303492363, "learning_rate": 9.558573741476513e-06, "loss": 0.0939, "step": 1482 }, { "epoch": 0.6747042766151047, "grad_norm": 0.7992268675141662, "learning_rate": 9.557986366368742e-06, "loss": 0.0733, "step": 1483 }, { "epoch": 0.6751592356687898, "grad_norm": 1.0832399406974047, "learning_rate": 9.557398618802907e-06, "loss": 0.1123, "step": 1484 }, { "epoch": 0.675614194722475, "grad_norm": 0.6543008513198456, "learning_rate": 9.556810498827039e-06, "loss": 0.0794, "step": 1485 }, { "epoch": 0.6760691537761602, "grad_norm": 0.6306597614421026, "learning_rate": 9.556222006489193e-06, "loss": 0.0786, "step": 1486 }, { "epoch": 0.6765241128298453, "grad_norm": 0.5618899284499352, "learning_rate": 9.555633141837462e-06, "loss": 0.0618, "step": 1487 }, { "epoch": 0.6769790718835305, "grad_norm": 0.6434016854657288, "learning_rate": 9.555043904919963e-06, "loss": 0.0796, "step": 1488 }, { "epoch": 0.6774340309372157, "grad_norm": 0.7512094182824542, "learning_rate": 9.554454295784848e-06, "loss": 0.0745, "step": 1489 }, { "epoch": 0.6778889899909009, "grad_norm": 0.662429978970196, "learning_rate": 9.553864314480294e-06, "loss": 0.0788, "step": 1490 }, { "epoch": 0.678343949044586, "grad_norm": 0.7125824073483379, "learning_rate": 9.553273961054514e-06, "loss": 0.072, "step": 1491 }, { "epoch": 0.6787989080982711, "grad_norm": 0.8599367957772613, "learning_rate": 9.552683235555749e-06, "loss": 0.0765, "step": 1492 }, { "epoch": 0.6792538671519563, "grad_norm": 0.7900843446637873, "learning_rate": 9.55209213803227e-06, "loss": 0.0861, "step": 1493 }, { "epoch": 0.6797088262056415, "grad_norm": 0.9492542185178791, "learning_rate": 9.551500668532377e-06, "loss": 0.1036, "step": 1494 }, { "epoch": 0.6801637852593266, "grad_norm": 0.5324340095596853, "learning_rate": 9.550908827104404e-06, "loss": 0.0509, "step": 1495 }, { "epoch": 0.6806187443130118, "grad_norm": 1.4654919772375794, "learning_rate": 9.550316613796716e-06, "loss": 0.0891, "step": 1496 }, { "epoch": 0.681073703366697, "grad_norm": 0.6964909028346599, "learning_rate": 9.549724028657698e-06, "loss": 0.0814, "step": 1497 }, { "epoch": 0.6815286624203821, "grad_norm": 0.7118346157191014, "learning_rate": 9.549131071735784e-06, "loss": 0.0711, "step": 1498 }, { "epoch": 0.6819836214740673, "grad_norm": 0.9814989838911676, "learning_rate": 9.54853774307942e-06, "loss": 0.0981, "step": 1499 }, { "epoch": 0.6824385805277525, "grad_norm": 0.8030617514029292, "learning_rate": 9.547944042737092e-06, "loss": 0.0944, "step": 1500 }, { "epoch": 0.6828935395814377, "grad_norm": 0.9091821467413523, "learning_rate": 9.547349970757317e-06, "loss": 0.1419, "step": 1501 }, { "epoch": 0.6833484986351228, "grad_norm": 0.7604842345576438, "learning_rate": 9.546755527188638e-06, "loss": 0.0616, "step": 1502 }, { "epoch": 0.683803457688808, "grad_norm": 0.7795635296832277, "learning_rate": 9.546160712079629e-06, "loss": 0.0819, "step": 1503 }, { "epoch": 0.6842584167424932, "grad_norm": 0.6155010796235886, "learning_rate": 9.545565525478896e-06, "loss": 0.0737, "step": 1504 }, { "epoch": 0.6847133757961783, "grad_norm": 0.6981564617213015, "learning_rate": 9.544969967435079e-06, "loss": 0.0786, "step": 1505 }, { "epoch": 0.6851683348498635, "grad_norm": 0.8590705218017948, "learning_rate": 9.54437403799684e-06, "loss": 0.0835, "step": 1506 }, { "epoch": 0.6856232939035487, "grad_norm": 0.8783591706447448, "learning_rate": 9.543777737212876e-06, "loss": 0.118, "step": 1507 }, { "epoch": 0.6860782529572339, "grad_norm": 0.5312480753344904, "learning_rate": 9.543181065131914e-06, "loss": 0.0535, "step": 1508 }, { "epoch": 0.686533212010919, "grad_norm": 0.6911478055364548, "learning_rate": 9.542584021802715e-06, "loss": 0.0651, "step": 1509 }, { "epoch": 0.6869881710646042, "grad_norm": 0.910176403224045, "learning_rate": 9.54198660727406e-06, "loss": 0.0916, "step": 1510 }, { "epoch": 0.6874431301182894, "grad_norm": 0.5369469100452242, "learning_rate": 9.541388821594774e-06, "loss": 0.064, "step": 1511 }, { "epoch": 0.6878980891719745, "grad_norm": 0.7242695685667516, "learning_rate": 9.540790664813702e-06, "loss": 0.0725, "step": 1512 }, { "epoch": 0.6883530482256597, "grad_norm": 0.7527422721071317, "learning_rate": 9.540192136979722e-06, "loss": 0.0863, "step": 1513 }, { "epoch": 0.6888080072793449, "grad_norm": 0.5409793571909967, "learning_rate": 9.539593238141745e-06, "loss": 0.0678, "step": 1514 }, { "epoch": 0.6892629663330301, "grad_norm": 0.5059270742296627, "learning_rate": 9.538993968348706e-06, "loss": 0.0613, "step": 1515 }, { "epoch": 0.6897179253867152, "grad_norm": 0.8092866682697022, "learning_rate": 9.538394327649581e-06, "loss": 0.0816, "step": 1516 }, { "epoch": 0.6901728844404004, "grad_norm": 0.7416822411067572, "learning_rate": 9.537794316093366e-06, "loss": 0.0736, "step": 1517 }, { "epoch": 0.6906278434940856, "grad_norm": 0.6013123530792879, "learning_rate": 9.537193933729092e-06, "loss": 0.0637, "step": 1518 }, { "epoch": 0.6910828025477707, "grad_norm": 1.0953662823641266, "learning_rate": 9.53659318060582e-06, "loss": 0.1381, "step": 1519 }, { "epoch": 0.6915377616014559, "grad_norm": 0.7906081758139587, "learning_rate": 9.535992056772639e-06, "loss": 0.088, "step": 1520 }, { "epoch": 0.6919927206551411, "grad_norm": 0.9984370937403453, "learning_rate": 9.535390562278673e-06, "loss": 0.086, "step": 1521 }, { "epoch": 0.6924476797088263, "grad_norm": 0.7438661675719108, "learning_rate": 9.53478869717307e-06, "loss": 0.0771, "step": 1522 }, { "epoch": 0.6929026387625113, "grad_norm": 0.85189844123529, "learning_rate": 9.534186461505015e-06, "loss": 0.1109, "step": 1523 }, { "epoch": 0.6933575978161965, "grad_norm": 0.7215256903381998, "learning_rate": 9.533583855323717e-06, "loss": 0.0947, "step": 1524 }, { "epoch": 0.6938125568698817, "grad_norm": 0.8936614524747819, "learning_rate": 9.532980878678422e-06, "loss": 0.0731, "step": 1525 }, { "epoch": 0.6942675159235668, "grad_norm": 0.7734700292932609, "learning_rate": 9.5323775316184e-06, "loss": 0.0844, "step": 1526 }, { "epoch": 0.694722474977252, "grad_norm": 0.7521845435610183, "learning_rate": 9.531773814192953e-06, "loss": 0.0878, "step": 1527 }, { "epoch": 0.6951774340309372, "grad_norm": 0.890089227377408, "learning_rate": 9.531169726451417e-06, "loss": 0.1128, "step": 1528 }, { "epoch": 0.6956323930846224, "grad_norm": 0.7682866565773229, "learning_rate": 9.530565268443153e-06, "loss": 0.0956, "step": 1529 }, { "epoch": 0.6960873521383075, "grad_norm": 0.9617852359873308, "learning_rate": 9.529960440217554e-06, "loss": 0.1088, "step": 1530 }, { "epoch": 0.6965423111919927, "grad_norm": 0.9775947633570551, "learning_rate": 9.529355241824045e-06, "loss": 0.107, "step": 1531 }, { "epoch": 0.6969972702456779, "grad_norm": 0.6007455012792351, "learning_rate": 9.528749673312082e-06, "loss": 0.0743, "step": 1532 }, { "epoch": 0.697452229299363, "grad_norm": 0.5419764603212612, "learning_rate": 9.528143734731143e-06, "loss": 0.0822, "step": 1533 }, { "epoch": 0.6979071883530482, "grad_norm": 0.8185575482665152, "learning_rate": 9.52753742613075e-06, "loss": 0.0832, "step": 1534 }, { "epoch": 0.6983621474067334, "grad_norm": 0.9643638751029543, "learning_rate": 9.526930747560446e-06, "loss": 0.1026, "step": 1535 }, { "epoch": 0.6988171064604186, "grad_norm": 0.8502651132594353, "learning_rate": 9.526323699069803e-06, "loss": 0.0902, "step": 1536 }, { "epoch": 0.6992720655141037, "grad_norm": 0.5376181329235236, "learning_rate": 9.525716280708428e-06, "loss": 0.068, "step": 1537 }, { "epoch": 0.6997270245677889, "grad_norm": 0.7166675033334694, "learning_rate": 9.525108492525957e-06, "loss": 0.0752, "step": 1538 }, { "epoch": 0.7001819836214741, "grad_norm": 0.43432195935007917, "learning_rate": 9.524500334572054e-06, "loss": 0.0417, "step": 1539 }, { "epoch": 0.7006369426751592, "grad_norm": 0.8369054167821826, "learning_rate": 9.523891806896417e-06, "loss": 0.1098, "step": 1540 }, { "epoch": 0.7010919017288444, "grad_norm": 0.49781336551041033, "learning_rate": 9.523282909548773e-06, "loss": 0.0618, "step": 1541 }, { "epoch": 0.7015468607825296, "grad_norm": 0.9187882410427298, "learning_rate": 9.522673642578873e-06, "loss": 0.1247, "step": 1542 }, { "epoch": 0.7020018198362148, "grad_norm": 0.5007920591193696, "learning_rate": 9.522064006036509e-06, "loss": 0.0601, "step": 1543 }, { "epoch": 0.7024567788898999, "grad_norm": 0.582945252861272, "learning_rate": 9.521453999971497e-06, "loss": 0.0585, "step": 1544 }, { "epoch": 0.7029117379435851, "grad_norm": 0.5749885951853907, "learning_rate": 9.520843624433681e-06, "loss": 0.0664, "step": 1545 }, { "epoch": 0.7033666969972703, "grad_norm": 0.9724598324631707, "learning_rate": 9.520232879472942e-06, "loss": 0.1199, "step": 1546 }, { "epoch": 0.7038216560509554, "grad_norm": 1.0592052108390146, "learning_rate": 9.519621765139181e-06, "loss": 0.1278, "step": 1547 }, { "epoch": 0.7042766151046406, "grad_norm": 0.42374402440173636, "learning_rate": 9.519010281482344e-06, "loss": 0.0446, "step": 1548 }, { "epoch": 0.7047315741583258, "grad_norm": 1.102301602930716, "learning_rate": 9.518398428552393e-06, "loss": 0.1226, "step": 1549 }, { "epoch": 0.705186533212011, "grad_norm": 0.6842519583257138, "learning_rate": 9.51778620639933e-06, "loss": 0.0905, "step": 1550 }, { "epoch": 0.7056414922656961, "grad_norm": 0.7530573117253311, "learning_rate": 9.517173615073177e-06, "loss": 0.0766, "step": 1551 }, { "epoch": 0.7060964513193813, "grad_norm": 0.43285639961604566, "learning_rate": 9.516560654623996e-06, "loss": 0.0475, "step": 1552 }, { "epoch": 0.7065514103730665, "grad_norm": 0.9094561094681402, "learning_rate": 9.515947325101875e-06, "loss": 0.0896, "step": 1553 }, { "epoch": 0.7070063694267515, "grad_norm": 0.6097385256206468, "learning_rate": 9.515333626556933e-06, "loss": 0.0653, "step": 1554 }, { "epoch": 0.7074613284804367, "grad_norm": 0.7304393114645329, "learning_rate": 9.514719559039318e-06, "loss": 0.0896, "step": 1555 }, { "epoch": 0.707916287534122, "grad_norm": 0.8799769831067698, "learning_rate": 9.514105122599208e-06, "loss": 0.1176, "step": 1556 }, { "epoch": 0.7083712465878071, "grad_norm": 1.0962688093811397, "learning_rate": 9.513490317286815e-06, "loss": 0.1174, "step": 1557 }, { "epoch": 0.7088262056414922, "grad_norm": 0.8022559500547495, "learning_rate": 9.512875143152373e-06, "loss": 0.0969, "step": 1558 }, { "epoch": 0.7092811646951774, "grad_norm": 0.37133918747574174, "learning_rate": 9.512259600246156e-06, "loss": 0.031, "step": 1559 }, { "epoch": 0.7097361237488626, "grad_norm": 0.6214125216955318, "learning_rate": 9.511643688618463e-06, "loss": 0.0943, "step": 1560 }, { "epoch": 0.7101910828025477, "grad_norm": 0.7097270108607417, "learning_rate": 9.51102740831962e-06, "loss": 0.0847, "step": 1561 }, { "epoch": 0.7106460418562329, "grad_norm": 0.8290870913254417, "learning_rate": 9.510410759399991e-06, "loss": 0.0867, "step": 1562 }, { "epoch": 0.7111010009099181, "grad_norm": 0.7141101307254801, "learning_rate": 9.50979374190996e-06, "loss": 0.0838, "step": 1563 }, { "epoch": 0.7115559599636033, "grad_norm": 0.8532705780985276, "learning_rate": 9.509176355899954e-06, "loss": 0.09, "step": 1564 }, { "epoch": 0.7120109190172884, "grad_norm": 0.6858037908830302, "learning_rate": 9.508558601420417e-06, "loss": 0.0637, "step": 1565 }, { "epoch": 0.7124658780709736, "grad_norm": 0.7489578082911201, "learning_rate": 9.507940478521833e-06, "loss": 0.1059, "step": 1566 }, { "epoch": 0.7129208371246588, "grad_norm": 0.5241685648277268, "learning_rate": 9.507321987254712e-06, "loss": 0.0474, "step": 1567 }, { "epoch": 0.7133757961783439, "grad_norm": 0.9862924439076355, "learning_rate": 9.50670312766959e-06, "loss": 0.1047, "step": 1568 }, { "epoch": 0.7138307552320291, "grad_norm": 0.8286292773017996, "learning_rate": 9.506083899817043e-06, "loss": 0.0808, "step": 1569 }, { "epoch": 0.7142857142857143, "grad_norm": 0.8166629192761119, "learning_rate": 9.505464303747667e-06, "loss": 0.079, "step": 1570 }, { "epoch": 0.7147406733393995, "grad_norm": 0.6651663578468047, "learning_rate": 9.504844339512096e-06, "loss": 0.0879, "step": 1571 }, { "epoch": 0.7151956323930846, "grad_norm": 0.5230779536546156, "learning_rate": 9.50422400716099e-06, "loss": 0.0585, "step": 1572 }, { "epoch": 0.7156505914467698, "grad_norm": 0.6543543054934573, "learning_rate": 9.503603306745036e-06, "loss": 0.0564, "step": 1573 }, { "epoch": 0.716105550500455, "grad_norm": 0.7812592861176204, "learning_rate": 9.502982238314962e-06, "loss": 0.0874, "step": 1574 }, { "epoch": 0.7165605095541401, "grad_norm": 0.5040232473993467, "learning_rate": 9.502360801921512e-06, "loss": 0.0532, "step": 1575 }, { "epoch": 0.7170154686078253, "grad_norm": 0.8631279038726943, "learning_rate": 9.501738997615471e-06, "loss": 0.1045, "step": 1576 }, { "epoch": 0.7174704276615105, "grad_norm": 0.7716014465645913, "learning_rate": 9.501116825447648e-06, "loss": 0.068, "step": 1577 }, { "epoch": 0.7179253867151957, "grad_norm": 0.5327432187838176, "learning_rate": 9.500494285468884e-06, "loss": 0.053, "step": 1578 }, { "epoch": 0.7183803457688808, "grad_norm": 0.8209926537375553, "learning_rate": 9.499871377730053e-06, "loss": 0.1164, "step": 1579 }, { "epoch": 0.718835304822566, "grad_norm": 0.5454374508074649, "learning_rate": 9.499248102282052e-06, "loss": 0.0579, "step": 1580 }, { "epoch": 0.7192902638762512, "grad_norm": 0.4944315103743207, "learning_rate": 9.498624459175815e-06, "loss": 0.0542, "step": 1581 }, { "epoch": 0.7197452229299363, "grad_norm": 0.8372013648456964, "learning_rate": 9.498000448462305e-06, "loss": 0.0948, "step": 1582 }, { "epoch": 0.7202001819836215, "grad_norm": 0.6792072434969908, "learning_rate": 9.49737607019251e-06, "loss": 0.0683, "step": 1583 }, { "epoch": 0.7206551410373067, "grad_norm": 0.6679228302277659, "learning_rate": 9.496751324417452e-06, "loss": 0.0526, "step": 1584 }, { "epoch": 0.7211101000909919, "grad_norm": 0.830168268257237, "learning_rate": 9.496126211188184e-06, "loss": 0.1049, "step": 1585 }, { "epoch": 0.721565059144677, "grad_norm": 0.7614112606151382, "learning_rate": 9.495500730555784e-06, "loss": 0.0966, "step": 1586 }, { "epoch": 0.7220200181983621, "grad_norm": 0.7574732623314945, "learning_rate": 9.494874882571368e-06, "loss": 0.0648, "step": 1587 }, { "epoch": 0.7224749772520473, "grad_norm": 0.7541681951930181, "learning_rate": 9.494248667286075e-06, "loss": 0.0905, "step": 1588 }, { "epoch": 0.7229299363057324, "grad_norm": 0.776748715422375, "learning_rate": 9.493622084751076e-06, "loss": 0.0841, "step": 1589 }, { "epoch": 0.7233848953594176, "grad_norm": 0.6440945504942991, "learning_rate": 9.492995135017574e-06, "loss": 0.0779, "step": 1590 }, { "epoch": 0.7238398544131028, "grad_norm": 0.658893968607762, "learning_rate": 9.4923678181368e-06, "loss": 0.0862, "step": 1591 }, { "epoch": 0.724294813466788, "grad_norm": 0.764304310956247, "learning_rate": 9.491740134160014e-06, "loss": 0.0834, "step": 1592 }, { "epoch": 0.7247497725204731, "grad_norm": 1.246667162089055, "learning_rate": 9.491112083138509e-06, "loss": 0.141, "step": 1593 }, { "epoch": 0.7252047315741583, "grad_norm": 0.7827390484343668, "learning_rate": 9.490483665123606e-06, "loss": 0.0687, "step": 1594 }, { "epoch": 0.7256596906278435, "grad_norm": 0.6055248563993239, "learning_rate": 9.489854880166658e-06, "loss": 0.0716, "step": 1595 }, { "epoch": 0.7261146496815286, "grad_norm": 0.7067865427149594, "learning_rate": 9.489225728319044e-06, "loss": 0.0756, "step": 1596 }, { "epoch": 0.7265696087352138, "grad_norm": 0.85395818798431, "learning_rate": 9.488596209632179e-06, "loss": 0.1099, "step": 1597 }, { "epoch": 0.727024567788899, "grad_norm": 0.6870669290352402, "learning_rate": 9.4879663241575e-06, "loss": 0.0703, "step": 1598 }, { "epoch": 0.7274795268425842, "grad_norm": 1.2809048497988667, "learning_rate": 9.48733607194648e-06, "loss": 0.1663, "step": 1599 }, { "epoch": 0.7279344858962693, "grad_norm": 0.7180890087653823, "learning_rate": 9.486705453050622e-06, "loss": 0.0738, "step": 1600 }, { "epoch": 0.7283894449499545, "grad_norm": 0.5662460892211576, "learning_rate": 9.486074467521456e-06, "loss": 0.0627, "step": 1601 }, { "epoch": 0.7288444040036397, "grad_norm": 0.7172800606287587, "learning_rate": 9.485443115410541e-06, "loss": 0.0715, "step": 1602 }, { "epoch": 0.7292993630573248, "grad_norm": 0.6146064647413995, "learning_rate": 9.484811396769475e-06, "loss": 0.0828, "step": 1603 }, { "epoch": 0.72975432211101, "grad_norm": 0.8606888467276742, "learning_rate": 9.484179311649873e-06, "loss": 0.0962, "step": 1604 }, { "epoch": 0.7302092811646952, "grad_norm": 0.46814164753859155, "learning_rate": 9.483546860103388e-06, "loss": 0.0477, "step": 1605 }, { "epoch": 0.7306642402183804, "grad_norm": 0.7370090010007736, "learning_rate": 9.4829140421817e-06, "loss": 0.081, "step": 1606 }, { "epoch": 0.7311191992720655, "grad_norm": 1.0689466216112777, "learning_rate": 9.482280857936522e-06, "loss": 0.109, "step": 1607 }, { "epoch": 0.7315741583257507, "grad_norm": 0.4147348220425697, "learning_rate": 9.481647307419594e-06, "loss": 0.0479, "step": 1608 }, { "epoch": 0.7320291173794359, "grad_norm": 0.4998747516198886, "learning_rate": 9.481013390682687e-06, "loss": 0.0634, "step": 1609 }, { "epoch": 0.732484076433121, "grad_norm": 0.8673371359679307, "learning_rate": 9.480379107777601e-06, "loss": 0.1108, "step": 1610 }, { "epoch": 0.7329390354868062, "grad_norm": 0.6369274329058493, "learning_rate": 9.47974445875617e-06, "loss": 0.0698, "step": 1611 }, { "epoch": 0.7333939945404914, "grad_norm": 0.6434647227835387, "learning_rate": 9.47910944367025e-06, "loss": 0.0618, "step": 1612 }, { "epoch": 0.7338489535941766, "grad_norm": 0.8035955314379585, "learning_rate": 9.478474062571735e-06, "loss": 0.0997, "step": 1613 }, { "epoch": 0.7343039126478617, "grad_norm": 0.7996949463502321, "learning_rate": 9.477838315512544e-06, "loss": 0.0873, "step": 1614 }, { "epoch": 0.7347588717015469, "grad_norm": 0.6484970204244012, "learning_rate": 9.477202202544626e-06, "loss": 0.0925, "step": 1615 }, { "epoch": 0.7352138307552321, "grad_norm": 0.6478821974846899, "learning_rate": 9.476565723719966e-06, "loss": 0.0693, "step": 1616 }, { "epoch": 0.7356687898089171, "grad_norm": 0.6896940284490023, "learning_rate": 9.475928879090568e-06, "loss": 0.0763, "step": 1617 }, { "epoch": 0.7361237488626023, "grad_norm": 0.6758264439259065, "learning_rate": 9.475291668708476e-06, "loss": 0.0717, "step": 1618 }, { "epoch": 0.7365787079162875, "grad_norm": 0.6285383601705616, "learning_rate": 9.474654092625758e-06, "loss": 0.0561, "step": 1619 }, { "epoch": 0.7370336669699727, "grad_norm": 0.7488998942485512, "learning_rate": 9.474016150894518e-06, "loss": 0.0765, "step": 1620 }, { "epoch": 0.7374886260236578, "grad_norm": 0.7511340475878087, "learning_rate": 9.47337784356688e-06, "loss": 0.0865, "step": 1621 }, { "epoch": 0.737943585077343, "grad_norm": 0.6908706816034008, "learning_rate": 9.472739170695006e-06, "loss": 0.0879, "step": 1622 }, { "epoch": 0.7383985441310282, "grad_norm": 0.9159671053782389, "learning_rate": 9.472100132331089e-06, "loss": 0.0862, "step": 1623 }, { "epoch": 0.7388535031847133, "grad_norm": 0.8367180794291794, "learning_rate": 9.471460728527342e-06, "loss": 0.0988, "step": 1624 }, { "epoch": 0.7393084622383985, "grad_norm": 0.6396536181540736, "learning_rate": 9.470820959336018e-06, "loss": 0.0742, "step": 1625 }, { "epoch": 0.7397634212920837, "grad_norm": 0.7212059639642758, "learning_rate": 9.470180824809394e-06, "loss": 0.0887, "step": 1626 }, { "epoch": 0.7402183803457689, "grad_norm": 0.6570480817818456, "learning_rate": 9.469540324999782e-06, "loss": 0.0654, "step": 1627 }, { "epoch": 0.740673339399454, "grad_norm": 0.6780217435395393, "learning_rate": 9.468899459959518e-06, "loss": 0.0613, "step": 1628 }, { "epoch": 0.7411282984531392, "grad_norm": 0.8367065537687267, "learning_rate": 9.468258229740972e-06, "loss": 0.087, "step": 1629 }, { "epoch": 0.7415832575068244, "grad_norm": 0.6724757485261361, "learning_rate": 9.467616634396542e-06, "loss": 0.0513, "step": 1630 }, { "epoch": 0.7420382165605095, "grad_norm": 0.5923362651506067, "learning_rate": 9.466974673978654e-06, "loss": 0.0668, "step": 1631 }, { "epoch": 0.7424931756141947, "grad_norm": 0.8046255156703264, "learning_rate": 9.466332348539772e-06, "loss": 0.0888, "step": 1632 }, { "epoch": 0.7429481346678799, "grad_norm": 0.7456071657218726, "learning_rate": 9.465689658132379e-06, "loss": 0.0872, "step": 1633 }, { "epoch": 0.7434030937215651, "grad_norm": 0.8751254537474247, "learning_rate": 9.465046602808994e-06, "loss": 0.0901, "step": 1634 }, { "epoch": 0.7438580527752502, "grad_norm": 0.9953711560207276, "learning_rate": 9.464403182622164e-06, "loss": 0.1175, "step": 1635 }, { "epoch": 0.7443130118289354, "grad_norm": 0.738323897945569, "learning_rate": 9.463759397624466e-06, "loss": 0.1016, "step": 1636 }, { "epoch": 0.7447679708826206, "grad_norm": 0.620705920516562, "learning_rate": 9.46311524786851e-06, "loss": 0.0654, "step": 1637 }, { "epoch": 0.7452229299363057, "grad_norm": 1.2433273775382216, "learning_rate": 9.462470733406929e-06, "loss": 0.1403, "step": 1638 }, { "epoch": 0.7456778889899909, "grad_norm": 1.0268174749706445, "learning_rate": 9.461825854292394e-06, "loss": 0.1065, "step": 1639 }, { "epoch": 0.7461328480436761, "grad_norm": 0.6942991337802967, "learning_rate": 9.4611806105776e-06, "loss": 0.0736, "step": 1640 }, { "epoch": 0.7465878070973613, "grad_norm": 0.8367822612372433, "learning_rate": 9.460535002315272e-06, "loss": 0.089, "step": 1641 }, { "epoch": 0.7470427661510464, "grad_norm": 0.5929887457730553, "learning_rate": 9.459889029558167e-06, "loss": 0.0665, "step": 1642 }, { "epoch": 0.7474977252047316, "grad_norm": 0.5692342733265978, "learning_rate": 9.459242692359072e-06, "loss": 0.0708, "step": 1643 }, { "epoch": 0.7479526842584168, "grad_norm": 0.6049162715481944, "learning_rate": 9.4585959907708e-06, "loss": 0.0716, "step": 1644 }, { "epoch": 0.7484076433121019, "grad_norm": 0.5865800556894495, "learning_rate": 9.457948924846201e-06, "loss": 0.0562, "step": 1645 }, { "epoch": 0.7488626023657871, "grad_norm": 1.018263961729041, "learning_rate": 9.457301494638147e-06, "loss": 0.1129, "step": 1646 }, { "epoch": 0.7493175614194723, "grad_norm": 0.8420303347709615, "learning_rate": 9.456653700199542e-06, "loss": 0.0982, "step": 1647 }, { "epoch": 0.7497725204731575, "grad_norm": 0.6178217269864875, "learning_rate": 9.456005541583326e-06, "loss": 0.0777, "step": 1648 }, { "epoch": 0.7502274795268425, "grad_norm": 0.6159701780113571, "learning_rate": 9.455357018842458e-06, "loss": 0.075, "step": 1649 }, { "epoch": 0.7506824385805277, "grad_norm": 0.5563337669331565, "learning_rate": 9.454708132029936e-06, "loss": 0.0594, "step": 1650 }, { "epoch": 0.7511373976342129, "grad_norm": 0.7796132603413727, "learning_rate": 9.454058881198782e-06, "loss": 0.0842, "step": 1651 }, { "epoch": 0.7515923566878981, "grad_norm": 0.5977999349867541, "learning_rate": 9.45340926640205e-06, "loss": 0.0623, "step": 1652 }, { "epoch": 0.7520473157415832, "grad_norm": 0.7762091660359064, "learning_rate": 9.452759287692824e-06, "loss": 0.0923, "step": 1653 }, { "epoch": 0.7525022747952684, "grad_norm": 1.029286283612893, "learning_rate": 9.452108945124218e-06, "loss": 0.1114, "step": 1654 }, { "epoch": 0.7529572338489536, "grad_norm": 0.5046695202197234, "learning_rate": 9.451458238749375e-06, "loss": 0.058, "step": 1655 }, { "epoch": 0.7534121929026387, "grad_norm": 0.6262659207860063, "learning_rate": 9.450807168621468e-06, "loss": 0.0607, "step": 1656 }, { "epoch": 0.7538671519563239, "grad_norm": 0.7451490801568118, "learning_rate": 9.450155734793697e-06, "loss": 0.0716, "step": 1657 }, { "epoch": 0.7543221110100091, "grad_norm": 0.6504007368655154, "learning_rate": 9.449503937319297e-06, "loss": 0.0913, "step": 1658 }, { "epoch": 0.7547770700636943, "grad_norm": 0.8923820492879996, "learning_rate": 9.448851776251528e-06, "loss": 0.0984, "step": 1659 }, { "epoch": 0.7552320291173794, "grad_norm": 0.7256175088606572, "learning_rate": 9.448199251643684e-06, "loss": 0.0834, "step": 1660 }, { "epoch": 0.7556869881710646, "grad_norm": 0.7778885787730276, "learning_rate": 9.447546363549085e-06, "loss": 0.0878, "step": 1661 }, { "epoch": 0.7561419472247498, "grad_norm": 0.8265030986085233, "learning_rate": 9.446893112021083e-06, "loss": 0.0827, "step": 1662 }, { "epoch": 0.7565969062784349, "grad_norm": 0.5801162274559535, "learning_rate": 9.446239497113055e-06, "loss": 0.0797, "step": 1663 }, { "epoch": 0.7570518653321201, "grad_norm": 0.8974914764997551, "learning_rate": 9.445585518878418e-06, "loss": 0.1088, "step": 1664 }, { "epoch": 0.7575068243858053, "grad_norm": 0.8878060872125964, "learning_rate": 9.444931177370605e-06, "loss": 0.1235, "step": 1665 } ], "logging_steps": 1, "max_steps": 10990, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 555, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 10958994579456.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }