| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.262511373976342, |
| "eval_steps": 500, |
| "global_step": 2775, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 9.461428161462043, |
| "learning_rate": 1e-05, |
| "loss": 0.1263, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 5.190780450250769, |
| "learning_rate": 9.99999979571129e-06, |
| "loss": 0.1723, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 7.521926017130347, |
| "learning_rate": 9.999999182845177e-06, |
| "loss": 0.1327, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 2.5665810200307217, |
| "learning_rate": 9.99999816140171e-06, |
| "loss": 0.1095, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 2.738508706395883, |
| "learning_rate": 9.999996731380973e-06, |
| "loss": 0.1151, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 2.67941899677245, |
| "learning_rate": 9.999994892783083e-06, |
| "loss": 0.0821, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 2.137586234420784, |
| "learning_rate": 9.99999264560819e-06, |
| "loss": 0.0729, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 2.8221590420989164, |
| "learning_rate": 9.999989989856477e-06, |
| "loss": 0.0929, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 1.6167314639784554, |
| "learning_rate": 9.999986925528164e-06, |
| "loss": 0.0466, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 2.1773262431631313, |
| "learning_rate": 9.999983452623498e-06, |
| "loss": 0.0709, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 7.6444390817806465, |
| "learning_rate": 9.999979571142765e-06, |
| "loss": 0.0809, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 2.034523884241798, |
| "learning_rate": 9.999975281086278e-06, |
| "loss": 0.0839, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 3.576108282005355, |
| "learning_rate": 9.999970582454392e-06, |
| "loss": 0.0728, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 2.623641566468802, |
| "learning_rate": 9.999965475247491e-06, |
| "loss": 0.1052, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 2.1413574998269085, |
| "learning_rate": 9.99995995946599e-06, |
| "loss": 0.0885, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 1.4859066724415246, |
| "learning_rate": 9.999954035110342e-06, |
| "loss": 0.0644, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 2.851793157608408, |
| "learning_rate": 9.999947702181027e-06, |
| "loss": 0.1057, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 4.693829546662477, |
| "learning_rate": 9.999940960678568e-06, |
| "loss": 0.0867, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 2.2728033563417362, |
| "learning_rate": 9.999933810603513e-06, |
| "loss": 0.0789, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 1.6705986173507794, |
| "learning_rate": 9.999926251956447e-06, |
| "loss": 0.0683, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 2.187579869114393, |
| "learning_rate": 9.999918284737986e-06, |
| "loss": 0.0984, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 2.328040268012338, |
| "learning_rate": 9.999909908948782e-06, |
| "loss": 0.0699, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 5.572389775693198, |
| "learning_rate": 9.999901124589519e-06, |
| "loss": 0.0912, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 1.84796719674859, |
| "learning_rate": 9.999891931660916e-06, |
| "loss": 0.1015, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 1.7501762990792236, |
| "learning_rate": 9.999882330163725e-06, |
| "loss": 0.0909, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 0.9922115950592263, |
| "learning_rate": 9.999872320098729e-06, |
| "loss": 0.0656, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 1.5612370560987539, |
| "learning_rate": 9.999861901466746e-06, |
| "loss": 0.0974, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 1.4617271794930395, |
| "learning_rate": 9.999851074268625e-06, |
| "loss": 0.0853, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 1.8127085104491556, |
| "learning_rate": 9.999839838505257e-06, |
| "loss": 0.1081, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 1.4710105512612208, |
| "learning_rate": 9.999828194177555e-06, |
| "loss": 0.0868, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 1.3474487189311888, |
| "learning_rate": 9.999816141286472e-06, |
| "loss": 0.0817, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 1.0967596652549403, |
| "learning_rate": 9.99980367983299e-06, |
| "loss": 0.0637, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 3.179425671823194, |
| "learning_rate": 9.999790809818134e-06, |
| "loss": 0.069, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 4.482257681577152, |
| "learning_rate": 9.999777531242951e-06, |
| "loss": 0.0915, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 3.953299040475791, |
| "learning_rate": 9.999763844108528e-06, |
| "loss": 0.0562, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 1.1127201050382067, |
| "learning_rate": 9.999749748415982e-06, |
| "loss": 0.0556, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 79.45756094624792, |
| "learning_rate": 9.999735244166464e-06, |
| "loss": 0.1223, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 2777.9092912017113, |
| "learning_rate": 9.99972033136116e-06, |
| "loss": 0.3211, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 2.5204693177238466, |
| "learning_rate": 9.999705010001291e-06, |
| "loss": 0.0723, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 2.2975907071135655, |
| "learning_rate": 9.999689280088105e-06, |
| "loss": 0.0696, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.998434349074003, |
| "learning_rate": 9.99967314162289e-06, |
| "loss": 0.083, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 3.882239448575704, |
| "learning_rate": 9.999656594606966e-06, |
| "loss": 0.1015, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 3.5286596480512493, |
| "learning_rate": 9.999639639041681e-06, |
| "loss": 0.0817, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 1.6933989447443707, |
| "learning_rate": 9.999622274928424e-06, |
| "loss": 0.1003, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 1.2483160046323276, |
| "learning_rate": 9.999604502268614e-06, |
| "loss": 0.0952, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 0.9417906124383243, |
| "learning_rate": 9.9995863210637e-06, |
| "loss": 0.0731, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 2.8195414757816897, |
| "learning_rate": 9.99956773131517e-06, |
| "loss": 0.1845, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 2.74390379471345, |
| "learning_rate": 9.999548733024545e-06, |
| "loss": 0.1826, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 1.5138494619527987, |
| "learning_rate": 9.999529326193373e-06, |
| "loss": 0.0857, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 1.215379974181271, |
| "learning_rate": 9.999509510823242e-06, |
| "loss": 0.0686, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 1.292187967807859, |
| "learning_rate": 9.999489286915773e-06, |
| "loss": 0.0707, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 1.7888013203563982, |
| "learning_rate": 9.999468654472614e-06, |
| "loss": 0.0682, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 0.8979425621703144, |
| "learning_rate": 9.999447613495457e-06, |
| "loss": 0.0508, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 1.9123835444775663, |
| "learning_rate": 9.99942616398602e-06, |
| "loss": 0.0689, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 0.9393581994096443, |
| "learning_rate": 9.99940430594605e-06, |
| "loss": 0.0496, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 1.0234476513644222, |
| "learning_rate": 9.999382039377339e-06, |
| "loss": 0.0601, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 0.9291387208138827, |
| "learning_rate": 9.999359364281704e-06, |
| "loss": 0.0377, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 1.8209170803663992, |
| "learning_rate": 9.999336280660999e-06, |
| "loss": 0.1144, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 1.1214625046464874, |
| "learning_rate": 9.99931278851711e-06, |
| "loss": 0.0622, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 1.0331723997917317, |
| "learning_rate": 9.999288887851956e-06, |
| "loss": 0.0667, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 1.0412381501406744, |
| "learning_rate": 9.999264578667493e-06, |
| "loss": 0.0566, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 1.4510603110658047, |
| "learning_rate": 9.999239860965703e-06, |
| "loss": 0.0845, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 1.301162540669183, |
| "learning_rate": 9.999214734748609e-06, |
| "loss": 0.0759, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 0.9977688847603402, |
| "learning_rate": 9.999189200018263e-06, |
| "loss": 0.0528, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 1.2894688842348854, |
| "learning_rate": 9.99916325677675e-06, |
| "loss": 0.0899, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 1.4627871680702638, |
| "learning_rate": 9.999136905026194e-06, |
| "loss": 0.1456, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 1.2304385710214434, |
| "learning_rate": 9.999110144768745e-06, |
| "loss": 0.079, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 1.085016380732753, |
| "learning_rate": 9.99908297600659e-06, |
| "loss": 0.0696, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 0.989450558642297, |
| "learning_rate": 9.99905539874195e-06, |
| "loss": 0.069, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 1.0510491151133208, |
| "learning_rate": 9.99902741297708e-06, |
| "loss": 0.0555, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 0.8938033562648371, |
| "learning_rate": 9.998999018714264e-06, |
| "loss": 0.0783, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.902512108322722, |
| "learning_rate": 9.998970215955824e-06, |
| "loss": 0.0702, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 0.7661831894133686, |
| "learning_rate": 9.998941004704113e-06, |
| "loss": 0.0519, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 1.1047249497744047, |
| "learning_rate": 9.998911384961518e-06, |
| "loss": 0.0773, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 0.7750047299312716, |
| "learning_rate": 9.998881356730458e-06, |
| "loss": 0.0598, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 0.9815801555720315, |
| "learning_rate": 9.99885092001339e-06, |
| "loss": 0.0661, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 1.3090963451351905, |
| "learning_rate": 9.998820074812799e-06, |
| "loss": 0.0713, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 1.1489338732270693, |
| "learning_rate": 9.998788821131207e-06, |
| "loss": 0.0946, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 0.9040381990998293, |
| "learning_rate": 9.998757158971164e-06, |
| "loss": 0.067, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 1.1019926198229115, |
| "learning_rate": 9.998725088335263e-06, |
| "loss": 0.0874, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 0.5779852750462403, |
| "learning_rate": 9.99869260922612e-06, |
| "loss": 0.0492, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 1.2769852710418472, |
| "learning_rate": 9.998659721646393e-06, |
| "loss": 0.0781, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 0.9020624084974485, |
| "learning_rate": 9.998626425598766e-06, |
| "loss": 0.0734, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 0.9626764462141776, |
| "learning_rate": 9.99859272108596e-06, |
| "loss": 0.0719, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 0.9435885887029873, |
| "learning_rate": 9.998558608110733e-06, |
| "loss": 0.0835, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 1.0578725525123687, |
| "learning_rate": 9.998524086675867e-06, |
| "loss": 0.0746, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 1.0366588534208079, |
| "learning_rate": 9.998489156784188e-06, |
| "loss": 0.0933, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 1.0595948680723846, |
| "learning_rate": 9.998453818438547e-06, |
| "loss": 0.0846, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 0.8807515753016749, |
| "learning_rate": 9.998418071641833e-06, |
| "loss": 0.0649, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 0.9034225145874141, |
| "learning_rate": 9.998381916396967e-06, |
| "loss": 0.0621, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 0.6732889821553815, |
| "learning_rate": 9.998345352706901e-06, |
| "loss": 0.0367, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 0.7136967603743426, |
| "learning_rate": 9.998308380574628e-06, |
| "loss": 0.0569, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 1.1459385364035048, |
| "learning_rate": 9.998271000003166e-06, |
| "loss": 0.1184, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 0.8224906129097734, |
| "learning_rate": 9.998233210995569e-06, |
| "loss": 0.0682, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 1.5182946932236698, |
| "learning_rate": 9.998195013554926e-06, |
| "loss": 0.0875, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 0.9355855711018981, |
| "learning_rate": 9.998156407684359e-06, |
| "loss": 0.0939, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 0.7329840867165283, |
| "learning_rate": 9.998117393387022e-06, |
| "loss": 0.0466, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 0.8701001036058451, |
| "learning_rate": 9.9980779706661e-06, |
| "loss": 0.0729, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 1.0218896298663185, |
| "learning_rate": 9.99803813952482e-06, |
| "loss": 0.0828, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 0.9044995357273884, |
| "learning_rate": 9.997997899966433e-06, |
| "loss": 0.0709, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 0.9877796099816964, |
| "learning_rate": 9.99795725199423e-06, |
| "loss": 0.0903, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 1.0061501994463906, |
| "learning_rate": 9.99791619561153e-06, |
| "loss": 0.0831, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 0.8789173954818107, |
| "learning_rate": 9.997874730821689e-06, |
| "loss": 0.0714, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 15.480920098194954, |
| "learning_rate": 9.997832857628093e-06, |
| "loss": 0.2603, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 1.3806761301603454, |
| "learning_rate": 9.99779057603417e-06, |
| "loss": 0.1227, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 0.8462176607269959, |
| "learning_rate": 9.997747886043368e-06, |
| "loss": 0.0605, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 0.7467169847716549, |
| "learning_rate": 9.997704787659179e-06, |
| "loss": 0.0618, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 1.5653334818977065, |
| "learning_rate": 9.997661280885125e-06, |
| "loss": 0.1253, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 0.871706038604149, |
| "learning_rate": 9.99761736572476e-06, |
| "loss": 0.0716, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 1.1398296008355844, |
| "learning_rate": 9.997573042181672e-06, |
| "loss": 0.0698, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 1.0487992691419916, |
| "learning_rate": 9.997528310259485e-06, |
| "loss": 0.1102, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 0.9112684449646818, |
| "learning_rate": 9.997483169961852e-06, |
| "loss": 0.1032, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 0.9418790141923585, |
| "learning_rate": 9.997437621292463e-06, |
| "loss": 0.0771, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 0.7796140692842074, |
| "learning_rate": 9.99739166425504e-06, |
| "loss": 0.0627, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 1.5434421216734795, |
| "learning_rate": 9.997345298853339e-06, |
| "loss": 0.1495, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 0.8898179660551836, |
| "learning_rate": 9.997298525091148e-06, |
| "loss": 0.0735, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 0.8585916871524272, |
| "learning_rate": 9.997251342972288e-06, |
| "loss": 0.068, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 0.812806800238708, |
| "learning_rate": 9.997203752500616e-06, |
| "loss": 0.0689, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 0.9677722064277628, |
| "learning_rate": 9.997155753680021e-06, |
| "loss": 0.0795, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 1.621934591654054, |
| "learning_rate": 9.997107346514425e-06, |
| "loss": 0.0707, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 0.6750452750311531, |
| "learning_rate": 9.997058531007782e-06, |
| "loss": 0.0588, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 0.9583870506818666, |
| "learning_rate": 9.997009307164083e-06, |
| "loss": 0.0859, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 1.247483970027119, |
| "learning_rate": 9.99695967498735e-06, |
| "loss": 0.0952, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 0.7937903902273558, |
| "learning_rate": 9.996909634481639e-06, |
| "loss": 0.0614, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 4.855426128828546, |
| "learning_rate": 9.996859185651038e-06, |
| "loss": 0.1629, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 1.0499970639607177, |
| "learning_rate": 9.99680832849967e-06, |
| "loss": 0.1031, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 0.8730447821488512, |
| "learning_rate": 9.99675706303169e-06, |
| "loss": 0.0606, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 1.2779985416162813, |
| "learning_rate": 9.99670538925129e-06, |
| "loss": 0.074, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 0.8606157718419157, |
| "learning_rate": 9.996653307162687e-06, |
| "loss": 0.0703, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 0.8920761218762643, |
| "learning_rate": 9.996600816770144e-06, |
| "loss": 0.0818, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 1.1603462045917847, |
| "learning_rate": 9.996547918077944e-06, |
| "loss": 0.1148, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 0.9108713801214797, |
| "learning_rate": 9.996494611090414e-06, |
| "loss": 0.0884, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 0.6523725468628359, |
| "learning_rate": 9.996440895811907e-06, |
| "loss": 0.0535, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 0.8812777694752004, |
| "learning_rate": 9.996386772246816e-06, |
| "loss": 0.087, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 1.0622191207422995, |
| "learning_rate": 9.99633224039956e-06, |
| "loss": 0.0982, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 3.7961077321923025, |
| "learning_rate": 9.996277300274596e-06, |
| "loss": 0.1526, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 0.9444433559435487, |
| "learning_rate": 9.996221951876415e-06, |
| "loss": 0.0996, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 1.444871481552235, |
| "learning_rate": 9.996166195209539e-06, |
| "loss": 0.1075, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 0.7446446480732116, |
| "learning_rate": 9.996110030278522e-06, |
| "loss": 0.0561, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 0.8913010543094952, |
| "learning_rate": 9.996053457087958e-06, |
| "loss": 0.0715, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 0.7815821404043856, |
| "learning_rate": 9.995996475642466e-06, |
| "loss": 0.0796, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 0.74337588448595, |
| "learning_rate": 9.995939085946704e-06, |
| "loss": 0.0661, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 0.9974255688753435, |
| "learning_rate": 9.995881288005363e-06, |
| "loss": 0.0869, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 1.2260290141946268, |
| "learning_rate": 9.995823081823162e-06, |
| "loss": 0.0766, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 0.9751795993584637, |
| "learning_rate": 9.99576446740486e-06, |
| "loss": 0.091, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 1.6175476325168967, |
| "learning_rate": 9.995705444755249e-06, |
| "loss": 0.1208, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 0.7580083688127299, |
| "learning_rate": 9.995646013879147e-06, |
| "loss": 0.0622, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 1.0194887039793072, |
| "learning_rate": 9.995586174781413e-06, |
| "loss": 0.0753, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 0.9065646408503975, |
| "learning_rate": 9.995525927466936e-06, |
| "loss": 0.0848, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 0.8871078738477127, |
| "learning_rate": 9.995465271940641e-06, |
| "loss": 0.0607, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.1486707652049646, |
| "learning_rate": 9.995404208207485e-06, |
| "loss": 0.0809, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 1.1473150526096232, |
| "learning_rate": 9.995342736272453e-06, |
| "loss": 0.1035, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 1.3025683052462544, |
| "learning_rate": 9.995280856140572e-06, |
| "loss": 0.1197, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 0.8069596755970996, |
| "learning_rate": 9.9952185678169e-06, |
| "loss": 0.0526, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 0.8153700064848134, |
| "learning_rate": 9.995155871306524e-06, |
| "loss": 0.0613, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 0.7319023745966868, |
| "learning_rate": 9.995092766614567e-06, |
| "loss": 0.0512, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.0146656175738817, |
| "learning_rate": 9.995029253746186e-06, |
| "loss": 0.0846, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 0.8015254985373994, |
| "learning_rate": 9.994965332706574e-06, |
| "loss": 0.0619, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 1.0630207312416284, |
| "learning_rate": 9.994901003500952e-06, |
| "loss": 0.0796, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 0.9431304991088505, |
| "learning_rate": 9.994836266134575e-06, |
| "loss": 0.0743, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 1.023738915097686, |
| "learning_rate": 9.994771120612737e-06, |
| "loss": 0.0888, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 0.9272637744585672, |
| "learning_rate": 9.994705566940757e-06, |
| "loss": 0.084, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 1.122378326253592, |
| "learning_rate": 9.994639605123994e-06, |
| "loss": 0.0961, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 0.753531768411978, |
| "learning_rate": 9.994573235167839e-06, |
| "loss": 0.0736, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 0.9314766958597749, |
| "learning_rate": 9.994506457077715e-06, |
| "loss": 0.0838, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 0.996008388557059, |
| "learning_rate": 9.994439270859077e-06, |
| "loss": 0.1076, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 0.9199332464612126, |
| "learning_rate": 9.994371676517418e-06, |
| "loss": 0.0724, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 0.8652292283168678, |
| "learning_rate": 9.994303674058259e-06, |
| "loss": 0.0628, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 0.8176262426438138, |
| "learning_rate": 9.994235263487158e-06, |
| "loss": 0.0743, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 0.8147855247941459, |
| "learning_rate": 9.994166444809705e-06, |
| "loss": 0.0559, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 0.7853019575635352, |
| "learning_rate": 9.994097218031524e-06, |
| "loss": 0.0681, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 0.8445610480134321, |
| "learning_rate": 9.994027583158272e-06, |
| "loss": 0.0785, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 0.8555498692388026, |
| "learning_rate": 9.993957540195638e-06, |
| "loss": 0.077, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 0.8281270493499452, |
| "learning_rate": 9.993887089149346e-06, |
| "loss": 0.0848, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 0.7180425978661062, |
| "learning_rate": 9.993816230025152e-06, |
| "loss": 0.0588, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 0.9287545326980071, |
| "learning_rate": 9.99374496282885e-06, |
| "loss": 0.0874, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 1.5950603980195528, |
| "learning_rate": 9.993673287566261e-06, |
| "loss": 0.1301, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 0.505966633973175, |
| "learning_rate": 9.99360120424324e-06, |
| "loss": 0.0459, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 0.6170796905443107, |
| "learning_rate": 9.993528712865681e-06, |
| "loss": 0.0666, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 0.8965600572228928, |
| "learning_rate": 9.993455813439507e-06, |
| "loss": 0.0648, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 0.7555745664692847, |
| "learning_rate": 9.993382505970673e-06, |
| "loss": 0.0479, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 0.7885826993774436, |
| "learning_rate": 9.99330879046517e-06, |
| "loss": 0.0605, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 0.6970911126559147, |
| "learning_rate": 9.993234666929024e-06, |
| "loss": 0.0545, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 0.8281240642020996, |
| "learning_rate": 9.99316013536829e-06, |
| "loss": 0.0651, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 0.8497823551734951, |
| "learning_rate": 9.993085195789057e-06, |
| "loss": 0.098, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 0.8425278224044996, |
| "learning_rate": 9.993009848197452e-06, |
| "loss": 0.0861, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 0.729342450692031, |
| "learning_rate": 9.992934092599629e-06, |
| "loss": 0.0651, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 0.8810253378927329, |
| "learning_rate": 9.99285792900178e-06, |
| "loss": 0.0995, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 1.0402457083445067, |
| "learning_rate": 9.992781357410131e-06, |
| "loss": 0.1061, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 0.7397036090930822, |
| "learning_rate": 9.992704377830934e-06, |
| "loss": 0.0571, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 1.4783630598693296, |
| "learning_rate": 9.992626990270484e-06, |
| "loss": 0.1154, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 1.1100322283473036, |
| "learning_rate": 9.992549194735101e-06, |
| "loss": 0.1179, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 0.5797984556503705, |
| "learning_rate": 9.992470991231144e-06, |
| "loss": 0.0466, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 1.059908713900853, |
| "learning_rate": 9.992392379765005e-06, |
| "loss": 0.0994, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 1.1187885391430794, |
| "learning_rate": 9.992313360343104e-06, |
| "loss": 0.0986, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 0.7509441330173129, |
| "learning_rate": 9.992233932971901e-06, |
| "loss": 0.0634, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 0.9426276516690344, |
| "learning_rate": 9.992154097657888e-06, |
| "loss": 0.0857, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 0.8754039034503873, |
| "learning_rate": 9.992073854407585e-06, |
| "loss": 0.0881, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 2.8697219156120712, |
| "learning_rate": 9.99199320322755e-06, |
| "loss": 0.0851, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 0.7429242681646778, |
| "learning_rate": 9.991912144124375e-06, |
| "loss": 0.0729, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 1.0552979449251756, |
| "learning_rate": 9.991830677104682e-06, |
| "loss": 0.1066, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 0.8812651371324355, |
| "learning_rate": 9.99174880217513e-06, |
| "loss": 0.0732, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 1.0755107845413352, |
| "learning_rate": 9.991666519342407e-06, |
| "loss": 0.0977, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 0.8925063431256136, |
| "learning_rate": 9.99158382861324e-06, |
| "loss": 0.0904, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 0.8190206986922173, |
| "learning_rate": 9.991500729994384e-06, |
| "loss": 0.0729, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 0.6635798147425112, |
| "learning_rate": 9.991417223492629e-06, |
| "loss": 0.0631, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 1.0314655306023923, |
| "learning_rate": 9.991333309114798e-06, |
| "loss": 0.0852, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 0.8533496857694978, |
| "learning_rate": 9.991248986867753e-06, |
| "loss": 0.0868, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 1.039085255997433, |
| "learning_rate": 9.991164256758378e-06, |
| "loss": 0.095, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 1.1484522866350177, |
| "learning_rate": 9.9910791187936e-06, |
| "loss": 0.1333, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 0.8277820800102422, |
| "learning_rate": 9.99099357298038e-06, |
| "loss": 0.0664, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 0.821796111319934, |
| "learning_rate": 9.9909076193257e-06, |
| "loss": 0.083, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 0.9448800546720313, |
| "learning_rate": 9.990821257836589e-06, |
| "loss": 0.0873, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 0.9002810379340489, |
| "learning_rate": 9.990734488520103e-06, |
| "loss": 0.099, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 0.6145149717344348, |
| "learning_rate": 9.990647311383334e-06, |
| "loss": 0.0425, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 1.1377497370761045, |
| "learning_rate": 9.990559726433404e-06, |
| "loss": 0.0903, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 0.8401357673155365, |
| "learning_rate": 9.99047173367747e-06, |
| "loss": 0.0812, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 0.6977882365614015, |
| "learning_rate": 9.990383333122722e-06, |
| "loss": 0.0613, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 0.6751056796776193, |
| "learning_rate": 9.990294524776384e-06, |
| "loss": 0.0636, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 0.7973250315161167, |
| "learning_rate": 9.990205308645716e-06, |
| "loss": 0.0655, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 0.6494979859380491, |
| "learning_rate": 9.990115684738005e-06, |
| "loss": 0.0461, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 0.7863907355652456, |
| "learning_rate": 9.990025653060574e-06, |
| "loss": 0.0881, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 1.2756737972223395, |
| "learning_rate": 9.98993521362078e-06, |
| "loss": 0.1102, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 1.1992554133605928, |
| "learning_rate": 9.989844366426018e-06, |
| "loss": 0.1147, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 0.5034605400337953, |
| "learning_rate": 9.989753111483707e-06, |
| "loss": 0.0462, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 0.9881921480518578, |
| "learning_rate": 9.989661448801305e-06, |
| "loss": 0.0848, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 0.7581777568438945, |
| "learning_rate": 9.989569378386303e-06, |
| "loss": 0.079, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 0.6464731162067388, |
| "learning_rate": 9.989476900246223e-06, |
| "loss": 0.0617, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 0.8780639185859085, |
| "learning_rate": 9.989384014388624e-06, |
| "loss": 0.086, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 0.6623808171307163, |
| "learning_rate": 9.989290720821095e-06, |
| "loss": 0.0694, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 0.721054554263859, |
| "learning_rate": 9.98919701955126e-06, |
| "loss": 0.0735, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 0.7868134014829404, |
| "learning_rate": 9.989102910586776e-06, |
| "loss": 0.0546, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 0.9137158371163484, |
| "learning_rate": 9.989008393935331e-06, |
| "loss": 0.0771, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 0.8326009579593463, |
| "learning_rate": 9.98891346960465e-06, |
| "loss": 0.0667, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 0.6462724580348628, |
| "learning_rate": 9.988818137602494e-06, |
| "loss": 0.0717, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 0.7513725247558808, |
| "learning_rate": 9.988722397936646e-06, |
| "loss": 0.0733, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 1.094509848236789, |
| "learning_rate": 9.988626250614932e-06, |
| "loss": 0.1009, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 0.8200579138639758, |
| "learning_rate": 9.98852969564521e-06, |
| "loss": 0.0844, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 0.7417763562196316, |
| "learning_rate": 9.988432733035369e-06, |
| "loss": 0.0611, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 0.8476475869820355, |
| "learning_rate": 9.988335362793333e-06, |
| "loss": 0.0863, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 0.9998642783878469, |
| "learning_rate": 9.988237584927058e-06, |
| "loss": 0.0909, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 1.1689324698997519, |
| "learning_rate": 9.988139399444534e-06, |
| "loss": 0.124, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 0.790901332269412, |
| "learning_rate": 9.988040806353786e-06, |
| "loss": 0.0855, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 0.8931785977847209, |
| "learning_rate": 9.987941805662869e-06, |
| "loss": 0.1023, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 0.7352781929773609, |
| "learning_rate": 9.98784239737987e-06, |
| "loss": 0.0563, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 0.7169092611535308, |
| "learning_rate": 9.987742581512919e-06, |
| "loss": 0.0683, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 0.6767560569792272, |
| "learning_rate": 9.987642358070167e-06, |
| "loss": 0.0669, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 0.8442319805699996, |
| "learning_rate": 9.987541727059805e-06, |
| "loss": 0.0768, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 0.7700876798522618, |
| "learning_rate": 9.987440688490058e-06, |
| "loss": 0.0643, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 0.7286087978317647, |
| "learning_rate": 9.98733924236918e-06, |
| "loss": 0.0698, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 0.7917355018437868, |
| "learning_rate": 9.98723738870546e-06, |
| "loss": 0.0791, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 1.0469499693242315, |
| "learning_rate": 9.987135127507226e-06, |
| "loss": 0.0761, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 0.8361714930383379, |
| "learning_rate": 9.987032458782828e-06, |
| "loss": 0.0789, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 0.5902853873046482, |
| "learning_rate": 9.986929382540662e-06, |
| "loss": 0.0479, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 0.7349436304465384, |
| "learning_rate": 9.986825898789145e-06, |
| "loss": 0.0668, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 0.7657107039148755, |
| "learning_rate": 9.986722007536737e-06, |
| "loss": 0.0617, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 0.6450631027744769, |
| "learning_rate": 9.986617708791926e-06, |
| "loss": 0.0679, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 0.6292930010016882, |
| "learning_rate": 9.986513002563236e-06, |
| "loss": 0.0482, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 0.8758541343517451, |
| "learning_rate": 9.986407888859221e-06, |
| "loss": 0.0994, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 0.6537445862223847, |
| "learning_rate": 9.986302367688473e-06, |
| "loss": 0.07, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 0.8029660816844667, |
| "learning_rate": 9.986196439059613e-06, |
| "loss": 0.0623, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 0.7339528606524214, |
| "learning_rate": 9.986090102981297e-06, |
| "loss": 0.0791, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 0.7934112522002073, |
| "learning_rate": 9.985983359462215e-06, |
| "loss": 0.0672, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 1.0186962263060808, |
| "learning_rate": 9.98587620851109e-06, |
| "loss": 0.1213, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 0.6769843647605545, |
| "learning_rate": 9.985768650136679e-06, |
| "loss": 0.0685, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 0.7543020935976431, |
| "learning_rate": 9.985660684347765e-06, |
| "loss": 0.0861, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 0.9552124731299731, |
| "learning_rate": 9.985552311153178e-06, |
| "loss": 0.0922, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 0.7436699167226903, |
| "learning_rate": 9.985443530561769e-06, |
| "loss": 0.0885, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 1.329058937551934, |
| "learning_rate": 9.98533434258243e-06, |
| "loss": 0.1115, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 0.6835909813818813, |
| "learning_rate": 9.985224747224083e-06, |
| "loss": 0.0586, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 1.0733107060854794, |
| "learning_rate": 9.98511474449568e-06, |
| "loss": 0.0811, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 0.5916007278667166, |
| "learning_rate": 9.985004334406215e-06, |
| "loss": 0.0696, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 0.9149357508392912, |
| "learning_rate": 9.984893516964707e-06, |
| "loss": 0.0704, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 1.1634742377762608, |
| "learning_rate": 9.984782292180212e-06, |
| "loss": 0.1178, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 0.603957454908005, |
| "learning_rate": 9.98467066006182e-06, |
| "loss": 0.0585, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 0.7735087790025026, |
| "learning_rate": 9.984558620618651e-06, |
| "loss": 0.0953, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 1.2570182633873541, |
| "learning_rate": 9.984446173859863e-06, |
| "loss": 0.1353, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 0.7275895818672663, |
| "learning_rate": 9.984333319794642e-06, |
| "loss": 0.0774, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 0.6395006056363333, |
| "learning_rate": 9.984220058432212e-06, |
| "loss": 0.0591, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 0.6563921850032347, |
| "learning_rate": 9.984106389781828e-06, |
| "loss": 0.0573, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 0.9399157526953884, |
| "learning_rate": 9.983992313852776e-06, |
| "loss": 0.0793, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 0.93528061821534, |
| "learning_rate": 9.983877830654381e-06, |
| "loss": 0.0807, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 0.7192448233352142, |
| "learning_rate": 9.983762940195996e-06, |
| "loss": 0.0773, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 0.7097381072031733, |
| "learning_rate": 9.98364764248701e-06, |
| "loss": 0.0698, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 1.1635566012920768, |
| "learning_rate": 9.983531937536844e-06, |
| "loss": 0.0893, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 0.8456555685011555, |
| "learning_rate": 9.983415825354954e-06, |
| "loss": 0.0628, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 0.7151838393189083, |
| "learning_rate": 9.983299305950828e-06, |
| "loss": 0.0557, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 0.7095193783870621, |
| "learning_rate": 9.983182379333989e-06, |
| "loss": 0.0604, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 0.8581434444337498, |
| "learning_rate": 9.983065045513986e-06, |
| "loss": 0.0781, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 0.5600994934804626, |
| "learning_rate": 9.982947304500414e-06, |
| "loss": 0.0498, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 0.7355720212694087, |
| "learning_rate": 9.98282915630289e-06, |
| "loss": 0.0692, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 1.6846985851500909, |
| "learning_rate": 9.98271060093107e-06, |
| "loss": 0.1687, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 0.7959406174268434, |
| "learning_rate": 9.98259163839464e-06, |
| "loss": 0.0718, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 0.6005858848115938, |
| "learning_rate": 9.982472268703323e-06, |
| "loss": 0.0465, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 0.7865103977061746, |
| "learning_rate": 9.982352491866874e-06, |
| "loss": 0.071, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 0.7167219429964851, |
| "learning_rate": 9.982232307895077e-06, |
| "loss": 0.0658, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 1.206398567596641, |
| "learning_rate": 9.982111716797758e-06, |
| "loss": 0.101, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 1.0085912508470862, |
| "learning_rate": 9.981990718584768e-06, |
| "loss": 0.0959, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 0.8594135430057543, |
| "learning_rate": 9.981869313265995e-06, |
| "loss": 0.0912, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 0.9903339586980618, |
| "learning_rate": 9.981747500851357e-06, |
| "loss": 0.0692, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 0.7623380548666351, |
| "learning_rate": 9.981625281350812e-06, |
| "loss": 0.0699, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 0.6267143484055344, |
| "learning_rate": 9.981502654774349e-06, |
| "loss": 0.0499, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 0.8234150836820757, |
| "learning_rate": 9.98137962113198e-06, |
| "loss": 0.0788, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 0.8158733102806115, |
| "learning_rate": 9.98125618043377e-06, |
| "loss": 0.089, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 0.6372656549463032, |
| "learning_rate": 9.981132332689796e-06, |
| "loss": 0.0517, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 0.7713863813548327, |
| "learning_rate": 9.981008077910184e-06, |
| "loss": 0.0769, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 0.8883775702857831, |
| "learning_rate": 9.980883416105084e-06, |
| "loss": 0.0828, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 0.6490936355626988, |
| "learning_rate": 9.980758347284687e-06, |
| "loss": 0.0618, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 0.8359554084586713, |
| "learning_rate": 9.980632871459209e-06, |
| "loss": 0.0714, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 0.7373523328454649, |
| "learning_rate": 9.980506988638906e-06, |
| "loss": 0.0836, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 0.6644370731485183, |
| "learning_rate": 9.980380698834064e-06, |
| "loss": 0.0777, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 0.870883965477211, |
| "learning_rate": 9.980254002055003e-06, |
| "loss": 0.0847, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 0.6021065409531002, |
| "learning_rate": 9.980126898312074e-06, |
| "loss": 0.0583, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.8705461588189498, |
| "learning_rate": 9.979999387615665e-06, |
| "loss": 0.0895, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 0.9639410731114018, |
| "learning_rate": 9.979871469976197e-06, |
| "loss": 0.0901, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 0.7554126383153169, |
| "learning_rate": 9.97974314540412e-06, |
| "loss": 0.0699, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 1.1039648440512544, |
| "learning_rate": 9.979614413909922e-06, |
| "loss": 0.1013, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 0.5258831871743486, |
| "learning_rate": 9.979485275504121e-06, |
| "loss": 0.0544, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 1.3025897394440575, |
| "learning_rate": 9.979355730197271e-06, |
| "loss": 0.1067, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 0.5206132423310033, |
| "learning_rate": 9.979225777999956e-06, |
| "loss": 0.0497, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 0.7202189397663867, |
| "learning_rate": 9.9790954189228e-06, |
| "loss": 0.0807, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 0.5738667169449175, |
| "learning_rate": 9.97896465297645e-06, |
| "loss": 0.0614, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 0.7972440737628133, |
| "learning_rate": 9.978833480171592e-06, |
| "loss": 0.0906, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 0.7697423454053598, |
| "learning_rate": 9.978701900518947e-06, |
| "loss": 0.0632, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 0.8259885564233931, |
| "learning_rate": 9.978569914029267e-06, |
| "loss": 0.0944, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 0.8450006655868962, |
| "learning_rate": 9.978437520713335e-06, |
| "loss": 0.0862, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 0.7746078278616594, |
| "learning_rate": 9.978304720581973e-06, |
| "loss": 0.088, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 0.9977734940815816, |
| "learning_rate": 9.97817151364603e-06, |
| "loss": 0.1036, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 0.7800752301510507, |
| "learning_rate": 9.978037899916393e-06, |
| "loss": 0.0778, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 0.7521153273438224, |
| "learning_rate": 9.97790387940398e-06, |
| "loss": 0.0532, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 0.8046420256419254, |
| "learning_rate": 9.977769452119741e-06, |
| "loss": 0.0708, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 0.9071770528791517, |
| "learning_rate": 9.97763461807466e-06, |
| "loss": 0.1006, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 0.8824570234268595, |
| "learning_rate": 9.97749937727976e-06, |
| "loss": 0.0855, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 0.8286075823730068, |
| "learning_rate": 9.977363729746088e-06, |
| "loss": 0.077, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 0.6791233851472963, |
| "learning_rate": 9.977227675484729e-06, |
| "loss": 0.0698, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 0.9813875260679181, |
| "learning_rate": 9.977091214506803e-06, |
| "loss": 0.0838, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 0.9986284190120469, |
| "learning_rate": 9.976954346823456e-06, |
| "loss": 0.0789, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 0.6456071732838817, |
| "learning_rate": 9.976817072445878e-06, |
| "loss": 0.0566, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 0.7707362352402762, |
| "learning_rate": 9.976679391385283e-06, |
| "loss": 0.0677, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 0.5804713825378958, |
| "learning_rate": 9.976541303652923e-06, |
| "loss": 0.0547, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 0.7705377953828665, |
| "learning_rate": 9.976402809260083e-06, |
| "loss": 0.0673, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 0.651002355082985, |
| "learning_rate": 9.976263908218076e-06, |
| "loss": 0.066, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 1.0075230687249708, |
| "learning_rate": 9.976124600538257e-06, |
| "loss": 0.1151, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 0.7110146200064966, |
| "learning_rate": 9.975984886232006e-06, |
| "loss": 0.0693, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 0.782615076662302, |
| "learning_rate": 9.975844765310743e-06, |
| "loss": 0.071, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 1.091513822496144, |
| "learning_rate": 9.975704237785915e-06, |
| "loss": 0.1277, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 0.8244942271322709, |
| "learning_rate": 9.975563303669006e-06, |
| "loss": 0.092, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 1.0997264747524325, |
| "learning_rate": 9.975421962971536e-06, |
| "loss": 0.102, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 1.0471722358260585, |
| "learning_rate": 9.97528021570505e-06, |
| "loss": 0.1112, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 0.6366013160292697, |
| "learning_rate": 9.975138061881135e-06, |
| "loss": 0.0629, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 0.7145502784859615, |
| "learning_rate": 9.974995501511404e-06, |
| "loss": 0.0567, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 1.0825694007542435, |
| "learning_rate": 9.974852534607506e-06, |
| "loss": 0.0897, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 0.8874195306329471, |
| "learning_rate": 9.974709161181126e-06, |
| "loss": 0.0879, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 0.8193025449594961, |
| "learning_rate": 9.974565381243982e-06, |
| "loss": 0.0969, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 0.76528422131405, |
| "learning_rate": 9.974421194807815e-06, |
| "loss": 0.0786, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 0.8836543328533641, |
| "learning_rate": 9.974276601884416e-06, |
| "loss": 0.0744, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 0.7482952108426273, |
| "learning_rate": 9.974131602485596e-06, |
| "loss": 0.0772, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 0.9122723647083647, |
| "learning_rate": 9.973986196623203e-06, |
| "loss": 0.0851, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 0.8373653902978805, |
| "learning_rate": 9.973840384309121e-06, |
| "loss": 0.0865, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 0.6360069343077157, |
| "learning_rate": 9.973694165555264e-06, |
| "loss": 0.0618, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 0.7967304456611868, |
| "learning_rate": 9.973547540373582e-06, |
| "loss": 0.0865, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 1.1699452577832765, |
| "learning_rate": 9.973400508776054e-06, |
| "loss": 0.1144, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 0.6282867599706373, |
| "learning_rate": 9.973253070774698e-06, |
| "loss": 0.0633, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 0.79942272506218, |
| "learning_rate": 9.973105226381559e-06, |
| "loss": 0.069, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 0.9348674828410355, |
| "learning_rate": 9.972956975608719e-06, |
| "loss": 0.1019, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 1.0942665884463076, |
| "learning_rate": 9.972808318468292e-06, |
| "loss": 0.0859, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 0.6283579225277517, |
| "learning_rate": 9.972659254972426e-06, |
| "loss": 0.0589, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 1.0989677054167046, |
| "learning_rate": 9.972509785133304e-06, |
| "loss": 0.1081, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 0.7310198219540203, |
| "learning_rate": 9.972359908963137e-06, |
| "loss": 0.0675, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 0.757671629194488, |
| "learning_rate": 9.972209626474172e-06, |
| "loss": 0.0734, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 0.7966175159886519, |
| "learning_rate": 9.972058937678692e-06, |
| "loss": 0.075, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 0.9805514159267839, |
| "learning_rate": 9.97190784258901e-06, |
| "loss": 0.1071, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 0.7000612574442994, |
| "learning_rate": 9.971756341217471e-06, |
| "loss": 0.0526, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 0.7917466702374949, |
| "learning_rate": 9.971604433576456e-06, |
| "loss": 0.0698, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 0.8412692631182211, |
| "learning_rate": 9.97145211967838e-06, |
| "loss": 0.0783, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 0.5615038895232536, |
| "learning_rate": 9.971299399535685e-06, |
| "loss": 0.053, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 0.6849745369298482, |
| "learning_rate": 9.971146273160854e-06, |
| "loss": 0.0774, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 0.6466596777060115, |
| "learning_rate": 9.9709927405664e-06, |
| "loss": 0.0606, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 0.7169884074840761, |
| "learning_rate": 9.970838801764866e-06, |
| "loss": 0.0839, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 0.9393396355410675, |
| "learning_rate": 9.970684456768836e-06, |
| "loss": 0.1132, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 12.197098173453568, |
| "learning_rate": 9.970529705590918e-06, |
| "loss": 0.4858, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 0.7355841274771772, |
| "learning_rate": 9.97037454824376e-06, |
| "loss": 0.0714, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 1.050385265783733, |
| "learning_rate": 9.97021898474004e-06, |
| "loss": 0.1024, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 0.8612087678995594, |
| "learning_rate": 9.970063015092469e-06, |
| "loss": 0.085, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 1.3886472100476919, |
| "learning_rate": 9.969906639313793e-06, |
| "loss": 0.1212, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 0.8238176964814595, |
| "learning_rate": 9.96974985741679e-06, |
| "loss": 0.0721, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 0.8718897735731601, |
| "learning_rate": 9.969592669414272e-06, |
| "loss": 0.0959, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 6.796752422837202, |
| "learning_rate": 9.969435075319083e-06, |
| "loss": 0.115, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 0.58176536820322, |
| "learning_rate": 9.969277075144104e-06, |
| "loss": 0.0459, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 0.7267253435076165, |
| "learning_rate": 9.969118668902242e-06, |
| "loss": 0.07, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 0.7682389367523258, |
| "learning_rate": 9.968959856606442e-06, |
| "loss": 0.0542, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 0.7873348185837048, |
| "learning_rate": 9.968800638269682e-06, |
| "loss": 0.0598, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 1.287713292390112, |
| "learning_rate": 9.968641013904974e-06, |
| "loss": 0.1442, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 1.085650814952146, |
| "learning_rate": 9.968480983525359e-06, |
| "loss": 0.0926, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 0.6716676596759695, |
| "learning_rate": 9.968320547143918e-06, |
| "loss": 0.0767, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 0.8467396807693714, |
| "learning_rate": 9.968159704773757e-06, |
| "loss": 0.0977, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 0.6438855833782786, |
| "learning_rate": 9.967998456428021e-06, |
| "loss": 0.0586, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 0.7254140122399564, |
| "learning_rate": 9.967836802119886e-06, |
| "loss": 0.06, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 0.87517545358881, |
| "learning_rate": 9.967674741862563e-06, |
| "loss": 0.1016, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 1.0624206936058178, |
| "learning_rate": 9.967512275669294e-06, |
| "loss": 0.1296, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 1.0284720738314184, |
| "learning_rate": 9.967349403553353e-06, |
| "loss": 0.0862, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 0.8342932737384292, |
| "learning_rate": 9.967186125528053e-06, |
| "loss": 0.0873, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 1.543095569701571, |
| "learning_rate": 9.967022441606734e-06, |
| "loss": 0.1209, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 0.70731586616612, |
| "learning_rate": 9.966858351802773e-06, |
| "loss": 0.0726, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 0.6660531988680356, |
| "learning_rate": 9.966693856129576e-06, |
| "loss": 0.0562, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 0.8503640969928286, |
| "learning_rate": 9.966528954600587e-06, |
| "loss": 0.0838, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 0.6021534124846688, |
| "learning_rate": 9.96636364722928e-06, |
| "loss": 0.0673, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 0.8782816795828058, |
| "learning_rate": 9.966197934029165e-06, |
| "loss": 0.0845, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 0.9030990654346936, |
| "learning_rate": 9.966031815013781e-06, |
| "loss": 0.0839, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 0.8567507299712805, |
| "learning_rate": 9.965865290196703e-06, |
| "loss": 0.0935, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 0.8099856489670021, |
| "learning_rate": 9.96569835959154e-06, |
| "loss": 0.0747, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 0.8938878675243255, |
| "learning_rate": 9.965531023211931e-06, |
| "loss": 0.0854, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 0.735313860104022, |
| "learning_rate": 9.965363281071551e-06, |
| "loss": 0.0865, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 0.5495229598132649, |
| "learning_rate": 9.965195133184108e-06, |
| "loss": 0.0403, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 1.0700416713113117, |
| "learning_rate": 9.965026579563342e-06, |
| "loss": 0.1086, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 0.7118653717355078, |
| "learning_rate": 9.964857620223024e-06, |
| "loss": 0.0691, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 0.6871481686027417, |
| "learning_rate": 9.964688255176963e-06, |
| "loss": 0.0667, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 0.9848841869658392, |
| "learning_rate": 9.964518484438998e-06, |
| "loss": 0.0813, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 0.6311750922074311, |
| "learning_rate": 9.964348308023001e-06, |
| "loss": 0.0592, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 0.7813168734245782, |
| "learning_rate": 9.964177725942881e-06, |
| "loss": 0.0826, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 0.8572110622332836, |
| "learning_rate": 9.964006738212574e-06, |
| "loss": 0.0853, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 0.5304433423014596, |
| "learning_rate": 9.963835344846056e-06, |
| "loss": 0.048, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 0.7598521228122416, |
| "learning_rate": 9.963663545857328e-06, |
| "loss": 0.0757, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 1.1542546683489703, |
| "learning_rate": 9.963491341260432e-06, |
| "loss": 0.104, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 0.7766563582253432, |
| "learning_rate": 9.963318731069437e-06, |
| "loss": 0.0952, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 1.1319194983916299, |
| "learning_rate": 9.96314571529845e-06, |
| "loss": 0.1005, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 0.7230559135257585, |
| "learning_rate": 9.962972293961608e-06, |
| "loss": 0.0647, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 0.9863934566369588, |
| "learning_rate": 9.962798467073083e-06, |
| "loss": 0.0763, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 0.8259784410005646, |
| "learning_rate": 9.96262423464708e-06, |
| "loss": 0.087, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 0.7987139095182185, |
| "learning_rate": 9.962449596697834e-06, |
| "loss": 0.0671, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 1.130208173229934, |
| "learning_rate": 9.962274553239619e-06, |
| "loss": 0.119, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 0.7399696243677417, |
| "learning_rate": 9.962099104286735e-06, |
| "loss": 0.064, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 1.156015767405528, |
| "learning_rate": 9.961923249853523e-06, |
| "loss": 0.1102, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 0.972422739757894, |
| "learning_rate": 9.961746989954349e-06, |
| "loss": 0.1093, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 0.7766700420403171, |
| "learning_rate": 9.96157032460362e-06, |
| "loss": 0.0655, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 0.7460679115751414, |
| "learning_rate": 9.961393253815767e-06, |
| "loss": 0.0751, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.0684214450487566, |
| "learning_rate": 9.961215777605266e-06, |
| "loss": 0.0789, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 0.7683994291392229, |
| "learning_rate": 9.961037895986615e-06, |
| "loss": 0.0849, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 0.7270368453251704, |
| "learning_rate": 9.960859608974352e-06, |
| "loss": 0.0779, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 0.701460207303568, |
| "learning_rate": 9.960680916583042e-06, |
| "loss": 0.0639, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 0.6784619280926262, |
| "learning_rate": 9.960501818827292e-06, |
| "loss": 0.077, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 0.8064075868568972, |
| "learning_rate": 9.960322315721735e-06, |
| "loss": 0.0827, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 0.9155026735417204, |
| "learning_rate": 9.960142407281039e-06, |
| "loss": 0.0841, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 0.6167749294869733, |
| "learning_rate": 9.959962093519904e-06, |
| "loss": 0.054, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 0.8127781985331358, |
| "learning_rate": 9.959781374453066e-06, |
| "loss": 0.0751, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 0.98306444688532, |
| "learning_rate": 9.959600250095294e-06, |
| "loss": 0.075, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 0.7982130269360888, |
| "learning_rate": 9.959418720461384e-06, |
| "loss": 0.0834, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 0.7862225023823932, |
| "learning_rate": 9.959236785566175e-06, |
| "loss": 0.0704, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 0.562107514296544, |
| "learning_rate": 9.959054445424532e-06, |
| "loss": 0.0644, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 0.6089607791855781, |
| "learning_rate": 9.958871700051353e-06, |
| "loss": 0.0512, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 0.6962095067981563, |
| "learning_rate": 9.958688549461573e-06, |
| "loss": 0.0712, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 1.155217046291275, |
| "learning_rate": 9.958504993670158e-06, |
| "loss": 0.1049, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 1.0913314226134752, |
| "learning_rate": 9.958321032692107e-06, |
| "loss": 0.1226, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 22.735025633907238, |
| "learning_rate": 9.958136666542455e-06, |
| "loss": 0.8419, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 1.184019553325164, |
| "learning_rate": 9.957951895236262e-06, |
| "loss": 0.1113, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 0.7664792046331882, |
| "learning_rate": 9.957766718788632e-06, |
| "loss": 0.104, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 0.8672883026786035, |
| "learning_rate": 9.957581137214695e-06, |
| "loss": 0.074, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 0.8772220264781722, |
| "learning_rate": 9.957395150529615e-06, |
| "loss": 0.0986, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 0.7016331971826193, |
| "learning_rate": 9.95720875874859e-06, |
| "loss": 0.0752, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 0.6308822051977305, |
| "learning_rate": 9.957021961886855e-06, |
| "loss": 0.0608, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 0.9803601042372939, |
| "learning_rate": 9.956834759959669e-06, |
| "loss": 0.0908, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 0.7674462109758159, |
| "learning_rate": 9.95664715298233e-06, |
| "loss": 0.074, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 0.7450186566335193, |
| "learning_rate": 9.95645914097017e-06, |
| "loss": 0.0817, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 0.7225723661612439, |
| "learning_rate": 9.956270723938553e-06, |
| "loss": 0.0849, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 0.7190355211871646, |
| "learning_rate": 9.956081901902875e-06, |
| "loss": 0.0748, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 1.210684562087392, |
| "learning_rate": 9.955892674878565e-06, |
| "loss": 0.1272, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 0.834170476650907, |
| "learning_rate": 9.955703042881087e-06, |
| "loss": 0.0992, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 0.874478173291907, |
| "learning_rate": 9.955513005925934e-06, |
| "loss": 0.0858, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 0.5510320150423565, |
| "learning_rate": 9.95532256402864e-06, |
| "loss": 0.0574, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 0.5657171871822584, |
| "learning_rate": 9.955131717204762e-06, |
| "loss": 0.0671, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.7564664653864259, |
| "learning_rate": 9.954940465469898e-06, |
| "loss": 0.085, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 0.7594501005901694, |
| "learning_rate": 9.954748808839675e-06, |
| "loss": 0.0733, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 0.6748092428366178, |
| "learning_rate": 9.954556747329754e-06, |
| "loss": 0.0707, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 1.715089789819449, |
| "learning_rate": 9.954364280955832e-06, |
| "loss": 0.1045, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 0.6668751648778155, |
| "learning_rate": 9.954171409733634e-06, |
| "loss": 0.0573, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 0.5963716475430643, |
| "learning_rate": 9.95397813367892e-06, |
| "loss": 0.0752, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 0.9917190233932158, |
| "learning_rate": 9.953784452807487e-06, |
| "loss": 0.1049, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 0.5638529401686616, |
| "learning_rate": 9.953590367135159e-06, |
| "loss": 0.0547, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 0.6477110515460727, |
| "learning_rate": 9.953395876677796e-06, |
| "loss": 0.0564, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 0.5492055118574499, |
| "learning_rate": 9.95320098145129e-06, |
| "loss": 0.0505, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 0.8954528378372288, |
| "learning_rate": 9.95300568147157e-06, |
| "loss": 0.126, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 0.6155736143826033, |
| "learning_rate": 9.952809976754593e-06, |
| "loss": 0.0518, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 1.1486004986445648, |
| "learning_rate": 9.952613867316351e-06, |
| "loss": 0.1142, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 0.8236924325360948, |
| "learning_rate": 9.95241735317287e-06, |
| "loss": 0.1047, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 0.832372102653505, |
| "learning_rate": 9.952220434340209e-06, |
| "loss": 0.0729, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 0.7288716722109786, |
| "learning_rate": 9.952023110834456e-06, |
| "loss": 0.068, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 0.5327254294033283, |
| "learning_rate": 9.951825382671739e-06, |
| "loss": 0.0614, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 0.7204991379763186, |
| "learning_rate": 9.951627249868213e-06, |
| "loss": 0.0666, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 0.7485835393026234, |
| "learning_rate": 9.95142871244007e-06, |
| "loss": 0.068, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 0.45602532896445397, |
| "learning_rate": 9.951229770403531e-06, |
| "loss": 0.0414, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 0.7240661348572547, |
| "learning_rate": 9.951030423774858e-06, |
| "loss": 0.0798, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 0.7716352477687572, |
| "learning_rate": 9.950830672570337e-06, |
| "loss": 0.071, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 1.22677184750836, |
| "learning_rate": 9.95063051680629e-06, |
| "loss": 0.1373, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 0.7365431233953595, |
| "learning_rate": 9.950429956499074e-06, |
| "loss": 0.0699, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 0.705654951368504, |
| "learning_rate": 9.950228991665078e-06, |
| "loss": 0.0741, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 0.8261497906057415, |
| "learning_rate": 9.950027622320724e-06, |
| "loss": 0.0764, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 0.9965395262255518, |
| "learning_rate": 9.949825848482465e-06, |
| "loss": 0.0852, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 0.6807161957389707, |
| "learning_rate": 9.949623670166794e-06, |
| "loss": 0.074, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 1.1216390709095547, |
| "learning_rate": 9.949421087390228e-06, |
| "loss": 0.0931, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 1.1278655216416786, |
| "learning_rate": 9.949218100169322e-06, |
| "loss": 0.1177, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 0.9160591457448575, |
| "learning_rate": 9.949014708520664e-06, |
| "loss": 0.1015, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 0.9377363057118697, |
| "learning_rate": 9.948810912460872e-06, |
| "loss": 0.1059, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 0.8760932101779023, |
| "learning_rate": 9.948606712006601e-06, |
| "loss": 0.0812, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 0.6962605051289937, |
| "learning_rate": 9.948402107174537e-06, |
| "loss": 0.0735, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 0.6501265713488487, |
| "learning_rate": 9.948197097981401e-06, |
| "loss": 0.0551, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 1.2156011775652311, |
| "learning_rate": 9.947991684443942e-06, |
| "loss": 0.1066, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 0.9679794435610901, |
| "learning_rate": 9.947785866578951e-06, |
| "loss": 0.0981, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 0.7195724631231237, |
| "learning_rate": 9.94757964440324e-06, |
| "loss": 0.0777, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 0.549427502610929, |
| "learning_rate": 9.947373017933665e-06, |
| "loss": 0.0516, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 0.5667212336170355, |
| "learning_rate": 9.947165987187108e-06, |
| "loss": 0.0583, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 0.6638127935874616, |
| "learning_rate": 9.946958552180489e-06, |
| "loss": 0.0723, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 0.5226768129517959, |
| "learning_rate": 9.946750712930756e-06, |
| "loss": 0.0482, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 0.8358986518129136, |
| "learning_rate": 9.946542469454894e-06, |
| "loss": 0.1037, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 0.6695809647699968, |
| "learning_rate": 9.94633382176992e-06, |
| "loss": 0.0728, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 1.0608546974350634, |
| "learning_rate": 9.946124769892884e-06, |
| "loss": 0.1192, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 0.5090717025630993, |
| "learning_rate": 9.945915313840869e-06, |
| "loss": 0.0612, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 0.8105130307542814, |
| "learning_rate": 9.94570545363099e-06, |
| "loss": 0.0838, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 0.7752986876049957, |
| "learning_rate": 9.945495189280394e-06, |
| "loss": 0.092, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 0.869801315379322, |
| "learning_rate": 9.945284520806267e-06, |
| "loss": 0.077, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 0.5427153243822386, |
| "learning_rate": 9.94507344822582e-06, |
| "loss": 0.0592, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 0.7368670007832758, |
| "learning_rate": 9.944861971556305e-06, |
| "loss": 0.0608, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 0.8141430793460733, |
| "learning_rate": 9.944650090814998e-06, |
| "loss": 0.0616, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 2.1096588720516425, |
| "learning_rate": 9.944437806019216e-06, |
| "loss": 0.0938, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 0.7014907085161215, |
| "learning_rate": 9.944225117186306e-06, |
| "loss": 0.0812, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 0.5078467158211916, |
| "learning_rate": 9.944012024333647e-06, |
| "loss": 0.0561, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 0.6379031604907951, |
| "learning_rate": 9.943798527478652e-06, |
| "loss": 0.0678, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 0.799876019099874, |
| "learning_rate": 9.943584626638768e-06, |
| "loss": 0.0914, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 0.6550229607349646, |
| "learning_rate": 9.943370321831474e-06, |
| "loss": 0.0668, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 0.767534839542607, |
| "learning_rate": 9.943155613074279e-06, |
| "loss": 0.0711, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 0.7571838990000624, |
| "learning_rate": 9.942940500384733e-06, |
| "loss": 0.0893, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 17.807000846945513, |
| "learning_rate": 9.942724983780409e-06, |
| "loss": 0.3419, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 1.2088422410181228, |
| "learning_rate": 9.942509063278922e-06, |
| "loss": 0.1173, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 0.8811842157145667, |
| "learning_rate": 9.942292738897914e-06, |
| "loss": 0.1006, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 0.7726281786442553, |
| "learning_rate": 9.942076010655063e-06, |
| "loss": 0.0909, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 0.9942256398778268, |
| "learning_rate": 9.941858878568078e-06, |
| "loss": 0.134, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 1.001596627292525, |
| "learning_rate": 9.941641342654702e-06, |
| "loss": 0.0977, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 0.5064863363900076, |
| "learning_rate": 9.941423402932713e-06, |
| "loss": 0.0559, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 0.8589680374278897, |
| "learning_rate": 9.94120505941992e-06, |
| "loss": 0.0992, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 0.7830880681851201, |
| "learning_rate": 9.940986312134162e-06, |
| "loss": 0.0825, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 0.5778344550660577, |
| "learning_rate": 9.940767161093316e-06, |
| "loss": 0.0637, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 0.8661775200374767, |
| "learning_rate": 9.94054760631529e-06, |
| "loss": 0.0958, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 0.6976226834296251, |
| "learning_rate": 9.940327647818026e-06, |
| "loss": 0.0752, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 0.7530160135685138, |
| "learning_rate": 9.940107285619495e-06, |
| "loss": 0.077, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 0.7997106896354084, |
| "learning_rate": 9.939886519737707e-06, |
| "loss": 0.0958, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 0.8918061918047896, |
| "learning_rate": 9.939665350190702e-06, |
| "loss": 0.0822, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 0.804115756264787, |
| "learning_rate": 9.93944377699655e-06, |
| "loss": 0.0915, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 0.6234057941022288, |
| "learning_rate": 9.93922180017336e-06, |
| "loss": 0.0672, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 0.8269450754551354, |
| "learning_rate": 9.93899941973927e-06, |
| "loss": 0.1102, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 0.9233841316663005, |
| "learning_rate": 9.93877663571245e-06, |
| "loss": 0.0963, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 0.9944861568923805, |
| "learning_rate": 9.938553448111108e-06, |
| "loss": 0.1127, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 0.8423641298780182, |
| "learning_rate": 9.938329856953482e-06, |
| "loss": 0.0788, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 0.8124861649110975, |
| "learning_rate": 9.938105862257839e-06, |
| "loss": 0.0831, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 0.6612222253979325, |
| "learning_rate": 9.937881464042485e-06, |
| "loss": 0.0703, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 0.854447666921162, |
| "learning_rate": 9.937656662325759e-06, |
| "loss": 0.1074, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 0.74521770368624, |
| "learning_rate": 9.937431457126028e-06, |
| "loss": 0.0777, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2529572338489536, |
| "grad_norm": 0.5044600553216889, |
| "learning_rate": 9.937205848461694e-06, |
| "loss": 0.0482, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2534121929026388, |
| "grad_norm": 1.0949051966397356, |
| "learning_rate": 9.936979836351197e-06, |
| "loss": 0.0945, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2538671519563239, |
| "grad_norm": 1.0332199252594778, |
| "learning_rate": 9.936753420813003e-06, |
| "loss": 0.092, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2543221110100091, |
| "grad_norm": 0.7029577630748303, |
| "learning_rate": 9.936526601865612e-06, |
| "loss": 0.0612, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 0.5251640812064944, |
| "learning_rate": 9.936299379527561e-06, |
| "loss": 0.0569, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2552320291173794, |
| "grad_norm": 0.6689496924283664, |
| "learning_rate": 9.936071753817416e-06, |
| "loss": 0.0831, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2556869881710646, |
| "grad_norm": 0.8094390650978945, |
| "learning_rate": 9.935843724753778e-06, |
| "loss": 0.0897, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.25614194722474976, |
| "grad_norm": 0.9168849457874456, |
| "learning_rate": 9.935615292355283e-06, |
| "loss": 0.1002, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.25659690627843496, |
| "grad_norm": 0.8829987760246157, |
| "learning_rate": 9.935386456640593e-06, |
| "loss": 0.0997, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2570518653321201, |
| "grad_norm": 0.9381858557170412, |
| "learning_rate": 9.93515721762841e-06, |
| "loss": 0.0926, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2575068243858053, |
| "grad_norm": 0.6555630906162114, |
| "learning_rate": 9.934927575337469e-06, |
| "loss": 0.0805, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.25796178343949044, |
| "grad_norm": 0.49897284031908906, |
| "learning_rate": 9.93469752978653e-06, |
| "loss": 0.0545, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2584167424931756, |
| "grad_norm": 0.8528689809178094, |
| "learning_rate": 9.934467080994394e-06, |
| "loss": 0.071, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2588717015468608, |
| "grad_norm": 0.7999188284583189, |
| "learning_rate": 9.934236228979893e-06, |
| "loss": 0.0675, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.25932666060054593, |
| "grad_norm": 0.6603615540899209, |
| "learning_rate": 9.934004973761888e-06, |
| "loss": 0.0584, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25978161965423113, |
| "grad_norm": 0.907545218090885, |
| "learning_rate": 9.933773315359281e-06, |
| "loss": 0.0912, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2602365787079163, |
| "grad_norm": 1.2225854103436529, |
| "learning_rate": 9.933541253790998e-06, |
| "loss": 0.0996, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2606915377616015, |
| "grad_norm": 0.821182112953313, |
| "learning_rate": 9.933308789076004e-06, |
| "loss": 0.0886, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2611464968152866, |
| "grad_norm": 0.5608593716975471, |
| "learning_rate": 9.933075921233292e-06, |
| "loss": 0.0597, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.26160145586897177, |
| "grad_norm": 0.977094581221023, |
| "learning_rate": 9.932842650281897e-06, |
| "loss": 0.0796, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26205641492265697, |
| "grad_norm": 1.0086738407073246, |
| "learning_rate": 9.932608976240875e-06, |
| "loss": 0.1245, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2625113739763421, |
| "grad_norm": 0.7841605184531412, |
| "learning_rate": 9.932374899129323e-06, |
| "loss": 0.0798, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2629663330300273, |
| "grad_norm": 0.6360279282536222, |
| "learning_rate": 9.932140418966369e-06, |
| "loss": 0.0714, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.26342129208371245, |
| "grad_norm": 0.8673569892639119, |
| "learning_rate": 9.931905535771174e-06, |
| "loss": 0.0805, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.26387625113739765, |
| "grad_norm": 1.0489822111787226, |
| "learning_rate": 9.93167024956293e-06, |
| "loss": 0.1046, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2643312101910828, |
| "grad_norm": 0.5670611684906575, |
| "learning_rate": 9.931434560360864e-06, |
| "loss": 0.0662, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.26478616924476794, |
| "grad_norm": 0.6786486717931198, |
| "learning_rate": 9.931198468184236e-06, |
| "loss": 0.0705, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.26524112829845314, |
| "grad_norm": 0.7580601459978998, |
| "learning_rate": 9.93096197305234e-06, |
| "loss": 0.0852, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2656960873521383, |
| "grad_norm": 0.8802141056853473, |
| "learning_rate": 9.930725074984498e-06, |
| "loss": 0.0989, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2661510464058235, |
| "grad_norm": 0.6365186853726369, |
| "learning_rate": 9.930487774000071e-06, |
| "loss": 0.0639, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26660600545950863, |
| "grad_norm": 0.5301331320559389, |
| "learning_rate": 9.930250070118448e-06, |
| "loss": 0.0628, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.26706096451319383, |
| "grad_norm": 0.6982626314754508, |
| "learning_rate": 9.930011963359055e-06, |
| "loss": 0.071, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.267515923566879, |
| "grad_norm": 1.0151988128038116, |
| "learning_rate": 9.929773453741346e-06, |
| "loss": 0.1074, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2679708826205642, |
| "grad_norm": 0.809050548171497, |
| "learning_rate": 9.929534541284814e-06, |
| "loss": 0.0715, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2684258416742493, |
| "grad_norm": 0.8254901916718546, |
| "learning_rate": 9.929295226008981e-06, |
| "loss": 0.0867, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26888080072793447, |
| "grad_norm": 0.695875393623419, |
| "learning_rate": 9.929055507933403e-06, |
| "loss": 0.0667, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.26933575978161967, |
| "grad_norm": 0.6569370607259161, |
| "learning_rate": 9.928815387077668e-06, |
| "loss": 0.0667, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2697907188353048, |
| "grad_norm": 0.8509989554819866, |
| "learning_rate": 9.9285748634614e-06, |
| "loss": 0.0964, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.27024567788899, |
| "grad_norm": 0.7743154017799978, |
| "learning_rate": 9.928333937104249e-06, |
| "loss": 0.1008, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.27070063694267515, |
| "grad_norm": 0.6810806452813069, |
| "learning_rate": 9.928092608025905e-06, |
| "loss": 0.0623, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.27115559599636035, |
| "grad_norm": 0.6757764847225584, |
| "learning_rate": 9.927850876246087e-06, |
| "loss": 0.0621, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2716105550500455, |
| "grad_norm": 0.7561897396028232, |
| "learning_rate": 9.927608741784551e-06, |
| "loss": 0.0769, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.27206551410373064, |
| "grad_norm": 0.9087608421567758, |
| "learning_rate": 9.927366204661081e-06, |
| "loss": 0.1064, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.27252047315741584, |
| "grad_norm": 0.6090969825991095, |
| "learning_rate": 9.927123264895497e-06, |
| "loss": 0.0596, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.272975432211101, |
| "grad_norm": 0.5838273869575724, |
| "learning_rate": 9.926879922507651e-06, |
| "loss": 0.0581, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2734303912647862, |
| "grad_norm": 41.16319851924577, |
| "learning_rate": 9.926636177517427e-06, |
| "loss": 0.7305, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.27388535031847133, |
| "grad_norm": 0.7159907538362364, |
| "learning_rate": 9.926392029944743e-06, |
| "loss": 0.0655, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.27434030937215653, |
| "grad_norm": 0.6649118967721417, |
| "learning_rate": 9.92614747980955e-06, |
| "loss": 0.0676, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2747952684258417, |
| "grad_norm": 0.6955588874689645, |
| "learning_rate": 9.92590252713183e-06, |
| "loss": 0.0691, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2752502274795268, |
| "grad_norm": 1.0093833512385355, |
| "learning_rate": 9.925657171931603e-06, |
| "loss": 0.0788, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.275705186533212, |
| "grad_norm": 0.7222760734094591, |
| "learning_rate": 9.925411414228913e-06, |
| "loss": 0.0765, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.27616014558689717, |
| "grad_norm": 0.7901083190949632, |
| "learning_rate": 9.925165254043846e-06, |
| "loss": 0.0899, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.27661510464058237, |
| "grad_norm": 0.9417411536264935, |
| "learning_rate": 9.924918691396516e-06, |
| "loss": 0.105, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2770700636942675, |
| "grad_norm": 0.8531576003982281, |
| "learning_rate": 9.924671726307073e-06, |
| "loss": 0.0943, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2775250227479527, |
| "grad_norm": 0.5771833327707789, |
| "learning_rate": 9.924424358795694e-06, |
| "loss": 0.0649, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.27797998180163785, |
| "grad_norm": 0.6804808150530418, |
| "learning_rate": 9.924176588882597e-06, |
| "loss": 0.0591, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.278434940855323, |
| "grad_norm": 0.6916110773643345, |
| "learning_rate": 9.923928416588027e-06, |
| "loss": 0.082, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2788898999090082, |
| "grad_norm": 0.7302341341594485, |
| "learning_rate": 9.923679841932261e-06, |
| "loss": 0.0858, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.27934485896269334, |
| "grad_norm": 0.7190514572276734, |
| "learning_rate": 9.923430864935615e-06, |
| "loss": 0.0658, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.27979981801637854, |
| "grad_norm": 0.6872892360375661, |
| "learning_rate": 9.923181485618432e-06, |
| "loss": 0.0639, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2802547770700637, |
| "grad_norm": 0.6937876338258171, |
| "learning_rate": 9.92293170400109e-06, |
| "loss": 0.0759, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2807097361237489, |
| "grad_norm": 0.8498928251372749, |
| "learning_rate": 9.922681520104002e-06, |
| "loss": 0.0777, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.28116469517743403, |
| "grad_norm": 0.7409609990217324, |
| "learning_rate": 9.922430933947612e-06, |
| "loss": 0.0665, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2816196542311192, |
| "grad_norm": 1.2216942184143182, |
| "learning_rate": 9.922179945552393e-06, |
| "loss": 0.1405, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2820746132848044, |
| "grad_norm": 0.6637234254274302, |
| "learning_rate": 9.921928554938857e-06, |
| "loss": 0.062, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2825295723384895, |
| "grad_norm": 0.9463087936758936, |
| "learning_rate": 9.921676762127548e-06, |
| "loss": 0.0767, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2829845313921747, |
| "grad_norm": 1.089309305809361, |
| "learning_rate": 9.921424567139042e-06, |
| "loss": 0.1171, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.28343949044585987, |
| "grad_norm": 0.8752119302288704, |
| "learning_rate": 9.921171969993942e-06, |
| "loss": 0.0813, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.28389444949954507, |
| "grad_norm": 0.7870883299373892, |
| "learning_rate": 9.920918970712894e-06, |
| "loss": 0.0993, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2843494085532302, |
| "grad_norm": 0.6504873266789636, |
| "learning_rate": 9.92066556931657e-06, |
| "loss": 0.073, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28480436760691535, |
| "grad_norm": 1.1098031698420505, |
| "learning_rate": 9.920411765825679e-06, |
| "loss": 0.1218, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.28525932666060055, |
| "grad_norm": 1.217844501512982, |
| "learning_rate": 9.920157560260957e-06, |
| "loss": 0.1549, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.9728161223416268, |
| "learning_rate": 9.919902952643179e-06, |
| "loss": 0.0984, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2861692447679709, |
| "grad_norm": 0.5217007184455262, |
| "learning_rate": 9.91964794299315e-06, |
| "loss": 0.0636, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.28662420382165604, |
| "grad_norm": 1.7394407973312302, |
| "learning_rate": 9.919392531331706e-06, |
| "loss": 0.1686, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28707916287534124, |
| "grad_norm": 0.5702940927618096, |
| "learning_rate": 9.919136717679723e-06, |
| "loss": 0.0465, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2875341219290264, |
| "grad_norm": 0.5990973378462472, |
| "learning_rate": 9.9188805020581e-06, |
| "loss": 0.0678, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.28798908098271153, |
| "grad_norm": 0.9343816967111115, |
| "learning_rate": 9.918623884487777e-06, |
| "loss": 0.1068, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.28844404003639673, |
| "grad_norm": 0.5997939637509836, |
| "learning_rate": 9.91836686498972e-06, |
| "loss": 0.0629, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2888989990900819, |
| "grad_norm": 0.8063617612610782, |
| "learning_rate": 9.918109443584938e-06, |
| "loss": 0.0904, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2893539581437671, |
| "grad_norm": 0.6625405697250593, |
| "learning_rate": 9.917851620294461e-06, |
| "loss": 0.0638, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2898089171974522, |
| "grad_norm": 0.7423789779714624, |
| "learning_rate": 9.917593395139358e-06, |
| "loss": 0.0714, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2902638762511374, |
| "grad_norm": 0.6102576569607258, |
| "learning_rate": 9.91733476814073e-06, |
| "loss": 0.0563, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.29071883530482256, |
| "grad_norm": 0.8342620452233175, |
| "learning_rate": 9.91707573931971e-06, |
| "loss": 0.0934, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2911737943585077, |
| "grad_norm": 0.6397583044633867, |
| "learning_rate": 9.916816308697468e-06, |
| "loss": 0.0608, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2916287534121929, |
| "grad_norm": 0.7837909798874247, |
| "learning_rate": 9.9165564762952e-06, |
| "loss": 0.0936, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.29208371246587805, |
| "grad_norm": 0.9915309549496408, |
| "learning_rate": 9.916296242134142e-06, |
| "loss": 0.1364, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.29253867151956325, |
| "grad_norm": 0.7722166587924495, |
| "learning_rate": 9.916035606235555e-06, |
| "loss": 0.1022, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2929936305732484, |
| "grad_norm": 0.6446192951972597, |
| "learning_rate": 9.915774568620739e-06, |
| "loss": 0.0794, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2934485896269336, |
| "grad_norm": 0.7655996282008942, |
| "learning_rate": 9.915513129311025e-06, |
| "loss": 0.083, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.29390354868061874, |
| "grad_norm": 0.7358761993420325, |
| "learning_rate": 9.915251288327776e-06, |
| "loss": 0.0927, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2943585077343039, |
| "grad_norm": 0.8417441236168001, |
| "learning_rate": 9.914989045692388e-06, |
| "loss": 0.0791, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2948134667879891, |
| "grad_norm": 0.8847229450668922, |
| "learning_rate": 9.914726401426293e-06, |
| "loss": 0.1114, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.29526842584167423, |
| "grad_norm": 0.6805089048669102, |
| "learning_rate": 9.91446335555095e-06, |
| "loss": 0.0645, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.29572338489535943, |
| "grad_norm": 0.9967907781154212, |
| "learning_rate": 9.914199908087856e-06, |
| "loss": 0.1125, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2961783439490446, |
| "grad_norm": 0.7069764233646496, |
| "learning_rate": 9.913936059058537e-06, |
| "loss": 0.0961, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2966333030027298, |
| "grad_norm": 0.8237259808163154, |
| "learning_rate": 9.913671808484554e-06, |
| "loss": 0.0863, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2970882620564149, |
| "grad_norm": 0.5595221349609915, |
| "learning_rate": 9.913407156387503e-06, |
| "loss": 0.0477, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.29754322111010006, |
| "grad_norm": 0.8322598543263076, |
| "learning_rate": 9.913142102789005e-06, |
| "loss": 0.0785, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.29799818016378526, |
| "grad_norm": 0.9426946452527044, |
| "learning_rate": 9.912876647710723e-06, |
| "loss": 0.0993, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2984531392174704, |
| "grad_norm": 0.8902481236790349, |
| "learning_rate": 9.912610791174348e-06, |
| "loss": 0.0981, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2989080982711556, |
| "grad_norm": 0.6714333609160019, |
| "learning_rate": 9.912344533201604e-06, |
| "loss": 0.0716, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.29936305732484075, |
| "grad_norm": 0.6721636461789662, |
| "learning_rate": 9.91207787381425e-06, |
| "loss": 0.0675, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.29981801637852595, |
| "grad_norm": 0.628744075340254, |
| "learning_rate": 9.911810813034073e-06, |
| "loss": 0.0583, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3002729754322111, |
| "grad_norm": 0.9172548581720068, |
| "learning_rate": 9.9115433508829e-06, |
| "loss": 0.0972, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.30072793448589624, |
| "grad_norm": 0.914462327674233, |
| "learning_rate": 9.911275487382583e-06, |
| "loss": 0.089, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.30118289353958144, |
| "grad_norm": 0.7410939383575923, |
| "learning_rate": 9.911007222555011e-06, |
| "loss": 0.0744, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3016378525932666, |
| "grad_norm": 0.6952942958219819, |
| "learning_rate": 9.91073855642211e-06, |
| "loss": 0.0627, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3020928116469518, |
| "grad_norm": 0.8802064643150562, |
| "learning_rate": 9.910469489005828e-06, |
| "loss": 0.0836, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.30254777070063693, |
| "grad_norm": 0.9015922573736656, |
| "learning_rate": 9.910200020328158e-06, |
| "loss": 0.0934, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30300272975432213, |
| "grad_norm": 0.6635682732023674, |
| "learning_rate": 9.909930150411113e-06, |
| "loss": 0.0623, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3034576888080073, |
| "grad_norm": 1.928152977107998, |
| "learning_rate": 9.909659879276751e-06, |
| "loss": 0.1457, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3039126478616925, |
| "grad_norm": 0.7754006092902415, |
| "learning_rate": 9.909389206947156e-06, |
| "loss": 0.0621, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3043676069153776, |
| "grad_norm": 1.0461982822616211, |
| "learning_rate": 9.909118133444444e-06, |
| "loss": 0.1087, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.30482256596906276, |
| "grad_norm": 0.7981897376851527, |
| "learning_rate": 9.90884665879077e-06, |
| "loss": 0.0921, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30527752502274796, |
| "grad_norm": 0.8941901965354629, |
| "learning_rate": 9.908574783008313e-06, |
| "loss": 0.1055, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3057324840764331, |
| "grad_norm": 1.0219508428898654, |
| "learning_rate": 9.908302506119291e-06, |
| "loss": 0.1152, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3061874431301183, |
| "grad_norm": 0.7623168423299865, |
| "learning_rate": 9.908029828145956e-06, |
| "loss": 0.0837, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.30664240218380345, |
| "grad_norm": 0.7026665400337327, |
| "learning_rate": 9.907756749110587e-06, |
| "loss": 0.0785, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.30709736123748865, |
| "grad_norm": 1.0861630797383492, |
| "learning_rate": 9.9074832690355e-06, |
| "loss": 0.1121, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3075523202911738, |
| "grad_norm": 0.8171913655631801, |
| "learning_rate": 9.907209387943042e-06, |
| "loss": 0.0759, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.30800727934485894, |
| "grad_norm": 0.695009650682766, |
| "learning_rate": 9.906935105855595e-06, |
| "loss": 0.0508, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.30846223839854414, |
| "grad_norm": 1.1629680848047237, |
| "learning_rate": 9.906660422795569e-06, |
| "loss": 0.1123, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3089171974522293, |
| "grad_norm": 1.1028006392582481, |
| "learning_rate": 9.906385338785411e-06, |
| "loss": 0.1048, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3093721565059145, |
| "grad_norm": 0.8590661780887954, |
| "learning_rate": 9.906109853847601e-06, |
| "loss": 0.0947, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.30982711555959963, |
| "grad_norm": 0.9160314729851723, |
| "learning_rate": 9.90583396800465e-06, |
| "loss": 0.0928, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.31028207461328483, |
| "grad_norm": 0.8935511298088069, |
| "learning_rate": 9.9055576812791e-06, |
| "loss": 0.0996, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.31073703366697, |
| "grad_norm": 0.7005723015579258, |
| "learning_rate": 9.905280993693533e-06, |
| "loss": 0.0863, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3111919927206551, |
| "grad_norm": 0.6441434987399284, |
| "learning_rate": 9.905003905270553e-06, |
| "loss": 0.0682, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3116469517743403, |
| "grad_norm": 0.9609160991558658, |
| "learning_rate": 9.904726416032803e-06, |
| "loss": 0.1095, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.31210191082802546, |
| "grad_norm": 0.723787688745946, |
| "learning_rate": 9.904448526002963e-06, |
| "loss": 0.0637, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.31255686988171066, |
| "grad_norm": 0.5250433090776031, |
| "learning_rate": 9.904170235203737e-06, |
| "loss": 0.0587, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3130118289353958, |
| "grad_norm": 0.8819438583914972, |
| "learning_rate": 9.903891543657866e-06, |
| "loss": 0.1112, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.313466787989081, |
| "grad_norm": 0.5413774773467063, |
| "learning_rate": 9.903612451388122e-06, |
| "loss": 0.0722, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.31392174704276615, |
| "grad_norm": 0.8913097595158456, |
| "learning_rate": 9.903332958417315e-06, |
| "loss": 0.0893, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3143767060964513, |
| "grad_norm": 0.6466979890354269, |
| "learning_rate": 9.903053064768283e-06, |
| "loss": 0.0709, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3148316651501365, |
| "grad_norm": 0.8428101951038133, |
| "learning_rate": 9.902772770463892e-06, |
| "loss": 0.0814, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.31528662420382164, |
| "grad_norm": 0.5832299371816577, |
| "learning_rate": 9.902492075527057e-06, |
| "loss": 0.0597, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.31574158325750684, |
| "grad_norm": 0.7856263020740725, |
| "learning_rate": 9.902210979980705e-06, |
| "loss": 0.074, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.316196542311192, |
| "grad_norm": 0.8507681095680276, |
| "learning_rate": 9.90192948384781e-06, |
| "loss": 0.0941, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3166515013648772, |
| "grad_norm": 0.7777857824270489, |
| "learning_rate": 9.901647587151376e-06, |
| "loss": 0.0708, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.31710646041856233, |
| "grad_norm": 1.068022521735614, |
| "learning_rate": 9.901365289914437e-06, |
| "loss": 0.108, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3175614194722475, |
| "grad_norm": 1.1320770025873614, |
| "learning_rate": 9.901082592160059e-06, |
| "loss": 0.108, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3180163785259327, |
| "grad_norm": 0.803518334023751, |
| "learning_rate": 9.900799493911346e-06, |
| "loss": 0.0871, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 0.8188444942805464, |
| "learning_rate": 9.900515995191431e-06, |
| "loss": 0.0808, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.318926296633303, |
| "grad_norm": 0.8993527964087475, |
| "learning_rate": 9.900232096023478e-06, |
| "loss": 0.0821, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.31938125568698816, |
| "grad_norm": 0.5600271316880729, |
| "learning_rate": 9.899947796430687e-06, |
| "loss": 0.0478, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.31983621474067336, |
| "grad_norm": 0.8369718087747545, |
| "learning_rate": 9.899663096436292e-06, |
| "loss": 0.0871, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3202911737943585, |
| "grad_norm": 0.8993771893247359, |
| "learning_rate": 9.899377996063554e-06, |
| "loss": 0.0858, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.32074613284804365, |
| "grad_norm": 0.6615773523414142, |
| "learning_rate": 9.899092495335772e-06, |
| "loss": 0.0601, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.32120109190172885, |
| "grad_norm": 0.8278593900178107, |
| "learning_rate": 9.898806594276273e-06, |
| "loss": 0.0769, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.321656050955414, |
| "grad_norm": 0.7866286577186284, |
| "learning_rate": 9.898520292908425e-06, |
| "loss": 0.0894, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3221110100090992, |
| "grad_norm": 0.8050313615570786, |
| "learning_rate": 9.89823359125562e-06, |
| "loss": 0.0732, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.32256596906278434, |
| "grad_norm": 1.0243914254387991, |
| "learning_rate": 9.897946489341286e-06, |
| "loss": 0.0901, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.32302092811646954, |
| "grad_norm": 0.7036337195424629, |
| "learning_rate": 9.897658987188882e-06, |
| "loss": 0.0686, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3234758871701547, |
| "grad_norm": 0.5593772745397846, |
| "learning_rate": 9.897371084821905e-06, |
| "loss": 0.045, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.32393084622383983, |
| "grad_norm": 0.608867956874154, |
| "learning_rate": 9.897082782263878e-06, |
| "loss": 0.0692, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.32438580527752503, |
| "grad_norm": 0.6488333561840038, |
| "learning_rate": 9.896794079538362e-06, |
| "loss": 0.0513, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3248407643312102, |
| "grad_norm": 0.5593745607285364, |
| "learning_rate": 9.896504976668948e-06, |
| "loss": 0.0437, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3252957233848954, |
| "grad_norm": 0.5072427035814352, |
| "learning_rate": 9.896215473679259e-06, |
| "loss": 0.0566, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3257506824385805, |
| "grad_norm": 0.7088539736923404, |
| "learning_rate": 9.895925570592952e-06, |
| "loss": 0.0878, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3262056414922657, |
| "grad_norm": 0.9653520712469312, |
| "learning_rate": 9.895635267433719e-06, |
| "loss": 0.101, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.32666060054595086, |
| "grad_norm": 1.2323140645024868, |
| "learning_rate": 9.895344564225277e-06, |
| "loss": 0.1359, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.327115559599636, |
| "grad_norm": 0.6826807669546061, |
| "learning_rate": 9.895053460991389e-06, |
| "loss": 0.0799, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3275705186533212, |
| "grad_norm": 0.9496304010026827, |
| "learning_rate": 9.894761957755834e-06, |
| "loss": 0.0928, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32802547770700635, |
| "grad_norm": 0.8578622125964999, |
| "learning_rate": 9.894470054542438e-06, |
| "loss": 0.1149, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.32848043676069155, |
| "grad_norm": 0.5483719717114235, |
| "learning_rate": 9.894177751375053e-06, |
| "loss": 0.0621, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3289353958143767, |
| "grad_norm": 0.6341198897869947, |
| "learning_rate": 9.893885048277564e-06, |
| "loss": 0.0568, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3293903548680619, |
| "grad_norm": 0.7169738278552924, |
| "learning_rate": 9.893591945273888e-06, |
| "loss": 0.0752, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.32984531392174704, |
| "grad_norm": 0.9839905963719277, |
| "learning_rate": 9.89329844238798e-06, |
| "loss": 0.1167, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3303002729754322, |
| "grad_norm": 0.6825969142747964, |
| "learning_rate": 9.89300453964382e-06, |
| "loss": 0.0693, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3307552320291174, |
| "grad_norm": 1.0420794853330364, |
| "learning_rate": 9.892710237065423e-06, |
| "loss": 0.1561, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.33121019108280253, |
| "grad_norm": 1.0109988913697336, |
| "learning_rate": 9.892415534676844e-06, |
| "loss": 0.0813, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.33166515013648773, |
| "grad_norm": 0.6237179977245606, |
| "learning_rate": 9.892120432502161e-06, |
| "loss": 0.063, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3321201091901729, |
| "grad_norm": 0.7047649578988654, |
| "learning_rate": 9.891824930565488e-06, |
| "loss": 0.0757, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3325750682438581, |
| "grad_norm": 0.8381336709785119, |
| "learning_rate": 9.891529028890974e-06, |
| "loss": 0.1137, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3330300272975432, |
| "grad_norm": 1.108812928457643, |
| "learning_rate": 9.891232727502797e-06, |
| "loss": 0.0971, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.33348498635122836, |
| "grad_norm": 0.8911550238765422, |
| "learning_rate": 9.89093602642517e-06, |
| "loss": 0.0869, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.33393994540491356, |
| "grad_norm": 0.7527062298816352, |
| "learning_rate": 9.890638925682339e-06, |
| "loss": 0.085, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3343949044585987, |
| "grad_norm": 0.8028637093759472, |
| "learning_rate": 9.89034142529858e-06, |
| "loss": 0.0866, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3348498635122839, |
| "grad_norm": 0.6620365400447171, |
| "learning_rate": 9.890043525298203e-06, |
| "loss": 0.053, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.33530482256596905, |
| "grad_norm": 0.6606838089782118, |
| "learning_rate": 9.889745225705555e-06, |
| "loss": 0.0783, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.33575978161965425, |
| "grad_norm": 0.6719238881234298, |
| "learning_rate": 9.889446526545007e-06, |
| "loss": 0.079, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3362147406733394, |
| "grad_norm": 0.7379881342173255, |
| "learning_rate": 9.88914742784097e-06, |
| "loss": 0.0848, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.33666969972702454, |
| "grad_norm": 1.9725398231448836, |
| "learning_rate": 9.888847929617887e-06, |
| "loss": 0.1666, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33712465878070974, |
| "grad_norm": 0.7800667095330575, |
| "learning_rate": 9.888548031900226e-06, |
| "loss": 0.0779, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3375796178343949, |
| "grad_norm": 0.9725198572426639, |
| "learning_rate": 9.888247734712497e-06, |
| "loss": 0.0719, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3380345768880801, |
| "grad_norm": 0.9547104503470986, |
| "learning_rate": 9.887947038079238e-06, |
| "loss": 0.1119, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.33848953594176523, |
| "grad_norm": 0.5879353672489683, |
| "learning_rate": 9.887645942025022e-06, |
| "loss": 0.0553, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.33894449499545043, |
| "grad_norm": 0.5485885922626542, |
| "learning_rate": 9.887344446574452e-06, |
| "loss": 0.0494, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3393994540491356, |
| "grad_norm": 0.9640668269863656, |
| "learning_rate": 9.887042551752163e-06, |
| "loss": 0.1104, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3398544131028208, |
| "grad_norm": 0.8639463935480832, |
| "learning_rate": 9.886740257582827e-06, |
| "loss": 0.0655, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3403093721565059, |
| "grad_norm": 0.6489702107287116, |
| "learning_rate": 9.886437564091148e-06, |
| "loss": 0.0777, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.34076433121019106, |
| "grad_norm": 0.8236523684362178, |
| "learning_rate": 9.886134471301854e-06, |
| "loss": 0.0916, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.34121929026387626, |
| "grad_norm": 0.8459143900125461, |
| "learning_rate": 9.885830979239718e-06, |
| "loss": 0.1017, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3416742493175614, |
| "grad_norm": 0.7496065352262437, |
| "learning_rate": 9.885527087929541e-06, |
| "loss": 0.0861, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3421292083712466, |
| "grad_norm": 0.849292513666517, |
| "learning_rate": 9.88522279739615e-06, |
| "loss": 0.0839, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.34258416742493175, |
| "grad_norm": 0.7756671663835698, |
| "learning_rate": 9.884918107664417e-06, |
| "loss": 0.0809, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.34303912647861695, |
| "grad_norm": 0.7338987681003677, |
| "learning_rate": 9.884613018759234e-06, |
| "loss": 0.0721, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3434940855323021, |
| "grad_norm": 0.6003946948163056, |
| "learning_rate": 9.884307530705534e-06, |
| "loss": 0.0782, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34394904458598724, |
| "grad_norm": 0.5309561440373582, |
| "learning_rate": 9.88400164352828e-06, |
| "loss": 0.0563, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.34440400363967244, |
| "grad_norm": 0.6551261739802692, |
| "learning_rate": 9.883695357252467e-06, |
| "loss": 0.061, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3448589626933576, |
| "grad_norm": 0.6598139820416582, |
| "learning_rate": 9.883388671903125e-06, |
| "loss": 0.084, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3453139217470428, |
| "grad_norm": 0.8678451615084499, |
| "learning_rate": 9.883081587505315e-06, |
| "loss": 0.0893, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.34576888080072793, |
| "grad_norm": 0.8849976199871086, |
| "learning_rate": 9.882774104084127e-06, |
| "loss": 0.0938, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34622383985441313, |
| "grad_norm": 0.6157555054475868, |
| "learning_rate": 9.882466221664691e-06, |
| "loss": 0.0535, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3466787989080983, |
| "grad_norm": 0.9555128068667961, |
| "learning_rate": 9.882157940272165e-06, |
| "loss": 0.0984, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3471337579617834, |
| "grad_norm": 0.8431106213501941, |
| "learning_rate": 9.881849259931738e-06, |
| "loss": 0.1062, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3475887170154686, |
| "grad_norm": 0.6608166650909644, |
| "learning_rate": 9.881540180668637e-06, |
| "loss": 0.0589, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.34804367606915376, |
| "grad_norm": 0.7177237690901401, |
| "learning_rate": 9.881230702508118e-06, |
| "loss": 0.0721, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.34849863512283896, |
| "grad_norm": 0.49396541889218665, |
| "learning_rate": 9.880920825475468e-06, |
| "loss": 0.0582, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3489535941765241, |
| "grad_norm": 0.7008727540015932, |
| "learning_rate": 9.88061054959601e-06, |
| "loss": 0.0689, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3494085532302093, |
| "grad_norm": 0.6417543130209264, |
| "learning_rate": 9.880299874895098e-06, |
| "loss": 0.0859, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.34986351228389445, |
| "grad_norm": 0.5325758158155319, |
| "learning_rate": 9.879988801398121e-06, |
| "loss": 0.0508, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3503184713375796, |
| "grad_norm": 0.653129374155715, |
| "learning_rate": 9.879677329130496e-06, |
| "loss": 0.0822, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3507734303912648, |
| "grad_norm": 0.6044703796770591, |
| "learning_rate": 9.879365458117678e-06, |
| "loss": 0.0662, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.35122838944494994, |
| "grad_norm": 0.6417796330386928, |
| "learning_rate": 9.879053188385148e-06, |
| "loss": 0.0649, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.35168334849863514, |
| "grad_norm": 0.6127493684308597, |
| "learning_rate": 9.878740519958425e-06, |
| "loss": 0.0601, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3521383075523203, |
| "grad_norm": 0.9092296350808027, |
| "learning_rate": 9.878427452863059e-06, |
| "loss": 0.1138, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3525932666060055, |
| "grad_norm": 0.8850379239223551, |
| "learning_rate": 9.878113987124633e-06, |
| "loss": 0.1135, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35304822565969063, |
| "grad_norm": 0.8106864823035035, |
| "learning_rate": 9.877800122768761e-06, |
| "loss": 0.084, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3535031847133758, |
| "grad_norm": 0.6717791100158048, |
| "learning_rate": 9.877485859821092e-06, |
| "loss": 0.0764, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.353958143767061, |
| "grad_norm": 0.4266356830653338, |
| "learning_rate": 9.877171198307304e-06, |
| "loss": 0.0496, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3544131028207461, |
| "grad_norm": 0.7839112755574695, |
| "learning_rate": 9.87685613825311e-06, |
| "loss": 0.0864, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3548680618744313, |
| "grad_norm": 0.8928629316475961, |
| "learning_rate": 9.876540679684257e-06, |
| "loss": 0.0802, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35532302092811646, |
| "grad_norm": 0.7427060191976654, |
| "learning_rate": 9.876224822626522e-06, |
| "loss": 0.0809, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.35577797998180166, |
| "grad_norm": 0.6618589317208607, |
| "learning_rate": 9.875908567105716e-06, |
| "loss": 0.0633, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3562329390354868, |
| "grad_norm": 0.9168643329932029, |
| "learning_rate": 9.87559191314768e-06, |
| "loss": 0.0977, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.35668789808917195, |
| "grad_norm": 1.010661772545197, |
| "learning_rate": 9.87527486077829e-06, |
| "loss": 0.112, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.7355960177801563, |
| "learning_rate": 9.874957410023458e-06, |
| "loss": 0.0578, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3575978161965423, |
| "grad_norm": 0.7012046376593928, |
| "learning_rate": 9.874639560909118e-06, |
| "loss": 0.0856, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3580527752502275, |
| "grad_norm": 0.629856671324697, |
| "learning_rate": 9.87432131346125e-06, |
| "loss": 0.079, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.35850773430391264, |
| "grad_norm": 0.6605442679933491, |
| "learning_rate": 9.874002667705855e-06, |
| "loss": 0.0713, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.35896269335759784, |
| "grad_norm": 0.6036439966816435, |
| "learning_rate": 9.873683623668972e-06, |
| "loss": 0.0734, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.359417652411283, |
| "grad_norm": 0.9098464282834562, |
| "learning_rate": 9.873364181376674e-06, |
| "loss": 0.1273, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35987261146496813, |
| "grad_norm": 0.725232432410699, |
| "learning_rate": 9.873044340855062e-06, |
| "loss": 0.0704, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.36032757051865333, |
| "grad_norm": 0.8275864687946802, |
| "learning_rate": 9.872724102130273e-06, |
| "loss": 0.0722, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3607825295723385, |
| "grad_norm": 0.6908762665090429, |
| "learning_rate": 9.872403465228476e-06, |
| "loss": 0.068, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3612374886260237, |
| "grad_norm": 0.8007479624540592, |
| "learning_rate": 9.872082430175871e-06, |
| "loss": 0.0792, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3616924476797088, |
| "grad_norm": 0.7580697654486878, |
| "learning_rate": 9.871760996998692e-06, |
| "loss": 0.0662, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.362147406733394, |
| "grad_norm": 1.0378802589927232, |
| "learning_rate": 9.871439165723207e-06, |
| "loss": 0.0905, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.36260236578707916, |
| "grad_norm": 0.9366156924362913, |
| "learning_rate": 9.87111693637571e-06, |
| "loss": 0.0966, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3630573248407643, |
| "grad_norm": 0.9568919919938076, |
| "learning_rate": 9.870794308982536e-06, |
| "loss": 0.1092, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3635122838944495, |
| "grad_norm": 1.0303944561108107, |
| "learning_rate": 9.870471283570046e-06, |
| "loss": 0.1214, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.36396724294813465, |
| "grad_norm": 0.7123988620535131, |
| "learning_rate": 9.870147860164639e-06, |
| "loss": 0.0952, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36442220200181985, |
| "grad_norm": 0.6461145025804255, |
| "learning_rate": 9.86982403879274e-06, |
| "loss": 0.0653, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.364877161055505, |
| "grad_norm": 0.761176238728339, |
| "learning_rate": 9.869499819480815e-06, |
| "loss": 0.0911, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3653321201091902, |
| "grad_norm": 0.6778284620896282, |
| "learning_rate": 9.869175202255354e-06, |
| "loss": 0.0726, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.36578707916287534, |
| "grad_norm": 0.6378934869683002, |
| "learning_rate": 9.868850187142885e-06, |
| "loss": 0.0721, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3662420382165605, |
| "grad_norm": 0.725078464245391, |
| "learning_rate": 9.868524774169968e-06, |
| "loss": 0.0774, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3666969972702457, |
| "grad_norm": 0.7707907185217752, |
| "learning_rate": 9.86819896336319e-06, |
| "loss": 0.067, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.36715195632393083, |
| "grad_norm": 0.8162851407409059, |
| "learning_rate": 9.867872754749178e-06, |
| "loss": 0.0908, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.36760691537761603, |
| "grad_norm": 0.5330499489332517, |
| "learning_rate": 9.867546148354586e-06, |
| "loss": 0.066, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3680618744313012, |
| "grad_norm": 0.6649993383235931, |
| "learning_rate": 9.867219144206105e-06, |
| "loss": 0.0672, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3685168334849864, |
| "grad_norm": 0.9824606570699352, |
| "learning_rate": 9.866891742330458e-06, |
| "loss": 0.11, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3689717925386715, |
| "grad_norm": 0.6507791006697302, |
| "learning_rate": 9.866563942754394e-06, |
| "loss": 0.0622, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.36942675159235666, |
| "grad_norm": 0.7455907568930894, |
| "learning_rate": 9.866235745504705e-06, |
| "loss": 0.0833, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.36988171064604186, |
| "grad_norm": 0.9927293122267482, |
| "learning_rate": 9.865907150608203e-06, |
| "loss": 0.0978, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.370336669699727, |
| "grad_norm": 0.817279180213694, |
| "learning_rate": 9.865578158091746e-06, |
| "loss": 0.1036, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3707916287534122, |
| "grad_norm": 0.9966504261459711, |
| "learning_rate": 9.865248767982211e-06, |
| "loss": 0.1027, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.37124658780709735, |
| "grad_norm": 0.9561727776097537, |
| "learning_rate": 9.864918980306521e-06, |
| "loss": 0.1136, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.37170154686078255, |
| "grad_norm": 0.6718095123705313, |
| "learning_rate": 9.86458879509162e-06, |
| "loss": 0.0762, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3721565059144677, |
| "grad_norm": 0.9803345299998187, |
| "learning_rate": 9.864258212364492e-06, |
| "loss": 0.0791, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.37261146496815284, |
| "grad_norm": 0.8058679812037255, |
| "learning_rate": 9.86392723215215e-06, |
| "loss": 0.069, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.37306642402183804, |
| "grad_norm": 0.5836160590759203, |
| "learning_rate": 9.86359585448164e-06, |
| "loss": 0.0621, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3735213830755232, |
| "grad_norm": 0.6511599091669776, |
| "learning_rate": 9.863264079380039e-06, |
| "loss": 0.0745, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3739763421292084, |
| "grad_norm": 0.9308266206126162, |
| "learning_rate": 9.862931906874461e-06, |
| "loss": 0.1132, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.37443130118289353, |
| "grad_norm": 0.613775373571284, |
| "learning_rate": 9.862599336992048e-06, |
| "loss": 0.0545, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.37488626023657873, |
| "grad_norm": 0.6991388893487894, |
| "learning_rate": 9.862266369759976e-06, |
| "loss": 0.0754, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.37534121929026387, |
| "grad_norm": 0.6352968005261165, |
| "learning_rate": 9.861933005205454e-06, |
| "loss": 0.0576, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37579617834394907, |
| "grad_norm": 1.109194467922723, |
| "learning_rate": 9.861599243355725e-06, |
| "loss": 0.1281, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3762511373976342, |
| "grad_norm": 0.9742134289860664, |
| "learning_rate": 9.86126508423806e-06, |
| "loss": 0.1067, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.37670609645131936, |
| "grad_norm": 0.6015820455914206, |
| "learning_rate": 9.860930527879763e-06, |
| "loss": 0.055, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.37716105550500456, |
| "grad_norm": 1.0894948091440197, |
| "learning_rate": 9.860595574308179e-06, |
| "loss": 0.1147, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3776160145586897, |
| "grad_norm": 0.7023892750192133, |
| "learning_rate": 9.860260223550672e-06, |
| "loss": 0.0815, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3780709736123749, |
| "grad_norm": 0.4943868719085533, |
| "learning_rate": 9.859924475634649e-06, |
| "loss": 0.0476, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.37852593266606005, |
| "grad_norm": 0.9974648765413693, |
| "learning_rate": 9.859588330587545e-06, |
| "loss": 0.1068, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.37898089171974525, |
| "grad_norm": 0.5960289391531881, |
| "learning_rate": 9.859251788436829e-06, |
| "loss": 0.0715, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3794358507734304, |
| "grad_norm": 0.907079582974149, |
| "learning_rate": 9.85891484921e-06, |
| "loss": 0.0905, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.37989080982711554, |
| "grad_norm": 0.8133034306250352, |
| "learning_rate": 9.858577512934592e-06, |
| "loss": 0.1012, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.38034576888080074, |
| "grad_norm": 0.7828785203637737, |
| "learning_rate": 9.858239779638173e-06, |
| "loss": 0.0726, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3808007279344859, |
| "grad_norm": 1.3138864597148558, |
| "learning_rate": 9.857901649348338e-06, |
| "loss": 0.1307, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3812556869881711, |
| "grad_norm": 0.7000750227265026, |
| "learning_rate": 9.857563122092717e-06, |
| "loss": 0.0777, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3817106460418562, |
| "grad_norm": 0.757283984575844, |
| "learning_rate": 9.857224197898975e-06, |
| "loss": 0.083, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 0.7113754486134378, |
| "learning_rate": 9.856884876794805e-06, |
| "loss": 0.0795, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38262056414922657, |
| "grad_norm": 0.6891370217065743, |
| "learning_rate": 9.856545158807938e-06, |
| "loss": 0.0576, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3830755232029117, |
| "grad_norm": 0.7230826558764609, |
| "learning_rate": 9.856205043966134e-06, |
| "loss": 0.0973, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3835304822565969, |
| "grad_norm": 0.9951638416419379, |
| "learning_rate": 9.855864532297181e-06, |
| "loss": 0.1225, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.38398544131028206, |
| "grad_norm": 0.8272776971451865, |
| "learning_rate": 9.85552362382891e-06, |
| "loss": 0.0928, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.38444040036396726, |
| "grad_norm": 0.662562460388915, |
| "learning_rate": 9.855182318589174e-06, |
| "loss": 0.0711, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3848953594176524, |
| "grad_norm": 1.185659176011977, |
| "learning_rate": 9.854840616605866e-06, |
| "loss": 0.0922, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3853503184713376, |
| "grad_norm": 0.7002426118833048, |
| "learning_rate": 9.854498517906908e-06, |
| "loss": 0.0828, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.38580527752502275, |
| "grad_norm": 0.8957633348930525, |
| "learning_rate": 9.854156022520252e-06, |
| "loss": 0.0809, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3862602365787079, |
| "grad_norm": 1.0593251614278854, |
| "learning_rate": 9.853813130473887e-06, |
| "loss": 0.1109, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3867151956323931, |
| "grad_norm": 0.7751748709357449, |
| "learning_rate": 9.853469841795832e-06, |
| "loss": 0.0823, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38717015468607824, |
| "grad_norm": 0.5943868690351954, |
| "learning_rate": 9.853126156514142e-06, |
| "loss": 0.0758, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.38762511373976344, |
| "grad_norm": 0.4901349757557767, |
| "learning_rate": 9.852782074656897e-06, |
| "loss": 0.064, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3880800727934486, |
| "grad_norm": 0.7531191508768753, |
| "learning_rate": 9.852437596252216e-06, |
| "loss": 0.0824, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3885350318471338, |
| "grad_norm": 0.7684236261792305, |
| "learning_rate": 9.852092721328248e-06, |
| "loss": 0.0674, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3889899909008189, |
| "grad_norm": 0.8624513661560378, |
| "learning_rate": 9.851747449913176e-06, |
| "loss": 0.09, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.38944494995450407, |
| "grad_norm": 0.9125725996183891, |
| "learning_rate": 9.851401782035213e-06, |
| "loss": 0.129, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.38989990900818927, |
| "grad_norm": 0.7630714638300728, |
| "learning_rate": 9.851055717722604e-06, |
| "loss": 0.068, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3903548680618744, |
| "grad_norm": 0.834756070401477, |
| "learning_rate": 9.850709257003628e-06, |
| "loss": 0.0831, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3908098271155596, |
| "grad_norm": 0.9864776662717517, |
| "learning_rate": 9.850362399906598e-06, |
| "loss": 0.0904, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.39126478616924476, |
| "grad_norm": 0.6242730295284743, |
| "learning_rate": 9.850015146459857e-06, |
| "loss": 0.0754, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39171974522292996, |
| "grad_norm": 0.838271649072902, |
| "learning_rate": 9.84966749669178e-06, |
| "loss": 0.0899, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3921747042766151, |
| "grad_norm": 0.6826448278617049, |
| "learning_rate": 9.849319450630777e-06, |
| "loss": 0.0698, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.39262966333030025, |
| "grad_norm": 0.5533993282250775, |
| "learning_rate": 9.848971008305288e-06, |
| "loss": 0.0688, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.39308462238398545, |
| "grad_norm": 0.838673412156409, |
| "learning_rate": 9.848622169743784e-06, |
| "loss": 0.0815, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.3935395814376706, |
| "grad_norm": 0.9783580500729582, |
| "learning_rate": 9.848272934974774e-06, |
| "loss": 0.0745, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3939945404913558, |
| "grad_norm": 0.5976030953641746, |
| "learning_rate": 9.847923304026793e-06, |
| "loss": 0.0664, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.39444949954504094, |
| "grad_norm": 0.6999143793652887, |
| "learning_rate": 9.847573276928415e-06, |
| "loss": 0.0804, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.39490445859872614, |
| "grad_norm": 0.6338725165728231, |
| "learning_rate": 9.847222853708239e-06, |
| "loss": 0.0655, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3953594176524113, |
| "grad_norm": 0.7010627446349382, |
| "learning_rate": 9.846872034394902e-06, |
| "loss": 0.0667, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3958143767060964, |
| "grad_norm": 0.6173227181881447, |
| "learning_rate": 9.84652081901707e-06, |
| "loss": 0.0674, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3962693357597816, |
| "grad_norm": 0.9673042020268607, |
| "learning_rate": 9.846169207603443e-06, |
| "loss": 0.1267, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.39672429481346677, |
| "grad_norm": 0.6294912489479282, |
| "learning_rate": 9.845817200182755e-06, |
| "loss": 0.0588, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.39717925386715197, |
| "grad_norm": 0.8477152807126976, |
| "learning_rate": 9.845464796783767e-06, |
| "loss": 0.1219, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3976342129208371, |
| "grad_norm": 0.5887483684825674, |
| "learning_rate": 9.845111997435279e-06, |
| "loss": 0.0731, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3980891719745223, |
| "grad_norm": 0.5630369277247907, |
| "learning_rate": 9.844758802166116e-06, |
| "loss": 0.0579, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39854413102820746, |
| "grad_norm": 0.6717541815357567, |
| "learning_rate": 9.844405211005145e-06, |
| "loss": 0.0711, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3989990900818926, |
| "grad_norm": 0.6571828619535791, |
| "learning_rate": 9.844051223981258e-06, |
| "loss": 0.0638, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3994540491355778, |
| "grad_norm": 0.6723710552364174, |
| "learning_rate": 9.84369684112338e-06, |
| "loss": 0.0676, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.39990900818926295, |
| "grad_norm": 0.7014173744195523, |
| "learning_rate": 9.84334206246047e-06, |
| "loss": 0.0751, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.40036396724294815, |
| "grad_norm": 0.7999660318519703, |
| "learning_rate": 9.842986888021518e-06, |
| "loss": 0.0895, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4008189262966333, |
| "grad_norm": 0.5578605501955606, |
| "learning_rate": 9.842631317835548e-06, |
| "loss": 0.0637, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4012738853503185, |
| "grad_norm": 0.6615256090849237, |
| "learning_rate": 9.842275351931617e-06, |
| "loss": 0.0664, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.40172884440400364, |
| "grad_norm": 0.5263094198672195, |
| "learning_rate": 9.841918990338812e-06, |
| "loss": 0.0611, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4021838034576888, |
| "grad_norm": 0.8080883575450535, |
| "learning_rate": 9.841562233086252e-06, |
| "loss": 0.0912, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.402638762511374, |
| "grad_norm": 0.6655757939327012, |
| "learning_rate": 9.841205080203092e-06, |
| "loss": 0.0601, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4030937215650591, |
| "grad_norm": 0.8701903481119097, |
| "learning_rate": 9.840847531718515e-06, |
| "loss": 0.0914, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4035486806187443, |
| "grad_norm": 0.7730206436987713, |
| "learning_rate": 9.840489587661738e-06, |
| "loss": 0.0747, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.40400363967242947, |
| "grad_norm": 0.7410839527981146, |
| "learning_rate": 9.840131248062012e-06, |
| "loss": 0.079, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.40445859872611467, |
| "grad_norm": 0.627620281196765, |
| "learning_rate": 9.839772512948618e-06, |
| "loss": 0.0715, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4049135577797998, |
| "grad_norm": 0.8746014124114054, |
| "learning_rate": 9.83941338235087e-06, |
| "loss": 0.0824, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40536851683348496, |
| "grad_norm": 1.0112737589697485, |
| "learning_rate": 9.839053856298116e-06, |
| "loss": 0.1251, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.40582347588717016, |
| "grad_norm": 0.72216805525771, |
| "learning_rate": 9.838693934819734e-06, |
| "loss": 0.0893, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4062784349408553, |
| "grad_norm": 0.7544949830136005, |
| "learning_rate": 9.838333617945134e-06, |
| "loss": 0.0968, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4067333939945405, |
| "grad_norm": 0.9543024355165705, |
| "learning_rate": 9.837972905703762e-06, |
| "loss": 0.102, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.40718835304822565, |
| "grad_norm": 1.02061795078975, |
| "learning_rate": 9.83761179812509e-06, |
| "loss": 0.0649, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.40764331210191085, |
| "grad_norm": 0.39738812842187227, |
| "learning_rate": 9.837250295238629e-06, |
| "loss": 0.0428, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.408098271155596, |
| "grad_norm": 0.8873895570319217, |
| "learning_rate": 9.836888397073919e-06, |
| "loss": 0.1068, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.40855323020928114, |
| "grad_norm": 0.7492126364897504, |
| "learning_rate": 9.836526103660533e-06, |
| "loss": 0.0953, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.40900818926296634, |
| "grad_norm": 0.821575499525911, |
| "learning_rate": 9.836163415028075e-06, |
| "loss": 0.0712, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4094631483166515, |
| "grad_norm": 1.0052579979241618, |
| "learning_rate": 9.835800331206183e-06, |
| "loss": 0.1138, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4099181073703367, |
| "grad_norm": 0.7848465428804848, |
| "learning_rate": 9.835436852224525e-06, |
| "loss": 0.0978, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4103730664240218, |
| "grad_norm": 0.9719856735481065, |
| "learning_rate": 9.835072978112804e-06, |
| "loss": 0.0846, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.410828025477707, |
| "grad_norm": 0.6607308818506346, |
| "learning_rate": 9.834708708900755e-06, |
| "loss": 0.0654, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.41128298453139217, |
| "grad_norm": 0.5191597312034261, |
| "learning_rate": 9.834344044618144e-06, |
| "loss": 0.0518, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.41173794358507737, |
| "grad_norm": 0.5336391872354229, |
| "learning_rate": 9.83397898529477e-06, |
| "loss": 0.0535, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4121929026387625, |
| "grad_norm": 0.5687342550017563, |
| "learning_rate": 9.833613530960462e-06, |
| "loss": 0.0578, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.41264786169244766, |
| "grad_norm": 0.8793783198642894, |
| "learning_rate": 9.833247681645083e-06, |
| "loss": 0.1286, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.41310282074613286, |
| "grad_norm": 0.8073005899800644, |
| "learning_rate": 9.832881437378534e-06, |
| "loss": 0.0853, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.413557779799818, |
| "grad_norm": 0.511699500000588, |
| "learning_rate": 9.832514798190738e-06, |
| "loss": 0.0504, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4140127388535032, |
| "grad_norm": 0.5082793074725768, |
| "learning_rate": 9.832147764111655e-06, |
| "loss": 0.056, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41446769790718835, |
| "grad_norm": 0.9876041013395295, |
| "learning_rate": 9.83178033517128e-06, |
| "loss": 0.0984, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.41492265696087355, |
| "grad_norm": 0.7511273129930924, |
| "learning_rate": 9.831412511399633e-06, |
| "loss": 0.0969, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4153776160145587, |
| "grad_norm": 1.0144870263760433, |
| "learning_rate": 9.831044292826778e-06, |
| "loss": 0.1482, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.41583257506824384, |
| "grad_norm": 0.70444400073401, |
| "learning_rate": 9.830675679482797e-06, |
| "loss": 0.0802, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.41628753412192904, |
| "grad_norm": 1.0357251397748677, |
| "learning_rate": 9.830306671397816e-06, |
| "loss": 0.1061, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4167424931756142, |
| "grad_norm": 0.895894802940119, |
| "learning_rate": 9.829937268601988e-06, |
| "loss": 0.1005, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4171974522292994, |
| "grad_norm": 0.6004589977630954, |
| "learning_rate": 9.829567471125497e-06, |
| "loss": 0.0664, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4176524112829845, |
| "grad_norm": 0.6058859475834909, |
| "learning_rate": 9.829197278998562e-06, |
| "loss": 0.0728, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4181073703366697, |
| "grad_norm": 0.5886912548442098, |
| "learning_rate": 9.828826692251435e-06, |
| "loss": 0.074, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.41856232939035487, |
| "grad_norm": 0.5982473215332103, |
| "learning_rate": 9.828455710914398e-06, |
| "loss": 0.0653, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41901728844404, |
| "grad_norm": 0.8647804622811079, |
| "learning_rate": 9.828084335017763e-06, |
| "loss": 0.0741, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4194722474977252, |
| "grad_norm": 0.653767178815679, |
| "learning_rate": 9.827712564591883e-06, |
| "loss": 0.0604, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.41992720655141036, |
| "grad_norm": 0.7812500085225947, |
| "learning_rate": 9.827340399667132e-06, |
| "loss": 0.0708, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.42038216560509556, |
| "grad_norm": 0.7314008563711142, |
| "learning_rate": 9.826967840273921e-06, |
| "loss": 0.0721, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4208371246587807, |
| "grad_norm": 0.8727413076803472, |
| "learning_rate": 9.8265948864427e-06, |
| "loss": 0.0892, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4212920837124659, |
| "grad_norm": 0.6051379056710864, |
| "learning_rate": 9.826221538203942e-06, |
| "loss": 0.0685, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.42174704276615105, |
| "grad_norm": 0.7279887191787228, |
| "learning_rate": 9.825847795588154e-06, |
| "loss": 0.0766, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4222020018198362, |
| "grad_norm": 0.7126811268305303, |
| "learning_rate": 9.825473658625876e-06, |
| "loss": 0.0821, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4226569608735214, |
| "grad_norm": 0.8812960827967533, |
| "learning_rate": 9.825099127347684e-06, |
| "loss": 0.0982, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.42311191992720654, |
| "grad_norm": 0.7462955906438729, |
| "learning_rate": 9.824724201784182e-06, |
| "loss": 0.1073, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42356687898089174, |
| "grad_norm": 0.5448066050338419, |
| "learning_rate": 9.824348881966004e-06, |
| "loss": 0.0637, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4240218380345769, |
| "grad_norm": 0.7750150802923693, |
| "learning_rate": 9.823973167923823e-06, |
| "loss": 0.09, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4244767970882621, |
| "grad_norm": 0.8695175796556455, |
| "learning_rate": 9.82359705968834e-06, |
| "loss": 0.0857, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4249317561419472, |
| "grad_norm": 0.653112477618241, |
| "learning_rate": 9.823220557290289e-06, |
| "loss": 0.0722, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.42538671519563237, |
| "grad_norm": 0.7764742726938813, |
| "learning_rate": 9.822843660760434e-06, |
| "loss": 0.0582, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42584167424931757, |
| "grad_norm": 0.8338160462571067, |
| "learning_rate": 9.822466370129576e-06, |
| "loss": 0.0993, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4262966333030027, |
| "grad_norm": 0.7416650975880095, |
| "learning_rate": 9.822088685428543e-06, |
| "loss": 0.0782, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4267515923566879, |
| "grad_norm": 0.5969422348364739, |
| "learning_rate": 9.821710606688199e-06, |
| "loss": 0.0546, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.42720655141037306, |
| "grad_norm": 0.6235404067325917, |
| "learning_rate": 9.82133213393944e-06, |
| "loss": 0.0638, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.42766151046405826, |
| "grad_norm": 0.7910461101358781, |
| "learning_rate": 9.820953267213194e-06, |
| "loss": 0.0775, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4281164695177434, |
| "grad_norm": 0.692978452923811, |
| "learning_rate": 9.820574006540415e-06, |
| "loss": 0.053, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.7310389759017597, |
| "learning_rate": 9.820194351952098e-06, |
| "loss": 0.0716, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.42902638762511375, |
| "grad_norm": 0.6553331509390902, |
| "learning_rate": 9.819814303479268e-06, |
| "loss": 0.0612, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4294813466787989, |
| "grad_norm": 1.1310076957610966, |
| "learning_rate": 9.819433861152978e-06, |
| "loss": 0.1116, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4299363057324841, |
| "grad_norm": 0.6933766894953944, |
| "learning_rate": 9.819053025004316e-06, |
| "loss": 0.0932, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.43039126478616924, |
| "grad_norm": 0.7823571557493696, |
| "learning_rate": 9.818671795064405e-06, |
| "loss": 0.0847, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.43084622383985444, |
| "grad_norm": 0.8000794358590197, |
| "learning_rate": 9.818290171364396e-06, |
| "loss": 0.0916, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4313011828935396, |
| "grad_norm": 0.6207042654318157, |
| "learning_rate": 9.817908153935473e-06, |
| "loss": 0.0568, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4317561419472247, |
| "grad_norm": 0.7957970680354334, |
| "learning_rate": 9.817525742808854e-06, |
| "loss": 0.1203, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4322111010009099, |
| "grad_norm": 0.6607960765057979, |
| "learning_rate": 9.817142938015786e-06, |
| "loss": 0.069, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43266606005459507, |
| "grad_norm": 0.8132102265727185, |
| "learning_rate": 9.816759739587552e-06, |
| "loss": 0.0821, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.43312101910828027, |
| "grad_norm": 0.6410149691778323, |
| "learning_rate": 9.816376147555464e-06, |
| "loss": 0.0612, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4335759781619654, |
| "grad_norm": 1.0196998859089288, |
| "learning_rate": 9.815992161950867e-06, |
| "loss": 0.1183, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4340309372156506, |
| "grad_norm": 0.5899375116434804, |
| "learning_rate": 9.81560778280514e-06, |
| "loss": 0.0604, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.43448589626933576, |
| "grad_norm": 1.0046158107797931, |
| "learning_rate": 9.815223010149693e-06, |
| "loss": 0.0876, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4349408553230209, |
| "grad_norm": 0.7980339738331416, |
| "learning_rate": 9.814837844015966e-06, |
| "loss": 0.0894, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4353958143767061, |
| "grad_norm": 0.6974524248281853, |
| "learning_rate": 9.814452284435433e-06, |
| "loss": 0.0741, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.43585077343039125, |
| "grad_norm": 0.7679692797858835, |
| "learning_rate": 9.814066331439603e-06, |
| "loss": 0.0796, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.43630573248407645, |
| "grad_norm": 0.8183774417740679, |
| "learning_rate": 9.813679985060012e-06, |
| "loss": 0.0963, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4367606915377616, |
| "grad_norm": 0.7950656053104391, |
| "learning_rate": 9.81329324532823e-06, |
| "loss": 0.0837, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4372156505914468, |
| "grad_norm": 0.6056809369995887, |
| "learning_rate": 9.812906112275862e-06, |
| "loss": 0.0465, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.43767060964513194, |
| "grad_norm": 1.0980359635620318, |
| "learning_rate": 9.81251858593454e-06, |
| "loss": 0.1206, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4381255686988171, |
| "grad_norm": 0.6123483237764059, |
| "learning_rate": 9.812130666335933e-06, |
| "loss": 0.08, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4385805277525023, |
| "grad_norm": 0.8151730014839008, |
| "learning_rate": 9.81174235351174e-06, |
| "loss": 0.0983, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4390354868061874, |
| "grad_norm": 0.7143828681073273, |
| "learning_rate": 9.811353647493691e-06, |
| "loss": 0.0809, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4394904458598726, |
| "grad_norm": 0.5647036962239634, |
| "learning_rate": 9.810964548313549e-06, |
| "loss": 0.0581, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.43994540491355777, |
| "grad_norm": 0.7594400506736699, |
| "learning_rate": 9.81057505600311e-06, |
| "loss": 0.078, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.44040036396724297, |
| "grad_norm": 0.6515426202345832, |
| "learning_rate": 9.810185170594205e-06, |
| "loss": 0.0688, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4408553230209281, |
| "grad_norm": 0.8798906332352223, |
| "learning_rate": 9.809794892118687e-06, |
| "loss": 0.0915, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.44131028207461326, |
| "grad_norm": 0.7350866900672135, |
| "learning_rate": 9.809404220608451e-06, |
| "loss": 0.0671, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.44176524112829846, |
| "grad_norm": 0.7216847217866104, |
| "learning_rate": 9.809013156095424e-06, |
| "loss": 0.0726, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4422202001819836, |
| "grad_norm": 0.8179702740752783, |
| "learning_rate": 9.808621698611557e-06, |
| "loss": 0.0758, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4426751592356688, |
| "grad_norm": 0.5533105745807706, |
| "learning_rate": 9.808229848188842e-06, |
| "loss": 0.0528, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.44313011828935395, |
| "grad_norm": 0.7503486538749657, |
| "learning_rate": 9.807837604859296e-06, |
| "loss": 0.0878, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.44358507734303915, |
| "grad_norm": 0.40510949005498975, |
| "learning_rate": 9.807444968654975e-06, |
| "loss": 0.0424, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4440400363967243, |
| "grad_norm": 0.8540666353042626, |
| "learning_rate": 9.807051939607959e-06, |
| "loss": 0.1108, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.44449499545040944, |
| "grad_norm": 0.7543284179304937, |
| "learning_rate": 9.806658517750369e-06, |
| "loss": 0.0719, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.44494995450409464, |
| "grad_norm": 0.6982493359241757, |
| "learning_rate": 9.80626470311435e-06, |
| "loss": 0.0777, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4454049135577798, |
| "grad_norm": 0.7275511253894157, |
| "learning_rate": 9.805870495732085e-06, |
| "loss": 0.0693, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 0.8647890459895436, |
| "learning_rate": 9.805475895635787e-06, |
| "loss": 0.0882, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4463148316651501, |
| "grad_norm": 0.757804762973183, |
| "learning_rate": 9.8050809028577e-06, |
| "loss": 0.0724, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4467697907188353, |
| "grad_norm": 0.7515219153063712, |
| "learning_rate": 9.8046855174301e-06, |
| "loss": 0.0659, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.44722474977252047, |
| "grad_norm": 1.0502681583017184, |
| "learning_rate": 9.804289739385297e-06, |
| "loss": 0.1207, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.44767970882620567, |
| "grad_norm": 0.5780062486364612, |
| "learning_rate": 9.803893568755633e-06, |
| "loss": 0.0772, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4481346678798908, |
| "grad_norm": 0.5515644567052078, |
| "learning_rate": 9.80349700557348e-06, |
| "loss": 0.0628, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.44858962693357596, |
| "grad_norm": 0.6432677095504179, |
| "learning_rate": 9.803100049871246e-06, |
| "loss": 0.0817, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.44904458598726116, |
| "grad_norm": 0.5424958391196154, |
| "learning_rate": 9.802702701681366e-06, |
| "loss": 0.0649, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4494995450409463, |
| "grad_norm": 0.6556126282036931, |
| "learning_rate": 9.80230496103631e-06, |
| "loss": 0.0579, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4499545040946315, |
| "grad_norm": 0.5632646083130022, |
| "learning_rate": 9.801906827968578e-06, |
| "loss": 0.0591, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.45040946314831665, |
| "grad_norm": 1.0464719217252296, |
| "learning_rate": 9.801508302510707e-06, |
| "loss": 0.124, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.45086442220200185, |
| "grad_norm": 0.7231067459050019, |
| "learning_rate": 9.801109384695261e-06, |
| "loss": 0.0631, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.451319381255687, |
| "grad_norm": 0.775594128230074, |
| "learning_rate": 9.800710074554837e-06, |
| "loss": 0.0924, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.45177434030937214, |
| "grad_norm": 0.6340180385643369, |
| "learning_rate": 9.800310372122066e-06, |
| "loss": 0.068, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.45222929936305734, |
| "grad_norm": 0.9703750136380557, |
| "learning_rate": 9.799910277429609e-06, |
| "loss": 0.0902, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4526842584167425, |
| "grad_norm": 0.5881925827197537, |
| "learning_rate": 9.79950979051016e-06, |
| "loss": 0.0662, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4531392174704277, |
| "grad_norm": 0.7583235380843109, |
| "learning_rate": 9.799108911396446e-06, |
| "loss": 0.0755, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4535941765241128, |
| "grad_norm": 0.6585135755735663, |
| "learning_rate": 9.798707640121224e-06, |
| "loss": 0.0669, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.454049135577798, |
| "grad_norm": 0.9344579240939844, |
| "learning_rate": 9.798305976717286e-06, |
| "loss": 0.1028, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.45450409463148317, |
| "grad_norm": 0.6238360425747993, |
| "learning_rate": 9.79790392121745e-06, |
| "loss": 0.0608, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4549590536851683, |
| "grad_norm": 0.715680092291253, |
| "learning_rate": 9.797501473654573e-06, |
| "loss": 0.0792, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4554140127388535, |
| "grad_norm": 0.8167758856821831, |
| "learning_rate": 9.797098634061543e-06, |
| "loss": 0.0948, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.45586897179253866, |
| "grad_norm": 0.8318764431867516, |
| "learning_rate": 9.796695402471275e-06, |
| "loss": 0.0967, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.45632393084622386, |
| "grad_norm": 0.9700547030363569, |
| "learning_rate": 9.79629177891672e-06, |
| "loss": 0.1138, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.456778889899909, |
| "grad_norm": 0.7702596501705347, |
| "learning_rate": 9.79588776343086e-06, |
| "loss": 0.0826, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4572338489535942, |
| "grad_norm": 0.833778163717652, |
| "learning_rate": 9.795483356046711e-06, |
| "loss": 0.0927, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.45768880800727935, |
| "grad_norm": 0.7006737675801851, |
| "learning_rate": 9.795078556797318e-06, |
| "loss": 0.0747, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4581437670609645, |
| "grad_norm": 0.8810114143185821, |
| "learning_rate": 9.794673365715761e-06, |
| "loss": 0.0921, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4585987261146497, |
| "grad_norm": 0.7286145380478113, |
| "learning_rate": 9.794267782835148e-06, |
| "loss": 0.0832, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.45905368516833484, |
| "grad_norm": 0.8181887559127218, |
| "learning_rate": 9.793861808188622e-06, |
| "loss": 0.0729, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.45950864422202004, |
| "grad_norm": 1.0821839097582124, |
| "learning_rate": 9.793455441809359e-06, |
| "loss": 0.1025, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4599636032757052, |
| "grad_norm": 0.515896949523265, |
| "learning_rate": 9.793048683730564e-06, |
| "loss": 0.0512, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4604185623293904, |
| "grad_norm": 0.7800604571516774, |
| "learning_rate": 9.792641533985474e-06, |
| "loss": 0.1065, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4608735213830755, |
| "grad_norm": 0.48365424866268936, |
| "learning_rate": 9.792233992607365e-06, |
| "loss": 0.0622, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.46132848043676067, |
| "grad_norm": 0.8472876133123602, |
| "learning_rate": 9.791826059629532e-06, |
| "loss": 0.0713, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.46178343949044587, |
| "grad_norm": 0.935522534168844, |
| "learning_rate": 9.791417735085316e-06, |
| "loss": 0.0853, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.462238398544131, |
| "grad_norm": 0.8028819334602026, |
| "learning_rate": 9.791009019008078e-06, |
| "loss": 0.0795, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4626933575978162, |
| "grad_norm": 0.6458928385673616, |
| "learning_rate": 9.79059991143122e-06, |
| "loss": 0.0836, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.46314831665150136, |
| "grad_norm": 0.8309912415690437, |
| "learning_rate": 9.790190412388173e-06, |
| "loss": 0.0895, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.46360327570518656, |
| "grad_norm": 0.6953691809158898, |
| "learning_rate": 9.789780521912396e-06, |
| "loss": 0.0686, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4640582347588717, |
| "grad_norm": 0.7563151979586233, |
| "learning_rate": 9.789370240037385e-06, |
| "loss": 0.0879, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46451319381255685, |
| "grad_norm": 0.6646619102460968, |
| "learning_rate": 9.788959566796667e-06, |
| "loss": 0.0761, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.46496815286624205, |
| "grad_norm": 0.8092527562913561, |
| "learning_rate": 9.788548502223801e-06, |
| "loss": 0.0863, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4654231119199272, |
| "grad_norm": 2.0284506817542396, |
| "learning_rate": 9.788137046352374e-06, |
| "loss": 0.2011, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4658780709736124, |
| "grad_norm": 0.6524644993097855, |
| "learning_rate": 9.787725199216011e-06, |
| "loss": 0.0765, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.46633303002729753, |
| "grad_norm": 0.48134373932870766, |
| "learning_rate": 9.787312960848368e-06, |
| "loss": 0.0505, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46678798908098273, |
| "grad_norm": 0.6646547386252114, |
| "learning_rate": 9.786900331283128e-06, |
| "loss": 0.0825, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4672429481346679, |
| "grad_norm": 0.5655812014606527, |
| "learning_rate": 9.78648731055401e-06, |
| "loss": 0.0659, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.467697907188353, |
| "grad_norm": 0.680196435092224, |
| "learning_rate": 9.786073898694766e-06, |
| "loss": 0.0734, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4681528662420382, |
| "grad_norm": 0.6198434008496165, |
| "learning_rate": 9.785660095739176e-06, |
| "loss": 0.0687, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.46860782529572337, |
| "grad_norm": 0.5967309034966486, |
| "learning_rate": 9.785245901721054e-06, |
| "loss": 0.0443, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46906278434940857, |
| "grad_norm": 0.588565790719301, |
| "learning_rate": 9.784831316674246e-06, |
| "loss": 0.0741, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4695177434030937, |
| "grad_norm": 0.6384508627867143, |
| "learning_rate": 9.784416340632634e-06, |
| "loss": 0.0639, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4699727024567789, |
| "grad_norm": 0.528980291125106, |
| "learning_rate": 9.784000973630124e-06, |
| "loss": 0.0506, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.47042766151046406, |
| "grad_norm": 0.6297922247581061, |
| "learning_rate": 9.783585215700656e-06, |
| "loss": 0.0704, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4708826205641492, |
| "grad_norm": 1.1014615381108162, |
| "learning_rate": 9.783169066878208e-06, |
| "loss": 0.1063, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4713375796178344, |
| "grad_norm": 0.7370811970547196, |
| "learning_rate": 9.782752527196785e-06, |
| "loss": 0.0888, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.47179253867151955, |
| "grad_norm": 0.6272964856361817, |
| "learning_rate": 9.782335596690425e-06, |
| "loss": 0.0683, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.47224749772520475, |
| "grad_norm": 0.9675945822898259, |
| "learning_rate": 9.781918275393196e-06, |
| "loss": 0.1031, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4727024567788899, |
| "grad_norm": 0.8448129794628584, |
| "learning_rate": 9.781500563339202e-06, |
| "loss": 0.0818, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4731574158325751, |
| "grad_norm": 0.5148120993988892, |
| "learning_rate": 9.781082460562574e-06, |
| "loss": 0.0525, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47361237488626023, |
| "grad_norm": 0.7767251927940846, |
| "learning_rate": 9.780663967097477e-06, |
| "loss": 0.0869, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4740673339399454, |
| "grad_norm": 0.9661754574144388, |
| "learning_rate": 9.780245082978112e-06, |
| "loss": 0.0923, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4745222929936306, |
| "grad_norm": 0.780061387882855, |
| "learning_rate": 9.779825808238705e-06, |
| "loss": 0.095, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4749772520473157, |
| "grad_norm": 0.8513172657519864, |
| "learning_rate": 9.77940614291352e-06, |
| "loss": 0.0772, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4754322111010009, |
| "grad_norm": 0.6199453465731616, |
| "learning_rate": 9.778986087036846e-06, |
| "loss": 0.0701, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.47588717015468607, |
| "grad_norm": 0.5327629714743946, |
| "learning_rate": 9.778565640643011e-06, |
| "loss": 0.0447, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.47634212920837127, |
| "grad_norm": 0.8882337205809296, |
| "learning_rate": 9.778144803766375e-06, |
| "loss": 0.0788, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4767970882620564, |
| "grad_norm": 0.6023343672839219, |
| "learning_rate": 9.77772357644132e-06, |
| "loss": 0.0693, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.47725204731574156, |
| "grad_norm": 0.8031515985448552, |
| "learning_rate": 9.777301958702273e-06, |
| "loss": 0.0911, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.47770700636942676, |
| "grad_norm": 0.8695877166802147, |
| "learning_rate": 9.776879950583683e-06, |
| "loss": 0.12, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4781619654231119, |
| "grad_norm": 0.6077253389668626, |
| "learning_rate": 9.776457552120034e-06, |
| "loss": 0.0722, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4786169244767971, |
| "grad_norm": 0.7976020915977983, |
| "learning_rate": 9.776034763345845e-06, |
| "loss": 0.0783, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.47907188353048225, |
| "grad_norm": 0.7091049596783572, |
| "learning_rate": 9.775611584295663e-06, |
| "loss": 0.0739, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.47952684258416745, |
| "grad_norm": 0.7919907245184465, |
| "learning_rate": 9.775188015004072e-06, |
| "loss": 0.0728, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4799818016378526, |
| "grad_norm": 0.9227645018819045, |
| "learning_rate": 9.774764055505676e-06, |
| "loss": 0.0905, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.48043676069153773, |
| "grad_norm": 0.7130315690029604, |
| "learning_rate": 9.774339705835127e-06, |
| "loss": 0.09, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.48089171974522293, |
| "grad_norm": 0.7993270676292756, |
| "learning_rate": 9.773914966027098e-06, |
| "loss": 0.1011, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4813466787989081, |
| "grad_norm": 0.8955668988276211, |
| "learning_rate": 9.773489836116297e-06, |
| "loss": 0.0963, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4818016378525933, |
| "grad_norm": 0.7582155580680914, |
| "learning_rate": 9.773064316137464e-06, |
| "loss": 0.0766, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4822565969062784, |
| "grad_norm": 0.6939955066308027, |
| "learning_rate": 9.772638406125367e-06, |
| "loss": 0.0687, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4827115559599636, |
| "grad_norm": 0.8091635860789653, |
| "learning_rate": 9.772212106114816e-06, |
| "loss": 0.0754, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.48316651501364877, |
| "grad_norm": 0.8236012040739623, |
| "learning_rate": 9.77178541614064e-06, |
| "loss": 0.0951, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.48362147406733397, |
| "grad_norm": 0.6622501946117725, |
| "learning_rate": 9.77135833623771e-06, |
| "loss": 0.083, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4840764331210191, |
| "grad_norm": 0.8689743387052602, |
| "learning_rate": 9.770930866440927e-06, |
| "loss": 0.1074, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.48453139217470426, |
| "grad_norm": 0.6733750246744147, |
| "learning_rate": 9.770503006785214e-06, |
| "loss": 0.0639, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48498635122838946, |
| "grad_norm": 0.9485233745498586, |
| "learning_rate": 9.770074757305541e-06, |
| "loss": 0.1106, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4854413102820746, |
| "grad_norm": 0.8288392949652397, |
| "learning_rate": 9.769646118036902e-06, |
| "loss": 0.0661, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4858962693357598, |
| "grad_norm": 0.7475423805914638, |
| "learning_rate": 9.76921708901432e-06, |
| "loss": 0.0686, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.48635122838944495, |
| "grad_norm": 0.54120364671088, |
| "learning_rate": 9.768787670272855e-06, |
| "loss": 0.0629, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.48680618744313015, |
| "grad_norm": 0.7281619635509152, |
| "learning_rate": 9.768357861847598e-06, |
| "loss": 0.0723, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4872611464968153, |
| "grad_norm": 0.8883321717067604, |
| "learning_rate": 9.767927663773668e-06, |
| "loss": 0.0832, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.48771610555050043, |
| "grad_norm": 0.7681469789077073, |
| "learning_rate": 9.767497076086223e-06, |
| "loss": 0.0786, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.48817106460418563, |
| "grad_norm": 0.6590861395931087, |
| "learning_rate": 9.767066098820446e-06, |
| "loss": 0.0704, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4886260236578708, |
| "grad_norm": 0.7944203702948146, |
| "learning_rate": 9.766634732011557e-06, |
| "loss": 0.0867, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.489080982711556, |
| "grad_norm": 0.7832480468570255, |
| "learning_rate": 9.766202975694801e-06, |
| "loss": 0.0873, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4895359417652411, |
| "grad_norm": 0.7232266679451883, |
| "learning_rate": 9.765770829905464e-06, |
| "loss": 0.0785, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4899909008189263, |
| "grad_norm": 0.5406798309730716, |
| "learning_rate": 9.765338294678856e-06, |
| "loss": 0.0469, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.49044585987261147, |
| "grad_norm": 0.5866548164219128, |
| "learning_rate": 9.764905370050321e-06, |
| "loss": 0.0524, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4909008189262966, |
| "grad_norm": 0.9915720236606885, |
| "learning_rate": 9.76447205605524e-06, |
| "loss": 0.1019, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4913557779799818, |
| "grad_norm": 0.6838845303274752, |
| "learning_rate": 9.764038352729018e-06, |
| "loss": 0.0891, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.49181073703366696, |
| "grad_norm": 0.9385660559352969, |
| "learning_rate": 9.763604260107096e-06, |
| "loss": 0.1058, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.49226569608735216, |
| "grad_norm": 0.6710872617569944, |
| "learning_rate": 9.763169778224946e-06, |
| "loss": 0.0665, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4927206551410373, |
| "grad_norm": 0.7878885609137168, |
| "learning_rate": 9.762734907118072e-06, |
| "loss": 0.0876, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4931756141947225, |
| "grad_norm": 0.6302166766090778, |
| "learning_rate": 9.76229964682201e-06, |
| "loss": 0.0507, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.49363057324840764, |
| "grad_norm": 0.5833462678864086, |
| "learning_rate": 9.761863997372325e-06, |
| "loss": 0.0612, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4940855323020928, |
| "grad_norm": 1.036522158484448, |
| "learning_rate": 9.761427958804621e-06, |
| "loss": 0.1395, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.494540491355778, |
| "grad_norm": 1.1502320115946314, |
| "learning_rate": 9.760991531154526e-06, |
| "loss": 0.1149, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.49499545040946313, |
| "grad_norm": 0.7616054217825209, |
| "learning_rate": 9.760554714457704e-06, |
| "loss": 0.0684, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.49545040946314833, |
| "grad_norm": 0.5129309167340426, |
| "learning_rate": 9.760117508749846e-06, |
| "loss": 0.0614, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4959053685168335, |
| "grad_norm": 0.7147170789642256, |
| "learning_rate": 9.759679914066686e-06, |
| "loss": 0.0842, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4963603275705187, |
| "grad_norm": 0.7513123367978354, |
| "learning_rate": 9.759241930443975e-06, |
| "loss": 0.0749, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4968152866242038, |
| "grad_norm": 0.5462870672862663, |
| "learning_rate": 9.75880355791751e-06, |
| "loss": 0.0588, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.49727024567788897, |
| "grad_norm": 0.6158644897786469, |
| "learning_rate": 9.758364796523105e-06, |
| "loss": 0.0578, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.49772520473157417, |
| "grad_norm": 0.5248367448810554, |
| "learning_rate": 9.757925646296617e-06, |
| "loss": 0.0504, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4981801637852593, |
| "grad_norm": 0.7801307646100064, |
| "learning_rate": 9.757486107273935e-06, |
| "loss": 0.0819, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4986351228389445, |
| "grad_norm": 0.6822936325355138, |
| "learning_rate": 9.75704617949097e-06, |
| "loss": 0.0828, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.49909008189262966, |
| "grad_norm": 0.49379397863131413, |
| "learning_rate": 9.756605862983675e-06, |
| "loss": 0.0606, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.49954504094631486, |
| "grad_norm": 0.5236513133369656, |
| "learning_rate": 9.756165157788029e-06, |
| "loss": 0.0493, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.7323812225903658, |
| "learning_rate": 9.755724063940047e-06, |
| "loss": 0.0794, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5004549590536852, |
| "grad_norm": 0.853156508842135, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.08, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5009099181073703, |
| "grad_norm": 0.7117091061791435, |
| "learning_rate": 9.754840710431274e-06, |
| "loss": 0.0773, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5013648771610555, |
| "grad_norm": 0.9350752111669145, |
| "learning_rate": 9.754398450842668e-06, |
| "loss": 0.1046, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5018198362147407, |
| "grad_norm": 0.8834833642233855, |
| "learning_rate": 9.753955802746091e-06, |
| "loss": 0.1284, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5022747952684259, |
| "grad_norm": 0.9022387216275947, |
| "learning_rate": 9.753512766177717e-06, |
| "loss": 0.0898, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.502729754322111, |
| "grad_norm": 0.551248880180483, |
| "learning_rate": 9.753069341173745e-06, |
| "loss": 0.0596, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5031847133757962, |
| "grad_norm": 0.5970423480352659, |
| "learning_rate": 9.752625527770409e-06, |
| "loss": 0.0723, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5036396724294814, |
| "grad_norm": 0.7620108531589319, |
| "learning_rate": 9.75218132600398e-06, |
| "loss": 0.0856, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5040946314831665, |
| "grad_norm": 0.7720887684681512, |
| "learning_rate": 9.751736735910753e-06, |
| "loss": 0.0904, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5045495905368517, |
| "grad_norm": 0.8672659681858957, |
| "learning_rate": 9.75129175752706e-06, |
| "loss": 0.1043, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5050045495905369, |
| "grad_norm": 0.7511079874116621, |
| "learning_rate": 9.75084639088926e-06, |
| "loss": 0.0719, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5054595086442221, |
| "grad_norm": 0.7442062138473109, |
| "learning_rate": 9.750400636033746e-06, |
| "loss": 0.0805, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5059144676979072, |
| "grad_norm": 0.716157443156474, |
| "learning_rate": 9.749954492996947e-06, |
| "loss": 0.0902, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5063694267515924, |
| "grad_norm": 0.7655895172099163, |
| "learning_rate": 9.749507961815317e-06, |
| "loss": 0.0973, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5068243858052776, |
| "grad_norm": 0.6288294239038802, |
| "learning_rate": 9.749061042525343e-06, |
| "loss": 0.0646, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5072793448589626, |
| "grad_norm": 0.6709452216437115, |
| "learning_rate": 9.74861373516355e-06, |
| "loss": 0.0717, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5077343039126478, |
| "grad_norm": 0.6522838269502338, |
| "learning_rate": 9.748166039766484e-06, |
| "loss": 0.0475, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.508189262966333, |
| "grad_norm": 0.7999784990978867, |
| "learning_rate": 9.747717956370735e-06, |
| "loss": 0.0925, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5086442220200182, |
| "grad_norm": 1.0917998243863505, |
| "learning_rate": 9.747269485012913e-06, |
| "loss": 0.1293, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5090991810737033, |
| "grad_norm": 0.7636715530766439, |
| "learning_rate": 9.746820625729667e-06, |
| "loss": 0.0774, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5095541401273885, |
| "grad_norm": 0.6701230428761437, |
| "learning_rate": 9.746371378557677e-06, |
| "loss": 0.0623, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5100090991810737, |
| "grad_norm": 0.972334707766994, |
| "learning_rate": 9.745921743533653e-06, |
| "loss": 0.113, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5104640582347588, |
| "grad_norm": 0.6630727679984025, |
| "learning_rate": 9.745471720694335e-06, |
| "loss": 0.0828, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.510919017288444, |
| "grad_norm": 0.8798279960192045, |
| "learning_rate": 9.745021310076498e-06, |
| "loss": 0.0772, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5113739763421292, |
| "grad_norm": 0.6337737332675445, |
| "learning_rate": 9.744570511716952e-06, |
| "loss": 0.0805, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5118289353958144, |
| "grad_norm": 0.9171053674032225, |
| "learning_rate": 9.744119325652526e-06, |
| "loss": 0.0901, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5122838944494995, |
| "grad_norm": 0.7437420002919692, |
| "learning_rate": 9.743667751920093e-06, |
| "loss": 0.0789, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5127388535031847, |
| "grad_norm": 0.692440215965907, |
| "learning_rate": 9.743215790556556e-06, |
| "loss": 0.0885, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5131938125568699, |
| "grad_norm": 0.5830998661595514, |
| "learning_rate": 9.742763441598841e-06, |
| "loss": 0.0571, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.513648771610555, |
| "grad_norm": 0.7409283851806759, |
| "learning_rate": 9.742310705083919e-06, |
| "loss": 0.0819, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5141037306642402, |
| "grad_norm": 0.6329559817029019, |
| "learning_rate": 9.74185758104878e-06, |
| "loss": 0.0732, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5145586897179254, |
| "grad_norm": 0.47102788261692413, |
| "learning_rate": 9.741404069530455e-06, |
| "loss": 0.0496, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5150136487716106, |
| "grad_norm": 0.7193278988032876, |
| "learning_rate": 9.740950170566002e-06, |
| "loss": 0.0797, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5154686078252957, |
| "grad_norm": 0.7827454423152818, |
| "learning_rate": 9.740495884192509e-06, |
| "loss": 0.0863, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5159235668789809, |
| "grad_norm": 0.5187125000260286, |
| "learning_rate": 9.740041210447101e-06, |
| "loss": 0.048, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5163785259326661, |
| "grad_norm": 0.7621657915309645, |
| "learning_rate": 9.739586149366932e-06, |
| "loss": 0.076, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5168334849863512, |
| "grad_norm": 1.0691498364952807, |
| "learning_rate": 9.739130700989185e-06, |
| "loss": 0.1085, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5172884440400364, |
| "grad_norm": 1.126943089011516, |
| "learning_rate": 9.738674865351081e-06, |
| "loss": 0.1197, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5177434030937216, |
| "grad_norm": 0.5967935472543325, |
| "learning_rate": 9.738218642489864e-06, |
| "loss": 0.0715, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5181983621474068, |
| "grad_norm": 0.6520369417533736, |
| "learning_rate": 9.73776203244282e-06, |
| "loss": 0.0812, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5186533212010919, |
| "grad_norm": 0.6923655317783546, |
| "learning_rate": 9.737305035247258e-06, |
| "loss": 0.0607, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5191082802547771, |
| "grad_norm": 0.5971267035932937, |
| "learning_rate": 9.73684765094052e-06, |
| "loss": 0.0597, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5195632393084623, |
| "grad_norm": 0.6102979031011873, |
| "learning_rate": 9.736389879559984e-06, |
| "loss": 0.0464, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5200181983621474, |
| "grad_norm": 0.5971210330968472, |
| "learning_rate": 9.735931721143058e-06, |
| "loss": 0.0674, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5204731574158326, |
| "grad_norm": 0.9014574419537533, |
| "learning_rate": 9.735473175727178e-06, |
| "loss": 0.1071, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5209281164695178, |
| "grad_norm": 1.024240239778721, |
| "learning_rate": 9.735014243349814e-06, |
| "loss": 0.1058, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.521383075523203, |
| "grad_norm": 0.740240244958144, |
| "learning_rate": 9.73455492404847e-06, |
| "loss": 0.0716, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.521838034576888, |
| "grad_norm": 0.8552793125149327, |
| "learning_rate": 9.734095217860679e-06, |
| "loss": 0.1116, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5222929936305732, |
| "grad_norm": 0.8388846880500271, |
| "learning_rate": 9.733635124824007e-06, |
| "loss": 0.1195, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5227479526842584, |
| "grad_norm": 0.7476616795889469, |
| "learning_rate": 9.733174644976047e-06, |
| "loss": 0.0982, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5232029117379435, |
| "grad_norm": 1.247104578949049, |
| "learning_rate": 9.732713778354431e-06, |
| "loss": 0.1339, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5236578707916287, |
| "grad_norm": 0.8127429979477634, |
| "learning_rate": 9.732252524996818e-06, |
| "loss": 0.0994, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5241128298453139, |
| "grad_norm": 1.1678300434583342, |
| "learning_rate": 9.731790884940899e-06, |
| "loss": 0.1152, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5245677888989991, |
| "grad_norm": 0.5209287069427062, |
| "learning_rate": 9.731328858224398e-06, |
| "loss": 0.0546, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5250227479526842, |
| "grad_norm": 0.8363023252623251, |
| "learning_rate": 9.730866444885069e-06, |
| "loss": 0.0894, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5254777070063694, |
| "grad_norm": 0.8202924553152645, |
| "learning_rate": 9.730403644960697e-06, |
| "loss": 0.0914, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5259326660600546, |
| "grad_norm": 0.4900409376406188, |
| "learning_rate": 9.729940458489105e-06, |
| "loss": 0.0454, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5263876251137397, |
| "grad_norm": 0.5631225499534328, |
| "learning_rate": 9.729476885508136e-06, |
| "loss": 0.0542, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5268425841674249, |
| "grad_norm": 0.566596895824316, |
| "learning_rate": 9.729012926055674e-06, |
| "loss": 0.0625, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5272975432211101, |
| "grad_norm": 0.9035766920121469, |
| "learning_rate": 9.728548580169632e-06, |
| "loss": 0.1013, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5277525022747953, |
| "grad_norm": 0.8241016260766749, |
| "learning_rate": 9.728083847887955e-06, |
| "loss": 0.078, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5282074613284804, |
| "grad_norm": 0.7435557294319748, |
| "learning_rate": 9.727618729248617e-06, |
| "loss": 0.0864, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5286624203821656, |
| "grad_norm": 0.6611375262646607, |
| "learning_rate": 9.727153224289627e-06, |
| "loss": 0.0769, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5291173794358508, |
| "grad_norm": 0.8275931946782299, |
| "learning_rate": 9.726687333049024e-06, |
| "loss": 0.0889, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5295723384895359, |
| "grad_norm": 1.057751919756087, |
| "learning_rate": 9.726221055564874e-06, |
| "loss": 0.0851, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5300272975432211, |
| "grad_norm": 0.7884543920060787, |
| "learning_rate": 9.725754391875287e-06, |
| "loss": 0.0746, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5304822565969063, |
| "grad_norm": 0.8593529313000522, |
| "learning_rate": 9.72528734201839e-06, |
| "loss": 0.0828, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5309372156505915, |
| "grad_norm": 0.5225417485901063, |
| "learning_rate": 9.72481990603235e-06, |
| "loss": 0.0794, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5313921747042766, |
| "grad_norm": 0.8820660720540598, |
| "learning_rate": 9.724352083955366e-06, |
| "loss": 0.1059, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5318471337579618, |
| "grad_norm": 0.6775105748188827, |
| "learning_rate": 9.723883875825664e-06, |
| "loss": 0.079, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.532302092811647, |
| "grad_norm": 0.5969175177573056, |
| "learning_rate": 9.723415281681505e-06, |
| "loss": 0.061, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5327570518653321, |
| "grad_norm": 0.7165111743049339, |
| "learning_rate": 9.722946301561179e-06, |
| "loss": 0.0824, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5332120109190173, |
| "grad_norm": 0.7771351455478163, |
| "learning_rate": 9.722476935503011e-06, |
| "loss": 0.0936, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5336669699727025, |
| "grad_norm": 0.5612071801020553, |
| "learning_rate": 9.722007183545353e-06, |
| "loss": 0.0584, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5341219290263877, |
| "grad_norm": 0.7630759308283642, |
| "learning_rate": 9.721537045726594e-06, |
| "loss": 0.0711, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5345768880800728, |
| "grad_norm": 0.7415951616336062, |
| "learning_rate": 9.721066522085148e-06, |
| "loss": 0.0786, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.535031847133758, |
| "grad_norm": 0.6697058559185771, |
| "learning_rate": 9.720595612659467e-06, |
| "loss": 0.0943, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5354868061874432, |
| "grad_norm": 0.8294561042543531, |
| "learning_rate": 9.720124317488031e-06, |
| "loss": 0.0766, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5359417652411284, |
| "grad_norm": 0.8069252663248169, |
| "learning_rate": 9.719652636609351e-06, |
| "loss": 0.1036, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5363967242948134, |
| "grad_norm": 0.5216393236723873, |
| "learning_rate": 9.719180570061973e-06, |
| "loss": 0.0681, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5368516833484986, |
| "grad_norm": 0.7561882785891234, |
| "learning_rate": 9.718708117884468e-06, |
| "loss": 0.0888, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5373066424021838, |
| "grad_norm": 0.7101886443887773, |
| "learning_rate": 9.718235280115446e-06, |
| "loss": 0.0841, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5377616014558689, |
| "grad_norm": 0.93883085852681, |
| "learning_rate": 9.717762056793545e-06, |
| "loss": 0.1116, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5382165605095541, |
| "grad_norm": 0.8029318164759022, |
| "learning_rate": 9.717288447957433e-06, |
| "loss": 0.0817, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5386715195632393, |
| "grad_norm": 0.7189629467174897, |
| "learning_rate": 9.716814453645811e-06, |
| "loss": 0.0913, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5391264786169245, |
| "grad_norm": 0.6194922793353296, |
| "learning_rate": 9.716340073897414e-06, |
| "loss": 0.073, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5395814376706096, |
| "grad_norm": 0.5862599296496694, |
| "learning_rate": 9.715865308751006e-06, |
| "loss": 0.0599, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5400363967242948, |
| "grad_norm": 1.0638863826866105, |
| "learning_rate": 9.715390158245381e-06, |
| "loss": 0.1412, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.54049135577798, |
| "grad_norm": 0.6031416289368001, |
| "learning_rate": 9.714914622419367e-06, |
| "loss": 0.0694, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5409463148316651, |
| "grad_norm": 0.5762096954254395, |
| "learning_rate": 9.714438701311822e-06, |
| "loss": 0.0627, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5414012738853503, |
| "grad_norm": 0.6077021479661606, |
| "learning_rate": 9.713962394961636e-06, |
| "loss": 0.067, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5418562329390355, |
| "grad_norm": 0.5381873559759192, |
| "learning_rate": 9.713485703407732e-06, |
| "loss": 0.0595, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5423111919927207, |
| "grad_norm": 0.7866618609648011, |
| "learning_rate": 9.713008626689063e-06, |
| "loss": 0.1064, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5427661510464058, |
| "grad_norm": 0.7100862231154079, |
| "learning_rate": 9.712531164844611e-06, |
| "loss": 0.07, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.543221110100091, |
| "grad_norm": 0.5579932774059501, |
| "learning_rate": 9.712053317913394e-06, |
| "loss": 0.0525, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5436760691537762, |
| "grad_norm": 0.5454543895601387, |
| "learning_rate": 9.711575085934459e-06, |
| "loss": 0.0741, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5441310282074613, |
| "grad_norm": 0.6754854519258514, |
| "learning_rate": 9.711096468946888e-06, |
| "loss": 0.101, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5445859872611465, |
| "grad_norm": 0.8125002765504534, |
| "learning_rate": 9.710617466989787e-06, |
| "loss": 0.0937, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5450409463148317, |
| "grad_norm": 0.5893498973936582, |
| "learning_rate": 9.710138080102298e-06, |
| "loss": 0.0658, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5454959053685169, |
| "grad_norm": 0.8107633297228217, |
| "learning_rate": 9.709658308323597e-06, |
| "loss": 0.0955, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.545950864422202, |
| "grad_norm": 0.6726060122769176, |
| "learning_rate": 9.70917815169289e-06, |
| "loss": 0.084, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5464058234758872, |
| "grad_norm": 0.6077011277694447, |
| "learning_rate": 9.708697610249407e-06, |
| "loss": 0.0756, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5468607825295724, |
| "grad_norm": 0.7073007110523803, |
| "learning_rate": 9.70821668403242e-06, |
| "loss": 0.0818, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5473157415832575, |
| "grad_norm": 0.9420816064988972, |
| "learning_rate": 9.707735373081231e-06, |
| "loss": 0.1197, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5477707006369427, |
| "grad_norm": 0.552138579735494, |
| "learning_rate": 9.707253677435165e-06, |
| "loss": 0.0594, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5482256596906279, |
| "grad_norm": 0.6375758502862188, |
| "learning_rate": 9.706771597133587e-06, |
| "loss": 0.0572, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5486806187443131, |
| "grad_norm": 0.6581691945271008, |
| "learning_rate": 9.706289132215889e-06, |
| "loss": 0.0707, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5491355777979982, |
| "grad_norm": 0.820106985355047, |
| "learning_rate": 9.705806282721498e-06, |
| "loss": 0.0865, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5495905368516834, |
| "grad_norm": 0.5258555939105785, |
| "learning_rate": 9.705323048689866e-06, |
| "loss": 0.0462, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5500454959053686, |
| "grad_norm": 0.7818892498713288, |
| "learning_rate": 9.704839430160487e-06, |
| "loss": 0.1005, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5505004549590536, |
| "grad_norm": 0.6371281646305975, |
| "learning_rate": 9.704355427172874e-06, |
| "loss": 0.0712, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5509554140127388, |
| "grad_norm": 0.5981165031558572, |
| "learning_rate": 9.70387103976658e-06, |
| "loss": 0.0669, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.551410373066424, |
| "grad_norm": 0.640233382171881, |
| "learning_rate": 9.703386267981188e-06, |
| "loss": 0.0629, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5518653321201092, |
| "grad_norm": 0.5436666812285462, |
| "learning_rate": 9.70290111185631e-06, |
| "loss": 0.0527, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5523202911737943, |
| "grad_norm": 0.9264418893677014, |
| "learning_rate": 9.702415571431594e-06, |
| "loss": 0.1392, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5527752502274795, |
| "grad_norm": 0.6659444469982292, |
| "learning_rate": 9.70192964674671e-06, |
| "loss": 0.0948, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5532302092811647, |
| "grad_norm": 0.5526163080676849, |
| "learning_rate": 9.70144333784137e-06, |
| "loss": 0.0661, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5536851683348498, |
| "grad_norm": 0.7994476768514381, |
| "learning_rate": 9.700956644755313e-06, |
| "loss": 0.0966, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.554140127388535, |
| "grad_norm": 0.7919884013199107, |
| "learning_rate": 9.700469567528307e-06, |
| "loss": 0.1082, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5545950864422202, |
| "grad_norm": 0.7366932972024113, |
| "learning_rate": 9.699982106200155e-06, |
| "loss": 0.0841, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5550500454959054, |
| "grad_norm": 0.8558659635343526, |
| "learning_rate": 9.699494260810692e-06, |
| "loss": 0.0866, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5555050045495905, |
| "grad_norm": 0.8060928626360002, |
| "learning_rate": 9.699006031399779e-06, |
| "loss": 0.0777, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5559599636032757, |
| "grad_norm": 0.6914626835020681, |
| "learning_rate": 9.698517418007314e-06, |
| "loss": 0.0775, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5564149226569609, |
| "grad_norm": 0.8706739684427142, |
| "learning_rate": 9.698028420673224e-06, |
| "loss": 0.0984, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.556869881710646, |
| "grad_norm": 0.7863016327992207, |
| "learning_rate": 9.697539039437468e-06, |
| "loss": 0.1118, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5573248407643312, |
| "grad_norm": 0.7719453440565228, |
| "learning_rate": 9.697049274340036e-06, |
| "loss": 0.0824, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5577797998180164, |
| "grad_norm": 1.1509899845731206, |
| "learning_rate": 9.696559125420949e-06, |
| "loss": 0.1254, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5582347588717016, |
| "grad_norm": 0.5202193771917482, |
| "learning_rate": 9.696068592720257e-06, |
| "loss": 0.0538, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5586897179253867, |
| "grad_norm": 0.5880633286090164, |
| "learning_rate": 9.69557767627805e-06, |
| "loss": 0.0711, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5591446769790719, |
| "grad_norm": 0.6342846572654288, |
| "learning_rate": 9.695086376134438e-06, |
| "loss": 0.0671, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5595996360327571, |
| "grad_norm": 0.7541651906429654, |
| "learning_rate": 9.694594692329571e-06, |
| "loss": 0.0813, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5600545950864422, |
| "grad_norm": 0.6416731945433944, |
| "learning_rate": 9.694102624903627e-06, |
| "loss": 0.0733, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5605095541401274, |
| "grad_norm": 1.0012992796464886, |
| "learning_rate": 9.693610173896815e-06, |
| "loss": 0.096, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5609645131938126, |
| "grad_norm": 0.725396699259508, |
| "learning_rate": 9.693117339349376e-06, |
| "loss": 0.0665, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5614194722474978, |
| "grad_norm": 0.7481457641805567, |
| "learning_rate": 9.692624121301581e-06, |
| "loss": 0.0715, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5618744313011829, |
| "grad_norm": 0.969766282604155, |
| "learning_rate": 9.692130519793734e-06, |
| "loss": 0.0991, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5623293903548681, |
| "grad_norm": 0.8522169509206354, |
| "learning_rate": 9.691636534866172e-06, |
| "loss": 0.1025, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5627843494085533, |
| "grad_norm": 0.7682304561659135, |
| "learning_rate": 9.691142166559259e-06, |
| "loss": 0.0846, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5632393084622384, |
| "grad_norm": 0.5495617218791536, |
| "learning_rate": 9.690647414913392e-06, |
| "loss": 0.0766, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5636942675159236, |
| "grad_norm": 0.6826816911759014, |
| "learning_rate": 9.690152279969003e-06, |
| "loss": 0.0729, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5641492265696088, |
| "grad_norm": 0.8352406959674302, |
| "learning_rate": 9.689656761766548e-06, |
| "loss": 0.0896, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.564604185623294, |
| "grad_norm": 0.5908696548320724, |
| "learning_rate": 9.689160860346522e-06, |
| "loss": 0.0753, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.565059144676979, |
| "grad_norm": 0.4283914528398344, |
| "learning_rate": 9.688664575749447e-06, |
| "loss": 0.0414, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.5655141037306642, |
| "grad_norm": 0.6584468440229382, |
| "learning_rate": 9.688167908015877e-06, |
| "loss": 0.0733, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.5659690627843494, |
| "grad_norm": 0.9211218848648471, |
| "learning_rate": 9.687670857186396e-06, |
| "loss": 0.1171, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.5664240218380345, |
| "grad_norm": 0.9250852893692096, |
| "learning_rate": 9.68717342330162e-06, |
| "loss": 0.1061, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.5668789808917197, |
| "grad_norm": 0.8688266055790496, |
| "learning_rate": 9.686675606402203e-06, |
| "loss": 0.1213, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.5673339399454049, |
| "grad_norm": 0.7110325678190088, |
| "learning_rate": 9.686177406528819e-06, |
| "loss": 0.0836, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.5677888989990901, |
| "grad_norm": 0.8260984800022192, |
| "learning_rate": 9.685678823722178e-06, |
| "loss": 0.0907, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.5682438580527752, |
| "grad_norm": 0.6625042460625208, |
| "learning_rate": 9.685179858023026e-06, |
| "loss": 0.0777, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.5686988171064604, |
| "grad_norm": 0.711324638729454, |
| "learning_rate": 9.684680509472133e-06, |
| "loss": 0.0815, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5691537761601456, |
| "grad_norm": 0.6863010294874783, |
| "learning_rate": 9.684180778110306e-06, |
| "loss": 0.0642, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.5696087352138307, |
| "grad_norm": 0.5978880624303593, |
| "learning_rate": 9.683680663978377e-06, |
| "loss": 0.065, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.5700636942675159, |
| "grad_norm": 0.6322068932784428, |
| "learning_rate": 9.683180167117216e-06, |
| "loss": 0.0681, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.5705186533212011, |
| "grad_norm": 0.7826720403434554, |
| "learning_rate": 9.682679287567722e-06, |
| "loss": 0.0881, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.5709736123748863, |
| "grad_norm": 0.794807695787425, |
| "learning_rate": 9.682178025370824e-06, |
| "loss": 0.1118, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.7050268620804678, |
| "learning_rate": 9.681676380567482e-06, |
| "loss": 0.0839, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.5718835304822566, |
| "grad_norm": 0.5581694578677082, |
| "learning_rate": 9.681174353198687e-06, |
| "loss": 0.0482, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.5723384895359418, |
| "grad_norm": 0.6766600070725707, |
| "learning_rate": 9.680671943305465e-06, |
| "loss": 0.0679, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.5727934485896269, |
| "grad_norm": 0.6995276308642288, |
| "learning_rate": 9.680169150928868e-06, |
| "loss": 0.0823, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.5732484076433121, |
| "grad_norm": 0.6008334474427011, |
| "learning_rate": 9.679665976109985e-06, |
| "loss": 0.0669, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5737033666969973, |
| "grad_norm": 0.6951316344905618, |
| "learning_rate": 9.679162418889932e-06, |
| "loss": 0.0644, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.5741583257506825, |
| "grad_norm": 0.7661270676130627, |
| "learning_rate": 9.678658479309854e-06, |
| "loss": 0.0837, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.5746132848043676, |
| "grad_norm": 0.7593531327031607, |
| "learning_rate": 9.678154157410937e-06, |
| "loss": 0.0646, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.5750682438580528, |
| "grad_norm": 0.7824619403016152, |
| "learning_rate": 9.677649453234388e-06, |
| "loss": 0.0907, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.575523202911738, |
| "grad_norm": 0.8187746029529864, |
| "learning_rate": 9.67714436682145e-06, |
| "loss": 0.0906, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5759781619654231, |
| "grad_norm": 0.7676559233650921, |
| "learning_rate": 9.676638898213394e-06, |
| "loss": 0.0839, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.5764331210191083, |
| "grad_norm": 0.5944493207466681, |
| "learning_rate": 9.676133047451528e-06, |
| "loss": 0.0588, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.5768880800727935, |
| "grad_norm": 0.6734586229257056, |
| "learning_rate": 9.675626814577188e-06, |
| "loss": 0.0804, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.5773430391264787, |
| "grad_norm": 0.6315388478681175, |
| "learning_rate": 9.675120199631738e-06, |
| "loss": 0.0636, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.5777979981801638, |
| "grad_norm": 0.7252277920198784, |
| "learning_rate": 9.674613202656577e-06, |
| "loss": 0.0842, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.578252957233849, |
| "grad_norm": 0.58556718084403, |
| "learning_rate": 9.674105823693139e-06, |
| "loss": 0.0764, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.5787079162875342, |
| "grad_norm": 0.7635901125586164, |
| "learning_rate": 9.673598062782878e-06, |
| "loss": 0.0907, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.5791628753412192, |
| "grad_norm": 0.33852379656119563, |
| "learning_rate": 9.67308991996729e-06, |
| "loss": 0.0387, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.5796178343949044, |
| "grad_norm": 0.8984557509320932, |
| "learning_rate": 9.672581395287897e-06, |
| "loss": 0.0969, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.5800727934485896, |
| "grad_norm": 0.881696210059407, |
| "learning_rate": 9.672072488786254e-06, |
| "loss": 0.115, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5805277525022748, |
| "grad_norm": 0.805394208652388, |
| "learning_rate": 9.671563200503947e-06, |
| "loss": 0.0916, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.5809827115559599, |
| "grad_norm": 0.5947193670178038, |
| "learning_rate": 9.67105353048259e-06, |
| "loss": 0.0645, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.5814376706096451, |
| "grad_norm": 0.9345719582841384, |
| "learning_rate": 9.670543478763834e-06, |
| "loss": 0.0853, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.5818926296633303, |
| "grad_norm": 0.46822310121822047, |
| "learning_rate": 9.670033045389356e-06, |
| "loss": 0.06, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.5823475887170154, |
| "grad_norm": 0.882335352298928, |
| "learning_rate": 9.669522230400868e-06, |
| "loss": 0.1288, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5828025477707006, |
| "grad_norm": 0.7155876804587362, |
| "learning_rate": 9.66901103384011e-06, |
| "loss": 0.0923, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.5832575068243858, |
| "grad_norm": 0.758339057709363, |
| "learning_rate": 9.668499455748857e-06, |
| "loss": 0.0866, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.583712465878071, |
| "grad_norm": 0.5929990208040478, |
| "learning_rate": 9.66798749616891e-06, |
| "loss": 0.0571, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.5841674249317561, |
| "grad_norm": 0.5486564328594907, |
| "learning_rate": 9.667475155142104e-06, |
| "loss": 0.0551, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.5846223839854413, |
| "grad_norm": 0.6958253493282612, |
| "learning_rate": 9.666962432710307e-06, |
| "loss": 0.0731, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5850773430391265, |
| "grad_norm": 1.1984701204529857, |
| "learning_rate": 9.666449328915418e-06, |
| "loss": 0.1248, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5855323020928116, |
| "grad_norm": 1.07466414021835, |
| "learning_rate": 9.66593584379936e-06, |
| "loss": 0.0969, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5859872611464968, |
| "grad_norm": 0.7365065558485686, |
| "learning_rate": 9.6654219774041e-06, |
| "loss": 0.0768, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.586442220200182, |
| "grad_norm": 0.7278778525375763, |
| "learning_rate": 9.664907729771622e-06, |
| "loss": 0.0931, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.5868971792538672, |
| "grad_norm": 0.6940342908894654, |
| "learning_rate": 9.664393100943951e-06, |
| "loss": 0.0716, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5873521383075523, |
| "grad_norm": 0.7046475563496115, |
| "learning_rate": 9.663878090963142e-06, |
| "loss": 0.0833, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5878070973612375, |
| "grad_norm": 0.6554863862272154, |
| "learning_rate": 9.663362699871275e-06, |
| "loss": 0.0705, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5882620564149227, |
| "grad_norm": 0.610296786595235, |
| "learning_rate": 9.66284692771047e-06, |
| "loss": 0.0592, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5887170154686078, |
| "grad_norm": 0.6866815075031769, |
| "learning_rate": 9.662330774522869e-06, |
| "loss": 0.0748, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.589171974522293, |
| "grad_norm": 0.5654106713312388, |
| "learning_rate": 9.661814240350653e-06, |
| "loss": 0.0546, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5896269335759782, |
| "grad_norm": 1.271034489401823, |
| "learning_rate": 9.66129732523603e-06, |
| "loss": 0.1473, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5900818926296634, |
| "grad_norm": 0.45734781465896296, |
| "learning_rate": 9.66078002922124e-06, |
| "loss": 0.0452, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5905368516833485, |
| "grad_norm": 0.8001910391102482, |
| "learning_rate": 9.660262352348553e-06, |
| "loss": 0.0801, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5909918107370337, |
| "grad_norm": 0.8095822615697389, |
| "learning_rate": 9.659744294660272e-06, |
| "loss": 0.0851, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5914467697907189, |
| "grad_norm": 0.6222175915293906, |
| "learning_rate": 9.659225856198732e-06, |
| "loss": 0.0725, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.591901728844404, |
| "grad_norm": 0.5098172411498206, |
| "learning_rate": 9.658707037006294e-06, |
| "loss": 0.0586, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.5923566878980892, |
| "grad_norm": 0.5056342525545805, |
| "learning_rate": 9.658187837125357e-06, |
| "loss": 0.0552, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.5928116469517744, |
| "grad_norm": 0.8298114087640572, |
| "learning_rate": 9.657668256598347e-06, |
| "loss": 0.0976, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5932666060054596, |
| "grad_norm": 0.9354418819253106, |
| "learning_rate": 9.657148295467719e-06, |
| "loss": 0.1128, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5937215650591446, |
| "grad_norm": 0.732222390896743, |
| "learning_rate": 9.656627953775964e-06, |
| "loss": 0.0719, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5941765241128298, |
| "grad_norm": 0.817074061431315, |
| "learning_rate": 9.6561072315656e-06, |
| "loss": 0.097, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.594631483166515, |
| "grad_norm": 0.6993010225350191, |
| "learning_rate": 9.655586128879185e-06, |
| "loss": 0.0866, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.5950864422202001, |
| "grad_norm": 0.6036033167422408, |
| "learning_rate": 9.655064645759291e-06, |
| "loss": 0.0615, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5955414012738853, |
| "grad_norm": 0.4333029170805267, |
| "learning_rate": 9.654542782248539e-06, |
| "loss": 0.0333, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5959963603275705, |
| "grad_norm": 0.5158856954901245, |
| "learning_rate": 9.65402053838957e-06, |
| "loss": 0.0534, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5964513193812557, |
| "grad_norm": 0.8439407413306237, |
| "learning_rate": 9.653497914225059e-06, |
| "loss": 0.0886, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5969062784349408, |
| "grad_norm": 1.097335021441692, |
| "learning_rate": 9.652974909797714e-06, |
| "loss": 0.1184, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.597361237488626, |
| "grad_norm": 0.6552117042192046, |
| "learning_rate": 9.652451525150272e-06, |
| "loss": 0.0719, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5978161965423112, |
| "grad_norm": 0.6353863518066384, |
| "learning_rate": 9.651927760325504e-06, |
| "loss": 0.0696, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5982711555959963, |
| "grad_norm": 0.9048456403488727, |
| "learning_rate": 9.651403615366204e-06, |
| "loss": 0.0859, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5987261146496815, |
| "grad_norm": 0.7176841695337582, |
| "learning_rate": 9.650879090315207e-06, |
| "loss": 0.0821, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5991810737033667, |
| "grad_norm": 0.696539124420045, |
| "learning_rate": 9.650354185215374e-06, |
| "loss": 0.0875, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5996360327570519, |
| "grad_norm": 0.5924500205612657, |
| "learning_rate": 9.649828900109599e-06, |
| "loss": 0.0646, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.600090991810737, |
| "grad_norm": 0.5430407542910594, |
| "learning_rate": 9.649303235040803e-06, |
| "loss": 0.0486, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6005459508644222, |
| "grad_norm": 0.6459813862779727, |
| "learning_rate": 9.648777190051944e-06, |
| "loss": 0.0903, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6010009099181074, |
| "grad_norm": 0.6531397749427512, |
| "learning_rate": 9.648250765186006e-06, |
| "loss": 0.0638, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6014558689717925, |
| "grad_norm": 0.6616813941465042, |
| "learning_rate": 9.647723960486006e-06, |
| "loss": 0.0861, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6019108280254777, |
| "grad_norm": 0.8426003399558685, |
| "learning_rate": 9.647196775994995e-06, |
| "loss": 0.0928, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6023657870791629, |
| "grad_norm": 0.6908471872127779, |
| "learning_rate": 9.646669211756049e-06, |
| "loss": 0.064, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6028207461328481, |
| "grad_norm": 0.6969433310817453, |
| "learning_rate": 9.64614126781228e-06, |
| "loss": 0.0683, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6032757051865332, |
| "grad_norm": 0.7506047981065134, |
| "learning_rate": 9.645612944206826e-06, |
| "loss": 0.0849, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6037306642402184, |
| "grad_norm": 0.5624997977779479, |
| "learning_rate": 9.645084240982862e-06, |
| "loss": 0.064, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6041856232939036, |
| "grad_norm": 0.43671100502349636, |
| "learning_rate": 9.644555158183592e-06, |
| "loss": 0.0615, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6046405823475887, |
| "grad_norm": 0.553762280713577, |
| "learning_rate": 9.64402569585225e-06, |
| "loss": 0.0596, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6050955414012739, |
| "grad_norm": 0.6580653378362663, |
| "learning_rate": 9.643495854032099e-06, |
| "loss": 0.0558, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6055505004549591, |
| "grad_norm": 0.7656128172437318, |
| "learning_rate": 9.642965632766437e-06, |
| "loss": 0.0915, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6060054595086443, |
| "grad_norm": 0.49008300515141723, |
| "learning_rate": 9.642435032098591e-06, |
| "loss": 0.0553, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6064604185623294, |
| "grad_norm": 0.6058179105933948, |
| "learning_rate": 9.64190405207192e-06, |
| "loss": 0.0709, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6069153776160146, |
| "grad_norm": 0.6707142568108124, |
| "learning_rate": 9.641372692729811e-06, |
| "loss": 0.0715, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6073703366696998, |
| "grad_norm": 0.8710319334113071, |
| "learning_rate": 9.640840954115686e-06, |
| "loss": 0.091, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.607825295723385, |
| "grad_norm": 0.7496993600003082, |
| "learning_rate": 9.640308836272996e-06, |
| "loss": 0.0932, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.60828025477707, |
| "grad_norm": 0.9684583450547241, |
| "learning_rate": 9.639776339245225e-06, |
| "loss": 0.087, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6087352138307552, |
| "grad_norm": 0.7857186962980957, |
| "learning_rate": 9.639243463075884e-06, |
| "loss": 0.1084, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6091901728844404, |
| "grad_norm": 1.1677743182021476, |
| "learning_rate": 9.638710207808518e-06, |
| "loss": 0.0712, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6096451319381255, |
| "grad_norm": 0.725604064535932, |
| "learning_rate": 9.6381765734867e-06, |
| "loss": 0.077, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6101000909918107, |
| "grad_norm": 0.5923782964843433, |
| "learning_rate": 9.63764256015404e-06, |
| "loss": 0.0641, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6105550500454959, |
| "grad_norm": 0.7069177546563966, |
| "learning_rate": 9.637108167854173e-06, |
| "loss": 0.0747, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6110100090991811, |
| "grad_norm": 0.780384533965345, |
| "learning_rate": 9.636573396630767e-06, |
| "loss": 0.0709, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6114649681528662, |
| "grad_norm": 0.7305821703239879, |
| "learning_rate": 9.636038246527523e-06, |
| "loss": 0.0955, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6119199272065514, |
| "grad_norm": 0.6274215993935015, |
| "learning_rate": 9.635502717588168e-06, |
| "loss": 0.0656, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6123748862602366, |
| "grad_norm": 0.6018866737558257, |
| "learning_rate": 9.634966809856465e-06, |
| "loss": 0.0729, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6128298453139217, |
| "grad_norm": 0.9406786913650838, |
| "learning_rate": 9.634430523376207e-06, |
| "loss": 0.1105, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6132848043676069, |
| "grad_norm": 0.6910930219074588, |
| "learning_rate": 9.633893858191214e-06, |
| "loss": 0.0652, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.6137397634212921, |
| "grad_norm": 0.6641071332456526, |
| "learning_rate": 9.633356814345342e-06, |
| "loss": 0.0896, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6141947224749773, |
| "grad_norm": 0.6463461735454817, |
| "learning_rate": 9.632819391882475e-06, |
| "loss": 0.0691, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6146496815286624, |
| "grad_norm": 0.6570738741447356, |
| "learning_rate": 9.63228159084653e-06, |
| "loss": 0.0726, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6151046405823476, |
| "grad_norm": 0.9251372605740943, |
| "learning_rate": 9.631743411281451e-06, |
| "loss": 0.1089, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6155595996360328, |
| "grad_norm": 1.0354136522724409, |
| "learning_rate": 9.631204853231219e-06, |
| "loss": 0.1065, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6160145586897179, |
| "grad_norm": 0.7577345531084587, |
| "learning_rate": 9.630665916739839e-06, |
| "loss": 0.083, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.6164695177434031, |
| "grad_norm": 0.6775679844485006, |
| "learning_rate": 9.630126601851353e-06, |
| "loss": 0.065, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6169244767970883, |
| "grad_norm": 0.6510409015870585, |
| "learning_rate": 9.62958690860983e-06, |
| "loss": 0.0842, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6173794358507735, |
| "grad_norm": 0.6541401291987898, |
| "learning_rate": 9.629046837059373e-06, |
| "loss": 0.0809, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6178343949044586, |
| "grad_norm": 0.6773644747284383, |
| "learning_rate": 9.628506387244111e-06, |
| "loss": 0.08, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6182893539581438, |
| "grad_norm": 0.7401243921784199, |
| "learning_rate": 9.627965559208212e-06, |
| "loss": 0.0632, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.618744313011829, |
| "grad_norm": 0.6255731586329286, |
| "learning_rate": 9.627424352995866e-06, |
| "loss": 0.0836, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6191992720655141, |
| "grad_norm": 0.8684189032240879, |
| "learning_rate": 9.626882768651298e-06, |
| "loss": 0.0918, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6196542311191993, |
| "grad_norm": 0.5565014005760545, |
| "learning_rate": 9.626340806218765e-06, |
| "loss": 0.0508, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6201091901728845, |
| "grad_norm": 0.580066419485805, |
| "learning_rate": 9.625798465742555e-06, |
| "loss": 0.0691, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6205641492265697, |
| "grad_norm": 0.5980127746625918, |
| "learning_rate": 9.625255747266984e-06, |
| "loss": 0.0674, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6210191082802548, |
| "grad_norm": 0.8518146992949526, |
| "learning_rate": 9.6247126508364e-06, |
| "loss": 0.1112, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.62147406733394, |
| "grad_norm": 0.8485700961520207, |
| "learning_rate": 9.624169176495185e-06, |
| "loss": 0.0966, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6219290263876252, |
| "grad_norm": 0.9962639418238284, |
| "learning_rate": 9.623625324287747e-06, |
| "loss": 0.1047, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6223839854413102, |
| "grad_norm": 0.7706385402975253, |
| "learning_rate": 9.623081094258527e-06, |
| "loss": 0.1229, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6228389444949954, |
| "grad_norm": 0.9185957443221413, |
| "learning_rate": 9.622536486451997e-06, |
| "loss": 0.0981, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6232939035486806, |
| "grad_norm": 0.5737112203779396, |
| "learning_rate": 9.621991500912662e-06, |
| "loss": 0.0615, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6237488626023658, |
| "grad_norm": 0.8225187377418599, |
| "learning_rate": 9.621446137685051e-06, |
| "loss": 0.1032, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6242038216560509, |
| "grad_norm": 0.911993563924521, |
| "learning_rate": 9.620900396813734e-06, |
| "loss": 0.1052, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.6246587807097361, |
| "grad_norm": 1.1969877300226637, |
| "learning_rate": 9.620354278343306e-06, |
| "loss": 0.1323, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6251137397634213, |
| "grad_norm": 0.49674299728731663, |
| "learning_rate": 9.61980778231839e-06, |
| "loss": 0.0469, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6255686988171064, |
| "grad_norm": 0.9419790098064809, |
| "learning_rate": 9.619260908783645e-06, |
| "loss": 0.0829, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6260236578707916, |
| "grad_norm": 0.8648992102518269, |
| "learning_rate": 9.61871365778376e-06, |
| "loss": 0.1227, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6264786169244768, |
| "grad_norm": 0.6855921150752273, |
| "learning_rate": 9.618166029363452e-06, |
| "loss": 0.0893, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.626933575978162, |
| "grad_norm": 0.7460350385490577, |
| "learning_rate": 9.61761802356747e-06, |
| "loss": 0.1029, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6273885350318471, |
| "grad_norm": 0.6238948896650269, |
| "learning_rate": 9.617069640440598e-06, |
| "loss": 0.0671, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6278434940855323, |
| "grad_norm": 0.8484782740935036, |
| "learning_rate": 9.616520880027645e-06, |
| "loss": 0.1094, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6282984531392175, |
| "grad_norm": 0.4929008515621752, |
| "learning_rate": 9.615971742373453e-06, |
| "loss": 0.0621, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6287534121929026, |
| "grad_norm": 0.8230508842215047, |
| "learning_rate": 9.615422227522897e-06, |
| "loss": 0.0873, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6292083712465878, |
| "grad_norm": 0.8269677617343545, |
| "learning_rate": 9.614872335520879e-06, |
| "loss": 0.0996, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.629663330300273, |
| "grad_norm": 0.7039938726965704, |
| "learning_rate": 9.614322066412335e-06, |
| "loss": 0.084, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6301182893539582, |
| "grad_norm": 0.7376546247757936, |
| "learning_rate": 9.613771420242229e-06, |
| "loss": 0.0857, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6305732484076433, |
| "grad_norm": 0.6736142636267153, |
| "learning_rate": 9.613220397055558e-06, |
| "loss": 0.0732, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6310282074613285, |
| "grad_norm": 0.7476942520500481, |
| "learning_rate": 9.612668996897351e-06, |
| "loss": 0.0713, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6314831665150137, |
| "grad_norm": 0.7359465201312233, |
| "learning_rate": 9.612117219812662e-06, |
| "loss": 0.0847, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6319381255686988, |
| "grad_norm": 0.9663363466846744, |
| "learning_rate": 9.611565065846583e-06, |
| "loss": 0.1015, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.632393084622384, |
| "grad_norm": 0.7893446645403931, |
| "learning_rate": 9.611012535044232e-06, |
| "loss": 0.0983, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6328480436760692, |
| "grad_norm": 1.024989133088754, |
| "learning_rate": 9.61045962745076e-06, |
| "loss": 0.1102, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6333030027297544, |
| "grad_norm": 0.4979683651622851, |
| "learning_rate": 9.609906343111348e-06, |
| "loss": 0.0586, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6337579617834395, |
| "grad_norm": 1.1009002383858189, |
| "learning_rate": 9.609352682071209e-06, |
| "loss": 0.0963, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6342129208371247, |
| "grad_norm": 1.0522149389130615, |
| "learning_rate": 9.608798644375583e-06, |
| "loss": 0.1189, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6346678798908099, |
| "grad_norm": 0.9812979427333788, |
| "learning_rate": 9.608244230069745e-06, |
| "loss": 0.1216, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.635122838944495, |
| "grad_norm": 0.7352050689297358, |
| "learning_rate": 9.607689439199e-06, |
| "loss": 0.0875, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6355777979981801, |
| "grad_norm": 0.8346962373874338, |
| "learning_rate": 9.60713427180868e-06, |
| "loss": 0.0872, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6360327570518653, |
| "grad_norm": 0.9100484302304894, |
| "learning_rate": 9.606578727944156e-06, |
| "loss": 0.1014, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6364877161055505, |
| "grad_norm": 0.6397054531308819, |
| "learning_rate": 9.606022807650819e-06, |
| "loss": 0.0661, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6369426751592356, |
| "grad_norm": 0.7013671405977515, |
| "learning_rate": 9.6054665109741e-06, |
| "loss": 0.0788, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6373976342129208, |
| "grad_norm": 0.7177935827049716, |
| "learning_rate": 9.604909837959456e-06, |
| "loss": 0.0739, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.637852593266606, |
| "grad_norm": 1.0034339624615456, |
| "learning_rate": 9.604352788652375e-06, |
| "loss": 0.125, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6383075523202911, |
| "grad_norm": 0.7908500695821505, |
| "learning_rate": 9.603795363098377e-06, |
| "loss": 0.0626, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6387625113739763, |
| "grad_norm": 0.7396845097003291, |
| "learning_rate": 9.603237561343013e-06, |
| "loss": 0.0845, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6392174704276615, |
| "grad_norm": 0.6132031146325181, |
| "learning_rate": 9.602679383431864e-06, |
| "loss": 0.0832, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6396724294813467, |
| "grad_norm": 0.5848815265706712, |
| "learning_rate": 9.602120829410539e-06, |
| "loss": 0.0609, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6401273885350318, |
| "grad_norm": 1.1396916096380878, |
| "learning_rate": 9.601561899324685e-06, |
| "loss": 0.089, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.640582347588717, |
| "grad_norm": 0.6243784477376835, |
| "learning_rate": 9.601002593219972e-06, |
| "loss": 0.0629, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6410373066424022, |
| "grad_norm": 0.7693306930944409, |
| "learning_rate": 9.600442911142107e-06, |
| "loss": 0.0975, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6414922656960873, |
| "grad_norm": 0.5824222441008058, |
| "learning_rate": 9.599882853136821e-06, |
| "loss": 0.0668, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6419472247497725, |
| "grad_norm": 0.7486427214965261, |
| "learning_rate": 9.59932241924988e-06, |
| "loss": 0.0885, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6424021838034577, |
| "grad_norm": 0.7403442425812181, |
| "learning_rate": 9.598761609527084e-06, |
| "loss": 0.0764, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.8444168000337251, |
| "learning_rate": 9.598200424014255e-06, |
| "loss": 0.0901, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.643312101910828, |
| "grad_norm": 0.6214870203253012, |
| "learning_rate": 9.597638862757255e-06, |
| "loss": 0.0641, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6437670609645132, |
| "grad_norm": 0.45639812216740483, |
| "learning_rate": 9.597076925801967e-06, |
| "loss": 0.0525, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6442220200181984, |
| "grad_norm": 0.5879645013041995, |
| "learning_rate": 9.596514613194313e-06, |
| "loss": 0.0664, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6446769790718835, |
| "grad_norm": 0.723485890557837, |
| "learning_rate": 9.595951924980245e-06, |
| "loss": 0.0878, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6451319381255687, |
| "grad_norm": 0.49190939142236517, |
| "learning_rate": 9.595388861205738e-06, |
| "loss": 0.0446, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6455868971792539, |
| "grad_norm": 0.8244975390610266, |
| "learning_rate": 9.59482542191681e-06, |
| "loss": 0.0927, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6460418562329391, |
| "grad_norm": 0.8365340393723969, |
| "learning_rate": 9.594261607159494e-06, |
| "loss": 0.0944, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6464968152866242, |
| "grad_norm": 0.9246231982112141, |
| "learning_rate": 9.59369741697987e-06, |
| "loss": 0.1132, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6469517743403094, |
| "grad_norm": 0.7576903487594321, |
| "learning_rate": 9.593132851424036e-06, |
| "loss": 0.0968, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6474067333939946, |
| "grad_norm": 0.7385455319846311, |
| "learning_rate": 9.59256791053813e-06, |
| "loss": 0.1045, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6478616924476797, |
| "grad_norm": 0.8466333605064674, |
| "learning_rate": 9.592002594368312e-06, |
| "loss": 0.1058, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6483166515013649, |
| "grad_norm": 0.9463191649116842, |
| "learning_rate": 9.59143690296078e-06, |
| "loss": 0.1179, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6487716105550501, |
| "grad_norm": 0.49506567565602905, |
| "learning_rate": 9.590870836361758e-06, |
| "loss": 0.0679, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6492265696087353, |
| "grad_norm": 0.9070193484568203, |
| "learning_rate": 9.590304394617506e-06, |
| "loss": 0.0889, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.6496815286624203, |
| "grad_norm": 0.4746970963167155, |
| "learning_rate": 9.589737577774308e-06, |
| "loss": 0.0474, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6501364877161055, |
| "grad_norm": 0.7625565873276676, |
| "learning_rate": 9.58917038587848e-06, |
| "loss": 0.1052, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6505914467697907, |
| "grad_norm": 0.5544350713091404, |
| "learning_rate": 9.588602818976374e-06, |
| "loss": 0.0602, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6510464058234758, |
| "grad_norm": 0.8043877114109435, |
| "learning_rate": 9.588034877114367e-06, |
| "loss": 0.0714, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.651501364877161, |
| "grad_norm": 0.6177719048805246, |
| "learning_rate": 9.58746656033887e-06, |
| "loss": 0.0822, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6519563239308462, |
| "grad_norm": 1.070732220715245, |
| "learning_rate": 9.586897868696323e-06, |
| "loss": 0.1203, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6524112829845314, |
| "grad_norm": 1.183590915899486, |
| "learning_rate": 9.586328802233195e-06, |
| "loss": 0.0935, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6528662420382165, |
| "grad_norm": 0.581772493938091, |
| "learning_rate": 9.58575936099599e-06, |
| "loss": 0.0682, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6533212010919017, |
| "grad_norm": 0.7377901301818582, |
| "learning_rate": 9.58518954503124e-06, |
| "loss": 0.0824, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6537761601455869, |
| "grad_norm": 0.9292214040800371, |
| "learning_rate": 9.584619354385505e-06, |
| "loss": 0.1138, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.654231119199272, |
| "grad_norm": 0.7573270642921373, |
| "learning_rate": 9.58404878910538e-06, |
| "loss": 0.074, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6546860782529572, |
| "grad_norm": 0.5838864743945036, |
| "learning_rate": 9.58347784923749e-06, |
| "loss": 0.067, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6551410373066424, |
| "grad_norm": 0.6730458126896756, |
| "learning_rate": 9.58290653482849e-06, |
| "loss": 0.0632, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6555959963603276, |
| "grad_norm": 0.7216545389315259, |
| "learning_rate": 9.582334845925063e-06, |
| "loss": 0.0757, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6560509554140127, |
| "grad_norm": 0.929819001740202, |
| "learning_rate": 9.581762782573926e-06, |
| "loss": 0.0973, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6565059144676979, |
| "grad_norm": 0.7680577896195074, |
| "learning_rate": 9.581190344821827e-06, |
| "loss": 0.086, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.6569608735213831, |
| "grad_norm": 0.8746535076926352, |
| "learning_rate": 9.58061753271554e-06, |
| "loss": 0.1085, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6574158325750682, |
| "grad_norm": 0.6364512825611769, |
| "learning_rate": 9.580044346301875e-06, |
| "loss": 0.0764, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6578707916287534, |
| "grad_norm": 0.47118649986170347, |
| "learning_rate": 9.57947078562767e-06, |
| "loss": 0.0506, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6583257506824386, |
| "grad_norm": 0.6564703457147261, |
| "learning_rate": 9.578896850739792e-06, |
| "loss": 0.0702, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.6587807097361238, |
| "grad_norm": 0.6786314185300042, |
| "learning_rate": 9.578322541685142e-06, |
| "loss": 0.0778, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.6592356687898089, |
| "grad_norm": 0.7866249519519628, |
| "learning_rate": 9.577747858510647e-06, |
| "loss": 0.1066, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.6596906278434941, |
| "grad_norm": 0.8352652198110325, |
| "learning_rate": 9.577172801263272e-06, |
| "loss": 0.0973, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6601455868971793, |
| "grad_norm": 0.6694090591857538, |
| "learning_rate": 9.576597369990006e-06, |
| "loss": 0.077, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.6606005459508644, |
| "grad_norm": 0.6613042389515336, |
| "learning_rate": 9.576021564737871e-06, |
| "loss": 0.0608, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.6610555050045496, |
| "grad_norm": 0.7515982683897205, |
| "learning_rate": 9.575445385553917e-06, |
| "loss": 0.1003, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.6615104640582348, |
| "grad_norm": 0.9769815693335377, |
| "learning_rate": 9.57486883248523e-06, |
| "loss": 0.0946, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.66196542311192, |
| "grad_norm": 1.1665424395125852, |
| "learning_rate": 9.574291905578922e-06, |
| "loss": 0.1317, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.6624203821656051, |
| "grad_norm": 0.6942177292436024, |
| "learning_rate": 9.573714604882138e-06, |
| "loss": 0.0615, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.6628753412192903, |
| "grad_norm": 0.9194225981756011, |
| "learning_rate": 9.57313693044205e-06, |
| "loss": 0.0975, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.6633303002729755, |
| "grad_norm": 0.7117926275391128, |
| "learning_rate": 9.572558882305863e-06, |
| "loss": 0.0847, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.6637852593266605, |
| "grad_norm": 0.9546376743105418, |
| "learning_rate": 9.571980460520815e-06, |
| "loss": 0.1196, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.6642402183803457, |
| "grad_norm": 0.8937437496424256, |
| "learning_rate": 9.57140166513417e-06, |
| "loss": 0.096, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.664695177434031, |
| "grad_norm": 0.5937947199850856, |
| "learning_rate": 9.570822496193225e-06, |
| "loss": 0.058, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.6651501364877161, |
| "grad_norm": 0.5756039867728808, |
| "learning_rate": 9.570242953745307e-06, |
| "loss": 0.082, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.6656050955414012, |
| "grad_norm": 0.7416722804778516, |
| "learning_rate": 9.569663037837776e-06, |
| "loss": 0.098, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.6660600545950864, |
| "grad_norm": 0.6377485683281849, |
| "learning_rate": 9.569082748518017e-06, |
| "loss": 0.0723, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.6665150136487716, |
| "grad_norm": 0.7884664768500067, |
| "learning_rate": 9.568502085833449e-06, |
| "loss": 0.0884, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.6669699727024567, |
| "grad_norm": 0.7723350087530905, |
| "learning_rate": 9.567921049831522e-06, |
| "loss": 0.0967, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.6674249317561419, |
| "grad_norm": 0.7260885892233983, |
| "learning_rate": 9.567339640559716e-06, |
| "loss": 0.0812, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.6678798908098271, |
| "grad_norm": 0.5596294621225263, |
| "learning_rate": 9.566757858065538e-06, |
| "loss": 0.0631, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.6683348498635123, |
| "grad_norm": 0.7286352648100037, |
| "learning_rate": 9.566175702396534e-06, |
| "loss": 0.0823, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.6687898089171974, |
| "grad_norm": 0.9301493673689373, |
| "learning_rate": 9.565593173600271e-06, |
| "loss": 0.0987, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6692447679708826, |
| "grad_norm": 0.6817718703338496, |
| "learning_rate": 9.565010271724353e-06, |
| "loss": 0.0755, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.6696997270245678, |
| "grad_norm": 0.7526239018301766, |
| "learning_rate": 9.56442699681641e-06, |
| "loss": 0.0876, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.6701546860782529, |
| "grad_norm": 0.7279647211742274, |
| "learning_rate": 9.563843348924105e-06, |
| "loss": 0.0681, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.6706096451319381, |
| "grad_norm": 0.8487044021854026, |
| "learning_rate": 9.563259328095132e-06, |
| "loss": 0.0903, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.6710646041856233, |
| "grad_norm": 0.609495225783116, |
| "learning_rate": 9.562674934377214e-06, |
| "loss": 0.0801, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.6715195632393085, |
| "grad_norm": 0.7638645194963899, |
| "learning_rate": 9.562090167818107e-06, |
| "loss": 0.0874, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.6719745222929936, |
| "grad_norm": 1.4076317151154771, |
| "learning_rate": 9.561505028465593e-06, |
| "loss": 0.0874, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.6724294813466788, |
| "grad_norm": 0.6311161675673277, |
| "learning_rate": 9.560919516367486e-06, |
| "loss": 0.0738, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.672884440400364, |
| "grad_norm": 0.638266808298586, |
| "learning_rate": 9.560333631571634e-06, |
| "loss": 0.0682, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.6733393994540491, |
| "grad_norm": 0.7097356519617585, |
| "learning_rate": 9.559747374125911e-06, |
| "loss": 0.0987, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6737943585077343, |
| "grad_norm": 0.6502346745698145, |
| "learning_rate": 9.559160744078226e-06, |
| "loss": 0.0644, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.6742493175614195, |
| "grad_norm": 1.056681303492363, |
| "learning_rate": 9.558573741476513e-06, |
| "loss": 0.0939, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.6747042766151047, |
| "grad_norm": 0.7992268675141662, |
| "learning_rate": 9.557986366368742e-06, |
| "loss": 0.0733, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.6751592356687898, |
| "grad_norm": 1.0832399406974047, |
| "learning_rate": 9.557398618802907e-06, |
| "loss": 0.1123, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.675614194722475, |
| "grad_norm": 0.6543008513198456, |
| "learning_rate": 9.556810498827039e-06, |
| "loss": 0.0794, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.6760691537761602, |
| "grad_norm": 0.6306597614421026, |
| "learning_rate": 9.556222006489193e-06, |
| "loss": 0.0786, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.6765241128298453, |
| "grad_norm": 0.5618899284499352, |
| "learning_rate": 9.555633141837462e-06, |
| "loss": 0.0618, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.6769790718835305, |
| "grad_norm": 0.6434016854657288, |
| "learning_rate": 9.555043904919963e-06, |
| "loss": 0.0796, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.6774340309372157, |
| "grad_norm": 0.7512094182824542, |
| "learning_rate": 9.554454295784848e-06, |
| "loss": 0.0745, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.6778889899909009, |
| "grad_norm": 0.662429978970196, |
| "learning_rate": 9.553864314480294e-06, |
| "loss": 0.0788, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.678343949044586, |
| "grad_norm": 0.7125824073483379, |
| "learning_rate": 9.553273961054514e-06, |
| "loss": 0.072, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.6787989080982711, |
| "grad_norm": 0.8599367957772613, |
| "learning_rate": 9.552683235555749e-06, |
| "loss": 0.0765, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.6792538671519563, |
| "grad_norm": 0.7900843446637873, |
| "learning_rate": 9.55209213803227e-06, |
| "loss": 0.0861, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.6797088262056415, |
| "grad_norm": 0.9492542185178791, |
| "learning_rate": 9.551500668532377e-06, |
| "loss": 0.1036, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.6801637852593266, |
| "grad_norm": 0.5324340095596853, |
| "learning_rate": 9.550908827104404e-06, |
| "loss": 0.0509, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.6806187443130118, |
| "grad_norm": 1.4654919772375794, |
| "learning_rate": 9.550316613796716e-06, |
| "loss": 0.0891, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.681073703366697, |
| "grad_norm": 0.6964909028346599, |
| "learning_rate": 9.549724028657698e-06, |
| "loss": 0.0814, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.6815286624203821, |
| "grad_norm": 0.7118346157191014, |
| "learning_rate": 9.549131071735784e-06, |
| "loss": 0.0711, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.6819836214740673, |
| "grad_norm": 0.9814989838911676, |
| "learning_rate": 9.54853774307942e-06, |
| "loss": 0.0981, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.6824385805277525, |
| "grad_norm": 0.8030617514029292, |
| "learning_rate": 9.547944042737092e-06, |
| "loss": 0.0944, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6828935395814377, |
| "grad_norm": 0.9091821467413523, |
| "learning_rate": 9.547349970757317e-06, |
| "loss": 0.1419, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.6833484986351228, |
| "grad_norm": 0.7604842345576438, |
| "learning_rate": 9.546755527188638e-06, |
| "loss": 0.0616, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.683803457688808, |
| "grad_norm": 0.7795635296832277, |
| "learning_rate": 9.546160712079629e-06, |
| "loss": 0.0819, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.6842584167424932, |
| "grad_norm": 0.6155010796235886, |
| "learning_rate": 9.545565525478896e-06, |
| "loss": 0.0737, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.6847133757961783, |
| "grad_norm": 0.6981564617213015, |
| "learning_rate": 9.544969967435079e-06, |
| "loss": 0.0786, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.6851683348498635, |
| "grad_norm": 0.8590705218017948, |
| "learning_rate": 9.54437403799684e-06, |
| "loss": 0.0835, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.6856232939035487, |
| "grad_norm": 0.8783591706447448, |
| "learning_rate": 9.543777737212876e-06, |
| "loss": 0.118, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.6860782529572339, |
| "grad_norm": 0.5312480753344904, |
| "learning_rate": 9.543181065131914e-06, |
| "loss": 0.0535, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.686533212010919, |
| "grad_norm": 0.6911478055364548, |
| "learning_rate": 9.542584021802715e-06, |
| "loss": 0.0651, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.6869881710646042, |
| "grad_norm": 0.910176403224045, |
| "learning_rate": 9.54198660727406e-06, |
| "loss": 0.0916, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6874431301182894, |
| "grad_norm": 0.5369469100452242, |
| "learning_rate": 9.541388821594774e-06, |
| "loss": 0.064, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.6878980891719745, |
| "grad_norm": 0.7242695685667516, |
| "learning_rate": 9.540790664813702e-06, |
| "loss": 0.0725, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.6883530482256597, |
| "grad_norm": 0.7527422721071317, |
| "learning_rate": 9.540192136979722e-06, |
| "loss": 0.0863, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.6888080072793449, |
| "grad_norm": 0.5409793571909967, |
| "learning_rate": 9.539593238141745e-06, |
| "loss": 0.0678, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.6892629663330301, |
| "grad_norm": 0.5059270742296627, |
| "learning_rate": 9.538993968348706e-06, |
| "loss": 0.0613, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.6897179253867152, |
| "grad_norm": 0.8092866682697022, |
| "learning_rate": 9.538394327649581e-06, |
| "loss": 0.0816, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.6901728844404004, |
| "grad_norm": 0.7416822411067572, |
| "learning_rate": 9.537794316093366e-06, |
| "loss": 0.0736, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.6906278434940856, |
| "grad_norm": 0.6013123530792879, |
| "learning_rate": 9.537193933729092e-06, |
| "loss": 0.0637, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.6910828025477707, |
| "grad_norm": 1.0953662823641266, |
| "learning_rate": 9.53659318060582e-06, |
| "loss": 0.1381, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.6915377616014559, |
| "grad_norm": 0.7906081758139587, |
| "learning_rate": 9.535992056772639e-06, |
| "loss": 0.088, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6919927206551411, |
| "grad_norm": 0.9984370937403453, |
| "learning_rate": 9.535390562278673e-06, |
| "loss": 0.086, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.6924476797088263, |
| "grad_norm": 0.7438661675719108, |
| "learning_rate": 9.53478869717307e-06, |
| "loss": 0.0771, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.6929026387625113, |
| "grad_norm": 0.85189844123529, |
| "learning_rate": 9.534186461505015e-06, |
| "loss": 0.1109, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.6933575978161965, |
| "grad_norm": 0.7215256903381998, |
| "learning_rate": 9.533583855323717e-06, |
| "loss": 0.0947, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.6938125568698817, |
| "grad_norm": 0.8936614524747819, |
| "learning_rate": 9.532980878678422e-06, |
| "loss": 0.0731, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.6942675159235668, |
| "grad_norm": 0.7734700292932609, |
| "learning_rate": 9.5323775316184e-06, |
| "loss": 0.0844, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.694722474977252, |
| "grad_norm": 0.7521845435610183, |
| "learning_rate": 9.531773814192953e-06, |
| "loss": 0.0878, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.6951774340309372, |
| "grad_norm": 0.890089227377408, |
| "learning_rate": 9.531169726451417e-06, |
| "loss": 0.1128, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.6956323930846224, |
| "grad_norm": 0.7682866565773229, |
| "learning_rate": 9.530565268443153e-06, |
| "loss": 0.0956, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.6960873521383075, |
| "grad_norm": 0.9617852359873308, |
| "learning_rate": 9.529960440217554e-06, |
| "loss": 0.1088, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6965423111919927, |
| "grad_norm": 0.9775947633570551, |
| "learning_rate": 9.529355241824045e-06, |
| "loss": 0.107, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.6969972702456779, |
| "grad_norm": 0.6007455012792351, |
| "learning_rate": 9.528749673312082e-06, |
| "loss": 0.0743, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.697452229299363, |
| "grad_norm": 0.5419764603212612, |
| "learning_rate": 9.528143734731143e-06, |
| "loss": 0.0822, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.6979071883530482, |
| "grad_norm": 0.8185575482665152, |
| "learning_rate": 9.52753742613075e-06, |
| "loss": 0.0832, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.6983621474067334, |
| "grad_norm": 0.9643638751029543, |
| "learning_rate": 9.526930747560446e-06, |
| "loss": 0.1026, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6988171064604186, |
| "grad_norm": 0.8502651132594353, |
| "learning_rate": 9.526323699069803e-06, |
| "loss": 0.0902, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.6992720655141037, |
| "grad_norm": 0.5376181329235236, |
| "learning_rate": 9.525716280708428e-06, |
| "loss": 0.068, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.6997270245677889, |
| "grad_norm": 0.7166675033334694, |
| "learning_rate": 9.525108492525957e-06, |
| "loss": 0.0752, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7001819836214741, |
| "grad_norm": 0.43432195935007917, |
| "learning_rate": 9.524500334572054e-06, |
| "loss": 0.0417, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7006369426751592, |
| "grad_norm": 0.8369054167821826, |
| "learning_rate": 9.523891806896417e-06, |
| "loss": 0.1098, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7010919017288444, |
| "grad_norm": 0.49781336551041033, |
| "learning_rate": 9.523282909548773e-06, |
| "loss": 0.0618, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7015468607825296, |
| "grad_norm": 0.9187882410427298, |
| "learning_rate": 9.522673642578873e-06, |
| "loss": 0.1247, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7020018198362148, |
| "grad_norm": 0.5007920591193696, |
| "learning_rate": 9.522064006036509e-06, |
| "loss": 0.0601, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.7024567788898999, |
| "grad_norm": 0.582945252861272, |
| "learning_rate": 9.521453999971497e-06, |
| "loss": 0.0585, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7029117379435851, |
| "grad_norm": 0.5749885951853907, |
| "learning_rate": 9.520843624433681e-06, |
| "loss": 0.0664, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7033666969972703, |
| "grad_norm": 0.9724598324631707, |
| "learning_rate": 9.520232879472942e-06, |
| "loss": 0.1199, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7038216560509554, |
| "grad_norm": 1.0592052108390146, |
| "learning_rate": 9.519621765139181e-06, |
| "loss": 0.1278, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7042766151046406, |
| "grad_norm": 0.42374402440173636, |
| "learning_rate": 9.519010281482344e-06, |
| "loss": 0.0446, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7047315741583258, |
| "grad_norm": 1.102301602930716, |
| "learning_rate": 9.518398428552393e-06, |
| "loss": 0.1226, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.705186533212011, |
| "grad_norm": 0.6842519583257138, |
| "learning_rate": 9.51778620639933e-06, |
| "loss": 0.0905, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7056414922656961, |
| "grad_norm": 0.7530573117253311, |
| "learning_rate": 9.517173615073177e-06, |
| "loss": 0.0766, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.7060964513193813, |
| "grad_norm": 0.43285639961604566, |
| "learning_rate": 9.516560654623996e-06, |
| "loss": 0.0475, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7065514103730665, |
| "grad_norm": 0.9094561094681402, |
| "learning_rate": 9.515947325101875e-06, |
| "loss": 0.0896, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7070063694267515, |
| "grad_norm": 0.6097385256206468, |
| "learning_rate": 9.515333626556933e-06, |
| "loss": 0.0653, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7074613284804367, |
| "grad_norm": 0.7304393114645329, |
| "learning_rate": 9.514719559039318e-06, |
| "loss": 0.0896, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.707916287534122, |
| "grad_norm": 0.8799769831067698, |
| "learning_rate": 9.514105122599208e-06, |
| "loss": 0.1176, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7083712465878071, |
| "grad_norm": 1.0962688093811397, |
| "learning_rate": 9.513490317286815e-06, |
| "loss": 0.1174, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.7088262056414922, |
| "grad_norm": 0.8022559500547495, |
| "learning_rate": 9.512875143152373e-06, |
| "loss": 0.0969, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7092811646951774, |
| "grad_norm": 0.37133918747574174, |
| "learning_rate": 9.512259600246156e-06, |
| "loss": 0.031, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7097361237488626, |
| "grad_norm": 0.6214125216955318, |
| "learning_rate": 9.511643688618463e-06, |
| "loss": 0.0943, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7101910828025477, |
| "grad_norm": 0.7097270108607417, |
| "learning_rate": 9.51102740831962e-06, |
| "loss": 0.0847, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7106460418562329, |
| "grad_norm": 0.8290870913254417, |
| "learning_rate": 9.510410759399991e-06, |
| "loss": 0.0867, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7111010009099181, |
| "grad_norm": 0.7141101307254801, |
| "learning_rate": 9.50979374190996e-06, |
| "loss": 0.0838, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7115559599636033, |
| "grad_norm": 0.8532705780985276, |
| "learning_rate": 9.509176355899954e-06, |
| "loss": 0.09, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7120109190172884, |
| "grad_norm": 0.6858037908830302, |
| "learning_rate": 9.508558601420417e-06, |
| "loss": 0.0637, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7124658780709736, |
| "grad_norm": 0.7489578082911201, |
| "learning_rate": 9.507940478521833e-06, |
| "loss": 0.1059, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7129208371246588, |
| "grad_norm": 0.5241685648277268, |
| "learning_rate": 9.507321987254712e-06, |
| "loss": 0.0474, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.7133757961783439, |
| "grad_norm": 0.9862924439076355, |
| "learning_rate": 9.50670312766959e-06, |
| "loss": 0.1047, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7138307552320291, |
| "grad_norm": 0.8286292773017996, |
| "learning_rate": 9.506083899817043e-06, |
| "loss": 0.0808, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.8166629192761119, |
| "learning_rate": 9.505464303747667e-06, |
| "loss": 0.079, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7147406733393995, |
| "grad_norm": 0.6651663578468047, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.0879, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7151956323930846, |
| "grad_norm": 0.5230779536546156, |
| "learning_rate": 9.50422400716099e-06, |
| "loss": 0.0585, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7156505914467698, |
| "grad_norm": 0.6543543054934573, |
| "learning_rate": 9.503603306745036e-06, |
| "loss": 0.0564, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.716105550500455, |
| "grad_norm": 0.7812592861176204, |
| "learning_rate": 9.502982238314962e-06, |
| "loss": 0.0874, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7165605095541401, |
| "grad_norm": 0.5040232473993467, |
| "learning_rate": 9.502360801921512e-06, |
| "loss": 0.0532, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7170154686078253, |
| "grad_norm": 0.8631279038726943, |
| "learning_rate": 9.501738997615471e-06, |
| "loss": 0.1045, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7174704276615105, |
| "grad_norm": 0.7716014465645913, |
| "learning_rate": 9.501116825447648e-06, |
| "loss": 0.068, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7179253867151957, |
| "grad_norm": 0.5327432187838176, |
| "learning_rate": 9.500494285468884e-06, |
| "loss": 0.053, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7183803457688808, |
| "grad_norm": 0.8209926537375553, |
| "learning_rate": 9.499871377730053e-06, |
| "loss": 0.1164, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.718835304822566, |
| "grad_norm": 0.5454374508074649, |
| "learning_rate": 9.499248102282052e-06, |
| "loss": 0.0579, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7192902638762512, |
| "grad_norm": 0.4944315103743207, |
| "learning_rate": 9.498624459175815e-06, |
| "loss": 0.0542, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7197452229299363, |
| "grad_norm": 0.8372013648456964, |
| "learning_rate": 9.498000448462305e-06, |
| "loss": 0.0948, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7202001819836215, |
| "grad_norm": 0.6792072434969908, |
| "learning_rate": 9.49737607019251e-06, |
| "loss": 0.0683, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7206551410373067, |
| "grad_norm": 0.6679228302277659, |
| "learning_rate": 9.496751324417452e-06, |
| "loss": 0.0526, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7211101000909919, |
| "grad_norm": 0.830168268257237, |
| "learning_rate": 9.496126211188184e-06, |
| "loss": 0.1049, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.721565059144677, |
| "grad_norm": 0.7614112606151382, |
| "learning_rate": 9.495500730555784e-06, |
| "loss": 0.0966, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7220200181983621, |
| "grad_norm": 0.7574732623314945, |
| "learning_rate": 9.494874882571368e-06, |
| "loss": 0.0648, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.7224749772520473, |
| "grad_norm": 0.7541681951930181, |
| "learning_rate": 9.494248667286075e-06, |
| "loss": 0.0905, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7229299363057324, |
| "grad_norm": 0.776748715422375, |
| "learning_rate": 9.493622084751076e-06, |
| "loss": 0.0841, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7233848953594176, |
| "grad_norm": 0.6440945504942991, |
| "learning_rate": 9.492995135017574e-06, |
| "loss": 0.0779, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7238398544131028, |
| "grad_norm": 0.658893968607762, |
| "learning_rate": 9.4923678181368e-06, |
| "loss": 0.0862, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.724294813466788, |
| "grad_norm": 0.764304310956247, |
| "learning_rate": 9.491740134160014e-06, |
| "loss": 0.0834, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7247497725204731, |
| "grad_norm": 1.246667162089055, |
| "learning_rate": 9.491112083138509e-06, |
| "loss": 0.141, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7252047315741583, |
| "grad_norm": 0.7827390484343668, |
| "learning_rate": 9.490483665123606e-06, |
| "loss": 0.0687, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7256596906278435, |
| "grad_norm": 0.6055248563993239, |
| "learning_rate": 9.489854880166658e-06, |
| "loss": 0.0716, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7261146496815286, |
| "grad_norm": 0.7067865427149594, |
| "learning_rate": 9.489225728319044e-06, |
| "loss": 0.0756, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7265696087352138, |
| "grad_norm": 0.85395818798431, |
| "learning_rate": 9.488596209632179e-06, |
| "loss": 0.1099, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.727024567788899, |
| "grad_norm": 0.6870669290352402, |
| "learning_rate": 9.4879663241575e-06, |
| "loss": 0.0703, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7274795268425842, |
| "grad_norm": 1.2809048497988667, |
| "learning_rate": 9.48733607194648e-06, |
| "loss": 0.1663, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7279344858962693, |
| "grad_norm": 0.7180890087653823, |
| "learning_rate": 9.486705453050622e-06, |
| "loss": 0.0738, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7283894449499545, |
| "grad_norm": 0.5662460892211576, |
| "learning_rate": 9.486074467521456e-06, |
| "loss": 0.0627, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7288444040036397, |
| "grad_norm": 0.7172800606287587, |
| "learning_rate": 9.485443115410541e-06, |
| "loss": 0.0715, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7292993630573248, |
| "grad_norm": 0.6146064647413995, |
| "learning_rate": 9.484811396769475e-06, |
| "loss": 0.0828, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.72975432211101, |
| "grad_norm": 0.8606888467276742, |
| "learning_rate": 9.484179311649873e-06, |
| "loss": 0.0962, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7302092811646952, |
| "grad_norm": 0.46814164753859155, |
| "learning_rate": 9.483546860103388e-06, |
| "loss": 0.0477, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7306642402183804, |
| "grad_norm": 0.7370090010007736, |
| "learning_rate": 9.4829140421817e-06, |
| "loss": 0.081, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7311191992720655, |
| "grad_norm": 1.0689466216112777, |
| "learning_rate": 9.482280857936522e-06, |
| "loss": 0.109, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7315741583257507, |
| "grad_norm": 0.4147348220425697, |
| "learning_rate": 9.481647307419594e-06, |
| "loss": 0.0479, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7320291173794359, |
| "grad_norm": 0.4998747516198886, |
| "learning_rate": 9.481013390682687e-06, |
| "loss": 0.0634, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.732484076433121, |
| "grad_norm": 0.8673371359679307, |
| "learning_rate": 9.480379107777601e-06, |
| "loss": 0.1108, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7329390354868062, |
| "grad_norm": 0.6369274329058493, |
| "learning_rate": 9.47974445875617e-06, |
| "loss": 0.0698, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7333939945404914, |
| "grad_norm": 0.6434647227835387, |
| "learning_rate": 9.47910944367025e-06, |
| "loss": 0.0618, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7338489535941766, |
| "grad_norm": 0.8035955314379585, |
| "learning_rate": 9.478474062571735e-06, |
| "loss": 0.0997, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7343039126478617, |
| "grad_norm": 0.7996949463502321, |
| "learning_rate": 9.477838315512544e-06, |
| "loss": 0.0873, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7347588717015469, |
| "grad_norm": 0.6484970204244012, |
| "learning_rate": 9.477202202544626e-06, |
| "loss": 0.0925, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7352138307552321, |
| "grad_norm": 0.6478821974846899, |
| "learning_rate": 9.476565723719966e-06, |
| "loss": 0.0693, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7356687898089171, |
| "grad_norm": 0.6896940284490023, |
| "learning_rate": 9.475928879090568e-06, |
| "loss": 0.0763, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7361237488626023, |
| "grad_norm": 0.6758264439259065, |
| "learning_rate": 9.475291668708476e-06, |
| "loss": 0.0717, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7365787079162875, |
| "grad_norm": 0.6285383601705616, |
| "learning_rate": 9.474654092625758e-06, |
| "loss": 0.0561, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7370336669699727, |
| "grad_norm": 0.7488998942485512, |
| "learning_rate": 9.474016150894518e-06, |
| "loss": 0.0765, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7374886260236578, |
| "grad_norm": 0.7511340475878087, |
| "learning_rate": 9.47337784356688e-06, |
| "loss": 0.0865, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.737943585077343, |
| "grad_norm": 0.6908706816034008, |
| "learning_rate": 9.472739170695006e-06, |
| "loss": 0.0879, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7383985441310282, |
| "grad_norm": 0.9159671053782389, |
| "learning_rate": 9.472100132331089e-06, |
| "loss": 0.0862, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7388535031847133, |
| "grad_norm": 0.8367180794291794, |
| "learning_rate": 9.471460728527342e-06, |
| "loss": 0.0988, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7393084622383985, |
| "grad_norm": 0.6396536181540736, |
| "learning_rate": 9.470820959336018e-06, |
| "loss": 0.0742, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7397634212920837, |
| "grad_norm": 0.7212059639642758, |
| "learning_rate": 9.470180824809394e-06, |
| "loss": 0.0887, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7402183803457689, |
| "grad_norm": 0.6570480817818456, |
| "learning_rate": 9.469540324999782e-06, |
| "loss": 0.0654, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.740673339399454, |
| "grad_norm": 0.6780217435395393, |
| "learning_rate": 9.468899459959518e-06, |
| "loss": 0.0613, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7411282984531392, |
| "grad_norm": 0.8367065537687267, |
| "learning_rate": 9.468258229740972e-06, |
| "loss": 0.087, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7415832575068244, |
| "grad_norm": 0.6724757485261361, |
| "learning_rate": 9.467616634396542e-06, |
| "loss": 0.0513, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7420382165605095, |
| "grad_norm": 0.5923362651506067, |
| "learning_rate": 9.466974673978654e-06, |
| "loss": 0.0668, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7424931756141947, |
| "grad_norm": 0.8046255156703264, |
| "learning_rate": 9.466332348539772e-06, |
| "loss": 0.0888, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7429481346678799, |
| "grad_norm": 0.7456071657218726, |
| "learning_rate": 9.465689658132379e-06, |
| "loss": 0.0872, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7434030937215651, |
| "grad_norm": 0.8751254537474247, |
| "learning_rate": 9.465046602808994e-06, |
| "loss": 0.0901, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7438580527752502, |
| "grad_norm": 0.9953711560207276, |
| "learning_rate": 9.464403182622164e-06, |
| "loss": 0.1175, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7443130118289354, |
| "grad_norm": 0.738323897945569, |
| "learning_rate": 9.463759397624466e-06, |
| "loss": 0.1016, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7447679708826206, |
| "grad_norm": 0.620705920516562, |
| "learning_rate": 9.46311524786851e-06, |
| "loss": 0.0654, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7452229299363057, |
| "grad_norm": 1.2433273775382216, |
| "learning_rate": 9.462470733406929e-06, |
| "loss": 0.1403, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7456778889899909, |
| "grad_norm": 1.0268174749706445, |
| "learning_rate": 9.461825854292394e-06, |
| "loss": 0.1065, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7461328480436761, |
| "grad_norm": 0.6942991337802967, |
| "learning_rate": 9.4611806105776e-06, |
| "loss": 0.0736, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7465878070973613, |
| "grad_norm": 0.8367822612372433, |
| "learning_rate": 9.460535002315272e-06, |
| "loss": 0.089, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7470427661510464, |
| "grad_norm": 0.5929887457730553, |
| "learning_rate": 9.459889029558167e-06, |
| "loss": 0.0665, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7474977252047316, |
| "grad_norm": 0.5692342733265978, |
| "learning_rate": 9.459242692359072e-06, |
| "loss": 0.0708, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7479526842584168, |
| "grad_norm": 0.6049162715481944, |
| "learning_rate": 9.4585959907708e-06, |
| "loss": 0.0716, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7484076433121019, |
| "grad_norm": 0.5865800556894495, |
| "learning_rate": 9.457948924846201e-06, |
| "loss": 0.0562, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7488626023657871, |
| "grad_norm": 1.018263961729041, |
| "learning_rate": 9.457301494638147e-06, |
| "loss": 0.1129, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7493175614194723, |
| "grad_norm": 0.8420303347709615, |
| "learning_rate": 9.456653700199542e-06, |
| "loss": 0.0982, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7497725204731575, |
| "grad_norm": 0.6178217269864875, |
| "learning_rate": 9.456005541583326e-06, |
| "loss": 0.0777, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7502274795268425, |
| "grad_norm": 0.6159701780113571, |
| "learning_rate": 9.455357018842458e-06, |
| "loss": 0.075, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7506824385805277, |
| "grad_norm": 0.5563337669331565, |
| "learning_rate": 9.454708132029936e-06, |
| "loss": 0.0594, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7511373976342129, |
| "grad_norm": 0.7796132603413727, |
| "learning_rate": 9.454058881198782e-06, |
| "loss": 0.0842, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7515923566878981, |
| "grad_norm": 0.5977999349867541, |
| "learning_rate": 9.45340926640205e-06, |
| "loss": 0.0623, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7520473157415832, |
| "grad_norm": 0.7762091660359064, |
| "learning_rate": 9.452759287692824e-06, |
| "loss": 0.0923, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.7525022747952684, |
| "grad_norm": 1.029286283612893, |
| "learning_rate": 9.452108945124218e-06, |
| "loss": 0.1114, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.7529572338489536, |
| "grad_norm": 0.5046695202197234, |
| "learning_rate": 9.451458238749375e-06, |
| "loss": 0.058, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7534121929026387, |
| "grad_norm": 0.6262659207860063, |
| "learning_rate": 9.450807168621468e-06, |
| "loss": 0.0607, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.7538671519563239, |
| "grad_norm": 0.7451490801568118, |
| "learning_rate": 9.450155734793697e-06, |
| "loss": 0.0716, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.7543221110100091, |
| "grad_norm": 0.6504007368655154, |
| "learning_rate": 9.449503937319297e-06, |
| "loss": 0.0913, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.7547770700636943, |
| "grad_norm": 0.8923820492879996, |
| "learning_rate": 9.448851776251528e-06, |
| "loss": 0.0984, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.7552320291173794, |
| "grad_norm": 0.7256175088606572, |
| "learning_rate": 9.448199251643684e-06, |
| "loss": 0.0834, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7556869881710646, |
| "grad_norm": 0.7778885787730276, |
| "learning_rate": 9.447546363549085e-06, |
| "loss": 0.0878, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.7561419472247498, |
| "grad_norm": 0.8265030986085233, |
| "learning_rate": 9.446893112021083e-06, |
| "loss": 0.0827, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.7565969062784349, |
| "grad_norm": 0.5801162274559535, |
| "learning_rate": 9.446239497113055e-06, |
| "loss": 0.0797, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.7570518653321201, |
| "grad_norm": 0.8974914764997551, |
| "learning_rate": 9.445585518878418e-06, |
| "loss": 0.1088, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.7575068243858053, |
| "grad_norm": 0.8878060872125964, |
| "learning_rate": 9.444931177370605e-06, |
| "loss": 0.1235, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.7579617834394905, |
| "grad_norm": 0.5088737676913533, |
| "learning_rate": 9.44427647264309e-06, |
| "loss": 0.0478, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.7584167424931756, |
| "grad_norm": 0.7484910765250183, |
| "learning_rate": 9.443621404749374e-06, |
| "loss": 0.0686, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.7588717015468608, |
| "grad_norm": 0.6292123912530658, |
| "learning_rate": 9.442965973742983e-06, |
| "loss": 0.0652, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.759326660600546, |
| "grad_norm": 1.037223955207567, |
| "learning_rate": 9.442310179677476e-06, |
| "loss": 0.0827, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.7597816196542311, |
| "grad_norm": 0.6769034013570638, |
| "learning_rate": 9.441654022606444e-06, |
| "loss": 0.0771, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7602365787079163, |
| "grad_norm": 0.8310244395490821, |
| "learning_rate": 9.440997502583503e-06, |
| "loss": 0.091, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.7606915377616015, |
| "grad_norm": 1.0039785109365194, |
| "learning_rate": 9.4403406196623e-06, |
| "loss": 0.1251, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.7611464968152867, |
| "grad_norm": 0.7908056524331212, |
| "learning_rate": 9.439683373896515e-06, |
| "loss": 0.0876, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.7616014558689718, |
| "grad_norm": 1.0809832712577787, |
| "learning_rate": 9.439025765339852e-06, |
| "loss": 0.1256, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.762056414922657, |
| "grad_norm": 0.5964161616065347, |
| "learning_rate": 9.438367794046053e-06, |
| "loss": 0.0585, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7625113739763422, |
| "grad_norm": 0.8617975528364193, |
| "learning_rate": 9.437709460068882e-06, |
| "loss": 0.0783, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.7629663330300273, |
| "grad_norm": 0.6361215357389327, |
| "learning_rate": 9.437050763462132e-06, |
| "loss": 0.0692, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.7634212920837125, |
| "grad_norm": 0.9790069893643866, |
| "learning_rate": 9.436391704279632e-06, |
| "loss": 0.1173, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.7638762511373977, |
| "grad_norm": 1.1287905857392149, |
| "learning_rate": 9.435732282575235e-06, |
| "loss": 0.1505, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.7643312101910829, |
| "grad_norm": 0.8195744592905398, |
| "learning_rate": 9.435072498402832e-06, |
| "loss": 0.0877, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7647861692447679, |
| "grad_norm": 0.5293612997987346, |
| "learning_rate": 9.434412351816329e-06, |
| "loss": 0.0609, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.7652411282984531, |
| "grad_norm": 0.7565664140640663, |
| "learning_rate": 9.433751842869676e-06, |
| "loss": 0.0895, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.7656960873521383, |
| "grad_norm": 0.8390610329820178, |
| "learning_rate": 9.433090971616842e-06, |
| "loss": 0.0823, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.7661510464058234, |
| "grad_norm": 0.7979326314286513, |
| "learning_rate": 9.432429738111836e-06, |
| "loss": 0.0893, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.7666060054595086, |
| "grad_norm": 0.7985876042778349, |
| "learning_rate": 9.431768142408687e-06, |
| "loss": 0.0965, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7670609645131938, |
| "grad_norm": 0.7008114448081032, |
| "learning_rate": 9.431106184561462e-06, |
| "loss": 0.0894, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.767515923566879, |
| "grad_norm": 0.8506122352220377, |
| "learning_rate": 9.430443864624249e-06, |
| "loss": 0.0949, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.7679708826205641, |
| "grad_norm": 1.0900644244466022, |
| "learning_rate": 9.429781182651171e-06, |
| "loss": 0.1211, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.7684258416742493, |
| "grad_norm": 0.585079487316927, |
| "learning_rate": 9.429118138696378e-06, |
| "loss": 0.0642, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.7688808007279345, |
| "grad_norm": 0.8727981223997378, |
| "learning_rate": 9.428454732814055e-06, |
| "loss": 0.0987, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7693357597816196, |
| "grad_norm": 0.7032463083497149, |
| "learning_rate": 9.427790965058407e-06, |
| "loss": 0.0685, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.7697907188353048, |
| "grad_norm": 0.6784390616651746, |
| "learning_rate": 9.42712683548368e-06, |
| "loss": 0.079, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.77024567788899, |
| "grad_norm": 0.774501448184362, |
| "learning_rate": 9.426462344144138e-06, |
| "loss": 0.0784, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.7707006369426752, |
| "grad_norm": 0.7793988116138444, |
| "learning_rate": 9.425797491094086e-06, |
| "loss": 0.0801, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.7711555959963603, |
| "grad_norm": 0.7642360389143683, |
| "learning_rate": 9.425132276387847e-06, |
| "loss": 0.1009, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.7716105550500455, |
| "grad_norm": 0.6080046843370063, |
| "learning_rate": 9.424466700079785e-06, |
| "loss": 0.0688, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.7720655141037307, |
| "grad_norm": 0.6270167280264678, |
| "learning_rate": 9.423800762224283e-06, |
| "loss": 0.0626, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.7725204731574158, |
| "grad_norm": 0.5357586110049548, |
| "learning_rate": 9.42313446287576e-06, |
| "loss": 0.0626, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.772975432211101, |
| "grad_norm": 0.6233095813256608, |
| "learning_rate": 9.422467802088664e-06, |
| "loss": 0.0804, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.7734303912647862, |
| "grad_norm": 0.7158265191654914, |
| "learning_rate": 9.42180077991747e-06, |
| "loss": 0.0887, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7738853503184714, |
| "grad_norm": 1.0305735114746193, |
| "learning_rate": 9.421133396416687e-06, |
| "loss": 0.1441, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.7743403093721565, |
| "grad_norm": 0.6965845039033058, |
| "learning_rate": 9.420465651640847e-06, |
| "loss": 0.079, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.7747952684258417, |
| "grad_norm": 0.4529773063241175, |
| "learning_rate": 9.419797545644516e-06, |
| "loss": 0.0443, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.7752502274795269, |
| "grad_norm": 0.5407082720421394, |
| "learning_rate": 9.41912907848229e-06, |
| "loss": 0.0625, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.775705186533212, |
| "grad_norm": 0.5625290405803486, |
| "learning_rate": 9.418460250208791e-06, |
| "loss": 0.0695, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.7761601455868972, |
| "grad_norm": 0.5288549658523206, |
| "learning_rate": 9.417791060878677e-06, |
| "loss": 0.0546, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.7766151046405824, |
| "grad_norm": 0.6390336517076213, |
| "learning_rate": 9.417121510546626e-06, |
| "loss": 0.0474, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.7770700636942676, |
| "grad_norm": 1.1628554226147039, |
| "learning_rate": 9.416451599267353e-06, |
| "loss": 0.1427, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.7775250227479527, |
| "grad_norm": 0.5775794942631142, |
| "learning_rate": 9.415781327095601e-06, |
| "loss": 0.0722, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.7779799818016379, |
| "grad_norm": 0.6702327788675698, |
| "learning_rate": 9.415110694086139e-06, |
| "loss": 0.0863, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.778434940855323, |
| "grad_norm": 1.0756620214218862, |
| "learning_rate": 9.41443970029377e-06, |
| "loss": 0.0916, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.7788898999090081, |
| "grad_norm": 0.6873597883249742, |
| "learning_rate": 9.413768345773324e-06, |
| "loss": 0.0928, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.7793448589626933, |
| "grad_norm": 0.546687059556293, |
| "learning_rate": 9.413096630579661e-06, |
| "loss": 0.0681, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.7797998180163785, |
| "grad_norm": 0.5882776722743176, |
| "learning_rate": 9.412424554767672e-06, |
| "loss": 0.0666, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.7802547770700637, |
| "grad_norm": 0.7757931395434748, |
| "learning_rate": 9.411752118392272e-06, |
| "loss": 0.0961, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.7807097361237488, |
| "grad_norm": 0.7533384044089068, |
| "learning_rate": 9.411079321508416e-06, |
| "loss": 0.0915, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.781164695177434, |
| "grad_norm": 0.6690633163427073, |
| "learning_rate": 9.410406164171076e-06, |
| "loss": 0.0757, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.7816196542311192, |
| "grad_norm": 0.9875033482174213, |
| "learning_rate": 9.40973264643526e-06, |
| "loss": 0.1016, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.7820746132848043, |
| "grad_norm": 0.7285855686862363, |
| "learning_rate": 9.409058768356007e-06, |
| "loss": 0.0777, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.7825295723384895, |
| "grad_norm": 0.5412833929378409, |
| "learning_rate": 9.408384529988385e-06, |
| "loss": 0.0596, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7829845313921747, |
| "grad_norm": 0.48748390975323075, |
| "learning_rate": 9.407709931387486e-06, |
| "loss": 0.0451, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.7834394904458599, |
| "grad_norm": 0.8626755233369133, |
| "learning_rate": 9.407034972608436e-06, |
| "loss": 0.1093, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.783894449499545, |
| "grad_norm": 0.5986423081381415, |
| "learning_rate": 9.40635965370639e-06, |
| "loss": 0.0737, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.7843494085532302, |
| "grad_norm": 0.8697508747552452, |
| "learning_rate": 9.40568397473653e-06, |
| "loss": 0.0748, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.7848043676069154, |
| "grad_norm": 0.6651587535516658, |
| "learning_rate": 9.405007935754076e-06, |
| "loss": 0.0553, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.7852593266606005, |
| "grad_norm": 1.1307670638395897, |
| "learning_rate": 9.404331536814265e-06, |
| "loss": 0.1451, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.6724877006657928, |
| "learning_rate": 9.40365477797237e-06, |
| "loss": 0.0803, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.7861692447679709, |
| "grad_norm": 0.739524107451132, |
| "learning_rate": 9.40297765928369e-06, |
| "loss": 0.0713, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.7866242038216561, |
| "grad_norm": 0.6341880042511068, |
| "learning_rate": 9.402300180803563e-06, |
| "loss": 0.0739, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.7870791628753412, |
| "grad_norm": 0.5809522499341311, |
| "learning_rate": 9.401622342587346e-06, |
| "loss": 0.067, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7875341219290264, |
| "grad_norm": 0.6208756444695567, |
| "learning_rate": 9.400944144690428e-06, |
| "loss": 0.0865, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.7879890809827116, |
| "grad_norm": 0.7358085271263743, |
| "learning_rate": 9.400265587168226e-06, |
| "loss": 0.0827, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.7884440400363967, |
| "grad_norm": 0.6985098389174249, |
| "learning_rate": 9.399586670076196e-06, |
| "loss": 0.0784, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.7888989990900819, |
| "grad_norm": 0.6524277365731544, |
| "learning_rate": 9.39890739346981e-06, |
| "loss": 0.0759, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.7893539581437671, |
| "grad_norm": 0.8500489687124628, |
| "learning_rate": 9.398227757404576e-06, |
| "loss": 0.1139, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.7898089171974523, |
| "grad_norm": 0.49161558761743546, |
| "learning_rate": 9.397547761936034e-06, |
| "loss": 0.0445, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.7902638762511374, |
| "grad_norm": 0.3886581827401007, |
| "learning_rate": 9.396867407119748e-06, |
| "loss": 0.0387, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.7907188353048226, |
| "grad_norm": 0.43315626329206963, |
| "learning_rate": 9.396186693011312e-06, |
| "loss": 0.0484, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.7911737943585078, |
| "grad_norm": 0.7578063731873546, |
| "learning_rate": 9.395505619666353e-06, |
| "loss": 0.0872, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.7916287534121929, |
| "grad_norm": 0.9087897001540515, |
| "learning_rate": 9.394824187140526e-06, |
| "loss": 0.0914, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.792083712465878, |
| "grad_norm": 0.5994634977370948, |
| "learning_rate": 9.394142395489512e-06, |
| "loss": 0.061, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.7925386715195633, |
| "grad_norm": 0.6263578026813904, |
| "learning_rate": 9.393460244769023e-06, |
| "loss": 0.0608, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.7929936305732485, |
| "grad_norm": 0.5753033056961346, |
| "learning_rate": 9.392777735034807e-06, |
| "loss": 0.0721, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.7934485896269335, |
| "grad_norm": 0.6561198773299641, |
| "learning_rate": 9.392094866342632e-06, |
| "loss": 0.0599, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.7939035486806187, |
| "grad_norm": 0.7317990056550264, |
| "learning_rate": 9.391411638748297e-06, |
| "loss": 0.0742, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.7943585077343039, |
| "grad_norm": 0.5011723772780661, |
| "learning_rate": 9.390728052307637e-06, |
| "loss": 0.0647, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.794813466787989, |
| "grad_norm": 0.6867846904523061, |
| "learning_rate": 9.390044107076506e-06, |
| "loss": 0.0779, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.7952684258416742, |
| "grad_norm": 0.9267872196876082, |
| "learning_rate": 9.389359803110796e-06, |
| "loss": 0.1001, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.7957233848953594, |
| "grad_norm": 3.487580179742763, |
| "learning_rate": 9.388675140466427e-06, |
| "loss": 0.1841, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.7961783439490446, |
| "grad_norm": 0.6520959532750612, |
| "learning_rate": 9.387990119199343e-06, |
| "loss": 0.0714, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7966333030027297, |
| "grad_norm": 0.8129917876989495, |
| "learning_rate": 9.387304739365524e-06, |
| "loss": 0.0949, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.7970882620564149, |
| "grad_norm": 0.6276053555905522, |
| "learning_rate": 9.386619001020974e-06, |
| "loss": 0.0552, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.7975432211101001, |
| "grad_norm": 0.7632340875896291, |
| "learning_rate": 9.385932904221729e-06, |
| "loss": 0.0655, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.7979981801637852, |
| "grad_norm": 0.7239218776412117, |
| "learning_rate": 9.385246449023853e-06, |
| "loss": 0.1113, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.7984531392174704, |
| "grad_norm": 1.0468381569335767, |
| "learning_rate": 9.38455963548344e-06, |
| "loss": 0.1042, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.7989080982711556, |
| "grad_norm": 0.8019558864262506, |
| "learning_rate": 9.383872463656616e-06, |
| "loss": 0.0868, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.7993630573248408, |
| "grad_norm": 0.7449121488820226, |
| "learning_rate": 9.383184933599531e-06, |
| "loss": 0.0945, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.7998180163785259, |
| "grad_norm": 0.5905383438931077, |
| "learning_rate": 9.382497045368368e-06, |
| "loss": 0.0672, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8002729754322111, |
| "grad_norm": 0.5337189472762474, |
| "learning_rate": 9.381808799019336e-06, |
| "loss": 0.0509, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8007279344858963, |
| "grad_norm": 1.0483707789224317, |
| "learning_rate": 9.38112019460868e-06, |
| "loss": 0.1069, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8011828935395814, |
| "grad_norm": 0.8974041640796228, |
| "learning_rate": 9.380431232192663e-06, |
| "loss": 0.1061, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8016378525932666, |
| "grad_norm": 0.774987790741639, |
| "learning_rate": 9.379741911827591e-06, |
| "loss": 0.0971, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8020928116469518, |
| "grad_norm": 0.5037991292329869, |
| "learning_rate": 9.379052233569788e-06, |
| "loss": 0.0545, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.802547770700637, |
| "grad_norm": 0.7571282390818425, |
| "learning_rate": 9.37836219747561e-06, |
| "loss": 0.0774, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8030027297543221, |
| "grad_norm": 0.47374252215612206, |
| "learning_rate": 9.377671803601447e-06, |
| "loss": 0.0479, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8034576888080073, |
| "grad_norm": 0.563871853603133, |
| "learning_rate": 9.376981052003713e-06, |
| "loss": 0.0583, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8039126478616925, |
| "grad_norm": 0.7260639419055305, |
| "learning_rate": 9.376289942738855e-06, |
| "loss": 0.0739, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8043676069153776, |
| "grad_norm": 0.7704639306429572, |
| "learning_rate": 9.375598475863345e-06, |
| "loss": 0.08, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8048225659690628, |
| "grad_norm": 0.8052864772012752, |
| "learning_rate": 9.374906651433689e-06, |
| "loss": 0.1155, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.805277525022748, |
| "grad_norm": 0.945940660466259, |
| "learning_rate": 9.374214469506416e-06, |
| "loss": 0.0942, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8057324840764332, |
| "grad_norm": 0.8382092898318407, |
| "learning_rate": 9.373521930138092e-06, |
| "loss": 0.0831, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8061874431301183, |
| "grad_norm": 0.5910933141386769, |
| "learning_rate": 9.372829033385306e-06, |
| "loss": 0.0825, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8066424021838035, |
| "grad_norm": 0.7616883112365667, |
| "learning_rate": 9.37213577930468e-06, |
| "loss": 0.0907, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8070973612374887, |
| "grad_norm": 0.9571485234330176, |
| "learning_rate": 9.37144216795286e-06, |
| "loss": 0.1322, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8075523202911737, |
| "grad_norm": 0.770430324420924, |
| "learning_rate": 9.370748199386529e-06, |
| "loss": 0.0821, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8080072793448589, |
| "grad_norm": 0.6303205378749905, |
| "learning_rate": 9.370053873662393e-06, |
| "loss": 0.0694, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8084622383985441, |
| "grad_norm": 0.6777135846807264, |
| "learning_rate": 9.36935919083719e-06, |
| "loss": 0.0685, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8089171974522293, |
| "grad_norm": 0.7319936383805717, |
| "learning_rate": 9.368664150967686e-06, |
| "loss": 0.0679, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8093721565059144, |
| "grad_norm": 0.7990830113911501, |
| "learning_rate": 9.367968754110675e-06, |
| "loss": 0.1023, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8098271155595996, |
| "grad_norm": 0.5223284241529513, |
| "learning_rate": 9.367273000322983e-06, |
| "loss": 0.063, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8102820746132848, |
| "grad_norm": 1.040419010652034, |
| "learning_rate": 9.366576889661465e-06, |
| "loss": 0.1236, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8107370336669699, |
| "grad_norm": 0.6404250074887077, |
| "learning_rate": 9.365880422183003e-06, |
| "loss": 0.0656, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8111919927206551, |
| "grad_norm": 0.7564675990794105, |
| "learning_rate": 9.365183597944506e-06, |
| "loss": 0.0725, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8116469517743403, |
| "grad_norm": 0.5955963027805166, |
| "learning_rate": 9.364486417002922e-06, |
| "loss": 0.07, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8121019108280255, |
| "grad_norm": 0.6658882483856376, |
| "learning_rate": 9.363788879415217e-06, |
| "loss": 0.0616, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8125568698817106, |
| "grad_norm": 0.6032274064354748, |
| "learning_rate": 9.36309098523839e-06, |
| "loss": 0.0688, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8130118289353958, |
| "grad_norm": 0.7627355718580127, |
| "learning_rate": 9.362392734529472e-06, |
| "loss": 0.0841, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.813466787989081, |
| "grad_norm": 0.6581922552034235, |
| "learning_rate": 9.361694127345523e-06, |
| "loss": 0.0773, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8139217470427661, |
| "grad_norm": 0.5723109702485146, |
| "learning_rate": 9.360995163743622e-06, |
| "loss": 0.0755, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8143767060964513, |
| "grad_norm": 0.8492692664232014, |
| "learning_rate": 9.360295843780893e-06, |
| "loss": 0.084, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8148316651501365, |
| "grad_norm": 0.7138327780528116, |
| "learning_rate": 9.35959616751448e-06, |
| "loss": 0.0754, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8152866242038217, |
| "grad_norm": 0.7513269368015193, |
| "learning_rate": 9.358896135001555e-06, |
| "loss": 0.075, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8157415832575068, |
| "grad_norm": 6.226904157676098, |
| "learning_rate": 9.35819574629932e-06, |
| "loss": 0.2447, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.816196542311192, |
| "grad_norm": 0.9632842432595244, |
| "learning_rate": 9.35749500146501e-06, |
| "loss": 0.0968, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8166515013648772, |
| "grad_norm": 0.6910899092527569, |
| "learning_rate": 9.356793900555891e-06, |
| "loss": 0.0736, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8171064604185623, |
| "grad_norm": 0.8430341812657529, |
| "learning_rate": 9.356092443629247e-06, |
| "loss": 0.0929, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.8175614194722475, |
| "grad_norm": 0.7425545237339678, |
| "learning_rate": 9.355390630742401e-06, |
| "loss": 0.1005, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8180163785259327, |
| "grad_norm": 0.7004618898733044, |
| "learning_rate": 9.3546884619527e-06, |
| "loss": 0.0789, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8184713375796179, |
| "grad_norm": 0.5461552026045962, |
| "learning_rate": 9.353985937317525e-06, |
| "loss": 0.0763, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.818926296633303, |
| "grad_norm": 0.6222175380121098, |
| "learning_rate": 9.35328305689428e-06, |
| "loss": 0.0754, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8193812556869882, |
| "grad_norm": 0.7386705168753549, |
| "learning_rate": 9.352579820740404e-06, |
| "loss": 0.0641, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8198362147406734, |
| "grad_norm": 1.2544587029581489, |
| "learning_rate": 9.351876228913363e-06, |
| "loss": 0.107, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8202911737943585, |
| "grad_norm": 0.6546855629883478, |
| "learning_rate": 9.351172281470645e-06, |
| "loss": 0.0781, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8207461328480437, |
| "grad_norm": 0.7485647273392206, |
| "learning_rate": 9.350467978469782e-06, |
| "loss": 0.0898, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8212010919017289, |
| "grad_norm": 0.5530668925780788, |
| "learning_rate": 9.34976331996832e-06, |
| "loss": 0.057, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.821656050955414, |
| "grad_norm": 0.870085999603916, |
| "learning_rate": 9.349058306023844e-06, |
| "loss": 0.1077, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8221110100090991, |
| "grad_norm": 0.891036381079533, |
| "learning_rate": 9.348352936693964e-06, |
| "loss": 0.1082, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8225659690627843, |
| "grad_norm": 0.5641275258385202, |
| "learning_rate": 9.347647212036316e-06, |
| "loss": 0.0613, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8230209281164695, |
| "grad_norm": 0.7163257638587112, |
| "learning_rate": 9.346941132108575e-06, |
| "loss": 0.0842, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8234758871701547, |
| "grad_norm": 0.7333770270884309, |
| "learning_rate": 9.346234696968435e-06, |
| "loss": 0.0782, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8239308462238398, |
| "grad_norm": 0.5399164747367127, |
| "learning_rate": 9.345527906673622e-06, |
| "loss": 0.0676, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.824385805277525, |
| "grad_norm": 1.0476291790994476, |
| "learning_rate": 9.344820761281892e-06, |
| "loss": 0.0984, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8248407643312102, |
| "grad_norm": 0.639304845804496, |
| "learning_rate": 9.344113260851031e-06, |
| "loss": 0.0764, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8252957233848953, |
| "grad_norm": 0.6071291165528282, |
| "learning_rate": 9.343405405438852e-06, |
| "loss": 0.0707, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8257506824385805, |
| "grad_norm": 0.6973111552871604, |
| "learning_rate": 9.342697195103199e-06, |
| "loss": 0.0917, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8262056414922657, |
| "grad_norm": 0.6486872321285189, |
| "learning_rate": 9.341988629901942e-06, |
| "loss": 0.0725, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8266606005459509, |
| "grad_norm": 0.5216883119977757, |
| "learning_rate": 9.341279709892981e-06, |
| "loss": 0.0572, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.827115559599636, |
| "grad_norm": 0.4472530755665983, |
| "learning_rate": 9.340570435134248e-06, |
| "loss": 0.0412, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8275705186533212, |
| "grad_norm": 0.786165560489741, |
| "learning_rate": 9.339860805683703e-06, |
| "loss": 0.0905, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8280254777070064, |
| "grad_norm": 0.8504390923669081, |
| "learning_rate": 9.33915082159933e-06, |
| "loss": 0.0761, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8284804367606915, |
| "grad_norm": 0.5303034158640553, |
| "learning_rate": 9.338440482939146e-06, |
| "loss": 0.0735, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8289353958143767, |
| "grad_norm": 0.6407993820931909, |
| "learning_rate": 9.337729789761199e-06, |
| "loss": 0.0829, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8293903548680619, |
| "grad_norm": 2.670877671269915, |
| "learning_rate": 9.337018742123563e-06, |
| "loss": 0.1871, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8298453139217471, |
| "grad_norm": 1.0355313595445745, |
| "learning_rate": 9.336307340084341e-06, |
| "loss": 0.0955, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8303002729754322, |
| "grad_norm": 0.6127983226216669, |
| "learning_rate": 9.335595583701667e-06, |
| "loss": 0.0639, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8307552320291174, |
| "grad_norm": 0.6196615465194765, |
| "learning_rate": 9.334883473033699e-06, |
| "loss": 0.0706, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.8312101910828026, |
| "grad_norm": 0.7243682512181147, |
| "learning_rate": 9.33417100813863e-06, |
| "loss": 0.0869, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8316651501364877, |
| "grad_norm": 0.94108166831404, |
| "learning_rate": 9.33345818907468e-06, |
| "loss": 0.1349, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8321201091901729, |
| "grad_norm": 4.6896190497823955, |
| "learning_rate": 9.332745015900097e-06, |
| "loss": 0.1125, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8325750682438581, |
| "grad_norm": 0.7268733027831774, |
| "learning_rate": 9.332031488673156e-06, |
| "loss": 0.0651, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8330300272975433, |
| "grad_norm": 0.5169699897246913, |
| "learning_rate": 9.331317607452166e-06, |
| "loss": 0.0683, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.8334849863512284, |
| "grad_norm": 0.5056561715785393, |
| "learning_rate": 9.330603372295463e-06, |
| "loss": 0.0568, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8339399454049136, |
| "grad_norm": 0.5749009883761049, |
| "learning_rate": 9.329888783261408e-06, |
| "loss": 0.0594, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8343949044585988, |
| "grad_norm": 0.6696966952437984, |
| "learning_rate": 9.329173840408394e-06, |
| "loss": 0.0764, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8348498635122839, |
| "grad_norm": 0.7329039198928983, |
| "learning_rate": 9.328458543794844e-06, |
| "loss": 0.0729, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.835304822565969, |
| "grad_norm": 0.5892831520257552, |
| "learning_rate": 9.327742893479212e-06, |
| "loss": 0.0838, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8357597816196543, |
| "grad_norm": 0.848350653615326, |
| "learning_rate": 9.327026889519973e-06, |
| "loss": 0.0778, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8362147406733395, |
| "grad_norm": 0.939837339633871, |
| "learning_rate": 9.326310531975636e-06, |
| "loss": 0.1005, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8366696997270245, |
| "grad_norm": 0.6312875650471034, |
| "learning_rate": 9.32559382090474e-06, |
| "loss": 0.0626, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8371246587807097, |
| "grad_norm": 0.9586580739045799, |
| "learning_rate": 9.324876756365853e-06, |
| "loss": 0.1154, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8375796178343949, |
| "grad_norm": 0.6108920091747637, |
| "learning_rate": 9.324159338417566e-06, |
| "loss": 0.0674, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.83803457688808, |
| "grad_norm": 0.9247779620401613, |
| "learning_rate": 9.323441567118508e-06, |
| "loss": 0.11, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8384895359417652, |
| "grad_norm": 0.6152452902665, |
| "learning_rate": 9.322723442527328e-06, |
| "loss": 0.0657, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8389444949954504, |
| "grad_norm": 0.6579130646316164, |
| "learning_rate": 9.32200496470271e-06, |
| "loss": 0.0721, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8393994540491356, |
| "grad_norm": 0.6812573423845587, |
| "learning_rate": 9.321286133703365e-06, |
| "loss": 0.0627, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8398544131028207, |
| "grad_norm": 0.5946100319565307, |
| "learning_rate": 9.320566949588031e-06, |
| "loss": 0.0708, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8403093721565059, |
| "grad_norm": 0.6319246275087805, |
| "learning_rate": 9.319847412415477e-06, |
| "loss": 0.0651, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8407643312101911, |
| "grad_norm": 0.6789460664352271, |
| "learning_rate": 9.3191275222445e-06, |
| "loss": 0.0707, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8412192902638762, |
| "grad_norm": 0.4396253526793688, |
| "learning_rate": 9.31840727913393e-06, |
| "loss": 0.0431, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8416742493175614, |
| "grad_norm": 0.6745617928769184, |
| "learning_rate": 9.317686683142616e-06, |
| "loss": 0.0747, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8421292083712466, |
| "grad_norm": 0.6924165554321049, |
| "learning_rate": 9.316965734329447e-06, |
| "loss": 0.0575, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8425841674249318, |
| "grad_norm": 0.7219679526943963, |
| "learning_rate": 9.316244432753332e-06, |
| "loss": 0.0935, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8430391264786169, |
| "grad_norm": 1.0205930330831676, |
| "learning_rate": 9.315522778473214e-06, |
| "loss": 0.1213, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8434940855323021, |
| "grad_norm": 1.009181015179975, |
| "learning_rate": 9.314800771548064e-06, |
| "loss": 0.1049, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8439490445859873, |
| "grad_norm": 0.7263916504334191, |
| "learning_rate": 9.31407841203688e-06, |
| "loss": 0.1025, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8444040036396724, |
| "grad_norm": 0.6276487176726284, |
| "learning_rate": 9.31335569999869e-06, |
| "loss": 0.0587, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8448589626933576, |
| "grad_norm": 0.6171084743549562, |
| "learning_rate": 9.31263263549255e-06, |
| "loss": 0.0495, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8453139217470428, |
| "grad_norm": 0.6730791565382994, |
| "learning_rate": 9.31190921857755e-06, |
| "loss": 0.0789, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.845768880800728, |
| "grad_norm": 0.7874386993734893, |
| "learning_rate": 9.311185449312798e-06, |
| "loss": 0.088, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8462238398544131, |
| "grad_norm": 0.5073783803158326, |
| "learning_rate": 9.310461327757442e-06, |
| "loss": 0.0561, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8466787989080983, |
| "grad_norm": 0.6051266904327832, |
| "learning_rate": 9.309736853970652e-06, |
| "loss": 0.0688, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.8471337579617835, |
| "grad_norm": 1.0483500354699085, |
| "learning_rate": 9.309012028011628e-06, |
| "loss": 0.1346, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.8475887170154686, |
| "grad_norm": 0.9049471090474998, |
| "learning_rate": 9.3082868499396e-06, |
| "loss": 0.0986, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.8480436760691538, |
| "grad_norm": 0.47381125867485346, |
| "learning_rate": 9.307561319813829e-06, |
| "loss": 0.058, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.848498635122839, |
| "grad_norm": 0.7964538075850383, |
| "learning_rate": 9.306835437693597e-06, |
| "loss": 0.0829, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8489535941765242, |
| "grad_norm": 0.9919343521297046, |
| "learning_rate": 9.306109203638225e-06, |
| "loss": 0.0885, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.8494085532302093, |
| "grad_norm": 1.4502514405100166, |
| "learning_rate": 9.305382617707052e-06, |
| "loss": 0.1023, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.8498635122838945, |
| "grad_norm": 0.7238180713867792, |
| "learning_rate": 9.304655679959459e-06, |
| "loss": 0.0813, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.8503184713375797, |
| "grad_norm": 0.7360849022013412, |
| "learning_rate": 9.303928390454839e-06, |
| "loss": 0.0671, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.8507734303912647, |
| "grad_norm": 0.5803360108595549, |
| "learning_rate": 9.30320074925263e-06, |
| "loss": 0.075, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8512283894449499, |
| "grad_norm": 0.6838093346854254, |
| "learning_rate": 9.302472756412288e-06, |
| "loss": 0.0812, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.8516833484986351, |
| "grad_norm": 0.8850924783689049, |
| "learning_rate": 9.301744411993302e-06, |
| "loss": 0.0991, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.8521383075523203, |
| "grad_norm": 0.8273381019086633, |
| "learning_rate": 9.30101571605519e-06, |
| "loss": 0.0803, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.8525932666060054, |
| "grad_norm": 0.6554434764444423, |
| "learning_rate": 9.300286668657495e-06, |
| "loss": 0.0737, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.8530482256596906, |
| "grad_norm": 0.8230660869280486, |
| "learning_rate": 9.299557269859795e-06, |
| "loss": 0.0748, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8535031847133758, |
| "grad_norm": 0.609738768294497, |
| "learning_rate": 9.298827519721692e-06, |
| "loss": 0.0608, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.8539581437670609, |
| "grad_norm": 0.7433208516076715, |
| "learning_rate": 9.298097418302817e-06, |
| "loss": 0.0992, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.8544131028207461, |
| "grad_norm": 0.5414027711398505, |
| "learning_rate": 9.29736696566283e-06, |
| "loss": 0.0642, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.8548680618744313, |
| "grad_norm": 0.8950820233319129, |
| "learning_rate": 9.296636161861422e-06, |
| "loss": 0.1121, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.8553230209281165, |
| "grad_norm": 2.0225500877401617, |
| "learning_rate": 9.295905006958308e-06, |
| "loss": 0.1409, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8557779799818016, |
| "grad_norm": 0.7783660516278756, |
| "learning_rate": 9.295173501013239e-06, |
| "loss": 0.0974, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.8562329390354868, |
| "grad_norm": 0.7064043776078144, |
| "learning_rate": 9.29444164408599e-06, |
| "loss": 0.0954, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.856687898089172, |
| "grad_norm": 0.6658976396134992, |
| "learning_rate": 9.29370943623636e-06, |
| "loss": 0.0636, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.6825106501213147, |
| "learning_rate": 9.292976877524189e-06, |
| "loss": 0.0908, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.8575978161965423, |
| "grad_norm": 0.8132731569130554, |
| "learning_rate": 9.292243968009332e-06, |
| "loss": 0.0952, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.8580527752502275, |
| "grad_norm": 1.283740720887758, |
| "learning_rate": 9.29151070775168e-06, |
| "loss": 0.1407, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.8585077343039127, |
| "grad_norm": 0.8987444265022443, |
| "learning_rate": 9.290777096811156e-06, |
| "loss": 0.1008, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.8589626933575978, |
| "grad_norm": 0.9027753674161602, |
| "learning_rate": 9.290043135247704e-06, |
| "loss": 0.0917, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.859417652411283, |
| "grad_norm": 0.7721264653335534, |
| "learning_rate": 9.289308823121302e-06, |
| "loss": 0.0876, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.8598726114649682, |
| "grad_norm": 0.8645055674602313, |
| "learning_rate": 9.28857416049195e-06, |
| "loss": 0.0775, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8603275705186533, |
| "grad_norm": 0.7828026058785104, |
| "learning_rate": 9.287839147419685e-06, |
| "loss": 0.0953, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.8607825295723385, |
| "grad_norm": 0.7581321197025821, |
| "learning_rate": 9.287103783964571e-06, |
| "loss": 0.1004, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.8612374886260237, |
| "grad_norm": 0.5836098633522236, |
| "learning_rate": 9.286368070186696e-06, |
| "loss": 0.0586, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.8616924476797089, |
| "grad_norm": 0.8102404855384281, |
| "learning_rate": 9.285632006146178e-06, |
| "loss": 0.0809, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.862147406733394, |
| "grad_norm": 0.5684276012396848, |
| "learning_rate": 9.284895591903167e-06, |
| "loss": 0.0736, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8626023657870792, |
| "grad_norm": 0.629014301705328, |
| "learning_rate": 9.284158827517838e-06, |
| "loss": 0.0707, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.8630573248407644, |
| "grad_norm": 0.6150335967135018, |
| "learning_rate": 9.283421713050398e-06, |
| "loss": 0.0665, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.8635122838944495, |
| "grad_norm": 0.7977181385850289, |
| "learning_rate": 9.282684248561078e-06, |
| "loss": 0.1077, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.8639672429481347, |
| "grad_norm": 0.5184482645002529, |
| "learning_rate": 9.281946434110141e-06, |
| "loss": 0.0594, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.8644222020018199, |
| "grad_norm": 0.7148270230091635, |
| "learning_rate": 9.28120826975788e-06, |
| "loss": 0.1005, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.864877161055505, |
| "grad_norm": 0.6020497479816633, |
| "learning_rate": 9.280469755564613e-06, |
| "loss": 0.0595, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.8653321201091901, |
| "grad_norm": 0.7725143836000526, |
| "learning_rate": 9.279730891590688e-06, |
| "loss": 0.063, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.8657870791628753, |
| "grad_norm": 0.5341160118168524, |
| "learning_rate": 9.27899167789648e-06, |
| "loss": 0.0649, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.8662420382165605, |
| "grad_norm": 0.78025783272878, |
| "learning_rate": 9.278252114542398e-06, |
| "loss": 0.0987, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.8666969972702456, |
| "grad_norm": 1.0383225939834173, |
| "learning_rate": 9.277512201588871e-06, |
| "loss": 0.1532, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.8671519563239308, |
| "grad_norm": 0.742851971816876, |
| "learning_rate": 9.276771939096367e-06, |
| "loss": 0.1083, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.867606915377616, |
| "grad_norm": 0.6246586544484709, |
| "learning_rate": 9.276031327125371e-06, |
| "loss": 0.0798, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.8680618744313012, |
| "grad_norm": 0.6937230711216974, |
| "learning_rate": 9.275290365736408e-06, |
| "loss": 0.0764, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.8685168334849863, |
| "grad_norm": 0.6405216327010745, |
| "learning_rate": 9.274549054990022e-06, |
| "loss": 0.0553, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.8689717925386715, |
| "grad_norm": 0.6118088958703919, |
| "learning_rate": 9.273807394946791e-06, |
| "loss": 0.0719, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8694267515923567, |
| "grad_norm": 0.5929451056907732, |
| "learning_rate": 9.27306538566732e-06, |
| "loss": 0.0736, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.8698817106460418, |
| "grad_norm": 0.551189089448713, |
| "learning_rate": 9.272323027212244e-06, |
| "loss": 0.0802, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.870336669699727, |
| "grad_norm": 0.6964950682522272, |
| "learning_rate": 9.271580319642221e-06, |
| "loss": 0.0956, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.8707916287534122, |
| "grad_norm": 0.656523844824833, |
| "learning_rate": 9.270837263017947e-06, |
| "loss": 0.0716, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.8712465878070974, |
| "grad_norm": 0.5516956702822526, |
| "learning_rate": 9.270093857400138e-06, |
| "loss": 0.0756, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.8717015468607825, |
| "grad_norm": 0.6458984664434074, |
| "learning_rate": 9.269350102849542e-06, |
| "loss": 0.0762, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.8721565059144677, |
| "grad_norm": 0.6244797606471136, |
| "learning_rate": 9.268605999426936e-06, |
| "loss": 0.066, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.8726114649681529, |
| "grad_norm": 1.3051429800547985, |
| "learning_rate": 9.267861547193126e-06, |
| "loss": 0.1487, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.873066424021838, |
| "grad_norm": 0.9503536634109886, |
| "learning_rate": 9.267116746208944e-06, |
| "loss": 0.1088, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.8735213830755232, |
| "grad_norm": 0.6872044557187451, |
| "learning_rate": 9.26637159653525e-06, |
| "loss": 0.0952, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8739763421292084, |
| "grad_norm": 0.8261797174841458, |
| "learning_rate": 9.265626098232934e-06, |
| "loss": 0.0917, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.8744313011828936, |
| "grad_norm": 0.6285868744907084, |
| "learning_rate": 9.26488025136292e-06, |
| "loss": 0.0736, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.8748862602365787, |
| "grad_norm": 0.95408072866655, |
| "learning_rate": 9.264134055986152e-06, |
| "loss": 0.09, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.8753412192902639, |
| "grad_norm": 0.8126928412084633, |
| "learning_rate": 9.263387512163604e-06, |
| "loss": 0.0861, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.8757961783439491, |
| "grad_norm": 0.628340619476289, |
| "learning_rate": 9.262640619956282e-06, |
| "loss": 0.0853, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.8762511373976342, |
| "grad_norm": 0.822645279842771, |
| "learning_rate": 9.261893379425218e-06, |
| "loss": 0.0921, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.8767060964513194, |
| "grad_norm": 0.664699910134531, |
| "learning_rate": 9.261145790631475e-06, |
| "loss": 0.0661, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.8771610555050046, |
| "grad_norm": 0.46120202232971963, |
| "learning_rate": 9.26039785363614e-06, |
| "loss": 0.0548, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.8776160145586898, |
| "grad_norm": 0.47348608915538554, |
| "learning_rate": 9.259649568500333e-06, |
| "loss": 0.0579, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.8780709736123748, |
| "grad_norm": 0.5421377090850338, |
| "learning_rate": 9.258900935285199e-06, |
| "loss": 0.0591, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.87852593266606, |
| "grad_norm": 0.5523212054660892, |
| "learning_rate": 9.258151954051914e-06, |
| "loss": 0.0757, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.8789808917197452, |
| "grad_norm": 0.733320680764707, |
| "learning_rate": 9.25740262486168e-06, |
| "loss": 0.0999, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.8794358507734303, |
| "grad_norm": 0.5636961368288687, |
| "learning_rate": 9.25665294777573e-06, |
| "loss": 0.0525, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.8798908098271155, |
| "grad_norm": 0.5613709035035684, |
| "learning_rate": 9.255902922855326e-06, |
| "loss": 0.0512, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.8803457688808007, |
| "grad_norm": 0.6266000159117329, |
| "learning_rate": 9.255152550161753e-06, |
| "loss": 0.0714, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.8808007279344859, |
| "grad_norm": 0.5624931761265524, |
| "learning_rate": 9.25440182975633e-06, |
| "loss": 0.0667, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.881255686988171, |
| "grad_norm": 0.8855653361345076, |
| "learning_rate": 9.253650761700401e-06, |
| "loss": 0.1104, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.8817106460418562, |
| "grad_norm": 0.4051324158485566, |
| "learning_rate": 9.252899346055343e-06, |
| "loss": 0.0447, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.8821656050955414, |
| "grad_norm": 0.6705030425420828, |
| "learning_rate": 9.252147582882556e-06, |
| "loss": 0.08, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.8826205641492265, |
| "grad_norm": 0.745395756906896, |
| "learning_rate": 9.25139547224347e-06, |
| "loss": 0.0892, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8830755232029117, |
| "grad_norm": 0.9577657000178205, |
| "learning_rate": 9.250643014199547e-06, |
| "loss": 0.1144, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.8835304822565969, |
| "grad_norm": 0.6774410545148242, |
| "learning_rate": 9.24989020881227e-06, |
| "loss": 0.0753, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.8839854413102821, |
| "grad_norm": 0.7409774305157982, |
| "learning_rate": 9.249137056143159e-06, |
| "loss": 0.0722, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.8844404003639672, |
| "grad_norm": 0.6042335346844097, |
| "learning_rate": 9.248383556253758e-06, |
| "loss": 0.0775, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.8848953594176524, |
| "grad_norm": 0.8396643903072698, |
| "learning_rate": 9.247629709205635e-06, |
| "loss": 0.1051, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.8853503184713376, |
| "grad_norm": 0.6590167845553623, |
| "learning_rate": 9.246875515060396e-06, |
| "loss": 0.0774, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.8858052775250227, |
| "grad_norm": 0.5876827286169646, |
| "learning_rate": 9.24612097387967e-06, |
| "loss": 0.0768, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.8862602365787079, |
| "grad_norm": 0.8894868784932225, |
| "learning_rate": 9.245366085725111e-06, |
| "loss": 0.0983, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.8867151956323931, |
| "grad_norm": 0.5389319757607208, |
| "learning_rate": 9.24461085065841e-06, |
| "loss": 0.0571, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.8871701546860783, |
| "grad_norm": 0.4677621224916707, |
| "learning_rate": 9.243855268741275e-06, |
| "loss": 0.0534, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8876251137397634, |
| "grad_norm": 0.6166575793819061, |
| "learning_rate": 9.243099340035454e-06, |
| "loss": 0.0679, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.8880800727934486, |
| "grad_norm": 0.684219803564928, |
| "learning_rate": 9.242343064602719e-06, |
| "loss": 0.0797, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.8885350318471338, |
| "grad_norm": 0.6543060915410528, |
| "learning_rate": 9.241586442504865e-06, |
| "loss": 0.0876, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.8889899909008189, |
| "grad_norm": 0.6916358607655352, |
| "learning_rate": 9.240829473803723e-06, |
| "loss": 0.0816, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.8894449499545041, |
| "grad_norm": 0.6650683160408256, |
| "learning_rate": 9.240072158561146e-06, |
| "loss": 0.0851, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.8898999090081893, |
| "grad_norm": 0.8336397769475173, |
| "learning_rate": 9.239314496839022e-06, |
| "loss": 0.1075, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.8903548680618745, |
| "grad_norm": 0.6498784190415388, |
| "learning_rate": 9.23855648869926e-06, |
| "loss": 0.0748, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.8908098271155596, |
| "grad_norm": 0.7894795440995916, |
| "learning_rate": 9.237798134203803e-06, |
| "loss": 0.1045, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.8912647861692448, |
| "grad_norm": 0.5980997509859944, |
| "learning_rate": 9.237039433414623e-06, |
| "loss": 0.079, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.89171974522293, |
| "grad_norm": 0.8222326498301533, |
| "learning_rate": 9.236280386393712e-06, |
| "loss": 0.082, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.892174704276615, |
| "grad_norm": 0.6293204676003961, |
| "learning_rate": 9.2355209932031e-06, |
| "loss": 0.0741, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.8926296633303002, |
| "grad_norm": 0.47863668175134233, |
| "learning_rate": 9.23476125390484e-06, |
| "loss": 0.0524, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.8930846223839854, |
| "grad_norm": 0.7798093326874596, |
| "learning_rate": 9.234001168561013e-06, |
| "loss": 0.0691, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.8935395814376706, |
| "grad_norm": 0.7301612531501247, |
| "learning_rate": 9.233240737233733e-06, |
| "loss": 0.0965, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.8939945404913557, |
| "grad_norm": 1.0452984923884894, |
| "learning_rate": 9.232479959985136e-06, |
| "loss": 0.1293, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.8944494995450409, |
| "grad_norm": 0.6963389022030017, |
| "learning_rate": 9.23171883687739e-06, |
| "loss": 0.0767, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.8949044585987261, |
| "grad_norm": 0.45171069390219404, |
| "learning_rate": 9.23095736797269e-06, |
| "loss": 0.0522, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.8953594176524113, |
| "grad_norm": 1.0061313103020273, |
| "learning_rate": 9.230195553333263e-06, |
| "loss": 0.1277, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.8958143767060964, |
| "grad_norm": 1.5986138982364897, |
| "learning_rate": 9.229433393021358e-06, |
| "loss": 0.1405, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.8962693357597816, |
| "grad_norm": 0.6908357505139043, |
| "learning_rate": 9.228670887099256e-06, |
| "loss": 0.0739, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8967242948134668, |
| "grad_norm": 0.5277345258701365, |
| "learning_rate": 9.227908035629266e-06, |
| "loss": 0.0526, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.8971792538671519, |
| "grad_norm": 0.6285224648148875, |
| "learning_rate": 9.227144838673724e-06, |
| "loss": 0.0706, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.8976342129208371, |
| "grad_norm": 0.949308919855668, |
| "learning_rate": 9.226381296294995e-06, |
| "loss": 0.1045, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.8980891719745223, |
| "grad_norm": 0.752138900094858, |
| "learning_rate": 9.225617408555471e-06, |
| "loss": 0.0907, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.8985441310282075, |
| "grad_norm": 0.9650799951574368, |
| "learning_rate": 9.224853175517578e-06, |
| "loss": 0.1261, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.8989990900818926, |
| "grad_norm": 0.6368811817284902, |
| "learning_rate": 9.224088597243762e-06, |
| "loss": 0.0759, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.8994540491355778, |
| "grad_norm": 0.7403608884362824, |
| "learning_rate": 9.223323673796503e-06, |
| "loss": 0.081, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.899909008189263, |
| "grad_norm": 0.8033696439311833, |
| "learning_rate": 9.222558405238303e-06, |
| "loss": 0.0968, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9003639672429481, |
| "grad_norm": 0.7306511821068437, |
| "learning_rate": 9.2217927916317e-06, |
| "loss": 0.0916, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9008189262966333, |
| "grad_norm": 0.8380967239417318, |
| "learning_rate": 9.221026833039256e-06, |
| "loss": 0.0945, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9012738853503185, |
| "grad_norm": 0.7718744506924977, |
| "learning_rate": 9.220260529523561e-06, |
| "loss": 0.0918, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9017288444040037, |
| "grad_norm": 0.7393925382776323, |
| "learning_rate": 9.219493881147234e-06, |
| "loss": 0.0816, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9021838034576888, |
| "grad_norm": 0.7687427983757074, |
| "learning_rate": 9.218726887972923e-06, |
| "loss": 0.0835, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.902638762511374, |
| "grad_norm": 0.6785077320109779, |
| "learning_rate": 9.2179595500633e-06, |
| "loss": 0.0799, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9030937215650592, |
| "grad_norm": 0.9172539926736025, |
| "learning_rate": 9.217191867481072e-06, |
| "loss": 0.1147, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9035486806187443, |
| "grad_norm": 0.9222679238503178, |
| "learning_rate": 9.21642384028897e-06, |
| "loss": 0.127, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9040036396724295, |
| "grad_norm": 0.8844523810912496, |
| "learning_rate": 9.215655468549752e-06, |
| "loss": 0.1013, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.9044585987261147, |
| "grad_norm": 0.5874811797706115, |
| "learning_rate": 9.214886752326208e-06, |
| "loss": 0.0528, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9049135577797999, |
| "grad_norm": 0.6774186522730414, |
| "learning_rate": 9.214117691681152e-06, |
| "loss": 0.0749, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.905368516833485, |
| "grad_norm": 0.46678264083336873, |
| "learning_rate": 9.213348286677429e-06, |
| "loss": 0.0502, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9058234758871702, |
| "grad_norm": 0.6369505909634797, |
| "learning_rate": 9.21257853737791e-06, |
| "loss": 0.0597, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9062784349408554, |
| "grad_norm": 0.7872482528902512, |
| "learning_rate": 9.211808443845499e-06, |
| "loss": 0.0842, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9067333939945404, |
| "grad_norm": 0.6991340678786092, |
| "learning_rate": 9.211038006143121e-06, |
| "loss": 0.0714, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9071883530482256, |
| "grad_norm": 0.5842126029431552, |
| "learning_rate": 9.210267224333735e-06, |
| "loss": 0.0686, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9076433121019108, |
| "grad_norm": 0.6405241386542652, |
| "learning_rate": 9.209496098480324e-06, |
| "loss": 0.0843, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.908098271155596, |
| "grad_norm": 0.6431855863004138, |
| "learning_rate": 9.208724628645901e-06, |
| "loss": 0.0781, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9085532302092811, |
| "grad_norm": 0.6571372788631167, |
| "learning_rate": 9.207952814893511e-06, |
| "loss": 0.0746, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9090081892629663, |
| "grad_norm": 0.6228847041781231, |
| "learning_rate": 9.207180657286216e-06, |
| "loss": 0.0563, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9094631483166515, |
| "grad_norm": 0.6649592874484661, |
| "learning_rate": 9.20640815588712e-06, |
| "loss": 0.0737, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9099181073703366, |
| "grad_norm": 0.6395827893566276, |
| "learning_rate": 9.205635310759344e-06, |
| "loss": 0.0864, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9103730664240218, |
| "grad_norm": 0.6470816609318947, |
| "learning_rate": 9.204862121966044e-06, |
| "loss": 0.0819, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.910828025477707, |
| "grad_norm": 0.6954176357821441, |
| "learning_rate": 9.2040885895704e-06, |
| "loss": 0.0935, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9112829845313922, |
| "grad_norm": 0.5250024400720148, |
| "learning_rate": 9.203314713635621e-06, |
| "loss": 0.0521, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9117379435850773, |
| "grad_norm": 0.6765818316745539, |
| "learning_rate": 9.202540494224946e-06, |
| "loss": 0.1078, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.9121929026387625, |
| "grad_norm": 0.7602463030942905, |
| "learning_rate": 9.20176593140164e-06, |
| "loss": 0.068, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9126478616924477, |
| "grad_norm": 0.4564764883431911, |
| "learning_rate": 9.200991025228998e-06, |
| "loss": 0.0576, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.9131028207461328, |
| "grad_norm": 0.87338946860691, |
| "learning_rate": 9.20021577577034e-06, |
| "loss": 0.1155, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.913557779799818, |
| "grad_norm": 0.67443699378812, |
| "learning_rate": 9.199440183089019e-06, |
| "loss": 0.0803, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9140127388535032, |
| "grad_norm": 0.697779741574365, |
| "learning_rate": 9.198664247248408e-06, |
| "loss": 0.0886, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.9144676979071884, |
| "grad_norm": 0.6888292123310293, |
| "learning_rate": 9.197887968311917e-06, |
| "loss": 0.088, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9149226569608735, |
| "grad_norm": 0.593887211300783, |
| "learning_rate": 9.197111346342979e-06, |
| "loss": 0.0597, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.9153776160145587, |
| "grad_norm": 0.5222048906208826, |
| "learning_rate": 9.196334381405055e-06, |
| "loss": 0.055, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9158325750682439, |
| "grad_norm": 0.7406902681131339, |
| "learning_rate": 9.195557073561636e-06, |
| "loss": 0.0725, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.916287534121929, |
| "grad_norm": 0.7369752030698005, |
| "learning_rate": 9.194779422876242e-06, |
| "loss": 0.0725, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.9167424931756142, |
| "grad_norm": 0.5674786098045346, |
| "learning_rate": 9.194001429412414e-06, |
| "loss": 0.0528, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9171974522292994, |
| "grad_norm": 0.9561188233992612, |
| "learning_rate": 9.19322309323373e-06, |
| "loss": 0.1213, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9176524112829846, |
| "grad_norm": 0.7666480467189352, |
| "learning_rate": 9.192444414403792e-06, |
| "loss": 0.0788, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9181073703366697, |
| "grad_norm": 1.0242939804657472, |
| "learning_rate": 9.19166539298623e-06, |
| "loss": 0.1341, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9185623293903549, |
| "grad_norm": 0.6407407288510717, |
| "learning_rate": 9.1908860290447e-06, |
| "loss": 0.0702, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9190172884440401, |
| "grad_norm": 0.9262978099585683, |
| "learning_rate": 9.190106322642888e-06, |
| "loss": 0.0962, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9194722474977252, |
| "grad_norm": 0.6371294810639554, |
| "learning_rate": 9.189326273844512e-06, |
| "loss": 0.0716, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.9199272065514104, |
| "grad_norm": 0.616042736799084, |
| "learning_rate": 9.18854588271331e-06, |
| "loss": 0.0697, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.9203821656050956, |
| "grad_norm": 0.8652881040430276, |
| "learning_rate": 9.187765149313057e-06, |
| "loss": 0.0949, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9208371246587808, |
| "grad_norm": 0.7171212404467417, |
| "learning_rate": 9.186984073707545e-06, |
| "loss": 0.0685, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9212920837124658, |
| "grad_norm": 0.6434040420425213, |
| "learning_rate": 9.186202655960603e-06, |
| "loss": 0.0774, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.921747042766151, |
| "grad_norm": 0.6537324523008204, |
| "learning_rate": 9.185420896136086e-06, |
| "loss": 0.0786, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.9222020018198362, |
| "grad_norm": 0.6271186642997567, |
| "learning_rate": 9.184638794297873e-06, |
| "loss": 0.0636, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9226569608735213, |
| "grad_norm": 0.7041069370791754, |
| "learning_rate": 9.183856350509877e-06, |
| "loss": 0.0809, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9231119199272065, |
| "grad_norm": 0.8781019574614535, |
| "learning_rate": 9.183073564836033e-06, |
| "loss": 0.1051, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.9235668789808917, |
| "grad_norm": 0.48818413319632054, |
| "learning_rate": 9.182290437340308e-06, |
| "loss": 0.0474, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9240218380345769, |
| "grad_norm": 0.8775797840737246, |
| "learning_rate": 9.181506968086696e-06, |
| "loss": 0.0949, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.924476797088262, |
| "grad_norm": 0.958612912496998, |
| "learning_rate": 9.180723157139218e-06, |
| "loss": 0.121, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9249317561419472, |
| "grad_norm": 0.6245762602830833, |
| "learning_rate": 9.179939004561925e-06, |
| "loss": 0.0655, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9253867151956324, |
| "grad_norm": 0.5017046465493271, |
| "learning_rate": 9.17915451041889e-06, |
| "loss": 0.0661, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9258416742493175, |
| "grad_norm": 0.710064858137144, |
| "learning_rate": 9.178369674774224e-06, |
| "loss": 0.0791, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9262966333030027, |
| "grad_norm": 0.587851189554333, |
| "learning_rate": 9.177584497692056e-06, |
| "loss": 0.0637, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9267515923566879, |
| "grad_norm": 1.3023478543600886, |
| "learning_rate": 9.176798979236548e-06, |
| "loss": 0.1095, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9272065514103731, |
| "grad_norm": 0.540716658575828, |
| "learning_rate": 9.17601311947189e-06, |
| "loss": 0.0693, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9276615104640582, |
| "grad_norm": 0.6208372361565256, |
| "learning_rate": 9.175226918462298e-06, |
| "loss": 0.0718, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9281164695177434, |
| "grad_norm": 0.7701609774864682, |
| "learning_rate": 9.174440376272021e-06, |
| "loss": 0.0976, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.7010494768516853, |
| "learning_rate": 9.173653492965325e-06, |
| "loss": 0.0993, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9290263876251137, |
| "grad_norm": 0.6373763175184742, |
| "learning_rate": 9.172866268606514e-06, |
| "loss": 0.0724, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9294813466787989, |
| "grad_norm": 0.701200286737339, |
| "learning_rate": 9.172078703259917e-06, |
| "loss": 0.0825, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.9299363057324841, |
| "grad_norm": 0.4368340952860916, |
| "learning_rate": 9.171290796989887e-06, |
| "loss": 0.0477, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.9303912647861693, |
| "grad_norm": 0.6370651977402901, |
| "learning_rate": 9.170502549860813e-06, |
| "loss": 0.0796, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9308462238398544, |
| "grad_norm": 1.1692149001382897, |
| "learning_rate": 9.169713961937104e-06, |
| "loss": 0.122, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9313011828935396, |
| "grad_norm": 0.694595823352437, |
| "learning_rate": 9.168925033283199e-06, |
| "loss": 0.0935, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.9317561419472248, |
| "grad_norm": 0.672175800896758, |
| "learning_rate": 9.168135763963567e-06, |
| "loss": 0.0763, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.9322111010009099, |
| "grad_norm": 0.5254037194744254, |
| "learning_rate": 9.167346154042705e-06, |
| "loss": 0.0535, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9326660600545951, |
| "grad_norm": 0.6788074343357934, |
| "learning_rate": 9.166556203585134e-06, |
| "loss": 0.0804, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9331210191082803, |
| "grad_norm": 0.6950456412782345, |
| "learning_rate": 9.165765912655407e-06, |
| "loss": 0.0727, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9335759781619655, |
| "grad_norm": 0.8037111447772672, |
| "learning_rate": 9.1649752813181e-06, |
| "loss": 0.0811, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9340309372156506, |
| "grad_norm": 0.6043473581913603, |
| "learning_rate": 9.164184309637824e-06, |
| "loss": 0.0773, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9344858962693358, |
| "grad_norm": 0.6914300193057683, |
| "learning_rate": 9.16339299767921e-06, |
| "loss": 0.0888, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.934940855323021, |
| "grad_norm": 0.5973299516809696, |
| "learning_rate": 9.162601345506923e-06, |
| "loss": 0.0771, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.935395814376706, |
| "grad_norm": 0.5667027927032561, |
| "learning_rate": 9.161809353185651e-06, |
| "loss": 0.0589, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9358507734303912, |
| "grad_norm": 0.5892355686848351, |
| "learning_rate": 9.161017020780114e-06, |
| "loss": 0.0562, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9363057324840764, |
| "grad_norm": 0.8503563061945567, |
| "learning_rate": 9.160224348355057e-06, |
| "loss": 0.1075, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.9367606915377616, |
| "grad_norm": 0.8030569297687169, |
| "learning_rate": 9.159431335975255e-06, |
| "loss": 0.0651, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9372156505914467, |
| "grad_norm": 0.6182029602806504, |
| "learning_rate": 9.158637983705505e-06, |
| "loss": 0.0908, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9376706096451319, |
| "grad_norm": 0.6167088007283392, |
| "learning_rate": 9.157844291610641e-06, |
| "loss": 0.0719, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9381255686988171, |
| "grad_norm": 1.0378949375185438, |
| "learning_rate": 9.157050259755519e-06, |
| "loss": 0.0925, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9385805277525022, |
| "grad_norm": 0.6009053311569907, |
| "learning_rate": 9.156255888205021e-06, |
| "loss": 0.0868, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.9390354868061874, |
| "grad_norm": 0.6730461926983252, |
| "learning_rate": 9.155461177024062e-06, |
| "loss": 0.0791, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9394904458598726, |
| "grad_norm": 0.8310142050561945, |
| "learning_rate": 9.154666126277582e-06, |
| "loss": 0.0882, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9399454049135578, |
| "grad_norm": 0.5455153208822874, |
| "learning_rate": 9.153870736030549e-06, |
| "loss": 0.0651, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9404003639672429, |
| "grad_norm": 0.8245922923142007, |
| "learning_rate": 9.153075006347957e-06, |
| "loss": 0.1357, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9408553230209281, |
| "grad_norm": 0.7891736693746195, |
| "learning_rate": 9.15227893729483e-06, |
| "loss": 0.0879, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.9413102820746133, |
| "grad_norm": 0.6032022964433661, |
| "learning_rate": 9.151482528936222e-06, |
| "loss": 0.0594, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.9417652411282984, |
| "grad_norm": 0.8087071917107507, |
| "learning_rate": 9.150685781337207e-06, |
| "loss": 0.0872, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9422202001819836, |
| "grad_norm": 1.1875700013397057, |
| "learning_rate": 9.149888694562896e-06, |
| "loss": 0.1447, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.9426751592356688, |
| "grad_norm": 0.7351727785498874, |
| "learning_rate": 9.149091268678423e-06, |
| "loss": 0.0708, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.943130118289354, |
| "grad_norm": 0.6792286796417435, |
| "learning_rate": 9.148293503748947e-06, |
| "loss": 0.0876, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.9435850773430391, |
| "grad_norm": 0.7417762096300724, |
| "learning_rate": 9.14749539983966e-06, |
| "loss": 0.0852, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.9440400363967243, |
| "grad_norm": 0.5155173170030183, |
| "learning_rate": 9.146696957015777e-06, |
| "loss": 0.0606, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9444949954504095, |
| "grad_norm": 1.1023064832096257, |
| "learning_rate": 9.145898175342545e-06, |
| "loss": 0.1488, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.9449499545040946, |
| "grad_norm": 0.6914694719967308, |
| "learning_rate": 9.145099054885238e-06, |
| "loss": 0.0816, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.9454049135577798, |
| "grad_norm": 0.6905933706764309, |
| "learning_rate": 9.144299595709156e-06, |
| "loss": 0.0876, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.945859872611465, |
| "grad_norm": 0.5233906895741112, |
| "learning_rate": 9.143499797879626e-06, |
| "loss": 0.0562, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.9463148316651502, |
| "grad_norm": 0.5101515836442003, |
| "learning_rate": 9.142699661462005e-06, |
| "loss": 0.0559, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9467697907188353, |
| "grad_norm": 0.48017157157527135, |
| "learning_rate": 9.141899186521675e-06, |
| "loss": 0.0503, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.9472247497725205, |
| "grad_norm": 0.6592673728640894, |
| "learning_rate": 9.141098373124048e-06, |
| "loss": 0.0797, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.9476797088262057, |
| "grad_norm": 0.86432014477488, |
| "learning_rate": 9.140297221334562e-06, |
| "loss": 0.0858, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.9481346678798908, |
| "grad_norm": 1.0397141319559977, |
| "learning_rate": 9.139495731218685e-06, |
| "loss": 0.1198, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.948589626933576, |
| "grad_norm": 0.862052866017664, |
| "learning_rate": 9.138693902841914e-06, |
| "loss": 0.1056, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9490445859872612, |
| "grad_norm": 0.7709077621401632, |
| "learning_rate": 9.137891736269764e-06, |
| "loss": 0.0918, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.9494995450409464, |
| "grad_norm": 0.8691294728765458, |
| "learning_rate": 9.137089231567789e-06, |
| "loss": 0.0925, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.9499545040946314, |
| "grad_norm": 0.6098999809715144, |
| "learning_rate": 9.136286388801564e-06, |
| "loss": 0.0673, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.9504094631483166, |
| "grad_norm": 0.7157788293123913, |
| "learning_rate": 9.135483208036695e-06, |
| "loss": 0.0802, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.9508644222020018, |
| "grad_norm": 0.9397853662008804, |
| "learning_rate": 9.134679689338814e-06, |
| "loss": 0.1021, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9513193812556869, |
| "grad_norm": 0.5449934450219076, |
| "learning_rate": 9.133875832773582e-06, |
| "loss": 0.0698, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.9517743403093721, |
| "grad_norm": 0.5678662789014983, |
| "learning_rate": 9.133071638406684e-06, |
| "loss": 0.0726, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.9522292993630573, |
| "grad_norm": 0.704718355722168, |
| "learning_rate": 9.132267106303836e-06, |
| "loss": 0.0949, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.9526842584167425, |
| "grad_norm": 0.7119333629649424, |
| "learning_rate": 9.131462236530784e-06, |
| "loss": 0.0815, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.9531392174704276, |
| "grad_norm": 0.9543831010874976, |
| "learning_rate": 9.130657029153293e-06, |
| "loss": 0.1037, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.9535941765241128, |
| "grad_norm": 0.4141088945678519, |
| "learning_rate": 9.129851484237165e-06, |
| "loss": 0.0438, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.954049135577798, |
| "grad_norm": 0.880955172212152, |
| "learning_rate": 9.129045601848222e-06, |
| "loss": 0.1139, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.9545040946314831, |
| "grad_norm": 0.5340666725025275, |
| "learning_rate": 9.12823938205232e-06, |
| "loss": 0.0662, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.9549590536851683, |
| "grad_norm": 0.7598809630255295, |
| "learning_rate": 9.127432824915339e-06, |
| "loss": 0.086, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.9554140127388535, |
| "grad_norm": 0.5889551801250265, |
| "learning_rate": 9.126625930503187e-06, |
| "loss": 0.0618, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9558689717925387, |
| "grad_norm": 0.7452095277301981, |
| "learning_rate": 9.125818698881798e-06, |
| "loss": 0.0846, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.9563239308462238, |
| "grad_norm": 0.874570701264544, |
| "learning_rate": 9.125011130117139e-06, |
| "loss": 0.0711, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.956778889899909, |
| "grad_norm": 0.6700889468480424, |
| "learning_rate": 9.124203224275198e-06, |
| "loss": 0.0771, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.9572338489535942, |
| "grad_norm": 0.5713697589917575, |
| "learning_rate": 9.123394981421995e-06, |
| "loss": 0.0647, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.9576888080072793, |
| "grad_norm": 0.7416406361243658, |
| "learning_rate": 9.122586401623574e-06, |
| "loss": 0.0797, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9581437670609645, |
| "grad_norm": 0.8792771411195691, |
| "learning_rate": 9.12177748494601e-06, |
| "loss": 0.1043, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.9585987261146497, |
| "grad_norm": 0.8409261244287831, |
| "learning_rate": 9.120968231455406e-06, |
| "loss": 0.0968, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.9590536851683349, |
| "grad_norm": 0.588499824544961, |
| "learning_rate": 9.120158641217885e-06, |
| "loss": 0.0675, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.95950864422202, |
| "grad_norm": 0.5664840104040384, |
| "learning_rate": 9.119348714299607e-06, |
| "loss": 0.0721, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.9599636032757052, |
| "grad_norm": 0.7544363313105896, |
| "learning_rate": 9.118538450766755e-06, |
| "loss": 0.0723, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9604185623293904, |
| "grad_norm": 0.6699256182505398, |
| "learning_rate": 9.117727850685541e-06, |
| "loss": 0.0669, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.9608735213830755, |
| "grad_norm": 0.5711605071447146, |
| "learning_rate": 9.116916914122202e-06, |
| "loss": 0.0637, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.9613284804367607, |
| "grad_norm": 0.6965803730129388, |
| "learning_rate": 9.116105641143005e-06, |
| "loss": 0.0744, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.9617834394904459, |
| "grad_norm": 0.8598026014818454, |
| "learning_rate": 9.115294031814242e-06, |
| "loss": 0.0937, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.9622383985441311, |
| "grad_norm": 0.5794082624701737, |
| "learning_rate": 9.114482086202236e-06, |
| "loss": 0.0675, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.9626933575978162, |
| "grad_norm": 0.7600807206599288, |
| "learning_rate": 9.113669804373335e-06, |
| "loss": 0.1047, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.9631483166515014, |
| "grad_norm": 0.6377342056356247, |
| "learning_rate": 9.112857186393913e-06, |
| "loss": 0.0676, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.9636032757051866, |
| "grad_norm": 1.1042469320816768, |
| "learning_rate": 9.112044232330377e-06, |
| "loss": 0.1508, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.9640582347588716, |
| "grad_norm": 0.817690744261235, |
| "learning_rate": 9.111230942249156e-06, |
| "loss": 0.0904, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.9645131938125568, |
| "grad_norm": 0.7037231293816442, |
| "learning_rate": 9.110417316216708e-06, |
| "loss": 0.0636, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.964968152866242, |
| "grad_norm": 0.6588945759110881, |
| "learning_rate": 9.10960335429952e-06, |
| "loss": 0.0684, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.9654231119199272, |
| "grad_norm": 0.6220308381200076, |
| "learning_rate": 9.108789056564105e-06, |
| "loss": 0.0877, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.9658780709736123, |
| "grad_norm": 0.6262721502493606, |
| "learning_rate": 9.107974423077001e-06, |
| "loss": 0.0642, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.9663330300272975, |
| "grad_norm": 0.9510165739511419, |
| "learning_rate": 9.107159453904781e-06, |
| "loss": 0.0994, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.9667879890809827, |
| "grad_norm": 0.7410601791583596, |
| "learning_rate": 9.10634414911404e-06, |
| "loss": 0.0751, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.9672429481346679, |
| "grad_norm": 0.592927363864185, |
| "learning_rate": 9.105528508771395e-06, |
| "loss": 0.0785, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.967697907188353, |
| "grad_norm": 0.704125884709214, |
| "learning_rate": 9.104712532943502e-06, |
| "loss": 0.0672, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.9681528662420382, |
| "grad_norm": 0.6763649668606744, |
| "learning_rate": 9.10389622169704e-06, |
| "loss": 0.0813, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.9686078252957234, |
| "grad_norm": 1.0481681916194059, |
| "learning_rate": 9.103079575098708e-06, |
| "loss": 0.1165, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.9690627843494085, |
| "grad_norm": 0.6244343397167454, |
| "learning_rate": 9.102262593215246e-06, |
| "loss": 0.0548, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9695177434030937, |
| "grad_norm": 0.6662772517701377, |
| "learning_rate": 9.101445276113407e-06, |
| "loss": 0.0672, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.9699727024567789, |
| "grad_norm": 0.7302079833291476, |
| "learning_rate": 9.100627623859985e-06, |
| "loss": 0.0747, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.9704276615104641, |
| "grad_norm": 0.7003598456468986, |
| "learning_rate": 9.09980963652179e-06, |
| "loss": 0.0763, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.9708826205641492, |
| "grad_norm": 0.8675523177046712, |
| "learning_rate": 9.098991314165668e-06, |
| "loss": 0.1123, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.9713375796178344, |
| "grad_norm": 0.6531391716615499, |
| "learning_rate": 9.098172656858484e-06, |
| "loss": 0.0626, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.9717925386715196, |
| "grad_norm": 0.8230462520119928, |
| "learning_rate": 9.097353664667138e-06, |
| "loss": 0.0873, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.9722474977252047, |
| "grad_norm": 0.6524897158303723, |
| "learning_rate": 9.096534337658558e-06, |
| "loss": 0.0658, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.9727024567788899, |
| "grad_norm": 0.7421742040769631, |
| "learning_rate": 9.095714675899688e-06, |
| "loss": 0.0782, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.9731574158325751, |
| "grad_norm": 0.6400011673563383, |
| "learning_rate": 9.094894679457511e-06, |
| "loss": 0.0605, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.9736123748862603, |
| "grad_norm": 0.5825220963314399, |
| "learning_rate": 9.094074348399034e-06, |
| "loss": 0.0711, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9740673339399454, |
| "grad_norm": 0.9652267063711952, |
| "learning_rate": 9.09325368279129e-06, |
| "loss": 0.0996, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.9745222929936306, |
| "grad_norm": 0.9291202899333796, |
| "learning_rate": 9.09243268270134e-06, |
| "loss": 0.0818, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.9749772520473158, |
| "grad_norm": 0.8799622533298002, |
| "learning_rate": 9.091611348196272e-06, |
| "loss": 0.0904, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.9754322111010009, |
| "grad_norm": 0.8326816067428606, |
| "learning_rate": 9.090789679343201e-06, |
| "loss": 0.0931, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.9758871701546861, |
| "grad_norm": 0.783000579713321, |
| "learning_rate": 9.089967676209274e-06, |
| "loss": 0.0879, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.9763421292083713, |
| "grad_norm": 0.7001846964382422, |
| "learning_rate": 9.089145338861657e-06, |
| "loss": 0.0916, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.9767970882620565, |
| "grad_norm": 0.953946241556791, |
| "learning_rate": 9.08832266736755e-06, |
| "loss": 0.1205, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.9772520473157416, |
| "grad_norm": 0.7358151559070641, |
| "learning_rate": 9.087499661794177e-06, |
| "loss": 0.0915, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.9777070063694268, |
| "grad_norm": 0.8142291270830226, |
| "learning_rate": 9.08667632220879e-06, |
| "loss": 0.0995, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.978161965423112, |
| "grad_norm": 0.7106034630801776, |
| "learning_rate": 9.08585264867867e-06, |
| "loss": 0.0783, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.978616924476797, |
| "grad_norm": 0.826812478555379, |
| "learning_rate": 9.085028641271123e-06, |
| "loss": 0.1058, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.9790718835304822, |
| "grad_norm": 0.8960647231942128, |
| "learning_rate": 9.084204300053483e-06, |
| "loss": 0.108, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.9795268425841674, |
| "grad_norm": 0.7308955491972883, |
| "learning_rate": 9.083379625093111e-06, |
| "loss": 0.0963, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.9799818016378526, |
| "grad_norm": 0.854998609995297, |
| "learning_rate": 9.082554616457397e-06, |
| "loss": 0.1031, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.9804367606915377, |
| "grad_norm": 0.6134903423880519, |
| "learning_rate": 9.081729274213758e-06, |
| "loss": 0.0728, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.9808917197452229, |
| "grad_norm": 0.7494461465991118, |
| "learning_rate": 9.080903598429634e-06, |
| "loss": 0.0612, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.9813466787989081, |
| "grad_norm": 0.6477350071161301, |
| "learning_rate": 9.080077589172496e-06, |
| "loss": 0.0725, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.9818016378525932, |
| "grad_norm": 0.5949372826775987, |
| "learning_rate": 9.079251246509846e-06, |
| "loss": 0.0618, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.9822565969062784, |
| "grad_norm": 1.0457437129682037, |
| "learning_rate": 9.078424570509202e-06, |
| "loss": 0.134, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.9827115559599636, |
| "grad_norm": 0.7562918714504535, |
| "learning_rate": 9.077597561238123e-06, |
| "loss": 0.0746, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9831665150136488, |
| "grad_norm": 0.705691881874251, |
| "learning_rate": 9.076770218764186e-06, |
| "loss": 0.0903, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.9836214740673339, |
| "grad_norm": 0.700571619924188, |
| "learning_rate": 9.075942543154996e-06, |
| "loss": 0.0905, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.9840764331210191, |
| "grad_norm": 0.5178609664739039, |
| "learning_rate": 9.075114534478187e-06, |
| "loss": 0.0623, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.9845313921747043, |
| "grad_norm": 0.5564063525132696, |
| "learning_rate": 9.074286192801423e-06, |
| "loss": 0.0622, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.9849863512283894, |
| "grad_norm": 0.8390150599738658, |
| "learning_rate": 9.07345751819239e-06, |
| "loss": 0.0894, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.9854413102820746, |
| "grad_norm": 0.6899304429749638, |
| "learning_rate": 9.072628510718804e-06, |
| "loss": 0.0715, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.9858962693357598, |
| "grad_norm": 0.7215157855324703, |
| "learning_rate": 9.071799170448409e-06, |
| "loss": 0.0767, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.986351228389445, |
| "grad_norm": 0.5513970488289187, |
| "learning_rate": 9.070969497448972e-06, |
| "loss": 0.0586, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.9868061874431301, |
| "grad_norm": 0.5126138943457034, |
| "learning_rate": 9.070139491788295e-06, |
| "loss": 0.0686, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.9872611464968153, |
| "grad_norm": 0.7021455623884609, |
| "learning_rate": 9.069309153534196e-06, |
| "loss": 0.0853, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9877161055505005, |
| "grad_norm": 0.8937932838828458, |
| "learning_rate": 9.068478482754532e-06, |
| "loss": 0.1229, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.9881710646041856, |
| "grad_norm": 0.7580326063736847, |
| "learning_rate": 9.067647479517179e-06, |
| "loss": 0.1176, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.9886260236578708, |
| "grad_norm": 0.854693695415459, |
| "learning_rate": 9.066816143890042e-06, |
| "loss": 0.0624, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.989080982711556, |
| "grad_norm": 0.691622087221906, |
| "learning_rate": 9.065984475941056e-06, |
| "loss": 0.0821, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.9895359417652412, |
| "grad_norm": 0.5701976798754824, |
| "learning_rate": 9.065152475738182e-06, |
| "loss": 0.0525, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.9899909008189263, |
| "grad_norm": 0.5280985607821013, |
| "learning_rate": 9.064320143349405e-06, |
| "loss": 0.0532, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.9904458598726115, |
| "grad_norm": 0.7270073505569681, |
| "learning_rate": 9.063487478842738e-06, |
| "loss": 0.0729, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.9909008189262967, |
| "grad_norm": 0.5397573476737881, |
| "learning_rate": 9.062654482286228e-06, |
| "loss": 0.0546, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.9913557779799818, |
| "grad_norm": 0.8280519656078903, |
| "learning_rate": 9.061821153747938e-06, |
| "loss": 0.0794, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.991810737033667, |
| "grad_norm": 0.6367661759018886, |
| "learning_rate": 9.060987493295967e-06, |
| "loss": 0.0679, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9922656960873522, |
| "grad_norm": 0.7859239736098618, |
| "learning_rate": 9.060153500998438e-06, |
| "loss": 0.0958, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.9927206551410374, |
| "grad_norm": 0.8770748630020422, |
| "learning_rate": 9.0593191769235e-06, |
| "loss": 0.1037, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.9931756141947224, |
| "grad_norm": 0.5493767625809909, |
| "learning_rate": 9.05848452113933e-06, |
| "loss": 0.0535, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.9936305732484076, |
| "grad_norm": 1.0509546431486094, |
| "learning_rate": 9.057649533714134e-06, |
| "loss": 0.1136, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.9940855323020928, |
| "grad_norm": 0.8067366260983323, |
| "learning_rate": 9.056814214716143e-06, |
| "loss": 0.0911, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.9945404913557779, |
| "grad_norm": 0.6708197750921108, |
| "learning_rate": 9.055978564213614e-06, |
| "loss": 0.0737, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.9949954504094631, |
| "grad_norm": 1.0620824544949425, |
| "learning_rate": 9.055142582274831e-06, |
| "loss": 0.1035, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.9954504094631483, |
| "grad_norm": 0.7809645088567875, |
| "learning_rate": 9.054306268968111e-06, |
| "loss": 0.0964, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.9959053685168335, |
| "grad_norm": 0.6922882332723763, |
| "learning_rate": 9.053469624361793e-06, |
| "loss": 0.0769, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.9963603275705186, |
| "grad_norm": 0.6135634693459231, |
| "learning_rate": 9.052632648524242e-06, |
| "loss": 0.0857, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9968152866242038, |
| "grad_norm": 0.7230383107997012, |
| "learning_rate": 9.051795341523852e-06, |
| "loss": 0.0666, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.997270245677889, |
| "grad_norm": 0.7702877397526973, |
| "learning_rate": 9.050957703429044e-06, |
| "loss": 0.0861, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.9977252047315741, |
| "grad_norm": 0.79537510756259, |
| "learning_rate": 9.050119734308266e-06, |
| "loss": 0.0906, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.9981801637852593, |
| "grad_norm": 0.6318589660625535, |
| "learning_rate": 9.049281434229995e-06, |
| "loss": 0.0821, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.9986351228389445, |
| "grad_norm": 0.6618836956269952, |
| "learning_rate": 9.048442803262731e-06, |
| "loss": 0.0748, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.9990900818926297, |
| "grad_norm": 0.5469592163366095, |
| "learning_rate": 9.047603841475003e-06, |
| "loss": 0.066, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.9995450409463148, |
| "grad_norm": 0.6279887796401853, |
| "learning_rate": 9.046764548935368e-06, |
| "loss": 0.0743, |
| "step": 2197 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.40519899960847033, |
| "learning_rate": 9.045924925712411e-06, |
| "loss": 0.0327, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.000454959053685, |
| "grad_norm": 0.41468311147935694, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.0243, |
| "step": 2199 |
| }, |
| { |
| "epoch": 1.0009099181073704, |
| "grad_norm": 0.5188055788021196, |
| "learning_rate": 9.04424468749099e-06, |
| "loss": 0.0375, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.0013648771610555, |
| "grad_norm": 0.4764585088866917, |
| "learning_rate": 9.04340407262983e-06, |
| "loss": 0.0395, |
| "step": 2201 |
| }, |
| { |
| "epoch": 1.0018198362147406, |
| "grad_norm": 0.28928828491344616, |
| "learning_rate": 9.042563127359946e-06, |
| "loss": 0.0208, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.0022747952684259, |
| "grad_norm": 0.5179468693343099, |
| "learning_rate": 9.041721851750063e-06, |
| "loss": 0.0322, |
| "step": 2203 |
| }, |
| { |
| "epoch": 1.002729754322111, |
| "grad_norm": 0.4198208723720039, |
| "learning_rate": 9.04088024586892e-06, |
| "loss": 0.0366, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.0031847133757963, |
| "grad_norm": 0.4784473138415427, |
| "learning_rate": 9.040038309785293e-06, |
| "loss": 0.0422, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.0036396724294814, |
| "grad_norm": 0.576332931747316, |
| "learning_rate": 9.039196043567979e-06, |
| "loss": 0.0387, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.0040946314831665, |
| "grad_norm": 0.5205582439898824, |
| "learning_rate": 9.038353447285807e-06, |
| "loss": 0.0551, |
| "step": 2207 |
| }, |
| { |
| "epoch": 1.0045495905368518, |
| "grad_norm": 0.7737994932982504, |
| "learning_rate": 9.037510521007626e-06, |
| "loss": 0.042, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.0050045495905369, |
| "grad_norm": 0.4056433108647087, |
| "learning_rate": 9.03666726480232e-06, |
| "loss": 0.0309, |
| "step": 2209 |
| }, |
| { |
| "epoch": 1.005459508644222, |
| "grad_norm": 0.31259616668647877, |
| "learning_rate": 9.035823678738795e-06, |
| "loss": 0.0247, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.0059144676979073, |
| "grad_norm": 0.545747512672262, |
| "learning_rate": 9.034979762885985e-06, |
| "loss": 0.0379, |
| "step": 2211 |
| }, |
| { |
| "epoch": 1.0063694267515924, |
| "grad_norm": 0.3531093457798414, |
| "learning_rate": 9.034135517312848e-06, |
| "loss": 0.0198, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.0068243858052774, |
| "grad_norm": 0.3471778421349368, |
| "learning_rate": 9.033290942088377e-06, |
| "loss": 0.0191, |
| "step": 2213 |
| }, |
| { |
| "epoch": 1.0072793448589628, |
| "grad_norm": 0.45123302926671505, |
| "learning_rate": 9.032446037281582e-06, |
| "loss": 0.0233, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.0077343039126478, |
| "grad_norm": 0.40498118740009004, |
| "learning_rate": 9.031600802961508e-06, |
| "loss": 0.028, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.008189262966333, |
| "grad_norm": 0.44404852807953515, |
| "learning_rate": 9.030755239197224e-06, |
| "loss": 0.0343, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.0086442220200182, |
| "grad_norm": 0.41886201143517243, |
| "learning_rate": 9.029909346057826e-06, |
| "loss": 0.0276, |
| "step": 2217 |
| }, |
| { |
| "epoch": 1.0090991810737033, |
| "grad_norm": 0.2879285343911946, |
| "learning_rate": 9.029063123612431e-06, |
| "loss": 0.02, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.0095541401273886, |
| "grad_norm": 0.5781677724909076, |
| "learning_rate": 9.028216571930197e-06, |
| "loss": 0.0339, |
| "step": 2219 |
| }, |
| { |
| "epoch": 1.0100090991810737, |
| "grad_norm": 0.42128445628125777, |
| "learning_rate": 9.027369691080292e-06, |
| "loss": 0.0329, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0104640582347588, |
| "grad_norm": 0.4867304814601137, |
| "learning_rate": 9.026522481131925e-06, |
| "loss": 0.0451, |
| "step": 2221 |
| }, |
| { |
| "epoch": 1.0109190172884441, |
| "grad_norm": 0.35647532367363194, |
| "learning_rate": 9.025674942154325e-06, |
| "loss": 0.0202, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.0113739763421292, |
| "grad_norm": 0.6154778638320356, |
| "learning_rate": 9.024827074216748e-06, |
| "loss": 0.0619, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.0118289353958143, |
| "grad_norm": 0.46447780693049373, |
| "learning_rate": 9.023978877388479e-06, |
| "loss": 0.0265, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.0122838944494996, |
| "grad_norm": 0.4551756875246183, |
| "learning_rate": 9.02313035173883e-06, |
| "loss": 0.0167, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.0127388535031847, |
| "grad_norm": 0.4341660568896861, |
| "learning_rate": 9.022281497337133e-06, |
| "loss": 0.0257, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.0131938125568698, |
| "grad_norm": 0.37807969634776667, |
| "learning_rate": 9.021432314252758e-06, |
| "loss": 0.0235, |
| "step": 2227 |
| }, |
| { |
| "epoch": 1.013648771610555, |
| "grad_norm": 0.43791115876653813, |
| "learning_rate": 9.020582802555095e-06, |
| "loss": 0.0285, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.0141037306642402, |
| "grad_norm": 0.7541669794368306, |
| "learning_rate": 9.019732962313562e-06, |
| "loss": 0.0412, |
| "step": 2229 |
| }, |
| { |
| "epoch": 1.0145586897179253, |
| "grad_norm": 0.41591203424935613, |
| "learning_rate": 9.018882793597605e-06, |
| "loss": 0.0217, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0150136487716106, |
| "grad_norm": 0.531675738557164, |
| "learning_rate": 9.018032296476695e-06, |
| "loss": 0.0259, |
| "step": 2231 |
| }, |
| { |
| "epoch": 1.0154686078252957, |
| "grad_norm": 0.4525534861298487, |
| "learning_rate": 9.017181471020331e-06, |
| "loss": 0.032, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.015923566878981, |
| "grad_norm": 0.5572932855598556, |
| "learning_rate": 9.016330317298038e-06, |
| "loss": 0.0321, |
| "step": 2233 |
| }, |
| { |
| "epoch": 1.016378525932666, |
| "grad_norm": 0.4880772464783955, |
| "learning_rate": 9.01547883537937e-06, |
| "loss": 0.0242, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.0168334849863512, |
| "grad_norm": 0.5290436879010799, |
| "learning_rate": 9.014627025333906e-06, |
| "loss": 0.0268, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.0172884440400365, |
| "grad_norm": 0.3469524553449946, |
| "learning_rate": 9.01377488723125e-06, |
| "loss": 0.0189, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.0177434030937216, |
| "grad_norm": 0.5381328202645719, |
| "learning_rate": 9.012922421141036e-06, |
| "loss": 0.0282, |
| "step": 2237 |
| }, |
| { |
| "epoch": 1.0181983621474067, |
| "grad_norm": 0.5437416204093511, |
| "learning_rate": 9.012069627132925e-06, |
| "loss": 0.0365, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.018653321201092, |
| "grad_norm": 0.5151432843211493, |
| "learning_rate": 9.011216505276601e-06, |
| "loss": 0.0327, |
| "step": 2239 |
| }, |
| { |
| "epoch": 1.019108280254777, |
| "grad_norm": 0.7194165832171175, |
| "learning_rate": 9.01036305564178e-06, |
| "loss": 0.0447, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0195632393084622, |
| "grad_norm": 0.4895196525190099, |
| "learning_rate": 9.009509278298201e-06, |
| "loss": 0.0226, |
| "step": 2241 |
| }, |
| { |
| "epoch": 1.0200181983621475, |
| "grad_norm": 0.36403402277658775, |
| "learning_rate": 9.008655173315629e-06, |
| "loss": 0.0172, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.0204731574158326, |
| "grad_norm": 0.5192307375895406, |
| "learning_rate": 9.00780074076386e-06, |
| "loss": 0.0281, |
| "step": 2243 |
| }, |
| { |
| "epoch": 1.0209281164695176, |
| "grad_norm": 0.5855074570295021, |
| "learning_rate": 9.006945980712713e-06, |
| "loss": 0.039, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.021383075523203, |
| "grad_norm": 0.3530576777441414, |
| "learning_rate": 9.006090893232036e-06, |
| "loss": 0.0165, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.021838034576888, |
| "grad_norm": 0.46560015374930225, |
| "learning_rate": 9.005235478391704e-06, |
| "loss": 0.031, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.0222929936305734, |
| "grad_norm": 0.4320906337363968, |
| "learning_rate": 9.004379736261614e-06, |
| "loss": 0.0229, |
| "step": 2247 |
| }, |
| { |
| "epoch": 1.0227479526842584, |
| "grad_norm": 0.5843690219708401, |
| "learning_rate": 9.003523666911698e-06, |
| "loss": 0.0398, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.0232029117379435, |
| "grad_norm": 0.4876049343109499, |
| "learning_rate": 9.002667270411905e-06, |
| "loss": 0.0209, |
| "step": 2249 |
| }, |
| { |
| "epoch": 1.0236578707916288, |
| "grad_norm": 0.4996309287294051, |
| "learning_rate": 9.001810546832219e-06, |
| "loss": 0.0339, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.024112829845314, |
| "grad_norm": 0.44615485337683974, |
| "learning_rate": 9.000953496242648e-06, |
| "loss": 0.0367, |
| "step": 2251 |
| }, |
| { |
| "epoch": 1.024567788898999, |
| "grad_norm": 0.4816248261028461, |
| "learning_rate": 9.000096118713226e-06, |
| "loss": 0.0302, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.0250227479526843, |
| "grad_norm": 0.3202895454501902, |
| "learning_rate": 8.999238414314014e-06, |
| "loss": 0.018, |
| "step": 2253 |
| }, |
| { |
| "epoch": 1.0254777070063694, |
| "grad_norm": 0.39394390771447657, |
| "learning_rate": 8.998380383115098e-06, |
| "loss": 0.0203, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.0259326660600545, |
| "grad_norm": 0.6774965098079401, |
| "learning_rate": 8.997522025186592e-06, |
| "loss": 0.0444, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.0263876251137398, |
| "grad_norm": 0.6156285698131154, |
| "learning_rate": 8.996663340598642e-06, |
| "loss": 0.033, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.026842584167425, |
| "grad_norm": 0.6636465470342775, |
| "learning_rate": 8.995804329421408e-06, |
| "loss": 0.0282, |
| "step": 2257 |
| }, |
| { |
| "epoch": 1.02729754322111, |
| "grad_norm": 0.7643329557559453, |
| "learning_rate": 8.994944991725094e-06, |
| "loss": 0.0413, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.0277525022747953, |
| "grad_norm": 0.4484887858566329, |
| "learning_rate": 8.994085327579914e-06, |
| "loss": 0.0244, |
| "step": 2259 |
| }, |
| { |
| "epoch": 1.0282074613284804, |
| "grad_norm": 0.6046158805682427, |
| "learning_rate": 8.993225337056118e-06, |
| "loss": 0.0372, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.0286624203821657, |
| "grad_norm": 0.5297868937946675, |
| "learning_rate": 8.992365020223982e-06, |
| "loss": 0.0407, |
| "step": 2261 |
| }, |
| { |
| "epoch": 1.0291173794358508, |
| "grad_norm": 0.4805793953554321, |
| "learning_rate": 8.991504377153805e-06, |
| "loss": 0.0297, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.0295723384895359, |
| "grad_norm": 0.6196673347815759, |
| "learning_rate": 8.990643407915915e-06, |
| "loss": 0.0397, |
| "step": 2263 |
| }, |
| { |
| "epoch": 1.0300272975432212, |
| "grad_norm": 0.6223272220447811, |
| "learning_rate": 8.98978211258067e-06, |
| "loss": 0.0409, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.0304822565969063, |
| "grad_norm": 0.49952273986223505, |
| "learning_rate": 8.988920491218446e-06, |
| "loss": 0.0272, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.0309372156505914, |
| "grad_norm": 0.6292771186739616, |
| "learning_rate": 8.988058543899654e-06, |
| "loss": 0.0384, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.0313921747042767, |
| "grad_norm": 0.38772458827936923, |
| "learning_rate": 8.987196270694727e-06, |
| "loss": 0.024, |
| "step": 2267 |
| }, |
| { |
| "epoch": 1.0318471337579618, |
| "grad_norm": 0.8799833129039605, |
| "learning_rate": 8.986333671674128e-06, |
| "loss": 0.0341, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.0323020928116469, |
| "grad_norm": 0.6271731268799836, |
| "learning_rate": 8.985470746908342e-06, |
| "loss": 0.033, |
| "step": 2269 |
| }, |
| { |
| "epoch": 1.0327570518653322, |
| "grad_norm": 0.38786047905872434, |
| "learning_rate": 8.984607496467885e-06, |
| "loss": 0.021, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0332120109190173, |
| "grad_norm": 0.6280644096851069, |
| "learning_rate": 8.9837439204233e-06, |
| "loss": 0.0491, |
| "step": 2271 |
| }, |
| { |
| "epoch": 1.0336669699727024, |
| "grad_norm": 0.5847841334225715, |
| "learning_rate": 8.98288001884515e-06, |
| "loss": 0.0337, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.0341219290263877, |
| "grad_norm": 0.36088677101245703, |
| "learning_rate": 8.982015791804032e-06, |
| "loss": 0.0156, |
| "step": 2273 |
| }, |
| { |
| "epoch": 1.0345768880800728, |
| "grad_norm": 0.4537884974426005, |
| "learning_rate": 8.981151239370566e-06, |
| "loss": 0.027, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.035031847133758, |
| "grad_norm": 0.6090066061447076, |
| "learning_rate": 8.9802863616154e-06, |
| "loss": 0.0378, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.0354868061874432, |
| "grad_norm": 0.7101749544755233, |
| "learning_rate": 8.979421158609206e-06, |
| "loss": 0.0439, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.0359417652411282, |
| "grad_norm": 0.5742339125588956, |
| "learning_rate": 8.978555630422686e-06, |
| "loss": 0.0328, |
| "step": 2277 |
| }, |
| { |
| "epoch": 1.0363967242948136, |
| "grad_norm": 0.632873074474985, |
| "learning_rate": 8.977689777126568e-06, |
| "loss": 0.0472, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.0368516833484986, |
| "grad_norm": 0.8069979527700195, |
| "learning_rate": 8.976823598791604e-06, |
| "loss": 0.0319, |
| "step": 2279 |
| }, |
| { |
| "epoch": 1.0373066424021837, |
| "grad_norm": 0.4015240288673539, |
| "learning_rate": 8.975957095488575e-06, |
| "loss": 0.0269, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.037761601455869, |
| "grad_norm": 0.5786381841993868, |
| "learning_rate": 8.975090267288286e-06, |
| "loss": 0.0296, |
| "step": 2281 |
| }, |
| { |
| "epoch": 1.0382165605095541, |
| "grad_norm": 0.5451914455456522, |
| "learning_rate": 8.974223114261574e-06, |
| "loss": 0.0343, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.0386715195632392, |
| "grad_norm": 0.6945170105788371, |
| "learning_rate": 8.973355636479294e-06, |
| "loss": 0.0476, |
| "step": 2283 |
| }, |
| { |
| "epoch": 1.0391264786169245, |
| "grad_norm": 0.5171663408691534, |
| "learning_rate": 8.972487834012338e-06, |
| "loss": 0.0301, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.0395814376706096, |
| "grad_norm": 0.494166229450044, |
| "learning_rate": 8.971619706931613e-06, |
| "loss": 0.0226, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.0400363967242947, |
| "grad_norm": 0.7676778552323048, |
| "learning_rate": 8.970751255308063e-06, |
| "loss": 0.045, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.04049135577798, |
| "grad_norm": 0.44323443611073776, |
| "learning_rate": 8.969882479212652e-06, |
| "loss": 0.0196, |
| "step": 2287 |
| }, |
| { |
| "epoch": 1.040946314831665, |
| "grad_norm": 0.41146000373164554, |
| "learning_rate": 8.969013378716371e-06, |
| "loss": 0.0196, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.0414012738853504, |
| "grad_norm": 0.3888711487160539, |
| "learning_rate": 8.968143953890242e-06, |
| "loss": 0.0228, |
| "step": 2289 |
| }, |
| { |
| "epoch": 1.0418562329390355, |
| "grad_norm": 0.49379959221935377, |
| "learning_rate": 8.96727420480531e-06, |
| "loss": 0.0306, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.0423111919927206, |
| "grad_norm": 0.48325360654642197, |
| "learning_rate": 8.966404131532645e-06, |
| "loss": 0.0265, |
| "step": 2291 |
| }, |
| { |
| "epoch": 1.042766151046406, |
| "grad_norm": 0.47493208719115093, |
| "learning_rate": 8.965533734143347e-06, |
| "loss": 0.0239, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.043221110100091, |
| "grad_norm": 0.556271091368108, |
| "learning_rate": 8.964663012708538e-06, |
| "loss": 0.0365, |
| "step": 2293 |
| }, |
| { |
| "epoch": 1.043676069153776, |
| "grad_norm": 0.8512257992210553, |
| "learning_rate": 8.963791967299375e-06, |
| "loss": 0.0332, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.0441310282074614, |
| "grad_norm": 0.4600946915818348, |
| "learning_rate": 8.96292059798703e-06, |
| "loss": 0.0254, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.0445859872611465, |
| "grad_norm": 0.5926927797370501, |
| "learning_rate": 8.962048904842713e-06, |
| "loss": 0.034, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.0450409463148316, |
| "grad_norm": 0.5174352508068348, |
| "learning_rate": 8.96117688793765e-06, |
| "loss": 0.0334, |
| "step": 2297 |
| }, |
| { |
| "epoch": 1.0454959053685169, |
| "grad_norm": 0.4726564762945724, |
| "learning_rate": 8.960304547343101e-06, |
| "loss": 0.0271, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.045950864422202, |
| "grad_norm": 0.49021838747059965, |
| "learning_rate": 8.959431883130348e-06, |
| "loss": 0.0272, |
| "step": 2299 |
| }, |
| { |
| "epoch": 1.046405823475887, |
| "grad_norm": 0.33392762330146264, |
| "learning_rate": 8.958558895370703e-06, |
| "loss": 0.0184, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0468607825295724, |
| "grad_norm": 0.43970090494512293, |
| "learning_rate": 8.9576855841355e-06, |
| "loss": 0.0247, |
| "step": 2301 |
| }, |
| { |
| "epoch": 1.0473157415832575, |
| "grad_norm": 0.34961568768416074, |
| "learning_rate": 8.956811949496108e-06, |
| "loss": 0.0207, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.0477707006369428, |
| "grad_norm": 0.5047819086466443, |
| "learning_rate": 8.955937991523908e-06, |
| "loss": 0.0358, |
| "step": 2303 |
| }, |
| { |
| "epoch": 1.0482256596906279, |
| "grad_norm": 0.5502957295717672, |
| "learning_rate": 8.955063710290322e-06, |
| "loss": 0.0396, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.048680618744313, |
| "grad_norm": 0.4007555279082937, |
| "learning_rate": 8.95418910586679e-06, |
| "loss": 0.0205, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.0491355777979983, |
| "grad_norm": 0.37932885662916804, |
| "learning_rate": 8.953314178324782e-06, |
| "loss": 0.0261, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.0495905368516834, |
| "grad_norm": 0.6331059696275105, |
| "learning_rate": 8.952438927735793e-06, |
| "loss": 0.0397, |
| "step": 2307 |
| }, |
| { |
| "epoch": 1.0500454959053684, |
| "grad_norm": 0.5533999405103901, |
| "learning_rate": 8.951563354171343e-06, |
| "loss": 0.0216, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.0505004549590538, |
| "grad_norm": 0.5064049753801714, |
| "learning_rate": 8.950687457702981e-06, |
| "loss": 0.0253, |
| "step": 2309 |
| }, |
| { |
| "epoch": 1.0509554140127388, |
| "grad_norm": 0.7762514931128638, |
| "learning_rate": 8.94981123840228e-06, |
| "loss": 0.0257, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.051410373066424, |
| "grad_norm": 0.5258772784610919, |
| "learning_rate": 8.948934696340842e-06, |
| "loss": 0.0402, |
| "step": 2311 |
| }, |
| { |
| "epoch": 1.0518653321201092, |
| "grad_norm": 0.5179164003875761, |
| "learning_rate": 8.948057831590296e-06, |
| "loss": 0.0392, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.0523202911737943, |
| "grad_norm": 0.4873683404674824, |
| "learning_rate": 8.94718064422229e-06, |
| "loss": 0.0225, |
| "step": 2313 |
| }, |
| { |
| "epoch": 1.0527752502274794, |
| "grad_norm": 0.42294954664238593, |
| "learning_rate": 8.94630313430851e-06, |
| "loss": 0.0239, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.0532302092811647, |
| "grad_norm": 0.5120965619588207, |
| "learning_rate": 8.945425301920656e-06, |
| "loss": 0.0239, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.0536851683348498, |
| "grad_norm": 0.5274581767565953, |
| "learning_rate": 8.944547147130467e-06, |
| "loss": 0.0395, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.0541401273885351, |
| "grad_norm": 0.6240914390797723, |
| "learning_rate": 8.943668670009698e-06, |
| "loss": 0.04, |
| "step": 2317 |
| }, |
| { |
| "epoch": 1.0545950864422202, |
| "grad_norm": 0.588480807715609, |
| "learning_rate": 8.942789870630133e-06, |
| "loss": 0.0379, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.0550500454959053, |
| "grad_norm": 0.5328051168509789, |
| "learning_rate": 8.941910749063587e-06, |
| "loss": 0.0256, |
| "step": 2319 |
| }, |
| { |
| "epoch": 1.0555050045495906, |
| "grad_norm": 0.5662136884367794, |
| "learning_rate": 8.941031305381894e-06, |
| "loss": 0.0349, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.0559599636032757, |
| "grad_norm": 0.4080289916939306, |
| "learning_rate": 8.940151539656922e-06, |
| "loss": 0.0203, |
| "step": 2321 |
| }, |
| { |
| "epoch": 1.0564149226569608, |
| "grad_norm": 0.6644738842779135, |
| "learning_rate": 8.93927145196056e-06, |
| "loss": 0.0295, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.056869881710646, |
| "grad_norm": 0.43989425636246393, |
| "learning_rate": 8.938391042364723e-06, |
| "loss": 0.0257, |
| "step": 2323 |
| }, |
| { |
| "epoch": 1.0573248407643312, |
| "grad_norm": 0.5431541428763835, |
| "learning_rate": 8.937510310941358e-06, |
| "loss": 0.03, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.0577797998180163, |
| "grad_norm": 0.5122724279785533, |
| "learning_rate": 8.936629257762429e-06, |
| "loss": 0.0273, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.0582347588717016, |
| "grad_norm": 0.41195858239961775, |
| "learning_rate": 8.935747882899937e-06, |
| "loss": 0.0216, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.0586897179253867, |
| "grad_norm": 0.5171757707727286, |
| "learning_rate": 8.9348661864259e-06, |
| "loss": 0.0299, |
| "step": 2327 |
| }, |
| { |
| "epoch": 1.0591446769790718, |
| "grad_norm": 0.6216382380161013, |
| "learning_rate": 8.93398416841237e-06, |
| "loss": 0.0525, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.059599636032757, |
| "grad_norm": 0.47615445722593264, |
| "learning_rate": 8.933101828931418e-06, |
| "loss": 0.0229, |
| "step": 2329 |
| }, |
| { |
| "epoch": 1.0600545950864422, |
| "grad_norm": 0.5543921495737715, |
| "learning_rate": 8.932219168055146e-06, |
| "loss": 0.0353, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0605095541401275, |
| "grad_norm": 0.4807073495602966, |
| "learning_rate": 8.931336185855682e-06, |
| "loss": 0.029, |
| "step": 2331 |
| }, |
| { |
| "epoch": 1.0609645131938126, |
| "grad_norm": 0.7132043951444881, |
| "learning_rate": 8.930452882405178e-06, |
| "loss": 0.0573, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.0614194722474977, |
| "grad_norm": 0.7323908092635573, |
| "learning_rate": 8.929569257775816e-06, |
| "loss": 0.031, |
| "step": 2333 |
| }, |
| { |
| "epoch": 1.061874431301183, |
| "grad_norm": 0.7282498524373471, |
| "learning_rate": 8.9286853120398e-06, |
| "loss": 0.0212, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.062329390354868, |
| "grad_norm": 0.5041730540211715, |
| "learning_rate": 8.92780104526936e-06, |
| "loss": 0.0219, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.0627843494085532, |
| "grad_norm": 0.5694707546108049, |
| "learning_rate": 8.926916457536755e-06, |
| "loss": 0.0277, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.0632393084622385, |
| "grad_norm": 0.4942987205465501, |
| "learning_rate": 8.926031548914274e-06, |
| "loss": 0.0283, |
| "step": 2337 |
| }, |
| { |
| "epoch": 1.0636942675159236, |
| "grad_norm": 0.7094719472889628, |
| "learning_rate": 8.925146319474225e-06, |
| "loss": 0.0484, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.0641492265696086, |
| "grad_norm": 0.5401572696577567, |
| "learning_rate": 8.924260769288944e-06, |
| "loss": 0.032, |
| "step": 2339 |
| }, |
| { |
| "epoch": 1.064604185623294, |
| "grad_norm": 0.6271229371930636, |
| "learning_rate": 8.923374898430794e-06, |
| "loss": 0.0417, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.065059144676979, |
| "grad_norm": 0.5384710947557135, |
| "learning_rate": 8.922488706972165e-06, |
| "loss": 0.028, |
| "step": 2341 |
| }, |
| { |
| "epoch": 1.0655141037306644, |
| "grad_norm": 0.5738095562796759, |
| "learning_rate": 8.921602194985473e-06, |
| "loss": 0.0251, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.0659690627843494, |
| "grad_norm": 0.4114388383836, |
| "learning_rate": 8.920715362543158e-06, |
| "loss": 0.0257, |
| "step": 2343 |
| }, |
| { |
| "epoch": 1.0664240218380345, |
| "grad_norm": 0.4407026853756295, |
| "learning_rate": 8.919828209717691e-06, |
| "loss": 0.0318, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.0668789808917198, |
| "grad_norm": 0.5795706484311789, |
| "learning_rate": 8.918940736581565e-06, |
| "loss": 0.0384, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.067333939945405, |
| "grad_norm": 0.4997138165488597, |
| "learning_rate": 8.918052943207298e-06, |
| "loss": 0.0339, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.06778889899909, |
| "grad_norm": 0.6466785074736559, |
| "learning_rate": 8.91716482966744e-06, |
| "loss": 0.0412, |
| "step": 2347 |
| }, |
| { |
| "epoch": 1.0682438580527753, |
| "grad_norm": 0.6101860514996267, |
| "learning_rate": 8.916276396034561e-06, |
| "loss": 0.0349, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.0686988171064604, |
| "grad_norm": 0.6648890763063255, |
| "learning_rate": 8.915387642381261e-06, |
| "loss": 0.0374, |
| "step": 2349 |
| }, |
| { |
| "epoch": 1.0691537761601455, |
| "grad_norm": 0.6435783427790035, |
| "learning_rate": 8.914498568780163e-06, |
| "loss": 0.0425, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0696087352138308, |
| "grad_norm": 0.4168529921191238, |
| "learning_rate": 8.913609175303923e-06, |
| "loss": 0.0222, |
| "step": 2351 |
| }, |
| { |
| "epoch": 1.070063694267516, |
| "grad_norm": 0.370333742802149, |
| "learning_rate": 8.912719462025213e-06, |
| "loss": 0.018, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.070518653321201, |
| "grad_norm": 0.3929772094003772, |
| "learning_rate": 8.911829429016737e-06, |
| "loss": 0.0184, |
| "step": 2353 |
| }, |
| { |
| "epoch": 1.0709736123748863, |
| "grad_norm": 0.36777976145335695, |
| "learning_rate": 8.910939076351228e-06, |
| "loss": 0.0199, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.5445905742319043, |
| "learning_rate": 8.910048404101437e-06, |
| "loss": 0.0297, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.0718835304822565, |
| "grad_norm": 0.517651494476337, |
| "learning_rate": 8.90915741234015e-06, |
| "loss": 0.0244, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.0723384895359418, |
| "grad_norm": 0.6079868190664829, |
| "learning_rate": 8.908266101140173e-06, |
| "loss": 0.0327, |
| "step": 2357 |
| }, |
| { |
| "epoch": 1.0727934485896269, |
| "grad_norm": 0.5005614750938115, |
| "learning_rate": 8.907374470574339e-06, |
| "loss": 0.0288, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.0732484076433122, |
| "grad_norm": 0.41084278869296126, |
| "learning_rate": 8.906482520715508e-06, |
| "loss": 0.0196, |
| "step": 2359 |
| }, |
| { |
| "epoch": 1.0737033666969973, |
| "grad_norm": 0.42883961230062595, |
| "learning_rate": 8.905590251636566e-06, |
| "loss": 0.0201, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.0741583257506824, |
| "grad_norm": 0.7507509176249603, |
| "learning_rate": 8.904697663410429e-06, |
| "loss": 0.0519, |
| "step": 2361 |
| }, |
| { |
| "epoch": 1.0746132848043677, |
| "grad_norm": 0.35684834441788627, |
| "learning_rate": 8.90380475611003e-06, |
| "loss": 0.0193, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.0750682438580528, |
| "grad_norm": 0.359991301638448, |
| "learning_rate": 8.902911529808338e-06, |
| "loss": 0.02, |
| "step": 2363 |
| }, |
| { |
| "epoch": 1.0755232029117379, |
| "grad_norm": 0.6485293447004715, |
| "learning_rate": 8.90201798457834e-06, |
| "loss": 0.05, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.0759781619654232, |
| "grad_norm": 0.35596882973823685, |
| "learning_rate": 8.901124120493055e-06, |
| "loss": 0.0201, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.0764331210191083, |
| "grad_norm": 0.5195485453283638, |
| "learning_rate": 8.900229937625522e-06, |
| "loss": 0.0267, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.0768880800727934, |
| "grad_norm": 0.5121436407601963, |
| "learning_rate": 8.899335436048813e-06, |
| "loss": 0.0293, |
| "step": 2367 |
| }, |
| { |
| "epoch": 1.0773430391264787, |
| "grad_norm": 0.574083355691705, |
| "learning_rate": 8.898440615836021e-06, |
| "loss": 0.0314, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.0777979981801638, |
| "grad_norm": 0.36323016195490376, |
| "learning_rate": 8.897545477060268e-06, |
| "loss": 0.0164, |
| "step": 2369 |
| }, |
| { |
| "epoch": 1.078252957233849, |
| "grad_norm": 0.44874033315946665, |
| "learning_rate": 8.8966500197947e-06, |
| "loss": 0.0255, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0787079162875342, |
| "grad_norm": 0.4549169634711705, |
| "learning_rate": 8.895754244112486e-06, |
| "loss": 0.0252, |
| "step": 2371 |
| }, |
| { |
| "epoch": 1.0791628753412192, |
| "grad_norm": 0.5188300138751303, |
| "learning_rate": 8.894858150086832e-06, |
| "loss": 0.022, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.0796178343949046, |
| "grad_norm": 0.5077854205250166, |
| "learning_rate": 8.893961737790957e-06, |
| "loss": 0.027, |
| "step": 2373 |
| }, |
| { |
| "epoch": 1.0800727934485896, |
| "grad_norm": 0.5080695970336, |
| "learning_rate": 8.893065007298116e-06, |
| "loss": 0.0293, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.0805277525022747, |
| "grad_norm": 0.49124016807194615, |
| "learning_rate": 8.89216795868158e-06, |
| "loss": 0.0253, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.08098271155596, |
| "grad_norm": 0.746420330430573, |
| "learning_rate": 8.891270592014658e-06, |
| "loss": 0.0393, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.0814376706096451, |
| "grad_norm": 0.5899621371906842, |
| "learning_rate": 8.890372907370677e-06, |
| "loss": 0.0325, |
| "step": 2377 |
| }, |
| { |
| "epoch": 1.0818926296633302, |
| "grad_norm": 0.538668781912988, |
| "learning_rate": 8.889474904822987e-06, |
| "loss": 0.0254, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.0823475887170155, |
| "grad_norm": 0.48796027217616167, |
| "learning_rate": 8.888576584444976e-06, |
| "loss": 0.0284, |
| "step": 2379 |
| }, |
| { |
| "epoch": 1.0828025477707006, |
| "grad_norm": 0.4607384499708701, |
| "learning_rate": 8.887677946310045e-06, |
| "loss": 0.0293, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.0832575068243857, |
| "grad_norm": 0.6691227522534325, |
| "learning_rate": 8.886778990491632e-06, |
| "loss": 0.0479, |
| "step": 2381 |
| }, |
| { |
| "epoch": 1.083712465878071, |
| "grad_norm": 0.4131339751828579, |
| "learning_rate": 8.885879717063189e-06, |
| "loss": 0.0232, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.084167424931756, |
| "grad_norm": 0.49834287436563, |
| "learning_rate": 8.884980126098206e-06, |
| "loss": 0.0261, |
| "step": 2383 |
| }, |
| { |
| "epoch": 1.0846223839854412, |
| "grad_norm": 0.49133678192638947, |
| "learning_rate": 8.88408021767019e-06, |
| "loss": 0.0217, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.0850773430391265, |
| "grad_norm": 0.4897177991752284, |
| "learning_rate": 8.88317999185268e-06, |
| "loss": 0.0304, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.0855323020928116, |
| "grad_norm": 0.5332982190122252, |
| "learning_rate": 8.882279448719235e-06, |
| "loss": 0.024, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.085987261146497, |
| "grad_norm": 0.39337001966991797, |
| "learning_rate": 8.881378588343448e-06, |
| "loss": 0.0195, |
| "step": 2387 |
| }, |
| { |
| "epoch": 1.086442220200182, |
| "grad_norm": 0.5648723431118464, |
| "learning_rate": 8.88047741079893e-06, |
| "loss": 0.0277, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.086897179253867, |
| "grad_norm": 0.38358401084782046, |
| "learning_rate": 8.879575916159323e-06, |
| "loss": 0.0234, |
| "step": 2389 |
| }, |
| { |
| "epoch": 1.0873521383075524, |
| "grad_norm": 0.4916039064871815, |
| "learning_rate": 8.878674104498293e-06, |
| "loss": 0.0196, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.0878070973612375, |
| "grad_norm": 0.4574406020630443, |
| "learning_rate": 8.877771975889529e-06, |
| "loss": 0.0266, |
| "step": 2391 |
| }, |
| { |
| "epoch": 1.0882620564149226, |
| "grad_norm": 1.2527103886930033, |
| "learning_rate": 8.876869530406753e-06, |
| "loss": 0.085, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.0887170154686079, |
| "grad_norm": 0.6740099441800771, |
| "learning_rate": 8.875966768123705e-06, |
| "loss": 0.0491, |
| "step": 2393 |
| }, |
| { |
| "epoch": 1.089171974522293, |
| "grad_norm": 0.8127319301316774, |
| "learning_rate": 8.875063689114157e-06, |
| "loss": 0.0351, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.089626933575978, |
| "grad_norm": 0.6883882884250196, |
| "learning_rate": 8.874160293451903e-06, |
| "loss": 0.0351, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.0900818926296634, |
| "grad_norm": 0.472050537765526, |
| "learning_rate": 8.873256581210767e-06, |
| "loss": 0.0281, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.0905368516833485, |
| "grad_norm": 0.43429585005126187, |
| "learning_rate": 8.872352552464594e-06, |
| "loss": 0.0217, |
| "step": 2397 |
| }, |
| { |
| "epoch": 1.0909918107370338, |
| "grad_norm": 0.7559591015285818, |
| "learning_rate": 8.871448207287259e-06, |
| "loss": 0.0234, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.0914467697907189, |
| "grad_norm": 1.295843093263791, |
| "learning_rate": 8.870543545752657e-06, |
| "loss": 0.0378, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.091901728844404, |
| "grad_norm": 0.687703240327456, |
| "learning_rate": 8.869638567934718e-06, |
| "loss": 0.0428, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0923566878980893, |
| "grad_norm": 0.5316380088515792, |
| "learning_rate": 8.86873327390739e-06, |
| "loss": 0.0207, |
| "step": 2401 |
| }, |
| { |
| "epoch": 1.0928116469517744, |
| "grad_norm": 0.37080940024955544, |
| "learning_rate": 8.867827663744649e-06, |
| "loss": 0.014, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.0932666060054594, |
| "grad_norm": 0.551372034105751, |
| "learning_rate": 8.8669217375205e-06, |
| "loss": 0.0407, |
| "step": 2403 |
| }, |
| { |
| "epoch": 1.0937215650591448, |
| "grad_norm": 0.550827427093742, |
| "learning_rate": 8.866015495308967e-06, |
| "loss": 0.0295, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.0941765241128298, |
| "grad_norm": 0.5312346261037174, |
| "learning_rate": 8.865108937184108e-06, |
| "loss": 0.0329, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.094631483166515, |
| "grad_norm": 0.606116027973049, |
| "learning_rate": 8.864202063220003e-06, |
| "loss": 0.036, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.0950864422202002, |
| "grad_norm": 0.5039409256044083, |
| "learning_rate": 8.863294873490752e-06, |
| "loss": 0.0237, |
| "step": 2407 |
| }, |
| { |
| "epoch": 1.0955414012738853, |
| "grad_norm": 0.7088932326845141, |
| "learning_rate": 8.862387368070493e-06, |
| "loss": 0.0502, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.0959963603275704, |
| "grad_norm": 0.42457685669799344, |
| "learning_rate": 8.86147954703338e-06, |
| "loss": 0.0232, |
| "step": 2409 |
| }, |
| { |
| "epoch": 1.0964513193812557, |
| "grad_norm": 0.400049727629285, |
| "learning_rate": 8.860571410453598e-06, |
| "loss": 0.0137, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0969062784349408, |
| "grad_norm": 0.5528326412344238, |
| "learning_rate": 8.859662958405352e-06, |
| "loss": 0.0259, |
| "step": 2411 |
| }, |
| { |
| "epoch": 1.097361237488626, |
| "grad_norm": 0.3740020218354164, |
| "learning_rate": 8.858754190962881e-06, |
| "loss": 0.0207, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.0978161965423112, |
| "grad_norm": 0.43380267454252947, |
| "learning_rate": 8.857845108200443e-06, |
| "loss": 0.03, |
| "step": 2413 |
| }, |
| { |
| "epoch": 1.0982711555959963, |
| "grad_norm": 0.41117776188244837, |
| "learning_rate": 8.856935710192326e-06, |
| "loss": 0.0217, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.0987261146496816, |
| "grad_norm": 0.7295481072089418, |
| "learning_rate": 8.856025997012837e-06, |
| "loss": 0.0355, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.0991810737033667, |
| "grad_norm": 0.6100308273835641, |
| "learning_rate": 8.85511596873632e-06, |
| "loss": 0.0369, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.0996360327570518, |
| "grad_norm": 0.41413261117443184, |
| "learning_rate": 8.854205625437135e-06, |
| "loss": 0.0198, |
| "step": 2417 |
| }, |
| { |
| "epoch": 1.100090991810737, |
| "grad_norm": 0.45865368499615844, |
| "learning_rate": 8.853294967189672e-06, |
| "loss": 0.0274, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.1005459508644222, |
| "grad_norm": 0.49503724640291885, |
| "learning_rate": 8.852383994068345e-06, |
| "loss": 0.039, |
| "step": 2419 |
| }, |
| { |
| "epoch": 1.1010009099181073, |
| "grad_norm": 0.3278139965958097, |
| "learning_rate": 8.851472706147595e-06, |
| "loss": 0.02, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.1014558689717926, |
| "grad_norm": 0.7072991481662654, |
| "learning_rate": 8.85056110350189e-06, |
| "loss": 0.0478, |
| "step": 2421 |
| }, |
| { |
| "epoch": 1.1019108280254777, |
| "grad_norm": 0.3754428113606483, |
| "learning_rate": 8.84964918620572e-06, |
| "loss": 0.0204, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.1023657870791628, |
| "grad_norm": 0.7096758634544409, |
| "learning_rate": 8.848736954333603e-06, |
| "loss": 0.0335, |
| "step": 2423 |
| }, |
| { |
| "epoch": 1.102820746132848, |
| "grad_norm": 0.5727995354405594, |
| "learning_rate": 8.847824407960083e-06, |
| "loss": 0.0323, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.1032757051865332, |
| "grad_norm": 0.6229568548114003, |
| "learning_rate": 8.84691154715973e-06, |
| "loss": 0.0309, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.1037306642402185, |
| "grad_norm": 0.5010513455704715, |
| "learning_rate": 8.845998372007136e-06, |
| "loss": 0.0286, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.1041856232939036, |
| "grad_norm": 0.34862957832393143, |
| "learning_rate": 8.845084882576924e-06, |
| "loss": 0.0165, |
| "step": 2427 |
| }, |
| { |
| "epoch": 1.1046405823475887, |
| "grad_norm": 0.5610710585811625, |
| "learning_rate": 8.84417107894374e-06, |
| "loss": 0.0381, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.105095541401274, |
| "grad_norm": 0.3998367702132408, |
| "learning_rate": 8.843256961182255e-06, |
| "loss": 0.0186, |
| "step": 2429 |
| }, |
| { |
| "epoch": 1.105550500454959, |
| "grad_norm": 0.6787215229828617, |
| "learning_rate": 8.842342529367167e-06, |
| "loss": 0.0487, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.1060054595086442, |
| "grad_norm": 0.6483563929183911, |
| "learning_rate": 8.8414277835732e-06, |
| "loss": 0.0409, |
| "step": 2431 |
| }, |
| { |
| "epoch": 1.1064604185623295, |
| "grad_norm": 0.6351823340870137, |
| "learning_rate": 8.840512723875103e-06, |
| "loss": 0.0497, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.1069153776160146, |
| "grad_norm": 0.3467791981865341, |
| "learning_rate": 8.839597350347648e-06, |
| "loss": 0.0172, |
| "step": 2433 |
| }, |
| { |
| "epoch": 1.1073703366696996, |
| "grad_norm": 0.4877926867999841, |
| "learning_rate": 8.838681663065638e-06, |
| "loss": 0.0268, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.107825295723385, |
| "grad_norm": 0.561052741145843, |
| "learning_rate": 8.837765662103898e-06, |
| "loss": 0.0351, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.10828025477707, |
| "grad_norm": 0.5339886977527083, |
| "learning_rate": 8.836849347537278e-06, |
| "loss": 0.0286, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.1087352138307551, |
| "grad_norm": 0.41940315295115715, |
| "learning_rate": 8.835932719440658e-06, |
| "loss": 0.016, |
| "step": 2437 |
| }, |
| { |
| "epoch": 1.1091901728844404, |
| "grad_norm": 0.500811248377599, |
| "learning_rate": 8.835015777888938e-06, |
| "loss": 0.0277, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.1096451319381255, |
| "grad_norm": 0.6905252242552301, |
| "learning_rate": 8.83409852295705e-06, |
| "loss": 0.0451, |
| "step": 2439 |
| }, |
| { |
| "epoch": 1.1101000909918108, |
| "grad_norm": 0.4932334291437054, |
| "learning_rate": 8.833180954719941e-06, |
| "loss": 0.023, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.110555050045496, |
| "grad_norm": 0.32570391119462067, |
| "learning_rate": 8.832263073252597e-06, |
| "loss": 0.0223, |
| "step": 2441 |
| }, |
| { |
| "epoch": 1.111010009099181, |
| "grad_norm": 0.5189620509513116, |
| "learning_rate": 8.831344878630022e-06, |
| "loss": 0.0345, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.1114649681528663, |
| "grad_norm": 0.35471915929013836, |
| "learning_rate": 8.830426370927246e-06, |
| "loss": 0.0178, |
| "step": 2443 |
| }, |
| { |
| "epoch": 1.1119199272065514, |
| "grad_norm": 0.4071867204646678, |
| "learning_rate": 8.829507550219323e-06, |
| "loss": 0.0187, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.1123748862602365, |
| "grad_norm": 0.5327053422443435, |
| "learning_rate": 8.828588416581338e-06, |
| "loss": 0.0321, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.1128298453139218, |
| "grad_norm": 0.4727447057361278, |
| "learning_rate": 8.827668970088397e-06, |
| "loss": 0.0256, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.113284804367607, |
| "grad_norm": 0.44344698021715867, |
| "learning_rate": 8.826749210815634e-06, |
| "loss": 0.0212, |
| "step": 2447 |
| }, |
| { |
| "epoch": 1.113739763421292, |
| "grad_norm": 0.48653354586078956, |
| "learning_rate": 8.825829138838206e-06, |
| "loss": 0.0252, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.1141947224749773, |
| "grad_norm": 0.4904789767614279, |
| "learning_rate": 8.824908754231299e-06, |
| "loss": 0.0219, |
| "step": 2449 |
| }, |
| { |
| "epoch": 1.1146496815286624, |
| "grad_norm": 0.5096306577344566, |
| "learning_rate": 8.823988057070122e-06, |
| "loss": 0.0269, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.1151046405823477, |
| "grad_norm": 0.4524604770972165, |
| "learning_rate": 8.823067047429908e-06, |
| "loss": 0.0197, |
| "step": 2451 |
| }, |
| { |
| "epoch": 1.1155595996360328, |
| "grad_norm": 0.6661762941224277, |
| "learning_rate": 8.82214572538592e-06, |
| "loss": 0.0432, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.1160145586897179, |
| "grad_norm": 0.45413808918893234, |
| "learning_rate": 8.821224091013445e-06, |
| "loss": 0.0252, |
| "step": 2453 |
| }, |
| { |
| "epoch": 1.1164695177434032, |
| "grad_norm": 0.4564359066247584, |
| "learning_rate": 8.820302144387794e-06, |
| "loss": 0.0305, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.1169244767970883, |
| "grad_norm": 0.5331752474098931, |
| "learning_rate": 8.819379885584303e-06, |
| "loss": 0.0285, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.1173794358507734, |
| "grad_norm": 0.8314482044455632, |
| "learning_rate": 8.818457314678336e-06, |
| "loss": 0.0474, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.1178343949044587, |
| "grad_norm": 0.5831509752587852, |
| "learning_rate": 8.817534431745283e-06, |
| "loss": 0.0204, |
| "step": 2457 |
| }, |
| { |
| "epoch": 1.1182893539581438, |
| "grad_norm": 0.42113991056064237, |
| "learning_rate": 8.816611236860554e-06, |
| "loss": 0.0207, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.1187443130118289, |
| "grad_norm": 0.5492674131587796, |
| "learning_rate": 8.815687730099594e-06, |
| "loss": 0.023, |
| "step": 2459 |
| }, |
| { |
| "epoch": 1.1191992720655142, |
| "grad_norm": 0.5627677712218775, |
| "learning_rate": 8.81476391153786e-06, |
| "loss": 0.0238, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1196542311191993, |
| "grad_norm": 0.306412099822185, |
| "learning_rate": 8.813839781250848e-06, |
| "loss": 0.0136, |
| "step": 2461 |
| }, |
| { |
| "epoch": 1.1201091901728844, |
| "grad_norm": 0.4884139369729457, |
| "learning_rate": 8.812915339314073e-06, |
| "loss": 0.0325, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.1205641492265697, |
| "grad_norm": 0.6440331779678226, |
| "learning_rate": 8.811990585803074e-06, |
| "loss": 0.0462, |
| "step": 2463 |
| }, |
| { |
| "epoch": 1.1210191082802548, |
| "grad_norm": 0.6354635395644428, |
| "learning_rate": 8.81106552079342e-06, |
| "loss": 0.0326, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.1214740673339398, |
| "grad_norm": 0.4841057095746355, |
| "learning_rate": 8.810140144360701e-06, |
| "loss": 0.0288, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.1219290263876252, |
| "grad_norm": 0.7578064954916388, |
| "learning_rate": 8.809214456580539e-06, |
| "loss": 0.0444, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.1223839854413102, |
| "grad_norm": 0.36333027437030824, |
| "learning_rate": 8.80828845752857e-06, |
| "loss": 0.0166, |
| "step": 2467 |
| }, |
| { |
| "epoch": 1.1228389444949956, |
| "grad_norm": 1.0828419984965674, |
| "learning_rate": 8.80736214728047e-06, |
| "loss": 0.0509, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.1232939035486806, |
| "grad_norm": 0.41035853061268457, |
| "learning_rate": 8.806435525911927e-06, |
| "loss": 0.0152, |
| "step": 2469 |
| }, |
| { |
| "epoch": 1.1237488626023657, |
| "grad_norm": 0.48117366130842515, |
| "learning_rate": 8.805508593498662e-06, |
| "loss": 0.0358, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.124203821656051, |
| "grad_norm": 0.48865070302034325, |
| "learning_rate": 8.804581350116422e-06, |
| "loss": 0.0248, |
| "step": 2471 |
| }, |
| { |
| "epoch": 1.1246587807097361, |
| "grad_norm": 0.6166160347574816, |
| "learning_rate": 8.803653795840974e-06, |
| "loss": 0.0372, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.1251137397634212, |
| "grad_norm": 0.4235666133907878, |
| "learning_rate": 8.802725930748115e-06, |
| "loss": 0.0224, |
| "step": 2473 |
| }, |
| { |
| "epoch": 1.1255686988171065, |
| "grad_norm": 0.49371023402555386, |
| "learning_rate": 8.801797754913667e-06, |
| "loss": 0.0253, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.1260236578707916, |
| "grad_norm": 0.5375981231946215, |
| "learning_rate": 8.800869268413475e-06, |
| "loss": 0.0303, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.1264786169244767, |
| "grad_norm": 0.6200342528643785, |
| "learning_rate": 8.79994047132341e-06, |
| "loss": 0.0301, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.126933575978162, |
| "grad_norm": 0.7763567599332302, |
| "learning_rate": 8.79901136371937e-06, |
| "loss": 0.0367, |
| "step": 2477 |
| }, |
| { |
| "epoch": 1.127388535031847, |
| "grad_norm": 0.4168679527566863, |
| "learning_rate": 8.798081945677279e-06, |
| "loss": 0.0193, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.1278434940855324, |
| "grad_norm": 0.5499515478297102, |
| "learning_rate": 8.797152217273082e-06, |
| "loss": 0.0232, |
| "step": 2479 |
| }, |
| { |
| "epoch": 1.1282984531392175, |
| "grad_norm": 0.3629031290073349, |
| "learning_rate": 8.796222178582756e-06, |
| "loss": 0.0217, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1287534121929026, |
| "grad_norm": 0.539897737827513, |
| "learning_rate": 8.795291829682293e-06, |
| "loss": 0.0272, |
| "step": 2481 |
| }, |
| { |
| "epoch": 1.129208371246588, |
| "grad_norm": 0.5636939303591514, |
| "learning_rate": 8.794361170647723e-06, |
| "loss": 0.0322, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.129663330300273, |
| "grad_norm": 0.6219815104303015, |
| "learning_rate": 8.793430201555095e-06, |
| "loss": 0.0274, |
| "step": 2483 |
| }, |
| { |
| "epoch": 1.130118289353958, |
| "grad_norm": 0.6542904517198702, |
| "learning_rate": 8.79249892248048e-06, |
| "loss": 0.0358, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.1305732484076434, |
| "grad_norm": 0.46666017679304383, |
| "learning_rate": 8.79156733349998e-06, |
| "loss": 0.0308, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.1310282074613285, |
| "grad_norm": 0.643787908195578, |
| "learning_rate": 8.790635434689722e-06, |
| "loss": 0.0325, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.1314831665150136, |
| "grad_norm": 0.6798497056398047, |
| "learning_rate": 8.789703226125853e-06, |
| "loss": 0.0388, |
| "step": 2487 |
| }, |
| { |
| "epoch": 1.1319381255686989, |
| "grad_norm": 0.45682700520723596, |
| "learning_rate": 8.78877070788455e-06, |
| "loss": 0.0248, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.132393084622384, |
| "grad_norm": 0.520494224107322, |
| "learning_rate": 8.787837880042016e-06, |
| "loss": 0.0251, |
| "step": 2489 |
| }, |
| { |
| "epoch": 1.132848043676069, |
| "grad_norm": 0.5608809735379154, |
| "learning_rate": 8.786904742674476e-06, |
| "loss": 0.0354, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1333030027297544, |
| "grad_norm": 0.5383912877252518, |
| "learning_rate": 8.78597129585818e-06, |
| "loss": 0.0252, |
| "step": 2491 |
| }, |
| { |
| "epoch": 1.1337579617834395, |
| "grad_norm": 0.3952421850434973, |
| "learning_rate": 8.78503753966941e-06, |
| "loss": 0.0191, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.1342129208371245, |
| "grad_norm": 0.7660377240440205, |
| "learning_rate": 8.784103474184463e-06, |
| "loss": 0.0372, |
| "step": 2493 |
| }, |
| { |
| "epoch": 1.1346678798908099, |
| "grad_norm": 0.45419840808136375, |
| "learning_rate": 8.783169099479669e-06, |
| "loss": 0.0237, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.135122838944495, |
| "grad_norm": 0.6963944475868004, |
| "learning_rate": 8.782234415631381e-06, |
| "loss": 0.0402, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.1355777979981803, |
| "grad_norm": 0.43802475738162483, |
| "learning_rate": 8.781299422715979e-06, |
| "loss": 0.0238, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.1360327570518653, |
| "grad_norm": 0.6062845259672841, |
| "learning_rate": 8.780364120809863e-06, |
| "loss": 0.0299, |
| "step": 2497 |
| }, |
| { |
| "epoch": 1.1364877161055504, |
| "grad_norm": 0.44459971712814256, |
| "learning_rate": 8.779428509989463e-06, |
| "loss": 0.0205, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.1369426751592357, |
| "grad_norm": 0.8182256630287221, |
| "learning_rate": 8.778492590331234e-06, |
| "loss": 0.0358, |
| "step": 2499 |
| }, |
| { |
| "epoch": 1.1373976342129208, |
| "grad_norm": 0.35292113524041313, |
| "learning_rate": 8.777556361911652e-06, |
| "loss": 0.0188, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.137852593266606, |
| "grad_norm": 0.5495898839385301, |
| "learning_rate": 8.776619824807225e-06, |
| "loss": 0.0403, |
| "step": 2501 |
| }, |
| { |
| "epoch": 1.1383075523202912, |
| "grad_norm": 0.47715012261917683, |
| "learning_rate": 8.77568297909448e-06, |
| "loss": 0.0308, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.1387625113739763, |
| "grad_norm": 0.5057002315147829, |
| "learning_rate": 8.774745824849973e-06, |
| "loss": 0.0255, |
| "step": 2503 |
| }, |
| { |
| "epoch": 1.1392174704276614, |
| "grad_norm": 0.637445487028803, |
| "learning_rate": 8.773808362150284e-06, |
| "loss": 0.0441, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.1396724294813467, |
| "grad_norm": 0.46970000948757085, |
| "learning_rate": 8.772870591072016e-06, |
| "loss": 0.0203, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.1401273885350318, |
| "grad_norm": 0.48405940158780947, |
| "learning_rate": 8.771932511691805e-06, |
| "loss": 0.0248, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.1405823475887171, |
| "grad_norm": 0.5007699680851107, |
| "learning_rate": 8.7709941240863e-06, |
| "loss": 0.0299, |
| "step": 2507 |
| }, |
| { |
| "epoch": 1.1410373066424022, |
| "grad_norm": 0.47412512472759577, |
| "learning_rate": 8.770055428332187e-06, |
| "loss": 0.0289, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.1414922656960873, |
| "grad_norm": 0.6167640062421629, |
| "learning_rate": 8.769116424506168e-06, |
| "loss": 0.0308, |
| "step": 2509 |
| }, |
| { |
| "epoch": 1.1419472247497726, |
| "grad_norm": 0.39237316345479106, |
| "learning_rate": 8.768177112684976e-06, |
| "loss": 0.023, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1424021838034577, |
| "grad_norm": 0.5186908295343413, |
| "learning_rate": 8.767237492945372e-06, |
| "loss": 0.0253, |
| "step": 2511 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.5056070603356543, |
| "learning_rate": 8.766297565364127e-06, |
| "loss": 0.0269, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.143312101910828, |
| "grad_norm": 0.572114404769031, |
| "learning_rate": 8.765357330018056e-06, |
| "loss": 0.04, |
| "step": 2513 |
| }, |
| { |
| "epoch": 1.1437670609645132, |
| "grad_norm": 0.5742667251635876, |
| "learning_rate": 8.764416786983987e-06, |
| "loss": 0.0341, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.1442220200181983, |
| "grad_norm": 0.7921946978016261, |
| "learning_rate": 8.763475936338778e-06, |
| "loss": 0.0297, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.1446769790718836, |
| "grad_norm": 0.5932003547457203, |
| "learning_rate": 8.762534778159313e-06, |
| "loss": 0.0329, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.1451319381255687, |
| "grad_norm": 0.4383972484081299, |
| "learning_rate": 8.761593312522496e-06, |
| "loss": 0.026, |
| "step": 2517 |
| }, |
| { |
| "epoch": 1.1455868971792538, |
| "grad_norm": 0.494406013971066, |
| "learning_rate": 8.76065153950526e-06, |
| "loss": 0.0252, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.146041856232939, |
| "grad_norm": 0.41600285124838154, |
| "learning_rate": 8.759709459184565e-06, |
| "loss": 0.03, |
| "step": 2519 |
| }, |
| { |
| "epoch": 1.1464968152866242, |
| "grad_norm": 0.7103449624996373, |
| "learning_rate": 8.758767071637391e-06, |
| "loss": 0.0293, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1469517743403093, |
| "grad_norm": 0.7247596682387525, |
| "learning_rate": 8.757824376940748e-06, |
| "loss": 0.0534, |
| "step": 2521 |
| }, |
| { |
| "epoch": 1.1474067333939946, |
| "grad_norm": 0.5429066180348485, |
| "learning_rate": 8.756881375171664e-06, |
| "loss": 0.0366, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.1478616924476797, |
| "grad_norm": 0.5884373670939516, |
| "learning_rate": 8.755938066407201e-06, |
| "loss": 0.0335, |
| "step": 2523 |
| }, |
| { |
| "epoch": 1.148316651501365, |
| "grad_norm": 0.6156045708560577, |
| "learning_rate": 8.754994450724441e-06, |
| "loss": 0.0345, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.14877161055505, |
| "grad_norm": 0.5614699649040673, |
| "learning_rate": 8.754050528200493e-06, |
| "loss": 0.0329, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.1492265696087351, |
| "grad_norm": 0.6406021126928062, |
| "learning_rate": 8.753106298912488e-06, |
| "loss": 0.0306, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.1496815286624205, |
| "grad_norm": 0.5000438600163287, |
| "learning_rate": 8.752161762937586e-06, |
| "loss": 0.0223, |
| "step": 2527 |
| }, |
| { |
| "epoch": 1.1501364877161055, |
| "grad_norm": 0.3997197285041498, |
| "learning_rate": 8.751216920352967e-06, |
| "loss": 0.0221, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.1505914467697906, |
| "grad_norm": 0.5040179214810742, |
| "learning_rate": 8.750271771235844e-06, |
| "loss": 0.0196, |
| "step": 2529 |
| }, |
| { |
| "epoch": 1.151046405823476, |
| "grad_norm": 0.40549609696673644, |
| "learning_rate": 8.749326315663447e-06, |
| "loss": 0.0231, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.151501364877161, |
| "grad_norm": 0.406160230893779, |
| "learning_rate": 8.748380553713033e-06, |
| "loss": 0.0208, |
| "step": 2531 |
| }, |
| { |
| "epoch": 1.1519563239308463, |
| "grad_norm": 0.5844194685702613, |
| "learning_rate": 8.747434485461892e-06, |
| "loss": 0.0241, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.1524112829845314, |
| "grad_norm": 0.36029638509152084, |
| "learning_rate": 8.746488110987326e-06, |
| "loss": 0.015, |
| "step": 2533 |
| }, |
| { |
| "epoch": 1.1528662420382165, |
| "grad_norm": 0.7276197204807093, |
| "learning_rate": 8.745541430366671e-06, |
| "loss": 0.0418, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.1533212010919018, |
| "grad_norm": 1.5020467500828025, |
| "learning_rate": 8.744594443677284e-06, |
| "loss": 0.0582, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.153776160145587, |
| "grad_norm": 0.4311974728697227, |
| "learning_rate": 8.743647150996551e-06, |
| "loss": 0.0258, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.154231119199272, |
| "grad_norm": 0.6248463720530537, |
| "learning_rate": 8.742699552401878e-06, |
| "loss": 0.0398, |
| "step": 2537 |
| }, |
| { |
| "epoch": 1.1546860782529573, |
| "grad_norm": 0.5339944254155865, |
| "learning_rate": 8.7417516479707e-06, |
| "loss": 0.0252, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.1551410373066424, |
| "grad_norm": 0.3465118720450813, |
| "learning_rate": 8.740803437780474e-06, |
| "loss": 0.0183, |
| "step": 2539 |
| }, |
| { |
| "epoch": 1.1555959963603275, |
| "grad_norm": 0.6096918552154363, |
| "learning_rate": 8.739854921908684e-06, |
| "loss": 0.0318, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1560509554140128, |
| "grad_norm": 0.42626286323793855, |
| "learning_rate": 8.73890610043284e-06, |
| "loss": 0.0292, |
| "step": 2541 |
| }, |
| { |
| "epoch": 1.156505914467698, |
| "grad_norm": 0.47325164391197866, |
| "learning_rate": 8.737956973430475e-06, |
| "loss": 0.0337, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.156960873521383, |
| "grad_norm": 0.6214186683671308, |
| "learning_rate": 8.737007540979146e-06, |
| "loss": 0.0235, |
| "step": 2543 |
| }, |
| { |
| "epoch": 1.1574158325750683, |
| "grad_norm": 0.4958886649213906, |
| "learning_rate": 8.736057803156436e-06, |
| "loss": 0.0255, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.1578707916287534, |
| "grad_norm": 0.3732620529932146, |
| "learning_rate": 8.735107760039954e-06, |
| "loss": 0.0197, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.1583257506824385, |
| "grad_norm": 0.5778213004705967, |
| "learning_rate": 8.734157411707334e-06, |
| "loss": 0.0277, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.1587807097361238, |
| "grad_norm": 0.4850677867721973, |
| "learning_rate": 8.733206758236235e-06, |
| "loss": 0.0235, |
| "step": 2547 |
| }, |
| { |
| "epoch": 1.1592356687898089, |
| "grad_norm": 0.5687049775983313, |
| "learning_rate": 8.732255799704337e-06, |
| "loss": 0.0335, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.159690627843494, |
| "grad_norm": 0.5063906062734673, |
| "learning_rate": 8.73130453618935e-06, |
| "loss": 0.0224, |
| "step": 2549 |
| }, |
| { |
| "epoch": 1.1601455868971793, |
| "grad_norm": 0.4830706957588217, |
| "learning_rate": 8.730352967769007e-06, |
| "loss": 0.026, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.1606005459508644, |
| "grad_norm": 0.4565903397736301, |
| "learning_rate": 8.729401094521066e-06, |
| "loss": 0.0171, |
| "step": 2551 |
| }, |
| { |
| "epoch": 1.1610555050045497, |
| "grad_norm": 0.5299141705331825, |
| "learning_rate": 8.728448916523309e-06, |
| "loss": 0.0283, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.1615104640582348, |
| "grad_norm": 0.5618467862878425, |
| "learning_rate": 8.727496433853543e-06, |
| "loss": 0.0289, |
| "step": 2553 |
| }, |
| { |
| "epoch": 1.1619654231119199, |
| "grad_norm": 0.464342731748468, |
| "learning_rate": 8.726543646589605e-06, |
| "loss": 0.0202, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.1624203821656052, |
| "grad_norm": 0.5984943035378484, |
| "learning_rate": 8.725590554809346e-06, |
| "loss": 0.0387, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.1628753412192903, |
| "grad_norm": 0.3103247899143151, |
| "learning_rate": 8.724637158590652e-06, |
| "loss": 0.0172, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.1633303002729753, |
| "grad_norm": 0.5719001232225214, |
| "learning_rate": 8.72368345801143e-06, |
| "loss": 0.0328, |
| "step": 2557 |
| }, |
| { |
| "epoch": 1.1637852593266607, |
| "grad_norm": 0.7184689253863656, |
| "learning_rate": 8.722729453149613e-06, |
| "loss": 0.0256, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.1642402183803457, |
| "grad_norm": 0.4264869300929295, |
| "learning_rate": 8.721775144083155e-06, |
| "loss": 0.0273, |
| "step": 2559 |
| }, |
| { |
| "epoch": 1.164695177434031, |
| "grad_norm": 0.6992959245688258, |
| "learning_rate": 8.72082053089004e-06, |
| "loss": 0.0391, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1651501364877161, |
| "grad_norm": 0.5598830058244858, |
| "learning_rate": 8.719865613648276e-06, |
| "loss": 0.0348, |
| "step": 2561 |
| }, |
| { |
| "epoch": 1.1656050955414012, |
| "grad_norm": 0.4490293057873329, |
| "learning_rate": 8.718910392435892e-06, |
| "loss": 0.0185, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.1660600545950865, |
| "grad_norm": 0.3188239247752473, |
| "learning_rate": 8.717954867330943e-06, |
| "loss": 0.0118, |
| "step": 2563 |
| }, |
| { |
| "epoch": 1.1665150136487716, |
| "grad_norm": 0.529002754756549, |
| "learning_rate": 8.716999038411513e-06, |
| "loss": 0.0422, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.1669699727024567, |
| "grad_norm": 0.6102751055626958, |
| "learning_rate": 8.716042905755708e-06, |
| "loss": 0.0321, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.167424931756142, |
| "grad_norm": 0.4958464600211268, |
| "learning_rate": 8.715086469441659e-06, |
| "loss": 0.027, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.1678798908098271, |
| "grad_norm": 0.6925927485590572, |
| "learning_rate": 8.714129729547522e-06, |
| "loss": 0.0528, |
| "step": 2567 |
| }, |
| { |
| "epoch": 1.1683348498635122, |
| "grad_norm": 0.48346645004557054, |
| "learning_rate": 8.713172686151475e-06, |
| "loss": 0.0241, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.1687898089171975, |
| "grad_norm": 0.6160868757033329, |
| "learning_rate": 8.712215339331724e-06, |
| "loss": 0.0364, |
| "step": 2569 |
| }, |
| { |
| "epoch": 1.1692447679708826, |
| "grad_norm": 0.5521736841094272, |
| "learning_rate": 8.711257689166499e-06, |
| "loss": 0.0384, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1696997270245677, |
| "grad_norm": 0.4358123533199606, |
| "learning_rate": 8.710299735734057e-06, |
| "loss": 0.0218, |
| "step": 2571 |
| }, |
| { |
| "epoch": 1.170154686078253, |
| "grad_norm": 0.49989161769199447, |
| "learning_rate": 8.709341479112676e-06, |
| "loss": 0.019, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.170609645131938, |
| "grad_norm": 0.6461070187412289, |
| "learning_rate": 8.70838291938066e-06, |
| "loss": 0.05, |
| "step": 2573 |
| }, |
| { |
| "epoch": 1.1710646041856232, |
| "grad_norm": 0.5015730644729591, |
| "learning_rate": 8.70742405661634e-06, |
| "loss": 0.0262, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.1715195632393085, |
| "grad_norm": 0.6731652049317264, |
| "learning_rate": 8.706464890898068e-06, |
| "loss": 0.0417, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.1719745222929936, |
| "grad_norm": 0.5953498514866105, |
| "learning_rate": 8.705505422304224e-06, |
| "loss": 0.0251, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.1724294813466787, |
| "grad_norm": 0.49337464142227694, |
| "learning_rate": 8.70454565091321e-06, |
| "loss": 0.0283, |
| "step": 2577 |
| }, |
| { |
| "epoch": 1.172884440400364, |
| "grad_norm": 0.40746621618427764, |
| "learning_rate": 8.703585576803455e-06, |
| "loss": 0.0235, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.173339399454049, |
| "grad_norm": 0.574388099759434, |
| "learning_rate": 8.702625200053412e-06, |
| "loss": 0.0357, |
| "step": 2579 |
| }, |
| { |
| "epoch": 1.1737943585077344, |
| "grad_norm": 0.49209063287204186, |
| "learning_rate": 8.701664520741558e-06, |
| "loss": 0.0271, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1742493175614195, |
| "grad_norm": 0.49658769644628054, |
| "learning_rate": 8.700703538946396e-06, |
| "loss": 0.0312, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.1747042766151046, |
| "grad_norm": 0.48898735666034404, |
| "learning_rate": 8.699742254746452e-06, |
| "loss": 0.0308, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.1751592356687899, |
| "grad_norm": 0.6965571111870493, |
| "learning_rate": 8.698780668220281e-06, |
| "loss": 0.0587, |
| "step": 2583 |
| }, |
| { |
| "epoch": 1.175614194722475, |
| "grad_norm": 0.4680913844344663, |
| "learning_rate": 8.697818779446456e-06, |
| "loss": 0.0268, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.17606915377616, |
| "grad_norm": 0.5966094635320064, |
| "learning_rate": 8.696856588503582e-06, |
| "loss": 0.0441, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.1765241128298454, |
| "grad_norm": 0.41029105691286216, |
| "learning_rate": 8.69589409547028e-06, |
| "loss": 0.0238, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.1769790718835305, |
| "grad_norm": 0.4919555962191467, |
| "learning_rate": 8.694931300425204e-06, |
| "loss": 0.022, |
| "step": 2587 |
| }, |
| { |
| "epoch": 1.1774340309372158, |
| "grad_norm": 0.4941665993905159, |
| "learning_rate": 8.693968203447027e-06, |
| "loss": 0.0318, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.1778889899909009, |
| "grad_norm": 0.4471241857833498, |
| "learning_rate": 8.693004804614451e-06, |
| "loss": 0.0298, |
| "step": 2589 |
| }, |
| { |
| "epoch": 1.178343949044586, |
| "grad_norm": 0.42475689565329255, |
| "learning_rate": 8.692041104006201e-06, |
| "loss": 0.0245, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1787989080982713, |
| "grad_norm": 0.7037247909228679, |
| "learning_rate": 8.691077101701024e-06, |
| "loss": 0.0422, |
| "step": 2591 |
| }, |
| { |
| "epoch": 1.1792538671519563, |
| "grad_norm": 0.4727292395507324, |
| "learning_rate": 8.690112797777695e-06, |
| "loss": 0.0286, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.1797088262056414, |
| "grad_norm": 0.4886187172760372, |
| "learning_rate": 8.689148192315013e-06, |
| "loss": 0.0253, |
| "step": 2593 |
| }, |
| { |
| "epoch": 1.1801637852593267, |
| "grad_norm": 0.4878895092851417, |
| "learning_rate": 8.6881832853918e-06, |
| "loss": 0.0294, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.1806187443130118, |
| "grad_norm": 0.3785632403936228, |
| "learning_rate": 8.687218077086905e-06, |
| "loss": 0.0262, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.181073703366697, |
| "grad_norm": 0.3032359273578328, |
| "learning_rate": 8.6862525674792e-06, |
| "loss": 0.0207, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.1815286624203822, |
| "grad_norm": 0.5805982565364416, |
| "learning_rate": 8.685286756647582e-06, |
| "loss": 0.0299, |
| "step": 2597 |
| }, |
| { |
| "epoch": 1.1819836214740673, |
| "grad_norm": 0.5312395563049912, |
| "learning_rate": 8.684320644670975e-06, |
| "loss": 0.0391, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.1824385805277524, |
| "grad_norm": 0.6427828501421616, |
| "learning_rate": 8.68335423162832e-06, |
| "loss": 0.0366, |
| "step": 2599 |
| }, |
| { |
| "epoch": 1.1828935395814377, |
| "grad_norm": 0.6549023820063344, |
| "learning_rate": 8.682387517598591e-06, |
| "loss": 0.0466, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1833484986351228, |
| "grad_norm": 0.4191743788408071, |
| "learning_rate": 8.681420502660785e-06, |
| "loss": 0.0233, |
| "step": 2601 |
| }, |
| { |
| "epoch": 1.183803457688808, |
| "grad_norm": 0.4871715984486466, |
| "learning_rate": 8.68045318689392e-06, |
| "loss": 0.0271, |
| "step": 2602 |
| }, |
| { |
| "epoch": 1.1842584167424932, |
| "grad_norm": 0.6701976394432037, |
| "learning_rate": 8.679485570377043e-06, |
| "loss": 0.0306, |
| "step": 2603 |
| }, |
| { |
| "epoch": 1.1847133757961783, |
| "grad_norm": 0.6441120205935942, |
| "learning_rate": 8.678517653189222e-06, |
| "loss": 0.0394, |
| "step": 2604 |
| }, |
| { |
| "epoch": 1.1851683348498634, |
| "grad_norm": 0.5060858425158437, |
| "learning_rate": 8.677549435409548e-06, |
| "loss": 0.0217, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.1856232939035487, |
| "grad_norm": 0.6752485468046396, |
| "learning_rate": 8.676580917117144e-06, |
| "loss": 0.039, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.1860782529572338, |
| "grad_norm": 0.3957815075118571, |
| "learning_rate": 8.675612098391149e-06, |
| "loss": 0.0188, |
| "step": 2607 |
| }, |
| { |
| "epoch": 1.186533212010919, |
| "grad_norm": 0.5187116630942156, |
| "learning_rate": 8.674642979310732e-06, |
| "loss": 0.026, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.1869881710646042, |
| "grad_norm": 0.5769983660492354, |
| "learning_rate": 8.673673559955086e-06, |
| "loss": 0.0343, |
| "step": 2609 |
| }, |
| { |
| "epoch": 1.1874431301182893, |
| "grad_norm": 0.4743399882711679, |
| "learning_rate": 8.672703840403428e-06, |
| "loss": 0.0293, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1878980891719746, |
| "grad_norm": 0.3693698002797069, |
| "learning_rate": 8.671733820734996e-06, |
| "loss": 0.0162, |
| "step": 2611 |
| }, |
| { |
| "epoch": 1.1883530482256597, |
| "grad_norm": 0.7143210340908582, |
| "learning_rate": 8.670763501029059e-06, |
| "loss": 0.0424, |
| "step": 2612 |
| }, |
| { |
| "epoch": 1.1888080072793448, |
| "grad_norm": 0.44099669973790273, |
| "learning_rate": 8.669792881364905e-06, |
| "loss": 0.0288, |
| "step": 2613 |
| }, |
| { |
| "epoch": 1.18926296633303, |
| "grad_norm": 0.47880134181841405, |
| "learning_rate": 8.668821961821848e-06, |
| "loss": 0.0356, |
| "step": 2614 |
| }, |
| { |
| "epoch": 1.1897179253867152, |
| "grad_norm": 0.49961852236193943, |
| "learning_rate": 8.66785074247923e-06, |
| "loss": 0.0264, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.1901728844404005, |
| "grad_norm": 0.6606861173434392, |
| "learning_rate": 8.666879223416413e-06, |
| "loss": 0.0402, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.1906278434940856, |
| "grad_norm": 0.5832250365729773, |
| "learning_rate": 8.665907404712786e-06, |
| "loss": 0.0349, |
| "step": 2617 |
| }, |
| { |
| "epoch": 1.1910828025477707, |
| "grad_norm": 0.47607736173413934, |
| "learning_rate": 8.66493528644776e-06, |
| "loss": 0.0275, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.191537761601456, |
| "grad_norm": 0.4323045066773957, |
| "learning_rate": 8.663962868700773e-06, |
| "loss": 0.0215, |
| "step": 2619 |
| }, |
| { |
| "epoch": 1.191992720655141, |
| "grad_norm": 0.6823901111258103, |
| "learning_rate": 8.662990151551288e-06, |
| "loss": 0.0367, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1924476797088261, |
| "grad_norm": 0.568395741941641, |
| "learning_rate": 8.66201713507879e-06, |
| "loss": 0.0327, |
| "step": 2621 |
| }, |
| { |
| "epoch": 1.1929026387625115, |
| "grad_norm": 0.8032308375903047, |
| "learning_rate": 8.661043819362788e-06, |
| "loss": 0.0396, |
| "step": 2622 |
| }, |
| { |
| "epoch": 1.1933575978161965, |
| "grad_norm": 0.5352047847553939, |
| "learning_rate": 8.660070204482818e-06, |
| "loss": 0.0384, |
| "step": 2623 |
| }, |
| { |
| "epoch": 1.1938125568698816, |
| "grad_norm": 0.43266491785940075, |
| "learning_rate": 8.65909629051844e-06, |
| "loss": 0.0235, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.194267515923567, |
| "grad_norm": 0.5039359947320041, |
| "learning_rate": 8.658122077549239e-06, |
| "loss": 0.0332, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.194722474977252, |
| "grad_norm": 0.46282675009108876, |
| "learning_rate": 8.65714756565482e-06, |
| "loss": 0.028, |
| "step": 2626 |
| }, |
| { |
| "epoch": 1.1951774340309371, |
| "grad_norm": 0.42685254155176316, |
| "learning_rate": 8.656172754914818e-06, |
| "loss": 0.0193, |
| "step": 2627 |
| }, |
| { |
| "epoch": 1.1956323930846224, |
| "grad_norm": 0.5644652302861507, |
| "learning_rate": 8.655197645408889e-06, |
| "loss": 0.0327, |
| "step": 2628 |
| }, |
| { |
| "epoch": 1.1960873521383075, |
| "grad_norm": 0.6017102850762671, |
| "learning_rate": 8.654222237216714e-06, |
| "loss": 0.0395, |
| "step": 2629 |
| }, |
| { |
| "epoch": 1.1965423111919926, |
| "grad_norm": 0.4828717952370834, |
| "learning_rate": 8.653246530418003e-06, |
| "loss": 0.0296, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.196997270245678, |
| "grad_norm": 0.4718632798920294, |
| "learning_rate": 8.652270525092481e-06, |
| "loss": 0.0175, |
| "step": 2631 |
| }, |
| { |
| "epoch": 1.197452229299363, |
| "grad_norm": 0.9210566120370747, |
| "learning_rate": 8.651294221319907e-06, |
| "loss": 0.0532, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.197907188353048, |
| "grad_norm": 0.5973832244257986, |
| "learning_rate": 8.650317619180057e-06, |
| "loss": 0.0356, |
| "step": 2633 |
| }, |
| { |
| "epoch": 1.1983621474067334, |
| "grad_norm": 0.4056353546459655, |
| "learning_rate": 8.649340718752736e-06, |
| "loss": 0.0233, |
| "step": 2634 |
| }, |
| { |
| "epoch": 1.1988171064604185, |
| "grad_norm": 0.6383917144915527, |
| "learning_rate": 8.648363520117773e-06, |
| "loss": 0.0282, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.1992720655141038, |
| "grad_norm": 0.30187722032440356, |
| "learning_rate": 8.647386023355017e-06, |
| "loss": 0.015, |
| "step": 2636 |
| }, |
| { |
| "epoch": 1.199727024567789, |
| "grad_norm": 0.7620089776567717, |
| "learning_rate": 8.646408228544349e-06, |
| "loss": 0.0449, |
| "step": 2637 |
| }, |
| { |
| "epoch": 1.200181983621474, |
| "grad_norm": 0.7042927681153068, |
| "learning_rate": 8.645430135765667e-06, |
| "loss": 0.04, |
| "step": 2638 |
| }, |
| { |
| "epoch": 1.2006369426751593, |
| "grad_norm": 0.5117403840739881, |
| "learning_rate": 8.644451745098896e-06, |
| "loss": 0.0297, |
| "step": 2639 |
| }, |
| { |
| "epoch": 1.2010919017288444, |
| "grad_norm": 0.7659399394915278, |
| "learning_rate": 8.643473056623987e-06, |
| "loss": 0.0592, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.2015468607825295, |
| "grad_norm": 0.5678495394727697, |
| "learning_rate": 8.642494070420912e-06, |
| "loss": 0.032, |
| "step": 2641 |
| }, |
| { |
| "epoch": 1.2020018198362148, |
| "grad_norm": 0.4587046178873542, |
| "learning_rate": 8.641514786569674e-06, |
| "loss": 0.0273, |
| "step": 2642 |
| }, |
| { |
| "epoch": 1.2024567788898999, |
| "grad_norm": 0.5810971871142143, |
| "learning_rate": 8.640535205150291e-06, |
| "loss": 0.0436, |
| "step": 2643 |
| }, |
| { |
| "epoch": 1.2029117379435852, |
| "grad_norm": 0.49553783255896267, |
| "learning_rate": 8.639555326242812e-06, |
| "loss": 0.0375, |
| "step": 2644 |
| }, |
| { |
| "epoch": 1.2033666969972703, |
| "grad_norm": 0.700954373813157, |
| "learning_rate": 8.638575149927306e-06, |
| "loss": 0.0416, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.2038216560509554, |
| "grad_norm": 0.51916075076626, |
| "learning_rate": 8.637594676283872e-06, |
| "loss": 0.0301, |
| "step": 2646 |
| }, |
| { |
| "epoch": 1.2042766151046407, |
| "grad_norm": 0.5616014526557234, |
| "learning_rate": 8.636613905392628e-06, |
| "loss": 0.0333, |
| "step": 2647 |
| }, |
| { |
| "epoch": 1.2047315741583258, |
| "grad_norm": 0.3996003632999196, |
| "learning_rate": 8.635632837333719e-06, |
| "loss": 0.0203, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.2051865332120109, |
| "grad_norm": 0.5908400254903149, |
| "learning_rate": 8.634651472187312e-06, |
| "loss": 0.0355, |
| "step": 2649 |
| }, |
| { |
| "epoch": 1.2056414922656962, |
| "grad_norm": 0.5521857176836706, |
| "learning_rate": 8.633669810033601e-06, |
| "loss": 0.0302, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.2060964513193813, |
| "grad_norm": 0.47154629646415547, |
| "learning_rate": 8.632687850952803e-06, |
| "loss": 0.0254, |
| "step": 2651 |
| }, |
| { |
| "epoch": 1.2065514103730663, |
| "grad_norm": 0.5084600548265098, |
| "learning_rate": 8.63170559502516e-06, |
| "loss": 0.0263, |
| "step": 2652 |
| }, |
| { |
| "epoch": 1.2070063694267517, |
| "grad_norm": 0.41669809700741084, |
| "learning_rate": 8.630723042330934e-06, |
| "loss": 0.0235, |
| "step": 2653 |
| }, |
| { |
| "epoch": 1.2074613284804367, |
| "grad_norm": 0.4239984269262903, |
| "learning_rate": 8.629740192950418e-06, |
| "loss": 0.0258, |
| "step": 2654 |
| }, |
| { |
| "epoch": 1.2079162875341218, |
| "grad_norm": 0.5493755020180808, |
| "learning_rate": 8.628757046963925e-06, |
| "loss": 0.0312, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.2083712465878071, |
| "grad_norm": 0.44940260929025, |
| "learning_rate": 8.627773604451795e-06, |
| "loss": 0.0253, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.2088262056414922, |
| "grad_norm": 0.49748760446391493, |
| "learning_rate": 8.626789865494388e-06, |
| "loss": 0.029, |
| "step": 2657 |
| }, |
| { |
| "epoch": 1.2092811646951773, |
| "grad_norm": 0.4473696250717918, |
| "learning_rate": 8.62580583017209e-06, |
| "loss": 0.0265, |
| "step": 2658 |
| }, |
| { |
| "epoch": 1.2097361237488626, |
| "grad_norm": 0.634783340896908, |
| "learning_rate": 8.624821498565316e-06, |
| "loss": 0.0375, |
| "step": 2659 |
| }, |
| { |
| "epoch": 1.2101910828025477, |
| "grad_norm": 0.5688906906342468, |
| "learning_rate": 8.623836870754497e-06, |
| "loss": 0.0291, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.210646041856233, |
| "grad_norm": 0.524163167377845, |
| "learning_rate": 8.622851946820094e-06, |
| "loss": 0.0343, |
| "step": 2661 |
| }, |
| { |
| "epoch": 1.2111010009099181, |
| "grad_norm": 0.4184285347511745, |
| "learning_rate": 8.621866726842592e-06, |
| "loss": 0.0245, |
| "step": 2662 |
| }, |
| { |
| "epoch": 1.2115559599636032, |
| "grad_norm": 0.5452023193304021, |
| "learning_rate": 8.620881210902497e-06, |
| "loss": 0.0361, |
| "step": 2663 |
| }, |
| { |
| "epoch": 1.2120109190172885, |
| "grad_norm": 0.8825681885181793, |
| "learning_rate": 8.61989539908034e-06, |
| "loss": 0.0551, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.2124658780709736, |
| "grad_norm": 0.6606796283358398, |
| "learning_rate": 8.61890929145668e-06, |
| "loss": 0.0501, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.2129208371246587, |
| "grad_norm": 0.5383057502775304, |
| "learning_rate": 8.617922888112093e-06, |
| "loss": 0.0327, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.213375796178344, |
| "grad_norm": 0.456267646438963, |
| "learning_rate": 8.616936189127189e-06, |
| "loss": 0.0271, |
| "step": 2667 |
| }, |
| { |
| "epoch": 1.213830755232029, |
| "grad_norm": 0.6876820645690198, |
| "learning_rate": 8.615949194582591e-06, |
| "loss": 0.0522, |
| "step": 2668 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 0.4235510337955621, |
| "learning_rate": 8.614961904558956e-06, |
| "loss": 0.0178, |
| "step": 2669 |
| }, |
| { |
| "epoch": 1.2147406733393995, |
| "grad_norm": 0.31389612581359266, |
| "learning_rate": 8.613974319136959e-06, |
| "loss": 0.0142, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2151956323930846, |
| "grad_norm": 0.5466534592913287, |
| "learning_rate": 8.6129864383973e-06, |
| "loss": 0.0325, |
| "step": 2671 |
| }, |
| { |
| "epoch": 1.21565059144677, |
| "grad_norm": 0.6256801141600264, |
| "learning_rate": 8.611998262420707e-06, |
| "loss": 0.031, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.216105550500455, |
| "grad_norm": 0.5060382153635896, |
| "learning_rate": 8.611009791287926e-06, |
| "loss": 0.0262, |
| "step": 2673 |
| }, |
| { |
| "epoch": 1.21656050955414, |
| "grad_norm": 0.5027235560302646, |
| "learning_rate": 8.610021025079734e-06, |
| "loss": 0.0254, |
| "step": 2674 |
| }, |
| { |
| "epoch": 1.2170154686078254, |
| "grad_norm": 0.5543017523957823, |
| "learning_rate": 8.609031963876924e-06, |
| "loss": 0.0308, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.2174704276615105, |
| "grad_norm": 0.4737161111249352, |
| "learning_rate": 8.608042607760322e-06, |
| "loss": 0.0326, |
| "step": 2676 |
| }, |
| { |
| "epoch": 1.2179253867151956, |
| "grad_norm": 0.4843464243684333, |
| "learning_rate": 8.607052956810772e-06, |
| "loss": 0.0258, |
| "step": 2677 |
| }, |
| { |
| "epoch": 1.2183803457688809, |
| "grad_norm": 0.5194322149503382, |
| "learning_rate": 8.606063011109143e-06, |
| "loss": 0.0358, |
| "step": 2678 |
| }, |
| { |
| "epoch": 1.218835304822566, |
| "grad_norm": 0.5930513493210321, |
| "learning_rate": 8.60507277073633e-06, |
| "loss": 0.0362, |
| "step": 2679 |
| }, |
| { |
| "epoch": 1.219290263876251, |
| "grad_norm": 0.32996053031100914, |
| "learning_rate": 8.604082235773249e-06, |
| "loss": 0.0131, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2197452229299364, |
| "grad_norm": 0.4531032973363827, |
| "learning_rate": 8.603091406300845e-06, |
| "loss": 0.0264, |
| "step": 2681 |
| }, |
| { |
| "epoch": 1.2202001819836215, |
| "grad_norm": 0.4752447004618926, |
| "learning_rate": 8.602100282400082e-06, |
| "loss": 0.0222, |
| "step": 2682 |
| }, |
| { |
| "epoch": 1.2206551410373065, |
| "grad_norm": 0.48294135837077795, |
| "learning_rate": 8.60110886415195e-06, |
| "loss": 0.0286, |
| "step": 2683 |
| }, |
| { |
| "epoch": 1.2211101000909919, |
| "grad_norm": 0.8146460808068521, |
| "learning_rate": 8.600117151637465e-06, |
| "loss": 0.0553, |
| "step": 2684 |
| }, |
| { |
| "epoch": 1.221565059144677, |
| "grad_norm": 0.5348405988590901, |
| "learning_rate": 8.599125144937666e-06, |
| "loss": 0.0341, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.222020018198362, |
| "grad_norm": 0.5209228039836593, |
| "learning_rate": 8.598132844133614e-06, |
| "loss": 0.0285, |
| "step": 2686 |
| }, |
| { |
| "epoch": 1.2224749772520473, |
| "grad_norm": 0.8667405302686297, |
| "learning_rate": 8.597140249306393e-06, |
| "loss": 0.0554, |
| "step": 2687 |
| }, |
| { |
| "epoch": 1.2229299363057324, |
| "grad_norm": 0.3662245233762516, |
| "learning_rate": 8.596147360537115e-06, |
| "loss": 0.0186, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.2233848953594177, |
| "grad_norm": 0.5675330701823686, |
| "learning_rate": 8.595154177906915e-06, |
| "loss": 0.0252, |
| "step": 2689 |
| }, |
| { |
| "epoch": 1.2238398544131028, |
| "grad_norm": 0.5055412550341041, |
| "learning_rate": 8.594160701496951e-06, |
| "loss": 0.0359, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.224294813466788, |
| "grad_norm": 0.4636507359192646, |
| "learning_rate": 8.593166931388408e-06, |
| "loss": 0.0235, |
| "step": 2691 |
| }, |
| { |
| "epoch": 1.2247497725204732, |
| "grad_norm": 0.5789114485670152, |
| "learning_rate": 8.592172867662488e-06, |
| "loss": 0.0309, |
| "step": 2692 |
| }, |
| { |
| "epoch": 1.2252047315741583, |
| "grad_norm": 0.5362511549256743, |
| "learning_rate": 8.591178510400424e-06, |
| "loss": 0.0288, |
| "step": 2693 |
| }, |
| { |
| "epoch": 1.2256596906278434, |
| "grad_norm": 0.665176698679116, |
| "learning_rate": 8.590183859683469e-06, |
| "loss": 0.0381, |
| "step": 2694 |
| }, |
| { |
| "epoch": 1.2261146496815287, |
| "grad_norm": 0.5319510120853973, |
| "learning_rate": 8.589188915592903e-06, |
| "loss": 0.0359, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.2265696087352138, |
| "grad_norm": 0.4177494615666587, |
| "learning_rate": 8.588193678210026e-06, |
| "loss": 0.0194, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.2270245677888991, |
| "grad_norm": 0.34563423472616117, |
| "learning_rate": 8.587198147616166e-06, |
| "loss": 0.0188, |
| "step": 2697 |
| }, |
| { |
| "epoch": 1.2274795268425842, |
| "grad_norm": 0.5420023688259344, |
| "learning_rate": 8.586202323892675e-06, |
| "loss": 0.0322, |
| "step": 2698 |
| }, |
| { |
| "epoch": 1.2279344858962693, |
| "grad_norm": 0.5715046852040315, |
| "learning_rate": 8.585206207120925e-06, |
| "loss": 0.0248, |
| "step": 2699 |
| }, |
| { |
| "epoch": 1.2283894449499546, |
| "grad_norm": 0.6150293588585071, |
| "learning_rate": 8.584209797382313e-06, |
| "loss": 0.0349, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2288444040036397, |
| "grad_norm": 0.7538546206140824, |
| "learning_rate": 8.583213094758262e-06, |
| "loss": 0.0415, |
| "step": 2701 |
| }, |
| { |
| "epoch": 1.2292993630573248, |
| "grad_norm": 0.41258699232239693, |
| "learning_rate": 8.582216099330218e-06, |
| "loss": 0.0252, |
| "step": 2702 |
| }, |
| { |
| "epoch": 1.22975432211101, |
| "grad_norm": 0.5992053934366026, |
| "learning_rate": 8.581218811179655e-06, |
| "loss": 0.0231, |
| "step": 2703 |
| }, |
| { |
| "epoch": 1.2302092811646952, |
| "grad_norm": 0.4911038111295034, |
| "learning_rate": 8.58022123038806e-06, |
| "loss": 0.0367, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.2306642402183803, |
| "grad_norm": 0.5415583441174247, |
| "learning_rate": 8.579223357036956e-06, |
| "loss": 0.0356, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.2311191992720656, |
| "grad_norm": 0.648050207407017, |
| "learning_rate": 8.578225191207881e-06, |
| "loss": 0.0322, |
| "step": 2706 |
| }, |
| { |
| "epoch": 1.2315741583257507, |
| "grad_norm": 0.6515223387873779, |
| "learning_rate": 8.577226732982405e-06, |
| "loss": 0.0424, |
| "step": 2707 |
| }, |
| { |
| "epoch": 1.2320291173794358, |
| "grad_norm": 0.7662318426027166, |
| "learning_rate": 8.576227982442114e-06, |
| "loss": 0.037, |
| "step": 2708 |
| }, |
| { |
| "epoch": 1.232484076433121, |
| "grad_norm": 0.4709920734770032, |
| "learning_rate": 8.575228939668623e-06, |
| "loss": 0.0284, |
| "step": 2709 |
| }, |
| { |
| "epoch": 1.2329390354868062, |
| "grad_norm": 0.7144313144730997, |
| "learning_rate": 8.574229604743566e-06, |
| "loss": 0.0316, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.2333939945404913, |
| "grad_norm": 0.4992331855484428, |
| "learning_rate": 8.573229977748609e-06, |
| "loss": 0.0345, |
| "step": 2711 |
| }, |
| { |
| "epoch": 1.2338489535941766, |
| "grad_norm": 0.6112686451914704, |
| "learning_rate": 8.572230058765434e-06, |
| "loss": 0.0358, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.2343039126478617, |
| "grad_norm": 0.8262726736467544, |
| "learning_rate": 8.571229847875751e-06, |
| "loss": 0.0641, |
| "step": 2713 |
| }, |
| { |
| "epoch": 1.2347588717015467, |
| "grad_norm": 0.4953827805427677, |
| "learning_rate": 8.570229345161293e-06, |
| "loss": 0.0247, |
| "step": 2714 |
| }, |
| { |
| "epoch": 1.235213830755232, |
| "grad_norm": 0.3801656553630412, |
| "learning_rate": 8.569228550703815e-06, |
| "loss": 0.0249, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.2356687898089171, |
| "grad_norm": 0.49612613452863535, |
| "learning_rate": 8.568227464585099e-06, |
| "loss": 0.0277, |
| "step": 2716 |
| }, |
| { |
| "epoch": 1.2361237488626025, |
| "grad_norm": 0.4582666835548743, |
| "learning_rate": 8.567226086886948e-06, |
| "loss": 0.0262, |
| "step": 2717 |
| }, |
| { |
| "epoch": 1.2365787079162875, |
| "grad_norm": 0.6697552955443566, |
| "learning_rate": 8.566224417691191e-06, |
| "loss": 0.0338, |
| "step": 2718 |
| }, |
| { |
| "epoch": 1.2370336669699726, |
| "grad_norm": 0.8001154357445661, |
| "learning_rate": 8.565222457079679e-06, |
| "loss": 0.0685, |
| "step": 2719 |
| }, |
| { |
| "epoch": 1.237488626023658, |
| "grad_norm": 0.4454996360487464, |
| "learning_rate": 8.56422020513429e-06, |
| "loss": 0.0233, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.237943585077343, |
| "grad_norm": 0.42231887554095254, |
| "learning_rate": 8.56321766193692e-06, |
| "loss": 0.0247, |
| "step": 2721 |
| }, |
| { |
| "epoch": 1.2383985441310281, |
| "grad_norm": 0.49520892835841024, |
| "learning_rate": 8.562214827569495e-06, |
| "loss": 0.0198, |
| "step": 2722 |
| }, |
| { |
| "epoch": 1.2388535031847134, |
| "grad_norm": 0.3119762559086726, |
| "learning_rate": 8.56121170211396e-06, |
| "loss": 0.0183, |
| "step": 2723 |
| }, |
| { |
| "epoch": 1.2393084622383985, |
| "grad_norm": 0.48127588980662994, |
| "learning_rate": 8.560208285652287e-06, |
| "loss": 0.0348, |
| "step": 2724 |
| }, |
| { |
| "epoch": 1.2397634212920838, |
| "grad_norm": 0.975980592939099, |
| "learning_rate": 8.559204578266471e-06, |
| "loss": 0.0712, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.240218380345769, |
| "grad_norm": 0.4739910877413602, |
| "learning_rate": 8.55820058003853e-06, |
| "loss": 0.027, |
| "step": 2726 |
| }, |
| { |
| "epoch": 1.240673339399454, |
| "grad_norm": 0.5358172750361924, |
| "learning_rate": 8.557196291050506e-06, |
| "loss": 0.0403, |
| "step": 2727 |
| }, |
| { |
| "epoch": 1.2411282984531393, |
| "grad_norm": 0.49464890318884047, |
| "learning_rate": 8.556191711384466e-06, |
| "loss": 0.0336, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.2415832575068244, |
| "grad_norm": 0.4046597291390638, |
| "learning_rate": 8.555186841122498e-06, |
| "loss": 0.024, |
| "step": 2729 |
| }, |
| { |
| "epoch": 1.2420382165605095, |
| "grad_norm": 0.656706108193742, |
| "learning_rate": 8.554181680346717e-06, |
| "loss": 0.0348, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2424931756141948, |
| "grad_norm": 0.49134341156698247, |
| "learning_rate": 8.553176229139262e-06, |
| "loss": 0.033, |
| "step": 2731 |
| }, |
| { |
| "epoch": 1.24294813466788, |
| "grad_norm": 0.3673616941332998, |
| "learning_rate": 8.552170487582287e-06, |
| "loss": 0.0233, |
| "step": 2732 |
| }, |
| { |
| "epoch": 1.243403093721565, |
| "grad_norm": 0.3845834813421107, |
| "learning_rate": 8.551164455757985e-06, |
| "loss": 0.021, |
| "step": 2733 |
| }, |
| { |
| "epoch": 1.2438580527752503, |
| "grad_norm": 0.4219248857316413, |
| "learning_rate": 8.550158133748559e-06, |
| "loss": 0.0232, |
| "step": 2734 |
| }, |
| { |
| "epoch": 1.2443130118289354, |
| "grad_norm": 0.5359384657995739, |
| "learning_rate": 8.549151521636244e-06, |
| "loss": 0.0426, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.2447679708826205, |
| "grad_norm": 0.6147117803498731, |
| "learning_rate": 8.548144619503291e-06, |
| "loss": 0.0372, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.2452229299363058, |
| "grad_norm": 0.7816013628144164, |
| "learning_rate": 8.547137427431986e-06, |
| "loss": 0.0509, |
| "step": 2737 |
| }, |
| { |
| "epoch": 1.2456778889899909, |
| "grad_norm": 0.5732293106945054, |
| "learning_rate": 8.546129945504629e-06, |
| "loss": 0.0404, |
| "step": 2738 |
| }, |
| { |
| "epoch": 1.246132848043676, |
| "grad_norm": 0.5878496377747829, |
| "learning_rate": 8.545122173803547e-06, |
| "loss": 0.0349, |
| "step": 2739 |
| }, |
| { |
| "epoch": 1.2465878070973613, |
| "grad_norm": 0.5178543900697522, |
| "learning_rate": 8.544114112411088e-06, |
| "loss": 0.0317, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2470427661510464, |
| "grad_norm": 0.44475184485600816, |
| "learning_rate": 8.54310576140963e-06, |
| "loss": 0.0246, |
| "step": 2741 |
| }, |
| { |
| "epoch": 1.2474977252047315, |
| "grad_norm": 0.41811991583751146, |
| "learning_rate": 8.542097120881572e-06, |
| "loss": 0.0264, |
| "step": 2742 |
| }, |
| { |
| "epoch": 1.2479526842584168, |
| "grad_norm": 0.504603909447871, |
| "learning_rate": 8.541088190909333e-06, |
| "loss": 0.037, |
| "step": 2743 |
| }, |
| { |
| "epoch": 1.2484076433121019, |
| "grad_norm": 0.5546565546187008, |
| "learning_rate": 8.540078971575355e-06, |
| "loss": 0.0321, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.2488626023657872, |
| "grad_norm": 0.5988533107048205, |
| "learning_rate": 8.539069462962115e-06, |
| "loss": 0.0356, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.2493175614194723, |
| "grad_norm": 0.5355497681868633, |
| "learning_rate": 8.538059665152097e-06, |
| "loss": 0.0219, |
| "step": 2746 |
| }, |
| { |
| "epoch": 1.2497725204731573, |
| "grad_norm": 0.5560216189929246, |
| "learning_rate": 8.537049578227823e-06, |
| "loss": 0.0318, |
| "step": 2747 |
| }, |
| { |
| "epoch": 1.2502274795268427, |
| "grad_norm": 0.41791265535852423, |
| "learning_rate": 8.536039202271828e-06, |
| "loss": 0.0296, |
| "step": 2748 |
| }, |
| { |
| "epoch": 1.2506824385805277, |
| "grad_norm": 0.6230283621476296, |
| "learning_rate": 8.53502853736668e-06, |
| "loss": 0.0229, |
| "step": 2749 |
| }, |
| { |
| "epoch": 1.251137397634213, |
| "grad_norm": 0.5883015192363978, |
| "learning_rate": 8.534017583594965e-06, |
| "loss": 0.0454, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2515923566878981, |
| "grad_norm": 0.5657093936113446, |
| "learning_rate": 8.53300634103929e-06, |
| "loss": 0.0328, |
| "step": 2751 |
| }, |
| { |
| "epoch": 1.2520473157415832, |
| "grad_norm": 0.9286848475357391, |
| "learning_rate": 8.531994809782294e-06, |
| "loss": 0.0651, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.2525022747952685, |
| "grad_norm": 0.5306254596544426, |
| "learning_rate": 8.530982989906632e-06, |
| "loss": 0.0264, |
| "step": 2753 |
| }, |
| { |
| "epoch": 1.2529572338489536, |
| "grad_norm": 0.599793814100533, |
| "learning_rate": 8.529970881494985e-06, |
| "loss": 0.038, |
| "step": 2754 |
| }, |
| { |
| "epoch": 1.2534121929026387, |
| "grad_norm": 0.4592924108716034, |
| "learning_rate": 8.52895848463006e-06, |
| "loss": 0.0253, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.253867151956324, |
| "grad_norm": 0.5025180855718538, |
| "learning_rate": 8.527945799394584e-06, |
| "loss": 0.0269, |
| "step": 2756 |
| }, |
| { |
| "epoch": 1.2543221110100091, |
| "grad_norm": 0.3690223518853051, |
| "learning_rate": 8.526932825871308e-06, |
| "loss": 0.0214, |
| "step": 2757 |
| }, |
| { |
| "epoch": 1.2547770700636942, |
| "grad_norm": 0.38161446652737785, |
| "learning_rate": 8.52591956414301e-06, |
| "loss": 0.0166, |
| "step": 2758 |
| }, |
| { |
| "epoch": 1.2552320291173795, |
| "grad_norm": 0.611622699149414, |
| "learning_rate": 8.524906014292488e-06, |
| "loss": 0.0412, |
| "step": 2759 |
| }, |
| { |
| "epoch": 1.2556869881710646, |
| "grad_norm": 0.4022077081421061, |
| "learning_rate": 8.523892176402565e-06, |
| "loss": 0.0234, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2561419472247497, |
| "grad_norm": 0.4085009912666225, |
| "learning_rate": 8.522878050556087e-06, |
| "loss": 0.0271, |
| "step": 2761 |
| }, |
| { |
| "epoch": 1.256596906278435, |
| "grad_norm": 0.591494783456256, |
| "learning_rate": 8.521863636835924e-06, |
| "loss": 0.0288, |
| "step": 2762 |
| }, |
| { |
| "epoch": 1.25705186533212, |
| "grad_norm": 0.4315940956441906, |
| "learning_rate": 8.520848935324968e-06, |
| "loss": 0.0257, |
| "step": 2763 |
| }, |
| { |
| "epoch": 1.2575068243858052, |
| "grad_norm": 0.4623767141710468, |
| "learning_rate": 8.519833946106139e-06, |
| "loss": 0.0293, |
| "step": 2764 |
| }, |
| { |
| "epoch": 1.2579617834394905, |
| "grad_norm": 0.5965051882391731, |
| "learning_rate": 8.518818669262373e-06, |
| "loss": 0.0367, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.2584167424931756, |
| "grad_norm": 0.5441954958905808, |
| "learning_rate": 8.517803104876638e-06, |
| "loss": 0.0314, |
| "step": 2766 |
| }, |
| { |
| "epoch": 1.2588717015468607, |
| "grad_norm": 0.5077782576820083, |
| "learning_rate": 8.51678725303192e-06, |
| "loss": 0.0261, |
| "step": 2767 |
| }, |
| { |
| "epoch": 1.259326660600546, |
| "grad_norm": 0.6376855259836618, |
| "learning_rate": 8.515771113811226e-06, |
| "loss": 0.0409, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.259781619654231, |
| "grad_norm": 6.915760462322178, |
| "learning_rate": 8.514754687297598e-06, |
| "loss": 0.1986, |
| "step": 2769 |
| }, |
| { |
| "epoch": 1.2602365787079162, |
| "grad_norm": 0.5889806379105973, |
| "learning_rate": 8.513737973574088e-06, |
| "loss": 0.0336, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2606915377616015, |
| "grad_norm": 0.5275667357404193, |
| "learning_rate": 8.512720972723779e-06, |
| "loss": 0.0289, |
| "step": 2771 |
| }, |
| { |
| "epoch": 1.2611464968152866, |
| "grad_norm": 0.3147286633021264, |
| "learning_rate": 8.511703684829773e-06, |
| "loss": 0.0163, |
| "step": 2772 |
| }, |
| { |
| "epoch": 1.2616014558689717, |
| "grad_norm": 0.8013976464224811, |
| "learning_rate": 8.510686109975202e-06, |
| "loss": 0.0468, |
| "step": 2773 |
| }, |
| { |
| "epoch": 1.262056414922657, |
| "grad_norm": 0.4994061588441834, |
| "learning_rate": 8.509668248243217e-06, |
| "loss": 0.02, |
| "step": 2774 |
| }, |
| { |
| "epoch": 1.262511373976342, |
| "grad_norm": 0.5749302842677763, |
| "learning_rate": 8.508650099716991e-06, |
| "loss": 0.0362, |
| "step": 2775 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 18262287138816.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|