| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5050045495905369, |
| "eval_steps": 500, |
| "global_step": 1110, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 9.461428161462043, |
| "learning_rate": 1e-05, |
| "loss": 0.1263, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 5.190780450250769, |
| "learning_rate": 9.99999979571129e-06, |
| "loss": 0.1723, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 7.521926017130347, |
| "learning_rate": 9.999999182845177e-06, |
| "loss": 0.1327, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 2.5665810200307217, |
| "learning_rate": 9.99999816140171e-06, |
| "loss": 0.1095, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 2.738508706395883, |
| "learning_rate": 9.999996731380973e-06, |
| "loss": 0.1151, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 2.67941899677245, |
| "learning_rate": 9.999994892783083e-06, |
| "loss": 0.0821, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 2.137586234420784, |
| "learning_rate": 9.99999264560819e-06, |
| "loss": 0.0729, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 2.8221590420989164, |
| "learning_rate": 9.999989989856477e-06, |
| "loss": 0.0929, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 1.6167314639784554, |
| "learning_rate": 9.999986925528164e-06, |
| "loss": 0.0466, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 2.1773262431631313, |
| "learning_rate": 9.999983452623498e-06, |
| "loss": 0.0709, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 7.6444390817806465, |
| "learning_rate": 9.999979571142765e-06, |
| "loss": 0.0809, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 2.034523884241798, |
| "learning_rate": 9.999975281086278e-06, |
| "loss": 0.0839, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 3.576108282005355, |
| "learning_rate": 9.999970582454392e-06, |
| "loss": 0.0728, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 2.623641566468802, |
| "learning_rate": 9.999965475247491e-06, |
| "loss": 0.1052, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 2.1413574998269085, |
| "learning_rate": 9.99995995946599e-06, |
| "loss": 0.0885, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 1.4859066724415246, |
| "learning_rate": 9.999954035110342e-06, |
| "loss": 0.0644, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 2.851793157608408, |
| "learning_rate": 9.999947702181027e-06, |
| "loss": 0.1057, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 4.693829546662477, |
| "learning_rate": 9.999940960678568e-06, |
| "loss": 0.0867, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 2.2728033563417362, |
| "learning_rate": 9.999933810603513e-06, |
| "loss": 0.0789, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 1.6705986173507794, |
| "learning_rate": 9.999926251956447e-06, |
| "loss": 0.0683, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 2.187579869114393, |
| "learning_rate": 9.999918284737986e-06, |
| "loss": 0.0984, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 2.328040268012338, |
| "learning_rate": 9.999909908948782e-06, |
| "loss": 0.0699, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 5.572389775693198, |
| "learning_rate": 9.999901124589519e-06, |
| "loss": 0.0912, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 1.84796719674859, |
| "learning_rate": 9.999891931660916e-06, |
| "loss": 0.1015, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 1.7501762990792236, |
| "learning_rate": 9.999882330163725e-06, |
| "loss": 0.0909, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 0.9922115950592263, |
| "learning_rate": 9.999872320098729e-06, |
| "loss": 0.0656, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 1.5612370560987539, |
| "learning_rate": 9.999861901466746e-06, |
| "loss": 0.0974, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 1.4617271794930395, |
| "learning_rate": 9.999851074268625e-06, |
| "loss": 0.0853, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 1.8127085104491556, |
| "learning_rate": 9.999839838505257e-06, |
| "loss": 0.1081, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 1.4710105512612208, |
| "learning_rate": 9.999828194177555e-06, |
| "loss": 0.0868, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 1.3474487189311888, |
| "learning_rate": 9.999816141286472e-06, |
| "loss": 0.0817, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 1.0967596652549403, |
| "learning_rate": 9.99980367983299e-06, |
| "loss": 0.0637, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 3.179425671823194, |
| "learning_rate": 9.999790809818134e-06, |
| "loss": 0.069, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 4.482257681577152, |
| "learning_rate": 9.999777531242951e-06, |
| "loss": 0.0915, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 3.953299040475791, |
| "learning_rate": 9.999763844108528e-06, |
| "loss": 0.0562, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 1.1127201050382067, |
| "learning_rate": 9.999749748415982e-06, |
| "loss": 0.0556, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 79.45756094624792, |
| "learning_rate": 9.999735244166464e-06, |
| "loss": 0.1223, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 2777.9092912017113, |
| "learning_rate": 9.99972033136116e-06, |
| "loss": 0.3211, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 2.5204693177238466, |
| "learning_rate": 9.999705010001291e-06, |
| "loss": 0.0723, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 2.2975907071135655, |
| "learning_rate": 9.999689280088105e-06, |
| "loss": 0.0696, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.998434349074003, |
| "learning_rate": 9.99967314162289e-06, |
| "loss": 0.083, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 3.882239448575704, |
| "learning_rate": 9.999656594606966e-06, |
| "loss": 0.1015, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 3.5286596480512493, |
| "learning_rate": 9.999639639041681e-06, |
| "loss": 0.0817, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 1.6933989447443707, |
| "learning_rate": 9.999622274928424e-06, |
| "loss": 0.1003, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 1.2483160046323276, |
| "learning_rate": 9.999604502268614e-06, |
| "loss": 0.0952, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 0.9417906124383243, |
| "learning_rate": 9.9995863210637e-06, |
| "loss": 0.0731, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 2.8195414757816897, |
| "learning_rate": 9.99956773131517e-06, |
| "loss": 0.1845, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 2.74390379471345, |
| "learning_rate": 9.999548733024545e-06, |
| "loss": 0.1826, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 1.5138494619527987, |
| "learning_rate": 9.999529326193373e-06, |
| "loss": 0.0857, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 1.215379974181271, |
| "learning_rate": 9.999509510823242e-06, |
| "loss": 0.0686, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 1.292187967807859, |
| "learning_rate": 9.999489286915773e-06, |
| "loss": 0.0707, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 1.7888013203563982, |
| "learning_rate": 9.999468654472614e-06, |
| "loss": 0.0682, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 0.8979425621703144, |
| "learning_rate": 9.999447613495457e-06, |
| "loss": 0.0508, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 1.9123835444775663, |
| "learning_rate": 9.99942616398602e-06, |
| "loss": 0.0689, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 0.9393581994096443, |
| "learning_rate": 9.99940430594605e-06, |
| "loss": 0.0496, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 1.0234476513644222, |
| "learning_rate": 9.999382039377339e-06, |
| "loss": 0.0601, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 0.9291387208138827, |
| "learning_rate": 9.999359364281704e-06, |
| "loss": 0.0377, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 1.8209170803663992, |
| "learning_rate": 9.999336280660999e-06, |
| "loss": 0.1144, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 1.1214625046464874, |
| "learning_rate": 9.99931278851711e-06, |
| "loss": 0.0622, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 1.0331723997917317, |
| "learning_rate": 9.999288887851956e-06, |
| "loss": 0.0667, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 1.0412381501406744, |
| "learning_rate": 9.999264578667493e-06, |
| "loss": 0.0566, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 1.4510603110658047, |
| "learning_rate": 9.999239860965703e-06, |
| "loss": 0.0845, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 1.301162540669183, |
| "learning_rate": 9.999214734748609e-06, |
| "loss": 0.0759, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 0.9977688847603402, |
| "learning_rate": 9.999189200018263e-06, |
| "loss": 0.0528, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 1.2894688842348854, |
| "learning_rate": 9.99916325677675e-06, |
| "loss": 0.0899, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 1.4627871680702638, |
| "learning_rate": 9.999136905026194e-06, |
| "loss": 0.1456, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 1.2304385710214434, |
| "learning_rate": 9.999110144768745e-06, |
| "loss": 0.079, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 1.085016380732753, |
| "learning_rate": 9.99908297600659e-06, |
| "loss": 0.0696, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 0.989450558642297, |
| "learning_rate": 9.99905539874195e-06, |
| "loss": 0.069, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 1.0510491151133208, |
| "learning_rate": 9.99902741297708e-06, |
| "loss": 0.0555, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 0.8938033562648371, |
| "learning_rate": 9.998999018714264e-06, |
| "loss": 0.0783, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.902512108322722, |
| "learning_rate": 9.998970215955824e-06, |
| "loss": 0.0702, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 0.7661831894133686, |
| "learning_rate": 9.998941004704113e-06, |
| "loss": 0.0519, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 1.1047249497744047, |
| "learning_rate": 9.998911384961518e-06, |
| "loss": 0.0773, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 0.7750047299312716, |
| "learning_rate": 9.998881356730458e-06, |
| "loss": 0.0598, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 0.9815801555720315, |
| "learning_rate": 9.99885092001339e-06, |
| "loss": 0.0661, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 1.3090963451351905, |
| "learning_rate": 9.998820074812799e-06, |
| "loss": 0.0713, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 1.1489338732270693, |
| "learning_rate": 9.998788821131207e-06, |
| "loss": 0.0946, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 0.9040381990998293, |
| "learning_rate": 9.998757158971164e-06, |
| "loss": 0.067, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 1.1019926198229115, |
| "learning_rate": 9.998725088335263e-06, |
| "loss": 0.0874, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 0.5779852750462403, |
| "learning_rate": 9.99869260922612e-06, |
| "loss": 0.0492, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 1.2769852710418472, |
| "learning_rate": 9.998659721646393e-06, |
| "loss": 0.0781, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 0.9020624084974485, |
| "learning_rate": 9.998626425598766e-06, |
| "loss": 0.0734, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 0.9626764462141776, |
| "learning_rate": 9.99859272108596e-06, |
| "loss": 0.0719, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 0.9435885887029873, |
| "learning_rate": 9.998558608110733e-06, |
| "loss": 0.0835, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 1.0578725525123687, |
| "learning_rate": 9.998524086675867e-06, |
| "loss": 0.0746, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 1.0366588534208079, |
| "learning_rate": 9.998489156784188e-06, |
| "loss": 0.0933, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 1.0595948680723846, |
| "learning_rate": 9.998453818438547e-06, |
| "loss": 0.0846, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 0.8807515753016749, |
| "learning_rate": 9.998418071641833e-06, |
| "loss": 0.0649, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 0.9034225145874141, |
| "learning_rate": 9.998381916396967e-06, |
| "loss": 0.0621, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 0.6732889821553815, |
| "learning_rate": 9.998345352706901e-06, |
| "loss": 0.0367, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 0.7136967603743426, |
| "learning_rate": 9.998308380574628e-06, |
| "loss": 0.0569, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 1.1459385364035048, |
| "learning_rate": 9.998271000003166e-06, |
| "loss": 0.1184, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 0.8224906129097734, |
| "learning_rate": 9.998233210995569e-06, |
| "loss": 0.0682, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 1.5182946932236698, |
| "learning_rate": 9.998195013554926e-06, |
| "loss": 0.0875, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 0.9355855711018981, |
| "learning_rate": 9.998156407684359e-06, |
| "loss": 0.0939, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 0.7329840867165283, |
| "learning_rate": 9.998117393387022e-06, |
| "loss": 0.0466, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 0.8701001036058451, |
| "learning_rate": 9.9980779706661e-06, |
| "loss": 0.0729, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 1.0218896298663185, |
| "learning_rate": 9.99803813952482e-06, |
| "loss": 0.0828, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 0.9044995357273884, |
| "learning_rate": 9.997997899966433e-06, |
| "loss": 0.0709, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 0.9877796099816964, |
| "learning_rate": 9.99795725199423e-06, |
| "loss": 0.0903, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 1.0061501994463906, |
| "learning_rate": 9.99791619561153e-06, |
| "loss": 0.0831, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 0.8789173954818107, |
| "learning_rate": 9.997874730821689e-06, |
| "loss": 0.0714, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 15.480920098194954, |
| "learning_rate": 9.997832857628093e-06, |
| "loss": 0.2603, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 1.3806761301603454, |
| "learning_rate": 9.99779057603417e-06, |
| "loss": 0.1227, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 0.8462176607269959, |
| "learning_rate": 9.997747886043368e-06, |
| "loss": 0.0605, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 0.7467169847716549, |
| "learning_rate": 9.997704787659179e-06, |
| "loss": 0.0618, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 1.5653334818977065, |
| "learning_rate": 9.997661280885125e-06, |
| "loss": 0.1253, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 0.871706038604149, |
| "learning_rate": 9.99761736572476e-06, |
| "loss": 0.0716, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 1.1398296008355844, |
| "learning_rate": 9.997573042181672e-06, |
| "loss": 0.0698, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 1.0487992691419916, |
| "learning_rate": 9.997528310259485e-06, |
| "loss": 0.1102, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 0.9112684449646818, |
| "learning_rate": 9.997483169961852e-06, |
| "loss": 0.1032, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 0.9418790141923585, |
| "learning_rate": 9.997437621292463e-06, |
| "loss": 0.0771, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 0.7796140692842074, |
| "learning_rate": 9.99739166425504e-06, |
| "loss": 0.0627, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 1.5434421216734795, |
| "learning_rate": 9.997345298853339e-06, |
| "loss": 0.1495, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 0.8898179660551836, |
| "learning_rate": 9.997298525091148e-06, |
| "loss": 0.0735, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 0.8585916871524272, |
| "learning_rate": 9.997251342972288e-06, |
| "loss": 0.068, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 0.812806800238708, |
| "learning_rate": 9.997203752500616e-06, |
| "loss": 0.0689, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 0.9677722064277628, |
| "learning_rate": 9.997155753680021e-06, |
| "loss": 0.0795, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 1.621934591654054, |
| "learning_rate": 9.997107346514425e-06, |
| "loss": 0.0707, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 0.6750452750311531, |
| "learning_rate": 9.997058531007782e-06, |
| "loss": 0.0588, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 0.9583870506818666, |
| "learning_rate": 9.997009307164083e-06, |
| "loss": 0.0859, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 1.247483970027119, |
| "learning_rate": 9.99695967498735e-06, |
| "loss": 0.0952, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 0.7937903902273558, |
| "learning_rate": 9.996909634481639e-06, |
| "loss": 0.0614, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 4.855426128828546, |
| "learning_rate": 9.996859185651038e-06, |
| "loss": 0.1629, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 1.0499970639607177, |
| "learning_rate": 9.99680832849967e-06, |
| "loss": 0.1031, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 0.8730447821488512, |
| "learning_rate": 9.99675706303169e-06, |
| "loss": 0.0606, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 1.2779985416162813, |
| "learning_rate": 9.99670538925129e-06, |
| "loss": 0.074, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 0.8606157718419157, |
| "learning_rate": 9.996653307162687e-06, |
| "loss": 0.0703, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 0.8920761218762643, |
| "learning_rate": 9.996600816770144e-06, |
| "loss": 0.0818, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 1.1603462045917847, |
| "learning_rate": 9.996547918077944e-06, |
| "loss": 0.1148, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 0.9108713801214797, |
| "learning_rate": 9.996494611090414e-06, |
| "loss": 0.0884, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 0.6523725468628359, |
| "learning_rate": 9.996440895811907e-06, |
| "loss": 0.0535, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 0.8812777694752004, |
| "learning_rate": 9.996386772246816e-06, |
| "loss": 0.087, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 1.0622191207422995, |
| "learning_rate": 9.99633224039956e-06, |
| "loss": 0.0982, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 3.7961077321923025, |
| "learning_rate": 9.996277300274596e-06, |
| "loss": 0.1526, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 0.9444433559435487, |
| "learning_rate": 9.996221951876415e-06, |
| "loss": 0.0996, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 1.444871481552235, |
| "learning_rate": 9.996166195209539e-06, |
| "loss": 0.1075, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 0.7446446480732116, |
| "learning_rate": 9.996110030278522e-06, |
| "loss": 0.0561, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 0.8913010543094952, |
| "learning_rate": 9.996053457087958e-06, |
| "loss": 0.0715, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 0.7815821404043856, |
| "learning_rate": 9.995996475642466e-06, |
| "loss": 0.0796, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 0.74337588448595, |
| "learning_rate": 9.995939085946704e-06, |
| "loss": 0.0661, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 0.9974255688753435, |
| "learning_rate": 9.995881288005363e-06, |
| "loss": 0.0869, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 1.2260290141946268, |
| "learning_rate": 9.995823081823162e-06, |
| "loss": 0.0766, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 0.9751795993584637, |
| "learning_rate": 9.99576446740486e-06, |
| "loss": 0.091, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 1.6175476325168967, |
| "learning_rate": 9.995705444755249e-06, |
| "loss": 0.1208, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 0.7580083688127299, |
| "learning_rate": 9.995646013879147e-06, |
| "loss": 0.0622, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 1.0194887039793072, |
| "learning_rate": 9.995586174781413e-06, |
| "loss": 0.0753, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 0.9065646408503975, |
| "learning_rate": 9.995525927466936e-06, |
| "loss": 0.0848, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 0.8871078738477127, |
| "learning_rate": 9.995465271940641e-06, |
| "loss": 0.0607, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.1486707652049646, |
| "learning_rate": 9.995404208207485e-06, |
| "loss": 0.0809, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 1.1473150526096232, |
| "learning_rate": 9.995342736272453e-06, |
| "loss": 0.1035, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 1.3025683052462544, |
| "learning_rate": 9.995280856140572e-06, |
| "loss": 0.1197, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 0.8069596755970996, |
| "learning_rate": 9.9952185678169e-06, |
| "loss": 0.0526, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 0.8153700064848134, |
| "learning_rate": 9.995155871306524e-06, |
| "loss": 0.0613, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 0.7319023745966868, |
| "learning_rate": 9.995092766614567e-06, |
| "loss": 0.0512, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.0146656175738817, |
| "learning_rate": 9.995029253746186e-06, |
| "loss": 0.0846, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 0.8015254985373994, |
| "learning_rate": 9.994965332706574e-06, |
| "loss": 0.0619, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 1.0630207312416284, |
| "learning_rate": 9.994901003500952e-06, |
| "loss": 0.0796, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 0.9431304991088505, |
| "learning_rate": 9.994836266134575e-06, |
| "loss": 0.0743, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 1.023738915097686, |
| "learning_rate": 9.994771120612737e-06, |
| "loss": 0.0888, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 0.9272637744585672, |
| "learning_rate": 9.994705566940757e-06, |
| "loss": 0.084, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 1.122378326253592, |
| "learning_rate": 9.994639605123994e-06, |
| "loss": 0.0961, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 0.753531768411978, |
| "learning_rate": 9.994573235167839e-06, |
| "loss": 0.0736, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 0.9314766958597749, |
| "learning_rate": 9.994506457077715e-06, |
| "loss": 0.0838, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 0.996008388557059, |
| "learning_rate": 9.994439270859077e-06, |
| "loss": 0.1076, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 0.9199332464612126, |
| "learning_rate": 9.994371676517418e-06, |
| "loss": 0.0724, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 0.8652292283168678, |
| "learning_rate": 9.994303674058259e-06, |
| "loss": 0.0628, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 0.8176262426438138, |
| "learning_rate": 9.994235263487158e-06, |
| "loss": 0.0743, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 0.8147855247941459, |
| "learning_rate": 9.994166444809705e-06, |
| "loss": 0.0559, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 0.7853019575635352, |
| "learning_rate": 9.994097218031524e-06, |
| "loss": 0.0681, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 0.8445610480134321, |
| "learning_rate": 9.994027583158272e-06, |
| "loss": 0.0785, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 0.8555498692388026, |
| "learning_rate": 9.993957540195638e-06, |
| "loss": 0.077, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 0.8281270493499452, |
| "learning_rate": 9.993887089149346e-06, |
| "loss": 0.0848, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 0.7180425978661062, |
| "learning_rate": 9.993816230025152e-06, |
| "loss": 0.0588, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 0.9287545326980071, |
| "learning_rate": 9.99374496282885e-06, |
| "loss": 0.0874, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 1.5950603980195528, |
| "learning_rate": 9.993673287566261e-06, |
| "loss": 0.1301, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 0.505966633973175, |
| "learning_rate": 9.99360120424324e-06, |
| "loss": 0.0459, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 0.6170796905443107, |
| "learning_rate": 9.993528712865681e-06, |
| "loss": 0.0666, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 0.8965600572228928, |
| "learning_rate": 9.993455813439507e-06, |
| "loss": 0.0648, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 0.7555745664692847, |
| "learning_rate": 9.993382505970673e-06, |
| "loss": 0.0479, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 0.7885826993774436, |
| "learning_rate": 9.99330879046517e-06, |
| "loss": 0.0605, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 0.6970911126559147, |
| "learning_rate": 9.993234666929024e-06, |
| "loss": 0.0545, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 0.8281240642020996, |
| "learning_rate": 9.99316013536829e-06, |
| "loss": 0.0651, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 0.8497823551734951, |
| "learning_rate": 9.993085195789057e-06, |
| "loss": 0.098, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 0.8425278224044996, |
| "learning_rate": 9.993009848197452e-06, |
| "loss": 0.0861, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 0.729342450692031, |
| "learning_rate": 9.992934092599629e-06, |
| "loss": 0.0651, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 0.8810253378927329, |
| "learning_rate": 9.99285792900178e-06, |
| "loss": 0.0995, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 1.0402457083445067, |
| "learning_rate": 9.992781357410131e-06, |
| "loss": 0.1061, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 0.7397036090930822, |
| "learning_rate": 9.992704377830934e-06, |
| "loss": 0.0571, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 1.4783630598693296, |
| "learning_rate": 9.992626990270484e-06, |
| "loss": 0.1154, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 1.1100322283473036, |
| "learning_rate": 9.992549194735101e-06, |
| "loss": 0.1179, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 0.5797984556503705, |
| "learning_rate": 9.992470991231144e-06, |
| "loss": 0.0466, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 1.059908713900853, |
| "learning_rate": 9.992392379765005e-06, |
| "loss": 0.0994, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 1.1187885391430794, |
| "learning_rate": 9.992313360343104e-06, |
| "loss": 0.0986, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 0.7509441330173129, |
| "learning_rate": 9.992233932971901e-06, |
| "loss": 0.0634, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 0.9426276516690344, |
| "learning_rate": 9.992154097657888e-06, |
| "loss": 0.0857, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 0.8754039034503873, |
| "learning_rate": 9.992073854407585e-06, |
| "loss": 0.0881, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 2.8697219156120712, |
| "learning_rate": 9.99199320322755e-06, |
| "loss": 0.0851, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 0.7429242681646778, |
| "learning_rate": 9.991912144124375e-06, |
| "loss": 0.0729, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 1.0552979449251756, |
| "learning_rate": 9.991830677104682e-06, |
| "loss": 0.1066, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 0.8812651371324355, |
| "learning_rate": 9.99174880217513e-06, |
| "loss": 0.0732, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 1.0755107845413352, |
| "learning_rate": 9.991666519342407e-06, |
| "loss": 0.0977, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 0.8925063431256136, |
| "learning_rate": 9.99158382861324e-06, |
| "loss": 0.0904, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 0.8190206986922173, |
| "learning_rate": 9.991500729994384e-06, |
| "loss": 0.0729, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 0.6635798147425112, |
| "learning_rate": 9.991417223492629e-06, |
| "loss": 0.0631, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 1.0314655306023923, |
| "learning_rate": 9.991333309114798e-06, |
| "loss": 0.0852, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 0.8533496857694978, |
| "learning_rate": 9.991248986867753e-06, |
| "loss": 0.0868, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 1.039085255997433, |
| "learning_rate": 9.991164256758378e-06, |
| "loss": 0.095, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 1.1484522866350177, |
| "learning_rate": 9.9910791187936e-06, |
| "loss": 0.1333, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 0.8277820800102422, |
| "learning_rate": 9.99099357298038e-06, |
| "loss": 0.0664, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 0.821796111319934, |
| "learning_rate": 9.9909076193257e-06, |
| "loss": 0.083, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 0.9448800546720313, |
| "learning_rate": 9.990821257836589e-06, |
| "loss": 0.0873, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 0.9002810379340489, |
| "learning_rate": 9.990734488520103e-06, |
| "loss": 0.099, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 0.6145149717344348, |
| "learning_rate": 9.990647311383334e-06, |
| "loss": 0.0425, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 1.1377497370761045, |
| "learning_rate": 9.990559726433404e-06, |
| "loss": 0.0903, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 0.8401357673155365, |
| "learning_rate": 9.99047173367747e-06, |
| "loss": 0.0812, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 0.6977882365614015, |
| "learning_rate": 9.990383333122722e-06, |
| "loss": 0.0613, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 0.6751056796776193, |
| "learning_rate": 9.990294524776384e-06, |
| "loss": 0.0636, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 0.7973250315161167, |
| "learning_rate": 9.990205308645716e-06, |
| "loss": 0.0655, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 0.6494979859380491, |
| "learning_rate": 9.990115684738005e-06, |
| "loss": 0.0461, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 0.7863907355652456, |
| "learning_rate": 9.990025653060574e-06, |
| "loss": 0.0881, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 1.2756737972223395, |
| "learning_rate": 9.98993521362078e-06, |
| "loss": 0.1102, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 1.1992554133605928, |
| "learning_rate": 9.989844366426018e-06, |
| "loss": 0.1147, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 0.5034605400337953, |
| "learning_rate": 9.989753111483707e-06, |
| "loss": 0.0462, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 0.9881921480518578, |
| "learning_rate": 9.989661448801305e-06, |
| "loss": 0.0848, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 0.7581777568438945, |
| "learning_rate": 9.989569378386303e-06, |
| "loss": 0.079, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 0.6464731162067388, |
| "learning_rate": 9.989476900246223e-06, |
| "loss": 0.0617, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 0.8780639185859085, |
| "learning_rate": 9.989384014388624e-06, |
| "loss": 0.086, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 0.6623808171307163, |
| "learning_rate": 9.989290720821095e-06, |
| "loss": 0.0694, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 0.721054554263859, |
| "learning_rate": 9.98919701955126e-06, |
| "loss": 0.0735, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 0.7868134014829404, |
| "learning_rate": 9.989102910586776e-06, |
| "loss": 0.0546, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 0.9137158371163484, |
| "learning_rate": 9.989008393935331e-06, |
| "loss": 0.0771, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 0.8326009579593463, |
| "learning_rate": 9.98891346960465e-06, |
| "loss": 0.0667, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 0.6462724580348628, |
| "learning_rate": 9.988818137602494e-06, |
| "loss": 0.0717, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 0.7513725247558808, |
| "learning_rate": 9.988722397936646e-06, |
| "loss": 0.0733, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 1.094509848236789, |
| "learning_rate": 9.988626250614932e-06, |
| "loss": 0.1009, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 0.8200579138639758, |
| "learning_rate": 9.98852969564521e-06, |
| "loss": 0.0844, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 0.7417763562196316, |
| "learning_rate": 9.988432733035369e-06, |
| "loss": 0.0611, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 0.8476475869820355, |
| "learning_rate": 9.988335362793333e-06, |
| "loss": 0.0863, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 0.9998642783878469, |
| "learning_rate": 9.988237584927058e-06, |
| "loss": 0.0909, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 1.1689324698997519, |
| "learning_rate": 9.988139399444534e-06, |
| "loss": 0.124, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 0.790901332269412, |
| "learning_rate": 9.988040806353786e-06, |
| "loss": 0.0855, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 0.8931785977847209, |
| "learning_rate": 9.987941805662869e-06, |
| "loss": 0.1023, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 0.7352781929773609, |
| "learning_rate": 9.98784239737987e-06, |
| "loss": 0.0563, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 0.7169092611535308, |
| "learning_rate": 9.987742581512919e-06, |
| "loss": 0.0683, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 0.6767560569792272, |
| "learning_rate": 9.987642358070167e-06, |
| "loss": 0.0669, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 0.8442319805699996, |
| "learning_rate": 9.987541727059805e-06, |
| "loss": 0.0768, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 0.7700876798522618, |
| "learning_rate": 9.987440688490058e-06, |
| "loss": 0.0643, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 0.7286087978317647, |
| "learning_rate": 9.98733924236918e-06, |
| "loss": 0.0698, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 0.7917355018437868, |
| "learning_rate": 9.98723738870546e-06, |
| "loss": 0.0791, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 1.0469499693242315, |
| "learning_rate": 9.987135127507226e-06, |
| "loss": 0.0761, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 0.8361714930383379, |
| "learning_rate": 9.987032458782828e-06, |
| "loss": 0.0789, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 0.5902853873046482, |
| "learning_rate": 9.986929382540662e-06, |
| "loss": 0.0479, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 0.7349436304465384, |
| "learning_rate": 9.986825898789145e-06, |
| "loss": 0.0668, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 0.7657107039148755, |
| "learning_rate": 9.986722007536737e-06, |
| "loss": 0.0617, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 0.6450631027744769, |
| "learning_rate": 9.986617708791926e-06, |
| "loss": 0.0679, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 0.6292930010016882, |
| "learning_rate": 9.986513002563236e-06, |
| "loss": 0.0482, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 0.8758541343517451, |
| "learning_rate": 9.986407888859221e-06, |
| "loss": 0.0994, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 0.6537445862223847, |
| "learning_rate": 9.986302367688473e-06, |
| "loss": 0.07, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 0.8029660816844667, |
| "learning_rate": 9.986196439059613e-06, |
| "loss": 0.0623, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 0.7339528606524214, |
| "learning_rate": 9.986090102981297e-06, |
| "loss": 0.0791, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 0.7934112522002073, |
| "learning_rate": 9.985983359462215e-06, |
| "loss": 0.0672, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 1.0186962263060808, |
| "learning_rate": 9.98587620851109e-06, |
| "loss": 0.1213, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 0.6769843647605545, |
| "learning_rate": 9.985768650136679e-06, |
| "loss": 0.0685, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 0.7543020935976431, |
| "learning_rate": 9.985660684347765e-06, |
| "loss": 0.0861, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 0.9552124731299731, |
| "learning_rate": 9.985552311153178e-06, |
| "loss": 0.0922, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 0.7436699167226903, |
| "learning_rate": 9.985443530561769e-06, |
| "loss": 0.0885, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 1.329058937551934, |
| "learning_rate": 9.98533434258243e-06, |
| "loss": 0.1115, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 0.6835909813818813, |
| "learning_rate": 9.985224747224083e-06, |
| "loss": 0.0586, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 1.0733107060854794, |
| "learning_rate": 9.98511474449568e-06, |
| "loss": 0.0811, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 0.5916007278667166, |
| "learning_rate": 9.985004334406215e-06, |
| "loss": 0.0696, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 0.9149357508392912, |
| "learning_rate": 9.984893516964707e-06, |
| "loss": 0.0704, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 1.1634742377762608, |
| "learning_rate": 9.984782292180212e-06, |
| "loss": 0.1178, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 0.603957454908005, |
| "learning_rate": 9.98467066006182e-06, |
| "loss": 0.0585, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 0.7735087790025026, |
| "learning_rate": 9.984558620618651e-06, |
| "loss": 0.0953, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 1.2570182633873541, |
| "learning_rate": 9.984446173859863e-06, |
| "loss": 0.1353, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 0.7275895818672663, |
| "learning_rate": 9.984333319794642e-06, |
| "loss": 0.0774, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 0.6395006056363333, |
| "learning_rate": 9.984220058432212e-06, |
| "loss": 0.0591, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 0.6563921850032347, |
| "learning_rate": 9.984106389781828e-06, |
| "loss": 0.0573, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 0.9399157526953884, |
| "learning_rate": 9.983992313852776e-06, |
| "loss": 0.0793, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 0.93528061821534, |
| "learning_rate": 9.983877830654381e-06, |
| "loss": 0.0807, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 0.7192448233352142, |
| "learning_rate": 9.983762940195996e-06, |
| "loss": 0.0773, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 0.7097381072031733, |
| "learning_rate": 9.98364764248701e-06, |
| "loss": 0.0698, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 1.1635566012920768, |
| "learning_rate": 9.983531937536844e-06, |
| "loss": 0.0893, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 0.8456555685011555, |
| "learning_rate": 9.983415825354954e-06, |
| "loss": 0.0628, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 0.7151838393189083, |
| "learning_rate": 9.983299305950828e-06, |
| "loss": 0.0557, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 0.7095193783870621, |
| "learning_rate": 9.983182379333989e-06, |
| "loss": 0.0604, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 0.8581434444337498, |
| "learning_rate": 9.983065045513986e-06, |
| "loss": 0.0781, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 0.5600994934804626, |
| "learning_rate": 9.982947304500414e-06, |
| "loss": 0.0498, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 0.7355720212694087, |
| "learning_rate": 9.98282915630289e-06, |
| "loss": 0.0692, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 1.6846985851500909, |
| "learning_rate": 9.98271060093107e-06, |
| "loss": 0.1687, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 0.7959406174268434, |
| "learning_rate": 9.98259163839464e-06, |
| "loss": 0.0718, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 0.6005858848115938, |
| "learning_rate": 9.982472268703323e-06, |
| "loss": 0.0465, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 0.7865103977061746, |
| "learning_rate": 9.982352491866874e-06, |
| "loss": 0.071, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 0.7167219429964851, |
| "learning_rate": 9.982232307895077e-06, |
| "loss": 0.0658, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 1.206398567596641, |
| "learning_rate": 9.982111716797758e-06, |
| "loss": 0.101, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 1.0085912508470862, |
| "learning_rate": 9.981990718584768e-06, |
| "loss": 0.0959, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 0.8594135430057543, |
| "learning_rate": 9.981869313265995e-06, |
| "loss": 0.0912, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 0.9903339586980618, |
| "learning_rate": 9.981747500851357e-06, |
| "loss": 0.0692, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 0.7623380548666351, |
| "learning_rate": 9.981625281350812e-06, |
| "loss": 0.0699, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 0.6267143484055344, |
| "learning_rate": 9.981502654774349e-06, |
| "loss": 0.0499, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 0.8234150836820757, |
| "learning_rate": 9.98137962113198e-06, |
| "loss": 0.0788, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 0.8158733102806115, |
| "learning_rate": 9.98125618043377e-06, |
| "loss": 0.089, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 0.6372656549463032, |
| "learning_rate": 9.981132332689796e-06, |
| "loss": 0.0517, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 0.7713863813548327, |
| "learning_rate": 9.981008077910184e-06, |
| "loss": 0.0769, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 0.8883775702857831, |
| "learning_rate": 9.980883416105084e-06, |
| "loss": 0.0828, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 0.6490936355626988, |
| "learning_rate": 9.980758347284687e-06, |
| "loss": 0.0618, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 0.8359554084586713, |
| "learning_rate": 9.980632871459209e-06, |
| "loss": 0.0714, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 0.7373523328454649, |
| "learning_rate": 9.980506988638906e-06, |
| "loss": 0.0836, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 0.6644370731485183, |
| "learning_rate": 9.980380698834064e-06, |
| "loss": 0.0777, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 0.870883965477211, |
| "learning_rate": 9.980254002055003e-06, |
| "loss": 0.0847, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 0.6021065409531002, |
| "learning_rate": 9.980126898312074e-06, |
| "loss": 0.0583, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.8705461588189498, |
| "learning_rate": 9.979999387615665e-06, |
| "loss": 0.0895, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 0.9639410731114018, |
| "learning_rate": 9.979871469976197e-06, |
| "loss": 0.0901, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 0.7554126383153169, |
| "learning_rate": 9.97974314540412e-06, |
| "loss": 0.0699, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 1.1039648440512544, |
| "learning_rate": 9.979614413909922e-06, |
| "loss": 0.1013, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 0.5258831871743486, |
| "learning_rate": 9.979485275504121e-06, |
| "loss": 0.0544, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 1.3025897394440575, |
| "learning_rate": 9.979355730197271e-06, |
| "loss": 0.1067, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 0.5206132423310033, |
| "learning_rate": 9.979225777999956e-06, |
| "loss": 0.0497, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 0.7202189397663867, |
| "learning_rate": 9.9790954189228e-06, |
| "loss": 0.0807, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 0.5738667169449175, |
| "learning_rate": 9.97896465297645e-06, |
| "loss": 0.0614, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 0.7972440737628133, |
| "learning_rate": 9.978833480171592e-06, |
| "loss": 0.0906, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 0.7697423454053598, |
| "learning_rate": 9.978701900518947e-06, |
| "loss": 0.0632, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 0.8259885564233931, |
| "learning_rate": 9.978569914029267e-06, |
| "loss": 0.0944, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 0.8450006655868962, |
| "learning_rate": 9.978437520713335e-06, |
| "loss": 0.0862, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 0.7746078278616594, |
| "learning_rate": 9.978304720581973e-06, |
| "loss": 0.088, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 0.9977734940815816, |
| "learning_rate": 9.97817151364603e-06, |
| "loss": 0.1036, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 0.7800752301510507, |
| "learning_rate": 9.978037899916393e-06, |
| "loss": 0.0778, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 0.7521153273438224, |
| "learning_rate": 9.97790387940398e-06, |
| "loss": 0.0532, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 0.8046420256419254, |
| "learning_rate": 9.977769452119741e-06, |
| "loss": 0.0708, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 0.9071770528791517, |
| "learning_rate": 9.97763461807466e-06, |
| "loss": 0.1006, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 0.8824570234268595, |
| "learning_rate": 9.97749937727976e-06, |
| "loss": 0.0855, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 0.8286075823730068, |
| "learning_rate": 9.977363729746088e-06, |
| "loss": 0.077, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 0.6791233851472963, |
| "learning_rate": 9.977227675484729e-06, |
| "loss": 0.0698, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 0.9813875260679181, |
| "learning_rate": 9.977091214506803e-06, |
| "loss": 0.0838, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 0.9986284190120469, |
| "learning_rate": 9.976954346823456e-06, |
| "loss": 0.0789, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 0.6456071732838817, |
| "learning_rate": 9.976817072445878e-06, |
| "loss": 0.0566, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 0.7707362352402762, |
| "learning_rate": 9.976679391385283e-06, |
| "loss": 0.0677, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 0.5804713825378958, |
| "learning_rate": 9.976541303652923e-06, |
| "loss": 0.0547, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 0.7705377953828665, |
| "learning_rate": 9.976402809260083e-06, |
| "loss": 0.0673, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 0.651002355082985, |
| "learning_rate": 9.976263908218076e-06, |
| "loss": 0.066, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 1.0075230687249708, |
| "learning_rate": 9.976124600538257e-06, |
| "loss": 0.1151, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 0.7110146200064966, |
| "learning_rate": 9.975984886232006e-06, |
| "loss": 0.0693, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 0.782615076662302, |
| "learning_rate": 9.975844765310743e-06, |
| "loss": 0.071, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 1.091513822496144, |
| "learning_rate": 9.975704237785915e-06, |
| "loss": 0.1277, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 0.8244942271322709, |
| "learning_rate": 9.975563303669006e-06, |
| "loss": 0.092, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 1.0997264747524325, |
| "learning_rate": 9.975421962971536e-06, |
| "loss": 0.102, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 1.0471722358260585, |
| "learning_rate": 9.97528021570505e-06, |
| "loss": 0.1112, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 0.6366013160292697, |
| "learning_rate": 9.975138061881135e-06, |
| "loss": 0.0629, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 0.7145502784859615, |
| "learning_rate": 9.974995501511404e-06, |
| "loss": 0.0567, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 1.0825694007542435, |
| "learning_rate": 9.974852534607506e-06, |
| "loss": 0.0897, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 0.8874195306329471, |
| "learning_rate": 9.974709161181126e-06, |
| "loss": 0.0879, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 0.8193025449594961, |
| "learning_rate": 9.974565381243982e-06, |
| "loss": 0.0969, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 0.76528422131405, |
| "learning_rate": 9.974421194807815e-06, |
| "loss": 0.0786, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 0.8836543328533641, |
| "learning_rate": 9.974276601884416e-06, |
| "loss": 0.0744, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 0.7482952108426273, |
| "learning_rate": 9.974131602485596e-06, |
| "loss": 0.0772, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 0.9122723647083647, |
| "learning_rate": 9.973986196623203e-06, |
| "loss": 0.0851, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 0.8373653902978805, |
| "learning_rate": 9.973840384309121e-06, |
| "loss": 0.0865, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 0.6360069343077157, |
| "learning_rate": 9.973694165555264e-06, |
| "loss": 0.0618, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 0.7967304456611868, |
| "learning_rate": 9.973547540373582e-06, |
| "loss": 0.0865, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 1.1699452577832765, |
| "learning_rate": 9.973400508776054e-06, |
| "loss": 0.1144, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 0.6282867599706373, |
| "learning_rate": 9.973253070774698e-06, |
| "loss": 0.0633, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 0.79942272506218, |
| "learning_rate": 9.973105226381559e-06, |
| "loss": 0.069, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 0.9348674828410355, |
| "learning_rate": 9.972956975608719e-06, |
| "loss": 0.1019, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 1.0942665884463076, |
| "learning_rate": 9.972808318468292e-06, |
| "loss": 0.0859, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 0.6283579225277517, |
| "learning_rate": 9.972659254972426e-06, |
| "loss": 0.0589, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 1.0989677054167046, |
| "learning_rate": 9.972509785133304e-06, |
| "loss": 0.1081, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 0.7310198219540203, |
| "learning_rate": 9.972359908963137e-06, |
| "loss": 0.0675, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 0.757671629194488, |
| "learning_rate": 9.972209626474172e-06, |
| "loss": 0.0734, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 0.7966175159886519, |
| "learning_rate": 9.972058937678692e-06, |
| "loss": 0.075, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 0.9805514159267839, |
| "learning_rate": 9.97190784258901e-06, |
| "loss": 0.1071, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 0.7000612574442994, |
| "learning_rate": 9.971756341217471e-06, |
| "loss": 0.0526, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 0.7917466702374949, |
| "learning_rate": 9.971604433576456e-06, |
| "loss": 0.0698, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 0.8412692631182211, |
| "learning_rate": 9.97145211967838e-06, |
| "loss": 0.0783, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 0.5615038895232536, |
| "learning_rate": 9.971299399535685e-06, |
| "loss": 0.053, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 0.6849745369298482, |
| "learning_rate": 9.971146273160854e-06, |
| "loss": 0.0774, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 0.6466596777060115, |
| "learning_rate": 9.9709927405664e-06, |
| "loss": 0.0606, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 0.7169884074840761, |
| "learning_rate": 9.970838801764866e-06, |
| "loss": 0.0839, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 0.9393396355410675, |
| "learning_rate": 9.970684456768836e-06, |
| "loss": 0.1132, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 12.197098173453568, |
| "learning_rate": 9.970529705590918e-06, |
| "loss": 0.4858, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 0.7355841274771772, |
| "learning_rate": 9.97037454824376e-06, |
| "loss": 0.0714, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 1.050385265783733, |
| "learning_rate": 9.97021898474004e-06, |
| "loss": 0.1024, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 0.8612087678995594, |
| "learning_rate": 9.970063015092469e-06, |
| "loss": 0.085, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 1.3886472100476919, |
| "learning_rate": 9.969906639313793e-06, |
| "loss": 0.1212, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 0.8238176964814595, |
| "learning_rate": 9.96974985741679e-06, |
| "loss": 0.0721, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 0.8718897735731601, |
| "learning_rate": 9.969592669414272e-06, |
| "loss": 0.0959, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 6.796752422837202, |
| "learning_rate": 9.969435075319083e-06, |
| "loss": 0.115, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 0.58176536820322, |
| "learning_rate": 9.969277075144104e-06, |
| "loss": 0.0459, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 0.7267253435076165, |
| "learning_rate": 9.969118668902242e-06, |
| "loss": 0.07, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 0.7682389367523258, |
| "learning_rate": 9.968959856606442e-06, |
| "loss": 0.0542, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 0.7873348185837048, |
| "learning_rate": 9.968800638269682e-06, |
| "loss": 0.0598, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 1.287713292390112, |
| "learning_rate": 9.968641013904974e-06, |
| "loss": 0.1442, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 1.085650814952146, |
| "learning_rate": 9.968480983525359e-06, |
| "loss": 0.0926, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 0.6716676596759695, |
| "learning_rate": 9.968320547143918e-06, |
| "loss": 0.0767, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 0.8467396807693714, |
| "learning_rate": 9.968159704773757e-06, |
| "loss": 0.0977, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 0.6438855833782786, |
| "learning_rate": 9.967998456428021e-06, |
| "loss": 0.0586, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 0.7254140122399564, |
| "learning_rate": 9.967836802119886e-06, |
| "loss": 0.06, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 0.87517545358881, |
| "learning_rate": 9.967674741862563e-06, |
| "loss": 0.1016, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 1.0624206936058178, |
| "learning_rate": 9.967512275669294e-06, |
| "loss": 0.1296, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 1.0284720738314184, |
| "learning_rate": 9.967349403553353e-06, |
| "loss": 0.0862, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 0.8342932737384292, |
| "learning_rate": 9.967186125528053e-06, |
| "loss": 0.0873, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 1.543095569701571, |
| "learning_rate": 9.967022441606734e-06, |
| "loss": 0.1209, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 0.70731586616612, |
| "learning_rate": 9.966858351802773e-06, |
| "loss": 0.0726, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 0.6660531988680356, |
| "learning_rate": 9.966693856129576e-06, |
| "loss": 0.0562, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 0.8503640969928286, |
| "learning_rate": 9.966528954600587e-06, |
| "loss": 0.0838, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 0.6021534124846688, |
| "learning_rate": 9.96636364722928e-06, |
| "loss": 0.0673, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 0.8782816795828058, |
| "learning_rate": 9.966197934029165e-06, |
| "loss": 0.0845, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 0.9030990654346936, |
| "learning_rate": 9.966031815013781e-06, |
| "loss": 0.0839, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 0.8567507299712805, |
| "learning_rate": 9.965865290196703e-06, |
| "loss": 0.0935, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 0.8099856489670021, |
| "learning_rate": 9.96569835959154e-06, |
| "loss": 0.0747, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 0.8938878675243255, |
| "learning_rate": 9.965531023211931e-06, |
| "loss": 0.0854, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 0.735313860104022, |
| "learning_rate": 9.965363281071551e-06, |
| "loss": 0.0865, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 0.5495229598132649, |
| "learning_rate": 9.965195133184108e-06, |
| "loss": 0.0403, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 1.0700416713113117, |
| "learning_rate": 9.965026579563342e-06, |
| "loss": 0.1086, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 0.7118653717355078, |
| "learning_rate": 9.964857620223024e-06, |
| "loss": 0.0691, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 0.6871481686027417, |
| "learning_rate": 9.964688255176963e-06, |
| "loss": 0.0667, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 0.9848841869658392, |
| "learning_rate": 9.964518484438998e-06, |
| "loss": 0.0813, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 0.6311750922074311, |
| "learning_rate": 9.964348308023001e-06, |
| "loss": 0.0592, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 0.7813168734245782, |
| "learning_rate": 9.964177725942881e-06, |
| "loss": 0.0826, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 0.8572110622332836, |
| "learning_rate": 9.964006738212574e-06, |
| "loss": 0.0853, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 0.5304433423014596, |
| "learning_rate": 9.963835344846056e-06, |
| "loss": 0.048, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 0.7598521228122416, |
| "learning_rate": 9.963663545857328e-06, |
| "loss": 0.0757, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 1.1542546683489703, |
| "learning_rate": 9.963491341260432e-06, |
| "loss": 0.104, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 0.7766563582253432, |
| "learning_rate": 9.963318731069437e-06, |
| "loss": 0.0952, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 1.1319194983916299, |
| "learning_rate": 9.96314571529845e-06, |
| "loss": 0.1005, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 0.7230559135257585, |
| "learning_rate": 9.962972293961608e-06, |
| "loss": 0.0647, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 0.9863934566369588, |
| "learning_rate": 9.962798467073083e-06, |
| "loss": 0.0763, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 0.8259784410005646, |
| "learning_rate": 9.96262423464708e-06, |
| "loss": 0.087, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 0.7987139095182185, |
| "learning_rate": 9.962449596697834e-06, |
| "loss": 0.0671, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 1.130208173229934, |
| "learning_rate": 9.962274553239619e-06, |
| "loss": 0.119, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 0.7399696243677417, |
| "learning_rate": 9.962099104286735e-06, |
| "loss": 0.064, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 1.156015767405528, |
| "learning_rate": 9.961923249853523e-06, |
| "loss": 0.1102, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 0.972422739757894, |
| "learning_rate": 9.961746989954349e-06, |
| "loss": 0.1093, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 0.7766700420403171, |
| "learning_rate": 9.96157032460362e-06, |
| "loss": 0.0655, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 0.7460679115751414, |
| "learning_rate": 9.961393253815767e-06, |
| "loss": 0.0751, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.0684214450487566, |
| "learning_rate": 9.961215777605266e-06, |
| "loss": 0.0789, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 0.7683994291392229, |
| "learning_rate": 9.961037895986615e-06, |
| "loss": 0.0849, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 0.7270368453251704, |
| "learning_rate": 9.960859608974352e-06, |
| "loss": 0.0779, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 0.701460207303568, |
| "learning_rate": 9.960680916583042e-06, |
| "loss": 0.0639, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 0.6784619280926262, |
| "learning_rate": 9.960501818827292e-06, |
| "loss": 0.077, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 0.8064075868568972, |
| "learning_rate": 9.960322315721735e-06, |
| "loss": 0.0827, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 0.9155026735417204, |
| "learning_rate": 9.960142407281039e-06, |
| "loss": 0.0841, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 0.6167749294869733, |
| "learning_rate": 9.959962093519904e-06, |
| "loss": 0.054, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 0.8127781985331358, |
| "learning_rate": 9.959781374453066e-06, |
| "loss": 0.0751, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 0.98306444688532, |
| "learning_rate": 9.959600250095294e-06, |
| "loss": 0.075, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 0.7982130269360888, |
| "learning_rate": 9.959418720461384e-06, |
| "loss": 0.0834, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 0.7862225023823932, |
| "learning_rate": 9.959236785566175e-06, |
| "loss": 0.0704, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 0.562107514296544, |
| "learning_rate": 9.959054445424532e-06, |
| "loss": 0.0644, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 0.6089607791855781, |
| "learning_rate": 9.958871700051353e-06, |
| "loss": 0.0512, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 0.6962095067981563, |
| "learning_rate": 9.958688549461573e-06, |
| "loss": 0.0712, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 1.155217046291275, |
| "learning_rate": 9.958504993670158e-06, |
| "loss": 0.1049, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 1.0913314226134752, |
| "learning_rate": 9.958321032692107e-06, |
| "loss": 0.1226, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 22.735025633907238, |
| "learning_rate": 9.958136666542455e-06, |
| "loss": 0.8419, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 1.184019553325164, |
| "learning_rate": 9.957951895236262e-06, |
| "loss": 0.1113, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 0.7664792046331882, |
| "learning_rate": 9.957766718788632e-06, |
| "loss": 0.104, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 0.8672883026786035, |
| "learning_rate": 9.957581137214695e-06, |
| "loss": 0.074, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 0.8772220264781722, |
| "learning_rate": 9.957395150529615e-06, |
| "loss": 0.0986, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 0.7016331971826193, |
| "learning_rate": 9.95720875874859e-06, |
| "loss": 0.0752, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 0.6308822051977305, |
| "learning_rate": 9.957021961886855e-06, |
| "loss": 0.0608, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 0.9803601042372939, |
| "learning_rate": 9.956834759959669e-06, |
| "loss": 0.0908, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 0.7674462109758159, |
| "learning_rate": 9.95664715298233e-06, |
| "loss": 0.074, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 0.7450186566335193, |
| "learning_rate": 9.95645914097017e-06, |
| "loss": 0.0817, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 0.7225723661612439, |
| "learning_rate": 9.956270723938553e-06, |
| "loss": 0.0849, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 0.7190355211871646, |
| "learning_rate": 9.956081901902875e-06, |
| "loss": 0.0748, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 1.210684562087392, |
| "learning_rate": 9.955892674878565e-06, |
| "loss": 0.1272, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 0.834170476650907, |
| "learning_rate": 9.955703042881087e-06, |
| "loss": 0.0992, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 0.874478173291907, |
| "learning_rate": 9.955513005925934e-06, |
| "loss": 0.0858, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 0.5510320150423565, |
| "learning_rate": 9.95532256402864e-06, |
| "loss": 0.0574, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 0.5657171871822584, |
| "learning_rate": 9.955131717204762e-06, |
| "loss": 0.0671, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.7564664653864259, |
| "learning_rate": 9.954940465469898e-06, |
| "loss": 0.085, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 0.7594501005901694, |
| "learning_rate": 9.954748808839675e-06, |
| "loss": 0.0733, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 0.6748092428366178, |
| "learning_rate": 9.954556747329754e-06, |
| "loss": 0.0707, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 1.715089789819449, |
| "learning_rate": 9.954364280955832e-06, |
| "loss": 0.1045, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 0.6668751648778155, |
| "learning_rate": 9.954171409733634e-06, |
| "loss": 0.0573, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 0.5963716475430643, |
| "learning_rate": 9.95397813367892e-06, |
| "loss": 0.0752, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 0.9917190233932158, |
| "learning_rate": 9.953784452807487e-06, |
| "loss": 0.1049, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 0.5638529401686616, |
| "learning_rate": 9.953590367135159e-06, |
| "loss": 0.0547, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 0.6477110515460727, |
| "learning_rate": 9.953395876677796e-06, |
| "loss": 0.0564, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 0.5492055118574499, |
| "learning_rate": 9.95320098145129e-06, |
| "loss": 0.0505, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 0.8954528378372288, |
| "learning_rate": 9.95300568147157e-06, |
| "loss": 0.126, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 0.6155736143826033, |
| "learning_rate": 9.952809976754593e-06, |
| "loss": 0.0518, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 1.1486004986445648, |
| "learning_rate": 9.952613867316351e-06, |
| "loss": 0.1142, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 0.8236924325360948, |
| "learning_rate": 9.95241735317287e-06, |
| "loss": 0.1047, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 0.832372102653505, |
| "learning_rate": 9.952220434340209e-06, |
| "loss": 0.0729, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 0.7288716722109786, |
| "learning_rate": 9.952023110834456e-06, |
| "loss": 0.068, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 0.5327254294033283, |
| "learning_rate": 9.951825382671739e-06, |
| "loss": 0.0614, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 0.7204991379763186, |
| "learning_rate": 9.951627249868213e-06, |
| "loss": 0.0666, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 0.7485835393026234, |
| "learning_rate": 9.95142871244007e-06, |
| "loss": 0.068, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 0.45602532896445397, |
| "learning_rate": 9.951229770403531e-06, |
| "loss": 0.0414, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 0.7240661348572547, |
| "learning_rate": 9.951030423774858e-06, |
| "loss": 0.0798, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 0.7716352477687572, |
| "learning_rate": 9.950830672570337e-06, |
| "loss": 0.071, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 1.22677184750836, |
| "learning_rate": 9.95063051680629e-06, |
| "loss": 0.1373, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 0.7365431233953595, |
| "learning_rate": 9.950429956499074e-06, |
| "loss": 0.0699, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 0.705654951368504, |
| "learning_rate": 9.950228991665078e-06, |
| "loss": 0.0741, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 0.8261497906057415, |
| "learning_rate": 9.950027622320724e-06, |
| "loss": 0.0764, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 0.9965395262255518, |
| "learning_rate": 9.949825848482465e-06, |
| "loss": 0.0852, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 0.6807161957389707, |
| "learning_rate": 9.949623670166794e-06, |
| "loss": 0.074, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 1.1216390709095547, |
| "learning_rate": 9.949421087390228e-06, |
| "loss": 0.0931, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 1.1278655216416786, |
| "learning_rate": 9.949218100169322e-06, |
| "loss": 0.1177, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 0.9160591457448575, |
| "learning_rate": 9.949014708520664e-06, |
| "loss": 0.1015, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 0.9377363057118697, |
| "learning_rate": 9.948810912460872e-06, |
| "loss": 0.1059, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 0.8760932101779023, |
| "learning_rate": 9.948606712006601e-06, |
| "loss": 0.0812, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 0.6962605051289937, |
| "learning_rate": 9.948402107174537e-06, |
| "loss": 0.0735, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 0.6501265713488487, |
| "learning_rate": 9.948197097981401e-06, |
| "loss": 0.0551, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 1.2156011775652311, |
| "learning_rate": 9.947991684443942e-06, |
| "loss": 0.1066, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 0.9679794435610901, |
| "learning_rate": 9.947785866578951e-06, |
| "loss": 0.0981, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 0.7195724631231237, |
| "learning_rate": 9.94757964440324e-06, |
| "loss": 0.0777, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 0.549427502610929, |
| "learning_rate": 9.947373017933665e-06, |
| "loss": 0.0516, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 0.5667212336170355, |
| "learning_rate": 9.947165987187108e-06, |
| "loss": 0.0583, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 0.6638127935874616, |
| "learning_rate": 9.946958552180489e-06, |
| "loss": 0.0723, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 0.5226768129517959, |
| "learning_rate": 9.946750712930756e-06, |
| "loss": 0.0482, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 0.8358986518129136, |
| "learning_rate": 9.946542469454894e-06, |
| "loss": 0.1037, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 0.6695809647699968, |
| "learning_rate": 9.94633382176992e-06, |
| "loss": 0.0728, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 1.0608546974350634, |
| "learning_rate": 9.946124769892884e-06, |
| "loss": 0.1192, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 0.5090717025630993, |
| "learning_rate": 9.945915313840869e-06, |
| "loss": 0.0612, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 0.8105130307542814, |
| "learning_rate": 9.94570545363099e-06, |
| "loss": 0.0838, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 0.7752986876049957, |
| "learning_rate": 9.945495189280394e-06, |
| "loss": 0.092, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 0.869801315379322, |
| "learning_rate": 9.945284520806267e-06, |
| "loss": 0.077, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 0.5427153243822386, |
| "learning_rate": 9.94507344822582e-06, |
| "loss": 0.0592, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 0.7368670007832758, |
| "learning_rate": 9.944861971556305e-06, |
| "loss": 0.0608, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 0.8141430793460733, |
| "learning_rate": 9.944650090814998e-06, |
| "loss": 0.0616, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 2.1096588720516425, |
| "learning_rate": 9.944437806019216e-06, |
| "loss": 0.0938, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 0.7014907085161215, |
| "learning_rate": 9.944225117186306e-06, |
| "loss": 0.0812, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 0.5078467158211916, |
| "learning_rate": 9.944012024333647e-06, |
| "loss": 0.0561, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 0.6379031604907951, |
| "learning_rate": 9.943798527478652e-06, |
| "loss": 0.0678, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 0.799876019099874, |
| "learning_rate": 9.943584626638768e-06, |
| "loss": 0.0914, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 0.6550229607349646, |
| "learning_rate": 9.943370321831474e-06, |
| "loss": 0.0668, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 0.767534839542607, |
| "learning_rate": 9.943155613074279e-06, |
| "loss": 0.0711, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 0.7571838990000624, |
| "learning_rate": 9.942940500384733e-06, |
| "loss": 0.0893, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 17.807000846945513, |
| "learning_rate": 9.942724983780409e-06, |
| "loss": 0.3419, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 1.2088422410181228, |
| "learning_rate": 9.942509063278922e-06, |
| "loss": 0.1173, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 0.8811842157145667, |
| "learning_rate": 9.942292738897914e-06, |
| "loss": 0.1006, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 0.7726281786442553, |
| "learning_rate": 9.942076010655063e-06, |
| "loss": 0.0909, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 0.9942256398778268, |
| "learning_rate": 9.941858878568078e-06, |
| "loss": 0.134, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 1.001596627292525, |
| "learning_rate": 9.941641342654702e-06, |
| "loss": 0.0977, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 0.5064863363900076, |
| "learning_rate": 9.941423402932713e-06, |
| "loss": 0.0559, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 0.8589680374278897, |
| "learning_rate": 9.94120505941992e-06, |
| "loss": 0.0992, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 0.7830880681851201, |
| "learning_rate": 9.940986312134162e-06, |
| "loss": 0.0825, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 0.5778344550660577, |
| "learning_rate": 9.940767161093316e-06, |
| "loss": 0.0637, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 0.8661775200374767, |
| "learning_rate": 9.94054760631529e-06, |
| "loss": 0.0958, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 0.6976226834296251, |
| "learning_rate": 9.940327647818026e-06, |
| "loss": 0.0752, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 0.7530160135685138, |
| "learning_rate": 9.940107285619495e-06, |
| "loss": 0.077, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 0.7997106896354084, |
| "learning_rate": 9.939886519737707e-06, |
| "loss": 0.0958, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 0.8918061918047896, |
| "learning_rate": 9.939665350190702e-06, |
| "loss": 0.0822, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 0.804115756264787, |
| "learning_rate": 9.93944377699655e-06, |
| "loss": 0.0915, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 0.6234057941022288, |
| "learning_rate": 9.93922180017336e-06, |
| "loss": 0.0672, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 0.8269450754551354, |
| "learning_rate": 9.93899941973927e-06, |
| "loss": 0.1102, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 0.9233841316663005, |
| "learning_rate": 9.93877663571245e-06, |
| "loss": 0.0963, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 0.9944861568923805, |
| "learning_rate": 9.938553448111108e-06, |
| "loss": 0.1127, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 0.8423641298780182, |
| "learning_rate": 9.938329856953482e-06, |
| "loss": 0.0788, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 0.8124861649110975, |
| "learning_rate": 9.938105862257839e-06, |
| "loss": 0.0831, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 0.6612222253979325, |
| "learning_rate": 9.937881464042485e-06, |
| "loss": 0.0703, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 0.854447666921162, |
| "learning_rate": 9.937656662325759e-06, |
| "loss": 0.1074, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 0.74521770368624, |
| "learning_rate": 9.937431457126028e-06, |
| "loss": 0.0777, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2529572338489536, |
| "grad_norm": 0.5044600553216889, |
| "learning_rate": 9.937205848461694e-06, |
| "loss": 0.0482, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2534121929026388, |
| "grad_norm": 1.0949051966397356, |
| "learning_rate": 9.936979836351197e-06, |
| "loss": 0.0945, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2538671519563239, |
| "grad_norm": 1.0332199252594778, |
| "learning_rate": 9.936753420813003e-06, |
| "loss": 0.092, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2543221110100091, |
| "grad_norm": 0.7029577630748303, |
| "learning_rate": 9.936526601865612e-06, |
| "loss": 0.0612, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 0.5251640812064944, |
| "learning_rate": 9.936299379527561e-06, |
| "loss": 0.0569, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2552320291173794, |
| "grad_norm": 0.6689496924283664, |
| "learning_rate": 9.936071753817416e-06, |
| "loss": 0.0831, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2556869881710646, |
| "grad_norm": 0.8094390650978945, |
| "learning_rate": 9.935843724753778e-06, |
| "loss": 0.0897, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.25614194722474976, |
| "grad_norm": 0.9168849457874456, |
| "learning_rate": 9.935615292355283e-06, |
| "loss": 0.1002, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.25659690627843496, |
| "grad_norm": 0.8829987760246157, |
| "learning_rate": 9.935386456640593e-06, |
| "loss": 0.0997, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2570518653321201, |
| "grad_norm": 0.9381858557170412, |
| "learning_rate": 9.93515721762841e-06, |
| "loss": 0.0926, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2575068243858053, |
| "grad_norm": 0.6555630906162114, |
| "learning_rate": 9.934927575337469e-06, |
| "loss": 0.0805, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.25796178343949044, |
| "grad_norm": 0.49897284031908906, |
| "learning_rate": 9.93469752978653e-06, |
| "loss": 0.0545, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2584167424931756, |
| "grad_norm": 0.8528689809178094, |
| "learning_rate": 9.934467080994394e-06, |
| "loss": 0.071, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2588717015468608, |
| "grad_norm": 0.7999188284583189, |
| "learning_rate": 9.934236228979893e-06, |
| "loss": 0.0675, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.25932666060054593, |
| "grad_norm": 0.6603615540899209, |
| "learning_rate": 9.934004973761888e-06, |
| "loss": 0.0584, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25978161965423113, |
| "grad_norm": 0.907545218090885, |
| "learning_rate": 9.933773315359281e-06, |
| "loss": 0.0912, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2602365787079163, |
| "grad_norm": 1.2225854103436529, |
| "learning_rate": 9.933541253790998e-06, |
| "loss": 0.0996, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2606915377616015, |
| "grad_norm": 0.821182112953313, |
| "learning_rate": 9.933308789076004e-06, |
| "loss": 0.0886, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2611464968152866, |
| "grad_norm": 0.5608593716975471, |
| "learning_rate": 9.933075921233292e-06, |
| "loss": 0.0597, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.26160145586897177, |
| "grad_norm": 0.977094581221023, |
| "learning_rate": 9.932842650281897e-06, |
| "loss": 0.0796, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26205641492265697, |
| "grad_norm": 1.0086738407073246, |
| "learning_rate": 9.932608976240875e-06, |
| "loss": 0.1245, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2625113739763421, |
| "grad_norm": 0.7841605184531412, |
| "learning_rate": 9.932374899129323e-06, |
| "loss": 0.0798, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2629663330300273, |
| "grad_norm": 0.6360279282536222, |
| "learning_rate": 9.932140418966369e-06, |
| "loss": 0.0714, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.26342129208371245, |
| "grad_norm": 0.8673569892639119, |
| "learning_rate": 9.931905535771174e-06, |
| "loss": 0.0805, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.26387625113739765, |
| "grad_norm": 1.0489822111787226, |
| "learning_rate": 9.93167024956293e-06, |
| "loss": 0.1046, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2643312101910828, |
| "grad_norm": 0.5670611684906575, |
| "learning_rate": 9.931434560360864e-06, |
| "loss": 0.0662, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.26478616924476794, |
| "grad_norm": 0.6786486717931198, |
| "learning_rate": 9.931198468184236e-06, |
| "loss": 0.0705, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.26524112829845314, |
| "grad_norm": 0.7580601459978998, |
| "learning_rate": 9.93096197305234e-06, |
| "loss": 0.0852, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2656960873521383, |
| "grad_norm": 0.8802141056853473, |
| "learning_rate": 9.930725074984498e-06, |
| "loss": 0.0989, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2661510464058235, |
| "grad_norm": 0.6365186853726369, |
| "learning_rate": 9.930487774000071e-06, |
| "loss": 0.0639, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26660600545950863, |
| "grad_norm": 0.5301331320559389, |
| "learning_rate": 9.930250070118448e-06, |
| "loss": 0.0628, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.26706096451319383, |
| "grad_norm": 0.6982626314754508, |
| "learning_rate": 9.930011963359055e-06, |
| "loss": 0.071, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.267515923566879, |
| "grad_norm": 1.0151988128038116, |
| "learning_rate": 9.929773453741346e-06, |
| "loss": 0.1074, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2679708826205642, |
| "grad_norm": 0.809050548171497, |
| "learning_rate": 9.929534541284814e-06, |
| "loss": 0.0715, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2684258416742493, |
| "grad_norm": 0.8254901916718546, |
| "learning_rate": 9.929295226008981e-06, |
| "loss": 0.0867, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26888080072793447, |
| "grad_norm": 0.695875393623419, |
| "learning_rate": 9.929055507933403e-06, |
| "loss": 0.0667, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.26933575978161967, |
| "grad_norm": 0.6569370607259161, |
| "learning_rate": 9.928815387077668e-06, |
| "loss": 0.0667, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2697907188353048, |
| "grad_norm": 0.8509989554819866, |
| "learning_rate": 9.9285748634614e-06, |
| "loss": 0.0964, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.27024567788899, |
| "grad_norm": 0.7743154017799978, |
| "learning_rate": 9.928333937104249e-06, |
| "loss": 0.1008, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.27070063694267515, |
| "grad_norm": 0.6810806452813069, |
| "learning_rate": 9.928092608025905e-06, |
| "loss": 0.0623, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.27115559599636035, |
| "grad_norm": 0.6757764847225584, |
| "learning_rate": 9.927850876246087e-06, |
| "loss": 0.0621, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2716105550500455, |
| "grad_norm": 0.7561897396028232, |
| "learning_rate": 9.927608741784551e-06, |
| "loss": 0.0769, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.27206551410373064, |
| "grad_norm": 0.9087608421567758, |
| "learning_rate": 9.927366204661081e-06, |
| "loss": 0.1064, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.27252047315741584, |
| "grad_norm": 0.6090969825991095, |
| "learning_rate": 9.927123264895497e-06, |
| "loss": 0.0596, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.272975432211101, |
| "grad_norm": 0.5838273869575724, |
| "learning_rate": 9.926879922507651e-06, |
| "loss": 0.0581, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2734303912647862, |
| "grad_norm": 41.16319851924577, |
| "learning_rate": 9.926636177517427e-06, |
| "loss": 0.7305, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.27388535031847133, |
| "grad_norm": 0.7159907538362364, |
| "learning_rate": 9.926392029944743e-06, |
| "loss": 0.0655, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.27434030937215653, |
| "grad_norm": 0.6649118967721417, |
| "learning_rate": 9.92614747980955e-06, |
| "loss": 0.0676, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2747952684258417, |
| "grad_norm": 0.6955588874689645, |
| "learning_rate": 9.92590252713183e-06, |
| "loss": 0.0691, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2752502274795268, |
| "grad_norm": 1.0093833512385355, |
| "learning_rate": 9.925657171931603e-06, |
| "loss": 0.0788, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.275705186533212, |
| "grad_norm": 0.7222760734094591, |
| "learning_rate": 9.925411414228913e-06, |
| "loss": 0.0765, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.27616014558689717, |
| "grad_norm": 0.7901083190949632, |
| "learning_rate": 9.925165254043846e-06, |
| "loss": 0.0899, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.27661510464058237, |
| "grad_norm": 0.9417411536264935, |
| "learning_rate": 9.924918691396516e-06, |
| "loss": 0.105, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2770700636942675, |
| "grad_norm": 0.8531576003982281, |
| "learning_rate": 9.924671726307073e-06, |
| "loss": 0.0943, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2775250227479527, |
| "grad_norm": 0.5771833327707789, |
| "learning_rate": 9.924424358795694e-06, |
| "loss": 0.0649, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.27797998180163785, |
| "grad_norm": 0.6804808150530418, |
| "learning_rate": 9.924176588882597e-06, |
| "loss": 0.0591, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.278434940855323, |
| "grad_norm": 0.6916110773643345, |
| "learning_rate": 9.923928416588027e-06, |
| "loss": 0.082, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2788898999090082, |
| "grad_norm": 0.7302341341594485, |
| "learning_rate": 9.923679841932261e-06, |
| "loss": 0.0858, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.27934485896269334, |
| "grad_norm": 0.7190514572276734, |
| "learning_rate": 9.923430864935615e-06, |
| "loss": 0.0658, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.27979981801637854, |
| "grad_norm": 0.6872892360375661, |
| "learning_rate": 9.923181485618432e-06, |
| "loss": 0.0639, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2802547770700637, |
| "grad_norm": 0.6937876338258171, |
| "learning_rate": 9.92293170400109e-06, |
| "loss": 0.0759, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2807097361237489, |
| "grad_norm": 0.8498928251372749, |
| "learning_rate": 9.922681520104002e-06, |
| "loss": 0.0777, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.28116469517743403, |
| "grad_norm": 0.7409609990217324, |
| "learning_rate": 9.922430933947612e-06, |
| "loss": 0.0665, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2816196542311192, |
| "grad_norm": 1.2216942184143182, |
| "learning_rate": 9.922179945552393e-06, |
| "loss": 0.1405, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2820746132848044, |
| "grad_norm": 0.6637234254274302, |
| "learning_rate": 9.921928554938857e-06, |
| "loss": 0.062, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2825295723384895, |
| "grad_norm": 0.9463087936758936, |
| "learning_rate": 9.921676762127548e-06, |
| "loss": 0.0767, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2829845313921747, |
| "grad_norm": 1.089309305809361, |
| "learning_rate": 9.921424567139042e-06, |
| "loss": 0.1171, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.28343949044585987, |
| "grad_norm": 0.8752119302288704, |
| "learning_rate": 9.921171969993942e-06, |
| "loss": 0.0813, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.28389444949954507, |
| "grad_norm": 0.7870883299373892, |
| "learning_rate": 9.920918970712894e-06, |
| "loss": 0.0993, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2843494085532302, |
| "grad_norm": 0.6504873266789636, |
| "learning_rate": 9.92066556931657e-06, |
| "loss": 0.073, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28480436760691535, |
| "grad_norm": 1.1098031698420505, |
| "learning_rate": 9.920411765825679e-06, |
| "loss": 0.1218, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.28525932666060055, |
| "grad_norm": 1.217844501512982, |
| "learning_rate": 9.920157560260957e-06, |
| "loss": 0.1549, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.9728161223416268, |
| "learning_rate": 9.919902952643179e-06, |
| "loss": 0.0984, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2861692447679709, |
| "grad_norm": 0.5217007184455262, |
| "learning_rate": 9.91964794299315e-06, |
| "loss": 0.0636, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.28662420382165604, |
| "grad_norm": 1.7394407973312302, |
| "learning_rate": 9.919392531331706e-06, |
| "loss": 0.1686, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28707916287534124, |
| "grad_norm": 0.5702940927618096, |
| "learning_rate": 9.919136717679723e-06, |
| "loss": 0.0465, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2875341219290264, |
| "grad_norm": 0.5990973378462472, |
| "learning_rate": 9.9188805020581e-06, |
| "loss": 0.0678, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.28798908098271153, |
| "grad_norm": 0.9343816967111115, |
| "learning_rate": 9.918623884487777e-06, |
| "loss": 0.1068, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.28844404003639673, |
| "grad_norm": 0.5997939637509836, |
| "learning_rate": 9.91836686498972e-06, |
| "loss": 0.0629, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2888989990900819, |
| "grad_norm": 0.8063617612610782, |
| "learning_rate": 9.918109443584938e-06, |
| "loss": 0.0904, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2893539581437671, |
| "grad_norm": 0.6625405697250593, |
| "learning_rate": 9.917851620294461e-06, |
| "loss": 0.0638, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2898089171974522, |
| "grad_norm": 0.7423789779714624, |
| "learning_rate": 9.917593395139358e-06, |
| "loss": 0.0714, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2902638762511374, |
| "grad_norm": 0.6102576569607258, |
| "learning_rate": 9.91733476814073e-06, |
| "loss": 0.0563, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.29071883530482256, |
| "grad_norm": 0.8342620452233175, |
| "learning_rate": 9.91707573931971e-06, |
| "loss": 0.0934, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2911737943585077, |
| "grad_norm": 0.6397583044633867, |
| "learning_rate": 9.916816308697468e-06, |
| "loss": 0.0608, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2916287534121929, |
| "grad_norm": 0.7837909798874247, |
| "learning_rate": 9.9165564762952e-06, |
| "loss": 0.0936, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.29208371246587805, |
| "grad_norm": 0.9915309549496408, |
| "learning_rate": 9.916296242134142e-06, |
| "loss": 0.1364, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.29253867151956325, |
| "grad_norm": 0.7722166587924495, |
| "learning_rate": 9.916035606235555e-06, |
| "loss": 0.1022, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2929936305732484, |
| "grad_norm": 0.6446192951972597, |
| "learning_rate": 9.915774568620739e-06, |
| "loss": 0.0794, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2934485896269336, |
| "grad_norm": 0.7655996282008942, |
| "learning_rate": 9.915513129311025e-06, |
| "loss": 0.083, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.29390354868061874, |
| "grad_norm": 0.7358761993420325, |
| "learning_rate": 9.915251288327776e-06, |
| "loss": 0.0927, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2943585077343039, |
| "grad_norm": 0.8417441236168001, |
| "learning_rate": 9.914989045692388e-06, |
| "loss": 0.0791, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2948134667879891, |
| "grad_norm": 0.8847229450668922, |
| "learning_rate": 9.914726401426293e-06, |
| "loss": 0.1114, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.29526842584167423, |
| "grad_norm": 0.6805089048669102, |
| "learning_rate": 9.91446335555095e-06, |
| "loss": 0.0645, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.29572338489535943, |
| "grad_norm": 0.9967907781154212, |
| "learning_rate": 9.914199908087856e-06, |
| "loss": 0.1125, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2961783439490446, |
| "grad_norm": 0.7069764233646496, |
| "learning_rate": 9.913936059058537e-06, |
| "loss": 0.0961, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2966333030027298, |
| "grad_norm": 0.8237259808163154, |
| "learning_rate": 9.913671808484554e-06, |
| "loss": 0.0863, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2970882620564149, |
| "grad_norm": 0.5595221349609915, |
| "learning_rate": 9.913407156387503e-06, |
| "loss": 0.0477, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.29754322111010006, |
| "grad_norm": 0.8322598543263076, |
| "learning_rate": 9.913142102789005e-06, |
| "loss": 0.0785, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.29799818016378526, |
| "grad_norm": 0.9426946452527044, |
| "learning_rate": 9.912876647710723e-06, |
| "loss": 0.0993, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2984531392174704, |
| "grad_norm": 0.8902481236790349, |
| "learning_rate": 9.912610791174348e-06, |
| "loss": 0.0981, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2989080982711556, |
| "grad_norm": 0.6714333609160019, |
| "learning_rate": 9.912344533201604e-06, |
| "loss": 0.0716, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.29936305732484075, |
| "grad_norm": 0.6721636461789662, |
| "learning_rate": 9.91207787381425e-06, |
| "loss": 0.0675, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.29981801637852595, |
| "grad_norm": 0.628744075340254, |
| "learning_rate": 9.911810813034073e-06, |
| "loss": 0.0583, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3002729754322111, |
| "grad_norm": 0.9172548581720068, |
| "learning_rate": 9.9115433508829e-06, |
| "loss": 0.0972, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.30072793448589624, |
| "grad_norm": 0.914462327674233, |
| "learning_rate": 9.911275487382583e-06, |
| "loss": 0.089, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.30118289353958144, |
| "grad_norm": 0.7410939383575923, |
| "learning_rate": 9.911007222555011e-06, |
| "loss": 0.0744, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3016378525932666, |
| "grad_norm": 0.6952942958219819, |
| "learning_rate": 9.91073855642211e-06, |
| "loss": 0.0627, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3020928116469518, |
| "grad_norm": 0.8802064643150562, |
| "learning_rate": 9.910469489005828e-06, |
| "loss": 0.0836, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.30254777070063693, |
| "grad_norm": 0.9015922573736656, |
| "learning_rate": 9.910200020328158e-06, |
| "loss": 0.0934, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30300272975432213, |
| "grad_norm": 0.6635682732023674, |
| "learning_rate": 9.909930150411113e-06, |
| "loss": 0.0623, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3034576888080073, |
| "grad_norm": 1.928152977107998, |
| "learning_rate": 9.909659879276751e-06, |
| "loss": 0.1457, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3039126478616925, |
| "grad_norm": 0.7754006092902415, |
| "learning_rate": 9.909389206947156e-06, |
| "loss": 0.0621, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3043676069153776, |
| "grad_norm": 1.0461982822616211, |
| "learning_rate": 9.909118133444444e-06, |
| "loss": 0.1087, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.30482256596906276, |
| "grad_norm": 0.7981897376851527, |
| "learning_rate": 9.90884665879077e-06, |
| "loss": 0.0921, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30527752502274796, |
| "grad_norm": 0.8941901965354629, |
| "learning_rate": 9.908574783008313e-06, |
| "loss": 0.1055, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3057324840764331, |
| "grad_norm": 1.0219508428898654, |
| "learning_rate": 9.908302506119291e-06, |
| "loss": 0.1152, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3061874431301183, |
| "grad_norm": 0.7623168423299865, |
| "learning_rate": 9.908029828145956e-06, |
| "loss": 0.0837, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.30664240218380345, |
| "grad_norm": 0.7026665400337327, |
| "learning_rate": 9.907756749110587e-06, |
| "loss": 0.0785, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.30709736123748865, |
| "grad_norm": 1.0861630797383492, |
| "learning_rate": 9.9074832690355e-06, |
| "loss": 0.1121, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3075523202911738, |
| "grad_norm": 0.8171913655631801, |
| "learning_rate": 9.907209387943042e-06, |
| "loss": 0.0759, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.30800727934485894, |
| "grad_norm": 0.695009650682766, |
| "learning_rate": 9.906935105855595e-06, |
| "loss": 0.0508, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.30846223839854414, |
| "grad_norm": 1.1629680848047237, |
| "learning_rate": 9.906660422795569e-06, |
| "loss": 0.1123, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3089171974522293, |
| "grad_norm": 1.1028006392582481, |
| "learning_rate": 9.906385338785411e-06, |
| "loss": 0.1048, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3093721565059145, |
| "grad_norm": 0.8590661780887954, |
| "learning_rate": 9.906109853847601e-06, |
| "loss": 0.0947, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.30982711555959963, |
| "grad_norm": 0.9160314729851723, |
| "learning_rate": 9.90583396800465e-06, |
| "loss": 0.0928, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.31028207461328483, |
| "grad_norm": 0.8935511298088069, |
| "learning_rate": 9.9055576812791e-06, |
| "loss": 0.0996, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.31073703366697, |
| "grad_norm": 0.7005723015579258, |
| "learning_rate": 9.905280993693533e-06, |
| "loss": 0.0863, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3111919927206551, |
| "grad_norm": 0.6441434987399284, |
| "learning_rate": 9.905003905270553e-06, |
| "loss": 0.0682, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3116469517743403, |
| "grad_norm": 0.9609160991558658, |
| "learning_rate": 9.904726416032803e-06, |
| "loss": 0.1095, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.31210191082802546, |
| "grad_norm": 0.723787688745946, |
| "learning_rate": 9.904448526002963e-06, |
| "loss": 0.0637, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.31255686988171066, |
| "grad_norm": 0.5250433090776031, |
| "learning_rate": 9.904170235203737e-06, |
| "loss": 0.0587, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3130118289353958, |
| "grad_norm": 0.8819438583914972, |
| "learning_rate": 9.903891543657866e-06, |
| "loss": 0.1112, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.313466787989081, |
| "grad_norm": 0.5413774773467063, |
| "learning_rate": 9.903612451388122e-06, |
| "loss": 0.0722, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.31392174704276615, |
| "grad_norm": 0.8913097595158456, |
| "learning_rate": 9.903332958417315e-06, |
| "loss": 0.0893, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3143767060964513, |
| "grad_norm": 0.6466979890354269, |
| "learning_rate": 9.903053064768283e-06, |
| "loss": 0.0709, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3148316651501365, |
| "grad_norm": 0.8428101951038133, |
| "learning_rate": 9.902772770463892e-06, |
| "loss": 0.0814, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.31528662420382164, |
| "grad_norm": 0.5832299371816577, |
| "learning_rate": 9.902492075527057e-06, |
| "loss": 0.0597, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.31574158325750684, |
| "grad_norm": 0.7856263020740725, |
| "learning_rate": 9.902210979980705e-06, |
| "loss": 0.074, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.316196542311192, |
| "grad_norm": 0.8507681095680276, |
| "learning_rate": 9.90192948384781e-06, |
| "loss": 0.0941, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3166515013648772, |
| "grad_norm": 0.7777857824270489, |
| "learning_rate": 9.901647587151376e-06, |
| "loss": 0.0708, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.31710646041856233, |
| "grad_norm": 1.068022521735614, |
| "learning_rate": 9.901365289914437e-06, |
| "loss": 0.108, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3175614194722475, |
| "grad_norm": 1.1320770025873614, |
| "learning_rate": 9.901082592160059e-06, |
| "loss": 0.108, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3180163785259327, |
| "grad_norm": 0.803518334023751, |
| "learning_rate": 9.900799493911346e-06, |
| "loss": 0.0871, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 0.8188444942805464, |
| "learning_rate": 9.900515995191431e-06, |
| "loss": 0.0808, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.318926296633303, |
| "grad_norm": 0.8993527964087475, |
| "learning_rate": 9.900232096023478e-06, |
| "loss": 0.0821, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.31938125568698816, |
| "grad_norm": 0.5600271316880729, |
| "learning_rate": 9.899947796430687e-06, |
| "loss": 0.0478, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.31983621474067336, |
| "grad_norm": 0.8369718087747545, |
| "learning_rate": 9.899663096436292e-06, |
| "loss": 0.0871, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3202911737943585, |
| "grad_norm": 0.8993771893247359, |
| "learning_rate": 9.899377996063554e-06, |
| "loss": 0.0858, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.32074613284804365, |
| "grad_norm": 0.6615773523414142, |
| "learning_rate": 9.899092495335772e-06, |
| "loss": 0.0601, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.32120109190172885, |
| "grad_norm": 0.8278593900178107, |
| "learning_rate": 9.898806594276273e-06, |
| "loss": 0.0769, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.321656050955414, |
| "grad_norm": 0.7866286577186284, |
| "learning_rate": 9.898520292908425e-06, |
| "loss": 0.0894, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3221110100090992, |
| "grad_norm": 0.8050313615570786, |
| "learning_rate": 9.89823359125562e-06, |
| "loss": 0.0732, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.32256596906278434, |
| "grad_norm": 1.0243914254387991, |
| "learning_rate": 9.897946489341286e-06, |
| "loss": 0.0901, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.32302092811646954, |
| "grad_norm": 0.7036337195424629, |
| "learning_rate": 9.897658987188882e-06, |
| "loss": 0.0686, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3234758871701547, |
| "grad_norm": 0.5593772745397846, |
| "learning_rate": 9.897371084821905e-06, |
| "loss": 0.045, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.32393084622383983, |
| "grad_norm": 0.608867956874154, |
| "learning_rate": 9.897082782263878e-06, |
| "loss": 0.0692, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.32438580527752503, |
| "grad_norm": 0.6488333561840038, |
| "learning_rate": 9.896794079538362e-06, |
| "loss": 0.0513, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3248407643312102, |
| "grad_norm": 0.5593745607285364, |
| "learning_rate": 9.896504976668948e-06, |
| "loss": 0.0437, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3252957233848954, |
| "grad_norm": 0.5072427035814352, |
| "learning_rate": 9.896215473679259e-06, |
| "loss": 0.0566, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3257506824385805, |
| "grad_norm": 0.7088539736923404, |
| "learning_rate": 9.895925570592952e-06, |
| "loss": 0.0878, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3262056414922657, |
| "grad_norm": 0.9653520712469312, |
| "learning_rate": 9.895635267433719e-06, |
| "loss": 0.101, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.32666060054595086, |
| "grad_norm": 1.2323140645024868, |
| "learning_rate": 9.895344564225277e-06, |
| "loss": 0.1359, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.327115559599636, |
| "grad_norm": 0.6826807669546061, |
| "learning_rate": 9.895053460991389e-06, |
| "loss": 0.0799, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3275705186533212, |
| "grad_norm": 0.9496304010026827, |
| "learning_rate": 9.894761957755834e-06, |
| "loss": 0.0928, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32802547770700635, |
| "grad_norm": 0.8578622125964999, |
| "learning_rate": 9.894470054542438e-06, |
| "loss": 0.1149, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.32848043676069155, |
| "grad_norm": 0.5483719717114235, |
| "learning_rate": 9.894177751375053e-06, |
| "loss": 0.0621, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3289353958143767, |
| "grad_norm": 0.6341198897869947, |
| "learning_rate": 9.893885048277564e-06, |
| "loss": 0.0568, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3293903548680619, |
| "grad_norm": 0.7169738278552924, |
| "learning_rate": 9.893591945273888e-06, |
| "loss": 0.0752, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.32984531392174704, |
| "grad_norm": 0.9839905963719277, |
| "learning_rate": 9.89329844238798e-06, |
| "loss": 0.1167, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3303002729754322, |
| "grad_norm": 0.6825969142747964, |
| "learning_rate": 9.89300453964382e-06, |
| "loss": 0.0693, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3307552320291174, |
| "grad_norm": 1.0420794853330364, |
| "learning_rate": 9.892710237065423e-06, |
| "loss": 0.1561, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.33121019108280253, |
| "grad_norm": 1.0109988913697336, |
| "learning_rate": 9.892415534676844e-06, |
| "loss": 0.0813, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.33166515013648773, |
| "grad_norm": 0.6237179977245606, |
| "learning_rate": 9.892120432502161e-06, |
| "loss": 0.063, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3321201091901729, |
| "grad_norm": 0.7047649578988654, |
| "learning_rate": 9.891824930565488e-06, |
| "loss": 0.0757, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3325750682438581, |
| "grad_norm": 0.8381336709785119, |
| "learning_rate": 9.891529028890974e-06, |
| "loss": 0.1137, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3330300272975432, |
| "grad_norm": 1.108812928457643, |
| "learning_rate": 9.891232727502797e-06, |
| "loss": 0.0971, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.33348498635122836, |
| "grad_norm": 0.8911550238765422, |
| "learning_rate": 9.89093602642517e-06, |
| "loss": 0.0869, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.33393994540491356, |
| "grad_norm": 0.7527062298816352, |
| "learning_rate": 9.890638925682339e-06, |
| "loss": 0.085, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3343949044585987, |
| "grad_norm": 0.8028637093759472, |
| "learning_rate": 9.89034142529858e-06, |
| "loss": 0.0866, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3348498635122839, |
| "grad_norm": 0.6620365400447171, |
| "learning_rate": 9.890043525298203e-06, |
| "loss": 0.053, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.33530482256596905, |
| "grad_norm": 0.6606838089782118, |
| "learning_rate": 9.889745225705555e-06, |
| "loss": 0.0783, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.33575978161965425, |
| "grad_norm": 0.6719238881234298, |
| "learning_rate": 9.889446526545007e-06, |
| "loss": 0.079, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3362147406733394, |
| "grad_norm": 0.7379881342173255, |
| "learning_rate": 9.88914742784097e-06, |
| "loss": 0.0848, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.33666969972702454, |
| "grad_norm": 1.9725398231448836, |
| "learning_rate": 9.888847929617887e-06, |
| "loss": 0.1666, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33712465878070974, |
| "grad_norm": 0.7800667095330575, |
| "learning_rate": 9.888548031900226e-06, |
| "loss": 0.0779, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3375796178343949, |
| "grad_norm": 0.9725198572426639, |
| "learning_rate": 9.888247734712497e-06, |
| "loss": 0.0719, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3380345768880801, |
| "grad_norm": 0.9547104503470986, |
| "learning_rate": 9.887947038079238e-06, |
| "loss": 0.1119, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.33848953594176523, |
| "grad_norm": 0.5879353672489683, |
| "learning_rate": 9.887645942025022e-06, |
| "loss": 0.0553, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.33894449499545043, |
| "grad_norm": 0.5485885922626542, |
| "learning_rate": 9.887344446574452e-06, |
| "loss": 0.0494, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3393994540491356, |
| "grad_norm": 0.9640668269863656, |
| "learning_rate": 9.887042551752163e-06, |
| "loss": 0.1104, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3398544131028208, |
| "grad_norm": 0.8639463935480832, |
| "learning_rate": 9.886740257582827e-06, |
| "loss": 0.0655, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3403093721565059, |
| "grad_norm": 0.6489702107287116, |
| "learning_rate": 9.886437564091148e-06, |
| "loss": 0.0777, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.34076433121019106, |
| "grad_norm": 0.8236523684362178, |
| "learning_rate": 9.886134471301854e-06, |
| "loss": 0.0916, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.34121929026387626, |
| "grad_norm": 0.8459143900125461, |
| "learning_rate": 9.885830979239718e-06, |
| "loss": 0.1017, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3416742493175614, |
| "grad_norm": 0.7496065352262437, |
| "learning_rate": 9.885527087929541e-06, |
| "loss": 0.0861, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3421292083712466, |
| "grad_norm": 0.849292513666517, |
| "learning_rate": 9.88522279739615e-06, |
| "loss": 0.0839, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.34258416742493175, |
| "grad_norm": 0.7756671663835698, |
| "learning_rate": 9.884918107664417e-06, |
| "loss": 0.0809, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.34303912647861695, |
| "grad_norm": 0.7338987681003677, |
| "learning_rate": 9.884613018759234e-06, |
| "loss": 0.0721, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3434940855323021, |
| "grad_norm": 0.6003946948163056, |
| "learning_rate": 9.884307530705534e-06, |
| "loss": 0.0782, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34394904458598724, |
| "grad_norm": 0.5309561440373582, |
| "learning_rate": 9.88400164352828e-06, |
| "loss": 0.0563, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.34440400363967244, |
| "grad_norm": 0.6551261739802692, |
| "learning_rate": 9.883695357252467e-06, |
| "loss": 0.061, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3448589626933576, |
| "grad_norm": 0.6598139820416582, |
| "learning_rate": 9.883388671903125e-06, |
| "loss": 0.084, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3453139217470428, |
| "grad_norm": 0.8678451615084499, |
| "learning_rate": 9.883081587505315e-06, |
| "loss": 0.0893, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.34576888080072793, |
| "grad_norm": 0.8849976199871086, |
| "learning_rate": 9.882774104084127e-06, |
| "loss": 0.0938, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34622383985441313, |
| "grad_norm": 0.6157555054475868, |
| "learning_rate": 9.882466221664691e-06, |
| "loss": 0.0535, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3466787989080983, |
| "grad_norm": 0.9555128068667961, |
| "learning_rate": 9.882157940272165e-06, |
| "loss": 0.0984, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3471337579617834, |
| "grad_norm": 0.8431106213501941, |
| "learning_rate": 9.881849259931738e-06, |
| "loss": 0.1062, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3475887170154686, |
| "grad_norm": 0.6608166650909644, |
| "learning_rate": 9.881540180668637e-06, |
| "loss": 0.0589, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.34804367606915376, |
| "grad_norm": 0.7177237690901401, |
| "learning_rate": 9.881230702508118e-06, |
| "loss": 0.0721, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.34849863512283896, |
| "grad_norm": 0.49396541889218665, |
| "learning_rate": 9.880920825475468e-06, |
| "loss": 0.0582, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3489535941765241, |
| "grad_norm": 0.7008727540015932, |
| "learning_rate": 9.88061054959601e-06, |
| "loss": 0.0689, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3494085532302093, |
| "grad_norm": 0.6417543130209264, |
| "learning_rate": 9.880299874895098e-06, |
| "loss": 0.0859, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.34986351228389445, |
| "grad_norm": 0.5325758158155319, |
| "learning_rate": 9.879988801398121e-06, |
| "loss": 0.0508, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3503184713375796, |
| "grad_norm": 0.653129374155715, |
| "learning_rate": 9.879677329130496e-06, |
| "loss": 0.0822, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3507734303912648, |
| "grad_norm": 0.6044703796770591, |
| "learning_rate": 9.879365458117678e-06, |
| "loss": 0.0662, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.35122838944494994, |
| "grad_norm": 0.6417796330386928, |
| "learning_rate": 9.879053188385148e-06, |
| "loss": 0.0649, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.35168334849863514, |
| "grad_norm": 0.6127493684308597, |
| "learning_rate": 9.878740519958425e-06, |
| "loss": 0.0601, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3521383075523203, |
| "grad_norm": 0.9092296350808027, |
| "learning_rate": 9.878427452863059e-06, |
| "loss": 0.1138, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3525932666060055, |
| "grad_norm": 0.8850379239223551, |
| "learning_rate": 9.878113987124633e-06, |
| "loss": 0.1135, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35304822565969063, |
| "grad_norm": 0.8106864823035035, |
| "learning_rate": 9.877800122768761e-06, |
| "loss": 0.084, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3535031847133758, |
| "grad_norm": 0.6717791100158048, |
| "learning_rate": 9.877485859821092e-06, |
| "loss": 0.0764, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.353958143767061, |
| "grad_norm": 0.4266356830653338, |
| "learning_rate": 9.877171198307304e-06, |
| "loss": 0.0496, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3544131028207461, |
| "grad_norm": 0.7839112755574695, |
| "learning_rate": 9.87685613825311e-06, |
| "loss": 0.0864, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3548680618744313, |
| "grad_norm": 0.8928629316475961, |
| "learning_rate": 9.876540679684257e-06, |
| "loss": 0.0802, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35532302092811646, |
| "grad_norm": 0.7427060191976654, |
| "learning_rate": 9.876224822626522e-06, |
| "loss": 0.0809, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.35577797998180166, |
| "grad_norm": 0.6618589317208607, |
| "learning_rate": 9.875908567105716e-06, |
| "loss": 0.0633, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3562329390354868, |
| "grad_norm": 0.9168643329932029, |
| "learning_rate": 9.87559191314768e-06, |
| "loss": 0.0977, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.35668789808917195, |
| "grad_norm": 1.010661772545197, |
| "learning_rate": 9.87527486077829e-06, |
| "loss": 0.112, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.7355960177801563, |
| "learning_rate": 9.874957410023458e-06, |
| "loss": 0.0578, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3575978161965423, |
| "grad_norm": 0.7012046376593928, |
| "learning_rate": 9.874639560909118e-06, |
| "loss": 0.0856, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3580527752502275, |
| "grad_norm": 0.629856671324697, |
| "learning_rate": 9.87432131346125e-06, |
| "loss": 0.079, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.35850773430391264, |
| "grad_norm": 0.6605442679933491, |
| "learning_rate": 9.874002667705855e-06, |
| "loss": 0.0713, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.35896269335759784, |
| "grad_norm": 0.6036439966816435, |
| "learning_rate": 9.873683623668972e-06, |
| "loss": 0.0734, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.359417652411283, |
| "grad_norm": 0.9098464282834562, |
| "learning_rate": 9.873364181376674e-06, |
| "loss": 0.1273, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35987261146496813, |
| "grad_norm": 0.725232432410699, |
| "learning_rate": 9.873044340855062e-06, |
| "loss": 0.0704, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.36032757051865333, |
| "grad_norm": 0.8275864687946802, |
| "learning_rate": 9.872724102130273e-06, |
| "loss": 0.0722, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3607825295723385, |
| "grad_norm": 0.6908762665090429, |
| "learning_rate": 9.872403465228476e-06, |
| "loss": 0.068, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3612374886260237, |
| "grad_norm": 0.8007479624540592, |
| "learning_rate": 9.872082430175871e-06, |
| "loss": 0.0792, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3616924476797088, |
| "grad_norm": 0.7580697654486878, |
| "learning_rate": 9.871760996998692e-06, |
| "loss": 0.0662, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.362147406733394, |
| "grad_norm": 1.0378802589927232, |
| "learning_rate": 9.871439165723207e-06, |
| "loss": 0.0905, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.36260236578707916, |
| "grad_norm": 0.9366156924362913, |
| "learning_rate": 9.87111693637571e-06, |
| "loss": 0.0966, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3630573248407643, |
| "grad_norm": 0.9568919919938076, |
| "learning_rate": 9.870794308982536e-06, |
| "loss": 0.1092, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3635122838944495, |
| "grad_norm": 1.0303944561108107, |
| "learning_rate": 9.870471283570046e-06, |
| "loss": 0.1214, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.36396724294813465, |
| "grad_norm": 0.7123988620535131, |
| "learning_rate": 9.870147860164639e-06, |
| "loss": 0.0952, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36442220200181985, |
| "grad_norm": 0.6461145025804255, |
| "learning_rate": 9.86982403879274e-06, |
| "loss": 0.0653, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.364877161055505, |
| "grad_norm": 0.761176238728339, |
| "learning_rate": 9.869499819480815e-06, |
| "loss": 0.0911, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3653321201091902, |
| "grad_norm": 0.6778284620896282, |
| "learning_rate": 9.869175202255354e-06, |
| "loss": 0.0726, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.36578707916287534, |
| "grad_norm": 0.6378934869683002, |
| "learning_rate": 9.868850187142885e-06, |
| "loss": 0.0721, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3662420382165605, |
| "grad_norm": 0.725078464245391, |
| "learning_rate": 9.868524774169968e-06, |
| "loss": 0.0774, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3666969972702457, |
| "grad_norm": 0.7707907185217752, |
| "learning_rate": 9.86819896336319e-06, |
| "loss": 0.067, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.36715195632393083, |
| "grad_norm": 0.8162851407409059, |
| "learning_rate": 9.867872754749178e-06, |
| "loss": 0.0908, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.36760691537761603, |
| "grad_norm": 0.5330499489332517, |
| "learning_rate": 9.867546148354586e-06, |
| "loss": 0.066, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3680618744313012, |
| "grad_norm": 0.6649993383235931, |
| "learning_rate": 9.867219144206105e-06, |
| "loss": 0.0672, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3685168334849864, |
| "grad_norm": 0.9824606570699352, |
| "learning_rate": 9.866891742330458e-06, |
| "loss": 0.11, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3689717925386715, |
| "grad_norm": 0.6507791006697302, |
| "learning_rate": 9.866563942754394e-06, |
| "loss": 0.0622, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.36942675159235666, |
| "grad_norm": 0.7455907568930894, |
| "learning_rate": 9.866235745504705e-06, |
| "loss": 0.0833, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.36988171064604186, |
| "grad_norm": 0.9927293122267482, |
| "learning_rate": 9.865907150608203e-06, |
| "loss": 0.0978, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.370336669699727, |
| "grad_norm": 0.817279180213694, |
| "learning_rate": 9.865578158091746e-06, |
| "loss": 0.1036, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3707916287534122, |
| "grad_norm": 0.9966504261459711, |
| "learning_rate": 9.865248767982211e-06, |
| "loss": 0.1027, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.37124658780709735, |
| "grad_norm": 0.9561727776097537, |
| "learning_rate": 9.864918980306521e-06, |
| "loss": 0.1136, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.37170154686078255, |
| "grad_norm": 0.6718095123705313, |
| "learning_rate": 9.86458879509162e-06, |
| "loss": 0.0762, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3721565059144677, |
| "grad_norm": 0.9803345299998187, |
| "learning_rate": 9.864258212364492e-06, |
| "loss": 0.0791, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.37261146496815284, |
| "grad_norm": 0.8058679812037255, |
| "learning_rate": 9.86392723215215e-06, |
| "loss": 0.069, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.37306642402183804, |
| "grad_norm": 0.5836160590759203, |
| "learning_rate": 9.86359585448164e-06, |
| "loss": 0.0621, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3735213830755232, |
| "grad_norm": 0.6511599091669776, |
| "learning_rate": 9.863264079380039e-06, |
| "loss": 0.0745, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3739763421292084, |
| "grad_norm": 0.9308266206126162, |
| "learning_rate": 9.862931906874461e-06, |
| "loss": 0.1132, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.37443130118289353, |
| "grad_norm": 0.613775373571284, |
| "learning_rate": 9.862599336992048e-06, |
| "loss": 0.0545, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.37488626023657873, |
| "grad_norm": 0.6991388893487894, |
| "learning_rate": 9.862266369759976e-06, |
| "loss": 0.0754, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.37534121929026387, |
| "grad_norm": 0.6352968005261165, |
| "learning_rate": 9.861933005205454e-06, |
| "loss": 0.0576, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37579617834394907, |
| "grad_norm": 1.109194467922723, |
| "learning_rate": 9.861599243355725e-06, |
| "loss": 0.1281, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3762511373976342, |
| "grad_norm": 0.9742134289860664, |
| "learning_rate": 9.86126508423806e-06, |
| "loss": 0.1067, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.37670609645131936, |
| "grad_norm": 0.6015820455914206, |
| "learning_rate": 9.860930527879763e-06, |
| "loss": 0.055, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.37716105550500456, |
| "grad_norm": 1.0894948091440197, |
| "learning_rate": 9.860595574308179e-06, |
| "loss": 0.1147, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3776160145586897, |
| "grad_norm": 0.7023892750192133, |
| "learning_rate": 9.860260223550672e-06, |
| "loss": 0.0815, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3780709736123749, |
| "grad_norm": 0.4943868719085533, |
| "learning_rate": 9.859924475634649e-06, |
| "loss": 0.0476, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.37852593266606005, |
| "grad_norm": 0.9974648765413693, |
| "learning_rate": 9.859588330587545e-06, |
| "loss": 0.1068, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.37898089171974525, |
| "grad_norm": 0.5960289391531881, |
| "learning_rate": 9.859251788436829e-06, |
| "loss": 0.0715, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3794358507734304, |
| "grad_norm": 0.907079582974149, |
| "learning_rate": 9.85891484921e-06, |
| "loss": 0.0905, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.37989080982711554, |
| "grad_norm": 0.8133034306250352, |
| "learning_rate": 9.858577512934592e-06, |
| "loss": 0.1012, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.38034576888080074, |
| "grad_norm": 0.7828785203637737, |
| "learning_rate": 9.858239779638173e-06, |
| "loss": 0.0726, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3808007279344859, |
| "grad_norm": 1.3138864597148558, |
| "learning_rate": 9.857901649348338e-06, |
| "loss": 0.1307, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3812556869881711, |
| "grad_norm": 0.7000750227265026, |
| "learning_rate": 9.857563122092717e-06, |
| "loss": 0.0777, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3817106460418562, |
| "grad_norm": 0.757283984575844, |
| "learning_rate": 9.857224197898975e-06, |
| "loss": 0.083, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 0.7113754486134378, |
| "learning_rate": 9.856884876794805e-06, |
| "loss": 0.0795, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38262056414922657, |
| "grad_norm": 0.6891370217065743, |
| "learning_rate": 9.856545158807938e-06, |
| "loss": 0.0576, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3830755232029117, |
| "grad_norm": 0.7230826558764609, |
| "learning_rate": 9.856205043966134e-06, |
| "loss": 0.0973, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3835304822565969, |
| "grad_norm": 0.9951638416419379, |
| "learning_rate": 9.855864532297181e-06, |
| "loss": 0.1225, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.38398544131028206, |
| "grad_norm": 0.8272776971451865, |
| "learning_rate": 9.85552362382891e-06, |
| "loss": 0.0928, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.38444040036396726, |
| "grad_norm": 0.662562460388915, |
| "learning_rate": 9.855182318589174e-06, |
| "loss": 0.0711, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3848953594176524, |
| "grad_norm": 1.185659176011977, |
| "learning_rate": 9.854840616605866e-06, |
| "loss": 0.0922, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3853503184713376, |
| "grad_norm": 0.7002426118833048, |
| "learning_rate": 9.854498517906908e-06, |
| "loss": 0.0828, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.38580527752502275, |
| "grad_norm": 0.8957633348930525, |
| "learning_rate": 9.854156022520252e-06, |
| "loss": 0.0809, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3862602365787079, |
| "grad_norm": 1.0593251614278854, |
| "learning_rate": 9.853813130473887e-06, |
| "loss": 0.1109, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3867151956323931, |
| "grad_norm": 0.7751748709357449, |
| "learning_rate": 9.853469841795832e-06, |
| "loss": 0.0823, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38717015468607824, |
| "grad_norm": 0.5943868690351954, |
| "learning_rate": 9.853126156514142e-06, |
| "loss": 0.0758, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.38762511373976344, |
| "grad_norm": 0.4901349757557767, |
| "learning_rate": 9.852782074656897e-06, |
| "loss": 0.064, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3880800727934486, |
| "grad_norm": 0.7531191508768753, |
| "learning_rate": 9.852437596252216e-06, |
| "loss": 0.0824, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3885350318471338, |
| "grad_norm": 0.7684236261792305, |
| "learning_rate": 9.852092721328248e-06, |
| "loss": 0.0674, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3889899909008189, |
| "grad_norm": 0.8624513661560378, |
| "learning_rate": 9.851747449913176e-06, |
| "loss": 0.09, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.38944494995450407, |
| "grad_norm": 0.9125725996183891, |
| "learning_rate": 9.851401782035213e-06, |
| "loss": 0.129, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.38989990900818927, |
| "grad_norm": 0.7630714638300728, |
| "learning_rate": 9.851055717722604e-06, |
| "loss": 0.068, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3903548680618744, |
| "grad_norm": 0.834756070401477, |
| "learning_rate": 9.850709257003628e-06, |
| "loss": 0.0831, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3908098271155596, |
| "grad_norm": 0.9864776662717517, |
| "learning_rate": 9.850362399906598e-06, |
| "loss": 0.0904, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.39126478616924476, |
| "grad_norm": 0.6242730295284743, |
| "learning_rate": 9.850015146459857e-06, |
| "loss": 0.0754, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39171974522292996, |
| "grad_norm": 0.838271649072902, |
| "learning_rate": 9.84966749669178e-06, |
| "loss": 0.0899, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3921747042766151, |
| "grad_norm": 0.6826448278617049, |
| "learning_rate": 9.849319450630777e-06, |
| "loss": 0.0698, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.39262966333030025, |
| "grad_norm": 0.5533993282250775, |
| "learning_rate": 9.848971008305288e-06, |
| "loss": 0.0688, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.39308462238398545, |
| "grad_norm": 0.838673412156409, |
| "learning_rate": 9.848622169743784e-06, |
| "loss": 0.0815, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.3935395814376706, |
| "grad_norm": 0.9783580500729582, |
| "learning_rate": 9.848272934974774e-06, |
| "loss": 0.0745, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3939945404913558, |
| "grad_norm": 0.5976030953641746, |
| "learning_rate": 9.847923304026793e-06, |
| "loss": 0.0664, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.39444949954504094, |
| "grad_norm": 0.6999143793652887, |
| "learning_rate": 9.847573276928415e-06, |
| "loss": 0.0804, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.39490445859872614, |
| "grad_norm": 0.6338725165728231, |
| "learning_rate": 9.847222853708239e-06, |
| "loss": 0.0655, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3953594176524113, |
| "grad_norm": 0.7010627446349382, |
| "learning_rate": 9.846872034394902e-06, |
| "loss": 0.0667, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3958143767060964, |
| "grad_norm": 0.6173227181881447, |
| "learning_rate": 9.84652081901707e-06, |
| "loss": 0.0674, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3962693357597816, |
| "grad_norm": 0.9673042020268607, |
| "learning_rate": 9.846169207603443e-06, |
| "loss": 0.1267, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.39672429481346677, |
| "grad_norm": 0.6294912489479282, |
| "learning_rate": 9.845817200182755e-06, |
| "loss": 0.0588, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.39717925386715197, |
| "grad_norm": 0.8477152807126976, |
| "learning_rate": 9.845464796783767e-06, |
| "loss": 0.1219, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3976342129208371, |
| "grad_norm": 0.5887483684825674, |
| "learning_rate": 9.845111997435279e-06, |
| "loss": 0.0731, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3980891719745223, |
| "grad_norm": 0.5630369277247907, |
| "learning_rate": 9.844758802166116e-06, |
| "loss": 0.0579, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39854413102820746, |
| "grad_norm": 0.6717541815357567, |
| "learning_rate": 9.844405211005145e-06, |
| "loss": 0.0711, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3989990900818926, |
| "grad_norm": 0.6571828619535791, |
| "learning_rate": 9.844051223981258e-06, |
| "loss": 0.0638, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3994540491355778, |
| "grad_norm": 0.6723710552364174, |
| "learning_rate": 9.84369684112338e-06, |
| "loss": 0.0676, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.39990900818926295, |
| "grad_norm": 0.7014173744195523, |
| "learning_rate": 9.84334206246047e-06, |
| "loss": 0.0751, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.40036396724294815, |
| "grad_norm": 0.7999660318519703, |
| "learning_rate": 9.842986888021518e-06, |
| "loss": 0.0895, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4008189262966333, |
| "grad_norm": 0.5578605501955606, |
| "learning_rate": 9.842631317835548e-06, |
| "loss": 0.0637, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4012738853503185, |
| "grad_norm": 0.6615256090849237, |
| "learning_rate": 9.842275351931617e-06, |
| "loss": 0.0664, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.40172884440400364, |
| "grad_norm": 0.5263094198672195, |
| "learning_rate": 9.841918990338812e-06, |
| "loss": 0.0611, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4021838034576888, |
| "grad_norm": 0.8080883575450535, |
| "learning_rate": 9.841562233086252e-06, |
| "loss": 0.0912, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.402638762511374, |
| "grad_norm": 0.6655757939327012, |
| "learning_rate": 9.841205080203092e-06, |
| "loss": 0.0601, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4030937215650591, |
| "grad_norm": 0.8701903481119097, |
| "learning_rate": 9.840847531718515e-06, |
| "loss": 0.0914, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4035486806187443, |
| "grad_norm": 0.7730206436987713, |
| "learning_rate": 9.840489587661738e-06, |
| "loss": 0.0747, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.40400363967242947, |
| "grad_norm": 0.7410839527981146, |
| "learning_rate": 9.840131248062012e-06, |
| "loss": 0.079, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.40445859872611467, |
| "grad_norm": 0.627620281196765, |
| "learning_rate": 9.839772512948618e-06, |
| "loss": 0.0715, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4049135577797998, |
| "grad_norm": 0.8746014124114054, |
| "learning_rate": 9.83941338235087e-06, |
| "loss": 0.0824, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40536851683348496, |
| "grad_norm": 1.0112737589697485, |
| "learning_rate": 9.839053856298116e-06, |
| "loss": 0.1251, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.40582347588717016, |
| "grad_norm": 0.72216805525771, |
| "learning_rate": 9.838693934819734e-06, |
| "loss": 0.0893, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4062784349408553, |
| "grad_norm": 0.7544949830136005, |
| "learning_rate": 9.838333617945134e-06, |
| "loss": 0.0968, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4067333939945405, |
| "grad_norm": 0.9543024355165705, |
| "learning_rate": 9.837972905703762e-06, |
| "loss": 0.102, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.40718835304822565, |
| "grad_norm": 1.02061795078975, |
| "learning_rate": 9.83761179812509e-06, |
| "loss": 0.0649, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.40764331210191085, |
| "grad_norm": 0.39738812842187227, |
| "learning_rate": 9.837250295238629e-06, |
| "loss": 0.0428, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.408098271155596, |
| "grad_norm": 0.8873895570319217, |
| "learning_rate": 9.836888397073919e-06, |
| "loss": 0.1068, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.40855323020928114, |
| "grad_norm": 0.7492126364897504, |
| "learning_rate": 9.836526103660533e-06, |
| "loss": 0.0953, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.40900818926296634, |
| "grad_norm": 0.821575499525911, |
| "learning_rate": 9.836163415028075e-06, |
| "loss": 0.0712, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4094631483166515, |
| "grad_norm": 1.0052579979241618, |
| "learning_rate": 9.835800331206183e-06, |
| "loss": 0.1138, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4099181073703367, |
| "grad_norm": 0.7848465428804848, |
| "learning_rate": 9.835436852224525e-06, |
| "loss": 0.0978, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4103730664240218, |
| "grad_norm": 0.9719856735481065, |
| "learning_rate": 9.835072978112804e-06, |
| "loss": 0.0846, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.410828025477707, |
| "grad_norm": 0.6607308818506346, |
| "learning_rate": 9.834708708900755e-06, |
| "loss": 0.0654, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.41128298453139217, |
| "grad_norm": 0.5191597312034261, |
| "learning_rate": 9.834344044618144e-06, |
| "loss": 0.0518, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.41173794358507737, |
| "grad_norm": 0.5336391872354229, |
| "learning_rate": 9.83397898529477e-06, |
| "loss": 0.0535, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4121929026387625, |
| "grad_norm": 0.5687342550017563, |
| "learning_rate": 9.833613530960462e-06, |
| "loss": 0.0578, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.41264786169244766, |
| "grad_norm": 0.8793783198642894, |
| "learning_rate": 9.833247681645083e-06, |
| "loss": 0.1286, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.41310282074613286, |
| "grad_norm": 0.8073005899800644, |
| "learning_rate": 9.832881437378534e-06, |
| "loss": 0.0853, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.413557779799818, |
| "grad_norm": 0.511699500000588, |
| "learning_rate": 9.832514798190738e-06, |
| "loss": 0.0504, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4140127388535032, |
| "grad_norm": 0.5082793074725768, |
| "learning_rate": 9.832147764111655e-06, |
| "loss": 0.056, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41446769790718835, |
| "grad_norm": 0.9876041013395295, |
| "learning_rate": 9.83178033517128e-06, |
| "loss": 0.0984, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.41492265696087355, |
| "grad_norm": 0.7511273129930924, |
| "learning_rate": 9.831412511399633e-06, |
| "loss": 0.0969, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4153776160145587, |
| "grad_norm": 1.0144870263760433, |
| "learning_rate": 9.831044292826778e-06, |
| "loss": 0.1482, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.41583257506824384, |
| "grad_norm": 0.70444400073401, |
| "learning_rate": 9.830675679482797e-06, |
| "loss": 0.0802, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.41628753412192904, |
| "grad_norm": 1.0357251397748677, |
| "learning_rate": 9.830306671397816e-06, |
| "loss": 0.1061, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4167424931756142, |
| "grad_norm": 0.895894802940119, |
| "learning_rate": 9.829937268601988e-06, |
| "loss": 0.1005, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4171974522292994, |
| "grad_norm": 0.6004589977630954, |
| "learning_rate": 9.829567471125497e-06, |
| "loss": 0.0664, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4176524112829845, |
| "grad_norm": 0.6058859475834909, |
| "learning_rate": 9.829197278998562e-06, |
| "loss": 0.0728, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4181073703366697, |
| "grad_norm": 0.5886912548442098, |
| "learning_rate": 9.828826692251435e-06, |
| "loss": 0.074, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.41856232939035487, |
| "grad_norm": 0.5982473215332103, |
| "learning_rate": 9.828455710914398e-06, |
| "loss": 0.0653, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41901728844404, |
| "grad_norm": 0.8647804622811079, |
| "learning_rate": 9.828084335017763e-06, |
| "loss": 0.0741, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4194722474977252, |
| "grad_norm": 0.653767178815679, |
| "learning_rate": 9.827712564591883e-06, |
| "loss": 0.0604, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.41992720655141036, |
| "grad_norm": 0.7812500085225947, |
| "learning_rate": 9.827340399667132e-06, |
| "loss": 0.0708, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.42038216560509556, |
| "grad_norm": 0.7314008563711142, |
| "learning_rate": 9.826967840273921e-06, |
| "loss": 0.0721, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4208371246587807, |
| "grad_norm": 0.8727413076803472, |
| "learning_rate": 9.8265948864427e-06, |
| "loss": 0.0892, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4212920837124659, |
| "grad_norm": 0.6051379056710864, |
| "learning_rate": 9.826221538203942e-06, |
| "loss": 0.0685, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.42174704276615105, |
| "grad_norm": 0.7279887191787228, |
| "learning_rate": 9.825847795588154e-06, |
| "loss": 0.0766, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4222020018198362, |
| "grad_norm": 0.7126811268305303, |
| "learning_rate": 9.825473658625876e-06, |
| "loss": 0.0821, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4226569608735214, |
| "grad_norm": 0.8812960827967533, |
| "learning_rate": 9.825099127347684e-06, |
| "loss": 0.0982, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.42311191992720654, |
| "grad_norm": 0.7462955906438729, |
| "learning_rate": 9.824724201784182e-06, |
| "loss": 0.1073, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42356687898089174, |
| "grad_norm": 0.5448066050338419, |
| "learning_rate": 9.824348881966004e-06, |
| "loss": 0.0637, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4240218380345769, |
| "grad_norm": 0.7750150802923693, |
| "learning_rate": 9.823973167923823e-06, |
| "loss": 0.09, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4244767970882621, |
| "grad_norm": 0.8695175796556455, |
| "learning_rate": 9.82359705968834e-06, |
| "loss": 0.0857, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4249317561419472, |
| "grad_norm": 0.653112477618241, |
| "learning_rate": 9.823220557290289e-06, |
| "loss": 0.0722, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.42538671519563237, |
| "grad_norm": 0.7764742726938813, |
| "learning_rate": 9.822843660760434e-06, |
| "loss": 0.0582, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42584167424931757, |
| "grad_norm": 0.8338160462571067, |
| "learning_rate": 9.822466370129576e-06, |
| "loss": 0.0993, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4262966333030027, |
| "grad_norm": 0.7416650975880095, |
| "learning_rate": 9.822088685428543e-06, |
| "loss": 0.0782, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4267515923566879, |
| "grad_norm": 0.5969422348364739, |
| "learning_rate": 9.821710606688199e-06, |
| "loss": 0.0546, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.42720655141037306, |
| "grad_norm": 0.6235404067325917, |
| "learning_rate": 9.82133213393944e-06, |
| "loss": 0.0638, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.42766151046405826, |
| "grad_norm": 0.7910461101358781, |
| "learning_rate": 9.820953267213194e-06, |
| "loss": 0.0775, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4281164695177434, |
| "grad_norm": 0.692978452923811, |
| "learning_rate": 9.820574006540415e-06, |
| "loss": 0.053, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.7310389759017597, |
| "learning_rate": 9.820194351952098e-06, |
| "loss": 0.0716, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.42902638762511375, |
| "grad_norm": 0.6553331509390902, |
| "learning_rate": 9.819814303479268e-06, |
| "loss": 0.0612, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4294813466787989, |
| "grad_norm": 1.1310076957610966, |
| "learning_rate": 9.819433861152978e-06, |
| "loss": 0.1116, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4299363057324841, |
| "grad_norm": 0.6933766894953944, |
| "learning_rate": 9.819053025004316e-06, |
| "loss": 0.0932, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.43039126478616924, |
| "grad_norm": 0.7823571557493696, |
| "learning_rate": 9.818671795064405e-06, |
| "loss": 0.0847, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.43084622383985444, |
| "grad_norm": 0.8000794358590197, |
| "learning_rate": 9.818290171364396e-06, |
| "loss": 0.0916, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4313011828935396, |
| "grad_norm": 0.6207042654318157, |
| "learning_rate": 9.817908153935473e-06, |
| "loss": 0.0568, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4317561419472247, |
| "grad_norm": 0.7957970680354334, |
| "learning_rate": 9.817525742808854e-06, |
| "loss": 0.1203, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4322111010009099, |
| "grad_norm": 0.6607960765057979, |
| "learning_rate": 9.817142938015786e-06, |
| "loss": 0.069, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43266606005459507, |
| "grad_norm": 0.8132102265727185, |
| "learning_rate": 9.816759739587552e-06, |
| "loss": 0.0821, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.43312101910828027, |
| "grad_norm": 0.6410149691778323, |
| "learning_rate": 9.816376147555464e-06, |
| "loss": 0.0612, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4335759781619654, |
| "grad_norm": 1.0196998859089288, |
| "learning_rate": 9.815992161950867e-06, |
| "loss": 0.1183, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4340309372156506, |
| "grad_norm": 0.5899375116434804, |
| "learning_rate": 9.81560778280514e-06, |
| "loss": 0.0604, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.43448589626933576, |
| "grad_norm": 1.0046158107797931, |
| "learning_rate": 9.815223010149693e-06, |
| "loss": 0.0876, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4349408553230209, |
| "grad_norm": 0.7980339738331416, |
| "learning_rate": 9.814837844015966e-06, |
| "loss": 0.0894, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4353958143767061, |
| "grad_norm": 0.6974524248281853, |
| "learning_rate": 9.814452284435433e-06, |
| "loss": 0.0741, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.43585077343039125, |
| "grad_norm": 0.7679692797858835, |
| "learning_rate": 9.814066331439603e-06, |
| "loss": 0.0796, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.43630573248407645, |
| "grad_norm": 0.8183774417740679, |
| "learning_rate": 9.813679985060012e-06, |
| "loss": 0.0963, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4367606915377616, |
| "grad_norm": 0.7950656053104391, |
| "learning_rate": 9.81329324532823e-06, |
| "loss": 0.0837, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4372156505914468, |
| "grad_norm": 0.6056809369995887, |
| "learning_rate": 9.812906112275862e-06, |
| "loss": 0.0465, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.43767060964513194, |
| "grad_norm": 1.0980359635620318, |
| "learning_rate": 9.81251858593454e-06, |
| "loss": 0.1206, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4381255686988171, |
| "grad_norm": 0.6123483237764059, |
| "learning_rate": 9.812130666335933e-06, |
| "loss": 0.08, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4385805277525023, |
| "grad_norm": 0.8151730014839008, |
| "learning_rate": 9.81174235351174e-06, |
| "loss": 0.0983, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4390354868061874, |
| "grad_norm": 0.7143828681073273, |
| "learning_rate": 9.811353647493691e-06, |
| "loss": 0.0809, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4394904458598726, |
| "grad_norm": 0.5647036962239634, |
| "learning_rate": 9.810964548313549e-06, |
| "loss": 0.0581, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.43994540491355777, |
| "grad_norm": 0.7594400506736699, |
| "learning_rate": 9.81057505600311e-06, |
| "loss": 0.078, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.44040036396724297, |
| "grad_norm": 0.6515426202345832, |
| "learning_rate": 9.810185170594205e-06, |
| "loss": 0.0688, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4408553230209281, |
| "grad_norm": 0.8798906332352223, |
| "learning_rate": 9.809794892118687e-06, |
| "loss": 0.0915, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.44131028207461326, |
| "grad_norm": 0.7350866900672135, |
| "learning_rate": 9.809404220608451e-06, |
| "loss": 0.0671, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.44176524112829846, |
| "grad_norm": 0.7216847217866104, |
| "learning_rate": 9.809013156095424e-06, |
| "loss": 0.0726, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4422202001819836, |
| "grad_norm": 0.8179702740752783, |
| "learning_rate": 9.808621698611557e-06, |
| "loss": 0.0758, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4426751592356688, |
| "grad_norm": 0.5533105745807706, |
| "learning_rate": 9.808229848188842e-06, |
| "loss": 0.0528, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.44313011828935395, |
| "grad_norm": 0.7503486538749657, |
| "learning_rate": 9.807837604859296e-06, |
| "loss": 0.0878, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.44358507734303915, |
| "grad_norm": 0.40510949005498975, |
| "learning_rate": 9.807444968654975e-06, |
| "loss": 0.0424, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4440400363967243, |
| "grad_norm": 0.8540666353042626, |
| "learning_rate": 9.807051939607959e-06, |
| "loss": 0.1108, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.44449499545040944, |
| "grad_norm": 0.7543284179304937, |
| "learning_rate": 9.806658517750369e-06, |
| "loss": 0.0719, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.44494995450409464, |
| "grad_norm": 0.6982493359241757, |
| "learning_rate": 9.80626470311435e-06, |
| "loss": 0.0777, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4454049135577798, |
| "grad_norm": 0.7275511253894157, |
| "learning_rate": 9.805870495732085e-06, |
| "loss": 0.0693, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 0.8647890459895436, |
| "learning_rate": 9.805475895635787e-06, |
| "loss": 0.0882, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4463148316651501, |
| "grad_norm": 0.757804762973183, |
| "learning_rate": 9.8050809028577e-06, |
| "loss": 0.0724, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4467697907188353, |
| "grad_norm": 0.7515219153063712, |
| "learning_rate": 9.8046855174301e-06, |
| "loss": 0.0659, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.44722474977252047, |
| "grad_norm": 1.0502681583017184, |
| "learning_rate": 9.804289739385297e-06, |
| "loss": 0.1207, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.44767970882620567, |
| "grad_norm": 0.5780062486364612, |
| "learning_rate": 9.803893568755633e-06, |
| "loss": 0.0772, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4481346678798908, |
| "grad_norm": 0.5515644567052078, |
| "learning_rate": 9.80349700557348e-06, |
| "loss": 0.0628, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.44858962693357596, |
| "grad_norm": 0.6432677095504179, |
| "learning_rate": 9.803100049871246e-06, |
| "loss": 0.0817, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.44904458598726116, |
| "grad_norm": 0.5424958391196154, |
| "learning_rate": 9.802702701681366e-06, |
| "loss": 0.0649, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4494995450409463, |
| "grad_norm": 0.6556126282036931, |
| "learning_rate": 9.80230496103631e-06, |
| "loss": 0.0579, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4499545040946315, |
| "grad_norm": 0.5632646083130022, |
| "learning_rate": 9.801906827968578e-06, |
| "loss": 0.0591, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.45040946314831665, |
| "grad_norm": 1.0464719217252296, |
| "learning_rate": 9.801508302510707e-06, |
| "loss": 0.124, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.45086442220200185, |
| "grad_norm": 0.7231067459050019, |
| "learning_rate": 9.801109384695261e-06, |
| "loss": 0.0631, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.451319381255687, |
| "grad_norm": 0.775594128230074, |
| "learning_rate": 9.800710074554837e-06, |
| "loss": 0.0924, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.45177434030937214, |
| "grad_norm": 0.6340180385643369, |
| "learning_rate": 9.800310372122066e-06, |
| "loss": 0.068, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.45222929936305734, |
| "grad_norm": 0.9703750136380557, |
| "learning_rate": 9.799910277429609e-06, |
| "loss": 0.0902, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4526842584167425, |
| "grad_norm": 0.5881925827197537, |
| "learning_rate": 9.79950979051016e-06, |
| "loss": 0.0662, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4531392174704277, |
| "grad_norm": 0.7583235380843109, |
| "learning_rate": 9.799108911396446e-06, |
| "loss": 0.0755, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4535941765241128, |
| "grad_norm": 0.6585135755735663, |
| "learning_rate": 9.798707640121224e-06, |
| "loss": 0.0669, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.454049135577798, |
| "grad_norm": 0.9344579240939844, |
| "learning_rate": 9.798305976717286e-06, |
| "loss": 0.1028, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.45450409463148317, |
| "grad_norm": 0.6238360425747993, |
| "learning_rate": 9.79790392121745e-06, |
| "loss": 0.0608, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4549590536851683, |
| "grad_norm": 0.715680092291253, |
| "learning_rate": 9.797501473654573e-06, |
| "loss": 0.0792, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4554140127388535, |
| "grad_norm": 0.8167758856821831, |
| "learning_rate": 9.797098634061543e-06, |
| "loss": 0.0948, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.45586897179253866, |
| "grad_norm": 0.8318764431867516, |
| "learning_rate": 9.796695402471275e-06, |
| "loss": 0.0967, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.45632393084622386, |
| "grad_norm": 0.9700547030363569, |
| "learning_rate": 9.79629177891672e-06, |
| "loss": 0.1138, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.456778889899909, |
| "grad_norm": 0.7702596501705347, |
| "learning_rate": 9.79588776343086e-06, |
| "loss": 0.0826, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4572338489535942, |
| "grad_norm": 0.833778163717652, |
| "learning_rate": 9.795483356046711e-06, |
| "loss": 0.0927, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.45768880800727935, |
| "grad_norm": 0.7006737675801851, |
| "learning_rate": 9.795078556797318e-06, |
| "loss": 0.0747, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4581437670609645, |
| "grad_norm": 0.8810114143185821, |
| "learning_rate": 9.794673365715761e-06, |
| "loss": 0.0921, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4585987261146497, |
| "grad_norm": 0.7286145380478113, |
| "learning_rate": 9.794267782835148e-06, |
| "loss": 0.0832, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.45905368516833484, |
| "grad_norm": 0.8181887559127218, |
| "learning_rate": 9.793861808188622e-06, |
| "loss": 0.0729, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.45950864422202004, |
| "grad_norm": 1.0821839097582124, |
| "learning_rate": 9.793455441809359e-06, |
| "loss": 0.1025, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4599636032757052, |
| "grad_norm": 0.515896949523265, |
| "learning_rate": 9.793048683730564e-06, |
| "loss": 0.0512, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4604185623293904, |
| "grad_norm": 0.7800604571516774, |
| "learning_rate": 9.792641533985474e-06, |
| "loss": 0.1065, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4608735213830755, |
| "grad_norm": 0.48365424866268936, |
| "learning_rate": 9.792233992607365e-06, |
| "loss": 0.0622, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.46132848043676067, |
| "grad_norm": 0.8472876133123602, |
| "learning_rate": 9.791826059629532e-06, |
| "loss": 0.0713, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.46178343949044587, |
| "grad_norm": 0.935522534168844, |
| "learning_rate": 9.791417735085316e-06, |
| "loss": 0.0853, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.462238398544131, |
| "grad_norm": 0.8028819334602026, |
| "learning_rate": 9.791009019008078e-06, |
| "loss": 0.0795, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4626933575978162, |
| "grad_norm": 0.6458928385673616, |
| "learning_rate": 9.79059991143122e-06, |
| "loss": 0.0836, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.46314831665150136, |
| "grad_norm": 0.8309912415690437, |
| "learning_rate": 9.790190412388173e-06, |
| "loss": 0.0895, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.46360327570518656, |
| "grad_norm": 0.6953691809158898, |
| "learning_rate": 9.789780521912396e-06, |
| "loss": 0.0686, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4640582347588717, |
| "grad_norm": 0.7563151979586233, |
| "learning_rate": 9.789370240037385e-06, |
| "loss": 0.0879, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46451319381255685, |
| "grad_norm": 0.6646619102460968, |
| "learning_rate": 9.788959566796667e-06, |
| "loss": 0.0761, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.46496815286624205, |
| "grad_norm": 0.8092527562913561, |
| "learning_rate": 9.788548502223801e-06, |
| "loss": 0.0863, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4654231119199272, |
| "grad_norm": 2.0284506817542396, |
| "learning_rate": 9.788137046352374e-06, |
| "loss": 0.2011, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4658780709736124, |
| "grad_norm": 0.6524644993097855, |
| "learning_rate": 9.787725199216011e-06, |
| "loss": 0.0765, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.46633303002729753, |
| "grad_norm": 0.48134373932870766, |
| "learning_rate": 9.787312960848368e-06, |
| "loss": 0.0505, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46678798908098273, |
| "grad_norm": 0.6646547386252114, |
| "learning_rate": 9.786900331283128e-06, |
| "loss": 0.0825, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4672429481346679, |
| "grad_norm": 0.5655812014606527, |
| "learning_rate": 9.78648731055401e-06, |
| "loss": 0.0659, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.467697907188353, |
| "grad_norm": 0.680196435092224, |
| "learning_rate": 9.786073898694766e-06, |
| "loss": 0.0734, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4681528662420382, |
| "grad_norm": 0.6198434008496165, |
| "learning_rate": 9.785660095739176e-06, |
| "loss": 0.0687, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.46860782529572337, |
| "grad_norm": 0.5967309034966486, |
| "learning_rate": 9.785245901721054e-06, |
| "loss": 0.0443, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46906278434940857, |
| "grad_norm": 0.588565790719301, |
| "learning_rate": 9.784831316674246e-06, |
| "loss": 0.0741, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4695177434030937, |
| "grad_norm": 0.6384508627867143, |
| "learning_rate": 9.784416340632634e-06, |
| "loss": 0.0639, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4699727024567789, |
| "grad_norm": 0.528980291125106, |
| "learning_rate": 9.784000973630124e-06, |
| "loss": 0.0506, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.47042766151046406, |
| "grad_norm": 0.6297922247581061, |
| "learning_rate": 9.783585215700656e-06, |
| "loss": 0.0704, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4708826205641492, |
| "grad_norm": 1.1014615381108162, |
| "learning_rate": 9.783169066878208e-06, |
| "loss": 0.1063, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4713375796178344, |
| "grad_norm": 0.7370811970547196, |
| "learning_rate": 9.782752527196785e-06, |
| "loss": 0.0888, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.47179253867151955, |
| "grad_norm": 0.6272964856361817, |
| "learning_rate": 9.782335596690425e-06, |
| "loss": 0.0683, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.47224749772520475, |
| "grad_norm": 0.9675945822898259, |
| "learning_rate": 9.781918275393196e-06, |
| "loss": 0.1031, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4727024567788899, |
| "grad_norm": 0.8448129794628584, |
| "learning_rate": 9.781500563339202e-06, |
| "loss": 0.0818, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4731574158325751, |
| "grad_norm": 0.5148120993988892, |
| "learning_rate": 9.781082460562574e-06, |
| "loss": 0.0525, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47361237488626023, |
| "grad_norm": 0.7767251927940846, |
| "learning_rate": 9.780663967097477e-06, |
| "loss": 0.0869, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4740673339399454, |
| "grad_norm": 0.9661754574144388, |
| "learning_rate": 9.780245082978112e-06, |
| "loss": 0.0923, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4745222929936306, |
| "grad_norm": 0.780061387882855, |
| "learning_rate": 9.779825808238705e-06, |
| "loss": 0.095, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4749772520473157, |
| "grad_norm": 0.8513172657519864, |
| "learning_rate": 9.77940614291352e-06, |
| "loss": 0.0772, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4754322111010009, |
| "grad_norm": 0.6199453465731616, |
| "learning_rate": 9.778986087036846e-06, |
| "loss": 0.0701, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.47588717015468607, |
| "grad_norm": 0.5327629714743946, |
| "learning_rate": 9.778565640643011e-06, |
| "loss": 0.0447, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.47634212920837127, |
| "grad_norm": 0.8882337205809296, |
| "learning_rate": 9.778144803766375e-06, |
| "loss": 0.0788, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4767970882620564, |
| "grad_norm": 0.6023343672839219, |
| "learning_rate": 9.77772357644132e-06, |
| "loss": 0.0693, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.47725204731574156, |
| "grad_norm": 0.8031515985448552, |
| "learning_rate": 9.777301958702273e-06, |
| "loss": 0.0911, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.47770700636942676, |
| "grad_norm": 0.8695877166802147, |
| "learning_rate": 9.776879950583683e-06, |
| "loss": 0.12, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4781619654231119, |
| "grad_norm": 0.6077253389668626, |
| "learning_rate": 9.776457552120034e-06, |
| "loss": 0.0722, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4786169244767971, |
| "grad_norm": 0.7976020915977983, |
| "learning_rate": 9.776034763345845e-06, |
| "loss": 0.0783, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.47907188353048225, |
| "grad_norm": 0.7091049596783572, |
| "learning_rate": 9.775611584295663e-06, |
| "loss": 0.0739, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.47952684258416745, |
| "grad_norm": 0.7919907245184465, |
| "learning_rate": 9.775188015004072e-06, |
| "loss": 0.0728, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4799818016378526, |
| "grad_norm": 0.9227645018819045, |
| "learning_rate": 9.774764055505676e-06, |
| "loss": 0.0905, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.48043676069153773, |
| "grad_norm": 0.7130315690029604, |
| "learning_rate": 9.774339705835127e-06, |
| "loss": 0.09, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.48089171974522293, |
| "grad_norm": 0.7993270676292756, |
| "learning_rate": 9.773914966027098e-06, |
| "loss": 0.1011, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4813466787989081, |
| "grad_norm": 0.8955668988276211, |
| "learning_rate": 9.773489836116297e-06, |
| "loss": 0.0963, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4818016378525933, |
| "grad_norm": 0.7582155580680914, |
| "learning_rate": 9.773064316137464e-06, |
| "loss": 0.0766, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4822565969062784, |
| "grad_norm": 0.6939955066308027, |
| "learning_rate": 9.772638406125367e-06, |
| "loss": 0.0687, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4827115559599636, |
| "grad_norm": 0.8091635860789653, |
| "learning_rate": 9.772212106114816e-06, |
| "loss": 0.0754, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.48316651501364877, |
| "grad_norm": 0.8236012040739623, |
| "learning_rate": 9.77178541614064e-06, |
| "loss": 0.0951, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.48362147406733397, |
| "grad_norm": 0.6622501946117725, |
| "learning_rate": 9.77135833623771e-06, |
| "loss": 0.083, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4840764331210191, |
| "grad_norm": 0.8689743387052602, |
| "learning_rate": 9.770930866440927e-06, |
| "loss": 0.1074, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.48453139217470426, |
| "grad_norm": 0.6733750246744147, |
| "learning_rate": 9.770503006785214e-06, |
| "loss": 0.0639, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48498635122838946, |
| "grad_norm": 0.9485233745498586, |
| "learning_rate": 9.770074757305541e-06, |
| "loss": 0.1106, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4854413102820746, |
| "grad_norm": 0.8288392949652397, |
| "learning_rate": 9.769646118036902e-06, |
| "loss": 0.0661, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4858962693357598, |
| "grad_norm": 0.7475423805914638, |
| "learning_rate": 9.76921708901432e-06, |
| "loss": 0.0686, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.48635122838944495, |
| "grad_norm": 0.54120364671088, |
| "learning_rate": 9.768787670272855e-06, |
| "loss": 0.0629, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.48680618744313015, |
| "grad_norm": 0.7281619635509152, |
| "learning_rate": 9.768357861847598e-06, |
| "loss": 0.0723, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4872611464968153, |
| "grad_norm": 0.8883321717067604, |
| "learning_rate": 9.767927663773668e-06, |
| "loss": 0.0832, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.48771610555050043, |
| "grad_norm": 0.7681469789077073, |
| "learning_rate": 9.767497076086223e-06, |
| "loss": 0.0786, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.48817106460418563, |
| "grad_norm": 0.6590861395931087, |
| "learning_rate": 9.767066098820446e-06, |
| "loss": 0.0704, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4886260236578708, |
| "grad_norm": 0.7944203702948146, |
| "learning_rate": 9.766634732011557e-06, |
| "loss": 0.0867, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.489080982711556, |
| "grad_norm": 0.7832480468570255, |
| "learning_rate": 9.766202975694801e-06, |
| "loss": 0.0873, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4895359417652411, |
| "grad_norm": 0.7232266679451883, |
| "learning_rate": 9.765770829905464e-06, |
| "loss": 0.0785, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4899909008189263, |
| "grad_norm": 0.5406798309730716, |
| "learning_rate": 9.765338294678856e-06, |
| "loss": 0.0469, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.49044585987261147, |
| "grad_norm": 0.5866548164219128, |
| "learning_rate": 9.764905370050321e-06, |
| "loss": 0.0524, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4909008189262966, |
| "grad_norm": 0.9915720236606885, |
| "learning_rate": 9.76447205605524e-06, |
| "loss": 0.1019, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4913557779799818, |
| "grad_norm": 0.6838845303274752, |
| "learning_rate": 9.764038352729018e-06, |
| "loss": 0.0891, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.49181073703366696, |
| "grad_norm": 0.9385660559352969, |
| "learning_rate": 9.763604260107096e-06, |
| "loss": 0.1058, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.49226569608735216, |
| "grad_norm": 0.6710872617569944, |
| "learning_rate": 9.763169778224946e-06, |
| "loss": 0.0665, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4927206551410373, |
| "grad_norm": 0.7878885609137168, |
| "learning_rate": 9.762734907118072e-06, |
| "loss": 0.0876, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4931756141947225, |
| "grad_norm": 0.6302166766090778, |
| "learning_rate": 9.76229964682201e-06, |
| "loss": 0.0507, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.49363057324840764, |
| "grad_norm": 0.5833462678864086, |
| "learning_rate": 9.761863997372325e-06, |
| "loss": 0.0612, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4940855323020928, |
| "grad_norm": 1.036522158484448, |
| "learning_rate": 9.761427958804621e-06, |
| "loss": 0.1395, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.494540491355778, |
| "grad_norm": 1.1502320115946314, |
| "learning_rate": 9.760991531154526e-06, |
| "loss": 0.1149, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.49499545040946313, |
| "grad_norm": 0.7616054217825209, |
| "learning_rate": 9.760554714457704e-06, |
| "loss": 0.0684, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.49545040946314833, |
| "grad_norm": 0.5129309167340426, |
| "learning_rate": 9.760117508749846e-06, |
| "loss": 0.0614, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4959053685168335, |
| "grad_norm": 0.7147170789642256, |
| "learning_rate": 9.759679914066686e-06, |
| "loss": 0.0842, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4963603275705187, |
| "grad_norm": 0.7513123367978354, |
| "learning_rate": 9.759241930443975e-06, |
| "loss": 0.0749, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4968152866242038, |
| "grad_norm": 0.5462870672862663, |
| "learning_rate": 9.75880355791751e-06, |
| "loss": 0.0588, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.49727024567788897, |
| "grad_norm": 0.6158644897786469, |
| "learning_rate": 9.758364796523105e-06, |
| "loss": 0.0578, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.49772520473157417, |
| "grad_norm": 0.5248367448810554, |
| "learning_rate": 9.757925646296617e-06, |
| "loss": 0.0504, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4981801637852593, |
| "grad_norm": 0.7801307646100064, |
| "learning_rate": 9.757486107273935e-06, |
| "loss": 0.0819, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4986351228389445, |
| "grad_norm": 0.6822936325355138, |
| "learning_rate": 9.75704617949097e-06, |
| "loss": 0.0828, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.49909008189262966, |
| "grad_norm": 0.49379397863131413, |
| "learning_rate": 9.756605862983675e-06, |
| "loss": 0.0606, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.49954504094631486, |
| "grad_norm": 0.5236513133369656, |
| "learning_rate": 9.756165157788029e-06, |
| "loss": 0.0493, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.7323812225903658, |
| "learning_rate": 9.755724063940047e-06, |
| "loss": 0.0794, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5004549590536852, |
| "grad_norm": 0.853156508842135, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.08, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5009099181073703, |
| "grad_norm": 0.7117091061791435, |
| "learning_rate": 9.754840710431274e-06, |
| "loss": 0.0773, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5013648771610555, |
| "grad_norm": 0.9350752111669145, |
| "learning_rate": 9.754398450842668e-06, |
| "loss": 0.1046, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5018198362147407, |
| "grad_norm": 0.8834833642233855, |
| "learning_rate": 9.753955802746091e-06, |
| "loss": 0.1284, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5022747952684259, |
| "grad_norm": 0.9022387216275947, |
| "learning_rate": 9.753512766177717e-06, |
| "loss": 0.0898, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.502729754322111, |
| "grad_norm": 0.551248880180483, |
| "learning_rate": 9.753069341173745e-06, |
| "loss": 0.0596, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5031847133757962, |
| "grad_norm": 0.5970423480352659, |
| "learning_rate": 9.752625527770409e-06, |
| "loss": 0.0723, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5036396724294814, |
| "grad_norm": 0.7620108531589319, |
| "learning_rate": 9.75218132600398e-06, |
| "loss": 0.0856, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5040946314831665, |
| "grad_norm": 0.7720887684681512, |
| "learning_rate": 9.751736735910753e-06, |
| "loss": 0.0904, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5045495905368517, |
| "grad_norm": 0.8672659681858957, |
| "learning_rate": 9.75129175752706e-06, |
| "loss": 0.1043, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5050045495905369, |
| "grad_norm": 0.7511079874116621, |
| "learning_rate": 9.75084639088926e-06, |
| "loss": 0.0719, |
| "step": 1110 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7279902056448.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|