{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.25250227479526843,
  "eval_steps": 500,
  "global_step": 555,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00045495905368516835,
      "grad_norm": 9.461428161462043,
      "learning_rate": 1e-05,
      "loss": 0.1263,
      "step": 1
    },
    {
      "epoch": 0.0009099181073703367,
      "grad_norm": 5.190780450250769,
      "learning_rate": 9.99999979571129e-06,
      "loss": 0.1723,
      "step": 2
    },
    {
      "epoch": 0.001364877161055505,
      "grad_norm": 7.521926017130347,
      "learning_rate": 9.999999182845177e-06,
      "loss": 0.1327,
      "step": 3
    },
    {
      "epoch": 0.0018198362147406734,
      "grad_norm": 2.5665810200307217,
      "learning_rate": 9.99999816140171e-06,
      "loss": 0.1095,
      "step": 4
    },
    {
      "epoch": 0.0022747952684258415,
      "grad_norm": 2.738508706395883,
      "learning_rate": 9.999996731380973e-06,
      "loss": 0.1151,
      "step": 5
    },
    {
      "epoch": 0.00272975432211101,
      "grad_norm": 2.67941899677245,
      "learning_rate": 9.999994892783083e-06,
      "loss": 0.0821,
      "step": 6
    },
    {
      "epoch": 0.0031847133757961785,
      "grad_norm": 2.137586234420784,
      "learning_rate": 9.99999264560819e-06,
      "loss": 0.0729,
      "step": 7
    },
    {
      "epoch": 0.003639672429481347,
      "grad_norm": 2.8221590420989164,
      "learning_rate": 9.999989989856477e-06,
      "loss": 0.0929,
      "step": 8
    },
    {
      "epoch": 0.004094631483166515,
      "grad_norm": 1.6167314639784554,
      "learning_rate": 9.999986925528164e-06,
      "loss": 0.0466,
      "step": 9
    },
    {
      "epoch": 0.004549590536851683,
      "grad_norm": 2.1773262431631313,
      "learning_rate": 9.999983452623498e-06,
      "loss": 0.0709,
      "step": 10
    },
    {
      "epoch": 0.005004549590536852,
      "grad_norm": 7.6444390817806465,
      "learning_rate": 9.999979571142765e-06,
      "loss": 0.0809,
      "step": 11
    },
    {
      "epoch": 0.00545950864422202,
      "grad_norm": 2.034523884241798,
      "learning_rate": 9.999975281086278e-06,
      "loss": 0.0839,
      "step": 12
    },
    {
      "epoch": 0.005914467697907188,
      "grad_norm": 3.576108282005355,
      "learning_rate": 9.999970582454392e-06,
      "loss": 0.0728,
      "step": 13
    },
    {
      "epoch": 0.006369426751592357,
      "grad_norm": 2.623641566468802,
      "learning_rate": 9.999965475247491e-06,
      "loss": 0.1052,
      "step": 14
    },
    {
      "epoch": 0.006824385805277525,
      "grad_norm": 2.1413574998269085,
      "learning_rate": 9.99995995946599e-06,
      "loss": 0.0885,
      "step": 15
    },
    {
      "epoch": 0.007279344858962694,
      "grad_norm": 1.4859066724415246,
      "learning_rate": 9.999954035110342e-06,
      "loss": 0.0644,
      "step": 16
    },
    {
      "epoch": 0.0077343039126478615,
      "grad_norm": 2.851793157608408,
      "learning_rate": 9.999947702181027e-06,
      "loss": 0.1057,
      "step": 17
    },
    {
      "epoch": 0.00818926296633303,
      "grad_norm": 4.693829546662477,
      "learning_rate": 9.999940960678568e-06,
      "loss": 0.0867,
      "step": 18
    },
    {
      "epoch": 0.008644222020018199,
      "grad_norm": 2.2728033563417362,
      "learning_rate": 9.999933810603513e-06,
      "loss": 0.0789,
      "step": 19
    },
    {
      "epoch": 0.009099181073703366,
      "grad_norm": 1.6705986173507794,
      "learning_rate": 9.999926251956447e-06,
      "loss": 0.0683,
      "step": 20
    },
    {
      "epoch": 0.009554140127388535,
      "grad_norm": 2.187579869114393,
      "learning_rate": 9.999918284737986e-06,
      "loss": 0.0984,
      "step": 21
    },
    {
      "epoch": 0.010009099181073703,
      "grad_norm": 2.328040268012338,
      "learning_rate": 9.999909908948782e-06,
      "loss": 0.0699,
      "step": 22
    },
    {
      "epoch": 0.010464058234758872,
      "grad_norm": 5.572389775693198,
      "learning_rate": 9.999901124589519e-06,
      "loss": 0.0912,
      "step": 23
    },
    {
      "epoch": 0.01091901728844404,
      "grad_norm": 1.84796719674859,
      "learning_rate": 9.999891931660916e-06,
      "loss": 0.1015,
      "step": 24
    },
    {
      "epoch": 0.011373976342129208,
      "grad_norm": 1.7501762990792236,
      "learning_rate": 9.999882330163725e-06,
      "loss": 0.0909,
      "step": 25
    },
    {
      "epoch": 0.011828935395814377,
      "grad_norm": 0.9922115950592263,
      "learning_rate": 9.999872320098729e-06,
      "loss": 0.0656,
      "step": 26
    },
    {
      "epoch": 0.012283894449499545,
      "grad_norm": 1.5612370560987539,
      "learning_rate": 9.999861901466746e-06,
      "loss": 0.0974,
      "step": 27
    },
    {
      "epoch": 0.012738853503184714,
      "grad_norm": 1.4617271794930395,
      "learning_rate": 9.999851074268625e-06,
      "loss": 0.0853,
      "step": 28
    },
    {
      "epoch": 0.013193812556869881,
      "grad_norm": 1.8127085104491556,
      "learning_rate": 9.999839838505257e-06,
      "loss": 0.1081,
      "step": 29
    },
    {
      "epoch": 0.01364877161055505,
      "grad_norm": 1.4710105512612208,
      "learning_rate": 9.999828194177555e-06,
      "loss": 0.0868,
      "step": 30
    },
    {
      "epoch": 0.014103730664240218,
      "grad_norm": 1.3474487189311888,
      "learning_rate": 9.999816141286472e-06,
      "loss": 0.0817,
      "step": 31
    },
    {
      "epoch": 0.014558689717925387,
      "grad_norm": 1.0967596652549403,
      "learning_rate": 9.99980367983299e-06,
      "loss": 0.0637,
      "step": 32
    },
    {
      "epoch": 0.015013648771610554,
      "grad_norm": 3.179425671823194,
      "learning_rate": 9.999790809818134e-06,
      "loss": 0.069,
      "step": 33
    },
    {
      "epoch": 0.015468607825295723,
      "grad_norm": 4.482257681577152,
      "learning_rate": 9.999777531242951e-06,
      "loss": 0.0915,
      "step": 34
    },
    {
      "epoch": 0.01592356687898089,
      "grad_norm": 3.953299040475791,
      "learning_rate": 9.999763844108528e-06,
      "loss": 0.0562,
      "step": 35
    },
    {
      "epoch": 0.01637852593266606,
      "grad_norm": 1.1127201050382067,
      "learning_rate": 9.999749748415982e-06,
      "loss": 0.0556,
      "step": 36
    },
    {
      "epoch": 0.01683348498635123,
      "grad_norm": 79.45756094624792,
      "learning_rate": 9.999735244166464e-06,
      "loss": 0.1223,
      "step": 37
    },
    {
      "epoch": 0.017288444040036398,
      "grad_norm": 2777.9092912017113,
      "learning_rate": 9.99972033136116e-06,
      "loss": 0.3211,
      "step": 38
    },
    {
      "epoch": 0.017743403093721567,
      "grad_norm": 2.5204693177238466,
      "learning_rate": 9.999705010001291e-06,
      "loss": 0.0723,
      "step": 39
    },
    {
      "epoch": 0.018198362147406732,
      "grad_norm": 2.2975907071135655,
      "learning_rate": 9.999689280088105e-06,
      "loss": 0.0696,
      "step": 40
    },
    {
      "epoch": 0.0186533212010919,
      "grad_norm": 2.998434349074003,
      "learning_rate": 9.99967314162289e-06,
      "loss": 0.083,
      "step": 41
    },
    {
      "epoch": 0.01910828025477707,
      "grad_norm": 3.882239448575704,
      "learning_rate": 9.999656594606966e-06,
      "loss": 0.1015,
      "step": 42
    },
    {
      "epoch": 0.019563239308462238,
      "grad_norm": 3.5286596480512493,
      "learning_rate": 9.999639639041681e-06,
      "loss": 0.0817,
      "step": 43
    },
    {
      "epoch": 0.020018198362147407,
      "grad_norm": 1.6933989447443707,
      "learning_rate": 9.999622274928424e-06,
      "loss": 0.1003,
      "step": 44
    },
    {
      "epoch": 0.020473157415832575,
      "grad_norm": 1.2483160046323276,
      "learning_rate": 9.999604502268614e-06,
      "loss": 0.0952,
      "step": 45
    },
    {
      "epoch": 0.020928116469517744,
      "grad_norm": 0.9417906124383243,
      "learning_rate": 9.9995863210637e-06,
      "loss": 0.0731,
      "step": 46
    },
    {
      "epoch": 0.021383075523202913,
      "grad_norm": 2.8195414757816897,
      "learning_rate": 9.99956773131517e-06,
      "loss": 0.1845,
      "step": 47
    },
    {
      "epoch": 0.02183803457688808,
      "grad_norm": 2.74390379471345,
      "learning_rate": 9.999548733024545e-06,
      "loss": 0.1826,
      "step": 48
    },
    {
      "epoch": 0.022292993630573247,
      "grad_norm": 1.5138494619527987,
      "learning_rate": 9.999529326193373e-06,
      "loss": 0.0857,
      "step": 49
    },
    {
      "epoch": 0.022747952684258416,
      "grad_norm": 1.215379974181271,
      "learning_rate": 9.999509510823242e-06,
      "loss": 0.0686,
      "step": 50
    },
    {
      "epoch": 0.023202911737943584,
      "grad_norm": 1.292187967807859,
      "learning_rate": 9.999489286915773e-06,
      "loss": 0.0707,
      "step": 51
    },
    {
      "epoch": 0.023657870791628753,
      "grad_norm": 1.7888013203563982,
      "learning_rate": 9.999468654472614e-06,
      "loss": 0.0682,
      "step": 52
    },
    {
      "epoch": 0.024112829845313922,
      "grad_norm": 0.8979425621703144,
      "learning_rate": 9.999447613495457e-06,
      "loss": 0.0508,
      "step": 53
    },
    {
      "epoch": 0.02456778889899909,
      "grad_norm": 1.9123835444775663,
      "learning_rate": 9.99942616398602e-06,
      "loss": 0.0689,
      "step": 54
    },
    {
      "epoch": 0.02502274795268426,
      "grad_norm": 0.9393581994096443,
      "learning_rate": 9.99940430594605e-06,
      "loss": 0.0496,
      "step": 55
    },
    {
      "epoch": 0.025477707006369428,
      "grad_norm": 1.0234476513644222,
      "learning_rate": 9.999382039377339e-06,
      "loss": 0.0601,
      "step": 56
    },
    {
      "epoch": 0.025932666060054597,
      "grad_norm": 0.9291387208138827,
      "learning_rate": 9.999359364281704e-06,
      "loss": 0.0377,
      "step": 57
    },
    {
      "epoch": 0.026387625113739762,
      "grad_norm": 1.8209170803663992,
      "learning_rate": 9.999336280660999e-06,
      "loss": 0.1144,
      "step": 58
    },
    {
      "epoch": 0.02684258416742493,
      "grad_norm": 1.1214625046464874,
      "learning_rate": 9.99931278851711e-06,
      "loss": 0.0622,
      "step": 59
    },
    {
      "epoch": 0.0272975432211101,
      "grad_norm": 1.0331723997917317,
      "learning_rate": 9.999288887851956e-06,
      "loss": 0.0667,
      "step": 60
    },
    {
      "epoch": 0.027752502274795268,
      "grad_norm": 1.0412381501406744,
      "learning_rate": 9.999264578667493e-06,
      "loss": 0.0566,
      "step": 61
    },
    {
      "epoch": 0.028207461328480437,
      "grad_norm": 1.4510603110658047,
      "learning_rate": 9.999239860965703e-06,
      "loss": 0.0845,
      "step": 62
    },
    {
      "epoch": 0.028662420382165606,
      "grad_norm": 1.301162540669183,
      "learning_rate": 9.999214734748609e-06,
      "loss": 0.0759,
      "step": 63
    },
    {
      "epoch": 0.029117379435850774,
      "grad_norm": 0.9977688847603402,
      "learning_rate": 9.999189200018263e-06,
      "loss": 0.0528,
      "step": 64
    },
    {
      "epoch": 0.029572338489535943,
      "grad_norm": 1.2894688842348854,
      "learning_rate": 9.99916325677675e-06,
      "loss": 0.0899,
      "step": 65
    },
    {
      "epoch": 0.03002729754322111,
      "grad_norm": 1.4627871680702638,
      "learning_rate": 9.999136905026194e-06,
      "loss": 0.1456,
      "step": 66
    },
    {
      "epoch": 0.030482256596906277,
      "grad_norm": 1.2304385710214434,
      "learning_rate": 9.999110144768745e-06,
      "loss": 0.079,
      "step": 67
    },
    {
      "epoch": 0.030937215650591446,
      "grad_norm": 1.085016380732753,
      "learning_rate": 9.99908297600659e-06,
      "loss": 0.0696,
      "step": 68
    },
    {
      "epoch": 0.03139217470427662,
      "grad_norm": 0.989450558642297,
      "learning_rate": 9.99905539874195e-06,
      "loss": 0.069,
      "step": 69
    },
    {
      "epoch": 0.03184713375796178,
      "grad_norm": 1.0510491151133208,
      "learning_rate": 9.99902741297708e-06,
      "loss": 0.0555,
      "step": 70
    },
    {
      "epoch": 0.03230209281164695,
      "grad_norm": 0.8938033562648371,
      "learning_rate": 9.998999018714264e-06,
      "loss": 0.0783,
      "step": 71
    },
    {
      "epoch": 0.03275705186533212,
      "grad_norm": 2.902512108322722,
      "learning_rate": 9.998970215955824e-06,
      "loss": 0.0702,
      "step": 72
    },
    {
      "epoch": 0.033212010919017286,
      "grad_norm": 0.7661831894133686,
      "learning_rate": 9.998941004704113e-06,
      "loss": 0.0519,
      "step": 73
    },
    {
      "epoch": 0.03366696997270246,
      "grad_norm": 1.1047249497744047,
      "learning_rate": 9.998911384961518e-06,
      "loss": 0.0773,
      "step": 74
    },
    {
      "epoch": 0.034121929026387623,
      "grad_norm": 0.7750047299312716,
      "learning_rate": 9.998881356730458e-06,
      "loss": 0.0598,
      "step": 75
    },
    {
      "epoch": 0.034576888080072796,
      "grad_norm": 0.9815801555720315,
      "learning_rate": 9.99885092001339e-06,
      "loss": 0.0661,
      "step": 76
    },
    {
      "epoch": 0.03503184713375796,
      "grad_norm": 1.3090963451351905,
      "learning_rate": 9.998820074812799e-06,
      "loss": 0.0713,
      "step": 77
    },
    {
      "epoch": 0.03548680618744313,
      "grad_norm": 1.1489338732270693,
      "learning_rate": 9.998788821131207e-06,
      "loss": 0.0946,
      "step": 78
    },
    {
      "epoch": 0.0359417652411283,
      "grad_norm": 0.9040381990998293,
      "learning_rate": 9.998757158971164e-06,
      "loss": 0.067,
      "step": 79
    },
    {
      "epoch": 0.036396724294813464,
      "grad_norm": 1.1019926198229115,
      "learning_rate": 9.998725088335263e-06,
      "loss": 0.0874,
      "step": 80
    },
    {
      "epoch": 0.036851683348498636,
      "grad_norm": 0.5779852750462403,
      "learning_rate": 9.99869260922612e-06,
      "loss": 0.0492,
      "step": 81
    },
    {
      "epoch": 0.0373066424021838,
      "grad_norm": 1.2769852710418472,
      "learning_rate": 9.998659721646393e-06,
      "loss": 0.0781,
      "step": 82
    },
    {
      "epoch": 0.03776160145586897,
      "grad_norm": 0.9020624084974485,
      "learning_rate": 9.998626425598766e-06,
      "loss": 0.0734,
      "step": 83
    },
    {
      "epoch": 0.03821656050955414,
      "grad_norm": 0.9626764462141776,
      "learning_rate": 9.99859272108596e-06,
      "loss": 0.0719,
      "step": 84
    },
    {
      "epoch": 0.03867151956323931,
      "grad_norm": 0.9435885887029873,
      "learning_rate": 9.998558608110733e-06,
      "loss": 0.0835,
      "step": 85
    },
    {
      "epoch": 0.039126478616924476,
      "grad_norm": 1.0578725525123687,
      "learning_rate": 9.998524086675867e-06,
      "loss": 0.0746,
      "step": 86
    },
    {
      "epoch": 0.03958143767060965,
      "grad_norm": 1.0366588534208079,
      "learning_rate": 9.998489156784188e-06,
      "loss": 0.0933,
      "step": 87
    },
    {
      "epoch": 0.040036396724294813,
      "grad_norm": 1.0595948680723846,
      "learning_rate": 9.998453818438547e-06,
      "loss": 0.0846,
      "step": 88
    },
    {
      "epoch": 0.04049135577797998,
      "grad_norm": 0.8807515753016749,
      "learning_rate": 9.998418071641833e-06,
      "loss": 0.0649,
      "step": 89
    },
    {
      "epoch": 0.04094631483166515,
      "grad_norm": 0.9034225145874141,
      "learning_rate": 9.998381916396967e-06,
      "loss": 0.0621,
      "step": 90
    },
    {
      "epoch": 0.041401273885350316,
      "grad_norm": 0.6732889821553815,
      "learning_rate": 9.998345352706901e-06,
      "loss": 0.0367,
      "step": 91
    },
    {
      "epoch": 0.04185623293903549,
      "grad_norm": 0.7136967603743426,
      "learning_rate": 9.998308380574628e-06,
      "loss": 0.0569,
      "step": 92
    },
    {
      "epoch": 0.042311191992720654,
      "grad_norm": 1.1459385364035048,
      "learning_rate": 9.998271000003166e-06,
      "loss": 0.1184,
      "step": 93
    },
    {
      "epoch": 0.042766151046405826,
      "grad_norm": 0.8224906129097734,
      "learning_rate": 9.998233210995569e-06,
      "loss": 0.0682,
      "step": 94
    },
    {
      "epoch": 0.04322111010009099,
      "grad_norm": 1.5182946932236698,
      "learning_rate": 9.998195013554926e-06,
      "loss": 0.0875,
      "step": 95
    },
    {
      "epoch": 0.04367606915377616,
      "grad_norm": 0.9355855711018981,
      "learning_rate": 9.998156407684359e-06,
      "loss": 0.0939,
      "step": 96
    },
    {
      "epoch": 0.04413102820746133,
      "grad_norm": 0.7329840867165283,
      "learning_rate": 9.998117393387022e-06,
      "loss": 0.0466,
      "step": 97
    },
    {
      "epoch": 0.044585987261146494,
      "grad_norm": 0.8701001036058451,
      "learning_rate": 9.9980779706661e-06,
      "loss": 0.0729,
      "step": 98
    },
    {
      "epoch": 0.045040946314831666,
      "grad_norm": 1.0218896298663185,
      "learning_rate": 9.99803813952482e-06,
      "loss": 0.0828,
      "step": 99
    },
    {
      "epoch": 0.04549590536851683,
      "grad_norm": 0.9044995357273884,
      "learning_rate": 9.997997899966433e-06,
      "loss": 0.0709,
      "step": 100
    },
    {
      "epoch": 0.045950864422202004,
      "grad_norm": 0.9877796099816964,
      "learning_rate": 9.99795725199423e-06,
      "loss": 0.0903,
      "step": 101
    },
    {
      "epoch": 0.04640582347588717,
      "grad_norm": 1.0061501994463906,
      "learning_rate": 9.99791619561153e-06,
      "loss": 0.0831,
      "step": 102
    },
    {
      "epoch": 0.04686078252957234,
      "grad_norm": 0.8789173954818107,
      "learning_rate": 9.997874730821689e-06,
      "loss": 0.0714,
      "step": 103
    },
    {
      "epoch": 0.047315741583257506,
      "grad_norm": 15.480920098194954,
      "learning_rate": 9.997832857628093e-06,
      "loss": 0.2603,
      "step": 104
    },
    {
      "epoch": 0.04777070063694268,
      "grad_norm": 1.3806761301603454,
      "learning_rate": 9.99779057603417e-06,
      "loss": 0.1227,
      "step": 105
    },
    {
      "epoch": 0.048225659690627844,
      "grad_norm": 0.8462176607269959,
      "learning_rate": 9.997747886043368e-06,
      "loss": 0.0605,
      "step": 106
    },
    {
      "epoch": 0.04868061874431301,
      "grad_norm": 0.7467169847716549,
      "learning_rate": 9.997704787659179e-06,
      "loss": 0.0618,
      "step": 107
    },
    {
      "epoch": 0.04913557779799818,
      "grad_norm": 1.5653334818977065,
      "learning_rate": 9.997661280885125e-06,
      "loss": 0.1253,
      "step": 108
    },
    {
      "epoch": 0.049590536851683346,
      "grad_norm": 0.871706038604149,
      "learning_rate": 9.99761736572476e-06,
      "loss": 0.0716,
      "step": 109
    },
    {
      "epoch": 0.05004549590536852,
      "grad_norm": 1.1398296008355844,
      "learning_rate": 9.997573042181672e-06,
      "loss": 0.0698,
      "step": 110
    },
    {
      "epoch": 0.050500454959053684,
      "grad_norm": 1.0487992691419916,
      "learning_rate": 9.997528310259485e-06,
      "loss": 0.1102,
      "step": 111
    },
    {
      "epoch": 0.050955414012738856,
      "grad_norm": 0.9112684449646818,
      "learning_rate": 9.997483169961852e-06,
      "loss": 0.1032,
      "step": 112
    },
    {
      "epoch": 0.05141037306642402,
      "grad_norm": 0.9418790141923585,
      "learning_rate": 9.997437621292463e-06,
      "loss": 0.0771,
      "step": 113
    },
    {
      "epoch": 0.051865332120109194,
      "grad_norm": 0.7796140692842074,
      "learning_rate": 9.99739166425504e-06,
      "loss": 0.0627,
      "step": 114
    },
    {
      "epoch": 0.05232029117379436,
      "grad_norm": 1.5434421216734795,
      "learning_rate": 9.997345298853339e-06,
      "loss": 0.1495,
      "step": 115
    },
    {
      "epoch": 0.052775250227479524,
      "grad_norm": 0.8898179660551836,
      "learning_rate": 9.997298525091148e-06,
      "loss": 0.0735,
      "step": 116
    },
    {
      "epoch": 0.053230209281164696,
      "grad_norm": 0.8585916871524272,
      "learning_rate": 9.997251342972288e-06,
      "loss": 0.068,
      "step": 117
    },
    {
      "epoch": 0.05368516833484986,
      "grad_norm": 0.812806800238708,
      "learning_rate": 9.997203752500616e-06,
      "loss": 0.0689,
      "step": 118
    },
    {
      "epoch": 0.054140127388535034,
      "grad_norm": 0.9677722064277628,
      "learning_rate": 9.997155753680021e-06,
      "loss": 0.0795,
      "step": 119
    },
    {
      "epoch": 0.0545950864422202,
      "grad_norm": 1.621934591654054,
      "learning_rate": 9.997107346514425e-06,
      "loss": 0.0707,
      "step": 120
    },
    {
      "epoch": 0.05505004549590537,
      "grad_norm": 0.6750452750311531,
      "learning_rate": 9.997058531007782e-06,
      "loss": 0.0588,
      "step": 121
    },
    {
      "epoch": 0.055505004549590536,
      "grad_norm": 0.9583870506818666,
      "learning_rate": 9.997009307164083e-06,
      "loss": 0.0859,
      "step": 122
    },
    {
      "epoch": 0.05595996360327571,
      "grad_norm": 1.247483970027119,
      "learning_rate": 9.99695967498735e-06,
      "loss": 0.0952,
      "step": 123
    },
    {
      "epoch": 0.056414922656960874,
      "grad_norm": 0.7937903902273558,
      "learning_rate": 9.996909634481639e-06,
      "loss": 0.0614,
      "step": 124
    },
    {
      "epoch": 0.05686988171064604,
      "grad_norm": 4.855426128828546,
      "learning_rate": 9.996859185651038e-06,
      "loss": 0.1629,
      "step": 125
    },
    {
      "epoch": 0.05732484076433121,
      "grad_norm": 1.0499970639607177,
      "learning_rate": 9.99680832849967e-06,
      "loss": 0.1031,
      "step": 126
    },
    {
      "epoch": 0.05777979981801638,
      "grad_norm": 0.8730447821488512,
      "learning_rate": 9.99675706303169e-06,
      "loss": 0.0606,
      "step": 127
    },
    {
      "epoch": 0.05823475887170155,
      "grad_norm": 1.2779985416162813,
      "learning_rate": 9.99670538925129e-06,
      "loss": 0.074,
      "step": 128
    },
    {
      "epoch": 0.058689717925386714,
      "grad_norm": 0.8606157718419157,
      "learning_rate": 9.996653307162687e-06,
      "loss": 0.0703,
      "step": 129
    },
    {
      "epoch": 0.059144676979071886,
      "grad_norm": 0.8920761218762643,
      "learning_rate": 9.996600816770144e-06,
      "loss": 0.0818,
      "step": 130
    },
    {
      "epoch": 0.05959963603275705,
      "grad_norm": 1.1603462045917847,
      "learning_rate": 9.996547918077944e-06,
      "loss": 0.1148,
      "step": 131
    },
    {
      "epoch": 0.06005459508644222,
      "grad_norm": 0.9108713801214797,
      "learning_rate": 9.996494611090414e-06,
      "loss": 0.0884,
      "step": 132
    },
    {
      "epoch": 0.06050955414012739,
      "grad_norm": 0.6523725468628359,
      "learning_rate": 9.996440895811907e-06,
      "loss": 0.0535,
      "step": 133
    },
    {
      "epoch": 0.060964513193812554,
      "grad_norm": 0.8812777694752004,
      "learning_rate": 9.996386772246816e-06,
      "loss": 0.087,
      "step": 134
    },
    {
      "epoch": 0.061419472247497726,
      "grad_norm": 1.0622191207422995,
      "learning_rate": 9.99633224039956e-06,
      "loss": 0.0982,
      "step": 135
    },
    {
      "epoch": 0.06187443130118289,
      "grad_norm": 3.7961077321923025,
      "learning_rate": 9.996277300274596e-06,
      "loss": 0.1526,
      "step": 136
    },
    {
      "epoch": 0.062329390354868064,
      "grad_norm": 0.9444433559435487,
      "learning_rate": 9.996221951876415e-06,
      "loss": 0.0996,
      "step": 137
    },
    {
      "epoch": 0.06278434940855324,
      "grad_norm": 1.444871481552235,
      "learning_rate": 9.996166195209539e-06,
      "loss": 0.1075,
      "step": 138
    },
    {
      "epoch": 0.0632393084622384,
      "grad_norm": 0.7446446480732116,
      "learning_rate": 9.996110030278522e-06,
      "loss": 0.0561,
      "step": 139
    },
    {
      "epoch": 0.06369426751592357,
      "grad_norm": 0.8913010543094952,
      "learning_rate": 9.996053457087958e-06,
      "loss": 0.0715,
      "step": 140
    },
    {
      "epoch": 0.06414922656960874,
      "grad_norm": 0.7815821404043856,
      "learning_rate": 9.995996475642466e-06,
      "loss": 0.0796,
      "step": 141
    },
    {
      "epoch": 0.0646041856232939,
      "grad_norm": 0.74337588448595,
      "learning_rate": 9.995939085946704e-06,
      "loss": 0.0661,
      "step": 142
    },
    {
      "epoch": 0.06505914467697907,
      "grad_norm": 0.9974255688753435,
      "learning_rate": 9.995881288005363e-06,
      "loss": 0.0869,
      "step": 143
    },
    {
      "epoch": 0.06551410373066424,
      "grad_norm": 1.2260290141946268,
      "learning_rate": 9.995823081823162e-06,
      "loss": 0.0766,
      "step": 144
    },
    {
      "epoch": 0.06596906278434941,
      "grad_norm": 0.9751795993584637,
      "learning_rate": 9.99576446740486e-06,
      "loss": 0.091,
      "step": 145
    },
    {
      "epoch": 0.06642402183803457,
      "grad_norm": 1.6175476325168967,
      "learning_rate": 9.995705444755249e-06,
      "loss": 0.1208,
      "step": 146
    },
    {
      "epoch": 0.06687898089171974,
      "grad_norm": 0.7580083688127299,
      "learning_rate": 9.995646013879147e-06,
      "loss": 0.0622,
      "step": 147
    },
    {
      "epoch": 0.06733393994540492,
      "grad_norm": 1.0194887039793072,
      "learning_rate": 9.995586174781413e-06,
      "loss": 0.0753,
      "step": 148
    },
    {
      "epoch": 0.06778889899909009,
      "grad_norm": 0.9065646408503975,
      "learning_rate": 9.995525927466936e-06,
      "loss": 0.0848,
      "step": 149
    },
    {
      "epoch": 0.06824385805277525,
      "grad_norm": 0.8871078738477127,
      "learning_rate": 9.995465271940641e-06,
      "loss": 0.0607,
      "step": 150
    },
    {
      "epoch": 0.06869881710646042,
      "grad_norm": 1.1486707652049646,
      "learning_rate": 9.995404208207485e-06,
      "loss": 0.0809,
      "step": 151
    },
    {
      "epoch": 0.06915377616014559,
      "grad_norm": 1.1473150526096232,
      "learning_rate": 9.995342736272453e-06,
      "loss": 0.1035,
      "step": 152
    },
    {
      "epoch": 0.06960873521383075,
      "grad_norm": 1.3025683052462544,
      "learning_rate": 9.995280856140572e-06,
      "loss": 0.1197,
      "step": 153
    },
    {
      "epoch": 0.07006369426751592,
      "grad_norm": 0.8069596755970996,
      "learning_rate": 9.9952185678169e-06,
      "loss": 0.0526,
      "step": 154
    },
    {
      "epoch": 0.0705186533212011,
      "grad_norm": 0.8153700064848134,
      "learning_rate": 9.995155871306524e-06,
      "loss": 0.0613,
      "step": 155
    },
    {
      "epoch": 0.07097361237488627,
      "grad_norm": 0.7319023745966868,
      "learning_rate": 9.995092766614567e-06,
      "loss": 0.0512,
      "step": 156
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 1.0146656175738817,
      "learning_rate": 9.995029253746186e-06,
      "loss": 0.0846,
      "step": 157
    },
    {
      "epoch": 0.0718835304822566,
      "grad_norm": 0.8015254985373994,
      "learning_rate": 9.994965332706574e-06,
      "loss": 0.0619,
      "step": 158
    },
    {
      "epoch": 0.07233848953594177,
      "grad_norm": 1.0630207312416284,
      "learning_rate": 9.994901003500952e-06,
      "loss": 0.0796,
      "step": 159
    },
    {
      "epoch": 0.07279344858962693,
      "grad_norm": 0.9431304991088505,
      "learning_rate": 9.994836266134575e-06,
      "loss": 0.0743,
      "step": 160
    },
    {
      "epoch": 0.0732484076433121,
      "grad_norm": 1.023738915097686,
      "learning_rate": 9.994771120612737e-06,
      "loss": 0.0888,
      "step": 161
    },
    {
      "epoch": 0.07370336669699727,
      "grad_norm": 0.9272637744585672,
      "learning_rate": 9.994705566940757e-06,
      "loss": 0.084,
      "step": 162
    },
    {
      "epoch": 0.07415832575068244,
      "grad_norm": 1.122378326253592,
      "learning_rate": 9.994639605123994e-06,
      "loss": 0.0961,
      "step": 163
    },
    {
      "epoch": 0.0746132848043676,
      "grad_norm": 0.753531768411978,
      "learning_rate": 9.994573235167839e-06,
      "loss": 0.0736,
      "step": 164
    },
    {
      "epoch": 0.07506824385805277,
      "grad_norm": 0.9314766958597749,
      "learning_rate": 9.994506457077715e-06,
      "loss": 0.0838,
      "step": 165
    },
    {
      "epoch": 0.07552320291173795,
      "grad_norm": 0.996008388557059,
      "learning_rate": 9.994439270859077e-06,
      "loss": 0.1076,
      "step": 166
    },
    {
      "epoch": 0.07597816196542312,
      "grad_norm": 0.9199332464612126,
      "learning_rate": 9.994371676517418e-06,
      "loss": 0.0724,
      "step": 167
    },
    {
      "epoch": 0.07643312101910828,
      "grad_norm": 0.8652292283168678,
      "learning_rate": 9.994303674058259e-06,
      "loss": 0.0628,
      "step": 168
    },
    {
      "epoch": 0.07688808007279345,
      "grad_norm": 0.8176262426438138,
      "learning_rate": 9.994235263487158e-06,
      "loss": 0.0743,
      "step": 169
    },
    {
      "epoch": 0.07734303912647862,
      "grad_norm": 0.8147855247941459,
      "learning_rate": 9.994166444809705e-06,
      "loss": 0.0559,
      "step": 170
    },
    {
      "epoch": 0.07779799818016378,
      "grad_norm": 0.7853019575635352,
      "learning_rate": 9.994097218031524e-06,
      "loss": 0.0681,
      "step": 171
    },
    {
      "epoch": 0.07825295723384895,
      "grad_norm": 0.8445610480134321,
      "learning_rate": 9.994027583158272e-06,
      "loss": 0.0785,
      "step": 172
    },
    {
      "epoch": 0.07870791628753412,
      "grad_norm": 0.8555498692388026,
      "learning_rate": 9.993957540195638e-06,
      "loss": 0.077,
      "step": 173
    },
    {
      "epoch": 0.0791628753412193,
      "grad_norm": 0.8281270493499452,
      "learning_rate": 9.993887089149346e-06,
      "loss": 0.0848,
      "step": 174
    },
    {
      "epoch": 0.07961783439490445,
      "grad_norm": 0.7180425978661062,
      "learning_rate": 9.993816230025152e-06,
      "loss": 0.0588,
      "step": 175
    },
    {
      "epoch": 0.08007279344858963,
      "grad_norm": 0.9287545326980071,
      "learning_rate": 9.99374496282885e-06,
      "loss": 0.0874,
      "step": 176
    },
    {
      "epoch": 0.0805277525022748,
      "grad_norm": 1.5950603980195528,
      "learning_rate": 9.993673287566261e-06,
      "loss": 0.1301,
      "step": 177
    },
    {
      "epoch": 0.08098271155595996,
      "grad_norm": 0.505966633973175,
      "learning_rate": 9.99360120424324e-06,
      "loss": 0.0459,
      "step": 178
    },
    {
      "epoch": 0.08143767060964513,
      "grad_norm": 0.6170796905443107,
      "learning_rate": 9.993528712865681e-06,
      "loss": 0.0666,
      "step": 179
    },
    {
      "epoch": 0.0818926296633303,
      "grad_norm": 0.8965600572228928,
      "learning_rate": 9.993455813439507e-06,
      "loss": 0.0648,
      "step": 180
    },
    {
      "epoch": 0.08234758871701547,
      "grad_norm": 0.7555745664692847,
      "learning_rate": 9.993382505970673e-06,
      "loss": 0.0479,
      "step": 181
    },
    {
      "epoch": 0.08280254777070063,
      "grad_norm": 0.7885826993774436,
      "learning_rate": 9.99330879046517e-06,
      "loss": 0.0605,
      "step": 182
    },
    {
      "epoch": 0.0832575068243858,
      "grad_norm": 0.6970911126559147,
      "learning_rate": 9.993234666929024e-06,
      "loss": 0.0545,
      "step": 183
    },
    {
      "epoch": 0.08371246587807098,
      "grad_norm": 0.8281240642020996,
      "learning_rate": 9.99316013536829e-06,
      "loss": 0.0651,
      "step": 184
    },
    {
      "epoch": 0.08416742493175614,
      "grad_norm": 0.8497823551734951,
      "learning_rate": 9.993085195789057e-06,
      "loss": 0.098,
      "step": 185
    },
    {
      "epoch": 0.08462238398544131,
      "grad_norm": 0.8425278224044996,
      "learning_rate": 9.993009848197452e-06,
      "loss": 0.0861,
      "step": 186
    },
    {
      "epoch": 0.08507734303912648,
      "grad_norm": 0.729342450692031,
      "learning_rate": 9.992934092599629e-06,
      "loss": 0.0651,
      "step": 187
    },
    {
      "epoch": 0.08553230209281165,
      "grad_norm": 0.8810253378927329,
      "learning_rate": 9.99285792900178e-06,
      "loss": 0.0995,
      "step": 188
    },
    {
      "epoch": 0.08598726114649681,
      "grad_norm": 1.0402457083445067,
      "learning_rate": 9.992781357410131e-06,
      "loss": 0.1061,
      "step": 189
    },
    {
      "epoch": 0.08644222020018198,
      "grad_norm": 0.7397036090930822,
      "learning_rate": 9.992704377830934e-06,
      "loss": 0.0571,
      "step": 190
    },
    {
      "epoch": 0.08689717925386715,
      "grad_norm": 1.4783630598693296,
      "learning_rate": 9.992626990270484e-06,
      "loss": 0.1154,
      "step": 191
    },
    {
      "epoch": 0.08735213830755233,
      "grad_norm": 1.1100322283473036,
      "learning_rate": 9.992549194735101e-06,
      "loss": 0.1179,
      "step": 192
    },
    {
      "epoch": 0.08780709736123748,
      "grad_norm": 0.5797984556503705,
      "learning_rate": 9.992470991231144e-06,
      "loss": 0.0466,
      "step": 193
    },
    {
      "epoch": 0.08826205641492266,
      "grad_norm": 1.059908713900853,
      "learning_rate": 9.992392379765005e-06,
      "loss": 0.0994,
      "step": 194
    },
    {
      "epoch": 0.08871701546860783,
      "grad_norm": 1.1187885391430794,
      "learning_rate": 9.992313360343104e-06,
      "loss": 0.0986,
      "step": 195
    },
    {
      "epoch": 0.08917197452229299,
      "grad_norm": 0.7509441330173129,
      "learning_rate": 9.992233932971901e-06,
      "loss": 0.0634,
      "step": 196
    },
    {
      "epoch": 0.08962693357597816,
      "grad_norm": 0.9426276516690344,
      "learning_rate": 9.992154097657888e-06,
      "loss": 0.0857,
      "step": 197
    },
    {
      "epoch": 0.09008189262966333,
      "grad_norm": 0.8754039034503873,
      "learning_rate": 9.992073854407585e-06,
      "loss": 0.0881,
      "step": 198
    },
    {
      "epoch": 0.0905368516833485,
      "grad_norm": 2.8697219156120712,
      "learning_rate": 9.99199320322755e-06,
      "loss": 0.0851,
      "step": 199
    },
    {
      "epoch": 0.09099181073703366,
      "grad_norm": 0.7429242681646778,
      "learning_rate": 9.991912144124375e-06,
      "loss": 0.0729,
      "step": 200
    },
    {
      "epoch": 0.09144676979071883,
      "grad_norm": 1.0552979449251756,
      "learning_rate": 9.991830677104682e-06,
      "loss": 0.1066,
      "step": 201
    },
    {
      "epoch": 0.09190172884440401,
      "grad_norm": 0.8812651371324355,
      "learning_rate": 9.99174880217513e-06,
      "loss": 0.0732,
      "step": 202
    },
    {
      "epoch": 0.09235668789808917,
      "grad_norm": 1.0755107845413352,
      "learning_rate": 9.991666519342407e-06,
      "loss": 0.0977,
      "step": 203
    },
    {
      "epoch": 0.09281164695177434,
      "grad_norm": 0.8925063431256136,
      "learning_rate": 9.99158382861324e-06,
      "loss": 0.0904,
      "step": 204
    },
    {
      "epoch": 0.09326660600545951,
      "grad_norm": 0.8190206986922173,
      "learning_rate": 9.991500729994384e-06,
      "loss": 0.0729,
      "step": 205
    },
    {
      "epoch": 0.09372156505914468,
      "grad_norm": 0.6635798147425112,
      "learning_rate": 9.991417223492629e-06,
      "loss": 0.0631,
      "step": 206
    },
    {
      "epoch": 0.09417652411282984,
      "grad_norm": 1.0314655306023923,
      "learning_rate": 9.991333309114798e-06,
      "loss": 0.0852,
      "step": 207
    },
    {
      "epoch": 0.09463148316651501,
      "grad_norm": 0.8533496857694978,
      "learning_rate": 9.991248986867753e-06,
      "loss": 0.0868,
      "step": 208
    },
    {
      "epoch": 0.09508644222020018,
      "grad_norm": 1.039085255997433,
      "learning_rate": 9.991164256758378e-06,
      "loss": 0.095,
      "step": 209
    },
    {
      "epoch": 0.09554140127388536,
      "grad_norm": 1.1484522866350177,
      "learning_rate": 9.9910791187936e-06,
      "loss": 0.1333,
      "step": 210
    },
    {
      "epoch": 0.09599636032757052,
      "grad_norm": 0.8277820800102422,
      "learning_rate": 9.99099357298038e-06,
      "loss": 0.0664,
      "step": 211
    },
    {
      "epoch": 0.09645131938125569,
      "grad_norm": 0.821796111319934,
      "learning_rate": 9.9909076193257e-06,
      "loss": 0.083,
      "step": 212
    },
    {
      "epoch": 0.09690627843494086,
      "grad_norm": 0.9448800546720313,
      "learning_rate": 9.990821257836589e-06,
      "loss": 0.0873,
      "step": 213
    },
    {
      "epoch": 0.09736123748862602,
      "grad_norm": 0.9002810379340489,
      "learning_rate": 9.990734488520103e-06,
      "loss": 0.099,
      "step": 214
    },
    {
      "epoch": 0.09781619654231119,
      "grad_norm": 0.6145149717344348,
      "learning_rate": 9.990647311383334e-06,
      "loss": 0.0425,
      "step": 215
    },
    {
      "epoch": 0.09827115559599636,
      "grad_norm": 1.1377497370761045,
      "learning_rate": 9.990559726433404e-06,
      "loss": 0.0903,
      "step": 216
    },
    {
      "epoch": 0.09872611464968153,
      "grad_norm": 0.8401357673155365,
      "learning_rate": 9.99047173367747e-06,
      "loss": 0.0812,
      "step": 217
    },
    {
      "epoch": 0.09918107370336669,
      "grad_norm": 0.6977882365614015,
      "learning_rate": 9.990383333122722e-06,
      "loss": 0.0613,
      "step": 218
    },
    {
      "epoch": 0.09963603275705187,
      "grad_norm": 0.6751056796776193,
      "learning_rate": 9.990294524776384e-06,
      "loss": 0.0636,
      "step": 219
    },
    {
      "epoch": 0.10009099181073704,
      "grad_norm": 0.7973250315161167,
      "learning_rate": 9.990205308645716e-06,
      "loss": 0.0655,
      "step": 220
    },
    {
      "epoch": 0.1005459508644222,
      "grad_norm": 0.6494979859380491,
      "learning_rate": 9.990115684738005e-06,
      "loss": 0.0461,
      "step": 221
    },
    {
      "epoch": 0.10100090991810737,
      "grad_norm": 0.7863907355652456,
      "learning_rate": 9.990025653060574e-06,
      "loss": 0.0881,
      "step": 222
    },
    {
      "epoch": 0.10145586897179254,
      "grad_norm": 1.2756737972223395,
      "learning_rate": 9.98993521362078e-06,
      "loss": 0.1102,
      "step": 223
    },
    {
      "epoch": 0.10191082802547771,
      "grad_norm": 1.1992554133605928,
      "learning_rate": 9.989844366426018e-06,
      "loss": 0.1147,
      "step": 224
    },
    {
      "epoch": 0.10236578707916287,
      "grad_norm": 0.5034605400337953,
      "learning_rate": 9.989753111483707e-06,
      "loss": 0.0462,
      "step": 225
    },
    {
      "epoch": 0.10282074613284804,
      "grad_norm": 0.9881921480518578,
      "learning_rate": 9.989661448801305e-06,
      "loss": 0.0848,
      "step": 226
    },
    {
      "epoch": 0.10327570518653321,
      "grad_norm": 0.7581777568438945,
      "learning_rate": 9.989569378386303e-06,
      "loss": 0.079,
      "step": 227
    },
    {
      "epoch": 0.10373066424021839,
      "grad_norm": 0.6464731162067388,
      "learning_rate": 9.989476900246223e-06,
      "loss": 0.0617,
      "step": 228
    },
    {
      "epoch": 0.10418562329390355,
      "grad_norm": 0.8780639185859085,
      "learning_rate": 9.989384014388624e-06,
      "loss": 0.086,
      "step": 229
    },
    {
      "epoch": 0.10464058234758872,
      "grad_norm": 0.6623808171307163,
      "learning_rate": 9.989290720821095e-06,
      "loss": 0.0694,
      "step": 230
    },
    {
      "epoch": 0.10509554140127389,
      "grad_norm": 0.721054554263859,
      "learning_rate": 9.98919701955126e-06,
      "loss": 0.0735,
      "step": 231
    },
    {
      "epoch": 0.10555050045495905,
      "grad_norm": 0.7868134014829404,
      "learning_rate": 9.989102910586776e-06,
      "loss": 0.0546,
      "step": 232
    },
    {
      "epoch": 0.10600545950864422,
      "grad_norm": 0.9137158371163484,
      "learning_rate": 9.989008393935331e-06,
      "loss": 0.0771,
      "step": 233
    },
    {
      "epoch": 0.10646041856232939,
      "grad_norm": 0.8326009579593463,
      "learning_rate": 9.98891346960465e-06,
      "loss": 0.0667,
      "step": 234
    },
    {
      "epoch": 0.10691537761601456,
      "grad_norm": 0.6462724580348628,
      "learning_rate": 9.988818137602494e-06,
      "loss": 0.0717,
      "step": 235
    },
    {
      "epoch": 0.10737033666969972,
      "grad_norm": 0.7513725247558808,
      "learning_rate": 9.988722397936646e-06,
      "loss": 0.0733,
      "step": 236
    },
    {
      "epoch": 0.1078252957233849,
      "grad_norm": 1.094509848236789,
      "learning_rate": 9.988626250614932e-06,
      "loss": 0.1009,
      "step": 237
    },
    {
      "epoch": 0.10828025477707007,
      "grad_norm": 0.8200579138639758,
      "learning_rate": 9.98852969564521e-06,
      "loss": 0.0844,
      "step": 238
    },
    {
      "epoch": 0.10873521383075523,
      "grad_norm": 0.7417763562196316,
      "learning_rate": 9.988432733035369e-06,
      "loss": 0.0611,
      "step": 239
    },
    {
      "epoch": 0.1091901728844404,
      "grad_norm": 0.8476475869820355,
      "learning_rate": 9.988335362793333e-06,
      "loss": 0.0863,
      "step": 240
    },
    {
      "epoch": 0.10964513193812557,
      "grad_norm": 0.9998642783878469,
      "learning_rate": 9.988237584927058e-06,
      "loss": 0.0909,
      "step": 241
    },
    {
      "epoch": 0.11010009099181074,
      "grad_norm": 1.1689324698997519,
      "learning_rate": 9.988139399444534e-06,
      "loss": 0.124,
      "step": 242
    },
    {
      "epoch": 0.1105550500454959,
      "grad_norm": 0.790901332269412,
      "learning_rate": 9.988040806353786e-06,
      "loss": 0.0855,
      "step": 243
    },
    {
      "epoch": 0.11101000909918107,
      "grad_norm": 0.8931785977847209,
      "learning_rate": 9.987941805662869e-06,
      "loss": 0.1023,
      "step": 244
    },
    {
      "epoch": 0.11146496815286625,
      "grad_norm": 0.7352781929773609,
      "learning_rate": 9.98784239737987e-06,
      "loss": 0.0563,
      "step": 245
    },
    {
      "epoch": 0.11191992720655142,
      "grad_norm": 0.7169092611535308,
      "learning_rate": 9.987742581512919e-06,
      "loss": 0.0683,
      "step": 246
    },
    {
      "epoch": 0.11237488626023658,
      "grad_norm": 0.6767560569792272,
      "learning_rate": 9.987642358070167e-06,
      "loss": 0.0669,
      "step": 247
    },
    {
      "epoch": 0.11282984531392175,
      "grad_norm": 0.8442319805699996,
      "learning_rate": 9.987541727059805e-06,
      "loss": 0.0768,
      "step": 248
    },
    {
      "epoch": 0.11328480436760692,
      "grad_norm": 0.7700876798522618,
      "learning_rate": 9.987440688490058e-06,
      "loss": 0.0643,
      "step": 249
    },
    {
      "epoch": 0.11373976342129208,
      "grad_norm": 0.7286087978317647,
      "learning_rate": 9.98733924236918e-06,
      "loss": 0.0698,
      "step": 250
    },
    {
      "epoch": 0.11419472247497725,
      "grad_norm": 0.7917355018437868,
      "learning_rate": 9.98723738870546e-06,
      "loss": 0.0791,
      "step": 251
    },
    {
      "epoch": 0.11464968152866242,
      "grad_norm": 1.0469499693242315,
      "learning_rate": 9.987135127507226e-06,
      "loss": 0.0761,
      "step": 252
    },
    {
      "epoch": 0.1151046405823476,
      "grad_norm": 0.8361714930383379,
      "learning_rate": 9.987032458782828e-06,
      "loss": 0.0789,
      "step": 253
    },
    {
      "epoch": 0.11555959963603275,
      "grad_norm": 0.5902853873046482,
      "learning_rate": 9.986929382540662e-06,
      "loss": 0.0479,
      "step": 254
    },
    {
      "epoch": 0.11601455868971793,
      "grad_norm": 0.7349436304465384,
      "learning_rate": 9.986825898789145e-06,
      "loss": 0.0668,
      "step": 255
    },
    {
      "epoch": 0.1164695177434031,
      "grad_norm": 0.7657107039148755,
      "learning_rate": 9.986722007536737e-06,
      "loss": 0.0617,
      "step": 256
    },
    {
      "epoch": 0.11692447679708826,
      "grad_norm": 0.6450631027744769,
      "learning_rate": 9.986617708791926e-06,
      "loss": 0.0679,
      "step": 257
    },
    {
      "epoch": 0.11737943585077343,
      "grad_norm": 0.6292930010016882,
      "learning_rate": 9.986513002563236e-06,
      "loss": 0.0482,
      "step": 258
    },
    {
      "epoch": 0.1178343949044586,
      "grad_norm": 0.8758541343517451,
      "learning_rate": 9.986407888859221e-06,
      "loss": 0.0994,
      "step": 259
    },
    {
      "epoch": 0.11828935395814377,
      "grad_norm": 0.6537445862223847,
      "learning_rate": 9.986302367688473e-06,
      "loss": 0.07,
      "step": 260
    },
    {
      "epoch": 0.11874431301182893,
      "grad_norm": 0.8029660816844667,
      "learning_rate": 9.986196439059613e-06,
      "loss": 0.0623,
      "step": 261
    },
    {
      "epoch": 0.1191992720655141,
      "grad_norm": 0.7339528606524214,
      "learning_rate": 9.986090102981297e-06,
      "loss": 0.0791,
      "step": 262
    },
    {
      "epoch": 0.11965423111919928,
      "grad_norm": 0.7934112522002073,
      "learning_rate": 9.985983359462215e-06,
      "loss": 0.0672,
      "step": 263
    },
    {
      "epoch": 0.12010919017288443,
      "grad_norm": 1.0186962263060808,
      "learning_rate": 9.98587620851109e-06,
      "loss": 0.1213,
      "step": 264
    },
    {
      "epoch": 0.1205641492265696,
      "grad_norm": 0.6769843647605545,
      "learning_rate": 9.985768650136679e-06,
      "loss": 0.0685,
      "step": 265
    },
    {
      "epoch": 0.12101910828025478,
      "grad_norm": 0.7543020935976431,
      "learning_rate": 9.985660684347765e-06,
      "loss": 0.0861,
      "step": 266
    },
    {
      "epoch": 0.12147406733393995,
      "grad_norm": 0.9552124731299731,
      "learning_rate": 9.985552311153178e-06,
      "loss": 0.0922,
      "step": 267
    },
    {
      "epoch": 0.12192902638762511,
      "grad_norm": 0.7436699167226903,
      "learning_rate": 9.985443530561769e-06,
      "loss": 0.0885,
      "step": 268
    },
    {
      "epoch": 0.12238398544131028,
      "grad_norm": 1.329058937551934,
      "learning_rate": 9.98533434258243e-06,
      "loss": 0.1115,
      "step": 269
    },
    {
      "epoch": 0.12283894449499545,
      "grad_norm": 0.6835909813818813,
      "learning_rate": 9.985224747224083e-06,
      "loss": 0.0586,
      "step": 270
    },
    {
      "epoch": 0.12329390354868063,
      "grad_norm": 1.0733107060854794,
      "learning_rate": 9.98511474449568e-06,
      "loss": 0.0811,
      "step": 271
    },
    {
      "epoch": 0.12374886260236578,
      "grad_norm": 0.5916007278667166,
      "learning_rate": 9.985004334406215e-06,
      "loss": 0.0696,
      "step": 272
    },
    {
      "epoch": 0.12420382165605096,
      "grad_norm": 0.9149357508392912,
      "learning_rate": 9.984893516964707e-06,
      "loss": 0.0704,
      "step": 273
    },
    {
      "epoch": 0.12465878070973613,
      "grad_norm": 1.1634742377762608,
      "learning_rate": 9.984782292180212e-06,
      "loss": 0.1178,
      "step": 274
    },
    {
      "epoch": 0.1251137397634213,
      "grad_norm": 0.603957454908005,
      "learning_rate": 9.98467066006182e-06,
      "loss": 0.0585,
      "step": 275
    },
    {
      "epoch": 0.12556869881710647,
      "grad_norm": 0.7735087790025026,
      "learning_rate": 9.984558620618651e-06,
      "loss": 0.0953,
      "step": 276
    },
    {
      "epoch": 0.12602365787079162,
      "grad_norm": 1.2570182633873541,
      "learning_rate": 9.984446173859863e-06,
      "loss": 0.1353,
      "step": 277
    },
    {
      "epoch": 0.1264786169244768,
      "grad_norm": 0.7275895818672663,
      "learning_rate": 9.984333319794642e-06,
      "loss": 0.0774,
      "step": 278
    },
    {
      "epoch": 0.12693357597816196,
      "grad_norm": 0.6395006056363333,
      "learning_rate": 9.984220058432212e-06,
      "loss": 0.0591,
      "step": 279
    },
    {
      "epoch": 0.12738853503184713,
      "grad_norm": 0.6563921850032347,
      "learning_rate": 9.984106389781828e-06,
      "loss": 0.0573,
      "step": 280
    },
    {
      "epoch": 0.1278434940855323,
      "grad_norm": 0.9399157526953884,
      "learning_rate": 9.983992313852776e-06,
      "loss": 0.0793,
      "step": 281
    },
    {
      "epoch": 0.12829845313921748,
      "grad_norm": 0.93528061821534,
      "learning_rate": 9.983877830654381e-06,
      "loss": 0.0807,
      "step": 282
    },
    {
      "epoch": 0.12875341219290265,
      "grad_norm": 0.7192448233352142,
      "learning_rate": 9.983762940195996e-06,
      "loss": 0.0773,
      "step": 283
    },
    {
      "epoch": 0.1292083712465878,
      "grad_norm": 0.7097381072031733,
      "learning_rate": 9.98364764248701e-06,
      "loss": 0.0698,
      "step": 284
    },
    {
      "epoch": 0.12966333030027297,
      "grad_norm": 1.1635566012920768,
      "learning_rate": 9.983531937536844e-06,
      "loss": 0.0893,
      "step": 285
    },
    {
      "epoch": 0.13011828935395814,
      "grad_norm": 0.8456555685011555,
      "learning_rate": 9.983415825354954e-06,
      "loss": 0.0628,
      "step": 286
    },
    {
      "epoch": 0.1305732484076433,
      "grad_norm": 0.7151838393189083,
      "learning_rate": 9.983299305950828e-06,
      "loss": 0.0557,
      "step": 287
    },
    {
      "epoch": 0.13102820746132848,
      "grad_norm": 0.7095193783870621,
      "learning_rate": 9.983182379333989e-06,
      "loss": 0.0604,
      "step": 288
    },
    {
      "epoch": 0.13148316651501366,
      "grad_norm": 0.8581434444337498,
      "learning_rate": 9.983065045513986e-06,
      "loss": 0.0781,
      "step": 289
    },
    {
      "epoch": 0.13193812556869883,
      "grad_norm": 0.5600994934804626,
      "learning_rate": 9.982947304500414e-06,
      "loss": 0.0498,
      "step": 290
    },
    {
      "epoch": 0.13239308462238397,
      "grad_norm": 0.7355720212694087,
      "learning_rate": 9.98282915630289e-06,
      "loss": 0.0692,
      "step": 291
    },
    {
      "epoch": 0.13284804367606914,
      "grad_norm": 1.6846985851500909,
      "learning_rate": 9.98271060093107e-06,
      "loss": 0.1687,
      "step": 292
    },
    {
      "epoch": 0.13330300272975432,
      "grad_norm": 0.7959406174268434,
      "learning_rate": 9.98259163839464e-06,
      "loss": 0.0718,
      "step": 293
    },
    {
      "epoch": 0.1337579617834395,
      "grad_norm": 0.6005858848115938,
      "learning_rate": 9.982472268703323e-06,
      "loss": 0.0465,
      "step": 294
    },
    {
      "epoch": 0.13421292083712466,
      "grad_norm": 0.7865103977061746,
      "learning_rate": 9.982352491866874e-06,
      "loss": 0.071,
      "step": 295
    },
    {
      "epoch": 0.13466787989080983,
      "grad_norm": 0.7167219429964851,
      "learning_rate": 9.982232307895077e-06,
      "loss": 0.0658,
      "step": 296
    },
    {
      "epoch": 0.135122838944495,
      "grad_norm": 1.206398567596641,
      "learning_rate": 9.982111716797758e-06,
      "loss": 0.101,
      "step": 297
    },
    {
      "epoch": 0.13557779799818018,
      "grad_norm": 1.0085912508470862,
      "learning_rate": 9.981990718584768e-06,
      "loss": 0.0959,
      "step": 298
    },
    {
      "epoch": 0.13603275705186532,
      "grad_norm": 0.8594135430057543,
      "learning_rate": 9.981869313265995e-06,
      "loss": 0.0912,
      "step": 299
    },
    {
      "epoch": 0.1364877161055505,
      "grad_norm": 0.9903339586980618,
      "learning_rate": 9.981747500851357e-06,
      "loss": 0.0692,
      "step": 300
    },
    {
      "epoch": 0.13694267515923567,
      "grad_norm": 0.7623380548666351,
      "learning_rate": 9.981625281350812e-06,
      "loss": 0.0699,
      "step": 301
    },
    {
      "epoch": 0.13739763421292084,
      "grad_norm": 0.6267143484055344,
      "learning_rate": 9.981502654774349e-06,
      "loss": 0.0499,
      "step": 302
    },
    {
      "epoch": 0.137852593266606,
      "grad_norm": 0.8234150836820757,
      "learning_rate": 9.98137962113198e-06,
      "loss": 0.0788,
      "step": 303
    },
    {
      "epoch": 0.13830755232029118,
      "grad_norm": 0.8158733102806115,
      "learning_rate": 9.98125618043377e-06,
      "loss": 0.089,
      "step": 304
    },
    {
      "epoch": 0.13876251137397635,
      "grad_norm": 0.6372656549463032,
      "learning_rate": 9.981132332689796e-06,
      "loss": 0.0517,
      "step": 305
    },
    {
      "epoch": 0.1392174704276615,
      "grad_norm": 0.7713863813548327,
      "learning_rate": 9.981008077910184e-06,
      "loss": 0.0769,
      "step": 306
    },
    {
      "epoch": 0.13967242948134667,
      "grad_norm": 0.8883775702857831,
      "learning_rate": 9.980883416105084e-06,
      "loss": 0.0828,
      "step": 307
    },
    {
      "epoch": 0.14012738853503184,
      "grad_norm": 0.6490936355626988,
      "learning_rate": 9.980758347284687e-06,
      "loss": 0.0618,
      "step": 308
    },
    {
      "epoch": 0.14058234758871702,
      "grad_norm": 0.8359554084586713,
      "learning_rate": 9.980632871459209e-06,
      "loss": 0.0714,
      "step": 309
    },
    {
      "epoch": 0.1410373066424022,
      "grad_norm": 0.7373523328454649,
      "learning_rate": 9.980506988638906e-06,
      "loss": 0.0836,
      "step": 310
    },
    {
      "epoch": 0.14149226569608736,
      "grad_norm": 0.6644370731485183,
      "learning_rate": 9.980380698834064e-06,
      "loss": 0.0777,
      "step": 311
    },
    {
      "epoch": 0.14194722474977253,
      "grad_norm": 0.870883965477211,
      "learning_rate": 9.980254002055003e-06,
      "loss": 0.0847,
      "step": 312
    },
    {
      "epoch": 0.14240218380345768,
      "grad_norm": 0.6021065409531002,
      "learning_rate": 9.980126898312074e-06,
      "loss": 0.0583,
      "step": 313
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.8705461588189498,
      "learning_rate": 9.979999387615665e-06,
      "loss": 0.0895,
      "step": 314
    },
    {
      "epoch": 0.14331210191082802,
      "grad_norm": 0.9639410731114018,
      "learning_rate": 9.979871469976197e-06,
      "loss": 0.0901,
      "step": 315
    },
    {
      "epoch": 0.1437670609645132,
      "grad_norm": 0.7554126383153169,
      "learning_rate": 9.97974314540412e-06,
      "loss": 0.0699,
      "step": 316
    },
    {
      "epoch": 0.14422202001819837,
      "grad_norm": 1.1039648440512544,
      "learning_rate": 9.979614413909922e-06,
      "loss": 0.1013,
      "step": 317
    },
    {
      "epoch": 0.14467697907188354,
      "grad_norm": 0.5258831871743486,
      "learning_rate": 9.979485275504121e-06,
      "loss": 0.0544,
      "step": 318
    },
    {
      "epoch": 0.1451319381255687,
      "grad_norm": 1.3025897394440575,
      "learning_rate": 9.979355730197271e-06,
      "loss": 0.1067,
      "step": 319
    },
    {
      "epoch": 0.14558689717925385,
      "grad_norm": 0.5206132423310033,
      "learning_rate": 9.979225777999956e-06,
      "loss": 0.0497,
      "step": 320
    },
    {
      "epoch": 0.14604185623293903,
      "grad_norm": 0.7202189397663867,
      "learning_rate": 9.9790954189228e-06,
      "loss": 0.0807,
      "step": 321
    },
    {
      "epoch": 0.1464968152866242,
      "grad_norm": 0.5738667169449175,
      "learning_rate": 9.97896465297645e-06,
      "loss": 0.0614,
      "step": 322
    },
    {
      "epoch": 0.14695177434030937,
      "grad_norm": 0.7972440737628133,
      "learning_rate": 9.978833480171592e-06,
      "loss": 0.0906,
      "step": 323
    },
    {
      "epoch": 0.14740673339399454,
      "grad_norm": 0.7697423454053598,
      "learning_rate": 9.978701900518947e-06,
      "loss": 0.0632,
      "step": 324
    },
    {
      "epoch": 0.14786169244767972,
      "grad_norm": 0.8259885564233931,
      "learning_rate": 9.978569914029267e-06,
      "loss": 0.0944,
      "step": 325
    },
    {
      "epoch": 0.1483166515013649,
      "grad_norm": 0.8450006655868962,
      "learning_rate": 9.978437520713335e-06,
      "loss": 0.0862,
      "step": 326
    },
    {
      "epoch": 0.14877161055505003,
      "grad_norm": 0.7746078278616594,
      "learning_rate": 9.978304720581973e-06,
      "loss": 0.088,
      "step": 327
    },
    {
      "epoch": 0.1492265696087352,
      "grad_norm": 0.9977734940815816,
      "learning_rate": 9.97817151364603e-06,
      "loss": 0.1036,
      "step": 328
    },
    {
      "epoch": 0.14968152866242038,
      "grad_norm": 0.7800752301510507,
      "learning_rate": 9.978037899916393e-06,
      "loss": 0.0778,
      "step": 329
    },
    {
      "epoch": 0.15013648771610555,
      "grad_norm": 0.7521153273438224,
      "learning_rate": 9.97790387940398e-06,
      "loss": 0.0532,
      "step": 330
    },
    {
      "epoch": 0.15059144676979072,
      "grad_norm": 0.8046420256419254,
      "learning_rate": 9.977769452119741e-06,
      "loss": 0.0708,
      "step": 331
    },
    {
      "epoch": 0.1510464058234759,
      "grad_norm": 0.9071770528791517,
      "learning_rate": 9.97763461807466e-06,
      "loss": 0.1006,
      "step": 332
    },
    {
      "epoch": 0.15150136487716107,
      "grad_norm": 0.8824570234268595,
      "learning_rate": 9.97749937727976e-06,
      "loss": 0.0855,
      "step": 333
    },
    {
      "epoch": 0.15195632393084624,
      "grad_norm": 0.8286075823730068,
      "learning_rate": 9.977363729746088e-06,
      "loss": 0.077,
      "step": 334
    },
    {
      "epoch": 0.15241128298453138,
      "grad_norm": 0.6791233851472963,
      "learning_rate": 9.977227675484729e-06,
      "loss": 0.0698,
      "step": 335
    },
    {
      "epoch": 0.15286624203821655,
      "grad_norm": 0.9813875260679181,
      "learning_rate": 9.977091214506803e-06,
      "loss": 0.0838,
      "step": 336
    },
    {
      "epoch": 0.15332120109190173,
      "grad_norm": 0.9986284190120469,
      "learning_rate": 9.976954346823456e-06,
      "loss": 0.0789,
      "step": 337
    },
    {
      "epoch": 0.1537761601455869,
      "grad_norm": 0.6456071732838817,
      "learning_rate": 9.976817072445878e-06,
      "loss": 0.0566,
      "step": 338
    },
    {
      "epoch": 0.15423111919927207,
      "grad_norm": 0.7707362352402762,
      "learning_rate": 9.976679391385283e-06,
      "loss": 0.0677,
      "step": 339
    },
    {
      "epoch": 0.15468607825295724,
| "grad_norm": 0.5804713825378958, | |
| "learning_rate": 9.976541303652923e-06, | |
| "loss": 0.0547, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.15514103730664242, | |
| "grad_norm": 0.7705377953828665, | |
| "learning_rate": 9.976402809260083e-06, | |
| "loss": 0.0673, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.15559599636032756, | |
| "grad_norm": 0.651002355082985, | |
| "learning_rate": 9.976263908218076e-06, | |
| "loss": 0.066, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.15605095541401273, | |
| "grad_norm": 1.0075230687249708, | |
| "learning_rate": 9.976124600538257e-06, | |
| "loss": 0.1151, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1565059144676979, | |
| "grad_norm": 0.7110146200064966, | |
| "learning_rate": 9.975984886232006e-06, | |
| "loss": 0.0693, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.15696087352138308, | |
| "grad_norm": 0.782615076662302, | |
| "learning_rate": 9.975844765310743e-06, | |
| "loss": 0.071, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.15741583257506825, | |
| "grad_norm": 1.091513822496144, | |
| "learning_rate": 9.975704237785915e-06, | |
| "loss": 0.1277, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.15787079162875342, | |
| "grad_norm": 0.8244942271322709, | |
| "learning_rate": 9.975563303669006e-06, | |
| "loss": 0.092, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.1583257506824386, | |
| "grad_norm": 1.0997264747524325, | |
| "learning_rate": 9.975421962971536e-06, | |
| "loss": 0.102, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.15878070973612374, | |
| "grad_norm": 1.0471722358260585, | |
| "learning_rate": 9.97528021570505e-06, | |
| "loss": 0.1112, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.1592356687898089, | |
| "grad_norm": 0.6366013160292697, | |
| "learning_rate": 9.975138061881135e-06, | |
| "loss": 0.0629, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.15969062784349408, | |
| "grad_norm": 0.7145502784859615, | |
| "learning_rate": 9.974995501511404e-06, | |
| "loss": 0.0567, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.16014558689717925, | |
| "grad_norm": 1.0825694007542435, | |
| "learning_rate": 9.974852534607506e-06, | |
| "loss": 0.0897, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.16060054595086443, | |
| "grad_norm": 0.8874195306329471, | |
| "learning_rate": 9.974709161181126e-06, | |
| "loss": 0.0879, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.1610555050045496, | |
| "grad_norm": 0.8193025449594961, | |
| "learning_rate": 9.974565381243982e-06, | |
| "loss": 0.0969, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.16151046405823477, | |
| "grad_norm": 0.76528422131405, | |
| "learning_rate": 9.974421194807815e-06, | |
| "loss": 0.0786, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.16196542311191992, | |
| "grad_norm": 0.8836543328533641, | |
| "learning_rate": 9.974276601884416e-06, | |
| "loss": 0.0744, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1624203821656051, | |
| "grad_norm": 0.7482952108426273, | |
| "learning_rate": 9.974131602485596e-06, | |
| "loss": 0.0772, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.16287534121929026, | |
| "grad_norm": 0.9122723647083647, | |
| "learning_rate": 9.973986196623203e-06, | |
| "loss": 0.0851, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.16333030027297543, | |
| "grad_norm": 0.8373653902978805, | |
| "learning_rate": 9.973840384309121e-06, | |
| "loss": 0.0865, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.1637852593266606, | |
| "grad_norm": 0.6360069343077157, | |
| "learning_rate": 9.973694165555264e-06, | |
| "loss": 0.0618, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16424021838034578, | |
| "grad_norm": 0.7967304456611868, | |
| "learning_rate": 9.973547540373582e-06, | |
| "loss": 0.0865, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.16469517743403095, | |
| "grad_norm": 1.1699452577832765, | |
| "learning_rate": 9.973400508776054e-06, | |
| "loss": 0.1144, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.1651501364877161, | |
| "grad_norm": 0.6282867599706373, | |
| "learning_rate": 9.973253070774698e-06, | |
| "loss": 0.0633, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.16560509554140126, | |
| "grad_norm": 0.79942272506218, | |
| "learning_rate": 9.973105226381559e-06, | |
| "loss": 0.069, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.16606005459508644, | |
| "grad_norm": 0.9348674828410355, | |
| "learning_rate": 9.972956975608719e-06, | |
| "loss": 0.1019, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1665150136487716, | |
| "grad_norm": 1.0942665884463076, | |
| "learning_rate": 9.972808318468292e-06, | |
| "loss": 0.0859, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.16696997270245678, | |
| "grad_norm": 0.6283579225277517, | |
| "learning_rate": 9.972659254972426e-06, | |
| "loss": 0.0589, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.16742493175614195, | |
| "grad_norm": 1.0989677054167046, | |
| "learning_rate": 9.972509785133304e-06, | |
| "loss": 0.1081, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.16787989080982713, | |
| "grad_norm": 0.7310198219540203, | |
| "learning_rate": 9.972359908963137e-06, | |
| "loss": 0.0675, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.16833484986351227, | |
| "grad_norm": 0.757671629194488, | |
| "learning_rate": 9.972209626474172e-06, | |
| "loss": 0.0734, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.16878980891719744, | |
| "grad_norm": 0.7966175159886519, | |
| "learning_rate": 9.972058937678692e-06, | |
| "loss": 0.075, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.16924476797088261, | |
| "grad_norm": 0.9805514159267839, | |
| "learning_rate": 9.97190784258901e-06, | |
| "loss": 0.1071, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1696997270245678, | |
| "grad_norm": 0.7000612574442994, | |
| "learning_rate": 9.971756341217471e-06, | |
| "loss": 0.0526, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.17015468607825296, | |
| "grad_norm": 0.7917466702374949, | |
| "learning_rate": 9.971604433576456e-06, | |
| "loss": 0.0698, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.17060964513193813, | |
| "grad_norm": 0.8412692631182211, | |
| "learning_rate": 9.97145211967838e-06, | |
| "loss": 0.0783, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1710646041856233, | |
| "grad_norm": 0.5615038895232536, | |
| "learning_rate": 9.971299399535685e-06, | |
| "loss": 0.053, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.17151956323930848, | |
| "grad_norm": 0.6849745369298482, | |
| "learning_rate": 9.971146273160854e-06, | |
| "loss": 0.0774, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.17197452229299362, | |
| "grad_norm": 0.6466596777060115, | |
| "learning_rate": 9.9709927405664e-06, | |
| "loss": 0.0606, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.1724294813466788, | |
| "grad_norm": 0.7169884074840761, | |
| "learning_rate": 9.970838801764866e-06, | |
| "loss": 0.0839, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.17288444040036396, | |
| "grad_norm": 0.9393396355410675, | |
| "learning_rate": 9.970684456768836e-06, | |
| "loss": 0.1132, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17333939945404914, | |
| "grad_norm": 12.197098173453568, | |
| "learning_rate": 9.970529705590918e-06, | |
| "loss": 0.4858, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.1737943585077343, | |
| "grad_norm": 0.7355841274771772, | |
| "learning_rate": 9.97037454824376e-06, | |
| "loss": 0.0714, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.17424931756141948, | |
| "grad_norm": 1.050385265783733, | |
| "learning_rate": 9.97021898474004e-06, | |
| "loss": 0.1024, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.17470427661510465, | |
| "grad_norm": 0.8612087678995594, | |
| "learning_rate": 9.970063015092469e-06, | |
| "loss": 0.085, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.1751592356687898, | |
| "grad_norm": 1.3886472100476919, | |
| "learning_rate": 9.969906639313793e-06, | |
| "loss": 0.1212, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.17561419472247497, | |
| "grad_norm": 0.8238176964814595, | |
| "learning_rate": 9.96974985741679e-06, | |
| "loss": 0.0721, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.17606915377616014, | |
| "grad_norm": 0.8718897735731601, | |
| "learning_rate": 9.969592669414272e-06, | |
| "loss": 0.0959, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.17652411282984531, | |
| "grad_norm": 6.796752422837202, | |
| "learning_rate": 9.969435075319083e-06, | |
| "loss": 0.115, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.1769790718835305, | |
| "grad_norm": 0.58176536820322, | |
| "learning_rate": 9.969277075144104e-06, | |
| "loss": 0.0459, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.17743403093721566, | |
| "grad_norm": 0.7267253435076165, | |
| "learning_rate": 9.969118668902242e-06, | |
| "loss": 0.07, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.17788898999090083, | |
| "grad_norm": 0.7682389367523258, | |
| "learning_rate": 9.968959856606442e-06, | |
| "loss": 0.0542, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.17834394904458598, | |
| "grad_norm": 0.7873348185837048, | |
| "learning_rate": 9.968800638269682e-06, | |
| "loss": 0.0598, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.17879890809827115, | |
| "grad_norm": 1.287713292390112, | |
| "learning_rate": 9.968641013904974e-06, | |
| "loss": 0.1442, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.17925386715195632, | |
| "grad_norm": 1.085650814952146, | |
| "learning_rate": 9.968480983525359e-06, | |
| "loss": 0.0926, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1797088262056415, | |
| "grad_norm": 0.6716676596759695, | |
| "learning_rate": 9.968320547143918e-06, | |
| "loss": 0.0767, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.18016378525932666, | |
| "grad_norm": 0.8467396807693714, | |
| "learning_rate": 9.968159704773757e-06, | |
| "loss": 0.0977, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.18061874431301184, | |
| "grad_norm": 0.6438855833782786, | |
| "learning_rate": 9.967998456428021e-06, | |
| "loss": 0.0586, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.181073703366697, | |
| "grad_norm": 0.7254140122399564, | |
| "learning_rate": 9.967836802119886e-06, | |
| "loss": 0.06, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.18152866242038215, | |
| "grad_norm": 0.87517545358881, | |
| "learning_rate": 9.967674741862563e-06, | |
| "loss": 0.1016, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.18198362147406733, | |
| "grad_norm": 1.0624206936058178, | |
| "learning_rate": 9.967512275669294e-06, | |
| "loss": 0.1296, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1824385805277525, | |
| "grad_norm": 1.0284720738314184, | |
| "learning_rate": 9.967349403553353e-06, | |
| "loss": 0.0862, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.18289353958143767, | |
| "grad_norm": 0.8342932737384292, | |
| "learning_rate": 9.967186125528053e-06, | |
| "loss": 0.0873, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.18334849863512284, | |
| "grad_norm": 1.543095569701571, | |
| "learning_rate": 9.967022441606734e-06, | |
| "loss": 0.1209, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.18380345768880801, | |
| "grad_norm": 0.70731586616612, | |
| "learning_rate": 9.966858351802773e-06, | |
| "loss": 0.0726, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.1842584167424932, | |
| "grad_norm": 0.6660531988680356, | |
| "learning_rate": 9.966693856129576e-06, | |
| "loss": 0.0562, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.18471337579617833, | |
| "grad_norm": 0.8503640969928286, | |
| "learning_rate": 9.966528954600587e-06, | |
| "loss": 0.0838, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.1851683348498635, | |
| "grad_norm": 0.6021534124846688, | |
| "learning_rate": 9.96636364722928e-06, | |
| "loss": 0.0673, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.18562329390354868, | |
| "grad_norm": 0.8782816795828058, | |
| "learning_rate": 9.966197934029165e-06, | |
| "loss": 0.0845, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.18607825295723385, | |
| "grad_norm": 0.9030990654346936, | |
| "learning_rate": 9.966031815013781e-06, | |
| "loss": 0.0839, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.18653321201091902, | |
| "grad_norm": 0.8567507299712805, | |
| "learning_rate": 9.965865290196703e-06, | |
| "loss": 0.0935, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1869881710646042, | |
| "grad_norm": 0.8099856489670021, | |
| "learning_rate": 9.96569835959154e-06, | |
| "loss": 0.0747, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.18744313011828936, | |
| "grad_norm": 0.8938878675243255, | |
| "learning_rate": 9.965531023211931e-06, | |
| "loss": 0.0854, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.18789808917197454, | |
| "grad_norm": 0.735313860104022, | |
| "learning_rate": 9.965363281071551e-06, | |
| "loss": 0.0865, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.18835304822565968, | |
| "grad_norm": 0.5495229598132649, | |
| "learning_rate": 9.965195133184108e-06, | |
| "loss": 0.0403, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.18880800727934485, | |
| "grad_norm": 1.0700416713113117, | |
| "learning_rate": 9.965026579563342e-06, | |
| "loss": 0.1086, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.18926296633303002, | |
| "grad_norm": 0.7118653717355078, | |
| "learning_rate": 9.964857620223024e-06, | |
| "loss": 0.0691, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1897179253867152, | |
| "grad_norm": 0.6871481686027417, | |
| "learning_rate": 9.964688255176963e-06, | |
| "loss": 0.0667, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.19017288444040037, | |
| "grad_norm": 0.9848841869658392, | |
| "learning_rate": 9.964518484438998e-06, | |
| "loss": 0.0813, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.19062784349408554, | |
| "grad_norm": 0.6311750922074311, | |
| "learning_rate": 9.964348308023001e-06, | |
| "loss": 0.0592, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.1910828025477707, | |
| "grad_norm": 0.7813168734245782, | |
| "learning_rate": 9.964177725942881e-06, | |
| "loss": 0.0826, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19153776160145586, | |
| "grad_norm": 0.8572110622332836, | |
| "learning_rate": 9.964006738212574e-06, | |
| "loss": 0.0853, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.19199272065514103, | |
| "grad_norm": 0.5304433423014596, | |
| "learning_rate": 9.963835344846056e-06, | |
| "loss": 0.048, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1924476797088262, | |
| "grad_norm": 0.7598521228122416, | |
| "learning_rate": 9.963663545857328e-06, | |
| "loss": 0.0757, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.19290263876251137, | |
| "grad_norm": 1.1542546683489703, | |
| "learning_rate": 9.963491341260432e-06, | |
| "loss": 0.104, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.19335759781619655, | |
| "grad_norm": 0.7766563582253432, | |
| "learning_rate": 9.963318731069437e-06, | |
| "loss": 0.0952, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.19381255686988172, | |
| "grad_norm": 1.1319194983916299, | |
| "learning_rate": 9.96314571529845e-06, | |
| "loss": 0.1005, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.1942675159235669, | |
| "grad_norm": 0.7230559135257585, | |
| "learning_rate": 9.962972293961608e-06, | |
| "loss": 0.0647, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.19472247497725204, | |
| "grad_norm": 0.9863934566369588, | |
| "learning_rate": 9.962798467073083e-06, | |
| "loss": 0.0763, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.1951774340309372, | |
| "grad_norm": 0.8259784410005646, | |
| "learning_rate": 9.96262423464708e-06, | |
| "loss": 0.087, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.19563239308462238, | |
| "grad_norm": 0.7987139095182185, | |
| "learning_rate": 9.962449596697834e-06, | |
| "loss": 0.0671, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.19608735213830755, | |
| "grad_norm": 1.130208173229934, | |
| "learning_rate": 9.962274553239619e-06, | |
| "loss": 0.119, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.19654231119199272, | |
| "grad_norm": 0.7399696243677417, | |
| "learning_rate": 9.962099104286735e-06, | |
| "loss": 0.064, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1969972702456779, | |
| "grad_norm": 1.156015767405528, | |
| "learning_rate": 9.961923249853523e-06, | |
| "loss": 0.1102, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.19745222929936307, | |
| "grad_norm": 0.972422739757894, | |
| "learning_rate": 9.961746989954349e-06, | |
| "loss": 0.1093, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1979071883530482, | |
| "grad_norm": 0.7766700420403171, | |
| "learning_rate": 9.96157032460362e-06, | |
| "loss": 0.0655, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.19836214740673339, | |
| "grad_norm": 0.7460679115751414, | |
| "learning_rate": 9.961393253815767e-06, | |
| "loss": 0.0751, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.19881710646041856, | |
| "grad_norm": 1.0684214450487566, | |
| "learning_rate": 9.961215777605266e-06, | |
| "loss": 0.0789, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.19927206551410373, | |
| "grad_norm": 0.7683994291392229, | |
| "learning_rate": 9.961037895986615e-06, | |
| "loss": 0.0849, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1997270245677889, | |
| "grad_norm": 0.7270368453251704, | |
| "learning_rate": 9.960859608974352e-06, | |
| "loss": 0.0779, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.20018198362147407, | |
| "grad_norm": 0.701460207303568, | |
| "learning_rate": 9.960680916583042e-06, | |
| "loss": 0.0639, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.20063694267515925, | |
| "grad_norm": 0.6784619280926262, | |
| "learning_rate": 9.960501818827292e-06, | |
| "loss": 0.077, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.2010919017288444, | |
| "grad_norm": 0.8064075868568972, | |
| "learning_rate": 9.960322315721735e-06, | |
| "loss": 0.0827, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.20154686078252956, | |
| "grad_norm": 0.9155026735417204, | |
| "learning_rate": 9.960142407281039e-06, | |
| "loss": 0.0841, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.20200181983621474, | |
| "grad_norm": 0.6167749294869733, | |
| "learning_rate": 9.959962093519904e-06, | |
| "loss": 0.054, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.2024567788898999, | |
| "grad_norm": 0.8127781985331358, | |
| "learning_rate": 9.959781374453066e-06, | |
| "loss": 0.0751, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.20291173794358508, | |
| "grad_norm": 0.98306444688532, | |
| "learning_rate": 9.959600250095294e-06, | |
| "loss": 0.075, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.20336669699727025, | |
| "grad_norm": 0.7982130269360888, | |
| "learning_rate": 9.959418720461384e-06, | |
| "loss": 0.0834, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.20382165605095542, | |
| "grad_norm": 0.7862225023823932, | |
| "learning_rate": 9.959236785566175e-06, | |
| "loss": 0.0704, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.20427661510464057, | |
| "grad_norm": 0.562107514296544, | |
| "learning_rate": 9.959054445424532e-06, | |
| "loss": 0.0644, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.20473157415832574, | |
| "grad_norm": 0.6089607791855781, | |
| "learning_rate": 9.958871700051353e-06, | |
| "loss": 0.0512, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2051865332120109, | |
| "grad_norm": 0.6962095067981563, | |
| "learning_rate": 9.958688549461573e-06, | |
| "loss": 0.0712, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.20564149226569609, | |
| "grad_norm": 1.155217046291275, | |
| "learning_rate": 9.958504993670158e-06, | |
| "loss": 0.1049, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.20609645131938126, | |
| "grad_norm": 1.0913314226134752, | |
| "learning_rate": 9.958321032692107e-06, | |
| "loss": 0.1226, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.20655141037306643, | |
| "grad_norm": 22.735025633907238, | |
| "learning_rate": 9.958136666542455e-06, | |
| "loss": 0.8419, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2070063694267516, | |
| "grad_norm": 1.184019553325164, | |
| "learning_rate": 9.957951895236262e-06, | |
| "loss": 0.1113, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.20746132848043677, | |
| "grad_norm": 0.7664792046331882, | |
| "learning_rate": 9.957766718788632e-06, | |
| "loss": 0.104, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.20791628753412192, | |
| "grad_norm": 0.8672883026786035, | |
| "learning_rate": 9.957581137214695e-06, | |
| "loss": 0.074, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.2083712465878071, | |
| "grad_norm": 0.8772220264781722, | |
| "learning_rate": 9.957395150529615e-06, | |
| "loss": 0.0986, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.20882620564149226, | |
| "grad_norm": 0.7016331971826193, | |
| "learning_rate": 9.95720875874859e-06, | |
| "loss": 0.0752, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.20928116469517744, | |
| "grad_norm": 0.6308822051977305, | |
| "learning_rate": 9.957021961886855e-06, | |
| "loss": 0.0608, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2097361237488626, | |
| "grad_norm": 0.9803601042372939, | |
| "learning_rate": 9.956834759959669e-06, | |
| "loss": 0.0908, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.21019108280254778, | |
| "grad_norm": 0.7674462109758159, | |
| "learning_rate": 9.95664715298233e-06, | |
| "loss": 0.074, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.21064604185623295, | |
| "grad_norm": 0.7450186566335193, | |
| "learning_rate": 9.95645914097017e-06, | |
| "loss": 0.0817, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.2111010009099181, | |
| "grad_norm": 0.7225723661612439, | |
| "learning_rate": 9.956270723938553e-06, | |
| "loss": 0.0849, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.21155595996360327, | |
| "grad_norm": 0.7190355211871646, | |
| "learning_rate": 9.956081901902875e-06, | |
| "loss": 0.0748, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.21201091901728844, | |
| "grad_norm": 1.210684562087392, | |
| "learning_rate": 9.955892674878565e-06, | |
| "loss": 0.1272, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2124658780709736, | |
| "grad_norm": 0.834170476650907, | |
| "learning_rate": 9.955703042881087e-06, | |
| "loss": 0.0992, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.21292083712465878, | |
| "grad_norm": 0.874478173291907, | |
| "learning_rate": 9.955513005925934e-06, | |
| "loss": 0.0858, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.21337579617834396, | |
| "grad_norm": 0.5510320150423565, | |
| "learning_rate": 9.95532256402864e-06, | |
| "loss": 0.0574, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.21383075523202913, | |
| "grad_norm": 0.5657171871822584, | |
| "learning_rate": 9.955131717204762e-06, | |
| "loss": 0.0671, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.7564664653864259, | |
| "learning_rate": 9.954940465469898e-06, | |
| "loss": 0.085, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.21474067333939945, | |
| "grad_norm": 0.7594501005901694, | |
| "learning_rate": 9.954748808839675e-06, | |
| "loss": 0.0733, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.21519563239308462, | |
| "grad_norm": 0.6748092428366178, | |
| "learning_rate": 9.954556747329754e-06, | |
| "loss": 0.0707, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.2156505914467698, | |
| "grad_norm": 1.715089789819449, | |
| "learning_rate": 9.954364280955832e-06, | |
| "loss": 0.1045, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.21610555050045496, | |
| "grad_norm": 0.6668751648778155, | |
| "learning_rate": 9.954171409733634e-06, | |
| "loss": 0.0573, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.21656050955414013, | |
| "grad_norm": 0.5963716475430643, | |
| "learning_rate": 9.95397813367892e-06, | |
| "loss": 0.0752, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.2170154686078253, | |
| "grad_norm": 0.9917190233932158, | |
| "learning_rate": 9.953784452807487e-06, | |
| "loss": 0.1049, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.21747042766151045, | |
| "grad_norm": 0.5638529401686616, | |
| "learning_rate": 9.953590367135159e-06, | |
| "loss": 0.0547, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.21792538671519562, | |
| "grad_norm": 0.6477110515460727, | |
| "learning_rate": 9.953395876677796e-06, | |
| "loss": 0.0564, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.2183803457688808, | |
| "grad_norm": 0.5492055118574499, | |
| "learning_rate": 9.95320098145129e-06, | |
| "loss": 0.0505, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.21883530482256597, | |
| "grad_norm": 0.8954528378372288, | |
| "learning_rate": 9.95300568147157e-06, | |
| "loss": 0.126, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.21929026387625114, | |
| "grad_norm": 0.6155736143826033, | |
| "learning_rate": 9.952809976754593e-06, | |
| "loss": 0.0518, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2197452229299363, | |
| "grad_norm": 1.1486004986445648, | |
| "learning_rate": 9.952613867316351e-06, | |
| "loss": 0.1142, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.22020018198362148, | |
| "grad_norm": 0.8236924325360948, | |
| "learning_rate": 9.95241735317287e-06, | |
| "loss": 0.1047, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.22065514103730663, | |
| "grad_norm": 0.832372102653505, | |
| "learning_rate": 9.952220434340209e-06, | |
| "loss": 0.0729, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2211101000909918, | |
| "grad_norm": 0.7288716722109786, | |
| "learning_rate": 9.952023110834456e-06, | |
| "loss": 0.068, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.22156505914467697, | |
| "grad_norm": 0.5327254294033283, | |
| "learning_rate": 9.951825382671739e-06, | |
| "loss": 0.0614, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.22202001819836215, | |
| "grad_norm": 0.7204991379763186, | |
| "learning_rate": 9.951627249868213e-06, | |
| "loss": 0.0666, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.22247497725204732, | |
| "grad_norm": 0.7485835393026234, | |
| "learning_rate": 9.95142871244007e-06, | |
| "loss": 0.068, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.2229299363057325, | |
| "grad_norm": 0.45602532896445397, | |
| "learning_rate": 9.951229770403531e-06, | |
| "loss": 0.0414, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22338489535941766, | |
| "grad_norm": 0.7240661348572547, | |
| "learning_rate": 9.951030423774858e-06, | |
| "loss": 0.0798, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.22383985441310283, | |
| "grad_norm": 0.7716352477687572, | |
| "learning_rate": 9.950830672570337e-06, | |
| "loss": 0.071, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.22429481346678798, | |
| "grad_norm": 1.22677184750836, | |
| "learning_rate": 9.95063051680629e-06, | |
| "loss": 0.1373, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.22474977252047315, | |
| "grad_norm": 0.7365431233953595, | |
| "learning_rate": 9.950429956499074e-06, | |
| "loss": 0.0699, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.22520473157415832, | |
| "grad_norm": 0.705654951368504, | |
| "learning_rate": 9.950228991665078e-06, | |
| "loss": 0.0741, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2256596906278435, | |
| "grad_norm": 0.8261497906057415, | |
| "learning_rate": 9.950027622320724e-06, | |
| "loss": 0.0764, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.22611464968152867, | |
| "grad_norm": 0.9965395262255518, | |
| "learning_rate": 9.949825848482465e-06, | |
| "loss": 0.0852, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.22656960873521384, | |
| "grad_norm": 0.6807161957389707, | |
| "learning_rate": 9.949623670166794e-06, | |
| "loss": 0.074, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.227024567788899, | |
| "grad_norm": 1.1216390709095547, | |
| "learning_rate": 9.949421087390228e-06, | |
| "loss": 0.0931, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.22747952684258416, | |
| "grad_norm": 1.1278655216416786, | |
| "learning_rate": 9.949218100169322e-06, | |
| "loss": 0.1177, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.22793448589626933, | |
| "grad_norm": 0.9160591457448575, | |
| "learning_rate": 9.949014708520664e-06, | |
| "loss": 0.1015, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.2283894449499545, | |
| "grad_norm": 0.9377363057118697, | |
| "learning_rate": 9.948810912460872e-06, | |
| "loss": 0.1059, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.22884440400363967, | |
| "grad_norm": 0.8760932101779023, | |
| "learning_rate": 9.948606712006601e-06, | |
| "loss": 0.0812, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.22929936305732485, | |
| "grad_norm": 0.6962605051289937, | |
| "learning_rate": 9.948402107174537e-06, | |
| "loss": 0.0735, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.22975432211101002, | |
| "grad_norm": 0.6501265713488487, | |
| "learning_rate": 9.948197097981401e-06, | |
| "loss": 0.0551, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2302092811646952, | |
| "grad_norm": 1.2156011775652311, | |
| "learning_rate": 9.947991684443942e-06, | |
| "loss": 0.1066, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.23066424021838033, | |
| "grad_norm": 0.9679794435610901, | |
| "learning_rate": 9.947785866578951e-06, | |
| "loss": 0.0981, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.2311191992720655, | |
| "grad_norm": 0.7195724631231237, | |
| "learning_rate": 9.94757964440324e-06, | |
| "loss": 0.0777, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.23157415832575068, | |
| "grad_norm": 0.549427502610929, | |
| "learning_rate": 9.947373017933665e-06, | |
| "loss": 0.0516, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.23202911737943585, | |
| "grad_norm": 0.5667212336170355, | |
| "learning_rate": 9.947165987187108e-06, | |
| "loss": 0.0583, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23248407643312102, | |
| "grad_norm": 0.6638127935874616, | |
| "learning_rate": 9.946958552180489e-06, | |
| "loss": 0.0723, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2329390354868062, | |
| "grad_norm": 0.5226768129517959, | |
| "learning_rate": 9.946750712930756e-06, | |
| "loss": 0.0482, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.23339399454049137, | |
| "grad_norm": 0.8358986518129136, | |
| "learning_rate": 9.946542469454894e-06, | |
| "loss": 0.1037, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.2338489535941765, | |
| "grad_norm": 0.6695809647699968, | |
| "learning_rate": 9.94633382176992e-06, | |
| "loss": 0.0728, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.23430391264786168, | |
| "grad_norm": 1.0608546974350634, | |
| "learning_rate": 9.946124769892884e-06, | |
| "loss": 0.1192, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.23475887170154686, | |
| "grad_norm": 0.5090717025630993, | |
| "learning_rate": 9.945915313840869e-06, | |
| "loss": 0.0612, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.23521383075523203, | |
| "grad_norm": 0.8105130307542814, | |
| "learning_rate": 9.94570545363099e-06, | |
| "loss": 0.0838, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.2356687898089172, | |
| "grad_norm": 0.7752986876049957, | |
| "learning_rate": 9.945495189280394e-06, | |
| "loss": 0.092, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.23612374886260237, | |
| "grad_norm": 0.869801315379322, | |
| "learning_rate": 9.945284520806267e-06, | |
| "loss": 0.077, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.23657870791628755, | |
| "grad_norm": 0.5427153243822386, | |
| "learning_rate": 9.94507344822582e-06, | |
| "loss": 0.0592, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2370336669699727, | |
| "grad_norm": 0.7368670007832758, | |
| "learning_rate": 9.944861971556305e-06, | |
| "loss": 0.0608, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.23748862602365786, | |
| "grad_norm": 0.8141430793460733, | |
| "learning_rate": 9.944650090814998e-06, | |
| "loss": 0.0616, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.23794358507734303, | |
| "grad_norm": 2.1096588720516425, | |
| "learning_rate": 9.944437806019216e-06, | |
| "loss": 0.0938, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.2383985441310282, | |
| "grad_norm": 0.7014907085161215, | |
| "learning_rate": 9.944225117186306e-06, | |
| "loss": 0.0812, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.23885350318471338, | |
| "grad_norm": 0.5078467158211916, | |
| "learning_rate": 9.944012024333647e-06, | |
| "loss": 0.0561, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.23930846223839855, | |
| "grad_norm": 0.6379031604907951, | |
| "learning_rate": 9.943798527478652e-06, | |
| "loss": 0.0678, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.23976342129208372, | |
| "grad_norm": 0.799876019099874, | |
| "learning_rate": 9.943584626638768e-06, | |
| "loss": 0.0914, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.24021838034576887, | |
| "grad_norm": 0.6550229607349646, | |
| "learning_rate": 9.943370321831474e-06, | |
| "loss": 0.0668, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.24067333939945404, | |
| "grad_norm": 0.767534839542607, | |
| "learning_rate": 9.943155613074279e-06, | |
| "loss": 0.0711, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2411282984531392, | |
| "grad_norm": 0.7571838990000624, | |
| "learning_rate": 9.942940500384733e-06, | |
| "loss": 0.0893, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24158325750682438, | |
| "grad_norm": 17.807000846945513, | |
| "learning_rate": 9.942724983780409e-06, | |
| "loss": 0.3419, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.24203821656050956, | |
| "grad_norm": 1.2088422410181228, | |
| "learning_rate": 9.942509063278922e-06, | |
| "loss": 0.1173, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.24249317561419473, | |
| "grad_norm": 0.8811842157145667, | |
| "learning_rate": 9.942292738897914e-06, | |
| "loss": 0.1006, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.2429481346678799, | |
| "grad_norm": 0.7726281786442553, | |
| "learning_rate": 9.942076010655063e-06, | |
| "loss": 0.0909, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.24340309372156507, | |
| "grad_norm": 0.9942256398778268, | |
| "learning_rate": 9.941858878568078e-06, | |
| "loss": 0.134, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.24385805277525022, | |
| "grad_norm": 1.001596627292525, | |
| "learning_rate": 9.941641342654702e-06, | |
| "loss": 0.0977, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.2443130118289354, | |
| "grad_norm": 0.5064863363900076, | |
| "learning_rate": 9.941423402932713e-06, | |
| "loss": 0.0559, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.24476797088262056, | |
| "grad_norm": 0.8589680374278897, | |
| "learning_rate": 9.94120505941992e-06, | |
| "loss": 0.0992, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.24522292993630573, | |
| "grad_norm": 0.7830880681851201, | |
| "learning_rate": 9.940986312134162e-06, | |
| "loss": 0.0825, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.2456778889899909, | |
| "grad_norm": 0.5778344550660577, | |
| "learning_rate": 9.940767161093316e-06, | |
| "loss": 0.0637, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24613284804367608, | |
| "grad_norm": 0.8661775200374767, | |
| "learning_rate": 9.94054760631529e-06, | |
| "loss": 0.0958, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.24658780709736125, | |
| "grad_norm": 0.6976226834296251, | |
| "learning_rate": 9.940327647818026e-06, | |
| "loss": 0.0752, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.2470427661510464, | |
| "grad_norm": 0.7530160135685138, | |
| "learning_rate": 9.940107285619495e-06, | |
| "loss": 0.077, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.24749772520473157, | |
| "grad_norm": 0.7997106896354084, | |
| "learning_rate": 9.939886519737707e-06, | |
| "loss": 0.0958, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.24795268425841674, | |
| "grad_norm": 0.8918061918047896, | |
| "learning_rate": 9.939665350190702e-06, | |
| "loss": 0.0822, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.2484076433121019, | |
| "grad_norm": 0.804115756264787, | |
| "learning_rate": 9.93944377699655e-06, | |
| "loss": 0.0915, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.24886260236578708, | |
| "grad_norm": 0.6234057941022288, | |
| "learning_rate": 9.93922180017336e-06, | |
| "loss": 0.0672, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.24931756141947226, | |
| "grad_norm": 0.8269450754551354, | |
| "learning_rate": 9.93899941973927e-06, | |
| "loss": 0.1102, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.24977252047315743, | |
| "grad_norm": 0.9233841316663005, | |
| "learning_rate": 9.93877663571245e-06, | |
| "loss": 0.0963, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.2502274795268426, | |
| "grad_norm": 0.9944861568923805, | |
| "learning_rate": 9.938553448111108e-06, | |
| "loss": 0.1127, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25068243858052774, | |
| "grad_norm": 0.8423641298780182, | |
| "learning_rate": 9.938329856953482e-06, | |
| "loss": 0.0788, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.25113739763421294, | |
| "grad_norm": 0.8124861649110975, | |
| "learning_rate": 9.938105862257839e-06, | |
| "loss": 0.0831, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.2515923566878981, | |
| "grad_norm": 0.6612222253979325, | |
| "learning_rate": 9.937881464042485e-06, | |
| "loss": 0.0703, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.25204731574158323, | |
| "grad_norm": 0.854447666921162, | |
| "learning_rate": 9.937656662325759e-06, | |
| "loss": 0.1074, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.25250227479526843, | |
| "grad_norm": 0.74521770368624, | |
| "learning_rate": 9.937431457126028e-06, | |
| "loss": 0.0777, | |
| "step": 555 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 10990, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 555, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3666645319680.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |