| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 990, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0030303030303030303, |
| "grad_norm": 6.14251184463501, |
| "learning_rate": 1.0101010101010103e-07, |
| "loss": 0.8811, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006060606060606061, |
| "grad_norm": 5.7579426765441895, |
| "learning_rate": 2.0202020202020205e-07, |
| "loss": 0.8291, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00909090909090909, |
| "grad_norm": 6.182405471801758, |
| "learning_rate": 3.0303030303030305e-07, |
| "loss": 0.8745, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.012121212121212121, |
| "grad_norm": 5.871363162994385, |
| "learning_rate": 4.040404040404041e-07, |
| "loss": 0.8764, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.015151515151515152, |
| "grad_norm": 5.848386287689209, |
| "learning_rate": 5.05050505050505e-07, |
| "loss": 0.8469, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01818181818181818, |
| "grad_norm": 5.616578102111816, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 0.8085, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.021212121212121213, |
| "grad_norm": 6.289897441864014, |
| "learning_rate": 7.070707070707071e-07, |
| "loss": 0.8985, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.024242424242424242, |
| "grad_norm": 5.57948112487793, |
| "learning_rate": 8.080808080808082e-07, |
| "loss": 0.8366, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02727272727272727, |
| "grad_norm": 5.735244274139404, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 0.86, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.030303030303030304, |
| "grad_norm": 5.462663173675537, |
| "learning_rate": 1.01010101010101e-06, |
| "loss": 0.8469, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 4.708677768707275, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.8173, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 4.475161552429199, |
| "learning_rate": 1.2121212121212122e-06, |
| "loss": 0.7915, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03939393939393939, |
| "grad_norm": 4.219878673553467, |
| "learning_rate": 1.3131313131313134e-06, |
| "loss": 0.8007, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04242424242424243, |
| "grad_norm": 2.822401762008667, |
| "learning_rate": 1.4141414141414143e-06, |
| "loss": 0.7783, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.045454545454545456, |
| "grad_norm": 2.4995763301849365, |
| "learning_rate": 1.5151515151515152e-06, |
| "loss": 0.7523, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.048484848484848485, |
| "grad_norm": 2.4066433906555176, |
| "learning_rate": 1.6161616161616164e-06, |
| "loss": 0.7468, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.051515151515151514, |
| "grad_norm": 2.2153358459472656, |
| "learning_rate": 1.7171717171717173e-06, |
| "loss": 0.7509, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05454545454545454, |
| "grad_norm": 2.120013952255249, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 0.7532, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05757575757575758, |
| "grad_norm": 1.9135936498641968, |
| "learning_rate": 1.9191919191919192e-06, |
| "loss": 0.721, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06060606060606061, |
| "grad_norm": 2.663780450820923, |
| "learning_rate": 2.02020202020202e-06, |
| "loss": 0.6962, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06363636363636363, |
| "grad_norm": 2.8611958026885986, |
| "learning_rate": 2.1212121212121216e-06, |
| "loss": 0.6855, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 3.166199207305908, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.7219, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0696969696969697, |
| "grad_norm": 2.878675937652588, |
| "learning_rate": 2.3232323232323234e-06, |
| "loss": 0.687, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 2.6447913646698, |
| "learning_rate": 2.4242424242424244e-06, |
| "loss": 0.6795, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07575757575757576, |
| "grad_norm": 2.3938543796539307, |
| "learning_rate": 2.5252525252525258e-06, |
| "loss": 0.6837, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07878787878787878, |
| "grad_norm": 1.7195990085601807, |
| "learning_rate": 2.6262626262626267e-06, |
| "loss": 0.6984, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08181818181818182, |
| "grad_norm": 1.28241765499115, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 0.6588, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08484848484848485, |
| "grad_norm": 1.2121256589889526, |
| "learning_rate": 2.8282828282828286e-06, |
| "loss": 0.643, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08787878787878788, |
| "grad_norm": 1.1997158527374268, |
| "learning_rate": 2.9292929292929295e-06, |
| "loss": 0.624, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 1.0956838130950928, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 0.6108, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09393939393939393, |
| "grad_norm": 1.0332937240600586, |
| "learning_rate": 3.131313131313132e-06, |
| "loss": 0.6191, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09696969696969697, |
| "grad_norm": 1.1140916347503662, |
| "learning_rate": 3.232323232323233e-06, |
| "loss": 0.6529, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.9233384728431702, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.6009, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10303030303030303, |
| "grad_norm": 0.8741363883018494, |
| "learning_rate": 3.4343434343434347e-06, |
| "loss": 0.6248, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10606060606060606, |
| "grad_norm": 0.7766424417495728, |
| "learning_rate": 3.5353535353535356e-06, |
| "loss": 0.6061, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 0.7707573771476746, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 0.6274, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11212121212121212, |
| "grad_norm": 0.7274775505065918, |
| "learning_rate": 3.737373737373738e-06, |
| "loss": 0.6017, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.11515151515151516, |
| "grad_norm": 0.8542383909225464, |
| "learning_rate": 3.8383838383838385e-06, |
| "loss": 0.5802, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11818181818181818, |
| "grad_norm": 0.8010094165802002, |
| "learning_rate": 3.93939393939394e-06, |
| "loss": 0.5663, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 0.7044663429260254, |
| "learning_rate": 4.04040404040404e-06, |
| "loss": 0.5704, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12424242424242424, |
| "grad_norm": 0.6028207540512085, |
| "learning_rate": 4.141414141414142e-06, |
| "loss": 0.5762, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12727272727272726, |
| "grad_norm": 0.8017765283584595, |
| "learning_rate": 4.242424242424243e-06, |
| "loss": 0.6181, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1303030303030303, |
| "grad_norm": 0.6999064683914185, |
| "learning_rate": 4.343434343434344e-06, |
| "loss": 0.5961, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 0.6400359272956848, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.5896, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.13636363636363635, |
| "grad_norm": 0.6916729807853699, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.5754, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1393939393939394, |
| "grad_norm": 0.6609205007553101, |
| "learning_rate": 4.646464646464647e-06, |
| "loss": 0.5743, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.14242424242424243, |
| "grad_norm": 0.5354890823364258, |
| "learning_rate": 4.747474747474748e-06, |
| "loss": 0.5791, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 0.5994871854782104, |
| "learning_rate": 4.848484848484849e-06, |
| "loss": 0.5567, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1484848484848485, |
| "grad_norm": 0.5859790444374084, |
| "learning_rate": 4.94949494949495e-06, |
| "loss": 0.5482, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 0.627583384513855, |
| "learning_rate": 5.0505050505050515e-06, |
| "loss": 0.5397, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15454545454545454, |
| "grad_norm": 0.48996925354003906, |
| "learning_rate": 5.151515151515152e-06, |
| "loss": 0.5541, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.15757575757575756, |
| "grad_norm": 0.5651494264602661, |
| "learning_rate": 5.252525252525253e-06, |
| "loss": 0.573, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1606060606060606, |
| "grad_norm": 0.6122561097145081, |
| "learning_rate": 5.353535353535354e-06, |
| "loss": 0.5512, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.16363636363636364, |
| "grad_norm": 0.49054795503616333, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 0.5583, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.5040543079376221, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.5457, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1696969696969697, |
| "grad_norm": 0.5023638606071472, |
| "learning_rate": 5.656565656565657e-06, |
| "loss": 0.5338, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.17272727272727273, |
| "grad_norm": 0.6167340874671936, |
| "learning_rate": 5.7575757575757586e-06, |
| "loss": 0.5328, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.17575757575757575, |
| "grad_norm": 0.5743213295936584, |
| "learning_rate": 5.858585858585859e-06, |
| "loss": 0.5312, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1787878787878788, |
| "grad_norm": 0.46841517090797424, |
| "learning_rate": 5.95959595959596e-06, |
| "loss": 0.5012, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.5114443302154541, |
| "learning_rate": 6.060606060606061e-06, |
| "loss": 0.5377, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18484848484848485, |
| "grad_norm": 0.5205867886543274, |
| "learning_rate": 6.1616161616161615e-06, |
| "loss": 0.5505, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.18787878787878787, |
| "grad_norm": 0.6010080575942993, |
| "learning_rate": 6.262626262626264e-06, |
| "loss": 0.5452, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.19090909090909092, |
| "grad_norm": 0.5454538464546204, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 0.527, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.19393939393939394, |
| "grad_norm": 0.5522276163101196, |
| "learning_rate": 6.464646464646466e-06, |
| "loss": 0.5355, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.19696969696969696, |
| "grad_norm": 0.5053198933601379, |
| "learning_rate": 6.565656565656566e-06, |
| "loss": 0.5227, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5276966094970703, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.5341, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.20303030303030303, |
| "grad_norm": 0.5439529418945312, |
| "learning_rate": 6.767676767676769e-06, |
| "loss": 0.519, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.20606060606060606, |
| "grad_norm": 0.5188765525817871, |
| "learning_rate": 6.868686868686869e-06, |
| "loss": 0.5015, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.20909090909090908, |
| "grad_norm": 0.5360773801803589, |
| "learning_rate": 6.969696969696971e-06, |
| "loss": 0.5246, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.21212121212121213, |
| "grad_norm": 0.5436122417449951, |
| "learning_rate": 7.070707070707071e-06, |
| "loss": 0.4942, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21515151515151515, |
| "grad_norm": 0.5304339528083801, |
| "learning_rate": 7.171717171717172e-06, |
| "loss": 0.5126, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 0.5600169897079468, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 0.5075, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.22121212121212122, |
| "grad_norm": 0.6064160466194153, |
| "learning_rate": 7.373737373737374e-06, |
| "loss": 0.5287, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.22424242424242424, |
| "grad_norm": 0.46182766556739807, |
| "learning_rate": 7.474747474747476e-06, |
| "loss": 0.5357, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.22727272727272727, |
| "grad_norm": 0.5622665882110596, |
| "learning_rate": 7.5757575757575764e-06, |
| "loss": 0.5162, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.23030303030303031, |
| "grad_norm": 0.5098185539245605, |
| "learning_rate": 7.676767676767677e-06, |
| "loss": 0.5201, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.23333333333333334, |
| "grad_norm": 0.5697974562644958, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.5319, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.23636363636363636, |
| "grad_norm": 0.5686485171318054, |
| "learning_rate": 7.87878787878788e-06, |
| "loss": 0.5209, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.23939393939393938, |
| "grad_norm": 0.541465699672699, |
| "learning_rate": 7.97979797979798e-06, |
| "loss": 0.5081, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 0.5638330578804016, |
| "learning_rate": 8.08080808080808e-06, |
| "loss": 0.4961, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24545454545454545, |
| "grad_norm": 0.5761530995368958, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 0.5067, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.24848484848484848, |
| "grad_norm": 0.48256605863571167, |
| "learning_rate": 8.282828282828283e-06, |
| "loss": 0.532, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2515151515151515, |
| "grad_norm": 0.6405602693557739, |
| "learning_rate": 8.383838383838384e-06, |
| "loss": 0.5021, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2545454545454545, |
| "grad_norm": 0.5617716908454895, |
| "learning_rate": 8.484848484848486e-06, |
| "loss": 0.4923, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.25757575757575757, |
| "grad_norm": 0.5595013499259949, |
| "learning_rate": 8.585858585858587e-06, |
| "loss": 0.5194, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2606060606060606, |
| "grad_norm": 0.5715003609657288, |
| "learning_rate": 8.686868686868687e-06, |
| "loss": 0.4951, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2636363636363636, |
| "grad_norm": 0.5606926679611206, |
| "learning_rate": 8.787878787878788e-06, |
| "loss": 0.4966, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.5606850981712341, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.4945, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2696969696969697, |
| "grad_norm": 0.5261890292167664, |
| "learning_rate": 8.98989898989899e-06, |
| "loss": 0.5184, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.6513155698776245, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.5364, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.27575757575757576, |
| "grad_norm": 0.501545786857605, |
| "learning_rate": 9.191919191919193e-06, |
| "loss": 0.5108, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2787878787878788, |
| "grad_norm": 0.5412594079971313, |
| "learning_rate": 9.292929292929294e-06, |
| "loss": 0.517, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2818181818181818, |
| "grad_norm": 0.6492443084716797, |
| "learning_rate": 9.393939393939396e-06, |
| "loss": 0.4978, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.28484848484848485, |
| "grad_norm": 0.6265013217926025, |
| "learning_rate": 9.494949494949497e-06, |
| "loss": 0.5387, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2878787878787879, |
| "grad_norm": 0.6805964708328247, |
| "learning_rate": 9.595959595959597e-06, |
| "loss": 0.5024, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 0.6327937841415405, |
| "learning_rate": 9.696969696969698e-06, |
| "loss": 0.4757, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.29393939393939394, |
| "grad_norm": 0.6182631850242615, |
| "learning_rate": 9.797979797979798e-06, |
| "loss": 0.5209, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.296969696969697, |
| "grad_norm": 0.564050018787384, |
| "learning_rate": 9.8989898989899e-06, |
| "loss": 0.513, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.5654965043067932, |
| "learning_rate": 1e-05, |
| "loss": 0.5094, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.6344062089920044, |
| "learning_rate": 9.99996891979347e-06, |
| "loss": 0.508, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.30606060606060603, |
| "grad_norm": 0.6290592551231384, |
| "learning_rate": 9.999875679560272e-06, |
| "loss": 0.5108, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3090909090909091, |
| "grad_norm": 0.5513220429420471, |
| "learning_rate": 9.999720280459576e-06, |
| "loss": 0.4855, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.31212121212121213, |
| "grad_norm": 0.6386879682540894, |
| "learning_rate": 9.999502724423316e-06, |
| "loss": 0.4909, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3151515151515151, |
| "grad_norm": 0.555277407169342, |
| "learning_rate": 9.999223014156167e-06, |
| "loss": 0.4931, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3181818181818182, |
| "grad_norm": 0.5689773559570312, |
| "learning_rate": 9.99888115313551e-06, |
| "loss": 0.4886, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3212121212121212, |
| "grad_norm": 0.5847551226615906, |
| "learning_rate": 9.998477145611389e-06, |
| "loss": 0.518, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3242424242424242, |
| "grad_norm": 0.5526836514472961, |
| "learning_rate": 9.99801099660646e-06, |
| "loss": 0.5152, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.32727272727272727, |
| "grad_norm": 0.590153694152832, |
| "learning_rate": 9.997482711915926e-06, |
| "loss": 0.5202, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3303030303030303, |
| "grad_norm": 0.524453341960907, |
| "learning_rate": 9.996892298107466e-06, |
| "loss": 0.5102, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.5427029728889465, |
| "learning_rate": 9.996239762521152e-06, |
| "loss": 0.4645, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.33636363636363636, |
| "grad_norm": 0.5274785757064819, |
| "learning_rate": 9.99552511326936e-06, |
| "loss": 0.5018, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3393939393939394, |
| "grad_norm": 0.5401912927627563, |
| "learning_rate": 9.99474835923667e-06, |
| "loss": 0.4916, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3424242424242424, |
| "grad_norm": 0.5039514303207397, |
| "learning_rate": 9.993909510079752e-06, |
| "loss": 0.4974, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.34545454545454546, |
| "grad_norm": 0.5162566304206848, |
| "learning_rate": 9.993008576227248e-06, |
| "loss": 0.4974, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3484848484848485, |
| "grad_norm": 0.5239473581314087, |
| "learning_rate": 9.99204556887964e-06, |
| "loss": 0.4824, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3515151515151515, |
| "grad_norm": 0.46607157588005066, |
| "learning_rate": 9.991020500009118e-06, |
| "loss": 0.4699, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.35454545454545455, |
| "grad_norm": 0.45448705554008484, |
| "learning_rate": 9.989933382359423e-06, |
| "loss": 0.4797, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3575757575757576, |
| "grad_norm": 0.5446470975875854, |
| "learning_rate": 9.988784229445689e-06, |
| "loss": 0.4839, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3606060606060606, |
| "grad_norm": 0.49788519740104675, |
| "learning_rate": 9.98757305555428e-06, |
| "loss": 0.5038, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.5029739737510681, |
| "learning_rate": 9.986299875742612e-06, |
| "loss": 0.4795, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.36666666666666664, |
| "grad_norm": 0.6325995326042175, |
| "learning_rate": 9.98496470583896e-06, |
| "loss": 0.5298, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3696969696969697, |
| "grad_norm": 0.5292807221412659, |
| "learning_rate": 9.98356756244227e-06, |
| "loss": 0.5072, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.37272727272727274, |
| "grad_norm": 0.6047240495681763, |
| "learning_rate": 9.982108462921938e-06, |
| "loss": 0.497, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.37575757575757573, |
| "grad_norm": 0.64040607213974, |
| "learning_rate": 9.980587425417612e-06, |
| "loss": 0.5047, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3787878787878788, |
| "grad_norm": 0.5388203263282776, |
| "learning_rate": 9.97900446883896e-06, |
| "loss": 0.489, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.38181818181818183, |
| "grad_norm": 0.5968406200408936, |
| "learning_rate": 9.977359612865424e-06, |
| "loss": 0.5043, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.38484848484848483, |
| "grad_norm": 0.5099135637283325, |
| "learning_rate": 9.975652877945991e-06, |
| "loss": 0.4939, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3878787878787879, |
| "grad_norm": 0.5063946843147278, |
| "learning_rate": 9.973884285298932e-06, |
| "loss": 0.4882, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.39090909090909093, |
| "grad_norm": 0.5433374047279358, |
| "learning_rate": 9.972053856911534e-06, |
| "loss": 0.4902, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3939393939393939, |
| "grad_norm": 0.4687306880950928, |
| "learning_rate": 9.970161615539837e-06, |
| "loss": 0.4883, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.396969696969697, |
| "grad_norm": 0.5378535985946655, |
| "learning_rate": 9.96820758470834e-06, |
| "loss": 0.5082, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.49391430616378784, |
| "learning_rate": 9.966191788709716e-06, |
| "loss": 0.4528, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.403030303030303, |
| "grad_norm": 0.4575445055961609, |
| "learning_rate": 9.964114252604508e-06, |
| "loss": 0.4906, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.40606060606060607, |
| "grad_norm": 0.602308452129364, |
| "learning_rate": 9.961975002220816e-06, |
| "loss": 0.4973, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4090909090909091, |
| "grad_norm": 0.4728878140449524, |
| "learning_rate": 9.959774064153977e-06, |
| "loss": 0.4803, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4121212121212121, |
| "grad_norm": 0.5318324565887451, |
| "learning_rate": 9.957511465766236e-06, |
| "loss": 0.4797, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.41515151515151516, |
| "grad_norm": 0.5328473448753357, |
| "learning_rate": 9.955187235186403e-06, |
| "loss": 0.4889, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.41818181818181815, |
| "grad_norm": 0.4802415370941162, |
| "learning_rate": 9.952801401309504e-06, |
| "loss": 0.5012, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4212121212121212, |
| "grad_norm": 0.5675839781761169, |
| "learning_rate": 9.950353993796424e-06, |
| "loss": 0.4973, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.42424242424242425, |
| "grad_norm": 0.5941027998924255, |
| "learning_rate": 9.947845043073533e-06, |
| "loss": 0.4947, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.42727272727272725, |
| "grad_norm": 0.5009533166885376, |
| "learning_rate": 9.945274580332316e-06, |
| "loss": 0.4963, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4303030303030303, |
| "grad_norm": 0.7182719111442566, |
| "learning_rate": 9.942642637528977e-06, |
| "loss": 0.492, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.43333333333333335, |
| "grad_norm": 0.6205947995185852, |
| "learning_rate": 9.939949247384046e-06, |
| "loss": 0.4953, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 0.581794023513794, |
| "learning_rate": 9.937194443381972e-06, |
| "loss": 0.4804, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4393939393939394, |
| "grad_norm": 0.5637320876121521, |
| "learning_rate": 9.934378259770708e-06, |
| "loss": 0.4819, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.44242424242424244, |
| "grad_norm": 0.5580178499221802, |
| "learning_rate": 9.931500731561279e-06, |
| "loss": 0.4816, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.44545454545454544, |
| "grad_norm": 0.5792706608772278, |
| "learning_rate": 9.928561894527354e-06, |
| "loss": 0.4912, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4484848484848485, |
| "grad_norm": 0.554824948310852, |
| "learning_rate": 9.925561785204797e-06, |
| "loss": 0.5057, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.45151515151515154, |
| "grad_norm": 0.548604428768158, |
| "learning_rate": 9.922500440891217e-06, |
| "loss": 0.4796, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.5710535645484924, |
| "learning_rate": 9.919377899645497e-06, |
| "loss": 0.4948, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4575757575757576, |
| "grad_norm": 0.5213266015052795, |
| "learning_rate": 9.916194200287329e-06, |
| "loss": 0.4715, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.46060606060606063, |
| "grad_norm": 0.5729761719703674, |
| "learning_rate": 9.912949382396728e-06, |
| "loss": 0.4699, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4636363636363636, |
| "grad_norm": 0.5509311556816101, |
| "learning_rate": 9.909643486313533e-06, |
| "loss": 0.4757, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 0.6052528619766235, |
| "learning_rate": 9.906276553136924e-06, |
| "loss": 0.5023, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4696969696969697, |
| "grad_norm": 0.6352577209472656, |
| "learning_rate": 9.902848624724887e-06, |
| "loss": 0.4839, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4727272727272727, |
| "grad_norm": 0.5328971743583679, |
| "learning_rate": 9.899359743693715e-06, |
| "loss": 0.4685, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.47575757575757577, |
| "grad_norm": 0.6209010481834412, |
| "learning_rate": 9.895809953417464e-06, |
| "loss": 0.4765, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.47878787878787876, |
| "grad_norm": 0.5842458605766296, |
| "learning_rate": 9.892199298027416e-06, |
| "loss": 0.492, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4818181818181818, |
| "grad_norm": 0.6122411489486694, |
| "learning_rate": 9.888527822411543e-06, |
| "loss": 0.4992, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 0.6631482839584351, |
| "learning_rate": 9.88479557221393e-06, |
| "loss": 0.5166, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.48787878787878786, |
| "grad_norm": 0.6380733847618103, |
| "learning_rate": 9.881002593834221e-06, |
| "loss": 0.5043, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4909090909090909, |
| "grad_norm": 0.6373774409294128, |
| "learning_rate": 9.877148934427037e-06, |
| "loss": 0.5015, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.49393939393939396, |
| "grad_norm": 0.5428421497344971, |
| "learning_rate": 9.873234641901387e-06, |
| "loss": 0.5158, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.49696969696969695, |
| "grad_norm": 0.5517195463180542, |
| "learning_rate": 9.869259764920081e-06, |
| "loss": 0.4559, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.6365246772766113, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.4833, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.503030303030303, |
| "grad_norm": 0.49439719319343567, |
| "learning_rate": 9.861128456007076e-06, |
| "loss": 0.451, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5060606060606061, |
| "grad_norm": 0.6125072240829468, |
| "learning_rate": 9.85697212516448e-06, |
| "loss": 0.4971, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.509090909090909, |
| "grad_norm": 0.530681848526001, |
| "learning_rate": 9.85275541204318e-06, |
| "loss": 0.494, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5121212121212121, |
| "grad_norm": 0.4542466402053833, |
| "learning_rate": 9.848478369065703e-06, |
| "loss": 0.4688, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5151515151515151, |
| "grad_norm": 0.5714587569236755, |
| "learning_rate": 9.844141049404598e-06, |
| "loss": 0.4741, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5181818181818182, |
| "grad_norm": 0.5518572330474854, |
| "learning_rate": 9.839743506981783e-06, |
| "loss": 0.4873, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5212121212121212, |
| "grad_norm": 0.5865032076835632, |
| "learning_rate": 9.835285796467867e-06, |
| "loss": 0.4786, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5242424242424243, |
| "grad_norm": 0.5356167554855347, |
| "learning_rate": 9.830767973281477e-06, |
| "loss": 0.4615, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5272727272727272, |
| "grad_norm": 0.5017634630203247, |
| "learning_rate": 9.826190093588564e-06, |
| "loss": 0.4614, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5303030303030303, |
| "grad_norm": 0.5031861662864685, |
| "learning_rate": 9.821552214301705e-06, |
| "loss": 0.4873, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.4886428117752075, |
| "learning_rate": 9.816854393079402e-06, |
| "loss": 0.494, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5363636363636364, |
| "grad_norm": 0.5461684465408325, |
| "learning_rate": 9.812096688325354e-06, |
| "loss": 0.5059, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5393939393939394, |
| "grad_norm": 0.4634988605976105, |
| "learning_rate": 9.80727915918774e-06, |
| "loss": 0.4627, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5424242424242425, |
| "grad_norm": 0.44447776675224304, |
| "learning_rate": 9.802401865558477e-06, |
| "loss": 0.4916, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.46829262375831604, |
| "learning_rate": 9.797464868072489e-06, |
| "loss": 0.5005, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5484848484848485, |
| "grad_norm": 0.49591726064682007, |
| "learning_rate": 9.79246822810693e-06, |
| "loss": 0.4811, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5515151515151515, |
| "grad_norm": 0.5024096369743347, |
| "learning_rate": 9.787412007780445e-06, |
| "loss": 0.4904, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5545454545454546, |
| "grad_norm": 0.5566429495811462, |
| "learning_rate": 9.78229626995238e-06, |
| "loss": 0.4729, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5575757575757576, |
| "grad_norm": 0.5653529167175293, |
| "learning_rate": 9.777121078222015e-06, |
| "loss": 0.4747, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5606060606060606, |
| "grad_norm": 0.6398766040802002, |
| "learning_rate": 9.771886496927756e-06, |
| "loss": 0.4572, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5636363636363636, |
| "grad_norm": 0.5789164304733276, |
| "learning_rate": 9.766592591146353e-06, |
| "loss": 0.4813, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5666666666666667, |
| "grad_norm": 0.5712085366249084, |
| "learning_rate": 9.761239426692077e-06, |
| "loss": 0.4846, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5696969696969697, |
| "grad_norm": 0.5621404051780701, |
| "learning_rate": 9.755827070115915e-06, |
| "loss": 0.5014, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5727272727272728, |
| "grad_norm": 0.5247191190719604, |
| "learning_rate": 9.750355588704728e-06, |
| "loss": 0.4582, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5757575757575758, |
| "grad_norm": 0.5438876748085022, |
| "learning_rate": 9.744825050480425e-06, |
| "loss": 0.4495, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5787878787878787, |
| "grad_norm": 0.512938380241394, |
| "learning_rate": 9.739235524199117e-06, |
| "loss": 0.4682, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 0.5813847184181213, |
| "learning_rate": 9.733587079350254e-06, |
| "loss": 0.4841, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5848484848484848, |
| "grad_norm": 0.5673701167106628, |
| "learning_rate": 9.727879786155767e-06, |
| "loss": 0.4633, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5878787878787879, |
| "grad_norm": 0.5400328040122986, |
| "learning_rate": 9.7221137155692e-06, |
| "loss": 0.4608, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5909090909090909, |
| "grad_norm": 0.47881829738616943, |
| "learning_rate": 9.716288939274818e-06, |
| "loss": 0.4873, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.593939393939394, |
| "grad_norm": 0.5495237112045288, |
| "learning_rate": 9.710405529686722e-06, |
| "loss": 0.4739, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5969696969696969, |
| "grad_norm": 0.4859870374202728, |
| "learning_rate": 9.704463559947944e-06, |
| "loss": 0.4646, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.5082584619522095, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.4722, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.603030303030303, |
| "grad_norm": 0.5721731781959534, |
| "learning_rate": 9.692404236229684e-06, |
| "loss": 0.4751, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.5285456776618958, |
| "learning_rate": 9.686287032172712e-06, |
| "loss": 0.4754, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6090909090909091, |
| "grad_norm": 0.4908636510372162, |
| "learning_rate": 9.680111567808212e-06, |
| "loss": 0.4788, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6121212121212121, |
| "grad_norm": 0.6461144089698792, |
| "learning_rate": 9.673877919910069e-06, |
| "loss": 0.4782, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6151515151515151, |
| "grad_norm": 0.535092830657959, |
| "learning_rate": 9.667586165975507e-06, |
| "loss": 0.4753, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6181818181818182, |
| "grad_norm": 0.6900569796562195, |
| "learning_rate": 9.66123638422413e-06, |
| "loss": 0.4644, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6212121212121212, |
| "grad_norm": 0.5520806908607483, |
| "learning_rate": 9.65482865359695e-06, |
| "loss": 0.4667, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6242424242424243, |
| "grad_norm": 0.6971328258514404, |
| "learning_rate": 9.648363053755406e-06, |
| "loss": 0.4874, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6272727272727273, |
| "grad_norm": 0.5781907439231873, |
| "learning_rate": 9.641839665080363e-06, |
| "loss": 0.4733, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6303030303030303, |
| "grad_norm": 0.5814110636711121, |
| "learning_rate": 9.635258568671135e-06, |
| "loss": 0.4882, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6333333333333333, |
| "grad_norm": 0.5779009461402893, |
| "learning_rate": 9.628619846344453e-06, |
| "loss": 0.468, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.6527352333068848, |
| "learning_rate": 9.621923580633462e-06, |
| "loss": 0.4706, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6393939393939394, |
| "grad_norm": 0.6215603351593018, |
| "learning_rate": 9.615169854786688e-06, |
| "loss": 0.4663, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6424242424242425, |
| "grad_norm": 0.5920988321304321, |
| "learning_rate": 9.608358752767013e-06, |
| "loss": 0.4747, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6454545454545455, |
| "grad_norm": 0.5126884579658508, |
| "learning_rate": 9.601490359250616e-06, |
| "loss": 0.4635, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6484848484848484, |
| "grad_norm": 0.5980071425437927, |
| "learning_rate": 9.594564759625936e-06, |
| "loss": 0.4874, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6515151515151515, |
| "grad_norm": 0.6158670783042908, |
| "learning_rate": 9.587582039992598e-06, |
| "loss": 0.4775, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6545454545454545, |
| "grad_norm": 0.6119588017463684, |
| "learning_rate": 9.580542287160348e-06, |
| "loss": 0.4727, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6575757575757576, |
| "grad_norm": 0.533804178237915, |
| "learning_rate": 9.573445588647978e-06, |
| "loss": 0.4448, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6606060606060606, |
| "grad_norm": 0.6370444297790527, |
| "learning_rate": 9.566292032682228e-06, |
| "loss": 0.4698, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6636363636363637, |
| "grad_norm": 0.6251326203346252, |
| "learning_rate": 9.559081708196696e-06, |
| "loss": 0.4641, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.4930393099784851, |
| "learning_rate": 9.551814704830734e-06, |
| "loss": 0.4629, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6696969696969697, |
| "grad_norm": 0.6046044230461121, |
| "learning_rate": 9.544491112928327e-06, |
| "loss": 0.4745, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6727272727272727, |
| "grad_norm": 0.5868167877197266, |
| "learning_rate": 9.537111023536973e-06, |
| "loss": 0.4862, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6757575757575758, |
| "grad_norm": 0.4980459213256836, |
| "learning_rate": 9.529674528406556e-06, |
| "loss": 0.4836, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6787878787878788, |
| "grad_norm": 0.5966117978096008, |
| "learning_rate": 9.522181719988196e-06, |
| "loss": 0.4719, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6818181818181818, |
| "grad_norm": 0.5293205380439758, |
| "learning_rate": 9.514632691433108e-06, |
| "loss": 0.4874, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6848484848484848, |
| "grad_norm": 0.4588055908679962, |
| "learning_rate": 9.507027536591436e-06, |
| "loss": 0.4798, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6878787878787879, |
| "grad_norm": 0.4997522234916687, |
| "learning_rate": 9.499366350011093e-06, |
| "loss": 0.4543, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6909090909090909, |
| "grad_norm": 0.5391511917114258, |
| "learning_rate": 9.491649226936586e-06, |
| "loss": 0.483, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.693939393939394, |
| "grad_norm": 0.525338351726532, |
| "learning_rate": 9.483876263307825e-06, |
| "loss": 0.4771, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.696969696969697, |
| "grad_norm": 0.47747671604156494, |
| "learning_rate": 9.476047555758938e-06, |
| "loss": 0.4869, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.4778452515602112, |
| "learning_rate": 9.468163201617063e-06, |
| "loss": 0.47, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.703030303030303, |
| "grad_norm": 0.7326351404190063, |
| "learning_rate": 9.460223298901138e-06, |
| "loss": 0.4854, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.706060606060606, |
| "grad_norm": 0.5560402274131775, |
| "learning_rate": 9.452227946320697e-06, |
| "loss": 0.4767, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7090909090909091, |
| "grad_norm": 0.6292885541915894, |
| "learning_rate": 9.444177243274619e-06, |
| "loss": 0.4675, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7121212121212122, |
| "grad_norm": 0.5762522220611572, |
| "learning_rate": 9.436071289849909e-06, |
| "loss": 0.4809, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7151515151515152, |
| "grad_norm": 0.5423203110694885, |
| "learning_rate": 9.42791018682045e-06, |
| "loss": 0.4767, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7181818181818181, |
| "grad_norm": 0.6047458052635193, |
| "learning_rate": 9.419694035645753e-06, |
| "loss": 0.4682, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7212121212121212, |
| "grad_norm": 0.4815622568130493, |
| "learning_rate": 9.411422938469683e-06, |
| "loss": 0.4563, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7242424242424242, |
| "grad_norm": 0.4963318705558777, |
| "learning_rate": 9.403096998119206e-06, |
| "loss": 0.4552, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.5713395476341248, |
| "learning_rate": 9.394716318103098e-06, |
| "loss": 0.4446, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7303030303030303, |
| "grad_norm": 0.5686458349227905, |
| "learning_rate": 9.386281002610669e-06, |
| "loss": 0.4611, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 0.6181318759918213, |
| "learning_rate": 9.377791156510456e-06, |
| "loss": 0.4524, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7363636363636363, |
| "grad_norm": 0.5240927934646606, |
| "learning_rate": 9.369246885348926e-06, |
| "loss": 0.4558, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7393939393939394, |
| "grad_norm": 0.6781958341598511, |
| "learning_rate": 9.360648295349165e-06, |
| "loss": 0.4712, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7424242424242424, |
| "grad_norm": 0.553178608417511, |
| "learning_rate": 9.351995493409556e-06, |
| "loss": 0.4772, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7454545454545455, |
| "grad_norm": 0.5585768222808838, |
| "learning_rate": 9.343288587102444e-06, |
| "loss": 0.4679, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7484848484848485, |
| "grad_norm": 0.5500451326370239, |
| "learning_rate": 9.334527684672809e-06, |
| "loss": 0.4862, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7515151515151515, |
| "grad_norm": 0.5181669592857361, |
| "learning_rate": 9.325712895036916e-06, |
| "loss": 0.476, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7545454545454545, |
| "grad_norm": 0.5939860343933105, |
| "learning_rate": 9.316844327780955e-06, |
| "loss": 0.4764, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.46427208185195923, |
| "learning_rate": 9.307922093159688e-06, |
| "loss": 0.4702, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7606060606060606, |
| "grad_norm": 0.47566163539886475, |
| "learning_rate": 9.298946302095074e-06, |
| "loss": 0.4544, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7636363636363637, |
| "grad_norm": 0.5817909836769104, |
| "learning_rate": 9.289917066174887e-06, |
| "loss": 0.4495, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7666666666666667, |
| "grad_norm": 0.544357180595398, |
| "learning_rate": 9.280834497651334e-06, |
| "loss": 0.4721, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7696969696969697, |
| "grad_norm": 0.5736443996429443, |
| "learning_rate": 9.271698709439658e-06, |
| "loss": 0.4719, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7727272727272727, |
| "grad_norm": 0.5211672186851501, |
| "learning_rate": 9.262509815116732e-06, |
| "loss": 0.4758, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7757575757575758, |
| "grad_norm": 0.5502645373344421, |
| "learning_rate": 9.253267928919652e-06, |
| "loss": 0.4706, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7787878787878788, |
| "grad_norm": 0.49078571796417236, |
| "learning_rate": 9.243973165744306e-06, |
| "loss": 0.4553, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7818181818181819, |
| "grad_norm": 0.5883756279945374, |
| "learning_rate": 9.234625641143962e-06, |
| "loss": 0.4508, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7848484848484848, |
| "grad_norm": 0.5382703542709351, |
| "learning_rate": 9.225225471327815e-06, |
| "loss": 0.4647, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7878787878787878, |
| "grad_norm": 0.5602476000785828, |
| "learning_rate": 9.215772773159556e-06, |
| "loss": 0.4871, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7909090909090909, |
| "grad_norm": 0.5052843689918518, |
| "learning_rate": 9.206267664155906e-06, |
| "loss": 0.4474, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.793939393939394, |
| "grad_norm": 0.720058023929596, |
| "learning_rate": 9.196710262485168e-06, |
| "loss": 0.4899, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.796969696969697, |
| "grad_norm": 0.564479410648346, |
| "learning_rate": 9.187100686965749e-06, |
| "loss": 0.4789, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.5418474078178406, |
| "learning_rate": 9.177439057064684e-06, |
| "loss": 0.4816, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.803030303030303, |
| "grad_norm": 0.49409568309783936, |
| "learning_rate": 9.167725492896153e-06, |
| "loss": 0.4764, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.806060606060606, |
| "grad_norm": 0.5403966903686523, |
| "learning_rate": 9.157960115219993e-06, |
| "loss": 0.487, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8090909090909091, |
| "grad_norm": 0.5134335160255432, |
| "learning_rate": 9.148143045440181e-06, |
| "loss": 0.4652, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8121212121212121, |
| "grad_norm": 0.4440280497074127, |
| "learning_rate": 9.138274405603342e-06, |
| "loss": 0.4638, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8151515151515152, |
| "grad_norm": 0.5449512600898743, |
| "learning_rate": 9.128354318397223e-06, |
| "loss": 0.4619, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.6011729836463928, |
| "learning_rate": 9.118382907149164e-06, |
| "loss": 0.4817, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8212121212121212, |
| "grad_norm": 0.49820375442504883, |
| "learning_rate": 9.108360295824576e-06, |
| "loss": 0.4436, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8242424242424242, |
| "grad_norm": 0.5298901796340942, |
| "learning_rate": 9.098286609025392e-06, |
| "loss": 0.4925, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8272727272727273, |
| "grad_norm": 0.49690747261047363, |
| "learning_rate": 9.088161971988517e-06, |
| "loss": 0.4512, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8303030303030303, |
| "grad_norm": 0.4673530161380768, |
| "learning_rate": 9.077986510584273e-06, |
| "loss": 0.4775, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.5070790648460388, |
| "learning_rate": 9.067760351314838e-06, |
| "loss": 0.4772, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8363636363636363, |
| "grad_norm": 0.5117365121841431, |
| "learning_rate": 9.057483621312671e-06, |
| "loss": 0.4577, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8393939393939394, |
| "grad_norm": 0.5128085613250732, |
| "learning_rate": 9.047156448338927e-06, |
| "loss": 0.4464, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8424242424242424, |
| "grad_norm": 0.4917905628681183, |
| "learning_rate": 9.036778960781874e-06, |
| "loss": 0.4682, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8454545454545455, |
| "grad_norm": 0.5163683891296387, |
| "learning_rate": 9.026351287655294e-06, |
| "loss": 0.4475, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 0.4706951379776001, |
| "learning_rate": 9.01587355859688e-06, |
| "loss": 0.4527, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8515151515151516, |
| "grad_norm": 0.5465824604034424, |
| "learning_rate": 9.005345903866627e-06, |
| "loss": 0.4636, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.8545454545454545, |
| "grad_norm": 0.4861038327217102, |
| "learning_rate": 8.994768454345207e-06, |
| "loss": 0.459, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8575757575757575, |
| "grad_norm": 0.5132073760032654, |
| "learning_rate": 8.984141341532346e-06, |
| "loss": 0.4323, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8606060606060606, |
| "grad_norm": 0.48941031098365784, |
| "learning_rate": 8.973464697545191e-06, |
| "loss": 0.4689, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8636363636363636, |
| "grad_norm": 0.5348917841911316, |
| "learning_rate": 8.96273865511666e-06, |
| "loss": 0.4598, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 0.47538742423057556, |
| "learning_rate": 8.951963347593797e-06, |
| "loss": 0.4761, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8696969696969697, |
| "grad_norm": 0.5884383320808411, |
| "learning_rate": 8.941138908936118e-06, |
| "loss": 0.4705, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.5642337203025818, |
| "learning_rate": 8.930265473713939e-06, |
| "loss": 0.4701, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8757575757575757, |
| "grad_norm": 0.5007638931274414, |
| "learning_rate": 8.9193431771067e-06, |
| "loss": 0.4705, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8787878787878788, |
| "grad_norm": 0.5597187876701355, |
| "learning_rate": 8.908372154901302e-06, |
| "loss": 0.4762, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8818181818181818, |
| "grad_norm": 0.5402477383613586, |
| "learning_rate": 8.897352543490396e-06, |
| "loss": 0.4683, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8848484848484849, |
| "grad_norm": 0.49998798966407776, |
| "learning_rate": 8.8862844798707e-06, |
| "loss": 0.4806, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8878787878787879, |
| "grad_norm": 0.5093072056770325, |
| "learning_rate": 8.875168101641294e-06, |
| "loss": 0.4809, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8909090909090909, |
| "grad_norm": 0.4708082377910614, |
| "learning_rate": 8.864003547001916e-06, |
| "loss": 0.4241, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8939393939393939, |
| "grad_norm": 0.5265612602233887, |
| "learning_rate": 8.852790954751229e-06, |
| "loss": 0.4352, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.896969696969697, |
| "grad_norm": 0.45366954803466797, |
| "learning_rate": 8.841530464285105e-06, |
| "loss": 0.4517, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.49041837453842163, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.4538, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9030303030303031, |
| "grad_norm": 0.4863709509372711, |
| "learning_rate": 8.81886634926567e-06, |
| "loss": 0.4656, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.906060606060606, |
| "grad_norm": 0.48961129784584045, |
| "learning_rate": 8.807463006474514e-06, |
| "loss": 0.4439, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.5433361530303955, |
| "learning_rate": 8.796012328988716e-06, |
| "loss": 0.4766, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9121212121212121, |
| "grad_norm": 0.48235633969306946, |
| "learning_rate": 8.78451445916405e-06, |
| "loss": 0.4461, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9151515151515152, |
| "grad_norm": 0.532062292098999, |
| "learning_rate": 8.772969539942981e-06, |
| "loss": 0.4732, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9181818181818182, |
| "grad_norm": 0.45535221695899963, |
| "learning_rate": 8.7613777148529e-06, |
| "loss": 0.4664, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9212121212121213, |
| "grad_norm": 0.4744938313961029, |
| "learning_rate": 8.749739128004329e-06, |
| "loss": 0.4818, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9242424242424242, |
| "grad_norm": 0.5316322445869446, |
| "learning_rate": 8.738053924089149e-06, |
| "loss": 0.4609, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9272727272727272, |
| "grad_norm": 0.5303252935409546, |
| "learning_rate": 8.726322248378775e-06, |
| "loss": 0.4457, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9303030303030303, |
| "grad_norm": 0.5524774193763733, |
| "learning_rate": 8.714544246722369e-06, |
| "loss": 0.474, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.49604108929634094, |
| "learning_rate": 8.702720065545024e-06, |
| "loss": 0.455, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9363636363636364, |
| "grad_norm": 0.5008924603462219, |
| "learning_rate": 8.690849851845933e-06, |
| "loss": 0.4653, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9393939393939394, |
| "grad_norm": 0.5711862444877625, |
| "learning_rate": 8.678933753196577e-06, |
| "loss": 0.4701, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9424242424242424, |
| "grad_norm": 0.44712546467781067, |
| "learning_rate": 8.666971917738876e-06, |
| "loss": 0.4547, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9454545454545454, |
| "grad_norm": 0.5717918872833252, |
| "learning_rate": 8.65496449418336e-06, |
| "loss": 0.4591, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9484848484848485, |
| "grad_norm": 0.5090660452842712, |
| "learning_rate": 8.642911631807306e-06, |
| "loss": 0.4589, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9515151515151515, |
| "grad_norm": 0.4903143346309662, |
| "learning_rate": 8.630813480452898e-06, |
| "loss": 0.4571, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9545454545454546, |
| "grad_norm": 0.6317908763885498, |
| "learning_rate": 8.61867019052535e-06, |
| "loss": 0.4663, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9575757575757575, |
| "grad_norm": 0.4388335943222046, |
| "learning_rate": 8.606481912991052e-06, |
| "loss": 0.4626, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9606060606060606, |
| "grad_norm": 0.5695369839668274, |
| "learning_rate": 8.594248799375671e-06, |
| "loss": 0.462, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9636363636363636, |
| "grad_norm": 0.518403947353363, |
| "learning_rate": 8.581971001762287e-06, |
| "loss": 0.4578, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9666666666666667, |
| "grad_norm": 0.5362844467163086, |
| "learning_rate": 8.569648672789496e-06, |
| "loss": 0.4529, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.499873548746109, |
| "learning_rate": 8.557281965649508e-06, |
| "loss": 0.46, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9727272727272728, |
| "grad_norm": 0.5259467363357544, |
| "learning_rate": 8.54487103408625e-06, |
| "loss": 0.4496, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9757575757575757, |
| "grad_norm": 0.48420971632003784, |
| "learning_rate": 8.532416032393447e-06, |
| "loss": 0.4633, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9787878787878788, |
| "grad_norm": 0.49494659900665283, |
| "learning_rate": 8.51991711541271e-06, |
| "loss": 0.4509, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9818181818181818, |
| "grad_norm": 0.49319571256637573, |
| "learning_rate": 8.507374438531606e-06, |
| "loss": 0.4639, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9848484848484849, |
| "grad_norm": 0.48394539952278137, |
| "learning_rate": 8.494788157681733e-06, |
| "loss": 0.4536, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9878787878787879, |
| "grad_norm": 0.5943865776062012, |
| "learning_rate": 8.482158429336769e-06, |
| "loss": 0.4599, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.990909090909091, |
| "grad_norm": 0.4566466808319092, |
| "learning_rate": 8.469485410510545e-06, |
| "loss": 0.4593, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9939393939393939, |
| "grad_norm": 0.5179756283760071, |
| "learning_rate": 8.456769258755078e-06, |
| "loss": 0.453, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.996969696969697, |
| "grad_norm": 0.6326143145561218, |
| "learning_rate": 8.444010132158614e-06, |
| "loss": 0.4932, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.48770657181739807, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.4645, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.003030303030303, |
| "grad_norm": 0.47949346899986267, |
| "learning_rate": 8.418363589465055e-06, |
| "loss": 0.4263, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.006060606060606, |
| "grad_norm": 0.5179343819618225, |
| "learning_rate": 8.405476492207902e-06, |
| "loss": 0.3961, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.009090909090909, |
| "grad_norm": 0.505066990852356, |
| "learning_rate": 8.392547057785662e-06, |
| "loss": 0.4154, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.0121212121212122, |
| "grad_norm": 0.44572776556015015, |
| "learning_rate": 8.379575446938136e-06, |
| "loss": 0.4076, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0151515151515151, |
| "grad_norm": 0.4563922584056854, |
| "learning_rate": 8.366561820929457e-06, |
| "loss": 0.3917, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.018181818181818, |
| "grad_norm": 0.46021389961242676, |
| "learning_rate": 8.353506341546106e-06, |
| "loss": 0.4092, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.0212121212121212, |
| "grad_norm": 0.5978601574897766, |
| "learning_rate": 8.340409171094874e-06, |
| "loss": 0.436, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.0242424242424242, |
| "grad_norm": 0.4990571439266205, |
| "learning_rate": 8.32727047240087e-06, |
| "loss": 0.4089, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.0272727272727273, |
| "grad_norm": 0.5260419249534607, |
| "learning_rate": 8.314090408805481e-06, |
| "loss": 0.4224, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.0303030303030303, |
| "grad_norm": 0.5074383020401001, |
| "learning_rate": 8.300869144164346e-06, |
| "loss": 0.4389, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0333333333333334, |
| "grad_norm": 0.4625466465950012, |
| "learning_rate": 8.28760684284532e-06, |
| "loss": 0.4164, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.0363636363636364, |
| "grad_norm": 0.4897065758705139, |
| "learning_rate": 8.274303669726427e-06, |
| "loss": 0.4112, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.0393939393939393, |
| "grad_norm": 0.46699315309524536, |
| "learning_rate": 8.260959790193815e-06, |
| "loss": 0.4313, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.0424242424242425, |
| "grad_norm": 0.44992008805274963, |
| "learning_rate": 8.247575370139695e-06, |
| "loss": 0.4215, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.0454545454545454, |
| "grad_norm": 0.48453715443611145, |
| "learning_rate": 8.234150575960288e-06, |
| "loss": 0.3819, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.0484848484848486, |
| "grad_norm": 0.46452537178993225, |
| "learning_rate": 8.220685574553739e-06, |
| "loss": 0.3959, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.0515151515151515, |
| "grad_norm": 0.5160481333732605, |
| "learning_rate": 8.207180533318061e-06, |
| "loss": 0.4091, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.0545454545454545, |
| "grad_norm": 0.5158939957618713, |
| "learning_rate": 8.193635620149041e-06, |
| "loss": 0.4102, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.0575757575757576, |
| "grad_norm": 0.4726713299751282, |
| "learning_rate": 8.180051003438158e-06, |
| "loss": 0.4367, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 0.5599554777145386, |
| "learning_rate": 8.16642685207049e-06, |
| "loss": 0.4145, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0636363636363637, |
| "grad_norm": 0.5167244672775269, |
| "learning_rate": 8.152763335422612e-06, |
| "loss": 0.4276, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.4800064265727997, |
| "learning_rate": 8.139060623360494e-06, |
| "loss": 0.3964, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0696969696969698, |
| "grad_norm": 0.5460079312324524, |
| "learning_rate": 8.125318886237382e-06, |
| "loss": 0.4498, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.0727272727272728, |
| "grad_norm": 0.5078116655349731, |
| "learning_rate": 8.111538294891684e-06, |
| "loss": 0.4302, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.0757575757575757, |
| "grad_norm": 0.5369091033935547, |
| "learning_rate": 8.097719020644855e-06, |
| "loss": 0.4339, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0787878787878789, |
| "grad_norm": 0.47746509313583374, |
| "learning_rate": 8.083861235299253e-06, |
| "loss": 0.4207, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.0818181818181818, |
| "grad_norm": 0.4835416376590729, |
| "learning_rate": 8.06996511113601e-06, |
| "loss": 0.4294, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.084848484848485, |
| "grad_norm": 0.47259601950645447, |
| "learning_rate": 8.05603082091289e-06, |
| "loss": 0.4291, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.087878787878788, |
| "grad_norm": 0.43381235003471375, |
| "learning_rate": 8.04205853786214e-06, |
| "loss": 0.3999, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.469703733921051, |
| "learning_rate": 8.028048435688333e-06, |
| "loss": 0.4106, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.093939393939394, |
| "grad_norm": 0.44487234950065613, |
| "learning_rate": 8.014000688566224e-06, |
| "loss": 0.3955, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.096969696969697, |
| "grad_norm": 0.444295197725296, |
| "learning_rate": 7.999915471138562e-06, |
| "loss": 0.4258, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.4215056300163269, |
| "learning_rate": 7.985792958513932e-06, |
| "loss": 0.4327, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.103030303030303, |
| "grad_norm": 0.5015363097190857, |
| "learning_rate": 7.971633326264581e-06, |
| "loss": 0.4093, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.106060606060606, |
| "grad_norm": 0.4623165428638458, |
| "learning_rate": 7.957436750424223e-06, |
| "loss": 0.4187, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1090909090909091, |
| "grad_norm": 0.5218795537948608, |
| "learning_rate": 7.943203407485864e-06, |
| "loss": 0.4277, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.112121212121212, |
| "grad_norm": 0.46455255150794983, |
| "learning_rate": 7.928933474399601e-06, |
| "loss": 0.435, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.1151515151515152, |
| "grad_norm": 0.5003970265388489, |
| "learning_rate": 7.91462712857042e-06, |
| "loss": 0.432, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.1181818181818182, |
| "grad_norm": 0.46695271134376526, |
| "learning_rate": 7.900284547855992e-06, |
| "loss": 0.407, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.121212121212121, |
| "grad_norm": 0.45484066009521484, |
| "learning_rate": 7.885905910564466e-06, |
| "loss": 0.4107, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1242424242424243, |
| "grad_norm": 0.49817416071891785, |
| "learning_rate": 7.87149139545225e-06, |
| "loss": 0.4096, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.1272727272727272, |
| "grad_norm": 0.4525980055332184, |
| "learning_rate": 7.857041181721788e-06, |
| "loss": 0.4368, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.1303030303030304, |
| "grad_norm": 0.5966557860374451, |
| "learning_rate": 7.842555449019326e-06, |
| "loss": 0.4166, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "grad_norm": 0.42670103907585144, |
| "learning_rate": 7.828034377432694e-06, |
| "loss": 0.4333, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 0.5341598987579346, |
| "learning_rate": 7.813478147489052e-06, |
| "loss": 0.4469, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.1393939393939394, |
| "grad_norm": 0.5007196068763733, |
| "learning_rate": 7.798886940152654e-06, |
| "loss": 0.4077, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.1424242424242423, |
| "grad_norm": 0.5085919499397278, |
| "learning_rate": 7.784260936822592e-06, |
| "loss": 0.454, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.1454545454545455, |
| "grad_norm": 0.523694634437561, |
| "learning_rate": 7.769600319330553e-06, |
| "loss": 0.431, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.1484848484848484, |
| "grad_norm": 0.4709513783454895, |
| "learning_rate": 7.75490526993854e-06, |
| "loss": 0.4246, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.1515151515151516, |
| "grad_norm": 0.4955805540084839, |
| "learning_rate": 7.740175971336624e-06, |
| "loss": 0.4506, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1545454545454545, |
| "grad_norm": 0.4655817449092865, |
| "learning_rate": 7.725412606640658e-06, |
| "loss": 0.4353, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.1575757575757575, |
| "grad_norm": 0.46022751927375793, |
| "learning_rate": 7.710615359390018e-06, |
| "loss": 0.4161, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.1606060606060606, |
| "grad_norm": 0.5289224982261658, |
| "learning_rate": 7.6957844135453e-06, |
| "loss": 0.43, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.1636363636363636, |
| "grad_norm": 0.5375812649726868, |
| "learning_rate": 7.680919953486047e-06, |
| "loss": 0.4231, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 0.5493893027305603, |
| "learning_rate": 7.666022164008458e-06, |
| "loss": 0.4442, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.1696969696969697, |
| "grad_norm": 0.4879584014415741, |
| "learning_rate": 7.651091230323079e-06, |
| "loss": 0.4197, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.1727272727272728, |
| "grad_norm": 0.5651521682739258, |
| "learning_rate": 7.636127338052513e-06, |
| "loss": 0.4246, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.1757575757575758, |
| "grad_norm": 0.4888427257537842, |
| "learning_rate": 7.621130673229105e-06, |
| "loss": 0.438, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.1787878787878787, |
| "grad_norm": 0.5484802722930908, |
| "learning_rate": 7.606101422292629e-06, |
| "loss": 0.4293, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.1818181818181819, |
| "grad_norm": 0.4991911053657532, |
| "learning_rate": 7.5910397720879785e-06, |
| "loss": 0.4191, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1848484848484848, |
| "grad_norm": 0.49404406547546387, |
| "learning_rate": 7.575945909862829e-06, |
| "loss": 0.4173, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.187878787878788, |
| "grad_norm": 0.5090894103050232, |
| "learning_rate": 7.5608200232653254e-06, |
| "loss": 0.4404, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.190909090909091, |
| "grad_norm": 0.5557565093040466, |
| "learning_rate": 7.545662300341736e-06, |
| "loss": 0.4463, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.1939393939393939, |
| "grad_norm": 0.5056024193763733, |
| "learning_rate": 7.530472929534126e-06, |
| "loss": 0.4203, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.196969696969697, |
| "grad_norm": 0.479216068983078, |
| "learning_rate": 7.515252099678011e-06, |
| "loss": 0.4024, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.5049172639846802, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4178, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.2030303030303031, |
| "grad_norm": 0.4803786873817444, |
| "learning_rate": 7.484716820115461e-06, |
| "loss": 0.4151, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.206060606060606, |
| "grad_norm": 0.48565956950187683, |
| "learning_rate": 7.469402750026147e-06, |
| "loss": 0.3898, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.209090909090909, |
| "grad_norm": 0.5321947932243347, |
| "learning_rate": 7.454057980117842e-06, |
| "loss": 0.4268, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 0.4305421710014343, |
| "learning_rate": 7.438682701157993e-06, |
| "loss": 0.4046, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.215151515151515, |
| "grad_norm": 0.5250205993652344, |
| "learning_rate": 7.423277104293338e-06, |
| "loss": 0.4104, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.2181818181818183, |
| "grad_norm": 0.4442998170852661, |
| "learning_rate": 7.407841381047533e-06, |
| "loss": 0.4201, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.2212121212121212, |
| "grad_norm": 0.44337543845176697, |
| "learning_rate": 7.392375723318761e-06, |
| "loss": 0.4325, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.2242424242424241, |
| "grad_norm": 0.4957628548145294, |
| "learning_rate": 7.376880323377357e-06, |
| "loss": 0.4318, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.2272727272727273, |
| "grad_norm": 0.4373667538166046, |
| "learning_rate": 7.361355373863415e-06, |
| "loss": 0.427, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.2303030303030302, |
| "grad_norm": 0.48402732610702515, |
| "learning_rate": 7.345801067784388e-06, |
| "loss": 0.4319, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.2333333333333334, |
| "grad_norm": 0.43947574496269226, |
| "learning_rate": 7.330217598512696e-06, |
| "loss": 0.4327, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.2363636363636363, |
| "grad_norm": 0.4845799207687378, |
| "learning_rate": 7.314605159783313e-06, |
| "loss": 0.4284, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.2393939393939393, |
| "grad_norm": 0.4592854380607605, |
| "learning_rate": 7.298963945691371e-06, |
| "loss": 0.4347, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.2424242424242424, |
| "grad_norm": 0.4303816556930542, |
| "learning_rate": 7.283294150689735e-06, |
| "loss": 0.4342, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2454545454545454, |
| "grad_norm": 0.46144893765449524, |
| "learning_rate": 7.2675959695865896e-06, |
| "loss": 0.4362, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.2484848484848485, |
| "grad_norm": 0.5031886100769043, |
| "learning_rate": 7.251869597543019e-06, |
| "loss": 0.4114, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.2515151515151515, |
| "grad_norm": 0.40648922324180603, |
| "learning_rate": 7.2361152300705795e-06, |
| "loss": 0.397, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.2545454545454544, |
| "grad_norm": 0.4165934920310974, |
| "learning_rate": 7.2203330630288714e-06, |
| "loss": 0.4176, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.2575757575757576, |
| "grad_norm": 0.4433961510658264, |
| "learning_rate": 7.2045232926230965e-06, |
| "loss": 0.4051, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.2606060606060607, |
| "grad_norm": 0.4428854286670685, |
| "learning_rate": 7.188686115401628e-06, |
| "loss": 0.4109, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.2636363636363637, |
| "grad_norm": 0.4384450316429138, |
| "learning_rate": 7.172821728253563e-06, |
| "loss": 0.4092, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 0.5005664825439453, |
| "learning_rate": 7.156930328406268e-06, |
| "loss": 0.4125, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.2696969696969698, |
| "grad_norm": 0.46654659509658813, |
| "learning_rate": 7.141012113422942e-06, |
| "loss": 0.419, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.44787606596946716, |
| "learning_rate": 7.1250672812001505e-06, |
| "loss": 0.4199, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.2757575757575759, |
| "grad_norm": 0.5100336670875549, |
| "learning_rate": 7.109096029965362e-06, |
| "loss": 0.4294, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.2787878787878788, |
| "grad_norm": 0.4483383297920227, |
| "learning_rate": 7.093098558274494e-06, |
| "loss": 0.4091, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.2818181818181817, |
| "grad_norm": 0.4621780216693878, |
| "learning_rate": 7.0770750650094335e-06, |
| "loss": 0.4448, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.284848484848485, |
| "grad_norm": 0.4376955032348633, |
| "learning_rate": 7.061025749375572e-06, |
| "loss": 0.4014, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.2878787878787878, |
| "grad_norm": 0.4119098484516144, |
| "learning_rate": 7.044950810899332e-06, |
| "loss": 0.4251, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.290909090909091, |
| "grad_norm": 0.4029601514339447, |
| "learning_rate": 7.02885044942567e-06, |
| "loss": 0.4169, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.293939393939394, |
| "grad_norm": 0.40098485350608826, |
| "learning_rate": 7.012724865115615e-06, |
| "loss": 0.42, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.2969696969696969, |
| "grad_norm": 0.45420369505882263, |
| "learning_rate": 6.996574258443761e-06, |
| "loss": 0.4084, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.44172826409339905, |
| "learning_rate": 6.980398830195785e-06, |
| "loss": 0.4141, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.303030303030303, |
| "grad_norm": 0.42184752225875854, |
| "learning_rate": 6.964198781465948e-06, |
| "loss": 0.444, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3060606060606061, |
| "grad_norm": 0.47455379366874695, |
| "learning_rate": 6.947974313654592e-06, |
| "loss": 0.4211, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.309090909090909, |
| "grad_norm": 0.4578656852245331, |
| "learning_rate": 6.931725628465643e-06, |
| "loss": 0.4372, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.312121212121212, |
| "grad_norm": 0.49530062079429626, |
| "learning_rate": 6.9154529279040985e-06, |
| "loss": 0.4534, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.3151515151515152, |
| "grad_norm": 0.42749521136283875, |
| "learning_rate": 6.899156414273514e-06, |
| "loss": 0.4357, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.3181818181818181, |
| "grad_norm": 0.3963139057159424, |
| "learning_rate": 6.882836290173493e-06, |
| "loss": 0.4072, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.3212121212121213, |
| "grad_norm": 0.4464685618877411, |
| "learning_rate": 6.866492758497171e-06, |
| "loss": 0.3984, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.3242424242424242, |
| "grad_norm": 0.4427390992641449, |
| "learning_rate": 6.850126022428678e-06, |
| "loss": 0.4386, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.3272727272727272, |
| "grad_norm": 0.4357937276363373, |
| "learning_rate": 6.833736285440632e-06, |
| "loss": 0.4021, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.3303030303030303, |
| "grad_norm": 0.4386712610721588, |
| "learning_rate": 6.817323751291598e-06, |
| "loss": 0.4415, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.4566034972667694, |
| "learning_rate": 6.800888624023552e-06, |
| "loss": 0.3911, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3363636363636364, |
| "grad_norm": 0.4068944454193115, |
| "learning_rate": 6.78443110795936e-06, |
| "loss": 0.392, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.3393939393939394, |
| "grad_norm": 0.49135804176330566, |
| "learning_rate": 6.767951407700217e-06, |
| "loss": 0.4302, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.3424242424242423, |
| "grad_norm": 0.43315646052360535, |
| "learning_rate": 6.75144972812312e-06, |
| "loss": 0.428, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.3454545454545455, |
| "grad_norm": 0.4170973598957062, |
| "learning_rate": 6.734926274378313e-06, |
| "loss": 0.4313, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.3484848484848486, |
| "grad_norm": 0.4999806582927704, |
| "learning_rate": 6.7183812518867365e-06, |
| "loss": 0.417, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.3515151515151516, |
| "grad_norm": 0.4573524594306946, |
| "learning_rate": 6.701814866337477e-06, |
| "loss": 0.4292, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.3545454545454545, |
| "grad_norm": 0.458232045173645, |
| "learning_rate": 6.685227323685209e-06, |
| "loss": 0.4202, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.3575757575757577, |
| "grad_norm": 0.4656910002231598, |
| "learning_rate": 6.668618830147634e-06, |
| "loss": 0.3984, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.3606060606060606, |
| "grad_norm": 0.4599440097808838, |
| "learning_rate": 6.651989592202913e-06, |
| "loss": 0.4037, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.4676293134689331, |
| "learning_rate": 6.635339816587109e-06, |
| "loss": 0.4133, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3666666666666667, |
| "grad_norm": 0.4331243634223938, |
| "learning_rate": 6.618669710291607e-06, |
| "loss": 0.4101, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.3696969696969696, |
| "grad_norm": 0.4712773561477661, |
| "learning_rate": 6.601979480560543e-06, |
| "loss": 0.4254, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.3727272727272728, |
| "grad_norm": 0.41984647512435913, |
| "learning_rate": 6.5852693348882345e-06, |
| "loss": 0.4276, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.3757575757575757, |
| "grad_norm": 0.4325491786003113, |
| "learning_rate": 6.568539481016593e-06, |
| "loss": 0.4147, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.378787878787879, |
| "grad_norm": 0.4297987222671509, |
| "learning_rate": 6.551790126932543e-06, |
| "loss": 0.4157, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.3818181818181818, |
| "grad_norm": 0.4344542920589447, |
| "learning_rate": 6.535021480865439e-06, |
| "loss": 0.4204, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.3848484848484848, |
| "grad_norm": 0.3979593515396118, |
| "learning_rate": 6.5182337512844725e-06, |
| "loss": 0.422, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.387878787878788, |
| "grad_norm": 0.4758434593677521, |
| "learning_rate": 6.501427146896087e-06, |
| "loss": 0.4221, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.3909090909090909, |
| "grad_norm": 0.4996965527534485, |
| "learning_rate": 6.484601876641375e-06, |
| "loss": 0.4045, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.393939393939394, |
| "grad_norm": 0.4164718985557556, |
| "learning_rate": 6.467758149693486e-06, |
| "loss": 0.4302, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.396969696969697, |
| "grad_norm": 0.4488878548145294, |
| "learning_rate": 6.450896175455027e-06, |
| "loss": 0.4217, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.4654311239719391, |
| "learning_rate": 6.434016163555452e-06, |
| "loss": 0.4089, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.403030303030303, |
| "grad_norm": 0.435861736536026, |
| "learning_rate": 6.417118323848465e-06, |
| "loss": 0.46, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.406060606060606, |
| "grad_norm": 0.4406987726688385, |
| "learning_rate": 6.400202866409405e-06, |
| "loss": 0.4263, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.4090909090909092, |
| "grad_norm": 0.4633047580718994, |
| "learning_rate": 6.383270001532636e-06, |
| "loss": 0.4009, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.412121212121212, |
| "grad_norm": 0.44259321689605713, |
| "learning_rate": 6.366319939728934e-06, |
| "loss": 0.3968, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.415151515151515, |
| "grad_norm": 0.46387195587158203, |
| "learning_rate": 6.3493528917228664e-06, |
| "loss": 0.4453, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.4181818181818182, |
| "grad_norm": 0.4395183026790619, |
| "learning_rate": 6.332369068450175e-06, |
| "loss": 0.412, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.4212121212121211, |
| "grad_norm": 0.5252392292022705, |
| "learning_rate": 6.315368681055157e-06, |
| "loss": 0.4286, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.4242424242424243, |
| "grad_norm": 0.4034399390220642, |
| "learning_rate": 6.29835194088803e-06, |
| "loss": 0.4491, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4272727272727272, |
| "grad_norm": 0.46798762679100037, |
| "learning_rate": 6.2813190595023135e-06, |
| "loss": 0.3974, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.4303030303030302, |
| "grad_norm": 0.4931304454803467, |
| "learning_rate": 6.264270248652199e-06, |
| "loss": 0.4235, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.4333333333333333, |
| "grad_norm": 0.42226365208625793, |
| "learning_rate": 6.247205720289907e-06, |
| "loss": 0.3972, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.4363636363636363, |
| "grad_norm": 0.4756371080875397, |
| "learning_rate": 6.230125686563068e-06, |
| "loss": 0.4113, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.4393939393939394, |
| "grad_norm": 0.487985223531723, |
| "learning_rate": 6.213030359812069e-06, |
| "loss": 0.4119, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.4424242424242424, |
| "grad_norm": 0.4072030484676361, |
| "learning_rate": 6.195919952567426e-06, |
| "loss": 0.4205, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.4454545454545453, |
| "grad_norm": 0.5348814129829407, |
| "learning_rate": 6.178794677547138e-06, |
| "loss": 0.4297, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.4484848484848485, |
| "grad_norm": 0.4264541268348694, |
| "learning_rate": 6.161654747654033e-06, |
| "loss": 0.4111, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.4515151515151516, |
| "grad_norm": 0.4194830358028412, |
| "learning_rate": 6.14450037597314e-06, |
| "loss": 0.4151, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.4842208921909332, |
| "learning_rate": 6.127331775769023e-06, |
| "loss": 0.4336, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4575757575757575, |
| "grad_norm": 0.4886646866798401, |
| "learning_rate": 6.110149160483139e-06, |
| "loss": 0.3926, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.4606060606060607, |
| "grad_norm": 0.47493505477905273, |
| "learning_rate": 6.092952743731179e-06, |
| "loss": 0.4072, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.4636363636363636, |
| "grad_norm": 0.45231956243515015, |
| "learning_rate": 6.07574273930042e-06, |
| "loss": 0.4128, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 0.4732300937175751, |
| "learning_rate": 6.058519361147055e-06, |
| "loss": 0.4063, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.4696969696969697, |
| "grad_norm": 0.45487740635871887, |
| "learning_rate": 6.041282823393546e-06, |
| "loss": 0.3982, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.4727272727272727, |
| "grad_norm": 0.40932172536849976, |
| "learning_rate": 6.024033340325954e-06, |
| "loss": 0.4367, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.4757575757575758, |
| "grad_norm": 0.51799076795578, |
| "learning_rate": 6.006771126391278e-06, |
| "loss": 0.4075, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.4787878787878788, |
| "grad_norm": 0.46568235754966736, |
| "learning_rate": 5.989496396194787e-06, |
| "loss": 0.4234, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.481818181818182, |
| "grad_norm": 0.39532041549682617, |
| "learning_rate": 5.972209364497355e-06, |
| "loss": 0.4265, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.4848484848484849, |
| "grad_norm": 0.42684099078178406, |
| "learning_rate": 5.954910246212787e-06, |
| "loss": 0.4404, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4878787878787878, |
| "grad_norm": 0.469150185585022, |
| "learning_rate": 5.937599256405151e-06, |
| "loss": 0.4242, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.490909090909091, |
| "grad_norm": 0.4257190525531769, |
| "learning_rate": 5.920276610286102e-06, |
| "loss": 0.4154, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.493939393939394, |
| "grad_norm": 0.4545654058456421, |
| "learning_rate": 5.90294252321221e-06, |
| "loss": 0.4277, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.496969696969697, |
| "grad_norm": 0.4450318217277527, |
| "learning_rate": 5.885597210682273e-06, |
| "loss": 0.4402, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.46171948313713074, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.4083, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.503030303030303, |
| "grad_norm": 0.49079129099845886, |
| "learning_rate": 5.850873771944581e-06, |
| "loss": 0.4058, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.506060606060606, |
| "grad_norm": 0.43085163831710815, |
| "learning_rate": 5.833496077421485e-06, |
| "loss": 0.4304, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.509090909090909, |
| "grad_norm": 0.45978105068206787, |
| "learning_rate": 5.816108020806297e-06, |
| "loss": 0.406, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.5121212121212122, |
| "grad_norm": 0.5006170868873596, |
| "learning_rate": 5.798709818268775e-06, |
| "loss": 0.4051, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 0.47631126642227173, |
| "learning_rate": 5.781301686104808e-06, |
| "loss": 0.4223, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.518181818181818, |
| "grad_norm": 0.4285683333873749, |
| "learning_rate": 5.763883840733736e-06, |
| "loss": 0.4246, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.5212121212121212, |
| "grad_norm": 0.4717022478580475, |
| "learning_rate": 5.746456498695648e-06, |
| "loss": 0.4322, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.5242424242424244, |
| "grad_norm": 0.43785741925239563, |
| "learning_rate": 5.729019876648704e-06, |
| "loss": 0.4181, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.5272727272727273, |
| "grad_norm": 0.4523105025291443, |
| "learning_rate": 5.711574191366427e-06, |
| "loss": 0.3962, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.5303030303030303, |
| "grad_norm": 0.46577563881874084, |
| "learning_rate": 5.694119659735018e-06, |
| "loss": 0.4185, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 0.48579537868499756, |
| "learning_rate": 5.6766564987506564e-06, |
| "loss": 0.4373, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.5363636363636364, |
| "grad_norm": 0.3950510621070862, |
| "learning_rate": 5.659184925516802e-06, |
| "loss": 0.4182, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.5393939393939395, |
| "grad_norm": 0.3853079080581665, |
| "learning_rate": 5.641705157241497e-06, |
| "loss": 0.4022, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.5424242424242425, |
| "grad_norm": 0.40813571214675903, |
| "learning_rate": 5.624217411234667e-06, |
| "loss": 0.4155, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 0.4753287434577942, |
| "learning_rate": 5.60672190490541e-06, |
| "loss": 0.3863, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5484848484848484, |
| "grad_norm": 0.47361883521080017, |
| "learning_rate": 5.58921885575931e-06, |
| "loss": 0.4325, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.5515151515151515, |
| "grad_norm": 0.3923182487487793, |
| "learning_rate": 5.571708481395719e-06, |
| "loss": 0.4046, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.5545454545454547, |
| "grad_norm": 0.43248647451400757, |
| "learning_rate": 5.5541909995050554e-06, |
| "loss": 0.4127, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.5575757575757576, |
| "grad_norm": 0.4522246718406677, |
| "learning_rate": 5.536666627866104e-06, |
| "loss": 0.4001, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.5606060606060606, |
| "grad_norm": 0.4674279987812042, |
| "learning_rate": 5.519135584343301e-06, |
| "loss": 0.4105, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.5636363636363635, |
| "grad_norm": 0.488587886095047, |
| "learning_rate": 5.5015980868840254e-06, |
| "loss": 0.424, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.5666666666666667, |
| "grad_norm": 0.45722997188568115, |
| "learning_rate": 5.484054353515896e-06, |
| "loss": 0.4318, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.5696969696969698, |
| "grad_norm": 0.4935101568698883, |
| "learning_rate": 5.466504602344055e-06, |
| "loss": 0.4218, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.5727272727272728, |
| "grad_norm": 0.47893407940864563, |
| "learning_rate": 5.448949051548459e-06, |
| "loss": 0.4053, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.5757575757575757, |
| "grad_norm": 0.41225844621658325, |
| "learning_rate": 5.431387919381166e-06, |
| "loss": 0.397, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5787878787878786, |
| "grad_norm": 0.4375216066837311, |
| "learning_rate": 5.41382142416362e-06, |
| "loss": 0.41, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.5818181818181818, |
| "grad_norm": 0.5372921228408813, |
| "learning_rate": 5.396249784283943e-06, |
| "loss": 0.4104, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.584848484848485, |
| "grad_norm": 0.4583219885826111, |
| "learning_rate": 5.3786732181942135e-06, |
| "loss": 0.4085, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.587878787878788, |
| "grad_norm": 0.37842702865600586, |
| "learning_rate": 5.361091944407751e-06, |
| "loss": 0.4033, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.5909090909090908, |
| "grad_norm": 0.44202497601509094, |
| "learning_rate": 5.343506181496405e-06, |
| "loss": 0.4111, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.593939393939394, |
| "grad_norm": 0.4649200141429901, |
| "learning_rate": 5.3259161480878354e-06, |
| "loss": 0.4304, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.596969696969697, |
| "grad_norm": 0.41630348563194275, |
| "learning_rate": 5.308322062862786e-06, |
| "loss": 0.436, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.4201316833496094, |
| "learning_rate": 5.290724144552379e-06, |
| "loss": 0.3961, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.603030303030303, |
| "grad_norm": 0.4639657437801361, |
| "learning_rate": 5.2731226119353915e-06, |
| "loss": 0.4089, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.606060606060606, |
| "grad_norm": 0.4132433533668518, |
| "learning_rate": 5.255517683835528e-06, |
| "loss": 0.4272, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6090909090909091, |
| "grad_norm": 0.49520424008369446, |
| "learning_rate": 5.237909579118713e-06, |
| "loss": 0.4125, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.612121212121212, |
| "grad_norm": 0.4128758907318115, |
| "learning_rate": 5.220298516690353e-06, |
| "loss": 0.4158, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.6151515151515152, |
| "grad_norm": 0.46247929334640503, |
| "learning_rate": 5.202684715492635e-06, |
| "loss": 0.4225, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.6181818181818182, |
| "grad_norm": 0.47613954544067383, |
| "learning_rate": 5.185068394501791e-06, |
| "loss": 0.4263, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.621212121212121, |
| "grad_norm": 0.39713340997695923, |
| "learning_rate": 5.1674497727253766e-06, |
| "loss": 0.4323, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.6242424242424243, |
| "grad_norm": 0.424244225025177, |
| "learning_rate": 5.149829069199555e-06, |
| "loss": 0.4082, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.6272727272727274, |
| "grad_norm": 0.41466566920280457, |
| "learning_rate": 5.132206502986368e-06, |
| "loss": 0.4023, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.6303030303030304, |
| "grad_norm": 0.38547638058662415, |
| "learning_rate": 5.114582293171012e-06, |
| "loss": 0.4127, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.6333333333333333, |
| "grad_norm": 0.404697984457016, |
| "learning_rate": 5.096956658859122e-06, |
| "loss": 0.4204, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.4179372489452362, |
| "learning_rate": 5.07932981917404e-06, |
| "loss": 0.4346, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.6393939393939394, |
| "grad_norm": 0.38099920749664307, |
| "learning_rate": 5.061701993254092e-06, |
| "loss": 0.4371, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.6424242424242426, |
| "grad_norm": 0.4002698063850403, |
| "learning_rate": 5.044073400249867e-06, |
| "loss": 0.4108, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.6454545454545455, |
| "grad_norm": 0.43330150842666626, |
| "learning_rate": 5.026444259321489e-06, |
| "loss": 0.4197, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.6484848484848484, |
| "grad_norm": 0.4145337641239166, |
| "learning_rate": 5.008814789635894e-06, |
| "loss": 0.4141, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.6515151515151514, |
| "grad_norm": 0.3934999108314514, |
| "learning_rate": 4.9911852103641065e-06, |
| "loss": 0.3995, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.6545454545454545, |
| "grad_norm": 0.4316452443599701, |
| "learning_rate": 4.973555740678512e-06, |
| "loss": 0.4192, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.6575757575757577, |
| "grad_norm": 0.38727515935897827, |
| "learning_rate": 4.955926599750134e-06, |
| "loss": 0.4063, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.6606060606060606, |
| "grad_norm": 0.46579864621162415, |
| "learning_rate": 4.938298006745909e-06, |
| "loss": 0.4264, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.6636363636363636, |
| "grad_norm": 0.4145257771015167, |
| "learning_rate": 4.9206701808259605e-06, |
| "loss": 0.4188, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.48353952169418335, |
| "learning_rate": 4.903043341140879e-06, |
| "loss": 0.4312, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.6696969696969697, |
| "grad_norm": 0.42981258034706116, |
| "learning_rate": 4.885417706828989e-06, |
| "loss": 0.4192, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.6727272727272728, |
| "grad_norm": 0.4582386612892151, |
| "learning_rate": 4.867793497013634e-06, |
| "loss": 0.4116, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.6757575757575758, |
| "grad_norm": 0.44898802042007446, |
| "learning_rate": 4.850170930800447e-06, |
| "loss": 0.4165, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.6787878787878787, |
| "grad_norm": 0.3963959217071533, |
| "learning_rate": 4.832550227274624e-06, |
| "loss": 0.4181, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.6818181818181817, |
| "grad_norm": 0.45196351408958435, |
| "learning_rate": 4.81493160549821e-06, |
| "loss": 0.4227, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.6848484848484848, |
| "grad_norm": 0.458886057138443, |
| "learning_rate": 4.7973152845073666e-06, |
| "loss": 0.4223, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.687878787878788, |
| "grad_norm": 0.38994601368904114, |
| "learning_rate": 4.779701483309648e-06, |
| "loss": 0.4004, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.690909090909091, |
| "grad_norm": 0.4091901481151581, |
| "learning_rate": 4.762090420881289e-06, |
| "loss": 0.4087, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.6939393939393939, |
| "grad_norm": 0.4189499020576477, |
| "learning_rate": 4.7444823161644725e-06, |
| "loss": 0.4146, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.696969696969697, |
| "grad_norm": 0.41353610157966614, |
| "learning_rate": 4.726877388064609e-06, |
| "loss": 0.4073, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.43993616104125977, |
| "learning_rate": 4.7092758554476215e-06, |
| "loss": 0.4257, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.7030303030303031, |
| "grad_norm": 0.4330032169818878, |
| "learning_rate": 4.691677937137217e-06, |
| "loss": 0.4148, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.706060606060606, |
| "grad_norm": 0.4641442894935608, |
| "learning_rate": 4.674083851912167e-06, |
| "loss": 0.4046, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.709090909090909, |
| "grad_norm": 0.4123784899711609, |
| "learning_rate": 4.6564938185035954e-06, |
| "loss": 0.4219, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.7121212121212122, |
| "grad_norm": 0.4760873317718506, |
| "learning_rate": 4.638908055592252e-06, |
| "loss": 0.4103, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.7151515151515153, |
| "grad_norm": 0.45056799054145813, |
| "learning_rate": 4.62132678180579e-06, |
| "loss": 0.407, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.7181818181818183, |
| "grad_norm": 0.4206244647502899, |
| "learning_rate": 4.603750215716057e-06, |
| "loss": 0.4194, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.7212121212121212, |
| "grad_norm": 0.40745237469673157, |
| "learning_rate": 4.58617857583638e-06, |
| "loss": 0.4139, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.7242424242424241, |
| "grad_norm": 0.38897326588630676, |
| "learning_rate": 4.568612080618836e-06, |
| "loss": 0.3957, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.42113196849823, |
| "learning_rate": 4.551050948451542e-06, |
| "loss": 0.4134, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.7303030303030305, |
| "grad_norm": 0.39686131477355957, |
| "learning_rate": 4.533495397655946e-06, |
| "loss": 0.3993, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 0.43916943669319153, |
| "learning_rate": 4.515945646484105e-06, |
| "loss": 0.4104, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.7363636363636363, |
| "grad_norm": 0.4046113193035126, |
| "learning_rate": 4.498401913115975e-06, |
| "loss": 0.4172, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.7393939393939393, |
| "grad_norm": 0.39034318923950195, |
| "learning_rate": 4.4808644156567e-06, |
| "loss": 0.4322, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.7424242424242424, |
| "grad_norm": 0.41494661569595337, |
| "learning_rate": 4.463333372133897e-06, |
| "loss": 0.3948, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.7454545454545456, |
| "grad_norm": 0.40686750411987305, |
| "learning_rate": 4.445809000494945e-06, |
| "loss": 0.4179, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.7484848484848485, |
| "grad_norm": 0.4119640290737152, |
| "learning_rate": 4.428291518604283e-06, |
| "loss": 0.4149, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.7515151515151515, |
| "grad_norm": 0.44735684990882874, |
| "learning_rate": 4.410781144240692e-06, |
| "loss": 0.4385, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.7545454545454544, |
| "grad_norm": 0.47033703327178955, |
| "learning_rate": 4.393278095094591e-06, |
| "loss": 0.435, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.7575757575757576, |
| "grad_norm": 0.4110475182533264, |
| "learning_rate": 4.3757825887653345e-06, |
| "loss": 0.4069, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.7606060606060607, |
| "grad_norm": 0.3891106843948364, |
| "learning_rate": 4.358294842758504e-06, |
| "loss": 0.431, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.7636363636363637, |
| "grad_norm": 0.4366550147533417, |
| "learning_rate": 4.340815074483199e-06, |
| "loss": 0.4322, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.7666666666666666, |
| "grad_norm": 0.40945494174957275, |
| "learning_rate": 4.323343501249346e-06, |
| "loss": 0.4164, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.7696969696969695, |
| "grad_norm": 0.4169653654098511, |
| "learning_rate": 4.305880340264985e-06, |
| "loss": 0.4078, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.7727272727272727, |
| "grad_norm": 0.43987858295440674, |
| "learning_rate": 4.2884258086335755e-06, |
| "loss": 0.4, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.7757575757575759, |
| "grad_norm": 0.434929221868515, |
| "learning_rate": 4.270980123351299e-06, |
| "loss": 0.4413, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.7787878787878788, |
| "grad_norm": 0.4042617380619049, |
| "learning_rate": 4.2535435013043535e-06, |
| "loss": 0.4075, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.7818181818181817, |
| "grad_norm": 0.4270903468132019, |
| "learning_rate": 4.2361161592662655e-06, |
| "loss": 0.4372, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.7848484848484847, |
| "grad_norm": 0.37257200479507446, |
| "learning_rate": 4.218698313895192e-06, |
| "loss": 0.4094, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.7878787878787878, |
| "grad_norm": 0.3988351821899414, |
| "learning_rate": 4.2012901817312255e-06, |
| "loss": 0.4247, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.790909090909091, |
| "grad_norm": 0.4023801386356354, |
| "learning_rate": 4.183891979193703e-06, |
| "loss": 0.4177, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.793939393939394, |
| "grad_norm": 0.3908040225505829, |
| "learning_rate": 4.166503922578516e-06, |
| "loss": 0.4479, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.7969696969696969, |
| "grad_norm": 0.39427265524864197, |
| "learning_rate": 4.149126228055419e-06, |
| "loss": 0.3807, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.3684083819389343, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.4074, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.803030303030303, |
| "grad_norm": 0.4389978349208832, |
| "learning_rate": 4.114402789317729e-06, |
| "loss": 0.414, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.8060606060606061, |
| "grad_norm": 0.4645283818244934, |
| "learning_rate": 4.097057476787792e-06, |
| "loss": 0.4244, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.809090909090909, |
| "grad_norm": 0.4098891317844391, |
| "learning_rate": 4.079723389713899e-06, |
| "loss": 0.4185, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.812121212121212, |
| "grad_norm": 0.44468870759010315, |
| "learning_rate": 4.06240074359485e-06, |
| "loss": 0.4144, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.8151515151515152, |
| "grad_norm": 0.40740299224853516, |
| "learning_rate": 4.045089753787214e-06, |
| "loss": 0.4147, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.426737517118454, |
| "learning_rate": 4.027790635502646e-06, |
| "loss": 0.415, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8212121212121213, |
| "grad_norm": 0.4545744061470032, |
| "learning_rate": 4.010503603805214e-06, |
| "loss": 0.4163, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.8242424242424242, |
| "grad_norm": 0.3721947968006134, |
| "learning_rate": 3.993228873608724e-06, |
| "loss": 0.4145, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.8272727272727272, |
| "grad_norm": 0.4343280494213104, |
| "learning_rate": 3.975966659674048e-06, |
| "loss": 0.4032, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.8303030303030303, |
| "grad_norm": 0.3900534212589264, |
| "learning_rate": 3.958717176606456e-06, |
| "loss": 0.4288, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 0.45665907859802246, |
| "learning_rate": 3.941480638852948e-06, |
| "loss": 0.3978, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.8363636363636364, |
| "grad_norm": 0.3952561616897583, |
| "learning_rate": 3.924257260699583e-06, |
| "loss": 0.4265, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.8393939393939394, |
| "grad_norm": 0.4145510494709015, |
| "learning_rate": 3.907047256268822e-06, |
| "loss": 0.4165, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.8424242424242423, |
| "grad_norm": 0.40336254239082336, |
| "learning_rate": 3.8898508395168645e-06, |
| "loss": 0.4075, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.8454545454545455, |
| "grad_norm": 0.41499027609825134, |
| "learning_rate": 3.872668224230979e-06, |
| "loss": 0.4098, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.8484848484848486, |
| "grad_norm": 0.45877882838249207, |
| "learning_rate": 3.855499624026861e-06, |
| "loss": 0.414, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.8515151515151516, |
| "grad_norm": 0.4164450764656067, |
| "learning_rate": 3.838345252345968e-06, |
| "loss": 0.4238, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.8545454545454545, |
| "grad_norm": 0.41379204392433167, |
| "learning_rate": 3.821205322452863e-06, |
| "loss": 0.4007, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.8575757575757574, |
| "grad_norm": 0.38134729862213135, |
| "learning_rate": 3.804080047432574e-06, |
| "loss": 0.4093, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.8606060606060606, |
| "grad_norm": 0.4260639548301697, |
| "learning_rate": 3.786969640187932e-06, |
| "loss": 0.4107, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.8636363636363638, |
| "grad_norm": 0.3941526710987091, |
| "learning_rate": 3.769874313436933e-06, |
| "loss": 0.4056, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 0.44000816345214844, |
| "learning_rate": 3.752794279710094e-06, |
| "loss": 0.4266, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.8696969696969696, |
| "grad_norm": 0.4201476275920868, |
| "learning_rate": 3.735729751347803e-06, |
| "loss": 0.4383, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.8727272727272726, |
| "grad_norm": 0.3836321234703064, |
| "learning_rate": 3.7186809404976877e-06, |
| "loss": 0.4197, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.8757575757575757, |
| "grad_norm": 0.394233763217926, |
| "learning_rate": 3.701648059111972e-06, |
| "loss": 0.4059, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.878787878787879, |
| "grad_norm": 0.4040839672088623, |
| "learning_rate": 3.6846313189448447e-06, |
| "loss": 0.413, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.8818181818181818, |
| "grad_norm": 0.39362630248069763, |
| "learning_rate": 3.667630931549826e-06, |
| "loss": 0.3943, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.8848484848484848, |
| "grad_norm": 0.36628755927085876, |
| "learning_rate": 3.6506471082771357e-06, |
| "loss": 0.4315, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.887878787878788, |
| "grad_norm": 0.4035452902317047, |
| "learning_rate": 3.6336800602710676e-06, |
| "loss": 0.4104, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.8909090909090909, |
| "grad_norm": 0.3663199543952942, |
| "learning_rate": 3.6167299984673655e-06, |
| "loss": 0.4189, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.893939393939394, |
| "grad_norm": 0.4386783242225647, |
| "learning_rate": 3.5997971335905966e-06, |
| "loss": 0.437, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.896969696969697, |
| "grad_norm": 0.4115349352359772, |
| "learning_rate": 3.582881676151536e-06, |
| "loss": 0.4128, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.3740464150905609, |
| "learning_rate": 3.5659838364445505e-06, |
| "loss": 0.4024, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.903030303030303, |
| "grad_norm": 0.39950650930404663, |
| "learning_rate": 3.549103824544975e-06, |
| "loss": 0.4143, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.906060606060606, |
| "grad_norm": 0.4304462671279907, |
| "learning_rate": 3.5322418503065148e-06, |
| "loss": 0.4266, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.9090909090909092, |
| "grad_norm": 0.385495662689209, |
| "learning_rate": 3.5153981233586277e-06, |
| "loss": 0.4211, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.912121212121212, |
| "grad_norm": 0.37890446186065674, |
| "learning_rate": 3.498572853103915e-06, |
| "loss": 0.4192, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.915151515151515, |
| "grad_norm": 0.3713083267211914, |
| "learning_rate": 3.481766248715528e-06, |
| "loss": 0.4203, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.9181818181818182, |
| "grad_norm": 0.38436827063560486, |
| "learning_rate": 3.4649785191345613e-06, |
| "loss": 0.3995, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.9212121212121214, |
| "grad_norm": 0.3605766296386719, |
| "learning_rate": 3.4482098730674577e-06, |
| "loss": 0.3961, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.9242424242424243, |
| "grad_norm": 0.3878589868545532, |
| "learning_rate": 3.4314605189834076e-06, |
| "loss": 0.4062, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.9272727272727272, |
| "grad_norm": 0.4051166772842407, |
| "learning_rate": 3.4147306651117663e-06, |
| "loss": 0.4083, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.9303030303030302, |
| "grad_norm": 0.3763566017150879, |
| "learning_rate": 3.398020519439459e-06, |
| "loss": 0.3853, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 0.3822113573551178, |
| "learning_rate": 3.3813302897083955e-06, |
| "loss": 0.4061, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.9363636363636365, |
| "grad_norm": 0.42593175172805786, |
| "learning_rate": 3.3646601834128924e-06, |
| "loss": 0.4307, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.9393939393939394, |
| "grad_norm": 0.42011937499046326, |
| "learning_rate": 3.348010407797088e-06, |
| "loss": 0.4172, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.9424242424242424, |
| "grad_norm": 0.38729822635650635, |
| "learning_rate": 3.3313811698523677e-06, |
| "loss": 0.4147, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.9454545454545453, |
| "grad_norm": 0.4373640716075897, |
| "learning_rate": 3.3147726763147913e-06, |
| "loss": 0.4049, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.9484848484848485, |
| "grad_norm": 0.3927765488624573, |
| "learning_rate": 3.298185133662525e-06, |
| "loss": 0.4158, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.9515151515151516, |
| "grad_norm": 0.40047505497932434, |
| "learning_rate": 3.2816187481132655e-06, |
| "loss": 0.4098, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.9545454545454546, |
| "grad_norm": 0.4280295670032501, |
| "learning_rate": 3.2650737256216885e-06, |
| "loss": 0.4316, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.9575757575757575, |
| "grad_norm": 0.41508781909942627, |
| "learning_rate": 3.2485502718768814e-06, |
| "loss": 0.4319, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.9606060606060605, |
| "grad_norm": 0.4096485376358032, |
| "learning_rate": 3.2320485922997842e-06, |
| "loss": 0.4026, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.9636363636363636, |
| "grad_norm": 0.37917032837867737, |
| "learning_rate": 3.2155688920406415e-06, |
| "loss": 0.4001, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.9666666666666668, |
| "grad_norm": 0.4002001881599426, |
| "learning_rate": 3.1991113759764493e-06, |
| "loss": 0.4111, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 0.4140879213809967, |
| "learning_rate": 3.1826762487084053e-06, |
| "loss": 0.4144, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9727272727272727, |
| "grad_norm": 0.3691096305847168, |
| "learning_rate": 3.16626371455937e-06, |
| "loss": 0.4068, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.9757575757575756, |
| "grad_norm": 0.42086780071258545, |
| "learning_rate": 3.149873977571324e-06, |
| "loss": 0.3825, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.9787878787878788, |
| "grad_norm": 0.4335564374923706, |
| "learning_rate": 3.133507241502832e-06, |
| "loss": 0.4465, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.981818181818182, |
| "grad_norm": 0.4249853491783142, |
| "learning_rate": 3.1171637098265063e-06, |
| "loss": 0.4113, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.9848484848484849, |
| "grad_norm": 0.43183475732803345, |
| "learning_rate": 3.1008435857264862e-06, |
| "loss": 0.4142, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.9878787878787878, |
| "grad_norm": 0.4180411994457245, |
| "learning_rate": 3.0845470720959027e-06, |
| "loss": 0.4122, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.990909090909091, |
| "grad_norm": 0.44475847482681274, |
| "learning_rate": 3.0682743715343565e-06, |
| "loss": 0.3895, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.993939393939394, |
| "grad_norm": 0.4236146807670593, |
| "learning_rate": 3.0520256863454077e-06, |
| "loss": 0.416, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.996969696969697, |
| "grad_norm": 0.40536797046661377, |
| "learning_rate": 3.035801218534054e-06, |
| "loss": 0.421, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4580300748348236, |
| "learning_rate": 3.019601169804216e-06, |
| "loss": 0.4115, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.003030303030303, |
| "grad_norm": 0.5088746547698975, |
| "learning_rate": 3.00342574155624e-06, |
| "loss": 0.4071, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.006060606060606, |
| "grad_norm": 0.43281951546669006, |
| "learning_rate": 2.9872751348843875e-06, |
| "loss": 0.3828, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.0090909090909093, |
| "grad_norm": 0.40724921226501465, |
| "learning_rate": 2.9711495505743317e-06, |
| "loss": 0.4039, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.012121212121212, |
| "grad_norm": 0.39697274565696716, |
| "learning_rate": 2.9550491891006704e-06, |
| "loss": 0.3864, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.015151515151515, |
| "grad_norm": 0.42503514885902405, |
| "learning_rate": 2.938974250624429e-06, |
| "loss": 0.3684, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.018181818181818, |
| "grad_norm": 0.4273051917552948, |
| "learning_rate": 2.9229249349905686e-06, |
| "loss": 0.3763, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.021212121212121, |
| "grad_norm": 0.41625821590423584, |
| "learning_rate": 2.906901441725507e-06, |
| "loss": 0.4116, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.0242424242424244, |
| "grad_norm": 0.40681958198547363, |
| "learning_rate": 2.8909039700346385e-06, |
| "loss": 0.3684, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.0272727272727273, |
| "grad_norm": 0.4323454797267914, |
| "learning_rate": 2.8749327187998516e-06, |
| "loss": 0.4021, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.0303030303030303, |
| "grad_norm": 0.3722967505455017, |
| "learning_rate": 2.858987886577058e-06, |
| "loss": 0.3634, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.033333333333333, |
| "grad_norm": 0.3991397023200989, |
| "learning_rate": 2.843069671593734e-06, |
| "loss": 0.38, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.036363636363636, |
| "grad_norm": 0.42172548174858093, |
| "learning_rate": 2.8271782717464413e-06, |
| "loss": 0.3739, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.0393939393939395, |
| "grad_norm": 0.39456048607826233, |
| "learning_rate": 2.811313884598373e-06, |
| "loss": 0.3701, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.0424242424242425, |
| "grad_norm": 0.3840007483959198, |
| "learning_rate": 2.795476707376905e-06, |
| "loss": 0.3874, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.0454545454545454, |
| "grad_norm": 0.35414138436317444, |
| "learning_rate": 2.7796669369711294e-06, |
| "loss": 0.3882, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.0484848484848484, |
| "grad_norm": 0.4287824034690857, |
| "learning_rate": 2.7638847699294196e-06, |
| "loss": 0.3977, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.0515151515151517, |
| "grad_norm": 0.402102530002594, |
| "learning_rate": 2.7481304024569823e-06, |
| "loss": 0.3907, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.0545454545454547, |
| "grad_norm": 0.39035528898239136, |
| "learning_rate": 2.7324040304134125e-06, |
| "loss": 0.3939, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.0575757575757576, |
| "grad_norm": 0.3569696843624115, |
| "learning_rate": 2.716705849310265e-06, |
| "loss": 0.3851, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.0606060606060606, |
| "grad_norm": 0.39394411444664, |
| "learning_rate": 2.701036054308629e-06, |
| "loss": 0.3803, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.0636363636363635, |
| "grad_norm": 0.371428519487381, |
| "learning_rate": 2.685394840216688e-06, |
| "loss": 0.3746, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.066666666666667, |
| "grad_norm": 0.3792346715927124, |
| "learning_rate": 2.6697824014873076e-06, |
| "loss": 0.3466, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.06969696969697, |
| "grad_norm": 0.37582284212112427, |
| "learning_rate": 2.654198932215613e-06, |
| "loss": 0.3879, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.0727272727272728, |
| "grad_norm": 0.38081496953964233, |
| "learning_rate": 2.6386446261365874e-06, |
| "loss": 0.3783, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.0757575757575757, |
| "grad_norm": 0.3656942546367645, |
| "learning_rate": 2.623119676622645e-06, |
| "loss": 0.3796, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.0787878787878786, |
| "grad_norm": 0.36368727684020996, |
| "learning_rate": 2.607624276681241e-06, |
| "loss": 0.3497, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.081818181818182, |
| "grad_norm": 0.3967862129211426, |
| "learning_rate": 2.5921586189524694e-06, |
| "loss": 0.3823, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.084848484848485, |
| "grad_norm": 0.37508049607276917, |
| "learning_rate": 2.5767228957066635e-06, |
| "loss": 0.3784, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.087878787878788, |
| "grad_norm": 0.3605888783931732, |
| "learning_rate": 2.561317298842008e-06, |
| "loss": 0.3858, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.090909090909091, |
| "grad_norm": 0.3733239769935608, |
| "learning_rate": 2.5459420198821604e-06, |
| "loss": 0.3648, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.0939393939393938, |
| "grad_norm": 0.3924459218978882, |
| "learning_rate": 2.530597249973856e-06, |
| "loss": 0.3747, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.096969696969697, |
| "grad_norm": 0.37564197182655334, |
| "learning_rate": 2.51528317988454e-06, |
| "loss": 0.3581, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.3885200321674347, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.3786, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.103030303030303, |
| "grad_norm": 0.36674779653549194, |
| "learning_rate": 2.4847479003219926e-06, |
| "loss": 0.3707, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.106060606060606, |
| "grad_norm": 0.363193154335022, |
| "learning_rate": 2.4695270704658753e-06, |
| "loss": 0.3921, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.109090909090909, |
| "grad_norm": 0.4087706506252289, |
| "learning_rate": 2.454337699658267e-06, |
| "loss": 0.3795, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.1121212121212123, |
| "grad_norm": 0.37886175513267517, |
| "learning_rate": 2.439179976734677e-06, |
| "loss": 0.3594, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.1151515151515152, |
| "grad_norm": 0.3760082721710205, |
| "learning_rate": 2.4240540901371727e-06, |
| "loss": 0.3462, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.118181818181818, |
| "grad_norm": 0.38874131441116333, |
| "learning_rate": 2.4089602279120224e-06, |
| "loss": 0.374, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 0.39027926325798035, |
| "learning_rate": 2.393898577707371e-06, |
| "loss": 0.353, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.124242424242424, |
| "grad_norm": 0.34905844926834106, |
| "learning_rate": 2.3788693267708975e-06, |
| "loss": 0.3712, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.1272727272727274, |
| "grad_norm": 0.3495723307132721, |
| "learning_rate": 2.363872661947488e-06, |
| "loss": 0.3594, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.1303030303030304, |
| "grad_norm": 0.3776390850543976, |
| "learning_rate": 2.3489087696769225e-06, |
| "loss": 0.3803, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 0.3674999177455902, |
| "learning_rate": 2.333977835991545e-06, |
| "loss": 0.3787, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.1363636363636362, |
| "grad_norm": 0.37632232904434204, |
| "learning_rate": 2.319080046513954e-06, |
| "loss": 0.3864, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.1393939393939396, |
| "grad_norm": 0.3536114990711212, |
| "learning_rate": 2.3042155864547024e-06, |
| "loss": 0.3877, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.1424242424242426, |
| "grad_norm": 0.37344491481781006, |
| "learning_rate": 2.2893846406099847e-06, |
| "loss": 0.3827, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.1454545454545455, |
| "grad_norm": 0.34745898842811584, |
| "learning_rate": 2.274587393359342e-06, |
| "loss": 0.3708, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.1484848484848484, |
| "grad_norm": 0.35172003507614136, |
| "learning_rate": 2.2598240286633787e-06, |
| "loss": 0.3932, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.1515151515151514, |
| "grad_norm": 0.35878872871398926, |
| "learning_rate": 2.245094730061463e-06, |
| "loss": 0.3473, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.1545454545454543, |
| "grad_norm": 0.359260231256485, |
| "learning_rate": 2.230399680669449e-06, |
| "loss": 0.397, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.1575757575757577, |
| "grad_norm": 0.3743046522140503, |
| "learning_rate": 2.215739063177409e-06, |
| "loss": 0.3694, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.1606060606060606, |
| "grad_norm": 0.37723642587661743, |
| "learning_rate": 2.2011130598473498e-06, |
| "loss": 0.3703, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.1636363636363636, |
| "grad_norm": 0.3454684615135193, |
| "learning_rate": 2.1865218525109496e-06, |
| "loss": 0.3637, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.1666666666666665, |
| "grad_norm": 0.3447061777114868, |
| "learning_rate": 2.171965622567308e-06, |
| "loss": 0.3653, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.16969696969697, |
| "grad_norm": 0.35390952229499817, |
| "learning_rate": 2.1574445509806764e-06, |
| "loss": 0.3695, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.172727272727273, |
| "grad_norm": 0.3610280752182007, |
| "learning_rate": 2.1429588182782147e-06, |
| "loss": 0.3456, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.175757575757576, |
| "grad_norm": 0.3974767029285431, |
| "learning_rate": 2.1285086045477515e-06, |
| "loss": 0.3989, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.1787878787878787, |
| "grad_norm": 0.38098010420799255, |
| "learning_rate": 2.1140940894355345e-06, |
| "loss": 0.3752, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.37288904190063477, |
| "learning_rate": 2.09971545214401e-06, |
| "loss": 0.378, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.184848484848485, |
| "grad_norm": 0.34389159083366394, |
| "learning_rate": 2.0853728714295807e-06, |
| "loss": 0.3581, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.187878787878788, |
| "grad_norm": 0.35233569145202637, |
| "learning_rate": 2.0710665256003994e-06, |
| "loss": 0.3677, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.190909090909091, |
| "grad_norm": 0.37292009592056274, |
| "learning_rate": 2.0567965925141366e-06, |
| "loss": 0.3593, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.193939393939394, |
| "grad_norm": 0.39068883657455444, |
| "learning_rate": 2.0425632495757776e-06, |
| "loss": 0.3873, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.196969696969697, |
| "grad_norm": 0.386063814163208, |
| "learning_rate": 2.028366673735421e-06, |
| "loss": 0.3601, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.410195529460907, |
| "learning_rate": 2.0142070414860704e-06, |
| "loss": 0.3771, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.203030303030303, |
| "grad_norm": 0.34756624698638916, |
| "learning_rate": 2.0000845288614396e-06, |
| "loss": 0.377, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.206060606060606, |
| "grad_norm": 0.4086437523365021, |
| "learning_rate": 1.9859993114337773e-06, |
| "loss": 0.3604, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.209090909090909, |
| "grad_norm": 0.42168623208999634, |
| "learning_rate": 1.971951564311668e-06, |
| "loss": 0.3511, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.212121212121212, |
| "grad_norm": 0.39504092931747437, |
| "learning_rate": 1.9579414621378624e-06, |
| "loss": 0.3804, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.2151515151515153, |
| "grad_norm": 0.3468715250492096, |
| "learning_rate": 1.943969179087112e-06, |
| "loss": 0.3705, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.2181818181818183, |
| "grad_norm": 0.34179291129112244, |
| "learning_rate": 1.9300348888639915e-06, |
| "loss": 0.3765, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.221212121212121, |
| "grad_norm": 0.3874762952327728, |
| "learning_rate": 1.916138764700747e-06, |
| "loss": 0.3444, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.224242424242424, |
| "grad_norm": 0.36602982878685, |
| "learning_rate": 1.902280979355146e-06, |
| "loss": 0.3676, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.227272727272727, |
| "grad_norm": 0.37903892993927, |
| "learning_rate": 1.8884617051083183e-06, |
| "loss": 0.3611, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.2303030303030305, |
| "grad_norm": 0.35178276896476746, |
| "learning_rate": 1.8746811137626208e-06, |
| "loss": 0.3738, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.2333333333333334, |
| "grad_norm": 0.3382170796394348, |
| "learning_rate": 1.8609393766395083e-06, |
| "loss": 0.3325, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.2363636363636363, |
| "grad_norm": 0.3748219907283783, |
| "learning_rate": 1.8472366645773892e-06, |
| "loss": 0.38, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.2393939393939393, |
| "grad_norm": 0.38781046867370605, |
| "learning_rate": 1.8335731479295105e-06, |
| "loss": 0.3989, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.242424242424242, |
| "grad_norm": 0.3649122714996338, |
| "learning_rate": 1.8199489965618433e-06, |
| "loss": 0.3884, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.2454545454545456, |
| "grad_norm": 0.35382363200187683, |
| "learning_rate": 1.8063643798509594e-06, |
| "loss": 0.3715, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.2484848484848485, |
| "grad_norm": 0.37757110595703125, |
| "learning_rate": 1.7928194666819398e-06, |
| "loss": 0.3854, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.2515151515151515, |
| "grad_norm": 0.3414751887321472, |
| "learning_rate": 1.7793144254462601e-06, |
| "loss": 0.3622, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.2545454545454544, |
| "grad_norm": 0.36525917053222656, |
| "learning_rate": 1.7658494240397127e-06, |
| "loss": 0.3773, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.257575757575758, |
| "grad_norm": 0.3339369297027588, |
| "learning_rate": 1.7524246298603053e-06, |
| "loss": 0.3663, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.2606060606060607, |
| "grad_norm": 0.3457212746143341, |
| "learning_rate": 1.739040209806186e-06, |
| "loss": 0.3829, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.2636363636363637, |
| "grad_norm": 0.3451812267303467, |
| "learning_rate": 1.7256963302735752e-06, |
| "loss": 0.3826, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.2666666666666666, |
| "grad_norm": 0.35478025674819946, |
| "learning_rate": 1.7123931571546826e-06, |
| "loss": 0.3446, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.2696969696969695, |
| "grad_norm": 0.3515215218067169, |
| "learning_rate": 1.6991308558356545e-06, |
| "loss": 0.3868, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.3347879946231842, |
| "learning_rate": 1.68590959119452e-06, |
| "loss": 0.3574, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.275757575757576, |
| "grad_norm": 0.3563007116317749, |
| "learning_rate": 1.6727295275991311e-06, |
| "loss": 0.3976, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.278787878787879, |
| "grad_norm": 0.3718889057636261, |
| "learning_rate": 1.6595908289051266e-06, |
| "loss": 0.38, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.2818181818181817, |
| "grad_norm": 0.35888758301734924, |
| "learning_rate": 1.646493658453896e-06, |
| "loss": 0.3813, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.2848484848484847, |
| "grad_norm": 0.34056735038757324, |
| "learning_rate": 1.6334381790705439e-06, |
| "loss": 0.3976, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.287878787878788, |
| "grad_norm": 0.3530712127685547, |
| "learning_rate": 1.6204245530618662e-06, |
| "loss": 0.3947, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.290909090909091, |
| "grad_norm": 0.3484843373298645, |
| "learning_rate": 1.6074529422143398e-06, |
| "loss": 0.3775, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.293939393939394, |
| "grad_norm": 0.3389803469181061, |
| "learning_rate": 1.5945235077921011e-06, |
| "loss": 0.3649, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.296969696969697, |
| "grad_norm": 0.35619989037513733, |
| "learning_rate": 1.5816364105349451e-06, |
| "loss": 0.3794, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.3550373315811157, |
| "learning_rate": 1.5687918106563326e-06, |
| "loss": 0.3992, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.303030303030303, |
| "grad_norm": 0.34751543402671814, |
| "learning_rate": 1.5559898678413898e-06, |
| "loss": 0.3791, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.306060606060606, |
| "grad_norm": 0.35747015476226807, |
| "learning_rate": 1.5432307412449244e-06, |
| "loss": 0.4039, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.309090909090909, |
| "grad_norm": 0.35150715708732605, |
| "learning_rate": 1.5305145894894547e-06, |
| "loss": 0.3784, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.312121212121212, |
| "grad_norm": 0.35025930404663086, |
| "learning_rate": 1.517841570663231e-06, |
| "loss": 0.3682, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.315151515151515, |
| "grad_norm": 0.3555355370044708, |
| "learning_rate": 1.5052118423182688e-06, |
| "loss": 0.3708, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.3181818181818183, |
| "grad_norm": 0.37226593494415283, |
| "learning_rate": 1.4926255614683931e-06, |
| "loss": 0.3976, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.3212121212121213, |
| "grad_norm": 0.36371520161628723, |
| "learning_rate": 1.48008288458729e-06, |
| "loss": 0.3852, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.324242424242424, |
| "grad_norm": 0.3754095137119293, |
| "learning_rate": 1.4675839676065534e-06, |
| "loss": 0.3574, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.327272727272727, |
| "grad_norm": 0.38677626848220825, |
| "learning_rate": 1.4551289659137497e-06, |
| "loss": 0.3849, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.33030303030303, |
| "grad_norm": 0.35939913988113403, |
| "learning_rate": 1.442718034350492e-06, |
| "loss": 0.3761, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.3449608087539673, |
| "learning_rate": 1.4303513272105057e-06, |
| "loss": 0.3897, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.3363636363636364, |
| "grad_norm": 0.34984907507896423, |
| "learning_rate": 1.4180289982377138e-06, |
| "loss": 0.3793, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.3393939393939394, |
| "grad_norm": 0.38575461506843567, |
| "learning_rate": 1.4057512006243312e-06, |
| "loss": 0.4093, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.3424242424242423, |
| "grad_norm": 0.38205093145370483, |
| "learning_rate": 1.3935180870089503e-06, |
| "loss": 0.3901, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.3454545454545457, |
| "grad_norm": 0.353354275226593, |
| "learning_rate": 1.3813298094746491e-06, |
| "loss": 0.3753, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.3484848484848486, |
| "grad_norm": 0.3562178313732147, |
| "learning_rate": 1.3691865195471037e-06, |
| "loss": 0.372, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.3515151515151516, |
| "grad_norm": 0.325185626745224, |
| "learning_rate": 1.357088368192696e-06, |
| "loss": 0.3668, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.3545454545454545, |
| "grad_norm": 0.3486754596233368, |
| "learning_rate": 1.345035505816642e-06, |
| "loss": 0.3965, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.3575757575757574, |
| "grad_norm": 0.36219727993011475, |
| "learning_rate": 1.3330280822611246e-06, |
| "loss": 0.3498, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.3606060606060604, |
| "grad_norm": 0.37098029255867004, |
| "learning_rate": 1.3210662468034246e-06, |
| "loss": 0.3865, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.35625922679901123, |
| "learning_rate": 1.3091501481540676e-06, |
| "loss": 0.3711, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.3666666666666667, |
| "grad_norm": 0.36688151955604553, |
| "learning_rate": 1.297279934454978e-06, |
| "loss": 0.3715, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.3696969696969696, |
| "grad_norm": 0.36043912172317505, |
| "learning_rate": 1.2854557532776323e-06, |
| "loss": 0.3581, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.3727272727272726, |
| "grad_norm": 0.33972594141960144, |
| "learning_rate": 1.2736777516212267e-06, |
| "loss": 0.3637, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.375757575757576, |
| "grad_norm": 0.3574168384075165, |
| "learning_rate": 1.2619460759108521e-06, |
| "loss": 0.3751, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.378787878787879, |
| "grad_norm": 0.35279446840286255, |
| "learning_rate": 1.250260871995671e-06, |
| "loss": 0.4013, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.381818181818182, |
| "grad_norm": 0.3540102541446686, |
| "learning_rate": 1.238622285147103e-06, |
| "loss": 0.3383, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.3848484848484848, |
| "grad_norm": 0.37313178181648254, |
| "learning_rate": 1.2270304600570193e-06, |
| "loss": 0.3719, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.3878787878787877, |
| "grad_norm": 0.3584001958370209, |
| "learning_rate": 1.2154855408359507e-06, |
| "loss": 0.3605, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.390909090909091, |
| "grad_norm": 0.36906707286834717, |
| "learning_rate": 1.2039876710112847e-06, |
| "loss": 0.3893, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.393939393939394, |
| "grad_norm": 0.33588269352912903, |
| "learning_rate": 1.1925369935254872e-06, |
| "loss": 0.3639, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.396969696969697, |
| "grad_norm": 0.3435981869697571, |
| "learning_rate": 1.1811336507343296e-06, |
| "loss": 0.3848, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.35146281123161316, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.3858, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.403030303030303, |
| "grad_norm": 0.35498523712158203, |
| "learning_rate": 1.1584695357148968e-06, |
| "loss": 0.3549, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.4060606060606062, |
| "grad_norm": 0.34189391136169434, |
| "learning_rate": 1.1472090452487728e-06, |
| "loss": 0.3652, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.409090909090909, |
| "grad_norm": 0.35392066836357117, |
| "learning_rate": 1.135996452998085e-06, |
| "loss": 0.3623, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.412121212121212, |
| "grad_norm": 0.36120468378067017, |
| "learning_rate": 1.1248318983587052e-06, |
| "loss": 0.3887, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.415151515151515, |
| "grad_norm": 0.334212064743042, |
| "learning_rate": 1.1137155201293021e-06, |
| "loss": 0.3756, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.418181818181818, |
| "grad_norm": 0.33589741587638855, |
| "learning_rate": 1.1026474565096068e-06, |
| "loss": 0.3827, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.4212121212121214, |
| "grad_norm": 0.3547380864620209, |
| "learning_rate": 1.0916278450986983e-06, |
| "loss": 0.3803, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.4242424242424243, |
| "grad_norm": 0.3462902307510376, |
| "learning_rate": 1.0806568228932995e-06, |
| "loss": 0.365, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.4272727272727272, |
| "grad_norm": 0.37670135498046875, |
| "learning_rate": 1.0697345262860638e-06, |
| "loss": 0.3824, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.43030303030303, |
| "grad_norm": 0.35779649019241333, |
| "learning_rate": 1.0588610910638825e-06, |
| "loss": 0.3607, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.4333333333333336, |
| "grad_norm": 0.35179200768470764, |
| "learning_rate": 1.0480366524062041e-06, |
| "loss": 0.3784, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.4363636363636365, |
| "grad_norm": 0.35515978932380676, |
| "learning_rate": 1.0372613448833429e-06, |
| "loss": 0.3727, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.4393939393939394, |
| "grad_norm": 0.34938177466392517, |
| "learning_rate": 1.0265353024548103e-06, |
| "loss": 0.3747, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.4424242424242424, |
| "grad_norm": 0.3501240909099579, |
| "learning_rate": 1.0158586584676533e-06, |
| "loss": 0.3809, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.4454545454545453, |
| "grad_norm": 0.35644036531448364, |
| "learning_rate": 1.0052315456547934e-06, |
| "loss": 0.3535, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.4484848484848483, |
| "grad_norm": 0.3286396861076355, |
| "learning_rate": 9.94654096133374e-07, |
| "loss": 0.3669, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.4515151515151516, |
| "grad_norm": 0.3664129972457886, |
| "learning_rate": 9.841264414031198e-07, |
| "loss": 0.3937, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.4545454545454546, |
| "grad_norm": 0.34694886207580566, |
| "learning_rate": 9.73648712344707e-07, |
| "loss": 0.3958, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.4575757575757575, |
| "grad_norm": 0.3321562111377716, |
| "learning_rate": 9.632210392181274e-07, |
| "loss": 0.3559, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.4606060606060605, |
| "grad_norm": 0.34712937474250793, |
| "learning_rate": 9.528435516610729e-07, |
| "loss": 0.3665, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.463636363636364, |
| "grad_norm": 0.32587698101997375, |
| "learning_rate": 9.425163786873292e-07, |
| "loss": 0.3712, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.466666666666667, |
| "grad_norm": 0.3688627779483795, |
| "learning_rate": 9.322396486851626e-07, |
| "loss": 0.3859, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.4696969696969697, |
| "grad_norm": 0.3505525588989258, |
| "learning_rate": 9.220134894157285e-07, |
| "loss": 0.3716, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.4727272727272727, |
| "grad_norm": 0.34579452872276306, |
| "learning_rate": 9.118380280114858e-07, |
| "loss": 0.3613, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.4757575757575756, |
| "grad_norm": 0.34811607003211975, |
| "learning_rate": 9.017133909746095e-07, |
| "loss": 0.3963, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.4787878787878785, |
| "grad_norm": 0.32743358612060547, |
| "learning_rate": 8.916397041754238e-07, |
| "loss": 0.379, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.481818181818182, |
| "grad_norm": 0.3497425615787506, |
| "learning_rate": 8.816170928508367e-07, |
| "loss": 0.3966, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.484848484848485, |
| "grad_norm": 0.3664569854736328, |
| "learning_rate": 8.716456816027791e-07, |
| "loss": 0.3804, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.487878787878788, |
| "grad_norm": 0.3476223349571228, |
| "learning_rate": 8.617255943966579e-07, |
| "loss": 0.3947, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.4909090909090907, |
| "grad_norm": 0.35610514879226685, |
| "learning_rate": 8.518569545598198e-07, |
| "loss": 0.377, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.493939393939394, |
| "grad_norm": 0.3268238306045532, |
| "learning_rate": 8.420398847800093e-07, |
| "loss": 0.3939, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.496969696969697, |
| "grad_norm": 0.3369584381580353, |
| "learning_rate": 8.322745071038474e-07, |
| "loss": 0.4004, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.3504047691822052, |
| "learning_rate": 8.225609429353187e-07, |
| "loss": 0.3667, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.503030303030303, |
| "grad_norm": 0.36456790566444397, |
| "learning_rate": 8.128993130342538e-07, |
| "loss": 0.3786, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.506060606060606, |
| "grad_norm": 0.34629517793655396, |
| "learning_rate": 8.032897375148324e-07, |
| "loss": 0.3591, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.509090909090909, |
| "grad_norm": 0.3457425832748413, |
| "learning_rate": 7.937323358440935e-07, |
| "loss": 0.4037, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.512121212121212, |
| "grad_norm": 0.3337177336215973, |
| "learning_rate": 7.84227226840445e-07, |
| "loss": 0.4071, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.515151515151515, |
| "grad_norm": 0.34807246923446655, |
| "learning_rate": 7.747745286721852e-07, |
| "loss": 0.3927, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.518181818181818, |
| "grad_norm": 0.33292651176452637, |
| "learning_rate": 7.653743588560387e-07, |
| "loss": 0.3601, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.5212121212121215, |
| "grad_norm": 0.3402540385723114, |
| "learning_rate": 7.560268342556948e-07, |
| "loss": 0.3879, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.5242424242424244, |
| "grad_norm": 0.32598671317100525, |
| "learning_rate": 7.467320710803505e-07, |
| "loss": 0.377, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.5272727272727273, |
| "grad_norm": 0.33640727400779724, |
| "learning_rate": 7.374901848832683e-07, |
| "loss": 0.3761, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.5303030303030303, |
| "grad_norm": 0.3488267958164215, |
| "learning_rate": 7.283012905603437e-07, |
| "loss": 0.3617, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 0.3477902412414551, |
| "learning_rate": 7.191655023486682e-07, |
| "loss": 0.3518, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.536363636363636, |
| "grad_norm": 0.3296281695365906, |
| "learning_rate": 7.100829338251147e-07, |
| "loss": 0.3777, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.5393939393939395, |
| "grad_norm": 0.3473811149597168, |
| "learning_rate": 7.010536979049277e-07, |
| "loss": 0.3688, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.5424242424242425, |
| "grad_norm": 0.3439598083496094, |
| "learning_rate": 6.920779068403127e-07, |
| "loss": 0.379, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.3552432656288147, |
| "learning_rate": 6.831556722190453e-07, |
| "loss": 0.3718, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.5484848484848484, |
| "grad_norm": 0.3223789930343628, |
| "learning_rate": 6.74287104963085e-07, |
| "loss": 0.3776, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.5515151515151517, |
| "grad_norm": 0.31215062737464905, |
| "learning_rate": 6.654723153271913e-07, |
| "loss": 0.394, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.5545454545454547, |
| "grad_norm": 0.3523038625717163, |
| "learning_rate": 6.567114128975571e-07, |
| "loss": 0.3655, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.5575757575757576, |
| "grad_norm": 0.3508360981941223, |
| "learning_rate": 6.480045065904461e-07, |
| "loss": 0.3657, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.5606060606060606, |
| "grad_norm": 0.3497951030731201, |
| "learning_rate": 6.393517046508363e-07, |
| "loss": 0.3754, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.5636363636363635, |
| "grad_norm": 0.3315987288951874, |
| "learning_rate": 6.307531146510754e-07, |
| "loss": 0.4066, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.5666666666666664, |
| "grad_norm": 0.33407914638519287, |
| "learning_rate": 6.222088434895462e-07, |
| "loss": 0.3518, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.56969696969697, |
| "grad_norm": 0.33812716603279114, |
| "learning_rate": 6.137189973893331e-07, |
| "loss": 0.3476, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.5727272727272728, |
| "grad_norm": 0.3333025574684143, |
| "learning_rate": 6.052836818969027e-07, |
| "loss": 0.3766, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.5757575757575757, |
| "grad_norm": 0.3389793932437897, |
| "learning_rate": 5.969030018807953e-07, |
| "loss": 0.3737, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.5787878787878786, |
| "grad_norm": 0.3426154553890228, |
| "learning_rate": 5.885770615303182e-07, |
| "loss": 0.366, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.581818181818182, |
| "grad_norm": 0.3441448211669922, |
| "learning_rate": 5.803059643542491e-07, |
| "loss": 0.3675, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.584848484848485, |
| "grad_norm": 0.3547995984554291, |
| "learning_rate": 5.720898131795494e-07, |
| "loss": 0.3675, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.587878787878788, |
| "grad_norm": 0.32679978013038635, |
| "learning_rate": 5.639287101500923e-07, |
| "loss": 0.3864, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.590909090909091, |
| "grad_norm": 0.3426445424556732, |
| "learning_rate": 5.558227567253832e-07, |
| "loss": 0.3681, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.5939393939393938, |
| "grad_norm": 0.33950114250183105, |
| "learning_rate": 5.477720536793035e-07, |
| "loss": 0.3866, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.5969696969696967, |
| "grad_norm": 0.35901251435279846, |
| "learning_rate": 5.397767010988614e-07, |
| "loss": 0.3779, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.34515222907066345, |
| "learning_rate": 5.318367983829393e-07, |
| "loss": 0.3632, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.603030303030303, |
| "grad_norm": 0.34496960043907166, |
| "learning_rate": 5.239524442410627e-07, |
| "loss": 0.4095, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.606060606060606, |
| "grad_norm": 0.33476129174232483, |
| "learning_rate": 5.16123736692175e-07, |
| "loss": 0.3637, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.6090909090909093, |
| "grad_norm": 0.31554561853408813, |
| "learning_rate": 5.083507730634152e-07, |
| "loss": 0.3949, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.6121212121212123, |
| "grad_norm": 0.3599357306957245, |
| "learning_rate": 5.006336499889075e-07, |
| "loss": 0.3639, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.6151515151515152, |
| "grad_norm": 0.3472028970718384, |
| "learning_rate": 4.929724634085664e-07, |
| "loss": 0.3628, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.618181818181818, |
| "grad_norm": 0.32713690400123596, |
| "learning_rate": 4.853673085668947e-07, |
| "loss": 0.3867, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.621212121212121, |
| "grad_norm": 0.3341231346130371, |
| "learning_rate": 4.778182800118053e-07, |
| "loss": 0.368, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.624242424242424, |
| "grad_norm": 0.3199952244758606, |
| "learning_rate": 4.7032547159344466e-07, |
| "loss": 0.3857, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.6272727272727274, |
| "grad_norm": 0.34618866443634033, |
| "learning_rate": 4.628889764630279e-07, |
| "loss": 0.3761, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.6303030303030304, |
| "grad_norm": 0.3341084420681, |
| "learning_rate": 4.5550888707167505e-07, |
| "loss": 0.3584, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.6333333333333333, |
| "grad_norm": 0.3550304174423218, |
| "learning_rate": 4.481852951692672e-07, |
| "loss": 0.3694, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.6363636363636362, |
| "grad_norm": 0.3328497111797333, |
| "learning_rate": 4.4091829180330503e-07, |
| "loss": 0.3694, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.6393939393939396, |
| "grad_norm": 0.32884612679481506, |
| "learning_rate": 4.33707967317773e-07, |
| "loss": 0.3703, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.6424242424242426, |
| "grad_norm": 0.36629319190979004, |
| "learning_rate": 4.26554411352022e-07, |
| "loss": 0.3762, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.6454545454545455, |
| "grad_norm": 0.3324334919452667, |
| "learning_rate": 4.194577128396521e-07, |
| "loss": 0.3664, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.6484848484848484, |
| "grad_norm": 0.3406725227832794, |
| "learning_rate": 4.1241796000740296e-07, |
| "loss": 0.3995, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.6515151515151514, |
| "grad_norm": 0.3479924201965332, |
| "learning_rate": 4.054352403740641e-07, |
| "loss": 0.3846, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.6545454545454543, |
| "grad_norm": 0.3131670355796814, |
| "learning_rate": 3.985096407493838e-07, |
| "loss": 0.3806, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.6575757575757577, |
| "grad_norm": 0.3157781958580017, |
| "learning_rate": 3.916412472329884e-07, |
| "loss": 0.3826, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.6606060606060606, |
| "grad_norm": 0.3542560338973999, |
| "learning_rate": 3.8483014521331184e-07, |
| "loss": 0.3611, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.6636363636363636, |
| "grad_norm": 0.34196749329566956, |
| "learning_rate": 3.7807641936653984e-07, |
| "loss": 0.3815, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.3302360475063324, |
| "learning_rate": 3.7138015365554834e-07, |
| "loss": 0.3746, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.66969696969697, |
| "grad_norm": 0.3370774984359741, |
| "learning_rate": 3.6474143132886607e-07, |
| "loss": 0.3691, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.672727272727273, |
| "grad_norm": 0.3441545367240906, |
| "learning_rate": 3.581603349196372e-07, |
| "loss": 0.3679, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.675757575757576, |
| "grad_norm": 0.3569502830505371, |
| "learning_rate": 3.516369462445968e-07, |
| "loss": 0.354, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.6787878787878787, |
| "grad_norm": 0.34038347005844116, |
| "learning_rate": 3.4517134640305097e-07, |
| "loss": 0.4109, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.6818181818181817, |
| "grad_norm": 0.325907438993454, |
| "learning_rate": 3.3876361577587115e-07, |
| "loss": 0.3982, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.6848484848484846, |
| "grad_norm": 0.32801732420921326, |
| "learning_rate": 3.324138340244948e-07, |
| "loss": 0.3947, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.687878787878788, |
| "grad_norm": 0.3471670150756836, |
| "learning_rate": 3.261220800899323e-07, |
| "loss": 0.3918, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.690909090909091, |
| "grad_norm": 0.34977656602859497, |
| "learning_rate": 3.1988843219178776e-07, |
| "loss": 0.3774, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.693939393939394, |
| "grad_norm": 0.3262813985347748, |
| "learning_rate": 3.1371296782728875e-07, |
| "loss": 0.3744, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.6969696969696972, |
| "grad_norm": 0.33313867449760437, |
| "learning_rate": 3.0759576377031697e-07, |
| "loss": 0.3755, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.3310135006904602, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.388, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.703030303030303, |
| "grad_norm": 0.342500776052475, |
| "learning_rate": 2.955364400520583e-07, |
| "loss": 0.3875, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.706060606060606, |
| "grad_norm": 0.3611791133880615, |
| "learning_rate": 2.8959447031327916e-07, |
| "loss": 0.3618, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.709090909090909, |
| "grad_norm": 0.32603222131729126, |
| "learning_rate": 2.8371106072518194e-07, |
| "loss": 0.3921, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.712121212121212, |
| "grad_norm": 0.3323443531990051, |
| "learning_rate": 2.7788628443080003e-07, |
| "loss": 0.3544, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.7151515151515153, |
| "grad_norm": 0.31575021147727966, |
| "learning_rate": 2.7212021384423415e-07, |
| "loss": 0.363, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.7181818181818183, |
| "grad_norm": 0.34948068857192993, |
| "learning_rate": 2.664129206497479e-07, |
| "loss": 0.3999, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.721212121212121, |
| "grad_norm": 0.3439748287200928, |
| "learning_rate": 2.6076447580088426e-07, |
| "loss": 0.3744, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.724242424242424, |
| "grad_norm": 0.33368557691574097, |
| "learning_rate": 2.5517494951957544e-07, |
| "loss": 0.3768, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.33264872431755066, |
| "learning_rate": 2.4964441129527337e-07, |
| "loss": 0.3942, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.7303030303030305, |
| "grad_norm": 0.36262091994285583, |
| "learning_rate": 2.441729298840861e-07, |
| "loss": 0.381, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.7333333333333334, |
| "grad_norm": 0.3555268943309784, |
| "learning_rate": 2.3876057330792344e-07, |
| "loss": 0.3638, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.7363636363636363, |
| "grad_norm": 0.33674266934394836, |
| "learning_rate": 2.3340740885364922e-07, |
| "loss": 0.3484, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.7393939393939393, |
| "grad_norm": 0.34257423877716064, |
| "learning_rate": 2.2811350307224534e-07, |
| "loss": 0.3826, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.742424242424242, |
| "grad_norm": 0.3373337984085083, |
| "learning_rate": 2.2287892177798642e-07, |
| "loss": 0.364, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.7454545454545456, |
| "grad_norm": 0.3389836847782135, |
| "learning_rate": 2.1770373004762035e-07, |
| "loss": 0.3689, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.7484848484848485, |
| "grad_norm": 0.32391688227653503, |
| "learning_rate": 2.1258799221955618e-07, |
| "loss": 0.3962, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.7515151515151515, |
| "grad_norm": 0.3334949314594269, |
| "learning_rate": 2.0753177189307138e-07, |
| "loss": 0.3799, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.7545454545454544, |
| "grad_norm": 0.3442675769329071, |
| "learning_rate": 2.0253513192751374e-07, |
| "loss": 0.3931, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.757575757575758, |
| "grad_norm": 0.3674370348453522, |
| "learning_rate": 1.9759813444152342e-07, |
| "loss": 0.3712, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.7606060606060607, |
| "grad_norm": 0.33962857723236084, |
| "learning_rate": 1.9272084081226272e-07, |
| "loss": 0.3621, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.7636363636363637, |
| "grad_norm": 0.31753936409950256, |
| "learning_rate": 1.8790331167464758e-07, |
| "loss": 0.383, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.7666666666666666, |
| "grad_norm": 0.33068257570266724, |
| "learning_rate": 1.8314560692059836e-07, |
| "loss": 0.3827, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.7696969696969695, |
| "grad_norm": 0.32340729236602783, |
| "learning_rate": 1.7844778569829412e-07, |
| "loss": 0.3736, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.7727272727272725, |
| "grad_norm": 0.3291246294975281, |
| "learning_rate": 1.738099064114368e-07, |
| "loss": 0.3689, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.775757575757576, |
| "grad_norm": 0.32609397172927856, |
| "learning_rate": 1.6923202671852379e-07, |
| "loss": 0.4106, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.778787878787879, |
| "grad_norm": 0.3484046459197998, |
| "learning_rate": 1.6471420353213362e-07, |
| "loss": 0.3779, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.7818181818181817, |
| "grad_norm": 0.32795125246047974, |
| "learning_rate": 1.6025649301821877e-07, |
| "loss": 0.3986, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.7848484848484847, |
| "grad_norm": 0.3558945655822754, |
| "learning_rate": 1.5585895059540336e-07, |
| "loss": 0.3638, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.787878787878788, |
| "grad_norm": 0.32118871808052063, |
| "learning_rate": 1.5152163093429762e-07, |
| "loss": 0.3879, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.790909090909091, |
| "grad_norm": 0.34035980701446533, |
| "learning_rate": 1.4724458795681962e-07, |
| "loss": 0.3742, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.793939393939394, |
| "grad_norm": 0.3438761234283447, |
| "learning_rate": 1.4302787483551962e-07, |
| "loss": 0.381, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.796969696969697, |
| "grad_norm": 0.3301991820335388, |
| "learning_rate": 1.388715439929239e-07, |
| "loss": 0.3468, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.36519476771354675, |
| "learning_rate": 1.3477564710088097e-07, |
| "loss": 0.3593, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.8030303030303028, |
| "grad_norm": 0.3334772288799286, |
| "learning_rate": 1.3074023507991917e-07, |
| "loss": 0.3835, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.806060606060606, |
| "grad_norm": 0.3869641125202179, |
| "learning_rate": 1.267653580986139e-07, |
| "loss": 0.3666, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.809090909090909, |
| "grad_norm": 0.3358801007270813, |
| "learning_rate": 1.2285106557296479e-07, |
| "loss": 0.3873, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.812121212121212, |
| "grad_norm": 0.32431620359420776, |
| "learning_rate": 1.1899740616578004e-07, |
| "loss": 0.3619, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.8151515151515154, |
| "grad_norm": 0.34791114926338196, |
| "learning_rate": 1.1520442778607032e-07, |
| "loss": 0.3643, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.8181818181818183, |
| "grad_norm": 0.33882835507392883, |
| "learning_rate": 1.1147217758845752e-07, |
| "loss": 0.3669, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.8212121212121213, |
| "grad_norm": 0.35345929861068726, |
| "learning_rate": 1.0780070197258408e-07, |
| "loss": 0.3998, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.824242424242424, |
| "grad_norm": 0.4318199157714844, |
| "learning_rate": 1.0419004658253795e-07, |
| "loss": 0.3586, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.827272727272727, |
| "grad_norm": 0.33187335729599, |
| "learning_rate": 1.0064025630628583e-07, |
| "loss": 0.3688, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.83030303030303, |
| "grad_norm": 0.33873116970062256, |
| "learning_rate": 9.715137527511298e-08, |
| "loss": 0.3752, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.8333333333333335, |
| "grad_norm": 0.3571045994758606, |
| "learning_rate": 9.372344686307655e-08, |
| "loss": 0.3614, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.8363636363636364, |
| "grad_norm": 0.3408893346786499, |
| "learning_rate": 9.035651368646647e-08, |
| "loss": 0.3765, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.8393939393939394, |
| "grad_norm": 0.35720014572143555, |
| "learning_rate": 8.705061760327372e-08, |
| "loss": 0.354, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.8424242424242423, |
| "grad_norm": 0.33762773871421814, |
| "learning_rate": 8.380579971267178e-08, |
| "loss": 0.3729, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.8454545454545457, |
| "grad_norm": 0.3154089152812958, |
| "learning_rate": 8.06221003545038e-08, |
| "loss": 0.3574, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.8484848484848486, |
| "grad_norm": 0.3321691155433655, |
| "learning_rate": 7.749955910878459e-08, |
| "loss": 0.3676, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.8515151515151516, |
| "grad_norm": 0.3242235779762268, |
| "learning_rate": 7.443821479520441e-08, |
| "loss": 0.3932, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.8545454545454545, |
| "grad_norm": 0.3255268931388855, |
| "learning_rate": 7.143810547264762e-08, |
| "loss": 0.3812, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.8575757575757574, |
| "grad_norm": 0.3191761076450348, |
| "learning_rate": 6.849926843872257e-08, |
| "loss": 0.3922, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.8606060606060604, |
| "grad_norm": 0.34105202555656433, |
| "learning_rate": 6.562174022929358e-08, |
| "loss": 0.3734, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.8636363636363638, |
| "grad_norm": 0.3678226172924042, |
| "learning_rate": 6.280555661802857e-08, |
| "loss": 0.3707, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.8666666666666667, |
| "grad_norm": 0.34355148673057556, |
| "learning_rate": 6.005075261595495e-08, |
| "loss": 0.3835, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.8696969696969696, |
| "grad_norm": 0.3354954123497009, |
| "learning_rate": 5.735736247102497e-08, |
| "loss": 0.3691, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.8727272727272726, |
| "grad_norm": 0.3545462489128113, |
| "learning_rate": 5.472541966768552e-08, |
| "loss": 0.3765, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.875757575757576, |
| "grad_norm": 0.30889591574668884, |
| "learning_rate": 5.215495692646788e-08, |
| "loss": 0.3946, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.878787878787879, |
| "grad_norm": 0.32154256105422974, |
| "learning_rate": 4.9646006203577515e-08, |
| "loss": 0.3881, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.881818181818182, |
| "grad_norm": 0.33414721488952637, |
| "learning_rate": 4.719859869049659e-08, |
| "loss": 0.3714, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.8848484848484848, |
| "grad_norm": 0.34182053804397583, |
| "learning_rate": 4.481276481359764e-08, |
| "loss": 0.3876, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.8878787878787877, |
| "grad_norm": 0.34174323081970215, |
| "learning_rate": 4.2488534233764425e-08, |
| "loss": 0.3727, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.8909090909090907, |
| "grad_norm": 0.3377932012081146, |
| "learning_rate": 4.02259358460233e-08, |
| "loss": 0.352, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.893939393939394, |
| "grad_norm": 0.353381484746933, |
| "learning_rate": 3.8024997779185225e-08, |
| "loss": 0.375, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.896969696969697, |
| "grad_norm": 0.32608339190483093, |
| "learning_rate": 3.588574739549322e-08, |
| "loss": 0.3759, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.3262963891029358, |
| "learning_rate": 3.3808211290284886e-08, |
| "loss": 0.3906, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.9030303030303033, |
| "grad_norm": 0.33256980776786804, |
| "learning_rate": 3.179241529166099e-08, |
| "loss": 0.3642, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.9060606060606062, |
| "grad_norm": 0.32789772748947144, |
| "learning_rate": 2.983838446016407e-08, |
| "loss": 0.3708, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.34206628799438477, |
| "learning_rate": 2.7946143088466437e-08, |
| "loss": 0.3714, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.912121212121212, |
| "grad_norm": 0.3323133885860443, |
| "learning_rate": 2.6115714701069327e-08, |
| "loss": 0.3664, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.915151515151515, |
| "grad_norm": 0.3356127440929413, |
| "learning_rate": 2.434712205400924e-08, |
| "loss": 0.3686, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.918181818181818, |
| "grad_norm": 0.32426542043685913, |
| "learning_rate": 2.264038713457706e-08, |
| "loss": 0.3526, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.9212121212121214, |
| "grad_norm": 0.3274119198322296, |
| "learning_rate": 2.0995531161041028e-08, |
| "loss": 0.3462, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.9242424242424243, |
| "grad_norm": 0.33501943945884705, |
| "learning_rate": 1.94125745823881e-08, |
| "loss": 0.3877, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.9272727272727272, |
| "grad_norm": 0.3432499170303345, |
| "learning_rate": 1.789153707806357e-08, |
| "loss": 0.4045, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.93030303030303, |
| "grad_norm": 0.3250257074832916, |
| "learning_rate": 1.6432437557732383e-08, |
| "loss": 0.3964, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.9333333333333336, |
| "grad_norm": 0.34239816665649414, |
| "learning_rate": 1.5035294161039882e-08, |
| "loss": 0.3536, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.9363636363636365, |
| "grad_norm": 0.32236921787261963, |
| "learning_rate": 1.3700124257388092e-08, |
| "loss": 0.4131, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.9393939393939394, |
| "grad_norm": 0.3150605857372284, |
| "learning_rate": 1.2426944445719791e-08, |
| "loss": 0.3854, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.9424242424242424, |
| "grad_norm": 0.33636316657066345, |
| "learning_rate": 1.1215770554312e-08, |
| "loss": 0.356, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.9454545454545453, |
| "grad_norm": 0.31633424758911133, |
| "learning_rate": 1.006661764057837e-08, |
| "loss": 0.3695, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.9484848484848483, |
| "grad_norm": 0.3274213671684265, |
| "learning_rate": 8.979499990882102e-09, |
| "loss": 0.3941, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.9515151515151516, |
| "grad_norm": 0.3197808265686035, |
| "learning_rate": 7.954431120359985e-09, |
| "loss": 0.3808, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.9545454545454546, |
| "grad_norm": 0.3330005705356598, |
| "learning_rate": 6.991423772753636e-09, |
| "loss": 0.3913, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.9575757575757575, |
| "grad_norm": 0.3442140817642212, |
| "learning_rate": 6.090489920249076e-09, |
| "loss": 0.3846, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.9606060606060605, |
| "grad_norm": 0.3263948857784271, |
| "learning_rate": 5.2516407633312895e-09, |
| "loss": 0.3581, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.963636363636364, |
| "grad_norm": 0.31933942437171936, |
| "learning_rate": 4.474886730641004e-09, |
| "loss": 0.3722, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.966666666666667, |
| "grad_norm": 0.316170334815979, |
| "learning_rate": 3.760237478849793e-09, |
| "loss": 0.363, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.9696969696969697, |
| "grad_norm": 0.3238048553466797, |
| "learning_rate": 3.1077018925351753e-09, |
| "loss": 0.3703, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.9727272727272727, |
| "grad_norm": 0.3422054350376129, |
| "learning_rate": 2.5172880840745873e-09, |
| "loss": 0.3714, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.9757575757575756, |
| "grad_norm": 0.2909259796142578, |
| "learning_rate": 1.989003393539912e-09, |
| "loss": 0.4171, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.9787878787878785, |
| "grad_norm": 0.32509645819664, |
| "learning_rate": 1.5228543886114389e-09, |
| "loss": 0.37, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.981818181818182, |
| "grad_norm": 0.33289819955825806, |
| "learning_rate": 1.118846864490708e-09, |
| "loss": 0.3711, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.984848484848485, |
| "grad_norm": 0.32753217220306396, |
| "learning_rate": 7.769858438338995e-10, |
| "loss": 0.3621, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.987878787878788, |
| "grad_norm": 0.3397236764431, |
| "learning_rate": 4.972755766846637e-10, |
| "loss": 0.364, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.990909090909091, |
| "grad_norm": 0.3434215188026428, |
| "learning_rate": 2.797195404247166e-10, |
| "loss": 0.358, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.993939393939394, |
| "grad_norm": 0.33324527740478516, |
| "learning_rate": 1.2432043972832042e-10, |
| "loss": 0.3845, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.996969696969697, |
| "grad_norm": 0.32625526189804077, |
| "learning_rate": 3.108020653008748e-11, |
| "loss": 0.3751, |
| "step": 989 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.3243604302406311, |
| "learning_rate": 0.0, |
| "loss": 0.3597, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 990, |
| "total_flos": 1.437255561186902e+18, |
| "train_loss": 0.0, |
| "train_runtime": 6.6052, |
| "train_samples_per_second": 14386.761, |
| "train_steps_per_second": 149.881 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.437255561186902e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|